krkn-lib 5.1.4__py3-none-any.whl → 5.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- krkn_lib/__init__.py +0 -0
- krkn_lib/k8s/krkn_kubernetes.py +12 -380
- krkn_lib/k8s/pod_monitor/__init__.py +12 -0
- krkn_lib/k8s/pod_monitor/pod_monitor.py +304 -0
- krkn_lib/models/elastic/models.py +5 -2
- krkn_lib/models/k8s/models.py +1 -25
- krkn_lib/models/pod_monitor/__init__.py +0 -0
- krkn_lib/models/pod_monitor/models.py +224 -0
- krkn_lib/models/telemetry/models.py +6 -4
- krkn_lib/tests/base_test.py +32 -31
- krkn_lib/tests/test_krkn_elastic_models.py +5 -4
- krkn_lib/tests/test_krkn_kubernetes_pods_monitor.py +513 -0
- krkn_lib/tests/test_krkn_kubernetes_pods_monitor_models.py +405 -0
- krkn_lib/tests/test_utils.py +12 -8
- {krkn_lib-5.1.4.dist-info → krkn_lib-5.1.6.dist-info}/METADATA +1 -2
- {krkn_lib-5.1.4.dist-info → krkn_lib-5.1.6.dist-info}/RECORD +18 -14
- krkn_lib/k8s/pods_monitor_pool.py +0 -202
- krkn_lib/tests/test_krkn_kubernetes_monitor.py +0 -367
- krkn_lib/tests/test_krkn_kubernetes_pods_monitor_pool.py +0 -128
- {krkn_lib-5.1.4.dist-info → krkn_lib-5.1.6.dist-info}/LICENSE +0 -0
- {krkn_lib-5.1.4.dist-info → krkn_lib-5.1.6.dist-info}/WHEEL +0 -0
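The most visible change in this release is the replacement of krkn_lib/k8s/pods_monitor_pool.py with the new krkn_lib/k8s/pod_monitor package and its models under krkn_lib/models/pod_monitor. Based on the tests added in this diff, the selection functions start monitoring asynchronously and return a future whose result is a snapshot of pod recovery data. The sketch below is a minimal illustration of that flow; the kubeconfig loading and the example label are assumptions and not part of the package.

# Minimal usage sketch (not part of the package): the pod_monitor entry
# points and snapshot fields below are the ones exercised by the new tests;
# the kubeconfig loading and the "app=my-workload" label are assumptions.
from kubernetes import client, config

from krkn_lib.k8s.pod_monitor import select_and_monitor_by_label

config.load_kube_config()  # assumes a local kubeconfig; use load_incluster_config() in-cluster
v1 = client.CoreV1Api()

# Starts watching the selected pods in the background and returns a future.
future = select_and_monitor_by_label(
    label_selector="app=my-workload",
    max_timeout=120,  # maximum seconds the monitor will wait for recovery
    v1_client=v1,
)

# ... inject the chaos scenario here (e.g. delete the selected pods) ...

snapshot = future.result()  # returns early on recovery, or at max_timeout
status = snapshot.get_pods_status()
for pod in status.recovered:
    print(pod.pod_name, pod.namespace, pod.total_recovery_time)
for pod in status.unrecovered:
    print("not recovered within timeout:", pod.pod_name)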
krkn_lib/tests/base_test.py
CHANGED
@@ -549,7 +549,7 @@ class BaseTest(unittest.TestCase):
                 "kernel_version": "5.4.0-66-generic",
                 "kubelet_version": "v2.1.2",
                 "os_version": "Linux",
-                "nodes_type": "master"
+                "nodes_type": "master",
             }
         ],
         "node_taints": [
@@ -573,36 +573,37 @@ class BaseTest(unittest.TestCase):
             }
         ],
         "virt_checks": [
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            {
+                "node_name": "h03-r660",
+                "namespace": "benchmark-runner",
+                "ip_address": "0.0.0.0",
+                "vm_name": "windows-vm-50",
+                "status": True,
+                "start_timestamp": "2025-03-12T14:57:34.555878",
+                "end_timestamp": "2025-03-12T14:57:54.904352",
+                "duration": 20.348474,
+            },
+            {
+                "node_name": "h27-r660",
+                "namespace": "benchmark-runner",
+                "vm_name": "windows-vm-51",
+                "ip_address": "0.0.0.1",
+                "status": True,
+                "start_timestamp": "2025-03-12T14:57:34.759105",
+                "end_timestamp": "2025-03-12T14:57:54.904352",
+                "duration": 20.145247,
+            },
+            {
+                "node_name": "h10-r660",
+                "namespace": "benchmark-runner",
+                "vm_name": "windows-vm-52",
+                "ip_address": "0.0.0.2",
+                "status": False,
+                "start_timestamp": "2025-03-12T14:57:35.308957",
+                "end_timestamp": "2025-03-12T14:57:54.904352",
+                "duration": 19.595395,
+            },
+        ],
         "total_node_count": 3,
         "cloud_infrastructure": "AWS",
         "cloud_type": "EC2",
krkn_lib/tests/test_krkn_elastic_models.py
CHANGED

@@ -238,15 +238,16 @@ class TestKrknElasticModels(BaseTest):
            elastic_telemetry.virt_checks[0].end_timestamp,
            datetime.datetime.fromisoformat("2025-03-12T14:57:54.904352"),
        )
-        self.assertEqual(
-            elastic_telemetry.virt_checks[0].duration, 20.348474
-        )
+        self.assertEqual(elastic_telemetry.virt_checks[0].duration, 20.348474)

        self.assertEqual(elastic_telemetry.total_node_count, 3)
        self.assertEqual(elastic_telemetry.cloud_infrastructure, "AWS")
        self.assertEqual(elastic_telemetry.cloud_type, "EC2")
        self.assertEqual(elastic_telemetry.run_uuid, run_uuid)
-        self.assertEqual(
+        self.assertEqual(
+            elastic_telemetry.build_url,
+            "https://github.com/krkn-chaos/krkn-lib/actions/runs/16724993547",
+        )

    def test_ElasticChaosRunTelemetry(self):
        run_uuid = str(uuid.uuid4())
krkn_lib/tests/test_krkn_kubernetes_pods_monitor.py
ADDED

@@ -0,0 +1,513 @@
+import json
+import time
+
+from krkn_lib.k8s.pod_monitor import (
+    select_and_monitor_by_label,
+    select_and_monitor_by_name_pattern_and_namespace_pattern,
+    select_and_monitor_by_namespace_pattern_and_label,
+)
+from krkn_lib.tests import BaseTest
+
+
+class TestKrknKubernetesPodsMonitor(BaseTest):
+    def test_monitor_pods_by_label_no_pods_affected(self):
+        # test no pods affected
+        namespace = "test-ns-0-" + self.get_random_string(10)
+        delayed_1 = "delayed-0-" + self.get_random_string(10)
+        delayed_2 = "delayed-0-" + self.get_random_string(10)
+        label = "readiness-" + self.get_random_string(5)
+        self.deploy_namespace(namespace, [])
+        self.deploy_delayed_readiness_pod(delayed_1, namespace, 0, label)
+        self.deploy_delayed_readiness_pod(delayed_2, namespace, 0, label)
+
+        while not self.lib_k8s.is_pod_running(
+            delayed_1, namespace
+        ) and not self.lib_k8s.is_pod_running(delayed_2, namespace):
+            time.sleep(1)
+            continue
+        time.sleep(3)
+
+        monitor_timeout = 2
+
+        start_time = time.time()
+
+        future = select_and_monitor_by_label(
+            label_selector=f"test={label}",
+            max_timeout=monitor_timeout,
+            v1_client=self.lib_k8s.cli,
+        )
+        snapshot = future.result()
+        end_time = time.time()
+        pods_status = snapshot.get_pods_status()
+        self.background_delete_pod(delayed_1, namespace)
+        self.background_delete_pod(delayed_2, namespace)
+        # added half second of delay that might be introduced to API
+        # calls
+        self.assertAlmostEqual(end_time - start_time, monitor_timeout, 0)
+
+        self.assertEqual(len(pods_status.recovered), 0)
+        self.assertEqual(len(pods_status.unrecovered), 0)
+        self.background_delete_ns(namespace)
+
+    def test_pods_by_name_and_namespace_pattern_different_names_respawn(
+        self,
+    ):
+        # test pod with different name recovered
+        namespace_random_pattern = "test-ns-1-" + self.get_random_string(3)
+        namespace = f"{namespace_random_pattern}-" + self.get_random_string(10)
+        delayed_1 = "delayed-1-" + self.get_random_string(10)
+        delayed_2 = "delayed-1-" + self.get_random_string(10)
+        delayed_respawn = "delayed-1-respawn-" + self.get_random_string(10)
+        label = "readiness-" + self.get_random_string(5)
+        pod_delay = 1
+        monitor_timeout = 10
+        self.deploy_namespace(namespace, [])
+        self.deploy_delayed_readiness_pod(delayed_1, namespace, 0, label)
+        self.deploy_delayed_readiness_pod(delayed_2, namespace, 0, label)
+        while not self.lib_k8s.is_pod_running(
+            delayed_1, namespace
+        ) and not self.lib_k8s.is_pod_running(delayed_2, namespace):
+            time.sleep(1)
+            continue
+        time.sleep(3)
+
+        future = select_and_monitor_by_name_pattern_and_namespace_pattern(
+            pod_name_pattern="^delayed-1-.*",
+            namespace_pattern=f"^{namespace_random_pattern}-.*",
+            max_timeout=monitor_timeout,
+            v1_client=self.lib_k8s.cli,
+        )
+
+        self.background_delete_pod(delayed_1, namespace)
+        # to prevent the pod scheduling happening before the deletion
+        # event that in a real world scenario
+        # can't happen (eg. replicaset or deployment)
+        time.sleep(1)
+        self.deploy_delayed_readiness_pod(
+            delayed_respawn, namespace, pod_delay, label
+        )
+
+        while not self.lib_k8s.is_pod_running(
+            delayed_1, namespace
+        ) and not self.lib_k8s.is_pod_running(delayed_respawn, namespace):
+            time.sleep(1)
+            continue
+        time.sleep(3)
+
+        snapshot = future.result()
+        print(f"\nRunning test ID: {self.id()}")
+        print(json.dumps(snapshot.to_dict(), indent=True))
+        pods_status = snapshot.get_pods_status()
+
+        self.assertEqual(len(pods_status.recovered), 1)
+        self.assertEqual(pods_status.recovered[0].pod_name, delayed_respawn)
+        self.assertEqual(pods_status.recovered[0].namespace, namespace)
+        self.assertTrue(pods_status.recovered[0].pod_readiness_time > 0)
+        self.assertTrue(pods_status.recovered[0].pod_rescheduling_time > 0)
+        self.assertTrue(
+            pods_status.recovered[0].total_recovery_time >= pod_delay
+        )
+        self.assertEqual(len(pods_status.unrecovered), 0)
+        self.background_delete_ns(namespace)
+
+    def test_pods_by_namespace_pattern_and_label_same_name_respawn(
+        self,
+    ):
+        # flaky
+        # test pod with same name recovered
+        namespace = "test-ns-2-" + self.get_random_string(10)
+        delayed_1 = "delayed-2-1-" + self.get_random_string(10)
+        delayed_2 = "delayed-2-2-" + self.get_random_string(10)
+        label = "readiness-" + self.get_random_string(5)
+        self.deploy_namespace(namespace, [])
+        self.deploy_delayed_readiness_pod(delayed_1, namespace, 0, label)
+        self.deploy_delayed_readiness_pod(delayed_2, namespace, 0, label)
+        self.wait_pod(delayed_1, namespace)
+        self.wait_pod(delayed_2, namespace)
+        monitor_timeout = 45
+        pod_delay = 0
+
+        future = select_and_monitor_by_namespace_pattern_and_label(
+            namespace_pattern="^test-ns-2-.*",
+            label_selector=f"test={label}",
+            max_timeout=monitor_timeout,
+            v1_client=self.lib_k8s.cli,
+        )
+
+        self.lib_k8s.delete_pod(delayed_1, namespace)
+        # to prevent the pod scheduling happening before the deletion
+        # event that in a real world scenario can't happen
+        # (eg. replicaset or deployment)
+        time.sleep(1)
+        self.deploy_delayed_readiness_pod(
+            delayed_1, namespace, pod_delay, label
+        )
+
+        while not self.lib_k8s.is_pod_running(delayed_1, namespace):
+            time.sleep(1)
+            continue
+        time.sleep(3)
+
+        snapshot = future.result()
+        # print(f"\nRunning test ID: {self.id()}")
+        # print(json.dumps(snapshot.to_dict(), indent=True))
+        pods_status = snapshot.get_pods_status()
+        self.background_delete_ns(namespace)
+        self.assertEqual(len(pods_status.recovered), 1)
+        self.assertEqual(pods_status.recovered[0].pod_name, delayed_1)
+        self.assertEqual(pods_status.recovered[0].namespace, namespace)
+        self.assertTrue(pods_status.recovered[0].pod_readiness_time > 0)
+        self.assertTrue(pods_status.recovered[0].pod_rescheduling_time > 0)
+        self.assertTrue(
+            pods_status.recovered[0].total_recovery_time >= pod_delay
+        )
+        self.assertEqual(len(pods_status.unrecovered), 0)
+
+    def test_pods_by_label_respawn_timeout(self):
+        # test pod will not recover before the timeout
+        namespace = "test-ns-3-" + self.get_random_string(10)
+        delayed_1 = "delayed-3-" + self.get_random_string(10)
+        delayed_2 = "delayed-3-" + self.get_random_string(10)
+        delayed_respawn = "delayed-respawn-3-" + self.get_random_string(10)
+        label = "readiness-" + self.get_random_string(5)
+
+        self.deploy_namespace(namespace, [])
+        self.deploy_delayed_readiness_pod(delayed_1, namespace, 0, label)
+        self.deploy_delayed_readiness_pod(delayed_2, namespace, 0, label)
+        while not self.lib_k8s.is_pod_running(
+            delayed_1, namespace
+        ) and not self.lib_k8s.is_pod_running(delayed_2, namespace):
+            time.sleep(1)
+            continue
+        time.sleep(3)
+
+        monitor_timeout = 20
+        pod_delay = 21
+
+        future = select_and_monitor_by_label(
+            label_selector=f"test={label}",
+            max_timeout=monitor_timeout,
+            v1_client=self.lib_k8s.cli,
+        )
+
+        self.background_delete_pod(delayed_1, namespace)
+        # to prevent the pod scheduling happening before the deletion
+        # event that in a real world scenario can't happen
+        # (eg. replicaset or deployment)
+        time.sleep(1)
+        self.deploy_delayed_readiness_pod(
+            delayed_respawn, namespace, pod_delay, label
+        )
+
+        snapshot = future.result()
+        # print(f"\nRunning test ID: {self.id()}")
+        # print(json.dumps(snapshot.to_dict(), indent=True))
+        pods_status = snapshot.get_pods_status()
+
+        self.assertEqual(len(pods_status.unrecovered), 1)
+        self.assertEqual(pods_status.unrecovered[0].pod_name, delayed_respawn)
+        self.assertEqual(pods_status.unrecovered[0].namespace, namespace)
+        self.assertEqual(len(pods_status.recovered), 0)
+        self.background_delete_ns(namespace)
+
+    def test_pods_by_label_never_respawn(self):
+        # test pod will never recover
+        namespace = "test-ns-4-" + self.get_random_string(10)
+        delayed_1 = "delayed-4-" + self.get_random_string(10)
+        delayed_2 = "delayed-4-" + self.get_random_string(10)
+        label = "readiness-" + self.get_random_string(5)
+        self.deploy_namespace(namespace, [])
+        self.deploy_delayed_readiness_pod(delayed_1, namespace, 0, label)
+        self.deploy_delayed_readiness_pod(delayed_2, namespace, 0, label)
+
+        while not self.lib_k8s.is_pod_running(
+            delayed_1, namespace
+        ) and not self.lib_k8s.is_pod_running(delayed_2, namespace):
+            time.sleep(1)
+            continue
+        time.sleep(3)
+
+        monitor_timeout = 15
+
+        future = select_and_monitor_by_label(
+            label_selector=f"test={label}",
+            max_timeout=monitor_timeout,
+            v1_client=self.lib_k8s.cli,
+        )
+        self.background_delete_pod(delayed_1, namespace)
+        snapshot = future.result()
+        # print(f"\nRunning test ID: {self.id()}")
+        # print(json.dumps(snapshot.to_dict(), indent=True))
+        pods_status = snapshot.get_pods_status()
+
+        self.assertEqual(len(pods_status.unrecovered), 1)
+        self.assertEqual(len(pods_status.recovered), 0)
+        self.background_delete_ns(namespace)
+
+    def test_pods_by_label_multiple_respawn(self):
+        # test pod will never recover
+        namespace = "test-ns-4-" + self.get_random_string(10)
+        delayed_1 = "delayed-4-" + self.get_random_string(10)
+        delayed_2 = "delayed-4-" + self.get_random_string(10)
+        delayed_3 = "delayed-4-" + self.get_random_string(10)
+        delayed_respawn_1 = "delayed-4-respawn-" + self.get_random_string(10)
+        delayed_respawn_2 = "delayed-4-respawn-" + self.get_random_string(10)
+        label = "readiness-" + self.get_random_string(5)
+        self.deploy_namespace(namespace, [])
+        self.deploy_delayed_readiness_pod(delayed_1, namespace, 0, label)
+        self.deploy_delayed_readiness_pod(delayed_2, namespace, 0, label)
+        self.deploy_delayed_readiness_pod(delayed_3, namespace, 0, label)
+        while (
+            not self.lib_k8s.is_pod_running(delayed_1, namespace)
+            and not self.lib_k8s.is_pod_running(delayed_2, namespace)
+            and not self.lib_k8s.is_pod_running(delayed_3, namespace)
+        ):
+            time.sleep(1)
+            continue
+        time.sleep(3)
+
+        monitor_timeout = 20
+        pod_delay = 2
+
+        future = select_and_monitor_by_label(
+            label_selector=f"test={label}",
+            max_timeout=monitor_timeout,
+            v1_client=self.lib_k8s.cli,
+        )
+
+        self.background_delete_pod(delayed_1, namespace)
+        self.background_delete_pod(delayed_2, namespace)
+        # to prevent the pod scheduling happening before the deletion
+        # event that in a real world scenario can't happen
+        # (eg. replicaset or deployment)
+        time.sleep(1)
+        self.deploy_delayed_readiness_pod(
+            delayed_respawn_1, namespace, pod_delay, label
+        )
+        # introduce a delay in the next recovering pod to check
+        # if delayed recoveries are captured
+        time.sleep(2)
+        self.deploy_delayed_readiness_pod(
+            delayed_respawn_2, namespace, pod_delay, label
+        )
+
+        snapshot = future.result()
+        # print(f"\nRunning test ID: {self.id()}")
+        # print(json.dumps(snapshot.to_dict(), indent=True))
+        pods_status = snapshot.get_pods_status()
+
+        self.background_delete_pod(delayed_3, namespace)
+        self.background_delete_pod(delayed_respawn_1, namespace)
+        self.background_delete_pod(delayed_respawn_2, namespace)
+
+        self.assertEqual(len(pods_status.unrecovered), 0)
+        self.assertEqual(len(pods_status.recovered), 2)
+        self.assertTrue(
+            delayed_respawn_1 in [p.pod_name for p in pods_status.recovered]
+        )
+        self.assertTrue(
+            delayed_respawn_2 in [p.pod_name for p in pods_status.recovered]
+        )
+        self.background_delete_ns(namespace)
+
+    def test_pods_by_label_multiple_respawn_one_too_late(self):
+        # flaky
+        # test pod will never recover
+        namespace = "test-ns-4-" + self.get_random_string(10)
+        delayed_1 = "delayed-4-" + self.get_random_string(10)
+        delayed_2 = "delayed-4-" + self.get_random_string(10)
+        delayed_3 = "delayed-4-" + self.get_random_string(10)
+        delayed_respawn_1 = "delayed-4-respawn-" + self.get_random_string(10)
+        delayed_respawn_2 = "delayed-4-respawn-" + self.get_random_string(10)
+        label = "readiness-" + self.get_random_string(5)
+        self.deploy_namespace(namespace, [])
+        self.deploy_delayed_readiness_pod(delayed_1, namespace, 0, label)
+        self.deploy_delayed_readiness_pod(delayed_2, namespace, 0, label)
+        self.deploy_delayed_readiness_pod(delayed_3, namespace, 0, label)
+        while (
+            not self.lib_k8s.is_pod_running(delayed_1, namespace)
+            and not self.lib_k8s.is_pod_running(delayed_2, namespace)
+            and not self.lib_k8s.is_pod_running(delayed_3, namespace)
+        ):
+            time.sleep(1)
+            continue
+        time.sleep(3)
+
+        monitor_timeout = 20
+        pod_delay = 0
+        pod_too_much_delay = 25
+        future = select_and_monitor_by_label(
+            label_selector=f"test={label}",
+            max_timeout=monitor_timeout,
+            v1_client=self.lib_k8s.cli,
+        )
+        self.background_delete_pod(delayed_1, namespace)
+        self.background_delete_pod(delayed_2, namespace)
+        # to prevent the pod scheduling happening before the deletion
+        # event that in a real world scenario can't happen
+        # (eg. replicaset or deployment)
+        time.sleep(1)
+        self.deploy_delayed_readiness_pod(
+            delayed_respawn_1, namespace, pod_delay, label
+        )
+        self.deploy_delayed_readiness_pod(
+            delayed_respawn_2, namespace, pod_too_much_delay, label
+        )
+
+        snapshot = future.result()
+        # print(f"\nRunning test ID: {self.id()}")
+        # print(json.dumps(snapshot.to_dict(), indent=True))
+        pods_status = snapshot.get_pods_status()
+        self.background_delete_ns(namespace)
+
+        self.assertEqual(len(pods_status.unrecovered), 1)
+        self.assertEqual(len(pods_status.recovered), 1)
+        self.assertTrue(
+            delayed_respawn_1 in [p.pod_name for p in pods_status.recovered]
+        )
+        self.assertTrue(
+            delayed_respawn_2 in [p.pod_name for p in pods_status.unrecovered]
+        )
+
+    def test_pods_by_label_multiple_respawn_one_fails(self):
+        # test pod will never recover
+        namespace = "test-ns-4-" + self.get_random_string(10)
+        delayed_1 = "delayed-4-" + self.get_random_string(10)
+        delayed_2 = "delayed-4-" + self.get_random_string(10)
+        delayed_3 = "delayed-4-" + self.get_random_string(10)
+        delayed_respawn_1 = "delayed-4-respawn-" + self.get_random_string(10)
+        label = "readiness-" + self.get_random_string(5)
+        self.deploy_namespace(namespace, [])
+        self.deploy_delayed_readiness_pod(delayed_1, namespace, 0, label)
+        self.deploy_delayed_readiness_pod(delayed_2, namespace, 0, label)
+        self.deploy_delayed_readiness_pod(delayed_3, namespace, 0, label)
+        while (
+            not self.lib_k8s.is_pod_running(delayed_1, namespace)
+            and not self.lib_k8s.is_pod_running(delayed_2, namespace)
+            and not self.lib_k8s.is_pod_running(delayed_3, namespace)
+        ):
+            time.sleep(1)
+            continue
+        time.sleep(3)
+
+        monitor_timeout = 10
+        pod_delay = 1
+        future = select_and_monitor_by_label(
+            label_selector=f"test={label}",
+            max_timeout=monitor_timeout,
+            v1_client=self.lib_k8s.cli,
+        )
+        self.background_delete_pod(delayed_1, namespace)
+        self.background_delete_pod(delayed_2, namespace)
+        time.sleep(0.1)
+        self.deploy_delayed_readiness_pod(
+            delayed_respawn_1, namespace, pod_delay, label
+        )
+        snapshot = future.result()
+        # print(f"\nRunning test ID: {self.id()}")
+        # print(json.dumps(snapshot.to_dict(), indent=True))
+        pods_status = snapshot.get_pods_status()
+        self.background_delete_ns(namespace)
+        self.assertEqual(len(pods_status.unrecovered), 1)
+        self.assertEqual(len(pods_status.recovered), 1)
+
+        self.assertTrue(
+            delayed_respawn_1 in [p.pod_name for p in pods_status.recovered]
+        )
+
+    def test_pods_becoming_not_ready(self):
+        # test pod will never recover
+        namespace = "test-ns-5-" + self.get_random_string(10)
+        delayed_1 = "delayed-5-" + self.get_random_string(10)
+        self.deploy_namespace(namespace, [])
+        self.deploy_nginx(namespace, delayed_1)
+        while not self.lib_k8s.is_pod_running(delayed_1, namespace):
+            time.sleep(1)
+            continue
+        time.sleep(3)
+
+        monitor_timeout = 20
+
+        future = select_and_monitor_by_name_pattern_and_namespace_pattern(
+            delayed_1,
+            namespace,
+            max_timeout=monitor_timeout,
+            v1_client=self.lib_k8s.cli,
+        )
+
+        self.lib_k8s.exec_cmd_in_pod(["kill 1"], delayed_1, namespace)
+        snapshot = future.result()
+
+        pods_status = snapshot.get_pods_status()
+        self.background_delete_ns(namespace)
+        self.assertEqual(len(pods_status.recovered), 1)
+        self.assertEqual(pods_status.recovered[0].pod_rescheduling_time, None)
+        self.assertGreater(pods_status.recovered[0].pod_readiness_time, 0)
+
+    def test_monitor_stopping_earlier(self):
+
+        # tests that the monitor deadlines:
+        # - if the monitored pods status changes and is restored
+        # before than the 120 seconds deadline the monitor returns earlier
+        # the assertions checks that the monitor returns within 10 seconds
+        # 120 - (end-start) >= 110
+        # - if no change is made in the set of monitor pods the monitor is
+        # forced to wait all the time set
+
+        namespace = "test-ns-6-" + self.get_random_string(10)
+        delayed_1 = "delayed-6-" + self.get_random_string(10)
+        delayed_respawn_1 = "delayed-6-respawn-" + self.get_random_string(10)
+        label = "readiness-" + self.get_random_string(5)
+        self.deploy_namespace(namespace, [])
+        self.deploy_delayed_readiness_pod(delayed_1, namespace, 0, label)
+        pod_delay = 3
+        while not self.lib_k8s.is_pod_running(delayed_1, namespace):
+            time.sleep(1)
+            continue
+        time.sleep(3)
+
+        monitor_timeout = 120
+
+        start_time = time.time()
+
+        future = select_and_monitor_by_label(
+            label_selector=f"test={label}",
+            max_timeout=monitor_timeout,
+            v1_client=self.lib_k8s.cli,
+        )
+        self.deploy_delayed_readiness_pod(
+            delayed_respawn_1, namespace, pod_delay, label
+        )
+        _ = future.result()
+        end_time = time.time()
+
+        self.assertGreater(monitor_timeout - (end_time - start_time), 110)
+
+    def test_monitor_forced_to_wait_with_no_status_change(self):
+        # tests that the monitor deadlines:
+        # - if no change is made in the set of monitored pods the monitor is
+        # forced to wait all the time set in case something happens
+
+        namespace = "test-ns-7-" + self.get_random_string(10)
+        delayed_1 = "delayed-7" + self.get_random_string(10)
+        label = "readiness-" + self.get_random_string(5)
+        self.deploy_namespace(namespace, [])
+        self.deploy_delayed_readiness_pod(delayed_1, namespace, 0, label)
+        while not self.lib_k8s.is_pod_running(delayed_1, namespace):
+            time.sleep(1)
+            continue
+        time.sleep(3)
+
+        monitor_timeout = 20
+
+        start_time = time.time()
+
+        future = select_and_monitor_by_label(
+            label_selector=f"test={label}",
+            max_timeout=monitor_timeout,
+            v1_client=self.lib_k8s.cli,
+        )
+        _ = future.result()
+        end_time = time.time()
+
+        self.assertGreaterEqual((end_time - start_time), monitor_timeout)