krkn-lib 5.1.4__py3-none-any.whl → 5.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -549,7 +549,7 @@ class BaseTest(unittest.TestCase):
 "kernel_version": "5.4.0-66-generic",
 "kubelet_version": "v2.1.2",
 "os_version": "Linux",
-"nodes_type": "master"
+"nodes_type": "master",
 }
 ],
 "node_taints": [
@@ -573,36 +573,37 @@ class BaseTest(unittest.TestCase):
 }
 ],
 "virt_checks": [
-{
-"node_name": "h03-r660",
-"namespace": "benchmark-runner",
-"ip_address": "0.0.0.0",
-"vm_name": "windows-vm-50",
-"status": True,
-"start_timestamp": "2025-03-12T14:57:34.555878",
-"end_timestamp": "2025-03-12T14:57:54.904352",
-"duration": 20.348474
-},
-{
-"node_name": "h27-r660",
-"namespace": "benchmark-runner",
-"vm_name": "windows-vm-51",
-"ip_address": "0.0.0.1",
-"status": True,
-"start_timestamp": "2025-03-12T14:57:34.759105",
-"end_timestamp": "2025-03-12T14:57:54.904352",
-"duration": 20.145247
-},
-{
-"node_name": "h10-r660",
-"namespace": "benchmark-runner",
-"vm_name": "windows-vm-52",
-"ip_address": "0.0.0.2",
-"status": False,
-"start_timestamp": "2025-03-12T14:57:35.308957",
-"end_timestamp": "2025-03-12T14:57:54.904352",
-"duration": 19.595395
-}],
+{
+"node_name": "h03-r660",
+"namespace": "benchmark-runner",
+"ip_address": "0.0.0.0",
+"vm_name": "windows-vm-50",
+"status": True,
+"start_timestamp": "2025-03-12T14:57:34.555878",
+"end_timestamp": "2025-03-12T14:57:54.904352",
+"duration": 20.348474,
+},
+{
+"node_name": "h27-r660",
+"namespace": "benchmark-runner",
+"vm_name": "windows-vm-51",
+"ip_address": "0.0.0.1",
+"status": True,
+"start_timestamp": "2025-03-12T14:57:34.759105",
+"end_timestamp": "2025-03-12T14:57:54.904352",
+"duration": 20.145247,
+},
+{
+"node_name": "h10-r660",
+"namespace": "benchmark-runner",
+"vm_name": "windows-vm-52",
+"ip_address": "0.0.0.2",
+"status": False,
+"start_timestamp": "2025-03-12T14:57:35.308957",
+"end_timestamp": "2025-03-12T14:57:54.904352",
+"duration": 19.595395,
+},
+],
 "total_node_count": 3,
 "cloud_infrastructure": "AWS",
 "cloud_type": "EC2",
@@ -238,15 +238,16 @@ class TestKrknElasticModels(BaseTest):
             elastic_telemetry.virt_checks[0].end_timestamp,
             datetime.datetime.fromisoformat("2025-03-12T14:57:54.904352"),
         )
-        self.assertEqual(
-            elastic_telemetry.virt_checks[0].duration, 20.348474
-        )
+        self.assertEqual(elastic_telemetry.virt_checks[0].duration, 20.348474)
 
         self.assertEqual(elastic_telemetry.total_node_count, 3)
         self.assertEqual(elastic_telemetry.cloud_infrastructure, "AWS")
         self.assertEqual(elastic_telemetry.cloud_type, "EC2")
         self.assertEqual(elastic_telemetry.run_uuid, run_uuid)
-        self.assertEqual(elastic_telemetry.build_url, "https://github.com/krkn-chaos/krkn-lib/actions/runs/16724993547")
+        self.assertEqual(
+            elastic_telemetry.build_url,
+            "https://github.com/krkn-chaos/krkn-lib/actions/runs/16724993547",
+        )
 
     def test_ElasticChaosRunTelemetry(self):
         run_uuid = str(uuid.uuid4())
@@ -0,0 +1,513 @@
+import json
+import time
+
+from krkn_lib.k8s.pod_monitor import (
+    select_and_monitor_by_label,
+    select_and_monitor_by_name_pattern_and_namespace_pattern,
+    select_and_monitor_by_namespace_pattern_and_label,
+)
+from krkn_lib.tests import BaseTest
+
+
+class TestKrknKubernetesPodsMonitor(BaseTest):
+    def test_monitor_pods_by_label_no_pods_affected(self):
+        # test no pods affected
+        namespace = "test-ns-0-" + self.get_random_string(10)
+        delayed_1 = "delayed-0-" + self.get_random_string(10)
+        delayed_2 = "delayed-0-" + self.get_random_string(10)
+        label = "readiness-" + self.get_random_string(5)
+        self.deploy_namespace(namespace, [])
+        self.deploy_delayed_readiness_pod(delayed_1, namespace, 0, label)
+        self.deploy_delayed_readiness_pod(delayed_2, namespace, 0, label)
+
+        while not self.lib_k8s.is_pod_running(
+            delayed_1, namespace
+        ) and not self.lib_k8s.is_pod_running(delayed_2, namespace):
+            time.sleep(1)
+            continue
+        time.sleep(3)
+
+        monitor_timeout = 2
+
+        start_time = time.time()
+
+        future = select_and_monitor_by_label(
+            label_selector=f"test={label}",
+            max_timeout=monitor_timeout,
+            v1_client=self.lib_k8s.cli,
+        )
+        snapshot = future.result()
+        end_time = time.time()
+        pods_status = snapshot.get_pods_status()
+        self.background_delete_pod(delayed_1, namespace)
+        self.background_delete_pod(delayed_2, namespace)
+        # allow for the half second of delay that the API calls
+        # might introduce
+        self.assertAlmostEqual(end_time - start_time, monitor_timeout, 0)
+
+        self.assertEqual(len(pods_status.recovered), 0)
+        self.assertEqual(len(pods_status.unrecovered), 0)
+        self.background_delete_ns(namespace)
+
+    def test_pods_by_name_and_namespace_pattern_different_names_respawn(
+        self,
+    ):
+        # test pod with different name recovered
+        namespace_random_pattern = "test-ns-1-" + self.get_random_string(3)
+        namespace = f"{namespace_random_pattern}-" + self.get_random_string(10)
+        delayed_1 = "delayed-1-" + self.get_random_string(10)
+        delayed_2 = "delayed-1-" + self.get_random_string(10)
+        delayed_respawn = "delayed-1-respawn-" + self.get_random_string(10)
+        label = "readiness-" + self.get_random_string(5)
+        pod_delay = 1
+        monitor_timeout = 10
+        self.deploy_namespace(namespace, [])
+        self.deploy_delayed_readiness_pod(delayed_1, namespace, 0, label)
+        self.deploy_delayed_readiness_pod(delayed_2, namespace, 0, label)
+        while not self.lib_k8s.is_pod_running(
+            delayed_1, namespace
+        ) and not self.lib_k8s.is_pod_running(delayed_2, namespace):
+            time.sleep(1)
+            continue
+        time.sleep(3)
+
+        future = select_and_monitor_by_name_pattern_and_namespace_pattern(
+            pod_name_pattern="^delayed-1-.*",
+            namespace_pattern=f"^{namespace_random_pattern}-.*",
+            max_timeout=monitor_timeout,
+            v1_client=self.lib_k8s.cli,
+        )
+
+        self.background_delete_pod(delayed_1, namespace)
+        # sleep so the new pod cannot be scheduled before the deletion
+        # event, an ordering that in a real-world scenario
+        # (e.g. a replicaset or deployment) could not happen
+        time.sleep(1)
+        self.deploy_delayed_readiness_pod(
+            delayed_respawn, namespace, pod_delay, label
+        )
+
+        while not self.lib_k8s.is_pod_running(
+            delayed_1, namespace
+        ) and not self.lib_k8s.is_pod_running(delayed_respawn, namespace):
+            time.sleep(1)
+            continue
+        time.sleep(3)
+
+        snapshot = future.result()
+        print(f"\nRunning test ID: {self.id()}")
+        print(json.dumps(snapshot.to_dict(), indent=True))
+        pods_status = snapshot.get_pods_status()
+
+        self.assertEqual(len(pods_status.recovered), 1)
+        self.assertEqual(pods_status.recovered[0].pod_name, delayed_respawn)
+        self.assertEqual(pods_status.recovered[0].namespace, namespace)
+        self.assertTrue(pods_status.recovered[0].pod_readiness_time > 0)
+        self.assertTrue(pods_status.recovered[0].pod_rescheduling_time > 0)
+        self.assertTrue(
+            pods_status.recovered[0].total_recovery_time >= pod_delay
+        )
+        self.assertEqual(len(pods_status.unrecovered), 0)
+        self.background_delete_ns(namespace)
+
+    def test_pods_by_namespace_pattern_and_label_same_name_respawn(
+        self,
+    ):
+        # flaky
+        # test pod with same name recovered
+        namespace = "test-ns-2-" + self.get_random_string(10)
+        delayed_1 = "delayed-2-1-" + self.get_random_string(10)
+        delayed_2 = "delayed-2-2-" + self.get_random_string(10)
+        label = "readiness-" + self.get_random_string(5)
+        self.deploy_namespace(namespace, [])
+        self.deploy_delayed_readiness_pod(delayed_1, namespace, 0, label)
+        self.deploy_delayed_readiness_pod(delayed_2, namespace, 0, label)
+        self.wait_pod(delayed_1, namespace)
+        self.wait_pod(delayed_2, namespace)
+        monitor_timeout = 45
+        pod_delay = 0
+
+        future = select_and_monitor_by_namespace_pattern_and_label(
+            namespace_pattern="^test-ns-2-.*",
+            label_selector=f"test={label}",
+            max_timeout=monitor_timeout,
+            v1_client=self.lib_k8s.cli,
+        )
+
+        self.lib_k8s.delete_pod(delayed_1, namespace)
+        # sleep so the new pod cannot be scheduled before the deletion
+        # event, an ordering that in a real-world scenario
+        # (e.g. a replicaset or deployment) could not happen
+        time.sleep(1)
+        self.deploy_delayed_readiness_pod(
+            delayed_1, namespace, pod_delay, label
+        )
+
+        while not self.lib_k8s.is_pod_running(delayed_1, namespace):
+            time.sleep(1)
+            continue
+        time.sleep(3)
+
+        snapshot = future.result()
+        # print(f"\nRunning test ID: {self.id()}")
+        # print(json.dumps(snapshot.to_dict(), indent=True))
+        pods_status = snapshot.get_pods_status()
+        self.background_delete_ns(namespace)
+        self.assertEqual(len(pods_status.recovered), 1)
+        self.assertEqual(pods_status.recovered[0].pod_name, delayed_1)
+        self.assertEqual(pods_status.recovered[0].namespace, namespace)
+        self.assertTrue(pods_status.recovered[0].pod_readiness_time > 0)
+        self.assertTrue(pods_status.recovered[0].pod_rescheduling_time > 0)
+        self.assertTrue(
+            pods_status.recovered[0].total_recovery_time >= pod_delay
+        )
+        self.assertEqual(len(pods_status.unrecovered), 0)
+
+    def test_pods_by_label_respawn_timeout(self):
+        # test pod will not recover before the timeout
+        namespace = "test-ns-3-" + self.get_random_string(10)
+        delayed_1 = "delayed-3-" + self.get_random_string(10)
+        delayed_2 = "delayed-3-" + self.get_random_string(10)
+        delayed_respawn = "delayed-respawn-3-" + self.get_random_string(10)
+        label = "readiness-" + self.get_random_string(5)
+
+        self.deploy_namespace(namespace, [])
+        self.deploy_delayed_readiness_pod(delayed_1, namespace, 0, label)
+        self.deploy_delayed_readiness_pod(delayed_2, namespace, 0, label)
+        while not self.lib_k8s.is_pod_running(
+            delayed_1, namespace
+        ) and not self.lib_k8s.is_pod_running(delayed_2, namespace):
+            time.sleep(1)
+            continue
+        time.sleep(3)
+
+        monitor_timeout = 20
+        pod_delay = 21
+
+        future = select_and_monitor_by_label(
+            label_selector=f"test={label}",
+            max_timeout=monitor_timeout,
+            v1_client=self.lib_k8s.cli,
+        )
+
+        self.background_delete_pod(delayed_1, namespace)
+        # sleep so the new pod cannot be scheduled before the deletion
+        # event, an ordering that in a real-world scenario
+        # (e.g. a replicaset or deployment) could not happen
+        time.sleep(1)
+        self.deploy_delayed_readiness_pod(
+            delayed_respawn, namespace, pod_delay, label
+        )
+
+        snapshot = future.result()
+        # print(f"\nRunning test ID: {self.id()}")
+        # print(json.dumps(snapshot.to_dict(), indent=True))
+        pods_status = snapshot.get_pods_status()
+
+        self.assertEqual(len(pods_status.unrecovered), 1)
+        self.assertEqual(pods_status.unrecovered[0].pod_name, delayed_respawn)
+        self.assertEqual(pods_status.unrecovered[0].namespace, namespace)
+        self.assertEqual(len(pods_status.recovered), 0)
+        self.background_delete_ns(namespace)
+
+    def test_pods_by_label_never_respawn(self):
+        # test pod will never recover
+        namespace = "test-ns-4-" + self.get_random_string(10)
+        delayed_1 = "delayed-4-" + self.get_random_string(10)
+        delayed_2 = "delayed-4-" + self.get_random_string(10)
+        label = "readiness-" + self.get_random_string(5)
+        self.deploy_namespace(namespace, [])
+        self.deploy_delayed_readiness_pod(delayed_1, namespace, 0, label)
+        self.deploy_delayed_readiness_pod(delayed_2, namespace, 0, label)
+
+        while not self.lib_k8s.is_pod_running(
+            delayed_1, namespace
+        ) and not self.lib_k8s.is_pod_running(delayed_2, namespace):
+            time.sleep(1)
+            continue
+        time.sleep(3)
+
+        monitor_timeout = 15
+
+        future = select_and_monitor_by_label(
+            label_selector=f"test={label}",
+            max_timeout=monitor_timeout,
+            v1_client=self.lib_k8s.cli,
+        )
+        self.background_delete_pod(delayed_1, namespace)
+        snapshot = future.result()
+        # print(f"\nRunning test ID: {self.id()}")
+        # print(json.dumps(snapshot.to_dict(), indent=True))
+        pods_status = snapshot.get_pods_status()
+
+        self.assertEqual(len(pods_status.unrecovered), 1)
+        self.assertEqual(len(pods_status.recovered), 0)
+        self.background_delete_ns(namespace)
+
+    def test_pods_by_label_multiple_respawn(self):
+        # test that multiple respawned pods are detected as recovered
+        namespace = "test-ns-4-" + self.get_random_string(10)
+        delayed_1 = "delayed-4-" + self.get_random_string(10)
+        delayed_2 = "delayed-4-" + self.get_random_string(10)
+        delayed_3 = "delayed-4-" + self.get_random_string(10)
+        delayed_respawn_1 = "delayed-4-respawn-" + self.get_random_string(10)
+        delayed_respawn_2 = "delayed-4-respawn-" + self.get_random_string(10)
+        label = "readiness-" + self.get_random_string(5)
+        self.deploy_namespace(namespace, [])
+        self.deploy_delayed_readiness_pod(delayed_1, namespace, 0, label)
+        self.deploy_delayed_readiness_pod(delayed_2, namespace, 0, label)
+        self.deploy_delayed_readiness_pod(delayed_3, namespace, 0, label)
+        while (
+            not self.lib_k8s.is_pod_running(delayed_1, namespace)
+            and not self.lib_k8s.is_pod_running(delayed_2, namespace)
+            and not self.lib_k8s.is_pod_running(delayed_3, namespace)
+        ):
+            time.sleep(1)
+            continue
+        time.sleep(3)
+
+        monitor_timeout = 20
+        pod_delay = 2
+
+        future = select_and_monitor_by_label(
+            label_selector=f"test={label}",
+            max_timeout=monitor_timeout,
+            v1_client=self.lib_k8s.cli,
+        )
+
+        self.background_delete_pod(delayed_1, namespace)
+        self.background_delete_pod(delayed_2, namespace)
+        # sleep so the new pod cannot be scheduled before the deletion
+        # event, an ordering that in a real-world scenario
+        # (e.g. a replicaset or deployment) could not happen
+        time.sleep(1)
+        self.deploy_delayed_readiness_pod(
+            delayed_respawn_1, namespace, pod_delay, label
+        )
+        # introduce a delay in the next recovering pod to check
+        # if delayed recoveries are captured
+        time.sleep(2)
+        self.deploy_delayed_readiness_pod(
+            delayed_respawn_2, namespace, pod_delay, label
+        )
+
+        snapshot = future.result()
+        # print(f"\nRunning test ID: {self.id()}")
+        # print(json.dumps(snapshot.to_dict(), indent=True))
+        pods_status = snapshot.get_pods_status()
+
+        self.background_delete_pod(delayed_3, namespace)
+        self.background_delete_pod(delayed_respawn_1, namespace)
+        self.background_delete_pod(delayed_respawn_2, namespace)
+
+        self.assertEqual(len(pods_status.unrecovered), 0)
+        self.assertEqual(len(pods_status.recovered), 2)
+        self.assertTrue(
+            delayed_respawn_1 in [p.pod_name for p in pods_status.recovered]
+        )
+        self.assertTrue(
+            delayed_respawn_2 in [p.pod_name for p in pods_status.recovered]
+        )
+        self.background_delete_ns(namespace)
+
+    def test_pods_by_label_multiple_respawn_one_too_late(self):
+        # flaky
+        # test that one respawned pod recovers in time and one respawns
+        namespace = "test-ns-4-" + self.get_random_string(10)
+        delayed_1 = "delayed-4-" + self.get_random_string(10)
+        delayed_2 = "delayed-4-" + self.get_random_string(10)
+        delayed_3 = "delayed-4-" + self.get_random_string(10)
+        delayed_respawn_1 = "delayed-4-respawn-" + self.get_random_string(10)
+        delayed_respawn_2 = "delayed-4-respawn-" + self.get_random_string(10)
+        label = "readiness-" + self.get_random_string(5)
+        self.deploy_namespace(namespace, [])
+        self.deploy_delayed_readiness_pod(delayed_1, namespace, 0, label)
+        self.deploy_delayed_readiness_pod(delayed_2, namespace, 0, label)
+        self.deploy_delayed_readiness_pod(delayed_3, namespace, 0, label)
+        while (
+            not self.lib_k8s.is_pod_running(delayed_1, namespace)
+            and not self.lib_k8s.is_pod_running(delayed_2, namespace)
+            and not self.lib_k8s.is_pod_running(delayed_3, namespace)
+        ):
+            time.sleep(1)
+            continue
+        time.sleep(3)
+
+        monitor_timeout = 20
+        pod_delay = 0
+        pod_too_much_delay = 25
+        future = select_and_monitor_by_label(
+            label_selector=f"test={label}",
+            max_timeout=monitor_timeout,
+            v1_client=self.lib_k8s.cli,
+        )
+        self.background_delete_pod(delayed_1, namespace)
+        self.background_delete_pod(delayed_2, namespace)
+        # sleep so the new pod cannot be scheduled before the deletion
+        # event, an ordering that in a real-world scenario
+        # (e.g. a replicaset or deployment) could not happen
+        time.sleep(1)
+        self.deploy_delayed_readiness_pod(
+            delayed_respawn_1, namespace, pod_delay, label
+        )
+        self.deploy_delayed_readiness_pod(
+            delayed_respawn_2, namespace, pod_too_much_delay, label
+        )
+
+        snapshot = future.result()
+        # print(f"\nRunning test ID: {self.id()}")
+        # print(json.dumps(snapshot.to_dict(), indent=True))
+        pods_status = snapshot.get_pods_status()
+        self.background_delete_ns(namespace)
+
+        self.assertEqual(len(pods_status.unrecovered), 1)
+        self.assertEqual(len(pods_status.recovered), 1)
+        self.assertTrue(
+            delayed_respawn_1 in [p.pod_name for p in pods_status.recovered]
+        )
+        self.assertTrue(
+            delayed_respawn_2 in [p.pod_name for p in pods_status.unrecovered]
+        )
+
+    def test_pods_by_label_multiple_respawn_one_fails(self):
+        # test that one deleted pod respawns while the other never does
+        namespace = "test-ns-4-" + self.get_random_string(10)
+        delayed_1 = "delayed-4-" + self.get_random_string(10)
+        delayed_2 = "delayed-4-" + self.get_random_string(10)
+        delayed_3 = "delayed-4-" + self.get_random_string(10)
+        delayed_respawn_1 = "delayed-4-respawn-" + self.get_random_string(10)
+        label = "readiness-" + self.get_random_string(5)
+        self.deploy_namespace(namespace, [])
+        self.deploy_delayed_readiness_pod(delayed_1, namespace, 0, label)
+        self.deploy_delayed_readiness_pod(delayed_2, namespace, 0, label)
+        self.deploy_delayed_readiness_pod(delayed_3, namespace, 0, label)
+        while (
+            not self.lib_k8s.is_pod_running(delayed_1, namespace)
+            and not self.lib_k8s.is_pod_running(delayed_2, namespace)
+            and not self.lib_k8s.is_pod_running(delayed_3, namespace)
+        ):
+            time.sleep(1)
+            continue
+        time.sleep(3)
+
+        monitor_timeout = 10
+        pod_delay = 1
+        future = select_and_monitor_by_label(
+            label_selector=f"test={label}",
+            max_timeout=monitor_timeout,
+            v1_client=self.lib_k8s.cli,
+        )
+        self.background_delete_pod(delayed_1, namespace)
+        self.background_delete_pod(delayed_2, namespace)
+        time.sleep(0.1)
+        self.deploy_delayed_readiness_pod(
+            delayed_respawn_1, namespace, pod_delay, label
+        )
+        snapshot = future.result()
+        # print(f"\nRunning test ID: {self.id()}")
+        # print(json.dumps(snapshot.to_dict(), indent=True))
+        pods_status = snapshot.get_pods_status()
+        self.background_delete_ns(namespace)
+        self.assertEqual(len(pods_status.unrecovered), 1)
+        self.assertEqual(len(pods_status.recovered), 1)
+
+        self.assertTrue(
+            delayed_respawn_1 in [p.pod_name for p in pods_status.recovered]
+        )
+
+    def test_pods_becoming_not_ready(self):
+        # test a pod becoming not ready and then recovering in place
+        namespace = "test-ns-5-" + self.get_random_string(10)
+        delayed_1 = "delayed-5-" + self.get_random_string(10)
+        self.deploy_namespace(namespace, [])
+        self.deploy_nginx(namespace, delayed_1)
+        while not self.lib_k8s.is_pod_running(delayed_1, namespace):
+            time.sleep(1)
+            continue
+        time.sleep(3)
+
+        monitor_timeout = 20
+
+        future = select_and_monitor_by_name_pattern_and_namespace_pattern(
+            delayed_1,
+            namespace,
+            max_timeout=monitor_timeout,
+            v1_client=self.lib_k8s.cli,
+        )
+
+        self.lib_k8s.exec_cmd_in_pod(["kill 1"], delayed_1, namespace)
+        snapshot = future.result()
+
+        pods_status = snapshot.get_pods_status()
+        self.background_delete_ns(namespace)
+        self.assertEqual(len(pods_status.recovered), 1)
+        self.assertEqual(pods_status.recovered[0].pod_rescheduling_time, None)
+        self.assertGreater(pods_status.recovered[0].pod_readiness_time, 0)
+
+    def test_monitor_stopping_earlier(self):
+
+        # tests the monitor deadlines:
+        # - if the status of the monitored pods changes and is restored
+        #   before the 120-second deadline, the monitor returns early;
+        #   the assertion checks that the monitor returns within 10
+        #   seconds: 120 - (end - start) >= 110
+        # - if nothing changes in the set of monitored pods, the monitor
+        #   is forced to wait for the full timeout
+
+        namespace = "test-ns-6-" + self.get_random_string(10)
+        delayed_1 = "delayed-6-" + self.get_random_string(10)
+        delayed_respawn_1 = "delayed-6-respawn-" + self.get_random_string(10)
+        label = "readiness-" + self.get_random_string(5)
+        self.deploy_namespace(namespace, [])
+        self.deploy_delayed_readiness_pod(delayed_1, namespace, 0, label)
+        pod_delay = 3
+        while not self.lib_k8s.is_pod_running(delayed_1, namespace):
+            time.sleep(1)
+            continue
+        time.sleep(3)
+
+        monitor_timeout = 120
+
+        start_time = time.time()
+
+        future = select_and_monitor_by_label(
+            label_selector=f"test={label}",
+            max_timeout=monitor_timeout,
+            v1_client=self.lib_k8s.cli,
+        )
+        self.deploy_delayed_readiness_pod(
+            delayed_respawn_1, namespace, pod_delay, label
+        )
+        _ = future.result()
+        end_time = time.time()
+
+        self.assertGreater(monitor_timeout - (end_time - start_time), 110)
+
+    def test_monitor_forced_to_wait_with_no_status_change(self):
+        # tests the monitor deadlines:
+        # - if nothing changes in the set of monitored pods, the monitor
+        #   is forced to wait for the full timeout
+
+        namespace = "test-ns-7-" + self.get_random_string(10)
+        delayed_1 = "delayed-7" + self.get_random_string(10)
+        label = "readiness-" + self.get_random_string(5)
+        self.deploy_namespace(namespace, [])
+        self.deploy_delayed_readiness_pod(delayed_1, namespace, 0, label)
+        while not self.lib_k8s.is_pod_running(delayed_1, namespace):
+            time.sleep(1)
+            continue
+        time.sleep(3)
+
+        monitor_timeout = 20
+
+        start_time = time.time()
+
+        future = select_and_monitor_by_label(
+            label_selector=f"test={label}",
+            max_timeout=monitor_timeout,
+            v1_client=self.lib_k8s.cli,
+        )
+        _ = future.result()
+        end_time = time.time()
+
+        self.assertGreaterEqual((end_time - start_time), monitor_timeout)
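
The tests added above are effectively the usage documentation for the new krkn_lib.k8s.pod_monitor module shipped in 5.1.6. A minimal sketch of the pattern they follow, inferred entirely from the calls visible in this diff; the kubernetes CoreV1Api client setup is an assumption (the tests pass self.lib_k8s.cli, which appears to be an equivalent client), and the label selector and timeout values are hypothetical:

    # Sketch only: inferred from the tests in this diff, not from pod_monitor docs.
    from kubernetes import client, config

    from krkn_lib.k8s.pod_monitor import select_and_monitor_by_label

    config.load_kube_config()  # assumption: a reachable cluster and kubeconfig
    v1 = client.CoreV1Api()  # assumption: stands in for the tests' self.lib_k8s.cli

    # Start monitoring the selected pods; returns a future immediately.
    future = select_and_monitor_by_label(
        label_selector="app=my-workload",  # hypothetical selector
        max_timeout=30,  # seconds to wait for recovery before giving up
        v1_client=v1,
    )

    # ... inject chaos here, e.g. delete one of the selected pods ...

    snapshot = future.result()  # returns early on recovery, else at max_timeout
    pods_status = snapshot.get_pods_status()
    print(len(pods_status.recovered), len(pods_status.unrecovered))

As the two deadline tests show, result() blocks for the full max_timeout when nothing changes in the monitored set, and returns early once a detected disruption is resolved; the snapshot's recovered entries carry pod_readiness_time, pod_rescheduling_time, and total_recovery_time for each pod.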