krkn-lib 5.1.10__tar.gz → 5.1.13__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/PKG-INFO +4 -3
  2. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/pyproject.toml +4 -3
  3. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/k8s/krkn_kubernetes.py +376 -2
  4. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/k8s/pod_monitor/pod_monitor.py +142 -54
  5. krkn_lib-5.1.13/src/krkn_lib/k8s/templates/snapshot.j2 +10 -0
  6. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/models/elastic/models.py +23 -0
  7. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/models/telemetry/models.py +9 -0
  8. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/tests/base_test.py +13 -0
  9. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/tests/test_krkn_elastic_models.py +21 -0
  10. krkn_lib-5.1.13/src/krkn_lib/tests/test_krkn_kubernetes_virt.py +790 -0
  11. krkn_lib-5.1.13/src/krkn_lib/tests/test_krkn_openshift.py +645 -0
  12. krkn_lib-5.1.13/src/krkn_lib/tests/test_krkn_telemetry_kubernetes.py +850 -0
  13. krkn_lib-5.1.13/src/krkn_lib/tests/test_safe_logger.py +494 -0
  14. krkn_lib-5.1.10/src/krkn_lib/tests/test_krkn_openshift.py +0 -123
  15. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/LICENSE +0 -0
  16. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/README.md +0 -0
  17. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/__init__.py +0 -0
  18. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/aws_tests/__init__.py +0 -0
  19. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/aws_tests/test_krkn_telemetry_kubernetes.py +0 -0
  20. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/aws_tests/test_krkn_telemetry_openshift.py +0 -0
  21. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/elastic/__init__.py +0 -0
  22. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/elastic/krkn_elastic.py +0 -0
  23. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/k8s/__init__.py +0 -0
  24. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/k8s/pod_monitor/__init__.py +0 -0
  25. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/k8s/templates/hog_pod.j2 +0 -0
  26. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/k8s/templates/node_exec_pod.j2 +0 -0
  27. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/k8s/templates/service_hijacking_config_map.j2 +0 -0
  28. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/k8s/templates/service_hijacking_pod.j2 +0 -0
  29. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/k8s/templates/syn_flood_pod.j2 +0 -0
  30. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/models/__init__.py +0 -0
  31. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/models/elastic/__init__.py +0 -0
  32. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/models/k8s/__init__.py +0 -0
  33. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/models/k8s/models.py +0 -0
  34. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/models/krkn/__init__.py +0 -0
  35. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/models/krkn/models.py +0 -0
  36. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/models/pod_monitor/__init__.py +0 -0
  37. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/models/pod_monitor/models.py +0 -0
  38. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/models/telemetry/__init__.py +0 -0
  39. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/ocp/__init__.py +0 -0
  40. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/ocp/krkn_openshift.py +0 -0
  41. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/prometheus/__init__.py +0 -0
  42. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/prometheus/krkn_prometheus.py +0 -0
  43. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/telemetry/__init__.py +0 -0
  44. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/telemetry/k8s/__init__.py +0 -0
  45. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/telemetry/k8s/krkn_telemetry_kubernetes.py +0 -0
  46. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/telemetry/ocp/__init__.py +0 -0
  47. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/telemetry/ocp/krkn_telemetry_openshift.py +0 -0
  48. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/tests/__init__.py +0 -0
  49. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/tests/test_krkn_elastic.py +0 -0
  50. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/tests/test_krkn_kubernetes_check.py +0 -0
  51. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/tests/test_krkn_kubernetes_create.py +0 -0
  52. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/tests/test_krkn_kubernetes_delete.py +0 -0
  53. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/tests/test_krkn_kubernetes_exec.py +0 -0
  54. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/tests/test_krkn_kubernetes_get.py +0 -0
  55. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/tests/test_krkn_kubernetes_list.py +0 -0
  56. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/tests/test_krkn_kubernetes_misc.py +0 -0
  57. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/tests/test_krkn_kubernetes_models.py +0 -0
  58. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/tests/test_krkn_kubernetes_pods_monitor.py +0 -0
  59. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/tests/test_krkn_kubernetes_pods_monitor_models.py +0 -0
  60. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/tests/test_krkn_prometheus.py +0 -0
  61. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/tests/test_krkn_telemetry_models.py +0 -0
  62. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/tests/test_utils.py +0 -0
  63. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/tests/test_version.py +0 -0
  64. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/utils/__init__.py +0 -0
  65. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/utils/functions.py +0 -0
  66. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/utils/safe_logger.py +0 -0
  67. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/version/__init__.py +0 -0
  68. {krkn_lib-5.1.10 → krkn_lib-5.1.13}/src/krkn_lib/version/version.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: krkn-lib
3
- Version: 5.1.10
3
+ Version: 5.1.13
4
4
  Summary: Foundation library for Kraken
5
5
  License: Apache-2.0
6
6
  License-File: LICENSE
@@ -19,8 +19,9 @@ Requires-Dist: base64io (>=1.0.3,<2.0.0)
19
19
  Requires-Dist: coverage (>=7.6.12,<8.0.0)
20
20
  Requires-Dist: cython (==3.0)
21
21
  Requires-Dist: deprecation (==2.1.0)
22
- Requires-Dist: elasticsearch (==8.19.1)
23
- Requires-Dist: elasticsearch-dsl (==8.18.0)
22
+ Requires-Dist: elasticsearch (==7.13.4)
23
+ Requires-Dist: elasticsearch-dsl (==7.4.1)
24
+ Requires-Dist: importlib-metadata (>=8.7.0,<9.0.0)
24
25
  Requires-Dist: kubeconfig (>=1.1.1,<2.0.0)
25
26
  Requires-Dist: kubernetes (==34.1.0)
26
27
  Requires-Dist: numpy (==1.26.4)
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "krkn-lib"
3
- version = "5.1.10"
3
+ version = "5.1.13"
4
4
  description = "Foundation library for Kraken"
5
5
  authors = ["Red Hat Chaos Team"]
6
6
  license = "Apache-2.0"
@@ -20,13 +20,14 @@ tzlocal = "5.1"
20
20
  pytz = "^2023.3"
21
21
  PyYAML = "6.0.1"
22
22
  prometheus-api-client = "^0.5.4"
23
- elasticsearch = "8.19.1"
24
- elasticsearch-dsl = "8.18.0"
23
+ elasticsearch = "7.13.4"
24
+ elasticsearch-dsl = "7.4.1"
25
25
  wheel = "^0.42.0"
26
26
  cython = "3.0"
27
27
  numpy= "1.26.4"
28
28
  deprecation="2.1.0"
29
29
  coverage="^7.6.12"
30
+ importlib-metadata = "^8.7.0"
30
31
 
31
32
  [tool.poetry.group.test.dependencies]
32
33
  jinja2 = "^3.1.2"
@@ -167,9 +167,9 @@ class KrknKubernetes:
167
167
  client.Configuration.set_default(self.client_config)
168
168
  self.watch_resource = watch.Watch()
169
169
  # Get the logger for the kubernetes client
170
- kubernetes_logger = logging.getLogger('kubernetes')
170
+ kubernetes_logger = logging.getLogger("kubernetes")
171
171
 
172
- # Set the logging level to a higher level than DEBUG,
172
+ # Set the logging level to a higher level than DEBUG,
173
173
  # such as INFO, WARNING, or ERROR
174
174
  # This will effectively disable DEBUG level messages.
175
175
  kubernetes_logger.setLevel(logging.INFO)
@@ -1388,6 +1388,304 @@ class KrknKubernetes:
1388
1388
  str(e),
1389
1389
  )
1390
1390
 
1391
def get_vm(self, name: str, namespace: str) -> Optional[Dict]:
    """
    Fetch a single KubeVirt Virtual Machine custom object.

    :param name: name of the VM to look up
    :param namespace: namespace the VM is expected in
    :return: the VM object returned by the API, or None when the
        API answers with a 404
    """
    try:
        return self.custom_object_client.get_namespaced_custom_object(
            group="kubevirt.io",
            version="v1",
            namespace=namespace,
            plural="virtualmachines",
            name=name,
        )
    except ApiException as api_error:
        # anything other than "not found" is logged and propagated
        if api_error.status != 404:
            logging.error(f"Error getting VM {name}: {api_error}")
            raise
        logging.warning(f"VM {name} not found in namespace {namespace}")
        return None
    except Exception as error:
        logging.error(f"Unexpected error getting VM {name}: {error}")
        raise
1420
+
1421
def get_vmi(self, name: str, namespace: str) -> Optional[Dict]:
    """
    Fetch a single KubeVirt Virtual Machine Instance custom object.

    :param name: name of the VMI to look up
    :param namespace: namespace the VMI is expected in
    :return: the VMI object returned by the API, or None when the
        API answers with a 404
    """
    try:
        return self.custom_object_client.get_namespaced_custom_object(
            group="kubevirt.io",
            version="v1",
            namespace=namespace,
            plural="virtualmachineinstances",
            name=name,
        )
    except ApiException as api_error:
        # anything other than "not found" is logged and propagated
        if api_error.status != 404:
            logging.error(f"Error getting VMI {name}: {api_error}")
            raise
        logging.warning(f"VMI {name} not found in namespace {namespace}")
        return None
    except Exception as error:
        logging.error(f"Unexpected error getting VMI {name}: {error}")
        raise
1450
+
1451
def get_vmis(self, regex_name: str, namespace: str) -> list[Dict]:
    """
    List the Virtual Machine Instances whose name matches a regex,
    scanning every namespace selected by a namespace regex.

    :param regex_name: regular expression applied with ``match``
        (anchored at the start) to each VMI name
    :param namespace: regular expression passed to
        ``list_namespaces_by_regex`` to select the namespaces to scan
    :return: list of the matching VMI objects; an empty list when
        the API answers with a 404
    """
    vmis_list = []
    # namespace reported if an API call fails; before the first
    # iteration this is the namespace regex itself
    current_namespace = namespace
    try:
        # compiled once instead of on every VMI
        name_pattern = re.compile(regex_name)
        for current_namespace in self.list_namespaces_by_regex(namespace):
            vmis = self.custom_object_client.list_namespaced_custom_object(
                group="kubevirt.io",
                version="v1",
                namespace=current_namespace,
                plural="virtualmachineinstances",
            )

            # "items" can be absent or null in the response
            for vmi in vmis.get("items") or []:
                vmi_name = vmi.get("metadata", {}).get("name")
                # an object without a name can never match
                if vmi_name is not None and name_pattern.match(vmi_name):
                    vmis_list.append(vmi)
    except ApiException as e:
        if e.status == 404:
            logging.warning(
                f"VMI {regex_name} not found in namespace {current_namespace}"
            )
            return []
        else:
            logging.error(f"Error getting VMI {regex_name}: {e}")
            raise
    except Exception as e:
        logging.error(f"Unexpected error getting VMI {regex_name}: {e}")
        raise
    return vmis_list
1488
+
1489
def create_vmi(
    self, name: str, namespace: str, vm_name: str, vmi_body: dict
) -> Optional[Dict]:
    """
    Create a Virtual Machine Instance in a namespace.

    :param name: name of the VMI; used only in log messages here,
        it is not sent as a separate argument because the create
        call of the custom objects API takes no ``name`` parameter
        (the object name is expected inside ``vmi_body``)
    :param namespace: namespace in which the VMI is created
    :param vm_name: name of the Virtual Machine the VMI belongs to;
        kept for interface compatibility, not used by this method
    :param vmi_body: manifest of the VMI to create
    :return: the created VMI object, or None when the API answers
        with a 404
    """
    try:
        # no ``name=`` here: passing it makes the client reject the
        # call before any request is sent
        vmi = self.custom_object_client.create_namespaced_custom_object(
            group="kubevirt.io",
            version="v1",
            namespace=namespace,
            plural="virtualmachineinstances",
            body=vmi_body,
        )
        return vmi
    except ApiException as e:
        if e.status == 404:
            logging.warning(f"VMI {name} not found in namespace {namespace}")
            return None
        else:
            logging.error(f"Error creating VMI {name}: {e}")
            raise
    except Exception as e:
        logging.error(f"Unexpected error creating VMI {name}: {e}")
        raise
1518
+
1519
def patch_vm(self, name: str, namespace: str, vm_body: dict) -> Optional[Dict]:
    """
    Apply a patch to a Virtual Machine.

    :param name: name of the VM to patch
    :param namespace: namespace of the VM
    :param vm_body: patch body sent to the API
    :return: the object returned by the patch call, or None when
        the API answers with a 404
    """
    try:
        return self.custom_object_client.patch_namespaced_custom_object(
            group="kubevirt.io",
            version="v1",
            namespace=namespace,
            plural="virtualmachines",
            name=name,
            body=vm_body,
        )
    except ApiException as api_error:
        # anything other than "not found" is logged and propagated
        if api_error.status != 404:
            logging.error(f"Error patching VM {name}: {api_error}")
            raise
        logging.warning(f"VM {name} not found in namespace {namespace}")
        return None
    except Exception as error:
        logging.error(f"Unexpected error patching VM {name}: {error}")
        raise
1548
+
1549
def patch_vmi(
    self, name: str, namespace: str, vmi_body: dict
) -> Optional[Dict]:
    """
    Apply a patch to a Virtual Machine Instance.

    :param name: name of the VMI to patch
    :param namespace: namespace of the VMI
    :param vmi_body: patch body sent to the API
    :return: the object returned by the patch call, or None when
        the API answers with a 404
    """
    try:
        return self.custom_object_client.patch_namespaced_custom_object(
            group="kubevirt.io",
            version="v1",
            namespace=namespace,
            plural="virtualmachineinstances",
            name=name,
            body=vmi_body,
        )
    except ApiException as api_error:
        # anything other than "not found" is logged and propagated
        if api_error.status != 404:
            logging.error(f"Error patching VMI {name}: {api_error}")
            raise
        logging.warning(f"VMI {name} not found in namespace {namespace}")
        return None
    except Exception as error:
        logging.error(f"Unexpected error patching VMI {name}: {error}")
        raise
1578
+
1579
def get_vms(self, regex_name: str, namespace: str) -> list[Dict]:
    """
    List the Virtual Machines whose name matches a regex, scanning
    every namespace selected by a namespace regex.

    :param regex_name: regular expression applied with ``match``
        (anchored at the start) to each VM name
    :param namespace: regular expression passed to
        ``list_namespaces_by_regex`` to select the namespaces to scan
    :return: list of the matching VM objects; an empty list when
        the API answers with a 404
    """
    # namespace reported if an API call fails; before the first
    # iteration this is the namespace regex itself
    current_namespace = namespace
    try:
        vms_list = []
        # compiled once instead of on every VM
        name_pattern = re.compile(regex_name)
        for current_namespace in self.list_namespaces_by_regex(namespace):
            vms = self.custom_object_client.list_namespaced_custom_object(
                group="kubevirt.io",
                version="v1",
                namespace=current_namespace,
                plural="virtualmachines",
            )

            # "items" can be absent or null in the response
            for vm in vms.get("items") or []:
                vm_name = vm.get("metadata", {}).get("name")
                # an object without a name can never match
                if vm_name is not None and name_pattern.match(vm_name):
                    vms_list.append(vm)
        return vms_list
    except ApiException as e:
        if e.status == 404:
            logging.warning(
                f"VM {regex_name} not found in namespace {current_namespace}"
            )
            return []
        else:
            logging.error(f"Error getting VM {regex_name}: {e}")
            raise
    except Exception as e:
        logging.error(f"Unexpected error getting VM {regex_name}: {e}")
        raise
1616
+
1617
def get_snapshot(self, name: str, namespace: str) -> Optional[Dict]:
    """
    Get a VirtualMachineSnapshot by name and namespace.

    :param name: Name of the Snapshot to retrieve
    :param namespace: Namespace of the Snapshot
    :return: The Snapshot object if found, None when the API
        answers with a 404
    """
    try:
        # Snapshots are served by the snapshot.kubevirt.io group and
        # addressed through the lowercase plural resource name; the
        # version matches the apiVersion of the snapshot.j2 template.
        # NOTE(review): confirm v1alpha1 is still served by the
        # targeted KubeVirt releases (newer ones also offer v1beta1).
        snapshot = self.custom_object_client.get_namespaced_custom_object(
            group="snapshot.kubevirt.io",
            version="v1alpha1",
            namespace=namespace,
            plural="virtualmachinesnapshots",
            name=name,
        )
        return snapshot
    except ApiException as e:
        if e.status == 404:
            logging.warning(
                f"Snapshot {name} not found in namespace {namespace}"
            )
            return None
        else:
            logging.error(f"Error getting Snapshot {name}: {e}")
            raise
    except Exception as e:
        logging.error(f"Unexpected error getting Snapshot {name}: {e}")
        raise
1646
+
1647
def create_snapshot(
    self, name: str, namespace: str, vm_name: str
) -> Optional[Dict]:
    """
    Create a VirtualMachineSnapshot of a Virtual Machine.

    The manifest is rendered from the ``snapshot.j2`` template of the
    ``krkn_lib.k8s`` package and submitted through the custom objects
    API.

    :param name: Name of the Snapshot to create
    :param namespace: Namespace of the Snapshot
    :param vm_name: Name of the Virtual Machine to create the
        Snapshot from
    :return: The Snapshot object if created, None when the API
        answers with a 404
    """
    try:
        file_loader = PackageLoader("krkn_lib.k8s", "templates")
        env = Environment(loader=file_loader, autoescape=True)
        snapshot_template = env.get_template("snapshot.j2")
        ss_body = yaml.safe_load(
            snapshot_template.render(
                name=name, namespace=namespace, vm_name=vm_name
            )
        )
        # The group/version must agree with the apiVersion of the
        # rendered manifest (snapshot.kubevirt.io/v1alpha1) and the
        # resource is addressed by its lowercase plural name. The
        # create call takes no ``name`` argument: the object name is
        # read from the manifest metadata.
        # NOTE(review): confirm v1alpha1 is still served by the
        # targeted KubeVirt releases (newer ones also offer v1beta1).
        snapshot = self.custom_object_client.create_namespaced_custom_object(
            group="snapshot.kubevirt.io",
            version="v1alpha1",
            namespace=namespace,
            plural="virtualmachinesnapshots",
            body=ss_body,
        )
        return snapshot
    except ApiException as e:
        if e.status == 404:
            logging.warning(
                f"Snapshot {name} not found in namespace {namespace}"
            )
            return None
        else:
            logging.error(f"Error creating Snapshot {name}: {e}")
            raise
    except Exception as e:
        logging.error(f"Unexpected error creating Snapshot {name}: {e}")
        raise
1688
+
1391
1689
  def get_job_status(
1392
1690
  self, name: str, namespace: str = "default"
1393
1691
  ) -> client.V1Job:
@@ -1410,6 +1708,82 @@ class KrknKubernetes:
1410
1708
  )
1411
1709
  raise
1412
1710
 
1711
def delete_vm(self, name: str, namespace: str) -> Optional[Dict]:
    """
    Remove a Virtual Machine custom object.

    :param name: name of the VM to delete
    :param namespace: namespace of the VM
    :return: the object returned by the delete call, or None when
        the API answers with a 404
    """
    try:
        api = self.custom_object_client
        return api.delete_namespaced_custom_object(
            group="kubevirt.io",
            version="v1",
            namespace=namespace,
            plural="virtualmachines",
            name=name,
        )
    except ApiException as api_error:
        # anything other than "not found" is logged and propagated
        if api_error.status != 404:
            logging.error(f"Error deleting VM {name}: {api_error}")
            raise
        logging.warning(f"VM {name} not found in namespace {namespace}")
        return None
    except Exception as error:
        logging.error(f"Error deleting VM {name}: {error}")
        raise
1739
+
1740
def delete_vmi(self, vm_name: str, namespace: str):
    """
    Delete a Virtual Machine Instance to simulate a VM outage.

    :param vm_name: Name of the VMI to delete
    :param namespace: Namespace of the VMI
    :return: 0 for success, 1 when the API call fails with an
        ApiException (including a 404)
    """
    logging.info(
        f"Injecting chaos: Deleting VMI {vm_name} in namespace {namespace}"
    )
    try:
        self.custom_object_client.delete_namespaced_custom_object(
            group="kubevirt.io",
            version="v1",
            namespace=namespace,
            plural="virtualmachineinstances",
            name=vm_name,
        )
    except ApiException as e:
        if e.status == 404:
            logging.warning(f"VMI {vm_name} not found during deletion")
        else:
            logging.error(f"API error during VMI deletion: {e}")
        return 1
    # explicit success code, as promised by the documented contract
    # (the method previously fell through and returned None)
    return 0
1766
+
1767
def delete_snapshot(self, snapshot_name: str, namespace: str):
    """
    Helper method to delete any snapshot created by the scenario.

    Deletion is best effort: any failure is logged as a warning and
    not propagated.

    :param snapshot_name: Name of the VirtualMachineSnapshot to delete
    :param namespace: Namespace of the Snapshot
    """
    # the name comes from the argument, not from instance state, and
    # logging goes through the module-level ``logging`` like the
    # other VM helpers
    logging.info(f"Deleting snapshot '{snapshot_name}'...")
    try:
        # Snapshots are served by the snapshot.kubevirt.io group and
        # addressed through the lowercase plural resource name.
        # NOTE(review): confirm v1alpha1 is still served by the
        # targeted KubeVirt releases (newer ones also offer v1beta1).
        self.custom_object_client.delete_namespaced_custom_object(
            group="snapshot.kubevirt.io",
            version="v1alpha1",
            namespace=namespace,
            plural="virtualmachinesnapshots",
            name=snapshot_name,
        )
        logging.info(f"Snapshot '{snapshot_name}' deleted successfully.")
    except Exception as e:
        logging.warning(
            "Failed to delete snapshot, "
            f"might have been already deleted: {e}"
        )
1786
+
1413
1787
  def monitor_nodes(
1414
1788
  self,
1415
1789
  ) -> (bool, list[str]):
@@ -1,10 +1,14 @@
1
+ import logging
1
2
  import re
3
+ import time
4
+ import traceback
2
5
  from concurrent.futures import Future
3
6
  from concurrent.futures.thread import ThreadPoolExecutor
4
7
  from functools import partial
5
8
 
6
9
  from kubernetes import watch
7
10
  from kubernetes.client import V1Pod, CoreV1Api
11
+ from urllib3.exceptions import ProtocolError
8
12
 
9
13
  from krkn_lib.models.pod_monitor.models import (
10
14
  PodsSnapshot,
@@ -47,67 +51,151 @@ def _monitor_pods(
47
51
  max_timeout: int,
48
52
  name_pattern: str = None,
49
53
  namespace_pattern: str = None,
54
+ max_retries: int = 3,
50
55
  ) -> PodsSnapshot:
51
- w = watch.Watch(return_type=V1Pod)
56
+ """
57
+ Monitor pods with automatic retry on watch stream disconnection.
58
+
59
+ :param monitor_partial: Partial function for monitoring pods
60
+ :param snapshot: Snapshot to populate with pod events
61
+ :param max_timeout: Maximum time to monitor (seconds)
62
+ :param name_pattern: Regex pattern for pod names
63
+ :param namespace_pattern: Regex pattern for namespaces
64
+ :param max_retries: Maximum number of retries on connection error
65
+ (default: 3)
66
+ :return: PodsSnapshot with collected pod events
67
+ """
68
+
69
+ start_time = time.time()
70
+ retry_count = 0
52
71
  deleted_parent_pods = []
53
72
  restored_pods = []
54
73
  cluster_restored = False
55
- for event in w.stream(monitor_partial, timeout_seconds=max_timeout):
56
- match_name = True
57
- match_namespace = True
58
- event_type = event["type"]
59
- pod = event["object"]
60
74
 
61
- if namespace_pattern:
62
- match = re.match(namespace_pattern, pod.metadata.namespace)
63
- match_namespace = match is not None
64
- if name_pattern:
65
- match = re.match(name_pattern, pod.metadata.name)
66
- match_name = match is not None
75
+ while retry_count <= max_retries:
76
+ try:
77
+ # Calculate remaining timeout if retrying
78
+ if retry_count > 0:
79
+ elapsed = time.time() - start_time
80
+ remain_timeout = max(1, int(max_timeout - elapsed))
81
+ logging.info("remain timeout " + str(remain_timeout))
82
+ if remain_timeout <= 0:
83
+ logging.info("Maximum timeout reached, stopping monitoring")
84
+ break
85
+ logging.info(
86
+ "Reconnecting watch stream"
87
+ f"(attempt {retry_count}/{max_retries}),"
88
+ f"remaining timeout: {remain_timeout}s"
89
+ )
90
+ else:
91
+ remain_timeout = max_timeout
67
92
 
68
- if match_name and match_namespace:
69
- pod_event = PodEvent()
70
- if event_type == "MODIFIED":
71
- if pod.metadata.deletion_timestamp is not None:
72
- pod_event.status = PodStatus.DELETION_SCHEDULED
73
- deleted_parent_pods.append(pod.metadata.name)
74
- elif _is_pod_ready(pod):
75
- pod_event.status = PodStatus.READY
76
- # if there are at least the same number of ready
77
- # pods as the snapshot.initial_pods set we assume that
78
- # the cluster is restored to the initial condition
79
- restored_pods.append(pod.metadata.name)
80
- if len(restored_pods) >= len(snapshot.initial_pods):
81
- cluster_restored = True
82
- else:
83
- pod_event.status = PodStatus.NOT_READY
84
-
85
- elif event_type == "DELETED":
86
- pod_event.status = PodStatus.DELETED
87
- elif event_type == "ADDED":
88
- pod_event.status = PodStatus.ADDED
89
-
90
- if pod_event.status == PodStatus.ADDED:
91
- snapshot.added_pods.append(pod.metadata.name)
92
- # in case a pod is respawn with the same name
93
- # the dictionary must not be reinitialized
94
- if pod.metadata.name not in snapshot.pods:
95
- snapshot.pods[pod.metadata.name] = MonitoredPod()
96
- snapshot.pods[pod.metadata.name].name = pod.metadata.name
97
- snapshot.pods[pod.metadata.name].namespace = (
98
- pod.metadata.namespace
99
- )
100
- # skips events out of the snapshot
101
- if pod.metadata.name in snapshot.pods:
102
- snapshot.pods[pod.metadata.name].status_changes.append(
103
- pod_event
93
+ w = watch.Watch(return_type=V1Pod)
94
+
95
+ for e in w.stream(monitor_partial, timeout_seconds=remain_timeout):
96
+ match_name = True
97
+ match_namespace = True
98
+ event_type = e["type"]
99
+ pod = e["object"]
100
+
101
+ if namespace_pattern:
102
+ match = re.match(namespace_pattern, pod.metadata.namespace)
103
+ match_namespace = match is not None
104
+ if name_pattern:
105
+ match = re.match(name_pattern, pod.metadata.name)
106
+ match_name = match is not None
107
+
108
+ if match_name and match_namespace:
109
+ pod_event = PodEvent()
110
+ pod_name = pod.metadata.name
111
+ if event_type == "MODIFIED":
112
+ if pod.metadata.deletion_timestamp is not None:
113
+ pod_event.status = PodStatus.DELETION_SCHEDULED
114
+ if pod_name not in deleted_parent_pods:
115
+ deleted_parent_pods.append(pod_name)
116
+ elif _is_pod_ready(pod):
117
+ pod_event.status = PodStatus.READY
118
+ # if there are at least the same number of ready
119
+ # pods as the snapshot.initial_pods set we assume
120
+ # the cluster is restored to the initial condition
121
+ if pod_name not in restored_pods:
122
+ restored_pods.append(pod_name)
123
+ inital_pod_len = len(snapshot.initial_pods)
124
+ if len(restored_pods) >= inital_pod_len:
125
+ cluster_restored = True
126
+ else:
127
+ pod_event.status = PodStatus.NOT_READY
128
+
129
+ elif event_type == "DELETED":
130
+ pod_event.status = PodStatus.DELETED
131
+ elif event_type == "ADDED":
132
+ pod_event.status = PodStatus.ADDED
133
+
134
+ if pod_event.status == PodStatus.ADDED:
135
+
136
+ if pod_name not in snapshot.added_pods:
137
+ snapshot.added_pods.append(pod_name)
138
+ # in case a pod is respawn with the same name
139
+ # the dictionary must not be reinitialized
140
+ if pod_name not in snapshot.pods:
141
+ snapshot.pods[pod_name] = MonitoredPod()
142
+ snapshot.pods[pod_name].name = pod_name
143
+ snapshot.pods[pod_name].namespace = (
144
+ pod.metadata.namespace
145
+ )
146
+ # skips events out of the snapshot
147
+ if pod_name in snapshot.pods:
148
+ snapshot.pods[pod_name].status_changes.append(
149
+ pod_event
150
+ )
151
+ # this flag is set when all the pods
152
+ # that has been deleted or not ready
153
+ # have been restored, if True the
154
+ # monitoring is stopped earlier
155
+ if cluster_restored:
156
+ logging.info("Cluster restored, stopping monitoring")
157
+ w.stop()
158
+ return snapshot
159
+
160
+ # If we exit the loop normally (timeout reached), we're done
161
+ logging.info("Watch stream completed normally")
162
+ break
163
+
164
+ except ProtocolError as e:
165
+
166
+ if retry_count > max_retries:
167
+ logging.warning(
168
+ f"Watch stream connection broken after {max_retries}"
169
+ f"retries. ProtocolError: {e}. Returning snapshot "
170
+ "with data collected so far."
171
+ )
172
+ break
173
+
174
+ # Log retry attempt
175
+ logging.info(
176
+ f"Watch stream connection broken (ProtocolError): {e}. "
177
+ f"Retry {retry_count}/{max_retries} in progress..."
178
+ )
179
+ backoff_time = 1
180
+
181
+ # Check if we have time for backoff
182
+ elapsed = time.time() - start_time
183
+ if elapsed + backoff_time >= max_timeout:
184
+ logging.info(
185
+ "Not enough time remaining for backoff, "
186
+ "returning snapshot with data collected."
104
187
  )
105
- # this flag is set when all the pods
106
- # that has been deleted or not ready
107
- # have been restored, if True the
108
- # monitoring is stopeed earlier
109
- if cluster_restored:
110
- w.stop()
188
+ break
189
+
190
+ logging.debug(f"Waiting {backoff_time}s before retry...")
191
+ time.sleep(backoff_time)
192
+
193
+ except Exception as e:
194
+ logging.error("Error in monitor pods: " + str(e))
195
+ logging.error("Stack trace:\n%s", traceback.format_exc())
196
+ raise Exception(e)
197
+
198
+ retry_count += 1
111
199
 
112
200
  return snapshot
113
201
 
@@ -0,0 +1,10 @@
1
+ apiVersion: snapshot.kubevirt.io/v1alpha1
2
+ kind: VirtualMachineSnapshot
3
+ metadata:
4
+ name: {{name}}
5
+ namespace: {{namespace}}
6
+ spec:
7
+ source:
8
+ apiGroup: kubevirt.io
9
+ kind: VirtualMachine
10
+ name: {{vm_name}}