krkn-lib 5.0.2__py3-none-any.whl → 5.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. krkn_lib/aws_tests/__init__.py +1 -0
  2. krkn_lib/{tests → aws_tests}/test_krkn_telemetry_kubernetes.py +57 -51
  3. krkn_lib/k8s/krkn_kubernetes.py +219 -106
  4. krkn_lib/k8s/pods_monitor_pool.py +14 -3
  5. krkn_lib/k8s/templates/node_exec_pod.j2 +6 -1
  6. krkn_lib/models/elastic/models.py +37 -4
  7. krkn_lib/models/k8s/models.py +4 -0
  8. krkn_lib/models/telemetry/models.py +64 -0
  9. krkn_lib/prometheus/krkn_prometheus.py +1 -1
  10. krkn_lib/telemetry/k8s/krkn_telemetry_kubernetes.py +1 -2
  11. krkn_lib/tests/base_test.py +40 -0
  12. krkn_lib/tests/test_krkn_elastic_models.py +33 -1
  13. krkn_lib/tests/test_krkn_kubernetes_check.py +2 -3
  14. krkn_lib/tests/test_krkn_kubernetes_create.py +3 -5
  15. krkn_lib/tests/test_krkn_kubernetes_delete.py +2 -3
  16. krkn_lib/tests/test_krkn_kubernetes_exec.py +1 -1
  17. krkn_lib/tests/test_krkn_kubernetes_get.py +104 -5
  18. krkn_lib/tests/test_krkn_kubernetes_list.py +13 -0
  19. krkn_lib/tests/test_krkn_kubernetes_misc.py +2 -2
  20. krkn_lib/tests/test_krkn_kubernetes_models.py +1 -1
  21. krkn_lib/tests/test_krkn_kubernetes_monitor.py +168 -137
  22. krkn_lib/tests/test_krkn_kubernetes_pods_monitor_pool.py +7 -3
  23. krkn_lib/tests/test_krkn_prometheus.py +133 -128
  24. krkn_lib/tests/test_utils.py +21 -0
  25. krkn_lib/tests/test_version.py +1 -1
  26. krkn_lib/utils/functions.py +24 -0
  27. {krkn_lib-5.0.2.dist-info → krkn_lib-5.1.1.dist-info}/METADATA +1 -1
  28. krkn_lib-5.1.1.dist-info/RECORD +59 -0
  29. krkn_lib-5.0.2.dist-info/RECORD +0 -58
  30. /krkn_lib/{tests → aws_tests}/test_krkn_telemetry_openshift.py +0 -0
  31. {krkn_lib-5.0.2.dist-info → krkn_lib-5.1.1.dist-info}/LICENSE +0 -0
  32. {krkn_lib-5.0.2.dist-info → krkn_lib-5.1.1.dist-info}/WHEEL +0 -0
@@ -529,7 +529,10 @@ class KrknKubernetes:
529
529
  return managedclusters
530
530
 
531
531
  def list_pods(
532
- self, namespace: str, label_selector: str = None
532
+ self,
533
+ namespace: str,
534
+ label_selector: str = None,
535
+ field_selector: str = None,
533
536
  ) -> list[str]:
534
537
  """
535
538
  List pods in the given namespace
@@ -537,11 +540,15 @@ class KrknKubernetes:
537
540
  :param namespace: namespace to search for pods
538
541
  :param label_selector: filter by label selector
539
542
  (optional default `None`)
543
+ :param field_selector: filter results by config details
544
+ select only running pods by setting "status.phase=Running"
540
545
  :return: a list of pod names
541
546
  """
542
547
  pods = []
543
548
  try:
544
- ret = self.get_all_pod_info(namespace, label_selector)
549
+ ret = self.get_all_pod_info(
550
+ namespace, label_selector, field_selector
551
+ )
545
552
  except ApiException as e:
546
553
  logging.error(
547
554
  "Exception when calling list_pods: %s\n",
@@ -734,12 +741,16 @@ class KrknKubernetes:
734
741
  logging.error("Failed to get deployment data %s", str(e))
735
742
  raise e
736
743
 
737
- def get_all_pods(self, label_selector: str = None) -> list[[str, str]]:
744
+ def get_all_pods(
745
+ self, label_selector: str = None, field_selector: str = None
746
+ ) -> list[[str, str]]:
738
747
  """
739
748
  Return a list of tuples containing pod name [0] and namespace [1]
740
749
 
741
750
  :param label_selector: filter by label_selector
742
751
  (optional default `None`)
752
+ :param field_selector: filter results by config details
753
+ select only running pods by setting "status.phase=Running"
743
754
  :return: list of tuples pod,namespace
744
755
  """
745
756
  pods = []
@@ -749,12 +760,14 @@ class KrknKubernetes:
749
760
  pretty=True,
750
761
  label_selector=label_selector,
751
762
  limit=self.request_chunk_size,
763
+ field_selector=field_selector,
752
764
  )
753
765
  else:
754
766
  ret = self.list_continue_helper(
755
767
  self.cli.list_pod_for_all_namespaces,
756
768
  pretty=True,
757
769
  limit=self.request_chunk_size,
770
+ field_selector=field_selector,
758
771
  )
759
772
  for ret_list in ret:
760
773
  for pod in ret_list.items:
@@ -854,11 +867,15 @@ class KrknKubernetes:
854
867
  self,
855
868
  namespace: str = "default",
856
869
  label_selector: str = None,
870
+ field_selector: str = None,
857
871
  ) -> list[str]:
858
872
  """
859
873
  Get details of all pods in a namespace
860
874
 
861
875
  :param namespace: namespace (optional default `default`)
876
+ :param field_selector: filter results by config details
877
+ select only running pods by setting "status.phase=Running"
878
+
862
879
  :return list of pod details
863
880
  """
864
881
  try:
@@ -869,12 +886,14 @@ class KrknKubernetes:
869
886
  pretty=True,
870
887
  label_selector=label_selector,
871
888
  limit=self.request_chunk_size,
889
+ field_selector=field_selector,
872
890
  )
873
891
  else:
874
892
  ret = self.list_continue_helper(
875
893
  self.cli.list_namespaced_pod,
876
894
  namespace,
877
895
  limit=self.request_chunk_size,
896
+ field_selector=field_selector,
878
897
  )
879
898
  except ApiException as e:
880
899
  logging.error(
@@ -1493,7 +1512,9 @@ class KrknKubernetes:
1493
1512
  except Exception as e:
1494
1513
  logging.error("Error trying to apply_yaml" + str(e))
1495
1514
 
1496
- def get_pod_info(self, name: str, namespace: str = "default") -> Pod:
1515
+ def get_pod_info(
1516
+ self, name: str, namespace: str = "default", delete_expected: bool = False
1517
+ ) -> Optional[Pod]:
1497
1518
  """
1498
1519
  Retrieve information about a specific pod
1499
1520
 
@@ -1503,63 +1524,75 @@ class KrknKubernetes:
1503
1524
  kubectl command in the given format if the pod exists.
1504
1525
  Returns None if the pod doesn't exist
1505
1526
  """
1506
-
1507
- pod_exists = self.check_if_pod_exists(name=name, namespace=namespace)
1508
- if pod_exists:
1527
+ try:
1528
+ pod_info = None
1509
1529
  response = self.cli.read_namespaced_pod(
1510
1530
  name=name, namespace=namespace, pretty="true"
1511
1531
  )
1512
- container_list = []
1513
-
1514
- # Create a list of containers present in the pod
1515
- for container in response.spec.containers:
1516
- volume_mount_list = []
1517
- for volume_mount in container.volume_mounts:
1518
- volume_mount_list.append(
1519
- VolumeMount(
1520
- name=volume_mount.name,
1521
- mountPath=volume_mount.mount_path,
1532
+ if response:
1533
+ container_list = []
1534
+
1535
+ # Create a list of containers present in the pod
1536
+ for container in response.spec.containers:
1537
+ volume_mount_list = []
1538
+ for volume_mount in container.volume_mounts:
1539
+ volume_mount_list.append(
1540
+ VolumeMount(
1541
+ name=volume_mount.name,
1542
+ mountPath=volume_mount.mount_path,
1543
+ )
1544
+ )
1545
+ container_list.append(
1546
+ Container(
1547
+ name=container.name,
1548
+ image=container.image,
1549
+ volumeMounts=volume_mount_list,
1522
1550
  )
1523
1551
  )
1524
- container_list.append(
1525
- Container(
1526
- name=container.name,
1527
- image=container.image,
1528
- volumeMounts=volume_mount_list,
1552
+
1553
+ for i, container in enumerate(
1554
+ response.status.container_statuses
1555
+ ):
1556
+ container_list[i].ready = container.ready
1557
+ container_list[i].containerId = (
1558
+ response.status.container_statuses[i].container_id
1559
+ )
1560
+
1561
+ # Create a list of volumes associated with the pod
1562
+ volume_list = []
1563
+ for volume in response.spec.volumes:
1564
+ volume_name = volume.name
1565
+ pvc_name = (
1566
+ volume.persistent_volume_claim.claim_name
1567
+ if volume.persistent_volume_claim is not None
1568
+ else None
1569
+ )
1570
+ volume_list.append(
1571
+ Volume(name=volume_name, pvcName=pvc_name)
1529
1572
  )
1530
- )
1531
1573
 
1532
- for i, container in enumerate(response.status.container_statuses):
1533
- container_list[i].ready = container.ready
1534
-
1535
- # Create a list of volumes associated with the pod
1536
- volume_list = []
1537
- for volume in response.spec.volumes:
1538
- volume_name = volume.name
1539
- pvc_name = (
1540
- volume.persistent_volume_claim.claim_name
1541
- if volume.persistent_volume_claim is not None
1542
- else None
1574
+ # Create the Pod data class object
1575
+ pod_info = Pod(
1576
+ name=response.metadata.name,
1577
+ podIP=response.status.pod_ip,
1578
+ namespace=response.metadata.namespace,
1579
+ containers=container_list,
1580
+ nodeName=response.spec.node_name,
1581
+ volumes=volume_list,
1582
+ status=response.status.phase,
1583
+ creation_timestamp=response.metadata.creation_timestamp,
1584
+ )
1585
+ except Exception:
1586
+ if not delete_expected:
1587
+ logging.error(
1588
+ "Pod '%s' doesn't exist in namespace '%s'", name, namespace
1589
+ )
1590
+ else:
1591
+ logging.info(
1592
+ "Pod '%s' doesn't exist in namespace '%s'", name, namespace
1543
1593
  )
1544
- volume_list.append(Volume(name=volume_name, pvcName=pvc_name))
1545
-
1546
- # Create the Pod data class object
1547
- pod_info = Pod(
1548
- name=response.metadata.name,
1549
- podIP=response.status.pod_ip,
1550
- namespace=response.metadata.namespace,
1551
- containers=container_list,
1552
- nodeName=response.spec.node_name,
1553
- volumes=volume_list,
1554
- status=response.status.phase,
1555
- creation_timestamp=response.metadata.creation_timestamp,
1556
- )
1557
- return pod_info
1558
- else:
1559
- logging.error(
1560
- "Pod '%s' doesn't exist in namespace '%s'", name, namespace
1561
- )
1562
1594
  return None
1595
+ return pod_info
1563
1596
 
1564
1597
  def check_if_namespace_exists(self, name: str) -> bool:
1565
1598
  """
@@ -2356,9 +2389,7 @@ class KrknKubernetes:
2356
2389
  # due to pipes and scripts the command is executed in
2357
2390
 
2358
2391
  if not os.path.isdir(download_path):
2359
- raise Exception(
2360
- f"download path {download_path} does not exist"
2361
- )
2392
+ os.mkdir(download_path)
2362
2393
  if not self.path_exists_in_pod(
2363
2394
  pod_name, container_name, namespace, remote_archive_path
2364
2395
  ):
@@ -2612,24 +2643,21 @@ class KrknKubernetes:
2612
2643
  )
2613
2644
  return None
2614
2645
 
2615
- def select_pods_by_label(self, label_selector: str) -> list[(str, str)]:
2646
+ def select_pods_by_label(
2647
+ self, label_selector: str, field_selector: str = None
2648
+ ) -> list[(str, str)]:
2616
2649
  """
2617
2650
  Selects the pods identified by a label_selector
2618
2651
 
2619
2652
  :param label_selector: a label selector string
2620
2653
  in the format "key=value"
2621
- :param max_timeout: the maximum time in seconds
2622
- to wait before considering the pod "not recovered" after the Chaos
2654
+ :param field_selector: filter results by config details
2655
+ select only running pods by setting "status.phase=Running"
2623
2656
  :return: a list of pod_name and namespace tuples
2624
2657
  """
2625
- pods_and_namespaces = self.get_all_pods(label_selector)
2658
+ pods_and_namespaces = self.get_all_pods(label_selector, field_selector)
2626
2659
  pods_and_namespaces = [(pod[0], pod[1]) for pod in pods_and_namespaces]
2627
- # select only running pods
2628
- pods_and_namespaces = [
2629
- pod
2630
- for pod in pods_and_namespaces
2631
- if not self.is_pod_terminating(pod[0], pod[1])
2632
- ]
2660
+
2633
2661
  return pods_and_namespaces
2634
2662
 
2635
2663
  def select_service_by_label(
@@ -2661,7 +2689,10 @@ class KrknKubernetes:
2661
2689
  return selected_services
2662
2690
 
2663
2691
  def select_pods_by_name_pattern_and_namespace_pattern(
2664
- self, pod_name_pattern: str, namespace_pattern: str
2692
+ self,
2693
+ pod_name_pattern: str,
2694
+ namespace_pattern: str,
2695
+ field_selector: str = None,
2665
2696
  ) -> list[(str, str)]:
2666
2697
  """
2667
2698
  Selects the pods identified by a namespace_pattern
@@ -2671,6 +2702,8 @@ class KrknKubernetes:
2671
2702
  :param namespace_pattern: a namespace pattern to match
2672
2703
  :param max_timeout: the maximum time in seconds to wait
2673
2704
  before considering the pod "not recovered" after the Chaos
2705
+ :param field_selector: filter results by config details
2706
+ select only running pods by setting "status.phase=Running"
2674
2707
  :return: a list of pod_name and namespace tuples
2675
2708
  """
2676
2709
  namespace_re = re.compile(namespace_pattern)
@@ -2679,20 +2712,18 @@ class KrknKubernetes:
2679
2712
  pods_and_namespaces = []
2680
2713
  for namespace in namespaces:
2681
2714
  if namespace_re.match(namespace):
2682
- pods = self.list_pods(namespace)
2715
+ pods = self.list_pods(namespace, field_selector=field_selector)
2683
2716
  for pod in pods:
2684
2717
  if podname_re.match(pod):
2685
2718
  pods_and_namespaces.append((pod, namespace))
2686
- # select only running pods
2687
- pods_and_namespaces = [
2688
- (pod[0], pod[1])
2689
- for pod in pods_and_namespaces
2690
- if not self.is_pod_terminating(pod[0], pod[1])
2691
- ]
2719
+
2692
2720
  return pods_and_namespaces
2693
2721
 
2694
2722
  def select_pods_by_namespace_pattern_and_label(
2695
- self, namespace_pattern: str, label_selector: str
2723
+ self,
2724
+ namespace_pattern: str,
2725
+ label_selector: str,
2726
+ field_selector: str = None,
2696
2727
  ) -> list[(str, str)]:
2697
2728
  """
2698
2729
  Selects the pods identified by a label_selector
@@ -2701,29 +2732,24 @@ class KrknKubernetes:
2701
2732
  :param namespace_pattern: a namespace pattern to match
2702
2733
  :param label_selector: a label selector string
2703
2734
  in the format "key=value"
2704
- :param max_timeout: the maximum time in seconds
2705
- to wait before considering the pod "not recovered" after the Chaos
2735
+ :param field_selector: filter results by config details
2736
+ select only running pods by setting "status.phase=Running"
2706
2737
  :return: a list of pod_name and namespace tuples
2707
2738
  """
2708
2739
  namespace_re = re.compile(namespace_pattern)
2709
- pods_and_namespaces = self.get_all_pods(label_selector)
2710
- pods_and_namespaces = [
2711
- pod for pod in pods_and_namespaces if namespace_re.match(pod[1])
2712
- ]
2713
-
2714
- # select only running pods
2740
+ pods_and_namespaces = self.get_all_pods(label_selector, field_selector)
2715
2741
  pods_and_namespaces = [
2716
2742
  (pod[0], pod[1])
2717
2743
  for pod in pods_and_namespaces
2718
- if not self.is_pod_terminating(pod[0], pod[1])
2744
+ if namespace_re.match(pod[1])
2719
2745
  ]
2720
-
2721
2746
  return pods_and_namespaces
2722
2747
 
2723
2748
  def monitor_pods_by_label(
2724
2749
  self,
2725
2750
  label_selector: str,
2726
2751
  pods_and_namespaces: list[(str, str)],
2752
+ field_selector: str = None,
2727
2753
  max_timeout: int = 30,
2728
2754
  event: threading.Event = None,
2729
2755
  ) -> PodsMonitorThread:
@@ -2741,6 +2767,8 @@ class KrknKubernetes:
2741
2767
  :param pods_and_namespaces: the list of pods collected
2742
2768
  by `select_pods_by_label` against which the changes
2743
2769
  in the pods state is monitored
2770
+ :param field_selector: filter results by config details
2771
+ select only running pods by setting "status.phase=Running"
2744
2772
  :param max_timeout: the expected time the pods should take
2745
2773
  to recover. If the killed pods are replaced in this time frame,
2746
2774
  but they didn't reach the Ready State, they will be marked as
@@ -2762,6 +2790,7 @@ class KrknKubernetes:
2762
2790
  max_timeout=max_timeout,
2763
2791
  pods_status=pods_status,
2764
2792
  label_selector=label_selector,
2793
+ field_selector=field_selector,
2765
2794
  event=event,
2766
2795
  )
2767
2796
 
@@ -2770,6 +2799,7 @@ class KrknKubernetes:
2770
2799
  pod_name_pattern: str,
2771
2800
  namespace_pattern: str,
2772
2801
  pods_and_namespaces: list[(str, str)],
2802
+ field_selector: str = None,
2773
2803
  max_timeout=30,
2774
2804
  event: threading.Event = None,
2775
2805
  ) -> PodsMonitorThread:
@@ -2793,6 +2823,8 @@ class KrknKubernetes:
2793
2823
  :param pods_and_namespaces: the list of pods collected by
2794
2824
  `select_pods_by_name_pattern_and_namespace_pattern` against
2795
2825
  which the changes in the pods state is monitored
2826
+ :param field_selector: filter results by config details
2827
+ select only running pods by setting "status.phase=Running"
2796
2828
  :param max_timeout: the expected time the pods should take to
2797
2829
  recover. If the killed pods are replaced in this time frame,
2798
2830
  but they didn't reach the Ready State, they will be marked as
@@ -2813,6 +2845,7 @@ class KrknKubernetes:
2813
2845
  pods_and_namespaces=pods_and_namespaces,
2814
2846
  max_timeout=max_timeout,
2815
2847
  pods_status=pods_status,
2848
+ field_selector=field_selector,
2816
2849
  name_pattern=pod_name_pattern,
2817
2850
  namespace_pattern=namespace_pattern,
2818
2851
  event=event,
@@ -2823,6 +2856,7 @@ class KrknKubernetes:
2823
2856
  namespace_pattern: str,
2824
2857
  label_selector: str,
2825
2858
  pods_and_namespaces: list[(str, str)],
2859
+ field_selector: str = None,
2826
2860
  max_timeout=30,
2827
2861
  event: threading.Event = None,
2828
2862
  ) -> PodsMonitorThread:
@@ -2845,6 +2879,8 @@ class KrknKubernetes:
2845
2879
  :param pods_and_namespaces: the list of pods collected by
2846
2880
  `select_pods_by_name_pattern_and_namespace_pattern` against
2847
2881
  which the changes in the pods state is monitored
2882
+ :param field_selector: filter results by config details
2883
+ select only running pods by setting "status.phase=Running"
2848
2884
  :param max_timeout: the expected time the pods should take to recover.
2849
2885
  If the killed pods are replaced in this time frame, but they
2850
2886
  didn't reach the Ready State, they will be marked as unrecovered.
@@ -2866,6 +2902,7 @@ class KrknKubernetes:
2866
2902
  max_timeout=max_timeout,
2867
2903
  pods_status=pods_status,
2868
2904
  label_selector=label_selector,
2905
+ field_selector=field_selector,
2869
2906
  namespace_pattern=namespace_pattern,
2870
2907
  event=event,
2871
2908
  )
@@ -2876,6 +2913,7 @@ class KrknKubernetes:
2876
2913
  pods_status: PodsStatus,
2877
2914
  max_timeout: int,
2878
2915
  label_selector: str = None,
2916
+ field_selector: str = None,
2879
2917
  pod_name: str = None,
2880
2918
  namespace_pattern: str = None,
2881
2919
  name_pattern: str = None,
@@ -2888,6 +2926,7 @@ class KrknKubernetes:
2888
2926
  pods_status=pods_status,
2889
2927
  max_timeout=max_timeout,
2890
2928
  label_selector=label_selector,
2929
+ field_selector=field_selector,
2891
2930
  pod_name=pod_name,
2892
2931
  namespace_pattern=namespace_pattern,
2893
2932
  name_pattern=name_pattern,
@@ -2902,6 +2941,7 @@ class KrknKubernetes:
2902
2941
  pods_status: PodsStatus,
2903
2942
  max_timeout: int,
2904
2943
  label_selector: str = None,
2944
+ field_selector: str = None,
2905
2945
  pod_name: str = None,
2906
2946
  namespace_pattern: str = None,
2907
2947
  name_pattern: str = None,
@@ -2921,6 +2961,7 @@ class KrknKubernetes:
2921
2961
  select_method = partial(
2922
2962
  self.select_pods_by_label,
2923
2963
  label_selector=label_selector,
2964
+ field_selector=field_selector,
2924
2965
  )
2925
2966
  elif (
2926
2967
  name_pattern
@@ -2932,6 +2973,7 @@ class KrknKubernetes:
2932
2973
  self.select_pods_by_name_pattern_and_namespace_pattern,
2933
2974
  pod_name_pattern=name_pattern,
2934
2975
  namespace_pattern=namespace_pattern,
2976
+ field_selector=field_selector,
2935
2977
  )
2936
2978
  elif (
2937
2979
  namespace_pattern
@@ -2943,6 +2985,7 @@ class KrknKubernetes:
2943
2985
  self.select_pods_by_namespace_pattern_and_label,
2944
2986
  namespace_pattern=namespace_pattern,
2945
2987
  label_selector=label_selector,
2988
+ field_selector=field_selector,
2946
2989
  )
2947
2990
  else:
2948
2991
  pods_status.error = (
@@ -2952,29 +2995,25 @@ class KrknKubernetes:
2952
2995
  return pods_status
2953
2996
 
2954
2997
  while time.time() - start_time <= max_timeout:
2955
- if event:
2956
- if event.is_set():
2957
- return pods_status
2998
+ if event and event.is_set():
2999
+ return pods_status
2958
3000
 
2959
3001
  time_offset = time.time() - start_time
2960
3002
  remaining_time = max_timeout - time_offset
2961
3003
  current_pods_and_namespaces = select_method()
2962
-
2963
3004
  # no pods have been killed or pods have been killed and
2964
3005
  # respawned with the same names
2965
3006
  if set(pods_and_namespaces) == set(current_pods_and_namespaces):
2966
3007
  for pod in current_pods_and_namespaces:
2967
- pod_info = self.get_pod_info(pod[0], pod[1])
2968
- if pod_info is not None:
3008
+
3009
+ pod_info = self.get_pod_info(pod[0], pod[1], delete_expected=True)
3010
+ # for pod_info in pod_list_info:
3011
+ if pod_info:
2969
3012
  pod_creation_timestamp = (
2970
3013
  pod_info.creation_timestamp.timestamp()
2971
3014
  )
2972
- else:
2973
- continue
2974
- if pod_info.status and start_time < pod_creation_timestamp:
2975
- # in this case the pods to wait have been respawn
2976
- # with the same name
2977
- missing_pods.add(pod)
3015
+ if start_time < pod_creation_timestamp:
3016
+ missing_pods.add(pod)
2978
3017
  pods_to_wait.update(missing_pods)
2979
3018
 
2980
3019
  # pods have been killed but respawned with different names
@@ -3004,9 +3043,7 @@ class KrknKubernetes:
3004
3043
  # inject the chaos, let's see the next iteration.
3005
3044
  if len(pods_to_wait) == 0:
3006
3045
  continue
3007
-
3008
3046
  futures = []
3009
-
3010
3047
  with ThreadPoolExecutor() as executor:
3011
3048
  for pod_and_namespace in pods_to_wait:
3012
3049
  if pod_and_namespace not in pods_already_watching:
@@ -3030,20 +3067,24 @@ class KrknKubernetes:
3030
3067
  # sum the time elapsed waiting before the pod
3031
3068
  # has been rescheduled (rescheduling time)
3032
3069
  # to the effective recovery time of the pod
3033
- result.pod_rescheduling_time = (
3034
- time.time() - start_time - result.pod_readiness_time
3035
- )
3036
- result.total_recovery_time = (
3037
- result.pod_readiness_time
3038
- + result.pod_rescheduling_time
3039
- )
3070
+ if result.pod_readiness_time:
3071
+ result.pod_rescheduling_time = (
3072
+ time.time()
3073
+ - start_time
3074
+ - result.pod_readiness_time
3075
+ )
3076
+ result.total_recovery_time = (
3077
+ result.pod_readiness_time
3078
+ + result.pod_rescheduling_time
3079
+ )
3040
3080
 
3041
- pods_status.recovered.append(result)
3081
+ pods_status.recovered.append(result)
3042
3082
  for future in undone:
3043
3083
  result = future.result()
3044
3084
  pods_status.unrecovered.append(result)
3045
3085
 
3046
3086
  missing_pods.clear()
3087
+
3047
3088
  # if there are missing pods, pods affected
3048
3089
  # by the chaos did not restart after the chaos
3049
3090
  # an exception will be set in the PodsStatus
@@ -3383,3 +3424,75 @@ class KrknKubernetes:
3383
3424
  resources.memory = json_obj["node"]["memory"]["availableBytes"]
3384
3425
  resources.disk_space = json_obj["node"]["fs"]["availableBytes"]
3385
3426
  return resources
3427
+
3428
def get_container_ids(self, pod_name: str, namespace: str) -> list[str]:
    """
    Gets the container ids of the selected pod

    Everything up to and including the ``://`` separator is stripped
    from each id, so only the bare identifier is returned. Containers
    whose id is not set are skipped.

    :param pod_name: name of the pod
    :param namespace: namespace of the pod

    :return: a list of container ids without the ``<scheme>://`` prefix;
        an empty list if `get_pod_info` returns no pod
    """
    pod = self.get_pod_info(pod_name, namespace)
    if not pod:
        return []

    return [
        re.sub(r".*://", "", container.containerId)
        for container in pod.containers
        # re.sub raises TypeError on None, so containers that have no
        # id assigned are left out instead of failing the whole call
        if container.containerId
    ]
3446
+
3447
def get_pod_pids(
    self,
    base_pod_name: str,
    base_pod_namespace: str,
    base_pod_container_name: str,
    pod_name: str,
    pod_namespace: str,
    pod_container_id: str,
) -> Optional[list[str]]:
    """
    Retrieves the PIDs assigned to the pod in the node. The command
    must be executed inside a privileged Pod with `hostPID` set to true

    The PIDs are found by scanning every ``/proc/<pid>/cgroup`` file
    visible from the base pod for the container id.

    :param base_pod_name: name of the pod where the command is run
    :param base_pod_namespace: namespace of the pod
        where the command is run
    :param base_pod_container_name: container name of the pod
        where the command is run
    :param pod_name: Pod name associated with the PID
        (only used to check that the pod exists)
    :param pod_namespace: namespace of the Pod associated with the PID
    :param pod_container_id: container id of Pod associated with the PID

    :raises Exception: if the base pod or the target pod does not exist
    :return: the list of PIDs (as strings) printed by the command,
        or None if the command produced no output.
    """
    # local import: the original block did not rely on shlex
    import shlex

    if not self.check_if_pod_exists(base_pod_name, base_pod_namespace):
        raise Exception(
            f"base pod {base_pod_name} does not exist in "
            f"namespace {base_pod_namespace}"
        )
    if not self.check_if_pod_exists(pod_name, pod_namespace):
        raise Exception(
            f"target pod {pod_name} does not exist in "
            f"namespace {pod_namespace}"
        )

    # - the id is shell-quoted so it can neither be word-split nor
    #   inject additional commands
    # - `grep -q` is used for its exit status: wrapping grep's output in
    #   `[ ... ]` errors out as soon as more than one word matches,
    #   which silently drops the PID
    # - `-F` matches the id literally instead of as a regular expression
    # - `${dir#/proc/}` is the POSIX prefix-strip form and needs no
    #   backslash escapes inside the Python literal
    cmd = (
        "for dir in /proc/[0-9]*; do "
        "grep -qF -- %s $dir/cgroup 2>/dev/null && "
        "echo ${dir#/proc/}; done" % shlex.quote(pod_container_id)
    )

    pids = self.exec_cmd_in_pod(
        [cmd],
        base_pod_name,
        base_pod_namespace,
        base_pod_container_name,
    )
    if not pids:
        return None

    # one PID per line; drop the empty entries left by trailing newlines
    return [pid for pid in pids.split("\n") if pid]