krkn-lib 5.1.3__py3-none-any.whl → 5.1.5__py3-none-any.whl

krkn_lib/__init__.py ADDED
File without changes
@@ -7,8 +7,6 @@ import re
 import threading
 import time
 import warnings
-from concurrent.futures import ThreadPoolExecutor, wait
-from functools import partial
 from queue import Queue
 from typing import Any, Dict, List, Optional
 from urllib.parse import urlparse
@@ -27,13 +25,10 @@ from urllib3 import HTTPResponse
 from krkn_lib.models.k8s import (
     PVC,
     AffectedNode,
-    AffectedPod,
     ApiRequestException,
     Container,
     NodeResources,
     Pod,
-    PodsMonitorThread,
-    PodsStatus,
     ServiceHijacking,
     Volume,
     VolumeMount,
@@ -1513,11 +1508,15 @@ class KrknKubernetes:
             logging.error("Error trying to apply_yaml" + str(e))
 
     def get_pod_info(
-        self, name: str, namespace: str = "default", delete_expected: bool = False
+        self,
+        name: str,
+        namespace: str = "default",
+        delete_expected: bool = False,
     ) -> Optional[Pod]:
         """
         Retrieve information about a specific pod
 
+
         :param name: pod name
         :param namespace: namespace (optional default `default`)
         :return: Data class object of type Pod with the output of the above
@@ -1583,11 +1582,11 @@ class KrknKubernetes:
                 creation_timestamp=response.metadata.creation_timestamp,
             )
         except Exception:
-            if not delete_expected:
+            if not delete_expected:
                 logging.error(
                     "Pod '%s' doesn't exist in namespace '%s'", name, namespace
                 )
-            else:
+            else:
                 logging.info(
                     "Pod '%s' doesn't exist in namespace '%s'", name, namespace
                 )
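Together, these two hunks reflow `get_pod_info`'s signature and show what `delete_expected` does: it only downgrades the "pod not found" log from ERROR to INFO. A minimal usage sketch (the client setup and pod name are illustrative, not taken from this diff):

```python
# Minimal sketch; assumes a reachable cluster and a configured client.
from krkn_lib.k8s import KrknKubernetes

kubecli = KrknKubernetes(kubeconfig_path="~/.kube/config")

# With delete_expected=True a missing pod is logged at INFO instead of
# ERROR, and the method returns None either way.
pod = kubecli.get_pod_info("nginx-0", namespace="default", delete_expected=True)
if pod is not None:
    print(pod.name, pod.creation_timestamp)
```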
@@ -2745,374 +2744,6 @@ class KrknKubernetes:
         ]
         return pods_and_namespaces
 
-    def monitor_pods_by_label(
-        self,
-        label_selector: str,
-        pods_and_namespaces: list[(str, str)],
-        field_selector: str = None,
-        max_timeout: int = 30,
-        event: threading.Event = None,
-    ) -> PodsMonitorThread:
-        """
-        Starts monitoring a list of pods identified as tuples
-        (pod_name, namespace) filtered by label selector
-        and collects infos about the pods recovery after a kill scenario.
-        Returns a PodsMonitorThread that can be joined after the scenario
-        to retrieve the PodsStatus object containing all the information
-        collected in background during the chaos run.
-
-        :param label_selector: the label selector used
-            to filter the pods to monitor (must be the
-            same used in `select_pods_by_label`)
-        :param pods_and_namespaces: the list of pods collected
-            by `select_pods_by_label` against which the changes
-            in the pods state is monitored
-        :param field_selector: filter results by config details
-            select only running pods by setting "status.phase=Running"
-        :param max_timeout: the expected time the pods should take
-            to recover. If the killed pods are replaced in this time frame,
-            but they didn't reach the Ready State, they will be marked as
-            unrecovered. If during the time frame the pods are not replaced
-            at all the error field of the PodsStatus structure will be
-            valorized with an exception.
-        :param event: a threading event can be passed to interrupt the process
-            before the timeout. Simply call set() method on the event passed
-            to make the thread return immediately
-        :return: a PodsMonitorThread structure that can be joined
-            in any place of the code, to collect the PodsStatus structure
-            returned, in order to make the process run in background
-            while a chaos scenario is performed.
-
-        """
-        pods_status = PodsStatus()
-        return self.__start_monitoring_pods(
-            pods_and_namespaces=pods_and_namespaces,
-            max_timeout=max_timeout,
-            pods_status=pods_status,
-            label_selector=label_selector,
-            field_selector=field_selector,
-            event=event,
-        )
-
-    def monitor_pods_by_name_pattern_and_namespace_pattern(
-        self,
-        pod_name_pattern: str,
-        namespace_pattern: str,
-        pods_and_namespaces: list[(str, str)],
-        field_selector: str = None,
-        max_timeout=30,
-        event: threading.Event = None,
-    ) -> PodsMonitorThread:
-        """
-        Starts monitoring a list of pods identified as tuples
-        (pod_name, namespace) filtered by a pod name regex pattern
-        and a namespace regex pattern, and collects infos about the
-        pods recovery after a kill scenario. Returns a PodsMonitorThread
-        that can be joined after the scenario to retrieve the PodsStatus
-        object containing all the information collected in background during
-        the chaos run.
-
-        :param pod_name_pattern: a regex representing the
-            pod name pattern used to filter the pods to be monitored
-            (must be the same used in
-            `select_pods_by_name_pattern_and_namespace_pattern`)
-        :param namespace_pattern: a regex representing the namespace
-            pattern used to filter the pods to be monitored
-            (must be the same used in
-            `select_pods_by_name_pattern_and_namespace_pattern`)
-        :param pods_and_namespaces: the list of pods collected by
-            `select_pods_by_name_pattern_and_namespace_pattern` against
-            which the changes in the pods state is monitored
-        :param field_selector: filter results by config details
-            select only running pods by setting "status.phase=Running"
-        :param max_timeout: the expected time the pods should take to
-            recover. If the killed pods are replaced in this time frame,
-            but they didn't reach the Ready State, they will be marked as
-            unrecovered. If during the time frame the pods are not replaced
-            at all the error field of the PodsStatus structure will be
-            valorized with an exception.
-        :param event: a threading event can be passed to interrupt the process
-            before the timeout. Simply call set() method on the event passed
-            to make the thread return immediately
-        :return: a PodsMonitorThread structure that can be joined in any
-            place of the code, to collect the PodsStatus structure returned,
-            in order to make the process run in background while a chaos
-            scenario is performed.
-
-        """
-        pods_status = PodsStatus()
-        return self.__start_monitoring_pods(
-            pods_and_namespaces=pods_and_namespaces,
-            max_timeout=max_timeout,
-            pods_status=pods_status,
-            field_selector=field_selector,
-            name_pattern=pod_name_pattern,
-            namespace_pattern=namespace_pattern,
-            event=event,
-        )
-
-    def monitor_pods_by_namespace_pattern_and_label(
-        self,
-        namespace_pattern: str,
-        label_selector: str,
-        pods_and_namespaces: list[(str, str)],
-        field_selector: str = None,
-        max_timeout=30,
-        event: threading.Event = None,
-    ) -> PodsMonitorThread:
-        """
-        Starts monitoring a list of pods identified as tuples
-        (pod_name, namespace) filtered by a namespace regex pattern
-        and a pod label selector, and collects infos about the
-        pods recovery after a kill scenario. Returns a PodsMonitorThread
-        that can be joined after the scenario to retrieve the PodsStatus
-        object containing all the information collected in background during
-        the chaos run.
-
-        :param label_selector: the label selector used to filter
-            the pods to monitor (must be the same used in
-            `select_pods_by_label`)
-        :param namespace_pattern: a regex representing the namespace
-            pattern used to filter the pods to be monitored (must be
-            the same used
-            in `select_pods_by_name_pattern_and_namespace_pattern`)
-        :param pods_and_namespaces: the list of pods collected by
-            `select_pods_by_name_pattern_and_namespace_pattern` against
-            which the changes in the pods state is monitored
-        :param field_selector: filter results by config details
-            select only running pods by setting "status.phase=Running"
-        :param max_timeout: the expected time the pods should take to recover.
-            If the killed pods are replaced in this time frame, but they
-            didn't reach the Ready State, they will be marked as unrecovered.
-            If during the time frame the pods are not replaced
-            at all the error field of the PodsStatus structure will be
-            valorized with an exception.
-        :param event: a threading event can be passed to interrupt the process
-            before the timeout. Simply call set() method on the event passed
-            to make the thread return immediately
-        :return: a PodsMonitorThread structure that can be joined in
-            any place of the code, to collect the PodsStatus structure
-            returned, in order to make the process run in background while
-            a chaos scenario is performed.
-
-        """
-        pods_status = PodsStatus()
-        return self.__start_monitoring_pods(
-            pods_and_namespaces=pods_and_namespaces,
-            max_timeout=max_timeout,
-            pods_status=pods_status,
-            label_selector=label_selector,
-            field_selector=field_selector,
-            namespace_pattern=namespace_pattern,
-            event=event,
-        )
-
-    def __start_monitoring_pods(
-        self,
-        pods_and_namespaces: list[(str, str)],
-        pods_status: PodsStatus,
-        max_timeout: int,
-        label_selector: str = None,
-        field_selector: str = None,
-        pod_name: str = None,
-        namespace_pattern: str = None,
-        name_pattern: str = None,
-        event: threading.Event = None,
-    ) -> PodsMonitorThread:
-        executor = ThreadPoolExecutor()
-        future = executor.submit(
-            self.__monitor_pods_worker,
-            pods_and_namespaces=pods_and_namespaces,
-            pods_status=pods_status,
-            max_timeout=max_timeout,
-            label_selector=label_selector,
-            field_selector=field_selector,
-            pod_name=pod_name,
-            namespace_pattern=namespace_pattern,
-            name_pattern=name_pattern,
-            event=event,
-        )
-
-        return PodsMonitorThread(executor, future)
-
-    def __monitor_pods_worker(
-        self,
-        pods_and_namespaces: [(str, str)],
-        pods_status: PodsStatus,
-        max_timeout: int,
-        label_selector: str = None,
-        field_selector: str = None,
-        pod_name: str = None,
-        namespace_pattern: str = None,
-        name_pattern: str = None,
-        event: threading.Event = None,
-    ) -> PodsStatus:
-        missing_pods = set()
-        pods_to_wait = set()
-        pods_already_watching = set()
-        start_time = time.time()
-        _event = threading.Event() if not event else event
-        if (
-            label_selector
-            and not pod_name
-            and not name_pattern
-            and not namespace_pattern
-        ):
-            select_method = partial(
-                self.select_pods_by_label,
-                label_selector=label_selector,
-                field_selector=field_selector,
-            )
-        elif (
-            name_pattern
-            and namespace_pattern
-            and not pod_name
-            and not label_selector
-        ):
-            select_method = partial(
-                self.select_pods_by_name_pattern_and_namespace_pattern,
-                pod_name_pattern=name_pattern,
-                namespace_pattern=namespace_pattern,
-                field_selector=field_selector,
-            )
-        elif (
-            namespace_pattern
-            and label_selector
-            and not pod_name
-            and not name_pattern
-        ):
-            select_method = partial(
-                self.select_pods_by_namespace_pattern_and_label,
-                namespace_pattern=namespace_pattern,
-                label_selector=label_selector,
-                field_selector=field_selector,
-            )
-        else:
-            pods_status.error = (
-                "invalid parameter combination, "
-                "check hasn't been performed, aborting."
-            )
-            return pods_status
-
-        while time.time() - start_time <= max_timeout:
-            if event and event.is_set():
-                return pods_status
-
-            time_offset = time.time() - start_time
-            remaining_time = max_timeout - time_offset
-            current_pods_and_namespaces = select_method()
-            # no pods have been killed or pods have been killed and
-            # respawned with the same names
-            if set(pods_and_namespaces) == set(current_pods_and_namespaces):
-                for pod in current_pods_and_namespaces:
-
-                    pod_info = self.get_pod_info(pod[0], pod[1], delete_expected=True)
-                    # for pod_info in pod_list_info:
-                    if pod_info:
-                        pod_creation_timestamp = (
-                            pod_info.creation_timestamp.timestamp()
-                        )
-                        if start_time < pod_creation_timestamp:
-                            missing_pods.add(pod)
-                pods_to_wait.update(missing_pods)
-
-            # pods have been killed but respawned with different names
-            elif set(pods_and_namespaces) != set(
-                current_pods_and_namespaces
-            ) and len(pods_and_namespaces) <= len(current_pods_and_namespaces):
-                # in this case the pods to wait have been respawn
-                # with different names
-                pods_to_wait.update(
-                    set(current_pods_and_namespaces) - set(pods_and_namespaces)
-                )
-
-            # pods have been killed and are not
-            # respawned yet (missing pods names
-            # are collected
-            elif set(pods_and_namespaces) != set(
-                current_pods_and_namespaces
-            ) and len(pods_and_namespaces) > len(current_pods_and_namespaces):
-                # update on missing_pods set is idempotent since the tuple
-                # pod_name,namespace is unique in the cluster
-                missing_pods.update(
-                    set(pods_and_namespaces) - set(current_pods_and_namespaces)
-                )
-                continue
-            # no change has been made in the pod set,
-            # maybe is taking some time to
-            # inject the chaos, let's see the next iteration.
-            if len(pods_to_wait) == 0:
-                continue
-            futures = []
-            with ThreadPoolExecutor() as executor:
-                for pod_and_namespace in pods_to_wait:
-                    if pod_and_namespace not in pods_already_watching:
-
-                        # need name of new pod
-                        future = executor.submit(
-                            self.__wait_until_pod_is_ready_worker,
-                            pod_name=pod_and_namespace[0],
-                            namespace=pod_and_namespace[1],
-                            event=_event,
-                        )
-                        futures.append(future)
-                        pods_already_watching.add(pod_and_namespace)
-
-                # this will wait all the futures to
-                # finish within the remaining time
-                done, undone = wait(futures, timeout=remaining_time)
-                _event.set()
-                for future in done:
-                    result = future.result()
-                    # sum the time elapsed waiting before the pod
-                    # has been rescheduled (rescheduling time)
-                    # to the effective recovery time of the pod
-                    if result.pod_readiness_time:
-                        result.pod_rescheduling_time = (
-                            time.time()
-                            - start_time
-                            - result.pod_readiness_time
-                        )
-                        result.total_recovery_time = (
-                            result.pod_readiness_time
-                            + result.pod_rescheduling_time
-                        )
-
-                    pods_status.recovered.append(result)
-                for future in undone:
-                    result = future.result()
-                    pods_status.unrecovered.append(result)
-
-            missing_pods.clear()
-
-        # if there are missing pods, pods affected
-        # by the chaos did not restart after the chaos
-        # an exception will be set in the PodsStatus
-        # structure that will be catched at the end of
-        # the monitoring,
-        if len(missing_pods) > 0:
-            if not _event.is_set():
-                pods_status.error = f'{", ".join([f"pod: {p[0]} namespace:{p[1]}" for p in missing_pods])}'  # NOQA
-
-        return pods_status
-
-    def __wait_until_pod_is_ready_worker(
-        self, pod_name: str, namespace: str, event: threading.Event
-    ) -> AffectedPod:
-        start_time = time.time()
-        ready = False
-
-        while not ready and not event.is_set():
-            ready = self.is_pod_running(pod_name, namespace)
-        end_time = time.time()
-        pod = AffectedPod(
-            pod_name=pod_name,
-            namespace=namespace,
-        )
-        if not event.is_set():
-            pod.pod_readiness_time = end_time - start_time
-        return pod
-
     def replace_service_selector(
         self, new_selectors: list[str], service_name: str, namespace: str
     ) -> Optional[dict[Any]]:
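The docstrings above describe a select-then-monitor workflow: pick pods with a selector, start a background monitor with the same selector, run the chaos, then join the monitor for a PodsStatus. A minimal sketch of driving the removed 5.1.3 API (the client setup, label, and timeout are illustrative, not taken from this diff):

```python
# Hypothetical driver for the 5.1.3 API removed above, following its
# docstrings; names, label, and timeout are illustrative.
from krkn_lib.k8s import KrknKubernetes

kubecli = KrknKubernetes(kubeconfig_path="~/.kube/config")

# 1. Select the target pods with the same selectors used for monitoring.
pods = kubecli.select_pods_by_label(
    label_selector="app=nginx", field_selector="status.phase=Running"
)

# 2. Start the background monitor *before* injecting the chaos.
monitor_thread = kubecli.monitor_pods_by_label(
    label_selector="app=nginx",
    pods_and_namespaces=pods,
    field_selector="status.phase=Running",
    max_timeout=60,
)

# 3. ...kill the selected pods here...

# 4. Join the monitor to collect the PodsStatus gathered in background.
pods_status = monitor_thread.join()
if pods_status.error:
    print(f"pods did not recover: {pods_status.error}")
for affected_pod in pods_status.recovered:
    print(affected_pod.pod_name, affected_pod.total_recovery_time)
```

The pattern-based variants follow the same shape; only the selector arguments change.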
@@ -3217,7 +2848,7 @@ class KrknKubernetes:
         port_number: int = 5000,
         port_name: str = "flask",
         stats_route: str = "/stats",
-        privileged: bool = True
+        privileged: bool = True,
     ) -> ServiceHijacking:
         """
         Deploys a pod running the service-hijacking webservice
@@ -3271,7 +2902,7 @@ class KrknKubernetes:
                 config_map_name=config_map_name,
                 port_number=port_number,
                 stats_route=stats_route,
-                privileged=privileged
+                privileged=privileged,
             )
         )
 
@@ -3483,8 +3114,9 @@ class KrknKubernetes:
         )
 
         cmd = (
-            "for dir in /proc/[0-9]*; do [ $(cat $dir/cgroup | grep %s) ] && "
-            "echo ${dir/\/proc\//}; done" % pod_container_id  # noqa
+            f"for dir in /proc/[0-9]*; do grep -q {pod_container_id} "
+            f"$dir/cgroup 2>/dev/null "
+            "&& echo ${dir/\/proc\//}; done"  # NOQA
         )
 
         pids = self.exec_cmd_in_pod(
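Both variants emit a shell loop that prints the PIDs whose /proc/&lt;pid&gt;/cgroup mentions the container ID; the rewrite swaps the fragile `[ $(cat ... | grep ...) ]` test for `grep -q` and silences errors from processes that vanish mid-scan. A sketch of the expanded command, with a hypothetical container ID:

```python
# Illustrative expansion of the rewritten command; the container ID
# "abc123" is hypothetical. The package source writes "\/" directly,
# which Python passes through unchanged; "\\/" below is the explicit
# equivalent.
pod_container_id = "abc123"
cmd = (
    f"for dir in /proc/[0-9]*; do grep -q {pod_container_id} "
    f"$dir/cgroup 2>/dev/null "
    "&& echo ${dir/\\/proc\\//}; done"
)
print(cmd)
# -> for dir in /proc/[0-9]*; do grep -q abc123 $dir/cgroup 2>/dev/null
#    && echo ${dir/\/proc\//}; done
# The ${dir/\/proc\//} expansion strips the "/proc/" prefix, so the
# loop prints bare PIDs.
```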
@@ -0,0 +1,12 @@
+from .pod_monitor import (
+    select_and_monitor_by_label,
+    select_and_monitor_by_namespace_pattern_and_label,
+    select_and_monitor_by_name_pattern_and_namespace_pattern,
+)
+
+
+__all__ = [
+    "select_and_monitor_by_label",
+    "select_and_monitor_by_namespace_pattern_and_label",
+    "select_and_monitor_by_name_pattern_and_namespace_pattern",
+]
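These exports replace the instance methods removed from KrknKubernetes above with module-level select-and-monitor helpers. The parent package path and the helpers' signatures are not shown in this diff; a hedged import sketch:

```python
# Hypothetical consumer of the new module. The import path below is an
# assumption (this diff does not show the package's location on disk),
# and the helpers' parameters live in pod_monitor.py, which this
# excerpt does not include.
from krkn_lib.k8s.pod_monitor import select_and_monitor_by_label
```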