krkn-lib 5.1.4.tar.gz → 5.1.6.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/PKG-INFO +1 -2
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/README.md +0 -1
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/pyproject.toml +1 -1
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/k8s/krkn_kubernetes.py +12 -380
- krkn_lib-5.1.6/src/krkn_lib/k8s/pod_monitor/__init__.py +12 -0
- krkn_lib-5.1.6/src/krkn_lib/k8s/pod_monitor/pod_monitor.py +304 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/models/elastic/models.py +5 -2
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/models/k8s/models.py +1 -25
- krkn_lib-5.1.6/src/krkn_lib/models/pod_monitor/models.py +224 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/models/telemetry/models.py +6 -4
- krkn_lib-5.1.6/src/krkn_lib/prometheus/__init__.py +0 -0
- krkn_lib-5.1.6/src/krkn_lib/telemetry/__init__.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/tests/base_test.py +32 -31
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/tests/test_krkn_elastic_models.py +5 -4
- krkn_lib-5.1.6/src/krkn_lib/tests/test_krkn_kubernetes_pods_monitor.py +513 -0
- krkn_lib-5.1.6/src/krkn_lib/tests/test_krkn_kubernetes_pods_monitor_models.py +405 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/tests/test_utils.py +12 -8
- krkn_lib-5.1.4/src/krkn_lib/k8s/pods_monitor_pool.py +0 -202
- krkn_lib-5.1.4/src/krkn_lib/tests/test_krkn_kubernetes_monitor.py +0 -367
- krkn_lib-5.1.4/src/krkn_lib/tests/test_krkn_kubernetes_pods_monitor_pool.py +0 -128
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/LICENSE +0 -0
- {krkn_lib-5.1.4/src/krkn_lib/models → krkn_lib-5.1.6/src/krkn_lib}/__init__.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/aws_tests/__init__.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/aws_tests/test_krkn_telemetry_kubernetes.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/aws_tests/test_krkn_telemetry_openshift.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/elastic/__init__.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/elastic/krkn_elastic.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/k8s/__init__.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/k8s/templates/hog_pod.j2 +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/k8s/templates/node_exec_pod.j2 +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/k8s/templates/service_hijacking_config_map.j2 +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/k8s/templates/service_hijacking_pod.j2 +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/k8s/templates/syn_flood_pod.j2 +0 -0
- {krkn_lib-5.1.4/src/krkn_lib/prometheus → krkn_lib-5.1.6/src/krkn_lib/models}/__init__.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/models/elastic/__init__.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/models/k8s/__init__.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/models/krkn/__init__.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/models/krkn/models.py +0 -0
- {krkn_lib-5.1.4/src/krkn_lib/telemetry → krkn_lib-5.1.6/src/krkn_lib/models/pod_monitor}/__init__.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/models/telemetry/__init__.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/ocp/__init__.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/ocp/krkn_openshift.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/prometheus/krkn_prometheus.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/telemetry/k8s/__init__.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/telemetry/k8s/krkn_telemetry_kubernetes.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/telemetry/ocp/__init__.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/telemetry/ocp/krkn_telemetry_openshift.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/tests/__init__.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/tests/test_krkn_elastic.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/tests/test_krkn_kubernetes_check.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/tests/test_krkn_kubernetes_create.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/tests/test_krkn_kubernetes_delete.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/tests/test_krkn_kubernetes_exec.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/tests/test_krkn_kubernetes_get.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/tests/test_krkn_kubernetes_list.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/tests/test_krkn_kubernetes_misc.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/tests/test_krkn_kubernetes_models.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/tests/test_krkn_openshift.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/tests/test_krkn_prometheus.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/tests/test_krkn_telemetry_models.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/tests/test_version.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/utils/__init__.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/utils/functions.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/utils/safe_logger.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/version/__init__.py +0 -0
- {krkn_lib-5.1.4 → krkn_lib-5.1.6}/src/krkn_lib/version/version.py +0 -0
--- krkn_lib-5.1.4/PKG-INFO
+++ krkn_lib-5.1.6/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: krkn-lib
-Version: 5.1.4
+Version: 5.1.6
 Summary: Foundation library for Kraken
 License: Apache-2.0
 Author: Red Hat Chaos Team
--- krkn_lib-5.1.4/README.md
+++ krkn_lib-5.1.6/README.md
@@ -65,4 +65,3 @@ of the [reStructuredText Docstring Format](https://peps.python.org/pep-0287/) co
 
 
 
-
--- krkn_lib-5.1.4/src/krkn_lib/k8s/krkn_kubernetes.py
+++ krkn_lib-5.1.6/src/krkn_lib/k8s/krkn_kubernetes.py
@@ -7,8 +7,6 @@ import re
 import threading
 import time
 import warnings
-from concurrent.futures import ThreadPoolExecutor, wait
-from functools import partial
 from queue import Queue
 from typing import Any, Dict, List, Optional
 from urllib.parse import urlparse
@@ -27,13 +25,10 @@ from urllib3 import HTTPResponse
 from krkn_lib.models.k8s import (
     PVC,
     AffectedNode,
-    AffectedPod,
     ApiRequestException,
     Container,
     NodeResources,
     Pod,
-    PodsMonitorThread,
-    PodsStatus,
     ServiceHijacking,
     Volume,
     VolumeMount,
@@ -1513,11 +1508,15 @@ class KrknKubernetes:
             logging.error("Error trying to apply_yaml" + str(e))
 
     def get_pod_info(
-        self,
+        self,
+        name: str,
+        namespace: str = "default",
+        delete_expected: bool = False,
     ) -> Optional[Pod]:
         """
         Retrieve information about a specific pod
 
+
         :param name: pod name
         :param namespace: namespace (optional default `default`)
         :return: Data class object of type Pod with the output of the above
@@ -1583,11 +1582,11 @@ class KrknKubernetes:
                 creation_timestamp=response.metadata.creation_timestamp,
             )
         except Exception:
-            if not delete_expected:
+            if not delete_expected:
                 logging.error(
                     "Pod '%s' doesn't exist in namespace '%s'", name, namespace
                 )
-            else:
+            else:
                 logging.info(
                     "Pod '%s' doesn't exist in namespace '%s'", name, namespace
                 )
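
The new `delete_expected` flag changes only the log level: when the caller anticipates that the pod may already be gone (as during a kill scenario), the lookup miss is logged at `info` instead of `error`. A minimal sketch of a caller, assuming a `KrknKubernetes` client built from a kubeconfig (the path and pod name here are illustrative):

```python
from krkn_lib.k8s import KrknKubernetes

client = KrknKubernetes(kubeconfig_path="~/.kube/config")  # illustrative path

# the pod is expected to vanish during the chaos run, so a lookup miss
# is logged at info level rather than error
pod = client.get_pod_info("nginx-0", namespace="default", delete_expected=True)
if pod is None:
    print("pod already deleted, as expected")
```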
@@ -2745,374 +2744,6 @@ class KrknKubernetes:
         ]
         return pods_and_namespaces
 
-    def monitor_pods_by_label(
-        self,
-        label_selector: str,
-        pods_and_namespaces: list[(str, str)],
-        field_selector: str = None,
-        max_timeout: int = 30,
-        event: threading.Event = None,
-    ) -> PodsMonitorThread:
-        """
-        Starts monitoring a list of pods identified as tuples
-        (pod_name, namespace) filtered by label selector
-        and collects infos about the pods recovery after a kill scenario.
-        Returns a PodsMonitorThread that can be joined after the scenario
-        to retrieve the PodsStatus object containing all the information
-        collected in background during the chaos run.
-
-        :param label_selector: the label selector used
-            to filter the pods to monitor (must be the
-            same used in `select_pods_by_label`)
-        :param pods_and_namespaces: the list of pods collected
-            by `select_pods_by_label` against which the changes
-            in the pods state is monitored
-        :param field_selector: filter results by config details
-            select only running pods by setting "status.phase=Running"
-        :param max_timeout: the expected time the pods should take
-            to recover. If the killed pods are replaced in this time frame,
-            but they didn't reach the Ready State, they will be marked as
-            unrecovered. If during the time frame the pods are not replaced
-            at all the error field of the PodsStatus structure will be
-            valorized with an exception.
-        :param event: a threading event can be passed to interrupt the process
-            before the timeout. Simply call set() method on the event passed
-            to make the thread return immediately
-        :return: a PodsMonitorThread structure that can be joined
-            in any place of the code, to collect the PodsStatus structure
-            returned, in order to make the process run in background
-            while a chaos scenario is performed.
-
-        """
-        pods_status = PodsStatus()
-        return self.__start_monitoring_pods(
-            pods_and_namespaces=pods_and_namespaces,
-            max_timeout=max_timeout,
-            pods_status=pods_status,
-            label_selector=label_selector,
-            field_selector=field_selector,
-            event=event,
-        )
-
-    def monitor_pods_by_name_pattern_and_namespace_pattern(
-        self,
-        pod_name_pattern: str,
-        namespace_pattern: str,
-        pods_and_namespaces: list[(str, str)],
-        field_selector: str = None,
-        max_timeout=30,
-        event: threading.Event = None,
-    ) -> PodsMonitorThread:
-        """
-        Starts monitoring a list of pods identified as tuples
-        (pod_name, namespace) filtered by a pod name regex pattern
-        and a namespace regex pattern, and collects infos about the
-        pods recovery after a kill scenario. Returns a PodsMonitorThread
-        that can be joined after the scenario to retrieve the PodsStatus
-        object containing all the information collected in background during
-        the chaos run.
-
-        :param pod_name_pattern: a regex representing the
-            pod name pattern used to filter the pods to be monitored
-            (must be the same used in
-            `select_pods_by_name_pattern_and_namespace_pattern`)
-        :param namespace_pattern: a regex representing the namespace
-            pattern used to filter the pods to be monitored
-            (must be the same used in
-            `select_pods_by_name_pattern_and_namespace_pattern`)
-        :param pods_and_namespaces: the list of pods collected by
-            `select_pods_by_name_pattern_and_namespace_pattern` against
-            which the changes in the pods state is monitored
-        :param field_selector: filter results by config details
-            select only running pods by setting "status.phase=Running"
-        :param max_timeout: the expected time the pods should take to
-            recover. If the killed pods are replaced in this time frame,
-            but they didn't reach the Ready State, they will be marked as
-            unrecovered. If during the time frame the pods are not replaced
-            at all the error field of the PodsStatus structure will be
-            valorized with an exception.
-        :param event: a threading event can be passed to interrupt the process
-            before the timeout. Simply call set() method on the event passed
-            to make the thread return immediately
-        :return: a PodsMonitorThread structure that can be joined in any
-            place of the code, to collect the PodsStatus structure returned,
-            in order to make the process run in background while a chaos
-            scenario is performed.
-
-        """
-        pods_status = PodsStatus()
-        return self.__start_monitoring_pods(
-            pods_and_namespaces=pods_and_namespaces,
-            max_timeout=max_timeout,
-            pods_status=pods_status,
-            field_selector=field_selector,
-            name_pattern=pod_name_pattern,
-            namespace_pattern=namespace_pattern,
-            event=event,
-        )
-
-    def monitor_pods_by_namespace_pattern_and_label(
-        self,
-        namespace_pattern: str,
-        label_selector: str,
-        pods_and_namespaces: list[(str, str)],
-        field_selector: str = None,
-        max_timeout=30,
-        event: threading.Event = None,
-    ) -> PodsMonitorThread:
-        """
-        Starts monitoring a list of pods identified as tuples
-        (pod_name, namespace) filtered by a namespace regex pattern
-        and a pod label selector, and collects infos about the
-        pods recovery after a kill scenario. Returns a PodsMonitorThread
-        that can be joined after the scenario to retrieve the PodsStatus
-        object containing all the information collected in background during
-        the chaos run.
-
-        :param label_selector: the label selector used to filter
-            the pods to monitor (must be the same used in
-            `select_pods_by_label`)
-        :param namespace_pattern: a regex representing the namespace
-            pattern used to filter the pods to be monitored (must be
-            the same used
-            in `select_pods_by_name_pattern_and_namespace_pattern`)
-        :param pods_and_namespaces: the list of pods collected by
-            `select_pods_by_name_pattern_and_namespace_pattern` against
-            which the changes in the pods state is monitored
-        :param field_selector: filter results by config details
-            select only running pods by setting "status.phase=Running"
-        :param max_timeout: the expected time the pods should take to recover.
-            If the killed pods are replaced in this time frame, but they
-            didn't reach the Ready State, they will be marked as unrecovered.
-            If during the time frame the pods are not replaced
-            at all the error field of the PodsStatus structure will be
-            valorized with an exception.
-        :param event: a threading event can be passed to interrupt the process
-            before the timeout. Simply call set() method on the event passed
-            to make the thread return immediately
-        :return: a PodsMonitorThread structure that can be joined in
-            any place of the code, to collect the PodsStatus structure
-            returned, in order to make the process run in background while
-            a chaos scenario is performed.
-
-        """
-        pods_status = PodsStatus()
-        return self.__start_monitoring_pods(
-            pods_and_namespaces=pods_and_namespaces,
-            max_timeout=max_timeout,
-            pods_status=pods_status,
-            label_selector=label_selector,
-            field_selector=field_selector,
-            namespace_pattern=namespace_pattern,
-            event=event,
-        )
-
-    def __start_monitoring_pods(
-        self,
-        pods_and_namespaces: list[(str, str)],
-        pods_status: PodsStatus,
-        max_timeout: int,
-        label_selector: str = None,
-        field_selector: str = None,
-        pod_name: str = None,
-        namespace_pattern: str = None,
-        name_pattern: str = None,
-        event: threading.Event = None,
-    ) -> PodsMonitorThread:
-        executor = ThreadPoolExecutor()
-        future = executor.submit(
-            self.__monitor_pods_worker,
-            pods_and_namespaces=pods_and_namespaces,
-            pods_status=pods_status,
-            max_timeout=max_timeout,
-            label_selector=label_selector,
-            field_selector=field_selector,
-            pod_name=pod_name,
-            namespace_pattern=namespace_pattern,
-            name_pattern=name_pattern,
-            event=event,
-        )
-
-        return PodsMonitorThread(executor, future)
-
-    def __monitor_pods_worker(
-        self,
-        pods_and_namespaces: [(str, str)],
-        pods_status: PodsStatus,
-        max_timeout: int,
-        label_selector: str = None,
-        field_selector: str = None,
-        pod_name: str = None,
-        namespace_pattern: str = None,
-        name_pattern: str = None,
-        event: threading.Event = None,
-    ) -> PodsStatus:
-        missing_pods = set()
-        pods_to_wait = set()
-        pods_already_watching = set()
-        start_time = time.time()
-        _event = threading.Event() if not event else event
-        if (
-            label_selector
-            and not pod_name
-            and not name_pattern
-            and not namespace_pattern
-        ):
-            select_method = partial(
-                self.select_pods_by_label,
-                label_selector=label_selector,
-                field_selector=field_selector,
-            )
-        elif (
-            name_pattern
-            and namespace_pattern
-            and not pod_name
-            and not label_selector
-        ):
-            select_method = partial(
-                self.select_pods_by_name_pattern_and_namespace_pattern,
-                pod_name_pattern=name_pattern,
-                namespace_pattern=namespace_pattern,
-                field_selector=field_selector,
-            )
-        elif (
-            namespace_pattern
-            and label_selector
-            and not pod_name
-            and not name_pattern
-        ):
-            select_method = partial(
-                self.select_pods_by_namespace_pattern_and_label,
-                namespace_pattern=namespace_pattern,
-                label_selector=label_selector,
-                field_selector=field_selector,
-            )
-        else:
-            pods_status.error = (
-                "invalid parameter combination, "
-                "check hasn't been performed, aborting."
-            )
-            return pods_status
-
-        while time.time() - start_time <= max_timeout:
-            if event and event.is_set():
-                return pods_status
-
-            time_offset = time.time() - start_time
-            remaining_time = max_timeout - time_offset
-            current_pods_and_namespaces = select_method()
-            # no pods have been killed or pods have been killed and
-            # respawned with the same names
-            if set(pods_and_namespaces) == set(current_pods_and_namespaces):
-                for pod in current_pods_and_namespaces:
-
-                    pod_info = self.get_pod_info(pod[0], pod[1], delete_expected=True)
-                    # for pod_info in pod_list_info:
-                    if pod_info:
-                        pod_creation_timestamp = (
-                            pod_info.creation_timestamp.timestamp()
-                        )
-                        if start_time < pod_creation_timestamp:
-                            missing_pods.add(pod)
-                pods_to_wait.update(missing_pods)
-
-            # pods have been killed but respawned with different names
-            elif set(pods_and_namespaces) != set(
-                current_pods_and_namespaces
-            ) and len(pods_and_namespaces) <= len(current_pods_and_namespaces):
-                # in this case the pods to wait have been respawn
-                # with different names
-                pods_to_wait.update(
-                    set(current_pods_and_namespaces) - set(pods_and_namespaces)
-                )
-
-            # pods have been killed and are not
-            # respawned yet (missing pods names
-            # are collected
-            elif set(pods_and_namespaces) != set(
-                current_pods_and_namespaces
-            ) and len(pods_and_namespaces) > len(current_pods_and_namespaces):
-                # update on missing_pods set is idempotent since the tuple
-                # pod_name,namespace is unique in the cluster
-                missing_pods.update(
-                    set(pods_and_namespaces) - set(current_pods_and_namespaces)
-                )
-                continue
-            # no change has been made in the pod set,
-            # maybe is taking some time to
-            # inject the chaos, let's see the next iteration.
-            if len(pods_to_wait) == 0:
-                continue
-            futures = []
-            with ThreadPoolExecutor() as executor:
-                for pod_and_namespace in pods_to_wait:
-                    if pod_and_namespace not in pods_already_watching:
-
-                        # need name of new pod
-                        future = executor.submit(
-                            self.__wait_until_pod_is_ready_worker,
-                            pod_name=pod_and_namespace[0],
-                            namespace=pod_and_namespace[1],
-                            event=_event,
-                        )
-                        futures.append(future)
-                        pods_already_watching.add(pod_and_namespace)
-
-                # this will wait all the futures to
-                # finish within the remaining time
-                done, undone = wait(futures, timeout=remaining_time)
-                _event.set()
-                for future in done:
-                    result = future.result()
-                    # sum the time elapsed waiting before the pod
-                    # has been rescheduled (rescheduling time)
-                    # to the effective recovery time of the pod
-                    if result.pod_readiness_time:
-                        result.pod_rescheduling_time = (
-                            time.time()
-                            - start_time
-                            - result.pod_readiness_time
-                        )
-                        result.total_recovery_time = (
-                            result.pod_readiness_time
-                            + result.pod_rescheduling_time
-                        )
-
-                    pods_status.recovered.append(result)
-                for future in undone:
-                    result = future.result()
-                    pods_status.unrecovered.append(result)
-
-            missing_pods.clear()
-
-        # if there are missing pods, pods affected
-        # by the chaos did not restart after the chaos
-        # an exception will be set in the PodsStatus
-        # structure that will be catched at the end of
-        # the monitoring,
-        if len(missing_pods) > 0:
-            if not _event.is_set():
-                pods_status.error = f'{", ".join([f"pod: {p[0]} namespace:{p[1]}" for p in missing_pods])}'  # NOQA
-
-        return pods_status
-
-    def __wait_until_pod_is_ready_worker(
-        self, pod_name: str, namespace: str, event: threading.Event
-    ) -> AffectedPod:
-        start_time = time.time()
-        ready = False
-
-        while not ready and not event.is_set():
-            ready = self.is_pod_running(pod_name, namespace)
-        end_time = time.time()
-        pod = AffectedPod(
-            pod_name=pod_name,
-            namespace=namespace,
-        )
-        if not event.is_set():
-            pod.pod_readiness_time = end_time - start_time
-        return pod
-
     def replace_service_selector(
         self, new_selectors: list[str], service_name: str, namespace: str
     ) -> Optional[dict[Any]]:
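
For context, the thread-based API removed in this hunk was driven exactly as its docstrings describe: start the monitor before injecting chaos, run the scenario, then join the returned `PodsMonitorThread` to collect the `PodsStatus`. A sketch of the 5.1.4-era call pattern (the selector value and the chaos step are illustrative):

```python
# select the pods, then hand the same selector to the monitor
pods = client.select_pods_by_label(label_selector="app=nginx")
monitor = client.monitor_pods_by_label(
    label_selector="app=nginx",
    pods_and_namespaces=pods,
    max_timeout=60,
)

kill_selected_pods()  # hypothetical chaos injection step

status = monitor.join()  # blocks until recovery or max_timeout
if status.error:
    raise RuntimeError(f"pods did not recover: {status.error}")
print(f"recovered: {status.recovered}, unrecovered: {status.unrecovered}")
```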
@@ -3217,7 +2848,7 @@ class KrknKubernetes:
         port_number: int = 5000,
         port_name: str = "flask",
         stats_route: str = "/stats",
-        privileged: bool = True
+        privileged: bool = True,
     ) -> ServiceHijacking:
         """
         Deploys a pod running the service-hijacking webservice
@@ -3271,7 +2902,7 @@ class KrknKubernetes:
                 config_map_name=config_map_name,
                 port_number=port_number,
                 stats_route=stats_route,
-                privileged=privileged
+                privileged=privileged,
             )
         )
 
@@ -3483,8 +3114,9 @@ class KrknKubernetes:
         )
 
         cmd = (
-            "for dir in /proc/[0-9]*; do
-            "
+            f"for dir in /proc/[0-9]*; do grep -q {pod_container_id} "
+            f"$dir/cgroup 2>/dev/null "
+            "&& echo ${dir/\/proc\//}; done"  # NOQA
         )
 
         pids = self.exec_cmd_in_pod(
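
The rebuilt one-liner walks every numeric entry under `/proc`, greps each process's `cgroup` file for the container ID, and strips the `/proc/` prefix so only the matching PIDs are printed. A sketch of how the f-string expands, using an illustrative container ID (`\\/` appears below in place of the original's `\/` to avoid the invalid-escape warning the `# NOQA` suppresses):

```python
pod_container_id = "abc123"  # illustrative value
cmd = (
    f"for dir in /proc/[0-9]*; do grep -q {pod_container_id} "
    f"$dir/cgroup 2>/dev/null "
    "&& echo ${dir/\\/proc\\//}; done"
)
# cmd expands to a single shell command:
#   for dir in /proc/[0-9]*; do grep -q abc123 $dir/cgroup 2>/dev/null \
#       && echo ${dir/\/proc\//}; done
# ${dir/\/proc\//} replaces the leading "/proc/" with nothing, so each
# matching entry prints as a bare PID
```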
--- /dev/null
+++ krkn_lib-5.1.6/src/krkn_lib/k8s/pod_monitor/__init__.py
@@ -0,0 +1,12 @@
+from .pod_monitor import (
+    select_and_monitor_by_label,
+    select_and_monitor_by_namespace_pattern_and_label,
+    select_and_monitor_by_name_pattern_and_namespace_pattern,
+)
+
+
+__all__ = [
+    "select_and_monitor_by_label",
+    "select_and_monitor_by_namespace_pattern_and_label",
+    "select_and_monitor_by_name_pattern_and_namespace_pattern",
+]
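
These three `select_and_monitor_*` functions replace the `KrknKubernetes.monitor_pods_by_*` methods removed above. The implementation in `pod_monitor.py` is not shown in this diff, so the parameters below are an assumption inferred from the removed methods; a hypothetical usage sketch:

```python
from krkn_lib.k8s.pod_monitor import select_and_monitor_by_label

# assumed parameters, by analogy with the removed monitor_pods_by_label;
# check pod_monitor.py in the 5.1.6 tarball for the real signature
monitor = select_and_monitor_by_label(
    kubecli=client,              # assumed: a KrknKubernetes instance
    label_selector="app=nginx",
    max_timeout=60,
)
run_chaos()                      # hypothetical chaos injection
status = monitor.join()         # assumed: collects the monitored pod status
```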