krkn-lib 5.1.4__py3-none-any.whl → 5.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- krkn_lib/__init__.py +0 -0
- krkn_lib/k8s/krkn_kubernetes.py +12 -380
- krkn_lib/k8s/pod_monitor/__init__.py +12 -0
- krkn_lib/k8s/pod_monitor/pod_monitor.py +304 -0
- krkn_lib/models/elastic/models.py +5 -2
- krkn_lib/models/k8s/models.py +1 -25
- krkn_lib/models/pod_monitor/__init__.py +0 -0
- krkn_lib/models/pod_monitor/models.py +224 -0
- krkn_lib/models/telemetry/models.py +6 -4
- krkn_lib/tests/base_test.py +32 -31
- krkn_lib/tests/test_krkn_elastic_models.py +5 -4
- krkn_lib/tests/test_krkn_kubernetes_pods_monitor.py +513 -0
- krkn_lib/tests/test_krkn_kubernetes_pods_monitor_models.py +405 -0
- krkn_lib/tests/test_utils.py +12 -8
- {krkn_lib-5.1.4.dist-info → krkn_lib-5.1.6.dist-info}/METADATA +1 -2
- {krkn_lib-5.1.4.dist-info → krkn_lib-5.1.6.dist-info}/RECORD +18 -14
- krkn_lib/k8s/pods_monitor_pool.py +0 -202
- krkn_lib/tests/test_krkn_kubernetes_monitor.py +0 -367
- krkn_lib/tests/test_krkn_kubernetes_pods_monitor_pool.py +0 -128
- {krkn_lib-5.1.4.dist-info → krkn_lib-5.1.6.dist-info}/LICENSE +0 -0
- {krkn_lib-5.1.4.dist-info → krkn_lib-5.1.6.dist-info}/WHEEL +0 -0
krkn_lib/k8s/pod_monitor/pod_monitor.py
ADDED

@@ -0,0 +1,304 @@
+import re
+from concurrent.futures import Future
+from concurrent.futures.thread import ThreadPoolExecutor
+from functools import partial
+
+from kubernetes import watch
+from kubernetes.client import V1Pod, CoreV1Api
+
+from krkn_lib.models.pod_monitor.models import (
+    PodsSnapshot,
+    MonitoredPod,
+    PodEvent,
+    PodStatus,
+)
+
+def _select_pods(
+    select_partial: partial,
+    namespace_pattern: str = None,
+    name_pattern: str = None,
+):
+    initial_pods = select_partial()
+    snapshot = PodsSnapshot()
+    snapshot.resource_version = initial_pods.metadata.resource_version
+
+    for pod in initial_pods.items:
+        match_name = True
+        match_namespace = True
+        if namespace_pattern:
+            match = re.match(namespace_pattern, pod.metadata.namespace)
+            match_namespace = match is not None
+        if name_pattern:
+            match = re.match(name_pattern, pod.metadata.name)
+            match_name = match is not None
+        if match_name and match_namespace:
+            mon_pod = MonitoredPod()
+            snapshot.initial_pods.append(pod.metadata.name)
+            mon_pod.name = pod.metadata.name
+            mon_pod.namespace = pod.metadata.namespace
+            snapshot.pods[mon_pod.name] = mon_pod
+    return snapshot
+
+
+def _monitor_pods(
+    monitor_partial: partial,
+    snapshot: PodsSnapshot,
+    max_timeout: int,
+    name_pattern: str = None,
+    namespace_pattern: str = None,
+) -> PodsSnapshot:
+    w = watch.Watch(return_type=V1Pod)
+    deleted_parent_pods = []
+    restored_pods = []
+    cluster_restored = False
+    for event in w.stream(monitor_partial, timeout_seconds=max_timeout):
+        match_name = True
+        match_namespace = True
+        event_type = event["type"]
+        pod = event["object"]
+
+        if namespace_pattern:
+            match = re.match(namespace_pattern, pod.metadata.namespace)
+            match_namespace = match is not None
+        if name_pattern:
+            match = re.match(name_pattern, pod.metadata.name)
+            match_name = match is not None
+
+        if match_name and match_namespace:
+            pod_event = PodEvent()
+            if event_type == "MODIFIED":
+                if pod.metadata.deletion_timestamp is not None:
+                    pod_event.status = PodStatus.DELETION_SCHEDULED
+                    deleted_parent_pods.append(pod.metadata.name)
+                elif _is_pod_ready(pod):
+                    pod_event.status = PodStatus.READY
+                    # if there are at least the same number of ready
+                    # pods as the snapshot.initial_pods set we assume that
+                    # the cluster is restored to the initial condition
+                    restored_pods.append(pod.metadata.name)
+                    if len(restored_pods) >= len(snapshot.initial_pods):
+                        cluster_restored = True
+                else:
+                    pod_event.status = PodStatus.NOT_READY
+
+            elif event_type == "DELETED":
+                pod_event.status = PodStatus.DELETED
+            elif event_type == "ADDED":
+                pod_event.status = PodStatus.ADDED
+
+            if pod_event.status == PodStatus.ADDED:
+                snapshot.added_pods.append(pod.metadata.name)
+                # in case a pod is respawn with the same name
+                # the dictionary must not be reinitialized
+                if pod.metadata.name not in snapshot.pods:
+                    snapshot.pods[pod.metadata.name] = MonitoredPod()
+                snapshot.pods[pod.metadata.name].name = pod.metadata.name
+                snapshot.pods[pod.metadata.name].namespace = (
+                    pod.metadata.namespace
+                )
+            # skips events out of the snapshot
+            if pod.metadata.name in snapshot.pods:
+                snapshot.pods[pod.metadata.name].status_changes.append(
+                    pod_event
+                )
+            # this flag is set when all the pods
+            # that has been deleted or not ready
+            # have been restored, if True the
+            # monitoring is stopeed earlier
+            if cluster_restored:
+                w.stop()
+
+    return snapshot
+
+
+def _is_pod_ready(pod: V1Pod) -> bool:
+    if not pod.status.container_statuses:
+        return False
+    for status in pod.status.container_statuses:
+        if not status.ready:
+            return False
+    return True
+
+
+def _is_pod_terminating(pod: V1Pod) -> bool:
+    if pod.metadata.deletion_timestamp is not None:
+        return True
+    return False
+
+
+def select_and_monitor_by_label(
+    label_selector: str,
+    max_timeout: int,
+    v1_client: CoreV1Api,
+) -> Future:
+    """
+    Monitors all the pods identified
+    by a label selector and collects infos about the
+    pods recovery after a kill scenario while the scenario is running.
+
+    :param label_selector: the label selector used
+    to filter the pods to monitor (must be the
+    same used in `select_pods_by_label`)
+    :param max_timeout: the expected time the pods should take
+    to recover. If the killed pods are replaced in this time frame,
+    but they didn't reach the Ready State, they will be marked as
+    unrecovered. If during the time frame the pods are not replaced
+    at all the error field of the PodsStatus structure will be
+    valorized with an exception.
+    :param v1_client: kubernetes V1Api client
+    :return:
+    a future which result (PodsSnapshot) must be
+    gathered to obtain the pod infos.
+
+    """
+    select_partial = partial(
+        v1_client.list_pod_for_all_namespaces,
+        label_selector=label_selector,
+        field_selector="status.phase=Running",
+    )
+    snapshot = _select_pods(select_partial)
+    monitor_partial = partial(
+        v1_client.list_pod_for_all_namespaces,
+        resource_version=snapshot.resource_version,
+        label_selector=label_selector,
+    )
+    pool = ThreadPoolExecutor(max_workers=1)
+    future = pool.submit(
+        _monitor_pods,
+        monitor_partial,
+        snapshot,
+        max_timeout,
+        name_pattern=None,
+        namespace_pattern=None,
+    )
+    return future
+
+
+def select_and_monitor_by_name_pattern_and_namespace_pattern(
+    pod_name_pattern: str,
+    namespace_pattern: str,
+    max_timeout: int,
+    v1_client: CoreV1Api,
+):
+    """
+    Monitors all the pods identified by a pod name regex pattern
+    and a namespace regex pattern, that collects infos about the
+    pods recovery after a kill scenario while the scenario is running.
+    Raises an exception if the regex format is not correct.
+
+    :param pod_name_pattern: a regex representing the
+    pod name pattern used to filter the pods to be monitored
+    (must be the same used in
+    `select_pods_by_name_pattern_and_namespace_pattern`)
+    :param namespace_pattern: a regex representing the namespace
+    pattern used to filter the pods to be monitored
+    (must be the same used in
+    `select_pods_by_name_pattern_and_namespace_pattern`)
+    :param max_timeout: the expected time the pods should take to
+    recover. If the killed pods are replaced in this time frame,
+    but they didn't reach the Ready State, they will be marked as
+    unrecovered. If during the time frame the pods are not replaced
+    at all the error field of the PodsStatus structure will be
+    valorized with an exception.
+    :param v1_client: kubernetes V1Api client
+    :return:
+    a future which result (PodsSnapshot) must be
+    gathered to obtain the pod infos.
+
+    """
+    try:
+        re.compile(pod_name_pattern)
+    except re.error as e:
+        raise Exception(f"invalid pod name pattern regex: {e}")
+
+    try:
+        re.compile(namespace_pattern)
+    except re.error as e:
+        raise Exception(f"invalid pod namespace regex: {e}")
+
+    select_partial = partial(
+        v1_client.list_pod_for_all_namespaces,
+        field_selector="status.phase=Running",
+    )
+    snapshot = _select_pods(
+        select_partial,
+        name_pattern=pod_name_pattern,
+        namespace_pattern=namespace_pattern,
+    )
+    monitor_partial = partial(
+        v1_client.list_pod_for_all_namespaces,
+        resource_version=snapshot.resource_version,
+    )
+    pool = ThreadPoolExecutor(max_workers=1)
+    future = pool.submit(
+        _monitor_pods,
+        monitor_partial,
+        snapshot,
+        max_timeout,
+        name_pattern=pod_name_pattern,
+        namespace_pattern=namespace_pattern,
+    )
+    return future
+
+
+def select_and_monitor_by_namespace_pattern_and_label(
+    namespace_pattern: str,
+    label_selector: str,
+    v1_client: CoreV1Api,
+    max_timeout=30,
+):
+    """
+    Monitors all the pods identified
+    by a namespace regex pattern
+    and a pod label selector, that collects infos about the
+    pods recovery after a kill scenario while the scenario is running.
+    Raises an exception if the regex format is not correct.
+
+    :param label_selector: the label selector used to filter
+    the pods to monitor (must be the same used in
+    `select_pods_by_label`)
+    :param v1_client: kubernetes V1Api client
+    :param namespace_pattern: a regex representing the namespace
+    pattern used to filter the pods to be monitored (must be
+    the same used
+    in `select_pods_by_name_pattern_and_namespace_pattern`)
+    :param max_timeout: the expected time the pods should take to recover.
+    If the killed pods are replaced in this time frame, but they
+    didn't reach the Ready State, they will be marked as unrecovered.
+    If during the time frame the pods are not replaced
+    at all the error field of the PodsStatus structure will be
+    valorized with an exception.
+    :return:
+    a future which result (PodsSnapshot) must be
+    gathered to obtain the pod infos.
+
+    """
+    try:
+        re.compile(namespace_pattern)
+    except re.error as e:
+        raise Exception(f"invalid pod namespace regex: {e}")
+
+    select_partial = partial(
+        v1_client.list_pod_for_all_namespaces,
+        label_selector=label_selector,
+        field_selector="status.phase=Running",
+    )
+    snapshot = _select_pods(
+        select_partial,
+        namespace_pattern=namespace_pattern,
+    )
+    monitor_partial = partial(
+        v1_client.list_pod_for_all_namespaces,
+        resource_version=snapshot.resource_version,
+        label_selector=label_selector,
+    )
+    pool = ThreadPoolExecutor(max_workers=1)
+    future = pool.submit(
+        _monitor_pods,
+        monitor_partial,
+        snapshot,
+        max_timeout,
+        name_pattern=None,
+        namespace_pattern=namespace_pattern,
+    )
+    return future
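
The functions above return a bare concurrent.futures.Future (run on a single-worker ThreadPoolExecutor) rather than the PodsMonitorThread wrapper removed from krkn_lib/models/k8s/models.py further down. A minimal usage sketch, not part of the diff, assuming an already-configured kubernetes client; the "app=nginx" selector and the 120-second timeout are illustrative values:

    from kubernetes import client, config

    from krkn_lib.k8s.pod_monitor.pod_monitor import select_and_monitor_by_label

    config.load_kube_config()
    v1 = client.CoreV1Api()

    # start the watch before running the kill scenario; it stops early once
    # the number of ready pods matches the initial snapshot, or at max_timeout
    future = select_and_monitor_by_label(
        label_selector="app=nginx",  # illustrative selector
        max_timeout=120,             # illustrative timeout, in seconds
        v1_client=v1,
    )

    # ... run the chaos scenario here ...

    snapshot = future.result()                # PodsSnapshot, blocks until the watch ends
    pods_status = snapshot.get_pods_status()  # recovered / unrecovered AffectedPod lists
    for pod in pods_status.recovered:
        print(pod.pod_name, pod.pod_readiness_time)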
krkn_lib/models/elastic/models.py
CHANGED

@@ -114,6 +114,7 @@ class ElasticHealthChecks(InnerDoc):
     end_timestamp = Date()
     duration = Float()

+
 class ElasticVirtChecks(InnerDoc):
     vm_name = Text()
     ip_address = Text()
@@ -124,6 +125,7 @@ class ElasticVirtChecks(InnerDoc):
     end_timestamp = Date()
     duration = Float()

+
 class ElasticChaosRunTelemetry(Document):
     scenarios = Nested(ElasticScenarioTelemetry, multi=True)
     node_summary_infos = Nested(ElasticNodeInfo, multi=True)
@@ -141,6 +143,7 @@ class ElasticChaosRunTelemetry(Document):
     run_uuid = Text(fields={"keyword": Keyword()})
     health_checks = Nested(ElasticHealthChecks, multi=True)
     virt_checks = Nested(ElasticVirtChecks, multi=True)
+
     class Index:
         name = "chaos_run_telemetry"

@@ -215,7 +218,7 @@ class ElasticChaosRunTelemetry(Document):
             chaos_run_telemetry.kubernetes_objects_count
         )
         self.network_plugins = chaos_run_telemetry.network_plugins
-
+
         if chaos_run_telemetry.health_checks:
             self.health_checks = [
                 ElasticHealthChecks(
@@ -234,7 +237,7 @@ class ElasticChaosRunTelemetry(Document):
             ]
         else:
             self.health_checks = None
-
+
         if chaos_run_telemetry.virt_checks:
             self.virt_checks = [
                 ElasticVirtChecks(
krkn_lib/models/k8s/models.py
CHANGED

@@ -1,6 +1,5 @@
-from concurrent.futures import Future, ThreadPoolExecutor
 from dataclasses import dataclass
-from typing import Any
+from typing import Any


 @dataclass(frozen=True, order=False)
@@ -186,7 +185,6 @@ class PodsStatus:

     recovered: list[AffectedPod]
     unrecovered: list[AffectedPod]
-    error: Optional[str]

     def __init__(self, json_object: str = None):
         self.recovered = []
@@ -220,28 +218,6 @@ class PodsStatus:
         self.unrecovered.append(unrecovered)


-class PodsMonitorThread:
-    executor: ThreadPoolExecutor
-    future: Future
-
-    def __init__(self, executor: ThreadPoolExecutor, future: Future):
-        self.future = future
-        self.executor = executor
-
-    def join(self, timeout: int = 120) -> PodsStatus:
-        try:
-            result = self.future.result(timeout=timeout)
-            self.executor.shutdown(wait=False, cancel_futures=True)
-            return result
-        except Exception as e:
-            pods_status = PodsStatus()
-            pods_status.error = Exception(
-                f"Thread pool did not shutdown correctly,"
-                f"aborting.\nException: {e}"
-            )
-            return pods_status
-
-
 class AffectedNode:
     """
     A node affected by a chaos scenario
krkn_lib/models/pod_monitor/__init__.py
File without changes
krkn_lib/models/pod_monitor/models.py
ADDED

@@ -0,0 +1,224 @@
+import json
+import time
+from dataclasses import dataclass
+from enum import Enum
+from typing import Optional, Any
+
+from krkn_lib.models.k8s import PodsStatus, AffectedPod
+
+
+class PodStatus(Enum):
+    READY = 1
+    NOT_READY = 2
+    DELETION_SCHEDULED = 3
+    DELETED = 4
+    ADDED = 5
+
+
+@dataclass
+class PodEvent:
+    status: PodStatus
+
+    def __init__(self, timestamp: float = None):
+        if not timestamp:
+            self._timestamp = time.time()
+        else:
+            self._timestamp = timestamp
+
+    @property
+    def timestamp(self):
+        return self._timestamp
+
+    @timestamp.setter
+    def timestamp(self, value):
+        raise AttributeError("timestamp cannot be set")
+
+
+@dataclass
+class MonitoredPod:
+    namespace: str
+    name: str
+    status_changes: list[PodEvent]
+
+    def __init__(self):
+        self.namespace = ""
+        self.name = ""
+        self.status_changes = []
+
+    def to_dict(self) -> dict[str, Any]:
+        return {
+            "namespace": self.namespace,
+            "name": self.name,
+            "status_changes": [
+                {"status": v.status.name, "timestamp": v.timestamp}
+                for v in self.status_changes
+            ],
+        }
+
+
+@dataclass
+class PodsSnapshot:
+    resource_version: str
+    pods: dict[str, MonitoredPod]
+    added_pods: list[str]
+    initial_pods: list[str]
+    _found_rescheduled_pods: dict[str, str]
+
+    def __init__(self, json_str: str = None):
+        self.resource_version = ""
+        self.pods = {}
+        self.added_pods = []
+        self.initial_pods = []
+        self._found_rescheduled_pods = {}
+        if json_str:
+            json_obj = json.loads(json_str)
+            for _, pod in json_obj["pods"]:
+                p = MonitoredPod()
+                p.name = pod["name"]
+                p.namespace = pod["namespace"]
+                for status in pod["status_changes"]:
+                    s = PodEvent(timestamp=status["timestamp"])
+                    if status["status"] == "READY":
+                        s.status = PodStatus.READY
+                    elif status["status"] == "NOT_READY":
+                        s.status = PodStatus.NOT_READY
+                    elif status["status"] == "DELETION_SCHEDULED":
+                        s.status = PodStatus.DELETION_SCHEDULED
+                    elif status["status"] == "DELETED":
+                        s.status = PodStatus.DELETED
+                    elif status["status"] == "ADDED":
+                        s.status = PodStatus.ADDED
+                    p.status_changes.append(s)
+                self.pods[p.name] = p
+            for p in json_obj["added_pods"]:
+                self.added_pods.append(p)
+            for p in json_obj["initial_pods"]:
+                self.initial_pods.append(p)
+
+        pass
+
+    def to_dict(self) -> dict[str, Any]:
+        return {
+            "resource_version": self.resource_version,
+            "pods": [[k, v.to_dict()] for k, v in self.pods.items()],
+            "added_pods": self.added_pods,
+            "initial_pods": self.initial_pods,
+        }
+
+    def _find_rescheduled_pod(self, parent: str) -> Optional[MonitoredPod]:
+        for _, v in self.pods.items():
+            found_pod = next(
+                filter(
+                    lambda p: p.status == PodStatus.ADDED,
+                    v.status_changes,
+                ),
+                None,
+            )
+            if found_pod and v.name not in self._found_rescheduled_pods:
+                # just pick rescheduled pods once
+                # keeping the parent for future uses
+                self._found_rescheduled_pods[v.name] = parent
+                return v
+        return None
+
+    def get_pods_status(self) -> PodsStatus:
+
+        pods_status = PodsStatus()
+        for pod_name in self.initial_pods:
+            pod = self.pods[pod_name]
+            for status_change in pod.status_changes:
+                if status_change.status == PodStatus.NOT_READY:
+                    ready_status = next(
+                        filter(
+                            lambda s: s.status == PodStatus.READY,
+                            pod.status_changes,
+                        ),
+                        None,
+                    )
+                    if not ready_status:
+                        pods_status.unrecovered.append(
+                            AffectedPod(
+                                pod_name=pod.name, namespace=pod.namespace
+                            )
+                        )
+                    else:
+                        pods_status.recovered.append(
+                            AffectedPod(
+                                pod_name=pod.name,
+                                namespace=pod.namespace,
+                                pod_readiness_time=ready_status.timestamp
+                                - status_change.timestamp,
+                            )
+                        )
+                    break
+
+                # if there's a DELETION_SCHEDULED events
+                # looks for the rescheduled pod
+                # and calculates its scheduling and readiness time
+                if status_change.status == PodStatus.DELETION_SCHEDULED:
+                    rescheduled_pod = self._find_rescheduled_pod(pod_name)
+                    if not rescheduled_pod:
+                        pods_status.unrecovered.append(
+                            AffectedPod(
+                                pod_name=pod.name, namespace=pod.namespace
+                            )
+                        )
+                    else:
+                        rescheduled_start_ts = next(
+                            map(
+                                lambda e: e.timestamp,
+                                filter(
+                                    lambda s: s.status == PodStatus.ADDED,
+                                    rescheduled_pod.status_changes,
+                                ),
+                            ),
+                            None,
+                        )
+                        rescheduled_ready_ts = next(
+                            map(
+                                lambda e: e.timestamp,
+                                filter(
+                                    lambda s: s.status == PodStatus.READY,
+                                    rescheduled_pod.status_changes,
+                                ),
+                            ),
+                            None,
+                        )
+                        # the pod might be rescheduled correctly
+                        # but do not become ready in the expected time
+                        # so it must be marked as `unrecovered` in that
+                        # case
+                        if not rescheduled_ready_ts:
+                            pods_status.unrecovered.append(
+                                AffectedPod(
+                                    pod_name=rescheduled_pod.name,
+                                    namespace=pod.namespace,
+                                )
+                            )
+                        else:
+                            rescheduling_time = (
+                                rescheduled_start_ts - status_change.timestamp
+                                if rescheduled_start_ts
+                                else None
+                            )
+                            readiness_time = (
+                                rescheduled_ready_ts - status_change.timestamp
+                                if rescheduled_ready_ts
+                                else None
+                            )
+                            pods_status.recovered.append(
+                                AffectedPod(
+                                    pod_name=rescheduled_pod.name,
+                                    namespace=rescheduled_pod.namespace,
+                                    pod_rescheduling_time=rescheduling_time,
+                                    pod_readiness_time=readiness_time,
+                                    total_recovery_time=(
+                                        rescheduling_time + readiness_time
+                                        if rescheduling_time and readiness_time
+                                        else None
+                                    ),
+                                )
+                            )
+                    break
+
+        return pods_status
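
PodsSnapshot.to_dict() emits each event as a status name plus timestamp, and the constructor accepts the same structure as a JSON string, so a snapshot survives a JSON round trip. A small sketch of that round trip, not part of the diff; the pod and namespace names are made up for illustration:

    import json
    import time

    from krkn_lib.models.pod_monitor.models import (
        MonitoredPod,
        PodEvent,
        PodsSnapshot,
        PodStatus,
    )

    # build a tiny snapshot by hand (names are illustrative)
    snapshot = PodsSnapshot()
    snapshot.resource_version = "12345"
    pod = MonitoredPod()
    pod.name = "example-pod"
    pod.namespace = "example-ns"
    event = PodEvent(timestamp=time.time())
    event.status = PodStatus.READY
    pod.status_changes.append(event)
    snapshot.pods[pod.name] = pod
    snapshot.initial_pods.append(pod.name)

    # serialize with to_dict() and rebuild the snapshot from the JSON payload
    restored = PodsSnapshot(json_str=json.dumps(snapshot.to_dict()))
    assert restored.pods["example-pod"].status_changes[0].status == PodStatus.READY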
krkn_lib/models/telemetry/models.py
CHANGED

@@ -421,6 +421,7 @@ class HealthCheck:
         self.end_timestamp = json_dict["end_timestamp"]
         self.duration = json_dict["duration"]

+
 @dataclass(order=False)
 class VirtCheck:
     """
@@ -466,10 +467,11 @@ class VirtCheck:
         self.ip_address = json_dict["ip_address"]
         self.namespace = json_dict["namespace"]
         self.vm_name = json_dict["vm_name"]
-        self.status = json_dict.get("status",True)
-        self.start_timestamp = json_dict.get("start_timestamp","")
-        self.end_timestamp = json_dict.get("end_timestamp","")
-        self.duration = json_dict.get("duration","")
+        self.status = json_dict.get("status", True)
+        self.start_timestamp = json_dict.get("start_timestamp", "")
+        self.end_timestamp = json_dict.get("end_timestamp", "")
+        self.duration = json_dict.get("duration", "")
+

 @dataclass(order=False)
 class ChaosRunTelemetry: