apache-airflow-providers-cncf-kubernetes 10.10.0rc1__py3-none-any.whl → 10.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/cncf/kubernetes/__init__.py +3 -3
- airflow/providers/cncf/kubernetes/backcompat/backwards_compat_converters.py +1 -1
- airflow/providers/cncf/kubernetes/callbacks.py +1 -1
- airflow/providers/cncf/kubernetes/decorators/kubernetes.py +8 -3
- airflow/providers/cncf/kubernetes/decorators/kubernetes_cmd.py +6 -3
- airflow/providers/cncf/kubernetes/exceptions.py +7 -3
- airflow/providers/cncf/kubernetes/executors/kubernetes_executor.py +1 -2
- airflow/providers/cncf/kubernetes/executors/kubernetes_executor_utils.py +1 -1
- airflow/providers/cncf/kubernetes/hooks/kubernetes.py +118 -18
- airflow/providers/cncf/kubernetes/kubernetes_helper_functions.py +65 -20
- airflow/providers/cncf/kubernetes/operators/custom_object_launcher.py +1 -1
- airflow/providers/cncf/kubernetes/operators/job.py +13 -7
- airflow/providers/cncf/kubernetes/operators/kueue.py +1 -1
- airflow/providers/cncf/kubernetes/operators/pod.py +86 -34
- airflow/providers/cncf/kubernetes/operators/resource.py +3 -9
- airflow/providers/cncf/kubernetes/operators/spark_kubernetes.py +20 -9
- airflow/providers/cncf/kubernetes/resource_convert/env_variable.py +1 -1
- airflow/providers/cncf/kubernetes/sensors/spark_kubernetes.py +2 -3
- airflow/providers/cncf/kubernetes/template_rendering.py +1 -1
- airflow/providers/cncf/kubernetes/triggers/pod.py +23 -8
- airflow/providers/cncf/kubernetes/utils/pod_manager.py +98 -86
- airflow/providers/cncf/kubernetes/version_compat.py +5 -1
- {apache_airflow_providers_cncf_kubernetes-10.10.0rc1.dist-info → apache_airflow_providers_cncf_kubernetes-10.12.0.dist-info}/METADATA +12 -10
- {apache_airflow_providers_cncf_kubernetes-10.10.0rc1.dist-info → apache_airflow_providers_cncf_kubernetes-10.12.0.dist-info}/RECORD +28 -28
- {apache_airflow_providers_cncf_kubernetes-10.10.0rc1.dist-info → apache_airflow_providers_cncf_kubernetes-10.12.0.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_cncf_kubernetes-10.10.0rc1.dist-info → apache_airflow_providers_cncf_kubernetes-10.12.0.dist-info}/entry_points.txt +0 -0
- {apache_airflow_providers_cncf_kubernetes-10.10.0rc1.dist-info → apache_airflow_providers_cncf_kubernetes-10.12.0.dist-info}/licenses/LICENSE +0 -0
- {apache_airflow_providers_cncf_kubernetes-10.10.0rc1.dist-info → apache_airflow_providers_cncf_kubernetes-10.12.0.dist-info}/licenses/NOTICE +0 -0
|
@@ -29,11 +29,11 @@ from airflow import __version__ as airflow_version
|
|
|
29
29
|
|
|
30
30
|
__all__ = ["__version__"]
|
|
31
31
|
|
|
32
|
-
__version__ = "10.10.0"
|
|
32
|
+
__version__ = "10.12.0"
|
|
33
33
|
|
|
34
34
|
if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
|
|
35
|
-
"2.
|
|
35
|
+
"2.11.0"
|
|
36
36
|
):
|
|
37
37
|
raise RuntimeError(
|
|
38
|
-
f"The package `apache-airflow-providers-cncf-kubernetes:{__version__}` needs Apache Airflow 2.
|
|
38
|
+
f"The package `apache-airflow-providers-cncf-kubernetes:{__version__}` needs Apache Airflow 2.11.0+"
|
|
39
39
|
)
|
|
@@ -20,7 +20,7 @@ from __future__ import annotations
|
|
|
20
20
|
|
|
21
21
|
from kubernetes.client import ApiClient, models as k8s
|
|
22
22
|
|
|
23
|
-
from airflow.exceptions import AirflowException
|
|
23
|
+
from airflow.providers.common.compat.sdk import AirflowException
|
|
24
24
|
|
|
25
25
|
|
|
26
26
|
def _convert_kube_model_object(obj, new_class):
|
|
@@ -24,7 +24,7 @@ import kubernetes_asyncio.client as async_k8s
|
|
|
24
24
|
|
|
25
25
|
if TYPE_CHECKING:
|
|
26
26
|
from airflow.providers.cncf.kubernetes.operators.pod import KubernetesPodOperator
|
|
27
|
-
from airflow.utils.context import Context
|
|
27
|
+
from airflow.sdk import Context
|
|
28
28
|
|
|
29
29
|
client_type: TypeAlias = k8s.CoreV1Api | async_k8s.CoreV1Api
|
|
30
30
|
|
|
@@ -38,7 +38,7 @@ from airflow.providers.common.compat.sdk import (
|
|
|
38
38
|
)
|
|
39
39
|
|
|
40
40
|
if TYPE_CHECKING:
|
|
41
|
-
from airflow.utils.context import Context
|
|
41
|
+
from airflow.sdk import Context
|
|
42
42
|
|
|
43
43
|
_PYTHON_SCRIPT_ENV = "__PYTHON_SCRIPT"
|
|
44
44
|
_PYTHON_INPUT_ENV = "__PYTHON_INPUT"
|
|
@@ -87,7 +87,13 @@ class _KubernetesDecoratedOperator(DecoratedOperator, KubernetesPodOperator):
|
|
|
87
87
|
def _generate_cmds(self) -> list[str]:
|
|
88
88
|
script_filename = "/tmp/script.py"
|
|
89
89
|
input_filename = "/tmp/script.in"
|
|
90
|
-
|
|
90
|
+
|
|
91
|
+
if getattr(self, "do_xcom_push", False):
|
|
92
|
+
output_filename = "/airflow/xcom/return.json"
|
|
93
|
+
make_xcom_dir_cmd = "mkdir -p /airflow/xcom"
|
|
94
|
+
else:
|
|
95
|
+
output_filename = "/dev/null"
|
|
96
|
+
make_xcom_dir_cmd = ":" # shell no-op
|
|
91
97
|
|
|
92
98
|
write_local_script_file_cmd = (
|
|
93
99
|
f"{_generate_decoded_command(quote(_PYTHON_SCRIPT_ENV), quote(script_filename))}"
|
|
@@ -95,7 +101,6 @@ class _KubernetesDecoratedOperator(DecoratedOperator, KubernetesPodOperator):
|
|
|
95
101
|
write_local_input_file_cmd = (
|
|
96
102
|
f"{_generate_decoded_command(quote(_PYTHON_INPUT_ENV), quote(input_filename))}"
|
|
97
103
|
)
|
|
98
|
-
make_xcom_dir_cmd = "mkdir -p /airflow/xcom"
|
|
99
104
|
exec_python_cmd = f"python {script_filename} {input_filename} {output_filename}"
|
|
100
105
|
return [
|
|
101
106
|
"bash",
|
|
@@ -30,13 +30,14 @@ from airflow.providers.common.compat.sdk import (
|
|
|
30
30
|
from airflow.utils.operator_helpers import determine_kwargs
|
|
31
31
|
|
|
32
32
|
if TYPE_CHECKING:
|
|
33
|
-
from airflow.utils.context import Context
|
|
33
|
+
from airflow.sdk import Context
|
|
34
34
|
|
|
35
35
|
|
|
36
36
|
class _KubernetesCmdDecoratedOperator(DecoratedOperator, KubernetesPodOperator):
|
|
37
37
|
custom_operator_name = "@task.kubernetes_cmd"
|
|
38
38
|
|
|
39
|
-
template_fields: Sequence[str] = KubernetesPodOperator.template_fields
|
|
39
|
+
template_fields: Sequence[str] = tuple({"op_args", "op_kwargs", *KubernetesPodOperator.template_fields})
|
|
40
|
+
|
|
40
41
|
overwrite_rtif_after_execution: bool = True
|
|
41
42
|
|
|
42
43
|
def __init__(self, *, python_callable: Callable, args_only: bool = False, **kwargs) -> None:
|
|
@@ -69,6 +70,8 @@ class _KubernetesCmdDecoratedOperator(DecoratedOperator, KubernetesPodOperator):
|
|
|
69
70
|
)
|
|
70
71
|
|
|
71
72
|
def execute(self, context: Context):
|
|
73
|
+
self.render_template_fields(context)
|
|
74
|
+
|
|
72
75
|
generated = self._generate_cmds(context)
|
|
73
76
|
if self.args_only:
|
|
74
77
|
self.cmds = []
|
|
@@ -76,7 +79,7 @@ class _KubernetesCmdDecoratedOperator(DecoratedOperator, KubernetesPodOperator):
|
|
|
76
79
|
else:
|
|
77
80
|
self.cmds = generated
|
|
78
81
|
self.arguments = []
|
|
79
|
-
|
|
82
|
+
self.render_template_fields(context)
|
|
80
83
|
return super().execute(context)
|
|
81
84
|
|
|
82
85
|
def _generate_cmds(self, context: Context) -> list[str]:
|
|
@@ -16,9 +16,13 @@
|
|
|
16
16
|
# under the License.
|
|
17
17
|
from __future__ import annotations
|
|
18
18
|
|
|
19
|
-
from airflow.exceptions import
|
|
20
|
-
|
|
21
|
-
|
|
19
|
+
from airflow.exceptions import AirflowException
|
|
20
|
+
|
|
21
|
+
# Todo: we cannot have a backcompat import for AirflowException yet
|
|
22
|
+
# because PodMutationHookException is redefined in airflow.exception
|
|
23
|
+
# Remove this and either import AirflowException from common.sdk or
|
|
24
|
+
# import it from airflow.sdk.exceptions when PodMutationHookException
|
|
25
|
+
# is removed from airflow.exceptions
|
|
22
26
|
|
|
23
27
|
|
|
24
28
|
class PodMutationHookException(AirflowException):
|
|
@@ -30,7 +30,6 @@ import logging
|
|
|
30
30
|
import multiprocessing
|
|
31
31
|
import time
|
|
32
32
|
from collections import Counter, defaultdict
|
|
33
|
-
from collections.abc import Sequence
|
|
34
33
|
from contextlib import suppress
|
|
35
34
|
from datetime import datetime
|
|
36
35
|
from queue import Empty, Queue
|
|
@@ -71,7 +70,7 @@ from airflow.providers.cncf.kubernetes.executors.kubernetes_executor_types impor
|
|
|
71
70
|
)
|
|
72
71
|
from airflow.providers.cncf.kubernetes.kube_config import KubeConfig
|
|
73
72
|
from airflow.providers.cncf.kubernetes.kubernetes_helper_functions import annotations_to_key
|
|
74
|
-
from airflow.
|
|
73
|
+
from airflow.providers.common.compat.sdk import Stats
|
|
75
74
|
from airflow.utils.log.logging_mixin import remove_escape_codes
|
|
76
75
|
from airflow.utils.session import NEW_SESSION, provide_session
|
|
77
76
|
from airflow.utils.state import TaskInstanceState
|
|
@@ -27,7 +27,6 @@ from kubernetes import client, watch
|
|
|
27
27
|
from kubernetes.client.rest import ApiException
|
|
28
28
|
from urllib3.exceptions import ReadTimeoutError
|
|
29
29
|
|
|
30
|
-
from airflow.exceptions import AirflowException
|
|
31
30
|
from airflow.providers.cncf.kubernetes.backcompat import get_logical_date_key
|
|
32
31
|
from airflow.providers.cncf.kubernetes.executors.kubernetes_executor_types import (
|
|
33
32
|
ADOPTED,
|
|
@@ -46,6 +45,7 @@ from airflow.providers.cncf.kubernetes.kubernetes_helper_functions import (
|
|
|
46
45
|
create_unique_id,
|
|
47
46
|
)
|
|
48
47
|
from airflow.providers.cncf.kubernetes.pod_generator import PodGenerator, workload_to_command_args
|
|
48
|
+
from airflow.providers.common.compat.sdk import AirflowException
|
|
49
49
|
from airflow.utils.log.logging_mixin import LoggingMixin
|
|
50
50
|
from airflow.utils.singleton import Singleton
|
|
51
51
|
from airflow.utils.state import TaskInstanceState
|
|
@@ -20,36 +20,35 @@ import asyncio
|
|
|
20
20
|
import contextlib
|
|
21
21
|
import json
|
|
22
22
|
import tempfile
|
|
23
|
-
from collections.abc import Generator
|
|
24
23
|
from functools import cached_property
|
|
25
24
|
from time import sleep
|
|
26
25
|
from typing import TYPE_CHECKING, Any, Protocol
|
|
27
26
|
|
|
28
27
|
import aiofiles
|
|
29
28
|
import requests
|
|
30
|
-
import tenacity
|
|
31
29
|
from asgiref.sync import sync_to_async
|
|
32
30
|
from kubernetes import client, config, utils, watch
|
|
33
31
|
from kubernetes.client.models import V1Deployment
|
|
34
32
|
from kubernetes.config import ConfigException
|
|
35
|
-
from kubernetes_asyncio import client as async_client, config as async_config
|
|
33
|
+
from kubernetes_asyncio import client as async_client, config as async_config, watch as async_watch
|
|
36
34
|
from urllib3.exceptions import HTTPError
|
|
37
35
|
|
|
38
|
-
from airflow.exceptions import AirflowException, AirflowNotFoundException
|
|
39
36
|
from airflow.models import Connection
|
|
40
37
|
from airflow.providers.cncf.kubernetes.exceptions import KubernetesApiError, KubernetesApiPermissionError
|
|
41
38
|
from airflow.providers.cncf.kubernetes.kube_client import _disable_verify_ssl, _enable_tcp_keepalive
|
|
42
|
-
from airflow.providers.cncf.kubernetes.kubernetes_helper_functions import should_retry_creation
|
|
39
|
+
from airflow.providers.cncf.kubernetes.kubernetes_helper_functions import generic_api_retry
|
|
43
40
|
from airflow.providers.cncf.kubernetes.utils.container import (
|
|
44
41
|
container_is_completed,
|
|
45
42
|
container_is_running,
|
|
46
43
|
)
|
|
47
|
-
from airflow.providers.common.compat.sdk import BaseHook
|
|
44
|
+
from airflow.providers.common.compat.sdk import AirflowException, AirflowNotFoundException, BaseHook
|
|
48
45
|
from airflow.utils import yaml
|
|
49
46
|
|
|
50
47
|
if TYPE_CHECKING:
|
|
48
|
+
from collections.abc import AsyncGenerator, Generator
|
|
49
|
+
|
|
51
50
|
from kubernetes.client import V1JobList
|
|
52
|
-
from kubernetes.client.models import CoreV1EventList, V1Job, V1Pod
|
|
51
|
+
from kubernetes.client.models import CoreV1Event, CoreV1EventList, V1Job, V1Pod
|
|
53
52
|
|
|
54
53
|
LOADING_KUBE_CONFIG_FILE_RESOURCE = "Loading Kubernetes configuration file kube_config from {}..."
|
|
55
54
|
|
|
@@ -390,6 +389,7 @@ class KubernetesHook(BaseHook, PodOperatorHookProtocol):
|
|
|
390
389
|
self.log.debug("Response: %s", response)
|
|
391
390
|
return response
|
|
392
391
|
|
|
392
|
+
@generic_api_retry
|
|
393
393
|
def get_custom_object(
|
|
394
394
|
self, group: str, version: str, plural: str, name: str, namespace: str | None = None
|
|
395
395
|
):
|
|
@@ -412,6 +412,7 @@ class KubernetesHook(BaseHook, PodOperatorHookProtocol):
|
|
|
412
412
|
)
|
|
413
413
|
return response
|
|
414
414
|
|
|
415
|
+
@generic_api_retry
|
|
415
416
|
def delete_custom_object(
|
|
416
417
|
self, group: str, version: str, plural: str, name: str, namespace: str | None = None, **kwargs
|
|
417
418
|
):
|
|
@@ -540,12 +541,7 @@ class KubernetesHook(BaseHook, PodOperatorHookProtocol):
|
|
|
540
541
|
name=name, namespace=namespace, pretty=True, **kwargs
|
|
541
542
|
)
|
|
542
543
|
|
|
543
|
-
@tenacity.retry(
|
|
544
|
-
stop=tenacity.stop_after_attempt(3),
|
|
545
|
-
wait=tenacity.wait_random_exponential(),
|
|
546
|
-
reraise=True,
|
|
547
|
-
retry=tenacity.retry_if_exception(should_retry_creation),
|
|
548
|
-
)
|
|
544
|
+
@generic_api_retry
|
|
549
545
|
def create_job(
|
|
550
546
|
self,
|
|
551
547
|
job: V1Job,
|
|
@@ -572,6 +568,7 @@ class KubernetesHook(BaseHook, PodOperatorHookProtocol):
|
|
|
572
568
|
raise e
|
|
573
569
|
return resp
|
|
574
570
|
|
|
571
|
+
@generic_api_retry
|
|
575
572
|
def get_job(self, job_name: str, namespace: str) -> V1Job:
|
|
576
573
|
"""
|
|
577
574
|
Get Job of specified name and namespace.
|
|
@@ -582,6 +579,7 @@ class KubernetesHook(BaseHook, PodOperatorHookProtocol):
|
|
|
582
579
|
"""
|
|
583
580
|
return self.batch_v1_client.read_namespaced_job(name=job_name, namespace=namespace, pretty=True)
|
|
584
581
|
|
|
582
|
+
@generic_api_retry
|
|
585
583
|
def get_job_status(self, job_name: str, namespace: str) -> V1Job:
|
|
586
584
|
"""
|
|
587
585
|
Get job with status of specified name and namespace.
|
|
@@ -611,6 +609,7 @@ class KubernetesHook(BaseHook, PodOperatorHookProtocol):
|
|
|
611
609
|
self.log.info("The job '%s' is incomplete. Sleeping for %i sec.", job_name, job_poll_interval)
|
|
612
610
|
sleep(job_poll_interval)
|
|
613
611
|
|
|
612
|
+
@generic_api_retry
|
|
614
613
|
def list_jobs_all_namespaces(self) -> V1JobList:
|
|
615
614
|
"""
|
|
616
615
|
Get list of Jobs from all namespaces.
|
|
@@ -619,6 +618,7 @@ class KubernetesHook(BaseHook, PodOperatorHookProtocol):
|
|
|
619
618
|
"""
|
|
620
619
|
return self.batch_v1_client.list_job_for_all_namespaces(pretty=True)
|
|
621
620
|
|
|
621
|
+
@generic_api_retry
|
|
622
622
|
def list_jobs_from_namespace(self, namespace: str) -> V1JobList:
|
|
623
623
|
"""
|
|
624
624
|
Get list of Jobs from dedicated namespace.
|
|
@@ -674,6 +674,7 @@ class KubernetesHook(BaseHook, PodOperatorHookProtocol):
|
|
|
674
674
|
return bool(next((c for c in conditions if c.type == "Complete" and c.status), None))
|
|
675
675
|
return False
|
|
676
676
|
|
|
677
|
+
@generic_api_retry
|
|
677
678
|
def patch_namespaced_job(self, job_name: str, namespace: str, body: object) -> V1Job:
|
|
678
679
|
"""
|
|
679
680
|
Update the specified Job.
|
|
@@ -777,11 +778,14 @@ def _get_bool(val) -> bool | None:
|
|
|
777
778
|
class AsyncKubernetesHook(KubernetesHook):
|
|
778
779
|
"""Hook to use Kubernetes SDK asynchronously."""
|
|
779
780
|
|
|
780
|
-
def __init__(
|
|
781
|
+
def __init__(
|
|
782
|
+
self, config_dict: dict | None = None, connection_extras: dict | None = None, *args, **kwargs
|
|
783
|
+
):
|
|
781
784
|
super().__init__(*args, **kwargs)
|
|
782
785
|
|
|
783
786
|
self.config_dict = config_dict
|
|
784
|
-
self._extras: dict | None =
|
|
787
|
+
self._extras: dict | None = connection_extras
|
|
788
|
+
self._event_polling_fallback = False
|
|
785
789
|
|
|
786
790
|
async def _load_config(self):
|
|
787
791
|
"""Return Kubernetes API session for use with requests."""
|
|
@@ -831,6 +835,13 @@ class AsyncKubernetesHook(KubernetesHook):
|
|
|
831
835
|
"Reading kubernetes configuration file from connection "
|
|
832
836
|
"object and writing temporary config file with its content",
|
|
833
837
|
)
|
|
838
|
+
if isinstance(kubeconfig, dict):
|
|
839
|
+
self.log.debug(
|
|
840
|
+
LOADING_KUBE_CONFIG_FILE_RESOURCE.format(
|
|
841
|
+
"connection kube_config dictionary (serializing)"
|
|
842
|
+
)
|
|
843
|
+
)
|
|
844
|
+
kubeconfig = json.dumps(kubeconfig)
|
|
834
845
|
await temp_config.write(kubeconfig.encode())
|
|
835
846
|
await temp_config.flush()
|
|
836
847
|
self._is_in_cluster = False
|
|
@@ -872,6 +883,7 @@ class AsyncKubernetesHook(KubernetesHook):
|
|
|
872
883
|
if kube_client is not None:
|
|
873
884
|
await kube_client.close()
|
|
874
885
|
|
|
886
|
+
@generic_api_retry
|
|
875
887
|
async def get_pod(self, name: str, namespace: str) -> V1Pod:
|
|
876
888
|
"""
|
|
877
889
|
Get pod's object.
|
|
@@ -892,6 +904,7 @@ class AsyncKubernetesHook(KubernetesHook):
|
|
|
892
904
|
raise KubernetesApiPermissionError("Permission denied (403) from Kubernetes API.") from e
|
|
893
905
|
raise KubernetesApiError from e
|
|
894
906
|
|
|
907
|
+
@generic_api_retry
|
|
895
908
|
async def delete_pod(self, name: str, namespace: str):
|
|
896
909
|
"""
|
|
897
910
|
Delete pod's object.
|
|
@@ -910,6 +923,7 @@ class AsyncKubernetesHook(KubernetesHook):
|
|
|
910
923
|
if str(e.status) != "404":
|
|
911
924
|
raise
|
|
912
925
|
|
|
926
|
+
@generic_api_retry
|
|
913
927
|
async def read_logs(
|
|
914
928
|
self, name: str, namespace: str, container_name: str | None = None, since_seconds: int | None = None
|
|
915
929
|
) -> list[str]:
|
|
@@ -932,7 +946,7 @@ class AsyncKubernetesHook(KubernetesHook):
|
|
|
932
946
|
logs = await v1_api.read_namespaced_pod_log(
|
|
933
947
|
name=name,
|
|
934
948
|
namespace=namespace,
|
|
935
|
-
|
|
949
|
+
container=container_name,
|
|
936
950
|
follow=False,
|
|
937
951
|
timestamps=True,
|
|
938
952
|
since_seconds=since_seconds,
|
|
@@ -942,14 +956,25 @@ class AsyncKubernetesHook(KubernetesHook):
|
|
|
942
956
|
except HTTPError as e:
|
|
943
957
|
raise KubernetesApiError from e
|
|
944
958
|
|
|
945
|
-
|
|
946
|
-
|
|
959
|
+
@generic_api_retry
|
|
960
|
+
async def get_pod_events(
|
|
961
|
+
self, name: str, namespace: str, resource_version: str | None = None
|
|
962
|
+
) -> CoreV1EventList:
|
|
963
|
+
"""
|
|
964
|
+
Get pod events.
|
|
965
|
+
|
|
966
|
+
:param name: Pod name to get events for
|
|
967
|
+
:param namespace: Kubernetes namespace
|
|
968
|
+
:param resource_version: Only return events not older than this resource version
|
|
969
|
+
"""
|
|
947
970
|
async with self.get_conn() as connection:
|
|
948
971
|
try:
|
|
949
972
|
v1_api = async_client.CoreV1Api(connection)
|
|
950
973
|
events: CoreV1EventList = await v1_api.list_namespaced_event(
|
|
951
974
|
field_selector=f"involvedObject.name={name}",
|
|
952
975
|
namespace=namespace,
|
|
976
|
+
resource_version=resource_version,
|
|
977
|
+
resource_version_match="NotOlderThan" if resource_version else None,
|
|
953
978
|
)
|
|
954
979
|
return events
|
|
955
980
|
except HTTPError as e:
|
|
@@ -957,6 +982,81 @@ class AsyncKubernetesHook(KubernetesHook):
|
|
|
957
982
|
raise KubernetesApiPermissionError("Permission denied (403) from Kubernetes API.") from e
|
|
958
983
|
raise KubernetesApiError from e
|
|
959
984
|
|
|
985
|
+
@generic_api_retry
|
|
986
|
+
async def watch_pod_events(
|
|
987
|
+
self,
|
|
988
|
+
name: str,
|
|
989
|
+
namespace: str,
|
|
990
|
+
resource_version: str | None = None,
|
|
991
|
+
timeout_seconds: int = 30,
|
|
992
|
+
) -> AsyncGenerator[CoreV1Event]:
|
|
993
|
+
"""
|
|
994
|
+
Watch pod events using Kubernetes Watch API.
|
|
995
|
+
|
|
996
|
+
:param name: Pod name to watch events for
|
|
997
|
+
:param namespace: Kubernetes namespace
|
|
998
|
+
:param resource_version: Only return events not older than this resource version
|
|
999
|
+
:param timeout_seconds: Timeout in seconds for the watch stream
|
|
1000
|
+
"""
|
|
1001
|
+
if self._event_polling_fallback:
|
|
1002
|
+
async for event_polled in self.watch_pod_events_polling_fallback(
|
|
1003
|
+
name, namespace, resource_version, timeout_seconds
|
|
1004
|
+
):
|
|
1005
|
+
yield event_polled
|
|
1006
|
+
|
|
1007
|
+
try:
|
|
1008
|
+
w = async_watch.Watch()
|
|
1009
|
+
async with self.get_conn() as connection:
|
|
1010
|
+
v1_api = async_client.CoreV1Api(connection)
|
|
1011
|
+
|
|
1012
|
+
async for event_watched in w.stream(
|
|
1013
|
+
v1_api.list_namespaced_event,
|
|
1014
|
+
namespace=namespace,
|
|
1015
|
+
field_selector=f"involvedObject.name={name}",
|
|
1016
|
+
resource_version=resource_version,
|
|
1017
|
+
timeout_seconds=timeout_seconds,
|
|
1018
|
+
):
|
|
1019
|
+
event: CoreV1Event = event_watched.get("object")
|
|
1020
|
+
yield event
|
|
1021
|
+
|
|
1022
|
+
except async_client.exceptions.ApiException as e:
|
|
1023
|
+
if hasattr(e, "status") and e.status == 403:
|
|
1024
|
+
self.log.warning(
|
|
1025
|
+
"Triggerer does not have Kubernetes API permission to 'watch' events: %s Falling back to polling.",
|
|
1026
|
+
str(e),
|
|
1027
|
+
)
|
|
1028
|
+
self._event_polling_fallback = True
|
|
1029
|
+
async for event_polled in self.watch_pod_events_polling_fallback(
|
|
1030
|
+
name, namespace, resource_version, timeout_seconds
|
|
1031
|
+
):
|
|
1032
|
+
yield event_polled
|
|
1033
|
+
|
|
1034
|
+
finally:
|
|
1035
|
+
w.stop()
|
|
1036
|
+
|
|
1037
|
+
async def watch_pod_events_polling_fallback(
|
|
1038
|
+
self,
|
|
1039
|
+
name: str,
|
|
1040
|
+
namespace: str,
|
|
1041
|
+
resource_version: str | None = None,
|
|
1042
|
+
interval: int = 30,
|
|
1043
|
+
) -> AsyncGenerator[CoreV1Event]:
|
|
1044
|
+
"""
|
|
1045
|
+
Fallback method to poll pod event at regular intervals.
|
|
1046
|
+
|
|
1047
|
+
This is required when the Airflow triggerer does not have permission to watch events.
|
|
1048
|
+
|
|
1049
|
+
:param name: Pod name to watch events for
|
|
1050
|
+
:param namespace: Kubernetes namespace
|
|
1051
|
+
:param resource_version: Only return events not older than this resource version
|
|
1052
|
+
:param interval: Polling interval in seconds
|
|
1053
|
+
"""
|
|
1054
|
+
events: CoreV1EventList = await self.get_pod_events(name, namespace, resource_version)
|
|
1055
|
+
for event in events.items:
|
|
1056
|
+
yield event
|
|
1057
|
+
await asyncio.sleep(interval)
|
|
1058
|
+
|
|
1059
|
+
@generic_api_retry
|
|
960
1060
|
async def get_job_status(self, name: str, namespace: str) -> V1Job:
|
|
961
1061
|
"""
|
|
962
1062
|
Get job's status object.
|
|
@@ -23,11 +23,16 @@ from functools import cache
|
|
|
23
23
|
from typing import TYPE_CHECKING
|
|
24
24
|
|
|
25
25
|
import pendulum
|
|
26
|
-
|
|
26
|
+
import tenacity
|
|
27
|
+
from kubernetes.client.rest import ApiException as SyncApiException
|
|
28
|
+
from kubernetes_asyncio.client.exceptions import ApiException as AsyncApiException
|
|
27
29
|
from slugify import slugify
|
|
30
|
+
from sqlalchemy import select
|
|
31
|
+
from urllib3.exceptions import HTTPError
|
|
28
32
|
|
|
29
33
|
from airflow.configuration import conf
|
|
30
34
|
from airflow.providers.cncf.kubernetes.backcompat import get_logical_date_key
|
|
35
|
+
from airflow.providers.common.compat.sdk import AirflowException
|
|
31
36
|
|
|
32
37
|
if TYPE_CHECKING:
|
|
33
38
|
from airflow.models.taskinstancekey import TaskInstanceKey
|
|
@@ -39,6 +44,62 @@ alphanum_lower = string.ascii_lowercase + string.digits
|
|
|
39
44
|
POD_NAME_MAX_LENGTH = 63 # Matches Linux kernel's HOST_NAME_MAX default value minus 1.
|
|
40
45
|
|
|
41
46
|
|
|
47
|
+
class PodLaunchFailedException(AirflowException):
|
|
48
|
+
"""When pod launching fails in KubernetesPodOperator."""
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class KubernetesApiException(AirflowException):
|
|
52
|
+
"""When communication with kubernetes API fails."""
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
API_RETRIES = conf.getint("workers", "api_retries", fallback=5)
|
|
56
|
+
API_RETRY_WAIT_MIN = conf.getfloat("workers", "api_retry_wait_min", fallback=1)
|
|
57
|
+
API_RETRY_WAIT_MAX = conf.getfloat("workers", "api_retry_wait_max", fallback=15)
|
|
58
|
+
|
|
59
|
+
_default_wait = tenacity.wait_exponential(min=API_RETRY_WAIT_MIN, max=API_RETRY_WAIT_MAX)
|
|
60
|
+
|
|
61
|
+
TRANSIENT_STATUS_CODES = {409, 429, 500, 502, 503, 504}
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _should_retry_api(exc: BaseException) -> bool:
|
|
65
|
+
"""Retry on selected ApiException status codes, plus plain HTTP/timeout errors."""
|
|
66
|
+
if isinstance(exc, (SyncApiException, AsyncApiException)):
|
|
67
|
+
return exc.status in TRANSIENT_STATUS_CODES
|
|
68
|
+
return isinstance(exc, (HTTPError, KubernetesApiException))
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class WaitRetryAfterOrExponential(tenacity.wait.wait_base):
|
|
72
|
+
"""Wait strategy that honors Retry-After header on 429, else falls back to exponential backoff."""
|
|
73
|
+
|
|
74
|
+
def __call__(self, retry_state):
|
|
75
|
+
exc = retry_state.outcome.exception() if retry_state.outcome else None
|
|
76
|
+
if isinstance(exc, (SyncApiException, AsyncApiException)) and exc.status == 429:
|
|
77
|
+
retry_after = (exc.headers or {}).get("Retry-After")
|
|
78
|
+
if retry_after:
|
|
79
|
+
try:
|
|
80
|
+
return float(int(retry_after))
|
|
81
|
+
except ValueError:
|
|
82
|
+
pass
|
|
83
|
+
# Inline exponential fallback
|
|
84
|
+
return _default_wait(retry_state)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def generic_api_retry(func):
|
|
88
|
+
"""
|
|
89
|
+
Retry to Kubernetes API calls.
|
|
90
|
+
|
|
91
|
+
- Retries only transient ApiException status codes.
|
|
92
|
+
- Honors Retry-After on 429.
|
|
93
|
+
"""
|
|
94
|
+
return tenacity.retry(
|
|
95
|
+
stop=tenacity.stop_after_attempt(API_RETRIES),
|
|
96
|
+
wait=WaitRetryAfterOrExponential(),
|
|
97
|
+
retry=tenacity.retry_if_exception(_should_retry_api),
|
|
98
|
+
reraise=True,
|
|
99
|
+
before_sleep=tenacity.before_sleep_log(log, logging.WARNING),
|
|
100
|
+
)(func)
|
|
101
|
+
|
|
102
|
+
|
|
42
103
|
def rand_str(num):
|
|
43
104
|
"""
|
|
44
105
|
Generate random lowercase alphanumeric string of length num.
|
|
@@ -115,15 +176,14 @@ def annotations_to_key(annotations: dict[str, str]) -> TaskInstanceKey:
|
|
|
115
176
|
raise RuntimeError("Session not configured. Call configure_orm() first.")
|
|
116
177
|
session = Session()
|
|
117
178
|
|
|
118
|
-
task_instance_run_id = (
|
|
119
|
-
|
|
179
|
+
task_instance_run_id = session.scalar(
|
|
180
|
+
select(TaskInstance.run_id)
|
|
120
181
|
.join(TaskInstance.dag_run)
|
|
121
|
-
.
|
|
182
|
+
.where(
|
|
122
183
|
TaskInstance.dag_id == dag_id,
|
|
123
184
|
TaskInstance.task_id == task_id,
|
|
124
185
|
getattr(DagRun, logical_date_key) == logical_date,
|
|
125
186
|
)
|
|
126
|
-
.scalar()
|
|
127
187
|
)
|
|
128
188
|
else:
|
|
129
189
|
task_instance_run_id = annotation_run_id
|
|
@@ -148,18 +208,3 @@ def annotations_for_logging_task_metadata(annotation_set):
|
|
|
148
208
|
else:
|
|
149
209
|
annotations_for_logging = "<omitted>"
|
|
150
210
|
return annotations_for_logging
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
def should_retry_creation(exception: BaseException) -> bool:
|
|
154
|
-
"""
|
|
155
|
-
Check if an Exception indicates a transient error and warrants retrying.
|
|
156
|
-
|
|
157
|
-
This function is needed for preventing 'No agent available' error. The error appears time to time
|
|
158
|
-
when users try to create a Resource or Job. This issue is inside kubernetes and in the current moment
|
|
159
|
-
has no solution. Like a temporary solution we decided to retry Job or Resource creation request each
|
|
160
|
-
time when this error appears.
|
|
161
|
-
More about this issue here: https://github.com/cert-manager/cert-manager/issues/6457
|
|
162
|
-
"""
|
|
163
|
-
if isinstance(exception, ApiException):
|
|
164
|
-
return str(exception.status) == "500"
|
|
165
|
-
return False
|
|
@@ -28,7 +28,6 @@ import tenacity
|
|
|
28
28
|
from kubernetes.client import CoreV1Api, CustomObjectsApi, models as k8s
|
|
29
29
|
from kubernetes.client.rest import ApiException
|
|
30
30
|
|
|
31
|
-
from airflow.exceptions import AirflowException
|
|
32
31
|
from airflow.providers.cncf.kubernetes.resource_convert.configmap import (
|
|
33
32
|
convert_configmap,
|
|
34
33
|
convert_configmap_to_volume,
|
|
@@ -39,6 +38,7 @@ from airflow.providers.cncf.kubernetes.resource_convert.secret import (
|
|
|
39
38
|
convert_secret,
|
|
40
39
|
)
|
|
41
40
|
from airflow.providers.cncf.kubernetes.utils.pod_manager import PodManager
|
|
41
|
+
from airflow.providers.common.compat.sdk import AirflowException
|
|
42
42
|
from airflow.utils.log.logging_mixin import LoggingMixin
|
|
43
43
|
|
|
44
44
|
|
|
@@ -32,9 +32,10 @@ from kubernetes.client.api_client import ApiClient
|
|
|
32
32
|
from kubernetes.client.rest import ApiException
|
|
33
33
|
|
|
34
34
|
from airflow.configuration import conf
|
|
35
|
-
from airflow.exceptions import
|
|
35
|
+
from airflow.exceptions import AirflowProviderDeprecationWarning
|
|
36
36
|
from airflow.providers.cncf.kubernetes.hooks.kubernetes import KubernetesHook
|
|
37
37
|
from airflow.providers.cncf.kubernetes.kubernetes_helper_functions import (
|
|
38
|
+
POD_NAME_MAX_LENGTH,
|
|
38
39
|
add_unique_suffix,
|
|
39
40
|
create_unique_id,
|
|
40
41
|
)
|
|
@@ -43,19 +44,21 @@ from airflow.providers.cncf.kubernetes.pod_generator import PodGenerator, merge_
|
|
|
43
44
|
from airflow.providers.cncf.kubernetes.triggers.job import KubernetesJobTrigger
|
|
44
45
|
from airflow.providers.cncf.kubernetes.utils.pod_manager import EMPTY_XCOM_RESULT, PodNotFoundException
|
|
45
46
|
from airflow.providers.cncf.kubernetes.version_compat import AIRFLOW_V_3_1_PLUS
|
|
47
|
+
from airflow.providers.common.compat.sdk import AirflowException
|
|
48
|
+
from airflow.utils import yaml
|
|
46
49
|
|
|
47
50
|
if AIRFLOW_V_3_1_PLUS:
|
|
48
51
|
from airflow.sdk import BaseOperator
|
|
49
52
|
else:
|
|
50
53
|
from airflow.models import BaseOperator
|
|
51
|
-
from airflow.utils import yaml
|
|
52
|
-
from airflow.utils.context import Context
|
|
53
54
|
|
|
54
55
|
if TYPE_CHECKING:
|
|
55
|
-
from airflow.
|
|
56
|
+
from airflow.sdk import Context
|
|
56
57
|
|
|
57
58
|
log = logging.getLogger(__name__)
|
|
58
59
|
|
|
60
|
+
JOB_NAME_PREFIX = "job-"
|
|
61
|
+
|
|
59
62
|
|
|
60
63
|
class KubernetesJobOperator(KubernetesPodOperator):
|
|
61
64
|
"""
|
|
@@ -378,15 +381,18 @@ class KubernetesJobOperator(KubernetesPodOperator):
|
|
|
378
381
|
|
|
379
382
|
job = self.reconcile_jobs(job_template, job)
|
|
380
383
|
|
|
384
|
+
# Account for job name prefix when generating/truncating the name
|
|
385
|
+
max_base_length = POD_NAME_MAX_LENGTH - len(JOB_NAME_PREFIX)
|
|
386
|
+
|
|
381
387
|
if not job.metadata.name:
|
|
382
388
|
job.metadata.name = create_unique_id(
|
|
383
|
-
task_id=self.task_id, unique=self.random_name_suffix, max_length=
|
|
389
|
+
task_id=self.task_id, unique=self.random_name_suffix, max_length=max_base_length
|
|
384
390
|
)
|
|
385
391
|
elif self.random_name_suffix:
|
|
386
392
|
# user has supplied job name, we're just adding suffix
|
|
387
|
-
job.metadata.name = add_unique_suffix(name=job.metadata.name)
|
|
393
|
+
job.metadata.name = add_unique_suffix(name=job.metadata.name, max_len=max_base_length)
|
|
388
394
|
|
|
389
|
-
job.metadata.name = f"job-{job.metadata.name}"
|
|
395
|
+
job.metadata.name = f"{JOB_NAME_PREFIX}{job.metadata.name}"
|
|
390
396
|
|
|
391
397
|
if not job.metadata.namespace:
|
|
392
398
|
hook_namespace = self.hook.get_namespace()
|
|
@@ -24,10 +24,10 @@ from functools import cached_property
|
|
|
24
24
|
|
|
25
25
|
from kubernetes.utils import FailToCreateError
|
|
26
26
|
|
|
27
|
-
from airflow.exceptions import AirflowException
|
|
28
27
|
from airflow.providers.cncf.kubernetes.hooks.kubernetes import KubernetesHook
|
|
29
28
|
from airflow.providers.cncf.kubernetes.operators.job import KubernetesJobOperator
|
|
30
29
|
from airflow.providers.cncf.kubernetes.version_compat import AIRFLOW_V_3_1_PLUS
|
|
30
|
+
from airflow.providers.common.compat.sdk import AirflowException
|
|
31
31
|
|
|
32
32
|
if AIRFLOW_V_3_1_PLUS:
|
|
33
33
|
from airflow.sdk import BaseOperator
|