apache-airflow-providers-cncf-kubernetes 3.1.0__py3-none-any.whl → 10.10.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two package versions as they appear in their public registries.
- airflow/providers/cncf/kubernetes/__init__.py +18 -23
- airflow/providers/cncf/kubernetes/backcompat/__init__.py +17 -0
- airflow/providers/cncf/kubernetes/backcompat/backwards_compat_converters.py +31 -49
- airflow/providers/cncf/kubernetes/callbacks.py +200 -0
- airflow/providers/cncf/kubernetes/cli/__init__.py +16 -0
- airflow/providers/cncf/kubernetes/cli/kubernetes_command.py +195 -0
- airflow/providers/cncf/kubernetes/decorators/kubernetes.py +163 -0
- airflow/providers/cncf/kubernetes/decorators/kubernetes_cmd.py +118 -0
- airflow/providers/cncf/kubernetes/exceptions.py +37 -0
- airflow/providers/cncf/kubernetes/executors/__init__.py +17 -0
- airflow/providers/cncf/kubernetes/executors/kubernetes_executor.py +831 -0
- airflow/providers/cncf/kubernetes/executors/kubernetes_executor_types.py +91 -0
- airflow/providers/cncf/kubernetes/executors/kubernetes_executor_utils.py +736 -0
- airflow/providers/cncf/kubernetes/executors/local_kubernetes_executor.py +306 -0
- airflow/providers/cncf/kubernetes/get_provider_info.py +249 -50
- airflow/providers/cncf/kubernetes/hooks/kubernetes.py +846 -112
- airflow/providers/cncf/kubernetes/k8s_model.py +62 -0
- airflow/providers/cncf/kubernetes/kube_client.py +156 -0
- airflow/providers/cncf/kubernetes/kube_config.py +125 -0
- airflow/providers/cncf/kubernetes/kubernetes_executor_templates/__init__.py +16 -0
- airflow/providers/cncf/kubernetes/kubernetes_executor_templates/basic_template.yaml +79 -0
- airflow/providers/cncf/kubernetes/kubernetes_helper_functions.py +165 -0
- airflow/providers/cncf/kubernetes/operators/custom_object_launcher.py +368 -0
- airflow/providers/cncf/kubernetes/operators/job.py +646 -0
- airflow/providers/cncf/kubernetes/operators/kueue.py +132 -0
- airflow/providers/cncf/kubernetes/operators/pod.py +1417 -0
- airflow/providers/cncf/kubernetes/operators/resource.py +191 -0
- airflow/providers/cncf/kubernetes/operators/spark_kubernetes.py +336 -35
- airflow/providers/cncf/kubernetes/pod_generator.py +592 -0
- airflow/providers/cncf/kubernetes/pod_template_file_examples/__init__.py +16 -0
- airflow/providers/cncf/kubernetes/pod_template_file_examples/dags_in_image_template.yaml +68 -0
- airflow/providers/cncf/kubernetes/pod_template_file_examples/dags_in_volume_template.yaml +74 -0
- airflow/providers/cncf/kubernetes/pod_template_file_examples/git_sync_template.yaml +95 -0
- airflow/providers/cncf/kubernetes/python_kubernetes_script.jinja2 +51 -0
- airflow/providers/cncf/kubernetes/python_kubernetes_script.py +92 -0
- airflow/providers/cncf/kubernetes/resource_convert/__init__.py +16 -0
- airflow/providers/cncf/kubernetes/resource_convert/configmap.py +52 -0
- airflow/providers/cncf/kubernetes/resource_convert/env_variable.py +39 -0
- airflow/providers/cncf/kubernetes/resource_convert/secret.py +40 -0
- airflow/providers/cncf/kubernetes/secret.py +128 -0
- airflow/providers/cncf/kubernetes/sensors/spark_kubernetes.py +30 -14
- airflow/providers/cncf/kubernetes/template_rendering.py +81 -0
- airflow/providers/cncf/kubernetes/triggers/__init__.py +16 -0
- airflow/providers/cncf/kubernetes/triggers/job.py +176 -0
- airflow/providers/cncf/kubernetes/triggers/pod.py +344 -0
- airflow/providers/cncf/kubernetes/utils/__init__.py +3 -0
- airflow/providers/cncf/kubernetes/utils/container.py +118 -0
- airflow/providers/cncf/kubernetes/utils/delete_from.py +154 -0
- airflow/providers/cncf/kubernetes/utils/k8s_resource_iterator.py +46 -0
- airflow/providers/cncf/kubernetes/utils/pod_manager.py +887 -152
- airflow/providers/cncf/kubernetes/utils/xcom_sidecar.py +25 -16
- airflow/providers/cncf/kubernetes/version_compat.py +38 -0
- apache_airflow_providers_cncf_kubernetes-10.10.0rc1.dist-info/METADATA +125 -0
- apache_airflow_providers_cncf_kubernetes-10.10.0rc1.dist-info/RECORD +62 -0
- {apache_airflow_providers_cncf_kubernetes-3.1.0.dist-info → apache_airflow_providers_cncf_kubernetes-10.10.0rc1.dist-info}/WHEEL +1 -2
- apache_airflow_providers_cncf_kubernetes-10.10.0rc1.dist-info/entry_points.txt +3 -0
- apache_airflow_providers_cncf_kubernetes-10.10.0rc1.dist-info/licenses/NOTICE +5 -0
- airflow/providers/cncf/kubernetes/backcompat/pod.py +0 -119
- airflow/providers/cncf/kubernetes/backcompat/pod_runtime_info_env.py +0 -56
- airflow/providers/cncf/kubernetes/backcompat/volume.py +0 -62
- airflow/providers/cncf/kubernetes/backcompat/volume_mount.py +0 -58
- airflow/providers/cncf/kubernetes/example_dags/example_kubernetes.py +0 -163
- airflow/providers/cncf/kubernetes/example_dags/example_spark_kubernetes.py +0 -66
- airflow/providers/cncf/kubernetes/example_dags/example_spark_kubernetes_spark_pi.yaml +0 -57
- airflow/providers/cncf/kubernetes/operators/kubernetes_pod.py +0 -622
- apache_airflow_providers_cncf_kubernetes-3.1.0.dist-info/METADATA +0 -452
- apache_airflow_providers_cncf_kubernetes-3.1.0.dist-info/NOTICE +0 -6
- apache_airflow_providers_cncf_kubernetes-3.1.0.dist-info/RECORD +0 -29
- apache_airflow_providers_cncf_kubernetes-3.1.0.dist-info/entry_points.txt +0 -3
- apache_airflow_providers_cncf_kubernetes-3.1.0.dist-info/top_level.txt +0 -1
- /airflow/providers/cncf/kubernetes/{example_dags → decorators}/__init__.py +0 -0
- {apache_airflow_providers_cncf_kubernetes-3.1.0.dist-info → apache_airflow_providers_cncf_kubernetes-10.10.0rc1.dist-info/licenses}/LICENSE +0 -0
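Judging from the file list above, the pod operator module moved: operators/kubernetes_pod.py was removed and operators/pod.py was added, and the spark_kubernetes.py diff further below imports KubernetesPodOperator from the new path. A minimal sketch of the corresponding import change in user DAG code (the task definition is illustrative, not part of the package):

# Old import path, removed together with operators/kubernetes_pod.py:
# from airflow.providers.cncf.kubernetes.operators.kubernetes_pod import KubernetesPodOperator

# New import path, provided by the added operators/pod.py:
from airflow.providers.cncf.kubernetes.operators.pod import KubernetesPodOperator

pod_task = KubernetesPodOperator(
    task_id="example_pod",
    name="example-pod",
    image="busybox",
    cmds=["sh", "-c", "echo hello"],
)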
airflow/providers/cncf/kubernetes/operators/resource.py
@@ -0,0 +1,191 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Manage a Kubernetes Resource."""
+
+from __future__ import annotations
+
+import os
+from collections.abc import Sequence
+from functools import cached_property
+from typing import TYPE_CHECKING
+
+import tenacity
+import yaml
+from kubernetes.utils import create_from_yaml
+
+from airflow.exceptions import AirflowException
+from airflow.providers.cncf.kubernetes.hooks.kubernetes import KubernetesHook
+from airflow.providers.cncf.kubernetes.kubernetes_helper_functions import should_retry_creation
+from airflow.providers.cncf.kubernetes.utils.delete_from import delete_from_yaml
+from airflow.providers.cncf.kubernetes.utils.k8s_resource_iterator import k8s_resource_iterator
+from airflow.providers.cncf.kubernetes.version_compat import AIRFLOW_V_3_1_PLUS
+
+if AIRFLOW_V_3_1_PLUS:
+    from airflow.sdk import BaseOperator
+else:
+    from airflow.models import BaseOperator
+
+if TYPE_CHECKING:
+    from kubernetes.client import ApiClient, CustomObjectsApi
+
+__all__ = ["KubernetesCreateResourceOperator", "KubernetesDeleteResourceOperator"]
+
+
+class KubernetesResourceBaseOperator(BaseOperator):
+    """
+    Abstract base class for all Kubernetes Resource operators.
+
+    :param yaml_conf: string. Contains the kubernetes resources to Create or Delete
+    :param yaml_conf_file: path to the kubernetes resources file (templated)
+    :param namespace: string. Contains the namespace to create all resources inside.
+        The namespace must preexist otherwise the resource creation will fail.
+        If the API object in the yaml file already contains a namespace definition then
+        this parameter has no effect.
+    :param kubernetes_conn_id: The :ref:`kubernetes connection id <howto/connection:kubernetes>`
+        for the Kubernetes cluster.
+    :param namespaced: specified that Kubernetes resource is or isn't in a namespace.
+        This parameter works only when custom_resource_definition parameter is True.
+    """
+
+    template_fields: Sequence[str] = ("yaml_conf", "yaml_conf_file")
+    template_fields_renderers = {"yaml_conf": "yaml"}
+
+    def __init__(
+        self,
+        *,
+        yaml_conf: str | None = None,
+        yaml_conf_file: str | None = None,
+        namespace: str | None = None,
+        kubernetes_conn_id: str | None = KubernetesHook.default_conn_name,
+        custom_resource_definition: bool = False,
+        namespaced: bool = True,
+        config_file: str | None = None,
+        **kwargs,
+    ) -> None:
+        super().__init__(**kwargs)
+        self._namespace = namespace
+        self.kubernetes_conn_id = kubernetes_conn_id
+        self.yaml_conf = yaml_conf
+        self.yaml_conf_file = yaml_conf_file
+        self.custom_resource_definition = custom_resource_definition
+        self.namespaced = namespaced
+        self.config_file = config_file
+
+        if not any([self.yaml_conf, self.yaml_conf_file]):
+            raise AirflowException("One of `yaml_conf` or `yaml_conf_file` arguments must be provided")
+
+    @cached_property
+    def client(self) -> ApiClient:
+        return self.hook.api_client
+
+    @cached_property
+    def custom_object_client(self) -> CustomObjectsApi:
+        return self.hook.custom_object_client
+
+    @cached_property
+    def hook(self) -> KubernetesHook:
+        hook = KubernetesHook(conn_id=self.kubernetes_conn_id, config_file=self.config_file)
+        return hook
+
+    def get_namespace(self) -> str:
+        if self._namespace:
+            return self._namespace
+        return self.hook.get_namespace() or "default"
+
+    def get_crd_fields(self, body: dict) -> tuple[str, str, str, str]:
+        api_version = body["apiVersion"]
+        group = api_version[0 : api_version.find("/")]
+        version = api_version[api_version.find("/") + 1 :]
+
+        metadata = body.get("metadata", {}) if body else None
+        namespace = metadata.get("namespace") if metadata else None
+
+        if namespace is None:
+            namespace = self.get_namespace()
+
+        plural = body["kind"].lower() + "s"
+
+        return group, version, namespace, plural
+
+
+class KubernetesCreateResourceOperator(KubernetesResourceBaseOperator):
+    """Create a resource in a kubernetes."""
+
+    def create_custom_from_yaml_object(self, body: dict):
+        group, version, namespace, plural = self.get_crd_fields(body)
+        if self.namespaced:
+            self.custom_object_client.create_namespaced_custom_object(group, version, namespace, plural, body)
+        else:
+            self.custom_object_client.create_cluster_custom_object(group, version, plural, body)
+
+    @tenacity.retry(
+        stop=tenacity.stop_after_attempt(3),
+        wait=tenacity.wait_random_exponential(),
+        reraise=True,
+        retry=tenacity.retry_if_exception(should_retry_creation),
+    )
+    def _create_objects(self, objects):
+        self.log.info("Starting resource creation")
+        if not self.custom_resource_definition:
+            create_from_yaml(
+                k8s_client=self.client,
+                yaml_objects=objects,
+                namespace=self.get_namespace(),
+            )
+        else:
+            k8s_resource_iterator(self.create_custom_from_yaml_object, objects)
+
+    def execute(self, context) -> None:
+        if self.yaml_conf:
+            self._create_objects(yaml.safe_load_all(self.yaml_conf))
+        elif self.yaml_conf_file and os.path.exists(self.yaml_conf_file):
+            with open(self.yaml_conf_file) as stream:
+                self._create_objects(yaml.safe_load_all(stream))
+        else:
+            raise AirflowException("File %s not found", self.yaml_conf_file)
+        self.log.info("Resource was created")
+
+
+class KubernetesDeleteResourceOperator(KubernetesResourceBaseOperator):
+    """Delete a resource in a kubernetes."""
+
+    def delete_custom_from_yaml_object(self, body: dict):
+        name = body["metadata"]["name"]
+        group, version, namespace, plural = self.get_crd_fields(body)
+        if self.namespaced:
+            self.custom_object_client.delete_namespaced_custom_object(group, version, namespace, plural, name)
+        else:
+            self.custom_object_client.delete_cluster_custom_object(group, version, plural, name)
+
+    def _delete_objects(self, objects):
+        if not self.custom_resource_definition:
+            delete_from_yaml(
+                k8s_client=self.client,
+                yaml_objects=objects,
+                namespace=self.get_namespace(),
+            )
+        else:
+            k8s_resource_iterator(self.delete_custom_from_yaml_object, objects)
+
+    def execute(self, context) -> None:
+        if self.yaml_conf:
+            self._delete_objects(yaml.safe_load_all(self.yaml_conf))
+        elif self.yaml_conf_file and os.path.exists(self.yaml_conf_file):
+            with open(self.yaml_conf_file) as stream:
+                self._delete_objects(yaml.safe_load_all(stream))
+        else:
+            raise AirflowException("File %s not found", self.yaml_conf_file)
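The new resource.py module above defines KubernetesCreateResourceOperator and KubernetesDeleteResourceOperator, which accept either an inline yaml_conf string or a yaml_conf_file path. A minimal usage sketch (the DAG id, dates, and ConfigMap manifest are illustrative, not taken from the package):

from datetime import datetime

from airflow import DAG
from airflow.providers.cncf.kubernetes.operators.resource import (
    KubernetesCreateResourceOperator,
    KubernetesDeleteResourceOperator,
)

configmap_yaml = """
apiVersion: v1
kind: ConfigMap
metadata:
  name: example-config
data:
  key: value
"""

with DAG(
    dag_id="example_k8s_resource",
    start_date=datetime(2024, 1, 1),
    schedule=None,
) as dag:
    create = KubernetesCreateResourceOperator(
        task_id="create_configmap",
        yaml_conf=configmap_yaml,
        namespace="default",                     # namespace must already exist (see docstring above)
        kubernetes_conn_id="kubernetes_default",
    )
    delete = KubernetesDeleteResourceOperator(
        task_id="delete_configmap",
        yaml_conf=configmap_yaml,
    )
    create >> delete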
airflow/providers/cncf/kubernetes/operators/spark_kubernetes.py
@@ -15,61 +15,362 @@
 # KIND, either express or implied. See the License for the
 # specific language governing permissions and limitations
 # under the License.
-from
+from __future__ import annotations
 
-from
-from
+from functools import cached_property
+from pathlib import Path
+from typing import TYPE_CHECKING, Any, cast
+
+from kubernetes.client import CoreV1Api, CustomObjectsApi, models as k8s
+
+from airflow.exceptions import AirflowException
+from airflow.providers.cncf.kubernetes import pod_generator
+from airflow.providers.cncf.kubernetes.hooks.kubernetes import KubernetesHook, _load_body_to_dict
+from airflow.providers.cncf.kubernetes.kubernetes_helper_functions import add_unique_suffix
+from airflow.providers.cncf.kubernetes.operators.custom_object_launcher import CustomObjectLauncher
+from airflow.providers.cncf.kubernetes.operators.pod import KubernetesPodOperator
+from airflow.providers.cncf.kubernetes.pod_generator import MAX_LABEL_LEN, PodGenerator
+from airflow.providers.cncf.kubernetes.utils.pod_manager import PodManager
+from airflow.utils.helpers import prune_dict
 
 if TYPE_CHECKING:
-
+    import jinja2
 
+    try:
+        from airflow.sdk.definitions.context import Context
+    except ImportError:
+        # TODO: Remove once provider drops support for Airflow 2
+        from airflow.utils.context import Context
 
-
+
+class SparkKubernetesOperator(KubernetesPodOperator):
     """
-    Creates sparkApplication object in kubernetes cluster
+    Creates sparkApplication object in kubernetes cluster.
 
     .. seealso::
         For more detail about Spark Application Object have a look at the reference:
-        https://github.com/GoogleCloudPlatform/spark-on-k8s-operator/blob/v1beta2-1.
+        https://github.com/GoogleCloudPlatform/spark-on-k8s-operator/blob/v1beta2-1.3.3-3.1.1/docs/api-docs.md#sparkapplication
 
-    :param
-
+    :param image: Docker image you wish to launch. Defaults to hub.docker.com,
+    :param code_path: path to the spark code in image,
     :param namespace: kubernetes namespace to put sparkApplication
-    :param
-
-
-    :param
+    :param name: name of the pod in which the task will run, will be used (plus a random
+        suffix if random_name_suffix is True) to generate a pod id (DNS-1123 subdomain,
+        containing only [a-z0-9.-]).
+    :param application_file: filepath to kubernetes custom_resource_definition of sparkApplication
+    :param template_spec: kubernetes sparkApplication specification
+    :param get_logs: get the stdout of the container as logs of the tasks.
+    :param do_xcom_push: If True, the content of the file
+        /airflow/xcom/return.json in the container will also be pushed to an
+        XCom when the container completes.
+    :param success_run_history_limit: Number of past successful runs of the application to keep.
+    :param startup_timeout_seconds: timeout in seconds to startup the pod.
+    :param log_events_on_failure: Log the pod's events if a failure occurs
+    :param reattach_on_restart: if the scheduler dies while the pod is running, reattach and monitor.
+        When enabled, the operator automatically adds Airflow task context labels (dag_id, task_id, run_id)
+        to the driver and executor pods to enable finding them for reattachment.
+    :param delete_on_termination: What to do when the pod reaches its final
+        state, or the execution is interrupted. If True (default), delete the
+        pod; if False, leave the pod.
+    :param kubernetes_conn_id: the connection to Kubernetes cluster
+    :param random_name_suffix: If True, adds a random suffix to the pod name
     """
 
-    template_fields
-
-
+    template_fields = ["application_file", "namespace", "template_spec", "kubernetes_conn_id"]
+    template_fields_renderers = {"template_spec": "py"}
+    template_ext = ("yaml", "yml", "json")
+    ui_color = "#f4a460"
+
+    BASE_CONTAINER_NAME = "spark-kubernetes-driver"
 
     def __init__(
         self,
         *,
-
-
-
-
-
+        image: str | None = None,
+        code_path: str | None = None,
+        namespace: str = "default",
+        name: str | None = None,
+        application_file: str | None = None,
+        template_spec=None,
+        get_logs: bool = True,
+        do_xcom_push: bool = False,
+        success_run_history_limit: int = 1,
+        startup_timeout_seconds=600,
+        log_events_on_failure: bool = False,
+        reattach_on_restart: bool = True,
+        delete_on_termination: bool = True,
+        kubernetes_conn_id: str = "kubernetes_default",
+        random_name_suffix: bool = True,
         **kwargs,
     ) -> None:
-        super().__init__(**kwargs)
+        super().__init__(name=name, **kwargs)
+        self.image = image
+        self.code_path = code_path
         self.application_file = application_file
-        self.
+        self.template_spec = template_spec
         self.kubernetes_conn_id = kubernetes_conn_id
-        self.
-        self.
-
-
-        self.
-
-
-
-
-
-
+        self.startup_timeout_seconds = startup_timeout_seconds
+        self.reattach_on_restart = reattach_on_restart
+        self.delete_on_termination = delete_on_termination
+        self.do_xcom_push = do_xcom_push
+        self.namespace = namespace
+        self.get_logs = get_logs
+        self.log_events_on_failure = log_events_on_failure
+        self.success_run_history_limit = success_run_history_limit
+        self.random_name_suffix = random_name_suffix
+
+        # fix mypy typing
+        self.base_container_name: str
+        self.container_logs: list[str]
+
+        if self.base_container_name != self.BASE_CONTAINER_NAME:
+            self.log.warning(
+                "base_container_name is not supported and will be overridden to %s", self.BASE_CONTAINER_NAME
+            )
+            self.base_container_name = self.BASE_CONTAINER_NAME
+
+        if self.get_logs and self.container_logs != self.BASE_CONTAINER_NAME:
+            self.log.warning(
+                "container_logs is not supported and will be overridden to %s", self.BASE_CONTAINER_NAME
+            )
+            self.container_logs = [self.BASE_CONTAINER_NAME]
+
+    def _render_nested_template_fields(
+        self,
+        content: Any,
+        context: Context,
+        jinja_env: jinja2.Environment,
+        seen_oids: set,
+    ) -> None:
+        if id(content) not in seen_oids and isinstance(content, k8s.V1EnvVar):
+            seen_oids.add(id(content))
+            self._do_render_template_fields(content, ("value", "name"), context, jinja_env, seen_oids)
+            return
+
+        super()._render_nested_template_fields(content, context, jinja_env, seen_oids)
+
+    def manage_template_specs(self):
+        if self.application_file:
+            try:
+                filepath = Path(self.application_file.rstrip()).resolve(strict=True)
+            except (FileNotFoundError, OSError, RuntimeError, ValueError):
+                application_file_body = self.application_file
+            else:
+                application_file_body = filepath.read_text()
+            template_body = _load_body_to_dict(application_file_body)
+            if not isinstance(template_body, dict):
+                msg = f"application_file body can't transformed into the dictionary:\n{application_file_body}"
+                raise TypeError(msg)
+        elif self.template_spec:
+            template_body = self.template_spec
+        else:
+            raise AirflowException("either application_file or template_spec should be passed")
+        if "spark" not in template_body:
+            template_body = {"spark": template_body}
+        return template_body
+
+    def create_job_name(self):
+        name = (
+            self.name or self.template_body.get("spark", {}).get("metadata", {}).get("name") or self.task_id
+        )
+
+        if self.random_name_suffix:
+            updated_name = add_unique_suffix(name=name, max_len=MAX_LABEL_LEN)
+        else:
+            # truncation is required to maintain the same behavior as before
+            updated_name = name[:MAX_LABEL_LEN]
+
+        return self._set_name(updated_name)
+
+    @staticmethod
+    def _get_ti_pod_labels(context: Context | None = None, include_try_number: bool = True) -> dict[str, str]:
+        """
+        Generate labels for the pod to track the pod in case of Operator crash.
+
+        :param include_try_number: add try number to labels
+        :param context: task context provided by airflow DAG
+        :return: dict.
+        """
+        if not context:
+            return {}
+
+        context_dict = cast("dict", context)
+        ti = context_dict["ti"]
+        run_id = context_dict["run_id"]
+
+        labels = {
+            "dag_id": ti.dag_id,
+            "task_id": ti.task_id,
+            "run_id": run_id,
+            "spark_kubernetes_operator": "True",
+        }
+
+        map_index = ti.map_index
+        if map_index is not None and map_index >= 0:
+            labels["map_index"] = str(map_index)
+
+        if include_try_number:
+            labels.update(try_number=str(ti.try_number))
+
+        # In the case of sub dags this is just useful
+        # TODO: Remove this when the minimum version of Airflow is bumped to 3.0
+        if getattr(context_dict["dag"], "parent_dag", False):
+            labels["parent_dag_id"] = context_dict["dag"].parent_dag.dag_id
+        # Ensure that label is valid for Kube,
+        # and if not truncate/remove invalid chars and replace with short hash.
+        for label_id, label in labels.items():
+            safe_label = pod_generator.make_safe_label_value(str(label))
+            labels[label_id] = safe_label
+        return labels
+
+    @cached_property
+    def pod_manager(self) -> PodManager:
+        return PodManager(kube_client=self.client)
+
+    def _try_numbers_match(self, context, pod) -> bool:
+        task_instance = context["task_instance"]
+        task_context_labels = self._get_ti_pod_labels(context)
+        pod_try_number = pod.metadata.labels.get(task_context_labels.get("try_number", ""), "")
+        return str(task_instance.try_number) == str(pod_try_number)
+
+    @property
+    def template_body(self):
+        """Templated body for CustomObjectLauncher."""
+        return self.manage_template_specs()
+
+    def find_spark_job(self, context, exclude_checked: bool = True):
+        label_selector = (
+            self._build_find_pod_label_selector(context, exclude_checked=exclude_checked)
+            + ",spark-role=driver"
+        )
+        pod_list = self.client.list_namespaced_pod(self.namespace, label_selector=label_selector).items
+
+        pod = None
+        if len(pod_list) > 1:  # and self.reattach_on_restart:
+            raise AirflowException(f"More than one pod running with labels: {label_selector}")
+        if len(pod_list) == 1:
+            pod = pod_list[0]
+            self.log.info(
+                "Found matching driver pod %s with labels %s", pod.metadata.name, pod.metadata.labels
+            )
+            self.log.info("`try_number` of task_instance: %s", context["ti"].try_number)
+            self.log.info("`try_number` of pod: %s", pod.metadata.labels.get("try_number", "unknown"))
+        return pod
+
+    def process_pod_deletion(self, pod, *, reraise=True):
+        if pod is not None:
+            if self.delete_on_termination:
+                pod_name = pod.metadata.name.replace("-driver", "")
+                self.log.info("Deleting spark job: %s", pod_name)
+                self.launcher.delete_spark_job(pod_name)
+            else:
+                self.log.info("skipping deleting spark job: %s", pod.metadata.name)
+
+    @cached_property
+    def hook(self) -> KubernetesHook:
+        hook = KubernetesHook(
+            conn_id=self.kubernetes_conn_id,
+            in_cluster=self.in_cluster or self.template_body.get("kubernetes", {}).get("in_cluster", False),
+            config_file=self.config_file
+            or self.template_body.get("kubernetes", {}).get("kube_config_file", None),
+            cluster_context=self.cluster_context
+            or self.template_body.get("kubernetes", {}).get("cluster_context", None),
+        )
+        return hook
+
+    @cached_property
+    def client(self) -> CoreV1Api:
+        return self.hook.core_v1_client
+
+    @cached_property
+    def custom_obj_api(self) -> CustomObjectsApi:
+        return CustomObjectsApi()
+
+    def get_or_create_spark_crd(self, launcher: CustomObjectLauncher, context) -> k8s.V1Pod:
+        if self.reattach_on_restart:
+            driver_pod = self.find_spark_job(context)
+            if driver_pod:
+                return driver_pod
+
+        driver_pod, spark_obj_spec = launcher.start_spark_job(
+            image=self.image, code_path=self.code_path, startup_timeout=self.startup_timeout_seconds
+        )
+        return driver_pod
+
+    def execute(self, context: Context):
+        self.name = self.create_job_name()
+
+        self._setup_spark_configuration(context)
+
+        if self.deferrable:
+            self.execute_async(context)
+
+        return super().execute(context)
+
+    def _setup_spark_configuration(self, context: Context):
+        """Set up Spark-specific configuration including reattach logic."""
+        import copy
+
+        template_body = copy.deepcopy(self.template_body)
+
+        if self.reattach_on_restart:
+            task_context_labels = self._get_ti_pod_labels(context)
+
+            existing_pod = self.find_spark_job(context)
+            if existing_pod:
+                self.log.info(
+                    "Found existing Spark driver pod %s. Reattaching to it.", existing_pod.metadata.name
+                )
+                self.pod = existing_pod
+                self.pod_request_obj = None
+                return
+
+            if "spark" not in template_body:
+                template_body["spark"] = {}
+            if "spec" not in template_body["spark"]:
+                template_body["spark"]["spec"] = {}
+
+            spec_dict = template_body["spark"]["spec"]
+
+            if "labels" not in spec_dict:
+                spec_dict["labels"] = {}
+            spec_dict["labels"].update(task_context_labels)
+
+            for component in ["driver", "executor"]:
+                if component not in spec_dict:
+                    spec_dict[component] = {}
+
+                if "labels" not in spec_dict[component]:
+                    spec_dict[component]["labels"] = {}
+
+                spec_dict[component]["labels"].update(task_context_labels)
+
+        self.log.info("Creating sparkApplication.")
+        self.launcher = CustomObjectLauncher(
+            name=self.name,
             namespace=self.namespace,
+            kube_client=self.client,
+            custom_obj_api=self.custom_obj_api,
+            template_body=template_body,
         )
-
+        self.pod = self.get_or_create_spark_crd(self.launcher, context)
+        self.pod_request_obj = self.launcher.pod_spec
+
+    def find_pod(self, namespace: str, context: Context, *, exclude_checked: bool = True):
+        """Override parent's find_pod to use our Spark-specific find_spark_job method."""
+        return self.find_spark_job(context, exclude_checked=exclude_checked)
+
+    def on_kill(self) -> None:
+        if self.launcher:
+            self.log.debug("Deleting spark job for task %s", self.task_id)
+            self.launcher.delete_spark_job()
+
+    def patch_already_checked(self, pod: k8s.V1Pod, *, reraise=True):
+        """Add an "already checked" annotation to ensure we don't reattach on retries."""
+        pod.metadata.labels["already_checked"] = "True"
+        body = PodGenerator.serialize_pod(pod)
+        self.client.patch_namespaced_pod(pod.metadata.name, pod.metadata.namespace, body)
+
+    def dry_run(self) -> None:
+        """Print out the spark job that would be created by this operator."""
+        print(prune_dict(self.launcher.body, mode="strict"))