apache-airflow-providers-cncf-kubernetes 3.1.0__py3-none-any.whl → 10.10.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. airflow/providers/cncf/kubernetes/__init__.py +18 -23
  2. airflow/providers/cncf/kubernetes/backcompat/__init__.py +17 -0
  3. airflow/providers/cncf/kubernetes/backcompat/backwards_compat_converters.py +31 -49
  4. airflow/providers/cncf/kubernetes/callbacks.py +200 -0
  5. airflow/providers/cncf/kubernetes/cli/__init__.py +16 -0
  6. airflow/providers/cncf/kubernetes/cli/kubernetes_command.py +195 -0
  7. airflow/providers/cncf/kubernetes/decorators/kubernetes.py +163 -0
  8. airflow/providers/cncf/kubernetes/decorators/kubernetes_cmd.py +118 -0
  9. airflow/providers/cncf/kubernetes/exceptions.py +37 -0
  10. airflow/providers/cncf/kubernetes/executors/__init__.py +17 -0
  11. airflow/providers/cncf/kubernetes/executors/kubernetes_executor.py +831 -0
  12. airflow/providers/cncf/kubernetes/executors/kubernetes_executor_types.py +91 -0
  13. airflow/providers/cncf/kubernetes/executors/kubernetes_executor_utils.py +736 -0
  14. airflow/providers/cncf/kubernetes/executors/local_kubernetes_executor.py +306 -0
  15. airflow/providers/cncf/kubernetes/get_provider_info.py +249 -50
  16. airflow/providers/cncf/kubernetes/hooks/kubernetes.py +846 -112
  17. airflow/providers/cncf/kubernetes/k8s_model.py +62 -0
  18. airflow/providers/cncf/kubernetes/kube_client.py +156 -0
  19. airflow/providers/cncf/kubernetes/kube_config.py +125 -0
  20. airflow/providers/cncf/kubernetes/kubernetes_executor_templates/__init__.py +16 -0
  21. airflow/providers/cncf/kubernetes/kubernetes_executor_templates/basic_template.yaml +79 -0
  22. airflow/providers/cncf/kubernetes/kubernetes_helper_functions.py +165 -0
  23. airflow/providers/cncf/kubernetes/operators/custom_object_launcher.py +368 -0
  24. airflow/providers/cncf/kubernetes/operators/job.py +646 -0
  25. airflow/providers/cncf/kubernetes/operators/kueue.py +132 -0
  26. airflow/providers/cncf/kubernetes/operators/pod.py +1417 -0
  27. airflow/providers/cncf/kubernetes/operators/resource.py +191 -0
  28. airflow/providers/cncf/kubernetes/operators/spark_kubernetes.py +336 -35
  29. airflow/providers/cncf/kubernetes/pod_generator.py +592 -0
  30. airflow/providers/cncf/kubernetes/pod_template_file_examples/__init__.py +16 -0
  31. airflow/providers/cncf/kubernetes/pod_template_file_examples/dags_in_image_template.yaml +68 -0
  32. airflow/providers/cncf/kubernetes/pod_template_file_examples/dags_in_volume_template.yaml +74 -0
  33. airflow/providers/cncf/kubernetes/pod_template_file_examples/git_sync_template.yaml +95 -0
  34. airflow/providers/cncf/kubernetes/python_kubernetes_script.jinja2 +51 -0
  35. airflow/providers/cncf/kubernetes/python_kubernetes_script.py +92 -0
  36. airflow/providers/cncf/kubernetes/resource_convert/__init__.py +16 -0
  37. airflow/providers/cncf/kubernetes/resource_convert/configmap.py +52 -0
  38. airflow/providers/cncf/kubernetes/resource_convert/env_variable.py +39 -0
  39. airflow/providers/cncf/kubernetes/resource_convert/secret.py +40 -0
  40. airflow/providers/cncf/kubernetes/secret.py +128 -0
  41. airflow/providers/cncf/kubernetes/sensors/spark_kubernetes.py +30 -14
  42. airflow/providers/cncf/kubernetes/template_rendering.py +81 -0
  43. airflow/providers/cncf/kubernetes/triggers/__init__.py +16 -0
  44. airflow/providers/cncf/kubernetes/triggers/job.py +176 -0
  45. airflow/providers/cncf/kubernetes/triggers/pod.py +344 -0
  46. airflow/providers/cncf/kubernetes/utils/__init__.py +3 -0
  47. airflow/providers/cncf/kubernetes/utils/container.py +118 -0
  48. airflow/providers/cncf/kubernetes/utils/delete_from.py +154 -0
  49. airflow/providers/cncf/kubernetes/utils/k8s_resource_iterator.py +46 -0
  50. airflow/providers/cncf/kubernetes/utils/pod_manager.py +887 -152
  51. airflow/providers/cncf/kubernetes/utils/xcom_sidecar.py +25 -16
  52. airflow/providers/cncf/kubernetes/version_compat.py +38 -0
  53. apache_airflow_providers_cncf_kubernetes-10.10.0rc1.dist-info/METADATA +125 -0
  54. apache_airflow_providers_cncf_kubernetes-10.10.0rc1.dist-info/RECORD +62 -0
  55. {apache_airflow_providers_cncf_kubernetes-3.1.0.dist-info → apache_airflow_providers_cncf_kubernetes-10.10.0rc1.dist-info}/WHEEL +1 -2
  56. apache_airflow_providers_cncf_kubernetes-10.10.0rc1.dist-info/entry_points.txt +3 -0
  57. apache_airflow_providers_cncf_kubernetes-10.10.0rc1.dist-info/licenses/NOTICE +5 -0
  58. airflow/providers/cncf/kubernetes/backcompat/pod.py +0 -119
  59. airflow/providers/cncf/kubernetes/backcompat/pod_runtime_info_env.py +0 -56
  60. airflow/providers/cncf/kubernetes/backcompat/volume.py +0 -62
  61. airflow/providers/cncf/kubernetes/backcompat/volume_mount.py +0 -58
  62. airflow/providers/cncf/kubernetes/example_dags/example_kubernetes.py +0 -163
  63. airflow/providers/cncf/kubernetes/example_dags/example_spark_kubernetes.py +0 -66
  64. airflow/providers/cncf/kubernetes/example_dags/example_spark_kubernetes_spark_pi.yaml +0 -57
  65. airflow/providers/cncf/kubernetes/operators/kubernetes_pod.py +0 -622
  66. apache_airflow_providers_cncf_kubernetes-3.1.0.dist-info/METADATA +0 -452
  67. apache_airflow_providers_cncf_kubernetes-3.1.0.dist-info/NOTICE +0 -6
  68. apache_airflow_providers_cncf_kubernetes-3.1.0.dist-info/RECORD +0 -29
  69. apache_airflow_providers_cncf_kubernetes-3.1.0.dist-info/entry_points.txt +0 -3
  70. apache_airflow_providers_cncf_kubernetes-3.1.0.dist-info/top_level.txt +0 -1
  71. /airflow/providers/cncf/kubernetes/{example_dags → decorators}/__init__.py +0 -0
  72. {apache_airflow_providers_cncf_kubernetes-3.1.0.dist-info → apache_airflow_providers_cncf_kubernetes-10.10.0rc1.dist-info/licenses}/LICENSE +0 -0
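
Note the rename of operators/kubernetes_pod.py (entry 65, removed) to operators/pod.py (entry 26, added): DAGs upgrading across this version jump typically need a new import path. A minimal before/after sketch, assuming a plain KubernetesPodOperator task (task arguments below are hypothetical):

# Old import path (provider 3.1.0):
# from airflow.providers.cncf.kubernetes.operators.kubernetes_pod import KubernetesPodOperator
# New import path (provider 10.x):
from airflow.providers.cncf.kubernetes.operators.pod import KubernetesPodOperator

run_container = KubernetesPodOperator(
    task_id="run_container",  # hypothetical task id
    name="example-pod",       # hypothetical pod name
    image="busybox:1.36",     # hypothetical image
    cmds=["echo", "hello"],
)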
airflow/providers/cncf/kubernetes/operators/custom_object_launcher.py (added)
@@ -0,0 +1,368 @@
+ # Licensed to the Apache Software Foundation (ASF) under one
+ # or more contributor license agreements. See the NOTICE file
+ # distributed with this work for additional information
+ # regarding copyright ownership. The ASF licenses this file
+ # to you under the Apache License, Version 2.0 (the
+ # "License"); you may not use this file except in compliance
+ # with the License. You may obtain a copy of the License at
+ #
+ #   http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing,
+ # software distributed under the License is distributed on an
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ # KIND, either express or implied. See the License for the
+ # specific language governing permissions and limitations
+ # under the License.
+ """Launches a custom Kubernetes object, e.g. the SparkApplication CRD."""
+
+ from __future__ import annotations
+
+ import contextlib
+ import time
+ from copy import deepcopy
+ from datetime import datetime as dt
+ from functools import cached_property
+
+ import tenacity
+ from kubernetes.client import CoreV1Api, CustomObjectsApi, models as k8s
+ from kubernetes.client.rest import ApiException
+
+ from airflow.exceptions import AirflowException
+ from airflow.providers.cncf.kubernetes.resource_convert.configmap import (
+     convert_configmap,
+     convert_configmap_to_volume,
+ )
+ from airflow.providers.cncf.kubernetes.resource_convert.env_variable import convert_env_vars
+ from airflow.providers.cncf.kubernetes.resource_convert.secret import (
+     convert_image_pull_secrets,
+     convert_secret,
+ )
+ from airflow.providers.cncf.kubernetes.utils.pod_manager import PodManager
+ from airflow.utils.log.logging_mixin import LoggingMixin
+
+
+ def should_retry_start_spark_job(exception: BaseException) -> bool:
+     """Check if an Exception indicates a transient error and warrants retrying."""
+     if isinstance(exception, ApiException):
+         return str(exception.status) == "409"
+     return False
+
+
+ class SparkJobSpec:
+     """Spark job spec."""
+
+     def __init__(self, **entries):
+         self.__dict__.update(entries)
+         self.validate()
+         self.update_resources()
+
+     def validate(self):
+         if self.spec.get("dynamicAllocation", {}).get("enabled"):
+             if not all(
+                 [
+                     self.spec["dynamicAllocation"].get("minExecutors"),
+                     self.spec["dynamicAllocation"].get("maxExecutors"),
+                 ]
+             ):
+                 raise AirflowException("Make sure min/max value for dynamic allocation is passed")
+
+     def update_resources(self):
+         if self.spec["driver"].get("container_resources"):
+             spark_resources = SparkResources(
+                 self.spec["driver"].pop("container_resources"),
+                 self.spec["executor"].pop("container_resources"),
+             )
+             self.spec["driver"].update(spark_resources.resources["driver"])
+             self.spec["executor"].update(spark_resources.resources["executor"])
+
+
+ class KubernetesSpec:
+     """Spark Kubernetes spec."""
+
+     def __init__(self, **entries):
+         self.__dict__.update(entries)
+         self.set_attribute()
+
+     def set_attribute(self):
+         self.env_vars = convert_env_vars(self.env_vars) if self.env_vars else []
+         self.image_pull_secrets = (
+             convert_image_pull_secrets(self.image_pull_secrets) if self.image_pull_secrets else []
+         )
+         if self.config_map_mounts:
+             vols, vols_mounts = convert_configmap_to_volume(self.config_map_mounts)
+             self.volumes.extend(vols)
+             self.volume_mounts.extend(vols_mounts)
+         if self.from_env_config_map:
+             self.env_from.extend([convert_configmap(c_name) for c_name in self.from_env_config_map])
+         if self.from_env_secret:
+             self.env_from.extend([convert_secret(c) for c in self.from_env_secret])
+
+
+ class SparkResources:
+     """Spark resources."""
+
+     def __init__(
+         self,
+         driver: dict | None = None,
+         executor: dict | None = None,
+     ):
+         self.default = {
+             "gpu": {"name": None, "quantity": 0},
+             "cpu": {"request": None, "limit": None},
+             "memory": {"request": None, "limit": None},
+         }
+         self.driver = deepcopy(self.default)
+         self.executor = deepcopy(self.default)
+         if driver:
+             self.driver.update(driver)
+         if executor:
+             self.executor.update(executor)
+         self.convert_resources()
+
+     @property
+     def resources(self):
+         """Return job resources."""
+         return {"driver": self.driver_resources, "executor": self.executor_resources}
+
+     @property
+     def driver_resources(self):
+         """Return the driver resources to use."""
+         driver = {}
+         if self.driver["cpu"].get("request"):
+             driver["cores"] = self.driver["cpu"]["request"]
+         if self.driver["cpu"].get("limit"):
+             driver["coreLimit"] = self.driver["cpu"]["limit"]
+         if self.driver["memory"].get("limit"):
+             driver["memory"] = self.driver["memory"]["limit"]
+         if self.driver["gpu"].get("name") and self.driver["gpu"].get("quantity"):
+             driver["gpu"] = {"name": self.driver["gpu"]["name"], "quantity": self.driver["gpu"]["quantity"]}
+         return driver
+
+     @property
+     def executor_resources(self):
+         """Return the executor resources to use."""
+         executor = {}
+         if self.executor["cpu"].get("request"):
+             executor["cores"] = self.executor["cpu"]["request"]
+         if self.executor["cpu"].get("limit"):
+             executor["coreLimit"] = self.executor["cpu"]["limit"]
+         if self.executor["memory"].get("limit"):
+             executor["memory"] = self.executor["memory"]["limit"]
+         if self.executor["gpu"].get("name") and self.executor["gpu"].get("quantity"):
+             executor["gpu"] = {
+                 "name": self.executor["gpu"]["name"],
+                 "quantity": self.executor["gpu"]["quantity"],
+             }
+         return executor
+
+     def convert_resources(self):
+         if isinstance(self.driver["memory"].get("limit"), str):
+             if "G" in self.driver["memory"]["limit"] or "Gi" in self.driver["memory"]["limit"]:
+                 self.driver["memory"]["limit"] = float(self.driver["memory"]["limit"].rstrip("Gi G")) * 1024
+             elif "m" in self.driver["memory"]["limit"]:
+                 self.driver["memory"]["limit"] = float(self.driver["memory"]["limit"].rstrip("m"))
+             # The operator adds 40% to the given memory value, so divide by 1.4 here
+             # (e.g. "2Gi" -> 2048.0 -> "1462m") to end up at the requested amount.
+             self.driver["memory"]["limit"] = str(int(self.driver["memory"]["limit"] / 1.4)) + "m"
+
+         if isinstance(self.executor["memory"].get("limit"), str):
+             if "G" in self.executor["memory"]["limit"] or "Gi" in self.executor["memory"]["limit"]:
+                 self.executor["memory"]["limit"] = (
+                     float(self.executor["memory"]["limit"].rstrip("Gi G")) * 1024
+                 )
+             elif "m" in self.executor["memory"]["limit"]:
+                 self.executor["memory"]["limit"] = float(self.executor["memory"]["limit"].rstrip("m"))
+             # The operator adds 40% to the given memory value, so divide by 1.4 here.
+             self.executor["memory"]["limit"] = str(int(self.executor["memory"]["limit"] / 1.4)) + "m"
+
+         if self.driver["cpu"].get("request"):
+             self.driver["cpu"]["request"] = int(float(self.driver["cpu"]["request"]))
+         if self.driver["cpu"].get("limit"):
+             self.driver["cpu"]["limit"] = str(self.driver["cpu"]["limit"])
+         if self.executor["cpu"].get("request"):
+             self.executor["cpu"]["request"] = int(float(self.executor["cpu"]["request"]))
+         if self.executor["cpu"].get("limit"):
+             self.executor["cpu"]["limit"] = str(self.executor["cpu"]["limit"])
+
+         if self.driver["gpu"].get("quantity"):
+             self.driver["gpu"]["quantity"] = int(float(self.driver["gpu"]["quantity"]))
+         if self.executor["gpu"].get("quantity"):
+             self.executor["gpu"]["quantity"] = int(float(self.executor["gpu"]["quantity"]))
+
+
+ class CustomObjectStatus:
+     """Statuses of the launched custom object."""
+
+     SUBMITTED = "SUBMITTED"
+     RUNNING = "RUNNING"
+     FAILED = "FAILED"
+     SUCCEEDED = "SUCCEEDED"
+
+
+ class CustomObjectLauncher(LoggingMixin):
+     """Launches a custom object and the pods it spawns."""
+
+     def __init__(
+         self,
+         name: str | None,
+         namespace: str | None,
+         kube_client: CoreV1Api,
+         custom_obj_api: CustomObjectsApi,
+         template_body: dict | None = None,
+     ):
+         """
+         Create a custom object launcher (SparkApplication CRD).
+
+         :param name: name of the custom object; used as its ``metadata.name``.
+         :param namespace: kubernetes namespace to launch the object in.
+         :param kube_client: kubernetes client.
+         :param custom_obj_api: kubernetes custom object client.
+         :param template_body: template with the ``spark`` job spec and optional ``kubernetes`` overrides.
+         """
+         super().__init__()
+         self.name = name
+         self.namespace = namespace
+         self.template_body = template_body
+         self.body: dict = self.get_body()
+         self.kind = self.body["kind"]
+         self.plural = f"{self.kind.lower()}s"
+         if self.body.get("apiVersion"):
+             self.api_group, self.api_version = self.body["apiVersion"].split("/")
+         else:
+             self.api_group = self.body["apiGroup"]
+             self.api_version = self.body["version"]
+         self._client = kube_client
+         self.custom_obj_api = custom_obj_api
+         self.spark_obj_spec: dict = {}
+         self.pod_spec: k8s.V1Pod | None = None
+
+     @cached_property
+     def pod_manager(self) -> PodManager:
+         return PodManager(kube_client=self._client)
+
+     def get_body(self):
+         self.body = SparkJobSpec(**self.template_body["spark"])
+         if not hasattr(self.body, "metadata") or not isinstance(self.body.metadata, dict):
+             self.body.metadata = {}
+         self.body.metadata.update({"name": self.name, "namespace": self.namespace})
+         if self.template_body.get("kubernetes"):
+             k8s_spec = KubernetesSpec(**self.template_body["kubernetes"])
+             self.body.spec["volumes"] = k8s_spec.volumes
+             if k8s_spec.image_pull_secrets:
+                 self.body.spec["imagePullSecrets"] = k8s_spec.image_pull_secrets
+             for item in ["driver", "executor"]:
+                 # Env List
+                 self.body.spec[item]["env"] = k8s_spec.env_vars
+                 self.body.spec[item]["envFrom"] = k8s_spec.env_from
+                 # Volumes
+                 self.body.spec[item]["volumeMounts"] = k8s_spec.volume_mounts
+                 # Add affinity
+                 self.body.spec[item]["affinity"] = k8s_spec.affinity
+                 self.body.spec[item]["tolerations"] = k8s_spec.tolerations
+                 self.body.spec[item]["nodeSelector"] = k8s_spec.node_selector
+                 # Labels
+                 self.body.spec[item]["labels"] = self.body.spec["labels"]
+
+         return self.body.__dict__
+
+     @tenacity.retry(
+         stop=tenacity.stop_after_attempt(3),
+         wait=tenacity.wait_random_exponential(),
+         reraise=True,
+         retry=tenacity.retry_if_exception(should_retry_start_spark_job),
+     )
+     def start_spark_job(self, image=None, code_path=None, startup_timeout: int = 600):
+         """
+         Launch the pod synchronously and wait for completion.
+
+         :param image: image name
+         :param code_path: path to the .py file for Python or the jar file for Scala
+         :param startup_timeout: Timeout for pod startup; if the pod is still pending after this long, the task fails.
+         :return: the driver pod spec and the created custom object spec.
+         """
+         try:
+             if image:
+                 self.body["spec"]["image"] = image
+             if code_path:
+                 self.body["spec"]["mainApplicationFile"] = code_path
+             self.log.debug("Spark Job Creation Request Submitted")
+             self.spark_obj_spec = self.custom_obj_api.create_namespaced_custom_object(
+                 group=self.api_group,
+                 version=self.api_version,
+                 namespace=self.namespace,
+                 plural=self.plural,
+                 body=self.body,
+             )
+             self.log.debug("Spark Job Creation Response: %s", self.spark_obj_spec)
+
+             # Wait for the driver pod to come alive
+             self.pod_spec = k8s.V1Pod(
+                 metadata=k8s.V1ObjectMeta(
+                     labels=self.spark_obj_spec["spec"]["driver"].get("labels"),
+                     name=self.spark_obj_spec["metadata"]["name"] + "-driver",
+                     namespace=self.namespace,
+                 )
+             )
+             curr_time = dt.now()
+             while self.spark_job_not_running(self.spark_obj_spec):
+                 self.log.warning(
+                     "Spark job submitted but not yet started. job_id: %s",
+                     self.spark_obj_spec["metadata"]["name"],
+                 )
+                 self.check_pod_start_failure()
+                 delta = dt.now() - curr_time
+                 if delta.total_seconds() >= startup_timeout:
+                     pod_status = self.pod_manager.read_pod(self.pod_spec).status.container_statuses
+                     raise AirflowException(f"Job took too long to start. pod status: {pod_status}")
+                 time.sleep(10)
+         except Exception as e:
+             self.log.exception("Exception when attempting to create spark job")
+             raise e
+
+         return self.pod_spec, self.spark_obj_spec
+
+     def spark_job_not_running(self, spark_obj_spec):
+         """Test if spark_obj_spec has not started."""
+         spark_job_info = self.custom_obj_api.get_namespaced_custom_object_status(
+             group=self.api_group,
+             version=self.api_version,
+             namespace=self.namespace,
+             name=spark_obj_spec["metadata"]["name"],
+             plural=self.plural,
+         )
+         driver_state = spark_job_info.get("status", {}).get("applicationState", {}).get("state", "SUBMITTED")
+         if driver_state == CustomObjectStatus.FAILED:
+             err = spark_job_info.get("status", {}).get("applicationState", {}).get("errorMessage", "N/A")
+             with contextlib.suppress(Exception):
+                 self.pod_manager.fetch_container_logs(
+                     pod=self.pod_spec, container_name="spark-kubernetes-driver"
+                 )
+             raise AirflowException(f"Spark Job Failed. Error stack: {err}")
+         return driver_state == CustomObjectStatus.SUBMITTED
+
+     def check_pod_start_failure(self):
+         try:
+             waiting_status = (
+                 self.pod_manager.read_pod(self.pod_spec).status.container_statuses[0].state.waiting
+             )
+             waiting_reason = waiting_status.reason
+             waiting_message = waiting_status.message
+         except Exception:
+             return
+         if waiting_reason not in ("ContainerCreating", "PodInitializing"):
+             raise AirflowException(f"Spark Job Failed. Status: {waiting_reason}, Error: {waiting_message}")
+
+     def delete_spark_job(self, spark_job_name=None):
+         """Delete the Spark job."""
+         spark_job_name = spark_job_name or self.spark_obj_spec.get("metadata", {}).get("name")
+         if not spark_job_name:
+             self.log.warning("Spark job not found: %s", spark_job_name)
+             return
+         try:
+             self.custom_obj_api.delete_namespaced_custom_object(
+                 group=self.api_group,
+                 version=self.api_version,
+                 namespace=self.namespace,
+                 plural=self.plural,
+                 name=spark_job_name,
+             )
+         except ApiException as e:
+             # Ignore 404: the custom object was already deleted
+             if str(e.status) != "404":
+                 raise
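
For orientation, below is a minimal sketch of driving the launcher above directly. In the released provider the SparkKubernetesOperator is the supported entry point; the client setup, names, and the template dict here are assumptions inferred from __init__ and get_body, not the provider's own wiring.

# Minimal sketch, assuming direct use outside an operator. All names, the image,
# and the template values are hypothetical.
from kubernetes import client, config

from airflow.providers.cncf.kubernetes.operators.custom_object_launcher import CustomObjectLauncher

config.load_kube_config()  # assumes a local kubeconfig

template_body = {
    "spark": {
        "apiVersion": "sparkoperator.k8s.io/v1beta2",
        "kind": "SparkApplication",
        "spec": {
            "type": "Python",
            "mode": "cluster",
            "sparkVersion": "3.4.1",  # hypothetical version
            "driver": {"cores": 1, "memory": "1g", "labels": {"app": "demo"}},
            "executor": {"instances": 1, "cores": 1, "memory": "1g"},
            "labels": {"app": "demo"},
        },
    }
}

launcher = CustomObjectLauncher(
    name="spark-pi-demo",  # becomes metadata.name
    namespace="default",
    kube_client=client.CoreV1Api(),
    custom_obj_api=client.CustomObjectsApi(),
    template_body=template_body,
)

# Submits the SparkApplication, then polls until the driver leaves SUBMITTED,
# raising if startup exceeds the timeout or the driver pod fails to start.
pod_spec, spark_obj_spec = launcher.start_spark_job(
    image="apache/spark-py:v3.4.1",  # hypothetical image
    code_path="local:///opt/spark/examples/src/main/python/pi.py",
    startup_timeout=600,
)

Afterwards, delete_spark_job() can clean up the custom object, tolerating a 404 if it is already gone.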