apache-airflow-providers-cncf-kubernetes 3.1.0__py3-none-any.whl → 10.10.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. airflow/providers/cncf/kubernetes/__init__.py +18 -23
  2. airflow/providers/cncf/kubernetes/backcompat/__init__.py +17 -0
  3. airflow/providers/cncf/kubernetes/backcompat/backwards_compat_converters.py +31 -49
  4. airflow/providers/cncf/kubernetes/callbacks.py +200 -0
  5. airflow/providers/cncf/kubernetes/cli/__init__.py +16 -0
  6. airflow/providers/cncf/kubernetes/cli/kubernetes_command.py +195 -0
  7. airflow/providers/cncf/kubernetes/decorators/kubernetes.py +163 -0
  8. airflow/providers/cncf/kubernetes/decorators/kubernetes_cmd.py +118 -0
  9. airflow/providers/cncf/kubernetes/exceptions.py +37 -0
  10. airflow/providers/cncf/kubernetes/executors/__init__.py +17 -0
  11. airflow/providers/cncf/kubernetes/executors/kubernetes_executor.py +831 -0
  12. airflow/providers/cncf/kubernetes/executors/kubernetes_executor_types.py +91 -0
  13. airflow/providers/cncf/kubernetes/executors/kubernetes_executor_utils.py +736 -0
  14. airflow/providers/cncf/kubernetes/executors/local_kubernetes_executor.py +306 -0
  15. airflow/providers/cncf/kubernetes/get_provider_info.py +249 -50
  16. airflow/providers/cncf/kubernetes/hooks/kubernetes.py +846 -112
  17. airflow/providers/cncf/kubernetes/k8s_model.py +62 -0
  18. airflow/providers/cncf/kubernetes/kube_client.py +156 -0
  19. airflow/providers/cncf/kubernetes/kube_config.py +125 -0
  20. airflow/providers/cncf/kubernetes/kubernetes_executor_templates/__init__.py +16 -0
  21. airflow/providers/cncf/kubernetes/kubernetes_executor_templates/basic_template.yaml +79 -0
  22. airflow/providers/cncf/kubernetes/kubernetes_helper_functions.py +165 -0
  23. airflow/providers/cncf/kubernetes/operators/custom_object_launcher.py +368 -0
  24. airflow/providers/cncf/kubernetes/operators/job.py +646 -0
  25. airflow/providers/cncf/kubernetes/operators/kueue.py +132 -0
  26. airflow/providers/cncf/kubernetes/operators/pod.py +1417 -0
  27. airflow/providers/cncf/kubernetes/operators/resource.py +191 -0
  28. airflow/providers/cncf/kubernetes/operators/spark_kubernetes.py +336 -35
  29. airflow/providers/cncf/kubernetes/pod_generator.py +592 -0
  30. airflow/providers/cncf/kubernetes/pod_template_file_examples/__init__.py +16 -0
  31. airflow/providers/cncf/kubernetes/pod_template_file_examples/dags_in_image_template.yaml +68 -0
  32. airflow/providers/cncf/kubernetes/pod_template_file_examples/dags_in_volume_template.yaml +74 -0
  33. airflow/providers/cncf/kubernetes/pod_template_file_examples/git_sync_template.yaml +95 -0
  34. airflow/providers/cncf/kubernetes/python_kubernetes_script.jinja2 +51 -0
  35. airflow/providers/cncf/kubernetes/python_kubernetes_script.py +92 -0
  36. airflow/providers/cncf/kubernetes/resource_convert/__init__.py +16 -0
  37. airflow/providers/cncf/kubernetes/resource_convert/configmap.py +52 -0
  38. airflow/providers/cncf/kubernetes/resource_convert/env_variable.py +39 -0
  39. airflow/providers/cncf/kubernetes/resource_convert/secret.py +40 -0
  40. airflow/providers/cncf/kubernetes/secret.py +128 -0
  41. airflow/providers/cncf/kubernetes/sensors/spark_kubernetes.py +30 -14
  42. airflow/providers/cncf/kubernetes/template_rendering.py +81 -0
  43. airflow/providers/cncf/kubernetes/triggers/__init__.py +16 -0
  44. airflow/providers/cncf/kubernetes/triggers/job.py +176 -0
  45. airflow/providers/cncf/kubernetes/triggers/pod.py +344 -0
  46. airflow/providers/cncf/kubernetes/utils/__init__.py +3 -0
  47. airflow/providers/cncf/kubernetes/utils/container.py +118 -0
  48. airflow/providers/cncf/kubernetes/utils/delete_from.py +154 -0
  49. airflow/providers/cncf/kubernetes/utils/k8s_resource_iterator.py +46 -0
  50. airflow/providers/cncf/kubernetes/utils/pod_manager.py +887 -152
  51. airflow/providers/cncf/kubernetes/utils/xcom_sidecar.py +25 -16
  52. airflow/providers/cncf/kubernetes/version_compat.py +38 -0
  53. apache_airflow_providers_cncf_kubernetes-10.10.0rc1.dist-info/METADATA +125 -0
  54. apache_airflow_providers_cncf_kubernetes-10.10.0rc1.dist-info/RECORD +62 -0
  55. {apache_airflow_providers_cncf_kubernetes-3.1.0.dist-info → apache_airflow_providers_cncf_kubernetes-10.10.0rc1.dist-info}/WHEEL +1 -2
  56. apache_airflow_providers_cncf_kubernetes-10.10.0rc1.dist-info/entry_points.txt +3 -0
  57. apache_airflow_providers_cncf_kubernetes-10.10.0rc1.dist-info/licenses/NOTICE +5 -0
  58. airflow/providers/cncf/kubernetes/backcompat/pod.py +0 -119
  59. airflow/providers/cncf/kubernetes/backcompat/pod_runtime_info_env.py +0 -56
  60. airflow/providers/cncf/kubernetes/backcompat/volume.py +0 -62
  61. airflow/providers/cncf/kubernetes/backcompat/volume_mount.py +0 -58
  62. airflow/providers/cncf/kubernetes/example_dags/example_kubernetes.py +0 -163
  63. airflow/providers/cncf/kubernetes/example_dags/example_spark_kubernetes.py +0 -66
  64. airflow/providers/cncf/kubernetes/example_dags/example_spark_kubernetes_spark_pi.yaml +0 -57
  65. airflow/providers/cncf/kubernetes/operators/kubernetes_pod.py +0 -622
  66. apache_airflow_providers_cncf_kubernetes-3.1.0.dist-info/METADATA +0 -452
  67. apache_airflow_providers_cncf_kubernetes-3.1.0.dist-info/NOTICE +0 -6
  68. apache_airflow_providers_cncf_kubernetes-3.1.0.dist-info/RECORD +0 -29
  69. apache_airflow_providers_cncf_kubernetes-3.1.0.dist-info/entry_points.txt +0 -3
  70. apache_airflow_providers_cncf_kubernetes-3.1.0.dist-info/top_level.txt +0 -1
  71. /airflow/providers/cncf/kubernetes/{example_dags → decorators}/__init__.py +0 -0
  72. {apache_airflow_providers_cncf_kubernetes-3.1.0.dist-info → apache_airflow_providers_cncf_kubernetes-10.10.0rc1.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,1417 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+ """Executes task in a Kubernetes POD."""
18
+
19
+ from __future__ import annotations
20
+
21
+ import asyncio
22
+ import datetime
23
+ import json
24
+ import logging
25
+ import math
26
+ import os
27
+ import re
28
+ import shlex
29
+ import string
30
+ from collections.abc import Callable, Container, Iterable, Sequence
31
+ from contextlib import AbstractContextManager
32
+ from enum import Enum
33
+ from functools import cached_property
34
+ from typing import TYPE_CHECKING, Any, Literal
35
+
36
+ import kubernetes
37
+ import tenacity
38
+ from kubernetes.client import CoreV1Api, V1Pod, models as k8s
39
+ from kubernetes.client.exceptions import ApiException
40
+ from kubernetes.stream import stream
41
+ from urllib3.exceptions import HTTPError
42
+
43
+ from airflow.configuration import conf
44
+ from airflow.exceptions import (
45
+ AirflowException,
46
+ AirflowSkipException,
47
+ TaskDeferred,
48
+ )
49
+ from airflow.providers.cncf.kubernetes import pod_generator
50
+ from airflow.providers.cncf.kubernetes.backcompat.backwards_compat_converters import (
51
+ convert_affinity,
52
+ convert_configmap,
53
+ convert_env_vars,
54
+ convert_env_vars_or_raise_error,
55
+ convert_image_pull_secrets,
56
+ convert_pod_runtime_info_env,
57
+ convert_port,
58
+ convert_toleration,
59
+ convert_volume,
60
+ convert_volume_mount,
61
+ )
62
+ from airflow.providers.cncf.kubernetes.callbacks import ExecutionMode, KubernetesPodOperatorCallback
63
+ from airflow.providers.cncf.kubernetes.hooks.kubernetes import KubernetesHook
64
+ from airflow.providers.cncf.kubernetes.kubernetes_helper_functions import (
65
+ POD_NAME_MAX_LENGTH,
66
+ add_unique_suffix,
67
+ create_unique_id,
68
+ )
69
+ from airflow.providers.cncf.kubernetes.pod_generator import PodGenerator
70
+ from airflow.providers.cncf.kubernetes.triggers.pod import KubernetesPodTrigger
71
+ from airflow.providers.cncf.kubernetes.utils import xcom_sidecar
72
+ from airflow.providers.cncf.kubernetes.utils.container import (
73
+ container_is_succeeded,
74
+ get_container_termination_message,
75
+ )
76
+ from airflow.providers.cncf.kubernetes.utils.pod_manager import (
77
+ EMPTY_XCOM_RESULT,
78
+ OnFinishAction,
79
+ PodLaunchFailedException,
80
+ PodManager,
81
+ PodNotFoundException,
82
+ PodPhase,
83
+ )
84
+ from airflow.providers.cncf.kubernetes.version_compat import AIRFLOW_V_3_1_PLUS
85
+ from airflow.providers.common.compat.sdk import XCOM_RETURN_KEY
86
+
87
+ if AIRFLOW_V_3_1_PLUS:
88
+ from airflow.sdk import BaseOperator
89
+ else:
90
+ from airflow.models import BaseOperator
91
+ from airflow.settings import pod_mutation_hook
92
+ from airflow.utils import yaml
93
+ from airflow.utils.helpers import prune_dict, validate_key
94
+ from airflow.version import version as airflow_version
95
+
96
+ if TYPE_CHECKING:
97
+ import jinja2
98
+ from pendulum import DateTime
99
+
100
+ from airflow.providers.cncf.kubernetes.hooks.kubernetes import PodOperatorHookProtocol
101
+ from airflow.providers.cncf.kubernetes.secret import Secret
102
+
103
+ try:
104
+ from airflow.sdk.definitions.context import Context
105
+ except ImportError:
106
+ # TODO: Remove once provider drops support for Airflow 2
107
+ from airflow.utils.context import Context
108
+
109
# Lowercase ASCII letters plus digits — the character set used for generated
# pod-name suffixes (pod ids are DNS-1123 subdomains, per the operator docs).
alphanum_lower = string.ascii_lowercase + string.digits

# Environment variable the Kubernetes tooling reads to locate the kubeconfig file.
KUBE_CONFIG_ENV_VAR = "KUBECONFIG"
112
+
113
+
114
class PodEventType(Enum):
    """Category of an event the Kubernetes API attaches to a pod."""

    WARNING = "Warning"
    NORMAL = "Normal"
119
+
120
+
121
class PodReattachFailure(AirflowException):
    """Raised when a pod we expected to find (to reattach to) cannot be located."""
123
+
124
+
125
class PodCredentialsExpiredFailure(AirflowException):
    """Raised when the pod's credentials could not be refreshed."""
127
+
128
+
129
+ class KubernetesPodOperator(BaseOperator):
130
+ """
131
+ Execute a task in a Kubernetes Pod.
132
+
133
+ .. seealso::
134
+ For more information on how to use this operator, take a look at the guide:
135
+ :ref:`howto/operator:KubernetesPodOperator`
136
+
137
+ .. note::
138
+ If you use `Google Kubernetes Engine <https://cloud.google.com/kubernetes-engine/>`__
139
+ and Airflow is not running in the same cluster, consider using
140
+ :class:`~airflow.providers.google.cloud.operators.kubernetes_engine.GKEStartPodOperator`, which
141
+ simplifies the authorization process.
142
+
143
+ :param kubernetes_conn_id: The :ref:`kubernetes connection id <howto/connection:kubernetes>`
144
+ for the Kubernetes cluster. (templated)
145
+ :param namespace: the namespace to run within kubernetes.
146
+ :param image: Container image you wish to launch. Defaults to hub.docker.com,
147
+ but fully qualified URLS will point to custom repositories. (templated)
148
+ :param name: name of the pod in which the task will run, will be used (plus a random
149
+ suffix if random_name_suffix is True) to generate a pod id (DNS-1123 subdomain,
150
+ containing only [a-z0-9.-]). (templated)
151
+ :param random_name_suffix: if True, will generate a random suffix.
152
+ :param cmds: entrypoint of the container.
153
+ The container images's entrypoint is used if this is not provided. (templated)
154
+ :param arguments: arguments of the entrypoint.
155
+ The container image's CMD is used if this is not provided. (templated)
156
+ :param ports: ports for the launched pod.
157
+ :param volume_mounts: volumeMounts for the launched pod. (templated)
158
+ :param volumes: volumes for the launched pod. Includes ConfigMaps and PersistentVolumes. (templated)
159
+ :param env_vars: Environment variables initialized in the container. (templated)
160
+ :param env_from: (Optional) List of sources to populate environment variables in the container. (templated)
161
+ :param secrets: Kubernetes secrets to inject in the container.
162
+ They can be exposed as environment vars or files in a volume.
163
+ :param in_cluster: run kubernetes client with in_cluster configuration.
164
+ :param cluster_context: context that points to kubernetes cluster.
165
+ Ignored when in_cluster is True. If None, current-context is used. (templated)
166
+ :param reattach_on_restart: if the worker dies while the pod is running, reattach and monitor
167
+ during the next try. If False, always create a new pod for each try.
168
+ :param labels: labels to apply to the Pod. (templated)
169
+ :param startup_timeout_seconds: timeout in seconds to startup the pod after pod was scheduled.
170
+ :param startup_check_interval_seconds: interval in seconds to check if the pod has already started
171
+ :param schedule_timeout_seconds: timeout in seconds to schedule pod in cluster.
172
+ :param get_logs: get the stdout of the base container as logs of the tasks.
173
+ :param init_container_logs: list of init containers whose logs will be published to stdout
174
+ Takes a sequence of containers, a single container name or True. If True,
175
+ all the containers logs are published.
176
+ :param container_logs: list of containers whose logs will be published to stdout
177
+ Takes a sequence of containers, a single container name or True. If True,
178
+ all the containers logs are published. Works in conjunction with get_logs param.
179
+ The default value is the base container.
180
+ :param image_pull_policy: Specify a policy to cache or always pull an image.
181
+ :param annotations: non-identifying metadata you can attach to the Pod.
182
+ Can be a large range of data, and can include characters
183
+ that are not permitted by labels. (templated)
184
+ :param container_resources: resources for the launched pod. (templated)
185
+ :param affinity: affinity scheduling rules for the launched pod.
186
+ :param config_file: The path to the Kubernetes config file. (templated)
187
+ If not specified, default value is ``~/.kube/config``
188
+ :param node_selector: A dict containing a group of scheduling rules. (templated)
189
+ :param image_pull_secrets: Any image pull secrets to be given to the pod.
190
+ If more than one secret is required, provide a
191
+ comma separated list: secret_a,secret_b
192
+ :param service_account_name: Name of the service account
193
+ :param automount_service_account_token: indicates whether pods running as this service account should have an API token automatically mounted
194
+ :param hostnetwork: If True enable host networking on the pod.
195
+ :param host_aliases: A list of host aliases to apply to the containers in the pod.
196
+ :param tolerations: A list of kubernetes tolerations.
197
+ :param security_context: security options the pod should run with (PodSecurityContext).
198
+ :param container_security_context: security options the container should run with.
199
+ :param dnspolicy: dnspolicy for the pod.
200
+ :param dns_config: dns configuration (ip addresses, searches, options) for the pod.
201
+ :param hostname: hostname for the pod. (templated)
202
+ :param subdomain: subdomain for the pod.
203
+ :param schedulername: Specify a schedulername for the pod
204
+ :param full_pod_spec: The complete podSpec
205
+ :param init_containers: init container for the launched Pod
206
+ :param log_events_on_failure: Log the pod's events if a failure occurs
207
+ :param do_xcom_push: If True, the content of the file
208
+ /airflow/xcom/return.json in the container will also be pushed to an
209
+ XCom when the container completes.
210
+ :param pod_template_file: path to pod template file (templated)
211
+ :param pod_template_dict: pod template dictionary (templated)
212
+ :param priority_class_name: priority class name for the launched Pod
213
+ :param pod_runtime_info_envs: (Optional) A list of environment variables,
214
+ to be set in the container.
215
+ :param termination_grace_period: Termination grace period (in seconds) for the pod.
216
+ This sets the pod's ``terminationGracePeriodSeconds`` and is also used as the grace period
217
+ when deleting the pod if the task is killed. If not specified, uses the Kubernetes default (30 seconds).
218
+ :param configmaps: (Optional) A list of names of config maps from which it collects ConfigMaps
219
+ to populate the environment variables with. The contents of the target
220
+ ConfigMap's Data field will represent the key-value pairs as environment variables.
221
+ Extends env_from.
222
+ :param skip_on_exit_code: If task exits with this exit code, leave the task
223
+ in ``skipped`` state (default: None). If set to ``None``, any non-zero
224
+ exit code will be treated as a failure.
225
+ :param base_container_name: The name of the base container in the pod. This container's logs
226
+ will appear as part of this task's logs if get_logs is True. Defaults to None. If None,
227
+ will consult the class variable BASE_CONTAINER_NAME (which defaults to "base") for the base
228
+ container name to use. (templated)
229
+ :param base_container_status_polling_interval: Polling period in seconds to check for the pod base
230
+ container status. Default to 1s.
231
+ :param deferrable: Run operator in the deferrable mode.
232
+ :param poll_interval: Polling period in seconds to check for the status. Used only in deferrable mode.
233
+ :param log_pod_spec_on_failure: Log the pod's specification if a failure occurs
234
+ :param on_finish_action: What to do when the pod reaches its final state, or the execution is interrupted.
235
+ If "delete_pod", the pod will be deleted regardless its state; if "delete_succeeded_pod",
236
+ only succeeded pod will be deleted. You can set to "keep_pod" to keep the pod.
237
+ :param termination_message_policy: The termination message policy of the base container.
238
+ Default value is "File"
239
+ :param active_deadline_seconds: The active_deadline_seconds which translates to active_deadline_seconds
240
+ in V1PodSpec.
241
+ :param callbacks: KubernetesPodOperatorCallback instance contains the callbacks methods on different step
242
+ of KubernetesPodOperator.
243
+ :param logging_interval: max time in seconds that task should be in deferred state before
244
+ resuming to fetch the latest logs. If ``None``, then the task will remain in deferred state until pod
245
+ is done, and no logs will be visible until that time.
246
+ :param trigger_kwargs: additional keyword parameters passed to the trigger
247
+ :param container_name_log_prefix_enabled: if True, will prefix container name to each log line.
248
+ Default to True.
249
+ :param log_formatter: custom log formatter function that takes two string arguments:
250
+ the first string is the container_name and the second string is the message_to_log.
251
+ The function should return a formatted string. If None, the default formatting will be used.
252
+ """
253
+
254
+ # !!! Changes in KubernetesPodOperator's arguments should be also reflected in !!!
255
+ # - airflow-core/src/airflow/decorators/__init__.pyi (by a separate PR)
256
+
257
+ # This field can be overloaded at the instance level via base_container_name
258
+ BASE_CONTAINER_NAME = "base"
259
+ ISTIO_CONTAINER_NAME = "istio-proxy"
260
+ KILL_ISTIO_PROXY_SUCCESS_MSG = "HTTP/1.1 200"
261
+ POD_CHECKED_KEY = "already_checked"
262
+ POST_TERMINATION_TIMEOUT = 120
263
+
264
+ template_fields: Sequence[str] = (
265
+ "image",
266
+ "name",
267
+ "hostname",
268
+ "cmds",
269
+ "annotations",
270
+ "arguments",
271
+ "env_vars",
272
+ "labels",
273
+ "config_file",
274
+ "pod_template_file",
275
+ "pod_template_dict",
276
+ "namespace",
277
+ "container_resources",
278
+ "volumes",
279
+ "volume_mounts",
280
+ "cluster_context",
281
+ "env_from",
282
+ "node_selector",
283
+ "kubernetes_conn_id",
284
+ "base_container_name",
285
+ "trigger_kwargs",
286
+ )
287
+ template_fields_renderers = {"env_vars": "py"}
288
+
289
+ def __init__(
290
+ self,
291
+ *,
292
+ kubernetes_conn_id: str | None = KubernetesHook.default_conn_name,
293
+ namespace: str | None = None,
294
+ image: str | None = None,
295
+ name: str | None = None,
296
+ random_name_suffix: bool = True,
297
+ cmds: list[str] | None = None,
298
+ arguments: list[str] | None = None,
299
+ ports: list[k8s.V1ContainerPort] | None = None,
300
+ volume_mounts: list[k8s.V1VolumeMount] | None = None,
301
+ volumes: list[k8s.V1Volume] | None = None,
302
+ env_vars: list[k8s.V1EnvVar] | dict[str, str] | None = None,
303
+ env_from: list[k8s.V1EnvFromSource] | None = None,
304
+ secrets: list[Secret] | None = None,
305
+ in_cluster: bool | None = None,
306
+ cluster_context: str | None = None,
307
+ labels: dict | None = None,
308
+ reattach_on_restart: bool = True,
309
+ startup_timeout_seconds: int = 120,
310
+ startup_check_interval_seconds: int = 5,
311
+ schedule_timeout_seconds: int | None = None,
312
+ get_logs: bool = True,
313
+ base_container_name: str | None = None,
314
+ base_container_status_polling_interval: float = 1,
315
+ init_container_logs: Iterable[str] | str | Literal[True] | None = None,
316
+ container_logs: Iterable[str] | str | Literal[True] | None = None,
317
+ image_pull_policy: str | None = None,
318
+ annotations: dict | None = None,
319
+ container_resources: k8s.V1ResourceRequirements | None = None,
320
+ affinity: k8s.V1Affinity | None = None,
321
+ config_file: str | None = None,
322
+ node_selector: dict | None = None,
323
+ image_pull_secrets: list[k8s.V1LocalObjectReference] | None = None,
324
+ service_account_name: str | None = None,
325
+ automount_service_account_token: bool | None = None,
326
+ hostnetwork: bool = False,
327
+ host_aliases: list[k8s.V1HostAlias] | None = None,
328
+ tolerations: list[k8s.V1Toleration] | None = None,
329
+ security_context: k8s.V1PodSecurityContext | dict | None = None,
330
+ container_security_context: k8s.V1SecurityContext | dict | None = None,
331
+ dnspolicy: str | None = None,
332
+ dns_config: k8s.V1PodDNSConfig | None = None,
333
+ hostname: str | None = None,
334
+ subdomain: str | None = None,
335
+ schedulername: str | None = None,
336
+ full_pod_spec: k8s.V1Pod | None = None,
337
+ init_containers: list[k8s.V1Container] | None = None,
338
+ log_events_on_failure: bool = False,
339
+ do_xcom_push: bool = False,
340
+ pod_template_file: str | None = None,
341
+ pod_template_dict: dict | None = None,
342
+ priority_class_name: str | None = None,
343
+ pod_runtime_info_envs: list[k8s.V1EnvVar] | None = None,
344
+ termination_grace_period: int | None = None,
345
+ configmaps: list[str] | None = None,
346
+ skip_on_exit_code: int | Container[int] | None = None,
347
+ deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
348
+ poll_interval: float = 2,
349
+ log_pod_spec_on_failure: bool = True,
350
+ on_finish_action: str = "delete_pod",
351
+ is_delete_operator_pod: None | bool = None,
352
+ termination_message_policy: str = "File",
353
+ active_deadline_seconds: int | None = None,
354
+ callbacks: (
355
+ list[type[KubernetesPodOperatorCallback]] | type[KubernetesPodOperatorCallback] | None
356
+ ) = None,
357
+ progress_callback: Callable[[str], None] | None = None,
358
+ logging_interval: int | None = None,
359
+ trigger_kwargs: dict | None = None,
360
+ container_name_log_prefix_enabled: bool = True,
361
+ log_formatter: Callable[[str, str], str] | None = None,
362
+ **kwargs,
363
+ ) -> None:
364
+ super().__init__(**kwargs)
365
+ self.kubernetes_conn_id = kubernetes_conn_id
366
+ self.do_xcom_push = do_xcom_push
367
+ self.image = image
368
+ self.namespace = namespace
369
+ self.cmds = cmds or []
370
+ self.arguments = arguments or []
371
+ self.labels = labels or {}
372
+ self.startup_timeout_seconds = startup_timeout_seconds
373
+ self.startup_check_interval_seconds = startup_check_interval_seconds
374
+ # New parameter startup_timeout_seconds adds breaking change, to handle this as smooth as possible just reuse startup time
375
+ self.schedule_timeout_seconds = schedule_timeout_seconds or startup_timeout_seconds
376
+ env_vars = convert_env_vars(env_vars) if env_vars else []
377
+ self.env_vars = env_vars
378
+ pod_runtime_info_envs = (
379
+ [convert_pod_runtime_info_env(p) for p in pod_runtime_info_envs] if pod_runtime_info_envs else []
380
+ )
381
+ self.pod_runtime_info_envs = pod_runtime_info_envs
382
+ self.env_from = env_from or []
383
+ if configmaps:
384
+ self.env_from.extend([convert_configmap(c) for c in configmaps])
385
+ self.ports = [convert_port(p) for p in ports] if ports else []
386
+ volume_mounts = [convert_volume_mount(v) for v in volume_mounts] if volume_mounts else []
387
+ self.volume_mounts = volume_mounts
388
+ volumes = [convert_volume(volume) for volume in volumes] if volumes else []
389
+ self.volumes = volumes
390
+ self.secrets = secrets or []
391
+ self.in_cluster = in_cluster
392
+ self.cluster_context = cluster_context
393
+ self.reattach_on_restart = reattach_on_restart
394
+ self.get_logs = get_logs
395
+ # Fallback to the class variable BASE_CONTAINER_NAME here instead of via default argument value
396
+ # in the init method signature, to be compatible with subclasses overloading the class variable value.
397
+ self.base_container_name = base_container_name or self.BASE_CONTAINER_NAME
398
+ self.base_container_status_polling_interval = base_container_status_polling_interval
399
+ self.init_container_logs = init_container_logs
400
+ self.container_logs = container_logs or self.base_container_name
401
+ self.image_pull_policy = image_pull_policy
402
+ self.node_selector = node_selector or {}
403
+ self.annotations = annotations or {}
404
+ self.affinity = convert_affinity(affinity) if affinity else {}
405
+ self.container_resources = container_resources
406
+ self.config_file = config_file
407
+ self.image_pull_secrets = convert_image_pull_secrets(image_pull_secrets) if image_pull_secrets else []
408
+ self.service_account_name = service_account_name
409
+ self.automount_service_account_token = automount_service_account_token
410
+ self.hostnetwork = hostnetwork
411
+ self.host_aliases = host_aliases
412
+ self.tolerations = (
413
+ [convert_toleration(toleration) for toleration in tolerations] if tolerations else []
414
+ )
415
+ self.security_context = security_context or {}
416
+ self.container_security_context = container_security_context
417
+ self.dnspolicy = dnspolicy
418
+ self.dns_config = dns_config
419
+ self.hostname = hostname
420
+ self.subdomain = subdomain
421
+ self.schedulername = schedulername
422
+ self.full_pod_spec = full_pod_spec
423
+ self.init_containers = init_containers or []
424
+ self.log_events_on_failure = log_events_on_failure
425
+ self.priority_class_name = priority_class_name
426
+ self.pod_template_file = pod_template_file
427
+ self.pod_template_dict = pod_template_dict
428
+ self.name = name
429
+ self.random_name_suffix = random_name_suffix
430
+ self.termination_grace_period = termination_grace_period
431
+ self.pod_request_obj: k8s.V1Pod | None = None
432
+ self.pod: k8s.V1Pod | None = None
433
+ self.skip_on_exit_code = (
434
+ skip_on_exit_code
435
+ if isinstance(skip_on_exit_code, Container)
436
+ else [skip_on_exit_code]
437
+ if skip_on_exit_code is not None
438
+ else []
439
+ )
440
+ self.deferrable = deferrable
441
+ self.poll_interval = poll_interval
442
+ self.remote_pod: k8s.V1Pod | None = None
443
+ self.log_pod_spec_on_failure = log_pod_spec_on_failure
444
+ self.on_finish_action = OnFinishAction(on_finish_action)
445
+ # The `is_delete_operator_pod` parameter should have been removed in provider version 10.0.0.
446
+ # TODO: remove it from here and from the operator's parameters list when the next major version bumped
447
+ self._is_delete_operator_pod = self.on_finish_action == OnFinishAction.DELETE_POD
448
+ self.termination_message_policy = termination_message_policy
449
+ self.active_deadline_seconds = active_deadline_seconds
450
+ self.logging_interval = logging_interval
451
+ self.trigger_kwargs = trigger_kwargs
452
+
453
+ self._config_dict: dict | None = None # TODO: remove it when removing convert_config_file_to_dict
454
+ self._progress_callback = progress_callback
455
+ self.callbacks = [] if not callbacks else callbacks if isinstance(callbacks, list) else [callbacks]
456
+ self._killed: bool = False
457
+ self.container_name_log_prefix_enabled = container_name_log_prefix_enabled
458
+ self.log_formatter = log_formatter
459
+
460
+ @cached_property
461
+ def _incluster_namespace(self):
462
+ from pathlib import Path
463
+
464
+ path = Path("/var/run/secrets/kubernetes.io/serviceaccount/namespace")
465
+ return path.exists() and path.read_text() or None
466
+
467
+ def _render_nested_template_fields(
468
+ self,
469
+ content: Any,
470
+ context: Context,
471
+ jinja_env: jinja2.Environment,
472
+ seen_oids: set,
473
+ ) -> None:
474
+ if id(content) not in seen_oids:
475
+ template_fields: tuple | None
476
+
477
+ if isinstance(content, k8s.V1EnvVar):
478
+ template_fields = ("value", "name")
479
+ elif isinstance(content, k8s.V1ResourceRequirements):
480
+ template_fields = ("limits", "requests")
481
+ elif isinstance(content, k8s.V1Volume):
482
+ template_fields = ("name", "persistent_volume_claim", "config_map")
483
+ elif isinstance(content, k8s.V1VolumeMount):
484
+ template_fields = ("name", "sub_path")
485
+ elif isinstance(content, k8s.V1PersistentVolumeClaimVolumeSource):
486
+ template_fields = ("claim_name",)
487
+ elif isinstance(content, k8s.V1ConfigMapVolumeSource):
488
+ template_fields = ("name",)
489
+ elif isinstance(content, k8s.V1EnvFromSource):
490
+ template_fields = ("config_map_ref",)
491
+ elif isinstance(content, k8s.V1ConfigMapEnvSource):
492
+ template_fields = ("name",)
493
+ else:
494
+ template_fields = None
495
+
496
+ if template_fields:
497
+ seen_oids.add(id(content))
498
+ self._do_render_template_fields(content, template_fields, context, jinja_env, seen_oids)
499
+ return
500
+
501
+ super()._render_nested_template_fields(content, context, jinja_env, seen_oids)
502
+
503
+ @staticmethod
504
+ def _get_ti_pod_labels(context: Context | None = None, include_try_number: bool = True) -> dict[str, str]:
505
+ """
506
+ Generate labels for the pod to track the pod in case of Operator crash.
507
+
508
+ :param context: task context provided by airflow DAG.
509
+ :param include_try_number: if set to True will add the try number
510
+ from the task context to the pod labels.
511
+ :return: dict
512
+ """
513
+ if not context:
514
+ return {}
515
+
516
+ ti = context["ti"]
517
+ run_id = context["run_id"]
518
+
519
+ labels = {
520
+ "dag_id": ti.dag_id,
521
+ "task_id": ti.task_id,
522
+ "run_id": run_id,
523
+ "kubernetes_pod_operator": "True",
524
+ }
525
+
526
+ map_index = ti.map_index
527
+ if map_index is not None and map_index >= 0:
528
+ labels["map_index"] = str(map_index)
529
+
530
+ if include_try_number:
531
+ labels.update(try_number=str(ti.try_number))
532
+ # In the case of sub dags this is just useful
533
+ # TODO: Remove this when the minimum version of Airflow is bumped to 3.0
534
+ if getattr(context["dag"], "parent_dag", False):
535
+ labels["parent_dag_id"] = context["dag"].parent_dag.dag_id # type: ignore[attr-defined]
536
+
537
+ # Ensure that label is valid for Kube,
538
+ # and if not truncate/remove invalid chars and replace with short hash.
539
+ for label_id, label in labels.items():
540
+ safe_label = pod_generator.make_safe_label_value(str(label))
541
+ labels[label_id] = safe_label
542
+ return labels
543
+
544
    @cached_property
    def pod_manager(self) -> PodManager:
        # Lazily construct (and cache) the manager used for all pod lifecycle
        # operations; it shares the operator's API client and callbacks.
        return PodManager(kube_client=self.client, callbacks=self.callbacks)
547
+
548
+ @cached_property
549
+ def hook(self) -> PodOperatorHookProtocol:
550
+ hook = KubernetesHook(
551
+ conn_id=self.kubernetes_conn_id,
552
+ in_cluster=self.in_cluster,
553
+ config_file=self.config_file,
554
+ cluster_context=self.cluster_context,
555
+ )
556
+ return hook
557
+
558
+ @cached_property
559
+ def client(self) -> CoreV1Api:
560
+ client = self.hook.core_v1_client
561
+
562
+ for callback in self.callbacks:
563
+ callback.on_sync_client_creation(client=client, operator=self)
564
+ return client
565
+
566
+ def find_pod(self, namespace: str, context: Context, *, exclude_checked: bool = True) -> k8s.V1Pod | None:
567
+ """Return an already-running pod for this task instance if one exists."""
568
+ label_selector = self._build_find_pod_label_selector(context, exclude_checked=exclude_checked)
569
+ pod_list = self.client.list_namespaced_pod(
570
+ namespace=namespace,
571
+ label_selector=label_selector,
572
+ ).items
573
+
574
+ pod = None
575
+ num_pods = len(pod_list)
576
+
577
+ if num_pods == 1:
578
+ pod = pod_list[0]
579
+ self.log_matching_pod(pod=pod, context=context)
580
+ elif num_pods > 1:
581
+ if self.reattach_on_restart:
582
+ raise AirflowException(f"More than one pod running with labels {label_selector}")
583
+ self.log.warning("Found more than one pod running with labels %s, resolving ...", label_selector)
584
+ pod = self.process_duplicate_label_pods(pod_list)
585
+ self.log_matching_pod(pod=pod, context=context)
586
+
587
+ return pod
588
+
589
+ def log_matching_pod(self, pod: k8s.V1Pod, context: Context) -> None:
590
+ self.log.info("Found matching pod %s with labels %s", pod.metadata.name, pod.metadata.labels)
591
+ self.log.info("`try_number` of task_instance: %s", context["ti"].try_number)
592
+ self.log.info("`try_number` of pod: %s", pod.metadata.labels["try_number"])
593
+
594
    def get_or_create_pod(self, pod_request_obj: k8s.V1Pod, context: Context) -> k8s.V1Pod:
        """
        Reuse a matching non-terminated pod when reattaching; otherwise create a new one.

        :param pod_request_obj: fully-built pod manifest to create if no reusable pod exists.
        :param context: task context used to locate an existing pod by its identifying labels.
        :return: the reused or newly created ``V1Pod``.
        """
        if self.reattach_on_restart:
            pod = self.find_pod(pod_request_obj.metadata.namespace, context=context)
            if pod:
                # If the pod is terminated, delete it and create a new one, as it is
                # not possible to retrieve the xcom value from a terminated pod.
                pod_phase = pod.status.phase if pod.status and pod.status.phase else None
                pod_reason = pod.status.reason.lower() if pod.status and pod.status.reason else ""
                if pod_phase not in (PodPhase.SUCCEEDED, PodPhase.FAILED) and pod_reason != "evicted":
                    self.log.info(
                        "Reusing existing pod '%s' (phase=%s, reason=%s) since it is not terminated or evicted.",
                        pod.metadata.name,
                        pod_phase,
                        pod_reason,
                    )
                    return pod

                self.log.info(
                    "Found terminated old matching pod %s with labels %s",
                    pod.metadata.name,
                    pod.metadata.labels,
                )

                # if not required to delete the pod then keep old logic and not automatically create new pod
                deleted_pod = self.process_pod_deletion(pod)
                if not deleted_pod:
                    return pod

                self.log.info("Deleted pod to handle rerun and create new pod!")

        self.log.debug("Starting pod:\n%s", yaml.safe_dump(pod_request_obj.to_dict()))
        self.pod_manager.create_pod(pod=pod_request_obj)
        return pod_request_obj
626
+
627
    def await_pod_start(self, pod: k8s.V1Pod) -> None:
        """
        Wait until the pod starts, watching pod events concurrently.

        On launch failure, optionally logs container states and pod events
        before re-raising the original exception.

        :param pod: the pod to wait on.
        """
        try:
            # Run the event watcher and the start waiter concurrently so that
            # scheduling/startup events are surfaced while we wait.
            async def _await_pod_start():
                events_task = self.pod_manager.watch_pod_events(pod, self.startup_check_interval_seconds)
                pod_start_task = self.pod_manager.await_pod_start(
                    pod=pod,
                    schedule_timeout=self.schedule_timeout_seconds,
                    startup_timeout=self.startup_timeout_seconds,
                    check_interval=self.startup_check_interval_seconds,
                )
                await asyncio.gather(pod_start_task, events_task)

            asyncio.run(_await_pod_start())
        except PodLaunchFailedException:
            if self.log_events_on_failure:
                # Best-effort diagnostics; never mask the original launch failure.
                self._read_pod_container_states(pod, reraise=False)
                self._read_pod_events(pod, reraise=False)
            raise
646
+
647
+ def extract_xcom(self, pod: k8s.V1Pod) -> dict[Any, Any] | None:
648
+ """Retrieve xcom value and kill xcom sidecar container."""
649
+ result = self.pod_manager.extract_xcom(pod)
650
+ if isinstance(result, str) and result.rstrip() == EMPTY_XCOM_RESULT:
651
+ self.log.info("xcom result file is empty.")
652
+ return None
653
+
654
+ self.log.debug("xcom result: \n%s", result)
655
+ return json.loads(result)
656
+
657
+ def execute(self, context: Context):
658
+ """Based on the deferrable parameter runs the pod asynchronously or synchronously."""
659
+ self.name = self._set_name(self.name)
660
+ if not self.deferrable:
661
+ return self.execute_sync(context)
662
+
663
+ self.execute_async(context)
664
+
665
    def execute_sync(self, context: Context):
        """
        Run the pod to completion in-process and return the xcom result (if enabled).

        Creates (or reuses) the pod, waits for init containers, startup and
        completion while streaming logs, fires the registered callbacks at each
        lifecycle stage, and always runs ``post_complete_action`` via ``finally``.

        :param context: task context.
        :return: the extracted xcom value when ``do_xcom_push`` is enabled, else None.
        """
        result = None
        try:
            if self.pod_request_obj is None:
                self.pod_request_obj = self.build_pod_request_obj(context)
            for callback in self.callbacks:
                callback.on_pod_manifest_created(
                    pod_request=self.pod_request_obj,
                    client=self.client,
                    mode=ExecutionMode.SYNC,
                    context=context,
                    operator=self,
                )
            if self.pod is None:
                self.pod = self.get_or_create_pod(  # must set `self.pod` for `on_kill`
                    pod_request_obj=self.pod_request_obj,
                    context=context,
                )
            # push to xcom now so that if there is an error we still have the values
            ti = context["ti"]
            ti.xcom_push(key="pod_name", value=self.pod.metadata.name)
            ti.xcom_push(key="pod_namespace", value=self.pod.metadata.namespace)

            # get remote pod for use in cleanup methods
            self.remote_pod = self.find_pod(self.pod.metadata.namespace, context=context)
            for callback in self.callbacks:
                callback.on_pod_creation(
                    pod=self.remote_pod,
                    client=self.client,
                    mode=ExecutionMode.SYNC,
                    context=context,
                    operator=self,
                )

            self.await_init_containers_completion(pod=self.pod)

            self.await_pod_start(pod=self.pod)
            if self.callbacks:
                # Re-read the pod so callbacks observe its latest state.
                pod = self.find_pod(self.pod.metadata.namespace, context=context)
                for callback in self.callbacks:
                    callback.on_pod_starting(
                        pod=pod,
                        client=self.client,
                        mode=ExecutionMode.SYNC,
                        context=context,
                        operator=self,
                    )

            self.await_pod_completion(pod=self.pod)
            if self.callbacks:
                pod = self.find_pod(self.pod.metadata.namespace, context=context)
                for callback in self.callbacks:
                    callback.on_pod_completion(
                        pod=pod,
                        client=self.client,
                        mode=ExecutionMode.SYNC,
                        context=context,
                        operator=self,
                    )
                for callback in self.callbacks:
                    callback.on_pod_teardown(
                        pod=pod,
                        client=self.client,
                        mode=ExecutionMode.SYNC,
                        context=context,
                        operator=self,
                    )

            if self.do_xcom_push:
                self.pod_manager.await_xcom_sidecar_container_start(pod=self.pod)
                result = self.extract_xcom(pod=self.pod)
            istio_enabled = self.is_istio_enabled(self.pod)
            self.remote_pod = self.pod_manager.await_pod_completion(
                self.pod, istio_enabled, self.base_container_name
            )
        finally:
            # Cleanup must run even on failure; fall back to the request object
            # when the pod was never created.
            pod_to_clean = self.pod or self.pod_request_obj
            self.post_complete_action(
                pod=pod_to_clean, remote_pod=self.remote_pod, context=context, result=result
            )

        if self.do_xcom_push:
            return result
748
+
749
    @tenacity.retry(
        wait=tenacity.wait_exponential(max=15),
        retry=tenacity.retry_if_exception_type(PodCredentialsExpiredFailure),
        reraise=True,
    )
    def await_init_containers_completion(self, pod: k8s.V1Pod):
        """
        Stream logs of the requested init containers until they complete.

        Retries with exponential backoff when credentials expire: a 401 from the
        API is turned into ``PodCredentialsExpiredFailure`` by
        ``_handle_api_exception`` after refreshing cached clients.

        :param pod: the running pod whose init containers are watched.
        """
        try:
            if self.init_container_logs:
                self.pod_manager.fetch_requested_init_container_logs(
                    pod=pod,
                    init_containers=self.init_container_logs,
                    follow_logs=True,
                    container_name_log_prefix_enabled=self.container_name_log_prefix_enabled,
                    log_formatter=self.log_formatter,
                )
        except kubernetes.client.exceptions.ApiException as exc:
            self._handle_api_exception(exc, pod)
766
+
767
    @tenacity.retry(
        wait=tenacity.wait_exponential(max=15),
        retry=tenacity.retry_if_exception_type(PodCredentialsExpiredFailure),
        reraise=True,
    )
    def await_pod_completion(self, pod: k8s.V1Pod):
        """
        Follow container logs (when enabled) and wait for the base container to finish.

        Retries with exponential backoff when credentials expire: a 401 from the
        API is turned into ``PodCredentialsExpiredFailure`` by
        ``_handle_api_exception`` after refreshing cached clients.

        :param pod: the running pod to wait on.
        """
        try:
            if self.get_logs:
                self.pod_manager.fetch_requested_container_logs(
                    pod=pod,
                    containers=self.container_logs,
                    follow_logs=True,
                    container_name_log_prefix_enabled=self.container_name_log_prefix_enabled,
                    log_formatter=self.log_formatter,
                )
            # If the base container's logs were not followed above, poll its
            # status explicitly until it terminates.
            if not self.get_logs or (
                self.container_logs is not True and self.base_container_name not in self.container_logs
            ):
                self.pod_manager.await_container_completion(
                    pod=pod,
                    container_name=self.base_container_name,
                    polling_time=self.base_container_status_polling_interval,
                )
        except kubernetes.client.exceptions.ApiException as exc:
            self._handle_api_exception(exc, pod)
792
+
793
+ def _handle_api_exception(
794
+ self,
795
+ exc: kubernetes.client.exceptions.ApiException,
796
+ pod: k8s.V1Pod,
797
+ ):
798
+ if exc.status and str(exc.status) == "401":
799
+ self.log.warning(
800
+ "Failed to check container status due to permission error. Refreshing credentials and retrying."
801
+ )
802
+ self._refresh_cached_properties()
803
+ self.pod_manager.read_pod(pod=pod) # attempt using refreshed credentials, raises if still invalid
804
+ raise PodCredentialsExpiredFailure("Kubernetes credentials expired, retrying after refresh.")
805
+ raise exc
806
+
807
+ def _refresh_cached_properties(self):
808
+ del self.hook
809
+ del self.client
810
+ del self.pod_manager
811
+
812
+ def execute_async(self, context: Context) -> None:
813
+ if self.pod_request_obj is None:
814
+ self.pod_request_obj = self.build_pod_request_obj(context)
815
+ for callback in self.callbacks:
816
+ callback.on_pod_manifest_created(
817
+ pod_request=self.pod_request_obj,
818
+ client=self.client,
819
+ mode=ExecutionMode.SYNC,
820
+ context=context,
821
+ operator=self,
822
+ )
823
+ if self.pod is None:
824
+ self.pod = self.get_or_create_pod( # must set `self.pod` for `on_kill`
825
+ pod_request_obj=self.pod_request_obj,
826
+ context=context,
827
+ )
828
+
829
+ if self.callbacks:
830
+ pod = self.find_pod(self.pod.metadata.namespace, context=context)
831
+ for callback in self.callbacks:
832
+ callback.on_pod_creation(
833
+ pod=pod,
834
+ client=self.client,
835
+ mode=ExecutionMode.SYNC,
836
+ context=context,
837
+ operator=self,
838
+ )
839
+ ti = context["ti"]
840
+ ti.xcom_push(key="pod_name", value=self.pod.metadata.name)
841
+ ti.xcom_push(key="pod_namespace", value=self.pod.metadata.namespace)
842
+
843
+ self.invoke_defer_method()
844
+
845
+ def convert_config_file_to_dict(self):
846
+ """Convert passed config_file to dict representation."""
847
+ config_file = self.config_file if self.config_file else os.environ.get(KUBE_CONFIG_ENV_VAR)
848
+ if config_file:
849
+ with open(config_file) as f:
850
+ self._config_dict = yaml.safe_load(f)
851
+ else:
852
+ self._config_dict = None
853
+
854
    def invoke_defer_method(self, last_log_time: DateTime | None = None) -> None:
        """
        Redefine triggers which are being used in child classes.

        :param last_log_time: timestamp of the last log line already emitted;
            passed to the trigger so log streaming resumes where it left off.
        """
        # Serialize the kubeconfig into a dict (self._config_dict) so it can be
        # handed to the trigger.
        self.convert_config_file_to_dict()
        trigger_start_time = datetime.datetime.now(tz=datetime.timezone.utc)
        self.defer(
            trigger=KubernetesPodTrigger(
                pod_name=self.pod.metadata.name,  # type: ignore[union-attr]
                pod_namespace=self.pod.metadata.namespace,  # type: ignore[union-attr]
                trigger_start_time=trigger_start_time,
                kubernetes_conn_id=self.kubernetes_conn_id,
                cluster_context=self.cluster_context,
                config_dict=self._config_dict,
                in_cluster=self.in_cluster,
                poll_interval=self.poll_interval,
                get_logs=self.get_logs,
                startup_timeout=self.startup_timeout_seconds,
                startup_check_interval=self.startup_check_interval_seconds,
                schedule_timeout=self.schedule_timeout_seconds,
                base_container_name=self.base_container_name,
                on_finish_action=self.on_finish_action.value,
                last_log_time=last_log_time,
                logging_interval=self.logging_interval,
                trigger_kwargs=self.trigger_kwargs,
            ),
            # Execution resumes in trigger_reentry once the trigger fires.
            method_name="trigger_reentry",
        )
880
+
881
    def trigger_reentry(self, context: Context, event: dict[str, Any]) -> Any:
        """
        Point of re-entry from trigger.

        If ``logging_interval`` is None, then at this point, the pod should be done, and we'll just fetch
        the logs and exit.

        If ``logging_interval`` is not None, it could be that the pod is still running, and we'll just
        grab the latest logs and defer back to the trigger again.

        :param context: task context.
        :param event: payload emitted by the trigger; contains pod ``name`` and
            ``namespace``, a ``status``, and optionally ``last_log_time``,
            ``message`` and ``stack_trace``.
        """
        self.pod = None
        xcom_sidecar_output = None
        try:
            pod_name = event["name"]
            pod_namespace = event["namespace"]

            self.pod = self.hook.get_pod(pod_name, pod_namespace)

            if not self.pod:
                raise PodNotFoundException("Could not find pod after resuming from deferral")

            # Follow logs to the end only when there is no periodic logging interval.
            follow = self.logging_interval is None
            last_log_time = event.get("last_log_time")

            if event["status"] in ("error", "failed", "timeout", "success"):
                if self.get_logs:
                    self._write_logs(self.pod, follow=follow, since_time=last_log_time)

                for callback in self.callbacks:
                    callback.on_pod_completion(
                        pod=self.pod,
                        client=self.client,
                        mode=ExecutionMode.SYNC,
                        context=context,
                        operator=self,
                    )
                for callback in self.callbacks:
                    callback.on_pod_teardown(
                        pod=self.pod,
                        client=self.client,
                        mode=ExecutionMode.SYNC,
                        context=context,
                        operator=self,
                    )

                xcom_sidecar_output = self.extract_xcom(pod=self.pod) if self.do_xcom_push else None

                if event["status"] != "success":
                    self.log.error(
                        "Trigger emitted an %s event, failing the task: %s", event["status"], event["message"]
                    )
                    message = event.get("stack_trace", event["message"])
                    raise AirflowException(message)
        except TaskDeferred:
            # Re-deferral is normal control flow; let it propagate untouched.
            raise
        finally:
            self._clean(event=event, context=context, result=xcom_sidecar_output)
938
+
939
    def _clean(self, event: dict[str, Any], result: dict | None, context: Context) -> None:
        """
        Final cleanup after resuming from deferral.

        Waits for pod completion (except for 'timeout' events, where the pod
        may never complete) and then runs ``post_complete_action``.

        :param event: the trigger event that resumed the task.
        :param result: xcom sidecar output to pass to cleanup, if any.
        :param context: task context.
        """
        if self.pod is None:
            return

        istio_enabled = self.is_istio_enabled(self.pod)
        # Skip await_pod_completion when the event is 'timeout' due to the pod can hang
        # on the ErrImagePull or ContainerCreating step and it will never complete
        if event["status"] != "timeout":
            try:
                self.pod = self.pod_manager.await_pod_completion(
                    self.pod, istio_enabled, self.base_container_name
                )
            except ApiException as e:
                if e.status == 404:
                    # Pod disappeared (e.g. deleted externally); nothing left to await.
                    self.pod = None
                    self.log.warning(
                        "Pod not found while waiting for completion. The last status was %r", event["status"]
                    )
                else:
                    raise e
        if self.pod is not None:
            self.post_complete_action(
                pod=self.pod,
                remote_pod=self.pod,
                context=context,
                result=result,
            )
966
+
967
+ def _write_logs(self, pod: k8s.V1Pod, follow: bool = False, since_time: DateTime | None = None) -> None:
968
+ try:
969
+ since_seconds = (
970
+ math.ceil((datetime.datetime.now(tz=datetime.timezone.utc) - since_time).total_seconds())
971
+ if since_time
972
+ else None
973
+ )
974
+ logs = self.client.read_namespaced_pod_log(
975
+ name=pod.metadata.name,
976
+ namespace=pod.metadata.namespace,
977
+ container=self.base_container_name,
978
+ follow=follow,
979
+ timestamps=False,
980
+ since_seconds=since_seconds,
981
+ _preload_content=False,
982
+ )
983
+ for raw_line in logs:
984
+ line = raw_line.decode("utf-8", errors="backslashreplace").rstrip("\n")
985
+ if line:
986
+ self.log.info("[%s] logs: %s", self.base_container_name, line)
987
+ except (HTTPError, ApiException) as e:
988
+ self.log.warning(
989
+ "Reading of logs interrupted with error %r; will retry. "
990
+ "Set log level to DEBUG for traceback.",
991
+ e if not isinstance(e, ApiException) else e.reason,
992
+ )
993
+
994
+ def post_complete_action(
995
+ self, *, pod: k8s.V1Pod, remote_pod: k8s.V1Pod, context: Context, result: dict | None, **kwargs
996
+ ) -> None:
997
+ """Actions that must be done after operator finishes logic of the deferrable_execution."""
998
+ self.cleanup(
999
+ pod=pod,
1000
+ remote_pod=remote_pod,
1001
+ xcom_result=result,
1002
+ context=context,
1003
+ )
1004
+ for callback in self.callbacks:
1005
+ callback.on_pod_cleanup(
1006
+ pod=pod, client=self.client, mode=ExecutionMode.SYNC, operator=self, context=context
1007
+ )
1008
+
1009
    def cleanup(
        self,
        pod: k8s.V1Pod,
        remote_pod: k8s.V1Pod,
        xcom_result: dict | None = None,
        context: Context | None = None,
    ) -> None:
        """
        Evaluate the final pod state, delete/patch the pod as configured and raise on failure.

        :param pod: the locally-known pod object (used in log/error messages and event reads).
        :param remote_pod: the latest pod state read from the API server.
        :param xcom_result: xcom value to push even when the pod failed.
        :param context: task context; needed to push the xcom value on failure.
        :raises AirflowSkipException: when the base container exit code is in ``skip_on_exit_code``.
        :raises AirflowException: when the pod (or, under istio, its base container) did not succeed.
        """
        # Skip cleaning the pod in the following scenarios.
        # 1. If a task got marked as failed, "on_kill" method would be called and the pod will be cleaned up
        #    there. Cleaning it up again will raise an exception (which might cause retry).
        # 2. remote pod is null (ex: pod creation failed)
        if self._killed or not remote_pod:
            return

        istio_enabled = self.is_istio_enabled(remote_pod)
        pod_phase = remote_pod.status.phase if hasattr(remote_pod, "status") else None

        # if the pod fails or success, but we don't want to delete it
        if pod_phase != PodPhase.SUCCEEDED or self.on_finish_action == OnFinishAction.KEEP_POD:
            self.patch_already_checked(remote_pod, reraise=False)

        # Under istio the phase alone is not conclusive: judge by the base container.
        failed = (pod_phase != PodPhase.SUCCEEDED and not istio_enabled) or (
            istio_enabled and not container_is_succeeded(remote_pod, self.base_container_name)
        )

        if failed:
            if self.do_xcom_push and xcom_result and context:
                # Ensure that existing XCom is pushed even in case of failure
                context["ti"].xcom_push(XCOM_RETURN_KEY, xcom_result)

            if self.log_events_on_failure:
                self._read_pod_container_states(pod, reraise=False)
                self._read_pod_events(pod, reraise=False)

        self.process_pod_deletion(remote_pod, reraise=False)

        if self.skip_on_exit_code:
            container_statuses = (
                remote_pod.status.container_statuses if remote_pod and remote_pod.status else None
            ) or []
            base_container_status = next(
                (x for x in container_statuses if x.name == self.base_container_name), None
            )
            exit_code = (
                base_container_status.state.terminated.exit_code
                if base_container_status
                and base_container_status.state
                and base_container_status.state.terminated
                else None
            )
            if exit_code in self.skip_on_exit_code:
                raise AirflowSkipException(
                    f"Pod {pod and pod.metadata.name} returned exit code {exit_code}. Skipping."
                )

        if failed:
            error_message = get_container_termination_message(remote_pod, self.base_container_name)
            raise AirflowException(
                "\n".join(
                    filter(
                        None,
                        [
                            f"Pod {pod and pod.metadata.name} returned a failure.",
                            error_message if isinstance(error_message, str) else None,
                            f"remote_pod: {remote_pod}" if self.log_pod_spec_on_failure else None,
                        ],
                    )
                )
            )
1078
+
1079
+ def _read_pod_events(self, pod, *, reraise=True) -> None:
1080
+ """Will fetch and emit events from pod."""
1081
+ with _optionally_suppress(reraise=reraise):
1082
+ for event in self.pod_manager.read_pod_events(pod).items:
1083
+ if event.type == PodEventType.WARNING.value:
1084
+ self.log.warning("Pod Event: %s - %s", event.reason, event.message)
1085
+ else:
1086
+ # events.k8s.io/v1 at this stage will always be Normal
1087
+ self.log.info("Pod Event: %s - %s", event.reason, event.message)
1088
+
1089
+ def _read_pod_container_states(self, pod, *, reraise=True) -> None:
1090
+ """Log detailed container states of pod for debugging."""
1091
+ with _optionally_suppress(reraise=reraise):
1092
+ remote_pod = self.pod_manager.read_pod(pod)
1093
+ pod_phase = getattr(remote_pod.status, "phase", None)
1094
+ pod_reason = getattr(remote_pod.status, "reason", None)
1095
+ self.log.info("Pod phase: %s, reason: %s", pod_phase, pod_reason)
1096
+
1097
+ container_statuses = getattr(remote_pod.status, "container_statuses", None) or []
1098
+ for status in container_statuses:
1099
+ name = status.name
1100
+ state = status.state
1101
+ if state.terminated:
1102
+ level = self.log.error if state.terminated.exit_code != 0 else self.log.info
1103
+ level(
1104
+ "Container '%s': state='TERMINATED', reason='%s', exit_code=%s, message='%s'",
1105
+ name,
1106
+ state.terminated.reason,
1107
+ state.terminated.exit_code,
1108
+ state.terminated.message,
1109
+ )
1110
+ elif state.waiting:
1111
+ self.log.warning(
1112
+ "Container '%s': state='WAITING', reason='%s', message='%s'",
1113
+ name,
1114
+ state.waiting.reason,
1115
+ state.waiting.message,
1116
+ )
1117
+ elif state.running:
1118
+ self.log.info(
1119
+ "Container '%s': state='RUNNING', started_at=%s",
1120
+ name,
1121
+ state.running.started_at,
1122
+ )
1123
+
1124
+ def is_istio_enabled(self, pod: V1Pod) -> bool:
1125
+ """Check if istio is enabled for the namespace of the pod by inspecting the namespace labels."""
1126
+ if not pod:
1127
+ return False
1128
+
1129
+ remote_pod = self.pod_manager.read_pod(pod)
1130
+
1131
+ return any(container.name == self.ISTIO_CONTAINER_NAME for container in remote_pod.spec.containers)
1132
+
1133
+ def kill_istio_sidecar(self, pod: V1Pod) -> None:
1134
+ command = "/bin/sh -c 'curl -fsI -X POST http://localhost:15020/quitquitquit'"
1135
+ command_to_container = shlex.split(command)
1136
+ resp = stream(
1137
+ self.client.connect_get_namespaced_pod_exec,
1138
+ name=pod.metadata.name,
1139
+ namespace=pod.metadata.namespace,
1140
+ container=self.ISTIO_CONTAINER_NAME,
1141
+ command=command_to_container,
1142
+ stderr=True,
1143
+ stdin=True,
1144
+ stdout=True,
1145
+ tty=False,
1146
+ _preload_content=False,
1147
+ )
1148
+ output = []
1149
+ while resp.is_open():
1150
+ if resp.peek_stdout():
1151
+ output.append(resp.read_stdout())
1152
+
1153
+ resp.close()
1154
+ output_str = "".join(output)
1155
+ self.log.info("Output of curl command to kill istio: %s", output_str)
1156
+ resp.close()
1157
+ if self.KILL_ISTIO_PROXY_SUCCESS_MSG not in output_str:
1158
+ raise AirflowException("Error while deleting istio-proxy sidecar: %s", output_str)
1159
+
1160
+ def process_pod_deletion(self, pod: k8s.V1Pod, *, reraise=True) -> bool:
1161
+ with _optionally_suppress(reraise=reraise):
1162
+ if pod is not None:
1163
+ should_delete_pod = (self.on_finish_action == OnFinishAction.DELETE_POD) or (
1164
+ self.on_finish_action == OnFinishAction.DELETE_SUCCEEDED_POD
1165
+ and (
1166
+ pod.status.phase == PodPhase.SUCCEEDED
1167
+ or container_is_succeeded(pod, self.base_container_name)
1168
+ )
1169
+ )
1170
+ if should_delete_pod:
1171
+ self.log.info("Deleting pod: %s", pod.metadata.name)
1172
+ self.pod_manager.delete_pod(pod)
1173
+ return True
1174
+ self.log.info("Skipping deleting pod: %s", pod.metadata.name)
1175
+
1176
+ return False
1177
+
1178
+ def _build_find_pod_label_selector(self, context: Context | None = None, *, exclude_checked=True) -> str:
1179
+ labels = {
1180
+ **self.labels,
1181
+ **self._get_ti_pod_labels(context, include_try_number=False),
1182
+ }
1183
+ labels = _normalize_labels_dict(labels)
1184
+ label_strings = [f"{label_id}={label}" for label_id, label in sorted(labels.items())]
1185
+ labels_value = ",".join(label_strings)
1186
+ if exclude_checked:
1187
+ labels_value = f"{labels_value},{self.POD_CHECKED_KEY}!=True"
1188
+ labels_value = f"{labels_value},!airflow-worker"
1189
+ return labels_value
1190
+
1191
+ @staticmethod
1192
+ def _set_name(name: str | None) -> str | None:
1193
+ if name is not None:
1194
+ validate_key(name, max_length=220)
1195
+ return re.sub(r"[^a-z0-9-]+", "-", name.lower())
1196
+ return None
1197
+
1198
+ def patch_already_checked(self, pod: k8s.V1Pod, *, reraise=True):
1199
+ """Add an "already checked" label to ensure we don't reattach on retries."""
1200
+ with _optionally_suppress(reraise=reraise):
1201
+ self.client.patch_namespaced_pod(
1202
+ name=pod.metadata.name,
1203
+ namespace=pod.metadata.namespace,
1204
+ body={"metadata": {"labels": {self.POD_CHECKED_KEY: "True"}}},
1205
+ )
1206
+
1207
+ def on_kill(self) -> None:
1208
+ self._killed = True
1209
+ if self.pod:
1210
+ pod = self.pod
1211
+ kwargs = {
1212
+ "name": pod.metadata.name,
1213
+ "namespace": pod.metadata.namespace,
1214
+ }
1215
+ if self.termination_grace_period is not None:
1216
+ kwargs.update(grace_period_seconds=self.termination_grace_period)
1217
+
1218
+ try:
1219
+ self.client.delete_namespaced_pod(**kwargs)
1220
+ except kubernetes.client.exceptions.ApiException:
1221
+ self.log.exception("Unable to delete pod %s", self.pod.metadata.name)
1222
+
1223
    def build_pod_request_obj(self, context: Context | None = None) -> k8s.V1Pod:
        """
        Return V1Pod object based on pod template file, full pod spec, and other operator parameters.

        The V1Pod attributes are derived (in order of precedence) from operator params, full pod spec, pod
        template file.

        :param context: task context; when provided, task-instance identifying
            labels are added to the pod metadata.
        :return: the fully assembled ``V1Pod`` request object (after the
            pod_mutation_hook has been applied).
        """
        self.log.debug("Creating pod for KubernetesPodOperator task %s", self.task_id)

        self.env_vars = convert_env_vars_or_raise_error(self.env_vars) if self.env_vars else []
        if self.pod_runtime_info_envs:
            self.env_vars.extend(self.pod_runtime_info_envs)

        # Determine the base pod: template file > template dict > full pod spec > empty.
        if self.pod_template_file:
            self.log.debug("Pod template file found, will parse for base pod")
            pod_template = pod_generator.PodGenerator.deserialize_model_file(self.pod_template_file)
            if self.full_pod_spec:
                pod_template = PodGenerator.reconcile_pods(pod_template, self.full_pod_spec)
        elif self.pod_template_dict:
            self.log.debug("Pod template dict found, will parse for base pod")
            pod_template = pod_generator.PodGenerator.deserialize_model_dict(self.pod_template_dict)
            if self.full_pod_spec:
                pod_template = PodGenerator.reconcile_pods(pod_template, self.full_pod_spec)
        elif self.full_pod_spec:
            pod_template = self.full_pod_spec
        else:
            pod_template = k8s.V1Pod(metadata=k8s.V1ObjectMeta())

        # Pod built from the operator's own parameters; takes precedence over the template.
        pod = k8s.V1Pod(
            api_version="v1",
            kind="Pod",
            metadata=k8s.V1ObjectMeta(
                namespace=self.namespace,
                labels=_normalize_labels_dict(self.labels),
                name=self.name,
                annotations=self.annotations,
            ),
            spec=k8s.V1PodSpec(
                node_selector=self.node_selector,
                affinity=self.affinity,
                tolerations=self.tolerations,
                init_containers=self.init_containers,
                host_aliases=self.host_aliases,
                containers=[
                    k8s.V1Container(
                        image=self.image,
                        name=self.base_container_name,
                        command=self.cmds,
                        ports=self.ports,
                        image_pull_policy=self.image_pull_policy,
                        resources=self.container_resources,
                        volume_mounts=self.volume_mounts,
                        args=self.arguments,
                        env=self.env_vars,
                        env_from=self.env_from,
                        security_context=self.container_security_context,
                        termination_message_policy=self.termination_message_policy,
                    )
                ],
                image_pull_secrets=self.image_pull_secrets,
                service_account_name=self.service_account_name,
                automount_service_account_token=self.automount_service_account_token,
                host_network=self.hostnetwork,
                hostname=self.hostname,
                subdomain=self.subdomain,
                security_context=self.security_context,
                dns_policy=self.dnspolicy,
                dns_config=self.dns_config,
                scheduler_name=self.schedulername,
                restart_policy="Never",
                priority_class_name=self.priority_class_name,
                volumes=self.volumes,
                active_deadline_seconds=self.active_deadline_seconds,
                termination_grace_period_seconds=self.termination_grace_period,
            ),
        )

        pod = PodGenerator.reconcile_pods(pod_template, pod)

        if not pod.metadata.name:
            pod.metadata.name = create_unique_id(
                task_id=self.task_id, unique=self.random_name_suffix, max_length=POD_NAME_MAX_LENGTH
            )
        elif self.random_name_suffix:
            # user has supplied pod name, we're just adding suffix
            pod.metadata.name = add_unique_suffix(name=pod.metadata.name)

        if not pod.metadata.namespace:
            # Fall back through: operator param > hook > in-cluster namespace > "default".
            hook_namespace = self.hook.get_namespace()
            pod_namespace = self.namespace or hook_namespace or self._incluster_namespace or "default"
            pod.metadata.namespace = pod_namespace

        for secret in self.secrets:
            self.log.debug("Adding secret to task %s", self.task_id)
            pod = secret.attach_to_pod(pod)
        if self.do_xcom_push:
            self.log.debug("Adding xcom sidecar to task %s", self.task_id)
            pod = xcom_sidecar.add_xcom_sidecar(
                pod,
                sidecar_container_image=self.hook.get_xcom_sidecar_container_image(),
                sidecar_container_resources=self.hook.get_xcom_sidecar_container_resources(),
            )

        labels = self._get_ti_pod_labels(context)
        self.log.info("Building pod %s with labels: %s", pod.metadata.name, labels)

        # Merge Pod Identifying labels with labels passed to operator
        pod.metadata.labels.update(labels)
        # Add Airflow Version to the label
        # And a label to identify that pod is launched by KubernetesPodOperator
        pod.metadata.labels.update(
            {
                "airflow_version": airflow_version.replace("+", "-"),
                "airflow_kpo_in_cluster": str(self.hook.is_in_cluster),
            }
        )
        pod_mutation_hook(pod)
        return pod
1341
+
1342
+ def dry_run(self) -> None:
1343
+ """
1344
+ Print out the pod definition that would be created by this operator.
1345
+
1346
+ Does not include labels specific to the task instance (since there isn't
1347
+ one in a dry_run) and excludes all empty elements.
1348
+ """
1349
+ pod = self.build_pod_request_obj()
1350
+ print(yaml.dump(prune_dict(pod.to_dict(), mode="strict")))
1351
+
1352
+ def process_duplicate_label_pods(self, pod_list: list[k8s.V1Pod]) -> k8s.V1Pod:
1353
+ """
1354
+ Patch or delete the existing pod with duplicate labels.
1355
+
1356
+ This is to handle an edge case that can happen only if reattach_on_restart
1357
+ flag is False, and the previous run attempt has failed because the task
1358
+ process has been killed externally by the cluster or another process.
1359
+
1360
+ If the task process is killed externally, it breaks the code execution and
1361
+ immediately exists the task. As a result the pod created in the previous attempt
1362
+ will not be properly deleted or patched by cleanup() method.
1363
+
1364
+ Return the newly created pod to be used for the next run attempt.
1365
+ """
1366
+ new_pod = pod_list.pop(self._get_most_recent_pod_index(pod_list))
1367
+ old_pod = pod_list[0]
1368
+ self.patch_already_checked(old_pod, reraise=False)
1369
+ if self.on_finish_action == OnFinishAction.DELETE_POD:
1370
+ self.process_pod_deletion(old_pod)
1371
+ return new_pod
1372
+
1373
+ @staticmethod
1374
+ def _get_most_recent_pod_index(pod_list: list[k8s.V1Pod]) -> int:
1375
+ """Loop through a list of V1Pod objects and get the index of the most recent one."""
1376
+ pod_start_times: list[datetime.datetime] = [
1377
+ pod.to_dict().get("status").get("start_time") for pod in pod_list
1378
+ ]
1379
+ most_recent_start_time = max(pod_start_times)
1380
+ return pod_start_times.index(most_recent_start_time)
1381
+
1382
+
1383
+ class _optionally_suppress(AbstractContextManager):
1384
+ """
1385
+ Returns context manager that will swallow and log exceptions.
1386
+
1387
+ By default swallows descendents of Exception, but you can provide other classes through
1388
+ the vararg ``exceptions``.
1389
+
1390
+ Suppression behavior can be disabled with reraise=True.
1391
+
1392
+ :meta private:
1393
+ """
1394
+
1395
+ def __init__(self, *exceptions, reraise: bool = False) -> None:
1396
+ self._exceptions = exceptions or (Exception,)
1397
+ self.reraise = reraise
1398
+ self.exception = None
1399
+
1400
+ def __enter__(self):
1401
+ return self
1402
+
1403
+ def __exit__(self, exctype, excinst, exctb) -> bool:
1404
+ error = exctype is not None
1405
+ matching_error = error and issubclass(exctype, self._exceptions)
1406
+ if (error and not matching_error) or (matching_error and self.reraise):
1407
+ return False
1408
+ if matching_error:
1409
+ self.exception = excinst
1410
+ logger = logging.getLogger(__name__)
1411
+ logger.exception(excinst)
1412
+ return True
1413
+
1414
+
1415
+ def _normalize_labels_dict(labels: dict) -> dict:
1416
+ """Return a copy of the labels dict with all None values replaced by empty strings."""
1417
+ return {k: ("" if v is None else v) for k, v in labels.items()}