apache-airflow-providers-cncf-kubernetes 10.7.0__py3-none-any.whl → 10.8.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/cncf/kubernetes/__init__.py +1 -1
- airflow/providers/cncf/kubernetes/cli/kubernetes_command.py +8 -4
- airflow/providers/cncf/kubernetes/decorators/kubernetes_cmd.py +1 -1
- airflow/providers/cncf/kubernetes/executors/kubernetes_executor.py +80 -30
- airflow/providers/cncf/kubernetes/executors/kubernetes_executor_types.py +50 -10
- airflow/providers/cncf/kubernetes/executors/kubernetes_executor_utils.py +200 -30
- airflow/providers/cncf/kubernetes/operators/kueue.py +17 -2
- airflow/providers/cncf/kubernetes/operators/pod.py +52 -0
- airflow/providers/cncf/kubernetes/utils/pod_manager.py +69 -17
- airflow/providers/cncf/kubernetes/version_compat.py +3 -0
- {apache_airflow_providers_cncf_kubernetes-10.7.0.dist-info → apache_airflow_providers_cncf_kubernetes-10.8.0rc1.dist-info}/METADATA +8 -9
- {apache_airflow_providers_cncf_kubernetes-10.7.0.dist-info → apache_airflow_providers_cncf_kubernetes-10.8.0rc1.dist-info}/RECORD +14 -14
- {apache_airflow_providers_cncf_kubernetes-10.7.0.dist-info → apache_airflow_providers_cncf_kubernetes-10.8.0rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_cncf_kubernetes-10.7.0.dist-info → apache_airflow_providers_cncf_kubernetes-10.8.0rc1.dist-info}/entry_points.txt +0 -0
@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version
|
|
29
29
|
|
30
30
|
__all__ = ["__version__"]
|
31
31
|
|
32
|
-
__version__ = "10.7.0"
|
32
|
+
__version__ = "10.8.0"
|
33
33
|
|
34
34
|
if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
|
35
35
|
"2.10.0"
|
@@ -32,12 +32,16 @@ from airflow.providers.cncf.kubernetes.executors.kubernetes_executor import Kube
|
|
32
32
|
from airflow.providers.cncf.kubernetes.kube_client import get_kube_client
|
33
33
|
from airflow.providers.cncf.kubernetes.kubernetes_helper_functions import create_unique_id
|
34
34
|
from airflow.providers.cncf.kubernetes.pod_generator import PodGenerator, generate_pod_command_args
|
35
|
-
from airflow.providers.cncf.kubernetes.version_compat import AIRFLOW_V_3_0_PLUS
|
35
|
+
from airflow.providers.cncf.kubernetes.version_compat import AIRFLOW_V_3_0_PLUS, AIRFLOW_V_3_1_PLUS
|
36
36
|
from airflow.utils import cli as cli_utils, yaml
|
37
|
-
from airflow.utils.cli import get_dag
|
38
37
|
from airflow.utils.providers_configuration_loader import providers_configuration_loaded
|
39
38
|
from airflow.utils.types import DagRunType
|
40
39
|
|
40
|
+
if AIRFLOW_V_3_1_PLUS:
|
41
|
+
from airflow.utils.cli import get_bagged_dag
|
42
|
+
else:
|
43
|
+
from airflow.utils.cli import get_dag as get_bagged_dag # type: ignore[attr-defined,no-redef]
|
44
|
+
|
41
45
|
|
42
46
|
@cli_utils.action_cli
|
43
47
|
@providers_configuration_loaded
|
@@ -45,9 +49,9 @@ def generate_pod_yaml(args):
|
|
45
49
|
"""Generate yaml files for each task in the DAG. Used for testing output of KubernetesExecutor."""
|
46
50
|
logical_date = args.logical_date if AIRFLOW_V_3_0_PLUS else args.execution_date
|
47
51
|
if AIRFLOW_V_3_0_PLUS:
|
48
|
-
dag = get_dag(bundle_names=args.bundle_name, dag_id=args.dag_id)
|
52
|
+
dag = get_bagged_dag(bundle_names=args.bundle_name, dag_id=args.dag_id)
|
49
53
|
else:
|
50
|
-
dag = get_dag(subdir=args.subdir, dag_id=args.dag_id)
|
54
|
+
dag = get_bagged_dag(subdir=args.subdir, dag_id=args.dag_id)
|
51
55
|
yaml_output_path = args.output_path
|
52
56
|
|
53
57
|
dm = DagModel(dag_id=dag.dag_id)
|
@@ -24,9 +24,9 @@ from airflow.providers.cncf.kubernetes.operators.pod import KubernetesPodOperato
|
|
24
24
|
from airflow.providers.cncf.kubernetes.version_compat import (
|
25
25
|
DecoratedOperator,
|
26
26
|
TaskDecorator,
|
27
|
+
context_merge,
|
27
28
|
task_decorator_factory,
|
28
29
|
)
|
29
|
-
from airflow.utils.context import context_merge
|
30
30
|
from airflow.utils.operator_helpers import determine_kwargs
|
31
31
|
|
32
32
|
if TYPE_CHECKING:
|
@@ -66,6 +66,8 @@ from airflow.providers.cncf.kubernetes.exceptions import PodMutationHookExceptio
|
|
66
66
|
from airflow.providers.cncf.kubernetes.executors.kubernetes_executor_types import (
|
67
67
|
ADOPTED,
|
68
68
|
POD_EXECUTOR_DONE_KEY,
|
69
|
+
KubernetesJob,
|
70
|
+
KubernetesResults,
|
69
71
|
)
|
70
72
|
from airflow.providers.cncf.kubernetes.kube_config import KubeConfig
|
71
73
|
from airflow.providers.cncf.kubernetes.kubernetes_helper_functions import annotations_to_key
|
@@ -85,10 +87,6 @@ if TYPE_CHECKING:
|
|
85
87
|
from airflow.executors import workloads
|
86
88
|
from airflow.models.taskinstance import TaskInstance
|
87
89
|
from airflow.models.taskinstancekey import TaskInstanceKey
|
88
|
-
from airflow.providers.cncf.kubernetes.executors.kubernetes_executor_types import (
|
89
|
-
KubernetesJobType,
|
90
|
-
KubernetesResultsType,
|
91
|
-
)
|
92
90
|
from airflow.providers.cncf.kubernetes.executors.kubernetes_executor_utils import (
|
93
91
|
AirflowKubernetesScheduler,
|
94
92
|
)
|
@@ -156,8 +154,8 @@ class KubernetesExecutor(BaseExecutor):
|
|
156
154
|
def __init__(self):
|
157
155
|
self.kube_config = KubeConfig()
|
158
156
|
self._manager = multiprocessing.Manager()
|
159
|
-
self.task_queue: Queue[KubernetesJobType] = self._manager.Queue()
|
160
|
-
self.result_queue: Queue[KubernetesResultsType] = self._manager.Queue()
|
157
|
+
self.task_queue: Queue[KubernetesJob] = self._manager.Queue()
|
158
|
+
self.result_queue: Queue[KubernetesResults] = self._manager.Queue()
|
161
159
|
self.kube_scheduler: AirflowKubernetesScheduler | None = None
|
162
160
|
self.kube_client: client.CoreV1Api | None = None
|
163
161
|
self.scheduler_job_id: str | None = None
|
@@ -279,7 +277,7 @@ class KubernetesExecutor(BaseExecutor):
|
|
279
277
|
else:
|
280
278
|
pod_template_file = None
|
281
279
|
self.event_buffer[key] = (TaskInstanceState.QUEUED, self.scheduler_job_id)
|
282
|
-
self.task_queue.put((key, command, kube_executor_config, pod_template_file))
|
280
|
+
self.task_queue.put(KubernetesJob(key, command, kube_executor_config, pod_template_file))
|
283
281
|
# We keep a temporary local record that we've handled this so we don't
|
284
282
|
# try and remove it from the QUEUED state while we process it
|
285
283
|
self.last_handled[key] = time.time()
|
@@ -330,17 +328,16 @@ class KubernetesExecutor(BaseExecutor):
|
|
330
328
|
while True:
|
331
329
|
results = self.result_queue.get_nowait()
|
332
330
|
try:
|
333
|
-
|
334
|
-
|
335
|
-
self.log.info("Changing state of %s to %s", results, state)
|
331
|
+
last_resource_version[results.namespace] = results.resource_version
|
332
|
+
self.log.info("Changing state of %s to %s", results, results.state)
|
336
333
|
try:
|
337
|
-
self._change_state(
|
334
|
+
self._change_state(results)
|
338
335
|
except Exception as e:
|
339
336
|
self.log.exception(
|
340
337
|
"Exception: %s when attempting to change state of %s to %s, re-queueing.",
|
341
338
|
e,
|
342
339
|
results,
|
343
|
-
state,
|
340
|
+
results.state,
|
344
341
|
)
|
345
342
|
self.result_queue.put(results)
|
346
343
|
finally:
|
@@ -361,7 +358,7 @@ class KubernetesExecutor(BaseExecutor):
|
|
361
358
|
task = self.task_queue.get_nowait()
|
362
359
|
|
363
360
|
try:
|
364
|
-
key
|
361
|
+
key = task.key
|
365
362
|
self.kube_scheduler.run_next(task)
|
366
363
|
self.task_publish_retries.pop(key, None)
|
367
364
|
except PodReconciliationError as e:
|
@@ -371,31 +368,41 @@ class KubernetesExecutor(BaseExecutor):
|
|
371
368
|
)
|
372
369
|
self.fail(task[0], e)
|
373
370
|
except ApiException as e:
|
374
|
-
|
371
|
+
try:
|
372
|
+
if e.body:
|
373
|
+
body = json.loads(e.body)
|
374
|
+
else:
|
375
|
+
# If no body content, use reason as the message
|
376
|
+
body = {"message": e.reason}
|
377
|
+
except (json.JSONDecodeError, ValueError, TypeError):
|
378
|
+
# If the body is a string (e.g., in a 429 error), it can't be parsed as JSON.
|
379
|
+
# Use the body directly as the message instead.
|
380
|
+
body = {"message": e.body}
|
381
|
+
|
375
382
|
retries = self.task_publish_retries[key]
|
376
|
-
# In case of exceeded quota errors, requeue the task as per the task_publish_max_retries
|
383
|
+
# In case of exceeded quota or conflict errors, requeue the task as per the task_publish_max_retries
|
384
|
+
message = body.get("message", "")
|
377
385
|
if (
|
378
|
-
str(e.status) == "403"
|
379
|
-
and "
|
380
|
-
|
381
|
-
):
|
386
|
+
(str(e.status) == "403" and "exceeded quota" in message)
|
387
|
+
or (str(e.status) == "409" and "object has been modified" in message)
|
388
|
+
) and (self.task_publish_max_retries == -1 or retries < self.task_publish_max_retries):
|
382
389
|
self.log.warning(
|
383
390
|
"[Try %s of %s] Kube ApiException for Task: (%s). Reason: %r. Message: %s",
|
384
391
|
self.task_publish_retries[key] + 1,
|
385
392
|
self.task_publish_max_retries,
|
386
393
|
key,
|
387
394
|
e.reason,
|
388
|
-
|
395
|
+
message,
|
389
396
|
)
|
390
397
|
self.task_queue.put(task)
|
391
398
|
self.task_publish_retries[key] = retries + 1
|
392
399
|
else:
|
393
400
|
self.log.error("Pod creation failed with reason %r. Failing task", e.reason)
|
394
|
-
key
|
401
|
+
key = task.key
|
395
402
|
self.fail(key, e)
|
396
403
|
self.task_publish_retries.pop(key, None)
|
397
404
|
except PodMutationHookException as e:
|
398
|
-
key
|
405
|
+
key = task.key
|
399
406
|
self.log.error(
|
400
407
|
"Pod Mutation Hook failed for the task %s. Failing task. Details: %s",
|
401
408
|
key,
|
@@ -408,15 +415,56 @@ class KubernetesExecutor(BaseExecutor):
|
|
408
415
|
@provide_session
|
409
416
|
def _change_state(
|
410
417
|
self,
|
411
|
-
|
412
|
-
state: TaskInstanceState | str | None,
|
413
|
-
pod_name: str,
|
414
|
-
namespace: str,
|
418
|
+
results: KubernetesResults,
|
415
419
|
session: Session = NEW_SESSION,
|
416
420
|
) -> None:
|
421
|
+
"""Change state of the task based on KubernetesResults."""
|
417
422
|
if TYPE_CHECKING:
|
418
423
|
assert self.kube_scheduler
|
419
424
|
|
425
|
+
key = results.key
|
426
|
+
state = results.state
|
427
|
+
pod_name = results.pod_name
|
428
|
+
namespace = results.namespace
|
429
|
+
failure_details = results.failure_details
|
430
|
+
|
431
|
+
if state == TaskInstanceState.FAILED:
|
432
|
+
# Use pre-collected failure details from the watcher to avoid additional API calls
|
433
|
+
if failure_details:
|
434
|
+
pod_status = failure_details.get("pod_status")
|
435
|
+
pod_reason = failure_details.get("pod_reason")
|
436
|
+
pod_message = failure_details.get("pod_message")
|
437
|
+
container_state = failure_details.get("container_state")
|
438
|
+
container_reason = failure_details.get("container_reason")
|
439
|
+
container_message = failure_details.get("container_message")
|
440
|
+
exit_code = failure_details.get("exit_code")
|
441
|
+
container_type = failure_details.get("container_type")
|
442
|
+
container_name = failure_details.get("container_name")
|
443
|
+
|
444
|
+
task_key_str = f"{key.dag_id}.{key.task_id}.{key.try_number}"
|
445
|
+
self.log.warning(
|
446
|
+
"Task %s failed in pod %s/%s. Pod phase: %s, reason: %s, message: %s, "
|
447
|
+
"container_type: %s, container_name: %s, container_state: %s, container_reason: %s, "
|
448
|
+
"container_message: %s, exit_code: %s",
|
449
|
+
task_key_str,
|
450
|
+
namespace,
|
451
|
+
pod_name,
|
452
|
+
pod_status,
|
453
|
+
pod_reason,
|
454
|
+
pod_message,
|
455
|
+
container_type,
|
456
|
+
container_name,
|
457
|
+
container_state,
|
458
|
+
container_reason,
|
459
|
+
container_message,
|
460
|
+
exit_code,
|
461
|
+
)
|
462
|
+
else:
|
463
|
+
task_key_str = f"{key.dag_id}.{key.task_id}.{key.try_number}"
|
464
|
+
self.log.warning(
|
465
|
+
"Task %s failed in pod %s/%s (no details available)", task_key_str, namespace, pod_name
|
466
|
+
)
|
467
|
+
|
420
468
|
if state == ADOPTED:
|
421
469
|
# When the task pod is adopted by another executor,
|
422
470
|
# then remove the task from the current executor running queue.
|
@@ -696,18 +744,20 @@ class KubernetesExecutor(BaseExecutor):
|
|
696
744
|
results = self.result_queue.get_nowait()
|
697
745
|
self.log.warning("Executor shutting down, flushing results=%s", results)
|
698
746
|
try:
|
699
|
-
key, state, pod_name, namespace, resource_version = results
|
700
747
|
self.log.info(
|
701
|
-
"Changing state of %s to %s : resource_version=%
|
748
|
+
"Changing state of %s to %s : resource_version=%s",
|
749
|
+
results,
|
750
|
+
results.state,
|
751
|
+
results.resource_version,
|
702
752
|
)
|
703
753
|
try:
|
704
|
-
self._change_state(
|
754
|
+
self._change_state(results)
|
705
755
|
except Exception as e:
|
706
756
|
self.log.exception(
|
707
757
|
"Ignoring exception: %s when attempting to change state of %s to %s.",
|
708
758
|
e,
|
709
759
|
results,
|
710
|
-
state,
|
760
|
+
results.state,
|
711
761
|
)
|
712
762
|
finally:
|
713
763
|
self.result_queue.task_done()
|
@@ -16,26 +16,66 @@
|
|
16
16
|
# under the License.
|
17
17
|
from __future__ import annotations
|
18
18
|
|
19
|
-
from typing import TYPE_CHECKING, Any
|
19
|
+
from typing import TYPE_CHECKING, Any, Literal, NamedTuple, TypedDict
|
20
20
|
|
21
|
-
ADOPTED = "adopted"
|
22
21
|
if TYPE_CHECKING:
|
23
22
|
from collections.abc import Sequence
|
24
23
|
|
25
24
|
from airflow.models.taskinstance import TaskInstanceKey
|
26
25
|
from airflow.utils.state import TaskInstanceState
|
27
26
|
|
28
|
-
# TODO: Remove after Airflow 2 support is removed
|
29
|
-
CommandType = Sequence[str]
|
30
27
|
|
31
|
-
|
32
|
-
|
28
|
+
ADOPTED = "adopted"
|
29
|
+
|
30
|
+
|
31
|
+
class FailureDetails(TypedDict, total=False):
|
32
|
+
"""Detailed information about pod/container failure."""
|
33
|
+
|
34
|
+
pod_status: str | None
|
35
|
+
pod_reason: str | None
|
36
|
+
pod_message: str | None
|
37
|
+
container_state: str | None
|
38
|
+
container_reason: str | None
|
39
|
+
container_message: str | None
|
40
|
+
exit_code: int | None
|
41
|
+
container_type: Literal["init", "main"] | None
|
42
|
+
container_name: str | None
|
43
|
+
|
44
|
+
|
45
|
+
class KubernetesResults(NamedTuple):
|
46
|
+
"""Results from Kubernetes task execution."""
|
47
|
+
|
48
|
+
key: TaskInstanceKey
|
49
|
+
state: TaskInstanceState | str | None
|
50
|
+
pod_name: str
|
51
|
+
namespace: str
|
52
|
+
resource_version: str
|
53
|
+
failure_details: FailureDetails | None
|
54
|
+
|
55
|
+
|
56
|
+
class KubernetesWatch(NamedTuple):
|
57
|
+
"""Watch event data from Kubernetes pods."""
|
58
|
+
|
59
|
+
pod_name: str
|
60
|
+
namespace: str
|
61
|
+
state: TaskInstanceState | str | None
|
62
|
+
annotations: dict[str, str]
|
63
|
+
resource_version: str
|
64
|
+
failure_details: FailureDetails | None
|
65
|
+
|
66
|
+
|
67
|
+
# TODO: Remove after Airflow 2 support is removed
|
68
|
+
CommandType = "Sequence[str]"
|
69
|
+
|
70
|
+
|
71
|
+
class KubernetesJob(NamedTuple):
|
72
|
+
"""Job definition for Kubernetes execution."""
|
33
73
|
|
34
|
-
|
35
|
-
|
74
|
+
key: TaskInstanceKey
|
75
|
+
command: Sequence[str]
|
76
|
+
kube_executor_config: Any
|
77
|
+
pod_template_file: str | None
|
36
78
|
|
37
|
-
# pod_name, namespace, pod state, annotations, resource_version
|
38
|
-
KubernetesWatchType = tuple[str, str, TaskInstanceState | str | None, dict[str, str], str]
|
39
79
|
|
40
80
|
ALL_NAMESPACES = "ALL_NAMESPACES"
|
41
81
|
POD_EXECUTOR_DONE_KEY = "airflow_executor_done"
|
@@ -21,7 +21,7 @@ import json
|
|
21
21
|
import multiprocessing
|
22
22
|
import time
|
23
23
|
from queue import Empty, Queue
|
24
|
-
from typing import TYPE_CHECKING, Any
|
24
|
+
from typing import TYPE_CHECKING, Any, Literal, cast
|
25
25
|
|
26
26
|
from kubernetes import client, watch
|
27
27
|
from kubernetes.client.rest import ApiException
|
@@ -34,6 +34,10 @@ from airflow.providers.cncf.kubernetes.executors.kubernetes_executor_types impor
|
|
34
34
|
ALL_NAMESPACES,
|
35
35
|
POD_EXECUTOR_DONE_KEY,
|
36
36
|
POD_REVOKED_KEY,
|
37
|
+
FailureDetails,
|
38
|
+
KubernetesJob,
|
39
|
+
KubernetesResults,
|
40
|
+
KubernetesWatch,
|
37
41
|
)
|
38
42
|
from airflow.providers.cncf.kubernetes.kube_client import get_kube_client
|
39
43
|
from airflow.providers.cncf.kubernetes.kubernetes_helper_functions import (
|
@@ -49,12 +53,6 @@ from airflow.utils.state import TaskInstanceState
|
|
49
53
|
if TYPE_CHECKING:
|
50
54
|
from kubernetes.client import Configuration, models as k8s
|
51
55
|
|
52
|
-
from airflow.providers.cncf.kubernetes.executors.kubernetes_executor_types import (
|
53
|
-
KubernetesJobType,
|
54
|
-
KubernetesResultsType,
|
55
|
-
KubernetesWatchType,
|
56
|
-
)
|
57
|
-
|
58
56
|
|
59
57
|
class ResourceVersion(metaclass=Singleton):
|
60
58
|
"""Singleton for tracking resourceVersion from Kubernetes."""
|
@@ -68,7 +66,7 @@ class KubernetesJobWatcher(multiprocessing.Process, LoggingMixin):
|
|
68
66
|
def __init__(
|
69
67
|
self,
|
70
68
|
namespace: str,
|
71
|
-
watcher_queue: Queue[KubernetesWatchType],
|
69
|
+
watcher_queue: Queue[KubernetesWatch],
|
72
70
|
resource_version: str | None,
|
73
71
|
scheduler_job_id: str,
|
74
72
|
kube_config: Configuration,
|
@@ -216,16 +214,28 @@ class KubernetesJobWatcher(multiprocessing.Process, LoggingMixin):
|
|
216
214
|
# So, there is no change in the pod state.
|
217
215
|
# However, need to free the executor slot from the current executor.
|
218
216
|
self.log.info("Event: pod %s adopted, annotations: %s", pod_name, annotations_string)
|
219
|
-
self.watcher_queue.put(
|
217
|
+
self.watcher_queue.put(
|
218
|
+
KubernetesWatch(pod_name, namespace, ADOPTED, annotations, resource_version, None)
|
219
|
+
)
|
220
220
|
elif hasattr(pod.status, "reason") and pod.status.reason == "ProviderFailed":
|
221
221
|
# Most likely this happens due to Kubernetes setup (virtual kubelet, virtual nodes, etc.)
|
222
|
-
|
223
|
-
|
222
|
+
key = annotations_to_key(annotations=annotations)
|
223
|
+
task_key_str = f"{key.dag_id}.{key.task_id}.{key.try_number}" if key else "unknown"
|
224
|
+
self.log.warning(
|
225
|
+
"Event: %s failed to start with reason ProviderFailed, task: %s, annotations: %s",
|
224
226
|
pod_name,
|
227
|
+
task_key_str,
|
225
228
|
annotations_string,
|
226
229
|
)
|
227
230
|
self.watcher_queue.put(
|
228
|
-
(
|
231
|
+
KubernetesWatch(
|
232
|
+
pod_name,
|
233
|
+
namespace,
|
234
|
+
TaskInstanceState.FAILED,
|
235
|
+
annotations,
|
236
|
+
resource_version,
|
237
|
+
None,
|
238
|
+
)
|
229
239
|
)
|
230
240
|
elif status == "Pending":
|
231
241
|
# deletion_timestamp is set by kube server when a graceful deletion is requested.
|
@@ -254,14 +264,26 @@ class KubernetesJobWatcher(multiprocessing.Process, LoggingMixin):
|
|
254
264
|
and container_status_state["waiting"]["message"] == "pull QPS exceeded"
|
255
265
|
):
|
256
266
|
continue
|
257
|
-
|
258
|
-
|
267
|
+
key = annotations_to_key(annotations=annotations)
|
268
|
+
task_key_str = (
|
269
|
+
f"{key.dag_id}.{key.task_id}.{key.try_number}" if key else "unknown"
|
270
|
+
)
|
271
|
+
self.log.warning(
|
272
|
+
"Event: %s has container %s with fatal reason %s, task: %s",
|
259
273
|
pod_name,
|
260
274
|
container_status["name"],
|
261
275
|
container_status_state["waiting"]["reason"],
|
276
|
+
task_key_str,
|
262
277
|
)
|
263
278
|
self.watcher_queue.put(
|
264
|
-
(
|
279
|
+
KubernetesWatch(
|
280
|
+
pod_name,
|
281
|
+
namespace,
|
282
|
+
TaskInstanceState.FAILED,
|
283
|
+
annotations,
|
284
|
+
resource_version,
|
285
|
+
None,
|
286
|
+
)
|
265
287
|
)
|
266
288
|
break
|
267
289
|
else:
|
@@ -269,13 +291,34 @@ class KubernetesJobWatcher(multiprocessing.Process, LoggingMixin):
|
|
269
291
|
else:
|
270
292
|
self.log.debug("Event: %s Pending, annotations: %s", pod_name, annotations_string)
|
271
293
|
elif status == "Failed":
|
272
|
-
|
294
|
+
# Collect failure details for failed pods
|
295
|
+
try:
|
296
|
+
failure_details = collect_pod_failure_details(pod, self.log)
|
297
|
+
except Exception as e:
|
298
|
+
self.log.warning(
|
299
|
+
"Failed to collect pod failure details for %s/%s: %s", namespace, pod_name, e
|
300
|
+
)
|
301
|
+
|
302
|
+
key = annotations_to_key(annotations=annotations)
|
303
|
+
task_key_str = f"{key.dag_id}.{key.task_id}.{key.try_number}" if key else "unknown"
|
304
|
+
self.log.warning(
|
305
|
+
"Event: %s Failed, task: %s, annotations: %s", pod_name, task_key_str, annotations_string
|
306
|
+
)
|
273
307
|
self.watcher_queue.put(
|
274
|
-
(
|
308
|
+
KubernetesWatch(
|
309
|
+
pod_name,
|
310
|
+
namespace,
|
311
|
+
TaskInstanceState.FAILED,
|
312
|
+
annotations,
|
313
|
+
resource_version,
|
314
|
+
failure_details,
|
315
|
+
)
|
275
316
|
)
|
276
317
|
elif status == "Succeeded":
|
277
318
|
self.log.info("Event: %s Succeeded, annotations: %s", pod_name, annotations_string)
|
278
|
-
self.watcher_queue.put(
|
319
|
+
self.watcher_queue.put(
|
320
|
+
KubernetesWatch(pod_name, namespace, None, annotations, resource_version, None)
|
321
|
+
)
|
279
322
|
elif status == "Running":
|
280
323
|
# deletion_timestamp is set by kube server when a graceful deletion is requested.
|
281
324
|
# since kube server have received request to delete pod set TI state failed
|
@@ -286,7 +329,14 @@ class KubernetesJobWatcher(multiprocessing.Process, LoggingMixin):
|
|
286
329
|
annotations_string,
|
287
330
|
)
|
288
331
|
self.watcher_queue.put(
|
289
|
-
(
|
332
|
+
KubernetesWatch(
|
333
|
+
pod_name,
|
334
|
+
namespace,
|
335
|
+
TaskInstanceState.FAILED,
|
336
|
+
annotations,
|
337
|
+
resource_version,
|
338
|
+
None,
|
339
|
+
)
|
290
340
|
)
|
291
341
|
else:
|
292
342
|
self.log.info("Event: %s is Running, annotations: %s", pod_name, annotations_string)
|
@@ -302,13 +352,122 @@ class KubernetesJobWatcher(multiprocessing.Process, LoggingMixin):
|
|
302
352
|
)
|
303
353
|
|
304
354
|
|
355
|
+
def collect_pod_failure_details(pod: k8s.V1Pod, logger) -> FailureDetails | None:
|
356
|
+
"""
|
357
|
+
Collect detailed failure information from a failed pod.
|
358
|
+
|
359
|
+
Analyzes both init containers and main containers to determine the root cause
|
360
|
+
of pod failure, prioritizing terminated containers with non-zero exit codes.
|
361
|
+
|
362
|
+
Args:
|
363
|
+
pod: The Kubernetes V1Pod object to analyze
|
364
|
+
logger: Logger instance to use for error logging
|
365
|
+
|
366
|
+
Returns:
|
367
|
+
FailureDetails dict with failure information, or None if no failure details found
|
368
|
+
"""
|
369
|
+
if not pod.status or pod.status.phase != "Failed":
|
370
|
+
return None
|
371
|
+
|
372
|
+
try:
|
373
|
+
# Basic pod-level information
|
374
|
+
failure_details: FailureDetails = {
|
375
|
+
"pod_status": getattr(pod.status, "phase", None),
|
376
|
+
"pod_reason": getattr(pod.status, "reason", None),
|
377
|
+
"pod_message": getattr(pod.status, "message", None),
|
378
|
+
}
|
379
|
+
|
380
|
+
# Check init containers first (they run before main containers)
|
381
|
+
container_failure = _analyze_init_containers(pod.status)
|
382
|
+
|
383
|
+
# If no init container failure found, check main containers
|
384
|
+
if not container_failure:
|
385
|
+
container_failure = _analyze_main_containers(pod.status)
|
386
|
+
|
387
|
+
# Merge container failure details
|
388
|
+
if container_failure:
|
389
|
+
failure_details.update(container_failure)
|
390
|
+
|
391
|
+
return failure_details
|
392
|
+
|
393
|
+
except Exception:
|
394
|
+
# Log unexpected exception for debugging
|
395
|
+
logger.exception(
|
396
|
+
"Unexpected error while collecting pod failure details for pod %s",
|
397
|
+
getattr(pod.metadata, "name", "unknown"),
|
398
|
+
)
|
399
|
+
# Return basic pod info if container analysis fails
|
400
|
+
return {
|
401
|
+
"pod_status": getattr(pod.status, "phase", None),
|
402
|
+
"pod_reason": getattr(pod.status, "reason", None),
|
403
|
+
"pod_message": getattr(pod.status, "message", None),
|
404
|
+
}
|
405
|
+
|
406
|
+
|
407
|
+
def _analyze_containers(
|
408
|
+
container_statuses: list[k8s.V1ContainerStatus] | None, container_type: Literal["init", "main"]
|
409
|
+
) -> FailureDetails | None:
|
410
|
+
"""Analyze container statuses for failure details."""
|
411
|
+
if not container_statuses:
|
412
|
+
return None
|
413
|
+
|
414
|
+
waiting_info: FailureDetails | None = None
|
415
|
+
|
416
|
+
for cs in container_statuses:
|
417
|
+
state_obj = cs.state
|
418
|
+
if state_obj.terminated:
|
419
|
+
terminated_reason = getattr(state_obj.terminated, "reason", None)
|
420
|
+
exit_code = getattr(state_obj.terminated, "exit_code", 0)
|
421
|
+
|
422
|
+
# Only treat as failure if exit code != 0 AND reason is not "Completed"
|
423
|
+
if exit_code != 0 and terminated_reason != "Completed":
|
424
|
+
return cast(
|
425
|
+
"FailureDetails",
|
426
|
+
{
|
427
|
+
"container_state": "terminated",
|
428
|
+
"container_reason": terminated_reason,
|
429
|
+
"container_message": getattr(state_obj.terminated, "message", None),
|
430
|
+
"exit_code": exit_code,
|
431
|
+
"container_type": container_type,
|
432
|
+
"container_name": getattr(cs, "name", "unknown"),
|
433
|
+
},
|
434
|
+
)
|
435
|
+
elif state_obj.waiting:
|
436
|
+
# Record waiting state but continue looking for terminated containers
|
437
|
+
waiting_info = cast(
|
438
|
+
"FailureDetails",
|
439
|
+
{
|
440
|
+
"container_state": "waiting",
|
441
|
+
"container_reason": getattr(state_obj.waiting, "reason", None),
|
442
|
+
"container_message": getattr(state_obj.waiting, "message", None),
|
443
|
+
"container_type": container_type,
|
444
|
+
"container_name": getattr(cs, "name", "unknown"),
|
445
|
+
},
|
446
|
+
)
|
447
|
+
|
448
|
+
# If we only found waiting containers, return the last one
|
449
|
+
return waiting_info
|
450
|
+
|
451
|
+
|
452
|
+
def _analyze_init_containers(pod_status: k8s.V1PodStatus) -> FailureDetails | None:
|
453
|
+
"""Analyze init container statuses for failure details."""
|
454
|
+
init_container_statuses = getattr(pod_status, "init_container_statuses", None)
|
455
|
+
return _analyze_containers(init_container_statuses, "init")
|
456
|
+
|
457
|
+
|
458
|
+
def _analyze_main_containers(pod_status: k8s.V1PodStatus) -> FailureDetails | None:
|
459
|
+
"""Analyze main container statuses for failure details."""
|
460
|
+
container_statuses = getattr(pod_status, "container_statuses", None)
|
461
|
+
return _analyze_containers(container_statuses, "main")
|
462
|
+
|
463
|
+
|
305
464
|
class AirflowKubernetesScheduler(LoggingMixin):
|
306
465
|
"""Airflow Scheduler for Kubernetes."""
|
307
466
|
|
308
467
|
def __init__(
|
309
468
|
self,
|
310
469
|
kube_config: Any,
|
311
|
-
result_queue: Queue[KubernetesResultsType],
|
470
|
+
result_queue: Queue[KubernetesResults],
|
312
471
|
kube_client: client.CoreV1Api,
|
313
472
|
scheduler_job_id: str,
|
314
473
|
):
|
@@ -382,9 +541,12 @@ class AirflowKubernetesScheduler(LoggingMixin):
|
|
382
541
|
ResourceVersion().resource_version[namespace] = "0"
|
383
542
|
self.kube_watchers[namespace] = self._make_kube_watcher(namespace)
|
384
543
|
|
385
|
-
def run_next(self, next_job: KubernetesJobType) -> None:
|
544
|
+
def run_next(self, next_job: KubernetesJob) -> None:
|
386
545
|
"""Receives the next job to run, builds the pod, and creates it."""
|
387
|
-
key, command, kube_executor_config, pod_template_file = next_job
|
546
|
+
key = next_job.key
|
547
|
+
command = next_job.command
|
548
|
+
kube_executor_config = next_job.kube_executor_config
|
549
|
+
pod_template_file = next_job.pod_template_file
|
388
550
|
|
389
551
|
dag_id, task_id, run_id, try_number, map_index = key
|
390
552
|
if len(command) == 1:
|
@@ -502,19 +664,27 @@ class AirflowKubernetesScheduler(LoggingMixin):
|
|
502
664
|
finally:
|
503
665
|
self.watcher_queue.task_done()
|
504
666
|
|
505
|
-
def process_watcher_task(self, task: KubernetesWatchType) -> None:
|
667
|
+
def process_watcher_task(self, task: KubernetesWatch) -> None:
|
506
668
|
"""Process the task by watcher."""
|
507
|
-
pod_name, namespace, state, annotations, resource_version = task
|
508
669
|
self.log.debug(
|
509
670
|
"Attempting to finish pod; pod_name: %s; state: %s; annotations: %s",
|
510
|
-
pod_name,
|
511
|
-
state,
|
512
|
-
annotations_for_logging_task_metadata(annotations),
|
671
|
+
task.pod_name,
|
672
|
+
task.state,
|
673
|
+
annotations_for_logging_task_metadata(task.annotations),
|
513
674
|
)
|
514
|
-
key = annotations_to_key(annotations=annotations)
|
675
|
+
key = annotations_to_key(annotations=task.annotations)
|
515
676
|
if key:
|
516
|
-
self.log.debug("finishing job %s - %s (%s)", key, state, pod_name)
|
517
|
-
self.result_queue.put(
|
677
|
+
self.log.debug("finishing job %s - %s (%s)", key, task.state, task.pod_name)
|
678
|
+
self.result_queue.put(
|
679
|
+
KubernetesResults(
|
680
|
+
key,
|
681
|
+
task.state,
|
682
|
+
task.pod_name,
|
683
|
+
task.namespace,
|
684
|
+
task.resource_version,
|
685
|
+
task.failure_details,
|
686
|
+
)
|
687
|
+
)
|
518
688
|
|
519
689
|
def _flush_watcher_queue(self) -> None:
|
520
690
|
self.log.debug("Executor shutting down, watcher_queue approx. size=%d", self.watcher_queue.qsize())
|
@@ -65,12 +65,27 @@ class KubernetesInstallKueueOperator(BaseOperator):
|
|
65
65
|
try:
|
66
66
|
self.hook.apply_from_yaml_file(yaml_objects=yaml_objects)
|
67
67
|
except FailToCreateError as ex:
|
68
|
-
error_bodies = [
|
68
|
+
error_bodies = []
|
69
|
+
for e in ex.api_exceptions:
|
70
|
+
try:
|
71
|
+
if e.body:
|
72
|
+
error_bodies.append(json.loads(e.body))
|
73
|
+
else:
|
74
|
+
# If no body content, use reason as the message
|
75
|
+
reason = getattr(e, "reason", "Unknown")
|
76
|
+
error_bodies.append({"message": reason, "reason": reason})
|
77
|
+
except (json.JSONDecodeError, ValueError, TypeError):
|
78
|
+
# If the body is a string (e.g., in a 429 error), it can't be parsed as JSON.
|
79
|
+
# Use the body directly as the message instead.
|
80
|
+
error_bodies.append({"message": e.body, "reason": getattr(e, "reason", "Unknown")})
|
69
81
|
if next((e for e in error_bodies if e.get("reason") == "AlreadyExists"), None):
|
70
82
|
self.log.info("Kueue is already enabled for the cluster")
|
71
83
|
|
72
84
|
if errors := [e for e in error_bodies if e.get("reason") != "AlreadyExists"]:
|
73
|
-
error_message = "\n".join(
|
85
|
+
error_message = "\n".join(
|
86
|
+
e.get("message") or e.get("body") or f"Unknown error: {e.get('reason', 'Unknown')}"
|
87
|
+
for e in errors
|
88
|
+
)
|
74
89
|
raise AirflowException(error_message)
|
75
90
|
return
|
76
91
|
|
@@ -235,6 +235,11 @@ class KubernetesPodOperator(BaseOperator):
|
|
235
235
|
resuming to fetch the latest logs. If ``None``, then the task will remain in deferred state until pod
|
236
236
|
is done, and no logs will be visible until that time.
|
237
237
|
:param trigger_kwargs: additional keyword parameters passed to the trigger
|
238
|
+
:param container_name_log_prefix_enabled: if True, will prefix container name to each log line.
|
239
|
+
Default to True.
|
240
|
+
:param log_formatter: custom log formatter function that takes two string arguments:
|
241
|
+
the first string is the container_name and the second string is the message_to_log.
|
242
|
+
The function should return a formatted string. If None, the default formatting will be used.
|
238
243
|
"""
|
239
244
|
|
240
245
|
# !!! Changes in KubernetesPodOperator's arguments should be also reflected in !!!
|
@@ -343,6 +348,8 @@ class KubernetesPodOperator(BaseOperator):
|
|
343
348
|
progress_callback: Callable[[str], None] | None = None,
|
344
349
|
logging_interval: int | None = None,
|
345
350
|
trigger_kwargs: dict | None = None,
|
351
|
+
container_name_log_prefix_enabled: bool = True,
|
352
|
+
log_formatter: Callable[[str, str], str] | None = None,
|
346
353
|
**kwargs,
|
347
354
|
) -> None:
|
348
355
|
super().__init__(**kwargs)
|
@@ -438,6 +445,8 @@ class KubernetesPodOperator(BaseOperator):
|
|
438
445
|
self._progress_callback = progress_callback
|
439
446
|
self.callbacks = [] if not callbacks else callbacks if isinstance(callbacks, list) else [callbacks]
|
440
447
|
self._killed: bool = False
|
448
|
+
self.container_name_log_prefix_enabled = container_name_log_prefix_enabled
|
449
|
+
self.log_formatter = log_formatter
|
441
450
|
|
442
451
|
@cached_property
|
443
452
|
def _incluster_namespace(self):
|
@@ -622,6 +631,7 @@ class KubernetesPodOperator(BaseOperator):
|
|
622
631
|
asyncio.run(_await_pod_start())
|
623
632
|
except PodLaunchFailedException:
|
624
633
|
if self.log_events_on_failure:
|
634
|
+
self._read_pod_container_states(pod, reraise=False)
|
625
635
|
self._read_pod_events(pod, reraise=False)
|
626
636
|
raise
|
627
637
|
|
@@ -750,6 +760,8 @@ class KubernetesPodOperator(BaseOperator):
|
|
750
760
|
pod=pod,
|
751
761
|
init_containers=self.init_container_logs,
|
752
762
|
follow_logs=True,
|
763
|
+
container_name_log_prefix_enabled=self.container_name_log_prefix_enabled,
|
764
|
+
log_formatter=self.log_formatter,
|
753
765
|
)
|
754
766
|
except kubernetes.client.exceptions.ApiException as exc:
|
755
767
|
self._handle_api_exception(exc, pod)
|
@@ -766,6 +778,8 @@ class KubernetesPodOperator(BaseOperator):
|
|
766
778
|
pod=pod,
|
767
779
|
containers=self.container_logs,
|
768
780
|
follow_logs=True,
|
781
|
+
container_name_log_prefix_enabled=self.container_name_log_prefix_enabled,
|
782
|
+
log_formatter=self.log_formatter,
|
769
783
|
)
|
770
784
|
if not self.get_logs or (
|
771
785
|
self.container_logs is not True and self.base_container_name not in self.container_logs
|
@@ -914,6 +928,8 @@ class KubernetesPodOperator(BaseOperator):
|
|
914
928
|
container_name=self.base_container_name,
|
915
929
|
follow=follow,
|
916
930
|
since_time=last_log_time,
|
931
|
+
container_name_log_prefix_enabled=self.container_name_log_prefix_enabled,
|
932
|
+
log_formatter=self.log_formatter,
|
917
933
|
)
|
918
934
|
|
919
935
|
self.invoke_defer_method(pod_log_status.last_log_time)
|
@@ -1029,6 +1045,7 @@ class KubernetesPodOperator(BaseOperator):
|
|
1029
1045
|
context["ti"].xcom_push(XCOM_RETURN_KEY, xcom_result)
|
1030
1046
|
|
1031
1047
|
if self.log_events_on_failure:
|
1048
|
+
self._read_pod_container_states(pod, reraise=False)
|
1032
1049
|
self._read_pod_events(pod, reraise=False)
|
1033
1050
|
|
1034
1051
|
self.process_pod_deletion(remote_pod, reraise=False)
|
@@ -1076,6 +1093,41 @@ class KubernetesPodOperator(BaseOperator):
|
|
1076
1093
|
else:
|
1077
1094
|
self.log.error("Pod Event: %s - %s", event.reason, event.message)
|
1078
1095
|
|
1096
|
+
def _read_pod_container_states(self, pod, *, reraise=True) -> None:
    """
    Log the pod phase and the state of every reported container for debugging.

    The pod is re-read through ``self.pod_manager.read_pod`` so the logged
    status is the current one. Terminated containers are logged at ERROR level
    when their exit code is non-zero (INFO otherwise), waiting containers at
    WARNING level and running containers at INFO level.

    :param pod: the pod whose current status should be read and logged.
    :param reraise: forwarded to ``_optionally_suppress``; presumably errors
        raised while reading/logging are swallowed when this is False
        (helper is defined elsewhere -- confirm).
    """
    with _optionally_suppress(reraise=reraise):
        remote_pod = self.pod_manager.read_pod(pod)
        pod_status = remote_pod.status
        pod_phase = getattr(pod_status, "phase", None)
        pod_reason = getattr(pod_status, "reason", None)
        self.log.info("Pod phase: %s, reason: %s", pod_phase, pod_reason)

        # The attribute can exist and still be None (e.g. a pod that never got
        # scheduled), in which case a getattr default is not used. Normalise to
        # an empty list so the loop below does not raise TypeError.
        container_statuses = getattr(pod_status, "container_statuses", None) or []
        for status in container_statuses:
            name = status.name
            state = status.state
            if state is None:
                # No state details reported for this container; nothing to log.
                continue
            if state.terminated:
                # A non-zero exit code is the interesting failure signal.
                level = self.log.error if state.terminated.exit_code != 0 else self.log.info
                level(
                    "Container '%s': state='TERMINATED', reason='%s', exit_code=%s, message='%s'",
                    name,
                    state.terminated.reason,
                    state.terminated.exit_code,
                    state.terminated.message,
                )
            elif state.waiting:
                self.log.warning(
                    "Container '%s': state='WAITING', reason='%s', message='%s'",
                    name,
                    state.waiting.reason,
                    state.waiting.message,
                )
            elif state.running:
                self.log.info(
                    "Container '%s': state='RUNNING', started_at=%s",
                    name,
                    state.running.started_at,
                )
|
1130
|
+
|
1079
1131
|
def is_istio_enabled(self, pod: V1Pod) -> bool:
|
1080
1132
|
"""Check if istio is enabled for the namespace of the pod by inspecting the namespace labels."""
|
1081
1133
|
if not pod:
|
@@ -23,7 +23,7 @@ import enum
|
|
23
23
|
import json
|
24
24
|
import math
|
25
25
|
import time
|
26
|
-
from collections.abc import Generator, Iterable
|
26
|
+
from collections.abc import Callable, Generator, Iterable
|
27
27
|
from contextlib import closing, suppress
|
28
28
|
from dataclasses import dataclass
|
29
29
|
from datetime import timedelta
|
@@ -456,6 +456,26 @@ class PodManager(LoggingMixin):
|
|
456
456
|
|
457
457
|
await asyncio.sleep(check_interval)
|
458
458
|
|
459
|
+
def _log_message(
    self,
    message: str,
    container_name: str,
    container_name_log_prefix_enabled: bool,
    log_formatter: Callable[[str, str], str] | None,
) -> None:
    """
    Emit one container log line, applying the requested formatting.

    :param message: the log line read from the container.
    :param container_name: name of the container the line came from.
    :param container_name_log_prefix_enabled: when no ``log_formatter`` is
        given, prefix the line with ``[container_name]`` if True.
    :param log_formatter: optional callable taking ``(container_name, message)``
        and returning the string to log; takes precedence over the prefix flag.
    """
    # Log-group marker lines are printed as-is instead of going through the logger.
    if is_log_group_marker(message):
        print(message)
        return

    if log_formatter:
        rendered = log_formatter(container_name, message)
    elif container_name_log_prefix_enabled:
        rendered = f"[{container_name}] {message}"
    else:
        rendered = message
    self.log.info("%s", rendered)
|
478
|
+
|
459
479
|
def fetch_container_logs(
|
460
480
|
self,
|
461
481
|
pod: V1Pod,
|
@@ -464,6 +484,8 @@ class PodManager(LoggingMixin):
|
|
464
484
|
follow=False,
|
465
485
|
since_time: DateTime | None = None,
|
466
486
|
post_termination_timeout: int = 120,
|
487
|
+
container_name_log_prefix_enabled: bool = True,
|
488
|
+
log_formatter: Callable[[str, str], str] | None = None,
|
467
489
|
) -> PodLoggingStatus:
|
468
490
|
"""
|
469
491
|
Follow the logs of container and stream to airflow logging.
|
@@ -529,10 +551,12 @@ class PodManager(LoggingMixin):
|
|
529
551
|
line=line, client=self._client, mode=ExecutionMode.SYNC
|
530
552
|
)
|
531
553
|
if message_to_log is not None:
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
554
|
+
self._log_message(
|
555
|
+
message_to_log,
|
556
|
+
container_name,
|
557
|
+
container_name_log_prefix_enabled,
|
558
|
+
log_formatter,
|
559
|
+
)
|
536
560
|
last_captured_timestamp = message_timestamp
|
537
561
|
message_to_log = message
|
538
562
|
message_timestamp = line_timestamp
|
@@ -548,10 +572,9 @@ class PodManager(LoggingMixin):
|
|
548
572
|
line=line, client=self._client, mode=ExecutionMode.SYNC
|
549
573
|
)
|
550
574
|
if message_to_log is not None:
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
self.log.info("[%s] %s", container_name, message_to_log)
|
575
|
+
self._log_message(
|
576
|
+
message_to_log, container_name, container_name_log_prefix_enabled, log_formatter
|
577
|
+
)
|
555
578
|
last_captured_timestamp = message_timestamp
|
556
579
|
except TimeoutError as e:
|
557
580
|
# in case of timeout, increment return time by 2 seconds to avoid
|
@@ -560,10 +583,17 @@ class PodManager(LoggingMixin):
|
|
560
583
|
return val.add(seconds=2), e
|
561
584
|
except HTTPError as e:
|
562
585
|
exception = e
|
563
|
-
self.
|
564
|
-
|
565
|
-
|
566
|
-
|
586
|
+
self._http_error_timestamps = getattr(self, "_http_error_timestamps", [])
|
587
|
+
self._http_error_timestamps = [
|
588
|
+
t for t in self._http_error_timestamps if t > utcnow() - timedelta(seconds=60)
|
589
|
+
]
|
590
|
+
self._http_error_timestamps.append(utcnow())
|
591
|
+
# Log only if more than 2 errors occurred in the last 60 seconds
|
592
|
+
if len(self._http_error_timestamps) > 2:
|
593
|
+
self.log.exception(
|
594
|
+
"Reading of logs interrupted for container %r; will retry.",
|
595
|
+
container_name,
|
596
|
+
)
|
567
597
|
return last_captured_timestamp or since_time, exception
|
568
598
|
|
569
599
|
# note: `read_pod_logs` follows the logs, so we shouldn't necessarily *need* to
|
@@ -630,7 +660,12 @@ class PodManager(LoggingMixin):
|
|
630
660
|
return containers_to_log
|
631
661
|
|
632
662
|
def fetch_requested_init_container_logs(
|
633
|
-
self,
|
663
|
+
self,
|
664
|
+
pod: V1Pod,
|
665
|
+
init_containers: Iterable[str] | str | Literal[True] | None,
|
666
|
+
follow_logs=False,
|
667
|
+
container_name_log_prefix_enabled: bool = True,
|
668
|
+
log_formatter: Callable[[str, str], str] | None = None,
|
634
669
|
) -> list[PodLoggingStatus]:
|
635
670
|
"""
|
636
671
|
Follow the logs of containers in the specified pod and publish it to airflow logging.
|
@@ -650,12 +685,23 @@ class PodManager(LoggingMixin):
|
|
650
685
|
containers_to_log = sorted(containers_to_log, key=lambda cn: all_containers.index(cn))
|
651
686
|
for c in containers_to_log:
|
652
687
|
self._await_init_container_start(pod=pod, container_name=c)
|
653
|
-
status = self.fetch_container_logs(
|
688
|
+
status = self.fetch_container_logs(
|
689
|
+
pod=pod,
|
690
|
+
container_name=c,
|
691
|
+
follow=follow_logs,
|
692
|
+
container_name_log_prefix_enabled=container_name_log_prefix_enabled,
|
693
|
+
log_formatter=log_formatter,
|
694
|
+
)
|
654
695
|
pod_logging_statuses.append(status)
|
655
696
|
return pod_logging_statuses
|
656
697
|
|
657
698
|
def fetch_requested_container_logs(
|
658
|
-
self,
|
699
|
+
self,
|
700
|
+
pod: V1Pod,
|
701
|
+
containers: Iterable[str] | str | Literal[True],
|
702
|
+
follow_logs=False,
|
703
|
+
container_name_log_prefix_enabled: bool = True,
|
704
|
+
log_formatter: Callable[[str, str], str] | None = None,
|
659
705
|
) -> list[PodLoggingStatus]:
|
660
706
|
"""
|
661
707
|
Follow the logs of containers in the specified pod and publish it to airflow logging.
|
@@ -672,7 +718,13 @@ class PodManager(LoggingMixin):
|
|
672
718
|
pod_name=pod.metadata.name,
|
673
719
|
)
|
674
720
|
for c in containers_to_log:
|
675
|
-
status = self.fetch_container_logs(
|
721
|
+
status = self.fetch_container_logs(
|
722
|
+
pod=pod,
|
723
|
+
container_name=c,
|
724
|
+
follow=follow_logs,
|
725
|
+
container_name_log_prefix_enabled=container_name_log_prefix_enabled,
|
726
|
+
log_formatter=log_formatter,
|
727
|
+
)
|
676
728
|
pod_logging_statuses.append(status)
|
677
729
|
return pod_logging_statuses
|
678
730
|
|
@@ -38,9 +38,11 @@ AIRFLOW_V_3_1_PLUS = get_base_airflow_version_tuple() >= (3, 1, 0)
|
|
38
38
|
if AIRFLOW_V_3_1_PLUS:
|
39
39
|
from airflow.models.xcom import XCOM_RETURN_KEY
|
40
40
|
from airflow.sdk import BaseHook, BaseOperator
|
41
|
+
from airflow.sdk.definitions.context import context_merge
|
41
42
|
else:
|
42
43
|
from airflow.hooks.base import BaseHook # type: ignore[attr-defined,no-redef]
|
43
44
|
from airflow.models import BaseOperator
|
45
|
+
from airflow.utils.context import context_merge # type: ignore[attr-defined, no-redef]
|
44
46
|
from airflow.utils.xcom import XCOM_RETURN_KEY # type: ignore[no-redef]
|
45
47
|
|
46
48
|
if AIRFLOW_V_3_0_PLUS:
|
@@ -64,4 +66,5 @@ __all__ = [
|
|
64
66
|
"TaskDecorator",
|
65
67
|
"task_decorator_factory",
|
66
68
|
"XCOM_RETURN_KEY",
|
69
|
+
"context_merge",
|
67
70
|
]
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: apache-airflow-providers-cncf-kubernetes
|
3
|
-
Version: 10.7.0
|
3
|
+
Version: 10.8.0rc1
|
4
4
|
Summary: Provider package apache-airflow-providers-cncf-kubernetes for Apache Airflow
|
5
5
|
Keywords: airflow-provider,cncf.kubernetes,airflow,integration
|
6
6
|
Author-email: Apache Software Foundation <dev@airflow.apache.org>
|
@@ -21,14 +21,14 @@ Classifier: Programming Language :: Python :: 3.12
|
|
21
21
|
Classifier: Programming Language :: Python :: 3.13
|
22
22
|
Classifier: Topic :: System :: Monitoring
|
23
23
|
Requires-Dist: aiofiles>=23.2.0
|
24
|
-
Requires-Dist: apache-airflow>=2.10.0
|
24
|
+
Requires-Dist: apache-airflow>=2.10.0rc1
|
25
25
|
Requires-Dist: asgiref>=3.5.2
|
26
26
|
Requires-Dist: cryptography>=41.0.0
|
27
27
|
Requires-Dist: kubernetes>=32.0.0,<33.0.0
|
28
28
|
Requires-Dist: kubernetes_asyncio>=32.0.0,<33.0.0
|
29
29
|
Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
|
30
|
-
Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-cncf-kubernetes/10.7.0/changelog.html
|
31
|
-
Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-cncf-kubernetes/10.7.0
|
30
|
+
Project-URL: Changelog, https://airflow.staged.apache.org/docs/apache-airflow-providers-cncf-kubernetes/10.8.0/changelog.html
|
31
|
+
Project-URL: Documentation, https://airflow.staged.apache.org/docs/apache-airflow-providers-cncf-kubernetes/10.8.0
|
32
32
|
Project-URL: Mastodon, https://fosstodon.org/@airflow
|
33
33
|
Project-URL: Slack Chat, https://s.apache.org/airflow-slack
|
34
34
|
Project-URL: Source Code, https://github.com/apache/airflow
|
@@ -59,9 +59,8 @@ Project-URL: YouTube, https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/
|
|
59
59
|
|
60
60
|
Package ``apache-airflow-providers-cncf-kubernetes``
|
61
61
|
|
62
|
-
Release: ``10.7.0``
|
62
|
+
Release: ``10.8.0``
|
63
63
|
|
64
|
-
Release Date: ``|PypiReleaseDate|``
|
65
64
|
|
66
65
|
`Kubernetes <https://kubernetes.io/>`__
|
67
66
|
|
@@ -73,12 +72,12 @@ This is a provider package for ``cncf.kubernetes`` provider. All classes for thi
|
|
73
72
|
are in ``airflow.providers.cncf.kubernetes`` python package.
|
74
73
|
|
75
74
|
You can find package information and changelog for the provider
|
76
|
-
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-cncf-kubernetes/10.7.0/>`_.
|
75
|
+
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-cncf-kubernetes/10.8.0/>`_.
|
77
76
|
|
78
77
|
Installation
|
79
78
|
------------
|
80
79
|
|
81
|
-
You can install this package on top of an existing Airflow
|
80
|
+
You can install this package on top of an existing Airflow installation (see ``Requirements`` below
|
82
81
|
for the minimum Airflow version supported) via
|
83
82
|
``pip install apache-airflow-providers-cncf-kubernetes``
|
84
83
|
|
@@ -99,5 +98,5 @@ PIP package Version required
|
|
99
98
|
====================== ====================
|
100
99
|
|
101
100
|
The changelog for the provider package can be found in the
|
102
|
-
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-cncf-kubernetes/10.7.0/changelog.html>`_.
|
101
|
+
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-cncf-kubernetes/10.8.0/changelog.html>`_.
|
103
102
|
|
@@ -1,5 +1,5 @@
|
|
1
1
|
airflow/providers/cncf/kubernetes/LICENSE,sha256=gXPVwptPlW1TJ4HSuG5OMPg-a3h43OGMkZRR1rpwfJA,10850
|
2
|
-
airflow/providers/cncf/kubernetes/__init__.py,sha256=
|
2
|
+
airflow/providers/cncf/kubernetes/__init__.py,sha256=HVp4GW0M9NwgbjcJTNsjVu1VNqbixL0fo9pIMSfnWVU,1505
|
3
3
|
airflow/providers/cncf/kubernetes/callbacks.py,sha256=1nCLXFJKtr5FM9ApB8Drw5VAGSC3TDFsPSTMtRnAR3Q,6085
|
4
4
|
airflow/providers/cncf/kubernetes/exceptions.py,sha256=3cNEZTnrltBsqwzHiLfckwYYc_IWY1g4PcRs6zuMWWA,1137
|
5
5
|
airflow/providers/cncf/kubernetes/get_provider_info.py,sha256=Git4HycOcHrb4zD9W7ZYsqNDkQSQ4uipSJO_GaPiroE,16041
|
@@ -12,18 +12,18 @@ airflow/providers/cncf/kubernetes/python_kubernetes_script.jinja2,sha256=I0EHRGw
|
|
12
12
|
airflow/providers/cncf/kubernetes/python_kubernetes_script.py,sha256=KnTlZSWCZhwvj89fSc2kgIRTaI4iLNKPquHc2wXnluo,3460
|
13
13
|
airflow/providers/cncf/kubernetes/secret.py,sha256=wj-T9gouqau_X14slAstGmnSxqXJQzdLwUdURzHna0I,5209
|
14
14
|
airflow/providers/cncf/kubernetes/template_rendering.py,sha256=WSUBhjGSDhjNtA4IFlbYyX50rvYN6UA4dMk0cPqgOjo,3618
|
15
|
-
airflow/providers/cncf/kubernetes/version_compat.py,sha256=
|
15
|
+
airflow/providers/cncf/kubernetes/version_compat.py,sha256=dAsHieSJhaXavXi1_mvuUsrfmKNKCg2xCpOw-b7y1jo,2759
|
16
16
|
airflow/providers/cncf/kubernetes/backcompat/__init__.py,sha256=KXF76f3v1jIFUBNz8kwxVMvm7i4mNo35LbIG9IijBNc,1299
|
17
17
|
airflow/providers/cncf/kubernetes/backcompat/backwards_compat_converters.py,sha256=FkRRtIEucp2hYrecGVYVgyPI6-b7hE7X7L17Z3r459Y,4303
|
18
18
|
airflow/providers/cncf/kubernetes/cli/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
|
19
|
-
airflow/providers/cncf/kubernetes/cli/kubernetes_command.py,sha256=
|
19
|
+
airflow/providers/cncf/kubernetes/cli/kubernetes_command.py,sha256=zMIM39Ft-dBs8TzopnhtAIk1GJHFDq9zQXZmKyu8zRY,8281
|
20
20
|
airflow/providers/cncf/kubernetes/decorators/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
|
21
21
|
airflow/providers/cncf/kubernetes/decorators/kubernetes.py,sha256=d27TR2k-NbpwQSwHd7L265ZZYXiRBlPg7na7RsrH1Ik,6216
|
22
|
-
airflow/providers/cncf/kubernetes/decorators/kubernetes_cmd.py,sha256=
|
22
|
+
airflow/providers/cncf/kubernetes/decorators/kubernetes_cmd.py,sha256=tkQWnyr5PkldaDwVzsDyP_qYznl01ewtc_kkSpLYKtI,4690
|
23
23
|
airflow/providers/cncf/kubernetes/executors/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
|
24
|
-
airflow/providers/cncf/kubernetes/executors/kubernetes_executor.py,sha256=
|
25
|
-
airflow/providers/cncf/kubernetes/executors/kubernetes_executor_types.py,sha256=
|
26
|
-
airflow/providers/cncf/kubernetes/executors/kubernetes_executor_utils.py,sha256=
|
24
|
+
airflow/providers/cncf/kubernetes/executors/kubernetes_executor.py,sha256=sP0mpCL4DHcwy0AkaNXPMmjL1MbhHT7yHVVOtbBRmUo,34323
|
25
|
+
airflow/providers/cncf/kubernetes/executors/kubernetes_executor_types.py,sha256=F0IlLbC6qKMVNZwqnbgUPxwFsZdcRhot2kwBhzc9gSM,2698
|
26
|
+
airflow/providers/cncf/kubernetes/executors/kubernetes_executor_utils.py,sha256=wNvHSyGkEWFIPzxzinE5DhM2K4JTYDdIMqJxZCkGWNo,31503
|
27
27
|
airflow/providers/cncf/kubernetes/executors/local_kubernetes_executor.py,sha256=CWCN4b6Ircs-3tCxJjBsrjl4Q0ABBJIwqlZr7a5lW6k,12243
|
28
28
|
airflow/providers/cncf/kubernetes/hooks/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
|
29
29
|
airflow/providers/cncf/kubernetes/hooks/kubernetes.py,sha256=lsqlNxcqNkN_FGrT8aBCuWJlVV9Oo7BFwL9cWyoHZTw,37792
|
@@ -32,8 +32,8 @@ airflow/providers/cncf/kubernetes/kubernetes_executor_templates/basic_template.y
|
|
32
32
|
airflow/providers/cncf/kubernetes/operators/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
|
33
33
|
airflow/providers/cncf/kubernetes/operators/custom_object_launcher.py,sha256=jTVHQt1vp5gELrLNyM-DrZ1ywgmTy3Hh1i6wyl7AGS0,15314
|
34
34
|
airflow/providers/cncf/kubernetes/operators/job.py,sha256=lYUO_HAC_NyiyBZvNFZ_6Itk5bRCsI3BIyj3KZmHCpw,26775
|
35
|
-
airflow/providers/cncf/kubernetes/operators/kueue.py,sha256=
|
36
|
-
airflow/providers/cncf/kubernetes/operators/pod.py,sha256=
|
35
|
+
airflow/providers/cncf/kubernetes/operators/kueue.py,sha256=Lss1p7bkCdBPOlFhffhQSe4uBm9ztSVfPETPvpBfcUA,5422
|
36
|
+
airflow/providers/cncf/kubernetes/operators/pod.py,sha256=30TlKp46r2t4luGJ1Qf_uPO4U8M58IM1AKYlLuc1IeI,63883
|
37
37
|
airflow/providers/cncf/kubernetes/operators/resource.py,sha256=XQrlbLbk-tlN_CQnETa9hgFzxxL82hh-Fs0XM5SDhyg,7574
|
38
38
|
airflow/providers/cncf/kubernetes/operators/spark_kubernetes.py,sha256=lqZqehu-YUMDxFTwSlQm0e10pdoU5njus5TtMLOdEUI,13898
|
39
39
|
airflow/providers/cncf/kubernetes/pod_template_file_examples/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
|
@@ -52,9 +52,9 @@ airflow/providers/cncf/kubernetes/triggers/pod.py,sha256=AVk0-dJN_wjMeZtImMxan4J
|
|
52
52
|
airflow/providers/cncf/kubernetes/utils/__init__.py,sha256=ClZN0VPjWySdVwS_ktH7rrgL9VLAcs3OSJSB9s3zaYw,863
|
53
53
|
airflow/providers/cncf/kubernetes/utils/delete_from.py,sha256=poObZSoEJwQyaYWilEURs8f4CDY2sn_pfwS31Lf579A,5195
|
54
54
|
airflow/providers/cncf/kubernetes/utils/k8s_resource_iterator.py,sha256=pl-G-2WhZVbewKkwmL9AxPo1hAQWHHEPK43b-ruF4-w,1937
|
55
|
-
airflow/providers/cncf/kubernetes/utils/pod_manager.py,sha256=
|
55
|
+
airflow/providers/cncf/kubernetes/utils/pod_manager.py,sha256=w1Oe2q9AX0r8Rc_--Rq9xNOCgBRB48IAZOIf5cgrRx4,42444
|
56
56
|
airflow/providers/cncf/kubernetes/utils/xcom_sidecar.py,sha256=k6bdmVJ21OrAwGmWwledRrAmaty9ZrmbuM-IbaI4mqo,2519
|
57
|
-
apache_airflow_providers_cncf_kubernetes-10.
|
58
|
-
apache_airflow_providers_cncf_kubernetes-10.
|
59
|
-
apache_airflow_providers_cncf_kubernetes-10.
|
60
|
-
apache_airflow_providers_cncf_kubernetes-10.
|
57
|
+
apache_airflow_providers_cncf_kubernetes-10.8.0rc1.dist-info/entry_points.txt,sha256=ByD3QJJyP9CfmTYtpNI1953akD38RUDgpGXLaq9vpOw,111
|
58
|
+
apache_airflow_providers_cncf_kubernetes-10.8.0rc1.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
|
59
|
+
apache_airflow_providers_cncf_kubernetes-10.8.0rc1.dist-info/METADATA,sha256=PcCeudBhG-Oh1srWMyBqumsu81LrOiR8c9I1IJAXF2I,4322
|
60
|
+
apache_airflow_providers_cncf_kubernetes-10.8.0rc1.dist-info/RECORD,,
|
File without changes
|