dpone-airflow-pack 0.44.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dpone_airflow_pack-0.44.0/PKG-INFO +39 -0
- dpone_airflow_pack-0.44.0/README.md +15 -0
- dpone_airflow_pack-0.44.0/pyproject.toml +32 -0
- dpone_airflow_pack-0.44.0/src/dpone_airflow_pack/__init__.py +20 -0
- dpone_airflow_pack-0.44.0/src/dpone_airflow_pack/connection_projection.py +109 -0
- dpone_airflow_pack-0.44.0/src/dpone_airflow_pack/operators.py +123 -0
- dpone_airflow_pack-0.44.0/src/dpone_airflow_pack/outcome.py +40 -0
- dpone_airflow_pack-0.44.0/src/dpone_airflow_pack/outcome_gate.py +189 -0
- dpone_airflow_pack-0.44.0/src/dpone_airflow_pack/pack_tasks.py +238 -0
- dpone_airflow_pack-0.44.0/src/dpone_airflow_pack/runtime_adapter.py +328 -0
- dpone_airflow_pack-0.44.0/src/dpone_airflow_pack/step_tasks.py +95 -0
- dpone_airflow_pack-0.44.0/src/dpone_airflow_pack/xcom_sidecar.py +29 -0
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dpone-airflow-pack
|
|
3
|
+
Version: 0.44.0
|
|
4
|
+
Summary: Lightweight Airflow scheduler-side pack provider for dpone GitOps workloads
|
|
5
|
+
Keywords: airflow,gitops,data-engineering,scheduler,kubernetes
|
|
6
|
+
Author: PaulKov
|
|
7
|
+
License-Expression: Apache-2.0
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Topic :: Database
|
|
15
|
+
Classifier: Topic :: System :: Distributed Computing
|
|
16
|
+
Classifier: Typing :: Typed
|
|
17
|
+
Maintainer: PaulKov
|
|
18
|
+
Requires-Python: >=3.11, <3.13
|
|
19
|
+
Project-URL: Homepage, https://github.com/PaulKov/dpone
|
|
20
|
+
Project-URL: Repository, https://github.com/PaulKov/dpone
|
|
21
|
+
Project-URL: Issues, https://github.com/PaulKov/dpone/issues
|
|
22
|
+
Project-URL: Documentation, https://paulkov.github.io/dpone/
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
|
|
25
|
+
# dpone-airflow-pack
|
|
26
|
+
|
|
27
|
+
`dpone-airflow-pack` is the lightweight Airflow scheduler/webserver provider for dpone GitOps packs.
|
|
28
|
+
|
|
29
|
+
It only reads a static `airflow-pack.json` and builds visible Airflow/Kubernetes tasks. It does not import the full
|
|
30
|
+
`dpone` runtime and intentionally contains no source/sink/native transfer dependencies such as ClickHouse, MSSQL,
|
|
31
|
+
`pyodbc`, pandas, polars, or ConnectorX.
|
|
32
|
+
|
|
33
|
+
Recommended Airflow DAG import:
|
|
34
|
+
|
|
35
|
+
```python
|
|
36
|
+
from dpone_airflow_pack import build_dpone_gitops_task_group_from_pack
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
The full `dpone[full,accel]` package belongs in the KPO runtime image, not in the scheduler image.
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# dpone-airflow-pack
|
|
2
|
+
|
|
3
|
+
`dpone-airflow-pack` is the lightweight Airflow scheduler/webserver provider for dpone GitOps packs.
|
|
4
|
+
|
|
5
|
+
It only reads a static `airflow-pack.json` and builds visible Airflow/Kubernetes tasks. It does not import the full
|
|
6
|
+
`dpone` runtime and intentionally contains no source/sink/native transfer dependencies such as ClickHouse, MSSQL,
|
|
7
|
+
`pyodbc`, pandas, polars, or ConnectorX.
|
|
8
|
+
|
|
9
|
+
Recommended Airflow DAG import:
|
|
10
|
+
|
|
11
|
+
```python
|
|
12
|
+
from dpone_airflow_pack import build_dpone_gitops_task_group_from_pack
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
The full `dpone[full,accel]` package belongs in the KPO runtime image, not in the scheduler image.
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["uv_build>=0.9.18,<0.12.0"]
|
|
3
|
+
build-backend = "uv_build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "dpone-airflow-pack"
|
|
7
|
+
version = "0.44.0"
|
|
8
|
+
description = "Lightweight Airflow scheduler-side pack provider for dpone GitOps workloads"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.11,<3.13"
|
|
11
|
+
license = "Apache-2.0"
|
|
12
|
+
authors = [{ name = "PaulKov" }]
|
|
13
|
+
maintainers = [{ name = "PaulKov" }]
|
|
14
|
+
keywords = ["airflow", "gitops", "data-engineering", "scheduler", "kubernetes"]
|
|
15
|
+
dependencies = []
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 3 - Alpha",
|
|
18
|
+
"License :: OSI Approved :: Apache Software License",
|
|
19
|
+
"Operating System :: OS Independent",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3.11",
|
|
22
|
+
"Programming Language :: Python :: 3.12",
|
|
23
|
+
"Topic :: Database",
|
|
24
|
+
"Topic :: System :: Distributed Computing",
|
|
25
|
+
"Typing :: Typed",
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
[project.urls]
|
|
29
|
+
Homepage = "https://github.com/PaulKov/dpone"
|
|
30
|
+
Repository = "https://github.com/PaulKov/dpone"
|
|
31
|
+
Issues = "https://github.com/PaulKov/dpone/issues"
|
|
32
|
+
Documentation = "https://paulkov.github.io/dpone/"
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""Lightweight Airflow-side helpers for consuming dpone GitOps packs."""
|
|
2
|
+
|
|
3
|
+
from dpone_airflow_pack.pack_tasks import build_dpone_gitops_task_group_from_pack, load_dpone_airflow_pack
|
|
4
|
+
from dpone_airflow_pack.runtime_adapter import (
|
|
5
|
+
DponeAirflowArtifactError,
|
|
6
|
+
DponeAirflowContractError,
|
|
7
|
+
build_dpone_gitops_task_from_artifacts,
|
|
8
|
+
load_dpone_kpo_kwargs,
|
|
9
|
+
validate_dpone_artifacts,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"DponeAirflowArtifactError",
|
|
14
|
+
"DponeAirflowContractError",
|
|
15
|
+
"build_dpone_gitops_task_group_from_pack",
|
|
16
|
+
"build_dpone_gitops_task_from_artifacts",
|
|
17
|
+
"load_dpone_airflow_pack",
|
|
18
|
+
"load_dpone_kpo_kwargs",
|
|
19
|
+
"validate_dpone_artifacts",
|
|
20
|
+
]
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Mapping, Sequence
|
|
4
|
+
from typing import Protocol
|
|
5
|
+
from urllib.parse import parse_qsl, quote, urlencode, urlsplit, urlunsplit
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ConnectionUriReader(Protocol):
    """Structural interface for objects that turn a connection id into a URI string."""

    def read_uri(self, connection_id: str) -> str:
        """Return the URI for ``connection_id``."""
        ...
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class AirflowBaseHookConnectionReader:
    """Resolve connection URIs through Airflow's ``BaseHook`` when a task executes."""

    def read_uri(self, connection_id: str) -> str:
        """Return ``get_uri()`` of the Airflow connection registered as ``connection_id``."""
        # Imported inside the method so that importing this module does not
        # import ``airflow.hooks.base``.
        from airflow.hooks.base import BaseHook

        connection = BaseHook.get_connection(connection_id)
        return connection.get_uri()
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class RuntimeAirflowConnectionUriReader:
    """Wrap another reader and rewrite the URI scheme for selected connection ids."""

    def __init__(self, *, reader: ConnectionUriReader, scheme_overrides: Mapping[str, str]) -> None:
        """Keep the wrapped reader and a private copy of the per-connection schemes."""
        # Copying means later changes to the caller's mapping are not observed here.
        self._scheme_overrides = dict(scheme_overrides)
        self._reader = reader

    def read_uri(self, connection_id: str) -> str:
        """Return the wrapped reader's URI, with its scheme replaced when an override exists.

        A missing or empty override leaves the URI untouched.
        """
        uri = self._reader.read_uri(connection_id)
        override = self._scheme_overrides.get(connection_id)
        if not override:
            return uri
        return replace_uri_scheme(uri, override)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class UnsafeAirflowConnectionEnvBuilder:
    """Produce ``AIRFLOW_CONN_*`` environment values from a connection reader.

    The reader is only consulted inside :meth:`build`, so constructing the
    builder does not touch any connection.
    """

    def __init__(self, *, reader: ConnectionUriReader) -> None:
        """Remember the reader used to resolve connection URIs."""
        self._reader = reader

    def build(self, connection_ids: Sequence[str]) -> dict[str, str]:
        """Map each connection id's environment variable name to its URI.

        When two ids map to the same variable name, the later one wins.
        """
        env: dict[str, str] = {}
        for connection_id in connection_ids:
            env_name = airflow_conn_env_name(connection_id)
            env[env_name] = self._reader.read_uri(connection_id)
        return env
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def airflow_conn_env_name(connection_id: str) -> str:
    """Return the ``AIRFLOW_CONN_*`` variable name for ``connection_id``.

    The id is upper-cased and every non-alphanumeric character becomes ``_``.
    """
    sanitized: list[str] = []
    for char in connection_id.upper():
        sanitized.append(char if char.isalnum() else "_")
    return "AIRFLOW_CONN_" + "".join(sanitized)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def apply_database_overrides(env_vars: Mapping[str, str], database_overrides: Mapping[str, str]) -> dict[str, str]:
    """Return a copy of ``env_vars`` with the database part of selected URIs replaced.

    ``database_overrides`` is keyed by connection id. Overrides whose
    environment variable is not present in ``env_vars`` are ignored.
    """
    result = dict(env_vars)
    for connection_id, database in database_overrides.items():
        key = airflow_conn_env_name(connection_id)
        if key not in result:
            continue
        result[key] = replace_uri_database(result[key], database)
    return result
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def apply_query_overrides(
    env_vars: Mapping[str, str],
    query_overrides: Mapping[str, Mapping[str, str]],
) -> dict[str, str]:
    """Return a copy of ``env_vars`` with query parameters merged into selected URIs.

    ``query_overrides`` is keyed by connection id. Overrides whose environment
    variable is not present in ``env_vars`` are ignored.
    """
    result = dict(env_vars)
    for connection_id, values in query_overrides.items():
        key = airflow_conn_env_name(connection_id)
        if key not in result:
            continue
        result[key] = merge_uri_query(result[key], values)
    return result
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def replace_uri_database(uri: str, database: str) -> str:
    """Return ``uri`` with its path replaced by ``/<database>``.

    The database name is stripped and fully percent-encoded. Scheme, netloc,
    query and fragment are carried over unchanged.

    Raises:
        ValueError: if ``database`` is empty or only whitespace.
    """
    target = str(database or "").strip()
    if target == "":
        raise ValueError("Runtime database override must be non-empty")
    scheme, netloc, _old_path, query, fragment = urlsplit(uri)
    new_path = "/" + quote(target, safe="")
    return urlunsplit((scheme, netloc, new_path, query, fragment))
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def replace_uri_scheme(uri: str, scheme: str) -> str:
    """Return ``uri`` with its scheme replaced by the stripped ``scheme``.

    Netloc, path, query and fragment are carried over unchanged.

    Raises:
        ValueError: if ``scheme`` is empty or only whitespace.
    """
    target = str(scheme or "").strip()
    if target == "":
        raise ValueError("Runtime scheme override must be non-empty")
    _old_scheme, netloc, path, query, fragment = urlsplit(uri)
    return urlunsplit((target, netloc, path, query, fragment))
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def merge_uri_query(uri: str, values: Mapping[str, str]) -> str:
    """Return ``uri`` with the query parameters in ``values`` merged in.

    Entries of ``values`` with an empty key, or with a value of ``None`` or
    ``""``, are ignored. An overridden parameter keeps the position of its
    first occurrence in the original query; parameters that are new to the
    URI are appended in the order given. Repeated parameters that are not
    overridden are preserved as-is rather than being collapsed to their last
    value.
    """
    overrides = {str(key): str(value) for key, value in values.items() if str(key) and value not in (None, "")}
    parts = urlsplit(uri)
    merged: list[tuple[str, str]] = []
    applied: set[str] = set()
    for key, value in parse_qsl(parts.query, keep_blank_values=True):
        if key not in overrides:
            merged.append((key, value))
        elif key not in applied:
            # Substitute at the first occurrence; later duplicates of an
            # overridden key are dropped so the override is authoritative.
            merged.append((key, overrides[key]))
            applied.add(key)
    merged.extend((key, value) for key, value in overrides.items() if key not in applied)
    return urlunsplit((parts.scheme, parts.netloc, parts.path, urlencode(merged), parts.fragment))
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
__all__ = [
|
|
99
|
+
"AirflowBaseHookConnectionReader",
|
|
100
|
+
"ConnectionUriReader",
|
|
101
|
+
"RuntimeAirflowConnectionUriReader",
|
|
102
|
+
"UnsafeAirflowConnectionEnvBuilder",
|
|
103
|
+
"airflow_conn_env_name",
|
|
104
|
+
"apply_database_overrides",
|
|
105
|
+
"apply_query_overrides",
|
|
106
|
+
"merge_uri_query",
|
|
107
|
+
"replace_uri_database",
|
|
108
|
+
"replace_uri_scheme",
|
|
109
|
+
]
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Mapping, Sequence
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from dpone_airflow_pack.connection_projection import (
|
|
7
|
+
AirflowBaseHookConnectionReader,
|
|
8
|
+
ConnectionUriReader,
|
|
9
|
+
RuntimeAirflowConnectionUriReader,
|
|
10
|
+
UnsafeAirflowConnectionEnvBuilder,
|
|
11
|
+
apply_database_overrides,
|
|
12
|
+
apply_query_overrides,
|
|
13
|
+
)
|
|
14
|
+
from dpone_airflow_pack.xcom_sidecar import XComSidecarRuntimeConfig, pin_xcom_sidecar_image
|
|
15
|
+
|
|
16
|
+
try:
    from airflow.providers.cncf.kubernetes.operators.pod import KubernetesPodOperator
except Exception:  # pragma: no cover - local tests may not have Airflow installed.

    class KubernetesPodOperator:  # type: ignore[no-redef]
        """Minimal stand-in defined when the Airflow Kubernetes provider import fails.

        It records the constructor keyword arguments and exposes the attributes
        and methods that the subclasses in this module rely on.
        """

        def __init__(self, **kwargs: Any) -> None:
            """Keep all keyword arguments and mirror ``do_xcom_push`` / ``env_vars`` as attributes."""
            self.kwargs = kwargs
            self.do_xcom_push = bool(kwargs.get("do_xcom_push", False))
            self.env_vars = kwargs.get("env_vars")

        def build_pod_request_obj(self, context: Any | None = None) -> Any:
            """Return the ``full_pod_spec`` keyword argument unchanged (``None`` when absent)."""
            return self.kwargs.get("full_pod_spec")

        def execute(self, context: Any) -> Any:
            """Return a marker dict carrying the task id; no pod is launched."""
            return {"kind": "dpone.airflow_fallback_kpo_execute", "task_id": self.kwargs.get("task_id")}
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# Annotation key/value pair exported by this module for the unsafe connection-env mode.
# NOTE(review): nothing in this module applies them; presumably a pod annotation set
# elsewhere — confirm against the pack renderer.
UNSAFE_AIRFLOW_CONNECTION_ENV_ANNOTATION = "dpone.dev/unsafe-airflow-connection-env"
UNSAFE_AIRFLOW_CONNECTION_ENV_VALUE = "non-prod-only"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class PinnedXComSidecarKubernetesPodOperator(KubernetesPodOperator):  # type: ignore[misc,valid-type]
    """KubernetesPodOperator that pins the XCom sidecar image on the pod it builds.

    When ``do_xcom_push`` is truthy, the pod returned by the parent class is
    passed to ``pin_xcom_sidecar_image`` together with the configured image.
    """

    def __init__(self, *, xcom_sidecar: XComSidecarRuntimeConfig | None = None, **kwargs: Any) -> None:
        """Forward ``kwargs`` to the parent operator and store the sidecar configuration.

        Args:
            xcom_sidecar: Sidecar settings; a default ``XComSidecarRuntimeConfig()``
                is used when omitted.
            **kwargs: Passed through unchanged to the parent operator.
        """
        super().__init__(**kwargs)
        self.xcom_sidecar = xcom_sidecar or XComSidecarRuntimeConfig()

    def build_pod_request_obj(self, context: Any | None = None) -> Any:
        """Build the pod via the parent class and pin the sidecar image if XCom push is on."""
        pod = super().build_pod_request_obj(context=context)
        # getattr with a default: the attribute is read defensively in case the
        # parent class did not set it.
        if getattr(self, "do_xcom_push", False):
            pin_xcom_sidecar_image(pod, self.xcom_sidecar.image)
        return pod
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class UnsafeAirflowConnectionEnvKubernetesPodOperator(PinnedXComSidecarKubernetesPodOperator):
    """Inject Airflow Connection URIs into pod env at execution time.

    This non-production adapter is useful while a platform moves from Airflow
    Connection projection to Secret/Vault projection. It never reads connection
    values during DAG parse.
    """

    def __init__(
        self,
        *,
        unsafe_airflow_connection_ids: Sequence[str],
        unsafe_connection_reader: ConnectionUriReader | None = None,
        unsafe_runtime_database_overrides: Mapping[str, str] | None = None,
        unsafe_runtime_query_overrides: Mapping[str, Mapping[str, str]] | None = None,
        unsafe_runtime_scheme_overrides: Mapping[str, str] | None = None,
        **kwargs: Any,
    ) -> None:
        """Store the connection ids and override mappings; no connection is read here.

        Args:
            unsafe_airflow_connection_ids: Connection ids whose URIs are injected
                when the task executes.
            unsafe_connection_reader: Optional reader; when omitted an
                ``AirflowBaseHookConnectionReader`` is created at execution time.
            unsafe_runtime_database_overrides: Per-connection database name replacements.
            unsafe_runtime_query_overrides: Per-connection query parameters to merge in.
            unsafe_runtime_scheme_overrides: Per-connection URI scheme replacements.
            **kwargs: Passed through to the parent operator.
        """
        super().__init__(**kwargs)
        # Copy every input so later mutation of the caller's objects has no effect.
        self.unsafe_airflow_connection_ids = tuple(unsafe_airflow_connection_ids)
        self.unsafe_runtime_database_overrides = dict(unsafe_runtime_database_overrides or {})
        self.unsafe_runtime_query_overrides = {
            connection_id: dict(values) for connection_id, values in dict(unsafe_runtime_query_overrides or {}).items()
        }
        self.unsafe_runtime_scheme_overrides = dict(unsafe_runtime_scheme_overrides or {})
        self._unsafe_connection_reader = unsafe_connection_reader

    def execute(self, context: Any) -> Any:
        """Resolve the connection URIs, merge them into ``env_vars`` and run the parent operator.

        Database overrides are applied first, then query overrides. Scheme
        overrides are applied by the reader returned from ``_reader``.
        """
        builder = UnsafeAirflowConnectionEnvBuilder(reader=self._reader())
        direct_env = apply_query_overrides(
            apply_database_overrides(
                builder.build(self.unsafe_airflow_connection_ids),
                self.unsafe_runtime_database_overrides,
            ),
            self.unsafe_runtime_query_overrides,
        )
        # Injected values take precedence over same-named entries already present.
        self.env_vars = merge_env_vars(getattr(self, "env_vars", None), direct_env)
        return super().execute(context)

    def _reader(self) -> ConnectionUriReader:
        """Return the connection reader, wrapped for scheme rewriting when overrides exist."""
        reader = self._unsafe_connection_reader or AirflowBaseHookConnectionReader()
        if not self.unsafe_runtime_scheme_overrides:
            return reader
        return RuntimeAirflowConnectionUriReader(reader=reader, scheme_overrides=self.unsafe_runtime_scheme_overrides)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def merge_env_vars(existing: object, injected: Mapping[str, str]) -> object:
    """Merge ``injected`` environment values into ``existing``; injected values win.

    Args:
        existing: Current ``env_vars`` value. Falsy values, mappings and lists
            are supported.
        injected: Name-to-value pairs to add.

    Returns:
        A new dict when ``existing`` is falsy or a mapping. Otherwise a new list
        in which entries whose name is being injected are dropped and the
        injected entries are appended. Names are read from either a ``name``
        attribute or a ``"name"`` key, so dict-shaped entries are replaced
        instead of being duplicated.

    Raises:
        TypeError: if ``existing`` is truthy and neither a mapping nor a list.
    """
    if not existing:
        return dict(injected)
    if isinstance(existing, Mapping):
        merged = dict(existing)
        merged.update(injected)
        return merged
    if isinstance(existing, list):
        injected_names = set(injected)
        preserved = [env_var for env_var in existing if _env_var_name(env_var) not in injected_names]
        return [*preserved, *(_new_env_var(name, value) for name, value in injected.items())]
    raise TypeError(f"Unsupported env_vars type for unsafe Airflow env injection: {type(existing)!r}")


def _env_var_name(env_var: object) -> object:
    """Return the name of one env entry, whether it is dict-shaped or has a ``name`` attribute."""
    if isinstance(env_var, Mapping):
        return env_var.get("name")
    return getattr(env_var, "name", None)


def _new_env_var(name: str, value: str) -> object:
    """Build one env entry: a ``V1EnvVar`` when ``kubernetes`` imports, else a plain dict."""
    try:
        from kubernetes.client import models as k8s
    except Exception:  # pragma: no cover
        return {"name": name, "value": value}
    return k8s.V1EnvVar(name=name, value=value)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
__all__ = [
|
|
118
|
+
"PinnedXComSidecarKubernetesPodOperator",
|
|
119
|
+
"UNSAFE_AIRFLOW_CONNECTION_ENV_ANNOTATION",
|
|
120
|
+
"UNSAFE_AIRFLOW_CONNECTION_ENV_VALUE",
|
|
121
|
+
"UnsafeAirflowConnectionEnvKubernetesPodOperator",
|
|
122
|
+
"merge_env_vars",
|
|
123
|
+
]
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from collections.abc import Mapping
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from dpone_airflow_pack.outcome_gate import GitOpsAirflowOutcomeGateEvaluator
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def evaluate_pack_outcome(
    *, upstream_task_id: str, required_status: str = "passed", ti: Any = None, **_: Any
) -> dict[str, Any]:
    """Gate on the XCom summary pushed by the upstream dpone runtime task.

    Args:
        upstream_task_id: Task id whose XCom value is pulled and evaluated.
        required_status: Status the summary must report for the gate to pass.
        ti: Task instance exposing ``xcom_pull``.
        **_: Extra keyword arguments are accepted and ignored.

    Returns:
        The JSON-serialisable gate report when the gate passes.

    Raises:
        RuntimeError: if the gate does not pass; the message is the report
            serialised as JSON with sorted keys.
    """

    xcom_summary = _pull_summary(ti=ti, upstream_task_id=upstream_task_id)
    evaluator = GitOpsAirflowOutcomeGateEvaluator()
    report = evaluator.evaluate(
        xcom_summary_path=f"xcom://{upstream_task_id}",
        xcom_summary=xcom_summary,
        required_status=required_status,
    )
    result = report.to_jsonable()
    if report.passed:
        return result
    raise RuntimeError(json.dumps(result, ensure_ascii=False, sort_keys=True))
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _pull_summary(*, ti: Any, upstream_task_id: str) -> Mapping[str, Any]:
    """Pull the upstream task's XCom value and return it as a mapping.

    A mapping is returned as-is. A string is parsed as JSON and must decode to
    an object.

    Raises:
        RuntimeError: if ``ti`` is missing or has no ``xcom_pull``, or if the
            pulled value is not a mapping or a JSON object string. A string
            that is not valid JSON raises this same error, with the decode
            error chained as its cause.
    """
    if ti is None or not hasattr(ti, "xcom_pull"):
        raise RuntimeError("Airflow task instance with xcom_pull is required to evaluate dpone outcome")
    raw_summary = ti.xcom_pull(task_ids=upstream_task_id)
    if isinstance(raw_summary, Mapping):
        return raw_summary
    if isinstance(raw_summary, str):
        try:
            payload = json.loads(raw_summary)
        except json.JSONDecodeError as exc:
            # Surface malformed JSON through the same error as any other
            # unusable summary instead of leaking the decoder's exception.
            raise RuntimeError(
                f"dpone runtime task `{upstream_task_id}` did not return a JSON object XCom summary"
            ) from exc
        if isinstance(payload, Mapping):
            return payload
    raise RuntimeError(f"dpone runtime task `{upstream_task_id}` did not return a JSON object XCom summary")
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
__all__ = ["evaluate_pack_outcome"]
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from collections.abc import Mapping
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
# Outcome mode identifiers recognised by normalize_airflow_outcome_mode.
AIRFLOW_OUTCOME_STRICT_FAIL = "strict_fail"
AIRFLOW_OUTCOME_XCOM_THEN_GATE = "xcom_then_gate"
AIRFLOW_OUTCOME_MODES = (AIRFLOW_OUTCOME_STRICT_FAIL, AIRFLOW_OUTCOME_XCOM_THEN_GATE)
# Default `source` / `producer` label for issues and reports created in this module.
_OUTCOME_SOURCE = "dpone airflow-pack outcome-gate"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass(frozen=True, slots=True)
class AirflowPackIssue:
    """Immutable description of one blocker or warning."""

    code: str
    message: str = ""
    path: str = ""
    source: str = _OUTCOME_SOURCE

    def to_jsonable(self) -> dict[str, str]:
        """Return the issue as a plain dict with keys code, message, path, source."""
        field_names = ("code", "message", "path", "source")
        return {field_name: getattr(self, field_name) for field_name in field_names}
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass(frozen=True, slots=True)
class GitOpsAirflowOutcomeGateReport:
    """Immutable result of checking one XCom summary against a required status."""

    xcom_summary_path: str
    required_status: str
    status: str
    passed: bool
    run_spec_path: str = ""
    runtime_evidence_path: str = ""
    runtime_evidence_sha256: str | None = None
    failed_step: str | None = None
    step_counts: dict[str, int] | None = None
    warnings: tuple[AirflowPackIssue, ...] = ()
    blockers: tuple[AirflowPackIssue, ...] = ()
    kind: str = "gitops.airflow_outcome_gate"
    schema_version: str = "1"
    producer: str = _OUTCOME_SOURCE

    def to_jsonable(self) -> dict[str, Any]:
        """Return the report as a plain dict.

        ``runtime_evidence_sha256`` and ``step_counts`` are included only when
        they are not ``None``.
        """
        warning_items = [issue.to_jsonable() for issue in self.warnings]
        blocker_items = [issue.to_jsonable() for issue in self.blockers]
        payload: dict[str, Any] = dict(
            kind=self.kind,
            schema_version=self.schema_version,
            producer=self.producer,
            xcom_summary_path=self.xcom_summary_path,
            required_status=self.required_status,
            status=self.status,
            passed=self.passed,
            run_spec_path=self.run_spec_path,
            runtime_evidence_path=self.runtime_evidence_path,
            failed_step=self.failed_step,
            warnings=warning_items,
            blockers=blocker_items,
        )
        if self.runtime_evidence_sha256 is not None:
            payload["runtime_evidence_sha256"] = self.runtime_evidence_sha256
        if self.step_counts is not None:
            # Copy so the caller cannot mutate the report's own dict.
            payload["step_counts"] = dict(self.step_counts)
        return payload

    def to_json(self) -> str:
        """Return ``to_jsonable()`` as indented JSON followed by a newline."""
        rendered = json.dumps(self.to_jsonable(), ensure_ascii=False, indent=2)
        return rendered + "\n"
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class GitOpsAirflowOutcomeGateEvaluator:
    """Turn an Airflow XCom summary into a gate report, without importing full dpone."""

    def evaluate(
        self,
        *,
        xcom_summary_path: str,
        xcom_summary: Mapping[str, Any],
        required_status: str = "passed",
    ) -> GitOpsAirflowOutcomeGateReport:
        """Evaluate ``xcom_summary`` and return the resulting report.

        Blockers are collected in this order: an invalid ``kind``, a status
        that differs from the required one, then the blockers listed in the
        summary itself. The report passes only when no blocker was collected.

        Args:
            xcom_summary_path: Label recorded on the report and on generated issues.
            xcom_summary: The summary mapping to evaluate.
            required_status: Expected status; blank values fall back to ``"passed"``.
        """
        expected_status = _text(required_status) or "passed"
        actual_status = _text(xcom_summary.get("status")) or "unknown"
        kind_issues = _kind_blockers(path=xcom_summary_path, summary=xcom_summary)
        status_issues = _status_blockers(
            path=xcom_summary_path,
            status=actual_status,
            required_status=expected_status,
        )
        reported_issues = _issues(xcom_summary.get("blockers"))
        blockers = (*kind_issues, *status_issues, *reported_issues)
        # NOTE(review): the report's `warnings` field is left at its default here;
        # confirm whether warnings from the summary should be carried over.
        return GitOpsAirflowOutcomeGateReport(
            xcom_summary_path=xcom_summary_path,
            required_status=expected_status,
            status=actual_status,
            passed=len(blockers) == 0,
            run_spec_path=_text(xcom_summary.get("run_spec_path")),
            runtime_evidence_path=_text(xcom_summary.get("runtime_evidence_path")),
            runtime_evidence_sha256=_optional_text(xcom_summary.get("runtime_evidence_sha256")),
            failed_step=_optional_text(xcom_summary.get("failed_step")),
            step_counts=_step_counts(xcom_summary.get("step_counts")),
            blockers=blockers,
        )
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def normalize_airflow_outcome_mode(raw_mode: object) -> str:
    """Return ``raw_mode`` as a known outcome mode name.

    Blank or unrecognised values fall back to ``AIRFLOW_OUTCOME_STRICT_FAIL``.
    """
    candidate = _text(raw_mode)
    if candidate in AIRFLOW_OUTCOME_MODES:
        return candidate
    return AIRFLOW_OUTCOME_STRICT_FAIL
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def airflow_outcome_mode_names() -> tuple[str, ...]:
    """Return the tuple of supported outcome mode names (``AIRFLOW_OUTCOME_MODES``)."""
    return AIRFLOW_OUTCOME_MODES
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _kind_blockers(*, path: str, summary: Mapping[str, Any]) -> tuple[AirflowPackIssue, ...]:
    """Return one blocker when the summary's ``kind`` is not the expected value, else ``()``."""
    if summary.get("kind") != "gitops.airflow_xcom_summary":
        invalid_kind = AirflowPackIssue(
            code="airflow_outcome_kind_invalid",
            message="XCom summary kind must be gitops.airflow_xcom_summary",
            path=path,
        )
        return (invalid_kind,)
    return ()
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _status_blockers(*, path: str, status: str, required_status: str) -> tuple[AirflowPackIssue, ...]:
    """Return one blocker when ``status`` differs from ``required_status``, else ``()``."""
    if status != required_status:
        mismatch = AirflowPackIssue(
            code="airflow_outcome_failed",
            message=f"Airflow XCom outcome status is `{status}`, expected `{required_status}`",
            path=path,
        )
        return (mismatch,)
    return ()
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _issues(raw_items: object) -> tuple[AirflowPackIssue, ...]:
    """Convert a list of issue mappings into ``AirflowPackIssue`` objects.

    Anything that is not a list yields ``()``. List entries that are not
    mappings, or whose ``code`` is blank, are skipped. A blank ``source``
    defaults to ``"dpone airflow-pack runtime"``.
    """
    if not isinstance(raw_items, list):
        return ()
    collected: list[AirflowPackIssue] = []
    for entry in raw_items:
        if not isinstance(entry, Mapping):
            continue
        code = _text(entry.get("code"))
        if not code:
            continue
        issue = AirflowPackIssue(
            code=code,
            message=_text(entry.get("message")),
            path=_text(entry.get("path")),
            source=_text(entry.get("source")) or "dpone airflow-pack runtime",
        )
        collected.append(issue)
    return tuple(collected)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def _step_counts(value: object) -> dict[str, int] | None:
    """Return the integer entries of ``value`` keyed by ``str(key)``.

    Non-mapping input yields ``None``. Entries whose value is a bool or not
    an int are dropped.
    """
    if not isinstance(value, Mapping):
        return None
    return {
        str(key): count
        for key, count in value.items()
        if isinstance(count, int) and not isinstance(count, bool)
    }
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def _optional_text(value: object) -> str | None:
    """Return the stripped text of ``value``, or ``None`` when that text is empty."""
    return _text(value) or None
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def _text(value: object) -> str:
    """Return ``str(value)`` stripped of surrounding whitespace; any falsy value yields ``""``."""
    if not value:
        return ""
    return str(value).strip()
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
__all__ = [
|
|
181
|
+
"AIRFLOW_OUTCOME_MODES",
|
|
182
|
+
"AIRFLOW_OUTCOME_STRICT_FAIL",
|
|
183
|
+
"AIRFLOW_OUTCOME_XCOM_THEN_GATE",
|
|
184
|
+
"AirflowPackIssue",
|
|
185
|
+
"GitOpsAirflowOutcomeGateEvaluator",
|
|
186
|
+
"GitOpsAirflowOutcomeGateReport",
|
|
187
|
+
"airflow_outcome_mode_names",
|
|
188
|
+
"normalize_airflow_outcome_mode",
|
|
189
|
+
]
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
"""Build Airflow tasks from one compact dpone GitOps pack."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from collections.abc import Mapping
|
|
7
|
+
from copy import deepcopy
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from dpone_airflow_pack.operators import (
|
|
12
|
+
PinnedXComSidecarKubernetesPodOperator,
|
|
13
|
+
UnsafeAirflowConnectionEnvKubernetesPodOperator,
|
|
14
|
+
)
|
|
15
|
+
from dpone_airflow_pack.outcome import evaluate_pack_outcome
|
|
16
|
+
from dpone_airflow_pack.runtime_adapter import DponeAirflowContractError
|
|
17
|
+
from dpone_airflow_pack.xcom_sidecar import XComSidecarRuntimeConfig
|
|
18
|
+
|
|
19
|
+
# Operator keyword names grouped as contract-owned.
# NOTE(review): presumably consulted by _safe_overrides so operator_overrides cannot
# replace them — that helper is not visible here; confirm.
_CONTRACT_OWNED_FIELDS = frozenset({"cmds", "arguments", "image", "namespace", "full_pod_spec", "do_xcom_push"})
# Keys that _operator_kwargs strips from kpo_kwargs before they reach an operator.
_PACK_METADATA_FIELDS = frozenset({"outcome_mode", "resources_profile"})
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def load_dpone_airflow_pack(pack_path: str | Path) -> dict[str, Any]:
    """Read a compact `gitops.airflow_pack` JSON file and check its basic shape.

    Args:
        pack_path: Location of the pack file, read as UTF-8.

    Returns:
        The decoded pack dict.

    Raises:
        DponeAirflowContractError: if the file does not exist, is not valid
            JSON, is not an object of kind ``gitops.airflow_pack``, or has no
            dict-valued ``kpo_kwargs``.
    """

    path = Path(pack_path)
    location = str(path)
    try:
        raw_text = path.read_text(encoding="utf-8")
        payload = json.loads(raw_text)
    except FileNotFoundError as exc:
        issue = {"code": "airflow_pack_missing", "path": location, "message": "Pack file does not exist"}
        raise DponeAirflowContractError(f"dpone Airflow pack is missing: {path}", blockers=(issue,)) from exc
    except json.JSONDecodeError as exc:
        issue = {"code": "airflow_pack_json_invalid", "path": location, "message": exc.msg}
        raise DponeAirflowContractError(f"dpone Airflow pack is invalid JSON: {path}", blockers=(issue,)) from exc

    has_expected_kind = isinstance(payload, dict) and payload.get("kind") == "gitops.airflow_pack"
    if not has_expected_kind:
        issue = {"code": "airflow_pack_kind_invalid", "path": location, "message": "Expected gitops.airflow_pack"}
        raise DponeAirflowContractError(f"dpone Airflow pack has invalid kind: {path}", blockers=(issue,))

    if not isinstance(payload.get("kpo_kwargs"), dict):
        issue = {"code": "airflow_pack_kpo_kwargs_missing", "path": location, "message": "kpo_kwargs is required"}
        raise DponeAirflowContractError(f"dpone Airflow pack has no kpo_kwargs: {path}", blockers=(issue,))

    return payload
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def build_dpone_gitops_task_group_from_pack(
    pack_path: str | Path,
    *,
    dag: Any = None,
    operator_overrides: Mapping[str, Any] | None = None,
) -> dict[str, Any]:
    """Build compact dpone Airflow tasks from one static pack.

    The helper intentionally performs no manifest parsing, GitOps rendering,
    Kubernetes API calls, or network I/O at Airflow parse time.

    Args:
        pack_path: Path of the pack JSON file, loaded with ``load_dpone_airflow_pack``.
        dag: Passed as ``dag=`` to every operator created here.
        operator_overrides: Extra operator keyword arguments, filtered through
            ``_safe_overrides`` before being applied.

    Returns:
        A dict of created tasks: one entry per pre-hook step keyed by step
        name, the runtime task under ``"dpone_runtime"``, and — when
        ``_outcome_task`` returns a task — the gate under ``"outcome_gate"``.
    """

    pack = load_dpone_airflow_pack(pack_path)
    kwargs = _runtime_operator_kwargs(pack)
    # The runtime task always pushes XCom; overrides are merged afterwards.
    # NOTE(review): whether an override could change do_xcom_push depends on
    # _safe_overrides, which is not visible here — confirm.
    kwargs["do_xcom_push"] = True
    kwargs.update(_safe_overrides(operator_overrides))
    step_tasks = _hook_step_tasks(pack=pack, dag=dag, operator_overrides=operator_overrides)
    runtime = _operator_class(pack)(dag=dag, **_operator_init_kwargs(pack, kwargs))
    tasks = {**step_tasks, "dpone_runtime": runtime}
    _chain_pack_steps(pack, tasks)
    # Every terminal hook task is chained in front of the runtime task.
    for terminal in _terminal_hook_task_ids(pack):
        _chain(tasks[terminal], runtime)
    # kwargs["task_id"] must be present at this point; a pack without it raises KeyError.
    outcome = _outcome_task(pack=pack, dag=dag, upstream_task_id=str(kwargs["task_id"]))
    if outcome is not None:
        tasks["outcome_gate"] = outcome
        _chain(runtime, outcome)
    return tasks
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _operator_kwargs(raw_kwargs: Mapping[str, Any]) -> dict[str, Any]:
    """Return a copy of ``raw_kwargs`` without the pack metadata keys."""
    filtered = dict(raw_kwargs)
    for metadata_key in _PACK_METADATA_FIELDS:
        filtered.pop(metadata_key, None)
    return filtered
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _runtime_operator_kwargs(pack: Mapping[str, Any]) -> dict[str, Any]:
    """Derive the runtime operator keyword arguments from ``pack``.

    Starts from the pack's ``kpo_kwargs`` (metadata keys removed). A non-blank
    ``runtime_command`` replaces ``cmds``/``arguments`` with a ``/bin/sh -ec``
    invocation, and a mapping-valued ``pod_spec`` is deserialised into
    ``full_pod_spec``.
    """
    kwargs = _operator_kwargs(pack["kpo_kwargs"])
    if command := str(pack.get("runtime_command") or "").strip():
        kwargs.update({"cmds": ["/bin/sh", "-ec"], "arguments": [command]})
    pod_spec = pack.get("pod_spec")
    if not isinstance(pod_spec, Mapping):
        return kwargs
    kwargs["full_pod_spec"] = _deserialize_pod_spec(pod_spec)
    return kwargs
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _hook_step_tasks(
    *,
    pack: Mapping[str, Any],
    dag: Any,
    operator_overrides: Mapping[str, Any] | None,
) -> dict[str, Any]:
    """Create one operator per ``pre_hook`` step of the pack, keyed by step name.

    Each task starts from a fresh copy of the runtime operator kwargs, runs
    the step's command through ``/bin/sh -ec`` and never pushes XCom. A
    mapping-valued ``full_pod_spec`` gets a step-specific pod name. Filtered
    ``operator_overrides`` are applied last.
    """
    created: dict[str, Any] = {}
    pre_hooks = (step for step in _pack_steps(pack) if step.get("phase") == "pre_hook")
    for step in pre_hooks:
        # Rebuilt per step so tasks never share mutable kwargs.
        kwargs = _runtime_operator_kwargs(pack)
        step_name = str(step["name"])
        kwargs["task_id"] = step_name
        kwargs["name"] = step_name.replace("_", "-")
        kwargs["cmds"] = ["/bin/sh", "-ec"]
        kwargs["arguments"] = [str(step["command"])]
        kwargs["do_xcom_push"] = False
        pod_spec = kwargs.get("full_pod_spec")
        if isinstance(pod_spec, Mapping):
            kwargs["full_pod_spec"] = _pod_spec_for_step(pod_spec, step_name=step_name)
        kwargs.update(_safe_overrides(operator_overrides))
        operator_cls = _operator_class(pack)
        created[step_name] = operator_cls(dag=dag, **_operator_init_kwargs(pack, kwargs))
    return created
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _operator_class(pack: Mapping[str, Any]) -> type[Any]:
    """Select the operator class from the pack's ``connection_projection`` mode."""
    mode = _mapping(pack.get("connection_projection")).get("mode")
    if mode != "unsafe_airflow_env":
        return PinnedXComSidecarKubernetesPodOperator
    return UnsafeAirflowConnectionEnvKubernetesPodOperator
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _operator_init_kwargs(pack: Mapping[str, Any], kwargs: Mapping[str, Any]) -> dict[str, Any]:
    """Extend a copy of ``kwargs`` with pack-driven operator constructor arguments.

    Adds ``xcom_sidecar`` when the pack names a sidecar image. When the
    connection projection mode is ``"unsafe_airflow_env"`` it also adds the
    ``unsafe_*`` connection id and override arguments.
    """
    init_kwargs = dict(kwargs)

    sidecar_image = _mapping(pack.get("xcom")).get("sidecar_image")
    if sidecar_image:
        init_kwargs["xcom_sidecar"] = XComSidecarRuntimeConfig(image=str(sidecar_image))

    projection = _mapping(pack.get("connection_projection"))
    if projection.get("mode") != "unsafe_airflow_env":
        return init_kwargs

    connection_ids = tuple(str(item) for item in _sequence(projection.get("connection_ids")))
    unsafe_kwargs = {
        "unsafe_airflow_connection_ids": connection_ids,
        "unsafe_runtime_database_overrides": _string_mapping(projection.get("database_overrides")),
        "unsafe_runtime_query_overrides": _nested_string_mapping(projection.get("query_overrides")),
        "unsafe_runtime_scheme_overrides": _string_mapping(projection.get("scheme_overrides")),
    }
    init_kwargs.update(unsafe_kwargs)
    return init_kwargs
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _pod_spec_for_step(pod_spec: Mapping[str, Any], *, step_name: str) -> dict[str, Any]:
|
|
152
|
+
payload = deepcopy(dict(pod_spec))
|
|
153
|
+
metadata = payload.setdefault("metadata", {})
|
|
154
|
+
if isinstance(metadata, dict):
|
|
155
|
+
metadata["name"] = str(metadata.get("name") or "dpone").rstrip("-") + "-" + step_name.replace("_", "-")
|
|
156
|
+
return payload
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _outcome_task(*, pack: Mapping[str, Any], dag: Any, upstream_task_id: str) -> Any | None:
    """Build the outcome-gate ``PythonOperator``, or return ``None`` without a gate config.

    The task id defaults to ``<upstream_task_id>__outcome_gate`` and the required
    status defaults to ``"passed"``.
    """
    gate = _mapping(pack.get("outcome_gate"))
    if not gate:
        return None

    # Imported only once a gate is configured.
    from airflow.providers.standard.operators.python import PythonOperator

    gate_task_id = str(gate.get("task_id") or f"{upstream_task_id}__outcome_gate")
    callable_kwargs = {
        "upstream_task_id": upstream_task_id,
        "required_status": str(gate.get("required_status") or "passed"),
    }
    return PythonOperator(
        dag=dag,
        task_id=gate_task_id,
        python_callable=evaluate_pack_outcome,
        op_kwargs=callable_kwargs,
    )
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def _safe_overrides(overrides: Mapping[str, Any] | None) -> dict[str, Any]:
    """Copy ``overrides`` into a dict, rejecting any key in ``_CONTRACT_OWNED_FIELDS``.

    Raises:
        ValueError: if at least one override targets a pack-owned field.
    """
    clean = dict(overrides) if overrides else {}
    forbidden = sorted(_CONTRACT_OWNED_FIELDS.intersection(clean))
    if not forbidden:
        return clean
    raise ValueError(f"operator_overrides cannot replace compact pack-owned fields: {', '.join(forbidden)}")
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def _mapping(value: object) -> Mapping[str, Any]:
|
|
185
|
+
return value if isinstance(value, Mapping) else {}
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def _sequence(value: object) -> tuple[object, ...]:
|
|
189
|
+
return tuple(value) if isinstance(value, list | tuple) else ()
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def _string_mapping(value: object) -> dict[str, str]:
    """Stringify the keys and values of ``value``; a non-mapping yields ``{}``."""
    stringified: dict[str, str] = {}
    for key, item in _mapping(value).items():
        stringified[str(key)] = str(item)
    return stringified
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def _nested_string_mapping(value: object) -> dict[str, dict[str, str]]:
    """Stringify a two-level mapping; each inner value goes through ``_string_mapping``."""
    nested: dict[str, dict[str, str]] = {}
    for key, item in _mapping(value).items():
        nested[str(key)] = _string_mapping(item)
    return nested
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _pack_steps(pack: Mapping[str, Any]) -> tuple[Mapping[str, Any], ...]:
    """Return the mapping-valued entries of ``pack["steps"]`` in their original order."""
    mapping_steps: list[Mapping[str, Any]] = []
    for candidate in _sequence(pack.get("steps")):
        if isinstance(candidate, Mapping):
            mapping_steps.append(candidate)
    return tuple(mapping_steps)
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def _chain_pack_steps(pack: Mapping[str, Any], tasks: Mapping[str, Any]) -> None:
    """Wire ``depends_on`` edges between the pack steps that have a task in ``tasks``.

    Steps or dependencies without a matching entry in ``tasks`` are ignored.
    """
    for step in _pack_steps(pack):
        downstream_id = str(step.get("name") or "")
        if downstream_id in tasks:
            upstream_ids = (str(dependency) for dependency in _sequence(step.get("depends_on")))
            for upstream_id in upstream_ids:
                if upstream_id not in tasks:
                    continue
                _chain(tasks[upstream_id], tasks[downstream_id])
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def _terminal_hook_task_ids(pack: Mapping[str, Any]) -> tuple[str, ...]:
    """Return the names of ``pre_hook`` steps that no other ``pre_hook`` step depends on."""
    hook_steps = [step for step in _pack_steps(pack) if step.get("phase") == "pre_hook"]

    depended_on: set[str] = set()
    for step in hook_steps:
        depended_on.update(str(dependency) for dependency in _sequence(step.get("depends_on")))

    terminal_ids: list[str] = []
    for step in hook_steps:
        if str(step.get("name")) in depended_on:
            continue
        terminal_ids.append(str(step["name"]))
    return tuple(terminal_ids)
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def _deserialize_pod_spec(pod_spec: Mapping[str, Any]) -> object:
|
|
222
|
+
payload = dict(pod_spec)
|
|
223
|
+
try:
|
|
224
|
+
from kubernetes.client import ApiClient
|
|
225
|
+
from kubernetes.client import models as k8s
|
|
226
|
+
except Exception:
|
|
227
|
+
return payload
|
|
228
|
+
return ApiClient()._ApiClient__deserialize_model(payload, k8s.V1Pod)
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def _chain(upstream: Any, downstream: Any) -> None:
|
|
232
|
+
try:
|
|
233
|
+
upstream >> downstream
|
|
234
|
+
except TypeError:
|
|
235
|
+
return None
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
__all__ = ["build_dpone_gitops_task_group_from_pack", "load_dpone_airflow_pack"]
|
|
@@ -0,0 +1,328 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from collections.abc import Mapping
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
DEFAULT_ARTIFACT_DIR = ".dpone/gitops/airflow"
|
|
9
|
+
KPO_KWARGS_FILE = "kpo-kwargs.json"
|
|
10
|
+
POD_SPEC_FILE = "pod-spec.yaml"
|
|
11
|
+
POD_CONTRACT_FILE = "pod-contract.json"
|
|
12
|
+
|
|
13
|
+
_CONTRACT_OWNED_KPO_FIELDS = frozenset({"pod_template_file", "do_xcom_push", "cmds", "arguments"})
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class DponeAirflowArtifactError(RuntimeError):
    """Root exception for failures while loading dpone Airflow artifacts on the DAG side."""
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class DponeAirflowContractError(DponeAirflowArtifactError):
    """Signals that generated dpone Airflow artifacts are missing or unsafe.

    The structured findings are kept on ``blockers`` for callers that want more
    than the message text.
    """

    def __init__(self, message: str, *, blockers: tuple[dict[str, str], ...]) -> None:
        # Findings travel with the exception.
        self.blockers = blockers
        super().__init__(message)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def load_dpone_kpo_kwargs(
    artifact_dir: str | Path = DEFAULT_ARTIFACT_DIR,
    *,
    task_id: str = "dpone_gitops_runtime",
    name: str | None = None,
    labels: Mapping[str, Any] | None = None,
    annotations: Mapping[str, Any] | None = None,
    operator_overrides: Mapping[str, Any] | None = None,
    validate: bool = True,
) -> dict[str, Any]:
    """Read ``kpo-kwargs.json`` and layer DAG-local fields on top of it.

    The loaded kwargs get the caller's ``task_id`` and ``name`` (falling back to
    ``task_id``), a ``pod_template_file`` pointing at the pod spec inside
    ``artifact_dir``, ``do_xcom_push=True``, and labels/annotations merged with
    the caller's. ``operator_overrides`` are applied last. They are vetted first,
    so an override of a contract-owned field raises ``ValueError`` before any
    file is read. With ``validate`` enabled the artifact directory is checked
    through ``validate_dpone_artifacts`` beforehand.
    """

    artifact_root = Path(artifact_dir)
    caller_overrides = _operator_overrides(operator_overrides)
    if validate:
        validate_dpone_artifacts(artifact_root)

    operator_kwargs = _load_json_mapping(artifact_root / KPO_KWARGS_FILE, kind="kpo_kwargs")
    dag_local_fields = {
        "task_id": task_id,
        "name": name or task_id,
        "pod_template_file": str(artifact_root / POD_SPEC_FILE),
        "do_xcom_push": True,
        "labels": _merge_mapping(operator_kwargs.get("labels"), labels),
        "annotations": _merge_mapping(operator_kwargs.get("annotations"), annotations),
    }
    operator_kwargs.update(dag_local_fields)
    operator_kwargs.update(caller_overrides)
    return operator_kwargs
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def build_dpone_gitops_task_from_artifacts(
    *,
    dag: Any,
    artifact_dir: str | Path = DEFAULT_ARTIFACT_DIR,
    task_id: str = "dpone_gitops_runtime",
    name: str | None = None,
    labels: Mapping[str, Any] | None = None,
    annotations: Mapping[str, Any] | None = None,
    operator_overrides: Mapping[str, Any] | None = None,
) -> Any:
    """Instantiate a ``KubernetesPodOperator`` on ``dag`` from the generated artifacts.

    All keyword arguments besides ``dag`` are forwarded to ``load_dpone_kpo_kwargs``.
    """

    from airflow.providers.cncf.kubernetes.operators.pod import KubernetesPodOperator

    dag_local_fields = {
        "task_id": task_id,
        "name": name,
        "labels": labels,
        "annotations": annotations,
        "operator_overrides": operator_overrides,
    }
    operator_kwargs = load_dpone_kpo_kwargs(artifact_dir, **dag_local_fields)
    return KubernetesPodOperator(dag=dag, **operator_kwargs)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def validate_dpone_artifacts(
    artifact_dir: str | Path = DEFAULT_ARTIFACT_DIR,
    *,
    require_pod_contract: bool = True,
) -> dict[str, Any]:
    """Check a generated dpone Airflow artifact directory and report on it.

    The check runs in stages and stops at the first stage that finds blockers:
    required files present, files parseable, then the cross-file contract checks.

    Returns:
        A report dict with the artifact entries and empty ``warnings`` and
        ``blockers`` lists.

    Raises:
        DponeAirflowContractError: if any stage produced blockers. The blockers
            are attached to the exception.
    """

    root = Path(artifact_dir)
    entries = _artifact_entries(root=root, require_pod_contract=require_pod_contract)
    blockers = _missing_required_blockers(entries)

    if not blockers:
        kpo_kwargs, pod_spec, pod_contract, parse_blockers = _load_artifacts(
            root=root,
            require_pod_contract=require_pod_contract,
        )
        blockers.extend(parse_blockers)
        # Contract checks only make sense on artifacts that all parsed cleanly.
        if not blockers:
            contract_findings = _contract_blockers(
                root=root,
                kpo_kwargs=kpo_kwargs,
                pod_spec=pod_spec,
                pod_contract=pod_contract,
                require_pod_contract=require_pod_contract,
            )
            blockers.extend(contract_findings)

    if blockers:
        summary = "; ".join(f"{item['code']} at {item['path']}" for item in blockers)
        message = "dpone Airflow artifact directory is not ready: " + summary
        raise DponeAirflowContractError(message, blockers=tuple(blockers))

    return {
        "kind": "dpone.airflow_runtime_artifacts",
        "artifact_dir": str(root),
        "entries": entries,
        "warnings": [],
        "blockers": blockers,
    }
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _load_artifacts(
    *,
    root: Path,
    require_pod_contract: bool,
) -> tuple[Mapping[str, Any], Mapping[str, Any], Mapping[str, Any], list[dict[str, str]]]:
    """Load the KPO kwargs, pod spec and (optionally) pod contract from ``root``.

    Load failures are collected as blockers instead of raised. The pod contract
    is only read when ``require_pod_contract`` is true, otherwise it is ``{}``.
    """
    found: list[dict[str, str]] = []
    kpo_kwargs = _load_json_mapping_or_blocker(root / KPO_KWARGS_FILE, kind="kpo_kwargs", blockers=found)
    pod_spec = _load_yaml_mapping_or_blocker(root / POD_SPEC_FILE, blockers=found)
    if require_pod_contract:
        pod_contract = _load_json_mapping_or_blocker(root / POD_CONTRACT_FILE, kind="pod_contract", blockers=found)
    else:
        pod_contract = {}
    return kpo_kwargs, pod_spec, pod_contract, found
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def _load_json_mapping(path: Path, *, kind: str) -> dict[str, Any]:
    """Read ``path`` as UTF-8 JSON and return its top-level object.

    Args:
        path: Artifact file to read.
        kind: Artifact kind, used as the prefix of the blocker code.

    Raises:
        DponeAirflowContractError: if the file is missing (``<kind>_missing``),
            or is not UTF-8, not valid JSON, or not a JSON object
            (``<kind>_json_invalid``).
    """
    try:
        payload = json.loads(path.read_text(encoding="utf-8"))
    except FileNotFoundError as exc:
        raise DponeAirflowContractError(
            f"Required dpone Airflow artifact is missing: {path}",
            blockers=(_blocker(f"{kind}_missing", str(path), "Required artifact does not exist"),),
        ) from exc
    except UnicodeDecodeError as exc:
        # A file that is not UTF-8 cannot be valid JSON here; report it as a
        # contract blocker rather than letting the decode error escape.
        raise DponeAirflowContractError(
            f"Required dpone Airflow artifact is not valid JSON: {path}",
            blockers=(
                _blocker(f"{kind}_json_invalid", str(path), f"Artifact is not valid UTF-8: {exc.reason}"),
            ),
        ) from exc
    except json.JSONDecodeError as exc:
        raise DponeAirflowContractError(
            f"Required dpone Airflow artifact is not valid JSON: {path}",
            blockers=(_blocker(f"{kind}_json_invalid", str(path), exc.msg),),
        ) from exc
    if not isinstance(payload, dict):
        raise DponeAirflowContractError(
            f"Required dpone Airflow artifact must be a JSON object: {path}",
            blockers=(_blocker(f"{kind}_json_invalid", str(path), "JSON artifact must be an object"),),
        )
    return payload
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def _load_json_mapping_or_blocker(
    path: Path,
    *,
    kind: str,
    blockers: list[dict[str, str]],
) -> Mapping[str, Any]:
    """Load a JSON object, turning a contract error into entries appended to ``blockers``.

    Returns ``{}`` when loading failed.
    """
    try:
        loaded = _load_json_mapping(path, kind=kind)
    except DponeAirflowContractError as exc:
        blockers.extend(exc.blockers)
        loaded = {}
    return loaded
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def _load_yaml_mapping_or_blocker(path: Path, *, blockers: list[dict[str, str]]) -> Mapping[str, Any]:
    """Load the pod spec YAML as a mapping, recording any failure in ``blockers``.

    A missing file, any exception while importing yaml or parsing, and a
    document that is not a mapping each append one blocker and yield ``{}``.
    """
    failure: tuple[str, str] | None = None
    document: Any = None
    try:
        import yaml

        document = yaml.safe_load(path.read_text(encoding="utf-8"))
    except FileNotFoundError:
        failure = ("pod_spec_missing", "Required pod spec artifact does not exist")
    except Exception as exc:  # noqa: BLE001 - parser differences should become contract blockers
        failure = ("pod_spec_yaml_invalid", f"Pod spec YAML could not be parsed: {exc}")
    else:
        if not isinstance(document, Mapping):
            failure = ("pod_spec_yaml_invalid", "Pod spec YAML must be an object")

    if failure is None:
        return document
    code, message = failure
    blockers.append(_blocker(code, str(path), message))
    return {}
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _artifact_entries(*, root: Path, require_pod_contract: bool) -> list[dict[str, Any]]:
    """Describe the three artifact files under ``root``.

    The KPO kwargs and pod spec are always required. The pod contract is
    required only when ``require_pod_contract`` is true.
    """
    return [
        _entry(root / KPO_KWARGS_FILE, kind="kpo_kwargs", required=True),
        _entry(root / POD_SPEC_FILE, kind="pod_spec", required=True),
        _entry(root / POD_CONTRACT_FILE, kind="pod_contract", required=require_pod_contract),
    ]
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def _entry(path: Path, *, kind: str, required: bool) -> dict[str, Any]:
|
|
210
|
+
return {
|
|
211
|
+
"path": str(path),
|
|
212
|
+
"kind": kind,
|
|
213
|
+
"required": required,
|
|
214
|
+
"exists": path.exists(),
|
|
215
|
+
"reason": "dpone Airflow runtime artifact",
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def _missing_required_blockers(entries: list[dict[str, Any]]) -> list[dict[str, str]]:
    """Return one ``<kind>_missing`` blocker per required entry that does not exist."""
    missing: list[dict[str, str]] = []
    for entry in entries:
        if not entry["required"] or entry["exists"]:
            continue
        missing.append(_blocker(f"{entry['kind']}_missing", str(entry["path"]), "Required artifact does not exist"))
    return missing
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def _contract_blockers(
    *,
    root: Path,
    kpo_kwargs: Mapping[str, Any],
    pod_spec: Mapping[str, Any],
    pod_contract: Mapping[str, Any],
    require_pod_contract: bool,
) -> list[dict[str, str]]:
    """Run the per-artifact contract checks and return every blocker they report.

    The pod contract check is skipped unless ``require_pod_contract`` is true.
    """
    found: list[dict[str, str]] = []
    _append_kpo_blockers(root=root, kpo_kwargs=kpo_kwargs, pod_contract=pod_contract, blockers=found)
    _append_pod_spec_blockers(root=root, pod_spec=pod_spec, blockers=found)
    if not require_pod_contract:
        return found
    _append_pod_contract_blockers(root=root, pod_contract=pod_contract, blockers=found)
    return found
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def _append_kpo_blockers(
    *,
    root: Path,
    kpo_kwargs: Mapping[str, Any],
    pod_contract: Mapping[str, Any],
    blockers: list[dict[str, str]],
) -> None:
    """Append blockers for contract violations found in ``kpo-kwargs.json``.

    Checks that XCom push is exactly ``True``, that ``cmds`` and ``arguments``
    are non-empty, and that ``pod_template_file`` either equals the pod
    contract's ``pod_spec_path`` or, when the contract names none, ends with
    the pod spec file name.
    """
    path = str(root / KPO_KWARGS_FILE)

    required_settings = (
        (
            kpo_kwargs.get("do_xcom_push") is not True,
            "kpo_xcom_push_required",
            "kpo-kwargs.json must set do_xcom_push=true",
        ),
        (not kpo_kwargs.get("cmds"), "kpo_cmds_required", "kpo-kwargs.json must include runtime cmds"),
        (
            not kpo_kwargs.get("arguments"),
            "kpo_arguments_required",
            "kpo-kwargs.json must include runtime arguments",
        ),
    )
    for violated, code, message in required_settings:
        if violated:
            blockers.append(_blocker(code, path, message))

    pod_template = str(kpo_kwargs.get("pod_template_file") or "").strip()
    expected = str(pod_contract.get("pod_spec_path") or "").strip()
    if expected:
        if pod_template != expected:
            blockers.append(
                _blocker("kpo_pod_template_mismatch", path, "kpo-kwargs.json pod_template_file must match pod-contract")
            )
    elif not pod_template.endswith(POD_SPEC_FILE):
        blockers.append(_blocker("kpo_pod_template_required", path, "kpo-kwargs.json must reference pod-spec.yaml"))
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def _append_pod_spec_blockers(
    *,
    root: Path,
    pod_spec: Mapping[str, Any],
    blockers: list[dict[str, str]],
) -> None:
    """Append blockers unless the pod spec is a ``Pod`` whose first container is named ``base``."""
    path = str(root / POD_SPEC_FILE)
    if pod_spec.get("kind") != "Pod":
        blockers.append(_blocker("pod_spec_kind_invalid", path, "pod-spec.yaml kind must be Pod"))

    containers = _mapping(pod_spec.get("spec")).get("containers")
    if isinstance(containers, list) and containers:
        first_container = containers[0]
    else:
        first_container = {}
    if _mapping(first_container).get("name") != "base":
        blockers.append(_blocker("pod_spec_base_container_missing", path, "pod-spec.yaml first container must be base"))
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def _append_pod_contract_blockers(
    *,
    root: Path,
    pod_contract: Mapping[str, Any],
    blockers: list[dict[str, str]],
) -> None:
    """Append blockers for a wrong contract ``kind`` or a missing final-XCom declaration.

    The ``xcom`` section must have ``enabled`` exactly ``True`` and
    ``return_path`` equal to ``/airflow/xcom/return.json``.
    """
    path = str(root / POD_CONTRACT_FILE)
    if pod_contract.get("kind") != "gitops.airflow_pod_contract":
        blockers.append(
            _blocker("pod_contract_kind_invalid", path, "pod-contract.json kind must be gitops.airflow_pod_contract")
        )

    xcom = _mapping(pod_contract.get("xcom"))
    xcom_declared = xcom.get("return_path") == "/airflow/xcom/return.json" and xcom.get("enabled") is True
    if not xcom_declared:
        blockers.append(_blocker("pod_contract_xcom_invalid", path, "pod contract must enable final XCom return.json"))
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def _operator_overrides(overrides: Mapping[str, Any] | None) -> dict[str, Any]:
    """Copy ``overrides`` into a dict, rejecting any key in ``_CONTRACT_OWNED_KPO_FIELDS``.

    Raises:
        ValueError: if an override names a contract-owned KPO field.
    """
    resolved = dict(overrides) if overrides else {}
    forbidden = sorted(_CONTRACT_OWNED_KPO_FIELDS.intersection(resolved))
    if not forbidden:
        return resolved
    fields = ", ".join(forbidden)
    raise ValueError(f"operator_overrides cannot replace dpone contract-owned KPO fields: {fields}")
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def _merge_mapping(base: object, extra: Mapping[str, Any] | None) -> dict[str, Any]:
|
|
308
|
+
merged = dict(base) if isinstance(base, Mapping) else {}
|
|
309
|
+
merged.update(dict(extra or {}))
|
|
310
|
+
return merged
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def _mapping(value: object) -> Mapping[str, Any]:
|
|
314
|
+
return value if isinstance(value, Mapping) else {}
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
def _blocker(code: str, path: str, message: str) -> dict[str, str]:
|
|
318
|
+
return {"code": code, "path": path, "message": message, "source": "dpone.airflow.runtime_adapter"}
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
__all__ = [
|
|
322
|
+
"DEFAULT_ARTIFACT_DIR",
|
|
323
|
+
"DponeAirflowArtifactError",
|
|
324
|
+
"DponeAirflowContractError",
|
|
325
|
+
"build_dpone_gitops_task_from_artifacts",
|
|
326
|
+
"load_dpone_kpo_kwargs",
|
|
327
|
+
"validate_dpone_artifacts",
|
|
328
|
+
]
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
"""Build visible Airflow KPO tasks from dpone GitOps run-spec steps."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from collections.abc import Mapping
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from dpone_airflow_pack.runtime_adapter import DEFAULT_ARTIFACT_DIR, load_dpone_kpo_kwargs
|
|
11
|
+
|
|
12
|
+
RUN_SPEC_FILE = "run-spec.json"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def build_dpone_gitops_step_tasks_from_artifacts(
    *,
    dag: Any,
    artifact_dir: str | Path = DEFAULT_ARTIFACT_DIR,
    labels: Mapping[str, Any] | None = None,
    annotations: Mapping[str, Any] | None = None,
    operator_overrides: Mapping[str, Any] | None = None,
) -> dict[str, Any]:
    """Create a ``KubernetesPodOperator`` for every run-spec step and wire their dependencies.

    Steps with a blank ``name`` or ``command`` are skipped. Only steps whose
    ``kind`` is ``"dpone_run"`` push XCom. The returned dict maps step name to
    its operator.
    """

    from airflow.providers.cncf.kubernetes.operators.pod import KubernetesPodOperator

    root = Path(artifact_dir)
    steps = _load_run_spec_steps(root)
    tasks: dict[str, Any] = {}
    for step in steps:
        step_name = str(step.get("name") or "").strip()
        step_command = str(step.get("command") or "").strip()
        if step_name and step_command:
            pushes_xcom = str(step.get("kind") or "") == "dpone_run"
            step_kwargs = _load_step_kpo_kwargs(
                root,
                step_name=step_name,
                command=step_command,
                do_xcom_push=pushes_xcom,
                labels=labels,
                annotations=annotations,
                operator_overrides=operator_overrides,
            )
            tasks[step_name] = KubernetesPodOperator(dag=dag, **step_kwargs)
    _wire_step_dependencies(tasks=tasks, steps=steps)
    return tasks
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _load_run_spec_steps(root: Path) -> list[Mapping[str, Any]]:
    """Return the mapping-valued entries of ``steps`` from the run-spec file under ``root``.

    Returns an empty list when the document is not a JSON object or when its
    ``steps`` value is not a list.

    Raises:
        FileNotFoundError: if the run-spec file does not exist.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    payload = json.loads((root / RUN_SPEC_FILE).read_text(encoding="utf-8"))
    if not isinstance(payload, Mapping):
        # Valid JSON that is not an object (list, string, number, null) has no
        # ``steps`` key; treat it like any other run-spec without steps.
        return []
    steps = payload.get("steps")
    if not isinstance(steps, list):
        return []
    return [step for step in steps if isinstance(step, Mapping)]
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _load_step_kpo_kwargs(
    root: Path,
    *,
    step_name: str,
    command: str,
    do_xcom_push: bool,
    labels: Mapping[str, Any] | None,
    annotations: Mapping[str, Any] | None,
    operator_overrides: Mapping[str, Any] | None,
) -> dict[str, Any]:
    """Load the shared KPO kwargs and specialize them for a single step.

    The step name is used as both task id and pod name. The step command
    replaces the generated ``cmds``/``arguments`` and is run through
    ``/bin/sh -ec``. ``do_xcom_push`` is set from the argument.
    """
    step_kwargs = load_dpone_kpo_kwargs(
        root,
        task_id=step_name,
        name=step_name,
        labels=labels,
        annotations=annotations,
        operator_overrides=operator_overrides,
    )
    step_kwargs.update(
        cmds=["/bin/sh", "-ec"],
        arguments=[command],
        do_xcom_push=do_xcom_push,
    )
    return step_kwargs
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _wire_step_dependencies(*, tasks: Mapping[str, Any], steps: list[Mapping[str, Any]]) -> None:
    """Chain each step's task after the tasks named in its ``depends_on`` list.

    Steps without a task, ``depends_on`` values that are not lists, and
    dependencies without a task are ignored.
    """
    for step in steps:
        downstream = tasks.get(str(step.get("name") or ""))
        declared = step.get("depends_on")
        if downstream is not None and isinstance(declared, list):
            for dependency in declared:
                upstream = tasks.get(str(dependency))
                if upstream is None:
                    continue
                _chain(upstream, downstream)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _chain(upstream: Any, downstream: Any) -> None:
|
|
92
|
+
try:
|
|
93
|
+
upstream >> downstream
|
|
94
|
+
except TypeError:
|
|
95
|
+
return None
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
DEFAULT_XCOM_SIDECAR_IMAGE = "alpine:3.23.4"
|
|
7
|
+
XCOM_SIDECAR_CONTAINER_NAME = "airflow-xcom-sidecar"
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass(frozen=True)
class XComSidecarRuntimeConfig:
    """Immutable settings for the XCom sidecar container."""

    # Container image for the sidecar; defaults to the module-level pinned image.
    image: str = DEFAULT_XCOM_SIDECAR_IMAGE
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def pin_xcom_sidecar_image(pod: Any, image: str = DEFAULT_XCOM_SIDECAR_IMAGE) -> Any:
    """Set ``image`` on the first container named ``XCOM_SIDECAR_CONTAINER_NAME`` and return ``pod``.

    The pod is modified in place. A pod without a spec, without containers, or
    without a matching container is returned unchanged.
    """
    spec = getattr(pod, "spec", None)
    candidates = getattr(spec, "containers", None) or []
    sidecar = next(
        (item for item in candidates if getattr(item, "name", None) == XCOM_SIDECAR_CONTAINER_NAME),
        None,
    )
    if sidecar is not None:
        sidecar.image = image
    return pod
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
__all__ = [
|
|
25
|
+
"DEFAULT_XCOM_SIDECAR_IMAGE",
|
|
26
|
+
"XCOM_SIDECAR_CONTAINER_NAME",
|
|
27
|
+
"XComSidecarRuntimeConfig",
|
|
28
|
+
"pin_xcom_sidecar_image",
|
|
29
|
+
]
|