apache-airflow-providers-standard 1.9.2rc1__py3-none-any.whl → 1.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/standard/__init__.py +3 -3
- airflow/providers/standard/decorators/bash.py +1 -2
- airflow/providers/standard/example_dags/example_bash_decorator.py +1 -1
- airflow/providers/standard/example_dags/example_python_decorator.py +17 -0
- airflow/providers/standard/example_dags/example_python_operator.py +18 -0
- airflow/providers/standard/exceptions.py +1 -1
- airflow/providers/standard/get_provider_info.py +1 -0
- airflow/providers/standard/operators/bash.py +7 -3
- airflow/providers/standard/operators/datetime.py +1 -2
- airflow/providers/standard/operators/hitl.py +9 -2
- airflow/providers/standard/operators/latest_only.py +17 -8
- airflow/providers/standard/operators/python.py +93 -8
- airflow/providers/standard/operators/trigger_dagrun.py +86 -28
- airflow/providers/standard/sensors/bash.py +1 -2
- airflow/providers/standard/sensors/date_time.py +1 -16
- airflow/providers/standard/sensors/external_task.py +28 -7
- airflow/providers/standard/sensors/filesystem.py +2 -19
- airflow/providers/standard/sensors/time.py +2 -18
- airflow/providers/standard/sensors/time_delta.py +7 -6
- airflow/providers/standard/triggers/external_task.py +11 -8
- airflow/providers/standard/triggers/hitl.py +2 -2
- airflow/providers/standard/utils/openlineage.py +185 -0
- airflow/providers/standard/utils/python_virtualenv.py +4 -3
- airflow/providers/standard/utils/python_virtualenv_script.jinja2 +18 -3
- airflow/providers/standard/utils/skipmixin.py +2 -2
- {apache_airflow_providers_standard-1.9.2rc1.dist-info → apache_airflow_providers_standard-1.11.0.dist-info}/METADATA +22 -10
- {apache_airflow_providers_standard-1.9.2rc1.dist-info → apache_airflow_providers_standard-1.11.0.dist-info}/RECORD +31 -30
- {apache_airflow_providers_standard-1.9.2rc1.dist-info → apache_airflow_providers_standard-1.11.0.dist-info}/licenses/NOTICE +1 -1
- {apache_airflow_providers_standard-1.9.2rc1.dist-info → apache_airflow_providers_standard-1.11.0.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_standard-1.9.2rc1.dist-info → apache_airflow_providers_standard-1.11.0.dist-info}/entry_points.txt +0 -0
- {apache_airflow_providers_standard-1.9.2rc1.dist-info → apache_airflow_providers_standard-1.11.0.dist-info}/licenses/LICENSE +0 -0
airflow/providers/standard/sensors/external_task.py:

@@ -23,10 +23,13 @@ import warnings
 from collections.abc import Callable, Collection, Iterable, Sequence
 from typing import TYPE_CHECKING, ClassVar
 
-from airflow.configuration import conf
-from airflow.exceptions import AirflowSkipException
 from airflow.models.dag import DagModel
-from airflow.providers.common.compat.sdk import
+from airflow.providers.common.compat.sdk import (
+    AirflowSkipException,
+    BaseOperatorLink,
+    BaseSensorOperator,
+    conf,
+)
 from airflow.providers.standard.exceptions import (
     DuplicateStateError,
     ExternalDagDeletedError,
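Most hunks in this release share one theme: imports that used to come from scattered core-Airflow modules (`airflow.configuration`, `airflow.exceptions`, base operator classes) are now routed through `airflow.providers.common.compat.sdk`. A minimal sketch of the shim pattern such a compat module implements; the real module covers many more names, and the exact fallback paths below are illustrative assumptions:

```python
# Sketch of the compat-shim idea only, not the actual common-compat source.
try:
    # Airflow 3.x: the Task SDK exposes these publicly.
    from airflow.sdk import BaseOperatorLink, BaseSensorOperator
except ImportError:
    # Airflow 2.x fallbacks (paths are illustrative).
    from airflow.models.baseoperatorlink import BaseOperatorLink
    from airflow.sensors.base import BaseSensorOperator
```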
@@ -59,8 +62,7 @@ else:
 if TYPE_CHECKING:
     from sqlalchemy.orm import Session
 
-    from airflow.
-    from airflow.providers.common.compat.sdk import Context
+    from airflow.providers.common.compat.sdk import Context, TaskInstanceKey
 
 
 class ExternalDagLink(BaseOperatorLink):
@@ -80,8 +82,17 @@ class ExternalDagLink(BaseOperatorLink):
 
         if not AIRFLOW_V_3_0_PLUS:
             from airflow.models.renderedtifields import RenderedTaskInstanceFields
+            from airflow.models.taskinstancekey import TaskInstanceKey as CoreTaskInstanceKey
+
+            core_ti_key = CoreTaskInstanceKey(
+                dag_id=ti_key.dag_id,
+                task_id=ti_key.task_id,
+                run_id=ti_key.run_id,
+                try_number=ti_key.try_number,
+                map_index=ti_key.map_index,
+            )
 
-            if template_fields := RenderedTaskInstanceFields.get_templated_fields(
+            if template_fields := RenderedTaskInstanceFields.get_templated_fields(core_ti_key):
                 external_dag_id: str = template_fields.get("external_dag_id", operator.external_dag_id)  # type: ignore[no-redef]
 
         if AIRFLOW_V_3_0_PLUS:
@@ -251,6 +262,7 @@ class ExternalTaskSensor(BaseSensorOperator):
         self._has_checked_existence = False
         self.deferrable = deferrable
         self.poll_interval = poll_interval
+        self.external_dates_filter: str | None = None
 
     def _get_dttm_filter(self, context: Context) -> Sequence[datetime.datetime]:
         logical_date = self._get_logical_date(context)
@@ -262,13 +274,19 @@ class ExternalTaskSensor(BaseSensorOperator):
             return result if isinstance(result, list) else [result]
         return [logical_date]
 
+    @staticmethod
+    def _serialize_dttm_filter(dttm_filter: Sequence[datetime.datetime]) -> str:
+        return ",".join(dt.isoformat() for dt in dttm_filter)
+
     def poke(self, context: Context) -> bool:
         # delay check to poke rather than __init__ in case it was supplied as XComArgs
         if self.external_task_ids and len(self.external_task_ids) > len(set(self.external_task_ids)):
             raise ValueError("Duplicate task_ids passed in external_task_ids parameter")
 
         dttm_filter = self._get_dttm_filter(context)
-        serialized_dttm_filter =
+        serialized_dttm_filter = self._serialize_dttm_filter(dttm_filter)
+        # Save as attribute - to be used by listeners
+        self.external_dates_filter = serialized_dttm_filter
 
         if self.external_task_ids:
             self.log.info(
@@ -457,6 +475,9 @@ class ExternalTaskSensor(BaseSensorOperator):
         if event is None:
             raise ExternalTaskNotFoundError("No event received from trigger")
 
+        # Re-set as attribute after coming back from deferral - to be used by listeners
+        self.external_dates_filter = self._serialize_dttm_filter(self._get_dttm_filter(context))
+
         if event["status"] == "success":
             self.log.info("External tasks %s has executed successfully.", self.external_task_ids)
         elif event["status"] == "skipped":
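The new `external_dates_filter` attribute is set in `poke` and re-set after deferral purely so listeners can see which logical dates the sensor is waiting on. A hypothetical listener sketch; the hook name follows Airflow's pluggy-based listener API, and only the attribute access is grounded in this diff:

```python
from airflow.listeners import hookimpl


@hookimpl
def on_task_instance_running(previous_state, task_instance):
    task = getattr(task_instance, "task", None)
    dates = getattr(task, "external_dates_filter", None)
    if dates is not None:
        # Comma-separated ISO timestamps, e.g.
        # "2025-01-01T00:00:00+00:00,2025-01-02T00:00:00+00:00" (made-up values)
        print(f"ExternalTaskSensor waiting on: {dates}")
```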
airflow/providers/standard/sensors/filesystem.py:

@@ -20,31 +20,14 @@ from __future__ import annotations
 import datetime
 import os
 from collections.abc import Sequence
-from dataclasses import dataclass
 from functools import cached_property
 from glob import glob
 from typing import TYPE_CHECKING, Any
 
-from airflow.
-from airflow.exceptions import AirflowException
-from airflow.providers.common.compat.sdk import BaseSensorOperator
+from airflow.providers.common.compat.sdk import AirflowException, BaseSensorOperator, conf
 from airflow.providers.standard.hooks.filesystem import FSHook
 from airflow.providers.standard.triggers.file import FileTrigger
-
-try:
-    from airflow.triggers.base import StartTriggerArgs  # type: ignore[no-redef]
-except ImportError:  # TODO: Remove this when min airflow version is 2.10.0 for standard provider
-
-    @dataclass
-    class StartTriggerArgs:  # type: ignore[no-redef]
-        """Arguments required for start task execution from triggerer."""
-
-        trigger_cls: str
-        next_method: str
-        trigger_kwargs: dict[str, Any] | None = None
-        next_kwargs: dict[str, Any] | None = None
-        timeout: datetime.timedelta | None = None
-
+from airflow.triggers.base import StartTriggerArgs
 
 if TYPE_CHECKING:
     from airflow.sdk import Context
airflow/providers/standard/sensors/time.py:

@@ -19,28 +19,12 @@ from __future__ import annotations
 
 import datetime
 import warnings
-from dataclasses import dataclass
 from typing import TYPE_CHECKING, Any
 
-from airflow.configuration import conf
 from airflow.exceptions import AirflowProviderDeprecationWarning
-from airflow.providers.common.compat.sdk import BaseSensorOperator, timezone
+from airflow.providers.common.compat.sdk import BaseSensorOperator, conf, timezone
 from airflow.providers.standard.triggers.temporal import DateTimeTrigger
-
-try:
-    from airflow.triggers.base import StartTriggerArgs  # type: ignore[no-redef]
-except ImportError:  # TODO: Remove this when min airflow version is 2.10.0 for standard provider
-
-    @dataclass
-    class StartTriggerArgs:  # type: ignore[no-redef]
-        """Arguments required for start task execution from triggerer."""
-
-        trigger_cls: str
-        next_method: str
-        trigger_kwargs: dict[str, Any] | None = None
-        next_kwargs: dict[str, Any] | None = None
-        timeout: datetime.timedelta | None = None
-
+from airflow.triggers.base import StartTriggerArgs
 
 if TYPE_CHECKING:
     from airflow.sdk import Context
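Both sensor modules drop the 2.9-era fallback dataclass because the provider now requires Airflow >= 2.11.0 (see the METADATA changes below), where `airflow.triggers.base.StartTriggerArgs` always exists. The fields are unchanged from the deleted fallback; a construction sketch with illustrative values (the `FileTrigger` kwargs are an assumption):

```python
from datetime import timedelta

from airflow.triggers.base import StartTriggerArgs

args = StartTriggerArgs(
    trigger_cls="airflow.providers.standard.triggers.file.FileTrigger",
    next_method="execute_complete",
    trigger_kwargs={"filepath": "/tmp/incoming.csv"},  # hypothetical
    next_kwargs=None,
    timeout=timedelta(hours=1),
)
```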
airflow/providers/standard/sensors/time_delta.py:

@@ -25,9 +25,8 @@ from typing import TYPE_CHECKING, Any
 from deprecated.classic import deprecated
 from packaging.version import Version
 
-from airflow.
-from airflow.
-from airflow.providers.common.compat.sdk import BaseSensorOperator, timezone
+from airflow.exceptions import AirflowProviderDeprecationWarning
+from airflow.providers.common.compat.sdk import AirflowSkipException, BaseSensorOperator, conf, timezone
 from airflow.providers.standard.triggers.temporal import DateTimeTrigger, TimeDeltaTrigger
 from airflow.providers.standard.version_compat import AIRFLOW_V_3_0_PLUS
 
@@ -194,9 +193,11 @@ class WaitSensor(BaseSensorOperator):
     def execute(self, context: Context) -> None:
         if self.deferrable:
             self.defer(
-                trigger=
-
-
+                trigger=(
+                    TimeDeltaTrigger(self.time_to_wait, end_from_trigger=True)
+                    if AIRFLOW_V_3_0_PLUS
+                    else TimeDeltaTrigger(self.time_to_wait)
+                ),
                 method_name="execute_complete",
             )
         else:
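With `end_from_trigger=True`, the triggerer can finish the task directly on Airflow 3, skipping the round trip back to a worker; on older versions the plain trigger is used. A usage sketch, with argument names taken from this diff (the `deferrable` flag and `task_id` are illustrative):

```python
from datetime import timedelta

from airflow.providers.standard.sensors.time_delta import WaitSensor

wait = WaitSensor(
    task_id="cool_down",
    time_to_wait=timedelta(minutes=10),
    deferrable=True,  # defer to the triggerer instead of occupying a worker slot
)
```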
airflow/providers/standard/triggers/external_task.py:

@@ -226,23 +226,26 @@ class DagStateTrigger(BaseTrigger):
         elif self.execution_dates:
             runs_ids_or_dates = len(self.execution_dates)
 
+        cls_path, data = self.serialize()
+
         if AIRFLOW_V_3_0_PLUS:
-            data
-
+            data.update(  # update with {run_id: run_state} dict
+                await self.validate_count_dags_af_3(runs_ids_or_dates_len=runs_ids_or_dates)
+            )
+            yield TriggerEvent((cls_path, data))
             return
         else:
             while True:
                 num_dags = await self.count_dags()
                 if num_dags == runs_ids_or_dates:
-                    yield TriggerEvent(
+                    yield TriggerEvent((cls_path, data))
                     return
                 await asyncio.sleep(self.poll_interval)
 
-    async def validate_count_dags_af_3(self, runs_ids_or_dates_len: int = 0) -> dict[str,
+    async def validate_count_dags_af_3(self, runs_ids_or_dates_len: int = 0) -> dict[str, str]:
         from airflow.sdk.execution_time.task_runner import RuntimeTaskInstance
 
-
-
+        run_states: dict[str, str] = {}  # {run_id: run_state}
         while True:
             num_dags = await sync_to_async(RuntimeTaskInstance.get_dr_count)(
                 dag_id=self.dag_id,
@@ -257,8 +260,8 @@ class DagStateTrigger(BaseTrigger):
                 dag_id=self.dag_id,
                 run_id=run_id,
             )
-
-                return
+                run_states[run_id] = state
+                return run_states
         await asyncio.sleep(self.poll_interval)
 
 if not AIRFLOW_V_3_0_PLUS:
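On Airflow 3 the event payload is now the trigger's own `serialize()` output with the collected `{run_id: run_state}` entries merged into the kwargs dict. A hedged sketch of a consumer; the method shape and run IDs are illustrative, not taken from this diff:

```python
def execute_complete(self, context, event):
    # event is the (classpath, kwargs) tuple yielded by DagStateTrigger.
    _cls_path, data = event
    # On Airflow 3, data carries the trigger kwargs plus entries such as
    # {"manual__2025-01-01T00:00:00+00:00": "success"}  (made-up run_id).
    return data
```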
airflow/providers/standard/triggers/hitl.py:

@@ -16,7 +16,7 @@
 # under the License.
 from __future__ import annotations
 
-from airflow.
+from airflow.providers.common.compat.sdk import AirflowOptionalProviderFeatureException
 from airflow.providers.standard.version_compat import AIRFLOW_V_3_1_PLUS
 
 if not AIRFLOW_V_3_1_PLUS:
@@ -30,7 +30,7 @@ from uuid import UUID
 
 from asgiref.sync import sync_to_async
 
-from airflow.
+from airflow.providers.common.compat.sdk import ParamValidationError
 from airflow.sdk import Param
 from airflow.sdk.definitions.param import ParamsDict
 from airflow.sdk.execution_time.hitl import (
airflow/providers/standard/utils/openlineage.py (new file):

@@ -0,0 +1,185 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING
+
+from airflow.providers.common.compat.openlineage.check import require_openlineage_version
+from airflow.providers.common.compat.sdk import AirflowOptionalProviderFeatureException
+
+if TYPE_CHECKING:
+    from airflow.models import TaskInstance
+    from airflow.sdk.types import RuntimeTaskInstanceProtocol as RuntimeTI
+
+log = logging.getLogger(__name__)
+
+OPENLINEAGE_PROVIDER_MIN_VERSION = "2.8.0"
+
+
+def _is_openlineage_provider_accessible() -> bool:
+    """
+    Check if the OpenLineage provider is accessible.
+
+    This function attempts to import the necessary OpenLineage modules and checks if the provider
+    is enabled and the listener is available.
+
+    Returns:
+        bool: True if the OpenLineage provider is accessible, False otherwise.
+    """
+    try:
+        from airflow.providers.openlineage.conf import is_disabled
+        from airflow.providers.openlineage.plugins.listener import get_openlineage_listener
+    except (ImportError, AttributeError):
+        log.debug("OpenLineage provider could not be imported.")
+        return False
+
+    if is_disabled():
+        log.debug("OpenLineage provider is disabled.")
+        return False
+
+    if not get_openlineage_listener():
+        log.debug("OpenLineage listener could not be found.")
+        return False
+
+    return True
+
+
+@require_openlineage_version(provider_min_version=OPENLINEAGE_PROVIDER_MIN_VERSION)
+def _get_openlineage_parent_info(ti: TaskInstance | RuntimeTI) -> dict[str, str]:
+    """Get OpenLineage metadata about the parent task."""
+    from airflow.providers.openlineage.plugins.macros import (
+        lineage_job_name,
+        lineage_job_namespace,
+        lineage_root_job_name,
+        lineage_root_job_namespace,
+        lineage_root_run_id,
+        lineage_run_id,
+    )
+
+    return {
+        "parentRunId": lineage_run_id(ti),
+        "parentJobName": lineage_job_name(ti),
+        "parentJobNamespace": lineage_job_namespace(),
+        "rootParentRunId": lineage_root_run_id(ti),
+        "rootParentJobName": lineage_root_job_name(ti),
+        "rootParentJobNamespace": lineage_root_job_namespace(ti),
+    }
+
+
+def _inject_openlineage_parent_info_to_dagrun_conf(
+    dr_conf: dict | None, ol_parent_info: dict[str, str]
+) -> dict:
+    """
+    Safely inject OpenLineage parent and root run metadata into a DAG run configuration.
+
+    This function adds parent and root job/run identifiers derived from the given TaskInstance into the
+    `openlineage` section of the DAG run configuration. If an `openlineage` key already exists, it is
+    preserved and extended, but no existing parent or root identifiers are overwritten.
+
+    The function performs several safety checks:
+    - If conf is not a dictionary or contains a non-dict `openlineage` section, conf is returned unmodified.
+    - If `openlineage` section contains any parent/root lineage identifiers, conf is returned unmodified.
+
+    Args:
+        dr_conf: The original DAG run configuration dictionary or None.
+        ol_parent_info: OpenLineage metadata about the parent task
+
+    Returns:
+        A modified DAG run conf with injected OpenLineage parent and root metadata,
+        or the original conf if injection is not possible.
+    """
+    current_ol_dr_conf = {}
+    if isinstance(dr_conf, dict) and dr_conf.get("openlineage"):
+        current_ol_dr_conf = dr_conf["openlineage"]
+        if not isinstance(current_ol_dr_conf, dict):
+            log.warning(
+                "Existing 'openlineage' section of DagRun conf is not a dictionary; "
+                "skipping injection of parent metadata."
+            )
+            return dr_conf
+    forbidden_keys = (
+        "parentRunId",
+        "parentJobName",
+        "parentJobNamespace",
+        "rootParentRunId",
+        "rootJobName",
+        "rootJobNamespace",
+    )
+
+    if existing := [k for k in forbidden_keys if k in current_ol_dr_conf]:
+        log.warning(
+            "'openlineage' section of DagRun conf already contains parent or root "
+            "identifiers: `%s`; skipping injection to avoid overwriting existing values.",
+            ", ".join(existing),
+        )
+        return dr_conf
+
+    return {**(dr_conf or {}), **{"openlineage": {**ol_parent_info, **current_ol_dr_conf}}}
+
+
+def safe_inject_openlineage_properties_into_dagrun_conf(
+    dr_conf: dict | None, ti: TaskInstance | RuntimeTI | None
+) -> dict | None:
+    """
+    Safely inject OpenLineage parent task metadata into a DAG run conf.
+
+    This function checks whether the OpenLineage provider is accessible and supports parent information
+    injection. If so, it enriches the DAG run conf with OpenLineage metadata about the parent task
+    to improve lineage tracking. The function does not modify other conf fields, will not overwrite
+    any existing content, and safely returns the original configuration if OpenLineage is unavailable,
+    unsupported, or an error occurs during injection.
+
+    :param dr_conf: The original DAG run configuration dictionary.
+    :param ti: The TaskInstance whose metadata may be injected.
+
+    :return: A potentially enriched DAG run conf with OpenLineage parent information,
+        or the original conf if injection was skipped or failed.
+    """
+    try:
+        if ti is None:
+            log.debug("Task instance not provided - dagrun conf not modified.")
+            return dr_conf
+
+        if not _is_openlineage_provider_accessible():
+            log.debug("OpenLineage provider not accessible - dagrun conf not modified.")
+            return dr_conf
+
+        ol_parent_info = _get_openlineage_parent_info(ti=ti)
+
+        log.info("Injecting openlineage parent task information into dagrun conf.")
+        new_conf = _inject_openlineage_parent_info_to_dagrun_conf(
+            dr_conf=dr_conf.copy() if isinstance(dr_conf, dict) else dr_conf,
+            ol_parent_info=ol_parent_info,
+        )
+        return new_conf
+    except AirflowOptionalProviderFeatureException:
+        log.info(
+            "Current OpenLineage provider version doesn't support parent information in "
+            "the DagRun conf. Upgrade `apache-airflow-providers-openlineage>=%s` to use this feature. "
+            "DagRun conf has not been modified by OpenLineage.",
+            OPENLINEAGE_PROVIDER_MIN_VERSION,
+        )
+        return dr_conf
+    except Exception as e:
+        log.warning(
+            "An error occurred while trying to inject OpenLineage information into dagrun conf. "
+            "DagRun conf has not been modified by OpenLineage. Error: %s",
+            str(e),
+        )
+        log.debug("Error details: ", exc_info=e)
+        return dr_conf
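The merge at the heart of `_inject_openlineage_parent_info_to_dagrun_conf` spreads the parent info first and the pre-existing `openlineage` section last, so user-supplied keys always win. A small illustration with made-up values:

```python
dr_conf = {"foo": "bar", "openlineage": {"custom_key": "kept"}}
ol_parent_info = {
    "parentRunId": "00000000-0000-0000-0000-000000000001",  # made-up
    "parentJobName": "upstream_dag.trigger_task",  # made-up
}

merged = {**(dr_conf or {}), **{"openlineage": {**ol_parent_info, **dr_conf["openlineage"]}}}
assert merged == {
    "foo": "bar",
    "openlineage": {
        "parentRunId": "00000000-0000-0000-0000-000000000001",
        "parentJobName": "upstream_dag.trigger_task",
        "custom_key": "kept",
    },
}
```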
airflow/providers/standard/utils/python_virtualenv.py:

@@ -30,7 +30,7 @@ from pathlib import Path
 import jinja2
 from jinja2 import select_autoescape
 
-from airflow.
+from airflow.providers.common.compat.sdk import conf
 
 
 def _is_uv_installed() -> bool:
@@ -150,7 +150,7 @@ def _execute_in_subprocess(cmd: list[str], cwd: str | None = None, env: dict[str
         stdout=subprocess.PIPE,
         stderr=subprocess.STDOUT,
         bufsize=0,
-        close_fds=
+        close_fds=False,
         cwd=cwd,
         env=env,
     ) as proc:
@@ -200,9 +200,10 @@ def prepare_virtualenv(
 
     if _use_uv():
         venv_cmd = _generate_uv_cmd(venv_directory, python_bin, system_site_packages)
+        _execute_in_subprocess(venv_cmd, env={**os.environ, **_index_urls_to_uv_env_vars(index_urls)})
     else:
         venv_cmd = _generate_venv_cmd(venv_directory, python_bin, system_site_packages)
-
+        _execute_in_subprocess(venv_cmd)
 
     pip_cmd = None
     if requirements is not None and len(requirements) != 0:
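`_index_urls_to_uv_env_vars` is referenced here but its body is not part of this diff. A plausible sketch under the assumption that it forwards index URLs through uv's documented environment variables (`UV_DEFAULT_INDEX` for the first URL, `UV_INDEX` for the rest):

```python
def _index_urls_to_uv_env_vars(index_urls: list[str] | None) -> dict[str, str]:
    # Assumption: mirrors pip's primary/extra index split using uv env vars.
    if not index_urls:
        return {}
    env_vars = {"UV_DEFAULT_INDEX": index_urls[0]}
    if len(index_urls) > 1:
        env_vars["UV_INDEX"] = " ".join(index_urls[1:])
    return env_vars
```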
airflow/providers/standard/utils/python_virtualenv_script.jinja2:

@@ -40,6 +40,23 @@ if sys.version_info >= (3,6):
     pass
 {% endif %}
 
+try:
+    from airflow.sdk.execution_time import task_runner
+except ModuleNotFoundError:
+    pass
+else:
+    {#-
+    We are in an Airflow 3.x environment, try and set up supervisor comms so
+    virtualenv can access Vars/Conn/XCom/etc that normal tasks can
+
+    We don't use the walrus operator (`:=`) below as it is possible people can
+    be using this on pre-3.8 versions of python, and while Airflow doesn't
+    support them, it's easy to not break it not using that operator here.
+    #}
+    reinit_supervisor_comms = getattr(task_runner, "reinit_supervisor_comms", None)
+    if reinit_supervisor_comms:
+        reinit_supervisor_comms()
+
 # Script
 {{ python_callable_source }}
 
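The Jinja comment above states the intent: let code running inside the virtualenv reach Variables/Connections/XCom through supervisor comms, like a normal task. A hedged sketch of what that enables on Airflow 3 (assumes the venv can import the Task SDK, e.g. via system site packages; the Variable name is hypothetical):

```python
from airflow.sdk import task


@task.virtualenv(requirements=["requests"], system_site_packages=True)
def fetch_data():
    # Works once supervisor comms are re-initialized inside the venv.
    from airflow.sdk import Variable

    return Variable.get("api_base_url")  # hypothetical Variable name
```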
@@ -49,12 +66,10 @@ if sys.version_info >= (3,6):
 import types
 
 {{ modified_dag_module_name }} = types.ModuleType("{{ modified_dag_module_name }}")
-
 {{ modified_dag_module_name }}.{{ python_callable }} = {{ python_callable }}
-
 sys.modules["{{modified_dag_module_name}}"] = {{modified_dag_module_name}}
 
-{
+{%- endif -%}
 
 {% if op_args or op_kwargs %}
 with open(sys.argv[1], "rb") as file:
airflow/providers/standard/utils/skipmixin.py:

@@ -21,7 +21,7 @@ from collections.abc import Iterable, Sequence
 from types import GeneratorType
 from typing import TYPE_CHECKING
 
-from airflow.
+from airflow.providers.common.compat.sdk import AirflowException
 from airflow.utils.log.logging_mixin import LoggingMixin
 
 if TYPE_CHECKING:
@@ -63,7 +63,7 @@ class SkipMixin(LoggingMixin):
         """
         # Import is internal for backward compatibility when importing PythonOperator
         # from airflow.providers.common.compat.standard.operators
-        from airflow.
+        from airflow.providers.common.compat.sdk import DownstreamTasksSkipped
 
         # The following could be applied only for non-mapped tasks,
         # as future mapped tasks have not been expanded yet. Such tasks
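`DownstreamTasksSkipped` is the Airflow 3 mechanism behind this import: instead of writing skip states to the database directly, the mixin raises and lets the task runner apply the skips. A hedged sketch of the raise; the payload contents are illustrative:

```python
from airflow.providers.common.compat.sdk import DownstreamTasksSkipped


def _skip_downstream(task_ids):
    # Illustrative: plain task_ids, or (task_id, map_index) tuples for
    # mapped tasks; the task runner catches this and marks them skipped.
    raise DownstreamTasksSkipped(tasks=task_ids)
```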
apache_airflow_providers_standard-1.11.0.dist-info/METADATA:

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: apache-airflow-providers-standard
-Version: 1.9.2rc1
+Version: 1.11.0
 Summary: Provider package apache-airflow-providers-standard for Apache Airflow
 Keywords: airflow-provider,standard,airflow,integration
 Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -22,15 +22,17 @@ Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: System :: Monitoring
 License-File: LICENSE
 License-File: NOTICE
-Requires-Dist: apache-airflow>=2.
-Requires-Dist: apache-airflow-providers-common-compat>=1.
+Requires-Dist: apache-airflow>=2.11.0
+Requires-Dist: apache-airflow-providers-common-compat>=1.13.0
+Requires-Dist: apache-airflow-providers-openlineage ; extra == "openlineage"
 Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
-Project-URL: Changelog, https://airflow.
-Project-URL: Documentation, https://airflow.
+Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-standard/1.11.0/changelog.html
+Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-standard/1.11.0
 Project-URL: Mastodon, https://fosstodon.org/@airflow
 Project-URL: Slack Chat, https://s.apache.org/airflow-slack
 Project-URL: Source Code, https://github.com/apache/airflow
 Project-URL: YouTube, https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/
+Provides-Extra: openlineage
 
 
 .. Licensed to the Apache Software Foundation (ASF) under one
@@ -57,7 +59,7 @@ Project-URL: YouTube, https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/
 
 Package ``apache-airflow-providers-standard``
 
-Release: ``1.9.2rc1``
+Release: ``1.11.0``
 
 
 Airflow Standard Provider
@@ -70,7 +72,7 @@ This is a provider package for ``standard`` provider. All classes for this provi
 are in ``airflow.providers.standard`` python package.
 
 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-standard/1.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-standard/1.11.0/>`_.
 
 Installation
 ------------
@@ -87,8 +89,8 @@ Requirements
 ========================================== ==================
 PIP package                                Version required
 ========================================== ==================
-``apache-airflow``                         ``>=2.
-``apache-airflow-providers-common-compat`` ``>=1.
+``apache-airflow``                         ``>=2.11.0``
+``apache-airflow-providers-common-compat`` ``>=1.13.0``
 ========================================== ==================
 
 Cross provider package dependencies
@@ -108,8 +110,18 @@ You can install such cross-provider dependencies when installing from PyPI. For
 Dependent package                                                                                                  Extra
 ================================================================================================================== =================
 `apache-airflow-providers-common-compat <https://airflow.apache.org/docs/apache-airflow-providers-common-compat>`_ ``common.compat``
+`apache-airflow-providers-openlineage <https://airflow.apache.org/docs/apache-airflow-providers-openlineage>`_     ``openlineage``
 ================================================================================================================== =================
 
+Optional dependencies
+---------------------
+
+=============== ========================================
+Extra           Dependencies
+=============== ========================================
+``openlineage`` ``apache-airflow-providers-openlineage``
+=============== ========================================
+
 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-standard/1.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-standard/1.11.0/changelog.html>`_.
 