apache-airflow-providers-standard 1.9.2rc1__py3-none-any.whl → 1.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/standard/__init__.py +3 -3
- airflow/providers/standard/decorators/bash.py +1 -2
- airflow/providers/standard/example_dags/example_bash_decorator.py +1 -1
- airflow/providers/standard/example_dags/example_python_decorator.py +17 -0
- airflow/providers/standard/example_dags/example_python_operator.py +18 -0
- airflow/providers/standard/exceptions.py +1 -1
- airflow/providers/standard/get_provider_info.py +1 -0
- airflow/providers/standard/operators/bash.py +7 -3
- airflow/providers/standard/operators/datetime.py +1 -2
- airflow/providers/standard/operators/hitl.py +9 -2
- airflow/providers/standard/operators/latest_only.py +17 -8
- airflow/providers/standard/operators/python.py +93 -8
- airflow/providers/standard/operators/trigger_dagrun.py +86 -28
- airflow/providers/standard/sensors/bash.py +1 -2
- airflow/providers/standard/sensors/date_time.py +1 -16
- airflow/providers/standard/sensors/external_task.py +28 -7
- airflow/providers/standard/sensors/filesystem.py +2 -19
- airflow/providers/standard/sensors/time.py +2 -18
- airflow/providers/standard/sensors/time_delta.py +7 -6
- airflow/providers/standard/triggers/external_task.py +11 -8
- airflow/providers/standard/triggers/hitl.py +2 -2
- airflow/providers/standard/utils/openlineage.py +185 -0
- airflow/providers/standard/utils/python_virtualenv.py +4 -3
- airflow/providers/standard/utils/python_virtualenv_script.jinja2 +18 -3
- airflow/providers/standard/utils/skipmixin.py +2 -2
- {apache_airflow_providers_standard-1.9.2rc1.dist-info → apache_airflow_providers_standard-1.11.0.dist-info}/METADATA +22 -10
- {apache_airflow_providers_standard-1.9.2rc1.dist-info → apache_airflow_providers_standard-1.11.0.dist-info}/RECORD +31 -30
- {apache_airflow_providers_standard-1.9.2rc1.dist-info → apache_airflow_providers_standard-1.11.0.dist-info}/licenses/NOTICE +1 -1
- {apache_airflow_providers_standard-1.9.2rc1.dist-info → apache_airflow_providers_standard-1.11.0.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_standard-1.9.2rc1.dist-info → apache_airflow_providers_standard-1.11.0.dist-info}/entry_points.txt +0 -0
- {apache_airflow_providers_standard-1.9.2rc1.dist-info → apache_airflow_providers_standard-1.11.0.dist-info}/licenses/LICENSE +0 -0
airflow/providers/standard/__init__.py
@@ -29,11 +29,11 @@ from airflow import __version__ as airflow_version
 
 __all__ = ["__version__"]
 
-__version__ = "1.
+__version__ = "1.11.0"
 
 if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
-    "2.
+    "2.11.0"
 ):
     raise RuntimeError(
-        f"The package `apache-airflow-providers-standard:{__version__}` needs Apache Airflow 2.
+        f"The package `apache-airflow-providers-standard:{__version__}` needs Apache Airflow 2.11.0+"
     )
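The hunk above raises the runtime floor to Airflow 2.11.0. A minimal sketch of what the gate evaluates, assuming only the `packaging` library (the installed version string below is hypothetical):

```python
import packaging.version

airflow_version = "2.10.5+customized"  # hypothetical installed version
base = packaging.version.parse(packaging.version.parse(airflow_version).base_version)

# base_version strips local/pre-release segments, so "2.10.5+customized" -> "2.10.5",
# which is < 2.11.0 and would make provider 1.11.0 raise RuntimeError on import.
print(base < packaging.version.parse("2.11.0"))  # True
```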
airflow/providers/standard/decorators/bash.py
@@ -89,8 +89,7 @@ class _BashDecoratedOperator(DecoratedOperator, BashOperator):
            raise TypeError("The returned value from the TaskFlow callable must be a non-empty string.")
 
        self._is_inline_cmd = self._is_inline_command(bash_command=self.bash_command)
-
-
+        self.render_template_fields(context)
        return super().execute(context)
 
 
airflow/providers/standard/example_dags/example_bash_decorator.py
@@ -19,11 +19,11 @@ from __future__ import annotations
 
 import pendulum
 
-from airflow.exceptions import AirflowSkipException
 from airflow.providers.common.compat.sdk import TriggerRule
 from airflow.providers.standard.operators.empty import EmptyOperator
 from airflow.providers.standard.utils.weekday import WeekDay
 from airflow.sdk import chain, dag, task
+from airflow.sdk.exceptions import AirflowSkipException
 
 
 @dag(schedule=None, start_date=pendulum.datetime(2023, 1, 1, tz="UTC"), catchup=False)
airflow/providers/standard/example_dags/example_python_decorator.py
@@ -22,6 +22,7 @@ virtual environment.
 
 from __future__ import annotations
 
+import asyncio
 import logging
 import sys
 import time
@@ -75,6 +76,22 @@ def example_python_decorator():
     run_this >> log_the_sql >> sleeping_task
     # [END howto_operator_python_kwargs]
 
+    # [START howto_async_operator_python_kwargs]
+    # Generate 5 sleeping tasks, sleeping from 0.0 to 0.4 seconds respectively
+    # Asynchronous callables are natively supported since Airflow 3.2+
+    @task
+    async def my_async_sleeping_function(random_base):
+        """This is a function that will run within the DAG execution"""
+        await asyncio.sleep(random_base)
+
+    for i in range(5):
+        async_sleeping_task = my_async_sleeping_function.override(task_id=f"async_sleep_for_{i}")(
+            random_base=i / 10
+        )
+
+        run_this >> log_the_sql >> async_sleeping_task
+    # [END howto_async_operator_python_kwargs]
+
     # [START howto_operator_python_venv]
     @task.virtualenv(
         task_id="virtualenv_python", requirements=["colorama==0.4.0"], system_site_packages=False
airflow/providers/standard/example_dags/example_python_operator.py
@@ -22,6 +22,7 @@ within a virtual environment.
 
 from __future__ import annotations
 
+import asyncio
 import logging
 import sys
 import time
@@ -88,6 +89,23 @@ with DAG(
     run_this >> log_the_sql >> sleeping_task
     # [END howto_operator_python_kwargs]
 
+    # [START howto_async_operator_python_kwargs]
+    # Generate 5 sleeping tasks, sleeping from 0.0 to 0.4 seconds respectively
+    # Asynchronous callables are natively supported since Airflow 3.2+
+    async def my_async_sleeping_function(random_base):
+        """This is a function that will run within the DAG execution"""
+        await asyncio.sleep(random_base)
+
+    for i in range(5):
+        async_sleeping_task = PythonOperator(
+            task_id=f"async_sleep_for_{i}",
+            python_callable=my_async_sleeping_function,
+            op_kwargs={"random_base": i / 10},
+        )
+
+        run_this >> log_the_sql >> async_sleeping_task
+    # [END howto_async_operator_python_kwargs]
+
     # [START howto_operator_python_venv]
     def callable_virtualenv():
         """
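Both example files register coroutine functions directly as task callables. A standalone sketch of what awaiting such a callable amounts to, with no Airflow dependency (names mirror the examples above):

```python
import asyncio

async def my_async_sleeping_function(random_base):
    """Sleep asynchronously instead of blocking the worker thread."""
    await asyncio.sleep(random_base)

# Roughly what the async execution path does with the callable and its op_kwargs:
asyncio.run(my_async_sleeping_function(random_base=0.1))
```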
airflow/providers/standard/get_provider_info.py
@@ -34,6 +34,7 @@ def get_provider_info():
         "how-to-guide": [
             "/docs/apache-airflow-providers-standard/operators/bash.rst",
             "/docs/apache-airflow-providers-standard/operators/python.rst",
+            "/docs/apache-airflow-providers-standard/operators/hitl.rst",
             "/docs/apache-airflow-providers-standard/operators/datetime.rst",
             "/docs/apache-airflow-providers-standard/operators/trigger_dag_run.rst",
             "/docs/apache-airflow-providers-standard/operators/latest_only.rst",
airflow/providers/standard/operators/bash.py
@@ -24,14 +24,18 @@ from collections.abc import Callable, Container, Sequence
 from functools import cached_property
 from typing import TYPE_CHECKING, Any, cast
 
-from airflow.
-
+from airflow.providers.common.compat.sdk import (
+    AirflowException,
+    AirflowSkipException,
+    context_to_airflow_vars,
+)
 from airflow.providers.standard.hooks.subprocess import SubprocessHook, SubprocessResult, working_directory
 from airflow.providers.standard.version_compat import BaseOperator
 
 if TYPE_CHECKING:
     from airflow.providers.common.compat.sdk import Context
-
+
+    from tests_common.test_utils.version_compat import ArgNotSet
 
 
 class BashOperator(BaseOperator):
airflow/providers/standard/operators/datetime.py
@@ -20,8 +20,7 @@ import datetime
 from collections.abc import Iterable
 from typing import TYPE_CHECKING
 
-from airflow.
-from airflow.providers.common.compat.sdk import timezone
+from airflow.providers.common.compat.sdk import AirflowException, timezone
 from airflow.providers.standard.operators.branch import BaseBranchOperator
 
 if TYPE_CHECKING:
airflow/providers/standard/exceptions.py
@@ -18,7 +18,7 @@ from __future__ import annotations
 
 import logging
 
-from airflow.
+from airflow.providers.common.compat.sdk import AirflowOptionalProviderFeatureException
 from airflow.providers.standard.version_compat import AIRFLOW_V_3_1_3_PLUS, AIRFLOW_V_3_1_PLUS
 
 if not AIRFLOW_V_3_1_PLUS:
airflow/providers/standard/operators/hitl.py
@@ -28,7 +28,7 @@ from collections.abc import Collection, Mapping, Sequence
 from typing import TYPE_CHECKING, Any
 from urllib.parse import ParseResult, urlencode, urlparse, urlunparse
 
-from airflow.
+from airflow.providers.common.compat.sdk import conf
 from airflow.providers.standard.exceptions import HITLRejectException, HITLTimeoutError, HITLTriggerEventError
 from airflow.providers.standard.operators.branch import BranchMixIn
 from airflow.providers.standard.triggers.hitl import HITLTrigger, HITLTriggerEventSuccessPayload
@@ -84,6 +84,13 @@ class HITLOperator(BaseOperator):
         self.multiple = multiple
 
         self.params: ParamsDict = params if isinstance(params, ParamsDict) else ParamsDict(params or {})
+        if hasattr(ParamsDict, "filter_params_by_source"):
+            # Params that exist only in Dag level does not make sense to appear in HITLOperator
+            self.params = ParamsDict.filter_params_by_source(self.params, source="task")
+        elif self.params:
+            self.log.debug(
+                "ParamsDict.filter_params_by_source not available; HITLOperator will also include Dag level params."
+            )
 
         self.notifiers: Sequence[BaseNotifier] = (
             [notifiers] if isinstance(notifiers, BaseNotifier) else notifiers or []
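The `hasattr` probe above is a feature-detection fallback: `ParamsDict.filter_params_by_source` exists only on newer Airflow cores, so the operator degrades gracefully instead of pinning a version. A minimal sketch of the pattern, using a hypothetical stand-in class rather than Airflow's real `ParamsDict`:

```python
class ParamsDict(dict):  # hypothetical stand-in for Airflow's ParamsDict
    pass

params = ParamsDict({"approver": "alice"})

if hasattr(ParamsDict, "filter_params_by_source"):
    # newer core: keep only task-level params
    params = ParamsDict.filter_params_by_source(params, source="task")
elif params:
    # older core: Dag-level params stay visible; only note the difference
    print("filter_params_by_source not available; Dag-level params included")
```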
airflow/providers/standard/operators/latest_only.py
@@ -26,7 +26,7 @@ from typing import TYPE_CHECKING
 import pendulum
 
 from airflow.providers.standard.operators.branch import BaseBranchOperator
-from airflow.providers.standard.version_compat import AIRFLOW_V_3_0_PLUS
+from airflow.providers.standard.version_compat import AIRFLOW_V_3_0_PLUS, AIRFLOW_V_3_2_PLUS
 from airflow.utils.types import DagRunType
 
 if TYPE_CHECKING:
@@ -35,6 +35,17 @@ if TYPE_CHECKING:
     from airflow.models import DagRun
     from airflow.providers.common.compat.sdk import Context
 
+if AIRFLOW_V_3_2_PLUS:
+
+    def _get_dag_timetable(dag):
+        from airflow.serialization.encoders import coerce_to_core_timetable
+
+        return coerce_to_core_timetable(dag.timetable)
+else:
+
+    def _get_dag_timetable(dag):
+        return dag.timetable
+
 
 class LatestOnlyOperator(BaseBranchOperator):
     """
@@ -104,15 +115,13 @@ class LatestOnlyOperator(BaseBranchOperator):
         else:
             end = dagrun_date
 
-
-
-            end=end,
-        )
-
+        timetable = _get_dag_timetable(self.dag)
+        current_interval = DataInterval(start=start, end=end)
         time_restriction = TimeRestriction(
             earliest=None, latest=current_interval.end - timedelta(microseconds=1), catchup=True
         )
-
+
+        if prev_info := timetable.next_dagrun_info(
             last_automated_data_interval=current_interval,
             restriction=time_restriction,
         ):
@@ -121,7 +130,7 @@ class LatestOnlyOperator(BaseBranchOperator):
             left = current_interval.start
 
         time_restriction = TimeRestriction(earliest=current_interval.end, latest=None, catchup=True)
-        next_info =
+        next_info = timetable.next_dagrun_info(
             last_automated_data_interval=current_interval,
             restriction=time_restriction,
         )
airflow/providers/standard/operators/python.py
@@ -43,20 +43,22 @@ from packaging.version import InvalidVersion
 
 from airflow.exceptions import (
     AirflowConfigException,
-    AirflowException,
     AirflowProviderDeprecationWarning,
-    AirflowSkipException,
     DeserializingResultError,
 )
 from airflow.models.variable import Variable
-from airflow.providers.common.compat.sdk import context_merge
+from airflow.providers.common.compat.sdk import AirflowException, AirflowSkipException, context_merge
+from airflow.providers.common.compat.standard.operators import (
+    BaseAsyncOperator,
+    is_async_callable,
+)
 from airflow.providers.standard.hooks.package_index import PackageIndexHook
 from airflow.providers.standard.utils.python_virtualenv import (
     _execute_in_subprocess,
     prepare_virtualenv,
     write_python_script,
 )
-from airflow.providers.standard.version_compat import AIRFLOW_V_3_0_PLUS,
+from airflow.providers.standard.version_compat import AIRFLOW_V_3_0_PLUS, AIRFLOW_V_3_2_PLUS
 from airflow.utils import hashlib_wrapper
 from airflow.utils.file import get_unique_dag_module_name
 from airflow.utils.operator_helpers import KeywordParameters
@@ -77,7 +79,10 @@ if TYPE_CHECKING:
     from pendulum.datetime import DateTime
 
     from airflow.providers.common.compat.sdk import Context
-    from airflow.sdk.execution_time.callback_runner import
+    from airflow.sdk.execution_time.callback_runner import (
+        AsyncExecutionCallableRunner,
+        ExecutionCallableRunner,
+    )
     from airflow.sdk.execution_time.context import OutletEventAccessorsProtocol
 
 _SerializerTypeDef = Literal["pickle", "cloudpickle", "dill"]
@@ -117,9 +122,9 @@ class _PythonVersionInfo(NamedTuple):
         return cls(*_parse_version_info(result.strip()))
 
 
-class PythonOperator(
+class PythonOperator(BaseAsyncOperator):
     """
-
+    Base class for all Python operators.
 
     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -194,7 +199,14 @@ class PythonOperator(BaseOperator):
         self.template_ext = templates_exts
         self.show_return_value_in_logs = show_return_value_in_logs
 
-
+    @property
+    def is_async(self) -> bool:
+        return is_async_callable(self.python_callable)
+
+    def execute(self, context) -> Any:
+        if self.is_async:
+            return BaseAsyncOperator.execute(self, context)
+
         context_merge(context, self.op_kwargs, templates_dict=self.templates_dict)
         self.op_kwargs = self.determine_kwargs(context)
 
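`is_async_callable` comes from the common compat provider; assuming it behaves like `inspect.iscoroutinefunction` (an assumption, not confirmed by this diff), the dispatch in `execute` above amounts to:

```python
import inspect

async def fetch(url):  # hypothetical async callable
    ...

def transform(rows):  # hypothetical sync callable
    ...

print(inspect.iscoroutinefunction(fetch))      # True  -> routed to the async aexecute() path
print(inspect.iscoroutinefunction(transform))  # False -> falls through to the classic execute() body
```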
@@ -238,6 +250,47 @@ class PythonOperator(BaseOperator):
         runner = create_execution_runner(self.python_callable, asset_events, logger=self.log)
         return runner.run(*self.op_args, **self.op_kwargs)
 
+    if AIRFLOW_V_3_2_PLUS:
+
+        async def aexecute(self, context):
+            context_merge(context, self.op_kwargs, templates_dict=self.templates_dict)
+            self.op_kwargs = self.determine_kwargs(context)
+
+            # This needs to be lazy because subclasses may implement execute_callable
+            # by running a separate process that can't use the eager result.
+            def __prepare_execution() -> (
+                tuple[AsyncExecutionCallableRunner, OutletEventAccessorsProtocol] | None
+            ):
+                from airflow.sdk.execution_time.callback_runner import create_async_executable_runner
+                from airflow.sdk.execution_time.context import context_get_outlet_events
+
+                return (
+                    cast("AsyncExecutionCallableRunner", create_async_executable_runner),
+                    context_get_outlet_events(context),
+                )
+
+            self.__prepare_execution = __prepare_execution
+
+            return_value = await self.aexecute_callable()
+            if self.show_return_value_in_logs:
+                self.log.info("Done. Returned value was: %s", return_value)
+            else:
+                self.log.info("Done. Returned value not shown")
+
+            return return_value
+
+        async def aexecute_callable(self) -> Any:
+            """
+            Call the python callable with the given arguments.
+
+            :return: the return value of the call.
+            """
+            if (execution_preparation := self.__prepare_execution()) is None:
+                return await self.python_callable(*self.op_args, **self.op_kwargs)
+            create_execution_runner, asset_events = execution_preparation
+            runner = create_execution_runner(self.python_callable, asset_events, logger=self.log)
+            return await runner.run(*self.op_args, **self.op_kwargs)
+
 
 class BranchPythonOperator(BaseBranchOperator, PythonOperator):
     """
@@ -488,8 +541,28 @@ class _BasePythonVirtualenvOperator(PythonOperator, metaclass=ABCMeta):
         serializable_keys = set(self._iter_serializable_context_keys())
         new = {k: v for k, v in context.items() if k in serializable_keys}
         serializable_context = cast("Context", new)
+        # Store bundle_path for subprocess execution
+        self._bundle_path = self._get_bundle_path_from_context(context)
         return super().execute(context=serializable_context)
 
+    def _get_bundle_path_from_context(self, context: Context) -> str | None:
+        """
+        Extract bundle_path from the task instance's bundle_instance.
+
+        :param context: The task execution context
+        :return: Path to the bundle root directory, or None if not in a bundle
+        """
+        if not AIRFLOW_V_3_0_PLUS:
+            return None
+
+        # In Airflow 3.x, the RuntimeTaskInstance has a bundle_instance attribute
+        # that contains the bundle information including its path
+        ti = context["ti"]
+        if bundle_instance := getattr(ti, "bundle_instance", None):
+            return bundle_instance.path
+
+        return None
+
     def get_python_source(self):
         """Return the source of self.python_callable."""
         return textwrap.dedent(inspect.getsource(self.python_callable))
@@ -562,9 +635,21 @@ class _BasePythonVirtualenvOperator(PythonOperator, metaclass=ABCMeta):
         )
 
         env_vars = dict(os.environ) if self.inherit_env else {}
+        if fd := os.getenv("__AIRFLOW_SUPERVISOR_FD"):
+            env_vars["__AIRFLOW_SUPERVISOR_FD"] = fd
         if self.env_vars:
             env_vars.update(self.env_vars)
 
+        # Add bundle_path to PYTHONPATH for subprocess to import Dag bundle modules
+        if self._bundle_path:
+            bundle_path = self._bundle_path
+            existing_pythonpath = env_vars.get("PYTHONPATH", "")
+            if existing_pythonpath:
+                # Append bundle_path after existing PYTHONPATH
+                env_vars["PYTHONPATH"] = f"{existing_pythonpath}{os.pathsep}{bundle_path}"
+            else:
+                env_vars["PYTHONPATH"] = bundle_path
+
         try:
             cmd: list[str] = [
                 os.fspath(python_path),
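The PYTHONPATH handling above appends the bundle root after any pre-existing entries, so user-supplied paths keep precedence. A self-contained sketch of that composition with hypothetical paths:

```python
import os

env_vars = {"PYTHONPATH": "/opt/airflow/plugins"}     # hypothetical existing value
bundle_path = "/opt/airflow/dag_bundles/my_bundle"    # hypothetical bundle root

existing = env_vars.get("PYTHONPATH", "")
env_vars["PYTHONPATH"] = f"{existing}{os.pathsep}{bundle_path}" if existing else bundle_path
print(env_vars["PYTHONPATH"])  # "/opt/airflow/plugins:/opt/airflow/dag_bundles/my_bundle" on POSIX
```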
airflow/providers/standard/operators/trigger_dagrun.py
@@ -28,21 +28,28 @@ from sqlalchemy import select
 from sqlalchemy.orm.exc import NoResultFound
 
 from airflow.api.common.trigger_dag import trigger_dag
-from airflow.
-from airflow.exceptions import (
-    AirflowException,
-    AirflowSkipException,
-    DagNotFound,
-    DagRunAlreadyExists,
-)
+from airflow.exceptions import DagNotFound, DagRunAlreadyExists
 from airflow.models.dag import DagModel
 from airflow.models.dagrun import DagRun
 from airflow.models.serialized_dag import SerializedDagModel
-from airflow.providers.common.compat.sdk import
+from airflow.providers.common.compat.sdk import (
+    AirflowException,
+    AirflowSkipException,
+    BaseOperatorLink,
+    XCom,
+    conf,
+    timezone,
+)
 from airflow.providers.standard.triggers.external_task import DagStateTrigger
+from airflow.providers.standard.utils.openlineage import safe_inject_openlineage_properties_into_dagrun_conf
 from airflow.providers.standard.version_compat import AIRFLOW_V_3_0_PLUS, BaseOperator
 from airflow.utils.state import DagRunState
-from airflow.utils.types import
+from airflow.utils.types import DagRunType
+
+try:
+    from airflow.sdk.definitions._internal.types import NOTSET, ArgNotSet
+except ImportError:
+    from airflow.utils.types import NOTSET, ArgNotSet  # type: ignore[attr-defined,no-redef]
 
 XCOM_LOGICAL_DATE_ISO = "trigger_logical_date_iso"
 XCOM_RUN_ID = "trigger_run_id"
@@ -51,8 +58,7 @@ XCOM_RUN_ID = "trigger_run_id"
 if TYPE_CHECKING:
     from sqlalchemy.orm.session import Session
 
-    from airflow.
-    from airflow.providers.common.compat.sdk import Context
+    from airflow.providers.common.compat.sdk import Context, TaskInstanceKey
 
 
 class DagIsPaused(AirflowException):
@@ -82,8 +88,17 @@ class TriggerDagRunLink(BaseOperatorLink):
         trigger_dag_id = operator.trigger_dag_id
         if not AIRFLOW_V_3_0_PLUS:
             from airflow.models.renderedtifields import RenderedTaskInstanceFields
+            from airflow.models.taskinstancekey import TaskInstanceKey as CoreTaskInstanceKey
+
+            core_ti_key = CoreTaskInstanceKey(
+                dag_id=ti_key.dag_id,
+                task_id=ti_key.task_id,
+                run_id=ti_key.run_id,
+                try_number=ti_key.try_number,
+                map_index=ti_key.map_index,
+            )
 
-            if template_fields := RenderedTaskInstanceFields.get_templated_fields(
+            if template_fields := RenderedTaskInstanceFields.get_templated_fields(core_ti_key):
                 trigger_dag_id: str = template_fields.get("trigger_dag_id", operator.trigger_dag_id)  # type: ignore[no-redef]
 
         # Fetch the correct dag_run_id for the triggerED dag which is
@@ -129,8 +144,13 @@ class TriggerDagRunOperator(BaseOperator):
     :param skip_when_already_exists: Set to true to mark the task as SKIPPED if a DAG run of the triggered
         DAG for the same logical date already exists.
     :param fail_when_dag_is_paused: If the dag to trigger is paused, DagIsPaused will be raised.
-    :param deferrable: If waiting for completion, whether
-
+    :param deferrable: If waiting for completion, whether to defer the task until done, default is ``False``.
+    :param openlineage_inject_parent_info: whether to include OpenLineage metadata about the parent task
+        in the triggered DAG run's conf, enabling improved lineage tracking. The metadata is only injected
+        if OpenLineage is enabled and running. This option does not modify any other part of the conf,
+        and existing OpenLineage-related settings in the conf will not be overwritten. The injection process
+        is safeguarded against exceptions - if any error occurs during metadata injection, it is gracefully
+        handled and the conf remains unchanged - so it's safe to use. Default is ``True``
     """
 
     template_fields: Sequence[str] = (
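A hedged usage sketch for the new flag documented above: disabling parent-info injection for a single trigger task (dag ids and conf values here are hypothetical):

```python
from airflow.providers.standard.operators.trigger_dagrun import TriggerDagRunOperator

trigger = TriggerDagRunOperator(
    task_id="trigger_downstream",
    trigger_dag_id="downstream_dag",       # hypothetical target dag
    conf={"source": "upstream"},           # passed through unchanged
    openlineage_inject_parent_info=False,  # skip the OpenLineage metadata injection
)
```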
@@ -160,6 +180,7 @@ class TriggerDagRunOperator(BaseOperator):
         skip_when_already_exists: bool = False,
         fail_when_dag_is_paused: bool = False,
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
+        openlineage_inject_parent_info: bool = True,
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
@@ -179,7 +200,8 @@ class TriggerDagRunOperator(BaseOperator):
         self.failed_states = [DagRunState.FAILED]
         self.skip_when_already_exists = skip_when_already_exists
         self.fail_when_dag_is_paused = fail_when_dag_is_paused
-        self.
+        self.openlineage_inject_parent_info = openlineage_inject_parent_info
+        self.deferrable = deferrable
         self.logical_date = logical_date
         if logical_date is NOTSET:
             self.logical_date = NOTSET
@@ -209,6 +231,12 @@ class TriggerDagRunOperator(BaseOperator):
         except (TypeError, JSONDecodeError):
             raise ValueError("conf parameter should be JSON Serializable %s", self.conf)
 
+        if self.openlineage_inject_parent_info:
+            self.log.debug("Checking if OpenLineage information can be safely injected into dagrun conf.")
+            self.conf = safe_inject_openlineage_properties_into_dagrun_conf(
+                dr_conf=self.conf, ti=context.get("ti")
+            )
+
         if self.trigger_run_id:
             run_id = str(self.trigger_run_id)
         else:
@@ -221,6 +249,9 @@ class TriggerDagRunOperator(BaseOperator):
         else:
             run_id = DagRun.generate_run_id(DagRunType.MANUAL, parsed_logical_date or timezone.utcnow())  # type: ignore[misc,call-arg]
 
+        # Save run_id as task attribute - to be used by listeners
+        self.trigger_run_id = run_id
+
         if self.fail_when_dag_is_paused:
             dag_model = DagModel.get_current(self.trigger_dag_id)
             if not dag_model:
@@ -232,12 +263,16 @@ class TriggerDagRunOperator(BaseOperator):
             raise AirflowException(f"Dag {self.trigger_dag_id} is paused")
 
         if AIRFLOW_V_3_0_PLUS:
-            self._trigger_dag_af_3(
+            self._trigger_dag_af_3(
+                context=context, run_id=self.trigger_run_id, parsed_logical_date=parsed_logical_date
+            )
         else:
-            self._trigger_dag_af_2(
+            self._trigger_dag_af_2(
+                context=context, run_id=self.trigger_run_id, parsed_logical_date=parsed_logical_date
+            )
 
     def _trigger_dag_af_3(self, context, run_id, parsed_logical_date):
-        from airflow.
+        from airflow.providers.common.compat.sdk import DagRunTriggerException
 
         raise DagRunTriggerException(
             trigger_dag_id=self.trigger_dag_id,
@@ -250,7 +285,7 @@ class TriggerDagRunOperator(BaseOperator):
             allowed_states=self.allowed_states,
             failed_states=self.failed_states,
             poke_interval=self.poke_interval,
-            deferrable=self.
+            deferrable=self.deferrable,
         )
 
     def _trigger_dag_af_2(self, context, run_id, parsed_logical_date):
@@ -291,7 +326,7 @@ class TriggerDagRunOperator(BaseOperator):
 
         if self.wait_for_completion:
             # Kick off the deferral process
-            if self.
+            if self.deferrable:
                 self.defer(
                     trigger=DagStateTrigger(
                         dag_id=self.trigger_dag_id,
@@ -322,17 +357,40 @@ class TriggerDagRunOperator(BaseOperator):
             return
 
     def execute_complete(self, context: Context, event: tuple[str, dict[str, Any]]):
+        """
+        Handle task completion after returning from a deferral.
+
+        Args:
+            context: The Airflow context dictionary.
+            event: A tuple containing the class path of the trigger and the trigger event data.
+        """
+        # Example event tuple content:
+        # (
+        #     "airflow.providers.standard.triggers.external_task.DagStateTrigger",
+        #     {
+        #         'dag_id': 'some_dag',
+        #         'states': ['success', 'failed'],
+        #         'poll_interval': 15,
+        #         'run_ids': ['manual__2025-11-19T17:49:20.907083+00:00'],
+        #         'execution_dates': [
+        #             DateTime(2025, 11, 19, 17, 49, 20, 907083, tzinfo=Timezone('UTC'))
+        #         ]
+        #     }
+        # )
+        _, event_data = event
+        run_ids = event_data["run_ids"]
+        # Re-set as attribute after coming back from deferral - to be used by listeners.
+        # Just a safety check on length, we should always have single run_id here.
+        self.trigger_run_id = run_ids[0] if len(run_ids) == 1 else None
         if AIRFLOW_V_3_0_PLUS:
-            self._trigger_dag_run_af_3_execute_complete(
+            self._trigger_dag_run_af_3_execute_complete(event_data=event_data)
         else:
-            self._trigger_dag_run_af_2_execute_complete(
+            self._trigger_dag_run_af_2_execute_complete(event_data=event_data)
 
-    def _trigger_dag_run_af_3_execute_complete(self,
-        run_ids = event[1]["run_ids"]
-        event_data = event[1]
+    def _trigger_dag_run_af_3_execute_complete(self, event_data: dict[str, Any]):
         failed_run_id_conditions = []
 
-        for run_id in run_ids:
+        for run_id in event_data["run_ids"]:
             state = event_data.get(run_id)
             if state in self.failed_states:
                 failed_run_id_conditions.append(run_id)
@@ -356,10 +414,10 @@ class TriggerDagRunOperator(BaseOperator):
 
     @provide_session
     def _trigger_dag_run_af_2_execute_complete(
-        self,
+        self, event_data: dict[str, Any], session: Session = NEW_SESSION
     ):
         # This logical_date is parsed from the return trigger event
-        provided_logical_date =
+        provided_logical_date = event_data["execution_dates"][0]
         try:
             # Note: here execution fails on database isolation mode. Needs structural changes for AIP-72
             dag_run = session.execute(
airflow/providers/standard/sensors/bash.py
@@ -22,8 +22,7 @@ from subprocess import PIPE, STDOUT, Popen
 from tempfile import NamedTemporaryFile, TemporaryDirectory, gettempdir
 from typing import TYPE_CHECKING
 
-from airflow.
-from airflow.providers.common.compat.sdk import BaseSensorOperator
+from airflow.providers.common.compat.sdk import AirflowFailException, BaseSensorOperator
 
 if TYPE_CHECKING:
     from airflow.providers.common.compat.sdk import Context
airflow/providers/standard/sensors/date_time.py
@@ -19,27 +19,12 @@ from __future__ import annotations
 
 import datetime
 from collections.abc import Sequence
-from dataclasses import dataclass
 from typing import TYPE_CHECKING, Any, NoReturn
 
 from airflow.providers.common.compat.sdk import BaseSensorOperator, timezone
 from airflow.providers.standard.triggers.temporal import DateTimeTrigger
 from airflow.providers.standard.version_compat import AIRFLOW_V_3_0_PLUS
-
-try:
-    from airflow.triggers.base import StartTriggerArgs  # type: ignore[no-redef]
-except ImportError:  # TODO: Remove this when min airflow version is 2.10.0 for standard provider
-
-    @dataclass
-    class StartTriggerArgs:  # type: ignore[no-redef]
-        """Arguments required for start task execution from triggerer."""
-
-        trigger_cls: str
-        next_method: str
-        trigger_kwargs: dict[str, Any] | None = None
-        next_kwargs: dict[str, Any] | None = None
-        timeout: datetime.timedelta | None = None
-
+from airflow.triggers.base import StartTriggerArgs
 
 if TYPE_CHECKING:
     from airflow.sdk import Context