apache-airflow-providers-standard 1.9.1rc1__py3-none-any.whl → 1.10.3__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in the public registry.
- airflow/providers/standard/__init__.py +3 -3
- airflow/providers/standard/decorators/bash.py +1 -2
- airflow/providers/standard/example_dags/example_bash_decorator.py +1 -1
- airflow/providers/standard/exceptions.py +1 -1
- airflow/providers/standard/hooks/subprocess.py +2 -9
- airflow/providers/standard/operators/bash.py +7 -3
- airflow/providers/standard/operators/datetime.py +1 -2
- airflow/providers/standard/operators/hitl.py +20 -10
- airflow/providers/standard/operators/latest_only.py +19 -10
- airflow/providers/standard/operators/python.py +39 -6
- airflow/providers/standard/operators/trigger_dagrun.py +82 -27
- airflow/providers/standard/sensors/bash.py +2 -4
- airflow/providers/standard/sensors/date_time.py +1 -16
- airflow/providers/standard/sensors/external_task.py +91 -51
- airflow/providers/standard/sensors/filesystem.py +2 -19
- airflow/providers/standard/sensors/time.py +2 -18
- airflow/providers/standard/sensors/time_delta.py +7 -6
- airflow/providers/standard/triggers/external_task.py +43 -40
- airflow/providers/standard/triggers/file.py +1 -1
- airflow/providers/standard/triggers/hitl.py +136 -87
- airflow/providers/standard/utils/openlineage.py +185 -0
- airflow/providers/standard/utils/python_virtualenv.py +38 -4
- airflow/providers/standard/utils/python_virtualenv_script.jinja2 +18 -3
- airflow/providers/standard/utils/sensor_helper.py +19 -8
- airflow/providers/standard/utils/skipmixin.py +2 -2
- airflow/providers/standard/version_compat.py +1 -0
- {apache_airflow_providers_standard-1.9.1rc1.dist-info → apache_airflow_providers_standard-1.10.3.dist-info}/METADATA +25 -11
- {apache_airflow_providers_standard-1.9.1rc1.dist-info → apache_airflow_providers_standard-1.10.3.dist-info}/RECORD +32 -30
- apache_airflow_providers_standard-1.10.3.dist-info/licenses/NOTICE +5 -0
- {apache_airflow_providers_standard-1.9.1rc1.dist-info → apache_airflow_providers_standard-1.10.3.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_standard-1.9.1rc1.dist-info → apache_airflow_providers_standard-1.10.3.dist-info}/entry_points.txt +0 -0
- {airflow/providers/standard → apache_airflow_providers_standard-1.10.3.dist-info/licenses}/LICENSE +0 -0
airflow/providers/standard/__init__.py

@@ -29,11 +29,11 @@ from airflow import __version__ as airflow_version
 
 __all__ = ["__version__"]
 
-__version__ = "1.9.1"
+__version__ = "1.10.3"
 
 if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
-    "2.10.0"
+    "2.11.0"
 ):
     raise RuntimeError(
-        f"The package `apache-airflow-providers-standard:{__version__}` needs Apache Airflow 2.10.0+"
+        f"The package `apache-airflow-providers-standard:{__version__}` needs Apache Airflow 2.11.0+"
    )
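The version gate above compares only the release segment of the running Airflow version, so pre-release and dev builds of the minimum version still pass. A standalone sketch of the same check (function name and version strings are illustrative):

```python
from packaging.version import parse


def meets_minimum(installed: str, minimum: str = "2.11.0") -> bool:
    # base_version strips pre-release/dev suffixes: "2.11.0.dev0" -> "2.11.0"
    return parse(parse(installed).base_version) >= parse(minimum)


assert meets_minimum("2.11.0.dev0")  # dev builds of the minimum pass
assert not meets_minimum("2.10.5")   # older releases fail
```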
airflow/providers/standard/decorators/bash.py

@@ -89,8 +89,7 @@ class _BashDecoratedOperator(DecoratedOperator, BashOperator):
             raise TypeError("The returned value from the TaskFlow callable must be a non-empty string.")
 
         self._is_inline_cmd = self._is_inline_command(bash_command=self.bash_command)
-
-
+        self.render_template_fields(context)
         return super().execute(context)
 
 
airflow/providers/standard/example_dags/example_bash_decorator.py

@@ -19,11 +19,11 @@ from __future__ import annotations
 
 import pendulum
 
-from airflow.exceptions import AirflowSkipException
 from airflow.providers.common.compat.sdk import TriggerRule
 from airflow.providers.standard.operators.empty import EmptyOperator
 from airflow.providers.standard.utils.weekday import WeekDay
 from airflow.sdk import chain, dag, task
+from airflow.sdk.exceptions import AirflowSkipException
 
 
 @dag(schedule=None, start_date=pendulum.datetime(2023, 1, 1, tz="UTC"), catchup=False)
airflow/providers/standard/hooks/subprocess.py

@@ -77,14 +77,6 @@ class SubprocessHook(BaseHook):
         """
         self.log.info("Tmp dir root location: %s", gettempdir())
         with working_directory(cwd=cwd) as cwd:
-
-            def pre_exec():
-                # Restore default signal disposition and invoke setsid
-                for sig in ("SIGPIPE", "SIGXFZ", "SIGXFSZ"):
-                    if hasattr(signal, sig):
-                        signal.signal(getattr(signal, sig), signal.SIG_DFL)
-                os.setsid()
-
             self.log.info("Running command: %s", command)
 
             self.sub_process = Popen(
@@ -93,7 +85,8 @@ class SubprocessHook(BaseHook):
                 stderr=STDOUT,
                 cwd=cwd,
                 env=env if env or env == {} else os.environ,
-                preexec_fn=pre_exec,
+                start_new_session=True,
+                restore_signals=True,
             )
 
             self.log.info("Output:")
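The removed `pre_exec` callback manually reset signal dispositions and called `os.setsid()`; the two replacement flags are standard-library `Popen` arguments that do the same work without a `preexec_fn`, which CPython documents as unsafe in threaded programs. `restore_signals=True` resets SIGPIPE, SIGXFZ and SIGXFSZ to SIG_DFL in the child, and `start_new_session=True` runs `setsid()` after the fork. A minimal sketch of the equivalent call:

```python
from subprocess import PIPE, STDOUT, Popen

# The child becomes its own session leader with default signal dispositions,
# matching what the deleted pre_exec() hook did by hand.
proc = Popen(
    ["bash", "-c", "trap -p; echo done"],
    stdout=PIPE,
    stderr=STDOUT,
    start_new_session=True,  # replaces os.setsid() in a preexec_fn
    restore_signals=True,    # replaces the manual signal.signal(...) loop
)
print(proc.communicate()[0].decode())
```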
airflow/providers/standard/operators/bash.py

@@ -24,14 +24,18 @@ from collections.abc import Callable, Container, Sequence
 from functools import cached_property
 from typing import TYPE_CHECKING, Any, cast
 
-from airflow.exceptions import AirflowException, AirflowSkipException
-from airflow.providers.common.compat.sdk import context_to_airflow_vars
+from airflow.providers.common.compat.sdk import (
+    AirflowException,
+    AirflowSkipException,
+    context_to_airflow_vars,
+)
 from airflow.providers.standard.hooks.subprocess import SubprocessHook, SubprocessResult, working_directory
 from airflow.providers.standard.version_compat import BaseOperator
 
 if TYPE_CHECKING:
     from airflow.providers.common.compat.sdk import Context
-    from airflow.utils.types import ArgNotSet
+
+    from tests_common.test_utils.version_compat import ArgNotSet
 
 
 class BashOperator(BaseOperator):
airflow/providers/standard/operators/datetime.py

@@ -20,8 +20,7 @@ import datetime
 from collections.abc import Iterable
 from typing import TYPE_CHECKING
 
-from airflow.exceptions import AirflowException
-from airflow.providers.common.compat.sdk import timezone
+from airflow.providers.common.compat.sdk import AirflowException, timezone
 from airflow.providers.standard.operators.branch import BaseBranchOperator
 
 if TYPE_CHECKING:
airflow/providers/standard/operators/hitl.py

@@ -18,8 +18,8 @@ from __future__ import annotations
 
 import logging
 
-from airflow.exceptions import AirflowOptionalProviderFeatureException
-from airflow.providers.standard.version_compat import AIRFLOW_V_3_1_PLUS
+from airflow.providers.common.compat.sdk import AirflowOptionalProviderFeatureException
+from airflow.providers.standard.version_compat import AIRFLOW_V_3_1_3_PLUS, AIRFLOW_V_3_1_PLUS
 
 if not AIRFLOW_V_3_1_PLUS:
     raise AirflowOptionalProviderFeatureException("Human in the loop functionality needs Airflow 3.1+.")
@@ -28,7 +28,7 @@ from collections.abc import Collection, Mapping, Sequence
 from typing import TYPE_CHECKING, Any
 from urllib.parse import ParseResult, urlencode, urlparse, urlunparse
 
-from airflow.configuration import conf
+from airflow.providers.common.compat.sdk import conf
 from airflow.providers.standard.exceptions import HITLRejectException, HITLTimeoutError, HITLTriggerEventError
 from airflow.providers.standard.operators.branch import BranchMixIn
 from airflow.providers.standard.triggers.hitl import HITLTrigger, HITLTriggerEventSuccessPayload
@@ -84,6 +84,14 @@ class HITLOperator(BaseOperator):
         self.multiple = multiple
 
         self.params: ParamsDict = params if isinstance(params, ParamsDict) else ParamsDict(params or {})
+        if hasattr(ParamsDict, "filter_params_by_source"):
+            # Params that exist only in Dag level does not make sense to appear in HITLOperator
+            self.params = ParamsDict.filter_params_by_source(self.params, source="task")
+        elif self.params:
+            self.log.debug(
+                "ParamsDict.filter_params_by_source not available; HITLOperator will also include Dag level params."
+            )
+
         self.notifiers: Sequence[BaseNotifier] = (
             [notifiers] if isinstance(notifiers, BaseNotifier) else notifiers or []
         )
@@ -110,6 +118,7 @@ class HITLOperator(BaseOperator):
         Raises:
             ValueError: If `"_options"` key is present in `params`, which is not allowed.
         """
+        self.params.validate()
         if "_options" in self.params:
             raise ValueError('"_options" is not allowed in params')
 
@@ -165,8 +174,10 @@ class HITLOperator(BaseOperator):
         )
 
     @property
-    def serialized_params(self) -> dict[str, Any]:
-        return self.params.dump()
+    def serialized_params(self) -> dict[str, dict[str, Any]]:
+        if not AIRFLOW_V_3_1_3_PLUS:
+            return self.params.dump() if isinstance(self.params, ParamsDict) else self.params
+        return {k: self.params.get_param(k).serialize() for k in self.params}
 
     def execute_complete(self, context: Context, event: dict[str, Any]) -> Any:
         if "error" in event:
@@ -196,13 +207,12 @@ class HITLOperator(BaseOperator):
 
     def validate_params_input(self, params_input: Mapping) -> None:
         """Check whether user provide valid params input."""
-        if (
-            self.serialized_params is not None
-            and params_input is not None
-            and set(self.serialized_params.keys()) ^ set(params_input)
-        ):
+        if self.params and params_input and set(self.serialized_params.keys()) ^ set(params_input):
             raise ValueError(f"params_input {params_input} does not match params {self.params}")
 
+        for key, value in params_input.items():
+            self.params[key] = value
+
     def generate_link_to_ui(
         self,
         *,
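The rewritten `validate_params_input` keeps the original symmetric-difference test: `set(a) ^ set(b)` is non-empty exactly when one side has a key the other lacks, so extra and missing keys are both rejected by a single expression. A standalone sketch of that check (names are illustrative):

```python
def check_keys_match(expected: dict, provided: dict) -> None:
    # Symmetric difference: keys present in exactly one of the two mappings.
    mismatch = set(expected) ^ set(provided)
    if expected and provided and mismatch:
        raise ValueError(f"keys {sorted(mismatch)} do not match the expected params")


check_keys_match({"a": 1, "b": 2}, {"a": 10, "b": 20})  # ok: same key set
# check_keys_match({"a": 1}, {"a": 1, "c": 3})          # raises: extra key "c"
```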
airflow/providers/standard/operators/latest_only.py

@@ -26,7 +26,7 @@ from typing import TYPE_CHECKING
 import pendulum
 
 from airflow.providers.standard.operators.branch import BaseBranchOperator
-from airflow.providers.standard.version_compat import AIRFLOW_V_3_0_PLUS
+from airflow.providers.standard.version_compat import AIRFLOW_V_3_0_PLUS, AIRFLOW_V_3_2_PLUS
 from airflow.utils.types import DagRunType
 
 if TYPE_CHECKING:
@@ -35,6 +35,17 @@ if TYPE_CHECKING:
     from airflow.models import DagRun
     from airflow.providers.common.compat.sdk import Context
 
+if AIRFLOW_V_3_2_PLUS:
+
+    def _get_dag_timetable(dag):
+        from airflow.serialization.encoders import coerce_to_core_timetable
+
+        return coerce_to_core_timetable(dag.timetable)
+else:
+
+    def _get_dag_timetable(dag):
+        return dag.timetable
+
 
 class LatestOnlyOperator(BaseBranchOperator):
     """
@@ -88,9 +99,9 @@ class LatestOnlyOperator(BaseBranchOperator):
     def _get_compare_dates(self, dag_run: DagRun) -> tuple[DateTime, DateTime] | None:
         dagrun_date: DateTime
         if AIRFLOW_V_3_0_PLUS:
-            dagrun_date = dag_run.logical_date or dag_run.run_after
+            dagrun_date = dag_run.logical_date or dag_run.run_after  # type: ignore[assignment]
         else:
-            dagrun_date = dag_run.logical_date
+            dagrun_date = dag_run.logical_date  # type: ignore[assignment]
 
         from airflow.timetables.base import DataInterval, TimeRestriction
 
@@ -104,15 +115,13 @@ class LatestOnlyOperator(BaseBranchOperator):
         else:
             end = dagrun_date
 
-        current_interval = DataInterval(
-            start=start,
-            end=end,
-        )
-
+        timetable = _get_dag_timetable(self.dag)
+        current_interval = DataInterval(start=start, end=end)
         time_restriction = TimeRestriction(
             earliest=None, latest=current_interval.end - timedelta(microseconds=1), catchup=True
         )
-        if prev_info := self.dag.timetable.next_dagrun_info(
+
+        if prev_info := timetable.next_dagrun_info(
             last_automated_data_interval=current_interval,
             restriction=time_restriction,
         ):
@@ -121,7 +130,7 @@ class LatestOnlyOperator(BaseBranchOperator):
             left = current_interval.start
 
         time_restriction = TimeRestriction(earliest=current_interval.end, latest=None, catchup=True)
-        next_info = self.dag.timetable.next_dagrun_info(
+        next_info = timetable.next_dagrun_info(
             last_automated_data_interval=current_interval,
             restriction=time_restriction,
         )
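All timetable access now goes through `_get_dag_timetable`, so on Airflow 3.2+ a serialized timetable is coerced back to a core timetable before `next_dagrun_info` is consulted. The operator's question to the timetable is symmetric: find the run scheduled just before the current interval, then the run scheduled just after it. A condensed sketch of the second query, reusing only the calls visible in the hunks above (the helper name is mine):

```python
from airflow.timetables.base import DataInterval, TimeRestriction


def next_run_after(timetable, start, end):
    """Return info for the first run scheduled after the given interval, or None."""
    current_interval = DataInterval(start=start, end=end)
    # earliest=current_interval.end limits the search to strictly later runs.
    restriction = TimeRestriction(earliest=current_interval.end, latest=None, catchup=True)
    return timetable.next_dagrun_info(
        last_automated_data_interval=current_interval,
        restriction=restriction,
    )
```

If this returns `None`, the timetable schedules nothing later and the current run is the latest.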
airflow/providers/standard/operators/python.py

@@ -43,20 +43,21 @@ from packaging.version import InvalidVersion
 
 from airflow.exceptions import (
     AirflowConfigException,
-    AirflowException,
     AirflowProviderDeprecationWarning,
-    AirflowSkipException,
     DeserializingResultError,
 )
 from airflow.models.variable import Variable
-from airflow.providers.common.compat.sdk import context_merge
+from airflow.providers.common.compat.sdk import AirflowException, AirflowSkipException, context_merge
 from airflow.providers.standard.hooks.package_index import PackageIndexHook
-from airflow.providers.standard.utils.python_virtualenv import prepare_virtualenv, write_python_script
+from airflow.providers.standard.utils.python_virtualenv import (
+    _execute_in_subprocess,
+    prepare_virtualenv,
+    write_python_script,
+)
 from airflow.providers.standard.version_compat import AIRFLOW_V_3_0_PLUS, BaseOperator
 from airflow.utils import hashlib_wrapper
 from airflow.utils.file import get_unique_dag_module_name
 from airflow.utils.operator_helpers import KeywordParameters
-from airflow.utils.process_utils import execute_in_subprocess
 
 if AIRFLOW_V_3_0_PLUS:
     from airflow.providers.standard.operators.branch import BaseBranchOperator
@@ -485,8 +486,28 @@ class _BasePythonVirtualenvOperator(PythonOperator, metaclass=ABCMeta):
         serializable_keys = set(self._iter_serializable_context_keys())
         new = {k: v for k, v in context.items() if k in serializable_keys}
         serializable_context = cast("Context", new)
+        # Store bundle_path for subprocess execution
+        self._bundle_path = self._get_bundle_path_from_context(context)
         return super().execute(context=serializable_context)
 
+    def _get_bundle_path_from_context(self, context: Context) -> str | None:
+        """
+        Extract bundle_path from the task instance's bundle_instance.
+
+        :param context: The task execution context
+        :return: Path to the bundle root directory, or None if not in a bundle
+        """
+        if not AIRFLOW_V_3_0_PLUS:
+            return None
+
+        # In Airflow 3.x, the RuntimeTaskInstance has a bundle_instance attribute
+        # that contains the bundle information including its path
+        ti = context["ti"]
+        if bundle_instance := getattr(ti, "bundle_instance", None):
+            return bundle_instance.path
+
+        return None
+
     def get_python_source(self):
         """Return the source of self.python_callable."""
         return textwrap.dedent(inspect.getsource(self.python_callable))
@@ -559,9 +580,21 @@ class _BasePythonVirtualenvOperator(PythonOperator, metaclass=ABCMeta):
         )
 
         env_vars = dict(os.environ) if self.inherit_env else {}
+        if fd := os.getenv("__AIRFLOW_SUPERVISOR_FD"):
+            env_vars["__AIRFLOW_SUPERVISOR_FD"] = fd
         if self.env_vars:
             env_vars.update(self.env_vars)
 
+        # Add bundle_path to PYTHONPATH for subprocess to import Dag bundle modules
+        if self._bundle_path:
+            bundle_path = self._bundle_path
+            existing_pythonpath = env_vars.get("PYTHONPATH", "")
+            if existing_pythonpath:
+                # Append bundle_path after existing PYTHONPATH
+                env_vars["PYTHONPATH"] = f"{existing_pythonpath}{os.pathsep}{bundle_path}"
+            else:
+                env_vars["PYTHONPATH"] = bundle_path
+
         try:
             cmd: list[str] = [
                 os.fspath(python_path),
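Note the ordering: the bundle path is appended after any PYTHONPATH the user supplied, so explicit entries keep import precedence. The same handling in isolation (helper name and path are illustrative):

```python
import os


def extend_pythonpath(env: dict[str, str], extra_path: str) -> dict[str, str]:
    # Append extra_path after any existing PYTHONPATH so user entries win.
    existing = env.get("PYTHONPATH", "")
    env["PYTHONPATH"] = f"{existing}{os.pathsep}{extra_path}" if existing else extra_path
    return env


env = extend_pythonpath(dict(os.environ), "/opt/airflow/bundles/my_bundle")
```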
@@ -572,7 +605,7 @@ class _BasePythonVirtualenvOperator(PythonOperator, metaclass=ABCMeta):
                 os.fspath(termination_log_path),
                 os.fspath(airflow_context_path),
             ]
-            execute_in_subprocess(
+            _execute_in_subprocess(
                 cmd=cmd,
                 env=env_vars,
             )
airflow/providers/standard/operators/trigger_dagrun.py

@@ -21,27 +21,35 @@ import datetime
 import json
 import time
 from collections.abc import Sequence
+from json import JSONDecodeError
 from typing import TYPE_CHECKING, Any
 
 from sqlalchemy import select
 from sqlalchemy.orm.exc import NoResultFound
 
 from airflow.api.common.trigger_dag import trigger_dag
-from airflow.configuration import conf
-from airflow.exceptions import (
-    AirflowException,
-    AirflowSkipException,
-    DagNotFound,
-    DagRunAlreadyExists,
-)
+from airflow.exceptions import DagNotFound, DagRunAlreadyExists
 from airflow.models.dag import DagModel
 from airflow.models.dagrun import DagRun
 from airflow.models.serialized_dag import SerializedDagModel
-from airflow.providers.common.compat.sdk import BaseOperatorLink, XCom, timezone
+from airflow.providers.common.compat.sdk import (
+    AirflowException,
+    AirflowSkipException,
+    BaseOperatorLink,
+    XCom,
+    conf,
+    timezone,
+)
 from airflow.providers.standard.triggers.external_task import DagStateTrigger
+from airflow.providers.standard.utils.openlineage import safe_inject_openlineage_properties_into_dagrun_conf
 from airflow.providers.standard.version_compat import AIRFLOW_V_3_0_PLUS, BaseOperator
 from airflow.utils.state import DagRunState
-from airflow.utils.types import NOTSET, ArgNotSet, DagRunType
+from airflow.utils.types import DagRunType
+
+try:
+    from airflow.sdk.definitions._internal.types import NOTSET, ArgNotSet
+except ImportError:
+    from airflow.utils.types import NOTSET, ArgNotSet  # type: ignore[attr-defined,no-redef]
 
 XCOM_LOGICAL_DATE_ISO = "trigger_logical_date_iso"
 XCOM_RUN_ID = "trigger_run_id"
@@ -128,8 +136,13 @@ class TriggerDagRunOperator(BaseOperator):
     :param skip_when_already_exists: Set to true to mark the task as SKIPPED if a DAG run of the triggered
         DAG for the same logical date already exists.
     :param fail_when_dag_is_paused: If the dag to trigger is paused, DagIsPaused will be raised.
-    :param deferrable: If waiting for completion, whether or not to defer the task until done,
-        default is ``False``.
+    :param deferrable: If waiting for completion, whether to defer the task until done, default is ``False``.
+    :param openlineage_inject_parent_info: whether to include OpenLineage metadata about the parent task
+        in the triggered DAG run's conf, enabling improved lineage tracking. The metadata is only injected
+        if OpenLineage is enabled and running. This option does not modify any other part of the conf,
+        and existing OpenLineage-related settings in the conf will not be overwritten. The injection process
+        is safeguarded against exceptions - if any error occurs during metadata injection, it is gracefully
+        handled and the conf remains unchanged - so it's safe to use. Default is ``True``
     """
 
     template_fields: Sequence[str] = (
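A minimal usage sketch of the new flag (dag and task ids are illustrative); passing `openlineage_inject_parent_info=False` opts a single trigger task out of the conf injection while leaving the rest of its behavior unchanged:

```python
from airflow.providers.standard.operators.trigger_dagrun import TriggerDagRunOperator

trigger = TriggerDagRunOperator(
    task_id="trigger_downstream",
    trigger_dag_id="downstream_dag",
    conf={"source": "upstream"},
    wait_for_completion=False,
    # Opt out of OpenLineage parent-info injection for this task only:
    openlineage_inject_parent_info=False,
)
```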
@@ -159,6 +172,7 @@ class TriggerDagRunOperator(BaseOperator):
         skip_when_already_exists: bool = False,
         fail_when_dag_is_paused: bool = False,
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
+        openlineage_inject_parent_info: bool = True,
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
@@ -178,7 +192,8 @@ class TriggerDagRunOperator(BaseOperator):
         self.failed_states = [DagRunState.FAILED]
         self.skip_when_already_exists = skip_when_already_exists
         self.fail_when_dag_is_paused = fail_when_dag_is_paused
-        self._defer = deferrable
+        self.openlineage_inject_parent_info = openlineage_inject_parent_info
+        self.deferrable = deferrable
         self.logical_date = logical_date
         if logical_date is NOTSET:
             self.logical_date = NOTSET
@@ -202,9 +217,17 @@ class TriggerDagRunOperator(BaseOperator):
             parsed_logical_date = timezone.parse(self.logical_date)
 
         try:
+            if self.conf and isinstance(self.conf, str):
+                self.conf = json.loads(self.conf)
             json.dumps(self.conf)
-        except TypeError:
-            raise ValueError("conf parameter should be JSON Serializable")
+        except (TypeError, JSONDecodeError):
+            raise ValueError("conf parameter should be JSON Serializable %s", self.conf)
+
+        if self.openlineage_inject_parent_info:
+            self.log.debug("Checking if OpenLineage information can be safely injected into dagrun conf.")
+            self.conf = safe_inject_openlineage_properties_into_dagrun_conf(
+                dr_conf=self.conf, ti=context.get("ti")
+            )
 
         if self.trigger_run_id:
             run_id = str(self.trigger_run_id)
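With the added branch, a `conf` that rendered to a JSON string (for example via templating) is parsed back into a mapping before the serializability check, and a malformed string now fails the same way as a non-serializable object. The normalization in isolation (helper name is mine):

```python
import json
from json import JSONDecodeError


def normalize_conf(conf):
    # Accept either a mapping or a JSON-encoded string.
    try:
        if conf and isinstance(conf, str):
            conf = json.loads(conf)
        json.dumps(conf)  # must round-trip to JSON
    except (TypeError, JSONDecodeError):
        raise ValueError(f"conf parameter should be JSON serializable: {conf!r}")
    return conf


print(normalize_conf('{"key": "value"}'))  # {'key': 'value'}
```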
@@ -218,8 +241,13 @@ class TriggerDagRunOperator(BaseOperator):
         else:
             run_id = DagRun.generate_run_id(DagRunType.MANUAL, parsed_logical_date or timezone.utcnow())  # type: ignore[misc,call-arg]
 
+        # Save run_id as task attribute - to be used by listeners
+        self.trigger_run_id = run_id
+
         if self.fail_when_dag_is_paused:
             dag_model = DagModel.get_current(self.trigger_dag_id)
+            if not dag_model:
+                raise ValueError(f"Dag {self.trigger_dag_id} is not found")
             if dag_model.is_paused:
                 # TODO: enable this when dag state endpoint available from task sdk
                 # if AIRFLOW_V_3_0_PLUS:
@@ -227,12 +255,16 @@ class TriggerDagRunOperator(BaseOperator):
                 raise AirflowException(f"Dag {self.trigger_dag_id} is paused")
 
         if AIRFLOW_V_3_0_PLUS:
-            self._trigger_dag_af_3(context=context, run_id=run_id, parsed_logical_date=parsed_logical_date)
+            self._trigger_dag_af_3(
+                context=context, run_id=self.trigger_run_id, parsed_logical_date=parsed_logical_date
+            )
         else:
-            self._trigger_dag_af_2(context=context, run_id=run_id, parsed_logical_date=parsed_logical_date)
+            self._trigger_dag_af_2(
+                context=context, run_id=self.trigger_run_id, parsed_logical_date=parsed_logical_date
+            )
 
     def _trigger_dag_af_3(self, context, run_id, parsed_logical_date):
-        from airflow.exceptions import DagRunTriggerException
+        from airflow.providers.common.compat.sdk import DagRunTriggerException
 
         raise DagRunTriggerException(
             trigger_dag_id=self.trigger_dag_id,
@@ -245,7 +277,7 @@ class TriggerDagRunOperator(BaseOperator):
             allowed_states=self.allowed_states,
             failed_states=self.failed_states,
             poke_interval=self.poke_interval,
-            deferrable=self._defer,
+            deferrable=self.deferrable,
         )
 
     def _trigger_dag_af_2(self, context, run_id, parsed_logical_date):
@@ -286,7 +318,7 @@ class TriggerDagRunOperator(BaseOperator):
 
         if self.wait_for_completion:
             # Kick off the deferral process
-            if self._defer:
+            if self.deferrable:
                 self.defer(
                     trigger=DagStateTrigger(
                         dag_id=self.trigger_dag_id,
@@ -317,17 +349,40 @@ class TriggerDagRunOperator(BaseOperator):
             return
 
     def execute_complete(self, context: Context, event: tuple[str, dict[str, Any]]):
+        """
+        Handle task completion after returning from a deferral.
+
+        Args:
+            context: The Airflow context dictionary.
+            event: A tuple containing the class path of the trigger and the trigger event data.
+        """
+        # Example event tuple content:
+        # (
+        #     "airflow.providers.standard.triggers.external_task.DagStateTrigger",
+        #     {
+        #         'dag_id': 'some_dag',
+        #         'states': ['success', 'failed'],
+        #         'poll_interval': 15,
+        #         'run_ids': ['manual__2025-11-19T17:49:20.907083+00:00'],
+        #         'execution_dates': [
+        #             DateTime(2025, 11, 19, 17, 49, 20, 907083, tzinfo=Timezone('UTC'))
+        #         ]
+        #     }
+        # )
+        _, event_data = event
+        run_ids = event_data["run_ids"]
+        # Re-set as attribute after coming back from deferral - to be used by listeners.
+        # Just a safety check on length, we should always have single run_id here.
+        self.trigger_run_id = run_ids[0] if len(run_ids) == 1 else None
         if AIRFLOW_V_3_0_PLUS:
-            self._trigger_dag_run_af_3_execute_complete(event=event)
+            self._trigger_dag_run_af_3_execute_complete(event_data=event_data)
         else:
-            self._trigger_dag_run_af_2_execute_complete(event=event)
+            self._trigger_dag_run_af_2_execute_complete(event_data=event_data)
 
-    def _trigger_dag_run_af_3_execute_complete(self, event: tuple[str, dict[str, Any]]):
-        run_ids = event[1]["run_ids"]
-        event_data = event[1]
+    def _trigger_dag_run_af_3_execute_complete(self, event_data: dict[str, Any]):
         failed_run_id_conditions = []
 
-        for run_id in run_ids:
+        for run_id in event_data["run_ids"]:
             state = event_data.get(run_id)
             if state in self.failed_states:
                 failed_run_id_conditions.append(run_id)
@@ -351,10 +406,10 @@ class TriggerDagRunOperator(BaseOperator):
 
     @provide_session
     def _trigger_dag_run_af_2_execute_complete(
-        self, event: tuple[str, dict[str, Any]], session: Session = NEW_SESSION
+        self, event_data: dict[str, Any], session: Session = NEW_SESSION
     ):
         # This logical_date is parsed from the return trigger event
-        provided_logical_date = event[1]["execution_dates"][0]
+        provided_logical_date = event_data["execution_dates"][0]
         try:
             # Note: here execution fails on database isolation mode. Needs structural changes for AIP-72
             dag_run = session.execute(
airflow/providers/standard/sensors/bash.py

@@ -17,14 +17,12 @@
 # under the License.
 from __future__ import annotations
 
-import os
 from collections.abc import Sequence
 from subprocess import PIPE, STDOUT, Popen
 from tempfile import NamedTemporaryFile, TemporaryDirectory, gettempdir
 from typing import TYPE_CHECKING
 
-from airflow.exceptions import AirflowFailException
-from airflow.providers.common.compat.sdk import BaseSensorOperator
+from airflow.providers.common.compat.sdk import AirflowFailException, BaseSensorOperator
 
 if TYPE_CHECKING:
     from airflow.providers.common.compat.sdk import Context
@@ -89,7 +87,7 @@ class BashSensor(BaseSensorOperator):
                 close_fds=True,
                 cwd=tmp_dir,
                 env=self.env,
-                preexec_fn=os.setsid,
+                start_new_session=True,
             ) as resp:
                 if resp.stdout:
                     self.log.info("Output:")
airflow/providers/standard/sensors/date_time.py

@@ -19,27 +19,12 @@ from __future__ import annotations
 
 import datetime
 from collections.abc import Sequence
-from dataclasses import dataclass
 from typing import TYPE_CHECKING, Any, NoReturn
 
 from airflow.providers.common.compat.sdk import BaseSensorOperator, timezone
 from airflow.providers.standard.triggers.temporal import DateTimeTrigger
 from airflow.providers.standard.version_compat import AIRFLOW_V_3_0_PLUS
-
-try:
-    from airflow.triggers.base import StartTriggerArgs  # type: ignore[no-redef]
-except ImportError:  # TODO: Remove this when min airflow version is 2.10.0 for standard provider
-
-    @dataclass
-    class StartTriggerArgs:  # type: ignore[no-redef]
-        """Arguments required for start task execution from triggerer."""
-
-        trigger_cls: str
-        next_method: str
-        trigger_kwargs: dict[str, Any] | None = None
-        next_kwargs: dict[str, Any] | None = None
-        timeout: datetime.timedelta | None = None
-
+from airflow.triggers.base import StartTriggerArgs
 
 if TYPE_CHECKING:
     from airflow.sdk import Context
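The deleted block is the classic import-with-fallback shim: try the modern import location and define a minimal stand-in when it is missing. Raising the provider's minimum Airflow version makes `airflow.triggers.base.StartTriggerArgs` always importable, so the shim collapses to a plain import. The general shape of the pattern, for reference (`somelib` and `Feature` are hypothetical):

```python
from __future__ import annotations

try:
    from somelib.new_location import Feature  # present on newer versions
except ImportError:
    # Minimal stand-in with the same interface for older versions.
    from dataclasses import dataclass

    @dataclass
    class Feature:
        name: str
        value: int | None = None
```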