apache-airflow-providers-standard 1.9.2rc1__py3-none-any.whl → 1.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. airflow/providers/standard/__init__.py +3 -3
  2. airflow/providers/standard/decorators/bash.py +1 -2
  3. airflow/providers/standard/example_dags/example_bash_decorator.py +1 -1
  4. airflow/providers/standard/example_dags/example_python_decorator.py +17 -0
  5. airflow/providers/standard/example_dags/example_python_operator.py +18 -0
  6. airflow/providers/standard/exceptions.py +1 -1
  7. airflow/providers/standard/get_provider_info.py +1 -0
  8. airflow/providers/standard/operators/bash.py +7 -3
  9. airflow/providers/standard/operators/datetime.py +1 -2
  10. airflow/providers/standard/operators/hitl.py +9 -2
  11. airflow/providers/standard/operators/latest_only.py +17 -8
  12. airflow/providers/standard/operators/python.py +93 -8
  13. airflow/providers/standard/operators/trigger_dagrun.py +86 -28
  14. airflow/providers/standard/sensors/bash.py +1 -2
  15. airflow/providers/standard/sensors/date_time.py +1 -16
  16. airflow/providers/standard/sensors/external_task.py +28 -7
  17. airflow/providers/standard/sensors/filesystem.py +2 -19
  18. airflow/providers/standard/sensors/time.py +2 -18
  19. airflow/providers/standard/sensors/time_delta.py +7 -6
  20. airflow/providers/standard/triggers/external_task.py +11 -8
  21. airflow/providers/standard/triggers/hitl.py +2 -2
  22. airflow/providers/standard/utils/openlineage.py +185 -0
  23. airflow/providers/standard/utils/python_virtualenv.py +4 -3
  24. airflow/providers/standard/utils/python_virtualenv_script.jinja2 +18 -3
  25. airflow/providers/standard/utils/skipmixin.py +2 -2
  26. {apache_airflow_providers_standard-1.9.2rc1.dist-info → apache_airflow_providers_standard-1.11.0.dist-info}/METADATA +22 -10
  27. {apache_airflow_providers_standard-1.9.2rc1.dist-info → apache_airflow_providers_standard-1.11.0.dist-info}/RECORD +31 -30
  28. {apache_airflow_providers_standard-1.9.2rc1.dist-info → apache_airflow_providers_standard-1.11.0.dist-info}/licenses/NOTICE +1 -1
  29. {apache_airflow_providers_standard-1.9.2rc1.dist-info → apache_airflow_providers_standard-1.11.0.dist-info}/WHEEL +0 -0
  30. {apache_airflow_providers_standard-1.9.2rc1.dist-info → apache_airflow_providers_standard-1.11.0.dist-info}/entry_points.txt +0 -0
  31. {apache_airflow_providers_standard-1.9.2rc1.dist-info → apache_airflow_providers_standard-1.11.0.dist-info}/licenses/LICENSE +0 -0
airflow/providers/standard/__init__.py
@@ -29,11 +29,11 @@ from airflow import __version__ as airflow_version
 
 __all__ = ["__version__"]
 
-__version__ = "1.9.2"
+__version__ = "1.11.0"
 
 if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
-    "2.10.0"
+    "2.11.0"
 ):
     raise RuntimeError(
-        f"The package `apache-airflow-providers-standard:{__version__}` needs Apache Airflow 2.10.0+"
+        f"The package `apache-airflow-providers-standard:{__version__}` needs Apache Airflow 2.11.0+"
     )
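
Note: the version gate above compares only the release's base version, so a pre-release such as 2.11.0rc1 still satisfies the 2.11.0 floor. A standalone sketch of the same check (hypothetical helper name, not part of the package):

    import packaging.version

    def meets_min_airflow(airflow_version: str, minimum: str = "2.11.0") -> bool:
        # base_version strips pre-release/dev suffixes: "2.11.0rc1" -> "2.11.0"
        base = packaging.version.parse(packaging.version.parse(airflow_version).base_version)
        return base >= packaging.version.parse(minimum)

    assert meets_min_airflow("2.11.0rc1")
    assert not meets_min_airflow("2.10.5")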
airflow/providers/standard/decorators/bash.py
@@ -89,8 +89,7 @@ class _BashDecoratedOperator(DecoratedOperator, BashOperator):
             raise TypeError("The returned value from the TaskFlow callable must be a non-empty string.")
 
         self._is_inline_cmd = self._is_inline_command(bash_command=self.bash_command)
-        context["ti"].render_templates()  # type: ignore[attr-defined]
-
+        self.render_template_fields(context)
         return super().execute(context)
 
 
airflow/providers/standard/example_dags/example_bash_decorator.py
@@ -19,11 +19,11 @@ from __future__ import annotations
 
 import pendulum
 
-from airflow.exceptions import AirflowSkipException
 from airflow.providers.common.compat.sdk import TriggerRule
 from airflow.providers.standard.operators.empty import EmptyOperator
 from airflow.providers.standard.utils.weekday import WeekDay
 from airflow.sdk import chain, dag, task
+from airflow.sdk.exceptions import AirflowSkipException
 
 
 @dag(schedule=None, start_date=pendulum.datetime(2023, 1, 1, tz="UTC"), catchup=False)
airflow/providers/standard/example_dags/example_python_decorator.py
@@ -22,6 +22,7 @@ virtual environment.
 
 from __future__ import annotations
 
+import asyncio
 import logging
 import sys
 import time
@@ -75,6 +76,22 @@ def example_python_decorator():
     run_this >> log_the_sql >> sleeping_task
     # [END howto_operator_python_kwargs]
 
+    # [START howto_async_operator_python_kwargs]
+    # Generate 5 sleeping tasks, sleeping from 0.0 to 0.4 seconds respectively
+    # Asynchronous callables are natively supported since Airflow 3.2+
+    @task
+    async def my_async_sleeping_function(random_base):
+        """This is a function that will run within the DAG execution"""
+        await asyncio.sleep(random_base)
+
+    for i in range(5):
+        async_sleeping_task = my_async_sleeping_function.override(task_id=f"async_sleep_for_{i}")(
+            random_base=i / 10
+        )
+
+        run_this >> log_the_sql >> async_sleeping_task
+    # [END howto_async_operator_python_kwargs]
+
     # [START howto_operator_python_venv]
     @task.virtualenv(
         task_id="virtualenv_python", requirements=["colorama==0.4.0"], system_site_packages=False
airflow/providers/standard/example_dags/example_python_operator.py
@@ -22,6 +22,7 @@ within a virtual environment.
 
 from __future__ import annotations
 
+import asyncio
 import logging
 import sys
 import time
@@ -88,6 +89,23 @@ with DAG(
     run_this >> log_the_sql >> sleeping_task
     # [END howto_operator_python_kwargs]
 
+    # [START howto_async_operator_python_kwargs]
+    # Generate 5 sleeping tasks, sleeping from 0.0 to 0.4 seconds respectively
+    # Asynchronous callables are natively supported since Airflow 3.2+
+    async def my_async_sleeping_function(random_base):
+        """This is a function that will run within the DAG execution"""
+        await asyncio.sleep(random_base)
+
+    for i in range(5):
+        async_sleeping_task = PythonOperator(
+            task_id=f"async_sleep_for_{i}",
+            python_callable=my_async_sleeping_function,
+            op_kwargs={"random_base": i / 10},
+        )
+
+        run_this >> log_the_sql >> async_sleeping_task
+    # [END howto_async_operator_python_kwargs]
+
     # [START howto_operator_python_venv]
     def callable_virtualenv():
         """
airflow/providers/standard/exceptions.py
@@ -18,7 +18,7 @@
 
 from __future__ import annotations
 
-from airflow.exceptions import AirflowException
+from airflow.providers.common.compat.sdk import AirflowException
 
 
 class AirflowExternalTaskSensorException(AirflowException):
airflow/providers/standard/get_provider_info.py
@@ -34,6 +34,7 @@ def get_provider_info():
                "how-to-guide": [
                    "/docs/apache-airflow-providers-standard/operators/bash.rst",
                    "/docs/apache-airflow-providers-standard/operators/python.rst",
+                   "/docs/apache-airflow-providers-standard/operators/hitl.rst",
                    "/docs/apache-airflow-providers-standard/operators/datetime.rst",
                    "/docs/apache-airflow-providers-standard/operators/trigger_dag_run.rst",
                    "/docs/apache-airflow-providers-standard/operators/latest_only.rst",
airflow/providers/standard/operators/bash.py
@@ -24,14 +24,18 @@ from collections.abc import Callable, Container, Sequence
 from functools import cached_property
 from typing import TYPE_CHECKING, Any, cast
 
-from airflow.exceptions import AirflowException, AirflowSkipException
-from airflow.providers.common.compat.sdk import context_to_airflow_vars
+from airflow.providers.common.compat.sdk import (
+    AirflowException,
+    AirflowSkipException,
+    context_to_airflow_vars,
+)
 from airflow.providers.standard.hooks.subprocess import SubprocessHook, SubprocessResult, working_directory
 from airflow.providers.standard.version_compat import BaseOperator
 
 if TYPE_CHECKING:
     from airflow.providers.common.compat.sdk import Context
-    from airflow.utils.types import ArgNotSet
+
+    from tests_common.test_utils.version_compat import ArgNotSet
 
 
 class BashOperator(BaseOperator):
airflow/providers/standard/operators/datetime.py
@@ -20,8 +20,7 @@ import datetime
 from collections.abc import Iterable
 from typing import TYPE_CHECKING
 
-from airflow.exceptions import AirflowException
-from airflow.providers.common.compat.sdk import timezone
+from airflow.providers.common.compat.sdk import AirflowException, timezone
 from airflow.providers.standard.operators.branch import BaseBranchOperator
 
 if TYPE_CHECKING:
airflow/providers/standard/operators/hitl.py
@@ -18,7 +18,7 @@ from __future__ import annotations
 
 import logging
 
-from airflow.exceptions import AirflowOptionalProviderFeatureException
+from airflow.providers.common.compat.sdk import AirflowOptionalProviderFeatureException
 from airflow.providers.standard.version_compat import AIRFLOW_V_3_1_3_PLUS, AIRFLOW_V_3_1_PLUS
 
 if not AIRFLOW_V_3_1_PLUS:
@@ -28,7 +28,7 @@ from collections.abc import Collection, Mapping, Sequence
 from typing import TYPE_CHECKING, Any
 from urllib.parse import ParseResult, urlencode, urlparse, urlunparse
 
-from airflow.configuration import conf
+from airflow.providers.common.compat.sdk import conf
 from airflow.providers.standard.exceptions import HITLRejectException, HITLTimeoutError, HITLTriggerEventError
 from airflow.providers.standard.operators.branch import BranchMixIn
 from airflow.providers.standard.triggers.hitl import HITLTrigger, HITLTriggerEventSuccessPayload
@@ -84,6 +84,13 @@ class HITLOperator(BaseOperator):
         self.multiple = multiple
 
         self.params: ParamsDict = params if isinstance(params, ParamsDict) else ParamsDict(params or {})
+        if hasattr(ParamsDict, "filter_params_by_source"):
+            # Params that exist only in Dag level does not make sense to appear in HITLOperator
+            self.params = ParamsDict.filter_params_by_source(self.params, source="task")
+        elif self.params:
+            self.log.debug(
+                "ParamsDict.filter_params_by_source not available; HITLOperator will also include Dag level params."
+            )
 
         self.notifiers: Sequence[BaseNotifier] = (
             [notifiers] if isinstance(notifiers, BaseNotifier) else notifiers or []
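
Note: the hasattr() guard above is the capability-detection pattern this release leans on whenever a newer core API may be missing at runtime. A generic sketch of the pattern (class and function names are illustrative only, not Airflow APIs):

    class Params(dict):
        """Stand-in for a params container; illustrative only."""

    def task_level_params(params):
        # Prefer the newer API when the running core provides it...
        if hasattr(type(params), "filter_params_by_source"):
            return type(params).filter_params_by_source(params, source="task")
        # ...otherwise fall back to the old behaviour and keep everything.
        return params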
airflow/providers/standard/operators/latest_only.py
@@ -26,7 +26,7 @@ from typing import TYPE_CHECKING
 import pendulum
 
 from airflow.providers.standard.operators.branch import BaseBranchOperator
-from airflow.providers.standard.version_compat import AIRFLOW_V_3_0_PLUS
+from airflow.providers.standard.version_compat import AIRFLOW_V_3_0_PLUS, AIRFLOW_V_3_2_PLUS
 from airflow.utils.types import DagRunType
 
 if TYPE_CHECKING:
@@ -35,6 +35,17 @@ if TYPE_CHECKING:
     from airflow.models import DagRun
     from airflow.providers.common.compat.sdk import Context
 
+if AIRFLOW_V_3_2_PLUS:
+
+    def _get_dag_timetable(dag):
+        from airflow.serialization.encoders import coerce_to_core_timetable
+
+        return coerce_to_core_timetable(dag.timetable)
+else:
+
+    def _get_dag_timetable(dag):
+        return dag.timetable
+
 
 class LatestOnlyOperator(BaseBranchOperator):
     """
@@ -104,15 +115,13 @@ class LatestOnlyOperator(BaseBranchOperator):
         else:
             end = dagrun_date
 
-        current_interval = DataInterval(
-            start=start,
-            end=end,
-        )
-
+        timetable = _get_dag_timetable(self.dag)
+        current_interval = DataInterval(start=start, end=end)
         time_restriction = TimeRestriction(
             earliest=None, latest=current_interval.end - timedelta(microseconds=1), catchup=True
         )
-        if prev_info := self.dag.timetable.next_dagrun_info(
+
+        if prev_info := timetable.next_dagrun_info(
             last_automated_data_interval=current_interval,
             restriction=time_restriction,
         ):
@@ -121,7 +130,7 @@ class LatestOnlyOperator(BaseBranchOperator):
             left = current_interval.start
 
         time_restriction = TimeRestriction(earliest=current_interval.end, latest=None, catchup=True)
-        next_info = self.dag.timetable.next_dagrun_info(
+        next_info = timetable.next_dagrun_info(
             last_automated_data_interval=current_interval,
             restriction=time_restriction,
         )
airflow/providers/standard/operators/python.py
@@ -43,20 +43,22 @@ from packaging.version import InvalidVersion
 
 from airflow.exceptions import (
     AirflowConfigException,
-    AirflowException,
     AirflowProviderDeprecationWarning,
-    AirflowSkipException,
     DeserializingResultError,
 )
 from airflow.models.variable import Variable
-from airflow.providers.common.compat.sdk import context_merge
+from airflow.providers.common.compat.sdk import AirflowException, AirflowSkipException, context_merge
+from airflow.providers.common.compat.standard.operators import (
+    BaseAsyncOperator,
+    is_async_callable,
+)
 from airflow.providers.standard.hooks.package_index import PackageIndexHook
 from airflow.providers.standard.utils.python_virtualenv import (
     _execute_in_subprocess,
     prepare_virtualenv,
     write_python_script,
 )
-from airflow.providers.standard.version_compat import AIRFLOW_V_3_0_PLUS, BaseOperator
+from airflow.providers.standard.version_compat import AIRFLOW_V_3_0_PLUS, AIRFLOW_V_3_2_PLUS
 from airflow.utils import hashlib_wrapper
 from airflow.utils.file import get_unique_dag_module_name
 from airflow.utils.operator_helpers import KeywordParameters
@@ -77,7 +79,10 @@ if TYPE_CHECKING:
     from pendulum.datetime import DateTime
 
     from airflow.providers.common.compat.sdk import Context
-    from airflow.sdk.execution_time.callback_runner import ExecutionCallableRunner
+    from airflow.sdk.execution_time.callback_runner import (
+        AsyncExecutionCallableRunner,
+        ExecutionCallableRunner,
+    )
     from airflow.sdk.execution_time.context import OutletEventAccessorsProtocol
 
 _SerializerTypeDef = Literal["pickle", "cloudpickle", "dill"]
@@ -117,9 +122,9 @@ class _PythonVersionInfo(NamedTuple):
         return cls(*_parse_version_info(result.strip()))
 
 
-class PythonOperator(BaseOperator):
+class PythonOperator(BaseAsyncOperator):
     """
-    Executes a Python callable.
+    Base class for all Python operators.
 
     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -194,7 +199,14 @@ class PythonOperator(BaseOperator):
         self.template_ext = templates_exts
         self.show_return_value_in_logs = show_return_value_in_logs
 
-    def execute(self, context: Context) -> Any:
+    @property
+    def is_async(self) -> bool:
+        return is_async_callable(self.python_callable)
+
+    def execute(self, context) -> Any:
+        if self.is_async:
+            return BaseAsyncOperator.execute(self, context)
+
         context_merge(context, self.op_kwargs, templates_dict=self.templates_dict)
         self.op_kwargs = self.determine_kwargs(context)
 
@@ -238,6 +250,47 @@ class PythonOperator(BaseOperator):
         runner = create_execution_runner(self.python_callable, asset_events, logger=self.log)
         return runner.run(*self.op_args, **self.op_kwargs)
 
+    if AIRFLOW_V_3_2_PLUS:
+
+        async def aexecute(self, context):
+            context_merge(context, self.op_kwargs, templates_dict=self.templates_dict)
+            self.op_kwargs = self.determine_kwargs(context)
+
+            # This needs to be lazy because subclasses may implement execute_callable
+            # by running a separate process that can't use the eager result.
+            def __prepare_execution() -> (
+                tuple[AsyncExecutionCallableRunner, OutletEventAccessorsProtocol] | None
+            ):
+                from airflow.sdk.execution_time.callback_runner import create_async_executable_runner
+                from airflow.sdk.execution_time.context import context_get_outlet_events
+
+                return (
+                    cast("AsyncExecutionCallableRunner", create_async_executable_runner),
+                    context_get_outlet_events(context),
+                )
+
+            self.__prepare_execution = __prepare_execution
+
+            return_value = await self.aexecute_callable()
+            if self.show_return_value_in_logs:
+                self.log.info("Done. Returned value was: %s", return_value)
+            else:
+                self.log.info("Done. Returned value not shown")
+
+            return return_value
+
+        async def aexecute_callable(self) -> Any:
+            """
+            Call the python callable with the given arguments.
+
+            :return: the return value of the call.
+            """
+            if (execution_preparation := self.__prepare_execution()) is None:
+                return await self.python_callable(*self.op_args, **self.op_kwargs)
+            create_execution_runner, asset_events = execution_preparation
+            runner = create_execution_runner(self.python_callable, asset_events, logger=self.log)
+            return await runner.run(*self.op_args, **self.op_kwargs)
+
 
 class BranchPythonOperator(BaseBranchOperator, PythonOperator):
     """
@@ -488,8 +541,28 @@ class _BasePythonVirtualenvOperator(PythonOperator, metaclass=ABCMeta):
         serializable_keys = set(self._iter_serializable_context_keys())
         new = {k: v for k, v in context.items() if k in serializable_keys}
         serializable_context = cast("Context", new)
+        # Store bundle_path for subprocess execution
+        self._bundle_path = self._get_bundle_path_from_context(context)
         return super().execute(context=serializable_context)
 
+    def _get_bundle_path_from_context(self, context: Context) -> str | None:
+        """
+        Extract bundle_path from the task instance's bundle_instance.
+
+        :param context: The task execution context
+        :return: Path to the bundle root directory, or None if not in a bundle
+        """
+        if not AIRFLOW_V_3_0_PLUS:
+            return None
+
+        # In Airflow 3.x, the RuntimeTaskInstance has a bundle_instance attribute
+        # that contains the bundle information including its path
+        ti = context["ti"]
+        if bundle_instance := getattr(ti, "bundle_instance", None):
+            return bundle_instance.path
+
+        return None
+
     def get_python_source(self):
         """Return the source of self.python_callable."""
         return textwrap.dedent(inspect.getsource(self.python_callable))
@@ -562,9 +635,21 @@ class _BasePythonVirtualenvOperator(PythonOperator, metaclass=ABCMeta):
         )
 
         env_vars = dict(os.environ) if self.inherit_env else {}
+        if fd := os.getenv("__AIRFLOW_SUPERVISOR_FD"):
+            env_vars["__AIRFLOW_SUPERVISOR_FD"] = fd
         if self.env_vars:
             env_vars.update(self.env_vars)
 
+        # Add bundle_path to PYTHONPATH for subprocess to import Dag bundle modules
+        if self._bundle_path:
+            bundle_path = self._bundle_path
+            existing_pythonpath = env_vars.get("PYTHONPATH", "")
+            if existing_pythonpath:
+                # Append bundle_path after existing PYTHONPATH
+                env_vars["PYTHONPATH"] = f"{existing_pythonpath}{os.pathsep}{bundle_path}"
+            else:
+                env_vars["PYTHONPATH"] = bundle_path
+
         try:
             cmd: list[str] = [
                 os.fspath(python_path),
airflow/providers/standard/operators/trigger_dagrun.py
@@ -28,21 +28,28 @@ from sqlalchemy import select
 from sqlalchemy.orm.exc import NoResultFound
 
 from airflow.api.common.trigger_dag import trigger_dag
-from airflow.configuration import conf
-from airflow.exceptions import (
-    AirflowException,
-    AirflowSkipException,
-    DagNotFound,
-    DagRunAlreadyExists,
-)
+from airflow.exceptions import DagNotFound, DagRunAlreadyExists
 from airflow.models.dag import DagModel
 from airflow.models.dagrun import DagRun
 from airflow.models.serialized_dag import SerializedDagModel
-from airflow.providers.common.compat.sdk import BaseOperatorLink, XCom, timezone
+from airflow.providers.common.compat.sdk import (
+    AirflowException,
+    AirflowSkipException,
+    BaseOperatorLink,
+    XCom,
+    conf,
+    timezone,
+)
 from airflow.providers.standard.triggers.external_task import DagStateTrigger
+from airflow.providers.standard.utils.openlineage import safe_inject_openlineage_properties_into_dagrun_conf
 from airflow.providers.standard.version_compat import AIRFLOW_V_3_0_PLUS, BaseOperator
 from airflow.utils.state import DagRunState
-from airflow.utils.types import NOTSET, ArgNotSet, DagRunType
+from airflow.utils.types import DagRunType
+
+try:
+    from airflow.sdk.definitions._internal.types import NOTSET, ArgNotSet
+except ImportError:
+    from airflow.utils.types import NOTSET, ArgNotSet  # type: ignore[attr-defined,no-redef]
 
 XCOM_LOGICAL_DATE_ISO = "trigger_logical_date_iso"
 XCOM_RUN_ID = "trigger_run_id"
@@ -51,8 +58,7 @@ XCOM_RUN_ID = "trigger_run_id"
 if TYPE_CHECKING:
     from sqlalchemy.orm.session import Session
 
-    from airflow.models.taskinstancekey import TaskInstanceKey
-    from airflow.providers.common.compat.sdk import Context
+    from airflow.providers.common.compat.sdk import Context, TaskInstanceKey
 
 
 class DagIsPaused(AirflowException):
@@ -82,8 +88,17 @@ class TriggerDagRunLink(BaseOperatorLink):
         trigger_dag_id = operator.trigger_dag_id
         if not AIRFLOW_V_3_0_PLUS:
             from airflow.models.renderedtifields import RenderedTaskInstanceFields
+            from airflow.models.taskinstancekey import TaskInstanceKey as CoreTaskInstanceKey
+
+            core_ti_key = CoreTaskInstanceKey(
+                dag_id=ti_key.dag_id,
+                task_id=ti_key.task_id,
+                run_id=ti_key.run_id,
+                try_number=ti_key.try_number,
+                map_index=ti_key.map_index,
+            )
 
-            if template_fields := RenderedTaskInstanceFields.get_templated_fields(ti_key):
+            if template_fields := RenderedTaskInstanceFields.get_templated_fields(core_ti_key):
                 trigger_dag_id: str = template_fields.get("trigger_dag_id", operator.trigger_dag_id)  # type: ignore[no-redef]
 
         # Fetch the correct dag_run_id for the triggerED dag which is
@@ -129,8 +144,13 @@ class TriggerDagRunOperator(BaseOperator):
     :param skip_when_already_exists: Set to true to mark the task as SKIPPED if a DAG run of the triggered
         DAG for the same logical date already exists.
    :param fail_when_dag_is_paused: If the dag to trigger is paused, DagIsPaused will be raised.
-    :param deferrable: If waiting for completion, whether or not to defer the task until done,
-        default is ``False``.
+    :param deferrable: If waiting for completion, whether to defer the task until done, default is ``False``.
+    :param openlineage_inject_parent_info: whether to include OpenLineage metadata about the parent task
+        in the triggered DAG run's conf, enabling improved lineage tracking. The metadata is only injected
+        if OpenLineage is enabled and running. This option does not modify any other part of the conf,
+        and existing OpenLineage-related settings in the conf will not be overwritten. The injection process
+        is safeguarded against exceptions - if any error occurs during metadata injection, it is gracefully
+        handled and the conf remains unchanged - so it's safe to use. Default is ``True``
     """
 
     template_fields: Sequence[str] = (
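
Note: a minimal usage sketch for the flag documented above (dag and task ids are illustrative; assumes the OpenLineage provider is installed and enabled):

    from airflow.providers.standard.operators.trigger_dagrun import TriggerDagRunOperator

    trigger = TriggerDagRunOperator(
        task_id="trigger_downstream",
        trigger_dag_id="downstream_dag",
        conf={"source": "upstream"},
        # Parent-run lineage metadata is merged into conf at execute time;
        # set to False to opt out.
        openlineage_inject_parent_info=True,
    )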
@@ -160,6 +180,7 @@ class TriggerDagRunOperator(BaseOperator):
         skip_when_already_exists: bool = False,
         fail_when_dag_is_paused: bool = False,
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
+        openlineage_inject_parent_info: bool = True,
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
@@ -179,7 +200,8 @@ class TriggerDagRunOperator(BaseOperator):
         self.failed_states = [DagRunState.FAILED]
         self.skip_when_already_exists = skip_when_already_exists
         self.fail_when_dag_is_paused = fail_when_dag_is_paused
-        self._defer = deferrable
+        self.openlineage_inject_parent_info = openlineage_inject_parent_info
+        self.deferrable = deferrable
         self.logical_date = logical_date
         if logical_date is NOTSET:
             self.logical_date = NOTSET
@@ -209,6 +231,12 @@ class TriggerDagRunOperator(BaseOperator):
         except (TypeError, JSONDecodeError):
             raise ValueError("conf parameter should be JSON Serializable %s", self.conf)
 
+        if self.openlineage_inject_parent_info:
+            self.log.debug("Checking if OpenLineage information can be safely injected into dagrun conf.")
+            self.conf = safe_inject_openlineage_properties_into_dagrun_conf(
+                dr_conf=self.conf, ti=context.get("ti")
+            )
+
         if self.trigger_run_id:
             run_id = str(self.trigger_run_id)
         else:
@@ -221,6 +249,9 @@ class TriggerDagRunOperator(BaseOperator):
         else:
             run_id = DagRun.generate_run_id(DagRunType.MANUAL, parsed_logical_date or timezone.utcnow())  # type: ignore[misc,call-arg]
 
+        # Save run_id as task attribute - to be used by listeners
+        self.trigger_run_id = run_id
+
         if self.fail_when_dag_is_paused:
             dag_model = DagModel.get_current(self.trigger_dag_id)
             if not dag_model:
@@ -232,12 +263,16 @@ class TriggerDagRunOperator(BaseOperator):
                 raise AirflowException(f"Dag {self.trigger_dag_id} is paused")
 
         if AIRFLOW_V_3_0_PLUS:
-            self._trigger_dag_af_3(context=context, run_id=run_id, parsed_logical_date=parsed_logical_date)
+            self._trigger_dag_af_3(
+                context=context, run_id=self.trigger_run_id, parsed_logical_date=parsed_logical_date
+            )
         else:
-            self._trigger_dag_af_2(context=context, run_id=run_id, parsed_logical_date=parsed_logical_date)
+            self._trigger_dag_af_2(
+                context=context, run_id=self.trigger_run_id, parsed_logical_date=parsed_logical_date
+            )
 
     def _trigger_dag_af_3(self, context, run_id, parsed_logical_date):
-        from airflow.exceptions import DagRunTriggerException
+        from airflow.providers.common.compat.sdk import DagRunTriggerException
 
         raise DagRunTriggerException(
             trigger_dag_id=self.trigger_dag_id,
@@ -250,7 +285,7 @@ class TriggerDagRunOperator(BaseOperator):
             allowed_states=self.allowed_states,
             failed_states=self.failed_states,
             poke_interval=self.poke_interval,
-            deferrable=self._defer,
+            deferrable=self.deferrable,
         )
 
     def _trigger_dag_af_2(self, context, run_id, parsed_logical_date):
@@ -291,7 +326,7 @@ class TriggerDagRunOperator(BaseOperator):
 
         if self.wait_for_completion:
             # Kick off the deferral process
-            if self._defer:
+            if self.deferrable:
                 self.defer(
                     trigger=DagStateTrigger(
                         dag_id=self.trigger_dag_id,
@@ -322,17 +357,40 @@ class TriggerDagRunOperator(BaseOperator):
         return
 
     def execute_complete(self, context: Context, event: tuple[str, dict[str, Any]]):
+        """
+        Handle task completion after returning from a deferral.
+
+        Args:
+            context: The Airflow context dictionary.
+            event: A tuple containing the class path of the trigger and the trigger event data.
+        """
+        # Example event tuple content:
+        # (
+        #     "airflow.providers.standard.triggers.external_task.DagStateTrigger",
+        #     {
+        #         'dag_id': 'some_dag',
+        #         'states': ['success', 'failed'],
+        #         'poll_interval': 15,
+        #         'run_ids': ['manual__2025-11-19T17:49:20.907083+00:00'],
+        #         'execution_dates': [
+        #             DateTime(2025, 11, 19, 17, 49, 20, 907083, tzinfo=Timezone('UTC'))
+        #         ]
+        #     }
+        # )
+        _, event_data = event
+        run_ids = event_data["run_ids"]
+        # Re-set as attribute after coming back from deferral - to be used by listeners.
+        # Just a safety check on length, we should always have single run_id here.
+        self.trigger_run_id = run_ids[0] if len(run_ids) == 1 else None
         if AIRFLOW_V_3_0_PLUS:
-            self._trigger_dag_run_af_3_execute_complete(event=event)
+            self._trigger_dag_run_af_3_execute_complete(event_data=event_data)
         else:
-            self._trigger_dag_run_af_2_execute_complete(event=event)
+            self._trigger_dag_run_af_2_execute_complete(event_data=event_data)
 
-    def _trigger_dag_run_af_3_execute_complete(self, event: tuple[str, dict[str, Any]]):
-        run_ids = event[1]["run_ids"]
-        event_data = event[1]
+    def _trigger_dag_run_af_3_execute_complete(self, event_data: dict[str, Any]):
         failed_run_id_conditions = []
 
-        for run_id in run_ids:
+        for run_id in event_data["run_ids"]:
             state = event_data.get(run_id)
             if state in self.failed_states:
                 failed_run_id_conditions.append(run_id)
@@ -356,10 +414,10 @@ class TriggerDagRunOperator(BaseOperator):
 
     @provide_session
     def _trigger_dag_run_af_2_execute_complete(
-        self, event: tuple[str, dict[str, Any]], session: Session = NEW_SESSION
+        self, event_data: dict[str, Any], session: Session = NEW_SESSION
     ):
         # This logical_date is parsed from the return trigger event
-        provided_logical_date = event[1]["execution_dates"][0]
+        provided_logical_date = event_data["execution_dates"][0]
         try:
             # Note: here execution fails on database isolation mode. Needs structural changes for AIP-72
             dag_run = session.execute(
airflow/providers/standard/sensors/bash.py
@@ -22,8 +22,7 @@ from subprocess import PIPE, STDOUT, Popen
 from tempfile import NamedTemporaryFile, TemporaryDirectory, gettempdir
 from typing import TYPE_CHECKING
 
-from airflow.exceptions import AirflowFailException
-from airflow.providers.common.compat.sdk import BaseSensorOperator
+from airflow.providers.common.compat.sdk import AirflowFailException, BaseSensorOperator
 
 if TYPE_CHECKING:
     from airflow.providers.common.compat.sdk import Context
airflow/providers/standard/sensors/date_time.py
@@ -19,27 +19,12 @@ from __future__ import annotations
 
 import datetime
 from collections.abc import Sequence
-from dataclasses import dataclass
 from typing import TYPE_CHECKING, Any, NoReturn
 
 from airflow.providers.common.compat.sdk import BaseSensorOperator, timezone
 from airflow.providers.standard.triggers.temporal import DateTimeTrigger
 from airflow.providers.standard.version_compat import AIRFLOW_V_3_0_PLUS
-
-try:
-    from airflow.triggers.base import StartTriggerArgs  # type: ignore[no-redef]
-except ImportError:  # TODO: Remove this when min airflow version is 2.10.0 for standard provider
-
-    @dataclass
-    class StartTriggerArgs:  # type: ignore[no-redef]
-        """Arguments required for start task execution from triggerer."""
-
-        trigger_cls: str
-        next_method: str
-        trigger_kwargs: dict[str, Any] | None = None
-        next_kwargs: dict[str, Any] | None = None
-        timeout: datetime.timedelta | None = None
-
+from airflow.triggers.base import StartTriggerArgs
 
 if TYPE_CHECKING:
     from airflow.sdk import Context