apache-airflow-providers-openlineage 1.14.0rc1__tar.gz → 2.0.0rc1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of apache-airflow-providers-openlineage might be problematic. Click here for more details.

Files changed (31)
  1. {apache_airflow_providers_openlineage-1.14.0rc1 → apache_airflow_providers_openlineage-2.0.0rc1}/PKG-INFO +11 -15
  2. {apache_airflow_providers_openlineage-1.14.0rc1 → apache_airflow_providers_openlineage-2.0.0rc1}/README.rst +5 -5
  3. {apache_airflow_providers_openlineage-1.14.0rc1 → apache_airflow_providers_openlineage-2.0.0rc1}/airflow/providers/openlineage/__init__.py +3 -3
  4. {apache_airflow_providers_openlineage-1.14.0rc1 → apache_airflow_providers_openlineage-2.0.0rc1}/airflow/providers/openlineage/conf.py +6 -0
  5. {apache_airflow_providers_openlineage-1.14.0rc1 → apache_airflow_providers_openlineage-2.0.0rc1}/airflow/providers/openlineage/extractors/base.py +2 -2
  6. {apache_airflow_providers_openlineage-1.14.0rc1 → apache_airflow_providers_openlineage-2.0.0rc1}/airflow/providers/openlineage/extractors/manager.py +2 -1
  7. {apache_airflow_providers_openlineage-1.14.0rc1 → apache_airflow_providers_openlineage-2.0.0rc1}/airflow/providers/openlineage/get_provider_info.py +11 -3
  8. {apache_airflow_providers_openlineage-1.14.0rc1 → apache_airflow_providers_openlineage-2.0.0rc1}/airflow/providers/openlineage/plugins/adapter.py +15 -12
  9. {apache_airflow_providers_openlineage-1.14.0rc1 → apache_airflow_providers_openlineage-2.0.0rc1}/airflow/providers/openlineage/plugins/listener.py +41 -12
  10. {apache_airflow_providers_openlineage-1.14.0rc1 → apache_airflow_providers_openlineage-2.0.0rc1}/airflow/providers/openlineage/plugins/macros.py +6 -1
  11. {apache_airflow_providers_openlineage-1.14.0rc1 → apache_airflow_providers_openlineage-2.0.0rc1}/airflow/providers/openlineage/plugins/openlineage.py +2 -2
  12. apache_airflow_providers_openlineage-2.0.0rc1/airflow/providers/openlineage/utils/spark.py +86 -0
  13. {apache_airflow_providers_openlineage-1.14.0rc1 → apache_airflow_providers_openlineage-2.0.0rc1}/airflow/providers/openlineage/utils/sql.py +2 -2
  14. {apache_airflow_providers_openlineage-1.14.0rc1 → apache_airflow_providers_openlineage-2.0.0rc1}/airflow/providers/openlineage/utils/utils.py +50 -31
  15. apache_airflow_providers_openlineage-2.0.0rc1/airflow/providers/openlineage/version_compat.py +36 -0
  16. {apache_airflow_providers_openlineage-1.14.0rc1 → apache_airflow_providers_openlineage-2.0.0rc1}/pyproject.toml +6 -13
  17. {apache_airflow_providers_openlineage-1.14.0rc1 → apache_airflow_providers_openlineage-2.0.0rc1}/airflow/providers/openlineage/LICENSE +0 -0
  18. {apache_airflow_providers_openlineage-1.14.0rc1 → apache_airflow_providers_openlineage-2.0.0rc1}/airflow/providers/openlineage/extractors/__init__.py +0 -0
  19. {apache_airflow_providers_openlineage-1.14.0rc1 → apache_airflow_providers_openlineage-2.0.0rc1}/airflow/providers/openlineage/extractors/bash.py +0 -0
  20. {apache_airflow_providers_openlineage-1.14.0rc1 → apache_airflow_providers_openlineage-2.0.0rc1}/airflow/providers/openlineage/extractors/python.py +0 -0
  21. {apache_airflow_providers_openlineage-1.14.0rc1 → apache_airflow_providers_openlineage-2.0.0rc1}/airflow/providers/openlineage/facets/AirflowDagRunFacet.json +0 -0
  22. {apache_airflow_providers_openlineage-1.14.0rc1 → apache_airflow_providers_openlineage-2.0.0rc1}/airflow/providers/openlineage/facets/AirflowDebugRunFacet.json +0 -0
  23. {apache_airflow_providers_openlineage-1.14.0rc1 → apache_airflow_providers_openlineage-2.0.0rc1}/airflow/providers/openlineage/facets/AirflowJobFacet.json +0 -0
  24. {apache_airflow_providers_openlineage-1.14.0rc1 → apache_airflow_providers_openlineage-2.0.0rc1}/airflow/providers/openlineage/facets/AirflowRunFacet.json +0 -0
  25. {apache_airflow_providers_openlineage-1.14.0rc1 → apache_airflow_providers_openlineage-2.0.0rc1}/airflow/providers/openlineage/facets/AirflowStateRunFacet.json +0 -0
  26. {apache_airflow_providers_openlineage-1.14.0rc1 → apache_airflow_providers_openlineage-2.0.0rc1}/airflow/providers/openlineage/facets/__init__.py +0 -0
  27. {apache_airflow_providers_openlineage-1.14.0rc1 → apache_airflow_providers_openlineage-2.0.0rc1}/airflow/providers/openlineage/plugins/__init__.py +0 -0
  28. {apache_airflow_providers_openlineage-1.14.0rc1 → apache_airflow_providers_openlineage-2.0.0rc1}/airflow/providers/openlineage/plugins/facets.py +0 -0
  29. {apache_airflow_providers_openlineage-1.14.0rc1 → apache_airflow_providers_openlineage-2.0.0rc1}/airflow/providers/openlineage/sqlparser.py +0 -0
  30. {apache_airflow_providers_openlineage-1.14.0rc1 → apache_airflow_providers_openlineage-2.0.0rc1}/airflow/providers/openlineage/utils/__init__.py +0 -0
  31. {apache_airflow_providers_openlineage-1.14.0rc1 → apache_airflow_providers_openlineage-2.0.0rc1}/airflow/providers/openlineage/utils/selective_enable.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: apache-airflow-providers-openlineage
3
- Version: 1.14.0rc1
3
+ Version: 2.0.0rc1
4
4
  Summary: Provider package apache-airflow-providers-openlineage for Apache Airflow
5
5
  Keywords: airflow-provider,openlineage,airflow,integration
6
6
  Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -20,23 +20,19 @@ Classifier: Programming Language :: Python :: 3.10
20
20
  Classifier: Programming Language :: Python :: 3.11
21
21
  Classifier: Programming Language :: Python :: 3.12
22
22
  Classifier: Topic :: System :: Monitoring
23
- Requires-Dist: apache-airflow-providers-common-compat>=1.2.1rc0
23
+ Requires-Dist: apache-airflow-providers-common-compat>=1.3.0rc0
24
24
  Requires-Dist: apache-airflow-providers-common-sql>=1.20.0rc0
25
- Requires-Dist: apache-airflow>=2.8.0rc0
25
+ Requires-Dist: apache-airflow>=2.9.0rc0
26
26
  Requires-Dist: attrs>=22.2
27
27
  Requires-Dist: openlineage-integration-common>=1.24.2
28
28
  Requires-Dist: openlineage-python>=1.24.2
29
- Requires-Dist: apache-airflow-providers-common-compat ; extra == "common-compat"
30
- Requires-Dist: apache-airflow-providers-common-sql ; extra == "common-sql"
31
29
  Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
32
- Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.14.0/changelog.html
33
- Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.14.0
30
+ Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.0.0/changelog.html
31
+ Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.0.0
34
32
  Project-URL: Slack Chat, https://s.apache.org/airflow-slack
35
33
  Project-URL: Source Code, https://github.com/apache/airflow
36
- Project-URL: Twitter, https://twitter.com/ApacheAirflow
34
+ Project-URL: Twitter, https://x.com/ApacheAirflow
37
35
  Project-URL: YouTube, https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/
38
- Provides-Extra: common-compat
39
- Provides-Extra: common-sql
40
36
 
41
37
 
42
38
  .. Licensed to the Apache Software Foundation (ASF) under one
@@ -82,7 +78,7 @@ Provides-Extra: common-sql
82
78
 
83
79
  Package ``apache-airflow-providers-openlineage``
84
80
 
85
- Release: ``1.14.0.rc1``
81
+ Release: ``2.0.0.rc1``
86
82
 
87
83
 
88
84
  `OpenLineage <https://openlineage.io/>`__
@@ -95,7 +91,7 @@ This is a provider package for ``openlineage`` provider. All classes for this pr
95
91
  are in ``airflow.providers.openlineage`` python package.
96
92
 
97
93
  You can find package information and changelog for the provider
98
- in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.14.0/>`_.
94
+ in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.0.0/>`_.
99
95
 
100
96
  Installation
101
97
  ------------
@@ -112,9 +108,9 @@ Requirements
112
108
  ========================================== ==================
113
109
  PIP package Version required
114
110
  ========================================== ==================
115
- ``apache-airflow`` ``>=2.8.0``
111
+ ``apache-airflow`` ``>=2.9.0``
116
112
  ``apache-airflow-providers-common-sql`` ``>=1.20.0``
117
- ``apache-airflow-providers-common-compat`` ``>=1.2.1``
113
+ ``apache-airflow-providers-common-compat`` ``>=1.3.0``
118
114
  ``attrs`` ``>=22.2``
119
115
  ``openlineage-integration-common`` ``>=1.24.2``
120
116
  ``openlineage-python`` ``>=1.24.2``
@@ -141,4 +137,4 @@ Dependent package
141
137
  ================================================================================================================== =================
142
138
 
143
139
  The changelog for the provider package can be found in the
144
- `changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.14.0/changelog.html>`_.
140
+ `changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.0.0/changelog.html>`_.
@@ -42,7 +42,7 @@
42
42
 
43
43
  Package ``apache-airflow-providers-openlineage``
44
44
 
45
- Release: ``1.14.0.rc1``
45
+ Release: ``2.0.0.rc1``
46
46
 
47
47
 
48
48
  `OpenLineage <https://openlineage.io/>`__
@@ -55,7 +55,7 @@ This is a provider package for ``openlineage`` provider. All classes for this pr
55
55
  are in ``airflow.providers.openlineage`` python package.
56
56
 
57
57
  You can find package information and changelog for the provider
58
- in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.14.0/>`_.
58
+ in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.0.0/>`_.
59
59
 
60
60
  Installation
61
61
  ------------
@@ -72,9 +72,9 @@ Requirements
72
72
  ========================================== ==================
73
73
  PIP package Version required
74
74
  ========================================== ==================
75
- ``apache-airflow`` ``>=2.8.0``
75
+ ``apache-airflow`` ``>=2.9.0``
76
76
  ``apache-airflow-providers-common-sql`` ``>=1.20.0``
77
- ``apache-airflow-providers-common-compat`` ``>=1.2.1``
77
+ ``apache-airflow-providers-common-compat`` ``>=1.3.0``
78
78
  ``attrs`` ``>=22.2``
79
79
  ``openlineage-integration-common`` ``>=1.24.2``
80
80
  ``openlineage-python`` ``>=1.24.2``
@@ -101,4 +101,4 @@ Dependent package
101
101
  ================================================================================================================== =================
102
102
 
103
103
  The changelog for the provider package can be found in the
104
- `changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.14.0/changelog.html>`_.
104
+ `changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.0.0/changelog.html>`_.
@@ -29,11 +29,11 @@ from airflow import __version__ as airflow_version
29
29
 
30
30
  __all__ = ["__version__"]
31
31
 
32
- __version__ = "1.14.0"
32
+ __version__ = "2.0.0"
33
33
 
34
34
  if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
35
- "2.8.0"
35
+ "2.9.0"
36
36
  ):
37
37
  raise RuntimeError(
38
- f"The package `apache-airflow-providers-openlineage:{__version__}` needs Apache Airflow 2.8.0+"
38
+ f"The package `apache-airflow-providers-openlineage:{__version__}` needs Apache Airflow 2.9.0+"
39
39
  )
@@ -77,6 +77,12 @@ def selective_enable() -> bool:
77
77
  return conf.getboolean(_CONFIG_SECTION, "selective_enable", fallback="False")
78
78
 
79
79
 
80
+ @cache
81
+ def spark_inject_parent_job_info() -> bool:
82
+ """[openlineage] spark_inject_parent_job_info."""
83
+ return conf.getboolean(_CONFIG_SECTION, "spark_inject_parent_job_info", fallback="False")
84
+
85
+
80
86
  @cache
81
87
  def custom_extractors() -> set[str]:
82
88
  """[openlineage] extractors."""
@@ -29,7 +29,7 @@ with warnings.catch_warnings():
29
29
  from openlineage.client.facet import BaseFacet as BaseFacet_V1
30
30
  from openlineage.client.facet_v2 import JobFacet, RunFacet
31
31
 
32
- from airflow.providers.openlineage.utils.utils import IS_AIRFLOW_2_10_OR_HIGHER
32
+ from airflow.providers.openlineage.utils.utils import AIRFLOW_V_2_10_PLUS
33
33
  from airflow.utils.log.logging_mixin import LoggingMixin
34
34
  from airflow.utils.state import TaskInstanceState
35
35
 
@@ -117,7 +117,7 @@ class DefaultExtractor(BaseExtractor):
117
117
 
118
118
  def extract_on_complete(self, task_instance) -> OperatorLineage | None:
119
119
  failed_states = [TaskInstanceState.FAILED, TaskInstanceState.UP_FOR_RETRY]
120
- if not IS_AIRFLOW_2_10_OR_HIGHER: # todo: remove when min airflow version >= 2.10.0
120
+ if not AIRFLOW_V_2_10_PLUS: # todo: remove when min airflow version >= 2.10.0
121
121
  # Before fix (#41053) implemented in Airflow 2.10 TaskInstance's state was still RUNNING when
122
122
  # being passed to listener's on_failure method. Since `extract_on_complete()` is only called
123
123
  # after task completion, RUNNING state means that we are dealing with FAILED task in < 2.10
@@ -16,7 +16,8 @@
16
16
  # under the License.
17
17
  from __future__ import annotations
18
18
 
19
- from typing import TYPE_CHECKING, Iterator
19
+ from collections.abc import Iterator
20
+ from typing import TYPE_CHECKING
20
21
 
21
22
  from airflow.providers.common.compat.openlineage.utils.utils import (
22
23
  translate_airflow_asset,
@@ -28,8 +28,9 @@ def get_provider_info():
28
28
  "name": "OpenLineage Airflow",
29
29
  "description": "`OpenLineage <https://openlineage.io/>`__\n",
30
30
  "state": "ready",
31
- "source-date-epoch": 1730013356,
31
+ "source-date-epoch": 1734535974,
32
32
  "versions": [
33
+ "2.0.0",
33
34
  "1.14.0",
34
35
  "1.13.0",
35
36
  "1.12.2",
@@ -56,9 +57,9 @@ def get_provider_info():
56
57
  "1.0.0",
57
58
  ],
58
59
  "dependencies": [
59
- "apache-airflow>=2.8.0",
60
+ "apache-airflow>=2.9.0",
60
61
  "apache-airflow-providers-common-sql>=1.20.0",
61
- "apache-airflow-providers-common-compat>=1.2.1",
62
+ "apache-airflow-providers-common-compat>=1.3.0",
62
63
  "attrs>=22.2",
63
64
  "openlineage-integration-common>=1.24.2",
64
65
  "openlineage-python>=1.24.2",
@@ -172,6 +173,13 @@ def get_provider_info():
172
173
  "type": "boolean",
173
174
  "version_added": "1.11.0",
174
175
  },
176
+ "spark_inject_parent_job_info": {
177
+ "description": "Automatically inject OpenLineage's parent job (namespace, job name, run id) information into Spark\napplication properties for supported Operators.\n",
178
+ "type": "boolean",
179
+ "default": "False",
180
+ "example": None,
181
+ "version_added": "1.15.0",
182
+ },
175
183
  },
176
184
  }
177
185
  },
@@ -84,7 +84,7 @@ class OpenLineageAdapter(LoggingMixin):
84
84
  "OpenLineage configuration found. Transport type: `%s`",
85
85
  config.get("type", "no type provided"),
86
86
  )
87
- self._client = OpenLineageClient(config=config)
87
+ self._client = OpenLineageClient(config=config) # type: ignore[call-arg]
88
88
  else:
89
89
  self.log.debug(
90
90
  "OpenLineage configuration not found directly in Airflow. "
@@ -115,11 +115,11 @@ class OpenLineageAdapter(LoggingMixin):
115
115
  return yaml.safe_load(config_file)
116
116
 
117
117
  @staticmethod
118
- def build_dag_run_id(dag_id: str, logical_date: datetime) -> str:
118
+ def build_dag_run_id(dag_id: str, logical_date: datetime, clear_number: int) -> str:
119
119
  return str(
120
120
  generate_static_uuid(
121
121
  instant=logical_date,
122
- data=f"{conf.namespace()}.{dag_id}".encode(),
122
+ data=f"{conf.namespace()}.{dag_id}.{clear_number}".encode(),
123
123
  )
124
124
  )
125
125
 
@@ -128,12 +128,13 @@ class OpenLineageAdapter(LoggingMixin):
128
128
  dag_id: str,
129
129
  task_id: str,
130
130
  try_number: int,
131
- execution_date: datetime,
131
+ logical_date: datetime,
132
+ map_index: int,
132
133
  ):
133
134
  return str(
134
135
  generate_static_uuid(
135
- instant=execution_date,
136
- data=f"{conf.namespace()}.{dag_id}.{task_id}.{try_number}".encode(),
136
+ instant=logical_date,
137
+ data=f"{conf.namespace()}.{dag_id}.{task_id}.{try_number}.{map_index}".encode(),
137
138
  )
138
139
  )
139
140
 
@@ -156,10 +157,10 @@ class OpenLineageAdapter(LoggingMixin):
156
157
  stack.enter_context(Stats.timer("ol.emit.attempts"))
157
158
  self._client.emit(redacted_event)
158
159
  self.log.debug("Successfully emitted OpenLineage event of id %s", event.run.runId)
159
- except Exception as e:
160
+ except Exception:
160
161
  Stats.incr("ol.emit.failed")
161
162
  self.log.warning("Failed to emit OpenLineage event of id %s", event.run.runId)
162
- self.log.debug("OpenLineage emission failure: %s", e)
163
+ self.log.debug("OpenLineage emission failure: %s", exc_info=True)
163
164
 
164
165
  return redacted_event
165
166
 
@@ -332,6 +333,7 @@ class OpenLineageAdapter(LoggingMixin):
332
333
  nominal_end_time: str,
333
334
  owners: list[str],
334
335
  run_facets: dict[str, RunFacet],
336
+ clear_number: int,
335
337
  description: str | None = None,
336
338
  job_facets: dict[str, JobFacet] | None = None, # Custom job facets
337
339
  ):
@@ -348,8 +350,7 @@ class OpenLineageAdapter(LoggingMixin):
348
350
  ),
349
351
  run=self._build_run(
350
352
  run_id=self.build_dag_run_id(
351
- dag_id=dag_id,
352
- logical_date=logical_date,
353
+ dag_id=dag_id, logical_date=logical_date, clear_number=clear_number
353
354
  ),
354
355
  job_name=dag_id,
355
356
  nominal_start_time=nominal_start_time,
@@ -373,6 +374,7 @@ class OpenLineageAdapter(LoggingMixin):
373
374
  run_id: str,
374
375
  end_date: datetime,
375
376
  logical_date: datetime,
377
+ clear_number: int,
376
378
  dag_run_state: DagRunState,
377
379
  task_ids: list[str],
378
380
  ):
@@ -383,8 +385,7 @@ class OpenLineageAdapter(LoggingMixin):
383
385
  job=self._build_job(job_name=dag_id, job_type=_JOB_TYPE_DAG),
384
386
  run=Run(
385
387
  runId=self.build_dag_run_id(
386
- dag_id=dag_id,
387
- logical_date=logical_date,
388
+ dag_id=dag_id, logical_date=logical_date, clear_number=clear_number
388
389
  ),
389
390
  facets={
390
391
  **get_airflow_state_run_facet(dag_id, run_id, task_ids, dag_run_state),
@@ -408,6 +409,7 @@ class OpenLineageAdapter(LoggingMixin):
408
409
  run_id: str,
409
410
  end_date: datetime,
410
411
  logical_date: datetime,
412
+ clear_number: int,
411
413
  dag_run_state: DagRunState,
412
414
  task_ids: list[str],
413
415
  msg: str,
@@ -421,6 +423,7 @@ class OpenLineageAdapter(LoggingMixin):
421
423
  runId=self.build_dag_run_id(
422
424
  dag_id=dag_id,
423
425
  logical_date=logical_date,
426
+ clear_number=clear_number,
424
427
  ),
425
428
  facets={
426
429
  "errorMessage": error_message_run.ErrorMessageRunFacet(
@@ -32,7 +32,7 @@ from airflow.providers.openlineage import conf
32
32
  from airflow.providers.openlineage.extractors import ExtractorManager
33
33
  from airflow.providers.openlineage.plugins.adapter import OpenLineageAdapter, RunState
34
34
  from airflow.providers.openlineage.utils.utils import (
35
- IS_AIRFLOW_2_10_OR_HIGHER,
35
+ AIRFLOW_V_2_10_PLUS,
36
36
  get_airflow_dag_run_facet,
37
37
  get_airflow_debug_facet,
38
38
  get_airflow_job_facet,
@@ -42,6 +42,7 @@ from airflow.providers.openlineage.utils.utils import (
42
42
  get_user_provided_run_facets,
43
43
  is_operator_disabled,
44
44
  is_selective_lineage_enabled,
45
+ is_ti_rescheduled_already,
45
46
  print_warning,
46
47
  )
47
48
  from airflow.settings import configure_orm
@@ -60,7 +61,7 @@ _openlineage_listener: OpenLineageListener | None = None
60
61
 
61
62
  def _get_try_number_success(val):
62
63
  # todo: remove when min airflow version >= 2.10.0
63
- if IS_AIRFLOW_2_10_OR_HIGHER:
64
+ if AIRFLOW_V_2_10_PLUS:
64
65
  return val.try_number
65
66
  return val.try_number - 1
66
67
 
@@ -134,16 +135,28 @@ class OpenLineageListener:
134
135
  # we return here because Airflow 2.3 needs task from deferred state
135
136
  if task_instance.next_method is not None:
136
137
  return
138
+
139
+ if is_ti_rescheduled_already(task_instance):
140
+ self.log.debug("Skipping this instance of rescheduled task - START event was emitted already")
141
+ return
142
+
137
143
  parent_run_id = self.adapter.build_dag_run_id(
138
144
  dag_id=dag.dag_id,
139
145
  logical_date=dagrun.logical_date,
146
+ clear_number=dagrun.clear_number,
140
147
  )
141
148
 
149
+ if hasattr(task_instance, "logical_date"):
150
+ logical_date = task_instance.logical_date
151
+ else:
152
+ logical_date = task_instance.execution_date
153
+
142
154
  task_uuid = self.adapter.build_task_instance_run_id(
143
155
  dag_id=dag.dag_id,
144
156
  task_id=task.task_id,
145
157
  try_number=task_instance.try_number,
146
- execution_date=task_instance.execution_date,
158
+ logical_date=logical_date,
159
+ map_index=task_instance.map_index,
147
160
  )
148
161
  event_type = RunState.RUNNING.value.lower()
149
162
  operator_name = task.task_type.lower()
@@ -213,16 +226,22 @@ class OpenLineageListener:
213
226
 
214
227
  @print_warning(self.log)
215
228
  def on_success():
216
- parent_run_id = OpenLineageAdapter.build_dag_run_id(
229
+ parent_run_id = self.adapter.build_dag_run_id(
217
230
  dag_id=dag.dag_id,
218
231
  logical_date=dagrun.logical_date,
232
+ clear_number=dagrun.clear_number,
219
233
  )
220
234
 
221
- task_uuid = OpenLineageAdapter.build_task_instance_run_id(
235
+ if hasattr(task_instance, "logical_date"):
236
+ logical_date = task_instance.logical_date
237
+ else:
238
+ logical_date = task_instance.execution_date
239
+ task_uuid = self.adapter.build_task_instance_run_id(
222
240
  dag_id=dag.dag_id,
223
241
  task_id=task.task_id,
224
242
  try_number=_get_try_number_success(task_instance),
225
- execution_date=task_instance.execution_date,
243
+ logical_date=logical_date,
244
+ map_index=task_instance.map_index,
226
245
  )
227
246
  event_type = RunState.COMPLETE.value.lower()
228
247
  operator_name = task.task_type.lower()
@@ -254,7 +273,7 @@ class OpenLineageListener:
254
273
 
255
274
  self._execute(on_success, "on_success", use_fork=True)
256
275
 
257
- if IS_AIRFLOW_2_10_OR_HIGHER:
276
+ if AIRFLOW_V_2_10_PLUS:
258
277
 
259
278
  @hookimpl
260
279
  def on_task_instance_failed(
@@ -312,16 +331,23 @@ class OpenLineageListener:
312
331
 
313
332
  @print_warning(self.log)
314
333
  def on_failure():
315
- parent_run_id = OpenLineageAdapter.build_dag_run_id(
334
+ parent_run_id = self.adapter.build_dag_run_id(
316
335
  dag_id=dag.dag_id,
317
336
  logical_date=dagrun.logical_date,
337
+ clear_number=dagrun.clear_number,
318
338
  )
319
339
 
320
- task_uuid = OpenLineageAdapter.build_task_instance_run_id(
340
+ if hasattr(task_instance, "logical_date"):
341
+ logical_date = task_instance.logical_date
342
+ else:
343
+ logical_date = task_instance.execution_date
344
+
345
+ task_uuid = self.adapter.build_task_instance_run_id(
321
346
  dag_id=dag.dag_id,
322
347
  task_id=task.task_id,
323
348
  try_number=task_instance.try_number,
324
- execution_date=task_instance.execution_date,
349
+ logical_date=logical_date,
350
+ map_index=task_instance.map_index,
325
351
  )
326
352
  event_type = RunState.FAIL.value.lower()
327
353
  operator_name = task.task_type.lower()
@@ -444,6 +470,7 @@ class OpenLineageListener:
444
470
  nominal_start_time=data_interval_start,
445
471
  nominal_end_time=data_interval_end,
446
472
  run_facets=run_facets,
473
+ clear_number=dag_run.clear_number,
447
474
  owners=[x.strip() for x in dag_run.dag.owner.split(",")] if dag_run.dag else None,
448
475
  description=dag_run.dag.description if dag_run.dag else None,
449
476
  # AirflowJobFacet should be created outside ProcessPoolExecutor that pickles objects,
@@ -469,7 +496,7 @@ class OpenLineageListener:
469
496
  self.log.debug("Executor have not started before `on_dag_run_success`")
470
497
  return
471
498
 
472
- if IS_AIRFLOW_2_10_OR_HIGHER:
499
+ if AIRFLOW_V_2_10_PLUS:
473
500
  task_ids = DagRun._get_partial_task_ids(dag_run.dag)
474
501
  else:
475
502
  task_ids = dag_run.dag.task_ids if dag_run.dag and dag_run.dag.partial else None
@@ -479,6 +506,7 @@ class OpenLineageListener:
479
506
  run_id=dag_run.run_id,
480
507
  end_date=dag_run.end_date,
481
508
  logical_date=dag_run.logical_date,
509
+ clear_number=dag_run.clear_number,
482
510
  task_ids=task_ids,
483
511
  dag_run_state=dag_run.get_state(),
484
512
  )
@@ -501,7 +529,7 @@ class OpenLineageListener:
501
529
  self.log.debug("Executor have not started before `on_dag_run_failed`")
502
530
  return
503
531
 
504
- if IS_AIRFLOW_2_10_OR_HIGHER:
532
+ if AIRFLOW_V_2_10_PLUS:
505
533
  task_ids = DagRun._get_partial_task_ids(dag_run.dag)
506
534
  else:
507
535
  task_ids = dag_run.dag.task_ids if dag_run.dag and dag_run.dag.partial else None
@@ -511,6 +539,7 @@ class OpenLineageListener:
511
539
  run_id=dag_run.run_id,
512
540
  end_date=dag_run.end_date,
513
541
  logical_date=dag_run.logical_date,
542
+ clear_number=dag_run.clear_number,
514
543
  dag_run_state=dag_run.get_state(),
515
544
  task_ids=task_ids,
516
545
  msg=msg,
@@ -58,11 +58,16 @@ def lineage_run_id(task_instance: TaskInstance):
58
58
  For more information take a look at the guide:
59
59
  :ref:`howto/macros:openlineage`
60
60
  """
61
+ if hasattr(task_instance, "logical_date"):
62
+ logical_date = task_instance.logical_date
63
+ else:
64
+ logical_date = task_instance.execution_date
61
65
  return OpenLineageAdapter.build_task_instance_run_id(
62
66
  dag_id=task_instance.dag_id,
63
67
  task_id=task_instance.task_id,
64
68
  try_number=task_instance.try_number,
65
- execution_date=task_instance.execution_date,
69
+ logical_date=logical_date,
70
+ map_index=task_instance.map_index,
66
71
  )
67
72
 
68
73
 
@@ -25,7 +25,7 @@ from airflow.providers.openlineage.plugins.macros import (
25
25
  lineage_parent_id,
26
26
  lineage_run_id,
27
27
  )
28
- from airflow.providers.openlineage.utils.utils import IS_AIRFLOW_2_10_OR_HIGHER
28
+ from airflow.providers.openlineage.version_compat import AIRFLOW_V_2_10_PLUS
29
29
 
30
30
 
31
31
  class OpenLineageProviderPlugin(AirflowPlugin):
@@ -40,7 +40,7 @@ class OpenLineageProviderPlugin(AirflowPlugin):
40
40
  if not conf.is_disabled():
41
41
  macros = [lineage_job_namespace, lineage_job_name, lineage_run_id, lineage_parent_id]
42
42
  listeners = [get_openlineage_listener()]
43
- if IS_AIRFLOW_2_10_OR_HIGHER:
43
+ if AIRFLOW_V_2_10_PLUS:
44
44
  from airflow.lineage.hook import HookLineageReader
45
45
 
46
46
  hook_lineage_readers = [HookLineageReader]
@@ -0,0 +1,86 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ from __future__ import annotations
19
+
20
+ import logging
21
+ from typing import TYPE_CHECKING
22
+
23
+ from airflow.providers.openlineage.plugins.macros import (
24
+ lineage_job_name,
25
+ lineage_job_namespace,
26
+ lineage_run_id,
27
+ )
28
+
29
+ if TYPE_CHECKING:
30
+ from airflow.utils.context import Context
31
+
32
+ log = logging.getLogger(__name__)
33
+
34
+
35
+ def _get_parent_job_information_as_spark_properties(context: Context) -> dict:
36
+ """
37
+ Retrieve parent job information as Spark properties.
38
+
39
+ Args:
40
+ context: The context containing task instance information.
41
+
42
+ Returns:
43
+ Spark properties with the parent job information.
44
+ """
45
+ ti = context["ti"]
46
+ return {
47
+ "spark.openlineage.parentJobNamespace": lineage_job_namespace(),
48
+ "spark.openlineage.parentJobName": lineage_job_name(ti), # type: ignore[arg-type]
49
+ "spark.openlineage.parentRunId": lineage_run_id(ti), # type: ignore[arg-type]
50
+ }
51
+
52
+
53
+ def _is_parent_job_information_present_in_spark_properties(properties: dict) -> bool:
54
+ """
55
+ Check if any parent job information is present in Spark properties.
56
+
57
+ Args:
58
+ properties: Spark properties.
59
+
60
+ Returns:
61
+ True if parent job information is present, False otherwise.
62
+ """
63
+ return any(str(key).startswith("spark.openlineage.parent") for key in properties)
64
+
65
+
66
+ def inject_parent_job_information_into_spark_properties(properties: dict, context: Context) -> dict:
67
+ """
68
+ Inject parent job information into Spark properties if not already present.
69
+
70
+ Args:
71
+ properties: Spark properties.
72
+ context: The context containing task instance information.
73
+
74
+ Returns:
75
+ Modified Spark properties with OpenLineage parent job information properties injected, if applicable.
76
+ """
77
+ if _is_parent_job_information_present_in_spark_properties(properties):
78
+ log.info(
79
+ "Some OpenLineage properties with parent job information are already present "
80
+ "in Spark properties. Skipping the injection of OpenLineage "
81
+ "parent job information into Spark properties."
82
+ )
83
+ return properties
84
+
85
+ ol_parent_job_properties = _get_parent_job_information_as_spark_properties(context)
86
+ return {**properties, **ol_parent_job_properties}
@@ -20,7 +20,7 @@ import logging
20
20
  from collections import defaultdict
21
21
  from contextlib import closing
22
22
  from enum import IntEnum
23
- from typing import TYPE_CHECKING, Dict, List, Optional
23
+ from typing import TYPE_CHECKING, Optional
24
24
 
25
25
  from attrs import define
26
26
  from openlineage.client.event_v2 import Dataset
@@ -50,7 +50,7 @@ class ColumnIndex(IntEnum):
50
50
  DATABASE = 5
51
51
 
52
52
 
53
- TablesHierarchy = Dict[Optional[str], Dict[Optional[str], List[str]]]
53
+ TablesHierarchy = dict[Optional[str], dict[Optional[str], list[str]]]
54
54
 
55
55
 
56
56
  @define
@@ -23,22 +23,20 @@ import logging
23
23
  from contextlib import suppress
24
24
  from functools import wraps
25
25
  from importlib import metadata
26
- from typing import TYPE_CHECKING, Any, Callable, Iterable
26
+ from typing import TYPE_CHECKING, Any, Callable
27
27
 
28
28
  import attrs
29
- from deprecated import deprecated
30
29
  from openlineage.client.utils import RedactMixin
31
- from packaging.version import Version
30
+ from sqlalchemy import exists
32
31
 
33
32
  from airflow import __version__ as AIRFLOW_VERSION
34
- from airflow.exceptions import (
35
- AirflowProviderDeprecationWarning,
36
- )
37
33
 
38
34
  # TODO: move this maybe to Airflow's logic?
39
- from airflow.models import DAG, BaseOperator, DagRun, MappedOperator
40
- from airflow.providers.common.compat.assets import Asset
41
- from airflow.providers.openlineage import __version__ as OPENLINEAGE_PROVIDER_VERSION, conf
35
+ from airflow.models import DAG, BaseOperator, DagRun, MappedOperator, TaskReschedule
36
+ from airflow.providers.openlineage import (
37
+ __version__ as OPENLINEAGE_PROVIDER_VERSION,
38
+ conf,
39
+ )
42
40
  from airflow.providers.openlineage.plugins.facets import (
43
41
  AirflowDagRunFacet,
44
42
  AirflowDebugRunFacet,
@@ -53,6 +51,8 @@ from airflow.providers.openlineage.utils.selective_enable import (
53
51
  is_dag_lineage_enabled,
54
52
  is_task_lineage_enabled,
55
53
  )
54
+ from airflow.providers.openlineage.version_compat import AIRFLOW_V_2_10_PLUS, AIRFLOW_V_3_0_PLUS
55
+ from airflow.sensors.base import BaseSensorOperator
56
56
  from airflow.serialization.serialized_objects import SerializedBaseOperator
57
57
  from airflow.utils.context import AirflowContextDeprecationWarning
58
58
  from airflow.utils.log.secrets_masker import (
@@ -62,17 +62,27 @@ from airflow.utils.log.secrets_masker import (
62
62
  should_hide_value_for_key,
63
63
  )
64
64
  from airflow.utils.module_loading import import_string
65
+ from airflow.utils.session import NEW_SESSION, provide_session
65
66
 
66
67
  if TYPE_CHECKING:
67
68
  from openlineage.client.event_v2 import Dataset as OpenLineageDataset
68
69
  from openlineage.client.facet_v2 import RunFacet, processing_engine_run
69
70
 
70
71
  from airflow.models import TaskInstance
72
+ from airflow.providers.common.compat.assets import Asset
71
73
  from airflow.utils.state import DagRunState, TaskInstanceState
74
+ else:
75
+ try:
76
+ from airflow.providers.common.compat.assets import Asset
77
+ except ImportError:
78
+ if AIRFLOW_V_3_0_PLUS:
79
+ from airflow.sdk.definitions.asset import Asset
80
+ else:
81
+ # dataset is renamed to asset since Airflow 3.0
82
+ from airflow.datasets import Dataset as Asset
72
83
 
73
84
  log = logging.getLogger(__name__)
74
85
  _NOMINAL_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
75
- IS_AIRFLOW_2_10_OR_HIGHER = Version(Version(AIRFLOW_VERSION).base_version) >= Version("2.10.0")
76
86
 
77
87
 
78
88
  def try_import_from_string(string: str) -> Any:
@@ -167,6 +177,28 @@ def is_selective_lineage_enabled(obj: DAG | BaseOperator | MappedOperator) -> bo
167
177
  raise TypeError("is_selective_lineage_enabled can only be used on DAG or Operator objects")
168
178
 
169
179
 
180
+ @provide_session
181
+ def is_ti_rescheduled_already(ti: TaskInstance, session=NEW_SESSION):
182
+ if not isinstance(ti.task, BaseSensorOperator):
183
+ return False
184
+
185
+ if not ti.task.reschedule:
186
+ return False
187
+
188
+ return (
189
+ session.query(
190
+ exists().where(
191
+ TaskReschedule.dag_id == ti.dag_id,
192
+ TaskReschedule.task_id == ti.task_id,
193
+ TaskReschedule.run_id == ti.run_id,
194
+ TaskReschedule.map_index == ti.map_index,
195
+ TaskReschedule.try_number == ti.try_number,
196
+ )
197
+ ).scalar()
198
+ is True
199
+ )
200
+
201
+
170
202
  class InfoJsonEncodable(dict):
171
203
  """
172
204
  Airflow objects might not be json-encodable overall.
@@ -200,6 +232,7 @@ class InfoJsonEncodable(dict):
200
232
  self,
201
233
  **{field: InfoJsonEncodable._cast_basic_types(getattr(self, field)) for field in self._fields},
202
234
  )
235
+ del self.obj
203
236
 
204
237
  @staticmethod
205
238
  def _cast_basic_types(value):
@@ -660,11 +693,11 @@ def print_warning(log):
660
693
  def wrapper(*args, **kwargs):
661
694
  try:
662
695
  return f(*args, **kwargs)
663
- except Exception as e:
696
+ except Exception:
664
697
  log.warning(
665
- "Note: exception below is being caught: it's printed for visibility. However OpenLineage events aren't being emitted. If you see that, task has completed successfully despite not getting OL events."
698
+ "OpenLineage event emission failed. Exception below is being caught: it's printed for visibility. This has no impact on actual task execution status.",
699
+ exc_info=True,
666
700
  )
667
- log.warning(e)
668
701
 
669
702
  return wrapper
670
703
 
@@ -676,23 +709,9 @@ def get_filtered_unknown_operator_keys(operator: BaseOperator) -> dict:
676
709
  return {attr: value for attr, value in operator.__dict__.items() if attr not in not_required_keys}
677
710
 
678
711
 
679
- @deprecated(
680
- reason=(
681
- "`airflow.providers.openlineage.utils.utils.normalize_sql` "
682
- "has been deprecated and will be removed in future"
683
- ),
684
- category=AirflowProviderDeprecationWarning,
685
- )
686
- def normalize_sql(sql: str | Iterable[str]):
687
- if isinstance(sql, str):
688
- sql = [stmt for stmt in sql.split(";") if stmt != ""]
689
- sql = [obj for stmt in sql for obj in stmt.split(";") if obj != ""]
690
- return ";\n".join(sql)
691
-
692
-
693
712
  def should_use_external_connection(hook) -> bool:
694
713
  # If we're at Airflow 2.10, the execution is process-isolated, so we can safely run those again.
695
- if not IS_AIRFLOW_2_10_OR_HIGHER:
714
+ if not AIRFLOW_V_2_10_PLUS:
696
715
  return hook.__class__.__name__ not in [
697
716
  "SnowflakeHook",
698
717
  "SnowflakeSqlApiHook",
@@ -708,9 +727,9 @@ def translate_airflow_asset(asset: Asset, lineage_context) -> OpenLineageDataset
708
727
  This function returns None if no URI normalizer is defined, no asset converter is found or
709
728
  some core Airflow changes are missing and ImportError is raised.
710
729
  """
711
- try:
712
- from airflow.assets import _get_normalized_scheme
713
- except ModuleNotFoundError:
730
+ if AIRFLOW_V_3_0_PLUS:
731
+ from airflow.sdk.definitions.asset import _get_normalized_scheme
732
+ else:
714
733
  try:
715
734
  from airflow.datasets import _get_normalized_scheme # type: ignore[no-redef, attr-defined]
716
735
  except ImportError:
@@ -0,0 +1,36 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+ #
18
+ # NOTE! THIS FILE IS COPIED MANUALLY IN OTHER PROVIDERS DELIBERATELY TO AVOID ADDING UNNECESSARY
19
+ # DEPENDENCIES BETWEEN PROVIDERS. IF YOU WANT TO ADD CONDITIONAL CODE IN YOUR PROVIDER THAT DEPENDS
20
+ # ON AIRFLOW VERSION, PLEASE COPY THIS FILE TO THE ROOT PACKAGE OF YOUR PROVIDER AND IMPORT
21
+ # THOSE CONSTANTS FROM IT RATHER THAN IMPORTING THEM FROM ANOTHER PROVIDER OR TEST CODE
22
+ #
23
+ from __future__ import annotations
24
+
25
+
26
+ def get_base_airflow_version_tuple() -> tuple[int, int, int]:
27
+ from packaging.version import Version
28
+
29
+ from airflow import __version__
30
+
31
+ airflow_version = Version(__version__)
32
+ return airflow_version.major, airflow_version.minor, airflow_version.micro
33
+
34
+
35
+ AIRFLOW_V_2_10_PLUS = get_base_airflow_version_tuple() >= (2, 10, 0)
36
+ AIRFLOW_V_3_0_PLUS = get_base_airflow_version_tuple() >= (3, 0, 0)
@@ -27,7 +27,7 @@ build-backend = "flit_core.buildapi"
27
27
 
28
28
  [project]
29
29
  name = "apache-airflow-providers-openlineage"
30
- version = "1.14.0.rc1"
30
+ version = "2.0.0.rc1"
31
31
  description = "Provider package apache-airflow-providers-openlineage for Apache Airflow"
32
32
  readme = "README.rst"
33
33
  authors = [
@@ -54,34 +54,27 @@ classifiers = [
54
54
  ]
55
55
  requires-python = "~=3.9"
56
56
  dependencies = [
57
- "apache-airflow-providers-common-compat>=1.2.1rc0",
57
+ "apache-airflow-providers-common-compat>=1.3.0rc0",
58
58
  "apache-airflow-providers-common-sql>=1.20.0rc0",
59
- "apache-airflow>=2.8.0rc0",
59
+ "apache-airflow>=2.9.0rc0",
60
60
  "attrs>=22.2",
61
61
  "openlineage-integration-common>=1.24.2",
62
62
  "openlineage-python>=1.24.2",
63
63
  ]
64
64
 
65
65
  [project.urls]
66
- "Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.14.0"
67
- "Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.14.0/changelog.html"
66
+ "Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.0.0"
67
+ "Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.0.0/changelog.html"
68
68
  "Bug Tracker" = "https://github.com/apache/airflow/issues"
69
69
  "Source Code" = "https://github.com/apache/airflow"
70
70
  "Slack Chat" = "https://s.apache.org/airflow-slack"
71
- "Twitter" = "https://twitter.com/ApacheAirflow"
71
+ "Twitter" = "https://x.com/ApacheAirflow"
72
72
  "YouTube" = "https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/"
73
73
 
74
74
  [project.entry-points."apache_airflow_provider"]
75
75
  provider_info = "airflow.providers.openlineage.get_provider_info:get_provider_info"
76
76
  [project.entry-points."airflow.plugins"]
77
77
  openlineage = "airflow.providers.openlineage.plugins.openlineage:OpenLineageProviderPlugin"
78
- [project.optional-dependencies]
79
- "common.compat" = [
80
- "apache-airflow-providers-common-compat",
81
- ]
82
- "common.sql" = [
83
- "apache-airflow-providers-common-sql",
84
- ]
85
78
 
86
79
  [tool.flit.module]
87
80
  name = "airflow.providers.openlineage"