apache-airflow-providers-openlineage 1.11.0rc1__tar.gz → 1.12.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of apache-airflow-providers-openlineage has been flagged as potentially problematic; review the file changes below for details.

Files changed (29)
  1. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.12.0}/PKG-INFO +21 -19
  2. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.12.0}/README.rst +13 -12
  3. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.12.0}/airflow/providers/openlineage/__init__.py +1 -1
  4. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.12.0}/airflow/providers/openlineage/extractors/manager.py +35 -3
  5. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.12.0}/airflow/providers/openlineage/get_provider_info.py +5 -3
  6. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.12.0}/airflow/providers/openlineage/plugins/adapter.py +49 -27
  7. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.12.0}/airflow/providers/openlineage/plugins/listener.py +106 -49
  8. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.12.0}/airflow/providers/openlineage/plugins/openlineage.py +5 -0
  9. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.12.0}/airflow/providers/openlineage/utils/utils.py +23 -48
  10. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.12.0}/pyproject.toml +8 -7
  11. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.12.0}/airflow/providers/openlineage/LICENSE +0 -0
  12. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.12.0}/airflow/providers/openlineage/conf.py +0 -0
  13. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.12.0}/airflow/providers/openlineage/extractors/__init__.py +0 -0
  14. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.12.0}/airflow/providers/openlineage/extractors/base.py +0 -0
  15. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.12.0}/airflow/providers/openlineage/extractors/bash.py +0 -0
  16. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.12.0}/airflow/providers/openlineage/extractors/python.py +0 -0
  17. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.12.0}/airflow/providers/openlineage/facets/AirflowDagRunFacet.json +0 -0
  18. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.12.0}/airflow/providers/openlineage/facets/AirflowDebugRunFacet.json +0 -0
  19. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.12.0}/airflow/providers/openlineage/facets/AirflowJobFacet.json +0 -0
  20. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.12.0}/airflow/providers/openlineage/facets/AirflowRunFacet.json +0 -0
  21. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.12.0}/airflow/providers/openlineage/facets/AirflowStateRunFacet.json +0 -0
  22. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.12.0}/airflow/providers/openlineage/facets/__init__.py +0 -0
  23. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.12.0}/airflow/providers/openlineage/plugins/__init__.py +0 -0
  24. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.12.0}/airflow/providers/openlineage/plugins/facets.py +0 -0
  25. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.12.0}/airflow/providers/openlineage/plugins/macros.py +0 -0
  26. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.12.0}/airflow/providers/openlineage/sqlparser.py +0 -0
  27. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.12.0}/airflow/providers/openlineage/utils/__init__.py +0 -0
  28. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.12.0}/airflow/providers/openlineage/utils/selective_enable.py +0 -0
  29. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.12.0}/airflow/providers/openlineage/utils/sql.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: apache-airflow-providers-openlineage
3
- Version: 1.11.0rc1
3
+ Version: 1.12.0
4
4
  Summary: Provider package apache-airflow-providers-openlineage for Apache Airflow
5
5
  Keywords: airflow-provider,openlineage,airflow,integration
6
6
  Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -21,15 +21,16 @@ Classifier: Programming Language :: Python :: 3.10
21
21
  Classifier: Programming Language :: Python :: 3.11
22
22
  Classifier: Programming Language :: Python :: 3.12
23
23
  Classifier: Topic :: System :: Monitoring
24
- Requires-Dist: apache-airflow-providers-common-sql>=1.6.0rc0
25
- Requires-Dist: apache-airflow>=2.8.0rc0
24
+ Requires-Dist: apache-airflow-providers-common-compat>=1.2.0
25
+ Requires-Dist: apache-airflow-providers-common-sql>=1.6.0
26
+ Requires-Dist: apache-airflow>=2.8.0
26
27
  Requires-Dist: attrs>=22.2
27
- Requires-Dist: openlineage-integration-common>=1.16.0
28
- Requires-Dist: openlineage-python>=1.16.0
28
+ Requires-Dist: openlineage-integration-common>=1.22.0
29
+ Requires-Dist: openlineage-python>=1.22.0
29
30
  Requires-Dist: apache-airflow-providers-common-sql ; extra == "common.sql"
30
31
  Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
31
- Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.11.0/changelog.html
32
- Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.11.0
32
+ Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.12.0/changelog.html
33
+ Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.12.0
33
34
  Project-URL: Slack Chat, https://s.apache.org/airflow-slack
34
35
  Project-URL: Source Code, https://github.com/apache/airflow
35
36
  Project-URL: Twitter, https://twitter.com/ApacheAirflow
@@ -80,7 +81,7 @@ Provides-Extra: common.sql
80
81
 
81
82
  Package ``apache-airflow-providers-openlineage``
82
83
 
83
- Release: ``1.11.0.rc1``
84
+ Release: ``1.12.0``
84
85
 
85
86
 
86
87
  `OpenLineage <https://openlineage.io/>`__
@@ -93,7 +94,7 @@ This is a provider package for ``openlineage`` provider. All classes for this pr
93
94
  are in ``airflow.providers.openlineage`` python package.
94
95
 
95
96
  You can find package information and changelog for the provider
96
- in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.11.0/>`_.
97
+ in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.12.0/>`_.
97
98
 
98
99
  Installation
99
100
  ------------
@@ -107,15 +108,16 @@ The package supports the following python versions: 3.8,3.9,3.10,3.11,3.12
107
108
  Requirements
108
109
  ------------
109
110
 
110
- ======================================= ==================
111
- PIP package Version required
112
- ======================================= ==================
113
- ``apache-airflow`` ``>=2.8.0``
114
- ``apache-airflow-providers-common-sql`` ``>=1.6.0``
115
- ``attrs`` ``>=22.2``
116
- ``openlineage-integration-common`` ``>=1.16.0``
117
- ``openlineage-python`` ``>=1.16.0``
118
- ======================================= ==================
111
+ ========================================== ==================
112
+ PIP package Version required
113
+ ========================================== ==================
114
+ ``apache-airflow`` ``>=2.8.0``
115
+ ``apache-airflow-providers-common-sql`` ``>=1.6.0``
116
+ ``apache-airflow-providers-common-compat`` ``>=1.2.0``
117
+ ``attrs`` ``>=22.2``
118
+ ``openlineage-integration-common`` ``>=1.22.0``
119
+ ``openlineage-python`` ``>=1.22.0``
120
+ ========================================== ==================
119
121
 
120
122
  Cross provider package dependencies
121
123
  -----------------------------------
@@ -137,4 +139,4 @@ Dependent package
137
139
  ============================================================================================================ ==============
138
140
 
139
141
  The changelog for the provider package can be found in the
140
- `changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.11.0/changelog.html>`_.
142
+ `changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.12.0/changelog.html>`_.
@@ -42,7 +42,7 @@
42
42
 
43
43
  Package ``apache-airflow-providers-openlineage``
44
44
 
45
- Release: ``1.11.0.rc1``
45
+ Release: ``1.12.0``
46
46
 
47
47
 
48
48
  `OpenLineage <https://openlineage.io/>`__
@@ -55,7 +55,7 @@ This is a provider package for ``openlineage`` provider. All classes for this pr
55
55
  are in ``airflow.providers.openlineage`` python package.
56
56
 
57
57
  You can find package information and changelog for the provider
58
- in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.11.0/>`_.
58
+ in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.12.0/>`_.
59
59
 
60
60
  Installation
61
61
  ------------
@@ -69,15 +69,16 @@ The package supports the following python versions: 3.8,3.9,3.10,3.11,3.12
69
69
  Requirements
70
70
  ------------
71
71
 
72
- ======================================= ==================
73
- PIP package Version required
74
- ======================================= ==================
75
- ``apache-airflow`` ``>=2.8.0``
76
- ``apache-airflow-providers-common-sql`` ``>=1.6.0``
77
- ``attrs`` ``>=22.2``
78
- ``openlineage-integration-common`` ``>=1.16.0``
79
- ``openlineage-python`` ``>=1.16.0``
80
- ======================================= ==================
72
+ ========================================== ==================
73
+ PIP package Version required
74
+ ========================================== ==================
75
+ ``apache-airflow`` ``>=2.8.0``
76
+ ``apache-airflow-providers-common-sql`` ``>=1.6.0``
77
+ ``apache-airflow-providers-common-compat`` ``>=1.2.0``
78
+ ``attrs`` ``>=22.2``
79
+ ``openlineage-integration-common`` ``>=1.22.0``
80
+ ``openlineage-python`` ``>=1.22.0``
81
+ ========================================== ==================
81
82
 
82
83
  Cross provider package dependencies
83
84
  -----------------------------------
@@ -99,4 +100,4 @@ Dependent package
99
100
  ============================================================================================================ ==============
100
101
 
101
102
  The changelog for the provider package can be found in the
102
- `changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.11.0/changelog.html>`_.
103
+ `changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.12.0/changelog.html>`_.
@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version
29
29
 
30
30
  __all__ = ["__version__"]
31
31
 
32
- __version__ = "1.11.0"
32
+ __version__ = "1.12.0"
33
33
 
34
34
  if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
35
35
  "2.8.0"
@@ -25,6 +25,7 @@ from airflow.providers.openlineage.extractors.bash import BashExtractor
25
25
  from airflow.providers.openlineage.extractors.python import PythonExtractor
26
26
  from airflow.providers.openlineage.utils.utils import (
27
27
  get_unknown_source_attribute_run_facet,
28
+ translate_airflow_dataset,
28
29
  try_import_from_string,
29
30
  )
30
31
  from airflow.utils.log.logging_mixin import LoggingMixin
@@ -90,7 +91,6 @@ class ExtractorManager(LoggingMixin):
90
91
  f"task_id={task.task_id} "
91
92
  f"airflow_run_id={dagrun.run_id} "
92
93
  )
93
-
94
94
  if extractor:
95
95
  # Extracting advanced metadata is only possible when extractor for particular operator
96
96
  # is defined. Without it, we can't extract any input or output data.
@@ -105,14 +105,22 @@ class ExtractorManager(LoggingMixin):
105
105
  task_metadata = self.validate_task_metadata(task_metadata)
106
106
  if task_metadata:
107
107
  if (not task_metadata.inputs) and (not task_metadata.outputs):
108
- self.extract_inlets_and_outlets(task_metadata, task.inlets, task.outlets)
109
-
108
+ if (hook_lineage := self.get_hook_lineage()) is not None:
109
+ inputs, outputs = hook_lineage
110
+ task_metadata.inputs = inputs
111
+ task_metadata.outputs = outputs
112
+ else:
113
+ self.extract_inlets_and_outlets(task_metadata, task.inlets, task.outlets)
110
114
  return task_metadata
111
115
 
112
116
  except Exception as e:
113
117
  self.log.warning(
114
118
  "Failed to extract metadata using found extractor %s - %s %s", extractor, e, task_info
115
119
  )
120
+ elif (hook_lineage := self.get_hook_lineage()) is not None:
121
+ inputs, outputs = hook_lineage
122
+ task_metadata = OperatorLineage(inputs=inputs, outputs=outputs)
123
+ return task_metadata
116
124
  else:
117
125
  self.log.debug("Unable to find an extractor %s", task_info)
118
126
 
@@ -168,6 +176,30 @@ class ExtractorManager(LoggingMixin):
168
176
  if d:
169
177
  task_metadata.outputs.append(d)
170
178
 
179
+ def get_hook_lineage(self) -> tuple[list[Dataset], list[Dataset]] | None:
180
+ try:
181
+ from airflow.lineage.hook import get_hook_lineage_collector
182
+ except ImportError:
183
+ return None
184
+
185
+ if not get_hook_lineage_collector().has_collected:
186
+ return None
187
+
188
+ return (
189
+ [
190
+ dataset
191
+ for dataset_info in get_hook_lineage_collector().collected_datasets.inputs
192
+ if (dataset := translate_airflow_dataset(dataset_info.dataset, dataset_info.context))
193
+ is not None
194
+ ],
195
+ [
196
+ dataset
197
+ for dataset_info in get_hook_lineage_collector().collected_datasets.outputs
198
+ if (dataset := translate_airflow_dataset(dataset_info.dataset, dataset_info.context))
199
+ is not None
200
+ ],
201
+ )
202
+
171
203
  @staticmethod
172
204
  def convert_to_ol_dataset_from_object_storage_uri(uri: str) -> Dataset | None:
173
205
  from urllib.parse import urlparse
@@ -28,8 +28,9 @@ def get_provider_info():
28
28
  "name": "OpenLineage Airflow",
29
29
  "description": "`OpenLineage <https://openlineage.io/>`__\n",
30
30
  "state": "ready",
31
- "source-date-epoch": 1723970474,
31
+ "source-date-epoch": 1726861079,
32
32
  "versions": [
33
+ "1.12.0",
33
34
  "1.11.0",
34
35
  "1.10.0",
35
36
  "1.9.1",
@@ -53,9 +54,10 @@ def get_provider_info():
53
54
  "dependencies": [
54
55
  "apache-airflow>=2.8.0",
55
56
  "apache-airflow-providers-common-sql>=1.6.0",
57
+ "apache-airflow-providers-common-compat>=1.2.0",
56
58
  "attrs>=22.2",
57
- "openlineage-integration-common>=1.16.0",
58
- "openlineage-python>=1.16.0",
59
+ "openlineage-integration-common>=1.22.0",
60
+ "openlineage-python>=1.22.0",
59
61
  ],
60
62
  "integrations": [
61
63
  {
@@ -40,7 +40,6 @@ from openlineage.client.uuid import generate_static_uuid
40
40
  from airflow.providers.openlineage import __version__ as OPENLINEAGE_PROVIDER_VERSION, conf
41
41
  from airflow.providers.openlineage.utils.utils import (
42
42
  OpenLineageRedactor,
43
- get_airflow_dag_run_facet,
44
43
  get_airflow_debug_facet,
45
44
  get_airflow_state_run_facet,
46
45
  )
@@ -50,9 +49,9 @@ from airflow.utils.log.logging_mixin import LoggingMixin
50
49
  if TYPE_CHECKING:
51
50
  from datetime import datetime
52
51
 
53
- from airflow.models.dagrun import DagRun
54
52
  from airflow.providers.openlineage.extractors import OperatorLineage
55
53
  from airflow.utils.log.secrets_masker import SecretsMasker
54
+ from airflow.utils.state import DagRunState
56
55
 
57
56
  _PRODUCER = f"https://github.com/apache/airflow/tree/providers-openlineage/{OPENLINEAGE_PROVIDER_VERSION}"
58
57
 
@@ -118,10 +117,10 @@ class OpenLineageAdapter(LoggingMixin):
118
117
  return yaml.safe_load(config_file)
119
118
 
120
119
  @staticmethod
121
- def build_dag_run_id(dag_id: str, execution_date: datetime) -> str:
120
+ def build_dag_run_id(dag_id: str, logical_date: datetime) -> str:
122
121
  return str(
123
122
  generate_static_uuid(
124
- instant=execution_date,
123
+ instant=logical_date,
125
124
  data=f"{conf.namespace()}.{dag_id}".encode(),
126
125
  )
127
126
  )
@@ -336,33 +335,36 @@ class OpenLineageAdapter(LoggingMixin):
336
335
 
337
336
  def dag_started(
338
337
  self,
339
- dag_run: DagRun,
340
- msg: str,
338
+ dag_id: str,
339
+ logical_date: datetime,
340
+ start_date: datetime,
341
341
  nominal_start_time: str,
342
342
  nominal_end_time: str,
343
+ owners: list[str],
344
+ run_facets: dict[str, RunFacet],
345
+ description: str | None = None,
343
346
  job_facets: dict[str, JobFacet] | None = None, # Custom job facets
344
347
  ):
345
348
  try:
346
- owner = [x.strip() for x in dag_run.dag.owner.split(",")] if dag_run.dag else None
347
349
  event = RunEvent(
348
350
  eventType=RunState.START,
349
- eventTime=dag_run.start_date.isoformat(),
351
+ eventTime=start_date.isoformat(),
350
352
  job=self._build_job(
351
- job_name=dag_run.dag_id,
353
+ job_name=dag_id,
352
354
  job_type=_JOB_TYPE_DAG,
353
- job_description=dag_run.dag.description if dag_run.dag else None,
354
- owners=owner,
355
+ job_description=description,
356
+ owners=owners,
355
357
  job_facets=job_facets,
356
358
  ),
357
359
  run=self._build_run(
358
360
  run_id=self.build_dag_run_id(
359
- dag_id=dag_run.dag_id,
360
- execution_date=dag_run.execution_date,
361
+ dag_id=dag_id,
362
+ logical_date=logical_date,
361
363
  ),
362
- job_name=dag_run.dag_id,
364
+ job_name=dag_id,
363
365
  nominal_start_time=nominal_start_time,
364
366
  nominal_end_time=nominal_end_time,
365
- run_facets={**get_airflow_dag_run_facet(dag_run), **get_airflow_debug_facet()},
367
+ run_facets={**run_facets, **get_airflow_debug_facet()},
366
368
  ),
367
369
  inputs=[],
368
370
  outputs=[],
@@ -375,18 +377,29 @@ class OpenLineageAdapter(LoggingMixin):
375
377
  # This part cannot be wrapped to deduplicate code, otherwise the method cannot be pickled in multiprocessing.
376
378
  self.log.warning("Failed to emit DAG started event: \n %s", traceback.format_exc())
377
379
 
378
- def dag_success(self, dag_run: DagRun, msg: str):
380
+ def dag_success(
381
+ self,
382
+ dag_id: str,
383
+ run_id: str,
384
+ end_date: datetime,
385
+ logical_date: datetime,
386
+ dag_run_state: DagRunState,
387
+ task_ids: list[str],
388
+ ):
379
389
  try:
380
390
  event = RunEvent(
381
391
  eventType=RunState.COMPLETE,
382
- eventTime=dag_run.end_date.isoformat(),
383
- job=self._build_job(job_name=dag_run.dag_id, job_type=_JOB_TYPE_DAG),
392
+ eventTime=end_date.isoformat(),
393
+ job=self._build_job(job_name=dag_id, job_type=_JOB_TYPE_DAG),
384
394
  run=Run(
385
395
  runId=self.build_dag_run_id(
386
- dag_id=dag_run.dag_id,
387
- execution_date=dag_run.execution_date,
396
+ dag_id=dag_id,
397
+ logical_date=logical_date,
388
398
  ),
389
- facets={**get_airflow_state_run_facet(dag_run), **get_airflow_debug_facet()},
399
+ facets={
400
+ **get_airflow_state_run_facet(dag_id, run_id, task_ids, dag_run_state),
401
+ **get_airflow_debug_facet(),
402
+ },
390
403
  ),
391
404
  inputs=[],
392
405
  outputs=[],
@@ -399,22 +412,31 @@ class OpenLineageAdapter(LoggingMixin):
399
412
  # This part cannot be wrapped to deduplicate code, otherwise the method cannot be pickled in multiprocessing.
400
413
  self.log.warning("Failed to emit DAG success event: \n %s", traceback.format_exc())
401
414
 
402
- def dag_failed(self, dag_run: DagRun, msg: str):
415
+ def dag_failed(
416
+ self,
417
+ dag_id: str,
418
+ run_id: str,
419
+ end_date: datetime,
420
+ logical_date: datetime,
421
+ dag_run_state: DagRunState,
422
+ task_ids: list[str],
423
+ msg: str,
424
+ ):
403
425
  try:
404
426
  event = RunEvent(
405
427
  eventType=RunState.FAIL,
406
- eventTime=dag_run.end_date.isoformat(),
407
- job=self._build_job(job_name=dag_run.dag_id, job_type=_JOB_TYPE_DAG),
428
+ eventTime=end_date.isoformat(),
429
+ job=self._build_job(job_name=dag_id, job_type=_JOB_TYPE_DAG),
408
430
  run=Run(
409
431
  runId=self.build_dag_run_id(
410
- dag_id=dag_run.dag_id,
411
- execution_date=dag_run.execution_date,
432
+ dag_id=dag_id,
433
+ logical_date=logical_date,
412
434
  ),
413
435
  facets={
414
436
  "errorMessage": error_message_run.ErrorMessageRunFacet(
415
437
  message=msg, programmingLanguage="python"
416
438
  ),
417
- **get_airflow_state_run_facet(dag_run),
439
+ **get_airflow_state_run_facet(dag_id, run_id, task_ids, dag_run_state),
418
440
  **get_airflow_debug_facet(),
419
441
  },
420
442
  ),
@@ -27,11 +27,13 @@ from setproctitle import getproctitle, setproctitle
27
27
 
28
28
  from airflow import settings
29
29
  from airflow.listeners import hookimpl
30
+ from airflow.models import DagRun
30
31
  from airflow.providers.openlineage import conf
31
32
  from airflow.providers.openlineage.extractors import ExtractorManager
32
33
  from airflow.providers.openlineage.plugins.adapter import OpenLineageAdapter, RunState
33
34
  from airflow.providers.openlineage.utils.utils import (
34
35
  IS_AIRFLOW_2_10_OR_HIGHER,
36
+ get_airflow_dag_run_facet,
35
37
  get_airflow_debug_facet,
36
38
  get_airflow_job_facet,
37
39
  get_airflow_mapped_task_facet,
@@ -51,7 +53,7 @@ from airflow.utils.timeout import timeout
51
53
  if TYPE_CHECKING:
52
54
  from sqlalchemy.orm import Session
53
55
 
54
- from airflow.models import DagRun, TaskInstance
56
+ from airflow.models import TaskInstance
55
57
 
56
58
  _openlineage_listener: OpenLineageListener | None = None
57
59
 
@@ -134,7 +136,7 @@ class OpenLineageListener:
134
136
  return
135
137
  parent_run_id = self.adapter.build_dag_run_id(
136
138
  dag_id=dag.dag_id,
137
- execution_date=dagrun.execution_date,
139
+ logical_date=dagrun.logical_date,
138
140
  )
139
141
 
140
142
  task_uuid = self.adapter.build_task_instance_run_id(
@@ -213,7 +215,7 @@ class OpenLineageListener:
213
215
  def on_success():
214
216
  parent_run_id = OpenLineageAdapter.build_dag_run_id(
215
217
  dag_id=dag.dag_id,
216
- execution_date=dagrun.execution_date,
218
+ logical_date=dagrun.logical_date,
217
219
  )
218
220
 
219
221
  task_uuid = OpenLineageAdapter.build_task_instance_run_id(
@@ -312,7 +314,7 @@ class OpenLineageListener:
312
314
  def on_failure():
313
315
  parent_run_id = OpenLineageAdapter.build_dag_run_id(
314
316
  dag_id=dag.dag_id,
315
- execution_date=dagrun.execution_date,
317
+ logical_date=dagrun.logical_date,
316
318
  )
317
319
 
318
320
  task_uuid = OpenLineageAdapter.build_task_instance_run_id(
@@ -413,65 +415,120 @@ class OpenLineageListener:
413
415
 
414
416
  @hookimpl
415
417
  def on_dag_run_running(self, dag_run: DagRun, msg: str) -> None:
416
- if dag_run.dag and not is_selective_lineage_enabled(dag_run.dag):
417
- self.log.debug(
418
- "Skipping OpenLineage event emission for DAG `%s` "
419
- "due to lack of explicit lineage enablement for DAG while "
420
- "[openlineage] selective_enable is on.",
421
- dag_run.dag_id,
418
+ try:
419
+ if dag_run.dag and not is_selective_lineage_enabled(dag_run.dag):
420
+ self.log.debug(
421
+ "Skipping OpenLineage event emission for DAG `%s` "
422
+ "due to lack of explicit lineage enablement for DAG while "
423
+ "[openlineage] selective_enable is on.",
424
+ dag_run.dag_id,
425
+ )
426
+ return
427
+
428
+ if not self.executor:
429
+ self.log.debug("Executor have not started before `on_dag_run_running`")
430
+ return
431
+
432
+ data_interval_start = (
433
+ dag_run.data_interval_start.isoformat() if dag_run.data_interval_start else None
422
434
  )
423
- return
435
+ data_interval_end = dag_run.data_interval_end.isoformat() if dag_run.data_interval_end else None
424
436
 
425
- if not self.executor:
426
- self.log.debug("Executor have not started before `on_dag_run_running`")
427
- return
437
+ run_facets = {**get_airflow_dag_run_facet(dag_run)}
428
438
 
429
- data_interval_start = dag_run.data_interval_start.isoformat() if dag_run.data_interval_start else None
430
- data_interval_end = dag_run.data_interval_end.isoformat() if dag_run.data_interval_end else None
431
- self.executor.submit(
432
- self.adapter.dag_started,
433
- dag_run=dag_run,
434
- msg=msg,
435
- nominal_start_time=data_interval_start,
436
- nominal_end_time=data_interval_end,
437
- # AirflowJobFacet should be created outside ProcessPoolExecutor that pickles objects,
438
- # as it causes lack of some TaskGroup attributes and crashes event emission.
439
- job_facets=get_airflow_job_facet(dag_run=dag_run),
440
- )
439
+ self.submit_callable(
440
+ self.adapter.dag_started,
441
+ dag_id=dag_run.dag_id,
442
+ run_id=dag_run.run_id,
443
+ logical_date=dag_run.logical_date,
444
+ start_date=dag_run.start_date,
445
+ nominal_start_time=data_interval_start,
446
+ nominal_end_time=data_interval_end,
447
+ run_facets=run_facets,
448
+ owners=[x.strip() for x in dag_run.dag.owner.split(",")] if dag_run.dag else None,
449
+ description=dag_run.dag.description if dag_run.dag else None,
450
+ # AirflowJobFacet should be created outside ProcessPoolExecutor that pickles objects,
451
+ # as it causes lack of some TaskGroup attributes and crashes event emission.
452
+ job_facets=get_airflow_job_facet(dag_run=dag_run),
453
+ )
454
+ except BaseException as e:
455
+ self.log.warning("OpenLineage received exception in method on_dag_run_running", exc_info=e)
441
456
 
442
457
  @hookimpl
443
458
  def on_dag_run_success(self, dag_run: DagRun, msg: str) -> None:
444
- if dag_run.dag and not is_selective_lineage_enabled(dag_run.dag):
445
- self.log.debug(
446
- "Skipping OpenLineage event emission for DAG `%s` "
447
- "due to lack of explicit lineage enablement for DAG while "
448
- "[openlineage] selective_enable is on.",
449
- dag_run.dag_id,
450
- )
451
- return
459
+ try:
460
+ if dag_run.dag and not is_selective_lineage_enabled(dag_run.dag):
461
+ self.log.debug(
462
+ "Skipping OpenLineage event emission for DAG `%s` "
463
+ "due to lack of explicit lineage enablement for DAG while "
464
+ "[openlineage] selective_enable is on.",
465
+ dag_run.dag_id,
466
+ )
467
+ return
452
468
 
453
- if not self.executor:
454
- self.log.debug("Executor have not started before `on_dag_run_success`")
455
- return
469
+ if not self.executor:
470
+ self.log.debug("Executor have not started before `on_dag_run_success`")
471
+ return
456
472
 
457
- self.executor.submit(self.adapter.dag_success, dag_run=dag_run, msg=msg)
473
+ if IS_AIRFLOW_2_10_OR_HIGHER:
474
+ task_ids = DagRun._get_partial_task_ids(dag_run.dag)
475
+ else:
476
+ task_ids = dag_run.dag.task_ids if dag_run.dag and dag_run.dag.partial else None
477
+ self.submit_callable(
478
+ self.adapter.dag_success,
479
+ dag_id=dag_run.dag_id,
480
+ run_id=dag_run.run_id,
481
+ end_date=dag_run.end_date,
482
+ logical_date=dag_run.logical_date,
483
+ task_ids=task_ids,
484
+ dag_run_state=dag_run.get_state(),
485
+ )
486
+ except BaseException as e:
487
+ self.log.warning("OpenLineage received exception in method on_dag_run_success", exc_info=e)
458
488
 
459
489
  @hookimpl
460
490
  def on_dag_run_failed(self, dag_run: DagRun, msg: str) -> None:
461
- if dag_run.dag and not is_selective_lineage_enabled(dag_run.dag):
462
- self.log.debug(
463
- "Skipping OpenLineage event emission for DAG `%s` "
464
- "due to lack of explicit lineage enablement for DAG while "
465
- "[openlineage] selective_enable is on.",
466
- dag_run.dag_id,
491
+ try:
492
+ if dag_run.dag and not is_selective_lineage_enabled(dag_run.dag):
493
+ self.log.debug(
494
+ "Skipping OpenLineage event emission for DAG `%s` "
495
+ "due to lack of explicit lineage enablement for DAG while "
496
+ "[openlineage] selective_enable is on.",
497
+ dag_run.dag_id,
498
+ )
499
+ return
500
+
501
+ if not self.executor:
502
+ self.log.debug("Executor have not started before `on_dag_run_failed`")
503
+ return
504
+
505
+ if IS_AIRFLOW_2_10_OR_HIGHER:
506
+ task_ids = DagRun._get_partial_task_ids(dag_run.dag)
507
+ else:
508
+ task_ids = dag_run.dag.task_ids if dag_run.dag and dag_run.dag.partial else None
509
+ self.submit_callable(
510
+ self.adapter.dag_failed,
511
+ dag_id=dag_run.dag_id,
512
+ run_id=dag_run.run_id,
513
+ end_date=dag_run.end_date,
514
+ logical_date=dag_run.logical_date,
515
+ dag_run_state=dag_run.get_state(),
516
+ task_ids=task_ids,
517
+ msg=msg,
467
518
  )
468
- return
519
+ except BaseException as e:
520
+ self.log.warning("OpenLineage received exception in method on_dag_run_failed", exc_info=e)
469
521
 
470
- if not self.executor:
471
- self.log.debug("Executor have not started before `on_dag_run_failed`")
472
- return
522
+ def submit_callable(self, callable, *args, **kwargs):
523
+ fut = self.executor.submit(callable, *args, **kwargs)
524
+ fut.add_done_callback(self.log_submit_error)
525
+ return fut
473
526
 
474
- self.executor.submit(self.adapter.dag_failed, dag_run=dag_run, msg=msg)
527
+ def log_submit_error(self, fut):
528
+ if fut.exception():
529
+ self.log.warning("Failed to submit method to executor", exc_info=fut.exception())
530
+ else:
531
+ self.log.debug("Successfully submitted method to executor")
475
532
 
476
533
 
477
534
  def get_openlineage_listener() -> OpenLineageListener:
@@ -25,6 +25,7 @@ from airflow.providers.openlineage.plugins.macros import (
25
25
  lineage_parent_id,
26
26
  lineage_run_id,
27
27
  )
28
+ from airflow.providers.openlineage.utils.utils import IS_AIRFLOW_2_10_OR_HIGHER
28
29
 
29
30
 
30
31
  class OpenLineageProviderPlugin(AirflowPlugin):
@@ -39,6 +40,10 @@ class OpenLineageProviderPlugin(AirflowPlugin):
39
40
  if not conf.is_disabled():
40
41
  macros = [lineage_job_namespace, lineage_job_name, lineage_run_id, lineage_parent_id]
41
42
  listeners = [get_openlineage_listener()]
43
+ if IS_AIRFLOW_2_10_OR_HIGHER:
44
+ from airflow.lineage.hook import HookLineageReader
45
+
46
+ hook_lineage_readers = [HookLineageReader]
42
47
  else:
43
48
  macros = []
44
49
  listeners = []
@@ -33,7 +33,7 @@ from packaging.version import Version
33
33
  from airflow import __version__ as AIRFLOW_VERSION
34
34
  from airflow.datasets import Dataset
35
35
  from airflow.exceptions import AirflowProviderDeprecationWarning # TODO: move this maybe to Airflow's logic?
36
- from airflow.models import DAG, BaseOperator, MappedOperator, Operator
36
+ from airflow.models import DAG, BaseOperator, DagRun, MappedOperator
37
37
  from airflow.providers.openlineage import conf
38
38
  from airflow.providers.openlineage.plugins.facets import (
39
39
  AirflowDagRunFacet,
@@ -58,9 +58,8 @@ if TYPE_CHECKING:
58
58
  from openlineage.client.event_v2 import Dataset as OpenLineageDataset
59
59
  from openlineage.client.facet_v2 import RunFacet
60
60
 
61
- from airflow.models import DagRun, TaskInstance
62
- from airflow.utils.state import TaskInstanceState
63
-
61
+ from airflow.models import TaskInstance
62
+ from airflow.utils.state import DagRunState, TaskInstanceState
64
63
 
65
64
  log = logging.getLogger(__name__)
66
65
  _NOMINAL_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
@@ -199,7 +198,7 @@ class InfoJsonEncodable(dict):
199
198
  return value.isoformat()
200
199
  if isinstance(value, datetime.timedelta):
201
200
  return f"{value.total_seconds()} seconds"
202
- if isinstance(value, (set, tuple)):
201
+ if isinstance(value, (set, list, tuple)):
203
202
  return str(list(value))
204
203
  return value
205
204
 
@@ -244,7 +243,16 @@ class InfoJsonEncodable(dict):
244
243
  class DagInfo(InfoJsonEncodable):
245
244
  """Defines encoding DAG object to JSON."""
246
245
 
247
- includes = ["dag_id", "description", "fileloc", "owner", "schedule_interval", "start_date", "tags"]
246
+ includes = [
247
+ "dag_id",
248
+ "description",
249
+ "fileloc",
250
+ "owner",
251
+ "schedule_interval", # For Airflow 2.
252
+ "timetable_summary", # For Airflow 3.
253
+ "start_date",
254
+ "tags",
255
+ ]
248
256
  casts = {"timetable": lambda dag: dag.timetable.serialize() if getattr(dag, "timetable", None) else None}
249
257
  renames = {"_dag_id": "dag_id"}
250
258
 
@@ -423,59 +431,25 @@ def get_airflow_job_facet(dag_run: DagRun) -> dict[str, AirflowJobFacet]:
423
431
  return {}
424
432
  return {
425
433
  "airflow": AirflowJobFacet(
426
- taskTree=_get_parsed_dag_tree(dag_run.dag),
434
+ taskTree={}, # caused OOM errors, to be removed, see #41587
427
435
  taskGroups=_get_task_groups_details(dag_run.dag),
428
436
  tasks=_get_tasks_details(dag_run.dag),
429
437
  )
430
438
  }
431
439
 
432
440
 
433
- def get_airflow_state_run_facet(dag_run: DagRun) -> dict[str, AirflowStateRunFacet]:
441
+ def get_airflow_state_run_facet(
442
+ dag_id: str, run_id: str, task_ids: list[str], dag_run_state: DagRunState
443
+ ) -> dict[str, AirflowStateRunFacet]:
444
+ tis = DagRun.fetch_task_instances(dag_id=dag_id, run_id=run_id, task_ids=task_ids)
434
445
  return {
435
446
  "airflowState": AirflowStateRunFacet(
436
- dagRunState=dag_run.get_state(),
437
- tasksState={ti.task_id: ti.state for ti in dag_run.get_task_instances()},
447
+ dagRunState=dag_run_state,
448
+ tasksState={ti.task_id: ti.state for ti in tis},
438
449
  )
439
450
  }
440
451
 
441
452
 
442
- def _get_parsed_dag_tree(dag: DAG) -> dict:
443
- """
444
- Get DAG's tasks hierarchy representation.
445
-
446
- While the task dependencies are defined as following:
447
- task >> [task_2, task_4] >> task_7
448
- task_3 >> task_5
449
- task_6 # has no dependencies, it's a root and a leaf
450
-
451
- The result of this function will look like:
452
- {
453
- "task": {
454
- "task_2": {
455
- "task_7": {}
456
- },
457
- "task_4": {
458
- "task_7": {}
459
- }
460
- },
461
- "task_3": {
462
- "task_5": {}
463
- },
464
- "task_6": {}
465
- }
466
- """
467
-
468
- def get_downstream(task: Operator, current_dict: dict):
469
- current_dict[task.task_id] = {}
470
- for tmp_task in sorted(task.downstream_list, key=lambda x: x.task_id):
471
- get_downstream(tmp_task, current_dict[task.task_id])
472
-
473
- task_dict: dict = {}
474
- for t in sorted(dag.roots, key=lambda x: x.task_id):
475
- get_downstream(t, task_dict)
476
- return task_dict
477
-
478
-
479
453
  def _get_tasks_details(dag: DAG) -> dict:
480
454
  tasks = {
481
455
  single_task.task_id: {
@@ -487,8 +461,9 @@ def _get_tasks_details(dag: DAG) -> dict:
487
461
  "ui_label": single_task.label,
488
462
  "is_setup": single_task.is_setup,
489
463
  "is_teardown": single_task.is_teardown,
464
+ "downstream_task_ids": sorted(single_task.downstream_task_ids),
490
465
  }
491
- for single_task in dag.tasks
466
+ for single_task in sorted(dag.tasks, key=lambda x: x.task_id)
492
467
  }
493
468
 
494
469
  return tasks
@@ -28,7 +28,7 @@ build-backend = "flit_core.buildapi"
28
28
 
29
29
  [project]
30
30
  name = "apache-airflow-providers-openlineage"
31
- version = "1.11.0.rc1"
31
+ version = "1.12.0"
32
32
  description = "Provider package apache-airflow-providers-openlineage for Apache Airflow"
33
33
  readme = "README.rst"
34
34
  authors = [
@@ -56,16 +56,17 @@ classifiers = [
56
56
  ]
57
57
  requires-python = "~=3.8"
58
58
  dependencies = [
59
- "apache-airflow-providers-common-sql>=1.6.0rc0",
60
- "apache-airflow>=2.8.0rc0",
59
+ "apache-airflow-providers-common-compat>=1.2.0",
60
+ "apache-airflow-providers-common-sql>=1.6.0",
61
+ "apache-airflow>=2.8.0",
61
62
  "attrs>=22.2",
62
- "openlineage-integration-common>=1.16.0",
63
- "openlineage-python>=1.16.0",
63
+ "openlineage-integration-common>=1.22.0",
64
+ "openlineage-python>=1.22.0",
64
65
  ]
65
66
 
66
67
  [project.urls]
67
- "Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.11.0"
68
- "Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.11.0/changelog.html"
68
+ "Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.12.0"
69
+ "Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.12.0/changelog.html"
69
70
  "Bug Tracker" = "https://github.com/apache/airflow/issues"
70
71
  "Source Code" = "https://github.com/apache/airflow"
71
72
  "Slack Chat" = "https://s.apache.org/airflow-slack"