apache-airflow-providers-openlineage 1.11.0rc1__tar.gz → 1.11.0rc2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/PKG-INFO +13 -11
  2. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/README.rst +11 -10
  3. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/extractors/manager.py +35 -3
  4. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/get_provider_info.py +1 -0
  5. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/plugins/openlineage.py +5 -0
  6. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/utils/utils.py +4 -40
  7. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/pyproject.toml +2 -1
  8. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/LICENSE +0 -0
  9. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/__init__.py +0 -0
  10. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/conf.py +0 -0
  11. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/extractors/__init__.py +0 -0
  12. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/extractors/base.py +0 -0
  13. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/extractors/bash.py +0 -0
  14. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/extractors/python.py +0 -0
  15. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/facets/AirflowDagRunFacet.json +0 -0
  16. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/facets/AirflowDebugRunFacet.json +0 -0
  17. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/facets/AirflowJobFacet.json +0 -0
  18. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/facets/AirflowRunFacet.json +0 -0
  19. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/facets/AirflowStateRunFacet.json +0 -0
  20. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/facets/__init__.py +0 -0
  21. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/plugins/__init__.py +0 -0
  22. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/plugins/adapter.py +0 -0
  23. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/plugins/facets.py +0 -0
  24. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/plugins/listener.py +0 -0
  25. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/plugins/macros.py +0 -0
  26. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/sqlparser.py +0 -0
  27. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/utils/__init__.py +0 -0
  28. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/utils/selective_enable.py +0 -0
  29. {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/utils/sql.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: apache-airflow-providers-openlineage
3
- Version: 1.11.0rc1
3
+ Version: 1.11.0rc2
4
4
  Summary: Provider package apache-airflow-providers-openlineage for Apache Airflow
5
5
  Keywords: airflow-provider,openlineage,airflow,integration
6
6
  Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -21,6 +21,7 @@ Classifier: Programming Language :: Python :: 3.10
21
21
  Classifier: Programming Language :: Python :: 3.11
22
22
  Classifier: Programming Language :: Python :: 3.12
23
23
  Classifier: Topic :: System :: Monitoring
24
+ Requires-Dist: apache-airflow-providers-common-compat>=1.2.0rc0
24
25
  Requires-Dist: apache-airflow-providers-common-sql>=1.6.0rc0
25
26
  Requires-Dist: apache-airflow>=2.8.0rc0
26
27
  Requires-Dist: attrs>=22.2
@@ -80,7 +81,7 @@ Provides-Extra: common.sql
80
81
 
81
82
  Package ``apache-airflow-providers-openlineage``
82
83
 
83
- Release: ``1.11.0.rc1``
84
+ Release: ``1.11.0.rc2``
84
85
 
85
86
 
86
87
  `OpenLineage <https://openlineage.io/>`__
@@ -107,15 +108,16 @@ The package supports the following python versions: 3.8,3.9,3.10,3.11,3.12
107
108
  Requirements
108
109
  ------------
109
110
 
110
- ======================================= ==================
111
- PIP package Version required
112
- ======================================= ==================
113
- ``apache-airflow`` ``>=2.8.0``
114
- ``apache-airflow-providers-common-sql`` ``>=1.6.0``
115
- ``attrs`` ``>=22.2``
116
- ``openlineage-integration-common`` ``>=1.16.0``
117
- ``openlineage-python`` ``>=1.16.0``
118
- ======================================= ==================
111
+ ========================================== ==================
112
+ PIP package Version required
113
+ ========================================== ==================
114
+ ``apache-airflow`` ``>=2.8.0``
115
+ ``apache-airflow-providers-common-sql`` ``>=1.6.0``
116
+ ``apache-airflow-providers-common-compat`` ``>=1.2.0``
117
+ ``attrs`` ``>=22.2``
118
+ ``openlineage-integration-common`` ``>=1.16.0``
119
+ ``openlineage-python`` ``>=1.16.0``
120
+ ========================================== ==================
119
121
 
120
122
  Cross provider package dependencies
121
123
  -----------------------------------
@@ -42,7 +42,7 @@
42
42
 
43
43
  Package ``apache-airflow-providers-openlineage``
44
44
 
45
- Release: ``1.11.0.rc1``
45
+ Release: ``1.11.0.rc2``
46
46
 
47
47
 
48
48
  `OpenLineage <https://openlineage.io/>`__
@@ -69,15 +69,16 @@ The package supports the following python versions: 3.8,3.9,3.10,3.11,3.12
69
69
  Requirements
70
70
  ------------
71
71
 
72
- ======================================= ==================
73
- PIP package Version required
74
- ======================================= ==================
75
- ``apache-airflow`` ``>=2.8.0``
76
- ``apache-airflow-providers-common-sql`` ``>=1.6.0``
77
- ``attrs`` ``>=22.2``
78
- ``openlineage-integration-common`` ``>=1.16.0``
79
- ``openlineage-python`` ``>=1.16.0``
80
- ======================================= ==================
72
+ ========================================== ==================
73
+ PIP package Version required
74
+ ========================================== ==================
75
+ ``apache-airflow`` ``>=2.8.0``
76
+ ``apache-airflow-providers-common-sql`` ``>=1.6.0``
77
+ ``apache-airflow-providers-common-compat`` ``>=1.2.0``
78
+ ``attrs`` ``>=22.2``
79
+ ``openlineage-integration-common`` ``>=1.16.0``
80
+ ``openlineage-python`` ``>=1.16.0``
81
+ ========================================== ==================
81
82
 
82
83
  Cross provider package dependencies
83
84
  -----------------------------------
@@ -25,6 +25,7 @@ from airflow.providers.openlineage.extractors.bash import BashExtractor
25
25
  from airflow.providers.openlineage.extractors.python import PythonExtractor
26
26
  from airflow.providers.openlineage.utils.utils import (
27
27
  get_unknown_source_attribute_run_facet,
28
+ translate_airflow_dataset,
28
29
  try_import_from_string,
29
30
  )
30
31
  from airflow.utils.log.logging_mixin import LoggingMixin
@@ -90,7 +91,6 @@ class ExtractorManager(LoggingMixin):
90
91
  f"task_id={task.task_id} "
91
92
  f"airflow_run_id={dagrun.run_id} "
92
93
  )
93
-
94
94
  if extractor:
95
95
  # Extracting advanced metadata is only possible when extractor for particular operator
96
96
  # is defined. Without it, we can't extract any input or output data.
@@ -105,14 +105,22 @@ class ExtractorManager(LoggingMixin):
105
105
  task_metadata = self.validate_task_metadata(task_metadata)
106
106
  if task_metadata:
107
107
  if (not task_metadata.inputs) and (not task_metadata.outputs):
108
- self.extract_inlets_and_outlets(task_metadata, task.inlets, task.outlets)
109
-
108
+ if (hook_lineage := self.get_hook_lineage()) is not None:
109
+ inputs, outputs = hook_lineage
110
+ task_metadata.inputs = inputs
111
+ task_metadata.outputs = outputs
112
+ else:
113
+ self.extract_inlets_and_outlets(task_metadata, task.inlets, task.outlets)
110
114
  return task_metadata
111
115
 
112
116
  except Exception as e:
113
117
  self.log.warning(
114
118
  "Failed to extract metadata using found extractor %s - %s %s", extractor, e, task_info
115
119
  )
120
+ elif (hook_lineage := self.get_hook_lineage()) is not None:
121
+ inputs, outputs = hook_lineage
122
+ task_metadata = OperatorLineage(inputs=inputs, outputs=outputs)
123
+ return task_metadata
116
124
  else:
117
125
  self.log.debug("Unable to find an extractor %s", task_info)
118
126
 
@@ -168,6 +176,30 @@ class ExtractorManager(LoggingMixin):
168
176
  if d:
169
177
  task_metadata.outputs.append(d)
170
178
 
179
+ def get_hook_lineage(self) -> tuple[list[Dataset], list[Dataset]] | None:
180
+ try:
181
+ from airflow.lineage.hook import get_hook_lineage_collector
182
+ except ImportError:
183
+ return None
184
+
185
+ if not get_hook_lineage_collector().has_collected:
186
+ return None
187
+
188
+ return (
189
+ [
190
+ dataset
191
+ for dataset_info in get_hook_lineage_collector().collected_datasets.inputs
192
+ if (dataset := translate_airflow_dataset(dataset_info.dataset, dataset_info.context))
193
+ is not None
194
+ ],
195
+ [
196
+ dataset
197
+ for dataset_info in get_hook_lineage_collector().collected_datasets.outputs
198
+ if (dataset := translate_airflow_dataset(dataset_info.dataset, dataset_info.context))
199
+ is not None
200
+ ],
201
+ )
202
+
171
203
  @staticmethod
172
204
  def convert_to_ol_dataset_from_object_storage_uri(uri: str) -> Dataset | None:
173
205
  from urllib.parse import urlparse
@@ -53,6 +53,7 @@ def get_provider_info():
53
53
  "dependencies": [
54
54
  "apache-airflow>=2.8.0",
55
55
  "apache-airflow-providers-common-sql>=1.6.0",
56
+ "apache-airflow-providers-common-compat>=1.2.0",
56
57
  "attrs>=22.2",
57
58
  "openlineage-integration-common>=1.16.0",
58
59
  "openlineage-python>=1.16.0",
@@ -25,6 +25,7 @@ from airflow.providers.openlineage.plugins.macros import (
25
25
  lineage_parent_id,
26
26
  lineage_run_id,
27
27
  )
28
+ from airflow.providers.openlineage.utils.utils import IS_AIRFLOW_2_10_OR_HIGHER
28
29
 
29
30
 
30
31
  class OpenLineageProviderPlugin(AirflowPlugin):
@@ -39,6 +40,10 @@ class OpenLineageProviderPlugin(AirflowPlugin):
39
40
  if not conf.is_disabled():
40
41
  macros = [lineage_job_namespace, lineage_job_name, lineage_run_id, lineage_parent_id]
41
42
  listeners = [get_openlineage_listener()]
43
+ if IS_AIRFLOW_2_10_OR_HIGHER:
44
+ from airflow.lineage.hook import HookLineageReader
45
+
46
+ hook_lineage_readers = [HookLineageReader]
42
47
  else:
43
48
  macros = []
44
49
  listeners = []
@@ -33,7 +33,7 @@ from packaging.version import Version
33
33
  from airflow import __version__ as AIRFLOW_VERSION
34
34
  from airflow.datasets import Dataset
35
35
  from airflow.exceptions import AirflowProviderDeprecationWarning # TODO: move this maybe to Airflow's logic?
36
- from airflow.models import DAG, BaseOperator, MappedOperator, Operator
36
+ from airflow.models import DAG, BaseOperator, MappedOperator
37
37
  from airflow.providers.openlineage import conf
38
38
  from airflow.providers.openlineage.plugins.facets import (
39
39
  AirflowDagRunFacet,
@@ -423,7 +423,7 @@ def get_airflow_job_facet(dag_run: DagRun) -> dict[str, AirflowJobFacet]:
423
423
  return {}
424
424
  return {
425
425
  "airflow": AirflowJobFacet(
426
- taskTree=_get_parsed_dag_tree(dag_run.dag),
426
+ taskTree={}, # caused OOM errors, to be removed, see #41587
427
427
  taskGroups=_get_task_groups_details(dag_run.dag),
428
428
  tasks=_get_tasks_details(dag_run.dag),
429
429
  )
@@ -439,43 +439,6 @@ def get_airflow_state_run_facet(dag_run: DagRun) -> dict[str, AirflowStateRunFac
439
439
  }
440
440
 
441
441
 
442
- def _get_parsed_dag_tree(dag: DAG) -> dict:
443
- """
444
- Get DAG's tasks hierarchy representation.
445
-
446
- While the task dependencies are defined as following:
447
- task >> [task_2, task_4] >> task_7
448
- task_3 >> task_5
449
- task_6 # has no dependencies, it's a root and a leaf
450
-
451
- The result of this function will look like:
452
- {
453
- "task": {
454
- "task_2": {
455
- "task_7": {}
456
- },
457
- "task_4": {
458
- "task_7": {}
459
- }
460
- },
461
- "task_3": {
462
- "task_5": {}
463
- },
464
- "task_6": {}
465
- }
466
- """
467
-
468
- def get_downstream(task: Operator, current_dict: dict):
469
- current_dict[task.task_id] = {}
470
- for tmp_task in sorted(task.downstream_list, key=lambda x: x.task_id):
471
- get_downstream(tmp_task, current_dict[task.task_id])
472
-
473
- task_dict: dict = {}
474
- for t in sorted(dag.roots, key=lambda x: x.task_id):
475
- get_downstream(t, task_dict)
476
- return task_dict
477
-
478
-
479
442
  def _get_tasks_details(dag: DAG) -> dict:
480
443
  tasks = {
481
444
  single_task.task_id: {
@@ -487,8 +450,9 @@ def _get_tasks_details(dag: DAG) -> dict:
487
450
  "ui_label": single_task.label,
488
451
  "is_setup": single_task.is_setup,
489
452
  "is_teardown": single_task.is_teardown,
453
+ "downstream_task_ids": sorted(single_task.downstream_task_ids),
490
454
  }
491
- for single_task in dag.tasks
455
+ for single_task in sorted(dag.tasks, key=lambda x: x.task_id)
492
456
  }
493
457
 
494
458
  return tasks
@@ -28,7 +28,7 @@ build-backend = "flit_core.buildapi"
28
28
 
29
29
  [project]
30
30
  name = "apache-airflow-providers-openlineage"
31
- version = "1.11.0.rc1"
31
+ version = "1.11.0.rc2"
32
32
  description = "Provider package apache-airflow-providers-openlineage for Apache Airflow"
33
33
  readme = "README.rst"
34
34
  authors = [
@@ -56,6 +56,7 @@ classifiers = [
56
56
  ]
57
57
  requires-python = "~=3.8"
58
58
  dependencies = [
59
+ "apache-airflow-providers-common-compat>=1.2.0rc0",
59
60
  "apache-airflow-providers-common-sql>=1.6.0rc0",
60
61
  "apache-airflow>=2.8.0rc0",
61
62
  "attrs>=22.2",