apache-airflow-providers-openlineage 1.11.0__tar.gz → 1.11.0rc1__tar.gz

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. {apache_airflow_providers_openlineage-1.11.0 → apache_airflow_providers_openlineage-1.11.0rc1}/PKG-INFO +13 -15
  2. {apache_airflow_providers_openlineage-1.11.0 → apache_airflow_providers_openlineage-1.11.0rc1}/README.rst +10 -11
  3. {apache_airflow_providers_openlineage-1.11.0 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/extractors/manager.py +3 -35
  4. {apache_airflow_providers_openlineage-1.11.0 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/get_provider_info.py +0 -1
  5. {apache_airflow_providers_openlineage-1.11.0 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/plugins/openlineage.py +0 -5
  6. {apache_airflow_providers_openlineage-1.11.0 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/utils/utils.py +40 -4
  7. {apache_airflow_providers_openlineage-1.11.0 → apache_airflow_providers_openlineage-1.11.0rc1}/pyproject.toml +3 -4
  8. {apache_airflow_providers_openlineage-1.11.0 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/LICENSE +0 -0
  9. {apache_airflow_providers_openlineage-1.11.0 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/__init__.py +0 -0
  10. {apache_airflow_providers_openlineage-1.11.0 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/conf.py +0 -0
  11. {apache_airflow_providers_openlineage-1.11.0 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/extractors/__init__.py +0 -0
  12. {apache_airflow_providers_openlineage-1.11.0 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/extractors/base.py +0 -0
  13. {apache_airflow_providers_openlineage-1.11.0 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/extractors/bash.py +0 -0
  14. {apache_airflow_providers_openlineage-1.11.0 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/extractors/python.py +0 -0
  15. {apache_airflow_providers_openlineage-1.11.0 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/facets/AirflowDagRunFacet.json +0 -0
  16. {apache_airflow_providers_openlineage-1.11.0 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/facets/AirflowDebugRunFacet.json +0 -0
  17. {apache_airflow_providers_openlineage-1.11.0 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/facets/AirflowJobFacet.json +0 -0
  18. {apache_airflow_providers_openlineage-1.11.0 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/facets/AirflowRunFacet.json +0 -0
  19. {apache_airflow_providers_openlineage-1.11.0 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/facets/AirflowStateRunFacet.json +0 -0
  20. {apache_airflow_providers_openlineage-1.11.0 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/facets/__init__.py +0 -0
  21. {apache_airflow_providers_openlineage-1.11.0 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/plugins/__init__.py +0 -0
  22. {apache_airflow_providers_openlineage-1.11.0 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/plugins/adapter.py +0 -0
  23. {apache_airflow_providers_openlineage-1.11.0 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/plugins/facets.py +0 -0
  24. {apache_airflow_providers_openlineage-1.11.0 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/plugins/listener.py +0 -0
  25. {apache_airflow_providers_openlineage-1.11.0 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/plugins/macros.py +0 -0
  26. {apache_airflow_providers_openlineage-1.11.0 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/sqlparser.py +0 -0
  27. {apache_airflow_providers_openlineage-1.11.0 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/utils/__init__.py +0 -0
  28. {apache_airflow_providers_openlineage-1.11.0 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/utils/selective_enable.py +0 -0
  29. {apache_airflow_providers_openlineage-1.11.0 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/utils/sql.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: apache-airflow-providers-openlineage
3
- Version: 1.11.0
3
+ Version: 1.11.0rc1
4
4
  Summary: Provider package apache-airflow-providers-openlineage for Apache Airflow
5
5
  Keywords: airflow-provider,openlineage,airflow,integration
6
6
  Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -21,9 +21,8 @@ Classifier: Programming Language :: Python :: 3.10
21
21
  Classifier: Programming Language :: Python :: 3.11
22
22
  Classifier: Programming Language :: Python :: 3.12
23
23
  Classifier: Topic :: System :: Monitoring
24
- Requires-Dist: apache-airflow-providers-common-compat>=1.2.0
25
- Requires-Dist: apache-airflow-providers-common-sql>=1.6.0
26
- Requires-Dist: apache-airflow>=2.8.0
24
+ Requires-Dist: apache-airflow-providers-common-sql>=1.6.0rc0
25
+ Requires-Dist: apache-airflow>=2.8.0rc0
27
26
  Requires-Dist: attrs>=22.2
28
27
  Requires-Dist: openlineage-integration-common>=1.16.0
29
28
  Requires-Dist: openlineage-python>=1.16.0
@@ -81,7 +80,7 @@ Provides-Extra: common.sql
81
80
 
82
81
  Package ``apache-airflow-providers-openlineage``
83
82
 
84
- Release: ``1.11.0``
83
+ Release: ``1.11.0.rc1``
85
84
 
86
85
 
87
86
  `OpenLineage <https://openlineage.io/>`__
@@ -108,16 +107,15 @@ The package supports the following python versions: 3.8,3.9,3.10,3.11,3.12
108
107
  Requirements
109
108
  ------------
110
109
 
111
- ========================================== ==================
112
- PIP package Version required
113
- ========================================== ==================
114
- ``apache-airflow`` ``>=2.8.0``
115
- ``apache-airflow-providers-common-sql`` ``>=1.6.0``
116
- ``apache-airflow-providers-common-compat`` ``>=1.2.0``
117
- ``attrs`` ``>=22.2``
118
- ``openlineage-integration-common`` ``>=1.16.0``
119
- ``openlineage-python`` ``>=1.16.0``
120
- ========================================== ==================
110
+ ======================================= ==================
111
+ PIP package Version required
112
+ ======================================= ==================
113
+ ``apache-airflow`` ``>=2.8.0``
114
+ ``apache-airflow-providers-common-sql`` ``>=1.6.0``
115
+ ``attrs`` ``>=22.2``
116
+ ``openlineage-integration-common`` ``>=1.16.0``
117
+ ``openlineage-python`` ``>=1.16.0``
118
+ ======================================= ==================
121
119
 
122
120
  Cross provider package dependencies
123
121
  -----------------------------------
@@ -42,7 +42,7 @@
42
42
 
43
43
  Package ``apache-airflow-providers-openlineage``
44
44
 
45
- Release: ``1.11.0``
45
+ Release: ``1.11.0.rc1``
46
46
 
47
47
 
48
48
  `OpenLineage <https://openlineage.io/>`__
@@ -69,16 +69,15 @@ The package supports the following python versions: 3.8,3.9,3.10,3.11,3.12
69
69
  Requirements
70
70
  ------------
71
71
 
72
- ========================================== ==================
73
- PIP package Version required
74
- ========================================== ==================
75
- ``apache-airflow`` ``>=2.8.0``
76
- ``apache-airflow-providers-common-sql`` ``>=1.6.0``
77
- ``apache-airflow-providers-common-compat`` ``>=1.2.0``
78
- ``attrs`` ``>=22.2``
79
- ``openlineage-integration-common`` ``>=1.16.0``
80
- ``openlineage-python`` ``>=1.16.0``
81
- ========================================== ==================
72
+ ======================================= ==================
73
+ PIP package Version required
74
+ ======================================= ==================
75
+ ``apache-airflow`` ``>=2.8.0``
76
+ ``apache-airflow-providers-common-sql`` ``>=1.6.0``
77
+ ``attrs`` ``>=22.2``
78
+ ``openlineage-integration-common`` ``>=1.16.0``
79
+ ``openlineage-python`` ``>=1.16.0``
80
+ ======================================= ==================
82
81
 
83
82
  Cross provider package dependencies
84
83
  -----------------------------------
@@ -25,7 +25,6 @@ from airflow.providers.openlineage.extractors.bash import BashExtractor
25
25
  from airflow.providers.openlineage.extractors.python import PythonExtractor
26
26
  from airflow.providers.openlineage.utils.utils import (
27
27
  get_unknown_source_attribute_run_facet,
28
- translate_airflow_dataset,
29
28
  try_import_from_string,
30
29
  )
31
30
  from airflow.utils.log.logging_mixin import LoggingMixin
@@ -91,6 +90,7 @@ class ExtractorManager(LoggingMixin):
91
90
  f"task_id={task.task_id} "
92
91
  f"airflow_run_id={dagrun.run_id} "
93
92
  )
93
+
94
94
  if extractor:
95
95
  # Extracting advanced metadata is only possible when extractor for particular operator
96
96
  # is defined. Without it, we can't extract any input or output data.
@@ -105,22 +105,14 @@ class ExtractorManager(LoggingMixin):
105
105
  task_metadata = self.validate_task_metadata(task_metadata)
106
106
  if task_metadata:
107
107
  if (not task_metadata.inputs) and (not task_metadata.outputs):
108
- if (hook_lineage := self.get_hook_lineage()) is not None:
109
- inputs, outputs = hook_lineage
110
- task_metadata.inputs = inputs
111
- task_metadata.outputs = outputs
112
- else:
113
- self.extract_inlets_and_outlets(task_metadata, task.inlets, task.outlets)
108
+ self.extract_inlets_and_outlets(task_metadata, task.inlets, task.outlets)
109
+
114
110
  return task_metadata
115
111
 
116
112
  except Exception as e:
117
113
  self.log.warning(
118
114
  "Failed to extract metadata using found extractor %s - %s %s", extractor, e, task_info
119
115
  )
120
- elif (hook_lineage := self.get_hook_lineage()) is not None:
121
- inputs, outputs = hook_lineage
122
- task_metadata = OperatorLineage(inputs=inputs, outputs=outputs)
123
- return task_metadata
124
116
  else:
125
117
  self.log.debug("Unable to find an extractor %s", task_info)
126
118
 
@@ -176,30 +168,6 @@ class ExtractorManager(LoggingMixin):
176
168
  if d:
177
169
  task_metadata.outputs.append(d)
178
170
 
179
- def get_hook_lineage(self) -> tuple[list[Dataset], list[Dataset]] | None:
180
- try:
181
- from airflow.lineage.hook import get_hook_lineage_collector
182
- except ImportError:
183
- return None
184
-
185
- if not get_hook_lineage_collector().has_collected:
186
- return None
187
-
188
- return (
189
- [
190
- dataset
191
- for dataset_info in get_hook_lineage_collector().collected_datasets.inputs
192
- if (dataset := translate_airflow_dataset(dataset_info.dataset, dataset_info.context))
193
- is not None
194
- ],
195
- [
196
- dataset
197
- for dataset_info in get_hook_lineage_collector().collected_datasets.outputs
198
- if (dataset := translate_airflow_dataset(dataset_info.dataset, dataset_info.context))
199
- is not None
200
- ],
201
- )
202
-
203
171
  @staticmethod
204
172
  def convert_to_ol_dataset_from_object_storage_uri(uri: str) -> Dataset | None:
205
173
  from urllib.parse import urlparse
@@ -53,7 +53,6 @@ def get_provider_info():
53
53
  "dependencies": [
54
54
  "apache-airflow>=2.8.0",
55
55
  "apache-airflow-providers-common-sql>=1.6.0",
56
- "apache-airflow-providers-common-compat>=1.2.0",
57
56
  "attrs>=22.2",
58
57
  "openlineage-integration-common>=1.16.0",
59
58
  "openlineage-python>=1.16.0",
@@ -25,7 +25,6 @@ from airflow.providers.openlineage.plugins.macros import (
25
25
  lineage_parent_id,
26
26
  lineage_run_id,
27
27
  )
28
- from airflow.providers.openlineage.utils.utils import IS_AIRFLOW_2_10_OR_HIGHER
29
28
 
30
29
 
31
30
  class OpenLineageProviderPlugin(AirflowPlugin):
@@ -40,10 +39,6 @@ class OpenLineageProviderPlugin(AirflowPlugin):
40
39
  if not conf.is_disabled():
41
40
  macros = [lineage_job_namespace, lineage_job_name, lineage_run_id, lineage_parent_id]
42
41
  listeners = [get_openlineage_listener()]
43
- if IS_AIRFLOW_2_10_OR_HIGHER:
44
- from airflow.lineage.hook import HookLineageReader
45
-
46
- hook_lineage_readers = [HookLineageReader]
47
42
  else:
48
43
  macros = []
49
44
  listeners = []
@@ -33,7 +33,7 @@ from packaging.version import Version
33
33
  from airflow import __version__ as AIRFLOW_VERSION
34
34
  from airflow.datasets import Dataset
35
35
  from airflow.exceptions import AirflowProviderDeprecationWarning # TODO: move this maybe to Airflow's logic?
36
- from airflow.models import DAG, BaseOperator, MappedOperator
36
+ from airflow.models import DAG, BaseOperator, MappedOperator, Operator
37
37
  from airflow.providers.openlineage import conf
38
38
  from airflow.providers.openlineage.plugins.facets import (
39
39
  AirflowDagRunFacet,
@@ -423,7 +423,7 @@ def get_airflow_job_facet(dag_run: DagRun) -> dict[str, AirflowJobFacet]:
423
423
  return {}
424
424
  return {
425
425
  "airflow": AirflowJobFacet(
426
- taskTree={}, # caused OOM errors, to be removed, see #41587
426
+ taskTree=_get_parsed_dag_tree(dag_run.dag),
427
427
  taskGroups=_get_task_groups_details(dag_run.dag),
428
428
  tasks=_get_tasks_details(dag_run.dag),
429
429
  )
@@ -439,6 +439,43 @@ def get_airflow_state_run_facet(dag_run: DagRun) -> dict[str, AirflowStateRunFac
439
439
  }
440
440
 
441
441
 
442
+ def _get_parsed_dag_tree(dag: DAG) -> dict:
443
+ """
444
+ Get DAG's tasks hierarchy representation.
445
+
446
+ While the task dependencies are defined as following:
447
+ task >> [task_2, task_4] >> task_7
448
+ task_3 >> task_5
449
+ task_6 # has no dependencies, it's a root and a leaf
450
+
451
+ The result of this function will look like:
452
+ {
453
+ "task": {
454
+ "task_2": {
455
+ "task_7": {}
456
+ },
457
+ "task_4": {
458
+ "task_7": {}
459
+ }
460
+ },
461
+ "task_3": {
462
+ "task_5": {}
463
+ },
464
+ "task_6": {}
465
+ }
466
+ """
467
+
468
+ def get_downstream(task: Operator, current_dict: dict):
469
+ current_dict[task.task_id] = {}
470
+ for tmp_task in sorted(task.downstream_list, key=lambda x: x.task_id):
471
+ get_downstream(tmp_task, current_dict[task.task_id])
472
+
473
+ task_dict: dict = {}
474
+ for t in sorted(dag.roots, key=lambda x: x.task_id):
475
+ get_downstream(t, task_dict)
476
+ return task_dict
477
+
478
+
442
479
  def _get_tasks_details(dag: DAG) -> dict:
443
480
  tasks = {
444
481
  single_task.task_id: {
@@ -450,9 +487,8 @@ def _get_tasks_details(dag: DAG) -> dict:
450
487
  "ui_label": single_task.label,
451
488
  "is_setup": single_task.is_setup,
452
489
  "is_teardown": single_task.is_teardown,
453
- "downstream_task_ids": sorted(single_task.downstream_task_ids),
454
490
  }
455
- for single_task in sorted(dag.tasks, key=lambda x: x.task_id)
491
+ for single_task in dag.tasks
456
492
  }
457
493
 
458
494
  return tasks
@@ -28,7 +28,7 @@ build-backend = "flit_core.buildapi"
28
28
 
29
29
  [project]
30
30
  name = "apache-airflow-providers-openlineage"
31
- version = "1.11.0"
31
+ version = "1.11.0.rc1"
32
32
  description = "Provider package apache-airflow-providers-openlineage for Apache Airflow"
33
33
  readme = "README.rst"
34
34
  authors = [
@@ -56,9 +56,8 @@ classifiers = [
56
56
  ]
57
57
  requires-python = "~=3.8"
58
58
  dependencies = [
59
- "apache-airflow-providers-common-compat>=1.2.0",
60
- "apache-airflow-providers-common-sql>=1.6.0",
61
- "apache-airflow>=2.8.0",
59
+ "apache-airflow-providers-common-sql>=1.6.0rc0",
60
+ "apache-airflow>=2.8.0rc0",
62
61
  "attrs>=22.2",
63
62
  "openlineage-integration-common>=1.16.0",
64
63
  "openlineage-python>=1.16.0",