apache-airflow-providers-openlineage 1.11.0rc1__tar.gz → 1.11.0rc2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/PKG-INFO +13 -11
- {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/README.rst +11 -10
- {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/extractors/manager.py +35 -3
- {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/get_provider_info.py +1 -0
- {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/plugins/openlineage.py +5 -0
- {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/utils/utils.py +4 -40
- {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/pyproject.toml +2 -1
- {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/LICENSE +0 -0
- {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/__init__.py +0 -0
- {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/conf.py +0 -0
- {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/extractors/__init__.py +0 -0
- {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/extractors/base.py +0 -0
- {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/extractors/bash.py +0 -0
- {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/extractors/python.py +0 -0
- {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/facets/AirflowDagRunFacet.json +0 -0
- {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/facets/AirflowDebugRunFacet.json +0 -0
- {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/facets/AirflowJobFacet.json +0 -0
- {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/facets/AirflowRunFacet.json +0 -0
- {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/facets/AirflowStateRunFacet.json +0 -0
- {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/facets/__init__.py +0 -0
- {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/plugins/__init__.py +0 -0
- {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/plugins/adapter.py +0 -0
- {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/plugins/facets.py +0 -0
- {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/plugins/listener.py +0 -0
- {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/plugins/macros.py +0 -0
- {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/sqlparser.py +0 -0
- {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/utils/__init__.py +0 -0
- {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/utils/selective_enable.py +0 -0
- {apache_airflow_providers_openlineage-1.11.0rc1 → apache_airflow_providers_openlineage-1.11.0rc2}/airflow/providers/openlineage/utils/sql.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: apache-airflow-providers-openlineage
|
|
3
|
-
Version: 1.11.0rc1
|
|
3
|
+
Version: 1.11.0rc2
|
|
4
4
|
Summary: Provider package apache-airflow-providers-openlineage for Apache Airflow
|
|
5
5
|
Keywords: airflow-provider,openlineage,airflow,integration
|
|
6
6
|
Author-email: Apache Software Foundation <dev@airflow.apache.org>
|
|
@@ -21,6 +21,7 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
21
21
|
Classifier: Programming Language :: Python :: 3.11
|
|
22
22
|
Classifier: Programming Language :: Python :: 3.12
|
|
23
23
|
Classifier: Topic :: System :: Monitoring
|
|
24
|
+
Requires-Dist: apache-airflow-providers-common-compat>=1.2.0rc0
|
|
24
25
|
Requires-Dist: apache-airflow-providers-common-sql>=1.6.0rc0
|
|
25
26
|
Requires-Dist: apache-airflow>=2.8.0rc0
|
|
26
27
|
Requires-Dist: attrs>=22.2
|
|
@@ -80,7 +81,7 @@ Provides-Extra: common.sql
|
|
|
80
81
|
|
|
81
82
|
Package ``apache-airflow-providers-openlineage``
|
|
82
83
|
|
|
83
|
-
Release: ``1.11.0.rc1``
|
|
84
|
+
Release: ``1.11.0.rc2``
|
|
84
85
|
|
|
85
86
|
|
|
86
87
|
`OpenLineage <https://openlineage.io/>`__
|
|
@@ -107,15 +108,16 @@ The package supports the following python versions: 3.8,3.9,3.10,3.11,3.12
|
|
|
107
108
|
Requirements
|
|
108
109
|
------------
|
|
109
110
|
|
|
110
|
-
|
|
111
|
-
PIP package
|
|
112
|
-
|
|
113
|
-
``apache-airflow``
|
|
114
|
-
``apache-airflow-providers-common-sql``
|
|
115
|
-
``
|
|
116
|
-
``
|
|
117
|
-
``openlineage-
|
|
118
|
-
|
|
111
|
+
========================================== ==================
|
|
112
|
+
PIP package Version required
|
|
113
|
+
========================================== ==================
|
|
114
|
+
``apache-airflow`` ``>=2.8.0``
|
|
115
|
+
``apache-airflow-providers-common-sql`` ``>=1.6.0``
|
|
116
|
+
``apache-airflow-providers-common-compat`` ``>=1.2.0``
|
|
117
|
+
``attrs`` ``>=22.2``
|
|
118
|
+
``openlineage-integration-common`` ``>=1.16.0``
|
|
119
|
+
``openlineage-python`` ``>=1.16.0``
|
|
120
|
+
========================================== ==================
|
|
119
121
|
|
|
120
122
|
Cross provider package dependencies
|
|
121
123
|
-----------------------------------
|
|
@@ -42,7 +42,7 @@
|
|
|
42
42
|
|
|
43
43
|
Package ``apache-airflow-providers-openlineage``
|
|
44
44
|
|
|
45
|
-
Release: ``1.11.0.rc1``
|
|
45
|
+
Release: ``1.11.0.rc2``
|
|
46
46
|
|
|
47
47
|
|
|
48
48
|
`OpenLineage <https://openlineage.io/>`__
|
|
@@ -69,15 +69,16 @@ The package supports the following python versions: 3.8,3.9,3.10,3.11,3.12
|
|
|
69
69
|
Requirements
|
|
70
70
|
------------
|
|
71
71
|
|
|
72
|
-
|
|
73
|
-
PIP package
|
|
74
|
-
|
|
75
|
-
``apache-airflow``
|
|
76
|
-
``apache-airflow-providers-common-sql``
|
|
77
|
-
``
|
|
78
|
-
``
|
|
79
|
-
``openlineage-
|
|
80
|
-
|
|
72
|
+
========================================== ==================
|
|
73
|
+
PIP package Version required
|
|
74
|
+
========================================== ==================
|
|
75
|
+
``apache-airflow`` ``>=2.8.0``
|
|
76
|
+
``apache-airflow-providers-common-sql`` ``>=1.6.0``
|
|
77
|
+
``apache-airflow-providers-common-compat`` ``>=1.2.0``
|
|
78
|
+
``attrs`` ``>=22.2``
|
|
79
|
+
``openlineage-integration-common`` ``>=1.16.0``
|
|
80
|
+
``openlineage-python`` ``>=1.16.0``
|
|
81
|
+
========================================== ==================
|
|
81
82
|
|
|
82
83
|
Cross provider package dependencies
|
|
83
84
|
-----------------------------------
|
|
@@ -25,6 +25,7 @@ from airflow.providers.openlineage.extractors.bash import BashExtractor
|
|
|
25
25
|
from airflow.providers.openlineage.extractors.python import PythonExtractor
|
|
26
26
|
from airflow.providers.openlineage.utils.utils import (
|
|
27
27
|
get_unknown_source_attribute_run_facet,
|
|
28
|
+
translate_airflow_dataset,
|
|
28
29
|
try_import_from_string,
|
|
29
30
|
)
|
|
30
31
|
from airflow.utils.log.logging_mixin import LoggingMixin
|
|
@@ -90,7 +91,6 @@ class ExtractorManager(LoggingMixin):
|
|
|
90
91
|
f"task_id={task.task_id} "
|
|
91
92
|
f"airflow_run_id={dagrun.run_id} "
|
|
92
93
|
)
|
|
93
|
-
|
|
94
94
|
if extractor:
|
|
95
95
|
# Extracting advanced metadata is only possible when extractor for particular operator
|
|
96
96
|
# is defined. Without it, we can't extract any input or output data.
|
|
@@ -105,14 +105,22 @@ class ExtractorManager(LoggingMixin):
|
|
|
105
105
|
task_metadata = self.validate_task_metadata(task_metadata)
|
|
106
106
|
if task_metadata:
|
|
107
107
|
if (not task_metadata.inputs) and (not task_metadata.outputs):
|
|
108
|
-
self.extract_inlets_and_outlets(task_metadata, task.inlets, task.outlets)
|
|
109
|
-
|
|
108
|
+
if (hook_lineage := self.get_hook_lineage()) is not None:
|
|
109
|
+
inputs, outputs = hook_lineage
|
|
110
|
+
task_metadata.inputs = inputs
|
|
111
|
+
task_metadata.outputs = outputs
|
|
112
|
+
else:
|
|
113
|
+
self.extract_inlets_and_outlets(task_metadata, task.inlets, task.outlets)
|
|
110
114
|
return task_metadata
|
|
111
115
|
|
|
112
116
|
except Exception as e:
|
|
113
117
|
self.log.warning(
|
|
114
118
|
"Failed to extract metadata using found extractor %s - %s %s", extractor, e, task_info
|
|
115
119
|
)
|
|
120
|
+
elif (hook_lineage := self.get_hook_lineage()) is not None:
|
|
121
|
+
inputs, outputs = hook_lineage
|
|
122
|
+
task_metadata = OperatorLineage(inputs=inputs, outputs=outputs)
|
|
123
|
+
return task_metadata
|
|
116
124
|
else:
|
|
117
125
|
self.log.debug("Unable to find an extractor %s", task_info)
|
|
118
126
|
|
|
@@ -168,6 +176,30 @@ class ExtractorManager(LoggingMixin):
|
|
|
168
176
|
if d:
|
|
169
177
|
task_metadata.outputs.append(d)
|
|
170
178
|
|
|
179
|
+
def get_hook_lineage(self) -> tuple[list[Dataset], list[Dataset]] | None:
|
|
180
|
+
try:
|
|
181
|
+
from airflow.lineage.hook import get_hook_lineage_collector
|
|
182
|
+
except ImportError:
|
|
183
|
+
return None
|
|
184
|
+
|
|
185
|
+
if not get_hook_lineage_collector().has_collected:
|
|
186
|
+
return None
|
|
187
|
+
|
|
188
|
+
return (
|
|
189
|
+
[
|
|
190
|
+
dataset
|
|
191
|
+
for dataset_info in get_hook_lineage_collector().collected_datasets.inputs
|
|
192
|
+
if (dataset := translate_airflow_dataset(dataset_info.dataset, dataset_info.context))
|
|
193
|
+
is not None
|
|
194
|
+
],
|
|
195
|
+
[
|
|
196
|
+
dataset
|
|
197
|
+
for dataset_info in get_hook_lineage_collector().collected_datasets.outputs
|
|
198
|
+
if (dataset := translate_airflow_dataset(dataset_info.dataset, dataset_info.context))
|
|
199
|
+
is not None
|
|
200
|
+
],
|
|
201
|
+
)
|
|
202
|
+
|
|
171
203
|
@staticmethod
|
|
172
204
|
def convert_to_ol_dataset_from_object_storage_uri(uri: str) -> Dataset | None:
|
|
173
205
|
from urllib.parse import urlparse
|
|
@@ -53,6 +53,7 @@ def get_provider_info():
|
|
|
53
53
|
"dependencies": [
|
|
54
54
|
"apache-airflow>=2.8.0",
|
|
55
55
|
"apache-airflow-providers-common-sql>=1.6.0",
|
|
56
|
+
"apache-airflow-providers-common-compat>=1.2.0",
|
|
56
57
|
"attrs>=22.2",
|
|
57
58
|
"openlineage-integration-common>=1.16.0",
|
|
58
59
|
"openlineage-python>=1.16.0",
|
|
@@ -25,6 +25,7 @@ from airflow.providers.openlineage.plugins.macros import (
|
|
|
25
25
|
lineage_parent_id,
|
|
26
26
|
lineage_run_id,
|
|
27
27
|
)
|
|
28
|
+
from airflow.providers.openlineage.utils.utils import IS_AIRFLOW_2_10_OR_HIGHER
|
|
28
29
|
|
|
29
30
|
|
|
30
31
|
class OpenLineageProviderPlugin(AirflowPlugin):
|
|
@@ -39,6 +40,10 @@ class OpenLineageProviderPlugin(AirflowPlugin):
|
|
|
39
40
|
if not conf.is_disabled():
|
|
40
41
|
macros = [lineage_job_namespace, lineage_job_name, lineage_run_id, lineage_parent_id]
|
|
41
42
|
listeners = [get_openlineage_listener()]
|
|
43
|
+
if IS_AIRFLOW_2_10_OR_HIGHER:
|
|
44
|
+
from airflow.lineage.hook import HookLineageReader
|
|
45
|
+
|
|
46
|
+
hook_lineage_readers = [HookLineageReader]
|
|
42
47
|
else:
|
|
43
48
|
macros = []
|
|
44
49
|
listeners = []
|
|
@@ -33,7 +33,7 @@ from packaging.version import Version
|
|
|
33
33
|
from airflow import __version__ as AIRFLOW_VERSION
|
|
34
34
|
from airflow.datasets import Dataset
|
|
35
35
|
from airflow.exceptions import AirflowProviderDeprecationWarning # TODO: move this maybe to Airflow's logic?
|
|
36
|
-
from airflow.models import DAG, BaseOperator, MappedOperator, Operator
|
|
36
|
+
from airflow.models import DAG, BaseOperator, MappedOperator
|
|
37
37
|
from airflow.providers.openlineage import conf
|
|
38
38
|
from airflow.providers.openlineage.plugins.facets import (
|
|
39
39
|
AirflowDagRunFacet,
|
|
@@ -423,7 +423,7 @@ def get_airflow_job_facet(dag_run: DagRun) -> dict[str, AirflowJobFacet]:
|
|
|
423
423
|
return {}
|
|
424
424
|
return {
|
|
425
425
|
"airflow": AirflowJobFacet(
|
|
426
|
-
taskTree=_get_parsed_dag_tree(dag_run.dag),
|
|
426
|
+
taskTree={}, # caused OOM errors, to be removed, see #41587
|
|
427
427
|
taskGroups=_get_task_groups_details(dag_run.dag),
|
|
428
428
|
tasks=_get_tasks_details(dag_run.dag),
|
|
429
429
|
)
|
|
@@ -439,43 +439,6 @@ def get_airflow_state_run_facet(dag_run: DagRun) -> dict[str, AirflowStateRunFac
|
|
|
439
439
|
}
|
|
440
440
|
|
|
441
441
|
|
|
442
|
-
def _get_parsed_dag_tree(dag: DAG) -> dict:
|
|
443
|
-
"""
|
|
444
|
-
Get DAG's tasks hierarchy representation.
|
|
445
|
-
|
|
446
|
-
While the task dependencies are defined as following:
|
|
447
|
-
task >> [task_2, task_4] >> task_7
|
|
448
|
-
task_3 >> task_5
|
|
449
|
-
task_6 # has no dependencies, it's a root and a leaf
|
|
450
|
-
|
|
451
|
-
The result of this function will look like:
|
|
452
|
-
{
|
|
453
|
-
"task": {
|
|
454
|
-
"task_2": {
|
|
455
|
-
"task_7": {}
|
|
456
|
-
},
|
|
457
|
-
"task_4": {
|
|
458
|
-
"task_7": {}
|
|
459
|
-
}
|
|
460
|
-
},
|
|
461
|
-
"task_3": {
|
|
462
|
-
"task_5": {}
|
|
463
|
-
},
|
|
464
|
-
"task_6": {}
|
|
465
|
-
}
|
|
466
|
-
"""
|
|
467
|
-
|
|
468
|
-
def get_downstream(task: Operator, current_dict: dict):
|
|
469
|
-
current_dict[task.task_id] = {}
|
|
470
|
-
for tmp_task in sorted(task.downstream_list, key=lambda x: x.task_id):
|
|
471
|
-
get_downstream(tmp_task, current_dict[task.task_id])
|
|
472
|
-
|
|
473
|
-
task_dict: dict = {}
|
|
474
|
-
for t in sorted(dag.roots, key=lambda x: x.task_id):
|
|
475
|
-
get_downstream(t, task_dict)
|
|
476
|
-
return task_dict
|
|
477
|
-
|
|
478
|
-
|
|
479
442
|
def _get_tasks_details(dag: DAG) -> dict:
|
|
480
443
|
tasks = {
|
|
481
444
|
single_task.task_id: {
|
|
@@ -487,8 +450,9 @@ def _get_tasks_details(dag: DAG) -> dict:
|
|
|
487
450
|
"ui_label": single_task.label,
|
|
488
451
|
"is_setup": single_task.is_setup,
|
|
489
452
|
"is_teardown": single_task.is_teardown,
|
|
453
|
+
"downstream_task_ids": sorted(single_task.downstream_task_ids),
|
|
490
454
|
}
|
|
491
|
-
for single_task in dag.tasks
|
|
455
|
+
for single_task in sorted(dag.tasks, key=lambda x: x.task_id)
|
|
492
456
|
}
|
|
493
457
|
|
|
494
458
|
return tasks
|
|
@@ -28,7 +28,7 @@ build-backend = "flit_core.buildapi"
|
|
|
28
28
|
|
|
29
29
|
[project]
|
|
30
30
|
name = "apache-airflow-providers-openlineage"
|
|
31
|
-
version = "1.11.0.rc1"
|
|
31
|
+
version = "1.11.0.rc2"
|
|
32
32
|
description = "Provider package apache-airflow-providers-openlineage for Apache Airflow"
|
|
33
33
|
readme = "README.rst"
|
|
34
34
|
authors = [
|
|
@@ -56,6 +56,7 @@ classifiers = [
|
|
|
56
56
|
]
|
|
57
57
|
requires-python = "~=3.8"
|
|
58
58
|
dependencies = [
|
|
59
|
+
"apache-airflow-providers-common-compat>=1.2.0rc0",
|
|
59
60
|
"apache-airflow-providers-common-sql>=1.6.0rc0",
|
|
60
61
|
"apache-airflow>=2.8.0rc0",
|
|
61
62
|
"attrs>=22.2",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|