apache-airflow-providers-openlineage 1.11.0__py3-none-any.whl → 1.11.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/openlineage/extractors/manager.py +3 -35
- airflow/providers/openlineage/get_provider_info.py +0 -1
- airflow/providers/openlineage/plugins/openlineage.py +0 -5
- airflow/providers/openlineage/utils/utils.py +40 -4
- {apache_airflow_providers_openlineage-1.11.0.dist-info → apache_airflow_providers_openlineage-1.11.0rc1.dist-info}/METADATA +13 -15
- {apache_airflow_providers_openlineage-1.11.0.dist-info → apache_airflow_providers_openlineage-1.11.0rc1.dist-info}/RECORD +8 -8
- {apache_airflow_providers_openlineage-1.11.0.dist-info → apache_airflow_providers_openlineage-1.11.0rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_openlineage-1.11.0.dist-info → apache_airflow_providers_openlineage-1.11.0rc1.dist-info}/entry_points.txt +0 -0
|
@@ -25,7 +25,6 @@ from airflow.providers.openlineage.extractors.bash import BashExtractor
|
|
|
25
25
|
from airflow.providers.openlineage.extractors.python import PythonExtractor
|
|
26
26
|
from airflow.providers.openlineage.utils.utils import (
|
|
27
27
|
get_unknown_source_attribute_run_facet,
|
|
28
|
-
translate_airflow_dataset,
|
|
29
28
|
try_import_from_string,
|
|
30
29
|
)
|
|
31
30
|
from airflow.utils.log.logging_mixin import LoggingMixin
|
|
@@ -91,6 +90,7 @@ class ExtractorManager(LoggingMixin):
|
|
|
91
90
|
f"task_id={task.task_id} "
|
|
92
91
|
f"airflow_run_id={dagrun.run_id} "
|
|
93
92
|
)
|
|
93
|
+
|
|
94
94
|
if extractor:
|
|
95
95
|
# Extracting advanced metadata is only possible when extractor for particular operator
|
|
96
96
|
# is defined. Without it, we can't extract any input or output data.
|
|
@@ -105,22 +105,14 @@ class ExtractorManager(LoggingMixin):
|
|
|
105
105
|
task_metadata = self.validate_task_metadata(task_metadata)
|
|
106
106
|
if task_metadata:
|
|
107
107
|
if (not task_metadata.inputs) and (not task_metadata.outputs):
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
task_metadata.inputs = inputs
|
|
111
|
-
task_metadata.outputs = outputs
|
|
112
|
-
else:
|
|
113
|
-
self.extract_inlets_and_outlets(task_metadata, task.inlets, task.outlets)
|
|
108
|
+
self.extract_inlets_and_outlets(task_metadata, task.inlets, task.outlets)
|
|
109
|
+
|
|
114
110
|
return task_metadata
|
|
115
111
|
|
|
116
112
|
except Exception as e:
|
|
117
113
|
self.log.warning(
|
|
118
114
|
"Failed to extract metadata using found extractor %s - %s %s", extractor, e, task_info
|
|
119
115
|
)
|
|
120
|
-
elif (hook_lineage := self.get_hook_lineage()) is not None:
|
|
121
|
-
inputs, outputs = hook_lineage
|
|
122
|
-
task_metadata = OperatorLineage(inputs=inputs, outputs=outputs)
|
|
123
|
-
return task_metadata
|
|
124
116
|
else:
|
|
125
117
|
self.log.debug("Unable to find an extractor %s", task_info)
|
|
126
118
|
|
|
@@ -176,30 +168,6 @@ class ExtractorManager(LoggingMixin):
|
|
|
176
168
|
if d:
|
|
177
169
|
task_metadata.outputs.append(d)
|
|
178
170
|
|
|
179
|
-
def get_hook_lineage(self) -> tuple[list[Dataset], list[Dataset]] | None:
|
|
180
|
-
try:
|
|
181
|
-
from airflow.lineage.hook import get_hook_lineage_collector
|
|
182
|
-
except ImportError:
|
|
183
|
-
return None
|
|
184
|
-
|
|
185
|
-
if not get_hook_lineage_collector().has_collected:
|
|
186
|
-
return None
|
|
187
|
-
|
|
188
|
-
return (
|
|
189
|
-
[
|
|
190
|
-
dataset
|
|
191
|
-
for dataset_info in get_hook_lineage_collector().collected_datasets.inputs
|
|
192
|
-
if (dataset := translate_airflow_dataset(dataset_info.dataset, dataset_info.context))
|
|
193
|
-
is not None
|
|
194
|
-
],
|
|
195
|
-
[
|
|
196
|
-
dataset
|
|
197
|
-
for dataset_info in get_hook_lineage_collector().collected_datasets.outputs
|
|
198
|
-
if (dataset := translate_airflow_dataset(dataset_info.dataset, dataset_info.context))
|
|
199
|
-
is not None
|
|
200
|
-
],
|
|
201
|
-
)
|
|
202
|
-
|
|
203
171
|
@staticmethod
|
|
204
172
|
def convert_to_ol_dataset_from_object_storage_uri(uri: str) -> Dataset | None:
|
|
205
173
|
from urllib.parse import urlparse
|
|
@@ -53,7 +53,6 @@ def get_provider_info():
|
|
|
53
53
|
"dependencies": [
|
|
54
54
|
"apache-airflow>=2.8.0",
|
|
55
55
|
"apache-airflow-providers-common-sql>=1.6.0",
|
|
56
|
-
"apache-airflow-providers-common-compat>=1.2.0",
|
|
57
56
|
"attrs>=22.2",
|
|
58
57
|
"openlineage-integration-common>=1.16.0",
|
|
59
58
|
"openlineage-python>=1.16.0",
|
|
@@ -25,7 +25,6 @@ from airflow.providers.openlineage.plugins.macros import (
|
|
|
25
25
|
lineage_parent_id,
|
|
26
26
|
lineage_run_id,
|
|
27
27
|
)
|
|
28
|
-
from airflow.providers.openlineage.utils.utils import IS_AIRFLOW_2_10_OR_HIGHER
|
|
29
28
|
|
|
30
29
|
|
|
31
30
|
class OpenLineageProviderPlugin(AirflowPlugin):
|
|
@@ -40,10 +39,6 @@ class OpenLineageProviderPlugin(AirflowPlugin):
|
|
|
40
39
|
if not conf.is_disabled():
|
|
41
40
|
macros = [lineage_job_namespace, lineage_job_name, lineage_run_id, lineage_parent_id]
|
|
42
41
|
listeners = [get_openlineage_listener()]
|
|
43
|
-
if IS_AIRFLOW_2_10_OR_HIGHER:
|
|
44
|
-
from airflow.lineage.hook import HookLineageReader
|
|
45
|
-
|
|
46
|
-
hook_lineage_readers = [HookLineageReader]
|
|
47
42
|
else:
|
|
48
43
|
macros = []
|
|
49
44
|
listeners = []
|
|
@@ -33,7 +33,7 @@ from packaging.version import Version
|
|
|
33
33
|
from airflow import __version__ as AIRFLOW_VERSION
|
|
34
34
|
from airflow.datasets import Dataset
|
|
35
35
|
from airflow.exceptions import AirflowProviderDeprecationWarning # TODO: move this maybe to Airflow's logic?
|
|
36
|
-
from airflow.models import DAG, BaseOperator, MappedOperator
|
|
36
|
+
from airflow.models import DAG, BaseOperator, MappedOperator, Operator
|
|
37
37
|
from airflow.providers.openlineage import conf
|
|
38
38
|
from airflow.providers.openlineage.plugins.facets import (
|
|
39
39
|
AirflowDagRunFacet,
|
|
@@ -423,7 +423,7 @@ def get_airflow_job_facet(dag_run: DagRun) -> dict[str, AirflowJobFacet]:
|
|
|
423
423
|
return {}
|
|
424
424
|
return {
|
|
425
425
|
"airflow": AirflowJobFacet(
|
|
426
|
-
taskTree=
|
|
426
|
+
taskTree=_get_parsed_dag_tree(dag_run.dag),
|
|
427
427
|
taskGroups=_get_task_groups_details(dag_run.dag),
|
|
428
428
|
tasks=_get_tasks_details(dag_run.dag),
|
|
429
429
|
)
|
|
@@ -439,6 +439,43 @@ def get_airflow_state_run_facet(dag_run: DagRun) -> dict[str, AirflowStateRunFac
|
|
|
439
439
|
}
|
|
440
440
|
|
|
441
441
|
|
|
442
|
+
def _get_parsed_dag_tree(dag: DAG) -> dict:
|
|
443
|
+
"""
|
|
444
|
+
Get DAG's tasks hierarchy representation.
|
|
445
|
+
|
|
446
|
+
While the task dependencies are defined as following:
|
|
447
|
+
task >> [task_2, task_4] >> task_7
|
|
448
|
+
task_3 >> task_5
|
|
449
|
+
task_6 # has no dependencies, it's a root and a leaf
|
|
450
|
+
|
|
451
|
+
The result of this function will look like:
|
|
452
|
+
{
|
|
453
|
+
"task": {
|
|
454
|
+
"task_2": {
|
|
455
|
+
"task_7": {}
|
|
456
|
+
},
|
|
457
|
+
"task_4": {
|
|
458
|
+
"task_7": {}
|
|
459
|
+
}
|
|
460
|
+
},
|
|
461
|
+
"task_3": {
|
|
462
|
+
"task_5": {}
|
|
463
|
+
},
|
|
464
|
+
"task_6": {}
|
|
465
|
+
}
|
|
466
|
+
"""
|
|
467
|
+
|
|
468
|
+
def get_downstream(task: Operator, current_dict: dict):
|
|
469
|
+
current_dict[task.task_id] = {}
|
|
470
|
+
for tmp_task in sorted(task.downstream_list, key=lambda x: x.task_id):
|
|
471
|
+
get_downstream(tmp_task, current_dict[task.task_id])
|
|
472
|
+
|
|
473
|
+
task_dict: dict = {}
|
|
474
|
+
for t in sorted(dag.roots, key=lambda x: x.task_id):
|
|
475
|
+
get_downstream(t, task_dict)
|
|
476
|
+
return task_dict
|
|
477
|
+
|
|
478
|
+
|
|
442
479
|
def _get_tasks_details(dag: DAG) -> dict:
|
|
443
480
|
tasks = {
|
|
444
481
|
single_task.task_id: {
|
|
@@ -450,9 +487,8 @@ def _get_tasks_details(dag: DAG) -> dict:
|
|
|
450
487
|
"ui_label": single_task.label,
|
|
451
488
|
"is_setup": single_task.is_setup,
|
|
452
489
|
"is_teardown": single_task.is_teardown,
|
|
453
|
-
"downstream_task_ids": sorted(single_task.downstream_task_ids),
|
|
454
490
|
}
|
|
455
|
-
for single_task in
|
|
491
|
+
for single_task in dag.tasks
|
|
456
492
|
}
|
|
457
493
|
|
|
458
494
|
return tasks
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: apache-airflow-providers-openlineage
|
|
3
|
-
Version: 1.11.0
|
|
3
|
+
Version: 1.11.0rc1
|
|
4
4
|
Summary: Provider package apache-airflow-providers-openlineage for Apache Airflow
|
|
5
5
|
Keywords: airflow-provider,openlineage,airflow,integration
|
|
6
6
|
Author-email: Apache Software Foundation <dev@airflow.apache.org>
|
|
@@ -21,9 +21,8 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
21
21
|
Classifier: Programming Language :: Python :: 3.11
|
|
22
22
|
Classifier: Programming Language :: Python :: 3.12
|
|
23
23
|
Classifier: Topic :: System :: Monitoring
|
|
24
|
-
Requires-Dist: apache-airflow-providers-common-compat>=1.2.0
|
|
25
|
-
Requires-Dist: apache-airflow-providers-common-sql>=1.6.0
|
|
26
|
-
Requires-Dist: apache-airflow>=2.8.0
|
|
24
|
+
Requires-Dist: apache-airflow-providers-common-sql>=1.6.0rc0
|
|
25
|
+
Requires-Dist: apache-airflow>=2.8.0rc0
|
|
27
26
|
Requires-Dist: attrs>=22.2
|
|
28
27
|
Requires-Dist: openlineage-integration-common>=1.16.0
|
|
29
28
|
Requires-Dist: openlineage-python>=1.16.0
|
|
@@ -81,7 +80,7 @@ Provides-Extra: common.sql
|
|
|
81
80
|
|
|
82
81
|
Package ``apache-airflow-providers-openlineage``
|
|
83
82
|
|
|
84
|
-
Release: ``1.11.0``
|
|
83
|
+
Release: ``1.11.0.rc1``
|
|
85
84
|
|
|
86
85
|
|
|
87
86
|
`OpenLineage <https://openlineage.io/>`__
|
|
@@ -108,16 +107,15 @@ The package supports the following python versions: 3.8,3.9,3.10,3.11,3.12
|
|
|
108
107
|
Requirements
|
|
109
108
|
------------
|
|
110
109
|
|
|
111
|
-
|
|
112
|
-
PIP package
|
|
113
|
-
|
|
114
|
-
``apache-airflow``
|
|
115
|
-
``apache-airflow-providers-common-sql``
|
|
116
|
-
``
|
|
117
|
-
``
|
|
118
|
-
``openlineage-
|
|
119
|
-
|
|
120
|
-
========================================== ==================
|
|
110
|
+
======================================= ==================
|
|
111
|
+
PIP package Version required
|
|
112
|
+
======================================= ==================
|
|
113
|
+
``apache-airflow`` ``>=2.8.0``
|
|
114
|
+
``apache-airflow-providers-common-sql`` ``>=1.6.0``
|
|
115
|
+
``attrs`` ``>=22.2``
|
|
116
|
+
``openlineage-integration-common`` ``>=1.16.0``
|
|
117
|
+
``openlineage-python`` ``>=1.16.0``
|
|
118
|
+
======================================= ==================
|
|
121
119
|
|
|
122
120
|
Cross provider package dependencies
|
|
123
121
|
-----------------------------------
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
airflow/providers/openlineage/LICENSE,sha256=FFb4jd2AXnOOf7XLP04pQW6jbdhG49TxlGY6fFpCV1Y,13609
|
|
2
2
|
airflow/providers/openlineage/__init__.py,sha256=jUD56ffTq0DAqqtYoTQi2pjOt51XQnVZsPuAJPt0UV0,1499
|
|
3
3
|
airflow/providers/openlineage/conf.py,sha256=paV6AHxPJPdQyVMoTGBRHymT-bpbs4Tovttqy9oicMs,5151
|
|
4
|
-
airflow/providers/openlineage/get_provider_info.py,sha256=
|
|
4
|
+
airflow/providers/openlineage/get_provider_info.py,sha256=I8lTHWr3IccBOsCicCr_sR304Ayij9mVGc0dwI9zgtI,8824
|
|
5
5
|
airflow/providers/openlineage/sqlparser.py,sha256=c7q3VVw41S87ZFozrkrEr2oZK79N12mC3KdDs9V1IuM,15581
|
|
6
6
|
airflow/providers/openlineage/extractors/__init__.py,sha256=I0X4f6zUniclyD9zT0DFHRImpCpJVP4MkPJT3cd7X5I,1081
|
|
7
7
|
airflow/providers/openlineage/extractors/base.py,sha256=olafVPBKxeGjqXerCYM0vj2q78Lm4ErWjex_R6nhjKY,6635
|
|
8
8
|
airflow/providers/openlineage/extractors/bash.py,sha256=3aR0PXs8fzRLibRxXN1R8wMZnGzyCur7mjpy8e5GC4A,2583
|
|
9
|
-
airflow/providers/openlineage/extractors/manager.py,sha256=
|
|
9
|
+
airflow/providers/openlineage/extractors/manager.py,sha256=uuSGLZgJHu0tu3RGzrc41uw6SahdgU3816T26FtnFEY,10539
|
|
10
10
|
airflow/providers/openlineage/extractors/python.py,sha256=hVWOplMlBimrpPKPeW6vm75a8OmAYMU1oJzqMz8Jh90,3171
|
|
11
11
|
airflow/providers/openlineage/facets/AirflowDagRunFacet.json,sha256=ie6c-J3-wGgk80WDTGWePz18o6DbW--TNM7BMF4WfcU,2251
|
|
12
12
|
airflow/providers/openlineage/facets/AirflowDebugRunFacet.json,sha256=_zA5gFqGje5MOH1SmdMeA5ViOHvW_pV4oijEAvkuBbY,768
|
|
@@ -19,12 +19,12 @@ airflow/providers/openlineage/plugins/adapter.py,sha256=mdXF8ZgyYLgjyWForHQnfKxn
|
|
|
19
19
|
airflow/providers/openlineage/plugins/facets.py,sha256=VvyMYR6ONkC95q5FdNmohv0scbA1Ej_B5cQ97as5GvA,4161
|
|
20
20
|
airflow/providers/openlineage/plugins/listener.py,sha256=vPtYWYYzKsNw6cR3MsXM4rfnFGoc-xPDGa_6jfTkuLU,18754
|
|
21
21
|
airflow/providers/openlineage/plugins/macros.py,sha256=hgFA3ZdQibyn4KXIOsKYBm4WRKDLA5q6Asscx5rvNfM,3076
|
|
22
|
-
airflow/providers/openlineage/plugins/openlineage.py,sha256=
|
|
22
|
+
airflow/providers/openlineage/plugins/openlineage.py,sha256=rsRUW_zpXVAglzsgQRv5T9VWYY7CMQl0qRWm8-3oqDA,1678
|
|
23
23
|
airflow/providers/openlineage/utils/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
|
|
24
24
|
airflow/providers/openlineage/utils/selective_enable.py,sha256=dFJ7wK7J_-BFwcOKp9tqFOSrASV3lmLv7HtRkEuMk3Q,3087
|
|
25
25
|
airflow/providers/openlineage/utils/sql.py,sha256=bnuU9WvjVKcWVMN3cUp0jaHtU5_ZRM5I1OP1WhIdztg,9583
|
|
26
|
-
airflow/providers/openlineage/utils/utils.py,sha256=
|
|
27
|
-
apache_airflow_providers_openlineage-1.11.
|
|
28
|
-
apache_airflow_providers_openlineage-1.11.
|
|
29
|
-
apache_airflow_providers_openlineage-1.11.
|
|
30
|
-
apache_airflow_providers_openlineage-1.11.
|
|
26
|
+
airflow/providers/openlineage/utils/utils.py,sha256=gBaWzD4ifBPLAtp4TuUAEzVy_w1L_bQ4S9yfMp_uO2Q,24366
|
|
27
|
+
apache_airflow_providers_openlineage-1.11.0rc1.dist-info/entry_points.txt,sha256=GAx0_i2OeZzqaiiiYuA-xchICDXiCT5kVqpKSxsOjt4,214
|
|
28
|
+
apache_airflow_providers_openlineage-1.11.0rc1.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
|
|
29
|
+
apache_airflow_providers_openlineage-1.11.0rc1.dist-info/METADATA,sha256=QGnBVXjzazlTPQqGJTrqVuoqpAIfK444OzKxf6YvZ-c,6387
|
|
30
|
+
apache_airflow_providers_openlineage-1.11.0rc1.dist-info/RECORD,,
|
|
File without changes
|