apache-airflow-providers-openlineage 1.10.0rc1__tar.gz → 1.11.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/PKG-INFO +19 -17
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/README.rst +13 -12
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/__init__.py +3 -3
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/conf.py +6 -0
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/extractors/base.py +1 -1
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/extractors/manager.py +35 -3
- apache_airflow_providers_openlineage-1.11.0/airflow/providers/openlineage/facets/AirflowDebugRunFacet.json +30 -0
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/get_provider_info.py +11 -2
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/plugins/adapter.py +5 -3
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/plugins/facets.py +7 -11
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/plugins/listener.py +7 -0
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/plugins/openlineage.py +5 -0
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/utils/utils.py +37 -75
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/pyproject.toml +6 -5
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/LICENSE +0 -0
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/extractors/__init__.py +0 -0
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/extractors/bash.py +0 -0
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/extractors/python.py +0 -0
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/facets/AirflowDagRunFacet.json +0 -0
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/facets/AirflowJobFacet.json +0 -0
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/facets/AirflowRunFacet.json +0 -0
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/facets/AirflowStateRunFacet.json +0 -0
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/facets/__init__.py +0 -0
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/plugins/__init__.py +0 -0
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/plugins/macros.py +0 -0
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/sqlparser.py +0 -0
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/utils/__init__.py +0 -0
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/utils/selective_enable.py +0 -0
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/utils/sql.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: apache-airflow-providers-openlineage
|
|
3
|
-
Version: 1.
|
|
3
|
+
Version: 1.11.0
|
|
4
4
|
Summary: Provider package apache-airflow-providers-openlineage for Apache Airflow
|
|
5
5
|
Keywords: airflow-provider,openlineage,airflow,integration
|
|
6
6
|
Author-email: Apache Software Foundation <dev@airflow.apache.org>
|
|
@@ -21,15 +21,16 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
21
21
|
Classifier: Programming Language :: Python :: 3.11
|
|
22
22
|
Classifier: Programming Language :: Python :: 3.12
|
|
23
23
|
Classifier: Topic :: System :: Monitoring
|
|
24
|
-
Requires-Dist: apache-airflow-providers-common-
|
|
25
|
-
Requires-Dist: apache-airflow>=
|
|
24
|
+
Requires-Dist: apache-airflow-providers-common-compat>=1.2.0
|
|
25
|
+
Requires-Dist: apache-airflow-providers-common-sql>=1.6.0
|
|
26
|
+
Requires-Dist: apache-airflow>=2.8.0
|
|
26
27
|
Requires-Dist: attrs>=22.2
|
|
27
28
|
Requires-Dist: openlineage-integration-common>=1.16.0
|
|
28
29
|
Requires-Dist: openlineage-python>=1.16.0
|
|
29
30
|
Requires-Dist: apache-airflow-providers-common-sql ; extra == "common.sql"
|
|
30
31
|
Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
|
|
31
|
-
Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.
|
|
32
|
-
Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.
|
|
32
|
+
Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.11.0/changelog.html
|
|
33
|
+
Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.11.0
|
|
33
34
|
Project-URL: Slack Chat, https://s.apache.org/airflow-slack
|
|
34
35
|
Project-URL: Source Code, https://github.com/apache/airflow
|
|
35
36
|
Project-URL: Twitter, https://twitter.com/ApacheAirflow
|
|
@@ -80,7 +81,7 @@ Provides-Extra: common.sql
|
|
|
80
81
|
|
|
81
82
|
Package ``apache-airflow-providers-openlineage``
|
|
82
83
|
|
|
83
|
-
Release: ``1.
|
|
84
|
+
Release: ``1.11.0``
|
|
84
85
|
|
|
85
86
|
|
|
86
87
|
`OpenLineage <https://openlineage.io/>`__
|
|
@@ -93,7 +94,7 @@ This is a provider package for ``openlineage`` provider. All classes for this pr
|
|
|
93
94
|
are in ``airflow.providers.openlineage`` python package.
|
|
94
95
|
|
|
95
96
|
You can find package information and changelog for the provider
|
|
96
|
-
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.
|
|
97
|
+
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.11.0/>`_.
|
|
97
98
|
|
|
98
99
|
Installation
|
|
99
100
|
------------
|
|
@@ -107,15 +108,16 @@ The package supports the following python versions: 3.8,3.9,3.10,3.11,3.12
|
|
|
107
108
|
Requirements
|
|
108
109
|
------------
|
|
109
110
|
|
|
110
|
-
|
|
111
|
-
PIP package
|
|
112
|
-
|
|
113
|
-
``apache-airflow``
|
|
114
|
-
``apache-airflow-providers-common-sql``
|
|
115
|
-
``
|
|
116
|
-
``
|
|
117
|
-
``openlineage-
|
|
118
|
-
|
|
111
|
+
========================================== ==================
|
|
112
|
+
PIP package Version required
|
|
113
|
+
========================================== ==================
|
|
114
|
+
``apache-airflow`` ``>=2.8.0``
|
|
115
|
+
``apache-airflow-providers-common-sql`` ``>=1.6.0``
|
|
116
|
+
``apache-airflow-providers-common-compat`` ``>=1.2.0``
|
|
117
|
+
``attrs`` ``>=22.2``
|
|
118
|
+
``openlineage-integration-common`` ``>=1.16.0``
|
|
119
|
+
``openlineage-python`` ``>=1.16.0``
|
|
120
|
+
========================================== ==================
|
|
119
121
|
|
|
120
122
|
Cross provider package dependencies
|
|
121
123
|
-----------------------------------
|
|
@@ -137,4 +139,4 @@ Dependent package
|
|
|
137
139
|
============================================================================================================ ==============
|
|
138
140
|
|
|
139
141
|
The changelog for the provider package can be found in the
|
|
140
|
-
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.
|
|
142
|
+
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.11.0/changelog.html>`_.
|
|
@@ -42,7 +42,7 @@
|
|
|
42
42
|
|
|
43
43
|
Package ``apache-airflow-providers-openlineage``
|
|
44
44
|
|
|
45
|
-
Release: ``1.
|
|
45
|
+
Release: ``1.11.0``
|
|
46
46
|
|
|
47
47
|
|
|
48
48
|
`OpenLineage <https://openlineage.io/>`__
|
|
@@ -55,7 +55,7 @@ This is a provider package for ``openlineage`` provider. All classes for this pr
|
|
|
55
55
|
are in ``airflow.providers.openlineage`` python package.
|
|
56
56
|
|
|
57
57
|
You can find package information and changelog for the provider
|
|
58
|
-
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.
|
|
58
|
+
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.11.0/>`_.
|
|
59
59
|
|
|
60
60
|
Installation
|
|
61
61
|
------------
|
|
@@ -69,15 +69,16 @@ The package supports the following python versions: 3.8,3.9,3.10,3.11,3.12
|
|
|
69
69
|
Requirements
|
|
70
70
|
------------
|
|
71
71
|
|
|
72
|
-
|
|
73
|
-
PIP package
|
|
74
|
-
|
|
75
|
-
``apache-airflow``
|
|
76
|
-
``apache-airflow-providers-common-sql``
|
|
77
|
-
``
|
|
78
|
-
``
|
|
79
|
-
``openlineage-
|
|
80
|
-
|
|
72
|
+
========================================== ==================
|
|
73
|
+
PIP package Version required
|
|
74
|
+
========================================== ==================
|
|
75
|
+
``apache-airflow`` ``>=2.8.0``
|
|
76
|
+
``apache-airflow-providers-common-sql`` ``>=1.6.0``
|
|
77
|
+
``apache-airflow-providers-common-compat`` ``>=1.2.0``
|
|
78
|
+
``attrs`` ``>=22.2``
|
|
79
|
+
``openlineage-integration-common`` ``>=1.16.0``
|
|
80
|
+
``openlineage-python`` ``>=1.16.0``
|
|
81
|
+
========================================== ==================
|
|
81
82
|
|
|
82
83
|
Cross provider package dependencies
|
|
83
84
|
-----------------------------------
|
|
@@ -99,4 +100,4 @@ Dependent package
|
|
|
99
100
|
============================================================================================================ ==============
|
|
100
101
|
|
|
101
102
|
The changelog for the provider package can be found in the
|
|
102
|
-
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.
|
|
103
|
+
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.11.0/changelog.html>`_.
|
|
@@ -29,11 +29,11 @@ from airflow import __version__ as airflow_version
|
|
|
29
29
|
|
|
30
30
|
__all__ = ["__version__"]
|
|
31
31
|
|
|
32
|
-
__version__ = "1.
|
|
32
|
+
__version__ = "1.11.0"
|
|
33
33
|
|
|
34
34
|
if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
|
|
35
|
-
"2.
|
|
35
|
+
"2.8.0"
|
|
36
36
|
):
|
|
37
37
|
raise RuntimeError(
|
|
38
|
-
f"The package `apache-airflow-providers-openlineage:{__version__}` needs Apache Airflow 2.
|
|
38
|
+
f"The package `apache-airflow-providers-openlineage:{__version__}` needs Apache Airflow 2.8.0+"
|
|
39
39
|
)
|
|
@@ -145,3 +145,9 @@ def execution_timeout() -> int:
|
|
|
145
145
|
def include_full_task_info() -> bool:
|
|
146
146
|
"""[openlineage] include_full_task_info."""
|
|
147
147
|
return conf.getboolean(_CONFIG_SECTION, "include_full_task_info", fallback="False")
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
@cache
|
|
151
|
+
def debug_mode() -> bool:
|
|
152
|
+
"""[openlineage] debug_mode."""
|
|
153
|
+
return conf.getboolean(_CONFIG_SECTION, "debug_mode", fallback="False")
|
|
@@ -113,7 +113,7 @@ class DefaultExtractor(BaseExtractor):
|
|
|
113
113
|
"Operator %s does not have the get_openlineage_facets_on_start method.",
|
|
114
114
|
self.operator.task_type,
|
|
115
115
|
)
|
|
116
|
-
return
|
|
116
|
+
return OperatorLineage()
|
|
117
117
|
|
|
118
118
|
def extract_on_complete(self, task_instance) -> OperatorLineage | None:
|
|
119
119
|
failed_states = [TaskInstanceState.FAILED, TaskInstanceState.UP_FOR_RETRY]
|
|
@@ -25,6 +25,7 @@ from airflow.providers.openlineage.extractors.bash import BashExtractor
|
|
|
25
25
|
from airflow.providers.openlineage.extractors.python import PythonExtractor
|
|
26
26
|
from airflow.providers.openlineage.utils.utils import (
|
|
27
27
|
get_unknown_source_attribute_run_facet,
|
|
28
|
+
translate_airflow_dataset,
|
|
28
29
|
try_import_from_string,
|
|
29
30
|
)
|
|
30
31
|
from airflow.utils.log.logging_mixin import LoggingMixin
|
|
@@ -90,7 +91,6 @@ class ExtractorManager(LoggingMixin):
|
|
|
90
91
|
f"task_id={task.task_id} "
|
|
91
92
|
f"airflow_run_id={dagrun.run_id} "
|
|
92
93
|
)
|
|
93
|
-
|
|
94
94
|
if extractor:
|
|
95
95
|
# Extracting advanced metadata is only possible when extractor for particular operator
|
|
96
96
|
# is defined. Without it, we can't extract any input or output data.
|
|
@@ -105,14 +105,22 @@ class ExtractorManager(LoggingMixin):
|
|
|
105
105
|
task_metadata = self.validate_task_metadata(task_metadata)
|
|
106
106
|
if task_metadata:
|
|
107
107
|
if (not task_metadata.inputs) and (not task_metadata.outputs):
|
|
108
|
-
self.
|
|
109
|
-
|
|
108
|
+
if (hook_lineage := self.get_hook_lineage()) is not None:
|
|
109
|
+
inputs, outputs = hook_lineage
|
|
110
|
+
task_metadata.inputs = inputs
|
|
111
|
+
task_metadata.outputs = outputs
|
|
112
|
+
else:
|
|
113
|
+
self.extract_inlets_and_outlets(task_metadata, task.inlets, task.outlets)
|
|
110
114
|
return task_metadata
|
|
111
115
|
|
|
112
116
|
except Exception as e:
|
|
113
117
|
self.log.warning(
|
|
114
118
|
"Failed to extract metadata using found extractor %s - %s %s", extractor, e, task_info
|
|
115
119
|
)
|
|
120
|
+
elif (hook_lineage := self.get_hook_lineage()) is not None:
|
|
121
|
+
inputs, outputs = hook_lineage
|
|
122
|
+
task_metadata = OperatorLineage(inputs=inputs, outputs=outputs)
|
|
123
|
+
return task_metadata
|
|
116
124
|
else:
|
|
117
125
|
self.log.debug("Unable to find an extractor %s", task_info)
|
|
118
126
|
|
|
@@ -168,6 +176,30 @@ class ExtractorManager(LoggingMixin):
|
|
|
168
176
|
if d:
|
|
169
177
|
task_metadata.outputs.append(d)
|
|
170
178
|
|
|
179
|
+
def get_hook_lineage(self) -> tuple[list[Dataset], list[Dataset]] | None:
|
|
180
|
+
try:
|
|
181
|
+
from airflow.lineage.hook import get_hook_lineage_collector
|
|
182
|
+
except ImportError:
|
|
183
|
+
return None
|
|
184
|
+
|
|
185
|
+
if not get_hook_lineage_collector().has_collected:
|
|
186
|
+
return None
|
|
187
|
+
|
|
188
|
+
return (
|
|
189
|
+
[
|
|
190
|
+
dataset
|
|
191
|
+
for dataset_info in get_hook_lineage_collector().collected_datasets.inputs
|
|
192
|
+
if (dataset := translate_airflow_dataset(dataset_info.dataset, dataset_info.context))
|
|
193
|
+
is not None
|
|
194
|
+
],
|
|
195
|
+
[
|
|
196
|
+
dataset
|
|
197
|
+
for dataset_info in get_hook_lineage_collector().collected_datasets.outputs
|
|
198
|
+
if (dataset := translate_airflow_dataset(dataset_info.dataset, dataset_info.context))
|
|
199
|
+
is not None
|
|
200
|
+
],
|
|
201
|
+
)
|
|
202
|
+
|
|
171
203
|
@staticmethod
|
|
172
204
|
def convert_to_ol_dataset_from_object_storage_uri(uri: str) -> Dataset | None:
|
|
173
205
|
from urllib.parse import urlparse
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$defs": {
|
|
4
|
+
"AirflowDebugRunFacet": {
|
|
5
|
+
"allOf": [
|
|
6
|
+
{
|
|
7
|
+
"$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/RunFacet"
|
|
8
|
+
},
|
|
9
|
+
{
|
|
10
|
+
"type": "object",
|
|
11
|
+
"properties": {
|
|
12
|
+
"packages": {
|
|
13
|
+
"description": "The names and versions of all installed Python packages.",
|
|
14
|
+
"type": "object",
|
|
15
|
+
"additionalProperties": true
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
"required": ["packages"]
|
|
19
|
+
}
|
|
20
|
+
],
|
|
21
|
+
"type": "object"
|
|
22
|
+
}
|
|
23
|
+
},
|
|
24
|
+
"type": "object",
|
|
25
|
+
"properties": {
|
|
26
|
+
"debug": {
|
|
27
|
+
"$ref": "#/$defs/AirflowDebugRunFacet"
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
}
|
|
@@ -28,8 +28,9 @@ def get_provider_info():
|
|
|
28
28
|
"name": "OpenLineage Airflow",
|
|
29
29
|
"description": "`OpenLineage <https://openlineage.io/>`__\n",
|
|
30
30
|
"state": "ready",
|
|
31
|
-
"source-date-epoch":
|
|
31
|
+
"source-date-epoch": 1723970474,
|
|
32
32
|
"versions": [
|
|
33
|
+
"1.11.0",
|
|
33
34
|
"1.10.0",
|
|
34
35
|
"1.9.1",
|
|
35
36
|
"1.9.0",
|
|
@@ -50,8 +51,9 @@ def get_provider_info():
|
|
|
50
51
|
"1.0.0",
|
|
51
52
|
],
|
|
52
53
|
"dependencies": [
|
|
53
|
-
"apache-airflow>=2.
|
|
54
|
+
"apache-airflow>=2.8.0",
|
|
54
55
|
"apache-airflow-providers-common-sql>=1.6.0",
|
|
56
|
+
"apache-airflow-providers-common-compat>=1.2.0",
|
|
55
57
|
"attrs>=22.2",
|
|
56
58
|
"openlineage-integration-common>=1.16.0",
|
|
57
59
|
"openlineage-python>=1.16.0",
|
|
@@ -158,6 +160,13 @@ def get_provider_info():
|
|
|
158
160
|
"type": "boolean",
|
|
159
161
|
"version_added": "1.10.0",
|
|
160
162
|
},
|
|
163
|
+
"debug_mode": {
|
|
164
|
+
"description": "If true, OpenLineage events will include information useful for debugging - potentially\ncontaining large fields e.g. all installed packages and their versions.\n",
|
|
165
|
+
"default": "False",
|
|
166
|
+
"example": None,
|
|
167
|
+
"type": "boolean",
|
|
168
|
+
"version_added": "1.11.0",
|
|
169
|
+
},
|
|
161
170
|
},
|
|
162
171
|
}
|
|
163
172
|
},
|
|
@@ -41,6 +41,7 @@ from airflow.providers.openlineage import __version__ as OPENLINEAGE_PROVIDER_VE
|
|
|
41
41
|
from airflow.providers.openlineage.utils.utils import (
|
|
42
42
|
OpenLineageRedactor,
|
|
43
43
|
get_airflow_dag_run_facet,
|
|
44
|
+
get_airflow_debug_facet,
|
|
44
45
|
get_airflow_state_run_facet,
|
|
45
46
|
)
|
|
46
47
|
from airflow.stats import Stats
|
|
@@ -90,7 +91,7 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
90
91
|
"OpenLineage configuration not found directly in Airflow. "
|
|
91
92
|
"Looking for legacy environment configuration. "
|
|
92
93
|
)
|
|
93
|
-
self._client = OpenLineageClient
|
|
94
|
+
self._client = OpenLineageClient()
|
|
94
95
|
return self._client
|
|
95
96
|
|
|
96
97
|
def get_openlineage_config(self) -> dict | None:
|
|
@@ -361,7 +362,7 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
361
362
|
job_name=dag_run.dag_id,
|
|
362
363
|
nominal_start_time=nominal_start_time,
|
|
363
364
|
nominal_end_time=nominal_end_time,
|
|
364
|
-
run_facets=get_airflow_dag_run_facet(dag_run),
|
|
365
|
+
run_facets={**get_airflow_dag_run_facet(dag_run), **get_airflow_debug_facet()},
|
|
365
366
|
),
|
|
366
367
|
inputs=[],
|
|
367
368
|
outputs=[],
|
|
@@ -385,7 +386,7 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
385
386
|
dag_id=dag_run.dag_id,
|
|
386
387
|
execution_date=dag_run.execution_date,
|
|
387
388
|
),
|
|
388
|
-
facets={**get_airflow_state_run_facet(dag_run)},
|
|
389
|
+
facets={**get_airflow_state_run_facet(dag_run), **get_airflow_debug_facet()},
|
|
389
390
|
),
|
|
390
391
|
inputs=[],
|
|
391
392
|
outputs=[],
|
|
@@ -414,6 +415,7 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
414
415
|
message=msg, programmingLanguage="python"
|
|
415
416
|
),
|
|
416
417
|
**get_airflow_state_run_facet(dag_run),
|
|
418
|
+
**get_airflow_debug_facet(),
|
|
417
419
|
},
|
|
418
420
|
),
|
|
419
421
|
inputs=[],
|
|
@@ -17,17 +17,10 @@
|
|
|
17
17
|
from __future__ import annotations
|
|
18
18
|
|
|
19
19
|
from attrs import define
|
|
20
|
-
from deprecated import deprecated
|
|
21
20
|
from openlineage.client.facet_v2 import JobFacet, RunFacet
|
|
22
21
|
from openlineage.client.utils import RedactMixin
|
|
23
22
|
|
|
24
|
-
from airflow.exceptions import AirflowProviderDeprecationWarning
|
|
25
23
|
|
|
26
|
-
|
|
27
|
-
@deprecated(
|
|
28
|
-
reason="To be removed in the next release. Make sure to use information from AirflowRunFacet instead.",
|
|
29
|
-
category=AirflowProviderDeprecationWarning,
|
|
30
|
-
)
|
|
31
24
|
@define
|
|
32
25
|
class AirflowMappedTaskRunFacet(RunFacet):
|
|
33
26
|
"""Run facet containing information about mapped tasks."""
|
|
@@ -108,6 +101,13 @@ class AirflowDagRunFacet(RunFacet):
|
|
|
108
101
|
dagRun: dict
|
|
109
102
|
|
|
110
103
|
|
|
104
|
+
@define
|
|
105
|
+
class AirflowDebugRunFacet(RunFacet):
|
|
106
|
+
"""Airflow Debug run facet."""
|
|
107
|
+
|
|
108
|
+
packages: dict
|
|
109
|
+
|
|
110
|
+
|
|
111
111
|
@define
|
|
112
112
|
class UnknownOperatorInstance(RedactMixin):
|
|
113
113
|
"""
|
|
@@ -123,10 +123,6 @@ class UnknownOperatorInstance(RedactMixin):
|
|
|
123
123
|
_skip_redact = ["name", "type"]
|
|
124
124
|
|
|
125
125
|
|
|
126
|
-
@deprecated(
|
|
127
|
-
reason="To be removed in the next release. Make sure to use information from AirflowRunFacet instead.",
|
|
128
|
-
category=AirflowProviderDeprecationWarning,
|
|
129
|
-
)
|
|
130
126
|
@define
|
|
131
127
|
class UnknownOperatorAttributeRunFacet(RunFacet):
|
|
132
128
|
"""RunFacet that describes unknown operators in an Airflow DAG."""
|
|
@@ -32,6 +32,7 @@ from airflow.providers.openlineage.extractors import ExtractorManager
|
|
|
32
32
|
from airflow.providers.openlineage.plugins.adapter import OpenLineageAdapter, RunState
|
|
33
33
|
from airflow.providers.openlineage.utils.utils import (
|
|
34
34
|
IS_AIRFLOW_2_10_OR_HIGHER,
|
|
35
|
+
get_airflow_debug_facet,
|
|
35
36
|
get_airflow_job_facet,
|
|
36
37
|
get_airflow_mapped_task_facet,
|
|
37
38
|
get_airflow_run_facet,
|
|
@@ -122,6 +123,9 @@ class OpenLineageListener:
|
|
|
122
123
|
)
|
|
123
124
|
return
|
|
124
125
|
|
|
126
|
+
# Needs to be calculated outside of inner method so that it gets cached for usage in fork processes
|
|
127
|
+
debug_facet = get_airflow_debug_facet()
|
|
128
|
+
|
|
125
129
|
@print_warning(self.log)
|
|
126
130
|
def on_running():
|
|
127
131
|
# that's a workaround to detect task running from deferred state
|
|
@@ -166,6 +170,7 @@ class OpenLineageListener:
|
|
|
166
170
|
**get_user_provided_run_facets(task_instance, TaskInstanceState.RUNNING),
|
|
167
171
|
**get_airflow_mapped_task_facet(task_instance),
|
|
168
172
|
**get_airflow_run_facet(dagrun, dag, task_instance, task, task_uuid),
|
|
173
|
+
**debug_facet,
|
|
169
174
|
},
|
|
170
175
|
)
|
|
171
176
|
Stats.gauge(
|
|
@@ -237,6 +242,7 @@ class OpenLineageListener:
|
|
|
237
242
|
run_facets={
|
|
238
243
|
**get_user_provided_run_facets(task_instance, TaskInstanceState.SUCCESS),
|
|
239
244
|
**get_airflow_run_facet(dagrun, dag, task_instance, task, task_uuid),
|
|
245
|
+
**get_airflow_debug_facet(),
|
|
240
246
|
},
|
|
241
247
|
)
|
|
242
248
|
Stats.gauge(
|
|
@@ -336,6 +342,7 @@ class OpenLineageListener:
|
|
|
336
342
|
run_facets={
|
|
337
343
|
**get_user_provided_run_facets(task_instance, TaskInstanceState.FAILED),
|
|
338
344
|
**get_airflow_run_facet(dagrun, dag, task_instance, task, task_uuid),
|
|
345
|
+
**get_airflow_debug_facet(),
|
|
339
346
|
},
|
|
340
347
|
)
|
|
341
348
|
Stats.gauge(
|
|
@@ -25,6 +25,7 @@ from airflow.providers.openlineage.plugins.macros import (
|
|
|
25
25
|
lineage_parent_id,
|
|
26
26
|
lineage_run_id,
|
|
27
27
|
)
|
|
28
|
+
from airflow.providers.openlineage.utils.utils import IS_AIRFLOW_2_10_OR_HIGHER
|
|
28
29
|
|
|
29
30
|
|
|
30
31
|
class OpenLineageProviderPlugin(AirflowPlugin):
|
|
@@ -39,6 +40,10 @@ class OpenLineageProviderPlugin(AirflowPlugin):
|
|
|
39
40
|
if not conf.is_disabled():
|
|
40
41
|
macros = [lineage_job_namespace, lineage_job_name, lineage_run_id, lineage_parent_id]
|
|
41
42
|
listeners = [get_openlineage_listener()]
|
|
43
|
+
if IS_AIRFLOW_2_10_OR_HIGHER:
|
|
44
|
+
from airflow.lineage.hook import HookLineageReader
|
|
45
|
+
|
|
46
|
+
hook_lineage_readers = [HookLineageReader]
|
|
42
47
|
else:
|
|
43
48
|
macros = []
|
|
44
49
|
listeners = []
|
|
@@ -20,10 +20,9 @@ from __future__ import annotations
|
|
|
20
20
|
import datetime
|
|
21
21
|
import json
|
|
22
22
|
import logging
|
|
23
|
-
import
|
|
24
|
-
from contextlib import redirect_stdout, suppress
|
|
23
|
+
from contextlib import suppress
|
|
25
24
|
from functools import wraps
|
|
26
|
-
from
|
|
25
|
+
from importlib import metadata
|
|
27
26
|
from typing import TYPE_CHECKING, Any, Callable, Iterable
|
|
28
27
|
|
|
29
28
|
import attrs
|
|
@@ -38,6 +37,7 @@ from airflow.models import DAG, BaseOperator, MappedOperator
|
|
|
38
37
|
from airflow.providers.openlineage import conf
|
|
39
38
|
from airflow.providers.openlineage.plugins.facets import (
|
|
40
39
|
AirflowDagRunFacet,
|
|
40
|
+
AirflowDebugRunFacet,
|
|
41
41
|
AirflowJobFacet,
|
|
42
42
|
AirflowMappedTaskRunFacet,
|
|
43
43
|
AirflowRunFacet,
|
|
@@ -85,6 +85,10 @@ def get_job_name(task: TaskInstance) -> str:
|
|
|
85
85
|
def get_airflow_mapped_task_facet(task_instance: TaskInstance) -> dict[str, Any]:
|
|
86
86
|
# check for -1 comes from SmartSensor compatibility with dynamic task mapping
|
|
87
87
|
# this comes from Airflow code
|
|
88
|
+
log.debug(
|
|
89
|
+
"AirflowMappedTaskRunFacet is deprecated and will be removed. "
|
|
90
|
+
"Use information from AirflowRunFacet instead."
|
|
91
|
+
)
|
|
88
92
|
if hasattr(task_instance, "map_index") and getattr(task_instance, "map_index") != -1:
|
|
89
93
|
return {"airflow_mappedTask": AirflowMappedTaskRunFacet.from_task_instance(task_instance)}
|
|
90
94
|
return {}
|
|
@@ -240,7 +244,7 @@ class InfoJsonEncodable(dict):
|
|
|
240
244
|
class DagInfo(InfoJsonEncodable):
|
|
241
245
|
"""Defines encoding DAG object to JSON."""
|
|
242
246
|
|
|
243
|
-
includes = ["dag_id", "description", "owner", "schedule_interval", "start_date", "tags"]
|
|
247
|
+
includes = ["dag_id", "description", "fileloc", "owner", "schedule_interval", "start_date", "tags"]
|
|
244
248
|
casts = {"timetable": lambda dag: dag.timetable.serialize() if getattr(dag, "timetable", None) else None}
|
|
245
249
|
renames = {"_dag_id": "dag_id"}
|
|
246
250
|
|
|
@@ -374,6 +378,28 @@ def get_airflow_dag_run_facet(dag_run: DagRun) -> dict[str, RunFacet]:
|
|
|
374
378
|
}
|
|
375
379
|
|
|
376
380
|
|
|
381
|
+
@conf.cache
|
|
382
|
+
def _get_all_packages_installed() -> dict[str, str]:
|
|
383
|
+
"""
|
|
384
|
+
Retrieve a dictionary of all installed packages and their versions.
|
|
385
|
+
|
|
386
|
+
This operation involves scanning the system's installed packages, which can be a heavy operation.
|
|
387
|
+
It is recommended to cache the result to avoid repeated, expensive lookups.
|
|
388
|
+
"""
|
|
389
|
+
return {dist.metadata["Name"]: dist.version for dist in metadata.distributions()}
|
|
390
|
+
|
|
391
|
+
|
|
392
|
+
def get_airflow_debug_facet() -> dict[str, AirflowDebugRunFacet]:
|
|
393
|
+
if not conf.debug_mode():
|
|
394
|
+
return {}
|
|
395
|
+
log.warning("OpenLineage debug_mode is enabled. Be aware that this may log and emit extensive details.")
|
|
396
|
+
return {
|
|
397
|
+
"debug": AirflowDebugRunFacet(
|
|
398
|
+
packages=_get_all_packages_installed(),
|
|
399
|
+
)
|
|
400
|
+
}
|
|
401
|
+
|
|
402
|
+
|
|
377
403
|
def get_airflow_run_facet(
|
|
378
404
|
dag_run: DagRun,
|
|
379
405
|
dag: DAG,
|
|
@@ -397,7 +423,7 @@ def get_airflow_job_facet(dag_run: DagRun) -> dict[str, AirflowJobFacet]:
|
|
|
397
423
|
return {}
|
|
398
424
|
return {
|
|
399
425
|
"airflow": AirflowJobFacet(
|
|
400
|
-
taskTree=
|
|
426
|
+
taskTree={}, # caused OOM errors, to be removed, see #41587
|
|
401
427
|
taskGroups=_get_task_groups_details(dag_run.dag),
|
|
402
428
|
tasks=_get_tasks_details(dag_run.dag),
|
|
403
429
|
)
|
|
@@ -413,75 +439,6 @@ def get_airflow_state_run_facet(dag_run: DagRun) -> dict[str, AirflowStateRunFac
|
|
|
413
439
|
}
|
|
414
440
|
|
|
415
441
|
|
|
416
|
-
def _safe_get_dag_tree_view(dag: DAG) -> list[str]:
|
|
417
|
-
# get_tree_view() has been added in Airflow 2.8.2
|
|
418
|
-
if hasattr(dag, "get_tree_view"):
|
|
419
|
-
return dag.get_tree_view().splitlines()
|
|
420
|
-
|
|
421
|
-
with redirect_stdout(StringIO()) as stdout:
|
|
422
|
-
dag.tree_view()
|
|
423
|
-
return stdout.getvalue().splitlines()
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
def _get_parsed_dag_tree(dag: DAG) -> dict:
|
|
427
|
-
"""
|
|
428
|
-
Get DAG's tasks hierarchy representation.
|
|
429
|
-
|
|
430
|
-
While the task dependencies are defined as following:
|
|
431
|
-
task >> [task_2, task_4] >> task_7
|
|
432
|
-
task_3 >> task_5
|
|
433
|
-
task_6 # has no dependencies, it's a root and a leaf
|
|
434
|
-
|
|
435
|
-
The result of this function will look like:
|
|
436
|
-
{
|
|
437
|
-
"task": {
|
|
438
|
-
"task_2": {
|
|
439
|
-
"task_7": {}
|
|
440
|
-
},
|
|
441
|
-
"task_4": {
|
|
442
|
-
"task_7": {}
|
|
443
|
-
}
|
|
444
|
-
},
|
|
445
|
-
"task_3": {
|
|
446
|
-
"task_5": {}
|
|
447
|
-
},
|
|
448
|
-
"task_6": {}
|
|
449
|
-
}
|
|
450
|
-
"""
|
|
451
|
-
lines = _safe_get_dag_tree_view(dag)
|
|
452
|
-
task_dict: dict[str, dict] = {}
|
|
453
|
-
parent_map: dict[int, tuple[str, dict]] = {}
|
|
454
|
-
|
|
455
|
-
for line in lines:
|
|
456
|
-
stripped_line = line.strip()
|
|
457
|
-
if not stripped_line:
|
|
458
|
-
continue
|
|
459
|
-
|
|
460
|
-
# Determine the level by counting the leading spaces, assuming 4 spaces per level
|
|
461
|
-
# as defined in airflow.models.dag.DAG._generate_tree_view()
|
|
462
|
-
level = (len(line) - len(stripped_line)) // 4
|
|
463
|
-
# airflow.models.baseoperator.BaseOperator.__repr__ or
|
|
464
|
-
# airflow.models.mappedoperator.MappedOperator.__repr__ is used in DAG tree
|
|
465
|
-
# <Task({op_class}): {task_id}> or <Mapped({op_class}): {task_id}>
|
|
466
|
-
match = re.match(r"^<(?:Task|Mapped)\(.+\): (.+)>$", stripped_line)
|
|
467
|
-
if not match:
|
|
468
|
-
return {}
|
|
469
|
-
current_task_id = match[1]
|
|
470
|
-
|
|
471
|
-
if level == 0: # It's a root task
|
|
472
|
-
task_dict[current_task_id] = {}
|
|
473
|
-
parent_map[level] = (current_task_id, task_dict[current_task_id])
|
|
474
|
-
else:
|
|
475
|
-
# Find the immediate parent task
|
|
476
|
-
parent_task, parent_dict = parent_map[(level - 1)]
|
|
477
|
-
# Create new dict for the current task
|
|
478
|
-
parent_dict[current_task_id] = {}
|
|
479
|
-
# Update this task in the parent map
|
|
480
|
-
parent_map[level] = (current_task_id, parent_dict[current_task_id])
|
|
481
|
-
|
|
482
|
-
return task_dict
|
|
483
|
-
|
|
484
|
-
|
|
485
442
|
def _get_tasks_details(dag: DAG) -> dict:
|
|
486
443
|
tasks = {
|
|
487
444
|
single_task.task_id: {
|
|
@@ -493,8 +450,9 @@ def _get_tasks_details(dag: DAG) -> dict:
|
|
|
493
450
|
"ui_label": single_task.label,
|
|
494
451
|
"is_setup": single_task.is_setup,
|
|
495
452
|
"is_teardown": single_task.is_teardown,
|
|
453
|
+
"downstream_task_ids": sorted(single_task.downstream_task_ids),
|
|
496
454
|
}
|
|
497
|
-
for single_task in dag.tasks
|
|
455
|
+
for single_task in sorted(dag.tasks, key=lambda x: x.task_id)
|
|
498
456
|
}
|
|
499
457
|
|
|
500
458
|
return tasks
|
|
@@ -536,6 +494,10 @@ def _emits_ol_events(task: BaseOperator | MappedOperator) -> bool:
|
|
|
536
494
|
def get_unknown_source_attribute_run_facet(task: BaseOperator, name: str | None = None):
|
|
537
495
|
if not name:
|
|
538
496
|
name = get_operator_class(task).__name__
|
|
497
|
+
log.debug(
|
|
498
|
+
"UnknownOperatorAttributeRunFacet is deprecated and will be removed. "
|
|
499
|
+
"Use information from AirflowRunFacet instead."
|
|
500
|
+
)
|
|
539
501
|
return {
|
|
540
502
|
"unknownSourceAttribute": attrs.asdict(
|
|
541
503
|
UnknownOperatorAttributeRunFacet(
|
|
@@ -28,7 +28,7 @@ build-backend = "flit_core.buildapi"
|
|
|
28
28
|
|
|
29
29
|
[project]
|
|
30
30
|
name = "apache-airflow-providers-openlineage"
|
|
31
|
-
version = "1.
|
|
31
|
+
version = "1.11.0"
|
|
32
32
|
description = "Provider package apache-airflow-providers-openlineage for Apache Airflow"
|
|
33
33
|
readme = "README.rst"
|
|
34
34
|
authors = [
|
|
@@ -56,16 +56,17 @@ classifiers = [
|
|
|
56
56
|
]
|
|
57
57
|
requires-python = "~=3.8"
|
|
58
58
|
dependencies = [
|
|
59
|
-
"apache-airflow-providers-common-
|
|
60
|
-
"apache-airflow>=
|
|
59
|
+
"apache-airflow-providers-common-compat>=1.2.0",
|
|
60
|
+
"apache-airflow-providers-common-sql>=1.6.0",
|
|
61
|
+
"apache-airflow>=2.8.0",
|
|
61
62
|
"attrs>=22.2",
|
|
62
63
|
"openlineage-integration-common>=1.16.0",
|
|
63
64
|
"openlineage-python>=1.16.0",
|
|
64
65
|
]
|
|
65
66
|
|
|
66
67
|
[project.urls]
|
|
67
|
-
"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.
|
|
68
|
-
"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.
|
|
68
|
+
"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.11.0"
|
|
69
|
+
"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.11.0/changelog.html"
|
|
69
70
|
"Bug Tracker" = "https://github.com/apache/airflow/issues"
|
|
70
71
|
"Source Code" = "https://github.com/apache/airflow"
|
|
71
72
|
"Slack Chat" = "https://s.apache.org/airflow-slack"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|