apache-airflow-providers-openlineage 2.0.0rc1__py3-none-any.whl → 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apache-airflow-providers-openlineage might be problematic. Click here for more details.
- airflow/providers/openlineage/LICENSE +0 -52
- airflow/providers/openlineage/__init__.py +1 -1
- airflow/providers/openlineage/conf.py +6 -0
- airflow/providers/openlineage/extractors/base.py +2 -2
- airflow/providers/openlineage/extractors/bash.py +1 -2
- airflow/providers/openlineage/extractors/manager.py +3 -5
- airflow/providers/openlineage/extractors/python.py +1 -2
- airflow/providers/openlineage/get_provider_info.py +21 -13
- airflow/providers/openlineage/plugins/adapter.py +24 -13
- airflow/providers/openlineage/plugins/facets.py +1 -0
- airflow/providers/openlineage/plugins/listener.py +176 -93
- airflow/providers/openlineage/sqlparser.py +111 -10
- airflow/providers/openlineage/utils/selective_enable.py +16 -3
- airflow/providers/openlineage/utils/spark.py +70 -2
- airflow/providers/openlineage/utils/sql.py +2 -1
- airflow/providers/openlineage/utils/utils.py +55 -21
- {apache_airflow_providers_openlineage-2.0.0rc1.dist-info → apache_airflow_providers_openlineage-2.1.0.dist-info}/METADATA +14 -29
- apache_airflow_providers_openlineage-2.1.0.dist-info/RECORD +32 -0
- apache_airflow_providers_openlineage-2.0.0rc1.dist-info/RECORD +0 -32
- {apache_airflow_providers_openlineage-2.0.0rc1.dist-info → apache_airflow_providers_openlineage-2.1.0.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_openlineage-2.0.0rc1.dist-info → apache_airflow_providers_openlineage-2.1.0.dist-info}/entry_points.txt +0 -0
|
@@ -199,55 +199,3 @@ distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
199
199
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
200
200
|
See the License for the specific language governing permissions and
|
|
201
201
|
limitations under the License.
|
|
202
|
-
|
|
203
|
-
============================================================================
|
|
204
|
-
APACHE AIRFLOW SUBCOMPONENTS:
|
|
205
|
-
|
|
206
|
-
The Apache Airflow project contains subcomponents with separate copyright
|
|
207
|
-
notices and license terms. Your use of the source code for these
|
|
208
|
-
subcomponents is subject to the terms and conditions of the following
|
|
209
|
-
licenses.
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
========================================================================
|
|
213
|
-
Third party Apache 2.0 licenses
|
|
214
|
-
========================================================================
|
|
215
|
-
|
|
216
|
-
The following components are provided under the Apache 2.0 License.
|
|
217
|
-
See project link for details. The text of each license is also included
|
|
218
|
-
at 3rd-party-licenses/LICENSE-[project].txt.
|
|
219
|
-
|
|
220
|
-
(ALv2 License) hue v4.3.0 (https://github.com/cloudera/hue/)
|
|
221
|
-
(ALv2 License) jqclock v2.3.0 (https://github.com/JohnRDOrazio/jQuery-Clock-Plugin)
|
|
222
|
-
(ALv2 License) bootstrap3-typeahead v4.0.2 (https://github.com/bassjobsen/Bootstrap-3-Typeahead)
|
|
223
|
-
(ALv2 License) connexion v2.7.0 (https://github.com/zalando/connexion)
|
|
224
|
-
|
|
225
|
-
========================================================================
|
|
226
|
-
MIT licenses
|
|
227
|
-
========================================================================
|
|
228
|
-
|
|
229
|
-
The following components are provided under the MIT License. See project link for details.
|
|
230
|
-
The text of each license is also included at 3rd-party-licenses/LICENSE-[project].txt.
|
|
231
|
-
|
|
232
|
-
(MIT License) jquery v3.5.1 (https://jquery.org/license/)
|
|
233
|
-
(MIT License) dagre-d3 v0.6.4 (https://github.com/cpettitt/dagre-d3)
|
|
234
|
-
(MIT License) bootstrap v3.4.1 (https://github.com/twbs/bootstrap/)
|
|
235
|
-
(MIT License) d3-tip v0.9.1 (https://github.com/Caged/d3-tip)
|
|
236
|
-
(MIT License) dataTables v1.10.25 (https://datatables.net)
|
|
237
|
-
(MIT License) normalize.css v3.0.2 (http://necolas.github.io/normalize.css/)
|
|
238
|
-
(MIT License) ElasticMock v1.3.2 (https://github.com/vrcmarcos/elasticmock)
|
|
239
|
-
(MIT License) MomentJS v2.24.0 (http://momentjs.com/)
|
|
240
|
-
(MIT License) eonasdan-bootstrap-datetimepicker v4.17.49 (https://github.com/eonasdan/bootstrap-datetimepicker/)
|
|
241
|
-
|
|
242
|
-
========================================================================
|
|
243
|
-
BSD 3-Clause licenses
|
|
244
|
-
========================================================================
|
|
245
|
-
The following components are provided under the BSD 3-Clause license. See project links for details.
|
|
246
|
-
The text of each license is also included at 3rd-party-licenses/LICENSE-[project].txt.
|
|
247
|
-
|
|
248
|
-
(BSD 3 License) d3 v5.16.0 (https://d3js.org)
|
|
249
|
-
(BSD 3 License) d3-shape v2.1.0 (https://github.com/d3/d3-shape)
|
|
250
|
-
(BSD 3 License) cgroupspy 0.2.1 (https://github.com/cloudsigma/cgroupspy)
|
|
251
|
-
|
|
252
|
-
========================================================================
|
|
253
|
-
See 3rd-party-licenses/LICENSES-ui.txt for packages used in `/airflow/www`
|
|
@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version
|
|
|
29
29
|
|
|
30
30
|
__all__ = ["__version__"]
|
|
31
31
|
|
|
32
|
-
__version__ = "2.
|
|
32
|
+
__version__ = "2.1.0"
|
|
33
33
|
|
|
34
34
|
if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
|
|
35
35
|
"2.9.0"
|
|
@@ -83,6 +83,12 @@ def spark_inject_parent_job_info() -> bool:
|
|
|
83
83
|
return conf.getboolean(_CONFIG_SECTION, "spark_inject_parent_job_info", fallback="False")
|
|
84
84
|
|
|
85
85
|
|
|
86
|
+
@cache
|
|
87
|
+
def spark_inject_transport_info() -> bool:
|
|
88
|
+
"""[openlineage] spark_inject_transport_info."""
|
|
89
|
+
return conf.getboolean(_CONFIG_SECTION, "spark_inject_transport_info", fallback="False")
|
|
90
|
+
|
|
91
|
+
|
|
86
92
|
@cache
|
|
87
93
|
def custom_extractors() -> set[str]:
|
|
88
94
|
"""[openlineage] extractors."""
|
|
@@ -22,16 +22,16 @@ from abc import ABC, abstractmethod
|
|
|
22
22
|
from typing import Generic, TypeVar, Union
|
|
23
23
|
|
|
24
24
|
from attrs import Factory, define
|
|
25
|
+
|
|
25
26
|
from openlineage.client.event_v2 import Dataset as OLDataset
|
|
26
27
|
|
|
27
28
|
with warnings.catch_warnings():
|
|
28
29
|
warnings.simplefilter("ignore", DeprecationWarning)
|
|
29
30
|
from openlineage.client.facet import BaseFacet as BaseFacet_V1
|
|
30
|
-
from openlineage.client.facet_v2 import JobFacet, RunFacet
|
|
31
|
-
|
|
32
31
|
from airflow.providers.openlineage.utils.utils import AIRFLOW_V_2_10_PLUS
|
|
33
32
|
from airflow.utils.log.logging_mixin import LoggingMixin
|
|
34
33
|
from airflow.utils.state import TaskInstanceState
|
|
34
|
+
from openlineage.client.facet_v2 import JobFacet, RunFacet
|
|
35
35
|
|
|
36
36
|
# this is not to break static checks compatibility with v1 OpenLineage facet classes
|
|
37
37
|
DatasetSubclass = TypeVar("DatasetSubclass", bound=OLDataset)
|
|
@@ -17,11 +17,10 @@
|
|
|
17
17
|
|
|
18
18
|
from __future__ import annotations
|
|
19
19
|
|
|
20
|
-
from openlineage.client.facet_v2 import source_code_job
|
|
21
|
-
|
|
22
20
|
from airflow.providers.openlineage import conf
|
|
23
21
|
from airflow.providers.openlineage.extractors.base import BaseExtractor, OperatorLineage
|
|
24
22
|
from airflow.providers.openlineage.utils.utils import get_unknown_source_attribute_run_facet
|
|
23
|
+
from openlineage.client.facet_v2 import source_code_job
|
|
25
24
|
|
|
26
25
|
"""
|
|
27
26
|
:meta private:
|
|
@@ -34,10 +34,9 @@ from airflow.providers.openlineage.utils.utils import (
|
|
|
34
34
|
from airflow.utils.log.logging_mixin import LoggingMixin
|
|
35
35
|
|
|
36
36
|
if TYPE_CHECKING:
|
|
37
|
-
from openlineage.client.event_v2 import Dataset
|
|
38
|
-
|
|
39
|
-
from airflow.lineage.entities import Table
|
|
40
37
|
from airflow.models import Operator
|
|
38
|
+
from airflow.providers.common.compat.lineage.entities import Table
|
|
39
|
+
from openlineage.client.event_v2 import Dataset
|
|
41
40
|
|
|
42
41
|
|
|
43
42
|
def _iter_extractor_types() -> Iterator[type[BaseExtractor]]:
|
|
@@ -291,10 +290,9 @@ class ExtractorManager(LoggingMixin):
|
|
|
291
290
|
|
|
292
291
|
@staticmethod
|
|
293
292
|
def convert_to_ol_dataset(obj) -> Dataset | None:
|
|
293
|
+
from airflow.providers.common.compat.lineage.entities import File, Table
|
|
294
294
|
from openlineage.client.event_v2 import Dataset
|
|
295
295
|
|
|
296
|
-
from airflow.lineage.entities import File, Table
|
|
297
|
-
|
|
298
296
|
if isinstance(obj, Dataset):
|
|
299
297
|
return obj
|
|
300
298
|
elif isinstance(obj, Table):
|
|
@@ -20,11 +20,10 @@ from __future__ import annotations
|
|
|
20
20
|
import inspect
|
|
21
21
|
from typing import Callable
|
|
22
22
|
|
|
23
|
-
from openlineage.client.facet_v2 import source_code_job
|
|
24
|
-
|
|
25
23
|
from airflow.providers.openlineage import conf
|
|
26
24
|
from airflow.providers.openlineage.extractors.base import BaseExtractor, OperatorLineage
|
|
27
25
|
from airflow.providers.openlineage.utils.utils import get_unknown_source_attribute_run_facet
|
|
26
|
+
from openlineage.client.facet_v2 import source_code_job
|
|
28
27
|
|
|
29
28
|
"""
|
|
30
29
|
:meta private:
|
|
@@ -15,8 +15,7 @@
|
|
|
15
15
|
# specific language governing permissions and limitations
|
|
16
16
|
# under the License.
|
|
17
17
|
|
|
18
|
-
# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE
|
|
19
|
-
# OVERWRITTEN WHEN PREPARING PACKAGES.
|
|
18
|
+
# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN!
|
|
20
19
|
#
|
|
21
20
|
# IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE
|
|
22
21
|
# `get_provider_info_TEMPLATE.py.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY
|
|
@@ -28,8 +27,9 @@ def get_provider_info():
|
|
|
28
27
|
"name": "OpenLineage Airflow",
|
|
29
28
|
"description": "`OpenLineage <https://openlineage.io/>`__\n",
|
|
30
29
|
"state": "ready",
|
|
31
|
-
"source-date-epoch":
|
|
30
|
+
"source-date-epoch": 1739964022,
|
|
32
31
|
"versions": [
|
|
32
|
+
"2.1.0",
|
|
33
33
|
"2.0.0",
|
|
34
34
|
"1.14.0",
|
|
35
35
|
"1.13.0",
|
|
@@ -56,19 +56,11 @@ def get_provider_info():
|
|
|
56
56
|
"1.0.1",
|
|
57
57
|
"1.0.0",
|
|
58
58
|
],
|
|
59
|
-
"dependencies": [
|
|
60
|
-
"apache-airflow>=2.9.0",
|
|
61
|
-
"apache-airflow-providers-common-sql>=1.20.0",
|
|
62
|
-
"apache-airflow-providers-common-compat>=1.3.0",
|
|
63
|
-
"attrs>=22.2",
|
|
64
|
-
"openlineage-integration-common>=1.24.2",
|
|
65
|
-
"openlineage-python>=1.24.2",
|
|
66
|
-
],
|
|
67
59
|
"integrations": [
|
|
68
60
|
{
|
|
69
61
|
"integration-name": "OpenLineage",
|
|
70
62
|
"external-doc-url": "https://openlineage.io",
|
|
71
|
-
"logo": "/integration-logos/openlineage
|
|
63
|
+
"logo": "/docs/integration-logos/openlineage.svg",
|
|
72
64
|
"tags": ["protocol"],
|
|
73
65
|
}
|
|
74
66
|
],
|
|
@@ -178,9 +170,25 @@ def get_provider_info():
|
|
|
178
170
|
"type": "boolean",
|
|
179
171
|
"default": "False",
|
|
180
172
|
"example": None,
|
|
181
|
-
"version_added": "
|
|
173
|
+
"version_added": "2.0.0",
|
|
174
|
+
},
|
|
175
|
+
"spark_inject_transport_info": {
|
|
176
|
+
"description": "Automatically inject OpenLineage's transport information into Spark application properties\nfor supported Operators.\n",
|
|
177
|
+
"type": "boolean",
|
|
178
|
+
"default": "False",
|
|
179
|
+
"example": None,
|
|
180
|
+
"version_added": "2.1.0",
|
|
182
181
|
},
|
|
183
182
|
},
|
|
184
183
|
}
|
|
185
184
|
},
|
|
185
|
+
"dependencies": [
|
|
186
|
+
"apache-airflow>=2.9.0",
|
|
187
|
+
"apache-airflow-providers-common-sql>=1.20.0",
|
|
188
|
+
"apache-airflow-providers-common-compat>=1.4.0",
|
|
189
|
+
"attrs>=22.2",
|
|
190
|
+
"openlineage-integration-common>=1.24.2",
|
|
191
|
+
"openlineage-python>=1.24.2",
|
|
192
|
+
"uuid6>=2024.7.10",
|
|
193
|
+
],
|
|
186
194
|
}
|
|
@@ -21,6 +21,16 @@ from contextlib import ExitStack
|
|
|
21
21
|
from typing import TYPE_CHECKING
|
|
22
22
|
|
|
23
23
|
import yaml
|
|
24
|
+
|
|
25
|
+
from airflow.providers.openlineage import __version__ as OPENLINEAGE_PROVIDER_VERSION, conf
|
|
26
|
+
from airflow.providers.openlineage.utils.utils import (
|
|
27
|
+
OpenLineageRedactor,
|
|
28
|
+
get_airflow_debug_facet,
|
|
29
|
+
get_airflow_state_run_facet,
|
|
30
|
+
get_processing_engine_facet,
|
|
31
|
+
)
|
|
32
|
+
from airflow.stats import Stats
|
|
33
|
+
from airflow.utils.log.logging_mixin import LoggingMixin
|
|
24
34
|
from openlineage.client import OpenLineageClient, set_producer
|
|
25
35
|
from openlineage.client.event_v2 import Job, Run, RunEvent, RunState
|
|
26
36
|
from openlineage.client.facet_v2 import (
|
|
@@ -36,22 +46,17 @@ from openlineage.client.facet_v2 import (
|
|
|
36
46
|
)
|
|
37
47
|
from openlineage.client.uuid import generate_static_uuid
|
|
38
48
|
|
|
39
|
-
from airflow.providers.openlineage import __version__ as OPENLINEAGE_PROVIDER_VERSION, conf
|
|
40
|
-
from airflow.providers.openlineage.utils.utils import (
|
|
41
|
-
OpenLineageRedactor,
|
|
42
|
-
get_airflow_debug_facet,
|
|
43
|
-
get_airflow_state_run_facet,
|
|
44
|
-
get_processing_engine_facet,
|
|
45
|
-
)
|
|
46
|
-
from airflow.stats import Stats
|
|
47
|
-
from airflow.utils.log.logging_mixin import LoggingMixin
|
|
48
|
-
|
|
49
49
|
if TYPE_CHECKING:
|
|
50
50
|
from datetime import datetime
|
|
51
51
|
|
|
52
52
|
from airflow.providers.openlineage.extractors import OperatorLineage
|
|
53
|
-
from airflow.
|
|
53
|
+
from airflow.sdk.execution_time.secrets_masker import SecretsMasker, _secrets_masker
|
|
54
54
|
from airflow.utils.state import DagRunState
|
|
55
|
+
else:
|
|
56
|
+
try:
|
|
57
|
+
from airflow.sdk.execution_time.secrets_masker import SecretsMasker, _secrets_masker
|
|
58
|
+
except ImportError:
|
|
59
|
+
from airflow.utils.log.secrets_masker import SecretsMasker, _secrets_masker
|
|
55
60
|
|
|
56
61
|
_PRODUCER = f"https://github.com/apache/airflow/tree/providers-openlineage/{OPENLINEAGE_PROVIDER_VERSION}"
|
|
57
62
|
|
|
@@ -71,8 +76,6 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
71
76
|
super().__init__()
|
|
72
77
|
self._client = client
|
|
73
78
|
if not secrets_masker:
|
|
74
|
-
from airflow.utils.log.secrets_masker import _secrets_masker
|
|
75
|
-
|
|
76
79
|
secrets_masker = _secrets_masker()
|
|
77
80
|
self._redacter = OpenLineageRedactor.from_masker(secrets_masker)
|
|
78
81
|
|
|
@@ -251,6 +254,7 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
251
254
|
run_facets = run_facets or {}
|
|
252
255
|
if task:
|
|
253
256
|
run_facets = {**task.run_facets, **run_facets}
|
|
257
|
+
run_facets = {**run_facets, **get_processing_engine_facet()} # type: ignore
|
|
254
258
|
event = RunEvent(
|
|
255
259
|
eventType=RunState.COMPLETE,
|
|
256
260
|
eventTime=end_time,
|
|
@@ -296,6 +300,7 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
296
300
|
run_facets = run_facets or {}
|
|
297
301
|
if task:
|
|
298
302
|
run_facets = {**task.run_facets, **run_facets}
|
|
303
|
+
run_facets = {**run_facets, **get_processing_engine_facet()} # type: ignore
|
|
299
304
|
|
|
300
305
|
if error:
|
|
301
306
|
stack_trace = None
|
|
@@ -377,6 +382,7 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
377
382
|
clear_number: int,
|
|
378
383
|
dag_run_state: DagRunState,
|
|
379
384
|
task_ids: list[str],
|
|
385
|
+
run_facets: dict[str, RunFacet],
|
|
380
386
|
):
|
|
381
387
|
try:
|
|
382
388
|
event = RunEvent(
|
|
@@ -390,6 +396,8 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
390
396
|
facets={
|
|
391
397
|
**get_airflow_state_run_facet(dag_id, run_id, task_ids, dag_run_state),
|
|
392
398
|
**get_airflow_debug_facet(),
|
|
399
|
+
**get_processing_engine_facet(),
|
|
400
|
+
**run_facets,
|
|
393
401
|
},
|
|
394
402
|
),
|
|
395
403
|
inputs=[],
|
|
@@ -413,6 +421,7 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
413
421
|
dag_run_state: DagRunState,
|
|
414
422
|
task_ids: list[str],
|
|
415
423
|
msg: str,
|
|
424
|
+
run_facets: dict[str, RunFacet],
|
|
416
425
|
):
|
|
417
426
|
try:
|
|
418
427
|
event = RunEvent(
|
|
@@ -431,6 +440,8 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
431
440
|
),
|
|
432
441
|
**get_airflow_state_run_facet(dag_id, run_id, task_ids, dag_run_state),
|
|
433
442
|
**get_airflow_debug_facet(),
|
|
443
|
+
**get_processing_engine_facet(),
|
|
444
|
+
**run_facets,
|
|
434
445
|
},
|
|
435
446
|
),
|
|
436
447
|
inputs=[],
|