apache-airflow-providers-openlineage 1.6.0rc1__tar.gz → 1.7.0rc1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apache-airflow-providers-openlineage might be problematic. Click here for more details.
- {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/PKG-INFO +10 -9
- {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/README.rst +4 -4
- {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/airflow/providers/openlineage/__init__.py +1 -1
- apache_airflow_providers_openlineage-1.7.0rc1/airflow/providers/openlineage/conf.py +103 -0
- {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/airflow/providers/openlineage/extractors/base.py +3 -34
- {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/airflow/providers/openlineage/extractors/bash.py +6 -22
- {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/airflow/providers/openlineage/extractors/manager.py +14 -33
- {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/airflow/providers/openlineage/extractors/python.py +6 -22
- {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/airflow/providers/openlineage/get_provider_info.py +9 -1
- {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/airflow/providers/openlineage/plugins/adapter.py +9 -19
- {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/airflow/providers/openlineage/plugins/facets.py +11 -0
- {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/airflow/providers/openlineage/plugins/listener.py +49 -4
- apache_airflow_providers_openlineage-1.7.0rc1/airflow/providers/openlineage/plugins/macros.py +87 -0
- {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/airflow/providers/openlineage/plugins/openlineage.py +9 -19
- apache_airflow_providers_openlineage-1.7.0rc1/airflow/providers/openlineage/utils/selective_enable.py +87 -0
- {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/airflow/providers/openlineage/utils/utils.py +79 -125
- {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/pyproject.toml +6 -5
- apache_airflow_providers_openlineage-1.6.0rc1/airflow/providers/openlineage/plugins/macros.py +0 -66
- {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/airflow/providers/openlineage/LICENSE +0 -0
- {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/airflow/providers/openlineage/extractors/__init__.py +0 -0
- {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/airflow/providers/openlineage/plugins/__init__.py +0 -0
- {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/airflow/providers/openlineage/sqlparser.py +0 -0
- {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/airflow/providers/openlineage/utils/__init__.py +0 -0
- {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/airflow/providers/openlineage/utils/sql.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: apache-airflow-providers-openlineage
|
|
3
|
-
Version: 1.6.0rc1
|
|
3
|
+
Version: 1.7.0rc1
|
|
4
4
|
Summary: Provider package apache-airflow-providers-openlineage for Apache Airflow
|
|
5
5
|
Keywords: airflow-provider,openlineage,airflow,integration
|
|
6
6
|
Author-email: Apache Software Foundation <dev@airflow.apache.org>
|
|
@@ -19,16 +19,17 @@ Classifier: Programming Language :: Python :: 3.8
|
|
|
19
19
|
Classifier: Programming Language :: Python :: 3.9
|
|
20
20
|
Classifier: Programming Language :: Python :: 3.10
|
|
21
21
|
Classifier: Programming Language :: Python :: 3.11
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
22
23
|
Classifier: Topic :: System :: Monitoring
|
|
23
|
-
Requires-Dist: apache-airflow-providers-common-sql>=1.6.
|
|
24
|
-
Requires-Dist: apache-airflow>=2.7.
|
|
24
|
+
Requires-Dist: apache-airflow-providers-common-sql>=1.6.0rc0
|
|
25
|
+
Requires-Dist: apache-airflow>=2.7.0rc0
|
|
25
26
|
Requires-Dist: attrs>=22.2
|
|
26
27
|
Requires-Dist: openlineage-integration-common>=0.28.0
|
|
27
28
|
Requires-Dist: openlineage-python>=0.28.0
|
|
28
29
|
Requires-Dist: apache-airflow-providers-common-sql ; extra == "common.sql"
|
|
29
30
|
Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
|
|
30
|
-
Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.6.0/changelog.html
|
|
31
|
-
Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.6.0
|
|
31
|
+
Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.7.0/changelog.html
|
|
32
|
+
Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.7.0
|
|
32
33
|
Project-URL: Slack Chat, https://s.apache.org/airflow-slack
|
|
33
34
|
Project-URL: Source Code, https://github.com/apache/airflow
|
|
34
35
|
Project-URL: Twitter, https://twitter.com/ApacheAirflow
|
|
@@ -79,7 +80,7 @@ Provides-Extra: common.sql
|
|
|
79
80
|
|
|
80
81
|
Package ``apache-airflow-providers-openlineage``
|
|
81
82
|
|
|
82
|
-
Release: ``1.6.0.rc1``
|
|
83
|
+
Release: ``1.7.0.rc1``
|
|
83
84
|
|
|
84
85
|
|
|
85
86
|
`OpenLineage <https://openlineage.io/>`__
|
|
@@ -92,7 +93,7 @@ This is a provider package for ``openlineage`` provider. All classes for this pr
|
|
|
92
93
|
are in ``airflow.providers.openlineage`` python package.
|
|
93
94
|
|
|
94
95
|
You can find package information and changelog for the provider
|
|
95
|
-
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.6.0/>`_.
|
|
96
|
+
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.7.0/>`_.
|
|
96
97
|
|
|
97
98
|
Installation
|
|
98
99
|
------------
|
|
@@ -101,7 +102,7 @@ You can install this package on top of an existing Airflow 2 installation (see `
|
|
|
101
102
|
for the minimum Airflow version supported) via
|
|
102
103
|
``pip install apache-airflow-providers-openlineage``
|
|
103
104
|
|
|
104
|
-
The package supports the following python versions: 3.8,3.9,3.10,3.11
|
|
105
|
+
The package supports the following python versions: 3.8,3.9,3.10,3.11,3.12
|
|
105
106
|
|
|
106
107
|
Requirements
|
|
107
108
|
------------
|
|
@@ -136,4 +137,4 @@ Dependent package
|
|
|
136
137
|
============================================================================================================ ==============
|
|
137
138
|
|
|
138
139
|
The changelog for the provider package can be found in the
|
|
139
|
-
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.6.0/changelog.html>`_.
|
|
140
|
+
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.7.0/changelog.html>`_.
|
|
@@ -42,7 +42,7 @@
|
|
|
42
42
|
|
|
43
43
|
Package ``apache-airflow-providers-openlineage``
|
|
44
44
|
|
|
45
|
-
Release: ``1.6.0.rc1``
|
|
45
|
+
Release: ``1.7.0.rc1``
|
|
46
46
|
|
|
47
47
|
|
|
48
48
|
`OpenLineage <https://openlineage.io/>`__
|
|
@@ -55,7 +55,7 @@ This is a provider package for ``openlineage`` provider. All classes for this pr
|
|
|
55
55
|
are in ``airflow.providers.openlineage`` python package.
|
|
56
56
|
|
|
57
57
|
You can find package information and changelog for the provider
|
|
58
|
-
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.6.0/>`_.
|
|
58
|
+
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.7.0/>`_.
|
|
59
59
|
|
|
60
60
|
Installation
|
|
61
61
|
------------
|
|
@@ -64,7 +64,7 @@ You can install this package on top of an existing Airflow 2 installation (see `
|
|
|
64
64
|
for the minimum Airflow version supported) via
|
|
65
65
|
``pip install apache-airflow-providers-openlineage``
|
|
66
66
|
|
|
67
|
-
The package supports the following python versions: 3.8,3.9,3.10,3.11
|
|
67
|
+
The package supports the following python versions: 3.8,3.9,3.10,3.11,3.12
|
|
68
68
|
|
|
69
69
|
Requirements
|
|
70
70
|
------------
|
|
@@ -99,4 +99,4 @@ Dependent package
|
|
|
99
99
|
============================================================================================================ ==============
|
|
100
100
|
|
|
101
101
|
The changelog for the provider package can be found in the
|
|
102
|
-
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.6.0/changelog.html>`_.
|
|
102
|
+
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.7.0/changelog.html>`_.
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
|
3
|
+
# distributed with this work for additional information
|
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
|
6
|
+
# "License"); you may not use this file except in compliance
|
|
7
|
+
# with the License. You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import os
|
|
21
|
+
from typing import Any
|
|
22
|
+
|
|
23
|
+
from airflow.compat.functools import cache
|
|
24
|
+
from airflow.configuration import conf
|
|
25
|
+
|
|
26
|
+
_CONFIG_SECTION = "openlineage"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@cache
def config_path(check_legacy_env_var: bool = True) -> str:
    """
    Return the ``[openlineage] config_path`` option.

    :param check_legacy_env_var: when true and the Airflow option is empty,
        fall back to the ``OPENLINEAGE_CONFIG`` environment variable.
    :return: the configured path, or an empty string when nothing is set.
    """
    configured = conf.get(_CONFIG_SECTION, "config_path", fallback="")
    # The Airflow option wins; the legacy variable is consulted only on request.
    if configured or not check_legacy_env_var:
        return configured
    return os.getenv("OPENLINEAGE_CONFIG", "")
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@cache
def is_source_enabled() -> bool:
    """
    Tell whether source code may be included in emitted lineage metadata.

    Reads ``[openlineage] disable_source_code`` and, when that option is empty,
    the ``OPENLINEAGE_AIRFLOW_DISABLE_SOURCE_CODE`` environment variable.

    :return: ``False`` when the resolved value is one of ``true``, ``1`` or ``t``
        (case-insensitive, surrounding whitespace ignored); ``True`` otherwise.
    """
    option = conf.get(_CONFIG_SECTION, "disable_source_code", fallback="")
    if not option:
        option = os.getenv("OPENLINEAGE_AIRFLOW_DISABLE_SOURCE_CODE", "")
    # Normalise the same way ``is_disabled`` does (str -> lower -> strip) so a
    # value such as " True " is honoured instead of being silently ignored.
    return str(option).lower().strip() not in ("true", "1", "t")
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@cache
def disabled_operators() -> set[str]:
    """
    Return the ``[openlineage] disabled_for_operators`` option as a set.

    The option is a ``;``-separated string; entries are stripped of surrounding
    whitespace and empty entries are dropped.
    """
    raw_value = conf.get(_CONFIG_SECTION, "disabled_for_operators", fallback="")
    stripped_names = (chunk.strip() for chunk in raw_value.split(";"))
    return {name for name in stripped_names if name}
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@cache
def selective_enable() -> bool:
    """
    Return the ``[openlineage] selective_enable`` boolean option.

    Falls back to ``False`` when the option is not set.
    """
    enabled = conf.getboolean(_CONFIG_SECTION, "selective_enable", fallback=False)
    return enabled
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@cache
def custom_extractors() -> set[str]:
    """
    Return the ``[openlineage] extractors`` option as a set.

    When the Airflow option is empty the ``OPENLINEAGE_EXTRACTORS`` environment
    variable is used instead. The value is a ``;``-separated string; entries
    are stripped of surrounding whitespace and empty entries are dropped.
    """
    raw_value = conf.get(_CONFIG_SECTION, "extractors", fallback="")
    if not raw_value:
        raw_value = os.getenv("OPENLINEAGE_EXTRACTORS", "")
    stripped_paths = (chunk.strip() for chunk in raw_value.split(";"))
    return {path for path in stripped_paths if path}
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@cache
def namespace() -> str:
    """
    Return the ``[openlineage] namespace`` option.

    When the Airflow option is empty, the ``OPENLINEAGE_NAMESPACE`` environment
    variable is used, and ``"default"`` when that is not set either.
    """
    configured = conf.get(_CONFIG_SECTION, "namespace", fallback="")
    return configured or os.getenv("OPENLINEAGE_NAMESPACE", "default")
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@cache
def transport() -> dict[str, Any]:
    """
    Return the ``[openlineage] transport`` option parsed as JSON.

    :return: the parsed dict, or an empty dict when the option is not set.
    :raises ValueError: when the parsed value is not a dict.
    """
    parsed = conf.getjson(_CONFIG_SECTION, "transport", fallback={})
    if isinstance(parsed, dict):
        return parsed
    raise ValueError(f"OpenLineage transport `{parsed}` is not a dict")
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@cache
def is_disabled() -> bool:
    """
    Tell whether the OpenLineage integration is disabled.

    Returns ``True`` when ``[openlineage] disabled`` or the
    ``OPENLINEAGE_DISABLED`` environment variable is one of ``true``, ``1`` or
    ``t`` (case-insensitive, surrounding whitespace ignored). Otherwise returns
    ``True`` only when no transport, no config path and no legacy
    ``OPENLINEAGE_URL`` environment variable are configured.
    """
    truthy_values = ("true", "1", "t")

    def _is_true(val):
        return str(val).lower().strip() in truthy_values

    # Explicit switches: the Airflow option first, then the environment variable.
    if _is_true(conf.get(_CONFIG_SECTION, "disabled", fallback="")):
        return True
    if _is_true(os.getenv("OPENLINEAGE_DISABLED", "")):
        return True

    # Without an explicit switch, the integration counts as disabled only when
    # nothing tells it where to send events: no 'transport', no 'config_path'
    # and no legacy 'OPENLINEAGE_URL' environment variable.
    if transport() != {}:
        return False
    if config_path(True) != "":
        return False
    return os.getenv("OPENLINEAGE_URL", "") == ""
|
|
@@ -18,12 +18,10 @@
|
|
|
18
18
|
from __future__ import annotations
|
|
19
19
|
|
|
20
20
|
from abc import ABC, abstractmethod
|
|
21
|
-
from functools import cached_property
|
|
22
21
|
from typing import TYPE_CHECKING
|
|
23
22
|
|
|
24
23
|
from attrs import Factory, define
|
|
25
24
|
|
|
26
|
-
from airflow.configuration import conf
|
|
27
25
|
from airflow.utils.log.logging_mixin import LoggingMixin
|
|
28
26
|
from airflow.utils.state import TaskInstanceState
|
|
29
27
|
|
|
@@ -64,33 +62,10 @@ class BaseExtractor(ABC, LoggingMixin):
|
|
|
64
62
|
"""
|
|
65
63
|
raise NotImplementedError()
|
|
66
64
|
|
|
67
|
-
@cached_property
|
|
68
|
-
def disabled_operators(self) -> set[str]:
|
|
69
|
-
return set(
|
|
70
|
-
operator.strip() for operator in conf.get("openlineage", "disabled_for_operators").split(";")
|
|
71
|
-
)
|
|
72
|
-
|
|
73
|
-
@cached_property
|
|
74
|
-
def _is_operator_disabled(self) -> bool:
|
|
75
|
-
fully_qualified_class_name = (
|
|
76
|
-
self.operator.__class__.__module__ + "." + self.operator.__class__.__name__
|
|
77
|
-
)
|
|
78
|
-
return fully_qualified_class_name in self.disabled_operators
|
|
79
|
-
|
|
80
|
-
def validate(self):
|
|
81
|
-
assert self.operator.task_type in self.get_operator_classnames()
|
|
82
|
-
|
|
83
65
|
@abstractmethod
|
|
84
|
-
def _execute_extraction(self) -> OperatorLineage | None:
|
|
85
|
-
...
|
|
66
|
+
def _execute_extraction(self) -> OperatorLineage | None: ...
|
|
86
67
|
|
|
87
68
|
def extract(self) -> OperatorLineage | None:
|
|
88
|
-
if self._is_operator_disabled:
|
|
89
|
-
self.log.debug(
|
|
90
|
-
f"Skipping extraction for operator {self.operator.task_type} "
|
|
91
|
-
"due to its presence in [openlineage] openlineage_disabled_for_operators."
|
|
92
|
-
)
|
|
93
|
-
return None
|
|
94
69
|
return self._execute_extraction()
|
|
95
70
|
|
|
96
71
|
def extract_on_complete(self, task_instance) -> OperatorLineage | None:
|
|
@@ -121,18 +96,12 @@ class DefaultExtractor(BaseExtractor):
|
|
|
121
96
|
return None
|
|
122
97
|
except AttributeError:
|
|
123
98
|
self.log.debug(
|
|
124
|
-
|
|
125
|
-
|
|
99
|
+
"Operator %s does not have the get_openlineage_facets_on_start method.",
|
|
100
|
+
self.operator.task_type,
|
|
126
101
|
)
|
|
127
102
|
return None
|
|
128
103
|
|
|
129
104
|
def extract_on_complete(self, task_instance) -> OperatorLineage | None:
|
|
130
|
-
if self._is_operator_disabled:
|
|
131
|
-
self.log.debug(
|
|
132
|
-
f"Skipping extraction for operator {self.operator.task_type} "
|
|
133
|
-
"due to its presence in [openlineage] openlineage_disabled_for_operators."
|
|
134
|
-
)
|
|
135
|
-
return None
|
|
136
105
|
if task_instance.state == TaskInstanceState.FAILED:
|
|
137
106
|
on_failed = getattr(self.operator, "get_openlineage_facets_on_failure", None)
|
|
138
107
|
if on_failed and callable(on_failed):
|
|
@@ -19,15 +19,9 @@ from __future__ import annotations
|
|
|
19
19
|
|
|
20
20
|
from openlineage.client.facet import SourceCodeJobFacet
|
|
21
21
|
|
|
22
|
+
from airflow.providers.openlineage import conf
|
|
22
23
|
from airflow.providers.openlineage.extractors.base import BaseExtractor, OperatorLineage
|
|
23
|
-
from airflow.providers.openlineage.
|
|
24
|
-
UnknownOperatorAttributeRunFacet,
|
|
25
|
-
UnknownOperatorInstance,
|
|
26
|
-
)
|
|
27
|
-
from airflow.providers.openlineage.utils.utils import (
|
|
28
|
-
get_filtered_unknown_operator_keys,
|
|
29
|
-
is_source_enabled,
|
|
30
|
-
)
|
|
24
|
+
from airflow.providers.openlineage.utils.utils import get_unknown_source_attribute_run_facet
|
|
31
25
|
|
|
32
26
|
"""
|
|
33
27
|
:meta private:
|
|
@@ -51,7 +45,7 @@ class BashExtractor(BaseExtractor):
|
|
|
51
45
|
|
|
52
46
|
def _execute_extraction(self) -> OperatorLineage | None:
|
|
53
47
|
job_facets: dict = {}
|
|
54
|
-
if is_source_enabled():
|
|
48
|
+
if conf.is_source_enabled():
|
|
55
49
|
job_facets = {
|
|
56
50
|
"sourceCode": SourceCodeJobFacet(
|
|
57
51
|
language="bash",
|
|
@@ -62,19 +56,9 @@ class BashExtractor(BaseExtractor):
|
|
|
62
56
|
|
|
63
57
|
return OperatorLineage(
|
|
64
58
|
job_facets=job_facets,
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
# directly.
|
|
69
|
-
"unknownSourceAttribute": UnknownOperatorAttributeRunFacet(
|
|
70
|
-
unknownItems=[
|
|
71
|
-
UnknownOperatorInstance(
|
|
72
|
-
name="BashOperator",
|
|
73
|
-
properties=get_filtered_unknown_operator_keys(self.operator),
|
|
74
|
-
)
|
|
75
|
-
]
|
|
76
|
-
)
|
|
77
|
-
},
|
|
59
|
+
# The BashOperator is recorded as an "unknownSource" even though we have an extractor,
|
|
60
|
+
# as the <i>data lineage</i> cannot be determined from the operator directly.
|
|
61
|
+
run_facets=get_unknown_source_attribute_run_facet(task=self.operator, name="BashOperator"),
|
|
78
62
|
)
|
|
79
63
|
|
|
80
64
|
def extract(self) -> OperatorLineage | None:
|
|
@@ -16,20 +16,15 @@
|
|
|
16
16
|
# under the License.
|
|
17
17
|
from __future__ import annotations
|
|
18
18
|
|
|
19
|
-
import os
|
|
20
19
|
from contextlib import suppress
|
|
21
20
|
from typing import TYPE_CHECKING, Iterator
|
|
22
21
|
|
|
23
|
-
from airflow.
|
|
22
|
+
from airflow.providers.openlineage import conf
|
|
24
23
|
from airflow.providers.openlineage.extractors import BaseExtractor, OperatorLineage
|
|
25
24
|
from airflow.providers.openlineage.extractors.base import DefaultExtractor
|
|
26
25
|
from airflow.providers.openlineage.extractors.bash import BashExtractor
|
|
27
26
|
from airflow.providers.openlineage.extractors.python import PythonExtractor
|
|
28
|
-
from airflow.providers.openlineage.
|
|
29
|
-
UnknownOperatorAttributeRunFacet,
|
|
30
|
-
UnknownOperatorInstance,
|
|
31
|
-
)
|
|
32
|
-
from airflow.providers.openlineage.utils.utils import get_filtered_unknown_operator_keys
|
|
27
|
+
from airflow.providers.openlineage.utils.utils import get_unknown_source_attribute_run_facet
|
|
33
28
|
from airflow.utils.log.logging_mixin import LoggingMixin
|
|
34
29
|
from airflow.utils.module_loading import import_string
|
|
35
30
|
|
|
@@ -65,22 +60,17 @@ class ExtractorManager(LoggingMixin):
|
|
|
65
60
|
for operator_class in extractor.get_operator_classnames():
|
|
66
61
|
self.extractors[operator_class] = extractor
|
|
67
62
|
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
operator_class,
|
|
80
|
-
extractor,
|
|
81
|
-
self.extractors[operator_class],
|
|
82
|
-
)
|
|
83
|
-
self.extractors[operator_class] = extractor
|
|
63
|
+
for extractor_path in conf.custom_extractors():
|
|
64
|
+
extractor: type[BaseExtractor] = try_import_from_string(extractor_path)
|
|
65
|
+
for operator_class in extractor.get_operator_classnames():
|
|
66
|
+
if operator_class in self.extractors:
|
|
67
|
+
self.log.debug(
|
|
68
|
+
"Duplicate extractor found for `%s`. `%s` will be used instead of `%s`",
|
|
69
|
+
operator_class,
|
|
70
|
+
extractor_path,
|
|
71
|
+
self.extractors[operator_class],
|
|
72
|
+
)
|
|
73
|
+
self.extractors[operator_class] = extractor
|
|
84
74
|
|
|
85
75
|
def add_extractor(self, operator_class: str, extractor: type[BaseExtractor]):
|
|
86
76
|
self.extractors[operator_class] = extractor
|
|
@@ -121,16 +111,7 @@ class ExtractorManager(LoggingMixin):
|
|
|
121
111
|
|
|
122
112
|
# Only include the unknownSourceAttribute facet if there is no extractor
|
|
123
113
|
task_metadata = OperatorLineage(
|
|
124
|
-
run_facets=
|
|
125
|
-
"unknownSourceAttribute": UnknownOperatorAttributeRunFacet(
|
|
126
|
-
unknownItems=[
|
|
127
|
-
UnknownOperatorInstance(
|
|
128
|
-
name=task.task_type,
|
|
129
|
-
properties=get_filtered_unknown_operator_keys(task),
|
|
130
|
-
)
|
|
131
|
-
]
|
|
132
|
-
)
|
|
133
|
-
},
|
|
114
|
+
run_facets=get_unknown_source_attribute_run_facet(task=task),
|
|
134
115
|
)
|
|
135
116
|
inlets = task.get_inlet_defs()
|
|
136
117
|
outlets = task.get_outlet_defs()
|
|
@@ -22,15 +22,9 @@ from typing import Callable
|
|
|
22
22
|
|
|
23
23
|
from openlineage.client.facet import SourceCodeJobFacet
|
|
24
24
|
|
|
25
|
+
from airflow.providers.openlineage import conf
|
|
25
26
|
from airflow.providers.openlineage.extractors.base import BaseExtractor, OperatorLineage
|
|
26
|
-
from airflow.providers.openlineage.
|
|
27
|
-
UnknownOperatorAttributeRunFacet,
|
|
28
|
-
UnknownOperatorInstance,
|
|
29
|
-
)
|
|
30
|
-
from airflow.providers.openlineage.utils.utils import (
|
|
31
|
-
get_filtered_unknown_operator_keys,
|
|
32
|
-
is_source_enabled,
|
|
33
|
-
)
|
|
27
|
+
from airflow.providers.openlineage.utils.utils import get_unknown_source_attribute_run_facet
|
|
34
28
|
|
|
35
29
|
"""
|
|
36
30
|
:meta private:
|
|
@@ -55,7 +49,7 @@ class PythonExtractor(BaseExtractor):
|
|
|
55
49
|
def _execute_extraction(self) -> OperatorLineage | None:
|
|
56
50
|
source_code = self.get_source_code(self.operator.python_callable)
|
|
57
51
|
job_facet: dict = {}
|
|
58
|
-
if is_source_enabled() and source_code:
|
|
52
|
+
if conf.is_source_enabled() and source_code:
|
|
59
53
|
job_facet = {
|
|
60
54
|
"sourceCode": SourceCodeJobFacet(
|
|
61
55
|
language="python",
|
|
@@ -65,19 +59,9 @@ class PythonExtractor(BaseExtractor):
|
|
|
65
59
|
}
|
|
66
60
|
return OperatorLineage(
|
|
67
61
|
job_facets=job_facet,
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
# directly.
|
|
72
|
-
"unknownSourceAttribute": UnknownOperatorAttributeRunFacet(
|
|
73
|
-
unknownItems=[
|
|
74
|
-
UnknownOperatorInstance(
|
|
75
|
-
name="PythonOperator",
|
|
76
|
-
properties=get_filtered_unknown_operator_keys(self.operator),
|
|
77
|
-
)
|
|
78
|
-
]
|
|
79
|
-
)
|
|
80
|
-
},
|
|
62
|
+
# The PythonOperator is recorded as an "unknownSource" even though we have an extractor,
|
|
63
|
+
# as the <i>data lineage</i> cannot be determined from the operator directly.
|
|
64
|
+
run_facets=get_unknown_source_attribute_run_facet(task=self.operator, name="PythonOperator"),
|
|
81
65
|
)
|
|
82
66
|
|
|
83
67
|
def get_source_code(self, callable: Callable) -> str | None:
|
|
@@ -28,8 +28,9 @@ def get_provider_info():
|
|
|
28
28
|
"name": "OpenLineage Airflow",
|
|
29
29
|
"description": "`OpenLineage <https://openlineage.io/>`__\n",
|
|
30
30
|
"state": "ready",
|
|
31
|
-
"source-date-epoch":
|
|
31
|
+
"source-date-epoch": 1712666247,
|
|
32
32
|
"versions": [
|
|
33
|
+
"1.7.0",
|
|
33
34
|
"1.6.0",
|
|
34
35
|
"1.5.0",
|
|
35
36
|
"1.4.0",
|
|
@@ -82,6 +83,13 @@ def get_provider_info():
|
|
|
82
83
|
"default": "",
|
|
83
84
|
"version_added": "1.1.0",
|
|
84
85
|
},
|
|
86
|
+
"selective_enable": {
|
|
87
|
+
"description": "If this setting is enabled, OpenLineage integration won't collect and emit metadata,\nunless you explicitly enable it per `DAG` or `Task` using `enable_lineage` method.\n",
|
|
88
|
+
"type": "boolean",
|
|
89
|
+
"default": "False",
|
|
90
|
+
"example": None,
|
|
91
|
+
"version_added": "1.7.0",
|
|
92
|
+
},
|
|
85
93
|
"namespace": {
|
|
86
94
|
"description": "Set namespace that the lineage data belongs to, so that if you use multiple OpenLineage producers,\nevents coming from them will be logically separated.\n",
|
|
87
95
|
"version_added": None,
|
|
@@ -16,7 +16,6 @@
|
|
|
16
16
|
# under the License.
|
|
17
17
|
from __future__ import annotations
|
|
18
18
|
|
|
19
|
-
import os
|
|
20
19
|
import uuid
|
|
21
20
|
from contextlib import ExitStack
|
|
22
21
|
from typing import TYPE_CHECKING
|
|
@@ -37,8 +36,7 @@ from openlineage.client.facet import (
|
|
|
37
36
|
)
|
|
38
37
|
from openlineage.client.run import Job, Run, RunEvent, RunState
|
|
39
38
|
|
|
40
|
-
from airflow.
|
|
41
|
-
from airflow.providers.openlineage import __version__ as OPENLINEAGE_PROVIDER_VERSION
|
|
39
|
+
from airflow.providers.openlineage import __version__ as OPENLINEAGE_PROVIDER_VERSION, conf
|
|
42
40
|
from airflow.providers.openlineage.utils.utils import OpenLineageRedactor
|
|
43
41
|
from airflow.stats import Stats
|
|
44
42
|
from airflow.utils.log.logging_mixin import LoggingMixin
|
|
@@ -48,12 +46,6 @@ if TYPE_CHECKING:
|
|
|
48
46
|
from airflow.providers.openlineage.extractors import OperatorLineage
|
|
49
47
|
from airflow.utils.log.secrets_masker import SecretsMasker
|
|
50
48
|
|
|
51
|
-
_DAG_DEFAULT_NAMESPACE = "default"
|
|
52
|
-
|
|
53
|
-
_DAG_NAMESPACE = conf.get(
|
|
54
|
-
"openlineage", "namespace", fallback=os.getenv("OPENLINEAGE_NAMESPACE", _DAG_DEFAULT_NAMESPACE)
|
|
55
|
-
)
|
|
56
|
-
|
|
57
49
|
_PRODUCER = f"https://github.com/apache/airflow/tree/providers-openlineage/{OPENLINEAGE_PROVIDER_VERSION}"
|
|
58
50
|
|
|
59
51
|
set_producer(_PRODUCER)
|
|
@@ -88,18 +80,16 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
88
80
|
|
|
89
81
|
def get_openlineage_config(self) -> dict | None:
|
|
90
82
|
# First, try to read from YAML file
|
|
91
|
-
openlineage_config_path = conf.
|
|
83
|
+
openlineage_config_path = conf.config_path(check_legacy_env_var=False)
|
|
92
84
|
if openlineage_config_path:
|
|
93
85
|
config = self._read_yaml_config(openlineage_config_path)
|
|
94
86
|
if config:
|
|
95
87
|
return config.get("transport", None)
|
|
96
88
|
# Second, try to get transport config
|
|
97
|
-
|
|
98
|
-
if not
|
|
89
|
+
transport_config = conf.transport()
|
|
90
|
+
if not transport_config:
|
|
99
91
|
return None
|
|
100
|
-
|
|
101
|
-
raise ValueError(f"{transport} is not a dict")
|
|
102
|
-
return transport
|
|
92
|
+
return transport_config
|
|
103
93
|
|
|
104
94
|
def _read_yaml_config(self, path: str) -> dict | None:
|
|
105
95
|
with open(path) as config_file:
|
|
@@ -107,14 +97,14 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
107
97
|
|
|
108
98
|
@staticmethod
|
|
109
99
|
def build_dag_run_id(dag_id, dag_run_id):
|
|
110
|
-
return str(uuid.uuid3(uuid.NAMESPACE_URL, f"{
|
|
100
|
+
return str(uuid.uuid3(uuid.NAMESPACE_URL, f"{conf.namespace()}.{dag_id}.{dag_run_id}"))
|
|
111
101
|
|
|
112
102
|
@staticmethod
|
|
113
103
|
def build_task_instance_run_id(dag_id, task_id, execution_date, try_number):
|
|
114
104
|
return str(
|
|
115
105
|
uuid.uuid3(
|
|
116
106
|
uuid.NAMESPACE_URL,
|
|
117
|
-
f"{
|
|
107
|
+
f"{conf.namespace()}.{dag_id}.{task_id}.{execution_date}.{try_number}",
|
|
118
108
|
)
|
|
119
109
|
)
|
|
120
110
|
|
|
@@ -353,7 +343,7 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
353
343
|
if parent_run_id:
|
|
354
344
|
parent_run_facet = ParentRunFacet.create(
|
|
355
345
|
runId=parent_run_id,
|
|
356
|
-
namespace=
|
|
346
|
+
namespace=conf.namespace(),
|
|
357
347
|
name=parent_job_name or job_name,
|
|
358
348
|
)
|
|
359
349
|
facets.update(
|
|
@@ -396,4 +386,4 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
396
386
|
|
|
397
387
|
facets.update({"jobType": job_type})
|
|
398
388
|
|
|
399
|
-
return Job(
|
|
389
|
+
return Job(conf.namespace(), job_name, facets)
|
|
@@ -17,10 +17,17 @@
|
|
|
17
17
|
from __future__ import annotations
|
|
18
18
|
|
|
19
19
|
from attrs import define
|
|
20
|
+
from deprecated import deprecated
|
|
20
21
|
from openlineage.client.facet import BaseFacet
|
|
21
22
|
from openlineage.client.utils import RedactMixin
|
|
22
23
|
|
|
24
|
+
from airflow.exceptions import AirflowProviderDeprecationWarning
|
|
23
25
|
|
|
26
|
+
|
|
27
|
+
@deprecated(
|
|
28
|
+
reason="To be removed in the next release. Make sure to use information from AirflowRunFacet instead.",
|
|
29
|
+
category=AirflowProviderDeprecationWarning,
|
|
30
|
+
)
|
|
24
31
|
@define(slots=False)
|
|
25
32
|
class AirflowMappedTaskRunFacet(BaseFacet):
|
|
26
33
|
"""Run facet containing information about mapped tasks."""
|
|
@@ -66,6 +73,10 @@ class UnknownOperatorInstance(RedactMixin):
|
|
|
66
73
|
_skip_redact = ["name", "type"]
|
|
67
74
|
|
|
68
75
|
|
|
76
|
+
@deprecated(
|
|
77
|
+
reason="To be removed in the next release. Make sure to use information from AirflowRunFacet instead.",
|
|
78
|
+
category=AirflowProviderDeprecationWarning,
|
|
79
|
+
)
|
|
69
80
|
@define(slots=False)
|
|
70
81
|
class UnknownOperatorAttributeRunFacet(BaseFacet):
|
|
71
82
|
"""RunFacet that describes unknown operators in an Airflow DAG."""
|