apache-airflow-providers-openlineage 1.9.1rc1__tar.gz → 1.10.0rc1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {apache_airflow_providers_openlineage-1.9.1rc1 → apache_airflow_providers_openlineage-1.10.0rc1}/PKG-INFO +6 -6
- {apache_airflow_providers_openlineage-1.9.1rc1 → apache_airflow_providers_openlineage-1.10.0rc1}/README.rst +3 -3
- {apache_airflow_providers_openlineage-1.9.1rc1 → apache_airflow_providers_openlineage-1.10.0rc1}/airflow/providers/openlineage/__init__.py +1 -1
- {apache_airflow_providers_openlineage-1.9.1rc1 → apache_airflow_providers_openlineage-1.10.0rc1}/airflow/providers/openlineage/conf.py +31 -31
- {apache_airflow_providers_openlineage-1.9.1rc1 → apache_airflow_providers_openlineage-1.10.0rc1}/airflow/providers/openlineage/extractors/base.py +25 -10
- {apache_airflow_providers_openlineage-1.9.1rc1 → apache_airflow_providers_openlineage-1.10.0rc1}/airflow/providers/openlineage/extractors/bash.py +3 -3
- {apache_airflow_providers_openlineage-1.9.1rc1 → apache_airflow_providers_openlineage-1.10.0rc1}/airflow/providers/openlineage/extractors/manager.py +18 -15
- {apache_airflow_providers_openlineage-1.9.1rc1 → apache_airflow_providers_openlineage-1.10.0rc1}/airflow/providers/openlineage/extractors/python.py +3 -3
- apache_airflow_providers_openlineage-1.10.0rc1/airflow/providers/openlineage/facets/AirflowDagRunFacet.json +105 -0
- {apache_airflow_providers_openlineage-1.9.1rc1 → apache_airflow_providers_openlineage-1.10.0rc1}/airflow/providers/openlineage/facets/AirflowRunFacet.json +4 -0
- {apache_airflow_providers_openlineage-1.9.1rc1 → apache_airflow_providers_openlineage-1.10.0rc1}/airflow/providers/openlineage/get_provider_info.py +16 -1
- {apache_airflow_providers_openlineage-1.9.1rc1 → apache_airflow_providers_openlineage-1.10.0rc1}/airflow/providers/openlineage/plugins/adapter.py +67 -51
- {apache_airflow_providers_openlineage-1.9.1rc1 → apache_airflow_providers_openlineage-1.10.0rc1}/airflow/providers/openlineage/plugins/facets.py +20 -12
- {apache_airflow_providers_openlineage-1.9.1rc1 → apache_airflow_providers_openlineage-1.10.0rc1}/airflow/providers/openlineage/plugins/listener.py +22 -13
- {apache_airflow_providers_openlineage-1.9.1rc1 → apache_airflow_providers_openlineage-1.10.0rc1}/airflow/providers/openlineage/sqlparser.py +12 -19
- {apache_airflow_providers_openlineage-1.9.1rc1 → apache_airflow_providers_openlineage-1.10.0rc1}/airflow/providers/openlineage/utils/sql.py +5 -5
- {apache_airflow_providers_openlineage-1.9.1rc1 → apache_airflow_providers_openlineage-1.10.0rc1}/airflow/providers/openlineage/utils/utils.py +132 -15
- {apache_airflow_providers_openlineage-1.9.1rc1 → apache_airflow_providers_openlineage-1.10.0rc1}/pyproject.toml +3 -3
- {apache_airflow_providers_openlineage-1.9.1rc1 → apache_airflow_providers_openlineage-1.10.0rc1}/airflow/providers/openlineage/LICENSE +0 -0
- {apache_airflow_providers_openlineage-1.9.1rc1 → apache_airflow_providers_openlineage-1.10.0rc1}/airflow/providers/openlineage/extractors/__init__.py +0 -0
- {apache_airflow_providers_openlineage-1.9.1rc1 → apache_airflow_providers_openlineage-1.10.0rc1}/airflow/providers/openlineage/facets/AirflowJobFacet.json +0 -0
- {apache_airflow_providers_openlineage-1.9.1rc1 → apache_airflow_providers_openlineage-1.10.0rc1}/airflow/providers/openlineage/facets/AirflowStateRunFacet.json +0 -0
- {apache_airflow_providers_openlineage-1.9.1rc1 → apache_airflow_providers_openlineage-1.10.0rc1}/airflow/providers/openlineage/facets/__init__.py +0 -0
- {apache_airflow_providers_openlineage-1.9.1rc1 → apache_airflow_providers_openlineage-1.10.0rc1}/airflow/providers/openlineage/plugins/__init__.py +0 -0
- {apache_airflow_providers_openlineage-1.9.1rc1 → apache_airflow_providers_openlineage-1.10.0rc1}/airflow/providers/openlineage/plugins/macros.py +0 -0
- {apache_airflow_providers_openlineage-1.9.1rc1 → apache_airflow_providers_openlineage-1.10.0rc1}/airflow/providers/openlineage/plugins/openlineage.py +0 -0
- {apache_airflow_providers_openlineage-1.9.1rc1 → apache_airflow_providers_openlineage-1.10.0rc1}/airflow/providers/openlineage/utils/__init__.py +0 -0
- {apache_airflow_providers_openlineage-1.9.1rc1 → apache_airflow_providers_openlineage-1.10.0rc1}/airflow/providers/openlineage/utils/selective_enable.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: apache-airflow-providers-openlineage
|
|
3
|
-
Version: 1.9.1rc1
|
|
3
|
+
Version: 1.10.0rc1
|
|
4
4
|
Summary: Provider package apache-airflow-providers-openlineage for Apache Airflow
|
|
5
5
|
Keywords: airflow-provider,openlineage,airflow,integration
|
|
6
6
|
Author-email: Apache Software Foundation <dev@airflow.apache.org>
|
|
@@ -28,8 +28,8 @@ Requires-Dist: openlineage-integration-common>=1.16.0
|
|
|
28
28
|
Requires-Dist: openlineage-python>=1.16.0
|
|
29
29
|
Requires-Dist: apache-airflow-providers-common-sql ; extra == "common.sql"
|
|
30
30
|
Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
|
|
31
|
-
Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.9.1/changelog.html
|
|
32
|
-
Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.9.1
|
|
31
|
+
Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.10.0/changelog.html
|
|
32
|
+
Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.10.0
|
|
33
33
|
Project-URL: Slack Chat, https://s.apache.org/airflow-slack
|
|
34
34
|
Project-URL: Source Code, https://github.com/apache/airflow
|
|
35
35
|
Project-URL: Twitter, https://twitter.com/ApacheAirflow
|
|
@@ -80,7 +80,7 @@ Provides-Extra: common.sql
|
|
|
80
80
|
|
|
81
81
|
Package ``apache-airflow-providers-openlineage``
|
|
82
82
|
|
|
83
|
-
Release: ``1.9.1.rc1``
|
|
83
|
+
Release: ``1.10.0.rc1``
|
|
84
84
|
|
|
85
85
|
|
|
86
86
|
`OpenLineage <https://openlineage.io/>`__
|
|
@@ -93,7 +93,7 @@ This is a provider package for ``openlineage`` provider. All classes for this pr
|
|
|
93
93
|
are in ``airflow.providers.openlineage`` python package.
|
|
94
94
|
|
|
95
95
|
You can find package information and changelog for the provider
|
|
96
|
-
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.9.1/>`_.
|
|
96
|
+
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.10.0/>`_.
|
|
97
97
|
|
|
98
98
|
Installation
|
|
99
99
|
------------
|
|
@@ -137,4 +137,4 @@ Dependent package
|
|
|
137
137
|
============================================================================================================ ==============
|
|
138
138
|
|
|
139
139
|
The changelog for the provider package can be found in the
|
|
140
|
-
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.9.1/changelog.html>`_.
|
|
140
|
+
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.10.0/changelog.html>`_.
|
|
@@ -42,7 +42,7 @@
|
|
|
42
42
|
|
|
43
43
|
Package ``apache-airflow-providers-openlineage``
|
|
44
44
|
|
|
45
|
-
Release: ``1.9.1.rc1``
|
|
45
|
+
Release: ``1.10.0.rc1``
|
|
46
46
|
|
|
47
47
|
|
|
48
48
|
`OpenLineage <https://openlineage.io/>`__
|
|
@@ -55,7 +55,7 @@ This is a provider package for ``openlineage`` provider. All classes for this pr
|
|
|
55
55
|
are in ``airflow.providers.openlineage`` python package.
|
|
56
56
|
|
|
57
57
|
You can find package information and changelog for the provider
|
|
58
|
-
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.9.1/>`_.
|
|
58
|
+
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.10.0/>`_.
|
|
59
59
|
|
|
60
60
|
Installation
|
|
61
61
|
------------
|
|
@@ -99,4 +99,4 @@ Dependent package
|
|
|
99
99
|
============================================================================================================ ==============
|
|
100
100
|
|
|
101
101
|
The changelog for the provider package can be found in the
|
|
102
|
-
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.9.1/changelog.html>`_.
|
|
102
|
+
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.10.0/changelog.html>`_.
|
|
@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version
|
|
|
29
29
|
|
|
30
30
|
__all__ = ["__version__"]
|
|
31
31
|
|
|
32
|
-
__version__ = "1.9.1"
|
|
32
|
+
__version__ = "1.10.0"
|
|
33
33
|
|
|
34
34
|
if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
|
|
35
35
|
"2.7.0"
|
|
@@ -17,15 +17,9 @@
|
|
|
17
17
|
"""
|
|
18
18
|
This module provides functions for safely retrieving and handling OpenLineage configurations.
|
|
19
19
|
|
|
20
|
-
|
|
21
|
-
to
|
|
22
|
-
|
|
23
|
-
Any invalid configuration values should be treated as incorrect and replaced with default values.
|
|
24
|
-
For example, if the default for boolean ``custom_ol_var`` is False, any non-true value provided:
|
|
25
|
-
``"asdf"``, ``12345``, ``{"key": 1}`` or empty string, will result in False being used.
|
|
26
|
-
|
|
27
|
-
By using default values for invalid configuration values, we ensure that the configurations are handled
|
|
28
|
-
safely, preventing potential runtime errors due to conversion issues.
|
|
20
|
+
For the legacy boolean env variables `OPENLINEAGE_AIRFLOW_DISABLE_SOURCE_CODE` and `OPENLINEAGE_DISABLED`,
|
|
21
|
+
any string not equal to "true", "1", or "t" should be treated as False, to maintain backward compatibility.
|
|
22
|
+
Support for legacy variables will be removed in Airflow 3.
|
|
29
23
|
"""
|
|
30
24
|
|
|
31
25
|
from __future__ import annotations
|
|
@@ -51,13 +45,6 @@ def _is_true(arg: Any) -> bool:
|
|
|
51
45
|
return str(arg).lower().strip() in ("true", "1", "t")
|
|
52
46
|
|
|
53
47
|
|
|
54
|
-
def _safe_int_convert(arg: Any, default: int) -> int:
|
|
55
|
-
try:
|
|
56
|
-
return int(arg)
|
|
57
|
-
except (ValueError, TypeError):
|
|
58
|
-
return default
|
|
59
|
-
|
|
60
|
-
|
|
61
48
|
@cache
|
|
62
49
|
def config_path(check_legacy_env_var: bool = True) -> str:
|
|
63
50
|
"""[openlineage] config_path."""
|
|
@@ -70,11 +57,11 @@ def config_path(check_legacy_env_var: bool = True) -> str:
|
|
|
70
57
|
@cache
|
|
71
58
|
def is_source_enabled() -> bool:
|
|
72
59
|
"""[openlineage] disable_source_code."""
|
|
73
|
-
option = conf.
|
|
74
|
-
if
|
|
75
|
-
option = os.getenv("OPENLINEAGE_AIRFLOW_DISABLE_SOURCE_CODE", "")
|
|
76
|
-
# when disable_source_code is True, is_source_enabled() should be False
|
|
77
|
-
return not
|
|
60
|
+
option = conf.getboolean(_CONFIG_SECTION, "disable_source_code", fallback="False")
|
|
61
|
+
if option is False: # Check legacy variable
|
|
62
|
+
option = _is_true(os.getenv("OPENLINEAGE_AIRFLOW_DISABLE_SOURCE_CODE", ""))
|
|
63
|
+
# when disable_source_code is True, is_source_enabled() should be False; hence the "not"
|
|
64
|
+
return not option
|
|
78
65
|
|
|
79
66
|
|
|
80
67
|
@cache
|
|
@@ -87,8 +74,7 @@ def disabled_operators() -> set[str]:
|
|
|
87
74
|
@cache
|
|
88
75
|
def selective_enable() -> bool:
|
|
89
76
|
"""[openlineage] selective_enable."""
|
|
90
|
-
|
|
91
|
-
return _is_true(option)
|
|
77
|
+
return conf.getboolean(_CONFIG_SECTION, "selective_enable", fallback="False")
|
|
92
78
|
|
|
93
79
|
|
|
94
80
|
@cache
|
|
@@ -100,6 +86,17 @@ def custom_extractors() -> set[str]:
|
|
|
100
86
|
return set(extractor.strip() for extractor in option.split(";") if extractor.strip())
|
|
101
87
|
|
|
102
88
|
|
|
89
|
+
@cache
|
|
90
|
+
def custom_run_facets() -> set[str]:
|
|
91
|
+
"""[openlineage] custom_run_facets."""
|
|
92
|
+
option = conf.get(_CONFIG_SECTION, "custom_run_facets", fallback="")
|
|
93
|
+
return set(
|
|
94
|
+
custom_facet_function.strip()
|
|
95
|
+
for custom_facet_function in option.split(";")
|
|
96
|
+
if custom_facet_function.strip()
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
|
|
103
100
|
@cache
|
|
104
101
|
def namespace() -> str:
|
|
105
102
|
"""[openlineage] namespace."""
|
|
@@ -121,13 +118,12 @@ def transport() -> dict[str, Any]:
|
|
|
121
118
|
@cache
|
|
122
119
|
def is_disabled() -> bool:
|
|
123
120
|
"""[openlineage] disabled + check if any configuration is present."""
|
|
124
|
-
|
|
125
|
-
if _is_true(option):
|
|
121
|
+
if conf.getboolean(_CONFIG_SECTION, "disabled", fallback="False"):
|
|
126
122
|
return True
|
|
127
123
|
|
|
128
|
-
|
|
129
|
-
if _is_true(option):
|
|
124
|
+
if _is_true(os.getenv("OPENLINEAGE_DISABLED", "")): # Check legacy variable
|
|
130
125
|
return True
|
|
126
|
+
|
|
131
127
|
# Check if both 'transport' and 'config_path' are not present and also
|
|
132
128
|
# if legacy 'OPENLINEAGE_URL' environment variables is not set
|
|
133
129
|
return transport() == {} and config_path(True) == "" and os.getenv("OPENLINEAGE_URL", "") == ""
|
|
@@ -136,12 +132,16 @@ def is_disabled() -> bool:
|
|
|
136
132
|
@cache
|
|
137
133
|
def dag_state_change_process_pool_size() -> int:
|
|
138
134
|
"""[openlineage] dag_state_change_process_pool_size."""
|
|
139
|
-
|
|
140
|
-
return _safe_int_convert(str(option).strip(), default=1)
|
|
135
|
+
return conf.getint(_CONFIG_SECTION, "dag_state_change_process_pool_size", fallback="1")
|
|
141
136
|
|
|
142
137
|
|
|
143
138
|
@cache
|
|
144
139
|
def execution_timeout() -> int:
|
|
145
140
|
"""[openlineage] execution_timeout."""
|
|
146
|
-
|
|
147
|
-
|
|
141
|
+
return conf.getint(_CONFIG_SECTION, "execution_timeout", fallback="10")
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
@cache
|
|
145
|
+
def include_full_task_info() -> bool:
|
|
146
|
+
"""[openlineage] include_full_task_info."""
|
|
147
|
+
return conf.getboolean(_CONFIG_SECTION, "include_full_task_info", fallback="False")
|
|
@@ -17,27 +17,35 @@
|
|
|
17
17
|
|
|
18
18
|
from __future__ import annotations
|
|
19
19
|
|
|
20
|
+
import warnings
|
|
20
21
|
from abc import ABC, abstractmethod
|
|
21
|
-
from typing import
|
|
22
|
+
from typing import Generic, TypeVar, Union
|
|
22
23
|
|
|
23
24
|
from attrs import Factory, define
|
|
25
|
+
from openlineage.client.event_v2 import Dataset as OLDataset
|
|
24
26
|
|
|
27
|
+
with warnings.catch_warnings():
|
|
28
|
+
warnings.simplefilter("ignore", DeprecationWarning)
|
|
29
|
+
from openlineage.client.facet import BaseFacet as BaseFacet_V1
|
|
30
|
+
from openlineage.client.facet_v2 import JobFacet, RunFacet
|
|
31
|
+
|
|
32
|
+
from airflow.providers.openlineage.utils.utils import IS_AIRFLOW_2_10_OR_HIGHER
|
|
25
33
|
from airflow.utils.log.logging_mixin import LoggingMixin
|
|
26
34
|
from airflow.utils.state import TaskInstanceState
|
|
27
35
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
36
|
+
# this is not to break static checks compatibility with v1 OpenLineage facet classes
|
|
37
|
+
DatasetSubclass = TypeVar("DatasetSubclass", bound=OLDataset)
|
|
38
|
+
BaseFacetSubclass = TypeVar("BaseFacetSubclass", bound=Union[BaseFacet_V1, RunFacet, JobFacet])
|
|
31
39
|
|
|
32
40
|
|
|
33
41
|
@define
|
|
34
|
-
class OperatorLineage:
|
|
42
|
+
class OperatorLineage(Generic[DatasetSubclass, BaseFacetSubclass]):
|
|
35
43
|
"""Structure returned from lineage extraction."""
|
|
36
44
|
|
|
37
|
-
inputs: list[
|
|
38
|
-
outputs: list[
|
|
39
|
-
run_facets: dict[str,
|
|
40
|
-
job_facets: dict[str,
|
|
45
|
+
inputs: list[DatasetSubclass] = Factory(list)
|
|
46
|
+
outputs: list[DatasetSubclass] = Factory(list)
|
|
47
|
+
run_facets: dict[str, BaseFacetSubclass] = Factory(dict)
|
|
48
|
+
job_facets: dict[str, BaseFacetSubclass] = Factory(dict)
|
|
41
49
|
|
|
42
50
|
|
|
43
51
|
class BaseExtractor(ABC, LoggingMixin):
|
|
@@ -108,7 +116,14 @@ class DefaultExtractor(BaseExtractor):
|
|
|
108
116
|
return None
|
|
109
117
|
|
|
110
118
|
def extract_on_complete(self, task_instance) -> OperatorLineage | None:
|
|
111
|
-
|
|
119
|
+
failed_states = [TaskInstanceState.FAILED, TaskInstanceState.UP_FOR_RETRY]
|
|
120
|
+
if not IS_AIRFLOW_2_10_OR_HIGHER: # todo: remove when min airflow version >= 2.10.0
|
|
121
|
+
# Before fix (#41053) implemented in Airflow 2.10 TaskInstance's state was still RUNNING when
|
|
122
|
+
# being passed to listener's on_failure method. Since `extract_on_complete()` is only called
|
|
123
|
+
# after task completion, RUNNING state means that we are dealing with FAILED task in < 2.10
|
|
124
|
+
failed_states = [TaskInstanceState.RUNNING]
|
|
125
|
+
|
|
126
|
+
if task_instance.state in failed_states:
|
|
112
127
|
on_failed = getattr(self.operator, "get_openlineage_facets_on_failure", None)
|
|
113
128
|
if on_failed and callable(on_failed):
|
|
114
129
|
self.log.debug(
|
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
|
|
18
18
|
from __future__ import annotations
|
|
19
19
|
|
|
20
|
-
from openlineage.client.
|
|
20
|
+
from openlineage.client.facet_v2 import source_code_job
|
|
21
21
|
|
|
22
22
|
from airflow.providers.openlineage import conf
|
|
23
23
|
from airflow.providers.openlineage.extractors.base import BaseExtractor, OperatorLineage
|
|
@@ -47,10 +47,10 @@ class BashExtractor(BaseExtractor):
|
|
|
47
47
|
job_facets: dict = {}
|
|
48
48
|
if conf.is_source_enabled():
|
|
49
49
|
job_facets = {
|
|
50
|
-
"sourceCode": SourceCodeJobFacet(
|
|
50
|
+
"sourceCode": source_code_job.SourceCodeJobFacet(
|
|
51
51
|
language="bash",
|
|
52
52
|
# We're on worker and should have access to DAG files
|
|
53
|
-
|
|
53
|
+
sourceCode=self.operator.bash_command,
|
|
54
54
|
)
|
|
55
55
|
}
|
|
56
56
|
else:
|
|
@@ -30,7 +30,7 @@ from airflow.providers.openlineage.utils.utils import (
|
|
|
30
30
|
from airflow.utils.log.logging_mixin import LoggingMixin
|
|
31
31
|
|
|
32
32
|
if TYPE_CHECKING:
|
|
33
|
-
from openlineage.client.
|
|
33
|
+
from openlineage.client.event_v2 import Dataset
|
|
34
34
|
|
|
35
35
|
from airflow.lineage.entities import Table
|
|
36
36
|
from airflow.models import Operator
|
|
@@ -172,7 +172,7 @@ class ExtractorManager(LoggingMixin):
|
|
|
172
172
|
def convert_to_ol_dataset_from_object_storage_uri(uri: str) -> Dataset | None:
|
|
173
173
|
from urllib.parse import urlparse
|
|
174
174
|
|
|
175
|
-
from openlineage.client.
|
|
175
|
+
from openlineage.client.event_v2 import Dataset
|
|
176
176
|
|
|
177
177
|
if "/" not in uri:
|
|
178
178
|
return None
|
|
@@ -196,20 +196,19 @@ class ExtractorManager(LoggingMixin):
|
|
|
196
196
|
|
|
197
197
|
@staticmethod
|
|
198
198
|
def convert_to_ol_dataset_from_table(table: Table) -> Dataset:
|
|
199
|
-
from openlineage.client.
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
199
|
+
from openlineage.client.event_v2 import Dataset
|
|
200
|
+
from openlineage.client.facet_v2 import (
|
|
201
|
+
DatasetFacet,
|
|
202
|
+
documentation_dataset,
|
|
203
|
+
ownership_dataset,
|
|
204
|
+
schema_dataset,
|
|
205
205
|
)
|
|
206
|
-
from openlineage.client.run import Dataset
|
|
207
206
|
|
|
208
|
-
facets: dict[str,
|
|
207
|
+
facets: dict[str, DatasetFacet] = {}
|
|
209
208
|
if table.columns:
|
|
210
|
-
facets["schema"] = SchemaDatasetFacet(
|
|
209
|
+
facets["schema"] = schema_dataset.SchemaDatasetFacet(
|
|
211
210
|
fields=[
|
|
212
|
-
|
|
211
|
+
schema_dataset.SchemaDatasetFacetFields(
|
|
213
212
|
name=column.name,
|
|
214
213
|
type=column.data_type,
|
|
215
214
|
description=column.description,
|
|
@@ -218,9 +217,9 @@ class ExtractorManager(LoggingMixin):
|
|
|
218
217
|
]
|
|
219
218
|
)
|
|
220
219
|
if table.owners:
|
|
221
|
-
facets["ownership"] = OwnershipDatasetFacet(
|
|
220
|
+
facets["ownership"] = ownership_dataset.OwnershipDatasetFacet(
|
|
222
221
|
owners=[
|
|
223
|
-
|
|
222
|
+
ownership_dataset.Owner(
|
|
224
223
|
# f.e. "user:John Doe <jdoe@company.com>" or just "user:<jdoe@company.com>"
|
|
225
224
|
name=f"user:"
|
|
226
225
|
f"{user.first_name + ' ' if user.first_name else ''}"
|
|
@@ -231,6 +230,10 @@ class ExtractorManager(LoggingMixin):
|
|
|
231
230
|
for user in table.owners
|
|
232
231
|
]
|
|
233
232
|
)
|
|
233
|
+
if table.description:
|
|
234
|
+
facets["documentation"] = documentation_dataset.DocumentationDatasetFacet(
|
|
235
|
+
description=table.description
|
|
236
|
+
)
|
|
234
237
|
return Dataset(
|
|
235
238
|
namespace=f"{table.cluster}",
|
|
236
239
|
name=f"{table.database}.{table.name}",
|
|
@@ -239,7 +242,7 @@ class ExtractorManager(LoggingMixin):
|
|
|
239
242
|
|
|
240
243
|
@staticmethod
|
|
241
244
|
def convert_to_ol_dataset(obj) -> Dataset | None:
|
|
242
|
-
from openlineage.client.
|
|
245
|
+
from openlineage.client.event_v2 import Dataset
|
|
243
246
|
|
|
244
247
|
from airflow.lineage.entities import File, Table
|
|
245
248
|
|
|
@@ -20,7 +20,7 @@ from __future__ import annotations
|
|
|
20
20
|
import inspect
|
|
21
21
|
from typing import Callable
|
|
22
22
|
|
|
23
|
-
from openlineage.client.
|
|
23
|
+
from openlineage.client.facet_v2 import source_code_job
|
|
24
24
|
|
|
25
25
|
from airflow.providers.openlineage import conf
|
|
26
26
|
from airflow.providers.openlineage.extractors.base import BaseExtractor, OperatorLineage
|
|
@@ -51,10 +51,10 @@ class PythonExtractor(BaseExtractor):
|
|
|
51
51
|
job_facet: dict = {}
|
|
52
52
|
if conf.is_source_enabled() and source_code:
|
|
53
53
|
job_facet = {
|
|
54
|
-
"sourceCode": SourceCodeJobFacet(
|
|
54
|
+
"sourceCode": source_code_job.SourceCodeJobFacet(
|
|
55
55
|
language="python",
|
|
56
56
|
# We're on worker and should have access to DAG files
|
|
57
|
-
|
|
57
|
+
sourceCode=source_code,
|
|
58
58
|
)
|
|
59
59
|
}
|
|
60
60
|
else:
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$defs": {
|
|
4
|
+
"AirflowDagRunFacet": {
|
|
5
|
+
"allOf": [
|
|
6
|
+
{
|
|
7
|
+
"$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/RunFacet"
|
|
8
|
+
},
|
|
9
|
+
{
|
|
10
|
+
"type": "object",
|
|
11
|
+
"properties": {
|
|
12
|
+
"dag": {
|
|
13
|
+
"$ref": "#/$defs/DAG"
|
|
14
|
+
},
|
|
15
|
+
"dagRun": {
|
|
16
|
+
"$ref": "#/$defs/DagRun"
|
|
17
|
+
}
|
|
18
|
+
},
|
|
19
|
+
"required": [
|
|
20
|
+
"dag",
|
|
21
|
+
"dagRun"
|
|
22
|
+
]
|
|
23
|
+
}
|
|
24
|
+
]
|
|
25
|
+
},
|
|
26
|
+
"DAG": {
|
|
27
|
+
"type": "object",
|
|
28
|
+
"properties": {
|
|
29
|
+
"dag_id": {
|
|
30
|
+
"type": "string"
|
|
31
|
+
},
|
|
32
|
+
"description": {
|
|
33
|
+
"type": "string"
|
|
34
|
+
},
|
|
35
|
+
"owner": {
|
|
36
|
+
"type": "string"
|
|
37
|
+
},
|
|
38
|
+
"schedule_interval": {
|
|
39
|
+
"type": "string"
|
|
40
|
+
},
|
|
41
|
+
"start_date": {
|
|
42
|
+
"type": "string",
|
|
43
|
+
"format": "date-time"
|
|
44
|
+
},
|
|
45
|
+
"tags": {
|
|
46
|
+
"type": "string"
|
|
47
|
+
},
|
|
48
|
+
"timetable": {
|
|
49
|
+
"description": "Describes timetable (successor of schedule_interval)",
|
|
50
|
+
"type": "object",
|
|
51
|
+
"additionalProperties": true
|
|
52
|
+
}
|
|
53
|
+
},
|
|
54
|
+
"additionalProperties": true,
|
|
55
|
+
"required": [
|
|
56
|
+
"dag_id",
|
|
57
|
+
"start_date"
|
|
58
|
+
]
|
|
59
|
+
},
|
|
60
|
+
"DagRun": {
|
|
61
|
+
"type": "object",
|
|
62
|
+
"properties": {
|
|
63
|
+
"conf": {
|
|
64
|
+
"type": "object",
|
|
65
|
+
"additionalProperties": true
|
|
66
|
+
},
|
|
67
|
+
"dag_id": {
|
|
68
|
+
"type": "string"
|
|
69
|
+
},
|
|
70
|
+
"data_interval_start": {
|
|
71
|
+
"type": "string",
|
|
72
|
+
"format": "date-time"
|
|
73
|
+
},
|
|
74
|
+
"data_interval_end": {
|
|
75
|
+
"type": "string",
|
|
76
|
+
"format": "date-time"
|
|
77
|
+
},
|
|
78
|
+
"external_trigger": {
|
|
79
|
+
"type": "boolean"
|
|
80
|
+
},
|
|
81
|
+
"run_id": {
|
|
82
|
+
"type": "string"
|
|
83
|
+
},
|
|
84
|
+
"run_type": {
|
|
85
|
+
"type": "string"
|
|
86
|
+
},
|
|
87
|
+
"start_date": {
|
|
88
|
+
"type": "string",
|
|
89
|
+
"format": "date-time"
|
|
90
|
+
}
|
|
91
|
+
},
|
|
92
|
+
"additionalProperties": true,
|
|
93
|
+
"required": [
|
|
94
|
+
"dag_id",
|
|
95
|
+
"run_id"
|
|
96
|
+
]
|
|
97
|
+
}
|
|
98
|
+
},
|
|
99
|
+
"type": "object",
|
|
100
|
+
"properties": {
|
|
101
|
+
"airflowDagRun": {
|
|
102
|
+
"$ref": "#/$defs/AirflowDagRunFacet"
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
}
|
|
@@ -28,8 +28,9 @@ def get_provider_info():
|
|
|
28
28
|
"name": "OpenLineage Airflow",
|
|
29
29
|
"description": "`OpenLineage <https://openlineage.io/>`__\n",
|
|
30
30
|
"state": "ready",
|
|
31
|
-
"source-date-epoch":
|
|
31
|
+
"source-date-epoch": 1722664661,
|
|
32
32
|
"versions": [
|
|
33
|
+
"1.10.0",
|
|
33
34
|
"1.9.1",
|
|
34
35
|
"1.9.0",
|
|
35
36
|
"1.8.0",
|
|
@@ -108,6 +109,13 @@ def get_provider_info():
|
|
|
108
109
|
"default": None,
|
|
109
110
|
"version_added": None,
|
|
110
111
|
},
|
|
112
|
+
"custom_run_facets": {
|
|
113
|
+
"description": "Register custom run facet functions by passing a string of semicolon separated full import paths.\n",
|
|
114
|
+
"type": "string",
|
|
115
|
+
"example": "full.path.to.custom_facet_function;full.path.to.another_custom_facet_function",
|
|
116
|
+
"default": "",
|
|
117
|
+
"version_added": "1.10.0",
|
|
118
|
+
},
|
|
111
119
|
"config_path": {
|
|
112
120
|
"description": "Specify the path to the YAML configuration file.\nThis ensures backwards compatibility with passing config through the `openlineage.yml` file.\n",
|
|
113
121
|
"version_added": None,
|
|
@@ -143,6 +151,13 @@ def get_provider_info():
|
|
|
143
151
|
"type": "integer",
|
|
144
152
|
"version_added": "1.9.0",
|
|
145
153
|
},
|
|
154
|
+
"include_full_task_info": {
|
|
155
|
+
"description": "If true, OpenLineage event will include full task info - potentially containing large fields.\n",
|
|
156
|
+
"default": "False",
|
|
157
|
+
"example": None,
|
|
158
|
+
"type": "boolean",
|
|
159
|
+
"version_added": "1.10.0",
|
|
160
|
+
},
|
|
146
161
|
},
|
|
147
162
|
}
|
|
148
163
|
},
|