apache-airflow-providers-openlineage 2.1.1rc1__tar.gz → 2.1.2rc1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apache-airflow-providers-openlineage might be problematic. Click here for more details.
- {apache_airflow_providers_openlineage-2.1.1rc1 → apache_airflow_providers_openlineage-2.1.2rc1}/PKG-INFO +8 -8
- {apache_airflow_providers_openlineage-2.1.1rc1 → apache_airflow_providers_openlineage-2.1.2rc1}/README.rst +4 -4
- {apache_airflow_providers_openlineage-2.1.1rc1 → apache_airflow_providers_openlineage-2.1.2rc1}/pyproject.toml +5 -5
- {apache_airflow_providers_openlineage-2.1.1rc1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/__init__.py +1 -1
- {apache_airflow_providers_openlineage-2.1.1rc1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/extractors/base.py +37 -37
- {apache_airflow_providers_openlineage-2.1.1rc1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/extractors/manager.py +21 -12
- {apache_airflow_providers_openlineage-2.1.1rc1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/get_provider_info.py +2 -1
- {apache_airflow_providers_openlineage-2.1.1rc1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/plugins/adapter.py +1 -1
- {apache_airflow_providers_openlineage-2.1.1rc1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/plugins/listener.py +16 -5
- {apache_airflow_providers_openlineage-2.1.1rc1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/plugins/macros.py +15 -4
- {apache_airflow_providers_openlineage-2.1.1rc1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/utils/spark.py +64 -26
- {apache_airflow_providers_openlineage-2.1.1rc1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/utils/utils.py +12 -7
- {apache_airflow_providers_openlineage-2.1.1rc1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/LICENSE +0 -0
- {apache_airflow_providers_openlineage-2.1.1rc1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/conf.py +0 -0
- {apache_airflow_providers_openlineage-2.1.1rc1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/extractors/__init__.py +0 -0
- {apache_airflow_providers_openlineage-2.1.1rc1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/extractors/bash.py +0 -0
- {apache_airflow_providers_openlineage-2.1.1rc1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/extractors/python.py +0 -0
- {apache_airflow_providers_openlineage-2.1.1rc1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/facets/AirflowDagRunFacet.json +0 -0
- {apache_airflow_providers_openlineage-2.1.1rc1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/facets/AirflowDebugRunFacet.json +0 -0
- {apache_airflow_providers_openlineage-2.1.1rc1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/facets/AirflowJobFacet.json +0 -0
- {apache_airflow_providers_openlineage-2.1.1rc1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/facets/AirflowRunFacet.json +0 -0
- {apache_airflow_providers_openlineage-2.1.1rc1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/facets/AirflowStateRunFacet.json +0 -0
- {apache_airflow_providers_openlineage-2.1.1rc1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/facets/__init__.py +0 -0
- {apache_airflow_providers_openlineage-2.1.1rc1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/plugins/__init__.py +0 -0
- {apache_airflow_providers_openlineage-2.1.1rc1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/plugins/facets.py +0 -0
- {apache_airflow_providers_openlineage-2.1.1rc1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/plugins/openlineage.py +0 -0
- {apache_airflow_providers_openlineage-2.1.1rc1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/sqlparser.py +0 -0
- {apache_airflow_providers_openlineage-2.1.1rc1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/utils/__init__.py +0 -0
- {apache_airflow_providers_openlineage-2.1.1rc1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/utils/selective_enable.py +0 -0
- {apache_airflow_providers_openlineage-2.1.1rc1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/utils/sql.py +0 -0
- {apache_airflow_providers_openlineage-2.1.1rc1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/version_compat.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: apache-airflow-providers-openlineage
|
|
3
|
-
Version: 2.1.1rc1
|
|
3
|
+
Version: 2.1.2rc1
|
|
4
4
|
Summary: Provider package apache-airflow-providers-openlineage for Apache Airflow
|
|
5
5
|
Keywords: airflow-provider,openlineage,airflow,integration
|
|
6
6
|
Author-email: Apache Software Foundation <dev@airflow.apache.org>
|
|
@@ -27,11 +27,11 @@ Requires-Dist: attrs>=22.2
|
|
|
27
27
|
Requires-Dist: openlineage-integration-common>=1.24.2
|
|
28
28
|
Requires-Dist: openlineage-python>=1.24.2
|
|
29
29
|
Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
|
|
30
|
-
Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.1/changelog.html
|
|
31
|
-
Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.1
|
|
30
|
+
Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2/changelog.html
|
|
31
|
+
Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2
|
|
32
|
+
Project-URL: Mastodon, https://fosstodon.org/@airflow
|
|
32
33
|
Project-URL: Slack Chat, https://s.apache.org/airflow-slack
|
|
33
34
|
Project-URL: Source Code, https://github.com/apache/airflow
|
|
34
|
-
Project-URL: Twitter, https://x.com/ApacheAirflow
|
|
35
35
|
Project-URL: YouTube, https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/
|
|
36
36
|
|
|
37
37
|
|
|
@@ -59,7 +59,7 @@ Project-URL: YouTube, https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/
|
|
|
59
59
|
|
|
60
60
|
Package ``apache-airflow-providers-openlineage``
|
|
61
61
|
|
|
62
|
-
Release: ``2.1.1``
|
|
62
|
+
Release: ``2.1.2``
|
|
63
63
|
|
|
64
64
|
|
|
65
65
|
`OpenLineage <https://openlineage.io/>`__
|
|
@@ -72,7 +72,7 @@ This is a provider package for ``openlineage`` provider. All classes for this pr
|
|
|
72
72
|
are in ``airflow.providers.openlineage`` python package.
|
|
73
73
|
|
|
74
74
|
You can find package information and changelog for the provider
|
|
75
|
-
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.1/>`_.
|
|
75
|
+
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2/>`_.
|
|
76
76
|
|
|
77
77
|
Installation
|
|
78
78
|
------------
|
|
@@ -101,7 +101,7 @@ Cross provider package dependencies
|
|
|
101
101
|
-----------------------------------
|
|
102
102
|
|
|
103
103
|
Those are dependencies that might be needed in order to use all the features of the package.
|
|
104
|
-
You need to install the specified
|
|
104
|
+
You need to install the specified providers in order to use them.
|
|
105
105
|
|
|
106
106
|
You can install such cross-provider dependencies when installing from PyPI. For example:
|
|
107
107
|
|
|
@@ -118,5 +118,5 @@ Dependent package
|
|
|
118
118
|
================================================================================================================== =================
|
|
119
119
|
|
|
120
120
|
The changelog for the provider package can be found in the
|
|
121
|
-
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.1/changelog.html>`_.
|
|
121
|
+
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2/changelog.html>`_.
|
|
122
122
|
|
|
@@ -23,7 +23,7 @@
|
|
|
23
23
|
|
|
24
24
|
Package ``apache-airflow-providers-openlineage``
|
|
25
25
|
|
|
26
|
-
Release: ``2.1.1``
|
|
26
|
+
Release: ``2.1.2``
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
`OpenLineage <https://openlineage.io/>`__
|
|
@@ -36,7 +36,7 @@ This is a provider package for ``openlineage`` provider. All classes for this pr
|
|
|
36
36
|
are in ``airflow.providers.openlineage`` python package.
|
|
37
37
|
|
|
38
38
|
You can find package information and changelog for the provider
|
|
39
|
-
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.1/>`_.
|
|
39
|
+
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2/>`_.
|
|
40
40
|
|
|
41
41
|
Installation
|
|
42
42
|
------------
|
|
@@ -65,7 +65,7 @@ Cross provider package dependencies
|
|
|
65
65
|
-----------------------------------
|
|
66
66
|
|
|
67
67
|
Those are dependencies that might be needed in order to use all the features of the package.
|
|
68
|
-
You need to install the specified
|
|
68
|
+
You need to install the specified providers in order to use them.
|
|
69
69
|
|
|
70
70
|
You can install such cross-provider dependencies when installing from PyPI. For example:
|
|
71
71
|
|
|
@@ -82,4 +82,4 @@ Dependent package
|
|
|
82
82
|
================================================================================================================== =================
|
|
83
83
|
|
|
84
84
|
The changelog for the provider package can be found in the
|
|
85
|
-
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.1/changelog.html>`_.
|
|
85
|
+
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2/changelog.html>`_.
|
|
@@ -20,12 +20,12 @@
|
|
|
20
20
|
# IF YOU WANT TO MODIFY THIS FILE EXCEPT DEPENDENCIES, YOU SHOULD MODIFY THE TEMPLATE
|
|
21
21
|
# `pyproject_TEMPLATE.toml.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY
|
|
22
22
|
[build-system]
|
|
23
|
-
requires = ["flit_core==3.
|
|
23
|
+
requires = ["flit_core==3.12.0"]
|
|
24
24
|
build-backend = "flit_core.buildapi"
|
|
25
25
|
|
|
26
26
|
[project]
|
|
27
27
|
name = "apache-airflow-providers-openlineage"
|
|
28
|
-
version = "2.1.1.rc1"
|
|
28
|
+
version = "2.1.2.rc1"
|
|
29
29
|
description = "Provider package apache-airflow-providers-openlineage for Apache Airflow"
|
|
30
30
|
readme = "README.rst"
|
|
31
31
|
authors = [
|
|
@@ -87,12 +87,12 @@ apache-airflow-providers-fab = {workspace = true}
|
|
|
87
87
|
apache-airflow-providers-standard = {workspace = true}
|
|
88
88
|
|
|
89
89
|
[project.urls]
|
|
90
|
-
"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.1"
|
|
91
|
-
"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.1/changelog.html"
|
|
90
|
+
"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2"
|
|
91
|
+
"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2/changelog.html"
|
|
92
92
|
"Bug Tracker" = "https://github.com/apache/airflow/issues"
|
|
93
93
|
"Source Code" = "https://github.com/apache/airflow"
|
|
94
94
|
"Slack Chat" = "https://s.apache.org/airflow-slack"
|
|
95
|
-
"Twitter" = "https://x.com/ApacheAirflow"
|
|
95
|
+
"Mastodon" = "https://fosstodon.org/@airflow"
|
|
96
96
|
"YouTube" = "https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/"
|
|
97
97
|
|
|
98
98
|
[project.entry-points."apache_airflow_provider"]
|
|
@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version
|
|
|
29
29
|
|
|
30
30
|
__all__ = ["__version__"]
|
|
31
31
|
|
|
32
|
-
__version__ = "2.1.1"
|
|
32
|
+
__version__ = "2.1.2"
|
|
33
33
|
|
|
34
34
|
if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
|
|
35
35
|
"2.9.0"
|
|
@@ -29,14 +29,16 @@ with warnings.catch_warnings():
|
|
|
29
29
|
from openlineage.client.facet import BaseFacet as BaseFacet_V1
|
|
30
30
|
from openlineage.client.facet_v2 import JobFacet, RunFacet
|
|
31
31
|
|
|
32
|
-
from airflow.providers.openlineage.utils.utils import AIRFLOW_V_2_10_PLUS
|
|
33
32
|
from airflow.utils.log.logging_mixin import LoggingMixin
|
|
34
|
-
from airflow.utils.state import TaskInstanceState
|
|
35
33
|
|
|
36
34
|
# this is not to break static checks compatibility with v1 OpenLineage facet classes
|
|
37
35
|
DatasetSubclass = TypeVar("DatasetSubclass", bound=OLDataset)
|
|
38
36
|
BaseFacetSubclass = TypeVar("BaseFacetSubclass", bound=Union[BaseFacet_V1, RunFacet, JobFacet])
|
|
39
37
|
|
|
38
|
+
OL_METHOD_NAME_START = "get_openlineage_facets_on_start"
|
|
39
|
+
OL_METHOD_NAME_COMPLETE = "get_openlineage_facets_on_complete"
|
|
40
|
+
OL_METHOD_NAME_FAIL = "get_openlineage_facets_on_failure"
|
|
41
|
+
|
|
40
42
|
|
|
41
43
|
@define
|
|
42
44
|
class OperatorLineage(Generic[DatasetSubclass, BaseFacetSubclass]):
|
|
@@ -81,6 +83,9 @@ class BaseExtractor(ABC, LoggingMixin):
|
|
|
81
83
|
def extract_on_complete(self, task_instance) -> OperatorLineage | None:
|
|
82
84
|
return self.extract()
|
|
83
85
|
|
|
86
|
+
def extract_on_failure(self, task_instance) -> OperatorLineage | None:
|
|
87
|
+
return self.extract()
|
|
88
|
+
|
|
84
89
|
|
|
85
90
|
class DefaultExtractor(BaseExtractor):
|
|
86
91
|
"""Extractor that uses `get_openlineage_facets_on_start/complete/failure` methods."""
|
|
@@ -96,46 +101,41 @@ class DefaultExtractor(BaseExtractor):
|
|
|
96
101
|
return []
|
|
97
102
|
|
|
98
103
|
def _execute_extraction(self) -> OperatorLineage | None:
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
self.log.debug(
|
|
102
|
-
"Trying to execute `get_openlineage_facets_on_start` for %s.", self.operator.task_type
|
|
103
|
-
)
|
|
104
|
-
return self._get_openlineage_facets(self.operator.get_openlineage_facets_on_start) # type: ignore
|
|
105
|
-
except ImportError:
|
|
106
|
-
self.log.error(
|
|
107
|
-
"OpenLineage provider method failed to import OpenLineage integration. "
|
|
108
|
-
"This should not happen. Please report this bug to developers."
|
|
109
|
-
)
|
|
110
|
-
return None
|
|
111
|
-
except AttributeError:
|
|
104
|
+
method = getattr(self.operator, OL_METHOD_NAME_START, None)
|
|
105
|
+
if callable(method):
|
|
112
106
|
self.log.debug(
|
|
113
|
-
"
|
|
114
|
-
self.operator.task_type,
|
|
107
|
+
"Trying to execute '%s' method of '%s'.", OL_METHOD_NAME_START, self.operator.task_type
|
|
115
108
|
)
|
|
116
|
-
return
|
|
109
|
+
return self._get_openlineage_facets(method)
|
|
110
|
+
self.log.debug(
|
|
111
|
+
"Operator '%s' does not have '%s' method.", self.operator.task_type, OL_METHOD_NAME_START
|
|
112
|
+
)
|
|
113
|
+
return OperatorLineage()
|
|
117
114
|
|
|
118
115
|
def extract_on_complete(self, task_instance) -> OperatorLineage | None:
|
|
119
|
-
|
|
120
|
-
if
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
if on_failed and callable(on_failed):
|
|
129
|
-
self.log.debug(
|
|
130
|
-
"Executing `get_openlineage_facets_on_failure` for %s.", self.operator.task_type
|
|
131
|
-
)
|
|
132
|
-
return self._get_openlineage_facets(on_failed, task_instance)
|
|
133
|
-
on_complete = getattr(self.operator, "get_openlineage_facets_on_complete", None)
|
|
134
|
-
if on_complete and callable(on_complete):
|
|
135
|
-
self.log.debug("Executing `get_openlineage_facets_on_complete` for %s.", self.operator.task_type)
|
|
136
|
-
return self._get_openlineage_facets(on_complete, task_instance)
|
|
116
|
+
method = getattr(self.operator, OL_METHOD_NAME_COMPLETE, None)
|
|
117
|
+
if callable(method):
|
|
118
|
+
self.log.debug(
|
|
119
|
+
"Trying to execute '%s' method of '%s'.", OL_METHOD_NAME_COMPLETE, self.operator.task_type
|
|
120
|
+
)
|
|
121
|
+
return self._get_openlineage_facets(method, task_instance)
|
|
122
|
+
self.log.debug(
|
|
123
|
+
"Operator '%s' does not have '%s' method.", self.operator.task_type, OL_METHOD_NAME_COMPLETE
|
|
124
|
+
)
|
|
137
125
|
return self.extract()
|
|
138
126
|
|
|
127
|
+
def extract_on_failure(self, task_instance) -> OperatorLineage | None:
|
|
128
|
+
method = getattr(self.operator, OL_METHOD_NAME_FAIL, None)
|
|
129
|
+
if callable(method):
|
|
130
|
+
self.log.debug(
|
|
131
|
+
"Trying to execute '%s' method of '%s'.", OL_METHOD_NAME_FAIL, self.operator.task_type
|
|
132
|
+
)
|
|
133
|
+
return self._get_openlineage_facets(method, task_instance)
|
|
134
|
+
self.log.debug(
|
|
135
|
+
"Operator '%s' does not have '%s' method.", self.operator.task_type, OL_METHOD_NAME_FAIL
|
|
136
|
+
)
|
|
137
|
+
return self.extract_on_complete(task_instance)
|
|
138
|
+
|
|
139
139
|
def _get_openlineage_facets(self, get_facets_method, *args) -> OperatorLineage | None:
|
|
140
140
|
try:
|
|
141
141
|
facets: OperatorLineage = get_facets_method(*args)
|
|
@@ -153,5 +153,5 @@ class DefaultExtractor(BaseExtractor):
|
|
|
153
153
|
"This should not happen."
|
|
154
154
|
)
|
|
155
155
|
except Exception:
|
|
156
|
-
self.log.warning("OpenLineage provider method failed to extract data from provider.
|
|
156
|
+
self.log.warning("OpenLineage provider method failed to extract data from provider.")
|
|
157
157
|
return None
|
|
@@ -24,7 +24,11 @@ from airflow.providers.common.compat.openlineage.utils.utils import (
|
|
|
24
24
|
)
|
|
25
25
|
from airflow.providers.openlineage import conf
|
|
26
26
|
from airflow.providers.openlineage.extractors import BaseExtractor, OperatorLineage
|
|
27
|
-
from airflow.providers.openlineage.extractors.base import DefaultExtractor
|
|
27
|
+
from airflow.providers.openlineage.extractors.base import (
|
|
28
|
+
OL_METHOD_NAME_COMPLETE,
|
|
29
|
+
OL_METHOD_NAME_START,
|
|
30
|
+
DefaultExtractor,
|
|
31
|
+
)
|
|
28
32
|
from airflow.providers.openlineage.extractors.bash import BashExtractor
|
|
29
33
|
from airflow.providers.openlineage.extractors.python import PythonExtractor
|
|
30
34
|
from airflow.providers.openlineage.utils.utils import (
|
|
@@ -32,6 +36,7 @@ from airflow.providers.openlineage.utils.utils import (
|
|
|
32
36
|
try_import_from_string,
|
|
33
37
|
)
|
|
34
38
|
from airflow.utils.log.logging_mixin import LoggingMixin
|
|
39
|
+
from airflow.utils.state import TaskInstanceState
|
|
35
40
|
|
|
36
41
|
if TYPE_CHECKING:
|
|
37
42
|
from openlineage.client.event_v2 import Dataset
|
|
@@ -87,7 +92,9 @@ class ExtractorManager(LoggingMixin):
|
|
|
87
92
|
def add_extractor(self, operator_class: str, extractor: type[BaseExtractor]):
|
|
88
93
|
self.extractors[operator_class] = extractor
|
|
89
94
|
|
|
90
|
-
def extract_metadata(
|
|
95
|
+
def extract_metadata(
|
|
96
|
+
self, dagrun, task, task_instance_state: TaskInstanceState, task_instance=None
|
|
97
|
+
) -> OperatorLineage:
|
|
91
98
|
extractor = self._get_extractor(task)
|
|
92
99
|
task_info = (
|
|
93
100
|
f"task_type={task.task_type} "
|
|
@@ -104,10 +111,15 @@ class ExtractorManager(LoggingMixin):
|
|
|
104
111
|
extractor.__class__.__name__,
|
|
105
112
|
str(task_info),
|
|
106
113
|
)
|
|
107
|
-
if
|
|
108
|
-
task_metadata = extractor.extract_on_complete(task_instance)
|
|
109
|
-
else:
|
|
114
|
+
if task_instance_state == TaskInstanceState.RUNNING:
|
|
110
115
|
task_metadata = extractor.extract()
|
|
116
|
+
elif task_instance_state == TaskInstanceState.FAILED:
|
|
117
|
+
if callable(getattr(extractor, "extract_on_failure", None)):
|
|
118
|
+
task_metadata = extractor.extract_on_failure(task_instance)
|
|
119
|
+
else:
|
|
120
|
+
task_metadata = extractor.extract_on_complete(task_instance)
|
|
121
|
+
else:
|
|
122
|
+
task_metadata = extractor.extract_on_complete(task_instance)
|
|
111
123
|
|
|
112
124
|
self.log.debug(
|
|
113
125
|
"Found task metadata for operation %s: %s",
|
|
@@ -155,13 +167,9 @@ class ExtractorManager(LoggingMixin):
|
|
|
155
167
|
return self.extractors[task.task_type]
|
|
156
168
|
|
|
157
169
|
def method_exists(method_name):
|
|
158
|
-
|
|
159
|
-
if method:
|
|
160
|
-
return callable(method)
|
|
170
|
+
return callable(getattr(task, method_name, None))
|
|
161
171
|
|
|
162
|
-
if method_exists(
|
|
163
|
-
"get_openlineage_facets_on_complete"
|
|
164
|
-
):
|
|
172
|
+
if method_exists(OL_METHOD_NAME_START) or method_exists(OL_METHOD_NAME_COMPLETE):
|
|
165
173
|
return self.default_extractor
|
|
166
174
|
return None
|
|
167
175
|
|
|
@@ -191,7 +199,8 @@ class ExtractorManager(LoggingMixin):
|
|
|
191
199
|
if d:
|
|
192
200
|
task_metadata.outputs.append(d)
|
|
193
201
|
|
|
194
|
-
|
|
202
|
+
@staticmethod
|
|
203
|
+
def get_hook_lineage() -> tuple[list[Dataset], list[Dataset]] | None:
|
|
195
204
|
try:
|
|
196
205
|
from airflow.providers.common.compat.lineage.hook import (
|
|
197
206
|
get_hook_lineage_collector,
|
|
@@ -27,8 +27,9 @@ def get_provider_info():
|
|
|
27
27
|
"name": "OpenLineage Airflow",
|
|
28
28
|
"description": "`OpenLineage <https://openlineage.io/>`__\n",
|
|
29
29
|
"state": "ready",
|
|
30
|
-
"source-date-epoch":
|
|
30
|
+
"source-date-epoch": 1742478177,
|
|
31
31
|
"versions": [
|
|
32
|
+
"2.1.2",
|
|
32
33
|
"2.1.1",
|
|
33
34
|
"2.1.0",
|
|
34
35
|
"2.0.0",
|
|
@@ -85,7 +85,7 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
85
85
|
if config:
|
|
86
86
|
self.log.debug(
|
|
87
87
|
"OpenLineage configuration found. Transport type: `%s`",
|
|
88
|
-
config.get("type", "no type provided"),
|
|
88
|
+
config.get("transport", {}).get("type", "no type provided"),
|
|
89
89
|
)
|
|
90
90
|
self._client = OpenLineageClient(config=config) # type: ignore[call-arg]
|
|
91
91
|
else:
|
|
@@ -69,13 +69,15 @@ def _get_try_number_success(val):
|
|
|
69
69
|
|
|
70
70
|
def _executor_initializer():
|
|
71
71
|
"""
|
|
72
|
-
Initialize
|
|
72
|
+
Initialize processes for the executor used with DAGRun listener's methods (on scheduler).
|
|
73
73
|
|
|
74
74
|
This function must be picklable, so it cannot be defined as an inner method or local function.
|
|
75
75
|
|
|
76
76
|
Reconfigures the ORM engine to prevent issues that arise when multiple processes interact with
|
|
77
77
|
the Airflow database.
|
|
78
78
|
"""
|
|
79
|
+
# This initializer is used only on the scheduler
|
|
80
|
+
# We can configure_orm regardless of the Airflow version, as DB access is always allowed from scheduler.
|
|
79
81
|
settings.configure_orm()
|
|
80
82
|
|
|
81
83
|
|
|
@@ -199,7 +201,9 @@ class OpenLineageListener:
|
|
|
199
201
|
operator_name = task.task_type.lower()
|
|
200
202
|
|
|
201
203
|
with Stats.timer(f"ol.extract.{event_type}.{operator_name}"):
|
|
202
|
-
task_metadata = self.extractor_manager.extract_metadata(
|
|
204
|
+
task_metadata = self.extractor_manager.extract_metadata(
|
|
205
|
+
dagrun=dagrun, task=task, task_instance_state=TaskInstanceState.RUNNING
|
|
206
|
+
)
|
|
203
207
|
|
|
204
208
|
redacted_event = self.adapter.start_task(
|
|
205
209
|
run_id=task_uuid,
|
|
@@ -302,7 +306,10 @@ class OpenLineageListener:
|
|
|
302
306
|
|
|
303
307
|
with Stats.timer(f"ol.extract.{event_type}.{operator_name}"):
|
|
304
308
|
task_metadata = self.extractor_manager.extract_metadata(
|
|
305
|
-
dagrun
|
|
309
|
+
dagrun=dagrun,
|
|
310
|
+
task=task,
|
|
311
|
+
task_instance_state=TaskInstanceState.SUCCESS,
|
|
312
|
+
task_instance=task_instance,
|
|
306
313
|
)
|
|
307
314
|
|
|
308
315
|
redacted_event = self.adapter.complete_task(
|
|
@@ -423,7 +430,10 @@ class OpenLineageListener:
|
|
|
423
430
|
|
|
424
431
|
with Stats.timer(f"ol.extract.{event_type}.{operator_name}"):
|
|
425
432
|
task_metadata = self.extractor_manager.extract_metadata(
|
|
426
|
-
dagrun
|
|
433
|
+
dagrun=dagrun,
|
|
434
|
+
task=task,
|
|
435
|
+
task_instance_state=TaskInstanceState.FAILED,
|
|
436
|
+
task_instance=task_instance,
|
|
427
437
|
)
|
|
428
438
|
|
|
429
439
|
redacted_event = self.adapter.fail_task(
|
|
@@ -481,7 +491,8 @@ class OpenLineageListener:
|
|
|
481
491
|
self.log.debug("Process with pid %s finished - parent", pid)
|
|
482
492
|
else:
|
|
483
493
|
setproctitle(getproctitle() + " - OpenLineage - " + callable_name)
|
|
484
|
-
|
|
494
|
+
if not AIRFLOW_V_3_0_PLUS:
|
|
495
|
+
configure_orm(disable_connection_pool=True)
|
|
485
496
|
self.log.debug("Executing OpenLineage process - %s - pid %s", callable_name, os.getpid())
|
|
486
497
|
callable()
|
|
487
498
|
self.log.debug("Process with current pid finishes after %s", callable_name)
|
|
@@ -21,6 +21,7 @@ from typing import TYPE_CHECKING
|
|
|
21
21
|
from airflow.providers.openlineage import conf
|
|
22
22
|
from airflow.providers.openlineage.plugins.adapter import OpenLineageAdapter
|
|
23
23
|
from airflow.providers.openlineage.utils.utils import get_job_name
|
|
24
|
+
from airflow.providers.openlineage.version_compat import AIRFLOW_V_3_0_PLUS
|
|
24
25
|
|
|
25
26
|
if TYPE_CHECKING:
|
|
26
27
|
from airflow.models import TaskInstance
|
|
@@ -58,15 +59,25 @@ def lineage_run_id(task_instance: TaskInstance):
|
|
|
58
59
|
For more information take a look at the guide:
|
|
59
60
|
:ref:`howto/macros:openlineage`
|
|
60
61
|
"""
|
|
61
|
-
if
|
|
62
|
-
|
|
62
|
+
if AIRFLOW_V_3_0_PLUS:
|
|
63
|
+
context = task_instance.get_template_context()
|
|
64
|
+
if hasattr(task_instance, "dag_run"):
|
|
65
|
+
dag_run = task_instance.dag_run
|
|
66
|
+
elif hasattr(context, "dag_run"):
|
|
67
|
+
dag_run = context["dag_run"]
|
|
68
|
+
if hasattr(dag_run, "logical_date") and dag_run.logical_date:
|
|
69
|
+
date = dag_run.logical_date
|
|
70
|
+
else:
|
|
71
|
+
date = dag_run.run_after
|
|
72
|
+
elif hasattr(task_instance, "logical_date"):
|
|
73
|
+
date = task_instance.logical_date
|
|
63
74
|
else:
|
|
64
|
-
|
|
75
|
+
date = task_instance.execution_date
|
|
65
76
|
return OpenLineageAdapter.build_task_instance_run_id(
|
|
66
77
|
dag_id=task_instance.dag_id,
|
|
67
78
|
task_id=task_instance.task_id,
|
|
68
79
|
try_number=task_instance.try_number,
|
|
69
|
-
logical_date=
|
|
80
|
+
logical_date=date,
|
|
70
81
|
map_index=task_instance.map_index,
|
|
71
82
|
)
|
|
72
83
|
|
|
@@ -53,35 +53,73 @@ def _get_parent_job_information_as_spark_properties(context: Context) -> dict:
|
|
|
53
53
|
|
|
54
54
|
def _get_transport_information_as_spark_properties() -> dict:
|
|
55
55
|
"""Retrieve transport information as Spark properties."""
|
|
56
|
-
transport = get_openlineage_listener().adapter.get_or_create_openlineage_client().transport
|
|
57
|
-
if transport.kind != "http":
|
|
58
|
-
log.info(
|
|
59
|
-
"OpenLineage transport type `%s` does not support automatic "
|
|
60
|
-
"injection of OpenLineage transport information into Spark properties.",
|
|
61
|
-
transport.kind,
|
|
62
|
-
)
|
|
63
|
-
return {}
|
|
64
|
-
|
|
65
|
-
properties = {
|
|
66
|
-
"spark.openlineage.transport.type": transport.kind,
|
|
67
|
-
"spark.openlineage.transport.url": transport.url,
|
|
68
|
-
"spark.openlineage.transport.endpoint": transport.endpoint,
|
|
69
|
-
"spark.openlineage.transport.timeoutInMillis": str(
|
|
70
|
-
int(transport.timeout * 1000) # convert to milliseconds, as required by Spark integration
|
|
71
|
-
),
|
|
72
|
-
}
|
|
73
|
-
if transport.compression:
|
|
74
|
-
properties["spark.openlineage.transport.compression"] = str(transport.compression)
|
|
75
56
|
|
|
76
|
-
|
|
77
|
-
properties
|
|
78
|
-
|
|
57
|
+
def _get_transport_information(tp) -> dict:
|
|
58
|
+
properties = {
|
|
59
|
+
"type": tp.kind,
|
|
60
|
+
"url": tp.url,
|
|
61
|
+
"endpoint": tp.endpoint,
|
|
62
|
+
"timeoutInMillis": str(
|
|
63
|
+
int(tp.timeout) * 1000 # convert to milliseconds, as required by Spark integration
|
|
64
|
+
),
|
|
65
|
+
}
|
|
66
|
+
if hasattr(tp, "compression") and tp.compression:
|
|
67
|
+
properties["compression"] = str(tp.compression)
|
|
68
|
+
|
|
69
|
+
if hasattr(tp.config.auth, "api_key") and tp.config.auth.get_bearer():
|
|
70
|
+
properties["auth.type"] = "api_key"
|
|
71
|
+
properties["auth.apiKey"] = tp.config.auth.get_bearer()
|
|
72
|
+
|
|
73
|
+
if hasattr(tp.config, "custom_headers") and tp.config.custom_headers:
|
|
74
|
+
for key, value in tp.config.custom_headers.items():
|
|
75
|
+
properties[f"headers.{key}"] = value
|
|
76
|
+
return properties
|
|
77
|
+
|
|
78
|
+
def _format_transport(props: dict, transport: dict, name: str | None):
|
|
79
|
+
for key, value in transport.items():
|
|
80
|
+
if name:
|
|
81
|
+
props[f"spark.openlineage.transport.transports.{name}.{key}"] = value
|
|
82
|
+
else:
|
|
83
|
+
props[f"spark.openlineage.transport.{key}"] = value
|
|
84
|
+
return props
|
|
79
85
|
|
|
80
|
-
|
|
81
|
-
for key, value in transport.config.custom_headers.items():
|
|
82
|
-
properties[f"spark.openlineage.transport.headers.{key}"] = value
|
|
86
|
+
transport = get_openlineage_listener().adapter.get_or_create_openlineage_client().transport
|
|
83
87
|
|
|
84
|
-
|
|
88
|
+
if transport.kind == "composite":
|
|
89
|
+
http_transports = {}
|
|
90
|
+
for nested_transport in transport.transports:
|
|
91
|
+
if nested_transport.kind == "http":
|
|
92
|
+
http_transports[nested_transport.name] = _get_transport_information(nested_transport)
|
|
93
|
+
else:
|
|
94
|
+
name = nested_transport.name if hasattr(nested_transport, "name") else "no-name"
|
|
95
|
+
log.info(
|
|
96
|
+
"OpenLineage transport type `%s` with name `%s` is not supported in composite transport.",
|
|
97
|
+
nested_transport.kind,
|
|
98
|
+
name,
|
|
99
|
+
)
|
|
100
|
+
if len(http_transports) == 0:
|
|
101
|
+
log.warning(
|
|
102
|
+
"OpenLineage transport type `composite` does not contain http transport. Skipping "
|
|
103
|
+
"injection of OpenLineage transport information into Spark properties.",
|
|
104
|
+
)
|
|
105
|
+
return {}
|
|
106
|
+
props = {
|
|
107
|
+
"spark.openlineage.transport.type": "composite",
|
|
108
|
+
"spark.openlineage.transport.continueOnFailure": str(transport.config.continue_on_failure),
|
|
109
|
+
}
|
|
110
|
+
for name, http_transport in http_transports.items():
|
|
111
|
+
props = _format_transport(props, http_transport, name)
|
|
112
|
+
return props
|
|
113
|
+
|
|
114
|
+
elif transport.kind == "http":
|
|
115
|
+
return _format_transport({}, _get_transport_information(transport), None)
|
|
116
|
+
|
|
117
|
+
log.info(
|
|
118
|
+
"OpenLineage transport type `%s` does not support automatic "
|
|
119
|
+
"injection of OpenLineage transport information into Spark properties.",
|
|
120
|
+
transport.kind,
|
|
121
|
+
)
|
|
122
|
+
return {}
|
|
85
123
|
|
|
86
124
|
|
|
87
125
|
def _is_parent_job_information_present_in_spark_properties(properties: dict) -> bool:
|
|
@@ -213,13 +213,7 @@ def is_ti_rescheduled_already(ti: TaskInstance, session=NEW_SESSION):
|
|
|
213
213
|
|
|
214
214
|
return (
|
|
215
215
|
session.query(
|
|
216
|
-
exists().where(
|
|
217
|
-
TaskReschedule.dag_id == ti.dag_id,
|
|
218
|
-
TaskReschedule.task_id == ti.task_id,
|
|
219
|
-
TaskReschedule.run_id == ti.run_id,
|
|
220
|
-
TaskReschedule.map_index == ti.map_index,
|
|
221
|
-
TaskReschedule.try_number == ti.try_number,
|
|
222
|
-
)
|
|
216
|
+
exists().where(TaskReschedule.ti_id == ti.id, TaskReschedule.try_number == ti.try_number)
|
|
223
217
|
).scalar()
|
|
224
218
|
is True
|
|
225
219
|
)
|
|
@@ -369,8 +363,19 @@ class DagRunInfo(InfoJsonEncodable):
|
|
|
369
363
|
"run_id",
|
|
370
364
|
"run_type",
|
|
371
365
|
"start_date",
|
|
366
|
+
"end_date",
|
|
372
367
|
]
|
|
373
368
|
|
|
369
|
+
casts = {"duration": lambda dagrun: DagRunInfo.duration(dagrun)}
|
|
370
|
+
|
|
371
|
+
@classmethod
|
|
372
|
+
def duration(cls, dagrun: DagRun) -> float | None:
|
|
373
|
+
if not getattr(dagrun, "end_date", None) or not isinstance(dagrun.end_date, datetime.datetime):
|
|
374
|
+
return None
|
|
375
|
+
if not getattr(dagrun, "start_date", None) or not isinstance(dagrun.start_date, datetime.datetime):
|
|
376
|
+
return None
|
|
377
|
+
return (dagrun.end_date - dagrun.start_date).total_seconds()
|
|
378
|
+
|
|
374
379
|
|
|
375
380
|
class TaskInstanceInfo(InfoJsonEncodable):
|
|
376
381
|
"""Defines encoding TaskInstance object to JSON."""
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|