apache-airflow-providers-openlineage 2.1.1__tar.gz → 2.1.2b1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apache-airflow-providers-openlineage might be problematic. Click here for more details.
- {apache_airflow_providers_openlineage-2.1.1 → apache_airflow_providers_openlineage-2.1.2b1}/PKG-INFO +7 -7
- {apache_airflow_providers_openlineage-2.1.1 → apache_airflow_providers_openlineage-2.1.2b1}/README.rst +3 -3
- {apache_airflow_providers_openlineage-2.1.1 → apache_airflow_providers_openlineage-2.1.2b1}/pyproject.toml +4 -4
- {apache_airflow_providers_openlineage-2.1.1 → apache_airflow_providers_openlineage-2.1.2b1}/src/airflow/providers/openlineage/__init__.py +1 -1
- {apache_airflow_providers_openlineage-2.1.1 → apache_airflow_providers_openlineage-2.1.2b1}/src/airflow/providers/openlineage/extractors/base.py +37 -37
- {apache_airflow_providers_openlineage-2.1.1 → apache_airflow_providers_openlineage-2.1.2b1}/src/airflow/providers/openlineage/extractors/manager.py +21 -12
- {apache_airflow_providers_openlineage-2.1.1 → apache_airflow_providers_openlineage-2.1.2b1}/src/airflow/providers/openlineage/get_provider_info.py +2 -1
- {apache_airflow_providers_openlineage-2.1.1 → apache_airflow_providers_openlineage-2.1.2b1}/src/airflow/providers/openlineage/plugins/adapter.py +1 -1
- {apache_airflow_providers_openlineage-2.1.1 → apache_airflow_providers_openlineage-2.1.2b1}/src/airflow/providers/openlineage/plugins/listener.py +15 -5
- {apache_airflow_providers_openlineage-2.1.1 → apache_airflow_providers_openlineage-2.1.2b1}/src/airflow/providers/openlineage/utils/utils.py +12 -7
- {apache_airflow_providers_openlineage-2.1.1 → apache_airflow_providers_openlineage-2.1.2b1}/src/airflow/providers/openlineage/LICENSE +0 -0
- {apache_airflow_providers_openlineage-2.1.1 → apache_airflow_providers_openlineage-2.1.2b1}/src/airflow/providers/openlineage/conf.py +0 -0
- {apache_airflow_providers_openlineage-2.1.1 → apache_airflow_providers_openlineage-2.1.2b1}/src/airflow/providers/openlineage/extractors/__init__.py +0 -0
- {apache_airflow_providers_openlineage-2.1.1 → apache_airflow_providers_openlineage-2.1.2b1}/src/airflow/providers/openlineage/extractors/bash.py +0 -0
- {apache_airflow_providers_openlineage-2.1.1 → apache_airflow_providers_openlineage-2.1.2b1}/src/airflow/providers/openlineage/extractors/python.py +0 -0
- {apache_airflow_providers_openlineage-2.1.1 → apache_airflow_providers_openlineage-2.1.2b1}/src/airflow/providers/openlineage/facets/AirflowDagRunFacet.json +0 -0
- {apache_airflow_providers_openlineage-2.1.1 → apache_airflow_providers_openlineage-2.1.2b1}/src/airflow/providers/openlineage/facets/AirflowDebugRunFacet.json +0 -0
- {apache_airflow_providers_openlineage-2.1.1 → apache_airflow_providers_openlineage-2.1.2b1}/src/airflow/providers/openlineage/facets/AirflowJobFacet.json +0 -0
- {apache_airflow_providers_openlineage-2.1.1 → apache_airflow_providers_openlineage-2.1.2b1}/src/airflow/providers/openlineage/facets/AirflowRunFacet.json +0 -0
- {apache_airflow_providers_openlineage-2.1.1 → apache_airflow_providers_openlineage-2.1.2b1}/src/airflow/providers/openlineage/facets/AirflowStateRunFacet.json +0 -0
- {apache_airflow_providers_openlineage-2.1.1 → apache_airflow_providers_openlineage-2.1.2b1}/src/airflow/providers/openlineage/facets/__init__.py +0 -0
- {apache_airflow_providers_openlineage-2.1.1 → apache_airflow_providers_openlineage-2.1.2b1}/src/airflow/providers/openlineage/plugins/__init__.py +0 -0
- {apache_airflow_providers_openlineage-2.1.1 → apache_airflow_providers_openlineage-2.1.2b1}/src/airflow/providers/openlineage/plugins/facets.py +0 -0
- {apache_airflow_providers_openlineage-2.1.1 → apache_airflow_providers_openlineage-2.1.2b1}/src/airflow/providers/openlineage/plugins/macros.py +0 -0
- {apache_airflow_providers_openlineage-2.1.1 → apache_airflow_providers_openlineage-2.1.2b1}/src/airflow/providers/openlineage/plugins/openlineage.py +0 -0
- {apache_airflow_providers_openlineage-2.1.1 → apache_airflow_providers_openlineage-2.1.2b1}/src/airflow/providers/openlineage/sqlparser.py +0 -0
- {apache_airflow_providers_openlineage-2.1.1 → apache_airflow_providers_openlineage-2.1.2b1}/src/airflow/providers/openlineage/utils/__init__.py +0 -0
- {apache_airflow_providers_openlineage-2.1.1 → apache_airflow_providers_openlineage-2.1.2b1}/src/airflow/providers/openlineage/utils/selective_enable.py +0 -0
- {apache_airflow_providers_openlineage-2.1.1 → apache_airflow_providers_openlineage-2.1.2b1}/src/airflow/providers/openlineage/utils/spark.py +0 -0
- {apache_airflow_providers_openlineage-2.1.1 → apache_airflow_providers_openlineage-2.1.2b1}/src/airflow/providers/openlineage/utils/sql.py +0 -0
- {apache_airflow_providers_openlineage-2.1.1 → apache_airflow_providers_openlineage-2.1.2b1}/src/airflow/providers/openlineage/version_compat.py +0 -0
{apache_airflow_providers_openlineage-2.1.1 → apache_airflow_providers_openlineage-2.1.2b1}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: apache-airflow-providers-openlineage
|
|
3
|
-
Version: 2.1.1
|
|
3
|
+
Version: 2.1.2b1
|
|
4
4
|
Summary: Provider package apache-airflow-providers-openlineage for Apache Airflow
|
|
5
5
|
Keywords: airflow-provider,openlineage,airflow,integration
|
|
6
6
|
Author-email: Apache Software Foundation <dev@airflow.apache.org>
|
|
@@ -27,11 +27,11 @@ Requires-Dist: attrs>=22.2
|
|
|
27
27
|
Requires-Dist: openlineage-integration-common>=1.24.2
|
|
28
28
|
Requires-Dist: openlineage-python>=1.24.2
|
|
29
29
|
Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
|
|
30
|
-
Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.1/changelog.html
|
|
31
|
-
Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.1
|
|
30
|
+
Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2b1/changelog.html
|
|
31
|
+
Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2b1
|
|
32
|
+
Project-URL: Mastodon, https://fosstodon.org/@airflow
|
|
32
33
|
Project-URL: Slack Chat, https://s.apache.org/airflow-slack
|
|
33
34
|
Project-URL: Source Code, https://github.com/apache/airflow
|
|
34
|
-
Project-URL: Twitter, https://x.com/ApacheAirflow
|
|
35
35
|
Project-URL: YouTube, https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/
|
|
36
36
|
|
|
37
37
|
|
|
@@ -59,7 +59,7 @@ Project-URL: YouTube, https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/
|
|
|
59
59
|
|
|
60
60
|
Package ``apache-airflow-providers-openlineage``
|
|
61
61
|
|
|
62
|
-
Release: ``2.1.1``
|
|
62
|
+
Release: ``2.1.2b1``
|
|
63
63
|
|
|
64
64
|
|
|
65
65
|
`OpenLineage <https://openlineage.io/>`__
|
|
@@ -72,7 +72,7 @@ This is a provider package for ``openlineage`` provider. All classes for this pr
|
|
|
72
72
|
are in ``airflow.providers.openlineage`` python package.
|
|
73
73
|
|
|
74
74
|
You can find package information and changelog for the provider
|
|
75
|
-
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.1/>`_.
|
|
75
|
+
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2b1/>`_.
|
|
76
76
|
|
|
77
77
|
Installation
|
|
78
78
|
------------
|
|
@@ -118,5 +118,5 @@ Dependent package
|
|
|
118
118
|
================================================================================================================== =================
|
|
119
119
|
|
|
120
120
|
The changelog for the provider package can be found in the
|
|
121
|
-
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.1/changelog.html>`_.
|
|
121
|
+
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2b1/changelog.html>`_.
|
|
122
122
|
|
|
@@ -23,7 +23,7 @@
|
|
|
23
23
|
|
|
24
24
|
Package ``apache-airflow-providers-openlineage``
|
|
25
25
|
|
|
26
|
-
Release: ``2.1.1``
|
|
26
|
+
Release: ``2.1.2b1``
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
`OpenLineage <https://openlineage.io/>`__
|
|
@@ -36,7 +36,7 @@ This is a provider package for ``openlineage`` provider. All classes for this pr
|
|
|
36
36
|
are in ``airflow.providers.openlineage`` python package.
|
|
37
37
|
|
|
38
38
|
You can find package information and changelog for the provider
|
|
39
|
-
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.1/>`_.
|
|
39
|
+
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2b1/>`_.
|
|
40
40
|
|
|
41
41
|
Installation
|
|
42
42
|
------------
|
|
@@ -82,4 +82,4 @@ Dependent package
|
|
|
82
82
|
================================================================================================================== =================
|
|
83
83
|
|
|
84
84
|
The changelog for the provider package can be found in the
|
|
85
|
-
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.1/changelog.html>`_.
|
|
85
|
+
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2b1/changelog.html>`_.
|
|
@@ -25,7 +25,7 @@ build-backend = "flit_core.buildapi"
|
|
|
25
25
|
|
|
26
26
|
[project]
|
|
27
27
|
name = "apache-airflow-providers-openlineage"
|
|
28
|
-
version = "2.1.1"
|
|
28
|
+
version = "2.1.2b1"
|
|
29
29
|
description = "Provider package apache-airflow-providers-openlineage for Apache Airflow"
|
|
30
30
|
readme = "README.rst"
|
|
31
31
|
authors = [
|
|
@@ -87,12 +87,12 @@ apache-airflow-providers-fab = {workspace = true}
|
|
|
87
87
|
apache-airflow-providers-standard = {workspace = true}
|
|
88
88
|
|
|
89
89
|
[project.urls]
|
|
90
|
-
"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.
|
|
91
|
-
"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.
|
|
90
|
+
"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2b1"
|
|
91
|
+
"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2b1/changelog.html"
|
|
92
92
|
"Bug Tracker" = "https://github.com/apache/airflow/issues"
|
|
93
93
|
"Source Code" = "https://github.com/apache/airflow"
|
|
94
94
|
"Slack Chat" = "https://s.apache.org/airflow-slack"
|
|
95
|
-
"
|
|
95
|
+
"Mastodon" = "https://fosstodon.org/@airflow"
|
|
96
96
|
"YouTube" = "https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/"
|
|
97
97
|
|
|
98
98
|
[project.entry-points."apache_airflow_provider"]
|
|
@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version
|
|
|
29
29
|
|
|
30
30
|
__all__ = ["__version__"]
|
|
31
31
|
|
|
32
|
-
__version__ = "2.1.1"
|
|
32
|
+
__version__ = "2.1.2b1"
|
|
33
33
|
|
|
34
34
|
if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
|
|
35
35
|
"2.9.0"
|
|
@@ -29,14 +29,16 @@ with warnings.catch_warnings():
|
|
|
29
29
|
from openlineage.client.facet import BaseFacet as BaseFacet_V1
|
|
30
30
|
from openlineage.client.facet_v2 import JobFacet, RunFacet
|
|
31
31
|
|
|
32
|
-
from airflow.providers.openlineage.utils.utils import AIRFLOW_V_2_10_PLUS
|
|
33
32
|
from airflow.utils.log.logging_mixin import LoggingMixin
|
|
34
|
-
from airflow.utils.state import TaskInstanceState
|
|
35
33
|
|
|
36
34
|
# this is not to break static checks compatibility with v1 OpenLineage facet classes
|
|
37
35
|
DatasetSubclass = TypeVar("DatasetSubclass", bound=OLDataset)
|
|
38
36
|
BaseFacetSubclass = TypeVar("BaseFacetSubclass", bound=Union[BaseFacet_V1, RunFacet, JobFacet])
|
|
39
37
|
|
|
38
|
+
OL_METHOD_NAME_START = "get_openlineage_facets_on_start"
|
|
39
|
+
OL_METHOD_NAME_COMPLETE = "get_openlineage_facets_on_complete"
|
|
40
|
+
OL_METHOD_NAME_FAIL = "get_openlineage_facets_on_failure"
|
|
41
|
+
|
|
40
42
|
|
|
41
43
|
@define
|
|
42
44
|
class OperatorLineage(Generic[DatasetSubclass, BaseFacetSubclass]):
|
|
@@ -81,6 +83,9 @@ class BaseExtractor(ABC, LoggingMixin):
|
|
|
81
83
|
def extract_on_complete(self, task_instance) -> OperatorLineage | None:
|
|
82
84
|
return self.extract()
|
|
83
85
|
|
|
86
|
+
def extract_on_failure(self, task_instance) -> OperatorLineage | None:
|
|
87
|
+
return self.extract()
|
|
88
|
+
|
|
84
89
|
|
|
85
90
|
class DefaultExtractor(BaseExtractor):
|
|
86
91
|
"""Extractor that uses `get_openlineage_facets_on_start/complete/failure` methods."""
|
|
@@ -96,46 +101,41 @@ class DefaultExtractor(BaseExtractor):
|
|
|
96
101
|
return []
|
|
97
102
|
|
|
98
103
|
def _execute_extraction(self) -> OperatorLineage | None:
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
self.log.debug(
|
|
102
|
-
"Trying to execute `get_openlineage_facets_on_start` for %s.", self.operator.task_type
|
|
103
|
-
)
|
|
104
|
-
return self._get_openlineage_facets(self.operator.get_openlineage_facets_on_start) # type: ignore
|
|
105
|
-
except ImportError:
|
|
106
|
-
self.log.error(
|
|
107
|
-
"OpenLineage provider method failed to import OpenLineage integration. "
|
|
108
|
-
"This should not happen. Please report this bug to developers."
|
|
109
|
-
)
|
|
110
|
-
return None
|
|
111
|
-
except AttributeError:
|
|
104
|
+
method = getattr(self.operator, OL_METHOD_NAME_START, None)
|
|
105
|
+
if callable(method):
|
|
112
106
|
self.log.debug(
|
|
113
|
-
"
|
|
114
|
-
self.operator.task_type,
|
|
107
|
+
"Trying to execute '%s' method of '%s'.", OL_METHOD_NAME_START, self.operator.task_type
|
|
115
108
|
)
|
|
116
|
-
return
|
|
109
|
+
return self._get_openlineage_facets(method)
|
|
110
|
+
self.log.debug(
|
|
111
|
+
"Operator '%s' does not have '%s' method.", self.operator.task_type, OL_METHOD_NAME_START
|
|
112
|
+
)
|
|
113
|
+
return OperatorLineage()
|
|
117
114
|
|
|
118
115
|
def extract_on_complete(self, task_instance) -> OperatorLineage | None:
|
|
119
|
-
|
|
120
|
-
if
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
if on_failed and callable(on_failed):
|
|
129
|
-
self.log.debug(
|
|
130
|
-
"Executing `get_openlineage_facets_on_failure` for %s.", self.operator.task_type
|
|
131
|
-
)
|
|
132
|
-
return self._get_openlineage_facets(on_failed, task_instance)
|
|
133
|
-
on_complete = getattr(self.operator, "get_openlineage_facets_on_complete", None)
|
|
134
|
-
if on_complete and callable(on_complete):
|
|
135
|
-
self.log.debug("Executing `get_openlineage_facets_on_complete` for %s.", self.operator.task_type)
|
|
136
|
-
return self._get_openlineage_facets(on_complete, task_instance)
|
|
116
|
+
method = getattr(self.operator, OL_METHOD_NAME_COMPLETE, None)
|
|
117
|
+
if callable(method):
|
|
118
|
+
self.log.debug(
|
|
119
|
+
"Trying to execute '%s' method of '%s'.", OL_METHOD_NAME_COMPLETE, self.operator.task_type
|
|
120
|
+
)
|
|
121
|
+
return self._get_openlineage_facets(method, task_instance)
|
|
122
|
+
self.log.debug(
|
|
123
|
+
"Operator '%s' does not have '%s' method.", self.operator.task_type, OL_METHOD_NAME_COMPLETE
|
|
124
|
+
)
|
|
137
125
|
return self.extract()
|
|
138
126
|
|
|
127
|
+
def extract_on_failure(self, task_instance) -> OperatorLineage | None:
|
|
128
|
+
method = getattr(self.operator, OL_METHOD_NAME_FAIL, None)
|
|
129
|
+
if callable(method):
|
|
130
|
+
self.log.debug(
|
|
131
|
+
"Trying to execute '%s' method of '%s'.", OL_METHOD_NAME_FAIL, self.operator.task_type
|
|
132
|
+
)
|
|
133
|
+
return self._get_openlineage_facets(method, task_instance)
|
|
134
|
+
self.log.debug(
|
|
135
|
+
"Operator '%s' does not have '%s' method.", self.operator.task_type, OL_METHOD_NAME_FAIL
|
|
136
|
+
)
|
|
137
|
+
return self.extract_on_complete(task_instance)
|
|
138
|
+
|
|
139
139
|
def _get_openlineage_facets(self, get_facets_method, *args) -> OperatorLineage | None:
|
|
140
140
|
try:
|
|
141
141
|
facets: OperatorLineage = get_facets_method(*args)
|
|
@@ -153,5 +153,5 @@ class DefaultExtractor(BaseExtractor):
|
|
|
153
153
|
"This should not happen."
|
|
154
154
|
)
|
|
155
155
|
except Exception:
|
|
156
|
-
self.log.warning("OpenLineage provider method failed to extract data from provider.
|
|
156
|
+
self.log.warning("OpenLineage provider method failed to extract data from provider.")
|
|
157
157
|
return None
|
|
@@ -24,7 +24,11 @@ from airflow.providers.common.compat.openlineage.utils.utils import (
|
|
|
24
24
|
)
|
|
25
25
|
from airflow.providers.openlineage import conf
|
|
26
26
|
from airflow.providers.openlineage.extractors import BaseExtractor, OperatorLineage
|
|
27
|
-
from airflow.providers.openlineage.extractors.base import DefaultExtractor
|
|
27
|
+
from airflow.providers.openlineage.extractors.base import (
|
|
28
|
+
OL_METHOD_NAME_COMPLETE,
|
|
29
|
+
OL_METHOD_NAME_START,
|
|
30
|
+
DefaultExtractor,
|
|
31
|
+
)
|
|
28
32
|
from airflow.providers.openlineage.extractors.bash import BashExtractor
|
|
29
33
|
from airflow.providers.openlineage.extractors.python import PythonExtractor
|
|
30
34
|
from airflow.providers.openlineage.utils.utils import (
|
|
@@ -32,6 +36,7 @@ from airflow.providers.openlineage.utils.utils import (
|
|
|
32
36
|
try_import_from_string,
|
|
33
37
|
)
|
|
34
38
|
from airflow.utils.log.logging_mixin import LoggingMixin
|
|
39
|
+
from airflow.utils.state import TaskInstanceState
|
|
35
40
|
|
|
36
41
|
if TYPE_CHECKING:
|
|
37
42
|
from openlineage.client.event_v2 import Dataset
|
|
@@ -87,7 +92,9 @@ class ExtractorManager(LoggingMixin):
|
|
|
87
92
|
def add_extractor(self, operator_class: str, extractor: type[BaseExtractor]):
|
|
88
93
|
self.extractors[operator_class] = extractor
|
|
89
94
|
|
|
90
|
-
def extract_metadata(
|
|
95
|
+
def extract_metadata(
|
|
96
|
+
self, dagrun, task, task_instance_state: TaskInstanceState, task_instance=None
|
|
97
|
+
) -> OperatorLineage:
|
|
91
98
|
extractor = self._get_extractor(task)
|
|
92
99
|
task_info = (
|
|
93
100
|
f"task_type={task.task_type} "
|
|
@@ -104,10 +111,15 @@ class ExtractorManager(LoggingMixin):
|
|
|
104
111
|
extractor.__class__.__name__,
|
|
105
112
|
str(task_info),
|
|
106
113
|
)
|
|
107
|
-
if
|
|
108
|
-
task_metadata = extractor.extract_on_complete(task_instance)
|
|
109
|
-
else:
|
|
114
|
+
if task_instance_state == TaskInstanceState.RUNNING:
|
|
110
115
|
task_metadata = extractor.extract()
|
|
116
|
+
elif task_instance_state == TaskInstanceState.FAILED:
|
|
117
|
+
if callable(getattr(extractor, "extract_on_failure", None)):
|
|
118
|
+
task_metadata = extractor.extract_on_failure(task_instance)
|
|
119
|
+
else:
|
|
120
|
+
task_metadata = extractor.extract_on_complete(task_instance)
|
|
121
|
+
else:
|
|
122
|
+
task_metadata = extractor.extract_on_complete(task_instance)
|
|
111
123
|
|
|
112
124
|
self.log.debug(
|
|
113
125
|
"Found task metadata for operation %s: %s",
|
|
@@ -155,13 +167,9 @@ class ExtractorManager(LoggingMixin):
|
|
|
155
167
|
return self.extractors[task.task_type]
|
|
156
168
|
|
|
157
169
|
def method_exists(method_name):
|
|
158
|
-
|
|
159
|
-
if method:
|
|
160
|
-
return callable(method)
|
|
170
|
+
return callable(getattr(task, method_name, None))
|
|
161
171
|
|
|
162
|
-
if method_exists(
|
|
163
|
-
"get_openlineage_facets_on_complete"
|
|
164
|
-
):
|
|
172
|
+
if method_exists(OL_METHOD_NAME_START) or method_exists(OL_METHOD_NAME_COMPLETE):
|
|
165
173
|
return self.default_extractor
|
|
166
174
|
return None
|
|
167
175
|
|
|
@@ -191,7 +199,8 @@ class ExtractorManager(LoggingMixin):
|
|
|
191
199
|
if d:
|
|
192
200
|
task_metadata.outputs.append(d)
|
|
193
201
|
|
|
194
|
-
|
|
202
|
+
@staticmethod
|
|
203
|
+
def get_hook_lineage() -> tuple[list[Dataset], list[Dataset]] | None:
|
|
195
204
|
try:
|
|
196
205
|
from airflow.providers.common.compat.lineage.hook import (
|
|
197
206
|
get_hook_lineage_collector,
|
|
@@ -27,8 +27,9 @@ def get_provider_info():
|
|
|
27
27
|
"name": "OpenLineage Airflow",
|
|
28
28
|
"description": "`OpenLineage <https://openlineage.io/>`__\n",
|
|
29
29
|
"state": "ready",
|
|
30
|
-
"source-date-epoch":
|
|
30
|
+
"source-date-epoch": 1742478177,
|
|
31
31
|
"versions": [
|
|
32
|
+
"2.1.2b1",
|
|
32
33
|
"2.1.1",
|
|
33
34
|
"2.1.0",
|
|
34
35
|
"2.0.0",
|
|
@@ -85,7 +85,7 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
85
85
|
if config:
|
|
86
86
|
self.log.debug(
|
|
87
87
|
"OpenLineage configuration found. Transport type: `%s`",
|
|
88
|
-
config.get("type", "no type provided"),
|
|
88
|
+
config.get("transport", {}).get("type", "no type provided"),
|
|
89
89
|
)
|
|
90
90
|
self._client = OpenLineageClient(config=config) # type: ignore[call-arg]
|
|
91
91
|
else:
|
|
@@ -76,7 +76,8 @@ def _executor_initializer():
|
|
|
76
76
|
Reconfigures the ORM engine to prevent issues that arise when multiple processes interact with
|
|
77
77
|
the Airflow database.
|
|
78
78
|
"""
|
|
79
|
-
|
|
79
|
+
if not AIRFLOW_V_3_0_PLUS:
|
|
80
|
+
settings.configure_orm()
|
|
80
81
|
|
|
81
82
|
|
|
82
83
|
class OpenLineageListener:
|
|
@@ -199,7 +200,9 @@ class OpenLineageListener:
|
|
|
199
200
|
operator_name = task.task_type.lower()
|
|
200
201
|
|
|
201
202
|
with Stats.timer(f"ol.extract.{event_type}.{operator_name}"):
|
|
202
|
-
task_metadata = self.extractor_manager.extract_metadata(
|
|
203
|
+
task_metadata = self.extractor_manager.extract_metadata(
|
|
204
|
+
dagrun=dagrun, task=task, task_instance_state=TaskInstanceState.RUNNING
|
|
205
|
+
)
|
|
203
206
|
|
|
204
207
|
redacted_event = self.adapter.start_task(
|
|
205
208
|
run_id=task_uuid,
|
|
@@ -302,7 +305,10 @@ class OpenLineageListener:
|
|
|
302
305
|
|
|
303
306
|
with Stats.timer(f"ol.extract.{event_type}.{operator_name}"):
|
|
304
307
|
task_metadata = self.extractor_manager.extract_metadata(
|
|
305
|
-
dagrun
|
|
308
|
+
dagrun=dagrun,
|
|
309
|
+
task=task,
|
|
310
|
+
task_instance_state=TaskInstanceState.SUCCESS,
|
|
311
|
+
task_instance=task_instance,
|
|
306
312
|
)
|
|
307
313
|
|
|
308
314
|
redacted_event = self.adapter.complete_task(
|
|
@@ -423,7 +429,10 @@ class OpenLineageListener:
|
|
|
423
429
|
|
|
424
430
|
with Stats.timer(f"ol.extract.{event_type}.{operator_name}"):
|
|
425
431
|
task_metadata = self.extractor_manager.extract_metadata(
|
|
426
|
-
dagrun
|
|
432
|
+
dagrun=dagrun,
|
|
433
|
+
task=task,
|
|
434
|
+
task_instance_state=TaskInstanceState.FAILED,
|
|
435
|
+
task_instance=task_instance,
|
|
427
436
|
)
|
|
428
437
|
|
|
429
438
|
redacted_event = self.adapter.fail_task(
|
|
@@ -481,7 +490,8 @@ class OpenLineageListener:
|
|
|
481
490
|
self.log.debug("Process with pid %s finished - parent", pid)
|
|
482
491
|
else:
|
|
483
492
|
setproctitle(getproctitle() + " - OpenLineage - " + callable_name)
|
|
484
|
-
|
|
493
|
+
if not AIRFLOW_V_3_0_PLUS:
|
|
494
|
+
configure_orm(disable_connection_pool=True)
|
|
485
495
|
self.log.debug("Executing OpenLineage process - %s - pid %s", callable_name, os.getpid())
|
|
486
496
|
callable()
|
|
487
497
|
self.log.debug("Process with current pid finishes after %s", callable_name)
|
|
@@ -213,13 +213,7 @@ def is_ti_rescheduled_already(ti: TaskInstance, session=NEW_SESSION):
|
|
|
213
213
|
|
|
214
214
|
return (
|
|
215
215
|
session.query(
|
|
216
|
-
exists().where(
|
|
217
|
-
TaskReschedule.dag_id == ti.dag_id,
|
|
218
|
-
TaskReschedule.task_id == ti.task_id,
|
|
219
|
-
TaskReschedule.run_id == ti.run_id,
|
|
220
|
-
TaskReschedule.map_index == ti.map_index,
|
|
221
|
-
TaskReschedule.try_number == ti.try_number,
|
|
222
|
-
)
|
|
216
|
+
exists().where(TaskReschedule.ti_id == ti.id, TaskReschedule.try_number == ti.try_number)
|
|
223
217
|
).scalar()
|
|
224
218
|
is True
|
|
225
219
|
)
|
|
@@ -369,8 +363,19 @@ class DagRunInfo(InfoJsonEncodable):
|
|
|
369
363
|
"run_id",
|
|
370
364
|
"run_type",
|
|
371
365
|
"start_date",
|
|
366
|
+
"end_date",
|
|
372
367
|
]
|
|
373
368
|
|
|
369
|
+
casts = {"duration": lambda dagrun: DagRunInfo.duration(dagrun)}
|
|
370
|
+
|
|
371
|
+
@classmethod
|
|
372
|
+
def duration(cls, dagrun: DagRun) -> float | None:
|
|
373
|
+
if not getattr(dagrun, "end_date", None) or not isinstance(dagrun.end_date, datetime.datetime):
|
|
374
|
+
return None
|
|
375
|
+
if not getattr(dagrun, "start_date", None) or not isinstance(dagrun.start_date, datetime.datetime):
|
|
376
|
+
return None
|
|
377
|
+
return (dagrun.end_date - dagrun.start_date).total_seconds()
|
|
378
|
+
|
|
374
379
|
|
|
375
380
|
class TaskInstanceInfo(InfoJsonEncodable):
|
|
376
381
|
"""Defines encoding TaskInstance object to JSON."""
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|