apache-airflow-providers-openlineage 2.1.1__py3-none-any.whl → 2.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apache-airflow-providers-openlineage might be problematic. Click here for more details.
- airflow/providers/openlineage/__init__.py +1 -1
- airflow/providers/openlineage/extractors/base.py +42 -38
- airflow/providers/openlineage/extractors/manager.py +29 -28
- airflow/providers/openlineage/get_provider_info.py +2 -1
- airflow/providers/openlineage/plugins/adapter.py +17 -8
- airflow/providers/openlineage/plugins/listener.py +19 -6
- airflow/providers/openlineage/plugins/macros.py +15 -4
- airflow/providers/openlineage/utils/selective_enable.py +1 -1
- airflow/providers/openlineage/utils/spark.py +64 -26
- airflow/providers/openlineage/utils/utils.py +21 -2
- {apache_airflow_providers_openlineage-2.1.1.dist-info → apache_airflow_providers_openlineage-2.1.2.dist-info}/METADATA +8 -8
- {apache_airflow_providers_openlineage-2.1.1.dist-info → apache_airflow_providers_openlineage-2.1.2.dist-info}/RECORD +14 -14
- {apache_airflow_providers_openlineage-2.1.1.dist-info → apache_airflow_providers_openlineage-2.1.2.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_openlineage-2.1.1.dist-info → apache_airflow_providers_openlineage-2.1.2.dist-info}/entry_points.txt +0 -0
|
@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version
|
|
|
29
29
|
|
|
30
30
|
__all__ = ["__version__"]
|
|
31
31
|
|
|
32
|
-
__version__ = "2.1.
|
|
32
|
+
__version__ = "2.1.2"
|
|
33
33
|
|
|
34
34
|
if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
|
|
35
35
|
"2.9.0"
|
|
@@ -29,14 +29,16 @@ with warnings.catch_warnings():
|
|
|
29
29
|
from openlineage.client.facet import BaseFacet as BaseFacet_V1
|
|
30
30
|
from openlineage.client.facet_v2 import JobFacet, RunFacet
|
|
31
31
|
|
|
32
|
-
from airflow.providers.openlineage.utils.utils import AIRFLOW_V_2_10_PLUS
|
|
33
32
|
from airflow.utils.log.logging_mixin import LoggingMixin
|
|
34
|
-
from airflow.utils.state import TaskInstanceState
|
|
35
33
|
|
|
36
34
|
# this is not to break static checks compatibility with v1 OpenLineage facet classes
|
|
37
35
|
DatasetSubclass = TypeVar("DatasetSubclass", bound=OLDataset)
|
|
38
36
|
BaseFacetSubclass = TypeVar("BaseFacetSubclass", bound=Union[BaseFacet_V1, RunFacet, JobFacet])
|
|
39
37
|
|
|
38
|
+
OL_METHOD_NAME_START = "get_openlineage_facets_on_start"
|
|
39
|
+
OL_METHOD_NAME_COMPLETE = "get_openlineage_facets_on_complete"
|
|
40
|
+
OL_METHOD_NAME_FAIL = "get_openlineage_facets_on_failure"
|
|
41
|
+
|
|
40
42
|
|
|
41
43
|
@define
|
|
42
44
|
class OperatorLineage(Generic[DatasetSubclass, BaseFacetSubclass]):
|
|
@@ -81,6 +83,9 @@ class BaseExtractor(ABC, LoggingMixin):
|
|
|
81
83
|
def extract_on_complete(self, task_instance) -> OperatorLineage | None:
|
|
82
84
|
return self.extract()
|
|
83
85
|
|
|
86
|
+
def extract_on_failure(self, task_instance) -> OperatorLineage | None:
|
|
87
|
+
return self.extract_on_complete(task_instance)
|
|
88
|
+
|
|
84
89
|
|
|
85
90
|
class DefaultExtractor(BaseExtractor):
|
|
86
91
|
"""Extractor that uses `get_openlineage_facets_on_start/complete/failure` methods."""
|
|
@@ -96,46 +101,41 @@ class DefaultExtractor(BaseExtractor):
|
|
|
96
101
|
return []
|
|
97
102
|
|
|
98
103
|
def _execute_extraction(self) -> OperatorLineage | None:
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
self.log.debug(
|
|
102
|
-
"Trying to execute `get_openlineage_facets_on_start` for %s.", self.operator.task_type
|
|
103
|
-
)
|
|
104
|
-
return self._get_openlineage_facets(self.operator.get_openlineage_facets_on_start) # type: ignore
|
|
105
|
-
except ImportError:
|
|
106
|
-
self.log.error(
|
|
107
|
-
"OpenLineage provider method failed to import OpenLineage integration. "
|
|
108
|
-
"This should not happen. Please report this bug to developers."
|
|
109
|
-
)
|
|
110
|
-
return None
|
|
111
|
-
except AttributeError:
|
|
104
|
+
method = getattr(self.operator, OL_METHOD_NAME_START, None)
|
|
105
|
+
if callable(method):
|
|
112
106
|
self.log.debug(
|
|
113
|
-
"
|
|
114
|
-
self.operator.task_type,
|
|
107
|
+
"Trying to execute '%s' method of '%s'.", OL_METHOD_NAME_START, self.operator.task_type
|
|
115
108
|
)
|
|
116
|
-
return
|
|
109
|
+
return self._get_openlineage_facets(method)
|
|
110
|
+
self.log.debug(
|
|
111
|
+
"Operator '%s' does not have '%s' method.", self.operator.task_type, OL_METHOD_NAME_START
|
|
112
|
+
)
|
|
113
|
+
return OperatorLineage()
|
|
117
114
|
|
|
118
115
|
def extract_on_complete(self, task_instance) -> OperatorLineage | None:
|
|
119
|
-
|
|
120
|
-
if
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
if on_failed and callable(on_failed):
|
|
129
|
-
self.log.debug(
|
|
130
|
-
"Executing `get_openlineage_facets_on_failure` for %s.", self.operator.task_type
|
|
131
|
-
)
|
|
132
|
-
return self._get_openlineage_facets(on_failed, task_instance)
|
|
133
|
-
on_complete = getattr(self.operator, "get_openlineage_facets_on_complete", None)
|
|
134
|
-
if on_complete and callable(on_complete):
|
|
135
|
-
self.log.debug("Executing `get_openlineage_facets_on_complete` for %s.", self.operator.task_type)
|
|
136
|
-
return self._get_openlineage_facets(on_complete, task_instance)
|
|
116
|
+
method = getattr(self.operator, OL_METHOD_NAME_COMPLETE, None)
|
|
117
|
+
if callable(method):
|
|
118
|
+
self.log.debug(
|
|
119
|
+
"Trying to execute '%s' method of '%s'.", OL_METHOD_NAME_COMPLETE, self.operator.task_type
|
|
120
|
+
)
|
|
121
|
+
return self._get_openlineage_facets(method, task_instance)
|
|
122
|
+
self.log.debug(
|
|
123
|
+
"Operator '%s' does not have '%s' method.", self.operator.task_type, OL_METHOD_NAME_COMPLETE
|
|
124
|
+
)
|
|
137
125
|
return self.extract()
|
|
138
126
|
|
|
127
|
+
def extract_on_failure(self, task_instance) -> OperatorLineage | None:
|
|
128
|
+
method = getattr(self.operator, OL_METHOD_NAME_FAIL, None)
|
|
129
|
+
if callable(method):
|
|
130
|
+
self.log.debug(
|
|
131
|
+
"Trying to execute '%s' method of '%s'.", OL_METHOD_NAME_FAIL, self.operator.task_type
|
|
132
|
+
)
|
|
133
|
+
return self._get_openlineage_facets(method, task_instance)
|
|
134
|
+
self.log.debug(
|
|
135
|
+
"Operator '%s' does not have '%s' method.", self.operator.task_type, OL_METHOD_NAME_FAIL
|
|
136
|
+
)
|
|
137
|
+
return self.extract_on_complete(task_instance)
|
|
138
|
+
|
|
139
139
|
def _get_openlineage_facets(self, get_facets_method, *args) -> OperatorLineage | None:
|
|
140
140
|
try:
|
|
141
141
|
facets: OperatorLineage = get_facets_method(*args)
|
|
@@ -152,6 +152,10 @@ class DefaultExtractor(BaseExtractor):
|
|
|
152
152
|
"OpenLineage provider method failed to import OpenLineage integration. "
|
|
153
153
|
"This should not happen."
|
|
154
154
|
)
|
|
155
|
-
except Exception:
|
|
156
|
-
self.log.warning(
|
|
155
|
+
except Exception as e:
|
|
156
|
+
self.log.warning(
|
|
157
|
+
"OpenLineage method failed to extract data from Operator with the following exception: `%s`",
|
|
158
|
+
e,
|
|
159
|
+
)
|
|
160
|
+
self.log.debug("OpenLineage extraction failure details:", exc_info=True)
|
|
157
161
|
return None
|
|
@@ -24,7 +24,11 @@ from airflow.providers.common.compat.openlineage.utils.utils import (
|
|
|
24
24
|
)
|
|
25
25
|
from airflow.providers.openlineage import conf
|
|
26
26
|
from airflow.providers.openlineage.extractors import BaseExtractor, OperatorLineage
|
|
27
|
-
from airflow.providers.openlineage.extractors.base import
|
|
27
|
+
from airflow.providers.openlineage.extractors.base import (
|
|
28
|
+
OL_METHOD_NAME_COMPLETE,
|
|
29
|
+
OL_METHOD_NAME_START,
|
|
30
|
+
DefaultExtractor,
|
|
31
|
+
)
|
|
28
32
|
from airflow.providers.openlineage.extractors.bash import BashExtractor
|
|
29
33
|
from airflow.providers.openlineage.extractors.python import PythonExtractor
|
|
30
34
|
from airflow.providers.openlineage.utils.utils import (
|
|
@@ -32,6 +36,7 @@ from airflow.providers.openlineage.utils.utils import (
|
|
|
32
36
|
try_import_from_string,
|
|
33
37
|
)
|
|
34
38
|
from airflow.utils.log.logging_mixin import LoggingMixin
|
|
39
|
+
from airflow.utils.state import TaskInstanceState
|
|
35
40
|
|
|
36
41
|
if TYPE_CHECKING:
|
|
37
42
|
from openlineage.client.event_v2 import Dataset
|
|
@@ -87,7 +92,9 @@ class ExtractorManager(LoggingMixin):
|
|
|
87
92
|
def add_extractor(self, operator_class: str, extractor: type[BaseExtractor]):
|
|
88
93
|
self.extractors[operator_class] = extractor
|
|
89
94
|
|
|
90
|
-
def extract_metadata(
|
|
95
|
+
def extract_metadata(
|
|
96
|
+
self, dagrun, task, task_instance_state: TaskInstanceState, task_instance=None
|
|
97
|
+
) -> OperatorLineage:
|
|
91
98
|
extractor = self._get_extractor(task)
|
|
92
99
|
task_info = (
|
|
93
100
|
f"task_type={task.task_type} "
|
|
@@ -104,10 +111,15 @@ class ExtractorManager(LoggingMixin):
|
|
|
104
111
|
extractor.__class__.__name__,
|
|
105
112
|
str(task_info),
|
|
106
113
|
)
|
|
107
|
-
if
|
|
108
|
-
task_metadata = extractor.extract_on_complete(task_instance)
|
|
109
|
-
else:
|
|
114
|
+
if task_instance_state == TaskInstanceState.RUNNING:
|
|
110
115
|
task_metadata = extractor.extract()
|
|
116
|
+
elif task_instance_state == TaskInstanceState.FAILED:
|
|
117
|
+
if callable(getattr(extractor, "extract_on_failure", None)):
|
|
118
|
+
task_metadata = extractor.extract_on_failure(task_instance)
|
|
119
|
+
else:
|
|
120
|
+
task_metadata = extractor.extract_on_complete(task_instance)
|
|
121
|
+
else:
|
|
122
|
+
task_metadata = extractor.extract_on_complete(task_instance)
|
|
111
123
|
|
|
112
124
|
self.log.debug(
|
|
113
125
|
"Found task metadata for operation %s: %s",
|
|
@@ -122,7 +134,7 @@ class ExtractorManager(LoggingMixin):
|
|
|
122
134
|
task_metadata.inputs = inputs
|
|
123
135
|
task_metadata.outputs = outputs
|
|
124
136
|
else:
|
|
125
|
-
self.extract_inlets_and_outlets(task_metadata, task
|
|
137
|
+
self.extract_inlets_and_outlets(task_metadata, task)
|
|
126
138
|
return task_metadata
|
|
127
139
|
|
|
128
140
|
except Exception as e:
|
|
@@ -132,6 +144,7 @@ class ExtractorManager(LoggingMixin):
|
|
|
132
144
|
e,
|
|
133
145
|
task_info,
|
|
134
146
|
)
|
|
147
|
+
self.log.debug("OpenLineage extraction failure details:", exc_info=True)
|
|
135
148
|
elif (hook_lineage := self.get_hook_lineage()) is not None:
|
|
136
149
|
inputs, outputs = hook_lineage
|
|
137
150
|
task_metadata = OperatorLineage(inputs=inputs, outputs=outputs)
|
|
@@ -143,9 +156,7 @@ class ExtractorManager(LoggingMixin):
|
|
|
143
156
|
task_metadata = OperatorLineage(
|
|
144
157
|
run_facets=get_unknown_source_attribute_run_facet(task=task),
|
|
145
158
|
)
|
|
146
|
-
|
|
147
|
-
outlets = task.get_outlet_defs()
|
|
148
|
-
self.extract_inlets_and_outlets(task_metadata, inlets, outlets)
|
|
159
|
+
self.extract_inlets_and_outlets(task_metadata, task)
|
|
149
160
|
return task_metadata
|
|
150
161
|
|
|
151
162
|
return OperatorLineage()
|
|
@@ -155,13 +166,9 @@ class ExtractorManager(LoggingMixin):
|
|
|
155
166
|
return self.extractors[task.task_type]
|
|
156
167
|
|
|
157
168
|
def method_exists(method_name):
|
|
158
|
-
|
|
159
|
-
if method:
|
|
160
|
-
return callable(method)
|
|
169
|
+
return callable(getattr(task, method_name, None))
|
|
161
170
|
|
|
162
|
-
if method_exists(
|
|
163
|
-
"get_openlineage_facets_on_complete"
|
|
164
|
-
):
|
|
171
|
+
if method_exists(OL_METHOD_NAME_START) or method_exists(OL_METHOD_NAME_COMPLETE):
|
|
165
172
|
return self.default_extractor
|
|
166
173
|
return None
|
|
167
174
|
|
|
@@ -174,28 +181,21 @@ class ExtractorManager(LoggingMixin):
|
|
|
174
181
|
return extractor(task)
|
|
175
182
|
return None
|
|
176
183
|
|
|
177
|
-
def extract_inlets_and_outlets(
|
|
178
|
-
|
|
179
|
-
task_metadata: OperatorLineage,
|
|
180
|
-
inlets: list,
|
|
181
|
-
outlets: list,
|
|
182
|
-
):
|
|
183
|
-
if inlets or outlets:
|
|
184
|
+
def extract_inlets_and_outlets(self, task_metadata: OperatorLineage, task) -> None:
|
|
185
|
+
if task.inlets or task.outlets:
|
|
184
186
|
self.log.debug("Manually extracting lineage metadata from inlets and outlets")
|
|
185
|
-
for i in inlets:
|
|
187
|
+
for i in task.inlets:
|
|
186
188
|
d = self.convert_to_ol_dataset(i)
|
|
187
189
|
if d:
|
|
188
190
|
task_metadata.inputs.append(d)
|
|
189
|
-
for o in outlets:
|
|
191
|
+
for o in task.outlets:
|
|
190
192
|
d = self.convert_to_ol_dataset(o)
|
|
191
193
|
if d:
|
|
192
194
|
task_metadata.outputs.append(d)
|
|
193
195
|
|
|
194
196
|
def get_hook_lineage(self) -> tuple[list[Dataset], list[Dataset]] | None:
|
|
195
197
|
try:
|
|
196
|
-
from airflow.providers.common.compat.lineage.hook import
|
|
197
|
-
get_hook_lineage_collector,
|
|
198
|
-
)
|
|
198
|
+
from airflow.providers.common.compat.lineage.hook import get_hook_lineage_collector
|
|
199
199
|
except ImportError:
|
|
200
200
|
return None
|
|
201
201
|
|
|
@@ -204,6 +204,7 @@ class ExtractorManager(LoggingMixin):
|
|
|
204
204
|
if not get_hook_lineage_collector().has_collected:
|
|
205
205
|
return None
|
|
206
206
|
|
|
207
|
+
self.log.debug("OpenLineage will extract lineage from Hook Lineage Collector.")
|
|
207
208
|
return (
|
|
208
209
|
[
|
|
209
210
|
asset
|
|
@@ -313,5 +314,5 @@ class ExtractorManager(LoggingMixin):
|
|
|
313
314
|
job_facets=task_metadata.job_facets,
|
|
314
315
|
)
|
|
315
316
|
except AttributeError:
|
|
316
|
-
self.log.warning("
|
|
317
|
+
self.log.warning("OpenLineage extractor returns non-valid metadata: `%s`", task_metadata)
|
|
317
318
|
return None
|
|
@@ -27,8 +27,9 @@ def get_provider_info():
|
|
|
27
27
|
"name": "OpenLineage Airflow",
|
|
28
28
|
"description": "`OpenLineage <https://openlineage.io/>`__\n",
|
|
29
29
|
"state": "ready",
|
|
30
|
-
"source-date-epoch":
|
|
30
|
+
"source-date-epoch": 1743477859,
|
|
31
31
|
"versions": [
|
|
32
|
+
"2.1.2",
|
|
32
33
|
"2.1.1",
|
|
33
34
|
"2.1.0",
|
|
34
35
|
"2.0.0",
|
|
@@ -85,7 +85,7 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
85
85
|
if config:
|
|
86
86
|
self.log.debug(
|
|
87
87
|
"OpenLineage configuration found. Transport type: `%s`",
|
|
88
|
-
config.get("type", "no type provided"),
|
|
88
|
+
config.get("transport", {}).get("type", "no type provided"),
|
|
89
89
|
)
|
|
90
90
|
self._client = OpenLineageClient(config=config) # type: ignore[call-arg]
|
|
91
91
|
else:
|
|
@@ -159,11 +159,20 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
159
159
|
stack.enter_context(Stats.timer(f"ol.emit.attempts.{event_type}.{transport_type}"))
|
|
160
160
|
stack.enter_context(Stats.timer("ol.emit.attempts"))
|
|
161
161
|
self._client.emit(redacted_event)
|
|
162
|
-
self.log.
|
|
163
|
-
|
|
162
|
+
self.log.info(
|
|
163
|
+
"Successfully emitted OpenLineage `%s` event of id `%s`",
|
|
164
|
+
event_type.upper(),
|
|
165
|
+
event.run.runId,
|
|
166
|
+
)
|
|
167
|
+
except Exception as e:
|
|
164
168
|
Stats.incr("ol.emit.failed")
|
|
165
|
-
self.log.warning(
|
|
166
|
-
|
|
169
|
+
self.log.warning(
|
|
170
|
+
"Failed to emit OpenLineage `%s` event of id `%s` with the following exception: `%s`",
|
|
171
|
+
event_type.upper(),
|
|
172
|
+
event.run.runId,
|
|
173
|
+
e,
|
|
174
|
+
)
|
|
175
|
+
self.log.debug("OpenLineage emission failure details:", exc_info=True)
|
|
167
176
|
|
|
168
177
|
return redacted_event
|
|
169
178
|
|
|
@@ -371,7 +380,7 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
371
380
|
# Catch all exceptions to prevent ProcessPoolExecutor from silently swallowing them.
|
|
372
381
|
# This ensures that any unexpected exceptions are logged for debugging purposes.
|
|
373
382
|
# This part cannot be wrapped to deduplicate code, otherwise the method cannot be pickled in multiprocessing.
|
|
374
|
-
self.log.warning("Failed to emit DAG started event: \n %s", traceback.format_exc())
|
|
383
|
+
self.log.warning("Failed to emit OpenLineage DAG started event: \n %s", traceback.format_exc())
|
|
375
384
|
|
|
376
385
|
def dag_success(
|
|
377
386
|
self,
|
|
@@ -409,7 +418,7 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
409
418
|
# Catch all exceptions to prevent ProcessPoolExecutor from silently swallowing them.
|
|
410
419
|
# This ensures that any unexpected exceptions are logged for debugging purposes.
|
|
411
420
|
# This part cannot be wrapped to deduplicate code, otherwise the method cannot be pickled in multiprocessing.
|
|
412
|
-
self.log.warning("Failed to emit DAG success event: \n %s", traceback.format_exc())
|
|
421
|
+
self.log.warning("Failed to emit OpenLineage DAG success event: \n %s", traceback.format_exc())
|
|
413
422
|
|
|
414
423
|
def dag_failed(
|
|
415
424
|
self,
|
|
@@ -453,7 +462,7 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
453
462
|
# Catch all exceptions to prevent ProcessPoolExecutor from silently swallowing them.
|
|
454
463
|
# This ensures that any unexpected exceptions are logged for debugging purposes.
|
|
455
464
|
# This part cannot be wrapped to deduplicate code, otherwise the method cannot be pickled in multiprocessing.
|
|
456
|
-
self.log.warning("Failed to emit DAG failed event: \n %s", traceback.format_exc())
|
|
465
|
+
self.log.warning("Failed to emit OpenLineage DAG failed event: \n %s", traceback.format_exc())
|
|
457
466
|
|
|
458
467
|
@staticmethod
|
|
459
468
|
def _build_run(
|
|
@@ -69,13 +69,15 @@ def _get_try_number_success(val):
|
|
|
69
69
|
|
|
70
70
|
def _executor_initializer():
|
|
71
71
|
"""
|
|
72
|
-
Initialize
|
|
72
|
+
Initialize processes for the executor used with DAGRun listener's methods (on scheduler).
|
|
73
73
|
|
|
74
74
|
This function must be picklable, so it cannot be defined as an inner method or local function.
|
|
75
75
|
|
|
76
76
|
Reconfigures the ORM engine to prevent issues that arise when multiple processes interact with
|
|
77
77
|
the Airflow database.
|
|
78
78
|
"""
|
|
79
|
+
# This initializer is used only on the scheduler
|
|
80
|
+
# We can configure_orm regardless of the Airflow version, as DB access is always allowed from scheduler.
|
|
79
81
|
settings.configure_orm()
|
|
80
82
|
|
|
81
83
|
|
|
@@ -199,7 +201,9 @@ class OpenLineageListener:
|
|
|
199
201
|
operator_name = task.task_type.lower()
|
|
200
202
|
|
|
201
203
|
with Stats.timer(f"ol.extract.{event_type}.{operator_name}"):
|
|
202
|
-
task_metadata = self.extractor_manager.extract_metadata(
|
|
204
|
+
task_metadata = self.extractor_manager.extract_metadata(
|
|
205
|
+
dagrun=dagrun, task=task, task_instance_state=TaskInstanceState.RUNNING
|
|
206
|
+
)
|
|
203
207
|
|
|
204
208
|
redacted_event = self.adapter.start_task(
|
|
205
209
|
run_id=task_uuid,
|
|
@@ -302,7 +306,10 @@ class OpenLineageListener:
|
|
|
302
306
|
|
|
303
307
|
with Stats.timer(f"ol.extract.{event_type}.{operator_name}"):
|
|
304
308
|
task_metadata = self.extractor_manager.extract_metadata(
|
|
305
|
-
dagrun
|
|
309
|
+
dagrun=dagrun,
|
|
310
|
+
task=task,
|
|
311
|
+
task_instance_state=TaskInstanceState.SUCCESS,
|
|
312
|
+
task_instance=task_instance,
|
|
306
313
|
)
|
|
307
314
|
|
|
308
315
|
redacted_event = self.adapter.complete_task(
|
|
@@ -423,7 +430,10 @@ class OpenLineageListener:
|
|
|
423
430
|
|
|
424
431
|
with Stats.timer(f"ol.extract.{event_type}.{operator_name}"):
|
|
425
432
|
task_metadata = self.extractor_manager.extract_metadata(
|
|
426
|
-
dagrun
|
|
433
|
+
dagrun=dagrun,
|
|
434
|
+
task=task,
|
|
435
|
+
task_instance_state=TaskInstanceState.FAILED,
|
|
436
|
+
task_instance=task_instance,
|
|
427
437
|
)
|
|
428
438
|
|
|
429
439
|
redacted_event = self.adapter.fail_task(
|
|
@@ -472,7 +482,9 @@ class OpenLineageListener:
|
|
|
472
482
|
process.wait(conf.execution_timeout())
|
|
473
483
|
except psutil.TimeoutExpired:
|
|
474
484
|
self.log.warning(
|
|
475
|
-
"OpenLineage process
|
|
485
|
+
"OpenLineage process with pid `%s` expired and will be terminated by listener. "
|
|
486
|
+
"This has no impact on actual task execution status.",
|
|
487
|
+
pid,
|
|
476
488
|
)
|
|
477
489
|
self._terminate_with_wait(process)
|
|
478
490
|
except BaseException:
|
|
@@ -481,7 +493,8 @@ class OpenLineageListener:
|
|
|
481
493
|
self.log.debug("Process with pid %s finished - parent", pid)
|
|
482
494
|
else:
|
|
483
495
|
setproctitle(getproctitle() + " - OpenLineage - " + callable_name)
|
|
484
|
-
|
|
496
|
+
if not AIRFLOW_V_3_0_PLUS:
|
|
497
|
+
configure_orm(disable_connection_pool=True)
|
|
485
498
|
self.log.debug("Executing OpenLineage process - %s - pid %s", callable_name, os.getpid())
|
|
486
499
|
callable()
|
|
487
500
|
self.log.debug("Process with current pid finishes after %s", callable_name)
|
|
@@ -21,6 +21,7 @@ from typing import TYPE_CHECKING
|
|
|
21
21
|
from airflow.providers.openlineage import conf
|
|
22
22
|
from airflow.providers.openlineage.plugins.adapter import OpenLineageAdapter
|
|
23
23
|
from airflow.providers.openlineage.utils.utils import get_job_name
|
|
24
|
+
from airflow.providers.openlineage.version_compat import AIRFLOW_V_3_0_PLUS
|
|
24
25
|
|
|
25
26
|
if TYPE_CHECKING:
|
|
26
27
|
from airflow.models import TaskInstance
|
|
@@ -58,15 +59,25 @@ def lineage_run_id(task_instance: TaskInstance):
|
|
|
58
59
|
For more information take a look at the guide:
|
|
59
60
|
:ref:`howto/macros:openlineage`
|
|
60
61
|
"""
|
|
61
|
-
if
|
|
62
|
-
|
|
62
|
+
if AIRFLOW_V_3_0_PLUS:
|
|
63
|
+
context = task_instance.get_template_context()
|
|
64
|
+
if hasattr(task_instance, "dag_run"):
|
|
65
|
+
dag_run = task_instance.dag_run
|
|
66
|
+
elif hasattr(context, "dag_run"):
|
|
67
|
+
dag_run = context["dag_run"]
|
|
68
|
+
if hasattr(dag_run, "logical_date") and dag_run.logical_date:
|
|
69
|
+
date = dag_run.logical_date
|
|
70
|
+
else:
|
|
71
|
+
date = dag_run.run_after
|
|
72
|
+
elif hasattr(task_instance, "logical_date"):
|
|
73
|
+
date = task_instance.logical_date
|
|
63
74
|
else:
|
|
64
|
-
|
|
75
|
+
date = task_instance.execution_date
|
|
65
76
|
return OpenLineageAdapter.build_task_instance_run_id(
|
|
66
77
|
dag_id=task_instance.dag_id,
|
|
67
78
|
task_id=task_instance.task_id,
|
|
68
79
|
try_number=task_instance.try_number,
|
|
69
|
-
logical_date=
|
|
80
|
+
logical_date=date,
|
|
70
81
|
map_index=task_instance.map_index,
|
|
71
82
|
)
|
|
72
83
|
|
|
@@ -38,7 +38,7 @@ DISABLE_OL_PARAM = Param(False, const=False)
|
|
|
38
38
|
T = TypeVar("T", bound="DAG | Operator")
|
|
39
39
|
|
|
40
40
|
if TYPE_CHECKING:
|
|
41
|
-
from airflow.sdk.
|
|
41
|
+
from airflow.sdk.bases.operator import BaseOperator as SdkBaseOperator
|
|
42
42
|
|
|
43
43
|
|
|
44
44
|
log = logging.getLogger(__name__)
|
|
@@ -53,35 +53,73 @@ def _get_parent_job_information_as_spark_properties(context: Context) -> dict:
|
|
|
53
53
|
|
|
54
54
|
def _get_transport_information_as_spark_properties() -> dict:
|
|
55
55
|
"""Retrieve transport information as Spark properties."""
|
|
56
|
-
transport = get_openlineage_listener().adapter.get_or_create_openlineage_client().transport
|
|
57
|
-
if transport.kind != "http":
|
|
58
|
-
log.info(
|
|
59
|
-
"OpenLineage transport type `%s` does not support automatic "
|
|
60
|
-
"injection of OpenLineage transport information into Spark properties.",
|
|
61
|
-
transport.kind,
|
|
62
|
-
)
|
|
63
|
-
return {}
|
|
64
|
-
|
|
65
|
-
properties = {
|
|
66
|
-
"spark.openlineage.transport.type": transport.kind,
|
|
67
|
-
"spark.openlineage.transport.url": transport.url,
|
|
68
|
-
"spark.openlineage.transport.endpoint": transport.endpoint,
|
|
69
|
-
"spark.openlineage.transport.timeoutInMillis": str(
|
|
70
|
-
int(transport.timeout * 1000) # convert to milliseconds, as required by Spark integration
|
|
71
|
-
),
|
|
72
|
-
}
|
|
73
|
-
if transport.compression:
|
|
74
|
-
properties["spark.openlineage.transport.compression"] = str(transport.compression)
|
|
75
56
|
|
|
76
|
-
|
|
77
|
-
properties
|
|
78
|
-
|
|
57
|
+
def _get_transport_information(tp) -> dict:
|
|
58
|
+
properties = {
|
|
59
|
+
"type": tp.kind,
|
|
60
|
+
"url": tp.url,
|
|
61
|
+
"endpoint": tp.endpoint,
|
|
62
|
+
"timeoutInMillis": str(
|
|
63
|
+
int(tp.timeout) * 1000 # convert to milliseconds, as required by Spark integration
|
|
64
|
+
),
|
|
65
|
+
}
|
|
66
|
+
if hasattr(tp, "compression") and tp.compression:
|
|
67
|
+
properties["compression"] = str(tp.compression)
|
|
68
|
+
|
|
69
|
+
if hasattr(tp.config.auth, "api_key") and tp.config.auth.get_bearer():
|
|
70
|
+
properties["auth.type"] = "api_key"
|
|
71
|
+
properties["auth.apiKey"] = tp.config.auth.get_bearer()
|
|
72
|
+
|
|
73
|
+
if hasattr(tp.config, "custom_headers") and tp.config.custom_headers:
|
|
74
|
+
for key, value in tp.config.custom_headers.items():
|
|
75
|
+
properties[f"headers.{key}"] = value
|
|
76
|
+
return properties
|
|
77
|
+
|
|
78
|
+
def _format_transport(props: dict, transport: dict, name: str | None):
|
|
79
|
+
for key, value in transport.items():
|
|
80
|
+
if name:
|
|
81
|
+
props[f"spark.openlineage.transport.transports.{name}.{key}"] = value
|
|
82
|
+
else:
|
|
83
|
+
props[f"spark.openlineage.transport.{key}"] = value
|
|
84
|
+
return props
|
|
79
85
|
|
|
80
|
-
|
|
81
|
-
for key, value in transport.config.custom_headers.items():
|
|
82
|
-
properties[f"spark.openlineage.transport.headers.{key}"] = value
|
|
86
|
+
transport = get_openlineage_listener().adapter.get_or_create_openlineage_client().transport
|
|
83
87
|
|
|
84
|
-
|
|
88
|
+
if transport.kind == "composite":
|
|
89
|
+
http_transports = {}
|
|
90
|
+
for nested_transport in transport.transports:
|
|
91
|
+
if nested_transport.kind == "http":
|
|
92
|
+
http_transports[nested_transport.name] = _get_transport_information(nested_transport)
|
|
93
|
+
else:
|
|
94
|
+
name = nested_transport.name if hasattr(nested_transport, "name") else "no-name"
|
|
95
|
+
log.info(
|
|
96
|
+
"OpenLineage transport type `%s` with name `%s` is not supported in composite transport.",
|
|
97
|
+
nested_transport.kind,
|
|
98
|
+
name,
|
|
99
|
+
)
|
|
100
|
+
if len(http_transports) == 0:
|
|
101
|
+
log.warning(
|
|
102
|
+
"OpenLineage transport type `composite` does not contain http transport. Skipping "
|
|
103
|
+
"injection of OpenLineage transport information into Spark properties.",
|
|
104
|
+
)
|
|
105
|
+
return {}
|
|
106
|
+
props = {
|
|
107
|
+
"spark.openlineage.transport.type": "composite",
|
|
108
|
+
"spark.openlineage.transport.continueOnFailure": str(transport.config.continue_on_failure),
|
|
109
|
+
}
|
|
110
|
+
for name, http_transport in http_transports.items():
|
|
111
|
+
props = _format_transport(props, http_transport, name)
|
|
112
|
+
return props
|
|
113
|
+
|
|
114
|
+
elif transport.kind == "http":
|
|
115
|
+
return _format_transport({}, _get_transport_information(transport), None)
|
|
116
|
+
|
|
117
|
+
log.info(
|
|
118
|
+
"OpenLineage transport type `%s` does not support automatic "
|
|
119
|
+
"injection of OpenLineage transport information into Spark properties.",
|
|
120
|
+
transport.kind,
|
|
121
|
+
)
|
|
122
|
+
return {}
|
|
85
123
|
|
|
86
124
|
|
|
87
125
|
def _is_parent_job_information_present_in_spark_properties(properties: dict) -> bool:
|
|
@@ -210,7 +210,13 @@ def is_ti_rescheduled_already(ti: TaskInstance, session=NEW_SESSION):
|
|
|
210
210
|
|
|
211
211
|
if not ti.task.reschedule:
|
|
212
212
|
return False
|
|
213
|
-
|
|
213
|
+
if AIRFLOW_V_3_0_PLUS:
|
|
214
|
+
return (
|
|
215
|
+
session.query(
|
|
216
|
+
exists().where(TaskReschedule.ti_id == ti.id, TaskReschedule.try_number == ti.try_number)
|
|
217
|
+
).scalar()
|
|
218
|
+
is True
|
|
219
|
+
)
|
|
214
220
|
return (
|
|
215
221
|
session.query(
|
|
216
222
|
exists().where(
|
|
@@ -369,8 +375,19 @@ class DagRunInfo(InfoJsonEncodable):
|
|
|
369
375
|
"run_id",
|
|
370
376
|
"run_type",
|
|
371
377
|
"start_date",
|
|
378
|
+
"end_date",
|
|
372
379
|
]
|
|
373
380
|
|
|
381
|
+
casts = {"duration": lambda dagrun: DagRunInfo.duration(dagrun)}
|
|
382
|
+
|
|
383
|
+
@classmethod
|
|
384
|
+
def duration(cls, dagrun: DagRun) -> float | None:
|
|
385
|
+
if not getattr(dagrun, "end_date", None) or not isinstance(dagrun.end_date, datetime.datetime):
|
|
386
|
+
return None
|
|
387
|
+
if not getattr(dagrun, "start_date", None) or not isinstance(dagrun.start_date, datetime.datetime):
|
|
388
|
+
return None
|
|
389
|
+
return (dagrun.end_date - dagrun.start_date).total_seconds()
|
|
390
|
+
|
|
374
391
|
|
|
375
392
|
class TaskInstanceInfo(InfoJsonEncodable):
|
|
376
393
|
"""Defines encoding TaskInstance object to JSON."""
|
|
@@ -740,7 +757,9 @@ def print_warning(log):
|
|
|
740
757
|
return f(*args, **kwargs)
|
|
741
758
|
except Exception:
|
|
742
759
|
log.warning(
|
|
743
|
-
"OpenLineage event emission failed.
|
|
760
|
+
"OpenLineage event emission failed. "
|
|
761
|
+
"Exception below is being caught but it's printed for visibility. "
|
|
762
|
+
"This has no impact on actual task execution status.",
|
|
744
763
|
exc_info=True,
|
|
745
764
|
)
|
|
746
765
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: apache-airflow-providers-openlineage
|
|
3
|
-
Version: 2.1.
|
|
3
|
+
Version: 2.1.2
|
|
4
4
|
Summary: Provider package apache-airflow-providers-openlineage for Apache Airflow
|
|
5
5
|
Keywords: airflow-provider,openlineage,airflow,integration
|
|
6
6
|
Author-email: Apache Software Foundation <dev@airflow.apache.org>
|
|
@@ -27,11 +27,11 @@ Requires-Dist: attrs>=22.2
|
|
|
27
27
|
Requires-Dist: openlineage-integration-common>=1.24.2
|
|
28
28
|
Requires-Dist: openlineage-python>=1.24.2
|
|
29
29
|
Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
|
|
30
|
-
Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.
|
|
31
|
-
Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.
|
|
30
|
+
Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2/changelog.html
|
|
31
|
+
Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2
|
|
32
|
+
Project-URL: Mastodon, https://fosstodon.org/@airflow
|
|
32
33
|
Project-URL: Slack Chat, https://s.apache.org/airflow-slack
|
|
33
34
|
Project-URL: Source Code, https://github.com/apache/airflow
|
|
34
|
-
Project-URL: Twitter, https://x.com/ApacheAirflow
|
|
35
35
|
Project-URL: YouTube, https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/
|
|
36
36
|
|
|
37
37
|
|
|
@@ -59,7 +59,7 @@ Project-URL: YouTube, https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/
|
|
|
59
59
|
|
|
60
60
|
Package ``apache-airflow-providers-openlineage``
|
|
61
61
|
|
|
62
|
-
Release: ``2.1.
|
|
62
|
+
Release: ``2.1.2``
|
|
63
63
|
|
|
64
64
|
|
|
65
65
|
`OpenLineage <https://openlineage.io/>`__
|
|
@@ -72,7 +72,7 @@ This is a provider package for ``openlineage`` provider. All classes for this pr
|
|
|
72
72
|
are in ``airflow.providers.openlineage`` python package.
|
|
73
73
|
|
|
74
74
|
You can find package information and changelog for the provider
|
|
75
|
-
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.
|
|
75
|
+
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2/>`_.
|
|
76
76
|
|
|
77
77
|
Installation
|
|
78
78
|
------------
|
|
@@ -101,7 +101,7 @@ Cross provider package dependencies
|
|
|
101
101
|
-----------------------------------
|
|
102
102
|
|
|
103
103
|
Those are dependencies that might be needed in order to use all the features of the package.
|
|
104
|
-
You need to install the specified
|
|
104
|
+
You need to install the specified providers in order to use them.
|
|
105
105
|
|
|
106
106
|
You can install such cross-provider dependencies when installing from PyPI. For example:
|
|
107
107
|
|
|
@@ -118,5 +118,5 @@ Dependent package
|
|
|
118
118
|
================================================================================================================== =================
|
|
119
119
|
|
|
120
120
|
The changelog for the provider package can be found in the
|
|
121
|
-
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.1/changelog.html>`_.
|
|
121
|
+
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2/changelog.html>`_.
|
|
122
122
|
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
airflow/providers/openlineage/LICENSE,sha256=gXPVwptPlW1TJ4HSuG5OMPg-a3h43OGMkZRR1rpwfJA,10850
|
|
2
|
-
airflow/providers/openlineage/__init__.py,sha256=
|
|
2
|
+
airflow/providers/openlineage/__init__.py,sha256=z82Hjldc_TSS3Uwai9WOyuJKxfAG5BH4NlVuMbMSc8g,1498
|
|
3
3
|
airflow/providers/openlineage/conf.py,sha256=aYdLU7iHBdGIU8ZAC5iUiIDgXP9gvP9r_z5hTAbXPOU,5535
|
|
4
|
-
airflow/providers/openlineage/get_provider_info.py,sha256=
|
|
4
|
+
airflow/providers/openlineage/get_provider_info.py,sha256=53i3FDOzBoxITYaVkSqzS9oI4NkxiTUPGKp8-DU9bJU,10320
|
|
5
5
|
airflow/providers/openlineage/sqlparser.py,sha256=N38XhkU-lxwxnYevQpq63JOBi4rzp0q56JjxO3H24W8,20340
|
|
6
6
|
airflow/providers/openlineage/version_compat.py,sha256=aHg90_DtgoSnQvILFICexMyNlHlALBdaeWqkX3dFDug,1605
|
|
7
7
|
airflow/providers/openlineage/extractors/__init__.py,sha256=I0X4f6zUniclyD9zT0DFHRImpCpJVP4MkPJT3cd7X5I,1081
|
|
8
|
-
airflow/providers/openlineage/extractors/base.py,sha256=
|
|
8
|
+
airflow/providers/openlineage/extractors/base.py,sha256=ZXRlvMSak8kUfur-BxrgAxeylMQFG-iT-LusQguIFLc,6342
|
|
9
9
|
airflow/providers/openlineage/extractors/bash.py,sha256=3aR0PXs8fzRLibRxXN1R8wMZnGzyCur7mjpy8e5GC4A,2583
|
|
10
|
-
airflow/providers/openlineage/extractors/manager.py,sha256=
|
|
10
|
+
airflow/providers/openlineage/extractors/manager.py,sha256=g3WJRBR2-XZHTG7qAR4UEviwtymvDArhlPMVD3c4q_g,12862
|
|
11
11
|
airflow/providers/openlineage/extractors/python.py,sha256=hVWOplMlBimrpPKPeW6vm75a8OmAYMU1oJzqMz8Jh90,3171
|
|
12
12
|
airflow/providers/openlineage/facets/AirflowDagRunFacet.json,sha256=ie6c-J3-wGgk80WDTGWePz18o6DbW--TNM7BMF4WfcU,2251
|
|
13
13
|
airflow/providers/openlineage/facets/AirflowDebugRunFacet.json,sha256=_zA5gFqGje5MOH1SmdMeA5ViOHvW_pV4oijEAvkuBbY,768
|
|
@@ -16,17 +16,17 @@ airflow/providers/openlineage/facets/AirflowRunFacet.json,sha256=70mEaZShgSJp-2x
|
|
|
16
16
|
airflow/providers/openlineage/facets/AirflowStateRunFacet.json,sha256=xhHQEKD9Jopw-oqbkCCrrwFjfXnxvuJAritsmegKjuQ,937
|
|
17
17
|
airflow/providers/openlineage/facets/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
|
|
18
18
|
airflow/providers/openlineage/plugins/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
|
|
19
|
-
airflow/providers/openlineage/plugins/adapter.py,sha256=
|
|
19
|
+
airflow/providers/openlineage/plugins/adapter.py,sha256=wCGJ3rMDpiyFlPaonUQwGzs0hyNqU-4__e0Z9nAbcyI,20620
|
|
20
20
|
airflow/providers/openlineage/plugins/facets.py,sha256=VvyMYR6ONkC95q5FdNmohv0scbA1Ej_B5cQ97as5GvA,4161
|
|
21
|
-
airflow/providers/openlineage/plugins/listener.py,sha256=
|
|
22
|
-
airflow/providers/openlineage/plugins/macros.py,sha256=
|
|
21
|
+
airflow/providers/openlineage/plugins/listener.py,sha256=KlBKT9VkdOrZxvQHsLZWWq_g4jPhaa2GdVxmHy_EVhM,26083
|
|
22
|
+
airflow/providers/openlineage/plugins/macros.py,sha256=qrHLjE95Uq8H-W9CIkQe5Y9Pu1O-GErhpDV2olGaGQM,3730
|
|
23
23
|
airflow/providers/openlineage/plugins/openlineage.py,sha256=HD3mYNPfXd-buZydEpuAY-naVBXhausU2LYUNhL48QA,1906
|
|
24
24
|
airflow/providers/openlineage/utils/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
|
|
25
|
-
airflow/providers/openlineage/utils/selective_enable.py,sha256=
|
|
26
|
-
airflow/providers/openlineage/utils/spark.py,sha256
|
|
25
|
+
airflow/providers/openlineage/utils/selective_enable.py,sha256=YyrUQ7Djv5o46XdH83N_G8AXAZ9C_aKPa534pbNVp08,3441
|
|
26
|
+
airflow/providers/openlineage/utils/spark.py,sha256=-2XfUaV0WISK6vHSBmB9E78xkuPjO3fM1tDQCZG7j9I,7303
|
|
27
27
|
airflow/providers/openlineage/utils/sql.py,sha256=vkKrrdENEMVG8gtzV6yuTXMa2Z9fBAEXmxDVIDaVncI,9571
|
|
28
|
-
airflow/providers/openlineage/utils/utils.py,sha256=
|
|
29
|
-
apache_airflow_providers_openlineage-2.1.
|
|
30
|
-
apache_airflow_providers_openlineage-2.1.
|
|
31
|
-
apache_airflow_providers_openlineage-2.1.
|
|
32
|
-
apache_airflow_providers_openlineage-2.1.
|
|
28
|
+
airflow/providers/openlineage/utils/utils.py,sha256=Z3G2wa_EPfRaHEFgdNFvi36K8qz47lS_O66emx-aFjk,29281
|
|
29
|
+
apache_airflow_providers_openlineage-2.1.2.dist-info/entry_points.txt,sha256=GAx0_i2OeZzqaiiiYuA-xchICDXiCT5kVqpKSxsOjt4,214
|
|
30
|
+
apache_airflow_providers_openlineage-2.1.2.dist-info/WHEEL,sha256=_2ozNFCLWc93bK4WKHCO-eDUENDlo-dgc9cU3qokYO4,82
|
|
31
|
+
apache_airflow_providers_openlineage-2.1.2.dist-info/METADATA,sha256=ViBC41KGtshgGbAPaa0FwRr7E5HaDaw375HA9jVSWLY,5685
|
|
32
|
+
apache_airflow_providers_openlineage-2.1.2.dist-info/RECORD,,
|
|
File without changes
|