apache-airflow-providers-openlineage 1.7.1__py3-none-any.whl → 1.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apache-airflow-providers-openlineage might be problematic. Click here for more details.
- airflow/providers/openlineage/__init__.py +3 -6
- airflow/providers/openlineage/conf.py +37 -8
- airflow/providers/openlineage/extractors/base.py +7 -0
- airflow/providers/openlineage/extractors/bash.py +4 -0
- airflow/providers/openlineage/extractors/manager.py +18 -11
- airflow/providers/openlineage/extractors/python.py +5 -0
- airflow/providers/openlineage/get_provider_info.py +10 -2
- airflow/providers/openlineage/plugins/adapter.py +73 -39
- airflow/providers/openlineage/plugins/listener.py +76 -9
- airflow/providers/openlineage/plugins/openlineage.py +3 -0
- airflow/providers/openlineage/utils/utils.py +19 -1
- {apache_airflow_providers_openlineage-1.7.1.dist-info → apache_airflow_providers_openlineage-1.8.0.dist-info}/METADATA +6 -6
- apache_airflow_providers_openlineage-1.8.0.dist-info/RECORD +24 -0
- apache_airflow_providers_openlineage-1.7.1.dist-info/RECORD +0 -24
- {apache_airflow_providers_openlineage-1.7.1.dist-info → apache_airflow_providers_openlineage-1.8.0.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_openlineage-1.7.1.dist-info → apache_airflow_providers_openlineage-1.8.0.dist-info}/entry_points.txt +0 -0
|
@@ -25,14 +25,11 @@ from __future__ import annotations
|
|
|
25
25
|
|
|
26
26
|
import packaging.version
|
|
27
27
|
|
|
28
|
-
|
|
28
|
+
from airflow import __version__ as airflow_version
|
|
29
29
|
|
|
30
|
-
|
|
30
|
+
__all__ = ["__version__"]
|
|
31
31
|
|
|
32
|
-
|
|
33
|
-
from airflow import __version__ as airflow_version
|
|
34
|
-
except ImportError:
|
|
35
|
-
from airflow.version import version as airflow_version
|
|
32
|
+
__version__ = "1.8.0"
|
|
36
33
|
|
|
37
34
|
if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
|
|
38
35
|
"2.7.0"
|
|
@@ -14,6 +14,19 @@
|
|
|
14
14
|
# KIND, either express or implied. See the License for the
|
|
15
15
|
# specific language governing permissions and limitations
|
|
16
16
|
# under the License.
|
|
17
|
+
"""
|
|
18
|
+
This module provides functions for safely retrieving and handling OpenLineage configurations.
|
|
19
|
+
|
|
20
|
+
To prevent errors caused by invalid user-provided configuration values, we use ``conf.get()``
|
|
21
|
+
to fetch values as strings and perform safe conversions using custom functions.
|
|
22
|
+
|
|
23
|
+
Any invalid configuration values should be treated as incorrect and replaced with default values.
|
|
24
|
+
For example, if the default for boolean ``custom_ol_var`` is False, any non-true value provided:
|
|
25
|
+
``"asdf"``, ``12345``, ``{"key": 1}`` or empty string, will result in False being used.
|
|
26
|
+
|
|
27
|
+
By using default values for invalid configuration values, we ensure that the configurations are handled
|
|
28
|
+
safely, preventing potential runtime errors due to conversion issues.
|
|
29
|
+
"""
|
|
17
30
|
|
|
18
31
|
from __future__ import annotations
|
|
19
32
|
|
|
@@ -26,6 +39,17 @@ from airflow.configuration import conf
|
|
|
26
39
|
_CONFIG_SECTION = "openlineage"
|
|
27
40
|
|
|
28
41
|
|
|
42
|
+
def _is_true(arg: Any) -> bool:
|
|
43
|
+
return str(arg).lower().strip() in ("true", "1", "t")
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _safe_int_convert(arg: Any, default: int) -> int:
|
|
47
|
+
try:
|
|
48
|
+
return int(arg)
|
|
49
|
+
except (ValueError, TypeError):
|
|
50
|
+
return default
|
|
51
|
+
|
|
52
|
+
|
|
29
53
|
@cache
|
|
30
54
|
def config_path(check_legacy_env_var: bool = True) -> str:
|
|
31
55
|
"""[openlineage] config_path."""
|
|
@@ -41,7 +65,8 @@ def is_source_enabled() -> bool:
|
|
|
41
65
|
option = conf.get(_CONFIG_SECTION, "disable_source_code", fallback="")
|
|
42
66
|
if not option:
|
|
43
67
|
option = os.getenv("OPENLINEAGE_AIRFLOW_DISABLE_SOURCE_CODE", "")
|
|
44
|
-
|
|
68
|
+
# when disable_source_code is True, is_source_enabled() should be False
|
|
69
|
+
return not _is_true(option)
|
|
45
70
|
|
|
46
71
|
|
|
47
72
|
@cache
|
|
@@ -53,7 +78,9 @@ def disabled_operators() -> set[str]:
|
|
|
53
78
|
|
|
54
79
|
@cache
|
|
55
80
|
def selective_enable() -> bool:
|
|
56
|
-
|
|
81
|
+
"""[openlineage] selective_enable."""
|
|
82
|
+
option = conf.get(_CONFIG_SECTION, "selective_enable", fallback="")
|
|
83
|
+
return _is_true(option)
|
|
57
84
|
|
|
58
85
|
|
|
59
86
|
@cache
|
|
@@ -85,11 +112,7 @@ def transport() -> dict[str, Any]:
|
|
|
85
112
|
|
|
86
113
|
@cache
|
|
87
114
|
def is_disabled() -> bool:
|
|
88
|
-
"""[openlineage] disabled +
|
|
89
|
-
|
|
90
|
-
def _is_true(val):
|
|
91
|
-
return str(val).lower().strip() in ("true", "1", "t")
|
|
92
|
-
|
|
115
|
+
"""[openlineage] disabled + check if any configuration is present."""
|
|
93
116
|
option = conf.get(_CONFIG_SECTION, "disabled", fallback="")
|
|
94
117
|
if _is_true(option):
|
|
95
118
|
return True
|
|
@@ -97,7 +120,13 @@ def is_disabled() -> bool:
|
|
|
97
120
|
option = os.getenv("OPENLINEAGE_DISABLED", "")
|
|
98
121
|
if _is_true(option):
|
|
99
122
|
return True
|
|
100
|
-
|
|
101
123
|
# Check if both 'transport' and 'config_path' are not present and also
|
|
102
124
|
# if legacy 'OPENLINEAGE_URL' environment variables is not set
|
|
103
125
|
return transport() == {} and config_path(True) == "" and os.getenv("OPENLINEAGE_URL", "") == ""
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
@cache
|
|
129
|
+
def dag_state_change_process_pool_size() -> int:
|
|
130
|
+
"""[openlineage] dag_state_change_process_pool_size."""
|
|
131
|
+
option = conf.get(_CONFIG_SECTION, "dag_state_change_process_pool_size", fallback="")
|
|
132
|
+
return _safe_int_convert(str(option).strip(), default=1)
|
|
@@ -87,6 +87,9 @@ class DefaultExtractor(BaseExtractor):
|
|
|
87
87
|
def _execute_extraction(self) -> OperatorLineage | None:
|
|
88
88
|
# OpenLineage methods are optional - if there's no method, return None
|
|
89
89
|
try:
|
|
90
|
+
self.log.debug(
|
|
91
|
+
"Trying to execute `get_openlineage_facets_on_start` for %s.", self.operator.task_type
|
|
92
|
+
)
|
|
90
93
|
return self._get_openlineage_facets(self.operator.get_openlineage_facets_on_start) # type: ignore
|
|
91
94
|
except ImportError:
|
|
92
95
|
self.log.error(
|
|
@@ -105,9 +108,13 @@ class DefaultExtractor(BaseExtractor):
|
|
|
105
108
|
if task_instance.state == TaskInstanceState.FAILED:
|
|
106
109
|
on_failed = getattr(self.operator, "get_openlineage_facets_on_failure", None)
|
|
107
110
|
if on_failed and callable(on_failed):
|
|
111
|
+
self.log.debug(
|
|
112
|
+
"Executing `get_openlineage_facets_on_failure` for %s.", self.operator.task_type
|
|
113
|
+
)
|
|
108
114
|
return self._get_openlineage_facets(on_failed, task_instance)
|
|
109
115
|
on_complete = getattr(self.operator, "get_openlineage_facets_on_complete", None)
|
|
110
116
|
if on_complete and callable(on_complete):
|
|
117
|
+
self.log.debug("Executing `get_openlineage_facets_on_complete` for %s.", self.operator.task_type)
|
|
111
118
|
return self._get_openlineage_facets(on_complete, task_instance)
|
|
112
119
|
return self.extract()
|
|
113
120
|
|
|
@@ -16,7 +16,6 @@
|
|
|
16
16
|
# under the License.
|
|
17
17
|
from __future__ import annotations
|
|
18
18
|
|
|
19
|
-
from contextlib import suppress
|
|
20
19
|
from typing import TYPE_CHECKING, Iterator
|
|
21
20
|
|
|
22
21
|
from airflow.providers.openlineage import conf
|
|
@@ -24,9 +23,11 @@ from airflow.providers.openlineage.extractors import BaseExtractor, OperatorLine
|
|
|
24
23
|
from airflow.providers.openlineage.extractors.base import DefaultExtractor
|
|
25
24
|
from airflow.providers.openlineage.extractors.bash import BashExtractor
|
|
26
25
|
from airflow.providers.openlineage.extractors.python import PythonExtractor
|
|
27
|
-
from airflow.providers.openlineage.utils.utils import get_unknown_source_attribute_run_facet
|
|
26
|
+
from airflow.providers.openlineage.utils.utils import (
|
|
27
|
+
get_unknown_source_attribute_run_facet,
|
|
28
|
+
try_import_from_string,
|
|
29
|
+
)
|
|
28
30
|
from airflow.utils.log.logging_mixin import LoggingMixin
|
|
29
|
-
from airflow.utils.module_loading import import_string
|
|
30
31
|
|
|
31
32
|
if TYPE_CHECKING:
|
|
32
33
|
from openlineage.client.run import Dataset
|
|
@@ -35,11 +36,6 @@ if TYPE_CHECKING:
|
|
|
35
36
|
from airflow.models import Operator
|
|
36
37
|
|
|
37
38
|
|
|
38
|
-
def try_import_from_string(string):
|
|
39
|
-
with suppress(ImportError):
|
|
40
|
-
return import_string(string)
|
|
41
|
-
|
|
42
|
-
|
|
43
39
|
def _iter_extractor_types() -> Iterator[type[BaseExtractor]]:
|
|
44
40
|
if PythonExtractor is not None:
|
|
45
41
|
yield PythonExtractor
|
|
@@ -61,16 +57,27 @@ class ExtractorManager(LoggingMixin):
|
|
|
61
57
|
self.extractors[operator_class] = extractor
|
|
62
58
|
|
|
63
59
|
for extractor_path in conf.custom_extractors():
|
|
64
|
-
extractor: type[BaseExtractor] = try_import_from_string(extractor_path)
|
|
60
|
+
extractor: type[BaseExtractor] | None = try_import_from_string(extractor_path)
|
|
61
|
+
if not extractor:
|
|
62
|
+
self.log.warning(
|
|
63
|
+
"OpenLineage is unable to import custom extractor `%s`; will ignore it.", extractor_path
|
|
64
|
+
)
|
|
65
|
+
continue
|
|
65
66
|
for operator_class in extractor.get_operator_classnames():
|
|
66
67
|
if operator_class in self.extractors:
|
|
67
|
-
self.log.
|
|
68
|
-
"Duplicate extractor found for `%s`.
|
|
68
|
+
self.log.warning(
|
|
69
|
+
"Duplicate OpenLineage custom extractor found for `%s`. "
|
|
70
|
+
"`%s` will be used instead of `%s`",
|
|
69
71
|
operator_class,
|
|
70
72
|
extractor_path,
|
|
71
73
|
self.extractors[operator_class],
|
|
72
74
|
)
|
|
73
75
|
self.extractors[operator_class] = extractor
|
|
76
|
+
self.log.debug(
|
|
77
|
+
"Registered custom OpenLineage extractor `%s` for class `%s`",
|
|
78
|
+
extractor_path,
|
|
79
|
+
operator_class,
|
|
80
|
+
)
|
|
74
81
|
|
|
75
82
|
def add_extractor(self, operator_class: str, extractor: type[BaseExtractor]):
|
|
76
83
|
self.extractors[operator_class] = extractor
|
|
@@ -57,6 +57,11 @@ class PythonExtractor(BaseExtractor):
|
|
|
57
57
|
source=source_code,
|
|
58
58
|
)
|
|
59
59
|
}
|
|
60
|
+
else:
|
|
61
|
+
self.log.debug(
|
|
62
|
+
"OpenLineage disable_source_code option is on - no source code is extracted.",
|
|
63
|
+
)
|
|
64
|
+
|
|
60
65
|
return OperatorLineage(
|
|
61
66
|
job_facets=job_facet,
|
|
62
67
|
# The PythonOperator is recorded as an "unknownSource" even though we have an extractor,
|
|
@@ -28,8 +28,9 @@ def get_provider_info():
|
|
|
28
28
|
"name": "OpenLineage Airflow",
|
|
29
29
|
"description": "`OpenLineage <https://openlineage.io/>`__\n",
|
|
30
30
|
"state": "ready",
|
|
31
|
-
"source-date-epoch":
|
|
31
|
+
"source-date-epoch": 1715684338,
|
|
32
32
|
"versions": [
|
|
33
|
+
"1.8.0",
|
|
33
34
|
"1.7.1",
|
|
34
35
|
"1.7.0",
|
|
35
36
|
"1.6.0",
|
|
@@ -121,11 +122,18 @@ def get_provider_info():
|
|
|
121
122
|
},
|
|
122
123
|
"disable_source_code": {
|
|
123
124
|
"description": "Disable the inclusion of source code in OpenLineage events by setting this to `true`.\nBy default, several Operators (e.g. Python, Bash) will include their source code in the events\nunless disabled.\n",
|
|
124
|
-
"default":
|
|
125
|
+
"default": "False",
|
|
125
126
|
"example": None,
|
|
126
127
|
"type": "boolean",
|
|
127
128
|
"version_added": None,
|
|
128
129
|
},
|
|
130
|
+
"dag_state_change_process_pool_size": {
|
|
131
|
+
"description": "Number of processes to utilize for processing DAG state changes\nin an asynchronous manner within the scheduler process.\n",
|
|
132
|
+
"default": "1",
|
|
133
|
+
"example": None,
|
|
134
|
+
"type": "integer",
|
|
135
|
+
"version_added": "1.8.0",
|
|
136
|
+
},
|
|
129
137
|
},
|
|
130
138
|
}
|
|
131
139
|
},
|
|
@@ -16,6 +16,7 @@
|
|
|
16
16
|
# under the License.
|
|
17
17
|
from __future__ import annotations
|
|
18
18
|
|
|
19
|
+
import traceback
|
|
19
20
|
import uuid
|
|
20
21
|
from contextlib import ExitStack
|
|
21
22
|
from typing import TYPE_CHECKING
|
|
@@ -73,8 +74,16 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
73
74
|
if not self._client:
|
|
74
75
|
config = self.get_openlineage_config()
|
|
75
76
|
if config:
|
|
77
|
+
self.log.debug(
|
|
78
|
+
"OpenLineage configuration found. Transport type: `%s`",
|
|
79
|
+
config.get("type", "no type provided"),
|
|
80
|
+
)
|
|
76
81
|
self._client = OpenLineageClient.from_dict(config=config)
|
|
77
82
|
else:
|
|
83
|
+
self.log.debug(
|
|
84
|
+
"OpenLineage configuration not found directly in Airflow. "
|
|
85
|
+
"Looking for legacy environment configuration. "
|
|
86
|
+
)
|
|
78
87
|
self._client = OpenLineageClient.from_environment()
|
|
79
88
|
return self._client
|
|
80
89
|
|
|
@@ -85,13 +94,19 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
85
94
|
config = self._read_yaml_config(openlineage_config_path)
|
|
86
95
|
if config:
|
|
87
96
|
return config.get("transport", None)
|
|
97
|
+
self.log.debug("OpenLineage config file is empty: `%s`", openlineage_config_path)
|
|
98
|
+
else:
|
|
99
|
+
self.log.debug("OpenLineage config_path configuration not found.")
|
|
100
|
+
|
|
88
101
|
# Second, try to get transport config
|
|
89
102
|
transport_config = conf.transport()
|
|
90
103
|
if not transport_config:
|
|
104
|
+
self.log.debug("OpenLineage transport configuration not found.")
|
|
91
105
|
return None
|
|
92
106
|
return transport_config
|
|
93
107
|
|
|
94
|
-
|
|
108
|
+
@staticmethod
|
|
109
|
+
def _read_yaml_config(path: str) -> dict | None:
|
|
95
110
|
with open(path) as config_file:
|
|
96
111
|
return yaml.safe_load(config_file)
|
|
97
112
|
|
|
@@ -125,6 +140,7 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
125
140
|
stack.enter_context(Stats.timer(f"ol.emit.attempts.{event_type}.{transport_type}"))
|
|
126
141
|
stack.enter_context(Stats.timer("ol.emit.attempts"))
|
|
127
142
|
self._client.emit(redacted_event)
|
|
143
|
+
self.log.debug("Successfully emitted OpenLineage event of id %s", event.run.runId)
|
|
128
144
|
except Exception as e:
|
|
129
145
|
Stats.incr("ol.emit.failed")
|
|
130
146
|
self.log.warning("Failed to emit OpenLineage event of id %s", event.run.runId)
|
|
@@ -284,48 +300,66 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
284
300
|
nominal_start_time: str,
|
|
285
301
|
nominal_end_time: str,
|
|
286
302
|
):
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
303
|
+
try:
|
|
304
|
+
event = RunEvent(
|
|
305
|
+
eventType=RunState.START,
|
|
306
|
+
eventTime=dag_run.start_date.isoformat(),
|
|
307
|
+
job=self._build_job(job_name=dag_run.dag_id, job_type=_JOB_TYPE_DAG),
|
|
308
|
+
run=self._build_run(
|
|
309
|
+
run_id=self.build_dag_run_id(dag_run.dag_id, dag_run.run_id),
|
|
310
|
+
job_name=dag_run.dag_id,
|
|
311
|
+
nominal_start_time=nominal_start_time,
|
|
312
|
+
nominal_end_time=nominal_end_time,
|
|
313
|
+
),
|
|
314
|
+
inputs=[],
|
|
315
|
+
outputs=[],
|
|
316
|
+
producer=_PRODUCER,
|
|
317
|
+
)
|
|
318
|
+
self.emit(event)
|
|
319
|
+
except BaseException:
|
|
320
|
+
# Catch all exceptions to prevent ProcessPoolExecutor from silently swallowing them.
|
|
321
|
+
# This ensures that any unexpected exceptions are logged for debugging purposes.
|
|
322
|
+
# This part cannot be wrapped to deduplicate code, otherwise the method cannot be pickled in multiprocessing.
|
|
323
|
+
self.log.warning("Failed to emit DAG started event: \n %s", traceback.format_exc())
|
|
302
324
|
|
|
303
325
|
def dag_success(self, dag_run: DagRun, msg: str):
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
326
|
+
try:
|
|
327
|
+
event = RunEvent(
|
|
328
|
+
eventType=RunState.COMPLETE,
|
|
329
|
+
eventTime=dag_run.end_date.isoformat(),
|
|
330
|
+
job=self._build_job(job_name=dag_run.dag_id, job_type=_JOB_TYPE_DAG),
|
|
331
|
+
run=Run(runId=self.build_dag_run_id(dag_run.dag_id, dag_run.run_id)),
|
|
332
|
+
inputs=[],
|
|
333
|
+
outputs=[],
|
|
334
|
+
producer=_PRODUCER,
|
|
335
|
+
)
|
|
336
|
+
self.emit(event)
|
|
337
|
+
except BaseException:
|
|
338
|
+
# Catch all exceptions to prevent ProcessPoolExecutor from silently swallowing them.
|
|
339
|
+
# This ensures that any unexpected exceptions are logged for debugging purposes.
|
|
340
|
+
# This part cannot be wrapped to deduplicate code, otherwise the method cannot be pickled in multiprocessing.
|
|
341
|
+
self.log.warning("Failed to emit DAG success event: \n %s", traceback.format_exc())
|
|
314
342
|
|
|
315
343
|
def dag_failed(self, dag_run: DagRun, msg: str):
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
344
|
+
try:
|
|
345
|
+
event = RunEvent(
|
|
346
|
+
eventType=RunState.FAIL,
|
|
347
|
+
eventTime=dag_run.end_date.isoformat(),
|
|
348
|
+
job=self._build_job(job_name=dag_run.dag_id, job_type=_JOB_TYPE_DAG),
|
|
349
|
+
run=Run(
|
|
350
|
+
runId=self.build_dag_run_id(dag_run.dag_id, dag_run.run_id),
|
|
351
|
+
facets={"errorMessage": ErrorMessageRunFacet(message=msg, programmingLanguage="python")},
|
|
352
|
+
),
|
|
353
|
+
inputs=[],
|
|
354
|
+
outputs=[],
|
|
355
|
+
producer=_PRODUCER,
|
|
356
|
+
)
|
|
357
|
+
self.emit(event)
|
|
358
|
+
except BaseException:
|
|
359
|
+
# Catch all exceptions to prevent ProcessPoolExecutor from silently swallowing them.
|
|
360
|
+
# This ensures that any unexpected exceptions are logged for debugging purposes.
|
|
361
|
+
# This part cannot be wrapped to deduplicate code, otherwise the method cannot be pickled in multiprocessing.
|
|
362
|
+
self.log.warning("Failed to emit DAG failed event: \n %s", traceback.format_exc())
|
|
329
363
|
|
|
330
364
|
@staticmethod
|
|
331
365
|
def _build_run(
|
|
@@ -17,13 +17,15 @@
|
|
|
17
17
|
from __future__ import annotations
|
|
18
18
|
|
|
19
19
|
import logging
|
|
20
|
-
from concurrent.futures import
|
|
20
|
+
from concurrent.futures import ProcessPoolExecutor
|
|
21
21
|
from datetime import datetime
|
|
22
22
|
from typing import TYPE_CHECKING
|
|
23
23
|
|
|
24
24
|
from openlineage.client.serde import Serde
|
|
25
25
|
|
|
26
|
+
from airflow import __version__ as airflow_version, settings
|
|
26
27
|
from airflow.listeners import hookimpl
|
|
28
|
+
from airflow.providers.openlineage import conf
|
|
27
29
|
from airflow.providers.openlineage.extractors import ExtractorManager
|
|
28
30
|
from airflow.providers.openlineage.plugins.adapter import OpenLineageAdapter, RunState
|
|
29
31
|
from airflow.providers.openlineage.utils.utils import (
|
|
@@ -45,6 +47,16 @@ if TYPE_CHECKING:
|
|
|
45
47
|
_openlineage_listener: OpenLineageListener | None = None
|
|
46
48
|
|
|
47
49
|
|
|
50
|
+
def _get_try_number_success(val):
|
|
51
|
+
# todo: remove when min airflow version >= 2.10.0
|
|
52
|
+
from packaging.version import parse
|
|
53
|
+
|
|
54
|
+
if parse(parse(airflow_version).base_version) < parse("2.10.0"):
|
|
55
|
+
return val.try_number - 1
|
|
56
|
+
else:
|
|
57
|
+
return val.try_number
|
|
58
|
+
|
|
59
|
+
|
|
48
60
|
class OpenLineageListener:
|
|
49
61
|
"""OpenLineage listener sends events on task instance and dag run starts, completes and failures."""
|
|
50
62
|
|
|
@@ -78,13 +90,19 @@ class OpenLineageListener:
|
|
|
78
90
|
dag = task.dag
|
|
79
91
|
if is_operator_disabled(task):
|
|
80
92
|
self.log.debug(
|
|
81
|
-
"Skipping OpenLineage event emission for operator
|
|
93
|
+
"Skipping OpenLineage event emission for operator `%s` "
|
|
82
94
|
"due to its presence in [openlineage] disabled_for_operators.",
|
|
83
95
|
task.task_type,
|
|
84
96
|
)
|
|
85
|
-
return
|
|
97
|
+
return
|
|
86
98
|
|
|
87
99
|
if not is_selective_lineage_enabled(task):
|
|
100
|
+
self.log.debug(
|
|
101
|
+
"Skipping OpenLineage event emission for task `%s` "
|
|
102
|
+
"due to lack of explicit lineage enablement for task or DAG while "
|
|
103
|
+
"[openlineage] selective_enable is on.",
|
|
104
|
+
task.task_id,
|
|
105
|
+
)
|
|
88
106
|
return
|
|
89
107
|
|
|
90
108
|
@print_warning(self.log)
|
|
@@ -146,15 +164,22 @@ class OpenLineageListener:
|
|
|
146
164
|
if TYPE_CHECKING:
|
|
147
165
|
assert task
|
|
148
166
|
dag = task.dag
|
|
167
|
+
|
|
149
168
|
if is_operator_disabled(task):
|
|
150
169
|
self.log.debug(
|
|
151
|
-
"Skipping OpenLineage event emission for operator
|
|
170
|
+
"Skipping OpenLineage event emission for operator `%s` "
|
|
152
171
|
"due to its presence in [openlineage] disabled_for_operators.",
|
|
153
172
|
task.task_type,
|
|
154
173
|
)
|
|
155
|
-
return
|
|
174
|
+
return
|
|
156
175
|
|
|
157
176
|
if not is_selective_lineage_enabled(task):
|
|
177
|
+
self.log.debug(
|
|
178
|
+
"Skipping OpenLineage event emission for task `%s` "
|
|
179
|
+
"due to lack of explicit lineage enablement for task or DAG while "
|
|
180
|
+
"[openlineage] selective_enable is on.",
|
|
181
|
+
task.task_id,
|
|
182
|
+
)
|
|
158
183
|
return
|
|
159
184
|
|
|
160
185
|
@print_warning(self.log)
|
|
@@ -165,7 +190,7 @@ class OpenLineageListener:
|
|
|
165
190
|
dag_id=dag.dag_id,
|
|
166
191
|
task_id=task.task_id,
|
|
167
192
|
execution_date=task_instance.execution_date,
|
|
168
|
-
try_number=task_instance
|
|
193
|
+
try_number=_get_try_number_success(task_instance),
|
|
169
194
|
)
|
|
170
195
|
event_type = RunState.COMPLETE.value.lower()
|
|
171
196
|
operator_name = task.task_type.lower()
|
|
@@ -201,15 +226,22 @@ class OpenLineageListener:
|
|
|
201
226
|
if TYPE_CHECKING:
|
|
202
227
|
assert task
|
|
203
228
|
dag = task.dag
|
|
229
|
+
|
|
204
230
|
if is_operator_disabled(task):
|
|
205
231
|
self.log.debug(
|
|
206
|
-
"Skipping OpenLineage event emission for operator
|
|
232
|
+
"Skipping OpenLineage event emission for operator `%s` "
|
|
207
233
|
"due to its presence in [openlineage] disabled_for_operators.",
|
|
208
234
|
task.task_type,
|
|
209
235
|
)
|
|
210
|
-
return
|
|
236
|
+
return
|
|
211
237
|
|
|
212
238
|
if not is_selective_lineage_enabled(task):
|
|
239
|
+
self.log.debug(
|
|
240
|
+
"Skipping OpenLineage event emission for task `%s` "
|
|
241
|
+
"due to lack of explicit lineage enablement for task or DAG while "
|
|
242
|
+
"[openlineage] selective_enable is on.",
|
|
243
|
+
task.task_id,
|
|
244
|
+
)
|
|
213
245
|
return
|
|
214
246
|
|
|
215
247
|
@print_warning(self.log)
|
|
@@ -249,8 +281,16 @@ class OpenLineageListener:
|
|
|
249
281
|
|
|
250
282
|
@property
|
|
251
283
|
def executor(self):
|
|
284
|
+
def initializer():
|
|
285
|
+
# Re-configure the ORM engine as there are issues with multiple processes
|
|
286
|
+
# if process calls Airflow DB.
|
|
287
|
+
settings.configure_orm()
|
|
288
|
+
|
|
252
289
|
if not self._executor:
|
|
253
|
-
self._executor =
|
|
290
|
+
self._executor = ProcessPoolExecutor(
|
|
291
|
+
max_workers=conf.dag_state_change_process_pool_size(),
|
|
292
|
+
initializer=initializer,
|
|
293
|
+
)
|
|
254
294
|
return self._executor
|
|
255
295
|
|
|
256
296
|
@hookimpl
|
|
@@ -266,7 +306,18 @@ class OpenLineageListener:
|
|
|
266
306
|
@hookimpl
|
|
267
307
|
def on_dag_run_running(self, dag_run: DagRun, msg: str):
|
|
268
308
|
if dag_run.dag and not is_selective_lineage_enabled(dag_run.dag):
|
|
309
|
+
self.log.debug(
|
|
310
|
+
"Skipping OpenLineage event emission for DAG `%s` "
|
|
311
|
+
"due to lack of explicit lineage enablement for DAG while "
|
|
312
|
+
"[openlineage] selective_enable is on.",
|
|
313
|
+
dag_run.dag_id,
|
|
314
|
+
)
|
|
315
|
+
return
|
|
316
|
+
|
|
317
|
+
if not self.executor:
|
|
318
|
+
self.log.debug("Executor have not started before `on_dag_run_running`")
|
|
269
319
|
return
|
|
320
|
+
|
|
270
321
|
data_interval_start = dag_run.data_interval_start.isoformat() if dag_run.data_interval_start else None
|
|
271
322
|
data_interval_end = dag_run.data_interval_end.isoformat() if dag_run.data_interval_end else None
|
|
272
323
|
self.executor.submit(
|
|
@@ -280,19 +331,35 @@ class OpenLineageListener:
|
|
|
280
331
|
@hookimpl
|
|
281
332
|
def on_dag_run_success(self, dag_run: DagRun, msg: str):
|
|
282
333
|
if dag_run.dag and not is_selective_lineage_enabled(dag_run.dag):
|
|
334
|
+
self.log.debug(
|
|
335
|
+
"Skipping OpenLineage event emission for DAG `%s` "
|
|
336
|
+
"due to lack of explicit lineage enablement for DAG while "
|
|
337
|
+
"[openlineage] selective_enable is on.",
|
|
338
|
+
dag_run.dag_id,
|
|
339
|
+
)
|
|
283
340
|
return
|
|
341
|
+
|
|
284
342
|
if not self.executor:
|
|
285
343
|
self.log.debug("Executor have not started before `on_dag_run_success`")
|
|
286
344
|
return
|
|
345
|
+
|
|
287
346
|
self.executor.submit(self.adapter.dag_success, dag_run=dag_run, msg=msg)
|
|
288
347
|
|
|
289
348
|
@hookimpl
|
|
290
349
|
def on_dag_run_failed(self, dag_run: DagRun, msg: str):
|
|
291
350
|
if dag_run.dag and not is_selective_lineage_enabled(dag_run.dag):
|
|
351
|
+
self.log.debug(
|
|
352
|
+
"Skipping OpenLineage event emission for DAG `%s` "
|
|
353
|
+
"due to lack of explicit lineage enablement for DAG while "
|
|
354
|
+
"[openlineage] selective_enable is on.",
|
|
355
|
+
dag_run.dag_id,
|
|
356
|
+
)
|
|
292
357
|
return
|
|
358
|
+
|
|
293
359
|
if not self.executor:
|
|
294
360
|
self.log.debug("Executor have not started before `on_dag_run_failed`")
|
|
295
361
|
return
|
|
362
|
+
|
|
296
363
|
self.executor.submit(self.adapter.dag_failed, dag_run=dag_run, msg=msg)
|
|
297
364
|
|
|
298
365
|
|
|
@@ -25,8 +25,10 @@ from functools import wraps
|
|
|
25
25
|
from typing import TYPE_CHECKING, Any, Iterable
|
|
26
26
|
|
|
27
27
|
import attrs
|
|
28
|
-
from
|
|
28
|
+
from deprecated import deprecated
|
|
29
|
+
from openlineage.client.utils import RedactMixin
|
|
29
30
|
|
|
31
|
+
from airflow.exceptions import AirflowProviderDeprecationWarning # TODO: move this maybe to Airflow's logic?
|
|
30
32
|
from airflow.models import DAG, BaseOperator, MappedOperator
|
|
31
33
|
from airflow.providers.openlineage import conf
|
|
32
34
|
from airflow.providers.openlineage.plugins.facets import (
|
|
@@ -41,6 +43,7 @@ from airflow.providers.openlineage.utils.selective_enable import (
|
|
|
41
43
|
)
|
|
42
44
|
from airflow.utils.context import AirflowContextDeprecationWarning
|
|
43
45
|
from airflow.utils.log.secrets_masker import Redactable, Redacted, SecretsMasker, should_hide_value_for_key
|
|
46
|
+
from airflow.utils.module_loading import import_string
|
|
44
47
|
|
|
45
48
|
if TYPE_CHECKING:
|
|
46
49
|
from airflow.models import DagRun, TaskInstance
|
|
@@ -50,6 +53,11 @@ log = logging.getLogger(__name__)
|
|
|
50
53
|
_NOMINAL_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
|
|
51
54
|
|
|
52
55
|
|
|
56
|
+
def try_import_from_string(string: str) -> Any:
|
|
57
|
+
with suppress(ImportError):
|
|
58
|
+
return import_string(string)
|
|
59
|
+
|
|
60
|
+
|
|
53
61
|
def get_operator_class(task: BaseOperator) -> type:
|
|
54
62
|
if task.__class__.__name__ in ("DecoratedMappedOperator", "MappedOperator"):
|
|
55
63
|
return task.operator_class
|
|
@@ -367,6 +375,9 @@ def print_warning(log):
|
|
|
367
375
|
try:
|
|
368
376
|
return f(*args, **kwargs)
|
|
369
377
|
except Exception as e:
|
|
378
|
+
log.warning(
|
|
379
|
+
"Note: exception below is being caught: it's printed for visibility. However OpenLineage events aren't being emitted. If you see that, task has completed successfully despite not getting OL events."
|
|
380
|
+
)
|
|
370
381
|
log.warning(e)
|
|
371
382
|
|
|
372
383
|
return wrapper
|
|
@@ -379,6 +390,13 @@ def get_filtered_unknown_operator_keys(operator: BaseOperator) -> dict:
|
|
|
379
390
|
return {attr: value for attr, value in operator.__dict__.items() if attr not in not_required_keys}
|
|
380
391
|
|
|
381
392
|
|
|
393
|
+
@deprecated(
|
|
394
|
+
reason=(
|
|
395
|
+
"`airflow.providers.openlineage.utils.utils.normalize_sql` "
|
|
396
|
+
"has been deprecated and will be removed in future"
|
|
397
|
+
),
|
|
398
|
+
category=AirflowProviderDeprecationWarning,
|
|
399
|
+
)
|
|
382
400
|
def normalize_sql(sql: str | Iterable[str]):
|
|
383
401
|
if isinstance(sql, str):
|
|
384
402
|
sql = [stmt for stmt in sql.split(";") if stmt != ""]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: apache-airflow-providers-openlineage
|
|
3
|
-
Version: 1.7.1
|
|
3
|
+
Version: 1.8.0
|
|
4
4
|
Summary: Provider package apache-airflow-providers-openlineage for Apache Airflow
|
|
5
5
|
Keywords: airflow-provider,openlineage,airflow,integration
|
|
6
6
|
Author-email: Apache Software Foundation <dev@airflow.apache.org>
|
|
@@ -28,8 +28,8 @@ Requires-Dist: openlineage-integration-common>=0.28.0
|
|
|
28
28
|
Requires-Dist: openlineage-python>=0.28.0
|
|
29
29
|
Requires-Dist: apache-airflow-providers-common-sql ; extra == "common.sql"
|
|
30
30
|
Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
|
|
31
|
-
Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.7.1/changelog.html
|
|
32
|
-
Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.7.1
|
|
31
|
+
Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.8.0/changelog.html
|
|
32
|
+
Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.8.0
|
|
33
33
|
Project-URL: Slack Chat, https://s.apache.org/airflow-slack
|
|
34
34
|
Project-URL: Source Code, https://github.com/apache/airflow
|
|
35
35
|
Project-URL: Twitter, https://twitter.com/ApacheAirflow
|
|
@@ -80,7 +80,7 @@ Provides-Extra: common.sql
|
|
|
80
80
|
|
|
81
81
|
Package ``apache-airflow-providers-openlineage``
|
|
82
82
|
|
|
83
|
-
Release: ``1.7.1``
|
|
83
|
+
Release: ``1.8.0``
|
|
84
84
|
|
|
85
85
|
|
|
86
86
|
`OpenLineage <https://openlineage.io/>`__
|
|
@@ -93,7 +93,7 @@ This is a provider package for ``openlineage`` provider. All classes for this pr
|
|
|
93
93
|
are in ``airflow.providers.openlineage`` python package.
|
|
94
94
|
|
|
95
95
|
You can find package information and changelog for the provider
|
|
96
|
-
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.7.1/>`_.
|
|
96
|
+
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.8.0/>`_.
|
|
97
97
|
|
|
98
98
|
Installation
|
|
99
99
|
------------
|
|
@@ -137,4 +137,4 @@ Dependent package
|
|
|
137
137
|
============================================================================================================ ==============
|
|
138
138
|
|
|
139
139
|
The changelog for the provider package can be found in the
|
|
140
|
-
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.7.1/changelog.html>`_.
|
|
140
|
+
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.8.0/changelog.html>`_.
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
airflow/providers/openlineage/LICENSE,sha256=ywUBpKZc7Jb96rVt5I3IDbg7dIJAbUSHkuoDcF3jbH4,13569
|
|
2
|
+
airflow/providers/openlineage/__init__.py,sha256=Y_3EiIS_TiqaVpc68HfekILHQRlsSGQLhs72joO7THg,1498
|
|
3
|
+
airflow/providers/openlineage/conf.py,sha256=wozXzU5Do9S0mtjjGc5ruF556G2-ZT4GJa3YLT_-Phg,4693
|
|
4
|
+
airflow/providers/openlineage/get_provider_info.py,sha256=ypUFlQXsC6s-cA7OdslwUaxhjaYIslbP_OILdb9CVNQ,7072
|
|
5
|
+
airflow/providers/openlineage/sqlparser.py,sha256=-FGWWK0Xu6XkGSXcfn7PXsWIe0Y0fwe-3hivHg7emLA,15308
|
|
6
|
+
airflow/providers/openlineage/extractors/__init__.py,sha256=I0X4f6zUniclyD9zT0DFHRImpCpJVP4MkPJT3cd7X5I,1081
|
|
7
|
+
airflow/providers/openlineage/extractors/base.py,sha256=o6z8bXwNor1hwcUzezJ8LIPynR_BqXkP-qtwEgLtD2Q,5476
|
|
8
|
+
airflow/providers/openlineage/extractors/bash.py,sha256=m4hLvDV4-zX4gp8apRuhpAR3Uakr8UOUxf-thTWmOxw,2563
|
|
9
|
+
airflow/providers/openlineage/extractors/manager.py,sha256=9TyszMLAsgPS9NETWq7fPJjxcbTFk47x4kd1NRGCvsw,10315
|
|
10
|
+
airflow/providers/openlineage/extractors/python.py,sha256=EQXCj2aHr2XXw0pNxeX-ii8UQFCoqkdf40ozqmA5d58,3151
|
|
11
|
+
airflow/providers/openlineage/plugins/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
|
|
12
|
+
airflow/providers/openlineage/plugins/adapter.py,sha256=d_HczG_nXMwAHdGD-CO4baVfCc9ROcY0zQUWIZRF_Sw,16917
|
|
13
|
+
airflow/providers/openlineage/plugins/facets.py,sha256=Z6dsz0rv-3VbRaGZJxW_T7Dak2k0ROGF0YrR_1awxZ0,2644
|
|
14
|
+
airflow/providers/openlineage/plugins/listener.py,sha256=LrqvvGL4nzMNeha6PVbtylxp8gru3iKrkQltuQD2k8o,14092
|
|
15
|
+
airflow/providers/openlineage/plugins/macros.py,sha256=QowPc9cc_unV-NLxBwm32OmWETA9pOucWguSeK92SSc,3076
|
|
16
|
+
airflow/providers/openlineage/plugins/openlineage.py,sha256=rsRUW_zpXVAglzsgQRv5T9VWYY7CMQl0qRWm8-3oqDA,1678
|
|
17
|
+
airflow/providers/openlineage/utils/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
|
|
18
|
+
airflow/providers/openlineage/utils/selective_enable.py,sha256=JVTmXdQknBL-9N0drFDkVMf1HCf8C6nbITVaP4-5ba4,3072
|
|
19
|
+
airflow/providers/openlineage/utils/sql.py,sha256=7tEK0zVfIe7v3NI6oyv62x0KAS3sl8Ajfhqob8MdiX8,9366
|
|
20
|
+
airflow/providers/openlineage/utils/utils.py,sha256=WFMdRsuArqqrgjsmFGb_ljIWV6ry-EGkTPZzy5aiG4Q,14036
|
|
21
|
+
apache_airflow_providers_openlineage-1.8.0.dist-info/entry_points.txt,sha256=GAx0_i2OeZzqaiiiYuA-xchICDXiCT5kVqpKSxsOjt4,214
|
|
22
|
+
apache_airflow_providers_openlineage-1.8.0.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
|
|
23
|
+
apache_airflow_providers_openlineage-1.8.0.dist-info/METADATA,sha256=cpXm9kSCT2wFTHbw9s6ZOBuNYq02RMhRRyrZMoTXXqY,6368
|
|
24
|
+
apache_airflow_providers_openlineage-1.8.0.dist-info/RECORD,,
|
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
airflow/providers/openlineage/LICENSE,sha256=ywUBpKZc7Jb96rVt5I3IDbg7dIJAbUSHkuoDcF3jbH4,13569
|
|
2
|
-
airflow/providers/openlineage/__init__.py,sha256=Xc1UFto3ZKaoNfnpJpkK8cxkSAtXxGbxhpZ624JXDko,1586
|
|
3
|
-
airflow/providers/openlineage/conf.py,sha256=Y76TUM_YwQtn-_081wQicZPTe_8bXH8jNafx-GACgeo,3398
|
|
4
|
-
airflow/providers/openlineage/get_provider_info.py,sha256=a_A_2VPU8M2vHZ5CX0_8yUawYEiFieMa3g5tmi3pEnU,6626
|
|
5
|
-
airflow/providers/openlineage/sqlparser.py,sha256=-FGWWK0Xu6XkGSXcfn7PXsWIe0Y0fwe-3hivHg7emLA,15308
|
|
6
|
-
airflow/providers/openlineage/extractors/__init__.py,sha256=I0X4f6zUniclyD9zT0DFHRImpCpJVP4MkPJT3cd7X5I,1081
|
|
7
|
-
airflow/providers/openlineage/extractors/base.py,sha256=sj2KS23ocX7LAbkDiR53otkFg1qqEg41PyBivdc-kyM,5070
|
|
8
|
-
airflow/providers/openlineage/extractors/bash.py,sha256=lE7BH9vipRg9jGloPIE6y6wcHw_BbTvGBasfa4PfDBc,2412
|
|
9
|
-
airflow/providers/openlineage/extractors/manager.py,sha256=wrhsculNW8Pj3BKofT3wDkCqiOFKXmxi-nBd9AifTh4,9996
|
|
10
|
-
airflow/providers/openlineage/extractors/python.py,sha256=1iCC4_Due500ulkHmc_qvt8JGzxxb6suLeYS2FRYDlc,2999
|
|
11
|
-
airflow/providers/openlineage/plugins/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
|
|
12
|
-
airflow/providers/openlineage/plugins/adapter.py,sha256=YcVf_mC6ZTJEb3soRMzb96fypmWZB19M07-mn5Nh-Gc,14634
|
|
13
|
-
airflow/providers/openlineage/plugins/facets.py,sha256=Z6dsz0rv-3VbRaGZJxW_T7Dak2k0ROGF0YrR_1awxZ0,2644
|
|
14
|
-
airflow/providers/openlineage/plugins/listener.py,sha256=lw9IGw_JL_EP_FEVfKmR8JJuDz8xyQFCibgQF2kdHOg,11624
|
|
15
|
-
airflow/providers/openlineage/plugins/macros.py,sha256=QowPc9cc_unV-NLxBwm32OmWETA9pOucWguSeK92SSc,3076
|
|
16
|
-
airflow/providers/openlineage/plugins/openlineage.py,sha256=Owlbpp8puiww-4Wh6B46vYK2vLoGVK48qOW7RyZF188,1625
|
|
17
|
-
airflow/providers/openlineage/utils/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
|
|
18
|
-
airflow/providers/openlineage/utils/selective_enable.py,sha256=JVTmXdQknBL-9N0drFDkVMf1HCf8C6nbITVaP4-5ba4,3072
|
|
19
|
-
airflow/providers/openlineage/utils/sql.py,sha256=7tEK0zVfIe7v3NI6oyv62x0KAS3sl8Ajfhqob8MdiX8,9366
|
|
20
|
-
airflow/providers/openlineage/utils/utils.py,sha256=duT_rXHQuVFUIbMCplGGw0OI0RN0DAXU8oo8FaqcREg,13285
|
|
21
|
-
apache_airflow_providers_openlineage-1.7.1.dist-info/entry_points.txt,sha256=GAx0_i2OeZzqaiiiYuA-xchICDXiCT5kVqpKSxsOjt4,214
|
|
22
|
-
apache_airflow_providers_openlineage-1.7.1.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
|
|
23
|
-
apache_airflow_providers_openlineage-1.7.1.dist-info/METADATA,sha256=2ZflQLoDUko_JC2iEjy_b3NnjSrhMKwWpzZnP_dIc1o,6368
|
|
24
|
-
apache_airflow_providers_openlineage-1.7.1.dist-info/RECORD,,
|
|
File without changes
|