apache-airflow-providers-openlineage 1.6.0__tar.gz → 1.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apache-airflow-providers-openlineage might be problematic. Click here for more details.
- {apache_airflow_providers_openlineage-1.6.0 → apache_airflow_providers_openlineage-1.7.0}/PKG-INFO +8 -7
- {apache_airflow_providers_openlineage-1.6.0 → apache_airflow_providers_openlineage-1.7.0}/README.rst +4 -4
- {apache_airflow_providers_openlineage-1.6.0 → apache_airflow_providers_openlineage-1.7.0}/airflow/providers/openlineage/__init__.py +1 -1
- apache_airflow_providers_openlineage-1.7.0/airflow/providers/openlineage/conf.py +103 -0
- {apache_airflow_providers_openlineage-1.6.0 → apache_airflow_providers_openlineage-1.7.0}/airflow/providers/openlineage/extractors/base.py +3 -34
- {apache_airflow_providers_openlineage-1.6.0 → apache_airflow_providers_openlineage-1.7.0}/airflow/providers/openlineage/extractors/bash.py +6 -22
- {apache_airflow_providers_openlineage-1.6.0 → apache_airflow_providers_openlineage-1.7.0}/airflow/providers/openlineage/extractors/manager.py +14 -33
- {apache_airflow_providers_openlineage-1.6.0 → apache_airflow_providers_openlineage-1.7.0}/airflow/providers/openlineage/extractors/python.py +6 -22
- {apache_airflow_providers_openlineage-1.6.0 → apache_airflow_providers_openlineage-1.7.0}/airflow/providers/openlineage/get_provider_info.py +9 -1
- {apache_airflow_providers_openlineage-1.6.0 → apache_airflow_providers_openlineage-1.7.0}/airflow/providers/openlineage/plugins/adapter.py +9 -19
- {apache_airflow_providers_openlineage-1.6.0 → apache_airflow_providers_openlineage-1.7.0}/airflow/providers/openlineage/plugins/facets.py +11 -0
- {apache_airflow_providers_openlineage-1.6.0 → apache_airflow_providers_openlineage-1.7.0}/airflow/providers/openlineage/plugins/listener.py +49 -4
- apache_airflow_providers_openlineage-1.7.0/airflow/providers/openlineage/plugins/macros.py +87 -0
- {apache_airflow_providers_openlineage-1.6.0 → apache_airflow_providers_openlineage-1.7.0}/airflow/providers/openlineage/plugins/openlineage.py +9 -19
- apache_airflow_providers_openlineage-1.7.0/airflow/providers/openlineage/utils/selective_enable.py +87 -0
- {apache_airflow_providers_openlineage-1.6.0 → apache_airflow_providers_openlineage-1.7.0}/airflow/providers/openlineage/utils/utils.py +79 -125
- {apache_airflow_providers_openlineage-1.6.0 → apache_airflow_providers_openlineage-1.7.0}/pyproject.toml +4 -3
- apache_airflow_providers_openlineage-1.6.0/airflow/providers/openlineage/plugins/macros.py +0 -66
- {apache_airflow_providers_openlineage-1.6.0 → apache_airflow_providers_openlineage-1.7.0}/airflow/providers/openlineage/LICENSE +0 -0
- {apache_airflow_providers_openlineage-1.6.0 → apache_airflow_providers_openlineage-1.7.0}/airflow/providers/openlineage/extractors/__init__.py +0 -0
- {apache_airflow_providers_openlineage-1.6.0 → apache_airflow_providers_openlineage-1.7.0}/airflow/providers/openlineage/plugins/__init__.py +0 -0
- {apache_airflow_providers_openlineage-1.6.0 → apache_airflow_providers_openlineage-1.7.0}/airflow/providers/openlineage/sqlparser.py +0 -0
- {apache_airflow_providers_openlineage-1.6.0 → apache_airflow_providers_openlineage-1.7.0}/airflow/providers/openlineage/utils/__init__.py +0 -0
- {apache_airflow_providers_openlineage-1.6.0 → apache_airflow_providers_openlineage-1.7.0}/airflow/providers/openlineage/utils/sql.py +0 -0
{apache_airflow_providers_openlineage-1.6.0 → apache_airflow_providers_openlineage-1.7.0}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: apache-airflow-providers-openlineage
|
|
3
|
-
Version: 1.6.0
|
|
3
|
+
Version: 1.7.0
|
|
4
4
|
Summary: Provider package apache-airflow-providers-openlineage for Apache Airflow
|
|
5
5
|
Keywords: airflow-provider,openlineage,airflow,integration
|
|
6
6
|
Author-email: Apache Software Foundation <dev@airflow.apache.org>
|
|
@@ -19,6 +19,7 @@ Classifier: Programming Language :: Python :: 3.8
|
|
|
19
19
|
Classifier: Programming Language :: Python :: 3.9
|
|
20
20
|
Classifier: Programming Language :: Python :: 3.10
|
|
21
21
|
Classifier: Programming Language :: Python :: 3.11
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
22
23
|
Classifier: Topic :: System :: Monitoring
|
|
23
24
|
Requires-Dist: apache-airflow-providers-common-sql>=1.6.0
|
|
24
25
|
Requires-Dist: apache-airflow>=2.7.0
|
|
@@ -27,8 +28,8 @@ Requires-Dist: openlineage-integration-common>=0.28.0
|
|
|
27
28
|
Requires-Dist: openlineage-python>=0.28.0
|
|
28
29
|
Requires-Dist: apache-airflow-providers-common-sql ; extra == "common.sql"
|
|
29
30
|
Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
|
|
30
|
-
Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.6.0/changelog.html
|
|
31
|
-
Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.6.0
|
|
31
|
+
Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.7.0/changelog.html
|
|
32
|
+
Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.7.0
|
|
32
33
|
Project-URL: Slack Chat, https://s.apache.org/airflow-slack
|
|
33
34
|
Project-URL: Source Code, https://github.com/apache/airflow
|
|
34
35
|
Project-URL: Twitter, https://twitter.com/ApacheAirflow
|
|
@@ -79,7 +80,7 @@ Provides-Extra: common.sql
|
|
|
79
80
|
|
|
80
81
|
Package ``apache-airflow-providers-openlineage``
|
|
81
82
|
|
|
82
|
-
Release: ``1.6.0``
|
|
83
|
+
Release: ``1.7.0``
|
|
83
84
|
|
|
84
85
|
|
|
85
86
|
`OpenLineage <https://openlineage.io/>`__
|
|
@@ -92,7 +93,7 @@ This is a provider package for ``openlineage`` provider. All classes for this pr
|
|
|
92
93
|
are in ``airflow.providers.openlineage`` python package.
|
|
93
94
|
|
|
94
95
|
You can find package information and changelog for the provider
|
|
95
|
-
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.6.0/>`_.
|
|
96
|
+
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.7.0/>`_.
|
|
96
97
|
|
|
97
98
|
Installation
|
|
98
99
|
------------
|
|
@@ -101,7 +102,7 @@ You can install this package on top of an existing Airflow 2 installation (see `
|
|
|
101
102
|
for the minimum Airflow version supported) via
|
|
102
103
|
``pip install apache-airflow-providers-openlineage``
|
|
103
104
|
|
|
104
|
-
The package supports the following python versions: 3.8,3.9,3.10,3.11
|
|
105
|
+
The package supports the following python versions: 3.8,3.9,3.10,3.11,3.12
|
|
105
106
|
|
|
106
107
|
Requirements
|
|
107
108
|
------------
|
|
@@ -136,4 +137,4 @@ Dependent package
|
|
|
136
137
|
============================================================================================================ ==============
|
|
137
138
|
|
|
138
139
|
The changelog for the provider package can be found in the
|
|
139
|
-
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.6.0/changelog.html>`_.
|
|
140
|
+
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.7.0/changelog.html>`_.
|
{apache_airflow_providers_openlineage-1.6.0 → apache_airflow_providers_openlineage-1.7.0}/README.rst
RENAMED
|
@@ -42,7 +42,7 @@
|
|
|
42
42
|
|
|
43
43
|
Package ``apache-airflow-providers-openlineage``
|
|
44
44
|
|
|
45
|
-
Release: ``1.6.0``
|
|
45
|
+
Release: ``1.7.0``
|
|
46
46
|
|
|
47
47
|
|
|
48
48
|
`OpenLineage <https://openlineage.io/>`__
|
|
@@ -55,7 +55,7 @@ This is a provider package for ``openlineage`` provider. All classes for this pr
|
|
|
55
55
|
are in ``airflow.providers.openlineage`` python package.
|
|
56
56
|
|
|
57
57
|
You can find package information and changelog for the provider
|
|
58
|
-
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.6.0/>`_.
|
|
58
|
+
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.7.0/>`_.
|
|
59
59
|
|
|
60
60
|
Installation
|
|
61
61
|
------------
|
|
@@ -64,7 +64,7 @@ You can install this package on top of an existing Airflow 2 installation (see `
|
|
|
64
64
|
for the minimum Airflow version supported) via
|
|
65
65
|
``pip install apache-airflow-providers-openlineage``
|
|
66
66
|
|
|
67
|
-
The package supports the following python versions: 3.8,3.9,3.10,3.11
|
|
67
|
+
The package supports the following python versions: 3.8,3.9,3.10,3.11,3.12
|
|
68
68
|
|
|
69
69
|
Requirements
|
|
70
70
|
------------
|
|
@@ -99,4 +99,4 @@ Dependent package
|
|
|
99
99
|
============================================================================================================ ==============
|
|
100
100
|
|
|
101
101
|
The changelog for the provider package can be found in the
|
|
102
|
-
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.6.0/changelog.html>`_.
|
|
102
|
+
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.7.0/changelog.html>`_.
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
|
3
|
+
# distributed with this work for additional information
|
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
|
6
|
+
# "License"); you may not use this file except in compliance
|
|
7
|
+
# with the License. You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import os
|
|
21
|
+
from typing import Any
|
|
22
|
+
|
|
23
|
+
from airflow.compat.functools import cache
|
|
24
|
+
from airflow.configuration import conf
|
|
25
|
+
|
|
26
|
+
_CONFIG_SECTION = "openlineage"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@cache
|
|
30
|
+
def config_path(check_legacy_env_var: bool = True) -> str:
|
|
31
|
+
"""[openlineage] config_path."""
|
|
32
|
+
option = conf.get(_CONFIG_SECTION, "config_path", fallback="")
|
|
33
|
+
if check_legacy_env_var and not option:
|
|
34
|
+
option = os.getenv("OPENLINEAGE_CONFIG", "")
|
|
35
|
+
return option
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@cache
|
|
39
|
+
def is_source_enabled() -> bool:
|
|
40
|
+
"""[openlineage] disable_source_code."""
|
|
41
|
+
option = conf.get(_CONFIG_SECTION, "disable_source_code", fallback="")
|
|
42
|
+
if not option:
|
|
43
|
+
option = os.getenv("OPENLINEAGE_AIRFLOW_DISABLE_SOURCE_CODE", "")
|
|
44
|
+
return option.lower() not in ("true", "1", "t")
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@cache
|
|
48
|
+
def disabled_operators() -> set[str]:
|
|
49
|
+
"""[openlineage] disabled_for_operators."""
|
|
50
|
+
option = conf.get(_CONFIG_SECTION, "disabled_for_operators", fallback="")
|
|
51
|
+
return set(operator.strip() for operator in option.split(";") if operator.strip())
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@cache
|
|
55
|
+
def selective_enable() -> bool:
|
|
56
|
+
return conf.getboolean(_CONFIG_SECTION, "selective_enable", fallback=False)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@cache
|
|
60
|
+
def custom_extractors() -> set[str]:
|
|
61
|
+
"""[openlineage] extractors."""
|
|
62
|
+
option = conf.get(_CONFIG_SECTION, "extractors", fallback="")
|
|
63
|
+
if not option:
|
|
64
|
+
option = os.getenv("OPENLINEAGE_EXTRACTORS", "")
|
|
65
|
+
return set(extractor.strip() for extractor in option.split(";") if extractor.strip())
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@cache
|
|
69
|
+
def namespace() -> str:
|
|
70
|
+
"""[openlineage] namespace."""
|
|
71
|
+
option = conf.get(_CONFIG_SECTION, "namespace", fallback="")
|
|
72
|
+
if not option:
|
|
73
|
+
option = os.getenv("OPENLINEAGE_NAMESPACE", "default")
|
|
74
|
+
return option
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@cache
|
|
78
|
+
def transport() -> dict[str, Any]:
|
|
79
|
+
"""[openlineage] transport."""
|
|
80
|
+
option = conf.getjson(_CONFIG_SECTION, "transport", fallback={})
|
|
81
|
+
if not isinstance(option, dict):
|
|
82
|
+
raise ValueError(f"OpenLineage transport `{option}` is not a dict")
|
|
83
|
+
return option
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@cache
|
|
87
|
+
def is_disabled() -> bool:
|
|
88
|
+
"""[openlineage] disabled + some extra checks."""
|
|
89
|
+
|
|
90
|
+
def _is_true(val):
|
|
91
|
+
return str(val).lower().strip() in ("true", "1", "t")
|
|
92
|
+
|
|
93
|
+
option = conf.get(_CONFIG_SECTION, "disabled", fallback="")
|
|
94
|
+
if _is_true(option):
|
|
95
|
+
return True
|
|
96
|
+
|
|
97
|
+
option = os.getenv("OPENLINEAGE_DISABLED", "")
|
|
98
|
+
if _is_true(option):
|
|
99
|
+
return True
|
|
100
|
+
|
|
101
|
+
# Check if both 'transport' and 'config_path' are not present and also
|
|
102
|
+
# if legacy 'OPENLINEAGE_URL' environment variables is not set
|
|
103
|
+
return transport() == {} and config_path(True) == "" and os.getenv("OPENLINEAGE_URL", "") == ""
|
|
@@ -18,12 +18,10 @@
|
|
|
18
18
|
from __future__ import annotations
|
|
19
19
|
|
|
20
20
|
from abc import ABC, abstractmethod
|
|
21
|
-
from functools import cached_property
|
|
22
21
|
from typing import TYPE_CHECKING
|
|
23
22
|
|
|
24
23
|
from attrs import Factory, define
|
|
25
24
|
|
|
26
|
-
from airflow.configuration import conf
|
|
27
25
|
from airflow.utils.log.logging_mixin import LoggingMixin
|
|
28
26
|
from airflow.utils.state import TaskInstanceState
|
|
29
27
|
|
|
@@ -64,33 +62,10 @@ class BaseExtractor(ABC, LoggingMixin):
|
|
|
64
62
|
"""
|
|
65
63
|
raise NotImplementedError()
|
|
66
64
|
|
|
67
|
-
@cached_property
|
|
68
|
-
def disabled_operators(self) -> set[str]:
|
|
69
|
-
return set(
|
|
70
|
-
operator.strip() for operator in conf.get("openlineage", "disabled_for_operators").split(";")
|
|
71
|
-
)
|
|
72
|
-
|
|
73
|
-
@cached_property
|
|
74
|
-
def _is_operator_disabled(self) -> bool:
|
|
75
|
-
fully_qualified_class_name = (
|
|
76
|
-
self.operator.__class__.__module__ + "." + self.operator.__class__.__name__
|
|
77
|
-
)
|
|
78
|
-
return fully_qualified_class_name in self.disabled_operators
|
|
79
|
-
|
|
80
|
-
def validate(self):
|
|
81
|
-
assert self.operator.task_type in self.get_operator_classnames()
|
|
82
|
-
|
|
83
65
|
@abstractmethod
|
|
84
|
-
def _execute_extraction(self) -> OperatorLineage | None:
|
|
85
|
-
...
|
|
66
|
+
def _execute_extraction(self) -> OperatorLineage | None: ...
|
|
86
67
|
|
|
87
68
|
def extract(self) -> OperatorLineage | None:
|
|
88
|
-
if self._is_operator_disabled:
|
|
89
|
-
self.log.debug(
|
|
90
|
-
f"Skipping extraction for operator {self.operator.task_type} "
|
|
91
|
-
"due to its presence in [openlineage] openlineage_disabled_for_operators."
|
|
92
|
-
)
|
|
93
|
-
return None
|
|
94
69
|
return self._execute_extraction()
|
|
95
70
|
|
|
96
71
|
def extract_on_complete(self, task_instance) -> OperatorLineage | None:
|
|
@@ -121,18 +96,12 @@ class DefaultExtractor(BaseExtractor):
|
|
|
121
96
|
return None
|
|
122
97
|
except AttributeError:
|
|
123
98
|
self.log.debug(
|
|
124
|
-
|
|
125
|
-
|
|
99
|
+
"Operator %s does not have the get_openlineage_facets_on_start method.",
|
|
100
|
+
self.operator.task_type,
|
|
126
101
|
)
|
|
127
102
|
return None
|
|
128
103
|
|
|
129
104
|
def extract_on_complete(self, task_instance) -> OperatorLineage | None:
|
|
130
|
-
if self._is_operator_disabled:
|
|
131
|
-
self.log.debug(
|
|
132
|
-
f"Skipping extraction for operator {self.operator.task_type} "
|
|
133
|
-
"due to its presence in [openlineage] openlineage_disabled_for_operators."
|
|
134
|
-
)
|
|
135
|
-
return None
|
|
136
105
|
if task_instance.state == TaskInstanceState.FAILED:
|
|
137
106
|
on_failed = getattr(self.operator, "get_openlineage_facets_on_failure", None)
|
|
138
107
|
if on_failed and callable(on_failed):
|
|
@@ -19,15 +19,9 @@ from __future__ import annotations
|
|
|
19
19
|
|
|
20
20
|
from openlineage.client.facet import SourceCodeJobFacet
|
|
21
21
|
|
|
22
|
+
from airflow.providers.openlineage import conf
|
|
22
23
|
from airflow.providers.openlineage.extractors.base import BaseExtractor, OperatorLineage
|
|
23
|
-
from airflow.providers.openlineage.plugins.facets import (
|
|
24
|
-
UnknownOperatorAttributeRunFacet,
|
|
25
|
-
UnknownOperatorInstance,
|
|
26
|
-
)
|
|
27
|
-
from airflow.providers.openlineage.utils.utils import (
|
|
28
|
-
get_filtered_unknown_operator_keys,
|
|
29
|
-
is_source_enabled,
|
|
30
|
-
)
|
|
24
|
+
from airflow.providers.openlineage.utils.utils import get_unknown_source_attribute_run_facet
|
|
31
25
|
|
|
32
26
|
"""
|
|
33
27
|
:meta private:
|
|
@@ -51,7 +45,7 @@ class BashExtractor(BaseExtractor):
|
|
|
51
45
|
|
|
52
46
|
def _execute_extraction(self) -> OperatorLineage | None:
|
|
53
47
|
job_facets: dict = {}
|
|
54
|
-
if is_source_enabled():
|
|
48
|
+
if conf.is_source_enabled():
|
|
55
49
|
job_facets = {
|
|
56
50
|
"sourceCode": SourceCodeJobFacet(
|
|
57
51
|
language="bash",
|
|
@@ -62,19 +56,9 @@ class BashExtractor(BaseExtractor):
|
|
|
62
56
|
|
|
63
57
|
return OperatorLineage(
|
|
64
58
|
job_facets=job_facets,
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
# directly.
|
|
69
|
-
"unknownSourceAttribute": UnknownOperatorAttributeRunFacet(
|
|
70
|
-
unknownItems=[
|
|
71
|
-
UnknownOperatorInstance(
|
|
72
|
-
name="BashOperator",
|
|
73
|
-
properties=get_filtered_unknown_operator_keys(self.operator),
|
|
74
|
-
)
|
|
75
|
-
]
|
|
76
|
-
)
|
|
77
|
-
},
|
|
59
|
+
# The BashOperator is recorded as an "unknownSource" even though we have an extractor,
|
|
60
|
+
# as the <i>data lineage</i> cannot be determined from the operator directly.
|
|
61
|
+
run_facets=get_unknown_source_attribute_run_facet(task=self.operator, name="BashOperator"),
|
|
78
62
|
)
|
|
79
63
|
|
|
80
64
|
def extract(self) -> OperatorLineage | None:
|
|
@@ -16,20 +16,15 @@
|
|
|
16
16
|
# under the License.
|
|
17
17
|
from __future__ import annotations
|
|
18
18
|
|
|
19
|
-
import os
|
|
20
19
|
from contextlib import suppress
|
|
21
20
|
from typing import TYPE_CHECKING, Iterator
|
|
22
21
|
|
|
23
|
-
from airflow.
|
|
22
|
+
from airflow.providers.openlineage import conf
|
|
24
23
|
from airflow.providers.openlineage.extractors import BaseExtractor, OperatorLineage
|
|
25
24
|
from airflow.providers.openlineage.extractors.base import DefaultExtractor
|
|
26
25
|
from airflow.providers.openlineage.extractors.bash import BashExtractor
|
|
27
26
|
from airflow.providers.openlineage.extractors.python import PythonExtractor
|
|
28
|
-
from airflow.providers.openlineage.plugins.facets import (
|
|
29
|
-
UnknownOperatorAttributeRunFacet,
|
|
30
|
-
UnknownOperatorInstance,
|
|
31
|
-
)
|
|
32
|
-
from airflow.providers.openlineage.utils.utils import get_filtered_unknown_operator_keys
|
|
27
|
+
from airflow.providers.openlineage.utils.utils import get_unknown_source_attribute_run_facet
|
|
33
28
|
from airflow.utils.log.logging_mixin import LoggingMixin
|
|
34
29
|
from airflow.utils.module_loading import import_string
|
|
35
30
|
|
|
@@ -65,22 +60,17 @@ class ExtractorManager(LoggingMixin):
|
|
|
65
60
|
for operator_class in extractor.get_operator_classnames():
|
|
66
61
|
self.extractors[operator_class] = extractor
|
|
67
62
|
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
operator_class,
|
|
80
|
-
extractor,
|
|
81
|
-
self.extractors[operator_class],
|
|
82
|
-
)
|
|
83
|
-
self.extractors[operator_class] = extractor
|
|
63
|
+
for extractor_path in conf.custom_extractors():
|
|
64
|
+
extractor: type[BaseExtractor] = try_import_from_string(extractor_path)
|
|
65
|
+
for operator_class in extractor.get_operator_classnames():
|
|
66
|
+
if operator_class in self.extractors:
|
|
67
|
+
self.log.debug(
|
|
68
|
+
"Duplicate extractor found for `%s`. `%s` will be used instead of `%s`",
|
|
69
|
+
operator_class,
|
|
70
|
+
extractor_path,
|
|
71
|
+
self.extractors[operator_class],
|
|
72
|
+
)
|
|
73
|
+
self.extractors[operator_class] = extractor
|
|
84
74
|
|
|
85
75
|
def add_extractor(self, operator_class: str, extractor: type[BaseExtractor]):
|
|
86
76
|
self.extractors[operator_class] = extractor
|
|
@@ -121,16 +111,7 @@ class ExtractorManager(LoggingMixin):
|
|
|
121
111
|
|
|
122
112
|
# Only include the unkonwnSourceAttribute facet if there is no extractor
|
|
123
113
|
task_metadata = OperatorLineage(
|
|
124
|
-
run_facets=
|
|
125
|
-
"unknownSourceAttribute": UnknownOperatorAttributeRunFacet(
|
|
126
|
-
unknownItems=[
|
|
127
|
-
UnknownOperatorInstance(
|
|
128
|
-
name=task.task_type,
|
|
129
|
-
properties=get_filtered_unknown_operator_keys(task),
|
|
130
|
-
)
|
|
131
|
-
]
|
|
132
|
-
)
|
|
133
|
-
},
|
|
114
|
+
run_facets=get_unknown_source_attribute_run_facet(task=task),
|
|
134
115
|
)
|
|
135
116
|
inlets = task.get_inlet_defs()
|
|
136
117
|
outlets = task.get_outlet_defs()
|
|
@@ -22,15 +22,9 @@ from typing import Callable
|
|
|
22
22
|
|
|
23
23
|
from openlineage.client.facet import SourceCodeJobFacet
|
|
24
24
|
|
|
25
|
+
from airflow.providers.openlineage import conf
|
|
25
26
|
from airflow.providers.openlineage.extractors.base import BaseExtractor, OperatorLineage
|
|
26
|
-
from airflow.providers.openlineage.plugins.facets import (
|
|
27
|
-
UnknownOperatorAttributeRunFacet,
|
|
28
|
-
UnknownOperatorInstance,
|
|
29
|
-
)
|
|
30
|
-
from airflow.providers.openlineage.utils.utils import (
|
|
31
|
-
get_filtered_unknown_operator_keys,
|
|
32
|
-
is_source_enabled,
|
|
33
|
-
)
|
|
27
|
+
from airflow.providers.openlineage.utils.utils import get_unknown_source_attribute_run_facet
|
|
34
28
|
|
|
35
29
|
"""
|
|
36
30
|
:meta private:
|
|
@@ -55,7 +49,7 @@ class PythonExtractor(BaseExtractor):
|
|
|
55
49
|
def _execute_extraction(self) -> OperatorLineage | None:
|
|
56
50
|
source_code = self.get_source_code(self.operator.python_callable)
|
|
57
51
|
job_facet: dict = {}
|
|
58
|
-
if is_source_enabled() and source_code:
|
|
52
|
+
if conf.is_source_enabled() and source_code:
|
|
59
53
|
job_facet = {
|
|
60
54
|
"sourceCode": SourceCodeJobFacet(
|
|
61
55
|
language="python",
|
|
@@ -65,19 +59,9 @@ class PythonExtractor(BaseExtractor):
|
|
|
65
59
|
}
|
|
66
60
|
return OperatorLineage(
|
|
67
61
|
job_facets=job_facet,
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
# directly.
|
|
72
|
-
"unknownSourceAttribute": UnknownOperatorAttributeRunFacet(
|
|
73
|
-
unknownItems=[
|
|
74
|
-
UnknownOperatorInstance(
|
|
75
|
-
name="PythonOperator",
|
|
76
|
-
properties=get_filtered_unknown_operator_keys(self.operator),
|
|
77
|
-
)
|
|
78
|
-
]
|
|
79
|
-
)
|
|
80
|
-
},
|
|
62
|
+
# The PythonOperator is recorded as an "unknownSource" even though we have an extractor,
|
|
63
|
+
# as the <i>data lineage</i> cannot be determined from the operator directly.
|
|
64
|
+
run_facets=get_unknown_source_attribute_run_facet(task=self.operator, name="PythonOperator"),
|
|
81
65
|
)
|
|
82
66
|
|
|
83
67
|
def get_source_code(self, callable: Callable) -> str | None:
|
|
@@ -28,8 +28,9 @@ def get_provider_info():
|
|
|
28
28
|
"name": "OpenLineage Airflow",
|
|
29
29
|
"description": "`OpenLineage <https://openlineage.io/>`__\n",
|
|
30
30
|
"state": "ready",
|
|
31
|
-
"source-date-epoch":
|
|
31
|
+
"source-date-epoch": 1712666247,
|
|
32
32
|
"versions": [
|
|
33
|
+
"1.7.0",
|
|
33
34
|
"1.6.0",
|
|
34
35
|
"1.5.0",
|
|
35
36
|
"1.4.0",
|
|
@@ -82,6 +83,13 @@ def get_provider_info():
|
|
|
82
83
|
"default": "",
|
|
83
84
|
"version_added": "1.1.0",
|
|
84
85
|
},
|
|
86
|
+
"selective_enable": {
|
|
87
|
+
"description": "If this setting is enabled, OpenLineage integration won't collect and emit metadata,\nunless you explicitly enable it per `DAG` or `Task` using `enable_lineage` method.\n",
|
|
88
|
+
"type": "boolean",
|
|
89
|
+
"default": "False",
|
|
90
|
+
"example": None,
|
|
91
|
+
"version_added": "1.7.0",
|
|
92
|
+
},
|
|
85
93
|
"namespace": {
|
|
86
94
|
"description": "Set namespace that the lineage data belongs to, so that if you use multiple OpenLineage producers,\nevents coming from them will be logically separated.\n",
|
|
87
95
|
"version_added": None,
|
|
@@ -16,7 +16,6 @@
|
|
|
16
16
|
# under the License.
|
|
17
17
|
from __future__ import annotations
|
|
18
18
|
|
|
19
|
-
import os
|
|
20
19
|
import uuid
|
|
21
20
|
from contextlib import ExitStack
|
|
22
21
|
from typing import TYPE_CHECKING
|
|
@@ -37,8 +36,7 @@ from openlineage.client.facet import (
|
|
|
37
36
|
)
|
|
38
37
|
from openlineage.client.run import Job, Run, RunEvent, RunState
|
|
39
38
|
|
|
40
|
-
from airflow.configuration import conf
|
|
41
|
-
from airflow.providers.openlineage import __version__ as OPENLINEAGE_PROVIDER_VERSION
|
|
39
|
+
from airflow.providers.openlineage import __version__ as OPENLINEAGE_PROVIDER_VERSION, conf
|
|
42
40
|
from airflow.providers.openlineage.utils.utils import OpenLineageRedactor
|
|
43
41
|
from airflow.stats import Stats
|
|
44
42
|
from airflow.utils.log.logging_mixin import LoggingMixin
|
|
@@ -48,12 +46,6 @@ if TYPE_CHECKING:
|
|
|
48
46
|
from airflow.providers.openlineage.extractors import OperatorLineage
|
|
49
47
|
from airflow.utils.log.secrets_masker import SecretsMasker
|
|
50
48
|
|
|
51
|
-
_DAG_DEFAULT_NAMESPACE = "default"
|
|
52
|
-
|
|
53
|
-
_DAG_NAMESPACE = conf.get(
|
|
54
|
-
"openlineage", "namespace", fallback=os.getenv("OPENLINEAGE_NAMESPACE", _DAG_DEFAULT_NAMESPACE)
|
|
55
|
-
)
|
|
56
|
-
|
|
57
49
|
_PRODUCER = f"https://github.com/apache/airflow/tree/providers-openlineage/{OPENLINEAGE_PROVIDER_VERSION}"
|
|
58
50
|
|
|
59
51
|
set_producer(_PRODUCER)
|
|
@@ -88,18 +80,16 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
88
80
|
|
|
89
81
|
def get_openlineage_config(self) -> dict | None:
|
|
90
82
|
# First, try to read from YAML file
|
|
91
|
-
openlineage_config_path = conf.
|
|
83
|
+
openlineage_config_path = conf.config_path(check_legacy_env_var=False)
|
|
92
84
|
if openlineage_config_path:
|
|
93
85
|
config = self._read_yaml_config(openlineage_config_path)
|
|
94
86
|
if config:
|
|
95
87
|
return config.get("transport", None)
|
|
96
88
|
# Second, try to get transport config
|
|
97
|
-
|
|
98
|
-
if not
|
|
89
|
+
transport_config = conf.transport()
|
|
90
|
+
if not transport_config:
|
|
99
91
|
return None
|
|
100
|
-
|
|
101
|
-
raise ValueError(f"{transport} is not a dict")
|
|
102
|
-
return transport
|
|
92
|
+
return transport_config
|
|
103
93
|
|
|
104
94
|
def _read_yaml_config(self, path: str) -> dict | None:
|
|
105
95
|
with open(path) as config_file:
|
|
@@ -107,14 +97,14 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
107
97
|
|
|
108
98
|
@staticmethod
|
|
109
99
|
def build_dag_run_id(dag_id, dag_run_id):
|
|
110
|
-
return str(uuid.uuid3(uuid.NAMESPACE_URL, f"{_DAG_NAMESPACE}.{dag_id}.{dag_run_id}"))
|
|
100
|
+
return str(uuid.uuid3(uuid.NAMESPACE_URL, f"{conf.namespace()}.{dag_id}.{dag_run_id}"))
|
|
111
101
|
|
|
112
102
|
@staticmethod
|
|
113
103
|
def build_task_instance_run_id(dag_id, task_id, execution_date, try_number):
|
|
114
104
|
return str(
|
|
115
105
|
uuid.uuid3(
|
|
116
106
|
uuid.NAMESPACE_URL,
|
|
117
|
-
f"{
|
|
107
|
+
f"{conf.namespace()}.{dag_id}.{task_id}.{execution_date}.{try_number}",
|
|
118
108
|
)
|
|
119
109
|
)
|
|
120
110
|
|
|
@@ -353,7 +343,7 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
353
343
|
if parent_run_id:
|
|
354
344
|
parent_run_facet = ParentRunFacet.create(
|
|
355
345
|
runId=parent_run_id,
|
|
356
|
-
namespace=_DAG_NAMESPACE,
|
|
346
|
+
namespace=conf.namespace(),
|
|
357
347
|
name=parent_job_name or job_name,
|
|
358
348
|
)
|
|
359
349
|
facets.update(
|
|
@@ -396,4 +386,4 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
396
386
|
|
|
397
387
|
facets.update({"jobType": job_type})
|
|
398
388
|
|
|
399
|
-
return Job(_DAG_NAMESPACE, job_name, facets)
|
|
389
|
+
return Job(conf.namespace(), job_name, facets)
|
|
@@ -17,10 +17,17 @@
|
|
|
17
17
|
from __future__ import annotations
|
|
18
18
|
|
|
19
19
|
from attrs import define
|
|
20
|
+
from deprecated import deprecated
|
|
20
21
|
from openlineage.client.facet import BaseFacet
|
|
21
22
|
from openlineage.client.utils import RedactMixin
|
|
22
23
|
|
|
24
|
+
from airflow.exceptions import AirflowProviderDeprecationWarning
|
|
23
25
|
|
|
26
|
+
|
|
27
|
+
@deprecated(
|
|
28
|
+
reason="To be removed in the next release. Make sure to use information from AirflowRunFacet instead.",
|
|
29
|
+
category=AirflowProviderDeprecationWarning,
|
|
30
|
+
)
|
|
24
31
|
@define(slots=False)
|
|
25
32
|
class AirflowMappedTaskRunFacet(BaseFacet):
|
|
26
33
|
"""Run facet containing information about mapped tasks."""
|
|
@@ -66,6 +73,10 @@ class UnknownOperatorInstance(RedactMixin):
|
|
|
66
73
|
_skip_redact = ["name", "type"]
|
|
67
74
|
|
|
68
75
|
|
|
76
|
+
@deprecated(
|
|
77
|
+
reason="To be removed in the next release. Make sure to use information from AirflowRunFacet instead.",
|
|
78
|
+
category=AirflowProviderDeprecationWarning,
|
|
79
|
+
)
|
|
69
80
|
@define(slots=False)
|
|
70
81
|
class UnknownOperatorAttributeRunFacet(BaseFacet):
|
|
71
82
|
"""RunFacet that describes unknown operators in an Airflow DAG."""
|
|
@@ -30,6 +30,8 @@ from airflow.providers.openlineage.utils.utils import (
|
|
|
30
30
|
get_airflow_run_facet,
|
|
31
31
|
get_custom_facets,
|
|
32
32
|
get_job_name,
|
|
33
|
+
is_operator_disabled,
|
|
34
|
+
is_selective_lineage_enabled,
|
|
33
35
|
print_warning,
|
|
34
36
|
)
|
|
35
37
|
from airflow.stats import Stats
|
|
@@ -59,17 +61,31 @@ class OpenLineageListener:
|
|
|
59
61
|
task_instance: TaskInstance,
|
|
60
62
|
session: Session, # This will always be QUEUED
|
|
61
63
|
):
|
|
62
|
-
if not
|
|
64
|
+
if not getattr(task_instance, "task", None) is not None:
|
|
63
65
|
self.log.warning(
|
|
64
|
-
|
|
65
|
-
|
|
66
|
+
"No task set for TI object task_id: %s - dag_id: %s - run_id %s",
|
|
67
|
+
task_instance.task_id,
|
|
68
|
+
task_instance.dag_id,
|
|
69
|
+
task_instance.run_id,
|
|
66
70
|
)
|
|
67
71
|
return
|
|
68
72
|
|
|
69
73
|
self.log.debug("OpenLineage listener got notification about task instance start")
|
|
70
74
|
dagrun = task_instance.dag_run
|
|
71
75
|
task = task_instance.task
|
|
76
|
+
if TYPE_CHECKING:
|
|
77
|
+
assert task
|
|
72
78
|
dag = task.dag
|
|
79
|
+
if is_operator_disabled(task):
|
|
80
|
+
self.log.debug(
|
|
81
|
+
"Skipping OpenLineage event emission for operator %s "
|
|
82
|
+
"due to its presence in [openlineage] disabled_for_operators.",
|
|
83
|
+
task.task_type,
|
|
84
|
+
)
|
|
85
|
+
return None
|
|
86
|
+
|
|
87
|
+
if not is_selective_lineage_enabled(task):
|
|
88
|
+
return
|
|
73
89
|
|
|
74
90
|
@print_warning(self.log)
|
|
75
91
|
def on_running():
|
|
@@ -127,7 +143,19 @@ class OpenLineageListener:
|
|
|
127
143
|
|
|
128
144
|
dagrun = task_instance.dag_run
|
|
129
145
|
task = task_instance.task
|
|
146
|
+
if TYPE_CHECKING:
|
|
147
|
+
assert task
|
|
130
148
|
dag = task.dag
|
|
149
|
+
if is_operator_disabled(task):
|
|
150
|
+
self.log.debug(
|
|
151
|
+
"Skipping OpenLineage event emission for operator %s "
|
|
152
|
+
"due to its presence in [openlineage] disabled_for_operators.",
|
|
153
|
+
task.task_type,
|
|
154
|
+
)
|
|
155
|
+
return None
|
|
156
|
+
|
|
157
|
+
if not is_selective_lineage_enabled(task):
|
|
158
|
+
return
|
|
131
159
|
|
|
132
160
|
@print_warning(self.log)
|
|
133
161
|
def on_success():
|
|
@@ -170,7 +198,19 @@ class OpenLineageListener:
|
|
|
170
198
|
|
|
171
199
|
dagrun = task_instance.dag_run
|
|
172
200
|
task = task_instance.task
|
|
201
|
+
if TYPE_CHECKING:
|
|
202
|
+
assert task
|
|
173
203
|
dag = task.dag
|
|
204
|
+
if is_operator_disabled(task):
|
|
205
|
+
self.log.debug(
|
|
206
|
+
"Skipping OpenLineage event emission for operator %s "
|
|
207
|
+
"due to its presence in [openlineage] disabled_for_operators.",
|
|
208
|
+
task.task_type,
|
|
209
|
+
)
|
|
210
|
+
return None
|
|
211
|
+
|
|
212
|
+
if not is_selective_lineage_enabled(task):
|
|
213
|
+
return
|
|
174
214
|
|
|
175
215
|
@print_warning(self.log)
|
|
176
216
|
def on_failure():
|
|
@@ -220,12 +260,13 @@ class OpenLineageListener:
|
|
|
220
260
|
@hookimpl
|
|
221
261
|
def before_stopping(self, component):
|
|
222
262
|
self.log.debug("before_stopping: %s", component.__class__.__name__)
|
|
223
|
-
# TODO: configure this with Airflow config
|
|
224
263
|
with timeout(30):
|
|
225
264
|
self.executor.shutdown(wait=True)
|
|
226
265
|
|
|
227
266
|
@hookimpl
|
|
228
267
|
def on_dag_run_running(self, dag_run: DagRun, msg: str):
|
|
268
|
+
if dag_run.dag and not is_selective_lineage_enabled(dag_run.dag):
|
|
269
|
+
return
|
|
229
270
|
data_interval_start = dag_run.data_interval_start.isoformat() if dag_run.data_interval_start else None
|
|
230
271
|
data_interval_end = dag_run.data_interval_end.isoformat() if dag_run.data_interval_end else None
|
|
231
272
|
self.executor.submit(
|
|
@@ -238,6 +279,8 @@ class OpenLineageListener:
|
|
|
238
279
|
|
|
239
280
|
@hookimpl
|
|
240
281
|
def on_dag_run_success(self, dag_run: DagRun, msg: str):
|
|
282
|
+
if dag_run.dag and not is_selective_lineage_enabled(dag_run.dag):
|
|
283
|
+
return
|
|
241
284
|
if not self.executor:
|
|
242
285
|
self.log.debug("Executor have not started before `on_dag_run_success`")
|
|
243
286
|
return
|
|
@@ -245,6 +288,8 @@ class OpenLineageListener:
|
|
|
245
288
|
|
|
246
289
|
@hookimpl
|
|
247
290
|
def on_dag_run_failed(self, dag_run: DagRun, msg: str):
|
|
291
|
+
if dag_run.dag and not is_selective_lineage_enabled(dag_run.dag):
|
|
292
|
+
return
|
|
248
293
|
if not self.executor:
|
|
249
294
|
self.log.debug("Executor have not started before `on_dag_run_failed`")
|
|
250
295
|
return
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
|
3
|
+
# distributed with this work for additional information
|
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
|
6
|
+
# "License"); you may not use this file except in compliance
|
|
7
|
+
# with the License. You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
from typing import TYPE_CHECKING
|
|
20
|
+
|
|
21
|
+
from airflow.providers.openlineage import conf
|
|
22
|
+
from airflow.providers.openlineage.plugins.adapter import OpenLineageAdapter
|
|
23
|
+
from airflow.providers.openlineage.utils.utils import get_job_name
|
|
24
|
+
|
|
25
|
+
if TYPE_CHECKING:
|
|
26
|
+
from airflow.models import TaskInstance
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def lineage_job_namespace():
|
|
30
|
+
"""
|
|
31
|
+
Macro function which returns Airflow OpenLineage namespace.
|
|
32
|
+
|
|
33
|
+
.. seealso::
|
|
34
|
+
For more information take a look at the guide:
|
|
35
|
+
:ref:`howto/macros:openlineage`
|
|
36
|
+
"""
|
|
37
|
+
return conf.namespace()
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def lineage_job_name(task_instance: TaskInstance):
|
|
41
|
+
"""
|
|
42
|
+
Macro function which returns Airflow task name in OpenLineage format (`<dag_id>.<task_id>`).
|
|
43
|
+
|
|
44
|
+
.. seealso::
|
|
45
|
+
For more information take a look at the guide:
|
|
46
|
+
:ref:`howto/macros:openlineage`
|
|
47
|
+
"""
|
|
48
|
+
return get_job_name(task_instance)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def lineage_run_id(task_instance: TaskInstance):
|
|
52
|
+
"""
|
|
53
|
+
Macro function which returns the generated run id (UUID) for a given task.
|
|
54
|
+
|
|
55
|
+
This can be used to forward the run id from a task to a child run so the job hierarchy is preserved.
|
|
56
|
+
|
|
57
|
+
.. seealso::
|
|
58
|
+
For more information take a look at the guide:
|
|
59
|
+
:ref:`howto/macros:openlineage`
|
|
60
|
+
"""
|
|
61
|
+
return OpenLineageAdapter.build_task_instance_run_id(
|
|
62
|
+
dag_id=task_instance.dag_id,
|
|
63
|
+
task_id=task_instance.task_id,
|
|
64
|
+
execution_date=task_instance.execution_date,
|
|
65
|
+
try_number=task_instance.try_number,
|
|
66
|
+
)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def lineage_parent_id(task_instance: TaskInstance):
|
|
70
|
+
"""
|
|
71
|
+
Macro function which returns a unique identifier of given task that can be used to create ParentRunFacet.
|
|
72
|
+
|
|
73
|
+
This identifier is composed of the namespace, job name, and generated run id for given task, structured
|
|
74
|
+
as '{namespace}/{job_name}/{run_id}'. This can be used to forward task information from a task to a child
|
|
75
|
+
run so the job hierarchy is preserved. Child run can easily create ParentRunFacet from these information.
|
|
76
|
+
|
|
77
|
+
.. seealso::
|
|
78
|
+
For more information take a look at the guide:
|
|
79
|
+
:ref:`howto/macros:openlineage`
|
|
80
|
+
"""
|
|
81
|
+
return "/".join(
|
|
82
|
+
(
|
|
83
|
+
lineage_job_namespace(),
|
|
84
|
+
lineage_job_name(task_instance),
|
|
85
|
+
lineage_run_id(task_instance),
|
|
86
|
+
)
|
|
87
|
+
)
|
|
@@ -16,25 +16,15 @@
|
|
|
16
16
|
# under the License.
|
|
17
17
|
from __future__ import annotations
|
|
18
18
|
|
|
19
|
-
import os
|
|
20
|
-
|
|
21
|
-
from airflow.configuration import conf
|
|
22
19
|
from airflow.plugins_manager import AirflowPlugin
|
|
20
|
+
from airflow.providers.openlineage import conf
|
|
23
21
|
from airflow.providers.openlineage.plugins.listener import get_openlineage_listener
|
|
24
|
-
from airflow.providers.openlineage.plugins.macros import lineage_parent_id, lineage_run_id
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
or os.getenv("OPENLINEAGE_DISABLED", "false").lower() == "true"
|
|
31
|
-
or (
|
|
32
|
-
conf.get("openlineage", "transport", fallback="") == ""
|
|
33
|
-
and conf.get("openlineage", "config_path", fallback="") == ""
|
|
34
|
-
and os.getenv("OPENLINEAGE_URL", "") == ""
|
|
35
|
-
and os.getenv("OPENLINEAGE_CONFIG", "") == ""
|
|
36
|
-
)
|
|
37
|
-
)
|
|
22
|
+
from airflow.providers.openlineage.plugins.macros import (
|
|
23
|
+
lineage_job_name,
|
|
24
|
+
lineage_job_namespace,
|
|
25
|
+
lineage_parent_id,
|
|
26
|
+
lineage_run_id,
|
|
27
|
+
)
|
|
38
28
|
|
|
39
29
|
|
|
40
30
|
class OpenLineageProviderPlugin(AirflowPlugin):
|
|
@@ -46,6 +36,6 @@ class OpenLineageProviderPlugin(AirflowPlugin):
|
|
|
46
36
|
"""
|
|
47
37
|
|
|
48
38
|
name = "OpenLineageProviderPlugin"
|
|
49
|
-
if not
|
|
50
|
-
macros = [lineage_run_id, lineage_parent_id]
|
|
39
|
+
if not conf.is_disabled():
|
|
40
|
+
macros = [lineage_job_namespace, lineage_job_name, lineage_run_id, lineage_parent_id]
|
|
51
41
|
listeners = [get_openlineage_listener()]
|
apache_airflow_providers_openlineage-1.7.0/airflow/providers/openlineage/utils/selective_enable.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
|
3
|
+
# distributed with this work for additional information
|
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
|
6
|
+
# "License"); you may not use this file except in compliance
|
|
7
|
+
# with the License. You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import logging
|
|
21
|
+
from typing import TypeVar
|
|
22
|
+
|
|
23
|
+
from airflow.models import DAG, Operator, Param
|
|
24
|
+
from airflow.models.xcom_arg import XComArg
|
|
25
|
+
|
|
26
|
+
ENABLE_OL_PARAM_NAME = "_selective_enable_ol"
|
|
27
|
+
ENABLE_OL_PARAM = Param(True, const=True)
|
|
28
|
+
DISABLE_OL_PARAM = Param(False, const=False)
|
|
29
|
+
T = TypeVar("T", bound="DAG | Operator")
|
|
30
|
+
|
|
31
|
+
log = logging.getLogger(__name__)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def enable_lineage(obj: T) -> T:
|
|
35
|
+
"""Set selective enable OpenLineage parameter to True.
|
|
36
|
+
|
|
37
|
+
The method also propagates param to tasks if the object is DAG.
|
|
38
|
+
"""
|
|
39
|
+
if isinstance(obj, XComArg):
|
|
40
|
+
enable_lineage(obj.operator)
|
|
41
|
+
return obj
|
|
42
|
+
# propagate param to tasks
|
|
43
|
+
if isinstance(obj, DAG):
|
|
44
|
+
for task in obj.task_dict.values():
|
|
45
|
+
enable_lineage(task)
|
|
46
|
+
obj.params[ENABLE_OL_PARAM_NAME] = ENABLE_OL_PARAM
|
|
47
|
+
return obj
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def disable_lineage(obj: T) -> T:
|
|
51
|
+
"""Set selective enable OpenLineage parameter to False.
|
|
52
|
+
|
|
53
|
+
The method also propagates param to tasks if the object is DAG.
|
|
54
|
+
"""
|
|
55
|
+
if isinstance(obj, XComArg):
|
|
56
|
+
disable_lineage(obj.operator)
|
|
57
|
+
return obj
|
|
58
|
+
# propagate param to tasks
|
|
59
|
+
if isinstance(obj, DAG):
|
|
60
|
+
for task in obj.task_dict.values():
|
|
61
|
+
disable_lineage(task)
|
|
62
|
+
obj.params[ENABLE_OL_PARAM_NAME] = DISABLE_OL_PARAM
|
|
63
|
+
return obj
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def is_task_lineage_enabled(task: Operator) -> bool:
|
|
67
|
+
"""Check if selective enable OpenLineage parameter is set to True on task level."""
|
|
68
|
+
if task.params.get(ENABLE_OL_PARAM_NAME) is False:
|
|
69
|
+
log.debug(
|
|
70
|
+
"OpenLineage event emission suppressed. Task for this functionality is selectively disabled."
|
|
71
|
+
)
|
|
72
|
+
return task.params.get(ENABLE_OL_PARAM_NAME) is True
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def is_dag_lineage_enabled(dag: DAG) -> bool:
|
|
76
|
+
"""Check if DAG is selectively enabled to emit OpenLineage events.
|
|
77
|
+
|
|
78
|
+
The method also checks if selective enable parameter is set to True
|
|
79
|
+
or if any of the tasks in DAG is selectively enabled.
|
|
80
|
+
"""
|
|
81
|
+
if dag.params.get(ENABLE_OL_PARAM_NAME) is False:
|
|
82
|
+
log.debug(
|
|
83
|
+
"OpenLineage event emission suppressed. DAG for this functionality is selectively disabled."
|
|
84
|
+
)
|
|
85
|
+
return dag.params.get(ENABLE_OL_PARAM_NAME) is True or any(
|
|
86
|
+
is_task_lineage_enabled(task) for task in dag.tasks
|
|
87
|
+
)
|
|
@@ -20,124 +20,43 @@ from __future__ import annotations
|
|
|
20
20
|
import datetime
|
|
21
21
|
import json
|
|
22
22
|
import logging
|
|
23
|
-
import os
|
|
24
23
|
from contextlib import suppress
|
|
25
24
|
from functools import wraps
|
|
26
25
|
from typing import TYPE_CHECKING, Any, Iterable
|
|
27
|
-
from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
|
|
28
26
|
|
|
29
27
|
import attrs
|
|
30
|
-
from
|
|
28
|
+
from openlineage.client.utils import RedactMixin # TODO: move this maybe to Airflow's logic?
|
|
31
29
|
|
|
32
|
-
|
|
33
|
-
from
|
|
34
|
-
|
|
35
|
-
from airflow.compat.functools import cache
|
|
36
|
-
from airflow.configuration import conf
|
|
30
|
+
from airflow.models import DAG, BaseOperator, MappedOperator
|
|
31
|
+
from airflow.providers.openlineage import conf
|
|
37
32
|
from airflow.providers.openlineage.plugins.facets import (
|
|
38
33
|
AirflowMappedTaskRunFacet,
|
|
39
34
|
AirflowRunFacet,
|
|
35
|
+
UnknownOperatorAttributeRunFacet,
|
|
36
|
+
UnknownOperatorInstance,
|
|
37
|
+
)
|
|
38
|
+
from airflow.providers.openlineage.utils.selective_enable import (
|
|
39
|
+
is_dag_lineage_enabled,
|
|
40
|
+
is_task_lineage_enabled,
|
|
40
41
|
)
|
|
41
42
|
from airflow.utils.context import AirflowContextDeprecationWarning
|
|
42
43
|
from airflow.utils.log.secrets_masker import Redactable, Redacted, SecretsMasker, should_hide_value_for_key
|
|
43
44
|
|
|
44
45
|
if TYPE_CHECKING:
|
|
45
|
-
from airflow.models import
|
|
46
|
+
from airflow.models import DagRun, TaskInstance
|
|
46
47
|
|
|
47
48
|
|
|
48
49
|
log = logging.getLogger(__name__)
|
|
49
50
|
_NOMINAL_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
|
|
50
51
|
|
|
51
52
|
|
|
52
|
-
def openlineage_job_name(dag_id: str, task_id: str) -> str:
|
|
53
|
-
return f"{dag_id}.{task_id}"
|
|
54
|
-
|
|
55
|
-
|
|
56
53
|
def get_operator_class(task: BaseOperator) -> type:
|
|
57
54
|
if task.__class__.__name__ in ("DecoratedMappedOperator", "MappedOperator"):
|
|
58
55
|
return task.operator_class
|
|
59
56
|
return task.__class__
|
|
60
57
|
|
|
61
58
|
|
|
62
|
-
def
|
|
63
|
-
def _task_encoder(obj):
|
|
64
|
-
from airflow.models import DAG
|
|
65
|
-
|
|
66
|
-
if isinstance(obj, datetime.datetime):
|
|
67
|
-
return obj.isoformat()
|
|
68
|
-
elif isinstance(obj, DAG):
|
|
69
|
-
return {
|
|
70
|
-
"dag_id": obj.dag_id,
|
|
71
|
-
"tags": obj.tags,
|
|
72
|
-
"schedule_interval": obj.schedule_interval,
|
|
73
|
-
"timetable": obj.timetable.serialize(),
|
|
74
|
-
}
|
|
75
|
-
else:
|
|
76
|
-
return str(obj)
|
|
77
|
-
|
|
78
|
-
return json.loads(json.dumps(task.__dict__, default=_task_encoder))
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
def url_to_https(url) -> str | None:
|
|
82
|
-
# Ensure URL exists
|
|
83
|
-
if not url:
|
|
84
|
-
return None
|
|
85
|
-
|
|
86
|
-
base_url = None
|
|
87
|
-
if url.startswith("git@"):
|
|
88
|
-
part = url.split("git@")[1:2]
|
|
89
|
-
if part:
|
|
90
|
-
base_url = f'https://{part[0].replace(":", "/", 1)}'
|
|
91
|
-
elif url.startswith("https://"):
|
|
92
|
-
base_url = url
|
|
93
|
-
|
|
94
|
-
if not base_url:
|
|
95
|
-
raise ValueError(f"Unable to extract location from: {url}")
|
|
96
|
-
|
|
97
|
-
if base_url.endswith(".git"):
|
|
98
|
-
base_url = base_url[:-4]
|
|
99
|
-
return base_url
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
def redacted_connection_uri(conn: Connection, filtered_params=None, filtered_prefixes=None):
|
|
103
|
-
"""
|
|
104
|
-
Return the connection URI for the given Connection.
|
|
105
|
-
|
|
106
|
-
This method additionally filters URI by removing query parameters that are known to carry sensitive data
|
|
107
|
-
like username, password, access key.
|
|
108
|
-
"""
|
|
109
|
-
if filtered_prefixes is None:
|
|
110
|
-
filtered_prefixes = []
|
|
111
|
-
if filtered_params is None:
|
|
112
|
-
filtered_params = []
|
|
113
|
-
|
|
114
|
-
def filter_key_params(k: str):
|
|
115
|
-
return k not in filtered_params and any(substr in k for substr in filtered_prefixes)
|
|
116
|
-
|
|
117
|
-
conn_uri = conn.get_uri()
|
|
118
|
-
parsed = urlparse(conn_uri)
|
|
119
|
-
|
|
120
|
-
# Remove username and password
|
|
121
|
-
netloc = f"{parsed.hostname}" + (f":{parsed.port}" if parsed.port else "")
|
|
122
|
-
parsed = parsed._replace(netloc=netloc)
|
|
123
|
-
if parsed.query:
|
|
124
|
-
query_dict = dict(parse_qsl(parsed.query))
|
|
125
|
-
if conn.EXTRA_KEY in query_dict:
|
|
126
|
-
query_dict = json.loads(query_dict[conn.EXTRA_KEY])
|
|
127
|
-
filtered_qs = {k: v for k, v in query_dict.items() if not filter_key_params(k)}
|
|
128
|
-
parsed = parsed._replace(query=urlencode(filtered_qs))
|
|
129
|
-
return urlunparse(parsed)
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
def get_connection(conn_id) -> Connection | None:
|
|
133
|
-
from airflow.hooks.base import BaseHook
|
|
134
|
-
|
|
135
|
-
with suppress(Exception):
|
|
136
|
-
return BaseHook.get_connection(conn_id=conn_id)
|
|
137
|
-
return None
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
def get_job_name(task):
|
|
59
|
+
def get_job_name(task: TaskInstance) -> str:
|
|
141
60
|
return f"{task.dag_id}.{task.task_id}"
|
|
142
61
|
|
|
143
62
|
|
|
@@ -150,6 +69,26 @@ def get_custom_facets(task_instance: TaskInstance | None = None) -> dict[str, An
|
|
|
150
69
|
return custom_facets
|
|
151
70
|
|
|
152
71
|
|
|
72
|
+
def get_fully_qualified_class_name(operator: BaseOperator | MappedOperator) -> str:
|
|
73
|
+
return operator.__class__.__module__ + "." + operator.__class__.__name__
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def is_operator_disabled(operator: BaseOperator | MappedOperator) -> bool:
|
|
77
|
+
return get_fully_qualified_class_name(operator) in conf.disabled_operators()
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def is_selective_lineage_enabled(obj: DAG | BaseOperator | MappedOperator) -> bool:
|
|
81
|
+
"""If selective enable is active check if DAG or Task is enabled to emit events."""
|
|
82
|
+
if not conf.selective_enable():
|
|
83
|
+
return True
|
|
84
|
+
if isinstance(obj, DAG):
|
|
85
|
+
return is_dag_lineage_enabled(obj)
|
|
86
|
+
elif isinstance(obj, (BaseOperator, MappedOperator)):
|
|
87
|
+
return is_task_lineage_enabled(obj)
|
|
88
|
+
else:
|
|
89
|
+
raise TypeError("is_selective_lineage_enabled can only be used on DAG or Operator objects")
|
|
90
|
+
|
|
91
|
+
|
|
153
92
|
class InfoJsonEncodable(dict):
|
|
154
93
|
"""
|
|
155
94
|
Airflow objects might not be json-encodable overall.
|
|
@@ -205,7 +144,7 @@ class InfoJsonEncodable(dict):
|
|
|
205
144
|
|
|
206
145
|
def _include_fields(self):
|
|
207
146
|
if self.includes and self.excludes:
|
|
208
|
-
raise
|
|
147
|
+
raise ValueError("Don't use both includes and excludes.")
|
|
209
148
|
if self.includes:
|
|
210
149
|
for field in self.includes:
|
|
211
150
|
if field not in self._fields and hasattr(self.obj, field):
|
|
@@ -256,23 +195,34 @@ class TaskInfo(InfoJsonEncodable):
|
|
|
256
195
|
"""Defines encoding BaseOperator/AbstractOperator object to JSON."""
|
|
257
196
|
|
|
258
197
|
renames = {
|
|
259
|
-
"_BaseOperator__init_kwargs": "args",
|
|
260
198
|
"_BaseOperator__from_mapped": "mapped",
|
|
261
199
|
"_downstream_task_ids": "downstream_task_ids",
|
|
262
200
|
"_upstream_task_ids": "upstream_task_ids",
|
|
201
|
+
"_is_setup": "is_setup",
|
|
202
|
+
"_is_teardown": "is_teardown",
|
|
263
203
|
}
|
|
264
|
-
|
|
265
|
-
"
|
|
266
|
-
"
|
|
267
|
-
"
|
|
268
|
-
"
|
|
269
|
-
"
|
|
270
|
-
"
|
|
271
|
-
"
|
|
272
|
-
"
|
|
273
|
-
"
|
|
274
|
-
"
|
|
275
|
-
"
|
|
204
|
+
includes = [
|
|
205
|
+
"depends_on_past",
|
|
206
|
+
"downstream_task_ids",
|
|
207
|
+
"execution_timeout",
|
|
208
|
+
"executor_config",
|
|
209
|
+
"ignore_first_depends_on_past",
|
|
210
|
+
"max_active_tis_per_dag",
|
|
211
|
+
"max_active_tis_per_dagrun",
|
|
212
|
+
"max_retry_delay",
|
|
213
|
+
"multiple_outputs",
|
|
214
|
+
"owner",
|
|
215
|
+
"priority_weight",
|
|
216
|
+
"queue",
|
|
217
|
+
"retries",
|
|
218
|
+
"retry_exponential_backoff",
|
|
219
|
+
"run_as_user",
|
|
220
|
+
"task_id",
|
|
221
|
+
"trigger_rule",
|
|
222
|
+
"upstream_task_ids",
|
|
223
|
+
"wait_for_downstream",
|
|
224
|
+
"wait_for_past_depends_before_skipping",
|
|
225
|
+
"weight_rule",
|
|
276
226
|
]
|
|
277
227
|
casts = {
|
|
278
228
|
"operator_class": lambda task: task.task_type,
|
|
@@ -306,18 +256,30 @@ def get_airflow_run_facet(
|
|
|
306
256
|
task_uuid: str,
|
|
307
257
|
):
|
|
308
258
|
return {
|
|
309
|
-
"airflow":
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
259
|
+
"airflow": attrs.asdict(
|
|
260
|
+
AirflowRunFacet(
|
|
261
|
+
dag=DagInfo(dag),
|
|
262
|
+
dagRun=DagRunInfo(dag_run),
|
|
263
|
+
taskInstance=TaskInstanceInfo(task_instance),
|
|
264
|
+
task=TaskInfo(task),
|
|
265
|
+
taskUuid=task_uuid,
|
|
266
|
+
)
|
|
267
|
+
)
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def get_unknown_source_attribute_run_facet(task: BaseOperator, name: str | None = None):
|
|
272
|
+
if not name:
|
|
273
|
+
name = get_operator_class(task).__name__
|
|
274
|
+
return {
|
|
275
|
+
"unknownSourceAttribute": attrs.asdict(
|
|
276
|
+
UnknownOperatorAttributeRunFacet(
|
|
277
|
+
unknownItems=[
|
|
278
|
+
UnknownOperatorInstance(
|
|
279
|
+
name=name,
|
|
280
|
+
properties=TaskInfo(task),
|
|
318
281
|
)
|
|
319
|
-
|
|
320
|
-
default=str,
|
|
282
|
+
]
|
|
321
283
|
)
|
|
322
284
|
)
|
|
323
285
|
}
|
|
@@ -412,14 +374,6 @@ def print_warning(log):
|
|
|
412
374
|
return decorator
|
|
413
375
|
|
|
414
376
|
|
|
415
|
-
@cache
|
|
416
|
-
def is_source_enabled() -> bool:
|
|
417
|
-
source_var = conf.get(
|
|
418
|
-
"openlineage", "disable_source_code", fallback=os.getenv("OPENLINEAGE_AIRFLOW_DISABLE_SOURCE_CODE")
|
|
419
|
-
)
|
|
420
|
-
return isinstance(source_var, str) and source_var.lower() not in ("true", "1", "t")
|
|
421
|
-
|
|
422
|
-
|
|
423
377
|
def get_filtered_unknown_operator_keys(operator: BaseOperator) -> dict:
|
|
424
378
|
not_required_keys = {"dag", "task_group"}
|
|
425
379
|
return {attr: value for attr, value in operator.__dict__.items() if attr not in not_required_keys}
|
|
@@ -28,7 +28,7 @@ build-backend = "flit_core.buildapi"
|
|
|
28
28
|
|
|
29
29
|
[project]
|
|
30
30
|
name = "apache-airflow-providers-openlineage"
|
|
31
|
-
version = "1.6.0"
|
|
31
|
+
version = "1.7.0"
|
|
32
32
|
description = "Provider package apache-airflow-providers-openlineage for Apache Airflow"
|
|
33
33
|
readme = "README.rst"
|
|
34
34
|
authors = [
|
|
@@ -51,6 +51,7 @@ classifiers = [
|
|
|
51
51
|
"Programming Language :: Python :: 3.9",
|
|
52
52
|
"Programming Language :: Python :: 3.10",
|
|
53
53
|
"Programming Language :: Python :: 3.11",
|
|
54
|
+
"Programming Language :: Python :: 3.12",
|
|
54
55
|
"Topic :: System :: Monitoring",
|
|
55
56
|
]
|
|
56
57
|
requires-python = "~=3.8"
|
|
@@ -63,8 +64,8 @@ dependencies = [
|
|
|
63
64
|
]
|
|
64
65
|
|
|
65
66
|
[project.urls]
|
|
66
|
-
"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.6.0"
|
|
67
|
-
"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.6.0/changelog.html"
|
|
67
|
+
"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.7.0"
|
|
68
|
+
"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.7.0/changelog.html"
|
|
68
69
|
"Bug Tracker" = "https://github.com/apache/airflow/issues"
|
|
69
70
|
"Source Code" = "https://github.com/apache/airflow"
|
|
70
71
|
"Slack Chat" = "https://s.apache.org/airflow-slack"
|
|
@@ -1,66 +0,0 @@
|
|
|
1
|
-
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
-
# or more contributor license agreements. See the NOTICE file
|
|
3
|
-
# distributed with this work for additional information
|
|
4
|
-
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
-
# to you under the Apache License, Version 2.0 (the
|
|
6
|
-
# "License"); you may not use this file except in compliance
|
|
7
|
-
# with the License. You may obtain a copy of the License at
|
|
8
|
-
#
|
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
-
#
|
|
11
|
-
# Unless required by applicable law or agreed to in writing,
|
|
12
|
-
# software distributed under the License is distributed on an
|
|
13
|
-
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
-
# KIND, either express or implied. See the License for the
|
|
15
|
-
# specific language governing permissions and limitations
|
|
16
|
-
# under the License.
|
|
17
|
-
from __future__ import annotations
|
|
18
|
-
|
|
19
|
-
import os
|
|
20
|
-
import typing
|
|
21
|
-
|
|
22
|
-
from airflow.configuration import conf
|
|
23
|
-
from airflow.providers.openlineage.plugins.adapter import OpenLineageAdapter
|
|
24
|
-
|
|
25
|
-
if typing.TYPE_CHECKING:
|
|
26
|
-
from airflow.models import TaskInstance
|
|
27
|
-
|
|
28
|
-
_JOB_NAMESPACE = conf.get("openlineage", "namespace", fallback=os.getenv("OPENLINEAGE_NAMESPACE", "default"))
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
def lineage_run_id(task_instance: TaskInstance):
|
|
32
|
-
"""
|
|
33
|
-
Macro function which returns the generated run id for a given task.
|
|
34
|
-
|
|
35
|
-
This can be used to forward the run id from a task to a child run so the job hierarchy is preserved.
|
|
36
|
-
|
|
37
|
-
.. seealso::
|
|
38
|
-
For more information on how to use this operator, take a look at the guide:
|
|
39
|
-
:ref:`howto/macros:openlineage`
|
|
40
|
-
"""
|
|
41
|
-
return OpenLineageAdapter.build_task_instance_run_id(
|
|
42
|
-
dag_id=task_instance.dag_id,
|
|
43
|
-
task_id=task_instance.task.task_id,
|
|
44
|
-
execution_date=task_instance.execution_date,
|
|
45
|
-
try_number=task_instance.try_number,
|
|
46
|
-
)
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
def lineage_parent_id(run_id: str, task_instance: TaskInstance):
|
|
50
|
-
"""
|
|
51
|
-
Macro function which returns the generated job and run id for a given task.
|
|
52
|
-
|
|
53
|
-
This can be used to forward the ids from a task to a child run so the job
|
|
54
|
-
hierarchy is preserved. Child run can create ParentRunFacet from those ids.
|
|
55
|
-
|
|
56
|
-
.. seealso::
|
|
57
|
-
For more information on how to use this macro, take a look at the guide:
|
|
58
|
-
:ref:`howto/macros:openlineage`
|
|
59
|
-
"""
|
|
60
|
-
job_name = OpenLineageAdapter.build_task_instance_run_id(
|
|
61
|
-
dag_id=task_instance.dag_id,
|
|
62
|
-
task_id=task_instance.task.task_id,
|
|
63
|
-
execution_date=task_instance.execution_date,
|
|
64
|
-
try_number=task_instance.try_number,
|
|
65
|
-
)
|
|
66
|
-
return f"{_JOB_NAMESPACE}/{job_name}/{run_id}"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|