apache-airflow-providers-openlineage 2.7.1rc1__py3-none-any.whl → 2.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/openlineage/__init__.py +1 -1
- airflow/providers/openlineage/extractors/manager.py +3 -3
- airflow/providers/openlineage/operators/empty.py +2 -2
- airflow/providers/openlineage/plugins/facets.py +1 -1
- airflow/providers/openlineage/plugins/listener.py +37 -11
- airflow/providers/openlineage/plugins/macros.py +45 -27
- airflow/providers/openlineage/plugins/openlineage.py +16 -12
- airflow/providers/openlineage/sqlparser.py +24 -12
- airflow/providers/openlineage/utils/selective_enable.py +2 -7
- airflow/providers/openlineage/utils/spark.py +1 -1
- airflow/providers/openlineage/utils/sql.py +9 -9
- airflow/providers/openlineage/utils/utils.py +210 -54
- airflow/providers/openlineage/version_compat.py +2 -13
- {apache_airflow_providers_openlineage-2.7.1rc1.dist-info → apache_airflow_providers_openlineage-2.8.0.dist-info}/METADATA +17 -15
- apache_airflow_providers_openlineage-2.8.0.dist-info/RECORD +35 -0
- apache_airflow_providers_openlineage-2.8.0.dist-info/licenses/NOTICE +5 -0
- apache_airflow_providers_openlineage-2.7.1rc1.dist-info/RECORD +0 -34
- {apache_airflow_providers_openlineage-2.7.1rc1.dist-info → apache_airflow_providers_openlineage-2.8.0.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_openlineage-2.7.1rc1.dist-info → apache_airflow_providers_openlineage-2.8.0.dist-info}/entry_points.txt +0 -0
- {airflow/providers/openlineage → apache_airflow_providers_openlineage-2.8.0.dist-info/licenses}/LICENSE +0 -0
|
@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version
|
|
|
29
29
|
|
|
30
30
|
__all__ = ["__version__"]
|
|
31
31
|
|
|
32
|
-
__version__ = "2.
|
|
32
|
+
__version__ = "2.8.0"
|
|
33
33
|
|
|
34
34
|
if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
|
|
35
35
|
"2.10.0"
|
|
@@ -41,8 +41,8 @@ from airflow.utils.state import TaskInstanceState
|
|
|
41
41
|
if TYPE_CHECKING:
|
|
42
42
|
from openlineage.client.event_v2 import Dataset
|
|
43
43
|
|
|
44
|
-
from airflow.models import Operator
|
|
45
44
|
from airflow.providers.common.compat.lineage.entities import Table
|
|
45
|
+
from airflow.providers.common.compat.sdk import BaseOperator
|
|
46
46
|
|
|
47
47
|
|
|
48
48
|
def _iter_extractor_types() -> Iterator[type[BaseExtractor]]:
|
|
@@ -161,7 +161,7 @@ class ExtractorManager(LoggingMixin):
|
|
|
161
161
|
|
|
162
162
|
return OperatorLineage()
|
|
163
163
|
|
|
164
|
-
def get_extractor_class(self, task:
|
|
164
|
+
def get_extractor_class(self, task: BaseOperator) -> type[BaseExtractor] | None:
|
|
165
165
|
if task.task_type in self.extractors:
|
|
166
166
|
return self.extractors[task.task_type]
|
|
167
167
|
|
|
@@ -172,7 +172,7 @@ class ExtractorManager(LoggingMixin):
|
|
|
172
172
|
return self.default_extractor
|
|
173
173
|
return None
|
|
174
174
|
|
|
175
|
-
def _get_extractor(self, task:
|
|
175
|
+
def _get_extractor(self, task: BaseOperator) -> BaseExtractor | None:
|
|
176
176
|
# TODO: Re-enable in Extractor PR
|
|
177
177
|
# self.instantiate_abstract_extractors(task)
|
|
178
178
|
extractor = self.get_extractor_class(task)
|
|
@@ -18,11 +18,11 @@ from __future__ import annotations
|
|
|
18
18
|
|
|
19
19
|
from typing import TYPE_CHECKING
|
|
20
20
|
|
|
21
|
+
from airflow.providers.common.compat.sdk import BaseOperator
|
|
21
22
|
from airflow.providers.openlineage.extractors.base import OperatorLineage
|
|
22
|
-
from airflow.providers.openlineage.version_compat import BaseOperator
|
|
23
23
|
|
|
24
24
|
if TYPE_CHECKING:
|
|
25
|
-
from airflow.
|
|
25
|
+
from airflow.providers.common.compat.sdk import Context
|
|
26
26
|
|
|
27
27
|
|
|
28
28
|
class EmptyOperator(BaseOperator):
|
|
@@ -29,6 +29,7 @@ from openlineage.client.serde import Serde
|
|
|
29
29
|
from airflow import settings
|
|
30
30
|
from airflow.listeners import hookimpl
|
|
31
31
|
from airflow.models import DagRun, TaskInstance
|
|
32
|
+
from airflow.providers.common.compat.sdk import timeout, timezone
|
|
32
33
|
from airflow.providers.openlineage import conf
|
|
33
34
|
from airflow.providers.openlineage.extractors import ExtractorManager, OperatorLineage
|
|
34
35
|
from airflow.providers.openlineage.plugins.adapter import OpenLineageAdapter, RunState
|
|
@@ -40,7 +41,9 @@ from airflow.providers.openlineage.utils.utils import (
|
|
|
40
41
|
get_airflow_mapped_task_facet,
|
|
41
42
|
get_airflow_run_facet,
|
|
42
43
|
get_dag_documentation,
|
|
44
|
+
get_dag_parent_run_facet,
|
|
43
45
|
get_job_name,
|
|
46
|
+
get_root_information_from_dagrun_conf,
|
|
44
47
|
get_task_documentation,
|
|
45
48
|
get_task_parent_run_facet,
|
|
46
49
|
get_user_provided_run_facets,
|
|
@@ -48,14 +51,14 @@ from airflow.providers.openlineage.utils.utils import (
|
|
|
48
51
|
is_selective_lineage_enabled,
|
|
49
52
|
print_warning,
|
|
50
53
|
)
|
|
51
|
-
from airflow.providers.openlineage.version_compat import timeout, timezone
|
|
52
54
|
from airflow.settings import configure_orm
|
|
53
55
|
from airflow.stats import Stats
|
|
54
56
|
from airflow.utils.state import TaskInstanceState
|
|
55
57
|
|
|
56
58
|
if TYPE_CHECKING:
|
|
59
|
+
from sqlalchemy.orm import Session
|
|
60
|
+
|
|
57
61
|
from airflow.sdk.execution_time.task_runner import RuntimeTaskInstance
|
|
58
|
-
from airflow.settings import Session
|
|
59
62
|
|
|
60
63
|
if sys.platform == "darwin":
|
|
61
64
|
from setproctitle import getproctitle
|
|
@@ -223,7 +226,11 @@ class OpenLineageListener:
|
|
|
223
226
|
task=task_metadata,
|
|
224
227
|
run_facets={
|
|
225
228
|
**get_user_provided_run_facets(task_instance, TaskInstanceState.RUNNING),
|
|
226
|
-
**get_task_parent_run_facet(
|
|
229
|
+
**get_task_parent_run_facet(
|
|
230
|
+
parent_run_id=parent_run_id,
|
|
231
|
+
parent_job_name=dag.dag_id,
|
|
232
|
+
**get_root_information_from_dagrun_conf(getattr(dagrun, "conf", {})),
|
|
233
|
+
),
|
|
227
234
|
**get_airflow_mapped_task_facet(task_instance),
|
|
228
235
|
**get_airflow_run_facet(dagrun, dag, task_instance, task, task_uuid),
|
|
229
236
|
**debug_facet,
|
|
@@ -350,7 +357,11 @@ class OpenLineageListener:
|
|
|
350
357
|
nominal_end_time=data_interval_end,
|
|
351
358
|
run_facets={
|
|
352
359
|
**get_user_provided_run_facets(task_instance, TaskInstanceState.SUCCESS),
|
|
353
|
-
**get_task_parent_run_facet(
|
|
360
|
+
**get_task_parent_run_facet(
|
|
361
|
+
parent_run_id=parent_run_id,
|
|
362
|
+
parent_job_name=dag.dag_id,
|
|
363
|
+
**get_root_information_from_dagrun_conf(getattr(dagrun, "conf", {})),
|
|
364
|
+
),
|
|
354
365
|
**get_airflow_run_facet(dagrun, dag, task_instance, task, task_uuid),
|
|
355
366
|
**get_airflow_debug_facet(),
|
|
356
367
|
},
|
|
@@ -488,7 +499,11 @@ class OpenLineageListener:
|
|
|
488
499
|
job_description_type=doc_type,
|
|
489
500
|
run_facets={
|
|
490
501
|
**get_user_provided_run_facets(task_instance, TaskInstanceState.FAILED),
|
|
491
|
-
**get_task_parent_run_facet(
|
|
502
|
+
**get_task_parent_run_facet(
|
|
503
|
+
parent_run_id=parent_run_id,
|
|
504
|
+
parent_job_name=dag.dag_id,
|
|
505
|
+
**get_root_information_from_dagrun_conf(getattr(dagrun, "conf", {})),
|
|
506
|
+
),
|
|
492
507
|
**get_airflow_run_facet(dagrun, dag, task_instance, task, task_uuid),
|
|
493
508
|
**get_airflow_debug_facet(),
|
|
494
509
|
},
|
|
@@ -539,7 +554,11 @@ class OpenLineageListener:
|
|
|
539
554
|
"job_description": None,
|
|
540
555
|
"job_description_type": None,
|
|
541
556
|
"run_facets": {
|
|
542
|
-
**get_task_parent_run_facet(
|
|
557
|
+
**get_task_parent_run_facet(
|
|
558
|
+
parent_run_id=parent_run_id,
|
|
559
|
+
parent_job_name=ti.dag_id,
|
|
560
|
+
**get_root_information_from_dagrun_conf(getattr(dagrun, "conf", {})),
|
|
561
|
+
),
|
|
543
562
|
**get_airflow_debug_facet(),
|
|
544
563
|
},
|
|
545
564
|
}
|
|
@@ -644,8 +663,6 @@ class OpenLineageListener:
|
|
|
644
663
|
)
|
|
645
664
|
data_interval_end = dag_run.data_interval_end.isoformat() if dag_run.data_interval_end else None
|
|
646
665
|
|
|
647
|
-
run_facets = {**get_airflow_dag_run_facet(dag_run)}
|
|
648
|
-
|
|
649
666
|
date = dag_run.logical_date
|
|
650
667
|
if AIRFLOW_V_3_0_PLUS and date is None:
|
|
651
668
|
date = dag_run.run_after
|
|
@@ -659,7 +676,6 @@ class OpenLineageListener:
|
|
|
659
676
|
start_date=dag_run.start_date,
|
|
660
677
|
nominal_start_time=data_interval_start,
|
|
661
678
|
nominal_end_time=data_interval_end,
|
|
662
|
-
run_facets=run_facets,
|
|
663
679
|
clear_number=dag_run.clear_number,
|
|
664
680
|
owners=[x.strip() for x in dag_run.dag.owner.split(",")] if dag_run.dag else None,
|
|
665
681
|
job_description=doc,
|
|
@@ -668,6 +684,10 @@ class OpenLineageListener:
|
|
|
668
684
|
# AirflowJobFacet should be created outside ProcessPoolExecutor that pickles objects,
|
|
669
685
|
# as it causes lack of some TaskGroup attributes and crashes event emission.
|
|
670
686
|
job_facets=get_airflow_job_facet(dag_run=dag_run),
|
|
687
|
+
run_facets={
|
|
688
|
+
**get_airflow_dag_run_facet(dag_run),
|
|
689
|
+
**get_dag_parent_run_facet(getattr(dag_run, "conf", {})),
|
|
690
|
+
},
|
|
671
691
|
)
|
|
672
692
|
except BaseException as e:
|
|
673
693
|
self.log.warning("OpenLineage received exception in method on_dag_run_running", exc_info=e)
|
|
@@ -715,7 +735,10 @@ class OpenLineageListener:
|
|
|
715
735
|
job_description_type=doc_type,
|
|
716
736
|
task_ids=task_ids,
|
|
717
737
|
dag_run_state=dag_run.get_state(),
|
|
718
|
-
run_facets={
|
|
738
|
+
run_facets={
|
|
739
|
+
**get_airflow_dag_run_facet(dag_run),
|
|
740
|
+
**get_dag_parent_run_facet(getattr(dag_run, "conf", {})),
|
|
741
|
+
},
|
|
719
742
|
)
|
|
720
743
|
except BaseException as e:
|
|
721
744
|
self.log.warning("OpenLineage received exception in method on_dag_run_success", exc_info=e)
|
|
@@ -764,7 +787,10 @@ class OpenLineageListener:
|
|
|
764
787
|
dag_run_state=dag_run.get_state(),
|
|
765
788
|
task_ids=task_ids,
|
|
766
789
|
msg=msg,
|
|
767
|
-
run_facets={
|
|
790
|
+
run_facets={
|
|
791
|
+
**get_airflow_dag_run_facet(dag_run),
|
|
792
|
+
**get_dag_parent_run_facet(getattr(dag_run, "conf", {})),
|
|
793
|
+
},
|
|
768
794
|
)
|
|
769
795
|
except BaseException as e:
|
|
770
796
|
self.log.warning("OpenLineage received exception in method on_dag_run_failed", exc_info=e)
|
|
@@ -20,11 +20,11 @@ from typing import TYPE_CHECKING
|
|
|
20
20
|
|
|
21
21
|
from airflow.providers.openlineage import conf
|
|
22
22
|
from airflow.providers.openlineage.plugins.adapter import OpenLineageAdapter
|
|
23
|
-
from airflow.providers.openlineage.utils.utils import get_job_name
|
|
23
|
+
from airflow.providers.openlineage.utils.utils import get_job_name, get_root_information_from_dagrun_conf
|
|
24
24
|
from airflow.providers.openlineage.version_compat import AIRFLOW_V_3_0_PLUS
|
|
25
25
|
|
|
26
26
|
if TYPE_CHECKING:
|
|
27
|
-
from airflow.
|
|
27
|
+
from airflow.providers.common.compat.sdk import TaskInstance
|
|
28
28
|
|
|
29
29
|
|
|
30
30
|
def lineage_job_namespace():
|
|
@@ -102,7 +102,7 @@ def lineage_root_parent_id(task_instance: TaskInstance):
|
|
|
102
102
|
"""
|
|
103
103
|
return "/".join(
|
|
104
104
|
(
|
|
105
|
-
|
|
105
|
+
lineage_root_job_namespace(task_instance),
|
|
106
106
|
lineage_root_job_name(task_instance),
|
|
107
107
|
lineage_root_run_id(task_instance),
|
|
108
108
|
)
|
|
@@ -110,10 +110,16 @@ def lineage_root_parent_id(task_instance: TaskInstance):
|
|
|
110
110
|
|
|
111
111
|
|
|
112
112
|
def lineage_root_job_name(task_instance: TaskInstance):
|
|
113
|
+
root_parent_job_name = _get_ol_root_id("root_parent_job_name", task_instance)
|
|
114
|
+
if root_parent_job_name:
|
|
115
|
+
return root_parent_job_name
|
|
113
116
|
return task_instance.dag_id
|
|
114
117
|
|
|
115
118
|
|
|
116
119
|
def lineage_root_run_id(task_instance: TaskInstance):
|
|
120
|
+
root_parent_run_id = _get_ol_root_id("root_parent_run_id", task_instance)
|
|
121
|
+
if root_parent_run_id:
|
|
122
|
+
return root_parent_run_id
|
|
117
123
|
return OpenLineageAdapter.build_dag_run_id(
|
|
118
124
|
dag_id=task_instance.dag_id,
|
|
119
125
|
logical_date=_get_logical_date(task_instance),
|
|
@@ -121,32 +127,44 @@ def lineage_root_run_id(task_instance: TaskInstance):
|
|
|
121
127
|
)
|
|
122
128
|
|
|
123
129
|
|
|
130
|
+
def lineage_root_job_namespace(task_instance: TaskInstance):
|
|
131
|
+
root_parent_job_namespace = _get_ol_root_id("root_parent_job_namespace", task_instance)
|
|
132
|
+
if root_parent_job_namespace:
|
|
133
|
+
return root_parent_job_namespace
|
|
134
|
+
return conf.namespace()
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _get_ol_root_id(id_key: str, task_instance: TaskInstance) -> str | None:
|
|
138
|
+
dr_conf = _get_dag_run_conf(task_instance=task_instance)
|
|
139
|
+
ol_root_info = get_root_information_from_dagrun_conf(dr_conf=dr_conf)
|
|
140
|
+
if ol_root_info and ol_root_info.get(id_key):
|
|
141
|
+
return ol_root_info[id_key]
|
|
142
|
+
return None
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def _get_dagrun_from_ti(task_instance: TaskInstance):
|
|
146
|
+
context = task_instance.get_template_context()
|
|
147
|
+
if getattr(task_instance, "dag_run", None):
|
|
148
|
+
return task_instance.dag_run
|
|
149
|
+
return context["dag_run"]
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def _get_dag_run_conf(task_instance: TaskInstance) -> dict:
|
|
153
|
+
dr = _get_dagrun_from_ti(task_instance=task_instance)
|
|
154
|
+
return dr.conf or {}
|
|
155
|
+
|
|
156
|
+
|
|
124
157
|
def _get_dag_run_clear_number(task_instance: TaskInstance):
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
context = task_instance.get_template_context()
|
|
128
|
-
if hasattr(task_instance, "dag_run"):
|
|
129
|
-
dag_run = task_instance.dag_run
|
|
130
|
-
else:
|
|
131
|
-
dag_run = context["dag_run"]
|
|
132
|
-
return dag_run.clear_number
|
|
133
|
-
return task_instance.dag_run.clear_number
|
|
158
|
+
dr = _get_dagrun_from_ti(task_instance=task_instance)
|
|
159
|
+
return dr.clear_number
|
|
134
160
|
|
|
135
161
|
|
|
136
162
|
def _get_logical_date(task_instance):
|
|
137
|
-
# todo: remove when min airflow version >= 3.0
|
|
138
163
|
if AIRFLOW_V_3_0_PLUS:
|
|
139
|
-
|
|
140
|
-
if
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
else:
|
|
147
|
-
date = dag_run.run_after
|
|
148
|
-
elif hasattr(task_instance, "logical_date"):
|
|
149
|
-
date = task_instance.logical_date
|
|
150
|
-
else:
|
|
151
|
-
date = task_instance.execution_date
|
|
152
|
-
return date
|
|
164
|
+
dr = _get_dagrun_from_ti(task_instance=task_instance)
|
|
165
|
+
if getattr(dr, "logical_date", None):
|
|
166
|
+
return dr.logical_date
|
|
167
|
+
return dr.run_after
|
|
168
|
+
if getattr(task_instance, "logical_date", None):
|
|
169
|
+
return task_instance.logical_date
|
|
170
|
+
return task_instance.execution_date
|
|
@@ -18,16 +18,21 @@ from __future__ import annotations
|
|
|
18
18
|
|
|
19
19
|
from airflow.plugins_manager import AirflowPlugin
|
|
20
20
|
from airflow.providers.openlineage import conf
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
21
|
+
|
|
22
|
+
# Conditional imports - only load expensive dependencies when plugin is enabled
|
|
23
|
+
if not conf.is_disabled():
|
|
24
|
+
from airflow.lineage.hook import HookLineageReader
|
|
25
|
+
from airflow.providers.openlineage.plugins.listener import get_openlineage_listener
|
|
26
|
+
from airflow.providers.openlineage.plugins.macros import (
|
|
27
|
+
lineage_job_name,
|
|
28
|
+
lineage_job_namespace,
|
|
29
|
+
lineage_parent_id,
|
|
30
|
+
lineage_root_job_name,
|
|
31
|
+
lineage_root_job_namespace,
|
|
32
|
+
lineage_root_parent_id,
|
|
33
|
+
lineage_root_run_id,
|
|
34
|
+
lineage_run_id,
|
|
35
|
+
)
|
|
31
36
|
|
|
32
37
|
|
|
33
38
|
class OpenLineageProviderPlugin(AirflowPlugin):
|
|
@@ -47,11 +52,10 @@ class OpenLineageProviderPlugin(AirflowPlugin):
|
|
|
47
52
|
lineage_parent_id,
|
|
48
53
|
lineage_root_run_id,
|
|
49
54
|
lineage_root_job_name,
|
|
55
|
+
lineage_root_job_namespace,
|
|
50
56
|
lineage_root_parent_id,
|
|
51
57
|
]
|
|
52
58
|
listeners = [get_openlineage_listener()]
|
|
53
|
-
from airflow.lineage.hook import HookLineageReader
|
|
54
|
-
|
|
55
59
|
hook_lineage_readers = [HookLineageReader]
|
|
56
60
|
else:
|
|
57
61
|
macros = []
|
|
@@ -39,8 +39,8 @@ if TYPE_CHECKING:
|
|
|
39
39
|
from openlineage.client.facet_v2 import JobFacet, RunFacet
|
|
40
40
|
from sqlalchemy.engine import Engine
|
|
41
41
|
|
|
42
|
+
from airflow.providers.common.compat.sdk import BaseHook
|
|
42
43
|
from airflow.providers.common.sql.hooks.sql import DbApiHook
|
|
43
|
-
from airflow.sdk import BaseHook
|
|
44
44
|
|
|
45
45
|
log = logging.getLogger(__name__)
|
|
46
46
|
|
|
@@ -232,8 +232,8 @@ class SQLParser(LoggingMixin):
|
|
|
232
232
|
else None,
|
|
233
233
|
)
|
|
234
234
|
|
|
235
|
+
@staticmethod
|
|
235
236
|
def get_metadata_from_parser(
|
|
236
|
-
self,
|
|
237
237
|
inputs: list[DbTableMeta],
|
|
238
238
|
outputs: list[DbTableMeta],
|
|
239
239
|
database_info: DatabaseInfo,
|
|
@@ -315,6 +315,7 @@ class SQLParser(LoggingMixin):
|
|
|
315
315
|
:param database_info: database specific information
|
|
316
316
|
:param database: when passed it takes precedence over parsed database name
|
|
317
317
|
:param sqlalchemy_engine: when passed, engine's dialect is used to compile SQL queries
|
|
318
|
+
:param use_connection: if call to db should be performed to enrich datasets (e.g., with schema)
|
|
318
319
|
"""
|
|
319
320
|
job_facets: dict[str, JobFacet] = {"sql": sql_job.SQLJobFacet(query=self.normalize_sql(sql))}
|
|
320
321
|
parse_result = self.parse(sql=self.split_sql_string(sql))
|
|
@@ -338,17 +339,28 @@ class SQLParser(LoggingMixin):
|
|
|
338
339
|
)
|
|
339
340
|
|
|
340
341
|
namespace = self.create_namespace(database_info=database_info)
|
|
342
|
+
inputs: list[Dataset] = []
|
|
343
|
+
outputs: list[Dataset] = []
|
|
341
344
|
if use_connection:
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
345
|
+
try:
|
|
346
|
+
inputs, outputs = self.parse_table_schemas(
|
|
347
|
+
hook=hook,
|
|
348
|
+
inputs=parse_result.in_tables,
|
|
349
|
+
outputs=parse_result.out_tables,
|
|
350
|
+
namespace=namespace,
|
|
351
|
+
database=database,
|
|
352
|
+
database_info=database_info,
|
|
353
|
+
sqlalchemy_engine=sqlalchemy_engine,
|
|
354
|
+
)
|
|
355
|
+
except Exception as e:
|
|
356
|
+
self.log.warning(
|
|
357
|
+
"OpenLineage method failed to enrich datasets using db metadata. Exception: `%s`",
|
|
358
|
+
e,
|
|
359
|
+
)
|
|
360
|
+
self.log.debug("OpenLineage failure details:", exc_info=True)
|
|
361
|
+
|
|
362
|
+
# If call to db failed or was not performed, use datasets from sql parsing alone
|
|
363
|
+
if not inputs and not outputs:
|
|
352
364
|
inputs, outputs = self.get_metadata_from_parser(
|
|
353
365
|
inputs=parse_result.in_tables,
|
|
354
366
|
outputs=parse_result.out_tables,
|
|
@@ -22,19 +22,14 @@ from typing import TYPE_CHECKING, TypeVar
|
|
|
22
22
|
|
|
23
23
|
from airflow.models import Param
|
|
24
24
|
from airflow.models.xcom_arg import XComArg
|
|
25
|
+
from airflow.providers.common.compat.sdk import DAG
|
|
25
26
|
|
|
26
27
|
if TYPE_CHECKING:
|
|
28
|
+
from airflow.providers.common.compat.sdk import BaseOperator, MappedOperator
|
|
27
29
|
from airflow.providers.openlineage.utils.utils import AnyOperator
|
|
28
|
-
from airflow.sdk import DAG, BaseOperator
|
|
29
|
-
from airflow.sdk.definitions.mappedoperator import MappedOperator
|
|
30
30
|
from airflow.serialization.serialized_objects import SerializedDAG
|
|
31
31
|
|
|
32
32
|
T = TypeVar("T", bound=DAG | BaseOperator | MappedOperator)
|
|
33
|
-
else:
|
|
34
|
-
try:
|
|
35
|
-
from airflow.sdk import DAG
|
|
36
|
-
except ImportError:
|
|
37
|
-
from airflow.models import DAG
|
|
38
33
|
|
|
39
34
|
ENABLE_OL_PARAM_NAME = "_selective_enable_ol"
|
|
40
35
|
ENABLE_OL_PARAM = Param(True, const=True)
|
|
@@ -29,9 +29,9 @@ from sqlalchemy import Column, MetaData, Table, and_, or_, union_all
|
|
|
29
29
|
|
|
30
30
|
if TYPE_CHECKING:
|
|
31
31
|
from sqlalchemy.engine import Engine
|
|
32
|
-
from sqlalchemy.sql import
|
|
32
|
+
from sqlalchemy.sql.elements import ColumnElement
|
|
33
33
|
|
|
34
|
-
from airflow.sdk import BaseHook
|
|
34
|
+
from airflow.providers.common.compat.sdk import BaseHook
|
|
35
35
|
|
|
36
36
|
|
|
37
37
|
log = logging.getLogger(__name__)
|
|
@@ -207,7 +207,7 @@ def create_filter_clauses(
|
|
|
207
207
|
mapping: dict,
|
|
208
208
|
information_schema_table: Table,
|
|
209
209
|
uppercase_names: bool = False,
|
|
210
|
-
) ->
|
|
210
|
+
) -> ColumnElement[bool]:
|
|
211
211
|
"""
|
|
212
212
|
Create comprehensive filter clauses for all tables in one database.
|
|
213
213
|
|
|
@@ -228,19 +228,19 @@ def create_filter_clauses(
|
|
|
228
228
|
for db, schema_mapping in mapping.items():
|
|
229
229
|
schema_level_clauses = []
|
|
230
230
|
for schema, tables in schema_mapping.items():
|
|
231
|
-
filter_clause = information_schema_table.c[table_name_column_name].in_(
|
|
232
|
-
name.upper() if uppercase_names else name for name in tables
|
|
231
|
+
filter_clause: ColumnElement[bool] = information_schema_table.c[table_name_column_name].in_(
|
|
232
|
+
[name.upper() if uppercase_names else name for name in tables]
|
|
233
233
|
)
|
|
234
234
|
if schema:
|
|
235
|
-
|
|
235
|
+
schema_upper = schema.upper() if uppercase_names else schema
|
|
236
236
|
filter_clause = and_(
|
|
237
|
-
information_schema_table.c[table_schema_column_name] ==
|
|
237
|
+
information_schema_table.c[table_schema_column_name] == schema_upper, filter_clause
|
|
238
238
|
)
|
|
239
239
|
schema_level_clauses.append(filter_clause)
|
|
240
240
|
if db and table_database_column_name:
|
|
241
|
-
|
|
241
|
+
db_upper = db.upper() if uppercase_names else db
|
|
242
242
|
filter_clause = and_(
|
|
243
|
-
information_schema_table.c[table_database_column_name] ==
|
|
243
|
+
information_schema_table.c[table_database_column_name] == db_upper, or_(*schema_level_clauses)
|
|
244
244
|
)
|
|
245
245
|
filter_clauses.append(filter_clause)
|
|
246
246
|
else:
|
|
@@ -33,8 +33,10 @@ from openlineage.client.utils import RedactMixin
|
|
|
33
33
|
from airflow import __version__ as AIRFLOW_VERSION
|
|
34
34
|
|
|
35
35
|
# TODO: move this maybe to Airflow's logic?
|
|
36
|
-
from airflow.models import DagRun, TaskReschedule
|
|
36
|
+
from airflow.models import DagRun, TaskInstance, TaskReschedule
|
|
37
37
|
from airflow.models.mappedoperator import MappedOperator as SerializedMappedOperator
|
|
38
|
+
from airflow.providers.common.compat.assets import Asset
|
|
39
|
+
from airflow.providers.common.compat.sdk import DAG, BaseOperator, BaseSensorOperator, MappedOperator
|
|
38
40
|
from airflow.providers.openlineage import (
|
|
39
41
|
__version__ as OPENLINEAGE_PROVIDER_VERSION,
|
|
40
42
|
conf,
|
|
@@ -57,11 +59,6 @@ from airflow.providers.openlineage.version_compat import AIRFLOW_V_3_0_PLUS, get
|
|
|
57
59
|
from airflow.serialization.serialized_objects import SerializedBaseOperator, SerializedDAG
|
|
58
60
|
from airflow.utils.module_loading import import_string
|
|
59
61
|
|
|
60
|
-
if AIRFLOW_V_3_0_PLUS:
|
|
61
|
-
from airflow.sdk import BaseSensorOperator
|
|
62
|
-
else:
|
|
63
|
-
from airflow.sensors.base import BaseSensorOperator # type: ignore[no-redef]
|
|
64
|
-
|
|
65
62
|
if not AIRFLOW_V_3_0_PLUS:
|
|
66
63
|
from airflow.utils.session import NEW_SESSION, provide_session
|
|
67
64
|
|
|
@@ -71,10 +68,6 @@ if TYPE_CHECKING:
|
|
|
71
68
|
from openlineage.client.event_v2 import Dataset as OpenLineageDataset
|
|
72
69
|
from openlineage.client.facet_v2 import RunFacet, processing_engine_run
|
|
73
70
|
|
|
74
|
-
from airflow.models import TaskInstance
|
|
75
|
-
from airflow.providers.common.compat.assets import Asset
|
|
76
|
-
from airflow.sdk import DAG, BaseOperator
|
|
77
|
-
from airflow.sdk.definitions.mappedoperator import MappedOperator
|
|
78
71
|
from airflow.sdk.execution_time.secrets_masker import (
|
|
79
72
|
Redactable,
|
|
80
73
|
Redacted,
|
|
@@ -85,21 +78,6 @@ if TYPE_CHECKING:
|
|
|
85
78
|
|
|
86
79
|
AnyOperator: TypeAlias = BaseOperator | MappedOperator | SerializedBaseOperator | SerializedMappedOperator
|
|
87
80
|
else:
|
|
88
|
-
try:
|
|
89
|
-
from airflow.sdk import DAG, BaseOperator
|
|
90
|
-
from airflow.sdk.definitions.mappedoperator import MappedOperator
|
|
91
|
-
except ImportError:
|
|
92
|
-
from airflow.models import DAG, BaseOperator, MappedOperator
|
|
93
|
-
|
|
94
|
-
try:
|
|
95
|
-
from airflow.providers.common.compat.assets import Asset
|
|
96
|
-
except ImportError:
|
|
97
|
-
if AIRFLOW_V_3_0_PLUS:
|
|
98
|
-
from airflow.sdk import Asset
|
|
99
|
-
else:
|
|
100
|
-
# dataset is renamed to asset since Airflow 3.0
|
|
101
|
-
from airflow.datasets import Dataset as Asset
|
|
102
|
-
|
|
103
81
|
try:
|
|
104
82
|
from airflow.sdk._shared.secrets_masker import (
|
|
105
83
|
Redactable,
|
|
@@ -169,27 +147,175 @@ def get_job_name(task: TaskInstance | RuntimeTaskInstance) -> str:
|
|
|
169
147
|
return f"{task.dag_id}.{task.task_id}"
|
|
170
148
|
|
|
171
149
|
|
|
172
|
-
def
|
|
173
|
-
parent_run_id: str,
|
|
150
|
+
def _get_parent_run_facet(
|
|
151
|
+
parent_run_id: str,
|
|
152
|
+
parent_job_name: str,
|
|
153
|
+
parent_job_namespace: str = conf.namespace(),
|
|
154
|
+
root_parent_run_id: str | None = None,
|
|
155
|
+
root_parent_job_name: str | None = None,
|
|
156
|
+
root_parent_job_namespace: str | None = None,
|
|
174
157
|
) -> dict[str, Any]:
|
|
175
|
-
"""
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
as external events for DAG runs are not yet handled.
|
|
180
|
-
"""
|
|
158
|
+
"""Create the parent run facet from identifiers."""
|
|
159
|
+
root_parent_run_id = root_parent_run_id or parent_run_id
|
|
160
|
+
root_parent_job_name = root_parent_job_name or parent_job_name
|
|
161
|
+
root_parent_job_namespace = root_parent_job_namespace or parent_job_namespace
|
|
181
162
|
return {
|
|
182
163
|
"parent": parent_run.ParentRunFacet(
|
|
183
164
|
run=parent_run.Run(runId=parent_run_id),
|
|
184
165
|
job=parent_run.Job(namespace=parent_job_namespace, name=parent_job_name),
|
|
185
166
|
root=parent_run.Root(
|
|
186
|
-
run=parent_run.RootRun(runId=
|
|
187
|
-
job=parent_run.RootJob(namespace=
|
|
167
|
+
run=parent_run.RootRun(runId=root_parent_run_id),
|
|
168
|
+
job=parent_run.RootJob(namespace=root_parent_job_namespace, name=root_parent_job_name),
|
|
188
169
|
),
|
|
189
170
|
)
|
|
190
171
|
}
|
|
191
172
|
|
|
192
173
|
|
|
174
|
+
def get_task_parent_run_facet(
|
|
175
|
+
parent_run_id: str,
|
|
176
|
+
parent_job_name: str,
|
|
177
|
+
parent_job_namespace: str = conf.namespace(),
|
|
178
|
+
root_parent_run_id: str | None = None,
|
|
179
|
+
root_parent_job_name: str | None = None,
|
|
180
|
+
root_parent_job_namespace: str | None = None,
|
|
181
|
+
) -> dict[str, Any]:
|
|
182
|
+
"""Retrieve the parent run facet."""
|
|
183
|
+
return _get_parent_run_facet(
|
|
184
|
+
parent_run_id=parent_run_id,
|
|
185
|
+
parent_job_namespace=parent_job_namespace,
|
|
186
|
+
parent_job_name=parent_job_name,
|
|
187
|
+
root_parent_run_id=root_parent_run_id,
|
|
188
|
+
root_parent_job_namespace=root_parent_job_namespace,
|
|
189
|
+
root_parent_job_name=root_parent_job_name,
|
|
190
|
+
)
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def _get_openlineage_data_from_dagrun_conf(dr_conf: dict | None) -> dict:
|
|
194
|
+
"""Return the 'openlineage' section from a DAG run config if valid, otherwise an empty dict."""
|
|
195
|
+
if not dr_conf or not isinstance(dr_conf, dict):
|
|
196
|
+
return {}
|
|
197
|
+
ol_data = dr_conf.get("openlineage")
|
|
198
|
+
return ol_data if isinstance(ol_data, dict) else {}
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def get_root_information_from_dagrun_conf(dr_conf: dict | None) -> dict[str, str]:
|
|
202
|
+
"""Extract root parent run and job information from a DAG run config."""
|
|
203
|
+
ol_data = _get_openlineage_data_from_dagrun_conf(dr_conf)
|
|
204
|
+
if not ol_data:
|
|
205
|
+
log.debug("No 'openlineage' data found in DAG run config.")
|
|
206
|
+
return {}
|
|
207
|
+
|
|
208
|
+
root_run_id = ol_data.get("rootParentRunId", "")
|
|
209
|
+
root_namespace = ol_data.get("rootParentJobNamespace", "")
|
|
210
|
+
root_name = ol_data.get("rootParentJobName", "")
|
|
211
|
+
|
|
212
|
+
all_root_info = (root_run_id, root_namespace, root_name)
|
|
213
|
+
if not all(all_root_info):
|
|
214
|
+
if any(all_root_info):
|
|
215
|
+
log.warning(
|
|
216
|
+
"Incomplete root OpenLineage information in DAG run config. "
|
|
217
|
+
"No root information will be used. Found values: "
|
|
218
|
+
"rootParentRunId='%s', rootParentJobNamespace='%s', rootParentJobName='%s'.",
|
|
219
|
+
root_run_id,
|
|
220
|
+
root_namespace,
|
|
221
|
+
root_name,
|
|
222
|
+
)
|
|
223
|
+
else:
|
|
224
|
+
log.debug("No 'openlineage' root information found in DAG run config.")
|
|
225
|
+
return {}
|
|
226
|
+
|
|
227
|
+
try: # Validate that runId is correct UUID
|
|
228
|
+
parent_run.RootRun(runId=root_run_id)
|
|
229
|
+
except ValueError:
|
|
230
|
+
log.warning(
|
|
231
|
+
"Invalid OpenLineage rootParentRunId '%s' in DAG run config - expected a valid UUID.",
|
|
232
|
+
root_run_id,
|
|
233
|
+
)
|
|
234
|
+
return {}
|
|
235
|
+
|
|
236
|
+
log.debug(
|
|
237
|
+
"Extracted valid root OpenLineage identifiers from DAG run config: "
|
|
238
|
+
"rootParentRunId='%s', rootParentJobNamespace='%s', rootParentJobName='%s'.",
|
|
239
|
+
root_run_id,
|
|
240
|
+
root_namespace,
|
|
241
|
+
root_name,
|
|
242
|
+
)
|
|
243
|
+
return {
|
|
244
|
+
"root_parent_run_id": root_run_id,
|
|
245
|
+
"root_parent_job_namespace": root_namespace,
|
|
246
|
+
"root_parent_job_name": root_name,
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def get_dag_parent_run_facet(dr_conf: dict | None) -> dict[str, parent_run.ParentRunFacet]:
|
|
251
|
+
"""Build the OpenLineage parent run facet from a DAG run config."""
|
|
252
|
+
ol_data = _get_openlineage_data_from_dagrun_conf(dr_conf)
|
|
253
|
+
if not ol_data:
|
|
254
|
+
log.debug("No 'openlineage' data found in DAG run config.")
|
|
255
|
+
return {}
|
|
256
|
+
|
|
257
|
+
parent_run_id = ol_data.get("parentRunId", "")
|
|
258
|
+
parent_job_namespace = ol_data.get("parentJobNamespace", "")
|
|
259
|
+
parent_job_name = ol_data.get("parentJobName", "")
|
|
260
|
+
|
|
261
|
+
all_parent_info = (parent_run_id, parent_job_namespace, parent_job_name)
|
|
262
|
+
if not all(all_parent_info):
|
|
263
|
+
if any(all_parent_info):
|
|
264
|
+
log.warning(
|
|
265
|
+
"Incomplete parent OpenLineage information in DAG run config. "
|
|
266
|
+
"ParentRunFacet will NOT be created. Found values: "
|
|
267
|
+
"parentRunId='%s', parentJobNamespace='%s', parentJobName='%s'.",
|
|
268
|
+
parent_run_id,
|
|
269
|
+
parent_job_namespace,
|
|
270
|
+
parent_job_name,
|
|
271
|
+
)
|
|
272
|
+
else:
|
|
273
|
+
log.debug("No 'openlineage' parent information found in DAG run config.")
|
|
274
|
+
return {}
|
|
275
|
+
|
|
276
|
+
try: # Validate that runId is correct UUID
|
|
277
|
+
parent_run.RootRun(runId=parent_run_id)
|
|
278
|
+
except ValueError:
|
|
279
|
+
log.warning(
|
|
280
|
+
"Invalid OpenLineage parentRunId '%s' in DAG run config - expected a valid UUID.",
|
|
281
|
+
parent_run_id,
|
|
282
|
+
)
|
|
283
|
+
return {}
|
|
284
|
+
|
|
285
|
+
log.debug(
|
|
286
|
+
"Extracted valid parent OpenLineage identifiers from DAG run config: "
|
|
287
|
+
"parentRunId='%s', parentJobNamespace='%s', parentJobName='%s'.",
|
|
288
|
+
parent_run_id,
|
|
289
|
+
parent_job_namespace,
|
|
290
|
+
parent_job_name,
|
|
291
|
+
)
|
|
292
|
+
|
|
293
|
+
root_info = get_root_information_from_dagrun_conf(dr_conf)
|
|
294
|
+
if root_info and all(root_info.values()):
|
|
295
|
+
root_parent_run_id = root_info["root_parent_run_id"]
|
|
296
|
+
root_parent_job_namespace = root_info["root_parent_job_namespace"]
|
|
297
|
+
root_parent_job_name = root_info["root_parent_job_name"]
|
|
298
|
+
else:
|
|
299
|
+
log.debug(
|
|
300
|
+
"Missing OpenLineage root identifiers in DAG run config, "
|
|
301
|
+
"parent identifiers will be used as root instead."
|
|
302
|
+
)
|
|
303
|
+
root_parent_run_id, root_parent_job_namespace, root_parent_job_name = (
|
|
304
|
+
parent_run_id,
|
|
305
|
+
parent_job_namespace,
|
|
306
|
+
parent_job_name,
|
|
307
|
+
)
|
|
308
|
+
|
|
309
|
+
return _get_parent_run_facet(
|
|
310
|
+
parent_run_id=parent_run_id,
|
|
311
|
+
parent_job_namespace=parent_job_namespace,
|
|
312
|
+
parent_job_name=parent_job_name,
|
|
313
|
+
root_parent_run_id=root_parent_run_id,
|
|
314
|
+
root_parent_job_namespace=root_parent_job_namespace,
|
|
315
|
+
root_parent_job_name=root_parent_job_name,
|
|
316
|
+
)
|
|
317
|
+
|
|
318
|
+
|
|
193
319
|
def _truncate_string_to_byte_size(s: str, max_size: int = _MAX_DOC_BYTES) -> str:
|
|
194
320
|
"""
|
|
195
321
|
Truncate a string to a maximum UTF-8 byte size, ensuring valid encoding.
|
|
@@ -464,14 +590,25 @@ class DagInfo(InfoJsonEncodable):
|
|
|
464
590
|
"fileloc",
|
|
465
591
|
"owner",
|
|
466
592
|
"owner_links",
|
|
467
|
-
"schedule_interval", # For Airflow 2
|
|
468
|
-
"timetable_summary", # For Airflow 3.
|
|
593
|
+
"schedule_interval", # For Airflow 2 only -> AF3 has timetable_summary
|
|
469
594
|
"start_date",
|
|
470
595
|
"tags",
|
|
471
596
|
]
|
|
472
|
-
casts = {
|
|
597
|
+
casts = {
|
|
598
|
+
"timetable": lambda dag: DagInfo.serialize_timetable(dag),
|
|
599
|
+
"timetable_summary": lambda dag: DagInfo.timetable_summary(dag),
|
|
600
|
+
}
|
|
473
601
|
renames = {"_dag_id": "dag_id"}
|
|
474
602
|
|
|
603
|
+
@classmethod
|
|
604
|
+
def timetable_summary(cls, dag: DAG) -> str | None:
|
|
605
|
+
"""Extract summary from timetable if missing a ``timetable_summary`` property."""
|
|
606
|
+
if getattr(dag, "timetable_summary", None):
|
|
607
|
+
return dag.timetable_summary
|
|
608
|
+
if getattr(dag, "timetable", None):
|
|
609
|
+
return dag.timetable.summary
|
|
610
|
+
return None
|
|
611
|
+
|
|
475
612
|
@classmethod
|
|
476
613
|
def serialize_timetable(cls, dag: DAG) -> dict[str, Any]:
|
|
477
614
|
# This is enough for Airflow 2.10+ and has all the information needed
|
|
@@ -740,16 +877,20 @@ def get_airflow_state_run_facet(
|
|
|
740
877
|
dag_id: str, run_id: str, task_ids: list[str], dag_run_state: DagRunState
|
|
741
878
|
) -> dict[str, AirflowStateRunFacet]:
|
|
742
879
|
tis = DagRun.fetch_task_instances(dag_id=dag_id, run_id=run_id, task_ids=task_ids)
|
|
880
|
+
|
|
881
|
+
def get_task_duration(ti):
|
|
882
|
+
if ti.duration is not None:
|
|
883
|
+
return ti.duration
|
|
884
|
+
if ti.end_date is not None and ti.start_date is not None:
|
|
885
|
+
return (ti.end_date - ti.start_date).total_seconds()
|
|
886
|
+
# Fallback to 0.0 for tasks with missing timestamps (e.g., skipped/terminated tasks)
|
|
887
|
+
return 0.0
|
|
888
|
+
|
|
743
889
|
return {
|
|
744
890
|
"airflowState": AirflowStateRunFacet(
|
|
745
891
|
dagRunState=dag_run_state,
|
|
746
892
|
tasksState={ti.task_id: ti.state for ti in tis},
|
|
747
|
-
tasksDuration={
|
|
748
|
-
ti.task_id: ti.duration
|
|
749
|
-
if ti.duration is not None
|
|
750
|
-
else (ti.end_date - ti.start_date).total_seconds()
|
|
751
|
-
for ti in tis
|
|
752
|
-
},
|
|
893
|
+
tasksDuration={ti.task_id: get_task_duration(ti) for ti in tis},
|
|
753
894
|
)
|
|
754
895
|
}
|
|
755
896
|
|
|
@@ -788,17 +929,32 @@ def _get_task_groups_details(dag: DAG | SerializedDAG) -> dict:
|
|
|
788
929
|
def _emits_ol_events(task: AnyOperator) -> bool:
|
|
789
930
|
config_selective_enabled = is_selective_lineage_enabled(task)
|
|
790
931
|
config_disabled_for_operators = is_operator_disabled(task)
|
|
791
|
-
|
|
792
|
-
# in
|
|
793
|
-
|
|
794
|
-
(
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
932
|
+
|
|
933
|
+
is_task_schedulable_method = getattr(TaskInstance, "is_task_schedulable", None) # Added in 3.2.0 #56039
|
|
934
|
+
if is_task_schedulable_method and callable(is_task_schedulable_method):
|
|
935
|
+
is_skipped_as_empty_operator = not is_task_schedulable_method(task)
|
|
936
|
+
else:
|
|
937
|
+
# For older Airflow versions, re-create Airflow core internal logic as
|
|
938
|
+
# empty operators without callbacks/outlets are skipped for optimization by Airflow
|
|
939
|
+
# in airflow.models.taskinstance.TaskInstance._schedule_downstream_tasks or
|
|
940
|
+
# airflow.models.dagrun.DagRun.schedule_tis, depending on Airflow version
|
|
941
|
+
is_skipped_as_empty_operator = all(
|
|
942
|
+
(
|
|
943
|
+
task.inherits_from_empty_operator,
|
|
944
|
+
not getattr(task, "on_execute_callback", None),
|
|
945
|
+
not getattr(task, "on_success_callback", None),
|
|
946
|
+
not task.outlets,
|
|
947
|
+
not (task.inlets and get_base_airflow_version_tuple() >= (3, 0, 2)), # Added in 3.0.2 #50773
|
|
948
|
+
not (
|
|
949
|
+
getattr(task, "has_on_execute_callback", None) # Added in 3.1.0 #54569
|
|
950
|
+
and get_base_airflow_version_tuple() >= (3, 1, 0)
|
|
951
|
+
),
|
|
952
|
+
not (
|
|
953
|
+
getattr(task, "has_on_success_callback", None) # Added in 3.1.0 #54569
|
|
954
|
+
and get_base_airflow_version_tuple() >= (3, 1, 0)
|
|
955
|
+
),
|
|
956
|
+
)
|
|
800
957
|
)
|
|
801
|
-
)
|
|
802
958
|
|
|
803
959
|
emits_ol_events = all(
|
|
804
960
|
(
|
|
@@ -34,16 +34,5 @@ def get_base_airflow_version_tuple() -> tuple[int, int, int]:
|
|
|
34
34
|
|
|
35
35
|
AIRFLOW_V_3_0_PLUS = get_base_airflow_version_tuple() >= (3, 0, 0)
|
|
36
36
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
else:
|
|
40
|
-
from airflow.models import BaseOperator
|
|
41
|
-
|
|
42
|
-
try:
|
|
43
|
-
from airflow.sdk import timezone
|
|
44
|
-
from airflow.sdk.execution_time.timeout import timeout
|
|
45
|
-
except ImportError:
|
|
46
|
-
from airflow.utils import timezone # type: ignore[attr-defined,no-redef]
|
|
47
|
-
from airflow.utils.timeout import timeout # type: ignore[assignment,attr-defined,no-redef]
|
|
48
|
-
|
|
49
|
-
__all__ = ["AIRFLOW_V_3_0_PLUS", "BaseOperator", "timeout", "timezone"]
|
|
37
|
+
|
|
38
|
+
__all__ = ["AIRFLOW_V_3_0_PLUS"]
|
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: apache-airflow-providers-openlineage
|
|
3
|
-
Version: 2.
|
|
3
|
+
Version: 2.8.0
|
|
4
4
|
Summary: Provider package apache-airflow-providers-openlineage for Apache Airflow
|
|
5
5
|
Keywords: airflow-provider,openlineage,airflow,integration
|
|
6
6
|
Author-email: Apache Software Foundation <dev@airflow.apache.org>
|
|
7
7
|
Maintainer-email: Apache Software Foundation <dev@airflow.apache.org>
|
|
8
8
|
Requires-Python: >=3.10
|
|
9
9
|
Description-Content-Type: text/x-rst
|
|
10
|
+
License-Expression: Apache-2.0
|
|
10
11
|
Classifier: Development Status :: 5 - Production/Stable
|
|
11
12
|
Classifier: Environment :: Console
|
|
12
13
|
Classifier: Environment :: Web Environment
|
|
@@ -14,21 +15,22 @@ Classifier: Intended Audience :: Developers
|
|
|
14
15
|
Classifier: Intended Audience :: System Administrators
|
|
15
16
|
Classifier: Framework :: Apache Airflow
|
|
16
17
|
Classifier: Framework :: Apache Airflow :: Provider
|
|
17
|
-
Classifier: License :: OSI Approved :: Apache Software License
|
|
18
18
|
Classifier: Programming Language :: Python :: 3.10
|
|
19
19
|
Classifier: Programming Language :: Python :: 3.11
|
|
20
20
|
Classifier: Programming Language :: Python :: 3.12
|
|
21
21
|
Classifier: Programming Language :: Python :: 3.13
|
|
22
22
|
Classifier: Topic :: System :: Monitoring
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
Requires-Dist: apache-airflow
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
License-File: NOTICE
|
|
25
|
+
Requires-Dist: apache-airflow>=2.10.0
|
|
26
|
+
Requires-Dist: apache-airflow-providers-common-sql>=1.20.0
|
|
27
|
+
Requires-Dist: apache-airflow-providers-common-compat>=1.8.0
|
|
26
28
|
Requires-Dist: attrs>=22.2
|
|
27
|
-
Requires-Dist: openlineage-integration-common>=1.
|
|
28
|
-
Requires-Dist: openlineage-python>=1.
|
|
29
|
+
Requires-Dist: openlineage-integration-common>=1.38.0
|
|
30
|
+
Requires-Dist: openlineage-python>=1.38.0
|
|
29
31
|
Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
|
|
30
|
-
Project-URL: Changelog, https://airflow.
|
|
31
|
-
Project-URL: Documentation, https://airflow.
|
|
32
|
+
Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.8.0/changelog.html
|
|
33
|
+
Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.8.0
|
|
32
34
|
Project-URL: Mastodon, https://fosstodon.org/@airflow
|
|
33
35
|
Project-URL: Slack Chat, https://s.apache.org/airflow-slack
|
|
34
36
|
Project-URL: Source Code, https://github.com/apache/airflow
|
|
@@ -59,7 +61,7 @@ Project-URL: YouTube, https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/
|
|
|
59
61
|
|
|
60
62
|
Package ``apache-airflow-providers-openlineage``
|
|
61
63
|
|
|
62
|
-
Release: ``2.
|
|
64
|
+
Release: ``2.8.0``
|
|
63
65
|
|
|
64
66
|
|
|
65
67
|
`OpenLineage <https://openlineage.io/>`__
|
|
@@ -72,7 +74,7 @@ This is a provider package for ``openlineage`` provider. All classes for this pr
|
|
|
72
74
|
are in ``airflow.providers.openlineage`` python package.
|
|
73
75
|
|
|
74
76
|
You can find package information and changelog for the provider
|
|
75
|
-
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.
|
|
77
|
+
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.8.0/>`_.
|
|
76
78
|
|
|
77
79
|
Installation
|
|
78
80
|
------------
|
|
@@ -91,10 +93,10 @@ PIP package Version required
|
|
|
91
93
|
========================================== ==================
|
|
92
94
|
``apache-airflow`` ``>=2.10.0``
|
|
93
95
|
``apache-airflow-providers-common-sql`` ``>=1.20.0``
|
|
94
|
-
``apache-airflow-providers-common-compat`` ``>=1.
|
|
96
|
+
``apache-airflow-providers-common-compat`` ``>=1.8.0``
|
|
95
97
|
``attrs`` ``>=22.2``
|
|
96
|
-
``openlineage-integration-common`` ``>=1.
|
|
97
|
-
``openlineage-python`` ``>=1.
|
|
98
|
+
``openlineage-integration-common`` ``>=1.38.0``
|
|
99
|
+
``openlineage-python`` ``>=1.38.0``
|
|
98
100
|
========================================== ==================
|
|
99
101
|
|
|
100
102
|
Cross provider package dependencies
|
|
@@ -118,5 +120,5 @@ Dependent package
|
|
|
118
120
|
================================================================================================================== =================
|
|
119
121
|
|
|
120
122
|
The changelog for the provider package can be found in the
|
|
121
|
-
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.
|
|
123
|
+
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.8.0/changelog.html>`_.
|
|
122
124
|
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
airflow/providers/openlineage/__init__.py,sha256=nm3vnbgK8t7yL2JVr-tc2n83QRT7rGNeF-b1VmI2F7Y,1500
|
|
2
|
+
airflow/providers/openlineage/conf.py,sha256=9v2DpQ84BBCdRxPlh8QsboTqX8HXe-qeHVcTMRL5c3o,5807
|
|
3
|
+
airflow/providers/openlineage/get_provider_info.py,sha256=2Oy13q-jA-UYt-a9pYBk4PnImYshGnJCPD1Jj80ChNw,9453
|
|
4
|
+
airflow/providers/openlineage/sqlparser.py,sha256=fMYRVG55JDxsceS5IujJCNBCC0wMVcpiu2zIa57bnE8,21022
|
|
5
|
+
airflow/providers/openlineage/version_compat.py,sha256=0Xg0fTYA-jp6VYZFVQbpebkzXCj8kh3EHx6pGxnY-1I,1571
|
|
6
|
+
airflow/providers/openlineage/extractors/__init__.py,sha256=I0X4f6zUniclyD9zT0DFHRImpCpJVP4MkPJT3cd7X5I,1081
|
|
7
|
+
airflow/providers/openlineage/extractors/base.py,sha256=0K7prvOeYjs30P87zgcOmABZOZYsw0WYoFBstS_vgmY,6449
|
|
8
|
+
airflow/providers/openlineage/extractors/bash.py,sha256=3aR0PXs8fzRLibRxXN1R8wMZnGzyCur7mjpy8e5GC4A,2583
|
|
9
|
+
airflow/providers/openlineage/extractors/manager.py,sha256=ApRbvjCwmV8KwvwcSIja127-2dqjeRxUbpDD5S8L8-o,12873
|
|
10
|
+
airflow/providers/openlineage/extractors/python.py,sha256=BHh2G68i9mrpJb-xF0YZm0NaQevcxsPixSXzLlFz6XE,3180
|
|
11
|
+
airflow/providers/openlineage/facets/AirflowDagRunFacet.json,sha256=ie6c-J3-wGgk80WDTGWePz18o6DbW--TNM7BMF4WfcU,2251
|
|
12
|
+
airflow/providers/openlineage/facets/AirflowDebugRunFacet.json,sha256=_zA5gFqGje5MOH1SmdMeA5ViOHvW_pV4oijEAvkuBbY,768
|
|
13
|
+
airflow/providers/openlineage/facets/AirflowJobFacet.json,sha256=rS9PuPWOi1Jc5B4a5qLxS_Az7Q9Eb3jVYQnN41iXDC0,1187
|
|
14
|
+
airflow/providers/openlineage/facets/AirflowRunFacet.json,sha256=70mEaZShgSJp-2xr0bVvw3ljiGOPEaXD591fhuAQm_o,5953
|
|
15
|
+
airflow/providers/openlineage/facets/AirflowStateRunFacet.json,sha256=xhHQEKD9Jopw-oqbkCCrrwFjfXnxvuJAritsmegKjuQ,937
|
|
16
|
+
airflow/providers/openlineage/facets/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
|
|
17
|
+
airflow/providers/openlineage/operators/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
|
|
18
|
+
airflow/providers/openlineage/operators/empty.py,sha256=egAPTMPZA9VB_X_OCM32QMiKD81NOxe7Ysi3Ncv4lQs,1705
|
|
19
|
+
airflow/providers/openlineage/plugins/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
|
|
20
|
+
airflow/providers/openlineage/plugins/adapter.py,sha256=moIgAoBTTVaKuU-i00xvkKyR7Rv9hoIU79awcZVJwg8,23065
|
|
21
|
+
airflow/providers/openlineage/plugins/facets.py,sha256=a6Olf3IezkGodTMpR_IUmO3Nid3xXIbT316NkjpGbBQ,4286
|
|
22
|
+
airflow/providers/openlineage/plugins/listener.py,sha256=OKDqTp5eFuc-D0472eMajCy9i8ivCSKBGCQPyjfGyjg,33366
|
|
23
|
+
airflow/providers/openlineage/plugins/macros.py,sha256=G_Ad3RFnozQN9scT9_rfiVcj9fK2nYk3tOmLQBX7Qq8,6028
|
|
24
|
+
airflow/providers/openlineage/plugins/openlineage.py,sha256=loHRsCkwFY-rfBCo8EPttd9kwZ7P_wdMsErem1-Vc2o,2251
|
|
25
|
+
airflow/providers/openlineage/utils/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
|
|
26
|
+
airflow/providers/openlineage/utils/selective_enable.py,sha256=_j60-4PjuFOEZt0avrtXOwqY3aP7fvI4X1aaVBfF0QQ,3424
|
|
27
|
+
airflow/providers/openlineage/utils/spark.py,sha256=82i8vBOz-q-o6QJr836TXARD29BGvHel-2oEhFBadME,7640
|
|
28
|
+
airflow/providers/openlineage/utils/sql.py,sha256=osoHieaycsNCE6czg53lp2PcdumskTG1OVIplHpMnD4,9634
|
|
29
|
+
airflow/providers/openlineage/utils/utils.py,sha256=prewg6VTA41H-hsRDimzGQ0_ZQWBlqcgnRxb73l0LGY,42953
|
|
30
|
+
apache_airflow_providers_openlineage-2.8.0.dist-info/entry_points.txt,sha256=GAx0_i2OeZzqaiiiYuA-xchICDXiCT5kVqpKSxsOjt4,214
|
|
31
|
+
apache_airflow_providers_openlineage-2.8.0.dist-info/licenses/LICENSE,sha256=gXPVwptPlW1TJ4HSuG5OMPg-a3h43OGMkZRR1rpwfJA,10850
|
|
32
|
+
apache_airflow_providers_openlineage-2.8.0.dist-info/licenses/NOTICE,sha256=E3-_E02gwwSEFzeeWPKmnIjOoos3hW28CLISV6sYrbQ,168
|
|
33
|
+
apache_airflow_providers_openlineage-2.8.0.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
|
|
34
|
+
apache_airflow_providers_openlineage-2.8.0.dist-info/METADATA,sha256=pL555plUmBI353nj-4weyjP7yDkBHXsF-XgWPR-26r4,5699
|
|
35
|
+
apache_airflow_providers_openlineage-2.8.0.dist-info/RECORD,,
|
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
airflow/providers/openlineage/LICENSE,sha256=gXPVwptPlW1TJ4HSuG5OMPg-a3h43OGMkZRR1rpwfJA,10850
|
|
2
|
-
airflow/providers/openlineage/__init__.py,sha256=nYsV_xtFWF8HJOMfs8oL5yZCnRUSCSNP2uPr4_oa1fE,1500
|
|
3
|
-
airflow/providers/openlineage/conf.py,sha256=9v2DpQ84BBCdRxPlh8QsboTqX8HXe-qeHVcTMRL5c3o,5807
|
|
4
|
-
airflow/providers/openlineage/get_provider_info.py,sha256=2Oy13q-jA-UYt-a9pYBk4PnImYshGnJCPD1Jj80ChNw,9453
|
|
5
|
-
airflow/providers/openlineage/sqlparser.py,sha256=8Aq0qbUUBthKjXBV756p2aBf8RYfCuBBfgxwhGpQIg4,20360
|
|
6
|
-
airflow/providers/openlineage/version_compat.py,sha256=weU73JlGTWEcfDLnuFGOXw9Yiagp-bU_--nRgogt-jk,2020
|
|
7
|
-
airflow/providers/openlineage/extractors/__init__.py,sha256=I0X4f6zUniclyD9zT0DFHRImpCpJVP4MkPJT3cd7X5I,1081
|
|
8
|
-
airflow/providers/openlineage/extractors/base.py,sha256=0K7prvOeYjs30P87zgcOmABZOZYsw0WYoFBstS_vgmY,6449
|
|
9
|
-
airflow/providers/openlineage/extractors/bash.py,sha256=3aR0PXs8fzRLibRxXN1R8wMZnGzyCur7mjpy8e5GC4A,2583
|
|
10
|
-
airflow/providers/openlineage/extractors/manager.py,sha256=kVo5OHkpUoYIiT2RvwUt128jC_Q4EosdLC0sP1YfIH0,12840
|
|
11
|
-
airflow/providers/openlineage/extractors/python.py,sha256=BHh2G68i9mrpJb-xF0YZm0NaQevcxsPixSXzLlFz6XE,3180
|
|
12
|
-
airflow/providers/openlineage/facets/AirflowDagRunFacet.json,sha256=ie6c-J3-wGgk80WDTGWePz18o6DbW--TNM7BMF4WfcU,2251
|
|
13
|
-
airflow/providers/openlineage/facets/AirflowDebugRunFacet.json,sha256=_zA5gFqGje5MOH1SmdMeA5ViOHvW_pV4oijEAvkuBbY,768
|
|
14
|
-
airflow/providers/openlineage/facets/AirflowJobFacet.json,sha256=rS9PuPWOi1Jc5B4a5qLxS_Az7Q9Eb3jVYQnN41iXDC0,1187
|
|
15
|
-
airflow/providers/openlineage/facets/AirflowRunFacet.json,sha256=70mEaZShgSJp-2xr0bVvw3ljiGOPEaXD591fhuAQm_o,5953
|
|
16
|
-
airflow/providers/openlineage/facets/AirflowStateRunFacet.json,sha256=xhHQEKD9Jopw-oqbkCCrrwFjfXnxvuJAritsmegKjuQ,937
|
|
17
|
-
airflow/providers/openlineage/facets/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
|
|
18
|
-
airflow/providers/openlineage/operators/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
|
|
19
|
-
airflow/providers/openlineage/operators/empty.py,sha256=g3ksadUeHW6IydzqIT4KxGU3Agt-F7NmKmOS6T47UDs,1710
|
|
20
|
-
airflow/providers/openlineage/plugins/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
|
|
21
|
-
airflow/providers/openlineage/plugins/adapter.py,sha256=moIgAoBTTVaKuU-i00xvkKyR7Rv9hoIU79awcZVJwg8,23065
|
|
22
|
-
airflow/providers/openlineage/plugins/facets.py,sha256=x2EPwst9MsoO53OpFV_aANO_rhiPq_2GLP4UOrqBnnQ,4279
|
|
23
|
-
airflow/providers/openlineage/plugins/listener.py,sha256=IQbTH5yvbR7R85Ffuvxq3oCi_5a_VDy_FVongNSMCfI,32329
|
|
24
|
-
airflow/providers/openlineage/plugins/macros.py,sha256=RfxkpNq78CHzfTAf9X7MQ_zRArMRu9sSD2j69fPSK7s,5265
|
|
25
|
-
airflow/providers/openlineage/plugins/openlineage.py,sha256=dP3GOVtOGAIokeaeRx2OW_c1TKAxDvATlD9OGMyXqr0,2032
|
|
26
|
-
airflow/providers/openlineage/utils/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
|
|
27
|
-
airflow/providers/openlineage/utils/selective_enable.py,sha256=XpozjxcWBASAQXSR2N0GkA-QZVq6EmerOzyR4-eQ64M,3521
|
|
28
|
-
airflow/providers/openlineage/utils/spark.py,sha256=X5liLxVLgQcgPF_0lFtQULeMOv_9dGj-HFjtZvWFgOo,7626
|
|
29
|
-
airflow/providers/openlineage/utils/sql.py,sha256=b_k2fUyGGWzR1eau7tgq7vKQJsR7wPQzDF8M-WRq6jk,9548
|
|
30
|
-
airflow/providers/openlineage/utils/utils.py,sha256=4sSXFo8reWjULIUdymCcpjwyCacaU_PhoBGyg8oa6pM,36466
|
|
31
|
-
apache_airflow_providers_openlineage-2.7.1rc1.dist-info/entry_points.txt,sha256=GAx0_i2OeZzqaiiiYuA-xchICDXiCT5kVqpKSxsOjt4,214
|
|
32
|
-
apache_airflow_providers_openlineage-2.7.1rc1.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
|
|
33
|
-
apache_airflow_providers_openlineage-2.7.1rc1.dist-info/METADATA,sha256=ToyDLBswKOdulpvb6x8-WO-fsE9FXSj2g630a4pw0HQ,5714
|
|
34
|
-
apache_airflow_providers_openlineage-2.7.1rc1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|