apache-airflow-providers-openlineage 2.2.0__py3-none-any.whl → 2.3.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apache-airflow-providers-openlineage might be problematic. Click here for more details.
- airflow/providers/openlineage/__init__.py +3 -3
- airflow/providers/openlineage/extractors/base.py +4 -1
- airflow/providers/openlineage/plugins/adapter.py +0 -36
- airflow/providers/openlineage/plugins/listener.py +12 -41
- airflow/providers/openlineage/plugins/macros.py +42 -15
- airflow/providers/openlineage/plugins/openlineage.py +2 -4
- airflow/providers/openlineage/utils/spark.py +5 -0
- airflow/providers/openlineage/utils/utils.py +51 -11
- airflow/providers/openlineage/version_compat.py +0 -1
- {apache_airflow_providers_openlineage-2.2.0.dist-info → apache_airflow_providers_openlineage-2.3.0rc1.dist-info}/METADATA +10 -10
- {apache_airflow_providers_openlineage-2.2.0.dist-info → apache_airflow_providers_openlineage-2.3.0rc1.dist-info}/RECORD +13 -13
- {apache_airflow_providers_openlineage-2.2.0.dist-info → apache_airflow_providers_openlineage-2.3.0rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_openlineage-2.2.0.dist-info → apache_airflow_providers_openlineage-2.3.0rc1.dist-info}/entry_points.txt +0 -0
|
@@ -29,11 +29,11 @@ from airflow import __version__ as airflow_version
|
|
|
29
29
|
|
|
30
30
|
__all__ = ["__version__"]
|
|
31
31
|
|
|
32
|
-
__version__ = "2.
|
|
32
|
+
__version__ = "2.3.0"
|
|
33
33
|
|
|
34
34
|
if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
|
|
35
|
-
"2.
|
|
35
|
+
"2.10.0"
|
|
36
36
|
):
|
|
37
37
|
raise RuntimeError(
|
|
38
|
-
f"The package `apache-airflow-providers-openlineage:{__version__}` needs Apache Airflow 2.
|
|
38
|
+
f"The package `apache-airflow-providers-openlineage:{__version__}` needs Apache Airflow 2.10.0+"
|
|
39
39
|
)
|
|
@@ -138,7 +138,10 @@ class DefaultExtractor(BaseExtractor):
|
|
|
138
138
|
|
|
139
139
|
def _get_openlineage_facets(self, get_facets_method, *args) -> OperatorLineage | None:
|
|
140
140
|
try:
|
|
141
|
-
facets: OperatorLineage = get_facets_method(*args)
|
|
141
|
+
facets: OperatorLineage | None = get_facets_method(*args)
|
|
142
|
+
if facets is None:
|
|
143
|
+
self.log.debug("OpenLineage method returned `None`")
|
|
144
|
+
return None
|
|
142
145
|
# "rewrite" OperatorLineage to safeguard against different version of the same class
|
|
143
146
|
# that was existing in openlineage-airflow package outside of Airflow repo
|
|
144
147
|
return OperatorLineage(
|
|
@@ -32,7 +32,6 @@ from openlineage.client.facet_v2 import (
|
|
|
32
32
|
job_type_job,
|
|
33
33
|
nominal_time_run,
|
|
34
34
|
ownership_job,
|
|
35
|
-
parent_run,
|
|
36
35
|
source_code_location_job,
|
|
37
36
|
)
|
|
38
37
|
from openlineage.client.uuid import generate_static_uuid
|
|
@@ -188,8 +187,6 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
188
187
|
job_name: str,
|
|
189
188
|
job_description: str,
|
|
190
189
|
event_time: str,
|
|
191
|
-
parent_job_name: str | None,
|
|
192
|
-
parent_run_id: str | None,
|
|
193
190
|
code_location: str | None,
|
|
194
191
|
nominal_start_time: str | None,
|
|
195
192
|
nominal_end_time: str | None,
|
|
@@ -204,9 +201,6 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
204
201
|
:param job_name: globally unique identifier of task in dag
|
|
205
202
|
:param job_description: user provided description of job
|
|
206
203
|
:param event_time:
|
|
207
|
-
:param parent_job_name: the name of the parent job (typically the DAG,
|
|
208
|
-
but possibly a task group)
|
|
209
|
-
:param parent_run_id: identifier of job spawning this task
|
|
210
204
|
:param code_location: file path or URL of DAG file
|
|
211
205
|
:param nominal_start_time: scheduled time of dag run
|
|
212
206
|
:param nominal_end_time: following schedule of dag run
|
|
@@ -223,9 +217,6 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
223
217
|
eventTime=event_time,
|
|
224
218
|
run=self._build_run(
|
|
225
219
|
run_id=run_id,
|
|
226
|
-
job_name=job_name,
|
|
227
|
-
parent_job_name=parent_job_name,
|
|
228
|
-
parent_run_id=parent_run_id,
|
|
229
220
|
nominal_start_time=nominal_start_time,
|
|
230
221
|
nominal_end_time=nominal_end_time,
|
|
231
222
|
run_facets=run_facets,
|
|
@@ -248,8 +239,6 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
248
239
|
self,
|
|
249
240
|
run_id: str,
|
|
250
241
|
job_name: str,
|
|
251
|
-
parent_job_name: str | None,
|
|
252
|
-
parent_run_id: str | None,
|
|
253
242
|
end_time: str,
|
|
254
243
|
task: OperatorLineage,
|
|
255
244
|
run_facets: dict[str, RunFacet] | None = None,
|
|
@@ -259,9 +248,6 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
259
248
|
|
|
260
249
|
:param run_id: globally unique identifier of task in dag run
|
|
261
250
|
:param job_name: globally unique identifier of task between dags
|
|
262
|
-
:param parent_job_name: the name of the parent job (typically the DAG,
|
|
263
|
-
but possibly a task group)
|
|
264
|
-
:param parent_run_id: identifier of job spawning this task
|
|
265
251
|
:param end_time: time of task completion
|
|
266
252
|
:param task: metadata container with information extracted from operator
|
|
267
253
|
:param run_facets: additional run facets
|
|
@@ -275,9 +261,6 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
275
261
|
eventTime=end_time,
|
|
276
262
|
run=self._build_run(
|
|
277
263
|
run_id=run_id,
|
|
278
|
-
job_name=job_name,
|
|
279
|
-
parent_job_name=parent_job_name,
|
|
280
|
-
parent_run_id=parent_run_id,
|
|
281
264
|
run_facets=run_facets,
|
|
282
265
|
),
|
|
283
266
|
job=self._build_job(job_name, job_type=_JOB_TYPE_TASK, job_facets=task.job_facets),
|
|
@@ -291,8 +274,6 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
291
274
|
self,
|
|
292
275
|
run_id: str,
|
|
293
276
|
job_name: str,
|
|
294
|
-
parent_job_name: str | None,
|
|
295
|
-
parent_run_id: str | None,
|
|
296
277
|
end_time: str,
|
|
297
278
|
task: OperatorLineage,
|
|
298
279
|
error: str | BaseException | None = None,
|
|
@@ -303,9 +284,6 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
303
284
|
|
|
304
285
|
:param run_id: globally unique identifier of task in dag run
|
|
305
286
|
:param job_name: globally unique identifier of task between dags
|
|
306
|
-
:param parent_job_name: the name of the parent job (typically the DAG,
|
|
307
|
-
but possibly a task group)
|
|
308
|
-
:param parent_run_id: identifier of job spawning this task
|
|
309
287
|
:param end_time: time of task completion
|
|
310
288
|
:param task: metadata container with information extracted from operator
|
|
311
289
|
:param run_facets: custom run facets
|
|
@@ -332,9 +310,6 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
332
310
|
eventTime=end_time,
|
|
333
311
|
run=self._build_run(
|
|
334
312
|
run_id=run_id,
|
|
335
|
-
job_name=job_name,
|
|
336
|
-
parent_job_name=parent_job_name,
|
|
337
|
-
parent_run_id=parent_run_id,
|
|
338
313
|
run_facets=run_facets,
|
|
339
314
|
),
|
|
340
315
|
job=self._build_job(job_name, job_type=_JOB_TYPE_TASK, job_facets=task.job_facets),
|
|
@@ -372,7 +347,6 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
372
347
|
run_id=self.build_dag_run_id(
|
|
373
348
|
dag_id=dag_id, logical_date=logical_date, clear_number=clear_number
|
|
374
349
|
),
|
|
375
|
-
job_name=dag_id,
|
|
376
350
|
nominal_start_time=nominal_start_time,
|
|
377
351
|
nominal_end_time=nominal_end_time,
|
|
378
352
|
run_facets={**run_facets, **get_airflow_debug_facet(), **get_processing_engine_facet()},
|
|
@@ -473,9 +447,6 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
473
447
|
@staticmethod
|
|
474
448
|
def _build_run(
|
|
475
449
|
run_id: str,
|
|
476
|
-
job_name: str,
|
|
477
|
-
parent_job_name: str | None = None,
|
|
478
|
-
parent_run_id: str | None = None,
|
|
479
450
|
nominal_start_time: str | None = None,
|
|
480
451
|
nominal_end_time: str | None = None,
|
|
481
452
|
run_facets: dict[str, RunFacet] | None = None,
|
|
@@ -485,13 +456,6 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
485
456
|
facets.update(
|
|
486
457
|
{"nominalTime": nominal_time_run.NominalTimeRunFacet(nominal_start_time, nominal_end_time)}
|
|
487
458
|
)
|
|
488
|
-
if parent_run_id:
|
|
489
|
-
parent_run_facet = parent_run.ParentRunFacet(
|
|
490
|
-
run=parent_run.Run(runId=parent_run_id),
|
|
491
|
-
job=parent_run.Job(namespace=conf.namespace(), name=parent_job_name or job_name),
|
|
492
|
-
)
|
|
493
|
-
facets.update({"parent": parent_run_facet})
|
|
494
|
-
|
|
495
459
|
if run_facets:
|
|
496
460
|
facets.update(run_facets)
|
|
497
461
|
|
|
@@ -33,7 +33,6 @@ from airflow.providers.openlineage import conf
|
|
|
33
33
|
from airflow.providers.openlineage.extractors import ExtractorManager, OperatorLineage
|
|
34
34
|
from airflow.providers.openlineage.plugins.adapter import OpenLineageAdapter, RunState
|
|
35
35
|
from airflow.providers.openlineage.utils.utils import (
|
|
36
|
-
AIRFLOW_V_2_10_PLUS,
|
|
37
36
|
AIRFLOW_V_3_0_PLUS,
|
|
38
37
|
get_airflow_dag_run_facet,
|
|
39
38
|
get_airflow_debug_facet,
|
|
@@ -41,6 +40,7 @@ from airflow.providers.openlineage.utils.utils import (
|
|
|
41
40
|
get_airflow_mapped_task_facet,
|
|
42
41
|
get_airflow_run_facet,
|
|
43
42
|
get_job_name,
|
|
43
|
+
get_task_parent_run_facet,
|
|
44
44
|
get_user_provided_run_facets,
|
|
45
45
|
is_operator_disabled,
|
|
46
46
|
is_selective_lineage_enabled,
|
|
@@ -59,13 +59,6 @@ if TYPE_CHECKING:
|
|
|
59
59
|
_openlineage_listener: OpenLineageListener | None = None
|
|
60
60
|
|
|
61
61
|
|
|
62
|
-
def _get_try_number_success(val):
|
|
63
|
-
# todo: remove when min airflow version >= 2.10.0
|
|
64
|
-
if AIRFLOW_V_2_10_PLUS:
|
|
65
|
-
return val.try_number
|
|
66
|
-
return val.try_number - 1
|
|
67
|
-
|
|
68
|
-
|
|
69
62
|
def _executor_initializer():
|
|
70
63
|
"""
|
|
71
64
|
Initialize processes for the executor used with DAGRun listener's methods (on scheduler).
|
|
@@ -209,14 +202,13 @@ class OpenLineageListener:
|
|
|
209
202
|
job_name=get_job_name(task),
|
|
210
203
|
job_description=dag.description,
|
|
211
204
|
event_time=start_date.isoformat(),
|
|
212
|
-
parent_job_name=dag.dag_id,
|
|
213
|
-
parent_run_id=parent_run_id,
|
|
214
205
|
code_location=None,
|
|
215
206
|
nominal_start_time=data_interval_start,
|
|
216
207
|
nominal_end_time=data_interval_end,
|
|
217
208
|
owners=dag.owner.split(", "),
|
|
218
209
|
task=task_metadata,
|
|
219
210
|
run_facets={
|
|
211
|
+
**get_task_parent_run_facet(parent_run_id=parent_run_id, parent_job_name=dag.dag_id),
|
|
220
212
|
**get_user_provided_run_facets(task_instance, TaskInstanceState.RUNNING),
|
|
221
213
|
**get_airflow_mapped_task_facet(task_instance),
|
|
222
214
|
**get_airflow_run_facet(dagrun, dag, task_instance, task, task_uuid),
|
|
@@ -304,7 +296,7 @@ class OpenLineageListener:
|
|
|
304
296
|
task_uuid = self.adapter.build_task_instance_run_id(
|
|
305
297
|
dag_id=dag.dag_id,
|
|
306
298
|
task_id=task.task_id,
|
|
307
|
-
try_number=
|
|
299
|
+
try_number=task_instance.try_number,
|
|
308
300
|
logical_date=date,
|
|
309
301
|
map_index=task_instance.map_index,
|
|
310
302
|
)
|
|
@@ -322,11 +314,10 @@ class OpenLineageListener:
|
|
|
322
314
|
redacted_event = self.adapter.complete_task(
|
|
323
315
|
run_id=task_uuid,
|
|
324
316
|
job_name=get_job_name(task),
|
|
325
|
-
parent_job_name=dag.dag_id,
|
|
326
|
-
parent_run_id=parent_run_id,
|
|
327
317
|
end_time=end_date.isoformat(),
|
|
328
318
|
task=task_metadata,
|
|
329
319
|
run_facets={
|
|
320
|
+
**get_task_parent_run_facet(parent_run_id=parent_run_id, parent_job_name=dag.dag_id),
|
|
330
321
|
**get_user_provided_run_facets(task_instance, TaskInstanceState.SUCCESS),
|
|
331
322
|
**get_airflow_run_facet(dagrun, dag, task_instance, task, task_uuid),
|
|
332
323
|
**get_airflow_debug_facet(),
|
|
@@ -366,8 +357,7 @@ class OpenLineageListener:
|
|
|
366
357
|
dagrun = context["dag_run"]
|
|
367
358
|
dag = context["dag"]
|
|
368
359
|
self._on_task_instance_failed(task_instance, dag, dagrun, task, error)
|
|
369
|
-
|
|
370
|
-
elif AIRFLOW_V_2_10_PLUS:
|
|
360
|
+
else:
|
|
371
361
|
|
|
372
362
|
@hookimpl
|
|
373
363
|
def on_task_instance_failed(
|
|
@@ -382,19 +372,6 @@ class OpenLineageListener:
|
|
|
382
372
|
if TYPE_CHECKING:
|
|
383
373
|
assert task
|
|
384
374
|
self._on_task_instance_failed(task_instance, task.dag, task_instance.dag_run, task, error)
|
|
385
|
-
else:
|
|
386
|
-
|
|
387
|
-
@hookimpl
|
|
388
|
-
def on_task_instance_failed(
|
|
389
|
-
self,
|
|
390
|
-
previous_state: TaskInstanceState,
|
|
391
|
-
task_instance: TaskInstance,
|
|
392
|
-
session: Session, # type: ignore[valid-type]
|
|
393
|
-
) -> None:
|
|
394
|
-
task = task_instance.task
|
|
395
|
-
if TYPE_CHECKING:
|
|
396
|
-
assert task
|
|
397
|
-
self._on_task_instance_failed(task_instance, task.dag, task_instance.dag_run, task)
|
|
398
375
|
|
|
399
376
|
def _on_task_instance_failed(
|
|
400
377
|
self,
|
|
@@ -456,12 +433,11 @@ class OpenLineageListener:
|
|
|
456
433
|
redacted_event = self.adapter.fail_task(
|
|
457
434
|
run_id=task_uuid,
|
|
458
435
|
job_name=get_job_name(task),
|
|
459
|
-
parent_job_name=dag.dag_id,
|
|
460
|
-
parent_run_id=parent_run_id,
|
|
461
436
|
end_time=end_date.isoformat(),
|
|
462
437
|
task=task_metadata,
|
|
463
438
|
error=error,
|
|
464
439
|
run_facets={
|
|
440
|
+
**get_task_parent_run_facet(parent_run_id=parent_run_id, parent_job_name=dag.dag_id),
|
|
465
441
|
**get_user_provided_run_facets(task_instance, TaskInstanceState.FAILED),
|
|
466
442
|
**get_airflow_run_facet(dagrun, dag, task_instance, task, task_uuid),
|
|
467
443
|
**get_airflow_debug_facet(),
|
|
@@ -504,11 +480,12 @@ class OpenLineageListener:
|
|
|
504
480
|
adapter_kwargs = {
|
|
505
481
|
"run_id": task_uuid,
|
|
506
482
|
"job_name": get_job_name(ti),
|
|
507
|
-
"parent_job_name": dagrun.dag_id,
|
|
508
|
-
"parent_run_id": parent_run_id,
|
|
509
483
|
"end_time": end_date.isoformat(),
|
|
510
484
|
"task": OperatorLineage(),
|
|
511
|
-
"run_facets":
|
|
485
|
+
"run_facets": {
|
|
486
|
+
**get_task_parent_run_facet(parent_run_id=parent_run_id, parent_job_name=ti.dag_id),
|
|
487
|
+
**get_airflow_debug_facet(),
|
|
488
|
+
},
|
|
512
489
|
}
|
|
513
490
|
|
|
514
491
|
if ti_state == TaskInstanceState.FAILED:
|
|
@@ -651,10 +628,7 @@ class OpenLineageListener:
|
|
|
651
628
|
self.log.debug("Executor have not started before `on_dag_run_success`")
|
|
652
629
|
return
|
|
653
630
|
|
|
654
|
-
|
|
655
|
-
task_ids = DagRun._get_partial_task_ids(dag_run.dag)
|
|
656
|
-
else:
|
|
657
|
-
task_ids = dag_run.dag.task_ids if dag_run.dag and dag_run.dag.partial else None
|
|
631
|
+
task_ids = DagRun._get_partial_task_ids(dag_run.dag)
|
|
658
632
|
|
|
659
633
|
date = dag_run.logical_date
|
|
660
634
|
if AIRFLOW_V_3_0_PLUS and date is None:
|
|
@@ -690,10 +664,7 @@ class OpenLineageListener:
|
|
|
690
664
|
self.log.debug("Executor have not started before `on_dag_run_failed`")
|
|
691
665
|
return
|
|
692
666
|
|
|
693
|
-
|
|
694
|
-
task_ids = DagRun._get_partial_task_ids(dag_run.dag)
|
|
695
|
-
else:
|
|
696
|
-
task_ids = dag_run.dag.task_ids if dag_run.dag and dag_run.dag.partial else None
|
|
667
|
+
task_ids = DagRun._get_partial_task_ids(dag_run.dag)
|
|
697
668
|
|
|
698
669
|
date = dag_run.logical_date
|
|
699
670
|
if AIRFLOW_V_3_0_PLUS and date is None:
|
|
@@ -59,25 +59,11 @@ def lineage_run_id(task_instance: TaskInstance):
|
|
|
59
59
|
For more information take a look at the guide:
|
|
60
60
|
:ref:`howto/macros:openlineage`
|
|
61
61
|
"""
|
|
62
|
-
if AIRFLOW_V_3_0_PLUS:
|
|
63
|
-
context = task_instance.get_template_context()
|
|
64
|
-
if hasattr(task_instance, "dag_run"):
|
|
65
|
-
dag_run = task_instance.dag_run
|
|
66
|
-
elif hasattr(context, "dag_run"):
|
|
67
|
-
dag_run = context["dag_run"]
|
|
68
|
-
if hasattr(dag_run, "logical_date") and dag_run.logical_date:
|
|
69
|
-
date = dag_run.logical_date
|
|
70
|
-
else:
|
|
71
|
-
date = dag_run.run_after
|
|
72
|
-
elif hasattr(task_instance, "logical_date"):
|
|
73
|
-
date = task_instance.logical_date
|
|
74
|
-
else:
|
|
75
|
-
date = task_instance.execution_date
|
|
76
62
|
return OpenLineageAdapter.build_task_instance_run_id(
|
|
77
63
|
dag_id=task_instance.dag_id,
|
|
78
64
|
task_id=task_instance.task_id,
|
|
79
65
|
try_number=task_instance.try_number,
|
|
80
|
-
logical_date=
|
|
66
|
+
logical_date=_get_logical_date(task_instance),
|
|
81
67
|
map_index=task_instance.map_index,
|
|
82
68
|
)
|
|
83
69
|
|
|
@@ -101,3 +87,44 @@ def lineage_parent_id(task_instance: TaskInstance):
|
|
|
101
87
|
lineage_run_id(task_instance),
|
|
102
88
|
)
|
|
103
89
|
)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def lineage_root_parent_id(task_instance: TaskInstance):
|
|
93
|
+
return "/".join(
|
|
94
|
+
(
|
|
95
|
+
lineage_job_namespace(),
|
|
96
|
+
lineage_root_job_name(task_instance),
|
|
97
|
+
lineage_root_run_id(task_instance),
|
|
98
|
+
)
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def lineage_root_job_name(task_instance: TaskInstance):
|
|
103
|
+
return task_instance.dag_id
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def lineage_root_run_id(task_instance: TaskInstance):
|
|
107
|
+
return OpenLineageAdapter.build_dag_run_id(
|
|
108
|
+
dag_id=task_instance.dag_id,
|
|
109
|
+
logical_date=_get_logical_date(task_instance),
|
|
110
|
+
clear_number=task_instance.dag_run.clear_number,
|
|
111
|
+
)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _get_logical_date(task_instance):
|
|
115
|
+
# todo: remove when min airflow version >= 3.0
|
|
116
|
+
if AIRFLOW_V_3_0_PLUS:
|
|
117
|
+
context = task_instance.get_template_context()
|
|
118
|
+
if hasattr(task_instance, "dag_run"):
|
|
119
|
+
dag_run = task_instance.dag_run
|
|
120
|
+
elif hasattr(context, "dag_run"):
|
|
121
|
+
dag_run = context["dag_run"]
|
|
122
|
+
if hasattr(dag_run, "logical_date") and dag_run.logical_date:
|
|
123
|
+
date = dag_run.logical_date
|
|
124
|
+
else:
|
|
125
|
+
date = dag_run.run_after
|
|
126
|
+
elif hasattr(task_instance, "logical_date"):
|
|
127
|
+
date = task_instance.logical_date
|
|
128
|
+
else:
|
|
129
|
+
date = task_instance.execution_date
|
|
130
|
+
return date
|
|
@@ -25,7 +25,6 @@ from airflow.providers.openlineage.plugins.macros import (
|
|
|
25
25
|
lineage_parent_id,
|
|
26
26
|
lineage_run_id,
|
|
27
27
|
)
|
|
28
|
-
from airflow.providers.openlineage.version_compat import AIRFLOW_V_2_10_PLUS
|
|
29
28
|
|
|
30
29
|
|
|
31
30
|
class OpenLineageProviderPlugin(AirflowPlugin):
|
|
@@ -40,10 +39,9 @@ class OpenLineageProviderPlugin(AirflowPlugin):
|
|
|
40
39
|
if not conf.is_disabled():
|
|
41
40
|
macros = [lineage_job_namespace, lineage_job_name, lineage_run_id, lineage_parent_id]
|
|
42
41
|
listeners = [get_openlineage_listener()]
|
|
43
|
-
|
|
44
|
-
from airflow.lineage.hook import HookLineageReader
|
|
42
|
+
from airflow.lineage.hook import HookLineageReader
|
|
45
43
|
|
|
46
|
-
|
|
44
|
+
hook_lineage_readers = [HookLineageReader]
|
|
47
45
|
else:
|
|
48
46
|
macros = []
|
|
49
47
|
listeners = []
|
|
@@ -24,6 +24,8 @@ from airflow.providers.openlineage.plugins.listener import get_openlineage_liste
|
|
|
24
24
|
from airflow.providers.openlineage.plugins.macros import (
|
|
25
25
|
lineage_job_name,
|
|
26
26
|
lineage_job_namespace,
|
|
27
|
+
lineage_root_job_name,
|
|
28
|
+
lineage_root_run_id,
|
|
27
29
|
lineage_run_id,
|
|
28
30
|
)
|
|
29
31
|
|
|
@@ -48,6 +50,9 @@ def _get_parent_job_information_as_spark_properties(context: Context) -> dict:
|
|
|
48
50
|
"spark.openlineage.parentJobNamespace": lineage_job_namespace(),
|
|
49
51
|
"spark.openlineage.parentJobName": lineage_job_name(ti), # type: ignore[arg-type]
|
|
50
52
|
"spark.openlineage.parentRunId": lineage_run_id(ti), # type: ignore[arg-type]
|
|
53
|
+
"spark.openlineage.rootParentRunId": lineage_root_run_id(ti), # type: ignore[arg-type]
|
|
54
|
+
"spark.openlineage.rootParentJobName": lineage_root_job_name(ti), # type: ignore[arg-type]
|
|
55
|
+
"spark.openlineage.rootParentJobNamespace": lineage_job_namespace(),
|
|
51
56
|
}
|
|
52
57
|
|
|
53
58
|
|
|
@@ -26,6 +26,7 @@ from importlib import metadata
|
|
|
26
26
|
from typing import TYPE_CHECKING, Any, Callable
|
|
27
27
|
|
|
28
28
|
import attrs
|
|
29
|
+
from openlineage.client.facet_v2 import parent_run
|
|
29
30
|
from openlineage.client.utils import RedactMixin
|
|
30
31
|
|
|
31
32
|
from airflow import __version__ as AIRFLOW_VERSION
|
|
@@ -50,7 +51,7 @@ from airflow.providers.openlineage.utils.selective_enable import (
|
|
|
50
51
|
is_dag_lineage_enabled,
|
|
51
52
|
is_task_lineage_enabled,
|
|
52
53
|
)
|
|
53
|
-
from airflow.providers.openlineage.version_compat import
|
|
54
|
+
from airflow.providers.openlineage.version_compat import AIRFLOW_V_3_0_PLUS
|
|
54
55
|
from airflow.sensors.base import BaseSensorOperator
|
|
55
56
|
from airflow.serialization.serialized_objects import SerializedBaseOperator
|
|
56
57
|
from airflow.utils.module_loading import import_string
|
|
@@ -126,6 +127,27 @@ def get_job_name(task: TaskInstance) -> str:
|
|
|
126
127
|
return f"{task.dag_id}.{task.task_id}"
|
|
127
128
|
|
|
128
129
|
|
|
130
|
+
def get_task_parent_run_facet(
|
|
131
|
+
parent_run_id: str, parent_job_name: str, parent_job_namespace: str = conf.namespace()
|
|
132
|
+
) -> dict[str, Any]:
|
|
133
|
+
"""
|
|
134
|
+
Retrieve the parent run facet for task-level events.
|
|
135
|
+
|
|
136
|
+
This facet currently always points to the DAG-level run ID and name,
|
|
137
|
+
as external events for DAG runs are not yet handled.
|
|
138
|
+
"""
|
|
139
|
+
return {
|
|
140
|
+
"parent": parent_run.ParentRunFacet(
|
|
141
|
+
run=parent_run.Run(runId=parent_run_id),
|
|
142
|
+
job=parent_run.Job(namespace=parent_job_namespace, name=parent_job_name),
|
|
143
|
+
root=parent_run.Root(
|
|
144
|
+
run=parent_run.RootRun(runId=parent_run_id),
|
|
145
|
+
job=parent_run.RootJob(namespace=parent_job_namespace, name=parent_job_name),
|
|
146
|
+
),
|
|
147
|
+
)
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
|
|
129
151
|
def get_airflow_mapped_task_facet(task_instance: TaskInstance) -> dict[str, Any]:
|
|
130
152
|
# check for -1 comes from SmartSensor compatibility with dynamic task mapping
|
|
131
153
|
# this comes from Airflow code
|
|
@@ -384,7 +406,13 @@ class DagRunInfo(InfoJsonEncodable):
|
|
|
384
406
|
"end_date",
|
|
385
407
|
]
|
|
386
408
|
|
|
387
|
-
casts = {
|
|
409
|
+
casts = {
|
|
410
|
+
"duration": lambda dagrun: DagRunInfo.duration(dagrun),
|
|
411
|
+
"dag_bundle_name": lambda dagrun: DagRunInfo.dag_version_info(dagrun, "bundle_name"),
|
|
412
|
+
"dag_bundle_version": lambda dagrun: DagRunInfo.dag_version_info(dagrun, "bundle_version"),
|
|
413
|
+
"dag_version_id": lambda dagrun: DagRunInfo.dag_version_info(dagrun, "version_id"),
|
|
414
|
+
"dag_version_number": lambda dagrun: DagRunInfo.dag_version_info(dagrun, "version_number"),
|
|
415
|
+
}
|
|
388
416
|
|
|
389
417
|
@classmethod
|
|
390
418
|
def duration(cls, dagrun: DagRun) -> float | None:
|
|
@@ -394,15 +422,33 @@ class DagRunInfo(InfoJsonEncodable):
|
|
|
394
422
|
return None
|
|
395
423
|
return (dagrun.end_date - dagrun.start_date).total_seconds()
|
|
396
424
|
|
|
425
|
+
@classmethod
|
|
426
|
+
def dag_version_info(cls, dagrun: DagRun, key: str) -> str | int | None:
|
|
427
|
+
# AF2 DagRun and AF3 DagRun SDK model (on worker) do not have this information
|
|
428
|
+
if not getattr(dagrun, "dag_versions", []):
|
|
429
|
+
return None
|
|
430
|
+
current_version = dagrun.dag_versions[-1]
|
|
431
|
+
if key == "bundle_name":
|
|
432
|
+
return current_version.bundle_name
|
|
433
|
+
if key == "bundle_version":
|
|
434
|
+
return current_version.bundle_version
|
|
435
|
+
if key == "version_id":
|
|
436
|
+
return str(current_version.id)
|
|
437
|
+
if key == "version_number":
|
|
438
|
+
return current_version.version_number
|
|
439
|
+
raise ValueError(f"Unsupported key: {key}`")
|
|
440
|
+
|
|
397
441
|
|
|
398
442
|
class TaskInstanceInfo(InfoJsonEncodable):
|
|
399
443
|
"""Defines encoding TaskInstance object to JSON."""
|
|
400
444
|
|
|
401
445
|
includes = ["duration", "try_number", "pool", "queued_dttm", "log_url"]
|
|
402
446
|
casts = {
|
|
403
|
-
"map_index": lambda ti: (
|
|
404
|
-
|
|
405
|
-
|
|
447
|
+
"map_index": lambda ti: ti.map_index if getattr(ti, "map_index", -1) != -1 else None,
|
|
448
|
+
"dag_bundle_version": lambda ti: (
|
|
449
|
+
ti.bundle_instance.version if hasattr(ti, "bundle_instance") else None
|
|
450
|
+
),
|
|
451
|
+
"dag_bundle_name": lambda ti: ti.bundle_instance.name if hasattr(ti, "bundle_instance") else None,
|
|
406
452
|
}
|
|
407
453
|
|
|
408
454
|
|
|
@@ -779,12 +825,6 @@ def get_filtered_unknown_operator_keys(operator: BaseOperator) -> dict:
|
|
|
779
825
|
|
|
780
826
|
def should_use_external_connection(hook) -> bool:
|
|
781
827
|
# If we're at Airflow 2.10, the execution is process-isolated, so we can safely run those again.
|
|
782
|
-
if not AIRFLOW_V_2_10_PLUS:
|
|
783
|
-
return hook.__class__.__name__ not in [
|
|
784
|
-
"SnowflakeHook",
|
|
785
|
-
"SnowflakeSqlApiHook",
|
|
786
|
-
"RedshiftSQLHook",
|
|
787
|
-
]
|
|
788
828
|
return True
|
|
789
829
|
|
|
790
830
|
|
|
@@ -32,5 +32,4 @@ def get_base_airflow_version_tuple() -> tuple[int, int, int]:
|
|
|
32
32
|
return airflow_version.major, airflow_version.minor, airflow_version.micro
|
|
33
33
|
|
|
34
34
|
|
|
35
|
-
AIRFLOW_V_2_10_PLUS = get_base_airflow_version_tuple() >= (2, 10, 0)
|
|
36
35
|
AIRFLOW_V_3_0_PLUS = get_base_airflow_version_tuple() >= (3, 0, 0)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: apache-airflow-providers-openlineage
|
|
3
|
-
Version: 2.
|
|
3
|
+
Version: 2.3.0rc1
|
|
4
4
|
Summary: Provider package apache-airflow-providers-openlineage for Apache Airflow
|
|
5
5
|
Keywords: airflow-provider,openlineage,airflow,integration
|
|
6
6
|
Author-email: Apache Software Foundation <dev@airflow.apache.org>
|
|
@@ -20,15 +20,15 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
20
20
|
Classifier: Programming Language :: Python :: 3.11
|
|
21
21
|
Classifier: Programming Language :: Python :: 3.12
|
|
22
22
|
Classifier: Topic :: System :: Monitoring
|
|
23
|
-
Requires-Dist: apache-airflow>=2.
|
|
24
|
-
Requires-Dist: apache-airflow-providers-common-sql>=1.20.
|
|
25
|
-
Requires-Dist: apache-airflow-providers-common-compat>=1.4.
|
|
23
|
+
Requires-Dist: apache-airflow>=2.10.0rc1
|
|
24
|
+
Requires-Dist: apache-airflow-providers-common-sql>=1.20.0rc1
|
|
25
|
+
Requires-Dist: apache-airflow-providers-common-compat>=1.4.0rc1
|
|
26
26
|
Requires-Dist: attrs>=22.2
|
|
27
27
|
Requires-Dist: openlineage-integration-common>=1.31.0
|
|
28
28
|
Requires-Dist: openlineage-python>=1.31.0
|
|
29
29
|
Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
|
|
30
|
-
Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.
|
|
31
|
-
Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.
|
|
30
|
+
Project-URL: Changelog, https://airflow.staged.apache.org/docs/apache-airflow-providers-openlineage/2.3.0/changelog.html
|
|
31
|
+
Project-URL: Documentation, https://airflow.staged.apache.org/docs/apache-airflow-providers-openlineage/2.3.0
|
|
32
32
|
Project-URL: Mastodon, https://fosstodon.org/@airflow
|
|
33
33
|
Project-URL: Slack Chat, https://s.apache.org/airflow-slack
|
|
34
34
|
Project-URL: Source Code, https://github.com/apache/airflow
|
|
@@ -59,7 +59,7 @@ Project-URL: YouTube, https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/
|
|
|
59
59
|
|
|
60
60
|
Package ``apache-airflow-providers-openlineage``
|
|
61
61
|
|
|
62
|
-
Release: ``2.
|
|
62
|
+
Release: ``2.3.0``
|
|
63
63
|
|
|
64
64
|
|
|
65
65
|
`OpenLineage <https://openlineage.io/>`__
|
|
@@ -72,7 +72,7 @@ This is a provider package for ``openlineage`` provider. All classes for this pr
|
|
|
72
72
|
are in ``airflow.providers.openlineage`` python package.
|
|
73
73
|
|
|
74
74
|
You can find package information and changelog for the provider
|
|
75
|
-
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.
|
|
75
|
+
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.3.0/>`_.
|
|
76
76
|
|
|
77
77
|
Installation
|
|
78
78
|
------------
|
|
@@ -89,7 +89,7 @@ Requirements
|
|
|
89
89
|
========================================== ==================
|
|
90
90
|
PIP package Version required
|
|
91
91
|
========================================== ==================
|
|
92
|
-
``apache-airflow`` ``>=2.
|
|
92
|
+
``apache-airflow`` ``>=2.10.0``
|
|
93
93
|
``apache-airflow-providers-common-sql`` ``>=1.20.0``
|
|
94
94
|
``apache-airflow-providers-common-compat`` ``>=1.4.0``
|
|
95
95
|
``attrs`` ``>=22.2``
|
|
@@ -118,5 +118,5 @@ Dependent package
|
|
|
118
118
|
================================================================================================================== =================
|
|
119
119
|
|
|
120
120
|
The changelog for the provider package can be found in the
|
|
121
|
-
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.
|
|
121
|
+
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.3.0/changelog.html>`_.
|
|
122
122
|
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
airflow/providers/openlineage/LICENSE,sha256=gXPVwptPlW1TJ4HSuG5OMPg-a3h43OGMkZRR1rpwfJA,10850
|
|
2
|
-
airflow/providers/openlineage/__init__.py,sha256=
|
|
2
|
+
airflow/providers/openlineage/__init__.py,sha256=Wcw1Qb20o0-NwmxTMURRAqxVRr3y9F5hmzHOJfdaqiI,1500
|
|
3
3
|
airflow/providers/openlineage/conf.py,sha256=aYdLU7iHBdGIU8ZAC5iUiIDgXP9gvP9r_z5hTAbXPOU,5535
|
|
4
4
|
airflow/providers/openlineage/get_provider_info.py,sha256=108mAg-tdcTBmPYwJ9M3wcmhc-i1bAs5OB2MLnSBA-4,9256
|
|
5
5
|
airflow/providers/openlineage/sqlparser.py,sha256=N38XhkU-lxwxnYevQpq63JOBi4rzp0q56JjxO3H24W8,20340
|
|
6
|
-
airflow/providers/openlineage/version_compat.py,sha256=
|
|
6
|
+
airflow/providers/openlineage/version_compat.py,sha256=j5PCtXvZ71aBjixu-EFTNtVDPsngzzs7os0ZQDgFVDk,1536
|
|
7
7
|
airflow/providers/openlineage/extractors/__init__.py,sha256=I0X4f6zUniclyD9zT0DFHRImpCpJVP4MkPJT3cd7X5I,1081
|
|
8
|
-
airflow/providers/openlineage/extractors/base.py,sha256=
|
|
8
|
+
airflow/providers/openlineage/extractors/base.py,sha256=d_PNXn5mo_yz7VSm5YSTiNQYzq2BJIaJD45INgRWNyQ,6477
|
|
9
9
|
airflow/providers/openlineage/extractors/bash.py,sha256=3aR0PXs8fzRLibRxXN1R8wMZnGzyCur7mjpy8e5GC4A,2583
|
|
10
10
|
airflow/providers/openlineage/extractors/manager.py,sha256=kVo5OHkpUoYIiT2RvwUt128jC_Q4EosdLC0sP1YfIH0,12840
|
|
11
11
|
airflow/providers/openlineage/extractors/python.py,sha256=hVWOplMlBimrpPKPeW6vm75a8OmAYMU1oJzqMz8Jh90,3171
|
|
@@ -16,17 +16,17 @@ airflow/providers/openlineage/facets/AirflowRunFacet.json,sha256=70mEaZShgSJp-2x
|
|
|
16
16
|
airflow/providers/openlineage/facets/AirflowStateRunFacet.json,sha256=xhHQEKD9Jopw-oqbkCCrrwFjfXnxvuJAritsmegKjuQ,937
|
|
17
17
|
airflow/providers/openlineage/facets/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
|
|
18
18
|
airflow/providers/openlineage/plugins/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
|
|
19
|
-
airflow/providers/openlineage/plugins/adapter.py,sha256=
|
|
19
|
+
airflow/providers/openlineage/plugins/adapter.py,sha256=4ylPzbIDR9i3a3NuPAOS8HplExRu3MlUo9t-fLJH-vc,19404
|
|
20
20
|
airflow/providers/openlineage/plugins/facets.py,sha256=VvyMYR6ONkC95q5FdNmohv0scbA1Ej_B5cQ97as5GvA,4161
|
|
21
|
-
airflow/providers/openlineage/plugins/listener.py,sha256=
|
|
22
|
-
airflow/providers/openlineage/plugins/macros.py,sha256=
|
|
23
|
-
airflow/providers/openlineage/plugins/openlineage.py,sha256=
|
|
21
|
+
airflow/providers/openlineage/plugins/listener.py,sha256=vcQcOczTiLTGepmIAZPTWkk5ysh1xR5fa-WvG2TYy1w,27988
|
|
22
|
+
airflow/providers/openlineage/plugins/macros.py,sha256=uAN9UBbWE_PvNLNf4YftGap0ovJaf8LHGqO3KnAVOP4,4455
|
|
23
|
+
airflow/providers/openlineage/plugins/openlineage.py,sha256=_vLa0x5mvt8ZkA7baI39PXxkYhzvdX9um9wB9qBGAik,1789
|
|
24
24
|
airflow/providers/openlineage/utils/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
|
|
25
25
|
airflow/providers/openlineage/utils/selective_enable.py,sha256=YyrUQ7Djv5o46XdH83N_G8AXAZ9C_aKPa534pbNVp08,3441
|
|
26
|
-
airflow/providers/openlineage/utils/spark.py,sha256=
|
|
26
|
+
airflow/providers/openlineage/utils/spark.py,sha256=X5liLxVLgQcgPF_0lFtQULeMOv_9dGj-HFjtZvWFgOo,7626
|
|
27
27
|
airflow/providers/openlineage/utils/sql.py,sha256=vkKrrdENEMVG8gtzV6yuTXMa2Z9fBAEXmxDVIDaVncI,9571
|
|
28
|
-
airflow/providers/openlineage/utils/utils.py,sha256=
|
|
29
|
-
apache_airflow_providers_openlineage-2.
|
|
30
|
-
apache_airflow_providers_openlineage-2.
|
|
31
|
-
apache_airflow_providers_openlineage-2.
|
|
32
|
-
apache_airflow_providers_openlineage-2.
|
|
28
|
+
airflow/providers/openlineage/utils/utils.py,sha256=DFO462SxSnZv3lOapoZYkiC7j1PrMtWDw0LZeqKKxIo,31368
|
|
29
|
+
apache_airflow_providers_openlineage-2.3.0rc1.dist-info/entry_points.txt,sha256=GAx0_i2OeZzqaiiiYuA-xchICDXiCT5kVqpKSxsOjt4,214
|
|
30
|
+
apache_airflow_providers_openlineage-2.3.0rc1.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
|
|
31
|
+
apache_airflow_providers_openlineage-2.3.0rc1.dist-info/METADATA,sha256=fbadl3jk_33JmU1dltvUS3wXyWDce8_taMLHboOMXJQ,5713
|
|
32
|
+
apache_airflow_providers_openlineage-2.3.0rc1.dist-info/RECORD,,
|
|
File without changes
|