apache-airflow-providers-openlineage 2.3.0rc1__py3-none-any.whl → 2.4.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/openlineage/__init__.py +1 -1
- airflow/providers/openlineage/get_provider_info.py +1 -1
- airflow/providers/openlineage/plugins/adapter.py +125 -55
- airflow/providers/openlineage/plugins/listener.py +57 -13
- airflow/providers/openlineage/plugins/macros.py +11 -1
- airflow/providers/openlineage/plugins/openlineage.py +12 -1
- airflow/providers/openlineage/utils/utils.py +5 -2
- {apache_airflow_providers_openlineage-2.3.0rc1.dist-info → apache_airflow_providers_openlineage-2.4.0rc1.dist-info}/METADATA +6 -6
- {apache_airflow_providers_openlineage-2.3.0rc1.dist-info → apache_airflow_providers_openlineage-2.4.0rc1.dist-info}/RECORD +11 -11
- {apache_airflow_providers_openlineage-2.3.0rc1.dist-info → apache_airflow_providers_openlineage-2.4.0rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_openlineage-2.3.0rc1.dist-info → apache_airflow_providers_openlineage-2.4.0rc1.dist-info}/entry_points.txt +0 -0
--- airflow/providers/openlineage/__init__.py
+++ airflow/providers/openlineage/__init__.py
@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version
 
 __all__ = ["__version__"]
 
-__version__ = "2.
+__version__ = "2.4.0"
 
 if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
     "2.10.0"
--- airflow/providers/openlineage/get_provider_info.py
+++ airflow/providers/openlineage/get_provider_info.py
@@ -42,7 +42,7 @@ def get_provider_info():
         ],
         "config": {
             "openlineage": {
-                "description": "This section applies settings for OpenLineage integration.\nMore about configuration and
+                "description": "This section applies settings for OpenLineage integration.\nMore about configuration and its precedence can be found in the `user's guide\n<https://airflow.apache.org/docs/apache-airflow-providers-openlineage/stable/guides/user.html#transport-setup>`_.\n",
                 "options": {
                     "disabled": {
                         "description": "Disable sending events without uninstalling the OpenLineage Provider by setting this to true.\n",
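The "disabled" option described above can be set like any other Airflow configuration value. A minimal sketch, assuming the standard AIRFLOW__<SECTION>__<KEY> environment-variable mapping applies to the [openlineage] section:

    import os

    # Equivalent to setting "disabled = true" under [openlineage] in airflow.cfg;
    # it stops event emission without uninstalling the provider.
    os.environ["AIRFLOW__OPENLINEAGE__DISABLED"] = "true"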
--- airflow/providers/openlineage/plugins/adapter.py
+++ airflow/providers/openlineage/plugins/adapter.py
@@ -19,7 +19,7 @@ from __future__ import annotations
 import os
 import traceback
 from contextlib import ExitStack
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Literal
 
 import yaml
 from openlineage.client import OpenLineageClient, set_producer
@@ -32,7 +32,7 @@ from openlineage.client.facet_v2 import (
     job_type_job,
     nominal_time_run,
     ownership_job,
-
+    tags_job,
 )
 from openlineage.client.uuid import generate_static_uuid
 
@@ -63,11 +63,8 @@ _PRODUCER = f"https://github.com/apache/airflow/tree/providers-openlineage/{OPEN
 
 set_producer(_PRODUCER)
 
-
-
-# otherwise the `JobTypeJobFacet._producer` will be set with the default value
-_JOB_TYPE_DAG = job_type_job.JobTypeJobFacet(jobType="DAG", integration="AIRFLOW", processingType="BATCH")
-_JOB_TYPE_TASK = job_type_job.JobTypeJobFacet(jobType="TASK", integration="AIRFLOW", processingType="BATCH")
+_JOB_TYPE_DAG: Literal["DAG"] = "DAG"
+_JOB_TYPE_TASK: Literal["TASK"] = "TASK"
 
 
 class OpenLineageAdapter(LoggingMixin):
@@ -187,10 +184,10 @@ class OpenLineageAdapter(LoggingMixin):
         job_name: str,
         job_description: str,
         event_time: str,
-        code_location: str | None,
         nominal_start_time: str | None,
         nominal_end_time: str | None,
-        owners: list[str],
+        owners: list[str] | None,
+        tags: list[str] | None,
         task: OperatorLineage | None,
         run_facets: dict[str, RunFacet] | None = None,
     ) -> RunEvent:
@@ -201,17 +198,16 @@ class OpenLineageAdapter(LoggingMixin):
         :param job_name: globally unique identifier of task in dag
         :param job_description: user provided description of job
         :param event_time:
-        :param code_location: file path or URL of DAG file
         :param nominal_start_time: scheduled time of dag run
         :param nominal_end_time: following schedule of dag run
-        :param owners: list of owners
+        :param owners: list of owners
+        :param tags: list of tags
         :param task: metadata container with information extracted from operator
         :param run_facets: custom run facets
         """
         run_facets = run_facets or {}
         if task:
             run_facets = {**task.run_facets, **run_facets}
-        run_facets = {**run_facets, **get_processing_engine_facet()}  # type: ignore
         event = RunEvent(
             eventType=RunState.START,
             eventTime=event_time,
@@ -225,8 +221,8 @@ class OpenLineageAdapter(LoggingMixin):
                 job_name=job_name,
                 job_type=_JOB_TYPE_TASK,
                 job_description=job_description,
-
-
+                job_owners=owners,
+                job_tags=tags,
                 job_facets=task.job_facets if task else None,
             ),
             inputs=task.inputs if task else [],
@@ -241,6 +237,10 @@ class OpenLineageAdapter(LoggingMixin):
         job_name: str,
         end_time: str,
         task: OperatorLineage,
+        nominal_start_time: str | None,
+        nominal_end_time: str | None,
+        owners: list[str] | None,
+        tags: list[str] | None,
         run_facets: dict[str, RunFacet] | None = None,
     ) -> RunEvent:
         """
@@ -249,21 +249,32 @@ class OpenLineageAdapter(LoggingMixin):
         :param run_id: globally unique identifier of task in dag run
         :param job_name: globally unique identifier of task between dags
         :param end_time: time of task completion
+        :param tags: list of tags
+        :param nominal_start_time: scheduled time of dag run
+        :param nominal_end_time: following schedule of dag run
         :param task: metadata container with information extracted from operator
+        :param owners: list of owners
         :param run_facets: additional run facets
         """
         run_facets = run_facets or {}
         if task:
             run_facets = {**task.run_facets, **run_facets}
-        run_facets = {**run_facets, **get_processing_engine_facet()}  # type: ignore
         event = RunEvent(
             eventType=RunState.COMPLETE,
             eventTime=end_time,
             run=self._build_run(
                 run_id=run_id,
+                nominal_start_time=nominal_start_time,
+                nominal_end_time=nominal_end_time,
                 run_facets=run_facets,
             ),
-            job=self._build_job(
+            job=self._build_job(
+                job_name,
+                job_type=_JOB_TYPE_TASK,
+                job_facets=task.job_facets,
+                job_owners=owners,
+                job_tags=tags,
+            ),
             inputs=task.inputs,
             outputs=task.outputs,
             producer=_PRODUCER,
@@ -276,6 +287,10 @@ class OpenLineageAdapter(LoggingMixin):
         job_name: str,
         end_time: str,
         task: OperatorLineage,
+        nominal_start_time: str | None,
+        nominal_end_time: str | None,
+        owners: list[str] | None,
+        tags: list[str] | None,
         error: str | BaseException | None = None,
         run_facets: dict[str, RunFacet] | None = None,
     ) -> RunEvent:
@@ -287,13 +302,16 @@ class OpenLineageAdapter(LoggingMixin):
         :param end_time: time of task completion
         :param task: metadata container with information extracted from operator
         :param run_facets: custom run facets
+        :param tags: list of tags
+        :param nominal_start_time: scheduled time of dag run
+        :param nominal_end_time: following schedule of dag run
+        :param owners: list of owners
         :param error: error
         :param run_facets: additional run facets
         """
         run_facets = run_facets or {}
         if task:
             run_facets = {**task.run_facets, **run_facets}
-        run_facets = {**run_facets, **get_processing_engine_facet()}  # type: ignore
 
         if error:
             stack_trace = None
@@ -310,9 +328,17 @@ class OpenLineageAdapter(LoggingMixin):
             eventTime=end_time,
             run=self._build_run(
                 run_id=run_id,
+                nominal_start_time=nominal_start_time,
+                nominal_end_time=nominal_end_time,
                 run_facets=run_facets,
             ),
-            job=self._build_job(
+            job=self._build_job(
+                job_name,
+                job_type=_JOB_TYPE_TASK,
+                job_facets=task.job_facets,
+                job_owners=owners,
+                job_tags=tags,
+            ),
             inputs=task.inputs,
             outputs=task.outputs,
             producer=_PRODUCER,
@@ -324,9 +350,10 @@ class OpenLineageAdapter(LoggingMixin):
         dag_id: str,
         logical_date: datetime,
         start_date: datetime,
-        nominal_start_time: str,
-        nominal_end_time: str,
-        owners: list[str],
+        nominal_start_time: str | None,
+        nominal_end_time: str | None,
+        owners: list[str] | None,
+        tags: list[str],
         run_facets: dict[str, RunFacet],
         clear_number: int,
         description: str | None = None,
@@ -340,8 +367,9 @@ class OpenLineageAdapter(LoggingMixin):
                 job_name=dag_id,
                 job_type=_JOB_TYPE_DAG,
                 job_description=description,
-
+                job_owners=owners,
                 job_facets=job_facets,
+                job_tags=tags,
             ),
             run=self._build_run(
                 run_id=self.build_dag_run_id(
@@ -349,7 +377,7 @@ class OpenLineageAdapter(LoggingMixin):
                 ),
                 nominal_start_time=nominal_start_time,
                 nominal_end_time=nominal_end_time,
-                run_facets={**run_facets, **get_airflow_debug_facet()
+                run_facets={**run_facets, **get_airflow_debug_facet()},
             ),
             inputs=[],
             outputs=[],
@@ -368,24 +396,34 @@ class OpenLineageAdapter(LoggingMixin):
         run_id: str,
         end_date: datetime,
         logical_date: datetime,
+        nominal_start_time: str | None,
+        nominal_end_time: str | None,
+        tags: list[str] | None,
         clear_number: int,
         dag_run_state: DagRunState,
         task_ids: list[str],
+        owners: list[str] | None,
         run_facets: dict[str, RunFacet],
     ):
         try:
             event = RunEvent(
                 eventType=RunState.COMPLETE,
                 eventTime=end_date.isoformat(),
-                job=self._build_job(
-
-
+                job=self._build_job(
+                    job_name=dag_id,
+                    job_type=_JOB_TYPE_DAG,
+                    job_owners=owners,
+                    job_tags=tags,
+                ),
+                run=self._build_run(
+                    run_id=self.build_dag_run_id(
                         dag_id=dag_id, logical_date=logical_date, clear_number=clear_number
                     ),
-
+                    nominal_start_time=nominal_start_time,
+                    nominal_end_time=nominal_end_time,
+                    run_facets={
                         **get_airflow_state_run_facet(dag_id, run_id, task_ids, dag_run_state),
                         **get_airflow_debug_facet(),
-                        **get_processing_engine_facet(),
                         **run_facets,
                     },
                 ),
@@ -406,9 +444,13 @@ class OpenLineageAdapter(LoggingMixin):
         run_id: str,
         end_date: datetime,
         logical_date: datetime,
+        nominal_start_time: str | None,
+        nominal_end_time: str | None,
+        tags: list[str] | None,
         clear_number: int,
         dag_run_state: DagRunState,
         task_ids: list[str],
+        owners: list[str] | None,
         msg: str,
         run_facets: dict[str, RunFacet],
     ):
@@ -416,20 +458,24 @@ class OpenLineageAdapter(LoggingMixin):
             event = RunEvent(
                 eventType=RunState.FAIL,
                 eventTime=end_date.isoformat(),
-                job=self._build_job(
-
-
-
-
-
+                job=self._build_job(
+                    job_name=dag_id,
+                    job_type=_JOB_TYPE_DAG,
+                    job_owners=owners,
+                    job_tags=tags,
+                ),
+                run=self._build_run(
+                    run_id=self.build_dag_run_id(
+                        dag_id=dag_id, logical_date=logical_date, clear_number=clear_number
                     ),
-
+                    nominal_start_time=nominal_start_time,
+                    nominal_end_time=nominal_end_time,
+                    run_facets={
                         "errorMessage": error_message_run.ErrorMessageRunFacet(
                             message=msg, programmingLanguage="python"
                         ),
                         **get_airflow_state_run_facet(dag_id, run_id, task_ids, dag_run_state),
                         **get_airflow_debug_facet(),
-                        **get_processing_engine_facet(),
                        **run_facets,
                     },
                 ),
@@ -452,49 +498,73 @@ class OpenLineageAdapter(LoggingMixin):
         run_facets: dict[str, RunFacet] | None = None,
     ) -> Run:
         facets: dict[str, RunFacet] = {}
+        if run_facets:
+            facets.update(run_facets)
         if nominal_start_time:
             facets.update(
-                {
+                {
+                    "nominalTime": nominal_time_run.NominalTimeRunFacet(
+                        nominalStartTime=nominal_start_time,
+                        nominalEndTime=nominal_end_time,
+                        producer=_PRODUCER,
+                    )
+                }
             )
-
-            facets.update(run_facets)
+        facets.update(get_processing_engine_facet())
 
         return Run(run_id, facets)
 
     @staticmethod
     def _build_job(
         job_name: str,
-        job_type:
+        job_type: Literal["DAG", "TASK"],
         job_description: str | None = None,
-
-
+        job_owners: list[str] | None = None,
+        job_tags: list[str] | None = None,
         job_facets: dict[str, JobFacet] | None = None,
     ):
         facets: dict[str, JobFacet] = {}
-
+        if job_facets:
+            facets.update(job_facets)
         if job_description:
             facets.update(
-                {
+                {
+                    "documentation": documentation_job.DocumentationJobFacet(
+                        description=job_description, producer=_PRODUCER
+                    )
+                }
             )
-        if
+        if job_owners:
             facets.update(
                 {
-                    "
-
+                    "ownership": ownership_job.OwnershipJobFacet(
+                        owners=[ownership_job.Owner(name=owner) for owner in sorted(job_owners)],
+                        producer=_PRODUCER,
                     )
                 }
             )
-        if
+        if job_tags:
             facets.update(
                 {
-                    "
-
+                    "tags": tags_job.TagsJobFacet(
+                        tags=[
+                            tags_job.TagsJobFacetFields(
+                                key=tag,
+                                value=tag,
+                                source="AIRFLOW",
+                            )
+                            for tag in sorted(job_tags)
+                        ],
+                        producer=_PRODUCER,
                    )
                 }
             )
-
-
-
-
+        facets.update(
+            {
+                "jobType": job_type_job.JobTypeJobFacet(
+                    jobType=job_type, integration="AIRFLOW", processingType="BATCH", producer=_PRODUCER
+                )
+            }
+        )
 
-        return Job(conf.namespace(), job_name, facets)
+        return Job(namespace=conf.namespace(), name=job_name, facets=facets)
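The reworked _build_job above now attaches ownership, tags, and job type information as standard OpenLineage job facets instead of relying on prebuilt module-level facet objects. A minimal standalone sketch of the same facet assembly, assuming Job is importable from openlineage.client.event_v2 (as the adapter's existing imports suggest) and using the facet_v2 classes shown in this diff; the producer URI, namespace, owners, and tags below are placeholder values:

    from openlineage.client.event_v2 import Job
    from openlineage.client.facet_v2 import job_type_job, ownership_job, tags_job

    PRODUCER = "https://example.com/my-producer"  # placeholder producer URI
    owners = ["data-team", "alice"]               # e.g. parsed from a comma-separated DAG owner string
    tags = ["finance", "daily"]                   # e.g. DAG tags

    facets = {
        "ownership": ownership_job.OwnershipJobFacet(
            owners=[ownership_job.Owner(name=o) for o in sorted(owners)], producer=PRODUCER
        ),
        "tags": tags_job.TagsJobFacet(
            tags=[tags_job.TagsJobFacetFields(key=t, value=t, source="AIRFLOW") for t in sorted(tags)],
            producer=PRODUCER,
        ),
        "jobType": job_type_job.JobTypeJobFacet(
            jobType="DAG", integration="AIRFLOW", processingType="BATCH", producer=PRODUCER
        ),
    }
    job = Job(namespace="default", name="my_dag", facets=facets)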
--- airflow/providers/openlineage/plugins/listener.py
+++ airflow/providers/openlineage/plugins/listener.py
@@ -152,17 +152,6 @@ class OpenLineageListener:
             return
 
         # Needs to be calculated outside of inner method so that it gets cached for usage in fork processes
-        data_interval_start = dagrun.data_interval_start
-        if isinstance(data_interval_start, datetime):
-            data_interval_start = data_interval_start.isoformat()
-        data_interval_end = dagrun.data_interval_end
-        if isinstance(data_interval_end, datetime):
-            data_interval_end = data_interval_end.isoformat()
-
-        clear_number = 0
-        if hasattr(dagrun, "clear_number"):
-            clear_number = dagrun.clear_number
-
         debug_facet = get_airflow_debug_facet()
 
         @print_warning(self.log)
@@ -176,6 +165,10 @@ class OpenLineageListener:
             if AIRFLOW_V_3_0_PLUS and date is None:
                 date = dagrun.run_after
 
+            clear_number = 0
+            if hasattr(dagrun, "clear_number"):
+                clear_number = dagrun.clear_number
+
             parent_run_id = self.adapter.build_dag_run_id(
                 dag_id=dag.dag_id,
                 logical_date=date,
@@ -192,6 +185,13 @@ class OpenLineageListener:
             event_type = RunState.RUNNING.value.lower()
             operator_name = task.task_type.lower()
 
+            data_interval_start = dagrun.data_interval_start
+            if isinstance(data_interval_start, datetime):
+                data_interval_start = data_interval_start.isoformat()
+            data_interval_end = dagrun.data_interval_end
+            if isinstance(data_interval_end, datetime):
+                data_interval_end = data_interval_end.isoformat()
+
             with Stats.timer(f"ol.extract.{event_type}.{operator_name}"):
                 task_metadata = self.extractor_manager.extract_metadata(
                     dagrun=dagrun, task=task, task_instance_state=TaskInstanceState.RUNNING
@@ -202,10 +202,11 @@ class OpenLineageListener:
                 job_name=get_job_name(task),
                 job_description=dag.description,
                 event_time=start_date.isoformat(),
-                code_location=None,
                 nominal_start_time=data_interval_start,
                 nominal_end_time=data_interval_end,
-
+                # If task owner is default ("airflow"), use DAG owner instead that may have more details
+                owners=[x.strip() for x in (task if task.owner != "airflow" else dag).owner.split(",")],
+                tags=dag.tags,
                 task=task_metadata,
                 run_facets={
                     **get_task_parent_run_facet(parent_run_id=parent_run_id, parent_job_name=dag.dag_id),
@@ -303,6 +304,13 @@ class OpenLineageListener:
             event_type = RunState.COMPLETE.value.lower()
             operator_name = task.task_type.lower()
 
+            data_interval_start = dagrun.data_interval_start
+            if isinstance(data_interval_start, datetime):
+                data_interval_start = data_interval_start.isoformat()
+            data_interval_end = dagrun.data_interval_end
+            if isinstance(data_interval_end, datetime):
+                data_interval_end = data_interval_end.isoformat()
+
             with Stats.timer(f"ol.extract.{event_type}.{operator_name}"):
                 task_metadata = self.extractor_manager.extract_metadata(
                     dagrun=dagrun,
@@ -316,6 +324,11 @@ class OpenLineageListener:
                 job_name=get_job_name(task),
                 end_time=end_date.isoformat(),
                 task=task_metadata,
+                # If task owner is default ("airflow"), use DAG owner instead that may have more details
+                owners=[x.strip() for x in (task if task.owner != "airflow" else dag).owner.split(",")],
+                tags=dag.tags,
+                nominal_start_time=data_interval_start,
+                nominal_end_time=data_interval_end,
                 run_facets={
                     **get_task_parent_run_facet(parent_run_id=parent_run_id, parent_job_name=dag.dag_id),
                     **get_user_provided_run_facets(task_instance, TaskInstanceState.SUCCESS),
@@ -422,6 +435,13 @@ class OpenLineageListener:
             event_type = RunState.FAIL.value.lower()
             operator_name = task.task_type.lower()
 
+            data_interval_start = dagrun.data_interval_start
+            if isinstance(data_interval_start, datetime):
+                data_interval_start = data_interval_start.isoformat()
+            data_interval_end = dagrun.data_interval_end
+            if isinstance(data_interval_end, datetime):
+                data_interval_end = data_interval_end.isoformat()
+
             with Stats.timer(f"ol.extract.{event_type}.{operator_name}"):
                 task_metadata = self.extractor_manager.extract_metadata(
                     dagrun=dagrun,
@@ -436,6 +456,11 @@ class OpenLineageListener:
                 end_time=end_date.isoformat(),
                 task=task_metadata,
                 error=error,
+                nominal_start_time=data_interval_start,
+                nominal_end_time=data_interval_end,
+                tags=dag.tags,
+                # If task owner is default ("airflow"), use DAG owner instead that may have more details
+                owners=[x.strip() for x in (task if task.owner != "airflow" else dag).owner.split(",")],
                 run_facets={
                     **get_task_parent_run_facet(parent_run_id=parent_run_id, parent_job_name=dag.dag_id),
                     **get_user_provided_run_facets(task_instance, TaskInstanceState.FAILED),
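Each of the task-level calls above resolves owners with the same expression: the task owner is used unless it is Airflow's default owner string "airflow", in which case the DAG owner (possibly a comma-separated list) is used instead. A hypothetical standalone helper illustrating that rule:

    def resolve_owners(task_owner: str, dag_owner: str) -> list[str]:
        # Prefer the task owner unless it is the default "airflow"; then fall back to the DAG owner.
        source = task_owner if task_owner != "airflow" else dag_owner
        return [x.strip() for x in source.split(",")]

    assert resolve_owners("airflow", "data-team, alice") == ["data-team", "alice"]
    assert resolve_owners("bob", "data-team") == ["bob"]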
@@ -605,6 +630,7 @@ class OpenLineageListener:
             clear_number=dag_run.clear_number,
             owners=[x.strip() for x in dag_run.dag.owner.split(",")] if dag_run.dag else None,
             description=dag_run.dag.description if dag_run.dag else None,
+            tags=dag_run.dag.tags if dag_run.dag else [],
             # AirflowJobFacet should be created outside ProcessPoolExecutor that pickles objects,
             # as it causes lack of some TaskGroup attributes and crashes event emission.
             job_facets=get_airflow_job_facet(dag_run=dag_run),
@@ -634,13 +660,22 @@ class OpenLineageListener:
         if AIRFLOW_V_3_0_PLUS and date is None:
             date = dag_run.run_after
 
+        data_interval_start = (
+            dag_run.data_interval_start.isoformat() if dag_run.data_interval_start else None
+        )
+        data_interval_end = dag_run.data_interval_end.isoformat() if dag_run.data_interval_end else None
+
         self.submit_callable(
             self.adapter.dag_success,
             dag_id=dag_run.dag_id,
             run_id=dag_run.run_id,
             end_date=dag_run.end_date,
+            nominal_start_time=data_interval_start,
+            nominal_end_time=data_interval_end,
             logical_date=date,
             clear_number=dag_run.clear_number,
+            owners=[x.strip() for x in dag_run.dag.owner.split(",")] if dag_run.dag else None,
+            tags=dag_run.dag.tags if dag_run.dag else [],
             task_ids=task_ids,
             dag_run_state=dag_run.get_state(),
             run_facets={**get_airflow_dag_run_facet(dag_run)},
@@ -670,13 +705,22 @@ class OpenLineageListener:
         if AIRFLOW_V_3_0_PLUS and date is None:
             date = dag_run.run_after
 
+        data_interval_start = (
+            dag_run.data_interval_start.isoformat() if dag_run.data_interval_start else None
+        )
+        data_interval_end = dag_run.data_interval_end.isoformat() if dag_run.data_interval_end else None
+
         self.submit_callable(
             self.adapter.dag_failed,
             dag_id=dag_run.dag_id,
             run_id=dag_run.run_id,
             end_date=dag_run.end_date,
+            nominal_start_time=data_interval_start,
+            nominal_end_time=data_interval_end,
             logical_date=date,
             clear_number=dag_run.clear_number,
+            owners=[x.strip() for x in dag_run.dag.owner.split(",")] if dag_run.dag else None,
+            tags=dag_run.dag.tags if dag_run.dag else [],
             dag_run_state=dag_run.get_state(),
             task_ids=task_ids,
             msg=msg,
--- airflow/providers/openlineage/plugins/macros.py
+++ airflow/providers/openlineage/plugins/macros.py
@@ -90,6 +90,16 @@ def lineage_parent_id(task_instance: TaskInstance):
 
 
 def lineage_root_parent_id(task_instance: TaskInstance):
+    """
+    Macro function which returns a unique identifier of given task that can be used to create root information for ParentRunFacet.
+
+    This identifier is composed of the namespace, dag name, and generated run id for given dag, structured
+    as '{namespace}/{job_name}/{run_id}'.
+
+    .. seealso::
+        For more information take a look at the guide:
+        :ref:`howto/macros:openlineage`
+    """
     return "/".join(
         (
             lineage_job_namespace(),
@@ -117,7 +127,7 @@ def _get_logical_date(task_instance):
     context = task_instance.get_template_context()
     if hasattr(task_instance, "dag_run"):
         dag_run = task_instance.dag_run
-
+    else:
         dag_run = context["dag_run"]
     if hasattr(dag_run, "logical_date") and dag_run.logical_date:
         date = dag_run.logical_date
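The new lineage_root_parent_id macro returns an identifier in the form '{namespace}/{job_name}/{run_id}', as its docstring above states. A sketch of passing it to a downstream job from a templated field, assuming plugin macros are exposed in Jinja templates under the plugin name (macros.OpenLineageProviderPlugin), as with the existing lineage_parent_id macro:

    # Hypothetical templated command; both macro calls take the task_instance from the template context.
    bash_command = (
        "spark-submit my_job.py "
        "--openlineage-parent-id '{{ macros.OpenLineageProviderPlugin.lineage_parent_id(task_instance) }}' "
        "--openlineage-root-parent-id '{{ macros.OpenLineageProviderPlugin.lineage_root_parent_id(task_instance) }}'"
    )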
--- airflow/providers/openlineage/plugins/openlineage.py
+++ airflow/providers/openlineage/plugins/openlineage.py
@@ -23,6 +23,9 @@ from airflow.providers.openlineage.plugins.macros import (
     lineage_job_name,
     lineage_job_namespace,
     lineage_parent_id,
+    lineage_root_job_name,
+    lineage_root_parent_id,
+    lineage_root_run_id,
     lineage_run_id,
 )
 
@@ -37,7 +40,15 @@ class OpenLineageProviderPlugin(AirflowPlugin):
 
     name = "OpenLineageProviderPlugin"
     if not conf.is_disabled():
-        macros = [
+        macros = [
+            lineage_job_namespace,
+            lineage_job_name,
+            lineage_run_id,
+            lineage_parent_id,
+            lineage_root_run_id,
+            lineage_root_job_name,
+            lineage_root_parent_id,
+        ]
         listeners = [get_openlineage_listener()]
         from airflow.lineage.hook import HookLineageReader
 
--- airflow/providers/openlineage/utils/utils.py
+++ airflow/providers/openlineage/utils/utils.py
@@ -70,7 +70,8 @@ if TYPE_CHECKING:
 
     from airflow.models import TaskInstance
     from airflow.providers.common.compat.assets import Asset
-    from airflow.sdk import DAG
+    from airflow.sdk import DAG
+    from airflow.sdk.definitions.mappedoperator import MappedOperator
     from airflow.sdk.execution_time.secrets_masker import (
         Redactable,
         Redacted,
@@ -80,7 +81,8 @@ if TYPE_CHECKING:
     from airflow.utils.state import DagRunState, TaskInstanceState
 else:
     try:
-        from airflow.sdk import DAG
+        from airflow.sdk import DAG
+        from airflow.sdk.definitions.mappedoperator import MappedOperator
     except ImportError:
         from airflow.models import DAG, MappedOperator
 
@@ -444,6 +446,7 @@ class TaskInstanceInfo(InfoJsonEncodable):
 
     includes = ["duration", "try_number", "pool", "queued_dttm", "log_url"]
     casts = {
+        "log_url": lambda ti: getattr(ti, "log_url", None),
         "map_index": lambda ti: ti.map_index if getattr(ti, "map_index", -1) != -1 else None,
         "dag_bundle_version": lambda ti: (
             ti.bundle_instance.version if hasattr(ti, "bundle_instance") else None
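The new "log_url" cast in TaskInstanceInfo uses getattr with a default, so serialization degrades to None on task instance objects that do not expose log_url rather than raising AttributeError. A small illustration with hypothetical stand-in objects:

    class _TaskInstanceWithUrl:
        log_url = "http://localhost:8080/dags/my_dag/grid?task_id=my_task"

    class _TaskInstanceWithoutUrl:
        pass

    def cast_log_url(ti):
        # Same pattern as the cast above: a missing attribute becomes None instead of an error.
        return getattr(ti, "log_url", None)

    assert cast_log_url(_TaskInstanceWithUrl()) == "http://localhost:8080/dags/my_dag/grid?task_id=my_task"
    assert cast_log_url(_TaskInstanceWithoutUrl()) is None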
--- apache_airflow_providers_openlineage-2.3.0rc1.dist-info/METADATA
+++ apache_airflow_providers_openlineage-2.4.0rc1.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: apache-airflow-providers-openlineage
-Version: 2.
+Version: 2.4.0rc1
 Summary: Provider package apache-airflow-providers-openlineage for Apache Airflow
 Keywords: airflow-provider,openlineage,airflow,integration
 Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -27,8 +27,8 @@ Requires-Dist: attrs>=22.2
 Requires-Dist: openlineage-integration-common>=1.31.0
 Requires-Dist: openlineage-python>=1.31.0
 Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
-Project-URL: Changelog, https://airflow.staged.apache.org/docs/apache-airflow-providers-openlineage/2.
-Project-URL: Documentation, https://airflow.staged.apache.org/docs/apache-airflow-providers-openlineage/2.
+Project-URL: Changelog, https://airflow.staged.apache.org/docs/apache-airflow-providers-openlineage/2.4.0/changelog.html
+Project-URL: Documentation, https://airflow.staged.apache.org/docs/apache-airflow-providers-openlineage/2.4.0
 Project-URL: Mastodon, https://fosstodon.org/@airflow
 Project-URL: Slack Chat, https://s.apache.org/airflow-slack
 Project-URL: Source Code, https://github.com/apache/airflow
@@ -59,7 +59,7 @@ Project-URL: YouTube, https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/
 
 Package ``apache-airflow-providers-openlineage``
 
-Release: ``2.
+Release: ``2.4.0``
 
 
 `OpenLineage <https://openlineage.io/>`__
@@ -72,7 +72,7 @@ This is a provider package for ``openlineage`` provider. All classes for this pr
 are in ``airflow.providers.openlineage`` python package.
 
 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.4.0/>`_.
 
 Installation
 ------------
@@ -118,5 +118,5 @@ Dependent package
 ================================================================================================================== =================
 
 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.4.0/changelog.html>`_.
 
--- apache_airflow_providers_openlineage-2.3.0rc1.dist-info/RECORD
+++ apache_airflow_providers_openlineage-2.4.0rc1.dist-info/RECORD
@@ -1,7 +1,7 @@
 airflow/providers/openlineage/LICENSE,sha256=gXPVwptPlW1TJ4HSuG5OMPg-a3h43OGMkZRR1rpwfJA,10850
-airflow/providers/openlineage/__init__.py,sha256=
+airflow/providers/openlineage/__init__.py,sha256=Pyul6qEokTW4wHohh2fvYPpwWexk1XVfwNoH4JlZNuE,1500
 airflow/providers/openlineage/conf.py,sha256=aYdLU7iHBdGIU8ZAC5iUiIDgXP9gvP9r_z5hTAbXPOU,5535
-airflow/providers/openlineage/get_provider_info.py,sha256=
+airflow/providers/openlineage/get_provider_info.py,sha256=z0oCEDCM3UyrlXrQt0ksTi6jYd2vqjiSSudBM7hLNZw,9255
 airflow/providers/openlineage/sqlparser.py,sha256=N38XhkU-lxwxnYevQpq63JOBi4rzp0q56JjxO3H24W8,20340
 airflow/providers/openlineage/version_compat.py,sha256=j5PCtXvZ71aBjixu-EFTNtVDPsngzzs7os0ZQDgFVDk,1536
 airflow/providers/openlineage/extractors/__init__.py,sha256=I0X4f6zUniclyD9zT0DFHRImpCpJVP4MkPJT3cd7X5I,1081
@@ -16,17 +16,17 @@ airflow/providers/openlineage/facets/AirflowRunFacet.json,sha256=70mEaZShgSJp-2x
 airflow/providers/openlineage/facets/AirflowStateRunFacet.json,sha256=xhHQEKD9Jopw-oqbkCCrrwFjfXnxvuJAritsmegKjuQ,937
 airflow/providers/openlineage/facets/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
 airflow/providers/openlineage/plugins/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
-airflow/providers/openlineage/plugins/adapter.py,sha256=
+airflow/providers/openlineage/plugins/adapter.py,sha256=rtvjPvzj9fN6Crn9Sg0VTjqUrhVqEWvXakID2538JI4,21519
 airflow/providers/openlineage/plugins/facets.py,sha256=VvyMYR6ONkC95q5FdNmohv0scbA1Ej_B5cQ97as5GvA,4161
-airflow/providers/openlineage/plugins/listener.py,sha256=
-airflow/providers/openlineage/plugins/macros.py,sha256=
-airflow/providers/openlineage/plugins/openlineage.py,sha256=
+airflow/providers/openlineage/plugins/listener.py,sha256=4BKCt3MvQF0xeVd2DoOTsFwF7VfTGKktihzf-AMRdeE,30733
+airflow/providers/openlineage/plugins/macros.py,sha256=0n-oF98JmiDEZ3TDpNVMShkjezimICasUYt0YuXhztY,4837
+airflow/providers/openlineage/plugins/openlineage.py,sha256=dP3GOVtOGAIokeaeRx2OW_c1TKAxDvATlD9OGMyXqr0,2032
 airflow/providers/openlineage/utils/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
 airflow/providers/openlineage/utils/selective_enable.py,sha256=YyrUQ7Djv5o46XdH83N_G8AXAZ9C_aKPa534pbNVp08,3441
 airflow/providers/openlineage/utils/spark.py,sha256=X5liLxVLgQcgPF_0lFtQULeMOv_9dGj-HFjtZvWFgOo,7626
 airflow/providers/openlineage/utils/sql.py,sha256=vkKrrdENEMVG8gtzV6yuTXMa2Z9fBAEXmxDVIDaVncI,9571
-airflow/providers/openlineage/utils/utils.py,sha256=
-apache_airflow_providers_openlineage-2.
-apache_airflow_providers_openlineage-2.
-apache_airflow_providers_openlineage-2.
-apache_airflow_providers_openlineage-2.
+airflow/providers/openlineage/utils/utils.py,sha256=4NjZ6k0X4dEYJVc7h50ukK9tq5PN45n31VcjV15nLxY,31540
+apache_airflow_providers_openlineage-2.4.0rc1.dist-info/entry_points.txt,sha256=GAx0_i2OeZzqaiiiYuA-xchICDXiCT5kVqpKSxsOjt4,214
+apache_airflow_providers_openlineage-2.4.0rc1.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
+apache_airflow_providers_openlineage-2.4.0rc1.dist-info/METADATA,sha256=3KsDZ8V671PtDoWOktqIgSH8hoj4BJwHs4DfGIhxDLU,5713
+apache_airflow_providers_openlineage-2.4.0rc1.dist-info/RECORD,,

Files without changes: WHEEL, entry_points.txt