apache-airflow-providers-openlineage 2.3.0__py3-none-any.whl → 2.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of apache-airflow-providers-openlineage might be problematic. Click here for more details.

@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version
29
29
 
30
30
  __all__ = ["__version__"]
31
31
 
32
- __version__ = "2.3.0"
32
+ __version__ = "2.4.0"
33
33
 
34
34
  if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
35
35
  "2.10.0"
@@ -42,7 +42,7 @@ def get_provider_info():
42
42
  ],
43
43
  "config": {
44
44
  "openlineage": {
45
- "description": "This section applies settings for OpenLineage integration.\nMore about configuration and it's precedence can be found in the `user's guide\n<https://airflow.apache.org/docs/apache-airflow-providers-openlineage/stable/guides/user.html#transport-setup>`_.\n",
45
+ "description": "This section applies settings for OpenLineage integration.\nMore about configuration and its precedence can be found in the `user's guide\n<https://airflow.apache.org/docs/apache-airflow-providers-openlineage/stable/guides/user.html#transport-setup>`_.\n",
46
46
  "options": {
47
47
  "disabled": {
48
48
  "description": "Disable sending events without uninstalling the OpenLineage Provider by setting this to true.\n",
@@ -19,7 +19,7 @@ from __future__ import annotations
19
19
  import os
20
20
  import traceback
21
21
  from contextlib import ExitStack
22
- from typing import TYPE_CHECKING
22
+ from typing import TYPE_CHECKING, Literal
23
23
 
24
24
  import yaml
25
25
  from openlineage.client import OpenLineageClient, set_producer
@@ -32,7 +32,7 @@ from openlineage.client.facet_v2 import (
32
32
  job_type_job,
33
33
  nominal_time_run,
34
34
  ownership_job,
35
- source_code_location_job,
35
+ tags_job,
36
36
  )
37
37
  from openlineage.client.uuid import generate_static_uuid
38
38
 
@@ -63,11 +63,8 @@ _PRODUCER = f"https://github.com/apache/airflow/tree/providers-openlineage/{OPEN
63
63
 
64
64
  set_producer(_PRODUCER)
65
65
 
66
- # https://openlineage.io/docs/spec/facets/job-facets/job-type
67
- # They must be set after the `set_producer(_PRODUCER)`
68
- # otherwise the `JobTypeJobFacet._producer` will be set with the default value
69
- _JOB_TYPE_DAG = job_type_job.JobTypeJobFacet(jobType="DAG", integration="AIRFLOW", processingType="BATCH")
70
- _JOB_TYPE_TASK = job_type_job.JobTypeJobFacet(jobType="TASK", integration="AIRFLOW", processingType="BATCH")
66
+ _JOB_TYPE_DAG: Literal["DAG"] = "DAG"
67
+ _JOB_TYPE_TASK: Literal["TASK"] = "TASK"
71
68
 
72
69
 
73
70
  class OpenLineageAdapter(LoggingMixin):
@@ -187,10 +184,10 @@ class OpenLineageAdapter(LoggingMixin):
187
184
  job_name: str,
188
185
  job_description: str,
189
186
  event_time: str,
190
- code_location: str | None,
191
187
  nominal_start_time: str | None,
192
188
  nominal_end_time: str | None,
193
- owners: list[str],
189
+ owners: list[str] | None,
190
+ tags: list[str] | None,
194
191
  task: OperatorLineage | None,
195
192
  run_facets: dict[str, RunFacet] | None = None,
196
193
  ) -> RunEvent:
@@ -201,17 +198,16 @@ class OpenLineageAdapter(LoggingMixin):
201
198
  :param job_name: globally unique identifier of task in dag
202
199
  :param job_description: user provided description of job
203
200
  :param event_time:
204
- :param code_location: file path or URL of DAG file
205
201
  :param nominal_start_time: scheduled time of dag run
206
202
  :param nominal_end_time: following schedule of dag run
207
- :param owners: list of owners of DAG
203
+ :param owners: list of owners
204
+ :param tags: list of tags
208
205
  :param task: metadata container with information extracted from operator
209
206
  :param run_facets: custom run facets
210
207
  """
211
208
  run_facets = run_facets or {}
212
209
  if task:
213
210
  run_facets = {**task.run_facets, **run_facets}
214
- run_facets = {**run_facets, **get_processing_engine_facet()} # type: ignore
215
211
  event = RunEvent(
216
212
  eventType=RunState.START,
217
213
  eventTime=event_time,
@@ -225,8 +221,8 @@ class OpenLineageAdapter(LoggingMixin):
225
221
  job_name=job_name,
226
222
  job_type=_JOB_TYPE_TASK,
227
223
  job_description=job_description,
228
- code_location=code_location,
229
- owners=owners,
224
+ job_owners=owners,
225
+ job_tags=tags,
230
226
  job_facets=task.job_facets if task else None,
231
227
  ),
232
228
  inputs=task.inputs if task else [],
@@ -241,6 +237,10 @@ class OpenLineageAdapter(LoggingMixin):
241
237
  job_name: str,
242
238
  end_time: str,
243
239
  task: OperatorLineage,
240
+ nominal_start_time: str | None,
241
+ nominal_end_time: str | None,
242
+ owners: list[str] | None,
243
+ tags: list[str] | None,
244
244
  run_facets: dict[str, RunFacet] | None = None,
245
245
  ) -> RunEvent:
246
246
  """
@@ -249,21 +249,32 @@ class OpenLineageAdapter(LoggingMixin):
249
249
  :param run_id: globally unique identifier of task in dag run
250
250
  :param job_name: globally unique identifier of task between dags
251
251
  :param end_time: time of task completion
252
+ :param tags: list of tags
253
+ :param nominal_start_time: scheduled time of dag run
254
+ :param nominal_end_time: following schedule of dag run
252
255
  :param task: metadata container with information extracted from operator
256
+ :param owners: list of owners
253
257
  :param run_facets: additional run facets
254
258
  """
255
259
  run_facets = run_facets or {}
256
260
  if task:
257
261
  run_facets = {**task.run_facets, **run_facets}
258
- run_facets = {**run_facets, **get_processing_engine_facet()} # type: ignore
259
262
  event = RunEvent(
260
263
  eventType=RunState.COMPLETE,
261
264
  eventTime=end_time,
262
265
  run=self._build_run(
263
266
  run_id=run_id,
267
+ nominal_start_time=nominal_start_time,
268
+ nominal_end_time=nominal_end_time,
264
269
  run_facets=run_facets,
265
270
  ),
266
- job=self._build_job(job_name, job_type=_JOB_TYPE_TASK, job_facets=task.job_facets),
271
+ job=self._build_job(
272
+ job_name,
273
+ job_type=_JOB_TYPE_TASK,
274
+ job_facets=task.job_facets,
275
+ job_owners=owners,
276
+ job_tags=tags,
277
+ ),
267
278
  inputs=task.inputs,
268
279
  outputs=task.outputs,
269
280
  producer=_PRODUCER,
@@ -276,6 +287,10 @@ class OpenLineageAdapter(LoggingMixin):
276
287
  job_name: str,
277
288
  end_time: str,
278
289
  task: OperatorLineage,
290
+ nominal_start_time: str | None,
291
+ nominal_end_time: str | None,
292
+ owners: list[str] | None,
293
+ tags: list[str] | None,
279
294
  error: str | BaseException | None = None,
280
295
  run_facets: dict[str, RunFacet] | None = None,
281
296
  ) -> RunEvent:
@@ -287,13 +302,16 @@ class OpenLineageAdapter(LoggingMixin):
287
302
  :param end_time: time of task completion
288
303
  :param task: metadata container with information extracted from operator
289
304
  :param run_facets: custom run facets
305
+ :param tags: list of tags
306
+ :param nominal_start_time: scheduled time of dag run
307
+ :param nominal_end_time: following schedule of dag run
308
+ :param owners: list of owners
290
309
  :param error: error
291
310
  :param run_facets: additional run facets
292
311
  """
293
312
  run_facets = run_facets or {}
294
313
  if task:
295
314
  run_facets = {**task.run_facets, **run_facets}
296
- run_facets = {**run_facets, **get_processing_engine_facet()} # type: ignore
297
315
 
298
316
  if error:
299
317
  stack_trace = None
@@ -310,9 +328,17 @@ class OpenLineageAdapter(LoggingMixin):
310
328
  eventTime=end_time,
311
329
  run=self._build_run(
312
330
  run_id=run_id,
331
+ nominal_start_time=nominal_start_time,
332
+ nominal_end_time=nominal_end_time,
313
333
  run_facets=run_facets,
314
334
  ),
315
- job=self._build_job(job_name, job_type=_JOB_TYPE_TASK, job_facets=task.job_facets),
335
+ job=self._build_job(
336
+ job_name,
337
+ job_type=_JOB_TYPE_TASK,
338
+ job_facets=task.job_facets,
339
+ job_owners=owners,
340
+ job_tags=tags,
341
+ ),
316
342
  inputs=task.inputs,
317
343
  outputs=task.outputs,
318
344
  producer=_PRODUCER,
@@ -324,9 +350,10 @@ class OpenLineageAdapter(LoggingMixin):
324
350
  dag_id: str,
325
351
  logical_date: datetime,
326
352
  start_date: datetime,
327
- nominal_start_time: str,
328
- nominal_end_time: str,
329
- owners: list[str],
353
+ nominal_start_time: str | None,
354
+ nominal_end_time: str | None,
355
+ owners: list[str] | None,
356
+ tags: list[str],
330
357
  run_facets: dict[str, RunFacet],
331
358
  clear_number: int,
332
359
  description: str | None = None,
@@ -340,8 +367,9 @@ class OpenLineageAdapter(LoggingMixin):
340
367
  job_name=dag_id,
341
368
  job_type=_JOB_TYPE_DAG,
342
369
  job_description=description,
343
- owners=owners,
370
+ job_owners=owners,
344
371
  job_facets=job_facets,
372
+ job_tags=tags,
345
373
  ),
346
374
  run=self._build_run(
347
375
  run_id=self.build_dag_run_id(
@@ -349,7 +377,7 @@ class OpenLineageAdapter(LoggingMixin):
349
377
  ),
350
378
  nominal_start_time=nominal_start_time,
351
379
  nominal_end_time=nominal_end_time,
352
- run_facets={**run_facets, **get_airflow_debug_facet(), **get_processing_engine_facet()},
380
+ run_facets={**run_facets, **get_airflow_debug_facet()},
353
381
  ),
354
382
  inputs=[],
355
383
  outputs=[],
@@ -368,24 +396,34 @@ class OpenLineageAdapter(LoggingMixin):
368
396
  run_id: str,
369
397
  end_date: datetime,
370
398
  logical_date: datetime,
399
+ nominal_start_time: str | None,
400
+ nominal_end_time: str | None,
401
+ tags: list[str] | None,
371
402
  clear_number: int,
372
403
  dag_run_state: DagRunState,
373
404
  task_ids: list[str],
405
+ owners: list[str] | None,
374
406
  run_facets: dict[str, RunFacet],
375
407
  ):
376
408
  try:
377
409
  event = RunEvent(
378
410
  eventType=RunState.COMPLETE,
379
411
  eventTime=end_date.isoformat(),
380
- job=self._build_job(job_name=dag_id, job_type=_JOB_TYPE_DAG),
381
- run=Run(
382
- runId=self.build_dag_run_id(
412
+ job=self._build_job(
413
+ job_name=dag_id,
414
+ job_type=_JOB_TYPE_DAG,
415
+ job_owners=owners,
416
+ job_tags=tags,
417
+ ),
418
+ run=self._build_run(
419
+ run_id=self.build_dag_run_id(
383
420
  dag_id=dag_id, logical_date=logical_date, clear_number=clear_number
384
421
  ),
385
- facets={
422
+ nominal_start_time=nominal_start_time,
423
+ nominal_end_time=nominal_end_time,
424
+ run_facets={
386
425
  **get_airflow_state_run_facet(dag_id, run_id, task_ids, dag_run_state),
387
426
  **get_airflow_debug_facet(),
388
- **get_processing_engine_facet(),
389
427
  **run_facets,
390
428
  },
391
429
  ),
@@ -406,9 +444,13 @@ class OpenLineageAdapter(LoggingMixin):
406
444
  run_id: str,
407
445
  end_date: datetime,
408
446
  logical_date: datetime,
447
+ nominal_start_time: str | None,
448
+ nominal_end_time: str | None,
449
+ tags: list[str] | None,
409
450
  clear_number: int,
410
451
  dag_run_state: DagRunState,
411
452
  task_ids: list[str],
453
+ owners: list[str] | None,
412
454
  msg: str,
413
455
  run_facets: dict[str, RunFacet],
414
456
  ):
@@ -416,20 +458,24 @@ class OpenLineageAdapter(LoggingMixin):
416
458
  event = RunEvent(
417
459
  eventType=RunState.FAIL,
418
460
  eventTime=end_date.isoformat(),
419
- job=self._build_job(job_name=dag_id, job_type=_JOB_TYPE_DAG),
420
- run=Run(
421
- runId=self.build_dag_run_id(
422
- dag_id=dag_id,
423
- logical_date=logical_date,
424
- clear_number=clear_number,
461
+ job=self._build_job(
462
+ job_name=dag_id,
463
+ job_type=_JOB_TYPE_DAG,
464
+ job_owners=owners,
465
+ job_tags=tags,
466
+ ),
467
+ run=self._build_run(
468
+ run_id=self.build_dag_run_id(
469
+ dag_id=dag_id, logical_date=logical_date, clear_number=clear_number
425
470
  ),
426
- facets={
471
+ nominal_start_time=nominal_start_time,
472
+ nominal_end_time=nominal_end_time,
473
+ run_facets={
427
474
  "errorMessage": error_message_run.ErrorMessageRunFacet(
428
475
  message=msg, programmingLanguage="python"
429
476
  ),
430
477
  **get_airflow_state_run_facet(dag_id, run_id, task_ids, dag_run_state),
431
478
  **get_airflow_debug_facet(),
432
- **get_processing_engine_facet(),
433
479
  **run_facets,
434
480
  },
435
481
  ),
@@ -452,49 +498,73 @@ class OpenLineageAdapter(LoggingMixin):
452
498
  run_facets: dict[str, RunFacet] | None = None,
453
499
  ) -> Run:
454
500
  facets: dict[str, RunFacet] = {}
501
+ if run_facets:
502
+ facets.update(run_facets)
455
503
  if nominal_start_time:
456
504
  facets.update(
457
- {"nominalTime": nominal_time_run.NominalTimeRunFacet(nominal_start_time, nominal_end_time)}
505
+ {
506
+ "nominalTime": nominal_time_run.NominalTimeRunFacet(
507
+ nominalStartTime=nominal_start_time,
508
+ nominalEndTime=nominal_end_time,
509
+ producer=_PRODUCER,
510
+ )
511
+ }
458
512
  )
459
- if run_facets:
460
- facets.update(run_facets)
513
+ facets.update(get_processing_engine_facet())
461
514
 
462
515
  return Run(run_id, facets)
463
516
 
464
517
  @staticmethod
465
518
  def _build_job(
466
519
  job_name: str,
467
- job_type: job_type_job.JobTypeJobFacet,
520
+ job_type: Literal["DAG", "TASK"],
468
521
  job_description: str | None = None,
469
- code_location: str | None = None,
470
- owners: list[str] | None = None,
522
+ job_owners: list[str] | None = None,
523
+ job_tags: list[str] | None = None,
471
524
  job_facets: dict[str, JobFacet] | None = None,
472
525
  ):
473
526
  facets: dict[str, JobFacet] = {}
474
-
527
+ if job_facets:
528
+ facets.update(job_facets)
475
529
  if job_description:
476
530
  facets.update(
477
- {"documentation": documentation_job.DocumentationJobFacet(description=job_description)}
531
+ {
532
+ "documentation": documentation_job.DocumentationJobFacet(
533
+ description=job_description, producer=_PRODUCER
534
+ )
535
+ }
478
536
  )
479
- if code_location:
537
+ if job_owners:
480
538
  facets.update(
481
539
  {
482
- "sourceCodeLocation": source_code_location_job.SourceCodeLocationJobFacet(
483
- "", url=code_location
540
+ "ownership": ownership_job.OwnershipJobFacet(
541
+ owners=[ownership_job.Owner(name=owner) for owner in sorted(job_owners)],
542
+ producer=_PRODUCER,
484
543
  )
485
544
  }
486
545
  )
487
- if owners:
546
+ if job_tags:
488
547
  facets.update(
489
548
  {
490
- "ownership": ownership_job.OwnershipJobFacet(
491
- owners=[ownership_job.Owner(name=owner) for owner in owners]
549
+ "tags": tags_job.TagsJobFacet(
550
+ tags=[
551
+ tags_job.TagsJobFacetFields(
552
+ key=tag,
553
+ value=tag,
554
+ source="AIRFLOW",
555
+ )
556
+ for tag in sorted(job_tags)
557
+ ],
558
+ producer=_PRODUCER,
492
559
  )
493
560
  }
494
561
  )
495
- if job_facets:
496
- facets = {**facets, **job_facets}
497
-
498
- facets.update({"jobType": job_type})
562
+ facets.update(
563
+ {
564
+ "jobType": job_type_job.JobTypeJobFacet(
565
+ jobType=job_type, integration="AIRFLOW", processingType="BATCH", producer=_PRODUCER
566
+ )
567
+ }
568
+ )
499
569
 
500
- return Job(conf.namespace(), job_name, facets)
570
+ return Job(namespace=conf.namespace(), name=job_name, facets=facets)
@@ -152,17 +152,6 @@ class OpenLineageListener:
152
152
  return
153
153
 
154
154
  # Needs to be calculated outside of inner method so that it gets cached for usage in fork processes
155
- data_interval_start = dagrun.data_interval_start
156
- if isinstance(data_interval_start, datetime):
157
- data_interval_start = data_interval_start.isoformat()
158
- data_interval_end = dagrun.data_interval_end
159
- if isinstance(data_interval_end, datetime):
160
- data_interval_end = data_interval_end.isoformat()
161
-
162
- clear_number = 0
163
- if hasattr(dagrun, "clear_number"):
164
- clear_number = dagrun.clear_number
165
-
166
155
  debug_facet = get_airflow_debug_facet()
167
156
 
168
157
  @print_warning(self.log)
@@ -176,6 +165,10 @@ class OpenLineageListener:
176
165
  if AIRFLOW_V_3_0_PLUS and date is None:
177
166
  date = dagrun.run_after
178
167
 
168
+ clear_number = 0
169
+ if hasattr(dagrun, "clear_number"):
170
+ clear_number = dagrun.clear_number
171
+
179
172
  parent_run_id = self.adapter.build_dag_run_id(
180
173
  dag_id=dag.dag_id,
181
174
  logical_date=date,
@@ -192,6 +185,13 @@ class OpenLineageListener:
192
185
  event_type = RunState.RUNNING.value.lower()
193
186
  operator_name = task.task_type.lower()
194
187
 
188
+ data_interval_start = dagrun.data_interval_start
189
+ if isinstance(data_interval_start, datetime):
190
+ data_interval_start = data_interval_start.isoformat()
191
+ data_interval_end = dagrun.data_interval_end
192
+ if isinstance(data_interval_end, datetime):
193
+ data_interval_end = data_interval_end.isoformat()
194
+
195
195
  with Stats.timer(f"ol.extract.{event_type}.{operator_name}"):
196
196
  task_metadata = self.extractor_manager.extract_metadata(
197
197
  dagrun=dagrun, task=task, task_instance_state=TaskInstanceState.RUNNING
@@ -202,10 +202,11 @@ class OpenLineageListener:
202
202
  job_name=get_job_name(task),
203
203
  job_description=dag.description,
204
204
  event_time=start_date.isoformat(),
205
- code_location=None,
206
205
  nominal_start_time=data_interval_start,
207
206
  nominal_end_time=data_interval_end,
208
- owners=dag.owner.split(", "),
207
+ # If task owner is default ("airflow"), use DAG owner instead that may have more details
208
+ owners=[x.strip() for x in (task if task.owner != "airflow" else dag).owner.split(",")],
209
+ tags=dag.tags,
209
210
  task=task_metadata,
210
211
  run_facets={
211
212
  **get_task_parent_run_facet(parent_run_id=parent_run_id, parent_job_name=dag.dag_id),
@@ -303,6 +304,13 @@ class OpenLineageListener:
303
304
  event_type = RunState.COMPLETE.value.lower()
304
305
  operator_name = task.task_type.lower()
305
306
 
307
+ data_interval_start = dagrun.data_interval_start
308
+ if isinstance(data_interval_start, datetime):
309
+ data_interval_start = data_interval_start.isoformat()
310
+ data_interval_end = dagrun.data_interval_end
311
+ if isinstance(data_interval_end, datetime):
312
+ data_interval_end = data_interval_end.isoformat()
313
+
306
314
  with Stats.timer(f"ol.extract.{event_type}.{operator_name}"):
307
315
  task_metadata = self.extractor_manager.extract_metadata(
308
316
  dagrun=dagrun,
@@ -316,6 +324,11 @@ class OpenLineageListener:
316
324
  job_name=get_job_name(task),
317
325
  end_time=end_date.isoformat(),
318
326
  task=task_metadata,
327
+ # If task owner is default ("airflow"), use DAG owner instead that may have more details
328
+ owners=[x.strip() for x in (task if task.owner != "airflow" else dag).owner.split(",")],
329
+ tags=dag.tags,
330
+ nominal_start_time=data_interval_start,
331
+ nominal_end_time=data_interval_end,
319
332
  run_facets={
320
333
  **get_task_parent_run_facet(parent_run_id=parent_run_id, parent_job_name=dag.dag_id),
321
334
  **get_user_provided_run_facets(task_instance, TaskInstanceState.SUCCESS),
@@ -422,6 +435,13 @@ class OpenLineageListener:
422
435
  event_type = RunState.FAIL.value.lower()
423
436
  operator_name = task.task_type.lower()
424
437
 
438
+ data_interval_start = dagrun.data_interval_start
439
+ if isinstance(data_interval_start, datetime):
440
+ data_interval_start = data_interval_start.isoformat()
441
+ data_interval_end = dagrun.data_interval_end
442
+ if isinstance(data_interval_end, datetime):
443
+ data_interval_end = data_interval_end.isoformat()
444
+
425
445
  with Stats.timer(f"ol.extract.{event_type}.{operator_name}"):
426
446
  task_metadata = self.extractor_manager.extract_metadata(
427
447
  dagrun=dagrun,
@@ -436,6 +456,11 @@ class OpenLineageListener:
436
456
  end_time=end_date.isoformat(),
437
457
  task=task_metadata,
438
458
  error=error,
459
+ nominal_start_time=data_interval_start,
460
+ nominal_end_time=data_interval_end,
461
+ tags=dag.tags,
462
+ # If task owner is default ("airflow"), use DAG owner instead that may have more details
463
+ owners=[x.strip() for x in (task if task.owner != "airflow" else dag).owner.split(",")],
439
464
  run_facets={
440
465
  **get_task_parent_run_facet(parent_run_id=parent_run_id, parent_job_name=dag.dag_id),
441
466
  **get_user_provided_run_facets(task_instance, TaskInstanceState.FAILED),
@@ -605,6 +630,7 @@ class OpenLineageListener:
605
630
  clear_number=dag_run.clear_number,
606
631
  owners=[x.strip() for x in dag_run.dag.owner.split(",")] if dag_run.dag else None,
607
632
  description=dag_run.dag.description if dag_run.dag else None,
633
+ tags=dag_run.dag.tags if dag_run.dag else [],
608
634
  # AirflowJobFacet should be created outside ProcessPoolExecutor that pickles objects,
609
635
  # as it causes lack of some TaskGroup attributes and crashes event emission.
610
636
  job_facets=get_airflow_job_facet(dag_run=dag_run),
@@ -634,13 +660,22 @@ class OpenLineageListener:
634
660
  if AIRFLOW_V_3_0_PLUS and date is None:
635
661
  date = dag_run.run_after
636
662
 
663
+ data_interval_start = (
664
+ dag_run.data_interval_start.isoformat() if dag_run.data_interval_start else None
665
+ )
666
+ data_interval_end = dag_run.data_interval_end.isoformat() if dag_run.data_interval_end else None
667
+
637
668
  self.submit_callable(
638
669
  self.adapter.dag_success,
639
670
  dag_id=dag_run.dag_id,
640
671
  run_id=dag_run.run_id,
641
672
  end_date=dag_run.end_date,
673
+ nominal_start_time=data_interval_start,
674
+ nominal_end_time=data_interval_end,
642
675
  logical_date=date,
643
676
  clear_number=dag_run.clear_number,
677
+ owners=[x.strip() for x in dag_run.dag.owner.split(",")] if dag_run.dag else None,
678
+ tags=dag_run.dag.tags if dag_run.dag else [],
644
679
  task_ids=task_ids,
645
680
  dag_run_state=dag_run.get_state(),
646
681
  run_facets={**get_airflow_dag_run_facet(dag_run)},
@@ -670,13 +705,22 @@ class OpenLineageListener:
670
705
  if AIRFLOW_V_3_0_PLUS and date is None:
671
706
  date = dag_run.run_after
672
707
 
708
+ data_interval_start = (
709
+ dag_run.data_interval_start.isoformat() if dag_run.data_interval_start else None
710
+ )
711
+ data_interval_end = dag_run.data_interval_end.isoformat() if dag_run.data_interval_end else None
712
+
673
713
  self.submit_callable(
674
714
  self.adapter.dag_failed,
675
715
  dag_id=dag_run.dag_id,
676
716
  run_id=dag_run.run_id,
677
717
  end_date=dag_run.end_date,
718
+ nominal_start_time=data_interval_start,
719
+ nominal_end_time=data_interval_end,
678
720
  logical_date=date,
679
721
  clear_number=dag_run.clear_number,
722
+ owners=[x.strip() for x in dag_run.dag.owner.split(",")] if dag_run.dag else None,
723
+ tags=dag_run.dag.tags if dag_run.dag else [],
680
724
  dag_run_state=dag_run.get_state(),
681
725
  task_ids=task_ids,
682
726
  msg=msg,
@@ -90,6 +90,16 @@ def lineage_parent_id(task_instance: TaskInstance):
90
90
 
91
91
 
92
92
  def lineage_root_parent_id(task_instance: TaskInstance):
93
+ """
94
+ Macro function which returns a unique identifier of given task that can be used to create root information for ParentRunFacet.
95
+
96
+ This identifier is composed of the namespace, dag name, and generated run id for given dag, structured
97
+ as '{namespace}/{job_name}/{run_id}'.
98
+
99
+ .. seealso::
100
+ For more information take a look at the guide:
101
+ :ref:`howto/macros:openlineage`
102
+ """
93
103
  return "/".join(
94
104
  (
95
105
  lineage_job_namespace(),
@@ -117,7 +127,7 @@ def _get_logical_date(task_instance):
117
127
  context = task_instance.get_template_context()
118
128
  if hasattr(task_instance, "dag_run"):
119
129
  dag_run = task_instance.dag_run
120
- elif hasattr(context, "dag_run"):
130
+ else:
121
131
  dag_run = context["dag_run"]
122
132
  if hasattr(dag_run, "logical_date") and dag_run.logical_date:
123
133
  date = dag_run.logical_date
@@ -23,6 +23,9 @@ from airflow.providers.openlineage.plugins.macros import (
23
23
  lineage_job_name,
24
24
  lineage_job_namespace,
25
25
  lineage_parent_id,
26
+ lineage_root_job_name,
27
+ lineage_root_parent_id,
28
+ lineage_root_run_id,
26
29
  lineage_run_id,
27
30
  )
28
31
 
@@ -37,7 +40,15 @@ class OpenLineageProviderPlugin(AirflowPlugin):
37
40
 
38
41
  name = "OpenLineageProviderPlugin"
39
42
  if not conf.is_disabled():
40
- macros = [lineage_job_namespace, lineage_job_name, lineage_run_id, lineage_parent_id]
43
+ macros = [
44
+ lineage_job_namespace,
45
+ lineage_job_name,
46
+ lineage_run_id,
47
+ lineage_parent_id,
48
+ lineage_root_run_id,
49
+ lineage_root_job_name,
50
+ lineage_root_parent_id,
51
+ ]
41
52
  listeners = [get_openlineage_listener()]
42
53
  from airflow.lineage.hook import HookLineageReader
43
54
 
@@ -70,7 +70,8 @@ if TYPE_CHECKING:
70
70
 
71
71
  from airflow.models import TaskInstance
72
72
  from airflow.providers.common.compat.assets import Asset
73
- from airflow.sdk import DAG, MappedOperator
73
+ from airflow.sdk import DAG
74
+ from airflow.sdk.definitions.mappedoperator import MappedOperator
74
75
  from airflow.sdk.execution_time.secrets_masker import (
75
76
  Redactable,
76
77
  Redacted,
@@ -80,7 +81,8 @@ if TYPE_CHECKING:
80
81
  from airflow.utils.state import DagRunState, TaskInstanceState
81
82
  else:
82
83
  try:
83
- from airflow.sdk import DAG, MappedOperator
84
+ from airflow.sdk import DAG
85
+ from airflow.sdk.definitions.mappedoperator import MappedOperator
84
86
  except ImportError:
85
87
  from airflow.models import DAG, MappedOperator
86
88
 
@@ -444,6 +446,7 @@ class TaskInstanceInfo(InfoJsonEncodable):
444
446
 
445
447
  includes = ["duration", "try_number", "pool", "queued_dttm", "log_url"]
446
448
  casts = {
449
+ "log_url": lambda ti: getattr(ti, "log_url", None),
447
450
  "map_index": lambda ti: ti.map_index if getattr(ti, "map_index", -1) != -1 else None,
448
451
  "dag_bundle_version": lambda ti: (
449
452
  ti.bundle_instance.version if hasattr(ti, "bundle_instance") else None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: apache-airflow-providers-openlineage
3
- Version: 2.3.0
3
+ Version: 2.4.0
4
4
  Summary: Provider package apache-airflow-providers-openlineage for Apache Airflow
5
5
  Keywords: airflow-provider,openlineage,airflow,integration
6
6
  Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -27,8 +27,8 @@ Requires-Dist: attrs>=22.2
27
27
  Requires-Dist: openlineage-integration-common>=1.31.0
28
28
  Requires-Dist: openlineage-python>=1.31.0
29
29
  Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
30
- Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.3.0/changelog.html
31
- Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.3.0
30
+ Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.4.0/changelog.html
31
+ Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.4.0
32
32
  Project-URL: Mastodon, https://fosstodon.org/@airflow
33
33
  Project-URL: Slack Chat, https://s.apache.org/airflow-slack
34
34
  Project-URL: Source Code, https://github.com/apache/airflow
@@ -59,7 +59,7 @@ Project-URL: YouTube, https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/
59
59
 
60
60
  Package ``apache-airflow-providers-openlineage``
61
61
 
62
- Release: ``2.3.0``
62
+ Release: ``2.4.0``
63
63
 
64
64
 
65
65
  `OpenLineage <https://openlineage.io/>`__
@@ -72,7 +72,7 @@ This is a provider package for ``openlineage`` provider. All classes for this pr
72
72
  are in ``airflow.providers.openlineage`` python package.
73
73
 
74
74
  You can find package information and changelog for the provider
75
- in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.3.0/>`_.
75
+ in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.4.0/>`_.
76
76
 
77
77
  Installation
78
78
  ------------
@@ -118,5 +118,5 @@ Dependent package
118
118
  ================================================================================================================== =================
119
119
 
120
120
  The changelog for the provider package can be found in the
121
- `changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.3.0/changelog.html>`_.
121
+ `changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.4.0/changelog.html>`_.
122
122
 
@@ -1,7 +1,7 @@
1
1
  airflow/providers/openlineage/LICENSE,sha256=gXPVwptPlW1TJ4HSuG5OMPg-a3h43OGMkZRR1rpwfJA,10850
2
- airflow/providers/openlineage/__init__.py,sha256=Wcw1Qb20o0-NwmxTMURRAqxVRr3y9F5hmzHOJfdaqiI,1500
2
+ airflow/providers/openlineage/__init__.py,sha256=Pyul6qEokTW4wHohh2fvYPpwWexk1XVfwNoH4JlZNuE,1500
3
3
  airflow/providers/openlineage/conf.py,sha256=aYdLU7iHBdGIU8ZAC5iUiIDgXP9gvP9r_z5hTAbXPOU,5535
4
- airflow/providers/openlineage/get_provider_info.py,sha256=108mAg-tdcTBmPYwJ9M3wcmhc-i1bAs5OB2MLnSBA-4,9256
4
+ airflow/providers/openlineage/get_provider_info.py,sha256=z0oCEDCM3UyrlXrQt0ksTi6jYd2vqjiSSudBM7hLNZw,9255
5
5
  airflow/providers/openlineage/sqlparser.py,sha256=N38XhkU-lxwxnYevQpq63JOBi4rzp0q56JjxO3H24W8,20340
6
6
  airflow/providers/openlineage/version_compat.py,sha256=j5PCtXvZ71aBjixu-EFTNtVDPsngzzs7os0ZQDgFVDk,1536
7
7
  airflow/providers/openlineage/extractors/__init__.py,sha256=I0X4f6zUniclyD9zT0DFHRImpCpJVP4MkPJT3cd7X5I,1081
@@ -16,17 +16,17 @@ airflow/providers/openlineage/facets/AirflowRunFacet.json,sha256=70mEaZShgSJp-2x
16
16
  airflow/providers/openlineage/facets/AirflowStateRunFacet.json,sha256=xhHQEKD9Jopw-oqbkCCrrwFjfXnxvuJAritsmegKjuQ,937
17
17
  airflow/providers/openlineage/facets/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
18
18
  airflow/providers/openlineage/plugins/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
19
- airflow/providers/openlineage/plugins/adapter.py,sha256=4ylPzbIDR9i3a3NuPAOS8HplExRu3MlUo9t-fLJH-vc,19404
19
+ airflow/providers/openlineage/plugins/adapter.py,sha256=rtvjPvzj9fN6Crn9Sg0VTjqUrhVqEWvXakID2538JI4,21519
20
20
  airflow/providers/openlineage/plugins/facets.py,sha256=VvyMYR6ONkC95q5FdNmohv0scbA1Ej_B5cQ97as5GvA,4161
21
- airflow/providers/openlineage/plugins/listener.py,sha256=vcQcOczTiLTGepmIAZPTWkk5ysh1xR5fa-WvG2TYy1w,27988
22
- airflow/providers/openlineage/plugins/macros.py,sha256=uAN9UBbWE_PvNLNf4YftGap0ovJaf8LHGqO3KnAVOP4,4455
23
- airflow/providers/openlineage/plugins/openlineage.py,sha256=_vLa0x5mvt8ZkA7baI39PXxkYhzvdX9um9wB9qBGAik,1789
21
+ airflow/providers/openlineage/plugins/listener.py,sha256=4BKCt3MvQF0xeVd2DoOTsFwF7VfTGKktihzf-AMRdeE,30733
22
+ airflow/providers/openlineage/plugins/macros.py,sha256=0n-oF98JmiDEZ3TDpNVMShkjezimICasUYt0YuXhztY,4837
23
+ airflow/providers/openlineage/plugins/openlineage.py,sha256=dP3GOVtOGAIokeaeRx2OW_c1TKAxDvATlD9OGMyXqr0,2032
24
24
  airflow/providers/openlineage/utils/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
25
25
  airflow/providers/openlineage/utils/selective_enable.py,sha256=YyrUQ7Djv5o46XdH83N_G8AXAZ9C_aKPa534pbNVp08,3441
26
26
  airflow/providers/openlineage/utils/spark.py,sha256=X5liLxVLgQcgPF_0lFtQULeMOv_9dGj-HFjtZvWFgOo,7626
27
27
  airflow/providers/openlineage/utils/sql.py,sha256=vkKrrdENEMVG8gtzV6yuTXMa2Z9fBAEXmxDVIDaVncI,9571
28
- airflow/providers/openlineage/utils/utils.py,sha256=DFO462SxSnZv3lOapoZYkiC7j1PrMtWDw0LZeqKKxIo,31368
29
- apache_airflow_providers_openlineage-2.3.0.dist-info/entry_points.txt,sha256=GAx0_i2OeZzqaiiiYuA-xchICDXiCT5kVqpKSxsOjt4,214
30
- apache_airflow_providers_openlineage-2.3.0.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
31
- apache_airflow_providers_openlineage-2.3.0.dist-info/METADATA,sha256=6uLjqJpVJBvxd57h5TIMFa05IUWKnMgBluKEM-M2BKc,5687
32
- apache_airflow_providers_openlineage-2.3.0.dist-info/RECORD,,
28
+ airflow/providers/openlineage/utils/utils.py,sha256=4NjZ6k0X4dEYJVc7h50ukK9tq5PN45n31VcjV15nLxY,31540
29
+ apache_airflow_providers_openlineage-2.4.0.dist-info/entry_points.txt,sha256=GAx0_i2OeZzqaiiiYuA-xchICDXiCT5kVqpKSxsOjt4,214
30
+ apache_airflow_providers_openlineage-2.4.0.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
31
+ apache_airflow_providers_openlineage-2.4.0.dist-info/METADATA,sha256=mfXayl1ebXf7aR5T3t_lN34AfBPObOBfZv5Dpqpyugc,5687
32
+ apache_airflow_providers_openlineage-2.4.0.dist-info/RECORD,,