apache-airflow-providers-openlineage 2.1.1rc1__py3-none-any.whl → 2.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of apache-airflow-providers-openlineage might be problematic. Click here for more details.

@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version
29
29
 
30
30
  __all__ = ["__version__"]
31
31
 
32
- __version__ = "2.1.1"
32
+ __version__ = "2.1.2"
33
33
 
34
34
  if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
35
35
  "2.9.0"
@@ -29,14 +29,16 @@ with warnings.catch_warnings():
29
29
  from openlineage.client.facet import BaseFacet as BaseFacet_V1
30
30
  from openlineage.client.facet_v2 import JobFacet, RunFacet
31
31
 
32
- from airflow.providers.openlineage.utils.utils import AIRFLOW_V_2_10_PLUS
33
32
  from airflow.utils.log.logging_mixin import LoggingMixin
34
- from airflow.utils.state import TaskInstanceState
35
33
 
36
34
  # this is not to break static checks compatibility with v1 OpenLineage facet classes
37
35
  DatasetSubclass = TypeVar("DatasetSubclass", bound=OLDataset)
38
36
  BaseFacetSubclass = TypeVar("BaseFacetSubclass", bound=Union[BaseFacet_V1, RunFacet, JobFacet])
39
37
 
38
+ OL_METHOD_NAME_START = "get_openlineage_facets_on_start"
39
+ OL_METHOD_NAME_COMPLETE = "get_openlineage_facets_on_complete"
40
+ OL_METHOD_NAME_FAIL = "get_openlineage_facets_on_failure"
41
+
40
42
 
41
43
  @define
42
44
  class OperatorLineage(Generic[DatasetSubclass, BaseFacetSubclass]):
@@ -81,6 +83,9 @@ class BaseExtractor(ABC, LoggingMixin):
81
83
  def extract_on_complete(self, task_instance) -> OperatorLineage | None:
82
84
  return self.extract()
83
85
 
86
+ def extract_on_failure(self, task_instance) -> OperatorLineage | None:
87
+ return self.extract_on_complete(task_instance)
88
+
84
89
 
85
90
  class DefaultExtractor(BaseExtractor):
86
91
  """Extractor that uses `get_openlineage_facets_on_start/complete/failure` methods."""
@@ -96,46 +101,41 @@ class DefaultExtractor(BaseExtractor):
96
101
  return []
97
102
 
98
103
  def _execute_extraction(self) -> OperatorLineage | None:
99
- # OpenLineage methods are optional - if there's no method, return None
100
- try:
101
- self.log.debug(
102
- "Trying to execute `get_openlineage_facets_on_start` for %s.", self.operator.task_type
103
- )
104
- return self._get_openlineage_facets(self.operator.get_openlineage_facets_on_start) # type: ignore
105
- except ImportError:
106
- self.log.error(
107
- "OpenLineage provider method failed to import OpenLineage integration. "
108
- "This should not happen. Please report this bug to developers."
109
- )
110
- return None
111
- except AttributeError:
104
+ method = getattr(self.operator, OL_METHOD_NAME_START, None)
105
+ if callable(method):
112
106
  self.log.debug(
113
- "Operator %s does not have the get_openlineage_facets_on_start method.",
114
- self.operator.task_type,
107
+ "Trying to execute '%s' method of '%s'.", OL_METHOD_NAME_START, self.operator.task_type
115
108
  )
116
- return OperatorLineage()
109
+ return self._get_openlineage_facets(method)
110
+ self.log.debug(
111
+ "Operator '%s' does not have '%s' method.", self.operator.task_type, OL_METHOD_NAME_START
112
+ )
113
+ return OperatorLineage()
117
114
 
118
115
  def extract_on_complete(self, task_instance) -> OperatorLineage | None:
119
- failed_states = [TaskInstanceState.FAILED, TaskInstanceState.UP_FOR_RETRY]
120
- if not AIRFLOW_V_2_10_PLUS: # todo: remove when min airflow version >= 2.10.0
121
- # Before fix (#41053) implemented in Airflow 2.10 TaskInstance's state was still RUNNING when
122
- # being passed to listener's on_failure method. Since `extract_on_complete()` is only called
123
- # after task completion, RUNNING state means that we are dealing with FAILED task in < 2.10
124
- failed_states = [TaskInstanceState.RUNNING]
125
-
126
- if task_instance.state in failed_states:
127
- on_failed = getattr(self.operator, "get_openlineage_facets_on_failure", None)
128
- if on_failed and callable(on_failed):
129
- self.log.debug(
130
- "Executing `get_openlineage_facets_on_failure` for %s.", self.operator.task_type
131
- )
132
- return self._get_openlineage_facets(on_failed, task_instance)
133
- on_complete = getattr(self.operator, "get_openlineage_facets_on_complete", None)
134
- if on_complete and callable(on_complete):
135
- self.log.debug("Executing `get_openlineage_facets_on_complete` for %s.", self.operator.task_type)
136
- return self._get_openlineage_facets(on_complete, task_instance)
116
+ method = getattr(self.operator, OL_METHOD_NAME_COMPLETE, None)
117
+ if callable(method):
118
+ self.log.debug(
119
+ "Trying to execute '%s' method of '%s'.", OL_METHOD_NAME_COMPLETE, self.operator.task_type
120
+ )
121
+ return self._get_openlineage_facets(method, task_instance)
122
+ self.log.debug(
123
+ "Operator '%s' does not have '%s' method.", self.operator.task_type, OL_METHOD_NAME_COMPLETE
124
+ )
137
125
  return self.extract()
138
126
 
127
+ def extract_on_failure(self, task_instance) -> OperatorLineage | None:
128
+ method = getattr(self.operator, OL_METHOD_NAME_FAIL, None)
129
+ if callable(method):
130
+ self.log.debug(
131
+ "Trying to execute '%s' method of '%s'.", OL_METHOD_NAME_FAIL, self.operator.task_type
132
+ )
133
+ return self._get_openlineage_facets(method, task_instance)
134
+ self.log.debug(
135
+ "Operator '%s' does not have '%s' method.", self.operator.task_type, OL_METHOD_NAME_FAIL
136
+ )
137
+ return self.extract_on_complete(task_instance)
138
+
139
139
  def _get_openlineage_facets(self, get_facets_method, *args) -> OperatorLineage | None:
140
140
  try:
141
141
  facets: OperatorLineage = get_facets_method(*args)
@@ -152,6 +152,10 @@ class DefaultExtractor(BaseExtractor):
152
152
  "OpenLineage provider method failed to import OpenLineage integration. "
153
153
  "This should not happen."
154
154
  )
155
- except Exception:
156
- self.log.warning("OpenLineage provider method failed to extract data from provider. ")
155
+ except Exception as e:
156
+ self.log.warning(
157
+ "OpenLineage method failed to extract data from Operator with the following exception: `%s`",
158
+ e,
159
+ )
160
+ self.log.debug("OpenLineage extraction failure details:", exc_info=True)
157
161
  return None
@@ -24,7 +24,11 @@ from airflow.providers.common.compat.openlineage.utils.utils import (
24
24
  )
25
25
  from airflow.providers.openlineage import conf
26
26
  from airflow.providers.openlineage.extractors import BaseExtractor, OperatorLineage
27
- from airflow.providers.openlineage.extractors.base import DefaultExtractor
27
+ from airflow.providers.openlineage.extractors.base import (
28
+ OL_METHOD_NAME_COMPLETE,
29
+ OL_METHOD_NAME_START,
30
+ DefaultExtractor,
31
+ )
28
32
  from airflow.providers.openlineage.extractors.bash import BashExtractor
29
33
  from airflow.providers.openlineage.extractors.python import PythonExtractor
30
34
  from airflow.providers.openlineage.utils.utils import (
@@ -32,6 +36,7 @@ from airflow.providers.openlineage.utils.utils import (
32
36
  try_import_from_string,
33
37
  )
34
38
  from airflow.utils.log.logging_mixin import LoggingMixin
39
+ from airflow.utils.state import TaskInstanceState
35
40
 
36
41
  if TYPE_CHECKING:
37
42
  from openlineage.client.event_v2 import Dataset
@@ -87,7 +92,9 @@ class ExtractorManager(LoggingMixin):
87
92
  def add_extractor(self, operator_class: str, extractor: type[BaseExtractor]):
88
93
  self.extractors[operator_class] = extractor
89
94
 
90
- def extract_metadata(self, dagrun, task, complete: bool = False, task_instance=None) -> OperatorLineage:
95
+ def extract_metadata(
96
+ self, dagrun, task, task_instance_state: TaskInstanceState, task_instance=None
97
+ ) -> OperatorLineage:
91
98
  extractor = self._get_extractor(task)
92
99
  task_info = (
93
100
  f"task_type={task.task_type} "
@@ -104,10 +111,15 @@ class ExtractorManager(LoggingMixin):
104
111
  extractor.__class__.__name__,
105
112
  str(task_info),
106
113
  )
107
- if complete:
108
- task_metadata = extractor.extract_on_complete(task_instance)
109
- else:
114
+ if task_instance_state == TaskInstanceState.RUNNING:
110
115
  task_metadata = extractor.extract()
116
+ elif task_instance_state == TaskInstanceState.FAILED:
117
+ if callable(getattr(extractor, "extract_on_failure", None)):
118
+ task_metadata = extractor.extract_on_failure(task_instance)
119
+ else:
120
+ task_metadata = extractor.extract_on_complete(task_instance)
121
+ else:
122
+ task_metadata = extractor.extract_on_complete(task_instance)
111
123
 
112
124
  self.log.debug(
113
125
  "Found task metadata for operation %s: %s",
@@ -122,7 +134,7 @@ class ExtractorManager(LoggingMixin):
122
134
  task_metadata.inputs = inputs
123
135
  task_metadata.outputs = outputs
124
136
  else:
125
- self.extract_inlets_and_outlets(task_metadata, task.inlets, task.outlets)
137
+ self.extract_inlets_and_outlets(task_metadata, task)
126
138
  return task_metadata
127
139
 
128
140
  except Exception as e:
@@ -132,6 +144,7 @@ class ExtractorManager(LoggingMixin):
132
144
  e,
133
145
  task_info,
134
146
  )
147
+ self.log.debug("OpenLineage extraction failure details:", exc_info=True)
135
148
  elif (hook_lineage := self.get_hook_lineage()) is not None:
136
149
  inputs, outputs = hook_lineage
137
150
  task_metadata = OperatorLineage(inputs=inputs, outputs=outputs)
@@ -143,9 +156,7 @@ class ExtractorManager(LoggingMixin):
143
156
  task_metadata = OperatorLineage(
144
157
  run_facets=get_unknown_source_attribute_run_facet(task=task),
145
158
  )
146
- inlets = task.get_inlet_defs()
147
- outlets = task.get_outlet_defs()
148
- self.extract_inlets_and_outlets(task_metadata, inlets, outlets)
159
+ self.extract_inlets_and_outlets(task_metadata, task)
149
160
  return task_metadata
150
161
 
151
162
  return OperatorLineage()
@@ -155,13 +166,9 @@ class ExtractorManager(LoggingMixin):
155
166
  return self.extractors[task.task_type]
156
167
 
157
168
  def method_exists(method_name):
158
- method = getattr(task, method_name, None)
159
- if method:
160
- return callable(method)
169
+ return callable(getattr(task, method_name, None))
161
170
 
162
- if method_exists("get_openlineage_facets_on_start") or method_exists(
163
- "get_openlineage_facets_on_complete"
164
- ):
171
+ if method_exists(OL_METHOD_NAME_START) or method_exists(OL_METHOD_NAME_COMPLETE):
165
172
  return self.default_extractor
166
173
  return None
167
174
 
@@ -174,28 +181,21 @@ class ExtractorManager(LoggingMixin):
174
181
  return extractor(task)
175
182
  return None
176
183
 
177
- def extract_inlets_and_outlets(
178
- self,
179
- task_metadata: OperatorLineage,
180
- inlets: list,
181
- outlets: list,
182
- ):
183
- if inlets or outlets:
184
+ def extract_inlets_and_outlets(self, task_metadata: OperatorLineage, task) -> None:
185
+ if task.inlets or task.outlets:
184
186
  self.log.debug("Manually extracting lineage metadata from inlets and outlets")
185
- for i in inlets:
187
+ for i in task.inlets:
186
188
  d = self.convert_to_ol_dataset(i)
187
189
  if d:
188
190
  task_metadata.inputs.append(d)
189
- for o in outlets:
191
+ for o in task.outlets:
190
192
  d = self.convert_to_ol_dataset(o)
191
193
  if d:
192
194
  task_metadata.outputs.append(d)
193
195
 
194
196
  def get_hook_lineage(self) -> tuple[list[Dataset], list[Dataset]] | None:
195
197
  try:
196
- from airflow.providers.common.compat.lineage.hook import (
197
- get_hook_lineage_collector,
198
- )
198
+ from airflow.providers.common.compat.lineage.hook import get_hook_lineage_collector
199
199
  except ImportError:
200
200
  return None
201
201
 
@@ -204,6 +204,7 @@ class ExtractorManager(LoggingMixin):
204
204
  if not get_hook_lineage_collector().has_collected:
205
205
  return None
206
206
 
207
+ self.log.debug("OpenLineage will extract lineage from Hook Lineage Collector.")
207
208
  return (
208
209
  [
209
210
  asset
@@ -313,5 +314,5 @@ class ExtractorManager(LoggingMixin):
313
314
  job_facets=task_metadata.job_facets,
314
315
  )
315
316
  except AttributeError:
316
- self.log.warning("Extractor returns non-valid metadata: %s", task_metadata)
317
+ self.log.warning("OpenLineage extractor returns non-valid metadata: `%s`", task_metadata)
317
318
  return None
@@ -27,8 +27,9 @@ def get_provider_info():
27
27
  "name": "OpenLineage Airflow",
28
28
  "description": "`OpenLineage <https://openlineage.io/>`__\n",
29
29
  "state": "ready",
30
- "source-date-epoch": 1741509355,
30
+ "source-date-epoch": 1743477859,
31
31
  "versions": [
32
+ "2.1.2",
32
33
  "2.1.1",
33
34
  "2.1.0",
34
35
  "2.0.0",
@@ -85,7 +85,7 @@ class OpenLineageAdapter(LoggingMixin):
85
85
  if config:
86
86
  self.log.debug(
87
87
  "OpenLineage configuration found. Transport type: `%s`",
88
- config.get("type", "no type provided"),
88
+ config.get("transport", {}).get("type", "no type provided"),
89
89
  )
90
90
  self._client = OpenLineageClient(config=config) # type: ignore[call-arg]
91
91
  else:
@@ -159,11 +159,20 @@ class OpenLineageAdapter(LoggingMixin):
159
159
  stack.enter_context(Stats.timer(f"ol.emit.attempts.{event_type}.{transport_type}"))
160
160
  stack.enter_context(Stats.timer("ol.emit.attempts"))
161
161
  self._client.emit(redacted_event)
162
- self.log.debug("Successfully emitted OpenLineage event of id %s", event.run.runId)
163
- except Exception:
162
+ self.log.info(
163
+ "Successfully emitted OpenLineage `%s` event of id `%s`",
164
+ event_type.upper(),
165
+ event.run.runId,
166
+ )
167
+ except Exception as e:
164
168
  Stats.incr("ol.emit.failed")
165
- self.log.warning("Failed to emit OpenLineage event of id %s", event.run.runId)
166
- self.log.debug("OpenLineage emission failure: %s", exc_info=True)
169
+ self.log.warning(
170
+ "Failed to emit OpenLineage `%s` event of id `%s` with the following exception: `%s`",
171
+ event_type.upper(),
172
+ event.run.runId,
173
+ e,
174
+ )
175
+ self.log.debug("OpenLineage emission failure details:", exc_info=True)
167
176
 
168
177
  return redacted_event
169
178
 
@@ -371,7 +380,7 @@ class OpenLineageAdapter(LoggingMixin):
371
380
  # Catch all exceptions to prevent ProcessPoolExecutor from silently swallowing them.
372
381
  # This ensures that any unexpected exceptions are logged for debugging purposes.
373
382
  # This part cannot be wrapped to deduplicate code, otherwise the method cannot be pickled in multiprocessing.
374
- self.log.warning("Failed to emit DAG started event: \n %s", traceback.format_exc())
383
+ self.log.warning("Failed to emit OpenLineage DAG started event: \n %s", traceback.format_exc())
375
384
 
376
385
  def dag_success(
377
386
  self,
@@ -409,7 +418,7 @@ class OpenLineageAdapter(LoggingMixin):
409
418
  # Catch all exceptions to prevent ProcessPoolExecutor from silently swallowing them.
410
419
  # This ensures that any unexpected exceptions are logged for debugging purposes.
411
420
  # This part cannot be wrapped to deduplicate code, otherwise the method cannot be pickled in multiprocessing.
412
- self.log.warning("Failed to emit DAG success event: \n %s", traceback.format_exc())
421
+ self.log.warning("Failed to emit OpenLineage DAG success event: \n %s", traceback.format_exc())
413
422
 
414
423
  def dag_failed(
415
424
  self,
@@ -453,7 +462,7 @@ class OpenLineageAdapter(LoggingMixin):
453
462
  # Catch all exceptions to prevent ProcessPoolExecutor from silently swallowing them.
454
463
  # This ensures that any unexpected exceptions are logged for debugging purposes.
455
464
  # This part cannot be wrapped to deduplicate code, otherwise the method cannot be pickled in multiprocessing.
456
- self.log.warning("Failed to emit DAG failed event: \n %s", traceback.format_exc())
465
+ self.log.warning("Failed to emit OpenLineage DAG failed event: \n %s", traceback.format_exc())
457
466
 
458
467
  @staticmethod
459
468
  def _build_run(
@@ -69,13 +69,15 @@ def _get_try_number_success(val):
69
69
 
70
70
  def _executor_initializer():
71
71
  """
72
- Initialize worker processes for the executor used for DagRun listener.
72
+ Initialize processes for the executor used with DAGRun listener's methods (on scheduler).
73
73
 
74
74
  This function must be picklable, so it cannot be defined as an inner method or local function.
75
75
 
76
76
  Reconfigures the ORM engine to prevent issues that arise when multiple processes interact with
77
77
  the Airflow database.
78
78
  """
79
+ # This initializer is used only on the scheduler
80
+ # We can configure_orm regardless of the Airflow version, as DB access is always allowed from scheduler.
79
81
  settings.configure_orm()
80
82
 
81
83
 
@@ -199,7 +201,9 @@ class OpenLineageListener:
199
201
  operator_name = task.task_type.lower()
200
202
 
201
203
  with Stats.timer(f"ol.extract.{event_type}.{operator_name}"):
202
- task_metadata = self.extractor_manager.extract_metadata(dagrun, task)
204
+ task_metadata = self.extractor_manager.extract_metadata(
205
+ dagrun=dagrun, task=task, task_instance_state=TaskInstanceState.RUNNING
206
+ )
203
207
 
204
208
  redacted_event = self.adapter.start_task(
205
209
  run_id=task_uuid,
@@ -302,7 +306,10 @@ class OpenLineageListener:
302
306
 
303
307
  with Stats.timer(f"ol.extract.{event_type}.{operator_name}"):
304
308
  task_metadata = self.extractor_manager.extract_metadata(
305
- dagrun, task, complete=True, task_instance=task_instance
309
+ dagrun=dagrun,
310
+ task=task,
311
+ task_instance_state=TaskInstanceState.SUCCESS,
312
+ task_instance=task_instance,
306
313
  )
307
314
 
308
315
  redacted_event = self.adapter.complete_task(
@@ -423,7 +430,10 @@ class OpenLineageListener:
423
430
 
424
431
  with Stats.timer(f"ol.extract.{event_type}.{operator_name}"):
425
432
  task_metadata = self.extractor_manager.extract_metadata(
426
- dagrun, task, complete=True, task_instance=task_instance
433
+ dagrun=dagrun,
434
+ task=task,
435
+ task_instance_state=TaskInstanceState.FAILED,
436
+ task_instance=task_instance,
427
437
  )
428
438
 
429
439
  redacted_event = self.adapter.fail_task(
@@ -472,7 +482,9 @@ class OpenLineageListener:
472
482
  process.wait(conf.execution_timeout())
473
483
  except psutil.TimeoutExpired:
474
484
  self.log.warning(
475
- "OpenLineage process %s expired. This should not affect process execution.", pid
485
+ "OpenLineage process with pid `%s` expired and will be terminated by listener. "
486
+ "This has no impact on actual task execution status.",
487
+ pid,
476
488
  )
477
489
  self._terminate_with_wait(process)
478
490
  except BaseException:
@@ -481,7 +493,8 @@ class OpenLineageListener:
481
493
  self.log.debug("Process with pid %s finished - parent", pid)
482
494
  else:
483
495
  setproctitle(getproctitle() + " - OpenLineage - " + callable_name)
484
- configure_orm(disable_connection_pool=True)
496
+ if not AIRFLOW_V_3_0_PLUS:
497
+ configure_orm(disable_connection_pool=True)
485
498
  self.log.debug("Executing OpenLineage process - %s - pid %s", callable_name, os.getpid())
486
499
  callable()
487
500
  self.log.debug("Process with current pid finishes after %s", callable_name)
@@ -21,6 +21,7 @@ from typing import TYPE_CHECKING
21
21
  from airflow.providers.openlineage import conf
22
22
  from airflow.providers.openlineage.plugins.adapter import OpenLineageAdapter
23
23
  from airflow.providers.openlineage.utils.utils import get_job_name
24
+ from airflow.providers.openlineage.version_compat import AIRFLOW_V_3_0_PLUS
24
25
 
25
26
  if TYPE_CHECKING:
26
27
  from airflow.models import TaskInstance
@@ -58,15 +59,25 @@ def lineage_run_id(task_instance: TaskInstance):
58
59
  For more information take a look at the guide:
59
60
  :ref:`howto/macros:openlineage`
60
61
  """
61
- if hasattr(task_instance, "logical_date"):
62
- logical_date = task_instance.logical_date
62
+ if AIRFLOW_V_3_0_PLUS:
63
+ context = task_instance.get_template_context()
64
+ if hasattr(task_instance, "dag_run"):
65
+ dag_run = task_instance.dag_run
66
+ elif hasattr(context, "dag_run"):
67
+ dag_run = context["dag_run"]
68
+ if hasattr(dag_run, "logical_date") and dag_run.logical_date:
69
+ date = dag_run.logical_date
70
+ else:
71
+ date = dag_run.run_after
72
+ elif hasattr(task_instance, "logical_date"):
73
+ date = task_instance.logical_date
63
74
  else:
64
- logical_date = task_instance.execution_date
75
+ date = task_instance.execution_date
65
76
  return OpenLineageAdapter.build_task_instance_run_id(
66
77
  dag_id=task_instance.dag_id,
67
78
  task_id=task_instance.task_id,
68
79
  try_number=task_instance.try_number,
69
- logical_date=logical_date,
80
+ logical_date=date,
70
81
  map_index=task_instance.map_index,
71
82
  )
72
83
 
@@ -38,7 +38,7 @@ DISABLE_OL_PARAM = Param(False, const=False)
38
38
  T = TypeVar("T", bound="DAG | Operator")
39
39
 
40
40
  if TYPE_CHECKING:
41
- from airflow.sdk.definitions.baseoperator import BaseOperator as SdkBaseOperator
41
+ from airflow.sdk.bases.operator import BaseOperator as SdkBaseOperator
42
42
 
43
43
 
44
44
  log = logging.getLogger(__name__)
@@ -53,35 +53,73 @@ def _get_parent_job_information_as_spark_properties(context: Context) -> dict:
53
53
 
54
54
  def _get_transport_information_as_spark_properties() -> dict:
55
55
  """Retrieve transport information as Spark properties."""
56
- transport = get_openlineage_listener().adapter.get_or_create_openlineage_client().transport
57
- if transport.kind != "http":
58
- log.info(
59
- "OpenLineage transport type `%s` does not support automatic "
60
- "injection of OpenLineage transport information into Spark properties.",
61
- transport.kind,
62
- )
63
- return {}
64
-
65
- properties = {
66
- "spark.openlineage.transport.type": transport.kind,
67
- "spark.openlineage.transport.url": transport.url,
68
- "spark.openlineage.transport.endpoint": transport.endpoint,
69
- "spark.openlineage.transport.timeoutInMillis": str(
70
- int(transport.timeout * 1000) # convert to milliseconds, as required by Spark integration
71
- ),
72
- }
73
- if transport.compression:
74
- properties["spark.openlineage.transport.compression"] = str(transport.compression)
75
56
 
76
- if hasattr(transport.config.auth, "api_key") and transport.config.auth.get_bearer():
77
- properties["spark.openlineage.transport.auth.type"] = "api_key"
78
- properties["spark.openlineage.transport.auth.apiKey"] = transport.config.auth.get_bearer()
57
+ def _get_transport_information(tp) -> dict:
58
+ properties = {
59
+ "type": tp.kind,
60
+ "url": tp.url,
61
+ "endpoint": tp.endpoint,
62
+ "timeoutInMillis": str(
63
+ int(tp.timeout) * 1000 # convert to milliseconds, as required by Spark integration
64
+ ),
65
+ }
66
+ if hasattr(tp, "compression") and tp.compression:
67
+ properties["compression"] = str(tp.compression)
68
+
69
+ if hasattr(tp.config.auth, "api_key") and tp.config.auth.get_bearer():
70
+ properties["auth.type"] = "api_key"
71
+ properties["auth.apiKey"] = tp.config.auth.get_bearer()
72
+
73
+ if hasattr(tp.config, "custom_headers") and tp.config.custom_headers:
74
+ for key, value in tp.config.custom_headers.items():
75
+ properties[f"headers.{key}"] = value
76
+ return properties
77
+
78
+ def _format_transport(props: dict, transport: dict, name: str | None):
79
+ for key, value in transport.items():
80
+ if name:
81
+ props[f"spark.openlineage.transport.transports.{name}.{key}"] = value
82
+ else:
83
+ props[f"spark.openlineage.transport.{key}"] = value
84
+ return props
79
85
 
80
- if hasattr(transport.config, "custom_headers") and transport.config.custom_headers:
81
- for key, value in transport.config.custom_headers.items():
82
- properties[f"spark.openlineage.transport.headers.{key}"] = value
86
+ transport = get_openlineage_listener().adapter.get_or_create_openlineage_client().transport
83
87
 
84
- return properties
88
+ if transport.kind == "composite":
89
+ http_transports = {}
90
+ for nested_transport in transport.transports:
91
+ if nested_transport.kind == "http":
92
+ http_transports[nested_transport.name] = _get_transport_information(nested_transport)
93
+ else:
94
+ name = nested_transport.name if hasattr(nested_transport, "name") else "no-name"
95
+ log.info(
96
+ "OpenLineage transport type `%s` with name `%s` is not supported in composite transport.",
97
+ nested_transport.kind,
98
+ name,
99
+ )
100
+ if len(http_transports) == 0:
101
+ log.warning(
102
+ "OpenLineage transport type `composite` does not contain http transport. Skipping "
103
+ "injection of OpenLineage transport information into Spark properties.",
104
+ )
105
+ return {}
106
+ props = {
107
+ "spark.openlineage.transport.type": "composite",
108
+ "spark.openlineage.transport.continueOnFailure": str(transport.config.continue_on_failure),
109
+ }
110
+ for name, http_transport in http_transports.items():
111
+ props = _format_transport(props, http_transport, name)
112
+ return props
113
+
114
+ elif transport.kind == "http":
115
+ return _format_transport({}, _get_transport_information(transport), None)
116
+
117
+ log.info(
118
+ "OpenLineage transport type `%s` does not support automatic "
119
+ "injection of OpenLineage transport information into Spark properties.",
120
+ transport.kind,
121
+ )
122
+ return {}
85
123
 
86
124
 
87
125
  def _is_parent_job_information_present_in_spark_properties(properties: dict) -> bool:
@@ -210,7 +210,13 @@ def is_ti_rescheduled_already(ti: TaskInstance, session=NEW_SESSION):
210
210
 
211
211
  if not ti.task.reschedule:
212
212
  return False
213
-
213
+ if AIRFLOW_V_3_0_PLUS:
214
+ return (
215
+ session.query(
216
+ exists().where(TaskReschedule.ti_id == ti.id, TaskReschedule.try_number == ti.try_number)
217
+ ).scalar()
218
+ is True
219
+ )
214
220
  return (
215
221
  session.query(
216
222
  exists().where(
@@ -369,8 +375,19 @@ class DagRunInfo(InfoJsonEncodable):
369
375
  "run_id",
370
376
  "run_type",
371
377
  "start_date",
378
+ "end_date",
372
379
  ]
373
380
 
381
+ casts = {"duration": lambda dagrun: DagRunInfo.duration(dagrun)}
382
+
383
+ @classmethod
384
+ def duration(cls, dagrun: DagRun) -> float | None:
385
+ if not getattr(dagrun, "end_date", None) or not isinstance(dagrun.end_date, datetime.datetime):
386
+ return None
387
+ if not getattr(dagrun, "start_date", None) or not isinstance(dagrun.start_date, datetime.datetime):
388
+ return None
389
+ return (dagrun.end_date - dagrun.start_date).total_seconds()
390
+
374
391
 
375
392
  class TaskInstanceInfo(InfoJsonEncodable):
376
393
  """Defines encoding TaskInstance object to JSON."""
@@ -740,7 +757,9 @@ def print_warning(log):
740
757
  return f(*args, **kwargs)
741
758
  except Exception:
742
759
  log.warning(
743
- "OpenLineage event emission failed. Exception below is being caught: it's printed for visibility. This has no impact on actual task execution status.",
760
+ "OpenLineage event emission failed. "
761
+ "Exception below is being caught but it's printed for visibility. "
762
+ "This has no impact on actual task execution status.",
744
763
  exc_info=True,
745
764
  )
746
765
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: apache-airflow-providers-openlineage
3
- Version: 2.1.1rc1
3
+ Version: 2.1.2
4
4
  Summary: Provider package apache-airflow-providers-openlineage for Apache Airflow
5
5
  Keywords: airflow-provider,openlineage,airflow,integration
6
6
  Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -20,18 +20,18 @@ Classifier: Programming Language :: Python :: 3.10
20
20
  Classifier: Programming Language :: Python :: 3.11
21
21
  Classifier: Programming Language :: Python :: 3.12
22
22
  Classifier: Topic :: System :: Monitoring
23
- Requires-Dist: apache-airflow>=2.9.0rc0
24
- Requires-Dist: apache-airflow-providers-common-sql>=1.20.0rc0
25
- Requires-Dist: apache-airflow-providers-common-compat>=1.4.0rc0
23
+ Requires-Dist: apache-airflow>=2.9.0
24
+ Requires-Dist: apache-airflow-providers-common-sql>=1.20.0
25
+ Requires-Dist: apache-airflow-providers-common-compat>=1.4.0
26
26
  Requires-Dist: attrs>=22.2
27
27
  Requires-Dist: openlineage-integration-common>=1.24.2
28
28
  Requires-Dist: openlineage-python>=1.24.2
29
29
  Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
30
- Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.1/changelog.html
31
- Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.1
30
+ Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2/changelog.html
31
+ Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2
32
+ Project-URL: Mastodon, https://fosstodon.org/@airflow
32
33
  Project-URL: Slack Chat, https://s.apache.org/airflow-slack
33
34
  Project-URL: Source Code, https://github.com/apache/airflow
34
- Project-URL: Twitter, https://x.com/ApacheAirflow
35
35
  Project-URL: YouTube, https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/
36
36
 
37
37
 
@@ -59,7 +59,7 @@ Project-URL: YouTube, https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/
59
59
 
60
60
  Package ``apache-airflow-providers-openlineage``
61
61
 
62
- Release: ``2.1.1``
62
+ Release: ``2.1.2``
63
63
 
64
64
 
65
65
  `OpenLineage <https://openlineage.io/>`__
@@ -72,7 +72,7 @@ This is a provider package for ``openlineage`` provider. All classes for this pr
72
72
  are in ``airflow.providers.openlineage`` python package.
73
73
 
74
74
  You can find package information and changelog for the provider
75
- in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.1/>`_.
75
+ in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2/>`_.
76
76
 
77
77
  Installation
78
78
  ------------
@@ -101,7 +101,7 @@ Cross provider package dependencies
101
101
  -----------------------------------
102
102
 
103
103
  Those are dependencies that might be needed in order to use all the features of the package.
104
- You need to install the specified provider packages in order to use them.
104
+ You need to install the specified providers in order to use them.
105
105
 
106
106
  You can install such cross-provider dependencies when installing from PyPI. For example:
107
107
 
@@ -118,5 +118,5 @@ Dependent package
118
118
  ================================================================================================================== =================
119
119
 
120
120
  The changelog for the provider package can be found in the
121
- `changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.1/changelog.html>`_.
121
+ `changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2/changelog.html>`_.
122
122
 
@@ -1,13 +1,13 @@
1
1
  airflow/providers/openlineage/LICENSE,sha256=gXPVwptPlW1TJ4HSuG5OMPg-a3h43OGMkZRR1rpwfJA,10850
2
- airflow/providers/openlineage/__init__.py,sha256=zu1NWsVHz4OqcAQkqGuElQg03_iztd-3dAVvZcQ3maI,1498
2
+ airflow/providers/openlineage/__init__.py,sha256=z82Hjldc_TSS3Uwai9WOyuJKxfAG5BH4NlVuMbMSc8g,1498
3
3
  airflow/providers/openlineage/conf.py,sha256=aYdLU7iHBdGIU8ZAC5iUiIDgXP9gvP9r_z5hTAbXPOU,5535
4
- airflow/providers/openlineage/get_provider_info.py,sha256=dPjREg_wct28793JoYZjiSGMrLFkhTulTA2Nl9CeRzg,10299
4
+ airflow/providers/openlineage/get_provider_info.py,sha256=53i3FDOzBoxITYaVkSqzS9oI4NkxiTUPGKp8-DU9bJU,10320
5
5
  airflow/providers/openlineage/sqlparser.py,sha256=N38XhkU-lxwxnYevQpq63JOBi4rzp0q56JjxO3H24W8,20340
6
6
  airflow/providers/openlineage/version_compat.py,sha256=aHg90_DtgoSnQvILFICexMyNlHlALBdaeWqkX3dFDug,1605
7
7
  airflow/providers/openlineage/extractors/__init__.py,sha256=I0X4f6zUniclyD9zT0DFHRImpCpJVP4MkPJT3cd7X5I,1081
8
- airflow/providers/openlineage/extractors/base.py,sha256=vaIg8HBj0crvzqoxiD252rG4IHMKJ81pGYmcDOlqJXk,6623
8
+ airflow/providers/openlineage/extractors/base.py,sha256=ZXRlvMSak8kUfur-BxrgAxeylMQFG-iT-LusQguIFLc,6342
9
9
  airflow/providers/openlineage/extractors/bash.py,sha256=3aR0PXs8fzRLibRxXN1R8wMZnGzyCur7mjpy8e5GC4A,2583
10
- airflow/providers/openlineage/extractors/manager.py,sha256=0OLxgjNgCNdg6zcWGU387bqthQGwvtZRkN1q9RU7wlY,12431
10
+ airflow/providers/openlineage/extractors/manager.py,sha256=g3WJRBR2-XZHTG7qAR4UEviwtymvDArhlPMVD3c4q_g,12862
11
11
  airflow/providers/openlineage/extractors/python.py,sha256=hVWOplMlBimrpPKPeW6vm75a8OmAYMU1oJzqMz8Jh90,3171
12
12
  airflow/providers/openlineage/facets/AirflowDagRunFacet.json,sha256=ie6c-J3-wGgk80WDTGWePz18o6DbW--TNM7BMF4WfcU,2251
13
13
  airflow/providers/openlineage/facets/AirflowDebugRunFacet.json,sha256=_zA5gFqGje5MOH1SmdMeA5ViOHvW_pV4oijEAvkuBbY,768
@@ -16,17 +16,17 @@ airflow/providers/openlineage/facets/AirflowRunFacet.json,sha256=70mEaZShgSJp-2x
16
16
  airflow/providers/openlineage/facets/AirflowStateRunFacet.json,sha256=xhHQEKD9Jopw-oqbkCCrrwFjfXnxvuJAritsmegKjuQ,937
17
17
  airflow/providers/openlineage/facets/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
18
18
  airflow/providers/openlineage/plugins/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
19
- airflow/providers/openlineage/plugins/adapter.py,sha256=QmacPqI2t37JwZT0HE7J84BeK-sKMmIF8IVrtYfTTS0,20304
19
+ airflow/providers/openlineage/plugins/adapter.py,sha256=wCGJ3rMDpiyFlPaonUQwGzs0hyNqU-4__e0Z9nAbcyI,20620
20
20
  airflow/providers/openlineage/plugins/facets.py,sha256=VvyMYR6ONkC95q5FdNmohv0scbA1Ej_B5cQ97as5GvA,4161
21
- airflow/providers/openlineage/plugins/listener.py,sha256=ETQEx2RvcEa0Q2tEWmuhIwcyPOcebTdKKDzfnX6toGE,25453
22
- airflow/providers/openlineage/plugins/macros.py,sha256=YuS0SlpZ3j2yaMepjNzQ6HCpnM2xTEuixA-0wra-EKU,3260
21
+ airflow/providers/openlineage/plugins/listener.py,sha256=KlBKT9VkdOrZxvQHsLZWWq_g4jPhaa2GdVxmHy_EVhM,26083
22
+ airflow/providers/openlineage/plugins/macros.py,sha256=qrHLjE95Uq8H-W9CIkQe5Y9Pu1O-GErhpDV2olGaGQM,3730
23
23
  airflow/providers/openlineage/plugins/openlineage.py,sha256=HD3mYNPfXd-buZydEpuAY-naVBXhausU2LYUNhL48QA,1906
24
24
  airflow/providers/openlineage/utils/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
25
- airflow/providers/openlineage/utils/selective_enable.py,sha256=ZJUH_iS0thup2qYAVcjOgNIru6E8bKc56_pNQHuc8Fg,3451
26
- airflow/providers/openlineage/utils/spark.py,sha256=3JS3Din4pAFyTmetWgCrLS-kYQEEYarIUW-vZnTaGwo,5826
25
+ airflow/providers/openlineage/utils/selective_enable.py,sha256=YyrUQ7Djv5o46XdH83N_G8AXAZ9C_aKPa534pbNVp08,3441
26
+ airflow/providers/openlineage/utils/spark.py,sha256=-2XfUaV0WISK6vHSBmB9E78xkuPjO3fM1tDQCZG7j9I,7303
27
27
  airflow/providers/openlineage/utils/sql.py,sha256=vkKrrdENEMVG8gtzV6yuTXMa2Z9fBAEXmxDVIDaVncI,9571
28
- airflow/providers/openlineage/utils/utils.py,sha256=UOdFuUo0xAfUPmtM8TQH173zP2ZSVmIhpJ0wvu2zQ0w,28511
29
- apache_airflow_providers_openlineage-2.1.1rc1.dist-info/entry_points.txt,sha256=GAx0_i2OeZzqaiiiYuA-xchICDXiCT5kVqpKSxsOjt4,214
30
- apache_airflow_providers_openlineage-2.1.1rc1.dist-info/WHEEL,sha256=_2ozNFCLWc93bK4WKHCO-eDUENDlo-dgc9cU3qokYO4,82
31
- apache_airflow_providers_openlineage-2.1.1rc1.dist-info/METADATA,sha256=BcLvCZju_8rIJGwWroKFo7b8RucaQ1rQp3KBsik020k,5701
32
- apache_airflow_providers_openlineage-2.1.1rc1.dist-info/RECORD,,
28
+ airflow/providers/openlineage/utils/utils.py,sha256=Z3G2wa_EPfRaHEFgdNFvi36K8qz47lS_O66emx-aFjk,29281
29
+ apache_airflow_providers_openlineage-2.1.2.dist-info/entry_points.txt,sha256=GAx0_i2OeZzqaiiiYuA-xchICDXiCT5kVqpKSxsOjt4,214
30
+ apache_airflow_providers_openlineage-2.1.2.dist-info/WHEEL,sha256=_2ozNFCLWc93bK4WKHCO-eDUENDlo-dgc9cU3qokYO4,82
31
+ apache_airflow_providers_openlineage-2.1.2.dist-info/METADATA,sha256=ViBC41KGtshgGbAPaa0FwRr7E5HaDaw375HA9jVSWLY,5685
32
+ apache_airflow_providers_openlineage-2.1.2.dist-info/RECORD,,