apache-airflow-providers-openlineage: 1.7.1rc1 (py3-none-any.whl) → 1.8.0 (py3-none-any.whl)

This diff shows the contents of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of apache-airflow-providers-openlineage might be problematic. (The original page linked to further details here; the link target is not preserved in this text.)

@@ -25,14 +25,11 @@ from __future__ import annotations
25
25
 
26
26
  import packaging.version
27
27
 
28
- __all__ = ["__version__"]
28
+ from airflow import __version__ as airflow_version
29
29
 
30
- __version__ = "1.7.1"
30
+ __all__ = ["__version__"]
31
31
 
32
- try:
33
- from airflow import __version__ as airflow_version
34
- except ImportError:
35
- from airflow.version import version as airflow_version
32
+ __version__ = "1.8.0"
36
33
 
37
34
  if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
38
35
  "2.7.0"
@@ -14,6 +14,19 @@
14
14
  # KIND, either express or implied. See the License for the
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
+ """
18
+ This module provides functions for safely retrieving and handling OpenLineage configurations.
19
+
20
+ To prevent errors caused by invalid user-provided configuration values, we use ``conf.get()``
21
+ to fetch values as strings and perform safe conversions using custom functions.
22
+
23
+ Any invalid configuration values should be treated as incorrect and replaced with default values.
24
+ For example, if the default for boolean ``custom_ol_var`` is False, any non-true value provided:
25
+ ``"asdf"``, ``12345``, ``{"key": 1}`` or empty string, will result in False being used.
26
+
27
+ By using default values for invalid configuration values, we ensure that the configurations are handled
28
+ safely, preventing potential runtime errors due to conversion issues.
29
+ """
17
30
 
18
31
  from __future__ import annotations
19
32
 
@@ -26,6 +39,17 @@ from airflow.configuration import conf
26
39
  _CONFIG_SECTION = "openlineage"
27
40
 
28
41
 
42
+ def _is_true(arg: Any) -> bool:
43
+ return str(arg).lower().strip() in ("true", "1", "t")
44
+
45
+
46
+ def _safe_int_convert(arg: Any, default: int) -> int:
47
+ try:
48
+ return int(arg)
49
+ except (ValueError, TypeError):
50
+ return default
51
+
52
+
29
53
  @cache
30
54
  def config_path(check_legacy_env_var: bool = True) -> str:
31
55
  """[openlineage] config_path."""
@@ -41,7 +65,8 @@ def is_source_enabled() -> bool:
41
65
  option = conf.get(_CONFIG_SECTION, "disable_source_code", fallback="")
42
66
  if not option:
43
67
  option = os.getenv("OPENLINEAGE_AIRFLOW_DISABLE_SOURCE_CODE", "")
44
- return option.lower() not in ("true", "1", "t")
68
+ # when disable_source_code is True, is_source_enabled() should be False
69
+ return not _is_true(option)
45
70
 
46
71
 
47
72
  @cache
@@ -53,7 +78,9 @@ def disabled_operators() -> set[str]:
53
78
 
54
79
  @cache
55
80
  def selective_enable() -> bool:
56
- return conf.getboolean(_CONFIG_SECTION, "selective_enable", fallback=False)
81
+ """[openlineage] selective_enable."""
82
+ option = conf.get(_CONFIG_SECTION, "selective_enable", fallback="")
83
+ return _is_true(option)
57
84
 
58
85
 
59
86
  @cache
@@ -85,11 +112,7 @@ def transport() -> dict[str, Any]:
85
112
 
86
113
  @cache
87
114
  def is_disabled() -> bool:
88
- """[openlineage] disabled + some extra checks."""
89
-
90
- def _is_true(val):
91
- return str(val).lower().strip() in ("true", "1", "t")
92
-
115
+ """[openlineage] disabled + check if any configuration is present."""
93
116
  option = conf.get(_CONFIG_SECTION, "disabled", fallback="")
94
117
  if _is_true(option):
95
118
  return True
@@ -97,7 +120,13 @@ def is_disabled() -> bool:
97
120
  option = os.getenv("OPENLINEAGE_DISABLED", "")
98
121
  if _is_true(option):
99
122
  return True
100
-
101
123
  # Check if both 'transport' and 'config_path' are not present and also
102
124
  # if legacy 'OPENLINEAGE_URL' environment variables is not set
103
125
  return transport() == {} and config_path(True) == "" and os.getenv("OPENLINEAGE_URL", "") == ""
126
+
127
+
128
+ @cache
129
+ def dag_state_change_process_pool_size() -> int:
130
+ """[openlineage] dag_state_change_process_pool_size."""
131
+ option = conf.get(_CONFIG_SECTION, "dag_state_change_process_pool_size", fallback="")
132
+ return _safe_int_convert(str(option).strip(), default=1)
@@ -87,6 +87,9 @@ class DefaultExtractor(BaseExtractor):
87
87
  def _execute_extraction(self) -> OperatorLineage | None:
88
88
  # OpenLineage methods are optional - if there's no method, return None
89
89
  try:
90
+ self.log.debug(
91
+ "Trying to execute `get_openlineage_facets_on_start` for %s.", self.operator.task_type
92
+ )
90
93
  return self._get_openlineage_facets(self.operator.get_openlineage_facets_on_start) # type: ignore
91
94
  except ImportError:
92
95
  self.log.error(
@@ -105,9 +108,13 @@ class DefaultExtractor(BaseExtractor):
105
108
  if task_instance.state == TaskInstanceState.FAILED:
106
109
  on_failed = getattr(self.operator, "get_openlineage_facets_on_failure", None)
107
110
  if on_failed and callable(on_failed):
111
+ self.log.debug(
112
+ "Executing `get_openlineage_facets_on_failure` for %s.", self.operator.task_type
113
+ )
108
114
  return self._get_openlineage_facets(on_failed, task_instance)
109
115
  on_complete = getattr(self.operator, "get_openlineage_facets_on_complete", None)
110
116
  if on_complete and callable(on_complete):
117
+ self.log.debug("Executing `get_openlineage_facets_on_complete` for %s.", self.operator.task_type)
111
118
  return self._get_openlineage_facets(on_complete, task_instance)
112
119
  return self.extract()
113
120
 
@@ -53,6 +53,10 @@ class BashExtractor(BaseExtractor):
53
53
  source=self.operator.bash_command,
54
54
  )
55
55
  }
56
+ else:
57
+ self.log.debug(
58
+ "OpenLineage disable_source_code option is on - no source code is extracted.",
59
+ )
56
60
 
57
61
  return OperatorLineage(
58
62
  job_facets=job_facets,
@@ -16,7 +16,6 @@
16
16
  # under the License.
17
17
  from __future__ import annotations
18
18
 
19
- from contextlib import suppress
20
19
  from typing import TYPE_CHECKING, Iterator
21
20
 
22
21
  from airflow.providers.openlineage import conf
@@ -24,9 +23,11 @@ from airflow.providers.openlineage.extractors import BaseExtractor, OperatorLine
24
23
  from airflow.providers.openlineage.extractors.base import DefaultExtractor
25
24
  from airflow.providers.openlineage.extractors.bash import BashExtractor
26
25
  from airflow.providers.openlineage.extractors.python import PythonExtractor
27
- from airflow.providers.openlineage.utils.utils import get_unknown_source_attribute_run_facet
26
+ from airflow.providers.openlineage.utils.utils import (
27
+ get_unknown_source_attribute_run_facet,
28
+ try_import_from_string,
29
+ )
28
30
  from airflow.utils.log.logging_mixin import LoggingMixin
29
- from airflow.utils.module_loading import import_string
30
31
 
31
32
  if TYPE_CHECKING:
32
33
  from openlineage.client.run import Dataset
@@ -35,11 +36,6 @@ if TYPE_CHECKING:
35
36
  from airflow.models import Operator
36
37
 
37
38
 
38
- def try_import_from_string(string):
39
- with suppress(ImportError):
40
- return import_string(string)
41
-
42
-
43
39
  def _iter_extractor_types() -> Iterator[type[BaseExtractor]]:
44
40
  if PythonExtractor is not None:
45
41
  yield PythonExtractor
@@ -61,16 +57,27 @@ class ExtractorManager(LoggingMixin):
61
57
  self.extractors[operator_class] = extractor
62
58
 
63
59
  for extractor_path in conf.custom_extractors():
64
- extractor: type[BaseExtractor] = try_import_from_string(extractor_path)
60
+ extractor: type[BaseExtractor] | None = try_import_from_string(extractor_path)
61
+ if not extractor:
62
+ self.log.warning(
63
+ "OpenLineage is unable to import custom extractor `%s`; will ignore it.", extractor_path
64
+ )
65
+ continue
65
66
  for operator_class in extractor.get_operator_classnames():
66
67
  if operator_class in self.extractors:
67
- self.log.debug(
68
- "Duplicate extractor found for `%s`. `%s` will be used instead of `%s`",
68
+ self.log.warning(
69
+ "Duplicate OpenLineage custom extractor found for `%s`. "
70
+ "`%s` will be used instead of `%s`",
69
71
  operator_class,
70
72
  extractor_path,
71
73
  self.extractors[operator_class],
72
74
  )
73
75
  self.extractors[operator_class] = extractor
76
+ self.log.debug(
77
+ "Registered custom OpenLineage extractor `%s` for class `%s`",
78
+ extractor_path,
79
+ operator_class,
80
+ )
74
81
 
75
82
  def add_extractor(self, operator_class: str, extractor: type[BaseExtractor]):
76
83
  self.extractors[operator_class] = extractor
@@ -57,6 +57,11 @@ class PythonExtractor(BaseExtractor):
57
57
  source=source_code,
58
58
  )
59
59
  }
60
+ else:
61
+ self.log.debug(
62
+ "OpenLineage disable_source_code option is on - no source code is extracted.",
63
+ )
64
+
60
65
  return OperatorLineage(
61
66
  job_facets=job_facet,
62
67
  # The PythonOperator is recorded as an "unknownSource" even though we have an extractor,
@@ -28,8 +28,9 @@ def get_provider_info():
28
28
  "name": "OpenLineage Airflow",
29
29
  "description": "`OpenLineage <https://openlineage.io/>`__\n",
30
30
  "state": "ready",
31
- "source-date-epoch": 1714477058,
31
+ "source-date-epoch": 1715684338,
32
32
  "versions": [
33
+ "1.8.0",
33
34
  "1.7.1",
34
35
  "1.7.0",
35
36
  "1.6.0",
@@ -121,11 +122,18 @@ def get_provider_info():
121
122
  },
122
123
  "disable_source_code": {
123
124
  "description": "Disable the inclusion of source code in OpenLineage events by setting this to `true`.\nBy default, several Operators (e.g. Python, Bash) will include their source code in the events\nunless disabled.\n",
124
- "default": None,
125
+ "default": "False",
125
126
  "example": None,
126
127
  "type": "boolean",
127
128
  "version_added": None,
128
129
  },
130
+ "dag_state_change_process_pool_size": {
131
+ "description": "Number of processes to utilize for processing DAG state changes\nin an asynchronous manner within the scheduler process.\n",
132
+ "default": "1",
133
+ "example": None,
134
+ "type": "integer",
135
+ "version_added": "1.8.0",
136
+ },
129
137
  },
130
138
  }
131
139
  },
@@ -16,6 +16,7 @@
16
16
  # under the License.
17
17
  from __future__ import annotations
18
18
 
19
+ import traceback
19
20
  import uuid
20
21
  from contextlib import ExitStack
21
22
  from typing import TYPE_CHECKING
@@ -73,8 +74,16 @@ class OpenLineageAdapter(LoggingMixin):
73
74
  if not self._client:
74
75
  config = self.get_openlineage_config()
75
76
  if config:
77
+ self.log.debug(
78
+ "OpenLineage configuration found. Transport type: `%s`",
79
+ config.get("type", "no type provided"),
80
+ )
76
81
  self._client = OpenLineageClient.from_dict(config=config)
77
82
  else:
83
+ self.log.debug(
84
+ "OpenLineage configuration not found directly in Airflow. "
85
+ "Looking for legacy environment configuration. "
86
+ )
78
87
  self._client = OpenLineageClient.from_environment()
79
88
  return self._client
80
89
 
@@ -85,13 +94,19 @@ class OpenLineageAdapter(LoggingMixin):
85
94
  config = self._read_yaml_config(openlineage_config_path)
86
95
  if config:
87
96
  return config.get("transport", None)
97
+ self.log.debug("OpenLineage config file is empty: `%s`", openlineage_config_path)
98
+ else:
99
+ self.log.debug("OpenLineage config_path configuration not found.")
100
+
88
101
  # Second, try to get transport config
89
102
  transport_config = conf.transport()
90
103
  if not transport_config:
104
+ self.log.debug("OpenLineage transport configuration not found.")
91
105
  return None
92
106
  return transport_config
93
107
 
94
- def _read_yaml_config(self, path: str) -> dict | None:
108
+ @staticmethod
109
+ def _read_yaml_config(path: str) -> dict | None:
95
110
  with open(path) as config_file:
96
111
  return yaml.safe_load(config_file)
97
112
 
@@ -125,6 +140,7 @@ class OpenLineageAdapter(LoggingMixin):
125
140
  stack.enter_context(Stats.timer(f"ol.emit.attempts.{event_type}.{transport_type}"))
126
141
  stack.enter_context(Stats.timer("ol.emit.attempts"))
127
142
  self._client.emit(redacted_event)
143
+ self.log.debug("Successfully emitted OpenLineage event of id %s", event.run.runId)
128
144
  except Exception as e:
129
145
  Stats.incr("ol.emit.failed")
130
146
  self.log.warning("Failed to emit OpenLineage event of id %s", event.run.runId)
@@ -284,48 +300,66 @@ class OpenLineageAdapter(LoggingMixin):
284
300
  nominal_start_time: str,
285
301
  nominal_end_time: str,
286
302
  ):
287
- event = RunEvent(
288
- eventType=RunState.START,
289
- eventTime=dag_run.start_date.isoformat(),
290
- job=self._build_job(job_name=dag_run.dag_id, job_type=_JOB_TYPE_DAG),
291
- run=self._build_run(
292
- run_id=self.build_dag_run_id(dag_run.dag_id, dag_run.run_id),
293
- job_name=dag_run.dag_id,
294
- nominal_start_time=nominal_start_time,
295
- nominal_end_time=nominal_end_time,
296
- ),
297
- inputs=[],
298
- outputs=[],
299
- producer=_PRODUCER,
300
- )
301
- self.emit(event)
303
+ try:
304
+ event = RunEvent(
305
+ eventType=RunState.START,
306
+ eventTime=dag_run.start_date.isoformat(),
307
+ job=self._build_job(job_name=dag_run.dag_id, job_type=_JOB_TYPE_DAG),
308
+ run=self._build_run(
309
+ run_id=self.build_dag_run_id(dag_run.dag_id, dag_run.run_id),
310
+ job_name=dag_run.dag_id,
311
+ nominal_start_time=nominal_start_time,
312
+ nominal_end_time=nominal_end_time,
313
+ ),
314
+ inputs=[],
315
+ outputs=[],
316
+ producer=_PRODUCER,
317
+ )
318
+ self.emit(event)
319
+ except BaseException:
320
+ # Catch all exceptions to prevent ProcessPoolExecutor from silently swallowing them.
321
+ # This ensures that any unexpected exceptions are logged for debugging purposes.
322
+ # This part cannot be wrapped to deduplicate code, otherwise the method cannot be pickled in multiprocessing.
323
+ self.log.warning("Failed to emit DAG started event: \n %s", traceback.format_exc())
302
324
 
303
325
  def dag_success(self, dag_run: DagRun, msg: str):
304
- event = RunEvent(
305
- eventType=RunState.COMPLETE,
306
- eventTime=dag_run.end_date.isoformat(),
307
- job=self._build_job(job_name=dag_run.dag_id, job_type=_JOB_TYPE_DAG),
308
- run=Run(runId=self.build_dag_run_id(dag_run.dag_id, dag_run.run_id)),
309
- inputs=[],
310
- outputs=[],
311
- producer=_PRODUCER,
312
- )
313
- self.emit(event)
326
+ try:
327
+ event = RunEvent(
328
+ eventType=RunState.COMPLETE,
329
+ eventTime=dag_run.end_date.isoformat(),
330
+ job=self._build_job(job_name=dag_run.dag_id, job_type=_JOB_TYPE_DAG),
331
+ run=Run(runId=self.build_dag_run_id(dag_run.dag_id, dag_run.run_id)),
332
+ inputs=[],
333
+ outputs=[],
334
+ producer=_PRODUCER,
335
+ )
336
+ self.emit(event)
337
+ except BaseException:
338
+ # Catch all exceptions to prevent ProcessPoolExecutor from silently swallowing them.
339
+ # This ensures that any unexpected exceptions are logged for debugging purposes.
340
+ # This part cannot be wrapped to deduplicate code, otherwise the method cannot be pickled in multiprocessing.
341
+ self.log.warning("Failed to emit DAG success event: \n %s", traceback.format_exc())
314
342
 
315
343
  def dag_failed(self, dag_run: DagRun, msg: str):
316
- event = RunEvent(
317
- eventType=RunState.FAIL,
318
- eventTime=dag_run.end_date.isoformat(),
319
- job=self._build_job(job_name=dag_run.dag_id, job_type=_JOB_TYPE_DAG),
320
- run=Run(
321
- runId=self.build_dag_run_id(dag_run.dag_id, dag_run.run_id),
322
- facets={"errorMessage": ErrorMessageRunFacet(message=msg, programmingLanguage="python")},
323
- ),
324
- inputs=[],
325
- outputs=[],
326
- producer=_PRODUCER,
327
- )
328
- self.emit(event)
344
+ try:
345
+ event = RunEvent(
346
+ eventType=RunState.FAIL,
347
+ eventTime=dag_run.end_date.isoformat(),
348
+ job=self._build_job(job_name=dag_run.dag_id, job_type=_JOB_TYPE_DAG),
349
+ run=Run(
350
+ runId=self.build_dag_run_id(dag_run.dag_id, dag_run.run_id),
351
+ facets={"errorMessage": ErrorMessageRunFacet(message=msg, programmingLanguage="python")},
352
+ ),
353
+ inputs=[],
354
+ outputs=[],
355
+ producer=_PRODUCER,
356
+ )
357
+ self.emit(event)
358
+ except BaseException:
359
+ # Catch all exceptions to prevent ProcessPoolExecutor from silently swallowing them.
360
+ # This ensures that any unexpected exceptions are logged for debugging purposes.
361
+ # This part cannot be wrapped to deduplicate code, otherwise the method cannot be pickled in multiprocessing.
362
+ self.log.warning("Failed to emit DAG failed event: \n %s", traceback.format_exc())
329
363
 
330
364
  @staticmethod
331
365
  def _build_run(
@@ -17,13 +17,15 @@
17
17
  from __future__ import annotations
18
18
 
19
19
  import logging
20
- from concurrent.futures import ThreadPoolExecutor
20
+ from concurrent.futures import ProcessPoolExecutor
21
21
  from datetime import datetime
22
22
  from typing import TYPE_CHECKING
23
23
 
24
24
  from openlineage.client.serde import Serde
25
25
 
26
+ from airflow import __version__ as airflow_version, settings
26
27
  from airflow.listeners import hookimpl
28
+ from airflow.providers.openlineage import conf
27
29
  from airflow.providers.openlineage.extractors import ExtractorManager
28
30
  from airflow.providers.openlineage.plugins.adapter import OpenLineageAdapter, RunState
29
31
  from airflow.providers.openlineage.utils.utils import (
@@ -45,6 +47,16 @@ if TYPE_CHECKING:
45
47
  _openlineage_listener: OpenLineageListener | None = None
46
48
 
47
49
 
50
+ def _get_try_number_success(val):
51
+ # todo: remove when min airflow version >= 2.10.0
52
+ from packaging.version import parse
53
+
54
+ if parse(parse(airflow_version).base_version) < parse("2.10.0"):
55
+ return val.try_number - 1
56
+ else:
57
+ return val.try_number
58
+
59
+
48
60
  class OpenLineageListener:
49
61
  """OpenLineage listener sends events on task instance and dag run starts, completes and failures."""
50
62
 
@@ -78,13 +90,19 @@ class OpenLineageListener:
78
90
  dag = task.dag
79
91
  if is_operator_disabled(task):
80
92
  self.log.debug(
81
- "Skipping OpenLineage event emission for operator %s "
93
+ "Skipping OpenLineage event emission for operator `%s` "
82
94
  "due to its presence in [openlineage] disabled_for_operators.",
83
95
  task.task_type,
84
96
  )
85
- return None
97
+ return
86
98
 
87
99
  if not is_selective_lineage_enabled(task):
100
+ self.log.debug(
101
+ "Skipping OpenLineage event emission for task `%s` "
102
+ "due to lack of explicit lineage enablement for task or DAG while "
103
+ "[openlineage] selective_enable is on.",
104
+ task.task_id,
105
+ )
88
106
  return
89
107
 
90
108
  @print_warning(self.log)
@@ -146,15 +164,22 @@ class OpenLineageListener:
146
164
  if TYPE_CHECKING:
147
165
  assert task
148
166
  dag = task.dag
167
+
149
168
  if is_operator_disabled(task):
150
169
  self.log.debug(
151
- "Skipping OpenLineage event emission for operator %s "
170
+ "Skipping OpenLineage event emission for operator `%s` "
152
171
  "due to its presence in [openlineage] disabled_for_operators.",
153
172
  task.task_type,
154
173
  )
155
- return None
174
+ return
156
175
 
157
176
  if not is_selective_lineage_enabled(task):
177
+ self.log.debug(
178
+ "Skipping OpenLineage event emission for task `%s` "
179
+ "due to lack of explicit lineage enablement for task or DAG while "
180
+ "[openlineage] selective_enable is on.",
181
+ task.task_id,
182
+ )
158
183
  return
159
184
 
160
185
  @print_warning(self.log)
@@ -165,7 +190,7 @@ class OpenLineageListener:
165
190
  dag_id=dag.dag_id,
166
191
  task_id=task.task_id,
167
192
  execution_date=task_instance.execution_date,
168
- try_number=task_instance.try_number - 1,
193
+ try_number=_get_try_number_success(task_instance),
169
194
  )
170
195
  event_type = RunState.COMPLETE.value.lower()
171
196
  operator_name = task.task_type.lower()
@@ -201,15 +226,22 @@ class OpenLineageListener:
201
226
  if TYPE_CHECKING:
202
227
  assert task
203
228
  dag = task.dag
229
+
204
230
  if is_operator_disabled(task):
205
231
  self.log.debug(
206
- "Skipping OpenLineage event emission for operator %s "
232
+ "Skipping OpenLineage event emission for operator `%s` "
207
233
  "due to its presence in [openlineage] disabled_for_operators.",
208
234
  task.task_type,
209
235
  )
210
- return None
236
+ return
211
237
 
212
238
  if not is_selective_lineage_enabled(task):
239
+ self.log.debug(
240
+ "Skipping OpenLineage event emission for task `%s` "
241
+ "due to lack of explicit lineage enablement for task or DAG while "
242
+ "[openlineage] selective_enable is on.",
243
+ task.task_id,
244
+ )
213
245
  return
214
246
 
215
247
  @print_warning(self.log)
@@ -249,8 +281,16 @@ class OpenLineageListener:
249
281
 
250
282
  @property
251
283
  def executor(self):
284
+ def initializer():
285
+ # Re-configure the ORM engine as there are issues with multiple processes
286
+ # if process calls Airflow DB.
287
+ settings.configure_orm()
288
+
252
289
  if not self._executor:
253
- self._executor = ThreadPoolExecutor(max_workers=8, thread_name_prefix="openlineage_")
290
+ self._executor = ProcessPoolExecutor(
291
+ max_workers=conf.dag_state_change_process_pool_size(),
292
+ initializer=initializer,
293
+ )
254
294
  return self._executor
255
295
 
256
296
  @hookimpl
@@ -266,7 +306,18 @@ class OpenLineageListener:
266
306
  @hookimpl
267
307
  def on_dag_run_running(self, dag_run: DagRun, msg: str):
268
308
  if dag_run.dag and not is_selective_lineage_enabled(dag_run.dag):
309
+ self.log.debug(
310
+ "Skipping OpenLineage event emission for DAG `%s` "
311
+ "due to lack of explicit lineage enablement for DAG while "
312
+ "[openlineage] selective_enable is on.",
313
+ dag_run.dag_id,
314
+ )
315
+ return
316
+
317
+ if not self.executor:
318
+ self.log.debug("Executor have not started before `on_dag_run_running`")
269
319
  return
320
+
270
321
  data_interval_start = dag_run.data_interval_start.isoformat() if dag_run.data_interval_start else None
271
322
  data_interval_end = dag_run.data_interval_end.isoformat() if dag_run.data_interval_end else None
272
323
  self.executor.submit(
@@ -280,19 +331,35 @@ class OpenLineageListener:
280
331
  @hookimpl
281
332
  def on_dag_run_success(self, dag_run: DagRun, msg: str):
282
333
  if dag_run.dag and not is_selective_lineage_enabled(dag_run.dag):
334
+ self.log.debug(
335
+ "Skipping OpenLineage event emission for DAG `%s` "
336
+ "due to lack of explicit lineage enablement for DAG while "
337
+ "[openlineage] selective_enable is on.",
338
+ dag_run.dag_id,
339
+ )
283
340
  return
341
+
284
342
  if not self.executor:
285
343
  self.log.debug("Executor have not started before `on_dag_run_success`")
286
344
  return
345
+
287
346
  self.executor.submit(self.adapter.dag_success, dag_run=dag_run, msg=msg)
288
347
 
289
348
  @hookimpl
290
349
  def on_dag_run_failed(self, dag_run: DagRun, msg: str):
291
350
  if dag_run.dag and not is_selective_lineage_enabled(dag_run.dag):
351
+ self.log.debug(
352
+ "Skipping OpenLineage event emission for DAG `%s` "
353
+ "due to lack of explicit lineage enablement for DAG while "
354
+ "[openlineage] selective_enable is on.",
355
+ dag_run.dag_id,
356
+ )
292
357
  return
358
+
293
359
  if not self.executor:
294
360
  self.log.debug("Executor have not started before `on_dag_run_failed`")
295
361
  return
362
+
296
363
  self.executor.submit(self.adapter.dag_failed, dag_run=dag_run, msg=msg)
297
364
 
298
365
 
@@ -39,3 +39,6 @@ class OpenLineageProviderPlugin(AirflowPlugin):
39
39
  if not conf.is_disabled():
40
40
  macros = [lineage_job_namespace, lineage_job_name, lineage_run_id, lineage_parent_id]
41
41
  listeners = [get_openlineage_listener()]
42
+ else:
43
+ macros = []
44
+ listeners = []
@@ -25,8 +25,10 @@ from functools import wraps
25
25
  from typing import TYPE_CHECKING, Any, Iterable
26
26
 
27
27
  import attrs
28
- from openlineage.client.utils import RedactMixin # TODO: move this maybe to Airflow's logic?
28
+ from deprecated import deprecated
29
+ from openlineage.client.utils import RedactMixin
29
30
 
31
+ from airflow.exceptions import AirflowProviderDeprecationWarning # TODO: move this maybe to Airflow's logic?
30
32
  from airflow.models import DAG, BaseOperator, MappedOperator
31
33
  from airflow.providers.openlineage import conf
32
34
  from airflow.providers.openlineage.plugins.facets import (
@@ -41,6 +43,7 @@ from airflow.providers.openlineage.utils.selective_enable import (
41
43
  )
42
44
  from airflow.utils.context import AirflowContextDeprecationWarning
43
45
  from airflow.utils.log.secrets_masker import Redactable, Redacted, SecretsMasker, should_hide_value_for_key
46
+ from airflow.utils.module_loading import import_string
44
47
 
45
48
  if TYPE_CHECKING:
46
49
  from airflow.models import DagRun, TaskInstance
@@ -50,6 +53,11 @@ log = logging.getLogger(__name__)
50
53
  _NOMINAL_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
51
54
 
52
55
 
56
+ def try_import_from_string(string: str) -> Any:
57
+ with suppress(ImportError):
58
+ return import_string(string)
59
+
60
+
53
61
  def get_operator_class(task: BaseOperator) -> type:
54
62
  if task.__class__.__name__ in ("DecoratedMappedOperator", "MappedOperator"):
55
63
  return task.operator_class
@@ -367,6 +375,9 @@ def print_warning(log):
367
375
  try:
368
376
  return f(*args, **kwargs)
369
377
  except Exception as e:
378
+ log.warning(
379
+ "Note: exception below is being caught: it's printed for visibility. However OpenLineage events aren't being emitted. If you see that, task has completed successfully despite not getting OL events."
380
+ )
370
381
  log.warning(e)
371
382
 
372
383
  return wrapper
@@ -379,6 +390,13 @@ def get_filtered_unknown_operator_keys(operator: BaseOperator) -> dict:
379
390
  return {attr: value for attr, value in operator.__dict__.items() if attr not in not_required_keys}
380
391
 
381
392
 
393
+ @deprecated(
394
+ reason=(
395
+ "`airflow.providers.openlineage.utils.utils.normalize_sql` "
396
+ "has been deprecated and will be removed in future"
397
+ ),
398
+ category=AirflowProviderDeprecationWarning,
399
+ )
382
400
  def normalize_sql(sql: str | Iterable[str]):
383
401
  if isinstance(sql, str):
384
402
  sql = [stmt for stmt in sql.split(";") if stmt != ""]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: apache-airflow-providers-openlineage
3
- Version: 1.7.1rc1
3
+ Version: 1.8.0
4
4
  Summary: Provider package apache-airflow-providers-openlineage for Apache Airflow
5
5
  Keywords: airflow-provider,openlineage,airflow,integration
6
6
  Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -21,15 +21,15 @@ Classifier: Programming Language :: Python :: 3.10
21
21
  Classifier: Programming Language :: Python :: 3.11
22
22
  Classifier: Programming Language :: Python :: 3.12
23
23
  Classifier: Topic :: System :: Monitoring
24
- Requires-Dist: apache-airflow-providers-common-sql>=1.6.0rc0
25
- Requires-Dist: apache-airflow>=2.7.0rc0
24
+ Requires-Dist: apache-airflow-providers-common-sql>=1.6.0
25
+ Requires-Dist: apache-airflow>=2.7.0
26
26
  Requires-Dist: attrs>=22.2
27
27
  Requires-Dist: openlineage-integration-common>=0.28.0
28
28
  Requires-Dist: openlineage-python>=0.28.0
29
29
  Requires-Dist: apache-airflow-providers-common-sql ; extra == "common.sql"
30
30
  Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
31
- Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.7.1/changelog.html
32
- Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.7.1
31
+ Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.8.0/changelog.html
32
+ Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.8.0
33
33
  Project-URL: Slack Chat, https://s.apache.org/airflow-slack
34
34
  Project-URL: Source Code, https://github.com/apache/airflow
35
35
  Project-URL: Twitter, https://twitter.com/ApacheAirflow
@@ -80,7 +80,7 @@ Provides-Extra: common.sql
80
80
 
81
81
  Package ``apache-airflow-providers-openlineage``
82
82
 
83
- Release: ``1.7.1.rc1``
83
+ Release: ``1.8.0``
84
84
 
85
85
 
86
86
  `OpenLineage <https://openlineage.io/>`__
@@ -93,7 +93,7 @@ This is a provider package for ``openlineage`` provider. All classes for this pr
93
93
  are in ``airflow.providers.openlineage`` python package.
94
94
 
95
95
  You can find package information and changelog for the provider
96
- in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.7.1/>`_.
96
+ in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.8.0/>`_.
97
97
 
98
98
  Installation
99
99
  ------------
@@ -137,4 +137,4 @@ Dependent package
137
137
  ============================================================================================================ ==============
138
138
 
139
139
  The changelog for the provider package can be found in the
140
- `changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.7.1/changelog.html>`_.
140
+ `changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.8.0/changelog.html>`_.
@@ -0,0 +1,24 @@
1
+ airflow/providers/openlineage/LICENSE,sha256=ywUBpKZc7Jb96rVt5I3IDbg7dIJAbUSHkuoDcF3jbH4,13569
2
+ airflow/providers/openlineage/__init__.py,sha256=Y_3EiIS_TiqaVpc68HfekILHQRlsSGQLhs72joO7THg,1498
3
+ airflow/providers/openlineage/conf.py,sha256=wozXzU5Do9S0mtjjGc5ruF556G2-ZT4GJa3YLT_-Phg,4693
4
+ airflow/providers/openlineage/get_provider_info.py,sha256=ypUFlQXsC6s-cA7OdslwUaxhjaYIslbP_OILdb9CVNQ,7072
5
+ airflow/providers/openlineage/sqlparser.py,sha256=-FGWWK0Xu6XkGSXcfn7PXsWIe0Y0fwe-3hivHg7emLA,15308
6
+ airflow/providers/openlineage/extractors/__init__.py,sha256=I0X4f6zUniclyD9zT0DFHRImpCpJVP4MkPJT3cd7X5I,1081
7
+ airflow/providers/openlineage/extractors/base.py,sha256=o6z8bXwNor1hwcUzezJ8LIPynR_BqXkP-qtwEgLtD2Q,5476
8
+ airflow/providers/openlineage/extractors/bash.py,sha256=m4hLvDV4-zX4gp8apRuhpAR3Uakr8UOUxf-thTWmOxw,2563
9
+ airflow/providers/openlineage/extractors/manager.py,sha256=9TyszMLAsgPS9NETWq7fPJjxcbTFk47x4kd1NRGCvsw,10315
10
+ airflow/providers/openlineage/extractors/python.py,sha256=EQXCj2aHr2XXw0pNxeX-ii8UQFCoqkdf40ozqmA5d58,3151
11
+ airflow/providers/openlineage/plugins/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
12
+ airflow/providers/openlineage/plugins/adapter.py,sha256=d_HczG_nXMwAHdGD-CO4baVfCc9ROcY0zQUWIZRF_Sw,16917
13
+ airflow/providers/openlineage/plugins/facets.py,sha256=Z6dsz0rv-3VbRaGZJxW_T7Dak2k0ROGF0YrR_1awxZ0,2644
14
+ airflow/providers/openlineage/plugins/listener.py,sha256=LrqvvGL4nzMNeha6PVbtylxp8gru3iKrkQltuQD2k8o,14092
15
+ airflow/providers/openlineage/plugins/macros.py,sha256=QowPc9cc_unV-NLxBwm32OmWETA9pOucWguSeK92SSc,3076
16
+ airflow/providers/openlineage/plugins/openlineage.py,sha256=rsRUW_zpXVAglzsgQRv5T9VWYY7CMQl0qRWm8-3oqDA,1678
17
+ airflow/providers/openlineage/utils/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
18
+ airflow/providers/openlineage/utils/selective_enable.py,sha256=JVTmXdQknBL-9N0drFDkVMf1HCf8C6nbITVaP4-5ba4,3072
19
+ airflow/providers/openlineage/utils/sql.py,sha256=7tEK0zVfIe7v3NI6oyv62x0KAS3sl8Ajfhqob8MdiX8,9366
20
+ airflow/providers/openlineage/utils/utils.py,sha256=WFMdRsuArqqrgjsmFGb_ljIWV6ry-EGkTPZzy5aiG4Q,14036
21
+ apache_airflow_providers_openlineage-1.8.0.dist-info/entry_points.txt,sha256=GAx0_i2OeZzqaiiiYuA-xchICDXiCT5kVqpKSxsOjt4,214
22
+ apache_airflow_providers_openlineage-1.8.0.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
23
+ apache_airflow_providers_openlineage-1.8.0.dist-info/METADATA,sha256=cpXm9kSCT2wFTHbw9s6ZOBuNYq02RMhRRyrZMoTXXqY,6368
24
+ apache_airflow_providers_openlineage-1.8.0.dist-info/RECORD,,
@@ -1,24 +0,0 @@
1
- airflow/providers/openlineage/LICENSE,sha256=ywUBpKZc7Jb96rVt5I3IDbg7dIJAbUSHkuoDcF3jbH4,13569
2
- airflow/providers/openlineage/__init__.py,sha256=Xc1UFto3ZKaoNfnpJpkK8cxkSAtXxGbxhpZ624JXDko,1586
3
- airflow/providers/openlineage/conf.py,sha256=Y76TUM_YwQtn-_081wQicZPTe_8bXH8jNafx-GACgeo,3398
4
- airflow/providers/openlineage/get_provider_info.py,sha256=a_A_2VPU8M2vHZ5CX0_8yUawYEiFieMa3g5tmi3pEnU,6626
5
- airflow/providers/openlineage/sqlparser.py,sha256=-FGWWK0Xu6XkGSXcfn7PXsWIe0Y0fwe-3hivHg7emLA,15308
6
- airflow/providers/openlineage/extractors/__init__.py,sha256=I0X4f6zUniclyD9zT0DFHRImpCpJVP4MkPJT3cd7X5I,1081
7
- airflow/providers/openlineage/extractors/base.py,sha256=sj2KS23ocX7LAbkDiR53otkFg1qqEg41PyBivdc-kyM,5070
8
- airflow/providers/openlineage/extractors/bash.py,sha256=lE7BH9vipRg9jGloPIE6y6wcHw_BbTvGBasfa4PfDBc,2412
9
- airflow/providers/openlineage/extractors/manager.py,sha256=wrhsculNW8Pj3BKofT3wDkCqiOFKXmxi-nBd9AifTh4,9996
10
- airflow/providers/openlineage/extractors/python.py,sha256=1iCC4_Due500ulkHmc_qvt8JGzxxb6suLeYS2FRYDlc,2999
11
- airflow/providers/openlineage/plugins/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
12
- airflow/providers/openlineage/plugins/adapter.py,sha256=YcVf_mC6ZTJEb3soRMzb96fypmWZB19M07-mn5Nh-Gc,14634
13
- airflow/providers/openlineage/plugins/facets.py,sha256=Z6dsz0rv-3VbRaGZJxW_T7Dak2k0ROGF0YrR_1awxZ0,2644
14
- airflow/providers/openlineage/plugins/listener.py,sha256=lw9IGw_JL_EP_FEVfKmR8JJuDz8xyQFCibgQF2kdHOg,11624
15
- airflow/providers/openlineage/plugins/macros.py,sha256=QowPc9cc_unV-NLxBwm32OmWETA9pOucWguSeK92SSc,3076
16
- airflow/providers/openlineage/plugins/openlineage.py,sha256=Owlbpp8puiww-4Wh6B46vYK2vLoGVK48qOW7RyZF188,1625
17
- airflow/providers/openlineage/utils/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
18
- airflow/providers/openlineage/utils/selective_enable.py,sha256=JVTmXdQknBL-9N0drFDkVMf1HCf8C6nbITVaP4-5ba4,3072
19
- airflow/providers/openlineage/utils/sql.py,sha256=7tEK0zVfIe7v3NI6oyv62x0KAS3sl8Ajfhqob8MdiX8,9366
20
- airflow/providers/openlineage/utils/utils.py,sha256=duT_rXHQuVFUIbMCplGGw0OI0RN0DAXU8oo8FaqcREg,13285
21
- apache_airflow_providers_openlineage-1.7.1rc1.dist-info/entry_points.txt,sha256=GAx0_i2OeZzqaiiiYuA-xchICDXiCT5kVqpKSxsOjt4,214
22
- apache_airflow_providers_openlineage-1.7.1rc1.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
23
- apache_airflow_providers_openlineage-1.7.1rc1.dist-info/METADATA,sha256=5ofVoQHgZeqImH1WQSQuXgyfxHld7oS782eWWbA5n-Q,6381
24
- apache_airflow_providers_openlineage-1.7.1rc1.dist-info/RECORD,,