apache-airflow-providers-openlineage 2.5.0__py3-none-any.whl → 2.6.0rc1__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Potentially problematic release.


This version of apache-airflow-providers-openlineage might be problematic. Click here for more details.

@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version
29
29
 
30
30
  __all__ = ["__version__"]
31
31
 
32
- __version__ = "2.5.0"
32
+ __version__ = "2.6.0"
33
33
 
34
34
  if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
35
35
  "2.10.0"
@@ -136,9 +136,17 @@ def is_disabled() -> bool:
136
136
  if _is_true(os.getenv("OPENLINEAGE_DISABLED", "")): # Check legacy variable
137
137
  return True
138
138
 
139
- # Check if both 'transport' and 'config_path' are not present and also
140
- # if legacy 'OPENLINEAGE_URL' environment variables is not set
141
- return transport() == {} and config_path(True) == "" and os.getenv("OPENLINEAGE_URL", "") == ""
139
+ if transport(): # Check if transport is present
140
+ return False
141
+ if config_path(True): # Check if config file is present
142
+ return False
143
+ if os.getenv("OPENLINEAGE_URL"): # Check if url simple env var is present
144
+ return False
145
+ # Check if any transport configuration env var is present
146
+ if any(k.startswith("OPENLINEAGE__TRANSPORT") and v for k, v in os.environ.items()):
147
+ return False
148
+
149
+ return True # No transport configuration is present, we can disable OpenLineage
142
150
 
143
151
 
144
152
  @cache
@@ -59,7 +59,7 @@ class BaseExtractor(ABC, LoggingMixin):
59
59
 
60
60
  _allowed_query_params: list[str] = []
61
61
 
62
- def __init__(self, operator): # type: ignore
62
+ def __init__(self, operator):
63
63
  super().__init__()
64
64
  self.operator = operator
65
65
 
@@ -34,6 +34,12 @@ def get_provider_info():
34
34
  "tags": ["protocol"],
35
35
  }
36
36
  ],
37
+ "operators": [
38
+ {
39
+ "integration-name": "OpenLineage",
40
+ "python-modules": ["airflow.providers.openlineage.operators.empty"],
41
+ }
42
+ ],
37
43
  "plugins": [
38
44
  {
39
45
  "name": "openlineage",
@@ -0,0 +1,16 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
@@ -0,0 +1,48 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+ from __future__ import annotations
18
+
19
+ from typing import TYPE_CHECKING
20
+
21
+ from airflow.providers.openlineage.extractors.base import OperatorLineage
22
+ from airflow.providers.openlineage.version_compat import BaseOperator
23
+
24
+ if TYPE_CHECKING:
25
+ from airflow.sdk.definitions.context import Context
26
+
27
+
28
+ class EmptyOperator(BaseOperator):
29
+ """
30
+ Operator that does literally nothing.
31
+
32
+ It can be used to group tasks in a DAG.
33
+ The task is evaluated by the scheduler but never processed by the executor.
34
+ """
35
+
36
+ ui_color = "#e8f7e4"
37
+
38
+ def execute(self, context: Context):
39
+ pass
40
+
41
+ def get_openlineage_facets_on_start(self) -> OperatorLineage:
42
+ return OperatorLineage()
43
+
44
+ def get_openlineage_facets_on_complete(self, task_instance) -> OperatorLineage:
45
+ return OperatorLineage()
46
+
47
+ def get_openlineage_facets_on_failure(self, task_instance) -> OperatorLineage:
48
+ return OperatorLineage()
@@ -90,7 +90,7 @@ class OpenLineageAdapter(LoggingMixin):
90
90
  "OpenLineage configuration found. Transport type: `%s`",
91
91
  config.get("transport", {}).get("type", "no type provided"),
92
92
  )
93
- self._client = OpenLineageClient(config=config) # type: ignore[call-arg]
93
+ self._client = OpenLineageClient(config=config)
94
94
  else:
95
95
  self.log.debug(
96
96
  "OpenLineage configuration not found directly in Airflow. "
@@ -182,13 +182,14 @@ class OpenLineageAdapter(LoggingMixin):
182
182
  self,
183
183
  run_id: str,
184
184
  job_name: str,
185
- job_description: str,
186
185
  event_time: str,
186
+ job_description: str | None,
187
187
  nominal_start_time: str | None,
188
188
  nominal_end_time: str | None,
189
189
  owners: list[str] | None,
190
190
  tags: list[str] | None,
191
191
  task: OperatorLineage | None,
192
+ job_description_type: str | None = None,
192
193
  run_facets: dict[str, RunFacet] | None = None,
193
194
  ) -> RunEvent:
194
195
  """
@@ -196,7 +197,8 @@ class OpenLineageAdapter(LoggingMixin):
196
197
 
197
198
  :param run_id: globally unique identifier of task in dag run
198
199
  :param job_name: globally unique identifier of task in dag
199
- :param job_description: user provided description of job
200
+ :param job_description: description of the job
201
+ :param job_description_type: MIME type of the description arg content
200
202
  :param event_time:
201
203
  :param nominal_start_time: scheduled time of dag run
202
204
  :param nominal_end_time: following schedule of dag run
@@ -221,6 +223,7 @@ class OpenLineageAdapter(LoggingMixin):
221
223
  job_name=job_name,
222
224
  job_type=_JOB_TYPE_TASK,
223
225
  job_description=job_description,
226
+ job_description_type=job_description_type,
224
227
  job_owners=owners,
225
228
  job_tags=tags,
226
229
  job_facets=task.job_facets if task else None,
@@ -241,6 +244,8 @@ class OpenLineageAdapter(LoggingMixin):
241
244
  nominal_end_time: str | None,
242
245
  owners: list[str] | None,
243
246
  tags: list[str] | None,
247
+ job_description: str | None,
248
+ job_description_type: str | None = None,
244
249
  run_facets: dict[str, RunFacet] | None = None,
245
250
  ) -> RunEvent:
246
251
  """
@@ -249,6 +254,8 @@ class OpenLineageAdapter(LoggingMixin):
249
254
  :param run_id: globally unique identifier of task in dag run
250
255
  :param job_name: globally unique identifier of task between dags
251
256
  :param end_time: time of task completion
257
+ :param job_description: description of the job
258
+ :param job_description_type: MIME type of the description arg content
252
259
  :param tags: list of tags
253
260
  :param nominal_start_time: scheduled time of dag run
254
261
  :param nominal_end_time: following schedule of dag run
@@ -274,6 +281,8 @@ class OpenLineageAdapter(LoggingMixin):
274
281
  job_facets=task.job_facets,
275
282
  job_owners=owners,
276
283
  job_tags=tags,
284
+ job_description=job_description,
285
+ job_description_type=job_description_type,
277
286
  ),
278
287
  inputs=task.inputs,
279
288
  outputs=task.outputs,
@@ -291,6 +300,8 @@ class OpenLineageAdapter(LoggingMixin):
291
300
  nominal_end_time: str | None,
292
301
  owners: list[str] | None,
293
302
  tags: list[str] | None,
303
+ job_description: str | None,
304
+ job_description_type: str | None = None,
294
305
  error: str | BaseException | None = None,
295
306
  run_facets: dict[str, RunFacet] | None = None,
296
307
  ) -> RunEvent:
@@ -301,6 +312,8 @@ class OpenLineageAdapter(LoggingMixin):
301
312
  :param job_name: globally unique identifier of task between dags
302
313
  :param end_time: time of task completion
303
314
  :param task: metadata container with information extracted from operator
315
+ :param job_description: description of the job
316
+ :param job_description_type: MIME type of the description arg content
304
317
  :param run_facets: custom run facets
305
318
  :param tags: list of tags
306
319
  :param nominal_start_time: scheduled time of dag run
@@ -338,6 +351,8 @@ class OpenLineageAdapter(LoggingMixin):
338
351
  job_facets=task.job_facets,
339
352
  job_owners=owners,
340
353
  job_tags=tags,
354
+ job_description=job_description,
355
+ job_description_type=job_description_type,
341
356
  ),
342
357
  inputs=task.inputs,
343
358
  outputs=task.outputs,
@@ -356,7 +371,8 @@ class OpenLineageAdapter(LoggingMixin):
356
371
  tags: list[str],
357
372
  run_facets: dict[str, RunFacet],
358
373
  clear_number: int,
359
- description: str | None = None,
374
+ job_description: str | None,
375
+ job_description_type: str | None = None,
360
376
  job_facets: dict[str, JobFacet] | None = None, # Custom job facets
361
377
  ):
362
378
  try:
@@ -366,7 +382,8 @@ class OpenLineageAdapter(LoggingMixin):
366
382
  job=self._build_job(
367
383
  job_name=dag_id,
368
384
  job_type=_JOB_TYPE_DAG,
369
- job_description=description,
385
+ job_description=job_description,
386
+ job_description_type=job_description_type,
370
387
  job_owners=owners,
371
388
  job_facets=job_facets,
372
389
  job_tags=tags,
@@ -404,6 +421,8 @@ class OpenLineageAdapter(LoggingMixin):
404
421
  task_ids: list[str],
405
422
  owners: list[str] | None,
406
423
  run_facets: dict[str, RunFacet],
424
+ job_description: str | None,
425
+ job_description_type: str | None = None,
407
426
  ):
408
427
  try:
409
428
  event = RunEvent(
@@ -414,6 +433,8 @@ class OpenLineageAdapter(LoggingMixin):
414
433
  job_type=_JOB_TYPE_DAG,
415
434
  job_owners=owners,
416
435
  job_tags=tags,
436
+ job_description=job_description,
437
+ job_description_type=job_description_type,
417
438
  ),
418
439
  run=self._build_run(
419
440
  run_id=self.build_dag_run_id(
@@ -453,6 +474,8 @@ class OpenLineageAdapter(LoggingMixin):
453
474
  owners: list[str] | None,
454
475
  msg: str,
455
476
  run_facets: dict[str, RunFacet],
477
+ job_description: str | None,
478
+ job_description_type: str | None = None,
456
479
  ):
457
480
  try:
458
481
  event = RunEvent(
@@ -463,6 +486,8 @@ class OpenLineageAdapter(LoggingMixin):
463
486
  job_type=_JOB_TYPE_DAG,
464
487
  job_owners=owners,
465
488
  job_tags=tags,
489
+ job_description=job_description,
490
+ job_description_type=job_description_type,
466
491
  ),
467
492
  run=self._build_run(
468
493
  run_id=self.build_dag_run_id(
@@ -519,6 +544,7 @@ class OpenLineageAdapter(LoggingMixin):
519
544
  job_name: str,
520
545
  job_type: Literal["DAG", "TASK"],
521
546
  job_description: str | None = None,
547
+ job_description_type: str | None = None,
522
548
  job_owners: list[str] | None = None,
523
549
  job_tags: list[str] | None = None,
524
550
  job_facets: dict[str, JobFacet] | None = None,
@@ -530,7 +556,7 @@ class OpenLineageAdapter(LoggingMixin):
530
556
  facets.update(
531
557
  {
532
558
  "documentation": documentation_job.DocumentationJobFacet(
533
- description=job_description, producer=_PRODUCER
559
+ description=job_description, contentType=job_description_type, producer=_PRODUCER
534
560
  )
535
561
  }
536
562
  )
@@ -39,7 +39,9 @@ from airflow.providers.openlineage.utils.utils import (
39
39
  get_airflow_job_facet,
40
40
  get_airflow_mapped_task_facet,
41
41
  get_airflow_run_facet,
42
+ get_dag_documentation,
42
43
  get_job_name,
44
+ get_task_documentation,
43
45
  get_task_parent_run_facet,
44
46
  get_user_provided_run_facets,
45
47
  is_operator_disabled,
@@ -103,11 +105,11 @@ class OpenLineageListener:
103
105
  else:
104
106
 
105
107
  @hookimpl
106
- def on_task_instance_running(
108
+ def on_task_instance_running( # type: ignore[misc]
107
109
  self,
108
110
  previous_state: TaskInstanceState,
109
111
  task_instance: TaskInstance,
110
- session: Session, # type: ignore[valid-type]
112
+ session: Session,
111
113
  ) -> None:
112
114
  from airflow.providers.openlineage.utils.utils import is_ti_rescheduled_already
113
115
 
@@ -192,6 +194,10 @@ class OpenLineageListener:
192
194
  if isinstance(data_interval_end, datetime):
193
195
  data_interval_end = data_interval_end.isoformat()
194
196
 
197
+ doc, doc_type = get_task_documentation(task)
198
+ if not doc:
199
+ doc, doc_type = get_dag_documentation(dag)
200
+
195
201
  with Stats.timer(f"ol.extract.{event_type}.{operator_name}"):
196
202
  task_metadata = self.extractor_manager.extract_metadata(
197
203
  dagrun=dagrun, task=task, task_instance_state=TaskInstanceState.RUNNING
@@ -200,7 +206,8 @@ class OpenLineageListener:
200
206
  redacted_event = self.adapter.start_task(
201
207
  run_id=task_uuid,
202
208
  job_name=get_job_name(task_instance),
203
- job_description=dag.description,
209
+ job_description=doc,
210
+ job_description_type=doc_type,
204
211
  event_time=start_date.isoformat(),
205
212
  nominal_start_time=data_interval_start,
206
213
  nominal_end_time=data_interval_end,
@@ -250,11 +257,11 @@ class OpenLineageListener:
250
257
  else:
251
258
 
252
259
  @hookimpl
253
- def on_task_instance_success(
260
+ def on_task_instance_success( # type: ignore[misc]
254
261
  self,
255
262
  previous_state: TaskInstanceState,
256
263
  task_instance: TaskInstance,
257
- session: Session, # type: ignore[valid-type]
264
+ session: Session,
258
265
  ) -> None:
259
266
  self.log.debug("OpenLineage listener got notification about task instance success")
260
267
  task = task_instance.task
@@ -311,6 +318,10 @@ class OpenLineageListener:
311
318
  if isinstance(data_interval_end, datetime):
312
319
  data_interval_end = data_interval_end.isoformat()
313
320
 
321
+ doc, doc_type = get_task_documentation(task)
322
+ if not doc:
323
+ doc, doc_type = get_dag_documentation(dag)
324
+
314
325
  with Stats.timer(f"ol.extract.{event_type}.{operator_name}"):
315
326
  task_metadata = self.extractor_manager.extract_metadata(
316
327
  dagrun=dagrun,
@@ -327,6 +338,8 @@ class OpenLineageListener:
327
338
  # If task owner is default ("airflow"), use DAG owner instead that may have more details
328
339
  owners=[x.strip() for x in (task if task.owner != "airflow" else dag).owner.split(",")],
329
340
  tags=dag.tags,
341
+ job_description=doc,
342
+ job_description_type=doc_type,
330
343
  nominal_start_time=data_interval_start,
331
344
  nominal_end_time=data_interval_end,
332
345
  run_facets={
@@ -373,12 +386,12 @@ class OpenLineageListener:
373
386
  else:
374
387
 
375
388
  @hookimpl
376
- def on_task_instance_failed(
389
+ def on_task_instance_failed( # type: ignore[misc]
377
390
  self,
378
391
  previous_state: TaskInstanceState,
379
392
  task_instance: TaskInstance,
380
393
  error: None | str | BaseException,
381
- session: Session, # type: ignore[valid-type]
394
+ session: Session,
382
395
  ) -> None:
383
396
  self.log.debug("OpenLineage listener got notification about task instance failure")
384
397
  task = task_instance.task
@@ -442,6 +455,10 @@ class OpenLineageListener:
442
455
  if isinstance(data_interval_end, datetime):
443
456
  data_interval_end = data_interval_end.isoformat()
444
457
 
458
+ doc, doc_type = get_task_documentation(task)
459
+ if not doc:
460
+ doc, doc_type = get_dag_documentation(dag)
461
+
445
462
  with Stats.timer(f"ol.extract.{event_type}.{operator_name}"):
446
463
  task_metadata = self.extractor_manager.extract_metadata(
447
464
  dagrun=dagrun,
@@ -461,6 +478,8 @@ class OpenLineageListener:
461
478
  tags=dag.tags,
462
479
  # If task owner is default ("airflow"), use DAG owner instead that may have more details
463
480
  owners=[x.strip() for x in (task if task.owner != "airflow" else dag).owner.split(",")],
481
+ job_description=doc,
482
+ job_description_type=doc_type,
464
483
  run_facets={
465
484
  **get_task_parent_run_facet(parent_run_id=parent_run_id, parent_job_name=dag.dag_id),
466
485
  **get_user_provided_run_facets(task_instance, TaskInstanceState.FAILED),
@@ -511,6 +530,8 @@ class OpenLineageListener:
511
530
  "nominal_end_time": None,
512
531
  "tags": None,
513
532
  "owners": None,
533
+ "job_description": None,
534
+ "job_description_type": None,
514
535
  "run_facets": {
515
536
  **get_task_parent_run_facet(parent_run_id=parent_run_id, parent_job_name=ti.dag_id),
516
537
  **get_airflow_debug_facet(),
@@ -623,6 +644,8 @@ class OpenLineageListener:
623
644
  if AIRFLOW_V_3_0_PLUS and date is None:
624
645
  date = dag_run.run_after
625
646
 
647
+ doc, doc_type = get_dag_documentation(dag_run.dag)
648
+
626
649
  self.submit_callable(
627
650
  self.adapter.dag_started,
628
651
  dag_id=dag_run.dag_id,
@@ -633,7 +656,8 @@ class OpenLineageListener:
633
656
  run_facets=run_facets,
634
657
  clear_number=dag_run.clear_number,
635
658
  owners=[x.strip() for x in dag_run.dag.owner.split(",")] if dag_run.dag else None,
636
- description=dag_run.dag.description if dag_run.dag else None,
659
+ job_description=doc,
660
+ job_description_type=doc_type,
637
661
  tags=dag_run.dag.tags if dag_run.dag else [],
638
662
  # AirflowJobFacet should be created outside ProcessPoolExecutor that pickles objects,
639
663
  # as it causes lack of some TaskGroup attributes and crashes event emission.
@@ -668,6 +692,7 @@ class OpenLineageListener:
668
692
  dag_run.data_interval_start.isoformat() if dag_run.data_interval_start else None
669
693
  )
670
694
  data_interval_end = dag_run.data_interval_end.isoformat() if dag_run.data_interval_end else None
695
+ doc, doc_type = get_dag_documentation(dag_run.dag)
671
696
 
672
697
  self.submit_callable(
673
698
  self.adapter.dag_success,
@@ -680,6 +705,8 @@ class OpenLineageListener:
680
705
  clear_number=dag_run.clear_number,
681
706
  owners=[x.strip() for x in dag_run.dag.owner.split(",")] if dag_run.dag else None,
682
707
  tags=dag_run.dag.tags if dag_run.dag else [],
708
+ job_description=doc,
709
+ job_description_type=doc_type,
683
710
  task_ids=task_ids,
684
711
  dag_run_state=dag_run.get_state(),
685
712
  run_facets={**get_airflow_dag_run_facet(dag_run)},
@@ -713,6 +740,7 @@ class OpenLineageListener:
713
740
  dag_run.data_interval_start.isoformat() if dag_run.data_interval_start else None
714
741
  )
715
742
  data_interval_end = dag_run.data_interval_end.isoformat() if dag_run.data_interval_end else None
743
+ doc, doc_type = get_dag_documentation(dag_run.dag)
716
744
 
717
745
  self.submit_callable(
718
746
  self.adapter.dag_failed,
@@ -725,6 +753,8 @@ class OpenLineageListener:
725
753
  clear_number=dag_run.clear_number,
726
754
  owners=[x.strip() for x in dag_run.dag.owner.split(",")] if dag_run.dag else None,
727
755
  tags=dag_run.dag.tags if dag_run.dag else [],
756
+ job_description=doc,
757
+ job_description_type=doc_type,
728
758
  dag_run_state=dag_run.get_state(),
729
759
  task_ids=task_ids,
730
760
  msg=msg,
@@ -24,8 +24,10 @@ from airflow.models import Param
24
24
  from airflow.models.xcom_arg import XComArg
25
25
 
26
26
  if TYPE_CHECKING:
27
- from airflow.sdk import DAG
28
- from airflow.sdk.definitions._internal.abstractoperator import Operator
27
+ from airflow.sdk import DAG, BaseOperator
28
+ from airflow.sdk.definitions.mappedoperator import MappedOperator
29
+
30
+ T = TypeVar("T", bound=DAG | BaseOperator | MappedOperator)
29
31
  else:
30
32
  try:
31
33
  from airflow.sdk import DAG
@@ -35,10 +37,6 @@ else:
35
37
  ENABLE_OL_PARAM_NAME = "_selective_enable_ol"
36
38
  ENABLE_OL_PARAM = Param(True, const=True)
37
39
  DISABLE_OL_PARAM = Param(False, const=False)
38
- T = TypeVar("T", bound="DAG | Operator")
39
-
40
- if TYPE_CHECKING:
41
- from airflow.sdk.bases.operator import BaseOperator as SdkBaseOperator
42
40
 
43
41
 
44
42
  log = logging.getLogger(__name__)
@@ -78,7 +76,7 @@ def disable_lineage(obj: T) -> T:
78
76
  return obj
79
77
 
80
78
 
81
- def is_task_lineage_enabled(task: Operator | SdkBaseOperator) -> bool:
79
+ def is_task_lineage_enabled(task: BaseOperator | MappedOperator) -> bool:
82
80
  """Check if selective enable OpenLineage parameter is set to True on task level."""
83
81
  if task.params.get(ENABLE_OL_PARAM_NAME) is False:
84
82
  log.debug(
@@ -52,7 +52,7 @@ from airflow.providers.openlineage.utils.selective_enable import (
52
52
  is_dag_lineage_enabled,
53
53
  is_task_lineage_enabled,
54
54
  )
55
- from airflow.providers.openlineage.version_compat import AIRFLOW_V_3_0_PLUS
55
+ from airflow.providers.openlineage.version_compat import AIRFLOW_V_3_0_PLUS, get_base_airflow_version_tuple
56
56
  from airflow.serialization.serialized_objects import SerializedBaseOperator
57
57
  from airflow.utils.module_loading import import_string
58
58
 
@@ -115,6 +115,7 @@ else:
115
115
 
116
116
  log = logging.getLogger(__name__)
117
117
  _NOMINAL_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
118
+ _MAX_DOC_BYTES = 64 * 1024 # 64 kilobytes
118
119
 
119
120
 
120
121
  def try_import_from_string(string: str) -> Any:
@@ -179,6 +180,80 @@ def get_task_parent_run_facet(
179
180
  }
180
181
 
181
182
 
183
+ def _truncate_string_to_byte_size(s: str, max_size: int = _MAX_DOC_BYTES) -> str:
184
+ """
185
+ Truncate a string to a maximum UTF-8 byte size, ensuring valid encoding.
186
+
187
+ This is used to safely limit the size of string content (e.g., for OpenLineage events)
188
+ without breaking multibyte characters. If truncation occurs, the result is a valid
189
+ UTF-8 string with any partial characters at the end removed.
190
+
191
+ Args:
192
+ s (str): The input string to truncate.
193
+ max_size (int): Maximum allowed size in bytes after UTF-8 encoding.
194
+
195
+ Returns:
196
+ str: A UTF-8-safe truncated string within the specified byte limit.
197
+ """
198
+ encoded = s.encode("utf-8")
199
+ if len(encoded) <= max_size:
200
+ return s
201
+ log.debug(
202
+ "Truncating long string content for OpenLineage event. "
203
+ "Original size: %d bytes, truncated to: %d bytes (UTF-8 safe).",
204
+ len(encoded),
205
+ max_size,
206
+ )
207
+ truncated = encoded[:max_size]
208
+ # Make sure we don't cut a multibyte character in half
209
+ return truncated.decode("utf-8", errors="ignore")
210
+
211
+
212
+ def get_task_documentation(operator: BaseOperator | MappedOperator | None) -> tuple[str | None, str | None]:
213
+ """Get task documentation and mime type, truncated to _MAX_DOC_BYTES bytes length, if present."""
214
+ if not operator:
215
+ return None, None
216
+
217
+ doc, mime_type = None, None
218
+ if operator.doc:
219
+ doc = operator.doc
220
+ mime_type = "text/plain"
221
+ elif operator.doc_md:
222
+ doc = operator.doc_md
223
+ mime_type = "text/markdown"
224
+ elif operator.doc_json:
225
+ doc = operator.doc_json
226
+ mime_type = "application/json"
227
+ elif operator.doc_yaml:
228
+ doc = operator.doc_yaml
229
+ mime_type = "application/x-yaml"
230
+ elif operator.doc_rst:
231
+ doc = operator.doc_rst
232
+ mime_type = "text/x-rst"
233
+
234
+ if doc:
235
+ return _truncate_string_to_byte_size(doc), mime_type
236
+ return None, None
237
+
238
+
239
+ def get_dag_documentation(dag: DAG | None) -> tuple[str | None, str | None]:
240
+ """Get dag documentation and mime type, truncated to _MAX_DOC_BYTES bytes length, if present."""
241
+ if not dag:
242
+ return None, None
243
+
244
+ doc, mime_type = None, None
245
+ if dag.doc_md:
246
+ doc = dag.doc_md
247
+ mime_type = "text/markdown"
248
+ elif dag.description:
249
+ doc = dag.description
250
+ mime_type = "text/plain"
251
+
252
+ if doc:
253
+ return _truncate_string_to_byte_size(doc), mime_type
254
+ return None, None
255
+
256
+
182
257
  def get_airflow_mapped_task_facet(task_instance: TaskInstance) -> dict[str, Any]:
183
258
  # check for -1 comes from SmartSensor compatibility with dynamic task mapping
184
259
  # this comes from Airflow code
@@ -235,7 +310,7 @@ def get_user_provided_run_facets(ti: TaskInstance, ti_state: TaskInstanceState)
235
310
  def get_fully_qualified_class_name(operator: BaseOperator | MappedOperator) -> str:
236
311
  if isinstance(operator, (MappedOperator, SerializedBaseOperator)):
237
312
  # as in airflow.api_connexion.schemas.common_schema.ClassReferenceSchema
238
- return operator._task_module + "." + operator._task_type # type: ignore
313
+ return operator._task_module + "." + operator._task_type
239
314
  op_class = get_operator_class(operator)
240
315
  return op_class.__module__ + "." + op_class.__name__
241
316
 
@@ -705,6 +780,7 @@ def _emits_ol_events(task: BaseOperator | MappedOperator) -> bool:
705
780
  not getattr(task, "on_execute_callback", None),
706
781
  not getattr(task, "on_success_callback", None),
707
782
  not task.outlets,
783
+ not (task.inlets and get_base_airflow_version_tuple() >= (3, 0, 2)), # Added in 3.0.2 #50773
708
784
  )
709
785
  )
710
786
 
@@ -872,7 +948,7 @@ def translate_airflow_asset(asset: Asset, lineage_context) -> OpenLineageDataset
872
948
  from airflow.sdk.definitions.asset import _get_normalized_scheme
873
949
  else:
874
950
  try:
875
- from airflow.datasets import _get_normalized_scheme # type: ignore[no-redef, attr-defined]
951
+ from airflow.datasets import _get_normalized_scheme # type: ignore[no-redef]
876
952
  except ImportError:
877
953
  return None
878
954
 
@@ -33,3 +33,13 @@ def get_base_airflow_version_tuple() -> tuple[int, int, int]:
33
33
 
34
34
 
35
35
  AIRFLOW_V_3_0_PLUS = get_base_airflow_version_tuple() >= (3, 0, 0)
36
+
37
+ if AIRFLOW_V_3_0_PLUS:
38
+ from airflow.sdk import BaseOperator
39
+ else:
40
+ from airflow.models import BaseOperator
41
+
42
+ __all__ = [
43
+ "AIRFLOW_V_3_0_PLUS",
44
+ "BaseOperator",
45
+ ]
@@ -1,11 +1,11 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: apache-airflow-providers-openlineage
3
- Version: 2.5.0
3
+ Version: 2.6.0rc1
4
4
  Summary: Provider package apache-airflow-providers-openlineage for Apache Airflow
5
5
  Keywords: airflow-provider,openlineage,airflow,integration
6
6
  Author-email: Apache Software Foundation <dev@airflow.apache.org>
7
7
  Maintainer-email: Apache Software Foundation <dev@airflow.apache.org>
8
- Requires-Python: ~=3.10
8
+ Requires-Python: >=3.10
9
9
  Description-Content-Type: text/x-rst
10
10
  Classifier: Development Status :: 5 - Production/Stable
11
11
  Classifier: Environment :: Console
@@ -18,16 +18,17 @@ Classifier: License :: OSI Approved :: Apache Software License
18
18
  Classifier: Programming Language :: Python :: 3.10
19
19
  Classifier: Programming Language :: Python :: 3.11
20
20
  Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
21
22
  Classifier: Topic :: System :: Monitoring
22
- Requires-Dist: apache-airflow>=2.10.0
23
- Requires-Dist: apache-airflow-providers-common-sql>=1.20.0
24
- Requires-Dist: apache-airflow-providers-common-compat>=1.4.0
23
+ Requires-Dist: apache-airflow>=2.10.0rc1
24
+ Requires-Dist: apache-airflow-providers-common-sql>=1.20.0rc1
25
+ Requires-Dist: apache-airflow-providers-common-compat>=1.4.0rc1
25
26
  Requires-Dist: attrs>=22.2
26
- Requires-Dist: openlineage-integration-common>=1.34.0
27
- Requires-Dist: openlineage-python>=1.34.0
27
+ Requires-Dist: openlineage-integration-common>=1.36.0
28
+ Requires-Dist: openlineage-python>=1.36.0
28
29
  Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
29
- Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.5.0/changelog.html
30
- Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.5.0
30
+ Project-URL: Changelog, https://airflow.staged.apache.org/docs/apache-airflow-providers-openlineage/2.6.0/changelog.html
31
+ Project-URL: Documentation, https://airflow.staged.apache.org/docs/apache-airflow-providers-openlineage/2.6.0
31
32
  Project-URL: Mastodon, https://fosstodon.org/@airflow
32
33
  Project-URL: Slack Chat, https://s.apache.org/airflow-slack
33
34
  Project-URL: Source Code, https://github.com/apache/airflow
@@ -58,8 +59,9 @@ Project-URL: YouTube, https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/
58
59
 
59
60
  Package ``apache-airflow-providers-openlineage``
60
61
 
61
- Release: ``2.5.0``
62
+ Release: ``2.6.0``
62
63
 
64
+ Release Date: ``|PypiReleaseDate|``
63
65
 
64
66
  `OpenLineage <https://openlineage.io/>`__
65
67
 
@@ -71,7 +73,7 @@ This is a provider package for ``openlineage`` provider. All classes for this pr
71
73
  are in ``airflow.providers.openlineage`` python package.
72
74
 
73
75
  You can find package information and changelog for the provider
74
- in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.5.0/>`_.
76
+ in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.6.0/>`_.
75
77
 
76
78
  Installation
77
79
  ------------
@@ -80,7 +82,7 @@ You can install this package on top of an existing Airflow 2 installation (see `
80
82
  for the minimum Airflow version supported) via
81
83
  ``pip install apache-airflow-providers-openlineage``
82
84
 
83
- The package supports the following python versions: 3.10,3.11,3.12
85
+ The package supports the following python versions: 3.10,3.11,3.12,3.13
84
86
 
85
87
  Requirements
86
88
  ------------
@@ -92,8 +94,8 @@ PIP package Version required
92
94
  ``apache-airflow-providers-common-sql`` ``>=1.20.0``
93
95
  ``apache-airflow-providers-common-compat`` ``>=1.4.0``
94
96
  ``attrs`` ``>=22.2``
95
- ``openlineage-integration-common`` ``>=1.34.0``
96
- ``openlineage-python`` ``>=1.34.0``
97
+ ``openlineage-integration-common`` ``>=1.36.0``
98
+ ``openlineage-python`` ``>=1.36.0``
97
99
  ========================================== ==================
98
100
 
99
101
  Cross provider package dependencies
@@ -117,5 +119,5 @@ Dependent package
117
119
  ================================================================================================================== =================
118
120
 
119
121
  The changelog for the provider package can be found in the
120
- `changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.5.0/changelog.html>`_.
122
+ `changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.6.0/changelog.html>`_.
121
123
 
@@ -1,11 +1,11 @@
1
1
  airflow/providers/openlineage/LICENSE,sha256=gXPVwptPlW1TJ4HSuG5OMPg-a3h43OGMkZRR1rpwfJA,10850
2
- airflow/providers/openlineage/__init__.py,sha256=T2EIoXrtmbdXfH_F2bMY8LyH9-aiMt_Mj5K780qyt3I,1500
3
- airflow/providers/openlineage/conf.py,sha256=aYdLU7iHBdGIU8ZAC5iUiIDgXP9gvP9r_z5hTAbXPOU,5535
4
- airflow/providers/openlineage/get_provider_info.py,sha256=z0oCEDCM3UyrlXrQt0ksTi6jYd2vqjiSSudBM7hLNZw,9255
2
+ airflow/providers/openlineage/__init__.py,sha256=SocHoW3JIJo3V8QKfkGD7IKHz8b4oy37Jeo7Nz2wMjk,1500
3
+ airflow/providers/openlineage/conf.py,sha256=9v2DpQ84BBCdRxPlh8QsboTqX8HXe-qeHVcTMRL5c3o,5807
4
+ airflow/providers/openlineage/get_provider_info.py,sha256=2Oy13q-jA-UYt-a9pYBk4PnImYshGnJCPD1Jj80ChNw,9453
5
5
  airflow/providers/openlineage/sqlparser.py,sha256=8Aq0qbUUBthKjXBV756p2aBf8RYfCuBBfgxwhGpQIg4,20360
6
- airflow/providers/openlineage/version_compat.py,sha256=j5PCtXvZ71aBjixu-EFTNtVDPsngzzs7os0ZQDgFVDk,1536
6
+ airflow/providers/openlineage/version_compat.py,sha256=7RHBehpYMeNSBtmJiPUeJHA0c7l-Eqsdy546kW3RFa4,1712
7
7
  airflow/providers/openlineage/extractors/__init__.py,sha256=I0X4f6zUniclyD9zT0DFHRImpCpJVP4MkPJT3cd7X5I,1081
8
- airflow/providers/openlineage/extractors/base.py,sha256=BsYKXTbz0t8TZIyKDfPV--iWwR5NCLWMVjQv2SKajCE,6465
8
+ airflow/providers/openlineage/extractors/base.py,sha256=0K7prvOeYjs30P87zgcOmABZOZYsw0WYoFBstS_vgmY,6449
9
9
  airflow/providers/openlineage/extractors/bash.py,sha256=3aR0PXs8fzRLibRxXN1R8wMZnGzyCur7mjpy8e5GC4A,2583
10
10
  airflow/providers/openlineage/extractors/manager.py,sha256=kVo5OHkpUoYIiT2RvwUt128jC_Q4EosdLC0sP1YfIH0,12840
11
11
  airflow/providers/openlineage/extractors/python.py,sha256=BHh2G68i9mrpJb-xF0YZm0NaQevcxsPixSXzLlFz6XE,3180
@@ -15,18 +15,20 @@ airflow/providers/openlineage/facets/AirflowJobFacet.json,sha256=rS9PuPWOi1Jc5B4
15
15
  airflow/providers/openlineage/facets/AirflowRunFacet.json,sha256=70mEaZShgSJp-2xr0bVvw3ljiGOPEaXD591fhuAQm_o,5953
16
16
  airflow/providers/openlineage/facets/AirflowStateRunFacet.json,sha256=xhHQEKD9Jopw-oqbkCCrrwFjfXnxvuJAritsmegKjuQ,937
17
17
  airflow/providers/openlineage/facets/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
18
+ airflow/providers/openlineage/operators/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
19
+ airflow/providers/openlineage/operators/empty.py,sha256=g3ksadUeHW6IydzqIT4KxGU3Agt-F7NmKmOS6T47UDs,1710
18
20
  airflow/providers/openlineage/plugins/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
19
- airflow/providers/openlineage/plugins/adapter.py,sha256=rtvjPvzj9fN6Crn9Sg0VTjqUrhVqEWvXakID2538JI4,21519
21
+ airflow/providers/openlineage/plugins/adapter.py,sha256=PyCec9S22_NZd7VTuJxQrJ7SFGfNgJoSyTlKFKYnzVg,22930
20
22
  airflow/providers/openlineage/plugins/facets.py,sha256=VvyMYR6ONkC95q5FdNmohv0scbA1Ej_B5cQ97as5GvA,4161
21
- airflow/providers/openlineage/plugins/listener.py,sha256=jAbviwp3F44sst8SFJSvUWB0OnFUz4mXJg79uvRaU0Q,31014
23
+ airflow/providers/openlineage/plugins/listener.py,sha256=bE3g0Qfo5aE89JOBJUi-CuShpKjrG5hDFa1hLbPw-5I,32128
22
24
  airflow/providers/openlineage/plugins/macros.py,sha256=RfxkpNq78CHzfTAf9X7MQ_zRArMRu9sSD2j69fPSK7s,5265
23
25
  airflow/providers/openlineage/plugins/openlineage.py,sha256=dP3GOVtOGAIokeaeRx2OW_c1TKAxDvATlD9OGMyXqr0,2032
24
26
  airflow/providers/openlineage/utils/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
25
- airflow/providers/openlineage/utils/selective_enable.py,sha256=YyrUQ7Djv5o46XdH83N_G8AXAZ9C_aKPa534pbNVp08,3441
27
+ airflow/providers/openlineage/utils/selective_enable.py,sha256=Nf94UaziAQHDvpx7bEDMRbT3twXVslDilUI1ZTEOB0M,3382
26
28
  airflow/providers/openlineage/utils/spark.py,sha256=X5liLxVLgQcgPF_0lFtQULeMOv_9dGj-HFjtZvWFgOo,7626
27
29
  airflow/providers/openlineage/utils/sql.py,sha256=b_k2fUyGGWzR1eau7tgq7vKQJsR7wPQzDF8M-WRq6jk,9548
28
- airflow/providers/openlineage/utils/utils.py,sha256=7Q9UHycEQ2emJrlhjIniUf9cmUNols3DxMO-V8q3QnY,32669
29
- apache_airflow_providers_openlineage-2.5.0.dist-info/entry_points.txt,sha256=GAx0_i2OeZzqaiiiYuA-xchICDXiCT5kVqpKSxsOjt4,214
30
- apache_airflow_providers_openlineage-2.5.0.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
31
- apache_airflow_providers_openlineage-2.5.0.dist-info/METADATA,sha256=SMb2iKKQSPuwDgSJj9vqDKjJjj5l8SwZwahY1M8kBHU,5634
32
- apache_airflow_providers_openlineage-2.5.0.dist-info/RECORD,,
30
+ airflow/providers/openlineage/utils/utils.py,sha256=49fP_Nb1oPI3pH-0IQUXni2ct2_A4ROjkMCohs9rRAc,35267
31
+ apache_airflow_providers_openlineage-2.6.0rc1.dist-info/entry_points.txt,sha256=GAx0_i2OeZzqaiiiYuA-xchICDXiCT5kVqpKSxsOjt4,214
32
+ apache_airflow_providers_openlineage-2.6.0rc1.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
33
+ apache_airflow_providers_openlineage-2.6.0rc1.dist-info/METADATA,sha256=IMYNCDBcQUIz1XbJS949sJ7G3wQkOs2nYjySsiy2dFs,5752
34
+ apache_airflow_providers_openlineage-2.6.0rc1.dist-info/RECORD,,