apache-airflow-providers-openlineage 1.11.0rc1__py3-none-any.whl → 1.11.0rc2__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of apache-airflow-providers-openlineage might be problematic. Click here for more details.

@@ -25,6 +25,7 @@ from airflow.providers.openlineage.extractors.bash import BashExtractor
25
25
  from airflow.providers.openlineage.extractors.python import PythonExtractor
26
26
  from airflow.providers.openlineage.utils.utils import (
27
27
  get_unknown_source_attribute_run_facet,
28
+ translate_airflow_dataset,
28
29
  try_import_from_string,
29
30
  )
30
31
  from airflow.utils.log.logging_mixin import LoggingMixin
@@ -90,7 +91,6 @@ class ExtractorManager(LoggingMixin):
90
91
  f"task_id={task.task_id} "
91
92
  f"airflow_run_id={dagrun.run_id} "
92
93
  )
93
-
94
94
  if extractor:
95
95
  # Extracting advanced metadata is only possible when extractor for particular operator
96
96
  # is defined. Without it, we can't extract any input or output data.
@@ -105,14 +105,22 @@ class ExtractorManager(LoggingMixin):
105
105
  task_metadata = self.validate_task_metadata(task_metadata)
106
106
  if task_metadata:
107
107
  if (not task_metadata.inputs) and (not task_metadata.outputs):
108
- self.extract_inlets_and_outlets(task_metadata, task.inlets, task.outlets)
109
-
108
+ if (hook_lineage := self.get_hook_lineage()) is not None:
109
+ inputs, outputs = hook_lineage
110
+ task_metadata.inputs = inputs
111
+ task_metadata.outputs = outputs
112
+ else:
113
+ self.extract_inlets_and_outlets(task_metadata, task.inlets, task.outlets)
110
114
  return task_metadata
111
115
 
112
116
  except Exception as e:
113
117
  self.log.warning(
114
118
  "Failed to extract metadata using found extractor %s - %s %s", extractor, e, task_info
115
119
  )
120
+ elif (hook_lineage := self.get_hook_lineage()) is not None:
121
+ inputs, outputs = hook_lineage
122
+ task_metadata = OperatorLineage(inputs=inputs, outputs=outputs)
123
+ return task_metadata
116
124
  else:
117
125
  self.log.debug("Unable to find an extractor %s", task_info)
118
126
 
@@ -168,6 +176,30 @@ class ExtractorManager(LoggingMixin):
168
176
  if d:
169
177
  task_metadata.outputs.append(d)
170
178
 
179
+ def get_hook_lineage(self) -> tuple[list[Dataset], list[Dataset]] | None:
180
+ try:
181
+ from airflow.lineage.hook import get_hook_lineage_collector
182
+ except ImportError:
183
+ return None
184
+
185
+ if not get_hook_lineage_collector().has_collected:
186
+ return None
187
+
188
+ return (
189
+ [
190
+ dataset
191
+ for dataset_info in get_hook_lineage_collector().collected_datasets.inputs
192
+ if (dataset := translate_airflow_dataset(dataset_info.dataset, dataset_info.context))
193
+ is not None
194
+ ],
195
+ [
196
+ dataset
197
+ for dataset_info in get_hook_lineage_collector().collected_datasets.outputs
198
+ if (dataset := translate_airflow_dataset(dataset_info.dataset, dataset_info.context))
199
+ is not None
200
+ ],
201
+ )
202
+
171
203
  @staticmethod
172
204
  def convert_to_ol_dataset_from_object_storage_uri(uri: str) -> Dataset | None:
173
205
  from urllib.parse import urlparse
@@ -53,6 +53,7 @@ def get_provider_info():
53
53
  "dependencies": [
54
54
  "apache-airflow>=2.8.0",
55
55
  "apache-airflow-providers-common-sql>=1.6.0",
56
+ "apache-airflow-providers-common-compat>=1.2.0",
56
57
  "attrs>=22.2",
57
58
  "openlineage-integration-common>=1.16.0",
58
59
  "openlineage-python>=1.16.0",
@@ -25,6 +25,7 @@ from airflow.providers.openlineage.plugins.macros import (
25
25
  lineage_parent_id,
26
26
  lineage_run_id,
27
27
  )
28
+ from airflow.providers.openlineage.utils.utils import IS_AIRFLOW_2_10_OR_HIGHER
28
29
 
29
30
 
30
31
  class OpenLineageProviderPlugin(AirflowPlugin):
@@ -39,6 +40,10 @@ class OpenLineageProviderPlugin(AirflowPlugin):
39
40
  if not conf.is_disabled():
40
41
  macros = [lineage_job_namespace, lineage_job_name, lineage_run_id, lineage_parent_id]
41
42
  listeners = [get_openlineage_listener()]
43
+ if IS_AIRFLOW_2_10_OR_HIGHER:
44
+ from airflow.lineage.hook import HookLineageReader
45
+
46
+ hook_lineage_readers = [HookLineageReader]
42
47
  else:
43
48
  macros = []
44
49
  listeners = []
@@ -33,7 +33,7 @@ from packaging.version import Version
33
33
  from airflow import __version__ as AIRFLOW_VERSION
34
34
  from airflow.datasets import Dataset
35
35
  from airflow.exceptions import AirflowProviderDeprecationWarning # TODO: move this maybe to Airflow's logic?
36
- from airflow.models import DAG, BaseOperator, MappedOperator, Operator
36
+ from airflow.models import DAG, BaseOperator, MappedOperator
37
37
  from airflow.providers.openlineage import conf
38
38
  from airflow.providers.openlineage.plugins.facets import (
39
39
  AirflowDagRunFacet,
@@ -423,7 +423,7 @@ def get_airflow_job_facet(dag_run: DagRun) -> dict[str, AirflowJobFacet]:
423
423
  return {}
424
424
  return {
425
425
  "airflow": AirflowJobFacet(
426
- taskTree=_get_parsed_dag_tree(dag_run.dag),
426
+ taskTree={}, # caused OOM errors, to be removed, see #41587
427
427
  taskGroups=_get_task_groups_details(dag_run.dag),
428
428
  tasks=_get_tasks_details(dag_run.dag),
429
429
  )
@@ -439,43 +439,6 @@ def get_airflow_state_run_facet(dag_run: DagRun) -> dict[str, AirflowStateRunFac
439
439
  }
440
440
 
441
441
 
442
- def _get_parsed_dag_tree(dag: DAG) -> dict:
443
- """
444
- Get DAG's tasks hierarchy representation.
445
-
446
- While the task dependencies are defined as following:
447
- task >> [task_2, task_4] >> task_7
448
- task_3 >> task_5
449
- task_6 # has no dependencies, it's a root and a leaf
450
-
451
- The result of this function will look like:
452
- {
453
- "task": {
454
- "task_2": {
455
- "task_7": {}
456
- },
457
- "task_4": {
458
- "task_7": {}
459
- }
460
- },
461
- "task_3": {
462
- "task_5": {}
463
- },
464
- "task_6": {}
465
- }
466
- """
467
-
468
- def get_downstream(task: Operator, current_dict: dict):
469
- current_dict[task.task_id] = {}
470
- for tmp_task in sorted(task.downstream_list, key=lambda x: x.task_id):
471
- get_downstream(tmp_task, current_dict[task.task_id])
472
-
473
- task_dict: dict = {}
474
- for t in sorted(dag.roots, key=lambda x: x.task_id):
475
- get_downstream(t, task_dict)
476
- return task_dict
477
-
478
-
479
442
  def _get_tasks_details(dag: DAG) -> dict:
480
443
  tasks = {
481
444
  single_task.task_id: {
@@ -487,8 +450,9 @@ def _get_tasks_details(dag: DAG) -> dict:
487
450
  "ui_label": single_task.label,
488
451
  "is_setup": single_task.is_setup,
489
452
  "is_teardown": single_task.is_teardown,
453
+ "downstream_task_ids": sorted(single_task.downstream_task_ids),
490
454
  }
491
- for single_task in dag.tasks
455
+ for single_task in sorted(dag.tasks, key=lambda x: x.task_id)
492
456
  }
493
457
 
494
458
  return tasks
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: apache-airflow-providers-openlineage
3
- Version: 1.11.0rc1
3
+ Version: 1.11.0rc2
4
4
  Summary: Provider package apache-airflow-providers-openlineage for Apache Airflow
5
5
  Keywords: airflow-provider,openlineage,airflow,integration
6
6
  Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -21,6 +21,7 @@ Classifier: Programming Language :: Python :: 3.10
21
21
  Classifier: Programming Language :: Python :: 3.11
22
22
  Classifier: Programming Language :: Python :: 3.12
23
23
  Classifier: Topic :: System :: Monitoring
24
+ Requires-Dist: apache-airflow-providers-common-compat>=1.2.0rc0
24
25
  Requires-Dist: apache-airflow-providers-common-sql>=1.6.0rc0
25
26
  Requires-Dist: apache-airflow>=2.8.0rc0
26
27
  Requires-Dist: attrs>=22.2
@@ -80,7 +81,7 @@ Provides-Extra: common.sql
80
81
 
81
82
  Package ``apache-airflow-providers-openlineage``
82
83
 
83
- Release: ``1.11.0.rc1``
84
+ Release: ``1.11.0.rc2``
84
85
 
85
86
 
86
87
  `OpenLineage <https://openlineage.io/>`__
@@ -107,15 +108,16 @@ The package supports the following python versions: 3.8,3.9,3.10,3.11,3.12
107
108
  Requirements
108
109
  ------------
109
110
 
110
- ======================================= ==================
111
- PIP package Version required
112
- ======================================= ==================
113
- ``apache-airflow`` ``>=2.8.0``
114
- ``apache-airflow-providers-common-sql`` ``>=1.6.0``
115
- ``attrs`` ``>=22.2``
116
- ``openlineage-integration-common`` ``>=1.16.0``
117
- ``openlineage-python`` ``>=1.16.0``
118
- ======================================= ==================
111
+ ========================================== ==================
112
+ PIP package Version required
113
+ ========================================== ==================
114
+ ``apache-airflow`` ``>=2.8.0``
115
+ ``apache-airflow-providers-common-sql`` ``>=1.6.0``
116
+ ``apache-airflow-providers-common-compat`` ``>=1.2.0``
117
+ ``attrs`` ``>=22.2``
118
+ ``openlineage-integration-common`` ``>=1.16.0``
119
+ ``openlineage-python`` ``>=1.16.0``
120
+ ========================================== ==================
119
121
 
120
122
  Cross provider package dependencies
121
123
  -----------------------------------
@@ -1,12 +1,12 @@
1
1
  airflow/providers/openlineage/LICENSE,sha256=FFb4jd2AXnOOf7XLP04pQW6jbdhG49TxlGY6fFpCV1Y,13609
2
2
  airflow/providers/openlineage/__init__.py,sha256=jUD56ffTq0DAqqtYoTQi2pjOt51XQnVZsPuAJPt0UV0,1499
3
3
  airflow/providers/openlineage/conf.py,sha256=paV6AHxPJPdQyVMoTGBRHymT-bpbs4Tovttqy9oicMs,5151
4
- airflow/providers/openlineage/get_provider_info.py,sha256=I8lTHWr3IccBOsCicCr_sR304Ayij9mVGc0dwI9zgtI,8824
4
+ airflow/providers/openlineage/get_provider_info.py,sha256=0qKj7j0nBUYIadp5B3ZxTj8_bKVwnDaT02Z1_41qWTo,8885
5
5
  airflow/providers/openlineage/sqlparser.py,sha256=c7q3VVw41S87ZFozrkrEr2oZK79N12mC3KdDs9V1IuM,15581
6
6
  airflow/providers/openlineage/extractors/__init__.py,sha256=I0X4f6zUniclyD9zT0DFHRImpCpJVP4MkPJT3cd7X5I,1081
7
7
  airflow/providers/openlineage/extractors/base.py,sha256=olafVPBKxeGjqXerCYM0vj2q78Lm4ErWjex_R6nhjKY,6635
8
8
  airflow/providers/openlineage/extractors/bash.py,sha256=3aR0PXs8fzRLibRxXN1R8wMZnGzyCur7mjpy8e5GC4A,2583
9
- airflow/providers/openlineage/extractors/manager.py,sha256=uuSGLZgJHu0tu3RGzrc41uw6SahdgU3816T26FtnFEY,10539
9
+ airflow/providers/openlineage/extractors/manager.py,sha256=Z5fZN6dt67xfJFtlD-_Xnc-tYpxOp66o-ASm6Sn2Vpk,11958
10
10
  airflow/providers/openlineage/extractors/python.py,sha256=hVWOplMlBimrpPKPeW6vm75a8OmAYMU1oJzqMz8Jh90,3171
11
11
  airflow/providers/openlineage/facets/AirflowDagRunFacet.json,sha256=ie6c-J3-wGgk80WDTGWePz18o6DbW--TNM7BMF4WfcU,2251
12
12
  airflow/providers/openlineage/facets/AirflowDebugRunFacet.json,sha256=_zA5gFqGje5MOH1SmdMeA5ViOHvW_pV4oijEAvkuBbY,768
@@ -19,12 +19,12 @@ airflow/providers/openlineage/plugins/adapter.py,sha256=mdXF8ZgyYLgjyWForHQnfKxn
19
19
  airflow/providers/openlineage/plugins/facets.py,sha256=VvyMYR6ONkC95q5FdNmohv0scbA1Ej_B5cQ97as5GvA,4161
20
20
  airflow/providers/openlineage/plugins/listener.py,sha256=vPtYWYYzKsNw6cR3MsXM4rfnFGoc-xPDGa_6jfTkuLU,18754
21
21
  airflow/providers/openlineage/plugins/macros.py,sha256=hgFA3ZdQibyn4KXIOsKYBm4WRKDLA5q6Asscx5rvNfM,3076
22
- airflow/providers/openlineage/plugins/openlineage.py,sha256=rsRUW_zpXVAglzsgQRv5T9VWYY7CMQl0qRWm8-3oqDA,1678
22
+ airflow/providers/openlineage/plugins/openlineage.py,sha256=T0L5Yxpyq_wzs2_hltJCMY5NKzgsYp0vuEn8LppV5PU,1915
23
23
  airflow/providers/openlineage/utils/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
24
24
  airflow/providers/openlineage/utils/selective_enable.py,sha256=dFJ7wK7J_-BFwcOKp9tqFOSrASV3lmLv7HtRkEuMk3Q,3087
25
25
  airflow/providers/openlineage/utils/sql.py,sha256=bnuU9WvjVKcWVMN3cUp0jaHtU5_ZRM5I1OP1WhIdztg,9583
26
- airflow/providers/openlineage/utils/utils.py,sha256=gBaWzD4ifBPLAtp4TuUAEzVy_w1L_bQ4S9yfMp_uO2Q,24366
27
- apache_airflow_providers_openlineage-1.11.0rc1.dist-info/entry_points.txt,sha256=GAx0_i2OeZzqaiiiYuA-xchICDXiCT5kVqpKSxsOjt4,214
28
- apache_airflow_providers_openlineage-1.11.0rc1.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
29
- apache_airflow_providers_openlineage-1.11.0rc1.dist-info/METADATA,sha256=QGnBVXjzazlTPQqGJTrqVuoqpAIfK444OzKxf6YvZ-c,6387
30
- apache_airflow_providers_openlineage-1.11.0rc1.dist-info/RECORD,,
26
+ airflow/providers/openlineage/utils/utils.py,sha256=i8bxV6T3tU2hOsMDlV9lo2Pnf-b9lm-xpYSUUprcd-Q,23509
27
+ apache_airflow_providers_openlineage-1.11.0rc2.dist-info/entry_points.txt,sha256=GAx0_i2OeZzqaiiiYuA-xchICDXiCT5kVqpKSxsOjt4,214
28
+ apache_airflow_providers_openlineage-1.11.0rc2.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
29
+ apache_airflow_providers_openlineage-1.11.0rc2.dist-info/METADATA,sha256=zPTUcHhwoSwvaJylG8UmcZwKz9s_cQFnemAnH7VrN9s,6534
30
+ apache_airflow_providers_openlineage-1.11.0rc2.dist-info/RECORD,,