apache-airflow-providers-openlineage 1.10.0rc1__tar.gz → 1.11.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/PKG-INFO +19 -17
  2. {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/README.rst +13 -12
  3. {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/__init__.py +3 -3
  4. {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/conf.py +6 -0
  5. {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/extractors/base.py +1 -1
  6. {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/extractors/manager.py +35 -3
  7. apache_airflow_providers_openlineage-1.11.0/airflow/providers/openlineage/facets/AirflowDebugRunFacet.json +30 -0
  8. {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/get_provider_info.py +11 -2
  9. {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/plugins/adapter.py +5 -3
  10. {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/plugins/facets.py +7 -11
  11. {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/plugins/listener.py +7 -0
  12. {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/plugins/openlineage.py +5 -0
  13. {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/utils/utils.py +37 -75
  14. {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/pyproject.toml +6 -5
  15. {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/LICENSE +0 -0
  16. {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/extractors/__init__.py +0 -0
  17. {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/extractors/bash.py +0 -0
  18. {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/extractors/python.py +0 -0
  19. {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/facets/AirflowDagRunFacet.json +0 -0
  20. {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/facets/AirflowJobFacet.json +0 -0
  21. {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/facets/AirflowRunFacet.json +0 -0
  22. {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/facets/AirflowStateRunFacet.json +0 -0
  23. {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/facets/__init__.py +0 -0
  24. {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/plugins/__init__.py +0 -0
  25. {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/plugins/macros.py +0 -0
  26. {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/sqlparser.py +0 -0
  27. {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/utils/__init__.py +0 -0
  28. {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/utils/selective_enable.py +0 -0
  29. {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0}/airflow/providers/openlineage/utils/sql.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: apache-airflow-providers-openlineage
3
- Version: 1.10.0rc1
3
+ Version: 1.11.0
4
4
  Summary: Provider package apache-airflow-providers-openlineage for Apache Airflow
5
5
  Keywords: airflow-provider,openlineage,airflow,integration
6
6
  Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -21,15 +21,16 @@ Classifier: Programming Language :: Python :: 3.10
21
21
  Classifier: Programming Language :: Python :: 3.11
22
22
  Classifier: Programming Language :: Python :: 3.12
23
23
  Classifier: Topic :: System :: Monitoring
24
- Requires-Dist: apache-airflow-providers-common-sql>=1.6.0rc0
25
- Requires-Dist: apache-airflow>=2.7.0rc0
24
+ Requires-Dist: apache-airflow-providers-common-compat>=1.2.0
25
+ Requires-Dist: apache-airflow-providers-common-sql>=1.6.0
26
+ Requires-Dist: apache-airflow>=2.8.0
26
27
  Requires-Dist: attrs>=22.2
27
28
  Requires-Dist: openlineage-integration-common>=1.16.0
28
29
  Requires-Dist: openlineage-python>=1.16.0
29
30
  Requires-Dist: apache-airflow-providers-common-sql ; extra == "common.sql"
30
31
  Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
31
- Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.10.0/changelog.html
32
- Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.10.0
32
+ Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.11.0/changelog.html
33
+ Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.11.0
33
34
  Project-URL: Slack Chat, https://s.apache.org/airflow-slack
34
35
  Project-URL: Source Code, https://github.com/apache/airflow
35
36
  Project-URL: Twitter, https://twitter.com/ApacheAirflow
@@ -80,7 +81,7 @@ Provides-Extra: common.sql
80
81
 
81
82
  Package ``apache-airflow-providers-openlineage``
82
83
 
83
- Release: ``1.10.0.rc1``
84
+ Release: ``1.11.0``
84
85
 
85
86
 
86
87
  `OpenLineage <https://openlineage.io/>`__
@@ -93,7 +94,7 @@ This is a provider package for ``openlineage`` provider. All classes for this pr
93
94
  are in ``airflow.providers.openlineage`` python package.
94
95
 
95
96
  You can find package information and changelog for the provider
96
- in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.10.0/>`_.
97
+ in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.11.0/>`_.
97
98
 
98
99
  Installation
99
100
  ------------
@@ -107,15 +108,16 @@ The package supports the following python versions: 3.8,3.9,3.10,3.11,3.12
107
108
  Requirements
108
109
  ------------
109
110
 
110
- ======================================= ==================
111
- PIP package Version required
112
- ======================================= ==================
113
- ``apache-airflow`` ``>=2.7.0``
114
- ``apache-airflow-providers-common-sql`` ``>=1.6.0``
115
- ``attrs`` ``>=22.2``
116
- ``openlineage-integration-common`` ``>=1.16.0``
117
- ``openlineage-python`` ``>=1.16.0``
118
- ======================================= ==================
111
+ ========================================== ==================
112
+ PIP package Version required
113
+ ========================================== ==================
114
+ ``apache-airflow`` ``>=2.8.0``
115
+ ``apache-airflow-providers-common-sql`` ``>=1.6.0``
116
+ ``apache-airflow-providers-common-compat`` ``>=1.2.0``
117
+ ``attrs`` ``>=22.2``
118
+ ``openlineage-integration-common`` ``>=1.16.0``
119
+ ``openlineage-python`` ``>=1.16.0``
120
+ ========================================== ==================
119
121
 
120
122
  Cross provider package dependencies
121
123
  -----------------------------------
@@ -137,4 +139,4 @@ Dependent package
137
139
  ============================================================================================================ ==============
138
140
 
139
141
  The changelog for the provider package can be found in the
140
- `changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.10.0/changelog.html>`_.
142
+ `changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.11.0/changelog.html>`_.
@@ -42,7 +42,7 @@
42
42
 
43
43
  Package ``apache-airflow-providers-openlineage``
44
44
 
45
- Release: ``1.10.0.rc1``
45
+ Release: ``1.11.0``
46
46
 
47
47
 
48
48
  `OpenLineage <https://openlineage.io/>`__
@@ -55,7 +55,7 @@ This is a provider package for ``openlineage`` provider. All classes for this pr
55
55
  are in ``airflow.providers.openlineage`` python package.
56
56
 
57
57
  You can find package information and changelog for the provider
58
- in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.10.0/>`_.
58
+ in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.11.0/>`_.
59
59
 
60
60
  Installation
61
61
  ------------
@@ -69,15 +69,16 @@ The package supports the following python versions: 3.8,3.9,3.10,3.11,3.12
69
69
  Requirements
70
70
  ------------
71
71
 
72
- ======================================= ==================
73
- PIP package Version required
74
- ======================================= ==================
75
- ``apache-airflow`` ``>=2.7.0``
76
- ``apache-airflow-providers-common-sql`` ``>=1.6.0``
77
- ``attrs`` ``>=22.2``
78
- ``openlineage-integration-common`` ``>=1.16.0``
79
- ``openlineage-python`` ``>=1.16.0``
80
- ======================================= ==================
72
+ ========================================== ==================
73
+ PIP package Version required
74
+ ========================================== ==================
75
+ ``apache-airflow`` ``>=2.8.0``
76
+ ``apache-airflow-providers-common-sql`` ``>=1.6.0``
77
+ ``apache-airflow-providers-common-compat`` ``>=1.2.0``
78
+ ``attrs`` ``>=22.2``
79
+ ``openlineage-integration-common`` ``>=1.16.0``
80
+ ``openlineage-python`` ``>=1.16.0``
81
+ ========================================== ==================
81
82
 
82
83
  Cross provider package dependencies
83
84
  -----------------------------------
@@ -99,4 +100,4 @@ Dependent package
99
100
  ============================================================================================================ ==============
100
101
 
101
102
  The changelog for the provider package can be found in the
102
- `changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.10.0/changelog.html>`_.
103
+ `changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.11.0/changelog.html>`_.
@@ -29,11 +29,11 @@ from airflow import __version__ as airflow_version
29
29
 
30
30
  __all__ = ["__version__"]
31
31
 
32
- __version__ = "1.10.0"
32
+ __version__ = "1.11.0"
33
33
 
34
34
  if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
35
- "2.7.0"
35
+ "2.8.0"
36
36
  ):
37
37
  raise RuntimeError(
38
- f"The package `apache-airflow-providers-openlineage:{__version__}` needs Apache Airflow 2.7.0+"
38
+ f"The package `apache-airflow-providers-openlineage:{__version__}` needs Apache Airflow 2.8.0+"
39
39
  )
@@ -145,3 +145,9 @@ def execution_timeout() -> int:
145
145
  def include_full_task_info() -> bool:
146
146
  """[openlineage] include_full_task_info."""
147
147
  return conf.getboolean(_CONFIG_SECTION, "include_full_task_info", fallback="False")
148
+
149
+
150
+ @cache
151
+ def debug_mode() -> bool:
152
+ """[openlineage] debug_mode."""
153
+ return conf.getboolean(_CONFIG_SECTION, "debug_mode", fallback="False")
@@ -113,7 +113,7 @@ class DefaultExtractor(BaseExtractor):
113
113
  "Operator %s does not have the get_openlineage_facets_on_start method.",
114
114
  self.operator.task_type,
115
115
  )
116
- return None
116
+ return OperatorLineage()
117
117
 
118
118
  def extract_on_complete(self, task_instance) -> OperatorLineage | None:
119
119
  failed_states = [TaskInstanceState.FAILED, TaskInstanceState.UP_FOR_RETRY]
@@ -25,6 +25,7 @@ from airflow.providers.openlineage.extractors.bash import BashExtractor
25
25
  from airflow.providers.openlineage.extractors.python import PythonExtractor
26
26
  from airflow.providers.openlineage.utils.utils import (
27
27
  get_unknown_source_attribute_run_facet,
28
+ translate_airflow_dataset,
28
29
  try_import_from_string,
29
30
  )
30
31
  from airflow.utils.log.logging_mixin import LoggingMixin
@@ -90,7 +91,6 @@ class ExtractorManager(LoggingMixin):
90
91
  f"task_id={task.task_id} "
91
92
  f"airflow_run_id={dagrun.run_id} "
92
93
  )
93
-
94
94
  if extractor:
95
95
  # Extracting advanced metadata is only possible when extractor for particular operator
96
96
  # is defined. Without it, we can't extract any input or output data.
@@ -105,14 +105,22 @@ class ExtractorManager(LoggingMixin):
105
105
  task_metadata = self.validate_task_metadata(task_metadata)
106
106
  if task_metadata:
107
107
  if (not task_metadata.inputs) and (not task_metadata.outputs):
108
- self.extract_inlets_and_outlets(task_metadata, task.inlets, task.outlets)
109
-
108
+ if (hook_lineage := self.get_hook_lineage()) is not None:
109
+ inputs, outputs = hook_lineage
110
+ task_metadata.inputs = inputs
111
+ task_metadata.outputs = outputs
112
+ else:
113
+ self.extract_inlets_and_outlets(task_metadata, task.inlets, task.outlets)
110
114
  return task_metadata
111
115
 
112
116
  except Exception as e:
113
117
  self.log.warning(
114
118
  "Failed to extract metadata using found extractor %s - %s %s", extractor, e, task_info
115
119
  )
120
+ elif (hook_lineage := self.get_hook_lineage()) is not None:
121
+ inputs, outputs = hook_lineage
122
+ task_metadata = OperatorLineage(inputs=inputs, outputs=outputs)
123
+ return task_metadata
116
124
  else:
117
125
  self.log.debug("Unable to find an extractor %s", task_info)
118
126
 
@@ -168,6 +176,30 @@ class ExtractorManager(LoggingMixin):
168
176
  if d:
169
177
  task_metadata.outputs.append(d)
170
178
 
179
+ def get_hook_lineage(self) -> tuple[list[Dataset], list[Dataset]] | None:
180
+ try:
181
+ from airflow.lineage.hook import get_hook_lineage_collector
182
+ except ImportError:
183
+ return None
184
+
185
+ if not get_hook_lineage_collector().has_collected:
186
+ return None
187
+
188
+ return (
189
+ [
190
+ dataset
191
+ for dataset_info in get_hook_lineage_collector().collected_datasets.inputs
192
+ if (dataset := translate_airflow_dataset(dataset_info.dataset, dataset_info.context))
193
+ is not None
194
+ ],
195
+ [
196
+ dataset
197
+ for dataset_info in get_hook_lineage_collector().collected_datasets.outputs
198
+ if (dataset := translate_airflow_dataset(dataset_info.dataset, dataset_info.context))
199
+ is not None
200
+ ],
201
+ )
202
+
171
203
  @staticmethod
172
204
  def convert_to_ol_dataset_from_object_storage_uri(uri: str) -> Dataset | None:
173
205
  from urllib.parse import urlparse
@@ -0,0 +1,30 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$defs": {
4
+ "AirflowDebugRunFacet": {
5
+ "allOf": [
6
+ {
7
+ "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/RunFacet"
8
+ },
9
+ {
10
+ "type": "object",
11
+ "properties": {
12
+ "packages": {
13
+ "description": "The names and versions of all installed Python packages.",
14
+ "type": "object",
15
+ "additionalProperties": true
16
+ }
17
+ },
18
+ "required": ["packages"]
19
+ }
20
+ ],
21
+ "type": "object"
22
+ }
23
+ },
24
+ "type": "object",
25
+ "properties": {
26
+ "debug": {
27
+ "$ref": "#/$defs/AirflowDebugRunFacet"
28
+ }
29
+ }
30
+ }
@@ -28,8 +28,9 @@ def get_provider_info():
28
28
  "name": "OpenLineage Airflow",
29
29
  "description": "`OpenLineage <https://openlineage.io/>`__\n",
30
30
  "state": "ready",
31
- "source-date-epoch": 1722664661,
31
+ "source-date-epoch": 1723970474,
32
32
  "versions": [
33
+ "1.11.0",
33
34
  "1.10.0",
34
35
  "1.9.1",
35
36
  "1.9.0",
@@ -50,8 +51,9 @@ def get_provider_info():
50
51
  "1.0.0",
51
52
  ],
52
53
  "dependencies": [
53
- "apache-airflow>=2.7.0",
54
+ "apache-airflow>=2.8.0",
54
55
  "apache-airflow-providers-common-sql>=1.6.0",
56
+ "apache-airflow-providers-common-compat>=1.2.0",
55
57
  "attrs>=22.2",
56
58
  "openlineage-integration-common>=1.16.0",
57
59
  "openlineage-python>=1.16.0",
@@ -158,6 +160,13 @@ def get_provider_info():
158
160
  "type": "boolean",
159
161
  "version_added": "1.10.0",
160
162
  },
163
+ "debug_mode": {
164
+ "description": "If true, OpenLineage events will include information useful for debugging - potentially\ncontaining large fields e.g. all installed packages and their versions.\n",
165
+ "default": "False",
166
+ "example": None,
167
+ "type": "boolean",
168
+ "version_added": "1.11.0",
169
+ },
161
170
  },
162
171
  }
163
172
  },
@@ -41,6 +41,7 @@ from airflow.providers.openlineage import __version__ as OPENLINEAGE_PROVIDER_VE
41
41
  from airflow.providers.openlineage.utils.utils import (
42
42
  OpenLineageRedactor,
43
43
  get_airflow_dag_run_facet,
44
+ get_airflow_debug_facet,
44
45
  get_airflow_state_run_facet,
45
46
  )
46
47
  from airflow.stats import Stats
@@ -90,7 +91,7 @@ class OpenLineageAdapter(LoggingMixin):
90
91
  "OpenLineage configuration not found directly in Airflow. "
91
92
  "Looking for legacy environment configuration. "
92
93
  )
93
- self._client = OpenLineageClient.from_environment()
94
+ self._client = OpenLineageClient()
94
95
  return self._client
95
96
 
96
97
  def get_openlineage_config(self) -> dict | None:
@@ -361,7 +362,7 @@ class OpenLineageAdapter(LoggingMixin):
361
362
  job_name=dag_run.dag_id,
362
363
  nominal_start_time=nominal_start_time,
363
364
  nominal_end_time=nominal_end_time,
364
- run_facets=get_airflow_dag_run_facet(dag_run),
365
+ run_facets={**get_airflow_dag_run_facet(dag_run), **get_airflow_debug_facet()},
365
366
  ),
366
367
  inputs=[],
367
368
  outputs=[],
@@ -385,7 +386,7 @@ class OpenLineageAdapter(LoggingMixin):
385
386
  dag_id=dag_run.dag_id,
386
387
  execution_date=dag_run.execution_date,
387
388
  ),
388
- facets={**get_airflow_state_run_facet(dag_run)},
389
+ facets={**get_airflow_state_run_facet(dag_run), **get_airflow_debug_facet()},
389
390
  ),
390
391
  inputs=[],
391
392
  outputs=[],
@@ -414,6 +415,7 @@ class OpenLineageAdapter(LoggingMixin):
414
415
  message=msg, programmingLanguage="python"
415
416
  ),
416
417
  **get_airflow_state_run_facet(dag_run),
418
+ **get_airflow_debug_facet(),
417
419
  },
418
420
  ),
419
421
  inputs=[],
@@ -17,17 +17,10 @@
17
17
  from __future__ import annotations
18
18
 
19
19
  from attrs import define
20
- from deprecated import deprecated
21
20
  from openlineage.client.facet_v2 import JobFacet, RunFacet
22
21
  from openlineage.client.utils import RedactMixin
23
22
 
24
- from airflow.exceptions import AirflowProviderDeprecationWarning
25
23
 
26
-
27
- @deprecated(
28
- reason="To be removed in the next release. Make sure to use information from AirflowRunFacet instead.",
29
- category=AirflowProviderDeprecationWarning,
30
- )
31
24
  @define
32
25
  class AirflowMappedTaskRunFacet(RunFacet):
33
26
  """Run facet containing information about mapped tasks."""
@@ -108,6 +101,13 @@ class AirflowDagRunFacet(RunFacet):
108
101
  dagRun: dict
109
102
 
110
103
 
104
+ @define
105
+ class AirflowDebugRunFacet(RunFacet):
106
+ """Airflow Debug run facet."""
107
+
108
+ packages: dict
109
+
110
+
111
111
  @define
112
112
  class UnknownOperatorInstance(RedactMixin):
113
113
  """
@@ -123,10 +123,6 @@ class UnknownOperatorInstance(RedactMixin):
123
123
  _skip_redact = ["name", "type"]
124
124
 
125
125
 
126
- @deprecated(
127
- reason="To be removed in the next release. Make sure to use information from AirflowRunFacet instead.",
128
- category=AirflowProviderDeprecationWarning,
129
- )
130
126
  @define
131
127
  class UnknownOperatorAttributeRunFacet(RunFacet):
132
128
  """RunFacet that describes unknown operators in an Airflow DAG."""
@@ -32,6 +32,7 @@ from airflow.providers.openlineage.extractors import ExtractorManager
32
32
  from airflow.providers.openlineage.plugins.adapter import OpenLineageAdapter, RunState
33
33
  from airflow.providers.openlineage.utils.utils import (
34
34
  IS_AIRFLOW_2_10_OR_HIGHER,
35
+ get_airflow_debug_facet,
35
36
  get_airflow_job_facet,
36
37
  get_airflow_mapped_task_facet,
37
38
  get_airflow_run_facet,
@@ -122,6 +123,9 @@ class OpenLineageListener:
122
123
  )
123
124
  return
124
125
 
126
+ # Needs to be calculated outside of inner method so that it gets cached for usage in fork processes
127
+ debug_facet = get_airflow_debug_facet()
128
+
125
129
  @print_warning(self.log)
126
130
  def on_running():
127
131
  # that's a workaround to detect task running from deferred state
@@ -166,6 +170,7 @@ class OpenLineageListener:
166
170
  **get_user_provided_run_facets(task_instance, TaskInstanceState.RUNNING),
167
171
  **get_airflow_mapped_task_facet(task_instance),
168
172
  **get_airflow_run_facet(dagrun, dag, task_instance, task, task_uuid),
173
+ **debug_facet,
169
174
  },
170
175
  )
171
176
  Stats.gauge(
@@ -237,6 +242,7 @@ class OpenLineageListener:
237
242
  run_facets={
238
243
  **get_user_provided_run_facets(task_instance, TaskInstanceState.SUCCESS),
239
244
  **get_airflow_run_facet(dagrun, dag, task_instance, task, task_uuid),
245
+ **get_airflow_debug_facet(),
240
246
  },
241
247
  )
242
248
  Stats.gauge(
@@ -336,6 +342,7 @@ class OpenLineageListener:
336
342
  run_facets={
337
343
  **get_user_provided_run_facets(task_instance, TaskInstanceState.FAILED),
338
344
  **get_airflow_run_facet(dagrun, dag, task_instance, task, task_uuid),
345
+ **get_airflow_debug_facet(),
339
346
  },
340
347
  )
341
348
  Stats.gauge(
@@ -25,6 +25,7 @@ from airflow.providers.openlineage.plugins.macros import (
25
25
  lineage_parent_id,
26
26
  lineage_run_id,
27
27
  )
28
+ from airflow.providers.openlineage.utils.utils import IS_AIRFLOW_2_10_OR_HIGHER
28
29
 
29
30
 
30
31
  class OpenLineageProviderPlugin(AirflowPlugin):
@@ -39,6 +40,10 @@ class OpenLineageProviderPlugin(AirflowPlugin):
39
40
  if not conf.is_disabled():
40
41
  macros = [lineage_job_namespace, lineage_job_name, lineage_run_id, lineage_parent_id]
41
42
  listeners = [get_openlineage_listener()]
43
+ if IS_AIRFLOW_2_10_OR_HIGHER:
44
+ from airflow.lineage.hook import HookLineageReader
45
+
46
+ hook_lineage_readers = [HookLineageReader]
42
47
  else:
43
48
  macros = []
44
49
  listeners = []
@@ -20,10 +20,9 @@ from __future__ import annotations
20
20
  import datetime
21
21
  import json
22
22
  import logging
23
- import re
24
- from contextlib import redirect_stdout, suppress
23
+ from contextlib import suppress
25
24
  from functools import wraps
26
- from io import StringIO
25
+ from importlib import metadata
27
26
  from typing import TYPE_CHECKING, Any, Callable, Iterable
28
27
 
29
28
  import attrs
@@ -38,6 +37,7 @@ from airflow.models import DAG, BaseOperator, MappedOperator
38
37
  from airflow.providers.openlineage import conf
39
38
  from airflow.providers.openlineage.plugins.facets import (
40
39
  AirflowDagRunFacet,
40
+ AirflowDebugRunFacet,
41
41
  AirflowJobFacet,
42
42
  AirflowMappedTaskRunFacet,
43
43
  AirflowRunFacet,
@@ -85,6 +85,10 @@ def get_job_name(task: TaskInstance) -> str:
85
85
  def get_airflow_mapped_task_facet(task_instance: TaskInstance) -> dict[str, Any]:
86
86
  # check for -1 comes from SmartSensor compatibility with dynamic task mapping
87
87
  # this comes from Airflow code
88
+ log.debug(
89
+ "AirflowMappedTaskRunFacet is deprecated and will be removed. "
90
+ "Use information from AirflowRunFacet instead."
91
+ )
88
92
  if hasattr(task_instance, "map_index") and getattr(task_instance, "map_index") != -1:
89
93
  return {"airflow_mappedTask": AirflowMappedTaskRunFacet.from_task_instance(task_instance)}
90
94
  return {}
@@ -240,7 +244,7 @@ class InfoJsonEncodable(dict):
240
244
  class DagInfo(InfoJsonEncodable):
241
245
  """Defines encoding DAG object to JSON."""
242
246
 
243
- includes = ["dag_id", "description", "owner", "schedule_interval", "start_date", "tags"]
247
+ includes = ["dag_id", "description", "fileloc", "owner", "schedule_interval", "start_date", "tags"]
244
248
  casts = {"timetable": lambda dag: dag.timetable.serialize() if getattr(dag, "timetable", None) else None}
245
249
  renames = {"_dag_id": "dag_id"}
246
250
 
@@ -374,6 +378,28 @@ def get_airflow_dag_run_facet(dag_run: DagRun) -> dict[str, RunFacet]:
374
378
  }
375
379
 
376
380
 
381
+ @conf.cache
382
+ def _get_all_packages_installed() -> dict[str, str]:
383
+ """
384
+ Retrieve a dictionary of all installed packages and their versions.
385
+
386
+ This operation involves scanning the system's installed packages, which can be a heavy operation.
387
+ It is recommended to cache the result to avoid repeated, expensive lookups.
388
+ """
389
+ return {dist.metadata["Name"]: dist.version for dist in metadata.distributions()}
390
+
391
+
392
+ def get_airflow_debug_facet() -> dict[str, AirflowDebugRunFacet]:
393
+ if not conf.debug_mode():
394
+ return {}
395
+ log.warning("OpenLineage debug_mode is enabled. Be aware that this may log and emit extensive details.")
396
+ return {
397
+ "debug": AirflowDebugRunFacet(
398
+ packages=_get_all_packages_installed(),
399
+ )
400
+ }
401
+
402
+
377
403
  def get_airflow_run_facet(
378
404
  dag_run: DagRun,
379
405
  dag: DAG,
@@ -397,7 +423,7 @@ def get_airflow_job_facet(dag_run: DagRun) -> dict[str, AirflowJobFacet]:
397
423
  return {}
398
424
  return {
399
425
  "airflow": AirflowJobFacet(
400
- taskTree=_get_parsed_dag_tree(dag_run.dag),
426
+ taskTree={}, # caused OOM errors, to be removed, see #41587
401
427
  taskGroups=_get_task_groups_details(dag_run.dag),
402
428
  tasks=_get_tasks_details(dag_run.dag),
403
429
  )
@@ -413,75 +439,6 @@ def get_airflow_state_run_facet(dag_run: DagRun) -> dict[str, AirflowStateRunFac
413
439
  }
414
440
 
415
441
 
416
- def _safe_get_dag_tree_view(dag: DAG) -> list[str]:
417
- # get_tree_view() has been added in Airflow 2.8.2
418
- if hasattr(dag, "get_tree_view"):
419
- return dag.get_tree_view().splitlines()
420
-
421
- with redirect_stdout(StringIO()) as stdout:
422
- dag.tree_view()
423
- return stdout.getvalue().splitlines()
424
-
425
-
426
- def _get_parsed_dag_tree(dag: DAG) -> dict:
427
- """
428
- Get DAG's tasks hierarchy representation.
429
-
430
- While the task dependencies are defined as following:
431
- task >> [task_2, task_4] >> task_7
432
- task_3 >> task_5
433
- task_6 # has no dependencies, it's a root and a leaf
434
-
435
- The result of this function will look like:
436
- {
437
- "task": {
438
- "task_2": {
439
- "task_7": {}
440
- },
441
- "task_4": {
442
- "task_7": {}
443
- }
444
- },
445
- "task_3": {
446
- "task_5": {}
447
- },
448
- "task_6": {}
449
- }
450
- """
451
- lines = _safe_get_dag_tree_view(dag)
452
- task_dict: dict[str, dict] = {}
453
- parent_map: dict[int, tuple[str, dict]] = {}
454
-
455
- for line in lines:
456
- stripped_line = line.strip()
457
- if not stripped_line:
458
- continue
459
-
460
- # Determine the level by counting the leading spaces, assuming 4 spaces per level
461
- # as defined in airflow.models.dag.DAG._generate_tree_view()
462
- level = (len(line) - len(stripped_line)) // 4
463
- # airflow.models.baseoperator.BaseOperator.__repr__ or
464
- # airflow.models.mappedoperator.MappedOperator.__repr__ is used in DAG tree
465
- # <Task({op_class}): {task_id}> or <Mapped({op_class}): {task_id}>
466
- match = re.match(r"^<(?:Task|Mapped)\(.+\): (.+)>$", stripped_line)
467
- if not match:
468
- return {}
469
- current_task_id = match[1]
470
-
471
- if level == 0: # It's a root task
472
- task_dict[current_task_id] = {}
473
- parent_map[level] = (current_task_id, task_dict[current_task_id])
474
- else:
475
- # Find the immediate parent task
476
- parent_task, parent_dict = parent_map[(level - 1)]
477
- # Create new dict for the current task
478
- parent_dict[current_task_id] = {}
479
- # Update this task in the parent map
480
- parent_map[level] = (current_task_id, parent_dict[current_task_id])
481
-
482
- return task_dict
483
-
484
-
485
442
  def _get_tasks_details(dag: DAG) -> dict:
486
443
  tasks = {
487
444
  single_task.task_id: {
@@ -493,8 +450,9 @@ def _get_tasks_details(dag: DAG) -> dict:
493
450
  "ui_label": single_task.label,
494
451
  "is_setup": single_task.is_setup,
495
452
  "is_teardown": single_task.is_teardown,
453
+ "downstream_task_ids": sorted(single_task.downstream_task_ids),
496
454
  }
497
- for single_task in dag.tasks
455
+ for single_task in sorted(dag.tasks, key=lambda x: x.task_id)
498
456
  }
499
457
 
500
458
  return tasks
@@ -536,6 +494,10 @@ def _emits_ol_events(task: BaseOperator | MappedOperator) -> bool:
536
494
  def get_unknown_source_attribute_run_facet(task: BaseOperator, name: str | None = None):
537
495
  if not name:
538
496
  name = get_operator_class(task).__name__
497
+ log.debug(
498
+ "UnknownOperatorAttributeRunFacet is deprecated and will be removed. "
499
+ "Use information from AirflowRunFacet instead."
500
+ )
539
501
  return {
540
502
  "unknownSourceAttribute": attrs.asdict(
541
503
  UnknownOperatorAttributeRunFacet(
@@ -28,7 +28,7 @@ build-backend = "flit_core.buildapi"
28
28
 
29
29
  [project]
30
30
  name = "apache-airflow-providers-openlineage"
31
- version = "1.10.0.rc1"
31
+ version = "1.11.0"
32
32
  description = "Provider package apache-airflow-providers-openlineage for Apache Airflow"
33
33
  readme = "README.rst"
34
34
  authors = [
@@ -56,16 +56,17 @@ classifiers = [
56
56
  ]
57
57
  requires-python = "~=3.8"
58
58
  dependencies = [
59
- "apache-airflow-providers-common-sql>=1.6.0rc0",
60
- "apache-airflow>=2.7.0rc0",
59
+ "apache-airflow-providers-common-compat>=1.2.0",
60
+ "apache-airflow-providers-common-sql>=1.6.0",
61
+ "apache-airflow>=2.8.0",
61
62
  "attrs>=22.2",
62
63
  "openlineage-integration-common>=1.16.0",
63
64
  "openlineage-python>=1.16.0",
64
65
  ]
65
66
 
66
67
  [project.urls]
67
- "Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.10.0"
68
- "Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.10.0/changelog.html"
68
+ "Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.11.0"
69
+ "Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.11.0/changelog.html"
69
70
  "Bug Tracker" = "https://github.com/apache/airflow/issues"
70
71
  "Source Code" = "https://github.com/apache/airflow"
71
72
  "Slack Chat" = "https://s.apache.org/airflow-slack"