apache-airflow-providers-openlineage 1.10.0rc1__tar.gz → 1.11.0rc1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0rc1}/PKG-INFO +8 -8
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0rc1}/README.rst +4 -4
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/__init__.py +3 -3
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/conf.py +6 -0
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/extractors/base.py +1 -1
- apache_airflow_providers_openlineage-1.11.0rc1/airflow/providers/openlineage/facets/AirflowDebugRunFacet.json +30 -0
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/get_provider_info.py +10 -2
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/plugins/adapter.py +5 -3
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/plugins/facets.py +7 -11
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/plugins/listener.py +7 -0
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/utils/utils.py +43 -45
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0rc1}/pyproject.toml +4 -4
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/LICENSE +0 -0
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/extractors/__init__.py +0 -0
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/extractors/bash.py +0 -0
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/extractors/manager.py +0 -0
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/extractors/python.py +0 -0
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/facets/AirflowDagRunFacet.json +0 -0
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/facets/AirflowJobFacet.json +0 -0
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/facets/AirflowRunFacet.json +0 -0
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/facets/AirflowStateRunFacet.json +0 -0
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/facets/__init__.py +0 -0
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/plugins/__init__.py +0 -0
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/plugins/macros.py +0 -0
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/plugins/openlineage.py +0 -0
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/sqlparser.py +0 -0
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/utils/__init__.py +0 -0
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/utils/selective_enable.py +0 -0
- {apache_airflow_providers_openlineage-1.10.0rc1 → apache_airflow_providers_openlineage-1.11.0rc1}/airflow/providers/openlineage/utils/sql.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: apache-airflow-providers-openlineage
|
|
3
|
-
Version: 1.10.0rc1
|
|
3
|
+
Version: 1.11.0rc1
|
|
4
4
|
Summary: Provider package apache-airflow-providers-openlineage for Apache Airflow
|
|
5
5
|
Keywords: airflow-provider,openlineage,airflow,integration
|
|
6
6
|
Author-email: Apache Software Foundation <dev@airflow.apache.org>
|
|
@@ -22,14 +22,14 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
22
22
|
Classifier: Programming Language :: Python :: 3.12
|
|
23
23
|
Classifier: Topic :: System :: Monitoring
|
|
24
24
|
Requires-Dist: apache-airflow-providers-common-sql>=1.6.0rc0
|
|
25
|
-
Requires-Dist: apache-airflow>=2.
|
|
25
|
+
Requires-Dist: apache-airflow>=2.8.0rc0
|
|
26
26
|
Requires-Dist: attrs>=22.2
|
|
27
27
|
Requires-Dist: openlineage-integration-common>=1.16.0
|
|
28
28
|
Requires-Dist: openlineage-python>=1.16.0
|
|
29
29
|
Requires-Dist: apache-airflow-providers-common-sql ; extra == "common.sql"
|
|
30
30
|
Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
|
|
31
|
-
Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.10.0/changelog.html
|
|
32
|
-
Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.10.0
|
|
31
|
+
Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.11.0/changelog.html
|
|
32
|
+
Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.11.0
|
|
33
33
|
Project-URL: Slack Chat, https://s.apache.org/airflow-slack
|
|
34
34
|
Project-URL: Source Code, https://github.com/apache/airflow
|
|
35
35
|
Project-URL: Twitter, https://twitter.com/ApacheAirflow
|
|
@@ -80,7 +80,7 @@ Provides-Extra: common.sql
|
|
|
80
80
|
|
|
81
81
|
Package ``apache-airflow-providers-openlineage``
|
|
82
82
|
|
|
83
|
-
Release: ``1.10.0.rc1``
|
|
83
|
+
Release: ``1.11.0.rc1``
|
|
84
84
|
|
|
85
85
|
|
|
86
86
|
`OpenLineage <https://openlineage.io/>`__
|
|
@@ -93,7 +93,7 @@ This is a provider package for ``openlineage`` provider. All classes for this pr
|
|
|
93
93
|
are in ``airflow.providers.openlineage`` python package.
|
|
94
94
|
|
|
95
95
|
You can find package information and changelog for the provider
|
|
96
|
-
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.10.0/>`_.
|
|
96
|
+
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.11.0/>`_.
|
|
97
97
|
|
|
98
98
|
Installation
|
|
99
99
|
------------
|
|
@@ -110,7 +110,7 @@ Requirements
|
|
|
110
110
|
======================================= ==================
|
|
111
111
|
PIP package Version required
|
|
112
112
|
======================================= ==================
|
|
113
|
-
``apache-airflow`` ``>=2.
|
|
113
|
+
``apache-airflow`` ``>=2.8.0``
|
|
114
114
|
``apache-airflow-providers-common-sql`` ``>=1.6.0``
|
|
115
115
|
``attrs`` ``>=22.2``
|
|
116
116
|
``openlineage-integration-common`` ``>=1.16.0``
|
|
@@ -137,4 +137,4 @@ Dependent package
|
|
|
137
137
|
============================================================================================================ ==============
|
|
138
138
|
|
|
139
139
|
The changelog for the provider package can be found in the
|
|
140
|
-
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.10.0/changelog.html>`_.
|
|
140
|
+
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.11.0/changelog.html>`_.
|
|
@@ -42,7 +42,7 @@
|
|
|
42
42
|
|
|
43
43
|
Package ``apache-airflow-providers-openlineage``
|
|
44
44
|
|
|
45
|
-
Release: ``1.10.0.rc1``
|
|
45
|
+
Release: ``1.11.0.rc1``
|
|
46
46
|
|
|
47
47
|
|
|
48
48
|
`OpenLineage <https://openlineage.io/>`__
|
|
@@ -55,7 +55,7 @@ This is a provider package for ``openlineage`` provider. All classes for this pr
|
|
|
55
55
|
are in ``airflow.providers.openlineage`` python package.
|
|
56
56
|
|
|
57
57
|
You can find package information and changelog for the provider
|
|
58
|
-
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.10.0/>`_.
|
|
58
|
+
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.11.0/>`_.
|
|
59
59
|
|
|
60
60
|
Installation
|
|
61
61
|
------------
|
|
@@ -72,7 +72,7 @@ Requirements
|
|
|
72
72
|
======================================= ==================
|
|
73
73
|
PIP package Version required
|
|
74
74
|
======================================= ==================
|
|
75
|
-
``apache-airflow`` ``>=2.
|
|
75
|
+
``apache-airflow`` ``>=2.8.0``
|
|
76
76
|
``apache-airflow-providers-common-sql`` ``>=1.6.0``
|
|
77
77
|
``attrs`` ``>=22.2``
|
|
78
78
|
``openlineage-integration-common`` ``>=1.16.0``
|
|
@@ -99,4 +99,4 @@ Dependent package
|
|
|
99
99
|
============================================================================================================ ==============
|
|
100
100
|
|
|
101
101
|
The changelog for the provider package can be found in the
|
|
102
|
-
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.10.0/changelog.html>`_.
|
|
102
|
+
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.11.0/changelog.html>`_.
|
|
@@ -29,11 +29,11 @@ from airflow import __version__ as airflow_version
|
|
|
29
29
|
|
|
30
30
|
__all__ = ["__version__"]
|
|
31
31
|
|
|
32
|
-
__version__ = "1.10.0"
|
|
32
|
+
__version__ = "1.11.0"
|
|
33
33
|
|
|
34
34
|
if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
|
|
35
|
-
"2.
|
|
35
|
+
"2.8.0"
|
|
36
36
|
):
|
|
37
37
|
raise RuntimeError(
|
|
38
|
-
f"The package `apache-airflow-providers-openlineage:{__version__}` needs Apache Airflow 2.
|
|
38
|
+
f"The package `apache-airflow-providers-openlineage:{__version__}` needs Apache Airflow 2.8.0+"
|
|
39
39
|
)
|
|
@@ -145,3 +145,9 @@ def execution_timeout() -> int:
|
|
|
145
145
|
def include_full_task_info() -> bool:
|
|
146
146
|
"""[openlineage] include_full_task_info."""
|
|
147
147
|
return conf.getboolean(_CONFIG_SECTION, "include_full_task_info", fallback="False")
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
@cache
|
|
151
|
+
def debug_mode() -> bool:
|
|
152
|
+
"""[openlineage] debug_mode."""
|
|
153
|
+
return conf.getboolean(_CONFIG_SECTION, "debug_mode", fallback="False")
|
|
@@ -113,7 +113,7 @@ class DefaultExtractor(BaseExtractor):
|
|
|
113
113
|
"Operator %s does not have the get_openlineage_facets_on_start method.",
|
|
114
114
|
self.operator.task_type,
|
|
115
115
|
)
|
|
116
|
-
return
|
|
116
|
+
return OperatorLineage()
|
|
117
117
|
|
|
118
118
|
def extract_on_complete(self, task_instance) -> OperatorLineage | None:
|
|
119
119
|
failed_states = [TaskInstanceState.FAILED, TaskInstanceState.UP_FOR_RETRY]
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$defs": {
|
|
4
|
+
"AirflowDebugRunFacet": {
|
|
5
|
+
"allOf": [
|
|
6
|
+
{
|
|
7
|
+
"$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/RunFacet"
|
|
8
|
+
},
|
|
9
|
+
{
|
|
10
|
+
"type": "object",
|
|
11
|
+
"properties": {
|
|
12
|
+
"packages": {
|
|
13
|
+
"description": "The names and versions of all installed Python packages.",
|
|
14
|
+
"type": "object",
|
|
15
|
+
"additionalProperties": true
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
"required": ["packages"]
|
|
19
|
+
}
|
|
20
|
+
],
|
|
21
|
+
"type": "object"
|
|
22
|
+
}
|
|
23
|
+
},
|
|
24
|
+
"type": "object",
|
|
25
|
+
"properties": {
|
|
26
|
+
"debug": {
|
|
27
|
+
"$ref": "#/$defs/AirflowDebugRunFacet"
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
}
|
|
@@ -28,8 +28,9 @@ def get_provider_info():
|
|
|
28
28
|
"name": "OpenLineage Airflow",
|
|
29
29
|
"description": "`OpenLineage <https://openlineage.io/>`__\n",
|
|
30
30
|
"state": "ready",
|
|
31
|
-
"source-date-epoch":
|
|
31
|
+
"source-date-epoch": 1723970474,
|
|
32
32
|
"versions": [
|
|
33
|
+
"1.11.0",
|
|
33
34
|
"1.10.0",
|
|
34
35
|
"1.9.1",
|
|
35
36
|
"1.9.0",
|
|
@@ -50,7 +51,7 @@ def get_provider_info():
|
|
|
50
51
|
"1.0.0",
|
|
51
52
|
],
|
|
52
53
|
"dependencies": [
|
|
53
|
-
"apache-airflow>=2.
|
|
54
|
+
"apache-airflow>=2.8.0",
|
|
54
55
|
"apache-airflow-providers-common-sql>=1.6.0",
|
|
55
56
|
"attrs>=22.2",
|
|
56
57
|
"openlineage-integration-common>=1.16.0",
|
|
@@ -158,6 +159,13 @@ def get_provider_info():
|
|
|
158
159
|
"type": "boolean",
|
|
159
160
|
"version_added": "1.10.0",
|
|
160
161
|
},
|
|
162
|
+
"debug_mode": {
|
|
163
|
+
"description": "If true, OpenLineage events will include information useful for debugging - potentially\ncontaining large fields e.g. all installed packages and their versions.\n",
|
|
164
|
+
"default": "False",
|
|
165
|
+
"example": None,
|
|
166
|
+
"type": "boolean",
|
|
167
|
+
"version_added": "1.11.0",
|
|
168
|
+
},
|
|
161
169
|
},
|
|
162
170
|
}
|
|
163
171
|
},
|
|
@@ -41,6 +41,7 @@ from airflow.providers.openlineage import __version__ as OPENLINEAGE_PROVIDER_VE
|
|
|
41
41
|
from airflow.providers.openlineage.utils.utils import (
|
|
42
42
|
OpenLineageRedactor,
|
|
43
43
|
get_airflow_dag_run_facet,
|
|
44
|
+
get_airflow_debug_facet,
|
|
44
45
|
get_airflow_state_run_facet,
|
|
45
46
|
)
|
|
46
47
|
from airflow.stats import Stats
|
|
@@ -90,7 +91,7 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
90
91
|
"OpenLineage configuration not found directly in Airflow. "
|
|
91
92
|
"Looking for legacy environment configuration. "
|
|
92
93
|
)
|
|
93
|
-
self._client = OpenLineageClient
|
|
94
|
+
self._client = OpenLineageClient()
|
|
94
95
|
return self._client
|
|
95
96
|
|
|
96
97
|
def get_openlineage_config(self) -> dict | None:
|
|
@@ -361,7 +362,7 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
361
362
|
job_name=dag_run.dag_id,
|
|
362
363
|
nominal_start_time=nominal_start_time,
|
|
363
364
|
nominal_end_time=nominal_end_time,
|
|
364
|
-
run_facets=get_airflow_dag_run_facet(dag_run),
|
|
365
|
+
run_facets={**get_airflow_dag_run_facet(dag_run), **get_airflow_debug_facet()},
|
|
365
366
|
),
|
|
366
367
|
inputs=[],
|
|
367
368
|
outputs=[],
|
|
@@ -385,7 +386,7 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
385
386
|
dag_id=dag_run.dag_id,
|
|
386
387
|
execution_date=dag_run.execution_date,
|
|
387
388
|
),
|
|
388
|
-
facets={**get_airflow_state_run_facet(dag_run)},
|
|
389
|
+
facets={**get_airflow_state_run_facet(dag_run), **get_airflow_debug_facet()},
|
|
389
390
|
),
|
|
390
391
|
inputs=[],
|
|
391
392
|
outputs=[],
|
|
@@ -414,6 +415,7 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
414
415
|
message=msg, programmingLanguage="python"
|
|
415
416
|
),
|
|
416
417
|
**get_airflow_state_run_facet(dag_run),
|
|
418
|
+
**get_airflow_debug_facet(),
|
|
417
419
|
},
|
|
418
420
|
),
|
|
419
421
|
inputs=[],
|
|
@@ -17,17 +17,10 @@
|
|
|
17
17
|
from __future__ import annotations
|
|
18
18
|
|
|
19
19
|
from attrs import define
|
|
20
|
-
from deprecated import deprecated
|
|
21
20
|
from openlineage.client.facet_v2 import JobFacet, RunFacet
|
|
22
21
|
from openlineage.client.utils import RedactMixin
|
|
23
22
|
|
|
24
|
-
from airflow.exceptions import AirflowProviderDeprecationWarning
|
|
25
23
|
|
|
26
|
-
|
|
27
|
-
@deprecated(
|
|
28
|
-
reason="To be removed in the next release. Make sure to use information from AirflowRunFacet instead.",
|
|
29
|
-
category=AirflowProviderDeprecationWarning,
|
|
30
|
-
)
|
|
31
24
|
@define
|
|
32
25
|
class AirflowMappedTaskRunFacet(RunFacet):
|
|
33
26
|
"""Run facet containing information about mapped tasks."""
|
|
@@ -108,6 +101,13 @@ class AirflowDagRunFacet(RunFacet):
|
|
|
108
101
|
dagRun: dict
|
|
109
102
|
|
|
110
103
|
|
|
104
|
+
@define
|
|
105
|
+
class AirflowDebugRunFacet(RunFacet):
|
|
106
|
+
"""Airflow Debug run facet."""
|
|
107
|
+
|
|
108
|
+
packages: dict
|
|
109
|
+
|
|
110
|
+
|
|
111
111
|
@define
|
|
112
112
|
class UnknownOperatorInstance(RedactMixin):
|
|
113
113
|
"""
|
|
@@ -123,10 +123,6 @@ class UnknownOperatorInstance(RedactMixin):
|
|
|
123
123
|
_skip_redact = ["name", "type"]
|
|
124
124
|
|
|
125
125
|
|
|
126
|
-
@deprecated(
|
|
127
|
-
reason="To be removed in the next release. Make sure to use information from AirflowRunFacet instead.",
|
|
128
|
-
category=AirflowProviderDeprecationWarning,
|
|
129
|
-
)
|
|
130
126
|
@define
|
|
131
127
|
class UnknownOperatorAttributeRunFacet(RunFacet):
|
|
132
128
|
"""RunFacet that describes unknown operators in an Airflow DAG."""
|
|
@@ -32,6 +32,7 @@ from airflow.providers.openlineage.extractors import ExtractorManager
|
|
|
32
32
|
from airflow.providers.openlineage.plugins.adapter import OpenLineageAdapter, RunState
|
|
33
33
|
from airflow.providers.openlineage.utils.utils import (
|
|
34
34
|
IS_AIRFLOW_2_10_OR_HIGHER,
|
|
35
|
+
get_airflow_debug_facet,
|
|
35
36
|
get_airflow_job_facet,
|
|
36
37
|
get_airflow_mapped_task_facet,
|
|
37
38
|
get_airflow_run_facet,
|
|
@@ -122,6 +123,9 @@ class OpenLineageListener:
|
|
|
122
123
|
)
|
|
123
124
|
return
|
|
124
125
|
|
|
126
|
+
# Needs to be calculated outside of inner method so that it gets cached for usage in fork processes
|
|
127
|
+
debug_facet = get_airflow_debug_facet()
|
|
128
|
+
|
|
125
129
|
@print_warning(self.log)
|
|
126
130
|
def on_running():
|
|
127
131
|
# that's a workaround to detect task running from deferred state
|
|
@@ -166,6 +170,7 @@ class OpenLineageListener:
|
|
|
166
170
|
**get_user_provided_run_facets(task_instance, TaskInstanceState.RUNNING),
|
|
167
171
|
**get_airflow_mapped_task_facet(task_instance),
|
|
168
172
|
**get_airflow_run_facet(dagrun, dag, task_instance, task, task_uuid),
|
|
173
|
+
**debug_facet,
|
|
169
174
|
},
|
|
170
175
|
)
|
|
171
176
|
Stats.gauge(
|
|
@@ -237,6 +242,7 @@ class OpenLineageListener:
|
|
|
237
242
|
run_facets={
|
|
238
243
|
**get_user_provided_run_facets(task_instance, TaskInstanceState.SUCCESS),
|
|
239
244
|
**get_airflow_run_facet(dagrun, dag, task_instance, task, task_uuid),
|
|
245
|
+
**get_airflow_debug_facet(),
|
|
240
246
|
},
|
|
241
247
|
)
|
|
242
248
|
Stats.gauge(
|
|
@@ -336,6 +342,7 @@ class OpenLineageListener:
|
|
|
336
342
|
run_facets={
|
|
337
343
|
**get_user_provided_run_facets(task_instance, TaskInstanceState.FAILED),
|
|
338
344
|
**get_airflow_run_facet(dagrun, dag, task_instance, task, task_uuid),
|
|
345
|
+
**get_airflow_debug_facet(),
|
|
339
346
|
},
|
|
340
347
|
)
|
|
341
348
|
Stats.gauge(
|
|
@@ -20,10 +20,9 @@ from __future__ import annotations
|
|
|
20
20
|
import datetime
|
|
21
21
|
import json
|
|
22
22
|
import logging
|
|
23
|
-
import re
|
|
24
|
-
from contextlib import redirect_stdout, suppress
|
|
23
|
+
from contextlib import suppress
|
|
25
24
|
from functools import wraps
|
|
26
|
-
from io import StringIO
|
|
25
|
+
from importlib import metadata
|
|
27
26
|
from typing import TYPE_CHECKING, Any, Callable, Iterable
|
|
28
27
|
|
|
29
28
|
import attrs
|
|
@@ -34,10 +33,11 @@ from packaging.version import Version
|
|
|
34
33
|
from airflow import __version__ as AIRFLOW_VERSION
|
|
35
34
|
from airflow.datasets import Dataset
|
|
36
35
|
from airflow.exceptions import AirflowProviderDeprecationWarning # TODO: move this maybe to Airflow's logic?
|
|
37
|
-
from airflow.models import DAG, BaseOperator, MappedOperator
|
|
36
|
+
from airflow.models import DAG, BaseOperator, MappedOperator, Operator
|
|
38
37
|
from airflow.providers.openlineage import conf
|
|
39
38
|
from airflow.providers.openlineage.plugins.facets import (
|
|
40
39
|
AirflowDagRunFacet,
|
|
40
|
+
AirflowDebugRunFacet,
|
|
41
41
|
AirflowJobFacet,
|
|
42
42
|
AirflowMappedTaskRunFacet,
|
|
43
43
|
AirflowRunFacet,
|
|
@@ -85,6 +85,10 @@ def get_job_name(task: TaskInstance) -> str:
|
|
|
85
85
|
def get_airflow_mapped_task_facet(task_instance: TaskInstance) -> dict[str, Any]:
|
|
86
86
|
# check for -1 comes from SmartSensor compatibility with dynamic task mapping
|
|
87
87
|
# this comes from Airflow code
|
|
88
|
+
log.debug(
|
|
89
|
+
"AirflowMappedTaskRunFacet is deprecated and will be removed. "
|
|
90
|
+
"Use information from AirflowRunFacet instead."
|
|
91
|
+
)
|
|
88
92
|
if hasattr(task_instance, "map_index") and getattr(task_instance, "map_index") != -1:
|
|
89
93
|
return {"airflow_mappedTask": AirflowMappedTaskRunFacet.from_task_instance(task_instance)}
|
|
90
94
|
return {}
|
|
@@ -240,7 +244,7 @@ class InfoJsonEncodable(dict):
|
|
|
240
244
|
class DagInfo(InfoJsonEncodable):
|
|
241
245
|
"""Defines encoding DAG object to JSON."""
|
|
242
246
|
|
|
243
|
-
includes = ["dag_id", "description", "owner", "schedule_interval", "start_date", "tags"]
|
|
247
|
+
includes = ["dag_id", "description", "fileloc", "owner", "schedule_interval", "start_date", "tags"]
|
|
244
248
|
casts = {"timetable": lambda dag: dag.timetable.serialize() if getattr(dag, "timetable", None) else None}
|
|
245
249
|
renames = {"_dag_id": "dag_id"}
|
|
246
250
|
|
|
@@ -374,6 +378,28 @@ def get_airflow_dag_run_facet(dag_run: DagRun) -> dict[str, RunFacet]:
|
|
|
374
378
|
}
|
|
375
379
|
|
|
376
380
|
|
|
381
|
+
@conf.cache
|
|
382
|
+
def _get_all_packages_installed() -> dict[str, str]:
|
|
383
|
+
"""
|
|
384
|
+
Retrieve a dictionary of all installed packages and their versions.
|
|
385
|
+
|
|
386
|
+
This operation involves scanning the system's installed packages, which can be a heavy operation.
|
|
387
|
+
It is recommended to cache the result to avoid repeated, expensive lookups.
|
|
388
|
+
"""
|
|
389
|
+
return {dist.metadata["Name"]: dist.version for dist in metadata.distributions()}
|
|
390
|
+
|
|
391
|
+
|
|
392
|
+
def get_airflow_debug_facet() -> dict[str, AirflowDebugRunFacet]:
|
|
393
|
+
if not conf.debug_mode():
|
|
394
|
+
return {}
|
|
395
|
+
log.warning("OpenLineage debug_mode is enabled. Be aware that this may log and emit extensive details.")
|
|
396
|
+
return {
|
|
397
|
+
"debug": AirflowDebugRunFacet(
|
|
398
|
+
packages=_get_all_packages_installed(),
|
|
399
|
+
)
|
|
400
|
+
}
|
|
401
|
+
|
|
402
|
+
|
|
377
403
|
def get_airflow_run_facet(
|
|
378
404
|
dag_run: DagRun,
|
|
379
405
|
dag: DAG,
|
|
@@ -413,16 +439,6 @@ def get_airflow_state_run_facet(dag_run: DagRun) -> dict[str, AirflowStateRunFac
|
|
|
413
439
|
}
|
|
414
440
|
|
|
415
441
|
|
|
416
|
-
def _safe_get_dag_tree_view(dag: DAG) -> list[str]:
|
|
417
|
-
# get_tree_view() has been added in Airflow 2.8.2
|
|
418
|
-
if hasattr(dag, "get_tree_view"):
|
|
419
|
-
return dag.get_tree_view().splitlines()
|
|
420
|
-
|
|
421
|
-
with redirect_stdout(StringIO()) as stdout:
|
|
422
|
-
dag.tree_view()
|
|
423
|
-
return stdout.getvalue().splitlines()
|
|
424
|
-
|
|
425
|
-
|
|
426
442
|
def _get_parsed_dag_tree(dag: DAG) -> dict:
|
|
427
443
|
"""
|
|
428
444
|
Get DAG's tasks hierarchy representation.
|
|
@@ -448,37 +464,15 @@ def _get_parsed_dag_tree(dag: DAG) -> dict:
|
|
|
448
464
|
"task_6": {}
|
|
449
465
|
}
|
|
450
466
|
"""
|
|
451
|
-
lines = _safe_get_dag_tree_view(dag)
|
|
452
|
-
task_dict: dict[str, dict] = {}
|
|
453
|
-
parent_map: dict[int, tuple[str, dict]] = {}
|
|
454
|
-
|
|
455
|
-
for line in lines:
|
|
456
|
-
stripped_line = line.strip()
|
|
457
|
-
if not stripped_line:
|
|
458
|
-
continue
|
|
459
|
-
|
|
460
|
-
# Determine the level by counting the leading spaces, assuming 4 spaces per level
|
|
461
|
-
# as defined in airflow.models.dag.DAG._generate_tree_view()
|
|
462
|
-
level = (len(line) - len(stripped_line)) // 4
|
|
463
|
-
# airflow.models.baseoperator.BaseOperator.__repr__ or
|
|
464
|
-
# airflow.models.mappedoperator.MappedOperator.__repr__ is used in DAG tree
|
|
465
|
-
# <Task({op_class}): {task_id}> or <Mapped({op_class}): {task_id}>
|
|
466
|
-
match = re.match(r"^<(?:Task|Mapped)\(.+\): (.+)>$", stripped_line)
|
|
467
|
-
if not match:
|
|
468
|
-
return {}
|
|
469
|
-
current_task_id = match[1]
|
|
470
|
-
|
|
471
|
-
if level == 0: # It's a root task
|
|
472
|
-
task_dict[current_task_id] = {}
|
|
473
|
-
parent_map[level] = (current_task_id, task_dict[current_task_id])
|
|
474
|
-
else:
|
|
475
|
-
# Find the immediate parent task
|
|
476
|
-
parent_task, parent_dict = parent_map[(level - 1)]
|
|
477
|
-
# Create new dict for the current task
|
|
478
|
-
parent_dict[current_task_id] = {}
|
|
479
|
-
# Update this task in the parent map
|
|
480
|
-
parent_map[level] = (current_task_id, parent_dict[current_task_id])
|
|
481
467
|
|
|
468
|
+
def get_downstream(task: Operator, current_dict: dict):
|
|
469
|
+
current_dict[task.task_id] = {}
|
|
470
|
+
for tmp_task in sorted(task.downstream_list, key=lambda x: x.task_id):
|
|
471
|
+
get_downstream(tmp_task, current_dict[task.task_id])
|
|
472
|
+
|
|
473
|
+
task_dict: dict = {}
|
|
474
|
+
for t in sorted(dag.roots, key=lambda x: x.task_id):
|
|
475
|
+
get_downstream(t, task_dict)
|
|
482
476
|
return task_dict
|
|
483
477
|
|
|
484
478
|
|
|
@@ -536,6 +530,10 @@ def _emits_ol_events(task: BaseOperator | MappedOperator) -> bool:
|
|
|
536
530
|
def get_unknown_source_attribute_run_facet(task: BaseOperator, name: str | None = None):
|
|
537
531
|
if not name:
|
|
538
532
|
name = get_operator_class(task).__name__
|
|
533
|
+
log.debug(
|
|
534
|
+
"UnknownOperatorAttributeRunFacet is deprecated and will be removed. "
|
|
535
|
+
"Use information from AirflowRunFacet instead."
|
|
536
|
+
)
|
|
539
537
|
return {
|
|
540
538
|
"unknownSourceAttribute": attrs.asdict(
|
|
541
539
|
UnknownOperatorAttributeRunFacet(
|
|
@@ -28,7 +28,7 @@ build-backend = "flit_core.buildapi"
|
|
|
28
28
|
|
|
29
29
|
[project]
|
|
30
30
|
name = "apache-airflow-providers-openlineage"
|
|
31
|
-
version = "1.10.0.rc1"
|
|
31
|
+
version = "1.11.0.rc1"
|
|
32
32
|
description = "Provider package apache-airflow-providers-openlineage for Apache Airflow"
|
|
33
33
|
readme = "README.rst"
|
|
34
34
|
authors = [
|
|
@@ -57,15 +57,15 @@ classifiers = [
|
|
|
57
57
|
requires-python = "~=3.8"
|
|
58
58
|
dependencies = [
|
|
59
59
|
"apache-airflow-providers-common-sql>=1.6.0rc0",
|
|
60
|
-
"apache-airflow>=2.
|
|
60
|
+
"apache-airflow>=2.8.0rc0",
|
|
61
61
|
"attrs>=22.2",
|
|
62
62
|
"openlineage-integration-common>=1.16.0",
|
|
63
63
|
"openlineage-python>=1.16.0",
|
|
64
64
|
]
|
|
65
65
|
|
|
66
66
|
[project.urls]
|
|
67
|
-
"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.10.0"
|
|
68
|
-
"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.10.0/changelog.html"
|
|
67
|
+
"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.11.0"
|
|
68
|
+
"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.11.0/changelog.html"
|
|
69
69
|
"Bug Tracker" = "https://github.com/apache/airflow/issues"
|
|
70
70
|
"Source Code" = "https://github.com/apache/airflow"
|
|
71
71
|
"Slack Chat" = "https://s.apache.org/airflow-slack"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|