apache-airflow-providers-openlineage 1.8.0__py3-none-any.whl → 1.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apache-airflow-providers-openlineage might be problematic. Click here for more details.
- airflow/providers/openlineage/LICENSE +4 -4
- airflow/providers/openlineage/__init__.py +1 -1
- airflow/providers/openlineage/conf.py +16 -1
- airflow/providers/openlineage/extractors/base.py +6 -3
- airflow/providers/openlineage/facets/AirflowJobFacet.json +40 -0
- airflow/providers/openlineage/facets/AirflowRunFacet.json +261 -0
- airflow/providers/openlineage/facets/AirflowStateRunFacet.json +34 -0
- airflow/providers/openlineage/facets/__init__.py +16 -0
- airflow/providers/openlineage/get_provider_info.py +11 -3
- airflow/providers/openlineage/plugins/adapter.py +69 -15
- airflow/providers/openlineage/plugins/facets.py +46 -4
- airflow/providers/openlineage/plugins/listener.py +128 -33
- airflow/providers/openlineage/plugins/macros.py +1 -1
- airflow/providers/openlineage/sqlparser.py +16 -6
- airflow/providers/openlineage/utils/selective_enable.py +6 -3
- airflow/providers/openlineage/utils/sql.py +11 -3
- airflow/providers/openlineage/utils/utils.py +191 -22
- {apache_airflow_providers_openlineage-1.8.0.dist-info → apache_airflow_providers_openlineage-1.9.0.dist-info}/METADATA +10 -10
- apache_airflow_providers_openlineage-1.9.0.dist-info/RECORD +28 -0
- apache_airflow_providers_openlineage-1.8.0.dist-info/RECORD +0 -24
- {apache_airflow_providers_openlineage-1.8.0.dist-info → apache_airflow_providers_openlineage-1.9.0.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_openlineage-1.8.0.dist-info → apache_airflow_providers_openlineage-1.9.0.dist-info}/entry_points.txt +0 -0
|
@@ -215,7 +215,7 @@ Third party Apache 2.0 licenses
|
|
|
215
215
|
|
|
216
216
|
The following components are provided under the Apache 2.0 License.
|
|
217
217
|
See project link for details. The text of each license is also included
|
|
218
|
-
at licenses/LICENSE-[project].txt.
|
|
218
|
+
at 3rd-party-licenses/LICENSE-[project].txt.
|
|
219
219
|
|
|
220
220
|
(ALv2 License) hue v4.3.0 (https://github.com/cloudera/hue/)
|
|
221
221
|
(ALv2 License) jqclock v2.3.0 (https://github.com/JohnRDOrazio/jQuery-Clock-Plugin)
|
|
@@ -227,7 +227,7 @@ MIT licenses
|
|
|
227
227
|
========================================================================
|
|
228
228
|
|
|
229
229
|
The following components are provided under the MIT License. See project link for details.
|
|
230
|
-
The text of each license is also included at licenses/LICENSE-[project].txt.
|
|
230
|
+
The text of each license is also included at 3rd-party-licenses/LICENSE-[project].txt.
|
|
231
231
|
|
|
232
232
|
(MIT License) jquery v3.5.1 (https://jquery.org/license/)
|
|
233
233
|
(MIT License) dagre-d3 v0.6.4 (https://github.com/cpettitt/dagre-d3)
|
|
@@ -243,11 +243,11 @@ The text of each license is also included at licenses/LICENSE-[project].txt.
|
|
|
243
243
|
BSD 3-Clause licenses
|
|
244
244
|
========================================================================
|
|
245
245
|
The following components are provided under the BSD 3-Clause license. See project links for details.
|
|
246
|
-
The text of each license is also included at licenses/LICENSE-[project].txt.
|
|
246
|
+
The text of each license is also included at 3rd-party-licenses/LICENSE-[project].txt.
|
|
247
247
|
|
|
248
248
|
(BSD 3 License) d3 v5.16.0 (https://d3js.org)
|
|
249
249
|
(BSD 3 License) d3-shape v2.1.0 (https://github.com/d3/d3-shape)
|
|
250
250
|
(BSD 3 License) cgroupspy 0.2.1 (https://github.com/cloudsigma/cgroupspy)
|
|
251
251
|
|
|
252
252
|
========================================================================
|
|
253
|
-
See licenses/LICENSES-ui.txt for packages used in `/airflow/www`
|
|
253
|
+
See 3rd-party-licenses/LICENSES-ui.txt for packages used in `/airflow/www`
|
|
@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version
|
|
|
29
29
|
|
|
30
30
|
__all__ = ["__version__"]
|
|
31
31
|
|
|
32
|
-
__version__ = "1.8.0"
|
|
32
|
+
__version__ = "1.9.0"
|
|
33
33
|
|
|
34
34
|
if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
|
|
35
35
|
"2.7.0"
|
|
@@ -33,7 +33,15 @@ from __future__ import annotations
|
|
|
33
33
|
import os
|
|
34
34
|
from typing import Any
|
|
35
35
|
|
|
36
|
-
|
|
36
|
+
# Disable caching if we're inside tests - this makes config easier to mock.
|
|
37
|
+
if os.getenv("PYTEST_VERSION"):
|
|
38
|
+
|
|
39
|
+
def decorator(func):
|
|
40
|
+
return func
|
|
41
|
+
|
|
42
|
+
cache = decorator
|
|
43
|
+
else:
|
|
44
|
+
from airflow.compat.functools import cache
|
|
37
45
|
from airflow.configuration import conf
|
|
38
46
|
|
|
39
47
|
_CONFIG_SECTION = "openlineage"
|
|
@@ -130,3 +138,10 @@ def dag_state_change_process_pool_size() -> int:
|
|
|
130
138
|
"""[openlineage] dag_state_change_process_pool_size."""
|
|
131
139
|
option = conf.get(_CONFIG_SECTION, "dag_state_change_process_pool_size", fallback="")
|
|
132
140
|
return _safe_int_convert(str(option).strip(), default=1)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
@cache
|
|
144
|
+
def execution_timeout() -> int:
|
|
145
|
+
"""[openlineage] execution_timeout."""
|
|
146
|
+
option = conf.get(_CONFIG_SECTION, "execution_timeout", fallback="")
|
|
147
|
+
return _safe_int_convert(str(option).strip(), default=10)
|
|
@@ -41,7 +41,8 @@ class OperatorLineage:
|
|
|
41
41
|
|
|
42
42
|
|
|
43
43
|
class BaseExtractor(ABC, LoggingMixin):
|
|
44
|
-
"""
|
|
44
|
+
"""
|
|
45
|
+
Abstract base extractor class.
|
|
45
46
|
|
|
46
47
|
This is used mostly to maintain support for custom extractors.
|
|
47
48
|
"""
|
|
@@ -55,7 +56,8 @@ class BaseExtractor(ABC, LoggingMixin):
|
|
|
55
56
|
@classmethod
|
|
56
57
|
@abstractmethod
|
|
57
58
|
def get_operator_classnames(cls) -> list[str]:
|
|
58
|
-
"""
|
|
59
|
+
"""
|
|
60
|
+
Get a list of operators that extractor works for.
|
|
59
61
|
|
|
60
62
|
This is an abstract method that subclasses should implement. There are
|
|
61
63
|
operators that work very similarly and one extractor can cover.
|
|
@@ -77,7 +79,8 @@ class DefaultExtractor(BaseExtractor):
|
|
|
77
79
|
|
|
78
80
|
@classmethod
|
|
79
81
|
def get_operator_classnames(cls) -> list[str]:
|
|
80
|
-
"""
|
|
82
|
+
"""
|
|
83
|
+
Assign this extractor to *no* operators.
|
|
81
84
|
|
|
82
85
|
Default extractor is chosen not on the classname basis, but
|
|
83
86
|
by existence of get_openlineage_facets method on operator.
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$defs": {
|
|
4
|
+
"AirflowJobFacet": {
|
|
5
|
+
"allOf": [
|
|
6
|
+
{
|
|
7
|
+
"$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/RunFacet"
|
|
8
|
+
},
|
|
9
|
+
{
|
|
10
|
+
"type": "object",
|
|
11
|
+
"properties": {
|
|
12
|
+
"taskTree": {
|
|
13
|
+
"description": "The hierarchical structure of tasks in the DAG.",
|
|
14
|
+
"type": "object",
|
|
15
|
+
"additionalProperties": true
|
|
16
|
+
},
|
|
17
|
+
"taskGroups": {
|
|
18
|
+
"description": "Information about all task groups within the DAG.",
|
|
19
|
+
"type": "object",
|
|
20
|
+
"additionalProperties": true
|
|
21
|
+
},
|
|
22
|
+
"tasks": {
|
|
23
|
+
"description": "Details of all individual tasks within the DAG.",
|
|
24
|
+
"type": "object",
|
|
25
|
+
"additionalProperties": true
|
|
26
|
+
}
|
|
27
|
+
},
|
|
28
|
+
"required": ["taskTree", "taskGroups", "tasks"]
|
|
29
|
+
}
|
|
30
|
+
],
|
|
31
|
+
"type": "object"
|
|
32
|
+
}
|
|
33
|
+
},
|
|
34
|
+
"type": "object",
|
|
35
|
+
"properties": {
|
|
36
|
+
"airflow": {
|
|
37
|
+
"$ref": "#/$defs/AirflowJobFacet"
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
}
|
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$defs": {
|
|
4
|
+
"AirflowRunFacet": {
|
|
5
|
+
"allOf": [
|
|
6
|
+
{
|
|
7
|
+
"$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/RunFacet"
|
|
8
|
+
},
|
|
9
|
+
{
|
|
10
|
+
"type": "object",
|
|
11
|
+
"properties": {
|
|
12
|
+
"dag": {
|
|
13
|
+
"$ref": "#/$defs/DAG"
|
|
14
|
+
},
|
|
15
|
+
"dagRun": {
|
|
16
|
+
"$ref": "#/$defs/DagRun"
|
|
17
|
+
},
|
|
18
|
+
"taskInstance": {
|
|
19
|
+
"$ref": "#/$defs/TaskInstance"
|
|
20
|
+
},
|
|
21
|
+
"task": {
|
|
22
|
+
"$ref": "#/$defs/Task"
|
|
23
|
+
},
|
|
24
|
+
"taskUuid": {
|
|
25
|
+
"type": "string"
|
|
26
|
+
}
|
|
27
|
+
},
|
|
28
|
+
"required": [
|
|
29
|
+
"dag",
|
|
30
|
+
"dagRun",
|
|
31
|
+
"taskInstance",
|
|
32
|
+
"task",
|
|
33
|
+
"taskUuid"
|
|
34
|
+
]
|
|
35
|
+
}
|
|
36
|
+
]
|
|
37
|
+
},
|
|
38
|
+
"Task": {
|
|
39
|
+
"type": "object",
|
|
40
|
+
"properties": {
|
|
41
|
+
"depends_on_past": {
|
|
42
|
+
"type": "boolean"
|
|
43
|
+
},
|
|
44
|
+
"downstream_task_ids": {
|
|
45
|
+
"type": "string"
|
|
46
|
+
},
|
|
47
|
+
"execution_timeout": {
|
|
48
|
+
"type": "string"
|
|
49
|
+
},
|
|
50
|
+
"executor_config": {
|
|
51
|
+
"type": "object",
|
|
52
|
+
"additionalProperties": true
|
|
53
|
+
},
|
|
54
|
+
"ignore_first_depends_on_past": {
|
|
55
|
+
"type": "boolean"
|
|
56
|
+
},
|
|
57
|
+
"is_setup": {
|
|
58
|
+
"type": "boolean"
|
|
59
|
+
},
|
|
60
|
+
"is_teardown": {
|
|
61
|
+
"type": "boolean"
|
|
62
|
+
},
|
|
63
|
+
"mapped": {
|
|
64
|
+
"type": "boolean"
|
|
65
|
+
},
|
|
66
|
+
"max_active_tis_per_dag": {
|
|
67
|
+
"type": "integer"
|
|
68
|
+
},
|
|
69
|
+
"max_active_tis_per_dagrun": {
|
|
70
|
+
"type": "integer"
|
|
71
|
+
},
|
|
72
|
+
"max_retry_delay": {
|
|
73
|
+
"type": "string"
|
|
74
|
+
},
|
|
75
|
+
"multiple_outputs": {
|
|
76
|
+
"type": "boolean"
|
|
77
|
+
},
|
|
78
|
+
"operator_class": {
|
|
79
|
+
"description": "Module + class name of the operator",
|
|
80
|
+
"type": "string"
|
|
81
|
+
},
|
|
82
|
+
"owner": {
|
|
83
|
+
"type": "string"
|
|
84
|
+
},
|
|
85
|
+
"priority_weight": {
|
|
86
|
+
"type": "integer"
|
|
87
|
+
},
|
|
88
|
+
"queue": {
|
|
89
|
+
"type": "string"
|
|
90
|
+
},
|
|
91
|
+
"retries": {
|
|
92
|
+
"type": "integer"
|
|
93
|
+
},
|
|
94
|
+
"retry_exponential_backoff": {
|
|
95
|
+
"type": "boolean"
|
|
96
|
+
},
|
|
97
|
+
"run_as_user": {
|
|
98
|
+
"type": "string"
|
|
99
|
+
},
|
|
100
|
+
"sla": {
|
|
101
|
+
"type": "number"
|
|
102
|
+
},
|
|
103
|
+
"task_id": {
|
|
104
|
+
"type": "string"
|
|
105
|
+
},
|
|
106
|
+
"trigger_rule": {
|
|
107
|
+
"type": "string"
|
|
108
|
+
},
|
|
109
|
+
"upstream_task_ids": {
|
|
110
|
+
"type": "string"
|
|
111
|
+
},
|
|
112
|
+
"wait_for_downstream": {
|
|
113
|
+
"type": "boolean"
|
|
114
|
+
},
|
|
115
|
+
"wait_for_past_depends_before_skipping": {
|
|
116
|
+
"type": "boolean"
|
|
117
|
+
},
|
|
118
|
+
"weight_rule": {
|
|
119
|
+
"type": "string"
|
|
120
|
+
},
|
|
121
|
+
"task_group": {
|
|
122
|
+
"description": "Task group related information",
|
|
123
|
+
"type": "object",
|
|
124
|
+
"properties": {
|
|
125
|
+
"group_id": {
|
|
126
|
+
"type": "string"
|
|
127
|
+
},
|
|
128
|
+
"downstream_group_ids": {
|
|
129
|
+
"type": "string"
|
|
130
|
+
},
|
|
131
|
+
"downstream_task_ids": {
|
|
132
|
+
"type": "string"
|
|
133
|
+
},
|
|
134
|
+
"prefix_group_id": {
|
|
135
|
+
"type": "boolean"
|
|
136
|
+
},
|
|
137
|
+
"tooltip": {
|
|
138
|
+
"type": "string"
|
|
139
|
+
},
|
|
140
|
+
"upstream_group_ids": {
|
|
141
|
+
"type": "string"
|
|
142
|
+
},
|
|
143
|
+
"upstream_task_ids": {
|
|
144
|
+
"type": "string"
|
|
145
|
+
}
|
|
146
|
+
},
|
|
147
|
+
"additionalProperties": true,
|
|
148
|
+
"required": ["group_id"]
|
|
149
|
+
}
|
|
150
|
+
},
|
|
151
|
+
"additionalProperties": true,
|
|
152
|
+
"required": [
|
|
153
|
+
"task_id"
|
|
154
|
+
]
|
|
155
|
+
},
|
|
156
|
+
"DAG": {
|
|
157
|
+
"type": "object",
|
|
158
|
+
"properties": {
|
|
159
|
+
"dag_id": {
|
|
160
|
+
"type": "string"
|
|
161
|
+
},
|
|
162
|
+
"description": {
|
|
163
|
+
"type": "string"
|
|
164
|
+
},
|
|
165
|
+
"owner": {
|
|
166
|
+
"type": "string"
|
|
167
|
+
},
|
|
168
|
+
"schedule_interval": {
|
|
169
|
+
"type": "string"
|
|
170
|
+
},
|
|
171
|
+
"start_date": {
|
|
172
|
+
"type": "string",
|
|
173
|
+
"format": "date-time"
|
|
174
|
+
},
|
|
175
|
+
"tags": {
|
|
176
|
+
"type": "string"
|
|
177
|
+
},
|
|
178
|
+
"timetable": {
|
|
179
|
+
"description": "Describes timetable (successor of schedule_interval)",
|
|
180
|
+
"type": "object",
|
|
181
|
+
"additionalProperties": true
|
|
182
|
+
}
|
|
183
|
+
},
|
|
184
|
+
"additionalProperties": true,
|
|
185
|
+
"required": [
|
|
186
|
+
"dag_id",
|
|
187
|
+
"start_date"
|
|
188
|
+
]
|
|
189
|
+
},
|
|
190
|
+
"TaskInstance": {
|
|
191
|
+
"type": "object",
|
|
192
|
+
"properties": {
|
|
193
|
+
"duration": {
|
|
194
|
+
"type": "number"
|
|
195
|
+
},
|
|
196
|
+
"map_index": {
|
|
197
|
+
"type": "integer"
|
|
198
|
+
},
|
|
199
|
+
"pool": {
|
|
200
|
+
"type": "string"
|
|
201
|
+
},
|
|
202
|
+
"try_number": {
|
|
203
|
+
"type": "integer"
|
|
204
|
+
},
|
|
205
|
+
"queued_dttm": {
|
|
206
|
+
"type": "string",
|
|
207
|
+
"format": "date-time"
|
|
208
|
+
}
|
|
209
|
+
},
|
|
210
|
+
"additionalProperties": true,
|
|
211
|
+
"required": [
|
|
212
|
+
"pool",
|
|
213
|
+
"try_number"
|
|
214
|
+
]
|
|
215
|
+
},
|
|
216
|
+
"DagRun": {
|
|
217
|
+
"type": "object",
|
|
218
|
+
"properties": {
|
|
219
|
+
"conf": {
|
|
220
|
+
"type": "object",
|
|
221
|
+
"additionalProperties": true
|
|
222
|
+
},
|
|
223
|
+
"dag_id": {
|
|
224
|
+
"type": "string"
|
|
225
|
+
},
|
|
226
|
+
"data_interval_start": {
|
|
227
|
+
"type": "string",
|
|
228
|
+
"format": "date-time"
|
|
229
|
+
},
|
|
230
|
+
"data_interval_end": {
|
|
231
|
+
"type": "string",
|
|
232
|
+
"format": "date-time"
|
|
233
|
+
},
|
|
234
|
+
"external_trigger": {
|
|
235
|
+
"type": "boolean"
|
|
236
|
+
},
|
|
237
|
+
"run_id": {
|
|
238
|
+
"type": "string"
|
|
239
|
+
},
|
|
240
|
+
"run_type": {
|
|
241
|
+
"type": "string"
|
|
242
|
+
},
|
|
243
|
+
"start_date": {
|
|
244
|
+
"type": "string",
|
|
245
|
+
"format": "date-time"
|
|
246
|
+
}
|
|
247
|
+
},
|
|
248
|
+
"additionalProperties": true,
|
|
249
|
+
"required": [
|
|
250
|
+
"dag_id",
|
|
251
|
+
"run_id"
|
|
252
|
+
]
|
|
253
|
+
}
|
|
254
|
+
},
|
|
255
|
+
"type": "object",
|
|
256
|
+
"properties": {
|
|
257
|
+
"airflow": {
|
|
258
|
+
"$ref": "#/$defs/AirflowRunFacet"
|
|
259
|
+
}
|
|
260
|
+
}
|
|
261
|
+
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$defs": {
|
|
4
|
+
"AirflowStateRunFacet": {
|
|
5
|
+
"allOf": [
|
|
6
|
+
{
|
|
7
|
+
"$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/RunFacet"
|
|
8
|
+
},
|
|
9
|
+
{
|
|
10
|
+
"type": "object",
|
|
11
|
+
"properties": {
|
|
12
|
+
"dagRunState": {
|
|
13
|
+
"description": "The final status of the entire DagRun",
|
|
14
|
+
"type": "string"
|
|
15
|
+
},
|
|
16
|
+
"tasksState": {
|
|
17
|
+
"description": "Mapping of task IDs to their respective states",
|
|
18
|
+
"type": "object",
|
|
19
|
+
"additionalProperties": true
|
|
20
|
+
}
|
|
21
|
+
},
|
|
22
|
+
"required": ["dagRunState", "tasksState"]
|
|
23
|
+
}
|
|
24
|
+
],
|
|
25
|
+
"type": "object"
|
|
26
|
+
}
|
|
27
|
+
},
|
|
28
|
+
"type": "object",
|
|
29
|
+
"properties": {
|
|
30
|
+
"airflowState": {
|
|
31
|
+
"$ref": "#/$defs/AirflowStateRunFacet"
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
|
3
|
+
# distributed with this work for additional information
|
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
|
6
|
+
# "License"); you may not use this file except in compliance
|
|
7
|
+
# with the License. You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
@@ -28,8 +28,9 @@ def get_provider_info():
|
|
|
28
28
|
"name": "OpenLineage Airflow",
|
|
29
29
|
"description": "`OpenLineage <https://openlineage.io/>`__\n",
|
|
30
30
|
"state": "ready",
|
|
31
|
-
"source-date-epoch":
|
|
31
|
+
"source-date-epoch": 1718605195,
|
|
32
32
|
"versions": [
|
|
33
|
+
"1.9.0",
|
|
33
34
|
"1.8.0",
|
|
34
35
|
"1.7.1",
|
|
35
36
|
"1.7.0",
|
|
@@ -50,8 +51,8 @@ def get_provider_info():
|
|
|
50
51
|
"apache-airflow>=2.7.0",
|
|
51
52
|
"apache-airflow-providers-common-sql>=1.6.0",
|
|
52
53
|
"attrs>=22.2",
|
|
53
|
-
"openlineage-integration-common>=
|
|
54
|
-
"openlineage-python>=
|
|
54
|
+
"openlineage-integration-common>=1.16.0",
|
|
55
|
+
"openlineage-python>=1.16.0",
|
|
55
56
|
],
|
|
56
57
|
"integrations": [
|
|
57
58
|
{
|
|
@@ -134,6 +135,13 @@ def get_provider_info():
|
|
|
134
135
|
"type": "integer",
|
|
135
136
|
"version_added": "1.8.0",
|
|
136
137
|
},
|
|
138
|
+
"execution_timeout": {
|
|
139
|
+
"description": "Maximum amount of time (in seconds) that OpenLineage can spend executing metadata extraction.\n",
|
|
140
|
+
"default": "10",
|
|
141
|
+
"example": None,
|
|
142
|
+
"type": "integer",
|
|
143
|
+
"version_added": "1.9.0",
|
|
144
|
+
},
|
|
137
145
|
},
|
|
138
146
|
}
|
|
139
147
|
},
|
|
@@ -17,7 +17,6 @@
|
|
|
17
17
|
from __future__ import annotations
|
|
18
18
|
|
|
19
19
|
import traceback
|
|
20
|
-
import uuid
|
|
21
20
|
from contextlib import ExitStack
|
|
22
21
|
from typing import TYPE_CHECKING
|
|
23
22
|
|
|
@@ -36,13 +35,19 @@ from openlineage.client.facet import (
|
|
|
36
35
|
SourceCodeLocationJobFacet,
|
|
37
36
|
)
|
|
38
37
|
from openlineage.client.run import Job, Run, RunEvent, RunState
|
|
38
|
+
from openlineage.client.uuid import generate_static_uuid
|
|
39
39
|
|
|
40
40
|
from airflow.providers.openlineage import __version__ as OPENLINEAGE_PROVIDER_VERSION, conf
|
|
41
|
-
from airflow.providers.openlineage.utils.utils import OpenLineageRedactor
|
|
41
|
+
from airflow.providers.openlineage.utils.utils import (
|
|
42
|
+
OpenLineageRedactor,
|
|
43
|
+
get_airflow_state_run_facet,
|
|
44
|
+
)
|
|
42
45
|
from airflow.stats import Stats
|
|
43
46
|
from airflow.utils.log.logging_mixin import LoggingMixin
|
|
44
47
|
|
|
45
48
|
if TYPE_CHECKING:
|
|
49
|
+
from datetime import datetime
|
|
50
|
+
|
|
46
51
|
from airflow.models.dagrun import DagRun
|
|
47
52
|
from airflow.providers.openlineage.extractors import OperatorLineage
|
|
48
53
|
from airflow.utils.log.secrets_masker import SecretsMasker
|
|
@@ -111,20 +116,31 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
111
116
|
return yaml.safe_load(config_file)
|
|
112
117
|
|
|
113
118
|
@staticmethod
|
|
114
|
-
def build_dag_run_id(dag_id,
|
|
115
|
-
return str(
|
|
119
|
+
def build_dag_run_id(dag_id: str, execution_date: datetime) -> str:
|
|
120
|
+
return str(
|
|
121
|
+
generate_static_uuid(
|
|
122
|
+
instant=execution_date,
|
|
123
|
+
data=f"{conf.namespace()}.{dag_id}".encode(),
|
|
124
|
+
)
|
|
125
|
+
)
|
|
116
126
|
|
|
117
127
|
@staticmethod
|
|
118
|
-
def build_task_instance_run_id(
|
|
128
|
+
def build_task_instance_run_id(
|
|
129
|
+
dag_id: str,
|
|
130
|
+
task_id: str,
|
|
131
|
+
try_number: int,
|
|
132
|
+
execution_date: datetime,
|
|
133
|
+
):
|
|
119
134
|
return str(
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
f"{conf.namespace()}.{dag_id}.{task_id}.{
|
|
135
|
+
generate_static_uuid(
|
|
136
|
+
instant=execution_date,
|
|
137
|
+
data=f"{conf.namespace()}.{dag_id}.{task_id}.{try_number}".encode(),
|
|
123
138
|
)
|
|
124
139
|
)
|
|
125
140
|
|
|
126
141
|
def emit(self, event: RunEvent):
|
|
127
|
-
"""
|
|
142
|
+
"""
|
|
143
|
+
Emit OpenLineage event.
|
|
128
144
|
|
|
129
145
|
:param event: Event to be emitted.
|
|
130
146
|
:return: Redacted Event.
|
|
@@ -264,6 +280,7 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
264
280
|
parent_run_id: str | None,
|
|
265
281
|
end_time: str,
|
|
266
282
|
task: OperatorLineage,
|
|
283
|
+
error: str | BaseException | None = None,
|
|
267
284
|
) -> RunEvent:
|
|
268
285
|
"""
|
|
269
286
|
Emit openlineage event of type FAIL.
|
|
@@ -275,7 +292,22 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
275
292
|
:param parent_run_id: identifier of job spawning this task
|
|
276
293
|
:param end_time: time of task completion
|
|
277
294
|
:param task: metadata container with information extracted from operator
|
|
295
|
+
:param error: error
|
|
278
296
|
"""
|
|
297
|
+
error_facet = {}
|
|
298
|
+
if error:
|
|
299
|
+
stack_trace = None
|
|
300
|
+
if isinstance(error, BaseException) and error.__traceback__:
|
|
301
|
+
import traceback
|
|
302
|
+
|
|
303
|
+
stack_trace = "\\n".join(traceback.format_exception(type(error), error, error.__traceback__))
|
|
304
|
+
|
|
305
|
+
error_facet = {
|
|
306
|
+
"errorMessage": ErrorMessageRunFacet(
|
|
307
|
+
message=str(error), programmingLanguage="python", stackTrace=stack_trace
|
|
308
|
+
)
|
|
309
|
+
}
|
|
310
|
+
|
|
279
311
|
event = RunEvent(
|
|
280
312
|
eventType=RunState.FAIL,
|
|
281
313
|
eventTime=end_time,
|
|
@@ -284,7 +316,7 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
284
316
|
job_name=job_name,
|
|
285
317
|
parent_job_name=parent_job_name,
|
|
286
318
|
parent_run_id=parent_run_id,
|
|
287
|
-
run_facets=task.run_facets,
|
|
319
|
+
run_facets={**task.run_facets, **error_facet},
|
|
288
320
|
),
|
|
289
321
|
job=self._build_job(job_name, job_type=_JOB_TYPE_TASK, job_facets=task.job_facets),
|
|
290
322
|
inputs=task.inputs,
|
|
@@ -299,14 +331,24 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
299
331
|
msg: str,
|
|
300
332
|
nominal_start_time: str,
|
|
301
333
|
nominal_end_time: str,
|
|
334
|
+
job_facets: dict[str, BaseFacet] | None = None, # Custom job facets
|
|
302
335
|
):
|
|
303
336
|
try:
|
|
304
337
|
event = RunEvent(
|
|
305
338
|
eventType=RunState.START,
|
|
306
339
|
eventTime=dag_run.start_date.isoformat(),
|
|
307
|
-
job=self._build_job(
|
|
340
|
+
job=self._build_job(
|
|
341
|
+
job_name=dag_run.dag_id,
|
|
342
|
+
job_type=_JOB_TYPE_DAG,
|
|
343
|
+
job_description=dag_run.dag.description if dag_run.dag else None,
|
|
344
|
+
owners=[x.strip() for x in dag_run.dag.owner.split(",")] if dag_run.dag else None,
|
|
345
|
+
job_facets=job_facets,
|
|
346
|
+
),
|
|
308
347
|
run=self._build_run(
|
|
309
|
-
run_id=self.build_dag_run_id(
|
|
348
|
+
run_id=self.build_dag_run_id(
|
|
349
|
+
dag_id=dag_run.dag_id,
|
|
350
|
+
execution_date=dag_run.execution_date,
|
|
351
|
+
),
|
|
310
352
|
job_name=dag_run.dag_id,
|
|
311
353
|
nominal_start_time=nominal_start_time,
|
|
312
354
|
nominal_end_time=nominal_end_time,
|
|
@@ -328,7 +370,13 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
328
370
|
eventType=RunState.COMPLETE,
|
|
329
371
|
eventTime=dag_run.end_date.isoformat(),
|
|
330
372
|
job=self._build_job(job_name=dag_run.dag_id, job_type=_JOB_TYPE_DAG),
|
|
331
|
-
run=Run(
|
|
373
|
+
run=Run(
|
|
374
|
+
runId=self.build_dag_run_id(
|
|
375
|
+
dag_id=dag_run.dag_id,
|
|
376
|
+
execution_date=dag_run.execution_date,
|
|
377
|
+
),
|
|
378
|
+
facets={**get_airflow_state_run_facet(dag_run)},
|
|
379
|
+
),
|
|
332
380
|
inputs=[],
|
|
333
381
|
outputs=[],
|
|
334
382
|
producer=_PRODUCER,
|
|
@@ -347,8 +395,14 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
347
395
|
eventTime=dag_run.end_date.isoformat(),
|
|
348
396
|
job=self._build_job(job_name=dag_run.dag_id, job_type=_JOB_TYPE_DAG),
|
|
349
397
|
run=Run(
|
|
350
|
-
runId=self.build_dag_run_id(
|
|
351
|
-
|
|
398
|
+
runId=self.build_dag_run_id(
|
|
399
|
+
dag_id=dag_run.dag_id,
|
|
400
|
+
execution_date=dag_run.execution_date,
|
|
401
|
+
),
|
|
402
|
+
facets={
|
|
403
|
+
"errorMessage": ErrorMessageRunFacet(message=msg, programmingLanguage="python"),
|
|
404
|
+
**get_airflow_state_run_facet(dag_run),
|
|
405
|
+
},
|
|
352
406
|
),
|
|
353
407
|
inputs=[],
|
|
354
408
|
outputs=[],
|