apache-airflow-providers-openlineage 1.9.0rc1__tar.gz → 1.9.0rc2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apache-airflow-providers-openlineage might be problematic. Click here for more details.
- {apache_airflow_providers_openlineage-1.9.0rc1 → apache_airflow_providers_openlineage-1.9.0rc2}/PKG-INFO +2 -2
- {apache_airflow_providers_openlineage-1.9.0rc1 → apache_airflow_providers_openlineage-1.9.0rc2}/README.rst +1 -1
- {apache_airflow_providers_openlineage-1.9.0rc1 → apache_airflow_providers_openlineage-1.9.0rc2}/airflow/providers/openlineage/extractors/base.py +6 -3
- {apache_airflow_providers_openlineage-1.9.0rc1 → apache_airflow_providers_openlineage-1.9.0rc2}/airflow/providers/openlineage/plugins/adapter.py +11 -4
- {apache_airflow_providers_openlineage-1.9.0rc1 → apache_airflow_providers_openlineage-1.9.0rc2}/airflow/providers/openlineage/plugins/facets.py +2 -1
- {apache_airflow_providers_openlineage-1.9.0rc1 → apache_airflow_providers_openlineage-1.9.0rc2}/airflow/providers/openlineage/plugins/listener.py +2 -2
- {apache_airflow_providers_openlineage-1.9.0rc1 → apache_airflow_providers_openlineage-1.9.0rc2}/airflow/providers/openlineage/sqlparser.py +4 -2
- {apache_airflow_providers_openlineage-1.9.0rc1 → apache_airflow_providers_openlineage-1.9.0rc2}/airflow/providers/openlineage/utils/selective_enable.py +6 -3
- {apache_airflow_providers_openlineage-1.9.0rc1 → apache_airflow_providers_openlineage-1.9.0rc2}/airflow/providers/openlineage/utils/sql.py +4 -2
- {apache_airflow_providers_openlineage-1.9.0rc1 → apache_airflow_providers_openlineage-1.9.0rc2}/airflow/providers/openlineage/utils/utils.py +12 -1
- {apache_airflow_providers_openlineage-1.9.0rc1 → apache_airflow_providers_openlineage-1.9.0rc2}/pyproject.toml +1 -1
- {apache_airflow_providers_openlineage-1.9.0rc1 → apache_airflow_providers_openlineage-1.9.0rc2}/airflow/providers/openlineage/LICENSE +0 -0
- {apache_airflow_providers_openlineage-1.9.0rc1 → apache_airflow_providers_openlineage-1.9.0rc2}/airflow/providers/openlineage/__init__.py +0 -0
- {apache_airflow_providers_openlineage-1.9.0rc1 → apache_airflow_providers_openlineage-1.9.0rc2}/airflow/providers/openlineage/conf.py +0 -0
- {apache_airflow_providers_openlineage-1.9.0rc1 → apache_airflow_providers_openlineage-1.9.0rc2}/airflow/providers/openlineage/extractors/__init__.py +0 -0
- {apache_airflow_providers_openlineage-1.9.0rc1 → apache_airflow_providers_openlineage-1.9.0rc2}/airflow/providers/openlineage/extractors/bash.py +0 -0
- {apache_airflow_providers_openlineage-1.9.0rc1 → apache_airflow_providers_openlineage-1.9.0rc2}/airflow/providers/openlineage/extractors/manager.py +0 -0
- {apache_airflow_providers_openlineage-1.9.0rc1 → apache_airflow_providers_openlineage-1.9.0rc2}/airflow/providers/openlineage/extractors/python.py +0 -0
- {apache_airflow_providers_openlineage-1.9.0rc1 → apache_airflow_providers_openlineage-1.9.0rc2}/airflow/providers/openlineage/facets/AirflowJobFacet.json +0 -0
- {apache_airflow_providers_openlineage-1.9.0rc1 → apache_airflow_providers_openlineage-1.9.0rc2}/airflow/providers/openlineage/facets/AirflowRunFacet.json +0 -0
- {apache_airflow_providers_openlineage-1.9.0rc1 → apache_airflow_providers_openlineage-1.9.0rc2}/airflow/providers/openlineage/facets/AirflowStateRunFacet.json +0 -0
- {apache_airflow_providers_openlineage-1.9.0rc1 → apache_airflow_providers_openlineage-1.9.0rc2}/airflow/providers/openlineage/facets/__init__.py +0 -0
- {apache_airflow_providers_openlineage-1.9.0rc1 → apache_airflow_providers_openlineage-1.9.0rc2}/airflow/providers/openlineage/get_provider_info.py +0 -0
- {apache_airflow_providers_openlineage-1.9.0rc1 → apache_airflow_providers_openlineage-1.9.0rc2}/airflow/providers/openlineage/plugins/__init__.py +0 -0
- {apache_airflow_providers_openlineage-1.9.0rc1 → apache_airflow_providers_openlineage-1.9.0rc2}/airflow/providers/openlineage/plugins/macros.py +0 -0
- {apache_airflow_providers_openlineage-1.9.0rc1 → apache_airflow_providers_openlineage-1.9.0rc2}/airflow/providers/openlineage/plugins/openlineage.py +0 -0
- {apache_airflow_providers_openlineage-1.9.0rc1 → apache_airflow_providers_openlineage-1.9.0rc2}/airflow/providers/openlineage/utils/__init__.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: apache-airflow-providers-openlineage
|
|
3
|
-
Version: 1.9.0rc1
|
|
3
|
+
Version: 1.9.0rc2
|
|
4
4
|
Summary: Provider package apache-airflow-providers-openlineage for Apache Airflow
|
|
5
5
|
Keywords: airflow-provider,openlineage,airflow,integration
|
|
6
6
|
Author-email: Apache Software Foundation <dev@airflow.apache.org>
|
|
@@ -80,7 +80,7 @@ Provides-Extra: common.sql
|
|
|
80
80
|
|
|
81
81
|
Package ``apache-airflow-providers-openlineage``
|
|
82
82
|
|
|
83
|
-
Release: ``1.9.0.rc1``
|
|
83
|
+
Release: ``1.9.0.rc2``
|
|
84
84
|
|
|
85
85
|
|
|
86
86
|
`OpenLineage <https://openlineage.io/>`__
|
|
@@ -41,7 +41,8 @@ class OperatorLineage:
|
|
|
41
41
|
|
|
42
42
|
|
|
43
43
|
class BaseExtractor(ABC, LoggingMixin):
|
|
44
|
-
"""
|
|
44
|
+
"""
|
|
45
|
+
Abstract base extractor class.
|
|
45
46
|
|
|
46
47
|
This is used mostly to maintain support for custom extractors.
|
|
47
48
|
"""
|
|
@@ -55,7 +56,8 @@ class BaseExtractor(ABC, LoggingMixin):
|
|
|
55
56
|
@classmethod
|
|
56
57
|
@abstractmethod
|
|
57
58
|
def get_operator_classnames(cls) -> list[str]:
|
|
58
|
-
"""
|
|
59
|
+
"""
|
|
60
|
+
Get a list of operators that extractor works for.
|
|
59
61
|
|
|
60
62
|
This is an abstract method that subclasses should implement. There are
|
|
61
63
|
operators that work very similarly and one extractor can cover.
|
|
@@ -77,7 +79,8 @@ class DefaultExtractor(BaseExtractor):
|
|
|
77
79
|
|
|
78
80
|
@classmethod
|
|
79
81
|
def get_operator_classnames(cls) -> list[str]:
|
|
80
|
-
"""
|
|
82
|
+
"""
|
|
83
|
+
Assign this extractor to *no* operators.
|
|
81
84
|
|
|
82
85
|
Default extractor is chosen not on the classname basis, but
|
|
83
86
|
by existence of get_openlineage_facets method on operator.
|
|
@@ -139,7 +139,8 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
139
139
|
)
|
|
140
140
|
|
|
141
141
|
def emit(self, event: RunEvent):
|
|
142
|
-
"""
|
|
142
|
+
"""
|
|
143
|
+
Emit OpenLineage event.
|
|
143
144
|
|
|
144
145
|
:param event: Event to be emitted.
|
|
145
146
|
:return: Redacted Event.
|
|
@@ -295,11 +296,17 @@ class OpenLineageAdapter(LoggingMixin):
|
|
|
295
296
|
"""
|
|
296
297
|
error_facet = {}
|
|
297
298
|
if error:
|
|
298
|
-
|
|
299
|
+
stack_trace = None
|
|
300
|
+
if isinstance(error, BaseException) and error.__traceback__:
|
|
299
301
|
import traceback
|
|
300
302
|
|
|
301
|
-
|
|
302
|
-
|
|
303
|
+
stack_trace = "\\n".join(traceback.format_exception(type(error), error, error.__traceback__))
|
|
304
|
+
|
|
305
|
+
error_facet = {
|
|
306
|
+
"errorMessage": ErrorMessageRunFacet(
|
|
307
|
+
message=str(error), programmingLanguage="python", stackTrace=stack_trace
|
|
308
|
+
)
|
|
309
|
+
}
|
|
303
310
|
|
|
304
311
|
event = RunEvent(
|
|
305
312
|
eventType=RunState.FAIL,
|
|
@@ -102,7 +102,8 @@ class AirflowRunFacet(BaseFacet):
|
|
|
102
102
|
|
|
103
103
|
@define(slots=False)
|
|
104
104
|
class UnknownOperatorInstance(RedactMixin):
|
|
105
|
-
"""
|
|
105
|
+
"""
|
|
106
|
+
Describes an unknown operator.
|
|
106
107
|
|
|
107
108
|
This specifies the (class) name of the operator and its properties.
|
|
108
109
|
"""
|
|
@@ -367,7 +367,7 @@ class OpenLineageListener:
|
|
|
367
367
|
except BaseException:
|
|
368
368
|
# Kill the process directly.
|
|
369
369
|
self._terminate_with_wait(process)
|
|
370
|
-
self.log.
|
|
370
|
+
self.log.debug("Process with pid %s finished - parent", pid)
|
|
371
371
|
else:
|
|
372
372
|
setproctitle(getproctitle() + " - OpenLineage - " + callable_name)
|
|
373
373
|
configure_orm(disable_connection_pool=True)
|
|
@@ -381,7 +381,7 @@ class OpenLineageListener:
|
|
|
381
381
|
if not self._executor:
|
|
382
382
|
self._executor = ProcessPoolExecutor(
|
|
383
383
|
max_workers=conf.dag_state_change_process_pool_size(),
|
|
384
|
-
initializer=_executor_initializer
|
|
384
|
+
initializer=_executor_initializer,
|
|
385
385
|
)
|
|
386
386
|
return self._executor
|
|
387
387
|
|
|
@@ -118,7 +118,8 @@ def from_table_meta(
|
|
|
118
118
|
|
|
119
119
|
|
|
120
120
|
class SQLParser(LoggingMixin):
|
|
121
|
-
"""
|
|
121
|
+
"""
|
|
122
|
+
Interface for openlineage-sql.
|
|
122
123
|
|
|
123
124
|
:param dialect: dialect specific to the database
|
|
124
125
|
:param default_schema: schema applied to each table with no schema parsed
|
|
@@ -244,7 +245,8 @@ class SQLParser(LoggingMixin):
|
|
|
244
245
|
sqlalchemy_engine: Engine | None = None,
|
|
245
246
|
use_connection: bool = True,
|
|
246
247
|
) -> OperatorLineage:
|
|
247
|
-
"""
|
|
248
|
+
"""
|
|
249
|
+
Parse SQL statement(s) and generate OpenLineage metadata.
|
|
248
250
|
|
|
249
251
|
Generated OpenLineage metadata contains:
|
|
250
252
|
|
|
@@ -32,7 +32,8 @@ log = logging.getLogger(__name__)
|
|
|
32
32
|
|
|
33
33
|
|
|
34
34
|
def enable_lineage(obj: T) -> T:
|
|
35
|
-
"""
|
|
35
|
+
"""
|
|
36
|
+
Set selective enable OpenLineage parameter to True.
|
|
36
37
|
|
|
37
38
|
The method also propagates param to tasks if the object is DAG.
|
|
38
39
|
"""
|
|
@@ -48,7 +49,8 @@ def enable_lineage(obj: T) -> T:
|
|
|
48
49
|
|
|
49
50
|
|
|
50
51
|
def disable_lineage(obj: T) -> T:
|
|
51
|
-
"""
|
|
52
|
+
"""
|
|
53
|
+
Set selective enable OpenLineage parameter to False.
|
|
52
54
|
|
|
53
55
|
The method also propagates param to tasks if the object is DAG.
|
|
54
56
|
"""
|
|
@@ -73,7 +75,8 @@ def is_task_lineage_enabled(task: Operator) -> bool:
|
|
|
73
75
|
|
|
74
76
|
|
|
75
77
|
def is_dag_lineage_enabled(dag: DAG) -> bool:
|
|
76
|
-
"""
|
|
78
|
+
"""
|
|
79
|
+
Check if DAG is selectively enabled to emit OpenLineage events.
|
|
77
80
|
|
|
78
81
|
The method also checks if selective enable parameter is set to True
|
|
79
82
|
or if any of the tasks in DAG is selectively enabled.
|
|
@@ -85,7 +85,8 @@ def get_table_schemas(
|
|
|
85
85
|
in_query: str | None,
|
|
86
86
|
out_query: str | None,
|
|
87
87
|
) -> tuple[list[Dataset], list[Dataset]]:
|
|
88
|
-
"""
|
|
88
|
+
"""
|
|
89
|
+
Query database for table schemas.
|
|
89
90
|
|
|
90
91
|
Uses provided hook. Responsibility to provide queries for this function is on particular extractors.
|
|
91
92
|
If query for input or output table isn't provided, the query is skipped.
|
|
@@ -111,7 +112,8 @@ def get_table_schemas(
|
|
|
111
112
|
|
|
112
113
|
|
|
113
114
|
def parse_query_result(cursor) -> list[TableSchema]:
|
|
114
|
-
"""
|
|
115
|
+
"""
|
|
116
|
+
Fetch results from DB-API 2.0 cursor and creates list of table schemas.
|
|
115
117
|
|
|
116
118
|
For each row it creates :class:`TableSchema`.
|
|
117
119
|
"""
|
|
@@ -150,7 +150,7 @@ class InfoJsonEncodable(dict):
|
|
|
150
150
|
return value.isoformat()
|
|
151
151
|
if isinstance(value, datetime.timedelta):
|
|
152
152
|
return f"{value.total_seconds()} seconds"
|
|
153
|
-
if isinstance(value, (set,
|
|
153
|
+
if isinstance(value, (set, tuple)):
|
|
154
154
|
return str(list(value))
|
|
155
155
|
return value
|
|
156
156
|
|
|
@@ -214,6 +214,12 @@ class TaskInstanceInfo(InfoJsonEncodable):
|
|
|
214
214
|
}
|
|
215
215
|
|
|
216
216
|
|
|
217
|
+
class DatasetInfo(InfoJsonEncodable):
|
|
218
|
+
"""Defines encoding Airflow Dataset object to JSON."""
|
|
219
|
+
|
|
220
|
+
includes = ["uri", "extra"]
|
|
221
|
+
|
|
222
|
+
|
|
217
223
|
class TaskInfo(InfoJsonEncodable):
|
|
218
224
|
"""Defines encoding BaseOperator/AbstractOperator object to JSON."""
|
|
219
225
|
|
|
@@ -242,6 +248,9 @@ class TaskInfo(InfoJsonEncodable):
|
|
|
242
248
|
"run_as_user",
|
|
243
249
|
"sla",
|
|
244
250
|
"task_id",
|
|
251
|
+
"trigger_dag_id",
|
|
252
|
+
"external_dag_id",
|
|
253
|
+
"external_task_id",
|
|
245
254
|
"trigger_rule",
|
|
246
255
|
"upstream_task_ids",
|
|
247
256
|
"wait_for_downstream",
|
|
@@ -255,6 +264,8 @@ class TaskInfo(InfoJsonEncodable):
|
|
|
255
264
|
if hasattr(task, "task_group") and getattr(task.task_group, "_group_id", None)
|
|
256
265
|
else None
|
|
257
266
|
),
|
|
267
|
+
"inlets": lambda task: [DatasetInfo(inlet) for inlet in task.inlets],
|
|
268
|
+
"outlets": lambda task: [DatasetInfo(outlet) for outlet in task.outlets],
|
|
258
269
|
}
|
|
259
270
|
|
|
260
271
|
|
|
@@ -28,7 +28,7 @@ build-backend = "flit_core.buildapi"
|
|
|
28
28
|
|
|
29
29
|
[project]
|
|
30
30
|
name = "apache-airflow-providers-openlineage"
|
|
31
|
-
version = "1.9.0.rc1"
|
|
31
|
+
version = "1.9.0.rc2"
|
|
32
32
|
description = "Provider package apache-airflow-providers-openlineage for Apache Airflow"
|
|
33
33
|
readme = "README.rst"
|
|
34
34
|
authors = [
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|