apache-airflow-providers-openlineage 2.7.2__py3-none-any.whl → 2.7.3rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apache-airflow-providers-openlineage might be problematic. Click here for more details.
- airflow/providers/openlineage/__init__.py +1 -1
- airflow/providers/openlineage/plugins/openlineage.py +14 -12
- airflow/providers/openlineage/sqlparser.py +23 -11
- airflow/providers/openlineage/utils/utils.py +10 -6
- {apache_airflow_providers_openlineage-2.7.2.dist-info → apache_airflow_providers_openlineage-2.7.3rc1.dist-info}/METADATA +14 -14
- {apache_airflow_providers_openlineage-2.7.2.dist-info → apache_airflow_providers_openlineage-2.7.3rc1.dist-info}/RECORD +8 -8
- {apache_airflow_providers_openlineage-2.7.2.dist-info → apache_airflow_providers_openlineage-2.7.3rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_openlineage-2.7.2.dist-info → apache_airflow_providers_openlineage-2.7.3rc1.dist-info}/entry_points.txt +0 -0
|
@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version
|
|
|
29
29
|
|
|
30
30
|
__all__ = ["__version__"]
|
|
31
31
|
|
|
32
|
-
__version__ = "2.7.2"
|
|
32
|
+
__version__ = "2.7.3"
|
|
33
33
|
|
|
34
34
|
if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
|
|
35
35
|
"2.10.0"
|
|
@@ -18,16 +18,20 @@ from __future__ import annotations
|
|
|
18
18
|
|
|
19
19
|
from airflow.plugins_manager import AirflowPlugin
|
|
20
20
|
from airflow.providers.openlineage import conf
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
21
|
+
|
|
22
|
+
# Conditional imports - only load expensive dependencies when plugin is enabled
|
|
23
|
+
if not conf.is_disabled():
|
|
24
|
+
from airflow.lineage.hook import HookLineageReader
|
|
25
|
+
from airflow.providers.openlineage.plugins.listener import get_openlineage_listener
|
|
26
|
+
from airflow.providers.openlineage.plugins.macros import (
|
|
27
|
+
lineage_job_name,
|
|
28
|
+
lineage_job_namespace,
|
|
29
|
+
lineage_parent_id,
|
|
30
|
+
lineage_root_job_name,
|
|
31
|
+
lineage_root_parent_id,
|
|
32
|
+
lineage_root_run_id,
|
|
33
|
+
lineage_run_id,
|
|
34
|
+
)
|
|
31
35
|
|
|
32
36
|
|
|
33
37
|
class OpenLineageProviderPlugin(AirflowPlugin):
|
|
@@ -50,8 +54,6 @@ class OpenLineageProviderPlugin(AirflowPlugin):
|
|
|
50
54
|
lineage_root_parent_id,
|
|
51
55
|
]
|
|
52
56
|
listeners = [get_openlineage_listener()]
|
|
53
|
-
from airflow.lineage.hook import HookLineageReader
|
|
54
|
-
|
|
55
57
|
hook_lineage_readers = [HookLineageReader]
|
|
56
58
|
else:
|
|
57
59
|
macros = []
|
|
@@ -232,8 +232,8 @@ class SQLParser(LoggingMixin):
|
|
|
232
232
|
else None,
|
|
233
233
|
)
|
|
234
234
|
|
|
235
|
+
@staticmethod
|
|
235
236
|
def get_metadata_from_parser(
|
|
236
|
-
self,
|
|
237
237
|
inputs: list[DbTableMeta],
|
|
238
238
|
outputs: list[DbTableMeta],
|
|
239
239
|
database_info: DatabaseInfo,
|
|
@@ -315,6 +315,7 @@ class SQLParser(LoggingMixin):
|
|
|
315
315
|
:param database_info: database specific information
|
|
316
316
|
:param database: when passed it takes precedence over parsed database name
|
|
317
317
|
:param sqlalchemy_engine: when passed, engine's dialect is used to compile SQL queries
|
|
318
|
+
:param use_connection: if call to db should be performed to enrich datasets (e.g., with schema)
|
|
318
319
|
"""
|
|
319
320
|
job_facets: dict[str, JobFacet] = {"sql": sql_job.SQLJobFacet(query=self.normalize_sql(sql))}
|
|
320
321
|
parse_result = self.parse(sql=self.split_sql_string(sql))
|
|
@@ -338,17 +339,28 @@ class SQLParser(LoggingMixin):
|
|
|
338
339
|
)
|
|
339
340
|
|
|
340
341
|
namespace = self.create_namespace(database_info=database_info)
|
|
342
|
+
inputs: list[Dataset] = []
|
|
343
|
+
outputs: list[Dataset] = []
|
|
341
344
|
if use_connection:
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
345
|
+
try:
|
|
346
|
+
inputs, outputs = self.parse_table_schemas(
|
|
347
|
+
hook=hook,
|
|
348
|
+
inputs=parse_result.in_tables,
|
|
349
|
+
outputs=parse_result.out_tables,
|
|
350
|
+
namespace=namespace,
|
|
351
|
+
database=database,
|
|
352
|
+
database_info=database_info,
|
|
353
|
+
sqlalchemy_engine=sqlalchemy_engine,
|
|
354
|
+
)
|
|
355
|
+
except Exception as e:
|
|
356
|
+
self.log.warning(
|
|
357
|
+
"OpenLineage method failed to enrich datasets using db metadata. Exception: `%s`",
|
|
358
|
+
e,
|
|
359
|
+
)
|
|
360
|
+
self.log.debug("OpenLineage failure details:", exc_info=True)
|
|
361
|
+
|
|
362
|
+
# If call to db failed or was not performed, use datasets from sql parsing alone
|
|
363
|
+
if not inputs and not outputs:
|
|
352
364
|
inputs, outputs = self.get_metadata_from_parser(
|
|
353
365
|
inputs=parse_result.in_tables,
|
|
354
366
|
outputs=parse_result.out_tables,
|
|
@@ -751,16 +751,20 @@ def get_airflow_state_run_facet(
|
|
|
751
751
|
dag_id: str, run_id: str, task_ids: list[str], dag_run_state: DagRunState
|
|
752
752
|
) -> dict[str, AirflowStateRunFacet]:
|
|
753
753
|
tis = DagRun.fetch_task_instances(dag_id=dag_id, run_id=run_id, task_ids=task_ids)
|
|
754
|
+
|
|
755
|
+
def get_task_duration(ti):
|
|
756
|
+
if ti.duration is not None:
|
|
757
|
+
return ti.duration
|
|
758
|
+
if ti.end_date is not None and ti.start_date is not None:
|
|
759
|
+
return (ti.end_date - ti.start_date).total_seconds()
|
|
760
|
+
# Fallback to 0.0 for tasks with missing timestamps (e.g., skipped/terminated tasks)
|
|
761
|
+
return 0.0
|
|
762
|
+
|
|
754
763
|
return {
|
|
755
764
|
"airflowState": AirflowStateRunFacet(
|
|
756
765
|
dagRunState=dag_run_state,
|
|
757
766
|
tasksState={ti.task_id: ti.state for ti in tis},
|
|
758
|
-
tasksDuration={
|
|
759
|
-
ti.task_id: ti.duration
|
|
760
|
-
if ti.duration is not None
|
|
761
|
-
else (ti.end_date - ti.start_date).total_seconds()
|
|
762
|
-
for ti in tis
|
|
763
|
-
},
|
|
767
|
+
tasksDuration={ti.task_id: get_task_duration(ti) for ti in tis},
|
|
764
768
|
)
|
|
765
769
|
}
|
|
766
770
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: apache-airflow-providers-openlineage
|
|
3
|
-
Version: 2.7.2
|
|
3
|
+
Version: 2.7.3rc1
|
|
4
4
|
Summary: Provider package apache-airflow-providers-openlineage for Apache Airflow
|
|
5
5
|
Keywords: airflow-provider,openlineage,airflow,integration
|
|
6
6
|
Author-email: Apache Software Foundation <dev@airflow.apache.org>
|
|
@@ -20,15 +20,15 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
20
20
|
Classifier: Programming Language :: Python :: 3.12
|
|
21
21
|
Classifier: Programming Language :: Python :: 3.13
|
|
22
22
|
Classifier: Topic :: System :: Monitoring
|
|
23
|
-
Requires-Dist: apache-airflow>=2.10.0
|
|
24
|
-
Requires-Dist: apache-airflow-providers-common-sql>=1.20.0
|
|
25
|
-
Requires-Dist: apache-airflow-providers-common-compat>=1.
|
|
23
|
+
Requires-Dist: apache-airflow>=2.10.0rc1
|
|
24
|
+
Requires-Dist: apache-airflow-providers-common-sql>=1.20.0rc1
|
|
25
|
+
Requires-Dist: apache-airflow-providers-common-compat>=1.8.0rc1
|
|
26
26
|
Requires-Dist: attrs>=22.2
|
|
27
|
-
Requires-Dist: openlineage-integration-common>=1.
|
|
28
|
-
Requires-Dist: openlineage-python>=1.
|
|
27
|
+
Requires-Dist: openlineage-integration-common>=1.38.0
|
|
28
|
+
Requires-Dist: openlineage-python>=1.38.0
|
|
29
29
|
Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
|
|
30
|
-
Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.7.2/changelog.html
|
|
31
|
-
Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.7.2
|
|
30
|
+
Project-URL: Changelog, https://airflow.staged.apache.org/docs/apache-airflow-providers-openlineage/2.7.3/changelog.html
|
|
31
|
+
Project-URL: Documentation, https://airflow.staged.apache.org/docs/apache-airflow-providers-openlineage/2.7.3
|
|
32
32
|
Project-URL: Mastodon, https://fosstodon.org/@airflow
|
|
33
33
|
Project-URL: Slack Chat, https://s.apache.org/airflow-slack
|
|
34
34
|
Project-URL: Source Code, https://github.com/apache/airflow
|
|
@@ -59,7 +59,7 @@ Project-URL: YouTube, https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/
|
|
|
59
59
|
|
|
60
60
|
Package ``apache-airflow-providers-openlineage``
|
|
61
61
|
|
|
62
|
-
Release: ``2.7.2``
|
|
62
|
+
Release: ``2.7.3``
|
|
63
63
|
|
|
64
64
|
|
|
65
65
|
`OpenLineage <https://openlineage.io/>`__
|
|
@@ -72,7 +72,7 @@ This is a provider package for ``openlineage`` provider. All classes for this pr
|
|
|
72
72
|
are in ``airflow.providers.openlineage`` python package.
|
|
73
73
|
|
|
74
74
|
You can find package information and changelog for the provider
|
|
75
|
-
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.7.2/>`_.
|
|
75
|
+
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.7.3/>`_.
|
|
76
76
|
|
|
77
77
|
Installation
|
|
78
78
|
------------
|
|
@@ -91,10 +91,10 @@ PIP package Version required
|
|
|
91
91
|
========================================== ==================
|
|
92
92
|
``apache-airflow`` ``>=2.10.0``
|
|
93
93
|
``apache-airflow-providers-common-sql`` ``>=1.20.0``
|
|
94
|
-
``apache-airflow-providers-common-compat`` ``>=1.
|
|
94
|
+
``apache-airflow-providers-common-compat`` ``>=1.8.0``
|
|
95
95
|
``attrs`` ``>=22.2``
|
|
96
|
-
``openlineage-integration-common`` ``>=1.
|
|
97
|
-
``openlineage-python`` ``>=1.
|
|
96
|
+
``openlineage-integration-common`` ``>=1.38.0``
|
|
97
|
+
``openlineage-python`` ``>=1.38.0``
|
|
98
98
|
========================================== ==================
|
|
99
99
|
|
|
100
100
|
Cross provider package dependencies
|
|
@@ -118,5 +118,5 @@ Dependent package
|
|
|
118
118
|
================================================================================================================== =================
|
|
119
119
|
|
|
120
120
|
The changelog for the provider package can be found in the
|
|
121
|
-
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.7.2/changelog.html>`_.
|
|
121
|
+
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.7.3/changelog.html>`_.
|
|
122
122
|
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
airflow/providers/openlineage/LICENSE,sha256=gXPVwptPlW1TJ4HSuG5OMPg-a3h43OGMkZRR1rpwfJA,10850
|
|
2
|
-
airflow/providers/openlineage/__init__.py,sha256=
|
|
2
|
+
airflow/providers/openlineage/__init__.py,sha256=WVDyhuTV4ymGp3pA3G3ut6p-P3KlQPqhLVZXtt1sPco,1500
|
|
3
3
|
airflow/providers/openlineage/conf.py,sha256=9v2DpQ84BBCdRxPlh8QsboTqX8HXe-qeHVcTMRL5c3o,5807
|
|
4
4
|
airflow/providers/openlineage/get_provider_info.py,sha256=2Oy13q-jA-UYt-a9pYBk4PnImYshGnJCPD1Jj80ChNw,9453
|
|
5
|
-
airflow/providers/openlineage/sqlparser.py,sha256=
|
|
5
|
+
airflow/providers/openlineage/sqlparser.py,sha256=XtfIyBtCw61TDf0num4uD-7I264Hygsv2dTEIgylorQ,20998
|
|
6
6
|
airflow/providers/openlineage/version_compat.py,sha256=weU73JlGTWEcfDLnuFGOXw9Yiagp-bU_--nRgogt-jk,2020
|
|
7
7
|
airflow/providers/openlineage/extractors/__init__.py,sha256=I0X4f6zUniclyD9zT0DFHRImpCpJVP4MkPJT3cd7X5I,1081
|
|
8
8
|
airflow/providers/openlineage/extractors/base.py,sha256=0K7prvOeYjs30P87zgcOmABZOZYsw0WYoFBstS_vgmY,6449
|
|
@@ -22,13 +22,13 @@ airflow/providers/openlineage/plugins/adapter.py,sha256=moIgAoBTTVaKuU-i00xvkKyR
|
|
|
22
22
|
airflow/providers/openlineage/plugins/facets.py,sha256=x2EPwst9MsoO53OpFV_aANO_rhiPq_2GLP4UOrqBnnQ,4279
|
|
23
23
|
airflow/providers/openlineage/plugins/listener.py,sha256=IQbTH5yvbR7R85Ffuvxq3oCi_5a_VDy_FVongNSMCfI,32329
|
|
24
24
|
airflow/providers/openlineage/plugins/macros.py,sha256=RfxkpNq78CHzfTAf9X7MQ_zRArMRu9sSD2j69fPSK7s,5265
|
|
25
|
-
airflow/providers/openlineage/plugins/openlineage.py,sha256=
|
|
25
|
+
airflow/providers/openlineage/plugins/openlineage.py,sha256=Czx9h8qDmfClhOxRuFOm7F1f9qG3jMXIvDzjXesaLTE,2175
|
|
26
26
|
airflow/providers/openlineage/utils/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
|
|
27
27
|
airflow/providers/openlineage/utils/selective_enable.py,sha256=XpozjxcWBASAQXSR2N0GkA-QZVq6EmerOzyR4-eQ64M,3521
|
|
28
28
|
airflow/providers/openlineage/utils/spark.py,sha256=X5liLxVLgQcgPF_0lFtQULeMOv_9dGj-HFjtZvWFgOo,7626
|
|
29
29
|
airflow/providers/openlineage/utils/sql.py,sha256=b_k2fUyGGWzR1eau7tgq7vKQJsR7wPQzDF8M-WRq6jk,9548
|
|
30
|
-
airflow/providers/openlineage/utils/utils.py,sha256=
|
|
31
|
-
apache_airflow_providers_openlineage-2.7.
|
|
32
|
-
apache_airflow_providers_openlineage-2.7.
|
|
33
|
-
apache_airflow_providers_openlineage-2.7.
|
|
34
|
-
apache_airflow_providers_openlineage-2.7.
|
|
30
|
+
airflow/providers/openlineage/utils/utils.py,sha256=VfGKEzeRxKe06-ASuA1MmSNSNw8RxaANeJpYdAw4cC0,37473
|
|
31
|
+
apache_airflow_providers_openlineage-2.7.3rc1.dist-info/entry_points.txt,sha256=GAx0_i2OeZzqaiiiYuA-xchICDXiCT5kVqpKSxsOjt4,214
|
|
32
|
+
apache_airflow_providers_openlineage-2.7.3rc1.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
|
|
33
|
+
apache_airflow_providers_openlineage-2.7.3rc1.dist-info/METADATA,sha256=sJ32EOS-It9sWfTC8Mfq8PnZs7GXL4LEsu60u85kcS0,5714
|
|
34
|
+
apache_airflow_providers_openlineage-2.7.3rc1.dist-info/RECORD,,
|
|
File without changes
|