apache-airflow-providers-openlineage 2.1.2b1__py3-none-any.whl → 2.1.2rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apache-airflow-providers-openlineage might be problematic. Click here for more details.
- airflow/providers/openlineage/__init__.py +1 -1
- airflow/providers/openlineage/get_provider_info.py +1 -1
- airflow/providers/openlineage/plugins/listener.py +4 -3
- airflow/providers/openlineage/plugins/macros.py +15 -4
- airflow/providers/openlineage/utils/spark.py +64 -26
- {apache_airflow_providers_openlineage-2.1.2b1.dist-info → apache_airflow_providers_openlineage-2.1.2rc1.dist-info}/METADATA +10 -10
- {apache_airflow_providers_openlineage-2.1.2b1.dist-info → apache_airflow_providers_openlineage-2.1.2rc1.dist-info}/RECORD +9 -9
- {apache_airflow_providers_openlineage-2.1.2b1.dist-info → apache_airflow_providers_openlineage-2.1.2rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_openlineage-2.1.2b1.dist-info → apache_airflow_providers_openlineage-2.1.2rc1.dist-info}/entry_points.txt +0 -0
|
@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version
|
|
|
29
29
|
|
|
30
30
|
__all__ = ["__version__"]
|
|
31
31
|
|
|
32
|
-
__version__ = "2.1.
|
|
32
|
+
__version__ = "2.1.2"
|
|
33
33
|
|
|
34
34
|
if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
|
|
35
35
|
"2.9.0"
|
|
@@ -69,15 +69,16 @@ def _get_try_number_success(val):
|
|
|
69
69
|
|
|
70
70
|
def _executor_initializer():
|
|
71
71
|
"""
|
|
72
|
-
Initialize
|
|
72
|
+
Initialize processes for the executor used with DAGRun listener's methods (on scheduler).
|
|
73
73
|
|
|
74
74
|
This function must be picklable, so it cannot be defined as an inner method or local function.
|
|
75
75
|
|
|
76
76
|
Reconfigures the ORM engine to prevent issues that arise when multiple processes interact with
|
|
77
77
|
the Airflow database.
|
|
78
78
|
"""
|
|
79
|
-
|
|
80
|
-
|
|
79
|
+
# This initializer is used only on the scheduler
|
|
80
|
+
# We can configure_orm regardless of the Airflow version, as DB access is always allowed from scheduler.
|
|
81
|
+
settings.configure_orm()
|
|
81
82
|
|
|
82
83
|
|
|
83
84
|
class OpenLineageListener:
|
|
@@ -21,6 +21,7 @@ from typing import TYPE_CHECKING
|
|
|
21
21
|
from airflow.providers.openlineage import conf
|
|
22
22
|
from airflow.providers.openlineage.plugins.adapter import OpenLineageAdapter
|
|
23
23
|
from airflow.providers.openlineage.utils.utils import get_job_name
|
|
24
|
+
from airflow.providers.openlineage.version_compat import AIRFLOW_V_3_0_PLUS
|
|
24
25
|
|
|
25
26
|
if TYPE_CHECKING:
|
|
26
27
|
from airflow.models import TaskInstance
|
|
@@ -58,15 +59,25 @@ def lineage_run_id(task_instance: TaskInstance):
|
|
|
58
59
|
For more information take a look at the guide:
|
|
59
60
|
:ref:`howto/macros:openlineage`
|
|
60
61
|
"""
|
|
61
|
-
if
|
|
62
|
-
|
|
62
|
+
if AIRFLOW_V_3_0_PLUS:
|
|
63
|
+
context = task_instance.get_template_context()
|
|
64
|
+
if hasattr(task_instance, "dag_run"):
|
|
65
|
+
dag_run = task_instance.dag_run
|
|
66
|
+
elif hasattr(context, "dag_run"):
|
|
67
|
+
dag_run = context["dag_run"]
|
|
68
|
+
if hasattr(dag_run, "logical_date") and dag_run.logical_date:
|
|
69
|
+
date = dag_run.logical_date
|
|
70
|
+
else:
|
|
71
|
+
date = dag_run.run_after
|
|
72
|
+
elif hasattr(task_instance, "logical_date"):
|
|
73
|
+
date = task_instance.logical_date
|
|
63
74
|
else:
|
|
64
|
-
|
|
75
|
+
date = task_instance.execution_date
|
|
65
76
|
return OpenLineageAdapter.build_task_instance_run_id(
|
|
66
77
|
dag_id=task_instance.dag_id,
|
|
67
78
|
task_id=task_instance.task_id,
|
|
68
79
|
try_number=task_instance.try_number,
|
|
69
|
-
logical_date=
|
|
80
|
+
logical_date=date,
|
|
70
81
|
map_index=task_instance.map_index,
|
|
71
82
|
)
|
|
72
83
|
|
|
@@ -53,35 +53,73 @@ def _get_parent_job_information_as_spark_properties(context: Context) -> dict:
|
|
|
53
53
|
|
|
54
54
|
def _get_transport_information_as_spark_properties() -> dict:
|
|
55
55
|
"""Retrieve transport information as Spark properties."""
|
|
56
|
-
transport = get_openlineage_listener().adapter.get_or_create_openlineage_client().transport
|
|
57
|
-
if transport.kind != "http":
|
|
58
|
-
log.info(
|
|
59
|
-
"OpenLineage transport type `%s` does not support automatic "
|
|
60
|
-
"injection of OpenLineage transport information into Spark properties.",
|
|
61
|
-
transport.kind,
|
|
62
|
-
)
|
|
63
|
-
return {}
|
|
64
|
-
|
|
65
|
-
properties = {
|
|
66
|
-
"spark.openlineage.transport.type": transport.kind,
|
|
67
|
-
"spark.openlineage.transport.url": transport.url,
|
|
68
|
-
"spark.openlineage.transport.endpoint": transport.endpoint,
|
|
69
|
-
"spark.openlineage.transport.timeoutInMillis": str(
|
|
70
|
-
int(transport.timeout * 1000) # convert to milliseconds, as required by Spark integration
|
|
71
|
-
),
|
|
72
|
-
}
|
|
73
|
-
if transport.compression:
|
|
74
|
-
properties["spark.openlineage.transport.compression"] = str(transport.compression)
|
|
75
56
|
|
|
76
|
-
|
|
77
|
-
properties
|
|
78
|
-
|
|
57
|
+
def _get_transport_information(tp) -> dict:
|
|
58
|
+
properties = {
|
|
59
|
+
"type": tp.kind,
|
|
60
|
+
"url": tp.url,
|
|
61
|
+
"endpoint": tp.endpoint,
|
|
62
|
+
"timeoutInMillis": str(
|
|
63
|
+
int(tp.timeout) * 1000 # convert to milliseconds, as required by Spark integration
|
|
64
|
+
),
|
|
65
|
+
}
|
|
66
|
+
if hasattr(tp, "compression") and tp.compression:
|
|
67
|
+
properties["compression"] = str(tp.compression)
|
|
68
|
+
|
|
69
|
+
if hasattr(tp.config.auth, "api_key") and tp.config.auth.get_bearer():
|
|
70
|
+
properties["auth.type"] = "api_key"
|
|
71
|
+
properties["auth.apiKey"] = tp.config.auth.get_bearer()
|
|
72
|
+
|
|
73
|
+
if hasattr(tp.config, "custom_headers") and tp.config.custom_headers:
|
|
74
|
+
for key, value in tp.config.custom_headers.items():
|
|
75
|
+
properties[f"headers.{key}"] = value
|
|
76
|
+
return properties
|
|
77
|
+
|
|
78
|
+
def _format_transport(props: dict, transport: dict, name: str | None):
|
|
79
|
+
for key, value in transport.items():
|
|
80
|
+
if name:
|
|
81
|
+
props[f"spark.openlineage.transport.transports.{name}.{key}"] = value
|
|
82
|
+
else:
|
|
83
|
+
props[f"spark.openlineage.transport.{key}"] = value
|
|
84
|
+
return props
|
|
79
85
|
|
|
80
|
-
|
|
81
|
-
for key, value in transport.config.custom_headers.items():
|
|
82
|
-
properties[f"spark.openlineage.transport.headers.{key}"] = value
|
|
86
|
+
transport = get_openlineage_listener().adapter.get_or_create_openlineage_client().transport
|
|
83
87
|
|
|
84
|
-
|
|
88
|
+
if transport.kind == "composite":
|
|
89
|
+
http_transports = {}
|
|
90
|
+
for nested_transport in transport.transports:
|
|
91
|
+
if nested_transport.kind == "http":
|
|
92
|
+
http_transports[nested_transport.name] = _get_transport_information(nested_transport)
|
|
93
|
+
else:
|
|
94
|
+
name = nested_transport.name if hasattr(nested_transport, "name") else "no-name"
|
|
95
|
+
log.info(
|
|
96
|
+
"OpenLineage transport type `%s` with name `%s` is not supported in composite transport.",
|
|
97
|
+
nested_transport.kind,
|
|
98
|
+
name,
|
|
99
|
+
)
|
|
100
|
+
if len(http_transports) == 0:
|
|
101
|
+
log.warning(
|
|
102
|
+
"OpenLineage transport type `composite` does not contain http transport. Skipping "
|
|
103
|
+
"injection of OpenLineage transport information into Spark properties.",
|
|
104
|
+
)
|
|
105
|
+
return {}
|
|
106
|
+
props = {
|
|
107
|
+
"spark.openlineage.transport.type": "composite",
|
|
108
|
+
"spark.openlineage.transport.continueOnFailure": str(transport.config.continue_on_failure),
|
|
109
|
+
}
|
|
110
|
+
for name, http_transport in http_transports.items():
|
|
111
|
+
props = _format_transport(props, http_transport, name)
|
|
112
|
+
return props
|
|
113
|
+
|
|
114
|
+
elif transport.kind == "http":
|
|
115
|
+
return _format_transport({}, _get_transport_information(transport), None)
|
|
116
|
+
|
|
117
|
+
log.info(
|
|
118
|
+
"OpenLineage transport type `%s` does not support automatic "
|
|
119
|
+
"injection of OpenLineage transport information into Spark properties.",
|
|
120
|
+
transport.kind,
|
|
121
|
+
)
|
|
122
|
+
return {}
|
|
85
123
|
|
|
86
124
|
|
|
87
125
|
def _is_parent_job_information_present_in_spark_properties(properties: dict) -> bool:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: apache-airflow-providers-openlineage
|
|
3
|
-
Version: 2.1.2b1
|
|
3
|
+
Version: 2.1.2rc1
|
|
4
4
|
Summary: Provider package apache-airflow-providers-openlineage for Apache Airflow
|
|
5
5
|
Keywords: airflow-provider,openlineage,airflow,integration
|
|
6
6
|
Author-email: Apache Software Foundation <dev@airflow.apache.org>
|
|
@@ -20,15 +20,15 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
20
20
|
Classifier: Programming Language :: Python :: 3.11
|
|
21
21
|
Classifier: Programming Language :: Python :: 3.12
|
|
22
22
|
Classifier: Topic :: System :: Monitoring
|
|
23
|
-
Requires-Dist: apache-airflow>=2.9.
|
|
24
|
-
Requires-Dist: apache-airflow-providers-common-sql>=1.20.
|
|
25
|
-
Requires-Dist: apache-airflow-providers-common-compat>=1.4.
|
|
23
|
+
Requires-Dist: apache-airflow>=2.9.0rc0
|
|
24
|
+
Requires-Dist: apache-airflow-providers-common-sql>=1.20.0rc0
|
|
25
|
+
Requires-Dist: apache-airflow-providers-common-compat>=1.4.0rc0
|
|
26
26
|
Requires-Dist: attrs>=22.2
|
|
27
27
|
Requires-Dist: openlineage-integration-common>=1.24.2
|
|
28
28
|
Requires-Dist: openlineage-python>=1.24.2
|
|
29
29
|
Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
|
|
30
|
-
Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.
|
|
31
|
-
Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.
|
|
30
|
+
Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2/changelog.html
|
|
31
|
+
Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2
|
|
32
32
|
Project-URL: Mastodon, https://fosstodon.org/@airflow
|
|
33
33
|
Project-URL: Slack Chat, https://s.apache.org/airflow-slack
|
|
34
34
|
Project-URL: Source Code, https://github.com/apache/airflow
|
|
@@ -59,7 +59,7 @@ Project-URL: YouTube, https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/
|
|
|
59
59
|
|
|
60
60
|
Package ``apache-airflow-providers-openlineage``
|
|
61
61
|
|
|
62
|
-
Release: ``2.1.
|
|
62
|
+
Release: ``2.1.2``
|
|
63
63
|
|
|
64
64
|
|
|
65
65
|
`OpenLineage <https://openlineage.io/>`__
|
|
@@ -72,7 +72,7 @@ This is a provider package for ``openlineage`` provider. All classes for this pr
|
|
|
72
72
|
are in ``airflow.providers.openlineage`` python package.
|
|
73
73
|
|
|
74
74
|
You can find package information and changelog for the provider
|
|
75
|
-
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.
|
|
75
|
+
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2/>`_.
|
|
76
76
|
|
|
77
77
|
Installation
|
|
78
78
|
------------
|
|
@@ -101,7 +101,7 @@ Cross provider package dependencies
|
|
|
101
101
|
-----------------------------------
|
|
102
102
|
|
|
103
103
|
Those are dependencies that might be needed in order to use all the features of the package.
|
|
104
|
-
You need to install the specified
|
|
104
|
+
You need to install the specified providers in order to use them.
|
|
105
105
|
|
|
106
106
|
You can install such cross-provider dependencies when installing from PyPI. For example:
|
|
107
107
|
|
|
@@ -118,5 +118,5 @@ Dependent package
|
|
|
118
118
|
================================================================================================================== =================
|
|
119
119
|
|
|
120
120
|
The changelog for the provider package can be found in the
|
|
121
|
-
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.
|
|
121
|
+
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2/changelog.html>`_.
|
|
122
122
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
airflow/providers/openlineage/LICENSE,sha256=gXPVwptPlW1TJ4HSuG5OMPg-a3h43OGMkZRR1rpwfJA,10850
|
|
2
|
-
airflow/providers/openlineage/__init__.py,sha256=
|
|
2
|
+
airflow/providers/openlineage/__init__.py,sha256=z82Hjldc_TSS3Uwai9WOyuJKxfAG5BH4NlVuMbMSc8g,1498
|
|
3
3
|
airflow/providers/openlineage/conf.py,sha256=aYdLU7iHBdGIU8ZAC5iUiIDgXP9gvP9r_z5hTAbXPOU,5535
|
|
4
|
-
airflow/providers/openlineage/get_provider_info.py,sha256=
|
|
4
|
+
airflow/providers/openlineage/get_provider_info.py,sha256=ryuVSqcK5KIwhNtglm59artrNsQbzFjFNu-yfjOGetY,10320
|
|
5
5
|
airflow/providers/openlineage/sqlparser.py,sha256=N38XhkU-lxwxnYevQpq63JOBi4rzp0q56JjxO3H24W8,20340
|
|
6
6
|
airflow/providers/openlineage/version_compat.py,sha256=aHg90_DtgoSnQvILFICexMyNlHlALBdaeWqkX3dFDug,1605
|
|
7
7
|
airflow/providers/openlineage/extractors/__init__.py,sha256=I0X4f6zUniclyD9zT0DFHRImpCpJVP4MkPJT3cd7X5I,1081
|
|
@@ -18,15 +18,15 @@ airflow/providers/openlineage/facets/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOF
|
|
|
18
18
|
airflow/providers/openlineage/plugins/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
|
|
19
19
|
airflow/providers/openlineage/plugins/adapter.py,sha256=799jV2xIyD0Eu1nRIT9CwzmQqRv-3BZXP1IOMJzSryo,20325
|
|
20
20
|
airflow/providers/openlineage/plugins/facets.py,sha256=VvyMYR6ONkC95q5FdNmohv0scbA1Ej_B5cQ97as5GvA,4161
|
|
21
|
-
airflow/providers/openlineage/plugins/listener.py,sha256=
|
|
22
|
-
airflow/providers/openlineage/plugins/macros.py,sha256=
|
|
21
|
+
airflow/providers/openlineage/plugins/listener.py,sha256=yjyLbXJGbZEFMXmj5XtTeQFxfmFl-R2vHawGeUAue-Y,25983
|
|
22
|
+
airflow/providers/openlineage/plugins/macros.py,sha256=qrHLjE95Uq8H-W9CIkQe5Y9Pu1O-GErhpDV2olGaGQM,3730
|
|
23
23
|
airflow/providers/openlineage/plugins/openlineage.py,sha256=HD3mYNPfXd-buZydEpuAY-naVBXhausU2LYUNhL48QA,1906
|
|
24
24
|
airflow/providers/openlineage/utils/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
|
|
25
25
|
airflow/providers/openlineage/utils/selective_enable.py,sha256=ZJUH_iS0thup2qYAVcjOgNIru6E8bKc56_pNQHuc8Fg,3451
|
|
26
|
-
airflow/providers/openlineage/utils/spark.py,sha256
|
|
26
|
+
airflow/providers/openlineage/utils/spark.py,sha256=-2XfUaV0WISK6vHSBmB9E78xkuPjO3fM1tDQCZG7j9I,7303
|
|
27
27
|
airflow/providers/openlineage/utils/sql.py,sha256=vkKrrdENEMVG8gtzV6yuTXMa2Z9fBAEXmxDVIDaVncI,9571
|
|
28
28
|
airflow/providers/openlineage/utils/utils.py,sha256=Ww7Aryq4gfWbSf2ittdNTn_ksaxnD4P_xiTlKUy2Okc,28787
|
|
29
|
-
apache_airflow_providers_openlineage-2.1.
|
|
30
|
-
apache_airflow_providers_openlineage-2.1.
|
|
31
|
-
apache_airflow_providers_openlineage-2.1.
|
|
32
|
-
apache_airflow_providers_openlineage-2.1.
|
|
29
|
+
apache_airflow_providers_openlineage-2.1.2rc1.dist-info/entry_points.txt,sha256=GAx0_i2OeZzqaiiiYuA-xchICDXiCT5kVqpKSxsOjt4,214
|
|
30
|
+
apache_airflow_providers_openlineage-2.1.2rc1.dist-info/WHEEL,sha256=_2ozNFCLWc93bK4WKHCO-eDUENDlo-dgc9cU3qokYO4,82
|
|
31
|
+
apache_airflow_providers_openlineage-2.1.2rc1.dist-info/METADATA,sha256=jy4lwp2jQarfvdobMK_lbkoklCPvanEdJe7ezzPFdZE,5697
|
|
32
|
+
apache_airflow_providers_openlineage-2.1.2rc1.dist-info/RECORD,,
|
|
File without changes
|