apache-airflow-providers-openlineage 2.1.2b1__tar.gz → 2.1.2rc1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of apache-airflow-providers-openlineage might be problematic. Click here for more details.

Files changed (31)
  1. {apache_airflow_providers_openlineage-2.1.2b1 → apache_airflow_providers_openlineage-2.1.2rc1}/PKG-INFO +10 -10
  2. {apache_airflow_providers_openlineage-2.1.2b1 → apache_airflow_providers_openlineage-2.1.2rc1}/README.rst +4 -4
  3. {apache_airflow_providers_openlineage-2.1.2b1 → apache_airflow_providers_openlineage-2.1.2rc1}/pyproject.toml +7 -7
  4. {apache_airflow_providers_openlineage-2.1.2b1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/__init__.py +1 -1
  5. {apache_airflow_providers_openlineage-2.1.2b1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/get_provider_info.py +1 -1
  6. {apache_airflow_providers_openlineage-2.1.2b1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/plugins/listener.py +4 -3
  7. {apache_airflow_providers_openlineage-2.1.2b1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/plugins/macros.py +15 -4
  8. {apache_airflow_providers_openlineage-2.1.2b1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/utils/spark.py +64 -26
  9. {apache_airflow_providers_openlineage-2.1.2b1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/LICENSE +0 -0
  10. {apache_airflow_providers_openlineage-2.1.2b1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/conf.py +0 -0
  11. {apache_airflow_providers_openlineage-2.1.2b1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/extractors/__init__.py +0 -0
  12. {apache_airflow_providers_openlineage-2.1.2b1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/extractors/base.py +0 -0
  13. {apache_airflow_providers_openlineage-2.1.2b1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/extractors/bash.py +0 -0
  14. {apache_airflow_providers_openlineage-2.1.2b1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/extractors/manager.py +0 -0
  15. {apache_airflow_providers_openlineage-2.1.2b1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/extractors/python.py +0 -0
  16. {apache_airflow_providers_openlineage-2.1.2b1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/facets/AirflowDagRunFacet.json +0 -0
  17. {apache_airflow_providers_openlineage-2.1.2b1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/facets/AirflowDebugRunFacet.json +0 -0
  18. {apache_airflow_providers_openlineage-2.1.2b1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/facets/AirflowJobFacet.json +0 -0
  19. {apache_airflow_providers_openlineage-2.1.2b1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/facets/AirflowRunFacet.json +0 -0
  20. {apache_airflow_providers_openlineage-2.1.2b1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/facets/AirflowStateRunFacet.json +0 -0
  21. {apache_airflow_providers_openlineage-2.1.2b1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/facets/__init__.py +0 -0
  22. {apache_airflow_providers_openlineage-2.1.2b1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/plugins/__init__.py +0 -0
  23. {apache_airflow_providers_openlineage-2.1.2b1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/plugins/adapter.py +0 -0
  24. {apache_airflow_providers_openlineage-2.1.2b1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/plugins/facets.py +0 -0
  25. {apache_airflow_providers_openlineage-2.1.2b1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/plugins/openlineage.py +0 -0
  26. {apache_airflow_providers_openlineage-2.1.2b1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/sqlparser.py +0 -0
  27. {apache_airflow_providers_openlineage-2.1.2b1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/utils/__init__.py +0 -0
  28. {apache_airflow_providers_openlineage-2.1.2b1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/utils/selective_enable.py +0 -0
  29. {apache_airflow_providers_openlineage-2.1.2b1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/utils/sql.py +0 -0
  30. {apache_airflow_providers_openlineage-2.1.2b1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/utils/utils.py +0 -0
  31. {apache_airflow_providers_openlineage-2.1.2b1 → apache_airflow_providers_openlineage-2.1.2rc1}/src/airflow/providers/openlineage/version_compat.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: apache-airflow-providers-openlineage
3
- Version: 2.1.2b1
3
+ Version: 2.1.2rc1
4
4
  Summary: Provider package apache-airflow-providers-openlineage for Apache Airflow
5
5
  Keywords: airflow-provider,openlineage,airflow,integration
6
6
  Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -20,15 +20,15 @@ Classifier: Programming Language :: Python :: 3.10
20
20
  Classifier: Programming Language :: Python :: 3.11
21
21
  Classifier: Programming Language :: Python :: 3.12
22
22
  Classifier: Topic :: System :: Monitoring
23
- Requires-Dist: apache-airflow>=2.9.0
24
- Requires-Dist: apache-airflow-providers-common-sql>=1.20.0
25
- Requires-Dist: apache-airflow-providers-common-compat>=1.4.0
23
+ Requires-Dist: apache-airflow>=2.9.0rc0
24
+ Requires-Dist: apache-airflow-providers-common-sql>=1.20.0rc0
25
+ Requires-Dist: apache-airflow-providers-common-compat>=1.4.0rc0
26
26
  Requires-Dist: attrs>=22.2
27
27
  Requires-Dist: openlineage-integration-common>=1.24.2
28
28
  Requires-Dist: openlineage-python>=1.24.2
29
29
  Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
30
- Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2b1/changelog.html
31
- Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2b1
30
+ Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2/changelog.html
31
+ Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2
32
32
  Project-URL: Mastodon, https://fosstodon.org/@airflow
33
33
  Project-URL: Slack Chat, https://s.apache.org/airflow-slack
34
34
  Project-URL: Source Code, https://github.com/apache/airflow
@@ -59,7 +59,7 @@ Project-URL: YouTube, https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/
59
59
 
60
60
  Package ``apache-airflow-providers-openlineage``
61
61
 
62
- Release: ``2.1.2b1``
62
+ Release: ``2.1.2``
63
63
 
64
64
 
65
65
  `OpenLineage <https://openlineage.io/>`__
@@ -72,7 +72,7 @@ This is a provider package for ``openlineage`` provider. All classes for this pr
72
72
  are in ``airflow.providers.openlineage`` python package.
73
73
 
74
74
  You can find package information and changelog for the provider
75
- in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2b1/>`_.
75
+ in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2/>`_.
76
76
 
77
77
  Installation
78
78
  ------------
@@ -101,7 +101,7 @@ Cross provider package dependencies
101
101
  -----------------------------------
102
102
 
103
103
  Those are dependencies that might be needed in order to use all the features of the package.
104
- You need to install the specified provider packages in order to use them.
104
+ You need to install the specified providers in order to use them.
105
105
 
106
106
  You can install such cross-provider dependencies when installing from PyPI. For example:
107
107
 
@@ -118,5 +118,5 @@ Dependent package
118
118
  ================================================================================================================== =================
119
119
 
120
120
  The changelog for the provider package can be found in the
121
- `changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2b1/changelog.html>`_.
121
+ `changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2/changelog.html>`_.
122
122
 
@@ -23,7 +23,7 @@
23
23
 
24
24
  Package ``apache-airflow-providers-openlineage``
25
25
 
26
- Release: ``2.1.2b1``
26
+ Release: ``2.1.2``
27
27
 
28
28
 
29
29
  `OpenLineage <https://openlineage.io/>`__
@@ -36,7 +36,7 @@ This is a provider package for ``openlineage`` provider. All classes for this pr
36
36
  are in ``airflow.providers.openlineage`` python package.
37
37
 
38
38
  You can find package information and changelog for the provider
39
- in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2b1/>`_.
39
+ in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2/>`_.
40
40
 
41
41
  Installation
42
42
  ------------
@@ -65,7 +65,7 @@ Cross provider package dependencies
65
65
  -----------------------------------
66
66
 
67
67
  Those are dependencies that might be needed in order to use all the features of the package.
68
- You need to install the specified provider packages in order to use them.
68
+ You need to install the specified providers in order to use them.
69
69
 
70
70
  You can install such cross-provider dependencies when installing from PyPI. For example:
71
71
 
@@ -82,4 +82,4 @@ Dependent package
82
82
  ================================================================================================================== =================
83
83
 
84
84
  The changelog for the provider package can be found in the
85
- `changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2b1/changelog.html>`_.
85
+ `changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2/changelog.html>`_.
@@ -20,12 +20,12 @@
20
20
  # IF YOU WANT TO MODIFY THIS FILE EXCEPT DEPENDENCIES, YOU SHOULD MODIFY THE TEMPLATE
21
21
  # `pyproject_TEMPLATE.toml.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY
22
22
  [build-system]
23
- requires = ["flit_core==3.11.0"]
23
+ requires = ["flit_core==3.12.0"]
24
24
  build-backend = "flit_core.buildapi"
25
25
 
26
26
  [project]
27
27
  name = "apache-airflow-providers-openlineage"
28
- version = "2.1.2b1"
28
+ version = "2.1.2.rc1"
29
29
  description = "Provider package apache-airflow-providers-openlineage for Apache Airflow"
30
30
  readme = "README.rst"
31
31
  authors = [
@@ -57,9 +57,9 @@ requires-python = "~=3.9"
57
57
  # Make sure to run ``breeze static-checks --type update-providers-dependencies --all-files``
58
58
  # After you modify the dependencies, and rebuild your Breeze CI image with ``breeze ci-image build``
59
59
  dependencies = [
60
- "apache-airflow>=2.9.0",
61
- "apache-airflow-providers-common-sql>=1.20.0",
62
- "apache-airflow-providers-common-compat>=1.4.0",
60
+ "apache-airflow>=2.9.0rc0",
61
+ "apache-airflow-providers-common-sql>=1.20.0rc0",
62
+ "apache-airflow-providers-common-compat>=1.4.0rc0",
63
63
  "attrs>=22.2",
64
64
  "openlineage-integration-common>=1.24.2",
65
65
  "openlineage-python>=1.24.2",
@@ -87,8 +87,8 @@ apache-airflow-providers-fab = {workspace = true}
87
87
  apache-airflow-providers-standard = {workspace = true}
88
88
 
89
89
  [project.urls]
90
- "Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2b1"
91
- "Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2b1/changelog.html"
90
+ "Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2"
91
+ "Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-openlineage/2.1.2/changelog.html"
92
92
  "Bug Tracker" = "https://github.com/apache/airflow/issues"
93
93
  "Source Code" = "https://github.com/apache/airflow"
94
94
  "Slack Chat" = "https://s.apache.org/airflow-slack"
@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version
29
29
 
30
30
  __all__ = ["__version__"]
31
31
 
32
- __version__ = "2.1.2b1"
32
+ __version__ = "2.1.2"
33
33
 
34
34
  if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
35
35
  "2.9.0"
@@ -29,7 +29,7 @@ def get_provider_info():
29
29
  "state": "ready",
30
30
  "source-date-epoch": 1742478177,
31
31
  "versions": [
32
- "2.1.2b1",
32
+ "2.1.2",
33
33
  "2.1.1",
34
34
  "2.1.0",
35
35
  "2.0.0",
@@ -69,15 +69,16 @@ def _get_try_number_success(val):
69
69
 
70
70
  def _executor_initializer():
71
71
  """
72
- Initialize worker processes for the executor used for DagRun listener.
72
+ Initialize processes for the executor used with DAGRun listener's methods (on scheduler).
73
73
 
74
74
  This function must be picklable, so it cannot be defined as an inner method or local function.
75
75
 
76
76
  Reconfigures the ORM engine to prevent issues that arise when multiple processes interact with
77
77
  the Airflow database.
78
78
  """
79
- if not AIRFLOW_V_3_0_PLUS:
80
- settings.configure_orm()
79
+ # This initializer is used only on the scheduler
80
+ # We can configure_orm regardless of the Airflow version, as DB access is always allowed from scheduler.
81
+ settings.configure_orm()
81
82
 
82
83
 
83
84
  class OpenLineageListener:
@@ -21,6 +21,7 @@ from typing import TYPE_CHECKING
21
21
  from airflow.providers.openlineage import conf
22
22
  from airflow.providers.openlineage.plugins.adapter import OpenLineageAdapter
23
23
  from airflow.providers.openlineage.utils.utils import get_job_name
24
+ from airflow.providers.openlineage.version_compat import AIRFLOW_V_3_0_PLUS
24
25
 
25
26
  if TYPE_CHECKING:
26
27
  from airflow.models import TaskInstance
@@ -58,15 +59,25 @@ def lineage_run_id(task_instance: TaskInstance):
58
59
  For more information take a look at the guide:
59
60
  :ref:`howto/macros:openlineage`
60
61
  """
61
- if hasattr(task_instance, "logical_date"):
62
- logical_date = task_instance.logical_date
62
+ if AIRFLOW_V_3_0_PLUS:
63
+ context = task_instance.get_template_context()
64
+ if hasattr(task_instance, "dag_run"):
65
+ dag_run = task_instance.dag_run
66
+ elif hasattr(context, "dag_run"):
67
+ dag_run = context["dag_run"]
68
+ if hasattr(dag_run, "logical_date") and dag_run.logical_date:
69
+ date = dag_run.logical_date
70
+ else:
71
+ date = dag_run.run_after
72
+ elif hasattr(task_instance, "logical_date"):
73
+ date = task_instance.logical_date
63
74
  else:
64
- logical_date = task_instance.execution_date
75
+ date = task_instance.execution_date
65
76
  return OpenLineageAdapter.build_task_instance_run_id(
66
77
  dag_id=task_instance.dag_id,
67
78
  task_id=task_instance.task_id,
68
79
  try_number=task_instance.try_number,
69
- logical_date=logical_date,
80
+ logical_date=date,
70
81
  map_index=task_instance.map_index,
71
82
  )
72
83
 
@@ -53,35 +53,73 @@ def _get_parent_job_information_as_spark_properties(context: Context) -> dict:
53
53
 
54
54
  def _get_transport_information_as_spark_properties() -> dict:
55
55
  """Retrieve transport information as Spark properties."""
56
- transport = get_openlineage_listener().adapter.get_or_create_openlineage_client().transport
57
- if transport.kind != "http":
58
- log.info(
59
- "OpenLineage transport type `%s` does not support automatic "
60
- "injection of OpenLineage transport information into Spark properties.",
61
- transport.kind,
62
- )
63
- return {}
64
-
65
- properties = {
66
- "spark.openlineage.transport.type": transport.kind,
67
- "spark.openlineage.transport.url": transport.url,
68
- "spark.openlineage.transport.endpoint": transport.endpoint,
69
- "spark.openlineage.transport.timeoutInMillis": str(
70
- int(transport.timeout * 1000) # convert to milliseconds, as required by Spark integration
71
- ),
72
- }
73
- if transport.compression:
74
- properties["spark.openlineage.transport.compression"] = str(transport.compression)
75
56
 
76
- if hasattr(transport.config.auth, "api_key") and transport.config.auth.get_bearer():
77
- properties["spark.openlineage.transport.auth.type"] = "api_key"
78
- properties["spark.openlineage.transport.auth.apiKey"] = transport.config.auth.get_bearer()
57
+ def _get_transport_information(tp) -> dict:
58
+ properties = {
59
+ "type": tp.kind,
60
+ "url": tp.url,
61
+ "endpoint": tp.endpoint,
62
+ "timeoutInMillis": str(
63
+ int(tp.timeout) * 1000 # convert to milliseconds, as required by Spark integration
64
+ ),
65
+ }
66
+ if hasattr(tp, "compression") and tp.compression:
67
+ properties["compression"] = str(tp.compression)
68
+
69
+ if hasattr(tp.config.auth, "api_key") and tp.config.auth.get_bearer():
70
+ properties["auth.type"] = "api_key"
71
+ properties["auth.apiKey"] = tp.config.auth.get_bearer()
72
+
73
+ if hasattr(tp.config, "custom_headers") and tp.config.custom_headers:
74
+ for key, value in tp.config.custom_headers.items():
75
+ properties[f"headers.{key}"] = value
76
+ return properties
77
+
78
+ def _format_transport(props: dict, transport: dict, name: str | None):
79
+ for key, value in transport.items():
80
+ if name:
81
+ props[f"spark.openlineage.transport.transports.{name}.{key}"] = value
82
+ else:
83
+ props[f"spark.openlineage.transport.{key}"] = value
84
+ return props
79
85
 
80
- if hasattr(transport.config, "custom_headers") and transport.config.custom_headers:
81
- for key, value in transport.config.custom_headers.items():
82
- properties[f"spark.openlineage.transport.headers.{key}"] = value
86
+ transport = get_openlineage_listener().adapter.get_or_create_openlineage_client().transport
83
87
 
84
- return properties
88
+ if transport.kind == "composite":
89
+ http_transports = {}
90
+ for nested_transport in transport.transports:
91
+ if nested_transport.kind == "http":
92
+ http_transports[nested_transport.name] = _get_transport_information(nested_transport)
93
+ else:
94
+ name = nested_transport.name if hasattr(nested_transport, "name") else "no-name"
95
+ log.info(
96
+ "OpenLineage transport type `%s` with name `%s` is not supported in composite transport.",
97
+ nested_transport.kind,
98
+ name,
99
+ )
100
+ if len(http_transports) == 0:
101
+ log.warning(
102
+ "OpenLineage transport type `composite` does not contain http transport. Skipping "
103
+ "injection of OpenLineage transport information into Spark properties.",
104
+ )
105
+ return {}
106
+ props = {
107
+ "spark.openlineage.transport.type": "composite",
108
+ "spark.openlineage.transport.continueOnFailure": str(transport.config.continue_on_failure),
109
+ }
110
+ for name, http_transport in http_transports.items():
111
+ props = _format_transport(props, http_transport, name)
112
+ return props
113
+
114
+ elif transport.kind == "http":
115
+ return _format_transport({}, _get_transport_information(transport), None)
116
+
117
+ log.info(
118
+ "OpenLineage transport type `%s` does not support automatic "
119
+ "injection of OpenLineage transport information into Spark properties.",
120
+ transport.kind,
121
+ )
122
+ return {}
85
123
 
86
124
 
87
125
  def _is_parent_job_information_present_in_spark_properties(properties: dict) -> bool: