apache-airflow-providers-databricks 7.5.0rc1__tar.gz → 7.6.0rc1__tar.gz
This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/PKG-INFO +17 -16
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/README.rst +10 -9
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/docs/changelog.rst +35 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/docs/index.rst +9 -8
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/provider.yaml +2 -1
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/pyproject.toml +7 -7
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/src/airflow/providers/databricks/__init__.py +1 -1
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/src/airflow/providers/databricks/hooks/databricks.py +35 -31
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/src/airflow/providers/databricks/hooks/databricks_base.py +16 -9
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/src/airflow/providers/databricks/hooks/databricks_sql.py +13 -14
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/src/airflow/providers/databricks/operators/databricks.py +21 -6
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/src/airflow/providers/databricks/operators/databricks_repos.py +1 -1
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/src/airflow/providers/databricks/operators/databricks_sql.py +1 -1
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/src/airflow/providers/databricks/operators/databricks_workflow.py +21 -2
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/src/airflow/providers/databricks/plugins/databricks_workflow.py +118 -57
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/src/airflow/providers/databricks/sensors/databricks.py +2 -2
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/src/airflow/providers/databricks/sensors/databricks_partition.py +8 -3
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/src/airflow/providers/databricks/sensors/databricks_sql.py +8 -3
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/src/airflow/providers/databricks/utils/openlineage.py +85 -58
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/src/airflow/providers/databricks/version_compat.py +10 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/tests/unit/databricks/hooks/test_databricks.py +263 -216
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/tests/unit/databricks/hooks/test_databricks_azure_workload_identity.py +13 -10
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/tests/unit/databricks/hooks/test_databricks_azure_workload_identity_async.py +13 -10
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/tests/unit/databricks/hooks/test_databricks_sql.py +29 -13
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/tests/unit/databricks/operators/test_databricks.py +0 -4
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/tests/unit/databricks/operators/test_databricks_copy.py +10 -8
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/tests/unit/databricks/operators/test_databricks_workflow.py +1 -2
- apache_airflow_providers_databricks-7.6.0rc1/tests/unit/databricks/plugins/test_databricks_workflow.py +459 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/tests/unit/databricks/triggers/test_databricks.py +24 -17
- apache_airflow_providers_databricks-7.6.0rc1/tests/unit/databricks/utils/test_openlineage.py +1223 -0
- apache_airflow_providers_databricks-7.5.0rc1/tests/unit/databricks/plugins/test_databricks_workflow.py +0 -274
- apache_airflow_providers_databricks-7.5.0rc1/tests/unit/databricks/utils/test_openlineage.py +0 -617
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/docs/.latest-doc-only-change.txt +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/docs/commits.rst +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/docs/conf.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/docs/connections/databricks.rst +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/docs/img/databricks_workflow_task_group_airflow_graph_view.png +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/docs/img/workflow_plugin_launch_task.png +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/docs/img/workflow_plugin_single_task.png +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/docs/img/workflow_run_databricks_graph_view.png +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/docs/installing-providers-from-sources.rst +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/docs/integration-logos/Databricks.png +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/docs/operators/copy_into.rst +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/docs/operators/index.rst +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/docs/operators/jobs_create.rst +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/docs/operators/notebook.rst +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/docs/operators/repos_create.rst +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/docs/operators/repos_delete.rst +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/docs/operators/repos_update.rst +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/docs/operators/run_now.rst +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/docs/operators/sql.rst +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/docs/operators/sql_statements.rst +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/docs/operators/submit_run.rst +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/docs/operators/task.rst +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/docs/operators/workflow.rst +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/docs/plugins/index.rst +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/docs/plugins/workflow.rst +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/docs/security.rst +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/src/airflow/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/src/airflow/providers/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/src/airflow/providers/databricks/LICENSE +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/src/airflow/providers/databricks/exceptions.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/src/airflow/providers/databricks/get_provider_info.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/src/airflow/providers/databricks/hooks/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/src/airflow/providers/databricks/operators/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/src/airflow/providers/databricks/plugins/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/src/airflow/providers/databricks/sensors/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/src/airflow/providers/databricks/triggers/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/src/airflow/providers/databricks/triggers/databricks.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/src/airflow/providers/databricks/utils/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/src/airflow/providers/databricks/utils/databricks.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/src/airflow/providers/databricks/utils/mixins.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/tests/conftest.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/tests/system/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/tests/system/databricks/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/tests/system/databricks/example_databricks.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/tests/system/databricks/example_databricks_repos.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/tests/system/databricks/example_databricks_sensors.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/tests/system/databricks/example_databricks_sql.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/tests/system/databricks/example_databricks_workflow.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/tests/unit/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/tests/unit/databricks/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/tests/unit/databricks/hooks/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/tests/unit/databricks/hooks/test_databricks_base.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/tests/unit/databricks/operators/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/tests/unit/databricks/operators/test_databricks_repos.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/tests/unit/databricks/operators/test_databricks_sql.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/tests/unit/databricks/plugins/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/tests/unit/databricks/sensors/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/tests/unit/databricks/sensors/test_databricks.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/tests/unit/databricks/sensors/test_databricks_partition.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/tests/unit/databricks/sensors/test_databricks_sql.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/tests/unit/databricks/test_exceptions.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/tests/unit/databricks/triggers/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/tests/unit/databricks/utils/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/tests/unit/databricks/utils/test_databricks.py +0 -0
- {apache_airflow_providers_databricks-7.5.0rc1 → apache_airflow_providers_databricks-7.6.0rc1}/tests/unit/databricks/utils/test_mixins.py +0 -0
PKG-INFO

@@ -1,11 +1,11 @@
 Metadata-Version: 2.4
 Name: apache-airflow-providers-databricks
-Version: 7.5.0rc1
+Version: 7.6.0rc1
 Summary: Provider package apache-airflow-providers-databricks for Apache Airflow
 Keywords: airflow-provider,databricks,airflow,integration
 Author-email: Apache Software Foundation <dev@airflow.apache.org>
 Maintainer-email: Apache Software Foundation <dev@airflow.apache.org>
-Requires-Python: ~=3.9
+Requires-Python: ~=3.10
 Description-Content-Type: text/x-rst
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Environment :: Console
@@ -15,7 +15,6 @@ Classifier: Intended Audience :: System Administrators
 Classifier: Framework :: Apache Airflow
 Classifier: Framework :: Apache Airflow :: Provider
 Classifier: License :: OSI Approved :: Apache Software License
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
@@ -28,16 +27,17 @@ Requires-Dist: databricks-sql-connector>=3.0.0
 Requires-Dist: databricks-sqlalchemy>=1.0.2
 Requires-Dist: aiohttp>=3.9.2, <4
 Requires-Dist: mergedeep>=1.3.4
-Requires-Dist: pandas>=2.1.2…
-Requires-Dist: …
+Requires-Dist: pandas>=2.1.2; python_version <"3.13"
+Requires-Dist: pandas>=2.2.3; python_version >="3.13"
+Requires-Dist: pyarrow>=16.1.0
 Requires-Dist: azure-identity>=1.3.1 ; extra == "azure-identity"
 Requires-Dist: apache-airflow-providers-fab ; extra == "fab"
 Requires-Dist: apache-airflow-providers-openlineage>=2.3.0rc1 ; extra == "openlineage"
 Requires-Dist: databricks-sdk==0.10.0 ; extra == "sdk"
 Requires-Dist: apache-airflow-providers-standard ; extra == "standard"
 Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
-Project-URL: Changelog, https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.5.0/changelog.html
-Project-URL: Documentation, https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.5.0
+Project-URL: Changelog, https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.6.0/changelog.html
+Project-URL: Documentation, https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.6.0
 Project-URL: Mastodon, https://fosstodon.org/@airflow
 Project-URL: Slack Chat, https://s.apache.org/airflow-slack
 Project-URL: Source Code, https://github.com/apache/airflow
@@ -73,7 +73,7 @@ Provides-Extra: standard
 
 Package ``apache-airflow-providers-databricks``
 
-Release: ``7.5.0``
+Release: ``7.6.0``
 
 
 `Databricks <https://databricks.com/>`__
@@ -86,7 +86,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
 are in ``airflow.providers.databricks`` python package.
 
 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.5.0/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.6.0/>`_.
 
 Installation
 ------------
@@ -95,14 +95,14 @@ You can install this package on top of an existing Airflow 2 installation (see `
 for the minimum Airflow version supported) via
 ``pip install apache-airflow-providers-databricks``
 
-The package supports the following python versions: 3.9,3.10,3.11,3.12
+The package supports the following python versions: 3.10,3.11,3.12
 
 Requirements
 ------------
 
-==========================================  …
+==========================================  =====================================
 PIP package                                 Version required
-==========================================  …
+==========================================  =====================================
 ``apache-airflow``                          ``>=2.10.0``
 ``apache-airflow-providers-common-compat``  ``>=1.6.0``
 ``apache-airflow-providers-common-sql``     ``>=1.27.0``
@@ -111,9 +111,10 @@ PIP package Version required
 ``databricks-sqlalchemy``                   ``>=1.0.2``
 ``aiohttp``                                 ``>=3.9.2,<4``
 ``mergedeep``                               ``>=1.3.4``
-``pandas``                                  ``>=2.1.2…
-``…
-…
+``pandas``                                  ``>=2.1.2; python_version < "3.13"``
+``pandas``                                  ``>=2.2.3; python_version >= "3.13"``
+``pyarrow``                                 ``>=16.1.0``
+==========================================  =====================================
 
 Cross provider package dependencies
 -----------------------------------
@@ -138,5 +139,5 @@ Dependent package
 ==================================================================================================================  =================
 
 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.5.0/changelog.html>`_.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.6.0/changelog.html>`_.
README.rst

@@ -23,7 +23,7 @@
 
 Package ``apache-airflow-providers-databricks``
 
-Release: ``7.5.0``
+Release: ``7.6.0``
 
 
 `Databricks <https://databricks.com/>`__
@@ -36,7 +36,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
 are in ``airflow.providers.databricks`` python package.
 
 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.5.0/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.6.0/>`_.
 
 Installation
 ------------
@@ -45,14 +45,14 @@ You can install this package on top of an existing Airflow 2 installation (see `
 for the minimum Airflow version supported) via
 ``pip install apache-airflow-providers-databricks``
 
-The package supports the following python versions: 3.9,3.10,3.11,3.12
+The package supports the following python versions: 3.10,3.11,3.12
 
 Requirements
 ------------
 
-==========================================  …
+==========================================  =====================================
 PIP package                                 Version required
-==========================================  …
+==========================================  =====================================
 ``apache-airflow``                          ``>=2.10.0``
 ``apache-airflow-providers-common-compat``  ``>=1.6.0``
 ``apache-airflow-providers-common-sql``     ``>=1.27.0``
@@ -61,9 +61,10 @@ PIP package Version required
 ``databricks-sqlalchemy``                   ``>=1.0.2``
 ``aiohttp``                                 ``>=3.9.2,<4``
 ``mergedeep``                               ``>=1.3.4``
-``pandas``                                  ``>=2.1.2…
-``…
-…
+``pandas``                                  ``>=2.1.2; python_version < "3.13"``
+``pandas``                                  ``>=2.2.3; python_version >= "3.13"``
+``pyarrow``                                 ``>=16.1.0``
+==========================================  =====================================
 
 Cross provider package dependencies
 -----------------------------------
@@ -88,4 +89,4 @@ Dependent package
 ==================================================================================================================  =================
 
 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.5.0/changelog.html>`_.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.6.0/changelog.html>`_.
docs/changelog.rst

@@ -26,6 +26,41 @@
 Changelog
 ---------
 
+7.6.0
+.....
+
+Features
+~~~~~~~~
+
+* ``Refactor Databricks hook to use HTTP method constants and auto-prepend api/ to endpoint paths (#52385)``
+
+Bug Fixes
+~~~~~~~~~
+
+* ``Fix: Unclosed aiohttp ClientSession and TCPConnector in DatabricksRunNowOperator (deferrable=True) (#52119)``
+
+Misc
+~~~~
+
+* ``Move 'BaseHook' implementation to task SDK (#51873)``
+* ``Disable UP038 ruff rule and revert mandatory 'X | Y' in insintance checks (#52644)``
+* ``Bump pyarrow to 16.1.0 minimum version for several providers (#52635)``
+* ``Upgrade ruff to latest version (0.12.1) (#52562)``
+* ``feat: Add explicit support for DatabricksHook to Openlineage helper (#52253)``
+* ``Replace 'models.BaseOperator' to Task SDK one for DBT & Databricks (#52377)``
+* ``Drop support for Python 3.9 (#52072)``
+* ``Use BaseSensorOperator from task sdk in providers (#52296)``
+* ``Enable DatabricksJobRunLink for Databricks plugin, skip provide_session usage in Airflow3 (#52228)``
+* ``Add deprecation to 'airflow/sensors/base.py' (#52249)``
+* ``Bump upper binding on pandas in all providers (#52060)``
+
+.. Below changes are excluded from the changelog. Move them to
+   appropriate section above if needed. Do not delete the lines(!):
+   * ``Make sure all test version imports come from test_common (#52425)``
+   * ``Clean up messy default connection overrides in provider tests (#52137)``
+   * ``Remove pytest.mark.db_test where possible from databricks provider. (#52033)``
+   * ``Relax databricks test a bit to support compatibility with older version of it (#51787)``
+
 7.5.0
 .....
 
docs/index.rst

@@ -78,7 +78,7 @@ apache-airflow-providers-databricks package
 `Databricks <https://databricks.com/>`__
 
 
-Release: 7.5.0
+Release: 7.6.0
 
 Provider package
 ----------------
@@ -98,9 +98,9 @@ Requirements
 
 The minimum Apache Airflow version supported by this provider distribution is ``2.10.0``.
 
-==========================================  …
+==========================================  =====================================
 PIP package                                 Version required
-==========================================  …
+==========================================  =====================================
 ``apache-airflow``                          ``>=2.10.0``
 ``apache-airflow-providers-common-compat``  ``>=1.6.0``
 ``apache-airflow-providers-common-sql``     ``>=1.27.0``
@@ -109,9 +109,10 @@ PIP package Version required
 ``databricks-sqlalchemy``                   ``>=1.0.2``
 ``aiohttp``                                 ``>=3.9.2,<4``
 ``mergedeep``                               ``>=1.3.4``
-``pandas``                                  ``>=2.1.2…
-``…
-…
+``pandas``                                  ``>=2.1.2; python_version < "3.13"``
+``pandas``                                  ``>=2.2.3; python_version >= "3.13"``
+``pyarrow``                                 ``>=16.1.0``
+==========================================  =====================================
 
 Cross provider package dependencies
 -----------------------------------
@@ -141,5 +142,5 @@ Downloading official packages
 You can download officially released packages and verify their checksums and signatures from the
 `Official Apache Download site <https://downloads.apache.org/airflow/providers/>`_
 
-* `The apache-airflow-providers-databricks 7.5.0 sdist package <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.5.0.tar.gz>`_ (`asc <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.5.0.tar.gz.asc>`__, `sha512 <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.5.0.tar.gz.sha512>`__)
-* `The apache-airflow-providers-databricks 7.5.0 wheel package <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.5.0-py3-none-any.whl>`_ (`asc <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.5.0-py3-none-any.whl.asc>`__, `sha512 <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.5.0-py3-none-any.whl.sha512>`__)
+* `The apache-airflow-providers-databricks 7.6.0 sdist package <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.6.0.tar.gz>`_ (`asc <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.6.0.tar.gz.asc>`__, `sha512 <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.6.0.tar.gz.sha512>`__)
+* `The apache-airflow-providers-databricks 7.6.0 wheel package <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.6.0-py3-none-any.whl>`_ (`asc <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.6.0-py3-none-any.whl.asc>`__, `sha512 <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.6.0-py3-none-any.whl.sha512>`__)
provider.yaml

@@ -22,12 +22,13 @@ description: |
   `Databricks <https://databricks.com/>`__
 
 state: ready
-source-date-epoch: …
+source-date-epoch: 1751473030
 # Note that those versions are maintained by release manager - do not update them manually
 # with the exception of case where other provider in sources has >= new provider version.
 # In such case adding >= NEW_VERSION and bumping to NEW_VERSION in a provider have
 # to be done in the same PR
 versions:
+  - 7.6.0
   - 7.5.0
   - 7.4.0
   - 7.3.2
pyproject.toml

@@ -25,7 +25,7 @@ build-backend = "flit_core.buildapi"
 
 [project]
 name = "apache-airflow-providers-databricks"
-version = "7.5.0rc1"
+version = "7.6.0rc1"
 description = "Provider package apache-airflow-providers-databricks for Apache Airflow"
 readme = "README.rst"
 authors = [
@@ -44,13 +44,12 @@ classifiers = [
     "Framework :: Apache Airflow",
     "Framework :: Apache Airflow :: Provider",
     "License :: OSI Approved :: Apache Software License",
-    "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
     "Topic :: System :: Monitoring",
 ]
-requires-python = "~=3.9"
+requires-python = "~=3.10"
 
 # The dependencies should be modified in place in the generated file.
 # Any change in the dependencies is preserved when the file is regenerated
@@ -65,8 +64,9 @@ dependencies = [
     "databricks-sqlalchemy>=1.0.2",
     "aiohttp>=3.9.2, <4",
     "mergedeep>=1.3.4",
-    …
-    …
+    'pandas>=2.1.2; python_version <"3.13"',
+    'pandas>=2.2.3; python_version >="3.13"',
+    "pyarrow>=16.1.0",
 ]
 
 # The optional dependencies should be modified in place in the generated file
@@ -130,8 +130,8 @@ apache-airflow-providers-common-sql = {workspace = true}
 apache-airflow-providers-standard = {workspace = true}
 
 [project.urls]
-"Documentation" = "https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.5.0"
-"Changelog" = "https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.5.0/changelog.html"
+"Documentation" = "https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.6.0"
+"Changelog" = "https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.6.0/changelog.html"
 "Bug Tracker" = "https://github.com/apache/airflow/issues"
 "Source Code" = "https://github.com/apache/airflow"
 "Slack Chat" = "https://s.apache.org/airflow-slack"
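The split pandas pin relies on PEP 508 environment markers: the installer evaluates each marker against the target interpreter and keeps only the matching requirement. A quick way to see which branch applies, using the packaging library (the same marker engine pip uses); the variable names are just for illustration:

from packaging.markers import Marker

pre_313 = Marker('python_version < "3.13"')    # guards pandas>=2.1.2
post_313 = Marker('python_version >= "3.13"')  # guards pandas>=2.2.3

# Evaluated against the running interpreter; exactly one is True.
print(pre_313.evaluate(), post_313.evaluate())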
src/airflow/providers/databricks/__init__.py

@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version
 
 __all__ = ["__version__"]
 
-__version__ = "7.5.0"
+__version__ = "7.6.0"
 
 if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
     "2.10.0"
src/airflow/providers/databricks/hooks/databricks.py

@@ -37,33 +37,36 @@ from requests import exceptions as requests_exceptions
 from airflow.exceptions import AirflowException
 from airflow.providers.databricks.hooks.databricks_base import BaseDatabricksHook
 
-GET_CLUSTER_ENDPOINT = ("GET", "api/2.0/clusters/get")
-RESTART_CLUSTER_ENDPOINT = ("POST", "api/2.0/clusters/restart")
-START_CLUSTER_ENDPOINT = ("POST", "api/2.0/clusters/start")
-TERMINATE_CLUSTER_ENDPOINT = ("POST", "api/2.0/clusters/delete")
-
-CREATE_ENDPOINT = ("POST", "api/2.1/jobs/create")
-RESET_ENDPOINT = ("POST", "api/2.1/jobs/reset")
-UPDATE_ENDPOINT = ("POST", "api/2.1/jobs/update")
-RUN_NOW_ENDPOINT = ("POST", "api/2.1/jobs/run-now")
-SUBMIT_RUN_ENDPOINT = ("POST", "api/2.1/jobs/runs/submit")
-GET_RUN_ENDPOINT = ("GET", "api/2.1/jobs/runs/get")
-CANCEL_RUN_ENDPOINT = ("POST", "api/2.1/jobs/runs/cancel")
-DELETE_RUN_ENDPOINT = ("POST", "api/2.1/jobs/runs/delete")
-REPAIR_RUN_ENDPOINT = ("POST", "api/2.1/jobs/runs/repair")
-OUTPUT_RUNS_JOB_ENDPOINT = ("GET", "api/2.1/jobs/runs/get-output")
-CANCEL_ALL_RUNS_ENDPOINT = ("POST", "api/2.1/jobs/runs/cancel-all")
-
-INSTALL_LIBS_ENDPOINT = ("POST", "api/2.0/libraries/install")
-UNINSTALL_LIBS_ENDPOINT = ("POST", "api/2.0/libraries/uninstall")
-… (remaining constants, previously carrying the explicit "api/" prefix, elided in the source diff)
+GET_CLUSTER_ENDPOINT = ("GET", "2.0/clusters/get")
+RESTART_CLUSTER_ENDPOINT = ("POST", "2.0/clusters/restart")
+START_CLUSTER_ENDPOINT = ("POST", "2.0/clusters/start")
+TERMINATE_CLUSTER_ENDPOINT = ("POST", "2.0/clusters/delete")
+
+CREATE_ENDPOINT = ("POST", "2.1/jobs/create")
+RESET_ENDPOINT = ("POST", "2.1/jobs/reset")
+UPDATE_ENDPOINT = ("POST", "2.1/jobs/update")
+RUN_NOW_ENDPOINT = ("POST", "2.1/jobs/run-now")
+SUBMIT_RUN_ENDPOINT = ("POST", "2.1/jobs/runs/submit")
+GET_RUN_ENDPOINT = ("GET", "2.1/jobs/runs/get")
+CANCEL_RUN_ENDPOINT = ("POST", "2.1/jobs/runs/cancel")
+DELETE_RUN_ENDPOINT = ("POST", "2.1/jobs/runs/delete")
+REPAIR_RUN_ENDPOINT = ("POST", "2.1/jobs/runs/repair")
+OUTPUT_RUNS_JOB_ENDPOINT = ("GET", "2.1/jobs/runs/get-output")
+CANCEL_ALL_RUNS_ENDPOINT = ("POST", "2.1/jobs/runs/cancel-all")
+
+INSTALL_LIBS_ENDPOINT = ("POST", "2.0/libraries/install")
+UNINSTALL_LIBS_ENDPOINT = ("POST", "2.0/libraries/uninstall")
+UPDATE_REPO_ENDPOINT = ("PATCH", "2.0/repos/")
+DELETE_REPO_ENDPOINT = ("DELETE", "2.0/repos/")
+CREATE_REPO_ENDPOINT = ("POST", "2.0/repos")
+
+LIST_JOBS_ENDPOINT = ("GET", "2.1/jobs/list")
+LIST_PIPELINES_ENDPOINT = ("GET", "2.0/pipelines")
+
+WORKSPACE_GET_STATUS_ENDPOINT = ("GET", "2.0/workspace/get-status")
+
+SPARK_VERSIONS_ENDPOINT = ("GET", "2.0/clusters/spark-versions")
+SQL_STATEMENTS_ENDPOINT = "2.0/sql/statements"
 
 
 class RunLifeCycleState(Enum):
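Each constant now pairs an HTTP method with a version-relative path; the "api/" prefix that used to be baked into every string is added once, centrally, in the base hook (see the databricks_base.py hunks further down). A minimal sketch of how a call site consumes such a constant, simplified and without the real hook's auth, retry, and error handling (the do_api_call helper and host value here are illustrative only):

import requests

GET_RUN_ENDPOINT = ("GET", "2.1/jobs/runs/get")


def do_api_call(host: str, endpoint_info: tuple[str, str], payload: dict) -> dict:
    # Unpack method and version-relative path from the constant.
    method, endpoint = endpoint_info
    url = f"https://{host}/api/{endpoint}"  # "api/" is prepended in one place
    kwargs = {"params": payload} if method == "GET" else {"json": payload}
    response = requests.request(method, url, timeout=30, **kwargs)
    response.raise_for_status()
    return response.json()


# do_api_call("example.cloud.databricks.com", GET_RUN_ENDPOINT, {"run_id": 42})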
src/airflow/providers/databricks/hooks/databricks.py (continued)

@@ -718,7 +721,8 @@ class DatabricksHook(BaseDatabricksHook):
         :param json: payload
         :return: metadata from update
         """
-        repos_endpoint = ("PATCH", f"api/2.0/repos/{repo_id}")
+        method, base_path = UPDATE_REPO_ENDPOINT
+        repos_endpoint = (method, f"{base_path}/{repo_id}")
         return self._do_api_call(repos_endpoint, json)
 
     def delete_repo(self, repo_id: str):
@@ -728,7 +732,8 @@ class DatabricksHook(BaseDatabricksHook):
         :param repo_id: ID of Databricks Repos
         :return:
         """
-        repos_endpoint = ("DELETE", f"api/2.0/repos/{repo_id}")
+        method, base_path = DELETE_REPO_ENDPOINT
+        repos_endpoint = (method, f"{base_path}/{repo_id}")
         self._do_api_call(repos_endpoint)
 
     def create_repo(self, json: dict[str, Any]) -> dict:
@@ -738,8 +743,7 @@ class DatabricksHook(BaseDatabricksHook):
         :param json: payload
         :return:
         """
-        repos_endpoint = ("POST", "api/2.0/repos")
-        return self._do_api_call(repos_endpoint, json)
+        return self._do_api_call(CREATE_REPO_ENDPOINT, json)
 
     def get_repo_by_path(self, path: str) -> str | None:
         """
src/airflow/providers/databricks/hooks/databricks_base.py

@@ -50,9 +50,13 @@ from tenacity import (
 
 from airflow import __version__
 from airflow.exceptions import AirflowException, AirflowOptionalProviderFeatureException
-from airflow.hooks.base import BaseHook
 from airflow.providers_manager import ProvidersManager
 
+try:
+    from airflow.sdk import BaseHook
+except ImportError:
+    from airflow.hooks.base import BaseHook as BaseHook  # type: ignore
+
 if TYPE_CHECKING:
     from airflow.models import Connection
 
@@ -135,7 +139,7 @@ class BaseDatabricksHook(BaseHook):
 
     @cached_property
     def databricks_conn(self) -> Connection:
-        return self.get_connection(self.databricks_conn_id)
+        return self.get_connection(self.databricks_conn_id)  # type: ignore[return-value]
 
     def get_conn(self) -> Connection:
         return self.databricks_conn
@@ -353,14 +357,15 @@ class BaseDatabricksHook(BaseHook):
         async for attempt in self._a_get_retry_object():
             with attempt:
                 if self.databricks_conn.extra_dejson.get("use_azure_managed_identity", False):
-                    credential = AsyncManagedIdentityCredential()
+                    async with AsyncManagedIdentityCredential() as credential:
+                        token = await credential.get_token(f"{resource}/.default")
                 else:
-                    credential = AsyncClientSecretCredential(
+                    async with AsyncClientSecretCredential(
                         client_id=self.databricks_conn.login,
                         client_secret=self.databricks_conn.password,
                         tenant_id=self.databricks_conn.extra_dejson["azure_tenant_id"],
-                    )
-                token = await credential.get_token(f"{resource}/.default")
+                    ) as credential:
+                        token = await credential.get_token(f"{resource}/.default")
                 jsn = {
                     "access_token": token.token,
                     "token_type": "Bearer",
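This hunk is the core of the #52119 fix: the azure-identity aio credentials own an aiohttp ClientSession/TCPConnector, and constructing them without ever awaiting close() leaked both. Entering the credential with async with guarantees the transport is closed once the token has been fetched. A self-contained sketch of the corrected pattern (tenant, client, and resource values are placeholders):

import asyncio

from azure.identity.aio import ClientSecretCredential


async def fetch_access_token(resource: str) -> str:
    # "async with" awaits credential.close() on exit, which closes the
    # underlying aiohttp ClientSession instead of leaking it.
    async with ClientSecretCredential(
        tenant_id="<tenant-id>",
        client_id="<client-id>",
        client_secret="<client-secret>",
    ) as credential:
        token = await credential.get_token(f"{resource}/.default")
    return token.token


# asyncio.run(fetch_access_token("<azure-databricks-resource-id>"))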
src/airflow/providers/databricks/hooks/databricks_base.py (continued)

@@ -636,8 +641,9 @@ class BaseDatabricksHook(BaseHook):
         """
         method, endpoint = endpoint_info
 
-        # …
-        url = self._endpoint_url(endpoint)
+        # Automatically prepend 'api/' prefix to all endpoint paths
+        full_endpoint = f"api/{endpoint}"
+        url = self._endpoint_url(full_endpoint)
 
         aad_headers = self._get_aad_headers()
         headers = {**self.user_agent_header, **aad_headers}
@@ -703,7 +709,8 @@ class BaseDatabricksHook(BaseHook):
         """
         method, endpoint = endpoint_info
 
-        url = self._endpoint_url(endpoint)
+        full_endpoint = f"api/{endpoint}"
+        url = self._endpoint_url(full_endpoint)
 
         aad_headers = await self._a_get_aad_headers()
         headers = {**self.user_agent_header, **aad_headers}
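Both the sync and async paths now build the final URL the same way, so a stray or missing "api/" segment can no longer creep in at individual call sites. Roughly, as a sketch (the real _endpoint_url also derives the host from the connection and normalizes the scheme):

def endpoint_url(host: str, endpoint: str) -> str:
    # Endpoint constants are version-relative ("2.1/jobs/runs/get");
    # the hook owns the "api/" prefix.
    full_endpoint = f"api/{endpoint}"
    return f"https://{host}/{full_endpoint}"


assert (
    endpoint_url("example.cloud.databricks.com", "2.1/jobs/runs/get")
    == "https://example.cloud.databricks.com/api/2.1/jobs/runs/get"
)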
src/airflow/providers/databricks/hooks/databricks_sql.py

@@ -18,14 +18,13 @@ from __future__ import annotations
 
 import threading
 from collections import namedtuple
-from collections.abc import Iterable, Mapping, Sequence
+from collections.abc import Callable, Iterable, Mapping, Sequence
 from contextlib import closing
 from copy import copy
 from datetime import timedelta
 from typing import (
     TYPE_CHECKING,
     Any,
-    Callable,
     TypeVar,
     cast,
     overload,
@@ -345,10 +344,9 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
 
     def get_openlineage_database_specific_lineage(self, task_instance) -> OperatorLineage | None:
         """
-        …
+        Emit separate OpenLineage events for each Databricks query, based on executed query IDs.
 
-        If a single query ID is present, …
-        If multiple query IDs are present, emits separate OpenLineage events for each query instead.
+        If a single query ID is present, also add an `ExternalQueryRunFacet` to the returned lineage metadata.
 
         Note that `get_openlineage_database_specific_lineage` is usually called after task's execution,
         so if multiple query IDs are present, both START and COMPLETE event for each query will be emitted
@@ -369,13 +367,22 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
         from airflow.providers.openlineage.sqlparser import SQLParser
 
         if not self.query_ids:
-            self.log.debug("OpenLineage could not find databricks query ids.")
+            self.log.info("OpenLineage could not find databricks query ids.")
             return None
 
         self.log.debug("openlineage: getting connection to get database info")
         connection = self.get_connection(self.get_conn_id())
         namespace = SQLParser.create_namespace(self.get_openlineage_database_info(connection))
 
+        self.log.info("Separate OpenLineage events will be emitted for each Databricks query_id.")
+        emit_openlineage_events_for_databricks_queries(
+            task_instance=task_instance,
+            hook=self,
+            query_ids=self.query_ids,
+            query_for_extra_metadata=True,
+            query_source_namespace=namespace,
+        )
+
         if len(self.query_ids) == 1:
             self.log.debug("Attaching ExternalQueryRunFacet with single query_id to OpenLineage event.")
             return OperatorLineage(
@@ -386,12 +393,4 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
             }
         )
 
-        self.log.info("Multiple query_ids found. Separate OpenLineage event will be emitted for each query.")
-        emit_openlineage_events_for_databricks_queries(
-            query_ids=self.query_ids,
-            query_source_namespace=namespace,
-            task_instance=task_instance,
-            hook=self,
-        )
-
         return None
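The behavior change: per-query OpenLineage events are now emitted unconditionally (previously only when multiple query IDs were present), and the single-query case additionally returns an ExternalQueryRunFacet on the task-level event. A condensed sketch of the resulting control flow; the facet body is simplified, and the import paths are assumptions (the helper is presumed to live in the provider's utils/openlineage.py, which this release reworks):

from airflow.providers.common.compat.openlineage.facet import ExternalQueryRunFacet
from airflow.providers.databricks.utils.openlineage import (
    emit_openlineage_events_for_databricks_queries,
)
from airflow.providers.openlineage.extractors import OperatorLineage


def lineage_for_queries(hook, task_instance, namespace: str) -> OperatorLineage | None:
    if not hook.query_ids:  # nothing ran, nothing to report
        return None

    # One START/COMPLETE event pair per executed query id, single or not.
    emit_openlineage_events_for_databricks_queries(
        task_instance=task_instance,
        hook=hook,
        query_ids=hook.query_ids,
        query_for_extra_metadata=True,
        query_source_namespace=namespace,
    )

    # Only an unambiguous single query id is attached to the task-level event.
    if len(hook.query_ids) == 1:
        return OperatorLineage(
            run_facets={
                "externalQuery": ExternalQueryRunFacet(
                    externalQueryId=hook.query_ids[0], source=namespace
                )
            }
        )
    return None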
src/airflow/providers/databricks/operators/databricks.py

@@ -29,7 +29,6 @@ from typing import TYPE_CHECKING, Any
 
 from airflow.configuration import conf
 from airflow.exceptions import AirflowException
-from airflow.models import BaseOperator
 from airflow.providers.databricks.hooks.databricks import (
     DatabricksHook,
     RunLifeCycleState,
@@ -42,13 +41,14 @@ from airflow.providers.databricks.operators.databricks_workflow import (
 from airflow.providers.databricks.plugins.databricks_workflow import (
     WorkflowJobRepairSingleTaskLink,
     WorkflowJobRunLink,
+    store_databricks_job_run_link,
 )
 from airflow.providers.databricks.triggers.databricks import (
     DatabricksExecutionTrigger,
 )
 from airflow.providers.databricks.utils.databricks import normalise_json_content, validate_trigger_event
 from airflow.providers.databricks.utils.mixins import DatabricksSQLStatementsMixin
-from airflow.providers.databricks.version_compat import AIRFLOW_V_3_0_PLUS
+from airflow.providers.databricks.version_compat import AIRFLOW_V_3_0_PLUS, BaseOperator
 
 if TYPE_CHECKING:
     from airflow.models.taskinstancekey import TaskInstanceKey
@@ -1214,10 +1214,16 @@ class DatabricksTaskBaseOperator(BaseOperator, ABC):
         super().__init__(**kwargs)
 
         if self._databricks_workflow_task_group is not None:
-            self.operator_extra_links = (
-                WorkflowJobRunLink(),
-                WorkflowJobRepairSingleTaskLink(),
-            )
+            # Conditionally set operator_extra_links based on Airflow version. In Airflow 3, only show the job run link.
+            # In Airflow 2, show the job run link and the repair link.
+            # TODO: Once we expand the plugin functionality in Airflow 3.1, this can be re-evaluated on how to handle the repair link.
+            if AIRFLOW_V_3_0_PLUS:
+                self.operator_extra_links = (WorkflowJobRunLink(),)
+            else:
+                self.operator_extra_links = (
+                    WorkflowJobRunLink(),
+                    WorkflowJobRepairSingleTaskLink(),
+                )
         else:
             # Databricks does not support repair for non-workflow tasks, hence do not show the repair link.
             self.operator_extra_links = (DatabricksJobRunLink(),)
@@ -1427,6 +1433,15 @@ class DatabricksTaskBaseOperator(BaseOperator, ABC):
             )
             self.databricks_run_id = workflow_run_metadata.run_id
             self.databricks_conn_id = workflow_run_metadata.conn_id
+
+            # Store operator links in XCom for Airflow 3 compatibility
+            if AIRFLOW_V_3_0_PLUS:
+                # Store the job run link
+                store_databricks_job_run_link(
+                    context=context,
+                    metadata=workflow_run_metadata,
+                    logger=self.log,
+                )
         else:
             self._launch_job(context=context)
             if self.wait_for_termination:
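version_compat.py gained 10 lines in this release (see the file list) and now re-exports BaseOperator, so the operators above and below import it from one place instead of airflow.models. The shim presumably mirrors the try/except fallback this diff adds for BaseHook in databricks_base.py; a sketch under that assumption:

# Sketch of a version_compat-style shim (assumed, mirroring the BaseHook fallback)
try:
    # Airflow 3: BaseOperator lives in the Task SDK
    from airflow.sdk import BaseOperator
except ImportError:
    # Airflow 2 fallback
    from airflow.models import BaseOperator  # type: ignore[no-redef]

__all__ = ["BaseOperator"]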
src/airflow/providers/databricks/operators/databricks_repos.py

@@ -26,8 +26,8 @@ from typing import TYPE_CHECKING
 from urllib.parse import urlsplit
 
 from airflow.exceptions import AirflowException
-from airflow.models import BaseOperator
 from airflow.providers.databricks.hooks.databricks import DatabricksHook
+from airflow.providers.databricks.version_compat import BaseOperator
 
 if TYPE_CHECKING:
     try:
src/airflow/providers/databricks/operators/databricks_sql.py

@@ -28,9 +28,9 @@ from typing import TYPE_CHECKING, Any, ClassVar
 from databricks.sql.utils import ParamEscaper
 
 from airflow.exceptions import AirflowException
-from airflow.models import BaseOperator
 from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator
 from airflow.providers.databricks.hooks.databricks_sql import DatabricksSqlHook
+from airflow.providers.databricks.version_compat import BaseOperator
 
 if TYPE_CHECKING:
     from airflow.utils.context import Context