apache-airflow-providers-databricks 7.8.3__tar.gz → 7.9.0rc1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/PKG-INFO +20 -12
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/README.rst +7 -4
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/docs/changelog.rst +24 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/docs/index.rst +5 -4
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/provider.yaml +2 -1
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/pyproject.toml +18 -10
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/__init__.py +1 -1
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/hooks/databricks.py +17 -17
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/operators/databricks.py +1 -1
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/operators/databricks_sql.py +164 -29
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/plugins/databricks_workflow.py +3 -3
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/hooks/test_databricks.py +11 -11
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/operators/test_databricks_sql.py +139 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/LICENSE +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/NOTICE +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/docs/.latest-doc-only-change.txt +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/docs/commits.rst +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/docs/conf.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/docs/connections/databricks.rst +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/docs/img/databricks_workflow_task_group_airflow_graph_view.png +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/docs/img/workflow_plugin_launch_task.png +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/docs/img/workflow_plugin_single_task.png +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/docs/img/workflow_run_databricks_graph_view.png +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/docs/installing-providers-from-sources.rst +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/docs/integration-logos/Databricks.png +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/docs/operators/copy_into.rst +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/docs/operators/index.rst +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/docs/operators/jobs_create.rst +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/docs/operators/notebook.rst +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/docs/operators/repos_create.rst +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/docs/operators/repos_delete.rst +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/docs/operators/repos_update.rst +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/docs/operators/run_now.rst +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/docs/operators/sql.rst +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/docs/operators/sql_statements.rst +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/docs/operators/submit_run.rst +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/docs/operators/task.rst +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/docs/operators/workflow.rst +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/docs/plugins/index.rst +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/docs/plugins/workflow.rst +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/docs/security.rst +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/exceptions.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/get_provider_info.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/hooks/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/hooks/databricks_base.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/hooks/databricks_sql.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/operators/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/operators/databricks_repos.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/operators/databricks_workflow.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/plugins/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/sensors/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/sensors/databricks.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/sensors/databricks_partition.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/sensors/databricks_sql.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/triggers/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/triggers/databricks.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/utils/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/utils/databricks.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/utils/mixins.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/utils/openlineage.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/version_compat.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/tests/conftest.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/tests/system/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/tests/system/databricks/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/tests/system/databricks/example_databricks.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/tests/system/databricks/example_databricks_repos.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/tests/system/databricks/example_databricks_sensors.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/tests/system/databricks/example_databricks_sql.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/tests/system/databricks/example_databricks_workflow.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/hooks/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/hooks/test_databricks_azure_workload_identity.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/hooks/test_databricks_azure_workload_identity_async.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/hooks/test_databricks_base.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/hooks/test_databricks_sql.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/operators/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/operators/test_databricks.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/operators/test_databricks_copy.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/operators/test_databricks_repos.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/operators/test_databricks_workflow.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/plugins/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/plugins/test_databricks_workflow.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/sensors/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/sensors/test_databricks.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/sensors/test_databricks_partition.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/sensors/test_databricks_sql.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/test_exceptions.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/triggers/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/triggers/test_databricks.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/utils/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/utils/test_databricks.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/utils/test_mixins.py +0 -0
- {apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/utils/test_openlineage.py +0 -0
{apache_airflow_providers_databricks-7.8.3 → apache_airflow_providers_databricks-7.9.0rc1}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: apache-airflow-providers-databricks
|
|
3
|
-
Version: 7.8.3
|
|
3
|
+
Version: 7.9.0rc1
|
|
4
4
|
Summary: Provider package apache-airflow-providers-databricks for Apache Airflow
|
|
5
5
|
Keywords: airflow-provider,databricks,airflow,integration
|
|
6
6
|
Author-email: Apache Software Foundation <dev@airflow.apache.org>
|
|
@@ -22,9 +22,9 @@ Classifier: Programming Language :: Python :: 3.13
|
|
|
22
22
|
Classifier: Topic :: System :: Monitoring
|
|
23
23
|
License-File: LICENSE
|
|
24
24
|
License-File: NOTICE
|
|
25
|
-
Requires-Dist: apache-airflow>=2.11.0
|
|
26
|
-
Requires-Dist: apache-airflow-providers-common-compat>=1.
|
|
27
|
-
Requires-Dist: apache-airflow-providers-common-sql>=1.27.0
|
|
25
|
+
Requires-Dist: apache-airflow>=2.11.0rc1
|
|
26
|
+
Requires-Dist: apache-airflow-providers-common-compat>=1.13.0rc1
|
|
27
|
+
Requires-Dist: apache-airflow-providers-common-sql>=1.27.0rc1
|
|
28
28
|
Requires-Dist: requests>=2.32.0,<3
|
|
29
29
|
Requires-Dist: databricks-sql-connector>=4.0.0
|
|
30
30
|
Requires-Dist: aiohttp>=3.9.2, <4
|
|
@@ -33,21 +33,26 @@ Requires-Dist: pandas>=2.1.2; python_version <"3.13"
|
|
|
33
33
|
Requires-Dist: pandas>=2.2.3; python_version >="3.13"
|
|
34
34
|
Requires-Dist: pyarrow>=16.1.0; python_version < '3.13'
|
|
35
35
|
Requires-Dist: pyarrow>=18.0.0; python_version >= '3.13'
|
|
36
|
+
Requires-Dist: fastavro>=1.9.0 ; extra == "avro"
|
|
37
|
+
Requires-Dist: fastavro>=1.10.0 ; extra == "avro" and (python_version>="3.12")
|
|
36
38
|
Requires-Dist: azure-identity>=1.3.1 ; extra == "azure-identity"
|
|
37
|
-
Requires-Dist: apache-airflow-providers-fab>=2.2.
|
|
38
|
-
Requires-Dist: apache-airflow-providers-
|
|
39
|
+
Requires-Dist: apache-airflow-providers-fab>=2.2.0rc1 ; extra == "fab" and ( python_version < '3.13')
|
|
40
|
+
Requires-Dist: apache-airflow-providers-google>=10.24.0rc1 ; extra == "google"
|
|
41
|
+
Requires-Dist: apache-airflow-providers-openlineage>=2.3.0rc1 ; extra == "openlineage"
|
|
39
42
|
Requires-Dist: databricks-sdk==0.10.0 ; extra == "sdk"
|
|
40
43
|
Requires-Dist: databricks-sqlalchemy>=1.0.2 ; extra == "sqlalchemy"
|
|
41
44
|
Requires-Dist: apache-airflow-providers-standard ; extra == "standard"
|
|
42
45
|
Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
|
|
43
|
-
Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.
|
|
44
|
-
Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.
|
|
46
|
+
Project-URL: Changelog, https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.9.0/changelog.html
|
|
47
|
+
Project-URL: Documentation, https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.9.0
|
|
45
48
|
Project-URL: Mastodon, https://fosstodon.org/@airflow
|
|
46
49
|
Project-URL: Slack Chat, https://s.apache.org/airflow-slack
|
|
47
50
|
Project-URL: Source Code, https://github.com/apache/airflow
|
|
48
51
|
Project-URL: YouTube, https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/
|
|
52
|
+
Provides-Extra: avro
|
|
49
53
|
Provides-Extra: azure-identity
|
|
50
54
|
Provides-Extra: fab
|
|
55
|
+
Provides-Extra: google
|
|
51
56
|
Provides-Extra: openlineage
|
|
52
57
|
Provides-Extra: sdk
|
|
53
58
|
Provides-Extra: sqlalchemy
|
|
@@ -78,7 +83,7 @@ Provides-Extra: standard
|
|
|
78
83
|
|
|
79
84
|
Package ``apache-airflow-providers-databricks``
|
|
80
85
|
|
|
81
|
-
Release: ``7.8.3``
|
|
86
|
+
Release: ``7.9.0``
|
|
82
87
|
|
|
83
88
|
|
|
84
89
|
`Databricks <https://databricks.com/>`__
|
|
@@ -91,7 +96,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
|
|
|
91
96
|
are in ``airflow.providers.databricks`` python package.
|
|
92
97
|
|
|
93
98
|
You can find package information and changelog for the provider
|
|
94
|
-
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.
|
|
99
|
+
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.9.0/>`_.
|
|
95
100
|
|
|
96
101
|
Installation
|
|
97
102
|
------------
|
|
@@ -109,7 +114,7 @@ Requirements
|
|
|
109
114
|
PIP package Version required
|
|
110
115
|
========================================== ======================================
|
|
111
116
|
``apache-airflow`` ``>=2.11.0``
|
|
112
|
-
``apache-airflow-providers-common-compat`` ``>=1.
|
|
117
|
+
``apache-airflow-providers-common-compat`` ``>=1.13.0``
|
|
113
118
|
``apache-airflow-providers-common-sql`` ``>=1.27.0``
|
|
114
119
|
``requests`` ``>=2.32.0,<3``
|
|
115
120
|
``databricks-sql-connector`` ``>=4.0.0``
|
|
@@ -139,6 +144,7 @@ Dependent package
|
|
|
139
144
|
================================================================================================================== =================
|
|
140
145
|
`apache-airflow-providers-common-compat <https://airflow.apache.org/docs/apache-airflow-providers-common-compat>`_ ``common.compat``
|
|
141
146
|
`apache-airflow-providers-common-sql <https://airflow.apache.org/docs/apache-airflow-providers-common-sql>`_ ``common.sql``
|
|
147
|
+
`apache-airflow-providers-google <https://airflow.apache.org/docs/apache-airflow-providers-google>`_ ``google``
|
|
142
148
|
`apache-airflow-providers-openlineage <https://airflow.apache.org/docs/apache-airflow-providers-openlineage>`_ ``openlineage``
|
|
143
149
|
================================================================================================================== =================
|
|
144
150
|
|
|
@@ -154,8 +160,10 @@ Extra Dependencies
|
|
|
154
160
|
``standard`` ``apache-airflow-providers-standard``
|
|
155
161
|
``openlineage`` ``apache-airflow-providers-openlineage>=2.3.0``
|
|
156
162
|
``sqlalchemy`` ``databricks-sqlalchemy>=1.0.2``
|
|
163
|
+
``google`` ``apache-airflow-providers-google>=10.24.0``
|
|
164
|
+
``avro`` ``fastavro>=1.9.0``, ``fastavro>=1.10.0;python_version>="3.12"``
|
|
157
165
|
================== ================================================================
|
|
158
166
|
|
|
159
167
|
The changelog for the provider package can be found in the
|
|
160
|
-
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.
|
|
168
|
+
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.9.0/changelog.html>`_.
|
|
161
169
|
|
|
@@ -23,7 +23,7 @@
|
|
|
23
23
|
|
|
24
24
|
Package ``apache-airflow-providers-databricks``
|
|
25
25
|
|
|
26
|
-
Release: ``7.8.3``
|
|
26
|
+
Release: ``7.9.0``
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
`Databricks <https://databricks.com/>`__
|
|
@@ -36,7 +36,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
|
|
|
36
36
|
are in ``airflow.providers.databricks`` python package.
|
|
37
37
|
|
|
38
38
|
You can find package information and changelog for the provider
|
|
39
|
-
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.
|
|
39
|
+
in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.9.0/>`_.
|
|
40
40
|
|
|
41
41
|
Installation
|
|
42
42
|
------------
|
|
@@ -54,7 +54,7 @@ Requirements
|
|
|
54
54
|
PIP package Version required
|
|
55
55
|
========================================== ======================================
|
|
56
56
|
``apache-airflow`` ``>=2.11.0``
|
|
57
|
-
``apache-airflow-providers-common-compat`` ``>=1.
|
|
57
|
+
``apache-airflow-providers-common-compat`` ``>=1.13.0``
|
|
58
58
|
``apache-airflow-providers-common-sql`` ``>=1.27.0``
|
|
59
59
|
``requests`` ``>=2.32.0,<3``
|
|
60
60
|
``databricks-sql-connector`` ``>=4.0.0``
|
|
@@ -84,6 +84,7 @@ Dependent package
|
|
|
84
84
|
================================================================================================================== =================
|
|
85
85
|
`apache-airflow-providers-common-compat <https://airflow.apache.org/docs/apache-airflow-providers-common-compat>`_ ``common.compat``
|
|
86
86
|
`apache-airflow-providers-common-sql <https://airflow.apache.org/docs/apache-airflow-providers-common-sql>`_ ``common.sql``
|
|
87
|
+
`apache-airflow-providers-google <https://airflow.apache.org/docs/apache-airflow-providers-google>`_ ``google``
|
|
87
88
|
`apache-airflow-providers-openlineage <https://airflow.apache.org/docs/apache-airflow-providers-openlineage>`_ ``openlineage``
|
|
88
89
|
================================================================================================================== =================
|
|
89
90
|
|
|
@@ -99,7 +100,9 @@ Extra Dependencies
|
|
|
99
100
|
``standard`` ``apache-airflow-providers-standard``
|
|
100
101
|
``openlineage`` ``apache-airflow-providers-openlineage>=2.3.0``
|
|
101
102
|
``sqlalchemy`` ``databricks-sqlalchemy>=1.0.2``
|
|
103
|
+
``google`` ``apache-airflow-providers-google>=10.24.0``
|
|
104
|
+
``avro`` ``fastavro>=1.9.0``, ``fastavro>=1.10.0;python_version>="3.12"``
|
|
102
105
|
================== ================================================================
|
|
103
106
|
|
|
104
107
|
The changelog for the provider package can be found in the
|
|
105
|
-
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.
|
|
108
|
+
`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.9.0/changelog.html>`_.
|
|
@@ -26,6 +26,30 @@
|
|
|
26
26
|
Changelog
|
|
27
27
|
---------
|
|
28
28
|
|
|
29
|
+
7.9.0
|
|
30
|
+
.....
|
|
31
|
+
|
|
32
|
+
Features
|
|
33
|
+
~~~~~~~~
|
|
34
|
+
|
|
35
|
+
* ``Add direct GCS export to DatabricksSqlOperator with Parquet/Avro support #55128 (#60543)``
|
|
36
|
+
|
|
37
|
+
Bug Fixes
|
|
38
|
+
~~~~~~~~~
|
|
39
|
+
|
|
40
|
+
* ``Fix missing fastavro after PR #60732 (#60797)``
|
|
41
|
+
* ``Pin fastavro to minimum 1.10.0 for Python 3.13 compatibility (#60732)``
|
|
42
|
+
* ``Updating Databricks API endpoints to appropriate versions (#60647)``
|
|
43
|
+
|
|
44
|
+
Misc
|
|
45
|
+
~~~~
|
|
46
|
+
|
|
47
|
+
* ``Define 'TaskInstanceKey' in task-sdk to support client server separation (#60776)``
|
|
48
|
+
|
|
49
|
+
.. Below changes are excluded from the changelog. Move them to
|
|
50
|
+
appropriate section above if needed. Do not delete the lines(!):
|
|
51
|
+
* ``Revert exclusion of deltalake 1.3.1 as aarch64 binaries are available now (#60611)``
|
|
52
|
+
|
|
29
53
|
7.8.3
|
|
30
54
|
.....
|
|
31
55
|
|
|
@@ -78,7 +78,7 @@ apache-airflow-providers-databricks package
|
|
|
78
78
|
`Databricks <https://databricks.com/>`__
|
|
79
79
|
|
|
80
80
|
|
|
81
|
-
Release: 7.8.3
|
|
81
|
+
Release: 7.9.0
|
|
82
82
|
|
|
83
83
|
Provider package
|
|
84
84
|
----------------
|
|
@@ -102,7 +102,7 @@ The minimum Apache Airflow version supported by this provider distribution is ``
|
|
|
102
102
|
PIP package Version required
|
|
103
103
|
========================================== ======================================
|
|
104
104
|
``apache-airflow`` ``>=2.11.0``
|
|
105
|
-
``apache-airflow-providers-common-compat`` ``>=1.
|
|
105
|
+
``apache-airflow-providers-common-compat`` ``>=1.13.0``
|
|
106
106
|
``apache-airflow-providers-common-sql`` ``>=1.27.0``
|
|
107
107
|
``requests`` ``>=2.32.0,<3``
|
|
108
108
|
``databricks-sql-connector`` ``>=4.0.0``
|
|
@@ -132,6 +132,7 @@ Dependent package
|
|
|
132
132
|
================================================================================================================== =================
|
|
133
133
|
`apache-airflow-providers-common-compat <https://airflow.apache.org/docs/apache-airflow-providers-common-compat>`_ ``common.compat``
|
|
134
134
|
`apache-airflow-providers-common-sql <https://airflow.apache.org/docs/apache-airflow-providers-common-sql>`_ ``common.sql``
|
|
135
|
+
`apache-airflow-providers-google <https://airflow.apache.org/docs/apache-airflow-providers-google>`_ ``google``
|
|
135
136
|
`apache-airflow-providers-openlineage <https://airflow.apache.org/docs/apache-airflow-providers-openlineage>`_ ``openlineage``
|
|
136
137
|
================================================================================================================== =================
|
|
137
138
|
|
|
@@ -141,5 +142,5 @@ Downloading official packages
|
|
|
141
142
|
You can download officially released packages and verify their checksums and signatures from the
|
|
142
143
|
`Official Apache Download site <https://downloads.apache.org/airflow/providers/>`_
|
|
143
144
|
|
|
144
|
-
* `The apache-airflow-providers-databricks 7.
|
|
145
|
-
* `The apache-airflow-providers-databricks 7.
|
|
145
|
+
* `The apache-airflow-providers-databricks 7.9.0 sdist package <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.9.0.tar.gz>`_ (`asc <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.9.0.tar.gz.asc>`__, `sha512 <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.9.0.tar.gz.sha512>`__)
|
|
146
|
+
* `The apache-airflow-providers-databricks 7.9.0 wheel package <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.9.0-py3-none-any.whl>`_ (`asc <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.9.0-py3-none-any.whl.asc>`__, `sha512 <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.9.0-py3-none-any.whl.sha512>`__)
|
|
@@ -22,12 +22,13 @@ description: |
|
|
|
22
22
|
`Databricks <https://databricks.com/>`__
|
|
23
23
|
|
|
24
24
|
state: ready
|
|
25
|
-
source-date-epoch:
|
|
25
|
+
source-date-epoch: 1769460991
|
|
26
26
|
# Note that those versions are maintained by release manager - do not update them manually
|
|
27
27
|
# with the exception of case where other provider in sources has >= new provider version.
|
|
28
28
|
# In such case adding >= NEW_VERSION and bumping to NEW_VERSION in a provider have
|
|
29
29
|
# to be done in the same PR
|
|
30
30
|
versions:
|
|
31
|
+
- 7.9.0
|
|
31
32
|
- 7.8.3
|
|
32
33
|
- 7.8.2
|
|
33
34
|
- 7.8.1
|
|
@@ -25,7 +25,7 @@ build-backend = "flit_core.buildapi"
|
|
|
25
25
|
|
|
26
26
|
[project]
|
|
27
27
|
name = "apache-airflow-providers-databricks"
|
|
28
|
-
version = "7.8.3"
|
|
28
|
+
version = "7.9.0rc1"
|
|
29
29
|
description = "Provider package apache-airflow-providers-databricks for Apache Airflow"
|
|
30
30
|
readme = "README.rst"
|
|
31
31
|
license = "Apache-2.0"
|
|
@@ -58,9 +58,9 @@ requires-python = ">=3.10"
|
|
|
58
58
|
# Make sure to run ``prek update-providers-dependencies --all-files``
|
|
59
59
|
# After you modify the dependencies, and rebuild your Breeze CI image with ``breeze ci-image build``
|
|
60
60
|
dependencies = [
|
|
61
|
-
"apache-airflow>=2.11.
|
|
62
|
-
"apache-airflow-providers-common-compat>=1.
|
|
63
|
-
"apache-airflow-providers-common-sql>=1.27.
|
|
61
|
+
"apache-airflow>=2.11.0rc1",
|
|
62
|
+
"apache-airflow-providers-common-compat>=1.13.0rc1",
|
|
63
|
+
"apache-airflow-providers-common-sql>=1.27.0rc1",
|
|
64
64
|
"requests>=2.32.0,<3",
|
|
65
65
|
"databricks-sql-connector>=4.0.0",
|
|
66
66
|
"aiohttp>=3.9.2, <4",
|
|
@@ -82,17 +82,24 @@ dependencies = [
|
|
|
82
82
|
"azure-identity>=1.3.1",
|
|
83
83
|
]
|
|
84
84
|
"fab" = [
|
|
85
|
-
"apache-airflow-providers-fab>=2.2.
|
|
85
|
+
"apache-airflow-providers-fab>=2.2.0rc1; python_version < '3.13'"
|
|
86
86
|
]
|
|
87
87
|
"standard" = [
|
|
88
88
|
"apache-airflow-providers-standard"
|
|
89
89
|
]
|
|
90
90
|
"openlineage" = [
|
|
91
|
-
"apache-airflow-providers-openlineage>=2.3.
|
|
91
|
+
"apache-airflow-providers-openlineage>=2.3.0rc1"
|
|
92
92
|
]
|
|
93
93
|
"sqlalchemy" = [
|
|
94
94
|
"databricks-sqlalchemy>=1.0.2",
|
|
95
95
|
]
|
|
96
|
+
"google" = [
|
|
97
|
+
"apache-airflow-providers-google>=10.24.0rc1"
|
|
98
|
+
]
|
|
99
|
+
"avro" = [
|
|
100
|
+
"fastavro>=1.9.0",
|
|
101
|
+
'fastavro>=1.10.0;python_version>="3.12"' # Need to pin to this version for Python 3.13 compatibility
|
|
102
|
+
]
|
|
96
103
|
|
|
97
104
|
[dependency-groups]
|
|
98
105
|
dev = [
|
|
@@ -101,10 +108,11 @@ dev = [
|
|
|
101
108
|
"apache-airflow-devel-common",
|
|
102
109
|
"apache-airflow-providers-common-compat",
|
|
103
110
|
"apache-airflow-providers-common-sql",
|
|
111
|
+
"apache-airflow-providers-google",
|
|
104
112
|
"apache-airflow-providers-openlineage",
|
|
105
113
|
# Additional devel dependencies (do not remove this line and add extra development dependencies)
|
|
106
|
-
#
|
|
107
|
-
"deltalake>=1.1.3,!=1.3.0
|
|
114
|
+
# Need to exclude 1.3.0 due to missing aarch64 binaries, fixed with 1.3.1++
|
|
115
|
+
"deltalake>=1.1.3,!=1.3.0",
|
|
108
116
|
"apache-airflow-providers-fab>=2.2.0; python_version < '3.13'",
|
|
109
117
|
"apache-airflow-providers-microsoft-azure",
|
|
110
118
|
"apache-airflow-providers-common-sql[pandas,polars]",
|
|
@@ -138,8 +146,8 @@ apache-airflow-providers-common-sql = {workspace = true}
|
|
|
138
146
|
apache-airflow-providers-standard = {workspace = true}
|
|
139
147
|
|
|
140
148
|
[project.urls]
|
|
141
|
-
"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.
|
|
142
|
-
"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.
|
|
149
|
+
"Documentation" = "https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.9.0"
|
|
150
|
+
"Changelog" = "https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.9.0/changelog.html"
|
|
143
151
|
"Bug Tracker" = "https://github.com/apache/airflow/issues"
|
|
144
152
|
"Source Code" = "https://github.com/apache/airflow"
|
|
145
153
|
"Slack Chat" = "https://s.apache.org/airflow-slack"
|
|
@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version
|
|
|
29
29
|
|
|
30
30
|
__all__ = ["__version__"]
|
|
31
31
|
|
|
32
|
-
__version__ = "7.8.3"
|
|
32
|
+
__version__ = "7.9.0"
|
|
33
33
|
|
|
34
34
|
if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
|
|
35
35
|
"2.11.0"
|
|
@@ -37,10 +37,10 @@ from requests import exceptions as requests_exceptions
|
|
|
37
37
|
from airflow.providers.common.compat.sdk import AirflowException
|
|
38
38
|
from airflow.providers.databricks.hooks.databricks_base import BaseDatabricksHook
|
|
39
39
|
|
|
40
|
-
GET_CLUSTER_ENDPOINT = ("GET", "2.
|
|
41
|
-
RESTART_CLUSTER_ENDPOINT = ("POST", "2.
|
|
42
|
-
START_CLUSTER_ENDPOINT = ("POST", "2.
|
|
43
|
-
TERMINATE_CLUSTER_ENDPOINT = ("POST", "2.
|
|
40
|
+
GET_CLUSTER_ENDPOINT = ("GET", "2.1/clusters/get")
|
|
41
|
+
RESTART_CLUSTER_ENDPOINT = ("POST", "2.1/clusters/restart")
|
|
42
|
+
START_CLUSTER_ENDPOINT = ("POST", "2.1/clusters/start")
|
|
43
|
+
TERMINATE_CLUSTER_ENDPOINT = ("POST", "2.1/clusters/delete")
|
|
44
44
|
|
|
45
45
|
CREATE_ENDPOINT = ("POST", "2.2/jobs/create")
|
|
46
46
|
RESET_ENDPOINT = ("POST", "2.2/jobs/reset")
|
|
@@ -54,20 +54,20 @@ REPAIR_RUN_ENDPOINT = ("POST", "2.2/jobs/runs/repair")
|
|
|
54
54
|
OUTPUT_RUNS_JOB_ENDPOINT = ("GET", "2.2/jobs/runs/get-output")
|
|
55
55
|
CANCEL_ALL_RUNS_ENDPOINT = ("POST", "2.2/jobs/runs/cancel-all")
|
|
56
56
|
|
|
57
|
-
INSTALL_LIBS_ENDPOINT = ("POST", "2.
|
|
58
|
-
UNINSTALL_LIBS_ENDPOINT = ("POST", "2.
|
|
59
|
-
UPDATE_REPO_ENDPOINT = ("PATCH", "2.
|
|
60
|
-
DELETE_REPO_ENDPOINT = ("DELETE", "2.
|
|
61
|
-
CREATE_REPO_ENDPOINT = ("POST", "2.
|
|
57
|
+
INSTALL_LIBS_ENDPOINT = ("POST", "2.0/libraries/install")
|
|
58
|
+
UNINSTALL_LIBS_ENDPOINT = ("POST", "2.0/libraries/uninstall")
|
|
59
|
+
UPDATE_REPO_ENDPOINT = ("PATCH", "2.0/repos/")
|
|
60
|
+
DELETE_REPO_ENDPOINT = ("DELETE", "2.0/repos/")
|
|
61
|
+
CREATE_REPO_ENDPOINT = ("POST", "2.0/repos")
|
|
62
62
|
|
|
63
63
|
LIST_JOBS_ENDPOINT = ("GET", "2.2/jobs/list")
|
|
64
|
-
LIST_PIPELINES_ENDPOINT = ("GET", "2.
|
|
65
|
-
LIST_SQL_ENDPOINTS_ENDPOINT = ("GET", "2.
|
|
64
|
+
LIST_PIPELINES_ENDPOINT = ("GET", "2.0/pipelines")
|
|
65
|
+
LIST_SQL_ENDPOINTS_ENDPOINT = ("GET", "2.0/sql/warehouses")
|
|
66
66
|
|
|
67
|
-
WORKSPACE_GET_STATUS_ENDPOINT = ("GET", "2.
|
|
67
|
+
WORKSPACE_GET_STATUS_ENDPOINT = ("GET", "2.0/workspace/get-status")
|
|
68
68
|
|
|
69
|
-
SPARK_VERSIONS_ENDPOINT = ("GET", "2.
|
|
70
|
-
SQL_STATEMENTS_ENDPOINT = "2.
|
|
69
|
+
SPARK_VERSIONS_ENDPOINT = ("GET", "2.1/clusters/spark-versions")
|
|
70
|
+
SQL_STATEMENTS_ENDPOINT = "2.0/sql/statements"
|
|
71
71
|
|
|
72
72
|
|
|
73
73
|
class RunLifeCycleState(Enum):
|
|
@@ -717,7 +717,7 @@ class DatabricksHook(BaseDatabricksHook):
|
|
|
717
717
|
"""
|
|
718
718
|
Install libraries on the cluster.
|
|
719
719
|
|
|
720
|
-
Utility function to call the ``2.
|
|
720
|
+
Utility function to call the ``2.0/libraries/install`` endpoint.
|
|
721
721
|
|
|
722
722
|
:param json: json dictionary containing cluster_id and an array of library
|
|
723
723
|
"""
|
|
@@ -727,7 +727,7 @@ class DatabricksHook(BaseDatabricksHook):
|
|
|
727
727
|
"""
|
|
728
728
|
Uninstall libraries on the cluster.
|
|
729
729
|
|
|
730
|
-
Utility function to call the ``2.
|
|
730
|
+
Utility function to call the ``2.0/libraries/uninstall`` endpoint.
|
|
731
731
|
|
|
732
732
|
:param json: json dictionary containing cluster_id and an array of library
|
|
733
733
|
"""
|
|
@@ -790,7 +790,7 @@ class DatabricksHook(BaseDatabricksHook):
|
|
|
790
790
|
:param json: payload
|
|
791
791
|
:return: json containing permission specification
|
|
792
792
|
"""
|
|
793
|
-
return self._do_api_call(("PATCH", f"2.
|
|
793
|
+
return self._do_api_call(("PATCH", f"2.0/permissions/jobs/{job_id}"), json)
|
|
794
794
|
|
|
795
795
|
def post_sql_statement(self, json: dict[str, Any]) -> str:
|
|
796
796
|
"""
|
|
@@ -53,7 +53,7 @@ from airflow.providers.databricks.utils.mixins import DatabricksSQLStatementsMix
|
|
|
53
53
|
from airflow.providers.databricks.version_compat import AIRFLOW_V_3_0_PLUS
|
|
54
54
|
|
|
55
55
|
if TYPE_CHECKING:
|
|
56
|
-
from airflow.
|
|
56
|
+
from airflow.providers.common.compat.sdk import TaskInstanceKey
|
|
57
57
|
from airflow.providers.databricks.operators.databricks_workflow import (
|
|
58
58
|
DatabricksWorkflowTaskGroup,
|
|
59
59
|
)
|
|
@@ -21,13 +21,20 @@ from __future__ import annotations
|
|
|
21
21
|
|
|
22
22
|
import csv
|
|
23
23
|
import json
|
|
24
|
+
import os
|
|
24
25
|
from collections.abc import Sequence
|
|
25
26
|
from functools import cached_property
|
|
27
|
+
from tempfile import NamedTemporaryFile
|
|
26
28
|
from typing import TYPE_CHECKING, Any, ClassVar
|
|
29
|
+
from urllib.parse import urlparse
|
|
27
30
|
|
|
28
31
|
from databricks.sql.utils import ParamEscaper
|
|
29
32
|
|
|
30
|
-
from airflow.providers.common.compat.sdk import
|
|
33
|
+
from airflow.providers.common.compat.sdk import (
|
|
34
|
+
AirflowException,
|
|
35
|
+
AirflowOptionalProviderFeatureException,
|
|
36
|
+
BaseOperator,
|
|
37
|
+
)
|
|
31
38
|
from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator
|
|
32
39
|
from airflow.providers.databricks.hooks.databricks_sql import DatabricksSqlHook
|
|
33
40
|
|
|
@@ -62,13 +69,27 @@ class DatabricksSqlOperator(SQLExecuteQueryOperator):
|
|
|
62
69
|
:param catalog: An optional initial catalog to use. Requires DBR version 9.0+ (templated)
|
|
63
70
|
:param schema: An optional initial schema to use. Requires DBR version 9.0+ (templated)
|
|
64
71
|
:param output_path: optional string specifying the file to which write selected data. (templated)
|
|
65
|
-
|
|
66
|
-
|
|
72
|
+
Supports local file paths and GCS URIs (e.g., ``gs://bucket/path/file.parquet``).
|
|
73
|
+
When using GCS URIs, requires the ``apache-airflow-providers-google`` package.
|
|
74
|
+
:param output_format: format of output data if ``output_path`` is specified.
|
|
75
|
+
Possible values are ``csv``, ``json``, ``jsonl``, ``parquet``, ``avro``. Default is ``csv``.
|
|
67
76
|
:param csv_params: parameters that will be passed to the ``csv.DictWriter`` class used to write CSV data.
|
|
77
|
+
:param gcp_conn_id: The connection ID to use for connecting to Google Cloud when using GCS output path.
|
|
78
|
+
Default is ``google_cloud_default``.
|
|
79
|
+
:param gcs_impersonation_chain: Optional service account to impersonate using short-term
|
|
80
|
+
credentials for GCS upload, or chained list of accounts required to get the access_token
|
|
81
|
+
of the last account in the list, which will be impersonated in the request. (templated)
|
|
68
82
|
"""
|
|
69
83
|
|
|
70
84
|
template_fields: Sequence[str] = tuple(
|
|
71
|
-
{
|
|
85
|
+
{
|
|
86
|
+
"_output_path",
|
|
87
|
+
"schema",
|
|
88
|
+
"catalog",
|
|
89
|
+
"http_headers",
|
|
90
|
+
"databricks_conn_id",
|
|
91
|
+
"_gcs_impersonation_chain",
|
|
92
|
+
}
|
|
72
93
|
| set(SQLExecuteQueryOperator.template_fields)
|
|
73
94
|
)
|
|
74
95
|
|
|
@@ -90,6 +111,8 @@ class DatabricksSqlOperator(SQLExecuteQueryOperator):
|
|
|
90
111
|
output_format: str = "csv",
|
|
91
112
|
csv_params: dict[str, Any] | None = None,
|
|
92
113
|
client_parameters: dict[str, Any] | None = None,
|
|
114
|
+
gcp_conn_id: str = "google_cloud_default",
|
|
115
|
+
gcs_impersonation_chain: str | Sequence[str] | None = None,
|
|
93
116
|
**kwargs,
|
|
94
117
|
) -> None:
|
|
95
118
|
super().__init__(conn_id=databricks_conn_id, **kwargs)
|
|
@@ -105,6 +128,8 @@ class DatabricksSqlOperator(SQLExecuteQueryOperator):
|
|
|
105
128
|
self.http_headers = http_headers
|
|
106
129
|
self.catalog = catalog
|
|
107
130
|
self.schema = schema
|
|
131
|
+
self._gcp_conn_id = gcp_conn_id
|
|
132
|
+
self._gcs_impersonation_chain = gcs_impersonation_chain
|
|
108
133
|
|
|
109
134
|
@cached_property
|
|
110
135
|
def _hook(self) -> DatabricksSqlHook:
|
|
@@ -127,41 +152,151 @@ class DatabricksSqlOperator(SQLExecuteQueryOperator):
|
|
|
127
152
|
def _should_run_output_processing(self) -> bool:
|
|
128
153
|
return self.do_xcom_push or bool(self._output_path)
|
|
129
154
|
|
|
155
|
+
@property
|
|
156
|
+
def _is_gcs_output(self) -> bool:
|
|
157
|
+
"""Check if the output path is a GCS URI."""
|
|
158
|
+
return self._output_path.startswith("gs://") if self._output_path else False
|
|
159
|
+
|
|
160
|
+
def _parse_gcs_path(self, path: str) -> tuple[str, str]:
|
|
161
|
+
"""Parse a GCS URI into bucket and object name."""
|
|
162
|
+
parsed = urlparse(path)
|
|
163
|
+
bucket = parsed.netloc
|
|
164
|
+
object_name = parsed.path.lstrip("/")
|
|
165
|
+
return bucket, object_name
|
|
166
|
+
|
|
167
|
+
def _upload_to_gcs(self, local_path: str, gcs_path: str) -> None:
|
|
168
|
+
"""Upload a local file to GCS."""
|
|
169
|
+
try:
|
|
170
|
+
from airflow.providers.google.cloud.hooks.gcs import GCSHook
|
|
171
|
+
except ImportError:
|
|
172
|
+
raise AirflowOptionalProviderFeatureException(
|
|
173
|
+
"The 'apache-airflow-providers-google' package is required for GCS output. "
|
|
174
|
+
"Install it with: pip install apache-airflow-providers-google"
|
|
175
|
+
)
|
|
176
|
+
|
|
177
|
+
bucket, object_name = self._parse_gcs_path(gcs_path)
|
|
178
|
+
hook = GCSHook(
|
|
179
|
+
gcp_conn_id=self._gcp_conn_id,
|
|
180
|
+
impersonation_chain=self._gcs_impersonation_chain,
|
|
181
|
+
)
|
|
182
|
+
hook.upload(
|
|
183
|
+
bucket_name=bucket,
|
|
184
|
+
object_name=object_name,
|
|
185
|
+
filename=local_path,
|
|
186
|
+
)
|
|
187
|
+
self.log.info("Uploaded output to %s", gcs_path)
|
|
188
|
+
|
|
189
|
+
def _write_parquet(self, file_path: str, field_names: list[str], rows: list[Any]) -> None:
|
|
190
|
+
"""Write data to a Parquet file."""
|
|
191
|
+
import pyarrow as pa
|
|
192
|
+
import pyarrow.parquet as pq
|
|
193
|
+
|
|
194
|
+
data: dict[str, list] = {name: [] for name in field_names}
|
|
195
|
+
for row in rows:
|
|
196
|
+
row_dict = row._asdict()
|
|
197
|
+
for name in field_names:
|
|
198
|
+
data[name].append(row_dict[name])
|
|
199
|
+
|
|
200
|
+
table = pa.Table.from_pydict(data)
|
|
201
|
+
pq.write_table(table, file_path)
|
|
202
|
+
|
|
203
|
+
def _write_avro(self, file_path: str, field_names: list[str], rows: list[Any]) -> None:
|
|
204
|
+
"""Write data to an Avro file using fastavro."""
|
|
205
|
+
try:
|
|
206
|
+
from fastavro import writer
|
|
207
|
+
except ImportError:
|
|
208
|
+
raise AirflowOptionalProviderFeatureException(
|
|
209
|
+
"The 'fastavro' package is required for Avro output. Install it with: pip install fastavro"
|
|
210
|
+
)
|
|
211
|
+
|
|
212
|
+
data: dict[str, list] = {name: [] for name in field_names}
|
|
213
|
+
for row in rows:
|
|
214
|
+
row_dict = row._asdict()
|
|
215
|
+
for name in field_names:
|
|
216
|
+
data[name].append(row_dict[name])
|
|
217
|
+
|
|
218
|
+
schema_fields = []
|
|
219
|
+
for name in field_names:
|
|
220
|
+
sample_val = next(
|
|
221
|
+
(data[name][i] for i in range(len(data[name])) if data[name][i] is not None), None
|
|
222
|
+
)
|
|
223
|
+
if sample_val is None:
|
|
224
|
+
avro_type = ["null", "string"]
|
|
225
|
+
elif isinstance(sample_val, bool):
|
|
226
|
+
avro_type = ["null", "boolean"]
|
|
227
|
+
elif isinstance(sample_val, int):
|
|
228
|
+
avro_type = ["null", "long"]
|
|
229
|
+
elif isinstance(sample_val, float):
|
|
230
|
+
avro_type = ["null", "double"]
|
|
231
|
+
else:
|
|
232
|
+
avro_type = ["null", "string"]
|
|
233
|
+
schema_fields.append({"name": name, "type": avro_type})
|
|
234
|
+
|
|
235
|
+
avro_schema = {
|
|
236
|
+
"type": "record",
|
|
237
|
+
"name": "QueryResult",
|
|
238
|
+
"fields": schema_fields,
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
records = [row._asdict() for row in rows]
|
|
242
|
+
with open(file_path, "wb") as f:
|
|
243
|
+
writer(f, avro_schema, records)
|
|
244
|
+
|
|
130
245
|
def _process_output(self, results: list[Any], descriptions: list[Sequence[Sequence] | None]) -> list[Any]:
|
|
131
246
|
if not self._output_path:
|
|
132
247
|
return list(zip(descriptions, results))
|
|
133
248
|
if not self._output_format:
|
|
134
249
|
raise AirflowException("Output format should be specified!")
|
|
135
|
-
|
|
250
|
+
|
|
136
251
|
last_description = descriptions[-1]
|
|
137
252
|
last_results = results[-1]
|
|
138
253
|
if last_description is None:
|
|
139
|
-
raise AirflowException("There is missing description present for the output file.
|
|
254
|
+
raise AirflowException("There is missing description present for the output file.")
|
|
140
255
|
field_names = [field[0] for field in last_description]
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
write_header = csv_params.get("header", True)
|
|
148
|
-
if "header" in csv_params:
|
|
149
|
-
del csv_params["header"]
|
|
150
|
-
writer = csv.DictWriter(file, fieldnames=field_names, **csv_params)
|
|
151
|
-
if write_header:
|
|
152
|
-
writer.writeheader()
|
|
153
|
-
for row in last_results:
|
|
154
|
-
writer.writerow(row._asdict())
|
|
155
|
-
elif self._output_format.lower() == "json":
|
|
156
|
-
with open(self._output_path, "w") as file:
|
|
157
|
-
file.write(json.dumps([row._asdict() for row in last_results]))
|
|
158
|
-
elif self._output_format.lower() == "jsonl":
|
|
159
|
-
with open(self._output_path, "w") as file:
|
|
160
|
-
for row in last_results:
|
|
161
|
-
file.write(json.dumps(row._asdict()))
|
|
162
|
-
file.write("\n")
|
|
256
|
+
|
|
257
|
+
if self._is_gcs_output:
|
|
258
|
+
suffix = f".{self._output_format.lower()}"
|
|
259
|
+
tmp_file = NamedTemporaryFile(mode="w", suffix=suffix, delete=False, newline="")
|
|
260
|
+
local_path = tmp_file.name
|
|
261
|
+
tmp_file.close()
|
|
163
262
|
else:
|
|
164
|
-
|
|
263
|
+
local_path = self._output_path
|
|
264
|
+
|
|
265
|
+
try:
|
|
266
|
+
output_format = self._output_format.lower()
|
|
267
|
+
if output_format == "csv":
|
|
268
|
+
with open(local_path, "w", newline="") as file:
|
|
269
|
+
if self._csv_params:
|
|
270
|
+
csv_params = self._csv_params.copy()
|
|
271
|
+
else:
|
|
272
|
+
csv_params = {}
|
|
273
|
+
write_header = csv_params.pop("header", True)
|
|
274
|
+
writer = csv.DictWriter(file, fieldnames=field_names, **csv_params)
|
|
275
|
+
if write_header:
|
|
276
|
+
writer.writeheader()
|
|
277
|
+
for row in last_results:
|
|
278
|
+
writer.writerow(row._asdict())
|
|
279
|
+
elif output_format == "json":
|
|
280
|
+
with open(local_path, "w") as file:
|
|
281
|
+
file.write(json.dumps([row._asdict() for row in last_results]))
|
|
282
|
+
elif output_format == "jsonl":
|
|
283
|
+
with open(local_path, "w") as file:
|
|
284
|
+
for row in last_results:
|
|
285
|
+
file.write(json.dumps(row._asdict()))
|
|
286
|
+
file.write("\n")
|
|
287
|
+
elif output_format == "parquet":
|
|
288
|
+
self._write_parquet(local_path, field_names, last_results)
|
|
289
|
+
elif output_format == "avro":
|
|
290
|
+
self._write_avro(local_path, field_names, last_results)
|
|
291
|
+
else:
|
|
292
|
+
raise ValueError(f"Unsupported output format: '{self._output_format}'")
|
|
293
|
+
|
|
294
|
+
if self._is_gcs_output:
|
|
295
|
+
self._upload_to_gcs(local_path, self._output_path)
|
|
296
|
+
finally:
|
|
297
|
+
if self._is_gcs_output and os.path.exists(local_path):
|
|
298
|
+
os.unlink(local_path)
|
|
299
|
+
|
|
165
300
|
return list(zip(descriptions, results))
|
|
166
301
|
|
|
167
302
|
|