apache-airflow-providers-databricks 7.8.3rc1__tar.gz → 7.9.0rc1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/PKG-INFO +16 -8
  2. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/README.rst +7 -4
  3. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/docs/changelog.rst +24 -0
  4. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/docs/index.rst +5 -4
  5. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/provider.yaml +2 -1
  6. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/pyproject.toml +14 -6
  7. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/__init__.py +1 -1
  8. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/hooks/databricks.py +17 -17
  9. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/operators/databricks.py +1 -1
  10. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/operators/databricks_sql.py +164 -29
  11. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/plugins/databricks_workflow.py +3 -3
  12. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/hooks/test_databricks.py +11 -11
  13. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/operators/test_databricks_sql.py +139 -0
  14. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/LICENSE +0 -0
  15. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/NOTICE +0 -0
  16. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/docs/.latest-doc-only-change.txt +0 -0
  17. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/docs/commits.rst +0 -0
  18. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/docs/conf.py +0 -0
  19. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/docs/connections/databricks.rst +0 -0
  20. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/docs/img/databricks_workflow_task_group_airflow_graph_view.png +0 -0
  21. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/docs/img/workflow_plugin_launch_task.png +0 -0
  22. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/docs/img/workflow_plugin_single_task.png +0 -0
  23. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/docs/img/workflow_run_databricks_graph_view.png +0 -0
  24. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/docs/installing-providers-from-sources.rst +0 -0
  25. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/docs/integration-logos/Databricks.png +0 -0
  26. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/docs/operators/copy_into.rst +0 -0
  27. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/docs/operators/index.rst +0 -0
  28. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/docs/operators/jobs_create.rst +0 -0
  29. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/docs/operators/notebook.rst +0 -0
  30. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/docs/operators/repos_create.rst +0 -0
  31. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/docs/operators/repos_delete.rst +0 -0
  32. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/docs/operators/repos_update.rst +0 -0
  33. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/docs/operators/run_now.rst +0 -0
  34. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/docs/operators/sql.rst +0 -0
  35. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/docs/operators/sql_statements.rst +0 -0
  36. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/docs/operators/submit_run.rst +0 -0
  37. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/docs/operators/task.rst +0 -0
  38. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/docs/operators/workflow.rst +0 -0
  39. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/docs/plugins/index.rst +0 -0
  40. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/docs/plugins/workflow.rst +0 -0
  41. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/docs/security.rst +0 -0
  42. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/__init__.py +0 -0
  43. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/__init__.py +0 -0
  44. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/exceptions.py +0 -0
  45. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/get_provider_info.py +0 -0
  46. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/hooks/__init__.py +0 -0
  47. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/hooks/databricks_base.py +0 -0
  48. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/hooks/databricks_sql.py +0 -0
  49. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/operators/__init__.py +0 -0
  50. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/operators/databricks_repos.py +0 -0
  51. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/operators/databricks_workflow.py +0 -0
  52. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/plugins/__init__.py +0 -0
  53. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/sensors/__init__.py +0 -0
  54. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/sensors/databricks.py +0 -0
  55. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/sensors/databricks_partition.py +0 -0
  56. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/sensors/databricks_sql.py +0 -0
  57. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/triggers/__init__.py +0 -0
  58. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/triggers/databricks.py +0 -0
  59. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/utils/__init__.py +0 -0
  60. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/utils/databricks.py +0 -0
  61. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/utils/mixins.py +0 -0
  62. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/utils/openlineage.py +0 -0
  63. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/src/airflow/providers/databricks/version_compat.py +0 -0
  64. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/tests/conftest.py +0 -0
  65. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/tests/system/__init__.py +0 -0
  66. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/tests/system/databricks/__init__.py +0 -0
  67. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/tests/system/databricks/example_databricks.py +0 -0
  68. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/tests/system/databricks/example_databricks_repos.py +0 -0
  69. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/tests/system/databricks/example_databricks_sensors.py +0 -0
  70. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/tests/system/databricks/example_databricks_sql.py +0 -0
  71. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/tests/system/databricks/example_databricks_workflow.py +0 -0
  72. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/__init__.py +0 -0
  73. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/__init__.py +0 -0
  74. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/hooks/__init__.py +0 -0
  75. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/hooks/test_databricks_azure_workload_identity.py +0 -0
  76. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/hooks/test_databricks_azure_workload_identity_async.py +0 -0
  77. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/hooks/test_databricks_base.py +0 -0
  78. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/hooks/test_databricks_sql.py +0 -0
  79. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/operators/__init__.py +0 -0
  80. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/operators/test_databricks.py +0 -0
  81. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/operators/test_databricks_copy.py +0 -0
  82. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/operators/test_databricks_repos.py +0 -0
  83. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/operators/test_databricks_workflow.py +0 -0
  84. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/plugins/__init__.py +0 -0
  85. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/plugins/test_databricks_workflow.py +0 -0
  86. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/sensors/__init__.py +0 -0
  87. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/sensors/test_databricks.py +0 -0
  88. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/sensors/test_databricks_partition.py +0 -0
  89. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/sensors/test_databricks_sql.py +0 -0
  90. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/test_exceptions.py +0 -0
  91. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/triggers/__init__.py +0 -0
  92. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/triggers/test_databricks.py +0 -0
  93. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/utils/__init__.py +0 -0
  94. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/utils/test_databricks.py +0 -0
  95. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/utils/test_mixins.py +0 -0
  96. {apache_airflow_providers_databricks-7.8.3rc1 → apache_airflow_providers_databricks-7.9.0rc1}/tests/unit/databricks/utils/test_openlineage.py +0 -0
--- apache_airflow_providers_databricks-7.8.3rc1/PKG-INFO
+++ apache_airflow_providers_databricks-7.9.0rc1/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: apache-airflow-providers-databricks
-Version: 7.8.3rc1
+Version: 7.9.0rc1
 Summary: Provider package apache-airflow-providers-databricks for Apache Airflow
 Keywords: airflow-provider,databricks,airflow,integration
 Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -23,7 +23,7 @@ Classifier: Topic :: System :: Monitoring
 License-File: LICENSE
 License-File: NOTICE
 Requires-Dist: apache-airflow>=2.11.0rc1
-Requires-Dist: apache-airflow-providers-common-compat>=1.12.0rc1
+Requires-Dist: apache-airflow-providers-common-compat>=1.13.0rc1
 Requires-Dist: apache-airflow-providers-common-sql>=1.27.0rc1
 Requires-Dist: requests>=2.32.0,<3
 Requires-Dist: databricks-sql-connector>=4.0.0
@@ -33,21 +33,26 @@ Requires-Dist: pandas>=2.1.2; python_version <"3.13"
 Requires-Dist: pandas>=2.2.3; python_version >="3.13"
 Requires-Dist: pyarrow>=16.1.0; python_version < '3.13'
 Requires-Dist: pyarrow>=18.0.0; python_version >= '3.13'
+Requires-Dist: fastavro>=1.9.0 ; extra == "avro"
+Requires-Dist: fastavro>=1.10.0 ; extra == "avro" and (python_version>="3.12")
 Requires-Dist: azure-identity>=1.3.1 ; extra == "azure-identity"
 Requires-Dist: apache-airflow-providers-fab>=2.2.0rc1 ; extra == "fab" and ( python_version < '3.13')
+Requires-Dist: apache-airflow-providers-google>=10.24.0rc1 ; extra == "google"
 Requires-Dist: apache-airflow-providers-openlineage>=2.3.0rc1 ; extra == "openlineage"
 Requires-Dist: databricks-sdk==0.10.0 ; extra == "sdk"
 Requires-Dist: databricks-sqlalchemy>=1.0.2 ; extra == "sqlalchemy"
 Requires-Dist: apache-airflow-providers-standard ; extra == "standard"
 Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
-Project-URL: Changelog, https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.8.3/changelog.html
-Project-URL: Documentation, https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.8.3
+Project-URL: Changelog, https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.9.0/changelog.html
+Project-URL: Documentation, https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.9.0
 Project-URL: Mastodon, https://fosstodon.org/@airflow
 Project-URL: Slack Chat, https://s.apache.org/airflow-slack
 Project-URL: Source Code, https://github.com/apache/airflow
 Project-URL: YouTube, https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/
+Provides-Extra: avro
 Provides-Extra: azure-identity
 Provides-Extra: fab
+Provides-Extra: google
 Provides-Extra: openlineage
 Provides-Extra: sdk
 Provides-Extra: sqlalchemy
@@ -78,7 +83,7 @@ Provides-Extra: standard

 Package ``apache-airflow-providers-databricks``

-Release: ``7.8.3``
+Release: ``7.9.0``


 `Databricks <https://databricks.com/>`__
@@ -91,7 +96,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
 are in ``airflow.providers.databricks`` python package.

 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.8.3/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.9.0/>`_.

 Installation
 ------------
@@ -109,7 +114,7 @@ Requirements
 PIP package Version required
 ========================================== ======================================
 ``apache-airflow`` ``>=2.11.0``
-``apache-airflow-providers-common-compat`` ``>=1.10.1``
+``apache-airflow-providers-common-compat`` ``>=1.13.0``
 ``apache-airflow-providers-common-sql`` ``>=1.27.0``
 ``requests`` ``>=2.32.0,<3``
 ``databricks-sql-connector`` ``>=4.0.0``
@@ -139,6 +144,7 @@ Dependent package
 ================================================================================================================== =================
 `apache-airflow-providers-common-compat <https://airflow.apache.org/docs/apache-airflow-providers-common-compat>`_ ``common.compat``
 `apache-airflow-providers-common-sql <https://airflow.apache.org/docs/apache-airflow-providers-common-sql>`_ ``common.sql``
+`apache-airflow-providers-google <https://airflow.apache.org/docs/apache-airflow-providers-google>`_ ``google``
 `apache-airflow-providers-openlineage <https://airflow.apache.org/docs/apache-airflow-providers-openlineage>`_ ``openlineage``
 ================================================================================================================== =================

@@ -154,8 +160,10 @@ Extra Dependencies
 ``standard`` ``apache-airflow-providers-standard``
 ``openlineage`` ``apache-airflow-providers-openlineage>=2.3.0``
 ``sqlalchemy`` ``databricks-sqlalchemy>=1.0.2``
+``google`` ``apache-airflow-providers-google>=10.24.0``
+``avro`` ``fastavro>=1.9.0``, ``fastavro>=1.10.0;python_version>="3.12"``
 ================== ================================================================

 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.8.3/changelog.html>`_.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.9.0/changelog.html>`_.

--- apache_airflow_providers_databricks-7.8.3rc1/README.rst
+++ apache_airflow_providers_databricks-7.9.0rc1/README.rst
@@ -23,7 +23,7 @@

 Package ``apache-airflow-providers-databricks``

-Release: ``7.8.3``
+Release: ``7.9.0``


 `Databricks <https://databricks.com/>`__
@@ -36,7 +36,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
 are in ``airflow.providers.databricks`` python package.

 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.8.3/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.9.0/>`_.

 Installation
 ------------
@@ -54,7 +54,7 @@ Requirements
 PIP package Version required
 ========================================== ======================================
 ``apache-airflow`` ``>=2.11.0``
-``apache-airflow-providers-common-compat`` ``>=1.10.1``
+``apache-airflow-providers-common-compat`` ``>=1.13.0``
 ``apache-airflow-providers-common-sql`` ``>=1.27.0``
 ``requests`` ``>=2.32.0,<3``
 ``databricks-sql-connector`` ``>=4.0.0``
@@ -84,6 +84,7 @@ Dependent package
 ================================================================================================================== =================
 `apache-airflow-providers-common-compat <https://airflow.apache.org/docs/apache-airflow-providers-common-compat>`_ ``common.compat``
 `apache-airflow-providers-common-sql <https://airflow.apache.org/docs/apache-airflow-providers-common-sql>`_ ``common.sql``
+`apache-airflow-providers-google <https://airflow.apache.org/docs/apache-airflow-providers-google>`_ ``google``
 `apache-airflow-providers-openlineage <https://airflow.apache.org/docs/apache-airflow-providers-openlineage>`_ ``openlineage``
 ================================================================================================================== =================

@@ -99,7 +100,9 @@ Extra Dependencies
 ``standard`` ``apache-airflow-providers-standard``
 ``openlineage`` ``apache-airflow-providers-openlineage>=2.3.0``
 ``sqlalchemy`` ``databricks-sqlalchemy>=1.0.2``
+``google`` ``apache-airflow-providers-google>=10.24.0``
+``avro`` ``fastavro>=1.9.0``, ``fastavro>=1.10.0;python_version>="3.12"``
 ================== ================================================================

 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.8.3/changelog.html>`_.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.9.0/changelog.html>`_.
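The ``google`` and ``avro`` extras added in the tables above follow the standard pip extras syntax, so a deployment that wants both the GCS upload path and Avro output could, for example, install the provider with ``pip install "apache-airflow-providers-databricks[google,avro]"`` (the exact version pin is up to the deployment; this diff covers the ``7.9.0rc1`` candidate).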
--- apache_airflow_providers_databricks-7.8.3rc1/docs/changelog.rst
+++ apache_airflow_providers_databricks-7.9.0rc1/docs/changelog.rst
@@ -26,6 +26,30 @@
 Changelog
 ---------

+7.9.0
+.....
+
+Features
+~~~~~~~~
+
+* ``Add direct GCS export to DatabricksSqlOperator with Parquet/Avro support #55128 (#60543)``
+
+Bug Fixes
+~~~~~~~~~
+
+* ``Fix missing fastavro after PR #60732 (#60797)``
+* ``Pin fastavro to minimum 1.10.0 for Python 3.13 compatibility (#60732)``
+* ``Updating Databricks API endpoints to appropriate versions (#60647)``
+
+Misc
+~~~~
+
+* ``Define 'TaskInstanceKey' in task-sdk to support client server separation (#60776)``
+
+.. Below changes are excluded from the changelog. Move them to
+   appropriate section above if needed. Do not delete the lines(!):
+   * ``Revert exclusion of deltalake 1.3.1 as aarch64 binaries are available now (#60611)``
+
 7.8.3
 .....
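For orientation, the "direct GCS export" feature listed above corresponds to the ``DatabricksSqlOperator`` changes shown further down in this diff. A minimal, illustrative DAG sketch follows; the connection IDs, bucket, and table name are placeholders rather than values from the release, and the Databricks connection is assumed to already carry the warehouse/HTTP path:

    import datetime

    from airflow import DAG  # classic import path; also available via airflow.sdk on Airflow 3
    from airflow.providers.databricks.operators.databricks_sql import DatabricksSqlOperator

    with DAG(
        dag_id="example_databricks_sql_gcs_export",  # hypothetical DAG id
        start_date=datetime.datetime(2026, 1, 1),
        schedule=None,
    ) as dag:
        # Runs the query on Databricks and writes the last result set straight to a
        # GCS object as Parquet (new in 7.9.0); a plain local path still works as before.
        export_orders = DatabricksSqlOperator(
            task_id="export_orders",
            databricks_conn_id="databricks_default",
            sql="SELECT * FROM demo.orders",  # placeholder table
            output_path="gs://my-bucket/exports/orders.parquet",  # placeholder bucket
            output_format="parquet",  # now also accepts "parquet" and "avro"
            gcp_conn_id="google_cloud_default",  # new parameter in this release
        )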
 
--- apache_airflow_providers_databricks-7.8.3rc1/docs/index.rst
+++ apache_airflow_providers_databricks-7.9.0rc1/docs/index.rst
@@ -78,7 +78,7 @@ apache-airflow-providers-databricks package
 `Databricks <https://databricks.com/>`__


-Release: 7.8.3
+Release: 7.9.0

 Provider package
 ----------------
@@ -102,7 +102,7 @@ The minimum Apache Airflow version supported by this provider distribution is ``
 PIP package Version required
 ========================================== ======================================
 ``apache-airflow`` ``>=2.11.0``
-``apache-airflow-providers-common-compat`` ``>=1.12.0``
+``apache-airflow-providers-common-compat`` ``>=1.13.0``
 ``apache-airflow-providers-common-sql`` ``>=1.27.0``
 ``requests`` ``>=2.32.0,<3``
 ``databricks-sql-connector`` ``>=4.0.0``
@@ -132,6 +132,7 @@ Dependent package
 ================================================================================================================== =================
 `apache-airflow-providers-common-compat <https://airflow.apache.org/docs/apache-airflow-providers-common-compat>`_ ``common.compat``
 `apache-airflow-providers-common-sql <https://airflow.apache.org/docs/apache-airflow-providers-common-sql>`_ ``common.sql``
+`apache-airflow-providers-google <https://airflow.apache.org/docs/apache-airflow-providers-google>`_ ``google``
 `apache-airflow-providers-openlineage <https://airflow.apache.org/docs/apache-airflow-providers-openlineage>`_ ``openlineage``
 ================================================================================================================== =================

@@ -141,5 +142,5 @@ Downloading official packages
 You can download officially released packages and verify their checksums and signatures from the
 `Official Apache Download site <https://downloads.apache.org/airflow/providers/>`_

-* `The apache-airflow-providers-databricks 7.8.3 sdist package <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.8.3.tar.gz>`_ (`asc <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.8.3.tar.gz.asc>`__, `sha512 <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.8.3.tar.gz.sha512>`__)
-* `The apache-airflow-providers-databricks 7.8.3 wheel package <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.8.3-py3-none-any.whl>`_ (`asc <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.8.3-py3-none-any.whl.asc>`__, `sha512 <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.8.3-py3-none-any.whl.sha512>`__)
+* `The apache-airflow-providers-databricks 7.9.0 sdist package <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.9.0.tar.gz>`_ (`asc <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.9.0.tar.gz.asc>`__, `sha512 <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.9.0.tar.gz.sha512>`__)
+* `The apache-airflow-providers-databricks 7.9.0 wheel package <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.9.0-py3-none-any.whl>`_ (`asc <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.9.0-py3-none-any.whl.asc>`__, `sha512 <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.9.0-py3-none-any.whl.sha512>`__)
--- apache_airflow_providers_databricks-7.8.3rc1/provider.yaml
+++ apache_airflow_providers_databricks-7.9.0rc1/provider.yaml
@@ -22,12 +22,13 @@ description: |
   `Databricks <https://databricks.com/>`__

 state: ready
-source-date-epoch: 1768334769
+source-date-epoch: 1769460991
 # Note that those versions are maintained by release manager - do not update them manually
 # with the exception of case where other provider in sources has >= new provider version.
 # In such case adding >= NEW_VERSION and bumping to NEW_VERSION in a provider have
 # to be done in the same PR
 versions:
+  - 7.9.0
   - 7.8.3
   - 7.8.2
   - 7.8.1
--- apache_airflow_providers_databricks-7.8.3rc1/pyproject.toml
+++ apache_airflow_providers_databricks-7.9.0rc1/pyproject.toml
@@ -25,7 +25,7 @@ build-backend = "flit_core.buildapi"

 [project]
 name = "apache-airflow-providers-databricks"
-version = "7.8.3rc1"
+version = "7.9.0rc1"
 description = "Provider package apache-airflow-providers-databricks for Apache Airflow"
 readme = "README.rst"
 license = "Apache-2.0"
@@ -59,7 +59,7 @@ requires-python = ">=3.10"
 # After you modify the dependencies, and rebuild your Breeze CI image with ``breeze ci-image build``
 dependencies = [
     "apache-airflow>=2.11.0rc1",
-    "apache-airflow-providers-common-compat>=1.12.0rc1",
+    "apache-airflow-providers-common-compat>=1.13.0rc1",
     "apache-airflow-providers-common-sql>=1.27.0rc1",
     "requests>=2.32.0,<3",
     "databricks-sql-connector>=4.0.0",
@@ -93,6 +93,13 @@ dependencies = [
 "sqlalchemy" = [
     "databricks-sqlalchemy>=1.0.2",
 ]
+"google" = [
+    "apache-airflow-providers-google>=10.24.0rc1"
+]
+"avro" = [
+    "fastavro>=1.9.0",
+    'fastavro>=1.10.0;python_version>="3.12"'  # Need to pin to this version for Python 3.13 compatibility
+]

 [dependency-groups]
 dev = [
@@ -101,10 +108,11 @@ dev = [
     "apache-airflow-devel-common",
     "apache-airflow-providers-common-compat",
     "apache-airflow-providers-common-sql",
+    "apache-airflow-providers-google",
     "apache-airflow-providers-openlineage",
     # Additional devel dependencies (do not remove this line and add extra development dependencies)
-    # Limit deltalake to avoid issue with missing linux ARM wheels: https://github.com/delta-io/delta-rs/issues/4041
-    "deltalake>=1.1.3,!=1.3.0,!=1.3.1",
+    # Need to exclude 1.3.0 due to missing aarch64 binaries, fixed with 1.3.1++
+    "deltalake>=1.1.3,!=1.3.0",
     "apache-airflow-providers-fab>=2.2.0; python_version < '3.13'",
     "apache-airflow-providers-microsoft-azure",
     "apache-airflow-providers-common-sql[pandas,polars]",
@@ -138,8 +146,8 @@ apache-airflow-providers-common-sql = {workspace = true}
 apache-airflow-providers-standard = {workspace = true}

 [project.urls]
-"Documentation" = "https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.8.3"
-"Changelog" = "https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.8.3/changelog.html"
+"Documentation" = "https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.9.0"
+"Changelog" = "https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.9.0/changelog.html"
 "Bug Tracker" = "https://github.com/apache/airflow/issues"
 "Source Code" = "https://github.com/apache/airflow"
 "Slack Chat" = "https://s.apache.org/airflow-slack"
--- apache_airflow_providers_databricks-7.8.3rc1/src/airflow/providers/databricks/__init__.py
+++ apache_airflow_providers_databricks-7.9.0rc1/src/airflow/providers/databricks/__init__.py
@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version

 __all__ = ["__version__"]

-__version__ = "7.8.3"
+__version__ = "7.9.0"

 if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
     "2.11.0"
--- apache_airflow_providers_databricks-7.8.3rc1/src/airflow/providers/databricks/hooks/databricks.py
+++ apache_airflow_providers_databricks-7.9.0rc1/src/airflow/providers/databricks/hooks/databricks.py
@@ -37,10 +37,10 @@ from requests import exceptions as requests_exceptions
 from airflow.providers.common.compat.sdk import AirflowException
 from airflow.providers.databricks.hooks.databricks_base import BaseDatabricksHook

-GET_CLUSTER_ENDPOINT = ("GET", "2.2/clusters/get")
-RESTART_CLUSTER_ENDPOINT = ("POST", "2.2/clusters/restart")
-START_CLUSTER_ENDPOINT = ("POST", "2.2/clusters/start")
-TERMINATE_CLUSTER_ENDPOINT = ("POST", "2.2/clusters/delete")
+GET_CLUSTER_ENDPOINT = ("GET", "2.1/clusters/get")
+RESTART_CLUSTER_ENDPOINT = ("POST", "2.1/clusters/restart")
+START_CLUSTER_ENDPOINT = ("POST", "2.1/clusters/start")
+TERMINATE_CLUSTER_ENDPOINT = ("POST", "2.1/clusters/delete")

 CREATE_ENDPOINT = ("POST", "2.2/jobs/create")
 RESET_ENDPOINT = ("POST", "2.2/jobs/reset")
@@ -54,20 +54,20 @@ REPAIR_RUN_ENDPOINT = ("POST", "2.2/jobs/runs/repair")
 OUTPUT_RUNS_JOB_ENDPOINT = ("GET", "2.2/jobs/runs/get-output")
 CANCEL_ALL_RUNS_ENDPOINT = ("POST", "2.2/jobs/runs/cancel-all")

-INSTALL_LIBS_ENDPOINT = ("POST", "2.2/libraries/install")
-UNINSTALL_LIBS_ENDPOINT = ("POST", "2.2/libraries/uninstall")
-UPDATE_REPO_ENDPOINT = ("PATCH", "2.2/repos/")
-DELETE_REPO_ENDPOINT = ("DELETE", "2.2/repos/")
-CREATE_REPO_ENDPOINT = ("POST", "2.2/repos")
+INSTALL_LIBS_ENDPOINT = ("POST", "2.0/libraries/install")
+UNINSTALL_LIBS_ENDPOINT = ("POST", "2.0/libraries/uninstall")
+UPDATE_REPO_ENDPOINT = ("PATCH", "2.0/repos/")
+DELETE_REPO_ENDPOINT = ("DELETE", "2.0/repos/")
+CREATE_REPO_ENDPOINT = ("POST", "2.0/repos")

 LIST_JOBS_ENDPOINT = ("GET", "2.2/jobs/list")
-LIST_PIPELINES_ENDPOINT = ("GET", "2.2/pipelines")
-LIST_SQL_ENDPOINTS_ENDPOINT = ("GET", "2.2/sql/endpoints")
+LIST_PIPELINES_ENDPOINT = ("GET", "2.0/pipelines")
+LIST_SQL_ENDPOINTS_ENDPOINT = ("GET", "2.0/sql/warehouses")

-WORKSPACE_GET_STATUS_ENDPOINT = ("GET", "2.2/workspace/get-status")
+WORKSPACE_GET_STATUS_ENDPOINT = ("GET", "2.0/workspace/get-status")

-SPARK_VERSIONS_ENDPOINT = ("GET", "2.2/clusters/spark-versions")
-SQL_STATEMENTS_ENDPOINT = "2.2/sql/statements"
+SPARK_VERSIONS_ENDPOINT = ("GET", "2.1/clusters/spark-versions")
+SQL_STATEMENTS_ENDPOINT = "2.0/sql/statements"


 class RunLifeCycleState(Enum):
@@ -717,7 +717,7 @@ class DatabricksHook(BaseDatabricksHook):
         """
         Install libraries on the cluster.

-        Utility function to call the ``2.2/libraries/install`` endpoint.
+        Utility function to call the ``2.0/libraries/install`` endpoint.

         :param json: json dictionary containing cluster_id and an array of library
         """
@@ -727,7 +727,7 @@ class DatabricksHook(BaseDatabricksHook):
         """
         Uninstall libraries on the cluster.

-        Utility function to call the ``2.2/libraries/uninstall`` endpoint.
+        Utility function to call the ``2.0/libraries/uninstall`` endpoint.

         :param json: json dictionary containing cluster_id and an array of library
         """
@@ -790,7 +790,7 @@ class DatabricksHook(BaseDatabricksHook):
         :param json: payload
         :return: json containing permission specification
         """
-        return self._do_api_call(("PATCH", f"2.2/permissions/jobs/{job_id}"), json)
+        return self._do_api_call(("PATCH", f"2.0/permissions/jobs/{job_id}"), json)

     def post_sql_statement(self, json: dict[str, Any]) -> str:
         """
--- apache_airflow_providers_databricks-7.8.3rc1/src/airflow/providers/databricks/operators/databricks.py
+++ apache_airflow_providers_databricks-7.9.0rc1/src/airflow/providers/databricks/operators/databricks.py
@@ -53,7 +53,7 @@ from airflow.providers.databricks.utils.mixins import DatabricksSQLStatementsMix
 from airflow.providers.databricks.version_compat import AIRFLOW_V_3_0_PLUS

 if TYPE_CHECKING:
-    from airflow.models.taskinstancekey import TaskInstanceKey
+    from airflow.providers.common.compat.sdk import TaskInstanceKey
     from airflow.providers.databricks.operators.databricks_workflow import (
         DatabricksWorkflowTaskGroup,
     )
--- apache_airflow_providers_databricks-7.8.3rc1/src/airflow/providers/databricks/operators/databricks_sql.py
+++ apache_airflow_providers_databricks-7.9.0rc1/src/airflow/providers/databricks/operators/databricks_sql.py
@@ -21,13 +21,20 @@ from __future__ import annotations

 import csv
 import json
+import os
 from collections.abc import Sequence
 from functools import cached_property
+from tempfile import NamedTemporaryFile
 from typing import TYPE_CHECKING, Any, ClassVar
+from urllib.parse import urlparse

 from databricks.sql.utils import ParamEscaper

-from airflow.providers.common.compat.sdk import AirflowException, BaseOperator
+from airflow.providers.common.compat.sdk import (
+    AirflowException,
+    AirflowOptionalProviderFeatureException,
+    BaseOperator,
+)
 from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator
 from airflow.providers.databricks.hooks.databricks_sql import DatabricksSqlHook

@@ -62,13 +69,27 @@ class DatabricksSqlOperator(SQLExecuteQueryOperator):
     :param catalog: An optional initial catalog to use. Requires DBR version 9.0+ (templated)
     :param schema: An optional initial schema to use. Requires DBR version 9.0+ (templated)
     :param output_path: optional string specifying the file to which write selected data. (templated)
-    :param output_format: format of output data if ``output_path` is specified.
-        Possible values are ``csv``, ``json``, ``jsonl``. Default is ``csv``.
+        Supports local file paths and GCS URIs (e.g., ``gs://bucket/path/file.parquet``).
+        When using GCS URIs, requires the ``apache-airflow-providers-google`` package.
+    :param output_format: format of output data if ``output_path`` is specified.
+        Possible values are ``csv``, ``json``, ``jsonl``, ``parquet``, ``avro``. Default is ``csv``.
     :param csv_params: parameters that will be passed to the ``csv.DictWriter`` class used to write CSV data.
+    :param gcp_conn_id: The connection ID to use for connecting to Google Cloud when using GCS output path.
+        Default is ``google_cloud_default``.
+    :param gcs_impersonation_chain: Optional service account to impersonate using short-term
+        credentials for GCS upload, or chained list of accounts required to get the access_token
+        of the last account in the list, which will be impersonated in the request. (templated)
     """

     template_fields: Sequence[str] = tuple(
-        {"_output_path", "schema", "catalog", "http_headers", "databricks_conn_id"}
+        {
+            "_output_path",
+            "schema",
+            "catalog",
+            "http_headers",
+            "databricks_conn_id",
+            "_gcs_impersonation_chain",
+        }
         | set(SQLExecuteQueryOperator.template_fields)
     )

@@ -90,6 +111,8 @@ class DatabricksSqlOperator(SQLExecuteQueryOperator):
         output_format: str = "csv",
         csv_params: dict[str, Any] | None = None,
         client_parameters: dict[str, Any] | None = None,
+        gcp_conn_id: str = "google_cloud_default",
+        gcs_impersonation_chain: str | Sequence[str] | None = None,
         **kwargs,
     ) -> None:
         super().__init__(conn_id=databricks_conn_id, **kwargs)
@@ -105,6 +128,8 @@ class DatabricksSqlOperator(SQLExecuteQueryOperator):
         self.http_headers = http_headers
         self.catalog = catalog
         self.schema = schema
+        self._gcp_conn_id = gcp_conn_id
+        self._gcs_impersonation_chain = gcs_impersonation_chain

     @cached_property
     def _hook(self) -> DatabricksSqlHook:
@@ -127,41 +152,151 @@ class DatabricksSqlOperator(SQLExecuteQueryOperator):
     def _should_run_output_processing(self) -> bool:
         return self.do_xcom_push or bool(self._output_path)

+    @property
+    def _is_gcs_output(self) -> bool:
+        """Check if the output path is a GCS URI."""
+        return self._output_path.startswith("gs://") if self._output_path else False
+
+    def _parse_gcs_path(self, path: str) -> tuple[str, str]:
+        """Parse a GCS URI into bucket and object name."""
+        parsed = urlparse(path)
+        bucket = parsed.netloc
+        object_name = parsed.path.lstrip("/")
+        return bucket, object_name
+
+    def _upload_to_gcs(self, local_path: str, gcs_path: str) -> None:
+        """Upload a local file to GCS."""
+        try:
+            from airflow.providers.google.cloud.hooks.gcs import GCSHook
+        except ImportError:
+            raise AirflowOptionalProviderFeatureException(
+                "The 'apache-airflow-providers-google' package is required for GCS output. "
+                "Install it with: pip install apache-airflow-providers-google"
+            )
+
+        bucket, object_name = self._parse_gcs_path(gcs_path)
+        hook = GCSHook(
+            gcp_conn_id=self._gcp_conn_id,
+            impersonation_chain=self._gcs_impersonation_chain,
+        )
+        hook.upload(
+            bucket_name=bucket,
+            object_name=object_name,
+            filename=local_path,
+        )
+        self.log.info("Uploaded output to %s", gcs_path)
+
+    def _write_parquet(self, file_path: str, field_names: list[str], rows: list[Any]) -> None:
+        """Write data to a Parquet file."""
+        import pyarrow as pa
+        import pyarrow.parquet as pq
+
+        data: dict[str, list] = {name: [] for name in field_names}
+        for row in rows:
+            row_dict = row._asdict()
+            for name in field_names:
+                data[name].append(row_dict[name])
+
+        table = pa.Table.from_pydict(data)
+        pq.write_table(table, file_path)
+
+    def _write_avro(self, file_path: str, field_names: list[str], rows: list[Any]) -> None:
+        """Write data to an Avro file using fastavro."""
+        try:
+            from fastavro import writer
+        except ImportError:
+            raise AirflowOptionalProviderFeatureException(
+                "The 'fastavro' package is required for Avro output. Install it with: pip install fastavro"
+            )
+
+        data: dict[str, list] = {name: [] for name in field_names}
+        for row in rows:
+            row_dict = row._asdict()
+            for name in field_names:
+                data[name].append(row_dict[name])
+
+        schema_fields = []
+        for name in field_names:
+            sample_val = next(
+                (data[name][i] for i in range(len(data[name])) if data[name][i] is not None), None
+            )
+            if sample_val is None:
+                avro_type = ["null", "string"]
+            elif isinstance(sample_val, bool):
+                avro_type = ["null", "boolean"]
+            elif isinstance(sample_val, int):
+                avro_type = ["null", "long"]
+            elif isinstance(sample_val, float):
+                avro_type = ["null", "double"]
+            else:
+                avro_type = ["null", "string"]
+            schema_fields.append({"name": name, "type": avro_type})
+
+        avro_schema = {
+            "type": "record",
+            "name": "QueryResult",
+            "fields": schema_fields,
+        }
+
+        records = [row._asdict() for row in rows]
+        with open(file_path, "wb") as f:
+            writer(f, avro_schema, records)
+
     def _process_output(self, results: list[Any], descriptions: list[Sequence[Sequence] | None]) -> list[Any]:
         if not self._output_path:
             return list(zip(descriptions, results))
         if not self._output_format:
             raise AirflowException("Output format should be specified!")
-        # Output to a file only the result of last query
+
         last_description = descriptions[-1]
         last_results = results[-1]
         if last_description is None:
-            raise AirflowException("There is missing description present for the output file. .")
+            raise AirflowException("There is missing description present for the output file.")
         field_names = [field[0] for field in last_description]
-        if self._output_format.lower() == "csv":
-            with open(self._output_path, "w", newline="") as file:
-                if self._csv_params:
-                    csv_params = self._csv_params
-                else:
-                    csv_params = {}
-                write_header = csv_params.get("header", True)
-                if "header" in csv_params:
-                    del csv_params["header"]
-                writer = csv.DictWriter(file, fieldnames=field_names, **csv_params)
-                if write_header:
-                    writer.writeheader()
-                for row in last_results:
-                    writer.writerow(row._asdict())
-        elif self._output_format.lower() == "json":
-            with open(self._output_path, "w") as file:
-                file.write(json.dumps([row._asdict() for row in last_results]))
-        elif self._output_format.lower() == "jsonl":
-            with open(self._output_path, "w") as file:
-                for row in last_results:
-                    file.write(json.dumps(row._asdict()))
-                    file.write("\n")
+
+        if self._is_gcs_output:
+            suffix = f".{self._output_format.lower()}"
+            tmp_file = NamedTemporaryFile(mode="w", suffix=suffix, delete=False, newline="")
+            local_path = tmp_file.name
+            tmp_file.close()
         else:
-            raise AirflowException(f"Unsupported output format: '{self._output_format}'")
+            local_path = self._output_path
+
+        try:
+            output_format = self._output_format.lower()
+            if output_format == "csv":
+                with open(local_path, "w", newline="") as file:
+                    if self._csv_params:
+                        csv_params = self._csv_params.copy()
+                    else:
+                        csv_params = {}
+                    write_header = csv_params.pop("header", True)
+                    writer = csv.DictWriter(file, fieldnames=field_names, **csv_params)
+                    if write_header:
+                        writer.writeheader()
+                    for row in last_results:
+                        writer.writerow(row._asdict())
+            elif output_format == "json":
+                with open(local_path, "w") as file:
+                    file.write(json.dumps([row._asdict() for row in last_results]))
+            elif output_format == "jsonl":
+                with open(local_path, "w") as file:
+                    for row in last_results:
+                        file.write(json.dumps(row._asdict()))
+                        file.write("\n")
+            elif output_format == "parquet":
+                self._write_parquet(local_path, field_names, last_results)
+            elif output_format == "avro":
+                self._write_avro(local_path, field_names, last_results)
+            else:
+                raise ValueError(f"Unsupported output format: '{self._output_format}'")
+
+            if self._is_gcs_output:
+                self._upload_to_gcs(local_path, self._output_path)
+        finally:
+            if self._is_gcs_output and os.path.exists(local_path):
+                os.unlink(local_path)
+
         return list(zip(descriptions, results))
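As a side note on the ``_write_avro`` helper added above: it infers a nullable Avro type per column from the first non-null value it finds (bool to boolean, int to long, float to double, everything else to string). A small standalone sketch of that inference, using hypothetical rows in place of real Databricks ``Row`` objects:

    from fastavro import reader, writer

    # Hypothetical query output, mimicking what Row._asdict() would yield per row.
    rows = [
        {"id": 1, "price": 9.99, "name": "widget"},
        {"id": 2, "price": None, "name": None},
    ]
    field_names = ["id", "price", "name"]

    def infer_avro_type(values):
        """Map the first non-null Python value to a nullable Avro type, mirroring the operator."""
        sample = next((v for v in values if v is not None), None)
        if isinstance(sample, bool):  # checked before int, since bool is a subclass of int
            return ["null", "boolean"]
        if isinstance(sample, int):
            return ["null", "long"]
        if isinstance(sample, float):
            return ["null", "double"]
        return ["null", "string"]  # all-null or unrecognised types fall back to string

    schema = {
        "type": "record",
        "name": "QueryResult",
        "fields": [{"name": n, "type": infer_avro_type([r[n] for r in rows])} for n in field_names],
    }

    with open("/tmp/query_result.avro", "wb") as out:
        writer(out, schema, rows)

    with open("/tmp/query_result.avro", "rb") as inp:
        assert [record["id"] for record in reader(inp)] == [1, 2]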
 
 
--- apache_airflow_providers_databricks-7.8.3rc1/src/airflow/providers/databricks/plugins/databricks_workflow.py
+++ apache_airflow_providers_databricks-7.9.0rc1/src/airflow/providers/databricks/plugins/databricks_workflow.py
@@ -298,7 +298,7 @@ class WorkflowJobRunLink(BaseOperatorLink, LoggingMixin):
         """XCom key where the link is stored during task execution."""
         return "databricks_job_run_link"

-    def get_link(
+    def get_link(  # type: ignore[override] # Signature intentionally kept this way for Airflow 2.x compatibility
         self,
         operator: BaseOperator,
         dttm=None,
@@ -374,7 +374,7 @@ class WorkflowJobRepairAllFailedLink(BaseOperatorLink, LoggingMixin):

     name = "Repair All Failed Tasks"

-    def get_link(
+    def get_link(  # type: ignore[override] # Signature intentionally kept this way for Airflow 2.x compatibility
         self,
         operator,
         dttm=None,
@@ -471,7 +471,7 @@ class WorkflowJobRepairSingleTaskLink(BaseOperatorLink, LoggingMixin):

     name = "Repair a single task"

-    def get_link(
+    def get_link(  # type: ignore[override] # Signature intentionally kept this way for Airflow 2.x compatibility
         self,
         operator,
         dttm=None,
--- apache_airflow_providers_databricks-7.8.3rc1/tests/unit/databricks/hooks/test_databricks.py
+++ apache_airflow_providers_databricks-7.9.0rc1/tests/unit/databricks/hooks/test_databricks.py
@@ -216,42 +216,42 @@ def get_cluster_endpoint(host):
     """
     Utility function to generate the get run endpoint given the host.
     """
-    return f"https://{host}/api/2.2/clusters/get"
+    return f"https://{host}/api/2.1/clusters/get"


 def start_cluster_endpoint(host):
     """
     Utility function to generate the get run endpoint given the host.
     """
-    return f"https://{host}/api/2.2/clusters/start"
+    return f"https://{host}/api/2.1/clusters/start"


 def restart_cluster_endpoint(host):
     """
     Utility function to generate the get run endpoint given the host.
     """
-    return f"https://{host}/api/2.2/clusters/restart"
+    return f"https://{host}/api/2.1/clusters/restart"


 def terminate_cluster_endpoint(host):
     """
     Utility function to generate the get run endpoint given the host.
     """
-    return f"https://{host}/api/2.2/clusters/delete"
+    return f"https://{host}/api/2.1/clusters/delete"


 def install_endpoint(host):
     """
     Utility function to generate the install endpoint given the host.
     """
-    return f"https://{host}/api/2.2/libraries/install"
+    return f"https://{host}/api/2.0/libraries/install"


 def uninstall_endpoint(host):
     """
     Utility function to generate the uninstall endpoint given the host.
     """
-    return f"https://{host}/api/2.2/libraries/uninstall"
+    return f"https://{host}/api/2.0/libraries/uninstall"


 def list_jobs_endpoint(host):
@@ -265,19 +265,19 @@ def list_pipelines_endpoint(host):
     """
     Utility function to generate the list jobs endpoint given the host
     """
-    return f"https://{host}/api/2.2/pipelines"
+    return f"https://{host}/api/2.0/pipelines"


 def list_spark_versions_endpoint(host):
     """Utility function to generate the list spark versions endpoint given the host"""
-    return f"https://{host}/api/2.2/clusters/spark-versions"
+    return f"https://{host}/api/2.1/clusters/spark-versions"


 def permissions_endpoint(host, job_id):
     """
     Utility function to generate the permissions endpoint given the host
     """
-    return f"https://{host}/api/2.2/permissions/jobs/{job_id}"
+    return f"https://{host}/api/2.0/permissions/jobs/{job_id}"


 def create_valid_response_mock(content):
@@ -289,7 +289,7 @@ def create_valid_response_mock(content):

 def sql_statements_endpoint(host):
     """Utility function to generate the sql statements endpoint given the host."""
-    return f"https://{host}/api/2.2/sql/statements"
+    return f"https://{host}/api/2.0/sql/statements"


 def create_successful_response_mock(content):
@@ -1302,7 +1302,7 @@ class TestDatabricksHook:
         self.hook.update_job_permission(1, ACCESS_CONTROL_DICT)

         mock_requests.patch.assert_called_once_with(
-            f"https://{HOST}/api/2.2/permissions/jobs/1",
+            f"https://{HOST}/api/2.0/permissions/jobs/1",
             json=utils.normalise_json_content(ACCESS_CONTROL_DICT),
             params=None,
             auth=HTTPBasicAuth(LOGIN, PASSWORD),
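The URL assertions updated above follow from how the hook expands its (HTTP method, versioned path) endpoint tuples against the workspace host. A rough sketch of that expansion (not the provider's actual request code, which layers auth providers, retries, and error handling on top):

    import requests

    # Example endpoint constant, matching the hook module after this change.
    GET_CLUSTER_ENDPOINT = ("GET", "2.1/clusters/get")

    def call_databricks_api(host: str, endpoint: tuple[str, str], token: str, json_body: dict | None = None) -> dict:
        """Expand an endpoint tuple into https://{host}/api/{path} and issue the request."""
        method, path = endpoint
        url = f"https://{host}/api/{path}"  # the exact URL shape asserted in the tests above
        response = requests.request(method, url, json=json_body, headers={"Authorization": f"Bearer {token}"})
        response.raise_for_status()
        return response.json()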
--- apache_airflow_providers_databricks-7.8.3rc1/tests/unit/databricks/operators/test_databricks_sql.py
+++ apache_airflow_providers_databricks-7.9.0rc1/tests/unit/databricks/operators/test_databricks_sql.py
@@ -314,3 +314,142 @@ def test_hook_is_cached():
     hook = op.get_db_hook()
     hook2 = op.get_db_hook()
     assert hook is hook2
+
+
+def test_exec_write_parquet_file(tmp_path):
+    """Test writing output to Parquet format."""
+    with patch("airflow.providers.databricks.operators.databricks_sql.DatabricksSqlHook") as db_mock_class:
+        path = tmp_path / "testfile.parquet"
+        op = DatabricksSqlOperator(
+            task_id=TASK_ID,
+            sql="select * from dummy",
+            output_path=os.fspath(path),
+            output_format="parquet",
+            return_last=True,
+            do_xcom_push=True,
+            split_statements=False,
+        )
+        db_mock = db_mock_class.return_value
+        db_mock.run.return_value = [SerializableRow(1, "value1"), SerializableRow(2, "value2")]
+        db_mock.descriptions = [[("id",), ("value",)]]
+
+        op.execute(None)
+
+        import pyarrow.parquet as pq
+
+        table = pq.read_table(path)
+        assert table.num_rows == 2
+        assert table.column_names == ["id", "value"]
+        assert table.column("id").to_pylist() == [1, 2]
+        assert table.column("value").to_pylist() == ["value1", "value2"]
+
+
+def test_exec_write_avro_file_with_fastavro(tmp_path):
+    """Test writing output to Avro format using fastavro."""
+    pytest.importorskip("fastavro")
+    with patch("airflow.providers.databricks.operators.databricks_sql.DatabricksSqlHook") as db_mock_class:
+        path = tmp_path / "testfile.avro"
+        op = DatabricksSqlOperator(
+            task_id=TASK_ID,
+            sql="select * from dummy",
+            output_path=os.fspath(path),
+            output_format="avro",
+            return_last=True,
+            do_xcom_push=True,
+            split_statements=False,
+        )
+        db_mock = db_mock_class.return_value
+        db_mock.run.return_value = [SerializableRow(1, "value1"), SerializableRow(2, "value2")]
+        db_mock.descriptions = [[("id",), ("value",)]]
+
+        op.execute(None)
+
+        from fastavro import reader
+
+        with open(path, "rb") as f:
+            records = list(reader(f))
+        assert len(records) == 2
+        assert records[0] == {"id": 1, "value": "value1"}
+        assert records[1] == {"id": 2, "value": "value2"}
+
+
+def test_exec_write_gcs_output(tmp_path):
+    """Test writing output to GCS."""
+    with (
+        patch("airflow.providers.databricks.operators.databricks_sql.DatabricksSqlHook") as db_mock_class,
+        patch("airflow.providers.google.cloud.hooks.gcs.GCSHook") as gcs_mock_class,
+    ):
+        op = DatabricksSqlOperator(
+            task_id=TASK_ID,
+            sql="select * from dummy",
+            output_path="gs://my-bucket/path/to/output.csv",
+            output_format="csv",
+            return_last=True,
+            do_xcom_push=True,
+            split_statements=False,
+            gcp_conn_id="my_gcp_conn",
+        )
+        db_mock = db_mock_class.return_value
+        db_mock.run.return_value = [SerializableRow(1, "value1"), SerializableRow(2, "value2")]
+        db_mock.descriptions = [[("id",), ("value",)]]
+
+        op.execute(None)
+
+        gcs_mock_class.assert_called_once_with(
+            gcp_conn_id="my_gcp_conn",
+            impersonation_chain=None,
+        )
+        gcs_mock_class.return_value.upload.assert_called_once()
+        call_kwargs = gcs_mock_class.return_value.upload.call_args[1]
+        assert call_kwargs["bucket_name"] == "my-bucket"
+        assert call_kwargs["object_name"] == "path/to/output.csv"
+
+
+def test_exec_write_gcs_parquet_output(tmp_path):
+    """Test writing Parquet output to GCS."""
+    with (
+        patch("airflow.providers.databricks.operators.databricks_sql.DatabricksSqlHook") as db_mock_class,
+        patch("airflow.providers.google.cloud.hooks.gcs.GCSHook") as gcs_mock_class,
+    ):
+        op = DatabricksSqlOperator(
+            task_id=TASK_ID,
+            sql="select * from dummy",
+            output_path="gs://my-bucket/data/results.parquet",
+            output_format="parquet",
+            return_last=True,
+            do_xcom_push=True,
+            split_statements=False,
+        )
+        db_mock = db_mock_class.return_value
+        db_mock.run.return_value = [SerializableRow(1, "value1"), SerializableRow(2, "value2")]
+        db_mock.descriptions = [[("id",), ("value",)]]
+
+        op.execute(None)
+
+        gcs_mock_class.return_value.upload.assert_called_once()
+        call_kwargs = gcs_mock_class.return_value.upload.call_args[1]
+        assert call_kwargs["bucket_name"] == "my-bucket"
+        assert call_kwargs["object_name"] == "data/results.parquet"
+
+
+def test_is_gcs_output():
+    """Test _is_gcs_output property."""
+    op_gcs = DatabricksSqlOperator(task_id=TASK_ID, sql="SELECT 1", output_path="gs://bucket/path")
+    assert op_gcs._is_gcs_output is True
+
+    op_local = DatabricksSqlOperator(task_id=TASK_ID, sql="SELECT 1", output_path="/local/path")
+    assert op_local._is_gcs_output is False
+
+    op_s3 = DatabricksSqlOperator(task_id=TASK_ID, sql="SELECT 1", output_path="s3://bucket/path")
+    assert op_s3._is_gcs_output is False
+
+    op_none = DatabricksSqlOperator(task_id=TASK_ID, sql="SELECT 1")
+    assert op_none._is_gcs_output is False
+
+
+def test_parse_gcs_path():
+    """Test _parse_gcs_path method."""
+    op = DatabricksSqlOperator(task_id=TASK_ID, sql="SELECT 1")
+    bucket, object_name = op._parse_gcs_path("gs://my-bucket/path/to/file.parquet")
+    assert bucket == "my-bucket"
+    assert object_name == "path/to/file.parquet"