apache-airflow-providers-databricks 4.2.0rc2__tar.gz → 4.3.0rc1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
This version of apache-airflow-providers-databricks is flagged as a potentially problematic release (it is a pre-release candidate).
- {apache-airflow-providers-databricks-4.2.0rc2/apache_airflow_providers_databricks.egg-info → apache-airflow-providers-databricks-4.3.0rc1}/PKG-INFO +37 -7
- {apache-airflow-providers-databricks-4.2.0rc2 → apache-airflow-providers-databricks-4.3.0rc1}/README.rst +33 -3
- {apache-airflow-providers-databricks-4.2.0rc2 → apache-airflow-providers-databricks-4.3.0rc1}/airflow/providers/databricks/__init__.py +1 -1
- {apache-airflow-providers-databricks-4.2.0rc2 → apache-airflow-providers-databricks-4.3.0rc1}/airflow/providers/databricks/get_provider_info.py +2 -3
- {apache-airflow-providers-databricks-4.2.0rc2 → apache-airflow-providers-databricks-4.3.0rc1}/airflow/providers/databricks/hooks/databricks.py +10 -8
- {apache-airflow-providers-databricks-4.2.0rc2 → apache-airflow-providers-databricks-4.3.0rc1}/airflow/providers/databricks/hooks/databricks_base.py +7 -1
- {apache-airflow-providers-databricks-4.2.0rc2 → apache-airflow-providers-databricks-4.3.0rc1}/airflow/providers/databricks/hooks/databricks_sql.py +11 -9
- {apache-airflow-providers-databricks-4.2.0rc2 → apache-airflow-providers-databricks-4.3.0rc1}/airflow/providers/databricks/operators/databricks.py +6 -6
- {apache-airflow-providers-databricks-4.2.0rc2 → apache-airflow-providers-databricks-4.3.0rc1}/airflow/providers/databricks/operators/databricks_repos.py +2 -2
- {apache-airflow-providers-databricks-4.2.0rc2 → apache-airflow-providers-databricks-4.3.0rc1}/airflow/providers/databricks/operators/databricks_sql.py +8 -3
- {apache-airflow-providers-databricks-4.2.0rc2 → apache-airflow-providers-databricks-4.3.0rc1}/airflow/providers/databricks/sensors/databricks_partition.py +1 -1
- {apache-airflow-providers-databricks-4.2.0rc2 → apache-airflow-providers-databricks-4.3.0rc1}/airflow/providers/databricks/sensors/databricks_sql.py +1 -1
- {apache-airflow-providers-databricks-4.2.0rc2 → apache-airflow-providers-databricks-4.3.0rc1}/airflow/providers/databricks/utils/databricks.py +1 -1
- {apache-airflow-providers-databricks-4.2.0rc2 → apache-airflow-providers-databricks-4.3.0rc1/apache_airflow_providers_databricks.egg-info}/PKG-INFO +37 -7
- {apache-airflow-providers-databricks-4.2.0rc2 → apache-airflow-providers-databricks-4.3.0rc1}/pyproject.toml +6 -5
- {apache-airflow-providers-databricks-4.2.0rc2 → apache-airflow-providers-databricks-4.3.0rc1}/setup.cfg +4 -4
- {apache-airflow-providers-databricks-4.2.0rc2 → apache-airflow-providers-databricks-4.3.0rc1}/setup.py +1 -1
- {apache-airflow-providers-databricks-4.2.0rc2 → apache-airflow-providers-databricks-4.3.0rc1}/LICENSE +0 -0
- {apache-airflow-providers-databricks-4.2.0rc2 → apache-airflow-providers-databricks-4.3.0rc1}/MANIFEST.in +0 -0
- {apache-airflow-providers-databricks-4.2.0rc2 → apache-airflow-providers-databricks-4.3.0rc1}/NOTICE +0 -0
- {apache-airflow-providers-databricks-4.2.0rc2 → apache-airflow-providers-databricks-4.3.0rc1}/airflow/providers/databricks/hooks/__init__.py +0 -0
- {apache-airflow-providers-databricks-4.2.0rc2 → apache-airflow-providers-databricks-4.3.0rc1}/airflow/providers/databricks/operators/__init__.py +0 -0
- {apache-airflow-providers-databricks-4.2.0rc2 → apache-airflow-providers-databricks-4.3.0rc1}/airflow/providers/databricks/sensors/__init__.py +0 -0
- {apache-airflow-providers-databricks-4.2.0rc2 → apache-airflow-providers-databricks-4.3.0rc1}/airflow/providers/databricks/triggers/__init__.py +0 -0
- {apache-airflow-providers-databricks-4.2.0rc2 → apache-airflow-providers-databricks-4.3.0rc1}/airflow/providers/databricks/triggers/databricks.py +0 -0
- {apache-airflow-providers-databricks-4.2.0rc2 → apache-airflow-providers-databricks-4.3.0rc1}/airflow/providers/databricks/utils/__init__.py +0 -0
- {apache-airflow-providers-databricks-4.2.0rc2 → apache-airflow-providers-databricks-4.3.0rc1}/apache_airflow_providers_databricks.egg-info/SOURCES.txt +0 -0
- {apache-airflow-providers-databricks-4.2.0rc2 → apache-airflow-providers-databricks-4.3.0rc1}/apache_airflow_providers_databricks.egg-info/dependency_links.txt +0 -0
- {apache-airflow-providers-databricks-4.2.0rc2 → apache-airflow-providers-databricks-4.3.0rc1}/apache_airflow_providers_databricks.egg-info/entry_points.txt +0 -0
- {apache-airflow-providers-databricks-4.2.0rc2 → apache-airflow-providers-databricks-4.3.0rc1}/apache_airflow_providers_databricks.egg-info/not-zip-safe +0 -0
- {apache-airflow-providers-databricks-4.2.0rc2 → apache-airflow-providers-databricks-4.3.0rc1}/apache_airflow_providers_databricks.egg-info/requires.txt +0 -0
- {apache-airflow-providers-databricks-4.2.0rc2 → apache-airflow-providers-databricks-4.3.0rc1}/apache_airflow_providers_databricks.egg-info/top_level.txt +0 -0
apache_airflow_providers_databricks.egg-info/PKG-INFO → PKG-INFO (+37 -7)

```diff
@@ -1,13 +1,13 @@
 Metadata-Version: 2.1
 Name: apache-airflow-providers-databricks
-Version: 4.2.0rc2
+Version: 4.3.0rc1
 Summary: Provider for Apache Airflow. Implements apache-airflow-providers-databricks package
 Home-page: https://airflow.apache.org/
 Download-URL: https://archive.apache.org/dist/airflow/providers
 Author: Apache Software Foundation
 Author-email: dev@airflow.apache.org
 License: Apache License 2.0
-Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/4.2.0/
+Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/4.3.0/
 Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
 Project-URL: Source Code, https://github.com/apache/airflow
 Project-URL: Slack Chat, https://s.apache.org/airflow-slack
@@ -21,12 +21,12 @@ Classifier: Intended Audience :: System Administrators
 Classifier: Framework :: Apache Airflow
 Classifier: Framework :: Apache Airflow :: Provider
 Classifier: License :: OSI Approved :: Apache Software License
-Classifier: Programming Language :: Python :: 3.7
 Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
 Classifier: Topic :: System :: Monitoring
-Requires-Python: ~=3.7
+Requires-Python: ~=3.8
 Description-Content-Type: text/x-rst
 Provides-Extra: common.sql
 License-File: LICENSE
@@ -53,7 +53,7 @@ License-File: NOTICE

 Package ``apache-airflow-providers-databricks``

-Release: ``4.2.0rc2``
+Release: ``4.3.0rc1``


 `Databricks <https://databricks.com/>`__
@@ -66,7 +66,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
 are in ``airflow.providers.databricks`` python package.

 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/4.2.0/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/4.3.0/>`_.


 Installation
@@ -76,7 +76,7 @@ You can install this package on top of an existing Airflow 2 installation (see `
 for the minimum Airflow version supported) via
 ``pip install apache-airflow-providers-databricks``

-The package supports the following python versions: 3.7,3.8,3.9,3.10
+The package supports the following python versions: 3.8,3.9,3.10,3.11

 Requirements
 ------------
@@ -136,6 +136,36 @@ Dependent package
 Changelog
 ---------

+4.3.0
+.....
+
+.. note::
+   This release dropped support for Python 3.7
+
+Features
+~~~~~~~~
+
+* ``add a return when the event is yielded in a loop to stop the execution (#31985)``
+
+Bug Fixes
+~~~~~~~~~
+
+* ``Fix type annotation (#31888)``
+* ``Fix Databricks SQL operator serialization (#31780)``
+* ``Making Databricks run related multi-query string in one session again (#31898) (#31899)``
+
+Misc
+~~~~
+* ``Remove return statement after yield from triggers class (#31703)``
+* ``Remove Python 3.7 support (#30963)``
+
+.. Below changes are excluded from the changelog. Move them to
+   appropriate section above if needed. Do not delete the lines(!):
+   * ``Improve docstrings in providers (#31681)``
+   * ``Add discoverability for triggers in provider.yaml (#31576)``
+   * ``Add D400 pydocstyle check - Providers (#31427)``
+   * ``Add note about dropping Python 3.7 for providers (#32015)``
+
 4.2.0
 .....

```
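The two trigger-related changelog entries above (``Remove return statement after yield from triggers class (#31703)`` and ``add a return when the event is yielded in a loop to stop the execution (#31985)``) describe a small pattern in deferrable-operator triggers: when a trigger's async ``run()`` yields its ``TriggerEvent`` from inside a polling loop, it needs an explicit ``return`` so the generator stops instead of polling again. The sketch below is illustrative only and is not the provider's trigger code; the class name, polling helper, and sleep interval are made up.

```python
import asyncio

from airflow.triggers.base import BaseTrigger, TriggerEvent


class ExampleRunTrigger(BaseTrigger):
    """Illustrative trigger that polls a run until it reaches a terminal state."""

    def serialize(self):
        return ("example_dags.triggers.ExampleRunTrigger", {})

    async def _get_run_state(self) -> str:
        # Placeholder for a real status call (e.g. polling a jobs API).
        return "SUCCESS"

    async def run(self):
        while True:
            state = await self._get_run_state()
            if state in ("SUCCESS", "FAILED"):
                yield TriggerEvent({"run_state": state})
                # Without this return, the loop would start another poll after the
                # event is consumed; returning ends the async generator cleanly.
                return
            await asyncio.sleep(30)
```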
README.rst (+33 -3)

```diff
@@ -19,7 +19,7 @@

 Package ``apache-airflow-providers-databricks``

-Release: ``4.2.0rc2``
+Release: ``4.3.0rc1``


 `Databricks <https://databricks.com/>`__
@@ -32,7 +32,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
 are in ``airflow.providers.databricks`` python package.

 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/4.2.0/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/4.3.0/>`_.


 Installation
@@ -42,7 +42,7 @@ You can install this package on top of an existing Airflow 2 installation (see `
 for the minimum Airflow version supported) via
 ``pip install apache-airflow-providers-databricks``

-The package supports the following python versions: 3.7,3.8,3.9,3.10
+The package supports the following python versions: 3.8,3.9,3.10,3.11

 Requirements
 ------------
@@ -102,6 +102,36 @@ Dependent package
 Changelog
 ---------

+4.3.0
+.....
+
+.. note::
+   This release dropped support for Python 3.7
+
+Features
+~~~~~~~~
+
+* ``add a return when the event is yielded in a loop to stop the execution (#31985)``
+
+Bug Fixes
+~~~~~~~~~
+
+* ``Fix type annotation (#31888)``
+* ``Fix Databricks SQL operator serialization (#31780)``
+* ``Making Databricks run related multi-query string in one session again (#31898) (#31899)``
+
+Misc
+~~~~
+* ``Remove return statement after yield from triggers class (#31703)``
+* ``Remove Python 3.7 support (#30963)``
+
+.. Below changes are excluded from the changelog. Move them to
+   appropriate section above if needed. Do not delete the lines(!):
+   * ``Improve docstrings in providers (#31681)``
+   * ``Add discoverability for triggers in provider.yaml (#31576)``
+   * ``Add D400 pydocstyle check - Providers (#31427)``
+   * ``Add note about dropping Python 3.7 for providers (#32015)``
+
 4.2.0
 .....

```
airflow/providers/databricks/get_provider_info.py (+2 -3)

```diff
@@ -29,6 +29,7 @@ def get_provider_info():
         "description": "`Databricks <https://databricks.com/>`__\n",
         "suspended": False,
         "versions": [
+            "4.3.0",
             "4.2.0",
             "4.1.0",
             "4.0.1",
@@ -121,9 +122,7 @@ def get_provider_info():
         "triggers": [
             {
                 "integration-name": "Databricks",
-                "
-                    "airflow.providers.databricks.triggers.databricks.DatabricksExecutionTrigger"
-                ],
+                "python-modules": ["airflow.providers.databricks.triggers.databricks"],
             }
         ],
         "sensors": [
```
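The second hunk corresponds to ``Add discoverability for triggers in provider.yaml (#31576)``: the provider metadata now lists the trigger *module* under ``python-modules`` instead of spelling out a full class path (the old key is truncated in this listing), presumably so that trigger classes can be discovered by scanning the module. A minimal sketch of the resulting fragment, taken from the ``+`` side above:

```python
provider_info_triggers = {
    "triggers": [
        {
            "integration-name": "Databricks",
            # Module path only; DatabricksExecutionTrigger lives inside this module.
            "python-modules": ["airflow.providers.databricks.triggers.databricks"],
        }
    ],
}
```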
airflow/providers/databricks/hooks/databricks.py (+10 -8)

```diff
@@ -85,7 +85,7 @@ class RunState:

     @property
     def is_successful(self) -> bool:
-        """True if the result state is SUCCESS"""
+        """True if the result state is SUCCESS."""
        return self.result_state == "SUCCESS"

     def __eq__(self, other: object) -> bool:
@@ -223,6 +223,7 @@ class DatabricksHook(BaseDatabricksHook):
     async def a_get_run_page_url(self, run_id: int) -> str:
         """
         Async version of `get_run_page_url()`.
+
         :param run_id: id of the run
         :return: URL of the run page
         """
@@ -264,6 +265,7 @@ class DatabricksHook(BaseDatabricksHook):
     async def a_get_run_state(self, run_id: int) -> RunState:
         """
         Async version of `get_run_state()`.
+
         :param run_id: id of the run
         :return: state of the run
         """
@@ -309,7 +311,7 @@ class DatabricksHook(BaseDatabricksHook):

     def get_run_state_lifecycle(self, run_id: int) -> str:
         """
-        Returns the lifecycle state of the run
+        Returns the lifecycle state of the run.

         :param run_id: id of the run
         :return: string with lifecycle state
@@ -318,7 +320,7 @@ class DatabricksHook(BaseDatabricksHook):

     def get_run_state_result(self, run_id: int) -> str:
         """
-        Returns the resulting state of the run
+        Returns the resulting state of the run.

         :param run_id: id of the run
         :return: string with resulting state
@@ -327,7 +329,7 @@ class DatabricksHook(BaseDatabricksHook):

     def get_run_state_message(self, run_id: int) -> str:
         """
-        Returns the state message for the run
+        Returns the state message for the run.

         :param run_id: id of the run
         :return: string with state message
@@ -426,7 +428,7 @@ class DatabricksHook(BaseDatabricksHook):

     def update_repo(self, repo_id: str, json: dict[str, Any]) -> dict:
         """
-        Updates given Databricks Repos
+        Updates given Databricks Repos.

         :param repo_id: ID of Databricks Repos
         :param json: payload
@@ -437,7 +439,7 @@ class DatabricksHook(BaseDatabricksHook):

     def delete_repo(self, repo_id: str):
         """
-        Deletes given Databricks Repos
+        Deletes given Databricks Repos.

         :param repo_id: ID of Databricks Repos
         :return:
@@ -447,7 +449,7 @@ class DatabricksHook(BaseDatabricksHook):

     def create_repo(self, json: dict[str, Any]) -> dict:
         """
-        Creates a Databricks Repos
+        Creates a Databricks Repos.

         :param json: payload
         :return:
@@ -472,7 +474,7 @@ class DatabricksHook(BaseDatabricksHook):
         return None

     def test_connection(self) -> tuple[bool, str]:
-        """Test the Databricks connectivity from UI"""
+        """Test the Databricks connectivity from UI."""
         hook = DatabricksHook(databricks_conn_id=self.databricks_conn_id)
         try:
             hook._do_api_call(endpoint_info=SPARK_VERSIONS_ENDPOINT).get("versions")
```
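The methods touched here are thin convenience accessors around the run state. A short usage sketch; only the method and class names come from the hunks above, the connection id and run id are placeholders and a configured Databricks connection would be required for the calls to succeed:

```python
from airflow.providers.databricks.hooks.databricks import DatabricksHook

hook = DatabricksHook(databricks_conn_id="databricks_default")  # illustrative connection id
run_id = 12345  # placeholder run id

lifecycle = hook.get_run_state_lifecycle(run_id)  # e.g. "TERMINATED"
result = hook.get_run_state_result(run_id)        # "SUCCESS" is what RunState.is_successful checks for
message = hook.get_run_state_message(run_id)
```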
airflow/providers/databricks/hooks/databricks_base.py (+7 -1)

```diff
@@ -27,6 +27,7 @@ from __future__ import annotations
 import copy
 import platform
 import time
+from functools import cached_property
 from typing import Any
 from urllib.parse import urlsplit

@@ -45,7 +46,6 @@ from tenacity import (
 )

 from airflow import __version__
-from airflow.compat.functools import cached_property
 from airflow.exceptions import AirflowException
 from airflow.hooks.base import BaseHook
 from airflow.models import Connection
@@ -197,6 +197,7 @@ class BaseDatabricksHook(BaseHook):
     def _get_retry_object(self) -> Retrying:
         """
         Instantiate a retry object.
+
         :return: instance of Retrying class
         """
         return Retrying(**self.retry_args)
@@ -204,6 +205,7 @@ class BaseDatabricksHook(BaseHook):
     def _a_get_retry_object(self) -> AsyncRetrying:
         """
         Instantiate an async retry object.
+
         :return: instance of AsyncRetrying class
         """
         return AsyncRetrying(**self.retry_args)
@@ -278,6 +280,7 @@ class BaseDatabricksHook(BaseHook):
     async def _a_get_aad_token(self, resource: str) -> str:
         """
         Async version of `_get_aad_token()`.
+
         :param resource: resource to issue token to
         :return: AAD token, or raise an exception
         """
@@ -344,6 +347,7 @@ class BaseDatabricksHook(BaseHook):
     def _get_aad_headers(self) -> dict:
         """
         Fill AAD headers if necessary (SPN is outside of the workspace).
+
         :return: dictionary with filled AAD headers
         """
         headers = {}
@@ -358,6 +362,7 @@ class BaseDatabricksHook(BaseHook):
     async def _a_get_aad_headers(self) -> dict:
         """
         Async version of `_get_aad_headers()`.
+
         :return: dictionary with filled AAD headers
         """
         headers = {}
@@ -386,6 +391,7 @@ class BaseDatabricksHook(BaseHook):
     def _check_azure_metadata_service() -> None:
         """
         Check for Azure Metadata Service.
+
         https://docs.microsoft.com/en-us/azure/virtual-machines/linux/instance-metadata-service
         """
         try:
```
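The import swap at the top of this file, repeated in the operator and sensor modules below, follows directly from dropping Python 3.7: ``functools.cached_property`` exists only on Python 3.8+, so the ``airflow.compat.functools`` shim is no longer needed. A minimal sketch of the pattern; the class and property names are invented:

```python
from functools import cached_property


class ExampleClient:
    @cached_property
    def connection_params(self) -> dict:
        # Evaluated on first access, then cached on the instance.
        print("building connection params once")
        return {"host": "https://example.cloud.databricks.com"}


client = ExampleClient()
client.connection_params  # computes and caches
client.connection_params  # served from the cache, no second print
```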
@@ -31,8 +31,7 @@ LIST_SQL_ENDPOINTS_ENDPOINT = ("GET", "api/2.0/sql/endpoints")
|
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
|
|
34
|
-
"""
|
|
35
|
-
Hook to interact with Databricks SQL.
|
|
34
|
+
"""Hook to interact with Databricks SQL.
|
|
36
35
|
|
|
37
36
|
:param databricks_conn_id: Reference to the
|
|
38
37
|
:ref:`Databricks connection <howto/connection:databricks>`.
|
|
@@ -95,7 +94,7 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
|
|
|
95
94
|
return lst[0]
|
|
96
95
|
|
|
97
96
|
def get_conn(self) -> Connection:
|
|
98
|
-
"""Returns a Databricks SQL connection object"""
|
|
97
|
+
"""Returns a Databricks SQL connection object."""
|
|
99
98
|
if not self._http_path:
|
|
100
99
|
if self._sql_endpoint_name:
|
|
101
100
|
endpoint = self._get_sql_endpoint_by_name(self._sql_endpoint_name)
|
|
@@ -148,10 +147,10 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
|
|
|
148
147
|
split_statements: bool = True,
|
|
149
148
|
return_last: bool = True,
|
|
150
149
|
) -> Any | list[Any] | None:
|
|
151
|
-
"""
|
|
152
|
-
|
|
153
|
-
statements to the
|
|
154
|
-
sequentially.
|
|
150
|
+
"""Runs a command or a list of commands.
|
|
151
|
+
|
|
152
|
+
Pass a list of SQL statements to the SQL parameter to get them to
|
|
153
|
+
execute sequentially.
|
|
155
154
|
|
|
156
155
|
:param sql: the sql statement to be executed (str) or a list of
|
|
157
156
|
sql statements to execute
|
|
@@ -180,10 +179,12 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
|
|
|
180
179
|
else:
|
|
181
180
|
raise ValueError("List of SQL statements is empty")
|
|
182
181
|
|
|
182
|
+
conn = None
|
|
183
183
|
results = []
|
|
184
184
|
for sql_statement in sql_list:
|
|
185
185
|
# when using AAD tokens, it could expire if previous query run longer than token lifetime
|
|
186
|
-
|
|
186
|
+
conn = self.get_conn()
|
|
187
|
+
with closing(conn.cursor()) as cur:
|
|
187
188
|
self.set_autocommit(conn, autocommit)
|
|
188
189
|
|
|
189
190
|
with closing(conn.cursor()) as cur:
|
|
@@ -196,7 +197,8 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
|
|
|
196
197
|
else:
|
|
197
198
|
results.append(result)
|
|
198
199
|
self.descriptions.append(cur.description)
|
|
199
|
-
|
|
200
|
+
if conn:
|
|
201
|
+
conn.close()
|
|
200
202
|
self._sql_conn = None
|
|
201
203
|
|
|
202
204
|
if handler is None:
|
|
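A simplified sketch of the connection handling visible in the ``+`` lines of ``run()``: a fresh connection is obtained for every statement (the in-code comment explains why: an AAD token backing an earlier connection may have expired while a long query was running), and the last connection is closed once after the loop. Only that shape is taken from the hunk; the helper below is not the hook's actual method, which also records cursor descriptions and routes results through a ``handler``.

```python
from contextlib import closing


def run_statements(hook, sql_list, autocommit=False):
    """Illustrative reimplementation of the loop structure, not the hook's code."""
    conn = None
    results = []
    for sql_statement in sql_list:
        # Re-acquire the connection so an expired AAD token gets refreshed between statements.
        conn = hook.get_conn()
        with closing(conn.cursor()) as cur:
            hook.set_autocommit(conn, autocommit)
            cur.execute(sql_statement)
            results.append(cur.fetchall())
    if conn:
        # Close the last connection once, after the whole batch has run.
        conn.close()
    return results
```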
airflow/providers/databricks/operators/databricks.py (+6 -6)

```diff
@@ -20,10 +20,10 @@ from __future__ import annotations

 import time
 import warnings
+from functools import cached_property
 from logging import Logger
 from typing import TYPE_CHECKING, Any, Sequence

-from airflow.compat.functools import cached_property
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
 from airflow.models import BaseOperator, BaseOperatorLink, XCom
 from airflow.providers.databricks.hooks.databricks import DatabricksHook, RunState
@@ -42,7 +42,7 @@ XCOM_RUN_PAGE_URL_KEY = "run_page_url"

 def _handle_databricks_operator_execution(operator, hook, log, context) -> None:
     """
-    Handles the Airflow + Databricks lifecycle logic for a Databricks operator
+    Handles the Airflow + Databricks lifecycle logic for a Databricks operator.

     :param operator: Databricks operator being handled
     :param context: Airflow context
@@ -100,7 +100,7 @@ def _handle_databricks_operator_execution(operator, hook, log, context) -> None:

 def _handle_deferrable_databricks_operator_execution(operator, hook, log, context) -> None:
     """
-    Handles the Airflow + Databricks lifecycle logic for deferrable Databricks operators
+    Handles the Airflow + Databricks lifecycle logic for deferrable Databricks operators.

     :param operator: Databricks async operator being handled
     :param context: Airflow context
@@ -303,7 +303,7 @@ class DatabricksSubmitRunOperator(BaseOperator):
         dbt_task: dict[str, str | list[str]] | None = None,
         new_cluster: dict[str, object] | None = None,
         existing_cluster_id: str | None = None,
-        libraries: list[dict[str,
+        libraries: list[dict[str, Any]] | None = None,
         run_name: str | None = None,
         timeout_seconds: int | None = None,
         databricks_conn_id: str = "databricks_default",
@@ -404,7 +404,7 @@ class DatabricksSubmitRunOperator(BaseOperator):


 class DatabricksSubmitRunDeferrableOperator(DatabricksSubmitRunOperator):
-    """Deferrable version of ``DatabricksSubmitRunOperator``"""
+    """Deferrable version of ``DatabricksSubmitRunOperator``."""

     def __init__(self, *args, **kwargs):
         warnings.warn(
@@ -683,7 +683,7 @@ class DatabricksRunNowOperator(BaseOperator):


 class DatabricksRunNowDeferrableOperator(DatabricksRunNowOperator):
-    """Deferrable version of ``DatabricksRunNowOperator``"""
+    """Deferrable version of ``DatabricksRunNowOperator``."""

     def __init__(self, *args, **kwargs):
         warnings.warn(
```
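``Fix type annotation (#31888)`` widens the ``libraries`` parameter to ``list[dict[str, Any]]``, which matches nested library specs such as PyPI packages. A hedged usage sketch; the cluster spec, notebook path, and library pin are placeholders rather than values taken from this release:

```python
from airflow.providers.databricks.operators.databricks import DatabricksSubmitRunOperator

submit_run = DatabricksSubmitRunOperator(
    task_id="example_submit_run",
    databricks_conn_id="databricks_default",
    new_cluster={
        "spark_version": "13.3.x-scala2.12",  # placeholder runtime
        "node_type_id": "i3.xlarge",
        "num_workers": 2,
    },
    notebook_task={"notebook_path": "/Shared/example_notebook"},
    # Each entry is a nested mapping, hence dict[str, Any] rather than dict[str, str].
    libraries=[{"pypi": {"package": "simplejson"}}],
)
```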
airflow/providers/databricks/operators/databricks_repos.py (+2 -2)

```diff
@@ -19,10 +19,10 @@
 from __future__ import annotations

 import re
+from functools import cached_property
 from typing import TYPE_CHECKING, Sequence
 from urllib.parse import urlsplit

-from airflow.compat.functools import cached_property
 from airflow.exceptions import AirflowException
 from airflow.models import BaseOperator
 from airflow.providers.databricks.hooks.databricks import DatabricksHook
@@ -129,7 +129,7 @@ class DatabricksReposCreateOperator(BaseOperator):

     def execute(self, context: Context):
         """
-        Creates a Databricks Repo
+        Creates a Databricks Repo.

         :param context: context
         :return: Repo ID
```
airflow/providers/databricks/operators/databricks_sql.py (+8 -3)

```diff
@@ -22,6 +22,7 @@ import csv
 import json
 from typing import TYPE_CHECKING, Any, Sequence

+from databricks.sql.types import Row
 from databricks.sql.utils import ParamEscaper

 from airflow.exceptions import AirflowException
@@ -33,9 +34,13 @@ if TYPE_CHECKING:
     from airflow.utils.context import Context


+def make_serializable(val: Row):
+    return tuple(val)
+
+
 class DatabricksSqlOperator(SQLExecuteQueryOperator):
     """
-    Executes SQL code in a Databricks SQL endpoint or a Databricks cluster
+    Executes SQL code in a Databricks SQL endpoint or a Databricks cluster.

     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -125,7 +130,7 @@ class DatabricksSqlOperator(SQLExecuteQueryOperator):

     def _process_output(self, results: list[Any], descriptions: list[Sequence[Sequence] | None]) -> list[Any]:
         if not self._output_path:
-            return list(zip(descriptions, results))
+            return list(zip(descriptions, [[make_serializable(row) for row in res] for res in results]))
         if not self._output_format:
             raise AirflowException("Output format should be specified!")
         # Output to a file only the result of last query
@@ -158,7 +163,7 @@ class DatabricksSqlOperator(SQLExecuteQueryOperator):
                 file.write("\n")
             else:
                 raise AirflowException(f"Unsupported output format: '{self._output_format}'")
-        return list(zip(descriptions, results))
+        return list(zip(descriptions, [[make_serializable(row) for row in res] for res in results]))
```
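``Fix Databricks SQL operator serialization (#31780)``: the operator's query results previously contained the connector's ``Row`` objects, which do not serialize cleanly (for example when pushed to XCom), so ``_process_output`` now unwraps every row into a plain tuple via ``make_serializable``. A tiny self-contained illustration using a named tuple as a stand-in for ``Row`` (constructing a real ``Row`` would need a live connector):

```python
from collections import namedtuple

# Stand-in for databricks.sql.types.Row, which behaves like a named tuple.
FakeRow = namedtuple("FakeRow", ["id", "name"])
rows = [FakeRow(1, "a"), FakeRow(2, "b")]


def make_serializable(val):
    # Same one-liner as in the hunk above: drop the Row wrapper, keep the values.
    return tuple(val)


print([make_serializable(r) for r in rows])  # [(1, 'a'), (2, 'b')]
```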
airflow/providers/databricks/sensors/databricks_partition.py (+1 -1)

```diff
@@ -21,11 +21,11 @@
 from __future__ import annotations

 from datetime import datetime
+from functools import cached_property
 from typing import TYPE_CHECKING, Any, Callable, Sequence

 from databricks.sql.utils import ParamEscaper

-from airflow.compat.functools import cached_property
 from airflow.exceptions import AirflowException
 from airflow.providers.common.sql.hooks.sql import fetch_all_handler
 from airflow.providers.databricks.hooks.databricks_sql import DatabricksSqlHook
```
airflow/providers/databricks/sensors/databricks_sql.py (+1 -1)

```diff
@@ -20,9 +20,9 @@

 from __future__ import annotations

+from functools import cached_property
 from typing import TYPE_CHECKING, Any, Callable, Iterable, Sequence

-from airflow.compat.functools import cached_property
 from airflow.exceptions import AirflowException
 from airflow.providers.common.sql.hooks.sql import fetch_all_handler
 from airflow.providers.databricks.hooks.databricks_sql import DatabricksSqlHook
```
@@ -57,7 +57,7 @@ def normalise_json_content(content, json_path: str = "json") -> str | bool | lis
|
|
|
57
57
|
def validate_trigger_event(event: dict):
|
|
58
58
|
"""
|
|
59
59
|
Validates correctness of the event
|
|
60
|
-
received from :class:`~airflow.providers.databricks.triggers.databricks.DatabricksExecutionTrigger
|
|
60
|
+
received from :class:`~airflow.providers.databricks.triggers.databricks.DatabricksExecutionTrigger`.
|
|
61
61
|
"""
|
|
62
62
|
keys_to_check = ["run_id", "run_page_url", "run_state"]
|
|
63
63
|
for key in keys_to_check:
|
|
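For reference, ``validate_trigger_event`` checks that the payload produced by the trigger carries the three keys in ``keys_to_check``. An illustrative payload follows; only the key names come from the code above, the values and the exact representation of ``run_state`` are assumptions:

```python
# Hypothetical event of the kind the validator inspects.
event = {
    "run_id": 12345,
    "run_page_url": "https://example.cloud.databricks.com/#job/1/run/12345",
    "run_state": '{"life_cycle_state": "TERMINATED", "result_state": "SUCCESS", "state_message": ""}',
}
```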
PKG-INFO → apache_airflow_providers_databricks.egg-info/PKG-INFO (+37 -7)

The hunks for this file are identical to those in the PKG-INFO diff at the top of this listing; the same metadata is present at both paths in the sdists.
pyproject.toml (+6 -5)

```diff
@@ -26,6 +26,9 @@ target-version = ['py37', 'py38', 'py39', 'py310']
 requires = ['setuptools==67.2.0']
 build-backend = "setuptools.build_meta"

+[project]
+requires-python = ">=3.8"
+
 [tool.ruff]
 typing-modules = ["airflow.typing_compat"]
 line-length = 110
@@ -35,12 +38,10 @@ extend-exclude = [
     "airflow/providers/google/ads/_vendor/*",
     # The files generated by stubgen aren't 100% valid syntax it turns out, and we don't ship them, so we can
     # ignore them in ruff
-    "airflow/providers/common/sql/*/*.pyi"
+    "airflow/providers/common/sql/*/*.pyi",
+    "airflow/migrations/versions/*.py"
 ]

-# TODO: Bump to Python 3.8 when support for Python 3.7 is dropped in Airflow.
-target-version = "py37"
-
 extend-select = [
     "I", # Missing required import (auto-fixable)
     "UP", # Pyupgrade
@@ -53,7 +54,7 @@ extend-select = [
     "D106",
     "D2",
     "D3",
-
+    "D400",
     # "D401", # Not enabled by ruff, but we don't want it
     "D402",
     "D403",
```
setup.cfg (+4 -4)

```diff
@@ -21,13 +21,13 @@ classifiers =
     Framework :: Apache Airflow
     Framework :: Apache Airflow :: Provider
     License :: OSI Approved :: Apache Software License
-    Programming Language :: Python :: 3.7
     Programming Language :: Python :: 3.8
     Programming Language :: Python :: 3.9
     Programming Language :: Python :: 3.10
+    Programming Language :: Python :: 3.11
     Topic :: System :: Monitoring
 project_urls =
-    Documentation=https://airflow.apache.org/docs/apache-airflow-providers-databricks/4.2.0/
+    Documentation=https://airflow.apache.org/docs/apache-airflow-providers-databricks/4.3.0/
     Bug Tracker=https://github.com/apache/airflow/issues
     Source Code=https://github.com/apache/airflow
     Slack Chat=https://s.apache.org/airflow-slack
@@ -40,7 +40,7 @@ python_tag = py3
 [options]
 zip_safe = False
 include_package_data = True
-python_requires = ~=3.7
+python_requires = ~=3.8
 packages = find:
 setup_requires =
     setuptools
@@ -60,6 +60,6 @@ apache_airflow_provider =
 packages = airflow.providers.databricks

 [egg_info]
-tag_build = rc2
+tag_build = rc1
 tag_date = 0

```