apache-airflow-providers-databricks 6.13.0__tar.gz → 7.0.0__tar.gz
This diff shows the content of publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release: this version of apache-airflow-providers-databricks has been flagged as potentially problematic.
- {apache_airflow_providers_databricks-6.13.0 → apache_airflow_providers_databricks-7.0.0}/PKG-INFO +11 -13
- {apache_airflow_providers_databricks-6.13.0 → apache_airflow_providers_databricks-7.0.0}/README.rst +5 -5
- {apache_airflow_providers_databricks-6.13.0 → apache_airflow_providers_databricks-7.0.0}/airflow/providers/databricks/__init__.py +3 -3
- {apache_airflow_providers_databricks-6.13.0 → apache_airflow_providers_databricks-7.0.0}/airflow/providers/databricks/get_provider_info.py +4 -3
- {apache_airflow_providers_databricks-6.13.0 → apache_airflow_providers_databricks-7.0.0}/airflow/providers/databricks/hooks/databricks_sql.py +26 -50
- {apache_airflow_providers_databricks-6.13.0 → apache_airflow_providers_databricks-7.0.0}/airflow/providers/databricks/operators/databricks.py +12 -47
- {apache_airflow_providers_databricks-6.13.0 → apache_airflow_providers_databricks-7.0.0}/airflow/providers/databricks/operators/databricks_repos.py +2 -1
- {apache_airflow_providers_databricks-6.13.0 → apache_airflow_providers_databricks-7.0.0}/airflow/providers/databricks/operators/databricks_sql.py +2 -2
- {apache_airflow_providers_databricks-6.13.0 → apache_airflow_providers_databricks-7.0.0}/airflow/providers/databricks/plugins/databricks_workflow.py +4 -1
- {apache_airflow_providers_databricks-6.13.0 → apache_airflow_providers_databricks-7.0.0}/airflow/providers/databricks/sensors/databricks_partition.py +2 -1
- {apache_airflow_providers_databricks-6.13.0 → apache_airflow_providers_databricks-7.0.0}/airflow/providers/databricks/sensors/databricks_sql.py +2 -1
- {apache_airflow_providers_databricks-6.13.0 → apache_airflow_providers_databricks-7.0.0}/pyproject.toml +6 -9
- {apache_airflow_providers_databricks-6.13.0 → apache_airflow_providers_databricks-7.0.0}/airflow/providers/databricks/LICENSE +0 -0
- {apache_airflow_providers_databricks-6.13.0 → apache_airflow_providers_databricks-7.0.0}/airflow/providers/databricks/exceptions.py +0 -0
- {apache_airflow_providers_databricks-6.13.0 → apache_airflow_providers_databricks-7.0.0}/airflow/providers/databricks/hooks/__init__.py +0 -0
- {apache_airflow_providers_databricks-6.13.0 → apache_airflow_providers_databricks-7.0.0}/airflow/providers/databricks/hooks/databricks.py +0 -0
- {apache_airflow_providers_databricks-6.13.0 → apache_airflow_providers_databricks-7.0.0}/airflow/providers/databricks/hooks/databricks_base.py +0 -0
- {apache_airflow_providers_databricks-6.13.0 → apache_airflow_providers_databricks-7.0.0}/airflow/providers/databricks/operators/__init__.py +0 -0
- {apache_airflow_providers_databricks-6.13.0 → apache_airflow_providers_databricks-7.0.0}/airflow/providers/databricks/operators/databricks_workflow.py +0 -0
- {apache_airflow_providers_databricks-6.13.0 → apache_airflow_providers_databricks-7.0.0}/airflow/providers/databricks/plugins/__init__.py +0 -0
- {apache_airflow_providers_databricks-6.13.0 → apache_airflow_providers_databricks-7.0.0}/airflow/providers/databricks/sensors/__init__.py +0 -0
- {apache_airflow_providers_databricks-6.13.0 → apache_airflow_providers_databricks-7.0.0}/airflow/providers/databricks/triggers/__init__.py +0 -0
- {apache_airflow_providers_databricks-6.13.0 → apache_airflow_providers_databricks-7.0.0}/airflow/providers/databricks/triggers/databricks.py +0 -0
- {apache_airflow_providers_databricks-6.13.0 → apache_airflow_providers_databricks-7.0.0}/airflow/providers/databricks/utils/__init__.py +0 -0
- {apache_airflow_providers_databricks-6.13.0 → apache_airflow_providers_databricks-7.0.0}/airflow/providers/databricks/utils/databricks.py +0 -0
{apache_airflow_providers_databricks-6.13.0 → apache_airflow_providers_databricks-7.0.0}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: apache-airflow-providers-databricks
-Version: 6.13.0
+Version: 7.0.0
 Summary: Provider package apache-airflow-providers-databricks for Apache Airflow
 Keywords: airflow-provider,databricks,airflow,integration
 Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -22,25 +22,23 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: System :: Monitoring
 Requires-Dist: aiohttp>=3.9.2, <4
 Requires-Dist: apache-airflow-providers-common-sql>=1.20.0
-Requires-Dist: apache-airflow>=2.
-Requires-Dist: databricks-sql-connector>=
+Requires-Dist: apache-airflow>=2.9.0
+Requires-Dist: databricks-sql-connector>=3.0.0
 Requires-Dist: mergedeep>=1.3.4
 Requires-Dist: pandas>=1.5.3,<2.2;python_version<"3.9"
 Requires-Dist: pandas>=2.1.2,<2.2;python_version>="3.9"
 Requires-Dist: pyarrow>=14.0.1
 Requires-Dist: requests>=2.27.0,<3
 Requires-Dist: azure-identity>=1.3.1 ; extra == "azure-identity"
-Requires-Dist: apache-airflow-providers-common-sql ; extra == "common-sql"
 Requires-Dist: databricks-sdk==0.10.0 ; extra == "sdk"
 Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
-Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.13.0/changelog.html
-Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.13.0
+Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.0.0/changelog.html
+Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.0.0
 Project-URL: Slack Chat, https://s.apache.org/airflow-slack
 Project-URL: Source Code, https://github.com/apache/airflow
-Project-URL: Twitter, https://twitter.com/ApacheAirflow
+Project-URL: Twitter, https://x.com/ApacheAirflow
 Project-URL: YouTube, https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/
 Provides-Extra: azure-identity
-Provides-Extra: common-sql
 Provides-Extra: sdk
@@ -87,7 +85,7 @@ Provides-Extra: sdk
 
 Package ``apache-airflow-providers-databricks``
 
-Release: ``6.13.0``
+Release: ``7.0.0``
 
 
 `Databricks <https://databricks.com/>`__
@@ -100,7 +98,7 @@ This is a provider package for ``databricks`` provider. All classes for this provider package
 are in ``airflow.providers.databricks`` python package.
 
 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.13.0/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.0.0/>`_.
 
 Installation
 ------------
@@ -117,10 +115,10 @@ Requirements
 ======================================= =========================================
 PIP package                             Version required
 ======================================= =========================================
-``apache-airflow``                      ``>=2.
+``apache-airflow``                      ``>=2.9.0``
 ``apache-airflow-providers-common-sql`` ``>=1.20.0``
 ``requests``                            ``>=2.27.0,<3``
-``databricks-sql-connector``            ``>=
+``databricks-sql-connector``            ``>=3.0.0``
 ``aiohttp``                             ``>=3.9.2,<4``
 ``mergedeep``                           ``>=1.3.4``
 ``pandas``                              ``>=2.1.2,<2.2; python_version >= "3.9"``
@@ -148,4 +146,4 @@ Dependent package
 ============================================================================================================ ==============
 
 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.13.0/changelog.html>`_.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.0.0/changelog.html>`_.
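The metadata above is what pip records at install time. To confirm which of the two versions an environment actually has, a minimal sketch using only the standard library (the distribution name is the one from this diff):

```python
# Minimal sketch: read the installed provider's version and its Requires-Dist
# pins -- the same fields shown in the PKG-INFO diff above.
from importlib.metadata import metadata, version

print(version("apache-airflow-providers-databricks"))  # "7.0.0" after the upgrade
for req in metadata("apache-airflow-providers-databricks").get_all("Requires-Dist") or []:
    print(req)
```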
{apache_airflow_providers_databricks-6.13.0 → apache_airflow_providers_databricks-7.0.0}/README.rst
RENAMED
@@ -42,7 +42,7 @@
 
 Package ``apache-airflow-providers-databricks``
 
-Release: ``6.13.0``
+Release: ``7.0.0``
 
 
 `Databricks <https://databricks.com/>`__
@@ -55,7 +55,7 @@ This is a provider package for ``databricks`` provider. All classes for this provider package
 are in ``airflow.providers.databricks`` python package.
 
 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.13.0/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.0.0/>`_.
 
 Installation
 ------------
@@ -72,10 +72,10 @@ Requirements
 ======================================= =========================================
 PIP package                             Version required
 ======================================= =========================================
-``apache-airflow``                      ``>=2.
+``apache-airflow``                      ``>=2.9.0``
 ``apache-airflow-providers-common-sql`` ``>=1.20.0``
 ``requests``                            ``>=2.27.0,<3``
-``databricks-sql-connector``            ``>=
+``databricks-sql-connector``            ``>=3.0.0``
 ``aiohttp``                             ``>=3.9.2,<4``
 ``mergedeep``                           ``>=1.3.4``
 ``pandas``                              ``>=2.1.2,<2.2; python_version >= "3.9"``
@@ -103,4 +103,4 @@ Dependent package
 ============================================================================================================ ==============
 
 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.13.0/changelog.html>`_.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.0.0/changelog.html>`_.
{apache_airflow_providers_databricks-6.13.0 → apache_airflow_providers_databricks-7.0.0}/airflow/providers/databricks/__init__.py
RENAMED
@@ -29,11 +29,11 @@ from airflow import __version__ as airflow_version
 
 __all__ = ["__version__"]
 
-__version__ = "6.13.0"
+__version__ = "7.0.0"
 
 if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
-    "2.
+    "2.9.0"
 ):
     raise RuntimeError(
-        f"The package `apache-airflow-providers-databricks:{__version__}` needs Apache Airflow 2.
+        f"The package `apache-airflow-providers-databricks:{__version__}` needs Apache Airflow 2.9.0+"
     )
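The guard above compares `base_version` rather than the raw version string, so a 2.9.0 development build still passes the minimum-version check. A small illustration of why that matters:

```python
# Illustration only: pre-release versions sort before the final release,
# so comparing raw parsed versions would wrongly reject a 2.9.0 dev build.
import packaging.version

v = packaging.version.parse("2.9.0.dev0")
print(v < packaging.version.parse("2.9.0"))  # True -- dev builds sort earlier
print(v.base_version)                        # "2.9.0" -- suffix stripped
print(packaging.version.parse(v.base_version) < packaging.version.parse("2.9.0"))  # False
```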
{apache_airflow_providers_databricks-6.13.0 → apache_airflow_providers_databricks-7.0.0}/airflow/providers/databricks/get_provider_info.py
RENAMED
@@ -28,8 +28,9 @@ def get_provider_info():
         "name": "Databricks",
         "description": "`Databricks <https://databricks.com/>`__\n",
         "state": "ready",
-        "source-date-epoch": 
+        "source-date-epoch": 1734533222,
         "versions": [
+            "7.0.0",
             "6.13.0",
             "6.12.0",
             "6.11.0",
@@ -77,10 +78,10 @@ def get_provider_info():
             "1.0.0",
         ],
         "dependencies": [
-            "apache-airflow>=2.
+            "apache-airflow>=2.9.0",
             "apache-airflow-providers-common-sql>=1.20.0",
             "requests>=2.27.0,<3",
-            "databricks-sql-connector>=
+            "databricks-sql-connector>=3.0.0",
             "aiohttp>=3.9.2, <4",
             "mergedeep>=1.3.4",
             'pandas>=2.1.2,<2.2;python_version>="3.9"',
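`get_provider_info()` is also importable at runtime, which is a quick way to confirm what a deployed environment reports. A short sketch, assuming the provider is installed:

```python
# Sketch: inspect the provider metadata shown in the diff above at runtime.
from airflow.providers.databricks.get_provider_info import get_provider_info

info = get_provider_info()
print(info["versions"][0])   # "7.0.0" -- newest version is first in the list
print(info["dependencies"])  # runtime pins, matching the diff above
```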
{apache_airflow_providers_databricks-6.13.0 → apache_airflow_providers_databricks-7.0.0}/airflow/providers/databricks/hooks/databricks_sql.py
RENAMED
@@ -17,8 +17,8 @@
 from __future__ import annotations
 
 import threading
-import warnings
 from collections import namedtuple
+from collections.abc import Iterable, Mapping, Sequence
 from contextlib import closing
 from copy import copy
 from datetime import timedelta
@@ -26,28 +26,23 @@ from typing import (
     TYPE_CHECKING,
     Any,
     Callable,
-    Iterable,
-    List,
-    Mapping,
-    Sequence,
     TypeVar,
     cast,
     overload,
 )
 
 from databricks import sql  # type: ignore[attr-defined]
+from databricks.sql.types import Row
 
-from airflow.exceptions import (
-    AirflowException,
-    AirflowProviderDeprecationWarning,
-)
+from airflow.exceptions import AirflowException
+from airflow.models.connection import Connection as AirflowConnection
 from airflow.providers.common.sql.hooks.sql import DbApiHook, return_single_query_results
 from airflow.providers.databricks.exceptions import DatabricksSqlExecutionError, DatabricksSqlExecutionTimeout
 from airflow.providers.databricks.hooks.databricks_base import BaseDatabricksHook
 
 if TYPE_CHECKING:
     from databricks.sql.client import Connection
 
 
 LIST_SQL_ENDPOINTS_ENDPOINT = ("GET", "api/2.0/sql/endpoints")
@@ -82,10 +77,6 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
         on every request
     :param catalog: An optional initial catalog to use. Requires DBR version 9.0+
     :param schema: An optional initial schema to use. Requires DBR version 9.0+
-    :param return_tuple: Return a ``namedtuple`` object instead of a ``databricks.sql.Row`` object. Default
-        to False. In a future release of the provider, this will become True by default. This parameter
-        ensures backward-compatibility during the transition phase to common tuple objects for all hooks based
-        on DbApiHook. This flag will also be removed in a future release.
     :param kwargs: Additional parameters internal to Databricks SQL Connector parameters
     """
@@ -102,11 +93,10 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
         catalog: str | None = None,
         schema: str | None = None,
         caller: str = "DatabricksSqlHook",
-        return_tuple: bool = False,
         **kwargs,
     ) -> None:
         super().__init__(databricks_conn_id, caller=caller)
-        self._sql_conn = None
+        self._sql_conn: Connection | None = None
         self._token: str | None = None
         self._http_path = http_path
         self._sql_endpoint_name = sql_endpoint_name
@@ -115,18 +105,8 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
         self.http_headers = http_headers
         self.catalog = catalog
         self.schema = schema
-        self.return_tuple = return_tuple
         self.additional_params = kwargs
 
-        if not self.return_tuple:
-            warnings.warn(
-                """Returning a raw `databricks.sql.Row` object is deprecated. A namedtuple will be
-                returned instead in a future release of the databricks provider. Set `return_tuple=True` to
-                enable this behavior.""",
-                AirflowProviderDeprecationWarning,
-                stacklevel=2,
-            )
-
     def _get_extra_config(self) -> dict[str, Any | None]:
         extra_params = copy(self.databricks_conn.extra_dejson)
         for arg in ["http_path", "session_configuration", *self.extra_parameters]:
@@ -146,7 +126,7 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
         else:
             return endpoint
 
-    def get_conn(self) ->
+    def get_conn(self) -> AirflowConnection:
         """Return a Databricks SQL connection object."""
         if not self._http_path:
             if self._sql_endpoint_name:
@@ -161,20 +141,15 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
                 "or sql_endpoint_name should be specified"
             )
 
-
-
-
-
-        new_token = self._get_token(raise_error=True)
-        if new_token != self._token:
-            self._token = new_token
-        else:
-            requires_init = False
+        prev_token = self._token
+        new_token = self._get_token(raise_error=True)
+        if not self._token or new_token != self._token:
+            self._token = new_token
 
         if not self.session_config:
             self.session_config = self.databricks_conn.extra_dejson.get("session_configuration")
 
-        if not self._sql_conn or
+        if not self._sql_conn or prev_token != new_token:
             if self._sql_conn:  # close already existing connection
                 self._sql_conn.close()
             self._sql_conn = sql.connect(
@@ -189,7 +164,10 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
                 **self._get_extra_config(),
                 **self.additional_params,
             )
-
+
+        if self._sql_conn is None:
+            raise AirflowException("SQL connection is not initialized")
+        return cast(AirflowConnection, self._sql_conn)
@@ -288,11 +266,8 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
 
                 if handler is not None:
                     raw_result = handler(cur)
-
-
-                else:
-                    # Returning raw result is deprecated, and do not comply with current common.sql interface
-                    result = raw_result  # type: ignore[assignment]
+                    result = self._make_common_data_structure(raw_result)
+
                 if return_single_query_results(sql, return_last, split_statements):
                     results = [result]
                     self.descriptions = [cur.description]
@@ -310,22 +285,23 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
         else:
             return results
 
-    def _make_common_data_structure(self, result: Sequence[
+    def _make_common_data_structure(self, result: T | Sequence[T]) -> tuple[Any, ...] | list[tuple[Any, ...]]:
         """Transform the databricks Row objects into namedtuple."""
        # Below ignored lines respect namedtuple docstring, but mypy do not support dynamically
         # instantiated namedtuple, and will never do: https://github.com/python/mypy/issues/848
         if isinstance(result, list):
-            rows:
+            rows: Sequence[Row] = result
             if not rows:
                 return []
             rows_fields = tuple(rows[0].__fields__)
             rows_object = namedtuple("Row", rows_fields, rename=True)  # type: ignore
-            return cast(
-
-
-            row_fields = tuple(row.__fields__)
+            return cast(list[tuple[Any, ...]], [rows_object(*row) for row in rows])
+        elif isinstance(result, Row):
+            row_fields = tuple(result.__fields__)
             row_object = namedtuple("Row", row_fields, rename=True)  # type: ignore
-            return cast(tuple, row_object(*
+            return cast(tuple[Any, ...], row_object(*result))
+        else:
+            raise TypeError(f"Expected Sequence[Row] or Row, but got {type(result)}")
 
     def bulk_dump(self, table, tmp_file):
         raise NotImplementedError()
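The net effect of the hook changes: the deprecated `return_tuple` constructor flag is gone, and `run()` now always passes results through `_make_common_data_structure()`, so callers get namedtuples rather than raw `databricks.sql.Row` objects. A hedged migration sketch (the connection id, HTTP path, and query below are placeholders, not values from this diff):

```python
# Hedged sketch for 7.0.0: drop return_tuple when upgrading -- the flag no
# longer exists, and unknown kwargs are forwarded to the Databricks SQL
# connector. Rows now come back as namedtuples in all cases.
from airflow.providers.common.sql.hooks.sql import fetch_all_handler
from airflow.providers.databricks.hooks.databricks_sql import DatabricksSqlHook

hook = DatabricksSqlHook(
    databricks_conn_id="databricks_default",  # placeholder connection id
    http_path="/sql/1.0/warehouses/abc123",   # placeholder warehouse path
)
rows = hook.run("SELECT 1 AS one", handler=fetch_all_handler)
print(rows[0].one)  # attribute access works: rows are namedtuples, not Row objects
```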
{apache_airflow_providers_databricks-6.13.0 → apache_airflow_providers_databricks-7.0.0}/airflow/providers/databricks/operators/databricks.py
RENAMED
@@ -21,14 +21,13 @@ from __future__ import annotations
 
 import time
 from abc import ABC, abstractmethod
+from collections.abc import Sequence
 from functools import cached_property
 from logging import Logger
-from typing import TYPE_CHECKING, Any, Sequence
-
-from deprecated import deprecated
+from typing import TYPE_CHECKING, Any
 
 from airflow.configuration import conf
-from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
+from airflow.exceptions import AirflowException
 from airflow.models import BaseOperator, BaseOperatorLink, XCom
 from airflow.providers.databricks.hooks.databricks import DatabricksHook, RunLifeCycleState, RunState
 from airflow.providers.databricks.operators.databricks_workflow import (
@@ -121,7 +120,7 @@ def _handle_databricks_operator_execution(operator, hook, log, context) -> None:
             repair_json = {"run_id": operator.run_id, "rerun_all_failed_tasks": True}
             if latest_repair_id is not None:
                 repair_json["latest_repair_id"] = latest_repair_id
-            operator.json["latest_repair_id"] = hook.repair_run(
+            operator.json["latest_repair_id"] = hook.repair_run(repair_json)
             _handle_databricks_operator_execution(operator, hook, log, context)
         raise AirflowException(error_message)
@@ -640,27 +639,6 @@ class DatabricksSubmitRunOperator(BaseOperator):
         _handle_deferrable_databricks_operator_completion(event, self.log)
 
 
-@deprecated(
-    reason=(
-        "`DatabricksSubmitRunDeferrableOperator` has been deprecated. "
-        "Please use `airflow.providers.databricks.operators.DatabricksSubmitRunOperator` "
-        "with `deferrable=True` instead."
-    ),
-    category=AirflowProviderDeprecationWarning,
-)
-class DatabricksSubmitRunDeferrableOperator(DatabricksSubmitRunOperator):
-    """Deferrable version of ``DatabricksSubmitRunOperator``."""
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(deferrable=True, *args, **kwargs)
-
-    def execute(self, context):
-        hook = self._get_hook(caller="DatabricksSubmitRunDeferrableOperator")
-        json_normalised = normalise_json_content(self.json)
-        self.run_id = hook.submit_run(json_normalised)
-        _handle_deferrable_databricks_operator_execution(self, hook, self.log, context)
-
-
 class DatabricksRunNowOperator(BaseOperator):
     """
     Runs an existing Spark job run to Databricks using the api/2.1/jobs/run-now API endpoint.
@@ -952,13 +930,15 @@ class DatabricksRunNowOperator(BaseOperator):
 
     def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> None:
         if event:
+            if event.get("run_state"):
+                run_state = RunState.from_json(event["run_state"])
+            if event.get("repair_run"):
+                event["repair_run"] = event["repair_run"] and (
+                    not self.databricks_repair_reason_new_settings
+                    or is_repair_reason_match_exist(self, run_state)
+                )
             _handle_deferrable_databricks_operator_completion(event, self.log)
-
-            should_repair = event["repair_run"] and (
-                not self.databricks_repair_reason_new_settings
-                or is_repair_reason_match_exist(self, run_state)
-            )
-            if should_repair:
+            if event.get("repair_run"):
                 self.repair_run = False
                 self.run_id = event["run_id"]
                 job_id = self._hook.get_job_id(self.run_id)
@@ -980,21 +960,6 @@ class DatabricksRunNowOperator(BaseOperator):
         self.log.error("Error: Task: %s with invalid run_id was requested to be cancelled.", self.task_id)
 
 
-@deprecated(
-    reason=(
-        "`DatabricksRunNowDeferrableOperator` has been deprecated. "
-        "Please use `airflow.providers.databricks.operators.DatabricksRunNowOperator` "
-        "with `deferrable=True` instead."
-    ),
-    category=AirflowProviderDeprecationWarning,
-)
-class DatabricksRunNowDeferrableOperator(DatabricksRunNowOperator):
-    """Deferrable version of ``DatabricksRunNowOperator``."""
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(deferrable=True, *args, **kwargs)
-
-
 class DatabricksTaskBaseOperator(BaseOperator, ABC):
     """
     Base class for operators that are run as Databricks job tasks or tasks within a Databricks workflow.
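With the two deprecated classes removed, DAGs that still import `DatabricksSubmitRunDeferrableOperator` or `DatabricksRunNowDeferrableOperator` fail at import time under 7.0.0. The replacement is the flag the deprecation messages already pointed to; a migration sketch (task ids, job id, and run spec below are placeholders):

```python
# Migration sketch: the *DeferrableOperator classes are gone in 7.0.0;
# pass deferrable=True to the base operators instead.
from airflow.providers.databricks.operators.databricks import (
    DatabricksRunNowOperator,
    DatabricksSubmitRunOperator,
)

run_now = DatabricksRunNowOperator(
    task_id="run_now",
    job_id=123,        # placeholder Databricks job id
    deferrable=True,   # replaces DatabricksRunNowDeferrableOperator
)
submit = DatabricksSubmitRunOperator(
    task_id="submit_run",
    json={"run_name": "example-run"},  # placeholder run spec
    deferrable=True,                   # replaces DatabricksSubmitRunDeferrableOperator
)
```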
{apache_airflow_providers_databricks-6.13.0 → apache_airflow_providers_databricks-7.0.0}/airflow/providers/databricks/operators/databricks_repos.py
RENAMED
@@ -20,8 +20,9 @@
 from __future__ import annotations
 
 import re
+from collections.abc import Sequence
 from functools import cached_property
-from typing import TYPE_CHECKING, Sequence
+from typing import TYPE_CHECKING
 from urllib.parse import urlsplit
 
 from airflow.exceptions import AirflowException
{apache_airflow_providers_databricks-6.13.0 → apache_airflow_providers_databricks-7.0.0}/airflow/providers/databricks/operators/databricks_sql.py
RENAMED
@@ -21,7 +21,8 @@ from __future__ import annotations
 
 import csv
 import json
-from typing import TYPE_CHECKING, Any, ClassVar, Sequence
+from collections.abc import Sequence
+from typing import TYPE_CHECKING, Any, ClassVar
 
 from databricks.sql.utils import ParamEscaper
 
@@ -114,7 +115,6 @@ class DatabricksSqlOperator(SQLExecuteQueryOperator):
             "catalog": self.catalog,
             "schema": self.schema,
             "caller": "DatabricksSqlOperator",
-            "return_tuple": True,
             **self.client_parameters,
             **self.hook_params,
         }
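Only the internal hook wiring changed here: the operator no longer forwards `return_tuple=True` because the 7.0.0 hook always behaves that way. Typical usage is unaffected; a minimal sketch with placeholder connection and endpoint names:

```python
# Unchanged user-facing usage in 7.0.0; names below are placeholders.
from airflow.providers.databricks.operators.databricks_sql import DatabricksSqlOperator

select_one = DatabricksSqlOperator(
    task_id="select_one",
    databricks_conn_id="databricks_default",  # placeholder connection id
    sql_endpoint_name="my_endpoint",          # placeholder SQL warehouse name
    sql="SELECT 1",
)
```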
{apache_airflow_providers_databricks-6.13.0 → apache_airflow_providers_databricks-7.0.0}/airflow/providers/databricks/plugins/databricks_workflow.py
RENAMED
@@ -195,7 +195,10 @@ def _get_launch_task_key(current_task_key: TaskInstanceKey, task_id: str) -> TaskInstanceKey:
 @provide_session
 def get_task_instance(operator: BaseOperator, dttm, session: Session = NEW_SESSION) -> TaskInstance:
     dag_id = operator.dag.dag_id
-    dag_run = DagRun.find(dag_id, execution_date=dttm)[0]
+    if hasattr(DagRun, "execution_date"):  # Airflow 2.x.
+        dag_run = DagRun.find(dag_id, execution_date=dttm)[0]  # type: ignore[call-arg]
+    else:
+        dag_run = DagRun.find(dag_id, logical_date=dttm)[0]
     ti = (
         session.query(TaskInstance)
         .filter(
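The plugin now feature-detects which date keyword `DagRun.find` accepts instead of parsing Airflow version strings; `execution_date` was renamed to `logical_date` in newer Airflow. The same pattern in isolation:

```python
# The feature-detection idea from the diff above, in isolation: prefer
# hasattr() over version parsing when an attribute was renamed between releases.
from airflow.models.dagrun import DagRun

date_kwarg = "execution_date" if hasattr(DagRun, "execution_date") else "logical_date"
# then: DagRun.find(dag_id, **{date_kwarg: dttm})[0] -- equivalent to the branch above
```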
{apache_airflow_providers_databricks-6.13.0 → apache_airflow_providers_databricks-7.0.0}/airflow/providers/databricks/sensors/databricks_partition.py
RENAMED
@@ -20,9 +20,10 @@
 
 from __future__ import annotations
 
+from collections.abc import Sequence
 from datetime import datetime
 from functools import cached_property
-from typing import TYPE_CHECKING, Any, Callable, Sequence
+from typing import TYPE_CHECKING, Any, Callable
 
 from databricks.sql.utils import ParamEscaper
 
{apache_airflow_providers_databricks-6.13.0 → apache_airflow_providers_databricks-7.0.0}/airflow/providers/databricks/sensors/databricks_sql.py
RENAMED
@@ -20,8 +20,9 @@
 
 from __future__ import annotations
 
+from collections.abc import Iterable, Sequence
 from functools import cached_property
-from typing import TYPE_CHECKING, Any, Callable, Iterable, Sequence
+from typing import TYPE_CHECKING, Any, Callable
 
 from airflow.exceptions import AirflowException
 from airflow.providers.common.sql.hooks.sql import fetch_all_handler
{apache_airflow_providers_databricks-6.13.0 → apache_airflow_providers_databricks-7.0.0}/pyproject.toml
RENAMED
@@ -27,7 +27,7 @@ build-backend = "flit_core.buildapi"
 
 [project]
 name = "apache-airflow-providers-databricks"
-version = "6.13.0"
+version = "7.0.0"
 description = "Provider package apache-airflow-providers-databricks for Apache Airflow"
 readme = "README.rst"
 authors = [
@@ -56,8 +56,8 @@ requires-python = "~=3.9"
 dependencies = [
     "aiohttp>=3.9.2, <4",
     "apache-airflow-providers-common-sql>=1.20.0",
-    "apache-airflow>=2.
-    "databricks-sql-connector>=
+    "apache-airflow>=2.9.0",
+    "databricks-sql-connector>=3.0.0",
     "mergedeep>=1.3.4",
     "pandas>=1.5.3,<2.2;python_version<\"3.9\"",
     "pandas>=2.1.2,<2.2;python_version>=\"3.9\"",
@@ -66,12 +66,12 @@ dependencies = [
 ]
 
 [project.urls]
-"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.13.0"
-"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.13.0/changelog.html"
+"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.0.0"
+"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.0.0/changelog.html"
 "Bug Tracker" = "https://github.com/apache/airflow/issues"
 "Source Code" = "https://github.com/apache/airflow"
 "Slack Chat" = "https://s.apache.org/airflow-slack"
-"Twitter" = "https://twitter.com/ApacheAirflow"
+"Twitter" = "https://x.com/ApacheAirflow"
 "YouTube" = "https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/"
@@ -79,9 +79,6 @@ provider_info = "airflow.providers.databricks.get_provider_info:get_provider_info"
 [project.entry-points."airflow.plugins"]
 databricks_workflow = "airflow.providers.databricks.plugins.databricks_workflow:DatabricksWorkflowPlugin"
 
 [project.optional-dependencies]
-"common.sql" = [
-    "apache-airflow-providers-common-sql",
-]
 "sdk" = [
     "databricks-sdk==0.10.0",
 ]
All remaining files (listed above with +0 -0) are unchanged between the two versions.