apache-airflow-providers-databricks 6.12.0rc1__tar.gz → 6.13.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of apache-airflow-providers-databricks might be problematic.

Files changed (25)
  1. {apache_airflow_providers_databricks-6.12.0rc1 → apache_airflow_providers_databricks-6.13.0}/PKG-INFO +12 -12
  2. {apache_airflow_providers_databricks-6.12.0rc1 → apache_airflow_providers_databricks-6.13.0}/README.rst +4 -4
  3. {apache_airflow_providers_databricks-6.12.0rc1 → apache_airflow_providers_databricks-6.13.0}/airflow/providers/databricks/__init__.py +1 -1
  4. apache_airflow_providers_databricks-6.13.0/airflow/providers/databricks/exceptions.py +32 -0
  5. {apache_airflow_providers_databricks-6.12.0rc1 → apache_airflow_providers_databricks-6.13.0}/airflow/providers/databricks/get_provider_info.py +3 -2
  6. {apache_airflow_providers_databricks-6.12.0rc1 → apache_airflow_providers_databricks-6.13.0}/airflow/providers/databricks/hooks/databricks_base.py +98 -0
  7. {apache_airflow_providers_databricks-6.12.0rc1 → apache_airflow_providers_databricks-6.13.0}/airflow/providers/databricks/hooks/databricks_sql.py +39 -2
  8. {apache_airflow_providers_databricks-6.12.0rc1 → apache_airflow_providers_databricks-6.13.0}/airflow/providers/databricks/operators/databricks.py +30 -1
  9. {apache_airflow_providers_databricks-6.12.0rc1 → apache_airflow_providers_databricks-6.13.0}/airflow/providers/databricks/operators/databricks_sql.py +7 -2
  10. {apache_airflow_providers_databricks-6.12.0rc1 → apache_airflow_providers_databricks-6.13.0}/pyproject.toml +6 -7
  11. {apache_airflow_providers_databricks-6.12.0rc1 → apache_airflow_providers_databricks-6.13.0}/airflow/providers/databricks/LICENSE +0 -0
  12. {apache_airflow_providers_databricks-6.12.0rc1 → apache_airflow_providers_databricks-6.13.0}/airflow/providers/databricks/hooks/__init__.py +0 -0
  13. {apache_airflow_providers_databricks-6.12.0rc1 → apache_airflow_providers_databricks-6.13.0}/airflow/providers/databricks/hooks/databricks.py +0 -0
  14. {apache_airflow_providers_databricks-6.12.0rc1 → apache_airflow_providers_databricks-6.13.0}/airflow/providers/databricks/operators/__init__.py +0 -0
  15. {apache_airflow_providers_databricks-6.12.0rc1 → apache_airflow_providers_databricks-6.13.0}/airflow/providers/databricks/operators/databricks_repos.py +0 -0
  16. {apache_airflow_providers_databricks-6.12.0rc1 → apache_airflow_providers_databricks-6.13.0}/airflow/providers/databricks/operators/databricks_workflow.py +0 -0
  17. {apache_airflow_providers_databricks-6.12.0rc1 → apache_airflow_providers_databricks-6.13.0}/airflow/providers/databricks/plugins/__init__.py +0 -0
  18. {apache_airflow_providers_databricks-6.12.0rc1 → apache_airflow_providers_databricks-6.13.0}/airflow/providers/databricks/plugins/databricks_workflow.py +0 -0
  19. {apache_airflow_providers_databricks-6.12.0rc1 → apache_airflow_providers_databricks-6.13.0}/airflow/providers/databricks/sensors/__init__.py +0 -0
  20. {apache_airflow_providers_databricks-6.12.0rc1 → apache_airflow_providers_databricks-6.13.0}/airflow/providers/databricks/sensors/databricks_partition.py +0 -0
  21. {apache_airflow_providers_databricks-6.12.0rc1 → apache_airflow_providers_databricks-6.13.0}/airflow/providers/databricks/sensors/databricks_sql.py +0 -0
  22. {apache_airflow_providers_databricks-6.12.0rc1 → apache_airflow_providers_databricks-6.13.0}/airflow/providers/databricks/triggers/__init__.py +0 -0
  23. {apache_airflow_providers_databricks-6.12.0rc1 → apache_airflow_providers_databricks-6.13.0}/airflow/providers/databricks/triggers/databricks.py +0 -0
  24. {apache_airflow_providers_databricks-6.12.0rc1 → apache_airflow_providers_databricks-6.13.0}/airflow/providers/databricks/utils/__init__.py +0 -0
  25. {apache_airflow_providers_databricks-6.12.0rc1 → apache_airflow_providers_databricks-6.13.0}/airflow/providers/databricks/utils/databricks.py +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.3
  Name: apache-airflow-providers-databricks
- Version: 6.12.0rc1
+ Version: 6.13.0
  Summary: Provider package apache-airflow-providers-databricks for Apache Airflow
  Keywords: airflow-provider,databricks,airflow,integration
  Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -21,8 +21,8 @@ Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3.12
  Classifier: Topic :: System :: Monitoring
  Requires-Dist: aiohttp>=3.9.2, <4
- Requires-Dist: apache-airflow-providers-common-sql>=1.10.0rc0
- Requires-Dist: apache-airflow>=2.8.0rc0
+ Requires-Dist: apache-airflow-providers-common-sql>=1.20.0
+ Requires-Dist: apache-airflow>=2.8.0
  Requires-Dist: databricks-sql-connector>=2.0.0, <3.0.0, !=2.9.0
  Requires-Dist: mergedeep>=1.3.4
  Requires-Dist: pandas>=1.5.3,<2.2;python_version<"3.9"
@@ -30,17 +30,17 @@ Requires-Dist: pandas>=2.1.2,<2.2;python_version>="3.9"
  Requires-Dist: pyarrow>=14.0.1
  Requires-Dist: requests>=2.27.0,<3
  Requires-Dist: azure-identity>=1.3.1 ; extra == "azure-identity"
- Requires-Dist: apache-airflow-providers-common-sql ; extra == "common.sql"
+ Requires-Dist: apache-airflow-providers-common-sql ; extra == "common-sql"
  Requires-Dist: databricks-sdk==0.10.0 ; extra == "sdk"
  Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
- Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.12.0/changelog.html
- Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.12.0
+ Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.13.0/changelog.html
+ Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.13.0
  Project-URL: Slack Chat, https://s.apache.org/airflow-slack
  Project-URL: Source Code, https://github.com/apache/airflow
  Project-URL: Twitter, https://twitter.com/ApacheAirflow
  Project-URL: YouTube, https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/
  Provides-Extra: azure-identity
- Provides-Extra: common.sql
+ Provides-Extra: common-sql
  Provides-Extra: sdk


@@ -87,7 +87,7 @@ Provides-Extra: sdk

  Package ``apache-airflow-providers-databricks``

- Release: ``6.12.0.rc1``
+ Release: ``6.13.0``


  `Databricks <https://databricks.com/>`__
@@ -100,7 +100,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
  are in ``airflow.providers.databricks`` python package.

  You can find package information and changelog for the provider
- in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.12.0/>`_.
+ in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.13.0/>`_.

  Installation
  ------------
@@ -118,7 +118,7 @@ Requirements
  PIP package                             Version required
  ======================================= =========================================
  ``apache-airflow``                      ``>=2.8.0``
- ``apache-airflow-providers-common-sql`` ``>=1.10.0``
+ ``apache-airflow-providers-common-sql`` ``>=1.20.0``
  ``requests``                            ``>=2.27.0,<3``
  ``databricks-sql-connector``            ``>=2.0.0,!=2.9.0,<3.0.0``
  ``aiohttp``                             ``>=3.9.2,<4``
@@ -148,4 +148,4 @@ Dependent package
  ============================================================================================================ ==============

  The changelog for the provider package can be found in the
- `changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.12.0/changelog.html>`_.
+ `changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.13.0/changelog.html>`_.
README.rst

@@ -42,7 +42,7 @@

  Package ``apache-airflow-providers-databricks``

- Release: ``6.12.0.rc1``
+ Release: ``6.13.0``


  `Databricks <https://databricks.com/>`__
@@ -55,7 +55,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
  are in ``airflow.providers.databricks`` python package.

  You can find package information and changelog for the provider
- in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.12.0/>`_.
+ in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.13.0/>`_.

  Installation
  ------------
@@ -73,7 +73,7 @@ Requirements
  PIP package                             Version required
  ======================================= =========================================
  ``apache-airflow``                      ``>=2.8.0``
- ``apache-airflow-providers-common-sql`` ``>=1.10.0``
+ ``apache-airflow-providers-common-sql`` ``>=1.20.0``
  ``requests``                            ``>=2.27.0,<3``
  ``databricks-sql-connector``            ``>=2.0.0,!=2.9.0,<3.0.0``
  ``aiohttp``                             ``>=3.9.2,<4``
@@ -103,4 +103,4 @@ Dependent package
  ============================================================================================================ ==============

  The changelog for the provider package can be found in the
- `changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.12.0/changelog.html>`_.
+ `changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.13.0/changelog.html>`_.
airflow/providers/databricks/__init__.py

@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version

  __all__ = ["__version__"]

- __version__ = "6.12.0"
+ __version__ = "6.13.0"

  if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
      "2.8.0"
airflow/providers/databricks/exceptions.py (new file)

@@ -0,0 +1,32 @@
+ #
+ # Licensed to the Apache Software Foundation (ASF) under one
+ # or more contributor license agreements. See the NOTICE file
+ # distributed with this work for additional information
+ # regarding copyright ownership. The ASF licenses this file
+ # to you under the Apache License, Version 2.0 (the
+ # "License"); you may not use this file except in compliance
+ # with the License. You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing,
+ # software distributed under the License is distributed on an
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ # KIND, either express or implied. See the License for the
+ # specific language governing permissions and limitations
+ # under the License.
+ # Note: Any AirflowException raised is expected to cause the TaskInstance
+ #       to be marked in an ERROR state
+ """Exceptions used by Databricks Provider."""
+
+ from __future__ import annotations
+
+ from airflow.exceptions import AirflowException
+
+
+ class DatabricksSqlExecutionError(AirflowException):
+     """Raised when there is an error in sql execution."""
+
+
+ class DatabricksSqlExecutionTimeout(DatabricksSqlExecutionError):
+     """Raised when a sql execution times out."""
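Both exceptions are raised by the ``execution_timeout`` handling added to ``DatabricksSqlHook.run()`` further down in this diff. Since ``DatabricksSqlExecutionTimeout`` subclasses ``DatabricksSqlExecutionError``, callers that only care about "the statement failed" can catch the base class; a minimal, hypothetical sketch (the helper is illustrative, not part of the provider):

```python
from airflow.providers.databricks.exceptions import (
    DatabricksSqlExecutionError,
    DatabricksSqlExecutionTimeout,
)


def describe_failure(exc: Exception) -> str:
    """Illustrative helper showing the class hierarchy of the new exceptions."""
    if isinstance(exc, DatabricksSqlExecutionTimeout):
        # Timeout is the more specific subclass, so it must be checked first.
        return "statement cancelled after exceeding execution_timeout"
    if isinstance(exc, DatabricksSqlExecutionError):
        return "statement failed while executing"
    return "unrelated error"
```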
airflow/providers/databricks/get_provider_info.py

@@ -28,8 +28,9 @@ def get_provider_info():
          "name": "Databricks",
          "description": "`Databricks <https://databricks.com/>`__\n",
          "state": "ready",
-         "source-date-epoch": 1730012521,
+         "source-date-epoch": 1731569981,
          "versions": [
+             "6.13.0",
              "6.12.0",
              "6.11.0",
              "6.10.0",
@@ -77,7 +78,7 @@ def get_provider_info():
          ],
          "dependencies": [
              "apache-airflow>=2.8.0",
-             "apache-airflow-providers-common-sql>=1.10.0",
+             "apache-airflow-providers-common-sql>=1.20.0",
              "requests>=2.27.0,<3",
              "databricks-sql-connector>=2.0.0, <3.0.0, !=2.9.0",
              "aiohttp>=3.9.2, <4",
airflow/providers/databricks/hooks/databricks_base.py

@@ -65,6 +65,8 @@ AZURE_MANAGEMENT_ENDPOINT = "https://management.core.windows.net/"
  DEFAULT_DATABRICKS_SCOPE = "2ff814a6-3304-4ab8-85cb-cd0e6f879c1d"
  OIDC_TOKEN_SERVICE_URL = "{}/oidc/v1/token"

+ DEFAULT_AZURE_CREDENTIAL_SETTING_KEY = "use_default_azure_credential"
+

  class BaseDatabricksHook(BaseHook):
      """
@@ -89,6 +91,7 @@ class BaseDatabricksHook(BaseHook):
          "token",
          "host",
          "use_azure_managed_identity",
+         DEFAULT_AZURE_CREDENTIAL_SETTING_KEY,
          "azure_ad_endpoint",
          "azure_resource_id",
          "azure_tenant_id",
@@ -376,6 +379,94 @@ class BaseDatabricksHook(BaseHook):

          return jsn["access_token"]

+     def _get_aad_token_for_default_az_credential(self, resource: str) -> str:
+         """
+         Get AAD token for given resource for workload identity.
+
+         Supports managed identity or service principal auth.
+         :param resource: resource to issue token to
+         :return: AAD token, or raise an exception
+         """
+         aad_token = self.oauth_tokens.get(resource)
+         if aad_token and self._is_oauth_token_valid(aad_token):
+             return aad_token["access_token"]
+
+         self.log.info("Existing AAD token is expired, or going to expire soon. Refreshing...")
+         try:
+             from azure.identity import DefaultAzureCredential
+
+             for attempt in self._get_retry_object():
+                 with attempt:
+                     # This only works in an Azure Kubernetes Service Cluster given the following environment variables:
+                     # AZURE_TENANT_ID, AZURE_CLIENT_ID, AZURE_FEDERATED_TOKEN_FILE
+                     #
+                     # While there is a WorkloadIdentityCredential class, the below class is advised by Microsoft
+                     # https://learn.microsoft.com/en-us/azure/aks/workload-identity-overview
+                     token = DefaultAzureCredential().get_token(f"{resource}/.default")
+
+                     jsn = {
+                         "access_token": token.token,
+                         "token_type": "Bearer",
+                         "expires_on": token.expires_on,
+                     }
+                     self._is_oauth_token_valid(jsn)
+                     self.oauth_tokens[resource] = jsn
+                     break
+         except ImportError as e:
+             raise AirflowOptionalProviderFeatureException(e)
+         except RetryError:
+             raise AirflowException(f"API requests to Azure failed {self.retry_limit} times. Giving up.")
+         except requests_exceptions.HTTPError as e:
+             msg = f"Response: {e.response.content.decode()}, Status Code: {e.response.status_code}"
+             raise AirflowException(msg)
+
+         return token.token
+
+     async def _a_get_aad_token_for_default_az_credential(self, resource: str) -> str:
+         """
+         Get AAD token for given resource for workload identity.
+
+         Supports managed identity or service principal auth.
+         :param resource: resource to issue token to
+         :return: AAD token, or raise an exception
+         """
+         aad_token = self.oauth_tokens.get(resource)
+         if aad_token and self._is_oauth_token_valid(aad_token):
+             return aad_token["access_token"]
+
+         self.log.info("Existing AAD token is expired, or going to expire soon. Refreshing...")
+         try:
+             from azure.identity.aio import (
+                 DefaultAzureCredential as AsyncDefaultAzureCredential,
+             )
+
+             for attempt in self._get_retry_object():
+                 with attempt:
+                     # This only works in an Azure Kubernetes Service Cluster given the following environment variables:
+                     # AZURE_TENANT_ID, AZURE_CLIENT_ID, AZURE_FEDERATED_TOKEN_FILE
+                     #
+                     # While there is a WorkloadIdentityCredential class, the below class is advised by Microsoft
+                     # https://learn.microsoft.com/en-us/azure/aks/workload-identity-overview
+                     token = await AsyncDefaultAzureCredential().get_token(f"{resource}/.default")
+
+                     jsn = {
+                         "access_token": token.token,
+                         "token_type": "Bearer",
+                         "expires_on": token.expires_on,
+                     }
+                     self._is_oauth_token_valid(jsn)
+                     self.oauth_tokens[resource] = jsn
+                     break
+         except ImportError as e:
+             raise AirflowOptionalProviderFeatureException(e)
+         except RetryError:
+             raise AirflowException(f"API requests to Azure failed {self.retry_limit} times. Giving up.")
+         except requests_exceptions.HTTPError as e:
+             msg = f"Response: {e.response.content.decode()}, Status Code: {e.response.status_code}"
+             raise AirflowException(msg)
+
+         return token.token
+
      def _get_aad_headers(self) -> dict:
          """
          Fill AAD headers if necessary (SPN is outside of the workspace).
@@ -476,6 +567,9 @@ class BaseDatabricksHook(BaseHook):
              self.log.debug("Using AAD Token for managed identity.")
              self._check_azure_metadata_service()
              return self._get_aad_token(DEFAULT_DATABRICKS_SCOPE)
+         elif self.databricks_conn.extra_dejson.get(DEFAULT_AZURE_CREDENTIAL_SETTING_KEY, False):
+             self.log.debug("Using default Azure Credential authentication.")
+             return self._get_aad_token_for_default_az_credential(DEFAULT_DATABRICKS_SCOPE)
          elif self.databricks_conn.extra_dejson.get("service_principal_oauth", False):
              if self.databricks_conn.login == "" or self.databricks_conn.password == "":
                  raise AirflowException("Service Principal credentials aren't provided")
@@ -504,6 +598,10 @@ class BaseDatabricksHook(BaseHook):
              self.log.debug("Using AAD Token for managed identity.")
              await self._a_check_azure_metadata_service()
              return await self._a_get_aad_token(DEFAULT_DATABRICKS_SCOPE)
+         elif self.databricks_conn.extra_dejson.get(DEFAULT_AZURE_CREDENTIAL_SETTING_KEY, False):
+             self.log.debug("Using AzureDefaultCredential for authentication.")
+
+             return await self._a_get_aad_token_for_default_az_credential(DEFAULT_DATABRICKS_SCOPE)
          elif self.databricks_conn.extra_dejson.get("service_principal_oauth", False):
              if self.databricks_conn.login == "" or self.databricks_conn.password == "":
                  raise AirflowException("Service Principal credentials aren't provided")
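The practical effect of this change is a new connection-level switch: setting ``use_default_azure_credential`` in the Databricks connection extras makes the hook obtain its AAD token via ``azure.identity.DefaultAzureCredential`` (for example AKS workload identity) instead of a managed identity or service principal secret. A hedged sketch of such a connection defined in code, assuming the ``azure-identity`` extra is installed; the connection id and workspace URL are placeholders:

```python
import json

from airflow.models.connection import Connection

# Hypothetical connection; in practice this usually lives in the metadata DB or a secrets backend.
databricks_conn = Connection(
    conn_id="databricks_default_azure_credential",  # placeholder connection id
    conn_type="databricks",
    host="https://adb-1234567890123456.7.azuredatabricks.net",  # placeholder workspace URL
    extra=json.dumps(
        {
            # Key introduced in this release (DEFAULT_AZURE_CREDENTIAL_SETTING_KEY).
            "use_default_azure_credential": True,
        }
    ),
)
print(databricks_conn.extra_dejson)  # {'use_default_azure_credential': True}
```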
airflow/providers/databricks/hooks/databricks_sql.py

@@ -16,10 +16,12 @@
  # under the License.
  from __future__ import annotations

+ import threading
  import warnings
  from collections import namedtuple
  from contextlib import closing
  from copy import copy
+ from datetime import timedelta
  from typing import (
      TYPE_CHECKING,
      Any,
@@ -35,8 +37,12 @@ from typing import (

  from databricks import sql  # type: ignore[attr-defined]

- from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
+ from airflow.exceptions import (
+     AirflowException,
+     AirflowProviderDeprecationWarning,
+ )
  from airflow.providers.common.sql.hooks.sql import DbApiHook, return_single_query_results
+ from airflow.providers.databricks.exceptions import DatabricksSqlExecutionError, DatabricksSqlExecutionTimeout
  from airflow.providers.databricks.hooks.databricks_base import BaseDatabricksHook

  if TYPE_CHECKING:
@@ -49,6 +55,16 @@ LIST_SQL_ENDPOINTS_ENDPOINT = ("GET", "api/2.0/sql/endpoints")
  T = TypeVar("T")


+ def create_timeout_thread(cur, execution_timeout: timedelta | None) -> threading.Timer | None:
+     if execution_timeout is not None:
+         seconds_to_timeout = execution_timeout.total_seconds()
+         t = threading.Timer(seconds_to_timeout, cur.connection.cancel)
+     else:
+         t = None
+
+     return t
+
+
  class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
      """
      Hook to interact with Databricks SQL.
@@ -184,6 +200,7 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
          handler: None = ...,
          split_statements: bool = ...,
          return_last: bool = ...,
+         execution_timeout: timedelta | None = None,
      ) -> None: ...

      @overload
@@ -195,6 +212,7 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
          handler: Callable[[Any], T] = ...,
          split_statements: bool = ...,
          return_last: bool = ...,
+         execution_timeout: timedelta | None = None,
      ) -> tuple | list[tuple] | list[list[tuple] | tuple] | None: ...

      def run(
@@ -205,6 +223,7 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
          handler: Callable[[Any], T] | None = None,
          split_statements: bool = True,
          return_last: bool = True,
+         execution_timeout: timedelta | None = None,
      ) -> tuple | list[tuple] | list[list[tuple] | tuple] | None:
          """
          Run a command or a list of commands.
@@ -224,6 +243,8 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
          :param return_last: Whether to return result for only last statement or for all after split
          :return: return only result of the LAST SQL expression if handler was provided unless return_last
              is set to False.
+         :param execution_timeout: max time allowed for the execution of this task instance, if it goes beyond
+             it will raise and fail.
          """
          self.descriptions = []
          if isinstance(sql, str):
@@ -248,7 +269,23 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
                  self.set_autocommit(conn, autocommit)

                  with closing(conn.cursor()) as cur:
-                     self._run_command(cur, sql_statement, parameters)  # type: ignore[attr-defined]
+                     t = create_timeout_thread(cur, execution_timeout)
+
+                     # TODO: adjust this to make testing easier
+                     try:
+                         self._run_command(cur, sql_statement, parameters)  # type: ignore[attr-defined]
+                     except Exception as e:
+                         if t is None or t.is_alive():
+                             raise DatabricksSqlExecutionError(
+                                 f"Error running SQL statement: {sql_statement}. {str(e)}"
+                             )
+                         raise DatabricksSqlExecutionTimeout(
+                             f"Timeout threshold exceeded for SQL statement: {sql_statement} was cancelled."
+                         )
+                     finally:
+                         if t is not None:
+                             t.cancel()
+
                      if handler is not None:
                          raw_result = handler(cur)
                          if self.return_tuple:
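In short, ``run()`` now arms a ``threading.Timer`` that cancels the cursor's connection once ``execution_timeout`` elapses, and reports the cancellation as ``DatabricksSqlExecutionTimeout`` rather than a generic failure. A hedged usage sketch; the connection id, SQL warehouse name, and query are placeholders:

```python
from datetime import timedelta

from airflow.providers.databricks.exceptions import DatabricksSqlExecutionTimeout
from airflow.providers.databricks.hooks.databricks_sql import DatabricksSqlHook

hook = DatabricksSqlHook(
    databricks_conn_id="databricks_default",  # placeholder connection id
    sql_endpoint_name="my-warehouse",  # placeholder SQL warehouse name
)
try:
    # Cancel the statement and raise if it is still running after 10 minutes.
    rows = hook.run(
        "SELECT * FROM samples.nyctaxi.trips LIMIT 10",  # placeholder query
        handler=lambda cursor: cursor.fetchall(),
        execution_timeout=timedelta(minutes=10),
    )
except DatabricksSqlExecutionTimeout:
    rows = None  # the timeout thread cancelled the query
```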
airflow/providers/databricks/operators/databricks.py

@@ -676,7 +676,7 @@ class DatabricksRunNowOperator(BaseOperator):

          json = {
              "job_id": 42,
-             "notebook_params": {"dry-run": "true", "oldest-time-to-consider": "1457570074236"},
+             "job_parameters": {"dry-run": "true", "oldest-time-to-consider": "1457570074236"},
          }

          notebook_run = DatabricksRunNowOperator(task_id="notebook_run", json=json)
@@ -688,6 +688,8 @@ class DatabricksRunNowOperator(BaseOperator):

          job_id = 42

+         dbt_commands = ["dbt deps", "dbt seed", "dbt run"]
+
          notebook_params = {"dry-run": "true", "oldest-time-to-consider": "1457570074236"}

          python_params = ["douglas adams", "42"]
@@ -698,6 +700,7 @@ class DatabricksRunNowOperator(BaseOperator):

          notebook_run = DatabricksRunNowOperator(
              job_id=job_id,
+             dbt_commands=dbt_commands,
              notebook_params=notebook_params,
              python_params=python_params,
              jar_params=jar_params,
@@ -711,7 +714,9 @@ class DatabricksRunNowOperator(BaseOperator):
      Currently the named parameters that ``DatabricksRunNowOperator`` supports are
        - ``job_id``
        - ``job_name``
+       - ``job_parameters``
        - ``json``
+       - ``dbt_commands``
        - ``notebook_params``
        - ``python_params``
        - ``python_named_parameters``
@@ -731,6 +736,17 @@ class DatabricksRunNowOperator(BaseOperator):
          It must exist only one job with the specified name.
          ``job_id`` and ``job_name`` are mutually exclusive.
          This field will be templated.
+
+     :param job_parameters: A dict from keys to values that override or augment the job's
+         parameters for this run. Job parameters are passed to any of the job's tasks that
+         accept key-value parameters. Job parameters supersede ``notebook_params``, ``python_params``,
+         ``python_named_parameters``, ``jar_params``, ``spark_submit_params``, and they cannot be used in
+         combination.
+         This field will be templated.
+
+         .. seealso::
+             https://docs.databricks.com/en/workflows/jobs/settings.html#add-parameters-for-all-job-tasks
+
      :param json: A JSON object containing API parameters which will be passed
          directly to the ``api/2.1/jobs/run-now`` endpoint. The other named parameters
          (i.e. ``notebook_params``, ``spark_submit_params``..) to this operator will
@@ -741,6 +757,13 @@ class DatabricksRunNowOperator(BaseOperator):
          .. seealso::
              For more information about templating see :ref:`concepts:jinja-templating`.
              https://docs.databricks.com/dev-tools/api/latest/jobs.html#operation/JobsRunNow
+
+     :param dbt_commands: A list containing the dbt commands to run using the dbt command line
+         interface. This field will be templated.
+
+         .. seealso::
+             https://docs.databricks.com/en/jobs/dbt.html
+
      :param notebook_params: A dict from keys to values for jobs with notebook task,
          e.g. "notebook_params": {"name": "john doe", "age": "35"}.
          The map is passed to the notebook and will be accessible through the
@@ -832,7 +855,9 @@ class DatabricksRunNowOperator(BaseOperator):
          *,
          job_id: str | None = None,
          job_name: str | None = None,
+         job_parameters: dict[str, str] | None = None,
          json: Any | None = None,
+         dbt_commands: list[str] | None = None,
          notebook_params: dict[str, str] | None = None,
          python_params: list[str] | None = None,
          jar_params: list[str] | None = None,
@@ -884,6 +909,10 @@ class DatabricksRunNowOperator(BaseOperator):
              self.json["spark_submit_params"] = spark_submit_params
          if idempotency_token is not None:
              self.json["idempotency_token"] = idempotency_token
+         if job_parameters is not None:
+             self.json["job_parameters"] = job_parameters
+         if dbt_commands is not None:
+             self.json["dbt_commands"] = dbt_commands
          if self.json:
              self.json = normalise_json_content(self.json)
          # This variable will be used in case our task gets killed.
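Putting the two new operator arguments together, a hedged DAG-level sketch; the connection id, job id, and job name are placeholders, and per the docstring above ``job_parameters`` supersedes the older ``*_params`` arguments and should not be combined with them:

```python
from airflow.providers.databricks.operators.databricks import DatabricksRunNowOperator

# Trigger an existing job, overriding its job-level parameters for this run only.
run_with_job_parameters = DatabricksRunNowOperator(
    task_id="run_with_job_parameters",
    databricks_conn_id="databricks_default",  # placeholder connection id
    job_id=42,  # placeholder job id
    job_parameters={"dry-run": "true", "oldest-time-to-consider": "1457570074236"},
)

# Trigger a job that contains a dbt task, passing the dbt CLI commands explicitly.
run_dbt_job = DatabricksRunNowOperator(
    task_id="run_dbt_job",
    databricks_conn_id="databricks_default",
    job_name="nightly_dbt_build",  # placeholder job name
    dbt_commands=["dbt deps", "dbt seed", "dbt run"],
)
```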
airflow/providers/databricks/operators/databricks_sql.py

@@ -21,7 +21,7 @@ from __future__ import annotations

  import csv
  import json
- from typing import TYPE_CHECKING, Any, Sequence
+ from typing import TYPE_CHECKING, Any, ClassVar, Sequence

  from databricks.sql.utils import ParamEscaper

@@ -72,7 +72,7 @@ class DatabricksSqlOperator(SQLExecuteQueryOperator):
      )

      template_ext: Sequence[str] = (".sql",)
-     template_fields_renderers = {"sql": "sql"}
+     template_fields_renderers: ClassVar[dict] = {"sql": "sql"}
      conn_id_field = "databricks_conn_id"

      def __init__(
@@ -353,3 +353,8 @@ FILEFORMAT = {self._file_format}
          self.log.info("Executing: %s", sql)
          hook = self._get_hook()
          hook.run(sql)
+
+     def on_kill(self) -> None:
+         # NB: on_kill isn't required for this operator since query cancelling gets
+         # handled in `DatabricksSqlHook.run()` method which is called in `execute()`
+         ...
pyproject.toml

@@ -21,14 +21,13 @@

  # IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE
  # `pyproject_TEMPLATE.toml.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY
- #
  [build-system]
- requires = ["flit_core >=3.2,<4"]
+ requires = ["flit_core==3.10.1"]
  build-backend = "flit_core.buildapi"

  [project]
  name = "apache-airflow-providers-databricks"
- version = "6.12.0.rc1"
+ version = "6.13.0"
  description = "Provider package apache-airflow-providers-databricks for Apache Airflow"
  readme = "README.rst"
  authors = [
@@ -56,8 +55,8 @@ classifiers = [
  requires-python = "~=3.9"
  dependencies = [
      "aiohttp>=3.9.2, <4",
-     "apache-airflow-providers-common-sql>=1.10.0rc0",
-     "apache-airflow>=2.8.0rc0",
+     "apache-airflow-providers-common-sql>=1.20.0",
+     "apache-airflow>=2.8.0",
      "databricks-sql-connector>=2.0.0, <3.0.0, !=2.9.0",
      "mergedeep>=1.3.4",
      "pandas>=1.5.3,<2.2;python_version<\"3.9\"",
@@ -67,8 +66,8 @@ dependencies = [
  ]

  [project.urls]
- "Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.12.0"
- "Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.12.0/changelog.html"
+ "Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.13.0"
+ "Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.13.0/changelog.html"
  "Bug Tracker" = "https://github.com/apache/airflow/issues"
  "Source Code" = "https://github.com/apache/airflow"
  "Slack Chat" = "https://s.apache.org/airflow-slack"