apache-airflow-providers-databricks 6.12.0__py3-none-any.whl → 6.13.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/databricks/__init__.py +1 -1
- airflow/providers/databricks/exceptions.py +32 -0
- airflow/providers/databricks/get_provider_info.py +3 -2
- airflow/providers/databricks/hooks/databricks_base.py +98 -0
- airflow/providers/databricks/hooks/databricks_sql.py +39 -2
- airflow/providers/databricks/operators/databricks.py +30 -1
- airflow/providers/databricks/operators/databricks_sql.py +7 -2
- {apache_airflow_providers_databricks-6.12.0.dist-info → apache_airflow_providers_databricks-6.13.0.dist-info}/METADATA +11 -11
- {apache_airflow_providers_databricks-6.12.0.dist-info → apache_airflow_providers_databricks-6.13.0.dist-info}/RECORD +11 -10
- {apache_airflow_providers_databricks-6.12.0.dist-info → apache_airflow_providers_databricks-6.13.0.dist-info}/WHEEL +1 -1
- {apache_airflow_providers_databricks-6.12.0.dist-info → apache_airflow_providers_databricks-6.13.0.dist-info}/entry_points.txt +0 -0

airflow/providers/databricks/__init__.py
@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version
 
 __all__ = ["__version__"]
 
-__version__ = "6.12.0"
+__version__ = "6.13.0"
 
 if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
     "2.8.0"

airflow/providers/databricks/exceptions.py (new file)
@@ -0,0 +1,32 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# Note: Any AirflowException raised is expected to cause the TaskInstance
+# to be marked in an ERROR state
+"""Exceptions used by Databricks Provider."""
+
+from __future__ import annotations
+
+from airflow.exceptions import AirflowException
+
+
+class DatabricksSqlExecutionError(AirflowException):
+    """Raised when there is an error in sql execution."""
+
+
+class DatabricksSqlExecutionTimeout(DatabricksSqlExecutionError):
+    """Raised when a sql execution times out."""
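
The new exceptions module lets callers tell a statement that was cancelled by a timeout apart from an ordinary SQL failure. A minimal sketch of the hierarchy in use; only the import path and class names come from the diff, the rest is illustrative:

    from airflow.providers.databricks.exceptions import (
        DatabricksSqlExecutionError,
        DatabricksSqlExecutionTimeout,
    )

    # The timeout error subclasses the generic execution error, so a broad
    # except clause for DatabricksSqlExecutionError still catches timeouts.
    assert issubclass(DatabricksSqlExecutionTimeout, DatabricksSqlExecutionError)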

airflow/providers/databricks/get_provider_info.py
@@ -28,8 +28,9 @@ def get_provider_info():
         "name": "Databricks",
         "description": "`Databricks <https://databricks.com/>`__\n",
         "state": "ready",
-        "source-date-epoch":
+        "source-date-epoch": 1731569981,
         "versions": [
+            "6.13.0",
             "6.12.0",
             "6.11.0",
             "6.10.0",
@@ -77,7 +78,7 @@ def get_provider_info():
         ],
         "dependencies": [
             "apache-airflow>=2.8.0",
-            "apache-airflow-providers-common-sql>=1.
+            "apache-airflow-providers-common-sql>=1.20.0",
            "requests>=2.27.0,<3",
             "databricks-sql-connector>=2.0.0, <3.0.0, !=2.9.0",
             "aiohttp>=3.9.2, <4",

airflow/providers/databricks/hooks/databricks_base.py
@@ -65,6 +65,8 @@ AZURE_MANAGEMENT_ENDPOINT = "https://management.core.windows.net/"
 DEFAULT_DATABRICKS_SCOPE = "2ff814a6-3304-4ab8-85cb-cd0e6f879c1d"
 OIDC_TOKEN_SERVICE_URL = "{}/oidc/v1/token"
 
+DEFAULT_AZURE_CREDENTIAL_SETTING_KEY = "use_default_azure_credential"
+
 
 class BaseDatabricksHook(BaseHook):
     """
@@ -89,6 +91,7 @@ class BaseDatabricksHook(BaseHook):
         "token",
         "host",
         "use_azure_managed_identity",
+        DEFAULT_AZURE_CREDENTIAL_SETTING_KEY,
         "azure_ad_endpoint",
         "azure_resource_id",
         "azure_tenant_id",
@@ -376,6 +379,94 @@ class BaseDatabricksHook(BaseHook):
 
         return jsn["access_token"]
 
+    def _get_aad_token_for_default_az_credential(self, resource: str) -> str:
+        """
+        Get AAD token for given resource for workload identity.
+
+        Supports managed identity or service principal auth.
+        :param resource: resource to issue token to
+        :return: AAD token, or raise an exception
+        """
+        aad_token = self.oauth_tokens.get(resource)
+        if aad_token and self._is_oauth_token_valid(aad_token):
+            return aad_token["access_token"]
+
+        self.log.info("Existing AAD token is expired, or going to expire soon. Refreshing...")
+        try:
+            from azure.identity import DefaultAzureCredential
+
+            for attempt in self._get_retry_object():
+                with attempt:
+                    # This only works in an Azure Kubernetes Service Cluster given the following environment variables:
+                    # AZURE_TENANT_ID, AZURE_CLIENT_ID, AZURE_FEDERATED_TOKEN_FILE
+                    #
+                    # While there is a WorkloadIdentityCredential class, the below class is advised by Microsoft
+                    # https://learn.microsoft.com/en-us/azure/aks/workload-identity-overview
+                    token = DefaultAzureCredential().get_token(f"{resource}/.default")
+
+                    jsn = {
+                        "access_token": token.token,
+                        "token_type": "Bearer",
+                        "expires_on": token.expires_on,
+                    }
+                    self._is_oauth_token_valid(jsn)
+                    self.oauth_tokens[resource] = jsn
+                    break
+        except ImportError as e:
+            raise AirflowOptionalProviderFeatureException(e)
+        except RetryError:
+            raise AirflowException(f"API requests to Azure failed {self.retry_limit} times. Giving up.")
+        except requests_exceptions.HTTPError as e:
+            msg = f"Response: {e.response.content.decode()}, Status Code: {e.response.status_code}"
+            raise AirflowException(msg)
+
+        return token.token
+
+    async def _a_get_aad_token_for_default_az_credential(self, resource: str) -> str:
+        """
+        Get AAD token for given resource for workload identity.
+
+        Supports managed identity or service principal auth.
+        :param resource: resource to issue token to
+        :return: AAD token, or raise an exception
+        """
+        aad_token = self.oauth_tokens.get(resource)
+        if aad_token and self._is_oauth_token_valid(aad_token):
+            return aad_token["access_token"]
+
+        self.log.info("Existing AAD token is expired, or going to expire soon. Refreshing...")
+        try:
+            from azure.identity.aio import (
+                DefaultAzureCredential as AsyncDefaultAzureCredential,
+            )
+
+            for attempt in self._get_retry_object():
+                with attempt:
+                    # This only works in an Azure Kubernetes Service Cluster given the following environment variables:
+                    # AZURE_TENANT_ID, AZURE_CLIENT_ID, AZURE_FEDERATED_TOKEN_FILE
+                    #
+                    # While there is a WorkloadIdentityCredential class, the below class is advised by Microsoft
+                    # https://learn.microsoft.com/en-us/azure/aks/workload-identity-overview
+                    token = await AsyncDefaultAzureCredential().get_token(f"{resource}/.default")
+
+                    jsn = {
+                        "access_token": token.token,
+                        "token_type": "Bearer",
+                        "expires_on": token.expires_on,
+                    }
+                    self._is_oauth_token_valid(jsn)
+                    self.oauth_tokens[resource] = jsn
+                    break
+        except ImportError as e:
+            raise AirflowOptionalProviderFeatureException(e)
+        except RetryError:
+            raise AirflowException(f"API requests to Azure failed {self.retry_limit} times. Giving up.")
+        except requests_exceptions.HTTPError as e:
+            msg = f"Response: {e.response.content.decode()}, Status Code: {e.response.status_code}"
+            raise AirflowException(msg)
+
+        return token.token
+
     def _get_aad_headers(self) -> dict:
         """
         Fill AAD headers if necessary (SPN is outside of the workspace).
@@ -476,6 +567,9 @@ class BaseDatabricksHook(BaseHook):
             self.log.debug("Using AAD Token for managed identity.")
             self._check_azure_metadata_service()
             return self._get_aad_token(DEFAULT_DATABRICKS_SCOPE)
+        elif self.databricks_conn.extra_dejson.get(DEFAULT_AZURE_CREDENTIAL_SETTING_KEY, False):
+            self.log.debug("Using default Azure Credential authentication.")
+            return self._get_aad_token_for_default_az_credential(DEFAULT_DATABRICKS_SCOPE)
         elif self.databricks_conn.extra_dejson.get("service_principal_oauth", False):
             if self.databricks_conn.login == "" or self.databricks_conn.password == "":
                 raise AirflowException("Service Principal credentials aren't provided")
@@ -504,6 +598,10 @@ class BaseDatabricksHook(BaseHook):
             self.log.debug("Using AAD Token for managed identity.")
             await self._a_check_azure_metadata_service()
             return await self._a_get_aad_token(DEFAULT_DATABRICKS_SCOPE)
+        elif self.databricks_conn.extra_dejson.get(DEFAULT_AZURE_CREDENTIAL_SETTING_KEY, False):
+            self.log.debug("Using AzureDefaultCredential for authentication.")
+
+            return await self._a_get_aad_token_for_default_az_credential(DEFAULT_DATABRICKS_SCOPE)
         elif self.databricks_conn.extra_dejson.get("service_principal_oauth", False):
             if self.databricks_conn.login == "" or self.databricks_conn.password == "":
                 raise AirflowException("Service Principal credentials aren't provided")
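
These hunks add a third Azure token path to the hook: when the connection extra sets use_default_azure_credential, the sync or async hook resolves an AAD token through azure.identity's DefaultAzureCredential (for example via AKS workload identity) instead of the managed-identity or service-principal flows. A hedged sketch of a connection that opts in; the connection id, workspace host, and the environment-variable style of definition are illustrative and not taken from the diff:

    import json
    import os

    # JSON-format Airflow connection supplied via environment variable. The
    # azure-identity extra must be installed so DefaultAzureCredential is importable.
    os.environ["AIRFLOW_CONN_DATABRICKS_AZ_DEFAULT"] = json.dumps(
        {
            "conn_type": "databricks",
            "host": "https://adb-1234567890123456.7.azuredatabricks.net",
            "extra": json.dumps({"use_default_azure_credential": True}),
        }
    )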

airflow/providers/databricks/hooks/databricks_sql.py
@@ -16,10 +16,12 @@
 # under the License.
 from __future__ import annotations
 
+import threading
 import warnings
 from collections import namedtuple
 from contextlib import closing
 from copy import copy
+from datetime import timedelta
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -35,8 +37,12 @@ from typing import (
 
 from databricks import sql  # type: ignore[attr-defined]
 
-from airflow.exceptions import
+from airflow.exceptions import (
+    AirflowException,
+    AirflowProviderDeprecationWarning,
+)
 from airflow.providers.common.sql.hooks.sql import DbApiHook, return_single_query_results
+from airflow.providers.databricks.exceptions import DatabricksSqlExecutionError, DatabricksSqlExecutionTimeout
 from airflow.providers.databricks.hooks.databricks_base import BaseDatabricksHook
 
 if TYPE_CHECKING:
@@ -49,6 +55,16 @@ LIST_SQL_ENDPOINTS_ENDPOINT = ("GET", "api/2.0/sql/endpoints")
 T = TypeVar("T")
 
 
+def create_timeout_thread(cur, execution_timeout: timedelta | None) -> threading.Timer | None:
+    if execution_timeout is not None:
+        seconds_to_timeout = execution_timeout.total_seconds()
+        t = threading.Timer(seconds_to_timeout, cur.connection.cancel)
+    else:
+        t = None
+
+    return t
+
+
 class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
     """
     Hook to interact with Databricks SQL.
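
The helper builds a threading.Timer whose callback is the cursor connection's cancel method; run() (further below) cancels that timer once the statement returns. As a standalone, hedged illustration of the stdlib pattern, independent of Databricks:

    import threading
    import time

    def cancel_work() -> None:
        # stand-in for cur.connection.cancel
        print("timeout reached, cancelling the running statement")

    timer = threading.Timer(2.0, cancel_work)  # fires after 2 seconds unless cancelled first
    timer.start()
    time.sleep(1)           # simulate a statement that finishes within the timeout
    if timer.is_alive():    # timer has not fired, so the work completed in time
        timer.cancel()      # make sure the cancel callback never runs
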
@@ -184,6 +200,7 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
         handler: None = ...,
         split_statements: bool = ...,
         return_last: bool = ...,
+        execution_timeout: timedelta | None = None,
     ) -> None: ...
 
     @overload
@@ -195,6 +212,7 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
         handler: Callable[[Any], T] = ...,
         split_statements: bool = ...,
         return_last: bool = ...,
+        execution_timeout: timedelta | None = None,
     ) -> tuple | list[tuple] | list[list[tuple] | tuple] | None: ...
 
     def run(
@@ -205,6 +223,7 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
         handler: Callable[[Any], T] | None = None,
         split_statements: bool = True,
         return_last: bool = True,
+        execution_timeout: timedelta | None = None,
     ) -> tuple | list[tuple] | list[list[tuple] | tuple] | None:
         """
         Run a command or a list of commands.
@@ -224,6 +243,8 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
         :param return_last: Whether to return result for only last statement or for all after split
         :return: return only result of the LAST SQL expression if handler was provided unless return_last
             is set to False.
+        :param execution_timeout: max time allowed for the execution of this task instance, if it goes beyond
+            it will raise and fail.
         """
         self.descriptions = []
         if isinstance(sql, str):
@@ -248,7 +269,23 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
                 self.set_autocommit(conn, autocommit)
 
                 with closing(conn.cursor()) as cur:
-                    self._run_command(cur, sql_statement, parameters)  # type: ignore[attr-defined]
+                    t = create_timeout_thread(cur, execution_timeout)
+
+                    # TODO: adjust this to make testing easier
+                    try:
+                        self._run_command(cur, sql_statement, parameters)  # type: ignore[attr-defined]
+                    except Exception as e:
+                        if t is None or t.is_alive():
+                            raise DatabricksSqlExecutionError(
+                                f"Error running SQL statement: {sql_statement}. {str(e)}"
+                            )
+                        raise DatabricksSqlExecutionTimeout(
+                            f"Timeout threshold exceeded for SQL statement: {sql_statement} was cancelled."
+                        )
+                    finally:
+                        if t is not None:
+                            t.cancel()
+
                     if handler is not None:
                         raw_result = handler(cur)
                         if self.return_tuple:
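
A hedged sketch of how the new execution_timeout argument and the new exception types fit together. The connection id, http_path, and query are illustrative placeholders, and fetch_all_handler comes from the common-sql provider rather than from this diff:

    from datetime import timedelta

    from airflow.providers.common.sql.hooks.sql import fetch_all_handler
    from airflow.providers.databricks.exceptions import (
        DatabricksSqlExecutionError,
        DatabricksSqlExecutionTimeout,
    )
    from airflow.providers.databricks.hooks.databricks_sql import DatabricksSqlHook

    hook = DatabricksSqlHook(
        databricks_conn_id="databricks_default",
        http_path="/sql/1.0/warehouses/abc123",  # illustrative warehouse path
    )
    try:
        rows = hook.run(
            "SELECT 1",
            handler=fetch_all_handler,
            execution_timeout=timedelta(minutes=5),  # cancel the statement after 5 minutes
        )
    except DatabricksSqlExecutionTimeout:
        ...  # the statement exceeded the timeout and was cancelled
    except DatabricksSqlExecutionError:
        ...  # the statement failed for another reason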

airflow/providers/databricks/operators/databricks.py
@@ -676,7 +676,7 @@ class DatabricksRunNowOperator(BaseOperator):
 
         json = {
             "job_id": 42,
-            "notebook_params": {"dry-run": "true", "oldest-time-to-consider": "1457570074236"},
+            "job_parameters": {"dry-run": "true", "oldest-time-to-consider": "1457570074236"},
         }
 
         notebook_run = DatabricksRunNowOperator(task_id="notebook_run", json=json)
@@ -688,6 +688,8 @@ class DatabricksRunNowOperator(BaseOperator):
 
         job_id = 42
 
+        dbt_commands = ["dbt deps", "dbt seed", "dbt run"]
+
         notebook_params = {"dry-run": "true", "oldest-time-to-consider": "1457570074236"}
 
         python_params = ["douglas adams", "42"]
@@ -698,6 +700,7 @@ class DatabricksRunNowOperator(BaseOperator):
 
         notebook_run = DatabricksRunNowOperator(
             job_id=job_id,
+            dbt_commands=dbt_commands,
             notebook_params=notebook_params,
             python_params=python_params,
             jar_params=jar_params,
@@ -711,7 +714,9 @@ class DatabricksRunNowOperator(BaseOperator):
     Currently the named parameters that ``DatabricksRunNowOperator`` supports are
         - ``job_id``
         - ``job_name``
+        - ``job_parameters``
         - ``json``
+        - ``dbt_commands``
         - ``notebook_params``
         - ``python_params``
         - ``python_named_parameters``
@@ -731,6 +736,17 @@ class DatabricksRunNowOperator(BaseOperator):
         It must exist only one job with the specified name.
         ``job_id`` and ``job_name`` are mutually exclusive.
         This field will be templated.
+
+    :param job_parameters: A dict from keys to values that override or augment the job's
+        parameters for this run. Job parameters are passed to any of the job's tasks that
+        accept key-value parameters. Job parameters supersede ``notebook_params``, ``python_params``,
+        ``python_named_parameters``, ``jar_params``, ``spark_submit_params``, and they cannot be used in
+        combination.
+        This field will be templated.
+
+        .. seealso::
+            https://docs.databricks.com/en/workflows/jobs/settings.html#add-parameters-for-all-job-tasks
+
     :param json: A JSON object containing API parameters which will be passed
         directly to the ``api/2.1/jobs/run-now`` endpoint. The other named parameters
         (i.e. ``notebook_params``, ``spark_submit_params``..) to this operator will
@@ -741,6 +757,13 @@ class DatabricksRunNowOperator(BaseOperator):
         .. seealso::
             For more information about templating see :ref:`concepts:jinja-templating`.
             https://docs.databricks.com/dev-tools/api/latest/jobs.html#operation/JobsRunNow
+
+    :param dbt_commands: A list containing the dbt commands to run using the dbt command line
+        interface. This field will be templated.
+
+        .. seealso::
+            https://docs.databricks.com/en/jobs/dbt.html
+
     :param notebook_params: A dict from keys to values for jobs with notebook task,
         e.g. "notebook_params": {"name": "john doe", "age": "35"}.
         The map is passed to the notebook and will be accessible through the
@@ -832,7 +855,9 @@ class DatabricksRunNowOperator(BaseOperator):
         *,
         job_id: str | None = None,
         job_name: str | None = None,
+        job_parameters: dict[str, str] | None = None,
         json: Any | None = None,
+        dbt_commands: list[str] | None = None,
         notebook_params: dict[str, str] | None = None,
         python_params: list[str] | None = None,
         jar_params: list[str] | None = None,
@@ -884,6 +909,10 @@ class DatabricksRunNowOperator(BaseOperator):
             self.json["spark_submit_params"] = spark_submit_params
         if idempotency_token is not None:
             self.json["idempotency_token"] = idempotency_token
+        if job_parameters is not None:
+            self.json["job_parameters"] = job_parameters
+        if dbt_commands is not None:
+            self.json["dbt_commands"] = dbt_commands
         if self.json:
             self.json = normalise_json_content(self.json)
         # This variable will be used in case our task gets killed.
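
A hedged sketch of the two newly wired arguments in DAG code; the job ids, task ids, and parameter values are illustrative. Since the docstring notes that job_parameters supersedes the per-task parameter arguments and cannot be combined with them, it is shown on its own task:

    from airflow.providers.databricks.operators.databricks import DatabricksRunNowOperator

    run_with_job_parameters = DatabricksRunNowOperator(
        task_id="run_with_job_parameters",
        databricks_conn_id="databricks_default",
        job_id=42,
        job_parameters={"dry-run": "true"},  # forwarded to every task that accepts key-value parameters
    )

    run_dbt_job = DatabricksRunNowOperator(
        task_id="run_dbt_job",
        databricks_conn_id="databricks_default",
        job_id=43,
        dbt_commands=["dbt deps", "dbt seed", "dbt run"],  # run through the dbt command line interface
    )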

airflow/providers/databricks/operators/databricks_sql.py
@@ -21,7 +21,7 @@ from __future__ import annotations
 
 import csv
 import json
-from typing import TYPE_CHECKING, Any, Sequence
+from typing import TYPE_CHECKING, Any, ClassVar, Sequence
 
 from databricks.sql.utils import ParamEscaper
 
@@ -72,7 +72,7 @@ class DatabricksSqlOperator(SQLExecuteQueryOperator):
     )
 
     template_ext: Sequence[str] = (".sql",)
-    template_fields_renderers = {"sql": "sql"}
+    template_fields_renderers: ClassVar[dict] = {"sql": "sql"}
     conn_id_field = "databricks_conn_id"
 
     def __init__(
@@ -353,3 +353,8 @@ FILEFORMAT = {self._file_format}
         self.log.info("Executing: %s", sql)
         hook = self._get_hook()
         hook.run(sql)
+
+    def on_kill(self) -> None:
+        # NB: on_kill isn't required for this operator since query cancelling gets
+        # handled in `DatabricksSqlHook.run()` method which is called in `execute()`
+        ...

{apache_airflow_providers_databricks-6.12.0.dist-info → apache_airflow_providers_databricks-6.13.0.dist-info}/METADATA
@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.3
 Name: apache-airflow-providers-databricks
-Version: 6.12.0
+Version: 6.13.0
 Summary: Provider package apache-airflow-providers-databricks for Apache Airflow
 Keywords: airflow-provider,databricks,airflow,integration
 Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -21,7 +21,7 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: System :: Monitoring
 Requires-Dist: aiohttp>=3.9.2, <4
-Requires-Dist: apache-airflow-providers-common-sql>=1.
+Requires-Dist: apache-airflow-providers-common-sql>=1.20.0
 Requires-Dist: apache-airflow>=2.8.0
 Requires-Dist: databricks-sql-connector>=2.0.0, <3.0.0, !=2.9.0
 Requires-Dist: mergedeep>=1.3.4
@@ -30,17 +30,17 @@ Requires-Dist: pandas>=2.1.2,<2.2;python_version>="3.9"
 Requires-Dist: pyarrow>=14.0.1
 Requires-Dist: requests>=2.27.0,<3
 Requires-Dist: azure-identity>=1.3.1 ; extra == "azure-identity"
-Requires-Dist: apache-airflow-providers-common-sql ; extra == "common
+Requires-Dist: apache-airflow-providers-common-sql ; extra == "common-sql"
 Requires-Dist: databricks-sdk==0.10.0 ; extra == "sdk"
 Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
-Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.12.0/changelog.html
-Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.12.0
+Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.13.0/changelog.html
+Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.13.0
 Project-URL: Slack Chat, https://s.apache.org/airflow-slack
 Project-URL: Source Code, https://github.com/apache/airflow
 Project-URL: Twitter, https://twitter.com/ApacheAirflow
 Project-URL: YouTube, https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/
 Provides-Extra: azure-identity
-Provides-Extra: common
+Provides-Extra: common-sql
 Provides-Extra: sdk
 
 
@@ -87,7 +87,7 @@ Provides-Extra: sdk
 
 Package ``apache-airflow-providers-databricks``
 
-Release: ``6.12.0``
+Release: ``6.13.0``
 
 
 `Databricks <https://databricks.com/>`__
@@ -100,7 +100,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
 are in ``airflow.providers.databricks`` python package.
 
 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.12.0/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.13.0/>`_.
 
 Installation
 ------------
@@ -118,7 +118,7 @@ Requirements
 PIP package                                Version required
 ========================================   =========================================
 ``apache-airflow``                         ``>=2.8.0``
-``apache-airflow-providers-common-sql``    ``>=1.
+``apache-airflow-providers-common-sql``    ``>=1.20.0``
 ``requests``                               ``>=2.27.0,<3``
 ``databricks-sql-connector``               ``>=2.0.0,!=2.9.0,<3.0.0``
 ``aiohttp``                                ``>=3.9.2,<4``
@@ -148,4 +148,4 @@ Dependent package
 ============================================================================================================   ==============
 
 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.12.0/changelog.html>`_.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.13.0/changelog.html>`_.

{apache_airflow_providers_databricks-6.12.0.dist-info → apache_airflow_providers_databricks-6.13.0.dist-info}/RECORD
@@ -1,14 +1,15 @@
 airflow/providers/databricks/LICENSE,sha256=FFb4jd2AXnOOf7XLP04pQW6jbdhG49TxlGY6fFpCV1Y,13609
-airflow/providers/databricks/__init__.py,sha256=
-airflow/providers/databricks/
+airflow/providers/databricks/__init__.py,sha256=y_ZDGOe8x4vlzMqrtj3WqcPQgmRFxltWu6IRQ93VBLY,1498
+airflow/providers/databricks/exceptions.py,sha256=85RklmLOI_PnTzfXNIUd5fAu2aMMUhelwumQAX0wANE,1261
+airflow/providers/databricks/get_provider_info.py,sha256=ffAK-AjwJoKFajWmaLbSoApcCOgONBXXWPofASOVcVU,7784
 airflow/providers/databricks/hooks/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
 airflow/providers/databricks/hooks/databricks.py,sha256=-rgK_sMc2_BjTvSvMh1Md3XanVayOmcxijQfs1vRCPw,24774
-airflow/providers/databricks/hooks/databricks_base.py,sha256=
-airflow/providers/databricks/hooks/databricks_sql.py,sha256=
+airflow/providers/databricks/hooks/databricks_base.py,sha256=8KVRF-ty20UQpJP3kgE6RDLAYqXk7ZjI07ZpwFIcGB8,34917
+airflow/providers/databricks/hooks/databricks_sql.py,sha256=7F3ruIPYgkLFp4ms3AoldlS6FNooR8U6GOSI4A_Qmeo,14102
 airflow/providers/databricks/operators/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
-airflow/providers/databricks/operators/databricks.py,sha256=
+airflow/providers/databricks/operators/databricks.py,sha256=YpX2w3W6Jp8pUoxcMfNLlKQIMES9sj4WxZ5nMZpedm0,70429
 airflow/providers/databricks/operators/databricks_repos.py,sha256=I1z2ppGfM_oPxR8BM6Nk1i6JuUOWB40dLbRNEZUvccA,13093
-airflow/providers/databricks/operators/databricks_sql.py,sha256=
+airflow/providers/databricks/operators/databricks_sql.py,sha256=Tx4mF1RS-lAHoKD31-w8F18RO_M9AnSFyyeocIvAZ-Y,17038
 airflow/providers/databricks/operators/databricks_workflow.py,sha256=0_NaiPBbUjwtxZNE8BevMNWDCyQ0lHaCtNALa6ZAeNQ,14131
 airflow/providers/databricks/plugins/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
 airflow/providers/databricks/plugins/databricks_workflow.py,sha256=H6-R0Nq4mgF1h13jO56qXYSfD7_JFozBlSW0Vawg9DY,16750
@@ -19,7 +20,7 @@ airflow/providers/databricks/triggers/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvR
 airflow/providers/databricks/triggers/databricks.py,sha256=xk9aEfdZnG33a4WSFfg6SZF4FfROV8B4HOyBYBvZR_Q,5104
 airflow/providers/databricks/utils/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
 airflow/providers/databricks/utils/databricks.py,sha256=EICTPZTD0R0dy9UGKgv8srkrBTgzCQrcYNL9oBWuhzk,2890
-apache_airflow_providers_databricks-6.
-apache_airflow_providers_databricks-6.
-apache_airflow_providers_databricks-6.
-apache_airflow_providers_databricks-6.
+apache_airflow_providers_databricks-6.13.0.dist-info/entry_points.txt,sha256=hjmZm3ab2cteTR4t9eE28oKixHwNIKtLCThd6sx3XRQ,227
+apache_airflow_providers_databricks-6.13.0.dist-info/WHEEL,sha256=CpUCUxeHQbRN5UGRQHYRJorO5Af-Qy_fHMctcQ8DSGI,82
+apache_airflow_providers_databricks-6.13.0.dist-info/METADATA,sha256=tfCinJWj-6dhYZPywZCPs9j3LgzqImL9KZmaDo4bA9M,7026
+apache_airflow_providers_databricks-6.13.0.dist-info/RECORD,,