apache-airflow-providers-databricks 4.4.0__py3-none-any.whl → 4.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/databricks/__init__.py +1 -1
- airflow/providers/databricks/get_provider_info.py +1 -0
- airflow/providers/databricks/hooks/databricks.py +74 -6
- airflow/providers/databricks/hooks/databricks_base.py +10 -8
- airflow/providers/databricks/hooks/databricks_sql.py +5 -3
- airflow/providers/databricks/operators/databricks.py +13 -1
- airflow/providers/databricks/operators/databricks_sql.py +2 -1
- {apache_airflow_providers_databricks-4.4.0.dist-info → apache_airflow_providers_databricks-4.5.0.dist-info}/METADATA +6 -6
- {apache_airflow_providers_databricks-4.4.0.dist-info → apache_airflow_providers_databricks-4.5.0.dist-info}/RECORD +14 -14
- {apache_airflow_providers_databricks-4.4.0.dist-info → apache_airflow_providers_databricks-4.5.0.dist-info}/WHEEL +1 -1
- {apache_airflow_providers_databricks-4.4.0.dist-info → apache_airflow_providers_databricks-4.5.0.dist-info}/LICENSE +0 -0
- {apache_airflow_providers_databricks-4.4.0.dist-info → apache_airflow_providers_databricks-4.5.0.dist-info}/NOTICE +0 -0
- {apache_airflow_providers_databricks-4.4.0.dist-info → apache_airflow_providers_databricks-4.5.0.dist-info}/entry_points.txt +0 -0
- {apache_airflow_providers_databricks-4.4.0.dist-info → apache_airflow_providers_databricks-4.5.0.dist-info}/top_level.txt +0 -0
airflow/providers/databricks/hooks/databricks.py
@@ -52,10 +52,19 @@ INSTALL_LIBS_ENDPOINT = ("POST", "api/2.0/libraries/install")
 UNINSTALL_LIBS_ENDPOINT = ("POST", "api/2.0/libraries/uninstall")

 LIST_JOBS_ENDPOINT = ("GET", "api/2.1/jobs/list")
+LIST_PIPELINES_ENDPOINT = ("GET", "/api/2.0/pipelines")

 WORKSPACE_GET_STATUS_ENDPOINT = ("GET", "api/2.0/workspace/get-status")

-RUN_LIFE_CYCLE_STATES = ["PENDING", "RUNNING", "TERMINATING", "TERMINATED", "SKIPPED", "INTERNAL_ERROR"]
+RUN_LIFE_CYCLE_STATES = [
+    "PENDING",
+    "RUNNING",
+    "TERMINATING",
+    "TERMINATED",
+    "SKIPPED",
+    "INTERNAL_ERROR",
+    "QUEUED",
+]

 SPARK_VERSIONS_ENDPOINT = ("GET", "api/2.0/clusters/spark-versions")

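The endpoint constants above pair an HTTP verb with an API path, and the base hook's `_do_api_call` consumes such a tuple (as the new `list_pipelines` method below does with `LIST_PIPELINES_ENDPOINT`). A minimal sketch of the convention; the workspace host is illustrative, not taken from the diff:

```python
# Illustrative only: how a (method, path) endpoint tuple maps onto a request URL.
host = "adb-1234567890123456.7.azuredatabricks.net"  # hypothetical workspace host

method, path = ("GET", "/api/2.0/pipelines")  # same shape as LIST_PIPELINES_ENDPOINT
url = f"https://{host}/{path.lstrip('/')}"

print(method, url)  # GET https://adb-1234567890123456.7.azuredatabricks.net/api/2.0/pipelines
```
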
@@ -75,11 +84,9 @@ class RunState:
         """True if the current state is a terminal state."""
         if self.life_cycle_state not in RUN_LIFE_CYCLE_STATES:
             raise AirflowException(
-                (
-                    "Unexpected life cycle state: {}: If the state has "
-                    "been introduced recently, please check the Databricks user "
-                    "guide for troubleshooting information"
-                ).format(self.life_cycle_state)
+                f"Unexpected life cycle state: {self.life_cycle_state}: If the state has "
+                "been introduced recently, please check the Databricks user "
+                "guide for troubleshooting information"
             )
         return self.life_cycle_state in ("TERMINATED", "SKIPPED", "INTERNAL_ERROR")

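For context, a short sketch of how `RunState` behaves after this change. The constructor is shown with positional `(life_cycle_state, result_state, state_message)` arguments; treat the exact signature as an assumption:

```python
from airflow.providers.databricks.hooks.databricks import RunState

# A state as returned by the Runs API; the values here are illustrative.
state = RunState("TERMINATED", "SUCCESS", "")
if state.is_terminal:
    print("run finished:", state.result_state)

# A life-cycle state missing from RUN_LIFE_CYCLE_STATES now raises
# AirflowException with the f-string message introduced in this hunk.
unknown = RunState("SOMETHING_NEW", "", "")
try:
    _ = unknown.is_terminal
except Exception as exc:
    print(exc)
```
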
@@ -209,6 +216,67 @@ class DatabricksHook(BaseDatabricksHook):
         else:
             return matching_jobs[0]["job_id"]

+    def list_pipelines(
+        self, batch_size: int = 25, pipeline_name: str | None = None, notebook_path: str | None = None
+    ) -> list[dict[str, Any]]:
+        """
+        Lists the pipelines in Databricks Delta Live Tables.
+
+        :param batch_size: The limit/batch size used to retrieve pipelines.
+        :param pipeline_name: Optional name of a pipeline to search. Cannot be combined with path.
+        :param notebook_path: Optional notebook of a pipeline to search. Cannot be combined with name.
+        :return: A list of pipelines.
+        """
+        has_more = True
+        next_token = None
+        all_pipelines = []
+        filter = None
+        if pipeline_name and notebook_path:
+            raise AirflowException("Cannot combine pipeline_name and notebook_path in one request")
+
+        if notebook_path:
+            filter = f"notebook='{notebook_path}'"
+        elif pipeline_name:
+            filter = f"name LIKE '{pipeline_name}'"
+        payload: dict[str, Any] = {
+            "max_results": batch_size,
+        }
+        if filter:
+            payload["filter"] = filter
+
+        while has_more:
+            if next_token:
+                payload["page_token"] = next_token
+            response = self._do_api_call(LIST_PIPELINES_ENDPOINT, payload)
+            pipelines = response.get("statuses", [])
+            all_pipelines += pipelines
+            if "next_page_token" in response:
+                next_token = response["next_page_token"]
+            else:
+                has_more = False
+
+        return all_pipelines
+
+    def find_pipeline_id_by_name(self, pipeline_name: str) -> str | None:
+        """
+        Finds pipeline id by its name. If multiple pipelines with the same name, raises AirflowException.
+
+        :param pipeline_name: The name of the pipeline to look up.
+        :return: The pipeline_id as a GUID string or None if no pipeline was found.
+        """
+        matching_pipelines = self.list_pipelines(pipeline_name=pipeline_name)
+
+        if len(matching_pipelines) > 1:
+            raise AirflowException(
+                f"There are more than one job with name {pipeline_name}. "
+                "Please delete duplicated pipelines first"
+            )
+
+        if not pipeline_name:
+            return None
+        else:
+            return matching_pipelines[0]["pipeline_id"]
+
     def get_run_page_url(self, run_id: int) -> str:
         """
         Retrieves run_page_url.

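A minimal usage sketch for the two new hook methods, assuming a configured `databricks_default` connection; the pipeline names and filter values are illustrative:

```python
from airflow.providers.databricks.hooks.databricks import DatabricksHook

hook = DatabricksHook(databricks_conn_id="databricks_default")  # assumed connection id

# Page through DLT pipelines whose name matches the LIKE filter, 50 per API call.
for pipeline in hook.list_pipelines(batch_size=50, pipeline_name="sales_%"):
    print(pipeline["pipeline_id"], pipeline.get("name"))

# Resolve a single pipeline name to its id; raises AirflowException when the
# name matches more than one pipeline.
pipeline_id = hook.find_pipeline_id_by_name("sales_ingest")
print(pipeline_id)
```
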
airflow/providers/databricks/hooks/databricks_base.py
@@ -28,7 +28,7 @@ import copy
 import platform
 import time
 from functools import cached_property
-from typing import Any
+from typing import TYPE_CHECKING, Any
 from urllib.parse import urlsplit

 import aiohttp
@@ -48,9 +48,11 @@ from tenacity import (
 from airflow import __version__
 from airflow.exceptions import AirflowException
 from airflow.hooks.base import BaseHook
-from airflow.models import Connection
 from airflow.providers_manager import ProvidersManager

+if TYPE_CHECKING:
+    from airflow.models import Connection
+
 # https://docs.microsoft.com/en-us/azure/databricks/dev-tools/api/latest/aad/service-prin-aad-token#--get-an-azure-active-directory-access-token
 # https://docs.microsoft.com/en-us/graph/deployments#app-registration-and-token-service-root-endpoints
 AZURE_DEFAULT_AD_ENDPOINT = "https://login.microsoftonline.com"
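The `Connection` import is moved under `typing.TYPE_CHECKING`, so it exists for type annotations only and is skipped at runtime. A generic sketch of that pattern, with an illustrative function that is not part of the provider:

```python
from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Only evaluated by static type checkers; avoids the runtime import (and any
    # import cycle or startup cost that would come with it).
    from airflow.models import Connection


def describe(conn: Connection) -> str:
    # With postponed evaluation of annotations, the hint above is never
    # resolved at runtime, so the guarded import is sufficient.
    return f"{conn.conn_id} -> {conn.host}"
```
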
@@ -121,12 +123,12 @@ class BaseDatabricksHook(BaseHook):
             self.retry_args["retry"] = retry_if_exception(self._retryable_error)
             self.retry_args["after"] = my_after_func
         else:
-            self.retry_args = dict(
-                stop=stop_after_attempt(self.retry_limit),
-                wait=wait_exponential(min=self.retry_delay, max=(2**retry_limit)),
-                retry=retry_if_exception(self._retryable_error),
-                after=my_after_func,
-            )
+            self.retry_args = {
+                "stop": stop_after_attempt(self.retry_limit),
+                "wait": wait_exponential(min=self.retry_delay, max=(2**retry_limit)),
+                "retry": retry_if_exception(self._retryable_error),
+                "after": my_after_func,
+            }

     @cached_property
     def databricks_conn(self) -> Connection:
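The retry configuration is now built as a plain dict literal. Its keys map one-to-one onto tenacity's keyword arguments; a standalone sketch of how such a mapping drives a retry loop (the flaky call and its values are hypothetical):

```python
import tenacity
from tenacity import retry_if_exception_type, stop_after_attempt, wait_exponential

retry_args = {
    "stop": stop_after_attempt(3),
    "wait": wait_exponential(min=1, max=8),
    "retry": retry_if_exception_type(ConnectionError),
}


def flaky_api_call() -> str:
    """Hypothetical call that may raise ConnectionError; replace with a real request."""
    return "ok"


# tenacity.Retrying accepts exactly these keyword arguments and re-runs the
# body of the `with attempt:` block according to them.
for attempt in tenacity.Retrying(**retry_args):
    with attempt:
        result = flaky_api_call()
        print(result)
```
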
airflow/providers/databricks/hooks/databricks_sql.py
@@ -18,15 +18,17 @@ from __future__ import annotations

 from contextlib import closing
 from copy import copy
-from typing import Any, Callable, Iterable, Mapping, TypeVar, overload
+from typing import TYPE_CHECKING, Any, Callable, Iterable, Mapping, TypeVar, overload

 from databricks import sql  # type: ignore[attr-defined]
-from databricks.sql.client import Connection  # type: ignore[attr-defined]

 from airflow.exceptions import AirflowException
 from airflow.providers.common.sql.hooks.sql import DbApiHook, return_single_query_results
 from airflow.providers.databricks.hooks.databricks_base import BaseDatabricksHook

+if TYPE_CHECKING:
+    from databricks.sql.client import Connection
+
 LIST_SQL_ENDPOINTS_ENDPOINT = ("GET", "api/2.0/sql/endpoints")


@@ -81,7 +83,7 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):

     def _get_extra_config(self) -> dict[str, Any | None]:
         extra_params = copy(self.databricks_conn.extra_dejson)
-        for arg in ["http_path", "session_configuration"] + self.extra_parameters:
+        for arg in ["http_path", "session_configuration", *self.extra_parameters]:
             if arg in extra_params:
                 del extra_params[arg]

airflow/providers/databricks/operators/databricks.py
@@ -21,7 +21,6 @@ from __future__ import annotations
 import time
 import warnings
 from functools import cached_property
-from logging import Logger
 from typing import TYPE_CHECKING, Any, Sequence

 from airflow.configuration import conf
@@ -32,6 +31,8 @@ from airflow.providers.databricks.triggers.databricks import DatabricksExecution
 from airflow.providers.databricks.utils.databricks import normalise_json_content, validate_trigger_event

 if TYPE_CHECKING:
+    from logging import Logger
+
     from airflow.models.taskinstancekey import TaskInstanceKey
     from airflow.utils.context import Context

@@ -364,6 +365,8 @@ class DatabricksSubmitRunOperator(BaseOperator):

         if "dbt_task" in self.json and "git_source" not in self.json:
             raise AirflowException("git_source is required for dbt_task")
+        if pipeline_task is not None and "pipeline_id" in pipeline_task and "pipeline_name" in pipeline_task:
+            raise AirflowException("'pipeline_name' is not allowed in conjunction with 'pipeline_id'")

         # This variable will be used in case our task gets killed.
         self.run_id: int | None = None
@@ -383,6 +386,15 @@ class DatabricksSubmitRunOperator(BaseOperator):
         )

     def execute(self, context: Context):
+        if (
+            "pipeline_task" in self.json
+            and self.json["pipeline_task"].get("pipeline_id") is None
+            and self.json["pipeline_task"].get("pipeline_name")
+        ):
+            # If pipeline_id is not provided, we need to fetch it from the pipeline_name
+            pipeline_name = self.json["pipeline_task"]["pipeline_name"]
+            self.json["pipeline_task"]["pipeline_id"] = self._hook.get_pipeline_id(pipeline_name)
+            del self.json["pipeline_task"]["pipeline_name"]
         json_normalised = normalise_json_content(self.json)
         self.run_id = self._hook.submit_run(json_normalised)
         if self.deferrable:
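Taken together, the two operator hunks above mean a `pipeline_task` may be given a `pipeline_name` instead of a `pipeline_id`: the id is looked up at execute() time, and supplying both keys is rejected. A hedged sketch of what such a task might look like (normally declared inside a DAG context); the connection id, run name, and pipeline name are illustrative:

```python
from airflow.providers.databricks.operators.databricks import DatabricksSubmitRunOperator

run_dlt_pipeline = DatabricksSubmitRunOperator(
    task_id="run_dlt_pipeline",
    databricks_conn_id="databricks_default",  # assumed connection id
    json={
        "run_name": "trigger-sales-pipeline",
        # Only the name is supplied here; the operator resolves the matching
        # pipeline_id before submitting the run.
        "pipeline_task": {"pipeline_name": "sales_ingest"},
    },
)
```
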
airflow/providers/databricks/operators/databricks_sql.py
@@ -22,7 +22,6 @@ import csv
 import json
 from typing import TYPE_CHECKING, Any, Sequence

-from databricks.sql.types import Row
 from databricks.sql.utils import ParamEscaper

 from airflow.exceptions import AirflowException
@@ -31,6 +30,8 @@ from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator
 from airflow.providers.databricks.hooks.databricks_sql import DatabricksSqlHook

 if TYPE_CHECKING:
+    from databricks.sql.types import Row
+
     from airflow.utils.context import Context


{apache_airflow_providers_databricks-4.4.0.dist-info → apache_airflow_providers_databricks-4.5.0.dist-info}/METADATA
@@ -1,14 +1,14 @@
 Metadata-Version: 2.1
 Name: apache-airflow-providers-databricks
-Version: 4.4.0
+Version: 4.5.0
 Summary: Provider for Apache Airflow. Implements apache-airflow-providers-databricks package
 Home-page: https://airflow.apache.org/
 Download-URL: https://archive.apache.org/dist/airflow/providers
 Author: Apache Software Foundation
 Author-email: dev@airflow.apache.org
 License: Apache License 2.0
-Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/4.4.0/
-Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-databricks/4.4.0/changelog.html
+Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/4.5.0/
+Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-databricks/4.5.0/changelog.html
 Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
 Project-URL: Source Code, https://github.com/apache/airflow
 Project-URL: Slack Chat, https://s.apache.org/airflow-slack
@@ -77,7 +77,7 @@ Requires-Dist: apache-airflow-providers-common-sql ; extra == 'common.sql'

 Package ``apache-airflow-providers-databricks``

-Release: ``4.4.0``
+Release: ``4.5.0``


 `Databricks <https://databricks.com/>`__
@@ -90,7 +90,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
 are in ``airflow.providers.databricks`` python package.

 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/4.4.0/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/4.5.0/>`_.


 Installation
@@ -135,4 +135,4 @@ Dependent package
 ============================================================================================================ ==============

 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/4.4.0/changelog.html>`_.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/4.5.0/changelog.html>`_.
{apache_airflow_providers_databricks-4.4.0.dist-info → apache_airflow_providers_databricks-4.5.0.dist-info}/RECORD
@@ -1,13 +1,13 @@
-airflow/providers/databricks/__init__.py,sha256=…
-airflow/providers/databricks/get_provider_info.py,sha256=…
+airflow/providers/databricks/__init__.py,sha256=f4tIs0EPxcFXuETkMe32PU1hiGP-frvdB9iD2QB-FI8,1579
+airflow/providers/databricks/get_provider_info.py,sha256=QKheBxRexRHoc7JFTQ3Vrx_3RrCesJNvmJoy3DfSFL4,5620
 airflow/providers/databricks/hooks/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
-airflow/providers/databricks/hooks/databricks.py,sha256=…
-airflow/providers/databricks/hooks/databricks_base.py,sha256=…
-airflow/providers/databricks/hooks/databricks_sql.py,sha256=…
+airflow/providers/databricks/hooks/databricks.py,sha256=2m0BPmhYbMwI_pKBmmvYL636ciYX1qYfkFfgkcTpk_c,19158
+airflow/providers/databricks/hooks/databricks_base.py,sha256=R8NCyXWPhji9lo0X0zM9JY9geprl95XpAJQXwfFjFcM,30581
+airflow/providers/databricks/hooks/databricks_sql.py,sha256=hCDJNousYOvxs3n08las3pz7ATo5bRXOoYI8ujDPmmI,10091
 airflow/providers/databricks/operators/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
-airflow/providers/databricks/operators/databricks.py,sha256=…
+airflow/providers/databricks/operators/databricks.py,sha256=gLrqiIxJ2s0P_MgPikbCQtU2iGD_K9PiBbJBpi8jjUI,34074
 airflow/providers/databricks/operators/databricks_repos.py,sha256=Z7bAB0HiQajfZghmwK9RRGqqumx5S5JJyOB86r6e23s,13096
-airflow/providers/databricks/operators/databricks_sql.py,sha256=…
+airflow/providers/databricks/operators/databricks_sql.py,sha256=-_txO63CR7nbjNR4lkq2o7nosw19qIM5qBpxkOjh_K0,17026
 airflow/providers/databricks/sensors/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
 airflow/providers/databricks/sensors/databricks_partition.py,sha256=bmLZWn8hsAZp4Uooan5mP1H4uR1Dek7TgcNx1ffmssI,9787
 airflow/providers/databricks/sensors/databricks_sql.py,sha256=ESLhcaWHbW9xjsw2IwLHa3QAVnBs3gmsb9SYjs2UgIk,5543
@@ -15,10 +15,10 @@ airflow/providers/databricks/triggers/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvR
 airflow/providers/databricks/triggers/databricks.py,sha256=Qj9mB0bNYRY_toPEU17gxbxmPkkT3P789kCHu_T64BA,3997
 airflow/providers/databricks/utils/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
 airflow/providers/databricks/utils/databricks.py,sha256=6d8zhuaKD-0G4ef-m8duzlZBrChcCa9hvxWb8pKjaqI,2881
-apache_airflow_providers_databricks-4.4.0.dist-info/…
-apache_airflow_providers_databricks-4.4.0.dist-info/…
-apache_airflow_providers_databricks-4.4.0.dist-info/…
-apache_airflow_providers_databricks-4.4.0.dist-info/…
-apache_airflow_providers_databricks-4.4.0.dist-info/…
-apache_airflow_providers_databricks-4.4.0.dist-info/…
-apache_airflow_providers_databricks-4.4.0.dist-info/…
+apache_airflow_providers_databricks-4.5.0.dist-info/LICENSE,sha256=gXPVwptPlW1TJ4HSuG5OMPg-a3h43OGMkZRR1rpwfJA,10850
+apache_airflow_providers_databricks-4.5.0.dist-info/METADATA,sha256=NytwRUe06fdVp9k1XvajC_BLbxMmFksxKEryLIbff_g,6143
+apache_airflow_providers_databricks-4.5.0.dist-info/NOTICE,sha256=m-6s2XynUxVSUIxO4rVablAZCvFq-wmLrqV91DotRBw,240
+apache_airflow_providers_databricks-4.5.0.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
+apache_airflow_providers_databricks-4.5.0.dist-info/entry_points.txt,sha256=8r3YBg2Qr0qeOALxzlooH5pXM6QmoPQuyQ75cQrkY5A,107
+apache_airflow_providers_databricks-4.5.0.dist-info/top_level.txt,sha256=OeMVH5md7fr2QQWpnZoOWWxWO-0WH1IP70lpTVwopPg,8
+apache_airflow_providers_databricks-4.5.0.dist-info/RECORD,,