apache-airflow-providers-databricks 7.7.2rc1__py3-none-any.whl → 7.7.3__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release: this version of apache-airflow-providers-databricks might be problematic.
- airflow/providers/databricks/__init__.py +1 -1
- airflow/providers/databricks/hooks/databricks_base.py +51 -27
- airflow/providers/databricks/operators/databricks.py +2 -7
- airflow/providers/databricks/plugins/databricks_workflow.py +6 -6
- {apache_airflow_providers_databricks-7.7.2rc1.dist-info → apache_airflow_providers_databricks-7.7.3.dist-info}/METADATA +24 -11
- {apache_airflow_providers_databricks-7.7.2rc1.dist-info → apache_airflow_providers_databricks-7.7.3.dist-info}/RECORD +8 -8
- {apache_airflow_providers_databricks-7.7.2rc1.dist-info → apache_airflow_providers_databricks-7.7.3.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_databricks-7.7.2rc1.dist-info → apache_airflow_providers_databricks-7.7.3.dist-info}/entry_points.txt +0 -0
airflow/providers/databricks/__init__.py

@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version
 
 __all__ = ["__version__"]
 
-__version__ = "7.7.2rc1"
+__version__ = "7.7.3"
 
 if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
     "2.10.0"
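The retained version gate compares the installed Airflow's `base_version` (pre-release and dev suffixes stripped) against 2.10.0. A minimal sketch of that check; the installed version string and the raised error are illustrative, not the provider's:

# A minimal sketch of the version gate above; values are illustrative.
import packaging.version

airflow_version = "2.10.5rc1"  # hypothetical installed Airflow version

# base_version drops pre-release/dev suffixes: "2.10.5rc1" -> "2.10.5"
base = packaging.version.parse(airflow_version).base_version

if packaging.version.parse(base) < packaging.version.parse("2.10.0"):
    raise RuntimeError("apache-airflow-providers-databricks requires Apache Airflow 2.10.0+")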
airflow/providers/databricks/hooks/databricks_base.py

@@ -121,6 +121,9 @@ class BaseDatabricksHook(BaseHook):
         self.oauth_tokens: dict[str, dict] = {}
         self.token_timeout_seconds = 10
         self.caller = caller
+        self._metadata_cache: dict[str, Any] = {}
+        self._metadata_expiry: float = 0
+        self._metadata_ttl: int = 300
 
         def my_after_func(retry_state):
             self._log_request_error(retry_state.attempt_number, retry_state.outcome)
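The three new attributes set up a small time-to-live cache for Azure Instance Metadata Service responses: `_metadata_cache` holds the last validated payload, `_metadata_expiry` is the UNIX timestamp at which it goes stale, and `_metadata_ttl` keeps entries for five minutes. A standalone sketch of the pattern; the class name and `_fetch` payload are illustrative:

import time
from typing import Any


class TtlCachedProbe:
    """Illustrative stand-in showing the TTL-cache pattern the new attributes enable."""

    def __init__(self) -> None:
        self._metadata_cache: dict[str, Any] = {}
        self._metadata_expiry: float = 0  # UNIX timestamp after which the cache is stale
        self._metadata_ttl: int = 300     # keep a response for five minutes

    def get_metadata(self) -> dict[str, Any]:
        # Serve from the cache while it is still fresh.
        if self._metadata_cache and time.time() < self._metadata_expiry:
            return self._metadata_cache
        fresh = self._fetch()  # the expensive call, e.g. an HTTP request to IMDS
        self._metadata_cache = fresh
        self._metadata_expiry = time.time() + self._metadata_ttl
        return fresh

    def _fetch(self) -> dict[str, Any]:
        # Stand-in payload shaped like an IMDS response.
        return {"compute": {"azEnvironment": "AzurePublicCloud"}}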
airflow/providers/databricks/hooks/databricks_base.py (continued)

@@ -515,43 +518,64 @@ class BaseDatabricksHook(BaseHook):
 
         return int(token[time_key]) > (int(time.time()) + TOKEN_REFRESH_LEAD_TIME)
 
-    @staticmethod
-    def _check_azure_metadata_service() -> None:
+    def _check_azure_metadata_service(self) -> None:
         """
-        Check for Azure Metadata Service.
+        Check for Azure Metadata Service (with caching).
 
         https://docs.microsoft.com/en-us/azure/virtual-machines/linux/instance-metadata-service
         """
+        if self._metadata_cache and time.time() < self._metadata_expiry:
+            return
         try:
-            … (10 removed lines not shown)
+            for attempt in self._get_retry_object():
+                with attempt:
+                    response = requests.get(
+                        AZURE_METADATA_SERVICE_INSTANCE_URL,
+                        params={"api-version": "2021-02-01"},
+                        headers={"Metadata": "true"},
+                        timeout=2,
+                    )
+                    response.raise_for_status()
+                    response_json = response.json()
+
+                    self._validate_azure_metadata_service(response_json)
+                    self._metadata_cache = response_json
+                    self._metadata_expiry = time.time() + self._metadata_ttl
+                    break
+        except RetryError:
+            raise ConnectionError(f"Failed to reach Azure Metadata Service after {self.retry_limit} retries.")
         except (requests_exceptions.RequestException, ValueError) as e:
-            raise …
+            raise ConnectionError(f"Can't reach Azure Metadata Service: {e}")
 
     async def _a_check_azure_metadata_service(self):
         """Async version of `_check_azure_metadata_service()`."""
+        if self._metadata_cache and time.time() < self._metadata_expiry:
+            return
         try:
-            async …
-            … (12 more removed lines not shown)
+            async for attempt in self._a_get_retry_object():
+                with attempt:
+                    async with self._session.get(
+                        url=AZURE_METADATA_SERVICE_INSTANCE_URL,
+                        params={"api-version": "2021-02-01"},
+                        headers={"Metadata": "true"},
+                        timeout=2,
+                    ) as resp:
+                        resp.raise_for_status()
+                        response_json = await resp.json()
+                        self._validate_azure_metadata_service(response_json)
+                        self._metadata_cache = response_json
+                        self._metadata_expiry = time.time() + self._metadata_ttl
+                        break
+        except RetryError:
+            raise ConnectionError(f"Failed to reach Azure Metadata Service after {self.retry_limit} retries.")
+        except (aiohttp.ClientError, ValueError) as e:
+            raise ConnectionError(f"Can't reach Azure Metadata Service: {e}")
+
+    def _validate_azure_metadata_service(self, response_json: dict) -> None:
+        if "compute" not in response_json or "azEnvironment" not in response_json["compute"]:
+            raise ValueError(
+                f"Was able to fetch some metadata, but it doesn't look like Azure Metadata: {response_json}"
+            )
 
     def _get_token(self, raise_error: bool = False) -> str | None:
         if "token" in self.databricks_conn.extra_dejson:
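Both checks now run the IMDS probe inside the hook's retry object, validate and cache the payload, and convert exhausted retries into `ConnectionError`. A self-contained approximation using `tenacity` and `requests` directly; the retry policy below is an assumption, since the hook builds its own from connection settings:

import time

import requests
from tenacity import RetryError, Retrying, stop_after_attempt, wait_exponential

AZURE_METADATA_SERVICE_INSTANCE_URL = "http://169.254.169.254/metadata/instance"

_cache: dict = {}
_expiry: float = 0.0
_TTL = 300


def check_azure_metadata_service(retry_limit: int = 3) -> None:
    """Probe Azure IMDS, retrying transient failures, and cache the validated payload."""
    global _cache, _expiry
    if _cache and time.time() < _expiry:
        return  # a fresh, already-validated response exists
    try:
        for attempt in Retrying(stop=stop_after_attempt(retry_limit), wait=wait_exponential(max=10)):
            with attempt:
                response = requests.get(
                    AZURE_METADATA_SERVICE_INSTANCE_URL,
                    params={"api-version": "2021-02-01"},
                    headers={"Metadata": "true"},  # header required by IMDS
                    timeout=2,
                )
                response.raise_for_status()
                payload = response.json()
                if "compute" not in payload or "azEnvironment" not in payload["compute"]:
                    raise ValueError(f"Doesn't look like Azure Metadata: {payload}")
                _cache, _expiry = payload, time.time() + _TTL
    except RetryError as e:
        raise ConnectionError(f"Failed to reach Azure Metadata Service after {retry_limit} retries.") from e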
airflow/providers/databricks/operators/databricks.py

@@ -24,7 +24,6 @@ import time
 from abc import ABC, abstractmethod
 from collections.abc import Sequence
 from functools import cached_property
-from logging import Logger
 from typing import TYPE_CHECKING, Any
 
 from airflow.configuration import conf
@@ -60,12 +59,8 @@ if TYPE_CHECKING:
         DatabricksWorkflowTaskGroup,
     )
     from airflow.providers.openlineage.extractors import OperatorLineage
-    from airflow.…
-
-    try:
-        from airflow.sdk import TaskGroup
-    except ImportError:
-        from airflow.utils.task_group import TaskGroup  # type: ignore[no-redef]
+    from airflow.sdk import TaskGroup
+    from airflow.sdk.types import Context, Logger
 
 if AIRFLOW_V_3_0_PLUS:
     from airflow.sdk import BaseOperatorLink
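The operator module now imports `TaskGroup` and `Logger` only under `TYPE_CHECKING`, so the old try/except ImportError fallback is unnecessary: type-only imports never execute at runtime. A sketch of the idiom; `describe_group` is a hypothetical function, not part of the provider:

from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Imported for annotations only; this block never runs at runtime, so no
    # try/except ImportError fallback is required.
    from airflow.sdk import TaskGroup
    from airflow.sdk.types import Logger


def describe_group(group: TaskGroup, log: Logger) -> None:
    # With `from __future__ import annotations`, the hints above stay unevaluated strings.
    log.info("task group: %s", group.group_id)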
airflow/providers/databricks/plugins/databricks_workflow.py

@@ -17,7 +17,6 @@
 
 from __future__ import annotations
 
-import logging
 import os
 from typing import TYPE_CHECKING, Any
 from urllib.parse import unquote
@@ -45,6 +44,7 @@ if TYPE_CHECKING:
 
     from airflow.models import BaseOperator
     from airflow.providers.databricks.operators.databricks import DatabricksTaskBaseOperator
+    from airflow.sdk.types import Logger
    from airflow.utils.context import Context
 
 
@@ -62,7 +62,7 @@ def get_auth_decorator():
 
 
 def get_databricks_task_ids(
-    group_id: str, task_map: dict[str, DatabricksTaskBaseOperator], log: …
+    group_id: str, task_map: dict[str, DatabricksTaskBaseOperator], log: Logger
 ) -> list[str]:
     """
     Return a list of all Databricks task IDs for a dictionary of Airflow tasks.
@@ -112,7 +112,7 @@ if not AIRFLOW_V_3_0_PLUS:
 
     @provide_session
     def _clear_task_instances(
-        dag_id: str, run_id: str, task_ids: list[str], log: …
+        dag_id: str, run_id: str, task_ids: list[str], log: Logger, session: Session = NEW_SESSION
     ) -> None:
         dag = _get_dag(dag_id, session=session)
         log.debug("task_ids %s to clear", str(task_ids))
@@ -145,7 +145,7 @@ def _repair_task(
     databricks_conn_id: str,
     databricks_run_id: int,
     tasks_to_repair: list[str],
-    logger: …
+    logger: Logger,
 ) -> int:
     """
     Repair a Databricks task using the Databricks API.
@@ -294,7 +294,7 @@ class WorkflowJobRunLink(BaseOperatorLink, LoggingMixin):
     def store_databricks_job_run_link(
         context: Context,
         metadata: Any,
-        logger: …
+        logger: Logger,
     ) -> None:
         """
         Store the Databricks job run link in XCom during task execution.
@@ -368,7 +368,7 @@ class WorkflowJobRepairAllFailedLink(BaseOperatorLink, LoggingMixin):
             children[child_id] = child
         return children
 
-    def get_tasks_to_run(self, ti_key: TaskInstanceKey, operator: BaseOperator, log: …
+    def get_tasks_to_run(self, ti_key: TaskInstanceKey, operator: BaseOperator, log: Logger) -> str:
         task_group = operator.task_group
         if not task_group:
             raise AirflowException("Task group is required for generating repair link.")
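The plugin hunks follow the same typing cleanup: helpers take an explicitly typed `log: Logger` parameter instead of relying on a module-level `logging` import, and `_clear_task_instances` keeps Airflow's `@provide_session` idiom, where a SQLAlchemy session is injected when the caller does not pass one. A sketch of that combination; the `count_tasks` helper is hypothetical:

from __future__ import annotations

from typing import TYPE_CHECKING

from airflow.utils.session import NEW_SESSION, provide_session

if TYPE_CHECKING:
    from sqlalchemy.orm import Session

    from airflow.sdk.types import Logger


@provide_session
def count_tasks(dag_id: str, task_ids: list[str], log: Logger, session: Session = NEW_SESSION) -> int:
    # `session` is injected by the decorator when the caller does not pass one.
    log.debug("counting %s task ids for dag %s", len(task_ids), dag_id)
    return len(task_ids)  # illustrative body; the real helper queries task instances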
{apache_airflow_providers_databricks-7.7.2rc1.dist-info → apache_airflow_providers_databricks-7.7.3.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: apache-airflow-providers-databricks
-Version: 7.7.2rc1
+Version: 7.7.3
 Summary: Provider package apache-airflow-providers-databricks for Apache Airflow
 Keywords: airflow-provider,databricks,airflow,integration
 Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -20,9 +20,9 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: System :: Monitoring
-Requires-Dist: apache-airflow>=2.10.…
-Requires-Dist: apache-airflow-providers-common-compat>=1.6.…
-Requires-Dist: apache-airflow-providers-common-sql>=1.27.…
+Requires-Dist: apache-airflow>=2.10.0
+Requires-Dist: apache-airflow-providers-common-compat>=1.6.0
+Requires-Dist: apache-airflow-providers-common-sql>=1.27.0
 Requires-Dist: requests>=2.32.0,<3
 Requires-Dist: databricks-sql-connector>=4.0.0
 Requires-Dist: databricks-sqlalchemy>=1.0.2
@@ -33,13 +33,13 @@ Requires-Dist: pandas>=2.2.3; python_version >="3.13"
 Requires-Dist: pyarrow>=16.1.0; python_version < '3.13'
 Requires-Dist: pyarrow>=18.0.0; python_version >= '3.13'
 Requires-Dist: azure-identity>=1.3.1 ; extra == "azure-identity"
-Requires-Dist: apache-airflow-providers-fab>=2.2.…
-Requires-Dist: apache-airflow-providers-openlineage>=2.3.…
+Requires-Dist: apache-airflow-providers-fab>=2.2.0 ; extra == "fab" and ( python_version < '3.13')
+Requires-Dist: apache-airflow-providers-openlineage>=2.3.0 ; extra == "openlineage"
 Requires-Dist: databricks-sdk==0.10.0 ; extra == "sdk"
 Requires-Dist: apache-airflow-providers-standard ; extra == "standard"
 Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
-Project-URL: Changelog, https://airflow.…
-Project-URL: Documentation, https://airflow.…
+Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.7.3/changelog.html
+Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.7.3
 Project-URL: Mastodon, https://fosstodon.org/@airflow
 Project-URL: Slack Chat, https://s.apache.org/airflow-slack
 Project-URL: Source Code, https://github.com/apache/airflow
@@ -75,7 +75,7 @@ Provides-Extra: standard
 
 Package ``apache-airflow-providers-databricks``
 
-Release: ``7.7.2rc1``
+Release: ``7.7.3``
 
 
 `Databricks <https://databricks.com/>`__
@@ -88,7 +88,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
 are in ``airflow.providers.databricks`` python package.
 
 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.7.…
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.7.3/>`_.
 
 Installation
 ------------
@@ -141,6 +141,19 @@ Dependent package
 `apache-airflow-providers-openlineage <https://airflow.apache.org/docs/apache-airflow-providers-openlineage>`_   ``openlineage``
 ================================================================================================================== =================
 
+Optional dependencies
+----------------------
+
+================== ================================================================
+Extra              Dependencies
+================== ================================================================
+``sdk``            ``databricks-sdk==0.10.0``
+``azure-identity`` ``azure-identity>=1.3.1``
+``fab``            ``apache-airflow-providers-fab>=2.2.0; python_version < '3.13'``
+``standard``       ``apache-airflow-providers-standard``
+``openlineage``    ``apache-airflow-providers-openlineage>=2.3.0``
+================== ================================================================
+
 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.7.…
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.7.3/changelog.html>`_.
 
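The new "Optional dependencies" table maps each pip extra to what it installs. As a usage example, either of the following pulls in the corresponding extra dependencies alongside the provider:

pip install "apache-airflow-providers-databricks[azure-identity]==7.7.3"
pip install "apache-airflow-providers-databricks[sdk,openlineage]"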
{apache_airflow_providers_databricks-7.7.2rc1.dist-info → apache_airflow_providers_databricks-7.7.3.dist-info}/RECORD

@@ -1,19 +1,19 @@
 airflow/providers/databricks/LICENSE,sha256=gXPVwptPlW1TJ4HSuG5OMPg-a3h43OGMkZRR1rpwfJA,10850
-airflow/providers/databricks/__init__.py,sha256=…
+airflow/providers/databricks/__init__.py,sha256=Y1Uel2fhD6vQK9-aibLNliRJUTxpTJTKDbXbyfyrMQs,1499
 airflow/providers/databricks/exceptions.py,sha256=85RklmLOI_PnTzfXNIUd5fAu2aMMUhelwumQAX0wANE,1261
 airflow/providers/databricks/get_provider_info.py,sha256=NZ-rY6k6ctDZN7rDngN7mAzq7RMhLag5NwfnuBNcKuw,5644
 airflow/providers/databricks/version_compat.py,sha256=FPgvVwIHRDhFFg0Ghd4WfCiQt-lI8DXtgv4bHwu7Wx4,2021
 airflow/providers/databricks/hooks/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
 airflow/providers/databricks/hooks/databricks.py,sha256=rkd1J73Zc8IRnMBapxRHWUvyNAEVyV5j14iBDtRdzFo,29470
-airflow/providers/databricks/hooks/databricks_base.py,sha256=…
+airflow/providers/databricks/hooks/databricks_base.py,sha256=PpXH3LmHpGiWKaxBuInBCV2q7aKwLWs4xyKSx4E-sJ4,36486
 airflow/providers/databricks/hooks/databricks_sql.py,sha256=-oGJxteTW1L7L0MLpiNeucWs3q_k2n0Ax2rLSNGc0F8,17726
 airflow/providers/databricks/operators/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
-airflow/providers/databricks/operators/databricks.py,sha256=…
+airflow/providers/databricks/operators/databricks.py,sha256=bNvSL45FAw-0TACJTxIp2P2hRmes3n2E9G0A2bPe17Y,79500
 airflow/providers/databricks/operators/databricks_repos.py,sha256=NLigItgvQOpxhDhttkU2Jhrcu1gODXQME2i5f8w7gYk,13311
 airflow/providers/databricks/operators/databricks_sql.py,sha256=QmFUM83jY0pvnG4K-iM7Kuc4H48ORIx2jgGoOdAtEJw,21836
 airflow/providers/databricks/operators/databricks_workflow.py,sha256=BAWsfFdEG-7p0_6ykkz-xZX1-vdtHnS8uhwjDFpevyg,15088
 airflow/providers/databricks/plugins/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
-airflow/providers/databricks/plugins/databricks_workflow.py,sha256=…
+airflow/providers/databricks/plugins/databricks_workflow.py,sha256=jxP85L29xGeFV3rZFE6YahrWlPebOSAXmcLWr2oY2AE,20043
 airflow/providers/databricks/sensors/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
 airflow/providers/databricks/sensors/databricks.py,sha256=AVSqvHDr7iDXL1WZ46MTN3KUnVSIOc_g5JEViA1MeVE,6428
 airflow/providers/databricks/sensors/databricks_partition.py,sha256=1PZo-rdRo6E7yBa30ISFjgQ-iaFdqPYm0gnN5tXgxCU,10205
@@ -24,7 +24,7 @@ airflow/providers/databricks/utils/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2
 airflow/providers/databricks/utils/databricks.py,sha256=ecvzZbC4KdXds47VeSayot9EO-RQnTRJTEwKITH7waQ,5117
 airflow/providers/databricks/utils/mixins.py,sha256=WUmkt3AmXalmV6zOUIJZWbTldxYunAZOstddDhKCC94,7407
 airflow/providers/databricks/utils/openlineage.py,sha256=1jT5Woh9YifawdP-VFWsabfF-ecuCjPlzD5P_W4DAhI,15078
-apache_airflow_providers_databricks-7.7.2rc1.dist-info/…
-apache_airflow_providers_databricks-7.7.2rc1.dist-info/…
-apache_airflow_providers_databricks-7.7.2rc1.dist-info/…
-apache_airflow_providers_databricks-7.7.2rc1.dist-info/…
+apache_airflow_providers_databricks-7.7.3.dist-info/entry_points.txt,sha256=hjmZm3ab2cteTR4t9eE28oKixHwNIKtLCThd6sx3XRQ,227
+apache_airflow_providers_databricks-7.7.3.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
+apache_airflow_providers_databricks-7.7.3.dist-info/METADATA,sha256=FfQ9LqeJWEQYfOKYTdXCsBGsnMNH2njzMfi1FTgR1TY,7863
+apache_airflow_providers_databricks-7.7.3.dist-info/RECORD,,

{apache_airflow_providers_databricks-7.7.2rc1.dist-info → apache_airflow_providers_databricks-7.7.3.dist-info}/WHEEL
File without changes.

{apache_airflow_providers_databricks-7.7.2rc1.dist-info → apache_airflow_providers_databricks-7.7.3.dist-info}/entry_points.txt
File without changes.
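For reference, each RECORD row has the form `path,sha256=<digest>,<size>`, where the digest is the unpadded urlsafe-base64 SHA-256 of the file, per the wheel specification. A small sketch that recomputes a digest in that format; the example path assumes an unpacked wheel:

import base64
import hashlib
from pathlib import Path


def record_digest(path: str) -> str:
    """Return a RECORD-style digest: unpadded urlsafe-base64 of the file's SHA-256."""
    digest = hashlib.sha256(Path(path).read_bytes()).digest()
    return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode()

# e.g., for the provider __init__.py inside the unpacked 7.7.3 wheel, the value
# should match the RECORD row above: sha256=Y1Uel2fhD6vQK9-aibLNliRJUTxpTJTKDbXbyfyrMQs
# record_digest("airflow/providers/databricks/__init__.py")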