apache-airflow-providers-databricks 7.7.2rc1__py3-none-any.whl → 7.7.3rc1__py3-none-any.whl

This diff compares the published contents of two package versions as they appear in their public registry. It is provided for informational purposes only.
--- a/airflow/providers/databricks/__init__.py
+++ b/airflow/providers/databricks/__init__.py
@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version
 
 __all__ = ["__version__"]
 
-__version__ = "7.7.2"
+__version__ = "7.7.3"
 
 if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
     "2.10.0"
--- a/airflow/providers/databricks/hooks/databricks_base.py
+++ b/airflow/providers/databricks/hooks/databricks_base.py
@@ -121,6 +121,9 @@ class BaseDatabricksHook(BaseHook):
         self.oauth_tokens: dict[str, dict] = {}
         self.token_timeout_seconds = 10
         self.caller = caller
+        self._metadata_cache: dict[str, Any] = {}
+        self._metadata_expiry: float = 0
+        self._metadata_ttl: int = 300
 
         def my_after_func(retry_state):
             self._log_request_error(retry_state.attempt_number, retry_state.outcome)
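The three new attributes give the hook a small time-to-live (TTL) cache for Azure Instance Metadata Service responses: the payload is kept in ``_metadata_cache`` and served until ``_metadata_expiry`` (epoch seconds) passes, with a 300-second TTL. A standalone sketch of the same pattern, using illustrative names that are not part of the provider:

    # Minimal TTL-cache sketch; class and method names are illustrative only.
    import time


    class TtlCachedFetcher:
        def __init__(self, ttl_seconds: int = 300) -> None:
            self._cache: dict = {}
            self._expiry: float = 0  # epoch seconds; 0 means nothing cached yet
            self._ttl = ttl_seconds

        def fetch(self) -> dict:
            # Serve the cached payload while it is still fresh.
            if self._cache and time.time() < self._expiry:
                return self._cache
            self._cache = self._expensive_fetch()
            self._expiry = time.time() + self._ttl
            return self._cache

        def _expensive_fetch(self) -> dict:
            return {"fetched_at": time.time()}  # stand-in for a network call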
@@ -515,43 +518,64 @@ class BaseDatabricksHook(BaseHook):
 
         return int(token[time_key]) > (int(time.time()) + TOKEN_REFRESH_LEAD_TIME)
 
-    @staticmethod
-    def _check_azure_metadata_service() -> None:
+    def _check_azure_metadata_service(self) -> None:
         """
-        Check for Azure Metadata Service.
+        Check for Azure Metadata Service (with caching).
 
         https://docs.microsoft.com/en-us/azure/virtual-machines/linux/instance-metadata-service
         """
+        if self._metadata_cache and time.time() < self._metadata_expiry:
+            return
         try:
-            jsn = requests.get(
-                AZURE_METADATA_SERVICE_INSTANCE_URL,
-                params={"api-version": "2021-02-01"},
-                headers={"Metadata": "true"},
-                timeout=2,
-            ).json()
-            if "compute" not in jsn or "azEnvironment" not in jsn["compute"]:
-                raise AirflowException(
-                    f"Was able to fetch some metadata, but it doesn't look like Azure Metadata: {jsn}"
-                )
+            for attempt in self._get_retry_object():
+                with attempt:
+                    response = requests.get(
+                        AZURE_METADATA_SERVICE_INSTANCE_URL,
+                        params={"api-version": "2021-02-01"},
+                        headers={"Metadata": "true"},
+                        timeout=2,
+                    )
+                    response.raise_for_status()
+                    response_json = response.json()
+
+                    self._validate_azure_metadata_service(response_json)
+                    self._metadata_cache = response_json
+                    self._metadata_expiry = time.time() + self._metadata_ttl
+                    break
+        except RetryError:
+            raise ConnectionError(f"Failed to reach Azure Metadata Service after {self.retry_limit} retries.")
         except (requests_exceptions.RequestException, ValueError) as e:
-            raise AirflowException(f"Can't reach Azure Metadata Service: {e}")
+            raise ConnectionError(f"Can't reach Azure Metadata Service: {e}")
 
     async def _a_check_azure_metadata_service(self):
         """Async version of `_check_azure_metadata_service()`."""
+        if self._metadata_cache and time.time() < self._metadata_expiry:
+            return
         try:
-            async with self._session.get(
-                url=AZURE_METADATA_SERVICE_INSTANCE_URL,
-                params={"api-version": "2021-02-01"},
-                headers={"Metadata": "true"},
-                timeout=2,
-            ) as resp:
-                jsn = await resp.json()
-                if "compute" not in jsn or "azEnvironment" not in jsn["compute"]:
-                    raise AirflowException(
-                        f"Was able to fetch some metadata, but it doesn't look like Azure Metadata: {jsn}"
-                    )
-        except (requests_exceptions.RequestException, ValueError) as e:
-            raise AirflowException(f"Can't reach Azure Metadata Service: {e}")
+            async for attempt in self._a_get_retry_object():
+                with attempt:
+                    async with self._session.get(
+                        url=AZURE_METADATA_SERVICE_INSTANCE_URL,
+                        params={"api-version": "2021-02-01"},
+                        headers={"Metadata": "true"},
+                        timeout=2,
+                    ) as resp:
+                        resp.raise_for_status()
+                        response_json = await resp.json()
+                    self._validate_azure_metadata_service(response_json)
+                    self._metadata_cache = response_json
+                    self._metadata_expiry = time.time() + self._metadata_ttl
+                    break
+        except RetryError:
+            raise ConnectionError(f"Failed to reach Azure Metadata Service after {self.retry_limit} retries.")
+        except (aiohttp.ClientError, ValueError) as e:
+            raise ConnectionError(f"Can't reach Azure Metadata Service: {e}")
+
+    def _validate_azure_metadata_service(self, response_json: dict) -> None:
+        if "compute" not in response_json or "azEnvironment" not in response_json["compute"]:
+            raise ValueError(
+                f"Was able to fetch some metadata, but it doesn't look like Azure Metadata: {response_json}"
+            )
 
     def _get_token(self, raise_error: bool = False) -> str | None:
         if "token" in self.databricks_conn.extra_dejson:
--- a/airflow/providers/databricks/operators/databricks.py
+++ b/airflow/providers/databricks/operators/databricks.py
@@ -24,7 +24,6 @@ import time
 from abc import ABC, abstractmethod
 from collections.abc import Sequence
 from functools import cached_property
-from logging import Logger
 from typing import TYPE_CHECKING, Any
 
 from airflow.configuration import conf
@@ -60,12 +59,8 @@ if TYPE_CHECKING:
         DatabricksWorkflowTaskGroup,
     )
     from airflow.providers.openlineage.extractors import OperatorLineage
-    from airflow.utils.context import Context
-
-    try:
-        from airflow.sdk import TaskGroup
-    except ImportError:
-        from airflow.utils.task_group import TaskGroup  # type: ignore[no-redef]
+    from airflow.sdk import TaskGroup
+    from airflow.sdk.types import Context, Logger
 
 if AIRFLOW_V_3_0_PLUS:
     from airflow.sdk import BaseOperatorLink
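``Context`` and ``Logger`` now come from ``airflow.sdk``/``airflow.sdk.types`` inside the ``if TYPE_CHECKING:`` block, so they exist for static type checkers only and are never imported at runtime; annotations stay lazy because these modules use ``from __future__ import annotations``. A generic sketch of that pattern (the imported name is illustrative, not provider code):

    # Generic sketch of the typing-only import pattern; not provider code.
    from __future__ import annotations  # annotations become lazy strings

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        # Seen by mypy/pyright only; never executed, so heavy or optional
        # dependencies add nothing to runtime import cost.
        from logging import Logger


    def log_greeting(log: Logger) -> None:
        # The annotation is never evaluated at runtime, so this works even
        # though the import above never ran.
        log.info("hello")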
--- a/airflow/providers/databricks/plugins/databricks_workflow.py
+++ b/airflow/providers/databricks/plugins/databricks_workflow.py
@@ -17,7 +17,6 @@
 
 from __future__ import annotations
 
-import logging
 import os
 from typing import TYPE_CHECKING, Any
 from urllib.parse import unquote
@@ -45,6 +44,7 @@ if TYPE_CHECKING:
 
     from airflow.models import BaseOperator
     from airflow.providers.databricks.operators.databricks import DatabricksTaskBaseOperator
+    from airflow.sdk.types import Logger
     from airflow.utils.context import Context
 
 
@@ -62,7 +62,7 @@ def get_auth_decorator():
 
 
 def get_databricks_task_ids(
-    group_id: str, task_map: dict[str, DatabricksTaskBaseOperator], log: logging.Logger
+    group_id: str, task_map: dict[str, DatabricksTaskBaseOperator], log: Logger
 ) -> list[str]:
     """
     Return a list of all Databricks task IDs for a dictionary of Airflow tasks.
@@ -112,7 +112,7 @@ if not AIRFLOW_V_3_0_PLUS:
 
     @provide_session
     def _clear_task_instances(
-        dag_id: str, run_id: str, task_ids: list[str], log: logging.Logger, session: Session = NEW_SESSION
+        dag_id: str, run_id: str, task_ids: list[str], log: Logger, session: Session = NEW_SESSION
     ) -> None:
         dag = _get_dag(dag_id, session=session)
         log.debug("task_ids %s to clear", str(task_ids))
@@ -145,7 +145,7 @@ def _repair_task(
     databricks_conn_id: str,
     databricks_run_id: int,
     tasks_to_repair: list[str],
-    logger: logging.Logger,
+    logger: Logger,
 ) -> int:
     """
     Repair a Databricks task using the Databricks API.
@@ -294,7 +294,7 @@ class WorkflowJobRunLink(BaseOperatorLink, LoggingMixin):
     def store_databricks_job_run_link(
         context: Context,
         metadata: Any,
-        logger: logging.Logger,
+        logger: Logger,
     ) -> None:
         """
         Store the Databricks job run link in XCom during task execution.
@@ -368,7 +368,7 @@ class WorkflowJobRepairAllFailedLink(BaseOperatorLink, LoggingMixin):
             children[child_id] = child
         return children
 
-    def get_tasks_to_run(self, ti_key: TaskInstanceKey, operator: BaseOperator, log: logging.Logger) -> str:
+    def get_tasks_to_run(self, ti_key: TaskInstanceKey, operator: BaseOperator, log: Logger) -> str:
         task_group = operator.task_group
         if not task_group:
             raise AirflowException("Task group is required for generating repair link.")
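Every ``logging.Logger`` annotation in this plugin is swapped for the Task SDK's ``Logger`` type, decoupling these helpers from the stdlib class: any object exposing the usual logging methods satisfies the annotation. A sketch of how such a structural type can be expressed; this ``Protocol`` is illustrative, not the actual definition of ``airflow.sdk.types.Logger``:

    # Illustrative structural logger type; NOT the real airflow.sdk.types.Logger.
    from typing import Any, Protocol


    class LoggerLike(Protocol):
        def debug(self, msg: str, *args: Any, **kwargs: Any) -> None: ...
        def info(self, msg: str, *args: Any, **kwargs: Any) -> None: ...
        def warning(self, msg: str, *args: Any, **kwargs: Any) -> None: ...
        def error(self, msg: str, *args: Any, **kwargs: Any) -> None: ...


    def announce(log: LoggerLike) -> None:
        # Accepts logging.Logger, structlog loggers, or test doubles alike.
        log.info("repair link generated")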
--- a/apache_airflow_providers_databricks-7.7.2rc1.dist-info/METADATA
+++ b/apache_airflow_providers_databricks-7.7.3rc1.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: apache-airflow-providers-databricks
-Version: 7.7.2rc1
+Version: 7.7.3rc1
 Summary: Provider package apache-airflow-providers-databricks for Apache Airflow
 Keywords: airflow-provider,databricks,airflow,integration
 Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -38,8 +38,8 @@ Requires-Dist: apache-airflow-providers-openlineage>=2.3.0rc1 ; extra == "openli
 Requires-Dist: databricks-sdk==0.10.0 ; extra == "sdk"
 Requires-Dist: apache-airflow-providers-standard ; extra == "standard"
 Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
-Project-URL: Changelog, https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.7.2/changelog.html
-Project-URL: Documentation, https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.7.2
+Project-URL: Changelog, https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.7.3/changelog.html
+Project-URL: Documentation, https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.7.3
 Project-URL: Mastodon, https://fosstodon.org/@airflow
 Project-URL: Slack Chat, https://s.apache.org/airflow-slack
 Project-URL: Source Code, https://github.com/apache/airflow
@@ -75,7 +75,7 @@ Provides-Extra: standard
 
 Package ``apache-airflow-providers-databricks``
 
-Release: ``7.7.2``
+Release: ``7.7.3``
 
 
 `Databricks <https://databricks.com/>`__
@@ -88,7 +88,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
 are in ``airflow.providers.databricks`` python package.
 
 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.7.2/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.7.3/>`_.
 
 Installation
 ------------
@@ -141,6 +141,19 @@ Dependent package
 `apache-airflow-providers-openlineage <https://airflow.apache.org/docs/apache-airflow-providers-openlineage>`_ ``openlineage``
 ================================================================================================================== =================
 
+Optional dependencies
+---------------------
+
+================== ================================================================
+Extra              Dependencies
+================== ================================================================
+``sdk``            ``databricks-sdk==0.10.0``
+``azure-identity`` ``azure-identity>=1.3.1``
+``fab``            ``apache-airflow-providers-fab>=2.2.0; python_version < '3.13'``
+``standard``       ``apache-airflow-providers-standard``
+``openlineage``    ``apache-airflow-providers-openlineage>=2.3.0``
+================== ================================================================
+
 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.7.2/changelog.html>`_.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.7.3/changelog.html>`_.
 
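Each extra from the new table installs alongside the provider in the usual pip way, for example (the extra and version shown are illustrative picks from the table above):

    pip install "apache-airflow-providers-databricks[azure-identity]==7.7.3"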
--- a/apache_airflow_providers_databricks-7.7.2rc1.dist-info/RECORD
+++ b/apache_airflow_providers_databricks-7.7.3rc1.dist-info/RECORD
@@ -1,19 +1,19 @@
 airflow/providers/databricks/LICENSE,sha256=gXPVwptPlW1TJ4HSuG5OMPg-a3h43OGMkZRR1rpwfJA,10850
-airflow/providers/databricks/__init__.py,sha256=lYfw4I2LzZTvalJKs-1qVwu0Om8gcTtnYtiRZlSo30A,1499
+airflow/providers/databricks/__init__.py,sha256=Y1Uel2fhD6vQK9-aibLNliRJUTxpTJTKDbXbyfyrMQs,1499
 airflow/providers/databricks/exceptions.py,sha256=85RklmLOI_PnTzfXNIUd5fAu2aMMUhelwumQAX0wANE,1261
 airflow/providers/databricks/get_provider_info.py,sha256=NZ-rY6k6ctDZN7rDngN7mAzq7RMhLag5NwfnuBNcKuw,5644
 airflow/providers/databricks/version_compat.py,sha256=FPgvVwIHRDhFFg0Ghd4WfCiQt-lI8DXtgv4bHwu7Wx4,2021
 airflow/providers/databricks/hooks/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
 airflow/providers/databricks/hooks/databricks.py,sha256=rkd1J73Zc8IRnMBapxRHWUvyNAEVyV5j14iBDtRdzFo,29470
-airflow/providers/databricks/hooks/databricks_base.py,sha256=gish0H2rHEzPqI5ZpU3BPFCUaycHMEYGYev0ufJMzzI,35167
+airflow/providers/databricks/hooks/databricks_base.py,sha256=PpXH3LmHpGiWKaxBuInBCV2q7aKwLWs4xyKSx4E-sJ4,36486
 airflow/providers/databricks/hooks/databricks_sql.py,sha256=-oGJxteTW1L7L0MLpiNeucWs3q_k2n0Ax2rLSNGc0F8,17726
 airflow/providers/databricks/operators/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
-airflow/providers/databricks/operators/databricks.py,sha256=nWurEENXmey-yWYzseb0lGArz4-Q2S29nGAnNLpytMQ,79642
+airflow/providers/databricks/operators/databricks.py,sha256=bNvSL45FAw-0TACJTxIp2P2hRmes3n2E9G0A2bPe17Y,79500
 airflow/providers/databricks/operators/databricks_repos.py,sha256=NLigItgvQOpxhDhttkU2Jhrcu1gODXQME2i5f8w7gYk,13311
 airflow/providers/databricks/operators/databricks_sql.py,sha256=QmFUM83jY0pvnG4K-iM7Kuc4H48ORIx2jgGoOdAtEJw,21836
 airflow/providers/databricks/operators/databricks_workflow.py,sha256=BAWsfFdEG-7p0_6ykkz-xZX1-vdtHnS8uhwjDFpevyg,15088
 airflow/providers/databricks/plugins/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
-airflow/providers/databricks/plugins/databricks_workflow.py,sha256=XTbsGp8C7Y9_Z_At2VghJZts9jPCNagENXD5hJvtwOg,20057
+airflow/providers/databricks/plugins/databricks_workflow.py,sha256=jxP85L29xGeFV3rZFE6YahrWlPebOSAXmcLWr2oY2AE,20043
 airflow/providers/databricks/sensors/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
 airflow/providers/databricks/sensors/databricks.py,sha256=AVSqvHDr7iDXL1WZ46MTN3KUnVSIOc_g5JEViA1MeVE,6428
 airflow/providers/databricks/sensors/databricks_partition.py,sha256=1PZo-rdRo6E7yBa30ISFjgQ-iaFdqPYm0gnN5tXgxCU,10205
@@ -24,7 +24,7 @@ airflow/providers/databricks/utils/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2
 airflow/providers/databricks/utils/databricks.py,sha256=ecvzZbC4KdXds47VeSayot9EO-RQnTRJTEwKITH7waQ,5117
 airflow/providers/databricks/utils/mixins.py,sha256=WUmkt3AmXalmV6zOUIJZWbTldxYunAZOstddDhKCC94,7407
 airflow/providers/databricks/utils/openlineage.py,sha256=1jT5Woh9YifawdP-VFWsabfF-ecuCjPlzD5P_W4DAhI,15078
-apache_airflow_providers_databricks-7.7.2rc1.dist-info/entry_points.txt,sha256=hjmZm3ab2cteTR4t9eE28oKixHwNIKtLCThd6sx3XRQ,227
-apache_airflow_providers_databricks-7.7.2rc1.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
-apache_airflow_providers_databricks-7.7.2rc1.dist-info/METADATA,sha256=hpSINe7B6t6yhfwKBxP7oCFPzQGNdaph_S0DZLvzCtE,7256
-apache_airflow_providers_databricks-7.7.2rc1.dist-info/RECORD,,
+apache_airflow_providers_databricks-7.7.3rc1.dist-info/entry_points.txt,sha256=hjmZm3ab2cteTR4t9eE28oKixHwNIKtLCThd6sx3XRQ,227
+apache_airflow_providers_databricks-7.7.3rc1.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
+apache_airflow_providers_databricks-7.7.3rc1.dist-info/METADATA,sha256=OEEfGmstXbMKNvQfJ-qRKtIhUUbteJfzI24Y7Q26pts,7895
+apache_airflow_providers_databricks-7.7.3rc1.dist-info/RECORD,,