apache-airflow-providers-databricks 7.5.0__py3-none-any.whl → 7.6.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.

Note: this version of apache-airflow-providers-databricks is marked as a potentially problematic release.

@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version
29
29
 
30
30
  __all__ = ["__version__"]
31
31
 
32
- __version__ = "7.5.0"
32
+ __version__ = "7.6.0"
33
33
 
34
34
  if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
35
35
  "2.10.0"
@@ -37,33 +37,36 @@ from requests import exceptions as requests_exceptions
37
37
  from airflow.exceptions import AirflowException
38
38
  from airflow.providers.databricks.hooks.databricks_base import BaseDatabricksHook
39
39
 
40
- GET_CLUSTER_ENDPOINT = ("GET", "api/2.0/clusters/get")
41
- RESTART_CLUSTER_ENDPOINT = ("POST", "api/2.0/clusters/restart")
42
- START_CLUSTER_ENDPOINT = ("POST", "api/2.0/clusters/start")
43
- TERMINATE_CLUSTER_ENDPOINT = ("POST", "api/2.0/clusters/delete")
44
-
45
- CREATE_ENDPOINT = ("POST", "api/2.1/jobs/create")
46
- RESET_ENDPOINT = ("POST", "api/2.1/jobs/reset")
47
- UPDATE_ENDPOINT = ("POST", "api/2.1/jobs/update")
48
- RUN_NOW_ENDPOINT = ("POST", "api/2.1/jobs/run-now")
49
- SUBMIT_RUN_ENDPOINT = ("POST", "api/2.1/jobs/runs/submit")
50
- GET_RUN_ENDPOINT = ("GET", "api/2.1/jobs/runs/get")
51
- CANCEL_RUN_ENDPOINT = ("POST", "api/2.1/jobs/runs/cancel")
52
- DELETE_RUN_ENDPOINT = ("POST", "api/2.1/jobs/runs/delete")
53
- REPAIR_RUN_ENDPOINT = ("POST", "api/2.1/jobs/runs/repair")
54
- OUTPUT_RUNS_JOB_ENDPOINT = ("GET", "api/2.1/jobs/runs/get-output")
55
- CANCEL_ALL_RUNS_ENDPOINT = ("POST", "api/2.1/jobs/runs/cancel-all")
56
-
57
- INSTALL_LIBS_ENDPOINT = ("POST", "api/2.0/libraries/install")
58
- UNINSTALL_LIBS_ENDPOINT = ("POST", "api/2.0/libraries/uninstall")
59
-
60
- LIST_JOBS_ENDPOINT = ("GET", "api/2.1/jobs/list")
61
- LIST_PIPELINES_ENDPOINT = ("GET", "api/2.0/pipelines")
62
-
63
- WORKSPACE_GET_STATUS_ENDPOINT = ("GET", "api/2.0/workspace/get-status")
64
-
65
- SPARK_VERSIONS_ENDPOINT = ("GET", "api/2.0/clusters/spark-versions")
66
- SQL_STATEMENTS_ENDPOINT = "api/2.0/sql/statements"
40
+ GET_CLUSTER_ENDPOINT = ("GET", "2.0/clusters/get")
41
+ RESTART_CLUSTER_ENDPOINT = ("POST", "2.0/clusters/restart")
42
+ START_CLUSTER_ENDPOINT = ("POST", "2.0/clusters/start")
43
+ TERMINATE_CLUSTER_ENDPOINT = ("POST", "2.0/clusters/delete")
44
+
45
+ CREATE_ENDPOINT = ("POST", "2.1/jobs/create")
46
+ RESET_ENDPOINT = ("POST", "2.1/jobs/reset")
47
+ UPDATE_ENDPOINT = ("POST", "2.1/jobs/update")
48
+ RUN_NOW_ENDPOINT = ("POST", "2.1/jobs/run-now")
49
+ SUBMIT_RUN_ENDPOINT = ("POST", "2.1/jobs/runs/submit")
50
+ GET_RUN_ENDPOINT = ("GET", "2.1/jobs/runs/get")
51
+ CANCEL_RUN_ENDPOINT = ("POST", "2.1/jobs/runs/cancel")
52
+ DELETE_RUN_ENDPOINT = ("POST", "2.1/jobs/runs/delete")
53
+ REPAIR_RUN_ENDPOINT = ("POST", "2.1/jobs/runs/repair")
54
+ OUTPUT_RUNS_JOB_ENDPOINT = ("GET", "2.1/jobs/runs/get-output")
55
+ CANCEL_ALL_RUNS_ENDPOINT = ("POST", "2.1/jobs/runs/cancel-all")
56
+
57
+ INSTALL_LIBS_ENDPOINT = ("POST", "2.0/libraries/install")
58
+ UNINSTALL_LIBS_ENDPOINT = ("POST", "2.0/libraries/uninstall")
59
+ UPDATE_REPO_ENDPOINT = ("PATCH", "2.0/repos/")
60
+ DELETE_REPO_ENDPOINT = ("DELETE", "2.0/repos/")
61
+ CREATE_REPO_ENDPOINT = ("POST", "2.0/repos")
62
+
63
+ LIST_JOBS_ENDPOINT = ("GET", "2.1/jobs/list")
64
+ LIST_PIPELINES_ENDPOINT = ("GET", "2.0/pipelines")
65
+
66
+ WORKSPACE_GET_STATUS_ENDPOINT = ("GET", "2.0/workspace/get-status")
67
+
68
+ SPARK_VERSIONS_ENDPOINT = ("GET", "2.0/clusters/spark-versions")
69
+ SQL_STATEMENTS_ENDPOINT = "2.0/sql/statements"
67
70
 
68
71
 
69
72
  class RunLifeCycleState(Enum):
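The 7.6.0 constants above drop the leading api/ segment; the base hook now prepends it when building the request URL (see the _do_api_call hunks further below). A worked example of how one of these tuples resolves to a full URL, using an illustrative workspace host:

    method, path = GET_CLUSTER_ENDPOINT  # ("GET", "2.0/clusters/get")
    # The hook joins the workspace host, the "api/" prefix and the path, roughly:
    url = f"https://adb-1234567890123456.7.azuredatabricks.net/api/{path}"
    # -> GET https://adb-1234567890123456.7.azuredatabricks.net/api/2.0/clusters/get
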
@@ -718,7 +721,8 @@ class DatabricksHook(BaseDatabricksHook):
718
721
  :param json: payload
719
722
  :return: metadata from update
720
723
  """
721
- repos_endpoint = ("PATCH", f"api/2.0/repos/{repo_id}")
724
+ method, base_path = UPDATE_REPO_ENDPOINT
725
+ repos_endpoint = (method, f"{base_path}/{repo_id}")
722
726
  return self._do_api_call(repos_endpoint, json)
723
727
 
724
728
  def delete_repo(self, repo_id: str):
@@ -728,7 +732,8 @@ class DatabricksHook(BaseDatabricksHook):
728
732
  :param repo_id: ID of Databricks Repos
729
733
  :return:
730
734
  """
731
- repos_endpoint = ("DELETE", f"api/2.0/repos/{repo_id}")
735
+ method, base_path = DELETE_REPO_ENDPOINT
736
+ repos_endpoint = (method, f"{base_path}/{repo_id}")
732
737
  self._do_api_call(repos_endpoint)
733
738
 
734
739
  def create_repo(self, json: dict[str, Any]) -> dict:
@@ -738,8 +743,7 @@ class DatabricksHook(BaseDatabricksHook):
738
743
  :param json: payload
739
744
  :return:
740
745
  """
741
- repos_endpoint = ("POST", "api/2.0/repos")
742
- return self._do_api_call(repos_endpoint, json)
746
+ return self._do_api_call(CREATE_REPO_ENDPOINT, json)
743
747
 
744
748
  def get_repo_by_path(self, path: str) -> str | None:
745
749
  """
@@ -50,9 +50,13 @@ from tenacity import (
50
50
 
51
51
  from airflow import __version__
52
52
  from airflow.exceptions import AirflowException, AirflowOptionalProviderFeatureException
53
- from airflow.hooks.base import BaseHook
54
53
  from airflow.providers_manager import ProvidersManager
55
54
 
55
+ try:
56
+ from airflow.sdk import BaseHook
57
+ except ImportError:
58
+ from airflow.hooks.base import BaseHook as BaseHook # type: ignore
59
+
56
60
  if TYPE_CHECKING:
57
61
  from airflow.models import Connection
58
62
 
@@ -135,7 +139,7 @@ class BaseDatabricksHook(BaseHook):
135
139
 
136
140
  @cached_property
137
141
  def databricks_conn(self) -> Connection:
138
- return self.get_connection(self.databricks_conn_id)
142
+ return self.get_connection(self.databricks_conn_id) # type: ignore[return-value]
139
143
 
140
144
  def get_conn(self) -> Connection:
141
145
  return self.databricks_conn
@@ -353,14 +357,15 @@ class BaseDatabricksHook(BaseHook):
353
357
  async for attempt in self._a_get_retry_object():
354
358
  with attempt:
355
359
  if self.databricks_conn.extra_dejson.get("use_azure_managed_identity", False):
356
- token = await AsyncManagedIdentityCredential().get_token(f"{resource}/.default")
360
+ async with AsyncManagedIdentityCredential() as credential:
361
+ token = await credential.get_token(f"{resource}/.default")
357
362
  else:
358
- credential = AsyncClientSecretCredential(
363
+ async with AsyncClientSecretCredential(
359
364
  client_id=self.databricks_conn.login,
360
365
  client_secret=self.databricks_conn.password,
361
366
  tenant_id=self.databricks_conn.extra_dejson["azure_tenant_id"],
362
- )
363
- token = await credential.get_token(f"{resource}/.default")
367
+ ) as credential:
368
+ token = await credential.get_token(f"{resource}/.default")
364
369
  jsn = {
365
370
  "access_token": token.token,
366
371
  "token_type": "Bearer",
@@ -636,8 +641,9 @@ class BaseDatabricksHook(BaseHook):
636
641
  """
637
642
  method, endpoint = endpoint_info
638
643
 
639
- # TODO: get rid of explicit 'api/' in the endpoint specification
640
- url = self._endpoint_url(endpoint)
644
+ # Automatically prepend 'api/' prefix to all endpoint paths
645
+ full_endpoint = f"api/{endpoint}"
646
+ url = self._endpoint_url(full_endpoint)
641
647
 
642
648
  aad_headers = self._get_aad_headers()
643
649
  headers = {**self.user_agent_header, **aad_headers}
@@ -703,7 +709,8 @@ class BaseDatabricksHook(BaseHook):
703
709
  """
704
710
  method, endpoint = endpoint_info
705
711
 
706
- url = self._endpoint_url(endpoint)
712
+ full_endpoint = f"api/{endpoint}"
713
+ url = self._endpoint_url(full_endpoint)
707
714
 
708
715
  aad_headers = await self._a_get_aad_headers()
709
716
  headers = {**self.user_agent_header, **aad_headers}
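Because _do_api_call now prepends api/ itself, code that calls the hook's _do_api_call directly should pass paths without that prefix, otherwise it would be doubled. A minimal sketch of the new calling convention, assuming a configured connection (the connection id and cluster id are illustrative):

    from airflow.providers.databricks.hooks.databricks import DatabricksHook

    hook = DatabricksHook(databricks_conn_id="databricks_default")
    # Path is workspace-relative and without "api/"; the hook builds ".../api/2.0/clusters/get".
    cluster = hook._do_api_call(("GET", "2.0/clusters/get"), {"cluster_id": "1234-567890-abcde123"})
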
@@ -18,14 +18,13 @@ from __future__ import annotations
18
18
 
19
19
  import threading
20
20
  from collections import namedtuple
21
- from collections.abc import Iterable, Mapping, Sequence
21
+ from collections.abc import Callable, Iterable, Mapping, Sequence
22
22
  from contextlib import closing
23
23
  from copy import copy
24
24
  from datetime import timedelta
25
25
  from typing import (
26
26
  TYPE_CHECKING,
27
27
  Any,
28
- Callable,
29
28
  TypeVar,
30
29
  cast,
31
30
  overload,
@@ -345,10 +344,9 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
345
344
 
346
345
  def get_openlineage_database_specific_lineage(self, task_instance) -> OperatorLineage | None:
347
346
  """
348
- Generate OpenLineage metadata for a Databricks task instance based on executed query IDs.
347
+ Emit separate OpenLineage events for each Databricks query, based on executed query IDs.
349
348
 
350
- If a single query ID is present, attach an `ExternalQueryRunFacet` to the lineage metadata.
351
- If multiple query IDs are present, emits separate OpenLineage events for each query instead.
349
+ If a single query ID is present, also add an `ExternalQueryRunFacet` to the returned lineage metadata.
352
350
 
353
351
  Note that `get_openlineage_database_specific_lineage` is usually called after task's execution,
354
352
  so if multiple query IDs are present, both START and COMPLETE event for each query will be emitted
@@ -369,13 +367,22 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
369
367
  from airflow.providers.openlineage.sqlparser import SQLParser
370
368
 
371
369
  if not self.query_ids:
372
- self.log.debug("openlineage: no databricks query ids found.")
370
+ self.log.info("OpenLineage could not find databricks query ids.")
373
371
  return None
374
372
 
375
373
  self.log.debug("openlineage: getting connection to get database info")
376
374
  connection = self.get_connection(self.get_conn_id())
377
375
  namespace = SQLParser.create_namespace(self.get_openlineage_database_info(connection))
378
376
 
377
+ self.log.info("Separate OpenLineage events will be emitted for each Databricks query_id.")
378
+ emit_openlineage_events_for_databricks_queries(
379
+ task_instance=task_instance,
380
+ hook=self,
381
+ query_ids=self.query_ids,
382
+ query_for_extra_metadata=True,
383
+ query_source_namespace=namespace,
384
+ )
385
+
379
386
  if len(self.query_ids) == 1:
380
387
  self.log.debug("Attaching ExternalQueryRunFacet with single query_id to OpenLineage event.")
381
388
  return OperatorLineage(
@@ -386,12 +393,4 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
386
393
  }
387
394
  )
388
395
 
389
- self.log.info("Multiple query_ids found. Separate OpenLineage event will be emitted for each query.")
390
- emit_openlineage_events_for_databricks_queries(
391
- query_ids=self.query_ids,
392
- query_source_namespace=namespace,
393
- task_instance=task_instance,
394
- hook=self,
395
- )
396
-
397
396
  return None
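For a single query id the method still returns lineage metadata directly; roughly, the returned object looks like the sketch below (the query id and namespace are illustrative, and the facet import path assumes the openlineage-python v2 facets):

    from airflow.providers.openlineage.extractors import OperatorLineage
    from openlineage.client.facet_v2 import external_query_run

    lineage = OperatorLineage(
        run_facets={
            "externalQuery": external_query_run.ExternalQueryRunFacet(
                externalQueryId="01ef-abc123",
                source="databricks://adb-1234567890123456.7.azuredatabricks.net",
            )
        }
    )
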
@@ -29,7 +29,6 @@ from typing import TYPE_CHECKING, Any
29
29
 
30
30
  from airflow.configuration import conf
31
31
  from airflow.exceptions import AirflowException
32
- from airflow.models import BaseOperator
33
32
  from airflow.providers.databricks.hooks.databricks import (
34
33
  DatabricksHook,
35
34
  RunLifeCycleState,
@@ -42,13 +41,14 @@ from airflow.providers.databricks.operators.databricks_workflow import (
42
41
  from airflow.providers.databricks.plugins.databricks_workflow import (
43
42
  WorkflowJobRepairSingleTaskLink,
44
43
  WorkflowJobRunLink,
44
+ store_databricks_job_run_link,
45
45
  )
46
46
  from airflow.providers.databricks.triggers.databricks import (
47
47
  DatabricksExecutionTrigger,
48
48
  )
49
49
  from airflow.providers.databricks.utils.databricks import normalise_json_content, validate_trigger_event
50
50
  from airflow.providers.databricks.utils.mixins import DatabricksSQLStatementsMixin
51
- from airflow.providers.databricks.version_compat import AIRFLOW_V_3_0_PLUS
51
+ from airflow.providers.databricks.version_compat import AIRFLOW_V_3_0_PLUS, BaseOperator
52
52
 
53
53
  if TYPE_CHECKING:
54
54
  from airflow.models.taskinstancekey import TaskInstanceKey
@@ -1214,10 +1214,16 @@ class DatabricksTaskBaseOperator(BaseOperator, ABC):
1214
1214
  super().__init__(**kwargs)
1215
1215
 
1216
1216
  if self._databricks_workflow_task_group is not None:
1217
- self.operator_extra_links = (
1218
- WorkflowJobRunLink(),
1219
- WorkflowJobRepairSingleTaskLink(),
1220
- )
1217
+ # Conditionally set operator_extra_links based on Airflow version. In Airflow 3, only show the job run link.
1218
+ # In Airflow 2, show the job run link and the repair link.
1219
+ # TODO: Once we expand the plugin functionality in Airflow 3.1, this can be re-evaluated on how to handle the repair link.
1220
+ if AIRFLOW_V_3_0_PLUS:
1221
+ self.operator_extra_links = (WorkflowJobRunLink(),)
1222
+ else:
1223
+ self.operator_extra_links = (
1224
+ WorkflowJobRunLink(),
1225
+ WorkflowJobRepairSingleTaskLink(),
1226
+ )
1221
1227
  else:
1222
1228
  # Databricks does not support repair for non-workflow tasks, hence do not show the repair link.
1223
1229
  self.operator_extra_links = (DatabricksJobRunLink(),)
@@ -1427,6 +1433,15 @@ class DatabricksTaskBaseOperator(BaseOperator, ABC):
1427
1433
  )
1428
1434
  self.databricks_run_id = workflow_run_metadata.run_id
1429
1435
  self.databricks_conn_id = workflow_run_metadata.conn_id
1436
+
1437
+ # Store operator links in XCom for Airflow 3 compatibility
1438
+ if AIRFLOW_V_3_0_PLUS:
1439
+ # Store the job run link
1440
+ store_databricks_job_run_link(
1441
+ context=context,
1442
+ metadata=workflow_run_metadata,
1443
+ logger=self.log,
1444
+ )
1430
1445
  else:
1431
1446
  self._launch_job(context=context)
1432
1447
  if self.wait_for_termination:
@@ -26,8 +26,8 @@ from typing import TYPE_CHECKING
26
26
  from urllib.parse import urlsplit
27
27
 
28
28
  from airflow.exceptions import AirflowException
29
- from airflow.models import BaseOperator
30
29
  from airflow.providers.databricks.hooks.databricks import DatabricksHook
30
+ from airflow.providers.databricks.version_compat import BaseOperator
31
31
 
32
32
  if TYPE_CHECKING:
33
33
  try:
@@ -28,9 +28,9 @@ from typing import TYPE_CHECKING, Any, ClassVar
28
28
  from databricks.sql.utils import ParamEscaper
29
29
 
30
30
  from airflow.exceptions import AirflowException
31
- from airflow.models import BaseOperator
32
31
  from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator
33
32
  from airflow.providers.databricks.hooks.databricks_sql import DatabricksSqlHook
33
+ from airflow.providers.databricks.version_compat import BaseOperator
34
34
 
35
35
  if TYPE_CHECKING:
36
36
  from airflow.utils.context import Context
@@ -26,12 +26,13 @@ from typing import TYPE_CHECKING, Any
26
26
  from mergedeep import merge
27
27
 
28
28
  from airflow.exceptions import AirflowException
29
- from airflow.models import BaseOperator
30
29
  from airflow.providers.databricks.hooks.databricks import DatabricksHook, RunLifeCycleState
31
30
  from airflow.providers.databricks.plugins.databricks_workflow import (
32
31
  WorkflowJobRepairAllFailedLink,
33
32
  WorkflowJobRunLink,
33
+ store_databricks_job_run_link,
34
34
  )
35
+ from airflow.providers.databricks.version_compat import AIRFLOW_V_3_0_PLUS, BaseOperator
35
36
  from airflow.utils.task_group import TaskGroup
36
37
 
37
38
  if TYPE_CHECKING:
@@ -92,9 +93,18 @@ class _CreateDatabricksWorkflowOperator(BaseOperator):
92
93
  populated after instantiation using the `add_task` method.
93
94
  """
94
95
 
95
- operator_extra_links = (WorkflowJobRunLink(), WorkflowJobRepairAllFailedLink())
96
96
  template_fields = ("notebook_params", "job_clusters")
97
97
  caller = "_CreateDatabricksWorkflowOperator"
98
+ # Conditionally set operator_extra_links based on Airflow version
99
+ if AIRFLOW_V_3_0_PLUS:
100
+ # In Airflow 3, disable "Repair All Failed Tasks" since we can't pre-determine failed tasks
101
+ operator_extra_links = (WorkflowJobRunLink(),)
102
+ else:
103
+ # In Airflow 2.x, keep both links
104
+ operator_extra_links = ( # type: ignore[assignment]
105
+ WorkflowJobRunLink(),
106
+ WorkflowJobRepairAllFailedLink(),
107
+ )
98
108
 
99
109
  def __init__(
100
110
  self,
@@ -219,6 +229,15 @@ class _CreateDatabricksWorkflowOperator(BaseOperator):
219
229
  run_id,
220
230
  )
221
231
 
232
+ # Store operator links in XCom for Airflow 3 compatibility
233
+ if AIRFLOW_V_3_0_PLUS:
234
+ # Store the job run link
235
+ store_databricks_job_run_link(
236
+ context=context,
237
+ metadata=self.workflow_run_metadata,
238
+ logger=self.log,
239
+ )
240
+
222
241
  return {
223
242
  "conn_id": self.databricks_conn_id,
224
243
  "job_id": job_id,
@@ -40,7 +40,6 @@ if AIRFLOW_V_3_0_PLUS:
40
40
  else:
41
41
  from airflow.www import auth # type: ignore
42
42
  from airflow.utils.log.logging_mixin import LoggingMixin
43
- from airflow.utils.session import NEW_SESSION, provide_session
44
43
  from airflow.utils.state import TaskInstanceState
45
44
  from airflow.utils.task_group import TaskGroup
46
45
 
@@ -49,6 +48,7 @@ if TYPE_CHECKING:
49
48
 
50
49
  from airflow.models import BaseOperator
51
50
  from airflow.providers.databricks.operators.databricks import DatabricksTaskBaseOperator
51
+ from airflow.utils.context import Context
52
52
 
53
53
  if AIRFLOW_V_3_0_PLUS:
54
54
  from airflow.sdk import BaseOperatorLink
@@ -93,32 +93,56 @@ def get_databricks_task_ids(
93
93
  return task_ids
94
94
 
95
95
 
96
- @provide_session
97
- def _get_dagrun(dag: DAG, run_id: str, session: Session | None = None) -> DagRun:
98
- """
99
- Retrieve the DagRun object associated with the specified DAG and run_id.
96
+ # TODO: Need to re-think on how to support the currently unavailable repair functionality in Airflow 3. Probably a
97
+ # good time to re-evaluate this would be once the plugin functionality is expanded in Airflow 3.1.
98
+ if not AIRFLOW_V_3_0_PLUS:
99
+ from airflow.utils.session import NEW_SESSION, provide_session
100
100
 
101
- :param dag: The DAG object associated with the DagRun to retrieve.
102
- :param run_id: The run_id associated with the DagRun to retrieve.
103
- :param session: The SQLAlchemy session to use for the query. If None, uses the default session.
104
- :return: The DagRun object associated with the specified DAG and run_id.
105
- """
106
- if not session:
107
- raise AirflowException("Session not provided.")
101
+ @provide_session
102
+ def _get_dagrun(dag: DAG, run_id: str, session: Session | None = None) -> DagRun:
103
+ """
104
+ Retrieve the DagRun object associated with the specified DAG and run_id.
108
105
 
109
- return session.query(DagRun).filter(DagRun.dag_id == dag.dag_id, DagRun.run_id == run_id).first()
106
+ :param dag: The DAG object associated with the DagRun to retrieve.
107
+ :param run_id: The run_id associated with the DagRun to retrieve.
108
+ :param session: The SQLAlchemy session to use for the query. If None, uses the default session.
109
+ :return: The DagRun object associated with the specified DAG and run_id.
110
+ """
111
+ if not session:
112
+ raise AirflowException("Session not provided.")
110
113
 
114
+ return session.query(DagRun).filter(DagRun.dag_id == dag.dag_id, DagRun.run_id == run_id).first()
111
115
 
112
- @provide_session
113
- def _clear_task_instances(
114
- dag_id: str, run_id: str, task_ids: list[str], log: logging.Logger, session: Session | None = None
115
- ) -> None:
116
- dag_bag = DagBag(read_dags_from_db=True)
117
- dag = dag_bag.get_dag(dag_id)
118
- log.debug("task_ids %s to clear", str(task_ids))
119
- dr: DagRun = _get_dagrun(dag, run_id, session=session)
120
- tis_to_clear = [ti for ti in dr.get_task_instances() if ti.databricks_task_key in task_ids]
121
- clear_task_instances(tis_to_clear, session)
116
+ @provide_session
117
+ def _clear_task_instances(
118
+ dag_id: str, run_id: str, task_ids: list[str], log: logging.Logger, session: Session | None = None
119
+ ) -> None:
120
+ dag_bag = DagBag(read_dags_from_db=True)
121
+ dag = dag_bag.get_dag(dag_id)
122
+ log.debug("task_ids %s to clear", str(task_ids))
123
+ dr: DagRun = _get_dagrun(dag, run_id, session=session)
124
+ tis_to_clear = [ti for ti in dr.get_task_instances() if ti.databricks_task_key in task_ids]
125
+ clear_task_instances(tis_to_clear, session)
126
+
127
+ @provide_session
128
+ def get_task_instance(operator: BaseOperator, dttm, session: Session = NEW_SESSION) -> TaskInstance:
129
+ dag_id = operator.dag.dag_id
130
+ if hasattr(DagRun, "execution_date"): # Airflow 2.x.
131
+ dag_run = DagRun.find(dag_id, execution_date=dttm)[0] # type: ignore[call-arg]
132
+ else:
133
+ dag_run = DagRun.find(dag_id, logical_date=dttm)[0]
134
+ ti = (
135
+ session.query(TaskInstance)
136
+ .filter(
137
+ TaskInstance.dag_id == dag_id,
138
+ TaskInstance.run_id == dag_run.run_id,
139
+ TaskInstance.task_id == operator.task_id,
140
+ )
141
+ .one_or_none()
142
+ )
143
+ if not ti:
144
+ raise TaskInstanceNotFound("Task instance not found")
145
+ return ti
122
146
 
123
147
 
124
148
  def _repair_task(
@@ -201,27 +225,6 @@ def _get_launch_task_key(current_task_key: TaskInstanceKey, task_id: str) -> Tas
201
225
  return current_task_key
202
226
 
203
227
 
204
- @provide_session
205
- def get_task_instance(operator: BaseOperator, dttm, session: Session = NEW_SESSION) -> TaskInstance:
206
- dag_id = operator.dag.dag_id
207
- if hasattr(DagRun, "execution_date"): # Airflow 2.x.
208
- dag_run = DagRun.find(dag_id, execution_date=dttm)[0] # type: ignore[call-arg]
209
- else:
210
- dag_run = DagRun.find(dag_id, logical_date=dttm)[0]
211
- ti = (
212
- session.query(TaskInstance)
213
- .filter(
214
- TaskInstance.dag_id == dag_id,
215
- TaskInstance.run_id == dag_run.run_id,
216
- TaskInstance.task_id == operator.task_id,
217
- )
218
- .one_or_none()
219
- )
220
- if not ti:
221
- raise TaskInstanceNotFound("Task instance not found")
222
- return ti
223
-
224
-
225
228
  def get_xcom_result(
226
229
  ti_key: TaskInstanceKey,
227
230
  key: str,
@@ -240,6 +243,11 @@ class WorkflowJobRunLink(BaseOperatorLink, LoggingMixin):
240
243
 
241
244
  name = "See Databricks Job Run"
242
245
 
246
+ @property
247
+ def xcom_key(self) -> str:
248
+ """XCom key where the link is stored during task execution."""
249
+ return "databricks_job_run_link"
250
+
243
251
  def get_link(
244
252
  self,
245
253
  operator: BaseOperator,
@@ -247,6 +255,29 @@ class WorkflowJobRunLink(BaseOperatorLink, LoggingMixin):
247
255
  *,
248
256
  ti_key: TaskInstanceKey | None = None,
249
257
  ) -> str:
258
+ if AIRFLOW_V_3_0_PLUS:
259
+ # Use public XCom API to get the pre-computed link
260
+ try:
261
+ link = XCom.get_value(
262
+ ti_key=ti_key,
263
+ key=self.xcom_key,
264
+ )
265
+ return link if link else ""
266
+ except Exception as e:
267
+ self.log.warning("Failed to retrieve Databricks job run link from XCom: %s", e)
268
+ return ""
269
+ else:
270
+ # Airflow 2.x - keep original implementation
271
+ return self._get_link_legacy(operator, dttm, ti_key=ti_key)
272
+
273
+ def _get_link_legacy(
274
+ self,
275
+ operator: BaseOperator,
276
+ dttm=None,
277
+ *,
278
+ ti_key: TaskInstanceKey | None = None,
279
+ ) -> str:
280
+ """Legacy implementation for Airflow 2.x."""
250
281
  if not ti_key:
251
282
  ti = get_task_instance(operator, dttm)
252
283
  ti_key = ti.key
@@ -269,6 +300,30 @@ class WorkflowJobRunLink(BaseOperatorLink, LoggingMixin):
269
300
  return f"https://{hook.host}/#job/{metadata.job_id}/run/{metadata.run_id}"
270
301
 
271
302
 
303
+ def store_databricks_job_run_link(
304
+ context: Context,
305
+ metadata: Any,
306
+ logger: logging.Logger,
307
+ ) -> None:
308
+ """
309
+ Store the Databricks job run link in XCom during task execution.
310
+
311
+ This should be called by Databricks operators during their execution.
312
+ """
313
+ if not AIRFLOW_V_3_0_PLUS:
314
+ return # Only needed for Airflow 3
315
+
316
+ try:
317
+ hook = DatabricksHook(metadata.conn_id)
318
+ link = f"https://{hook.host}/#job/{metadata.job_id}/run/{metadata.run_id}"
319
+
320
+ # Store the link in XCom for the UI to retrieve as extra link
321
+ context["ti"].xcom_push(key="databricks_job_run_link", value=link)
322
+ logger.info("Stored Databricks job run link in XCom: %s", link)
323
+ except Exception as e:
324
+ logger.warning("Failed to store Databricks job run link: %s", e)
325
+
326
+
272
327
  class WorkflowJobRepairAllFailedLink(BaseOperatorLink, LoggingMixin):
273
328
  """Constructs a link to send a request to repair all failed tasks in the Databricks workflow."""
274
329
 
@@ -455,13 +510,6 @@ class RepairDatabricksTasks(BaseView, LoggingMixin):
455
510
  return url_for("Airflow.grid", dag_id=dag_id, dag_run_id=run_id)
456
511
 
457
512
 
458
- repair_databricks_view = RepairDatabricksTasks()
459
-
460
- repair_databricks_package = {
461
- "view": repair_databricks_view,
462
- }
463
-
464
-
465
513
  class DatabricksWorkflowPlugin(AirflowPlugin):
466
514
  """
467
515
  Databricks Workflows plugin for Airflow.
@@ -472,9 +520,22 @@ class DatabricksWorkflowPlugin(AirflowPlugin):
472
520
  """
473
521
 
474
522
  name = "databricks_workflow"
475
- operator_extra_links = [
476
- WorkflowJobRepairAllFailedLink(),
477
- WorkflowJobRepairSingleTaskLink(),
478
- WorkflowJobRunLink(),
479
- ]
480
- appbuilder_views = [repair_databricks_package]
523
+
524
+ # Conditionally set operator_extra_links based on Airflow version
525
+ if AIRFLOW_V_3_0_PLUS:
526
+ # In Airflow 3, disable the links for repair functionality until it is figured out it can be supported
527
+ operator_extra_links = [
528
+ WorkflowJobRunLink(),
529
+ ]
530
+ else:
531
+ # In Airflow 2.x, keep all links including repair all failed tasks
532
+ operator_extra_links = [
533
+ WorkflowJobRepairAllFailedLink(),
534
+ WorkflowJobRepairSingleTaskLink(),
535
+ WorkflowJobRunLink(),
536
+ ]
537
+ repair_databricks_view = RepairDatabricksTasks()
538
+ repair_databricks_package = {
539
+ "view": repair_databricks_view,
540
+ }
541
+ appbuilder_views = [repair_databricks_package]
@@ -30,9 +30,9 @@ from airflow.providers.databricks.utils.mixins import DatabricksSQLStatementsMix
30
30
  from airflow.providers.databricks.version_compat import AIRFLOW_V_3_0_PLUS
31
31
 
32
32
  if AIRFLOW_V_3_0_PLUS:
33
- from airflow.sdk import BaseSensorOperator
33
+ from airflow.sdk.bases.sensor import BaseSensorOperator
34
34
  else:
35
- from airflow.sensors.base import BaseSensorOperator
35
+ from airflow.sensors.base import BaseSensorOperator # type: ignore[no-redef]
36
36
 
37
37
  if TYPE_CHECKING:
38
38
  from airflow.utils.context import Context
@@ -20,17 +20,22 @@
20
20
 
21
21
  from __future__ import annotations
22
22
 
23
- from collections.abc import Sequence
23
+ from collections.abc import Callable, Sequence
24
24
  from datetime import datetime
25
25
  from functools import cached_property
26
- from typing import TYPE_CHECKING, Any, Callable
26
+ from typing import TYPE_CHECKING, Any
27
27
 
28
28
  from databricks.sql.utils import ParamEscaper
29
29
 
30
30
  from airflow.exceptions import AirflowException
31
31
  from airflow.providers.common.sql.hooks.handlers import fetch_all_handler
32
32
  from airflow.providers.databricks.hooks.databricks_sql import DatabricksSqlHook
33
- from airflow.sensors.base import BaseSensorOperator
33
+ from airflow.providers.databricks.version_compat import AIRFLOW_V_3_0_PLUS
34
+
35
+ if AIRFLOW_V_3_0_PLUS:
36
+ from airflow.sdk import BaseSensorOperator
37
+ else:
38
+ from airflow.sensors.base import BaseSensorOperator # type: ignore[no-redef]
34
39
 
35
40
  if TYPE_CHECKING:
36
41
  try:
@@ -20,14 +20,19 @@
20
20
 
21
21
  from __future__ import annotations
22
22
 
23
- from collections.abc import Iterable, Sequence
23
+ from collections.abc import Callable, Iterable, Sequence
24
24
  from functools import cached_property
25
- from typing import TYPE_CHECKING, Any, Callable
25
+ from typing import TYPE_CHECKING, Any
26
26
 
27
27
  from airflow.exceptions import AirflowException
28
28
  from airflow.providers.common.sql.hooks.handlers import fetch_all_handler
29
29
  from airflow.providers.databricks.hooks.databricks_sql import DatabricksSqlHook
30
- from airflow.sensors.base import BaseSensorOperator
30
+ from airflow.providers.databricks.version_compat import AIRFLOW_V_3_0_PLUS
31
+
32
+ if AIRFLOW_V_3_0_PLUS:
33
+ from airflow.sdk import BaseSensorOperator
34
+ else:
35
+ from airflow.sensors.base import BaseSensorOperator # type: ignore[no-redef]
31
36
 
32
37
  if TYPE_CHECKING:
33
38
  try:
@@ -31,6 +31,7 @@ if TYPE_CHECKING:
31
31
  from openlineage.client.event_v2 import RunEvent
32
32
  from openlineage.client.facet_v2 import JobFacet
33
33
 
34
+ from airflow.providers.databricks.hooks.databricks import DatabricksHook
34
35
  from airflow.providers.databricks.hooks.databricks_sql import DatabricksSqlHook
35
36
 
36
37
  log = logging.getLogger(__name__)
@@ -121,20 +122,18 @@ def _get_parent_run_facet(task_instance):
121
122
  )
122
123
 
123
124
 
124
- def _run_api_call(hook: DatabricksSqlHook, query_ids: list[str]) -> list[dict]:
125
+ def _run_api_call(hook: DatabricksSqlHook | DatabricksHook, query_ids: list[str]) -> list[dict]:
125
126
  """Retrieve execution details for specific queries from Databricks's query history API."""
126
- if not hook._token:
127
- # This has logic for token initialization
128
- hook.get_conn()
129
-
130
- # https://docs.databricks.com/api/azure/workspace/queryhistory/list
131
127
  try:
128
+ token = hook._get_token(raise_error=True)
129
+ # https://docs.databricks.com/api/azure/workspace/queryhistory/list
132
130
  response = requests.get(
133
131
  url=f"https://{hook.host}/api/2.0/sql/history/queries",
134
- headers={"Authorization": f"Bearer {hook._token}"},
132
+ headers={"Authorization": f"Bearer {token}"},
135
133
  data=json.dumps({"filter_by": {"statement_ids": query_ids}}),
136
134
  timeout=2,
137
135
  )
136
+ response.raise_for_status()
138
137
  except Exception as e:
139
138
  log.warning(
140
139
  "OpenLineage could not retrieve Databricks queries details. Error received: `%s`.",
@@ -142,48 +141,42 @@ def _run_api_call(hook: DatabricksSqlHook, query_ids: list[str]) -> list[dict]:
142
141
  )
143
142
  return []
144
143
 
145
- if response.status_code != 200:
146
- log.warning(
147
- "OpenLineage could not retrieve Databricks queries details. API error received: `%s`: `%s`",
148
- response.status_code,
149
- response.text,
150
- )
151
- return []
152
-
153
144
  return response.json()["res"]
154
145
 
155
146
 
147
+ def _process_data_from_api(data: list[dict[str, Any]]) -> list[dict[str, Any]]:
148
+ """Convert timestamp fields to UTC datetime objects."""
149
+ for row in data:
150
+ for key in ("query_start_time_ms", "query_end_time_ms"):
151
+ row[key] = datetime.datetime.fromtimestamp(row[key] / 1000, tz=datetime.timezone.utc)
152
+
153
+ return data
154
+
155
+
156
156
  def _get_queries_details_from_databricks(
157
- hook: DatabricksSqlHook, query_ids: list[str]
157
+ hook: DatabricksSqlHook | DatabricksHook, query_ids: list[str]
158
158
  ) -> dict[str, dict[str, Any]]:
159
159
  if not query_ids:
160
160
  return {}
161
161
 
162
- queries_info_from_api = _run_api_call(hook=hook, query_ids=query_ids)
163
-
164
162
  query_details = {}
165
- for query_info in queries_info_from_api:
166
- if not query_info.get("query_id"):
167
- log.debug("Databricks query ID not found in API response.")
168
- continue
169
-
170
- q_start_time = None
171
- q_end_time = None
172
- if query_info.get("query_start_time_ms") and query_info.get("query_end_time_ms"):
173
- q_start_time = datetime.datetime.fromtimestamp(
174
- query_info["query_start_time_ms"] / 1000, tz=datetime.timezone.utc
175
- )
176
- q_end_time = datetime.datetime.fromtimestamp(
177
- query_info["query_end_time_ms"] / 1000, tz=datetime.timezone.utc
178
- )
179
-
180
- query_details[query_info["query_id"]] = {
181
- "status": query_info.get("status"),
182
- "start_time": q_start_time,
183
- "end_time": q_end_time,
184
- "query_text": query_info.get("query_text"),
185
- "error_message": query_info.get("error_message"),
163
+ try:
164
+ queries_info_from_api = _run_api_call(hook=hook, query_ids=query_ids)
165
+ queries_info_from_api = _process_data_from_api(queries_info_from_api)
166
+
167
+ query_details = {
168
+ query_info["query_id"]: {
169
+ "status": query_info.get("status"),
170
+ "start_time": query_info.get("query_start_time_ms"),
171
+ "end_time": query_info.get("query_end_time_ms"),
172
+ "query_text": query_info.get("query_text"),
173
+ "error_message": query_info.get("error_message"),
174
+ }
175
+ for query_info in queries_info_from_api
176
+ if query_info["query_id"]
186
177
  }
178
+ except Exception as e:
179
+ log.warning("OpenLineage could not retrieve extra metadata from Databricks. Error encountered: %s", e)
187
180
 
188
181
  return query_details
189
182
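_process_data_from_api converts the millisecond epoch timestamps returned by the query history API into timezone-aware datetimes. A worked example with an illustrative value:

    import datetime

    row = {"query_start_time_ms": 1700000000000}
    start = datetime.datetime.fromtimestamp(row["query_start_time_ms"] / 1000, tz=datetime.timezone.utc)
    # -> datetime.datetime(2023, 11, 14, 22, 13, 20, tzinfo=datetime.timezone.utc)
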
 
@@ -221,17 +214,18 @@ def _create_ol_event_pair(
221
214
 
222
215
  @require_openlineage_version(provider_min_version="2.3.0")
223
216
  def emit_openlineage_events_for_databricks_queries(
224
- query_ids: list[str],
225
- query_source_namespace: str,
226
217
  task_instance,
227
- hook: DatabricksSqlHook | None = None,
218
+ hook: DatabricksSqlHook | DatabricksHook | None = None,
219
+ query_ids: list[str] | None = None,
220
+ query_source_namespace: str | None = None,
221
+ query_for_extra_metadata: bool = False,
228
222
  additional_run_facets: dict | None = None,
229
223
  additional_job_facets: dict | None = None,
230
224
  ) -> None:
231
225
  """
232
226
  Emit OpenLineage events for executed Databricks queries.
233
227
 
234
- Metadata retrieval from Databricks is attempted only if a `DatabricksSqlHook` is provided.
228
+ Metadata retrieval from Databricks is attempted only if `get_extra_metadata` is True and hook is provided.
235
229
  If metadata is available, execution details such as start time, end time, execution status,
236
230
  error messages, and SQL text are included in the events. If no metadata is found, the function
237
231
  defaults to using the Airflow task instance's state and the current timestamp.
@@ -241,10 +235,16 @@ def emit_openlineage_events_for_databricks_queries(
241
235
  will correspond to actual query execution times.
242
236
 
243
237
  Args:
244
- query_ids: A list of Databricks query IDs to emit events for.
245
- query_source_namespace: The namespace to be included in ExternalQueryRunFacet.
246
238
  task_instance: The Airflow task instance that run these queries.
247
- hook: A hook instance used to retrieve query metadata if available.
239
+ hook: A supported Databricks hook instance used to retrieve query metadata if available.
240
+ If omitted, `query_ids` and `query_source_namespace` must be provided explicitly and
241
+ `query_for_extra_metadata` must be `False`.
242
+ query_ids: A list of Databricks query IDs to emit events for, can only be None if `hook` is provided
243
+ and `hook.query_ids` are present (DatabricksHook does not store query_ids).
244
+ query_source_namespace: The namespace to be included in ExternalQueryRunFacet,
245
+ can be `None` only if hook is provided.
246
+ query_for_extra_metadata: Whether to query Databricks for additional metadata about queries.
247
+ Must be `False` if `hook` is not provided.
248
248
  additional_run_facets: Additional run facets to include in OpenLineage events.
249
249
  additional_job_facets: Additional job facets to include in OpenLineage events.
250
250
  """
@@ -259,25 +259,52 @@ def emit_openlineage_events_for_databricks_queries(
259
259
  from airflow.providers.openlineage.conf import namespace
260
260
  from airflow.providers.openlineage.plugins.listener import get_openlineage_listener
261
261
 
262
- if not query_ids:
263
- log.debug("No Databricks query IDs provided; skipping OpenLineage event emission.")
264
- return
265
-
266
- query_ids = [q for q in query_ids] # Make a copy to make sure it does not change
262
+ log.info("OpenLineage will emit events for Databricks queries.")
267
263
 
268
264
  if hook:
265
+ if not query_ids:
266
+ log.debug("No Databricks query IDs provided; Checking `hook.query_ids` property.")
267
+ query_ids = getattr(hook, "query_ids", [])
268
+ if not query_ids:
269
+ raise ValueError("No Databricks query IDs provided and `hook.query_ids` are not present.")
270
+
271
+ if not query_source_namespace:
272
+ log.debug("No Databricks query namespace provided; Creating one from scratch.")
273
+
274
+ if hasattr(hook, "get_openlineage_database_info") and hasattr(hook, "get_conn_id"):
275
+ from airflow.providers.openlineage.sqlparser import SQLParser
276
+
277
+ query_source_namespace = SQLParser.create_namespace(
278
+ hook.get_openlineage_database_info(hook.get_connection(hook.get_conn_id()))
279
+ )
280
+ else:
281
+ query_source_namespace = f"databricks://{hook.host}" if hook.host else "databricks"
282
+ else:
283
+ if not query_ids:
284
+ raise ValueError("If 'hook' is not provided, 'query_ids' must be set.")
285
+ if not query_source_namespace:
286
+ raise ValueError("If 'hook' is not provided, 'query_source_namespace' must be set.")
287
+ if query_for_extra_metadata:
288
+ raise ValueError("If 'hook' is not provided, 'query_for_extra_metadata' must be False.")
289
+
290
+ query_ids = [q for q in query_ids] # Make a copy to make sure we do not change hook's attribute
291
+
292
+ if query_for_extra_metadata and hook:
269
293
  log.debug("Retrieving metadata for %s queries from Databricks.", len(query_ids))
270
294
  databricks_metadata = _get_queries_details_from_databricks(hook, query_ids)
271
295
  else:
272
- log.debug("DatabricksSqlHook not provided. No extra metadata fill be fetched from Databricks.")
296
+ log.debug("`query_for_extra_metadata` is False. No extra metadata fill be fetched from Databricks.")
273
297
  databricks_metadata = {}
274
298
 
275
299
  # If real metadata is unavailable, we send events with eventTime=now
276
300
  default_event_time = timezone.utcnow()
277
- # If no query metadata is provided, we use task_instance's state when checking for success
301
+ # ti.state has no `value` attr (AF2) when task it's still running, in AF3 we get 'running', in that case
302
+ # assuming it's user call and query succeeded, so we replace it with success.
278
303
  # Adjust state for DBX logic, where "finished" means "success"
279
- default_state = task_instance.state.value if hasattr(task_instance, "state") else ""
280
- default_state = "finished" if default_state == "success" else default_state
304
+ default_state = (
305
+ getattr(task_instance.state, "value", "running") if hasattr(task_instance, "state") else ""
306
+ )
307
+ default_state = "finished" if default_state in ("running", "success") else default_state
281
308
 
282
309
  log.debug("Generating OpenLineage facets")
283
310
  common_run_facets = {"parent": _get_parent_run_facet(task_instance)}
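The fallback state mapping above treats both "running" and "success" as Databricks' "finished"; any other state (for example "failed") passes through unchanged. A tiny worked example:

    for raw_state in ("success", "running", "failed"):
        mapped = "finished" if raw_state in ("running", "success") else raw_state
        print(raw_state, "->", mapped)  # success -> finished, running -> finished, failed -> failed
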
@@ -318,10 +345,10 @@ def emit_openlineage_events_for_databricks_queries(
318
345
  event_batch = _create_ol_event_pair(
319
346
  job_namespace=namespace(),
320
347
  job_name=f"{task_instance.dag_id}.{task_instance.task_id}.query.{counter}",
321
- start_time=query_metadata.get("start_time", default_event_time), # type: ignore[arg-type]
322
- end_time=query_metadata.get("end_time", default_event_time), # type: ignore[arg-type]
348
+ start_time=query_metadata.get("start_time") or default_event_time, # type: ignore[arg-type]
349
+ end_time=query_metadata.get("end_time") or default_event_time, # type: ignore[arg-type]
323
350
  # Only finished status means it completed without failures
324
- is_successful=query_metadata.get("status", default_state).lower() == "finished",
351
+ is_successful=(query_metadata.get("status") or default_state).lower() == "finished",
325
352
  run_facets={**query_specific_run_facets, **common_run_facets, **additional_run_facets},
326
353
  job_facets={**query_specific_job_facets, **common_job_facets, **additional_job_facets},
327
354
  )
@@ -33,3 +33,13 @@ def get_base_airflow_version_tuple() -> tuple[int, int, int]:
33
33
 
34
34
 
35
35
  AIRFLOW_V_3_0_PLUS = get_base_airflow_version_tuple() >= (3, 0, 0)
36
+
37
+ if AIRFLOW_V_3_0_PLUS:
38
+ from airflow.sdk import BaseOperator
39
+ else:
40
+ from airflow.models import BaseOperator
41
+
42
+ __all__ = [
43
+ "AIRFLOW_V_3_0_PLUS",
44
+ "BaseOperator",
45
+ ]
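Modules elsewhere in this release import BaseOperator from this shim instead of airflow.models, so the correct base class is selected per Airflow version. A minimal sketch of the pattern for a hypothetical operator:

    from airflow.providers.databricks.version_compat import AIRFLOW_V_3_0_PLUS, BaseOperator

    class MyDatabricksOperator(BaseOperator):  # hypothetical example operator
        def execute(self, context):
            self.log.info("Running under Airflow 3+: %s", AIRFLOW_V_3_0_PLUS)
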
@@ -1,11 +1,11 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: apache-airflow-providers-databricks
3
- Version: 7.5.0
3
+ Version: 7.6.0
4
4
  Summary: Provider package apache-airflow-providers-databricks for Apache Airflow
5
5
  Keywords: airflow-provider,databricks,airflow,integration
6
6
  Author-email: Apache Software Foundation <dev@airflow.apache.org>
7
7
  Maintainer-email: Apache Software Foundation <dev@airflow.apache.org>
8
- Requires-Python: ~=3.9
8
+ Requires-Python: ~=3.10
9
9
  Description-Content-Type: text/x-rst
10
10
  Classifier: Development Status :: 5 - Production/Stable
11
11
  Classifier: Environment :: Console
@@ -15,7 +15,6 @@ Classifier: Intended Audience :: System Administrators
15
15
  Classifier: Framework :: Apache Airflow
16
16
  Classifier: Framework :: Apache Airflow :: Provider
17
17
  Classifier: License :: OSI Approved :: Apache Software License
18
- Classifier: Programming Language :: Python :: 3.9
19
18
  Classifier: Programming Language :: Python :: 3.10
20
19
  Classifier: Programming Language :: Python :: 3.11
21
20
  Classifier: Programming Language :: Python :: 3.12
@@ -28,16 +27,17 @@ Requires-Dist: databricks-sql-connector>=3.0.0
28
27
  Requires-Dist: databricks-sqlalchemy>=1.0.2
29
28
  Requires-Dist: aiohttp>=3.9.2, <4
30
29
  Requires-Dist: mergedeep>=1.3.4
31
- Requires-Dist: pandas>=2.1.2,<2.2
32
- Requires-Dist: pyarrow>=14.0.1
30
+ Requires-Dist: pandas>=2.1.2; python_version <"3.13"
31
+ Requires-Dist: pandas>=2.2.3; python_version >="3.13"
32
+ Requires-Dist: pyarrow>=16.1.0
33
33
  Requires-Dist: azure-identity>=1.3.1 ; extra == "azure-identity"
34
34
  Requires-Dist: apache-airflow-providers-fab ; extra == "fab"
35
35
  Requires-Dist: apache-airflow-providers-openlineage>=2.3.0 ; extra == "openlineage"
36
36
  Requires-Dist: databricks-sdk==0.10.0 ; extra == "sdk"
37
37
  Requires-Dist: apache-airflow-providers-standard ; extra == "standard"
38
38
  Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
39
- Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.5.0/changelog.html
40
- Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.5.0
39
+ Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.6.0/changelog.html
40
+ Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.6.0
41
41
  Project-URL: Mastodon, https://fosstodon.org/@airflow
42
42
  Project-URL: Slack Chat, https://s.apache.org/airflow-slack
43
43
  Project-URL: Source Code, https://github.com/apache/airflow
@@ -73,7 +73,7 @@ Provides-Extra: standard
73
73
 
74
74
  Package ``apache-airflow-providers-databricks``
75
75
 
76
- Release: ``7.5.0``
76
+ Release: ``7.6.0``
77
77
 
78
78
 
79
79
  `Databricks <https://databricks.com/>`__
@@ -86,7 +86,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
86
86
  are in ``airflow.providers.databricks`` python package.
87
87
 
88
88
  You can find package information and changelog for the provider
89
- in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.5.0/>`_.
89
+ in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.6.0/>`_.
90
90
 
91
91
  Installation
92
92
  ------------
@@ -95,14 +95,14 @@ You can install this package on top of an existing Airflow 2 installation (see `
95
95
  for the minimum Airflow version supported) via
96
96
  ``pip install apache-airflow-providers-databricks``
97
97
 
98
- The package supports the following python versions: 3.9,3.10,3.11,3.12
98
+ The package supports the following python versions: 3.10,3.11,3.12
99
99
 
100
100
  Requirements
101
101
  ------------
102
102
 
103
- ========================================== ==================
103
+ ========================================== =====================================
104
104
  PIP package Version required
105
- ========================================== ==================
105
+ ========================================== =====================================
106
106
  ``apache-airflow`` ``>=2.10.0``
107
107
  ``apache-airflow-providers-common-compat`` ``>=1.6.0``
108
108
  ``apache-airflow-providers-common-sql`` ``>=1.27.0``
@@ -111,9 +111,10 @@ PIP package Version required
111
111
  ``databricks-sqlalchemy`` ``>=1.0.2``
112
112
  ``aiohttp`` ``>=3.9.2,<4``
113
113
  ``mergedeep`` ``>=1.3.4``
114
- ``pandas`` ``>=2.1.2,<2.2``
115
- ``pyarrow`` ``>=14.0.1``
116
- ========================================== ==================
114
+ ``pandas`` ``>=2.1.2; python_version < "3.13"``
115
+ ``pandas`` ``>=2.2.3; python_version >= "3.13"``
116
+ ``pyarrow`` ``>=16.1.0``
117
+ ========================================== =====================================
117
118
 
118
119
  Cross provider package dependencies
119
120
  -----------------------------------
@@ -138,5 +139,5 @@ Dependent package
138
139
  ================================================================================================================== =================
139
140
 
140
141
  The changelog for the provider package can be found in the
141
- `changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.5.0/changelog.html>`_.
142
+ `changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.6.0/changelog.html>`_.
142
143
 
@@ -1,30 +1,30 @@
1
1
  airflow/providers/databricks/LICENSE,sha256=gXPVwptPlW1TJ4HSuG5OMPg-a3h43OGMkZRR1rpwfJA,10850
2
- airflow/providers/databricks/__init__.py,sha256=d1KmgGbpEA3KHmX7l-luO2NBwkPvv7tSFPf-KMbN2LE,1499
2
+ airflow/providers/databricks/__init__.py,sha256=VqfLw47pSDD_4wxEIV-X0N2IYBRr2gjQ6HUgapVYbuA,1499
3
3
  airflow/providers/databricks/exceptions.py,sha256=85RklmLOI_PnTzfXNIUd5fAu2aMMUhelwumQAX0wANE,1261
4
4
  airflow/providers/databricks/get_provider_info.py,sha256=NZ-rY6k6ctDZN7rDngN7mAzq7RMhLag5NwfnuBNcKuw,5644
5
- airflow/providers/databricks/version_compat.py,sha256=j5PCtXvZ71aBjixu-EFTNtVDPsngzzs7os0ZQDgFVDk,1536
5
+ airflow/providers/databricks/version_compat.py,sha256=7RHBehpYMeNSBtmJiPUeJHA0c7l-Eqsdy546kW3RFa4,1712
6
6
  airflow/providers/databricks/hooks/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
7
- airflow/providers/databricks/hooks/databricks.py,sha256=FIoiKWIc9AP3s8Av3Av9yleTg1kI0norwW5CAc6jTQc,28867
8
- airflow/providers/databricks/hooks/databricks_base.py,sha256=D7-_74QgQaZm1NfHKl_UOXbVAXRo2xjnOx_r1MI-rWI,34871
9
- airflow/providers/databricks/hooks/databricks_sql.py,sha256=xTdi0JN-ZdsGe2XnCa8yBi-AINZUlyIVlP-5nb2d2T0,16964
7
+ airflow/providers/databricks/hooks/databricks.py,sha256=p_oAxWRohaVvhtlmQ3C67JUsi1fRQRW84QDr7uQ7rxk,28965
8
+ airflow/providers/databricks/hooks/databricks_base.py,sha256=gish0H2rHEzPqI5ZpU3BPFCUaycHMEYGYev0ufJMzzI,35167
9
+ airflow/providers/databricks/hooks/databricks_sql.py,sha256=r6LoYSk70DfzJ1kbNaJpM-oTYJm5mCSP600iC8pIY-E,16905
10
10
  airflow/providers/databricks/operators/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
11
- airflow/providers/databricks/operators/databricks.py,sha256=yDy_pBaAi_muP3NstpXOqBNxSP9WL0_X3fX2OmR1f3c,79235
12
- airflow/providers/databricks/operators/databricks_repos.py,sha256=m_72OnnU9df7UB-8SK2Tp5VjfNyjYeAnil3dCKs9SbA,13282
13
- airflow/providers/databricks/operators/databricks_sql.py,sha256=yrYZa9Hq8JDc-8F5DGfW2mkcaNwu4o09JZj_SQQnsrE,21807
14
- airflow/providers/databricks/operators/databricks_workflow.py,sha256=9WNQR9COa90fbqb9qSzut34K9Z1S_ZdpNHAfIcuH454,14227
11
+ airflow/providers/databricks/operators/databricks.py,sha256=6rZiBdnbrs04EK-faP5DxNAHSHL3Is_q23da6N2fY7w,80047
12
+ airflow/providers/databricks/operators/databricks_repos.py,sha256=NLigItgvQOpxhDhttkU2Jhrcu1gODXQME2i5f8w7gYk,13311
13
+ airflow/providers/databricks/operators/databricks_sql.py,sha256=QmFUM83jY0pvnG4K-iM7Kuc4H48ORIx2jgGoOdAtEJw,21836
14
+ airflow/providers/databricks/operators/databricks_workflow.py,sha256=Gwrtf_EYrkYEMFIwzuA2h9IFyQyk_q4mC1cdJrf8U8Q,14994
15
15
  airflow/providers/databricks/plugins/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
16
- airflow/providers/databricks/plugins/databricks_workflow.py,sha256=1UpsodBLRrTah9zBGBzfM7n1pdkzTo7yilt6QxASspQ,17460
16
+ airflow/providers/databricks/plugins/databricks_workflow.py,sha256=iJGrG_uhFFhuGXecBFLWxLhm4zdAj-IzPsjA3EL-cpQ,20110
17
17
  airflow/providers/databricks/sensors/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
18
- airflow/providers/databricks/sensors/databricks.py,sha256=zG2rS7xemVUk-ztDSj0t90Ws47kqRgPm3NsBMQQR8bA,6389
19
- airflow/providers/databricks/sensors/databricks_partition.py,sha256=2zWdnqVaSSd7PFTZadfvtbsR7zOI4GwfZFOuEnXRLSM,10023
20
- airflow/providers/databricks/sensors/databricks_sql.py,sha256=jIA9oGBUCAlXzyrqigxlg7JQDsBFuNIF8ZUEJM8gPxg,5766
18
+ airflow/providers/databricks/sensors/databricks.py,sha256=AVSqvHDr7iDXL1WZ46MTN3KUnVSIOc_g5JEViA1MeVE,6428
19
+ airflow/providers/databricks/sensors/databricks_partition.py,sha256=1PZo-rdRo6E7yBa30ISFjgQ-iaFdqPYm0gnN5tXgxCU,10205
20
+ airflow/providers/databricks/sensors/databricks_sql.py,sha256=cbPKia5eH2no_sl-LltjBA-1qM64lurmB8lT9QR9eGk,5948
21
21
  airflow/providers/databricks/triggers/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
22
22
  airflow/providers/databricks/triggers/databricks.py,sha256=dSogx6GlcJfZ4CFhtlMeWs9sYFEYthP82S_U8-tM2Tk,9240
23
23
  airflow/providers/databricks/utils/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
24
24
  airflow/providers/databricks/utils/databricks.py,sha256=s0qEr_DsFhKW4uUiq2VQbtqcj52isYIplPZsUcxGPrI,2862
25
25
  airflow/providers/databricks/utils/mixins.py,sha256=WUmkt3AmXalmV6zOUIJZWbTldxYunAZOstddDhKCC94,7407
26
- airflow/providers/databricks/utils/openlineage.py,sha256=7fR3CPcOruHapsz1DOZ38QN3ZcAGDADNHPY28CzYCbg,13194
27
- apache_airflow_providers_databricks-7.5.0.dist-info/entry_points.txt,sha256=hjmZm3ab2cteTR4t9eE28oKixHwNIKtLCThd6sx3XRQ,227
28
- apache_airflow_providers_databricks-7.5.0.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
29
- apache_airflow_providers_databricks-7.5.0.dist-info/METADATA,sha256=3Of4M9LHN0OsBur6wL1Nr3olZEGS_zAMIvrYOW6Rjaw,6760
30
- apache_airflow_providers_databricks-7.5.0.dist-info/RECORD,,
26
+ airflow/providers/databricks/utils/openlineage.py,sha256=DVgmT4I5-mhwMwo6j_qEvF4WUP35ZmZFwc1YqL-pMMA,15230
27
+ apache_airflow_providers_databricks-7.6.0.dist-info/entry_points.txt,sha256=hjmZm3ab2cteTR4t9eE28oKixHwNIKtLCThd6sx3XRQ,227
28
+ apache_airflow_providers_databricks-7.6.0.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
29
+ apache_airflow_providers_databricks-7.6.0.dist-info/METADATA,sha256=ThoHqv3qHIIStqI9vGH84F2Er1n3GPTmD_SxMvqi_bM,6939
30
+ apache_airflow_providers_databricks-7.6.0.dist-info/RECORD,,