apache-airflow-providers-databricks 6.9.0__tar.gz → 6.10.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of apache-airflow-providers-databricks might be problematic.

Files changed (24)
  1. {apache_airflow_providers_databricks-6.9.0 → apache_airflow_providers_databricks-6.10.0}/PKG-INFO +6 -6
  2. {apache_airflow_providers_databricks-6.9.0 → apache_airflow_providers_databricks-6.10.0}/README.rst +3 -3
  3. {apache_airflow_providers_databricks-6.9.0 → apache_airflow_providers_databricks-6.10.0}/airflow/providers/databricks/__init__.py +1 -1
  4. {apache_airflow_providers_databricks-6.9.0 → apache_airflow_providers_databricks-6.10.0}/airflow/providers/databricks/get_provider_info.py +2 -1
  5. {apache_airflow_providers_databricks-6.9.0 → apache_airflow_providers_databricks-6.10.0}/airflow/providers/databricks/hooks/databricks.py +10 -0
  6. {apache_airflow_providers_databricks-6.9.0 → apache_airflow_providers_databricks-6.10.0}/airflow/providers/databricks/operators/databricks.py +64 -3
  7. {apache_airflow_providers_databricks-6.9.0 → apache_airflow_providers_databricks-6.10.0}/airflow/providers/databricks/plugins/databricks_workflow.py +0 -12
  8. {apache_airflow_providers_databricks-6.9.0 → apache_airflow_providers_databricks-6.10.0}/airflow/providers/databricks/sensors/databricks_partition.py +1 -13
  9. {apache_airflow_providers_databricks-6.9.0 → apache_airflow_providers_databricks-6.10.0}/airflow/providers/databricks/sensors/databricks_sql.py +1 -4
  10. {apache_airflow_providers_databricks-6.9.0 → apache_airflow_providers_databricks-6.10.0}/pyproject.toml +3 -3
  11. {apache_airflow_providers_databricks-6.9.0 → apache_airflow_providers_databricks-6.10.0}/airflow/providers/databricks/LICENSE +0 -0
  12. {apache_airflow_providers_databricks-6.9.0 → apache_airflow_providers_databricks-6.10.0}/airflow/providers/databricks/hooks/__init__.py +0 -0
  13. {apache_airflow_providers_databricks-6.9.0 → apache_airflow_providers_databricks-6.10.0}/airflow/providers/databricks/hooks/databricks_base.py +0 -0
  14. {apache_airflow_providers_databricks-6.9.0 → apache_airflow_providers_databricks-6.10.0}/airflow/providers/databricks/hooks/databricks_sql.py +0 -0
  15. {apache_airflow_providers_databricks-6.9.0 → apache_airflow_providers_databricks-6.10.0}/airflow/providers/databricks/operators/__init__.py +0 -0
  16. {apache_airflow_providers_databricks-6.9.0 → apache_airflow_providers_databricks-6.10.0}/airflow/providers/databricks/operators/databricks_repos.py +0 -0
  17. {apache_airflow_providers_databricks-6.9.0 → apache_airflow_providers_databricks-6.10.0}/airflow/providers/databricks/operators/databricks_sql.py +0 -0
  18. {apache_airflow_providers_databricks-6.9.0 → apache_airflow_providers_databricks-6.10.0}/airflow/providers/databricks/operators/databricks_workflow.py +0 -0
  19. {apache_airflow_providers_databricks-6.9.0 → apache_airflow_providers_databricks-6.10.0}/airflow/providers/databricks/plugins/__init__.py +0 -0
  20. {apache_airflow_providers_databricks-6.9.0 → apache_airflow_providers_databricks-6.10.0}/airflow/providers/databricks/sensors/__init__.py +0 -0
  21. {apache_airflow_providers_databricks-6.9.0 → apache_airflow_providers_databricks-6.10.0}/airflow/providers/databricks/triggers/__init__.py +0 -0
  22. {apache_airflow_providers_databricks-6.9.0 → apache_airflow_providers_databricks-6.10.0}/airflow/providers/databricks/triggers/databricks.py +0 -0
  23. {apache_airflow_providers_databricks-6.9.0 → apache_airflow_providers_databricks-6.10.0}/airflow/providers/databricks/utils/__init__.py +0 -0
  24. {apache_airflow_providers_databricks-6.9.0 → apache_airflow_providers_databricks-6.10.0}/airflow/providers/databricks/utils/databricks.py +0 -0
--- apache_airflow_providers_databricks-6.9.0/PKG-INFO
+++ apache_airflow_providers_databricks-6.10.0/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: apache-airflow-providers-databricks
-Version: 6.9.0
+Version: 6.10.0
 Summary: Provider package apache-airflow-providers-databricks for Apache Airflow
 Keywords: airflow-provider,databricks,airflow,integration
 Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -34,8 +34,8 @@ Requires-Dist: azure-identity>=1.3.1 ; extra == "azure-identity"
 Requires-Dist: apache-airflow-providers-common-sql ; extra == "common.sql"
 Requires-Dist: databricks-sdk==0.10.0 ; extra == "sdk"
 Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
-Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.9.0/changelog.html
-Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.9.0
+Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.10.0/changelog.html
+Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.10.0
 Project-URL: Slack Chat, https://s.apache.org/airflow-slack
 Project-URL: Source Code, https://github.com/apache/airflow
 Project-URL: Twitter, https://twitter.com/ApacheAirflow
@@ -88,7 +88,7 @@ Provides-Extra: sdk
 
 Package ``apache-airflow-providers-databricks``
 
-Release: ``6.9.0``
+Release: ``6.10.0``
 
 
 `Databricks <https://databricks.com/>`__
@@ -101,7 +101,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
 are in ``airflow.providers.databricks`` python package.
 
 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.9.0/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.10.0/>`_.
 
 Installation
 ------------
@@ -149,4 +149,4 @@ Dependent package
 ============================================================================================================ ==============
 
 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.9.0/changelog.html>`_.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.10.0/changelog.html>`_.
--- apache_airflow_providers_databricks-6.9.0/README.rst
+++ apache_airflow_providers_databricks-6.10.0/README.rst
@@ -42,7 +42,7 @@
 
 Package ``apache-airflow-providers-databricks``
 
-Release: ``6.9.0``
+Release: ``6.10.0``
 
 
 `Databricks <https://databricks.com/>`__
@@ -55,7 +55,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
 are in ``airflow.providers.databricks`` python package.
 
 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.9.0/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.10.0/>`_.
 
 Installation
 ------------
@@ -103,4 +103,4 @@ Dependent package
 ============================================================================================================ ==============
 
 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.9.0/changelog.html>`_.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.10.0/changelog.html>`_.
--- apache_airflow_providers_databricks-6.9.0/airflow/providers/databricks/__init__.py
+++ apache_airflow_providers_databricks-6.10.0/airflow/providers/databricks/__init__.py
@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version
 
 __all__ = ["__version__"]
 
-__version__ = "6.9.0"
+__version__ = "6.10.0"
 
 if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
     "2.8.0"
--- apache_airflow_providers_databricks-6.9.0/airflow/providers/databricks/get_provider_info.py
+++ apache_airflow_providers_databricks-6.10.0/airflow/providers/databricks/get_provider_info.py
@@ -28,8 +28,9 @@ def get_provider_info():
         "name": "Databricks",
         "description": "`Databricks <https://databricks.com/>`__\n",
         "state": "ready",
-        "source-date-epoch": 1723970061,
+        "source-date-epoch": 1726860611,
         "versions": [
+            "6.10.0",
             "6.9.0",
             "6.8.0",
             "6.7.0",
--- apache_airflow_providers_databricks-6.9.0/airflow/providers/databricks/hooks/databricks.py
+++ apache_airflow_providers_databricks-6.10.0/airflow/providers/databricks/hooks/databricks.py
@@ -44,6 +44,7 @@ TERMINATE_CLUSTER_ENDPOINT = ("POST", "api/2.0/clusters/delete")
 
 CREATE_ENDPOINT = ("POST", "api/2.1/jobs/create")
 RESET_ENDPOINT = ("POST", "api/2.1/jobs/reset")
+UPDATE_ENDPOINT = ("POST", "api/2.1/jobs/update")
 RUN_NOW_ENDPOINT = ("POST", "api/2.1/jobs/run-now")
 SUBMIT_RUN_ENDPOINT = ("POST", "api/2.1/jobs/runs/submit")
 GET_RUN_ENDPOINT = ("GET", "api/2.1/jobs/runs/get")
@@ -233,6 +234,15 @@ class DatabricksHook(BaseDatabricksHook):
         """
         self._do_api_call(RESET_ENDPOINT, {"job_id": job_id, "new_settings": json})
 
+    def update_job(self, job_id: str, json: dict) -> None:
+        """
+        Call the ``api/2.1/jobs/update`` endpoint.
+
+        :param job_id: The id of the job to update.
+        :param json: The data used in the new_settings of the request to the ``update`` endpoint.
+        """
+        self._do_api_call(UPDATE_ENDPOINT, {"job_id": job_id, "new_settings": json})
+
     def run_now(self, json: dict) -> int:
         """
         Call the ``api/2.1/jobs/run-now`` endpoint.
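
The new ``update_job`` hook method is a thin wrapper around the Databricks partial job update endpoint. A minimal sketch of calling it directly, assuming a configured ``databricks_default`` connection and an illustrative ``new_settings`` payload (both are assumptions, not part of the diff):

    from airflow.providers.databricks.hooks.databricks import DatabricksHook

    hook = DatabricksHook(databricks_conn_id="databricks_default")
    # Sends {"job_id": ..., "new_settings": ...} to api/2.1/jobs/update;
    # "timeout_seconds" is just an example of a partial settings change.
    hook.update_job(job_id="123", json={"timeout_seconds": 7200})
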
--- apache_airflow_providers_databricks-6.9.0/airflow/providers/databricks/operators/databricks.py
+++ apache_airflow_providers_databricks-6.10.0/airflow/providers/databricks/operators/databricks.py
@@ -100,13 +100,23 @@ def _handle_databricks_operator_execution(operator, hook, log, context) -> None:
                         f"and with the error {run_state.state_message}"
                     )
 
-                if isinstance(operator, DatabricksRunNowOperator) and operator.repair_run:
+                should_repair = (
+                    isinstance(operator, DatabricksRunNowOperator)
+                    and operator.repair_run
+                    and (
+                        not operator.databricks_repair_reason_new_settings
+                        or is_repair_reason_match_exist(operator, run_state)
+                    )
+                )
+
+                if should_repair:
                     operator.repair_run = False
                     log.warning(
                         "%s but since repair run is set, repairing the run with all failed tasks",
                         error_message,
                     )
-
+                    job_id = operator.json["job_id"]
+                    update_job_for_repair(operator, hook, job_id, run_state)
                     latest_repair_id = hook.get_latest_repair_id(operator.run_id)
                     repair_json = {"run_id": operator.run_id, "rerun_all_failed_tasks": True}
                     if latest_repair_id is not None:
@@ -123,6 +133,41 @@ def _handle_databricks_operator_execution(operator, hook, log, context) -> None:
         log.info("View run status, Spark UI, and logs at %s", run_page_url)
 
 
+def is_repair_reason_match_exist(operator: Any, run_state: RunState) -> bool:
+    """
+    Check if the repair reason matches the run state message.
+
+    :param operator: Databricks operator being handled
+    :param run_state: Run state of the Databricks job
+    :return: True if repair reason matches the run state message, False otherwise
+    """
+    return any(reason in run_state.state_message for reason in operator.databricks_repair_reason_new_settings)
+
+
+def update_job_for_repair(operator: Any, hook: Any, job_id: int, run_state: RunState) -> None:
+    """
+    Update job settings(partial) to repair the run with all failed tasks.
+
+    :param operator: Databricks operator being handled
+    :param hook: Databricks hook
+    :param job_id: Job ID of Databricks
+    :param run_state: Run state of the Databricks job
+    """
+    repair_reason = next(
+        (
+            reason
+            for reason in operator.databricks_repair_reason_new_settings
+            if reason in run_state.state_message
+        ),
+        None,
+    )
+    if repair_reason is not None:
+        new_settings_json = normalise_json_content(
+            operator.databricks_repair_reason_new_settings[repair_reason]
+        )
+        hook.update_job(job_id=job_id, json=new_settings_json)
+
+
 def _handle_deferrable_databricks_operator_execution(operator, hook, log, context) -> None:
     """
     Handle the Airflow + Databricks lifecycle logic for deferrable Databricks operators.
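
The repair gating added above is a plain substring check: each key of ``databricks_repair_reason_new_settings`` is tested against the run's ``state_message``, and the first match selects the ``new_settings`` sent through ``update_job`` before the repair request. A small sketch of that matching, with a made-up reason string and payload:

    # Hypothetical mapping; the key and settings values are illustrative only.
    databricks_repair_reason_new_settings = {
        "Cluster became unreachable": {"timeout_seconds": 7200},
    }
    state_message = "Run failed: Cluster became unreachable while executing the task"
    # Mirrors is_repair_reason_match_exist(): any key contained in the message counts as a match.
    matched = any(reason in state_message for reason in databricks_repair_reason_new_settings)
    print(matched)  # True
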
@@ -674,6 +719,7 @@ class DatabricksRunNowOperator(BaseOperator):
         - ``spark_submit_params``
         - ``idempotency_token``
         - ``repair_run``
+        - ``databricks_repair_reason_new_settings``
         - ``cancel_previous_runs``
 
     :param job_id: the job_id of the existing Databricks job.
@@ -764,6 +810,12 @@
     :param wait_for_termination: if we should wait for termination of the job run. ``True`` by default.
     :param deferrable: Run operator in the deferrable mode.
     :param repair_run: Repair the databricks run in case of failure.
+    :param databricks_repair_reason_new_settings: A dict of reason and new_settings JSON object for which
+        to repair the run. `None` by default. `None` means to repair at all cases with existing job
+        settings otherwise check whether `RunState` state_message contains reason and
+        update job settings as per new_settings using databricks partial job update endpoint
+        (https://docs.databricks.com/api/workspace/jobs/update). If nothing is matched, then repair
+        will not get triggered.
     :param cancel_previous_runs: Cancel all existing running jobs before submitting new one.
     """
 
@@ -796,6 +848,7 @@
         wait_for_termination: bool = True,
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         repair_run: bool = False,
+        databricks_repair_reason_new_settings: dict[str, Any] | None = None,
         cancel_previous_runs: bool = False,
         **kwargs,
     ) -> None:
@@ -810,6 +863,7 @@
         self.wait_for_termination = wait_for_termination
         self.deferrable = deferrable
         self.repair_run = repair_run
+        self.databricks_repair_reason_new_settings = databricks_repair_reason_new_settings or {}
         self.cancel_previous_runs = cancel_previous_runs
 
         if job_id is not None:
@@ -870,9 +924,16 @@
     def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> None:
         if event:
             _handle_deferrable_databricks_operator_completion(event, self.log)
-            if event["repair_run"]:
+            run_state = RunState.from_json(event["run_state"])
+            should_repair = event["repair_run"] and (
+                not self.databricks_repair_reason_new_settings
+                or is_repair_reason_match_exist(self, run_state)
+            )
+            if should_repair:
                 self.repair_run = False
                 self.run_id = event["run_id"]
+                job_id = self._hook.get_job_id(self.run_id)
+                update_job_for_repair(self, self._hook, job_id, run_state)
                 latest_repair_id = self._hook.get_latest_repair_id(self.run_id)
                 repair_json = {"run_id": self.run_id, "rerun_all_failed_tasks": True}
                 if latest_repair_id is not None:
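
Taken together, a hedged usage sketch of the new parameter on ``DatabricksRunNowOperator`` (the job id, reason string, and settings below are assumptions for illustration, not values from this release):

    from airflow.providers.databricks.operators.databricks import DatabricksRunNowOperator

    run_job = DatabricksRunNowOperator(
        task_id="run_nightly_job",
        databricks_conn_id="databricks_default",
        job_id=123,
        repair_run=True,
        # If the failed run's state_message contains "Timed out", the job is first patched
        # via api/2.1/jobs/update with these settings and then repaired with all failed tasks.
        databricks_repair_reason_new_settings={"Timed out": {"timeout_seconds": 7200}},
    )
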
--- apache_airflow_providers_databricks-6.9.0/airflow/providers/databricks/plugins/databricks_workflow.py
+++ apache_airflow_providers_databricks-6.10.0/airflow/providers/databricks/plugins/databricks_workflow.py
@@ -24,7 +24,6 @@ from urllib.parse import unquote
 
 from flask import current_app, flash, redirect, request, url_for
 from flask_appbuilder.api import expose
-from packaging.version import Version
 
 from airflow.exceptions import AirflowException, TaskInstanceNotFound
 from airflow.models import BaseOperator, BaseOperatorLink
@@ -34,13 +33,11 @@ from airflow.models.taskinstance import TaskInstance, TaskInstanceKey
 from airflow.models.xcom import XCom
 from airflow.plugins_manager import AirflowPlugin
 from airflow.providers.databricks.hooks.databricks import DatabricksHook
-from airflow.security import permissions
 from airflow.utils.airflow_flask_app import AirflowApp
 from airflow.utils.log.logging_mixin import LoggingMixin
 from airflow.utils.session import NEW_SESSION, provide_session
 from airflow.utils.state import TaskInstanceState
 from airflow.utils.task_group import TaskGroup
-from airflow.version import version
 from airflow.www import auth
 from airflow.www.views import AirflowBaseView
 
@@ -55,15 +52,6 @@ airflow_app = cast(AirflowApp, current_app)
 
 
 def get_auth_decorator():
-    # TODO: remove this if block when min_airflow_version is set to higher than 2.8.0
-    if Version(version) < Version("2.8"):
-        return auth.has_access(
-            [
-                (permissions.ACTION_CAN_EDIT, permissions.RESOURCE_DAG),
-                (permissions.ACTION_CAN_CREATE, permissions.RESOURCE_DAG_RUN),
-            ]
-        )
-
     from airflow.auth.managers.models.resource_details import DagAccessEntity
 
     return auth.has_access_dag("POST", DagAccessEntity.RUN)
--- apache_airflow_providers_databricks-6.9.0/airflow/providers/databricks/sensors/databricks_partition.py
+++ apache_airflow_providers_databricks-6.10.0/airflow/providers/databricks/sensors/databricks_partition.py
@@ -26,7 +26,7 @@ from typing import TYPE_CHECKING, Any, Callable, Sequence
 
 from databricks.sql.utils import ParamEscaper
 
-from airflow.exceptions import AirflowException, AirflowSkipException
+from airflow.exceptions import AirflowException
 from airflow.providers.common.sql.hooks.sql import fetch_all_handler
 from airflow.providers.databricks.hooks.databricks_sql import DatabricksSqlHook
 from airflow.sensors.base import BaseSensorOperator
@@ -182,10 +182,7 @@ class DatabricksPartitionSensor(BaseSensorOperator):
         partition_columns = self._sql_sensor(f"DESCRIBE DETAIL {table_name}")[0][7]
         self.log.debug("Partition columns: %s", partition_columns)
         if len(partition_columns) < 1:
-            # TODO: remove this if block when min_airflow_version is set to higher than 2.7.1
             message = f"Table {table_name} does not have partitions"
-            if self.soft_fail:
-                raise AirflowSkipException(message)
             raise AirflowException(message)
 
         formatted_opts = ""
@@ -207,17 +204,11 @@
                             f"""{partition_col}{self.partition_operator}{self.escaper.escape_item(partition_value)}"""
                         )
                 else:
-                    # TODO: remove this if block when min_airflow_version is set to higher than 2.7.1
                     message = f"Column {partition_col} not part of table partitions: {partition_columns}"
-                    if self.soft_fail:
-                        raise AirflowSkipException(message)
                     raise AirflowException(message)
         else:
             # Raises exception if the table does not have any partitions.
-            # TODO: remove this if block when min_airflow_version is set to higher than 2.7.1
             message = "No partitions specified to check with the sensor."
-            if self.soft_fail:
-                raise AirflowSkipException(message)
             raise AirflowException(message)
         formatted_opts = f"{prefix} {joiner_val.join(output_list)} {suffix}"
         self.log.debug("Formatted options: %s", formatted_opts)
@@ -231,8 +222,5 @@
         if partition_result:
             return True
         else:
-            # TODO: remove this if block when min_airflow_version is set to higher than 2.7.1
             message = f"Specified partition(s): {self.partitions} were not found."
-            if self.soft_fail:
-                raise AirflowSkipException(message)
             raise AirflowException(message)
--- apache_airflow_providers_databricks-6.9.0/airflow/providers/databricks/sensors/databricks_sql.py
+++ apache_airflow_providers_databricks-6.10.0/airflow/providers/databricks/sensors/databricks_sql.py
@@ -23,7 +23,7 @@ from __future__ import annotations
 from functools import cached_property
 from typing import TYPE_CHECKING, Any, Callable, Iterable, Sequence
 
-from airflow.exceptions import AirflowException, AirflowSkipException
+from airflow.exceptions import AirflowException
 from airflow.providers.common.sql.hooks.sql import fetch_all_handler
 from airflow.providers.databricks.hooks.databricks_sql import DatabricksSqlHook
 from airflow.sensors.base import BaseSensorOperator
@@ -117,13 +117,10 @@ class DatabricksSqlSensor(BaseSensorOperator):
     def _get_results(self) -> bool:
         """Use the Databricks SQL hook and run the specified SQL query."""
         if not (self._http_path or self._sql_warehouse_name):
-            # TODO: remove this if block when min_airflow_version is set to higher than 2.7.1
             message = (
                 "Databricks SQL warehouse/cluster configuration missing. Please specify either"
                 " http_path or sql_warehouse_name."
             )
-            if self.soft_fail:
-                raise AirflowSkipException(message)
             raise AirflowException(message)
         hook = self.hook
         sql_result = hook.run(
--- apache_airflow_providers_databricks-6.9.0/pyproject.toml
+++ apache_airflow_providers_databricks-6.10.0/pyproject.toml
@@ -28,7 +28,7 @@ build-backend = "flit_core.buildapi"
 
 [project]
 name = "apache-airflow-providers-databricks"
-version = "6.9.0"
+version = "6.10.0"
 description = "Provider package apache-airflow-providers-databricks for Apache Airflow"
 readme = "README.rst"
 authors = [
@@ -68,8 +68,8 @@ dependencies = [
 ]
 
 [project.urls]
-"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.9.0"
-"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.9.0/changelog.html"
+"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.10.0"
+"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.10.0/changelog.html"
 "Bug Tracker" = "https://github.com/apache/airflow/issues"
 "Source Code" = "https://github.com/apache/airflow"
 "Slack Chat" = "https://s.apache.org/airflow-slack"