apache-airflow-providers-databricks 6.4.0rc1__py3-none-any.whl → 6.5.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of this package as they appear in their public registry, and is provided for informational purposes only.

Potentially problematic release.

airflow/providers/databricks/__init__.py
@@ -25,14 +25,11 @@ from __future__ import annotations
 
 import packaging.version
 
-__all__ = ["__version__"]
+from airflow import __version__ as airflow_version
 
-__version__ = "6.4.0"
+__all__ = ["__version__"]
 
-try:
-    from airflow import __version__ as airflow_version
-except ImportError:
-    from airflow.version import version as airflow_version
+__version__ = "6.5.0"
 
 if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
     "2.7.0"

airflow/providers/databricks/get_provider_info.py
@@ -28,8 +28,9 @@ def get_provider_info():
         "name": "Databricks",
         "description": "`Databricks <https://databricks.com/>`__\n",
         "state": "ready",
-        "source-date-epoch": 1714476154,
+        "source-date-epoch": 1716287262,
         "versions": [
+            "6.5.0",
             "6.4.0",
             "6.3.0",
             "6.2.0",

airflow/providers/databricks/hooks/databricks_base.py
@@ -80,6 +80,7 @@ class BaseDatabricksHook(BaseHook):
     :param retry_delay: The number of seconds to wait between retries (it
         might be a floating point number).
     :param retry_args: An optional dictionary with arguments passed to ``tenacity.Retrying`` class.
+    :param caller: The name of the operator that is calling the hook.
     """
 
     conn_name_attr: str = "databricks_conn_id"
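
The new ``caller`` parameter lets the hook record which operator created it. A minimal construction sketch, using only keyword arguments that the operator hunks below also pass (the caller string here is illustrative):

    from airflow.providers.databricks.hooks.databricks import DatabricksHook

    # Hypothetical direct use; the operators in this release pass caller=self.CALLER.
    hook = DatabricksHook(
        databricks_conn_id="databricks_default",
        retry_limit=3,
        retry_delay=1.0,
        caller="MyCustomOperator",
    )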

airflow/providers/databricks/operators/databricks.py
@@ -67,23 +67,22 @@ def _handle_databricks_operator_execution(operator, hook, log, context) -> None:
                     log.info("%s completed successfully.", operator.task_id)
                     log.info("View run status, Spark UI, and logs at %s", run_page_url)
                     return
-
                 if run_state.result_state == "FAILED":
-                    task_run_id = None
+                    failed_tasks = []
                     for task in run_info.get("tasks", []):
                         if task.get("state", {}).get("result_state", "") == "FAILED":
                             task_run_id = task["run_id"]
-                    if task_run_id is not None:
-                        run_output = hook.get_run_output(task_run_id)
-                        if "error" in run_output:
-                            notebook_error = run_output["error"]
-                        else:
-                            notebook_error = run_state.state_message
-                    else:
-                        notebook_error = run_state.state_message
+                            task_key = task["task_key"]
+                            run_output = hook.get_run_output(task_run_id)
+                            if "error" in run_output:
+                                error = run_output["error"]
+                            else:
+                                error = run_state.state_message
+                            failed_tasks.append({"task_key": task_key, "run_id": task_run_id, "error": error})
+
                     error_message = (
                         f"{operator.task_id} failed with terminal state: {run_state} "
-                        f"and with the error {notebook_error}"
+                        f"and with the errors {failed_tasks}"
                     )
                 else:
                     error_message = (
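
With this change a failed multi-task run reports every failed task, not just the last one the old loop happened to record; the aggregated list interpolated into ``error_message`` looks roughly like this (task keys, run ids, and error strings are illustrative):

    # Hypothetical failed_tasks contents for a run in which two tasks failed:
    failed_tasks = [
        {"task_key": "extract", "run_id": 123, "error": "NotebookException: division by zero"},
        {"task_key": "load", "run_id": 456, "error": "Run result state: FAILED"},
    ]
    # -> "<task_id> failed with terminal state: <run_state> and with the errors [...]"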
@@ -168,7 +167,7 @@ def _handle_deferrable_databricks_operator_completion(event: dict, log: Logger)
 
     error_message = f"Job run failed with terminal state: {run_state} and with the errors {errors}"
 
-    if event["repair_run"]:
+    if event.get("repair_run"):
         log.warning(
             "%s but since repair run is set, repairing the run with all failed tasks",
             error_message,
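
Using ``event.get("repair_run")`` makes the branch tolerant of trigger events that omit the key, falling back to a falsy ``None`` instead of raising; a tiny illustrative sketch (the event payload is made up):

    event = {"run_state": "...", "errors": []}  # hypothetical event without "repair_run"
    assert event.get("repair_run") is None      # repair branch is simply skipped
    # event["repair_run"] would raise KeyError here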
@@ -924,9 +923,11 @@ class DatabricksNotebookOperator(BaseOperator):
     :param databricks_retry_args: An optional dictionary with arguments passed to ``tenacity.Retrying`` class.
     :param wait_for_termination: if we should wait for termination of the job run. ``True`` by default.
     :param databricks_conn_id: The name of the Airflow connection to use.
+    :param deferrable: Run operator in the deferrable mode.
     """
 
     template_fields = ("notebook_params",)
+    CALLER = "DatabricksNotebookOperator"
 
     def __init__(
         self,
@@ -943,6 +944,7 @@ class DatabricksNotebookOperator(BaseOperator):
         databricks_retry_args: dict[Any, Any] | None = None,
         wait_for_termination: bool = True,
         databricks_conn_id: str = "databricks_default",
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         **kwargs: Any,
     ):
         self.notebook_path = notebook_path
@@ -959,11 +961,12 @@ class DatabricksNotebookOperator(BaseOperator):
         self.wait_for_termination = wait_for_termination
         self.databricks_conn_id = databricks_conn_id
         self.databricks_run_id: int | None = None
+        self.deferrable = deferrable
         super().__init__(**kwargs)
 
     @cached_property
     def _hook(self) -> DatabricksHook:
-        return self._get_hook(caller="DatabricksNotebookOperator")
+        return self._get_hook(caller=self.CALLER)
 
     def _get_hook(self, caller: str) -> DatabricksHook:
         return DatabricksHook(
@@ -971,7 +974,7 @@ class DatabricksNotebookOperator(BaseOperator):
             retry_limit=self.databricks_retry_limit,
             retry_delay=self.databricks_retry_delay,
             retry_args=self.databricks_retry_args,
-            caller=caller,
+            caller=self.CALLER,
         )
 
     def _get_task_timeout_seconds(self) -> int:
@@ -1042,6 +1045,19 @@ class DatabricksNotebookOperator(BaseOperator):
         run = self._hook.get_run(self.databricks_run_id)
         run_state = RunState(**run["state"])
         self.log.info("Current state of the job: %s", run_state.life_cycle_state)
+        if self.deferrable and not run_state.is_terminal:
+            self.defer(
+                trigger=DatabricksExecutionTrigger(
+                    run_id=self.databricks_run_id,
+                    databricks_conn_id=self.databricks_conn_id,
+                    polling_period_seconds=self.polling_period_seconds,
+                    retry_limit=self.databricks_retry_limit,
+                    retry_delay=self.databricks_retry_delay,
+                    retry_args=self.databricks_retry_args,
+                    caller=self.CALLER,
+                ),
+                method_name=DEFER_METHOD_NAME,
+            )
         while not run_state.is_terminal:
             time.sleep(self.polling_period_seconds)
             run = self._hook.get_run(self.databricks_run_id)
@@ -1057,9 +1073,7 @@ class DatabricksNotebookOperator(BaseOperator):
         )
         if not run_state.is_successful:
             raise AirflowException(
-                "Task failed. Final state %s. Reason: %s",
-                run_state.result_state,
-                run_state.state_message,
+                f"Task failed. Final state {run_state.result_state}. Reason: {run_state.state_message}"
             )
         self.log.info("Task succeeded. Final state %s.", run_state.result_state)
 
@@ -1067,3 +1081,16 @@ class DatabricksNotebookOperator(BaseOperator):
         self.launch_notebook_job()
         if self.wait_for_termination:
             self.monitor_databricks_job()
+
+    def execute_complete(self, context: dict | None, event: dict) -> None:
+        run_state = RunState.from_json(event["run_state"])
+        if run_state.life_cycle_state != "TERMINATED":
+            raise AirflowException(
+                f"Databricks job failed with state {run_state.life_cycle_state}. "
+                f"Message: {run_state.state_message}"
+            )
+        if not run_state.is_successful:
+            raise AirflowException(
+                f"Task failed. Final state {run_state.result_state}. Reason: {run_state.state_message}"
+            )
+        self.log.info("Task succeeded. Final state %s.", run_state.result_state)
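
Taken together, these hunks let ``DatabricksNotebookOperator`` hand polling off to the triggerer and resume in ``execute_complete``; a minimal usage sketch, with all resource identifiers made up:

    from airflow.providers.databricks.operators.databricks import DatabricksNotebookOperator

    run_notebook = DatabricksNotebookOperator(
        task_id="run_notebook",
        notebook_path="/Shared/example_notebook",  # hypothetical workspace path
        source="WORKSPACE",
        existing_cluster_id="1234-567890-abcde123",  # hypothetical cluster id
        databricks_conn_id="databricks_default",
        deferrable=True,  # new in 6.5.0; defaults to [operators] default_deferrable
    )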

airflow/providers/databricks/triggers/databricks.py
@@ -48,6 +48,7 @@ class DatabricksExecutionTrigger(BaseTrigger):
         retry_args: dict[Any, Any] | None = None,
         run_page_url: str | None = None,
         repair_run: bool = False,
+        caller: str = "DatabricksExecutionTrigger",
     ) -> None:
         super().__init__()
         self.run_id = run_id
@@ -63,6 +64,7 @@ class DatabricksExecutionTrigger(BaseTrigger):
             retry_limit=self.retry_limit,
             retry_delay=self.retry_delay,
             retry_args=retry_args,
+            caller=caller,
         )
 
     def serialize(self) -> tuple[str, dict[str, Any]]:

apache_airflow_providers_databricks-6.5.0.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: apache-airflow-providers-databricks
-Version: 6.4.0rc1
+Version: 6.5.0
 Summary: Provider package apache-airflow-providers-databricks for Apache Airflow
 Keywords: airflow-provider,databricks,airflow,integration
 Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -22,15 +22,15 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: System :: Monitoring
 Requires-Dist: aiohttp>=3.9.2, <4
-Requires-Dist: apache-airflow-providers-common-sql>=1.10.0rc0
-Requires-Dist: apache-airflow>=2.7.0rc0
+Requires-Dist: apache-airflow-providers-common-sql>=1.10.0
+Requires-Dist: apache-airflow>=2.7.0
 Requires-Dist: databricks-sql-connector>=2.0.0, <3.0.0, !=2.9.0
 Requires-Dist: requests>=2.27.0,<3
 Requires-Dist: apache-airflow-providers-common-sql ; extra == "common.sql"
 Requires-Dist: databricks-sdk==0.10.0 ; extra == "sdk"
 Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
-Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.4.0/changelog.html
-Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.4.0
+Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.5.0/changelog.html
+Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.5.0
 Project-URL: Slack Chat, https://s.apache.org/airflow-slack
 Project-URL: Source Code, https://github.com/apache/airflow
 Project-URL: Twitter, https://twitter.com/ApacheAirflow
@@ -82,7 +82,7 @@ Provides-Extra: sdk
 
 Package ``apache-airflow-providers-databricks``
 
-Release: ``6.4.0.rc1``
+Release: ``6.5.0``
 
 
 `Databricks <https://databricks.com/>`__
@@ -95,7 +95,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
 are in ``airflow.providers.databricks`` python package.
 
 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.4.0/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.5.0/>`_.
 
 Installation
 ------------
@@ -139,4 +139,4 @@ Dependent package
 ============================================================================================================ ==============
 
 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.4.0/changelog.html>`_.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.5.0/changelog.html>`_.

apache_airflow_providers_databricks-6.5.0.dist-info/RECORD
@@ -1,22 +1,22 @@
 airflow/providers/databricks/LICENSE,sha256=ywUBpKZc7Jb96rVt5I3IDbg7dIJAbUSHkuoDcF3jbH4,13569
-airflow/providers/databricks/__init__.py,sha256=ZJ0GA08oUAP30uPjov_SNDpKfoOR5bQ017KXjsPXIqk,1585
-airflow/providers/databricks/get_provider_info.py,sha256=Jxx0xdxa7i4-Gxug9kbVNN0sZjPuNFDVP1Ewux3aLfQ,6330
+airflow/providers/databricks/__init__.py,sha256=JsYXdg4453ofY8-fk8U0XwsqkJDG10zVtOPYdrpl9-Y,1497
+airflow/providers/databricks/get_provider_info.py,sha256=CZqPGkTq9yxBbzG5iAGX3Y21HKDIRhHn5KnJeKfizY8,6351
 airflow/providers/databricks/hooks/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
 airflow/providers/databricks/hooks/databricks.py,sha256=Pq42FzDGCZ2B0AaZw1zOD74uuXwJWHvwh2YkcQ8G-vk,23715
-airflow/providers/databricks/hooks/databricks_base.py,sha256=uMHbJSL8rdAgb2PKyQnj7F__Fji5lSJgszJrVsUowig,30592
+airflow/providers/databricks/hooks/databricks_base.py,sha256=XtLF6qK5QSMwVkhorniHZbos_GA4XHIxI8urBs8zdOQ,30662
 airflow/providers/databricks/hooks/databricks_sql.py,sha256=8LxnnVEKfk9nIJam3LR8E8uiOqNauMP0WgFa9eKX3Go,12564
 airflow/providers/databricks/operators/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
-airflow/providers/databricks/operators/databricks.py,sha256=WW_QJ14qzDmOWOANSZ8mgdgGBylKOiflB7tyszFqd-w,51327
+airflow/providers/databricks/operators/databricks.py,sha256=OCWUA_UzR8LyEMpbfcPejAgHD8rtDD9ILAD4GggWD9E,52819
 airflow/providers/databricks/operators/databricks_repos.py,sha256=I1z2ppGfM_oPxR8BM6Nk1i6JuUOWB40dLbRNEZUvccA,13093
 airflow/providers/databricks/operators/databricks_sql.py,sha256=ABqM3aEeZELcVly2qUc4vKR1qmSW32yWYrS6seRwHi0,16800
 airflow/providers/databricks/sensors/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
 airflow/providers/databricks/sensors/databricks_partition.py,sha256=TRZVjGEYzIbN4nZDPJEVavXdcegpyj0BVKXINMRFlCM,10605
 airflow/providers/databricks/sensors/databricks_sql.py,sha256=8qSfbzpWgU2_oZU9pS9SA_MSYhyIHgYZjTYfDkDH84Y,5771
 airflow/providers/databricks/triggers/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
-airflow/providers/databricks/triggers/databricks.py,sha256=LEj3YMr_yIS3z5RP0TzZ0XlRBE1ognPzcbYaiEJT8G8,5025
+airflow/providers/databricks/triggers/databricks.py,sha256=xk9aEfdZnG33a4WSFfg6SZF4FfROV8B4HOyBYBvZR_Q,5104
 airflow/providers/databricks/utils/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
 airflow/providers/databricks/utils/databricks.py,sha256=EICTPZTD0R0dy9UGKgv8srkrBTgzCQrcYNL9oBWuhzk,2890
-apache_airflow_providers_databricks-6.4.0rc1.dist-info/entry_points.txt,sha256=1WxGXTFDb107eV5Zmrt3p12J4LHYk56-ZKlvpOK7vg4,106
-apache_airflow_providers_databricks-6.4.0rc1.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
-apache_airflow_providers_databricks-6.4.0rc1.dist-info/METADATA,sha256=2tX3_hVSvXOED2O3s42IBN_7saKuQjPCZ6kVIwVA-PE,6500
-apache_airflow_providers_databricks-6.4.0rc1.dist-info/RECORD,,
+apache_airflow_providers_databricks-6.5.0.dist-info/entry_points.txt,sha256=1WxGXTFDb107eV5Zmrt3p12J4LHYk56-ZKlvpOK7vg4,106
+apache_airflow_providers_databricks-6.5.0.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
+apache_airflow_providers_databricks-6.5.0.dist-info/METADATA,sha256=L7_iBfAW2fu9RRcnKaCHSbc1BPCXh8Mb_m4W8MUxvmQ,6487
+apache_airflow_providers_databricks-6.5.0.dist-info/RECORD,,