apache-airflow-providers-databricks 6.4.0__tar.gz → 6.5.0__tar.gz
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
Potentially problematic release: this version of apache-airflow-providers-databricks might be problematic.
- {apache_airflow_providers_databricks-6.4.0 → apache_airflow_providers_databricks-6.5.0}/PKG-INFO +6 -6
- {apache_airflow_providers_databricks-6.4.0 → apache_airflow_providers_databricks-6.5.0}/README.rst +3 -3
- {apache_airflow_providers_databricks-6.4.0 → apache_airflow_providers_databricks-6.5.0}/airflow/providers/databricks/__init__.py +3 -6
- {apache_airflow_providers_databricks-6.4.0 → apache_airflow_providers_databricks-6.5.0}/airflow/providers/databricks/get_provider_info.py +2 -1
- {apache_airflow_providers_databricks-6.4.0 → apache_airflow_providers_databricks-6.5.0}/airflow/providers/databricks/hooks/databricks_base.py +1 -0
- {apache_airflow_providers_databricks-6.4.0 → apache_airflow_providers_databricks-6.5.0}/airflow/providers/databricks/operators/databricks.py +44 -17
- {apache_airflow_providers_databricks-6.4.0 → apache_airflow_providers_databricks-6.5.0}/airflow/providers/databricks/triggers/databricks.py +2 -0
- {apache_airflow_providers_databricks-6.4.0 → apache_airflow_providers_databricks-6.5.0}/pyproject.toml +3 -3
- {apache_airflow_providers_databricks-6.4.0 → apache_airflow_providers_databricks-6.5.0}/airflow/providers/databricks/LICENSE +0 -0
- {apache_airflow_providers_databricks-6.4.0 → apache_airflow_providers_databricks-6.5.0}/airflow/providers/databricks/hooks/__init__.py +0 -0
- {apache_airflow_providers_databricks-6.4.0 → apache_airflow_providers_databricks-6.5.0}/airflow/providers/databricks/hooks/databricks.py +0 -0
- {apache_airflow_providers_databricks-6.4.0 → apache_airflow_providers_databricks-6.5.0}/airflow/providers/databricks/hooks/databricks_sql.py +0 -0
- {apache_airflow_providers_databricks-6.4.0 → apache_airflow_providers_databricks-6.5.0}/airflow/providers/databricks/operators/__init__.py +0 -0
- {apache_airflow_providers_databricks-6.4.0 → apache_airflow_providers_databricks-6.5.0}/airflow/providers/databricks/operators/databricks_repos.py +0 -0
- {apache_airflow_providers_databricks-6.4.0 → apache_airflow_providers_databricks-6.5.0}/airflow/providers/databricks/operators/databricks_sql.py +0 -0
- {apache_airflow_providers_databricks-6.4.0 → apache_airflow_providers_databricks-6.5.0}/airflow/providers/databricks/sensors/__init__.py +0 -0
- {apache_airflow_providers_databricks-6.4.0 → apache_airflow_providers_databricks-6.5.0}/airflow/providers/databricks/sensors/databricks_partition.py +0 -0
- {apache_airflow_providers_databricks-6.4.0 → apache_airflow_providers_databricks-6.5.0}/airflow/providers/databricks/sensors/databricks_sql.py +0 -0
- {apache_airflow_providers_databricks-6.4.0 → apache_airflow_providers_databricks-6.5.0}/airflow/providers/databricks/triggers/__init__.py +0 -0
- {apache_airflow_providers_databricks-6.4.0 → apache_airflow_providers_databricks-6.5.0}/airflow/providers/databricks/utils/__init__.py +0 -0
- {apache_airflow_providers_databricks-6.4.0 → apache_airflow_providers_databricks-6.5.0}/airflow/providers/databricks/utils/databricks.py +0 -0
{apache_airflow_providers_databricks-6.4.0 → apache_airflow_providers_databricks-6.5.0}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: apache-airflow-providers-databricks
-Version: 6.4.0
+Version: 6.5.0
 Summary: Provider package apache-airflow-providers-databricks for Apache Airflow
 Keywords: airflow-provider,databricks,airflow,integration
 Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -29,8 +29,8 @@ Requires-Dist: requests>=2.27.0,<3
 Requires-Dist: apache-airflow-providers-common-sql ; extra == "common.sql"
 Requires-Dist: databricks-sdk==0.10.0 ; extra == "sdk"
 Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
-Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.4.0/changelog.html
-Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.4.0
+Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.5.0/changelog.html
+Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.5.0
 Project-URL: Slack Chat, https://s.apache.org/airflow-slack
 Project-URL: Source Code, https://github.com/apache/airflow
 Project-URL: Twitter, https://twitter.com/ApacheAirflow
@@ -82,7 +82,7 @@ Provides-Extra: sdk
 
 Package ``apache-airflow-providers-databricks``
 
-Release: ``6.4.0``
+Release: ``6.5.0``
 
 
 `Databricks <https://databricks.com/>`__
@@ -95,7 +95,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
 are in ``airflow.providers.databricks`` python package.
 
 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.4.0/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.5.0/>`_.
 
 Installation
 ------------
@@ -139,4 +139,4 @@ Dependent package
 ============================================================================================================ ==============
 
 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.4.0/changelog.html>`_.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.5.0/changelog.html>`_.
{apache_airflow_providers_databricks-6.4.0 → apache_airflow_providers_databricks-6.5.0}/README.rst
RENAMED
@@ -42,7 +42,7 @@
 
 Package ``apache-airflow-providers-databricks``
 
-Release: ``6.4.0``
+Release: ``6.5.0``
 
 
 `Databricks <https://databricks.com/>`__
@@ -55,7 +55,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
 are in ``airflow.providers.databricks`` python package.
 
 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.4.0/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.5.0/>`_.
 
 Installation
 ------------
@@ -99,4 +99,4 @@ Dependent package
 ============================================================================================================ ==============
 
 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.4.0/changelog.html>`_.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.5.0/changelog.html>`_.
{apache_airflow_providers_databricks-6.4.0 → apache_airflow_providers_databricks-6.5.0}/airflow/providers/databricks/__init__.py
RENAMED
@@ -25,14 +25,11 @@ from __future__ import annotations
 
 import packaging.version
 
-__all__ = ["__version__"]
+from airflow import __version__ as airflow_version
 
-__version__ = "6.4.0"
+__all__ = ["__version__"]
 
-try:
-    from airflow import __version__ as airflow_version
-except ImportError:
-    from airflow.version import version as airflow_version
+__version__ = "6.5.0"
 
 if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
     "2.7.0"
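The tail of this hunk is the provider's minimum-Airflow guard: at import time it compares the base version of the installed Airflow against 2.7.0 and refuses to load on anything older. A minimal sketch of that check, with an illustrative version string standing in for airflow.__version__ and an assumed error message:

import packaging.version

airflow_version = "2.9.1"  # illustrative; the real module uses airflow.__version__
base = packaging.version.parse(packaging.version.parse(airflow_version).base_version)
if base < packaging.version.parse("2.7.0"):
    # message is an assumption; the template raises RuntimeError with similar wording
    raise RuntimeError("apache-airflow-providers-databricks needs Apache Airflow 2.7.0+")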
{apache_airflow_providers_databricks-6.4.0 → apache_airflow_providers_databricks-6.5.0}/airflow/providers/databricks/get_provider_info.py
RENAMED
@@ -28,8 +28,9 @@ def get_provider_info():
         "name": "Databricks",
         "description": "`Databricks <https://databricks.com/>`__\n",
         "state": "ready",
-        "source-date-epoch":
+        "source-date-epoch": 1716287262,
         "versions": [
+            "6.5.0",
             "6.4.0",
             "6.3.0",
             "6.2.0",
{apache_airflow_providers_databricks-6.4.0 → apache_airflow_providers_databricks-6.5.0}/airflow/providers/databricks/hooks/databricks_base.py
RENAMED
@@ -80,6 +80,7 @@ class BaseDatabricksHook(BaseHook):
     :param retry_delay: The number of seconds to wait between retries (it
         might be a floating point number).
     :param retry_args: An optional dictionary with arguments passed to ``tenacity.Retrying`` class.
+    :param caller: The name of the operator that is calling the hook.
     """
 
     conn_name_attr: str = "databricks_conn_id"
{apache_airflow_providers_databricks-6.4.0 → apache_airflow_providers_databricks-6.5.0}/airflow/providers/databricks/operators/databricks.py
RENAMED
@@ -67,23 +67,22 @@ def _handle_databricks_operator_execution(operator, hook, log, context) -> None:
             log.info("%s completed successfully.", operator.task_id)
             log.info("View run status, Spark UI, and logs at %s", run_page_url)
             return
-
         if run_state.result_state == "FAILED":
-
+            failed_tasks = []
             for task in run_info.get("tasks", []):
                 if task.get("state", {}).get("result_state", "") == "FAILED":
                     task_run_id = task["run_id"]
-
-
-
-
-
-
-
-
+                    task_key = task["task_key"]
+                    run_output = hook.get_run_output(task_run_id)
+                    if "error" in run_output:
+                        error = run_output["error"]
+                    else:
+                        error = run_state.state_message
+                    failed_tasks.append({"task_key": task_key, "run_id": task_run_id, "error": error})
+
             error_message = (
                 f"{operator.task_id} failed with terminal state: {run_state} "
-                f"and with the
+                f"and with the errors {failed_tasks}"
             )
         else:
             error_message = (
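Where 6.4.0 surfaced a single error string, 6.5.0 aggregates one entry per failed task before raising. A minimal sketch of the data shape this produces, using a hypothetical Jobs API run payload and a placeholder string in place of hook.get_run_output():

# Hypothetical run_info payload; real ones come from DatabricksHook.get_run().
run_info = {
    "tasks": [
        {"task_key": "extract", "run_id": 101, "state": {"result_state": "SUCCESS"}},
        {"task_key": "load", "run_id": 102, "state": {"result_state": "FAILED"}},
    ]
}

failed_tasks = []
for task in run_info.get("tasks", []):
    if task.get("state", {}).get("result_state", "") == "FAILED":
        # In the operator the error comes from hook.get_run_output(run_id);
        # a placeholder stands in for it here.
        failed_tasks.append(
            {"task_key": task["task_key"], "run_id": task["run_id"], "error": "<error from run output>"}
        )

print(failed_tasks)
# [{'task_key': 'load', 'run_id': 102, 'error': '<error from run output>'}]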
@@ -168,7 +167,7 @@ def _handle_deferrable_databricks_operator_completion(event: dict, log: Logger)
 
     error_message = f"Job run failed with terminal state: {run_state} and with the errors {errors}"
 
-    if event["repair_run"]:
+    if event.get("repair_run"):
        log.warning(
            "%s but since repair run is set, repairing the run with all failed tasks",
            error_message,
@@ -924,9 +923,11 @@ class DatabricksNotebookOperator(BaseOperator):
     :param databricks_retry_args: An optional dictionary with arguments passed to ``tenacity.Retrying`` class.
     :param wait_for_termination: if we should wait for termination of the job run. ``True`` by default.
     :param databricks_conn_id: The name of the Airflow connection to use.
+    :param deferrable: Run operator in the deferrable mode.
     """
 
     template_fields = ("notebook_params",)
+    CALLER = "DatabricksNotebookOperator"
 
     def __init__(
         self,
@@ -943,6 +944,7 @@ class DatabricksNotebookOperator(BaseOperator):
         databricks_retry_args: dict[Any, Any] | None = None,
         wait_for_termination: bool = True,
         databricks_conn_id: str = "databricks_default",
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         **kwargs: Any,
     ):
         self.notebook_path = notebook_path
@@ -959,11 +961,12 @@ class DatabricksNotebookOperator(BaseOperator):
         self.wait_for_termination = wait_for_termination
         self.databricks_conn_id = databricks_conn_id
         self.databricks_run_id: int | None = None
+        self.deferrable = deferrable
         super().__init__(**kwargs)
 
     @cached_property
     def _hook(self) -> DatabricksHook:
-        return self._get_hook(caller="DatabricksNotebookOperator")
+        return self._get_hook(caller=self.CALLER)
 
     def _get_hook(self, caller: str) -> DatabricksHook:
         return DatabricksHook(
@@ -971,7 +974,7 @@ class DatabricksNotebookOperator(BaseOperator):
             retry_limit=self.databricks_retry_limit,
             retry_delay=self.databricks_retry_delay,
             retry_args=self.databricks_retry_args,
-            caller=caller,
+            caller=self.CALLER,
         )
 
     def _get_task_timeout_seconds(self) -> int:
@@ -1042,6 +1045,19 @@ class DatabricksNotebookOperator(BaseOperator):
         run = self._hook.get_run(self.databricks_run_id)
         run_state = RunState(**run["state"])
         self.log.info("Current state of the job: %s", run_state.life_cycle_state)
+        if self.deferrable and not run_state.is_terminal:
+            self.defer(
+                trigger=DatabricksExecutionTrigger(
+                    run_id=self.databricks_run_id,
+                    databricks_conn_id=self.databricks_conn_id,
+                    polling_period_seconds=self.polling_period_seconds,
+                    retry_limit=self.databricks_retry_limit,
+                    retry_delay=self.databricks_retry_delay,
+                    retry_args=self.databricks_retry_args,
+                    caller=self.CALLER,
+                ),
+                method_name=DEFER_METHOD_NAME,
+            )
         while not run_state.is_terminal:
             time.sleep(self.polling_period_seconds)
             run = self._hook.get_run(self.databricks_run_id)
@@ -1057,9 +1073,7 @@ class DatabricksNotebookOperator(BaseOperator):
         )
         if not run_state.is_successful:
             raise AirflowException(
-                "Task failed. Final state %s. Reason: %s",
-                run_state.result_state,
-                run_state.state_message,
+                f"Task failed. Final state {run_state.result_state}. Reason: {run_state.state_message}"
             )
         self.log.info("Task succeeded. Final state %s.", run_state.result_state)
 
@@ -1067,3 +1081,16 @@ class DatabricksNotebookOperator(BaseOperator):
         self.launch_notebook_job()
         if self.wait_for_termination:
             self.monitor_databricks_job()
+
+    def execute_complete(self, context: dict | None, event: dict) -> None:
+        run_state = RunState.from_json(event["run_state"])
+        if run_state.life_cycle_state != "TERMINATED":
+            raise AirflowException(
+                f"Databricks job failed with state {run_state.life_cycle_state}. "
+                f"Message: {run_state.state_message}"
+            )
+        if not run_state.is_successful:
+            raise AirflowException(
+                f"Task failed. Final state {run_state.result_state}. Reason: {run_state.state_message}"
+            )
+        self.log.info("Task succeeded. Final state %s.", run_state.result_state)
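Taken together, these hunks let DatabricksNotebookOperator hand a still-running job off to the triggerer instead of blocking a worker slot, resuming in execute_complete() when the run reaches a terminal state. A minimal usage sketch, assuming a connection named databricks_default; the notebook path and cluster id are placeholders:

from airflow.providers.databricks.operators.databricks import DatabricksNotebookOperator

run_notebook = DatabricksNotebookOperator(
    task_id="run_notebook",
    notebook_path="/Shared/example_notebook",    # placeholder
    source="WORKSPACE",
    existing_cluster_id="1234-567890-abcde123",  # placeholder
    databricks_conn_id="databricks_default",
    deferrable=True,  # new in 6.5.0; resumes via execute_complete() when the trigger fires
)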
{apache_airflow_providers_databricks-6.4.0 → apache_airflow_providers_databricks-6.5.0}/airflow/providers/databricks/triggers/databricks.py
RENAMED
@@ -48,6 +48,7 @@ class DatabricksExecutionTrigger(BaseTrigger):
         retry_args: dict[Any, Any] | None = None,
         run_page_url: str | None = None,
         repair_run: bool = False,
+        caller: str = "DatabricksExecutionTrigger",
     ) -> None:
         super().__init__()
         self.run_id = run_id
@@ -63,6 +64,7 @@ class DatabricksExecutionTrigger(BaseTrigger):
             retry_limit=self.retry_limit,
             retry_delay=self.retry_delay,
             retry_args=retry_args,
+            caller=caller,
         )
 
     def serialize(self) -> tuple[str, dict[str, Any]]:
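The new caller argument is forwarded to the DatabricksHook the trigger creates, so API calls made while the task is deferred can be attributed to the originating operator (per the hook's new :param caller: docstring above). A construction sketch with placeholder values:

from airflow.providers.databricks.triggers.databricks import DatabricksExecutionTrigger

trigger = DatabricksExecutionTrigger(
    run_id=42,                               # placeholder run id
    databricks_conn_id="databricks_default",
    polling_period_seconds=30,
    caller="DatabricksNotebookOperator",     # new in 6.5.0; defaults to "DatabricksExecutionTrigger"
)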
{apache_airflow_providers_databricks-6.4.0 → apache_airflow_providers_databricks-6.5.0}/pyproject.toml
RENAMED
@@ -28,7 +28,7 @@ build-backend = "flit_core.buildapi"
 
 [project]
 name = "apache-airflow-providers-databricks"
-version = "6.4.0"
+version = "6.5.0"
 description = "Provider package apache-airflow-providers-databricks for Apache Airflow"
 readme = "README.rst"
 authors = [
@@ -64,8 +64,8 @@ dependencies = [
 ]
 
 [project.urls]
-"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.4.0"
-"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.4.0/changelog.html"
+"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.5.0"
+"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.5.0/changelog.html"
 "Bug Tracker" = "https://github.com/apache/airflow/issues"
 "Source Code" = "https://github.com/apache/airflow"
 "Slack Chat" = "https://s.apache.org/airflow-slack"
All remaining files (the +0 -0 entries in the file list above) are unchanged between 6.4.0 and 6.5.0.