apache-airflow-providers-databricks 6.0.0rc1__py3-none-any.whl → 6.1.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
- airflow/providers/databricks/__init__.py +1 -1
- airflow/providers/databricks/get_provider_info.py +11 -9
- airflow/providers/databricks/hooks/databricks.py +13 -2
- airflow/providers/databricks/hooks/databricks_sql.py +8 -6
- airflow/providers/databricks/operators/databricks.py +35 -12
- {apache_airflow_providers_databricks-6.0.0rc1.dist-info → apache_airflow_providers_databricks-6.1.0.dist-info}/METADATA +8 -8
- {apache_airflow_providers_databricks-6.0.0rc1.dist-info → apache_airflow_providers_databricks-6.1.0.dist-info}/RECORD +9 -9
- {apache_airflow_providers_databricks-6.0.0rc1.dist-info → apache_airflow_providers_databricks-6.1.0.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_databricks-6.0.0rc1.dist-info → apache_airflow_providers_databricks-6.1.0.dist-info}/entry_points.txt +0 -0
airflow/providers/databricks/get_provider_info.py

@@ -27,9 +27,10 @@ def get_provider_info():
         "package-name": "apache-airflow-providers-databricks",
         "name": "Databricks",
         "description": "`Databricks <https://databricks.com/>`__\n",
-        "
-        "source-date-epoch":
+        "state": "ready",
+        "source-date-epoch": 1705912006,
         "versions": [
+            "6.1.0",
             "6.0.0",
             "5.0.1",
             "5.0.0",
@@ -70,6 +71,14 @@ def get_provider_info():
             "databricks-sql-connector>=2.0.0, <3.0.0, !=2.9.0",
             "aiohttp>=3.6.3, <4",
         ],
+        "additional-extras": [
+            {
+                "name": "sdk",
+                "description": "Install Databricks SDK",
+                "dependencies": ["databricks-sdk==0.10.0"],
+            }
+        ],
+        "devel-dependencies": ["deltalake>=0.12.0"],
         "integrations": [
             {
                 "integration-name": "Databricks",
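The new ``additional-extras`` entry wires the optional Databricks SDK dependency to a package extra; assuming the standard provider-extras convention, it can be pulled in with:

    pip install "apache-airflow-providers-databricks[sdk]"

The ``Requires-Dist: databricks-sdk==0.10.0 ; extra == "sdk"`` line in the METADATA diff below shows the same mapping on the wheel side.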
@@ -153,11 +162,4 @@ def get_provider_info():
             }
         ],
         "extra-links": ["airflow.providers.databricks.operators.databricks.DatabricksJobRunLink"],
-        "additional-extras": [
-            {
-                "name": "sdk",
-                "description": "Install Databricks SDK",
-                "dependencies": ["databricks-sdk==0.10.0"],
-            }
-        ],
     }
airflow/providers/databricks/hooks/databricks.py

@@ -519,13 +519,24 @@ class DatabricksHook(BaseDatabricksHook):
         json = {"run_id": run_id}
         self._do_api_call(DELETE_RUN_ENDPOINT, json)

-    def repair_run(self, json: dict) ->
+    def repair_run(self, json: dict) -> int:
         """
         Re-run one or more tasks.

         :param json: repair a job run.
         """
-        self._do_api_call(REPAIR_RUN_ENDPOINT, json)
+        response = self._do_api_call(REPAIR_RUN_ENDPOINT, json)
+        return response["repair_id"]
+
+    def get_latest_repair_id(self, run_id: int) -> int | None:
+        """Get latest repair id if any exist for run_id else None."""
+        json = {"run_id": run_id, "include_history": True}
+        response = self._do_api_call(GET_RUN_ENDPOINT, json)
+        repair_history = response["repair_history"]
+        if len(repair_history) == 1:
+            return None
+        else:
+            return repair_history[-1]["id"]

     def get_cluster_state(self, cluster_id: str) -> ClusterState:
         """
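Taken together, the two new methods let callers chain repairs of the same run. A minimal sketch, assuming an existing Databricks connection; the connection id and run id here are hypothetical:

    from airflow.providers.databricks.hooks.databricks import DatabricksHook

    hook = DatabricksHook(databricks_conn_id="databricks_default")  # assumed connection id
    run_id = 42  # hypothetical job run id

    # Build the repair request; chain from the latest repair if one already exists.
    repair_json = {"run_id": run_id, "rerun_all_failed_tasks": True}
    latest_repair_id = hook.get_latest_repair_id(run_id)
    if latest_repair_id is not None:
        repair_json["latest_repair_id"] = latest_repair_id

    repair_id = hook.repair_run(repair_json)  # repair_run now returns the new repair_id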
airflow/providers/databricks/hooks/databricks_sql.py

@@ -174,7 +174,7 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
         )
         return self._sql_conn

-    @overload
+    @overload  # type: ignore[override]
     def run(
         self,
         sql: str | Iterable[str],
@@ -249,7 +249,7 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
                 self.set_autocommit(conn, autocommit)

                 with closing(conn.cursor()) as cur:
-                    self._run_command(cur, sql_statement, parameters)
+                    self._run_command(cur, sql_statement, parameters)  # type: ignore[attr-defined]
                     if handler is not None:
                         raw_result = handler(cur)
                         if self.return_tuple:
@@ -280,13 +280,15 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
         # instantiated namedtuple, and will never do: https://github.com/python/mypy/issues/848
         if isinstance(result, list):
             rows: list[Row] = result
-
-
+            if not rows:
+                return []
+            rows_fields = tuple(rows[0].__fields__)
+            rows_object = namedtuple("Row", rows_fields, rename=True)  # type: ignore
             return cast(List[tuple], [rows_object(*row) for row in rows])
         else:
             row: Row = result
-            row_fields = row.__fields__
-            row_object = namedtuple("Row", row_fields)  # type: ignore
+            row_fields = tuple(row.__fields__)
+            row_object = namedtuple("Row", row_fields, rename=True)  # type: ignore
             return cast(tuple, row_object(*row))

     def bulk_dump(self, table, tmp_file):
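The switch to ``rename=True`` matters when a result column is not a valid Python identifier or duplicates another column name; plain ``namedtuple`` would raise ``ValueError`` in those cases. A quick stdlib illustration:

    from collections import namedtuple

    # With rename=True, invalid or duplicate field names are replaced by
    # positional names (_1, _2, ...) instead of raising ValueError.
    Row = namedtuple("Row", ["id", "class", "id"], rename=True)
    print(Row._fields)  # ('id', '_1', '_2')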
airflow/providers/databricks/operators/databricks.py

@@ -88,6 +88,19 @@ def _handle_databricks_operator_execution(operator, hook, log, context) -> None:
             f"{operator.task_id} failed with terminal state: {run_state} "
             f"and with the error {run_state.state_message}"
         )
+        if isinstance(operator, DatabricksRunNowOperator) and operator.repair_run:
+            operator.repair_run = False
+            log.warning(
+                "%s but since repair run is set, repairing the run with all failed tasks",
+                error_message,
+            )
+
+            latest_repair_id = hook.get_latest_repair_id(operator.run_id)
+            repair_json = {"run_id": operator.run_id, "rerun_all_failed_tasks": True}
+            if latest_repair_id is not None:
+                repair_json["latest_repair_id"] = latest_repair_id
+            operator.json["latest_repair_id"] = hook.repair_run(operator, repair_json)
+            _handle_databricks_operator_execution(operator, hook, log, context)
         raise AirflowException(error_message)

     else:
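Note that the handler flips ``operator.repair_run`` to ``False`` before issuing the repair, so at most one repair is attempted per execution and the recursive ``_handle_databricks_operator_execution`` call cannot trigger a second one.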
@@ -119,18 +132,24 @@ def _handle_deferrable_databricks_operator_execution(operator, hook, log, context):
     log.info("View run status, Spark UI, and logs at %s", run_page_url)

     if operator.wait_for_termination:
-        operator.
-
-
-
-
-
-
-
-
-
-
-
+        run_info = hook.get_run(operator.run_id)
+        run_state = RunState(**run_info["state"])
+        if not run_state.is_terminal:
+            operator.defer(
+                trigger=DatabricksExecutionTrigger(
+                    run_id=operator.run_id,
+                    databricks_conn_id=operator.databricks_conn_id,
+                    polling_period_seconds=operator.polling_period_seconds,
+                    retry_limit=operator.databricks_retry_limit,
+                    retry_delay=operator.databricks_retry_delay,
+                    retry_args=operator.databricks_retry_args,
+                    run_page_url=run_page_url,
+                ),
+                method_name=DEFER_METHOD_NAME,
+            )
+        else:
+            if run_state.is_successful:
+                log.info("%s completed successfully.", operator.task_id)


 def _handle_deferrable_databricks_operator_completion(event: dict, log: Logger) -> None:
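With this change the deferrable path first polls the run once and only defers when the state is not yet terminal, instead of deferring unconditionally; a run that has already finished is handled inline.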
@@ -623,6 +642,7 @@ class DatabricksRunNowOperator(BaseOperator):
         - ``jar_params``
         - ``spark_submit_params``
         - ``idempotency_token``
+        - ``repair_run``

     :param job_id: the job_id of the existing Databricks job.
         This field will be templated.
@@ -711,6 +731,7 @@ class DatabricksRunNowOperator(BaseOperator):
     :param do_xcom_push: Whether we should push run_id and run_page_url to xcom.
     :param wait_for_termination: if we should wait for termination of the job run. ``True`` by default.
     :param deferrable: Run operator in the deferrable mode.
+    :param repair_run: Repair the databricks run in case of failure, doesn't work in deferrable mode
     """

     # Used in airflow.models.BaseOperator
@@ -741,6 +762,7 @@ class DatabricksRunNowOperator(BaseOperator):
         do_xcom_push: bool = True,
         wait_for_termination: bool = True,
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
+        repair_run: bool = False,
         **kwargs,
     ) -> None:
         """Create a new ``DatabricksRunNowOperator``."""
@@ -753,6 +775,7 @@ class DatabricksRunNowOperator(BaseOperator):
         self.databricks_retry_args = databricks_retry_args
         self.wait_for_termination = wait_for_termination
         self.deferrable = deferrable
+        self.repair_run = repair_run

         if job_id is not None:
             self.json["job_id"] = job_id
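A hedged usage sketch of the new flag in a DAG task; the connection id and job id below are hypothetical:

    from airflow.providers.databricks.operators.databricks import DatabricksRunNowOperator

    run_job = DatabricksRunNowOperator(
        task_id="run_existing_job",
        databricks_conn_id="databricks_default",  # assumed connection id
        job_id=1234,  # hypothetical existing Databricks job
        wait_for_termination=True,  # repair only applies while waiting for termination
        repair_run=True,  # on a failed terminal state, repair the run with all failed tasks
    )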
{apache_airflow_providers_databricks-6.0.0rc1.dist-info → apache_airflow_providers_databricks-6.1.0.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: apache-airflow-providers-databricks
-Version: 6.0.0rc1
+Version: 6.1.0
 Summary: Provider package apache-airflow-providers-databricks for Apache Airflow
 Keywords: airflow-provider,databricks,airflow,integration
 Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -21,15 +21,15 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Topic :: System :: Monitoring
 Requires-Dist: aiohttp>=3.6.3, <4
-Requires-Dist: apache-airflow-providers-common-sql>=1.10.0
-Requires-Dist: apache-airflow>=2.6.0
+Requires-Dist: apache-airflow-providers-common-sql>=1.10.0
+Requires-Dist: apache-airflow>=2.6.0
 Requires-Dist: databricks-sql-connector>=2.0.0, <3.0.0, !=2.9.0
 Requires-Dist: requests>=2.27,<3
 Requires-Dist: apache-airflow-providers-common-sql ; extra == "common.sql"
 Requires-Dist: databricks-sdk==0.10.0 ; extra == "sdk"
 Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
-Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.
-Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.
+Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.1.0/changelog.html
+Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.1.0
 Project-URL: Slack Chat, https://s.apache.org/airflow-slack
 Project-URL: Source Code, https://github.com/apache/airflow
 Project-URL: Twitter, https://twitter.com/ApacheAirflow
@@ -81,7 +81,7 @@ Provides-Extra: sdk

 Package ``apache-airflow-providers-databricks``

-Release: ``6.0.0rc1``
+Release: ``6.1.0``


 `Databricks <https://databricks.com/>`__
@@ -94,7 +94,7 @@ This is a provider package for ``databricks`` provider. All classes for this provider package
 are in ``airflow.providers.databricks`` python package.

 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.1.0/>`_.

 Installation
 ------------
@@ -138,4 +138,4 @@ Dependent package
 ============================================================================================================ ==============

 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.1.0/changelog.html>`_.
{apache_airflow_providers_databricks-6.0.0rc1.dist-info → apache_airflow_providers_databricks-6.1.0.dist-info}/RECORD

@@ -1,12 +1,12 @@
 airflow/providers/databricks/LICENSE,sha256=ywUBpKZc7Jb96rVt5I3IDbg7dIJAbUSHkuoDcF3jbH4,13569
-airflow/providers/databricks/__init__.py,sha256=
-airflow/providers/databricks/get_provider_info.py,sha256=
+airflow/providers/databricks/__init__.py,sha256=9F0qMBigUZVuMKQSrzhcdG9mjNAwMVYFZJ9Mu6fvUsQ,1585
+airflow/providers/databricks/get_provider_info.py,sha256=aAWTy02B1anUO7_gY3v3rtjH38L8ZX9MUnA9C-xFvYg,6177
 airflow/providers/databricks/hooks/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
-airflow/providers/databricks/hooks/databricks.py,sha256=
+airflow/providers/databricks/hooks/databricks.py,sha256=VRxltNFCRrZ9-U7yszhv1aCX48vdQcsnR-IHe_ACFa8,23076
 airflow/providers/databricks/hooks/databricks_base.py,sha256=nX-40P0XKZyFyahkbbImvWmoCQ4ic35viAYlFIupUHE,30591
-airflow/providers/databricks/hooks/databricks_sql.py,sha256=
+airflow/providers/databricks/hooks/databricks_sql.py,sha256=y55dj2Z1T-5BaMrCh60QT7IW1C_-JiXJBZRU_FJZpvY,12580
 airflow/providers/databricks/operators/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
-airflow/providers/databricks/operators/databricks.py,sha256=
+airflow/providers/databricks/operators/databricks.py,sha256=g89WZV9VZaLTGEDSdK9gt3GAZfFTrziGa5O1o6o-qkw,41389
 airflow/providers/databricks/operators/databricks_repos.py,sha256=NUxa0jvvmK16CDKb-7Tbs3wF9XoFi1AVJlKxlsE3r4k,13092
 airflow/providers/databricks/operators/databricks_sql.py,sha256=C_XqR5SN_g2t0XXlH248nPAC7BcYB0P1NAsPJT5qcEg,16806
 airflow/providers/databricks/sensors/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
@@ -16,7 +16,7 @@ airflow/providers/databricks/triggers/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvR
 airflow/providers/databricks/triggers/databricks.py,sha256=Qj9mB0bNYRY_toPEU17gxbxmPkkT3P789kCHu_T64BA,3997
 airflow/providers/databricks/utils/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
 airflow/providers/databricks/utils/databricks.py,sha256=iRzRHvdFETGiFxZccOjfC8NGgDofMfP35Tqp3M5CGr0,2880
-apache_airflow_providers_databricks-6.0.
-apache_airflow_providers_databricks-6.0.
-apache_airflow_providers_databricks-6.0.
-apache_airflow_providers_databricks-6.0.
+apache_airflow_providers_databricks-6.1.0.dist-info/entry_points.txt,sha256=1WxGXTFDb107eV5Zmrt3p12J4LHYk56-ZKlvpOK7vg4,106
+apache_airflow_providers_databricks-6.1.0.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
+apache_airflow_providers_databricks-6.1.0.dist-info/METADATA,sha256=nPsc_67tyRVrEcjehAw3sv98WGWcasE7J01nMipjO8U,6427
+apache_airflow_providers_databricks-6.1.0.dist-info/RECORD,,
The WHEEL and entry_points.txt files are unchanged between the two versions.