apache-airflow-providers-databricks 6.2.0__tar.gz → 6.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of apache-airflow-providers-databricks might be problematic.

Files changed (21)
  1. {apache_airflow_providers_databricks-6.2.0 → apache_airflow_providers_databricks-6.3.0}/PKG-INFO +8 -7
  2. {apache_airflow_providers_databricks-6.2.0 → apache_airflow_providers_databricks-6.3.0}/README.rst +4 -4
  3. {apache_airflow_providers_databricks-6.2.0 → apache_airflow_providers_databricks-6.3.0}/airflow/providers/databricks/__init__.py +1 -1
  4. {apache_airflow_providers_databricks-6.2.0 → apache_airflow_providers_databricks-6.3.0}/airflow/providers/databricks/get_provider_info.py +2 -1
  5. {apache_airflow_providers_databricks-6.2.0 → apache_airflow_providers_databricks-6.3.0}/airflow/providers/databricks/hooks/databricks.py +14 -5
  6. {apache_airflow_providers_databricks-6.2.0 → apache_airflow_providers_databricks-6.3.0}/airflow/providers/databricks/hooks/databricks_base.py +1 -0
  7. {apache_airflow_providers_databricks-6.2.0 → apache_airflow_providers_databricks-6.3.0}/airflow/providers/databricks/hooks/databricks_sql.py +2 -4
  8. {apache_airflow_providers_databricks-6.2.0 → apache_airflow_providers_databricks-6.3.0}/airflow/providers/databricks/operators/databricks.py +84 -57
  9. {apache_airflow_providers_databricks-6.2.0 → apache_airflow_providers_databricks-6.3.0}/airflow/providers/databricks/operators/databricks_repos.py +1 -0
  10. {apache_airflow_providers_databricks-6.2.0 → apache_airflow_providers_databricks-6.3.0}/airflow/providers/databricks/operators/databricks_sql.py +11 -10
  11. {apache_airflow_providers_databricks-6.2.0 → apache_airflow_providers_databricks-6.3.0}/airflow/providers/databricks/triggers/databricks.py +12 -8
  12. {apache_airflow_providers_databricks-6.2.0 → apache_airflow_providers_databricks-6.3.0}/pyproject.toml +4 -3
  13. {apache_airflow_providers_databricks-6.2.0 → apache_airflow_providers_databricks-6.3.0}/airflow/providers/databricks/LICENSE +0 -0
  14. {apache_airflow_providers_databricks-6.2.0 → apache_airflow_providers_databricks-6.3.0}/airflow/providers/databricks/hooks/__init__.py +0 -0
  15. {apache_airflow_providers_databricks-6.2.0 → apache_airflow_providers_databricks-6.3.0}/airflow/providers/databricks/operators/__init__.py +0 -0
  16. {apache_airflow_providers_databricks-6.2.0 → apache_airflow_providers_databricks-6.3.0}/airflow/providers/databricks/sensors/__init__.py +0 -0
  17. {apache_airflow_providers_databricks-6.2.0 → apache_airflow_providers_databricks-6.3.0}/airflow/providers/databricks/sensors/databricks_partition.py +0 -0
  18. {apache_airflow_providers_databricks-6.2.0 → apache_airflow_providers_databricks-6.3.0}/airflow/providers/databricks/sensors/databricks_sql.py +0 -0
  19. {apache_airflow_providers_databricks-6.2.0 → apache_airflow_providers_databricks-6.3.0}/airflow/providers/databricks/triggers/__init__.py +0 -0
  20. {apache_airflow_providers_databricks-6.2.0 → apache_airflow_providers_databricks-6.3.0}/airflow/providers/databricks/utils/__init__.py +0 -0
  21. {apache_airflow_providers_databricks-6.2.0 → apache_airflow_providers_databricks-6.3.0}/airflow/providers/databricks/utils/databricks.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: apache-airflow-providers-databricks
- Version: 6.2.0
+ Version: 6.3.0
  Summary: Provider package apache-airflow-providers-databricks for Apache Airflow
  Keywords: airflow-provider,databricks,airflow,integration
  Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -19,6 +19,7 @@ Classifier: Programming Language :: Python :: 3.8
  Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
  Classifier: Topic :: System :: Monitoring
  Requires-Dist: aiohttp>=3.9.2, <4
  Requires-Dist: apache-airflow-providers-common-sql>=1.10.0
@@ -28,8 +29,8 @@ Requires-Dist: requests>=2.27.0,<3
  Requires-Dist: apache-airflow-providers-common-sql ; extra == "common.sql"
  Requires-Dist: databricks-sdk==0.10.0 ; extra == "sdk"
  Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
- Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.2.0/changelog.html
- Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.2.0
+ Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.3.0/changelog.html
+ Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.3.0
  Project-URL: Slack Chat, https://s.apache.org/airflow-slack
  Project-URL: Source Code, https://github.com/apache/airflow
  Project-URL: Twitter, https://twitter.com/ApacheAirflow
@@ -81,7 +82,7 @@ Provides-Extra: sdk

  Package ``apache-airflow-providers-databricks``

- Release: ``6.2.0``
+ Release: ``6.3.0``


  `Databricks <https://databricks.com/>`__
@@ -94,7 +95,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
  are in ``airflow.providers.databricks`` python package.

  You can find package information and changelog for the provider
- in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.2.0/>`_.
+ in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.3.0/>`_.

  Installation
  ------------
@@ -103,7 +104,7 @@ You can install this package on top of an existing Airflow 2 installation (see `
  for the minimum Airflow version supported) via
  ``pip install apache-airflow-providers-databricks``

- The package supports the following python versions: 3.8,3.9,3.10,3.11
+ The package supports the following python versions: 3.8,3.9,3.10,3.11,3.12

  Requirements
  ------------
@@ -138,4 +139,4 @@ Dependent package
  ============================================================================================================ ==============

  The changelog for the provider package can be found in the
- `changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.2.0/changelog.html>`_.
+ `changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.3.0/changelog.html>`_.
README.rst
@@ -42,7 +42,7 @@

  Package ``apache-airflow-providers-databricks``

- Release: ``6.2.0``
+ Release: ``6.3.0``


  `Databricks <https://databricks.com/>`__
@@ -55,7 +55,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
  are in ``airflow.providers.databricks`` python package.

  You can find package information and changelog for the provider
- in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.2.0/>`_.
+ in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.3.0/>`_.

  Installation
  ------------
@@ -64,7 +64,7 @@ You can install this package on top of an existing Airflow 2 installation (see `
  for the minimum Airflow version supported) via
  ``pip install apache-airflow-providers-databricks``

- The package supports the following python versions: 3.8,3.9,3.10,3.11
+ The package supports the following python versions: 3.8,3.9,3.10,3.11,3.12

  Requirements
  ------------
@@ -99,4 +99,4 @@ Dependent package
  ============================================================================================================ ==============

  The changelog for the provider package can be found in the
- `changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.2.0/changelog.html>`_.
+ `changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.3.0/changelog.html>`_.
airflow/providers/databricks/__init__.py
@@ -27,7 +27,7 @@ import packaging.version

  __all__ = ["__version__"]

- __version__ = "6.2.0"
+ __version__ = "6.3.0"

  try:
  from airflow import __version__ as airflow_version
airflow/providers/databricks/get_provider_info.py
@@ -28,8 +28,9 @@ def get_provider_info():
  "name": "Databricks",
  "description": "`Databricks <https://databricks.com/>`__\n",
  "state": "ready",
- "source-date-epoch": 1707636345,
+ "source-date-epoch": 1712665557,
  "versions": [
+ "6.3.0",
  "6.2.0",
  "6.1.0",
  "6.0.0",
airflow/providers/databricks/hooks/databricks.py
@@ -25,6 +25,7 @@ operators talk to the
  or the ``api/2.1/jobs/runs/submit``
  `endpoint <https://docs.databricks.com/dev-tools/api/latest/jobs.html#operation/JobsRunsSubmit>`_.
  """
+
  from __future__ import annotations

  import json
@@ -196,8 +197,7 @@ class DatabricksHook(BaseDatabricksHook):
  super().__init__(databricks_conn_id, timeout_seconds, retry_limit, retry_delay, retry_args, caller)

  def create_job(self, json: dict) -> int:
- """
- Utility function to call the ``api/2.1/jobs/create`` endpoint.
+ """Call the ``api/2.1/jobs/create`` endpoint.

  :param json: The data used in the body of the request to the ``create`` endpoint.
  :return: the job_id as an int
@@ -206,8 +206,7 @@ class DatabricksHook(BaseDatabricksHook):
  return response["job_id"]

  def reset_job(self, job_id: str, json: dict) -> None:
- """
- Utility function to call the ``api/2.1/jobs/reset`` endpoint.
+ """Call the ``api/2.1/jobs/reset`` endpoint.

  :param json: The data used in the new_settings of the request to the ``reset`` endpoint.
  """
@@ -530,7 +529,7 @@ class DatabricksHook(BaseDatabricksHook):

  def get_latest_repair_id(self, run_id: int) -> int | None:
  """Get latest repair id if any exist for run_id else None."""
- json = {"run_id": run_id, "include_history": True}
+ json = {"run_id": run_id, "include_history": "true"}
  response = self._do_api_call(GET_RUN_ENDPOINT, json)
  repair_history = response["repair_history"]
  if len(repair_history) == 1:
@@ -656,6 +655,16 @@ class DatabricksHook(BaseDatabricksHook):

  return None

+ def update_job_permission(self, job_id: int, json: dict[str, Any]) -> dict:
+ """
+ Update databricks job permission.
+
+ :param job_id: job id
+ :param json: payload
+ :return: json containing permission specification
+ """
+ return self._do_api_call(("PATCH", f"api/2.0/permissions/jobs/{job_id}"), json)
+
  def test_connection(self) -> tuple[bool, str]:
  """Test the Databricks connectivity from UI."""
  hook = DatabricksHook(databricks_conn_id=self.databricks_conn_id)
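
The new ``update_job_permission`` hook method above issues a ``PATCH`` against ``api/2.0/permissions/jobs/{job_id}``; in this release ``DatabricksCreateJobsOperator`` uses it to re-apply the ``access_control_list`` when it resets an existing job. A minimal sketch of calling it directly, assuming a ``databricks_default`` connection and an illustrative ACL payload in the shape the Databricks permissions API expects:

from airflow.providers.databricks.hooks.databricks import DatabricksHook

# Sketch only: the connection id, job id and ACL entries are placeholders.
hook = DatabricksHook(databricks_conn_id="databricks_default")
acl = {
    "access_control_list": [
        {"user_name": "someone@example.com", "permission_level": "CAN_MANAGE"},
    ]
}
# PATCHes api/2.0/permissions/jobs/<job_id> and returns the resulting permission spec.
hook.update_job_permission(job_id=1234, json=acl)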
airflow/providers/databricks/hooks/databricks_base.py
@@ -22,6 +22,7 @@ This hook enable the submitting and running of jobs to the Databricks platform.
  operators talk to the ``api/2.0/jobs/runs/submit``
  `endpoint <https://docs.databricks.com/api/latest/jobs.html#runs-submit>`_.
  """
+
  from __future__ import annotations

  import copy
airflow/providers/databricks/hooks/databricks_sql.py
@@ -183,8 +183,7 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
  handler: None = ...,
  split_statements: bool = ...,
  return_last: bool = ...,
- ) -> None:
- ...
+ ) -> None: ...

  @overload
  def run(
@@ -195,8 +194,7 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
  handler: Callable[[Any], T] = ...,
  split_statements: bool = ...,
  return_last: bool = ...,
- ) -> tuple | list[tuple] | list[list[tuple] | tuple] | None:
- ...
+ ) -> tuple | list[tuple] | list[list[tuple] | tuple] | None: ...

  def run(
  self,
airflow/providers/databricks/operators/databricks.py
@@ -16,6 +16,7 @@
  # specific language governing permissions and limitations
  # under the License.
  """This module contains Databricks operators."""
+
  from __future__ import annotations

  import time
@@ -51,6 +52,7 @@ def _handle_databricks_operator_execution(operator, hook, log, context) -> None:
  """
  if operator.do_xcom_push and context is not None:
  context["ti"].xcom_push(key=XCOM_RUN_ID_KEY, value=operator.run_id)
+
  log.info("Run submitted with run_id: %s", operator.run_id)
  run_page_url = hook.get_run_page_url(operator.run_id)
  if operator.do_xcom_push and context is not None:
@@ -65,52 +67,52 @@ def _handle_databricks_operator_execution(operator, hook, log, context) -> None:
  log.info("%s completed successfully.", operator.task_id)
  log.info("View run status, Spark UI, and logs at %s", run_page_url)
  return
- else:
- if run_state.result_state == "FAILED":
- task_run_id = None
- if "tasks" in run_info:
- for task in run_info["tasks"]:
- if task.get("state", {}).get("result_state", "") == "FAILED":
- task_run_id = task["run_id"]
- if task_run_id is not None:
- run_output = hook.get_run_output(task_run_id)
- if "error" in run_output:
- notebook_error = run_output["error"]
- else:
- notebook_error = run_state.state_message
+
+ if run_state.result_state == "FAILED":
+ task_run_id = None
+ if "tasks" in run_info:
+ for task in run_info["tasks"]:
+ if task.get("state", {}).get("result_state", "") == "FAILED":
+ task_run_id = task["run_id"]
+ if task_run_id is not None:
+ run_output = hook.get_run_output(task_run_id)
+ if "error" in run_output:
+ notebook_error = run_output["error"]
  else:
  notebook_error = run_state.state_message
- error_message = (
- f"{operator.task_id} failed with terminal state: {run_state} "
- f"and with the error {notebook_error}"
- )
  else:
- error_message = (
- f"{operator.task_id} failed with terminal state: {run_state} "
- f"and with the error {run_state.state_message}"
- )
- if isinstance(operator, DatabricksRunNowOperator) and operator.repair_run:
- operator.repair_run = False
- log.warning(
- "%s but since repair run is set, repairing the run with all failed tasks",
- error_message,
- )
-
- latest_repair_id = hook.get_latest_repair_id(operator.run_id)
- repair_json = {"run_id": operator.run_id, "rerun_all_failed_tasks": True}
- if latest_repair_id is not None:
- repair_json["latest_repair_id"] = latest_repair_id
- operator.json["latest_repair_id"] = hook.repair_run(operator, repair_json)
- _handle_databricks_operator_execution(operator, hook, log, context)
- raise AirflowException(error_message)
-
- else:
- log.info("%s in run state: %s", operator.task_id, run_state)
- log.info("View run status, Spark UI, and logs at %s", run_page_url)
- log.info("Sleeping for %s seconds.", operator.polling_period_seconds)
- time.sleep(operator.polling_period_seconds)
- else:
- log.info("View run status, Spark UI, and logs at %s", run_page_url)
+ notebook_error = run_state.state_message
+ error_message = (
+ f"{operator.task_id} failed with terminal state: {run_state} "
+ f"and with the error {notebook_error}"
+ )
+ else:
+ error_message = (
+ f"{operator.task_id} failed with terminal state: {run_state} "
+ f"and with the error {run_state.state_message}"
+ )
+
+ if isinstance(operator, DatabricksRunNowOperator) and operator.repair_run:
+ operator.repair_run = False
+ log.warning(
+ "%s but since repair run is set, repairing the run with all failed tasks",
+ error_message,
+ )
+
+ latest_repair_id = hook.get_latest_repair_id(operator.run_id)
+ repair_json = {"run_id": operator.run_id, "rerun_all_failed_tasks": True}
+ if latest_repair_id is not None:
+ repair_json["latest_repair_id"] = latest_repair_id
+ operator.json["latest_repair_id"] = hook.repair_run(operator, repair_json)
+ _handle_databricks_operator_execution(operator, hook, log, context)
+ raise AirflowException(error_message)
+
+ log.info("%s in run state: %s", operator.task_id, run_state)
+ log.info("View run status, Spark UI, and logs at %s", run_page_url)
+ log.info("Sleeping for %s seconds.", operator.polling_period_seconds)
+ time.sleep(operator.polling_period_seconds)
+
+ log.info("View run status, Spark UI, and logs at %s", run_page_url)


  def _handle_deferrable_databricks_operator_execution(operator, hook, log, context) -> None:
@@ -145,6 +147,7 @@ def _handle_deferrable_databricks_operator_execution(operator, hook, log, contex
  retry_delay=operator.databricks_retry_delay,
  retry_args=operator.databricks_retry_args,
  run_page_url=run_page_url,
+ repair_run=getattr(operator, "repair_run", False),
  ),
  method_name=DEFER_METHOD_NAME,
  )
@@ -162,9 +165,15 @@ def _handle_deferrable_databricks_operator_completion(event: dict, log: Logger)
  if run_state.is_successful:
  log.info("Job run completed successfully.")
  return
- else:
- error_message = f"Job run failed with terminal state: {run_state}"
- raise AirflowException(error_message)
+
+ error_message = f"Job run failed with terminal state: {run_state}"
+ if event["repair_run"]:
+ log.warning(
+ "%s but since repair run is set, repairing the run with all failed tasks",
+ error_message,
+ )
+ return
+ raise AirflowException(error_message)


  class DatabricksJobRunLink(BaseOperatorLink):
@@ -257,7 +266,7 @@ class DatabricksCreateJobsOperator(BaseOperator):
  databricks_retry_args: dict[Any, Any] | None = None,
  **kwargs,
  ) -> None:
- """Creates a new ``DatabricksCreateJobsOperator``."""
+ """Create a new ``DatabricksCreateJobsOperator``."""
  super().__init__(**kwargs)
  self.json = json or {}
  self.databricks_conn_id = databricks_conn_id
@@ -287,8 +296,8 @@ class DatabricksCreateJobsOperator(BaseOperator):
  self.json["git_source"] = git_source
  if access_control_list is not None:
  self.json["access_control_list"] = access_control_list
-
- self.json = normalise_json_content(self.json)
+ if self.json:
+ self.json = normalise_json_content(self.json)

  @cached_property
  def _hook(self):
@@ -307,6 +316,10 @@ class DatabricksCreateJobsOperator(BaseOperator):
  if job_id is None:
  return self._hook.create_job(self.json)
  self._hook.reset_job(str(job_id), self.json)
+ if (access_control_list := self.json.get("access_control_list")) is not None:
+ acl_json = {"access_control_list": access_control_list}
+ self._hook.update_job_permission(job_id, normalise_json_content(acl_json))
+
  return job_id


@@ -583,9 +596,6 @@ class DatabricksSubmitRunDeferrableOperator(DatabricksSubmitRunOperator):
  self.run_id = hook.submit_run(json_normalised)
  _handle_deferrable_databricks_operator_execution(self, hook, self.log, context)

- def execute_complete(self, context: dict | None, event: dict):
- _handle_deferrable_databricks_operator_completion(event, self.log)
-

  class DatabricksRunNowOperator(BaseOperator):
  """
@@ -645,6 +655,7 @@ class DatabricksRunNowOperator(BaseOperator):
  - ``spark_submit_params``
  - ``idempotency_token``
  - ``repair_run``
+ - ``cancel_previous_runs``

  :param job_id: the job_id of the existing Databricks job.
  This field will be templated.
@@ -733,7 +744,8 @@ class DatabricksRunNowOperator(BaseOperator):
  :param do_xcom_push: Whether we should push run_id and run_page_url to xcom.
  :param wait_for_termination: if we should wait for termination of the job run. ``True`` by default.
  :param deferrable: Run operator in the deferrable mode.
- :param repair_run: Repair the databricks run in case of failure, doesn't work in deferrable mode
+ :param repair_run: Repair the databricks run in case of failure.
+ :param cancel_previous_runs: Cancel all existing running jobs before submitting new one.
  """

  # Used in airflow.models.BaseOperator
@@ -765,6 +777,7 @@ class DatabricksRunNowOperator(BaseOperator):
  wait_for_termination: bool = True,
  deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
  repair_run: bool = False,
+ cancel_previous_runs: bool = False,
  **kwargs,
  ) -> None:
  """Create a new ``DatabricksRunNowOperator``."""
@@ -778,6 +791,7 @@ class DatabricksRunNowOperator(BaseOperator):
  self.wait_for_termination = wait_for_termination
  self.deferrable = deferrable
  self.repair_run = repair_run
+ self.cancel_previous_runs = cancel_previous_runs

  if job_id is not None:
  self.json["job_id"] = job_id
@@ -797,8 +811,8 @@ class DatabricksRunNowOperator(BaseOperator):
  self.json["spark_submit_params"] = spark_submit_params
  if idempotency_token is not None:
  self.json["idempotency_token"] = idempotency_token
-
- self.json = normalise_json_content(self.json)
+ if self.json:
+ self.json = normalise_json_content(self.json)
  # This variable will be used in case our task gets killed.
  self.run_id: int | None = None
  self.do_xcom_push = do_xcom_push
@@ -824,6 +838,10 @@ class DatabricksRunNowOperator(BaseOperator):
  raise AirflowException(f"Job ID for job name {self.json['job_name']} can not be found")
  self.json["job_id"] = job_id
  del self.json["job_name"]
+
+ if self.cancel_previous_runs and self.json["job_id"] is not None:
+ hook.cancel_all_runs(self.json["job_id"])
+
  self.run_id = hook.run_now(self.json)
  if self.deferrable:
  _handle_deferrable_databricks_operator_execution(self, hook, self.log, context)
@@ -833,8 +851,17 @@ class DatabricksRunNowOperator(BaseOperator):
  def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> None:
  if event:
  _handle_deferrable_databricks_operator_completion(event, self.log)
-
- def on_kill(self):
+ if event["repair_run"]:
+ self.repair_run = False
+ self.run_id = event["run_id"]
+ latest_repair_id = self._hook.get_latest_repair_id(self.run_id)
+ repair_json = {"run_id": self.run_id, "rerun_all_failed_tasks": True}
+ if latest_repair_id is not None:
+ repair_json["latest_repair_id"] = latest_repair_id
+ self.json["latest_repair_id"] = self._hook.repair_run(repair_json)
+ _handle_deferrable_databricks_operator_execution(self, self._hook, self.log, context)
+
+ def on_kill(self) -> None:
  if self.run_id:
  self._hook.cancel_run(self.run_id)
  self.log.info(
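
Taken together, the operator changes above add a ``cancel_previous_runs`` flag to ``DatabricksRunNowOperator`` and make ``repair_run`` usable in deferrable mode, since the trigger now reports the flag back in its completion event. A minimal sketch of the new options, assuming a ``databricks_default`` connection and a placeholder job id:

from airflow.providers.databricks.operators.databricks import DatabricksRunNowOperator

# Sketch only: job_id and the connection id are placeholders.
run_job = DatabricksRunNowOperator(
    task_id="run_databricks_job",
    databricks_conn_id="databricks_default",
    job_id=1234,
    cancel_previous_runs=True,  # new in 6.3.0: cancel active runs of the job before triggering
    repair_run=True,            # now also honoured when deferrable=True
    deferrable=True,
)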
airflow/providers/databricks/operators/databricks_repos.py
@@ -16,6 +16,7 @@
  # specific language governing permissions and limitations
  # under the License.
  """This module contains Databricks operators."""
+
  from __future__ import annotations

  import re
airflow/providers/databricks/operators/databricks_sql.py
@@ -16,6 +16,7 @@
  # specific language governing permissions and limitations
  # under the License.
  """This module contains Databricks operators."""
+
  from __future__ import annotations

  import csv
@@ -207,9 +208,9 @@ class DatabricksCopyIntoOperator(BaseOperator):
  """

  template_fields: Sequence[str] = (
- "_file_location",
- "_files",
- "_table_name",
+ "file_location",
+ "files",
+ "table_name",
  "databricks_conn_id",
  )

@@ -249,17 +250,17 @@ class DatabricksCopyIntoOperator(BaseOperator):
  raise AirflowException("file_location shouldn't be empty")
  if file_format not in COPY_INTO_APPROVED_FORMATS:
  raise AirflowException(f"file_format '{file_format}' isn't supported")
- self._files = files
+ self.files = files
  self._pattern = pattern
  self._file_format = file_format
  self.databricks_conn_id = databricks_conn_id
  self._http_path = http_path
  self._sql_endpoint_name = sql_endpoint_name
  self.session_config = session_configuration
- self._table_name = table_name
+ self.table_name = table_name
  self._catalog = catalog
  self._schema = schema
- self._file_location = file_location
+ self.file_location = file_location
  self._expression_list = expression_list
  self._credential = credential
  self._storage_credential = storage_credential
@@ -313,14 +314,14 @@ class DatabricksCopyIntoOperator(BaseOperator):
  if self._credential is not None:
  maybe_credential = self._generate_options("CREDENTIAL", escaper, self._credential, False)
  maybe_with = f" WITH ({maybe_credential} {maybe_encryption})"
- location = escaper.escape_item(self._file_location) + maybe_with
+ location = escaper.escape_item(self.file_location) + maybe_with
  if self._expression_list is not None:
  location = f"(SELECT {self._expression_list} FROM {location})"
  files_or_pattern = ""
  if self._pattern is not None:
  files_or_pattern = f"PATTERN = {escaper.escape_item(self._pattern)}\n"
- elif self._files is not None:
- files_or_pattern = f"FILES = {escaper.escape_item(self._files)}\n"
+ elif self.files is not None:
+ files_or_pattern = f"FILES = {escaper.escape_item(self.files)}\n"
  format_options = self._generate_options("FORMAT_OPTIONS", escaper, self._format_options) + "\n"
  copy_options = self._generate_options("COPY_OPTIONS", escaper, self._copy_options) + "\n"
  storage_cred = ""
@@ -340,7 +341,7 @@ class DatabricksCopyIntoOperator(BaseOperator):
  else:
  raise AirflowException(f"Incorrect data type for validate parameter: {type(self._validate)}")
  # TODO: think on how to make sure that table_name and expression_list aren't used for SQL injection
- sql = f"""COPY INTO {self._table_name}{storage_cred}
+ sql = f"""COPY INTO {self.table_name}{storage_cred}
  FROM {location}
  FILEFORMAT = {self._file_format}
  {validation}{files_or_pattern}{format_options}{copy_options}
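
The renames above turn the templated attributes of ``DatabricksCopyIntoOperator`` into public names (``file_location``, ``files``, ``table_name``), so Jinja templating of those constructor arguments resolves against the attributes the SQL generation actually reads. A minimal usage sketch; the endpoint name, table and storage path are placeholders:

from airflow.providers.databricks.operators.databricks_sql import DatabricksCopyIntoOperator

# Sketch only: endpoint, table and file location are placeholders.
copy_events = DatabricksCopyIntoOperator(
    task_id="copy_into_events",
    databricks_conn_id="databricks_default",
    sql_endpoint_name="my-sql-endpoint",
    table_name="bronze.events",
    file_format="JSON",
    file_location="s3://my-bucket/events/{{ ds }}/",  # rendered via the templated file_location field
)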
airflow/providers/databricks/triggers/databricks.py
@@ -47,6 +47,7 @@ class DatabricksExecutionTrigger(BaseTrigger):
  retry_delay: int = 10,
  retry_args: dict[Any, Any] | None = None,
  run_page_url: str | None = None,
+ repair_run: bool = False,
  ) -> None:
  super().__init__()
  self.run_id = run_id
@@ -56,6 +57,7 @@ class DatabricksExecutionTrigger(BaseTrigger):
  self.retry_delay = retry_delay
  self.retry_args = retry_args
  self.run_page_url = run_page_url
+ self.repair_run = repair_run
  self.hook = DatabricksHook(
  databricks_conn_id,
  retry_limit=self.retry_limit,
@@ -74,6 +76,7 @@ class DatabricksExecutionTrigger(BaseTrigger):
  "retry_delay": self.retry_delay,
  "retry_args": self.retry_args,
  "run_page_url": self.run_page_url,
+ "repair_run": self.repair_run,
  },
  )

@@ -87,14 +90,15 @@ class DatabricksExecutionTrigger(BaseTrigger):
  "run_id": self.run_id,
  "run_page_url": self.run_page_url,
  "run_state": run_state.to_json(),
+ "repair_run": self.repair_run,
  }
  )
  return
- else:
- self.log.info(
- "run-id %s in run state %s. sleeping for %s seconds",
- self.run_id,
- run_state,
- self.polling_period_seconds,
- )
- await asyncio.sleep(self.polling_period_seconds)
+
+ self.log.info(
+ "run-id %s in run state %s. sleeping for %s seconds",
+ self.run_id,
+ run_state,
+ self.polling_period_seconds,
+ )
+ await asyncio.sleep(self.polling_period_seconds)
pyproject.toml
@@ -28,7 +28,7 @@ build-backend = "flit_core.buildapi"

  [project]
  name = "apache-airflow-providers-databricks"
- version = "6.2.0"
+ version = "6.3.0"
  description = "Provider package apache-airflow-providers-databricks for Apache Airflow"
  readme = "README.rst"
  authors = [
@@ -51,6 +51,7 @@ classifiers = [
  "Programming Language :: Python :: 3.9",
  "Programming Language :: Python :: 3.10",
  "Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: 3.12",
  "Topic :: System :: Monitoring",
  ]
  requires-python = "~=3.8"
@@ -63,8 +64,8 @@ dependencies = [
  ]

  [project.urls]
- "Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.2.0"
- "Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.2.0/changelog.html"
+ "Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.3.0"
+ "Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.3.0/changelog.html"
  "Bug Tracker" = "https://github.com/apache/airflow/issues"
  "Source Code" = "https://github.com/apache/airflow"
  "Slack Chat" = "https://s.apache.org/airflow-slack"