apache-airflow-providers-databricks 6.0.0rc1.tar.gz → 6.1.0.tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information is provided for informational purposes only and reflects the packages as they appear in their respective public registries.

Potentially problematic release: this version of apache-airflow-providers-databricks might be problematic.

Files changed (21)
  1. {apache_airflow_providers_databricks-6.0.0rc1 → apache_airflow_providers_databricks-6.1.0}/PKG-INFO +8 -8
  2. {apache_airflow_providers_databricks-6.0.0rc1 → apache_airflow_providers_databricks-6.1.0}/README.rst +3 -3
  3. {apache_airflow_providers_databricks-6.0.0rc1 → apache_airflow_providers_databricks-6.1.0}/airflow/providers/databricks/__init__.py +1 -1
  4. {apache_airflow_providers_databricks-6.0.0rc1 → apache_airflow_providers_databricks-6.1.0}/airflow/providers/databricks/get_provider_info.py +11 -9
  5. {apache_airflow_providers_databricks-6.0.0rc1 → apache_airflow_providers_databricks-6.1.0}/airflow/providers/databricks/hooks/databricks.py +13 -2
  6. {apache_airflow_providers_databricks-6.0.0rc1 → apache_airflow_providers_databricks-6.1.0}/airflow/providers/databricks/hooks/databricks_sql.py +8 -6
  7. {apache_airflow_providers_databricks-6.0.0rc1 → apache_airflow_providers_databricks-6.1.0}/airflow/providers/databricks/operators/databricks.py +35 -12
  8. {apache_airflow_providers_databricks-6.0.0rc1 → apache_airflow_providers_databricks-6.1.0}/pyproject.toml +5 -5
  9. {apache_airflow_providers_databricks-6.0.0rc1 → apache_airflow_providers_databricks-6.1.0}/airflow/providers/databricks/LICENSE +0 -0
  10. {apache_airflow_providers_databricks-6.0.0rc1 → apache_airflow_providers_databricks-6.1.0}/airflow/providers/databricks/hooks/__init__.py +0 -0
  11. {apache_airflow_providers_databricks-6.0.0rc1 → apache_airflow_providers_databricks-6.1.0}/airflow/providers/databricks/hooks/databricks_base.py +0 -0
  12. {apache_airflow_providers_databricks-6.0.0rc1 → apache_airflow_providers_databricks-6.1.0}/airflow/providers/databricks/operators/__init__.py +0 -0
  13. {apache_airflow_providers_databricks-6.0.0rc1 → apache_airflow_providers_databricks-6.1.0}/airflow/providers/databricks/operators/databricks_repos.py +0 -0
  14. {apache_airflow_providers_databricks-6.0.0rc1 → apache_airflow_providers_databricks-6.1.0}/airflow/providers/databricks/operators/databricks_sql.py +0 -0
  15. {apache_airflow_providers_databricks-6.0.0rc1 → apache_airflow_providers_databricks-6.1.0}/airflow/providers/databricks/sensors/__init__.py +0 -0
  16. {apache_airflow_providers_databricks-6.0.0rc1 → apache_airflow_providers_databricks-6.1.0}/airflow/providers/databricks/sensors/databricks_partition.py +0 -0
  17. {apache_airflow_providers_databricks-6.0.0rc1 → apache_airflow_providers_databricks-6.1.0}/airflow/providers/databricks/sensors/databricks_sql.py +0 -0
  18. {apache_airflow_providers_databricks-6.0.0rc1 → apache_airflow_providers_databricks-6.1.0}/airflow/providers/databricks/triggers/__init__.py +0 -0
  19. {apache_airflow_providers_databricks-6.0.0rc1 → apache_airflow_providers_databricks-6.1.0}/airflow/providers/databricks/triggers/databricks.py +0 -0
  20. {apache_airflow_providers_databricks-6.0.0rc1 → apache_airflow_providers_databricks-6.1.0}/airflow/providers/databricks/utils/__init__.py +0 -0
  21. {apache_airflow_providers_databricks-6.0.0rc1 → apache_airflow_providers_databricks-6.1.0}/airflow/providers/databricks/utils/databricks.py +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: apache-airflow-providers-databricks
- Version: 6.0.0rc1
+ Version: 6.1.0
  Summary: Provider package apache-airflow-providers-databricks for Apache Airflow
  Keywords: airflow-provider,databricks,airflow,integration
  Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -21,15 +21,15 @@ Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
  Classifier: Topic :: System :: Monitoring
  Requires-Dist: aiohttp>=3.6.3, <4
- Requires-Dist: apache-airflow-providers-common-sql>=1.10.0.dev0
- Requires-Dist: apache-airflow>=2.6.0.dev0
+ Requires-Dist: apache-airflow-providers-common-sql>=1.10.0
+ Requires-Dist: apache-airflow>=2.6.0
  Requires-Dist: databricks-sql-connector>=2.0.0, <3.0.0, !=2.9.0
  Requires-Dist: requests>=2.27,<3
  Requires-Dist: apache-airflow-providers-common-sql ; extra == "common.sql"
  Requires-Dist: databricks-sdk==0.10.0 ; extra == "sdk"
  Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
- Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.0.0/changelog.html
- Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.0.0
+ Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.1.0/changelog.html
+ Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.1.0
  Project-URL: Slack Chat, https://s.apache.org/airflow-slack
  Project-URL: Source Code, https://github.com/apache/airflow
  Project-URL: Twitter, https://twitter.com/ApacheAirflow
@@ -81,7 +81,7 @@ Provides-Extra: sdk

  Package ``apache-airflow-providers-databricks``

- Release: ``6.0.0.rc1``
+ Release: ``6.1.0``


  `Databricks <https://databricks.com/>`__
@@ -94,7 +94,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
  are in ``airflow.providers.databricks`` python package.

  You can find package information and changelog for the provider
- in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.0.0/>`_.
+ in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.1.0/>`_.

  Installation
  ------------
@@ -138,4 +138,4 @@ Dependent package
  ============================================================================================================ ==============

  The changelog for the provider package can be found in the
- `changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.0.0/changelog.html>`_.
+ `changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.1.0/changelog.html>`_.
README.rst

@@ -42,7 +42,7 @@

  Package ``apache-airflow-providers-databricks``

- Release: ``6.0.0.rc1``
+ Release: ``6.1.0``


  `Databricks <https://databricks.com/>`__
@@ -55,7 +55,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
  are in ``airflow.providers.databricks`` python package.

  You can find package information and changelog for the provider
- in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.0.0/>`_.
+ in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.1.0/>`_.

  Installation
  ------------
@@ -99,4 +99,4 @@ Dependent package
  ============================================================================================================ ==============

  The changelog for the provider package can be found in the
- `changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.0.0/changelog.html>`_.
+ `changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.1.0/changelog.html>`_.
airflow/providers/databricks/__init__.py

@@ -27,7 +27,7 @@ import packaging.version

  __all__ = ["__version__"]

- __version__ = "6.0.0"
+ __version__ = "6.1.0"

  try:
      from airflow import __version__ as airflow_version
airflow/providers/databricks/get_provider_info.py

@@ -27,9 +27,10 @@ def get_provider_info():
          "package-name": "apache-airflow-providers-databricks",
          "name": "Databricks",
          "description": "`Databricks <https://databricks.com/>`__\n",
-         "suspended": False,
-         "source-date-epoch": 1703288125,
+         "state": "ready",
+         "source-date-epoch": 1705912006,
          "versions": [
+             "6.1.0",
              "6.0.0",
              "5.0.1",
              "5.0.0",
@@ -70,6 +71,14 @@ def get_provider_info():
              "databricks-sql-connector>=2.0.0, <3.0.0, !=2.9.0",
              "aiohttp>=3.6.3, <4",
          ],
+         "additional-extras": [
+             {
+                 "name": "sdk",
+                 "description": "Install Databricks SDK",
+                 "dependencies": ["databricks-sdk==0.10.0"],
+             }
+         ],
+         "devel-dependencies": ["deltalake>=0.12.0"],
          "integrations": [
              {
                  "integration-name": "Databricks",
@@ -153,11 +162,4 @@ def get_provider_info():
              }
          ],
          "extra-links": ["airflow.providers.databricks.operators.databricks.DatabricksJobRunLink"],
-         "additional-extras": [
-             {
-                 "name": "sdk",
-                 "description": "Install Databricks SDK",
-                 "dependencies": ["databricks-sdk==0.10.0"],
-             }
-         ],
      }
airflow/providers/databricks/hooks/databricks.py

@@ -519,13 +519,24 @@ class DatabricksHook(BaseDatabricksHook):
          json = {"run_id": run_id}
          self._do_api_call(DELETE_RUN_ENDPOINT, json)

-     def repair_run(self, json: dict) -> None:
+     def repair_run(self, json: dict) -> int:
          """
          Re-run one or more tasks.

          :param json: repair a job run.
          """
-         self._do_api_call(REPAIR_RUN_ENDPOINT, json)
+         response = self._do_api_call(REPAIR_RUN_ENDPOINT, json)
+         return response["repair_id"]
+
+     def get_latest_repair_id(self, run_id: int) -> int | None:
+         """Get latest repair id if any exist for run_id else None."""
+         json = {"run_id": run_id, "include_history": True}
+         response = self._do_api_call(GET_RUN_ENDPOINT, json)
+         repair_history = response["repair_history"]
+         if len(repair_history) == 1:
+             return None
+         else:
+             return repair_history[-1]["id"]

      def get_cluster_state(self, cluster_id: str) -> ClusterState:
          """
airflow/providers/databricks/hooks/databricks_sql.py

@@ -174,7 +174,7 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
          )
          return self._sql_conn

-     @overload
+     @overload  # type: ignore[override]
      def run(
          self,
          sql: str | Iterable[str],
@@ -249,7 +249,7 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
              self.set_autocommit(conn, autocommit)

              with closing(conn.cursor()) as cur:
-                 self._run_command(cur, sql_statement, parameters)
+                 self._run_command(cur, sql_statement, parameters)  # type: ignore[attr-defined]
                  if handler is not None:
                      raw_result = handler(cur)
                      if self.return_tuple:
@@ -280,13 +280,15 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
          # instantiated namedtuple, and will never do: https://github.com/python/mypy/issues/848
          if isinstance(result, list):
              rows: list[Row] = result
-             rows_fields = rows[0].__fields__
-             rows_object = namedtuple("Row", rows_fields)  # type: ignore[misc]
+             if not rows:
+                 return []
+             rows_fields = tuple(rows[0].__fields__)
+             rows_object = namedtuple("Row", rows_fields, rename=True)  # type: ignore
              return cast(List[tuple], [rows_object(*row) for row in rows])
          else:
              row: Row = result
-             row_fields = row.__fields__
-             row_object = namedtuple("Row", row_fields)  # type: ignore[misc]
+             row_fields = tuple(row.__fields__)
+             row_object = namedtuple("Row", row_fields, rename=True)  # type: ignore
              return cast(tuple, row_object(*row))

      def bulk_dump(self, table, tmp_file):
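
The effect of the namedtuple change is easiest to see in isolation. The sketch below uses only the standard library and is independent of the hook; it shows why rename=True matters for SQL column labels that are not valid Python identifiers, which is the situation the change guards against (together with the new early return for an empty result set):

    from collections import namedtuple

    # Column labels such as "count(1)" are legal in SQL but are not valid Python identifiers.
    fields = ("id", "count(1)")

    # Without rename=True, namedtuple() raises ValueError on the invalid name; with it,
    # invalid names are replaced by positional names such as _1.
    Row = namedtuple("Row", fields, rename=True)
    print(Row._fields)   # ('id', '_1')
    print(Row(7, 42))    # Row(id=7, _1=42)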
airflow/providers/databricks/operators/databricks.py

@@ -88,6 +88,19 @@ def _handle_databricks_operator_execution(operator, hook, log, context) -> None:
              f"{operator.task_id} failed with terminal state: {run_state} "
              f"and with the error {run_state.state_message}"
          )
+         if isinstance(operator, DatabricksRunNowOperator) and operator.repair_run:
+             operator.repair_run = False
+             log.warning(
+                 "%s but since repair run is set, repairing the run with all failed tasks",
+                 error_message,
+             )
+
+             latest_repair_id = hook.get_latest_repair_id(operator.run_id)
+             repair_json = {"run_id": operator.run_id, "rerun_all_failed_tasks": True}
+             if latest_repair_id is not None:
+                 repair_json["latest_repair_id"] = latest_repair_id
+             operator.json["latest_repair_id"] = hook.repair_run(repair_json)
+             _handle_databricks_operator_execution(operator, hook, log, context)
          raise AirflowException(error_message)

      else:
@@ -119,18 +132,24 @@ def _handle_deferrable_databricks_operator_execution(operator, hook, log, contex
      log.info("View run status, Spark UI, and logs at %s", run_page_url)

      if operator.wait_for_termination:
-         operator.defer(
-             trigger=DatabricksExecutionTrigger(
-                 run_id=operator.run_id,
-                 databricks_conn_id=operator.databricks_conn_id,
-                 polling_period_seconds=operator.polling_period_seconds,
-                 retry_limit=operator.databricks_retry_limit,
-                 retry_delay=operator.databricks_retry_delay,
-                 retry_args=operator.databricks_retry_args,
-                 run_page_url=run_page_url,
-             ),
-             method_name=DEFER_METHOD_NAME,
-         )
+         run_info = hook.get_run(operator.run_id)
+         run_state = RunState(**run_info["state"])
+         if not run_state.is_terminal:
+             operator.defer(
+                 trigger=DatabricksExecutionTrigger(
+                     run_id=operator.run_id,
+                     databricks_conn_id=operator.databricks_conn_id,
+                     polling_period_seconds=operator.polling_period_seconds,
+                     retry_limit=operator.databricks_retry_limit,
+                     retry_delay=operator.databricks_retry_delay,
+                     retry_args=operator.databricks_retry_args,
+                     run_page_url=run_page_url,
+                 ),
+                 method_name=DEFER_METHOD_NAME,
+             )
+         else:
+             if run_state.is_successful:
+                 log.info("%s completed successfully.", operator.task_id)


  def _handle_deferrable_databricks_operator_completion(event: dict, log: Logger) -> None:
@@ -623,6 +642,7 @@ class DatabricksRunNowOperator(BaseOperator):
          - ``jar_params``
          - ``spark_submit_params``
          - ``idempotency_token``
+         - ``repair_run``

      :param job_id: the job_id of the existing Databricks job.
          This field will be templated.
@@ -711,6 +731,7 @@ class DatabricksRunNowOperator(BaseOperator):
      :param do_xcom_push: Whether we should push run_id and run_page_url to xcom.
      :param wait_for_termination: if we should wait for termination of the job run. ``True`` by default.
      :param deferrable: Run operator in the deferrable mode.
+     :param repair_run: Repair the databricks run in case of failure, doesn't work in deferrable mode
      """

      # Used in airflow.models.BaseOperator
@@ -741,6 +762,7 @@ class DatabricksRunNowOperator(BaseOperator):
          do_xcom_push: bool = True,
          wait_for_termination: bool = True,
          deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
+         repair_run: bool = False,
          **kwargs,
      ) -> None:
          """Create a new ``DatabricksRunNowOperator``."""
@@ -753,6 +775,7 @@ class DatabricksRunNowOperator(BaseOperator):
          self.databricks_retry_args = databricks_retry_args
          self.wait_for_termination = wait_for_termination
          self.deferrable = deferrable
+         self.repair_run = repair_run

          if job_id is not None:
              self.json["job_id"] = job_id
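
Taken together, the operator changes let a DAG author opt in to automatic repair of a failed run. A minimal usage sketch, with a placeholder connection id and job id (only the repair_run argument is new in 6.1.0):

    from airflow.providers.databricks.operators.databricks import DatabricksRunNowOperator

    run_job = DatabricksRunNowOperator(
        task_id="run_databricks_job",
        databricks_conn_id="databricks_default",  # placeholder connection id
        job_id=12345,                             # placeholder job id
        wait_for_termination=True,
        # On a failed terminal state, repair the run once, re-running all failed tasks.
        repair_run=True,
    )

As the docstring above notes, repair_run applies only to the non-deferrable execution path.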
pyproject.toml

@@ -28,7 +28,7 @@ build-backend = "flit_core.buildapi"

  [project]
  name = "apache-airflow-providers-databricks"
- version = "6.0.0.rc1"
+ version = "6.1.0"
  description = "Provider package apache-airflow-providers-databricks for Apache Airflow"
  readme = "README.rst"
  authors = [
@@ -56,15 +56,15 @@ classifiers = [
  requires-python = "~=3.8"
  dependencies = [
      "aiohttp>=3.6.3, <4",
-     "apache-airflow-providers-common-sql>=1.10.0.dev0",
-     "apache-airflow>=2.6.0.dev0",
+     "apache-airflow-providers-common-sql>=1.10.0",
+     "apache-airflow>=2.6.0",
      "databricks-sql-connector>=2.0.0, <3.0.0, !=2.9.0",
      "requests>=2.27,<3",
  ]

  [project.urls]
- "Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.0.0"
- "Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.0.0/changelog.html"
+ "Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.1.0"
+ "Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.1.0/changelog.html"
  "Bug Tracker" = "https://github.com/apache/airflow/issues"
  "Source Code" = "https://github.com/apache/airflow"
  "Slack Chat" = "https://s.apache.org/airflow-slack"