apache-airflow-providers-databricks 5.1.0rc2__py3-none-any.whl → 6.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -27,7 +27,7 @@ import packaging.version
 
 __all__ = ["__version__"]
 
-__version__ = "5.1.0"
+__version__ = "6.0.0"
 
 try:
     from airflow import __version__ as airflow_version
@@ -28,9 +28,9 @@ def get_provider_info():
         "name": "Databricks",
         "description": "`Databricks <https://databricks.com/>`__\n",
         "suspended": False,
-        "source-date-epoch": 1701983371,
+        "source-date-epoch": 1703288125,
         "versions": [
-            "5.1.0",
+            "6.0.0",
             "5.0.1",
             "5.0.0",
             "4.7.0",
@@ -65,7 +65,7 @@ def get_provider_info():
         ],
         "dependencies": [
             "apache-airflow>=2.6.0",
-            "apache-airflow-providers-common-sql>=1.8.1",
+            "apache-airflow-providers-common-sql>=1.10.0",
             "requests>=2.27,<3",
             "databricks-sql-connector>=2.0.0, <3.0.0, !=2.9.0",
             "aiohttp>=3.6.3, <4",
@@ -55,7 +55,7 @@ INSTALL_LIBS_ENDPOINT = ("POST", "api/2.0/libraries/install")
 UNINSTALL_LIBS_ENDPOINT = ("POST", "api/2.0/libraries/uninstall")
 
 LIST_JOBS_ENDPOINT = ("GET", "api/2.1/jobs/list")
-LIST_PIPELINES_ENDPOINT = ("GET", "/api/2.0/pipelines")
+LIST_PIPELINES_ENDPOINT = ("GET", "api/2.0/pipelines")
 
 WORKSPACE_GET_STATUS_ENDPOINT = ("GET", "api/2.0/workspace/get-status")
 
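
The only change above is dropping the leading slash from the pipelines path so it matches the other endpoint constants. A minimal sketch of why that matters, assuming the hook joins the host and the endpoint path with a single ``/`` (the exact URL construction inside ``_do_api_call`` may differ)::

    # Hypothetical illustration: joining a host and an endpoint path with "/".
    host = "https://adb-1234567890123456.7.azuredatabricks.net"
    _, path = ("GET", "api/2.0/pipelines")
    url = f"{host}/{path}"                        # .../api/2.0/pipelines
    doubled = f"{host}/" + "/api/2.0/pipelines"   # ...//api/2.0/pipelines (double slash)
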
@@ -322,8 +322,8 @@ class DatabricksHook(BaseDatabricksHook):
             payload["filter"] = filter
 
         while has_more:
-            if next_token:
-                payload["page_token"] = next_token
+            if next_token is not None:
+                payload = {**payload, "page_token": next_token}
             response = self._do_api_call(LIST_PIPELINES_ENDPOINT, payload)
             pipelines = response.get("statuses", [])
             all_pipelines += pipelines
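
This hunk now adds ``page_token`` only when the API actually returned a token, and builds a fresh payload dict instead of mutating the original. A minimal sketch of the pagination loop it belongs to; ``has_more``/``all_pipelines`` come from the surrounding code, ``hook._do_api_call`` stands in for ``self._do_api_call``, and the tail of the loop (reading ``next_page_token``) is assumed from the Databricks pipelines API::

    payload = {"max_results": 25}
    all_pipelines = []
    next_token = None
    has_more = True
    while has_more:
        if next_token is not None:
            payload = {**payload, "page_token": next_token}
        response = hook._do_api_call(LIST_PIPELINES_ENDPOINT, payload)  # hook: a DatabricksHook instance
        all_pipelines += response.get("statuses", [])
        next_token = response.get("next_page_token")
        has_more = next_token is not None
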
@@ -345,11 +345,11 @@ class DatabricksHook(BaseDatabricksHook):
 
         if len(matching_pipelines) > 1:
             raise AirflowException(
-                f"There are more than one job with name {pipeline_name}. "
+                f"There are more than one pipelines with name {pipeline_name}. "
                 "Please delete duplicated pipelines first"
             )
 
-        if not pipeline_name:
+        if not pipeline_name or len(matching_pipelines) == 0:
             return None
         else:
             return matching_pipelines[0]["pipeline_id"]
@@ -16,19 +16,32 @@
 # under the License.
 from __future__ import annotations
 
+import warnings
+from collections import namedtuple
 from contextlib import closing
 from copy import copy
-from typing import TYPE_CHECKING, Any, Callable, Iterable, Mapping, TypeVar, overload
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Iterable,
+    List,
+    Mapping,
+    Sequence,
+    TypeVar,
+    cast,
+    overload,
+)
 
 from databricks import sql  # type: ignore[attr-defined]
-from databricks.sql.types import Row
 
-from airflow.exceptions import AirflowException
+from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
 from airflow.providers.common.sql.hooks.sql import DbApiHook, return_single_query_results
 from airflow.providers.databricks.hooks.databricks_base import BaseDatabricksHook
 
 if TYPE_CHECKING:
     from databricks.sql.client import Connection
+    from databricks.sql.types import Row
 
 LIST_SQL_ENDPOINTS_ENDPOINT = ("GET", "api/2.0/sql/endpoints")
 
@@ -52,6 +65,10 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
         on every request
     :param catalog: An optional initial catalog to use. Requires DBR version 9.0+
     :param schema: An optional initial schema to use. Requires DBR version 9.0+
+    :param return_tuple: Return a ``namedtuple`` object instead of a ``databricks.sql.Row`` object. Default
+        to False. In a future release of the provider, this will become True by default. This parameter
+        ensures backward-compatibility during the transition phase to common tuple objects for all hooks based
+        on DbApiHook. This flag will also be removed in a future release.
     :param kwargs: Additional parameters internal to Databricks SQL Connector parameters
     """
 
@@ -68,6 +85,7 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
         catalog: str | None = None,
         schema: str | None = None,
         caller: str = "DatabricksSqlHook",
+        return_tuple: bool = False,
         **kwargs,
     ) -> None:
         super().__init__(databricks_conn_id, caller=caller)
@@ -80,8 +98,18 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
         self.http_headers = http_headers
         self.catalog = catalog
         self.schema = schema
+        self.return_tuple = return_tuple
         self.additional_params = kwargs
 
+        if not self.return_tuple:
+            warnings.warn(
+                """Returning a raw `databricks.sql.Row` object is deprecated. A namedtuple will be
+                returned instead in a future release of the databricks provider. Set `return_tuple=True` to
+                enable this behavior.""",
+                AirflowProviderDeprecationWarning,
+                stacklevel=2,
+            )
+
     def _get_extra_config(self) -> dict[str, Any | None]:
         extra_params = copy(self.databricks_conn.extra_dejson)
         for arg in ["http_path", "session_configuration", *self.extra_parameters]:
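
With this change, constructing ``DatabricksSqlHook`` without opting in emits an ``AirflowProviderDeprecationWarning``. A minimal sketch of opting in to the future behaviour; the connection id and warehouse path below are illustrative::

    from airflow.providers.databricks.hooks.databricks_sql import DatabricksSqlHook

    # return_tuple=True silences the deprecation warning and makes query results
    # plain namedtuples instead of databricks.sql.Row objects.
    hook = DatabricksSqlHook(
        databricks_conn_id="databricks_default",
        http_path="/sql/1.0/warehouses/abc123",  # hypothetical warehouse path
        return_tuple=True,
    )
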
@@ -167,7 +195,7 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
         handler: Callable[[Any], T] = ...,
         split_statements: bool = ...,
         return_last: bool = ...,
-    ) -> T | list[T]:
+    ) -> tuple | list[tuple] | list[list[tuple] | tuple] | None:
         ...
 
     def run(
@@ -178,7 +206,7 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
         handler: Callable[[Any], T] | None = None,
         split_statements: bool = True,
         return_last: bool = True,
-    ) -> T | list[T] | None:
+    ) -> tuple | list[tuple] | list[list[tuple] | tuple] | None:
         """
         Run a command or a list of commands.
 
@@ -223,7 +251,12 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
             with closing(conn.cursor()) as cur:
                 self._run_command(cur, sql_statement, parameters)
                 if handler is not None:
-                    result = self._make_serializable(handler(cur))
+                    raw_result = handler(cur)
+                    if self.return_tuple:
+                        result = self._make_common_data_structure(raw_result)
+                    else:
+                        # Returning raw result is deprecated, and do not comply with current common.sql interface
+                        result = raw_result  # type: ignore[assignment]
                     if return_single_query_results(sql, return_last, split_statements):
                         results = [result]
                         self.descriptions = [cur.description]
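
So ``run()`` routes handler results through ``_make_common_data_structure`` only when ``return_tuple`` is set. A short sketch of what a caller sees with the hook constructed above and ``fetch_all_handler`` from the common.sql provider; the query and column names are illustrative::

    from airflow.providers.common.sql.hooks.sql import fetch_all_handler

    rows = hook.run(
        sql="SELECT 1 AS a, 2 AS b",
        handler=fetch_all_handler,
        return_last=True,
    )
    # With return_tuple=True each element of `rows` is a namedtuple, e.g. Row(a=1, b=2);
    # without it, the deprecated databricks.sql.Row objects are returned unchanged.
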
@@ -241,14 +274,20 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
         else:
             return results
 
-    @staticmethod
-    def _make_serializable(result):
-        """Transform the databricks Row objects into JSON-serializable lists."""
+    def _make_common_data_structure(self, result: Sequence[Row] | Row) -> list[tuple] | tuple:
+        """Transform the databricks Row objects into namedtuple."""
+        # Below ignored lines respect namedtuple docstring, but mypy do not support dynamically
+        # instantiated namedtuple, and will never do: https://github.com/python/mypy/issues/848
         if isinstance(result, list):
-            return [list(row) for row in result]
-        elif isinstance(result, Row):
-            return list(result)
-        return result
+            rows: list[Row] = result
+            rows_fields = rows[0].__fields__
+            rows_object = namedtuple("Row", rows_fields)  # type: ignore[misc]
+            return cast(List[tuple], [rows_object(*row) for row in rows])
+        else:
+            row: Row = result
+            row_fields = row.__fields__
+            row_object = namedtuple("Row", row_fields)  # type: ignore[misc]
+            return cast(tuple, row_object(*row))
 
     def bulk_dump(self, table, tmp_file):
         raise NotImplementedError()
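
The old ``_make_serializable`` flattened ``Row`` objects into plain lists, losing column names; the replacement builds a dynamically created ``namedtuple`` from the Row's ``__fields__``, so attribute access survives the conversion. A standalone sketch of the same idea, using an ordinary tuple plus a field list in place of a real ``databricks.sql.Row``::

    from collections import namedtuple

    # Stand-in for a databricks.sql.Row: unpacks like a tuple, has known field names.
    fields = ["id", "name"]
    raw_row = (1, "bluebird")

    RowTuple = namedtuple("Row", fields)
    converted = RowTuple(*raw_row)
    assert converted.name == "bluebird"   # attribute access preserved
    assert tuple(converted) == raw_row    # still a plain, serializable tuple
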
@@ -521,7 +521,7 @@ class DatabricksSubmitRunOperator(BaseOperator):
         ):
             # If pipeline_id is not provided, we need to fetch it from the pipeline_name
             pipeline_name = self.json["pipeline_task"]["pipeline_name"]
-            self.json["pipeline_task"]["pipeline_id"] = self._hook.get_pipeline_id(pipeline_name)
+            self.json["pipeline_task"]["pipeline_id"] = self._hook.find_pipeline_id_by_name(pipeline_name)
             del self.json["pipeline_task"]["pipeline_name"]
         json_normalised = normalise_json_content(self.json)
         self.run_id = self._hook.submit_run(json_normalised)
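
When a ``pipeline_task`` is submitted with a ``pipeline_name`` rather than a ``pipeline_id``, the operator now resolves the id through the renamed ``find_pipeline_id_by_name`` hook method. A minimal sketch of a task relying on that lookup; the task id and pipeline name are illustrative::

    from airflow.providers.databricks.operators.databricks import DatabricksSubmitRunOperator

    run_dlt = DatabricksSubmitRunOperator(
        task_id="run_dlt_pipeline",
        json={
            # pipeline_id is resolved from the name via find_pipeline_id_by_name()
            "pipeline_task": {"pipeline_name": "my-dlt-pipeline"},
        },
    )
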
@@ -580,26 +580,20 @@ class DatabricksRunNowOperator(BaseOperator):
     For example ::
 
         json = {
-            "job_id": 42,
-            "notebook_params": {
-                "dry-run": "true",
-                "oldest-time-to-consider": "1457570074236"
-            }
+            "job_id": 42,
+            "notebook_params": {"dry-run": "true", "oldest-time-to-consider": "1457570074236"},
         }
 
-        notebook_run = DatabricksRunNowOperator(task_id='notebook_run', json=json)
+        notebook_run = DatabricksRunNowOperator(task_id="notebook_run", json=json)
 
     Another way to accomplish the same thing is to use the named parameters
     of the ``DatabricksRunNowOperator`` directly. Note that there is exactly
     one named parameter for each top level parameter in the ``run-now``
     endpoint. In this method, your code would look like this: ::
 
-        job_id=42
+        job_id = 42
 
-        notebook_params = {
-            "dry-run": "true",
-            "oldest-time-to-consider": "1457570074236"
-        }
+        notebook_params = {"dry-run": "true", "oldest-time-to-consider": "1457570074236"}
 
         python_params = ["douglas adams", "42"]
 
@@ -612,7 +606,7 @@ class DatabricksRunNowOperator(BaseOperator):
             notebook_params=notebook_params,
             python_params=python_params,
             jar_params=jar_params,
-            spark_submit_params=spark_submit_params
+            spark_submit_params=spark_submit_params,
         )
 
     In the case where both the json parameter **AND** the named parameters
@@ -113,6 +113,7 @@ class DatabricksSqlOperator(SQLExecuteQueryOperator):
             "catalog": self.catalog,
             "schema": self.schema,
             "caller": "DatabricksSqlOperator",
+            "return_tuple": True,
             **self.client_parameters,
             **self.hook_params,
         }
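
``DatabricksSqlOperator`` now creates its hook with ``return_tuple=True``, so rows flowing through the ``SQLExecuteQueryOperator`` machinery (handlers, XCom) are namedtuples rather than ``databricks.sql.Row`` objects, and no deprecation warning is raised. A minimal usage sketch; the connection id, warehouse path, and table are illustrative::

    from airflow.providers.databricks.operators.databricks_sql import DatabricksSqlOperator

    select_data = DatabricksSqlOperator(
        task_id="select_data",
        databricks_conn_id="databricks_default",
        http_path="/sql/1.0/warehouses/abc123",  # hypothetical
        sql="SELECT * FROM my_catalog.my_schema.my_table LIMIT 10",
    )
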
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: apache-airflow-providers-databricks
-Version: 5.1.0rc2
+Version: 6.0.0rc1
 Summary: Provider package apache-airflow-providers-databricks for Apache Airflow
 Keywords: airflow-provider,databricks,airflow,integration
 Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -21,15 +21,15 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Topic :: System :: Monitoring
 Requires-Dist: aiohttp>=3.6.3, <4
-Requires-Dist: apache-airflow-providers-common-sql>=1.8.1.dev0
+Requires-Dist: apache-airflow-providers-common-sql>=1.10.0.dev0
 Requires-Dist: apache-airflow>=2.6.0.dev0
 Requires-Dist: databricks-sql-connector>=2.0.0, <3.0.0, !=2.9.0
 Requires-Dist: requests>=2.27,<3
 Requires-Dist: apache-airflow-providers-common-sql ; extra == "common.sql"
 Requires-Dist: databricks-sdk==0.10.0 ; extra == "sdk"
 Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
-Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-databricks/5.1.0/changelog.html
-Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/5.1.0
+Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.0.0/changelog.html
+Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.0.0
 Project-URL: Slack Chat, https://s.apache.org/airflow-slack
 Project-URL: Source Code, https://github.com/apache/airflow
 Project-URL: Twitter, https://twitter.com/ApacheAirflow
@@ -81,7 +81,7 @@ Provides-Extra: sdk
 
 Package ``apache-airflow-providers-databricks``
 
-Release: ``5.1.0.rc2``
+Release: ``6.0.0.rc1``
 
 
 `Databricks <https://databricks.com/>`__
@@ -94,7 +94,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
 are in ``airflow.providers.databricks`` python package.
 
 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/5.1.0/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.0.0/>`_.
 
 Installation
 ------------
@@ -112,7 +112,7 @@ Requirements
 PIP package                              Version required
 ======================================== ==========================
 ``apache-airflow``                       ``>=2.6.0``
-``apache-airflow-providers-common-sql``  ``>=1.8.1``
+``apache-airflow-providers-common-sql``  ``>=1.10.0``
 ``requests``                             ``>=2.27,<3``
 ``databricks-sql-connector``             ``>=2.0.0,!=2.9.0,<3.0.0``
 ``aiohttp``                              ``>=3.6.3,<4``
@@ -138,4 +138,4 @@ Dependent package
 ============================================================================================================ ==============
 
 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/5.1.0/changelog.html>`_.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.0.0/changelog.html>`_.
@@ -1,14 +1,14 @@
 airflow/providers/databricks/LICENSE,sha256=ywUBpKZc7Jb96rVt5I3IDbg7dIJAbUSHkuoDcF3jbH4,13569
-airflow/providers/databricks/__init__.py,sha256=JsaG1QZ78eKQBM9TcObkVPwxFuVOGUUkNq8GW0V6rYA,1585
-airflow/providers/databricks/get_provider_info.py,sha256=Fv4b-vDzk-i2J31xahNNm4cvVzVUkz10jGF7iJSecuc,6104
+airflow/providers/databricks/__init__.py,sha256=UcvoLm-UfmmEOWyZjjBQ8rf5a5aUs9Uoge2HmRJKwi0,1585
+airflow/providers/databricks/get_provider_info.py,sha256=tCIb8m8RY9FOo5YKewgLcpntbi4SWCNppvrx3qBycLo,6105
 airflow/providers/databricks/hooks/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
-airflow/providers/databricks/hooks/databricks.py,sha256=wq7uiSU_WbBY8yrlI2Zm6l12t9YP0MgbBMeHe0zuWe4,22542
+airflow/providers/databricks/hooks/databricks.py,sha256=GeqiBYFsFIwK4vgBTxde-bbdDliWzQAEbvZcShbyuio,22604
 airflow/providers/databricks/hooks/databricks_base.py,sha256=nX-40P0XKZyFyahkbbImvWmoCQ4ic35viAYlFIupUHE,30591
-airflow/providers/databricks/hooks/databricks_sql.py,sha256=PVyN2VYnJR2kmPUJdxeFCChPR-B3R0kpUDDKh4slvOA,10474
+airflow/providers/databricks/hooks/databricks_sql.py,sha256=afzw9l1VNBFvReEheAUSrGgXc7FWePcwuYw9-qMLJUg,12445
 airflow/providers/databricks/operators/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
-airflow/providers/databricks/operators/databricks.py,sha256=afG7vCIT8_JwTK7k1gXlgQDTsJj9JichlC2bP9XmHg0,40065
+airflow/providers/databricks/operators/databricks.py,sha256=XmQuoWzesOYpwAtxn3aB-1CihbnT8W67WoxUoAh0Bnc,40012
 airflow/providers/databricks/operators/databricks_repos.py,sha256=NUxa0jvvmK16CDKb-7Tbs3wF9XoFi1AVJlKxlsE3r4k,13092
-airflow/providers/databricks/operators/databricks_sql.py,sha256=UlO9vtAJnThZq0hIvzDYbeCqQWqseeDnA1-UMSPQQnc,16772
+airflow/providers/databricks/operators/databricks_sql.py,sha256=C_XqR5SN_g2t0XXlH248nPAC7BcYB0P1NAsPJT5qcEg,16806
 airflow/providers/databricks/sensors/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
 airflow/providers/databricks/sensors/databricks_partition.py,sha256=TRZVjGEYzIbN4nZDPJEVavXdcegpyj0BVKXINMRFlCM,10605
 airflow/providers/databricks/sensors/databricks_sql.py,sha256=8qSfbzpWgU2_oZU9pS9SA_MSYhyIHgYZjTYfDkDH84Y,5771
@@ -16,7 +16,7 @@ airflow/providers/databricks/triggers/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvR
 airflow/providers/databricks/triggers/databricks.py,sha256=Qj9mB0bNYRY_toPEU17gxbxmPkkT3P789kCHu_T64BA,3997
 airflow/providers/databricks/utils/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
 airflow/providers/databricks/utils/databricks.py,sha256=iRzRHvdFETGiFxZccOjfC8NGgDofMfP35Tqp3M5CGr0,2880
-apache_airflow_providers_databricks-5.1.0rc2.dist-info/entry_points.txt,sha256=1WxGXTFDb107eV5Zmrt3p12J4LHYk56-ZKlvpOK7vg4,106
-apache_airflow_providers_databricks-5.1.0rc2.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
-apache_airflow_providers_databricks-5.1.0rc2.dist-info/METADATA,sha256=ehi83rkKMnM6yuXZRDHumEbmc6OYB4rSOGoGvT36M68,6442
-apache_airflow_providers_databricks-5.1.0rc2.dist-info/RECORD,,
+apache_airflow_providers_databricks-6.0.0rc1.dist-info/entry_points.txt,sha256=1WxGXTFDb107eV5Zmrt3p12J4LHYk56-ZKlvpOK7vg4,106
+apache_airflow_providers_databricks-6.0.0rc1.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
+apache_airflow_providers_databricks-6.0.0rc1.dist-info/METADATA,sha256=Sd37-KPVkHCMIBl1KPqZAE9xUslg0pKMojaZ6SibGog,6444
+apache_airflow_providers_databricks-6.0.0rc1.dist-info/RECORD,,