databricks-sdk 0.37.0__py3-none-any.whl → 0.39.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of databricks-sdk has been flagged as potentially problematic.

Files changed (33)
  1. databricks/sdk/__init__.py +24 -2
  2. databricks/sdk/_base_client.py +61 -14
  3. databricks/sdk/config.py +10 -9
  4. databricks/sdk/credentials_provider.py +6 -5
  5. databricks/sdk/mixins/jobs.py +49 -0
  6. databricks/sdk/mixins/open_ai_client.py +2 -2
  7. databricks/sdk/service/apps.py +185 -4
  8. databricks/sdk/service/billing.py +248 -1
  9. databricks/sdk/service/catalog.py +1943 -46
  10. databricks/sdk/service/cleanrooms.py +1281 -0
  11. databricks/sdk/service/compute.py +1486 -8
  12. databricks/sdk/service/dashboards.py +336 -11
  13. databricks/sdk/service/files.py +162 -2
  14. databricks/sdk/service/iam.py +353 -2
  15. databricks/sdk/service/jobs.py +1281 -16
  16. databricks/sdk/service/marketplace.py +688 -0
  17. databricks/sdk/service/ml.py +1038 -2
  18. databricks/sdk/service/oauth2.py +176 -0
  19. databricks/sdk/service/pipelines.py +602 -15
  20. databricks/sdk/service/provisioning.py +402 -0
  21. databricks/sdk/service/serving.py +615 -0
  22. databricks/sdk/service/settings.py +1190 -3
  23. databricks/sdk/service/sharing.py +328 -2
  24. databricks/sdk/service/sql.py +1186 -2
  25. databricks/sdk/service/vectorsearch.py +290 -0
  26. databricks/sdk/service/workspace.py +453 -1
  27. databricks/sdk/version.py +1 -1
  28. {databricks_sdk-0.37.0.dist-info → databricks_sdk-0.39.0.dist-info}/METADATA +26 -26
  29. {databricks_sdk-0.37.0.dist-info → databricks_sdk-0.39.0.dist-info}/RECORD +33 -31
  30. {databricks_sdk-0.37.0.dist-info → databricks_sdk-0.39.0.dist-info}/WHEEL +1 -1
  31. {databricks_sdk-0.37.0.dist-info → databricks_sdk-0.39.0.dist-info}/LICENSE +0 -0
  32. {databricks_sdk-0.37.0.dist-info → databricks_sdk-0.39.0.dist-info}/NOTICE +0 -0
  33. {databricks_sdk-0.37.0.dist-info → databricks_sdk-0.39.0.dist-info}/top_level.txt +0 -0
databricks/sdk/__init__.py CHANGED
@@ -6,6 +6,7 @@ from databricks.sdk import azure
 from databricks.sdk.credentials_provider import CredentialsStrategy
 from databricks.sdk.mixins.compute import ClustersExt
 from databricks.sdk.mixins.files import DbfsExt
+from databricks.sdk.mixins.jobs import JobsExt
 from databricks.sdk.mixins.open_ai_client import ServingEndpointsExt
 from databricks.sdk.mixins.workspace import WorkspaceExt
 from databricks.sdk.service.apps import AppsAPI
@@ -27,6 +28,9 @@ from databricks.sdk.service.catalog import (AccountMetastoreAssignmentsAPI,
                                             TableConstraintsAPI, TablesAPI,
                                             TemporaryTableCredentialsAPI,
                                             VolumesAPI, WorkspaceBindingsAPI)
+from databricks.sdk.service.cleanrooms import (CleanRoomAssetsAPI,
+                                               CleanRoomsAPI,
+                                               CleanRoomTaskRunsAPI)
 from databricks.sdk.service.compute import (ClusterPoliciesAPI, ClustersAPI,
                                             CommandExecutionAPI,
                                             GlobalInitScriptsAPI,
@@ -175,6 +179,9 @@ class WorkspaceClient:
         self._apps = AppsAPI(self._api_client)
         self._artifact_allowlists = ArtifactAllowlistsAPI(self._api_client)
         self._catalogs = CatalogsAPI(self._api_client)
+        self._clean_room_assets = CleanRoomAssetsAPI(self._api_client)
+        self._clean_room_task_runs = CleanRoomTaskRunsAPI(self._api_client)
+        self._clean_rooms = CleanRoomsAPI(self._api_client)
         self._cluster_policies = ClusterPoliciesAPI(self._api_client)
         self._clusters = ClustersExt(self._api_client)
         self._command_execution = CommandExecutionAPI(self._api_client)
@@ -204,7 +211,7 @@ class WorkspaceClient:
         self._instance_pools = InstancePoolsAPI(self._api_client)
         self._instance_profiles = InstanceProfilesAPI(self._api_client)
         self._ip_access_lists = IpAccessListsAPI(self._api_client)
-        self._jobs = JobsAPI(self._api_client)
+        self._jobs = JobsExt(self._api_client)
         self._lakeview = LakeviewAPI(self._api_client)
         self._libraries = LibrariesAPI(self._api_client)
         self._metastores = MetastoresAPI(self._api_client)
@@ -304,6 +311,21 @@ class WorkspaceClient:
         """A catalog is the first layer of Unity Catalog’s three-level namespace."""
         return self._catalogs

+    @property
+    def clean_room_assets(self) -> CleanRoomAssetsAPI:
+        """Clean room assets are data and code objects — Tables, volumes, and notebooks that are shared with the clean room."""
+        return self._clean_room_assets
+
+    @property
+    def clean_room_task_runs(self) -> CleanRoomTaskRunsAPI:
+        """Clean room task runs are the executions of notebooks in a clean room."""
+        return self._clean_room_task_runs
+
+    @property
+    def clean_rooms(self) -> CleanRoomsAPI:
+        """A clean room uses Delta Sharing and serverless compute to provide a secure and privacy-protecting environment where multiple parties can work together on sensitive enterprise data without direct access to each other’s data."""
+        return self._clean_rooms
+
     @property
     def cluster_policies(self) -> ClusterPoliciesAPI:
         """You can use cluster policies to control users' ability to configure clusters based on a set of rules."""
@@ -450,7 +472,7 @@ class WorkspaceClient:
         return self._ip_access_lists

     @property
-    def jobs(self) -> JobsAPI:
+    def jobs(self) -> JobsExt:
         """The Jobs API allows you to create, edit, and delete jobs."""
         return self._jobs

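Taken together, the `__init__.py` changes surface three new clean rooms APIs as `WorkspaceClient` properties and silently upgrade `w.jobs` from `JobsAPI` to the `JobsExt` mixin. A minimal usage sketch, assuming the generated clean rooms service follows the SDK's usual `list()` conventions (the loop and run ID below are illustrative, not taken from this diff):

    from databricks.sdk import WorkspaceClient

    w = WorkspaceClient()

    # New in this release: clean rooms are exposed as client properties.
    for room in w.clean_rooms.list():
        print(room.name)

    # w.jobs is now a JobsExt, but JobsExt subclasses JobsAPI, so existing
    # code that calls or type-hints JobsAPI keeps working unchanged.
    run = w.jobs.get_run(run_id=1234)  # hypothetical run ID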
databricks/sdk/_base_client.py CHANGED
@@ -1,3 +1,4 @@
+import io
 import logging
 import urllib.parse
 from datetime import timedelta
@@ -50,7 +51,8 @@ class _BaseClient:
                  http_timeout_seconds: float = None,
                  extra_error_customizers: List[_ErrorCustomizer] = None,
                  debug_headers: bool = False,
-                 clock: Clock = None):
+                 clock: Clock = None,
+                 streaming_buffer_size: int = 1024 * 1024):  # 1MB
         """
         :param debug_truncate_bytes:
         :param retry_timeout_seconds:
@@ -68,6 +70,7 @@ class _BaseClient:
         :param extra_error_customizers:
         :param debug_headers: Whether to include debug headers in the request log.
         :param clock: Clock object to use for time-related operations.
+        :param streaming_buffer_size: The size of the buffer to use for streaming responses.
         """

         self._debug_truncate_bytes = debug_truncate_bytes or 96
@@ -78,6 +81,7 @@ class _BaseClient:
         self._clock = clock or RealClock()
         self._session = requests.Session()
         self._session.auth = self._authenticate
+        self._streaming_buffer_size = streaming_buffer_size

         # We don't use `max_retries` from HTTPAdapter to align with a more production-ready
         # retry strategy established in the Databricks SDK for Go. See _is_retryable and
@@ -127,6 +131,14 @@ class _BaseClient:
         flattened = dict(flatten_dict(with_fixed_bools))
         return flattened

+    @staticmethod
+    def _is_seekable_stream(data) -> bool:
+        if data is None:
+            return False
+        if not isinstance(data, io.IOBase):
+            return False
+        return data.seekable()
+
     def do(self,
            method: str,
            url: str,
@@ -141,24 +153,39 @@ class _BaseClient:
         if headers is None:
             headers = {}
         headers['User-Agent'] = self._user_agent_base
-        retryable = retried(timeout=timedelta(seconds=self._retry_timeout_seconds),
-                            is_retryable=self._is_retryable,
-                            clock=self._clock)
-        response = retryable(self._perform)(method,
-                                            url,
-                                            query=query,
-                                            headers=headers,
-                                            body=body,
-                                            raw=raw,
-                                            files=files,
-                                            data=data,
-                                            auth=auth)
+
+        # Wrap strings and bytes in a seekable stream so that we can rewind them.
+        if isinstance(data, (str, bytes)):
+            data = io.BytesIO(data.encode('utf-8') if isinstance(data, str) else data)
+
+        # Only retry if the request is not a stream or if the stream is seekable and
+        # we can rewind it. This is necessary to avoid bugs where the retry doesn't
+        # re-read already read data from the body.
+        if data is not None and not self._is_seekable_stream(data):
+            logger.debug(f"Retry disabled for non-seekable stream: type={type(data)}")
+            call = self._perform
+        else:
+            call = retried(timeout=timedelta(seconds=self._retry_timeout_seconds),
+                           is_retryable=self._is_retryable,
+                           clock=self._clock)(self._perform)
+
+        response = call(method,
+                        url,
+                        query=query,
+                        headers=headers,
+                        body=body,
+                        raw=raw,
+                        files=files,
+                        data=data,
+                        auth=auth)

         resp = dict()
         for header in response_headers if response_headers else []:
             resp[header] = response.headers.get(Casing.to_header_case(header))
         if raw:
-            resp["contents"] = _StreamingResponse(response)
+            streaming_response = _StreamingResponse(response)
+            streaming_response.set_chunk_size(self._streaming_buffer_size)
+            resp["contents"] = streaming_response
             return resp
         if not len(response.content):
             return resp
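The retry gate above hinges entirely on `_is_seekable_stream`. A standalone sketch of the same predicate, mirroring the private helper rather than importing it, shows which request bodies keep retries enabled:

    import io

    def is_seekable_stream(data) -> bool:
        # Mirrors _BaseClient._is_seekable_stream: only file-like objects
        # that support seek() can be safely rewound and re-read on retry.
        return isinstance(data, io.IOBase) and data.seekable()

    assert is_seekable_stream(io.BytesIO(b"payload"))  # rewindable: retried
    assert not is_seekable_stream(None)                # no body
    assert not is_seekable_stream(iter([b"chunk"]))    # iterator body: retries disabled

    # Note: str and bytes bodies never hit the non-seekable branch, because
    # do() wraps them in io.BytesIO before this check runs.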
@@ -221,6 +248,12 @@ class _BaseClient:
                  files=None,
                  data=None,
                  auth: Callable[[requests.PreparedRequest], requests.PreparedRequest] = None):
+        # Keep track of the initial position of the stream so that we can rewind it if
+        # we need to retry the request.
+        initial_data_position = 0
+        if self._is_seekable_stream(data):
+            initial_data_position = data.tell()
+
         response = self._session.request(method,
                                          url,
                                          params=self._fix_query_string(query),
@@ -232,9 +265,18 @@ class _BaseClient:
                                          stream=raw,
                                          timeout=self._http_timeout_seconds)
         self._record_request_log(response, raw=raw or data is not None or files is not None)
+
         error = self._error_parser.get_api_error(response)
         if error is not None:
+            # If the request body is a seekable stream, rewind it so that it is ready
+            # to be read again in case of a retry.
+            #
+            # TODO: This should be moved into a "before-retry" hook to avoid one
+            # unnecessary seek on the last failed retry before aborting.
+            if self._is_seekable_stream(data):
+                data.seek(initial_data_position)
             raise error from None
+
         return response

     def _record_request_log(self, response: requests.Response, raw: bool = False) -> None:
@@ -283,6 +325,11 @@ class _StreamingResponse(BinaryIO):
         return False

     def read(self, n: int = -1) -> bytes:
+        """
+        Read up to n bytes from the response stream. If n is negative, read
+        until the end of the stream.
+        """
+
         self._open()
         read_everything = n < 0
         remaining_bytes = n
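For consumers of raw responses, the visible effect is that `_StreamingResponse.read()` now buffers in 1 MB chunks by default. A hedged sketch of a chunked download through the files API, which returns a `_StreamingResponse`-backed `contents` object (the volume path and `process` handler are hypothetical):

    from databricks.sdk import WorkspaceClient

    w = WorkspaceClient()

    resp = w.files.download("/Volumes/main/default/vol/data.bin")  # hypothetical path
    f = resp.contents  # file-like, backed by _StreamingResponse
    while chunk := f.read(1024 * 1024):  # read in buffer-sized chunks
        process(chunk)  # hypothetical handler
    f.close()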
databricks/sdk/config.py CHANGED
@@ -92,15 +92,16 @@ class Config:
     max_connections_per_pool: int = ConfigAttribute()
     databricks_environment: Optional[DatabricksEnvironment] = None

-    def __init__(self,
-                 *,
-                 # Deprecated. Use credentials_strategy instead.
-                 credentials_provider: Optional[CredentialsStrategy] = None,
-                 credentials_strategy: Optional[CredentialsStrategy] = None,
-                 product=None,
-                 product_version=None,
-                 clock: Optional[Clock] = None,
-                 **kwargs):
+    def __init__(
+            self,
+            *,
+            # Deprecated. Use credentials_strategy instead.
+            credentials_provider: Optional[CredentialsStrategy] = None,
+            credentials_strategy: Optional[CredentialsStrategy] = None,
+            product=None,
+            product_version=None,
+            clock: Optional[Clock] = None,
+            **kwargs):
         self._header_factory = None
         self._inner = {}
         self._user_agent_other_info = []
databricks/sdk/credentials_provider.py CHANGED
@@ -304,11 +304,12 @@ def github_oidc_azure(cfg: 'Config') -> Optional[CredentialsProvider]:
     # detect Azure AD Tenant ID if it's not specified directly
     token_endpoint = cfg.oidc_endpoints.token_endpoint
     cfg.azure_tenant_id = token_endpoint.replace(aad_endpoint, '').split('/')[0]
-    inner = ClientCredentials(client_id=cfg.azure_client_id,
-                              client_secret="",  # we have no (rotatable) secrets in OIDC flow
-                              token_url=f"{aad_endpoint}{cfg.azure_tenant_id}/oauth2/token",
-                              endpoint_params=params,
-                              use_params=True)
+    inner = ClientCredentials(
+        client_id=cfg.azure_client_id,
+        client_secret="",  # we have no (rotatable) secrets in OIDC flow
+        token_url=f"{aad_endpoint}{cfg.azure_tenant_id}/oauth2/token",
+        endpoint_params=params,
+        use_params=True)

     def refreshed_headers() -> Dict[str, str]:
         token = inner.token()
databricks/sdk/mixins/jobs.py ADDED
@@ -0,0 +1,49 @@
+from typing import Optional
+
+from databricks.sdk.service import jobs
+
+
+class JobsExt(jobs.JobsAPI):
+
+    def get_run(self,
+                run_id: int,
+                *,
+                include_history: Optional[bool] = None,
+                include_resolved_values: Optional[bool] = None,
+                page_token: Optional[str] = None) -> jobs.Run:
+        """
+        This method fetches the details of a run identified by `run_id`. If the run has multiple pages of tasks or iterations,
+        it will paginate through all pages and aggregate the results.
+        :param run_id: int
+          The canonical identifier of the run for which to retrieve the metadata. This field is required.
+        :param include_history: bool (optional)
+          Whether to include the repair history in the response.
+        :param include_resolved_values: bool (optional)
+          Whether to include resolved parameter values in the response.
+        :param page_token: str (optional)
+          To list the next page or the previous page of job tasks, set this field to the value of the
+          `next_page_token` or `prev_page_token` returned in the GetJob response.
+        :returns: :class:`Run`
+        """
+        run = super().get_run(run_id,
+                              include_history=include_history,
+                              include_resolved_values=include_resolved_values,
+                              page_token=page_token)
+
+        # When querying a Job run, a page token is returned when there are more than 100 tasks. No iterations are defined for a Job run. Therefore, the next page in the response only includes the next page of tasks.
+        # When querying a ForEach task run, a page token is returned when there are more than 100 iterations. Only a single task is returned, corresponding to the ForEach task itself. Therefore, the client only reads the iterations from the next page and not the tasks.
+        is_paginating_iterations = run.iterations is not None and len(run.iterations) > 0
+
+        while run.next_page_token is not None:
+            next_run = super().get_run(run_id,
+                                       include_history=include_history,
+                                       include_resolved_values=include_resolved_values,
+                                       page_token=run.next_page_token)
+            if is_paginating_iterations:
+                run.iterations.extend(next_run.iterations)
+            else:
+                run.tasks.extend(next_run.tasks)
+            run.next_page_token = next_run.next_page_token
+
+        run.prev_page_token = None
+        return run
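For callers, this mixin makes pagination invisible: a single `get_run` call follows `next_page_token` internally and returns a fully aggregated `Run`, whether the pages hold tasks or ForEach iterations. A short usage sketch, with an illustrative run ID:

    from databricks.sdk import WorkspaceClient

    w = WorkspaceClient()

    # One call, regardless of how many 100-item pages the run spans.
    run = w.jobs.get_run(run_id=1234, include_history=True)  # hypothetical run ID
    print(f"aggregated {len(run.tasks or [])} tasks")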
databricks/sdk/mixins/open_ai_client.py CHANGED
@@ -29,7 +29,7 @@ class ServingEndpointsExt(ServingEndpointsAPI):
             from openai import OpenAI
         except Exception:
             raise ImportError(
-                "Open AI is not installed. Please install the Databricks SDK with the following command `pip isntall databricks-sdk[openai]`"
+                "Open AI is not installed. Please install the Databricks SDK with the following command `pip install databricks-sdk[openai]`"
             )

         return OpenAI(
@@ -42,7 +42,7 @@ class ServingEndpointsExt(ServingEndpointsAPI):
             from langchain_openai import ChatOpenAI
         except Exception:
             raise ImportError(
-                "Langchain Open AI is not installed. Please install the Databricks SDK with the following command `pip isntall databricks-sdk[openai]` and ensure you are using python>3.7"
+                "Langchain Open AI is not installed. Please install the Databricks SDK with the following command `pip install databricks-sdk[openai]` and ensure you are using python>3.7"
             )

         return ChatOpenAI(
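Both fixes correct the install hint from `pip isntall` to `pip install`. A hedged sketch of the happy path these errors guard, assuming the mixin's `get_open_ai_client()` accessor (the method name is not shown in this diff, and the endpoint name is illustrative):

    # pip install 'databricks-sdk[openai]'
    from databricks.sdk import WorkspaceClient

    w = WorkspaceClient()
    client = w.serving_endpoints.get_open_ai_client()

    # A standard OpenAI client, pre-configured to call the workspace's
    # serving endpoints with Databricks authentication.
    resp = client.chat.completions.create(
        model="databricks-meta-llama-3-1-70b-instruct",  # hypothetical endpoint name
        messages=[{"role": "user", "content": "Hello!"}],
    )
    print(resp.choices[0].message.content)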