databricks-sdk 0.32.3__py3-none-any.whl → 0.34.0__py3-none-any.whl

This diff shows the content of publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.

databricks/sdk/core.py CHANGED
@@ -1,19 +1,13 @@
  import re
- from datetime import timedelta
- from types import TracebackType
- from typing import Any, BinaryIO, Iterator, Type
+ from typing import BinaryIO
  from urllib.parse import urlencode
 
- from requests.adapters import HTTPAdapter
-
- from .casing import Casing
+ from ._base_client import _BaseClient
  from .config import *
  # To preserve backwards compatibility (as these definitions were previously in this module)
  from .credentials_provider import *
- from .errors import DatabricksError, _ErrorCustomizer, _Parser
- from .logger import RoundTrip
+ from .errors import DatabricksError, _ErrorCustomizer
  from .oauth import retrieve_token
- from .retries import retried
 
  __all__ = ['Config', 'DatabricksError']
 
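The import changes summarize the release: session, retry, and streaming plumbing moved to a new private _base_client module, while the wildcard re-exports and __all__ keep the public import surface stable. A quick sanity check of what stays importable:

# Still valid after the refactor: Config and DatabricksError remain
# re-exported from databricks.sdk.core, per the __all__ kept above.
from databricks.sdk.core import Config, DatabricksError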
@@ -25,53 +19,19 @@ OIDC_TOKEN_PATH = "/oidc/v1/token"
 
 
  class ApiClient:
-     _cfg: Config
-     _RETRY_AFTER_DEFAULT: int = 1
-
-     def __init__(self, cfg: Config = None):
-
-         if cfg is None:
-             cfg = Config()
 
+     def __init__(self, cfg: Config):
          self._cfg = cfg
-         # See https://github.com/databricks/databricks-sdk-go/blob/main/client/client.go#L34-L35
-         self._debug_truncate_bytes = cfg.debug_truncate_bytes if cfg.debug_truncate_bytes else 96
-         self._retry_timeout_seconds = cfg.retry_timeout_seconds if cfg.retry_timeout_seconds else 300
-         self._user_agent_base = cfg.user_agent
-         self._session = requests.Session()
-         self._session.auth = self._authenticate
-
-         # Number of urllib3 connection pools to cache before discarding the least
-         # recently used pool. Python requests default value is 10.
-         pool_connections = cfg.max_connection_pools
-         if pool_connections is None:
-             pool_connections = 20
-
-         # The maximum number of connections to save in the pool. Improves performance
-         # in multithreaded situations. For now, we're setting it to the same value
-         # as connection_pool_size.
-         pool_maxsize = cfg.max_connections_per_pool
-         if cfg.max_connections_per_pool is None:
-             pool_maxsize = pool_connections
-
-         # If pool_block is False, more connections are created,
-         # but not saved after the first use. Blocks when no free connections are available.
-         # urllib3 ensures that no more than pool_maxsize connections are used at a time.
-         # Prevents flooding the platform. By default, the requests library doesn't block.
-         pool_block = True
-
-         # We don't use `max_retries` from HTTPAdapter to align with a more production-ready
-         # retry strategy established in the Databricks SDK for Go. See _is_retryable and
-         # @retried for more details.
-         http_adapter = HTTPAdapter(pool_connections=pool_connections,
-                                    pool_maxsize=pool_maxsize,
-                                    pool_block=pool_block)
-         self._session.mount("https://", http_adapter)
-
-         # Default to 60 seconds
-         self._http_timeout_seconds = cfg.http_timeout_seconds if cfg.http_timeout_seconds else 60
-
-         self._error_parser = _Parser(extra_error_customizers=[_AddDebugErrorCustomizer(cfg)])
+         self._api_client = _BaseClient(debug_truncate_bytes=cfg.debug_truncate_bytes,
+                                        retry_timeout_seconds=cfg.retry_timeout_seconds,
+                                        user_agent_base=cfg.user_agent,
+                                        header_factory=cfg.authenticate,
+                                        max_connection_pools=cfg.max_connection_pools,
+                                        max_connections_per_pool=cfg.max_connections_per_pool,
+                                        pool_block=True,
+                                        http_timeout_seconds=cfg.http_timeout_seconds,
+                                        extra_error_customizers=[_AddDebugErrorCustomizer(cfg)],
+                                        clock=cfg.clock)
 
      @property
      def account_id(self) -> str:
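The effect of the refactor: every knob __init__ used to wire up by hand (pool sizing, blocking behavior, timeouts, the error parser) is now forwarded to _BaseClient, and cfg is no longer optional. A minimal sketch of constructing the client under the new signature; the host and token below are placeholders, not real values:

from databricks.sdk.core import ApiClient, Config

# Config still resolves credentials from explicit arguments or the
# environment (DATABRICKS_HOST, DATABRICKS_TOKEN, etc.).
cfg = Config(host="https://example.cloud.databricks.com", token="dapi-placeholder")
client = ApiClient(cfg)  # ApiClient() with no Config now raises TypeError, unlike 0.32.3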
@@ -81,40 +41,6 @@ class ApiClient:
      def is_account_client(self) -> bool:
          return self._cfg.is_account_client
 
-     def _authenticate(self, r: requests.PreparedRequest) -> requests.PreparedRequest:
-         headers = self._cfg.authenticate()
-         for k, v in headers.items():
-             r.headers[k] = v
-         return r
-
-     @staticmethod
-     def _fix_query_string(query: Optional[dict] = None) -> Optional[dict]:
-         # Convert True -> "true" for Databricks APIs to understand booleans.
-         # See: https://github.com/databricks/databricks-sdk-py/issues/142
-         if query is None:
-             return None
-         with_fixed_bools = {k: v if type(v) != bool else ('true' if v else 'false') for k, v in query.items()}
-
-         # Query parameters may be nested, e.g.
-         # {'filter_by': {'user_ids': [123, 456]}}
-         # The HTTP-compatible representation of this is
-         # filter_by.user_ids=123&filter_by.user_ids=456
-         # To achieve this, we convert the above dictionary to
-         # {'filter_by.user_ids': [123, 456]}
-         # See the following for more information:
-         # https://cloud.google.com/endpoints/docs/grpc-service-config/reference/rpc/google.api#google.api.HttpRule
-         def flatten_dict(d: Dict[str, Any]) -> Dict[str, Any]:
-             for k1, v1 in d.items():
-                 if isinstance(v1, dict):
-                     v1 = dict(flatten_dict(v1))
-                     for k2, v2 in v1.items():
-                         yield f"{k1}.{k2}", v2
-                 else:
-                     yield k1, v1
-
-         flattened = dict(flatten_dict(with_fixed_bools))
-         return flattened
-
      def get_oauth_token(self, auth_details: str) -> Token:
          if not self._cfg.auth_type:
              self._cfg.authenticate()
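_fix_query_string leaves this module, but the behavior its comments document (boolean coercion from issue #142, plus gRPC-transcoding-style flattening of nested parameters) carries over to _BaseClient. A standalone sketch of that normalization, for illustration only, not the SDK's actual implementation:

from typing import Any, Dict, Iterator, Tuple

def fix_query_string(query: Dict[str, Any]) -> Dict[str, Any]:
    # Flatten nested dicts to dotted keys and render booleans lowercase,
    # mirroring the comments in the removed code above.
    def flatten(d: Dict[str, Any], prefix: str = "") -> Iterator[Tuple[str, Any]]:
        for k, v in d.items():
            key = f"{prefix}.{k}" if prefix else k
            if isinstance(v, dict):
                yield from flatten(v, key)
            elif isinstance(v, bool):
                yield key, 'true' if v else 'false'
            else:
                yield key, v
    return dict(flatten(query))

assert fix_query_string({'expand_tasks': True, 'filter_by': {'user_ids': [123, 456]}}) \
    == {'expand_tasks': 'true', 'filter_by.user_ids': [123, 456]}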
@@ -142,115 +68,22 @@ class ApiClient:
            files=None,
            data=None,
            auth: Callable[[requests.PreparedRequest], requests.PreparedRequest] = None,
-            response_headers: List[str] = None) -> Union[dict, BinaryIO]:
-         if headers is None:
-             headers = {}
+            response_headers: List[str] = None) -> Union[dict, list, BinaryIO]:
          if url is None:
              # Remove extra `/` from path for Files API
              # Once we've fixed the OpenAPI spec, we can remove this
              path = re.sub('^/api/2.0/fs/files//', '/api/2.0/fs/files/', path)
              url = f"{self._cfg.host}{path}"
-         headers['User-Agent'] = self._user_agent_base
-         retryable = retried(timeout=timedelta(seconds=self._retry_timeout_seconds),
-                             is_retryable=self._is_retryable,
-                             clock=self._cfg.clock)
-         response = retryable(self._perform)(method,
-                                             url,
-                                             query=query,
-                                             headers=headers,
-                                             body=body,
-                                             raw=raw,
-                                             files=files,
-                                             data=data,
-                                             auth=auth)
-
-         resp = dict()
-         for header in response_headers if response_headers else []:
-             resp[header] = response.headers.get(Casing.to_header_case(header))
-         if raw:
-             resp["contents"] = StreamingResponse(response)
-             return resp
-         if not len(response.content):
-             return resp
-
-         jsonResponse = response.json()
-         if jsonResponse is None:
-             return resp
-
-         if isinstance(jsonResponse, list):
-             return jsonResponse
-
-         return {**resp, **jsonResponse}
-
-     @staticmethod
-     def _is_retryable(err: BaseException) -> Optional[str]:
-         # This method is a Databricks-specific port of urllib3 retries
-         # (see https://github.com/urllib3/urllib3/blob/main/src/urllib3/util/retry.py)
-         # and Databricks SDK for Go retries
-         # (see https://github.com/databricks/databricks-sdk-go/blob/main/apierr/errors.go)
-         from urllib3.exceptions import ProxyError
-         if isinstance(err, ProxyError):
-             err = err.original_error
-         if isinstance(err, requests.ConnectionError):
-             # corresponds to `connection reset by peer` and `connection refused` errors from Go,
-             # which are generally related to temporary glitches in the networking stack,
-             # and are also caused by endpoint protection software, like ZScaler, dropping
-             # connections while not yet authenticated.
-             #
-             # return a simple string for debug log readability, as `raise TimeoutError(...) from err`
-             # will bubble up the original exception in case we reach max retries.
-             return f'cannot connect'
-         if isinstance(err, requests.Timeout):
-             # corresponds to `TLS handshake timeout` and `i/o timeout` in Go.
-             #
-             # return a simple string for debug log readability, as `raise TimeoutError(...) from err`
-             # will bubble up the original exception in case we reach max retries.
-             return f'timeout'
-         if isinstance(err, DatabricksError):
-             message = str(err)
-             transient_error_string_matches = [
-                 "com.databricks.backend.manager.util.UnknownWorkerEnvironmentException",
-                 "does not have any associated worker environments", "There is no worker environment with id",
-                 "Unknown worker environment", "ClusterNotReadyException", "Unexpected error",
-                 "Please try again later or try a faster operation.",
-                 "RPC token bucket limit has been exceeded",
-             ]
-             for substring in transient_error_string_matches:
-                 if substring not in message:
-                     continue
-                 return f'matched {substring}'
-         return None
-
-     def _perform(self,
-                  method: str,
-                  url: str,
-                  query: dict = None,
-                  headers: dict = None,
-                  body: dict = None,
-                  raw: bool = False,
-                  files=None,
-                  data=None,
-                  auth: Callable[[requests.PreparedRequest], requests.PreparedRequest] = None):
-         response = self._session.request(method,
-                                          url,
-                                          params=self._fix_query_string(query),
-                                          json=body,
-                                          headers=headers,
-                                          files=files,
-                                          data=data,
-                                          auth=auth,
-                                          stream=raw,
-                                          timeout=self._http_timeout_seconds)
-         self._record_request_log(response, raw=raw or data is not None or files is not None)
-         error = self._error_parser.get_api_error(response)
-         if error is not None:
-             raise error from None
-         return response
-
-     def _record_request_log(self, response: requests.Response, raw: bool = False) -> None:
-         if not logger.isEnabledFor(logging.DEBUG):
-             return
-         logger.debug(RoundTrip(response, self._cfg.debug_headers, self._debug_truncate_bytes, raw).generate())
+         return self._api_client.do(method=method,
+                                    url=url,
+                                    query=query,
+                                    headers=headers,
+                                    body=body,
+                                    raw=raw,
+                                    files=files,
+                                    data=data,
+                                    auth=auth,
+                                    response_headers=response_headers)
 
 
  class _AddDebugErrorCustomizer(_ErrorCustomizer):
@@ -264,103 +97,3 @@ class _AddDebugErrorCustomizer(_ErrorCustomizer):
          if response.status_code in (401, 403):
              message = kwargs.get('message', 'request failed')
              kwargs['message'] = self._cfg.wrap_debug_info(message)
-
-
- class StreamingResponse(BinaryIO):
-     _response: requests.Response
-     _buffer: bytes
-     _content: Union[Iterator[bytes], None]
-     _chunk_size: Union[int, None]
-     _closed: bool = False
-
-     def fileno(self) -> int:
-         pass
-
-     def flush(self) -> int:
-         pass
-
-     def __init__(self, response: requests.Response, chunk_size: Union[int, None] = None):
-         self._response = response
-         self._buffer = b''
-         self._content = None
-         self._chunk_size = chunk_size
-
-     def _open(self) -> None:
-         if self._closed:
-             raise ValueError("I/O operation on closed file")
-         if not self._content:
-             self._content = self._response.iter_content(chunk_size=self._chunk_size)
-
-     def __enter__(self) -> BinaryIO:
-         self._open()
-         return self
-
-     def set_chunk_size(self, chunk_size: Union[int, None]) -> None:
-         self._chunk_size = chunk_size
-
-     def close(self) -> None:
-         self._response.close()
-         self._closed = True
-
-     def isatty(self) -> bool:
-         return False
-
-     def read(self, n: int = -1) -> bytes:
-         self._open()
-         read_everything = n < 0
-         remaining_bytes = n
-         res = b''
-         while remaining_bytes > 0 or read_everything:
-             if len(self._buffer) == 0:
-                 try:
-                     self._buffer = next(self._content)
-                 except StopIteration:
-                     break
-             bytes_available = len(self._buffer)
-             to_read = bytes_available if read_everything else min(remaining_bytes, bytes_available)
-             res += self._buffer[:to_read]
-             self._buffer = self._buffer[to_read:]
-             remaining_bytes -= to_read
-         return res
-
-     def readable(self) -> bool:
-         return self._content is not None
-
-     def readline(self, __limit: int = ...) -> bytes:
-         raise NotImplementedError()
-
-     def readlines(self, __hint: int = ...) -> List[bytes]:
-         raise NotImplementedError()
-
-     def seek(self, __offset: int, __whence: int = ...) -> int:
-         raise NotImplementedError()
-
-     def seekable(self) -> bool:
-         return False
-
-     def tell(self) -> int:
-         raise NotImplementedError()
-
-     def truncate(self, __size: Union[int, None] = ...) -> int:
-         raise NotImplementedError()
-
-     def writable(self) -> bool:
-         return False
-
-     def write(self, s: Union[bytes, bytearray]) -> int:
-         raise NotImplementedError()
-
-     def writelines(self, lines: Iterable[bytes]) -> None:
-         raise NotImplementedError()
-
-     def __next__(self) -> bytes:
-         return self.read(1)
-
-     def __iter__(self) -> Iterator[bytes]:
-         return self._content
-
-     def __exit__(self, t: Union[Type[BaseException], None], value: Union[BaseException, None],
-                  traceback: Union[TracebackType, None]) -> None:
-         self._content = None
-         self._buffer = b''
-         self.close()
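StreamingResponse disappears from core.py along with the rest of the transport code, so raw downloads should behave the same from the caller's side. A hedged sketch of consuming a streamed response, assuming _BaseClient keeps the "contents" convention visible in the removed do() above (the remote path is a placeholder, and client is the ApiClient from the earlier sketch):

# raw=True streams the body instead of parsing JSON; the file-like
# object arrives under the "contents" key, as in the removed code.
resp = client.do('GET', '/api/2.0/fs/files/Volumes/main/default/my_vol/data.bin',
                 raw=True)
with resp['contents'] as stream, open('/tmp/data.bin', 'wb') as out:
    while chunk := stream.read(1 << 20):  # up to 1 MiB per read
        out.write(chunk)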
databricks/sdk/mixins/files.py CHANGED
@@ -167,7 +167,7 @@ class _DbfsIO(BinaryIO):
          return f"<_DbfsIO {self._path} {'read' if self.readable() else 'write'}=True>"
 
 
- class _FilesIO(BinaryIO):
+ class _VolumesIO(BinaryIO):
 
      def __init__(self, api: files.FilesAPI, path: str, *, read: bool, write: bool, overwrite: bool):
          self._buffer = []
@@ -262,7 +262,7 @@ class _FilesIO(BinaryIO):
          self.close()
 
      def __repr__(self) -> str:
-         return f"<_FilesIO {self._path} {'read' if self.readable() else 'write'}=True>"
+         return f"<_VolumesIO {self._path} {'read' if self.readable() else 'write'}=True>"
 
 
  class _Path(ABC):
@@ -398,7 +398,7 @@ class _LocalPath(_Path):
          return f'<_LocalPath {self._path}>'
 
 
- class _FilesPath(_Path):
+ class _VolumesPath(_Path):
 
      def __init__(self, api: files.FilesAPI, src: Union[str, pathlib.Path]):
          self._path = pathlib.PurePosixPath(str(src).replace('dbfs:', '').replace('file:', ''))
@@ -411,7 +411,7 @@ class _FilesPath(_Path):
          return False
 
      def child(self, path: str) -> Self:
-         return _FilesPath(self._api, str(self._path / path))
+         return _VolumesPath(self._api, str(self._path / path))
 
      def _is_dir(self) -> bool:
          try:
@@ -431,7 +431,7 @@
          return self.is_dir
 
      def open(self, *, read=False, write=False, overwrite=False) -> BinaryIO:
-         return _FilesIO(self._api, self.as_string, read=read, write=write, overwrite=overwrite)
+         return _VolumesIO(self._api, self.as_string, read=read, write=write, overwrite=overwrite)
 
      def list(self, *, recursive=False) -> Generator[files.FileInfo, None, None]:
          if not self.is_dir:
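The rename from _Files* to _Volumes* is mechanical but clarifies intent: these classes speak the Files API and are now reserved for Unity Catalog volume paths. A hedged usage sketch through the public DbfsExt mixin (credentials and the volume path below are placeholders):

from databricks.sdk import WorkspaceClient

w = WorkspaceClient()  # credentials resolved from the environment
# For a /Volumes path, w.dbfs.open() resolves to _VolumesPath.open(),
# which returns a _VolumesIO backed by the Files API.
with w.dbfs.open('/Volumes/main/default/my_vol/hello.txt', read=True) as f:
    print(f.read())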
@@ -458,13 +458,13 @@
      def delete(self, *, recursive=False):
          if self.is_dir:
              for entry in self.list(recursive=False):
-                 _FilesPath(self._api, entry.path).delete(recursive=True)
+                 _VolumesPath(self._api, entry.path).delete(recursive=True)
              self._api.delete_directory(self.as_string)
          else:
              self._api.delete(self.as_string)
 
      def __repr__(self) -> str:
-         return f'<_FilesPath {self._path}>'
+         return f'<_VolumesPath {self._path}>'
 
 
  class _DbfsPath(_Path):
@@ -589,8 +589,8 @@ class DbfsExt(files.DbfsAPI):
                               'UC Volumes paths, not external locations or DBFS mount points.')
          if src.scheme == 'file':
              return _LocalPath(src.geturl())
-         if src.path.startswith(('/Volumes', '/Models')):
-             return _FilesPath(self._files_api, src.geturl())
+         if src.path.startswith('/Volumes'):
+             return _VolumesPath(self._files_api, src.geturl())
          return _DbfsPath(self._dbfs_api, src.geturl())
 
      def copy(self, src: str, dst: str, *, recursive=False, overwrite=False):
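This last hunk is the one behavioral change in the section: /Models paths no longer route through the Files API and now fall back to the DBFS path handler. A hedged sketch of the routing as seen from DbfsExt.copy(); all paths below are placeholders:

from databricks.sdk import WorkspaceClient

w = WorkspaceClient()
# /Volumes destinations still resolve to _VolumesPath (Files API) ...
w.dbfs.copy('file:/tmp/report.csv', '/Volumes/main/default/my_vol/report.csv')
# ... while a /Models destination now resolves to _DbfsPath (DBFS API),
# where 0.32.3 would have used the Files API.
w.dbfs.copy('file:/tmp/model.bin', '/Models/my_model/model.bin')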