databricks-sdk 0.26.0__py3-none-any.whl → 0.27.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -1,15 +1,20 @@
  from __future__ import annotations
 
  import base64
+ import os
  import pathlib
  import shutil
  import sys
  from abc import ABC, abstractmethod
+ from collections import deque
+ from io import BytesIO
  from types import TracebackType
- from typing import TYPE_CHECKING, AnyStr, BinaryIO, Iterable, Iterator, Type
-
- from databricks.sdk.core import DatabricksError
+ from typing import (TYPE_CHECKING, AnyStr, BinaryIO, Generator, Iterable,
+                     Iterator, Type, Union)
+ from urllib import parse
 
+ from .._property import _cached_property
+ from ..errors import NotFound
  from ..service import files
 
  if TYPE_CHECKING:
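
The import changes track a behavioral shift in error handling: instead of importing `DatabricksError` from `databricks.sdk.core` and string-matching `error_code == 'RESOURCE_DOES_NOT_EXIST'`, the module now catches the typed `NotFound` exception from the SDK's errors package. A minimal sketch of the new pattern, as used by the `exists`/`_is_dir` methods later in this diff (`api` stands in for a `files.DbfsAPI` instance):

    from databricks.sdk.errors import NotFound

    def exists(api, path: str) -> bool:
        # NotFound is raised for absent paths; any other DatabricksError
        # (permissions, throttling, ...) still propagates to the caller.
        try:
            api.get_status(path)
            return True
        except NotFound:
            return False
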
@@ -161,19 +166,135 @@ class _DbfsIO(BinaryIO):
          return f"<_DbfsIO {self._path} {'read' if self.readable() else 'write'}=True>"
 
 
+ class _VolumesIO(BinaryIO):
+
+     def __init__(self, api: files.FilesAPI, path: str, *, read: bool, write: bool, overwrite: bool):
+         self._buffer = []
+         self._api = api
+         self._path = path
+         self._read = read
+         self._write = write
+         self._overwrite = overwrite
+         self._closed = False
+         self._read_handle = None
+         self._offset = 0
+
+     def __enter__(self):
+         if self._read:
+             self.__open_read()
+         return self
+
+     def close(self):
+         if self._closed:
+             return
+         if self._write:
+             to_write = b''.join(self._buffer)
+             self._api.upload(self._path, contents=BytesIO(to_write), overwrite=self._overwrite)
+         elif self._read:
+             self._read_handle.close()
+         self._closed = True
+
+     def fileno(self) -> int:
+         return 0
+
+     def flush(self):
+         raise NotImplementedError()
+
+     def isatty(self) -> bool:
+         return False
+
+     def __check_closed(self):
+         if self._closed:
+             raise ValueError('I/O operation on closed file')
+
+     def __open_read(self):
+         if self._read_handle is None:
+             self._read_handle = self._api.download(self._path).contents
+
+     def read(self, __n=...):
+         self.__check_closed()
+         self.__open_read()
+         return self._read_handle.read(__n)
+
+     def readable(self):
+         return self._read
+
+     def readline(self, __limit=...):
+         raise NotImplementedError()
+
+     def readlines(self, __hint=...):
+         raise NotImplementedError()
+
+     def seek(self, __offset, __whence=...):
+         raise NotImplementedError()
+
+     def seekable(self):
+         return False
+
+     def tell(self):
+         if self._read_handle is not None:
+             return self._read_handle.tell()
+         return self._offset
+
+     def truncate(self, __size=...):
+         raise NotImplementedError()
+
+     def writable(self):
+         return self._write
+
+     def write(self, __s):
+         self.__check_closed()
+         self._buffer.append(__s)
+
+     def writelines(self, __lines):
+         raise NotImplementedError()
+
+     def __next__(self):
+         self.__check_closed()
+         return self._read_handle.__next__()
+
+     def __iter__(self):
+         self.__check_closed()
+         return self._read_handle.__iter__()
+
+     def __exit__(self, __t, __value, __traceback):
+         self.close()
+
+     def __repr__(self) -> str:
+         return f"<_VolumesIO {self._path} {'read' if self.readable() else 'write'}=True>"
+
+
  class _Path(ABC):
 
+     def __init__(self, path: str):
+         self._path = pathlib.Path(str(path).replace('dbfs:', '').replace('file:', ''))
+
      @property
-     @abstractmethod
      def is_local(self) -> bool:
+         return self._is_local()
+
+     @abstractmethod
+     def _is_local(self) -> bool:
          ...
 
+     @property
+     def is_dbfs(self) -> bool:
+         return self._is_dbfs()
+
      @abstractmethod
-     def child(self, path: str) -> str:
+     def _is_dbfs(self) -> bool:
          ...
 
      @abstractmethod
+     def child(self, path: str) -> str:
+         ...
+
+     @_cached_property
      def is_dir(self) -> bool:
+         return self._is_dir()
+
+     @abstractmethod
+     def _is_dir(self) -> bool:
          ...
 
      @abstractmethod
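
`_VolumesIO` buffers every `write()` in memory and only talks to the Files API on `close()`, where the buffered chunks are joined and pushed in a single `upload()` call; reads stream through the handle returned by `FilesAPI.download()`. A hedged usage sketch, assuming an authenticated `WorkspaceClient` (the variable name and volume path are illustrative):

    from databricks.sdk import WorkspaceClient

    w = WorkspaceClient()  # credentials resolved from env/config as usual
    path = '/Volumes/main/default/my_vol/hello.txt'  # illustrative path

    # Writes accumulate in an in-memory buffer; nothing is sent until
    # close(), when the chunks are joined into one Files API upload.
    with w.dbfs.open(path, write=True, overwrite=True) as f:
        f.write(b'hello, ')
        f.write(b'volumes')

    # Reads stream through the handle returned by FilesAPI.download().
    with w.dbfs.open(path, read=True) as f:
        print(f.read(1024))

Because the whole payload is joined in memory before upload, this handle suits files that comfortably fit in RAM; `seek`, `flush`, `truncate`, and the `readline` family deliberately raise `NotImplementedError`.
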
@@ -184,19 +305,17 @@ class _Path(ABC):
      def open(self, *, read=False, write=False, overwrite=False):
          ...
 
+     def list(self, *, recursive=False) -> Generator[files.FileInfo, None, None]:
+         ...
+
      @abstractmethod
-     def list_opened_handles(self, *, recursive=False) -> Iterator[(str, BinaryIO)]:
+     def mkdir(self):
          ...
 
      @abstractmethod
      def delete(self, *, recursive=False):
          ...
 
-     def _with_path(self, src: str):
-         # create sanitized path representation, so that
-         # we can have clean child paths in list_opened_handles()
-         self._path = pathlib.Path(str(src).replace('dbfs:', '').replace('file:', ''))
-
      @property
      def name(self) -> str:
          return self._path.name
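
This refactor replaces abstract public members with a template-method pattern: the public `is_local`/`is_dbfs`/`is_dir` accessors delegate to abstract `_is_*` hooks, and `is_dir` is memoized with the SDK-internal `_cached_property` so the potentially remote metadata call happens at most once per `_Path` instance. A minimal sketch of the idea, using the standard library's `functools.cached_property` as a stand-in for the internal helper:

    from abc import ABC, abstractmethod
    from functools import cached_property

    class Base(ABC):

        @cached_property
        def is_dir(self) -> bool:
            # Computed once per instance; later reads hit the cache.
            return self._is_dir()

        @abstractmethod
        def _is_dir(self) -> bool:
            ...

    class Remote(Base):

        def _is_dir(self) -> bool:
            print('expensive metadata call')
            return True

    r = Remote()
    r.is_dir  # prints once
    r.is_dir  # served from the cache
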
@@ -208,19 +327,21 @@ class _Path(ABC):
 
  class _LocalPath(_Path):
 
-     def __init__(self, src: str):
-         self._with_path(src)
-
-     @property
-     def is_local(self) -> bool:
+     def _is_local(self) -> bool:
          return True
 
+     def _is_dbfs(self) -> bool:
+         return False
+
      def child(self, path: str) -> Self:
          return _LocalPath(str(self._path / path))
 
-     def is_dir(self) -> bool:
+     def _is_dir(self) -> bool:
          return self._path.is_dir()
 
+     def mkdir(self):
+         self._path.mkdir(mode=0o755, parents=True, exist_ok=True)
+
      def exists(self) -> bool:
          return self._path.exists()
 
@@ -229,78 +350,168 @@ class _LocalPath(_Path):
          self._path.parent.mkdir(mode=0o755, parents=True, exist_ok=True)
          return self._path.open(mode='wb' if overwrite else 'rb' if read else 'xb')
 
-     def _list_local(self, recursive=False):
-         queue = [self._path]
+     def list(self, recursive=False) -> Generator[files.FileInfo, None, None]:
+         if not self.is_dir:
+             st = self._path.stat()
+             yield files.FileInfo(path='file:' + str(self._path.absolute()),
+                                  is_dir=False,
+                                  file_size=st.st_size,
+                                  modification_time=int(st.st_mtime_ns / 1e6),
+                                  )
+             return
+         queue = deque([self._path])
          while queue:
-             path, queue = queue[0], queue[1:]
+             path = queue.popleft()
              for leaf in path.iterdir():
                  if leaf.is_dir():
                      if recursive:
                          queue.append(leaf)
                      continue
-                 yield leaf
-
-     def list_opened_handles(self, *, recursive=False) -> Iterator[(str, BinaryIO)]:
-         for leaf in self._list_local(recursive):
-             if not recursive and leaf.is_dir:
-                 continue
-             with leaf.open('rb') as handle:
-                 child = str(leaf).replace(str(self._path) + '/', '')
-                 yield child, handle
+                 info = leaf.stat()
+                 yield files.FileInfo(path='file:' + str(leaf.absolute()),
+                                      is_dir=False,
+                                      file_size=info.st_size,
+                                      modification_time=int(info.st_mtime_ns / 1e6),
+                                      )
 
      def delete(self, *, recursive=False):
-         if self.is_dir():
+         if self.is_dir:
              if recursive:
-                 for leaf in self._list_local(True):
-                     kw = {}
-                     if sys.version_info[:2] > (3, 7):
-                         # Python3.7 does not support `missing_ok` keyword
-                         kw['missing_ok'] = True
-                     leaf.unlink(**kw)
+                 for leaf in self.list(recursive=True):
+                     _LocalPath(leaf.path).delete()
              self._path.rmdir()
-             return
-         self._path.unlink()
+         else:
+             kw = {}
+             if sys.version_info[:2] > (3, 7):
+                 kw['missing_ok'] = True
+             self._path.unlink(**kw)
 
      def __repr__(self) -> str:
          return f'<_LocalPath {self._path}>'
 
 
+ class _VolumesPath(_Path):
+
+     def __init__(self, api: files.FilesAPI, src: Union[str, pathlib.Path]):
+         super().__init__(src)
+         self._api = api
+
+     def _is_local(self) -> bool:
+         return False
+
+     def _is_dbfs(self) -> bool:
+         return False
+
+     def child(self, path: str) -> Self:
+         return _VolumesPath(self._api, str(self._path / path))
+
+     def _is_dir(self) -> bool:
+         try:
+             self._api.get_directory_metadata(self.as_string)
+             return True
+         except NotFound:
+             return False
+
+     def mkdir(self):
+         self._api.create_directory(self.as_string)
+
+     def exists(self) -> bool:
+         try:
+             self._api.get_metadata(self.as_string)
+             return True
+         except NotFound:
+             return self.is_dir
+
+     def open(self, *, read=False, write=False, overwrite=False) -> BinaryIO:
+         return _VolumesIO(self._api, self.as_string, read=read, write=write, overwrite=overwrite)
+
+     def list(self, *, recursive=False) -> Generator[files.FileInfo, None, None]:
+         if not self.is_dir:
+             meta = self._api.get_metadata(self.as_string)
+             yield files.FileInfo(path=self.as_string,
+                                  is_dir=False,
+                                  file_size=meta.content_length,
+                                  modification_time=meta.last_modified,
+                                  )
+             return
+         queue = deque([self])
+         while queue:
+             next_path = queue.popleft()
+             for file in self._api.list_directory_contents(next_path.as_string):
+                 if recursive and file.is_directory:
+                     queue.append(self.child(file.name))
+                 if not recursive or not file.is_directory:
+                     yield files.FileInfo(path=file.path,
+                                          is_dir=file.is_directory,
+                                          file_size=file.file_size,
+                                          modification_time=file.last_modified,
+                                          )
+
+     def delete(self, *, recursive=False):
+         if self.is_dir:
+             for entry in self.list(recursive=False):
+                 _VolumesPath(self._api, entry.path).delete(recursive=True)
+             self._api.delete_directory(self.as_string)
+         else:
+             self._api.delete(self.as_string)
+
+     def __repr__(self) -> str:
+         return f'<_VolumesPath {self._path}>'
+
+
  class _DbfsPath(_Path):
 
-     def __init__(self, api: 'DbfsExt', src: str):
-         self._with_path(src)
+     def __init__(self, api: files.DbfsAPI, src: str):
+         super().__init__(src)
          self._api = api
 
-     @property
-     def is_local(self) -> bool:
+     def _is_local(self) -> bool:
          return False
 
+     def _is_dbfs(self) -> bool:
+         return True
+
      def child(self, path: str) -> Self:
          child = self._path / path
          return _DbfsPath(self._api, str(child))
 
-     def is_dir(self) -> bool:
+     def _is_dir(self) -> bool:
          try:
              remote = self._api.get_status(self.as_string)
              return remote.is_dir
-         except DatabricksError as e:
-             if e.error_code == 'RESOURCE_DOES_NOT_EXIST':
-                 return False
-             raise e
+         except NotFound:
+             return False
+
+     def mkdir(self):
+         self._api.mkdirs(self.as_string)
 
      def exists(self) -> bool:
-         return self._api.exists(self.as_string)
+         try:
+             self._api.get_status(self.as_string)
+             return True
+         except NotFound:
+             return False
 
      def open(self, *, read=False, write=False, overwrite=False) -> BinaryIO:
-         return self._api.open(self.as_string, read=read, write=write, overwrite=overwrite)
-
-     def list_opened_handles(self, *, recursive=False) -> Iterator[(str, BinaryIO)]:
-         for file in self._api.list(self.as_string, recursive=recursive):
-             if not recursive and file.is_dir:
-                 continue
-             with self._api.open(file.path, read=True) as handle:
-                 child = file.path.replace(str(self._path) + '/', '').replace('dbfs:', '')
-                 yield child, handle
+         return _DbfsIO(self._api, self.as_string, read=read, write=write, overwrite=overwrite)
+
+     def list(self, *, recursive=False) -> Generator[files.FileInfo, None, None]:
+         if not self.is_dir:
+             meta = self._api.get_status(self.as_string)
+             yield files.FileInfo(path=self.as_string,
+                                  is_dir=False,
+                                  file_size=meta.file_size,
+                                  modification_time=meta.modification_time,
+                                  )
+             return
+         queue = deque([self])
+         while queue:
+             next_path = queue.popleft()
+             for file in self._api.list(next_path.as_string):
+                 if recursive and file.is_dir:
+                     queue.append(self.child(file.path))
+                 if not recursive or not file.is_dir:
+                     yield file
 
      def delete(self, *, recursive=False):
          self._api.delete(self.as_string, recursive=recursive)
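
All three `list` implementations now share the same shape: an iterative breadth-first walk over a `collections.deque` used as a FIFO work queue, replacing the old `queue[0], queue[1:]` slicing that copied the list on every step. A self-contained sketch of the pattern over the local filesystem:

    import pathlib
    from collections import deque

    def walk(root: str, recursive: bool = False):
        # FIFO queue of directories still to visit; popleft() is O(1),
        # unlike list slicing, which re-allocates on each iteration.
        queue = deque([pathlib.Path(root)])
        while queue:
            path = queue.popleft()
            for leaf in path.iterdir():
                if leaf.is_dir():
                    if recursive:
                        queue.append(leaf)
                    continue
                yield leaf

    for f in walk('/tmp', recursive=True):
        print(f)
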
@@ -312,8 +523,18 @@ class _DbfsPath(_Path):
  class DbfsExt(files.DbfsAPI):
      __doc__ = files.DbfsAPI.__doc__
 
-     def open(self, path: str, *, read: bool = False, write: bool = False, overwrite: bool = False) -> _DbfsIO:
-         return _DbfsIO(self, path, read=read, write=write, overwrite=overwrite)
+     def __init__(self, api_client):
+         super().__init__(api_client)
+         self._files_api = files.FilesAPI(api_client)
+         self._dbfs_api = files.DbfsAPI(api_client)
+
+     def open(self,
+              path: str,
+              *,
+              read: bool = False,
+              write: bool = False,
+              overwrite: bool = False) -> BinaryIO:
+         return self._path(path).open(read=read, write=write, overwrite=overwrite)
 
      def upload(self, path: str, src: BinaryIO, *, overwrite: bool = False):
          """Upload file to DBFS"""
@@ -333,34 +554,36 @@ class DbfsExt(files.DbfsAPI):
          When calling list on a large directory, the list operation will time out after approximately 60
          seconds.
 
+         :param path: the DBFS or UC Volume path to list
          :param recursive: traverse deep into directory tree
          :returns iterator of metadata for every file
          """
-         queue = [path]
-         while queue:
-             path, queue = queue[0], queue[1:]
-             for file_info in super().list(path):
-                 if recursive and file_info.is_dir:
-                     queue.append(file_info.path)
-                     continue
-                 yield file_info
+         p = self._path(path)
+         yield from p.list(recursive=recursive)
+
+     def mkdirs(self, path: str):
+         """Create directory on DBFS"""
+         p = self._path(path)
+         p.mkdir()
 
      def exists(self, path: str) -> bool:
          """If file exists on DBFS"""
-         # TODO: check if we want to put it to errors module to prevent circular import
-         from databricks.sdk.core import DatabricksError
-         try:
-             self.get_status(path)
-             return True
-         except DatabricksError as e:
-             if e.error_code == 'RESOURCE_DOES_NOT_EXIST':
-                 return False
-             raise e
+         p = self._path(path)
+         return p.exists()
+
+     __ALLOWED_SCHEMES = [None, 'file', 'dbfs']
 
      def _path(self, src):
-         if str(src).startswith('file:'):
-             return _LocalPath(src)
-         return _DbfsPath(self, src)
+         src = parse.urlparse(str(src))
+         if src.scheme and src.scheme not in self.__ALLOWED_SCHEMES:
+             raise ValueError(
+                 f'unsupported scheme "{src.scheme}". DBUtils in the SDK only supports local, root DBFS, and '
+                 'UC Volumes paths, not external locations or DBFS mount points.')
+         if src.scheme == 'file':
+             return _LocalPath(src.geturl())
+         if src.path.startswith('/Volumes'):
+             return _VolumesPath(self._files_api, src.geturl())
+         return _DbfsPath(self._dbfs_api, src.geturl())
 
      def copy(self, src: str, dst: str, *, recursive=False, overwrite=False):
          """Copy files between DBFS and local filesystems"""
@@ -368,32 +591,40 @@ class DbfsExt(files.DbfsAPI):
          dst = self._path(dst)
          if src.is_local and dst.is_local:
              raise IOError('both destinations are on local FS')
-         if dst.exists() and dst.is_dir():
+         if dst.exists() and dst.is_dir:
              # if target is a folder, make file with the same name there
              dst = dst.child(src.name)
-         if not src.is_dir():
-             # copy single file
-             with src.open(read=True) as reader:
-                 with dst.open(write=True, overwrite=overwrite) as writer:
+         if src.is_dir:
+             queue = [self._path(x.path) for x in src.list(recursive=recursive) if not x.is_dir]
+         else:
+             queue = [src]
+         for child in queue:
+             child_dst = dst.child(os.path.relpath(child.as_string, src.as_string))
+             with child.open(read=True) as reader:
+                 with child_dst.open(write=True, overwrite=overwrite) as writer:
                      shutil.copyfileobj(reader, writer, length=_DbfsIO.MAX_CHUNK_SIZE)
-             return
-         # iterate through files
-         for child, reader in src.list_opened_handles(recursive=recursive):
-             with dst.child(child).open(write=True, overwrite=overwrite) as writer:
-                 shutil.copyfileobj(reader, writer, length=_DbfsIO.MAX_CHUNK_SIZE)
 
      def move_(self, src: str, dst: str, *, recursive=False, overwrite=False):
          """Move files between local and DBFS systems"""
          source = self._path(src)
          target = self._path(dst)
-         if not source.is_local and not target.is_local:
+         if source.is_dbfs and target.is_dbfs:
              # Moves a file from one location to another location within DBFS.
              # this operation is recursive by default.
              return self.move(source.as_string, target.as_string)
          if source.is_local and target.is_local:
              raise IOError('both destinations are on local FS')
-         if source.is_dir() and not recursive:
-             raise IOError('moving directories across filesystems requires recursive flag')
+         if source.is_dir and not recursive:
+             src_type = 'local' if source.is_local else 'DBFS' if source.is_dbfs else 'UC Volume'
+             dst_type = 'local' if target.is_local else 'DBFS' if target.is_dbfs else 'UC Volume'
+             raise IOError(f'moving a directory from {src_type} to {dst_type} requires recursive flag')
          # do cross-fs moving
          self.copy(src, dst, recursive=recursive, overwrite=overwrite)
-         source.delete(recursive=recursive)
+         self.delete(src, recursive=recursive)
+
+     def delete(self, path: str, *, recursive=False):
+         """Delete file or directory on DBFS"""
+         p = self._path(path)
+         if p.is_dir and not recursive:
+             raise IOError('deleting directories requires recursive flag')
+         p.delete(recursive=recursive)
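
`copy` now flattens a recursive listing into a work queue and derives each destination with `os.path.relpath`, so the source's directory structure is mirrored under the target. A small worked example of that path arithmetic (values illustrative):

    import os

    src_root = '/Volumes/main/default/vol/data'
    dst_root = '/tmp/backup'

    for found in ('/Volumes/main/default/vol/data/a.csv',
                  '/Volumes/main/default/vol/data/nested/b.csv'):
        rel = os.path.relpath(found, src_root)   # 'a.csv', 'nested/b.csv'
        print(os.path.join(dst_root, rel))       # '/tmp/backup/a.csv', ...
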
@@ -7592,7 +7592,8 @@ class ModelVersionsAPI:
          response. For the latter case, the caller must also be the owner or have the **USE_CATALOG** privilege
          on the parent catalog and the **USE_SCHEMA** privilege on the parent schema.
 
-         There is no guarantee of a specific ordering of the elements in the response.
+         There is no guarantee of a specific ordering of the elements in the response. The elements in the
+         response will not contain any aliases or tags.
 
          :param full_name: str
              The full three-level name of the registered model under which to list model versions
@@ -2502,6 +2502,8 @@ class ResolvedValues:
 
  @dataclass
  class Run:
+     """Run was retrieved successfully"""
+
      attempt_number: Optional[int] = None
      """The sequence number of this run attempt for a triggered job run. The initial attempt of a run
      has an attempt_number of 0. If the initial run attempt fails, and the job has a retry policy