databricks-sdk 0.44.0__py3-none-any.whl → 0.45.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of databricks-sdk might be problematic.
- databricks/sdk/__init__.py +123 -115
- databricks/sdk/_base_client.py +112 -88
- databricks/sdk/_property.py +12 -7
- databricks/sdk/_widgets/__init__.py +13 -2
- databricks/sdk/_widgets/default_widgets_utils.py +21 -15
- databricks/sdk/_widgets/ipywidgets_utils.py +47 -24
- databricks/sdk/azure.py +8 -6
- databricks/sdk/casing.py +5 -5
- databricks/sdk/config.py +152 -99
- databricks/sdk/core.py +57 -47
- databricks/sdk/credentials_provider.py +360 -210
- databricks/sdk/data_plane.py +86 -3
- databricks/sdk/dbutils.py +123 -87
- databricks/sdk/environments.py +52 -35
- databricks/sdk/errors/base.py +61 -35
- databricks/sdk/errors/customizer.py +3 -3
- databricks/sdk/errors/deserializer.py +38 -25
- databricks/sdk/errors/details.py +417 -0
- databricks/sdk/errors/mapper.py +1 -1
- databricks/sdk/errors/overrides.py +27 -24
- databricks/sdk/errors/parser.py +26 -14
- databricks/sdk/errors/platform.py +10 -10
- databricks/sdk/errors/private_link.py +24 -24
- databricks/sdk/logger/round_trip_logger.py +28 -20
- databricks/sdk/mixins/compute.py +90 -60
- databricks/sdk/mixins/files.py +815 -145
- databricks/sdk/mixins/jobs.py +201 -20
- databricks/sdk/mixins/open_ai_client.py +26 -20
- databricks/sdk/mixins/workspace.py +45 -34
- databricks/sdk/oauth.py +372 -196
- databricks/sdk/retries.py +14 -12
- databricks/sdk/runtime/__init__.py +34 -17
- databricks/sdk/runtime/dbutils_stub.py +52 -39
- databricks/sdk/service/_internal.py +12 -7
- databricks/sdk/service/apps.py +618 -418
- databricks/sdk/service/billing.py +827 -604
- databricks/sdk/service/catalog.py +6552 -4474
- databricks/sdk/service/cleanrooms.py +550 -388
- databricks/sdk/service/compute.py +5241 -3531
- databricks/sdk/service/dashboards.py +1313 -923
- databricks/sdk/service/files.py +442 -309
- databricks/sdk/service/iam.py +2115 -1483
- databricks/sdk/service/jobs.py +4151 -2588
- databricks/sdk/service/marketplace.py +2210 -1517
- databricks/sdk/service/ml.py +3364 -2255
- databricks/sdk/service/oauth2.py +922 -584
- databricks/sdk/service/pipelines.py +1865 -1203
- databricks/sdk/service/provisioning.py +1435 -1029
- databricks/sdk/service/serving.py +2040 -1278
- databricks/sdk/service/settings.py +2846 -1929
- databricks/sdk/service/sharing.py +2201 -877
- databricks/sdk/service/sql.py +4650 -3103
- databricks/sdk/service/vectorsearch.py +816 -550
- databricks/sdk/service/workspace.py +1330 -906
- databricks/sdk/useragent.py +36 -22
- databricks/sdk/version.py +1 -1
- {databricks_sdk-0.44.0.dist-info → databricks_sdk-0.45.0.dist-info}/METADATA +31 -31
- databricks_sdk-0.45.0.dist-info/RECORD +70 -0
- {databricks_sdk-0.44.0.dist-info → databricks_sdk-0.45.0.dist-info}/WHEEL +1 -1
- databricks_sdk-0.44.0.dist-info/RECORD +0 -69
- {databricks_sdk-0.44.0.dist-info → databricks_sdk-0.45.0.dist-info}/LICENSE +0 -0
- {databricks_sdk-0.44.0.dist-info → databricks_sdk-0.45.0.dist-info}/NOTICE +0 -0
- {databricks_sdk-0.44.0.dist-info → databricks_sdk-0.45.0.dist-info}/top_level.txt +0 -0
databricks/sdk/mixins/files.py
CHANGED
@@ -1,26 +1,36 @@
 from __future__ import annotations

 import base64
+import datetime
 import logging
 import os
 import pathlib
 import platform
+import re
 import shutil
 import sys
+import xml.etree.ElementTree as ET
 from abc import ABC, abstractmethod
 from collections import deque
 from collections.abc import Iterator
+from datetime import timedelta
 from io import BytesIO
 from types import TracebackType
-from typing import (TYPE_CHECKING, AnyStr, BinaryIO,
-                    Optional, Type, Union)
+from typing import (TYPE_CHECKING, AnyStr, BinaryIO, Callable, Generator,
+                    Iterable, Optional, Type, Union)
 from urllib import parse

+import requests
+import requests.adapters
 from requests import RequestException

-from .._base_client import _RawResponse, _StreamingResponse
+from .._base_client import _BaseClient, _RawResponse, _StreamingResponse
 from .._property import _cached_property
-from ..
+from ..config import Config
+from ..errors import AlreadyExists, NotFound
+from ..errors.customizer import _RetryAfterCustomizer
+from ..errors.mapper import _error_mapper
+from ..retries import retried
 from ..service import files
 from ..service._internal import _escape_multi_segment_path_parameter
 from ..service.files import DownloadResponse
@@ -39,19 +49,25 @@ class _DbfsIO(BinaryIO):
     _offset = 0
     _closed = False

-    def __init__(
-
-
-
-
-
-
+    def __init__(
+        self,
+        api: files.DbfsAPI,
+        path: str,
+        *,
+        read: bool = False,
+        write: bool = False,
+        overwrite: bool = False,
+    ):
         self._api = api
         self._path = path
-        if write and read:
-
-
-
+        if write and read:
+            raise IOError(f"can open either for reading or writing")
+        if read:
+            self._status = api.get_status(path)
+        elif write:
+            self._created = api.create(path, overwrite=overwrite)
+        else:
+            raise IOError(f"need to open either for reading or writing")

     def __enter__(self) -> Self:
         return self
@@ -69,54 +85,59 @@ class _DbfsIO(BinaryIO):
         return self._created is not None

     def write(self, buffer: bytes) -> int:
-        """
+        """Write bytes to file.

         :return: Return the number of bytes written.
         """
         if not self.writable():
-            raise IOError(
+            raise IOError("file not open for writing")
         if type(buffer) is not bytes:
             # Python doesn't strictly enforce types. Even if they're specified.
-            raise TypeError(f
+            raise TypeError(f"a bytes-like object is required, not {type(buffer)}")
         total = 0
         while total < len(buffer):
             chunk = buffer[total:]
             if len(chunk) > self.MAX_CHUNK_SIZE:
-                chunk = chunk[:self.MAX_CHUNK_SIZE]
+                chunk = chunk[: self.MAX_CHUNK_SIZE]
             encoded = base64.b64encode(chunk).decode()
             self._api.add_block(self._created.handle, encoded)
             total += len(chunk)
         return total

     def close(self) -> None:
-        """
-        if self.writable():
+        """Disable all I/O operations."""
+        if self.writable():
+            self._api.close(self._created.handle)
         self._closed = True

     @property
     def closed(self) -> bool:
         return self._closed

-    def __exit__(
-
+    def __exit__(
+        self,
+        __t: Type[BaseException] | None,
+        __value: BaseException | None,
+        __traceback: TracebackType | None,
+    ):
         self.close()

     def readable(self) -> bool:
         return self._status is not None

     def read(self, size: int = ...) -> bytes:
-        """
+        """Read at most size bytes, returned as a bytes object.

         :param size: If the size argument is negative, read until EOF is reached.
             Return an empty bytes object at EOF.
         :return: bytes
         """
         if not self.readable():
-            raise IOError(
+            raise IOError("file not open for reading")

         # call __iter__() and read until EOF is reached
         if size is ... or size < 0:
-            buffer = b
+            buffer = b""
             for chunk in self:
                 buffer += chunk
             return buffer
@@ -128,7 +149,7 @@ class _DbfsIO(BinaryIO):
         if response.bytes_read == 0:
             # as per Python interface convention, return an empty bytes object at EOF,
             # and not the EOFError as in other SDKs
-            return b
+            return b""

         raw = base64.b64decode(response.data)
         self._offset += response.bytes_read
@@ -178,7 +199,15 @@ class _DbfsIO(BinaryIO):

 class _VolumesIO(BinaryIO):

-    def __init__(
+    def __init__(
+        self,
+        api: files.FilesAPI,
+        path: str,
+        *,
+        read: bool,
+        write: bool,
+        overwrite: bool,
+    ):
         self._buffer = []
         self._api = api
         self._path = path
@@ -198,8 +227,12 @@ class _VolumesIO(BinaryIO):
         if self._closed:
             return
         if self._write:
-            to_write = b
-            self._api.upload(
+            to_write = b"".join(self._buffer)
+            self._api.upload(
+                self._path,
+                contents=BytesIO(to_write),
+                overwrite=self._overwrite,
+            )
         elif self._read:
             self._read_handle.close()
         self._closed = True
@@ -215,7 +248,7 @@ class _VolumesIO(BinaryIO):

     def __check_closed(self):
         if self._closed:
-            raise ValueError(
+            raise ValueError("I/O operation on closed file")

     def __open_read(self):
         if self._read_handle is None:
@@ -277,55 +310,45 @@ class _VolumesIO(BinaryIO):
 class _Path(ABC):

     @abstractmethod
-    def __init__(self):
-        ...
+    def __init__(self): ...

     @property
     def is_local(self) -> bool:
         return self._is_local()

     @abstractmethod
-    def _is_local(self) -> bool:
-        ...
+    def _is_local(self) -> bool: ...

     @property
     def is_dbfs(self) -> bool:
         return self._is_dbfs()

     @abstractmethod
-    def _is_dbfs(self) -> bool:
-        ...
+    def _is_dbfs(self) -> bool: ...

     @abstractmethod
-    def child(self, path: str) -> str:
-        ...
+    def child(self, path: str) -> str: ...

     @_cached_property
     def is_dir(self) -> bool:
         return self._is_dir()

     @abstractmethod
-    def _is_dir(self) -> bool:
-        ...
+    def _is_dir(self) -> bool: ...

     @abstractmethod
-    def exists(self) -> bool:
-        ...
+    def exists(self) -> bool: ...

     @abstractmethod
-    def open(self, *, read=False, write=False, overwrite=False):
-        ...
+    def open(self, *, read=False, write=False, overwrite=False): ...

-    def list(self, *, recursive=False) -> Generator[files.FileInfo, None, None]:
-        ...
+    def list(self, *, recursive=False) -> Generator[files.FileInfo, None, None]: ...

     @abstractmethod
-    def mkdir(self):
-        ...
+    def mkdir(self): ...

     @abstractmethod
-    def delete(self, *, recursive=False):
-        ...
+    def delete(self, *, recursive=False): ...

     @property
     def name(self) -> str:
@@ -340,9 +363,9 @@ class _LocalPath(_Path):

     def __init__(self, path: str):
         if platform.system() == "Windows":
-            self._path = pathlib.Path(str(path).replace(
+            self._path = pathlib.Path(str(path).replace("file:///", "").replace("file:", ""))
         else:
-            self._path = pathlib.Path(str(path).replace(
+            self._path = pathlib.Path(str(path).replace("file:", ""))

     def _is_local(self) -> bool:
         return True
@@ -365,16 +388,17 @@ class _LocalPath(_Path):
     def open(self, *, read=False, write=False, overwrite=False):
         # make local fs follow the similar semantics as DBFS
         self._path.parent.mkdir(mode=0o755, parents=True, exist_ok=True)
-        return self._path.open(mode=
+        return self._path.open(mode="wb" if overwrite else "rb" if read else "xb")

     def list(self, recursive=False) -> Generator[files.FileInfo, None, None]:
         if not self.is_dir:
             st = self._path.stat()
-            yield files.FileInfo(
-
-
-
-
+            yield files.FileInfo(
+                path="file:" + str(self._path.absolute()),
+                is_dir=False,
+                file_size=st.st_size,
+                modification_time=int(st.st_mtime_ns / 1e6),
+            )
             return
         queue = deque([self._path])
         while queue:
@@ -385,11 +409,12 @@ class _LocalPath(_Path):
                     queue.append(leaf)
                     continue
                 info = leaf.stat()
-                yield files.FileInfo(
-
-
-
-
+                yield files.FileInfo(
+                    path="file:" + str(leaf.absolute()),
+                    is_dir=False,
+                    file_size=info.st_size,
+                    modification_time=int(info.st_mtime_ns / 1e6),
+                )

     def delete(self, *, recursive=False):
         if self.is_dir:
@@ -400,17 +425,17 @@ class _LocalPath(_Path):
         else:
             kw = {}
             if sys.version_info[:2] > (3, 7):
-                kw[
+                kw["missing_ok"] = True
             self._path.unlink(**kw)

     def __repr__(self) -> str:
-        return f
+        return f"<_LocalPath {self._path}>"


 class _VolumesPath(_Path):

     def __init__(self, api: files.FilesAPI, src: Union[str, pathlib.Path]):
-        self._path = pathlib.PurePosixPath(str(src).replace(
+        self._path = pathlib.PurePosixPath(str(src).replace("dbfs:", "").replace("file:", ""))
         self._api = api

     def _is_local(self) -> bool:
@@ -440,16 +465,23 @@ class _VolumesPath(_Path):
         return self.is_dir

     def open(self, *, read=False, write=False, overwrite=False) -> BinaryIO:
-        return _VolumesIO(
+        return _VolumesIO(
+            self._api,
+            self.as_string,
+            read=read,
+            write=write,
+            overwrite=overwrite,
+        )

     def list(self, *, recursive=False) -> Generator[files.FileInfo, None, None]:
         if not self.is_dir:
             meta = self._api.get_metadata(self.as_string)
-            yield files.FileInfo(
-
-
-
-
+            yield files.FileInfo(
+                path=self.as_string,
+                is_dir=False,
+                file_size=meta.content_length,
+                modification_time=meta.last_modified,
+            )
             return
         queue = deque([self])
         while queue:
@@ -458,11 +490,12 @@ class _VolumesPath(_Path):
                 if recursive and file.is_directory:
                     queue.append(self.child(file.name))
                 if not recursive or not file.is_directory:
-                    yield files.FileInfo(
-
-
-
-
+                    yield files.FileInfo(
+                        path=file.path,
+                        is_dir=file.is_directory,
+                        file_size=file.file_size,
+                        modification_time=file.last_modified,
+                    )

     def delete(self, *, recursive=False):
         if self.is_dir:
@@ -473,13 +506,13 @@ class _VolumesPath(_Path):
             self._api.delete(self.as_string)

     def __repr__(self) -> str:
-        return f
+        return f"<_VolumesPath {self._path}>"


 class _DbfsPath(_Path):

     def __init__(self, api: files.DbfsAPI, src: str):
-        self._path = pathlib.PurePosixPath(str(src).replace(
+        self._path = pathlib.PurePosixPath(str(src).replace("dbfs:", "").replace("file:", ""))
         self._api = api

     def _is_local(self) -> bool:
@@ -510,16 +543,23 @@ class _DbfsPath(_Path):
             return False

     def open(self, *, read=False, write=False, overwrite=False) -> BinaryIO:
-        return _DbfsIO(
+        return _DbfsIO(
+            self._api,
+            self.as_string,
+            read=read,
+            write=write,
+            overwrite=overwrite,
+        )

     def list(self, *, recursive=False) -> Generator[files.FileInfo, None, None]:
         if not self.is_dir:
             meta = self._api.get_status(self.as_string)
-            yield files.FileInfo(
-
-
-
-
+            yield files.FileInfo(
+                path=self.as_string,
+                is_dir=False,
+                file_size=meta.file_size,
+                modification_time=meta.modification_time,
+            )
             return
         queue = deque([self])
         while queue:
@@ -534,7 +574,7 @@ class _DbfsPath(_Path):
         self._api.delete(self.as_string, recursive=recursive)

     def __repr__(self) -> str:
-        return f
+        return f"<_DbfsPath {self._path}>"


 class DbfsExt(files.DbfsAPI):
@@ -545,12 +585,14 @@ class DbfsExt(files.DbfsAPI):
         self._files_api = files.FilesAPI(api_client)
         self._dbfs_api = files.DbfsAPI(api_client)

-    def open(
-
-
-
-
-
+    def open(
+        self,
+        path: str,
+        *,
+        read: bool = False,
+        write: bool = False,
+        overwrite: bool = False,
+    ) -> BinaryIO:
         return self._path(path).open(read=read, write=write, overwrite=overwrite)

     def upload(self, path: str, src: BinaryIO, *, overwrite: bool = False):
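The hunk above only reformats the DbfsExt.open() signature; its behaviour is unchanged. As a minimal caller-side sketch (not part of the diff; it assumes a configured WorkspaceClient and uses a hypothetical DBFS path), the mixin is typically exercised like this:

from databricks.sdk import WorkspaceClient

w = WorkspaceClient()

# Write a small file through the DBFS mixin, then read it back.
with w.dbfs.open("/tmp/sdk-example.txt", write=True, overwrite=True) as f:
    f.write(b"hello from the SDK")

with w.dbfs.open("/tmp/sdk-example.txt", read=True) as f:
    print(f.read())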
@@ -588,17 +630,18 @@ class DbfsExt(files.DbfsAPI):
         p = self._path(path)
         return p.exists()

-    __ALLOWED_SCHEMES = [None,
+    __ALLOWED_SCHEMES = [None, "file", "dbfs"]

     def _path(self, src):
         src = parse.urlparse(str(src))
         if src.scheme and src.scheme not in self.__ALLOWED_SCHEMES:
             raise ValueError(
                 f'unsupported scheme "{src.scheme}". DBUtils in the SDK only supports local, root DBFS, and '
-
-
+                "UC Volumes paths, not external locations or DBFS mount points."
+            )
+        if src.scheme == "file":
             return _LocalPath(src.geturl())
-        if src.path.startswith(
+        if src.path.startswith("/Volumes"):
             return _VolumesPath(self._files_api, src.geturl())
         return _DbfsPath(self._dbfs_api, src.geturl())

@@ -607,7 +650,7 @@ class DbfsExt(files.DbfsAPI):
         src = self._path(src)
         dst = self._path(dst)
         if src.is_local and dst.is_local:
-            raise IOError(
+            raise IOError("both destinations are on local FS")
         if dst.exists() and dst.is_dir:
             # if target is a folder, make file with the same name there
             dst = dst.child(src.name)
@@ -630,11 +673,11 @@ class DbfsExt(files.DbfsAPI):
             # this operation is recursive by default.
             return self.move(source.as_string, target.as_string)
         if source.is_local and target.is_local:
-            raise IOError(
+            raise IOError("both destinations are on local FS")
         if source.is_dir and not recursive:
-            src_type =
-            dst_type =
-            raise IOError(f
+            src_type = "local" if source.is_local else "DBFS" if source.is_dbfs else "UC Volume"
+            dst_type = "local" if target.is_local else "DBFS" if target.is_dbfs else "UC Volume"
+            raise IOError(f"moving a directory from {src_type} to {dst_type} requires recursive flag")
         # do cross-fs moving
         self.copy(src, dst, recursive=recursive, overwrite=overwrite)
         self.delete(src, recursive=recursive)
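The hunks above fill in the error messages for local-to-local copies and non-recursive directory moves. A minimal sketch of the cross-filesystem copy these methods implement (not part of the diff; the paths are illustrative and a configured WorkspaceClient is assumed):

from databricks.sdk import WorkspaceClient

w = WorkspaceClient()

# Copy a local file into DBFS; the "file:" and "dbfs:" schemes select
# _LocalPath and _DbfsPath in _path() above. Copying or moving a directory
# across filesystems requires recursive=True, otherwise the IOError added
# above is raised.
w.dbfs.copy("file:/tmp/local-report.csv", "dbfs:/tmp/report.csv", overwrite=True)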
@@ -643,16 +686,20 @@ class DbfsExt(files.DbfsAPI):
         """Delete file or directory on DBFS"""
         p = self._path(path)
         if p.is_dir and not recursive:
-            raise IOError(
+            raise IOError("deleting directories requires recursive flag")
         p.delete(recursive=recursive)


 class FilesExt(files.FilesAPI):
     __doc__ = files.FilesAPI.__doc__

+    # note that these error codes are retryable only for idempotent operations
+    _RETRYABLE_STATUS_CODES = [408, 429, 500, 502, 503, 504]
+
     def __init__(self, api_client, config: Config):
         super().__init__(api_client)
         self._config = config.copy()
+        self._multipart_upload_read_ahead_bytes = 1

     def download(self, file_path: str) -> DownloadResponse:
         """Download a file.
@@ -670,56 +717,660 @@ class FilesExt(files.FilesAPI):
         :returns: :class:`DownloadResponse`
         """

-        initial_response: DownloadResponse = self.
-
-
+        initial_response: DownloadResponse = self._open_download_stream(
+            file_path=file_path,
+            start_byte_offset=0,
+            if_unmodified_since_timestamp=None,
+        )

         wrapped_response = self._wrap_stream(file_path, initial_response)
         initial_response.contents._response = wrapped_response
         return initial_response

-    def
-
-
-
-
+    def upload(self, file_path: str, contents: BinaryIO, *, overwrite: Optional[bool] = None):
+        """Upload a file.
+
+        Uploads a file. The file contents should be sent as the request body as raw bytes (an
+        octet stream); do not encode or otherwise modify the bytes before sending. The contents of the
+        resulting file will be exactly the bytes sent in the request body. If the request is successful, there
+        is no response body.
+
+        :param file_path: str
+          The absolute remote path of the target file.
+        :param contents: BinaryIO
+        :param overwrite: bool (optional)
+          If true, an existing file will be overwritten. When not specified, assumed True.
+        """
+
+        # Upload empty and small files with one-shot upload.
+        pre_read_buffer = contents.read(self._config.multipart_upload_min_stream_size)
+        if len(pre_read_buffer) < self._config.multipart_upload_min_stream_size:
+            _LOG.debug(
+                f"Using one-shot upload for input stream of size {len(pre_read_buffer)} below {self._config.multipart_upload_min_stream_size} bytes"
+            )
+            return super().upload(file_path=file_path, contents=BytesIO(pre_read_buffer), overwrite=overwrite)
+
+        query = {"action": "initiate-upload"}
+        if overwrite is not None:
+            query["overwrite"] = overwrite
+
+        # Method _api.do() takes care of retrying and will raise an exception in case of failure.
+        initiate_upload_response = self._api.do(
+            "POST", f"/api/2.0/fs/files{_escape_multi_segment_path_parameter(file_path)}", query=query
+        )
+
+        if initiate_upload_response.get("multipart_upload"):
+            cloud_provider_session = self._create_cloud_provider_session()
+            session_token = initiate_upload_response["multipart_upload"].get("session_token")
+            if not session_token:
+                raise ValueError(f"Unexpected server response: {initiate_upload_response}")
+
+            try:
+                self._perform_multipart_upload(
+                    file_path, contents, session_token, pre_read_buffer, cloud_provider_session
+                )
+            except Exception as e:
+                _LOG.info(f"Aborting multipart upload on error: {e}")
+                try:
+                    self._abort_multipart_upload(file_path, session_token, cloud_provider_session)
+                except BaseException as ex:
+                    _LOG.warning(f"Failed to abort upload: {ex}")
+                    # ignore, abort is a best-effort
+                finally:
+                    # rethrow original exception
+                    raise e from None
+
+        elif initiate_upload_response.get("resumable_upload"):
+            cloud_provider_session = self._create_cloud_provider_session()
+            session_token = initiate_upload_response["resumable_upload"]["session_token"]
+            self._perform_resumable_upload(
+                file_path, contents, session_token, overwrite, pre_read_buffer, cloud_provider_session
+            )
+        else:
+            raise ValueError(f"Unexpected server response: {initiate_upload_response}")
+
+    def _perform_multipart_upload(
+        self,
+        target_path: str,
+        input_stream: BinaryIO,
+        session_token: str,
+        pre_read_buffer: bytes,
+        cloud_provider_session: requests.Session,
+    ):
+        """
+        Performs multipart upload using presigned URLs on AWS and Azure:
+        https://docs.aws.amazon.com/AmazonS3/latest/userguide/mpuoverview.html
+        """
+        current_part_number = 1
+        etags: dict = {}
+
+        # Why are we buffering the current chunk?
+        # AWS and Azure don't support traditional "Transfer-encoding: chunked", so we must
+        # provide each chunk size up front. In case of a non-seekable input stream we need
+        # to buffer a chunk before uploading to know its size. This also allows us to rewind
+        # the stream before retrying on request failure.
+        # AWS signed chunked upload: https://docs.aws.amazon.com/AmazonS3/latest/API/sigv4-streaming.html
+        # https://learn.microsoft.com/en-us/azure/storage/blobs/storage-blobs-tune-upload-download-python#buffering-during-uploads
+
+        chunk_offset = 0  # used only for logging
+
+        # This buffer is expected to contain at least multipart_upload_chunk_size bytes.
+        # Note that initially buffer can be bigger (from pre_read_buffer).
+        buffer = pre_read_buffer
+
+        retry_count = 0
+        eof = False
+        while not eof:
+            # If needed, buffer the next chunk.
+            buffer = FilesExt._fill_buffer(buffer, self._config.multipart_upload_chunk_size, input_stream)
+            if len(buffer) == 0:
+                # End of stream, no need to request the next block of upload URLs.
+                break
+
+            _LOG.debug(
+                f"Multipart upload: requesting next {self._config.multipart_upload_batch_url_count} upload URLs starting from part {current_part_number}"
+            )
+
+            body: dict = {
+                "path": target_path,
+                "session_token": session_token,
+                "start_part_number": current_part_number,
+                "count": self._config.multipart_upload_batch_url_count,
+                "expire_time": self._get_url_expire_time(),
+            }
+
+            headers = {"Content-Type": "application/json"}
+
+            # Requesting URLs for the same set of parts is an idempotent operation, safe to retry.
+            # Method _api.do() takes care of retrying and will raise an exception in case of failure.
+            upload_part_urls_response = self._api.do(
+                "POST", "/api/2.0/fs/create-upload-part-urls", headers=headers, body=body
+            )
+
+            upload_part_urls = upload_part_urls_response.get("upload_part_urls", [])
+            if len(upload_part_urls) == 0:
+                raise ValueError(f"Unexpected server response: {upload_part_urls_response}")
+
+            for upload_part_url in upload_part_urls:
+                buffer = FilesExt._fill_buffer(buffer, self._config.multipart_upload_chunk_size, input_stream)
+                actual_buffer_length = len(buffer)
+                if actual_buffer_length == 0:
+                    eof = True
+                    break
+
+                url = upload_part_url["url"]
+                required_headers = upload_part_url.get("headers", [])
+                assert current_part_number == upload_part_url["part_number"]
+
+                headers: dict = {"Content-Type": "application/octet-stream"}
+                for h in required_headers:
+                    headers[h["name"]] = h["value"]
+
+                actual_chunk_length = min(actual_buffer_length, self._config.multipart_upload_chunk_size)
+                _LOG.debug(
+                    f"Uploading part {current_part_number}: [{chunk_offset}, {chunk_offset + actual_chunk_length - 1}]"
+                )
+
+                chunk = BytesIO(buffer[:actual_chunk_length])
+
+                def rewind():
+                    chunk.seek(0, os.SEEK_SET)
+
+                def perform():
+                    return cloud_provider_session.request(
+                        "PUT",
+                        url,
+                        headers=headers,
+                        data=chunk,
+                        timeout=self._config.multipart_upload_single_chunk_upload_timeout_seconds,
+                    )
+
+                upload_response = self._retry_idempotent_operation(perform, rewind)
+
+                if upload_response.status_code in (200, 201):
+                    # Chunk upload successful
+
+                    chunk_offset += actual_chunk_length
+
+                    etag = upload_response.headers.get("ETag", "")
+                    etags[current_part_number] = etag
+
+                    # Discard uploaded bytes
+                    buffer = buffer[actual_chunk_length:]
+
+                    # Reset retry count when progressing along the stream
+                    retry_count = 0
+
+                elif FilesExt._is_url_expired_response(upload_response):
+                    if retry_count < self._config.multipart_upload_max_retries:
+                        retry_count += 1
+                        _LOG.debug("Upload URL expired")
+                        # Preserve the buffer so we'll upload the current part again using next upload URL
+                    else:
+                        # don't confuse user with unrelated "Permission denied" error.
+                        raise ValueError(f"Unsuccessful chunk upload: upload URL expired")
+
+                else:
+                    message = f"Unsuccessful chunk upload. Response status: {upload_response.status_code}, body: {upload_response.content}"
+                    _LOG.warning(message)
+                    mapped_error = _error_mapper(upload_response, {})
+                    raise mapped_error or ValueError(message)
+
+                current_part_number += 1
+
+        _LOG.debug(
+            f"Completing multipart upload after uploading {len(etags)} parts of up to {self._config.multipart_upload_chunk_size} bytes"
+        )
+
+        query = {"action": "complete-upload", "upload_type": "multipart", "session_token": session_token}
+        headers = {"Content-Type": "application/json"}
+        body: dict = {}
+
+        parts = []
+        for etag in sorted(etags.items()):
+            part = {"part_number": etag[0], "etag": etag[1]}
+            parts.append(part)
+
+        body["parts"] = parts
+
+        # Completing upload is an idempotent operation, safe to retry.
+        # Method _api.do() takes care of retrying and will raise an exception in case of failure.
+        self._api.do(
+            "POST",
+            f"/api/2.0/fs/files{_escape_multi_segment_path_parameter(target_path)}",
+            query=query,
+            headers=headers,
+            body=body,
+        )
+
+    @staticmethod
+    def _fill_buffer(buffer: bytes, desired_min_size: int, input_stream: BinaryIO):
+        """
+        Tries to fill given buffer to contain at least `desired_min_size` bytes by reading from input stream.
+        """
+        bytes_to_read = max(0, desired_min_size - len(buffer))
+        if bytes_to_read > 0:
+            next_buf = input_stream.read(bytes_to_read)
+            new_buffer = buffer + next_buf
+            return new_buffer
+        else:
+            # we have already buffered enough data
+            return buffer
+
+    @staticmethod
+    def _is_url_expired_response(response: requests.Response):
+        """
+        Checks if response matches one of the known "URL expired" responses from the cloud storage providers.
+        """
+        if response.status_code != 403:
+            return False
+
+        try:
+            xml_root = ET.fromstring(response.content)
+            if xml_root.tag != "Error":
+                return False
+
+            code = xml_root.find("Code")
+            if code is None:
+                return False
+
+            if code.text == "AuthenticationFailed":
+                # Azure
+                details = xml_root.find("AuthenticationErrorDetail")
+                if details is not None and "Signature not valid in the specified time frame" in details.text:
+                    return True
+
+            if code.text == "AccessDenied":
+                # AWS
+                message = xml_root.find("Message")
+                if message is not None and message.text == "Request has expired":
+                    return True
+
+        except ET.ParseError:
+            pass
+
+        return False
+
+    def _perform_resumable_upload(
+        self,
+        target_path: str,
+        input_stream: BinaryIO,
+        session_token: str,
+        overwrite: bool,
+        pre_read_buffer: bytes,
+        cloud_provider_session: requests.Session,
+    ):
+        """
+        Performs resumable upload on GCP: https://cloud.google.com/storage/docs/performing-resumable-uploads
+        """
+
+        # Session URI we're using expires after a week
+
+        # Why are we buffering the current chunk?
+        # When using resumable upload API we're uploading data in chunks. During chunk upload
+        # server responds with the "received offset" confirming how much data it stored so far,
+        # so we should continue uploading from that offset. (Note this is not a failure but an
+        # expected behaviour as per the docs.) But, input stream might be consumed beyond that
+        # offset, since server might have read more data than it confirmed received, or some data
+        # might have been pre-cached by e.g. OS or a proxy. So, to continue upload, we must rewind
+        # the input stream back to the byte next to "received offset". This is not possible
+        # for non-seekable input stream, so we must buffer the whole last chunk and seek inside
+        # the buffer. By always uploading from the buffer we fully support non-seekable streams.
+
+        # Why are we doing read-ahead?
+        # It's not possible to upload an empty chunk as "Content-Range" header format does not
+        # support this. So if current chunk happens to finish exactly at the end of the stream,
+        # we need to know that and mark the chunk as last (by passing real file size in the
+        # "Content-Range" header) when uploading it. To detect if we're at the end of the stream
+        # we're reading "ahead" an extra bytes but not uploading them immediately. If
+        # nothing has been read ahead, it means we're at the end of the stream.
+        # On the contrary, in multipart upload we can decide to complete upload *after*
+        # last chunk has been sent.
+
+        body: dict = {"path": target_path, "session_token": session_token}
+
+        headers = {"Content-Type": "application/json"}
+
+        # Method _api.do() takes care of retrying and will raise an exception in case of failure.
+        resumable_upload_url_response = self._api.do(
+            "POST", "/api/2.0/fs/create-resumable-upload-url", headers=headers, body=body
+        )
+
+        resumable_upload_url_node = resumable_upload_url_response.get("resumable_upload_url")
+        if not resumable_upload_url_node:
+            raise ValueError(f"Unexpected server response: {resumable_upload_url_response}")
+
+        resumable_upload_url = resumable_upload_url_node.get("url")
+        if not resumable_upload_url:
+            raise ValueError(f"Unexpected server response: {resumable_upload_url_response}")
+
+        required_headers = resumable_upload_url_node.get("headers", [])
+
+        try:
+            # We will buffer this many bytes: one chunk + read-ahead block.
+            # Note buffer may contain more data initially (from pre_read_buffer).
+            min_buffer_size = self._config.multipart_upload_chunk_size + self._multipart_upload_read_ahead_bytes
+
+            buffer = pre_read_buffer
+
+            # How many bytes in the buffer were confirmed to be received by the server.
+            # All the remaining bytes in the buffer must be uploaded.
+            uploaded_bytes_count = 0
+
+            chunk_offset = 0
+
+            retry_count = 0
+            while True:
+                # If needed, fill the buffer to contain at least min_buffer_size bytes
+                # (unless end of stream), discarding already uploaded bytes.
+                bytes_to_read = max(0, min_buffer_size - (len(buffer) - uploaded_bytes_count))
+                next_buf = input_stream.read(bytes_to_read)
+                buffer = buffer[uploaded_bytes_count:] + next_buf
+
+                if len(next_buf) < bytes_to_read:
+                    # This is the last chunk in the stream.
+                    # Let's upload all the remaining bytes in one go.
+                    actual_chunk_length = len(buffer)
+                    file_size = chunk_offset + actual_chunk_length
+                else:
+                    # More chunks expected, let's upload current chunk (excluding read-ahead block).
+                    actual_chunk_length = self._config.multipart_upload_chunk_size
+                    file_size = "*"
+
+                headers: dict = {"Content-Type": "application/octet-stream"}
+                for h in required_headers:
+                    headers[h["name"]] = h["value"]
+
+                chunk_last_byte_offset = chunk_offset + actual_chunk_length - 1
+                content_range_header = f"bytes {chunk_offset}-{chunk_last_byte_offset}/{file_size}"
+                _LOG.debug(f"Uploading chunk: {content_range_header}")
+                headers["Content-Range"] = content_range_header
+
+                def retrieve_upload_status() -> Optional[requests.Response]:
+                    def perform():
+                        return cloud_provider_session.request(
+                            "PUT",
+                            resumable_upload_url,
+                            headers={"Content-Range": "bytes */*"},
+                            data=b"",
+                            timeout=self._config.multipart_upload_single_chunk_upload_timeout_seconds,
+                        )
+
+                    try:
+                        return self._retry_idempotent_operation(perform)
+                    except RequestException:
+                        _LOG.warning("Failed to retrieve upload status")
+                        return None
+
+                try:
+                    upload_response = cloud_provider_session.request(
+                        "PUT",
+                        resumable_upload_url,
+                        headers=headers,
+                        data=BytesIO(buffer[:actual_chunk_length]),
+                        timeout=self._config.multipart_upload_single_chunk_upload_timeout_seconds,
+                    )
+
+                    # https://cloud.google.com/storage/docs/performing-resumable-uploads#resume-upload
+                    # If an upload request is terminated before receiving a response, or if you receive
+                    # a 503 or 500 response, then you need to resume the interrupted upload from where it left off.
+
+                    # Let's follow that for all potentially retryable status codes.
+                    # Together with the catch block below we replicate the logic in _retry_idempotent_operation().
+                    if upload_response.status_code in self._RETRYABLE_STATUS_CODES:
+                        if retry_count < self._config.multipart_upload_max_retries:
+                            retry_count += 1
+                            # let original upload_response be handled as an error
+                            upload_response = retrieve_upload_status() or upload_response
+                    else:
+                        # we received non-retryable response, reset retry count
+                        retry_count = 0
+
+                except RequestException as e:
+                    # Let's do the same for retryable network errors.
+                    if _BaseClient._is_retryable(e) and retry_count < self._config.multipart_upload_max_retries:
+                        retry_count += 1
+                        upload_response = retrieve_upload_status()
+                        if not upload_response:
+                            # rethrow original exception
+                            raise e from None
+                    else:
+                        # rethrow original exception
+                        raise e from None
+
+                if upload_response.status_code in (200, 201):
+                    if file_size == "*":
+                        raise ValueError(
+                            f"Received unexpected status {upload_response.status_code} before reaching end of stream"
+                        )
+
+                    # upload complete
+                    break
+
+                elif upload_response.status_code == 308:
+                    # chunk accepted (or check-status succeeded), let's determine received offset to resume from there
+                    range_string = upload_response.headers.get("Range")
+                    confirmed_offset = self._extract_range_offset(range_string)
+                    _LOG.debug(f"Received confirmed offset: {confirmed_offset}")
+
+                    if confirmed_offset:
+                        if confirmed_offset < chunk_offset - 1 or confirmed_offset > chunk_last_byte_offset:
+                            raise ValueError(
+                                f"Unexpected received offset: {confirmed_offset} is outside of expected range, chunk offset: {chunk_offset}, chunk last byte offset: {chunk_last_byte_offset}"
+                            )
+                    else:
+                        if chunk_offset > 0:
+                            raise ValueError(
+                                f"Unexpected received offset: {confirmed_offset} is outside of expected range, chunk offset: {chunk_offset}, chunk last byte offset: {chunk_last_byte_offset}"
+                            )
+
+                    # We have just uploaded a part of chunk starting from offset "chunk_offset" and ending
+                    # at offset "confirmed_offset" (inclusive), so the next chunk will start at
+                    # offset "confirmed_offset + 1"
+                    if confirmed_offset:
+                        next_chunk_offset = confirmed_offset + 1
+                    else:
+                        next_chunk_offset = chunk_offset
+                    uploaded_bytes_count = next_chunk_offset - chunk_offset
+                    chunk_offset = next_chunk_offset
+
+                elif upload_response.status_code == 412 and not overwrite:
+                    # Assuming this is only possible reason
+                    # Full message in this case: "At least one of the pre-conditions you specified did not hold."
+                    raise AlreadyExists("The file being created already exists.")
+
+                else:
+                    message = f"Unsuccessful chunk upload. Response status: {upload_response.status_code}, body: {upload_response.content}"
+                    _LOG.warning(message)
+                    mapped_error = _error_mapper(upload_response, {})
+                    raise mapped_error or ValueError(message)
+
+        except Exception as e:
+            _LOG.info(f"Aborting resumable upload on error: {e}")
+            try:
+                self._abort_resumable_upload(resumable_upload_url, required_headers, cloud_provider_session)
+            except BaseException as ex:
+                _LOG.warning(f"Failed to abort upload: {ex}")
+                # ignore, abort is a best-effort
+            finally:
+                # rethrow original exception
+                raise e from None
+
+    @staticmethod
+    def _extract_range_offset(range_string: Optional[str]) -> Optional[int]:
+        """Parses the response range header to extract the last byte."""
+        if not range_string:
+            return None  # server did not yet confirm any bytes
+
+        if match := re.match("bytes=0-(\\d+)", range_string):
+            return int(match.group(1))
+        else:
+            raise ValueError(f"Cannot parse response header: Range: {range_string}")
+
+    def _get_url_expire_time(self):
+        """Generates expiration time and save it in the required format."""
+        current_time = datetime.datetime.now(datetime.timezone.utc)
+        expire_time = current_time + self._config.multipart_upload_url_expiration_duration
+        # From Google Protobuf doc:
+        # In JSON format, the Timestamp type is encoded as a string in the
+        # * [RFC 3339](https://www.ietf.org/rfc/rfc3339.txt) format. That is, the
+        # * format is "{year}-{month}-{day}T{hour}:{min}:{sec}[.{frac_sec}]Z"
+        return expire_time.strftime("%Y-%m-%dT%H:%M:%SZ")
+
+    def _abort_multipart_upload(self, target_path: str, session_token: str, cloud_provider_session: requests.Session):
+        """Aborts ongoing multipart upload session to clean up incomplete file."""
+        body: dict = {"path": target_path, "session_token": session_token, "expire_time": self._get_url_expire_time()}
+
+        headers = {"Content-Type": "application/json"}
+
+        # Method _api.do() takes care of retrying and will raise an exception in case of failure.
+        abort_url_response = self._api.do("POST", "/api/2.0/fs/create-abort-upload-url", headers=headers, body=body)
+
+        abort_upload_url_node = abort_url_response["abort_upload_url"]
+        abort_url = abort_upload_url_node["url"]
+        required_headers = abort_upload_url_node.get("headers", [])
+
+        headers: dict = {"Content-Type": "application/octet-stream"}
+        for h in required_headers:
+            headers[h["name"]] = h["value"]
+
+        def perform():
+            return cloud_provider_session.request(
+                "DELETE",
+                abort_url,
+                headers=headers,
+                data=b"",
+                timeout=self._config.multipart_upload_single_chunk_upload_timeout_seconds,
+            )
+
+        abort_response = self._retry_idempotent_operation(perform)
+
+        if abort_response.status_code not in (200, 201):
+            raise ValueError(abort_response)
+
+    def _abort_resumable_upload(
+        self, resumable_upload_url: str, required_headers: list, cloud_provider_session: requests.Session
+    ):
+        """Aborts ongoing resumable upload session to clean up incomplete file."""
+        headers: dict = {}
+        for h in required_headers:
+            headers[h["name"]] = h["value"]
+
+        def perform():
+            return cloud_provider_session.request(
+                "DELETE",
+                resumable_upload_url,
+                headers=headers,
+                data=b"",
+                timeout=self._config.multipart_upload_single_chunk_upload_timeout_seconds,
+            )
+
+        abort_response = self._retry_idempotent_operation(perform)
+
+        if abort_response.status_code not in (200, 201):
+            raise ValueError(abort_response)
+
+    def _create_cloud_provider_session(self):
+        """Creates a separate session which does not inherit auth headers from BaseClient session."""
+        session = requests.Session()
+
+        # following session config in _BaseClient
+        http_adapter = requests.adapters.HTTPAdapter(
+            self._config.max_connection_pools or 20, self._config.max_connections_per_pool or 20, pool_block=True
+        )
+        session.mount("https://", http_adapter)
+        # presigned URL for storage proxy can use plain HTTP
+        session.mount("http://", http_adapter)
+        return session
+
+    def _retry_idempotent_operation(
+        self, operation: Callable[[], requests.Response], before_retry: Callable = None
+    ) -> requests.Response:
+        """Perform given idempotent operation with necessary retries. Since operation is idempotent it's
+        safe to retry it for response codes where server state might have changed.
+        """
+
+        def delegate():
+            response = operation()
+            if response.status_code in self._RETRYABLE_STATUS_CODES:
+                attrs = {}
+                # this will assign "retry_after_secs" to the attrs, essentially making exception look retryable
+                _RetryAfterCustomizer().customize_error(response, attrs)
+                raise _error_mapper(response, attrs)
+            else:
+                return response
+
+        # following _BaseClient timeout
+        retry_timeout_seconds = self._config.retry_timeout_seconds or 300
+
+        return retried(
+            timeout=timedelta(seconds=retry_timeout_seconds),
+            # also retry on network errors (connection error, connection timeout)
+            # where we believe request didn't reach the server
+            is_retryable=_BaseClient._is_retryable,
+            before_retry=before_retry,
+        )(delegate)()
+
+    def _open_download_stream(
+        self, file_path: str, start_byte_offset: int, if_unmodified_since_timestamp: Optional[str] = None
+    ) -> DownloadResponse:
+        """Opens a download stream from given offset, performing necessary retries."""
+        headers = {
+            "Accept": "application/octet-stream",
+        }

         if start_byte_offset and not if_unmodified_since_timestamp:
             raise Exception("if_unmodified_since_timestamp is required if start_byte_offset is specified")

         if start_byte_offset:
-            headers[
+            headers["Range"] = f"bytes={start_byte_offset}-"

         if if_unmodified_since_timestamp:
-            headers[
-
-        response_headers = [
-
-
-
-
-
+            headers["If-Unmodified-Since"] = if_unmodified_since_timestamp
+
+        response_headers = [
+            "content-length",
+            "content-type",
+            "last-modified",
+        ]
+        # Method _api.do() takes care of retrying and will raise an exception in case of failure.
+        res = self._api.do(
+            "GET",
+            f"/api/2.0/fs/files{_escape_multi_segment_path_parameter(file_path)}",
+            headers=headers,
+            response_headers=response_headers,
+            raw=True,
+        )

         result = DownloadResponse.from_dict(res)
         if not isinstance(result.contents, _StreamingResponse):
-            raise Exception(
-
+            raise Exception(
+                "Internal error: response contents is of unexpected type: " + type(result.contents).__name__
+            )

         return result

-    def _wrap_stream(self, file_path: str,
-        underlying_response = _ResilientIterator._extract_raw_response(
-        return _ResilientResponse(
-
-
-
-
+    def _wrap_stream(self, file_path: str, download_response: DownloadResponse):
+        underlying_response = _ResilientIterator._extract_raw_response(download_response)
+        return _ResilientResponse(
+            self,
+            file_path,
+            download_response.last_modified,
+            offset=0,
+            underlying_response=underlying_response,
+        )


 class _ResilientResponse(_RawResponse):

-    def __init__(
-
+    def __init__(
+        self,
+        api: FilesExt,
+        file_path: str,
+        file_last_modified: str,
+        offset: int,
+        underlying_response: _RawResponse,
+    ):
         self.api = api
         self.file_path = file_path
         self.underlying_response = underlying_response
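The new _retry_idempotent_operation() above retries chunk PUTs on retryable status codes and network errors, rewinding the buffered chunk before each attempt. A standalone sketch of that pattern using plain requests (the function name, URL, attempt count and backoff here are illustrative assumptions, not SDK API):

import time
from io import BytesIO

import requests

RETRYABLE = {408, 429, 500, 502, 503, 504}

def put_chunk_with_retries(url: str, payload: bytes, attempts: int = 3) -> requests.Response:
    chunk = BytesIO(payload)
    for attempt in range(attempts):
        chunk.seek(0)  # rewind before every (re)try, like the rewind() callback in the diff
        try:
            resp = requests.put(url, data=chunk, timeout=300)
        except requests.RequestException:
            if attempt == attempts - 1:
                raise  # out of attempts: surface the network error
            time.sleep(2**attempt)
            continue
        if resp.status_code not in RETRYABLE:
            return resp  # success or a non-retryable error goes back to the caller
        time.sleep(2**attempt)
    return resp

# put_chunk_with_retries("https://example.com/presigned-part-url", b"...")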
@@ -728,11 +1379,17 @@ class _ResilientResponse(_RawResponse):

     def iter_content(self, chunk_size=1, decode_unicode=False):
         if decode_unicode:
-            raise ValueError(
+            raise ValueError("Decode unicode is not supported")

         iterator = self.underlying_response.iter_content(chunk_size=chunk_size, decode_unicode=False)
-        self.iterator = _ResilientIterator(
-
+        self.iterator = _ResilientIterator(
+            iterator,
+            self.file_path,
+            self.file_last_modified,
+            self.offset,
+            self.api,
+            chunk_size,
+        )
         return self.iterator

     def close(self):
@@ -744,12 +1401,21 @@ class _ResilientIterator(Iterator):
    # and recovers from failures by requesting download from the current offset.

    @staticmethod
-    def _extract_raw_response(
-
+    def _extract_raw_response(
+        download_response: DownloadResponse,
+    ) -> _RawResponse:
+        streaming_response: _StreamingResponse = download_response.contents  # this is an instance of _StreamingResponse
         return streaming_response._response

-    def __init__(
-
+    def __init__(
+        self,
+        underlying_iterator,
+        file_path: str,
+        file_last_modified: str,
+        offset: int,
+        api: FilesExt,
+        chunk_size: int,
+    ):
         self._underlying_iterator = underlying_iterator
         self._api = api
         self._file_path = file_path
@@ -768,14 +1434,18 @@ class _ResilientIterator(Iterator):
         if self._total_recovers_count == self._api._config.files_api_client_download_max_total_recovers:
             _LOG.debug("Total recovers limit exceeded")
             return False
-        if
+        if (
+            self._api._config.files_api_client_download_max_total_recovers_without_progressing is not None
+            and self._recovers_without_progressing_count
+            >= self._api._config.files_api_client_download_max_total_recovers_without_progressing
+        ):
             _LOG.debug("No progression recovers limit exceeded")
             return False
         return True

     def _recover(self) -> bool:
         if not self._should_recover():
-            return False
+            return False  # recover suppressed, rethrow original exception

         self._total_recovers_count += 1
         self._recovers_without_progressing_count += 1
@@ -786,15 +1456,15 @@ class _ResilientIterator(Iterator):
             _LOG.debug("Trying to recover from offset " + str(self._offset))

             # following call includes all the required network retries
-            downloadResponse = self._api.
-                self._file_last_modified)
+            downloadResponse = self._api._open_download_stream(self._file_path, self._offset, self._file_last_modified)
             underlying_response = _ResilientIterator._extract_raw_response(downloadResponse)
-            self._underlying_iterator = underlying_response.iter_content(
-
+            self._underlying_iterator = underlying_response.iter_content(
+                chunk_size=self._chunk_size, decode_unicode=False
+            )
             _LOG.debug("Recover succeeded")
             return True
         except:
-            return False
+            return False  # recover failed, rethrow original exception

     def __next__(self):
         if self._closed:
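A minimal caller-side sketch of the extended Files API introduced in this diff (not part of the diff; the Volumes path and payload size are illustrative, and a configured WorkspaceClient is assumed). FilesExt.upload() decides internally between the one-shot path and the new multipart/resumable path based on multipart_upload_min_stream_size, and downloads are wrapped by _wrap_stream() so reads can recover from dropped connections:

import io

from databricks.sdk import WorkspaceClient

w = WorkspaceClient()
path = "/Volumes/main/default/my_volume/big.bin"  # hypothetical UC Volumes path

# Upload: streams below multipart_upload_min_stream_size take the one-shot path,
# larger streams the new multipart (AWS/Azure) or resumable (GCP) path.
w.files.upload(path, io.BytesIO(b"x" * (10 * 1024 * 1024)), overwrite=True)

# Download: the returned stream is wrapped in _ResilientResponse, so reading it can
# transparently re-open the download from the current offset after a failure.
data = w.files.download(path).contents.read()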