azure-storage-blob 12.20.0b1__py3-none-any.whl → 12.21.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- azure/storage/blob/__init__.py +12 -2
- azure/storage/blob/_blob_client.py +64 -51
- azure/storage/blob/_blob_service_client.py +17 -12
- azure/storage/blob/_container_client.py +33 -23
- azure/storage/blob/_download.py +277 -167
- azure/storage/blob/_generated/_azure_blob_storage.py +1 -1
- azure/storage/blob/_generated/_configuration.py +2 -2
- azure/storage/blob/_generated/_patch.py +2 -0
- azure/storage/blob/_generated/_serialization.py +1 -1
- azure/storage/blob/_generated/aio/_azure_blob_storage.py +1 -1
- azure/storage/blob/_generated/aio/_configuration.py +2 -2
- azure/storage/blob/_generated/aio/_patch.py +2 -0
- azure/storage/blob/_generated/aio/operations/_append_blob_operations.py +10 -5
- azure/storage/blob/_generated/aio/operations/_blob_operations.py +45 -26
- azure/storage/blob/_generated/aio/operations/_block_blob_operations.py +12 -7
- azure/storage/blob/_generated/aio/operations/_container_operations.py +39 -20
- azure/storage/blob/_generated/aio/operations/_page_blob_operations.py +15 -10
- azure/storage/blob/_generated/aio/operations/_patch.py +3 -0
- azure/storage/blob/_generated/aio/operations/_service_operations.py +28 -10
- azure/storage/blob/_generated/models/_patch.py +3 -0
- azure/storage/blob/_generated/operations/_append_blob_operations.py +14 -9
- azure/storage/blob/_generated/operations/_blob_operations.py +76 -51
- azure/storage/blob/_generated/operations/_block_blob_operations.py +18 -13
- azure/storage/blob/_generated/operations/_container_operations.py +64 -39
- azure/storage/blob/_generated/operations/_page_blob_operations.py +24 -19
- azure/storage/blob/_generated/operations/_patch.py +3 -0
- azure/storage/blob/_generated/operations/_service_operations.py +43 -19
- azure/storage/blob/_generated/py.typed +1 -0
- azure/storage/blob/_lease.py +6 -5
- azure/storage/blob/_models.py +1 -1
- azure/storage/blob/_serialize.py +1 -0
- azure/storage/blob/_shared/authentication.py +62 -4
- azure/storage/blob/_shared/base_client.py +1 -1
- azure/storage/blob/_shared/base_client_async.py +3 -2
- azure/storage/blob/_shared/models.py +13 -12
- azure/storage/blob/_shared/shared_access_signature.py +1 -0
- azure/storage/blob/_shared_access_signature.py +1 -0
- azure/storage/blob/_version.py +1 -1
- azure/storage/blob/aio/__init__.py +13 -4
- azure/storage/blob/aio/_blob_client_async.py +50 -47
- azure/storage/blob/aio/_blob_service_client_async.py +11 -11
- azure/storage/blob/aio/_container_client_async.py +23 -20
- azure/storage/blob/aio/_download_async.py +317 -209
- azure/storage/blob/aio/_lease_async.py +6 -6
- {azure_storage_blob-12.20.0b1.dist-info → azure_storage_blob-12.21.0.dist-info}/METADATA +2 -2
- azure_storage_blob-12.21.0.dist-info/RECORD +82 -0
- azure_storage_blob-12.20.0b1.dist-info/RECORD +0 -81
- {azure_storage_blob-12.20.0b1.dist-info → azure_storage_blob-12.21.0.dist-info}/LICENSE +0 -0
- {azure_storage_blob-12.20.0b1.dist-info → azure_storage_blob-12.21.0.dist-info}/WHEEL +0 -0
- {azure_storage_blob-12.20.0b1.dist-info → azure_storage_blob-12.21.0.dist-info}/top_level.txt +0 -0
@@ -5,11 +5,12 @@
|
|
5
5
|
# --------------------------------------------------------------------------
|
6
6
|
# pylint: disable=invalid-overridden-method
|
7
7
|
|
8
|
+
import codecs
|
8
9
|
import sys
|
9
10
|
import warnings
|
10
|
-
from io import BytesIO
|
11
|
+
from io import BytesIO, StringIO
|
11
12
|
from itertools import islice
|
12
|
-
from typing import AsyncIterator, Generic, IO, Optional, TypeVar
|
13
|
+
from typing import AsyncIterator, Generic, IO, Optional, overload, TypeVar, Union
|
13
14
|
|
14
15
|
import asyncio
|
15
16
|
|
@@ -59,7 +60,7 @@ class _AsyncChunkDownloader(_ChunkDownloader):
|
|
59
60
|
|
60
61
|
async def process_chunk(self, chunk_start):
|
61
62
|
chunk_start, chunk_end = self._calculate_range(chunk_start)
|
62
|
-
chunk_data = await self._download_chunk(chunk_start, chunk_end - 1)
|
63
|
+
chunk_data, _ = await self._download_chunk(chunk_start, chunk_end - 1)
|
63
64
|
length = chunk_end - chunk_start
|
64
65
|
if length > 0:
|
65
66
|
await self._write_to_stream(chunk_data, chunk_start)
|
@@ -95,8 +96,8 @@ class _AsyncChunkDownloader(_ChunkDownloader):
|
|
95
96
|
# No need to download the empty chunk from server if there's no data in the chunk to be downloaded.
|
96
97
|
# Do optimize and create empty chunk locally if condition is met.
|
97
98
|
if self._do_optimize(download_range[0], download_range[1]):
|
98
|
-
|
99
|
-
chunk_data = b"\x00" *
|
99
|
+
content_length = download_range[1] - download_range[0] + 1
|
100
|
+
chunk_data = b"\x00" * content_length
|
100
101
|
else:
|
101
102
|
range_header, range_validation = validate_and_format_range_headers(
|
102
103
|
download_range[0],
|
@@ -117,14 +118,14 @@ class _AsyncChunkDownloader(_ChunkDownloader):
|
|
117
118
|
process_storage_error(error)
|
118
119
|
|
119
120
|
chunk_data = await process_content(response, offset[0], offset[1], self.encryption_options)
|
120
|
-
|
121
|
+
content_length = response.content_length
|
121
122
|
|
122
123
|
# This makes sure that if_match is set so that we can validate
|
123
124
|
# that subsequent downloads are to an unmodified blob
|
124
125
|
if self.request_options.get('modified_access_conditions'):
|
125
126
|
self.request_options['modified_access_conditions'].if_match = response.properties.etag
|
126
127
|
|
127
|
-
return chunk_data
|
128
|
+
return chunk_data, content_length
|
128
129
|
|
129
130
|
|
130
131
|
class _AsyncChunkIterator(object):
|
@@ -167,7 +168,7 @@ class _AsyncChunkIterator(object):
|
|
167
168
|
|
168
169
|
try:
|
169
170
|
chunk = next(self._iter_chunks)
|
170
|
-
self._current_content += await self._iter_downloader.yield_chunk(chunk)
|
171
|
+
self._current_content += (await self._iter_downloader.yield_chunk(chunk))[0]
|
171
172
|
except StopIteration as exc:
|
172
173
|
self._complete = True
|
173
174
|
# it's likely that there some data left in self._current_content
|
@@ -228,28 +229,32 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
228
229
|
self._encryption_options = encryption_options or {}
|
229
230
|
self._progress_hook = kwargs.pop('progress_hook', None)
|
230
231
|
self._request_options = kwargs
|
232
|
+
self._response = None
|
231
233
|
self._location_mode = None
|
232
|
-
self.
|
233
|
-
self.
|
234
|
-
self._file_size = None
|
234
|
+
self._current_content = b''
|
235
|
+
self._file_size = 0
|
235
236
|
self._non_empty_ranges = None
|
236
|
-
self._response = None
|
237
237
|
self._encryption_data = None
|
238
|
-
self._offset = 0
|
239
238
|
|
240
|
-
|
241
|
-
self.
|
239
|
+
# The content download offset, after any processing (decryption), in bytes
|
240
|
+
self._download_offset = 0
|
241
|
+
# The raw download offset, before processing (decryption), in bytes
|
242
|
+
self._raw_download_offset = 0
|
243
|
+
# The offset the stream has been read to in bytes or chars depending on mode
|
244
|
+
self._read_offset = 0
|
245
|
+
# The offset into current_content that has been consumed in bytes or chars depending on mode
|
246
|
+
self._current_content_offset = 0
|
247
|
+
|
248
|
+
self._text_mode: Optional[bool] = None
|
249
|
+
self._decoder = None
|
250
|
+
# Whether the current content is the first chunk of download content or not
|
251
|
+
self._first_chunk = True
|
252
|
+
self._download_start = self._start_range or 0
|
242
253
|
|
243
254
|
# The cls is passed in via download_cls to avoid conflicting arg name with Generic.__new__
|
244
255
|
# but needs to be changed to cls in the request options.
|
245
256
|
self._request_options['cls'] = download_cls
|
246
257
|
|
247
|
-
# The service only provides transactional MD5s for chunks under 4MB.
|
248
|
-
# If validate_content is on, get only self.MAX_CHUNK_GET_SIZE for the first
|
249
|
-
# chunk so a transactional MD5 can be retrieved.
|
250
|
-
self._first_get_size = self._config.max_single_get_size if not self._validate_content \
|
251
|
-
else self._config.max_chunk_get_size
|
252
|
-
|
253
258
|
def __len__(self):
|
254
259
|
return self.size
|
255
260
|
|
@@ -272,12 +277,19 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
272
277
|
if self._encryption_options.get("key") is not None or self._encryption_options.get("resolver") is not None:
|
273
278
|
await self._get_encryption_data_request()
|
274
279
|
|
280
|
+
# The service only provides transactional MD5s for chunks under 4MB.
|
281
|
+
# If validate_content is on, get only self.MAX_CHUNK_GET_SIZE for the first
|
282
|
+
# chunk so a transactional MD5 can be retrieved.
|
283
|
+
first_get_size = (
|
284
|
+
self._config.max_single_get_size if not self._validate_content else self._config.max_chunk_get_size
|
285
|
+
)
|
275
286
|
initial_request_start = self._start_range if self._start_range is not None else 0
|
276
|
-
if self._end_range is not None and self._end_range -
|
287
|
+
if self._end_range is not None and self._end_range - initial_request_start < first_get_size:
|
277
288
|
initial_request_end = self._end_range
|
278
289
|
else:
|
279
|
-
initial_request_end = initial_request_start +
|
290
|
+
initial_request_end = initial_request_start + first_get_size - 1
|
280
291
|
|
292
|
+
# pylint: disable-next=attribute-defined-outside-init
|
281
293
|
self._initial_range, self._initial_offset = process_range_and_offset(
|
282
294
|
initial_request_start,
|
283
295
|
initial_request_end,
|
@@ -292,39 +304,22 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
292
304
|
self.properties.name = self.name
|
293
305
|
self.properties.container = self.container
|
294
306
|
|
295
|
-
# Set the content length to the download size instead of the size of
|
296
|
-
# the last range
|
297
|
-
initial_size = self._response.properties.size
|
307
|
+
# Set the content length to the download size instead of the size of the last range
|
298
308
|
self.properties.size = self.size
|
299
|
-
|
300
|
-
|
301
|
-
|
309
|
+
self.properties.content_range = (f"bytes {self._download_start}-"
|
310
|
+
f"{self._end_range if self._end_range is not None else self._file_size - 1}/"
|
311
|
+
f"{self._file_size}")
|
302
312
|
|
303
313
|
# Overwrite the content MD5 as it is the MD5 for the last range instead
|
304
314
|
# of the stored MD5
|
305
315
|
# TODO: Set to the stored MD5 when the service returns this
|
306
316
|
self.properties.content_md5 = None
|
307
317
|
|
308
|
-
|
309
|
-
|
310
|
-
else:
|
311
|
-
self._current_content = await process_content(
|
312
|
-
self._response,
|
313
|
-
self._initial_offset[0],
|
314
|
-
self._initial_offset[1],
|
315
|
-
self._encryption_options
|
316
|
-
)
|
317
|
-
|
318
|
-
# If the file is small, the download is complete at this point.
|
319
|
-
# If file size is large, download the rest of the file in chunks.
|
320
|
-
# For encryption V2, calculate based on size of decrypted content, not download size.
|
318
|
+
@property
|
319
|
+
def _download_complete(self):
|
321
320
|
if is_encryption_v2(self._encryption_data):
|
322
|
-
|
323
|
-
|
324
|
-
self._download_complete = initial_size >= self.size
|
325
|
-
|
326
|
-
if not self._download_complete and self._request_options.get("modified_access_conditions"):
|
327
|
-
self._request_options["modified_access_conditions"].if_match = self._response.properties.etag
|
321
|
+
return self._download_offset >= self.size
|
322
|
+
return self._raw_download_offset >= self.size
|
328
323
|
|
329
324
|
async def _initial_request(self):
|
330
325
|
range_header, range_validation = validate_and_format_range_headers(
|
@@ -357,7 +352,7 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
357
352
|
|
358
353
|
if self._end_range is not None:
|
359
354
|
# Use the length unless it is over the end of the file
|
360
|
-
self.size = min(self._file_size, self._end_range - self._start_range + 1)
|
355
|
+
self.size = min(self._file_size - self._start_range, self._end_range - self._start_range + 1)
|
361
356
|
elif self._start_range is not None:
|
362
357
|
self.size = self._file_size - self._start_range
|
363
358
|
else:
|
@@ -383,6 +378,18 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
383
378
|
else:
|
384
379
|
process_storage_error(error)
|
385
380
|
|
381
|
+
if self.size == 0:
|
382
|
+
self._current_content = b""
|
383
|
+
else:
|
384
|
+
self._current_content = await process_content(
|
385
|
+
response,
|
386
|
+
self._initial_offset[0],
|
387
|
+
self._initial_offset[1],
|
388
|
+
self._encryption_options
|
389
|
+
)
|
390
|
+
self._download_offset += len(self._current_content)
|
391
|
+
self._raw_download_offset += response.content_length
|
392
|
+
|
386
393
|
# get page ranges to optimize downloading sparse page blob
|
387
394
|
if response.properties.blob_type == 'PageBlob':
|
388
395
|
try:
|
@@ -391,22 +398,19 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
391
398
|
except HttpResponseError:
|
392
399
|
pass
|
393
400
|
|
394
|
-
|
395
|
-
|
396
|
-
def _get_downloader_start_with_offset(self):
|
397
|
-
# Start where the initial request download ended
|
398
|
-
start = self._initial_range[1] + 1
|
399
|
-
# For encryption V2 only, adjust start to the end of the fetched data rather than download size
|
400
|
-
if self._encryption_options.get("key") is not None or self._encryption_options.get("resolver") is not None:
|
401
|
-
start = (self._start_range or 0) + len(self._current_content)
|
401
|
+
if not self._download_complete and self._request_options.get("modified_access_conditions"):
|
402
|
+
self._request_options["modified_access_conditions"].if_match = response.properties.etag
|
402
403
|
|
403
|
-
|
404
|
-
start += (self._offset - len(self._current_content))
|
405
|
-
return start
|
404
|
+
return response
|
406
405
|
|
407
406
|
def chunks(self):
|
408
407
|
# type: () -> AsyncIterator[bytes]
|
409
|
-
"""
|
408
|
+
"""
|
409
|
+
Iterate over chunks in the download stream. Note, the iterator returned will
|
410
|
+
iterate over the entire download content, regardless of any data that was
|
411
|
+
previously read.
|
412
|
+
|
413
|
+
NOTE: If the stream has been partially read, some data may be re-downloaded by the iterator.
|
410
414
|
|
411
415
|
:returns: An async iterator of the chunks in the download stream.
|
412
416
|
:rtype: AsyncIterator[bytes]
|
@@ -420,79 +424,124 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
420
424
|
:dedent: 16
|
421
425
|
:caption: Download a blob using chunks().
|
422
426
|
"""
|
423
|
-
if self.
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
427
|
+
if self._text_mode:
|
428
|
+
raise ValueError("Stream has been partially read in text mode. chunks is not supported in text mode.")
|
429
|
+
if self._encoding:
|
430
|
+
warnings.warn("Encoding is ignored with chunks as only bytes are supported.")
|
431
|
+
|
432
|
+
iter_downloader = None
|
433
|
+
# If we still have the first chunk buffered, use it. Otherwise, download all content again
|
434
|
+
if not self._first_chunk or not self._download_complete:
|
435
|
+
if self._first_chunk:
|
436
|
+
start = self._download_start + len(self._current_content)
|
437
|
+
current_progress = len(self._current_content)
|
438
|
+
else:
|
439
|
+
start = self._download_start
|
440
|
+
current_progress = 0
|
441
|
+
|
442
|
+
end = self._download_start + self.size
|
431
443
|
|
432
|
-
if self._end_range is not None:
|
433
|
-
# Use the length unless it is over the end of the file
|
434
|
-
data_end = min(self._file_size, self._end_range + 1)
|
435
444
|
iter_downloader = _AsyncChunkDownloader(
|
436
445
|
client=self._clients.blob,
|
437
446
|
non_empty_ranges=self._non_empty_ranges,
|
438
447
|
total_size=self.size,
|
439
448
|
chunk_size=self._config.max_chunk_get_size,
|
440
|
-
current_progress=
|
441
|
-
start_range=
|
442
|
-
end_range=
|
443
|
-
stream=None,
|
444
|
-
parallel=False,
|
449
|
+
current_progress=current_progress,
|
450
|
+
start_range=start,
|
451
|
+
end_range=end,
|
445
452
|
validate_content=self._validate_content,
|
446
453
|
encryption_options=self._encryption_options,
|
447
454
|
encryption_data=self._encryption_data,
|
448
455
|
use_location=self._location_mode,
|
449
|
-
**self._request_options
|
456
|
+
**self._request_options
|
457
|
+
)
|
458
|
+
|
459
|
+
initial_content = self._current_content if self._first_chunk else b''
|
450
460
|
return _AsyncChunkIterator(
|
451
461
|
size=self.size,
|
452
|
-
content=
|
462
|
+
content=initial_content,
|
453
463
|
downloader=iter_downloader,
|
454
464
|
chunk_size=self._config.max_chunk_get_size)
|
455
465
|
|
456
|
-
|
466
|
+
@overload
|
467
|
+
async def read(self, size: int = -1) -> T:
|
468
|
+
...
|
469
|
+
|
470
|
+
@overload
|
471
|
+
async def read(self, *, chars: Optional[int] = None) -> T:
|
472
|
+
...
|
473
|
+
|
474
|
+
# pylint: disable-next=too-many-statements,too-many-branches
|
475
|
+
async def read(self, size: int = -1, *, chars: Optional[int] = None) -> T:
|
457
476
|
"""
|
458
|
-
Read
|
459
|
-
|
477
|
+
Read the specified bytes or chars from the stream. If `encoding`
|
478
|
+
was specified on `download_blob`, it is recommended to use the
|
479
|
+
chars parameter to read a specific number of chars to avoid decoding
|
480
|
+
errors. If size/chars is unspecified or negative all bytes will be read.
|
460
481
|
|
461
|
-
:param
|
482
|
+
:param int size:
|
462
483
|
The number of bytes to download from the stream. Leave unspecified
|
463
|
-
or set
|
484
|
+
or set negative to download all bytes.
|
485
|
+
:keyword Optional[int] chars:
|
486
|
+
The number of chars to download from the stream. Leave unspecified
|
487
|
+
or set negative to download all chars. Note, this can only be used
|
488
|
+
when encoding is specified on `download_blob`.
|
464
489
|
:returns:
|
465
490
|
The requested data as bytes or a string if encoding was specified. If
|
466
491
|
the return value is empty, there is no more data to read.
|
467
492
|
:rtype: T
|
468
493
|
"""
|
469
|
-
if size
|
470
|
-
|
494
|
+
if size > -1 and self._encoding:
|
495
|
+
warnings.warn(
|
496
|
+
"Size parameter specified with text encoding enabled. It is recommended to use chars "
|
497
|
+
"to read a specific number of characters instead."
|
498
|
+
)
|
499
|
+
if size > -1 and chars is not None:
|
500
|
+
raise ValueError("Cannot specify both size and chars.")
|
501
|
+
if not self._encoding and chars is not None:
|
502
|
+
raise ValueError("Must specify encoding to read chars.")
|
503
|
+
if self._text_mode and size > -1:
|
504
|
+
raise ValueError("Stream has been partially read in text mode. Please use chars.")
|
505
|
+
if self._text_mode is False and chars is not None:
|
506
|
+
raise ValueError("Stream has been partially read in bytes mode. Please use size.")
|
507
|
+
|
471
508
|
# Empty blob or already read to the end
|
472
|
-
if size == 0 or
|
509
|
+
if (size == 0 or chars == 0 or
|
510
|
+
(self._download_complete and self._current_content_offset >= len(self._current_content))):
|
473
511
|
return b'' if not self._encoding else ''
|
474
512
|
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
513
|
+
if not self._text_mode and chars is not None:
|
514
|
+
self._text_mode = True
|
515
|
+
self._decoder = codecs.getincrementaldecoder(self._encoding)('strict')
|
516
|
+
self._current_content = self._decoder.decode(self._current_content, final=self._download_complete)
|
517
|
+
elif self._text_mode is None:
|
518
|
+
self._text_mode = False
|
519
|
+
|
520
|
+
output_stream: Union[BytesIO, StringIO]
|
521
|
+
if self._text_mode:
|
522
|
+
output_stream = StringIO()
|
523
|
+
size = chars if chars else sys.maxsize
|
524
|
+
else:
|
525
|
+
output_stream = BytesIO()
|
526
|
+
size = size if size > 0 else sys.maxsize
|
527
|
+
readall = size == sys.maxsize
|
528
|
+
count = 0
|
529
|
+
|
530
|
+
# Start by reading from current_content
|
531
|
+
start = self._current_content_offset
|
532
|
+
length = min(len(self._current_content) - self._current_content_offset, size - count)
|
533
|
+
read = output_stream.write(self._current_content[start:start + length])
|
534
|
+
|
535
|
+
count += read
|
536
|
+
self._current_content_offset += read
|
537
|
+
self._read_offset += read
|
538
|
+
await self._check_and_report_progress()
|
539
|
+
|
540
|
+
remaining = size - count
|
541
|
+
if remaining > 0 and not self._download_complete:
|
542
|
+
# Create a downloader than can download the rest of the file
|
543
|
+
start = self._download_start + self._download_offset
|
544
|
+
end = self._download_start + self.size
|
496
545
|
|
497
546
|
parallel = self._max_concurrency > 1
|
498
547
|
downloader = _AsyncChunkDownloader(
|
@@ -500,10 +549,10 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
500
549
|
non_empty_ranges=self._non_empty_ranges,
|
501
550
|
total_size=self.size,
|
502
551
|
chunk_size=self._config.max_chunk_get_size,
|
503
|
-
current_progress=self.
|
504
|
-
start_range=
|
505
|
-
end_range=
|
506
|
-
stream=
|
552
|
+
current_progress=self._read_offset,
|
553
|
+
start_range=start,
|
554
|
+
end_range=end,
|
555
|
+
stream=output_stream,
|
507
556
|
parallel=parallel,
|
508
557
|
validate_content=self._validate_content,
|
509
558
|
encryption_options=self._encryption_options,
|
@@ -512,42 +561,74 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
512
561
|
progress_hook=self._progress_hook,
|
513
562
|
**self._request_options
|
514
563
|
)
|
564
|
+
self._first_chunk = False
|
565
|
+
|
566
|
+
# When reading all data, have the downloader read everything into the stream.
|
567
|
+
# Else, read one chunk at a time (using the downloader as an iterator) until
|
568
|
+
# the requested size is reached.
|
569
|
+
chunks_iter = downloader.get_chunk_offsets()
|
570
|
+
if readall and not self._text_mode:
|
571
|
+
running_futures = [
|
572
|
+
asyncio.ensure_future(downloader.process_chunk(d))
|
573
|
+
for d in islice(chunks_iter, 0, self._max_concurrency)
|
574
|
+
]
|
575
|
+
while running_futures:
|
576
|
+
# Wait for some download to finish before adding a new one
|
577
|
+
done, running_futures = await asyncio.wait(
|
578
|
+
running_futures, return_when=asyncio.FIRST_COMPLETED)
|
579
|
+
try:
|
580
|
+
for task in done:
|
581
|
+
task.result()
|
582
|
+
except HttpResponseError as error:
|
583
|
+
process_storage_error(error)
|
584
|
+
try:
|
585
|
+
for _ in range(0, len(done)):
|
586
|
+
next_chunk = next(chunks_iter)
|
587
|
+
running_futures.add(asyncio.ensure_future(downloader.process_chunk(next_chunk)))
|
588
|
+
except StopIteration:
|
589
|
+
break
|
590
|
+
|
591
|
+
if running_futures:
|
592
|
+
# Wait for the remaining downloads to finish
|
593
|
+
done, _running_futures = await asyncio.wait(running_futures)
|
594
|
+
try:
|
595
|
+
for task in done:
|
596
|
+
task.result()
|
597
|
+
except HttpResponseError as error:
|
598
|
+
process_storage_error(error)
|
599
|
+
|
600
|
+
self._complete_read()
|
515
601
|
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
self._offset += remaining_size
|
602
|
+
else:
|
603
|
+
while (chunk := next(chunks_iter, None)) is not None and remaining > 0:
|
604
|
+
chunk_data, content_length = await downloader.yield_chunk(chunk)
|
605
|
+
self._download_offset += len(chunk_data)
|
606
|
+
self._raw_download_offset += content_length
|
607
|
+
self._current_content = self._decoder.decode(
|
608
|
+
chunk_data, final=self._download_complete) if self._text_mode else chunk_data
|
609
|
+
|
610
|
+
if remaining < len(self._current_content):
|
611
|
+
read = output_stream.write(self._current_content[:remaining])
|
612
|
+
else:
|
613
|
+
read = output_stream.write(self._current_content)
|
614
|
+
|
615
|
+
self._current_content_offset = read
|
616
|
+
self._read_offset += read
|
617
|
+
remaining -= read
|
618
|
+
await self._check_and_report_progress()
|
619
|
+
|
620
|
+
data = output_stream.getvalue()
|
621
|
+
if not self._text_mode and self._encoding:
|
622
|
+
try:
|
623
|
+
# This is technically incorrect to do, but we have it for backwards compatibility.
|
624
|
+
data = data.decode(self._encoding)
|
625
|
+
except UnicodeDecodeError:
|
626
|
+
warnings.warn(
|
627
|
+
"Encountered a decoding error while decoding blob data from a partial read. "
|
628
|
+
"Try using the `chars` keyword instead to read in text mode."
|
629
|
+
)
|
630
|
+
raise
|
547
631
|
|
548
|
-
data = stream.getvalue()
|
549
|
-
if self._encoding:
|
550
|
-
return data.decode(self._encoding)
|
551
632
|
return data
|
552
633
|
|
553
634
|
async def readall(self) -> T:
|
@@ -558,53 +639,7 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
558
639
|
:returns: The requested data as bytes or a string if encoding was specified.
|
559
640
|
:rtype: T
|
560
641
|
"""
|
561
|
-
|
562
|
-
await self.readinto(stream)
|
563
|
-
data = stream.getvalue()
|
564
|
-
if self._encoding:
|
565
|
-
return data.decode(self._encoding)
|
566
|
-
return data
|
567
|
-
|
568
|
-
async def content_as_bytes(self, max_concurrency=1):
|
569
|
-
"""DEPRECATED: Download the contents of this file.
|
570
|
-
|
571
|
-
This operation is blocking until all data is downloaded.
|
572
|
-
|
573
|
-
This method is deprecated, use func:`readall` instead.
|
574
|
-
|
575
|
-
:param int max_concurrency:
|
576
|
-
The number of parallel connections with which to download.
|
577
|
-
:returns: The contents of the file as bytes.
|
578
|
-
:rtype: bytes
|
579
|
-
"""
|
580
|
-
warnings.warn(
|
581
|
-
"content_as_bytes is deprecated, use readall instead",
|
582
|
-
DeprecationWarning
|
583
|
-
)
|
584
|
-
self._max_concurrency = max_concurrency
|
585
|
-
return await self.readall()
|
586
|
-
|
587
|
-
async def content_as_text(self, max_concurrency=1, encoding="UTF-8"):
|
588
|
-
"""DEPRECATED: Download the contents of this blob, and decode as text.
|
589
|
-
|
590
|
-
This operation is blocking until all data is downloaded.
|
591
|
-
|
592
|
-
This method is deprecated, use func:`readall` instead.
|
593
|
-
|
594
|
-
:param int max_concurrency:
|
595
|
-
The number of parallel connections with which to download.
|
596
|
-
:param str encoding:
|
597
|
-
Test encoding to decode the downloaded bytes. Default is UTF-8.
|
598
|
-
:returns: The content of the file as a str.
|
599
|
-
:rtype: str
|
600
|
-
"""
|
601
|
-
warnings.warn(
|
602
|
-
"content_as_text is deprecated, use readall instead",
|
603
|
-
DeprecationWarning
|
604
|
-
)
|
605
|
-
self._max_concurrency = max_concurrency
|
606
|
-
self._encoding = encoding
|
607
|
-
return await self.readall()
|
642
|
+
return await self.read()
|
608
643
|
|
609
644
|
async def readinto(self, stream: IO[bytes]) -> int:
|
610
645
|
"""Download the contents of this blob to a stream.
|
@@ -616,6 +651,11 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
616
651
|
:returns: The number of bytes read.
|
617
652
|
:rtype: int
|
618
653
|
"""
|
654
|
+
if self._text_mode:
|
655
|
+
raise ValueError("Stream has been partially read in text mode. readinto is not supported in text mode.")
|
656
|
+
if self._encoding:
|
657
|
+
warnings.warn("Encoding is ignored with readinto as only byte streams are supported.")
|
658
|
+
|
619
659
|
# the stream must be seekable if parallel download is required
|
620
660
|
parallel = self._max_concurrency > 1
|
621
661
|
if parallel:
|
@@ -629,35 +669,34 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
629
669
|
raise ValueError(error_message) from exc
|
630
670
|
|
631
671
|
# If some data has been streamed using `read`, only stream the remaining data
|
632
|
-
remaining_size = self.size - self.
|
672
|
+
remaining_size = self.size - self._read_offset
|
633
673
|
# Already read to the end
|
634
674
|
if remaining_size <= 0:
|
635
675
|
return 0
|
636
676
|
|
637
|
-
# Write the content to the user stream
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
677
|
+
# Write the current content to the user stream
|
678
|
+
current_remaining = len(self._current_content) - self._current_content_offset
|
679
|
+
start = self._current_content_offset
|
680
|
+
count = stream.write(self._current_content[start:start + current_remaining])
|
681
|
+
|
682
|
+
self._current_content_offset += count
|
683
|
+
self._read_offset += count
|
684
|
+
if self._progress_hook:
|
685
|
+
await self._progress_hook(self._read_offset, self.size)
|
644
686
|
|
687
|
+
# If all the data was already downloaded/buffered
|
645
688
|
if self._download_complete:
|
646
689
|
return remaining_size
|
647
690
|
|
648
|
-
|
649
|
-
|
650
|
-
# Use the length unless it is over the end of the file
|
651
|
-
data_end = min(self._file_size, self._end_range + 1)
|
652
|
-
|
653
|
-
data_start = self._get_downloader_start_with_offset()
|
691
|
+
data_start = self._download_start + self._read_offset
|
692
|
+
data_end = self._download_start + self.size
|
654
693
|
|
655
694
|
downloader = _AsyncChunkDownloader(
|
656
695
|
client=self._clients.blob,
|
657
696
|
non_empty_ranges=self._non_empty_ranges,
|
658
697
|
total_size=self.size,
|
659
698
|
chunk_size=self._config.max_chunk_get_size,
|
660
|
-
current_progress=self.
|
699
|
+
current_progress=self._read_offset,
|
661
700
|
start_range=data_start,
|
662
701
|
end_range=data_end,
|
663
702
|
stream=stream,
|
@@ -667,7 +706,8 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
667
706
|
encryption_data=self._encryption_data,
|
668
707
|
use_location=self._location_mode,
|
669
708
|
progress_hook=self._progress_hook,
|
670
|
-
**self._request_options
|
709
|
+
**self._request_options
|
710
|
+
)
|
671
711
|
|
672
712
|
dl_tasks = downloader.get_chunk_offsets()
|
673
713
|
running_futures = [
|
@@ -699,8 +739,72 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
699
739
|
except HttpResponseError as error:
|
700
740
|
process_storage_error(error)
|
701
741
|
|
742
|
+
self._complete_read()
|
702
743
|
return remaining_size
|
703
744
|
|
745
|
+
def _complete_read(self):
|
746
|
+
"""Adjusts all offsets to the end of the download."""
|
747
|
+
self._download_offset = self.size
|
748
|
+
self._raw_download_offset = self.size
|
749
|
+
self._read_offset = self.size
|
750
|
+
self._current_content_offset = len(self._current_content)
|
751
|
+
|
752
|
+
async def _check_and_report_progress(self):
|
753
|
+
"""Reports progress if necessary."""
|
754
|
+
# Only report progress at the end of each chunk and use download_offset to always report
|
755
|
+
# progress in terms of (approximate) byte count.
|
756
|
+
if self._progress_hook and self._current_content_offset == len(self._current_content):
|
757
|
+
await self._progress_hook(self._download_offset, self.size)
|
758
|
+
|
759
|
+
async def content_as_bytes(self, max_concurrency=1):
|
760
|
+
"""DEPRECATED: Download the contents of this file.
|
761
|
+
|
762
|
+
This operation is blocking until all data is downloaded.
|
763
|
+
|
764
|
+
This method is deprecated, use func:`readall` instead.
|
765
|
+
|
766
|
+
:param int max_concurrency:
|
767
|
+
The number of parallel connections with which to download.
|
768
|
+
:returns: The contents of the file as bytes.
|
769
|
+
:rtype: bytes
|
770
|
+
"""
|
771
|
+
warnings.warn(
|
772
|
+
"content_as_bytes is deprecated, use readall instead",
|
773
|
+
DeprecationWarning
|
774
|
+
)
|
775
|
+
if self._text_mode:
|
776
|
+
raise ValueError("Stream has been partially read in text mode. "
|
777
|
+
"content_as_bytes is not supported in text mode.")
|
778
|
+
|
779
|
+
self._max_concurrency = max_concurrency
|
780
|
+
return await self.readall()
|
781
|
+
|
782
|
+
async def content_as_text(self, max_concurrency=1, encoding="UTF-8"):
|
783
|
+
"""DEPRECATED: Download the contents of this blob, and decode as text.
|
784
|
+
|
785
|
+
This operation is blocking until all data is downloaded.
|
786
|
+
|
787
|
+
This method is deprecated, use func:`readall` instead.
|
788
|
+
|
789
|
+
:param int max_concurrency:
|
790
|
+
The number of parallel connections with which to download.
|
791
|
+
:param str encoding:
|
792
|
+
Test encoding to decode the downloaded bytes. Default is UTF-8.
|
793
|
+
:returns: The content of the file as a str.
|
794
|
+
:rtype: str
|
795
|
+
"""
|
796
|
+
warnings.warn(
|
797
|
+
"content_as_text is deprecated, use readall instead",
|
798
|
+
DeprecationWarning
|
799
|
+
)
|
800
|
+
if self._text_mode:
|
801
|
+
raise ValueError("Stream has been partially read in text mode. "
|
802
|
+
"content_as_text is not supported in text mode.")
|
803
|
+
|
804
|
+
self._max_concurrency = max_concurrency
|
805
|
+
self._encoding = encoding
|
806
|
+
return await self.readall()
|
807
|
+
|
704
808
|
async def download_to_stream(self, stream, max_concurrency=1):
|
705
809
|
"""DEPRECATED: Download the contents of this blob to a stream.
|
706
810
|
|
@@ -719,6 +823,10 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
719
823
|
"download_to_stream is deprecated, use readinto instead",
|
720
824
|
DeprecationWarning
|
721
825
|
)
|
826
|
+
if self._text_mode:
|
827
|
+
raise ValueError("Stream has been partially read in text mode. "
|
828
|
+
"download_to_stream is not supported in text mode.")
|
829
|
+
|
722
830
|
self._max_concurrency = max_concurrency
|
723
831
|
await self.readinto(stream)
|
724
832
|
return self.properties
|