azure-storage-blob 12.21.0__py3-none-any.whl → 12.21.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- azure/storage/blob/_blob_client.py +48 -53
- azure/storage/blob/_blob_service_client.py +11 -11
- azure/storage/blob/_container_client.py +20 -22
- azure/storage/blob/_download.py +167 -277
- azure/storage/blob/_lease.py +5 -5
- azure/storage/blob/_models.py +1 -1
- azure/storage/blob/_version.py +1 -1
- azure/storage/blob/aio/_blob_client_async.py +46 -49
- azure/storage/blob/aio/_blob_service_client_async.py +10 -10
- azure/storage/blob/aio/_container_client_async.py +19 -22
- azure/storage/blob/aio/_download_async.py +209 -317
- azure/storage/blob/aio/_lease_async.py +5 -5
- {azure_storage_blob-12.21.0.dist-info → azure_storage_blob-12.21.0b1.dist-info}/METADATA +7 -7
- {azure_storage_blob-12.21.0.dist-info → azure_storage_blob-12.21.0b1.dist-info}/RECORD +17 -18
- {azure_storage_blob-12.21.0.dist-info → azure_storage_blob-12.21.0b1.dist-info}/WHEEL +1 -1
- azure/storage/blob/_generated/py.typed +0 -1
- {azure_storage_blob-12.21.0.dist-info → azure_storage_blob-12.21.0b1.dist-info}/LICENSE +0 -0
- {azure_storage_blob-12.21.0.dist-info → azure_storage_blob-12.21.0b1.dist-info}/top_level.txt +0 -0
@@ -5,12 +5,11 @@
|
|
5
5
|
# --------------------------------------------------------------------------
|
6
6
|
# pylint: disable=invalid-overridden-method
|
7
7
|
|
8
|
-
import codecs
|
9
8
|
import sys
|
10
9
|
import warnings
|
11
|
-
from io import BytesIO
|
10
|
+
from io import BytesIO
|
12
11
|
from itertools import islice
|
13
|
-
from typing import AsyncIterator, Generic, IO, Optional,
|
12
|
+
from typing import AsyncIterator, Generic, IO, Optional, TypeVar
|
14
13
|
|
15
14
|
import asyncio
|
16
15
|
|
@@ -60,7 +59,7 @@ class _AsyncChunkDownloader(_ChunkDownloader):
|
|
60
59
|
|
61
60
|
async def process_chunk(self, chunk_start):
|
62
61
|
chunk_start, chunk_end = self._calculate_range(chunk_start)
|
63
|
-
chunk_data
|
62
|
+
chunk_data = await self._download_chunk(chunk_start, chunk_end - 1)
|
64
63
|
length = chunk_end - chunk_start
|
65
64
|
if length > 0:
|
66
65
|
await self._write_to_stream(chunk_data, chunk_start)
|
@@ -96,8 +95,8 @@ class _AsyncChunkDownloader(_ChunkDownloader):
|
|
96
95
|
# No need to download the empty chunk from server if there's no data in the chunk to be downloaded.
|
97
96
|
# Do optimize and create empty chunk locally if condition is met.
|
98
97
|
if self._do_optimize(download_range[0], download_range[1]):
|
99
|
-
|
100
|
-
chunk_data = b"\x00" *
|
98
|
+
data_size = download_range[1] - download_range[0] + 1
|
99
|
+
chunk_data = b"\x00" * data_size
|
101
100
|
else:
|
102
101
|
range_header, range_validation = validate_and_format_range_headers(
|
103
102
|
download_range[0],
|
@@ -118,14 +117,14 @@ class _AsyncChunkDownloader(_ChunkDownloader):
|
|
118
117
|
process_storage_error(error)
|
119
118
|
|
120
119
|
chunk_data = await process_content(response, offset[0], offset[1], self.encryption_options)
|
121
|
-
|
120
|
+
|
122
121
|
|
123
122
|
# This makes sure that if_match is set so that we can validate
|
124
123
|
# that subsequent downloads are to an unmodified blob
|
125
124
|
if self.request_options.get('modified_access_conditions'):
|
126
125
|
self.request_options['modified_access_conditions'].if_match = response.properties.etag
|
127
126
|
|
128
|
-
return chunk_data
|
127
|
+
return chunk_data
|
129
128
|
|
130
129
|
|
131
130
|
class _AsyncChunkIterator(object):
|
@@ -168,7 +167,7 @@ class _AsyncChunkIterator(object):
|
|
168
167
|
|
169
168
|
try:
|
170
169
|
chunk = next(self._iter_chunks)
|
171
|
-
self._current_content +=
|
170
|
+
self._current_content += await self._iter_downloader.yield_chunk(chunk)
|
172
171
|
except StopIteration as exc:
|
173
172
|
self._complete = True
|
174
173
|
# it's likely that there is some data left in self._current_content
|
@@ -229,32 +228,28 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
229
228
|
self._encryption_options = encryption_options or {}
|
230
229
|
self._progress_hook = kwargs.pop('progress_hook', None)
|
231
230
|
self._request_options = kwargs
|
232
|
-
self._response = None
|
233
231
|
self._location_mode = None
|
234
|
-
self.
|
235
|
-
self.
|
232
|
+
self._download_complete = False
|
233
|
+
self._current_content = None
|
234
|
+
self._file_size = None
|
236
235
|
self._non_empty_ranges = None
|
236
|
+
self._response = None
|
237
237
|
self._encryption_data = None
|
238
|
+
self._offset = 0
|
238
239
|
|
239
|
-
|
240
|
-
self.
|
241
|
-
# The raw download offset, before processing (decryption), in bytes
|
242
|
-
self._raw_download_offset = 0
|
243
|
-
# The offset the stream has been read to in bytes or chars depending on mode
|
244
|
-
self._read_offset = 0
|
245
|
-
# The offset into current_content that has been consumed in bytes or chars depending on mode
|
246
|
-
self._current_content_offset = 0
|
247
|
-
|
248
|
-
self._text_mode: Optional[bool] = None
|
249
|
-
self._decoder = None
|
250
|
-
# Whether the current content is the first chunk of download content or not
|
251
|
-
self._first_chunk = True
|
252
|
-
self._download_start = self._start_range or 0
|
240
|
+
self._initial_range = None
|
241
|
+
self._initial_offset = None
|
253
242
|
|
254
243
|
# The cls is passed in via download_cls to avoid conflicting arg name with Generic.__new__
|
255
244
|
# but needs to be changed to cls in the request options.
|
256
245
|
self._request_options['cls'] = download_cls
|
257
246
|
|
247
|
+
# The service only provides transactional MD5s for chunks under 4MB.
|
248
|
+
# If validate_content is on, get only self.MAX_CHUNK_GET_SIZE for the first
|
249
|
+
# chunk so a transactional MD5 can be retrieved.
|
250
|
+
self._first_get_size = self._config.max_single_get_size if not self._validate_content \
|
251
|
+
else self._config.max_chunk_get_size
|
252
|
+
|
258
253
|
def __len__(self):
|
259
254
|
return self.size
|
260
255
|
|
@@ -277,19 +272,12 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
277
272
|
if self._encryption_options.get("key") is not None or self._encryption_options.get("resolver") is not None:
|
278
273
|
await self._get_encryption_data_request()
|
279
274
|
|
280
|
-
# The service only provides transactional MD5s for chunks under 4MB.
|
281
|
-
# If validate_content is on, get only self.MAX_CHUNK_GET_SIZE for the first
|
282
|
-
# chunk so a transactional MD5 can be retrieved.
|
283
|
-
first_get_size = (
|
284
|
-
self._config.max_single_get_size if not self._validate_content else self._config.max_chunk_get_size
|
285
|
-
)
|
286
275
|
initial_request_start = self._start_range if self._start_range is not None else 0
|
287
|
-
if self._end_range is not None and self._end_range -
|
276
|
+
if self._end_range is not None and self._end_range - self._start_range < self._first_get_size:
|
288
277
|
initial_request_end = self._end_range
|
289
278
|
else:
|
290
|
-
initial_request_end = initial_request_start +
|
279
|
+
initial_request_end = initial_request_start + self._first_get_size - 1
|
291
280
|
|
292
|
-
# pylint: disable-next=attribute-defined-outside-init
|
293
281
|
self._initial_range, self._initial_offset = process_range_and_offset(
|
294
282
|
initial_request_start,
|
295
283
|
initial_request_end,
|
@@ -304,22 +292,39 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
304
292
|
self.properties.name = self.name
|
305
293
|
self.properties.container = self.container
|
306
294
|
|
307
|
-
# Set the content length to the download size instead of the size of
|
295
|
+
# Set the content length to the download size instead of the size of
|
296
|
+
# the last range
|
297
|
+
initial_size = self._response.properties.size
|
308
298
|
self.properties.size = self.size
|
309
|
-
|
310
|
-
|
311
|
-
|
299
|
+
|
300
|
+
# Overwrite the content range to the user requested range
|
301
|
+
self.properties.content_range = f'bytes {self._start_range}-{self._end_range}/{self._file_size}'
|
312
302
|
|
313
303
|
# Overwrite the content MD5 as it is the MD5 for the last range instead
|
314
304
|
# of the stored MD5
|
315
305
|
# TODO: Set to the stored MD5 when the service returns this
|
316
306
|
self.properties.content_md5 = None
|
317
307
|
|
318
|
-
|
319
|
-
|
308
|
+
if self.size == 0:
|
309
|
+
self._current_content = b""
|
310
|
+
else:
|
311
|
+
self._current_content = await process_content(
|
312
|
+
self._response,
|
313
|
+
self._initial_offset[0],
|
314
|
+
self._initial_offset[1],
|
315
|
+
self._encryption_options
|
316
|
+
)
|
317
|
+
|
318
|
+
# If the file is small, the download is complete at this point.
|
319
|
+
# If file size is large, download the rest of the file in chunks.
|
320
|
+
# For encryption V2, calculate based on size of decrypted content, not download size.
|
320
321
|
if is_encryption_v2(self._encryption_data):
|
321
|
-
|
322
|
-
|
322
|
+
self._download_complete = len(self._current_content) >= self.size
|
323
|
+
else:
|
324
|
+
self._download_complete = initial_size >= self.size
|
325
|
+
|
326
|
+
if not self._download_complete and self._request_options.get("modified_access_conditions"):
|
327
|
+
self._request_options["modified_access_conditions"].if_match = self._response.properties.etag
|
323
328
|
|
324
329
|
async def _initial_request(self):
|
325
330
|
range_header, range_validation = validate_and_format_range_headers(
|
@@ -352,7 +357,7 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
352
357
|
|
353
358
|
if self._end_range is not None:
|
354
359
|
# Use the length unless it is over the end of the file
|
355
|
-
self.size = min(self._file_size
|
360
|
+
self.size = min(self._file_size, self._end_range - self._start_range + 1)
|
356
361
|
elif self._start_range is not None:
|
357
362
|
self.size = self._file_size - self._start_range
|
358
363
|
else:
|
@@ -378,18 +383,6 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
378
383
|
else:
|
379
384
|
process_storage_error(error)
|
380
385
|
|
381
|
-
if self.size == 0:
|
382
|
-
self._current_content = b""
|
383
|
-
else:
|
384
|
-
self._current_content = await process_content(
|
385
|
-
response,
|
386
|
-
self._initial_offset[0],
|
387
|
-
self._initial_offset[1],
|
388
|
-
self._encryption_options
|
389
|
-
)
|
390
|
-
self._download_offset += len(self._current_content)
|
391
|
-
self._raw_download_offset += response.content_length
|
392
|
-
|
393
386
|
# get page ranges to optimize downloading sparse page blob
|
394
387
|
if response.properties.blob_type == 'PageBlob':
|
395
388
|
try:
|
@@ -398,19 +391,22 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
398
391
|
except HttpResponseError:
|
399
392
|
pass
|
400
393
|
|
401
|
-
if not self._download_complete and self._request_options.get("modified_access_conditions"):
|
402
|
-
self._request_options["modified_access_conditions"].if_match = response.properties.etag
|
403
|
-
|
404
394
|
return response
|
405
395
|
|
396
|
+
def _get_downloader_start_with_offset(self):
|
397
|
+
# Start where the initial request download ended
|
398
|
+
start = self._initial_range[1] + 1
|
399
|
+
# For encryption V2 only, adjust start to the end of the fetched data rather than download size
|
400
|
+
if self._encryption_options.get("key") is not None or self._encryption_options.get("resolver") is not None:
|
401
|
+
start = (self._start_range or 0) + len(self._current_content)
|
402
|
+
|
403
|
+
# Adjust the start based on any data read past the current content
|
404
|
+
start += (self._offset - len(self._current_content))
|
405
|
+
return start
|
406
|
+
|
406
407
|
def chunks(self):
|
407
408
|
# type: () -> AsyncIterator[bytes]
|
408
|
-
"""
|
409
|
-
Iterate over chunks in the download stream. Note, the iterator returned will
|
410
|
-
iterate over the entire download content, regardless of any data that was
|
411
|
-
previously read.
|
412
|
-
|
413
|
-
NOTE: If the stream has been partially read, some data may be re-downloaded by the iterator.
|
409
|
+
"""Iterate over chunks in the download stream.
|
414
410
|
|
415
411
|
:returns: An async iterator of the chunks in the download stream.
|
416
412
|
:rtype: AsyncIterator[bytes]
|
@@ -424,124 +420,79 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
424
420
|
:dedent: 16
|
425
421
|
:caption: Download a blob using chunks().
|
426
422
|
"""
|
427
|
-
if self.
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
if self._first_chunk:
|
436
|
-
start = self._download_start + len(self._current_content)
|
437
|
-
current_progress = len(self._current_content)
|
438
|
-
else:
|
439
|
-
start = self._download_start
|
440
|
-
current_progress = 0
|
441
|
-
|
442
|
-
end = self._download_start + self.size
|
423
|
+
if self.size == 0 or self._download_complete:
|
424
|
+
iter_downloader = None
|
425
|
+
else:
|
426
|
+
data_end = self._file_size
|
427
|
+
data_start = self._initial_range[1] + 1 # Start where the first download ended
|
428
|
+
# For encryption, adjust start to the end of the fetched data rather than download size
|
429
|
+
if self._encryption_options.get("key") is not None or self._encryption_options.get("resolver") is not None:
|
430
|
+
data_start = (self._start_range or 0) + len(self._current_content)
|
443
431
|
|
432
|
+
if self._end_range is not None:
|
433
|
+
# Use the length unless it is over the end of the file
|
434
|
+
data_end = min(self._file_size, self._end_range + 1)
|
444
435
|
iter_downloader = _AsyncChunkDownloader(
|
445
436
|
client=self._clients.blob,
|
446
437
|
non_empty_ranges=self._non_empty_ranges,
|
447
438
|
total_size=self.size,
|
448
439
|
chunk_size=self._config.max_chunk_get_size,
|
449
|
-
current_progress=
|
450
|
-
start_range=
|
451
|
-
end_range=
|
440
|
+
current_progress=self._first_get_size,
|
441
|
+
start_range=data_start,
|
442
|
+
end_range=data_end,
|
443
|
+
stream=None,
|
444
|
+
parallel=False,
|
452
445
|
validate_content=self._validate_content,
|
453
446
|
encryption_options=self._encryption_options,
|
454
447
|
encryption_data=self._encryption_data,
|
455
448
|
use_location=self._location_mode,
|
456
|
-
**self._request_options
|
457
|
-
)
|
458
|
-
|
459
|
-
initial_content = self._current_content if self._first_chunk else b''
|
449
|
+
**self._request_options)
|
460
450
|
return _AsyncChunkIterator(
|
461
451
|
size=self.size,
|
462
|
-
content=
|
452
|
+
content=self._current_content,
|
463
453
|
downloader=iter_downloader,
|
464
454
|
chunk_size=self._config.max_chunk_get_size)
|
465
455
|
|
466
|
-
|
467
|
-
async def read(self, size: int = -1) -> T:
|
468
|
-
...
|
469
|
-
|
470
|
-
@overload
|
471
|
-
async def read(self, *, chars: Optional[int] = None) -> T:
|
472
|
-
...
|
473
|
-
|
474
|
-
# pylint: disable-next=too-many-statements,too-many-branches
|
475
|
-
async def read(self, size: int = -1, *, chars: Optional[int] = None) -> T:
|
456
|
+
async def read(self, size: Optional[int] = -1) -> T:
|
476
457
|
"""
|
477
|
-
Read
|
478
|
-
|
479
|
-
chars parameter to read a specific number of chars to avoid decoding
|
480
|
-
errors. If size/chars is unspecified or negative all bytes will be read.
|
458
|
+
Read up to size bytes from the stream and return them. If size
|
459
|
+
is unspecified or is -1, all bytes will be read.
|
481
460
|
|
482
|
-
:param int size:
|
461
|
+
:param Optional[int] size:
|
483
462
|
The number of bytes to download from the stream. Leave unspecified
|
484
|
-
or set
|
485
|
-
:keyword Optional[int] chars:
|
486
|
-
The number of chars to download from the stream. Leave unspecified
|
487
|
-
or set negative to download all chars. Note, this can only be used
|
488
|
-
when encoding is specified on `download_blob`.
|
463
|
+
or set to -1 to download all bytes.
|
489
464
|
:returns:
|
490
465
|
The requested data as bytes or a string if encoding was specified. If
|
491
466
|
the return value is empty, there is no more data to read.
|
492
467
|
:rtype: T
|
493
468
|
"""
|
494
|
-
if size
|
495
|
-
|
496
|
-
"Size parameter specified with text encoding enabled. It is recommended to use chars "
|
497
|
-
"to read a specific number of characters instead."
|
498
|
-
)
|
499
|
-
if size > -1 and chars is not None:
|
500
|
-
raise ValueError("Cannot specify both size and chars.")
|
501
|
-
if not self._encoding and chars is not None:
|
502
|
-
raise ValueError("Must specify encoding to read chars.")
|
503
|
-
if self._text_mode and size > -1:
|
504
|
-
raise ValueError("Stream has been partially read in text mode. Please use chars.")
|
505
|
-
if self._text_mode is False and chars is not None:
|
506
|
-
raise ValueError("Stream has been partially read in bytes mode. Please use size.")
|
507
|
-
|
469
|
+
if size == -1:
|
470
|
+
return await self.readall()
|
508
471
|
# Empty blob or already read to the end
|
509
|
-
if
|
510
|
-
(self._download_complete and self._current_content_offset >= len(self._current_content))):
|
472
|
+
if size == 0 or self._offset >= self.size:
|
511
473
|
return b'' if not self._encoding else ''
|
512
474
|
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
count += read
|
536
|
-
self._current_content_offset += read
|
537
|
-
self._read_offset += read
|
538
|
-
await self._check_and_report_progress()
|
539
|
-
|
540
|
-
remaining = size - count
|
541
|
-
if remaining > 0 and not self._download_complete:
|
542
|
-
# Create a downloader that can download the rest of the file
|
543
|
-
start = self._download_start + self._download_offset
|
544
|
-
end = self._download_start + self.size
|
475
|
+
stream = BytesIO()
|
476
|
+
remaining_size = size
|
477
|
+
|
478
|
+
# Start by reading from current_content if there is data left
|
479
|
+
if self._offset < len(self._current_content):
|
480
|
+
start = self._offset
|
481
|
+
length = min(remaining_size, len(self._current_content) - self._offset)
|
482
|
+
read = stream.write(self._current_content[start:start + length])
|
483
|
+
|
484
|
+
remaining_size -= read
|
485
|
+
self._offset += read
|
486
|
+
if self._progress_hook:
|
487
|
+
await self._progress_hook(self._offset, self.size)
|
488
|
+
|
489
|
+
if remaining_size > 0:
|
490
|
+
start_range = self._get_downloader_start_with_offset()
|
491
|
+
|
492
|
+
# End is the min between the remaining size, the file size, and the end of the specified range
|
493
|
+
end_range = min(start_range + remaining_size, self._file_size)
|
494
|
+
if self._end_range is not None:
|
495
|
+
end_range = min(end_range, self._end_range + 1)
|
545
496
|
|
546
497
|
parallel = self._max_concurrency > 1
|
547
498
|
downloader = _AsyncChunkDownloader(
|
@@ -549,10 +500,10 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
549
500
|
non_empty_ranges=self._non_empty_ranges,
|
550
501
|
total_size=self.size,
|
551
502
|
chunk_size=self._config.max_chunk_get_size,
|
552
|
-
current_progress=self.
|
553
|
-
start_range=
|
554
|
-
end_range=
|
555
|
-
stream=
|
503
|
+
current_progress=self._offset,
|
504
|
+
start_range=start_range,
|
505
|
+
end_range=end_range,
|
506
|
+
stream=stream,
|
556
507
|
parallel=parallel,
|
557
508
|
validate_content=self._validate_content,
|
558
509
|
encryption_options=self._encryption_options,
|
@@ -561,74 +512,42 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
561
512
|
progress_hook=self._progress_hook,
|
562
513
|
**self._request_options
|
563
514
|
)
|
564
|
-
self._first_chunk = False
|
565
|
-
|
566
|
-
# When reading all data, have the downloader read everything into the stream.
|
567
|
-
# Else, read one chunk at a time (using the downloader as an iterator) until
|
568
|
-
# the requested size is reached.
|
569
|
-
chunks_iter = downloader.get_chunk_offsets()
|
570
|
-
if readall and not self._text_mode:
|
571
|
-
running_futures = [
|
572
|
-
asyncio.ensure_future(downloader.process_chunk(d))
|
573
|
-
for d in islice(chunks_iter, 0, self._max_concurrency)
|
574
|
-
]
|
575
|
-
while running_futures:
|
576
|
-
# Wait for some download to finish before adding a new one
|
577
|
-
done, running_futures = await asyncio.wait(
|
578
|
-
running_futures, return_when=asyncio.FIRST_COMPLETED)
|
579
|
-
try:
|
580
|
-
for task in done:
|
581
|
-
task.result()
|
582
|
-
except HttpResponseError as error:
|
583
|
-
process_storage_error(error)
|
584
|
-
try:
|
585
|
-
for _ in range(0, len(done)):
|
586
|
-
next_chunk = next(chunks_iter)
|
587
|
-
running_futures.add(asyncio.ensure_future(downloader.process_chunk(next_chunk)))
|
588
|
-
except StopIteration:
|
589
|
-
break
|
590
|
-
|
591
|
-
if running_futures:
|
592
|
-
# Wait for the remaining downloads to finish
|
593
|
-
done, _running_futures = await asyncio.wait(running_futures)
|
594
|
-
try:
|
595
|
-
for task in done:
|
596
|
-
task.result()
|
597
|
-
except HttpResponseError as error:
|
598
|
-
process_storage_error(error)
|
599
|
-
|
600
|
-
self._complete_read()
|
601
515
|
|
602
|
-
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
|
609
|
-
|
610
|
-
|
611
|
-
|
612
|
-
|
613
|
-
|
614
|
-
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
|
621
|
-
|
622
|
-
|
623
|
-
|
624
|
-
|
625
|
-
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
516
|
+
dl_tasks = downloader.get_chunk_offsets()
|
517
|
+
running_futures = [
|
518
|
+
asyncio.ensure_future(downloader.process_chunk(d))
|
519
|
+
for d in islice(dl_tasks, 0, self._max_concurrency)
|
520
|
+
]
|
521
|
+
while running_futures:
|
522
|
+
# Wait for some download to finish before adding a new one
|
523
|
+
done, running_futures = await asyncio.wait(
|
524
|
+
running_futures, return_when=asyncio.FIRST_COMPLETED)
|
525
|
+
try:
|
526
|
+
for task in done:
|
527
|
+
task.result()
|
528
|
+
except HttpResponseError as error:
|
529
|
+
process_storage_error(error)
|
530
|
+
try:
|
531
|
+
for _ in range(0, len(done)):
|
532
|
+
next_chunk = next(dl_tasks)
|
533
|
+
running_futures.add(asyncio.ensure_future(downloader.process_chunk(next_chunk)))
|
534
|
+
except StopIteration:
|
535
|
+
break
|
536
|
+
|
537
|
+
if running_futures:
|
538
|
+
# Wait for the remaining downloads to finish
|
539
|
+
done, _running_futures = await asyncio.wait(running_futures)
|
540
|
+
try:
|
541
|
+
for task in done:
|
542
|
+
task.result()
|
543
|
+
except HttpResponseError as error:
|
544
|
+
process_storage_error(error)
|
631
545
|
|
546
|
+
self._offset += remaining_size
|
547
|
+
|
548
|
+
data = stream.getvalue()
|
549
|
+
if self._encoding:
|
550
|
+
return data.decode(self._encoding)
|
632
551
|
return data
|
633
552
|
|
634
553
|
async def readall(self) -> T:
|
@@ -639,7 +558,53 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
639
558
|
:returns: The requested data as bytes or a string if encoding was specified.
|
640
559
|
:rtype: T
|
641
560
|
"""
|
642
|
-
|
561
|
+
stream = BytesIO()
|
562
|
+
await self.readinto(stream)
|
563
|
+
data = stream.getvalue()
|
564
|
+
if self._encoding:
|
565
|
+
return data.decode(self._encoding)
|
566
|
+
return data
|
567
|
+
|
568
|
+
async def content_as_bytes(self, max_concurrency=1):
|
569
|
+
"""DEPRECATED: Download the contents of this file.
|
570
|
+
|
571
|
+
This operation is blocking until all data is downloaded.
|
572
|
+
|
573
|
+
This method is deprecated, use :func:`readall` instead.
|
574
|
+
|
575
|
+
:param int max_concurrency:
|
576
|
+
The number of parallel connections with which to download.
|
577
|
+
:returns: The contents of the file as bytes.
|
578
|
+
:rtype: bytes
|
579
|
+
"""
|
580
|
+
warnings.warn(
|
581
|
+
"content_as_bytes is deprecated, use readall instead",
|
582
|
+
DeprecationWarning
|
583
|
+
)
|
584
|
+
self._max_concurrency = max_concurrency
|
585
|
+
return await self.readall()
|
586
|
+
|
587
|
+
async def content_as_text(self, max_concurrency=1, encoding="UTF-8"):
|
588
|
+
"""DEPRECATED: Download the contents of this blob, and decode as text.
|
589
|
+
|
590
|
+
This operation is blocking until all data is downloaded.
|
591
|
+
|
592
|
+
This method is deprecated, use :func:`readall` instead.
|
593
|
+
|
594
|
+
:param int max_concurrency:
|
595
|
+
The number of parallel connections with which to download.
|
596
|
+
:param str encoding:
|
597
|
+
Text encoding to decode the downloaded bytes. Default is UTF-8.
|
598
|
+
:returns: The content of the file as a str.
|
599
|
+
:rtype: str
|
600
|
+
"""
|
601
|
+
warnings.warn(
|
602
|
+
"content_as_text is deprecated, use readall instead",
|
603
|
+
DeprecationWarning
|
604
|
+
)
|
605
|
+
self._max_concurrency = max_concurrency
|
606
|
+
self._encoding = encoding
|
607
|
+
return await self.readall()
|
643
608
|
|
644
609
|
async def readinto(self, stream: IO[bytes]) -> int:
|
645
610
|
"""Download the contents of this blob to a stream.
|
@@ -651,11 +616,6 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
651
616
|
:returns: The number of bytes read.
|
652
617
|
:rtype: int
|
653
618
|
"""
|
654
|
-
if self._text_mode:
|
655
|
-
raise ValueError("Stream has been partially read in text mode. readinto is not supported in text mode.")
|
656
|
-
if self._encoding:
|
657
|
-
warnings.warn("Encoding is ignored with readinto as only byte streams are supported.")
|
658
|
-
|
659
619
|
# the stream must be seekable if parallel download is required
|
660
620
|
parallel = self._max_concurrency > 1
|
661
621
|
if parallel:
|
@@ -669,34 +629,35 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
669
629
|
raise ValueError(error_message) from exc
|
670
630
|
|
671
631
|
# If some data has been streamed using `read`, only stream the remaining data
|
672
|
-
remaining_size = self.size - self.
|
632
|
+
remaining_size = self.size - self._offset
|
673
633
|
# Already read to the end
|
674
634
|
if remaining_size <= 0:
|
675
635
|
return 0
|
676
636
|
|
677
|
-
# Write the
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
if self._progress_hook:
|
685
|
-
await self._progress_hook(self._read_offset, self.size)
|
637
|
+
# Write the content to the user stream if there is data left
|
638
|
+
if self._offset < len(self._current_content):
|
639
|
+
content = self._current_content[self._offset:]
|
640
|
+
stream.write(content)
|
641
|
+
self._offset += len(content)
|
642
|
+
if self._progress_hook:
|
643
|
+
await self._progress_hook(len(content), self.size)
|
686
644
|
|
687
|
-
# If all the data was already downloaded/buffered
|
688
645
|
if self._download_complete:
|
689
646
|
return remaining_size
|
690
647
|
|
691
|
-
|
692
|
-
|
648
|
+
data_end = self._file_size
|
649
|
+
if self._end_range is not None:
|
650
|
+
# Use the length unless it is over the end of the file
|
651
|
+
data_end = min(self._file_size, self._end_range + 1)
|
652
|
+
|
653
|
+
data_start = self._get_downloader_start_with_offset()
|
693
654
|
|
694
655
|
downloader = _AsyncChunkDownloader(
|
695
656
|
client=self._clients.blob,
|
696
657
|
non_empty_ranges=self._non_empty_ranges,
|
697
658
|
total_size=self.size,
|
698
659
|
chunk_size=self._config.max_chunk_get_size,
|
699
|
-
current_progress=self.
|
660
|
+
current_progress=self._offset,
|
700
661
|
start_range=data_start,
|
701
662
|
end_range=data_end,
|
702
663
|
stream=stream,
|
@@ -706,8 +667,7 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
706
667
|
encryption_data=self._encryption_data,
|
707
668
|
use_location=self._location_mode,
|
708
669
|
progress_hook=self._progress_hook,
|
709
|
-
**self._request_options
|
710
|
-
)
|
670
|
+
**self._request_options)
|
711
671
|
|
712
672
|
dl_tasks = downloader.get_chunk_offsets()
|
713
673
|
running_futures = [
|
@@ -739,72 +699,8 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
739
699
|
except HttpResponseError as error:
|
740
700
|
process_storage_error(error)
|
741
701
|
|
742
|
-
self._complete_read()
|
743
702
|
return remaining_size
|
744
703
|
|
745
|
-
def _complete_read(self):
|
746
|
-
"""Adjusts all offsets to the end of the download."""
|
747
|
-
self._download_offset = self.size
|
748
|
-
self._raw_download_offset = self.size
|
749
|
-
self._read_offset = self.size
|
750
|
-
self._current_content_offset = len(self._current_content)
|
751
|
-
|
752
|
-
async def _check_and_report_progress(self):
|
753
|
-
"""Reports progress if necessary."""
|
754
|
-
# Only report progress at the end of each chunk and use download_offset to always report
|
755
|
-
# progress in terms of (approximate) byte count.
|
756
|
-
if self._progress_hook and self._current_content_offset == len(self._current_content):
|
757
|
-
await self._progress_hook(self._download_offset, self.size)
|
758
|
-
|
759
|
-
async def content_as_bytes(self, max_concurrency=1):
|
760
|
-
"""DEPRECATED: Download the contents of this file.
|
761
|
-
|
762
|
-
This operation is blocking until all data is downloaded.
|
763
|
-
|
764
|
-
This method is deprecated, use :func:`readall` instead.
|
765
|
-
|
766
|
-
:param int max_concurrency:
|
767
|
-
The number of parallel connections with which to download.
|
768
|
-
:returns: The contents of the file as bytes.
|
769
|
-
:rtype: bytes
|
770
|
-
"""
|
771
|
-
warnings.warn(
|
772
|
-
"content_as_bytes is deprecated, use readall instead",
|
773
|
-
DeprecationWarning
|
774
|
-
)
|
775
|
-
if self._text_mode:
|
776
|
-
raise ValueError("Stream has been partially read in text mode. "
|
777
|
-
"content_as_bytes is not supported in text mode.")
|
778
|
-
|
779
|
-
self._max_concurrency = max_concurrency
|
780
|
-
return await self.readall()
|
781
|
-
|
782
|
-
async def content_as_text(self, max_concurrency=1, encoding="UTF-8"):
|
783
|
-
"""DEPRECATED: Download the contents of this blob, and decode as text.
|
784
|
-
|
785
|
-
This operation is blocking until all data is downloaded.
|
786
|
-
|
787
|
-
This method is deprecated, use :func:`readall` instead.
|
788
|
-
|
789
|
-
:param int max_concurrency:
|
790
|
-
The number of parallel connections with which to download.
|
791
|
-
:param str encoding:
|
792
|
-
Text encoding to decode the downloaded bytes. Default is UTF-8.
|
793
|
-
:returns: The content of the file as a str.
|
794
|
-
:rtype: str
|
795
|
-
"""
|
796
|
-
warnings.warn(
|
797
|
-
"content_as_text is deprecated, use readall instead",
|
798
|
-
DeprecationWarning
|
799
|
-
)
|
800
|
-
if self._text_mode:
|
801
|
-
raise ValueError("Stream has been partially read in text mode. "
|
802
|
-
"content_as_text is not supported in text mode.")
|
803
|
-
|
804
|
-
self._max_concurrency = max_concurrency
|
805
|
-
self._encoding = encoding
|
806
|
-
return await self.readall()
|
807
|
-
|
808
704
|
async def download_to_stream(self, stream, max_concurrency=1):
|
809
705
|
"""DEPRECATED: Download the contents of this blob to a stream.
|
810
706
|
|
@@ -823,10 +719,6 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
823
719
|
"download_to_stream is deprecated, use readinto instead",
|
824
720
|
DeprecationWarning
|
825
721
|
)
|
826
|
-
if self._text_mode:
|
827
|
-
raise ValueError("Stream has been partially read in text mode. "
|
828
|
-
"download_to_stream is not supported in text mode.")
|
829
|
-
|
830
722
|
self._max_concurrency = max_concurrency
|
831
723
|
await self.readinto(stream)
|
832
724
|
return self.properties
|