azure-storage-blob 12.21.0__py3-none-any.whl → 12.21.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- azure/storage/blob/_blob_client.py +48 -53
- azure/storage/blob/_blob_service_client.py +11 -11
- azure/storage/blob/_container_client.py +20 -22
- azure/storage/blob/_download.py +167 -277
- azure/storage/blob/_lease.py +5 -5
- azure/storage/blob/_models.py +1 -1
- azure/storage/blob/_version.py +1 -1
- azure/storage/blob/aio/_blob_client_async.py +46 -49
- azure/storage/blob/aio/_blob_service_client_async.py +10 -10
- azure/storage/blob/aio/_container_client_async.py +19 -22
- azure/storage/blob/aio/_download_async.py +209 -317
- azure/storage/blob/aio/_lease_async.py +5 -5
- {azure_storage_blob-12.21.0.dist-info → azure_storage_blob-12.21.0b1.dist-info}/METADATA +7 -7
- {azure_storage_blob-12.21.0.dist-info → azure_storage_blob-12.21.0b1.dist-info}/RECORD +17 -18
- {azure_storage_blob-12.21.0.dist-info → azure_storage_blob-12.21.0b1.dist-info}/WHEEL +1 -1
- azure/storage/blob/_generated/py.typed +0 -1
- {azure_storage_blob-12.21.0.dist-info → azure_storage_blob-12.21.0b1.dist-info}/LICENSE +0 -0
- {azure_storage_blob-12.21.0.dist-info → azure_storage_blob-12.21.0b1.dist-info}/top_level.txt +0 -0
azure/storage/blob/_download.py
CHANGED
@@ -3,13 +3,13 @@
|
|
3
3
|
# Licensed under the MIT License. See License.txt in the project root for
|
4
4
|
# license information.
|
5
5
|
# --------------------------------------------------------------------------
|
6
|
-
|
6
|
+
|
7
7
|
import sys
|
8
8
|
import threading
|
9
9
|
import time
|
10
10
|
import warnings
|
11
|
-
from io import BytesIO
|
12
|
-
from typing import Generic, IO, Iterator, Optional,
|
11
|
+
from io import BytesIO
|
12
|
+
from typing import Generic, IO, Iterator, Optional, TypeVar
|
13
13
|
|
14
14
|
from azure.core.exceptions import DecodeError, HttpResponseError, IncompleteReadError
|
15
15
|
from azure.core.tracing.common import with_current_context
|
@@ -125,7 +125,7 @@ class _ChunkDownloader(object): # pylint: disable=too-many-instance-attributes
|
|
125
125
|
|
126
126
|
def process_chunk(self, chunk_start):
|
127
127
|
chunk_start, chunk_end = self._calculate_range(chunk_start)
|
128
|
-
chunk_data
|
128
|
+
chunk_data = self._download_chunk(chunk_start, chunk_end - 1)
|
129
129
|
length = chunk_end - chunk_start
|
130
130
|
if length > 0:
|
131
131
|
self._write_to_stream(chunk_data, chunk_start)
|
@@ -186,8 +186,8 @@ class _ChunkDownloader(object): # pylint: disable=too-many-instance-attributes
|
|
186
186
|
# No need to download the empty chunk from server if there's no data in the chunk to be downloaded.
|
187
187
|
# Do optimize and create empty chunk locally if condition is met.
|
188
188
|
if self._do_optimize(download_range[0], download_range[1]):
|
189
|
-
|
190
|
-
chunk_data = b"\x00" *
|
189
|
+
data_size = download_range[1] - download_range[0] + 1
|
190
|
+
chunk_data = b"\x00" * data_size
|
191
191
|
else:
|
192
192
|
range_header, range_validation = validate_and_format_range_headers(
|
193
193
|
download_range[0],
|
@@ -218,14 +218,13 @@ class _ChunkDownloader(object): # pylint: disable=too-many-instance-attributes
|
|
218
218
|
if retry_total <= 0:
|
219
219
|
raise HttpResponseError(error, error=error) from error
|
220
220
|
time.sleep(1)
|
221
|
-
content_length = response.content_length
|
222
221
|
|
223
222
|
# This makes sure that if_match is set so that we can validate
|
224
223
|
# that subsequent downloads are to an unmodified blob
|
225
224
|
if self.request_options.get("modified_access_conditions"):
|
226
225
|
self.request_options["modified_access_conditions"].if_match = response.properties.etag
|
227
226
|
|
228
|
-
return chunk_data
|
227
|
+
return chunk_data
|
229
228
|
|
230
229
|
|
231
230
|
class _ChunkIterator(object):
|
@@ -265,7 +264,7 @@ class _ChunkIterator(object):
|
|
265
264
|
|
266
265
|
try:
|
267
266
|
chunk = next(self._iter_chunks)
|
268
|
-
self._current_content += self._iter_downloader.yield_chunk(chunk)
|
267
|
+
self._current_content += self._iter_downloader.yield_chunk(chunk)
|
269
268
|
except StopIteration as e:
|
270
269
|
self._complete = True
|
271
270
|
if self._current_content:
|
@@ -329,27 +328,14 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
329
328
|
self._encryption_options = encryption_options or {}
|
330
329
|
self._progress_hook = kwargs.pop('progress_hook', None)
|
331
330
|
self._request_options = kwargs
|
332
|
-
self._response = None
|
333
331
|
self._location_mode = None
|
334
|
-
self.
|
335
|
-
self.
|
332
|
+
self._download_complete = False
|
333
|
+
self._current_content = None
|
334
|
+
self._file_size = None
|
336
335
|
self._non_empty_ranges = None
|
336
|
+
self._response = None
|
337
337
|
self._encryption_data = None
|
338
|
-
|
339
|
-
# The content download offset, after any processing (decryption), in bytes
|
340
|
-
self._download_offset = 0
|
341
|
-
# The raw download offset, before processing (decryption), in bytes
|
342
|
-
self._raw_download_offset = 0
|
343
|
-
# The offset the stream has been read to in bytes or chars depending on mode
|
344
|
-
self._read_offset = 0
|
345
|
-
# The offset into current_content that has been consumed in bytes or chars depending on mode
|
346
|
-
self._current_content_offset = 0
|
347
|
-
|
348
|
-
self._text_mode: Optional[bool] = None
|
349
|
-
self._decoder = None
|
350
|
-
# Whether the current content is the first chunk of download content or not
|
351
|
-
self._first_chunk = True
|
352
|
-
self._download_start = self._start_range or 0
|
338
|
+
self._offset = 0
|
353
339
|
|
354
340
|
# The cls is passed in via download_cls to avoid conflicting arg name with Generic.__new__
|
355
341
|
# but needs to be changed to cls in the request options.
|
@@ -361,14 +347,14 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
361
347
|
# The service only provides transactional MD5s for chunks under 4MB.
|
362
348
|
# If validate_content is on, get only self.MAX_CHUNK_GET_SIZE for the first
|
363
349
|
# chunk so a transactional MD5 can be retrieved.
|
364
|
-
|
350
|
+
self._first_get_size = (
|
365
351
|
self._config.max_single_get_size if not self._validate_content else self._config.max_chunk_get_size
|
366
352
|
)
|
367
|
-
initial_request_start = self.
|
368
|
-
if self._end_range is not None and self._end_range -
|
353
|
+
initial_request_start = self._start_range if self._start_range is not None else 0
|
354
|
+
if self._end_range is not None and self._end_range - self._start_range < self._first_get_size:
|
369
355
|
initial_request_end = self._end_range
|
370
356
|
else:
|
371
|
-
initial_request_end = initial_request_start +
|
357
|
+
initial_request_end = initial_request_start + self._first_get_size - 1
|
372
358
|
|
373
359
|
self._initial_range, self._initial_offset = process_range_and_offset(
|
374
360
|
initial_request_start,
|
@@ -383,11 +369,12 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
383
369
|
self.properties.name = self.name
|
384
370
|
self.properties.container = self.container
|
385
371
|
|
386
|
-
# Set the content length to the download size instead of the size of
|
372
|
+
# Set the content length to the download size instead of the size of
|
373
|
+
# the last range
|
387
374
|
self.properties.size = self.size
|
388
|
-
|
389
|
-
|
390
|
-
|
375
|
+
|
376
|
+
# Overwrite the content range to the user requested range
|
377
|
+
self.properties.content_range = f"bytes {self._start_range}-{self._end_range}/{self._file_size}"
|
391
378
|
|
392
379
|
# Overwrite the content MD5 as it is the MD5 for the last range instead
|
393
380
|
# of the stored MD5
|
@@ -412,12 +399,6 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
412
399
|
# Restore cls for download
|
413
400
|
self._request_options['cls'] = download_cls
|
414
401
|
|
415
|
-
@property
|
416
|
-
def _download_complete(self):
|
417
|
-
if is_encryption_v2(self._encryption_data):
|
418
|
-
return self._download_offset >= self.size
|
419
|
-
return self._raw_download_offset >= self.size
|
420
|
-
|
421
402
|
def _initial_request(self):
|
422
403
|
range_header, range_validation = validate_and_format_range_headers(
|
423
404
|
self._initial_range[0],
|
@@ -454,7 +435,7 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
454
435
|
|
455
436
|
if self._end_range is not None:
|
456
437
|
# Use the end range index unless it is over the end of the file
|
457
|
-
self.size = min(self._file_size
|
438
|
+
self.size = min(self._file_size, self._end_range - self._start_range + 1)
|
458
439
|
elif self._start_range is not None:
|
459
440
|
self.size = self._file_size - self._start_range
|
460
441
|
else:
|
@@ -497,8 +478,6 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
497
478
|
if retry_total <= 0:
|
498
479
|
raise HttpResponseError(error, error=error) from error
|
499
480
|
time.sleep(1)
|
500
|
-
self._download_offset += len(self._current_content)
|
501
|
-
self._raw_download_offset += response.content_length
|
502
481
|
|
503
482
|
# get page ranges to optimize downloading sparse page blob
|
504
483
|
if response.properties.blob_type == 'PageBlob':
|
@@ -512,19 +491,33 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
512
491
|
except HttpResponseError:
|
513
492
|
pass
|
514
493
|
|
494
|
+
# If the file is small, the download is complete at this point.
|
495
|
+
# If file size is large, download the rest of the file in chunks.
|
496
|
+
# For encryption V2, calculate based on size of decrypted content, not download size.
|
497
|
+
if is_encryption_v2(self._encryption_data):
|
498
|
+
self._download_complete = len(self._current_content) >= self.size
|
499
|
+
else:
|
500
|
+
self._download_complete = response.properties.size >= self.size
|
501
|
+
|
515
502
|
if not self._download_complete and self._request_options.get("modified_access_conditions"):
|
516
503
|
self._request_options["modified_access_conditions"].if_match = response.properties.etag
|
517
504
|
|
518
505
|
return response
|
519
506
|
|
507
|
+
def _get_downloader_start_with_offset(self):
|
508
|
+
# Start where the initial request download ended
|
509
|
+
start = self._initial_range[1] + 1
|
510
|
+
# For encryption V2 only, adjust start to the end of the fetched data rather than download size
|
511
|
+
if self._encryption_options.get("key") is not None or self._encryption_options.get("resolver") is not None:
|
512
|
+
start = (self._start_range or 0) + len(self._current_content)
|
513
|
+
|
514
|
+
# Adjust the start based on any data read past the current content
|
515
|
+
start += (self._offset - len(self._current_content))
|
516
|
+
return start
|
517
|
+
|
520
518
|
def chunks(self):
|
521
519
|
# type: () -> Iterator[bytes]
|
522
|
-
"""
|
523
|
-
Iterate over chunks in the download stream. Note, the iterator returned will
|
524
|
-
iterate over the entire download content, regardless of any data that was
|
525
|
-
previously read.
|
526
|
-
|
527
|
-
NOTE: If the stream has been partially read, some data may be re-downloaded by the iterator.
|
520
|
+
"""Iterate over chunks in the download stream.
|
528
521
|
|
529
522
|
:returns: An iterator of the chunks in the download stream.
|
530
523
|
:rtype: Iterator[bytes]
|
@@ -538,124 +531,81 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
538
531
|
:dedent: 12
|
539
532
|
:caption: Download a blob using chunks().
|
540
533
|
"""
|
541
|
-
if self.
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
if not self._first_chunk or not self._download_complete:
|
549
|
-
if self._first_chunk:
|
550
|
-
start = self._download_start + len(self._current_content)
|
551
|
-
current_progress = len(self._current_content)
|
552
|
-
else:
|
553
|
-
start = self._download_start
|
554
|
-
current_progress = 0
|
534
|
+
if self.size == 0 or self._download_complete:
|
535
|
+
iter_downloader = None
|
536
|
+
else:
|
537
|
+
data_end = self._file_size
|
538
|
+
if self._end_range is not None:
|
539
|
+
# Use the end range index unless it is over the end of the file
|
540
|
+
data_end = min(self._file_size, self._end_range + 1)
|
555
541
|
|
556
|
-
|
542
|
+
data_start = self._initial_range[1] + 1 # Start where the first download ended
|
543
|
+
# For encryption, adjust start to the end of the fetched data rather than download size
|
544
|
+
if self._encryption_options.get("key") is not None or self._encryption_options.get("resolver") is not None:
|
545
|
+
data_start = (self._start_range or 0) + len(self._current_content)
|
557
546
|
|
558
547
|
iter_downloader = _ChunkDownloader(
|
559
548
|
client=self._clients.blob,
|
560
549
|
non_empty_ranges=self._non_empty_ranges,
|
561
550
|
total_size=self.size,
|
562
551
|
chunk_size=self._config.max_chunk_get_size,
|
563
|
-
current_progress=
|
564
|
-
start_range=
|
565
|
-
end_range=
|
552
|
+
current_progress=self._first_get_size,
|
553
|
+
start_range=data_start,
|
554
|
+
end_range=data_end,
|
555
|
+
stream=None,
|
556
|
+
parallel=False,
|
566
557
|
validate_content=self._validate_content,
|
567
558
|
encryption_options=self._encryption_options,
|
568
559
|
encryption_data=self._encryption_data,
|
569
560
|
use_location=self._location_mode,
|
570
561
|
**self._request_options
|
571
562
|
)
|
572
|
-
|
573
|
-
initial_content = self._current_content if self._first_chunk else b''
|
574
563
|
return _ChunkIterator(
|
575
564
|
size=self.size,
|
576
|
-
content=
|
565
|
+
content=self._current_content,
|
577
566
|
downloader=iter_downloader,
|
578
567
|
chunk_size=self._config.max_chunk_get_size)
|
579
568
|
|
580
|
-
|
581
|
-
def read(self, size: int = -1) -> T:
|
582
|
-
...
|
583
|
-
|
584
|
-
@overload
|
585
|
-
def read(self, *, chars: Optional[int] = None) -> T:
|
586
|
-
...
|
587
|
-
|
588
|
-
# pylint: disable-next=too-many-statements,too-many-branches
|
589
|
-
def read(self, size: int = -1, *, chars: Optional[int] = None) -> T:
|
569
|
+
def read(self, size: Optional[int] = -1) -> T:
|
590
570
|
"""
|
591
|
-
Read
|
592
|
-
|
593
|
-
chars parameter to read a specific number of chars to avoid decoding
|
594
|
-
errors. If size/chars is unspecified or negative all bytes will be read.
|
571
|
+
Read up to size bytes from the stream and return them. If size
|
572
|
+
is unspecified or is -1, all bytes will be read.
|
595
573
|
|
596
|
-
:param int size:
|
574
|
+
:param Optional[int] size:
|
597
575
|
The number of bytes to download from the stream. Leave unspecified
|
598
|
-
or set
|
599
|
-
:keyword Optional[int] chars:
|
600
|
-
The number of chars to download from the stream. Leave unspecified
|
601
|
-
or set negative to download all chars. Note, this can only be used
|
602
|
-
when encoding is specified on `download_blob`.
|
576
|
+
or set to -1 to download all bytes.
|
603
577
|
:returns:
|
604
578
|
The requested data as bytes or a string if encoding was specified. If
|
605
579
|
the return value is empty, there is no more data to read.
|
606
580
|
:rtype: T
|
607
581
|
"""
|
608
|
-
if size
|
609
|
-
|
610
|
-
"Size parameter specified with text encoding enabled. It is recommended to use chars "
|
611
|
-
"to read a specific number of characters instead."
|
612
|
-
)
|
613
|
-
if size > -1 and chars is not None:
|
614
|
-
raise ValueError("Cannot specify both size and chars.")
|
615
|
-
if not self._encoding and chars is not None:
|
616
|
-
raise ValueError("Must specify encoding to read chars.")
|
617
|
-
if self._text_mode and size > -1:
|
618
|
-
raise ValueError("Stream has been partially read in text mode. Please use chars.")
|
619
|
-
if self._text_mode is False and chars is not None:
|
620
|
-
raise ValueError("Stream has been partially read in bytes mode. Please use size.")
|
621
|
-
|
582
|
+
if size == -1:
|
583
|
+
return self.readall()
|
622
584
|
# Empty blob or already read to the end
|
623
|
-
if
|
624
|
-
(self._download_complete and self._current_content_offset >= len(self._current_content))):
|
585
|
+
if size == 0 or self._offset >= self.size:
|
625
586
|
return b'' if not self._encoding else ''
|
626
587
|
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
count += read
|
650
|
-
self._current_content_offset += read
|
651
|
-
self._read_offset += read
|
652
|
-
self._check_and_report_progress()
|
653
|
-
|
654
|
-
remaining = size - count
|
655
|
-
if remaining > 0 and not self._download_complete:
|
656
|
-
# Create a downloader than can download the rest of the file
|
657
|
-
start = self._download_start + self._download_offset
|
658
|
-
end = self._download_start + self.size
|
588
|
+
stream = BytesIO()
|
589
|
+
remaining_size = size
|
590
|
+
|
591
|
+
# Start by reading from current_content if there is data left
|
592
|
+
if self._offset < len(self._current_content):
|
593
|
+
start = self._offset
|
594
|
+
length = min(remaining_size, len(self._current_content) - self._offset)
|
595
|
+
read = stream.write(self._current_content[start:start + length])
|
596
|
+
|
597
|
+
remaining_size -= read
|
598
|
+
self._offset += read
|
599
|
+
if self._progress_hook:
|
600
|
+
self._progress_hook(self._offset, self.size)
|
601
|
+
|
602
|
+
if remaining_size > 0:
|
603
|
+
start_range = self._get_downloader_start_with_offset()
|
604
|
+
|
605
|
+
# End is the min between the remaining size, the file size, and the end of the specified range
|
606
|
+
end_range = min(start_range + remaining_size, self._file_size)
|
607
|
+
if self._end_range is not None:
|
608
|
+
end_range = min(end_range, self._end_range + 1)
|
659
609
|
|
660
610
|
parallel = self._max_concurrency > 1
|
661
611
|
downloader = _ChunkDownloader(
|
@@ -663,10 +613,10 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
663
613
|
non_empty_ranges=self._non_empty_ranges,
|
664
614
|
total_size=self.size,
|
665
615
|
chunk_size=self._config.max_chunk_get_size,
|
666
|
-
current_progress=self.
|
667
|
-
start_range=
|
668
|
-
end_range=
|
669
|
-
stream=
|
616
|
+
current_progress=self._offset,
|
617
|
+
start_range=start_range,
|
618
|
+
end_range=end_range,
|
619
|
+
stream=stream,
|
670
620
|
parallel=parallel,
|
671
621
|
validate_content=self._validate_content,
|
672
622
|
encryption_options=self._encryption_options,
|
@@ -675,57 +625,23 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
675
625
|
progress_hook=self._progress_hook,
|
676
626
|
**self._request_options
|
677
627
|
)
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
chunks_iter = downloader.get_chunk_offsets()
|
684
|
-
if readall and not self._text_mode:
|
685
|
-
# Only do parallel if there is more than one chunk left to download
|
686
|
-
if parallel and (self.size - self._download_offset) > self._config.max_chunk_get_size:
|
687
|
-
import concurrent.futures
|
688
|
-
with concurrent.futures.ThreadPoolExecutor(self._max_concurrency) as executor:
|
689
|
-
list(executor.map(
|
628
|
+
|
629
|
+
if parallel and remaining_size > self._config.max_chunk_get_size:
|
630
|
+
import concurrent.futures
|
631
|
+
with concurrent.futures.ThreadPoolExecutor(self._max_concurrency) as executor:
|
632
|
+
list(executor.map(
|
690
633
|
with_current_context(downloader.process_chunk),
|
691
634
|
downloader.get_chunk_offsets()
|
692
635
|
))
|
693
|
-
else:
|
694
|
-
for chunk in chunks_iter:
|
695
|
-
downloader.process_chunk(chunk)
|
696
|
-
|
697
|
-
self._complete_read()
|
698
|
-
|
699
636
|
else:
|
700
|
-
|
701
|
-
|
702
|
-
|
703
|
-
|
704
|
-
self._current_content = self._decoder.decode(
|
705
|
-
chunk_data, final=self._download_complete) if self._text_mode else chunk_data
|
706
|
-
|
707
|
-
if remaining < len(self._current_content):
|
708
|
-
read = output_stream.write(self._current_content[:remaining])
|
709
|
-
else:
|
710
|
-
read = output_stream.write(self._current_content)
|
711
|
-
|
712
|
-
self._current_content_offset = read
|
713
|
-
self._read_offset += read
|
714
|
-
remaining -= read
|
715
|
-
self._check_and_report_progress()
|
716
|
-
|
717
|
-
data = output_stream.getvalue()
|
718
|
-
if not self._text_mode and self._encoding:
|
719
|
-
try:
|
720
|
-
# This is technically incorrect to do, but we have it for backwards compatibility.
|
721
|
-
data = data.decode(self._encoding)
|
722
|
-
except UnicodeDecodeError:
|
723
|
-
warnings.warn(
|
724
|
-
"Encountered a decoding error while decoding blob data from a partial read. "
|
725
|
-
"Try using the `chars` keyword instead to read in text mode."
|
726
|
-
)
|
727
|
-
raise
|
637
|
+
for chunk in downloader.get_chunk_offsets():
|
638
|
+
downloader.process_chunk(chunk)
|
639
|
+
|
640
|
+
self._offset += remaining_size
|
728
641
|
|
642
|
+
data = stream.getvalue()
|
643
|
+
if self._encoding:
|
644
|
+
return data.decode(self._encoding)
|
729
645
|
return data
|
730
646
|
|
731
647
|
def readall(self) -> T:
|
@@ -736,7 +652,53 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
736
652
|
:returns: The requested data as bytes or a string if encoding was specified.
|
737
653
|
:rtype: T
|
738
654
|
"""
|
739
|
-
|
655
|
+
stream = BytesIO()
|
656
|
+
self.readinto(stream)
|
657
|
+
data = stream.getvalue()
|
658
|
+
if self._encoding:
|
659
|
+
return data.decode(self._encoding)
|
660
|
+
return data
|
661
|
+
|
662
|
+
def content_as_bytes(self, max_concurrency=1):
|
663
|
+
"""DEPRECATED: Download the contents of this file.
|
664
|
+
|
665
|
+
This operation is blocking until all data is downloaded.
|
666
|
+
|
667
|
+
This method is deprecated, use func:`readall` instead.
|
668
|
+
|
669
|
+
:param int max_concurrency:
|
670
|
+
The number of parallel connections with which to download.
|
671
|
+
:returns: The contents of the file as bytes.
|
672
|
+
:rtype: bytes
|
673
|
+
"""
|
674
|
+
warnings.warn(
|
675
|
+
"content_as_bytes is deprecated, use readall instead",
|
676
|
+
DeprecationWarning
|
677
|
+
)
|
678
|
+
self._max_concurrency = max_concurrency
|
679
|
+
return self.readall()
|
680
|
+
|
681
|
+
def content_as_text(self, max_concurrency=1, encoding="UTF-8"):
|
682
|
+
"""DEPRECATED: Download the contents of this blob, and decode as text.
|
683
|
+
|
684
|
+
This operation is blocking until all data is downloaded.
|
685
|
+
|
686
|
+
This method is deprecated, use func:`readall` instead.
|
687
|
+
|
688
|
+
:param int max_concurrency:
|
689
|
+
The number of parallel connections with which to download.
|
690
|
+
:param str encoding:
|
691
|
+
Test encoding to decode the downloaded bytes. Default is UTF-8.
|
692
|
+
:returns: The content of the file as a str.
|
693
|
+
:rtype: str
|
694
|
+
"""
|
695
|
+
warnings.warn(
|
696
|
+
"content_as_text is deprecated, use readall instead",
|
697
|
+
DeprecationWarning
|
698
|
+
)
|
699
|
+
self._max_concurrency = max_concurrency
|
700
|
+
self._encoding = encoding
|
701
|
+
return self.readall()
|
740
702
|
|
741
703
|
def readinto(self, stream: IO[bytes]) -> int:
|
742
704
|
"""Download the contents of this file to a stream.
|
@@ -748,11 +710,6 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
748
710
|
:returns: The number of bytes read.
|
749
711
|
:rtype: int
|
750
712
|
"""
|
751
|
-
if self._text_mode:
|
752
|
-
raise ValueError("Stream has been partially read in text mode. readinto is not supported in text mode.")
|
753
|
-
if self._encoding:
|
754
|
-
warnings.warn("Encoding is ignored with readinto as only byte streams are supported.")
|
755
|
-
|
756
713
|
# The stream must be seekable if parallel download is required
|
757
714
|
parallel = self._max_concurrency > 1
|
758
715
|
if parallel:
|
@@ -766,34 +723,35 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
766
723
|
raise ValueError(error_message) from exc
|
767
724
|
|
768
725
|
# If some data has been streamed using `read`, only stream the remaining data
|
769
|
-
remaining_size = self.size - self.
|
726
|
+
remaining_size = self.size - self._offset
|
770
727
|
# Already read to the end
|
771
728
|
if remaining_size <= 0:
|
772
729
|
return 0
|
773
730
|
|
774
|
-
# Write the
|
775
|
-
|
776
|
-
|
777
|
-
|
731
|
+
# Write the content to the user stream if there is data left
|
732
|
+
if self._offset < len(self._current_content):
|
733
|
+
content = self._current_content[self._offset:]
|
734
|
+
stream.write(content)
|
735
|
+
self._offset += len(content)
|
736
|
+
if self._progress_hook:
|
737
|
+
self._progress_hook(len(content), self.size)
|
778
738
|
|
779
|
-
self._current_content_offset += count
|
780
|
-
self._read_offset += count
|
781
|
-
if self._progress_hook:
|
782
|
-
self._progress_hook(self._read_offset, self.size)
|
783
|
-
|
784
|
-
# If all the data was already downloaded/buffered
|
785
739
|
if self._download_complete:
|
786
740
|
return remaining_size
|
787
741
|
|
788
|
-
|
789
|
-
|
742
|
+
data_end = self._file_size
|
743
|
+
if self._end_range is not None:
|
744
|
+
# Use the length unless it is over the end of the file
|
745
|
+
data_end = min(self._file_size, self._end_range + 1)
|
746
|
+
|
747
|
+
data_start = self._get_downloader_start_with_offset()
|
790
748
|
|
791
749
|
downloader = _ChunkDownloader(
|
792
750
|
client=self._clients.blob,
|
793
751
|
non_empty_ranges=self._non_empty_ranges,
|
794
752
|
total_size=self.size,
|
795
753
|
chunk_size=self._config.max_chunk_get_size,
|
796
|
-
current_progress=self.
|
754
|
+
current_progress=self._offset,
|
797
755
|
start_range=data_start,
|
798
756
|
end_range=data_end,
|
799
757
|
stream=stream,
|
@@ -816,72 +774,8 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
816
774
|
for chunk in downloader.get_chunk_offsets():
|
817
775
|
downloader.process_chunk(chunk)
|
818
776
|
|
819
|
-
self._complete_read()
|
820
777
|
return remaining_size
|
821
778
|
|
822
|
-
def _complete_read(self):
|
823
|
-
"""Adjusts all offsets to the end of the download."""
|
824
|
-
self._download_offset = self.size
|
825
|
-
self._raw_download_offset = self.size
|
826
|
-
self._read_offset = self.size
|
827
|
-
self._current_content_offset = len(self._current_content)
|
828
|
-
|
829
|
-
def _check_and_report_progress(self):
|
830
|
-
"""Reports progress if necessary."""
|
831
|
-
# Only report progress at the end of each chunk and use download_offset to always report
|
832
|
-
# progress in terms of (approximate) byte count.
|
833
|
-
if self._progress_hook and self._current_content_offset == len(self._current_content):
|
834
|
-
self._progress_hook(self._download_offset, self.size)
|
835
|
-
|
836
|
-
def content_as_bytes(self, max_concurrency=1):
|
837
|
-
"""DEPRECATED: Download the contents of this file.
|
838
|
-
|
839
|
-
This operation is blocking until all data is downloaded.
|
840
|
-
|
841
|
-
This method is deprecated, use func:`readall` instead.
|
842
|
-
|
843
|
-
:param int max_concurrency:
|
844
|
-
The number of parallel connections with which to download.
|
845
|
-
:returns: The contents of the file as bytes.
|
846
|
-
:rtype: bytes
|
847
|
-
"""
|
848
|
-
warnings.warn(
|
849
|
-
"content_as_bytes is deprecated, use readall instead",
|
850
|
-
DeprecationWarning
|
851
|
-
)
|
852
|
-
if self._text_mode:
|
853
|
-
raise ValueError("Stream has been partially read in text mode. "
|
854
|
-
"content_as_bytes is not supported in text mode.")
|
855
|
-
|
856
|
-
self._max_concurrency = max_concurrency
|
857
|
-
return self.readall()
|
858
|
-
|
859
|
-
def content_as_text(self, max_concurrency=1, encoding="UTF-8"):
|
860
|
-
"""DEPRECATED: Download the contents of this blob, and decode as text.
|
861
|
-
|
862
|
-
This operation is blocking until all data is downloaded.
|
863
|
-
|
864
|
-
This method is deprecated, use func:`readall` instead.
|
865
|
-
|
866
|
-
:param int max_concurrency:
|
867
|
-
The number of parallel connections with which to download.
|
868
|
-
:param str encoding:
|
869
|
-
Test encoding to decode the downloaded bytes. Default is UTF-8.
|
870
|
-
:returns: The content of the file as a str.
|
871
|
-
:rtype: str
|
872
|
-
"""
|
873
|
-
warnings.warn(
|
874
|
-
"content_as_text is deprecated, use readall instead",
|
875
|
-
DeprecationWarning
|
876
|
-
)
|
877
|
-
if self._text_mode:
|
878
|
-
raise ValueError("Stream has been partially read in text mode. "
|
879
|
-
"content_as_text is not supported in text mode.")
|
880
|
-
|
881
|
-
self._max_concurrency = max_concurrency
|
882
|
-
self._encoding = encoding
|
883
|
-
return self.readall()
|
884
|
-
|
885
779
|
def download_to_stream(self, stream, max_concurrency=1):
|
886
780
|
"""DEPRECATED: Download the contents of this blob to a stream.
|
887
781
|
|
@@ -900,10 +794,6 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
900
794
|
"download_to_stream is deprecated, use readinto instead",
|
901
795
|
DeprecationWarning
|
902
796
|
)
|
903
|
-
if self._text_mode:
|
904
|
-
raise ValueError("Stream has been partially read in text mode. "
|
905
|
-
"download_to_stream is not supported in text mode.")
|
906
|
-
|
907
797
|
self._max_concurrency = max_concurrency
|
908
798
|
self.readinto(stream)
|
909
799
|
return self.properties
|