azure-storage-blob 12.20.0b1__py3-none-any.whl → 12.21.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- azure/storage/blob/__init__.py +12 -2
- azure/storage/blob/_blob_client.py +64 -51
- azure/storage/blob/_blob_service_client.py +17 -12
- azure/storage/blob/_container_client.py +33 -23
- azure/storage/blob/_download.py +277 -167
- azure/storage/blob/_generated/_azure_blob_storage.py +1 -1
- azure/storage/blob/_generated/_configuration.py +2 -2
- azure/storage/blob/_generated/_patch.py +2 -0
- azure/storage/blob/_generated/_serialization.py +1 -1
- azure/storage/blob/_generated/aio/_azure_blob_storage.py +1 -1
- azure/storage/blob/_generated/aio/_configuration.py +2 -2
- azure/storage/blob/_generated/aio/_patch.py +2 -0
- azure/storage/blob/_generated/aio/operations/_append_blob_operations.py +10 -5
- azure/storage/blob/_generated/aio/operations/_blob_operations.py +45 -26
- azure/storage/blob/_generated/aio/operations/_block_blob_operations.py +12 -7
- azure/storage/blob/_generated/aio/operations/_container_operations.py +39 -20
- azure/storage/blob/_generated/aio/operations/_page_blob_operations.py +15 -10
- azure/storage/blob/_generated/aio/operations/_patch.py +3 -0
- azure/storage/blob/_generated/aio/operations/_service_operations.py +28 -10
- azure/storage/blob/_generated/models/_patch.py +3 -0
- azure/storage/blob/_generated/operations/_append_blob_operations.py +14 -9
- azure/storage/blob/_generated/operations/_blob_operations.py +76 -51
- azure/storage/blob/_generated/operations/_block_blob_operations.py +18 -13
- azure/storage/blob/_generated/operations/_container_operations.py +64 -39
- azure/storage/blob/_generated/operations/_page_blob_operations.py +24 -19
- azure/storage/blob/_generated/operations/_patch.py +3 -0
- azure/storage/blob/_generated/operations/_service_operations.py +43 -19
- azure/storage/blob/_generated/py.typed +1 -0
- azure/storage/blob/_lease.py +6 -5
- azure/storage/blob/_models.py +1 -1
- azure/storage/blob/_serialize.py +1 -0
- azure/storage/blob/_shared/authentication.py +62 -4
- azure/storage/blob/_shared/base_client.py +1 -1
- azure/storage/blob/_shared/base_client_async.py +3 -2
- azure/storage/blob/_shared/models.py +13 -12
- azure/storage/blob/_shared/shared_access_signature.py +1 -0
- azure/storage/blob/_shared_access_signature.py +1 -0
- azure/storage/blob/_version.py +1 -1
- azure/storage/blob/aio/__init__.py +13 -4
- azure/storage/blob/aio/_blob_client_async.py +50 -47
- azure/storage/blob/aio/_blob_service_client_async.py +11 -11
- azure/storage/blob/aio/_container_client_async.py +23 -20
- azure/storage/blob/aio/_download_async.py +317 -209
- azure/storage/blob/aio/_lease_async.py +6 -6
- {azure_storage_blob-12.20.0b1.dist-info → azure_storage_blob-12.21.0.dist-info}/METADATA +2 -2
- azure_storage_blob-12.21.0.dist-info/RECORD +82 -0
- azure_storage_blob-12.20.0b1.dist-info/RECORD +0 -81
- {azure_storage_blob-12.20.0b1.dist-info → azure_storage_blob-12.21.0.dist-info}/LICENSE +0 -0
- {azure_storage_blob-12.20.0b1.dist-info → azure_storage_blob-12.21.0.dist-info}/WHEEL +0 -0
- {azure_storage_blob-12.20.0b1.dist-info → azure_storage_blob-12.21.0.dist-info}/top_level.txt +0 -0
azure/storage/blob/_download.py
CHANGED
@@ -3,13 +3,13 @@
|
|
3
3
|
# Licensed under the MIT License. See License.txt in the project root for
|
4
4
|
# license information.
|
5
5
|
# --------------------------------------------------------------------------
|
6
|
-
|
6
|
+
import codecs
|
7
7
|
import sys
|
8
8
|
import threading
|
9
9
|
import time
|
10
10
|
import warnings
|
11
|
-
from io import BytesIO
|
12
|
-
from typing import Generic, IO, Iterator, Optional, TypeVar
|
11
|
+
from io import BytesIO, StringIO
|
12
|
+
from typing import Generic, IO, Iterator, Optional, overload, TypeVar, Union
|
13
13
|
|
14
14
|
from azure.core.exceptions import DecodeError, HttpResponseError, IncompleteReadError
|
15
15
|
from azure.core.tracing.common import with_current_context
|
@@ -125,7 +125,7 @@ class _ChunkDownloader(object): # pylint: disable=too-many-instance-attributes
|
|
125
125
|
|
126
126
|
def process_chunk(self, chunk_start):
|
127
127
|
chunk_start, chunk_end = self._calculate_range(chunk_start)
|
128
|
-
chunk_data = self._download_chunk(chunk_start, chunk_end - 1)
|
128
|
+
chunk_data, _ = self._download_chunk(chunk_start, chunk_end - 1)
|
129
129
|
length = chunk_end - chunk_start
|
130
130
|
if length > 0:
|
131
131
|
self._write_to_stream(chunk_data, chunk_start)
|
@@ -186,8 +186,8 @@ class _ChunkDownloader(object): # pylint: disable=too-many-instance-attributes
|
|
186
186
|
# No need to download the empty chunk from server if there's no data in the chunk to be downloaded.
|
187
187
|
# Do optimize and create empty chunk locally if condition is met.
|
188
188
|
if self._do_optimize(download_range[0], download_range[1]):
|
189
|
-
|
190
|
-
chunk_data = b"\x00" *
|
189
|
+
content_length = download_range[1] - download_range[0] + 1
|
190
|
+
chunk_data = b"\x00" * content_length
|
191
191
|
else:
|
192
192
|
range_header, range_validation = validate_and_format_range_headers(
|
193
193
|
download_range[0],
|
@@ -218,13 +218,14 @@ class _ChunkDownloader(object): # pylint: disable=too-many-instance-attributes
|
|
218
218
|
if retry_total <= 0:
|
219
219
|
raise HttpResponseError(error, error=error) from error
|
220
220
|
time.sleep(1)
|
221
|
+
content_length = response.content_length
|
221
222
|
|
222
223
|
# This makes sure that if_match is set so that we can validate
|
223
224
|
# that subsequent downloads are to an unmodified blob
|
224
225
|
if self.request_options.get("modified_access_conditions"):
|
225
226
|
self.request_options["modified_access_conditions"].if_match = response.properties.etag
|
226
227
|
|
227
|
-
return chunk_data
|
228
|
+
return chunk_data, content_length
|
228
229
|
|
229
230
|
|
230
231
|
class _ChunkIterator(object):
|
@@ -264,7 +265,7 @@ class _ChunkIterator(object):
|
|
264
265
|
|
265
266
|
try:
|
266
267
|
chunk = next(self._iter_chunks)
|
267
|
-
self._current_content += self._iter_downloader.yield_chunk(chunk)
|
268
|
+
self._current_content += self._iter_downloader.yield_chunk(chunk)[0]
|
268
269
|
except StopIteration as e:
|
269
270
|
self._complete = True
|
270
271
|
if self._current_content:
|
@@ -328,14 +329,27 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
328
329
|
self._encryption_options = encryption_options or {}
|
329
330
|
self._progress_hook = kwargs.pop('progress_hook', None)
|
330
331
|
self._request_options = kwargs
|
332
|
+
self._response = None
|
331
333
|
self._location_mode = None
|
332
|
-
self.
|
333
|
-
self.
|
334
|
-
self._file_size = None
|
334
|
+
self._current_content = b''
|
335
|
+
self._file_size = 0
|
335
336
|
self._non_empty_ranges = None
|
336
|
-
self._response = None
|
337
337
|
self._encryption_data = None
|
338
|
-
|
338
|
+
|
339
|
+
# The content download offset, after any processing (decryption), in bytes
|
340
|
+
self._download_offset = 0
|
341
|
+
# The raw download offset, before processing (decryption), in bytes
|
342
|
+
self._raw_download_offset = 0
|
343
|
+
# The offset the stream has been read to in bytes or chars depending on mode
|
344
|
+
self._read_offset = 0
|
345
|
+
# The offset into current_content that has been consumed in bytes or chars depending on mode
|
346
|
+
self._current_content_offset = 0
|
347
|
+
|
348
|
+
self._text_mode: Optional[bool] = None
|
349
|
+
self._decoder = None
|
350
|
+
# Whether the current content is the first chunk of download content or not
|
351
|
+
self._first_chunk = True
|
352
|
+
self._download_start = self._start_range or 0
|
339
353
|
|
340
354
|
# The cls is passed in via download_cls to avoid conflicting arg name with Generic.__new__
|
341
355
|
# but needs to be changed to cls in the request options.
|
@@ -347,14 +361,14 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
347
361
|
# The service only provides transactional MD5s for chunks under 4MB.
|
348
362
|
# If validate_content is on, get only self.MAX_CHUNK_GET_SIZE for the first
|
349
363
|
# chunk so a transactional MD5 can be retrieved.
|
350
|
-
|
364
|
+
first_get_size = (
|
351
365
|
self._config.max_single_get_size if not self._validate_content else self._config.max_chunk_get_size
|
352
366
|
)
|
353
|
-
initial_request_start = self.
|
354
|
-
if self._end_range is not None and self._end_range -
|
367
|
+
initial_request_start = self._download_start
|
368
|
+
if self._end_range is not None and self._end_range - initial_request_start < first_get_size:
|
355
369
|
initial_request_end = self._end_range
|
356
370
|
else:
|
357
|
-
initial_request_end = initial_request_start +
|
371
|
+
initial_request_end = initial_request_start + first_get_size - 1
|
358
372
|
|
359
373
|
self._initial_range, self._initial_offset = process_range_and_offset(
|
360
374
|
initial_request_start,
|
@@ -369,12 +383,11 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
369
383
|
self.properties.name = self.name
|
370
384
|
self.properties.container = self.container
|
371
385
|
|
372
|
-
# Set the content length to the download size instead of the size of
|
373
|
-
# the last range
|
386
|
+
# Set the content length to the download size instead of the size of the last range
|
374
387
|
self.properties.size = self.size
|
375
|
-
|
376
|
-
|
377
|
-
|
388
|
+
self.properties.content_range = (f"bytes {self._download_start}-"
|
389
|
+
f"{self._end_range if self._end_range is not None else self._file_size - 1}/"
|
390
|
+
f"{self._file_size}")
|
378
391
|
|
379
392
|
# Overwrite the content MD5 as it is the MD5 for the last range instead
|
380
393
|
# of the stored MD5
|
@@ -399,6 +412,12 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
399
412
|
# Restore cls for download
|
400
413
|
self._request_options['cls'] = download_cls
|
401
414
|
|
415
|
+
@property
|
416
|
+
def _download_complete(self):
|
417
|
+
if is_encryption_v2(self._encryption_data):
|
418
|
+
return self._download_offset >= self.size
|
419
|
+
return self._raw_download_offset >= self.size
|
420
|
+
|
402
421
|
def _initial_request(self):
|
403
422
|
range_header, range_validation = validate_and_format_range_headers(
|
404
423
|
self._initial_range[0],
|
@@ -435,7 +454,7 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
435
454
|
|
436
455
|
if self._end_range is not None:
|
437
456
|
# Use the end range index unless it is over the end of the file
|
438
|
-
self.size = min(self._file_size, self._end_range - self._start_range + 1)
|
457
|
+
self.size = min(self._file_size - self._start_range, self._end_range - self._start_range + 1)
|
439
458
|
elif self._start_range is not None:
|
440
459
|
self.size = self._file_size - self._start_range
|
441
460
|
else:
|
@@ -478,6 +497,8 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
478
497
|
if retry_total <= 0:
|
479
498
|
raise HttpResponseError(error, error=error) from error
|
480
499
|
time.sleep(1)
|
500
|
+
self._download_offset += len(self._current_content)
|
501
|
+
self._raw_download_offset += response.content_length
|
481
502
|
|
482
503
|
# get page ranges to optimize downloading sparse page blob
|
483
504
|
if response.properties.blob_type == 'PageBlob':
|
@@ -491,33 +512,19 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
491
512
|
except HttpResponseError:
|
492
513
|
pass
|
493
514
|
|
494
|
-
# If the file is small, the download is complete at this point.
|
495
|
-
# If file size is large, download the rest of the file in chunks.
|
496
|
-
# For encryption V2, calculate based on size of decrypted content, not download size.
|
497
|
-
if is_encryption_v2(self._encryption_data):
|
498
|
-
self._download_complete = len(self._current_content) >= self.size
|
499
|
-
else:
|
500
|
-
self._download_complete = response.properties.size >= self.size
|
501
|
-
|
502
515
|
if not self._download_complete and self._request_options.get("modified_access_conditions"):
|
503
516
|
self._request_options["modified_access_conditions"].if_match = response.properties.etag
|
504
517
|
|
505
518
|
return response
|
506
519
|
|
507
|
-
def _get_downloader_start_with_offset(self):
|
508
|
-
# Start where the initial request download ended
|
509
|
-
start = self._initial_range[1] + 1
|
510
|
-
# For encryption V2 only, adjust start to the end of the fetched data rather than download size
|
511
|
-
if self._encryption_options.get("key") is not None or self._encryption_options.get("resolver") is not None:
|
512
|
-
start = (self._start_range or 0) + len(self._current_content)
|
513
|
-
|
514
|
-
# Adjust the start based on any data read past the current content
|
515
|
-
start += (self._offset - len(self._current_content))
|
516
|
-
return start
|
517
|
-
|
518
520
|
def chunks(self):
|
519
521
|
# type: () -> Iterator[bytes]
|
520
|
-
"""
|
522
|
+
"""
|
523
|
+
Iterate over chunks in the download stream. Note, the iterator returned will
|
524
|
+
iterate over the entire download content, regardless of any data that was
|
525
|
+
previously read.
|
526
|
+
|
527
|
+
NOTE: If the stream has been partially read, some data may be re-downloaded by the iterator.
|
521
528
|
|
522
529
|
:returns: An iterator of the chunks in the download stream.
|
523
530
|
:rtype: Iterator[bytes]
|
@@ -531,81 +538,124 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
531
538
|
:dedent: 12
|
532
539
|
:caption: Download a blob using chunks().
|
533
540
|
"""
|
534
|
-
if self.
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
+
if self._text_mode:
|
542
|
+
raise ValueError("Stream has been partially read in text mode. chunks is not supported in text mode.")
|
543
|
+
if self._encoding:
|
544
|
+
warnings.warn("Encoding is ignored with chunks as only bytes are supported.")
|
545
|
+
|
546
|
+
iter_downloader = None
|
547
|
+
# If we still have the first chunk buffered, use it. Otherwise, download all content again
|
548
|
+
if not self._first_chunk or not self._download_complete:
|
549
|
+
if self._first_chunk:
|
550
|
+
start = self._download_start + len(self._current_content)
|
551
|
+
current_progress = len(self._current_content)
|
552
|
+
else:
|
553
|
+
start = self._download_start
|
554
|
+
current_progress = 0
|
541
555
|
|
542
|
-
|
543
|
-
# For encryption, adjust start to the end of the fetched data rather than download size
|
544
|
-
if self._encryption_options.get("key") is not None or self._encryption_options.get("resolver") is not None:
|
545
|
-
data_start = (self._start_range or 0) + len(self._current_content)
|
556
|
+
end = self._download_start + self.size
|
546
557
|
|
547
558
|
iter_downloader = _ChunkDownloader(
|
548
559
|
client=self._clients.blob,
|
549
560
|
non_empty_ranges=self._non_empty_ranges,
|
550
561
|
total_size=self.size,
|
551
562
|
chunk_size=self._config.max_chunk_get_size,
|
552
|
-
current_progress=
|
553
|
-
start_range=
|
554
|
-
end_range=
|
555
|
-
stream=None,
|
556
|
-
parallel=False,
|
563
|
+
current_progress=current_progress,
|
564
|
+
start_range=start,
|
565
|
+
end_range=end,
|
557
566
|
validate_content=self._validate_content,
|
558
567
|
encryption_options=self._encryption_options,
|
559
568
|
encryption_data=self._encryption_data,
|
560
569
|
use_location=self._location_mode,
|
561
570
|
**self._request_options
|
562
571
|
)
|
572
|
+
|
573
|
+
initial_content = self._current_content if self._first_chunk else b''
|
563
574
|
return _ChunkIterator(
|
564
575
|
size=self.size,
|
565
|
-
content=
|
576
|
+
content=initial_content,
|
566
577
|
downloader=iter_downloader,
|
567
578
|
chunk_size=self._config.max_chunk_get_size)
|
568
579
|
|
569
|
-
|
580
|
+
@overload
|
581
|
+
def read(self, size: int = -1) -> T:
|
582
|
+
...
|
583
|
+
|
584
|
+
@overload
|
585
|
+
def read(self, *, chars: Optional[int] = None) -> T:
|
586
|
+
...
|
587
|
+
|
588
|
+
# pylint: disable-next=too-many-statements,too-many-branches
|
589
|
+
def read(self, size: int = -1, *, chars: Optional[int] = None) -> T:
|
570
590
|
"""
|
571
|
-
Read
|
572
|
-
|
591
|
+
Read the specified bytes or chars from the stream. If `encoding`
|
592
|
+
was specified on `download_blob`, it is recommended to use the
|
593
|
+
chars parameter to read a specific number of chars to avoid decoding
|
594
|
+
errors. If size/chars is unspecified or negative all bytes will be read.
|
573
595
|
|
574
|
-
:param
|
596
|
+
:param int size:
|
575
597
|
The number of bytes to download from the stream. Leave unspecified
|
576
|
-
or set
|
598
|
+
or set negative to download all bytes.
|
599
|
+
:keyword Optional[int] chars:
|
600
|
+
The number of chars to download from the stream. Leave unspecified
|
601
|
+
or set negative to download all chars. Note, this can only be used
|
602
|
+
when encoding is specified on `download_blob`.
|
577
603
|
:returns:
|
578
604
|
The requested data as bytes or a string if encoding was specified. If
|
579
605
|
the return value is empty, there is no more data to read.
|
580
606
|
:rtype: T
|
581
607
|
"""
|
582
|
-
if size
|
583
|
-
|
608
|
+
if size > -1 and self._encoding:
|
609
|
+
warnings.warn(
|
610
|
+
"Size parameter specified with text encoding enabled. It is recommended to use chars "
|
611
|
+
"to read a specific number of characters instead."
|
612
|
+
)
|
613
|
+
if size > -1 and chars is not None:
|
614
|
+
raise ValueError("Cannot specify both size and chars.")
|
615
|
+
if not self._encoding and chars is not None:
|
616
|
+
raise ValueError("Must specify encoding to read chars.")
|
617
|
+
if self._text_mode and size > -1:
|
618
|
+
raise ValueError("Stream has been partially read in text mode. Please use chars.")
|
619
|
+
if self._text_mode is False and chars is not None:
|
620
|
+
raise ValueError("Stream has been partially read in bytes mode. Please use size.")
|
621
|
+
|
584
622
|
# Empty blob or already read to the end
|
585
|
-
if size == 0 or
|
623
|
+
if (size == 0 or chars == 0 or
|
624
|
+
(self._download_complete and self._current_content_offset >= len(self._current_content))):
|
586
625
|
return b'' if not self._encoding else ''
|
587
626
|
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
|
627
|
+
if not self._text_mode and chars is not None:
|
628
|
+
self._text_mode = True
|
629
|
+
self._decoder = codecs.getincrementaldecoder(self._encoding)('strict')
|
630
|
+
self._current_content = self._decoder.decode(self._current_content, final=self._download_complete)
|
631
|
+
elif self._text_mode is None:
|
632
|
+
self._text_mode = False
|
633
|
+
|
634
|
+
output_stream: Union[BytesIO, StringIO]
|
635
|
+
if self._text_mode:
|
636
|
+
output_stream = StringIO()
|
637
|
+
size = chars if chars else sys.maxsize
|
638
|
+
else:
|
639
|
+
output_stream = BytesIO()
|
640
|
+
size = size if size > 0 else sys.maxsize
|
641
|
+
readall = size == sys.maxsize
|
642
|
+
count = 0
|
643
|
+
|
644
|
+
# Start by reading from current_content
|
645
|
+
start = self._current_content_offset
|
646
|
+
length = min(len(self._current_content) - self._current_content_offset, size - count)
|
647
|
+
read = output_stream.write(self._current_content[start:start + length])
|
648
|
+
|
649
|
+
count += read
|
650
|
+
self._current_content_offset += read
|
651
|
+
self._read_offset += read
|
652
|
+
self._check_and_report_progress()
|
653
|
+
|
654
|
+
remaining = size - count
|
655
|
+
if remaining > 0 and not self._download_complete:
|
656
|
+
# Create a downloader than can download the rest of the file
|
657
|
+
start = self._download_start + self._download_offset
|
658
|
+
end = self._download_start + self.size
|
609
659
|
|
610
660
|
parallel = self._max_concurrency > 1
|
611
661
|
downloader = _ChunkDownloader(
|
@@ -613,10 +663,10 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
613
663
|
non_empty_ranges=self._non_empty_ranges,
|
614
664
|
total_size=self.size,
|
615
665
|
chunk_size=self._config.max_chunk_get_size,
|
616
|
-
current_progress=self.
|
617
|
-
start_range=
|
618
|
-
end_range=
|
619
|
-
stream=
|
666
|
+
current_progress=self._read_offset,
|
667
|
+
start_range=start,
|
668
|
+
end_range=end,
|
669
|
+
stream=output_stream,
|
620
670
|
parallel=parallel,
|
621
671
|
validate_content=self._validate_content,
|
622
672
|
encryption_options=self._encryption_options,
|
@@ -625,23 +675,57 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
625
675
|
progress_hook=self._progress_hook,
|
626
676
|
**self._request_options
|
627
677
|
)
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
678
|
+
self._first_chunk = False
|
679
|
+
|
680
|
+
# When reading all data, have the downloader read everything into the stream.
|
681
|
+
# Else, read one chunk at a time (using the downloader as an iterator) until
|
682
|
+
# the requested size is reached.
|
683
|
+
chunks_iter = downloader.get_chunk_offsets()
|
684
|
+
if readall and not self._text_mode:
|
685
|
+
# Only do parallel if there is more than one chunk left to download
|
686
|
+
if parallel and (self.size - self._download_offset) > self._config.max_chunk_get_size:
|
687
|
+
import concurrent.futures
|
688
|
+
with concurrent.futures.ThreadPoolExecutor(self._max_concurrency) as executor:
|
689
|
+
list(executor.map(
|
633
690
|
with_current_context(downloader.process_chunk),
|
634
691
|
downloader.get_chunk_offsets()
|
635
692
|
))
|
636
|
-
|
637
|
-
|
638
|
-
|
693
|
+
else:
|
694
|
+
for chunk in chunks_iter:
|
695
|
+
downloader.process_chunk(chunk)
|
639
696
|
|
640
|
-
|
697
|
+
self._complete_read()
|
698
|
+
|
699
|
+
else:
|
700
|
+
while (chunk := next(chunks_iter, None)) is not None and remaining > 0:
|
701
|
+
chunk_data, content_length = downloader.yield_chunk(chunk)
|
702
|
+
self._download_offset += len(chunk_data)
|
703
|
+
self._raw_download_offset += content_length
|
704
|
+
self._current_content = self._decoder.decode(
|
705
|
+
chunk_data, final=self._download_complete) if self._text_mode else chunk_data
|
706
|
+
|
707
|
+
if remaining < len(self._current_content):
|
708
|
+
read = output_stream.write(self._current_content[:remaining])
|
709
|
+
else:
|
710
|
+
read = output_stream.write(self._current_content)
|
711
|
+
|
712
|
+
self._current_content_offset = read
|
713
|
+
self._read_offset += read
|
714
|
+
remaining -= read
|
715
|
+
self._check_and_report_progress()
|
716
|
+
|
717
|
+
data = output_stream.getvalue()
|
718
|
+
if not self._text_mode and self._encoding:
|
719
|
+
try:
|
720
|
+
# This is technically incorrect to do, but we have it for backwards compatibility.
|
721
|
+
data = data.decode(self._encoding)
|
722
|
+
except UnicodeDecodeError:
|
723
|
+
warnings.warn(
|
724
|
+
"Encountered a decoding error while decoding blob data from a partial read. "
|
725
|
+
"Try using the `chars` keyword instead to read in text mode."
|
726
|
+
)
|
727
|
+
raise
|
641
728
|
|
642
|
-
data = stream.getvalue()
|
643
|
-
if self._encoding:
|
644
|
-
return data.decode(self._encoding)
|
645
729
|
return data
|
646
730
|
|
647
731
|
def readall(self) -> T:
|
@@ -652,53 +736,7 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
652
736
|
:returns: The requested data as bytes or a string if encoding was specified.
|
653
737
|
:rtype: T
|
654
738
|
"""
|
655
|
-
|
656
|
-
self.readinto(stream)
|
657
|
-
data = stream.getvalue()
|
658
|
-
if self._encoding:
|
659
|
-
return data.decode(self._encoding)
|
660
|
-
return data
|
661
|
-
|
662
|
-
def content_as_bytes(self, max_concurrency=1):
|
663
|
-
"""DEPRECATED: Download the contents of this file.
|
664
|
-
|
665
|
-
This operation is blocking until all data is downloaded.
|
666
|
-
|
667
|
-
This method is deprecated, use func:`readall` instead.
|
668
|
-
|
669
|
-
:param int max_concurrency:
|
670
|
-
The number of parallel connections with which to download.
|
671
|
-
:returns: The contents of the file as bytes.
|
672
|
-
:rtype: bytes
|
673
|
-
"""
|
674
|
-
warnings.warn(
|
675
|
-
"content_as_bytes is deprecated, use readall instead",
|
676
|
-
DeprecationWarning
|
677
|
-
)
|
678
|
-
self._max_concurrency = max_concurrency
|
679
|
-
return self.readall()
|
680
|
-
|
681
|
-
def content_as_text(self, max_concurrency=1, encoding="UTF-8"):
|
682
|
-
"""DEPRECATED: Download the contents of this blob, and decode as text.
|
683
|
-
|
684
|
-
This operation is blocking until all data is downloaded.
|
685
|
-
|
686
|
-
This method is deprecated, use func:`readall` instead.
|
687
|
-
|
688
|
-
:param int max_concurrency:
|
689
|
-
The number of parallel connections with which to download.
|
690
|
-
:param str encoding:
|
691
|
-
Test encoding to decode the downloaded bytes. Default is UTF-8.
|
692
|
-
:returns: The content of the file as a str.
|
693
|
-
:rtype: str
|
694
|
-
"""
|
695
|
-
warnings.warn(
|
696
|
-
"content_as_text is deprecated, use readall instead",
|
697
|
-
DeprecationWarning
|
698
|
-
)
|
699
|
-
self._max_concurrency = max_concurrency
|
700
|
-
self._encoding = encoding
|
701
|
-
return self.readall()
|
739
|
+
return self.read()
|
702
740
|
|
703
741
|
def readinto(self, stream: IO[bytes]) -> int:
|
704
742
|
"""Download the contents of this file to a stream.
|
@@ -710,6 +748,11 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
710
748
|
:returns: The number of bytes read.
|
711
749
|
:rtype: int
|
712
750
|
"""
|
751
|
+
if self._text_mode:
|
752
|
+
raise ValueError("Stream has been partially read in text mode. readinto is not supported in text mode.")
|
753
|
+
if self._encoding:
|
754
|
+
warnings.warn("Encoding is ignored with readinto as only byte streams are supported.")
|
755
|
+
|
713
756
|
# The stream must be seekable if parallel download is required
|
714
757
|
parallel = self._max_concurrency > 1
|
715
758
|
if parallel:
|
@@ -723,35 +766,34 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
723
766
|
raise ValueError(error_message) from exc
|
724
767
|
|
725
768
|
# If some data has been streamed using `read`, only stream the remaining data
|
726
|
-
remaining_size = self.size - self.
|
769
|
+
remaining_size = self.size - self._read_offset
|
727
770
|
# Already read to the end
|
728
771
|
if remaining_size <= 0:
|
729
772
|
return 0
|
730
773
|
|
731
|
-
# Write the content to the user stream
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
self._offset += len(content)
|
736
|
-
if self._progress_hook:
|
737
|
-
self._progress_hook(len(content), self.size)
|
774
|
+
# Write the current content to the user stream
|
775
|
+
current_remaining = len(self._current_content) - self._current_content_offset
|
776
|
+
start = self._current_content_offset
|
777
|
+
count = stream.write(self._current_content[start:start + current_remaining])
|
738
778
|
|
779
|
+
self._current_content_offset += count
|
780
|
+
self._read_offset += count
|
781
|
+
if self._progress_hook:
|
782
|
+
self._progress_hook(self._read_offset, self.size)
|
783
|
+
|
784
|
+
# If all the data was already downloaded/buffered
|
739
785
|
if self._download_complete:
|
740
786
|
return remaining_size
|
741
787
|
|
742
|
-
|
743
|
-
|
744
|
-
# Use the length unless it is over the end of the file
|
745
|
-
data_end = min(self._file_size, self._end_range + 1)
|
746
|
-
|
747
|
-
data_start = self._get_downloader_start_with_offset()
|
788
|
+
data_start = self._download_start + self._read_offset
|
789
|
+
data_end = self._download_start + self.size
|
748
790
|
|
749
791
|
downloader = _ChunkDownloader(
|
750
792
|
client=self._clients.blob,
|
751
793
|
non_empty_ranges=self._non_empty_ranges,
|
752
794
|
total_size=self.size,
|
753
795
|
chunk_size=self._config.max_chunk_get_size,
|
754
|
-
current_progress=self.
|
796
|
+
current_progress=self._read_offset,
|
755
797
|
start_range=data_start,
|
756
798
|
end_range=data_end,
|
757
799
|
stream=stream,
|
@@ -774,8 +816,72 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
774
816
|
for chunk in downloader.get_chunk_offsets():
|
775
817
|
downloader.process_chunk(chunk)
|
776
818
|
|
819
|
+
self._complete_read()
|
777
820
|
return remaining_size
|
778
821
|
|
822
|
+
def _complete_read(self):
|
823
|
+
"""Adjusts all offsets to the end of the download."""
|
824
|
+
self._download_offset = self.size
|
825
|
+
self._raw_download_offset = self.size
|
826
|
+
self._read_offset = self.size
|
827
|
+
self._current_content_offset = len(self._current_content)
|
828
|
+
|
829
|
+
def _check_and_report_progress(self):
|
830
|
+
"""Reports progress if necessary."""
|
831
|
+
# Only report progress at the end of each chunk and use download_offset to always report
|
832
|
+
# progress in terms of (approximate) byte count.
|
833
|
+
if self._progress_hook and self._current_content_offset == len(self._current_content):
|
834
|
+
self._progress_hook(self._download_offset, self.size)
|
835
|
+
|
836
|
+
def content_as_bytes(self, max_concurrency=1):
|
837
|
+
"""DEPRECATED: Download the contents of this file.
|
838
|
+
|
839
|
+
This operation is blocking until all data is downloaded.
|
840
|
+
|
841
|
+
This method is deprecated, use func:`readall` instead.
|
842
|
+
|
843
|
+
:param int max_concurrency:
|
844
|
+
The number of parallel connections with which to download.
|
845
|
+
:returns: The contents of the file as bytes.
|
846
|
+
:rtype: bytes
|
847
|
+
"""
|
848
|
+
warnings.warn(
|
849
|
+
"content_as_bytes is deprecated, use readall instead",
|
850
|
+
DeprecationWarning
|
851
|
+
)
|
852
|
+
if self._text_mode:
|
853
|
+
raise ValueError("Stream has been partially read in text mode. "
|
854
|
+
"content_as_bytes is not supported in text mode.")
|
855
|
+
|
856
|
+
self._max_concurrency = max_concurrency
|
857
|
+
return self.readall()
|
858
|
+
|
859
|
+
def content_as_text(self, max_concurrency=1, encoding="UTF-8"):
|
860
|
+
"""DEPRECATED: Download the contents of this blob, and decode as text.
|
861
|
+
|
862
|
+
This operation is blocking until all data is downloaded.
|
863
|
+
|
864
|
+
This method is deprecated, use func:`readall` instead.
|
865
|
+
|
866
|
+
:param int max_concurrency:
|
867
|
+
The number of parallel connections with which to download.
|
868
|
+
:param str encoding:
|
869
|
+
Test encoding to decode the downloaded bytes. Default is UTF-8.
|
870
|
+
:returns: The content of the file as a str.
|
871
|
+
:rtype: str
|
872
|
+
"""
|
873
|
+
warnings.warn(
|
874
|
+
"content_as_text is deprecated, use readall instead",
|
875
|
+
DeprecationWarning
|
876
|
+
)
|
877
|
+
if self._text_mode:
|
878
|
+
raise ValueError("Stream has been partially read in text mode. "
|
879
|
+
"content_as_text is not supported in text mode.")
|
880
|
+
|
881
|
+
self._max_concurrency = max_concurrency
|
882
|
+
self._encoding = encoding
|
883
|
+
return self.readall()
|
884
|
+
|
779
885
|
def download_to_stream(self, stream, max_concurrency=1):
|
780
886
|
"""DEPRECATED: Download the contents of this blob to a stream.
|
781
887
|
|
@@ -794,6 +900,10 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
794
900
|
"download_to_stream is deprecated, use readinto instead",
|
795
901
|
DeprecationWarning
|
796
902
|
)
|
903
|
+
if self._text_mode:
|
904
|
+
raise ValueError("Stream has been partially read in text mode. "
|
905
|
+
"download_to_stream is not supported in text mode.")
|
906
|
+
|
797
907
|
self._max_concurrency = max_concurrency
|
798
908
|
self.readinto(stream)
|
799
909
|
return self.properties
|
@@ -47,7 +47,7 @@ class AzureBlobStorage: # pylint: disable=client-accepts-api-version-keyword
|
|
47
47
|
:param base_url: Service URL. Required. Default value is "".
|
48
48
|
:type base_url: str
|
49
49
|
:keyword version: Specifies the version of the operation to use for this request. Default value
|
50
|
-
is "
|
50
|
+
is "2024-08-04". Note that overriding this default value may result in unsupported behavior.
|
51
51
|
:paramtype version: str
|
52
52
|
"""
|
53
53
|
|