azure-storage-blob 12.20.0b1__py3-none-any.whl → 12.21.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. azure/storage/blob/__init__.py +12 -2
  2. azure/storage/blob/_blob_client.py +64 -51
  3. azure/storage/blob/_blob_service_client.py +17 -12
  4. azure/storage/blob/_container_client.py +33 -23
  5. azure/storage/blob/_download.py +277 -167
  6. azure/storage/blob/_generated/_azure_blob_storage.py +1 -1
  7. azure/storage/blob/_generated/_configuration.py +2 -2
  8. azure/storage/blob/_generated/_patch.py +2 -0
  9. azure/storage/blob/_generated/_serialization.py +1 -1
  10. azure/storage/blob/_generated/aio/_azure_blob_storage.py +1 -1
  11. azure/storage/blob/_generated/aio/_configuration.py +2 -2
  12. azure/storage/blob/_generated/aio/_patch.py +2 -0
  13. azure/storage/blob/_generated/aio/operations/_append_blob_operations.py +10 -5
  14. azure/storage/blob/_generated/aio/operations/_blob_operations.py +45 -26
  15. azure/storage/blob/_generated/aio/operations/_block_blob_operations.py +12 -7
  16. azure/storage/blob/_generated/aio/operations/_container_operations.py +39 -20
  17. azure/storage/blob/_generated/aio/operations/_page_blob_operations.py +15 -10
  18. azure/storage/blob/_generated/aio/operations/_patch.py +3 -0
  19. azure/storage/blob/_generated/aio/operations/_service_operations.py +28 -10
  20. azure/storage/blob/_generated/models/_patch.py +3 -0
  21. azure/storage/blob/_generated/operations/_append_blob_operations.py +14 -9
  22. azure/storage/blob/_generated/operations/_blob_operations.py +76 -51
  23. azure/storage/blob/_generated/operations/_block_blob_operations.py +18 -13
  24. azure/storage/blob/_generated/operations/_container_operations.py +64 -39
  25. azure/storage/blob/_generated/operations/_page_blob_operations.py +24 -19
  26. azure/storage/blob/_generated/operations/_patch.py +3 -0
  27. azure/storage/blob/_generated/operations/_service_operations.py +43 -19
  28. azure/storage/blob/_generated/py.typed +1 -0
  29. azure/storage/blob/_lease.py +6 -5
  30. azure/storage/blob/_models.py +1 -1
  31. azure/storage/blob/_serialize.py +1 -0
  32. azure/storage/blob/_shared/authentication.py +62 -4
  33. azure/storage/blob/_shared/base_client.py +1 -1
  34. azure/storage/blob/_shared/base_client_async.py +3 -2
  35. azure/storage/blob/_shared/models.py +13 -12
  36. azure/storage/blob/_shared/shared_access_signature.py +1 -0
  37. azure/storage/blob/_shared_access_signature.py +1 -0
  38. azure/storage/blob/_version.py +1 -1
  39. azure/storage/blob/aio/__init__.py +13 -4
  40. azure/storage/blob/aio/_blob_client_async.py +50 -47
  41. azure/storage/blob/aio/_blob_service_client_async.py +11 -11
  42. azure/storage/blob/aio/_container_client_async.py +23 -20
  43. azure/storage/blob/aio/_download_async.py +317 -209
  44. azure/storage/blob/aio/_lease_async.py +6 -6
  45. {azure_storage_blob-12.20.0b1.dist-info → azure_storage_blob-12.21.0.dist-info}/METADATA +2 -2
  46. azure_storage_blob-12.21.0.dist-info/RECORD +82 -0
  47. azure_storage_blob-12.20.0b1.dist-info/RECORD +0 -81
  48. {azure_storage_blob-12.20.0b1.dist-info → azure_storage_blob-12.21.0.dist-info}/LICENSE +0 -0
  49. {azure_storage_blob-12.20.0b1.dist-info → azure_storage_blob-12.21.0.dist-info}/WHEEL +0 -0
  50. {azure_storage_blob-12.20.0b1.dist-info → azure_storage_blob-12.21.0.dist-info}/top_level.txt +0 -0
@@ -3,13 +3,13 @@
3
3
  # Licensed under the MIT License. See License.txt in the project root for
4
4
  # license information.
5
5
  # --------------------------------------------------------------------------
6
-
6
+ import codecs
7
7
  import sys
8
8
  import threading
9
9
  import time
10
10
  import warnings
11
- from io import BytesIO
12
- from typing import Generic, IO, Iterator, Optional, TypeVar
11
+ from io import BytesIO, StringIO
12
+ from typing import Generic, IO, Iterator, Optional, overload, TypeVar, Union
13
13
 
14
14
  from azure.core.exceptions import DecodeError, HttpResponseError, IncompleteReadError
15
15
  from azure.core.tracing.common import with_current_context
@@ -125,7 +125,7 @@ class _ChunkDownloader(object): # pylint: disable=too-many-instance-attributes
125
125
 
126
126
  def process_chunk(self, chunk_start):
127
127
  chunk_start, chunk_end = self._calculate_range(chunk_start)
128
- chunk_data = self._download_chunk(chunk_start, chunk_end - 1)
128
+ chunk_data, _ = self._download_chunk(chunk_start, chunk_end - 1)
129
129
  length = chunk_end - chunk_start
130
130
  if length > 0:
131
131
  self._write_to_stream(chunk_data, chunk_start)
@@ -186,8 +186,8 @@ class _ChunkDownloader(object): # pylint: disable=too-many-instance-attributes
186
186
  # No need to download the empty chunk from server if there's no data in the chunk to be downloaded.
187
187
  # Do optimize and create empty chunk locally if condition is met.
188
188
  if self._do_optimize(download_range[0], download_range[1]):
189
- data_size = download_range[1] - download_range[0] + 1
190
- chunk_data = b"\x00" * data_size
189
+ content_length = download_range[1] - download_range[0] + 1
190
+ chunk_data = b"\x00" * content_length
191
191
  else:
192
192
  range_header, range_validation = validate_and_format_range_headers(
193
193
  download_range[0],
@@ -218,13 +218,14 @@ class _ChunkDownloader(object): # pylint: disable=too-many-instance-attributes
218
218
  if retry_total <= 0:
219
219
  raise HttpResponseError(error, error=error) from error
220
220
  time.sleep(1)
221
+ content_length = response.content_length
221
222
 
222
223
  # This makes sure that if_match is set so that we can validate
223
224
  # that subsequent downloads are to an unmodified blob
224
225
  if self.request_options.get("modified_access_conditions"):
225
226
  self.request_options["modified_access_conditions"].if_match = response.properties.etag
226
227
 
227
- return chunk_data
228
+ return chunk_data, content_length
228
229
 
229
230
 
230
231
  class _ChunkIterator(object):
@@ -264,7 +265,7 @@ class _ChunkIterator(object):
264
265
 
265
266
  try:
266
267
  chunk = next(self._iter_chunks)
267
- self._current_content += self._iter_downloader.yield_chunk(chunk)
268
+ self._current_content += self._iter_downloader.yield_chunk(chunk)[0]
268
269
  except StopIteration as e:
269
270
  self._complete = True
270
271
  if self._current_content:
@@ -328,14 +329,27 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
328
329
  self._encryption_options = encryption_options or {}
329
330
  self._progress_hook = kwargs.pop('progress_hook', None)
330
331
  self._request_options = kwargs
332
+ self._response = None
331
333
  self._location_mode = None
332
- self._download_complete = False
333
- self._current_content = None
334
- self._file_size = None
334
+ self._current_content = b''
335
+ self._file_size = 0
335
336
  self._non_empty_ranges = None
336
- self._response = None
337
337
  self._encryption_data = None
338
- self._offset = 0
338
+
339
+ # The content download offset, after any processing (decryption), in bytes
340
+ self._download_offset = 0
341
+ # The raw download offset, before processing (decryption), in bytes
342
+ self._raw_download_offset = 0
343
+ # The offset the stream has been read to in bytes or chars depending on mode
344
+ self._read_offset = 0
345
+ # The offset into current_content that has been consumed in bytes or chars depending on mode
346
+ self._current_content_offset = 0
347
+
348
+ self._text_mode: Optional[bool] = None
349
+ self._decoder = None
350
+ # Whether the current content is the first chunk of download content or not
351
+ self._first_chunk = True
352
+ self._download_start = self._start_range or 0
339
353
 
340
354
  # The cls is passed in via download_cls to avoid conflicting arg name with Generic.__new__
341
355
  # but needs to be changed to cls in the request options.
@@ -347,14 +361,14 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
347
361
  # The service only provides transactional MD5s for chunks under 4MB.
348
362
  # If validate_content is on, get only self.MAX_CHUNK_GET_SIZE for the first
349
363
  # chunk so a transactional MD5 can be retrieved.
350
- self._first_get_size = (
364
+ first_get_size = (
351
365
  self._config.max_single_get_size if not self._validate_content else self._config.max_chunk_get_size
352
366
  )
353
- initial_request_start = self._start_range if self._start_range is not None else 0
354
- if self._end_range is not None and self._end_range - self._start_range < self._first_get_size:
367
+ initial_request_start = self._download_start
368
+ if self._end_range is not None and self._end_range - initial_request_start < first_get_size:
355
369
  initial_request_end = self._end_range
356
370
  else:
357
- initial_request_end = initial_request_start + self._first_get_size - 1
371
+ initial_request_end = initial_request_start + first_get_size - 1
358
372
 
359
373
  self._initial_range, self._initial_offset = process_range_and_offset(
360
374
  initial_request_start,
@@ -369,12 +383,11 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
369
383
  self.properties.name = self.name
370
384
  self.properties.container = self.container
371
385
 
372
- # Set the content length to the download size instead of the size of
373
- # the last range
386
+ # Set the content length to the download size instead of the size of the last range
374
387
  self.properties.size = self.size
375
-
376
- # Overwrite the content range to the user requested range
377
- self.properties.content_range = f"bytes {self._start_range}-{self._end_range}/{self._file_size}"
388
+ self.properties.content_range = (f"bytes {self._download_start}-"
389
+ f"{self._end_range if self._end_range is not None else self._file_size - 1}/"
390
+ f"{self._file_size}")
378
391
 
379
392
  # Overwrite the content MD5 as it is the MD5 for the last range instead
380
393
  # of the stored MD5
@@ -399,6 +412,12 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
399
412
  # Restore cls for download
400
413
  self._request_options['cls'] = download_cls
401
414
 
415
+ @property
416
+ def _download_complete(self):
417
+ if is_encryption_v2(self._encryption_data):
418
+ return self._download_offset >= self.size
419
+ return self._raw_download_offset >= self.size
420
+
402
421
  def _initial_request(self):
403
422
  range_header, range_validation = validate_and_format_range_headers(
404
423
  self._initial_range[0],
@@ -435,7 +454,7 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
435
454
 
436
455
  if self._end_range is not None:
437
456
  # Use the end range index unless it is over the end of the file
438
- self.size = min(self._file_size, self._end_range - self._start_range + 1)
457
+ self.size = min(self._file_size - self._start_range, self._end_range - self._start_range + 1)
439
458
  elif self._start_range is not None:
440
459
  self.size = self._file_size - self._start_range
441
460
  else:
@@ -478,6 +497,8 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
478
497
  if retry_total <= 0:
479
498
  raise HttpResponseError(error, error=error) from error
480
499
  time.sleep(1)
500
+ self._download_offset += len(self._current_content)
501
+ self._raw_download_offset += response.content_length
481
502
 
482
503
  # get page ranges to optimize downloading sparse page blob
483
504
  if response.properties.blob_type == 'PageBlob':
@@ -491,33 +512,19 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
491
512
  except HttpResponseError:
492
513
  pass
493
514
 
494
- # If the file is small, the download is complete at this point.
495
- # If file size is large, download the rest of the file in chunks.
496
- # For encryption V2, calculate based on size of decrypted content, not download size.
497
- if is_encryption_v2(self._encryption_data):
498
- self._download_complete = len(self._current_content) >= self.size
499
- else:
500
- self._download_complete = response.properties.size >= self.size
501
-
502
515
  if not self._download_complete and self._request_options.get("modified_access_conditions"):
503
516
  self._request_options["modified_access_conditions"].if_match = response.properties.etag
504
517
 
505
518
  return response
506
519
 
507
- def _get_downloader_start_with_offset(self):
508
- # Start where the initial request download ended
509
- start = self._initial_range[1] + 1
510
- # For encryption V2 only, adjust start to the end of the fetched data rather than download size
511
- if self._encryption_options.get("key") is not None or self._encryption_options.get("resolver") is not None:
512
- start = (self._start_range or 0) + len(self._current_content)
513
-
514
- # Adjust the start based on any data read past the current content
515
- start += (self._offset - len(self._current_content))
516
- return start
517
-
518
520
  def chunks(self):
519
521
  # type: () -> Iterator[bytes]
520
- """Iterate over chunks in the download stream.
522
+ """
523
+ Iterate over chunks in the download stream. Note, the iterator returned will
524
+ iterate over the entire download content, regardless of any data that was
525
+ previously read.
526
+
527
+ NOTE: If the stream has been partially read, some data may be re-downloaded by the iterator.
521
528
 
522
529
  :returns: An iterator of the chunks in the download stream.
523
530
  :rtype: Iterator[bytes]
@@ -531,81 +538,124 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
531
538
  :dedent: 12
532
539
  :caption: Download a blob using chunks().
533
540
  """
534
- if self.size == 0 or self._download_complete:
535
- iter_downloader = None
536
- else:
537
- data_end = self._file_size
538
- if self._end_range is not None:
539
- # Use the end range index unless it is over the end of the file
540
- data_end = min(self._file_size, self._end_range + 1)
541
+ if self._text_mode:
542
+ raise ValueError("Stream has been partially read in text mode. chunks is not supported in text mode.")
543
+ if self._encoding:
544
+ warnings.warn("Encoding is ignored with chunks as only bytes are supported.")
545
+
546
+ iter_downloader = None
547
+ # If we still have the first chunk buffered, use it. Otherwise, download all content again
548
+ if not self._first_chunk or not self._download_complete:
549
+ if self._first_chunk:
550
+ start = self._download_start + len(self._current_content)
551
+ current_progress = len(self._current_content)
552
+ else:
553
+ start = self._download_start
554
+ current_progress = 0
541
555
 
542
- data_start = self._initial_range[1] + 1 # Start where the first download ended
543
- # For encryption, adjust start to the end of the fetched data rather than download size
544
- if self._encryption_options.get("key") is not None or self._encryption_options.get("resolver") is not None:
545
- data_start = (self._start_range or 0) + len(self._current_content)
556
+ end = self._download_start + self.size
546
557
 
547
558
  iter_downloader = _ChunkDownloader(
548
559
  client=self._clients.blob,
549
560
  non_empty_ranges=self._non_empty_ranges,
550
561
  total_size=self.size,
551
562
  chunk_size=self._config.max_chunk_get_size,
552
- current_progress=self._first_get_size,
553
- start_range=data_start,
554
- end_range=data_end,
555
- stream=None,
556
- parallel=False,
563
+ current_progress=current_progress,
564
+ start_range=start,
565
+ end_range=end,
557
566
  validate_content=self._validate_content,
558
567
  encryption_options=self._encryption_options,
559
568
  encryption_data=self._encryption_data,
560
569
  use_location=self._location_mode,
561
570
  **self._request_options
562
571
  )
572
+
573
+ initial_content = self._current_content if self._first_chunk else b''
563
574
  return _ChunkIterator(
564
575
  size=self.size,
565
- content=self._current_content,
576
+ content=initial_content,
566
577
  downloader=iter_downloader,
567
578
  chunk_size=self._config.max_chunk_get_size)
568
579
 
569
- def read(self, size: Optional[int] = -1) -> T:
580
+ @overload
581
+ def read(self, size: int = -1) -> T:
582
+ ...
583
+
584
+ @overload
585
+ def read(self, *, chars: Optional[int] = None) -> T:
586
+ ...
587
+
588
+ # pylint: disable-next=too-many-statements,too-many-branches
589
+ def read(self, size: int = -1, *, chars: Optional[int] = None) -> T:
570
590
  """
571
- Read up to size bytes from the stream and return them. If size
572
- is unspecified or is -1, all bytes will be read.
591
+ Read the specified bytes or chars from the stream. If `encoding`
592
+ was specified on `download_blob`, it is recommended to use the
593
+ chars parameter to read a specific number of chars to avoid decoding
594
+ errors. If size/chars is unspecified or negative all bytes will be read.
573
595
 
574
- :param Optional[int] size:
596
+ :param int size:
575
597
  The number of bytes to download from the stream. Leave unspecified
576
- or set to -1 to download all bytes.
598
+ or set negative to download all bytes.
599
+ :keyword Optional[int] chars:
600
+ The number of chars to download from the stream. Leave unspecified
601
+ or set negative to download all chars. Note, this can only be used
602
+ when encoding is specified on `download_blob`.
577
603
  :returns:
578
604
  The requested data as bytes or a string if encoding was specified. If
579
605
  the return value is empty, there is no more data to read.
580
606
  :rtype: T
581
607
  """
582
- if size == -1:
583
- return self.readall()
608
+ if size > -1 and self._encoding:
609
+ warnings.warn(
610
+ "Size parameter specified with text encoding enabled. It is recommended to use chars "
611
+ "to read a specific number of characters instead."
612
+ )
613
+ if size > -1 and chars is not None:
614
+ raise ValueError("Cannot specify both size and chars.")
615
+ if not self._encoding and chars is not None:
616
+ raise ValueError("Must specify encoding to read chars.")
617
+ if self._text_mode and size > -1:
618
+ raise ValueError("Stream has been partially read in text mode. Please use chars.")
619
+ if self._text_mode is False and chars is not None:
620
+ raise ValueError("Stream has been partially read in bytes mode. Please use size.")
621
+
584
622
  # Empty blob or already read to the end
585
- if size == 0 or self._offset >= self.size:
623
+ if (size == 0 or chars == 0 or
624
+ (self._download_complete and self._current_content_offset >= len(self._current_content))):
586
625
  return b'' if not self._encoding else ''
587
626
 
588
- stream = BytesIO()
589
- remaining_size = size
590
-
591
- # Start by reading from current_content if there is data left
592
- if self._offset < len(self._current_content):
593
- start = self._offset
594
- length = min(remaining_size, len(self._current_content) - self._offset)
595
- read = stream.write(self._current_content[start:start + length])
596
-
597
- remaining_size -= read
598
- self._offset += read
599
- if self._progress_hook:
600
- self._progress_hook(self._offset, self.size)
601
-
602
- if remaining_size > 0:
603
- start_range = self._get_downloader_start_with_offset()
604
-
605
- # End is the min between the remaining size, the file size, and the end of the specified range
606
- end_range = min(start_range + remaining_size, self._file_size)
607
- if self._end_range is not None:
608
- end_range = min(end_range, self._end_range + 1)
627
+ if not self._text_mode and chars is not None:
628
+ self._text_mode = True
629
+ self._decoder = codecs.getincrementaldecoder(self._encoding)('strict')
630
+ self._current_content = self._decoder.decode(self._current_content, final=self._download_complete)
631
+ elif self._text_mode is None:
632
+ self._text_mode = False
633
+
634
+ output_stream: Union[BytesIO, StringIO]
635
+ if self._text_mode:
636
+ output_stream = StringIO()
637
+ size = chars if chars else sys.maxsize
638
+ else:
639
+ output_stream = BytesIO()
640
+ size = size if size > 0 else sys.maxsize
641
+ readall = size == sys.maxsize
642
+ count = 0
643
+
644
+ # Start by reading from current_content
645
+ start = self._current_content_offset
646
+ length = min(len(self._current_content) - self._current_content_offset, size - count)
647
+ read = output_stream.write(self._current_content[start:start + length])
648
+
649
+ count += read
650
+ self._current_content_offset += read
651
+ self._read_offset += read
652
+ self._check_and_report_progress()
653
+
654
+ remaining = size - count
655
+ if remaining > 0 and not self._download_complete:
656
+ # Create a downloader than can download the rest of the file
657
+ start = self._download_start + self._download_offset
658
+ end = self._download_start + self.size
609
659
 
610
660
  parallel = self._max_concurrency > 1
611
661
  downloader = _ChunkDownloader(
@@ -613,10 +663,10 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
613
663
  non_empty_ranges=self._non_empty_ranges,
614
664
  total_size=self.size,
615
665
  chunk_size=self._config.max_chunk_get_size,
616
- current_progress=self._offset,
617
- start_range=start_range,
618
- end_range=end_range,
619
- stream=stream,
666
+ current_progress=self._read_offset,
667
+ start_range=start,
668
+ end_range=end,
669
+ stream=output_stream,
620
670
  parallel=parallel,
621
671
  validate_content=self._validate_content,
622
672
  encryption_options=self._encryption_options,
@@ -625,23 +675,57 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
625
675
  progress_hook=self._progress_hook,
626
676
  **self._request_options
627
677
  )
628
-
629
- if parallel and remaining_size > self._config.max_chunk_get_size:
630
- import concurrent.futures
631
- with concurrent.futures.ThreadPoolExecutor(self._max_concurrency) as executor:
632
- list(executor.map(
678
+ self._first_chunk = False
679
+
680
+ # When reading all data, have the downloader read everything into the stream.
681
+ # Else, read one chunk at a time (using the downloader as an iterator) until
682
+ # the requested size is reached.
683
+ chunks_iter = downloader.get_chunk_offsets()
684
+ if readall and not self._text_mode:
685
+ # Only do parallel if there is more than one chunk left to download
686
+ if parallel and (self.size - self._download_offset) > self._config.max_chunk_get_size:
687
+ import concurrent.futures
688
+ with concurrent.futures.ThreadPoolExecutor(self._max_concurrency) as executor:
689
+ list(executor.map(
633
690
  with_current_context(downloader.process_chunk),
634
691
  downloader.get_chunk_offsets()
635
692
  ))
636
- else:
637
- for chunk in downloader.get_chunk_offsets():
638
- downloader.process_chunk(chunk)
693
+ else:
694
+ for chunk in chunks_iter:
695
+ downloader.process_chunk(chunk)
639
696
 
640
- self._offset += remaining_size
697
+ self._complete_read()
698
+
699
+ else:
700
+ while (chunk := next(chunks_iter, None)) is not None and remaining > 0:
701
+ chunk_data, content_length = downloader.yield_chunk(chunk)
702
+ self._download_offset += len(chunk_data)
703
+ self._raw_download_offset += content_length
704
+ self._current_content = self._decoder.decode(
705
+ chunk_data, final=self._download_complete) if self._text_mode else chunk_data
706
+
707
+ if remaining < len(self._current_content):
708
+ read = output_stream.write(self._current_content[:remaining])
709
+ else:
710
+ read = output_stream.write(self._current_content)
711
+
712
+ self._current_content_offset = read
713
+ self._read_offset += read
714
+ remaining -= read
715
+ self._check_and_report_progress()
716
+
717
+ data = output_stream.getvalue()
718
+ if not self._text_mode and self._encoding:
719
+ try:
720
+ # This is technically incorrect to do, but we have it for backwards compatibility.
721
+ data = data.decode(self._encoding)
722
+ except UnicodeDecodeError:
723
+ warnings.warn(
724
+ "Encountered a decoding error while decoding blob data from a partial read. "
725
+ "Try using the `chars` keyword instead to read in text mode."
726
+ )
727
+ raise
641
728
 
642
- data = stream.getvalue()
643
- if self._encoding:
644
- return data.decode(self._encoding)
645
729
  return data
646
730
 
647
731
  def readall(self) -> T:
@@ -652,53 +736,7 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
652
736
  :returns: The requested data as bytes or a string if encoding was specified.
653
737
  :rtype: T
654
738
  """
655
- stream = BytesIO()
656
- self.readinto(stream)
657
- data = stream.getvalue()
658
- if self._encoding:
659
- return data.decode(self._encoding)
660
- return data
661
-
662
- def content_as_bytes(self, max_concurrency=1):
663
- """DEPRECATED: Download the contents of this file.
664
-
665
- This operation is blocking until all data is downloaded.
666
-
667
- This method is deprecated, use func:`readall` instead.
668
-
669
- :param int max_concurrency:
670
- The number of parallel connections with which to download.
671
- :returns: The contents of the file as bytes.
672
- :rtype: bytes
673
- """
674
- warnings.warn(
675
- "content_as_bytes is deprecated, use readall instead",
676
- DeprecationWarning
677
- )
678
- self._max_concurrency = max_concurrency
679
- return self.readall()
680
-
681
- def content_as_text(self, max_concurrency=1, encoding="UTF-8"):
682
- """DEPRECATED: Download the contents of this blob, and decode as text.
683
-
684
- This operation is blocking until all data is downloaded.
685
-
686
- This method is deprecated, use func:`readall` instead.
687
-
688
- :param int max_concurrency:
689
- The number of parallel connections with which to download.
690
- :param str encoding:
691
- Test encoding to decode the downloaded bytes. Default is UTF-8.
692
- :returns: The content of the file as a str.
693
- :rtype: str
694
- """
695
- warnings.warn(
696
- "content_as_text is deprecated, use readall instead",
697
- DeprecationWarning
698
- )
699
- self._max_concurrency = max_concurrency
700
- self._encoding = encoding
701
- return self.readall()
739
+ return self.read()
702
740
 
703
741
  def readinto(self, stream: IO[bytes]) -> int:
704
742
  """Download the contents of this file to a stream.
@@ -710,6 +748,11 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
710
748
  :returns: The number of bytes read.
711
749
  :rtype: int
712
750
  """
751
+ if self._text_mode:
752
+ raise ValueError("Stream has been partially read in text mode. readinto is not supported in text mode.")
753
+ if self._encoding:
754
+ warnings.warn("Encoding is ignored with readinto as only byte streams are supported.")
755
+
713
756
  # The stream must be seekable if parallel download is required
714
757
  parallel = self._max_concurrency > 1
715
758
  if parallel:
@@ -723,35 +766,34 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
723
766
  raise ValueError(error_message) from exc
724
767
 
725
768
  # If some data has been streamed using `read`, only stream the remaining data
726
- remaining_size = self.size - self._offset
769
+ remaining_size = self.size - self._read_offset
727
770
  # Already read to the end
728
771
  if remaining_size <= 0:
729
772
  return 0
730
773
 
731
- # Write the content to the user stream if there is data left
732
- if self._offset < len(self._current_content):
733
- content = self._current_content[self._offset:]
734
- stream.write(content)
735
- self._offset += len(content)
736
- if self._progress_hook:
737
- self._progress_hook(len(content), self.size)
774
+ # Write the current content to the user stream
775
+ current_remaining = len(self._current_content) - self._current_content_offset
776
+ start = self._current_content_offset
777
+ count = stream.write(self._current_content[start:start + current_remaining])
738
778
 
779
+ self._current_content_offset += count
780
+ self._read_offset += count
781
+ if self._progress_hook:
782
+ self._progress_hook(self._read_offset, self.size)
783
+
784
+ # If all the data was already downloaded/buffered
739
785
  if self._download_complete:
740
786
  return remaining_size
741
787
 
742
- data_end = self._file_size
743
- if self._end_range is not None:
744
- # Use the length unless it is over the end of the file
745
- data_end = min(self._file_size, self._end_range + 1)
746
-
747
- data_start = self._get_downloader_start_with_offset()
788
+ data_start = self._download_start + self._read_offset
789
+ data_end = self._download_start + self.size
748
790
 
749
791
  downloader = _ChunkDownloader(
750
792
  client=self._clients.blob,
751
793
  non_empty_ranges=self._non_empty_ranges,
752
794
  total_size=self.size,
753
795
  chunk_size=self._config.max_chunk_get_size,
754
- current_progress=self._offset,
796
+ current_progress=self._read_offset,
755
797
  start_range=data_start,
756
798
  end_range=data_end,
757
799
  stream=stream,
@@ -774,8 +816,72 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
774
816
  for chunk in downloader.get_chunk_offsets():
775
817
  downloader.process_chunk(chunk)
776
818
 
819
+ self._complete_read()
777
820
  return remaining_size
778
821
 
822
+ def _complete_read(self):
823
+ """Adjusts all offsets to the end of the download."""
824
+ self._download_offset = self.size
825
+ self._raw_download_offset = self.size
826
+ self._read_offset = self.size
827
+ self._current_content_offset = len(self._current_content)
828
+
829
+ def _check_and_report_progress(self):
830
+ """Reports progress if necessary."""
831
+ # Only report progress at the end of each chunk and use download_offset to always report
832
+ # progress in terms of (approximate) byte count.
833
+ if self._progress_hook and self._current_content_offset == len(self._current_content):
834
+ self._progress_hook(self._download_offset, self.size)
835
+
836
+ def content_as_bytes(self, max_concurrency=1):
837
+ """DEPRECATED: Download the contents of this file.
838
+
839
+ This operation is blocking until all data is downloaded.
840
+
841
+ This method is deprecated, use func:`readall` instead.
842
+
843
+ :param int max_concurrency:
844
+ The number of parallel connections with which to download.
845
+ :returns: The contents of the file as bytes.
846
+ :rtype: bytes
847
+ """
848
+ warnings.warn(
849
+ "content_as_bytes is deprecated, use readall instead",
850
+ DeprecationWarning
851
+ )
852
+ if self._text_mode:
853
+ raise ValueError("Stream has been partially read in text mode. "
854
+ "content_as_bytes is not supported in text mode.")
855
+
856
+ self._max_concurrency = max_concurrency
857
+ return self.readall()
858
+
859
+ def content_as_text(self, max_concurrency=1, encoding="UTF-8"):
860
+ """DEPRECATED: Download the contents of this blob, and decode as text.
861
+
862
+ This operation is blocking until all data is downloaded.
863
+
864
+ This method is deprecated, use func:`readall` instead.
865
+
866
+ :param int max_concurrency:
867
+ The number of parallel connections with which to download.
868
+ :param str encoding:
869
+ Test encoding to decode the downloaded bytes. Default is UTF-8.
870
+ :returns: The content of the file as a str.
871
+ :rtype: str
872
+ """
873
+ warnings.warn(
874
+ "content_as_text is deprecated, use readall instead",
875
+ DeprecationWarning
876
+ )
877
+ if self._text_mode:
878
+ raise ValueError("Stream has been partially read in text mode. "
879
+ "content_as_text is not supported in text mode.")
880
+
881
+ self._max_concurrency = max_concurrency
882
+ self._encoding = encoding
883
+ return self.readall()
884
+
779
885
  def download_to_stream(self, stream, max_concurrency=1):
780
886
  """DEPRECATED: Download the contents of this blob to a stream.
781
887
 
@@ -794,6 +900,10 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
794
900
  "download_to_stream is deprecated, use readinto instead",
795
901
  DeprecationWarning
796
902
  )
903
+ if self._text_mode:
904
+ raise ValueError("Stream has been partially read in text mode. "
905
+ "download_to_stream is not supported in text mode.")
906
+
797
907
  self._max_concurrency = max_concurrency
798
908
  self.readinto(stream)
799
909
  return self.properties
@@ -47,7 +47,7 @@ class AzureBlobStorage: # pylint: disable=client-accepts-api-version-keyword
47
47
  :param base_url: Service URL. Required. Default value is "".
48
48
  :type base_url: str
49
49
  :keyword version: Specifies the version of the operation to use for this request. Default value
50
- is "2021-12-02". Note that overriding this default value may result in unsupported behavior.
50
+ is "2024-08-04". Note that overriding this default value may result in unsupported behavior.
51
51
  :paramtype version: str
52
52
  """
53
53