azure-storage-blob 12.20.0b1__py3-none-any.whl → 12.21.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. azure/storage/blob/__init__.py +12 -2
  2. azure/storage/blob/_blob_client.py +64 -51
  3. azure/storage/blob/_blob_service_client.py +17 -12
  4. azure/storage/blob/_container_client.py +33 -23
  5. azure/storage/blob/_download.py +277 -167
  6. azure/storage/blob/_generated/_azure_blob_storage.py +1 -1
  7. azure/storage/blob/_generated/_configuration.py +2 -2
  8. azure/storage/blob/_generated/_patch.py +2 -0
  9. azure/storage/blob/_generated/_serialization.py +1 -1
  10. azure/storage/blob/_generated/aio/_azure_blob_storage.py +1 -1
  11. azure/storage/blob/_generated/aio/_configuration.py +2 -2
  12. azure/storage/blob/_generated/aio/_patch.py +2 -0
  13. azure/storage/blob/_generated/aio/operations/_append_blob_operations.py +10 -5
  14. azure/storage/blob/_generated/aio/operations/_blob_operations.py +45 -26
  15. azure/storage/blob/_generated/aio/operations/_block_blob_operations.py +12 -7
  16. azure/storage/blob/_generated/aio/operations/_container_operations.py +39 -20
  17. azure/storage/blob/_generated/aio/operations/_page_blob_operations.py +15 -10
  18. azure/storage/blob/_generated/aio/operations/_patch.py +3 -0
  19. azure/storage/blob/_generated/aio/operations/_service_operations.py +28 -10
  20. azure/storage/blob/_generated/models/_patch.py +3 -0
  21. azure/storage/blob/_generated/operations/_append_blob_operations.py +14 -9
  22. azure/storage/blob/_generated/operations/_blob_operations.py +76 -51
  23. azure/storage/blob/_generated/operations/_block_blob_operations.py +18 -13
  24. azure/storage/blob/_generated/operations/_container_operations.py +64 -39
  25. azure/storage/blob/_generated/operations/_page_blob_operations.py +24 -19
  26. azure/storage/blob/_generated/operations/_patch.py +3 -0
  27. azure/storage/blob/_generated/operations/_service_operations.py +43 -19
  28. azure/storage/blob/_generated/py.typed +1 -0
  29. azure/storage/blob/_lease.py +6 -5
  30. azure/storage/blob/_models.py +1 -1
  31. azure/storage/blob/_serialize.py +1 -0
  32. azure/storage/blob/_shared/authentication.py +62 -4
  33. azure/storage/blob/_shared/base_client.py +1 -1
  34. azure/storage/blob/_shared/base_client_async.py +3 -2
  35. azure/storage/blob/_shared/models.py +13 -12
  36. azure/storage/blob/_shared/shared_access_signature.py +1 -0
  37. azure/storage/blob/_shared_access_signature.py +1 -0
  38. azure/storage/blob/_version.py +1 -1
  39. azure/storage/blob/aio/__init__.py +13 -4
  40. azure/storage/blob/aio/_blob_client_async.py +50 -47
  41. azure/storage/blob/aio/_blob_service_client_async.py +11 -11
  42. azure/storage/blob/aio/_container_client_async.py +23 -20
  43. azure/storage/blob/aio/_download_async.py +317 -209
  44. azure/storage/blob/aio/_lease_async.py +6 -6
  45. {azure_storage_blob-12.20.0b1.dist-info → azure_storage_blob-12.21.0.dist-info}/METADATA +2 -2
  46. azure_storage_blob-12.21.0.dist-info/RECORD +82 -0
  47. azure_storage_blob-12.20.0b1.dist-info/RECORD +0 -81
  48. {azure_storage_blob-12.20.0b1.dist-info → azure_storage_blob-12.21.0.dist-info}/LICENSE +0 -0
  49. {azure_storage_blob-12.20.0b1.dist-info → azure_storage_blob-12.21.0.dist-info}/WHEEL +0 -0
  50. {azure_storage_blob-12.20.0b1.dist-info → azure_storage_blob-12.21.0.dist-info}/top_level.txt +0 -0
@@ -5,11 +5,12 @@
5
5
  # --------------------------------------------------------------------------
6
6
  # pylint: disable=invalid-overridden-method
7
7
 
8
+ import codecs
8
9
  import sys
9
10
  import warnings
10
- from io import BytesIO
11
+ from io import BytesIO, StringIO
11
12
  from itertools import islice
12
- from typing import AsyncIterator, Generic, IO, Optional, TypeVar
13
+ from typing import AsyncIterator, Generic, IO, Optional, overload, TypeVar, Union
13
14
 
14
15
  import asyncio
15
16
 
@@ -59,7 +60,7 @@ class _AsyncChunkDownloader(_ChunkDownloader):
59
60
 
60
61
  async def process_chunk(self, chunk_start):
61
62
  chunk_start, chunk_end = self._calculate_range(chunk_start)
62
- chunk_data = await self._download_chunk(chunk_start, chunk_end - 1)
63
+ chunk_data, _ = await self._download_chunk(chunk_start, chunk_end - 1)
63
64
  length = chunk_end - chunk_start
64
65
  if length > 0:
65
66
  await self._write_to_stream(chunk_data, chunk_start)
@@ -95,8 +96,8 @@ class _AsyncChunkDownloader(_ChunkDownloader):
95
96
  # No need to download the empty chunk from server if there's no data in the chunk to be downloaded.
96
97
  # Do optimize and create empty chunk locally if condition is met.
97
98
  if self._do_optimize(download_range[0], download_range[1]):
98
- data_size = download_range[1] - download_range[0] + 1
99
- chunk_data = b"\x00" * data_size
99
+ content_length = download_range[1] - download_range[0] + 1
100
+ chunk_data = b"\x00" * content_length
100
101
  else:
101
102
  range_header, range_validation = validate_and_format_range_headers(
102
103
  download_range[0],
@@ -117,14 +118,14 @@ class _AsyncChunkDownloader(_ChunkDownloader):
117
118
  process_storage_error(error)
118
119
 
119
120
  chunk_data = await process_content(response, offset[0], offset[1], self.encryption_options)
120
-
121
+ content_length = response.content_length
121
122
 
122
123
  # This makes sure that if_match is set so that we can validate
123
124
  # that subsequent downloads are to an unmodified blob
124
125
  if self.request_options.get('modified_access_conditions'):
125
126
  self.request_options['modified_access_conditions'].if_match = response.properties.etag
126
127
 
127
- return chunk_data
128
+ return chunk_data, content_length
128
129
 
129
130
 
130
131
  class _AsyncChunkIterator(object):
@@ -167,7 +168,7 @@ class _AsyncChunkIterator(object):
167
168
 
168
169
  try:
169
170
  chunk = next(self._iter_chunks)
170
- self._current_content += await self._iter_downloader.yield_chunk(chunk)
171
+ self._current_content += (await self._iter_downloader.yield_chunk(chunk))[0]
171
172
  except StopIteration as exc:
172
173
  self._complete = True
173
174
  # it's likely that there is some data left in self._current_content
@@ -228,28 +229,32 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
228
229
  self._encryption_options = encryption_options or {}
229
230
  self._progress_hook = kwargs.pop('progress_hook', None)
230
231
  self._request_options = kwargs
232
+ self._response = None
231
233
  self._location_mode = None
232
- self._download_complete = False
233
- self._current_content = None
234
- self._file_size = None
234
+ self._current_content = b''
235
+ self._file_size = 0
235
236
  self._non_empty_ranges = None
236
- self._response = None
237
237
  self._encryption_data = None
238
- self._offset = 0
239
238
 
240
- self._initial_range = None
241
- self._initial_offset = None
239
+ # The content download offset, after any processing (decryption), in bytes
240
+ self._download_offset = 0
241
+ # The raw download offset, before processing (decryption), in bytes
242
+ self._raw_download_offset = 0
243
+ # The offset the stream has been read to in bytes or chars depending on mode
244
+ self._read_offset = 0
245
+ # The offset into current_content that has been consumed in bytes or chars depending on mode
246
+ self._current_content_offset = 0
247
+
248
+ self._text_mode: Optional[bool] = None
249
+ self._decoder = None
250
+ # Whether the current content is the first chunk of download content or not
251
+ self._first_chunk = True
252
+ self._download_start = self._start_range or 0
242
253
 
243
254
  # The cls is passed in via download_cls to avoid conflicting arg name with Generic.__new__
244
255
  # but needs to be changed to cls in the request options.
245
256
  self._request_options['cls'] = download_cls
246
257
 
247
- # The service only provides transactional MD5s for chunks under 4MB.
248
- # If validate_content is on, get only self.MAX_CHUNK_GET_SIZE for the first
249
- # chunk so a transactional MD5 can be retrieved.
250
- self._first_get_size = self._config.max_single_get_size if not self._validate_content \
251
- else self._config.max_chunk_get_size
252
-
253
258
  def __len__(self):
254
259
  return self.size
255
260
 
@@ -272,12 +277,19 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
272
277
  if self._encryption_options.get("key") is not None or self._encryption_options.get("resolver") is not None:
273
278
  await self._get_encryption_data_request()
274
279
 
280
+ # The service only provides transactional MD5s for chunks under 4MB.
281
+ # If validate_content is on, get only self.MAX_CHUNK_GET_SIZE for the first
282
+ # chunk so a transactional MD5 can be retrieved.
283
+ first_get_size = (
284
+ self._config.max_single_get_size if not self._validate_content else self._config.max_chunk_get_size
285
+ )
275
286
  initial_request_start = self._start_range if self._start_range is not None else 0
276
- if self._end_range is not None and self._end_range - self._start_range < self._first_get_size:
287
+ if self._end_range is not None and self._end_range - initial_request_start < first_get_size:
277
288
  initial_request_end = self._end_range
278
289
  else:
279
- initial_request_end = initial_request_start + self._first_get_size - 1
290
+ initial_request_end = initial_request_start + first_get_size - 1
280
291
 
292
+ # pylint: disable-next=attribute-defined-outside-init
281
293
  self._initial_range, self._initial_offset = process_range_and_offset(
282
294
  initial_request_start,
283
295
  initial_request_end,
@@ -292,39 +304,22 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
292
304
  self.properties.name = self.name
293
305
  self.properties.container = self.container
294
306
 
295
- # Set the content length to the download size instead of the size of
296
- # the last range
297
- initial_size = self._response.properties.size
307
+ # Set the content length to the download size instead of the size of the last range
298
308
  self.properties.size = self.size
299
-
300
- # Overwrite the content range to the user requested range
301
- self.properties.content_range = f'bytes {self._start_range}-{self._end_range}/{self._file_size}'
309
+ self.properties.content_range = (f"bytes {self._download_start}-"
310
+ f"{self._end_range if self._end_range is not None else self._file_size - 1}/"
311
+ f"{self._file_size}")
302
312
 
303
313
  # Overwrite the content MD5 as it is the MD5 for the last range instead
304
314
  # of the stored MD5
305
315
  # TODO: Set to the stored MD5 when the service returns this
306
316
  self.properties.content_md5 = None
307
317
 
308
- if self.size == 0:
309
- self._current_content = b""
310
- else:
311
- self._current_content = await process_content(
312
- self._response,
313
- self._initial_offset[0],
314
- self._initial_offset[1],
315
- self._encryption_options
316
- )
317
-
318
- # If the file is small, the download is complete at this point.
319
- # If file size is large, download the rest of the file in chunks.
320
- # For encryption V2, calculate based on size of decrypted content, not download size.
318
+ @property
319
+ def _download_complete(self):
321
320
  if is_encryption_v2(self._encryption_data):
322
- self._download_complete = len(self._current_content) >= self.size
323
- else:
324
- self._download_complete = initial_size >= self.size
325
-
326
- if not self._download_complete and self._request_options.get("modified_access_conditions"):
327
- self._request_options["modified_access_conditions"].if_match = self._response.properties.etag
321
+ return self._download_offset >= self.size
322
+ return self._raw_download_offset >= self.size
328
323
 
329
324
  async def _initial_request(self):
330
325
  range_header, range_validation = validate_and_format_range_headers(
@@ -357,7 +352,7 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
357
352
 
358
353
  if self._end_range is not None:
359
354
  # Use the length unless it is over the end of the file
360
- self.size = min(self._file_size, self._end_range - self._start_range + 1)
355
+ self.size = min(self._file_size - self._start_range, self._end_range - self._start_range + 1)
361
356
  elif self._start_range is not None:
362
357
  self.size = self._file_size - self._start_range
363
358
  else:
@@ -383,6 +378,18 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
383
378
  else:
384
379
  process_storage_error(error)
385
380
 
381
+ if self.size == 0:
382
+ self._current_content = b""
383
+ else:
384
+ self._current_content = await process_content(
385
+ response,
386
+ self._initial_offset[0],
387
+ self._initial_offset[1],
388
+ self._encryption_options
389
+ )
390
+ self._download_offset += len(self._current_content)
391
+ self._raw_download_offset += response.content_length
392
+
386
393
  # get page ranges to optimize downloading sparse page blob
387
394
  if response.properties.blob_type == 'PageBlob':
388
395
  try:
@@ -391,22 +398,19 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
391
398
  except HttpResponseError:
392
399
  pass
393
400
 
394
- return response
395
-
396
- def _get_downloader_start_with_offset(self):
397
- # Start where the initial request download ended
398
- start = self._initial_range[1] + 1
399
- # For encryption V2 only, adjust start to the end of the fetched data rather than download size
400
- if self._encryption_options.get("key") is not None or self._encryption_options.get("resolver") is not None:
401
- start = (self._start_range or 0) + len(self._current_content)
401
+ if not self._download_complete and self._request_options.get("modified_access_conditions"):
402
+ self._request_options["modified_access_conditions"].if_match = response.properties.etag
402
403
 
403
- # Adjust the start based on any data read past the current content
404
- start += (self._offset - len(self._current_content))
405
- return start
404
+ return response
406
405
 
407
406
  def chunks(self):
408
407
  # type: () -> AsyncIterator[bytes]
409
- """Iterate over chunks in the download stream.
408
+ """
409
+ Iterate over chunks in the download stream. Note, the iterator returned will
410
+ iterate over the entire download content, regardless of any data that was
411
+ previously read.
412
+
413
+ NOTE: If the stream has been partially read, some data may be re-downloaded by the iterator.
410
414
 
411
415
  :returns: An async iterator of the chunks in the download stream.
412
416
  :rtype: AsyncIterator[bytes]
@@ -420,79 +424,124 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
420
424
  :dedent: 16
421
425
  :caption: Download a blob using chunks().
422
426
  """
423
- if self.size == 0 or self._download_complete:
424
- iter_downloader = None
425
- else:
426
- data_end = self._file_size
427
- data_start = self._initial_range[1] + 1 # Start where the first download ended
428
- # For encryption, adjust start to the end of the fetched data rather than download size
429
- if self._encryption_options.get("key") is not None or self._encryption_options.get("resolver") is not None:
430
- data_start = (self._start_range or 0) + len(self._current_content)
427
+ if self._text_mode:
428
+ raise ValueError("Stream has been partially read in text mode. chunks is not supported in text mode.")
429
+ if self._encoding:
430
+ warnings.warn("Encoding is ignored with chunks as only bytes are supported.")
431
+
432
+ iter_downloader = None
433
+ # If we still have the first chunk buffered, use it. Otherwise, download all content again
434
+ if not self._first_chunk or not self._download_complete:
435
+ if self._first_chunk:
436
+ start = self._download_start + len(self._current_content)
437
+ current_progress = len(self._current_content)
438
+ else:
439
+ start = self._download_start
440
+ current_progress = 0
441
+
442
+ end = self._download_start + self.size
431
443
 
432
- if self._end_range is not None:
433
- # Use the length unless it is over the end of the file
434
- data_end = min(self._file_size, self._end_range + 1)
435
444
  iter_downloader = _AsyncChunkDownloader(
436
445
  client=self._clients.blob,
437
446
  non_empty_ranges=self._non_empty_ranges,
438
447
  total_size=self.size,
439
448
  chunk_size=self._config.max_chunk_get_size,
440
- current_progress=self._first_get_size,
441
- start_range=data_start,
442
- end_range=data_end,
443
- stream=None,
444
- parallel=False,
449
+ current_progress=current_progress,
450
+ start_range=start,
451
+ end_range=end,
445
452
  validate_content=self._validate_content,
446
453
  encryption_options=self._encryption_options,
447
454
  encryption_data=self._encryption_data,
448
455
  use_location=self._location_mode,
449
- **self._request_options)
456
+ **self._request_options
457
+ )
458
+
459
+ initial_content = self._current_content if self._first_chunk else b''
450
460
  return _AsyncChunkIterator(
451
461
  size=self.size,
452
- content=self._current_content,
462
+ content=initial_content,
453
463
  downloader=iter_downloader,
454
464
  chunk_size=self._config.max_chunk_get_size)
455
465
 
456
- async def read(self, size: Optional[int] = -1) -> T:
466
+ @overload
467
+ async def read(self, size: int = -1) -> T:
468
+ ...
469
+
470
+ @overload
471
+ async def read(self, *, chars: Optional[int] = None) -> T:
472
+ ...
473
+
474
+ # pylint: disable-next=too-many-statements,too-many-branches
475
+ async def read(self, size: int = -1, *, chars: Optional[int] = None) -> T:
457
476
  """
458
- Read up to size bytes from the stream and return them. If size
459
- is unspecified or is -1, all bytes will be read.
477
+ Read the specified bytes or chars from the stream. If `encoding`
478
+ was specified on `download_blob`, it is recommended to use the
479
+ chars parameter to read a specific number of chars to avoid decoding
480
+ errors. If size/chars is unspecified or negative all bytes will be read.
460
481
 
461
- :param Optional[int] size:
482
+ :param int size:
462
483
  The number of bytes to download from the stream. Leave unspecified
463
- or set to -1 to download all bytes.
484
+ or set negative to download all bytes.
485
+ :keyword Optional[int] chars:
486
+ The number of chars to download from the stream. Leave unspecified
487
+ or set negative to download all chars. Note, this can only be used
488
+ when encoding is specified on `download_blob`.
464
489
  :returns:
465
490
  The requested data as bytes or a string if encoding was specified. If
466
491
  the return value is empty, there is no more data to read.
467
492
  :rtype: T
468
493
  """
469
- if size == -1:
470
- return await self.readall()
494
+ if size > -1 and self._encoding:
495
+ warnings.warn(
496
+ "Size parameter specified with text encoding enabled. It is recommended to use chars "
497
+ "to read a specific number of characters instead."
498
+ )
499
+ if size > -1 and chars is not None:
500
+ raise ValueError("Cannot specify both size and chars.")
501
+ if not self._encoding and chars is not None:
502
+ raise ValueError("Must specify encoding to read chars.")
503
+ if self._text_mode and size > -1:
504
+ raise ValueError("Stream has been partially read in text mode. Please use chars.")
505
+ if self._text_mode is False and chars is not None:
506
+ raise ValueError("Stream has been partially read in bytes mode. Please use size.")
507
+
471
508
  # Empty blob or already read to the end
472
- if size == 0 or self._offset >= self.size:
509
+ if (size == 0 or chars == 0 or
510
+ (self._download_complete and self._current_content_offset >= len(self._current_content))):
473
511
  return b'' if not self._encoding else ''
474
512
 
475
- stream = BytesIO()
476
- remaining_size = size
477
-
478
- # Start by reading from current_content if there is data left
479
- if self._offset < len(self._current_content):
480
- start = self._offset
481
- length = min(remaining_size, len(self._current_content) - self._offset)
482
- read = stream.write(self._current_content[start:start + length])
483
-
484
- remaining_size -= read
485
- self._offset += read
486
- if self._progress_hook:
487
- await self._progress_hook(self._offset, self.size)
488
-
489
- if remaining_size > 0:
490
- start_range = self._get_downloader_start_with_offset()
491
-
492
- # End is the min between the remaining size, the file size, and the end of the specified range
493
- end_range = min(start_range + remaining_size, self._file_size)
494
- if self._end_range is not None:
495
- end_range = min(end_range, self._end_range + 1)
513
+ if not self._text_mode and chars is not None:
514
+ self._text_mode = True
515
+ self._decoder = codecs.getincrementaldecoder(self._encoding)('strict')
516
+ self._current_content = self._decoder.decode(self._current_content, final=self._download_complete)
517
+ elif self._text_mode is None:
518
+ self._text_mode = False
519
+
520
+ output_stream: Union[BytesIO, StringIO]
521
+ if self._text_mode:
522
+ output_stream = StringIO()
523
+ size = chars if chars else sys.maxsize
524
+ else:
525
+ output_stream = BytesIO()
526
+ size = size if size > 0 else sys.maxsize
527
+ readall = size == sys.maxsize
528
+ count = 0
529
+
530
+ # Start by reading from current_content
531
+ start = self._current_content_offset
532
+ length = min(len(self._current_content) - self._current_content_offset, size - count)
533
+ read = output_stream.write(self._current_content[start:start + length])
534
+
535
+ count += read
536
+ self._current_content_offset += read
537
+ self._read_offset += read
538
+ await self._check_and_report_progress()
539
+
540
+ remaining = size - count
541
+ if remaining > 0 and not self._download_complete:
542
+ # Create a downloader that can download the rest of the file
543
+ start = self._download_start + self._download_offset
544
+ end = self._download_start + self.size
496
545
 
497
546
  parallel = self._max_concurrency > 1
498
547
  downloader = _AsyncChunkDownloader(
@@ -500,10 +549,10 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
500
549
  non_empty_ranges=self._non_empty_ranges,
501
550
  total_size=self.size,
502
551
  chunk_size=self._config.max_chunk_get_size,
503
- current_progress=self._offset,
504
- start_range=start_range,
505
- end_range=end_range,
506
- stream=stream,
552
+ current_progress=self._read_offset,
553
+ start_range=start,
554
+ end_range=end,
555
+ stream=output_stream,
507
556
  parallel=parallel,
508
557
  validate_content=self._validate_content,
509
558
  encryption_options=self._encryption_options,
@@ -512,42 +561,74 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
512
561
  progress_hook=self._progress_hook,
513
562
  **self._request_options
514
563
  )
564
+ self._first_chunk = False
565
+
566
+ # When reading all data, have the downloader read everything into the stream.
567
+ # Else, read one chunk at a time (using the downloader as an iterator) until
568
+ # the requested size is reached.
569
+ chunks_iter = downloader.get_chunk_offsets()
570
+ if readall and not self._text_mode:
571
+ running_futures = [
572
+ asyncio.ensure_future(downloader.process_chunk(d))
573
+ for d in islice(chunks_iter, 0, self._max_concurrency)
574
+ ]
575
+ while running_futures:
576
+ # Wait for some download to finish before adding a new one
577
+ done, running_futures = await asyncio.wait(
578
+ running_futures, return_when=asyncio.FIRST_COMPLETED)
579
+ try:
580
+ for task in done:
581
+ task.result()
582
+ except HttpResponseError as error:
583
+ process_storage_error(error)
584
+ try:
585
+ for _ in range(0, len(done)):
586
+ next_chunk = next(chunks_iter)
587
+ running_futures.add(asyncio.ensure_future(downloader.process_chunk(next_chunk)))
588
+ except StopIteration:
589
+ break
590
+
591
+ if running_futures:
592
+ # Wait for the remaining downloads to finish
593
+ done, _running_futures = await asyncio.wait(running_futures)
594
+ try:
595
+ for task in done:
596
+ task.result()
597
+ except HttpResponseError as error:
598
+ process_storage_error(error)
599
+
600
+ self._complete_read()
515
601
 
516
- dl_tasks = downloader.get_chunk_offsets()
517
- running_futures = [
518
- asyncio.ensure_future(downloader.process_chunk(d))
519
- for d in islice(dl_tasks, 0, self._max_concurrency)
520
- ]
521
- while running_futures:
522
- # Wait for some download to finish before adding a new one
523
- done, running_futures = await asyncio.wait(
524
- running_futures, return_when=asyncio.FIRST_COMPLETED)
525
- try:
526
- for task in done:
527
- task.result()
528
- except HttpResponseError as error:
529
- process_storage_error(error)
530
- try:
531
- for _ in range(0, len(done)):
532
- next_chunk = next(dl_tasks)
533
- running_futures.add(asyncio.ensure_future(downloader.process_chunk(next_chunk)))
534
- except StopIteration:
535
- break
536
-
537
- if running_futures:
538
- # Wait for the remaining downloads to finish
539
- done, _running_futures = await asyncio.wait(running_futures)
540
- try:
541
- for task in done:
542
- task.result()
543
- except HttpResponseError as error:
544
- process_storage_error(error)
545
-
546
- self._offset += remaining_size
602
+ else:
603
+ while (chunk := next(chunks_iter, None)) is not None and remaining > 0:
604
+ chunk_data, content_length = await downloader.yield_chunk(chunk)
605
+ self._download_offset += len(chunk_data)
606
+ self._raw_download_offset += content_length
607
+ self._current_content = self._decoder.decode(
608
+ chunk_data, final=self._download_complete) if self._text_mode else chunk_data
609
+
610
+ if remaining < len(self._current_content):
611
+ read = output_stream.write(self._current_content[:remaining])
612
+ else:
613
+ read = output_stream.write(self._current_content)
614
+
615
+ self._current_content_offset = read
616
+ self._read_offset += read
617
+ remaining -= read
618
+ await self._check_and_report_progress()
619
+
620
+ data = output_stream.getvalue()
621
+ if not self._text_mode and self._encoding:
622
+ try:
623
+ # This is technically incorrect to do, but we have it for backwards compatibility.
624
+ data = data.decode(self._encoding)
625
+ except UnicodeDecodeError:
626
+ warnings.warn(
627
+ "Encountered a decoding error while decoding blob data from a partial read. "
628
+ "Try using the `chars` keyword instead to read in text mode."
629
+ )
630
+ raise
547
631
 
548
- data = stream.getvalue()
549
- if self._encoding:
550
- return data.decode(self._encoding)
551
632
  return data
552
633
 
553
634
  async def readall(self) -> T:
@@ -558,53 +639,7 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
558
639
  :returns: The requested data as bytes or a string if encoding was specified.
559
640
  :rtype: T
560
641
  """
561
- stream = BytesIO()
562
- await self.readinto(stream)
563
- data = stream.getvalue()
564
- if self._encoding:
565
- return data.decode(self._encoding)
566
- return data
567
-
568
- async def content_as_bytes(self, max_concurrency=1):
569
- """DEPRECATED: Download the contents of this file.
570
-
571
- This operation is blocking until all data is downloaded.
572
-
573
- This method is deprecated, use func:`readall` instead.
574
-
575
- :param int max_concurrency:
576
- The number of parallel connections with which to download.
577
- :returns: The contents of the file as bytes.
578
- :rtype: bytes
579
- """
580
- warnings.warn(
581
- "content_as_bytes is deprecated, use readall instead",
582
- DeprecationWarning
583
- )
584
- self._max_concurrency = max_concurrency
585
- return await self.readall()
586
-
587
- async def content_as_text(self, max_concurrency=1, encoding="UTF-8"):
588
- """DEPRECATED: Download the contents of this blob, and decode as text.
589
-
590
- This operation is blocking until all data is downloaded.
591
-
592
- This method is deprecated, use func:`readall` instead.
593
-
594
- :param int max_concurrency:
595
- The number of parallel connections with which to download.
596
- :param str encoding:
597
- Text encoding to decode the downloaded bytes. Default is UTF-8.
598
- :returns: The content of the file as a str.
599
- :rtype: str
600
- """
601
- warnings.warn(
602
- "content_as_text is deprecated, use readall instead",
603
- DeprecationWarning
604
- )
605
- self._max_concurrency = max_concurrency
606
- self._encoding = encoding
607
- return await self.readall()
642
+ return await self.read()
608
643
 
609
644
  async def readinto(self, stream: IO[bytes]) -> int:
610
645
  """Download the contents of this blob to a stream.
@@ -616,6 +651,11 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
616
651
  :returns: The number of bytes read.
617
652
  :rtype: int
618
653
  """
654
+ if self._text_mode:
655
+ raise ValueError("Stream has been partially read in text mode. readinto is not supported in text mode.")
656
+ if self._encoding:
657
+ warnings.warn("Encoding is ignored with readinto as only byte streams are supported.")
658
+
619
659
  # the stream must be seekable if parallel download is required
620
660
  parallel = self._max_concurrency > 1
621
661
  if parallel:
@@ -629,35 +669,34 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
629
669
  raise ValueError(error_message) from exc
630
670
 
631
671
  # If some data has been streamed using `read`, only stream the remaining data
632
- remaining_size = self.size - self._offset
672
+ remaining_size = self.size - self._read_offset
633
673
  # Already read to the end
634
674
  if remaining_size <= 0:
635
675
  return 0
636
676
 
637
- # Write the content to the user stream if there is data left
638
- if self._offset < len(self._current_content):
639
- content = self._current_content[self._offset:]
640
- stream.write(content)
641
- self._offset += len(content)
642
- if self._progress_hook:
643
- await self._progress_hook(len(content), self.size)
677
+ # Write the current content to the user stream
678
+ current_remaining = len(self._current_content) - self._current_content_offset
679
+ start = self._current_content_offset
680
+ count = stream.write(self._current_content[start:start + current_remaining])
681
+
682
+ self._current_content_offset += count
683
+ self._read_offset += count
684
+ if self._progress_hook:
685
+ await self._progress_hook(self._read_offset, self.size)
644
686
 
687
+ # If all the data was already downloaded/buffered
645
688
  if self._download_complete:
646
689
  return remaining_size
647
690
 
648
- data_end = self._file_size
649
- if self._end_range is not None:
650
- # Use the length unless it is over the end of the file
651
- data_end = min(self._file_size, self._end_range + 1)
652
-
653
- data_start = self._get_downloader_start_with_offset()
691
+ data_start = self._download_start + self._read_offset
692
+ data_end = self._download_start + self.size
654
693
 
655
694
  downloader = _AsyncChunkDownloader(
656
695
  client=self._clients.blob,
657
696
  non_empty_ranges=self._non_empty_ranges,
658
697
  total_size=self.size,
659
698
  chunk_size=self._config.max_chunk_get_size,
660
- current_progress=self._offset,
699
+ current_progress=self._read_offset,
661
700
  start_range=data_start,
662
701
  end_range=data_end,
663
702
  stream=stream,
@@ -667,7 +706,8 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
667
706
  encryption_data=self._encryption_data,
668
707
  use_location=self._location_mode,
669
708
  progress_hook=self._progress_hook,
670
- **self._request_options)
709
+ **self._request_options
710
+ )
671
711
 
672
712
  dl_tasks = downloader.get_chunk_offsets()
673
713
  running_futures = [
@@ -699,8 +739,72 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
699
739
  except HttpResponseError as error:
700
740
  process_storage_error(error)
701
741
 
742
+ self._complete_read()
702
743
  return remaining_size
703
744
 
745
+ def _complete_read(self):
746
+ """Adjusts all offsets to the end of the download."""
747
+ self._download_offset = self.size
748
+ self._raw_download_offset = self.size
749
+ self._read_offset = self.size
750
+ self._current_content_offset = len(self._current_content)
751
+
752
+ async def _check_and_report_progress(self):
753
+ """Reports progress if necessary."""
754
+ # Only report progress at the end of each chunk and use download_offset to always report
755
+ # progress in terms of (approximate) byte count.
756
+ if self._progress_hook and self._current_content_offset == len(self._current_content):
757
+ await self._progress_hook(self._download_offset, self.size)
758
+
759
+ async def content_as_bytes(self, max_concurrency=1):
760
+ """DEPRECATED: Download the contents of this file.
761
+
762
+ This operation is blocking until all data is downloaded.
763
+
764
+ This method is deprecated, use func:`readall` instead.
765
+
766
+ :param int max_concurrency:
767
+ The number of parallel connections with which to download.
768
+ :returns: The contents of the file as bytes.
769
+ :rtype: bytes
770
+ """
771
+ warnings.warn(
772
+ "content_as_bytes is deprecated, use readall instead",
773
+ DeprecationWarning
774
+ )
775
+ if self._text_mode:
776
+ raise ValueError("Stream has been partially read in text mode. "
777
+ "content_as_bytes is not supported in text mode.")
778
+
779
+ self._max_concurrency = max_concurrency
780
+ return await self.readall()
781
+
782
+ async def content_as_text(self, max_concurrency=1, encoding="UTF-8"):
783
+ """DEPRECATED: Download the contents of this blob, and decode as text.
784
+
785
+ This operation is blocking until all data is downloaded.
786
+
787
+ This method is deprecated, use func:`readall` instead.
788
+
789
+ :param int max_concurrency:
790
+ The number of parallel connections with which to download.
791
+ :param str encoding:
792
+ Text encoding to decode the downloaded bytes. Default is UTF-8.
793
+ :returns: The content of the file as a str.
794
+ :rtype: str
795
+ """
796
+ warnings.warn(
797
+ "content_as_text is deprecated, use readall instead",
798
+ DeprecationWarning
799
+ )
800
+ if self._text_mode:
801
+ raise ValueError("Stream has been partially read in text mode. "
802
+ "content_as_text is not supported in text mode.")
803
+
804
+ self._max_concurrency = max_concurrency
805
+ self._encoding = encoding
806
+ return await self.readall()
807
+
704
808
  async def download_to_stream(self, stream, max_concurrency=1):
705
809
  """DEPRECATED: Download the contents of this blob to a stream.
706
810
 
@@ -719,6 +823,10 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
719
823
  "download_to_stream is deprecated, use readinto instead",
720
824
  DeprecationWarning
721
825
  )
826
+ if self._text_mode:
827
+ raise ValueError("Stream has been partially read in text mode. "
828
+ "download_to_stream is not supported in text mode.")
829
+
722
830
  self._max_concurrency = max_concurrency
723
831
  await self.readinto(stream)
724
832
  return self.properties