azure-storage-blob 12.21.0__py3-none-any.whl → 12.21.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,12 +5,11 @@
5
5
  # --------------------------------------------------------------------------
6
6
  # pylint: disable=invalid-overridden-method
7
7
 
8
- import codecs
9
8
  import sys
10
9
  import warnings
11
- from io import BytesIO, StringIO
10
+ from io import BytesIO
12
11
  from itertools import islice
13
- from typing import AsyncIterator, Generic, IO, Optional, overload, TypeVar, Union
12
+ from typing import AsyncIterator, Generic, IO, Optional, TypeVar
14
13
 
15
14
  import asyncio
16
15
 
@@ -60,7 +59,7 @@ class _AsyncChunkDownloader(_ChunkDownloader):
60
59
 
61
60
  async def process_chunk(self, chunk_start):
62
61
  chunk_start, chunk_end = self._calculate_range(chunk_start)
63
- chunk_data, _ = await self._download_chunk(chunk_start, chunk_end - 1)
62
+ chunk_data = await self._download_chunk(chunk_start, chunk_end - 1)
64
63
  length = chunk_end - chunk_start
65
64
  if length > 0:
66
65
  await self._write_to_stream(chunk_data, chunk_start)
@@ -96,8 +95,8 @@ class _AsyncChunkDownloader(_ChunkDownloader):
96
95
  # No need to download the empty chunk from server if there's no data in the chunk to be downloaded.
97
96
  # Do optimize and create empty chunk locally if condition is met.
98
97
  if self._do_optimize(download_range[0], download_range[1]):
99
- content_length = download_range[1] - download_range[0] + 1
100
- chunk_data = b"\x00" * content_length
98
+ data_size = download_range[1] - download_range[0] + 1
99
+ chunk_data = b"\x00" * data_size
101
100
  else:
102
101
  range_header, range_validation = validate_and_format_range_headers(
103
102
  download_range[0],
@@ -118,14 +117,14 @@ class _AsyncChunkDownloader(_ChunkDownloader):
118
117
  process_storage_error(error)
119
118
 
120
119
  chunk_data = await process_content(response, offset[0], offset[1], self.encryption_options)
121
- content_length = response.content_length
120
+
122
121
 
123
122
  # This makes sure that if_match is set so that we can validate
124
123
  # that subsequent downloads are to an unmodified blob
125
124
  if self.request_options.get('modified_access_conditions'):
126
125
  self.request_options['modified_access_conditions'].if_match = response.properties.etag
127
126
 
128
- return chunk_data, content_length
127
+ return chunk_data
129
128
 
130
129
 
131
130
  class _AsyncChunkIterator(object):
@@ -168,7 +167,7 @@ class _AsyncChunkIterator(object):
168
167
 
169
168
  try:
170
169
  chunk = next(self._iter_chunks)
171
- self._current_content += (await self._iter_downloader.yield_chunk(chunk))[0]
170
+ self._current_content += await self._iter_downloader.yield_chunk(chunk)
172
171
  except StopIteration as exc:
173
172
  self._complete = True
174
173
  # it's likely that there some data left in self._current_content
@@ -229,32 +228,28 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
229
228
  self._encryption_options = encryption_options or {}
230
229
  self._progress_hook = kwargs.pop('progress_hook', None)
231
230
  self._request_options = kwargs
232
- self._response = None
233
231
  self._location_mode = None
234
- self._current_content = b''
235
- self._file_size = 0
232
+ self._download_complete = False
233
+ self._current_content = None
234
+ self._file_size = None
236
235
  self._non_empty_ranges = None
236
+ self._response = None
237
237
  self._encryption_data = None
238
+ self._offset = 0
238
239
 
239
- # The content download offset, after any processing (decryption), in bytes
240
- self._download_offset = 0
241
- # The raw download offset, before processing (decryption), in bytes
242
- self._raw_download_offset = 0
243
- # The offset the stream has been read to in bytes or chars depending on mode
244
- self._read_offset = 0
245
- # The offset into current_content that has been consumed in bytes or chars depending on mode
246
- self._current_content_offset = 0
247
-
248
- self._text_mode: Optional[bool] = None
249
- self._decoder = None
250
- # Whether the current content is the first chunk of download content or not
251
- self._first_chunk = True
252
- self._download_start = self._start_range or 0
240
+ self._initial_range = None
241
+ self._initial_offset = None
253
242
 
254
243
  # The cls is passed in via download_cls to avoid conflicting arg name with Generic.__new__
255
244
  # but needs to be changed to cls in the request options.
256
245
  self._request_options['cls'] = download_cls
257
246
 
247
+ # The service only provides transactional MD5s for chunks under 4MB.
248
+ # If validate_content is on, get only self.MAX_CHUNK_GET_SIZE for the first
249
+ # chunk so a transactional MD5 can be retrieved.
250
+ self._first_get_size = self._config.max_single_get_size if not self._validate_content \
251
+ else self._config.max_chunk_get_size
252
+
258
253
  def __len__(self):
259
254
  return self.size
260
255
 
@@ -277,19 +272,12 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
277
272
  if self._encryption_options.get("key") is not None or self._encryption_options.get("resolver") is not None:
278
273
  await self._get_encryption_data_request()
279
274
 
280
- # The service only provides transactional MD5s for chunks under 4MB.
281
- # If validate_content is on, get only self.MAX_CHUNK_GET_SIZE for the first
282
- # chunk so a transactional MD5 can be retrieved.
283
- first_get_size = (
284
- self._config.max_single_get_size if not self._validate_content else self._config.max_chunk_get_size
285
- )
286
275
  initial_request_start = self._start_range if self._start_range is not None else 0
287
- if self._end_range is not None and self._end_range - initial_request_start < first_get_size:
276
+ if self._end_range is not None and self._end_range - self._start_range < self._first_get_size:
288
277
  initial_request_end = self._end_range
289
278
  else:
290
- initial_request_end = initial_request_start + first_get_size - 1
279
+ initial_request_end = initial_request_start + self._first_get_size - 1
291
280
 
292
- # pylint: disable-next=attribute-defined-outside-init
293
281
  self._initial_range, self._initial_offset = process_range_and_offset(
294
282
  initial_request_start,
295
283
  initial_request_end,
@@ -304,22 +292,39 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
304
292
  self.properties.name = self.name
305
293
  self.properties.container = self.container
306
294
 
307
- # Set the content length to the download size instead of the size of the last range
295
+ # Set the content length to the download size instead of the size of
296
+ # the last range
297
+ initial_size = self._response.properties.size
308
298
  self.properties.size = self.size
309
- self.properties.content_range = (f"bytes {self._download_start}-"
310
- f"{self._end_range if self._end_range is not None else self._file_size - 1}/"
311
- f"{self._file_size}")
299
+
300
+ # Overwrite the content range to the user requested range
301
+ self.properties.content_range = f'bytes {self._start_range}-{self._end_range}/{self._file_size}'
312
302
 
313
303
  # Overwrite the content MD5 as it is the MD5 for the last range instead
314
304
  # of the stored MD5
315
305
  # TODO: Set to the stored MD5 when the service returns this
316
306
  self.properties.content_md5 = None
317
307
 
318
- @property
319
- def _download_complete(self):
308
+ if self.size == 0:
309
+ self._current_content = b""
310
+ else:
311
+ self._current_content = await process_content(
312
+ self._response,
313
+ self._initial_offset[0],
314
+ self._initial_offset[1],
315
+ self._encryption_options
316
+ )
317
+
318
+ # If the file is small, the download is complete at this point.
319
+ # If file size is large, download the rest of the file in chunks.
320
+ # For encryption V2, calculate based on size of decrypted content, not download size.
320
321
  if is_encryption_v2(self._encryption_data):
321
- return self._download_offset >= self.size
322
- return self._raw_download_offset >= self.size
322
+ self._download_complete = len(self._current_content) >= self.size
323
+ else:
324
+ self._download_complete = initial_size >= self.size
325
+
326
+ if not self._download_complete and self._request_options.get("modified_access_conditions"):
327
+ self._request_options["modified_access_conditions"].if_match = self._response.properties.etag
323
328
 
324
329
  async def _initial_request(self):
325
330
  range_header, range_validation = validate_and_format_range_headers(
@@ -352,7 +357,7 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
352
357
 
353
358
  if self._end_range is not None:
354
359
  # Use the length unless it is over the end of the file
355
- self.size = min(self._file_size - self._start_range, self._end_range - self._start_range + 1)
360
+ self.size = min(self._file_size, self._end_range - self._start_range + 1)
356
361
  elif self._start_range is not None:
357
362
  self.size = self._file_size - self._start_range
358
363
  else:
@@ -378,18 +383,6 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
378
383
  else:
379
384
  process_storage_error(error)
380
385
 
381
- if self.size == 0:
382
- self._current_content = b""
383
- else:
384
- self._current_content = await process_content(
385
- response,
386
- self._initial_offset[0],
387
- self._initial_offset[1],
388
- self._encryption_options
389
- )
390
- self._download_offset += len(self._current_content)
391
- self._raw_download_offset += response.content_length
392
-
393
386
  # get page ranges to optimize downloading sparse page blob
394
387
  if response.properties.blob_type == 'PageBlob':
395
388
  try:
@@ -398,19 +391,22 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
398
391
  except HttpResponseError:
399
392
  pass
400
393
 
401
- if not self._download_complete and self._request_options.get("modified_access_conditions"):
402
- self._request_options["modified_access_conditions"].if_match = response.properties.etag
403
-
404
394
  return response
405
395
 
396
+ def _get_downloader_start_with_offset(self):
397
+ # Start where the initial request download ended
398
+ start = self._initial_range[1] + 1
399
+ # For encryption V2 only, adjust start to the end of the fetched data rather than download size
400
+ if self._encryption_options.get("key") is not None or self._encryption_options.get("resolver") is not None:
401
+ start = (self._start_range or 0) + len(self._current_content)
402
+
403
+ # Adjust the start based on any data read past the current content
404
+ start += (self._offset - len(self._current_content))
405
+ return start
406
+
406
407
  def chunks(self):
407
408
  # type: () -> AsyncIterator[bytes]
408
- """
409
- Iterate over chunks in the download stream. Note, the iterator returned will
410
- iterate over the entire download content, regardless of any data that was
411
- previously read.
412
-
413
- NOTE: If the stream has been partially read, some data may be re-downloaded by the iterator.
409
+ """Iterate over chunks in the download stream.
414
410
 
415
411
  :returns: An async iterator of the chunks in the download stream.
416
412
  :rtype: AsyncIterator[bytes]
@@ -424,124 +420,79 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
424
420
  :dedent: 16
425
421
  :caption: Download a blob using chunks().
426
422
  """
427
- if self._text_mode:
428
- raise ValueError("Stream has been partially read in text mode. chunks is not supported in text mode.")
429
- if self._encoding:
430
- warnings.warn("Encoding is ignored with chunks as only bytes are supported.")
431
-
432
- iter_downloader = None
433
- # If we still have the first chunk buffered, use it. Otherwise, download all content again
434
- if not self._first_chunk or not self._download_complete:
435
- if self._first_chunk:
436
- start = self._download_start + len(self._current_content)
437
- current_progress = len(self._current_content)
438
- else:
439
- start = self._download_start
440
- current_progress = 0
441
-
442
- end = self._download_start + self.size
423
+ if self.size == 0 or self._download_complete:
424
+ iter_downloader = None
425
+ else:
426
+ data_end = self._file_size
427
+ data_start = self._initial_range[1] + 1 # Start where the first download ended
428
+ # For encryption, adjust start to the end of the fetched data rather than download size
429
+ if self._encryption_options.get("key") is not None or self._encryption_options.get("resolver") is not None:
430
+ data_start = (self._start_range or 0) + len(self._current_content)
443
431
 
432
+ if self._end_range is not None:
433
+ # Use the length unless it is over the end of the file
434
+ data_end = min(self._file_size, self._end_range + 1)
444
435
  iter_downloader = _AsyncChunkDownloader(
445
436
  client=self._clients.blob,
446
437
  non_empty_ranges=self._non_empty_ranges,
447
438
  total_size=self.size,
448
439
  chunk_size=self._config.max_chunk_get_size,
449
- current_progress=current_progress,
450
- start_range=start,
451
- end_range=end,
440
+ current_progress=self._first_get_size,
441
+ start_range=data_start,
442
+ end_range=data_end,
443
+ stream=None,
444
+ parallel=False,
452
445
  validate_content=self._validate_content,
453
446
  encryption_options=self._encryption_options,
454
447
  encryption_data=self._encryption_data,
455
448
  use_location=self._location_mode,
456
- **self._request_options
457
- )
458
-
459
- initial_content = self._current_content if self._first_chunk else b''
449
+ **self._request_options)
460
450
  return _AsyncChunkIterator(
461
451
  size=self.size,
462
- content=initial_content,
452
+ content=self._current_content,
463
453
  downloader=iter_downloader,
464
454
  chunk_size=self._config.max_chunk_get_size)
465
455
 
466
- @overload
467
- async def read(self, size: int = -1) -> T:
468
- ...
469
-
470
- @overload
471
- async def read(self, *, chars: Optional[int] = None) -> T:
472
- ...
473
-
474
- # pylint: disable-next=too-many-statements,too-many-branches
475
- async def read(self, size: int = -1, *, chars: Optional[int] = None) -> T:
456
+ async def read(self, size: Optional[int] = -1) -> T:
476
457
  """
477
- Read the specified bytes or chars from the stream. If `encoding`
478
- was specified on `download_blob`, it is recommended to use the
479
- chars parameter to read a specific number of chars to avoid decoding
480
- errors. If size/chars is unspecified or negative all bytes will be read.
458
+ Read up to size bytes from the stream and return them. If size
459
+ is unspecified or is -1, all bytes will be read.
481
460
 
482
- :param int size:
461
+ :param Optional[int] size:
483
462
  The number of bytes to download from the stream. Leave unspecified
484
- or set negative to download all bytes.
485
- :keyword Optional[int] chars:
486
- The number of chars to download from the stream. Leave unspecified
487
- or set negative to download all chars. Note, this can only be used
488
- when encoding is specified on `download_blob`.
463
+ or set to -1 to download all bytes.
489
464
  :returns:
490
465
  The requested data as bytes or a string if encoding was specified. If
491
466
  the return value is empty, there is no more data to read.
492
467
  :rtype: T
493
468
  """
494
- if size > -1 and self._encoding:
495
- warnings.warn(
496
- "Size parameter specified with text encoding enabled. It is recommended to use chars "
497
- "to read a specific number of characters instead."
498
- )
499
- if size > -1 and chars is not None:
500
- raise ValueError("Cannot specify both size and chars.")
501
- if not self._encoding and chars is not None:
502
- raise ValueError("Must specify encoding to read chars.")
503
- if self._text_mode and size > -1:
504
- raise ValueError("Stream has been partially read in text mode. Please use chars.")
505
- if self._text_mode is False and chars is not None:
506
- raise ValueError("Stream has been partially read in bytes mode. Please use size.")
507
-
469
+ if size == -1:
470
+ return await self.readall()
508
471
  # Empty blob or already read to the end
509
- if (size == 0 or chars == 0 or
510
- (self._download_complete and self._current_content_offset >= len(self._current_content))):
472
+ if size == 0 or self._offset >= self.size:
511
473
  return b'' if not self._encoding else ''
512
474
 
513
- if not self._text_mode and chars is not None:
514
- self._text_mode = True
515
- self._decoder = codecs.getincrementaldecoder(self._encoding)('strict')
516
- self._current_content = self._decoder.decode(self._current_content, final=self._download_complete)
517
- elif self._text_mode is None:
518
- self._text_mode = False
519
-
520
- output_stream: Union[BytesIO, StringIO]
521
- if self._text_mode:
522
- output_stream = StringIO()
523
- size = chars if chars else sys.maxsize
524
- else:
525
- output_stream = BytesIO()
526
- size = size if size > 0 else sys.maxsize
527
- readall = size == sys.maxsize
528
- count = 0
529
-
530
- # Start by reading from current_content
531
- start = self._current_content_offset
532
- length = min(len(self._current_content) - self._current_content_offset, size - count)
533
- read = output_stream.write(self._current_content[start:start + length])
534
-
535
- count += read
536
- self._current_content_offset += read
537
- self._read_offset += read
538
- await self._check_and_report_progress()
539
-
540
- remaining = size - count
541
- if remaining > 0 and not self._download_complete:
542
- # Create a downloader than can download the rest of the file
543
- start = self._download_start + self._download_offset
544
- end = self._download_start + self.size
475
+ stream = BytesIO()
476
+ remaining_size = size
477
+
478
+ # Start by reading from current_content if there is data left
479
+ if self._offset < len(self._current_content):
480
+ start = self._offset
481
+ length = min(remaining_size, len(self._current_content) - self._offset)
482
+ read = stream.write(self._current_content[start:start + length])
483
+
484
+ remaining_size -= read
485
+ self._offset += read
486
+ if self._progress_hook:
487
+ await self._progress_hook(self._offset, self.size)
488
+
489
+ if remaining_size > 0:
490
+ start_range = self._get_downloader_start_with_offset()
491
+
492
+ # End is the min between the remaining size, the file size, and the end of the specified range
493
+ end_range = min(start_range + remaining_size, self._file_size)
494
+ if self._end_range is not None:
495
+ end_range = min(end_range, self._end_range + 1)
545
496
 
546
497
  parallel = self._max_concurrency > 1
547
498
  downloader = _AsyncChunkDownloader(
@@ -549,10 +500,10 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
549
500
  non_empty_ranges=self._non_empty_ranges,
550
501
  total_size=self.size,
551
502
  chunk_size=self._config.max_chunk_get_size,
552
- current_progress=self._read_offset,
553
- start_range=start,
554
- end_range=end,
555
- stream=output_stream,
503
+ current_progress=self._offset,
504
+ start_range=start_range,
505
+ end_range=end_range,
506
+ stream=stream,
556
507
  parallel=parallel,
557
508
  validate_content=self._validate_content,
558
509
  encryption_options=self._encryption_options,
@@ -561,74 +512,42 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
561
512
  progress_hook=self._progress_hook,
562
513
  **self._request_options
563
514
  )
564
- self._first_chunk = False
565
-
566
- # When reading all data, have the downloader read everything into the stream.
567
- # Else, read one chunk at a time (using the downloader as an iterator) until
568
- # the requested size is reached.
569
- chunks_iter = downloader.get_chunk_offsets()
570
- if readall and not self._text_mode:
571
- running_futures = [
572
- asyncio.ensure_future(downloader.process_chunk(d))
573
- for d in islice(chunks_iter, 0, self._max_concurrency)
574
- ]
575
- while running_futures:
576
- # Wait for some download to finish before adding a new one
577
- done, running_futures = await asyncio.wait(
578
- running_futures, return_when=asyncio.FIRST_COMPLETED)
579
- try:
580
- for task in done:
581
- task.result()
582
- except HttpResponseError as error:
583
- process_storage_error(error)
584
- try:
585
- for _ in range(0, len(done)):
586
- next_chunk = next(chunks_iter)
587
- running_futures.add(asyncio.ensure_future(downloader.process_chunk(next_chunk)))
588
- except StopIteration:
589
- break
590
-
591
- if running_futures:
592
- # Wait for the remaining downloads to finish
593
- done, _running_futures = await asyncio.wait(running_futures)
594
- try:
595
- for task in done:
596
- task.result()
597
- except HttpResponseError as error:
598
- process_storage_error(error)
599
-
600
- self._complete_read()
601
515
 
602
- else:
603
- while (chunk := next(chunks_iter, None)) is not None and remaining > 0:
604
- chunk_data, content_length = await downloader.yield_chunk(chunk)
605
- self._download_offset += len(chunk_data)
606
- self._raw_download_offset += content_length
607
- self._current_content = self._decoder.decode(
608
- chunk_data, final=self._download_complete) if self._text_mode else chunk_data
609
-
610
- if remaining < len(self._current_content):
611
- read = output_stream.write(self._current_content[:remaining])
612
- else:
613
- read = output_stream.write(self._current_content)
614
-
615
- self._current_content_offset = read
616
- self._read_offset += read
617
- remaining -= read
618
- await self._check_and_report_progress()
619
-
620
- data = output_stream.getvalue()
621
- if not self._text_mode and self._encoding:
622
- try:
623
- # This is technically incorrect to do, but we have it for backwards compatibility.
624
- data = data.decode(self._encoding)
625
- except UnicodeDecodeError:
626
- warnings.warn(
627
- "Encountered a decoding error while decoding blob data from a partial read. "
628
- "Try using the `chars` keyword instead to read in text mode."
629
- )
630
- raise
516
+ dl_tasks = downloader.get_chunk_offsets()
517
+ running_futures = [
518
+ asyncio.ensure_future(downloader.process_chunk(d))
519
+ for d in islice(dl_tasks, 0, self._max_concurrency)
520
+ ]
521
+ while running_futures:
522
+ # Wait for some download to finish before adding a new one
523
+ done, running_futures = await asyncio.wait(
524
+ running_futures, return_when=asyncio.FIRST_COMPLETED)
525
+ try:
526
+ for task in done:
527
+ task.result()
528
+ except HttpResponseError as error:
529
+ process_storage_error(error)
530
+ try:
531
+ for _ in range(0, len(done)):
532
+ next_chunk = next(dl_tasks)
533
+ running_futures.add(asyncio.ensure_future(downloader.process_chunk(next_chunk)))
534
+ except StopIteration:
535
+ break
536
+
537
+ if running_futures:
538
+ # Wait for the remaining downloads to finish
539
+ done, _running_futures = await asyncio.wait(running_futures)
540
+ try:
541
+ for task in done:
542
+ task.result()
543
+ except HttpResponseError as error:
544
+ process_storage_error(error)
631
545
 
546
+ self._offset += remaining_size
547
+
548
+ data = stream.getvalue()
549
+ if self._encoding:
550
+ return data.decode(self._encoding)
632
551
  return data
633
552
 
634
553
  async def readall(self) -> T:
@@ -639,7 +558,53 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
639
558
  :returns: The requested data as bytes or a string if encoding was specified.
640
559
  :rtype: T
641
560
  """
642
- return await self.read()
561
+ stream = BytesIO()
562
+ await self.readinto(stream)
563
+ data = stream.getvalue()
564
+ if self._encoding:
565
+ return data.decode(self._encoding)
566
+ return data
567
+
568
+ async def content_as_bytes(self, max_concurrency=1):
569
+ """DEPRECATED: Download the contents of this file.
570
+
571
+ This operation is blocking until all data is downloaded.
572
+
573
+ This method is deprecated, use func:`readall` instead.
574
+
575
+ :param int max_concurrency:
576
+ The number of parallel connections with which to download.
577
+ :returns: The contents of the file as bytes.
578
+ :rtype: bytes
579
+ """
580
+ warnings.warn(
581
+ "content_as_bytes is deprecated, use readall instead",
582
+ DeprecationWarning
583
+ )
584
+ self._max_concurrency = max_concurrency
585
+ return await self.readall()
586
+
587
+ async def content_as_text(self, max_concurrency=1, encoding="UTF-8"):
588
+ """DEPRECATED: Download the contents of this blob, and decode as text.
589
+
590
+ This operation is blocking until all data is downloaded.
591
+
592
+ This method is deprecated, use func:`readall` instead.
593
+
594
+ :param int max_concurrency:
595
+ The number of parallel connections with which to download.
596
+ :param str encoding:
597
+ Test encoding to decode the downloaded bytes. Default is UTF-8.
598
+ :returns: The content of the file as a str.
599
+ :rtype: str
600
+ """
601
+ warnings.warn(
602
+ "content_as_text is deprecated, use readall instead",
603
+ DeprecationWarning
604
+ )
605
+ self._max_concurrency = max_concurrency
606
+ self._encoding = encoding
607
+ return await self.readall()
643
608
 
644
609
  async def readinto(self, stream: IO[bytes]) -> int:
645
610
  """Download the contents of this blob to a stream.
@@ -651,11 +616,6 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
651
616
  :returns: The number of bytes read.
652
617
  :rtype: int
653
618
  """
654
- if self._text_mode:
655
- raise ValueError("Stream has been partially read in text mode. readinto is not supported in text mode.")
656
- if self._encoding:
657
- warnings.warn("Encoding is ignored with readinto as only byte streams are supported.")
658
-
659
619
  # the stream must be seekable if parallel download is required
660
620
  parallel = self._max_concurrency > 1
661
621
  if parallel:
@@ -669,34 +629,35 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
669
629
  raise ValueError(error_message) from exc
670
630
 
671
631
  # If some data has been streamed using `read`, only stream the remaining data
672
- remaining_size = self.size - self._read_offset
632
+ remaining_size = self.size - self._offset
673
633
  # Already read to the end
674
634
  if remaining_size <= 0:
675
635
  return 0
676
636
 
677
- # Write the current content to the user stream
678
- current_remaining = len(self._current_content) - self._current_content_offset
679
- start = self._current_content_offset
680
- count = stream.write(self._current_content[start:start + current_remaining])
681
-
682
- self._current_content_offset += count
683
- self._read_offset += count
684
- if self._progress_hook:
685
- await self._progress_hook(self._read_offset, self.size)
637
+ # Write the content to the user stream if there is data left
638
+ if self._offset < len(self._current_content):
639
+ content = self._current_content[self._offset:]
640
+ stream.write(content)
641
+ self._offset += len(content)
642
+ if self._progress_hook:
643
+ await self._progress_hook(len(content), self.size)
686
644
 
687
- # If all the data was already downloaded/buffered
688
645
  if self._download_complete:
689
646
  return remaining_size
690
647
 
691
- data_start = self._download_start + self._read_offset
692
- data_end = self._download_start + self.size
648
+ data_end = self._file_size
649
+ if self._end_range is not None:
650
+ # Use the length unless it is over the end of the file
651
+ data_end = min(self._file_size, self._end_range + 1)
652
+
653
+ data_start = self._get_downloader_start_with_offset()
693
654
 
694
655
  downloader = _AsyncChunkDownloader(
695
656
  client=self._clients.blob,
696
657
  non_empty_ranges=self._non_empty_ranges,
697
658
  total_size=self.size,
698
659
  chunk_size=self._config.max_chunk_get_size,
699
- current_progress=self._read_offset,
660
+ current_progress=self._offset,
700
661
  start_range=data_start,
701
662
  end_range=data_end,
702
663
  stream=stream,
@@ -706,8 +667,7 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
706
667
  encryption_data=self._encryption_data,
707
668
  use_location=self._location_mode,
708
669
  progress_hook=self._progress_hook,
709
- **self._request_options
710
- )
670
+ **self._request_options)
711
671
 
712
672
  dl_tasks = downloader.get_chunk_offsets()
713
673
  running_futures = [
@@ -739,72 +699,8 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
739
699
  except HttpResponseError as error:
740
700
  process_storage_error(error)
741
701
 
742
- self._complete_read()
743
702
  return remaining_size
744
703
 
745
- def _complete_read(self):
746
- """Adjusts all offsets to the end of the download."""
747
- self._download_offset = self.size
748
- self._raw_download_offset = self.size
749
- self._read_offset = self.size
750
- self._current_content_offset = len(self._current_content)
751
-
752
- async def _check_and_report_progress(self):
753
- """Reports progress if necessary."""
754
- # Only report progress at the end of each chunk and use download_offset to always report
755
- # progress in terms of (approximate) byte count.
756
- if self._progress_hook and self._current_content_offset == len(self._current_content):
757
- await self._progress_hook(self._download_offset, self.size)
758
-
759
- async def content_as_bytes(self, max_concurrency=1):
760
- """DEPRECATED: Download the contents of this file.
761
-
762
- This operation is blocking until all data is downloaded.
763
-
764
- This method is deprecated, use func:`readall` instead.
765
-
766
- :param int max_concurrency:
767
- The number of parallel connections with which to download.
768
- :returns: The contents of the file as bytes.
769
- :rtype: bytes
770
- """
771
- warnings.warn(
772
- "content_as_bytes is deprecated, use readall instead",
773
- DeprecationWarning
774
- )
775
- if self._text_mode:
776
- raise ValueError("Stream has been partially read in text mode. "
777
- "content_as_bytes is not supported in text mode.")
778
-
779
- self._max_concurrency = max_concurrency
780
- return await self.readall()
781
-
782
- async def content_as_text(self, max_concurrency=1, encoding="UTF-8"):
783
- """DEPRECATED: Download the contents of this blob, and decode as text.
784
-
785
- This operation is blocking until all data is downloaded.
786
-
787
- This method is deprecated, use func:`readall` instead.
788
-
789
- :param int max_concurrency:
790
- The number of parallel connections with which to download.
791
- :param str encoding:
792
- Test encoding to decode the downloaded bytes. Default is UTF-8.
793
- :returns: The content of the file as a str.
794
- :rtype: str
795
- """
796
- warnings.warn(
797
- "content_as_text is deprecated, use readall instead",
798
- DeprecationWarning
799
- )
800
- if self._text_mode:
801
- raise ValueError("Stream has been partially read in text mode. "
802
- "content_as_text is not supported in text mode.")
803
-
804
- self._max_concurrency = max_concurrency
805
- self._encoding = encoding
806
- return await self.readall()
807
-
808
704
  async def download_to_stream(self, stream, max_concurrency=1):
809
705
  """DEPRECATED: Download the contents of this blob to a stream.
810
706
 
@@ -823,10 +719,6 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
823
719
  "download_to_stream is deprecated, use readinto instead",
824
720
  DeprecationWarning
825
721
  )
826
- if self._text_mode:
827
- raise ValueError("Stream has been partially read in text mode. "
828
- "download_to_stream is not supported in text mode.")
829
-
830
722
  self._max_concurrency = max_concurrency
831
723
  await self.readinto(stream)
832
724
  return self.properties