azure-storage-blob 12.21.0b1__py3-none-any.whl → 12.23.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- azure/storage/blob/__init__.py +19 -18
- azure/storage/blob/_blob_client.py +470 -1555
- azure/storage/blob/_blob_client_helpers.py +1242 -0
- azure/storage/blob/_blob_service_client.py +93 -112
- azure/storage/blob/_blob_service_client_helpers.py +27 -0
- azure/storage/blob/_container_client.py +176 -377
- azure/storage/blob/_container_client_helpers.py +266 -0
- azure/storage/blob/_deserialize.py +68 -44
- azure/storage/blob/_download.py +375 -241
- azure/storage/blob/_encryption.py +14 -7
- azure/storage/blob/_generated/_azure_blob_storage.py +2 -1
- azure/storage/blob/_generated/_serialization.py +2 -0
- azure/storage/blob/_generated/aio/_azure_blob_storage.py +2 -1
- azure/storage/blob/_generated/aio/operations/_append_blob_operations.py +1 -7
- azure/storage/blob/_generated/aio/operations/_blob_operations.py +21 -47
- azure/storage/blob/_generated/aio/operations/_block_blob_operations.py +2 -10
- azure/storage/blob/_generated/aio/operations/_container_operations.py +13 -26
- azure/storage/blob/_generated/aio/operations/_page_blob_operations.py +3 -14
- azure/storage/blob/_generated/aio/operations/_service_operations.py +14 -17
- azure/storage/blob/_generated/operations/_append_blob_operations.py +1 -7
- azure/storage/blob/_generated/operations/_blob_operations.py +21 -47
- azure/storage/blob/_generated/operations/_block_blob_operations.py +2 -10
- azure/storage/blob/_generated/operations/_container_operations.py +13 -26
- azure/storage/blob/_generated/operations/_page_blob_operations.py +3 -14
- azure/storage/blob/_generated/operations/_service_operations.py +14 -17
- azure/storage/blob/_generated/py.typed +1 -0
- azure/storage/blob/_lease.py +52 -63
- azure/storage/blob/_list_blobs_helper.py +129 -135
- azure/storage/blob/_models.py +480 -277
- azure/storage/blob/_quick_query_helper.py +30 -31
- azure/storage/blob/_serialize.py +39 -56
- azure/storage/blob/_shared/avro/datafile.py +1 -1
- azure/storage/blob/_shared/avro/datafile_async.py +1 -1
- azure/storage/blob/_shared/base_client.py +3 -1
- azure/storage/blob/_shared/base_client_async.py +1 -1
- azure/storage/blob/_shared/policies.py +16 -15
- azure/storage/blob/_shared/policies_async.py +21 -6
- azure/storage/blob/_shared/response_handlers.py +6 -2
- azure/storage/blob/_shared/shared_access_signature.py +21 -3
- azure/storage/blob/_shared/uploads.py +1 -1
- azure/storage/blob/_shared/uploads_async.py +1 -1
- azure/storage/blob/_shared_access_signature.py +110 -52
- azure/storage/blob/_upload_helpers.py +75 -68
- azure/storage/blob/_version.py +1 -1
- azure/storage/blob/aio/__init__.py +19 -11
- azure/storage/blob/aio/_blob_client_async.py +554 -301
- azure/storage/blob/aio/_blob_service_client_async.py +148 -97
- azure/storage/blob/aio/_container_client_async.py +289 -140
- azure/storage/blob/aio/_download_async.py +485 -337
- azure/storage/blob/aio/_lease_async.py +61 -60
- azure/storage/blob/aio/_list_blobs_helper.py +94 -96
- azure/storage/blob/aio/_models.py +60 -38
- azure/storage/blob/aio/_upload_helpers.py +75 -66
- {azure_storage_blob-12.21.0b1.dist-info → azure_storage_blob-12.23.0.dist-info}/METADATA +7 -7
- azure_storage_blob-12.23.0.dist-info/RECORD +84 -0
- {azure_storage_blob-12.21.0b1.dist-info → azure_storage_blob-12.23.0.dist-info}/WHEEL +1 -1
- azure/storage/blob/_generated/_vendor.py +0 -16
- azure_storage_blob-12.21.0b1.dist-info/RECORD +0 -81
- {azure_storage_blob-12.21.0b1.dist-info → azure_storage_blob-12.23.0.dist-info}/LICENSE +0 -0
- {azure_storage_blob-12.21.0b1.dist-info → azure_storage_blob-12.23.0.dist-info}/top_level.txt +0 -0
azure/storage/blob/_download.py
CHANGED
@@ -3,19 +3,23 @@
|
|
3
3
|
# Licensed under the MIT License. See License.txt in the project root for
|
4
4
|
# license information.
|
5
5
|
# --------------------------------------------------------------------------
|
6
|
-
|
6
|
+
import codecs
|
7
7
|
import sys
|
8
8
|
import threading
|
9
9
|
import time
|
10
10
|
import warnings
|
11
|
-
from io import BytesIO
|
12
|
-
from typing import
|
11
|
+
from io import BytesIO, StringIO
|
12
|
+
from typing import (
|
13
|
+
Any, Callable, cast, Dict, Generator,
|
14
|
+
Generic, IO, Iterator, List, Optional,
|
15
|
+
overload, Tuple, TypeVar, Union, TYPE_CHECKING
|
16
|
+
)
|
13
17
|
|
14
18
|
from azure.core.exceptions import DecodeError, HttpResponseError, IncompleteReadError
|
15
19
|
from azure.core.tracing.common import with_current_context
|
16
20
|
|
17
21
|
from ._shared.request_handlers import validate_and_format_range_headers
|
18
|
-
from ._shared.response_handlers import
|
22
|
+
from ._shared.response_handlers import parse_length_from_content_range, process_storage_error
|
19
23
|
from ._deserialize import deserialize_blob_properties, get_page_ranges_result
|
20
24
|
from ._encryption import (
|
21
25
|
adjust_blob_size_for_encryption,
|
@@ -25,10 +29,25 @@ from ._encryption import (
|
|
25
29
|
parse_encryption_data
|
26
30
|
)
|
27
31
|
|
32
|
+
if TYPE_CHECKING:
|
33
|
+
from codecs import IncrementalDecoder
|
34
|
+
from ._encryption import _EncryptionData
|
35
|
+
from ._generated import AzureBlobStorage
|
36
|
+
from ._generated.operations import BlobOperations
|
37
|
+
from ._models import BlobProperties
|
38
|
+
from ._shared.models import StorageConfiguration
|
39
|
+
|
40
|
+
|
28
41
|
T = TypeVar('T', bytes, str)
|
29
42
|
|
30
43
|
|
31
|
-
def process_range_and_offset(
|
44
|
+
def process_range_and_offset(
|
45
|
+
start_range: int,
|
46
|
+
end_range: int,
|
47
|
+
length: Optional[int],
|
48
|
+
encryption_options: Dict[str, Any],
|
49
|
+
encryption_data: Optional["_EncryptionData"]
|
50
|
+
) -> Tuple[Tuple[int, int], Tuple[int, int]]:
|
32
51
|
start_offset, end_offset = 0, 0
|
33
52
|
if encryption_options.get("key") is not None or encryption_options.get("resolver") is not None:
|
34
53
|
return get_adjusted_download_range_and_offset(
|
@@ -40,7 +59,7 @@ def process_range_and_offset(start_range, end_range, length, encryption_options,
|
|
40
59
|
return (start_range, end_range), (start_offset, end_offset)
|
41
60
|
|
42
61
|
|
43
|
-
def process_content(data, start_offset, end_offset, encryption):
|
62
|
+
def process_content(data: Any, start_offset: int, end_offset: int, encryption: Dict[str, Any]) -> bytes:
|
44
63
|
if data is None:
|
45
64
|
raise ValueError("Response cannot be None.")
|
46
65
|
|
@@ -49,7 +68,7 @@ def process_content(data, start_offset, end_offset, encryption):
|
|
49
68
|
if content and encryption.get("key") is not None or encryption.get("resolver") is not None:
|
50
69
|
try:
|
51
70
|
return decrypt_blob(
|
52
|
-
encryption.get("required"),
|
71
|
+
encryption.get("required") or False,
|
53
72
|
encryption.get("key"),
|
54
73
|
encryption.get("resolver"),
|
55
74
|
content,
|
@@ -65,21 +84,21 @@ def process_content(data, start_offset, end_offset, encryption):
|
|
65
84
|
class _ChunkDownloader(object): # pylint: disable=too-many-instance-attributes
|
66
85
|
def __init__(
|
67
86
|
self,
|
68
|
-
client
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
progress_hook=None,
|
81
|
-
**kwargs
|
82
|
-
):
|
87
|
+
client: "BlobOperations",
|
88
|
+
total_size: int,
|
89
|
+
chunk_size: int,
|
90
|
+
current_progress: int,
|
91
|
+
start_range: int,
|
92
|
+
end_range: int,
|
93
|
+
validate_content: bool,
|
94
|
+
encryption_options: Dict[str, Any],
|
95
|
+
encryption_data: Optional["_EncryptionData"] = None,
|
96
|
+
stream: Any = None,
|
97
|
+
parallel: Optional[int] = None,
|
98
|
+
non_empty_ranges: Optional[List[Dict[str, Any]]] = None,
|
99
|
+
progress_hook: Optional[Callable[[int, Optional[int]], None]] = None,
|
100
|
+
**kwargs: Any
|
101
|
+
) -> None:
|
83
102
|
self.client = client
|
84
103
|
self.non_empty_ranges = non_empty_ranges
|
85
104
|
|
@@ -110,32 +129,32 @@ class _ChunkDownloader(object): # pylint: disable=too-many-instance-attributes
|
|
110
129
|
self.validate_content = validate_content
|
111
130
|
self.request_options = kwargs
|
112
131
|
|
113
|
-
def _calculate_range(self, chunk_start):
|
132
|
+
def _calculate_range(self, chunk_start: int) -> Tuple[int, int]:
|
114
133
|
if chunk_start + self.chunk_size > self.end_index:
|
115
134
|
chunk_end = self.end_index
|
116
135
|
else:
|
117
136
|
chunk_end = chunk_start + self.chunk_size
|
118
137
|
return chunk_start, chunk_end
|
119
138
|
|
120
|
-
def get_chunk_offsets(self):
|
139
|
+
def get_chunk_offsets(self) -> Generator[int, None, None]:
|
121
140
|
index = self.start_index
|
122
141
|
while index < self.end_index:
|
123
142
|
yield index
|
124
143
|
index += self.chunk_size
|
125
144
|
|
126
|
-
def process_chunk(self, chunk_start):
|
145
|
+
def process_chunk(self, chunk_start: int) -> None:
|
127
146
|
chunk_start, chunk_end = self._calculate_range(chunk_start)
|
128
|
-
chunk_data = self._download_chunk(chunk_start, chunk_end - 1)
|
147
|
+
chunk_data, _ = self._download_chunk(chunk_start, chunk_end - 1)
|
129
148
|
length = chunk_end - chunk_start
|
130
149
|
if length > 0:
|
131
150
|
self._write_to_stream(chunk_data, chunk_start)
|
132
151
|
self._update_progress(length)
|
133
152
|
|
134
|
-
def yield_chunk(self, chunk_start):
|
153
|
+
def yield_chunk(self, chunk_start: int) -> Tuple[bytes, int]:
|
135
154
|
chunk_start, chunk_end = self._calculate_range(chunk_start)
|
136
155
|
return self._download_chunk(chunk_start, chunk_end - 1)
|
137
156
|
|
138
|
-
def _update_progress(self, length):
|
157
|
+
def _update_progress(self, length: int) -> None:
|
139
158
|
if self.progress_lock:
|
140
159
|
with self.progress_lock: # pylint: disable=not-context-manager
|
141
160
|
self.progress_total += length
|
@@ -145,7 +164,7 @@ class _ChunkDownloader(object): # pylint: disable=too-many-instance-attributes
|
|
145
164
|
if self.progress_hook:
|
146
165
|
self.progress_hook(self.progress_total, self.total_size)
|
147
166
|
|
148
|
-
def _write_to_stream(self, chunk_data, chunk_start):
|
167
|
+
def _write_to_stream(self, chunk_data: bytes, chunk_start: int) -> None:
|
149
168
|
if self.stream_lock:
|
150
169
|
with self.stream_lock: # pylint: disable=not-context-manager
|
151
170
|
self.stream.seek(self.stream_start + (chunk_start - self.start_index))
|
@@ -153,7 +172,7 @@ class _ChunkDownloader(object): # pylint: disable=too-many-instance-attributes
|
|
153
172
|
else:
|
154
173
|
self.stream.write(chunk_data)
|
155
174
|
|
156
|
-
def _do_optimize(self, given_range_start, given_range_end):
|
175
|
+
def _do_optimize(self, given_range_start: int, given_range_end: int) -> bool:
|
157
176
|
# If we have no page range list stored, then assume there's data everywhere for that page blob
|
158
177
|
# or it's a block blob or append blob
|
159
178
|
if self.non_empty_ranges is None:
|
@@ -178,7 +197,9 @@ class _ChunkDownloader(object): # pylint: disable=too-many-instance-attributes
|
|
178
197
|
# Went through all src_ranges, but nothing overlapped. Optimization will be applied.
|
179
198
|
return True
|
180
199
|
|
181
|
-
def _download_chunk(self, chunk_start, chunk_end):
|
200
|
+
def _download_chunk(self, chunk_start: int, chunk_end: int) -> Tuple[bytes, int]:
|
201
|
+
if self.encryption_options is None:
|
202
|
+
raise ValueError("Required argument is missing: encryption_options")
|
182
203
|
download_range, offset = process_range_and_offset(
|
183
204
|
chunk_start, chunk_end, chunk_end, self.encryption_options, self.encryption_data
|
184
205
|
)
|
@@ -186,8 +207,8 @@ class _ChunkDownloader(object): # pylint: disable=too-many-instance-attributes
|
|
186
207
|
# No need to download the empty chunk from server if there's no data in the chunk to be downloaded.
|
187
208
|
# Do optimize and create empty chunk locally if condition is met.
|
188
209
|
if self._do_optimize(download_range[0], download_range[1]):
|
189
|
-
|
190
|
-
chunk_data = b"\x00" *
|
210
|
+
content_length = download_range[1] - download_range[0] + 1
|
211
|
+
chunk_data = b"\x00" * content_length
|
191
212
|
else:
|
192
213
|
range_header, range_validation = validate_and_format_range_headers(
|
193
214
|
download_range[0],
|
@@ -198,6 +219,7 @@ class _ChunkDownloader(object): # pylint: disable=too-many-instance-attributes
|
|
198
219
|
retry_active = True
|
199
220
|
retry_total = 3
|
200
221
|
while retry_active:
|
222
|
+
response: Any = None
|
201
223
|
try:
|
202
224
|
_, response = self.client.download(
|
203
225
|
range=range_header,
|
@@ -218,34 +240,35 @@ class _ChunkDownloader(object): # pylint: disable=too-many-instance-attributes
|
|
218
240
|
if retry_total <= 0:
|
219
241
|
raise HttpResponseError(error, error=error) from error
|
220
242
|
time.sleep(1)
|
243
|
+
content_length = response.content_length
|
221
244
|
|
222
245
|
# This makes sure that if_match is set so that we can validate
|
223
246
|
# that subsequent downloads are to an unmodified blob
|
224
247
|
if self.request_options.get("modified_access_conditions"):
|
225
248
|
self.request_options["modified_access_conditions"].if_match = response.properties.etag
|
226
249
|
|
227
|
-
return chunk_data
|
250
|
+
return chunk_data, content_length
|
228
251
|
|
229
252
|
|
230
253
|
class _ChunkIterator(object):
|
231
|
-
"""
|
254
|
+
"""Iterator for chunks in blob download stream."""
|
232
255
|
|
233
|
-
def __init__(self, size, content, downloader, chunk_size):
|
256
|
+
def __init__(self, size: int, content: bytes, downloader: Optional[_ChunkDownloader], chunk_size: int) -> None:
|
234
257
|
self.size = size
|
235
258
|
self._chunk_size = chunk_size
|
236
259
|
self._current_content = content
|
237
260
|
self._iter_downloader = downloader
|
238
|
-
self._iter_chunks = None
|
261
|
+
self._iter_chunks: Optional[Generator[int, None, None]] = None
|
239
262
|
self._complete = size == 0
|
240
263
|
|
241
|
-
def __len__(self):
|
264
|
+
def __len__(self) -> int:
|
242
265
|
return self.size
|
243
266
|
|
244
|
-
def __iter__(self):
|
267
|
+
def __iter__(self) -> Iterator[bytes]:
|
245
268
|
return self
|
246
269
|
|
247
270
|
# Iterate through responses.
|
248
|
-
def __next__(self):
|
271
|
+
def __next__(self) -> bytes:
|
249
272
|
if self._complete:
|
250
273
|
raise StopIteration("Download complete")
|
251
274
|
if not self._iter_downloader:
|
@@ -263,8 +286,8 @@ class _ChunkIterator(object):
|
|
263
286
|
return self._get_chunk_data()
|
264
287
|
|
265
288
|
try:
|
266
|
-
|
267
|
-
self._current_content += self._iter_downloader.yield_chunk(
|
289
|
+
next_chunk = next(self._iter_chunks)
|
290
|
+
self._current_content += self._iter_downloader.yield_chunk(next_chunk)[0]
|
268
291
|
except StopIteration as e:
|
269
292
|
self._complete = True
|
270
293
|
if self._current_content:
|
@@ -277,46 +300,46 @@ class _ChunkIterator(object):
|
|
277
300
|
|
278
301
|
next = __next__ # Python 2 compatibility.
|
279
302
|
|
280
|
-
def _get_chunk_data(self):
|
303
|
+
def _get_chunk_data(self) -> bytes:
|
281
304
|
chunk_data = self._current_content[: self._chunk_size]
|
282
305
|
self._current_content = self._current_content[self._chunk_size:]
|
283
306
|
return chunk_data
|
284
307
|
|
285
308
|
|
286
309
|
class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-attributes
|
287
|
-
"""
|
288
|
-
|
289
|
-
:ivar str name:
|
290
|
-
The name of the blob being downloaded.
|
291
|
-
:ivar str container:
|
292
|
-
The name of the container where the blob is.
|
293
|
-
:ivar ~azure.storage.blob.BlobProperties properties:
|
294
|
-
The properties of the blob being downloaded. If only a range of the data is being
|
295
|
-
downloaded, this will be reflected in the properties.
|
296
|
-
:ivar int size:
|
297
|
-
The size of the total data in the stream. This will be the byte range if specified,
|
298
|
-
otherwise the total size of the blob.
|
310
|
+
"""
|
311
|
+
A streaming object to download from Azure Storage.
|
299
312
|
"""
|
300
313
|
|
314
|
+
name: str
|
315
|
+
"""The name of the blob being downloaded."""
|
316
|
+
container: str
|
317
|
+
"""The name of the container where the blob is."""
|
318
|
+
properties: "BlobProperties"
|
319
|
+
"""The properties of the blob being downloaded. If only a range of the data is being
|
320
|
+
downloaded, this will be reflected in the properties."""
|
321
|
+
size: int
|
322
|
+
"""The size of the total data in the stream. This will be the byte range if specified,
|
323
|
+
otherwise the total size of the blob."""
|
324
|
+
|
301
325
|
def __init__(
|
302
326
|
self,
|
303
|
-
clients=None,
|
304
|
-
config=None,
|
305
|
-
start_range=None,
|
306
|
-
end_range=None,
|
307
|
-
validate_content=None,
|
308
|
-
encryption_options=None,
|
309
|
-
max_concurrency=1,
|
310
|
-
name=None,
|
311
|
-
container=None,
|
312
|
-
encoding=None,
|
313
|
-
download_cls=None,
|
314
|
-
**kwargs
|
315
|
-
):
|
327
|
+
clients: "AzureBlobStorage" = None, # type: ignore [assignment]
|
328
|
+
config: "StorageConfiguration" = None, # type: ignore [assignment]
|
329
|
+
start_range: Optional[int] = None,
|
330
|
+
end_range: Optional[int] = None,
|
331
|
+
validate_content: bool = None, # type: ignore [assignment]
|
332
|
+
encryption_options: Dict[str, Any] = None, # type: ignore [assignment]
|
333
|
+
max_concurrency: int = 1,
|
334
|
+
name: str = None, # type: ignore [assignment]
|
335
|
+
container: str = None, # type: ignore [assignment]
|
336
|
+
encoding: Optional[str] = None,
|
337
|
+
download_cls: Optional[Callable] = None,
|
338
|
+
**kwargs: Any
|
339
|
+
) -> None:
|
316
340
|
self.name = name
|
317
341
|
self.container = container
|
318
|
-
self.
|
319
|
-
self.size = None
|
342
|
+
self.size = 0
|
320
343
|
|
321
344
|
self._clients = clients
|
322
345
|
self._config = config
|
@@ -328,14 +351,27 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
328
351
|
self._encryption_options = encryption_options or {}
|
329
352
|
self._progress_hook = kwargs.pop('progress_hook', None)
|
330
353
|
self._request_options = kwargs
|
354
|
+
self._response = None
|
331
355
|
self._location_mode = None
|
332
|
-
self.
|
333
|
-
self.
|
334
|
-
self._file_size = None
|
356
|
+
self._current_content: Union[str, bytes] = b''
|
357
|
+
self._file_size = 0
|
335
358
|
self._non_empty_ranges = None
|
336
|
-
self.
|
337
|
-
|
338
|
-
|
359
|
+
self._encryption_data: Optional["_EncryptionData"] = None
|
360
|
+
|
361
|
+
# The content download offset, after any processing (decryption), in bytes
|
362
|
+
self._download_offset = 0
|
363
|
+
# The raw download offset, before processing (decryption), in bytes
|
364
|
+
self._raw_download_offset = 0
|
365
|
+
# The offset the stream has been read to in bytes or chars depending on mode
|
366
|
+
self._read_offset = 0
|
367
|
+
# The offset into current_content that has been consumed in bytes or chars depending on mode
|
368
|
+
self._current_content_offset = 0
|
369
|
+
|
370
|
+
self._text_mode: Optional[bool] = None
|
371
|
+
self._decoder: Optional["IncrementalDecoder"] = None
|
372
|
+
# Whether the current content is the first chunk of download content or not
|
373
|
+
self._first_chunk = True
|
374
|
+
self._download_start = self._start_range or 0
|
339
375
|
|
340
376
|
# The cls is passed in via download_cls to avoid conflicting arg name with Generic.__new__
|
341
377
|
# but needs to be changed to cls in the request options.
|
@@ -347,14 +383,14 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
347
383
|
# The service only provides transactional MD5s for chunks under 4MB.
|
348
384
|
# If validate_content is on, get only self.MAX_CHUNK_GET_SIZE for the first
|
349
385
|
# chunk so a transactional MD5 can be retrieved.
|
350
|
-
|
386
|
+
first_get_size = (
|
351
387
|
self._config.max_single_get_size if not self._validate_content else self._config.max_chunk_get_size
|
352
388
|
)
|
353
|
-
initial_request_start = self.
|
354
|
-
if self._end_range is not None and self._end_range -
|
389
|
+
initial_request_start = self._download_start
|
390
|
+
if self._end_range is not None and self._end_range - initial_request_start < first_get_size:
|
355
391
|
initial_request_end = self._end_range
|
356
392
|
else:
|
357
|
-
initial_request_end = initial_request_start +
|
393
|
+
initial_request_end = initial_request_start + first_get_size - 1
|
358
394
|
|
359
395
|
self._initial_range, self._initial_offset = process_range_and_offset(
|
360
396
|
initial_request_start,
|
@@ -365,32 +401,31 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
365
401
|
)
|
366
402
|
|
367
403
|
self._response = self._initial_request()
|
368
|
-
self.properties = self._response.properties
|
404
|
+
self.properties = cast("BlobProperties", self._response.properties)
|
369
405
|
self.properties.name = self.name
|
370
406
|
self.properties.container = self.container
|
371
407
|
|
372
|
-
# Set the content length to the download size instead of the size of
|
373
|
-
# the last range
|
408
|
+
# Set the content length to the download size instead of the size of the last range
|
374
409
|
self.properties.size = self.size
|
375
|
-
|
376
|
-
|
377
|
-
|
410
|
+
self.properties.content_range = (f"bytes {self._download_start}-"
|
411
|
+
f"{self._end_range if self._end_range is not None else self._file_size - 1}/"
|
412
|
+
f"{self._file_size}")
|
378
413
|
|
379
414
|
# Overwrite the content MD5 as it is the MD5 for the last range instead
|
380
415
|
# of the stored MD5
|
381
416
|
# TODO: Set to the stored MD5 when the service returns this
|
382
|
-
self.properties.content_md5 = None
|
417
|
+
self.properties.content_md5 = None # type: ignore [attr-defined]
|
383
418
|
|
384
419
|
def __len__(self):
|
385
420
|
return self.size
|
386
421
|
|
387
|
-
def _get_encryption_data_request(self):
|
422
|
+
def _get_encryption_data_request(self) -> None:
|
388
423
|
# Save current request cls
|
389
424
|
download_cls = self._request_options.pop('cls', None)
|
390
425
|
# Adjust cls for get_properties
|
391
426
|
self._request_options['cls'] = deserialize_blob_properties
|
392
427
|
|
393
|
-
properties = self._clients.blob.get_properties(**self._request_options)
|
428
|
+
properties = cast("BlobProperties", self._clients.blob.get_properties(**self._request_options))
|
394
429
|
# This will return None if there is no encryption metadata or there are parsing errors.
|
395
430
|
# That is acceptable here, the proper error will be caught and surfaced when attempting
|
396
431
|
# to decrypt the blob.
|
@@ -399,6 +434,12 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
399
434
|
# Restore cls for download
|
400
435
|
self._request_options['cls'] = download_cls
|
401
436
|
|
437
|
+
@property
|
438
|
+
def _download_complete(self):
|
439
|
+
if is_encryption_v2(self._encryption_data):
|
440
|
+
return self._download_offset >= self.size
|
441
|
+
return self._raw_download_offset >= self.size
|
442
|
+
|
402
443
|
def _initial_request(self):
|
403
444
|
range_header, range_validation = validate_and_format_range_headers(
|
404
445
|
self._initial_range[0],
|
@@ -412,14 +453,14 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
412
453
|
retry_total = 3
|
413
454
|
while retry_active:
|
414
455
|
try:
|
415
|
-
location_mode, response = self._clients.blob.download(
|
456
|
+
location_mode, response = cast(Tuple[Optional[str], Any], self._clients.blob.download(
|
416
457
|
range=range_header,
|
417
458
|
range_get_content_md5=range_validation,
|
418
459
|
validate_content=self._validate_content,
|
419
460
|
data_stream_total=None,
|
420
461
|
download_stream_current=0,
|
421
462
|
**self._request_options
|
422
|
-
)
|
463
|
+
))
|
423
464
|
|
424
465
|
# Check the location we read from to ensure we use the same one
|
425
466
|
# for subsequent requests.
|
@@ -433,9 +474,9 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
433
474
|
# Remove any extra encryption data size from blob size
|
434
475
|
self._file_size = adjust_blob_size_for_encryption(self._file_size, self._encryption_data)
|
435
476
|
|
436
|
-
if self._end_range is not None:
|
477
|
+
if self._end_range is not None and self._start_range is not None:
|
437
478
|
# Use the end range index unless it is over the end of the file
|
438
|
-
self.size = min(self._file_size, self._end_range - self._start_range + 1)
|
479
|
+
self.size = min(self._file_size - self._start_range, self._end_range - self._start_range + 1)
|
439
480
|
elif self._start_range is not None:
|
440
481
|
self.size = self._file_size - self._start_range
|
441
482
|
else:
|
@@ -478,6 +519,8 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
478
519
|
if retry_total <= 0:
|
479
520
|
raise HttpResponseError(error, error=error) from error
|
480
521
|
time.sleep(1)
|
522
|
+
self._download_offset += len(self._current_content)
|
523
|
+
self._raw_download_offset += response.content_length
|
481
524
|
|
482
525
|
# get page ranges to optimize downloading sparse page blob
|
483
526
|
if response.properties.blob_type == 'PageBlob':
|
@@ -491,33 +534,18 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
491
534
|
except HttpResponseError:
|
492
535
|
pass
|
493
536
|
|
494
|
-
# If the file is small, the download is complete at this point.
|
495
|
-
# If file size is large, download the rest of the file in chunks.
|
496
|
-
# For encryption V2, calculate based on size of decrypted content, not download size.
|
497
|
-
if is_encryption_v2(self._encryption_data):
|
498
|
-
self._download_complete = len(self._current_content) >= self.size
|
499
|
-
else:
|
500
|
-
self._download_complete = response.properties.size >= self.size
|
501
|
-
|
502
537
|
if not self._download_complete and self._request_options.get("modified_access_conditions"):
|
503
538
|
self._request_options["modified_access_conditions"].if_match = response.properties.etag
|
504
539
|
|
505
540
|
return response
|
506
541
|
|
507
|
-
def
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
start = (self._start_range or 0) + len(self._current_content)
|
513
|
-
|
514
|
-
# Adjust the start based on any data read past the current content
|
515
|
-
start += (self._offset - len(self._current_content))
|
516
|
-
return start
|
542
|
+
def chunks(self) -> Iterator[bytes]:
|
543
|
+
"""
|
544
|
+
Iterate over chunks in the download stream. Note, the iterator returned will
|
545
|
+
iterate over the entire download content, regardless of any data that was
|
546
|
+
previously read.
|
517
547
|
|
518
|
-
|
519
|
-
# type: () -> Iterator[bytes]
|
520
|
-
"""Iterate over chunks in the download stream.
|
548
|
+
NOTE: If the stream has been partially read, some data may be re-downloaded by the iterator.
|
521
549
|
|
522
550
|
:returns: An iterator of the chunks in the download stream.
|
523
551
|
:rtype: Iterator[bytes]
|
@@ -531,81 +559,125 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
531
559
|
:dedent: 12
|
532
560
|
:caption: Download a blob using chunks().
|
533
561
|
"""
|
534
|
-
if self.
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
562
|
+
if self._text_mode:
|
563
|
+
raise ValueError("Stream has been partially read in text mode. chunks is not supported in text mode.")
|
564
|
+
if self._encoding:
|
565
|
+
warnings.warn("Encoding is ignored with chunks as only bytes are supported.")
|
566
|
+
|
567
|
+
iter_downloader = None
|
568
|
+
# If we still have the first chunk buffered, use it. Otherwise, download all content again
|
569
|
+
if not self._first_chunk or not self._download_complete:
|
570
|
+
if self._first_chunk:
|
571
|
+
start = self._download_start + len(self._current_content)
|
572
|
+
current_progress = len(self._current_content)
|
573
|
+
else:
|
574
|
+
start = self._download_start
|
575
|
+
current_progress = 0
|
541
576
|
|
542
|
-
|
543
|
-
# For encryption, adjust start to the end of the fetched data rather than download size
|
544
|
-
if self._encryption_options.get("key") is not None or self._encryption_options.get("resolver") is not None:
|
545
|
-
data_start = (self._start_range or 0) + len(self._current_content)
|
577
|
+
end = self._download_start + self.size
|
546
578
|
|
547
579
|
iter_downloader = _ChunkDownloader(
|
548
580
|
client=self._clients.blob,
|
549
581
|
non_empty_ranges=self._non_empty_ranges,
|
550
582
|
total_size=self.size,
|
551
583
|
chunk_size=self._config.max_chunk_get_size,
|
552
|
-
current_progress=
|
553
|
-
start_range=
|
554
|
-
end_range=
|
555
|
-
stream=None,
|
556
|
-
parallel=False,
|
584
|
+
current_progress=current_progress,
|
585
|
+
start_range=start,
|
586
|
+
end_range=end,
|
557
587
|
validate_content=self._validate_content,
|
558
588
|
encryption_options=self._encryption_options,
|
559
589
|
encryption_data=self._encryption_data,
|
560
590
|
use_location=self._location_mode,
|
561
591
|
**self._request_options
|
562
592
|
)
|
593
|
+
|
594
|
+
initial_content = self._current_content if self._first_chunk else b''
|
563
595
|
return _ChunkIterator(
|
564
596
|
size=self.size,
|
565
|
-
content=
|
597
|
+
content=cast(bytes, initial_content),
|
566
598
|
downloader=iter_downloader,
|
567
599
|
chunk_size=self._config.max_chunk_get_size)
|
568
600
|
|
569
|
-
|
601
|
+
@overload
|
602
|
+
def read(self, size: int = -1) -> T:
|
603
|
+
...
|
604
|
+
|
605
|
+
@overload
|
606
|
+
def read(self, *, chars: Optional[int] = None) -> T:
|
607
|
+
...
|
608
|
+
|
609
|
+
# pylint: disable-next=too-many-statements,too-many-branches
|
610
|
+
def read(self, size: int = -1, *, chars: Optional[int] = None) -> T:
|
570
611
|
"""
|
571
|
-
Read
|
572
|
-
|
612
|
+
Read the specified bytes or chars from the stream. If `encoding`
|
613
|
+
was specified on `download_blob`, it is recommended to use the
|
614
|
+
chars parameter to read a specific number of chars to avoid decoding
|
615
|
+
errors. If size/chars is unspecified or negative all bytes will be read.
|
573
616
|
|
574
|
-
:param
|
617
|
+
:param int size:
|
575
618
|
The number of bytes to download from the stream. Leave unspecified
|
576
|
-
or set
|
619
|
+
or set negative to download all bytes.
|
620
|
+
:keyword Optional[int] chars:
|
621
|
+
The number of chars to download from the stream. Leave unspecified
|
622
|
+
or set negative to download all chars. Note, this can only be used
|
623
|
+
when encoding is specified on `download_blob`.
|
577
624
|
:returns:
|
578
625
|
The requested data as bytes or a string if encoding was specified. If
|
579
626
|
the return value is empty, there is no more data to read.
|
580
627
|
:rtype: T
|
581
628
|
"""
|
582
|
-
if size
|
583
|
-
|
584
|
-
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
read = stream.write(self._current_content[start:start + length])
|
596
|
-
|
597
|
-
remaining_size -= read
|
598
|
-
self._offset += read
|
599
|
-
if self._progress_hook:
|
600
|
-
self._progress_hook(self._offset, self.size)
|
601
|
-
|
602
|
-
if remaining_size > 0:
|
603
|
-
start_range = self._get_downloader_start_with_offset()
|
629
|
+
if size > -1 and self._encoding:
|
630
|
+
warnings.warn(
|
631
|
+
"Size parameter specified with text encoding enabled. It is recommended to use chars "
|
632
|
+
"to read a specific number of characters instead."
|
633
|
+
)
|
634
|
+
if size > -1 and chars is not None:
|
635
|
+
raise ValueError("Cannot specify both size and chars.")
|
636
|
+
if not self._encoding and chars is not None:
|
637
|
+
raise ValueError("Must specify encoding to read chars.")
|
638
|
+
if self._text_mode and size > -1:
|
639
|
+
raise ValueError("Stream has been partially read in text mode. Please use chars.")
|
640
|
+
if self._text_mode is False and chars is not None:
|
641
|
+
raise ValueError("Stream has been partially read in bytes mode. Please use size.")
|
604
642
|
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
|
643
|
+
# Empty blob or already read to the end
|
644
|
+
if (size == 0 or chars == 0 or
|
645
|
+
(self._download_complete and self._current_content_offset >= len(self._current_content))):
|
646
|
+
return b'' if not self._encoding else '' # type: ignore [return-value]
|
647
|
+
|
648
|
+
if not self._text_mode and chars is not None and self._encoding is not None:
|
649
|
+
self._text_mode = True
|
650
|
+
self._decoder = codecs.getincrementaldecoder(self._encoding)('strict')
|
651
|
+
self._current_content = self._decoder.decode(
|
652
|
+
cast(bytes, self._current_content), final=self._download_complete)
|
653
|
+
elif self._text_mode is None:
|
654
|
+
self._text_mode = False
|
655
|
+
|
656
|
+
output_stream: Union[BytesIO, StringIO]
|
657
|
+
if self._text_mode:
|
658
|
+
output_stream = StringIO()
|
659
|
+
size = chars if chars else sys.maxsize
|
660
|
+
else:
|
661
|
+
output_stream = BytesIO()
|
662
|
+
size = size if size > 0 else sys.maxsize
|
663
|
+
readall = size == sys.maxsize
|
664
|
+
count = 0
|
665
|
+
|
666
|
+
# Start by reading from current_content
|
667
|
+
start = self._current_content_offset
|
668
|
+
length = min(len(self._current_content) - self._current_content_offset, size - count)
|
669
|
+
read = output_stream.write(self._current_content[start:start + length]) # type: ignore [arg-type]
|
670
|
+
|
671
|
+
count += read
|
672
|
+
self._current_content_offset += read
|
673
|
+
self._read_offset += read
|
674
|
+
self._check_and_report_progress()
|
675
|
+
|
676
|
+
remaining = size - count
|
677
|
+
if remaining > 0 and not self._download_complete:
|
678
|
+
# Create a downloader than can download the rest of the file
|
679
|
+
start = self._download_start + self._download_offset
|
680
|
+
end = self._download_start + self.size
|
609
681
|
|
610
682
|
parallel = self._max_concurrency > 1
|
611
683
|
downloader = _ChunkDownloader(
|
@@ -613,10 +685,10 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
613
685
|
non_empty_ranges=self._non_empty_ranges,
|
614
686
|
total_size=self.size,
|
615
687
|
chunk_size=self._config.max_chunk_get_size,
|
616
|
-
current_progress=self.
|
617
|
-
start_range=
|
618
|
-
end_range=
|
619
|
-
stream=
|
688
|
+
current_progress=self._read_offset,
|
689
|
+
start_range=start,
|
690
|
+
end_range=end,
|
691
|
+
stream=output_stream,
|
620
692
|
parallel=parallel,
|
621
693
|
validate_content=self._validate_content,
|
622
694
|
encryption_options=self._encryption_options,
|
@@ -625,24 +697,60 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
625
697
|
progress_hook=self._progress_hook,
|
626
698
|
**self._request_options
|
627
699
|
)
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
700
|
+
self._first_chunk = False
|
701
|
+
|
702
|
+
# When reading all data, have the downloader read everything into the stream.
|
703
|
+
# Else, read one chunk at a time (using the downloader as an iterator) until
|
704
|
+
# the requested size is reached.
|
705
|
+
chunks_iter = downloader.get_chunk_offsets()
|
706
|
+
if readall and not self._text_mode:
|
707
|
+
# Only do parallel if there is more than one chunk left to download
|
708
|
+
if parallel and (self.size - self._download_offset) > self._config.max_chunk_get_size:
|
709
|
+
import concurrent.futures
|
710
|
+
with concurrent.futures.ThreadPoolExecutor(self._max_concurrency) as executor:
|
711
|
+
list(executor.map(
|
633
712
|
with_current_context(downloader.process_chunk),
|
634
713
|
downloader.get_chunk_offsets()
|
635
714
|
))
|
636
|
-
|
637
|
-
|
638
|
-
|
715
|
+
else:
|
716
|
+
for next_chunk in chunks_iter:
|
717
|
+
downloader.process_chunk(next_chunk)
|
639
718
|
|
640
|
-
|
719
|
+
self._complete_read()
|
641
720
|
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
721
|
+
else:
|
722
|
+
while (chunk := next(chunks_iter, None)) is not None and remaining > 0:
|
723
|
+
chunk_data, content_length = downloader.yield_chunk(chunk)
|
724
|
+
self._download_offset += len(chunk_data)
|
725
|
+
self._raw_download_offset += content_length
|
726
|
+
if self._text_mode and self._decoder is not None:
|
727
|
+
self._current_content = self._decoder.decode(chunk_data, final=self._download_complete)
|
728
|
+
else:
|
729
|
+
self._current_content = chunk_data
|
730
|
+
|
731
|
+
if remaining < len(self._current_content):
|
732
|
+
read = output_stream.write(self._current_content[:remaining]) # type: ignore [arg-type]
|
733
|
+
else:
|
734
|
+
read = output_stream.write(self._current_content) # type: ignore [arg-type]
|
735
|
+
|
736
|
+
self._current_content_offset = read
|
737
|
+
self._read_offset += read
|
738
|
+
remaining -= read
|
739
|
+
self._check_and_report_progress()
|
740
|
+
|
741
|
+
data = output_stream.getvalue()
|
742
|
+
if not self._text_mode and self._encoding:
|
743
|
+
try:
|
744
|
+
# This is technically incorrect to do, but we have it for backwards compatibility.
|
745
|
+
data = cast(bytes, data).decode(self._encoding)
|
746
|
+
except UnicodeDecodeError:
|
747
|
+
warnings.warn(
|
748
|
+
"Encountered a decoding error while decoding blob data from a partial read. "
|
749
|
+
"Try using the `chars` keyword instead to read in text mode."
|
750
|
+
)
|
751
|
+
raise
|
752
|
+
|
753
|
+
return data # type: ignore [return-value]
|
646
754
|
|
647
755
|
def readall(self) -> T:
|
648
756
|
"""
|
@@ -652,53 +760,7 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
652
760
|
:returns: The requested data as bytes or a string if encoding was specified.
|
653
761
|
:rtype: T
|
654
762
|
"""
|
655
|
-
|
656
|
-
self.readinto(stream)
|
657
|
-
data = stream.getvalue()
|
658
|
-
if self._encoding:
|
659
|
-
return data.decode(self._encoding)
|
660
|
-
return data
|
661
|
-
|
662
|
-
def content_as_bytes(self, max_concurrency=1):
|
663
|
-
"""DEPRECATED: Download the contents of this file.
|
664
|
-
|
665
|
-
This operation is blocking until all data is downloaded.
|
666
|
-
|
667
|
-
This method is deprecated, use func:`readall` instead.
|
668
|
-
|
669
|
-
:param int max_concurrency:
|
670
|
-
The number of parallel connections with which to download.
|
671
|
-
:returns: The contents of the file as bytes.
|
672
|
-
:rtype: bytes
|
673
|
-
"""
|
674
|
-
warnings.warn(
|
675
|
-
"content_as_bytes is deprecated, use readall instead",
|
676
|
-
DeprecationWarning
|
677
|
-
)
|
678
|
-
self._max_concurrency = max_concurrency
|
679
|
-
return self.readall()
|
680
|
-
|
681
|
-
def content_as_text(self, max_concurrency=1, encoding="UTF-8"):
|
682
|
-
"""DEPRECATED: Download the contents of this blob, and decode as text.
|
683
|
-
|
684
|
-
This operation is blocking until all data is downloaded.
|
685
|
-
|
686
|
-
This method is deprecated, use func:`readall` instead.
|
687
|
-
|
688
|
-
:param int max_concurrency:
|
689
|
-
The number of parallel connections with which to download.
|
690
|
-
:param str encoding:
|
691
|
-
Test encoding to decode the downloaded bytes. Default is UTF-8.
|
692
|
-
:returns: The content of the file as a str.
|
693
|
-
:rtype: str
|
694
|
-
"""
|
695
|
-
warnings.warn(
|
696
|
-
"content_as_text is deprecated, use readall instead",
|
697
|
-
DeprecationWarning
|
698
|
-
)
|
699
|
-
self._max_concurrency = max_concurrency
|
700
|
-
self._encoding = encoding
|
701
|
-
return self.readall()
|
763
|
+
return self.read()
|
702
764
|
|
703
765
|
def readinto(self, stream: IO[bytes]) -> int:
|
704
766
|
"""Download the contents of this file to a stream.
|
@@ -710,6 +772,11 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
710
772
|
:returns: The number of bytes read.
|
711
773
|
:rtype: int
|
712
774
|
"""
|
775
|
+
if self._text_mode:
|
776
|
+
raise ValueError("Stream has been partially read in text mode. readinto is not supported in text mode.")
|
777
|
+
if self._encoding:
|
778
|
+
warnings.warn("Encoding is ignored with readinto as only byte streams are supported.")
|
779
|
+
|
713
780
|
# The stream must be seekable if parallel download is required
|
714
781
|
parallel = self._max_concurrency > 1
|
715
782
|
if parallel:
|
@@ -723,35 +790,34 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
723
790
|
raise ValueError(error_message) from exc
|
724
791
|
|
725
792
|
# If some data has been streamed using `read`, only stream the remaining data
|
726
|
-
remaining_size = self.size - self.
|
793
|
+
remaining_size = self.size - self._read_offset
|
727
794
|
# Already read to the end
|
728
795
|
if remaining_size <= 0:
|
729
796
|
return 0
|
730
797
|
|
731
|
-
# Write the content to the user stream
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
736
|
-
|
737
|
-
|
798
|
+
# Write the current content to the user stream
|
799
|
+
current_remaining = len(self._current_content) - self._current_content_offset
|
800
|
+
start = self._current_content_offset
|
801
|
+
count = stream.write(cast(bytes, self._current_content[start:start + current_remaining]))
|
802
|
+
|
803
|
+
self._current_content_offset += count
|
804
|
+
self._read_offset += count
|
805
|
+
if self._progress_hook:
|
806
|
+
self._progress_hook(self._read_offset, self.size)
|
738
807
|
|
808
|
+
# If all the data was already downloaded/buffered
|
739
809
|
if self._download_complete:
|
740
810
|
return remaining_size
|
741
811
|
|
742
|
-
|
743
|
-
|
744
|
-
# Use the length unless it is over the end of the file
|
745
|
-
data_end = min(self._file_size, self._end_range + 1)
|
746
|
-
|
747
|
-
data_start = self._get_downloader_start_with_offset()
|
812
|
+
data_start = self._download_start + self._read_offset
|
813
|
+
data_end = self._download_start + self.size
|
748
814
|
|
749
815
|
downloader = _ChunkDownloader(
|
750
816
|
client=self._clients.blob,
|
751
817
|
non_empty_ranges=self._non_empty_ranges,
|
752
818
|
total_size=self.size,
|
753
819
|
chunk_size=self._config.max_chunk_get_size,
|
754
|
-
current_progress=self.
|
820
|
+
current_progress=self._read_offset,
|
755
821
|
start_range=data_start,
|
756
822
|
end_range=data_end,
|
757
823
|
stream=stream,
|
@@ -774,8 +840,72 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
774
840
|
for chunk in downloader.get_chunk_offsets():
|
775
841
|
downloader.process_chunk(chunk)
|
776
842
|
|
843
|
+
self._complete_read()
|
777
844
|
return remaining_size
|
778
845
|
|
846
|
+
def _complete_read(self):
|
847
|
+
"""Adjusts all offsets to the end of the download."""
|
848
|
+
self._download_offset = self.size
|
849
|
+
self._raw_download_offset = self.size
|
850
|
+
self._read_offset = self.size
|
851
|
+
self._current_content_offset = len(self._current_content)
|
852
|
+
|
853
|
+
def _check_and_report_progress(self):
|
854
|
+
"""Reports progress if necessary."""
|
855
|
+
# Only report progress at the end of each chunk and use download_offset to always report
|
856
|
+
# progress in terms of (approximate) byte count.
|
857
|
+
if self._progress_hook and self._current_content_offset == len(self._current_content):
|
858
|
+
self._progress_hook(self._download_offset, self.size)
|
859
|
+
|
860
|
+
def content_as_bytes(self, max_concurrency=1):
|
861
|
+
"""DEPRECATED: Download the contents of this file.
|
862
|
+
|
863
|
+
This operation is blocking until all data is downloaded.
|
864
|
+
|
865
|
+
This method is deprecated, use func:`readall` instead.
|
866
|
+
|
867
|
+
:param int max_concurrency:
|
868
|
+
The number of parallel connections with which to download.
|
869
|
+
:returns: The contents of the file as bytes.
|
870
|
+
:rtype: bytes
|
871
|
+
"""
|
872
|
+
warnings.warn(
|
873
|
+
"content_as_bytes is deprecated, use readall instead",
|
874
|
+
DeprecationWarning
|
875
|
+
)
|
876
|
+
if self._text_mode:
|
877
|
+
raise ValueError("Stream has been partially read in text mode. "
|
878
|
+
"content_as_bytes is not supported in text mode.")
|
879
|
+
|
880
|
+
self._max_concurrency = max_concurrency
|
881
|
+
return self.readall()
|
882
|
+
|
883
|
+
def content_as_text(self, max_concurrency=1, encoding="UTF-8"):
|
884
|
+
"""DEPRECATED: Download the contents of this blob, and decode as text.
|
885
|
+
|
886
|
+
This operation is blocking until all data is downloaded.
|
887
|
+
|
888
|
+
This method is deprecated, use func:`readall` instead.
|
889
|
+
|
890
|
+
:param int max_concurrency:
|
891
|
+
The number of parallel connections with which to download.
|
892
|
+
:param str encoding:
|
893
|
+
Test encoding to decode the downloaded bytes. Default is UTF-8.
|
894
|
+
:returns: The content of the file as a str.
|
895
|
+
:rtype: str
|
896
|
+
"""
|
897
|
+
warnings.warn(
|
898
|
+
"content_as_text is deprecated, use readall instead",
|
899
|
+
DeprecationWarning
|
900
|
+
)
|
901
|
+
if self._text_mode:
|
902
|
+
raise ValueError("Stream has been partially read in text mode. "
|
903
|
+
"content_as_text is not supported in text mode.")
|
904
|
+
|
905
|
+
self._max_concurrency = max_concurrency
|
906
|
+
self._encoding = encoding
|
907
|
+
return self.readall()
|
908
|
+
|
779
909
|
def download_to_stream(self, stream, max_concurrency=1):
|
780
910
|
"""DEPRECATED: Download the contents of this blob to a stream.
|
781
911
|
|
@@ -794,6 +924,10 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
794
924
|
"download_to_stream is deprecated, use readinto instead",
|
795
925
|
DeprecationWarning
|
796
926
|
)
|
927
|
+
if self._text_mode:
|
928
|
+
raise ValueError("Stream has been partially read in text mode. "
|
929
|
+
"download_to_stream is not supported in text mode.")
|
930
|
+
|
797
931
|
self._max_concurrency = max_concurrency
|
798
932
|
self.readinto(stream)
|
799
933
|
return self.properties
|