azure-storage-blob 12.21.0b1__py3-none-any.whl → 12.22.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- azure/storage/blob/__init__.py +19 -18
- azure/storage/blob/_blob_client.py +470 -1555
- azure/storage/blob/_blob_client_helpers.py +1242 -0
- azure/storage/blob/_blob_service_client.py +93 -112
- azure/storage/blob/_blob_service_client_helpers.py +27 -0
- azure/storage/blob/_container_client.py +169 -376
- azure/storage/blob/_container_client_helpers.py +261 -0
- azure/storage/blob/_deserialize.py +68 -44
- azure/storage/blob/_download.py +375 -241
- azure/storage/blob/_encryption.py +14 -7
- azure/storage/blob/_generated/py.typed +1 -0
- azure/storage/blob/_lease.py +52 -63
- azure/storage/blob/_list_blobs_helper.py +129 -135
- azure/storage/blob/_models.py +480 -277
- azure/storage/blob/_quick_query_helper.py +30 -31
- azure/storage/blob/_serialize.py +38 -56
- azure/storage/blob/_shared/avro/datafile.py +1 -1
- azure/storage/blob/_shared/avro/datafile_async.py +1 -1
- azure/storage/blob/_shared/base_client.py +1 -1
- azure/storage/blob/_shared/base_client_async.py +1 -1
- azure/storage/blob/_shared/policies.py +8 -6
- azure/storage/blob/_shared/policies_async.py +3 -1
- azure/storage/blob/_shared/response_handlers.py +6 -2
- azure/storage/blob/_shared/shared_access_signature.py +2 -2
- azure/storage/blob/_shared/uploads.py +1 -1
- azure/storage/blob/_shared/uploads_async.py +1 -1
- azure/storage/blob/_shared_access_signature.py +70 -53
- azure/storage/blob/_upload_helpers.py +75 -68
- azure/storage/blob/_version.py +1 -1
- azure/storage/blob/aio/__init__.py +19 -11
- azure/storage/blob/aio/_blob_client_async.py +554 -301
- azure/storage/blob/aio/_blob_service_client_async.py +148 -97
- azure/storage/blob/aio/_container_client_async.py +282 -139
- azure/storage/blob/aio/_download_async.py +408 -283
- azure/storage/blob/aio/_lease_async.py +61 -60
- azure/storage/blob/aio/_list_blobs_helper.py +94 -96
- azure/storage/blob/aio/_models.py +60 -38
- azure/storage/blob/aio/_upload_helpers.py +75 -66
- {azure_storage_blob-12.21.0b1.dist-info → azure_storage_blob-12.22.0.dist-info}/METADATA +7 -7
- {azure_storage_blob-12.21.0b1.dist-info → azure_storage_blob-12.22.0.dist-info}/RECORD +43 -39
- {azure_storage_blob-12.21.0b1.dist-info → azure_storage_blob-12.22.0.dist-info}/WHEEL +1 -1
- {azure_storage_blob-12.21.0b1.dist-info → azure_storage_blob-12.22.0.dist-info}/LICENSE +0 -0
- {azure_storage_blob-12.21.0b1.dist-info → azure_storage_blob-12.22.0.dist-info}/top_level.txt +0 -0
@@ -4,19 +4,25 @@
|
|
4
4
|
# license information.
|
5
5
|
# --------------------------------------------------------------------------
|
6
6
|
# pylint: disable=invalid-overridden-method
|
7
|
+
# mypy: disable-error-code=override
|
7
8
|
|
9
|
+
import asyncio
|
10
|
+
import codecs
|
8
11
|
import sys
|
9
12
|
import warnings
|
10
|
-
from io import BytesIO
|
13
|
+
from io import BytesIO, StringIO
|
11
14
|
from itertools import islice
|
12
|
-
from typing import
|
13
|
-
|
14
|
-
|
15
|
+
from typing import (
|
16
|
+
Any, AsyncIterator, Awaitable,
|
17
|
+
Generator, Callable, cast, Dict,
|
18
|
+
Generic, IO, Optional, overload,
|
19
|
+
Tuple, TypeVar, Union, TYPE_CHECKING
|
20
|
+
)
|
15
21
|
|
16
22
|
from azure.core.exceptions import HttpResponseError
|
17
23
|
|
18
24
|
from .._shared.request_handlers import validate_and_format_range_headers
|
19
|
-
from .._shared.response_handlers import
|
25
|
+
from .._shared.response_handlers import parse_length_from_content_range, process_storage_error
|
20
26
|
from .._deserialize import deserialize_blob_properties, get_page_ranges_result
|
21
27
|
from .._download import process_range_and_offset, _ChunkDownloader
|
22
28
|
from .._encryption import (
|
@@ -26,17 +32,25 @@ from .._encryption import (
|
|
26
32
|
parse_encryption_data
|
27
33
|
)
|
28
34
|
|
35
|
+
if TYPE_CHECKING:
|
36
|
+
from codecs import IncrementalDecoder
|
37
|
+
from .._encryption import _EncryptionData
|
38
|
+
from .._generated.aio import AzureBlobStorage
|
39
|
+
from .._models import BlobProperties
|
40
|
+
from .._shared.models import StorageConfiguration
|
41
|
+
|
42
|
+
|
29
43
|
T = TypeVar('T', bytes, str)
|
30
44
|
|
31
45
|
|
32
|
-
async def process_content(data, start_offset, end_offset, encryption):
|
46
|
+
async def process_content(data: Any, start_offset: int, end_offset: int, encryption: Dict[str, Any]) -> bytes:
|
33
47
|
if data is None:
|
34
48
|
raise ValueError("Response cannot be None.")
|
35
|
-
content = data.response.body()
|
49
|
+
content = cast(bytes, data.response.body())
|
36
50
|
if encryption.get('key') is not None or encryption.get('resolver') is not None:
|
37
51
|
try:
|
38
52
|
return decrypt_blob(
|
39
|
-
encryption.get('required'),
|
53
|
+
encryption.get('required') or False,
|
40
54
|
encryption.get('key'),
|
41
55
|
encryption.get('resolver'),
|
42
56
|
content,
|
@@ -52,42 +66,45 @@ async def process_content(data, start_offset, end_offset, encryption):
|
|
52
66
|
|
53
67
|
|
54
68
|
class _AsyncChunkDownloader(_ChunkDownloader):
|
55
|
-
def __init__(self, **kwargs):
|
69
|
+
def __init__(self, **kwargs: Any) -> None:
|
56
70
|
super(_AsyncChunkDownloader, self).__init__(**kwargs)
|
57
|
-
self.
|
58
|
-
self.
|
71
|
+
self.stream_lock_async = asyncio.Lock() if kwargs.get('parallel') else None
|
72
|
+
self.progress_lock_async = asyncio.Lock() if kwargs.get('parallel') else None
|
59
73
|
|
60
|
-
async def process_chunk(self, chunk_start):
|
74
|
+
async def process_chunk(self, chunk_start: int) -> None:
|
61
75
|
chunk_start, chunk_end = self._calculate_range(chunk_start)
|
62
|
-
chunk_data = await self._download_chunk(chunk_start, chunk_end - 1)
|
76
|
+
chunk_data, _ = await self._download_chunk(chunk_start, chunk_end - 1)
|
63
77
|
length = chunk_end - chunk_start
|
64
78
|
if length > 0:
|
65
79
|
await self._write_to_stream(chunk_data, chunk_start)
|
66
80
|
await self._update_progress(length)
|
67
81
|
|
68
|
-
async def yield_chunk(self, chunk_start):
|
82
|
+
async def yield_chunk(self, chunk_start: int) -> Tuple[bytes, int]:
|
69
83
|
chunk_start, chunk_end = self._calculate_range(chunk_start)
|
70
84
|
return await self._download_chunk(chunk_start, chunk_end - 1)
|
71
85
|
|
72
|
-
async def _update_progress(self, length):
|
73
|
-
if self.
|
74
|
-
async with self.
|
86
|
+
async def _update_progress(self, length: int) -> None:
|
87
|
+
if self.progress_lock_async:
|
88
|
+
async with self.progress_lock_async:
|
75
89
|
self.progress_total += length
|
76
90
|
else:
|
77
91
|
self.progress_total += length
|
78
92
|
|
79
93
|
if self.progress_hook:
|
80
|
-
await
|
94
|
+
await cast(Callable[[int, Optional[int]], Awaitable[Any]], self.progress_hook)(
|
95
|
+
self.progress_total, self.total_size)
|
81
96
|
|
82
|
-
async def _write_to_stream(self, chunk_data, chunk_start):
|
83
|
-
if self.
|
84
|
-
async with self.
|
97
|
+
async def _write_to_stream(self, chunk_data: bytes, chunk_start: int) -> None:
|
98
|
+
if self.stream_lock_async:
|
99
|
+
async with self.stream_lock_async:
|
85
100
|
self.stream.seek(self.stream_start + (chunk_start - self.start_index))
|
86
101
|
self.stream.write(chunk_data)
|
87
102
|
else:
|
88
103
|
self.stream.write(chunk_data)
|
89
104
|
|
90
|
-
async def _download_chunk(self, chunk_start, chunk_end):
|
105
|
+
async def _download_chunk(self, chunk_start: int, chunk_end: int) -> Tuple[bytes, int]:
|
106
|
+
if self.encryption_options is None:
|
107
|
+
raise ValueError("Required argument is missing: encryption_options")
|
91
108
|
download_range, offset = process_range_and_offset(
|
92
109
|
chunk_start, chunk_end, chunk_end, self.encryption_options, self.encryption_data
|
93
110
|
)
|
@@ -95,8 +112,8 @@ class _AsyncChunkDownloader(_ChunkDownloader):
|
|
95
112
|
# No need to download the empty chunk from server if there's no data in the chunk to be downloaded.
|
96
113
|
# Do optimize and create empty chunk locally if condition is met.
|
97
114
|
if self._do_optimize(download_range[0], download_range[1]):
|
98
|
-
|
99
|
-
chunk_data = b"\x00" *
|
115
|
+
content_length = download_range[1] - download_range[0] + 1
|
116
|
+
chunk_data = b"\x00" * content_length
|
100
117
|
else:
|
101
118
|
range_header, range_validation = validate_and_format_range_headers(
|
102
119
|
download_range[0],
|
@@ -104,51 +121,51 @@ class _AsyncChunkDownloader(_ChunkDownloader):
|
|
104
121
|
check_content_md5=self.validate_content
|
105
122
|
)
|
106
123
|
try:
|
107
|
-
_, response = await self.client.download(
|
124
|
+
_, response = await cast(Awaitable[Any], self.client.download(
|
108
125
|
range=range_header,
|
109
126
|
range_get_content_md5=range_validation,
|
110
127
|
validate_content=self.validate_content,
|
111
128
|
data_stream_total=self.total_size,
|
112
129
|
download_stream_current=self.progress_total,
|
113
130
|
**self.request_options
|
114
|
-
)
|
131
|
+
))
|
115
132
|
|
116
133
|
except HttpResponseError as error:
|
117
134
|
process_storage_error(error)
|
118
135
|
|
119
136
|
chunk_data = await process_content(response, offset[0], offset[1], self.encryption_options)
|
120
|
-
|
137
|
+
content_length = response.content_length
|
121
138
|
|
122
139
|
# This makes sure that if_match is set so that we can validate
|
123
140
|
# that subsequent downloads are to an unmodified blob
|
124
141
|
if self.request_options.get('modified_access_conditions'):
|
125
142
|
self.request_options['modified_access_conditions'].if_match = response.properties.etag
|
126
143
|
|
127
|
-
return chunk_data
|
144
|
+
return chunk_data, content_length
|
128
145
|
|
129
146
|
|
130
147
|
class _AsyncChunkIterator(object):
|
131
148
|
"""Async iterator for chunks in blob download stream."""
|
132
149
|
|
133
|
-
def __init__(self, size, content, downloader, chunk_size):
|
150
|
+
def __init__(self, size: int, content: bytes, downloader: Optional[_AsyncChunkDownloader], chunk_size: int) -> None:
|
134
151
|
self.size = size
|
135
152
|
self._chunk_size = chunk_size
|
136
153
|
self._current_content = content
|
137
154
|
self._iter_downloader = downloader
|
138
|
-
self._iter_chunks = None
|
155
|
+
self._iter_chunks: Optional[Generator[int, None, None]] = None
|
139
156
|
self._complete = size == 0
|
140
157
|
|
141
|
-
def __len__(self):
|
158
|
+
def __len__(self) -> int:
|
142
159
|
return self.size
|
143
160
|
|
144
|
-
def __iter__(self):
|
161
|
+
def __iter__(self) -> None:
|
145
162
|
raise TypeError("Async stream must be iterated asynchronously.")
|
146
163
|
|
147
|
-
def __aiter__(self):
|
164
|
+
def __aiter__(self) -> AsyncIterator[bytes]:
|
148
165
|
return self
|
149
166
|
|
150
167
|
# Iterate through responses.
|
151
|
-
async def __anext__(self):
|
168
|
+
async def __anext__(self) -> bytes:
|
152
169
|
if self._complete:
|
153
170
|
raise StopAsyncIteration("Download complete")
|
154
171
|
if not self._iter_downloader:
|
@@ -167,7 +184,7 @@ class _AsyncChunkIterator(object):
|
|
167
184
|
|
168
185
|
try:
|
169
186
|
chunk = next(self._iter_chunks)
|
170
|
-
self._current_content += await self._iter_downloader.yield_chunk(chunk)
|
187
|
+
self._current_content += (await self._iter_downloader.yield_chunk(chunk))[0]
|
171
188
|
except StopIteration as exc:
|
172
189
|
self._complete = True
|
173
190
|
# it's likely that there some data left in self._current_content
|
@@ -177,46 +194,46 @@ class _AsyncChunkIterator(object):
|
|
177
194
|
|
178
195
|
return self._get_chunk_data()
|
179
196
|
|
180
|
-
def _get_chunk_data(self):
|
197
|
+
def _get_chunk_data(self) -> bytes:
|
181
198
|
chunk_data = self._current_content[: self._chunk_size]
|
182
199
|
self._current_content = self._current_content[self._chunk_size:]
|
183
200
|
return chunk_data
|
184
201
|
|
185
202
|
|
186
203
|
class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-attributes
|
187
|
-
"""
|
188
|
-
|
189
|
-
:ivar str name:
|
190
|
-
The name of the blob being downloaded.
|
191
|
-
:ivar str container:
|
192
|
-
The name of the container where the blob is.
|
193
|
-
:ivar ~azure.storage.blob.BlobProperties properties:
|
194
|
-
The properties of the blob being downloaded. If only a range of the data is being
|
195
|
-
downloaded, this will be reflected in the properties.
|
196
|
-
:ivar int size:
|
197
|
-
The size of the total data in the stream. This will be the byte range if specified,
|
198
|
-
otherwise the total size of the blob.
|
204
|
+
"""
|
205
|
+
A streaming object to download from Azure Storage.
|
199
206
|
"""
|
200
207
|
|
208
|
+
name: str
|
209
|
+
"""The name of the blob being downloaded."""
|
210
|
+
container: str
|
211
|
+
"""The name of the container where the blob is."""
|
212
|
+
properties: "BlobProperties"
|
213
|
+
"""The properties of the blob being downloaded. If only a range of the data is being
|
214
|
+
downloaded, this will be reflected in the properties."""
|
215
|
+
size: int
|
216
|
+
"""The size of the total data in the stream. This will be the byte range if specified,
|
217
|
+
otherwise the total size of the blob."""
|
218
|
+
|
201
219
|
def __init__(
|
202
220
|
self,
|
203
|
-
clients=None,
|
204
|
-
config=None,
|
205
|
-
start_range=None,
|
206
|
-
end_range=None,
|
207
|
-
validate_content=None,
|
208
|
-
encryption_options=None,
|
209
|
-
max_concurrency=1,
|
210
|
-
name=None,
|
211
|
-
container=None,
|
212
|
-
encoding=None,
|
213
|
-
download_cls=None,
|
214
|
-
**kwargs
|
215
|
-
):
|
221
|
+
clients: "AzureBlobStorage" = None, # type: ignore [assignment]
|
222
|
+
config: "StorageConfiguration" = None, # type: ignore [assignment]
|
223
|
+
start_range: Optional[int] = None,
|
224
|
+
end_range: Optional[int] = None,
|
225
|
+
validate_content: bool = None, # type: ignore [assignment]
|
226
|
+
encryption_options: Dict[str, Any] = None, # type: ignore [assignment]
|
227
|
+
max_concurrency: int = 1,
|
228
|
+
name: str = None, # type: ignore [assignment]
|
229
|
+
container: str = None, # type: ignore [assignment]
|
230
|
+
encoding: Optional[str] = None,
|
231
|
+
download_cls: Optional[Callable] = None,
|
232
|
+
**kwargs: Any
|
233
|
+
) -> None:
|
216
234
|
self.name = name
|
217
235
|
self.container = container
|
218
|
-
self.
|
219
|
-
self.size = None
|
236
|
+
self.size = 0
|
220
237
|
|
221
238
|
self._clients = clients
|
222
239
|
self._config = config
|
@@ -228,38 +245,42 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
228
245
|
self._encryption_options = encryption_options or {}
|
229
246
|
self._progress_hook = kwargs.pop('progress_hook', None)
|
230
247
|
self._request_options = kwargs
|
248
|
+
self._response = None
|
231
249
|
self._location_mode = None
|
232
|
-
self.
|
233
|
-
self.
|
234
|
-
self._file_size = None
|
250
|
+
self._current_content: Union[str, bytes] = b''
|
251
|
+
self._file_size = 0
|
235
252
|
self._non_empty_ranges = None
|
236
|
-
self.
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
self.
|
253
|
+
self._encryption_data: Optional["_EncryptionData"] = None
|
254
|
+
|
255
|
+
# The content download offset, after any processing (decryption), in bytes
|
256
|
+
self._download_offset = 0
|
257
|
+
# The raw download offset, before processing (decryption), in bytes
|
258
|
+
self._raw_download_offset = 0
|
259
|
+
# The offset the stream has been read to in bytes or chars depending on mode
|
260
|
+
self._read_offset = 0
|
261
|
+
# The offset into current_content that has been consumed in bytes or chars depending on mode
|
262
|
+
self._current_content_offset = 0
|
263
|
+
|
264
|
+
self._text_mode: Optional[bool] = None
|
265
|
+
self._decoder: Optional["IncrementalDecoder"] = None
|
266
|
+
# Whether the current content is the first chunk of download content or not
|
267
|
+
self._first_chunk = True
|
268
|
+
self._download_start = self._start_range or 0
|
242
269
|
|
243
270
|
# The cls is passed in via download_cls to avoid conflicting arg name with Generic.__new__
|
244
271
|
# but needs to be changed to cls in the request options.
|
245
272
|
self._request_options['cls'] = download_cls
|
246
273
|
|
247
|
-
# The service only provides transactional MD5s for chunks under 4MB.
|
248
|
-
# If validate_content is on, get only self.MAX_CHUNK_GET_SIZE for the first
|
249
|
-
# chunk so a transactional MD5 can be retrieved.
|
250
|
-
self._first_get_size = self._config.max_single_get_size if not self._validate_content \
|
251
|
-
else self._config.max_chunk_get_size
|
252
|
-
|
253
274
|
def __len__(self):
|
254
275
|
return self.size
|
255
276
|
|
256
|
-
async def _get_encryption_data_request(self):
|
277
|
+
async def _get_encryption_data_request(self) -> None:
|
257
278
|
# Save current request cls
|
258
279
|
download_cls = self._request_options.pop('cls', None)
|
259
280
|
# Adjust cls for get_properties
|
260
281
|
self._request_options['cls'] = deserialize_blob_properties
|
261
282
|
|
262
|
-
properties = await self._clients.blob.get_properties(**self._request_options)
|
283
|
+
properties = cast("BlobProperties", await self._clients.blob.get_properties(**self._request_options))
|
263
284
|
# This will return None if there is no encryption metadata or there are parsing errors.
|
264
285
|
# That is acceptable here, the proper error will be caught and surfaced when attempting
|
265
286
|
# to decrypt the blob.
|
@@ -268,16 +289,23 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
268
289
|
# Restore cls for download
|
269
290
|
self._request_options['cls'] = download_cls
|
270
291
|
|
271
|
-
async def _setup(self):
|
292
|
+
async def _setup(self) -> None:
|
272
293
|
if self._encryption_options.get("key") is not None or self._encryption_options.get("resolver") is not None:
|
273
294
|
await self._get_encryption_data_request()
|
274
295
|
|
296
|
+
# The service only provides transactional MD5s for chunks under 4MB.
|
297
|
+
# If validate_content is on, get only self.MAX_CHUNK_GET_SIZE for the first
|
298
|
+
# chunk so a transactional MD5 can be retrieved.
|
299
|
+
first_get_size = (
|
300
|
+
self._config.max_single_get_size if not self._validate_content else self._config.max_chunk_get_size
|
301
|
+
)
|
275
302
|
initial_request_start = self._start_range if self._start_range is not None else 0
|
276
|
-
if self._end_range is not None and self._end_range -
|
303
|
+
if self._end_range is not None and self._end_range - initial_request_start < first_get_size:
|
277
304
|
initial_request_end = self._end_range
|
278
305
|
else:
|
279
|
-
initial_request_end = initial_request_start +
|
306
|
+
initial_request_end = initial_request_start + first_get_size - 1
|
280
307
|
|
308
|
+
# pylint: disable-next=attribute-defined-outside-init
|
281
309
|
self._initial_range, self._initial_offset = process_range_and_offset(
|
282
310
|
initial_request_start,
|
283
311
|
initial_request_end,
|
@@ -287,44 +315,26 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
287
315
|
)
|
288
316
|
|
289
317
|
self._response = await self._initial_request()
|
290
|
-
|
291
|
-
self.properties = self._response.properties
|
318
|
+
self.properties = cast("BlobProperties", self._response.properties) # type: ignore [attr-defined]
|
292
319
|
self.properties.name = self.name
|
293
320
|
self.properties.container = self.container
|
294
321
|
|
295
|
-
# Set the content length to the download size instead of the size of
|
296
|
-
# the last range
|
297
|
-
initial_size = self._response.properties.size
|
322
|
+
# Set the content length to the download size instead of the size of the last range
|
298
323
|
self.properties.size = self.size
|
299
|
-
|
300
|
-
|
301
|
-
|
324
|
+
self.properties.content_range = (f"bytes {self._download_start}-"
|
325
|
+
f"{self._end_range if self._end_range is not None else self._file_size - 1}/"
|
326
|
+
f"{self._file_size}")
|
302
327
|
|
303
328
|
# Overwrite the content MD5 as it is the MD5 for the last range instead
|
304
329
|
# of the stored MD5
|
305
330
|
# TODO: Set to the stored MD5 when the service returns this
|
306
|
-
self.properties.content_md5 = None
|
331
|
+
self.properties.content_md5 = None # type: ignore [attr-defined]
|
307
332
|
|
308
|
-
|
309
|
-
|
310
|
-
else:
|
311
|
-
self._current_content = await process_content(
|
312
|
-
self._response,
|
313
|
-
self._initial_offset[0],
|
314
|
-
self._initial_offset[1],
|
315
|
-
self._encryption_options
|
316
|
-
)
|
317
|
-
|
318
|
-
# If the file is small, the download is complete at this point.
|
319
|
-
# If file size is large, download the rest of the file in chunks.
|
320
|
-
# For encryption V2, calculate based on size of decrypted content, not download size.
|
333
|
+
@property
|
334
|
+
def _download_complete(self):
|
321
335
|
if is_encryption_v2(self._encryption_data):
|
322
|
-
|
323
|
-
|
324
|
-
self._download_complete = initial_size >= self.size
|
325
|
-
|
326
|
-
if not self._download_complete and self._request_options.get("modified_access_conditions"):
|
327
|
-
self._request_options["modified_access_conditions"].if_match = self._response.properties.etag
|
336
|
+
return self._download_offset >= self.size
|
337
|
+
return self._raw_download_offset >= self.size
|
328
338
|
|
329
339
|
async def _initial_request(self):
|
330
340
|
range_header, range_validation = validate_and_format_range_headers(
|
@@ -335,13 +345,13 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
335
345
|
check_content_md5=self._validate_content)
|
336
346
|
|
337
347
|
try:
|
338
|
-
location_mode, response = await self._clients.blob.download(
|
348
|
+
location_mode, response = cast(Tuple[Optional[str], Any], await self._clients.blob.download(
|
339
349
|
range=range_header,
|
340
350
|
range_get_content_md5=range_validation,
|
341
351
|
validate_content=self._validate_content,
|
342
352
|
data_stream_total=None,
|
343
353
|
download_stream_current=0,
|
344
|
-
**self._request_options)
|
354
|
+
**self._request_options))
|
345
355
|
|
346
356
|
# Check the location we read from to ensure we use the same one
|
347
357
|
# for subsequent requests.
|
@@ -355,9 +365,9 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
355
365
|
# Remove any extra encryption data size from blob size
|
356
366
|
self._file_size = adjust_blob_size_for_encryption(self._file_size, self._encryption_data)
|
357
367
|
|
358
|
-
if self._end_range is not None:
|
368
|
+
if self._end_range is not None and self._start_range is not None:
|
359
369
|
# Use the length unless it is over the end of the file
|
360
|
-
self.size = min(self._file_size, self._end_range - self._start_range + 1)
|
370
|
+
self.size = min(self._file_size - self._start_range, self._end_range - self._start_range + 1)
|
361
371
|
elif self._start_range is not None:
|
362
372
|
self.size = self._file_size - self._start_range
|
363
373
|
else:
|
@@ -369,11 +379,11 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
369
379
|
# request a range, do a regular get request in order to get
|
370
380
|
# any properties.
|
371
381
|
try:
|
372
|
-
_, response = await self._clients.blob.download(
|
382
|
+
_, response = cast(Tuple[Optional[Any], Any], await self._clients.blob.download(
|
373
383
|
validate_content=self._validate_content,
|
374
384
|
data_stream_total=0,
|
375
385
|
download_stream_current=0,
|
376
|
-
**self._request_options)
|
386
|
+
**self._request_options))
|
377
387
|
except HttpResponseError as e:
|
378
388
|
process_storage_error(e)
|
379
389
|
|
@@ -383,6 +393,18 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
383
393
|
else:
|
384
394
|
process_storage_error(error)
|
385
395
|
|
396
|
+
if self.size == 0:
|
397
|
+
self._current_content = b""
|
398
|
+
else:
|
399
|
+
self._current_content = await process_content(
|
400
|
+
response,
|
401
|
+
self._initial_offset[0],
|
402
|
+
self._initial_offset[1],
|
403
|
+
self._encryption_options
|
404
|
+
)
|
405
|
+
self._download_offset += len(self._current_content)
|
406
|
+
self._raw_download_offset += response.content_length
|
407
|
+
|
386
408
|
# get page ranges to optimize downloading sparse page blob
|
387
409
|
if response.properties.blob_type == 'PageBlob':
|
388
410
|
try:
|
@@ -391,22 +413,18 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
391
413
|
except HttpResponseError:
|
392
414
|
pass
|
393
415
|
|
394
|
-
|
416
|
+
if not self._download_complete and self._request_options.get("modified_access_conditions"):
|
417
|
+
self._request_options["modified_access_conditions"].if_match = response.properties.etag
|
395
418
|
|
396
|
-
|
397
|
-
# Start where the initial request download ended
|
398
|
-
start = self._initial_range[1] + 1
|
399
|
-
# For encryption V2 only, adjust start to the end of the fetched data rather than download size
|
400
|
-
if self._encryption_options.get("key") is not None or self._encryption_options.get("resolver") is not None:
|
401
|
-
start = (self._start_range or 0) + len(self._current_content)
|
419
|
+
return response
|
402
420
|
|
403
|
-
|
404
|
-
|
405
|
-
|
421
|
+
def chunks(self) -> AsyncIterator[bytes]:
|
422
|
+
"""
|
423
|
+
Iterate over chunks in the download stream. Note, the iterator returned will
|
424
|
+
iterate over the entire download content, regardless of any data that was
|
425
|
+
previously read.
|
406
426
|
|
407
|
-
|
408
|
-
# type: () -> AsyncIterator[bytes]
|
409
|
-
"""Iterate over chunks in the download stream.
|
427
|
+
NOTE: If the stream has been partially read, some data may be re-downloaded by the iterator.
|
410
428
|
|
411
429
|
:returns: An async iterator of the chunks in the download stream.
|
412
430
|
:rtype: AsyncIterator[bytes]
|
@@ -420,79 +438,125 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
420
438
|
:dedent: 16
|
421
439
|
:caption: Download a blob using chunks().
|
422
440
|
"""
|
423
|
-
if self.
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
441
|
+
if self._text_mode:
|
442
|
+
raise ValueError("Stream has been partially read in text mode. chunks is not supported in text mode.")
|
443
|
+
if self._encoding:
|
444
|
+
warnings.warn("Encoding is ignored with chunks as only bytes are supported.")
|
445
|
+
|
446
|
+
iter_downloader = None
|
447
|
+
# If we still have the first chunk buffered, use it. Otherwise, download all content again
|
448
|
+
if not self._first_chunk or not self._download_complete:
|
449
|
+
if self._first_chunk:
|
450
|
+
start = self._download_start + len(self._current_content)
|
451
|
+
current_progress = len(self._current_content)
|
452
|
+
else:
|
453
|
+
start = self._download_start
|
454
|
+
current_progress = 0
|
455
|
+
|
456
|
+
end = self._download_start + self.size
|
431
457
|
|
432
|
-
if self._end_range is not None:
|
433
|
-
# Use the length unless it is over the end of the file
|
434
|
-
data_end = min(self._file_size, self._end_range + 1)
|
435
458
|
iter_downloader = _AsyncChunkDownloader(
|
436
459
|
client=self._clients.blob,
|
437
460
|
non_empty_ranges=self._non_empty_ranges,
|
438
461
|
total_size=self.size,
|
439
462
|
chunk_size=self._config.max_chunk_get_size,
|
440
|
-
current_progress=
|
441
|
-
start_range=
|
442
|
-
end_range=
|
443
|
-
stream=None,
|
444
|
-
parallel=False,
|
463
|
+
current_progress=current_progress,
|
464
|
+
start_range=start,
|
465
|
+
end_range=end,
|
445
466
|
validate_content=self._validate_content,
|
446
467
|
encryption_options=self._encryption_options,
|
447
468
|
encryption_data=self._encryption_data,
|
448
469
|
use_location=self._location_mode,
|
449
|
-
**self._request_options
|
470
|
+
**self._request_options
|
471
|
+
)
|
472
|
+
|
473
|
+
initial_content = self._current_content if self._first_chunk else b''
|
450
474
|
return _AsyncChunkIterator(
|
451
475
|
size=self.size,
|
452
|
-
content=
|
476
|
+
content=cast(bytes, initial_content),
|
453
477
|
downloader=iter_downloader,
|
454
478
|
chunk_size=self._config.max_chunk_get_size)
|
455
479
|
|
456
|
-
|
480
|
+
@overload
|
481
|
+
async def read(self, size: int = -1) -> T:
|
482
|
+
...
|
483
|
+
|
484
|
+
@overload
|
485
|
+
async def read(self, *, chars: Optional[int] = None) -> T:
|
486
|
+
...
|
487
|
+
|
488
|
+
# pylint: disable-next=too-many-statements,too-many-branches
|
489
|
+
async def read(self, size: int = -1, *, chars: Optional[int] = None) -> T:
|
457
490
|
"""
|
458
|
-
Read
|
459
|
-
|
491
|
+
Read the specified bytes or chars from the stream. If `encoding`
|
492
|
+
was specified on `download_blob`, it is recommended to use the
|
493
|
+
chars parameter to read a specific number of chars to avoid decoding
|
494
|
+
errors. If size/chars is unspecified or negative all bytes will be read.
|
460
495
|
|
461
|
-
:param
|
496
|
+
:param int size:
|
462
497
|
The number of bytes to download from the stream. Leave unspecified
|
463
|
-
or set
|
498
|
+
or set negative to download all bytes.
|
499
|
+
:keyword Optional[int] chars:
|
500
|
+
The number of chars to download from the stream. Leave unspecified
|
501
|
+
or set negative to download all chars. Note, this can only be used
|
502
|
+
when encoding is specified on `download_blob`.
|
464
503
|
:returns:
|
465
504
|
The requested data as bytes or a string if encoding was specified. If
|
466
505
|
the return value is empty, there is no more data to read.
|
467
506
|
:rtype: T
|
468
507
|
"""
|
469
|
-
if size
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
read = stream.write(self._current_content[start:start + length])
|
483
|
-
|
484
|
-
remaining_size -= read
|
485
|
-
self._offset += read
|
486
|
-
if self._progress_hook:
|
487
|
-
await self._progress_hook(self._offset, self.size)
|
488
|
-
|
489
|
-
if remaining_size > 0:
|
490
|
-
start_range = self._get_downloader_start_with_offset()
|
508
|
+
if size > -1 and self._encoding:
|
509
|
+
warnings.warn(
|
510
|
+
"Size parameter specified with text encoding enabled. It is recommended to use chars "
|
511
|
+
"to read a specific number of characters instead."
|
512
|
+
)
|
513
|
+
if size > -1 and chars is not None:
|
514
|
+
raise ValueError("Cannot specify both size and chars.")
|
515
|
+
if not self._encoding and chars is not None:
|
516
|
+
raise ValueError("Must specify encoding to read chars.")
|
517
|
+
if self._text_mode and size > -1:
|
518
|
+
raise ValueError("Stream has been partially read in text mode. Please use chars.")
|
519
|
+
if self._text_mode is False and chars is not None:
|
520
|
+
raise ValueError("Stream has been partially read in bytes mode. Please use size.")
|
491
521
|
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
522
|
+
# Empty blob or already read to the end
|
523
|
+
if (size == 0 or chars == 0 or
|
524
|
+
(self._download_complete and self._current_content_offset >= len(self._current_content))):
|
525
|
+
return b'' if not self._encoding else '' # type: ignore [return-value]
|
526
|
+
|
527
|
+
if not self._text_mode and chars is not None and self._encoding is not None:
|
528
|
+
self._text_mode = True
|
529
|
+
self._decoder = codecs.getincrementaldecoder(self._encoding)('strict')
|
530
|
+
self._current_content = self._decoder.decode(
|
531
|
+
cast(bytes, self._current_content), final=self._download_complete)
|
532
|
+
elif self._text_mode is None:
|
533
|
+
self._text_mode = False
|
534
|
+
|
535
|
+
output_stream: Union[BytesIO, StringIO]
|
536
|
+
if self._text_mode:
|
537
|
+
output_stream = StringIO()
|
538
|
+
size = chars if chars else sys.maxsize
|
539
|
+
else:
|
540
|
+
output_stream = BytesIO()
|
541
|
+
size = size if size > 0 else sys.maxsize
|
542
|
+
readall = size == sys.maxsize
|
543
|
+
count = 0
|
544
|
+
|
545
|
+
# Start by reading from current_content
|
546
|
+
start = self._current_content_offset
|
547
|
+
length = min(len(self._current_content) - self._current_content_offset, size - count)
|
548
|
+
read = output_stream.write(self._current_content[start:start + length]) # type: ignore [arg-type]
|
549
|
+
|
550
|
+
count += read
|
551
|
+
self._current_content_offset += read
|
552
|
+
self._read_offset += read
|
553
|
+
await self._check_and_report_progress()
|
554
|
+
|
555
|
+
remaining = size - count
|
556
|
+
if remaining > 0 and not self._download_complete:
|
557
|
+
# Create a downloader than can download the rest of the file
|
558
|
+
start = self._download_start + self._download_offset
|
559
|
+
end = self._download_start + self.size
|
496
560
|
|
497
561
|
parallel = self._max_concurrency > 1
|
498
562
|
downloader = _AsyncChunkDownloader(
|
@@ -500,10 +564,10 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
500
564
|
non_empty_ranges=self._non_empty_ranges,
|
501
565
|
total_size=self.size,
|
502
566
|
chunk_size=self._config.max_chunk_get_size,
|
503
|
-
current_progress=self.
|
504
|
-
start_range=
|
505
|
-
end_range=
|
506
|
-
stream=
|
567
|
+
current_progress=self._read_offset,
|
568
|
+
start_range=start,
|
569
|
+
end_range=end,
|
570
|
+
stream=output_stream,
|
507
571
|
parallel=parallel,
|
508
572
|
validate_content=self._validate_content,
|
509
573
|
encryption_options=self._encryption_options,
|
@@ -512,43 +576,77 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
512
576
|
progress_hook=self._progress_hook,
|
513
577
|
**self._request_options
|
514
578
|
)
|
579
|
+
self._first_chunk = False
|
580
|
+
|
581
|
+
# When reading all data, have the downloader read everything into the stream.
|
582
|
+
# Else, read one chunk at a time (using the downloader as an iterator) until
|
583
|
+
# the requested size is reached.
|
584
|
+
chunks_iter = downloader.get_chunk_offsets()
|
585
|
+
if readall and not self._text_mode:
|
586
|
+
running_futures: Any = [
|
587
|
+
asyncio.ensure_future(downloader.process_chunk(d))
|
588
|
+
for d in islice(chunks_iter, 0, self._max_concurrency)
|
589
|
+
]
|
590
|
+
while running_futures:
|
591
|
+
# Wait for some download to finish before adding a new one
|
592
|
+
done, running_futures = await asyncio.wait(
|
593
|
+
running_futures, return_when=asyncio.FIRST_COMPLETED)
|
594
|
+
try:
|
595
|
+
for task in done:
|
596
|
+
task.result()
|
597
|
+
except HttpResponseError as error:
|
598
|
+
process_storage_error(error)
|
599
|
+
try:
|
600
|
+
for _ in range(0, len(done)):
|
601
|
+
next_chunk = next(chunks_iter)
|
602
|
+
running_futures.add(asyncio.ensure_future(downloader.process_chunk(next_chunk)))
|
603
|
+
except StopIteration:
|
604
|
+
break
|
605
|
+
|
606
|
+
if running_futures:
|
607
|
+
# Wait for the remaining downloads to finish
|
608
|
+
done, _running_futures = await asyncio.wait(running_futures)
|
609
|
+
try:
|
610
|
+
for task in done:
|
611
|
+
task.result()
|
612
|
+
except HttpResponseError as error:
|
613
|
+
process_storage_error(error)
|
614
|
+
|
615
|
+
self._complete_read()
|
515
616
|
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
617
|
+
else:
|
618
|
+
while (chunk := next(chunks_iter, None)) is not None and remaining > 0:
|
619
|
+
chunk_data, content_length = await downloader.yield_chunk(chunk)
|
620
|
+
self._download_offset += len(chunk_data)
|
621
|
+
self._raw_download_offset += content_length
|
622
|
+
if self._text_mode and self._decoder is not None:
|
623
|
+
self._current_content = self._decoder.decode(chunk_data, final=self._download_complete)
|
624
|
+
else:
|
625
|
+
self._current_content = chunk_data
|
626
|
+
|
627
|
+
if remaining < len(self._current_content):
|
628
|
+
read = output_stream.write(self._current_content[:remaining]) # type: ignore [arg-type]
|
629
|
+
else:
|
630
|
+
read = output_stream.write(self._current_content) # type: ignore [arg-type]
|
631
|
+
|
632
|
+
self._current_content_offset = read
|
633
|
+
self._read_offset += read
|
634
|
+
remaining -= read
|
635
|
+
await self._check_and_report_progress()
|
636
|
+
|
637
|
+
data = output_stream.getvalue()
|
638
|
+
if not self._text_mode and self._encoding:
|
639
|
+
try:
|
640
|
+
# This is technically incorrect to do, but we have it for backwards compatibility.
|
641
|
+
data = cast(bytes, data).decode(self._encoding)
|
642
|
+
except UnicodeDecodeError:
|
643
|
+
warnings.warn(
|
644
|
+
"Encountered a decoding error while decoding blob data from a partial read. "
|
645
|
+
"Try using the `chars` keyword instead to read in text mode."
|
646
|
+
)
|
647
|
+
raise
|
545
648
|
|
546
|
-
|
547
|
-
|
548
|
-
data = stream.getvalue()
|
549
|
-
if self._encoding:
|
550
|
-
return data.decode(self._encoding)
|
551
|
-
return data
|
649
|
+
return data # type: ignore [return-value]
|
552
650
|
|
553
651
|
async def readall(self) -> T:
    """Download and return the complete remaining contents of this blob.

    This operation blocks until all data is downloaded. Delegates to
    :func:`read` with no size argument, which consumes the stream to the
    end (honoring any data already buffered and the configured encoding).

    :returns: The requested data as bytes or a string if encoding was specified.
    :rtype: T
    """
    # read() with no arguments reads everything remaining; any encoding
    # configured on the downloader is applied there.
    return await self.read()
|
608
660
|
|
609
661
|
async def readinto(self, stream: IO[bytes]) -> int:
|
610
662
|
"""Download the contents of this blob to a stream.
|
@@ -616,6 +668,11 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
616
668
|
:returns: The number of bytes read.
|
617
669
|
:rtype: int
|
618
670
|
"""
|
671
|
+
if self._text_mode:
|
672
|
+
raise ValueError("Stream has been partially read in text mode. readinto is not supported in text mode.")
|
673
|
+
if self._encoding:
|
674
|
+
warnings.warn("Encoding is ignored with readinto as only byte streams are supported.")
|
675
|
+
|
619
676
|
# the stream must be seekable if parallel download is required
|
620
677
|
parallel = self._max_concurrency > 1
|
621
678
|
if parallel:
|
@@ -629,35 +686,34 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
629
686
|
raise ValueError(error_message) from exc
|
630
687
|
|
631
688
|
# If some data has been streamed using `read`, only stream the remaining data
|
632
|
-
remaining_size = self.size - self.
|
689
|
+
remaining_size = self.size - self._read_offset
|
633
690
|
# Already read to the end
|
634
691
|
if remaining_size <= 0:
|
635
692
|
return 0
|
636
693
|
|
637
|
-
# Write the content to the user stream
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
self._offset += len(content)
|
642
|
-
if self._progress_hook:
|
643
|
-
await self._progress_hook(len(content), self.size)
|
694
|
+
# Write the current content to the user stream
|
695
|
+
current_remaining = len(self._current_content) - self._current_content_offset
|
696
|
+
start = self._current_content_offset
|
697
|
+
count = stream.write(cast(bytes, self._current_content[start:start + current_remaining]))
|
644
698
|
|
699
|
+
self._current_content_offset += count
|
700
|
+
self._read_offset += count
|
701
|
+
if self._progress_hook:
|
702
|
+
await self._progress_hook(self._read_offset, self.size)
|
703
|
+
|
704
|
+
# If all the data was already downloaded/buffered
|
645
705
|
if self._download_complete:
|
646
706
|
return remaining_size
|
647
707
|
|
648
|
-
|
649
|
-
|
650
|
-
# Use the length unless it is over the end of the file
|
651
|
-
data_end = min(self._file_size, self._end_range + 1)
|
652
|
-
|
653
|
-
data_start = self._get_downloader_start_with_offset()
|
708
|
+
data_start = self._download_start + self._read_offset
|
709
|
+
data_end = self._download_start + self.size
|
654
710
|
|
655
711
|
downloader = _AsyncChunkDownloader(
|
656
712
|
client=self._clients.blob,
|
657
713
|
non_empty_ranges=self._non_empty_ranges,
|
658
714
|
total_size=self.size,
|
659
715
|
chunk_size=self._config.max_chunk_get_size,
|
660
|
-
current_progress=self.
|
716
|
+
current_progress=self._read_offset,
|
661
717
|
start_range=data_start,
|
662
718
|
end_range=data_end,
|
663
719
|
stream=stream,
|
@@ -667,13 +723,14 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
667
723
|
encryption_data=self._encryption_data,
|
668
724
|
use_location=self._location_mode,
|
669
725
|
progress_hook=self._progress_hook,
|
670
|
-
**self._request_options
|
726
|
+
**self._request_options
|
727
|
+
)
|
671
728
|
|
672
729
|
dl_tasks = downloader.get_chunk_offsets()
|
673
|
-
running_futures =
|
730
|
+
running_futures = {
|
674
731
|
asyncio.ensure_future(downloader.process_chunk(d))
|
675
732
|
for d in islice(dl_tasks, 0, self._max_concurrency)
|
676
|
-
|
733
|
+
}
|
677
734
|
while running_futures:
|
678
735
|
# Wait for some download to finish before adding a new one
|
679
736
|
done, running_futures = await asyncio.wait(
|
@@ -699,8 +756,72 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
699
756
|
except HttpResponseError as error:
|
700
757
|
process_storage_error(error)
|
701
758
|
|
759
|
+
self._complete_read()
|
702
760
|
return remaining_size
|
703
761
|
|
762
|
+
def _complete_read(self):
|
763
|
+
"""Adjusts all offsets to the end of the download."""
|
764
|
+
self._download_offset = self.size
|
765
|
+
self._raw_download_offset = self.size
|
766
|
+
self._read_offset = self.size
|
767
|
+
self._current_content_offset = len(self._current_content)
|
768
|
+
|
769
|
+
async def _check_and_report_progress(self):
|
770
|
+
"""Reports progress if necessary."""
|
771
|
+
# Only report progress at the end of each chunk and use download_offset to always report
|
772
|
+
# progress in terms of (approximate) byte count.
|
773
|
+
if self._progress_hook and self._current_content_offset == len(self._current_content):
|
774
|
+
await self._progress_hook(self._download_offset, self.size)
|
775
|
+
|
776
|
+
async def content_as_bytes(self, max_concurrency=1):
    """DEPRECATED: Download the contents of this file.

    This operation is blocking until all data is downloaded.

    This method is deprecated, use func:`readall` instead.

    :param int max_concurrency:
        The number of parallel connections with which to download.
    :returns: The contents of the file as bytes.
    :rtype: bytes
    """
    warnings.warn("content_as_bytes is deprecated, use readall instead", DeprecationWarning)
    # Once the stream has switched to text mode, byte-oriented reads are
    # no longer possible.
    if self._text_mode:
        raise ValueError(
            "Stream has been partially read in text mode. "
            "content_as_bytes is not supported in text mode.")

    self._max_concurrency = max_concurrency
    return await self.readall()
|
798
|
+
|
799
|
+
async def content_as_text(self, max_concurrency=1, encoding="UTF-8"):
    """DEPRECATED: Download the contents of this blob, and decode as text.

    This operation is blocking until all data is downloaded.

    This method is deprecated, use func:`readall` instead.

    :param int max_concurrency:
        The number of parallel connections with which to download.
    :param str encoding:
        Text encoding to decode the downloaded bytes. Default is UTF-8.
    :returns: The content of the file as a str.
    :rtype: str
    """
    warnings.warn("content_as_text is deprecated, use readall instead", DeprecationWarning)
    # Once the stream has switched to text mode, this legacy path cannot
    # be used.
    if self._text_mode:
        raise ValueError(
            "Stream has been partially read in text mode. "
            "content_as_text is not supported in text mode.")

    self._max_concurrency = max_concurrency
    self._encoding = encoding
    return await self.readall()
|
824
|
+
|
704
825
|
async def download_to_stream(self, stream, max_concurrency=1):
    """DEPRECATED: Download the contents of this blob to a stream.

    This method is deprecated, use func:`readinto` instead.

    :param stream:
        The writable stream to download to (e.g. an open file handle).
    :param int max_concurrency:
        The number of parallel connections with which to download.
    :returns: The properties of the downloaded blob.
    """
    warnings.warn(
        "download_to_stream is deprecated, use readinto instead",
        DeprecationWarning
    )
    # Byte-stream output is incompatible with a stream already switched
    # to text mode by a chars-based read.
    if self._text_mode:
        raise ValueError("Stream has been partially read in text mode. "
                         "download_to_stream is not supported in text mode.")

    self._max_concurrency = max_concurrency
    await self.readinto(stream)
    # Legacy contract: return the blob properties after the download.
    return self.properties
|