azure-storage-blob 12.21.0b1__py3-none-any.whl → 12.23.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- azure/storage/blob/__init__.py +19 -18
- azure/storage/blob/_blob_client.py +470 -1555
- azure/storage/blob/_blob_client_helpers.py +1242 -0
- azure/storage/blob/_blob_service_client.py +93 -112
- azure/storage/blob/_blob_service_client_helpers.py +27 -0
- azure/storage/blob/_container_client.py +176 -377
- azure/storage/blob/_container_client_helpers.py +266 -0
- azure/storage/blob/_deserialize.py +68 -44
- azure/storage/blob/_download.py +375 -241
- azure/storage/blob/_encryption.py +14 -7
- azure/storage/blob/_generated/_azure_blob_storage.py +2 -1
- azure/storage/blob/_generated/_serialization.py +2 -0
- azure/storage/blob/_generated/aio/_azure_blob_storage.py +2 -1
- azure/storage/blob/_generated/aio/operations/_append_blob_operations.py +1 -7
- azure/storage/blob/_generated/aio/operations/_blob_operations.py +21 -47
- azure/storage/blob/_generated/aio/operations/_block_blob_operations.py +2 -10
- azure/storage/blob/_generated/aio/operations/_container_operations.py +13 -26
- azure/storage/blob/_generated/aio/operations/_page_blob_operations.py +3 -14
- azure/storage/blob/_generated/aio/operations/_service_operations.py +14 -17
- azure/storage/blob/_generated/operations/_append_blob_operations.py +1 -7
- azure/storage/blob/_generated/operations/_blob_operations.py +21 -47
- azure/storage/blob/_generated/operations/_block_blob_operations.py +2 -10
- azure/storage/blob/_generated/operations/_container_operations.py +13 -26
- azure/storage/blob/_generated/operations/_page_blob_operations.py +3 -14
- azure/storage/blob/_generated/operations/_service_operations.py +14 -17
- azure/storage/blob/_generated/py.typed +1 -0
- azure/storage/blob/_lease.py +52 -63
- azure/storage/blob/_list_blobs_helper.py +129 -135
- azure/storage/blob/_models.py +480 -277
- azure/storage/blob/_quick_query_helper.py +30 -31
- azure/storage/blob/_serialize.py +39 -56
- azure/storage/blob/_shared/avro/datafile.py +1 -1
- azure/storage/blob/_shared/avro/datafile_async.py +1 -1
- azure/storage/blob/_shared/base_client.py +3 -1
- azure/storage/blob/_shared/base_client_async.py +1 -1
- azure/storage/blob/_shared/policies.py +16 -15
- azure/storage/blob/_shared/policies_async.py +21 -6
- azure/storage/blob/_shared/response_handlers.py +6 -2
- azure/storage/blob/_shared/shared_access_signature.py +21 -3
- azure/storage/blob/_shared/uploads.py +1 -1
- azure/storage/blob/_shared/uploads_async.py +1 -1
- azure/storage/blob/_shared_access_signature.py +110 -52
- azure/storage/blob/_upload_helpers.py +75 -68
- azure/storage/blob/_version.py +1 -1
- azure/storage/blob/aio/__init__.py +19 -11
- azure/storage/blob/aio/_blob_client_async.py +554 -301
- azure/storage/blob/aio/_blob_service_client_async.py +148 -97
- azure/storage/blob/aio/_container_client_async.py +289 -140
- azure/storage/blob/aio/_download_async.py +485 -337
- azure/storage/blob/aio/_lease_async.py +61 -60
- azure/storage/blob/aio/_list_blobs_helper.py +94 -96
- azure/storage/blob/aio/_models.py +60 -38
- azure/storage/blob/aio/_upload_helpers.py +75 -66
- {azure_storage_blob-12.21.0b1.dist-info → azure_storage_blob-12.23.0.dist-info}/METADATA +7 -7
- azure_storage_blob-12.23.0.dist-info/RECORD +84 -0
- {azure_storage_blob-12.21.0b1.dist-info → azure_storage_blob-12.23.0.dist-info}/WHEEL +1 -1
- azure/storage/blob/_generated/_vendor.py +0 -16
- azure_storage_blob-12.21.0b1.dist-info/RECORD +0 -81
- {azure_storage_blob-12.21.0b1.dist-info → azure_storage_blob-12.23.0.dist-info}/LICENSE +0 -0
- {azure_storage_blob-12.21.0b1.dist-info → azure_storage_blob-12.23.0.dist-info}/top_level.txt +0 -0
@@ -4,19 +4,25 @@
|
|
4
4
|
# license information.
|
5
5
|
# --------------------------------------------------------------------------
|
6
6
|
# pylint: disable=invalid-overridden-method
|
7
|
+
# mypy: disable-error-code=override
|
7
8
|
|
9
|
+
import asyncio
|
10
|
+
import codecs
|
8
11
|
import sys
|
9
12
|
import warnings
|
10
|
-
from io import BytesIO
|
13
|
+
from io import BytesIO, StringIO
|
11
14
|
from itertools import islice
|
12
|
-
from typing import
|
13
|
-
|
14
|
-
|
15
|
+
from typing import (
|
16
|
+
Any, AsyncIterator, Awaitable,
|
17
|
+
Generator, Callable, cast, Dict,
|
18
|
+
Generic, IO, Optional, overload,
|
19
|
+
Tuple, TypeVar, Union, TYPE_CHECKING
|
20
|
+
)
|
15
21
|
|
16
|
-
from azure.core.exceptions import HttpResponseError
|
22
|
+
from azure.core.exceptions import DecodeError, HttpResponseError, IncompleteReadError
|
17
23
|
|
18
24
|
from .._shared.request_handlers import validate_and_format_range_headers
|
19
|
-
from .._shared.response_handlers import
|
25
|
+
from .._shared.response_handlers import parse_length_from_content_range, process_storage_error
|
20
26
|
from .._deserialize import deserialize_blob_properties, get_page_ranges_result
|
21
27
|
from .._download import process_range_and_offset, _ChunkDownloader
|
22
28
|
from .._encryption import (
|
@@ -26,17 +32,26 @@ from .._encryption import (
|
|
26
32
|
parse_encryption_data
|
27
33
|
)
|
28
34
|
|
35
|
+
if TYPE_CHECKING:
|
36
|
+
from codecs import IncrementalDecoder
|
37
|
+
from .._encryption import _EncryptionData
|
38
|
+
from .._generated.aio import AzureBlobStorage
|
39
|
+
from .._models import BlobProperties
|
40
|
+
from .._shared.models import StorageConfiguration
|
41
|
+
|
42
|
+
|
29
43
|
T = TypeVar('T', bytes, str)
|
30
44
|
|
31
45
|
|
32
|
-
async def process_content(data, start_offset, end_offset, encryption):
|
46
|
+
async def process_content(data: Any, start_offset: int, end_offset: int, encryption: Dict[str, Any]) -> bytes:
|
33
47
|
if data is None:
|
34
48
|
raise ValueError("Response cannot be None.")
|
35
|
-
|
49
|
+
await data.response.read()
|
50
|
+
content = cast(bytes, data.response.content)
|
36
51
|
if encryption.get('key') is not None or encryption.get('resolver') is not None:
|
37
52
|
try:
|
38
53
|
return decrypt_blob(
|
39
|
-
encryption.get('required'),
|
54
|
+
encryption.get('required') or False,
|
40
55
|
encryption.get('key'),
|
41
56
|
encryption.get('resolver'),
|
42
57
|
content,
|
@@ -52,42 +67,45 @@ async def process_content(data, start_offset, end_offset, encryption):
|
|
52
67
|
|
53
68
|
|
54
69
|
class _AsyncChunkDownloader(_ChunkDownloader):
|
55
|
-
def __init__(self, **kwargs):
|
70
|
+
def __init__(self, **kwargs: Any) -> None:
|
56
71
|
super(_AsyncChunkDownloader, self).__init__(**kwargs)
|
57
|
-
self.
|
58
|
-
self.
|
72
|
+
self.stream_lock_async = asyncio.Lock() if kwargs.get('parallel') else None
|
73
|
+
self.progress_lock_async = asyncio.Lock() if kwargs.get('parallel') else None
|
59
74
|
|
60
|
-
async def process_chunk(self, chunk_start):
|
75
|
+
async def process_chunk(self, chunk_start: int) -> None:
|
61
76
|
chunk_start, chunk_end = self._calculate_range(chunk_start)
|
62
|
-
chunk_data = await self._download_chunk(chunk_start, chunk_end - 1)
|
77
|
+
chunk_data, _ = await self._download_chunk(chunk_start, chunk_end - 1)
|
63
78
|
length = chunk_end - chunk_start
|
64
79
|
if length > 0:
|
65
80
|
await self._write_to_stream(chunk_data, chunk_start)
|
66
81
|
await self._update_progress(length)
|
67
82
|
|
68
|
-
async def yield_chunk(self, chunk_start):
|
83
|
+
async def yield_chunk(self, chunk_start: int) -> Tuple[bytes, int]:
|
69
84
|
chunk_start, chunk_end = self._calculate_range(chunk_start)
|
70
85
|
return await self._download_chunk(chunk_start, chunk_end - 1)
|
71
86
|
|
72
|
-
async def _update_progress(self, length):
|
73
|
-
if self.
|
74
|
-
async with self.
|
87
|
+
async def _update_progress(self, length: int) -> None:
|
88
|
+
if self.progress_lock_async:
|
89
|
+
async with self.progress_lock_async:
|
75
90
|
self.progress_total += length
|
76
91
|
else:
|
77
92
|
self.progress_total += length
|
78
93
|
|
79
94
|
if self.progress_hook:
|
80
|
-
await
|
95
|
+
await cast(Callable[[int, Optional[int]], Awaitable[Any]], self.progress_hook)(
|
96
|
+
self.progress_total, self.total_size)
|
81
97
|
|
82
|
-
async def _write_to_stream(self, chunk_data, chunk_start):
|
83
|
-
if self.
|
84
|
-
async with self.
|
98
|
+
async def _write_to_stream(self, chunk_data: bytes, chunk_start: int) -> None:
|
99
|
+
if self.stream_lock_async:
|
100
|
+
async with self.stream_lock_async:
|
85
101
|
self.stream.seek(self.stream_start + (chunk_start - self.start_index))
|
86
102
|
self.stream.write(chunk_data)
|
87
103
|
else:
|
88
104
|
self.stream.write(chunk_data)
|
89
105
|
|
90
|
-
async def _download_chunk(self, chunk_start, chunk_end):
|
106
|
+
async def _download_chunk(self, chunk_start: int, chunk_end: int) -> Tuple[bytes, int]:
|
107
|
+
if self.encryption_options is None:
|
108
|
+
raise ValueError("Required argument is missing: encryption_options")
|
91
109
|
download_range, offset = process_range_and_offset(
|
92
110
|
chunk_start, chunk_end, chunk_end, self.encryption_options, self.encryption_data
|
93
111
|
)
|
@@ -95,60 +113,70 @@ class _AsyncChunkDownloader(_ChunkDownloader):
|
|
95
113
|
# No need to download the empty chunk from server if there's no data in the chunk to be downloaded.
|
96
114
|
# Do optimize and create empty chunk locally if condition is met.
|
97
115
|
if self._do_optimize(download_range[0], download_range[1]):
|
98
|
-
|
99
|
-
chunk_data = b"\x00" *
|
116
|
+
content_length = download_range[1] - download_range[0] + 1
|
117
|
+
chunk_data = b"\x00" * content_length
|
100
118
|
else:
|
101
119
|
range_header, range_validation = validate_and_format_range_headers(
|
102
120
|
download_range[0],
|
103
121
|
download_range[1],
|
104
122
|
check_content_md5=self.validate_content
|
105
123
|
)
|
106
|
-
try:
|
107
|
-
_, response = await self.client.download(
|
108
|
-
range=range_header,
|
109
|
-
range_get_content_md5=range_validation,
|
110
|
-
validate_content=self.validate_content,
|
111
|
-
data_stream_total=self.total_size,
|
112
|
-
download_stream_current=self.progress_total,
|
113
|
-
**self.request_options
|
114
|
-
)
|
115
124
|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
125
|
+
retry_active = True
|
126
|
+
retry_total = 3
|
127
|
+
while retry_active:
|
128
|
+
try:
|
129
|
+
_, response = await cast(Awaitable[Any], self.client.download(
|
130
|
+
range=range_header,
|
131
|
+
range_get_content_md5=range_validation,
|
132
|
+
validate_content=self.validate_content,
|
133
|
+
data_stream_total=self.total_size,
|
134
|
+
download_stream_current=self.progress_total,
|
135
|
+
**self.request_options
|
136
|
+
))
|
137
|
+
except HttpResponseError as error:
|
138
|
+
process_storage_error(error)
|
120
139
|
|
140
|
+
try:
|
141
|
+
chunk_data = await process_content(response, offset[0], offset[1], self.encryption_options)
|
142
|
+
retry_active = False
|
143
|
+
except (IncompleteReadError, HttpResponseError, DecodeError) as error:
|
144
|
+
retry_total -= 1
|
145
|
+
if retry_total <= 0:
|
146
|
+
raise HttpResponseError(error, error=error) from error
|
147
|
+
await asyncio.sleep(1)
|
148
|
+
content_length = response.content_length
|
121
149
|
|
122
150
|
# This makes sure that if_match is set so that we can validate
|
123
151
|
# that subsequent downloads are to an unmodified blob
|
124
152
|
if self.request_options.get('modified_access_conditions'):
|
125
153
|
self.request_options['modified_access_conditions'].if_match = response.properties.etag
|
126
154
|
|
127
|
-
return chunk_data
|
155
|
+
return chunk_data, content_length
|
128
156
|
|
129
157
|
|
130
158
|
class _AsyncChunkIterator(object):
|
131
159
|
"""Async iterator for chunks in blob download stream."""
|
132
160
|
|
133
|
-
def __init__(self, size, content, downloader, chunk_size):
|
161
|
+
def __init__(self, size: int, content: bytes, downloader: Optional[_AsyncChunkDownloader], chunk_size: int) -> None:
|
134
162
|
self.size = size
|
135
163
|
self._chunk_size = chunk_size
|
136
164
|
self._current_content = content
|
137
165
|
self._iter_downloader = downloader
|
138
|
-
self._iter_chunks = None
|
166
|
+
self._iter_chunks: Optional[Generator[int, None, None]] = None
|
139
167
|
self._complete = size == 0
|
140
168
|
|
141
|
-
def __len__(self):
|
169
|
+
def __len__(self) -> int:
|
142
170
|
return self.size
|
143
171
|
|
144
|
-
def __iter__(self):
|
172
|
+
def __iter__(self) -> None:
|
145
173
|
raise TypeError("Async stream must be iterated asynchronously.")
|
146
174
|
|
147
|
-
def __aiter__(self):
|
175
|
+
def __aiter__(self) -> AsyncIterator[bytes]:
|
148
176
|
return self
|
149
177
|
|
150
178
|
# Iterate through responses.
|
151
|
-
async def __anext__(self):
|
179
|
+
async def __anext__(self) -> bytes:
|
152
180
|
if self._complete:
|
153
181
|
raise StopAsyncIteration("Download complete")
|
154
182
|
if not self._iter_downloader:
|
@@ -167,7 +195,7 @@ class _AsyncChunkIterator(object):
|
|
167
195
|
|
168
196
|
try:
|
169
197
|
chunk = next(self._iter_chunks)
|
170
|
-
self._current_content += await self._iter_downloader.yield_chunk(chunk)
|
198
|
+
self._current_content += (await self._iter_downloader.yield_chunk(chunk))[0]
|
171
199
|
except StopIteration as exc:
|
172
200
|
self._complete = True
|
173
201
|
# it's likely that there some data left in self._current_content
|
@@ -177,46 +205,46 @@ class _AsyncChunkIterator(object):
|
|
177
205
|
|
178
206
|
return self._get_chunk_data()
|
179
207
|
|
180
|
-
def _get_chunk_data(self):
|
208
|
+
def _get_chunk_data(self) -> bytes:
|
181
209
|
chunk_data = self._current_content[: self._chunk_size]
|
182
210
|
self._current_content = self._current_content[self._chunk_size:]
|
183
211
|
return chunk_data
|
184
212
|
|
185
213
|
|
186
214
|
class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-attributes
|
187
|
-
"""A streaming object to download from Azure Storage.
|
188
|
-
|
189
|
-
:ivar str name:
|
190
|
-
The name of the blob being downloaded.
|
191
|
-
:ivar str container:
|
192
|
-
The name of the container where the blob is.
|
193
|
-
:ivar ~azure.storage.blob.BlobProperties properties:
|
194
|
-
The properties of the blob being downloaded. If only a range of the data is being
|
195
|
-
downloaded, this will be reflected in the properties.
|
196
|
-
:ivar int size:
|
197
|
-
The size of the total data in the stream. This will be the byte range if specified,
|
198
|
-
otherwise the total size of the blob.
|
199
215
|
"""
|
216
|
+
A streaming object to download from Azure Storage.
|
217
|
+
"""
|
218
|
+
|
219
|
+
name: str
|
220
|
+
"""The name of the blob being downloaded."""
|
221
|
+
container: str
|
222
|
+
"""The name of the container where the blob is."""
|
223
|
+
properties: "BlobProperties"
|
224
|
+
"""The properties of the blob being downloaded. If only a range of the data is being
|
225
|
+
downloaded, this will be reflected in the properties."""
|
226
|
+
size: int
|
227
|
+
"""The size of the total data in the stream. This will be the byte range if specified,
|
228
|
+
otherwise the total size of the blob."""
|
200
229
|
|
201
230
|
def __init__(
|
202
231
|
self,
|
203
|
-
clients=None,
|
204
|
-
config=None,
|
205
|
-
start_range=None,
|
206
|
-
end_range=None,
|
207
|
-
validate_content=None,
|
208
|
-
encryption_options=None,
|
209
|
-
max_concurrency=1,
|
210
|
-
name=None,
|
211
|
-
container=None,
|
212
|
-
encoding=None,
|
213
|
-
download_cls=None,
|
214
|
-
**kwargs
|
215
|
-
):
|
232
|
+
clients: "AzureBlobStorage" = None, # type: ignore [assignment]
|
233
|
+
config: "StorageConfiguration" = None, # type: ignore [assignment]
|
234
|
+
start_range: Optional[int] = None,
|
235
|
+
end_range: Optional[int] = None,
|
236
|
+
validate_content: bool = None, # type: ignore [assignment]
|
237
|
+
encryption_options: Dict[str, Any] = None, # type: ignore [assignment]
|
238
|
+
max_concurrency: int = 1,
|
239
|
+
name: str = None, # type: ignore [assignment]
|
240
|
+
container: str = None, # type: ignore [assignment]
|
241
|
+
encoding: Optional[str] = None,
|
242
|
+
download_cls: Optional[Callable] = None,
|
243
|
+
**kwargs: Any
|
244
|
+
) -> None:
|
216
245
|
self.name = name
|
217
246
|
self.container = container
|
218
|
-
self.
|
219
|
-
self.size = None
|
247
|
+
self.size = 0
|
220
248
|
|
221
249
|
self._clients = clients
|
222
250
|
self._config = config
|
@@ -228,38 +256,42 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
228
256
|
self._encryption_options = encryption_options or {}
|
229
257
|
self._progress_hook = kwargs.pop('progress_hook', None)
|
230
258
|
self._request_options = kwargs
|
259
|
+
self._response = None
|
231
260
|
self._location_mode = None
|
232
|
-
self.
|
233
|
-
self.
|
234
|
-
self._file_size = None
|
261
|
+
self._current_content: Union[str, bytes] = b''
|
262
|
+
self._file_size = 0
|
235
263
|
self._non_empty_ranges = None
|
236
|
-
self.
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
self.
|
264
|
+
self._encryption_data: Optional["_EncryptionData"] = None
|
265
|
+
|
266
|
+
# The content download offset, after any processing (decryption), in bytes
|
267
|
+
self._download_offset = 0
|
268
|
+
# The raw download offset, before processing (decryption), in bytes
|
269
|
+
self._raw_download_offset = 0
|
270
|
+
# The offset the stream has been read to in bytes or chars depending on mode
|
271
|
+
self._read_offset = 0
|
272
|
+
# The offset into current_content that has been consumed in bytes or chars depending on mode
|
273
|
+
self._current_content_offset = 0
|
274
|
+
|
275
|
+
self._text_mode: Optional[bool] = None
|
276
|
+
self._decoder: Optional["IncrementalDecoder"] = None
|
277
|
+
# Whether the current content is the first chunk of download content or not
|
278
|
+
self._first_chunk = True
|
279
|
+
self._download_start = self._start_range or 0
|
242
280
|
|
243
281
|
# The cls is passed in via download_cls to avoid conflicting arg name with Generic.__new__
|
244
282
|
# but needs to be changed to cls in the request options.
|
245
283
|
self._request_options['cls'] = download_cls
|
246
284
|
|
247
|
-
# The service only provides transactional MD5s for chunks under 4MB.
|
248
|
-
# If validate_content is on, get only self.MAX_CHUNK_GET_SIZE for the first
|
249
|
-
# chunk so a transactional MD5 can be retrieved.
|
250
|
-
self._first_get_size = self._config.max_single_get_size if not self._validate_content \
|
251
|
-
else self._config.max_chunk_get_size
|
252
|
-
|
253
285
|
def __len__(self):
|
254
286
|
return self.size
|
255
287
|
|
256
|
-
async def _get_encryption_data_request(self):
|
288
|
+
async def _get_encryption_data_request(self) -> None:
|
257
289
|
# Save current request cls
|
258
290
|
download_cls = self._request_options.pop('cls', None)
|
259
291
|
# Adjust cls for get_properties
|
260
292
|
self._request_options['cls'] = deserialize_blob_properties
|
261
293
|
|
262
|
-
properties = await self._clients.blob.get_properties(**self._request_options)
|
294
|
+
properties = cast("BlobProperties", await self._clients.blob.get_properties(**self._request_options))
|
263
295
|
# This will return None if there is no encryption metadata or there are parsing errors.
|
264
296
|
# That is acceptable here, the proper error will be caught and surfaced when attempting
|
265
297
|
# to decrypt the blob.
|
@@ -268,16 +300,23 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
268
300
|
# Restore cls for download
|
269
301
|
self._request_options['cls'] = download_cls
|
270
302
|
|
271
|
-
async def _setup(self):
|
303
|
+
async def _setup(self) -> None:
|
272
304
|
if self._encryption_options.get("key") is not None or self._encryption_options.get("resolver") is not None:
|
273
305
|
await self._get_encryption_data_request()
|
274
306
|
|
307
|
+
# The service only provides transactional MD5s for chunks under 4MB.
|
308
|
+
# If validate_content is on, get only self.MAX_CHUNK_GET_SIZE for the first
|
309
|
+
# chunk so a transactional MD5 can be retrieved.
|
310
|
+
first_get_size = (
|
311
|
+
self._config.max_single_get_size if not self._validate_content else self._config.max_chunk_get_size
|
312
|
+
)
|
275
313
|
initial_request_start = self._start_range if self._start_range is not None else 0
|
276
|
-
if self._end_range is not None and self._end_range -
|
314
|
+
if self._end_range is not None and self._end_range - initial_request_start < first_get_size:
|
277
315
|
initial_request_end = self._end_range
|
278
316
|
else:
|
279
|
-
initial_request_end = initial_request_start +
|
317
|
+
initial_request_end = initial_request_start + first_get_size - 1
|
280
318
|
|
319
|
+
# pylint: disable-next=attribute-defined-outside-init
|
281
320
|
self._initial_range, self._initial_offset = process_range_and_offset(
|
282
321
|
initial_request_start,
|
283
322
|
initial_request_end,
|
@@ -287,44 +326,26 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
287
326
|
)
|
288
327
|
|
289
328
|
self._response = await self._initial_request()
|
290
|
-
|
291
|
-
self.properties = self._response.properties
|
329
|
+
self.properties = cast("BlobProperties", self._response.properties) # type: ignore [attr-defined]
|
292
330
|
self.properties.name = self.name
|
293
331
|
self.properties.container = self.container
|
294
332
|
|
295
|
-
# Set the content length to the download size instead of the size of
|
296
|
-
# the last range
|
297
|
-
initial_size = self._response.properties.size
|
333
|
+
# Set the content length to the download size instead of the size of the last range
|
298
334
|
self.properties.size = self.size
|
299
|
-
|
300
|
-
|
301
|
-
|
335
|
+
self.properties.content_range = (f"bytes {self._download_start}-"
|
336
|
+
f"{self._end_range if self._end_range is not None else self._file_size - 1}/"
|
337
|
+
f"{self._file_size}")
|
302
338
|
|
303
339
|
# Overwrite the content MD5 as it is the MD5 for the last range instead
|
304
340
|
# of the stored MD5
|
305
341
|
# TODO: Set to the stored MD5 when the service returns this
|
306
|
-
self.properties.content_md5 = None
|
342
|
+
self.properties.content_md5 = None # type: ignore [attr-defined]
|
307
343
|
|
308
|
-
|
309
|
-
|
310
|
-
else:
|
311
|
-
self._current_content = await process_content(
|
312
|
-
self._response,
|
313
|
-
self._initial_offset[0],
|
314
|
-
self._initial_offset[1],
|
315
|
-
self._encryption_options
|
316
|
-
)
|
317
|
-
|
318
|
-
# If the file is small, the download is complete at this point.
|
319
|
-
# If file size is large, download the rest of the file in chunks.
|
320
|
-
# For encryption V2, calculate based on size of decrypted content, not download size.
|
344
|
+
@property
|
345
|
+
def _download_complete(self):
|
321
346
|
if is_encryption_v2(self._encryption_data):
|
322
|
-
|
323
|
-
|
324
|
-
self._download_complete = initial_size >= self.size
|
325
|
-
|
326
|
-
if not self._download_complete and self._request_options.get("modified_access_conditions"):
|
327
|
-
self._request_options["modified_access_conditions"].if_match = self._response.properties.etag
|
347
|
+
return self._download_offset >= self.size
|
348
|
+
return self._raw_download_offset >= self.size
|
328
349
|
|
329
350
|
async def _initial_request(self):
|
330
351
|
range_header, range_validation = validate_and_format_range_headers(
|
@@ -332,56 +353,80 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
332
353
|
self._initial_range[1],
|
333
354
|
start_range_required=False,
|
334
355
|
end_range_required=False,
|
335
|
-
check_content_md5=self._validate_content
|
356
|
+
check_content_md5=self._validate_content
|
357
|
+
)
|
336
358
|
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
#
|
360
|
-
self.
|
361
|
-
|
362
|
-
self.
|
363
|
-
|
364
|
-
|
359
|
+
retry_active = True
|
360
|
+
retry_total = 3
|
361
|
+
while retry_active:
|
362
|
+
try:
|
363
|
+
location_mode, response = cast(Tuple[Optional[str], Any], await self._clients.blob.download(
|
364
|
+
range=range_header,
|
365
|
+
range_get_content_md5=range_validation,
|
366
|
+
validate_content=self._validate_content,
|
367
|
+
data_stream_total=None,
|
368
|
+
download_stream_current=0,
|
369
|
+
**self._request_options
|
370
|
+
))
|
371
|
+
|
372
|
+
# Check the location we read from to ensure we use the same one
|
373
|
+
# for subsequent requests.
|
374
|
+
self._location_mode = location_mode
|
375
|
+
|
376
|
+
# Parse the total file size and adjust the download size if ranges
|
377
|
+
# were specified
|
378
|
+
self._file_size = parse_length_from_content_range(response.properties.content_range)
|
379
|
+
if self._file_size is None:
|
380
|
+
raise ValueError("Required Content-Range response header is missing or malformed.")
|
381
|
+
# Remove any extra encryption data size from blob size
|
382
|
+
self._file_size = adjust_blob_size_for_encryption(self._file_size, self._encryption_data)
|
383
|
+
|
384
|
+
if self._end_range is not None and self._start_range is not None:
|
385
|
+
# Use the length unless it is over the end of the file
|
386
|
+
self.size = min(self._file_size - self._start_range, self._end_range - self._start_range + 1)
|
387
|
+
elif self._start_range is not None:
|
388
|
+
self.size = self._file_size - self._start_range
|
389
|
+
else:
|
390
|
+
self.size = self._file_size
|
365
391
|
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
392
|
+
except HttpResponseError as error:
|
393
|
+
if self._start_range is None and error.response and error.status_code == 416:
|
394
|
+
# Get range will fail on an empty file. If the user did not
|
395
|
+
# request a range, do a regular get request in order to get
|
396
|
+
# any properties.
|
397
|
+
try:
|
398
|
+
_, response = cast(Tuple[Optional[Any], Any], await self._clients.blob.download(
|
399
|
+
validate_content=self._validate_content,
|
400
|
+
data_stream_total=0,
|
401
|
+
download_stream_current=0,
|
402
|
+
**self._request_options))
|
403
|
+
except HttpResponseError as e:
|
404
|
+
process_storage_error(e)
|
405
|
+
|
406
|
+
# Set the download size to empty
|
407
|
+
self.size = 0
|
408
|
+
self._file_size = 0
|
409
|
+
else:
|
410
|
+
process_storage_error(error)
|
411
|
+
|
412
|
+
try:
|
413
|
+
if self.size == 0:
|
414
|
+
self._current_content = b""
|
415
|
+
else:
|
416
|
+
self._current_content = await process_content(
|
417
|
+
response,
|
418
|
+
self._initial_offset[0],
|
419
|
+
self._initial_offset[1],
|
420
|
+
self._encryption_options
|
421
|
+
)
|
422
|
+
retry_active = False
|
423
|
+
except (IncompleteReadError, HttpResponseError, DecodeError) as error:
|
424
|
+
retry_total -= 1
|
425
|
+
if retry_total <= 0:
|
426
|
+
raise HttpResponseError(error, error=error) from error
|
427
|
+
await asyncio.sleep(1)
|
428
|
+
self._download_offset += len(self._current_content)
|
429
|
+
self._raw_download_offset += response.content_length
|
385
430
|
|
386
431
|
# get page ranges to optimize downloading sparse page blob
|
387
432
|
if response.properties.blob_type == 'PageBlob':
|
@@ -391,22 +436,18 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
391
436
|
except HttpResponseError:
|
392
437
|
pass
|
393
438
|
|
394
|
-
|
439
|
+
if not self._download_complete and self._request_options.get("modified_access_conditions"):
|
440
|
+
self._request_options["modified_access_conditions"].if_match = response.properties.etag
|
395
441
|
|
396
|
-
|
397
|
-
# Start where the initial request download ended
|
398
|
-
start = self._initial_range[1] + 1
|
399
|
-
# For encryption V2 only, adjust start to the end of the fetched data rather than download size
|
400
|
-
if self._encryption_options.get("key") is not None or self._encryption_options.get("resolver") is not None:
|
401
|
-
start = (self._start_range or 0) + len(self._current_content)
|
442
|
+
return response
|
402
443
|
|
403
|
-
|
404
|
-
|
405
|
-
|
444
|
+
def chunks(self) -> AsyncIterator[bytes]:
|
445
|
+
"""
|
446
|
+
Iterate over chunks in the download stream. Note, the iterator returned will
|
447
|
+
iterate over the entire download content, regardless of any data that was
|
448
|
+
previously read.
|
406
449
|
|
407
|
-
|
408
|
-
# type: () -> AsyncIterator[bytes]
|
409
|
-
"""Iterate over chunks in the download stream.
|
450
|
+
NOTE: If the stream has been partially read, some data may be re-downloaded by the iterator.
|
410
451
|
|
411
452
|
:returns: An async iterator of the chunks in the download stream.
|
412
453
|
:rtype: AsyncIterator[bytes]
|
@@ -420,79 +461,125 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
420
461
|
:dedent: 16
|
421
462
|
:caption: Download a blob using chunks().
|
422
463
|
"""
|
423
|
-
if self.
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
464
|
+
if self._text_mode:
|
465
|
+
raise ValueError("Stream has been partially read in text mode. chunks is not supported in text mode.")
|
466
|
+
if self._encoding:
|
467
|
+
warnings.warn("Encoding is ignored with chunks as only bytes are supported.")
|
468
|
+
|
469
|
+
iter_downloader = None
|
470
|
+
# If we still have the first chunk buffered, use it. Otherwise, download all content again
|
471
|
+
if not self._first_chunk or not self._download_complete:
|
472
|
+
if self._first_chunk:
|
473
|
+
start = self._download_start + len(self._current_content)
|
474
|
+
current_progress = len(self._current_content)
|
475
|
+
else:
|
476
|
+
start = self._download_start
|
477
|
+
current_progress = 0
|
478
|
+
|
479
|
+
end = self._download_start + self.size
|
480
|
+
|
435
481
|
iter_downloader = _AsyncChunkDownloader(
|
436
482
|
client=self._clients.blob,
|
437
483
|
non_empty_ranges=self._non_empty_ranges,
|
438
484
|
total_size=self.size,
|
439
485
|
chunk_size=self._config.max_chunk_get_size,
|
440
|
-
current_progress=
|
441
|
-
start_range=
|
442
|
-
end_range=
|
443
|
-
stream=None,
|
444
|
-
parallel=False,
|
486
|
+
current_progress=current_progress,
|
487
|
+
start_range=start,
|
488
|
+
end_range=end,
|
445
489
|
validate_content=self._validate_content,
|
446
490
|
encryption_options=self._encryption_options,
|
447
491
|
encryption_data=self._encryption_data,
|
448
492
|
use_location=self._location_mode,
|
449
|
-
**self._request_options
|
493
|
+
**self._request_options
|
494
|
+
)
|
495
|
+
|
496
|
+
initial_content = self._current_content if self._first_chunk else b''
|
450
497
|
return _AsyncChunkIterator(
|
451
498
|
size=self.size,
|
452
|
-
content=
|
499
|
+
content=cast(bytes, initial_content),
|
453
500
|
downloader=iter_downloader,
|
454
501
|
chunk_size=self._config.max_chunk_get_size)
|
455
502
|
|
456
|
-
|
503
|
+
@overload
|
504
|
+
async def read(self, size: int = -1) -> T:
|
505
|
+
...
|
506
|
+
|
507
|
+
@overload
|
508
|
+
async def read(self, *, chars: Optional[int] = None) -> T:
|
509
|
+
...
|
510
|
+
|
511
|
+
# pylint: disable-next=too-many-statements,too-many-branches
|
512
|
+
async def read(self, size: int = -1, *, chars: Optional[int] = None) -> T:
|
457
513
|
"""
|
458
|
-
Read
|
459
|
-
|
514
|
+
Read the specified bytes or chars from the stream. If `encoding`
|
515
|
+
was specified on `download_blob`, it is recommended to use the
|
516
|
+
chars parameter to read a specific number of chars to avoid decoding
|
517
|
+
errors. If size/chars is unspecified or negative all bytes will be read.
|
460
518
|
|
461
|
-
:param
|
519
|
+
:param int size:
|
462
520
|
The number of bytes to download from the stream. Leave unspecified
|
463
|
-
or set
|
521
|
+
or set negative to download all bytes.
|
522
|
+
:keyword Optional[int] chars:
|
523
|
+
The number of chars to download from the stream. Leave unspecified
|
524
|
+
or set negative to download all chars. Note, this can only be used
|
525
|
+
when encoding is specified on `download_blob`.
|
464
526
|
:returns:
|
465
527
|
The requested data as bytes or a string if encoding was specified. If
|
466
528
|
the return value is empty, there is no more data to read.
|
467
529
|
:rtype: T
|
468
530
|
"""
|
469
|
-
if size
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
read = stream.write(self._current_content[start:start + length])
|
483
|
-
|
484
|
-
remaining_size -= read
|
485
|
-
self._offset += read
|
486
|
-
if self._progress_hook:
|
487
|
-
await self._progress_hook(self._offset, self.size)
|
488
|
-
|
489
|
-
if remaining_size > 0:
|
490
|
-
start_range = self._get_downloader_start_with_offset()
|
531
|
+
if size > -1 and self._encoding:
|
532
|
+
warnings.warn(
|
533
|
+
"Size parameter specified with text encoding enabled. It is recommended to use chars "
|
534
|
+
"to read a specific number of characters instead."
|
535
|
+
)
|
536
|
+
if size > -1 and chars is not None:
|
537
|
+
raise ValueError("Cannot specify both size and chars.")
|
538
|
+
if not self._encoding and chars is not None:
|
539
|
+
raise ValueError("Must specify encoding to read chars.")
|
540
|
+
if self._text_mode and size > -1:
|
541
|
+
raise ValueError("Stream has been partially read in text mode. Please use chars.")
|
542
|
+
if self._text_mode is False and chars is not None:
|
543
|
+
raise ValueError("Stream has been partially read in bytes mode. Please use size.")
|
491
544
|
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
545
|
+
# Empty blob or already read to the end
|
546
|
+
if (size == 0 or chars == 0 or
|
547
|
+
(self._download_complete and self._current_content_offset >= len(self._current_content))):
|
548
|
+
return b'' if not self._encoding else '' # type: ignore [return-value]
|
549
|
+
|
550
|
+
if not self._text_mode and chars is not None and self._encoding is not None:
|
551
|
+
self._text_mode = True
|
552
|
+
self._decoder = codecs.getincrementaldecoder(self._encoding)('strict')
|
553
|
+
self._current_content = self._decoder.decode(
|
554
|
+
cast(bytes, self._current_content), final=self._download_complete)
|
555
|
+
elif self._text_mode is None:
|
556
|
+
self._text_mode = False
|
557
|
+
|
558
|
+
output_stream: Union[BytesIO, StringIO]
|
559
|
+
if self._text_mode:
|
560
|
+
output_stream = StringIO()
|
561
|
+
size = chars if chars else sys.maxsize
|
562
|
+
else:
|
563
|
+
output_stream = BytesIO()
|
564
|
+
size = size if size > 0 else sys.maxsize
|
565
|
+
readall = size == sys.maxsize
|
566
|
+
count = 0
|
567
|
+
|
568
|
+
# Start by reading from current_content
|
569
|
+
start = self._current_content_offset
|
570
|
+
length = min(len(self._current_content) - self._current_content_offset, size - count)
|
571
|
+
read = output_stream.write(self._current_content[start:start + length]) # type: ignore [arg-type]
|
572
|
+
|
573
|
+
count += read
|
574
|
+
self._current_content_offset += read
|
575
|
+
self._read_offset += read
|
576
|
+
await self._check_and_report_progress()
|
577
|
+
|
578
|
+
remaining = size - count
|
579
|
+
if remaining > 0 and not self._download_complete:
|
580
|
+
# Create a downloader than can download the rest of the file
|
581
|
+
start = self._download_start + self._download_offset
|
582
|
+
end = self._download_start + self.size
|
496
583
|
|
497
584
|
parallel = self._max_concurrency > 1
|
498
585
|
downloader = _AsyncChunkDownloader(
|
@@ -500,10 +587,10 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
500
587
|
non_empty_ranges=self._non_empty_ranges,
|
501
588
|
total_size=self.size,
|
502
589
|
chunk_size=self._config.max_chunk_get_size,
|
503
|
-
current_progress=self.
|
504
|
-
start_range=
|
505
|
-
end_range=
|
506
|
-
stream=
|
590
|
+
current_progress=self._read_offset,
|
591
|
+
start_range=start,
|
592
|
+
end_range=end,
|
593
|
+
stream=output_stream,
|
507
594
|
parallel=parallel,
|
508
595
|
validate_content=self._validate_content,
|
509
596
|
encryption_options=self._encryption_options,
|
@@ -512,43 +599,77 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
512
599
|
progress_hook=self._progress_hook,
|
513
600
|
**self._request_options
|
514
601
|
)
|
602
|
+
self._first_chunk = False
|
603
|
+
|
604
|
+
# When reading all data, have the downloader read everything into the stream.
|
605
|
+
# Else, read one chunk at a time (using the downloader as an iterator) until
|
606
|
+
# the requested size is reached.
|
607
|
+
chunks_iter = downloader.get_chunk_offsets()
|
608
|
+
if readall and not self._text_mode:
|
609
|
+
running_futures: Any = [
|
610
|
+
asyncio.ensure_future(downloader.process_chunk(d))
|
611
|
+
for d in islice(chunks_iter, 0, self._max_concurrency)
|
612
|
+
]
|
613
|
+
while running_futures:
|
614
|
+
# Wait for some download to finish before adding a new one
|
615
|
+
done, running_futures = await asyncio.wait(
|
616
|
+
running_futures, return_when=asyncio.FIRST_COMPLETED)
|
617
|
+
try:
|
618
|
+
for task in done:
|
619
|
+
task.result()
|
620
|
+
except HttpResponseError as error:
|
621
|
+
process_storage_error(error)
|
622
|
+
try:
|
623
|
+
for _ in range(0, len(done)):
|
624
|
+
next_chunk = next(chunks_iter)
|
625
|
+
running_futures.add(asyncio.ensure_future(downloader.process_chunk(next_chunk)))
|
626
|
+
except StopIteration:
|
627
|
+
break
|
628
|
+
|
629
|
+
if running_futures:
|
630
|
+
# Wait for the remaining downloads to finish
|
631
|
+
done, _running_futures = await asyncio.wait(running_futures)
|
632
|
+
try:
|
633
|
+
for task in done:
|
634
|
+
task.result()
|
635
|
+
except HttpResponseError as error:
|
636
|
+
process_storage_error(error)
|
637
|
+
|
638
|
+
self._complete_read()
|
515
639
|
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
640
|
+
else:
|
641
|
+
while (chunk := next(chunks_iter, None)) is not None and remaining > 0:
|
642
|
+
chunk_data, content_length = await downloader.yield_chunk(chunk)
|
643
|
+
self._download_offset += len(chunk_data)
|
644
|
+
self._raw_download_offset += content_length
|
645
|
+
if self._text_mode and self._decoder is not None:
|
646
|
+
self._current_content = self._decoder.decode(chunk_data, final=self._download_complete)
|
647
|
+
else:
|
648
|
+
self._current_content = chunk_data
|
649
|
+
|
650
|
+
if remaining < len(self._current_content):
|
651
|
+
read = output_stream.write(self._current_content[:remaining]) # type: ignore [arg-type]
|
652
|
+
else:
|
653
|
+
read = output_stream.write(self._current_content) # type: ignore [arg-type]
|
654
|
+
|
655
|
+
self._current_content_offset = read
|
656
|
+
self._read_offset += read
|
657
|
+
remaining -= read
|
658
|
+
await self._check_and_report_progress()
|
659
|
+
|
660
|
+
data = output_stream.getvalue()
|
661
|
+
if not self._text_mode and self._encoding:
|
662
|
+
try:
|
663
|
+
# This is technically incorrect to do, but we have it for backwards compatibility.
|
664
|
+
data = cast(bytes, data).decode(self._encoding)
|
665
|
+
except UnicodeDecodeError:
|
666
|
+
warnings.warn(
|
667
|
+
"Encountered a decoding error while decoding blob data from a partial read. "
|
668
|
+
"Try using the `chars` keyword instead to read in text mode."
|
669
|
+
)
|
670
|
+
raise
|
547
671
|
|
548
|
-
data
|
549
|
-
if self._encoding:
|
550
|
-
return data.decode(self._encoding)
|
551
|
-
return data
|
672
|
+
return data # type: ignore [return-value]
|
552
673
|
|
553
674
|
async def readall(self) -> T:
|
554
675
|
"""
|
@@ -558,53 +679,7 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
558
679
|
:returns: The requested data as bytes or a string if encoding was specified.
|
559
680
|
:rtype: T
|
560
681
|
"""
|
561
|
-
|
562
|
-
await self.readinto(stream)
|
563
|
-
data = stream.getvalue()
|
564
|
-
if self._encoding:
|
565
|
-
return data.decode(self._encoding)
|
566
|
-
return data
|
567
|
-
|
568
|
-
async def content_as_bytes(self, max_concurrency=1):
|
569
|
-
"""DEPRECATED: Download the contents of this file.
|
570
|
-
|
571
|
-
This operation is blocking until all data is downloaded.
|
572
|
-
|
573
|
-
This method is deprecated, use func:`readall` instead.
|
574
|
-
|
575
|
-
:param int max_concurrency:
|
576
|
-
The number of parallel connections with which to download.
|
577
|
-
:returns: The contents of the file as bytes.
|
578
|
-
:rtype: bytes
|
579
|
-
"""
|
580
|
-
warnings.warn(
|
581
|
-
"content_as_bytes is deprecated, use readall instead",
|
582
|
-
DeprecationWarning
|
583
|
-
)
|
584
|
-
self._max_concurrency = max_concurrency
|
585
|
-
return await self.readall()
|
586
|
-
|
587
|
-
async def content_as_text(self, max_concurrency=1, encoding="UTF-8"):
|
588
|
-
"""DEPRECATED: Download the contents of this blob, and decode as text.
|
589
|
-
|
590
|
-
This operation is blocking until all data is downloaded.
|
591
|
-
|
592
|
-
This method is deprecated, use func:`readall` instead.
|
593
|
-
|
594
|
-
:param int max_concurrency:
|
595
|
-
The number of parallel connections with which to download.
|
596
|
-
:param str encoding:
|
597
|
-
Test encoding to decode the downloaded bytes. Default is UTF-8.
|
598
|
-
:returns: The content of the file as a str.
|
599
|
-
:rtype: str
|
600
|
-
"""
|
601
|
-
warnings.warn(
|
602
|
-
"content_as_text is deprecated, use readall instead",
|
603
|
-
DeprecationWarning
|
604
|
-
)
|
605
|
-
self._max_concurrency = max_concurrency
|
606
|
-
self._encoding = encoding
|
607
|
-
return await self.readall()
|
682
|
+
return await self.read()
|
608
683
|
|
609
684
|
async def readinto(self, stream: IO[bytes]) -> int:
|
610
685
|
"""Download the contents of this blob to a stream.
|
@@ -616,6 +691,11 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
616
691
|
:returns: The number of bytes read.
|
617
692
|
:rtype: int
|
618
693
|
"""
|
694
|
+
if self._text_mode:
|
695
|
+
raise ValueError("Stream has been partially read in text mode. readinto is not supported in text mode.")
|
696
|
+
if self._encoding:
|
697
|
+
warnings.warn("Encoding is ignored with readinto as only byte streams are supported.")
|
698
|
+
|
619
699
|
# the stream must be seekable if parallel download is required
|
620
700
|
parallel = self._max_concurrency > 1
|
621
701
|
if parallel:
|
@@ -629,35 +709,34 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
629
709
|
raise ValueError(error_message) from exc
|
630
710
|
|
631
711
|
# If some data has been streamed using `read`, only stream the remaining data
|
632
|
-
remaining_size = self.size - self.
|
712
|
+
remaining_size = self.size - self._read_offset
|
633
713
|
# Already read to the end
|
634
714
|
if remaining_size <= 0:
|
635
715
|
return 0
|
636
716
|
|
637
|
-
# Write the content to the user stream
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
717
|
+
# Write the current content to the user stream
|
718
|
+
current_remaining = len(self._current_content) - self._current_content_offset
|
719
|
+
start = self._current_content_offset
|
720
|
+
count = stream.write(cast(bytes, self._current_content[start:start + current_remaining]))
|
721
|
+
|
722
|
+
self._current_content_offset += count
|
723
|
+
self._read_offset += count
|
724
|
+
if self._progress_hook:
|
725
|
+
await self._progress_hook(self._read_offset, self.size)
|
644
726
|
|
727
|
+
# If all the data was already downloaded/buffered
|
645
728
|
if self._download_complete:
|
646
729
|
return remaining_size
|
647
730
|
|
648
|
-
|
649
|
-
|
650
|
-
# Use the length unless it is over the end of the file
|
651
|
-
data_end = min(self._file_size, self._end_range + 1)
|
652
|
-
|
653
|
-
data_start = self._get_downloader_start_with_offset()
|
731
|
+
data_start = self._download_start + self._read_offset
|
732
|
+
data_end = self._download_start + self.size
|
654
733
|
|
655
734
|
downloader = _AsyncChunkDownloader(
|
656
735
|
client=self._clients.blob,
|
657
736
|
non_empty_ranges=self._non_empty_ranges,
|
658
737
|
total_size=self.size,
|
659
738
|
chunk_size=self._config.max_chunk_get_size,
|
660
|
-
current_progress=self.
|
739
|
+
current_progress=self._read_offset,
|
661
740
|
start_range=data_start,
|
662
741
|
end_range=data_end,
|
663
742
|
stream=stream,
|
@@ -667,13 +746,14 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
667
746
|
encryption_data=self._encryption_data,
|
668
747
|
use_location=self._location_mode,
|
669
748
|
progress_hook=self._progress_hook,
|
670
|
-
**self._request_options
|
749
|
+
**self._request_options
|
750
|
+
)
|
671
751
|
|
672
752
|
dl_tasks = downloader.get_chunk_offsets()
|
673
|
-
running_futures =
|
753
|
+
running_futures = {
|
674
754
|
asyncio.ensure_future(downloader.process_chunk(d))
|
675
755
|
for d in islice(dl_tasks, 0, self._max_concurrency)
|
676
|
-
|
756
|
+
}
|
677
757
|
while running_futures:
|
678
758
|
# Wait for some download to finish before adding a new one
|
679
759
|
done, running_futures = await asyncio.wait(
|
@@ -699,8 +779,72 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
699
779
|
except HttpResponseError as error:
|
700
780
|
process_storage_error(error)
|
701
781
|
|
782
|
+
self._complete_read()
|
702
783
|
return remaining_size
|
703
784
|
|
785
|
+
def _complete_read(self):
|
786
|
+
"""Adjusts all offsets to the end of the download."""
|
787
|
+
self._download_offset = self.size
|
788
|
+
self._raw_download_offset = self.size
|
789
|
+
self._read_offset = self.size
|
790
|
+
self._current_content_offset = len(self._current_content)
|
791
|
+
|
792
|
+
async def _check_and_report_progress(self):
|
793
|
+
"""Reports progress if necessary."""
|
794
|
+
# Only report progress at the end of each chunk and use download_offset to always report
|
795
|
+
# progress in terms of (approximate) byte count.
|
796
|
+
if self._progress_hook and self._current_content_offset == len(self._current_content):
|
797
|
+
await self._progress_hook(self._download_offset, self.size)
|
798
|
+
|
799
|
+
async def content_as_bytes(self, max_concurrency=1):
|
800
|
+
"""DEPRECATED: Download the contents of this file.
|
801
|
+
|
802
|
+
This operation is blocking until all data is downloaded.
|
803
|
+
|
804
|
+
This method is deprecated, use func:`readall` instead.
|
805
|
+
|
806
|
+
:param int max_concurrency:
|
807
|
+
The number of parallel connections with which to download.
|
808
|
+
:returns: The contents of the file as bytes.
|
809
|
+
:rtype: bytes
|
810
|
+
"""
|
811
|
+
warnings.warn(
|
812
|
+
"content_as_bytes is deprecated, use readall instead",
|
813
|
+
DeprecationWarning
|
814
|
+
)
|
815
|
+
if self._text_mode:
|
816
|
+
raise ValueError("Stream has been partially read in text mode. "
|
817
|
+
"content_as_bytes is not supported in text mode.")
|
818
|
+
|
819
|
+
self._max_concurrency = max_concurrency
|
820
|
+
return await self.readall()
|
821
|
+
|
822
|
+
async def content_as_text(self, max_concurrency=1, encoding="UTF-8"):
|
823
|
+
"""DEPRECATED: Download the contents of this blob, and decode as text.
|
824
|
+
|
825
|
+
This operation is blocking until all data is downloaded.
|
826
|
+
|
827
|
+
This method is deprecated, use func:`readall` instead.
|
828
|
+
|
829
|
+
:param int max_concurrency:
|
830
|
+
The number of parallel connections with which to download.
|
831
|
+
:param str encoding:
|
832
|
+
Test encoding to decode the downloaded bytes. Default is UTF-8.
|
833
|
+
:returns: The content of the file as a str.
|
834
|
+
:rtype: str
|
835
|
+
"""
|
836
|
+
warnings.warn(
|
837
|
+
"content_as_text is deprecated, use readall instead",
|
838
|
+
DeprecationWarning
|
839
|
+
)
|
840
|
+
if self._text_mode:
|
841
|
+
raise ValueError("Stream has been partially read in text mode. "
|
842
|
+
"content_as_text is not supported in text mode.")
|
843
|
+
|
844
|
+
self._max_concurrency = max_concurrency
|
845
|
+
self._encoding = encoding
|
846
|
+
return await self.readall()
|
847
|
+
|
704
848
|
async def download_to_stream(self, stream, max_concurrency=1):
|
705
849
|
"""DEPRECATED: Download the contents of this blob to a stream.
|
706
850
|
|
@@ -719,6 +863,10 @@ class StorageStreamDownloader(Generic[T]): # pylint: disable=too-many-instance-
|
|
719
863
|
"download_to_stream is deprecated, use readinto instead",
|
720
864
|
DeprecationWarning
|
721
865
|
)
|
866
|
+
if self._text_mode:
|
867
|
+
raise ValueError("Stream has been partially read in text mode. "
|
868
|
+
"download_to_stream is not supported in text mode.")
|
869
|
+
|
722
870
|
self._max_concurrency = max_concurrency
|
723
871
|
await self.readinto(stream)
|
724
872
|
return self.properties
|