modal 0.74.14__py3-none-any.whl → 0.74.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- modal/_utils/blob_utils.py +138 -6
- modal/cli/volume.py +13 -5
- modal/client.pyi +2 -2
- modal/volume.py +276 -7
- modal/volume.pyi +179 -39
- {modal-0.74.14.dist-info → modal-0.74.16.dist-info}/METADATA +1 -1
- {modal-0.74.14.dist-info → modal-0.74.16.dist-info}/RECORD +19 -19
- modal_proto/api.proto +27 -0
- modal_proto/api_grpc.py +32 -0
- modal_proto/api_pb2.py +813 -773
- modal_proto/api_pb2.pyi +79 -0
- modal_proto/api_pb2_grpc.py +69 -0
- modal_proto/api_pb2_grpc.pyi +28 -0
- modal_proto/modal_api_grpc.py +2 -0
- modal_version/_version_generated.py +1 -1
- {modal-0.74.14.dist-info → modal-0.74.16.dist-info}/WHEEL +0 -0
- {modal-0.74.14.dist-info → modal-0.74.16.dist-info}/entry_points.txt +0 -0
- {modal-0.74.14.dist-info → modal-0.74.16.dist-info}/licenses/LICENSE +0 -0
- {modal-0.74.14.dist-info → modal-0.74.16.dist-info}/top_level.txt +0 -0
modal/_utils/blob_utils.py
CHANGED
```diff
@@ -1,15 +1,25 @@
 # Copyright Modal Labs 2022
 import asyncio
 import dataclasses
+import functools
 import hashlib
-import io
 import os
 import platform
 import time
 from collections.abc import AsyncIterator
 from contextlib import AbstractContextManager, contextmanager
+from io import BytesIO, FileIO
 from pathlib import Path, PurePosixPath
-from typing import
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    BinaryIO,
+    Callable,
+    ContextManager,
+    Optional,
+    Union,
+    cast,
+)
 from urllib.parse import urlparse
 
 from modal_proto import api_pb2
@@ -43,6 +53,9 @@ DEFAULT_SEGMENT_CHUNK_SIZE = 2**24
 # TODO(dano): remove this once we stop requiring md5 for blobs
 MULTIPART_UPLOAD_THRESHOLD = 1024**3
 
+# For block based storage like volumefs2: the size of a block
+BLOCK_SIZE: int = 8 * 1024 * 1024
+
 
 @retry(n_attempts=5, base_delay=0.5, timeout=None)
 async def _upload_to_s3_url(
@@ -94,7 +107,7 @@ async def _upload_to_s3_url(
 
 
 async def perform_multipart_upload(
-    data_file: Union[BinaryIO,
+    data_file: Union[BinaryIO, BytesIO, FileIO],
     *,
     content_length: int,
     max_part_size: int,
@@ -112,9 +125,9 @@ async def perform_multipart_upload(
     # Give each part its own IO reader object to avoid needing to
     # lock access to the reader's position pointer.
    data_file_readers: list[BinaryIO]
-    if isinstance(data_file,
+    if isinstance(data_file, BytesIO):
         view = data_file.getbuffer()  # does not copy data
-        data_file_readers = [
+        data_file_readers = [BytesIO(view) for _ in range(len(part_urls))]
     else:
         filename = data_file.name
         data_file_readers = [open(filename, "rb") for _ in range(len(part_urls))]
@@ -174,7 +187,7 @@ async def _blob_upload(
     upload_hashes: UploadHashes, data: Union[bytes, BinaryIO], stub, progress_report_cb: Optional[Callable] = None
 ) -> str:
     if isinstance(data, bytes):
-        data =
+        data = BytesIO(data)
 
     content_length = get_content_length(data)
 
@@ -368,6 +381,125 @@ def get_file_upload_spec_from_fileobj(fp: BinaryIO, mount_filename: PurePosixPat
         mode,
     )
 
+_FileUploadSource2 = Callable[[], ContextManager[BinaryIO]]
+
+@dataclasses.dataclass
+class FileUploadSpec2:
+    source: _FileUploadSource2
+    source_description: Union[str, Path]
+
+    path: str
+    # Raw (unencoded 32 byte) SHA256 sum per 8MiB file block
+    blocks_sha256: list[bytes]
+    mode: int  # file permission bits (last 12 bits of st_mode)
+    size: int
+
+
+    @staticmethod
+    async def from_path(
+        filename: Path,
+        mount_filename: PurePosixPath,
+        mode: Optional[int] = None,
+    ) -> "FileUploadSpec2":
+        # Python appears to give files 0o666 bits on Windows (equal for user, group, and global),
+        # so we mask those out to 0o755 for compatibility with POSIX-based permissions.
+        mode = mode or os.stat(filename).st_mode & (0o7777 if platform.system() != "Windows" else 0o7755)
+
+        def source():
+            return open(filename, "rb")
+
+        return await FileUploadSpec2._create(
+            source,
+            filename,
+            mount_filename,
+            mode,
+        )
+
+
+    @staticmethod
+    async def from_fileobj(
+        source_fp: Union[BinaryIO, BytesIO],
+        mount_filename: PurePosixPath,
+        mode: int
+    ) -> "FileUploadSpec2":
+        try:
+            fileno = source_fp.fileno()
+            def source():
+                new_fd = os.dup(fileno)
+                fp = os.fdopen(new_fd, "rb")
+                fp.seek(0)
+                return fp
+
+        except OSError:
+            # `.fileno()` not available; assume BytesIO-like type
+            source_fp = cast(BytesIO, source_fp)
+            buffer = source_fp.getbuffer()
+            def source():
+                return BytesIO(buffer)
+
+        return await FileUploadSpec2._create(
+            source,
+            str(source),
+            mount_filename,
+            mode,
+        )
+
+
+    @staticmethod
+    async def _create(
+        source: _FileUploadSource2,
+        source_description: Union[str, Path],
+        mount_filename: PurePosixPath,
+        mode: int,
+    ) -> "FileUploadSpec2":
+        # Current position is ignored - we always upload from position 0
+        with source() as source_fp:
+            source_fp.seek(0, os.SEEK_END)
+            size = source_fp.tell()
+
+        blocks_sha256 = await hash_blocks_sha256(source, size)
+
+        return FileUploadSpec2(
+            source=source,
+            source_description=source_description,
+            path=mount_filename.as_posix(),
+            blocks_sha256=blocks_sha256,
+            mode=mode & 0o7777,
+            size=size,
+        )
+
+
+async def hash_blocks_sha256(
+    source: _FileUploadSource2,
+    size: int,
+) -> list[bytes]:
+    def ceildiv(a: int, b: int) -> int:
+        return -(a // -b)
+
+    num_blocks = ceildiv(size, BLOCK_SIZE)
+
+    def hash_block_sha256(block_idx: int) -> bytes:
+        sha256_hash = hashlib.sha256()
+        block_start = block_idx * BLOCK_SIZE
+
+        with source() as block_fp:
+            block_fp.seek(block_start)
+
+            num_bytes_read = 0
+            while num_bytes_read < BLOCK_SIZE:
+                chunk = block_fp.read(BLOCK_SIZE - num_bytes_read)
+
+                if not chunk:
+                    break
+
+                num_bytes_read += len(chunk)
+                sha256_hash.update(chunk)
+
+        return sha256_hash.digest()
+
+    tasks = (asyncio.to_thread(functools.partial(hash_block_sha256, idx)) for idx in range(num_blocks))
+    return await asyncio.gather(*tasks)
+
 
 def use_md5(url: str) -> bool:
     """This takes an upload URL in S3 and returns whether we should attach a checksum.
```
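The centerpiece of this file's change is `FileUploadSpec2` with `hash_blocks_sha256`: instead of hashing a file as one blob, it computes an independent SHA-256 digest per fixed 8 MiB block (hashed concurrently via `asyncio.to_thread`, with a fresh reader per block so no seek-position lock is needed), which lets the server request only the blocks it does not already have. A minimal standalone sketch of the same block-hashing scheme (the input data below is illustrative, not from the diff):

```python
import hashlib

BLOCK_SIZE = 8 * 1024 * 1024  # 8 MiB, matching the new constant in blob_utils.py


def blocks_sha256(data: bytes, block_size: int = BLOCK_SIZE) -> list[bytes]:
    # Ceiling division, as in hash_blocks_sha256: 0 bytes -> 0 blocks, 1 byte -> 1 block.
    num_blocks = -(len(data) // -block_size)
    return [
        hashlib.sha256(data[i * block_size : (i + 1) * block_size]).digest()
        for i in range(num_blocks)
    ]


# A 20 MiB payload spans three blocks: 8 MiB + 8 MiB + 4 MiB.
assert len(blocks_sha256(b"\x00" * (20 * 1024 * 1024))) == 3
```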
modal/cli/volume.py
CHANGED
```diff
@@ -19,7 +19,7 @@ from modal.cli._download import _volume_download
 from modal.cli.utils import ENV_OPTION, YES_OPTION, display_table, timestamp_to_local
 from modal.client import _Client
 from modal.environments import ensure_env
-from modal.volume import
+from modal.volume import _AbstractVolumeUploadContextManager, _Volume
 from modal_proto import api_pb2
 
 volume_cli = Typer(
@@ -198,8 +198,12 @@ async def put(
     if Path(local_path).is_dir():
         with progress_handler.live:
             try:
-                async with
-                    vol.
+                async with _AbstractVolumeUploadContextManager.resolve(
+                    vol._metadata.version,
+                    vol.object_id,
+                    vol._client,
+                    progress_cb=progress_handler.progress,
+                    force=force
                 ) as batch:
                     batch.put_directory(local_path, remote_path)
             except FileExistsError as exc:
@@ -210,8 +214,12 @@ async def put(
     else:
         with progress_handler.live:
             try:
-                async with
-                    vol.
+                async with _AbstractVolumeUploadContextManager.resolve(
+                    vol._metadata.version,
+                    vol.object_id,
+                    vol._client,
+                    progress_cb=progress_handler.progress,
+                    force=force
                 ) as batch:
                     batch.put_file(local_path, remote_path)
 
```
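Both branches of `modal volume put` (directory and single file) now obtain their batch context manager from `_AbstractVolumeUploadContextManager.resolve(...)`, which picks an uploader implementation from the volume's filesystem version (see `modal/volume.py` below) rather than constructing one directly. The user-facing API is unchanged; a sketch of the equivalent programmatic usage, mirroring the `batch_upload` docstring (the volume name and paths are illustrative, and running this requires a configured Modal account):

```python
import io

import modal

# Hypothetical volume name, for illustration only.
vol = modal.Volume.from_name("my-volume", create_if_missing=True)

# batch_upload() now returns the version-appropriate context manager internally.
with vol.batch_upload(force=True) as batch:
    batch.put_file("local-path.txt", "/remote-path.txt")
    batch.put_file(io.BytesIO(b"some data"), "/foobar")
    batch.put_directory("local/directory", "/remote/directory")
```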
modal/client.pyi
CHANGED
```diff
@@ -27,7 +27,7 @@ class _Client:
     _snapshotted: bool
 
     def __init__(
-        self, server_url: str, client_type: int, credentials: typing.Optional[tuple[str, str]], version: str = "0.74.14"
+        self, server_url: str, client_type: int, credentials: typing.Optional[tuple[str, str]], version: str = "0.74.16"
     ): ...
     def is_closed(self) -> bool: ...
     @property
@@ -85,7 +85,7 @@ class Client:
     _snapshotted: bool
 
     def __init__(
-        self, server_url: str, client_type: int, credentials: typing.Optional[tuple[str, str]], version: str = "0.74.14"
+        self, server_url: str, client_type: int, credentials: typing.Optional[tuple[str, str]], version: str = "0.74.16"
     ): ...
     def is_closed(self) -> bool: ...
     @property
```
modal/volume.py
CHANGED
```diff
@@ -10,16 +10,19 @@ import time
 import typing
 from collections.abc import AsyncGenerator, AsyncIterator, Generator, Sequence
 from dataclasses import dataclass
+from io import BytesIO
 from pathlib import Path, PurePosixPath
 from typing import (
     IO,
     Any,
+    Awaitable,
     BinaryIO,
     Callable,
     Optional,
     Union,
 )
 
+from google.protobuf.message import Message
 from grpclib import GRPCError, Status
 from synchronicity.async_wrap import asynccontextmanager
 
@@ -31,7 +34,9 @@ from ._object import EPHEMERAL_OBJECT_HEARTBEAT_SLEEP, _get_environment_name, _O
 from ._resolver import Resolver
 from ._utils.async_utils import TaskContext, aclosing, async_map, asyncnullcontext, synchronize_api
 from ._utils.blob_utils import (
+    BLOCK_SIZE,
     FileUploadSpec,
+    FileUploadSpec2,
     blob_iter,
     blob_upload_file,
     get_file_upload_spec_from_fileobj,
@@ -39,6 +44,7 @@ from ._utils.blob_utils import (
 )
 from ._utils.deprecation import deprecation_error, deprecation_warning, renamed_parameter
 from ._utils.grpc_utils import retry_transient_errors
+from ._utils.http_utils import ClientSessionRegistry
 from ._utils.name_utils import check_object_name
 from .client import _Client
 from .config import logger
@@ -119,6 +125,7 @@ class _Volume(_Object, type_prefix="vo"):
     """
 
     _lock: Optional[asyncio.Lock] = None
+    _metadata: "typing.Optional[api_pb2.VolumeMetadata]"
 
     async def _get_lock(self):
         # To (mostly*) prevent multiple concurrent operations on the same volume, which can cause problems under
@@ -171,10 +178,21 @@ class _Volume(_Object, type_prefix="vo"):
                 version=version,
             )
             response = await resolver.client.stub.VolumeGetOrCreate(req)
-            self._hydrate(response.volume_id, resolver.client,
+            self._hydrate(response.volume_id, resolver.client, response.metadata)
 
         return _Volume._from_loader(_load, "Volume()", hydrate_lazily=True)
 
+    def _hydrate_metadata(self, metadata: Optional[Message]):
+        if metadata and isinstance(metadata, api_pb2.VolumeMetadata):
+            self._metadata = metadata
+        else:
+            raise TypeError(
+                "_hydrate_metadata() requires an `api_pb2.VolumeMetadata` to determine volume version"
+            )
+
+    def _get_metadata(self) -> Optional[Message]:
+        return self._metadata
+
     @classmethod
     @asynccontextmanager
     async def ephemeral(
@@ -209,7 +227,7 @@ class _Volume(_Object, type_prefix="vo"):
         async with TaskContext() as tc:
             request = api_pb2.VolumeHeartbeatRequest(volume_id=response.volume_id)
             tc.infinite_loop(lambda: client.stub.VolumeHeartbeat(request), sleep=_heartbeat_sleep)
-            yield cls._new_hydrated(response.volume_id, client,
+            yield cls._new_hydrated(response.volume_id, client, response.metadata, is_another_app=True)
 
     @staticmethod
     @renamed_parameter((2024, 12, 18), "label", "name")
@@ -481,7 +499,7 @@ class _Volume(_Object, type_prefix="vo"):
         await retry_transient_errors(self._client.stub.VolumeCopyFiles, request, base_delay=1)
 
     @live_method
-    async def batch_upload(self, force: bool = False) -> "
+    async def batch_upload(self, force: bool = False) -> "_AbstractVolumeUploadContextManager":
         """
         Initiate a batched upload to a volume.
 
@@ -499,7 +517,13 @@ class _Volume(_Object, type_prefix="vo"):
             batch.put_file(io.BytesIO(b"some data"), "/foobar")
         ```
         """
-        return
+        return _AbstractVolumeUploadContextManager.resolve(
+            self._metadata.version,
+            self.object_id,
+            self._client,
+            force=force
+        )
+
 
     @live_method
     async def _instance_delete(self):
@@ -527,7 +551,57 @@ class _Volume(_Object, type_prefix="vo"):
         await retry_transient_errors(obj._client.stub.VolumeRename, req)
 
 
-class _VolumeUploadContextManager:
+Volume = synchronize_api(_Volume)
+
+# TODO(dflemstr): Find a way to add ABC or AbstractAsyncContextManager superclasses while keeping synchronicity happy.
+class _AbstractVolumeUploadContextManager:
+    async def __aenter__(self):
+        ...
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        ...
+
+
+    def put_file(
+        self,
+        local_file: Union[Path, str, BinaryIO, BytesIO],
+        remote_path: Union[PurePosixPath, str],
+        mode: Optional[int] = None,
+    ):
+        ...
+
+    def put_directory(
+        self,
+        local_path: Union[Path, str],
+        remote_path: Union[PurePosixPath, str],
+        recursive: bool = True,
+    ):
+        ...
+
+    @staticmethod
+    def resolve(
+        version: "modal_proto.api_pb2.VolumeFsVersion.ValueType",
+        object_id: str,
+        client,
+        progress_cb: Optional[Callable[..., Any]] = None,
+        force: bool = False
+    ) -> "_AbstractVolumeUploadContextManager":
+
+        if version in [
+            None,
+            api_pb2.VolumeFsVersion.VOLUME_FS_VERSION_UNSPECIFIED,
+            api_pb2.VolumeFsVersion.VOLUME_FS_VERSION_V1
+        ]:
+            return _VolumeUploadContextManager(object_id, client, progress_cb=progress_cb, force=force)
+        elif version == api_pb2.VolumeFsVersion.VOLUME_FS_VERSION_V2:
+            return _VolumeUploadContextManager2(object_id, client, progress_cb=progress_cb, force=force)
+        else:
+            raise RuntimeError(f"unsupported volume version: {version}")
+
+
+AbstractVolumeUploadContextManager = synchronize_api(_AbstractVolumeUploadContextManager)
+
+class _VolumeUploadContextManager(_AbstractVolumeUploadContextManager):
     """Context manager for batch-uploading files to a Volume."""
 
     _volume_id: str
@@ -585,7 +659,7 @@ class _VolumeUploadContextManager:
 
     def put_file(
         self,
-        local_file: Union[Path, str, BinaryIO],
+        local_file: Union[Path, str, BinaryIO, BytesIO],
         remote_path: Union[PurePosixPath, str],
         mode: Optional[int] = None,
     ):
@@ -678,9 +752,204 @@ class _VolumeUploadContextManager:
         )
 
 
-Volume = synchronize_api(_Volume)
 VolumeUploadContextManager = synchronize_api(_VolumeUploadContextManager)
 
+_FileUploader2 = Callable[[], Awaitable[FileUploadSpec2]]
+
+class _VolumeUploadContextManager2(_AbstractVolumeUploadContextManager):
+    """Context manager for batch-uploading files to a Volume version 2."""
+
+    _volume_id: str
+    _client: _Client
+    _force: bool
+    _progress_cb: Callable[..., Any]
+    _uploader_generators: list[Generator[_FileUploader2]]
+
+    def __init__(
+        self, volume_id: str, client: _Client, progress_cb: Optional[Callable[..., Any]] = None, force: bool = False
+    ):
+        """mdmd:hidden"""
+        self._volume_id = volume_id
+        self._client = client
+        self._uploader_generators = []
+        self._progress_cb = progress_cb or (lambda *_, **__: None)
+        self._force = force
+
+    async def __aenter__(self):
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        if not exc_val:
+            # Flatten all the uploads yielded by the upload generators in the batch
+            def gen_upload_providers():
+                for gen in self._uploader_generators:
+                    yield from gen
+
+            async def gen_file_upload_specs() -> list[FileUploadSpec2]:
+                uploads = [asyncio.create_task(fut()) for fut in gen_upload_providers()]
+                logger.debug(f"Computing checksums for {len(uploads)} files")
+
+                file_specs = []
+                for file_spec in asyncio.as_completed(uploads):
+                    file_specs.append(await file_spec)
+                return file_specs
+
+            upload_specs = await gen_file_upload_specs()
+            await self._put_file_specs(upload_specs)
+
+
+    def put_file(
+        self,
+        local_file: Union[Path, str, BinaryIO, BytesIO],
+        remote_path: Union[PurePosixPath, str],
+        mode: Optional[int] = None,
+    ):
+        """Upload a file from a local file or file-like object.
+
+        Will create any needed parent directories automatically.
+
+        If `local_file` is a file-like object it must remain readable for the lifetime of the batch.
+        """
+        remote_path = PurePosixPath(remote_path).as_posix()
+        if remote_path.endswith("/"):
+            raise ValueError(f"remote_path ({remote_path}) must refer to a file - cannot end with /")
+
+        def gen():
+            if isinstance(local_file, str) or isinstance(local_file, Path):
+                yield lambda: FileUploadSpec2.from_path(local_file, PurePosixPath(remote_path), mode)
+            else:
+                yield lambda: FileUploadSpec2.from_fileobj(local_file, PurePosixPath(remote_path), mode or 0o644)
+
+        self._uploader_generators.append(gen())
+
+    def put_directory(
+        self,
+        local_path: Union[Path, str],
+        remote_path: Union[PurePosixPath, str],
+        recursive: bool = True,
+    ):
+        """
+        Upload all files in a local directory.
+
+        Will create any needed parent directories automatically.
+        """
+        local_path = Path(local_path)
+        assert local_path.is_dir()
+        remote_path = PurePosixPath(remote_path)
+
+        def create_spec(subpath):
+            relpath_str = subpath.relative_to(local_path)
+            return lambda: FileUploadSpec2.from_path(subpath, remote_path / relpath_str)
+
+        def gen():
+            glob = local_path.rglob("*") if recursive else local_path.glob("*")
+            for subpath in glob:
+                # Skip directories and unsupported file types (e.g. block devices)
+                if subpath.is_file():
+                    yield create_spec(subpath)
+
+        self._uploader_generators.append(gen())
+
+    async def _put_file_specs(self, file_specs: list[FileUploadSpec2]):
+        put_responses = {}
+        # num_blocks_total = sum(len(file_spec.blocks_sha256) for file_spec in file_specs)
+
+        # We should only need two iterations: Once to possibly get some missing_blocks; the second time we should have
+        # all blocks uploaded
+        for _ in range(2):
+            files = []
+
+            for file_spec in file_specs:
+                blocks = [
+                    api_pb2.VolumePutFiles2Request.Block(
+                        contents_sha256=block_sha256,
+                        put_response=put_responses.get(block_sha256)
+                    ) for block_sha256 in file_spec.blocks_sha256
+                ]
+                files.append(api_pb2.VolumePutFiles2Request.File(
+                    path=file_spec.path,
+                    mode=file_spec.mode,
+                    size=file_spec.size,
+                    blocks=blocks
+                ))
+
+            request = api_pb2.VolumePutFiles2Request(
+                volume_id=self._volume_id,
+                files=files,
+                disallow_overwrite_existing_files=not self._force,
+            )
+
+            try:
+                response = await retry_transient_errors(self._client.stub.VolumePutFiles2, request, base_delay=1)
+            except GRPCError as exc:
+                raise FileExistsError(exc.message) if exc.status == Status.ALREADY_EXISTS else exc
+
+            if not response.missing_blocks:
+                break
+
+            await _put_missing_blocks(file_specs, response.missing_blocks, put_responses, self._progress_cb)
+        else:
+            raise RuntimeError("Did not succeed at uploading all files despite supplying all missing blocks")
+
+        self._progress_cb(complete=True)
+
+
+VolumeUploadContextManager2 = synchronize_api(_VolumeUploadContextManager2)
+
+
+async def _put_missing_blocks(
+    file_specs: list[FileUploadSpec2],
+    # TODO(dflemstr): Element type is `api_pb2.VolumePutFiles2Response.MissingBlock` but synchronicity gets confused
+    # by the nested class (?)
+    missing_blocks: list,
+    put_responses: dict[bytes, bytes],
+    progress_cb: Callable[..., Any]
+):
+    async def put_missing_block(
+        # TODO(dflemstr): Type is `api_pb2.VolumePutFiles2Response.MissingBlock` but synchronicity gets confused
+        # by the nested class (?)
+        missing_block
+    ) -> (bytes, bytes):
+        # Lazily import to keep the eager loading time of this module down
+        from ._utils.bytes_io_segment_payload import BytesIOSegmentPayload
+
+        assert isinstance(missing_block, api_pb2.VolumePutFiles2Response.MissingBlock)
+
+        file_spec = file_specs[missing_block.file_index]
+        # TODO(dflemstr): What if the underlying file has changed here in the meantime; should we check the
+        # hash again just to be sure?
+        block_sha256 = file_spec.blocks_sha256[missing_block.block_index]
+        block_start = missing_block.block_index * BLOCK_SIZE
+        block_length = min(BLOCK_SIZE, file_spec.size - block_start)
+
+        progress_name = f"{file_spec.path} block {missing_block.block_index + 1} / {len(file_spec.blocks_sha256)}"
+        progress_task_id = progress_cb(name=progress_name, size=file_spec.size)
+
+        with file_spec.source() as source_fp:
+            payload = BytesIOSegmentPayload(
+                source_fp,
+                block_start,
+                block_length,
+                progress_report_cb=functools.partial(progress_cb, progress_task_id)
+            )
+
+            async with ClientSessionRegistry.get_session().put(
+                missing_block.put_url,
+                data=payload,
+            ) as response:
+                response.raise_for_status()
+                resp_data = await response.content.read()
+
+        return block_sha256, resp_data
+
+    tasks = [
+        asyncio.create_task(put_missing_block(missing_block))
+        for missing_block in missing_blocks
+    ]
+    for task_result in asyncio.as_completed(tasks):
+        digest, resp = await task_result
+        put_responses[digest] = resp
+
 
 def _open_files_error_annotation(mount_path: str) -> Optional[str]:
     if platform.system() != "Linux":
```
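One subtlety in the new `_put_file_specs`: the upload loop is a `for ... else`, where the `else` arm only runs if the loop finishes without `break`. The first `VolumePutFiles2` call may come back with `missing_blocks`; those blocks are then uploaded via HTTP PUT to each block's `put_url` and the call is retried with the collected `put_response` tokens, so the second attempt should report nothing missing, and anything else raises. A minimal illustration of that control flow, with a counter standing in for the server's missing-block responses:

```python
def upload(rounds_needed: int) -> str:
    """Sketch of the two-attempt loop in _put_file_specs (illustrative only)."""
    for attempt in range(2):
        still_missing = rounds_needed > attempt + 1  # stand-in for response.missing_blocks
        if not still_missing:
            break  # server accepted all blocks
        # ... upload the missing blocks, then retry the request ...
    else:
        # Reached only if both attempts reported missing blocks (no break occurred).
        raise RuntimeError("Did not succeed at uploading all files despite supplying all missing blocks")
    return "ok"


assert upload(1) == "ok"  # first request succeeds outright
assert upload(2) == "ok"  # one round of block uploads, then success
# upload(3) raises RuntimeError: blocks would still be missing after the retry.
```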