modal-1.0.3.dev10-py3-none-any.whl → modal-1.2.3.dev7-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of modal might be problematic.
- modal/__init__.py +0 -2
- modal/__main__.py +3 -4
- modal/_billing.py +80 -0
- modal/_clustered_functions.py +7 -3
- modal/_clustered_functions.pyi +15 -3
- modal/_container_entrypoint.py +51 -69
- modal/_functions.py +508 -240
- modal/_grpc_client.py +171 -0
- modal/_load_context.py +105 -0
- modal/_object.py +81 -21
- modal/_output.py +58 -45
- modal/_partial_function.py +48 -73
- modal/_pty.py +7 -3
- modal/_resolver.py +26 -46
- modal/_runtime/asgi.py +4 -3
- modal/_runtime/container_io_manager.py +358 -220
- modal/_runtime/container_io_manager.pyi +296 -101
- modal/_runtime/execution_context.py +18 -2
- modal/_runtime/execution_context.pyi +64 -7
- modal/_runtime/gpu_memory_snapshot.py +262 -57
- modal/_runtime/user_code_imports.py +28 -58
- modal/_serialization.py +90 -6
- modal/_traceback.py +42 -1
- modal/_tunnel.pyi +380 -12
- modal/_utils/async_utils.py +84 -29
- modal/_utils/auth_token_manager.py +111 -0
- modal/_utils/blob_utils.py +181 -58
- modal/_utils/deprecation.py +19 -0
- modal/_utils/function_utils.py +91 -47
- modal/_utils/grpc_utils.py +89 -66
- modal/_utils/mount_utils.py +26 -1
- modal/_utils/name_utils.py +17 -3
- modal/_utils/task_command_router_client.py +536 -0
- modal/_utils/time_utils.py +34 -6
- modal/app.py +256 -88
- modal/app.pyi +909 -92
- modal/billing.py +5 -0
- modal/builder/2025.06.txt +18 -0
- modal/builder/PREVIEW.txt +18 -0
- modal/builder/base-images.json +58 -0
- modal/cli/_download.py +19 -3
- modal/cli/_traceback.py +3 -2
- modal/cli/app.py +4 -4
- modal/cli/cluster.py +15 -7
- modal/cli/config.py +5 -3
- modal/cli/container.py +7 -6
- modal/cli/dict.py +22 -16
- modal/cli/entry_point.py +12 -5
- modal/cli/environment.py +5 -4
- modal/cli/import_refs.py +3 -3
- modal/cli/launch.py +102 -5
- modal/cli/network_file_system.py +11 -12
- modal/cli/profile.py +3 -2
- modal/cli/programs/launch_instance_ssh.py +94 -0
- modal/cli/programs/run_jupyter.py +1 -1
- modal/cli/programs/run_marimo.py +95 -0
- modal/cli/programs/vscode.py +1 -1
- modal/cli/queues.py +57 -26
- modal/cli/run.py +91 -23
- modal/cli/secret.py +48 -22
- modal/cli/token.py +7 -8
- modal/cli/utils.py +4 -7
- modal/cli/volume.py +31 -25
- modal/client.py +15 -85
- modal/client.pyi +183 -62
- modal/cloud_bucket_mount.py +5 -3
- modal/cloud_bucket_mount.pyi +197 -5
- modal/cls.py +200 -126
- modal/cls.pyi +446 -68
- modal/config.py +29 -11
- modal/container_process.py +319 -19
- modal/container_process.pyi +190 -20
- modal/dict.py +290 -71
- modal/dict.pyi +835 -83
- modal/environments.py +15 -27
- modal/environments.pyi +46 -24
- modal/exception.py +14 -2
- modal/experimental/__init__.py +194 -40
- modal/experimental/flash.py +618 -0
- modal/experimental/flash.pyi +380 -0
- modal/experimental/ipython.py +11 -7
- modal/file_io.py +29 -36
- modal/file_io.pyi +251 -53
- modal/file_pattern_matcher.py +56 -16
- modal/functions.pyi +673 -92
- modal/gpu.py +1 -1
- modal/image.py +528 -176
- modal/image.pyi +1572 -145
- modal/io_streams.py +458 -128
- modal/io_streams.pyi +433 -52
- modal/mount.py +216 -151
- modal/mount.pyi +225 -78
- modal/network_file_system.py +45 -62
- modal/network_file_system.pyi +277 -56
- modal/object.pyi +93 -17
- modal/parallel_map.py +942 -129
- modal/parallel_map.pyi +294 -15
- modal/partial_function.py +0 -2
- modal/partial_function.pyi +234 -19
- modal/proxy.py +17 -8
- modal/proxy.pyi +36 -3
- modal/queue.py +270 -65
- modal/queue.pyi +817 -57
- modal/runner.py +115 -101
- modal/runner.pyi +205 -49
- modal/sandbox.py +512 -136
- modal/sandbox.pyi +845 -111
- modal/schedule.py +1 -1
- modal/secret.py +300 -70
- modal/secret.pyi +589 -34
- modal/serving.py +7 -11
- modal/serving.pyi +7 -8
- modal/snapshot.py +11 -8
- modal/snapshot.pyi +25 -4
- modal/token_flow.py +4 -4
- modal/token_flow.pyi +28 -8
- modal/volume.py +416 -158
- modal/volume.pyi +1117 -121
- {modal-1.0.3.dev10.dist-info → modal-1.2.3.dev7.dist-info}/METADATA +10 -9
- modal-1.2.3.dev7.dist-info/RECORD +195 -0
- modal_docs/mdmd/mdmd.py +17 -4
- modal_proto/api.proto +534 -79
- modal_proto/api_grpc.py +337 -1
- modal_proto/api_pb2.py +1522 -968
- modal_proto/api_pb2.pyi +1619 -134
- modal_proto/api_pb2_grpc.py +699 -4
- modal_proto/api_pb2_grpc.pyi +226 -14
- modal_proto/modal_api_grpc.py +175 -154
- modal_proto/sandbox_router.proto +145 -0
- modal_proto/sandbox_router_grpc.py +105 -0
- modal_proto/sandbox_router_pb2.py +149 -0
- modal_proto/sandbox_router_pb2.pyi +333 -0
- modal_proto/sandbox_router_pb2_grpc.py +203 -0
- modal_proto/sandbox_router_pb2_grpc.pyi +75 -0
- modal_proto/task_command_router.proto +144 -0
- modal_proto/task_command_router_grpc.py +105 -0
- modal_proto/task_command_router_pb2.py +149 -0
- modal_proto/task_command_router_pb2.pyi +333 -0
- modal_proto/task_command_router_pb2_grpc.py +203 -0
- modal_proto/task_command_router_pb2_grpc.pyi +75 -0
- modal_version/__init__.py +1 -1
- modal/requirements/PREVIEW.txt +0 -16
- modal/requirements/base-images.json +0 -26
- modal-1.0.3.dev10.dist-info/RECORD +0 -179
- modal_proto/modal_options_grpc.py +0 -3
- modal_proto/options.proto +0 -19
- modal_proto/options_grpc.py +0 -3
- modal_proto/options_pb2.py +0 -35
- modal_proto/options_pb2.pyi +0 -20
- modal_proto/options_pb2_grpc.py +0 -4
- modal_proto/options_pb2_grpc.pyi +0 -7
- /modal/{requirements → builder}/2023.12.312.txt +0 -0
- /modal/{requirements → builder}/2023.12.txt +0 -0
- /modal/{requirements → builder}/2024.04.txt +0 -0
- /modal/{requirements → builder}/2024.10.txt +0 -0
- /modal/{requirements → builder}/README.md +0 -0
- {modal-1.0.3.dev10.dist-info → modal-1.2.3.dev7.dist-info}/WHEEL +0 -0
- {modal-1.0.3.dev10.dist-info → modal-1.2.3.dev7.dist-info}/entry_points.txt +0 -0
- {modal-1.0.3.dev10.dist-info → modal-1.2.3.dev7.dist-info}/licenses/LICENSE +0 -0
- {modal-1.0.3.dev10.dist-info → modal-1.2.3.dev7.dist-info}/top_level.txt +0 -0
modal/io_streams.py
CHANGED
````diff
@@ -1,6 +1,9 @@
 # Copyright Modal Labs 2022
 import asyncio
+import codecs
+import time
 from collections.abc import AsyncGenerator, AsyncIterator
+from dataclasses import dataclass
 from typing import (
     TYPE_CHECKING,
     Generic,
@@ -14,12 +17,14 @@ from typing import (
 from grpclib import Status
 from grpclib.exceptions import GRPCError, StreamTerminatedError
 
-from modal.exception import ClientClosed, InvalidError
+from modal.exception import ClientClosed, ExecTimeoutError, InvalidError
 from modal_proto import api_pb2
 
 from ._utils.async_utils import synchronize_api
-from ._utils.grpc_utils import RETRYABLE_GRPC_STATUS_CODES
+from ._utils.grpc_utils import RETRYABLE_GRPC_STATUS_CODES
+from ._utils.task_command_router_client import TaskCommandRouterClient
 from .client import _Client
+from .config import logger
 from .stream_type import StreamType
 
 if TYPE_CHECKING:
@@ -50,6 +55,7 @@ async def _container_process_logs_iterator(
     file_descriptor: "api_pb2.FileDescriptor.ValueType",
     client: _Client,
     last_index: int,
+    deadline: Optional[float] = None,
 ) -> AsyncGenerator[tuple[Optional[bytes], int], None]:
     req = api_pb2.ContainerExecGetOutputRequest(
         exec_id=process_id,
@@ -58,40 +64,33 @@ async def _container_process_logs_iterator(
         get_raw_bytes=True,
         last_batch_index=last_index,
     )
-
-
-
+    stream = client.stub.ContainerExecGetOutput.unary_stream(req)
+    while True:
+        # Check deadline before attempting to receive the next batch
+        try:
+            remaining = (deadline - time.monotonic()) if deadline else None
+            batch = await asyncio.wait_for(stream.__anext__(), timeout=remaining)
+        except asyncio.TimeoutError:
+            yield None, -1
             break
+        except StopAsyncIteration:
+            break
+
         for item in batch.items:
             yield item.message_bytes, batch.batch_index
 
+        if batch.HasField("exit_code"):
+            yield None, batch.batch_index
+            break
 
-T = TypeVar("T", str, bytes)
-
-
-class _StreamReader(Generic[T]):
-    """Retrieve logs from a stream (`stdout` or `stderr`).
 
-
-    statements. Just loop over the object to read in chunks.
+T = TypeVar("T", str, bytes)
 
-    **Usage**
 
-
-    from
+class _StreamReaderThroughServer(Generic[T]):
+    """A StreamReader implementation that reads from the server."""
 
-
-        "bash",
-        "-c",
-        "for i in $(seq 1 10); do echo foo; sleep 0.1; done",
-        app=running_app,
-    )
-    for message in sandbox.stdout:
-        print(f"Message: {message}")
-    ```
-    """
-
-    _stream: Optional[AsyncGenerator[Optional[bytes], None]]
+    _stream: Optional[AsyncGenerator[T, None]]
 
     def __init__(
         self,
````
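The receive loop in the hunk above bounds each batch fetch with `asyncio.wait_for`, recomputing the remaining time budget from an absolute monotonic deadline on every iteration. Below is a minimal, self-contained sketch of the same pattern using only the standard library; the `ticker` stream and the one-second deadline are illustrative stand-ins, not Modal's API:

```python
import asyncio
import time
from typing import AsyncGenerator, Optional


async def ticker() -> AsyncGenerator[int, None]:
    # Stand-in for a gRPC stream: emits an item every 300 ms.
    for i in range(10):
        await asyncio.sleep(0.3)
        yield i


async def consume_until(stream, deadline: Optional[float]) -> None:
    while True:
        # Recompute the budget before every receive; a deadline already in
        # the past makes wait_for time out immediately.
        remaining = (deadline - time.monotonic()) if deadline else None
        try:
            item = await asyncio.wait_for(stream.__anext__(), timeout=remaining)
        except asyncio.TimeoutError:
            print("deadline hit; stopping early")  # the hunk yields (None, -1) here
            break
        except StopAsyncIteration:
            break
        print("received", item)


asyncio.run(consume_until(ticker(), time.monotonic() + 1.0))
```

Passing `timeout=None` when no deadline is set keeps the old unbounded behavior, so the loop degrades gracefully for callers that never supply one.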
````diff
@@ -102,6 +101,7 @@ class _StreamReader(Generic[T]):
         stream_type: StreamType = StreamType.PIPE,
         text: bool = True,
         by_line: bool = False,
+        deadline: Optional[float] = None,
     ) -> None:
         """mdmd:hidden"""
         self._file_descriptor = file_descriptor
@@ -111,16 +111,13 @@ class _StreamReader(Generic[T]):
         self._stream = None
         self._last_entry_id: str = ""
         self._line_buffer = b""
+        self._deadline = deadline
 
         # Sandbox logs are streamed to the client as strings, so StreamReaders reading
         # them must have text mode enabled.
         if object_type == "sandbox" and not text:
             raise ValueError("Sandbox streams must have text mode enabled.")
 
-        # line-buffering is only supported when text=True
-        if by_line and not text:
-            raise ValueError("line-buffering is only supported when text=True")
-
         self._text = text
         self._by_line = by_line
 
@@ -138,10 +135,9 @@ class _StreamReader(Generic[T]):
         self._stream_type = stream_type
 
         if self._object_type == "container_process":
-            #
-            #
-            #
-            self._container_process_buffer: list[Optional[bytes]] = []
+            # TODO: we should not have this async code in constructors!
+            # it only works as long as all the construction happens inside of synchronicity code
+            self._container_process_buffer: list[Optional[bytes]] = []  # TODO: change this to an asyncio.Queue
             self._consume_container_process_task = asyncio.create_task(self._consume_container_process_stream())
 
     @property
@@ -150,32 +146,19 @@ class _StreamReader(Generic[T]):
         return self._file_descriptor
 
     async def read(self) -> T:
-        """Fetch the entire contents of the stream until EOF.
-
-        **Usage**
-
-        ```python fixture:running_app
-        from modal import Sandbox
-
-        sandbox = Sandbox.create("echo", "hello", app=running_app)
-        sandbox.wait()
-
-        print(sandbox.stdout.read())
-        ```
-        """
-        data_str = ""
-        data_bytes = b""
-        async for message in self._get_logs():
-            if message is None:
-                break
-            if self._text:
-                data_str += message.decode("utf-8")
-            else:
-                data_bytes += message
-
+        """Fetch the entire contents of the stream until EOF."""
+        logger.debug(f"{self._object_id} StreamReader fd={self._file_descriptor} read starting")
         if self._text:
+            data_str = ""
+            async for message in _decode_bytes_stream_to_str(self._get_logs()):
+                data_str += message
+            logger.debug(f"{self._object_id} StreamReader fd={self._file_descriptor} read completed after EOF")
             return cast(T, data_str)
         else:
+            data_bytes = b""
+            async for message in self._get_logs():
+                data_bytes += message
+            logger.debug(f"{self._object_id} StreamReader fd={self._file_descriptor} read completed after EOF")
             return cast(T, data_bytes)
 
     async def _consume_container_process_stream(self):
@@ -187,13 +170,15 @@ class _StreamReader(Generic[T]):
         retries_remaining = 10
         last_index = 0
         while not completed:
+            if self._deadline and time.monotonic() >= self._deadline:
+                break
             try:
                 iterator = _container_process_logs_iterator(
-                    self._object_id, self._file_descriptor, self._client, last_index
+                    self._object_id, self._file_descriptor, self._client, last_index, self._deadline
                 )
-
                 async for message, batch_index in iterator:
                     if self._stream_type == StreamType.STDOUT and message:
+                        # TODO: rearchitect this, since these bytes aren't necessarily decodable
                        print(message.decode("utf-8"), end="")
                     elif self._stream_type == StreamType.PIPE:
                         self._container_process_buffer.append(message)
````
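The loop above is driven from a background task created in the constructor, so process output is drained as fast as it arrives and buffered for readers; the next hunk adds a comment explaining why that matters. A sketch of that producer/buffer split, with illustrative names rather than Modal's API:

```python
import asyncio
from typing import AsyncGenerator, Optional


async def chunks() -> AsyncGenerator[bytes, None]:
    # Stand-in for process output arriving over time.
    for part in (b"hel", b"lo\n", b"wor", b"ld\n"):
        await asyncio.sleep(0.1)
        yield part


async def pump(source, buffer: "list[Optional[bytes]]") -> None:
    # Eagerly drain the source so the producing process never blocks on a
    # full pipe; readers consume `buffer` at their own pace.
    async for chunk in source:
        buffer.append(chunk)
    buffer.append(None)  # EOF sentinel, like the None message above


async def main() -> None:
    buffer: "list[Optional[bytes]]" = []
    task = asyncio.create_task(pump(chunks(), buffer))
    await task  # in real code, readers would poll the buffer concurrently
    print(b"".join(b for b in buffer if b is not None).decode())


asyncio.run(main())
```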
````diff
@@ -216,10 +201,14 @@ class _StreamReader(Generic[T]):
                 elif isinstance(exc, ClientClosed):
                     # If the client was closed, the user has triggered a cleanup.
                     break
+                logger.error(f"{self._object_id} stream read failure while consuming process output: {exc}")
                 raise exc
 
     async def _stream_container_process(self) -> AsyncGenerator[tuple[Optional[bytes], str], None]:
         """Streams the container process buffer to the reader."""
+        # Container process streams need to be consumed as they are produced,
+        # otherwise the process will block. Use a buffer to store the stream
+        # until the client consumes it.
         entry_id = 0
         if self._last_entry_id:
             entry_id = int(self._last_entry_id) + 1
@@ -237,7 +226,7 @@ class _StreamReader(Generic[T]):
 
             entry_id += 1
 
-    async def _get_logs(self, skip_empty_messages: bool = True) -> AsyncGenerator[
+    async def _get_logs(self, skip_empty_messages: bool = True) -> AsyncGenerator[bytes, None]:
         """Streams sandbox or process logs from the server to the reader.
 
         Logs returned by this method may contain partial or multiple lines at a time.
@@ -249,7 +238,6 @@ class _StreamReader(Generic[T]):
             raise InvalidError("Logs can only be retrieved using the PIPE stream type.")
 
         if self.eof:
-            yield None
             return
 
         completed = False
@@ -271,10 +259,12 @@ class _StreamReader(Generic[T]):
                     if skip_empty_messages and message == b"":
                         continue
 
-                    yield message
                     if message is None:
                         completed = True
                         self.eof = True
+                        return
+
+                    yield message
 
             except (GRPCError, StreamTerminatedError) as exc:
                 if retries_remaining > 0:
````
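The last two hunks above change the EOF semantics of `_get_logs`: instead of yielding a trailing `None` sentinel that every caller must filter out, the generator now sets `self.eof` and returns, ending the iteration itself. A small sketch contrasting the two generator styles (illustrative only, not Modal's code):

```python
import asyncio
from typing import AsyncGenerator, Optional


async def with_sentinel() -> AsyncGenerator[Optional[bytes], None]:
    for chunk in (b"a", b"b"):
        yield chunk
    yield None  # old style: explicit EOF marker


async def with_return() -> AsyncGenerator[bytes, None]:
    for chunk in (b"a", b"b"):
        yield chunk
    return  # new style: EOF simply ends the iteration


async def main() -> None:
    out = b""
    async for message in with_sentinel():
        if message is None:  # old callers had to special-case the sentinel
            break
        out += message
    async for message in with_return():
        out += message  # new callers just iterate
    print(out)  # b"abab"


asyncio.run(main())
```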
````diff
@@ -287,55 +277,312 @@ class _StreamReader(Generic[T]):
                     continue
                 raise
 
-    async def _get_logs_by_line(self) -> AsyncGenerator[
+    async def _get_logs_by_line(self) -> AsyncGenerator[bytes, None]:
         """Process logs from the server and yield complete lines only."""
         async for message in self._get_logs():
-
-
-
-
-            yield
-            else:
-                assert isinstance(message, bytes)
-                self._line_buffer += message
-                while b"\n" in self._line_buffer:
-                    line, self._line_buffer = self._line_buffer.split(b"\n", 1)
-                    yield line + b"\n"
+            assert isinstance(message, bytes)
+            self._line_buffer += message
+            while b"\n" in self._line_buffer:
+                line, self._line_buffer = self._line_buffer.split(b"\n", 1)
+                yield line + b"\n"
 
-
-
+        if self._line_buffer:
+            yield self._line_buffer
+            self._line_buffer = b""
+
+    def _ensure_stream(self) -> AsyncGenerator[T, None]:
         if not self._stream:
             if self._by_line:
-
+                # TODO: This is quite odd - it does line buffering in binary mode
+                # but we then always add the buffered text decoding on top of that.
+                # feels a bit upside down...
+                stream = self._get_logs_by_line()
             else:
-
-
+                stream = self._get_logs()
+            if self._text:
+                stream = _decode_bytes_stream_to_str(stream)
+            self._stream = cast(AsyncGenerator[T, None], stream)
+        return self._stream
 
     async def __anext__(self) -> T:
         """mdmd:hidden"""
-
+        stream = self._ensure_stream()
+        return cast(T, await stream.__anext__())
+
+    async def aclose(self):
+        """mdmd:hidden"""
+        if self._stream:
+            await self._stream.aclose()
 
-        value = await self._stream.__anext__()
 
-
-
-        raise StopAsyncIteration
+async def _decode_bytes_stream_to_str(stream: AsyncGenerator[bytes, None]) -> AsyncGenerator[str, None]:
+    """Incrementally decode a bytes async generator as UTF-8 without breaking on chunk boundaries.
 
-
-
-
-
+    This function uses a streaming UTF-8 decoder so that multi-byte characters split across
+    chunks are handled correctly instead of raising ``UnicodeDecodeError``.
+    """
+    decoder = codecs.getincrementaldecoder("utf-8")(errors="strict")
+    async for item in stream:
+        text = decoder.decode(item, final=False)
+        if text:
+            yield text
+
+    # Flush any buffered partial character at end-of-stream
+    tail = decoder.decode(b"", final=True)
+    if tail:
+        yield tail
+
+
+async def _stream_by_line(stream: AsyncGenerator[bytes, None]) -> AsyncGenerator[bytes, None]:
+    """Yield complete lines only (ending with \n), buffering partial lines until complete."""
+    line_buffer = b""
+    async for message in stream:
+        assert isinstance(message, bytes)
+        line_buffer += message
+        while b"\n" in line_buffer:
+            line, line_buffer = line_buffer.split(b"\n", 1)
+            yield line + b"\n"
+
+    if line_buffer:
+        yield line_buffer
+
+
+@dataclass
+class _StreamReaderThroughCommandRouterParams:
+    file_descriptor: "api_pb2.FileDescriptor.ValueType"
+    task_id: str
+    object_id: str
+    command_router_client: TaskCommandRouterClient
+    deadline: Optional[float]
+
+
+async def _stdio_stream_from_command_router(
+    params: _StreamReaderThroughCommandRouterParams,
+) -> AsyncGenerator[bytes, None]:
+    """Stream raw bytes from the router client."""
+    stream = params.command_router_client.exec_stdio_read(
+        params.task_id, params.object_id, params.file_descriptor, params.deadline
+    )
+    try:
+        async for item in stream:
+            if len(item.data) == 0:
+                # This is an error.
+                raise ValueError("Received empty message streaming stdio from sandbox.")
+
+            yield item.data
+    except ExecTimeoutError:
+        logger.debug(f"Deadline exceeded while streaming stdio for exec {params.object_id}")
+        # TODO(saltzm): This is a weird API, but customers currently may rely on it. We
+        # should probably raise this error rather than just ending the stream.
+        return
+
+
+class _BytesStreamReaderThroughCommandRouter(Generic[T]):
+    """
+    StreamReader implementation that will read directly from the worker that
+    hosts the sandbox.
+
+    This implementation is used for non-text streams.
+    """
+
+    def __init__(
+        self,
+        params: _StreamReaderThroughCommandRouterParams,
+    ) -> None:
+        self._params = params
+        self._stream = None
+
+    @property
+    def file_descriptor(self) -> int:
+        return self._params.file_descriptor
+
+    async def read(self) -> T:
+        data_bytes = b""
+        async for part in self:
+            data_bytes += cast(bytes, part)
+        return cast(T, data_bytes)
+
+    def __aiter__(self) -> AsyncIterator[T]:
+        return self
+
+    async def __anext__(self) -> T:
+        if self._stream is None:
+            self._stream = _stdio_stream_from_command_router(self._params)
+        # This raises StopAsyncIteration if the stream is at EOF.
+        return cast(T, await self._stream.__anext__())
 
     async def aclose(self):
-        """mdmd:hidden"""
         if self._stream:
             await self._stream.aclose()
 
 
+class _TextStreamReaderThroughCommandRouter(Generic[T]):
+    """
+    StreamReader implementation that will read directly from the worker
+    that hosts the sandbox.
+
+    This implementation is used for text streams.
+    """
+
+    def __init__(
+        self,
+        params: _StreamReaderThroughCommandRouterParams,
+        by_line: bool,
+    ) -> None:
+        self._params = params
+        self._by_line = by_line
+        self._stream = None
+
+    @property
+    def file_descriptor(self) -> int:
+        return self._params.file_descriptor
+
+    async def read(self) -> T:
+        data_str = ""
+        async for part in self:
+            data_str += cast(str, part)
+        return cast(T, data_str)
+
+    def __aiter__(self) -> AsyncIterator[T]:
+        return self
+
+    async def __anext__(self) -> T:
+        if self._stream is None:
+            bytes_stream = _stdio_stream_from_command_router(self._params)
+            if self._by_line:
+                self._stream = _decode_bytes_stream_to_str(_stream_by_line(bytes_stream))
+            else:
+                self._stream = _decode_bytes_stream_to_str(bytes_stream)
+        # This raises StopAsyncIteration if the stream is at EOF.
+        return cast(T, await self._stream.__anext__())
+
+    async def aclose(self):
+        if self._stream:
+            await self._stream.aclose()
+
+
+class _DevnullStreamReader(Generic[T]):
+    """StreamReader implementation for a stream configured with
+    StreamType.DEVNULL. Throws an error if read or any other method is
+    called.
+    """
+
+    def __init__(self, file_descriptor: "api_pb2.FileDescriptor.ValueType") -> None:
+        self._file_descriptor = file_descriptor
+
+    @property
+    def file_descriptor(self) -> int:
+        return self._file_descriptor
+
+    async def read(self) -> T:
+        raise ValueError("read is not supported for a stream configured with StreamType.DEVNULL")
+
+    def __aiter__(self) -> AsyncIterator[T]:
+        raise ValueError("__aiter__ is not supported for a stream configured with StreamType.DEVNULL")
+
+    async def __anext__(self) -> T:
+        raise ValueError("__anext__ is not supported for a stream configured with StreamType.DEVNULL")
+
+    async def aclose(self):
+        raise ValueError("aclose is not supported for a stream configured with StreamType.DEVNULL")
+
+
+class _StreamReader(Generic[T]):
+    """Retrieve logs from a stream (`stdout` or `stderr`).
+
+    As an asynchronous iterable, the object supports the `for` and `async for`
+    statements. Just loop over the object to read in chunks.
+    """
+
+    _impl: Union[
+        _StreamReaderThroughServer,
+        _DevnullStreamReader,
+        _TextStreamReaderThroughCommandRouter,
+        _BytesStreamReaderThroughCommandRouter,
+    ]
+
+    def __init__(
+        self,
+        file_descriptor: "api_pb2.FileDescriptor.ValueType",
+        object_id: str,
+        object_type: Literal["sandbox", "container_process"],
+        client: _Client,
+        stream_type: StreamType = StreamType.PIPE,
+        text: bool = True,
+        by_line: bool = False,
+        deadline: Optional[float] = None,
+        command_router_client: Optional[TaskCommandRouterClient] = None,
+        task_id: Optional[str] = None,
+    ) -> None:
+        """mdmd:hidden"""
+        if by_line and not text:
+            raise ValueError("line-buffering is only supported when text=True")
+
+        if command_router_client is None:
+            self._impl = _StreamReaderThroughServer(
+                file_descriptor, object_id, object_type, client, stream_type, text, by_line, deadline
+            )
+        else:
+            # The only reason task_id is optional is because StreamReader is
+            # also used for sandbox logs, which don't have a task ID available
+            # when the StreamReader is created.
+            assert task_id is not None
+            assert object_type == "container_process"
+            if stream_type == StreamType.DEVNULL:
+                self._impl = _DevnullStreamReader(file_descriptor)
+            else:
+                assert stream_type == StreamType.PIPE or stream_type == StreamType.STDOUT
+                # TODO(saltzm): The original implementation of STDOUT StreamType in
+                # _StreamReaderThroughServer prints to stdout immediately. This doesn't match
+                # python subprocess.run, which uses None to print to stdout immediately, and uses
+                # STDOUT as an argument to stderr to redirect stderr to the stdout stream. We should
+                # implement the old behavior here before moving out of beta, but after that
+                # we should consider changing the API to match python subprocess.run. I don't expect
+                # many customers are using this in any case, so I think it's fine to leave this
+                # unimplemented for now.
+                if stream_type == StreamType.STDOUT:
+                    raise NotImplementedError(
+                        "Currently the STDOUT stream type is not supported when using exec "
+                        "through a task command router, which is currently in beta."
+                    )
+                params = _StreamReaderThroughCommandRouterParams(
+                    file_descriptor, task_id, object_id, command_router_client, deadline
+                )
+                if text:
+                    self._impl = _TextStreamReaderThroughCommandRouter(params, by_line)
+                else:
+                    self._impl = _BytesStreamReaderThroughCommandRouter(params)
+
+    @property
+    def file_descriptor(self) -> int:
+        """Possible values are `1` for stdout and `2` for stderr."""
+        return self._impl.file_descriptor
+
+    async def read(self) -> T:
+        """Fetch the entire contents of the stream until EOF."""
+        return await self._impl.read()
+
+    # TODO(saltzm): I'd prefer to have the implementation classes only implement __aiter__
+    # and have them return generator functions directly, but synchronicity doesn't let us
+    # return self._impl.__aiter__() here because it won't properly wrap the implementation
+    # classes.
+    def __aiter__(self) -> AsyncIterator[T]:
+        """mdmd:hidden"""
+        return self
+
+    async def __anext__(self) -> T:
+        """mdmd:hidden"""
+        return await self._impl.__anext__()
+
+    async def aclose(self):
+        """mdmd:hidden"""
+        await self._impl.aclose()
+
+
 MAX_BUFFER_SIZE = 2 * 1024 * 1024
 
 
-class
+class _StreamWriterThroughServer:
     """Provides an interface to buffer and write logs to a sandbox or container process stream (`stdin`)."""
 
     def __init__(self, object_id: str, object_type: Literal["sandbox", "container_process"], client: _Client) -> None:
````
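The centerpiece of this hunk is `_decode_bytes_stream_to_str`, which wraps a bytes stream in `codecs.getincrementaldecoder` so that a multi-byte UTF-8 character split across chunk boundaries decodes cleanly instead of raising `UnicodeDecodeError`. A runnable sketch of the same technique; the `chunked` stream is illustrative:

```python
import asyncio
import codecs
from typing import AsyncGenerator


async def chunked() -> AsyncGenerator[bytes, None]:
    # "é" is 0xC3 0xA9 in UTF-8; split it across two chunks.
    for chunk in (b"caf\xc3", b"\xa9 au lait"):
        yield chunk


async def decode_stream(stream) -> AsyncGenerator[str, None]:
    decoder = codecs.getincrementaldecoder("utf-8")(errors="strict")
    async for item in stream:
        text = decoder.decode(item, final=False)
        if text:
            yield text
    # Flush any buffered partial character at end-of-stream.
    tail = decoder.decode(b"", final=True)
    if tail:
        yield tail


async def main() -> None:
    out = "".join([piece async for piece in decode_stream(chunked())])
    assert out == "café au lait"
    print(out)


asyncio.run(main())
```

Calling `decoder.decode(b"", final=True)` at end-of-stream flushes any buffered partial character, or raises if the stream ended mid-character under `errors="strict"`; that is the same flush step the new helper performs.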
````diff
@@ -357,25 +604,6 @@ class _StreamWriter:
 
         This is non-blocking and queues the data to an internal buffer. Must be
         used along with the `drain()` method, which flushes the buffer.
-
-        **Usage**
-
-        ```python fixture:running_app
-        from modal import Sandbox
-
-        sandbox = Sandbox.create(
-            "bash",
-            "-c",
-            "while read line; do echo $line; done",
-            app=running_app,
-        )
-        sandbox.stdin.write(b"foo\\n")
-        sandbox.stdin.write(b"bar\\n")
-        sandbox.stdin.write_eof()
-
-        sandbox.stdin.drain()
-        sandbox.wait()
-        ```
         """
         if self._is_closed:
             raise ValueError("Stdin is closed. Cannot write to it.")
@@ -383,7 +611,7 @@ class _StreamWriter:
             if isinstance(data, str):
                 data = data.encode("utf-8")
             if len(self._buffer) + len(data) > MAX_BUFFER_SIZE:
-                raise BufferError("Buffer size exceed limit. Call drain to
+                raise BufferError("Buffer size exceed limit. Call drain to flush the buffer.")
             self._buffer.extend(data)
         else:
             raise TypeError(f"data argument must be a bytes-like object, not {type(data).__name__}")
@@ -402,19 +630,6 @@ class _StreamWriter:
 
         This is a flow control method that blocks until data is sent. It returns
         when it is appropriate to continue writing data to the stream.
-
-        **Usage**
-
-        ```python notest
-        writer.write(data)
-        writer.drain()
-        ```
-
-        Async usage:
-        ```python notest
-        writer.write(data)  # not a blocking operation
-        await writer.drain.aio()
-        ```
         """
         data = bytes(self._buffer)
         self._buffer.clear()
@@ -422,15 +637,13 @@ class _StreamWriter:
 
         try:
             if self._object_type == "sandbox":
-                await
-                    self._client.stub.SandboxStdinWrite,
+                await self._client.stub.SandboxStdinWrite(
                     api_pb2.SandboxStdinWriteRequest(
                         sandbox_id=self._object_id, index=index, eof=self._is_closed, input=data
                     ),
                 )
             else:
-                await
-                    self._client.stub.ContainerExecPutInput,
+                await self._client.stub.ContainerExecPutInput(
                     api_pb2.ContainerExecPutInputRequest(
                         exec_id=self._object_id,
                         input=api_pb2.RuntimeInputMessage(message=data, message_index=index, eof=self._is_closed),
@@ -443,5 +656,122 @@ class _StreamWriter:
             raise exc
 
 
+class _StreamWriterThroughCommandRouter:
+    def __init__(
+        self,
+        object_id: str,
+        command_router_client: TaskCommandRouterClient,
+        task_id: str,
+    ) -> None:
+        self._object_id = object_id
+        self._command_router_client = command_router_client
+        self._task_id = task_id
+        self._is_closed = False
+        self._buffer = bytearray()
+        self._offset = 0
+
+    def write(self, data: Union[bytes, bytearray, memoryview, str]) -> None:
+        if self._is_closed:
+            raise ValueError("Stdin is closed. Cannot write to it.")
+        if isinstance(data, (bytes, bytearray, memoryview, str)):
+            if isinstance(data, str):
+                data = data.encode("utf-8")
+            if len(self._buffer) + len(data) > MAX_BUFFER_SIZE:
+                raise BufferError("Buffer size exceed limit. Call drain to flush the buffer.")
+            self._buffer.extend(data)
+        else:
+            raise TypeError(f"data argument must be a bytes-like object, not {type(data).__name__}")
+
+    def write_eof(self) -> None:
+        self._is_closed = True
+
+    async def drain(self) -> None:
+        eof = self._is_closed
+        # NB: There's no need to prevent writing eof twice, because the command router will ignore the second EOF.
+        if self._buffer or eof:
+            data = bytes(self._buffer)
+            await self._command_router_client.exec_stdin_write(
+                task_id=self._task_id, exec_id=self._object_id, offset=self._offset, data=data, eof=eof
+            )
+            # Only clear the buffer after writing the data to the command router is successful.
+            # This allows the client to retry drain() in the event of an exception (though
+            # exec_stdin_write already retries on transient errors, so most users will probably
+            # not do this).
+            self._buffer.clear()
+            self._offset += len(data)
+
+
+class _StreamWriter:
+    """Provides an interface to buffer and write logs to a sandbox or container process stream (`stdin`)."""
+
+    def __init__(
+        self,
+        object_id: str,
+        object_type: Literal["sandbox", "container_process"],
+        client: _Client,
+        command_router_client: Optional[TaskCommandRouterClient] = None,
+        task_id: Optional[str] = None,
+    ) -> None:
+        """mdmd:hidden"""
+        if command_router_client is None:
+            self._impl = _StreamWriterThroughServer(object_id, object_type, client)
+        else:
+            assert task_id is not None
+            assert object_type == "container_process"
+            self._impl = _StreamWriterThroughCommandRouter(object_id, command_router_client, task_id=task_id)
+
+    def write(self, data: Union[bytes, bytearray, memoryview, str]) -> None:
+        """Write data to the stream but does not send it immediately.
+
+        This is non-blocking and queues the data to an internal buffer. Must be
+        used along with the `drain()` method, which flushes the buffer.
+
+        **Usage**
+
+        ```python fixture:sandbox
+        proc = sandbox.exec(
+            "bash",
+            "-c",
+            "while read line; do echo $line; done",
+        )
+        proc.stdin.write(b"foo\\n")
+        proc.stdin.write(b"bar\\n")
+        proc.stdin.write_eof()
+        proc.stdin.drain()
+        ```
+        """
+        self._impl.write(data)
+
+    def write_eof(self) -> None:
+        """Close the write end of the stream after the buffered data is drained.
+
+        If the process was blocked on input, it will become unblocked after
+        `write_eof()`. This method needs to be used along with the `drain()`
+        method, which flushes the EOF to the process.
+        """
+        self._impl.write_eof()
+
+    async def drain(self) -> None:
+        """Flush the write buffer and send data to the running process.
+
+        This is a flow control method that blocks until data is sent. It returns
+        when it is appropriate to continue writing data to the stream.
+
+        **Usage**
+
+        ```python notest
+        writer.write(data)
+        writer.drain()
+        ```
+
+        Async usage:
+        ```python notest
+        writer.write(data)  # not a blocking operation
+        await writer.drain.aio()
+        ```
+        """
+        await self._impl.drain()
+
+
 StreamReader = synchronize_api(_StreamReader)
 StreamWriter = synchronize_api(_StreamWriter)
````
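Note that `_StreamWriterThroughCommandRouter.drain` clears its buffer and advances `self._offset` only after `exec_stdin_write` succeeds, so a `drain()` that raises can simply be retried with the same bytes at the same offset. A minimal sketch of that retry-safe accounting; the `send` callable stands in for the router RPC and is not Modal's API:

```python
class BufferedWriter:
    def __init__(self, send):
        self._send = send  # callable: (offset: int, data: bytes) -> None, may raise
        self._buffer = bytearray()
        self._offset = 0

    def write(self, data: bytes) -> None:
        # Queue data without sending; drain() flushes it.
        self._buffer.extend(data)

    def drain(self) -> None:
        if not self._buffer:
            return
        data = bytes(self._buffer)
        self._send(self._offset, data)  # raises on failure; buffer left intact
        # Clear and advance only after a successful send, so drain() is retryable.
        self._buffer.clear()
        self._offset += len(data)


sent = []
w = BufferedWriter(lambda off, data: sent.append((off, data)))
w.write(b"foo\n")
w.drain()
w.write(b"bar\n")
w.drain()
assert sent == [(0, b"foo\n"), (4, b"bar\n")]
```

Keying each write to an absolute byte offset also makes duplicate deliveries harmless: a receiver that has already consumed past the offset can discard the repeat.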