modal 1.1.5.dev51__tar.gz → 1.1.5.dev52__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of modal might be problematic. Click here for more details.
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/PKG-INFO +1 -1
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_runtime/gpu_memory_snapshot.py +20 -17
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/client.pyi +2 -2
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal.egg-info/PKG-INFO +1 -1
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal_version/__init__.py +1 -1
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/LICENSE +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/README.md +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/__init__.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/__main__.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_clustered_functions.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_clustered_functions.pyi +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_container_entrypoint.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_functions.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_ipython.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_location.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_object.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_output.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_partial_function.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_pty.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_resolver.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_resources.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_runtime/__init__.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_runtime/asgi.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_runtime/container_io_manager.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_runtime/container_io_manager.pyi +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_runtime/execution_context.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_runtime/execution_context.pyi +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_runtime/telemetry.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_runtime/user_code_imports.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_serialization.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_traceback.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_tunnel.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_tunnel.pyi +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_type_manager.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_utils/__init__.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_utils/app_utils.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_utils/async_utils.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_utils/auth_token_manager.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_utils/blob_utils.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_utils/bytes_io_segment_payload.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_utils/deprecation.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_utils/docker_utils.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_utils/function_utils.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_utils/git_utils.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_utils/grpc_testing.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_utils/grpc_utils.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_utils/hash_utils.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_utils/http_utils.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_utils/jwt_utils.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_utils/logger.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_utils/mount_utils.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_utils/name_utils.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_utils/package_utils.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_utils/pattern_utils.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_utils/rand_pb_testing.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_utils/shell_utils.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_utils/time_utils.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_vendor/__init__.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_vendor/a2wsgi_wsgi.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_vendor/cloudpickle.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_vendor/tblib.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/_watcher.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/app.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/app.pyi +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/builder/2023.12.312.txt +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/builder/2023.12.txt +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/builder/2024.04.txt +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/builder/2024.10.txt +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/builder/2025.06.txt +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/builder/PREVIEW.txt +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/builder/README.md +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/builder/base-images.json +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/call_graph.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/cli/__init__.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/cli/_download.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/cli/_traceback.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/cli/app.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/cli/cluster.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/cli/config.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/cli/container.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/cli/dict.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/cli/entry_point.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/cli/environment.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/cli/import_refs.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/cli/launch.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/cli/network_file_system.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/cli/profile.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/cli/programs/__init__.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/cli/programs/launch_instance_ssh.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/cli/programs/run_jupyter.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/cli/programs/run_marimo.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/cli/programs/vscode.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/cli/queues.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/cli/run.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/cli/secret.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/cli/token.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/cli/utils.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/cli/volume.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/client.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/cloud_bucket_mount.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/cloud_bucket_mount.pyi +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/cls.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/cls.pyi +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/config.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/container_process.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/container_process.pyi +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/dict.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/dict.pyi +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/environments.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/environments.pyi +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/exception.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/experimental/__init__.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/experimental/flash.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/experimental/flash.pyi +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/experimental/ipython.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/file_io.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/file_io.pyi +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/file_pattern_matcher.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/functions.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/functions.pyi +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/gpu.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/image.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/image.pyi +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/io_streams.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/io_streams.pyi +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/mount.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/mount.pyi +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/network_file_system.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/network_file_system.pyi +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/object.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/object.pyi +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/output.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/parallel_map.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/parallel_map.pyi +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/partial_function.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/partial_function.pyi +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/proxy.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/proxy.pyi +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/py.typed +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/queue.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/queue.pyi +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/retries.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/runner.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/runner.pyi +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/running_app.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/sandbox.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/sandbox.pyi +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/schedule.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/scheduler_placement.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/secret.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/secret.pyi +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/serving.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/serving.pyi +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/snapshot.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/snapshot.pyi +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/stream_type.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/token_flow.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/token_flow.pyi +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/volume.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal/volume.pyi +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal.egg-info/SOURCES.txt +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal.egg-info/dependency_links.txt +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal.egg-info/entry_points.txt +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal.egg-info/requires.txt +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal.egg-info/top_level.txt +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal_docs/__init__.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal_docs/gen_cli_docs.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal_docs/gen_reference_docs.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal_docs/mdmd/__init__.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal_docs/mdmd/mdmd.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal_docs/mdmd/signatures.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal_proto/__init__.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal_proto/api.proto +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal_proto/api_grpc.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal_proto/api_pb2.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal_proto/api_pb2.pyi +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal_proto/api_pb2_grpc.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal_proto/api_pb2_grpc.pyi +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal_proto/modal_api_grpc.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal_proto/modal_options_grpc.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal_proto/options.proto +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal_proto/options_grpc.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal_proto/options_pb2.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal_proto/options_pb2.pyi +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal_proto/options_pb2_grpc.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal_proto/options_pb2_grpc.pyi +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal_proto/py.typed +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal_proto/sandbox_router.proto +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal_proto/sandbox_router_grpc.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal_proto/sandbox_router_pb2.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal_proto/sandbox_router_pb2.pyi +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal_proto/sandbox_router_pb2_grpc.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal_proto/sandbox_router_pb2_grpc.pyi +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/modal_version/__main__.py +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/pyproject.toml +0 -0
- {modal-1.1.5.dev51 → modal-1.1.5.dev52}/setup.cfg +0 -0
|
@@ -18,6 +18,12 @@ from modal.config import config, logger
|
|
|
18
18
|
|
|
19
19
|
CUDA_CHECKPOINT_PATH: str = config.get("cuda_checkpoint_path")
|
|
20
20
|
|
|
21
|
+
# Maximum total duration for an entire toggle operation.
|
|
22
|
+
CUDA_CHECKPOINT_TOGGLE_TIMEOUT: float = 5 * 60.0
|
|
23
|
+
|
|
24
|
+
# Maximum total duration for each individual `cuda-checkpoint` invocation.
|
|
25
|
+
CUDA_CHECKPOINT_TIMEOUT: float = 90
|
|
26
|
+
|
|
21
27
|
|
|
22
28
|
class CudaCheckpointState(Enum):
|
|
23
29
|
"""State representation from the CUDA API [1].
|
|
@@ -44,7 +50,7 @@ class CudaCheckpointProcess:
|
|
|
44
50
|
pid: int
|
|
45
51
|
state: CudaCheckpointState
|
|
46
52
|
|
|
47
|
-
def toggle(self, target_state: CudaCheckpointState,
|
|
53
|
+
def toggle(self, target_state: CudaCheckpointState, skip_first_refresh: bool = False) -> None:
|
|
48
54
|
"""Toggle CUDA checkpoint state for current process, moving GPU memory to the
|
|
49
55
|
CPU and back depending on the current process state when called.
|
|
50
56
|
"""
|
|
@@ -54,7 +60,11 @@ class CudaCheckpointProcess:
|
|
|
54
60
|
retry_count = 0
|
|
55
61
|
max_retries = 3
|
|
56
62
|
|
|
57
|
-
|
|
63
|
+
attempts = 0
|
|
64
|
+
while self._should_continue_toggle(
|
|
65
|
+
target_state, start_time, refresh=not (skip_first_refresh and attempts == 0)
|
|
66
|
+
):
|
|
67
|
+
attempts += 1
|
|
58
68
|
try:
|
|
59
69
|
self._execute_toggle_command()
|
|
60
70
|
# Use exponential backoff for retries
|
|
@@ -73,10 +83,11 @@ class CudaCheckpointProcess:
|
|
|
73
83
|
logger.debug(f"PID: {self.pid} Target state {target_state.value} reached")
|
|
74
84
|
|
|
75
85
|
def _should_continue_toggle(
|
|
76
|
-
self, target_state: CudaCheckpointState, start_time: float,
|
|
86
|
+
self, target_state: CudaCheckpointState, start_time: float, refresh: bool = True
|
|
77
87
|
) -> bool:
|
|
78
88
|
"""Check if toggle operation should continue based on current state and timeout."""
|
|
79
|
-
|
|
89
|
+
if refresh:
|
|
90
|
+
self.refresh_state()
|
|
80
91
|
|
|
81
92
|
if self.state == target_state:
|
|
82
93
|
return False
|
|
@@ -85,7 +96,7 @@ class CudaCheckpointProcess:
|
|
|
85
96
|
raise CudaCheckpointException(f"PID: {self.pid} CUDA process state is {self.state}")
|
|
86
97
|
|
|
87
98
|
elapsed = time.monotonic() - start_time
|
|
88
|
-
if elapsed >=
|
|
99
|
+
if elapsed >= CUDA_CHECKPOINT_TOGGLE_TIMEOUT:
|
|
89
100
|
raise CudaCheckpointException(
|
|
90
101
|
f"PID: {self.pid} Timeout after {elapsed:.2f}s waiting for state {target_state.value}. "
|
|
91
102
|
f"Current state: {self.state}"
|
|
@@ -101,7 +112,7 @@ class CudaCheckpointProcess:
|
|
|
101
112
|
check=True,
|
|
102
113
|
capture_output=True,
|
|
103
114
|
text=True,
|
|
104
|
-
timeout=
|
|
115
|
+
timeout=CUDA_CHECKPOINT_TIMEOUT,
|
|
105
116
|
)
|
|
106
117
|
logger.debug(f"PID: {self.pid} Successfully toggled CUDA checkpoint state")
|
|
107
118
|
except subprocess.CalledProcessError as e:
|
|
@@ -121,7 +132,7 @@ class CudaCheckpointProcess:
|
|
|
121
132
|
check=True,
|
|
122
133
|
capture_output=True,
|
|
123
134
|
text=True,
|
|
124
|
-
timeout=
|
|
135
|
+
timeout=CUDA_CHECKPOINT_TIMEOUT,
|
|
125
136
|
)
|
|
126
137
|
|
|
127
138
|
state_str = result.stdout.strip().lower()
|
|
@@ -190,6 +201,7 @@ class CudaCheckpointSession:
|
|
|
190
201
|
[CUDA_CHECKPOINT_PATH, "--get-state", "--pid", str(pid)],
|
|
191
202
|
capture_output=True,
|
|
192
203
|
text=True,
|
|
204
|
+
# This should be quick since no checkpoint has taken place yet
|
|
193
205
|
timeout=5,
|
|
194
206
|
)
|
|
195
207
|
|
|
@@ -256,20 +268,11 @@ class CudaCheckpointSession:
|
|
|
256
268
|
logger.debug("No CUDA sessions to restore.")
|
|
257
269
|
return
|
|
258
270
|
|
|
259
|
-
# Validate all states first
|
|
260
|
-
for proc in self.cuda_processes:
|
|
261
|
-
proc.refresh_state() # Refresh state before validation
|
|
262
|
-
if proc.state != CudaCheckpointState.CHECKPOINTED:
|
|
263
|
-
raise CudaCheckpointException(
|
|
264
|
-
f"PID {proc.pid}: CUDA session not in {CudaCheckpointState.CHECKPOINTED.value} state. "
|
|
265
|
-
f"Current state: {proc.state.value}"
|
|
266
|
-
)
|
|
267
|
-
|
|
268
271
|
# See checkpoint() for rationale about parallelism.
|
|
269
272
|
start = time.perf_counter()
|
|
270
273
|
|
|
271
274
|
def restore_process(proc: CudaCheckpointProcess) -> None:
|
|
272
|
-
proc.toggle(CudaCheckpointState.RUNNING)
|
|
275
|
+
proc.toggle(CudaCheckpointState.RUNNING, skip_first_refresh=True)
|
|
273
276
|
|
|
274
277
|
with ThreadPoolExecutor() as executor:
|
|
275
278
|
futures = [executor.submit(restore_process, proc) for proc in self.cuda_processes]
|
|
@@ -33,7 +33,7 @@ class _Client:
|
|
|
33
33
|
server_url: str,
|
|
34
34
|
client_type: int,
|
|
35
35
|
credentials: typing.Optional[tuple[str, str]],
|
|
36
|
-
version: str = "1.1.5.dev51",
|
|
36
|
+
version: str = "1.1.5.dev52",
|
|
37
37
|
):
|
|
38
38
|
"""mdmd:hidden
|
|
39
39
|
The Modal client object is not intended to be instantiated directly by users.
|
|
@@ -164,7 +164,7 @@ class Client:
|
|
|
164
164
|
server_url: str,
|
|
165
165
|
client_type: int,
|
|
166
166
|
credentials: typing.Optional[tuple[str, str]],
|
|
167
|
-
version: str = "1.1.5.dev51",
|
|
167
|
+
version: str = "1.1.5.dev52",
|
|
168
168
|
):
|
|
169
169
|
"""mdmd:hidden
|
|
170
170
|
The Modal client object is not intended to be instantiated directly by users.
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|