podstack 1.3.21__tar.gz → 1.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {podstack-1.3.21 → podstack-1.4.0}/PKG-INFO +1 -1
- {podstack-1.3.21 → podstack-1.4.0}/podstack/__init__.py +66 -1
- {podstack-1.3.21 → podstack-1.4.0}/podstack/annotations.py +156 -0
- {podstack-1.3.21 → podstack-1.4.0}/podstack/gpu_runner.py +295 -11
- podstack-1.4.0/podstack/pipelines.py +269 -0
- podstack-1.4.0/podstack/storage.py +291 -0
- {podstack-1.3.21 → podstack-1.4.0}/podstack.egg-info/PKG-INFO +1 -1
- {podstack-1.3.21 → podstack-1.4.0}/podstack.egg-info/SOURCES.txt +2 -0
- {podstack-1.3.21 → podstack-1.4.0}/pyproject.toml +1 -1
- {podstack-1.3.21 → podstack-1.4.0}/LICENSE +0 -0
- {podstack-1.3.21 → podstack-1.4.0}/README.md +0 -0
- {podstack-1.3.21 → podstack-1.4.0}/podstack/client.py +0 -0
- {podstack-1.3.21 → podstack-1.4.0}/podstack/exceptions.py +0 -0
- {podstack-1.3.21 → podstack-1.4.0}/podstack/execution.py +0 -0
- {podstack-1.3.21 → podstack-1.4.0}/podstack/models.py +0 -0
- {podstack-1.3.21 → podstack-1.4.0}/podstack/notebook.py +0 -0
- {podstack-1.3.21 → podstack-1.4.0}/podstack/registry/__init__.py +0 -0
- {podstack-1.3.21 → podstack-1.4.0}/podstack/registry/autolog.py +0 -0
- {podstack-1.3.21 → podstack-1.4.0}/podstack/registry/client.py +0 -0
- {podstack-1.3.21 → podstack-1.4.0}/podstack/registry/exceptions.py +0 -0
- {podstack-1.3.21 → podstack-1.4.0}/podstack/registry/experiment.py +0 -0
- {podstack-1.3.21 → podstack-1.4.0}/podstack/registry/model.py +0 -0
- {podstack-1.3.21 → podstack-1.4.0}/podstack/registry/model_utils.py +0 -0
- {podstack-1.3.21 → podstack-1.4.0}/podstack.egg-info/dependency_links.txt +0 -0
- {podstack-1.3.21 → podstack-1.4.0}/podstack.egg-info/requires.txt +0 -0
- {podstack-1.3.21 → podstack-1.4.0}/podstack.egg-info/top_level.txt +0 -0
- {podstack-1.3.21 → podstack-1.4.0}/podstack_gpu/__init__.py +0 -0
- {podstack-1.3.21 → podstack-1.4.0}/podstack_gpu/app.py +0 -0
- {podstack-1.3.21 → podstack-1.4.0}/podstack_gpu/exceptions.py +0 -0
- {podstack-1.3.21 → podstack-1.4.0}/podstack_gpu/image.py +0 -0
- {podstack-1.3.21 → podstack-1.4.0}/podstack_gpu/runner.py +0 -0
- {podstack-1.3.21 → podstack-1.4.0}/podstack_gpu/secret.py +0 -0
- {podstack-1.3.21 → podstack-1.4.0}/podstack_gpu/utils.py +0 -0
- {podstack-1.3.21 → podstack-1.4.0}/podstack_gpu/volume.py +0 -0
- {podstack-1.3.21 → podstack-1.4.0}/setup.cfg +0 -0
|
@@ -54,7 +54,7 @@ Decorators:
|
|
|
54
54
|
...
|
|
55
55
|
"""
|
|
56
56
|
|
|
57
|
-
__version__ = "1.
|
|
57
|
+
__version__ = "1.4.0"
|
|
58
58
|
|
|
59
59
|
from .client import Client
|
|
60
60
|
from .notebook import Notebook, NotebookStatus
|
|
@@ -86,6 +86,31 @@ from .gpu_runner import (
|
|
|
86
86
|
RunnerList,
|
|
87
87
|
run as run_on_gpu,
|
|
88
88
|
list_runners,
|
|
89
|
+
get_runner_info,
|
|
90
|
+
pip_install,
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
# Pipeline module import
|
|
94
|
+
from . import pipelines
|
|
95
|
+
from .pipelines import (
|
|
96
|
+
step,
|
|
97
|
+
pipeline,
|
|
98
|
+
StepConfig,
|
|
99
|
+
PipelineDef,
|
|
100
|
+
list_pipelines,
|
|
101
|
+
get_pipeline,
|
|
102
|
+
get_pipeline_run,
|
|
103
|
+
trigger_pipeline,
|
|
104
|
+
cancel_pipeline_run,
|
|
105
|
+
archive_pipeline,
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
# Storage module import
|
|
109
|
+
from . import storage
|
|
110
|
+
from .storage import (
|
|
111
|
+
StorageClient, BucketList, VolumeList,
|
|
112
|
+
create_bucket, get_bucket, list_buckets, delete_bucket,
|
|
113
|
+
create_volume, get_volume, list_volumes, delete_volume,
|
|
89
114
|
)
|
|
90
115
|
|
|
91
116
|
# Annotations module import
|
|
@@ -103,6 +128,13 @@ from .annotations import (
|
|
|
103
128
|
get_environment,
|
|
104
129
|
get_auto_shutdown_minutes,
|
|
105
130
|
enable_remote_execution,
|
|
131
|
+
deploy,
|
|
132
|
+
DeployConfig,
|
|
133
|
+
)
|
|
134
|
+
from .gpu_runner import (
|
|
135
|
+
get_deployment,
|
|
136
|
+
stop_deployment,
|
|
137
|
+
delete_deployment,
|
|
106
138
|
)
|
|
107
139
|
|
|
108
140
|
|
|
@@ -206,6 +238,31 @@ __all__ = [
|
|
|
206
238
|
"WalletBalance",
|
|
207
239
|
# Registry
|
|
208
240
|
"registry",
|
|
241
|
+
# Pipelines
|
|
242
|
+
"pipelines",
|
|
243
|
+
"step",
|
|
244
|
+
"pipeline",
|
|
245
|
+
"StepConfig",
|
|
246
|
+
"PipelineDef",
|
|
247
|
+
"list_pipelines",
|
|
248
|
+
"get_pipeline",
|
|
249
|
+
"get_pipeline_run",
|
|
250
|
+
"trigger_pipeline",
|
|
251
|
+
"cancel_pipeline_run",
|
|
252
|
+
"archive_pipeline",
|
|
253
|
+
# Storage
|
|
254
|
+
"storage",
|
|
255
|
+
"StorageClient",
|
|
256
|
+
"BucketList",
|
|
257
|
+
"VolumeList",
|
|
258
|
+
"create_bucket",
|
|
259
|
+
"get_bucket",
|
|
260
|
+
"list_buckets",
|
|
261
|
+
"delete_bucket",
|
|
262
|
+
"create_volume",
|
|
263
|
+
"get_volume",
|
|
264
|
+
"list_volumes",
|
|
265
|
+
"delete_volume",
|
|
209
266
|
# GPU Runner
|
|
210
267
|
"gpu_runner",
|
|
211
268
|
"GPURunner",
|
|
@@ -213,6 +270,8 @@ __all__ = [
|
|
|
213
270
|
"RunnerList",
|
|
214
271
|
"run_on_gpu",
|
|
215
272
|
"list_runners",
|
|
273
|
+
"get_runner_info",
|
|
274
|
+
"pip_install",
|
|
216
275
|
# Annotations
|
|
217
276
|
"annotations",
|
|
218
277
|
"gpu",
|
|
@@ -227,4 +286,10 @@ __all__ = [
|
|
|
227
286
|
"get_environment",
|
|
228
287
|
"get_auto_shutdown_minutes",
|
|
229
288
|
"enable_remote_execution",
|
|
289
|
+
# Deploy
|
|
290
|
+
"deploy",
|
|
291
|
+
"DeployConfig",
|
|
292
|
+
"get_deployment",
|
|
293
|
+
"stop_deployment",
|
|
294
|
+
"delete_deployment",
|
|
230
295
|
]
|
|
@@ -37,6 +37,7 @@ from typing import Optional, Dict, Any, Callable, Union
|
|
|
37
37
|
from contextlib import contextmanager
|
|
38
38
|
|
|
39
39
|
from . import registry
|
|
40
|
+
from .pipelines import step, pipeline, StepConfig, PipelineDef
|
|
40
41
|
from .gpu_runner import (
|
|
41
42
|
GPURunner,
|
|
42
43
|
GPUExecutionResult,
|
|
@@ -802,6 +803,154 @@ class ModelRegistry:
|
|
|
802
803
|
model = ModelRegistry()
|
|
803
804
|
|
|
804
805
|
|
|
806
|
+
class DeployConfig:
    """
    Deploy configuration decorator.

    Deploys the decorated function as a persistent REST API endpoint.
    Calling the decorated function does not execute it locally: the call
    uploads the function's source to the deployment API and then polls the
    deployment until it is active, has failed, or the polling window ends.

    Usage:
        @podstack.deploy(name="sentiment-api", gpu="A10", fraction=50)
        def predict(data):
            return {"sentiment": "positive", "text": data["text"]}

        result = predict()  # Deploys and returns endpoint info
    """

    # Polling schedule: 24 polls x 5 s between polls = up to 120 s.
    _POLL_ATTEMPTS = 24
    _POLL_INTERVAL_SECONDS = 5

    def __init__(
        self,
        name: str,
        gpu: str = "A10",
        count: int = 1,
        fraction: int = 100,
        pip: Union[str, list] = None,
        uv: Union[str, list] = None,
        function: str = None,
        image: str = None,
    ):
        """Store the deployment settings; nothing is sent until the wrapper is called.

        Args:
            name: Deployment name sent as "name".
            gpu: GPU type sent as "gpu_type".
            count: GPU count sent as "gpu_count".
            fraction: GPU fraction sent as "gpu_fraction".
            pip: Package(s) sent as "pip_packages"; a list is space-joined.
            uv: Stored on the instance only (see the review note in __call__).
            function: Name of the function to expose; defaults to the
                decorated function's own name.
            image: Image sent as "image" (empty string when unset).
        """
        self.name = name
        self.gpu = gpu
        self.count = count
        self.fraction = fraction
        self.pip = pip
        self.uv = uv
        self.function = function
        self.image = image

    @staticmethod
    def _strip_decorators(source: str) -> str:
        """Return *source* without the decorators that precede its first definition.

        Only the header is touched: once the first ``def`` / ``async def`` /
        ``class`` line has been seen every following line is kept verbatim,
        so decorators on functions nested inside the body survive.
        Multi-line decorator calls are skipped as a whole by tracking the
        parenthesis balance (parentheses inside string literals are counted
        too, which is a known limitation of this line-based approach).
        """
        kept = []
        in_header = True
        paren_depth = 0
        for line in source.split('\n'):
            if in_header:
                stripped = line.strip()
                if paren_depth > 0:
                    # Continuation line of a multi-line decorator call.
                    paren_depth += line.count('(') - line.count(')')
                    continue
                if stripped.startswith('@'):
                    paren_depth = max(line.count('(') - line.count(')'), 0)
                    continue
                if stripped.startswith(('def ', 'async def ', 'class ')):
                    in_header = False
            kept.append(line)
        return '\n'.join(kept)

    def __call__(self, func: Callable) -> Callable:
        """Wrap *func* so that calling it deploys its source.

        The returned wrapper ignores its arguments. It returns a dict with
        "deployment_id" and "status" (plus "endpoint" once the deployment is
        active) and raises PodstackError when the API rejects the request,
        omits the deployment id, or reports a failed deployment.
        """
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            runner = get_runner()

            # Ship only the function itself; decorators reference names that
            # need not exist where the source is executed remotely.
            source = self._strip_decorators(
                textwrap.dedent(inspect.getsource(func))
            )

            function_name = self.function or func.__name__
            pip_packages = self.pip
            if isinstance(pip_packages, list):
                pip_packages = " ".join(pip_packages)
            # NOTE(review): self.uv is accepted but not part of the request
            # payload - confirm whether the deployment API supports it.

            # POST to deployment API
            import httpx
            headers = runner._get_headers()
            url = f"{runner.api_url}/api/v1/deployments/from-source"
            with httpx.Client(timeout=30.0) as client:
                response = client.post(url, headers=headers, json={
                    "name": self.name,
                    "source": source,
                    "function_name": function_name,
                    "gpu_type": self.gpu,
                    "gpu_fraction": self.fraction,
                    "gpu_count": self.count,
                    "pip_packages": pip_packages or "",
                    "image": self.image or "",
                })
            if response.status_code >= 400:
                raise PodstackError(f"Deployment failed: {response.text}")
            data = response.json()

            deployment_id = data.get("deployment_id")
            if not deployment_id:
                # Without an id there is nothing to poll; fail fast instead
                # of polling ".../deployments/None" for two minutes.
                raise PodstackError(
                    f"Deployment failed: response contained no deployment_id: {response.text}"
                )
            print(f"[Podstack] Deploying '{self.name}' as REST API...")
            print(f"[Podstack] Deployment ID: {deployment_id}")
            print(f"[Podstack] Status: {data.get('status')}")

            # Poll for active status (up to 120s), reusing one HTTP client.
            status_url = f"{runner.api_url}/api/v1/deployments/{deployment_id}"
            with httpx.Client(timeout=30.0) as client:
                for _ in range(self._POLL_ATTEMPTS):
                    time.sleep(self._POLL_INTERVAL_SECONDS)
                    status_resp = client.get(status_url, headers=headers)
                    if status_resp.status_code >= 400:
                        # Treat HTTP errors while polling as transient.
                        continue
                    status_data = status_resp.json()
                    status = status_data.get("status")
                    if status == "active":
                        endpoint = status_data.get("service_url", "")
                        print(f"[Podstack] Endpoint live: POST {endpoint}/predict")
                        return {"deployment_id": deployment_id, "status": "active", "endpoint": f"{endpoint}/predict"}
                    elif status == "failed":
                        error = status_data.get("error_message", "Unknown error")
                        print(f"[Podstack] Deployment failed: {error}")
                        raise PodstackError(f"Deployment failed: {error}")
                    print(f"[Podstack] Status: {status}...")

            print(f"[Podstack] Deployment still starting. Check status with:")
            print(f"  podstack.get_deployment('{deployment_id}')")
            return {"deployment_id": deployment_id, "status": "starting"}

        return wrapper
|
|
926
|
+
|
|
927
|
+
|
|
928
|
+
def deploy(
    name: str,
    gpu: str = "A10",
    count: int = 1,
    fraction: int = 100,
    pip: Union[str, list] = None,
    uv: Union[str, list] = None,
    function: str = None,
    image: str = None,
) -> DeployConfig:
    """Build the decorator that deploys a function as a persistent REST API endpoint.

    Every argument is forwarded unchanged to DeployConfig.

    Examples:
        @podstack.deploy(name="sentiment-api", gpu="A10", fraction=25)
        def predict(data):
            return {"sentiment": "positive"}

        result = predict()  # Deploys and returns endpoint info
        print(result["endpoint"])
    """
    settings = {
        "name": name,
        "gpu": gpu,
        "count": count,
        "fraction": fraction,
        "pip": pip,
        "uv": uv,
        "function": function,
        "image": image,
    }
    return DeployConfig(**settings)
|
|
952
|
+
|
|
953
|
+
|
|
805
954
|
def get_gpu_config() -> Dict[str, Any]:
|
|
806
955
|
"""Get current GPU configuration."""
|
|
807
956
|
return _current_gpu_config.copy()
|
|
@@ -832,6 +981,13 @@ __all__ = [
|
|
|
832
981
|
"enable_remote_execution",
|
|
833
982
|
"is_remote_execution_enabled",
|
|
834
983
|
"GPUConfig",
|
|
984
|
+
"deploy",
|
|
985
|
+
"DeployConfig",
|
|
986
|
+
# Pipeline
|
|
987
|
+
"step",
|
|
988
|
+
"pipeline",
|
|
989
|
+
"StepConfig",
|
|
990
|
+
"PipelineDef",
|
|
835
991
|
# Exceptions
|
|
836
992
|
"PodstackError",
|
|
837
993
|
"PodstackTimeoutError",
|
|
@@ -128,6 +128,42 @@ def is_jupyter() -> bool:
|
|
|
128
128
|
return False
|
|
129
129
|
|
|
130
130
|
|
|
131
|
+
def _format_libraries(libs) -> str:
|
|
132
|
+
"""Format a libraries field into a compact comma-separated string."""
|
|
133
|
+
if not libs:
|
|
134
|
+
return ""
|
|
135
|
+
# If it's a JSON string, parse it
|
|
136
|
+
if isinstance(libs, str):
|
|
137
|
+
try:
|
|
138
|
+
libs = __import__("json").loads(libs)
|
|
139
|
+
except (ValueError, TypeError):
|
|
140
|
+
return libs
|
|
141
|
+
# Dict like {"torch": "2.3.0", "numpy": "1.26"} → "torch, numpy"
|
|
142
|
+
if isinstance(libs, dict):
|
|
143
|
+
return ", ".join(libs.keys())
|
|
144
|
+
# List like ["torch", "numpy"]
|
|
145
|
+
if isinstance(libs, list):
|
|
146
|
+
return ", ".join(str(item) for item in libs)
|
|
147
|
+
return str(libs)
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _normalize_mounts(mounts: list, default_prefix: str) -> str:
|
|
151
|
+
"""Normalize mount specs to 'name:/path,name2:/path2' format.
|
|
152
|
+
|
|
153
|
+
Accepts: ["name:/path", "name"] -> "name:/path,name:/mnt/nfs/name"
|
|
154
|
+
"""
|
|
155
|
+
parts = []
|
|
156
|
+
for entry in mounts:
|
|
157
|
+
entry = entry.strip()
|
|
158
|
+
if not entry:
|
|
159
|
+
continue
|
|
160
|
+
if ":" in entry:
|
|
161
|
+
parts.append(entry)
|
|
162
|
+
else:
|
|
163
|
+
parts.append(f"{entry}:{default_prefix}/{entry}")
|
|
164
|
+
return ",".join(parts)
|
|
165
|
+
|
|
166
|
+
|
|
131
167
|
class RunnerList(list):
|
|
132
168
|
"""A list of runners with a .show() method for tabular display."""
|
|
133
169
|
|
|
@@ -141,18 +177,22 @@ class RunnerList(list):
|
|
|
141
177
|
no_w = 5
|
|
142
178
|
name_w = 45
|
|
143
179
|
type_w = 6
|
|
144
|
-
|
|
180
|
+
libs_w = 40
|
|
181
|
+
desc_w = 40
|
|
145
182
|
|
|
146
|
-
header = f"{'S.NO':<{no_w}} {'Name':<{name_w}} {'Type':<{type_w}} {'Description'}"
|
|
183
|
+
header = f"{'S.NO':<{no_w}} {'Name':<{name_w}} {'Type':<{type_w}} {'Libraries':<{libs_w}} {'Description'}"
|
|
147
184
|
print(header)
|
|
148
|
-
print("-" * (no_w + name_w + type_w + desc_w +
|
|
185
|
+
print("-" * (no_w + name_w + type_w + libs_w + desc_w + 4))
|
|
149
186
|
for i, r in enumerate(self, 1):
|
|
150
187
|
name = r.get("name", "")
|
|
151
188
|
rtype = r.get("type", "")
|
|
152
189
|
desc = r.get("description", "")
|
|
190
|
+
libs = _format_libraries(r.get("libraries", ""))
|
|
153
191
|
if len(desc) > desc_w:
|
|
154
192
|
desc = desc[:desc_w - 3] + "..."
|
|
155
|
-
|
|
193
|
+
if len(libs) > libs_w:
|
|
194
|
+
libs = libs[:libs_w - 3] + "..."
|
|
195
|
+
print(f"{i:<{no_w}} {name:<{name_w}} {rtype:<{type_w}} {libs:<{libs_w}} {desc}")
|
|
156
196
|
|
|
157
197
|
|
|
158
198
|
class OutputStreamer:
|
|
@@ -396,7 +436,9 @@ class GPURunner:
|
|
|
396
436
|
conda: Union[str, list] = None,
|
|
397
437
|
requirements: str = None,
|
|
398
438
|
use_uv: bool = False,
|
|
399
|
-
runner: str = None
|
|
439
|
+
runner: str = None,
|
|
440
|
+
nfs: List[str] = None,
|
|
441
|
+
buckets: List[str] = None
|
|
400
442
|
) -> str:
|
|
401
443
|
"""Build the @podstack annotation string."""
|
|
402
444
|
parts = [f"#@podstack gpu={gpu}"]
|
|
@@ -439,6 +481,12 @@ class GPURunner:
|
|
|
439
481
|
if runner:
|
|
440
482
|
parts.append(f"runner={runner}")
|
|
441
483
|
|
|
484
|
+
# NFS volume and bucket mounts
|
|
485
|
+
if nfs:
|
|
486
|
+
parts.append(f"nfs={_normalize_mounts(nfs, '/mnt/nfs')}")
|
|
487
|
+
if buckets:
|
|
488
|
+
parts.append(f"buckets={_normalize_mounts(buckets, '/mnt/buckets')}")
|
|
489
|
+
|
|
442
490
|
return " ".join(parts)
|
|
443
491
|
|
|
444
492
|
def submit(
|
|
@@ -456,7 +504,9 @@ class GPURunner:
|
|
|
456
504
|
use_uv: bool = False,
|
|
457
505
|
env_vars: Dict[str, str] = None,
|
|
458
506
|
add_annotation: bool = True,
|
|
459
|
-
runner: str = None
|
|
507
|
+
runner: str = None,
|
|
508
|
+
nfs: List[str] = None,
|
|
509
|
+
buckets: List[str] = None
|
|
460
510
|
) -> Dict[str, Any]:
|
|
461
511
|
"""
|
|
462
512
|
Submit code for GPU execution (non-blocking).
|
|
@@ -486,7 +536,7 @@ class GPURunner:
|
|
|
486
536
|
|
|
487
537
|
# Add annotation if not present
|
|
488
538
|
if add_annotation and not code.strip().startswith("#@podstack"):
|
|
489
|
-
annotation = self._build_annotation(gpu, count, fraction, timeout, env, pip, uv, conda, requirements, use_uv, runner)
|
|
539
|
+
annotation = self._build_annotation(gpu, count, fraction, timeout, env, pip, uv, conda, requirements, use_uv, runner, nfs=nfs, buckets=buckets)
|
|
490
540
|
code = f"{annotation}\n\n{code}"
|
|
491
541
|
|
|
492
542
|
# Inject environment variables
|
|
@@ -780,7 +830,9 @@ _stream_install(
|
|
|
780
830
|
provisioning_timeout: int = 300,
|
|
781
831
|
cancel_on_timeout: bool = True,
|
|
782
832
|
stream: bool = None,
|
|
783
|
-
runner: str = None
|
|
833
|
+
runner: str = None,
|
|
834
|
+
nfs: List[str] = None,
|
|
835
|
+
buckets: List[str] = None
|
|
784
836
|
) -> GPUExecutionResult:
|
|
785
837
|
"""
|
|
786
838
|
Execute code on GPU and optionally wait for completion.
|
|
@@ -815,7 +867,7 @@ _stream_install(
|
|
|
815
867
|
ValueError: If parameters are invalid
|
|
816
868
|
"""
|
|
817
869
|
# Submit the code
|
|
818
|
-
submission = self.submit(code, gpu, count, fraction, timeout, env, pip, uv, conda, requirements, use_uv, env_vars=env_vars, runner=runner)
|
|
870
|
+
submission = self.submit(code, gpu, count, fraction, timeout, env, pip, uv, conda, requirements, use_uv, env_vars=env_vars, runner=runner, nfs=nfs, buckets=buckets)
|
|
819
871
|
execution_id = submission.get("execution_id")
|
|
820
872
|
|
|
821
873
|
if not execution_id:
|
|
@@ -1158,6 +1210,162 @@ _stream_install(
|
|
|
1158
1210
|
except Exception:
|
|
1159
1211
|
raise RuntimeError(f"Invalid JSON response: {response.text[:200]}")
|
|
1160
1212
|
|
|
1213
|
+
def get_runner_info(self, name: str) -> dict:
    """
    Get details for a specific runner by name.

    Args:
        name: Runner name (e.g. "pytorch-2.3.0-cuda-12.1-py3.11")

    Returns:
        Dict with runner details (name, type, description, image, libraries, scenarios)

    Raises:
        ValueError: If name is empty.
        ConnectionError: If the connection to the platform fails.
        TimeoutError: If the request times out.
        RuntimeError: If the platform answers with HTTP >= 400 or with a
            body that is not valid JSON.
    """
    from urllib.parse import quote

    if not name:
        # An empty name would address the runners collection, not a runner.
        raise ValueError("name is required")

    platform_url = self._get_platform_url()
    # Percent-encode the name so characters such as "/", "?" or "#" cannot
    # change which path is requested.
    encoded_name = quote(name, safe="")
    url = f"{platform_url}/api/v1/runners/{encoded_name}"

    with httpx.Client(timeout=self.timeout) as client:
        try:
            response = client.get(url, headers=self._get_headers())
        except httpx.ConnectError as e:
            raise ConnectionError(f"Failed to connect to {url}: {e}") from e
        except httpx.TimeoutException as e:
            raise TimeoutError(f"Request to {url} timed out") from e

    if response.status_code >= 400:
        try:
            error_msg = response.json().get("error", response.text)
        except Exception:
            # Error body is not a JSON object; fall back to raw text.
            error_msg = response.text[:500] if response.text else f"HTTP {response.status_code}"
        raise RuntimeError(f"Failed to get runner info: {error_msg}")

    try:
        return response.json()
    except Exception as e:
        raise RuntimeError(f"Invalid JSON response: {response.text[:200]}") from e
|
|
1245
|
+
|
|
1246
|
+
def pip_install(
    self,
    packages: Union[str, list],
    gpu: str = "L40S",
    fraction: int = 100,
    runner: str = None,
    use_uv: bool = False,
    wait: bool = True,
    stream: bool = None,
) -> GPUExecutionResult:
    """
    Install pip packages on a GPU runner.

    The installation is performed by submitting a one-line script through
    self.run() with the packages passed as its ``pip`` argument.

    Args:
        packages: Package(s) to install - string or list of strings
        gpu: GPU type (default: L40S)
        fraction: GPU fraction percentage (default: 100)
        runner: Runner name to use
        use_uv: Use uv instead of pip (faster)
        wait: Wait for completion (default: True)
        stream: Stream output in real-time

    Returns:
        GPUExecutionResult object

    Examples:
        podstack.pip_install("transformers")
        podstack.pip_install(["torch", "datasets"], gpu="A100-80G")
        podstack.pip_install("scikit-learn", use_uv=True)
    """
    # A single package name is promoted to a one-element list.
    package_list = [packages] if isinstance(packages, str) else packages
    run_options = {
        "code": "print('[Podstack] Package installation complete')",
        "gpu": gpu,
        "fraction": fraction,
        "runner": runner,
        "pip": package_list,
        "use_uv": use_uv,
        "wait": wait,
        "stream": stream,
    }
    return self.run(**run_options)
|
|
1288
|
+
|
|
1289
|
+
# === Deployment Management ===
|
|
1290
|
+
|
|
1291
|
+
def deploy_source(
    self,
    name: str,
    source: str,
    function_name: str = "predict",
    gpu: str = "A10",
    fraction: int = 100,
    count: int = 1,
    pip_packages: str = "",
    image: str = "",
) -> Dict[str, Any]:
    """Deploy raw Python source as a REST API endpoint.

    Args:
        name: Deployment name
        source: Python source code containing the serve function
        function_name: Function to expose (default: predict)
        gpu: GPU type
        fraction: GPU fraction percentage
        count: GPU count
        pip_packages: Space-separated pip packages
        image: Docker image override

    Returns:
        Deployment info dict with deployment_id and status

    Raises:
        RuntimeError: If the API answers with HTTP status >= 400.
    """
    payload = {
        "name": name,
        "source": source,
        "function_name": function_name,
        "gpu_type": gpu,
        "gpu_fraction": fraction,
        "gpu_count": count,
        "pip_packages": pip_packages,
        "image": image,
    }
    endpoint = f"{self.api_url}/api/v1/deployments/from-source"
    with httpx.Client(timeout=self.timeout) as http:
        reply = http.post(endpoint, headers=self._get_headers(), json=payload)
    if reply.status_code >= 400:
        raise RuntimeError(f"Failed to create deployment: {reply.text}")
    return reply.json()
|
|
1332
|
+
|
|
1333
|
+
def get_deployment(self, deployment_id: str) -> Dict[str, Any]:
    """Fetch a deployment's status and info.

    Returns the decoded JSON body; raises RuntimeError on HTTP status >= 400.
    """
    endpoint = f"{self.api_url}/api/v1/deployments/{deployment_id}"
    with httpx.Client(timeout=self.timeout) as http:
        reply = http.get(endpoint, headers=self._get_headers())
    request_failed = reply.status_code >= 400
    if request_failed:
        raise RuntimeError(f"Failed to get deployment: {reply.text}")
    return reply.json()
|
|
1341
|
+
|
|
1342
|
+
def list_deployments(self, notebook_id: str = None) -> List[Dict[str, Any]]:
    """Return the deployments that belong to a notebook.

    Raises ValueError when notebook_id is missing and RuntimeError on HTTP
    status >= 400. The result is the "deployments" entry of the response
    body, or an empty list when that entry is absent.
    """
    if notebook_id:
        endpoint = f"{self.api_url}/api/v1/notebooks/{notebook_id}/deployments"
        with httpx.Client(timeout=self.timeout) as http:
            reply = http.get(endpoint, headers=self._get_headers())
        if reply.status_code >= 400:
            raise RuntimeError(f"Failed to list deployments: {reply.text}")
        body = reply.json()
        return body.get("deployments", [])
    raise ValueError("notebook_id is required")
|
|
1352
|
+
|
|
1353
|
+
def stop_deployment(self, deployment_id: str) -> None:
    """Ask the API to stop a running deployment.

    Raises RuntimeError on HTTP status >= 400; returns nothing otherwise.
    """
    endpoint = f"{self.api_url}/api/v1/deployments/{deployment_id}/stop"
    with httpx.Client(timeout=self.timeout) as http:
        reply = http.post(endpoint, headers=self._get_headers())
    request_failed = reply.status_code >= 400
    if request_failed:
        raise RuntimeError(f"Failed to stop deployment: {reply.text}")
|
|
1360
|
+
|
|
1361
|
+
def delete_deployment(self, deployment_id: str) -> None:
    """Ask the API to delete a deployment.

    Raises RuntimeError on HTTP status >= 400; returns nothing otherwise.
    """
    endpoint = f"{self.api_url}/api/v1/deployments/{deployment_id}"
    with httpx.Client(timeout=self.timeout) as http:
        reply = http.delete(endpoint, headers=self._get_headers())
    request_failed = reply.status_code >= 400
    if request_failed:
        raise RuntimeError(f"Failed to delete deployment: {reply.text}")
|
|
1368
|
+
|
|
1161
1369
|
def run_function(
|
|
1162
1370
|
self,
|
|
1163
1371
|
func: Callable,
|
|
@@ -1266,7 +1474,9 @@ def run(
|
|
|
1266
1474
|
requirements: str = None,
|
|
1267
1475
|
use_uv: bool = False,
|
|
1268
1476
|
wait: bool = True,
|
|
1269
|
-
stream: bool = None
|
|
1477
|
+
stream: bool = None,
|
|
1478
|
+
nfs: List[str] = None,
|
|
1479
|
+
buckets: List[str] = None
|
|
1270
1480
|
) -> GPUExecutionResult:
|
|
1271
1481
|
"""
|
|
1272
1482
|
Execute code on remote GPU.
|
|
@@ -1285,6 +1495,8 @@ def run(
|
|
|
1285
1495
|
use_uv: Use uv instead of pip for all installations (faster)
|
|
1286
1496
|
wait: Whether to wait for completion
|
|
1287
1497
|
stream: Stream output in real-time (default: True in Jupyter, False otherwise)
|
|
1498
|
+
nfs: NFS volumes to mount — ["name:/path"] or ["name"] (auto-mounts at /mnt/nfs/name)
|
|
1499
|
+
buckets: Buckets to mount — ["name:/path"] or ["name"] (auto-mounts at /mnt/buckets/name)
|
|
1288
1500
|
|
|
1289
1501
|
Returns:
|
|
1290
1502
|
GPUExecutionResult object
|
|
@@ -1313,6 +1525,9 @@ def run(
|
|
|
1313
1525
|
|
|
1314
1526
|
# Force streaming in non-Jupyter environment
|
|
1315
1527
|
podstack.run(code, stream=True)
|
|
1528
|
+
|
|
1529
|
+
# Mount NFS volumes and buckets
|
|
1530
|
+
podstack.run_on_gpu(code, nfs=["model-weights:/weights"], buckets=["data"])
|
|
1316
1531
|
"""
|
|
1317
1532
|
return get_runner().run(
|
|
1318
1533
|
code=code,
|
|
@@ -1327,7 +1542,9 @@ def run(
|
|
|
1327
1542
|
requirements=requirements,
|
|
1328
1543
|
use_uv=use_uv,
|
|
1329
1544
|
wait=wait,
|
|
1330
|
-
stream=stream
|
|
1545
|
+
stream=stream,
|
|
1546
|
+
nfs=nfs,
|
|
1547
|
+
buckets=buckets
|
|
1331
1548
|
)
|
|
1332
1549
|
|
|
1333
1550
|
|
|
@@ -1345,6 +1562,58 @@ def list_runners() -> RunnerList:
|
|
|
1345
1562
|
return get_runner().list_runners()
|
|
1346
1563
|
|
|
1347
1564
|
|
|
1565
|
+
def get_runner_info(name: str) -> dict:
    """
    Look up one runner by name using the runner returned by get_runner().

    Args:
        name: Runner name (e.g. "pytorch-2.3.0-cuda-12.1-py3.11")

    Returns:
        Dict with runner details (name, type, description, image, libraries, scenarios)

    Example:
        info = podstack.get_runner_info("pytorch-2.3.0-cuda-12.1-py3.11")
        print(info["libraries"])
    """
    active_runner = get_runner()
    details = active_runner.get_runner_info(name)
    return details
|
|
1580
|
+
|
|
1581
|
+
|
|
1582
|
+
def pip_install(
    packages: Union[str, list],
    gpu: str = "L40S",
    fraction: int = 100,
    runner: str = None,
    use_uv: bool = False,
    wait: bool = True,
    stream: bool = None,
) -> GPUExecutionResult:
    """
    Install pip packages on a GPU runner.

    Delegates to the pip_install method of the runner returned by
    get_runner().

    Args:
        packages: Package(s) to install - string or list of strings
        gpu: GPU type (default: L40S)
        fraction: GPU fraction percentage (default: 100)
        runner: Runner name to use
        use_uv: Use uv instead of pip (faster)
        wait: Wait for completion (default: True)
        stream: Stream output in real-time

    Returns:
        GPUExecutionResult object

    Examples:
        podstack.pip_install("transformers")
        podstack.pip_install(["torch", "datasets"], gpu="A100-80G")
        podstack.pip_install("scikit-learn", use_uv=True)
    """
    install_options = {
        "gpu": gpu,
        "fraction": fraction,
        "runner": runner,
        "use_uv": use_uv,
        "wait": wait,
        "stream": stream,
    }
    active_runner = get_runner()
    return active_runner.pip_install(packages, **install_options)
|
|
1615
|
+
|
|
1616
|
+
|
|
1348
1617
|
def stream_output(execution_id: str, show_output: bool = True) -> Iterator[Dict[str, Any]]:
|
|
1349
1618
|
"""
|
|
1350
1619
|
Stream real-time output from a running execution.
|
|
@@ -1365,3 +1634,18 @@ def stream_output(execution_id: str, show_output: bool = True) -> Iterator[Dict[
|
|
|
1365
1634
|
print(f"Done!")
|
|
1366
1635
|
"""
|
|
1367
1636
|
return get_runner().stream_output(execution_id, show_output=show_output)
|
|
1637
|
+
|
|
1638
|
+
|
|
1639
|
+
def get_deployment(deployment_id: str) -> Dict[str, Any]:
    """Return status and info for a deployment via the runner from get_runner()."""
    active_runner = get_runner()
    deployment_info = active_runner.get_deployment(deployment_id)
    return deployment_info
|
|
1642
|
+
|
|
1643
|
+
|
|
1644
|
+
def stop_deployment(deployment_id: str) -> None:
    """Stop a running deployment via the runner from get_runner()."""
    active_runner = get_runner()
    active_runner.stop_deployment(deployment_id)
|
|
1647
|
+
|
|
1648
|
+
|
|
1649
|
+
def delete_deployment(deployment_id: str) -> None:
    """Delete a deployment via the runner from get_runner()."""
    active_runner = get_runner()
    active_runner.delete_deployment(deployment_id)
|