podstack 1.3.12.tar.gz → 1.3.14.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- podstack-1.3.14/PKG-INFO +919 -0
- podstack-1.3.14/README.md +867 -0
- {podstack-1.3.12 → podstack-1.3.14}/podstack/registry/client.py +640 -42
- {podstack-1.3.12 → podstack-1.3.14}/podstack/registry/experiment.py +90 -1
- podstack-1.3.14/podstack.egg-info/PKG-INFO +919 -0
- {podstack-1.3.12 → podstack-1.3.14}/pyproject.toml +1 -1
- podstack-1.3.12/PKG-INFO +0 -467
- podstack-1.3.12/README.md +0 -415
- podstack-1.3.12/podstack.egg-info/PKG-INFO +0 -467
- {podstack-1.3.12 → podstack-1.3.14}/LICENSE +0 -0
- {podstack-1.3.12 → podstack-1.3.14}/podstack/__init__.py +0 -0
- {podstack-1.3.12 → podstack-1.3.14}/podstack/annotations.py +0 -0
- {podstack-1.3.12 → podstack-1.3.14}/podstack/client.py +0 -0
- {podstack-1.3.12 → podstack-1.3.14}/podstack/exceptions.py +0 -0
- {podstack-1.3.12 → podstack-1.3.14}/podstack/execution.py +0 -0
- {podstack-1.3.12 → podstack-1.3.14}/podstack/gpu_runner.py +0 -0
- {podstack-1.3.12 → podstack-1.3.14}/podstack/models.py +0 -0
- {podstack-1.3.12 → podstack-1.3.14}/podstack/notebook.py +0 -0
- {podstack-1.3.12 → podstack-1.3.14}/podstack/registry/__init__.py +0 -0
- {podstack-1.3.12 → podstack-1.3.14}/podstack/registry/autolog.py +0 -0
- {podstack-1.3.12 → podstack-1.3.14}/podstack/registry/exceptions.py +0 -0
- {podstack-1.3.12 → podstack-1.3.14}/podstack/registry/model.py +0 -0
- {podstack-1.3.12 → podstack-1.3.14}/podstack/registry/model_utils.py +0 -0
- {podstack-1.3.12 → podstack-1.3.14}/podstack.egg-info/SOURCES.txt +0 -0
- {podstack-1.3.12 → podstack-1.3.14}/podstack.egg-info/dependency_links.txt +0 -0
- {podstack-1.3.12 → podstack-1.3.14}/podstack.egg-info/requires.txt +0 -0
- {podstack-1.3.12 → podstack-1.3.14}/podstack.egg-info/top_level.txt +0 -0
- {podstack-1.3.12 → podstack-1.3.14}/podstack_gpu/__init__.py +0 -0
- {podstack-1.3.12 → podstack-1.3.14}/podstack_gpu/app.py +0 -0
- {podstack-1.3.12 → podstack-1.3.14}/podstack_gpu/exceptions.py +0 -0
- {podstack-1.3.12 → podstack-1.3.14}/podstack_gpu/image.py +0 -0
- {podstack-1.3.12 → podstack-1.3.14}/podstack_gpu/runner.py +0 -0
- {podstack-1.3.12 → podstack-1.3.14}/podstack_gpu/secret.py +0 -0
- {podstack-1.3.12 → podstack-1.3.14}/podstack_gpu/utils.py +0 -0
- {podstack-1.3.12 → podstack-1.3.14}/podstack_gpu/volume.py +0 -0
- {podstack-1.3.12 → podstack-1.3.14}/setup.cfg +0 -0
podstack-1.3.14/PKG-INFO
ADDED
|
@@ -0,0 +1,919 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: podstack
|
|
3
|
+
Version: 1.3.14
|
|
4
|
+
Summary: Official Python SDK for Podstack GPU Notebook Platform
|
|
5
|
+
Author-email: Podstack <support@podstack.ai>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://podstack.ai
|
|
8
|
+
Project-URL: Documentation, https://docs.podstack.ai
|
|
9
|
+
Project-URL: Repository, https://github.com/podstack/podstack-python
|
|
10
|
+
Project-URL: Issues, https://github.com/podstack/podstack-python/issues
|
|
11
|
+
Keywords: gpu,notebook,machine-learning,deep-learning,cloud,jupyter
|
|
12
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
23
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
24
|
+
Requires-Python: >=3.8
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
License-File: LICENSE
|
|
27
|
+
Requires-Dist: httpx>=0.24.0
|
|
28
|
+
Requires-Dist: requests>=2.28.0
|
|
29
|
+
Provides-Extra: torch
|
|
30
|
+
Requires-Dist: torch; extra == "torch"
|
|
31
|
+
Provides-Extra: tensorflow
|
|
32
|
+
Requires-Dist: tensorflow; extra == "tensorflow"
|
|
33
|
+
Provides-Extra: sklearn
|
|
34
|
+
Requires-Dist: scikit-learn; extra == "sklearn"
|
|
35
|
+
Provides-Extra: huggingface
|
|
36
|
+
Requires-Dist: transformers; extra == "huggingface"
|
|
37
|
+
Requires-Dist: safetensors; extra == "huggingface"
|
|
38
|
+
Provides-Extra: all
|
|
39
|
+
Requires-Dist: torch; extra == "all"
|
|
40
|
+
Requires-Dist: tensorflow; extra == "all"
|
|
41
|
+
Requires-Dist: scikit-learn; extra == "all"
|
|
42
|
+
Requires-Dist: transformers; extra == "all"
|
|
43
|
+
Requires-Dist: safetensors; extra == "all"
|
|
44
|
+
Provides-Extra: dev
|
|
45
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
46
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
|
|
47
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
|
48
|
+
Requires-Dist: black>=23.0.0; extra == "dev"
|
|
49
|
+
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
50
|
+
Requires-Dist: ruff>=0.0.270; extra == "dev"
|
|
51
|
+
Dynamic: license-file
|
|
52
|
+
|
|
53
|
+
# Podstack Python SDK
|
|
54
|
+
|
|
55
|
+
Official Python SDK for the Podstack GPU Platform. Run ML workloads on remote GPUs with simple decorators, track experiments, and manage models.
|
|
56
|
+
|
|
57
|
+
## Installation
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
pip install podstack
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
With optional dependencies:
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
pip install podstack[torch] # PyTorch support
|
|
67
|
+
pip install podstack[huggingface] # HuggingFace Transformers
|
|
68
|
+
pip install podstack[all] # All ML frameworks
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## Quick Start
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
import podstack
|
|
75
|
+
|
|
76
|
+
# Initialize the SDK
|
|
77
|
+
podstack.init(
|
|
78
|
+
api_key="your-api-key",
|
|
79
|
+
project_id="your-project-id"
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
# Run a function on a remote GPU with a single decorator
|
|
83
|
+
@podstack.gpu(type="L40S", fraction=100)
|
|
84
|
+
def train():
|
|
85
|
+
import torch
|
|
86
|
+
print(f"GPU: {torch.cuda.get_device_name(0)}")
|
|
87
|
+
return {"status": "done"}
|
|
88
|
+
|
|
89
|
+
result = train() # Executes on remote GPU!
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
## Decorators & Annotations
|
|
93
|
+
|
|
94
|
+
Podstack provides decorators that turn any Python function into a remote GPU workload with built-in experiment tracking.
|
|
95
|
+
|
|
96
|
+
### `@podstack.gpu` - Remote GPU Execution
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
import podstack
|
|
100
|
+
|
|
101
|
+
# Basic GPU execution
|
|
102
|
+
@podstack.gpu(type="L40S")
|
|
103
|
+
def train_model():
|
|
104
|
+
import torch
|
|
105
|
+
model = torch.nn.Linear(768, 10).cuda()
|
|
106
|
+
return {"params": sum(p.numel() for p in model.parameters())}
|
|
107
|
+
|
|
108
|
+
result = train_model()
|
|
109
|
+
|
|
110
|
+
# Specify GPU type, count, and fraction
|
|
111
|
+
@podstack.gpu(type="A100-80G", count=2, fraction=100)
|
|
112
|
+
def train_large_model():
|
|
113
|
+
import torch
|
|
114
|
+
print(f"GPUs available: {torch.cuda.device_count()}")
|
|
115
|
+
|
|
116
|
+
# Install pip packages on the fly
|
|
117
|
+
@podstack.gpu(type="L40S", pip=["transformers", "datasets", "accelerate"])
|
|
118
|
+
def finetune_llm():
|
|
119
|
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
120
|
+
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
|
|
121
|
+
...
|
|
122
|
+
|
|
123
|
+
# Use uv for faster package installation
|
|
124
|
+
@podstack.gpu(type="L40S", uv=["torch", "transformers"])
|
|
125
|
+
def fast_setup():
|
|
126
|
+
...
|
|
127
|
+
|
|
128
|
+
# Install from requirements.txt
|
|
129
|
+
@podstack.gpu(type="L40S", requirements="requirements.txt", use_uv=True)
|
|
130
|
+
def train_with_deps():
|
|
131
|
+
...
|
|
132
|
+
|
|
133
|
+
# Use conda packages
|
|
134
|
+
@podstack.gpu(type="L40S", conda="cudatoolkit=11.8")
|
|
135
|
+
def train_with_conda():
|
|
136
|
+
...
|
|
137
|
+
|
|
138
|
+
# Use a pre-built environment
|
|
139
|
+
@podstack.gpu(type="L40S", env="nlp")
|
|
140
|
+
def nlp_task():
|
|
141
|
+
...
|
|
142
|
+
|
|
143
|
+
# Set execution timeout (default: 3600s)
|
|
144
|
+
@podstack.gpu(type="L40S", timeout=7200)
|
|
145
|
+
def long_training():
|
|
146
|
+
...
|
|
147
|
+
|
|
148
|
+
# Disable remote execution (run locally for debugging)
|
|
149
|
+
@podstack.gpu(type="L40S", remote=False)
|
|
150
|
+
def debug_locally():
|
|
151
|
+
print("This runs on your local machine")
|
|
152
|
+
|
|
153
|
+
# Use as a context manager
|
|
154
|
+
with podstack.gpu(type="A100-80G", count=2) as cfg:
|
|
155
|
+
print(f"GPU config set: {cfg.type}")
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
**Available GPU types:** `T4`, `L4`, `A10`, `L40S`, `A100-40G`, `A100-80G`, `H100`
|
|
159
|
+
|
|
160
|
+
**Available environments:** `ml`, `nlp`, `cv`, `audio`, `tabular`, `rl`, `scientific`
|
|
161
|
+
|
|
162
|
+
### `@podstack.experiment` - Experiment Tracking
|
|
163
|
+
|
|
164
|
+
```python
|
|
165
|
+
import podstack
|
|
166
|
+
|
|
167
|
+
# As a decorator
|
|
168
|
+
@podstack.experiment(name="transformer-experiments")
|
|
169
|
+
def run_experiment():
|
|
170
|
+
...
|
|
171
|
+
|
|
172
|
+
# As a context manager
|
|
173
|
+
with podstack.experiment(name="transformer-experiments") as exp:
|
|
174
|
+
print(f"Experiment ID: {exp.id}")
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
### `@podstack.run` - Run Tracking
|
|
178
|
+
|
|
179
|
+
Automatically tracks execution time and GPU configuration.
|
|
180
|
+
|
|
181
|
+
```python
|
|
182
|
+
import podstack
|
|
183
|
+
|
|
184
|
+
# As a decorator
|
|
185
|
+
@podstack.experiment(name="my-experiment")
|
|
186
|
+
@podstack.run(name="training-v1", track_gpu=True)
|
|
187
|
+
def train():
|
|
188
|
+
podstack.registry.log_params({"lr": 0.001, "batch_size": 32})
|
|
189
|
+
for epoch in range(10):
|
|
190
|
+
loss = 1.0 / (epoch + 1)
|
|
191
|
+
podstack.registry.log_metrics({"loss": loss}, step=epoch)
|
|
192
|
+
|
|
193
|
+
# As a context manager
|
|
194
|
+
with podstack.run(name="training-v1") as run:
|
|
195
|
+
podstack.registry.log_params({"lr": 0.001})
|
|
196
|
+
podstack.registry.log_metrics({"loss": 0.5}, step=1)
|
|
197
|
+
print(f"Run ID: {run.id}")
|
|
198
|
+
|
|
199
|
+
# With tags
|
|
200
|
+
@podstack.run(name="ablation-study", tags={"variant": "no-dropout"})
|
|
201
|
+
def ablation():
|
|
202
|
+
...
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
### `@podstack.model` - Model Registration
|
|
206
|
+
|
|
207
|
+
```python
|
|
208
|
+
import podstack
|
|
209
|
+
|
|
210
|
+
# Register model after function completes
|
|
211
|
+
@podstack.experiment(name="my-experiment")
|
|
212
|
+
@podstack.run(name="training-v1")
|
|
213
|
+
@podstack.model.register(name="my-classifier")
|
|
214
|
+
def train_and_save():
|
|
215
|
+
import torch
|
|
216
|
+
model = torch.nn.Linear(768, 10)
|
|
217
|
+
torch.save(model.state_dict(), "model.pt")
|
|
218
|
+
podstack.registry.log_artifact("model.pt", "model")
|
|
219
|
+
|
|
220
|
+
# Promote model to production after validation
|
|
221
|
+
@podstack.model.promote(name="my-classifier", version=1, stage="production")
|
|
222
|
+
def validate_and_promote():
|
|
223
|
+
# Run validation checks
|
|
224
|
+
accuracy = 0.95
|
|
225
|
+
assert accuracy > 0.90, "Model doesn't meet threshold"
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
### Combining Decorators
|
|
229
|
+
|
|
230
|
+
Stack decorators for a complete ML workflow:
|
|
231
|
+
|
|
232
|
+
```python
|
|
233
|
+
import podstack
|
|
234
|
+
|
|
235
|
+
podstack.init(api_key="your-api-key", project_id="your-project-id")
|
|
236
|
+
|
|
237
|
+
@podstack.gpu(type="L40S", pip=["transformers", "datasets"])
|
|
238
|
+
@podstack.experiment(name="sentiment-analysis")
|
|
239
|
+
@podstack.run(name="bert-finetune-v1", track_gpu=True)
|
|
240
|
+
@podstack.model.register(name="sentiment-bert")
|
|
241
|
+
def full_pipeline():
|
|
242
|
+
from transformers import AutoModelForSequenceClassification, Trainer
|
|
243
|
+
|
|
244
|
+
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")
|
|
245
|
+
|
|
246
|
+
# Log hyperparameters
|
|
247
|
+
podstack.registry.log_params({
|
|
248
|
+
"model": "bert-base-uncased",
|
|
249
|
+
"learning_rate": 2e-5,
|
|
250
|
+
"epochs": 3
|
|
251
|
+
})
|
|
252
|
+
|
|
253
|
+
# Train...
|
|
254
|
+
podstack.registry.log_metrics({"accuracy": 0.92, "f1": 0.89})
|
|
255
|
+
|
|
256
|
+
return {"accuracy": 0.92}
|
|
257
|
+
|
|
258
|
+
result = full_pipeline() # Runs on remote L40S GPU with full tracking
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
## Registry - Experiment Tracking & Model Management
|
|
262
|
+
|
|
263
|
+
### Initialize
|
|
264
|
+
|
|
265
|
+
```python
|
|
266
|
+
from podstack import registry
|
|
267
|
+
|
|
268
|
+
registry.init(
|
|
269
|
+
api_key="your-api-key",
|
|
270
|
+
project_id="your-project-id"
|
|
271
|
+
)
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
### Track Experiments and Runs
|
|
275
|
+
|
|
276
|
+
```python
|
|
277
|
+
from podstack import registry
|
|
278
|
+
|
|
279
|
+
# Set experiment
|
|
280
|
+
registry.set_experiment("my-experiment")
|
|
281
|
+
|
|
282
|
+
# Start a tracked run
|
|
283
|
+
with registry.start_run(name="training-v1") as run:
|
|
284
|
+
# Log hyperparameters
|
|
285
|
+
registry.log_params({
|
|
286
|
+
"learning_rate": 0.001,
|
|
287
|
+
"batch_size": 32,
|
|
288
|
+
"epochs": 10,
|
|
289
|
+
"optimizer": "adam"
|
|
290
|
+
})
|
|
291
|
+
|
|
292
|
+
# Log metrics at each step
|
|
293
|
+
for epoch in range(10):
|
|
294
|
+
loss = train_epoch()
|
|
295
|
+
accuracy = evaluate()
|
|
296
|
+
registry.log_metrics({"loss": loss, "accuracy": accuracy}, step=epoch)
|
|
297
|
+
|
|
298
|
+
# Set tags
|
|
299
|
+
registry.set_tag("framework", "pytorch")
|
|
300
|
+
|
|
301
|
+
# Upload artifacts to cloud artifact store
|
|
302
|
+
registry.log_artifact("model.pt")
|
|
303
|
+
registry.log_artifact("training_curves.png", artifact_path="plots/curves.png")
|
|
304
|
+
|
|
305
|
+
# Log dataset provenance (first-class resource, deduped by content hash)
|
|
306
|
+
registry.log_dataset("imdb-reviews", path="data/imdb.csv", context="training")
|
|
307
|
+
|
|
308
|
+
# Or pass a DataFrame — schema and row/feature counts are auto-computed
|
|
309
|
+
import pandas as pd
|
|
310
|
+
df = pd.read_csv("data/imdb.csv")
|
|
311
|
+
registry.log_dataset("imdb-reviews", df=df, context="training")
|
|
312
|
+
```
|
|
313
|
+
|
|
314
|
+
### Log and Load Models
|
|
315
|
+
|
|
316
|
+
```python
|
|
317
|
+
from podstack import registry
|
|
318
|
+
|
|
319
|
+
# Serialize and upload the model to the artifact store (auto-detects framework)
|
|
320
|
+
registry.log_model(model, artifact_path="model", framework="pytorch")
|
|
321
|
+
|
|
322
|
+
# Register in model registry
|
|
323
|
+
registry.register_model(
|
|
324
|
+
name="my-classifier",
|
|
325
|
+
run_id=run.id,
|
|
326
|
+
description="BERT sentiment classifier"
|
|
327
|
+
)
|
|
328
|
+
|
|
329
|
+
# Promote to production
|
|
330
|
+
registry.set_model_stage("my-classifier", version=1, stage="production")
|
|
331
|
+
|
|
332
|
+
# Set aliases
|
|
333
|
+
registry.set_model_alias("my-classifier", alias="champion", version=1)
|
|
334
|
+
|
|
335
|
+
# Load model from any machine — files are downloaded automatically if missing locally
|
|
336
|
+
model = registry.load_model("my-classifier", stage="production")
|
|
337
|
+
```
|
|
338
|
+
|
|
339
|
+
### Compare Runs
|
|
340
|
+
|
|
341
|
+
```python
|
|
342
|
+
from podstack import registry
|
|
343
|
+
|
|
344
|
+
# Compare multiple runs
|
|
345
|
+
comparison = registry.compare_runs(
|
|
346
|
+
run_ids=["run-id-1", "run-id-2", "run-id-3"],
|
|
347
|
+
metric_keys=["loss", "accuracy"]
|
|
348
|
+
)
|
|
349
|
+
|
|
350
|
+
# Get metric history for a run
|
|
351
|
+
history = registry.get_metric_history("run-id-1", "loss")
|
|
352
|
+
for point in history:
|
|
353
|
+
print(f"Step {point.step}: {point.value}")
|
|
354
|
+
|
|
355
|
+
# Search runs
|
|
356
|
+
runs = registry.search_runs(
|
|
357
|
+
experiment_id="exp-id",
|
|
358
|
+
status="completed",
|
|
359
|
+
max_results=50
|
|
360
|
+
)
|
|
361
|
+
```
|
|
362
|
+
|
|
363
|
+
### Dataset Tracking & Lineage
|
|
364
|
+
|
|
365
|
+
Podstack tracks datasets as first-class resources, linking them to runs and model versions so you can always answer *"what data was this model trained on?"*
|
|
366
|
+
|
|
367
|
+
The lineage chain is:
|
|
368
|
+
|
|
369
|
+
```
|
|
370
|
+
Dataset(s) ──[logged to]──▶ Run ──[run_id]──▶ ModelVersion
|
|
371
|
+
```
|
|
372
|
+
|
|
373
|
+
#### `log_dataset()` — log a dataset to the active run
|
|
374
|
+
|
|
375
|
+
```python
|
|
376
|
+
dataset = registry.log_dataset(
|
|
377
|
+
name="imdb-reviews", # required — human-readable name
|
|
378
|
+
path="data/imdb.csv", # local path or URI (s3://, gcs://, https://)
|
|
379
|
+
context="training", # "training" | "validation" | "test" (default: "training")
|
|
380
|
+
)
|
|
381
|
+
```
|
|
382
|
+
|
|
383
|
+
The dataset is stored as a **project-level resource** and linked to the current run.
|
|
384
|
+
Subsequent calls with the same file produce the same dataset record — no duplicates.
|
|
385
|
+
|
|
386
|
+
**Auto-enrichment from a local file:**
|
|
387
|
+
|
|
388
|
+
```python
|
|
389
|
+
# SHA-256 digest is computed automatically for files ≤ 500 MB.
|
|
390
|
+
# This enables deduplication across runs — if two runs use the exact
|
|
391
|
+
# same file, they share one Dataset record in the registry.
|
|
392
|
+
dataset = registry.log_dataset("imdb-reviews", path="data/imdb.csv")
|
|
393
|
+
print(dataset.digest) # "a3f2c1..." — hex SHA-256
|
|
394
|
+
```
|
|
395
|
+
|
|
396
|
+
**Auto-enrichment from a pandas DataFrame:**
|
|
397
|
+
|
|
398
|
+
```python
|
|
399
|
+
import pandas as pd
|
|
400
|
+
|
|
401
|
+
df = pd.read_csv("data/imdb.csv")
|
|
402
|
+
|
|
403
|
+
dataset = registry.log_dataset(
|
|
404
|
+
name="imdb-reviews",
|
|
405
|
+
df=df,
|
|
406
|
+
context="training",
|
|
407
|
+
)
|
|
408
|
+
# schema and profile are computed automatically:
|
|
409
|
+
print(dataset.schema) # {"text": "object", "label": "int64"}
|
|
410
|
+
print(dataset.profile) # {"num_rows": 50000, "num_features": 2}
|
|
411
|
+
```
|
|
412
|
+
|
|
413
|
+
**Pass both `path` and `df`** to get digest dedup *and* schema inference:
|
|
414
|
+
|
|
415
|
+
```python
|
|
416
|
+
dataset = registry.log_dataset("imdb-reviews", path="data/imdb.csv", df=df)
|
|
417
|
+
```
|
|
418
|
+
|
|
419
|
+
**All parameters:**
|
|
420
|
+
|
|
421
|
+
| Parameter | Type | Default | Description |
|
|
422
|
+
|-----------|------|---------|-------------|
|
|
423
|
+
| `name` | `str` | required | Human-readable dataset name |
|
|
424
|
+
| `path` | `str` | `None` | Local file path or URI (`s3://`, `gcs://`, `https://`) |
|
|
425
|
+
| `df` | `DataFrame` | `None` | pandas DataFrame — schema and profile auto-computed |
|
|
426
|
+
| `context` | `str` | `"training"` | Role of the dataset: `"training"`, `"validation"`, or `"test"` |
|
|
427
|
+
| `digest` | `str` | `None` | SHA-256 hex digest. Computed from `path` if not provided |
|
|
428
|
+
| `source_type` | `str` | `"local"` | Storage backend: `"local"`, `"s3"`, `"gcs"`, `"url"` |
|
|
429
|
+
| `tags` | `dict` | `None` | Arbitrary string key-value tags |
|
|
430
|
+
|
|
431
|
+
**Returns:** `Dataset` object with fields:
|
|
432
|
+
|
|
433
|
+
| Field | Type | Description |
|
|
434
|
+
|-------|------|-------------|
|
|
435
|
+
| `id` | `str` | UUID of the dataset record |
|
|
436
|
+
| `name` | `str` | Dataset name |
|
|
437
|
+
| `digest` | `str` | SHA-256 hex digest (empty if not computed) |
|
|
438
|
+
| `source_type` | `str` | Storage backend |
|
|
439
|
+
| `source` | `str` | File path or URI |
|
|
440
|
+
| `schema` | `dict` | Column → dtype mapping |
|
|
441
|
+
| `profile` | `dict` | `num_rows`, `num_features`, and any other stats |
|
|
442
|
+
| `tags` | `dict` | Tags dict |
|
|
443
|
+
| `created_at` | `str` | ISO 8601 timestamp |
|
|
444
|
+
|
|
445
|
+
**Via the `Run` object** (equivalent to calling `registry.log_dataset()`):
|
|
446
|
+
|
|
447
|
+
```python
|
|
448
|
+
with registry.start_run("training-v1") as run:
|
|
449
|
+
dataset = run.log_dataset("imdb-reviews", df=df, context="training")
|
|
450
|
+
```
|
|
451
|
+
|
|
452
|
+
#### Multiple datasets per run
|
|
453
|
+
|
|
454
|
+
Log validation and test sets alongside the training set:
|
|
455
|
+
|
|
456
|
+
```python
|
|
457
|
+
with registry.start_run("bert-finetune") as run:
|
|
458
|
+
run.log_dataset("imdb-train", df=train_df, context="training")
|
|
459
|
+
run.log_dataset("imdb-val", df=val_df, context="validation")
|
|
460
|
+
run.log_dataset("imdb-test", df=test_df, context="test")
|
|
461
|
+
```
|
|
462
|
+
|
|
463
|
+
#### `get_run_datasets()` — retrieve datasets logged to a run
|
|
464
|
+
|
|
465
|
+
Returns every `Dataset` object linked to a run, in the order they were logged.
|
|
466
|
+
|
|
467
|
+
```python
|
|
468
|
+
datasets = registry.get_run_datasets(run_id)
|
|
469
|
+
```
|
|
470
|
+
|
|
471
|
+
**Parameters:**
|
|
472
|
+
|
|
473
|
+
| Parameter | Type | Description |
|
|
474
|
+
|-----------|------|-------------|
|
|
475
|
+
| `run_id` | `str` | ID of the run to query |
|
|
476
|
+
|
|
477
|
+
**Returns:** `list[Dataset]` — same object as returned by `log_dataset()`.
|
|
478
|
+
|
|
479
|
+
**Fields on each `Dataset`:**
|
|
480
|
+
|
|
481
|
+
| Field | Type | Description |
|
|
482
|
+
|-------|------|-------------|
|
|
483
|
+
| `id` | `str` | UUID of the dataset record |
|
|
484
|
+
| `name` | `str` | Human-readable name |
|
|
485
|
+
| `digest` | `str` | SHA-256 hex digest (empty if not computed at log time) |
|
|
486
|
+
| `source_type` | `str` | `"local"`, `"s3"`, `"gcs"`, or `"url"` |
|
|
487
|
+
| `source` | `str` | File path or URI that was passed to `log_dataset()` |
|
|
488
|
+
| `schema` | `dict` | Column → dtype mapping (e.g. `{"text": "object", "label": "int64"}`) |
|
|
489
|
+
| `profile` | `dict` | Stats dict, always contains `num_rows` and `num_features` when a DataFrame was passed |
|
|
490
|
+
| `tags` | `dict` | Key-value tags |
|
|
491
|
+
| `created_at` | `str` | ISO 8601 timestamp |
|
|
492
|
+
|
|
493
|
+
**Examples:**
|
|
494
|
+
|
|
495
|
+
```python
|
|
496
|
+
from podstack import registry
|
|
497
|
+
|
|
498
|
+
registry.init(api_key="...", project_id="...")
|
|
499
|
+
|
|
500
|
+
datasets = registry.get_run_datasets("3a9f12c4-...")
|
|
501
|
+
|
|
502
|
+
# Inspect each dataset
|
|
503
|
+
for ds in datasets:
|
|
504
|
+
print(ds.name)
|
|
505
|
+
print(f" source : {ds.source}")
|
|
506
|
+
print(f" digest : {ds.digest[:16]}…")
|
|
507
|
+
print(f" rows : {ds.profile.get('num_rows', 'unknown')}")
|
|
508
|
+
print(f" schema : {ds.schema}")
|
|
509
|
+
```
|
|
510
|
+
|
|
511
|
+
Checking datasets on a run you have in hand:
|
|
512
|
+
|
|
513
|
+
```python
|
|
514
|
+
with registry.start_run("training-v1") as run:
|
|
515
|
+
run.log_dataset("train", df=train_df, context="training")
|
|
516
|
+
run.log_dataset("val", df=val_df, context="validation")
|
|
517
|
+
|
|
518
|
+
# After the run completes, retrieve everything that was logged
|
|
519
|
+
datasets = registry.get_run_datasets(run.id)
|
|
520
|
+
assert len(datasets) == 2
|
|
521
|
+
```
|
|
522
|
+
|
|
523
|
+
Verifying deduplication — the same physical file logged across two runs
|
|
524
|
+
returns the same dataset ID:
|
|
525
|
+
|
|
526
|
+
```python
|
|
527
|
+
ds1 = registry.get_run_datasets(run_a.id)[0]
|
|
528
|
+
ds2 = registry.get_run_datasets(run_b.id)[0]
|
|
529
|
+
|
|
530
|
+
# Same file → same digest → same Dataset record
|
|
531
|
+
assert ds1.id == ds2.id
|
|
532
|
+
assert ds1.digest == ds2.digest
|
|
533
|
+
```
|
|
534
|
+
|
|
535
|
+
#### `get_model_lineage()` — trace a model back to its training data
|
|
536
|
+
|
|
537
|
+
Returns the full provenance chain for every version of a registered model:
|
|
538
|
+
which datasets each version was trained on, via which run.
|
|
539
|
+
|
|
540
|
+
```python
|
|
541
|
+
lineage = registry.get_model_lineage(model_id)
|
|
542
|
+
```
|
|
543
|
+
|
|
544
|
+
**Parameters:**
|
|
545
|
+
|
|
546
|
+
| Parameter | Type | Description |
|
|
547
|
+
|-----------|------|-------------|
|
|
548
|
+
| `model_id` | `str` | ID of the registered model |
|
|
549
|
+
|
|
550
|
+
**Returns:** `dict` with the following structure:
|
|
551
|
+
|
|
552
|
+
```
|
|
553
|
+
{
|
|
554
|
+
"model_id": str,
|
|
555
|
+
"versions": [
|
|
556
|
+
{
|
|
557
|
+
"version": int, # version number (1, 2, 3 …)
|
|
558
|
+
"stage": str, # "development" | "staging" | "production" | "archived"
|
|
559
|
+
"run_id": str, # ID of the linked training run (empty if none)
|
|
560
|
+
"run_name": str, # display name of the run
|
|
561
|
+
"datasets": [Dataset] # list of Dataset dicts logged to that run
|
|
562
|
+
},
|
|
563
|
+
…
|
|
564
|
+
]
|
|
565
|
+
}
|
|
566
|
+
```
|
|
567
|
+
|
|
568
|
+
Each `datasets` entry has the same fields as a `Dataset` object
|
|
569
|
+
(`id`, `name`, `digest`, `source_type`, `source`, `schema`, `profile`, `tags`, `created_at`).
|
|
570
|
+
|
|
571
|
+
**Examples:**
|
|
572
|
+
|
|
573
|
+
Basic iteration:
|
|
574
|
+
|
|
575
|
+
```python
|
|
576
|
+
from podstack import registry
|
|
577
|
+
|
|
578
|
+
registry.init(api_key="...", project_id="...")
|
|
579
|
+
|
|
580
|
+
model = registry.get_model("sentiment-bert")
|
|
581
|
+
lineage = registry.get_model_lineage(model.id)
|
|
582
|
+
|
|
583
|
+
for version in lineage["versions"]:
|
|
584
|
+
print(f"v{version['version']} · {version['stage']}")
|
|
585
|
+
print(f" Run: {version['run_name']} ({version['run_id'][:8]}…)")
|
|
586
|
+
for ds in version["datasets"]:
|
|
587
|
+
rows = ds["profile"].get("num_rows", "?")
|
|
588
|
+
print(f" └─ {ds['name']} {rows} rows sha256:{ds['digest'][:12]}…")
|
|
589
|
+
```
|
|
590
|
+
|
|
591
|
+
Example output:
|
|
592
|
+
|
|
593
|
+
```
|
|
594
|
+
v3 · production
|
|
595
|
+
Run: bert-finetune-v3 (3a9f12c4…)
|
|
596
|
+
└─ imdb-train 40000 rows sha256:a3f2c1d8e9b0…
|
|
597
|
+
└─ imdb-val 5000 rows sha256:7e4b2f1a0c3d…
|
|
598
|
+
v2 · staging
|
|
599
|
+
Run: bert-finetune-v2 (8b2e77d1…)
|
|
600
|
+
└─ imdb-train 40000 rows sha256:a3f2c1d8e9b0…
|
|
601
|
+
v1 · archived
|
|
602
|
+
Run: bert-finetune-v1 (f1c3a0e2…)
|
|
603
|
+
└─ imdb-train 40000 rows sha256:a3f2c1d8e9b0…
|
|
604
|
+
```
|
|
605
|
+
|
|
606
|
+
Finding every unique dataset ever used to train any version of a model:
|
|
607
|
+
|
|
608
|
+
```python
|
|
609
|
+
lineage = registry.get_model_lineage(model.id)
|
|
610
|
+
seen = {}
|
|
611
|
+
for version in lineage["versions"]:
|
|
612
|
+
for ds in version["datasets"]:
|
|
613
|
+
seen[ds["id"]] = ds # dedup by ID
|
|
614
|
+
|
|
615
|
+
unique_datasets = list(seen.values())
|
|
616
|
+
print(f"{len(unique_datasets)} unique dataset(s) across all versions")
|
|
617
|
+
```
|
|
618
|
+
|
|
619
|
+
Checking whether the production version was trained on an approved dataset:
|
|
620
|
+
|
|
621
|
+
```python
|
|
622
|
+
APPROVED_DIGEST = "a3f2c1d8e9b0..."
|
|
623
|
+
|
|
624
|
+
lineage = registry.get_model_lineage(model.id)
|
|
625
|
+
prod = next(v for v in lineage["versions"] if v["stage"] == "production")
|
|
626
|
+
|
|
627
|
+
approved = any(ds["digest"] == APPROVED_DIGEST for ds in prod["datasets"])
|
|
628
|
+
print("Production model trained on approved data:", approved)
|
|
629
|
+
```
|
|
630
|
+
|
|
631
|
+
#### End-to-end example
|
|
632
|
+
|
|
633
|
+
```python
|
|
634
|
+
import pandas as pd
|
|
635
|
+
from podstack import registry
|
|
636
|
+
|
|
637
|
+
registry.init(api_key="...", project_id="...")
|
|
638
|
+
registry.set_experiment("sentiment-analysis")
|
|
639
|
+
|
|
640
|
+
# Load data
|
|
641
|
+
train_df = pd.read_csv("data/train.csv")
|
|
642
|
+
val_df = pd.read_csv("data/val.csv")
|
|
643
|
+
|
|
644
|
+
with registry.start_run("bert-finetune-v3") as run:
|
|
645
|
+
# Log datasets — digest is auto-computed, schema inferred
|
|
646
|
+
run.log_dataset("imdb-train", path="data/train.csv", df=train_df, context="training")
|
|
647
|
+
run.log_dataset("imdb-val", path="data/val.csv", df=val_df, context="validation")
|
|
648
|
+
|
|
649
|
+
# Train
|
|
650
|
+
run.log_params({"lr": 2e-5, "epochs": 3})
|
|
651
|
+
run.log_metrics({"accuracy": 0.93, "f1": 0.92})
|
|
652
|
+
|
|
653
|
+
# Register and promote the model
|
|
654
|
+
registry.register_model("sentiment-bert", run_id=run.id)
|
|
655
|
+
registry.set_model_stage("sentiment-bert", version=3, stage="production")
|
|
656
|
+
|
|
657
|
+
# Later — answer "what data trained v3?"
|
|
658
|
+
model = registry.get_model("sentiment-bert")
|
|
659
|
+
lineage = registry.get_model_lineage(model.id)
|
|
660
|
+
```
|
|
661
|
+
|
|
662
|
+
### Artifact Storage
|
|
663
|
+
|
|
664
|
+
Podstack stores every artifact you log — model files, plots, CSV exports, anything — in the project's cloud artifact store. Artifacts are keyed by run ID, so the same file can be retrieved from any machine, by any project member, at any time.
|
|
665
|
+
|
|
666
|
+
#### `log_artifact()` — upload a file for the active run
|
|
667
|
+
|
|
668
|
+
```python
|
|
669
|
+
# Upload a single file (uses the filename as the artifact path)
|
|
670
|
+
registry.log_artifact("model.pt")
|
|
671
|
+
|
|
672
|
+
# Upload with an explicit path inside the artifact store
|
|
673
|
+
registry.log_artifact("training_curves.png", artifact_path="plots/curves.png")
|
|
674
|
+
registry.log_artifact("feature_importance.csv", artifact_path="analysis/features.csv")
|
|
675
|
+
```
|
|
676
|
+
|
|
677
|
+
**Parameters:**
|
|
678
|
+
|
|
679
|
+
| Parameter | Type | Default | Description |
|
|
680
|
+
|-----------|------|---------|-------------|
|
|
681
|
+
| `local_path` | `str` | required | Path to the local file to upload |
|
|
682
|
+
| `artifact_path` | `str` | filename | Relative path inside the artifact store. Defaults to `os.path.basename(local_path)` |
|
|
683
|
+
|
|
684
|
+
If the artifact store is temporarily unreachable, the SDK saves the file to a local fallback cache (`~/.podstack/artifacts/<run_id>/`) so your run is never interrupted.
|
|
685
|
+
|
|
686
|
+
**Via the `Run` object** — equivalent to calling `registry.log_artifact()`:
|
|
687
|
+
|
|
688
|
+
```python
|
|
689
|
+
with registry.start_run("training-v1") as run:
|
|
690
|
+
run.log_artifact("confusion_matrix.png", artifact_path="plots/confusion_matrix.png")
|
|
691
|
+
run.log_artifact("model.pkl")
|
|
692
|
+
```
|
|
693
|
+
|
|
694
|
+
#### `list_artifacts()` — list all artifacts for a run
|
|
695
|
+
|
|
696
|
+
```python
|
|
697
|
+
artifacts = registry.list_artifacts(run_id)
|
|
698
|
+
for a in artifacts:
|
|
699
|
+
print(f"{a['path']:40s} {a['size'] / 1e6:.1f} MB {a['last_modified']}")
|
|
700
|
+
```
|
|
701
|
+
|
|
702
|
+
**Parameters:**
|
|
703
|
+
|
|
704
|
+
| Parameter | Type | Description |
|
|
705
|
+
|-----------|------|-------------|
|
|
706
|
+
| `run_id` | `str` | ID of the run to query |
|
|
707
|
+
|
|
708
|
+
**Returns:** `list[dict]` — one entry per artifact:
|
|
709
|
+
|
|
710
|
+
| Key | Type | Description |
|
|
711
|
+
|-----|------|-------------|
|
|
712
|
+
| `path` | `str` | Relative artifact path (e.g. `"plots/curves.png"`) |
|
|
713
|
+
| `size` | `int` | File size in bytes |
|
|
714
|
+
| `etag` | `str` | Content hash for integrity verification |
|
|
715
|
+
| `last_modified` | `str` | ISO 8601 upload timestamp |
|
|
716
|
+
|
|
717
|
+
#### `download_artifact()` — retrieve an artifact
|
|
718
|
+
|
|
719
|
+
Downloads a specific artifact from the cloud store into a local directory. Falls back to the local cache when the store is unreachable.
|
|
720
|
+
|
|
721
|
+
```python
|
|
722
|
+
# Download a single file
|
|
723
|
+
dest = registry.download_artifact("run-id", "model/model.pkl", "./downloads/")
|
|
724
|
+
print(f"Saved to: {dest}")
|
|
725
|
+
|
|
726
|
+
# Download a whole model directory
|
|
727
|
+
dest = registry.download_artifact("run-id", "model", "./local_models/")
|
|
728
|
+
```
|
|
729
|
+
|
|
730
|
+
**Parameters:**
|
|
731
|
+
|
|
732
|
+
| Parameter | Type | Description |
|
|
733
|
+
|-----------|------|-------------|
|
|
734
|
+
| `run_id` | `str` | ID of the run that logged the artifact |
|
|
735
|
+
| `artifact_path` | `str` | Relative artifact path as logged (e.g. `"model/model.pkl"`) |
|
|
736
|
+
| `local_path` | `str` | Destination directory |
|
|
737
|
+
|
|
738
|
+
**Returns:** `str` — absolute path to the downloaded file or directory.
|
|
739
|
+
|
|
740
|
+
**Raises:** `ArtifactNotFoundError` if the artifact cannot be found in the store or the local cache.
|
|
741
|
+
|
|
742
|
+
#### Models as artifacts: `log_model()` and `load_model()`
|
|
743
|
+
|
|
744
|
+
`log_model()` serializes your model to disk and uploads every resulting file to the artifact store in one call. `load_model()` resolves the registered model version, downloads any missing files from the store, then deserializes the model — so it works correctly from any machine regardless of where training happened.
|
|
745
|
+
|
|
746
|
+
```python
|
|
747
|
+
# ── Training machine ──────────────────────────────────────────────────────────
|
|
748
|
+
with registry.start_run("bert-finetune-v3") as run:
|
|
749
|
+
# train...
|
|
750
|
+
registry.log_model(model, artifact_path="model", framework="pytorch")
|
|
751
|
+
|
|
752
|
+
registry.register_model("sentiment-bert", run_id=run.id)
|
|
753
|
+
registry.set_model_stage("sentiment-bert", version=3, stage="production")
|
|
754
|
+
|
|
755
|
+
# ── Any machine (CI, inference server, colleague's laptop) ───────────────────
|
|
756
|
+
# Model files are downloaded automatically from the artifact store if not cached
|
|
757
|
+
model = registry.load_model("sentiment-bert", stage="production")
|
|
758
|
+
```
|
|
759
|
+
|
|
760
|
+
**`log_model()` parameters:**
|
|
761
|
+
|
|
762
|
+
| Parameter | Type | Default | Description |
|
|
763
|
+
|-----------|------|---------|-------------|
|
|
764
|
+
| `model` | any | required | Model object (PyTorch, TensorFlow, sklearn, HuggingFace, or any picklable object) |
|
|
765
|
+
| `artifact_path` | `str` | `"model"` | Sub-path inside the artifact store |
|
|
766
|
+
| `framework` | `str` | auto-detected | `"pytorch"`, `"tensorflow"`, `"sklearn"`, `"huggingface"`, or `"pickle"` |
|
|
767
|
+
| `metadata` | `dict` | `None` | Arbitrary key-value metadata stored as run params |
|
|
768
|
+
|
|
769
|
+
**`load_model()` parameters:**
|
|
770
|
+
|
|
771
|
+
| Parameter | Type | Default | Description |
|
|
772
|
+
|-----------|------|---------|-------------|
|
|
773
|
+
| `model_name` | `str` | required | Registered model name |
|
|
774
|
+
| `version` | `int` | `None` | Specific version to load. Mutually exclusive with `stage` |
|
|
775
|
+
| `stage` | `str` | `None` | Stage to load from: `"development"`, `"staging"`, `"production"`, `"archived"` |
|
|
776
|
+
| `framework` | `str` | from run params | Override framework for deserialization |
|
|
777
|
+
|
|
778
|
+
#### Viewing artifacts in the dashboard
|
|
779
|
+
|
|
780
|
+
Every artifact logged with `log_artifact()` or `log_model()` appears automatically in the **Artifacts tab** of the run's detail page in the Podstack dashboard. No extra steps are needed — the tab populates from the same store the SDK writes to.
|
|
781
|
+
|
|
782
|
+
The Artifacts tab shows:
|
|
783
|
+
|
|
784
|
+
| Column | Description |
|
|
785
|
+
|--------|-------------|
|
|
786
|
+
| **Path** | The relative artifact path as logged (e.g. `model/model.pkl`, `plots/curves.png`) |
|
|
787
|
+
| **Type badge** | File extension, color-coded by category — model weights, data files, images, configs, etc. |
|
|
788
|
+
| **Size** | Formatted file size (B / KB / MB) |
|
|
789
|
+
| **Uploaded** | Timestamp of when the file was stored |
|
|
790
|
+
| **Download** | One-click download button — opens a short-lived direct download link in the browser |
|
|
791
|
+
|
|
792
|
+
A footer below the list shows the combined size of all artifacts for the run.
|
|
793
|
+
|
|
794
|
+
```python
|
|
795
|
+
# Everything logged here shows up in the dashboard Artifacts tab
|
|
796
|
+
with registry.start_run("bert-finetune-v3") as run:
|
|
797
|
+
registry.log_params({"lr": 2e-5, "epochs": 3})
|
|
798
|
+
registry.log_metrics({"accuracy": 0.93})
|
|
799
|
+
|
|
800
|
+
# These all appear as separate rows in the Artifacts tab
|
|
801
|
+
registry.log_artifact("confusion_matrix.png", artifact_path="plots/confusion_matrix.png")
|
|
802
|
+
registry.log_artifact("feature_importance.csv", artifact_path="analysis/features.csv")
|
|
803
|
+
registry.log_model(model, artifact_path="model", framework="pytorch")
|
|
804
|
+
# ↳ each model file (model.pkl, config.json, etc.) appears as its own row
|
|
805
|
+
```
|
|
806
|
+
|
|
807
|
+
#### Access control
|
|
808
|
+
|
|
809
|
+
Artifact upload and download URLs are issued by the registry API and require a valid API key and project membership. The URLs are short-lived, ensuring that access always reflects the current state of your project — a revoked key can no longer generate new URLs. Any member of a project can upload and download artifacts for runs within that project.
|
|
810
|
+
|
|
811
|
+
### List and Browse
|
|
812
|
+
|
|
813
|
+
```python
|
|
814
|
+
from podstack import registry
|
|
815
|
+
|
|
816
|
+
# List experiments
|
|
817
|
+
experiments = registry.list_experiments()
|
|
818
|
+
|
|
819
|
+
# List models
|
|
820
|
+
models = registry.list_models()
|
|
821
|
+
|
|
822
|
+
# List artifacts for a specific run
|
|
823
|
+
artifacts = registry.list_artifacts(run_id)
|
|
824
|
+
|
|
825
|
+
# Download a specific artifact to a local directory
|
|
826
|
+
dest = registry.download_artifact("run-id", "model/model.pt", "./downloads/")
|
|
827
|
+
print(f"Saved to: {dest}")
|
|
828
|
+
```
|
|
829
|
+
|
|
830
|
+
## GPU Runner - Direct Code Execution
|
|
831
|
+
|
|
832
|
+
For running code strings directly on GPUs without decorators:
|
|
833
|
+
|
|
834
|
+
```python
|
|
835
|
+
import podstack
|
|
836
|
+
|
|
837
|
+
podstack.init(api_key="your-api-key", project_id="your-project-id")
|
|
838
|
+
|
|
839
|
+
# Run code on a remote GPU
|
|
840
|
+
result = podstack.run_on_gpu('''
|
|
841
|
+
import torch
|
|
842
|
+
print(f"GPU: {torch.cuda.get_device_name(0)}")
|
|
843
|
+
print(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
|
|
844
|
+
''', gpu="L40S")
|
|
845
|
+
|
|
846
|
+
print(result.output)
|
|
847
|
+
print(f"Success: {result.success}")
|
|
848
|
+
print(f"Duration: {result.duration_seconds}s")
|
|
849
|
+
```
|
|
850
|
+
|
|
851
|
+
## Client API
|
|
852
|
+
|
|
853
|
+
For direct API access to notebooks and executions:
|
|
854
|
+
|
|
855
|
+
```python
|
|
856
|
+
from podstack import Client
|
|
857
|
+
|
|
858
|
+
client = Client(api_key="your-api-key")
|
|
859
|
+
|
|
860
|
+
# Create a notebook
|
|
861
|
+
notebook = client.sync_create_notebook(name="experiment", gpu_type="L40S")
|
|
862
|
+
print(f"JupyterLab: {notebook.jupyter_url}")
|
|
863
|
+
|
|
864
|
+
# Run code
|
|
865
|
+
result = client.sync_run("print('Hello GPU!')", gpu_type="L40S")
|
|
866
|
+
print(result.output)
|
|
867
|
+
```
|
|
868
|
+
|
|
869
|
+
## Error Handling
|
|
870
|
+
|
|
871
|
+
```python
|
|
872
|
+
from podstack import (
|
|
873
|
+
PodstackError,
|
|
874
|
+
AuthenticationError,
|
|
875
|
+
GPUNotAvailableError,
|
|
876
|
+
RateLimitError,
|
|
877
|
+
ExecutionTimeoutError
|
|
878
|
+
)
|
|
879
|
+
|
|
880
|
+
try:
|
|
881
|
+
result = train()
|
|
882
|
+
except AuthenticationError:
|
|
883
|
+
print("Invalid API key")
|
|
884
|
+
except GPUNotAvailableError as e:
|
|
885
|
+
print("GPU not available")
|
|
886
|
+
except RateLimitError as e:
|
|
887
|
+
print(f"Rate limited, retry after {e.retry_after}s")
|
|
888
|
+
except ExecutionTimeoutError as e:
|
|
889
|
+
print(f"Execution timed out: {e.execution_id}")
|
|
890
|
+
except PodstackError as e:
|
|
891
|
+
print(f"Error: {e.message}")
|
|
892
|
+
```
|
|
893
|
+
|
|
894
|
+
## Configuration
|
|
895
|
+
|
|
896
|
+
```python
|
|
897
|
+
import podstack
|
|
898
|
+
|
|
899
|
+
# Option 1: Initialize explicitly
|
|
900
|
+
podstack.init(
|
|
901
|
+
api_key="your-api-key",
|
|
902
|
+
project_id="your-project-id",
|
|
903
|
+
api_url="https://api.podstack.ai/v1", # optional
|
|
904
|
+
registry_url="https://registry.podstack.ai" # optional
|
|
905
|
+
)
|
|
906
|
+
|
|
907
|
+
# Option 2: Environment variables
|
|
908
|
+
# PODSTACK_API_KEY=your-api-key
|
|
909
|
+
# PODSTACK_PROJECT_ID=your-project-id
|
|
910
|
+
# PODSTACK_API_URL=https://api.podstack.ai/v1
|
|
911
|
+
# PODSTACK_REGISTRY_URL=https://registry.podstack.ai
|
|
912
|
+
|
|
913
|
+
# Option 3: Auto-init (set PODSTACK_AUTO_INIT=1)
|
|
914
|
+
# SDK auto-initializes from env vars at import time
|
|
915
|
+
```
|
|
916
|
+
|
|
917
|
+
## License
|
|
918
|
+
|
|
919
|
+
MIT License - see LICENSE for details.
|