podstack 1.3.21__tar.gz → 1.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. {podstack-1.3.21 → podstack-1.7.0}/PKG-INFO +1 -1
  2. {podstack-1.3.21 → podstack-1.7.0}/podstack/__init__.py +146 -1
  3. podstack-1.7.0/podstack/agent/__init__.py +105 -0
  4. podstack-1.7.0/podstack/agent/client.py +308 -0
  5. podstack-1.7.0/podstack/agent/exceptions.py +56 -0
  6. podstack-1.7.0/podstack/agent/models.py +279 -0
  7. podstack-1.7.0/podstack/agent/spec.py +160 -0
  8. {podstack-1.3.21 → podstack-1.7.0}/podstack/annotations.py +156 -0
  9. podstack-1.7.0/podstack/cell.py +860 -0
  10. {podstack-1.3.21 → podstack-1.7.0}/podstack/gpu_runner.py +327 -32
  11. podstack-1.7.0/podstack/image.py +515 -0
  12. podstack-1.7.0/podstack/pipelines.py +269 -0
  13. {podstack-1.3.21 → podstack-1.7.0}/podstack/registry/client.py +21 -1
  14. {podstack-1.3.21 → podstack-1.7.0}/podstack/registry/experiment.py +17 -2
  15. podstack-1.7.0/podstack/runners.py +312 -0
  16. podstack-1.7.0/podstack/sandbox.py +474 -0
  17. podstack-1.7.0/podstack/serialization.py +359 -0
  18. podstack-1.7.0/podstack/session.py +117 -0
  19. podstack-1.7.0/podstack/storage.py +291 -0
  20. {podstack-1.3.21 → podstack-1.7.0}/podstack.egg-info/PKG-INFO +1 -1
  21. {podstack-1.3.21 → podstack-1.7.0}/podstack.egg-info/SOURCES.txt +17 -1
  22. {podstack-1.3.21 → podstack-1.7.0}/pyproject.toml +1 -1
  23. podstack-1.7.0/tests/test_cell_serialization.py +207 -0
  24. podstack-1.7.0/tests/test_image_builder.py +208 -0
  25. podstack-1.7.0/tests/test_runners_catalog.py +250 -0
  26. {podstack-1.3.21 → podstack-1.7.0}/LICENSE +0 -0
  27. {podstack-1.3.21 → podstack-1.7.0}/README.md +0 -0
  28. {podstack-1.3.21 → podstack-1.7.0}/podstack/client.py +0 -0
  29. {podstack-1.3.21 → podstack-1.7.0}/podstack/exceptions.py +0 -0
  30. {podstack-1.3.21 → podstack-1.7.0}/podstack/execution.py +0 -0
  31. {podstack-1.3.21 → podstack-1.7.0}/podstack/models.py +0 -0
  32. {podstack-1.3.21 → podstack-1.7.0}/podstack/notebook.py +0 -0
  33. {podstack-1.3.21 → podstack-1.7.0}/podstack/registry/__init__.py +0 -0
  34. {podstack-1.3.21 → podstack-1.7.0}/podstack/registry/autolog.py +0 -0
  35. {podstack-1.3.21 → podstack-1.7.0}/podstack/registry/exceptions.py +0 -0
  36. {podstack-1.3.21 → podstack-1.7.0}/podstack/registry/model.py +0 -0
  37. {podstack-1.3.21 → podstack-1.7.0}/podstack/registry/model_utils.py +0 -0
  38. {podstack-1.3.21 → podstack-1.7.0}/podstack.egg-info/dependency_links.txt +0 -0
  39. {podstack-1.3.21 → podstack-1.7.0}/podstack.egg-info/requires.txt +0 -0
  40. {podstack-1.3.21 → podstack-1.7.0}/podstack.egg-info/top_level.txt +0 -0
  41. {podstack-1.3.21 → podstack-1.7.0}/podstack_gpu/__init__.py +0 -0
  42. {podstack-1.3.21 → podstack-1.7.0}/podstack_gpu/app.py +0 -0
  43. {podstack-1.3.21 → podstack-1.7.0}/podstack_gpu/exceptions.py +0 -0
  44. {podstack-1.3.21 → podstack-1.7.0}/podstack_gpu/image.py +0 -0
  45. {podstack-1.3.21 → podstack-1.7.0}/podstack_gpu/runner.py +0 -0
  46. {podstack-1.3.21 → podstack-1.7.0}/podstack_gpu/secret.py +0 -0
  47. {podstack-1.3.21 → podstack-1.7.0}/podstack_gpu/utils.py +0 -0
  48. {podstack-1.3.21 → podstack-1.7.0}/podstack_gpu/volume.py +0 -0
  49. {podstack-1.3.21 → podstack-1.7.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: podstack
3
- Version: 1.3.21
3
+ Version: 1.7.0
4
4
  Summary: Official Python SDK for Podstack GPU Notebook Platform
5
5
  Author-email: Podstack <support@podstack.ai>
6
6
  License-Expression: MIT
@@ -54,7 +54,7 @@ Decorators:
54
54
  ...
55
55
  """
56
56
 
57
- __version__ = "1.3.2"
57
+ __version__ = "1.7.0"
58
58
 
59
59
  from .client import Client
60
60
  from .notebook import Notebook, NotebookStatus
@@ -86,6 +86,36 @@ from .gpu_runner import (
86
86
  RunnerList,
87
87
  run as run_on_gpu,
88
88
  list_runners,
89
+ get_runner_info,
90
+ describe_runner,
91
+ pip_install,
92
+ )
93
+ # Typed runner catalog model (new in 1.7.x). Runner is a dict subclass so
94
+ # existing dict-style access continues to work; `.describe()` / `.packages`
95
+ # / `.filter()` on RunnerList are the new affordances.
96
+ from .runners import Runner
97
+
98
+ # Pipeline module import
99
+ from . import pipelines
100
+ from .pipelines import (
101
+ step,
102
+ pipeline,
103
+ StepConfig,
104
+ PipelineDef,
105
+ list_pipelines,
106
+ get_pipeline,
107
+ get_pipeline_run,
108
+ trigger_pipeline,
109
+ cancel_pipeline_run,
110
+ archive_pipeline,
111
+ )
112
+
113
+ # Storage module import
114
+ from . import storage
115
+ from .storage import (
116
+ StorageClient, BucketList, VolumeList,
117
+ create_bucket, get_bucket, list_buckets, delete_bucket,
118
+ create_volume, get_volume, list_volumes, delete_volume,
89
119
  )
90
120
 
91
121
  # Annotations module import
@@ -103,6 +133,57 @@ from .annotations import (
103
133
  get_environment,
104
134
  get_auto_shutdown_minutes,
105
135
  enable_remote_execution,
136
+ deploy,
137
+ DeployConfig,
138
+ )
139
+ from .gpu_runner import (
140
+ get_deployment,
141
+ stop_deployment,
142
+ delete_deployment,
143
+ )
144
+
145
+ # Reactive cell data-transfer (Modal / Ray Jobs-style) with type dispatch.
146
+ # Lets users write @podstack.cell(...) and have outputs auto-serialized,
147
+ # uploaded to object storage, and auto-downloaded + deserialized in
148
+ # downstream cells running on different GPUs.
149
+ from . import session as _cell_session
150
+ from .session import (
151
+ init_session,
152
+ current_session,
153
+ reset_session,
154
+ SessionConfig,
155
+ )
156
+ from . import serialization as cell_serialization
157
+ from .serialization import (
158
+ register_serializer,
159
+ PayloadMeta,
160
+ )
161
+ from . import cell as _cell_module
162
+ from .cell import (
163
+ cell,
164
+ CellResult,
165
+ CellOutputRef,
166
+ prune_session,
167
+ )
168
+
169
+ # Image-as-code builder (Modal-style). Users describe images in Python;
170
+ # the backend deduplicates builds by structural hash so identical build
171
+ # plans across users share one cached image.
172
+ from . import image as _image_module
173
+ from .image import Image
174
+
175
+ # Agent Cloud module import
176
+ from . import agent
177
+
178
+ # Sandbox module import
179
+ from . import sandbox
180
+ from .sandbox import (
181
+ Sandbox,
182
+ SandboxClient,
183
+ SandboxError,
184
+ create as create_sandbox,
185
+ list_sandboxes,
186
+ get_sandbox,
106
187
  )
107
188
 
108
189
 
@@ -150,6 +231,13 @@ def init(
150
231
  api_url=registry_url
151
232
  )
152
233
 
234
+ # Initialize sandbox
235
+ sandbox.init(
236
+ api_key=api_key,
237
+ project_id=project_id,
238
+ api_url=api_url,
239
+ )
240
+
153
241
 
154
242
  def auto_init():
155
243
  """
@@ -206,13 +294,42 @@ __all__ = [
206
294
  "WalletBalance",
207
295
  # Registry
208
296
  "registry",
297
+ # Pipelines
298
+ "pipelines",
299
+ "step",
300
+ "pipeline",
301
+ "StepConfig",
302
+ "PipelineDef",
303
+ "list_pipelines",
304
+ "get_pipeline",
305
+ "get_pipeline_run",
306
+ "trigger_pipeline",
307
+ "cancel_pipeline_run",
308
+ "archive_pipeline",
309
+ # Storage
310
+ "storage",
311
+ "StorageClient",
312
+ "BucketList",
313
+ "VolumeList",
314
+ "create_bucket",
315
+ "get_bucket",
316
+ "list_buckets",
317
+ "delete_bucket",
318
+ "create_volume",
319
+ "get_volume",
320
+ "list_volumes",
321
+ "delete_volume",
209
322
  # GPU Runner
210
323
  "gpu_runner",
211
324
  "GPURunner",
212
325
  "GPUExecutionResult",
213
326
  "RunnerList",
327
+ "Runner",
214
328
  "run_on_gpu",
215
329
  "list_runners",
330
+ "get_runner_info",
331
+ "describe_runner",
332
+ "pip_install",
216
333
  # Annotations
217
334
  "annotations",
218
335
  "gpu",
@@ -227,4 +344,32 @@ __all__ = [
227
344
  "get_environment",
228
345
  "get_auto_shutdown_minutes",
229
346
  "enable_remote_execution",
347
+ # Deploy
348
+ "deploy",
349
+ "DeployConfig",
350
+ "get_deployment",
351
+ "stop_deployment",
352
+ "delete_deployment",
353
+ # Sandbox
354
+ "sandbox",
355
+ "Sandbox",
356
+ "SandboxClient",
357
+ "SandboxError",
358
+ "create_sandbox",
359
+ "list_sandboxes",
360
+ "get_sandbox",
361
+ # Cell data transfer (reactive, cross-GPU)
362
+ "cell",
363
+ "CellResult",
364
+ "CellOutputRef",
365
+ "prune_session",
366
+ "init_session",
367
+ "current_session",
368
+ "reset_session",
369
+ "SessionConfig",
370
+ "cell_serialization",
371
+ "register_serializer",
372
+ "PayloadMeta",
373
+ # Image-as-code (Modal-style)
374
+ "Image",
230
375
  ]
@@ -0,0 +1,105 @@
1
+ """
2
+ Podstack Agent Cloud SDK
3
+
4
+ Prompt-to-model generation: describe a model in natural language,
5
+ generate training code via LLM, execute on GPU, auto-iterate, and deploy.
6
+
7
+ Usage:
8
+ from podstack.agent import AgentClient
9
+
10
+ client = AgentClient(api_key="...", project_id="...")
11
+ spec = client.create_spec_from_prompt("Build a CIFAR-10 classifier with 90% accuracy")
12
+ gen = client.generate_code(spec.id)
13
+ run = client.execute_generation(gen.id)
14
+ endpoint = client.deploy_run(run.id)
15
+
16
+ Or use module-level convenience functions:
17
+
18
+ from podstack import agent
19
+
20
+ agent.init(api_key="...", project_id="...")
21
+ spec = agent.create_spec_from_prompt("Build a CIFAR-10 classifier")
22
+ """
23
+
24
+ from typing import Optional
25
+
26
+ from .client import AgentClient
27
+ from .models import (
28
+ AgentAction,
29
+ CodeGeneration,
30
+ IterationLog,
31
+ ModelGenArtifact,
32
+ ModelGenEndpoint,
33
+ ModelGenRun,
34
+ ModelSpec,
35
+ )
36
+ from .spec import ModelSpecBuilder
37
+ from .exceptions import (
38
+ AgentError,
39
+ ExecutionError,
40
+ GateFailedError,
41
+ GenerationError,
42
+ IterationLimitError,
43
+ SpecValidationError,
44
+ )
45
+
46
# Public API surface of ``podstack.agent`` — the names exposed by
# ``from podstack.agent import *`` and re-exported at package level.
__all__ = [
    # Client
    "AgentClient",
    "ModelSpecBuilder",
    # Models
    "ModelSpec",
    "CodeGeneration",
    "ModelGenRun",
    "ModelGenArtifact",
    "ModelGenEndpoint",
    "AgentAction",
    "IterationLog",
    # Exceptions
    "AgentError",
    "SpecValidationError",
    "GateFailedError",
    "IterationLimitError",
    "GenerationError",
    "ExecutionError",
    # Module-level functions
    "init",
    "create_spec",
    "create_spec_from_prompt",
]

# Module-level AgentClient singleton: created lazily by _get_client() on
# first use, or replaced explicitly by init().
_client: Optional[AgentClient] = None
72
+
73
+
74
def _get_client() -> AgentClient:
    """Return the shared AgentClient, creating it lazily on first use."""
    global _client
    if _client is not None:
        return _client
    _client = AgentClient()
    return _client
79
+
80
+
81
def init(
    api_url: Optional[str] = None,
    api_key: Optional[str] = None,
    project_id: Optional[str] = None,
    timeout: int = 60,
) -> AgentClient:
    """Initialize the module-level AgentClient singleton.

    Replaces any previously configured singleton. Arguments are passed
    straight through to AgentClient; any left as None fall back to the
    client's environment-variable defaults.

    Args:
        api_url: Agent Cloud base URL (default: PODSTACK_AGENT_URL env var).
        api_key: API key (default: PODSTACK_API_KEY env var).
        project_id: Project ID (default: PODSTACK_PROJECT_ID env var).
        timeout: Per-request timeout in seconds.

    Returns:
        The new AgentClient, also stored as the module singleton used by
        the module-level convenience functions.
    """
    global _client
    _client = AgentClient(
        api_url=api_url,
        api_key=api_key,
        project_id=project_id,
        timeout=timeout,
    )
    return _client
96
+
97
+
98
def create_spec(name: str, task_type: str, **kwargs) -> ModelSpec:
    """Create a ModelSpec from structured parameters."""
    client = _get_client()
    return client.create_spec(name=name, task_type=task_type, **kwargs)
101
+
102
+
103
def create_spec_from_prompt(prompt: str) -> ModelSpec:
    """Create a ModelSpec from natural language."""
    client = _get_client()
    return client.create_spec_from_prompt(prompt)
@@ -0,0 +1,308 @@
1
+ """
2
+ Agent Cloud Client
3
+
4
+ Client for interacting with the Podstack Agent Cloud API.
5
+ """
6
+
7
+ import os
8
+ from typing import Any, Dict, List, Optional
9
+
10
+ import requests
11
+
12
+ from .exceptions import AgentError
13
+ from .models import (
14
+ AgentAction,
15
+ CodeGeneration,
16
+ IterationLog,
17
+ ModelGenArtifact,
18
+ ModelGenEndpoint,
19
+ ModelGenRun,
20
+ ModelSpec,
21
+ )
22
+ from ..exceptions import AuthenticationError
23
+
24
+
25
class AgentClient:
    """
    Podstack Agent Cloud Client for prompt-to-model generation.

    Usage:
        client = AgentClient(
            api_url="https://cloud.podstack.ai/agent-cloud",
            api_key="your-api-key",
            project_id="your-project-id"
        )

        # Create spec from natural language
        spec = client.create_spec_from_prompt("Build a CIFAR-10 image classifier with 90% accuracy")

        # Generate code
        gen = client.generate_code(spec.id)

        # Execute on GPU
        run = client.execute_generation(gen.id)

        # Deploy
        endpoint = client.deploy_run(run.id)
    """

    DEFAULT_API_URL = "https://cloud.podstack.ai/agent-cloud"

    def __init__(
        self,
        api_url: Optional[str] = None,
        api_key: Optional[str] = None,
        project_id: Optional[str] = None,
        timeout: int = 60,
    ):
        """Configure the client.

        Any argument left as None falls back to its environment variable:
        PODSTACK_AGENT_URL, PODSTACK_API_KEY, PODSTACK_PROJECT_ID.
        """
        self.api_url = api_url or os.getenv("PODSTACK_AGENT_URL", self.DEFAULT_API_URL)
        self.api_key = api_key or os.getenv("PODSTACK_API_KEY")
        self.project_id = project_id or os.getenv("PODSTACK_PROJECT_ID")
        self.timeout = timeout

    def _get_headers(self) -> Dict[str, str]:
        """Build request headers; auth/project headers appear only when configured."""
        headers = {"Content-Type": "application/json"}
        if self.api_key:
            headers["Authorization"] = f"Bearer {self.api_key}"
        if self.project_id:
            headers["X-Project-ID"] = self.project_id
        return headers

    @staticmethod
    def _safe_error(response, default: str) -> str:
        """Extract the server's "error" field, tolerating non-JSON bodies.

        Error responses are not guaranteed to be JSON (proxies/load
        balancers may return HTML or empty bodies); without this guard a
        JSONDecodeError would mask the real HTTP failure.
        """
        try:
            payload = response.json()
        except ValueError:
            return default
        if isinstance(payload, dict):
            return payload.get("error", default)
        return default

    def _request(
        self,
        method: str,
        endpoint: str,
        json: Optional[Dict[str, Any]] = None,  # name mirrors requests' kwarg
        params: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """Issue one HTTP request against the agent-cloud API.

        Returns the decoded JSON body ({} for 204/empty responses).

        Raises:
            AuthenticationError: on HTTP 401.
            AgentError: on 404 (code="not_found"), other 4xx/5xx, timeout
                (code="timeout"), or connection failure (code="connection").
        """
        url = f"{self.api_url}/api/v1{endpoint}"
        headers = self._get_headers()

        try:
            response = requests.request(
                method=method,
                url=url,
                headers=headers,
                json=json,
                params=params,
                timeout=self.timeout,
            )

            if response.status_code == 401:
                raise AuthenticationError("Invalid or expired API key")
            elif response.status_code == 404:
                raise AgentError(self._safe_error(response, "Not found"), code="not_found")
            elif response.status_code >= 400:
                raise AgentError(
                    self._safe_error(response, f"Request failed: {response.status_code}")
                )

            if response.status_code == 204 or not response.content:
                return {}
            return response.json()

        except requests.exceptions.Timeout:
            raise AgentError("Request timed out", code="timeout")
        except requests.exceptions.ConnectionError:
            raise AgentError("Connection failed — is agent-cloud service running?", code="connection")

    # ── Spec methods ──────────────────────────────────────────────────────

    def create_spec(
        self,
        name: str,
        task_type: str,
        data_kind: str = "",
        task_description: str = "",
        domain: str = "",
        data_source: str = "",
        data_format: str = "",
        primary_metric: str = "",
        minimum_thresholds: Optional[Dict[str, float]] = None,
        deploy_target: str = "api",
        max_training_time_sec: int = 3600,
        seed: int = 42,
        validate_bias: bool = False,
        validate_fairness: bool = False,
    ) -> ModelSpec:
        """Create a ModelSpec from structured parameters."""
        body = {
            "name": name,
            "task_type": task_type,
            "data_kind": data_kind,
            "task_description": task_description,
            "domain": domain,
            "data_source": data_source,
            "data_format": data_format,
            "primary_metric": primary_metric,
            "deploy_target": deploy_target,
            "max_training_time_sec": max_training_time_sec,
            "seed": seed,
            "validate_bias": validate_bias,
            "validate_fairness": validate_fairness,
        }
        # Only sent when non-empty, matching the original request shape.
        if minimum_thresholds:
            body["minimum_thresholds"] = minimum_thresholds

        data = self._request("POST", "/specs", json=body)
        return ModelSpec.from_dict(data.get("spec", data))

    def create_spec_from_prompt(self, prompt: str) -> ModelSpec:
        """Create a ModelSpec from a natural language prompt via LLM."""
        data = self._request("POST", "/specs/from-prompt", json={"prompt": prompt})
        return ModelSpec.from_dict(data.get("spec", data))

    def get_spec(self, spec_id: str) -> ModelSpec:
        """Get a spec by ID."""
        data = self._request("GET", f"/specs/{spec_id}")
        return ModelSpec.from_dict(data.get("spec", data))

    def list_specs(self, limit: int = 20, offset: int = 0) -> List[ModelSpec]:
        """List specs in the project."""
        data = self._request("GET", "/specs", params={"limit": limit, "offset": offset})
        return [ModelSpec.from_dict(s) for s in data.get("specs", [])]

    def delete_spec(self, spec_id: str) -> None:
        """Delete a spec."""
        self._request("DELETE", f"/specs/{spec_id}")

    # ── Generation methods ────────────────────────────────────────────────

    def generate_code(self, spec_id: str) -> CodeGeneration:
        """Generate training code for a spec via LLM."""
        data = self._request("POST", f"/specs/{spec_id}/generate")
        return CodeGeneration.from_dict(data.get("generation", data))

    def get_generation(self, generation_id: str) -> CodeGeneration:
        """Get a code generation by ID."""
        data = self._request("GET", f"/generations/{generation_id}")
        return CodeGeneration.from_dict(data.get("generation", data))

    def execute_generation(
        self, generation_id: str, gpu_type: str = "A100", gpu_count: int = 1
    ) -> ModelGenRun:
        """Execute generated code on GPU."""
        data = self._request(
            "POST",
            f"/generations/{generation_id}/execute",
            json={"gpu_type": gpu_type, "gpu_count": gpu_count},
        )
        return ModelGenRun.from_dict(data.get("run", data))

    # ── Run methods ───────────────────────────────────────────────────────

    def get_run(self, run_id: str) -> ModelGenRun:
        """Get a run by ID."""
        data = self._request("GET", f"/runs/{run_id}")
        return ModelGenRun.from_dict(data.get("run", data))

    def list_runs(
        self, spec_id: str = None, status: str = None, limit: int = 20, offset: int = 0
    ) -> List[ModelGenRun]:
        """List runs, optionally filtered by spec_id and/or status."""
        params = {"limit": limit, "offset": offset}
        if spec_id:
            params["spec_id"] = spec_id
        if status:
            params["status"] = status
        data = self._request("GET", "/runs", params=params)
        return [ModelGenRun.from_dict(r) for r in data.get("runs", [])]

    def cancel_run(self, run_id: str) -> None:
        """Cancel a running execution."""
        self._request("POST", f"/runs/{run_id}/cancel")

    def get_run_logs(self, run_id: str) -> str:
        """Get execution logs for a run."""
        data = self._request("GET", f"/runs/{run_id}/logs")
        return data.get("logs", "")

    def list_artifacts(self, run_id: str) -> List[ModelGenArtifact]:
        """List artifacts for a run."""
        data = self._request("GET", f"/runs/{run_id}/artifacts")
        return [ModelGenArtifact.from_dict(a) for a in data.get("artifacts", [])]

    # ── Iteration methods ─────────────────────────────────────────────────

    def iterate(self, run_id: str) -> IterationLog:
        """Trigger one iteration cycle (plan -> patch -> critique -> validate -> execute)."""
        data = self._request("POST", f"/runs/{run_id}/iterate")
        return IterationLog.from_dict(data.get("iteration", data))

    def iterate_until_success(self, run_id: str, max_iterations: int = 5) -> ModelGenRun:
        """Auto-iterate until primary metric threshold is met or max iterations reached.

        Raises:
            IterationLimitError: if no iteration passes its metric gate
                within max_iterations attempts.
        """
        # Local import mirrors the original; keeps the error type close to
        # its single point of use.
        from .exceptions import IterationLimitError

        current_run_id = run_id
        for _ in range(max_iterations):
            iter_log = self.iterate(current_run_id)
            # An iteration that produced a new run either succeeds (return)
            # or becomes the base for the next iteration.
            if iter_log.new_run_id:
                new_run = self.get_run(iter_log.new_run_id)
                if new_run.final_metric_value is not None and new_run.metric_gate_passed:
                    return new_run
                current_run_id = new_run.id

        # Budget exhausted: report the last run's metric in the error.
        last_run = self.get_run(current_run_id)
        raise IterationLimitError(max_iterations, last_run.final_metric_value)

    def list_iterations(self, run_id: str) -> List[IterationLog]:
        """List iteration logs for a run."""
        data = self._request("GET", f"/runs/{run_id}/iterations")
        return [IterationLog.from_dict(i) for i in data.get("iterations", [])]

    # ── Deployment methods ────────────────────────────────────────────────

    def run_gates(self, run_id: str) -> Dict[str, bool]:
        """Run all 5 deployment gates and return pass/fail per gate."""
        data = self._request("POST", f"/runs/{run_id}/gates")
        gates = data.get("gates", [])
        return {g["gate"]: g["passed"] for g in gates}

    def deploy_run(
        self, run_id: str, name: str = None, replicas: int = 1
    ) -> ModelGenEndpoint:
        """Deploy a run as an API endpoint (runs gates first)."""
        body = {"replicas": replicas}
        if name:
            body["name"] = name
        data = self._request("POST", f"/runs/{run_id}/deploy", json=body)
        return ModelGenEndpoint.from_dict(data.get("endpoint", data))

    def predict(self, endpoint_id: str, input_data: Any) -> Any:
        """Make a prediction via a deployed endpoint."""
        data = self._request("POST", f"/endpoints/{endpoint_id}/predict", json=input_data)
        return data.get("prediction", data)

    def get_endpoint(self, endpoint_id: str) -> ModelGenEndpoint:
        """Get endpoint details."""
        data = self._request("GET", f"/endpoints/{endpoint_id}")
        return ModelGenEndpoint.from_dict(data.get("endpoint", data))

    def list_endpoints(self, limit: int = 20, offset: int = 0) -> List[ModelGenEndpoint]:
        """List all endpoints."""
        data = self._request("GET", "/endpoints", params={"limit": limit, "offset": offset})
        return [ModelGenEndpoint.from_dict(e) for e in data.get("endpoints", [])]

    def stop_endpoint(self, endpoint_id: str) -> None:
        """Stop an active endpoint."""
        self._request("POST", f"/endpoints/{endpoint_id}/stop")

    def delete_endpoint(self, endpoint_id: str) -> None:
        """Delete an endpoint."""
        self._request("DELETE", f"/endpoints/{endpoint_id}")

    # ── Observability ─────────────────────────────────────────────────────

    def list_actions(
        self, spec_id: str = None, run_id: str = None, limit: int = 50, offset: int = 0
    ) -> List[AgentAction]:
        """Query the agent audit trail, optionally filtered by spec or run."""
        params = {"limit": limit, "offset": offset}
        if spec_id:
            params["spec_id"] = spec_id
        if run_id:
            params["run_id"] = run_id
        data = self._request("GET", "/actions", params=params)
        return [AgentAction.from_dict(a) for a in data.get("actions", [])]
@@ -0,0 +1,56 @@
1
+ """
2
+ Agent Cloud Exception Classes
3
+
4
+ Custom exceptions for the Podstack Agent Cloud SDK.
5
+ """
6
+
7
+ from ..exceptions import PodstackError
8
+
9
+
10
class AgentError(PodstackError):
    """Base exception for agent cloud errors.

    All agent-cloud exceptions in this module derive from this class, so
    callers can catch AgentError to handle any agent failure. The
    machine-readable ``code`` is passed through to PodstackError.
    """

    def __init__(self, message: str, code: str = "agent_error"):
        super().__init__(message, code=code)
15
+
16
+
17
class SpecValidationError(AgentError):
    """Raised when a model spec fails validation."""

    def __init__(self, message: str):
        # Fixed machine-readable code identifies this error kind.
        super().__init__(message, code="spec_validation_error")
22
+
23
+
24
class GateFailedError(AgentError):
    """Raised when a deployment gate check fails."""

    def __init__(self, gate: str, message: str):
        # The message embeds the gate name for humans; the raw name is also
        # kept on the instance for programmatic inspection.
        super().__init__(f"Gate '{gate}' failed: {message}", code="gate_failed")
        self.gate = gate
30
+
31
+
32
class IterationLimitError(AgentError):
    """Raised when the maximum iteration count is reached without meeting thresholds."""

    def __init__(self, max_iterations: int, best_metric: float = None):
        # Assemble the message from its parts; the metric suffix is only
        # present when a best metric value is known.
        parts = [f"Maximum iterations ({max_iterations}) reached"]
        if best_metric is not None:
            parts.append(f" — best metric value: {best_metric:.4f}")
        super().__init__("".join(parts), code="iteration_limit")
        self.max_iterations = max_iterations
        self.best_metric = best_metric
42
+
43
+
44
class GenerationError(AgentError):
    """Raised when code generation fails."""

    def __init__(self, message: str):
        # Fixed machine-readable code identifies this error kind.
        super().__init__(message, code="generation_error")
49
+
50
+
51
class ExecutionError(AgentError):
    """Raised when GPU execution fails."""

    def __init__(self, message: str, run_id: str = None):
        super().__init__(message, code="execution_error")
        # ID of the failed run when known; None otherwise.
        self.run_id = run_id