podstack 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- podstack/__init__.py +222 -0
- podstack/annotations.py +725 -0
- podstack/client.py +322 -0
- podstack/exceptions.py +125 -0
- podstack/execution.py +291 -0
- podstack/gpu_runner.py +1141 -0
- podstack/models.py +274 -0
- podstack/notebook.py +410 -0
- podstack/registry/__init__.py +402 -0
- podstack/registry/client.py +957 -0
- podstack/registry/exceptions.py +107 -0
- podstack/registry/experiment.py +227 -0
- podstack/registry/model.py +273 -0
- podstack/registry/model_utils.py +231 -0
- podstack-1.2.0.dist-info/METADATA +299 -0
- podstack-1.2.0.dist-info/RECORD +27 -0
- podstack-1.2.0.dist-info/WHEEL +5 -0
- podstack-1.2.0.dist-info/licenses/LICENSE +21 -0
- podstack-1.2.0.dist-info/top_level.txt +2 -0
- podstack_gpu/__init__.py +126 -0
- podstack_gpu/app.py +675 -0
- podstack_gpu/exceptions.py +35 -0
- podstack_gpu/image.py +325 -0
- podstack_gpu/runner.py +746 -0
- podstack_gpu/secret.py +189 -0
- podstack_gpu/utils.py +203 -0
- podstack_gpu/volume.py +198 -0
podstack/execution.py
ADDED
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Podstack Execution Module
|
|
3
|
+
|
|
4
|
+
Handles code execution operations.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
from typing import Optional, Dict, Any, List, TYPE_CHECKING
|
|
10
|
+
from enum import Enum
|
|
11
|
+
import asyncio
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from .client import Client
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ExecutionStatus(str, Enum):
    """Lifecycle states an execution can be in.

    The enum also subclasses ``str``, so every member compares equal to its
    raw string value (for example ``ExecutionStatus.FAILED == "failed"``).
    """

    # States in which the execution has not finished yet.
    PENDING = "pending"
    RUNNING = "running"

    # States that ``Execution.is_complete`` treats as finished.
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
    TIMEOUT = "timeout"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class ExecutionOutput:
    """A single output record produced by an execution.

    Attributes:
        output_type: Kind of output: "stdout", "stderr", "display_data",
            "execute_result" or "error".
        data: Payload of the output (any type).
        metadata: Extra information attached to the output; empty by default.
    """

    output_type: str
    data: Any
    metadata: Dict[str, Any] = field(default_factory=dict)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "ExecutionOutput":
        """Build an ``ExecutionOutput`` from a plain mapping.

        Args:
            data: Mapping with required ``"output_type"`` and ``"data"`` keys
                and an optional ``"metadata"`` key.

        Returns:
            A new ``ExecutionOutput`` instance.

        Raises:
            KeyError: If ``"output_type"`` or ``"data"`` is missing.
        """
        # ``.get("metadata", {})`` would let an explicit ``None`` (JSON null)
        # through and break the ``Dict`` contract of the field; ``or`` covers
        # both the missing-key and the null-value case.
        return cls(
            output_type=data["output_type"],
            data=data["data"],
            metadata=data.get("metadata") or {},
        )
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass
class Execution:
    """
    Represents a code execution.

    Attributes:
        id: Unique execution ID
        status: Current status
        code: The code that was executed
        output: Execution output (stdout)
        error: Error message if failed
        execution_time_ms: Execution time in milliseconds
        gpu_memory_used_mb: GPU memory used in MB
        created_at: When the execution was created
        completed_at: When the execution completed
        notebook_id: ID of the associated notebook, if any
        outputs: Structured output records of the execution
    """

    id: str
    status: ExecutionStatus
    code: str
    output: Optional[str] = None
    error: Optional[str] = None
    execution_time_ms: Optional[float] = None
    gpu_memory_used_mb: Optional[int] = None
    created_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
    notebook_id: Optional[str] = None
    outputs: List[ExecutionOutput] = field(default_factory=list)

    # Client used for API calls; excluded from repr.
    _client: Optional["Client"] = field(default=None, repr=False)

    @staticmethod
    def _parse_timestamp(value: Optional[str]) -> Optional[datetime]:
        """Parse an ISO-8601 string, returning ``None`` for empty input."""
        if not value:
            return None
        # ``fromisoformat`` on older Python versions rejects a trailing "Z",
        # so it is rewritten as an explicit UTC offset first.
        return datetime.fromisoformat(value.replace("Z", "+00:00"))

    @classmethod
    def from_dict(cls, data: Dict[str, Any], client: Optional["Client"] = None) -> "Execution":
        """Build an ``Execution`` from a response mapping.

        Args:
            data: Mapping describing the execution. ``"status"`` is required;
                the ID is read from ``"id"`` or, failing that, from
                ``"execution_id"`` (empty string if neither is present).
            client: Client to bind to the new object for later API calls.

        Returns:
            A new ``Execution`` instance.

        Raises:
            KeyError: If ``"status"`` is missing.
            ValueError: If ``"status"`` is not a known ``ExecutionStatus``
                value or a timestamp is not valid ISO-8601.
        """
        return cls(
            id=data["id"] if "id" in data else data.get("execution_id", ""),
            status=ExecutionStatus(data["status"]),
            code=data.get("code", ""),
            output=data.get("output"),
            error=data.get("error"),
            execution_time_ms=data.get("execution_time_ms"),
            gpu_memory_used_mb=data.get("gpu_memory_used_mb"),
            created_at=cls._parse_timestamp(data.get("created_at")),
            completed_at=cls._parse_timestamp(data.get("completed_at")),
            notebook_id=data.get("notebook_id"),
            # A missing, null or empty "outputs" entry all yield an empty list.
            outputs=[ExecutionOutput.from_dict(o) for o in data.get("outputs") or []],
            _client=client,
        )

    @property
    def is_complete(self) -> bool:
        """Check if execution is complete"""
        return self.status in (
            ExecutionStatus.COMPLETED,
            ExecutionStatus.FAILED,
            ExecutionStatus.CANCELLED,
            ExecutionStatus.TIMEOUT,
        )

    @property
    def is_success(self) -> bool:
        """Check if execution completed successfully"""
        return self.status == ExecutionStatus.COMPLETED

    def _require_client(self) -> "Client":
        """Return the bound client or raise if there is none."""
        if not self._client:
            raise RuntimeError("Execution not bound to client")
        return self._client

    async def refresh(self) -> "Execution":
        """Refresh execution status from API.

        Returns:
            This object, updated in place.

        Raises:
            RuntimeError: If the execution is not bound to a client.
        """
        client = self._require_client()

        data = await client._request("GET", f"/executions/{self.id}")
        updated = Execution.from_dict(data, client)

        # Only the fields below are copied over; id, code, created_at and
        # notebook_id keep their current values.
        self.status = updated.status
        self.output = updated.output
        self.error = updated.error
        self.execution_time_ms = updated.execution_time_ms
        self.gpu_memory_used_mb = updated.gpu_memory_used_mb
        self.completed_at = updated.completed_at
        self.outputs = updated.outputs

        return self

    async def wait(self, poll_interval: float = 0.5, timeout: Optional[float] = None) -> "Execution":
        """
        Wait for execution to complete.

        Args:
            poll_interval: Seconds between status checks
            timeout: Maximum seconds to wait (None for no limit). A value of
                0 is honoured as an immediate deadline, not as "no limit".

        Returns:
            Updated Execution object

        Raises:
            ExecutionTimeoutError: If the deadline passes before the
                execution reaches a finished state.
            RuntimeError: If a refresh is needed and no client is bound.
        """
        # Inside a coroutine a loop is always running; get_running_loop()
        # avoids the deprecated implicit-loop behaviour of get_event_loop().
        loop = asyncio.get_running_loop()
        # ``is not None`` (rather than truthiness) keeps timeout=0 meaningful.
        deadline = None if timeout is None else loop.time() + timeout

        while not self.is_complete:
            if deadline is not None and loop.time() > deadline:
                # Imported lazily, as in the original code.
                from .exceptions import ExecutionTimeoutError
                raise ExecutionTimeoutError(self.id, int(timeout))

            await asyncio.sleep(poll_interval)
            await self.refresh()

        return self

    async def cancel(self) -> "Execution":
        """Cancel the execution and refresh its state.

        Returns:
            This object, updated in place.

        Raises:
            RuntimeError: If the execution is not bound to a client.
        """
        client = self._require_client()

        await client._request("POST", f"/executions/{self.id}/cancel")
        await self.refresh()
        return self

    def _join_outputs(self, output_type: str) -> str:
        """Concatenate the data of all outputs of the given type."""
        return "".join(
            str(out.data) for out in self.outputs if out.output_type == output_type
        )

    def get_stdout(self) -> str:
        """Get stdout output.

        Returns ``output`` when it is non-empty, otherwise the concatenated
        data of all "stdout" records in ``outputs``.
        """
        if self.output:
            return self.output
        return self._join_outputs("stdout")

    def get_stderr(self) -> str:
        """Get stderr output (concatenated "stderr" records in ``outputs``)."""
        return self._join_outputs("stderr")

    def get_display_data(self) -> List[Dict[str, Any]]:
        """Get display data outputs (images, HTML, etc.)"""
        return [
            {"data": out.data, "metadata": out.metadata}
            for out in self.outputs
            if out.output_type == "display_data"
        ]

    def __str__(self) -> str:
        return f"Execution({self.id}, status={self.status.value})"
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
class ExecutionsAPI:
    """API for managing executions"""

    def __init__(self, client: "Client"):
        """Store the client used for every request made by this API object."""
        self._client = client

    async def create(
        self,
        code: str,
        gpu_type: str = "A10",
        environment: str = "pytorch",
        timeout_seconds: int = 300,
    ) -> Execution:
        """
        Create a serverless execution (run code without managing notebooks).

        Args:
            code: Python code to execute
            gpu_type: GPU type to use
            environment: Environment preset
            timeout_seconds: Maximum execution time

        Returns:
            Execution object bound to this API's client
        """
        payload = {
            "code": code,
            "gpu_type": gpu_type,
            "environment": environment,
            "timeout_seconds": timeout_seconds,
        }
        data = await self._client._request("POST", "/executions", json=payload)
        return Execution.from_dict(data, self._client)

    async def get(self, execution_id: str) -> Execution:
        """Get an execution by ID"""
        data = await self._client._request("GET", f"/executions/{execution_id}")
        return Execution.from_dict(data, self._client)

    async def list(
        self,
        notebook_id: Optional[str] = None,
        status: Optional[str] = None,
        limit: int = 20,
        offset: int = 0,
    ) -> List[Execution]:
        """
        List executions.

        Args:
            notebook_id: Filter by notebook
            status: Filter by status; either an ``ExecutionStatus`` member or
                its raw string value (e.g. ``"running"``)
            limit: Maximum results
            offset: Pagination offset

        Returns:
            List of Execution objects

        Raises:
            ValueError: If ``status`` is not a known ``ExecutionStatus`` value.
        """
        params = {"limit": limit, "offset": offset}
        if notebook_id:
            params["notebook_id"] = notebook_id
        if status:
            # Going through the enum accepts both members and plain strings
            # (a bare ``status.value`` fails for a string) and rejects
            # unknown values before a request is sent.
            params["status"] = ExecutionStatus(status).value

        data = await self._client._request("GET", "/executions", params=params)
        return [Execution.from_dict(e, self._client) for e in data.get("executions", [])]

    async def run(
        self,
        code: str,
        gpu_type: str = "A10",
        environment: str = "pytorch",
        timeout_seconds: int = 300,
        wait: bool = True,
    ) -> Execution:
        """
        Run code and optionally wait for completion.

        Args:
            code: Python code to execute
            gpu_type: GPU type to use
            environment: Environment preset
            timeout_seconds: Maximum execution time; also used as the
                client-side wait timeout when ``wait`` is true
            wait: Whether to wait for completion

        Returns:
            Execution object
        """
        execution = await self.create(code, gpu_type, environment, timeout_seconds)

        if wait:
            await execution.wait(timeout=timeout_seconds)

        return execution
|