podstack 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
podstack_gpu/app.py ADDED
@@ -0,0 +1,675 @@
1
+ """Podstack App - Application class with function decorators."""
2
+
3
+ from __future__ import annotations
4
+ import os
5
+ import inspect
6
+ import textwrap
7
+ from typing import Optional, List, Dict, Any, Callable, Union, Generator, TYPE_CHECKING
8
+ from functools import wraps
9
+
10
+ from .exceptions import PodstackError, ValidationError
11
+
12
+ if TYPE_CHECKING:
13
+ from .image import Image
14
+ from .volume import Volume
15
+ from .secret import Secret
16
+
17
+
18
# GPU models accepted for the ``gpu`` option. ``Function`` upper-cases the
# requested name before checking membership in this list.
VALID_GPU_TYPES = [
    "A10",
    "L40",
    "L40S",
    "A100-40G",
    "A100-80G",
    "H100",
]

# Accepted values for the ``fraction`` option (percentage of one GPU).
VALID_FRACTIONS = [
    25,
    50,
    75,
    100,
]
21
+
22
+
23
class Function:
    """
    A GPU-accelerated function that can be invoked remotely.

    Bundles a plain callable with its GPU configuration and the ``App`` that
    owns it. Calling the instance runs the wrapped callable locally, while
    ``remote``, ``spawn``, ``map`` and ``starmap`` delegate to the app.

    Example:
        @app.function(gpu="H100")
        def train(epochs: int):
            import torch
            ...

        # Call remotely
        result = train.remote(epochs=10)

        # Call locally
        result = train.local(epochs=10)
    """

    def __init__(
        self,
        func: Callable,
        app: "App",
        gpu: str = "L40S",
        count: int = 1,
        fraction: int = 100,
        timeout: int = 3600,
        memory: Optional[int] = None,
        image: Optional["Image"] = None,
        volumes: Optional[Dict[str, "Volume"]] = None,
        secrets: Optional[List["Secret"]] = None,
        retries: int = 0,
        concurrency_limit: Optional[int] = None,
    ):
        """
        Store the callable and its configuration, then validate it.

        Args:
            func: The callable to wrap.
            app: The owning application; remote calls are delegated to it.
            gpu: GPU type name; upper-cased before validation.
            count: Number of GPUs (1-8).
            fraction: GPU fraction percentage; must be in VALID_FRACTIONS.
            timeout: Maximum execution time in seconds (60-86400).
            memory: Optional memory value added to the annotation when set.
            image: Optional image specification.
            volumes: Optional mapping stored on the instance ({} if omitted).
            secrets: Optional list stored on the instance ([] if omitted).
            retries: Stored on the instance.
            concurrency_limit: Stored on the instance.

        Raises:
            ValidationError: If gpu, fraction, count or timeout is invalid.
        """
        self._func = func
        self._app = app
        self._gpu = gpu.upper()
        self._count = count
        self._fraction = fraction
        self._timeout = timeout
        self._memory = memory
        self._image = image
        self._volumes = volumes or {}
        self._secrets = secrets or []
        self._retries = retries
        self._concurrency_limit = concurrency_limit

        # Validate
        self._validate()

        # Preserve function metadata (__name__, __doc__, __wrapped__, ...)
        wraps(func)(self)

    def _validate(self):
        """Validate function configuration, raising ValidationError on the first problem."""
        if self._gpu not in VALID_GPU_TYPES:
            raise ValidationError(
                f"Invalid GPU type: {self._gpu}. "
                f"Valid types: {', '.join(VALID_GPU_TYPES)}"
            )

        if self._fraction not in VALID_FRACTIONS:
            raise ValidationError(
                f"Invalid GPU fraction: {self._fraction}. "
                f"Valid fractions: {VALID_FRACTIONS}"
            )

        if self._count < 1 or self._count > 8:
            raise ValidationError("GPU count must be between 1 and 8")

        if self._timeout < 60:
            raise ValidationError("Timeout must be at least 60 seconds")

        if self._timeout > 86400:
            raise ValidationError("Timeout cannot exceed 86400 seconds (24 hours)")

    def __call__(self, *args, **kwargs):
        """Call the function locally (default behavior)."""
        return self.local(*args, **kwargs)

    def local(self, *args, **kwargs):
        """Execute the wrapped callable in the current process."""
        return self._func(*args, **kwargs)

    def remote(self, *args, **kwargs) -> "FunctionCall":
        """
        Execute the function on a remote GPU.

        Returns:
            FunctionCall object to get results
        """
        return self._app._execute_remote(self, args, kwargs)

    def spawn(self, *args, **kwargs) -> "FunctionCall":
        """
        Spawn the function on a remote GPU without waiting.

        Returns:
            FunctionCall object to get results later
        """
        return self._app._execute_remote(self, args, kwargs, wait=False)

    def map(self, *iterables, order_outputs: bool = True, return_exceptions: bool = False):
        """
        Map the function over iterables in parallel.

        Args:
            *iterables: Input iterables to map over
            order_outputs: Forwarded to the app's map implementation
            return_exceptions: If True, return exceptions instead of raising

        Returns:
            Whatever the app's ``_execute_map`` returns for these inputs
        """
        return self._app._execute_map(
            self, iterables,
            order_outputs=order_outputs,
            return_exceptions=return_exceptions
        )

    def starmap(self, args_list: List[tuple], kwargs_list: List[dict] = None, **options):
        """
        Map the function over a list of argument tuples.

        Args:
            args_list: List of positional argument tuples
            kwargs_list: List of keyword argument dicts (optional). When
                omitted or empty, one fresh empty dict is used per entry of
                ``args_list``.
            **options: Forwarded unchanged to the app's ``_execute_starmap``
        """
        if not kwargs_list:
            # One independent dict per call: ``[{}] * n`` would alias a single
            # dict, so a mutation for one call would leak into all the others.
            kwargs_list = [{} for _ in range(len(args_list))]
        return self._app._execute_starmap(self, args_list, kwargs_list, **options)

    def get_source(self) -> str:
        """
        Get the source code of the function without its leading decorators.

        Everything before the function's own ``def``/``async def`` line is
        dropped, which handles decorators spanning any number of lines. Lines
        inside the body are never touched, so nested decorators and lines that
        merely begin with ``@`` survive.

        Returns:
            The dedented source starting at the ``def`` line. If no such line
            is found, the dedented source is returned unchanged.
        """
        source = textwrap.dedent(inspect.getsource(self._func))
        lines = source.split('\n')

        for index, line in enumerate(lines):
            # lstrip() tolerates sources that dedent() could not fully
            # left-align (e.g. a body containing column-0 string content).
            if line.lstrip().startswith(("def ", "async def ")):
                return '\n'.join(lines[index:])

        return source

    def _build_annotation(self) -> str:
        """Build the ``#@podstack ...`` annotation string for this function."""
        parts = [f"#@podstack gpu={self._gpu} count={self._count} fraction={self._fraction} timeout={self._timeout}"]

        if self._memory:
            parts.append(f"memory={self._memory}")

        if self._image:
            annotation_part = self._image.definition.to_annotation()
            if annotation_part:
                parts.append(annotation_part)

        return " ".join(parts)

    @property
    def info(self) -> dict:
        """Get function metadata as a plain dict."""
        return {
            "name": self._func.__name__,
            "gpu": self._gpu,
            "count": self._count,
            "fraction": self._fraction,
            "timeout": self._timeout,
            "memory": self._memory,
            "image": self._image.to_dict() if self._image else None,
        }
210
+
211
+
212
class FunctionCall:
    """
    Represents an in-progress or completed function call.

    Example:
        call = train.spawn(epochs=10)
        # Do other work...
        result = call.get()  # Wait for result
        print(f"Duration: {call.gpu_seconds}s, Cost: ₹{call.cost_inr:.4f}")

        # Or stream status updates
        for update in call.stream_status():
            print(f"Status: {update.status}")
    """

    def __init__(self, execution_id: str, app: "App"):
        """
        Args:
            execution_id: Identifier of the remote execution.
            app: The owning application, used to wait, stream and cancel.
        """
        self._execution_id = execution_id
        self._app = app
        self._result = None
        # True once a result or a terminal error is available locally.
        self._done = False
        self._error = None
        self._gpu_seconds = 0.0
        self._cost_cents = 0
        self._status = "submitted"
        self._queue_position = None
        self._estimated_wait = None

    @property
    def object_id(self) -> str:
        """Get the execution ID."""
        return self._execution_id

    @property
    def status(self) -> str:
        """Get the most recently observed execution status."""
        return self._status

    @property
    def queue_position(self) -> Optional[int]:
        """Get queue position as last reported by a status update."""
        return self._queue_position

    @property
    def estimated_wait_seconds(self) -> Optional[int]:
        """Get estimated wait time in seconds as last reported by a status update."""
        return self._estimated_wait

    @property
    def gpu_seconds(self) -> float:
        """Get GPU execution time in seconds."""
        return self._gpu_seconds

    @property
    def cost_cents(self) -> int:
        """Get execution cost in cents."""
        return self._cost_cents

    @property
    def cost_inr(self) -> float:
        """Get execution cost in INR (rupees): ``cost_cents / 100``."""
        return self._cost_cents / 100 if self._cost_cents else 0.0

    @property
    def cost_dollars(self) -> float:
        """Deprecated: Use cost_inr instead. Returns cost in INR for backwards compatibility."""
        return self.cost_inr

    def get(self, timeout: float = None):
        """
        Wait for the function to complete and return the result.

        If a result or terminal error is already held locally it is returned
        or raised immediately; otherwise the app is asked to wait for it.

        Args:
            timeout: Maximum time to wait in seconds

        Returns:
            The function's return value

        Raises:
            TimeoutError: If timeout is exceeded
            ExecutionError: If the function failed
        """
        if self._done:
            if self._error:
                raise self._error
            return self._result

        result = self._app._wait_for_result(self._execution_id, timeout)
        self._result = result
        self._done = True
        return result

    def stream_status(self, callback: Callable = None):
        """
        Stream status updates.

        Can be used as an iterator or with a callback.

        With a callback, streaming runs immediately during this call and each
        update is passed to ``callback``; an empty iterator is returned so
        code that iterates the return value keeps working. Without a callback
        a generator is returned; nothing happens until it is iterated, at
        which point the updates are gathered from the runner and then yielded.

        Args:
            callback: Optional callback function for each update

        Returns:
            An iterator of status updates (empty when a callback is given)

        Example:
            # As iterator
            for update in call.stream_status():
                print(f"Status: {update.status}")

            # With callback
            call.stream_status(callback=lambda u: print(u.status))
        """
        if callback:
            def _forward(update):
                self._update_from_status(update)
                callback(update)

            # This method must not itself contain ``yield``: a generator
            # function would defer the line below until first iteration, so
            # the callback form would silently do nothing.
            self._app._get_runner().stream_status(self._execution_id, _forward)
            return iter(())

        return self._iter_status_updates()

    def _iter_status_updates(self):
        """Generator behind the iterator form of ``stream_status``."""
        runner = self._app._get_runner()
        updates = []

        def _collect(update):
            self._update_from_status(update)
            updates.append(update)

        runner.stream_status(self._execution_id, _collect)
        yield from updates

    def _update_from_status(self, update):
        """
        Update internal state from a status update.

        A terminal update carrying an error completes the call locally so
        that ``get()`` re-raises it. A terminal update without an error does
        NOT mark the call done: status updates carry no result here, so
        ``get()`` must still fetch it from the app instead of returning None.
        """
        self._status = update.status
        self._queue_position = update.queue_position
        self._estimated_wait = update.estimated_wait_seconds
        if update.gpu_seconds:
            self._gpu_seconds = update.gpu_seconds
        if update.cost_cents:
            self._cost_cents = update.cost_cents
        if update.is_terminal and update.error:
            from .exceptions import ExecutionError
            self._error = ExecutionError(update.error, self._execution_id, update.status)
            self._done = True

    def cancel(self, reason: str = "Cancelled by user") -> bool:
        """Cancel the function call via the owning app."""
        return self._app._cancel_execution(self._execution_id, reason)
359
+
360
+
361
class App:
    """
    Podstack Application - container for GPU functions.

    Example:
        import podstack

        app = podstack.App("my-training-app")

        @app.function(gpu="H100", image=podstack.Image.ml())
        def train(epochs: int):
            import torch
            ...

        if __name__ == "__main__":
            result = train.remote(epochs=10)
    """

    # Default API URL for the Podstack Notebooks API
    DEFAULT_API_URL = "https://cloud.podstack.ai/notebooks"

    def __init__(
        self,
        name: str = None,
        token: str = None,
        project_id: str = None,
        api_url: str = None,
    ):
        """
        Initialize a Podstack App.

        Args:
            name: Application name (for organization); "default" if omitted
            token: API token (or set PODSTACK_TOKEN env var) - supports psk_xxx platform tokens
            project_id: Project ID (or set PODSTACK_PROJECT_ID env var)
            api_url: API URL (optional, defaults to PODSTACK_API_URL env var or production URL)
        """
        self.name = name or "default"
        self._token = token or os.environ.get("PODSTACK_TOKEN")
        self._project_id = project_id or os.environ.get("PODSTACK_PROJECT_ID")
        # Trailing slashes are stripped from whichever URL source wins.
        self._api_url = (api_url or os.environ.get("PODSTACK_API_URL") or self.DEFAULT_API_URL).rstrip("/")

        self._functions: Dict[str, "Function"] = {}
        self._runner = None  # Lazy initialized

    def _get_runner(self):
        """
        Get or create the GPU runner.

        Raises:
            PodstackError: If no token or no project ID is configured.
        """
        if self._runner is None:
            from .runner import GPURunner

            if not self._token:
                raise PodstackError(
                    "No API token provided. Set PODSTACK_TOKEN environment variable "
                    "or pass token to App()"
                )
            if not self._project_id:
                raise PodstackError(
                    "No project ID provided. Set PODSTACK_PROJECT_ID environment variable "
                    "or pass project_id to App()"
                )

            self._runner = GPURunner(
                token=self._token,
                project_id=self._project_id,
                api_url=self._api_url,
            )
        return self._runner

    def function(
        self,
        gpu: str = "L40S",
        count: int = 1,
        fraction: int = 100,
        timeout: int = 3600,
        memory: int = None,
        image: "Image" = None,
        volumes: Dict[str, "Volume"] = None,
        secrets: List["Secret"] = None,
        retries: int = 0,
        concurrency_limit: int = None,
    ) -> Callable[[Callable], "Function"]:
        """
        Decorator to create a GPU-accelerated function.

        The resulting ``Function`` is registered on this app under the
        wrapped callable's ``__name__``.

        Args:
            gpu: GPU type (A10, L40, L40S, A100-40G, A100-80G, H100)
            count: Number of GPUs (1-8)
            fraction: GPU fraction percentage (25, 50, 75, 100) - use lower fractions for cost savings
            timeout: Maximum execution time in seconds
            memory: GPU memory limit in GB
            image: Container image specification
            volumes: Mount volumes {"/path": Volume}
            secrets: List of secrets to inject
            retries: Number of retries on failure
            concurrency_limit: Max concurrent executions

        Example:
            # Full GPU for training
            @app.function(gpu="H100", image=podstack.Image.ml())
            def train(epochs: int):
                import torch
                ...

            # Fractional GPU for inference (cost-effective)
            @app.function(gpu="L40S", fraction=25, image=podstack.Image.ml())
            def inference(data):
                return model.predict(data)

            # Multiple GPUs for distributed training
            @app.function(gpu="A100-80G", count=4, image=podstack.Image.ml())
            def distributed_train():
                ...
        """
        def decorator(func: Callable) -> "Function":
            fn = Function(
                func=func,
                app=self,
                gpu=gpu,
                count=count,
                fraction=fraction,
                timeout=timeout,
                memory=memory,
                image=image,
                volumes=volumes,
                secrets=secrets,
                retries=retries,
                concurrency_limit=concurrency_limit,
            )
            self._functions[func.__name__] = fn
            return fn

        return decorator

    def cls(
        self,
        gpu: str = "L40S",
        count: int = 1,
        fraction: int = 100,
        timeout: int = 3600,
        memory: int = None,
        image: "Image" = None,
        volumes: Dict[str, "Volume"] = None,
        secrets: List["Secret"] = None,
    ):
        """
        Decorator to create a GPU-accelerated class.

        The decorator attaches the configuration to the class as
        ``_podstack_config`` and this app as ``_podstack_app``, then returns
        the same class object.

        Example:
            @app.cls(gpu="H100", image=podstack.Image.ml())
            class ModelServer:
                def __init__(self):
                    self.model = load_model()

                @podstack.method()
                def predict(self, data):
                    return self.model(data)
        """
        def decorator(cls):
            # Store GPU config on the class
            cls._podstack_config = {
                "gpu": gpu,
                "count": count,
                "fraction": fraction,
                "timeout": timeout,
                "memory": memory,
                "image": image,
                "volumes": volumes,
                "secrets": secrets,
            }
            cls._podstack_app = self
            return cls

        return decorator

    def _execute_remote(self, fn: "Function", args: tuple, kwargs: dict, wait: bool = True) -> "FunctionCall":
        """
        Execute a function remotely.

        Builds a script from the function's annotation and source plus a
        JSON-driven call, submits it to the runner, and wraps the outcome in
        a ``FunctionCall``.

        Args:
            fn: The function to run.
            args: Positional arguments; must be JSON-serializable.
            kwargs: Keyword arguments; must be JSON-serializable.
            wait: Passed through to ``runner.run``. When True and the run
                reports success, the result is parsed and stored on the
                returned call.

        Returns:
            A FunctionCall for the submitted execution.
        """
        import json

        runner = self._get_runner()

        # Build the code to execute
        source = fn.get_source()

        # Build argument serialization
        args_json = json.dumps(args)
        kwargs_json = json.dumps(kwargs)

        # The template lines sit at column 0 so the generated script is
        # valid top-level Python.
        code = f'''
{fn._build_annotation()}
{source}

import json
_args = json.loads({args_json!r})
_kwargs = json.loads({kwargs_json!r})
_result = {fn._func.__name__}(*_args, **_kwargs)
print("__RESULT__:", json.dumps(_result))
'''

        # Submit
        result = runner.run(
            code,
            gpu=fn._gpu,
            count=fn._count,
            fraction=fn._fraction,
            timeout=fn._timeout,
            memory=fn._memory,
            env=fn._image.definition.env_preset if fn._image else None,
            pip=fn._image.definition.pip_packages if fn._image else None,
            wait=wait,
        )

        call = FunctionCall(result.execution_id, self)
        call._gpu_seconds = result.gpu_seconds
        call._cost_cents = result.cost_cents

        if wait and result.success:
            marker = "__RESULT__:"
            # Fall back to the raw output when no marker line is present.
            call._result = result.output
            # The generated script prints the marker as its last statement,
            # so search from the end: an earlier line printed by user code
            # that happens to start with the marker must not win.
            for line in reversed(result.output.split('\n')):
                if line.startswith(marker):
                    call._result = json.loads(line[len(marker):].strip())
                    break
            call._done = True

        return call

    @staticmethod
    def _collect_results(calls, return_exceptions: bool) -> list:
        """Call ``get()`` on each call in order; optionally capture exceptions as results."""
        results = []
        for call in calls:
            try:
                results.append(call.get())
            except Exception as exc:
                if not return_exceptions:
                    raise
                results.append(exc)
        return results

    def _execute_map(self, fn: "Function", iterables, order_outputs: bool, return_exceptions: bool):
        """
        Execute map over iterables.

        All calls are submitted first (without waiting) and then collected in
        submission order.

        Returns:
            A list when ``order_outputs`` is True, otherwise an iterator over
            the same results.
        """
        # Simple implementation - submit all and collect
        calls = [
            self._execute_remote(fn, args, {}, wait=False)
            for args in zip(*iterables)
        ]
        results = self._collect_results(calls, return_exceptions)
        return results if order_outputs else iter(results)

    def _execute_starmap(self, fn: "Function", args_list, kwargs_list, **options):
        """
        Execute starmap over argument lists.

        Args:
            fn: The function to run.
            args_list: Positional-argument tuples, one per call.
            kwargs_list: Keyword-argument dicts, paired with ``args_list``.
            **options: ``return_exceptions`` (default False) returns raised
                exceptions in place of results; other options are ignored.

        Returns:
            List of results in submission order.
        """
        calls = [
            self._execute_remote(fn, args, kwargs, wait=False)
            for args, kwargs in zip(args_list, kwargs_list)
        ]
        return self._collect_results(calls, options.get("return_exceptions", False))

    def _wait_for_result(self, execution_id: str, timeout: float = None):
        """
        Wait for an execution to complete.

        The status is always polled at least once, so an execution that has
        already finished is returned even with ``timeout=0``.

        Args:
            execution_id: Execution to wait for.
            timeout: Maximum seconds to wait; ``None`` waits indefinitely.

        Returns:
            The ``"result"`` entry of the final status (None if absent).

        Raises:
            TimeoutError: If the execution is not terminal within ``timeout``.
            ExecutionError: If the execution ends in any state other than
                "completed".
        """
        import time

        runner = self._get_runner()
        poll_interval = 2.0
        # Monotonic clock: immune to wall-clock adjustments while waiting.
        # ``is None`` (not truthiness) so that timeout=0 means "don't wait".
        deadline = None if timeout is None else time.monotonic() + timeout

        while True:
            status = runner.get_status(execution_id)
            if status["status"] in ("completed", "failed", "timeout", "cancelled"):
                break

            if deadline is None:
                time.sleep(poll_interval)
                continue

            remaining = deadline - time.monotonic()
            if remaining <= 0:
                from .exceptions import TimeoutError
                raise TimeoutError(f"Timed out waiting for {execution_id}")
            # Never sleep past the deadline.
            time.sleep(min(poll_interval, remaining))

        if status["status"] != "completed":
            from .exceptions import ExecutionError
            raise ExecutionError(
                status.get("error", f"Execution {status['status']}"),
                execution_id=execution_id,
                status=status["status"],
            )

        return status.get("result")

    def _cancel_execution(self, execution_id: str, reason: str) -> bool:
        """Cancel an execution via the runner."""
        runner = self._get_runner()
        return runner.cancel(execution_id, reason)

    @property
    def registered_functions(self) -> Dict[str, "Function"]:
        """Get a shallow copy of the name -> Function registry."""
        return self._functions.copy()
659
+
660
+
661
def method():
    """
    Build a decorator that flags a method as GPU-executable.

    The returned decorator sets ``_is_podstack_method = True`` on the
    function it receives and hands back that same function object.
    Intended for methods of a class decorated with @app.cls.

    Example:
        @app.cls(gpu="H100")
        class Model:
            @podstack.method()
            def predict(self, x):
                ...
    """
    def mark(target):
        setattr(target, "_is_podstack_method", True)
        return target

    return mark
@@ -0,0 +1,35 @@
1
+ """Podstack SDK exceptions."""
2
+
3
+
4
class PodstackError(Exception):
    """Root of the Podstack SDK exception hierarchy."""


class AuthenticationError(PodstackError):
    """Raised when authentication fails (invalid or expired token)."""


class ValidationError(PodstackError):
    """Raised for invalid parameters or an invalid annotation."""


class ExecutionError(PodstackError):
    """Raised when a GPU execution fails.

    Carries the execution ID and status alongside the message.
    """

    def __init__(self, message: str, execution_id: str = None, status: str = None):
        # Only the message becomes part of ``args``; the extra context is
        # exposed as plain attributes.
        self.execution_id = execution_id
        self.status = status
        super().__init__(message)


class TimeoutError(PodstackError):
    """Raised when an execution times out.

    Note: this name shadows the builtin ``TimeoutError`` wherever it is
    imported; it derives from PodstackError, not from the builtin.
    """


class InsufficientBalanceError(PodstackError):
    """Raised when the wallet balance is insufficient."""