more-compute 0.4.3__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. frontend/app/globals.css +734 -27
  2. frontend/app/layout.tsx +13 -3
  3. frontend/components/Notebook.tsx +2 -14
  4. frontend/components/cell/MonacoCell.tsx +99 -5
  5. frontend/components/layout/Sidebar.tsx +39 -4
  6. frontend/components/panels/ClaudePanel.tsx +461 -0
  7. frontend/components/popups/ComputePopup.tsx +739 -418
  8. frontend/components/popups/FilterPopup.tsx +305 -189
  9. frontend/components/popups/MetricsPopup.tsx +20 -1
  10. frontend/components/popups/ProviderConfigModal.tsx +322 -0
  11. frontend/components/popups/ProviderDropdown.tsx +398 -0
  12. frontend/components/popups/SettingsPopup.tsx +1 -1
  13. frontend/contexts/ClaudeContext.tsx +392 -0
  14. frontend/contexts/PodWebSocketContext.tsx +16 -21
  15. frontend/hooks/useInlineDiff.ts +269 -0
  16. frontend/lib/api.ts +323 -12
  17. frontend/lib/settings.ts +5 -0
  18. frontend/lib/websocket-native.ts +4 -8
  19. frontend/lib/websocket.ts +1 -2
  20. frontend/package-lock.json +733 -36
  21. frontend/package.json +2 -0
  22. frontend/public/assets/icons/providers/lambda_labs.svg +22 -0
  23. frontend/public/assets/icons/providers/prime_intellect.svg +18 -0
  24. frontend/public/assets/icons/providers/runpod.svg +9 -0
  25. frontend/public/assets/icons/providers/vastai.svg +1 -0
  26. frontend/settings.md +54 -0
  27. frontend/tsconfig.tsbuildinfo +1 -0
  28. frontend/types/claude.ts +194 -0
  29. kernel_run.py +13 -0
  30. {more_compute-0.4.3.dist-info → more_compute-0.5.0.dist-info}/METADATA +53 -11
  31. {more_compute-0.4.3.dist-info → more_compute-0.5.0.dist-info}/RECORD +56 -37
  32. {more_compute-0.4.3.dist-info → more_compute-0.5.0.dist-info}/WHEEL +1 -1
  33. morecompute/__init__.py +1 -1
  34. morecompute/__version__.py +1 -1
  35. morecompute/execution/executor.py +24 -67
  36. morecompute/execution/worker.py +6 -72
  37. morecompute/models/api_models.py +62 -0
  38. morecompute/notebook.py +11 -0
  39. morecompute/server.py +641 -133
  40. morecompute/services/claude_service.py +392 -0
  41. morecompute/services/pod_manager.py +168 -67
  42. morecompute/services/pod_monitor.py +67 -39
  43. morecompute/services/prime_intellect.py +0 -4
  44. morecompute/services/providers/__init__.py +92 -0
  45. morecompute/services/providers/base_provider.py +336 -0
  46. morecompute/services/providers/lambda_labs_provider.py +394 -0
  47. morecompute/services/providers/provider_factory.py +194 -0
  48. morecompute/services/providers/runpod_provider.py +504 -0
  49. morecompute/services/providers/vastai_provider.py +407 -0
  50. morecompute/utils/cell_magics.py +0 -3
  51. morecompute/utils/config_util.py +93 -3
  52. morecompute/utils/special_commands.py +5 -32
  53. morecompute/utils/version_check.py +117 -0
  54. frontend/styling_README.md +0 -23
  55. {more_compute-0.4.3.dist-info/licenses → more_compute-0.5.0.dist-info}/LICENSE +0 -0
  56. {more_compute-0.4.3.dist-info → more_compute-0.5.0.dist-info}/entry_points.txt +0 -0
  57. {more_compute-0.4.3.dist-info → more_compute-0.5.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,92 @@
1
+ """GPU cloud provider implementations.
2
+
3
+ This module provides a unified interface for multiple GPU cloud providers:
4
+ - RunPod (GraphQL API, SSH support)
5
+ - Lambda Labs (REST API, SSH support)
6
+ - Vast.ai (REST API, community GPUs, SSH support)
7
+ - Prime Intellect (REST API, SSH support) - original provider
8
+
9
+ All providers support SSH tunneling for remote code execution.
10
+
11
+ Usage:
12
+ from morecompute.services.providers import (
13
+ get_provider,
14
+ list_providers,
15
+ configure_provider,
16
+ get_active_provider
17
+ )
18
+
19
+ # List all available providers
20
+ providers = list_providers()
21
+
22
+ # Configure a provider with API key
23
+ configure_provider("runpod", "your-api-key", make_active=True)
24
+
25
+ # Get the active provider instance
26
+ provider = get_active_provider()
27
+
28
+ # Get GPU availability
29
+ gpus = await provider.get_gpu_availability(gpu_type="H100")
30
+
31
+ # Create a pod
32
+ pod = await provider.create_pod(request)
33
+ """
34
+
35
+ # Base classes
36
+ from .base_provider import (
37
+ BaseGPUProvider,
38
+ ProviderInfo,
39
+ ProviderType,
40
+ GpuAvailability,
41
+ NormalizedPod,
42
+ )
43
+
44
+ # Factory functions
45
+ from .provider_factory import (
46
+ register_provider,
47
+ get_provider_class,
48
+ get_provider,
49
+ refresh_provider,
50
+ list_providers,
51
+ get_configured_providers,
52
+ get_active_provider_name,
53
+ set_active_provider,
54
+ get_active_provider,
55
+ configure_provider,
56
+ clear_all_providers,
57
+ )
58
+
59
+ # Import provider implementations to trigger registration
60
+ from . import runpod_provider
61
+ from . import lambda_labs_provider
62
+ from . import vastai_provider
63
+
64
+ # Export provider classes for direct access if needed
65
+ from .runpod_provider import RunPodProvider
66
+ from .lambda_labs_provider import LambdaLabsProvider
67
+ from .vastai_provider import VastAIProvider
68
+
69
# Public API of the providers package: the names made available by
# `from morecompute.services.providers import *`.
__all__ = [
    # Base classes
    "BaseGPUProvider",
    "ProviderInfo",
    "ProviderType",
    "GpuAvailability",
    "NormalizedPod",
    # Factory functions
    "register_provider",
    "get_provider_class",
    "get_provider",
    "refresh_provider",
    "list_providers",
    "get_configured_providers",
    "get_active_provider_name",
    "set_active_provider",
    "get_active_provider",
    "configure_provider",
    "clear_all_providers",
    # Provider classes
    "RunPodProvider",
    "LambdaLabsProvider",
    "VastAIProvider",
]
@@ -0,0 +1,336 @@
1
+ """Abstract base class for GPU cloud providers."""
2
+
3
+ from abc import ABC, abstractmethod
4
+ from dataclasses import dataclass
5
+ from enum import Enum
6
+ from typing import Any
7
+ import httpx
8
+ from fastapi import HTTPException
9
+
10
+ from ...models.api_models import PodResponse
11
+
12
+
13
class ProviderType(str, Enum):
    """Identifiers of the GPU cloud providers known to the application.

    The enum mixes in ``str``, so each member compares equal to its plain
    string value (for example ``ProviderType.RUNPOD == "runpod"``).
    """

    PRIME_INTELLECT = "prime_intellect"
    RUNPOD = "runpod"
    MODAL = "modal"
    LAMBDA_LABS = "lambda_labs"
    VASTAI = "vastai"
20
+
21
+
22
@dataclass
class ProviderInfo:
    """Descriptive metadata about a single GPU provider.

    Attributes:
        name: Internal name (e.g., "runpod").
        display_name: Human-readable name (e.g., "RunPod").
        api_key_env_name: Environment variable name (e.g., "RUNPOD_API_KEY").
        supports_ssh: Whether the provider supports SSH connections.
        dashboard_url: URL to get an API key.
        configured: Whether an API key is configured. Defaults to False.
        is_active: Whether this is the currently active provider.
            Defaults to False.
    """

    name: str
    display_name: str
    api_key_env_name: str
    supports_ssh: bool
    dashboard_url: str
    configured: bool = False
    is_active: bool = False
32
+
33
+
34
+ @dataclass
35
+ class GpuAvailability:
36
+ """Normalized GPU availability information."""
37
+ gpu_type: str
38
+ gpu_name: str
39
+ gpu_count: int
40
+ price_hr: float
41
+ cloud_id: str
42
+ socket: str
43
+ region: str | None = None
44
+ security: str | None = None
45
+ vcpus: int | None = None
46
+ memory: int | None = None
47
+ disk_size: int | None = None
48
+ available: bool = True
49
+
50
+
51
+ @dataclass
52
+ class NormalizedPod:
53
+ """Normalized pod information across providers."""
54
+ id: str
55
+ name: str
56
+ status: str
57
+ gpu_name: str
58
+ gpu_count: int
59
+ price_hr: float
60
+ ssh_connection: str | None
61
+ ip: str | None
62
+ provider: str
63
+ created_at: str
64
+ updated_at: str
65
+ user_id: str | None = None
66
+ team_id: str | None = None
67
+
68
+
69
class BaseGPUProvider(ABC):
    """Abstract base class for GPU cloud providers.

    All provider implementations must extend this class, set the class-level
    metadata below and implement the abstract methods so that callers get a
    consistent interface regardless of the backing cloud.
    """

    # Class attributes to be defined by subclasses
    PROVIDER_NAME: str = ""  # e.g., "runpod"
    PROVIDER_DISPLAY_NAME: str = ""  # e.g., "RunPod"
    API_KEY_ENV_NAME: str = ""  # e.g., "RUNPOD_API_KEY"
    SUPPORTS_SSH: bool = True  # False for Modal
    DASHBOARD_URL: str = ""  # URL to get API key

    def __init__(self, api_key: str | None = None):
        """Initialize the provider with an optional API key.

        Args:
            api_key: The API key for authentication. If None, the provider
                is in an unconfigured state.
        """
        self.api_key = api_key
        # Not used by this base class: _make_request opens a fresh client
        # for every call.
        self._client: httpx.AsyncClient | None = None

    @property
    def is_configured(self) -> bool:
        """Return True when a non-blank API key is set."""
        return bool(self.api_key and self.api_key.strip())

    def get_info(self, is_active: bool = False) -> ProviderInfo:
        """Build a ProviderInfo snapshot from the class metadata.

        Args:
            is_active: Value to store in ``ProviderInfo.is_active``.

        Returns:
            ProviderInfo populated from the class attributes and the current
            ``is_configured`` state.
        """
        return ProviderInfo(
            name=self.PROVIDER_NAME,
            display_name=self.PROVIDER_DISPLAY_NAME,
            api_key_env_name=self.API_KEY_ENV_NAME,
            supports_ssh=self.SUPPORTS_SSH,
            dashboard_url=self.DASHBOARD_URL,
            configured=self.is_configured,
            is_active=is_active,
        )

    def _http_error_detail(self, status_code: int) -> str:
        """Return a sanitized, user-facing message for an HTTP error status."""
        name = self.PROVIDER_DISPLAY_NAME
        if status_code == 401:
            return f"{name} authentication failed. Please check your API key."
        if status_code == 402:
            return f"Insufficient funds in your {name} account."
        if status_code == 403:
            return f"Access denied. Please check your {name} permissions."
        if status_code == 404:
            return f"{name} resource not found."
        if status_code >= 500:
            return f"{name} service error. Please try again later."
        return f"{name} API error (status {status_code})."

    async def _make_request(
        self,
        method: str,
        url: str,
        headers: dict[str, str] | None = None,
        params: dict[str, Any] | None = None,
        json_data: dict[str, Any] | None = None,
        timeout: float = 30.0
    ) -> dict[str, Any]:
        """Make an HTTP request with error handling.

        Args:
            method: HTTP method (GET, POST, DELETE, etc.)
            url: Full URL to request
            headers: Extra request headers; merged over the auth headers, so
                a key given here overrides the same auth header.
            params: Query parameters
            json_data: JSON body data
            timeout: Request timeout in seconds

        Returns:
            Parsed JSON response, or an empty dict for a 204 / empty body.

        Raises:
            HTTPException: With the upstream status code when the provider
                answers with an error status, 503 when the provider cannot
                be reached, and 502 when a successful response body is not
                valid JSON.
        """
        request_headers = self._get_auth_headers()
        if headers:
            request_headers.update(headers)

        async with httpx.AsyncClient() as client:
            try:
                response = await client.request(
                    method=method,
                    url=url,
                    headers=request_headers,
                    params=params,
                    json=json_data,
                    timeout=timeout,
                )
                response.raise_for_status()
            except httpx.HTTPStatusError as exc:
                # Sanitize error message - don't expose full API response to clients
                status_code = exc.response.status_code
                raise HTTPException(
                    status_code=status_code,
                    detail=self._http_error_detail(status_code),
                ) from exc
            except httpx.RequestError as exc:
                raise HTTPException(
                    status_code=503,
                    detail=f"Unable to connect to {self.PROVIDER_DISPLAY_NAME}. Please check your internet connection."
                ) from exc

            # Handle empty responses
            if response.status_code == 204 or not response.content:
                return {}

            try:
                return response.json()
            except ValueError as exc:
                # A non-JSON body would otherwise escape as a raw decoding
                # error; report it as HTTPException like every other failure.
                raise HTTPException(
                    status_code=502,
                    detail=f"{self.PROVIDER_DISPLAY_NAME} returned an invalid response."
                ) from exc

    @abstractmethod
    def _get_auth_headers(self) -> dict[str, str]:
        """Get authentication headers for API requests.

        Returns:
            Dictionary of headers to include in requests
        """
        pass

    @abstractmethod
    async def get_gpu_availability(
        self,
        regions: list[str] | None = None,
        gpu_count: int | None = None,
        gpu_type: str | None = None,
        **kwargs: Any
    ) -> dict[str, Any]:
        """Get available GPU resources with pricing.

        Args:
            regions: Filter by regions
            gpu_count: Filter by GPU count
            gpu_type: Filter by GPU type (e.g., "H100", "A100")
            **kwargs: Provider-specific filters

        Returns:
            Dict containing available GPUs with pricing
        """
        pass

    @abstractmethod
    async def create_pod(self, request: Any) -> PodResponse:
        """Create a new GPU pod/instance.

        Args:
            request: Pod creation request (provider-specific format)

        Returns:
            PodResponse with created pod information
        """
        pass

    @abstractmethod
    async def get_pods(
        self,
        status: str | None = None,
        limit: int = 100,
        offset: int = 0
    ) -> dict[str, Any]:
        """Get list of all pods for the user.

        Args:
            status: Filter by status
            limit: Maximum number of results
            offset: Pagination offset

        Returns:
            Dict with list of pods
        """
        pass

    @abstractmethod
    async def get_pod(self, pod_id: str) -> PodResponse:
        """Get details for a specific pod.

        Args:
            pod_id: The pod identifier

        Returns:
            PodResponse with pod information
        """
        pass

    @abstractmethod
    async def delete_pod(self, pod_id: str) -> dict[str, Any]:
        """Delete/terminate a pod.

        Args:
            pod_id: The pod identifier

        Returns:
            Dict with deletion confirmation
        """
        pass

    async def get_pod_status(self, pod_ids: list[str]) -> dict[str, Any]:
        """Get status for multiple pods.

        Default implementation fetches each pod individually, one after the
        other. Providers can override for more efficient batch operations.

        Args:
            pod_ids: List of pod identifiers

        Returns:
            ``{"statuses": {pod_id: status}}``; a pod whose lookup raises
            HTTPException is reported as ``"unknown"``.
        """
        statuses: dict[str, Any] = {}
        for pod_id in pod_ids:
            try:
                pod = await self.get_pod(pod_id)
            except HTTPException:
                statuses[pod_id] = "unknown"
            else:
                statuses[pod_id] = pod.status
        return {"statuses": statuses}

    def normalize_pod(self, pod_data: dict[str, Any]) -> NormalizedPod:
        """Convert provider-specific pod data to normalized format.

        Default implementation - subclasses should override. Each field is
        read from its camelCase key first and falls back to the snake_case
        key when the camelCase key is absent.

        Args:
            pod_data: Raw pod data from provider API

        Returns:
            NormalizedPod instance
        """
        def pick(camel: str, snake: str, default: Any = None) -> Any:
            # camelCase wins whenever the key is present, even if its value is None.
            return pod_data.get(camel, pod_data.get(snake, default))

        return NormalizedPod(
            id=pod_data.get("id", ""),
            name=pod_data.get("name", ""),
            status=pod_data.get("status", "unknown"),
            gpu_name=pick("gpuName", "gpu_name", ""),
            gpu_count=pick("gpuCount", "gpu_count", 1),
            price_hr=pick("priceHr", "price_hr", 0.0),
            ssh_connection=pick("sshConnection", "ssh_connection"),
            ip=pod_data.get("ip"),
            provider=self.PROVIDER_NAME,
            created_at=str(pick("createdAt", "created_at", "")),
            updated_at=str(pick("updatedAt", "updated_at", "")),
            user_id=pick("userId", "user_id"),
            team_id=pick("teamId", "team_id"),
        )

    def get_ssh_connection_info(self, pod: PodResponse | NormalizedPod) -> dict[str, Any] | None:
        """Parse SSH connection information from pod.

        This is a default implementation; providers can override.

        Args:
            pod: Pod response or normalized pod

        Returns:
            ``{"user", "host", "port"}`` when the connection string looks
            like ``"ssh user@host -p port"``, ``{"raw": value}`` when it has
            any other shape, or None when SSH is unsupported or the pod has
            no connection value.
        """
        import re

        if not self.SUPPORTS_SSH:
            return None

        ssh_conn = getattr(pod, 'sshConnection', None) or getattr(pod, 'ssh_connection', None)
        if not ssh_conn:
            return None

        # A non-string value cannot be matched against the pattern; hand it
        # back untouched instead of letting re.match raise TypeError.
        if not isinstance(ssh_conn, str):
            return {"raw": ssh_conn}

        # Parse common SSH connection format: "ssh user@host -p port".
        # The user part also accepts "." and "-", which are valid in usernames.
        match = re.match(r'ssh\s+([\w.-]+)@([\w.-]+)\s+-p\s+(\d+)', ssh_conn)
        if match:
            return {
                "user": match.group(1),
                "host": match.group(2),
                "port": int(match.group(3)),
            }
        return {"raw": ssh_conn}