more-compute 0.4.3__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57):
  1. frontend/app/globals.css +734 -27
  2. frontend/app/layout.tsx +13 -3
  3. frontend/components/Notebook.tsx +2 -14
  4. frontend/components/cell/MonacoCell.tsx +99 -5
  5. frontend/components/layout/Sidebar.tsx +39 -4
  6. frontend/components/panels/ClaudePanel.tsx +461 -0
  7. frontend/components/popups/ComputePopup.tsx +739 -418
  8. frontend/components/popups/FilterPopup.tsx +305 -189
  9. frontend/components/popups/MetricsPopup.tsx +20 -1
  10. frontend/components/popups/ProviderConfigModal.tsx +322 -0
  11. frontend/components/popups/ProviderDropdown.tsx +398 -0
  12. frontend/components/popups/SettingsPopup.tsx +1 -1
  13. frontend/contexts/ClaudeContext.tsx +392 -0
  14. frontend/contexts/PodWebSocketContext.tsx +16 -21
  15. frontend/hooks/useInlineDiff.ts +269 -0
  16. frontend/lib/api.ts +323 -12
  17. frontend/lib/settings.ts +5 -0
  18. frontend/lib/websocket-native.ts +4 -8
  19. frontend/lib/websocket.ts +1 -2
  20. frontend/package-lock.json +733 -36
  21. frontend/package.json +2 -0
  22. frontend/public/assets/icons/providers/lambda_labs.svg +22 -0
  23. frontend/public/assets/icons/providers/prime_intellect.svg +18 -0
  24. frontend/public/assets/icons/providers/runpod.svg +9 -0
  25. frontend/public/assets/icons/providers/vastai.svg +1 -0
  26. frontend/settings.md +54 -0
  27. frontend/tsconfig.tsbuildinfo +1 -0
  28. frontend/types/claude.ts +194 -0
  29. kernel_run.py +13 -0
  30. {more_compute-0.4.3.dist-info → more_compute-0.5.0.dist-info}/METADATA +53 -11
  31. {more_compute-0.4.3.dist-info → more_compute-0.5.0.dist-info}/RECORD +56 -37
  32. {more_compute-0.4.3.dist-info → more_compute-0.5.0.dist-info}/WHEEL +1 -1
  33. morecompute/__init__.py +1 -1
  34. morecompute/__version__.py +1 -1
  35. morecompute/execution/executor.py +24 -67
  36. morecompute/execution/worker.py +6 -72
  37. morecompute/models/api_models.py +62 -0
  38. morecompute/notebook.py +11 -0
  39. morecompute/server.py +641 -133
  40. morecompute/services/claude_service.py +392 -0
  41. morecompute/services/pod_manager.py +168 -67
  42. morecompute/services/pod_monitor.py +67 -39
  43. morecompute/services/prime_intellect.py +0 -4
  44. morecompute/services/providers/__init__.py +92 -0
  45. morecompute/services/providers/base_provider.py +336 -0
  46. morecompute/services/providers/lambda_labs_provider.py +394 -0
  47. morecompute/services/providers/provider_factory.py +194 -0
  48. morecompute/services/providers/runpod_provider.py +504 -0
  49. morecompute/services/providers/vastai_provider.py +407 -0
  50. morecompute/utils/cell_magics.py +0 -3
  51. morecompute/utils/config_util.py +93 -3
  52. morecompute/utils/special_commands.py +5 -32
  53. morecompute/utils/version_check.py +117 -0
  54. frontend/styling_README.md +0 -23
  55. {more_compute-0.4.3.dist-info/licenses → more_compute-0.5.0.dist-info}/LICENSE +0 -0
  56. {more_compute-0.4.3.dist-info → more_compute-0.5.0.dist-info}/entry_points.txt +0 -0
  57. {more_compute-0.4.3.dist-info → more_compute-0.5.0.dist-info}/top_level.txt +0 -0
@@ -157,10 +157,8 @@ class NextZmqExecutor:
157
157
  normalized_source, result, start_time, execution_count, websocket, cell_index
158
158
  )
159
159
  result['execution_time'] = f"{(time.time()-start_time)*1000:.1f}ms"
160
- print(f"[EXECUTOR] Sending execution_complete for cell {cell_index}, status={result.get('status')}, has_error={result.get('error') is not None}", file=sys.stderr, flush=True)
161
160
  if websocket:
162
161
  await websocket.send_json({'type': 'execution_complete', 'data': {'cell_index': cell_index, 'result': result}})
163
- print(f"[EXECUTOR] Sent execution_complete successfully", file=sys.stderr, flush=True)
164
162
  return result
165
163
  # For remote execution OR mixed commands, fall through to send via ZMQ
166
164
 
@@ -178,14 +176,12 @@ class NextZmqExecutor:
178
176
  while True:
179
177
  # Check if this cell was interrupted
180
178
  if self.interrupted_cell == cell_index and interrupted_time is None:
181
- print(f"[EXECUTE] Cell {cell_index} was interrupted, waiting for subprocess to be killed...", file=sys.stderr, flush=True)
182
179
  interrupted_time = time.time()
183
180
  # Don't break immediately - wait for execution_complete from worker
184
181
  # Give worker 5 seconds to kill subprocess and send completion
185
182
 
186
183
  # If interrupted and waited long enough, force break
187
184
  if interrupted_time and (time.time() - interrupted_time > 5.0):
188
- print(f"[EXECUTE] Cell {cell_index} interrupt timeout, breaking out", file=sys.stderr, flush=True)
189
185
  self.interrupted_cell = None # Clear the flag
190
186
  result.update({
191
187
  'status': 'error',
@@ -200,7 +196,6 @@ class NextZmqExecutor:
200
196
 
201
197
  # Timeout check for stuck operations
202
198
  if time.time() - start_time > max_wait:
203
- print(f"[EXECUTE] Cell {cell_index} exceeded max wait time, timing out", file=sys.stderr, flush=True)
204
199
  result.update({
205
200
  'status': 'error',
206
201
  'error': {
@@ -236,7 +231,6 @@ class NextZmqExecutor:
236
231
  result.setdefault('execution_count', execution_count)
237
232
  # Clear interrupted flag if this was interrupted
238
233
  if self.interrupted_cell == cell_index:
239
- print(f"[EXECUTE] Cell {cell_index} completed after interrupt", file=sys.stderr, flush=True)
240
234
  self.interrupted_cell = None
241
235
  break
242
236
 
@@ -248,16 +242,14 @@ class NextZmqExecutor:
248
242
  self.req.setsockopt(zmq.RCVTIMEO, -1) # type: ignore[reportAttributeAccessIssue]
249
243
  except zmq.Again:
250
244
  # Timeout - worker didn't reply (probably killed), need to reset socket
251
- print(f"[EXECUTE] Worker didn't reply, resetting REQ socket", file=sys.stderr, flush=True)
252
245
  try:
253
246
  self.req.close(0) # type: ignore[reportAttributeAccessIssue]
254
247
  self.req = self.ctx.socket(zmq.REQ) # type: ignore[reportUnknownMemberType, reportAttributeAccessIssue]
255
248
  self.req.connect(self.cmd_addr) # type: ignore[reportAttributeAccessIssue]
256
- except Exception as e:
257
- print(f"[EXECUTE] Error resetting socket: {e}", file=sys.stderr, flush=True)
258
- except Exception as e:
249
+ except Exception:
250
+ pass
251
+ except Exception:
259
252
  # Some other error, also reset socket to be safe
260
- print(f"[EXECUTE] Error receiving reply: {e}, resetting socket", file=sys.stderr, flush=True)
261
253
  try:
262
254
  self.req.setsockopt(zmq.RCVTIMEO, -1) # type: ignore[reportAttributeAccessIssue]
263
255
  self.req.close(0) # type: ignore[reportAttributeAccessIssue]
@@ -272,13 +264,9 @@ class NextZmqExecutor:
272
264
 
273
265
  async def interrupt_kernel(self, cell_index: int | None = None) -> None:
274
266
  """Interrupt the kernel using the control socket"""
275
- import sys
276
- print(f"[INTERRUPT] Starting interrupt for cell {cell_index}", file=sys.stderr, flush=True)
277
-
278
267
  # Mark this cell as interrupted so execute_cell can break out
279
268
  if isinstance(cell_index, int):
280
269
  self.interrupted_cell = cell_index
281
- print(f"[INTERRUPT] Marked cell {cell_index} as interrupted", file=sys.stderr, flush=True)
282
270
 
283
271
  payload: dict[str, object] = {'type': 'interrupt'}
284
272
  if isinstance(cell_index, int):
@@ -290,11 +278,8 @@ class NextZmqExecutor:
290
278
  self.ctrl.setsockopt(zmq.RCVTIMEO, 1000) # type: ignore[reportAttributeAccessIssue]
291
279
  self.ctrl.send_json(payload) # type: ignore[reportAttributeAccessIssue]
292
280
  _ = cast(dict[str, object], self.ctrl.recv_json()) # type: ignore[reportAttributeAccessIssue]
293
- print(f"[INTERRUPT] Sent interrupt signal to worker via control socket", file=sys.stderr, flush=True)
294
- except Exception as e:
295
- print(f"[INTERRUPT] Could not send interrupt signal: {e}", file=sys.stderr, flush=True)
281
+ except Exception:
296
282
  # If control socket fails, try force-kill immediately
297
- print(f"[INTERRUPT] Force killing worker immediately...", file=sys.stderr, flush=True)
298
283
  await self._force_kill_worker()
299
284
  finally:
300
285
  # Reset timeouts
@@ -308,46 +293,37 @@ class NextZmqExecutor:
308
293
  except Exception:
309
294
  pass
310
295
 
311
- print(f"[INTERRUPT] Interrupt complete", file=sys.stderr, flush=True)
312
-
313
296
  async def _force_kill_worker(self) -> None:
314
297
  """Force kill the worker process and respawn"""
315
- import sys
316
- print(f"[FORCE_KILL] Killing worker PID={self.worker_pid}", file=sys.stderr, flush=True)
317
-
318
298
  if self.worker_pid:
319
299
  try:
320
300
  # For blocking I/O, SIGKILL immediately - no mercy
321
- print(f"[FORCE_KILL] Sending SIGKILL to {self.worker_pid}", file=sys.stderr, flush=True)
322
301
  os.kill(self.worker_pid, signal.SIGKILL)
323
302
  await asyncio.sleep(0.1) # Brief wait for process to die
324
303
  except ProcessLookupError:
325
- print(f"[FORCE_KILL] Process {self.worker_pid} already dead", file=sys.stderr, flush=True)
326
- except Exception as e:
327
- print(f"[FORCE_KILL] Error killing PID {self.worker_pid}: {e}", file=sys.stderr, flush=True)
304
+ pass
305
+ except Exception:
306
+ pass
328
307
 
329
308
  # Also try via Popen object if available
330
309
  if self.worker_proc:
331
310
  try:
332
- print(f"[FORCE_KILL] Killing via Popen object", file=sys.stderr, flush=True)
333
311
  self.worker_proc.kill() # SIGKILL directly
334
312
  await asyncio.sleep(0.1)
335
- except Exception as e:
336
- print(f"[FORCE_KILL] Error killing via Popen: {e}", file=sys.stderr, flush=True)
313
+ except Exception:
314
+ pass
337
315
 
338
316
  # CRITICAL: Reset socket state - close and recreate
339
317
  # The REQ socket may be waiting for a reply from the dead worker
340
318
  try:
341
- print(f"[FORCE_KILL] Resetting REQ and CTRL sockets", file=sys.stderr, flush=True)
342
319
  self.req.close(0) # type: ignore[reportAttributeAccessIssue]
343
320
  self.req = self.ctx.socket(zmq.REQ) # type: ignore[reportUnknownMemberType, reportAttributeAccessIssue]
344
321
  self.req.connect(self.cmd_addr) # type: ignore[reportAttributeAccessIssue]
345
322
  self.ctrl.close(0) # type: ignore[reportAttributeAccessIssue]
346
323
  self.ctrl = self.ctx.socket(zmq.DEALER) # type: ignore[reportUnknownMemberType, reportAttributeAccessIssue]
347
324
  self.ctrl.connect(self.ctrl_addr) # type: ignore[reportAttributeAccessIssue]
348
- print(f"[FORCE_KILL] Socket reset complete", file=sys.stderr, flush=True)
349
- except Exception as e:
350
- print(f"[FORCE_KILL] Error resetting sockets: {e}", file=sys.stderr, flush=True)
325
+ except Exception:
326
+ pass
351
327
 
352
328
  # Respawn worker
353
329
  try:
@@ -357,27 +333,21 @@ class NextZmqExecutor:
357
333
 
358
334
  def reset_kernel(self) -> None:
359
335
  """Reset the kernel by shutting down worker and restarting"""
360
- import sys
361
- print(f"[RESET] Starting kernel reset, worker_pid={self.worker_pid}, is_remote={self.is_remote}", file=sys.stderr, flush=True)
362
-
363
336
  # If connected to remote GPU, DON'T kill the worker - just send shutdown message
364
337
  if self.is_remote:
365
- print(f"[RESET] Remote worker - sending shutdown message only", file=sys.stderr, flush=True)
366
338
  try:
367
339
  self.req.setsockopt(zmq.SNDTIMEO, 2000) # type: ignore[reportAttributeAccessIssue]
368
340
  self.req.setsockopt(zmq.RCVTIMEO, 2000) # type: ignore[reportAttributeAccessIssue]
369
341
  self.req.send_json({'type': 'shutdown'}) # type: ignore[reportAttributeAccessIssue]
370
342
  _ = cast(dict[str, object], self.req.recv_json()) # type: ignore[reportAttributeAccessIssue]
371
- print(f"[RESET] Remote worker acknowledged shutdown", file=sys.stderr, flush=True)
372
- except Exception as e:
373
- print(f"[RESET] Remote worker shutdown failed: {e}", file=sys.stderr, flush=True)
343
+ except Exception:
344
+ pass
374
345
  finally:
375
346
  self.req.setsockopt(zmq.SNDTIMEO, -1) # type: ignore[reportAttributeAccessIssue]
376
347
  self.req.setsockopt(zmq.RCVTIMEO, -1) # type: ignore[reportAttributeAccessIssue]
377
348
 
378
349
  # Reset execution count but don't respawn worker
379
350
  self.execution_count = 0
380
- print(f"[RESET] Remote kernel reset complete", file=sys.stderr, flush=True)
381
351
  return
382
352
 
383
353
  # Local worker mode - try graceful shutdown first
@@ -386,9 +356,8 @@ class NextZmqExecutor:
386
356
  self.req.setsockopt(zmq.RCVTIMEO, 500) # type: ignore[reportAttributeAccessIssue]
387
357
  self.req.send_json({'type': 'shutdown'}) # type: ignore[reportAttributeAccessIssue]
388
358
  _ = cast(dict[str, object], self.req.recv_json()) # type: ignore[reportAttributeAccessIssue]
389
- print(f"[RESET] Sent graceful shutdown message", file=sys.stderr, flush=True)
390
- except Exception as e:
391
- print(f"[RESET] Graceful shutdown failed: {e}", file=sys.stderr, flush=True)
359
+ except Exception:
360
+ pass
392
361
  finally:
393
362
  self.req.setsockopt(zmq.SNDTIMEO, -1) # type: ignore[reportAttributeAccessIssue]
394
363
  self.req.setsockopt(zmq.RCVTIMEO, -1) # type: ignore[reportAttributeAccessIssue]
@@ -396,35 +365,30 @@ class NextZmqExecutor:
396
365
  # Force kill local worker if needed
397
366
  if self.worker_pid:
398
367
  try:
399
- print(f"[RESET] Sending SIGTERM to worker PID {self.worker_pid}", file=sys.stderr, flush=True)
400
368
  os.kill(self.worker_pid, signal.SIGTERM)
401
369
  time.sleep(0.3) # Give it time to shutdown gracefully
402
370
  try:
403
371
  # Check if still alive
404
372
  os.kill(self.worker_pid, 0)
405
373
  # Still alive, force kill
406
- print(f"[RESET] Worker still alive, sending SIGKILL", file=sys.stderr, flush=True)
407
374
  os.kill(self.worker_pid, signal.SIGKILL)
408
375
  time.sleep(0.2) # Wait for SIGKILL to complete
409
376
  except ProcessLookupError:
410
- print(f"[RESET] Worker process terminated", file=sys.stderr, flush=True)
411
- except Exception as e:
412
- print(f"[RESET] Error killing worker: {e}", file=sys.stderr, flush=True)
377
+ pass
378
+ except Exception:
379
+ pass
413
380
 
414
381
  if self.worker_proc:
415
382
  try:
416
383
  self.worker_proc.terminate()
417
384
  self.worker_proc.wait(timeout=1)
418
- print(f"[RESET] Worker process terminated via Popen", file=sys.stderr, flush=True)
419
385
  except Exception:
420
386
  try:
421
387
  self.worker_proc.kill()
422
- print(f"[RESET] Worker process killed via Popen", file=sys.stderr, flush=True)
423
- except Exception as e:
424
- print(f"[RESET] Error killing via Popen: {e}", file=sys.stderr, flush=True)
388
+ except Exception:
389
+ pass
425
390
 
426
391
  # Close sockets first, BEFORE recreating them
427
- print(f"[RESET] Closing old sockets", file=sys.stderr, flush=True)
428
392
  try:
429
393
  self.req.close(0) # type: ignore[reportAttributeAccessIssue]
430
394
  except Exception:
@@ -436,7 +400,6 @@ class NextZmqExecutor:
436
400
 
437
401
  # Wait for ZMQ to release the sockets (critical!)
438
402
  time.sleep(0.5)
439
- print(f"[RESET] Sockets closed, waited for cleanup", file=sys.stderr, flush=True)
440
403
 
441
404
  # Reset state
442
405
  self.execution_count = 0
@@ -445,14 +408,12 @@ class NextZmqExecutor:
445
408
 
446
409
  # Recreate sockets
447
410
  try:
448
- print(f"[RESET] Creating new sockets", file=sys.stderr, flush=True)
449
411
  self.req = self.ctx.socket(zmq.REQ) # type: ignore[reportUnknownMemberType, reportAttributeAccessIssue]
450
412
  self.req.connect(self.cmd_addr) # type: ignore[reportAttributeAccessIssue]
451
413
  self.ctrl = self.ctx.socket(zmq.DEALER) # type: ignore[reportUnknownMemberType, reportAttributeAccessIssue]
452
414
  self.ctrl.connect(self.ctrl_addr) # type: ignore[reportAttributeAccessIssue]
453
- print(f"[RESET] New sockets created successfully", file=sys.stderr, flush=True)
454
- except Exception as e:
455
- print(f"[RESET] Error creating sockets: {e}", file=sys.stderr, flush=True)
415
+ except Exception:
416
+ pass
456
417
 
457
418
  # Reset special handler
458
419
  if self.special_handler is not None:
@@ -460,10 +421,6 @@ class NextZmqExecutor:
460
421
 
461
422
  # Respawn worker
462
423
  try:
463
- print(f"[RESET] Respawning new worker", file=sys.stderr, flush=True)
464
424
  self._ensure_worker()
465
- print(f"[RESET] Kernel reset complete, new worker_pid={self.worker_pid}", file=sys.stderr, flush=True)
466
- except Exception as e:
467
- print(f"[RESET] Error respawning worker: {e}", file=sys.stderr, flush=True)
468
- import traceback
469
- traceback.print_exc()
425
+ except Exception:
426
+ pass
@@ -58,7 +58,6 @@ def _inject_shell_command_function(globals_dict: dict):
58
58
 
59
59
  # Check if already interrupted before starting new command
60
60
  if _interrupt_requested:
61
- print(f"[WORKER] Shell command skipped due to previous interrupt", file=sys.stderr, flush=True)
62
61
  raise KeyboardInterrupt("Execution was interrupted")
63
62
 
64
63
  # Prepare command and environment (using shared utilities)
@@ -80,12 +79,6 @@ def _inject_shell_command_function(globals_dict: dict):
80
79
  _current_subprocess = process
81
80
  if os.name != 'nt':
82
81
  _current_process_group = os.getpgid(process.pid)
83
- # Also create a new process group for clean killing
84
- print(f"[WORKER] Started subprocess PID={process.pid}, PGID={_current_process_group}", file=sys.stderr, flush=True)
85
- else:
86
- print(f"[WORKER] Started subprocess PID={process.pid}", file=sys.stderr, flush=True)
87
-
88
- sys.stderr.flush()
89
82
 
90
83
  try:
91
84
  # Stream output line by line
@@ -120,7 +113,6 @@ def _inject_shell_command_function(globals_dict: dict):
120
113
  except subprocess.TimeoutExpired:
121
114
  # Check if interrupted
122
115
  if _interrupt_requested:
123
- print(f"[WORKER] Interrupt detected, killing subprocess", file=sys.stderr, flush=True)
124
116
  try:
125
117
  process.kill()
126
118
  except Exception:
@@ -135,7 +127,6 @@ def _inject_shell_command_function(globals_dict: dict):
135
127
  except Exception:
136
128
  pass
137
129
  # Don't wait for process or threads - raise immediately
138
- print(f"[WORKER] Raising KeyboardInterrupt immediately", file=sys.stderr, flush=True)
139
130
  raise KeyboardInterrupt("Execution interrupted by user")
140
131
 
141
132
  # Normal completion - join threads briefly
@@ -235,63 +226,34 @@ def control_thread_main(ctrl, current_cell_ref):
235
226
  """Run control channel in separate thread (Jupyter pattern)"""
236
227
  global _interrupt_requested, _current_subprocess, _current_process_group
237
228
 
238
- print(f"[CONTROL] Control thread started", file=sys.stderr, flush=True)
239
-
240
229
  while True:
241
230
  try:
242
231
  # Block waiting for control messages
243
232
  identity = ctrl.recv()
244
233
  msg = ctrl.recv_json()
245
234
 
246
- print(f"[CONTROL] Received: {msg}", file=sys.stderr, flush=True)
247
-
248
235
  mtype = msg.get('type')
249
236
  if mtype == 'interrupt':
250
237
  requested_cell = msg.get('cell_index')
251
238
  current_cell = current_cell_ref[0]
252
239
 
253
- print(f"[CONTROL] Interrupt check: requested={requested_cell}, current={current_cell}, subprocess={_current_subprocess}, pgid={_current_process_group}", file=sys.stderr)
254
- sys.stderr.flush()
255
-
256
240
  if requested_cell is None or requested_cell == current_cell:
257
- print(f"[CONTROL] ✓ Match! Processing interrupt for cell {requested_cell}", file=sys.stderr)
258
- sys.stderr.flush()
259
-
260
241
  # Set global flag
261
242
  _interrupt_requested = True
262
243
 
263
244
  # Send SIGINT to process group (Jupyter pattern)
264
245
  if _current_process_group and os.name != 'nt':
265
246
  try:
266
- print(f"[CONTROL] Sending SIGINT to process group {_current_process_group}", file=sys.stderr)
267
- sys.stderr.flush()
268
247
  os.killpg(_current_process_group, signal.SIGINT)
269
- print(f"[CONTROL] SIGINT sent successfully", file=sys.stderr)
270
- sys.stderr.flush()
271
- except Exception as e:
272
- print(f"[CONTROL] Failed to kill process group: {e}", file=sys.stderr)
273
- sys.stderr.flush()
248
+ except Exception:
249
+ pass
274
250
 
275
251
  # Also kill subprocess directly
276
252
  if _current_subprocess:
277
253
  try:
278
- print(f"[CONTROL] Killing subprocess PID={_current_subprocess.pid}", file=sys.stderr)
279
- sys.stderr.flush()
280
254
  _current_subprocess.kill()
281
- print(f"[CONTROL] Subprocess killed", file=sys.stderr)
282
- sys.stderr.flush()
283
- except Exception as e:
284
- print(f"[CONTROL] Failed to kill subprocess: {e}", file=sys.stderr)
285
- sys.stderr.flush()
286
-
287
- # Don't send SIGINT to self - let the execution thread finish gracefully
288
- # Sending SIGINT here can interrupt the execution thread before it sends
289
- # completion messages, leaving the frontend in a confused state
290
- print(f"[CONTROL] Interrupt signal sent, waiting for execution thread to finish", file=sys.stderr)
291
- sys.stderr.flush()
292
- else:
293
- print(f"[CONTROL] ✗ NO MATCH! Ignoring interrupt (requested cell {requested_cell} != current cell {current_cell})", file=sys.stderr)
294
- sys.stderr.flush()
255
+ except Exception:
256
+ pass
295
257
 
296
258
  # Reply
297
259
  ctrl.send(identity, zmq.SNDMORE)
@@ -302,27 +264,18 @@ def control_thread_main(ctrl, current_cell_ref):
302
264
  ctrl.send_json({'ok': True, 'pid': os.getpid()})
303
265
  break
304
266
 
305
- except Exception as e:
306
- print(f"[CONTROL] Error: {e}", file=sys.stderr, flush=True)
307
- import traceback
308
- traceback.print_exc()
267
+ except Exception:
268
+ pass
309
269
 
310
270
 
311
271
  def worker_main():
312
272
  global _current_subprocess, _interrupt_requested, _current_process_group
313
273
 
314
- print(f"[WORKER] ========================================", file=sys.stderr, flush=True)
315
- print(f"[WORKER] Starting THREADED worker (new code!)", file=sys.stderr, flush=True)
316
- print(f"[WORKER] PID: {os.getpid()}", file=sys.stderr, flush=True)
317
- print(f"[WORKER] ========================================", file=sys.stderr, flush=True)
318
-
319
274
  _setup_signals()
320
275
  cmd_addr = os.environ['MC_ZMQ_CMD_ADDR']
321
276
  pub_addr = os.environ['MC_ZMQ_PUB_ADDR']
322
277
  ctrl_addr = os.environ.get('MC_ZMQ_CTRL_ADDR', cmd_addr.replace('5555', '5557'))
323
278
 
324
- print(f"[WORKER] Binding to control socket: {ctrl_addr}", file=sys.stderr, flush=True)
325
-
326
279
  ctx = zmq.Context.instance()
327
280
  rep = ctx.socket(zmq.REP)
328
281
  rep.bind(cmd_addr)
@@ -341,7 +294,6 @@ def worker_main():
341
294
  # Start control thread (Jupyter pattern)
342
295
  ctrl_thread = threading.Thread(target=control_thread_main, args=(ctrl, current_cell_ref), daemon=True)
343
296
  ctrl_thread.start()
344
- print(f"[WORKER] Started control thread", file=sys.stderr, flush=True)
345
297
 
346
298
  # Persistent REPL state
347
299
  g = {"__name__": "__main__"}
@@ -380,9 +332,7 @@ def worker_main():
380
332
  cell_index = msg.get('cell_index')
381
333
  requested_count = msg.get('execution_count')
382
334
 
383
- print(f"[WORKER] Setting current_cell_ref[0] = {cell_index}", file=sys.stderr, flush=True)
384
335
  current_cell_ref[0] = cell_index # Update for control thread
385
- print(f"[WORKER] Confirmed current_cell_ref[0] = {current_cell_ref[0]}", file=sys.stderr, flush=True)
386
336
 
387
337
  if isinstance(requested_count, int):
388
338
  exec_count = requested_count - 1
@@ -404,7 +354,6 @@ def worker_main():
404
354
 
405
355
  try:
406
356
  shell_cmd = code.strip()[1:].strip()
407
- print(f"[WORKER] Executing shell: {shell_cmd[:50]}...", file=sys.stderr, flush=True)
408
357
 
409
358
  # Run shell command with streaming
410
359
  process = subprocess.Popen(
@@ -453,7 +402,6 @@ def worker_main():
453
402
  break
454
403
  except subprocess.TimeoutExpired:
455
404
  if _interrupt_requested:
456
- print(f"[WORKER] Interrupt detected, killing shell process", file=sys.stderr, flush=True)
457
405
  try:
458
406
  process.kill()
459
407
  except Exception:
@@ -468,7 +416,6 @@ def worker_main():
468
416
  except Exception:
469
417
  pass
470
418
  # Set interrupted status immediately
471
- print(f"[WORKER] Setting error status for interrupted shell command", file=sys.stderr, flush=True)
472
419
  status = 'error'
473
420
  error_payload = {
474
421
  'ename': 'KeyboardInterrupt',
@@ -484,8 +431,6 @@ def worker_main():
484
431
  stdout_thread.join(timeout=0.1)
485
432
  stderr_thread.join(timeout=0.1)
486
433
 
487
- print(f"[WORKER] Shell process finished: return_code={return_code}", file=sys.stderr, flush=True)
488
-
489
434
  # Check return code
490
435
  if return_code != 0:
491
436
  status = 'error'
@@ -494,7 +439,6 @@ def worker_main():
494
439
  'evalue': f'Command failed with return code {return_code}',
495
440
  'traceback': [f'Shell command failed: {shell_cmd}']
496
441
  }
497
- print(f"[WORKER] Set error_payload to ShellCommandError", file=sys.stderr, flush=True)
498
442
  except KeyboardInterrupt:
499
443
  status = 'error'
500
444
  error_payload = {
@@ -511,17 +455,12 @@ def worker_main():
511
455
  error_payload = {'ename': type(exc).__name__, 'evalue': str(exc), 'traceback': traceback.format_exc().split('\n')}
512
456
 
513
457
  duration_ms = f"{(time.time()-start)*1000:.1f}ms"
514
- print(f"[WORKER] Sending completion messages: status={status}, error={error_payload is not None}", file=sys.stderr, flush=True)
515
458
  if error_payload:
516
459
  pub.send_json({'type': 'execution_error', 'cell_index': cell_index, 'error': error_payload})
517
- print(f"[WORKER] Sent execution_error", file=sys.stderr, flush=True)
518
460
  pub.send_json({'type': 'execution_complete', 'cell_index': cell_index, 'result': {'status': status, 'execution_count': exec_count, 'execution_time': duration_ms, 'outputs': [], 'error': error_payload}})
519
- print(f"[WORKER] Sent execution_complete", file=sys.stderr, flush=True)
520
461
  rep.send_json({'ok': True, 'pid': os.getpid()})
521
462
 
522
- print(f"[WORKER] Clearing current_cell_ref[0] (was {current_cell_ref[0]})", file=sys.stderr, flush=True)
523
463
  current_cell_ref[0] = None
524
- print(f"[WORKER] Confirmed current_cell_ref[0] = {current_cell_ref[0]}", file=sys.stderr, flush=True)
525
464
  continue
526
465
 
527
466
  # Regular Python code execution
@@ -590,17 +529,12 @@ def worker_main():
590
529
  sys.stdout, sys.stderr = old_out, old_err
591
530
  exec_count += 1
592
531
  duration_ms = f"{(time.time()-start)*1000:.1f}ms"
593
- print(f"[WORKER] Sending completion messages (Python): status={status}, error={error_payload is not None}", file=sys.stderr, flush=True)
594
532
  if error_payload:
595
533
  pub.send_json({'type': 'execution_error', 'cell_index': cell_index, 'error': error_payload})
596
- print(f"[WORKER] Sent execution_error", file=sys.stderr, flush=True)
597
534
  pub.send_json({'type': 'execution_complete', 'cell_index': cell_index, 'result': {'status': status, 'execution_count': exec_count, 'execution_time': duration_ms, 'outputs': [], 'error': error_payload}})
598
- print(f"[WORKER] Sent execution_complete", file=sys.stderr, flush=True)
599
535
  rep.send_json({'ok': True, 'pid': os.getpid()})
600
536
 
601
- print(f"[WORKER] Clearing current_cell_ref[0] (was {current_cell_ref[0]})", file=sys.stderr, flush=True)
602
537
  current_cell_ref[0] = None
603
- print(f"[WORKER] Confirmed current_cell_ref[0] = {current_cell_ref[0]}", file=sys.stderr, flush=True)
604
538
 
605
539
  try:
606
540
  rep.close(0); pub.close(0); ctrl.close(0)
@@ -195,3 +195,65 @@ class AvailabilityQuery(BaseModel):
195
195
  gpu_count: int | None = None
196
196
  gpu_type: str | None = None
197
197
  security: str | None = None
198
+
199
+
200
+ # ============================================================================
201
+ # Multi-Provider API Models
202
+ # ============================================================================
203
+
204
+ class ProviderInfo(BaseModel):
205
+ """Information about a GPU cloud provider."""
206
+ name: str # Internal name (e.g., "runpod")
207
+ display_name: str # Human-readable name (e.g., "RunPod")
208
+ api_key_env_name: str # Environment variable name
209
+ supports_ssh: bool # Whether provider supports SSH connections
210
+ dashboard_url: str # URL to get API key
211
+ configured: bool = False # Whether API key is configured
212
+ is_active: bool = False # Whether this is the currently active provider
213
+
214
+
215
+ class ProviderListResponse(BaseModel):
216
+ """Response model for listing providers."""
217
+ providers: list[ProviderInfo]
218
+ active_provider: str | None = None
219
+
220
+
221
+ class ProviderConfigRequest(BaseModel):
222
+ """Request model for configuring a provider API key."""
223
+ api_key: str
224
+ token_secret: str | None = None # For Modal which requires two tokens
225
+ make_active: bool = False # Whether to make this the active provider
226
+
227
+
228
+ class SetActiveProviderRequest(BaseModel):
229
+ """Request model for setting the active provider."""
230
+ provider: str
231
+
232
+
233
+ class GpuAvailabilityResponse(BaseModel):
234
+ """Response model for GPU availability."""
235
+ data: list[dict]
236
+ total_count: int
237
+ provider: str
238
+ note: str | None = None
239
+
240
+
241
+ class PodListResponse(BaseModel):
242
+ """Response model for listing pods."""
243
+ data: list[dict]
244
+ total_count: int
245
+ offset: int
246
+ limit: int
247
+ provider: str
248
+
249
+
250
+ class PodWithProvider(PodResponse):
251
+ """Pod response with provider information."""
252
+ provider: str = "prime_intellect"
253
+
254
+
255
+ class CreatePodWithProviderRequest(BaseModel):
256
+ """Request to create a pod with explicit provider selection."""
257
+ pod: PodConfig
258
+ provider_name: str # Provider to use (e.g., "runpod", "lambda_labs")
259
+ team: TeamConfig | None = None
morecompute/notebook.py CHANGED
@@ -127,6 +127,17 @@ class Notebook:
127
127
  cell['id'] = self._generate_cell_id()
128
128
  self.cells.append(cell)
129
129
 
130
+ # Ensure at least one empty cell exists
131
+ if not self.cells:
132
+ self.cells.append({
133
+ 'id': self._generate_cell_id(),
134
+ 'cell_type': 'code',
135
+ 'source': '',
136
+ 'metadata': {},
137
+ 'outputs': [],
138
+ 'execution_count': None
139
+ })
140
+
130
141
  self.metadata = data.get('metadata', {})
131
142
  self.file_path = file_path
132
143