more-compute 0.1.4__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- frontend/app/globals.css +322 -77
- frontend/app/layout.tsx +98 -82
- frontend/components/Cell.tsx +234 -95
- frontend/components/Notebook.tsx +430 -199
- frontend/components/{AddCellButton.tsx → cell/AddCellButton.tsx} +0 -2
- frontend/components/cell/MonacoCell.tsx +726 -0
- frontend/components/layout/ConnectionBanner.tsx +41 -0
- frontend/components/{Sidebar.tsx → layout/Sidebar.tsx} +16 -11
- frontend/components/modals/ConfirmModal.tsx +154 -0
- frontend/components/modals/SuccessModal.tsx +140 -0
- frontend/components/output/MarkdownRenderer.tsx +116 -0
- frontend/components/popups/ComputePopup.tsx +674 -365
- frontend/components/popups/MetricsPopup.tsx +11 -7
- frontend/components/popups/SettingsPopup.tsx +11 -13
- frontend/contexts/PodWebSocketContext.tsx +247 -0
- frontend/eslint.config.mjs +11 -0
- frontend/lib/monaco-themes.ts +160 -0
- frontend/lib/settings.ts +128 -26
- frontend/lib/themes.json +9973 -0
- frontend/lib/websocket-native.ts +19 -8
- frontend/lib/websocket.ts +59 -11
- frontend/next.config.ts +8 -0
- frontend/package-lock.json +1705 -3
- frontend/package.json +8 -1
- frontend/styling_README.md +18 -0
- kernel_run.py +159 -42
- more_compute-0.2.0.dist-info/METADATA +126 -0
- more_compute-0.2.0.dist-info/RECORD +100 -0
- morecompute/__version__.py +1 -1
- morecompute/execution/executor.py +31 -20
- morecompute/execution/worker.py +68 -7
- morecompute/models/__init__.py +31 -0
- morecompute/models/api_models.py +197 -0
- morecompute/notebook.py +50 -7
- morecompute/server.py +574 -94
- morecompute/services/data_manager.py +379 -0
- morecompute/services/lsp_service.py +335 -0
- morecompute/services/pod_manager.py +122 -20
- morecompute/services/pod_monitor.py +138 -0
- morecompute/services/prime_intellect.py +87 -63
- morecompute/utils/config_util.py +59 -0
- morecompute/utils/special_commands.py +11 -5
- morecompute/utils/zmq_util.py +51 -0
- frontend/components/MarkdownRenderer.tsx +0 -84
- frontend/components/popups/PythonPopup.tsx +0 -292
- more_compute-0.1.4.dist-info/METADATA +0 -173
- more_compute-0.1.4.dist-info/RECORD +0 -86
- /frontend/components/{CellButton.tsx → cell/CellButton.tsx} +0 -0
- /frontend/components/{ErrorModal.tsx → modals/ErrorModal.tsx} +0 -0
- /frontend/components/{CellOutput.tsx → output/CellOutput.tsx} +0 -0
- /frontend/components/{ErrorDisplay.tsx → output/ErrorDisplay.tsx} +0 -0
- {more_compute-0.1.4.dist-info → more_compute-0.2.0.dist-info}/WHEEL +0 -0
- {more_compute-0.1.4.dist-info → more_compute-0.2.0.dist-info}/entry_points.txt +0 -0
- {more_compute-0.1.4.dist-info → more_compute-0.2.0.dist-info}/licenses/LICENSE +0 -0
- {more_compute-0.1.4.dist-info → more_compute-0.2.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,335 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Language Server Protocol (LSP) service for Python autocomplete.
|
|
3
|
+
Manages Pyright language server for providing IntelliSense features.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import asyncio
|
|
7
|
+
import json
|
|
8
|
+
import subprocess
|
|
9
|
+
import sys
|
|
10
|
+
from typing import Optional, Dict, Any, List
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
import logging
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class LSPService:
    """Manages Pyright language server for Python code intelligence.

    The service spawns ``pyright-langserver --stdio`` as a child process and
    talks to it with JSON-RPC messages framed by a ``Content-Length`` header.
    Every notebook cell is presented to the server as a virtual document named
    ``cell_<cell_id>.py`` under ``workspace_root``.

    Attributes:
        workspace_root: Directory reported to the server as the workspace root.
        process: The language server child process, or ``None`` when stopped.
        msg_id: Id of the most recently issued request.
        pending_requests: Request id -> future resolved by the reader task.
        initialized: ``True`` once the initialize handshake has been sent.
        documents: Document URI -> ``{"version": int, "text": str}`` for every
            document currently open on the server.
    """

    # Seconds to wait for a response before a request is abandoned.
    REQUEST_TIMEOUT = 5.0

    def __init__(self, workspace_root: Path):
        """Create a stopped service; call :meth:`start` to launch the server."""
        self.workspace_root = workspace_root
        self.process: Optional[subprocess.Popen] = None
        self.msg_id = 0
        self.pending_requests: Dict[int, asyncio.Future] = {}
        self.initialized = False
        # uri -> {"version": int, "text": str} for each open document
        self.documents: Dict[str, Dict[str, Any]] = {}
        self._reader_task: Optional[asyncio.Task] = None

    async def start(self):
        """Start the Pyright language server process and run the handshake.

        Raises:
            Exception: Whatever prevented the launch or the handshake (for
                example ``FileNotFoundError`` when ``pyright-langserver`` is
                not installed). The child process and the reader task are
                cleaned up before the exception is re-raised.
        """
        try:
            # Start Pyright in LSP mode. stderr is discarded rather than piped:
            # nothing reads it, and an undrained pipe would eventually block
            # the server once the OS pipe buffer fills up.
            self.process = subprocess.Popen(
                ["pyright-langserver", "--stdio"],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.DEVNULL,
                bufsize=0,
            )

            logger.info("Pyright language server started")

            # Start reading responses in background
            self._reader_task = asyncio.create_task(self._read_responses())

            # Initialize the language server
            init_result = await self._send_request("initialize", {
                "processId": None,
                "rootUri": f"file://{self.workspace_root}",
                "capabilities": {
                    "textDocument": {
                        "completion": {
                            "completionItem": {
                                "snippetSupport": True,
                                "documentationFormat": ["markdown", "plaintext"],
                                "resolveSupport": {
                                    "properties": ["documentation", "detail"]
                                }
                            },
                            "contextSupport": True
                        },
                        "hover": {
                            "contentFormat": ["markdown", "plaintext"]
                        },
                        "signatureHelp": {
                            "signatureInformation": {
                                "documentationFormat": ["markdown", "plaintext"]
                            }
                        }
                    }
                },
                "initializationOptions": {
                    "python": {
                        "analysis": {
                            "autoSearchPaths": True,
                            "useLibraryCodeForTypes": True,
                            "diagnosticMode": "openFilesOnly"
                        }
                    }
                }
            })
            if init_result is None:
                # Best effort: the handshake continues, matching the previous
                # behaviour, but the slow start is made visible in the log.
                logger.warning("LSP initialize request returned no result")

            # Send initialized notification
            await self._send_notification("initialized", {})
            self.initialized = True
            logger.info("Pyright language server initialized")

        except Exception as e:
            logger.error(f"Failed to start LSP service: {e}")
            # Do not leave a half-started server or reader task behind.
            await self._cleanup()
            raise

    async def get_completions(self, cell_id: str, source: str, line: int, character: int) -> List[Dict[str, Any]]:
        """Get code completions at a specific position.

        Args:
            cell_id: Notebook cell identifier, used to build the document URI.
            source: Current text of the cell.
            line: Zero-based line of the cursor, forwarded to the server.
            character: Zero-based column of the cursor, forwarded to the server.

        Returns:
            The completion items reported by the server, or ``[]`` when the
            service is not initialized, the request times out, or it fails.
        """
        if not self.initialized:
            return []

        try:
            file_uri = self._cell_uri(cell_id)
            await self._sync_document(file_uri, source)

            result = await self._send_request("textDocument/completion", {
                "textDocument": {"uri": file_uri},
                "position": {"line": line, "character": character},
                "context": {"triggerKind": 1}  # Invoked
            })

            if not result:
                return []

            # Handle both list and CompletionList formats
            items = result.get("items", []) if isinstance(result, dict) else result
            return items if isinstance(items, list) else []

        except Exception as e:
            logger.error(f"Error getting completions: {e}")
            return []

    async def get_hover(self, cell_id: str, source: str, line: int, character: int) -> Optional[Dict[str, Any]]:
        """Get hover information at a specific position.

        Args:
            cell_id: Notebook cell identifier, used to build the document URI.
            source: Current text of the cell.
            line: Zero-based line of the position, forwarded to the server.
            character: Zero-based column of the position, forwarded to the server.

        Returns:
            The server's hover result, or ``None`` when the service is not
            initialized, the request times out, or it fails.
        """
        if not self.initialized:
            return None

        try:
            file_uri = self._cell_uri(cell_id)
            # Push the latest text even when the document is already open;
            # otherwise the hover position refers to stale content.
            await self._sync_document(file_uri, source)

            return await self._send_request("textDocument/hover", {
                "textDocument": {"uri": file_uri},
                "position": {"line": line, "character": character}
            })

        except Exception as e:
            logger.error(f"Error getting hover info: {e}")
            return None

    def _cell_uri(self, cell_id: str) -> str:
        """Return the virtual file URI that represents a notebook cell."""
        # NOTE(review): the path is interpolated verbatim (no percent-encoding),
        # exactly as the rootUri sent during initialize; confirm this is fine
        # for workspace paths containing spaces or on Windows.
        return f"file://{self.workspace_root}/cell_{cell_id}.py"

    async def _sync_document(self, file_uri: str, source: str) -> None:
        """Make the server's copy of ``file_uri`` equal to ``source``."""
        tracked = self.documents.get(file_uri)

        if tracked is None:
            # Open new document
            await self._send_notification("textDocument/didOpen", {
                "textDocument": {
                    "uri": file_uri,
                    "languageId": "python",
                    "version": 1,
                    "text": source
                }
            })
            self.documents[file_uri] = {"version": 1, "text": source}
            return

        if tracked["text"] == source:
            # The server already holds this exact text; nothing to send.
            return

        # Document already open, send a full-text change notification
        next_version = tracked["version"] + 1
        await self._send_notification("textDocument/didChange", {
            "textDocument": {
                "uri": file_uri,
                "version": next_version
            },
            "contentChanges": [{"text": source}]
        })
        tracked["version"] = next_version
        tracked["text"] = source

    def _write_message(self, message: Dict[str, Any]) -> None:
        """Frame ``message`` with a Content-Length header and write it to stdin."""
        stdin = self.process.stdin if self.process else None
        if stdin is None:
            raise RuntimeError("LSP process not running")

        # Content-Length counts bytes of the encoded body, not characters.
        body = json.dumps(message).encode("utf-8")
        stdin.write(b"Content-Length: %d\r\n\r\n" % len(body) + body)
        stdin.flush()

    async def _send_request(self, method: str, params: Dict[str, Any]) -> Any:
        """Send a JSON-RPC request and wait for response.

        Returns:
            The ``result`` member of the response, or ``None`` when no
            response arrives within ``REQUEST_TIMEOUT`` seconds.

        Raises:
            RuntimeError: If the process is not running, the message cannot be
                written, or the server answers with an error.
        """
        if not self.process or not self.process.stdin:
            raise RuntimeError("LSP process not running")

        self.msg_id += 1
        msg_id = self.msg_id

        # Create future for response; the reader task resolves it.
        future: asyncio.Future = asyncio.get_running_loop().create_future()
        self.pending_requests[msg_id] = future

        try:
            try:
                self._write_message({
                    "jsonrpc": "2.0",
                    "id": msg_id,
                    "method": method,
                    "params": params
                })
            except Exception as e:
                raise RuntimeError(f"Failed to send LSP request: {e}") from e

            # Wait for response with timeout
            try:
                return await asyncio.wait_for(future, timeout=self.REQUEST_TIMEOUT)
            except asyncio.TimeoutError:
                logger.warning(f"LSP request timeout for method: {method}")
                return None
        finally:
            # Covers timeout, send failure and caller cancellation alike, so a
            # late response is ignored instead of touching a dead future.
            self.pending_requests.pop(msg_id, None)

    async def _send_notification(self, method: str, params: Dict[str, Any]):
        """Send a notification (no response expected)."""
        if not self.process or not self.process.stdin:
            raise RuntimeError("LSP process not running")

        self._write_message({
            "jsonrpc": "2.0",
            "method": method,
            "params": params
        })

    @staticmethod
    def _parse_content_length(header_blob: bytes) -> int:
        """Extract Content-Length from a header section; 0 if absent or invalid."""
        for header_line in header_blob.decode("ascii", errors="replace").split("\r\n"):
            name, sep, value = header_line.partition(":")
            if sep and name.strip().lower() == "content-length":
                try:
                    return max(int(value.strip()), 0)
                except ValueError:
                    return 0
        return 0

    async def _read_responses(self):
        """Background task to read LSP responses.

        Reads the server's stdout in a worker thread, splits the stream into
        framed messages and passes each decoded message to
        :meth:`_handle_message`. Ends when the process exits or stdout closes.
        """
        # Keep local references: shutdown clears self.process while this task
        # may still be running.
        process = self.process
        if not process or not process.stdout:
            return

        stdout = process.stdout
        loop = asyncio.get_running_loop()
        buffer = b""

        try:
            while process.poll() is None:
                # Blocking read runs in the default executor.
                chunk = await loop.run_in_executor(None, stdout.read, 1024)
                if not chunk:
                    break
                buffer += chunk

                # Process complete messages
                while b"\r\n\r\n" in buffer:
                    header_blob, _, buffer = buffer.partition(b"\r\n\r\n")
                    content_length = self._parse_content_length(header_blob)

                    # Wait for complete message
                    while len(buffer) < content_length:
                        chunk = await loop.run_in_executor(
                            None, stdout.read, content_length - len(buffer)
                        )
                        if not chunk:
                            break
                        buffer += chunk

                    if len(buffer) < content_length:
                        logger.error("LSP stream closed in the middle of a message")
                        return

                    payload = buffer[:content_length]
                    buffer = buffer[content_length:]

                    try:
                        # JSONDecodeError and UnicodeDecodeError are both
                        # ValueError subclasses; one bad message must not stop
                        # the reader.
                        message = json.loads(payload.decode('utf-8'))
                    except ValueError as e:
                        logger.error(f"Failed to parse LSP message: {e}")
                        continue

                    await self._handle_message(message)

        except Exception as e:
            logger.error(f"Error reading LSP responses: {e}")

    async def _handle_message(self, message: Dict[str, Any]):
        """Handle incoming LSP message.

        Responses resolve the matching pending request. Messages carrying a
        ``method`` originate from the server (requests or notifications such
        as diagnostics) and are currently ignored.
        """
        if not isinstance(message, dict):
            return

        # Server-originated traffic. Server request ids live in their own id
        # space, so they must never be matched against our pending requests.
        if "method" in message:
            return

        # Response to our request
        future = self.pending_requests.pop(message.get("id"), None)
        if future is None or future.done():
            return

        if "result" in message:
            future.set_result(message["result"])
        elif "error" in message:
            future.set_exception(RuntimeError(message["error"]))
        else:
            future.set_result(None)

    async def _cleanup(self) -> None:
        """Stop the reader task and the child process and reset all state."""
        self.initialized = False

        # Cancel reader task
        reader = self._reader_task
        self._reader_task = None
        if reader is not None:
            reader.cancel()
            try:
                await reader
            except asyncio.CancelledError:
                pass

        # Terminate process; wait in a worker thread so the event loop stays
        # responsive during the grace period.
        process = self.process
        self.process = None
        if process is not None and process.poll() is None:
            loop = asyncio.get_running_loop()
            process.terminate()
            try:
                await loop.run_in_executor(None, lambda: process.wait(timeout=5))
            except subprocess.TimeoutExpired:
                process.kill()
                await loop.run_in_executor(None, process.wait)

        # Wake up anything still waiting for a response that can never come.
        for future in self.pending_requests.values():
            if not future.done():
                future.set_exception(RuntimeError("LSP service stopped"))
        self.pending_requests.clear()
        self.documents.clear()

    async def shutdown(self):
        """Shutdown the language server.

        Closes open documents and performs the shutdown/exit exchange on a
        best-effort basis, then always stops the reader task and the child
        process, even if the exchange failed or the server never finished
        initializing. Errors are logged, not raised.
        """
        if self.process is None and not self.initialized:
            return

        was_initialized = self.initialized
        # Refuse new completion/hover requests while shutting down.
        self.initialized = False

        try:
            if was_initialized:
                # Close all documents
                for uri in list(self.documents):
                    await self._send_notification("textDocument/didClose", {
                        "textDocument": {"uri": uri}
                    })

                # Shutdown
                await self._send_request("shutdown", {})
                await self._send_notification("exit", {})
        except Exception as e:
            logger.error(f"Error during LSP shutdown: {e}")

        try:
            await self._cleanup()
            logger.info("LSP service shutdown complete")
        except Exception as e:
            logger.error(f"Error during LSP shutdown: {e}")
|
|
@@ -96,6 +96,23 @@ class PodKernelManager:
|
|
|
96
96
|
"""
|
|
97
97
|
import sys
|
|
98
98
|
|
|
99
|
+
# Check if already connected to this pod
|
|
100
|
+
if self.pod and self.pod.id == pod_id:
|
|
101
|
+
# Check if tunnel is still alive
|
|
102
|
+
if self.ssh_tunnel_proc and self.ssh_tunnel_proc.poll() is None:
|
|
103
|
+
return {
|
|
104
|
+
"status": "ok",
|
|
105
|
+
"message": f"Already connected to pod {pod_id}"
|
|
106
|
+
}
|
|
107
|
+
# Tunnel died, clean up and reconnect
|
|
108
|
+
print(f"[POD MANAGER] Existing tunnel dead, reconnecting...", file=sys.stderr, flush=True)
|
|
109
|
+
await self.disconnect()
|
|
110
|
+
|
|
111
|
+
# If connected to different pod, disconnect first
|
|
112
|
+
if self.pod and self.pod.id != pod_id:
|
|
113
|
+
print(f"[POD MANAGER] Disconnecting from {self.pod.id} to connect to {pod_id}", file=sys.stderr, flush=True)
|
|
114
|
+
await self.disconnect()
|
|
115
|
+
|
|
99
116
|
self.pod = await self.pi_service.get_pod(pod_id)
|
|
100
117
|
|
|
101
118
|
print(f"[POD MANAGER] Pod status: {self.pod.status}", file=sys.stderr, flush=True)
|
|
@@ -144,12 +161,16 @@ class PodKernelManager:
|
|
|
144
161
|
print(f"[POD MANAGER] Parsed SSH host: {ssh_host}, port: {ssh_port}", file=sys.stderr, flush=True)
|
|
145
162
|
|
|
146
163
|
#deploy worker code to pod
|
|
164
|
+
print(f"[POD MANAGER] Deploying worker code to pod...", file=sys.stderr, flush=True)
|
|
147
165
|
deploy_result = await self._deploy_worker(ssh_host, ssh_port)
|
|
166
|
+
print(f"[POD MANAGER] Deploy result: {deploy_result}", file=sys.stderr, flush=True)
|
|
148
167
|
if deploy_result.get("status") == "error":
|
|
149
168
|
return deploy_result
|
|
150
169
|
|
|
151
170
|
#create ssh tunnel for ZMQ ports
|
|
171
|
+
print(f"[POD MANAGER] Creating SSH tunnel...", file=sys.stderr, flush=True)
|
|
152
172
|
tunnel_result = await self._create_ssh_tunnel(ssh_host, ssh_port)
|
|
173
|
+
print(f"[POD MANAGER] Tunnel result: {tunnel_result}", file=sys.stderr, flush=True)
|
|
153
174
|
if tunnel_result.get("status") == "error":
|
|
154
175
|
return tunnel_result
|
|
155
176
|
|
|
@@ -159,6 +180,11 @@ class PodKernelManager:
|
|
|
159
180
|
await self.disconnect()
|
|
160
181
|
return worker_result
|
|
161
182
|
|
|
183
|
+
# Note: Worker may take a few seconds to start and install matplotlib
|
|
184
|
+
# The connection should work even if verification fails
|
|
185
|
+
print(f"[POD MANAGER] Remote worker is starting (matplotlib install may take a few seconds)", file=sys.stderr, flush=True)
|
|
186
|
+
print(f"[POD MANAGER] Connection established - try running code in ~5 seconds", file=sys.stderr, flush=True)
|
|
187
|
+
|
|
162
188
|
return {
|
|
163
189
|
"status": "ok",
|
|
164
190
|
"message": f"Connected to pod {pod_id}",
|
|
@@ -253,7 +279,7 @@ class PodKernelManager:
|
|
|
253
279
|
(
|
|
254
280
|
"cd /tmp && "
|
|
255
281
|
"tar -xzf morecompute.tar.gz && "
|
|
256
|
-
"pip install --quiet pyzmq && "
|
|
282
|
+
"pip install --quiet pyzmq matplotlib && "
|
|
257
283
|
"echo 'Deployment complete'"
|
|
258
284
|
)
|
|
259
285
|
])
|
|
@@ -315,19 +341,27 @@ class PodKernelManager:
|
|
|
315
341
|
|
|
316
342
|
self.ssh_tunnel_proc = subprocess.Popen(
|
|
317
343
|
tunnel_cmd,
|
|
318
|
-
stdout=subprocess.
|
|
319
|
-
stderr=subprocess.
|
|
344
|
+
stdout=subprocess.PIPE,
|
|
345
|
+
stderr=subprocess.PIPE
|
|
320
346
|
)
|
|
321
347
|
|
|
322
348
|
# Wait briefly for tunnel to establish
|
|
323
349
|
await asyncio.sleep(2)
|
|
324
350
|
|
|
351
|
+
# Check if process is still running
|
|
352
|
+
if self.ssh_tunnel_proc is None:
|
|
353
|
+
return {
|
|
354
|
+
"status": "error",
|
|
355
|
+
"message": "SSH tunnel process is None"
|
|
356
|
+
}
|
|
325
357
|
if self.ssh_tunnel_proc.poll() is not None:
|
|
358
|
+
# Process died, get error output
|
|
359
|
+
stdout, stderr = self.ssh_tunnel_proc.communicate()
|
|
360
|
+
error_msg = stderr.decode('utf-8') if stderr else "No error output"
|
|
326
361
|
return {
|
|
327
362
|
"status": "error",
|
|
328
|
-
"message": "SSH tunnel failed to establish"
|
|
363
|
+
"message": f"SSH tunnel failed to establish: {error_msg}"
|
|
329
364
|
}
|
|
330
|
-
|
|
331
365
|
return {
|
|
332
366
|
"status": "ok",
|
|
333
367
|
"message": "SSH tunnel created",
|
|
@@ -335,6 +369,7 @@ class PodKernelManager:
|
|
|
335
369
|
}
|
|
336
370
|
|
|
337
371
|
except Exception as e:
|
|
372
|
+
print(f"[POD MANAGER] Exception creating tunnel: {e}", file=sys.stderr, flush=True)
|
|
338
373
|
return {
|
|
339
374
|
"status": "error",
|
|
340
375
|
"message": f"Tunnel creation error: {str(e)}"
|
|
@@ -352,7 +387,10 @@ class PodKernelManager:
|
|
|
352
387
|
dict with worker start status
|
|
353
388
|
"""
|
|
354
389
|
try:
|
|
390
|
+
print(f"[POD MANAGER] Starting remote worker on {ssh_host}:{ssh_port}", file=sys.stderr, flush=True)
|
|
391
|
+
|
|
355
392
|
# Start worker in background on remote pod
|
|
393
|
+
# Use 'python3' instead of sys.executable since remote pod may have different Python path
|
|
356
394
|
ssh_key = self._get_ssh_key()
|
|
357
395
|
worker_cmd = ["ssh", "-p", ssh_port]
|
|
358
396
|
|
|
@@ -365,14 +403,14 @@ class PodKernelManager:
|
|
|
365
403
|
"-o", "BatchMode=yes",
|
|
366
404
|
"-o", "ConnectTimeout=10",
|
|
367
405
|
f"root@{ssh_host}",
|
|
406
|
+
"sh", "-c",
|
|
368
407
|
(
|
|
369
|
-
f"cd /tmp && "
|
|
370
|
-
f"
|
|
371
|
-
f"
|
|
372
|
-
f"
|
|
373
|
-
f"
|
|
374
|
-
f"
|
|
375
|
-
f"echo $!"
|
|
408
|
+
f"'cd /tmp && "
|
|
409
|
+
f"MC_ZMQ_CMD_ADDR=tcp://0.0.0.0:{self.remote_cmd_port} "
|
|
410
|
+
f"MC_ZMQ_PUB_ADDR=tcp://0.0.0.0:{self.remote_pub_port} "
|
|
411
|
+
f"nohup python3 /tmp/morecompute/execution/worker.py "
|
|
412
|
+
f">/tmp/worker.log 2>&1 </dev/null & "
|
|
413
|
+
f"echo $!'"
|
|
376
414
|
)
|
|
377
415
|
])
|
|
378
416
|
|
|
@@ -390,10 +428,13 @@ class PodKernelManager:
|
|
|
390
428
|
}
|
|
391
429
|
|
|
392
430
|
remote_pid = result.stdout.strip()
|
|
431
|
+
print(f"[POD MANAGER] Remote worker PID: {remote_pid}", file=sys.stderr, flush=True)
|
|
393
432
|
|
|
394
433
|
# Wait for worker to be ready
|
|
395
434
|
await asyncio.sleep(2)
|
|
396
435
|
|
|
436
|
+
print(f"[POD MANAGER] Remote worker should be ready now", file=sys.stderr, flush=True)
|
|
437
|
+
|
|
397
438
|
return {
|
|
398
439
|
"status": "ok",
|
|
399
440
|
"message": "Remote worker started",
|
|
@@ -458,6 +499,64 @@ class PodKernelManager:
|
|
|
458
499
|
"messages": messages
|
|
459
500
|
}
|
|
460
501
|
|
|
502
|
+
async def execute_ssh_command(self, command: str) -> tuple[str, str, int]:
    """
    Execute a command on the remote pod via SSH.

    The target is taken from ``self.pod.sshConnection``: the token containing
    ``@`` is used as ``user@host`` and the token following ``-p`` as the port
    (default ``22``).

    args:
        command: The command to execute

    returns:
        tuple of (stdout, stderr, return_code)

    raises:
        RuntimeError: if there is no active pod connection, the connection
            string contains no ``user@host`` token, or no SSH key is found
    """
    if not self.pod or not self.pod.sshConnection:
        raise RuntimeError("No active pod connection")

    # Parse SSH connection string to get host and port
    ssh_parts = self.pod.sshConnection.split()
    host_part = None
    port = "22"  # default SSH port

    # enumerate() gives each token its own position; list.index() would
    # always resolve to the first "-p" and rescan the list on every hit.
    for position, part in enumerate(ssh_parts):
        if "@" in part:
            host_part = part
        if part == "-p" and position + 1 < len(ssh_parts):
            port = ssh_parts[position + 1]

    if not host_part:
        raise RuntimeError(f"Invalid SSH connection format: {self.pod.sshConnection}")

    # Get SSH key
    ssh_key = self._get_ssh_key()
    if not ssh_key:
        raise RuntimeError("SSH key not found. Please configure MORECOMPUTE_SSH_KEY or add key to ~/.ssh/")

    # Build SSH command. BatchMode/ConnectTimeout match the options used when
    # starting the remote worker: fail fast instead of blocking on an
    # interactive prompt or an unreachable host.
    ssh_cmd = [
        "ssh",
        "-i", ssh_key,
        "-p", port,
        "-o", "StrictHostKeyChecking=no",
        "-o", "UserKnownHostsFile=/dev/null",
        "-o", "LogLevel=ERROR",
        "-o", "BatchMode=yes",
        "-o", "ConnectTimeout=10",
        host_part,
        command
    ]

    # Execute command
    proc = await asyncio.create_subprocess_exec(
        *ssh_cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE
    )

    stdout, stderr = await proc.communicate()
    return (
        stdout.decode('utf-8', errors='replace'),
        stderr.decode('utf-8', errors='replace'),
        proc.returncode or 0
    )
|
|
559
|
+
|
|
461
560
|
async def get_status(self) -> dict[str, object]:
|
|
462
561
|
"""
|
|
463
562
|
Get current connection status.
|
|
@@ -465,7 +564,10 @@ class PodKernelManager:
|
|
|
465
564
|
returns:
|
|
466
565
|
dict with status information
|
|
467
566
|
"""
|
|
468
|
-
|
|
567
|
+
# Cache pod reference to avoid race condition with disconnect()
|
|
568
|
+
pod = self.pod
|
|
569
|
+
|
|
570
|
+
if not pod:
|
|
469
571
|
return {
|
|
470
572
|
"connected": False,
|
|
471
573
|
"pod": None
|
|
@@ -478,7 +580,7 @@ class PodKernelManager:
|
|
|
478
580
|
|
|
479
581
|
# Get updated pod info
|
|
480
582
|
try:
|
|
481
|
-
updated_pod = await self.pi_service.get_pod(
|
|
583
|
+
updated_pod = await self.pi_service.get_pod(pod.id)
|
|
482
584
|
pod_status = updated_pod.status
|
|
483
585
|
except Exception:
|
|
484
586
|
pod_status = "unknown"
|
|
@@ -486,13 +588,13 @@ class PodKernelManager:
|
|
|
486
588
|
return {
|
|
487
589
|
"connected": True,
|
|
488
590
|
"pod": {
|
|
489
|
-
"id":
|
|
490
|
-
"name":
|
|
591
|
+
"id": pod.id,
|
|
592
|
+
"name": pod.name,
|
|
491
593
|
"status": pod_status,
|
|
492
|
-
"gpu_type":
|
|
493
|
-
"gpu_count":
|
|
494
|
-
"price_hr":
|
|
495
|
-
"ssh_connection":
|
|
594
|
+
"gpu_type": pod.gpuName,
|
|
595
|
+
"gpu_count": pod.gpuCount,
|
|
596
|
+
"price_hr": pod.priceHr,
|
|
597
|
+
"ssh_connection": pod.sshConnection
|
|
496
598
|
},
|
|
497
599
|
"tunnel": {
|
|
498
600
|
"alive": tunnel_alive,
|