@draht/pods 2026.3.2-2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/README.md +511 -0
  2. package/dist/cli.d.ts +3 -0
  3. package/dist/cli.d.ts.map +1 -0
  4. package/dist/cli.js +346 -0
  5. package/dist/cli.js.map +1 -0
  6. package/dist/commands/models.d.ts +39 -0
  7. package/dist/commands/models.d.ts.map +1 -0
  8. package/dist/commands/models.js +658 -0
  9. package/dist/commands/models.js.map +1 -0
  10. package/dist/commands/pods.d.ts +21 -0
  11. package/dist/commands/pods.d.ts.map +1 -0
  12. package/dist/commands/pods.js +175 -0
  13. package/dist/commands/pods.js.map +1 -0
  14. package/dist/commands/prompt.d.ts +7 -0
  15. package/dist/commands/prompt.d.ts.map +1 -0
  16. package/dist/commands/prompt.js +54 -0
  17. package/dist/commands/prompt.js.map +1 -0
  18. package/dist/config.d.ts +11 -0
  19. package/dist/config.d.ts.map +1 -0
  20. package/dist/config.js +74 -0
  21. package/dist/config.js.map +1 -0
  22. package/dist/index.d.ts +2 -0
  23. package/dist/index.d.ts.map +1 -0
  24. package/dist/index.js +3 -0
  25. package/dist/index.js.map +1 -0
  26. package/dist/model-configs.d.ts +22 -0
  27. package/dist/model-configs.d.ts.map +1 -0
  28. package/dist/model-configs.js +75 -0
  29. package/dist/model-configs.js.map +1 -0
  30. package/dist/models.json +295 -0
  31. package/dist/scripts/model_run.sh +83 -0
  32. package/dist/scripts/pod_setup.sh +336 -0
  33. package/dist/ssh.d.ts +24 -0
  34. package/dist/ssh.d.ts.map +1 -0
  35. package/dist/ssh.js +115 -0
  36. package/dist/ssh.js.map +1 -0
  37. package/dist/types.d.ts +23 -0
  38. package/dist/types.d.ts.map +1 -0
  39. package/dist/types.js +3 -0
  40. package/dist/types.js.map +1 -0
  41. package/package.json +40 -0
  42. package/scripts/model_run.sh +83 -0
  43. package/scripts/pod_setup.sh +336 -0
@@ -0,0 +1,658 @@
1
+ import chalk from "chalk";
2
+ import { spawn } from "child_process";
3
+ import { readFileSync } from "fs";
4
+ import { dirname, join } from "path";
5
+ import { fileURLToPath } from "url";
6
+ import { getActivePod, loadConfig, saveConfig } from "../config.js";
7
+ import { getModelConfig, getModelName, isKnownModel } from "../model-configs.js";
8
+ import { sshExec } from "../ssh.js";
9
/**
 * Resolve which pod to operate on.
 *
 * With an explicit override, look the pod up in the saved config and fail
 * hard if it is missing; otherwise fall back to the configured active pod.
 * Exits the process with code 1 on any resolution failure.
 *
 * @param {string|undefined} podOverride - Optional pod name from --pod.
 * @returns {{name: string, pod: object}} The resolved pod name and record.
 */
const getPod = (podOverride) => {
    if (!podOverride) {
        // No override: require an active pod to be configured.
        const active = getActivePod();
        if (!active) {
            console.error(chalk.red("No active pod. Use 'pi pods active <name>' to set one."));
            process.exit(1);
        }
        return active;
    }
    // Override given: it must exist in the saved configuration.
    const { pods } = loadConfig();
    const selected = pods[podOverride];
    if (!selected) {
        console.error(chalk.red(`Pod '${podOverride}' not found`));
        process.exit(1);
    }
    return { name: podOverride, pod: selected };
};
29
/**
 * Find the next free port for a new model, scanning upward from 8001.
 *
 * @param {object} pod - Pod record whose `models` map holds `{port}` entries.
 * @returns {number} The lowest port >= 8001 not already assigned to a model.
 */
const getNextPort = (pod) => {
    // Collect the ports already claimed by running models on this pod.
    const taken = new Set(Object.values(pod.models).map((entry) => entry.port));
    let candidate = 8001;
    while (taken.has(candidate)) {
        candidate += 1;
    }
    return candidate;
};
40
/**
 * Pick GPUs for a new model deployment, preferring the least-loaded ones.
 *
 * If `count` equals the pod's total GPU count, all GPUs are returned as-is.
 * Otherwise each GPU is scored by how many existing models reference it,
 * and the `count` least-used GPU ids are returned (stable sort keeps the
 * pod's declaration order on ties).
 *
 * @param {object} pod - Pod record with `gpus: [{id}]` and `models` map.
 * @param {number} [count=1] - Number of GPUs to allocate.
 * @returns {number[]} Selected GPU ids.
 */
const selectGPUs = (pod, count = 1) => {
    if (count === pod.gpus.length) {
        // Whole-pod request: no load balancing needed.
        return pod.gpus.map((g) => g.id);
    }
    // Tally current usage per GPU across every deployed model.
    const usage = new Map(pod.gpus.map((g) => [g.id, 0]));
    for (const model of Object.values(pod.models)) {
        for (const id of model.gpu) {
            usage.set(id, (usage.get(id) || 0) + 1);
        }
    }
    // Least-used first; take the requested number of ids.
    return [...usage.entries()]
        .sort(([, a], [, b]) => a - b)
        .slice(0, count)
        .map(([id]) => id);
};
65
/**
 * Start a model on a pod.
 *
 * Pipeline: validate the pod and model name, allocate a port and GPUs,
 * render scripts/model_run.sh with placeholder substitution, upload it over
 * SSH, launch it detached (setsid + `script` for a pseudo-TTY), persist the
 * new model to config, then tail the remote log watching for vLLM startup
 * success/failure markers. Exits the process with code 1 on any failure.
 *
 * @param {string} modelId - HF-style model identifier to deploy.
 * @param {string} name - Local name for this deployment (used in paths/logs).
 * @param {object} options - CLI options: pod, gpus, memory, context, vllmArgs.
 */
export const startModel = async (modelId, name, options) => {
    const { name: podName, pod } = getPod(options.pod);
    // Validation
    if (!pod.modelsPath) {
        console.error(chalk.red("Pod does not have a models path configured"));
        process.exit(1);
    }
    if (pod.models[name]) {
        console.error(chalk.red(`Model '${name}' already exists on pod '${podName}'`));
        process.exit(1);
    }
    const port = getNextPort(pod);
    // Determine GPU allocation and vLLM args.
    // Precedence: explicit --vllm args > known-model config (with optional
    // --gpus override) > single-GPU default for unknown models.
    let gpus = [];
    let vllmArgs = [];
    let modelConfig = null;
    if (options.vllmArgs?.length) {
        // Custom args override everything
        vllmArgs = options.vllmArgs;
        console.log(chalk.gray("Using custom vLLM args, GPU allocation managed by vLLM"));
    }
    else if (isKnownModel(modelId)) {
        // Handle --gpus parameter for known models
        if (options.gpus) {
            // Validate GPU count
            if (options.gpus > pod.gpus.length) {
                console.error(chalk.red(`Error: Requested ${options.gpus} GPUs but pod only has ${pod.gpus.length}`));
                process.exit(1);
            }
            // Try to find config for requested GPU count
            modelConfig = getModelConfig(modelId, pod.gpus, options.gpus);
            if (modelConfig) {
                gpus = selectGPUs(pod, options.gpus);
                vllmArgs = [...(modelConfig.args || [])];
            }
            else {
                console.error(chalk.red(`Model '${getModelName(modelId)}' does not have a configuration for ${options.gpus} GPU(s)`));
                console.error(chalk.yellow("Available configurations:"));
                // Show available configurations
                for (let gpuCount = 1; gpuCount <= pod.gpus.length; gpuCount++) {
                    const config = getModelConfig(modelId, pod.gpus, gpuCount);
                    if (config) {
                        console.error(chalk.gray(` - ${gpuCount} GPU(s)`));
                    }
                }
                process.exit(1);
            }
        }
        else {
            // Find best config for this hardware (original behavior):
            // try the largest GPU count first and fall back to fewer.
            for (let gpuCount = pod.gpus.length; gpuCount >= 1; gpuCount--) {
                modelConfig = getModelConfig(modelId, pod.gpus, gpuCount);
                if (modelConfig) {
                    gpus = selectGPUs(pod, gpuCount);
                    vllmArgs = [...(modelConfig.args || [])];
                    break;
                }
            }
            if (!modelConfig) {
                console.error(chalk.red(`Model '${getModelName(modelId)}' not compatible with this pod's GPUs`));
                process.exit(1);
            }
        }
    }
    else {
        // Unknown model
        if (options.gpus) {
            console.error(chalk.red("Error: --gpus can only be used with predefined models"));
            console.error(chalk.yellow("For custom models, use --vllm with tensor-parallel-size or similar arguments"));
            process.exit(1);
        }
        // Single GPU default
        gpus = selectGPUs(pod, 1);
        console.log(chalk.gray("Unknown model, defaulting to single GPU"));
    }
    // Apply memory/context overrides (skipped when raw --vllm args are given,
    // since those take full precedence).
    if (!options.vllmArgs?.length) {
        if (options.memory) {
            // "--memory 50%" -> 0.5; NOTE(review): assumes a numeric percent
            // string — a malformed value yields NaN unvalidated. Verify upstream.
            const fraction = parseFloat(options.memory.replace("%", "")) / 100;
            vllmArgs = vllmArgs.filter((arg) => !arg.includes("gpu-memory-utilization"));
            vllmArgs.push("--gpu-memory-utilization", String(fraction));
        }
        if (options.context) {
            // Accept shorthand sizes ("8k") or a raw token count.
            const contextSizes = {
                "4k": 4096,
                "8k": 8192,
                "16k": 16384,
                "32k": 32768,
                "64k": 65536,
                "128k": 131072,
            };
            const maxTokens = contextSizes[options.context.toLowerCase()] || parseInt(options.context, 10);
            vllmArgs = vllmArgs.filter((arg) => !arg.includes("max-model-len"));
            vllmArgs.push("--max-model-len", String(maxTokens));
        }
    }
    // Show what we're doing
    console.log(chalk.green(`Starting model '${name}' on pod '${podName}'...`));
    console.log(`Model: ${modelId}`);
    console.log(`Port: ${port}`);
    console.log(`GPU(s): ${gpus.length ? gpus.join(", ") : "Managed by vLLM"}`);
    if (modelConfig?.notes)
        console.log(chalk.yellow(`Note: ${modelConfig.notes}`));
    console.log("");
    // Read and customize model_run.sh script with our values
    const scriptPath = join(dirname(fileURLToPath(import.meta.url)), "../../scripts/model_run.sh");
    let scriptContent = readFileSync(scriptPath, "utf-8");
    // Replace placeholders - no escaping needed, heredoc with 'EOF' is literal.
    // NOTE(review): String.replace substitutes only the FIRST occurrence of
    // each placeholder — assumes model_run.sh uses each token once; confirm.
    scriptContent = scriptContent
        .replace("{{MODEL_ID}}", modelId)
        .replace("{{NAME}}", name)
        .replace("{{PORT}}", String(port))
        .replace("{{VLLM_ARGS}}", vllmArgs.join(" "));
    // Upload customized script
    await sshExec(pod.ssh, `cat > /tmp/model_run_${name}.sh << 'EOF'
${scriptContent}
EOF
chmod +x /tmp/model_run_${name}.sh`);
    // Prepare environment.
    // NOTE(review): HF_TOKEN / PI_API_KEY / config env values are interpolated
    // inside single quotes in a shell command — assumes they contain no single
    // quotes; a quote would break (or inject into) the remote command. Verify.
    const env = [
        `HF_TOKEN='${process.env.HF_TOKEN}'`,
        `PI_API_KEY='${process.env.PI_API_KEY}'`,
        `HF_HUB_ENABLE_HF_TRANSFER=1`,
        `VLLM_NO_USAGE_STATS=1`,
        `PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True`,
        `FORCE_COLOR=1`,
        `TERM=xterm-256color`,
        ...(gpus.length === 1 ? [`CUDA_VISIBLE_DEVICES=${gpus[0]}`] : []),
        ...Object.entries(modelConfig?.env || {}).map(([k, v]) => `${k}='${v}'`),
    ]
        .map((e) => `export ${e}`)
        .join("\n");
    // Start the model runner with script command for pseudo-TTY (preserves colors)
    // Note: We use script to preserve colors and create a log file
    // setsid creates a new session so it survives SSH disconnection
    const startCmd = `
${env}
mkdir -p ~/.vllm_logs
# Create a wrapper that monitors the script command
cat > /tmp/model_wrapper_${name}.sh << 'WRAPPER'
#!/bin/bash
script -q -f -c "/tmp/model_run_${name}.sh" ~/.vllm_logs/${name}.log
exit_code=$?
echo "Script exited with code $exit_code" >> ~/.vllm_logs/${name}.log
exit $exit_code
WRAPPER
chmod +x /tmp/model_wrapper_${name}.sh
setsid /tmp/model_wrapper_${name}.sh </dev/null >/dev/null 2>&1 &
echo $!
exit 0
`;
    const pidResult = await sshExec(pod.ssh, startCmd);
    // The remote `echo $!` prints the backgrounded wrapper's PID.
    const pid = parseInt(pidResult.stdout.trim(), 10);
    if (!pid) {
        console.error(chalk.red("Failed to start model runner"));
        process.exit(1);
    }
    // Save to config immediately so other commands can see/stop this model;
    // it is removed again below if startup is observed to fail.
    const config = loadConfig();
    config.pods[podName].models[name] = { model: modelId, port, gpu: gpus, pid };
    saveConfig(config);
    console.log(`Model runner started with PID: ${pid}`);
    console.log("Streaming logs... (waiting for startup)\n");
    // Small delay to ensure log file is created
    await new Promise((resolve) => setTimeout(resolve, 500));
    // Stream logs with color support, watching for startup complete
    const sshParts = pod.ssh.split(" ");
    const sshCommand = sshParts[0]; // "ssh"
    const sshArgs = sshParts.slice(1); // ["root@86.38.238.55"]
    const host = sshArgs[0].split("@")[1] || "localhost";
    const tailCmd = `tail -f ~/.vllm_logs/${name}.log`;
    // Build the full args array for spawn
    const fullArgs = [...sshArgs, tailCmd];
    const logProcess = spawn(sshCommand, fullArgs, {
        stdio: ["inherit", "pipe", "pipe"], // capture stdout and stderr
        env: { ...process.env, FORCE_COLOR: "1" },
    });
    let interrupted = false;
    let startupComplete = false;
    let startupFailed = false;
    let failureReason = "";
    // Handle Ctrl+C: stop tailing but leave the deployment running remotely.
    const sigintHandler = () => {
        interrupted = true;
        logProcess.kill();
    };
    process.on("SIGINT", sigintHandler);
    // Process log output line by line, scanning for known success/failure
    // markers. NOTE(review): a chunk boundary can split a marker across two
    // `data` events, which this substring scan would miss — verify tolerable.
    const processOutput = (data) => {
        const lines = data.toString().split("\n");
        for (const line of lines) {
            if (line) {
                console.log(line); // Echo the line to console
                // Check for startup complete message
                if (line.includes("Application startup complete")) {
                    startupComplete = true;
                    logProcess.kill(); // Stop tailing logs
                }
                // Check for failure indicators
                if (line.includes("Model runner exiting with code") && !line.includes("code 0")) {
                    startupFailed = true;
                    failureReason = "Model runner failed to start";
                    logProcess.kill();
                }
                if (line.includes("Script exited with code") && !line.includes("code 0")) {
                    startupFailed = true;
                    failureReason = "Script failed to execute";
                    logProcess.kill();
                }
                if (line.includes("torch.OutOfMemoryError") || line.includes("CUDA out of memory")) {
                    startupFailed = true;
                    failureReason = "Out of GPU memory (OOM)";
                    // Don't kill immediately - let it show more error context
                }
                if (line.includes("RuntimeError: Engine core initialization failed")) {
                    startupFailed = true;
                    failureReason = "vLLM engine initialization failed";
                    logProcess.kill();
                }
            }
        }
    };
    logProcess.stdout?.on("data", processOutput);
    logProcess.stderr?.on("data", processOutput);
    // Block until the tail process exits (killed above, or SSH drops).
    await new Promise((resolve) => logProcess.on("exit", resolve));
    process.removeListener("SIGINT", sigintHandler);
    if (startupFailed) {
        // Model failed to start - clean up and report error
        console.log(`\n${chalk.red(`✗ Model failed to start: ${failureReason}`)}`);
        // Remove the failed model from config
        const config = loadConfig();
        delete config.pods[podName].models[name];
        saveConfig(config);
        console.log(chalk.yellow("\nModel has been removed from configuration."));
        // Provide helpful suggestions based on failure reason
        if (failureReason.includes("OOM") || failureReason.includes("memory")) {
            console.log(`\n${chalk.bold("Suggestions:")}`);
            console.log(" • Try reducing GPU memory utilization: --memory 50%");
            console.log(" • Use a smaller context window: --context 4k");
            console.log(" • Use a quantized version of the model (e.g., FP8)");
            console.log(" • Use more GPUs with tensor parallelism");
            console.log(" • Try a smaller model variant");
        }
        console.log(`\n${chalk.cyan(`Check full logs: pi ssh "tail -100 ~/.vllm_logs/${name}.log"`)}`);
        process.exit(1);
    }
    else if (startupComplete) {
        // Model started successfully - output connection details
        console.log(`\n${chalk.green("✓ Model started successfully!")}`);
        console.log(`\n${chalk.bold("Connection Details:")}`);
        console.log(chalk.cyan("─".repeat(50)));
        console.log(chalk.white("Base URL: ") + chalk.yellow(`http://${host}:${port}/v1`));
        console.log(chalk.white("Model: ") + chalk.yellow(modelId));
        console.log(chalk.white("API Key: ") + chalk.yellow(process.env.PI_API_KEY || "(not set)"));
        console.log(chalk.cyan("─".repeat(50)));
        console.log(`\n${chalk.bold("Export for shell:")}`);
        console.log(chalk.gray(`export OPENAI_BASE_URL="http://${host}:${port}/v1"`));
        console.log(chalk.gray(`export OPENAI_API_KEY="${process.env.PI_API_KEY || "your-api-key"}"`));
        console.log(chalk.gray(`export OPENAI_MODEL="${modelId}"`));
        console.log(`\n${chalk.bold("Example usage:")}`);
        console.log(chalk.gray(`
# Python
from openai import OpenAI
client = OpenAI() # Uses env vars
response = client.chat.completions.create(
model="${modelId}",
messages=[{"role": "user", "content": "Hello!"}]
)

# CLI
curl $OPENAI_BASE_URL/chat/completions \\
-H "Authorization: Bearer $OPENAI_API_KEY" \\
-H "Content-Type: application/json" \\
-d '{"model":"${modelId}","messages":[{"role":"user","content":"Hi"}]}'`));
        console.log("");
        console.log(chalk.cyan(`Chat with model: pi agent ${name} "Your message"`));
        console.log(chalk.cyan(`Interactive mode: pi agent ${name} -i`));
        console.log(chalk.cyan(`Monitor logs: pi logs ${name}`));
        console.log(chalk.cyan(`Stop model: pi stop ${name}`));
    }
    else if (interrupted) {
        console.log(chalk.yellow("\n\nStopped monitoring. Model deployment continues in background."));
        console.log(chalk.cyan(`Chat with model: pi agent ${name} "Your message"`));
        console.log(chalk.cyan(`Check status: pi logs ${name}`));
        console.log(chalk.cyan(`Stop model: pi stop ${name}`));
    }
    else {
        console.log(chalk.yellow("\n\nLog stream ended. Model may still be running."));
        console.log(chalk.cyan(`Chat with model: pi agent ${name} "Your message"`));
        console.log(chalk.cyan(`Check status: pi logs ${name}`));
        console.log(chalk.cyan(`Stop model: pi stop ${name}`));
    }
};
361
/**
 * Stop a single model: terminate its remote process tree over SSH and
 * remove its entry from the saved pod configuration.
 *
 * Exits with code 1 if the model name is unknown on the resolved pod.
 *
 * @param {string} name - Deployment name to stop.
 * @param {object} options - CLI options (may contain a pod override).
 */
export const stopModel = async (name, options) => {
    const { name: podName, pod } = getPod(options.pod);
    const target = pod.models[name];
    if (!target) {
        console.error(chalk.red(`Model '${name}' not found on pod '${podName}'`));
        process.exit(1);
    }
    console.log(chalk.yellow(`Stopping model '${name}' on pod '${podName}'...`));
    // Terminate the wrapper PID and every child it spawned; both commands
    // are best-effort so an already-dead process doesn't fail the SSH call.
    await sshExec(pod.ssh, `
# Kill the script process and all its children
pkill -TERM -P ${target.pid} 2>/dev/null || true
kill ${target.pid} 2>/dev/null || true
`);
    // Drop the model from the persisted configuration.
    const updated = loadConfig();
    delete updated.pods[podName].models[name];
    saveConfig(updated);
    console.log(chalk.green(`✓ Model '${name}' stopped`));
};
386
/**
 * Stop every model on a pod: kill all remote process trees with a single
 * SSH invocation, then clear the pod's model registry in the saved config.
 *
 * No-ops (with a message) when the pod has no running models.
 *
 * @param {object} options - CLI options (may contain a pod override).
 */
export const stopAllModels = async (options) => {
    const { name: podName, pod } = getPod(options.pod);
    const names = Object.keys(pod.models);
    if (names.length === 0) {
        console.log(`No models running on pod '${podName}'`);
        return;
    }
    console.log(chalk.yellow(`Stopping ${names.length} model(s) on pod '${podName}'...`));
    // One remote loop terminates each wrapper PID and its children;
    // kills are best-effort so missing processes don't abort the loop.
    const allPids = Object.values(pod.models).map((entry) => entry.pid);
    await sshExec(pod.ssh, `
for PID in ${allPids.join(" ")}; do
pkill -TERM -P $PID 2>/dev/null || true
kill $PID 2>/dev/null || true
done
`);
    // Wipe this pod's model registry and persist the change.
    const updated = loadConfig();
    updated.pods[podName].models = {};
    saveConfig(updated);
    console.log(chalk.green(`✓ Stopped all models: ${names.join(", ")}`));
};
412
/**
 * List all models registered on a pod, then verify each one remotely.
 *
 * First prints each model's port, GPUs, PID, and derived base URL. Then, for
 * each model, runs a remote shell probe over SSH that classifies it as
 * "running" (PID alive + /health responds), "starting", "crashed" (error
 * strings in the log tail), or "dead" (PID gone), and prints a summary.
 *
 * @param {object} options - CLI options (may contain a pod override).
 */
export const listModels = async (options) => {
    const { name: podName, pod } = getPod(options.pod);
    const modelNames = Object.keys(pod.models);
    if (modelNames.length === 0) {
        console.log(`No models running on pod '${podName}'`);
        return;
    }
    // Get pod SSH host for URL display (the "user@host" token of pod.ssh).
    const sshParts = pod.ssh.split(" ");
    const host = sshParts.find((p) => p.includes("@"))?.split("@")[1] || "unknown";
    console.log(`Models on pod '${chalk.bold(podName)}':`);
    for (const name of modelNames) {
        const model = pod.models[name];
        // Label reflects however many GPUs were recorded at start time.
        const gpuStr = model.gpu.length > 1
            ? `GPUs ${model.gpu.join(",")}`
            : model.gpu.length === 1
                ? `GPU ${model.gpu[0]}`
                : "GPU unknown";
        console.log(` ${chalk.green(name)} - Port ${model.port} - ${gpuStr} - PID ${model.pid}`);
        console.log(` Model: ${chalk.gray(model.model)}`);
        console.log(` URL: ${chalk.cyan(`http://${host}:${model.port}/v1`)}`);
    }
    // Optionally verify processes are still running.
    // NOTE(review): one sequential SSH round-trip per model — fine for a few
    // models, slow for many; consider batching if pods grow large.
    console.log("");
    console.log("Verifying processes...");
    let anyDead = false;
    for (const name of modelNames) {
        const model = pod.models[name];
        // Check both the wrapper process and if vLLM is responding
        const checkCmd = `
# Check if wrapper process exists
if ps -p ${model.pid} > /dev/null 2>&1; then
# Process exists, now check if vLLM is responding
if curl -s -f http://localhost:${model.port}/health > /dev/null 2>&1; then
echo "running"
else
# Check if it's still starting up
if tail -n 20 ~/.vllm_logs/${name}.log 2>/dev/null | grep -q "ERROR\\|Failed\\|Cuda error\\|died"; then
echo "crashed"
else
echo "starting"
fi
fi
else
echo "dead"
fi
`;
        const result = await sshExec(pod.ssh, checkCmd);
        const status = result.stdout.trim();
        if (status === "dead") {
            console.log(chalk.red(` ${name}: Process ${model.pid} is not running`));
            anyDead = true;
        }
        else if (status === "crashed") {
            console.log(chalk.red(` ${name}: vLLM crashed (check logs with 'pi logs ${name}')`));
            anyDead = true;
        }
        else if (status === "starting") {
            console.log(chalk.yellow(` ${name}: Still starting up...`));
        }
        // "running" prints nothing per-model; the success summary covers it.
    }
    if (anyDead) {
        console.log("");
        console.log(chalk.yellow("Some models are not running. Clean up with:"));
        console.log(chalk.cyan(" pi stop <name>"));
    }
    else {
        console.log(chalk.green("✓ All processes verified"));
    }
};
485
/**
 * Stream a model's remote log file to the terminal.
 *
 * Spawns `ssh <host> tail -f ~/.vllm_logs/<name>.log` with inherited stdio
 * so ANSI colors pass straight through, and resolves only when the tail
 * process exits (typically when the user presses Ctrl+C).
 *
 * Exits with code 1 if the model name is unknown on the resolved pod.
 *
 * @param {string} name - Deployment name whose log to follow.
 * @param {object} options - CLI options (may contain a pod override).
 */
export const viewLogs = async (name, options) => {
    const { name: podName, pod } = getPod(options.pod);
    if (!pod.models[name]) {
        console.error(chalk.red(`Model '${name}' not found on pod '${podName}'`));
        process.exit(1);
    }
    console.log(chalk.green(`Streaming logs for '${name}' on pod '${podName}'...`));
    console.log(chalk.gray("Press Ctrl+C to stop"));
    console.log("");
    // pod.ssh is a space-separated command line, e.g. "ssh root@host".
    const [command, ...hostArgs] = pod.ssh.split(" ");
    const tail = spawn(command, [...hostArgs, `tail -f ~/.vllm_logs/${name}.log`], {
        stdio: "inherit",
        env: {
            ...process.env,
            FORCE_COLOR: "1",
        },
    });
    // Block until the remote tail terminates.
    await new Promise((resolve) => {
        tail.on("exit", () => resolve());
    });
};
515
/**
 * Show the catalog of known models and their hardware requirements.
 *
 * Loads dist/models.json, and — when an active pod is configured — splits
 * models into "compatible" (some config fits the pod's GPU count/type) and
 * "incompatible" groups, grouped by model family and printed with minimum
 * hardware and notes. Without an active pod, all models are listed with
 * their minimum requirements.
 */
export const showKnownModels = async () => {
    // ESM has no __filename/__dirname; derive them from import.meta.url.
    const __filename = fileURLToPath(import.meta.url);
    const __dirname = dirname(__filename);
    const modelsJsonPath = join(__dirname, "..", "models.json");
    const modelsJson = JSON.parse(readFileSync(modelsJsonPath, "utf-8"));
    const models = modelsJson.models;
    // Get active pod info if available
    const activePod = getActivePod();
    let podGpuCount = 0;
    let podGpuType = "";
    if (activePod) {
        podGpuCount = activePod.pod.gpus.length;
        // Extract GPU type from name (e.g., "NVIDIA H200" -> "H200")
        podGpuType = activePod.pod.gpus[0]?.name?.replace("NVIDIA", "")?.trim()?.split(" ")[0] || "";
        console.log(chalk.bold(`Known Models for ${activePod.name} (${podGpuCount}x ${podGpuType || "GPU"}):\n`));
    }
    else {
        console.log(chalk.bold("Known Models:\n"));
        console.log(chalk.yellow("No active pod. Use 'pi pods active <name>' to filter compatible models.\n"));
    }
    console.log("Usage: pi start <model> --name <name> [options]\n");
    // Group models by compatibility and family
    const compatible = {};
    const incompatible = {};
    for (const [modelId, info] of Object.entries(models)) {
        const modelInfo = info;
        // Family = leading token of the display name (e.g. "Qwen" from "Qwen-...").
        const family = modelInfo.name.split("-")[0] || "Other";
        let isCompatible = false;
        let compatibleConfig = "";
        let minGpu = "Unknown";
        let minNotes;
        if (modelInfo.configs && modelInfo.configs.length > 0) {
            // Sort configs by GPU count to find minimum
            const sortedConfigs = [...modelInfo.configs].sort((a, b) => (a.gpuCount || 1) - (b.gpuCount || 1));
            // Find minimum requirements
            const minConfig = sortedConfigs[0];
            const minGpuCount = minConfig.gpuCount || 1;
            // Default GPU types when a config doesn't specify them.
            const gpuTypes = minConfig.gpuTypes?.join("/") || "H100/H200";
            if (minGpuCount === 1) {
                minGpu = `1x ${gpuTypes}`;
            }
            else {
                minGpu = `${minGpuCount}x ${gpuTypes}`;
            }
            minNotes = minConfig.notes || modelInfo.notes;
            // Check compatibility with active pod
            if (activePod && podGpuCount > 0) {
                // Find best matching config for this pod (smallest GPU count
                // first, since sortedConfigs is ascending).
                for (const config of sortedConfigs) {
                    const configGpuCount = config.gpuCount || 1;
                    const configGpuTypes = config.gpuTypes || [];
                    // Check if we have enough GPUs
                    if (configGpuCount <= podGpuCount) {
                        // Check if GPU type matches (if specified); the
                        // bidirectional includes() tolerates partial names.
                        if (configGpuTypes.length === 0 ||
                            configGpuTypes.some((type) => podGpuType.includes(type) || type.includes(podGpuType))) {
                            isCompatible = true;
                            if (configGpuCount === 1) {
                                compatibleConfig = `1x ${podGpuType}`;
                            }
                            else {
                                compatibleConfig = `${configGpuCount}x ${podGpuType}`;
                            }
                            minNotes = config.notes || modelInfo.notes;
                            break;
                        }
                    }
                }
            }
        }
        const modelEntry = {
            id: modelId,
            name: modelInfo.name,
            notes: minNotes,
        };
        if (activePod && isCompatible) {
            if (!compatible[family]) {
                compatible[family] = [];
            }
            compatible[family].push({ ...modelEntry, config: compatibleConfig });
        }
        else {
            if (!incompatible[family]) {
                incompatible[family] = [];
            }
            incompatible[family].push({ ...modelEntry, minGpu });
        }
    }
    // Display compatible models first
    if (activePod && Object.keys(compatible).length > 0) {
        console.log(chalk.green.bold("✓ Compatible Models:\n"));
        const sortedFamilies = Object.keys(compatible).sort();
        for (const family of sortedFamilies) {
            console.log(chalk.cyan(`${family} Models:`));
            const modelList = compatible[family].sort((a, b) => a.name.localeCompare(b.name));
            for (const model of modelList) {
                console.log(` ${chalk.green(model.id)}`);
                console.log(` Name: ${model.name}`);
                console.log(` Config: ${model.config}`);
                if (model.notes) {
                    console.log(chalk.gray(` Note: ${model.notes}`));
                }
                console.log("");
            }
        }
    }
    // Display incompatible models
    if (Object.keys(incompatible).length > 0) {
        // Only label the section when a compatible section preceded it.
        if (activePod && Object.keys(compatible).length > 0) {
            console.log(chalk.red.bold("✗ Incompatible Models (need more/different GPUs):\n"));
        }
        const sortedFamilies = Object.keys(incompatible).sort();
        for (const family of sortedFamilies) {
            if (!activePod) {
                console.log(chalk.cyan(`${family} Models:`));
            }
            else {
                console.log(chalk.gray(`${family} Models:`));
            }
            const modelList = incompatible[family].sort((a, b) => a.name.localeCompare(b.name));
            for (const model of modelList) {
                // Dim incompatible entries when filtering by an active pod.
                const color = activePod ? chalk.gray : chalk.green;
                console.log(` ${color(model.id)}`);
                console.log(chalk.gray(` Name: ${model.name}`));
                console.log(chalk.gray(` Min Hardware: ${model.minGpu}`));
                if (model.notes && !activePod) {
                    console.log(chalk.gray(` Note: ${model.notes}`));
                }
                if (activePod) {
                    console.log(""); // Less verbose for incompatible models when filtered
                }
                else {
                    console.log("");
                }
            }
        }
    }
    console.log(chalk.gray("\nFor unknown models, defaults to single GPU deployment."));
    console.log(chalk.gray("Use --vllm to pass custom arguments to vLLM."));
};
658
+ //# sourceMappingURL=models.js.map