@mariozechner/pi 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/pi ADDED
@@ -0,0 +1,860 @@
+ #!/usr/bin/env node
+ /**
+  * Prime Intellect CLI - All-in-one pod management
+  */
+
+ const fs = require('fs');
+ const { execSync, spawn } = require('child_process');
+ const path = require('path');
+ const os = require('os');
+
+ const CONFIG_FILE = path.join(os.homedir(), '.pi_config');
+ const SCRIPT_DIR = __dirname;
+
+ class PrimeIntellectCLI {
+   constructor() {
+     this.loadConfig();
+   }
+
+   loadConfig() {
+     if (fs.existsSync(CONFIG_FILE)) {
+       this.config = JSON.parse(fs.readFileSync(CONFIG_FILE, 'utf8'));
+       // Migrate old single-pod config
+       if (this.config.ssh && !this.config.pods) {
+         this.config = {
+           pods: { 'default': { ssh: this.config.ssh } },
+           active: 'default'
+         };
+         this.saveConfig();
+       }
+     } else {
+       this.config = { pods: {}, active: null };
+     }
+   }
+
+   saveConfig() {
+     fs.writeFileSync(CONFIG_FILE, JSON.stringify(this.config, null, 2));
+   }
+
+   getActivePod() {
+     if (!this.config.active || !this.config.pods[this.config.active]) {
+       return null;
+     }
+     return this.config.pods[this.config.active];
+   }
+
+   ssh(command, interactive = false, skipPirc = false) {
+     const pod = this.getActivePod();
+     if (!pod) {
+       console.error('No active pod. Run: pi setup <pod-name> <ssh_command>');
+       console.error('Example: pi setup prod "root@135.181.71.41 -p 22"');
+       console.error('Or activate an existing pod: pi pod <pod-name>');
+       process.exit(1);
+     }
+
+     // Wrap command to source .pirc first (if it exists), unless skipPirc is true
+     const finalCommand = skipPirc ? command : `[ -f ~/.pirc ] && source ~/.pirc; ${command}`;
+
+     if (interactive) {
+       // For interactive commands, use spawn with shell
+       const sshParts = pod.ssh.split(' ');
+       const sshCmd = ['ssh', ...sshParts, finalCommand];
+       const proc = spawn(sshCmd[0], sshCmd.slice(1), { stdio: 'inherit', shell: false });
+       return new Promise((resolve) => {
+         proc.on('close', resolve);
+       });
+     } else {
+       const sshCmd = `ssh ${pod.ssh} ${JSON.stringify(finalCommand)}`;
+
+       // For non-interactive, use execSync
+       try {
+         return execSync(sshCmd, { encoding: 'utf8' });
+       } catch (e) {
+         if (e.status !== 0) {
+           console.error('SSH command failed:', e.message);
+           process.exit(1);
+         }
+         throw e;
+       }
+     }
+   }
+
+   scp(localFile, remotePath = '~/') {
+     const pod = this.getActivePod();
+     if (!pod) {
+       console.error('No active pod. Run: pi setup <pod-name> <ssh_command>');
+       process.exit(1);
+     }
+
+     const [userHost, ...sshArgs] = pod.ssh.split(' ');
+     let scpCmd = `scp`;
+
+     // Add port if specified
+     const portArg = sshArgs.find(arg => arg === '-p');
+     if (portArg) {
+       const portIndex = sshArgs.indexOf(portArg);
+       const port = sshArgs[portIndex + 1];
+       scpCmd += ` -P ${port}`;
+     }
+
+     scpCmd += ` ${localFile} ${userHost}:${remotePath}`;
+
+     try {
+       execSync(scpCmd, { stdio: 'inherit' });
+     } catch (e) {
+       console.error('SCP failed:', e.message);
+       process.exit(1);
+     }
+   }
+
+   async setup(podName, sshCommand) {
+     if (!podName || !sshCommand) {
+       console.error('Usage: pi setup <pod-name> <ssh_command>');
+       console.error('Example: pi setup prod "root@135.181.71.41 -p 22"');
+       process.exit(1);
+     }
+
+     // Remove "ssh " prefix if present
+     if (sshCommand.toLowerCase().startsWith('ssh ')) {
+       sshCommand = sshCommand.substring(4);
+     }
+
+     // Save pod config
+     if (!this.config.pods) {
+       this.config.pods = {};
+     }
+     this.config.pods[podName] = { ssh: sshCommand };
+     this.config.active = podName;
+     this.saveConfig();
+     console.log(`Saved pod '${podName}' with SSH: ${sshCommand}`);
+
+     // Test connection
+     console.log('\nTesting SSH connection...');
+     try {
+       const hostname = this.ssh('hostname', false, true).trim();
+       console.log(`✓ Connected to ${hostname}`);
+     } catch (e) {
+       console.error('✗ SSH connection failed');
+       process.exit(1);
+     }
+
+     // Copy setup files
+     console.log('\nCopying setup files...');
+     this.scp(path.join(SCRIPT_DIR, 'pod_setup.sh'));
+     this.scp(path.join(SCRIPT_DIR, 'vllm_manager.py'));
+
+     // Run setup with HF_TOKEN
+     console.log('\nRunning setup script...');
+     const hfToken = process.env.HF_TOKEN;
+     if (!hfToken) {
+       console.error('\nERROR: HF_TOKEN environment variable not set');
+       console.error('Please export HF_TOKEN before running setup');
+       process.exit(1);
+     }
+     await this.ssh(`export HF_TOKEN="${hfToken}" && bash pod_setup.sh`, true, true);
+
+     console.log('\n✓ Setup complete!');
+
+     // Show usage help
+     this.showHelp();
+   }
+
+   list() {
+     const output = this.ssh('python3 vllm_manager.py list');
+     console.log(output);
+   }
+
+   parseContextSize(value) {
+     if (!value) return 8192;
+
+     // Convert string to lowercase for case-insensitive matching
+     const lower = value.toString().toLowerCase();
+
+     // Handle 'k' suffix (4k, 8k, 32k, etc)
+     if (lower.endsWith('k')) {
+       return parseInt(lower.slice(0, -1)) * 1024;
+     }
+
+     // Handle plain numbers
+     return parseInt(value);
+   }
+
+   parseMemory(value) {
+     if (!value) return 0.9;
+
+     const str = value.toString().toLowerCase();
+
+     // Handle percentage (30%, 50%, etc)
+     if (str.endsWith('%')) {
+       return parseInt(str.slice(0, -1)) / 100;
+     }
+
+     // Handle decimal (0.3, 0.5, etc)
+     const num = parseFloat(str);
+     if (num > 1) {
+       console.error('Memory must be between 0-1 or 0-100%');
+       process.exit(1);
+     }
+     return num;
+   }
+
+   async handleStart(args) {
+     if (!args[0]) {
+       console.error('Usage: pi start <model> [options]');
+       console.error('');
+       console.error('Options:');
+       console.error('  --name <name>       Model alias (default: auto-generated)');
+       console.error('  --context <size>    Context window: 4k, 8k, 16k, 32k or 4096, 8192, etc (default: model default)');
+       console.error('  --memory <percent>  GPU memory: 30%, 50%, 90% or 0.3, 0.5, 0.9 (default: 90%)');
+       console.error('  --all-gpus          Use all GPUs with tensor parallelism (ignores --memory)');
+       console.error('  --vllm-args         Pass remaining args directly to vLLM (ignores other options)');
+       console.error('');
+       console.error('Examples:');
+       console.error('  pi start Qwen/Qwen2.5-7B-Instruct');
+       console.error('  pi start Qwen/Qwen2.5-7B-Instruct --name qwen --memory 20%');
+       console.error('  pi start meta-llama/Llama-3.1-70B-Instruct --all-gpus');
+       console.error('  pi start meta-llama/Llama-3.1-405B --all-gpus --context 128k');
+       console.error('');
+       console.error('  # Custom vLLM args for Qwen3-Coder on 8xH200:');
+       console.error('  pi start Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8 --name qwen-coder --vllm-args \\');
+       console.error('    --data-parallel-size 8 --enable-expert-parallel \\');
+       console.error('    --tool-call-parser qwen3_coder --enable-auto-tool-choice --gpu-memory-utilization 0.9 --max-model-len 200000');
+       process.exit(1);
+     }
+
+     const modelId = args[0];
+     let name = null;
+     let context = null; // Changed to null - let vLLM use model default
+     let memory = 0.9;
+     let allGpus = false;
+     let vllmArgs = null;
+
+     // Check for --vllm-args first
+     const vllmArgsIndex = args.indexOf('--vllm-args');
+     if (vllmArgsIndex !== -1) {
+       // Extract name if provided before --vllm-args
+       for (let i = 1; i < vllmArgsIndex; i++) {
+         if (args[i] === '--name' && args[i + 1]) {
+           name = args[++i];
+         }
+       }
+       // Everything after --vllm-args is passed to vLLM
+       vllmArgs = args.slice(vllmArgsIndex + 1).join(' ');
+     } else {
+       // Parse normal arguments
+       for (let i = 1; i < args.length; i++) {
+         switch (args[i]) {
+           case '--name':
+             name = args[++i];
+             break;
+           case '--context':
+             context = this.parseContextSize(args[++i]);
+             break;
+           case '--memory':
+             memory = this.parseMemory(args[++i]);
+             break;
+           case '--all-gpus':
+             allGpus = true;
+             break;
+           default:
+             console.error(`Unknown option: ${args[i]}`);
+             process.exit(1);
+         }
+       }
+     }
+
+     // Check for multi-GPU setup
+     const gpuCount = await this.getGpuCount();
+
+     if (allGpus) {
+       if (memory !== 0.9) {
+         console.log('Warning: --memory ignored with --all-gpus (using 95% memory across all GPUs)');
+       }
+       memory = 0.95;
+
+       if (gpuCount === 1) {
+         console.log('Note: --all-gpus specified but only 1 GPU found');
+         allGpus = false;
+       }
+     }
+
+     // Auto-generate name if not provided
+     if (!name) {
+       // Extract model name from path (e.g., "Phi-3-mini" from "microsoft/Phi-3-mini-4k-instruct")
+       const parts = modelId.split('/');
+       const modelName = parts[parts.length - 1];
+       name = modelName.toLowerCase()
+         .replace(/-instruct$/, '')
+         .replace(/-chat$/, '')
+         .replace(/[^a-z0-9-]/g, '-')
+         .replace(/-+/g, '-')
+         .replace(/^-|-$/g, '')
+         .slice(0, 20);
+     }
+
+     // If vllmArgs provided, use raw vLLM command
+     if (vllmArgs) {
+       await this.startRaw(modelId, name, vllmArgs);
+     } else {
+       // Call the original start method with positional args
+       const contextStr = context ? context.toString() : null;
+       await this.start(modelId, name, contextStr, memory.toString(), { allGpus, gpuCount });
+     }
+   }
+
+   async getGpuCount() {
+     try {
+       const output = this.ssh('nvidia-smi --query-gpu=name --format=csv,noheader | wc -l');
+       return parseInt(output.trim()) || 1;
+     } catch {
+       return 1;
+     }
+   }
+
+   async start(modelId, name, maxLen = null, gpuMemory, options = {}) {
+     // Check if name is already in use locally first
+     if (name) {
+       const runningModels = this.getRunningModels();
+       if (runningModels[name]) {
+         console.error(`Error: Model name '${name}' is already in use`);
+         console.error('Running models:', Object.keys(runningModels).join(', '));
+         process.exit(1);
+       }
+     }
+
+     // Build args for vllm_manager.py
+     let args = modelId;
+
+     // Handle optional parameters
+     if (name || maxLen || gpuMemory) {
+       args += ` ${name || '""'}`;
+
+       if (maxLen || gpuMemory) {
+         args += ` ${maxLen || '""'}`; // Pass empty string to use vLLM default
+
+         if (gpuMemory) {
+           args += ` ${gpuMemory}`;
+         }
+       }
+     }
+
+     // Handle multi-GPU options
+     let envPrefix = '';
+     if (options.allGpus && options.gpuCount > 1) {
+       args += ` ${options.gpuCount}`; // Pass tensor parallel size
+     }
+
+     const output = this.ssh(`${envPrefix}python3 vllm_manager.py start ${args}`);
+
+     // Extract model name and connection info from output
+     const nameMatch = output.match(/Started (\S+)/);
+     const urlMatch = output.match(/URL: (http:\/\/[^\s]+)/);
+     const exportMatch = output.match(/export OPENAI_BASE_URL='([^']+)'/);
+
+     if (nameMatch) {
+       const modelName = nameMatch[1];
+       const url = urlMatch ? urlMatch[1] : null;
+       const exportCmd = exportMatch ? `export OPENAI_BASE_URL='${exportMatch[1]}'` : null;
+
+       console.log(`\nStarted ${modelName}`);
+       console.log('Waiting for model to initialize...\n');
+
+       // Set up Ctrl+C handler for manual interruption
+       const showModelInfo = () => {
+         console.log('\n\n' + '='.repeat(60));
+         console.log('Model Information:');
+         console.log('='.repeat(60));
+         console.log(`Name: ${modelName}`);
+         if (url) console.log(`URL: ${url}`);
+         if (exportCmd) {
+           console.log(`\nTo use with OpenAI clients:`);
+           console.log(exportCmd);
+           console.log(`export OPENAI_API_KEY='dummy'`);
+         }
+         console.log('='.repeat(60));
+       };
+
+       process.on('SIGINT', () => {
+         showModelInfo();
+         process.exit(0);
+       });
+
+       // Watch logs until startup complete
+       await this.logs(modelName, true); // autoExit = true for startup
+
+       // Show model info after automatic exit
+       showModelInfo();
+     } else {
+       console.log(output);
+     }
+   }
+
+   async startRaw(modelId, name, vllmArgs) {
+     // Check if name is already in use
+     const runningModels = this.getRunningModels();
+     if (runningModels[name]) {
+       console.error(`Error: Model name '${name}' is already in use`);
+       console.error('Running models:', Object.keys(runningModels).join(', '));
+       process.exit(1);
+     }
+
+     console.log(`Starting ${name} with custom vLLM args...`);
+
+     // Start vLLM with raw arguments - use base64 to safely pass complex args
+     const base64Args = Buffer.from(vllmArgs).toString('base64');
+     const output = this.ssh(`python3 vllm_manager.py start_raw "${modelId}" "${name}" "${base64Args}"`);
+
+     // Extract connection info from output
+     const urlMatch = output.match(/URL: (http:\/\/[^\s]+)/);
+     const exportMatch = output.match(/export OPENAI_BASE_URL='([^']+)'/);
+
+     if (urlMatch || exportMatch) {
+       const url = urlMatch ? urlMatch[1] : null;
+       const exportCmd = exportMatch ? `export OPENAI_BASE_URL='${exportMatch[1]}'` : null;
+
+       console.log(`\nStarted ${name}`);
+       console.log('Waiting for model to initialize...\n');
+
+       // Set up Ctrl+C handler for manual interruption
+       const showModelInfo = () => {
+         console.log('\n\n' + '='.repeat(60));
+         console.log('Model Information:');
+         console.log('='.repeat(60));
+         console.log(`Name: ${name}`);
+         if (url) console.log(`URL: ${url}`);
+         if (exportCmd) {
+           console.log(`\nTo use with OpenAI clients:`);
+           console.log(exportCmd);
+           console.log(`export OPENAI_API_KEY='dummy'`);
+         }
+         console.log('='.repeat(60));
+       };
+
+       process.on('SIGINT', () => {
+         showModelInfo();
+         process.exit(0);
+       });
+
+       // Watch logs until startup complete
+       await this.logs(name, true); // autoExit = true for startup
+
+       // Show model info after automatic exit
+       showModelInfo();
+     } else {
+       console.log(output);
+     }
+   }
+
+   stop(name) {
+     if (!name) {
+       // Stop all models
+       const runningModels = this.getRunningModels();
+       const modelNames = Object.keys(runningModels);
+
+       if (modelNames.length === 0) {
+         console.log('No models running');
+         return;
+       }
+
+       console.log(`Stopping ${modelNames.length} model(s): ${modelNames.join(', ')}`);
+
+       for (const modelName of modelNames) {
+         const output = this.ssh(`python3 vllm_manager.py stop ${modelName}`);
+         console.log(output);
+       }
+     } else {
+       // Stop specific model
+       const output = this.ssh(`python3 vllm_manager.py stop ${name}`);
+       console.log(output);
+     }
+   }
+
+   async logs(name, autoExit = false) {
+     if (!name) {
+       console.error('Usage: pi logs <name>');
+       process.exit(1);
+     }
+
+     // Use vllm_manager.py to get the log file path
+     const infoOutput = this.ssh(`python3 vllm_manager.py list`);
+
+     // Extract log file path from the output
+     const lines = infoOutput.split('\n');
+     let logFile = null;
+     let inModel = false;
+
+     for (const line of lines) {
+       if (line.startsWith(`${name}:`)) {
+         inModel = true;
+       } else if (inModel && line.includes('Logs:')) {
+         logFile = line.split('Logs:')[1].trim();
+         break;
+       }
+     }
+
+     if (!logFile) {
+       console.error(`No logs found for ${name}`);
+       process.exit(1);
+     }
+
+     // Use a custom tail that watches for startup complete
+     const pod = this.getActivePod();
+     const sshCmd = `ssh ${pod.ssh} tail -n 50 -f ${logFile}`;
+
+     return new Promise((resolve) => {
+       const [cmd, ...args] = sshCmd.split(' ');
+       const proc = spawn(cmd, args, { stdio: ['inherit', 'pipe', 'pipe'] });
+
+       let buffer = '';
+
+       proc.stdout.on('data', (data) => {
+         process.stdout.write(data);
+         buffer += data.toString();
+
+         // Only check for startup messages if autoExit is enabled
+         if (autoExit) {
+           if (buffer.includes('Application startup complete.') ||
+               buffer.includes('Uvicorn running on')) {
+             setTimeout(() => {
+               proc.kill();
+               resolve();
+             }, 500); // Small delay to ensure final messages are shown
+           }
+         }
+
+         // Keep buffer size manageable
+         if (buffer.length > 10000) {
+           buffer = buffer.slice(-5000);
+         }
+       });
+
+       proc.stderr.on('data', (data) => {
+         process.stderr.write(data);
+       });
+
+       proc.on('close', () => {
+         resolve();
+       });
+     });
+   }
+
+   async shell() {
+     const pod = this.getActivePod();
+     if (!pod) {
+       console.error('No active pod. Run: pi setup <pod-name> <ssh_command>');
+       process.exit(1);
+     }
+
+     console.log('Connecting to pod...');
+
+     // Use spawn directly for interactive shell
+     const sshParts = pod.ssh.split(' ');
+     const sshCmd = ['ssh', ...sshParts];
+     const proc = spawn(sshCmd[0], sshCmd.slice(1), { stdio: 'inherit' });
+
+     return new Promise((resolve) => {
+       proc.on('close', resolve);
+     });
+   }
+
+   listPods() {
+     if (!this.config.pods || Object.keys(this.config.pods).length === 0) {
+       console.log('No pods configured. Run: pi setup <pod-name> <ssh_command>');
+       return;
+     }
+
+     console.log('Configured pods:\n');
+
+     // Show active pod first
+     if (this.config.active && this.config.pods[this.config.active]) {
+       console.log(`● ${this.config.active} (active)`);
+       console.log(`  ${this.config.pods[this.config.active].ssh}\n`);
+     }
+
+     // Show other pods
+     Object.keys(this.config.pods).sort().forEach(name => {
+       if (name !== this.config.active) {
+         console.log(`○ ${name}`);
+         console.log(`  ${this.config.pods[name].ssh}`);
+       }
+     });
+   }
+
+   switchPod(podName) {
+     if (!this.config.pods || !this.config.pods[podName]) {
+       console.error(`Pod '${podName}' not found`);
+       console.error('Available pods:', Object.keys(this.config.pods || {}).join(', ') || 'none');
+       process.exit(1);
+     }
+
+     this.config.active = podName;
+     this.saveConfig();
+     console.log(`Switched to pod: ${podName} (${this.config.pods[podName].ssh})`);
+   }
+
+   removePod(podName) {
+     if (!this.config.pods || !this.config.pods[podName]) {
+       console.error(`Pod '${podName}' not found`);
+       console.error('Available pods:', Object.keys(this.config.pods || {}).join(', ') || 'none');
+       process.exit(1);
+     }
+
+     delete this.config.pods[podName];
+
+     // If we removed the active pod, clear it or switch to another
+     if (this.config.active === podName) {
+       const remainingPods = Object.keys(this.config.pods);
+       this.config.active = remainingPods.length > 0 ? remainingPods[0] : null;
+     }
+
+     this.saveConfig();
+     console.log(`Removed pod: ${podName}`);
+     if (this.config.active) {
+       console.log(`Active pod is now: ${this.config.active}`);
+     }
+   }
+
+   async searchModels(query) {
+     console.log(`Searching HuggingFace for models matching "${query}"...\n`);
+
+     try {
+       const response = await fetch(`https://huggingface.co/api/models?search=${query}&filter=text-generation&sort=downloads&limit=20`);
+       const data = await response.json();
+
+       if (!data || data.length === 0) {
+         console.log('No models found');
+         return;
+       }
+
+       // Format results
+       console.log('Popular models (sorted by downloads):\n');
+       for (const model of data) {
+         const modelName = model.modelId.toLowerCase();
+
+         // Skip incompatible formats
+         if (modelName.includes('-mlx-') || modelName.includes('-mlx')) {
+           continue; // MLX is for Apple Silicon only
+         }
+         if (modelName.includes('-gguf') || modelName.includes('.gguf')) {
+           continue; // GGUF is for llama.cpp, not vLLM
+         }
+
+         const downloads = model.downloads || 0;
+         const likes = model.likes || 0;
+
+         console.log(`\x1b[1m${model.modelId}\x1b[0m`); // Bold
+         console.log(`  \x1b[36mhttps://huggingface.co/${model.modelId}\x1b[0m`); // Cyan for URL
+         console.log(`  Downloads: ${downloads.toLocaleString()} | Likes: ${likes}`);
+
+         // Check for quantization
+         if (modelName.includes('-fp8') || modelName.includes('fp8-')) {
+           console.log(`  \x1b[33mNote: FP8 quantized - requires GPU with FP8 support\x1b[0m`);
+         }
+
+         console.log(`  pi start ${model.modelId}`);
+         console.log();
+       }
+
+       // Add HuggingFace search URL
+       console.log(`\nView more models on HuggingFace:`);
+       console.log(`\x1b[36mhttps://huggingface.co/models?search=${encodeURIComponent(query)}&sort=downloads&pipeline_tag=text-generation\x1b[0m`);
+     } catch (error) {
+       console.error('Error searching models:', error.message);
+     }
+   }
+
+   async prompt(name, message) {
+     // Get model info
+     const models = this.getRunningModels();
+     const model = models[name];
+
+     if (!model || !model.url) {
+       console.error(`Model '${name}' is not running`);
+       console.error('Running models:', Object.keys(models).join(', ') || 'none');
+       process.exit(1);
+     }
+
+     // Make API call directly to the model's external URL
+     const url = `${model.url}/chat/completions`;
+     const payload = {
+       model: model.model_id,
+       messages: [{ role: 'user', content: message }],
+       max_tokens: 500,
+       temperature: 0.7
+     };
+
+     try {
+       const response = await fetch(url, {
+         method: 'POST',
+         headers: { 'Content-Type': 'application/json' },
+         body: JSON.stringify(payload)
+       });
+
+       if (!response.ok) {
+         throw new Error(`HTTP ${response.status}: ${await response.text()}`);
+       }
+
+       const data = await response.json();
+       console.log(data.choices[0].message.content);
+     } catch (error) {
+       console.error('Error:', error.message);
+       process.exit(1);
+     }
+   }
+
+   showHelp() {
+     console.log('\nPrime Intellect CLI\n');
+
+     console.log('Pod Management:');
+     console.log('  pi setup <pod-name> <ssh_command>  Configure and activate a pod');
+     console.log('  pi pods                            List all pods (active pod marked)');
+     console.log('  pi pod <pod-name>                  Switch active pod');
+     console.log('  pi pod remove <pod-name>           Remove pod from config\n');
+     console.log('Model Management:');
+     console.log('  pi list                            List running models');
+     console.log('  pi search <query>                  Search HuggingFace models');
+     console.log('  pi start <model> [options]         Start a model');
+     console.log('  pi stop [name]                     Stop a model (or all if no name)');
+     console.log('  pi logs <name>                     View model logs');
+     console.log('  pi prompt <name> <msg>             Chat with a model\n');
+     console.log('Start Options:');
+     console.log('  --name <name>       Model alias (default: auto-generated)');
+     console.log('  --context <size>    Context window: 4k, 16k, 32k (default: 8k)');
+     console.log('  --memory <percent>  GPU memory: 30%, 50%, 90% (default: 90%)');
+     console.log('  --all-gpus          Use all GPUs with tensor parallelism\n');
+     console.log('Utility:');
+     console.log('  pi shell                           SSH into active pod');
+
+     console.log('\nQuick Example:');
+     console.log('  pi start Qwen/Qwen2.5-7B-Instruct --name qwen');
+     console.log('  pi prompt qwen "What is 2+2?"');
+
+     if (this.config.active && this.config.pods[this.config.active]) {
+       console.log(`\nActive pod: ${this.config.active} (${this.config.pods[this.config.active].ssh})`);
+     } else {
+       console.log('\nNo active pod');
+     }
+   }
+
+   getRunningModels() {
+     try {
+       const output = this.ssh('python3 vllm_manager.py list');
+       const models = {};
+
+       // Parse the output to extract model info
+       const lines = output.split('\n');
+       let currentModel = null;
+
+       for (const line of lines) {
+         if (line.match(/^[a-zA-Z0-9_-]+:$/)) {
+           currentModel = line.slice(0, -1);
+           models[currentModel] = {};
+         } else if (currentModel) {
+           if (line.includes('Model:')) {
+             models[currentModel].model_id = line.split('Model:')[1].trim();
+           } else if (line.includes('Port:')) {
+             models[currentModel].port = parseInt(line.split('Port:')[1].trim());
+           } else if (line.includes('URL:')) {
+             models[currentModel].url = line.split('URL:')[1].trim();
+           }
+         }
+       }
+
+       return models;
+     } catch (e) {
+       return {};
+     }
+   }
+
+   async run() {
+     const [,, command, ...args] = process.argv;
+
+     switch (command) {
+       case 'setup': {
+         if (args.length < 2) {
+           console.error('Usage: pi setup <pod-name> <ssh_command>');
+           console.error('Example: pi setup prod "root@135.181.71.41 -p 22"');
+           process.exit(1);
+         }
+         const podName = args[0];
+         const sshCmd = args.slice(1).join(' ');
+         this.setup(podName, sshCmd);
+         break;
+       }
+       case 'pods':
+         this.listPods();
+         break;
+
+       case 'pod':
+         if (!args[0]) {
+           console.error('Usage: pi pod <pod-name>');
+           console.error('       pi pod remove <pod-name>');
+           process.exit(1);
+         }
+         if (args[0] === 'remove' && args[1]) {
+           this.removePod(args[1]);
+         } else {
+           this.switchPod(args[0]);
+         }
+         break;
+
+       case 'list':
+       case 'ls':
+         this.list();
+         break;
+
+       case 'search':
+         if (!args[0]) {
+           console.error('Usage: pi search <query>');
+           console.error('Example: pi search qwen');
+           process.exit(1);
+         }
+         await this.searchModels(args[0]);
+         break;
+
+       case 'start':
+         await this.handleStart(args);
+         break;
+
+       case 'stop':
+         this.stop(args[0]);
+         break;
+
+       case 'logs':
+         await this.logs(args[0], false); // autoExit = false for manual logs command
+         break;
+
+       case 'prompt': {
+         if (args.length < 2) {
+           console.error('Usage: pi prompt <model_name> "<message>"');
+           console.error('Example: pi prompt phi3 "Hey, how you going"');
+           process.exit(1);
+         }
+         const modelName = args[0];
+         const message = args.slice(1).join(' ');
+         this.prompt(modelName, message);
+         break;
+       }
+       case 'shell':
+         await this.shell();
+         break;
+
+       case 'ssh':
+         // Pass through any SSH command
+         if (args.length > 0) {
+           const output = this.ssh(args.join(' '));
+           console.log(output);
+         } else {
+           this.shell();
+         }
+         break;
+
+       default:
+         this.showHelp();
+     }
+   }
+ }
+
+ // Run CLI
+ const cli = new PrimeIntellectCLI();
+ cli.run().catch(console.error);