npm - @appkit/llamacpp-cli - Versions diffs - 1.0.0 → 1.1.1 - Mend

@appkit/llamacpp-cli 1.0.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52) hide show

package/CHANGELOG.md +14 -0
package/README.md +52 -14
package/dist/cli.js +8 -2
package/dist/cli.js.map +1 -1
package/dist/commands/list.js +1 -1
package/dist/commands/list.js.map +1 -1
package/dist/commands/logs.d.ts +4 -0
package/dist/commands/logs.d.ts.map +1 -1
package/dist/commands/logs.js +157 -24
package/dist/commands/logs.js.map +1 -1
package/dist/commands/ps.d.ts.map +1 -1
package/dist/commands/ps.js +11 -1
package/dist/commands/ps.js.map +1 -1
package/dist/commands/pull.js +1 -1
package/dist/commands/pull.js.map +1 -1
package/dist/commands/run.js +1 -1
package/dist/commands/run.js.map +1 -1
package/dist/commands/start.d.ts +2 -0
package/dist/commands/start.d.ts.map +1 -1
package/dist/commands/start.js +7 -3
package/dist/commands/start.js.map +1 -1
package/dist/lib/config-generator.d.ts +2 -0
package/dist/lib/config-generator.d.ts.map +1 -1
package/dist/lib/config-generator.js +6 -0
package/dist/lib/config-generator.js.map +1 -1
package/dist/lib/launchctl-manager.d.ts.map +1 -1
package/dist/lib/launchctl-manager.js +22 -13
package/dist/lib/launchctl-manager.js.map +1 -1
package/dist/types/server-config.d.ts +2 -0
package/dist/types/server-config.d.ts.map +1 -1
package/dist/types/server-config.js.map +1 -1
package/dist/utils/log-parser.d.ts +37 -0
package/dist/utils/log-parser.d.ts.map +1 -0
package/dist/utils/log-parser.js +164 -0
package/dist/utils/log-parser.js.map +1 -0
package/dist/utils/process-utils.d.ts +6 -0
package/dist/utils/process-utils.d.ts.map +1 -1
package/dist/utils/process-utils.js +35 -0
package/dist/utils/process-utils.js.map +1 -1
package/package.json +1 -1
package/src/cli.ts +8 -2
package/src/commands/list.ts +1 -1
package/src/commands/logs.ts +133 -26
package/src/commands/ps.ts +13 -2
package/src/commands/pull.ts +1 -1
package/src/commands/run.ts +1 -1
package/src/commands/start.ts +9 -3
package/src/lib/config-generator.ts +8 -0
package/src/lib/launchctl-manager.ts +22 -13
package/src/types/server-config.ts +2 -0
package/src/utils/log-parser.ts +184 -0
package/src/utils/process-utils.ts +38 -0

package/src/commands/logs.ts CHANGED Viewed

@@ -1,13 +1,20 @@
 import chalk from 'chalk';
 import { spawn } from 'child_process';
+import * as readline from 'readline';
+import * as fs from 'fs';
 import { stateManager } from '../lib/state-manager';
 import { fileExists } from '../utils/file-utils';
 import { execCommand } from '../utils/process-utils';
+import { logParser } from '../utils/log-parser';
 interface LogsOptions {
   follow?: boolean;  // stream new log lines via `tail -f` instead of printing the last N lines
   lines?: number;  // how many trailing lines to read when not following (falls back to 50; compact mode reads 3x this many)
   errors?: boolean;  // keep only lines matching the error/failed pattern
+  verbose?: boolean;  // show every line with no filter; checked before errors/http/filter
+  http?: boolean;  // keep only raw 'log_server_r' lines (full HTTP JSON)
+  stdout?: boolean;  // read server.stdoutPath instead of the default server.stderrPath
+  filter?: string;  // custom pattern handed to `grep -E`; used only when verbose/errors/http are unset
 }
 export async function logsCommand(identifier: string, options: LogsOptions): Promise<void> {
@@ -17,9 +24,9 @@ export async function logsCommand(identifier: string, options: LogsOptions): Pro
     throw new Error(`Server not found: ${identifier}\n\nUse: llamacpp ps`);
   }
-  // Determine log file
-  const logPath = options.errors ? server.stderrPath : server.stdoutPath;
-  const logType = options.errors ? 'errors' : 'logs';
+  // Determine log file (default to stderr where verbose logs go)
+  const logPath = options.stdout ? server.stdoutPath : server.stderrPath;
+  const logType = options.stdout ? 'stdout' : 'stderr';
   // Check if log file exists
   if (!(await fileExists(logPath))) {
@@ -28,34 +35,134 @@ export async function logsCommand(identifier: string, options: LogsOptions): Pro
     return;
   }
-  console.log(chalk.blue(`📋 ${options.errors ? 'Errors' : 'Logs'} for ${server.modelName}`));
+  // Determine filter pattern and mode
+  let filterPattern: string | null = null;
+  let filterDesc = '';
+  let useCompactMode = false;
+  if (options.verbose) {
+    // Show everything (no filter)
+    filterDesc = ' (all messages)';
+  } else if (options.errors) {
+    // Show only errors
+    filterPattern = 'error|Error|ERROR|failed|Failed|FAILED';
+    filterDesc = ' (errors only)';
+  } else if (options.http) {
+    // Full HTTP JSON logs
+    filterPattern = 'log_server_r';
+    filterDesc = ' (HTTP JSON)';
+  } else if (options.filter) {
+    // Custom filter
+    filterPattern = options.filter;
+    filterDesc = ` (filter: ${options.filter})`;
+  } else {
+    // Default: Compact one-liner format
+    filterPattern = 'log_server_r';
+    filterDesc = ' (compact)';
+    useCompactMode = true;
+  }
+  console.log(chalk.blue(`📋 Logs for ${server.modelName} (${logType}${filterDesc})`));
   console.log(chalk.dim(`   ${logPath}\n`));
   if (options.follow) {
-    // Follow logs in real-time
-    const tail = spawn('tail', ['-f', logPath], {
-      stdio: 'inherit',
-    });
-    // Handle Ctrl+C gracefully
-    process.on('SIGINT', () => {
-      tail.kill();
-      console.log();
-      process.exit(0);
-    });
-    // Wait for tail to exit
-    tail.on('exit', () => {
-      process.exit(0);
-    });
+    // Follow logs in real-time with optional filtering
+    if (useCompactMode) {
+      // Compact mode with follow: parse lines in real-time
+      const tailProcess = spawn('tail', ['-f', logPath]);
+      const rl = readline.createInterface({
+        input: tailProcess.stdout,
+        crlfDelay: Infinity,
+      });
+      rl.on('line', (line) => {
+        if (line.includes('log_server_r')) {
+          logParser.processLine(line, (compactLine) => {
+            console.log(compactLine);
+          });
+        }
+      });
+      // Handle Ctrl+C gracefully
+      process.on('SIGINT', () => {
+        tailProcess.kill();
+        rl.close();
+        console.log();
+        process.exit(0);
+      });
+      tailProcess.on('exit', () => {
+        process.exit(0);
+      });
+    } else if (filterPattern) {
+      // Use tail piped to grep for filtering
+      const grepProcess = spawn('sh', ['-c', `tail -f "${logPath}" | grep --line-buffered -E "${filterPattern}"`], {
+        stdio: 'inherit',
+      });
+      // Handle Ctrl+C gracefully
+      process.on('SIGINT', () => {
+        grepProcess.kill();
+        console.log();
+        process.exit(0);
+      });
+      grepProcess.on('exit', () => {
+        process.exit(0);
+      });
+    } else {
+      // No filter, just tail
+      const tail = spawn('tail', ['-f', logPath], {
+        stdio: 'inherit',
+      });
+      process.on('SIGINT', () => {
+        tail.kill();
+        console.log();
+        process.exit(0);
+      });
+      tail.on('exit', () => {
+        process.exit(0);
+      });
+    }
   } else {
-    // Show last N lines
+    // Show last N lines with optional filtering
     const lines = options.lines || 50;
-    try {
-      const output = await execCommand(`tail -n ${lines} "${logPath}"`);
-      console.log(output);
-    } catch (error) {
-      throw new Error(`Failed to read logs: ${(error as Error).message}`);
+    if (useCompactMode) {
+      // Compact mode: read file and parse
+      try {
+        const command = `tail -n ${lines * 3} "${logPath}" | grep -E "log_server_r"`;
+        const output = await execCommand(command);
+        const logLines = output.split('\n').filter((l) => l.trim());
+        for (const line of logLines) {
+          logParser.processLine(line, (compactLine) => {
+            console.log(compactLine);
+          });
+        }
+      } catch (error) {
+        throw new Error(`Failed to read logs: ${(error as Error).message}`);
+      }
+    } else {
+      // Regular filtering
+      try {
+        let command: string;
+        if (filterPattern) {
+          // Use tail piped to grep
+          command = `tail -n ${lines} "${logPath}" | grep -E "${filterPattern}"`;
+        } else {
+          // No filter
+          command = `tail -n ${lines} "${logPath}"`;
+        }
+        const output = await execCommand(command);
+        console.log(output);
+      } catch (error) {
+        throw new Error(`Failed to read logs: ${(error as Error).message}`);
+      }
     }
   }
 }

package/src/commands/ps.ts CHANGED Viewed

@@ -2,7 +2,8 @@ import chalk from 'chalk';
 import Table from 'cli-table3';
 import { stateManager } from '../lib/state-manager';
 import { statusChecker } from '../lib/status-checker';
-import { formatUptime } from '../utils/format-utils';
+import { formatUptime, formatBytes } from '../utils/format-utils';
+import { getProcessMemory } from '../utils/process-utils';
 export async function psCommand(): Promise<void> {
   const servers = await stateManager.getAllServers();
@@ -18,7 +19,7 @@ export async function psCommand(): Promise<void> {
   const updated = await statusChecker.updateAllServerStatuses();
   const table = new Table({
-    head: ['SERVER ID', 'MODEL', 'PORT', 'STATUS', 'PID', 'UPTIME'],
+    head: ['SERVER ID', 'MODEL', 'PORT', 'STATUS', 'PID', 'MEMORY', 'UPTIME'],
   });
   let runningCount = 0;
@@ -51,12 +52,22 @@ export async function psCommand(): Promise<void> {
         ? formatUptime(server.lastStarted)
         : '-';
+    // Get memory usage for running servers
+    let memoryText = '-';
+    if (server.status === 'running' && server.pid) {
+      const memoryBytes = await getProcessMemory(server.pid);
+      if (memoryBytes !== null) {
+        memoryText = formatBytes(memoryBytes);
+      }
+    }
     table.push([
       server.id,
       server.modelName,
       server.port.toString(),
       statusColor(statusText),
       server.pid?.toString() || '-',
+      memoryText,
       uptime,
     ]);
   }

package/src/commands/pull.ts CHANGED Viewed

@@ -30,7 +30,7 @@ export async function pullCommand(identifier: string, options: PullOptions): Pro
     const modelPath = await modelDownloader.downloadModel(parsed.repo, filename);
     console.log();
-    console.log(chalk.dim(`Start server: llamacpp start ${filename}`));
+    console.log(chalk.dim(`Start server: llamacpp server start ${filename}`));
   } catch (error) {
     if ((error as Error).message.includes('interrupted')) {
       console.log(chalk.dim('\nDownload was interrupted. Run the same command again to retry.'));

package/src/commands/run.ts CHANGED Viewed

@@ -49,7 +49,7 @@ export async function runCommand(modelIdentifier: string): Promise<void> {
   // 2. Verify server is running
   const status = await statusChecker.checkServer(server);
   if (!status.isRunning) {
-    throw new Error(`Server exists but is not running. Start it with: llamacpp start ${server.id}`);
+    throw new Error(`Server exists but is not running. Start it with: llamacpp server start ${server.id}`);
   }
   // 3. Start REPL

package/src/commands/start.ts CHANGED Viewed

@@ -15,6 +15,8 @@ interface StartOptions {
   threads?: number;
   ctxSize?: number;
   gpuLayers?: number;
+  logVerbosity?: number;
+  logTimestamps?: boolean;
 }
 export async function startCommand(model: string, options: StartOptions): Promise<void> {
@@ -67,6 +69,8 @@ export async function startCommand(model: string, options: StartOptions): Promis
     threads: options.threads,
     ctxSize: options.ctxSize,
     gpuLayers: options.gpuLayers,
+    logVerbosity: options.logVerbosity,
+    logTimestamps: options.logTimestamps,
   };
   const config = await configGenerator.generateConfig(
@@ -84,6 +88,8 @@ export async function startCommand(model: string, options: StartOptions): Promis
   console.log(chalk.dim(`Threads: ${config.threads}`));
   console.log(chalk.dim(`Context Size: ${config.ctxSize}`));
   console.log(chalk.dim(`GPU Layers: ${config.gpuLayers}`));
+  console.log(chalk.dim(`Log Verbosity: ${config.logVerbosity !== undefined ? config.logVerbosity : 'all'}`));
+  console.log(chalk.dim(`Log Timestamps: ${config.logTimestamps ? 'enabled' : 'disabled'}`));
   console.log();
   // 7. Ensure log directory exists
@@ -121,7 +127,7 @@ export async function startCommand(model: string, options: StartOptions): Promis
     await launchctlManager.stopService(config.label);
     await launchctlManager.unloadService(config.plistPath);
     await launchctlManager.deletePlist(config.plistPath);
-    throw new Error('Server failed to start. Check logs with: llamacpp logs --errors');
+    throw new Error('Server failed to start. Check logs with: llamacpp server logs --errors');
   }
   // 12. Update config with running status
@@ -135,6 +141,6 @@ export async function startCommand(model: string, options: StartOptions): Promis
   console.log(chalk.green('✅ Server started successfully!'));
   console.log();
   console.log(chalk.dim(`Connect: http://localhost:${config.port}`));
-  console.log(chalk.dim(`View logs: llamacpp logs ${config.id}`));
-  console.log(chalk.dim(`Stop: llamacpp stop ${config.id}`));
+  console.log(chalk.dim(`View logs: llamacpp server logs ${config.id}`));
+  console.log(chalk.dim(`Stop: llamacpp server stop ${config.id}`));
 }

package/src/lib/config-generator.ts CHANGED Viewed

@@ -11,6 +11,8 @@ export interface ServerOptions {
   gpuLayers?: number;
   embeddings?: boolean;
   jinja?: boolean;
+  logVerbosity?: number;
+  logTimestamps?: boolean;
 }
 export interface SmartDefaults {
@@ -67,6 +69,8 @@ export class ConfigGenerator {
     const gpuLayers = options?.gpuLayers ?? smartDefaults.gpuLayers;
     const embeddings = options?.embeddings ?? true;
     const jinja = options?.jinja ?? true;
+    const logVerbosity = options?.logVerbosity;  // Default to undefined (log everything), filter at CLI level
+    const logTimestamps = options?.logTimestamps ?? true;  // Enable timestamps by default
     // Generate server ID
     const id = sanitizeModelName(modelName);
@@ -88,6 +92,8 @@ export class ConfigGenerator {
       gpuLayers,
       embeddings,
       jinja,
+      logVerbosity,
+      logTimestamps,
       status: 'stopped',
       createdAt: new Date().toISOString(),
       plistPath,
@@ -111,6 +117,8 @@ export class ConfigGenerator {
       gpuLayers: options?.gpuLayers ?? globalConfig.defaults.gpuLayers,
       embeddings: options?.embeddings ?? true,
       jinja: options?.jinja ?? true,
+      logVerbosity: options?.logVerbosity,  // undefined = log everything
+      logTimestamps: options?.logTimestamps ?? true,
     };
   }
 }

package/src/lib/launchctl-manager.ts CHANGED Viewed

@@ -16,6 +16,27 @@ export class LaunchctlManager {
    * Generate plist XML content for a server
    */
   generatePlist(config: ServerConfig): string {
+    // Build program arguments array
+    const args = [
+      '/opt/homebrew/bin/llama-server',
+      '--model', config.modelPath,
+      '--port', config.port.toString(),
+      '--threads', config.threads.toString(),
+      '--ctx-size', config.ctxSize.toString(),
+      '--gpu-layers', config.gpuLayers.toString(),
+    ];
+    // Add flags
+    if (config.embeddings) args.push('--embeddings');
+    if (config.jinja) args.push('--jinja');
+    if (config.logVerbosity !== undefined) {
+      args.push('--log-verbosity', config.logVerbosity.toString());
+    }
+    if (config.logTimestamps) args.push('--log-timestamps');
+    // Generate XML array elements
+    const argsXml = args.map(arg => `      <string>${arg}</string>`).join('\n');
     return `<?xml version="1.0" encoding="UTF-8"?>
 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN"
   "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
@@ -26,19 +47,7 @@ export class LaunchctlManager {
     <key>ProgramArguments</key>
     <array>
-      <string>/opt/homebrew/bin/llama-server</string>
-      <string>--model</string>
-      <string>${config.modelPath}</string>
-      <string>--port</string>
-      <string>${config.port}</string>
-      <string>--threads</string>
-      <string>${config.threads}</string>
-      <string>--ctx-size</string>
-      <string>${config.ctxSize}</string>
-      <string>--gpu-layers</string>
-      <string>${config.gpuLayers}</string>
-      <string>--embeddings</string>
-      <string>--jinja</string>
+${argsXml}
     </array>
     <key>RunAtLoad</key>

package/src/types/server-config.ts CHANGED Viewed

@@ -12,6 +12,8 @@ export interface ServerConfig {
   gpuLayers: number;
   embeddings: boolean;     // Always true
   jinja: boolean;          // Always true
+  logVerbosity?: number;   // Log verbosity level (0=errors only, 1=warnings, 2=info/HTTP, 9=debug, undefined=all)
+  logTimestamps: boolean;  // Add timestamps to log messages
   // State tracking
   status: ServerStatus;

package/src/utils/log-parser.ts ADDED Viewed

@@ -0,0 +1,184 @@
+/**
+ * Parse and consolidate verbose llama-server logs into compact single-line format
+ *
+ * CompactLogEntry below is the intermediate record for one request/response
+ * pair; LogParser fills it in and formatCompactLine joins the fields, in this
+ * declaration order, with single spaces.
+ */
+interface CompactLogEntry {
+  timestamp: string;  // 'YYYY-MM-DD HH:MM:SS', taken from the log line or generated when absent
+  method: string;  // HTTP verb captured from the request line
+  endpoint: string;  // request path captured from the request line (starts with '/')
+  ip: string;  // third whitespace-delimited token captured after 'request:' on the request line
+  status: number;  // numeric status captured from the request line, parsed base 10
+  userMessage: string;  // first user message, whitespace-collapsed, at most 50 characters
+  tokensIn: number;  // usage.prompt_tokens from the response JSON, 0 when missing
+  tokensOut: number;  // usage.completion_tokens from the response JSON, 0 when missing
+  responseTimeMs: number;  // rounded prompt_ms + predicted_ms from the response timings, 0 when missing
+}
+export class LogParser {
+  private buffer: string[] = [];
+  private isBuffering = false;
+  /**
+   * Process log lines and output compact format
+   *
+   * Stateful, line-at-a-time entry point. A line containing
+   * 'log_server_r: request: POST' opens a new buffer (replacing any buffer
+   * that was still open); subsequent lines are appended until one contains
+   * 'log_server_r: response:', at which point the buffered lines are
+   * consolidated and the buffer is reset. Lines seen while no buffer is open
+   * are ignored.
+   *
+   * NOTE(review): only POST request lines open a buffer, although the regex in
+   * consolidateRequest also accepts GET/PUT/DELETE - confirm this is intended.
+   *
+   * @param line - one raw log line
+   * @param callback - invoked with the compact line once a request/response
+   *   pair has been consolidated; not invoked when consolidation yields null
+   */
+  processLine(line: string, callback: (compactLine: string) => void): void {
+    // Check if this is the start of an HTTP request log
+    if (line.includes('log_server_r: request: POST')) {
+      this.isBuffering = true;
+      this.buffer = [line];
+      return;
+    }
+    // If we're buffering, collect lines
+    if (this.isBuffering) {
+      this.buffer.push(line);
+      // Check if we have a complete request (found response line)
+      if (line.includes('log_server_r: response:')) {
+        const compactLine = this.consolidateRequest(this.buffer);
+        if (compactLine) {
+          callback(compactLine);
+        }
+        this.buffer = [];
+        this.isBuffering = false;
+      }
+    }
+  }
+  /**
+   * Consolidate buffered request/response lines into single line
+   *
+   * Reads method, endpoint, ip and status from the first buffered line, the
+   * user message from the request JSON, and token counts plus timings from the
+   * response JSON.
+   *
+   * @param lines - buffered lines, starting with the request line
+   * @returns the formatted compact line, or null when the request line does
+   *   not match, either JSON payload is missing or unparsable, or anything
+   *   inside the try block throws
+   */
+  private consolidateRequest(lines: string[]): string | null {
+    try {
+      // Parse first line: timestamp and request info
+      const firstLine = lines[0];
+      const timestamp = this.extractTimestamp(firstLine);
+      const requestMatch = firstLine.match(/request: (POST|GET|PUT|DELETE) (\/[^\s]+) ([^\s]+) (\d+)/);
+      if (!requestMatch) return null;
+      const [, method, endpoint, ip, status] = requestMatch;
+      // Parse request JSON (first buffered line that has both the request marker and a '{')
+      const requestLine = lines.find((l) => l.includes('log_server_r: request:') && l.includes('{'));
+      if (!requestLine) return null;
+      const requestJson = this.extractJson(requestLine);
+      if (!requestJson) return null;
+      const userMessage = this.extractUserMessage(requestJson);
+      // Parse response JSON (first buffered line that has the response marker)
+      const responseLine = lines.find((l) => l.includes('log_server_r: response:'));
+      if (!responseLine) return null;
+      const responseJson = this.extractJson(responseLine);
+      if (!responseJson) return null;
+      const tokensIn = responseJson.usage?.prompt_tokens || 0;
+      const tokensOut = responseJson.usage?.completion_tokens || 0;
+      // Extract response time from verbose timings
+      const responseTimeMs = this.extractResponseTime(responseJson);
+      // Format compact line
+      return this.formatCompactLine({
+        timestamp,
+        method,
+        endpoint,
+        ip,
+        status: parseInt(status, 10),
+        userMessage,
+        tokensIn,
+        tokensOut,
+        responseTimeMs,
+      });
+    } catch (error) {
+      return null;
+    }
+  }
+  /**
+   * Extract timestamp from log line
+   *
+   * @param line - raw log line
+   * @returns the bracketed 'YYYY-MM-DD HH:MM:SS' value when the line contains
+   *   one; otherwise the current time in the same layout
+   *
+   * NOTE(review): the fallback is built from toISOString(), so it is UTC;
+   * whether the timestamps written by the server are local time or UTC is not
+   * visible here - confirm the two agree.
+   */
+  private extractTimestamp(line: string): string {
+    // Look for timestamp format like [2025-12-09 10:13:45]
+    const match = line.match(/\[(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\]/);
+    if (match) {
+      return match[1];  // Return as-is: 2025-12-09 10:13:45
+    }
+    // If no timestamp in logs, use current time in same format
+    const now = new Date();
+    return now.toISOString().substring(0, 19).replace('T', ' ');  // 2025-12-09 10:13:45
+  }
+  /**
+   * Extract JSON from log line
+   *
+   * Parses everything from the first '{' to the end of the line.
+   *
+   * @param line - raw log line
+   * @returns the parsed value, or null when the line has no '{' or the
+   *   remainder is not valid JSON (e.g. trailing non-JSON text)
+   */
+  private extractJson(line: string): any {
+    const jsonStart = line.indexOf('{');
+    if (jsonStart === -1) return null;
+    try {
+      const jsonStr = line.substring(jsonStart);
+      return JSON.parse(jsonStr);
+    } catch {
+      return null;
+    }
+  }
+  /**
+   * Extract first user message from request JSON
+   *
+   * @param requestJson - parsed request body; `messages` defaults to []
+   * @returns the content of the first message whose role is 'user', with
+   *   whitespace runs collapsed to single spaces and trimmed; results longer
+   *   than 50 characters are cut to the first 47 plus '...'. Returns '' when
+   *   there is no user message or its content is falsy.
+   *
+   * NOTE(review): `.replace` is called directly on `content`, so a non-string
+   * content value would throw here; the catch in consolidateRequest then
+   * drops the whole entry - confirm that is acceptable.
+   */
+  private extractUserMessage(requestJson: any): string {
+    const messages = requestJson.messages || [];
+    const userMsg = messages.find((m: any) => m.role === 'user');
+    if (!userMsg || !userMsg.content) return '';
+    // Truncate to first 50 characters
+    const content = userMsg.content.replace(/\n/g, ' ').replace(/\s+/g, ' ').trim();
+    return content.length > 50 ? content.substring(0, 47) + '...' : content;
+  }
+  /**
+   * Extract response time from response JSON
+   *
+   * @param responseJson - parsed response body
+   * @returns prompt_ms + predicted_ms, rounded to an integer, read from
+   *   `__verbose.timings` when present and otherwise from top-level
+   *   `timings`; 0 when neither object exists. A missing or falsy component
+   *   counts as 0.
+   */
+  private extractResponseTime(responseJson: any): number {
+    // Check __verbose.timings first (has total time)
+    const verboseTimings = responseJson.__verbose?.timings;
+    if (verboseTimings) {
+      const promptMs = verboseTimings.prompt_ms || 0;
+      const predictedMs = verboseTimings.predicted_ms || 0;
+      return Math.round(promptMs + predictedMs);
+    }
+    // Fallback to top-level timings
+    const timings = responseJson.timings;
+    if (timings) {
+      const promptMs = timings.prompt_ms || 0;
+      const predictedMs = timings.predicted_ms || 0;
+      return Math.round(promptMs + predictedMs);
+    }
+    return 0;
+  }
+  /**
+   * Format compact log line
+   *
+   * @param entry - consolidated request/response record
+   * @returns the fields joined by single spaces in the order timestamp,
+   *   method, endpoint, ip, status, quoted user message, tokens in, tokens
+   *   out, response time (ms). The user message is wrapped in double quotes
+   *   without escaping.
+   */
+  private formatCompactLine(entry: CompactLogEntry): string {
+    return [
+      entry.timestamp,
+      entry.method,
+      entry.endpoint,
+      entry.ip,
+      entry.status,
+      `"${entry.userMessage}"`,
+      entry.tokensIn,
+      entry.tokensOut,
+      entry.responseTimeMs,
+    ].join(' ');
+  }
+}
+// Export singleton instance; its request buffer is per-instance state, so all callers of logParser.processLine share one buffer
+export const logParser = new LogParser();

package/src/utils/process-utils.ts CHANGED Viewed

@@ -58,3 +58,41 @@ export async function isPortInUse(port: number): Promise<boolean> {
     return false;
   }
 }
+/**
+ * Get memory usage for a process in bytes
+ * Uses 'top' on macOS which includes GPU/Metal memory (more accurate for llama-server)
+ * Returns null if process not found or error occurs
+ *
+ * Runs `top -l 1 -pid <pid> -stats mem` through execCommand and parses the
+ * last non-empty line of its output.
+ *
+ * @param pid - process id to sample
+ * @returns the value converted to bytes using 1024-based K/M/G/T multipliers
+ *   and rounded to an integer; null when the command throws, produces no
+ *   non-empty lines, or the last line is not exactly digits/dots followed by
+ *   one of K, M, G, T
+ *
+ * NOTE(review): the pattern is anchored at both ends, so a value carrying any
+ * other unit or a trailing marker character is reported as null - confirm
+ * against the actual `top` output format.
+ */
+export async function getProcessMemory(pid: number): Promise<number | null> {
+  try {
+    // Use top with -l 1 (one sample) to get memory stats
+    // MEM column shows resident memory including GPU memory on macOS
+    const output = await execCommand(`top -l 1 -pid ${pid} -stats mem`);
+    // Get the last non-empty line which contains the memory value
+    const lines = output.split('\n').filter((line) => line.trim().length > 0);
+    if (lines.length === 0) return null;
+    const memStr = lines[lines.length - 1].trim();
+    // Parse memory string (e.g., "10.5G", "512M", "1024K", "10G")
+    const match = memStr.match(/^([\d.]+)([KMGT])$/);
+    if (!match) return null;
+    const value = parseFloat(match[1]);
+    const unit = match[2];
+    // Convert to bytes
+    const multipliers: { [key: string]: number } = {
+      K: 1024,
+      M: 1024 * 1024,
+      G: 1024 * 1024 * 1024,
+      T: 1024 * 1024 * 1024 * 1024,
+    };
+    return Math.round(value * multipliers[unit]);
+  } catch {
+    return null;
+  }
+}