@appkit/llamacpp-cli 1.4.1 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. package/CHANGELOG.md +21 -0
  2. package/MONITORING-ACCURACY-FIX.md +199 -0
  3. package/PER-PROCESS-METRICS.md +190 -0
  4. package/README.md +136 -1
  5. package/dist/cli.js +21 -4
  6. package/dist/cli.js.map +1 -1
  7. package/dist/commands/create.d.ts.map +1 -1
  8. package/dist/commands/create.js +12 -3
  9. package/dist/commands/create.js.map +1 -1
  10. package/dist/commands/monitor.d.ts +2 -0
  11. package/dist/commands/monitor.d.ts.map +1 -0
  12. package/dist/commands/monitor.js +126 -0
  13. package/dist/commands/monitor.js.map +1 -0
  14. package/dist/commands/ps.d.ts +3 -1
  15. package/dist/commands/ps.d.ts.map +1 -1
  16. package/dist/commands/ps.js +75 -5
  17. package/dist/commands/ps.js.map +1 -1
  18. package/dist/commands/server-show.d.ts.map +1 -1
  19. package/dist/commands/server-show.js +10 -3
  20. package/dist/commands/server-show.js.map +1 -1
  21. package/dist/commands/start.d.ts.map +1 -1
  22. package/dist/commands/start.js +14 -2
  23. package/dist/commands/start.js.map +1 -1
  24. package/dist/lib/history-manager.d.ts +46 -0
  25. package/dist/lib/history-manager.d.ts.map +1 -0
  26. package/dist/lib/history-manager.js +157 -0
  27. package/dist/lib/history-manager.js.map +1 -0
  28. package/dist/lib/metrics-aggregator.d.ts +40 -0
  29. package/dist/lib/metrics-aggregator.d.ts.map +1 -0
  30. package/dist/lib/metrics-aggregator.js +211 -0
  31. package/dist/lib/metrics-aggregator.js.map +1 -0
  32. package/dist/lib/system-collector.d.ts +80 -0
  33. package/dist/lib/system-collector.d.ts.map +1 -0
  34. package/dist/lib/system-collector.js +311 -0
  35. package/dist/lib/system-collector.js.map +1 -0
  36. package/dist/tui/HistoricalMonitorApp.d.ts +5 -0
  37. package/dist/tui/HistoricalMonitorApp.d.ts.map +1 -0
  38. package/dist/tui/HistoricalMonitorApp.js +490 -0
  39. package/dist/tui/HistoricalMonitorApp.js.map +1 -0
  40. package/dist/tui/MonitorApp.d.ts +4 -0
  41. package/dist/tui/MonitorApp.d.ts.map +1 -0
  42. package/dist/tui/MonitorApp.js +315 -0
  43. package/dist/tui/MonitorApp.js.map +1 -0
  44. package/dist/tui/MultiServerMonitorApp.d.ts +4 -0
  45. package/dist/tui/MultiServerMonitorApp.d.ts.map +1 -0
  46. package/dist/tui/MultiServerMonitorApp.js +712 -0
  47. package/dist/tui/MultiServerMonitorApp.js.map +1 -0
  48. package/dist/types/history-types.d.ts +30 -0
  49. package/dist/types/history-types.d.ts.map +1 -0
  50. package/dist/types/history-types.js +11 -0
  51. package/dist/types/history-types.js.map +1 -0
  52. package/dist/types/monitor-types.d.ts +123 -0
  53. package/dist/types/monitor-types.d.ts.map +1 -0
  54. package/dist/types/monitor-types.js +3 -0
  55. package/dist/types/monitor-types.js.map +1 -0
  56. package/dist/types/server-config.d.ts +1 -0
  57. package/dist/types/server-config.d.ts.map +1 -1
  58. package/dist/types/server-config.js.map +1 -1
  59. package/dist/utils/downsample-utils.d.ts +35 -0
  60. package/dist/utils/downsample-utils.d.ts.map +1 -0
  61. package/dist/utils/downsample-utils.js +107 -0
  62. package/dist/utils/downsample-utils.js.map +1 -0
  63. package/dist/utils/file-utils.d.ts +6 -0
  64. package/dist/utils/file-utils.d.ts.map +1 -1
  65. package/dist/utils/file-utils.js +38 -0
  66. package/dist/utils/file-utils.js.map +1 -1
  67. package/dist/utils/process-utils.d.ts +35 -2
  68. package/dist/utils/process-utils.d.ts.map +1 -1
  69. package/dist/utils/process-utils.js +220 -25
  70. package/dist/utils/process-utils.js.map +1 -1
  71. package/docs/images/.gitkeep +1 -0
  72. package/package.json +5 -1
  73. package/src/cli.ts +21 -4
  74. package/src/commands/create.ts +14 -4
  75. package/src/commands/monitor.ts +110 -0
  76. package/src/commands/ps.ts +88 -5
  77. package/src/commands/server-show.ts +10 -3
  78. package/src/commands/start.ts +15 -2
  79. package/src/lib/history-manager.ts +172 -0
  80. package/src/lib/metrics-aggregator.ts +257 -0
  81. package/src/lib/system-collector.ts +315 -0
  82. package/src/tui/HistoricalMonitorApp.ts +548 -0
  83. package/src/tui/MonitorApp.ts +386 -0
  84. package/src/tui/MultiServerMonitorApp.ts +792 -0
  85. package/src/types/history-types.ts +39 -0
  86. package/src/types/monitor-types.ts +162 -0
  87. package/src/types/server-config.ts +1 -0
  88. package/src/utils/downsample-utils.ts +128 -0
  89. package/src/utils/file-utils.ts +40 -0
  90. package/src/utils/process-utils.ts +243 -25
  91. package/test-load.sh +100 -0
package/src/commands/start.ts

@@ -2,6 +2,7 @@ import chalk from 'chalk';
 import { stateManager } from '../lib/state-manager';
 import { launchctlManager } from '../lib/launchctl-manager';
 import { statusChecker } from '../lib/status-checker';
+import { parseMetalMemoryFromLog } from '../utils/file-utils';

 export async function startCommand(identifier: string): Promise<void> {
   // Initialize state manager
@@ -61,9 +62,21 @@ export async function startCommand(identifier: string): Promise<void> {
   }

   // 7. Update server status
-  await statusChecker.updateServerStatus(server);
+  let updatedServer = await statusChecker.updateServerStatus(server);

-  // 8. Display success
+  // 8. Parse Metal (GPU) memory allocation if not already captured
+  if (!updatedServer.metalMemoryMB) {
+    console.log(chalk.dim('Detecting Metal (GPU) memory allocation...'));
+    await new Promise(resolve => setTimeout(resolve, 8000)); // 8 second delay
+    const metalMemoryMB = await parseMetalMemoryFromLog(updatedServer.stderrPath);
+    if (metalMemoryMB) {
+      updatedServer = { ...updatedServer, metalMemoryMB };
+      await stateManager.saveServerConfig(updatedServer);
+      console.log(chalk.dim(`Metal memory: ${metalMemoryMB.toFixed(0)} MB`));
+    }
+  }
+
+  // 9. Display success
   console.log();
   console.log(chalk.green('✅ Server started successfully!'));
   console.log();
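
Note: the hunk above relies on parseMetalMemoryFromLog from package/src/utils/file-utils.ts (+40 lines in this release), whose body is not included in this excerpt. Below is a minimal sketch of what such a helper could look like, assuming llama.cpp's stderr log reports Metal buffer allocations as "... size = <n> MiB" lines; the function name, regex, and totalling logic are illustrative assumptions, not the package's actual implementation.

import { readFile } from 'fs/promises';

// Hypothetical sketch only - the shipped helper lives in src/utils/file-utils.ts and may differ.
// Assumes stderr lines such as "ggml_metal_init: ... buffer ... size = 1234.56 MiB".
export async function parseMetalMemoryFromLogSketch(stderrPath?: string): Promise<number | null> {
  if (!stderrPath) return null;
  try {
    const log = await readFile(stderrPath, 'utf-8');
    const sizes = log.match(/size\s*=\s*([\d.]+)\s*MiB/g) ?? [];
    let totalMiB = 0;
    for (const entry of sizes) {
      const value = /([\d.]+)\s*MiB/.exec(entry)?.[1];
      if (value) totalMiB += parseFloat(value);
    }
    // Caller stores this as metalMemoryMB; returning null means "not detected yet".
    return totalMiB > 0 ? totalMiB : null;
  } catch {
    return null;
  }
}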
package/src/lib/history-manager.ts

@@ -0,0 +1,172 @@
+import { mkdir, readFile, writeFile, access, rename } from 'fs/promises';
+import { join } from 'path';
+import { homedir } from 'os';
+import { ServerMetrics, SystemMetrics } from '../types/monitor-types.js';
+import { HistoryData, HistorySnapshot, TIME_WINDOW_HOURS, TimeWindow } from '../types/history-types.js';
+
+export class HistoryManager {
+  private serverId: string;
+  private historyDir: string;
+  private historyPath: string;
+  private readonly MAX_AGE_MS = 24 * 60 * 60 * 1000; // 24 hours
+
+  constructor(serverId: string) {
+    this.serverId = serverId;
+    this.historyDir = join(homedir(), '.llamacpp', 'history');
+    this.historyPath = join(this.historyDir, `${serverId}.json`);
+  }
+
+  /**
+   * Append a new snapshot to history (with auto-pruning)
+   */
+  async appendSnapshot(serverMetrics: ServerMetrics, systemMetrics?: SystemMetrics): Promise<void> {
+    try {
+      // Ensure history directory exists
+      await mkdir(this.historyDir, { recursive: true });
+
+      // Load existing history
+      const historyData = await this.loadHistoryData();
+
+      // Create new snapshot
+      const snapshot: HistorySnapshot = {
+        timestamp: Date.now(),
+        server: {
+          healthy: serverMetrics.healthy,
+          uptime: serverMetrics.uptime,
+          activeSlots: serverMetrics.activeSlots,
+          idleSlots: serverMetrics.idleSlots,
+          totalSlots: serverMetrics.totalSlots,
+          avgPromptSpeed: serverMetrics.avgPromptSpeed,
+          avgGenerateSpeed: serverMetrics.avgGenerateSpeed,
+          processMemory: serverMetrics.processMemory,
+          processCpuUsage: serverMetrics.processCpuUsage,
+        },
+        system: systemMetrics ? {
+          gpuUsage: systemMetrics.gpuUsage,
+          cpuUsage: systemMetrics.cpuUsage,
+          aneUsage: systemMetrics.aneUsage,
+          temperature: systemMetrics.temperature,
+          memoryUsed: systemMetrics.memoryUsed,
+          memoryTotal: systemMetrics.memoryTotal,
+        } : undefined,
+      };
+
+      // Append new snapshot
+      historyData.snapshots.push(snapshot);
+
+      // Prune old snapshots (keep only last 24h)
+      historyData.snapshots = this.pruneOldSnapshots(historyData.snapshots, this.MAX_AGE_MS);
+
+      // Atomic write: write to temp file in same directory, then rename
+      // This prevents read collisions during concurrent access
+      // IMPORTANT: temp file MUST be in same directory as destination for rename to work across filesystems
+      const tempPath = join(this.historyDir, `.${this.serverId}-${Date.now()}.tmp`);
+      await writeFile(tempPath, JSON.stringify(historyData, null, 2), 'utf-8');
+      await rename(tempPath, this.historyPath);
+    } catch (error) {
+      // Silent failure - don't interrupt monitoring
+      // Don't throw - just return silently to avoid polluting console
+      return;
+    }
+  }
+
+  /**
+   * Load all snapshots within specified time window
+   */
+  async loadHistory(windowHours: number): Promise<HistorySnapshot[]> {
+    // Retry logic for file I/O collisions during concurrent read/write
+    const maxRetries = 3;
+    let lastError: Error | null = null;
+
+    for (let attempt = 0; attempt < maxRetries; attempt++) {
+      try {
+        const historyData = await this.loadHistoryData();
+        return this.filterByTimeWindow(historyData.snapshots, windowHours);
+      } catch (error) {
+        lastError = error as Error;
+        // Wait briefly before retry (exponential backoff)
+        if (attempt < maxRetries - 1) {
+          await new Promise(resolve => setTimeout(resolve, 50 * Math.pow(2, attempt)));
+        }
+      }
+    }
+
+    // All retries failed - throw error so it can be handled upstream
+    throw new Error(`Failed to load history after ${maxRetries} attempts: ${lastError?.message || 'Unknown error'}`);
+  }
+
+  /**
+   * Load history for specific time window type
+   */
+  async loadHistoryByWindow(window: TimeWindow): Promise<HistorySnapshot[]> {
+    return this.loadHistory(TIME_WINDOW_HOURS[window]);
+  }
+
+  /**
+   * Get file path for server history
+   */
+  getHistoryPath(): string {
+    return this.historyPath;
+  }
+
+  /**
+   * Check if history file exists
+   */
+  async hasHistory(): Promise<boolean> {
+    try {
+      await access(this.historyPath);
+      return true;
+    } catch {
+      return false;
+    }
+  }
+
+  /**
+   * Clear all history for server
+   */
+  async clearHistory(): Promise<void> {
+    const emptyHistory: HistoryData = {
+      serverId: this.serverId,
+      snapshots: [],
+    };
+
+    await mkdir(this.historyDir, { recursive: true });
+
+    // Atomic write - temp file in same directory as destination
+    const tempPath = join(this.historyDir, `.${this.serverId}-${Date.now()}.tmp`);
+    await writeFile(tempPath, JSON.stringify(emptyHistory, null, 2), 'utf-8');
+    await rename(tempPath, this.historyPath);
+  }
+
+  /**
+   * Load full history data from file
+   */
+  private async loadHistoryData(): Promise<HistoryData> {
+    try {
+      const content = await readFile(this.historyPath, 'utf-8');
+      return JSON.parse(content) as HistoryData;
+    } catch (error) {
+      // File doesn't exist or is corrupted, return empty history
+      return {
+        serverId: this.serverId,
+        snapshots: [],
+      };
+    }
+  }
+
+  /**
+   * Prune snapshots older than maxAge
+   */
+  private pruneOldSnapshots(snapshots: HistorySnapshot[], maxAgeMs: number): HistorySnapshot[] {
+    const cutoff = Date.now() - maxAgeMs;
+    return snapshots.filter(s => s.timestamp >= cutoff);
+  }
+
+  /**
+   * Filter snapshots by time window
+   */
+  private filterByTimeWindow(snapshots: HistorySnapshot[], windowHours: number): HistorySnapshot[] {
+    const cutoff = Date.now() - (windowHours * 60 * 60 * 1000);
+    return snapshots.filter(s => s.timestamp >= cutoff);
+  }
+}
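
The intended call pattern for the class above is straightforward: one HistoryManager per server id, appendSnapshot on every monitoring poll, and loadHistory / loadHistoryByWindow when rendering charts. Below is a usage sketch based only on the methods shown here; 'my-server' is a placeholder id, the import paths assume a caller inside src/, and the two metrics objects are assumed to come from the aggregator and system collector elsewhere in this release.

import { HistoryManager } from './lib/history-manager.js';
import { ServerMetrics, SystemMetrics } from './types/monitor-types.js';

// Usage sketch - 'my-server' is a placeholder server id.
async function recordAndQuery(serverMetrics: ServerMetrics, systemMetrics?: SystemMetrics) {
  const history = new HistoryManager('my-server');

  // Writes one snapshot to ~/.llamacpp/history/my-server.json, pruning anything older than 24h.
  await history.appendSnapshot(serverMetrics, systemMetrics);

  // Read back the last hour of snapshots; loadHistory takes the window in hours.
  if (await history.hasHistory()) {
    const lastHour = await history.loadHistory(1);
    console.log(`${lastHour.length} snapshots in ${history.getHistoryPath()}`);
  }
}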
package/src/lib/metrics-aggregator.ts

@@ -0,0 +1,257 @@
+import { ServerConfig } from '../types/server-config.js';
+import { ServerMetrics, SlotInfo, MonitorData } from '../types/monitor-types.js';
+import { statusChecker } from './status-checker.js';
+import { systemCollector } from './system-collector.js';
+import { getProcessMemory, getProcessCpu } from '../utils/process-utils.js';
+
+/**
+ * Aggregates metrics from llama.cpp server API endpoints
+ * Combines server health, slot status, and model properties
+ */
+export class MetricsAggregator {
+  private serverUrl: string;
+  private timeout: number;
+  private previousSlots: Map<number, { n_decoded: number; timestamp: number }> = new Map();
+
+  constructor(server: ServerConfig, timeout: number = 5000) {
+    // Handle null host (legacy configs) by defaulting to 127.0.0.1
+    const host = server.host || '127.0.0.1';
+    this.serverUrl = `http://${host}:${server.port}`;
+    this.timeout = timeout;
+  }
+
+  /**
+   * Fetch data from llama.cpp API with timeout
+   */
+  private async fetchWithTimeout(
+    endpoint: string,
+    customTimeout?: number
+  ): Promise<any | null> {
+    try {
+      const controller = new AbortController();
+      const timeoutMs = customTimeout ?? this.timeout;
+      const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
+
+      const response = await fetch(`${this.serverUrl}${endpoint}`, {
+        signal: controller.signal,
+      });
+
+      clearTimeout(timeoutId);
+
+      if (!response.ok) {
+        return null;
+      }
+
+      return await response.json();
+    } catch (err) {
+      // Network error, timeout, or parse error
+      return null;
+    }
+  }
+
+  /**
+   * Get server health status
+   */
+  private async getHealth(): Promise<boolean> {
+    const health = await this.fetchWithTimeout('/health');
+    return health !== null && health.status === 'ok';
+  }
+
+  /**
+   * Get server properties (model info, context size, etc.)
+   */
+  private async getProps(): Promise<any> {
+    return await this.fetchWithTimeout('/props');
+  }
+
+  /**
+   * Get active slots information with calculated tok/s
+   */
+  private async getSlots(): Promise<SlotInfo[]> {
+    const data = await this.fetchWithTimeout('/slots');
+    if (!data || !Array.isArray(data)) {
+      return [];
+    }
+
+    const now = Date.now();
+
+    return data.map((slot: any) => {
+      const slotId = slot.id;
+      const n_decoded = slot.next_token?.[0]?.n_decoded || 0;
+      const isProcessing = slot.is_processing;
+
+      // Calculate tokens per second by comparing with previous poll
+      let predicted_per_second: number | undefined;
+
+      if (isProcessing && n_decoded > 0) {
+        const previous = this.previousSlots.get(slotId);
+
+        if (previous && previous.n_decoded < n_decoded) {
+          const tokensGenerated = n_decoded - previous.n_decoded;
+          const timeElapsed = (now - previous.timestamp) / 1000; // Convert to seconds
+
+          if (timeElapsed > 0) {
+            predicted_per_second = tokensGenerated / timeElapsed;
+          }
+        }
+
+        // Store current state for next comparison
+        this.previousSlots.set(slotId, { n_decoded, timestamp: now });
+      } else if (!isProcessing) {
+        // Clear history when slot becomes idle
+        this.previousSlots.delete(slotId);
+      }
+
+      return {
+        id: slotId,
+        state: isProcessing ? 'processing' : 'idle',
+        n_prompt_tokens: slot.n_prompt_tokens,
+        n_decoded,
+        n_ctx: slot.n_ctx || 0,
+        timings: predicted_per_second
+          ? {
+              prompt_n: 0,
+              prompt_ms: 0,
+              prompt_per_token_ms: 0,
+              prompt_per_second: 0,
+              predicted_n: n_decoded,
+              predicted_ms: 0,
+              predicted_per_token_ms: 0,
+              predicted_per_second,
+            }
+          : undefined,
+      };
+    });
+  }
+
+  /**
+   * Aggregate all server metrics
+   * @param server - Server configuration
+   * @param processMemory - Optional pre-fetched process memory (for batch collection)
+   * @param processCpuUsage - Optional pre-fetched process CPU usage (for batch collection)
+   */
+  async collectServerMetrics(
+    server: ServerConfig,
+    processMemory?: number | null,
+    processCpuUsage?: number | null
+  ): Promise<ServerMetrics> {
+    const now = Date.now();
+
+    // Check basic server status first
+    const status = await statusChecker.checkServer(server);
+
+    // Calculate uptime if server is running and has lastStarted
+    let uptime: string | undefined;
+    if (status.isRunning && server.lastStarted) {
+      const startTime = new Date(server.lastStarted).getTime();
+      const uptimeSeconds = Math.floor((now - startTime) / 1000);
+      const hours = Math.floor(uptimeSeconds / 3600);
+      const minutes = Math.floor((uptimeSeconds % 3600) / 60);
+      const seconds = uptimeSeconds % 60;
+      uptime = `${hours}h ${minutes}m ${seconds}s`;
+    }
+
+    // If server not running, return minimal data
+    if (!status.isRunning) {
+      return {
+        server,
+        healthy: false,
+        modelLoaded: false,
+        modelName: server.modelName,
+        contextSize: server.ctxSize,
+        totalSlots: 0,
+        activeSlots: 0,
+        idleSlots: 0,
+        slots: [],
+        timestamp: now,
+        stale: false,
+      };
+    }
+
+    // Fetch detailed metrics in parallel
+    // If processMemory/CPU were pre-fetched (batch mode), use them; otherwise fetch individually
+    const [healthy, props, slots, fetchedMemory, fetchedCpu] = await Promise.all([
+      this.getHealth(),
+      this.getProps(),
+      this.getSlots(),
+      processMemory !== undefined
+        ? Promise.resolve(processMemory)
+        : (server.pid ? getProcessMemory(server.pid) : Promise.resolve(null)),
+      processCpuUsage !== undefined
+        ? Promise.resolve(processCpuUsage)
+        : (server.pid ? getProcessCpu(server.pid) : Promise.resolve(null)),
+    ]);
+
+    // Calculate slot statistics
+    const activeSlots = slots.filter((s) => s.state === 'processing').length;
+    const idleSlots = slots.filter((s) => s.state === 'idle').length;
+    const totalSlots = props?.total_slots || slots.length;
+
+    // Calculate average speeds (only from processing slots)
+    const processingSlots = slots.filter((s) => s.state === 'processing' && s.timings);
+
+    const avgPromptSpeed =
+      processingSlots.length > 0
+        ? processingSlots.reduce(
+            (sum, s) => sum + (s.timings?.prompt_per_second || 0),
+            0
+          ) / processingSlots.length
+        : undefined;
+
+    const avgGenerateSpeed =
+      processingSlots.length > 0
+        ? processingSlots.reduce(
+            (sum, s) => sum + (s.timings?.predicted_per_second || 0),
+            0
+          ) / processingSlots.length
+        : undefined;
+
+    // Calculate total memory (CPU + Metal GPU memory if available)
+    let totalMemory = fetchedMemory ?? undefined;
+    if (totalMemory !== undefined && server.metalMemoryMB) {
+      // Add Metal memory (convert MB to bytes)
+      totalMemory += server.metalMemoryMB * 1024 * 1024;
+    }
+
+    return {
+      server,
+      healthy,
+      uptime,
+      modelLoaded: props !== null,
+      modelName: server.modelName,
+      contextSize: props?.default_generation_settings?.n_ctx || server.ctxSize,
+      totalSlots,
+      activeSlots,
+      idleSlots,
+      slots,
+      avgPromptSpeed,
+      avgGenerateSpeed,
+      processMemory: totalMemory,
+      processCpuUsage: fetchedCpu ?? undefined,
+      timestamp: now,
+      stale: false,
+    };
+  }
+
+  /**
+   * Collect complete monitoring data (server + system metrics)
+   */
+  async collectMonitorData(
+    server: ServerConfig,
+    updateInterval: number = 2000
+  ): Promise<MonitorData> {
+    // Collect server and system metrics in parallel
+    const [serverMetrics, systemMetrics] = await Promise.all([
+      this.collectServerMetrics(server),
+      systemCollector.collectSystemMetrics(),
+    ]);
+
+    return {
+      server: serverMetrics,
+      system: systemMetrics,
+      lastUpdated: new Date(),
+      updateInterval,
+      consecutiveFailures: 0,
+    };
+  }
+}
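
The monitor TUIs added in this release presumably drive this class by constructing one aggregator per server and polling collectMonitorData on a timer; because per-slot tok/s is derived by diffing n_decoded between consecutive /slots reads, the same aggregator instance must be reused across polls and the speed fields only become defined from the second poll onward. Below is a polling sketch grounded in the methods shown above; the import paths assume a caller inside src/, and the server argument is assumed to be loaded elsewhere (e.g. from the CLI's saved server config).

import { MetricsAggregator } from './lib/metrics-aggregator.js';
import { ServerConfig } from './types/server-config.js';

// Polling sketch - `server` is assumed to come from the CLI's state manager.
async function pollOnce(server: ServerConfig) {
  const aggregator = new MetricsAggregator(server, 5000); // 5s per-request timeout

  // One combined sample of server + system metrics, tagged with a 2s nominal update interval.
  const data = await aggregator.collectMonitorData(server, 2000);

  const { healthy, activeSlots, totalSlots, avgGenerateSpeed } = data.server;
  console.log(
    `healthy=${healthy} slots=${activeSlots}/${totalSlots}` +
      (avgGenerateSpeed !== undefined ? ` gen=${avgGenerateSpeed.toFixed(1)} tok/s` : '')
  );
}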