@appkit/llamacpp-cli 1.4.1 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/README.md +87 -1
- package/dist/cli.js +14 -0
- package/dist/cli.js.map +1 -1
- package/dist/commands/monitor.d.ts +2 -0
- package/dist/commands/monitor.d.ts.map +1 -0
- package/dist/commands/monitor.js +76 -0
- package/dist/commands/monitor.js.map +1 -0
- package/dist/lib/metrics-aggregator.d.ts +39 -0
- package/dist/lib/metrics-aggregator.d.ts.map +1 -0
- package/dist/lib/metrics-aggregator.js +200 -0
- package/dist/lib/metrics-aggregator.js.map +1 -0
- package/dist/lib/system-collector.d.ts +75 -0
- package/dist/lib/system-collector.d.ts.map +1 -0
- package/dist/lib/system-collector.js +310 -0
- package/dist/lib/system-collector.js.map +1 -0
- package/dist/tui/MonitorApp.d.ts +4 -0
- package/dist/tui/MonitorApp.d.ts.map +1 -0
- package/dist/tui/MonitorApp.js +293 -0
- package/dist/tui/MonitorApp.js.map +1 -0
- package/dist/tui/MultiServerMonitorApp.d.ts +4 -0
- package/dist/tui/MultiServerMonitorApp.d.ts.map +1 -0
- package/dist/tui/MultiServerMonitorApp.js +496 -0
- package/dist/tui/MultiServerMonitorApp.js.map +1 -0
- package/dist/tui/components/ErrorState.d.ts +8 -0
- package/dist/tui/components/ErrorState.d.ts.map +1 -0
- package/dist/tui/components/ErrorState.js +22 -0
- package/dist/tui/components/ErrorState.js.map +1 -0
- package/dist/tui/components/LoadingState.d.ts +8 -0
- package/dist/tui/components/LoadingState.d.ts.map +1 -0
- package/dist/tui/components/LoadingState.js +21 -0
- package/dist/tui/components/LoadingState.js.map +1 -0
- package/dist/types/monitor-types.d.ts +122 -0
- package/dist/types/monitor-types.d.ts.map +1 -0
- package/dist/types/monitor-types.js +3 -0
- package/dist/types/monitor-types.js.map +1 -0
- package/dist/utils/process-utils.d.ts +16 -1
- package/dist/utils/process-utils.d.ts.map +1 -1
- package/dist/utils/process-utils.js +144 -27
- package/dist/utils/process-utils.js.map +1 -1
- package/package.json +3 -1
- package/src/cli.ts +14 -0
- package/src/commands/monitor.ts +90 -0
- package/src/lib/metrics-aggregator.ts +244 -0
- package/src/lib/system-collector.ts +312 -0
- package/src/tui/MonitorApp.ts +361 -0
- package/src/tui/MultiServerMonitorApp.ts +547 -0
- package/src/types/monitor-types.ts +161 -0
- package/src/utils/process-utils.ts +160 -26
package/src/utils/process-utils.ts

```diff
@@ -1,4 +1,4 @@
-import { exec } from 'child_process';
+import { exec, spawn } from 'child_process';
 import { promisify } from 'util';
 
 export const execAsync = promisify(exec);
```
```diff
@@ -60,39 +60,173 @@ export async function isPortInUse(port: number): Promise<boolean> {
 }
 
 /**
- *
- *
- *
+ * Spawn a streaming command, read one line, and kill it
+ * Useful for commands like 'macmon pipe' that stream indefinitely
+ * Ensures the process is killed to prevent leaks
  */
-export async function
+export async function spawnAndReadOneLine(
+  command: string,
+  args: string[],
+  timeoutMs: number = 2000
+): Promise<string | null> {
+  return new Promise((resolve) => {
+    const child = spawn(command, args, {
+      stdio: ['ignore', 'pipe', 'ignore'],
+      detached: false, // Keep in same process group for easier cleanup
+    });
+
+    let resolved = false;
+    let output = '';
+
+    const cleanup = () => {
+      try {
+        // Try SIGKILL immediately (SIGTERM may not work for macmon)
+        child.kill('SIGKILL');
+      } catch {
+        // Process might already be dead
+      }
+    };
+
+    // Set timeout to kill process if it doesn't produce output
+    const timeout = setTimeout(() => {
+      if (!resolved) {
+        resolved = true;
+        cleanup();
+        resolve(null);
+      }
+    }, timeoutMs);
+
+    // Read stdout line by line
+    child.stdout?.on('data', (data) => {
+      if (resolved) return;
+
+      output += data.toString();
+
+      // Check if we have a complete line
+      const newlineIndex = output.indexOf('\n');
+      if (newlineIndex !== -1) {
+        const line = output.substring(0, newlineIndex).trim();
+
+        if (line.length > 0) {
+          resolved = true;
+          clearTimeout(timeout);
+          cleanup();
+          resolve(line);
+        }
+      }
+    });
+
+    // Handle process errors
+    child.on('error', () => {
+      if (!resolved) {
+        resolved = true;
+        clearTimeout(timeout);
+        resolve(null);
+      }
+    });
+
+    // Handle process exit
+    child.on('exit', () => {
+      if (!resolved) {
+        resolved = true;
+        clearTimeout(timeout);
+
+        // Return partial output if we have any
+        const line = output.trim();
+        resolve(line.length > 0 ? line : null);
+      }
+    });
+  });
+}
+
```
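The helper resolves with the first non-empty line (or with partial output if the child exits early) and kills the child in every path. A minimal usage sketch follows, assuming macmon is installed and that `macmon pipe` (the command named in the diff's own comments) emits one JSON document per line; the import path and the JSON handling are illustrative, not confirmed by the package:

```typescript
// Hypothetical caller of the new helper; the import path and the JSON
// parsing are assumptions for illustration only.
import { spawnAndReadOneLine } from './process-utils';

async function sampleMacmonOnce(): Promise<unknown | null> {
  // Take one sample from the endless 'macmon pipe' stream; the helper
  // SIGKILLs the child after the first line so no process leaks.
  const line = await spawnAndReadOneLine('macmon', ['pipe'], 2000);
  if (line === null) return null; // spawn failure or 2 s timeout

  try {
    return JSON.parse(line); // assumes line-delimited JSON output
  } catch {
    return null;
  }
}
```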
```diff
+// Process memory cache to prevent spawning too many 'top' processes
+// Cache per PID with 3-second TTL
+const processMemoryCache = new Map<number, { value: number | null; timestamp: number }>();
+const PROCESS_MEMORY_CACHE_TTL = 3000; // 3 seconds
+
+/**
+ * Batch get memory usage for multiple processes in one top call
+ * Much more efficient than calling getProcessMemory() multiple times
+ * Returns Map<pid, bytes> for all requested PIDs
+ */
+export async function getBatchProcessMemory(pids: number[]): Promise<Map<number, number | null>> {
+  const result = new Map<number, number | null>();
+  const now = Date.now();
+
+  // Check cache and collect PIDs that need fetching
+  const pidsToFetch: number[] = [];
+  for (const pid of pids) {
+    const cached = processMemoryCache.get(pid);
+    if (cached && (now - cached.timestamp) < PROCESS_MEMORY_CACHE_TTL) {
+      result.set(pid, cached.value);
+    } else {
+      pidsToFetch.push(pid);
+    }
+  }
+
+  // If all PIDs were cached, return early
+  if (pidsToFetch.length === 0) {
+    return result;
+  }
+
   try {
-    //
-
-    const output = await execCommand(`top -l 1
+    // Build top command with all PIDs: top -l 1 -pid X -pid Y -pid Z -stats pid,mem
+    const pidArgs = pidsToFetch.map(pid => `-pid ${pid}`).join(' ');
+    const output = await execCommand(`top -l 1 ${pidArgs} -stats pid,mem 2>/dev/null`);
 
-    //
-    const lines = output.split('\n')
-
+    // Parse output: each line is "PID MEM" (e.g., "1438 299M")
+    const lines = output.split('\n');
+    for (const line of lines) {
+      const match = line.trim().match(/^(\d+)\s+([\d.]+)([KMGT])\s*$/);
+      if (!match) continue;
 
-
+      const pid = parseInt(match[1], 10);
+      const value = parseFloat(match[2]);
+      const unit = match[3];
 
-
-
-
+      // Convert to bytes
+      const multipliers: { [key: string]: number } = {
+        K: 1024,
+        M: 1024 * 1024,
+        G: 1024 * 1024 * 1024,
+        T: 1024 * 1024 * 1024 * 1024,
+      };
 
-
-      const unit = match[2];
+      const bytes = Math.round(value * multipliers[unit]);
 
-
-
-
-
-        G: 1024 * 1024 * 1024,
-        T: 1024 * 1024 * 1024 * 1024,
-      };
+      // Cache and store result
+      processMemoryCache.set(pid, { value: bytes, timestamp: now });
+      result.set(pid, bytes);
+    }
 
-
+    // For any PIDs that weren't in the output, cache null
+    for (const pid of pidsToFetch) {
+      if (!result.has(pid)) {
+        processMemoryCache.set(pid, { value: null, timestamp: now });
+        result.set(pid, null);
+      }
+    }
+
+    return result;
   } catch {
-
+    // On error, cache null for all requested PIDs
+    for (const pid of pidsToFetch) {
+      processMemoryCache.set(pid, { value: null, timestamp: now });
+      result.set(pid, null);
+    }
+    return result;
   }
 }
```
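To make the parsing concrete, here is a worked example of the regex and unit conversion above, using the same "1438 299M" sample that appears in the diff's own comment:

```typescript
// Worked example of the parsing in getBatchProcessMemory; the sample
// values come from the diff's own "PID MEM" comment, nothing here is new API.
const sample = '1438 299M';
const match = sample.trim().match(/^(\d+)\s+([\d.]+)([KMGT])\s*$/);
// match[1] = '1438' (pid), match[2] = '299' (value), match[3] = 'M' (unit)
if (match) {
  const bytes = Math.round(parseFloat(match[2]) * 1024 * 1024);
  console.log(bytes); // 313524224 bytes for "299M"
}
```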
```diff
+
+/**
+ * Get memory usage for a single process in bytes
+ * Uses 'top' on macOS which includes GPU/Metal memory (more accurate for llama-server)
+ * Returns null if process not found or error occurs
+ * Caches results for 3 seconds to prevent spawning too many top processes
+ *
+ * Note: For multiple PIDs, use getBatchProcessMemory() instead - much more efficient
+ */
+export async function getProcessMemory(pid: number): Promise<number | null> {
+  const result = await getBatchProcessMemory([pid]);
+  return result.get(pid) ?? null;
+}
```
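Callers that track several llama-server processes would go through the batch entry point, so a single `top` invocation covers every PID and repeat calls inside the 3-second TTL hit the cache. A sketch of such a consumer, with the PID list, import path, and logging assumed for illustration:

```typescript
// Hypothetical monitor-loop consumer of the batch API; the import path
// and output formatting are assumptions, not package documentation.
import { getBatchProcessMemory } from './process-utils';

async function logServerMemory(pids: number[]): Promise<void> {
  // One 'top -l 1 -pid ... -stats pid,mem' call serves all uncached PIDs.
  const memory = await getBatchProcessMemory(pids);
  for (const [pid, bytes] of memory) {
    const mib = bytes === null ? 'n/a' : (bytes / 1024 / 1024).toFixed(1);
    console.log(`pid ${pid}: ${mib} MiB`);
  }
}
```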