npm - @appkit/llamacpp-cli - Versions diffs - 1.12.0 → 1.12.1 - Mend

@appkit/llamacpp-cli 1.12.0 → 1.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (114) hide show

package/README.md +217 -168
package/package.json +10 -2
package/web/dist/assets/index-Bin89Lwr.css +1 -0
package/web/dist/assets/index-CVmonw3T.js +17 -0
package/web/{index.html → dist/index.html} +2 -1
package/.versionrc.json +0 -16
package/CHANGELOG.md +0 -213
package/docs/images/.gitkeep +0 -1
package/docs/images/web-ui-servers.png +0 -0
package/src/cli.ts +0 -523
package/src/commands/admin/config.ts +0 -121
package/src/commands/admin/logs.ts +0 -91
package/src/commands/admin/restart.ts +0 -26
package/src/commands/admin/start.ts +0 -27
package/src/commands/admin/status.ts +0 -84
package/src/commands/admin/stop.ts +0 -16
package/src/commands/config-global.ts +0 -38
package/src/commands/config.ts +0 -323
package/src/commands/create.ts +0 -183
package/src/commands/delete.ts +0 -74
package/src/commands/list.ts +0 -37
package/src/commands/logs-all.ts +0 -251
package/src/commands/logs.ts +0 -345
package/src/commands/monitor.ts +0 -110
package/src/commands/ps.ts +0 -84
package/src/commands/pull.ts +0 -44
package/src/commands/rm.ts +0 -107
package/src/commands/router/config.ts +0 -116
package/src/commands/router/logs.ts +0 -256
package/src/commands/router/restart.ts +0 -36
package/src/commands/router/start.ts +0 -60
package/src/commands/router/status.ts +0 -119
package/src/commands/router/stop.ts +0 -33
package/src/commands/run.ts +0 -233
package/src/commands/search.ts +0 -107
package/src/commands/server-show.ts +0 -161
package/src/commands/show.ts +0 -207
package/src/commands/start.ts +0 -101
package/src/commands/stop.ts +0 -39
package/src/commands/tui.ts +0 -25
package/src/lib/admin-manager.ts +0 -435
package/src/lib/admin-server.ts +0 -1243
package/src/lib/config-generator.ts +0 -130
package/src/lib/download-job-manager.ts +0 -213
package/src/lib/history-manager.ts +0 -172
package/src/lib/launchctl-manager.ts +0 -225
package/src/lib/metrics-aggregator.ts +0 -257
package/src/lib/model-downloader.ts +0 -328
package/src/lib/model-scanner.ts +0 -157
package/src/lib/model-search.ts +0 -114
package/src/lib/models-dir-setup.ts +0 -46
package/src/lib/port-manager.ts +0 -80
package/src/lib/router-logger.ts +0 -201
package/src/lib/router-manager.ts +0 -414
package/src/lib/router-server.ts +0 -538
package/src/lib/state-manager.ts +0 -206
package/src/lib/status-checker.ts +0 -113
package/src/lib/system-collector.ts +0 -315
package/src/tui/ConfigApp.ts +0 -1085
package/src/tui/HistoricalMonitorApp.ts +0 -587
package/src/tui/ModelsApp.ts +0 -368
package/src/tui/MonitorApp.ts +0 -386
package/src/tui/MultiServerMonitorApp.ts +0 -1833
package/src/tui/RootNavigator.ts +0 -74
package/src/tui/SearchApp.ts +0 -511
package/src/tui/SplashScreen.ts +0 -149
package/src/types/admin-config.ts +0 -25
package/src/types/global-config.ts +0 -26
package/src/types/history-types.ts +0 -39
package/src/types/model-info.ts +0 -8
package/src/types/monitor-types.ts +0 -162
package/src/types/router-config.ts +0 -25
package/src/types/server-config.ts +0 -46
package/src/utils/downsample-utils.ts +0 -128
package/src/utils/file-utils.ts +0 -146
package/src/utils/format-utils.ts +0 -98
package/src/utils/log-parser.ts +0 -284
package/src/utils/log-utils.ts +0 -178
package/src/utils/process-utils.ts +0 -316
package/src/utils/prompt-utils.ts +0 -47
package/test-load.sh +0 -100
package/tsconfig.json +0 -20
package/web/eslint.config.js +0 -23
package/web/llamacpp-web-dist.tar.gz +0 -0
package/web/package-lock.json +0 -4017
package/web/package.json +0 -38
package/web/postcss.config.js +0 -6
package/web/src/App.css +0 -42
package/web/src/App.tsx +0 -86
package/web/src/assets/react.svg +0 -1
package/web/src/components/ApiKeyPrompt.tsx +0 -71
package/web/src/components/CreateServerModal.tsx +0 -372
package/web/src/components/DownloadProgress.tsx +0 -123
package/web/src/components/Nav.tsx +0 -89
package/web/src/components/RouterConfigModal.tsx +0 -240
package/web/src/components/SearchModal.tsx +0 -306
package/web/src/components/ServerConfigModal.tsx +0 -291
package/web/src/hooks/useApi.ts +0 -259
package/web/src/index.css +0 -42
package/web/src/lib/api.ts +0 -226
package/web/src/main.tsx +0 -10
package/web/src/pages/Dashboard.tsx +0 -103
package/web/src/pages/Models.tsx +0 -258
package/web/src/pages/Router.tsx +0 -270
package/web/src/pages/RouterLogs.tsx +0 -201
package/web/src/pages/ServerLogs.tsx +0 -553
package/web/src/pages/Servers.tsx +0 -358
package/web/src/types/api.ts +0 -140
package/web/tailwind.config.js +0 -31
package/web/tsconfig.app.json +0 -28
package/web/tsconfig.json +0 -7
package/web/tsconfig.node.json +0 -26
package/web/vite.config.ts +0 -25
package/web/{public → dist}/vite.svg +0 -0

package/src/utils/process-utils.ts DELETED Viewed

@@ -1,316 +0,0 @@
-import { exec, spawn } from 'child_process';
-import { promisify } from 'util';
-export const execAsync = promisify(exec); // Promise-returning form of child_process.exec, shared by the helpers in this file
-/**
- * Run a shell command and resolve with its trimmed stdout.
- * The returned promise rejects when the command exits with a non-zero code.
- */
-export async function execCommand(command: string): Promise<string> {
-  const result = await execAsync(command);
-  const trimmedStdout = result.stdout.trim();
-  return trimmedStdout;
-}
-/**
- * Run a shell command and resolve with both output streams, each trimmed.
- */
-export async function execCommandFull(command: string): Promise<{ stdout: string; stderr: string }> {
-  const raw = await execAsync(command);
-  const stdout = raw.stdout.trim();
-  const stderr = raw.stderr.trim();
-  return { stdout, stderr };
-}
-/**
- * Report whether `which` can locate the given command in PATH.
- */
-export async function commandExists(command: string): Promise<boolean> {
-  let found = true;
-  try {
-    await execAsync(`which ${command}`);
-  } catch {
-    // Any failure of the lookup is treated as "not available"
-    found = false;
-  }
-  return found;
-}
-/**
- * Report whether `ps -p` succeeds for the given PID.
- */
-export async function isProcessRunning(pid: number): Promise<boolean> {
-  const probe = `ps -p ${pid}`;
-  try {
-    await execAsync(probe);
-  } catch {
-    // The probe failed, so report the process as not running
-    return false;
-  }
-  return true;
-}
-/**
- * Report whether `lsof` finds a TCP listener on the given port.
- */
-export async function isPortInUse(port: number): Promise<boolean> {
-  try {
-    const listenerQuery = `lsof -iTCP:${port} -sTCP:LISTEN -t`;
-    return await execAsync(listenerQuery).then(() => true);
-  } catch {
-    // The lsof call failed, which is reported as "port free"
-    return false;
-  }
-}
-/**
- * Spawn a streaming command, read its first non-empty line, and kill it.
- * Useful for commands like 'macmon pipe' that stream indefinitely.
- *
- * @param command   Executable to spawn
- * @param args      Arguments passed to the executable
- * @param timeoutMs How long to wait for a line before giving up (default 2000 ms)
- * @returns The first non-empty line (trimmed), or null on timeout, spawn error,
- *          or when the process ends without producing any non-blank output
- */
-export async function spawnAndReadOneLine(
-  command: string,
-  args: string[],
-  timeoutMs: number = 2000
-): Promise<string | null> {
-  return new Promise((resolve) => {
-    const child = spawn(command, args, {
-      stdio: ['ignore', 'pipe', 'ignore'],
-      detached: false, // Keep in same process group for easier cleanup
-    });
-    let settled = false;
-    let buffer = '';
-    const killChild = () => {
-      try {
-        // Try SIGKILL immediately (SIGTERM may not work for macmon)
-        child.kill('SIGKILL');
-      } catch {
-        // Process might already be dead
-      }
-    };
-    // Single exit path: resolve once, stop the timer, optionally kill the child
-    const settle = (value: string | null, kill: boolean) => {
-      if (settled) return;
-      settled = true;
-      clearTimeout(timer);
-      if (kill) killChild();
-      resolve(value);
-    };
-    // Kill the process if it doesn't produce a line in time
-    const timer = setTimeout(() => settle(null, true), timeoutMs);
-    child.stdout?.on('data', (chunk) => {
-      if (settled) return;
-      buffer += chunk.toString();
-      // Consume every complete line. Blank lines are dropped from the buffer so a
-      // leading empty line cannot hide the real line that follows it.
-      let newlineIndex = buffer.indexOf('\n');
-      while (newlineIndex !== -1) {
-        const line = buffer.substring(0, newlineIndex).trim();
-        buffer = buffer.substring(newlineIndex + 1);
-        if (line.length > 0) {
-          settle(line, true);
-          return;
-        }
-        newlineIndex = buffer.indexOf('\n');
-      }
-    });
-    // Spawn failure (e.g. command not found)
-    child.on('error', () => settle(null, false));
-    // 'close' fires after stdout has been drained ('exit' may fire while data is
-    // still buffered), so an unterminated final line is not lost.
-    child.on('close', () => {
-      const line = buffer.trim();
-      settle(line.length > 0 ? line : null, false);
-    });
-  });
-}
-// Process memory cache to prevent spawning too many 'top' processes
-// Cache per PID with 3-second TTL
-const processMemoryCache = new Map<number, { value: number | null; timestamp: number }>();
-const PROCESS_MEMORY_CACHE_TTL = 3000; // 3 seconds
-// Byte multipliers for the unit suffixes accepted in top's MEM column
-const MEMORY_UNIT_MULTIPLIERS: { [key: string]: number } = {
-  B: 1,
-  K: 1024,
-  M: 1024 * 1024,
-  G: 1024 * 1024 * 1024,
-  T: 1024 * 1024 * 1024 * 1024,
-};
-/**
- * Batch get memory usage for multiple processes in one top call
- * Much more efficient than calling getProcessMemory() multiple times
- *
- * @param pids PIDs to look up (duplicates are queried only once)
- * @returns Map<pid, bytes> with an entry for every requested PID; the value is
- *          null when the PID is not a non-negative integer, is missing from
- *          top's output, or the top call fails
- */
-export async function getBatchProcessMemory(pids: number[]): Promise<Map<number, number | null>> {
-  const result = new Map<number, number | null>();
-  const now = Date.now();
-  // Evict expired entries so PIDs of long-gone processes do not accumulate
-  for (const [cachedPid, entry] of processMemoryCache) {
-    if (now - entry.timestamp >= PROCESS_MEMORY_CACHE_TTL) {
-      processMemoryCache.delete(cachedPid);
-    }
-  }
-  // Check cache and collect PIDs that need fetching
-  const pidsToFetch: number[] = [];
-  for (const pid of new Set(pids)) {
-    const cached = processMemoryCache.get(pid);
-    if (cached && (now - cached.timestamp) < PROCESS_MEMORY_CACHE_TTL) {
-      result.set(pid, cached.value);
-    } else if (Number.isInteger(pid) && pid >= 0) {
-      pidsToFetch.push(pid);
-    } else {
-      // Never interpolate a non-integer value (NaN, fractions) into the command line
-      result.set(pid, null);
-    }
-  }
-  // If nothing needs fetching, return early
-  if (pidsToFetch.length === 0) {
-    return result;
-  }
-  try {
-    // Build top command with all PIDs: top -l 1 -pid X -pid Y -pid Z -stats pid,mem
-    const pidArgs = pidsToFetch.map(pid => `-pid ${pid}`).join(' ');
-    const output = await execCommand(`top -l 1 ${pidArgs} -stats pid,mem 2>/dev/null`);
-    // Parse output: each line is "PID  MEM" (e.g., "1438  299M").
-    // Also tolerate a byte-sized value ("0B") and a trailing +/- change marker,
-    // which top can append to memory figures.
-    for (const line of output.split('\n')) {
-      const match = line.trim().match(/^(\d+)\s+([\d.]+)([BKMGT])[+-]?\s*$/);
-      if (!match) continue;
-      const pid = parseInt(match[1], 10);
-      const bytes = Math.round(parseFloat(match[2]) * MEMORY_UNIT_MULTIPLIERS[match[3]]);
-      // Cache and store result
-      processMemoryCache.set(pid, { value: bytes, timestamp: now });
-      result.set(pid, bytes);
-    }
-    // For any PIDs that weren't in the output, cache null
-    for (const pid of pidsToFetch) {
-      if (!result.has(pid)) {
-        processMemoryCache.set(pid, { value: null, timestamp: now });
-        result.set(pid, null);
-      }
-    }
-    return result;
-  } catch {
-    // On error, cache null for all requested PIDs
-    for (const pid of pidsToFetch) {
-      processMemoryCache.set(pid, { value: null, timestamp: now });
-      result.set(pid, null);
-    }
-    return result;
-  }
-}
-/**
- * Memory usage of one process, in bytes, or null when it cannot be determined.
- * Delegates to getBatchProcessMemory(), so the value comes from 'top' on macOS
- * (CPU memory only, NOT GPU/Metal memory) and is cached for 3 seconds.
- *
- * Note: For llama-server processes with GPU offloading, use ServerConfig.metalMemoryMB
- * to get GPU memory allocation (parsed from logs during server startup)
- *
- * Note: When several PIDs are needed, call getBatchProcessMemory() directly
- */
-export async function getProcessMemory(pid: number): Promise<number | null> {
-  const memoryByPid = await getBatchProcessMemory([pid]);
-  const bytes = memoryByPid.get(pid);
-  return bytes ?? null;
-}
-// Per-PID CPU readings, kept for 3 seconds so repeated calls
-// do not spawn a new 'ps' process each time
-const processCpuCache = new Map<number, { value: number | null; timestamp: number }>();
-const PROCESS_CPU_CACHE_TTL = 3000; // 3 seconds
-/**
- * Look up CPU usage for several processes with a single ps invocation.
- * Fresh cache entries are reused; only the remaining PIDs are queried.
- * Resolves with Map<pid, percentage>, using null for PIDs that ps did not
- * report or when the ps call fails.
- */
-export async function getBatchProcessCpu(pids: number[]): Promise<Map<number, number | null>> {
-  const usageByPid = new Map<number, number | null>();
-  const fetchedAt = Date.now();
-  // Store a reading in both the cache and the map returned to the caller
-  const record = (pid: number, value: number | null) => {
-    processCpuCache.set(pid, { value, timestamp: fetchedAt });
-    usageByPid.set(pid, value);
-  };
-  // Split the request into cache hits and PIDs that must be queried
-  const stalePids: number[] = [];
-  pids.forEach((pid) => {
-    const entry = processCpuCache.get(pid);
-    if (entry && (fetchedAt - entry.timestamp) < PROCESS_CPU_CACHE_TTL) {
-      usageByPid.set(pid, entry.value);
-      return;
-    }
-    stalePids.push(pid);
-  });
-  // Everything was served from cache
-  if (stalePids.length === 0) {
-    return usageByPid;
-  }
-  try {
-    // One call for all PIDs: ps -p X,Y,Z -o pid=,%cpu=
-    const psOutput = await execCommand(`ps -p ${stalePids.join(',')} -o pid=,%cpu= 2>/dev/null`);
-    // Each line looks like "PID  %CPU" (e.g., "1438  45.2")
-    psOutput.split('\n').forEach((row) => {
-      const fields = row.trim().match(/^(\d+)\s+([\d.]+)\s*$/);
-      if (fields) {
-        record(parseInt(fields[1], 10), parseFloat(fields[2]));
-      }
-    });
-    // PIDs missing from the output are recorded as null (process not running)
-    stalePids.filter((pid) => !usageByPid.has(pid)).forEach((pid) => record(pid, null));
-  } catch {
-    // The ps call failed: record null for every PID that was queried
-    stalePids.forEach((pid) => record(pid, null));
-  }
-  return usageByPid;
-}
-/**
- * CPU usage of one process as a percentage (0-100+), or null when it cannot
- * be determined. Delegates to getBatchProcessCpu(), so the value comes from
- * 'ps -o %cpu' on macOS and is cached for 3 seconds.
- *
- * Note: When several PIDs are needed, call getBatchProcessCpu() directly
- */
-export async function getProcessCpu(pid: number): Promise<number | null> {
-  const cpuByPid = await getBatchProcessCpu([pid]);
-  const percent = cpuByPid.get(pid);
-  return percent ?? null;
-}

package/src/utils/prompt-utils.ts DELETED Viewed

@@ -1,47 +0,0 @@
-import * as readline from 'readline';
-/**
- * Ask a question on the terminal and resolve with the trimmed answer.
- * When a default is given it is shown in brackets and used for an empty answer;
- * without a default an empty answer resolves to ''.
- */
-export function prompt(question: string, defaultValue?: string): Promise<string> {
-  const rl = readline.createInterface({
-    input: process.stdin,
-    output: process.stdout,
-  });
-  let promptText = `${question}: `;
-  if (defaultValue) {
-    promptText = `${question} [${defaultValue}]: `;
-  }
-  return new Promise((resolve) => {
-    rl.question(promptText, (reply) => {
-      rl.close();
-      const trimmed = reply.trim();
-      resolve(trimmed || defaultValue || '');
-    });
-  });
-}
-/**
- * Ask a yes/no question on the terminal.
- * An empty answer resolves to defaultYes; otherwise only 'y' or 'yes'
- * (case-insensitive, surrounding whitespace ignored) resolves to true.
- */
-export function confirm(question: string, defaultYes = true): Promise<boolean> {
-  const rl = readline.createInterface({
-    input: process.stdin,
-    output: process.stdout,
-  });
-  // The capital letter marks which choice an empty answer selects
-  const choices = defaultYes ? '[Y/n]' : '[y/N]';
-  return new Promise((resolve) => {
-    rl.question(`${question} ${choices}: `, (reply) => {
-      rl.close();
-      const normalized = reply.trim().toLowerCase();
-      if (normalized.length === 0) {
-        resolve(defaultYes);
-        return;
-      }
-      resolve(['y', 'yes'].includes(normalized));
-    });
-  });
-}

package/test-load.sh DELETED Viewed

@@ -1,100 +0,0 @@
-#!/bin/bash
-# Test script for parallel chat requests to multiple llama servers
-# Usage: ./test-load.sh
-# Stop with CTRL-C
-set -e
-# Available ports
-PORTS=(9001 9002 9004 9005)
-# Test prompts with varying complexity
-PROMPTS=(
-  "Write a hello world program in Python"
-  "Explain quantum computing in simple terms"
-  "Write a flappy bird game in Python"
-  "What are the benefits of functional programming?"
-  "Create a REST API example using FastAPI"
-  "Explain the difference between processes and threads"
-  "Write a binary search algorithm in JavaScript"
-  "What is the difference between HTTP and HTTPS?"
-  "Create a simple todo list app in React"
-  "Explain Docker containers to a beginner"
-  "Write a quicksort implementation in C++"
-  "What are the SOLID principles?"
-  "Create a SQL query to find duplicate records"
-  "Explain async/await in JavaScript"
-  "Write a Fibonacci sequence generator in any language"
-)
-# Colors for output
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[0;33m'
-BLUE='\033[0;34m'
-MAGENTA='\033[0;35m'
-CYAN='\033[0;36m'
-NC='\033[0m' # No Color
-# Counter for requests
-REQUEST_COUNT=0
-# Function to run a single chat request
-run_chat() {
-  local port=$1
-  local prompt=$2
-  local request_id=$3
-  echo -e "${CYAN}[Request #${request_id}]${NC} ${YELLOW}Port ${port}:${NC} ${prompt}"
-  # Run the chat request (suppress output except errors)
-  if npm run dev -- server run -m "$prompt" "$port" > /dev/null 2>&1; then
-    echo -e "${CYAN}[Request #${request_id}]${NC} ${GREEN}✓ Completed${NC} (port ${port})"
-  else
-    echo -e "${CYAN}[Request #${request_id}]${NC} ${RED}✗ Failed${NC} (port ${port})"
-  fi
-}
-# Trap CTRL-C for clean exit
-trap 'echo -e "\n${YELLOW}Stopping test load script...${NC}"; echo -e "${GREEN}Total requests sent: ${REQUEST_COUNT}${NC}"; exit 0' INT
-echo -e "${MAGENTA}========================================${NC}"
-echo -e "${MAGENTA}  Llama Server Load Test${NC}"
-echo -e "${MAGENTA}========================================${NC}"
-echo -e "${BLUE}Ports: ${PORTS[*]}${NC}"
-echo -e "${BLUE}Parallel requests: 3${NC}"
-echo -e "${BLUE}Press CTRL-C to stop${NC}"
-echo -e "${MAGENTA}========================================${NC}\n"
-# Function to count running background jobs
-count_running_jobs() {
-  jobs -r | wc -l | tr -d ' '
-}
-# Main loop - maintain exactly 3 concurrent requests at all times
-# Start initial 3 requests
-for i in {1..3}; do
-  PORT=${PORTS[$RANDOM % ${#PORTS[@]}]}
-  PROMPT=${PROMPTS[$RANDOM % ${#PROMPTS[@]}]}
-  ((REQUEST_COUNT++))
-  run_chat "$PORT" "$PROMPT" "$REQUEST_COUNT" &
-done
-# Continuously monitor and start new requests as old ones complete
-while true; do
-  # Get count of running background jobs
-  RUNNING=$(count_running_jobs)
-  # Start new requests to maintain 3 concurrent
-  while [ "$RUNNING" -lt 3 ]; do
-    PORT=${PORTS[$RANDOM % ${#PORTS[@]}]}
-    PROMPT=${PROMPTS[$RANDOM % ${#PROMPTS[@]}]}
-    ((REQUEST_COUNT++))
-    run_chat "$PORT" "$PROMPT" "$REQUEST_COUNT" &
-    RUNNING=$(count_running_jobs)
-  done
-  # Small sleep to avoid busy-waiting
-  sleep 0.5
-done

package/tsconfig.json DELETED Viewed

@@ -1,20 +0,0 @@
-{
-  "compilerOptions": {
-    "target": "ES2020",
-    "module": "commonjs",
-    "lib": ["ES2020"],
-    "outDir": "./dist",
-    "rootDir": "./src",
-    "strict": true,
-    "esModuleInterop": true,
-    "skipLibCheck": true,
-    "forceConsistentCasingInFileNames": true,
-    "resolveJsonModule": true,
-    "declaration": true,
-    "declarationMap": true,
-    "sourceMap": true,
-    "moduleResolution": "node"
-  },
-  "include": ["src/**/*"],
-  "exclude": ["node_modules", "dist"]
-}

package/web/eslint.config.js DELETED Viewed

@@ -1,23 +0,0 @@
-import js from '@eslint/js'
-import globals from 'globals'
-import reactHooks from 'eslint-plugin-react-hooks'
-import reactRefresh from 'eslint-plugin-react-refresh'
-import tseslint from 'typescript-eslint'
-import { defineConfig, globalIgnores } from 'eslint/config'
-export default defineConfig([
-  globalIgnores(['dist']),
-  {
-    files: ['**/*.{ts,tsx}'],
-    extends: [
-      js.configs.recommended,
-      tseslint.configs.recommended,
-      reactHooks.configs.flat.recommended,
-      reactRefresh.configs.vite,
-    ],
-    languageOptions: {
-      ecmaVersion: 2020,
-      globals: globals.browser,
-    },
-  },
-])

package/web/llamacpp-web-dist.tar.gz DELETED Viewed

Binary file