@appkit/llamacpp-cli 1.12.0 → 1.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. package/README.md +217 -168
  2. package/package.json +10 -2
  3. package/web/dist/assets/index-Bin89Lwr.css +1 -0
  4. package/web/dist/assets/index-CVmonw3T.js +17 -0
  5. package/web/{index.html → dist/index.html} +2 -1
  6. package/.versionrc.json +0 -16
  7. package/CHANGELOG.md +0 -213
  8. package/docs/images/.gitkeep +0 -1
  9. package/docs/images/web-ui-servers.png +0 -0
  10. package/src/cli.ts +0 -523
  11. package/src/commands/admin/config.ts +0 -121
  12. package/src/commands/admin/logs.ts +0 -91
  13. package/src/commands/admin/restart.ts +0 -26
  14. package/src/commands/admin/start.ts +0 -27
  15. package/src/commands/admin/status.ts +0 -84
  16. package/src/commands/admin/stop.ts +0 -16
  17. package/src/commands/config-global.ts +0 -38
  18. package/src/commands/config.ts +0 -323
  19. package/src/commands/create.ts +0 -183
  20. package/src/commands/delete.ts +0 -74
  21. package/src/commands/list.ts +0 -37
  22. package/src/commands/logs-all.ts +0 -251
  23. package/src/commands/logs.ts +0 -345
  24. package/src/commands/monitor.ts +0 -110
  25. package/src/commands/ps.ts +0 -84
  26. package/src/commands/pull.ts +0 -44
  27. package/src/commands/rm.ts +0 -107
  28. package/src/commands/router/config.ts +0 -116
  29. package/src/commands/router/logs.ts +0 -256
  30. package/src/commands/router/restart.ts +0 -36
  31. package/src/commands/router/start.ts +0 -60
  32. package/src/commands/router/status.ts +0 -119
  33. package/src/commands/router/stop.ts +0 -33
  34. package/src/commands/run.ts +0 -233
  35. package/src/commands/search.ts +0 -107
  36. package/src/commands/server-show.ts +0 -161
  37. package/src/commands/show.ts +0 -207
  38. package/src/commands/start.ts +0 -101
  39. package/src/commands/stop.ts +0 -39
  40. package/src/commands/tui.ts +0 -25
  41. package/src/lib/admin-manager.ts +0 -435
  42. package/src/lib/admin-server.ts +0 -1243
  43. package/src/lib/config-generator.ts +0 -130
  44. package/src/lib/download-job-manager.ts +0 -213
  45. package/src/lib/history-manager.ts +0 -172
  46. package/src/lib/launchctl-manager.ts +0 -225
  47. package/src/lib/metrics-aggregator.ts +0 -257
  48. package/src/lib/model-downloader.ts +0 -328
  49. package/src/lib/model-scanner.ts +0 -157
  50. package/src/lib/model-search.ts +0 -114
  51. package/src/lib/models-dir-setup.ts +0 -46
  52. package/src/lib/port-manager.ts +0 -80
  53. package/src/lib/router-logger.ts +0 -201
  54. package/src/lib/router-manager.ts +0 -414
  55. package/src/lib/router-server.ts +0 -538
  56. package/src/lib/state-manager.ts +0 -206
  57. package/src/lib/status-checker.ts +0 -113
  58. package/src/lib/system-collector.ts +0 -315
  59. package/src/tui/ConfigApp.ts +0 -1085
  60. package/src/tui/HistoricalMonitorApp.ts +0 -587
  61. package/src/tui/ModelsApp.ts +0 -368
  62. package/src/tui/MonitorApp.ts +0 -386
  63. package/src/tui/MultiServerMonitorApp.ts +0 -1833
  64. package/src/tui/RootNavigator.ts +0 -74
  65. package/src/tui/SearchApp.ts +0 -511
  66. package/src/tui/SplashScreen.ts +0 -149
  67. package/src/types/admin-config.ts +0 -25
  68. package/src/types/global-config.ts +0 -26
  69. package/src/types/history-types.ts +0 -39
  70. package/src/types/model-info.ts +0 -8
  71. package/src/types/monitor-types.ts +0 -162
  72. package/src/types/router-config.ts +0 -25
  73. package/src/types/server-config.ts +0 -46
  74. package/src/utils/downsample-utils.ts +0 -128
  75. package/src/utils/file-utils.ts +0 -146
  76. package/src/utils/format-utils.ts +0 -98
  77. package/src/utils/log-parser.ts +0 -284
  78. package/src/utils/log-utils.ts +0 -178
  79. package/src/utils/process-utils.ts +0 -316
  80. package/src/utils/prompt-utils.ts +0 -47
  81. package/test-load.sh +0 -100
  82. package/tsconfig.json +0 -20
  83. package/web/eslint.config.js +0 -23
  84. package/web/llamacpp-web-dist.tar.gz +0 -0
  85. package/web/package-lock.json +0 -4017
  86. package/web/package.json +0 -38
  87. package/web/postcss.config.js +0 -6
  88. package/web/src/App.css +0 -42
  89. package/web/src/App.tsx +0 -86
  90. package/web/src/assets/react.svg +0 -1
  91. package/web/src/components/ApiKeyPrompt.tsx +0 -71
  92. package/web/src/components/CreateServerModal.tsx +0 -372
  93. package/web/src/components/DownloadProgress.tsx +0 -123
  94. package/web/src/components/Nav.tsx +0 -89
  95. package/web/src/components/RouterConfigModal.tsx +0 -240
  96. package/web/src/components/SearchModal.tsx +0 -306
  97. package/web/src/components/ServerConfigModal.tsx +0 -291
  98. package/web/src/hooks/useApi.ts +0 -259
  99. package/web/src/index.css +0 -42
  100. package/web/src/lib/api.ts +0 -226
  101. package/web/src/main.tsx +0 -10
  102. package/web/src/pages/Dashboard.tsx +0 -103
  103. package/web/src/pages/Models.tsx +0 -258
  104. package/web/src/pages/Router.tsx +0 -270
  105. package/web/src/pages/RouterLogs.tsx +0 -201
  106. package/web/src/pages/ServerLogs.tsx +0 -553
  107. package/web/src/pages/Servers.tsx +0 -358
  108. package/web/src/types/api.ts +0 -140
  109. package/web/tailwind.config.js +0 -31
  110. package/web/tsconfig.app.json +0 -28
  111. package/web/tsconfig.json +0 -7
  112. package/web/tsconfig.node.json +0 -26
  113. package/web/vite.config.ts +0 -25
  114. /package/web/{public → dist}/vite.svg +0 -0
@@ -1,316 +0,0 @@
1
- import { exec, spawn } from 'child_process';
2
- import { promisify } from 'util';
3
-
4
export const execAsync = promisify(exec);

/**
 * Run a shell command and resolve with its standard output, trimmed of
 * leading and trailing whitespace.
 * The returned promise rejects when the command fails (non-zero exit code).
 */
export async function execCommand(command: string): Promise<string> {
  const completed = await execAsync(command);
  const trimmedStdout = completed.stdout.trim();
  return trimmedStdout;
}
14
-
15
/**
 * Run a shell command and resolve with both of its output streams,
 * each trimmed of surrounding whitespace.
 */
export async function execCommandFull(command: string): Promise<{ stdout: string; stderr: string }> {
  const completed = await execAsync(command);
  const stdout = completed.stdout.trim();
  const stderr = completed.stderr.trim();
  return { stdout, stderr };
}
25
-
26
/**
 * Report whether `which` can resolve the given command.
 * Any failure of the `which` call is reported as "does not exist".
 */
export async function commandExists(command: string): Promise<boolean> {
  let found = true;
  try {
    await execAsync(`which ${command}`);
  } catch {
    found = false;
  }
  return found;
}
37
-
38
/**
 * Report whether `ps -p <pid>` succeeds for the given PID.
 * Any failure of the `ps` call is reported as "not running".
 */
export async function isProcessRunning(pid: number): Promise<boolean> {
  try {
    const probe = `ps -p ${pid}`;
    await execAsync(probe);
  } catch {
    return false;
  }
  return true;
}
49
-
50
/**
 * Report whether `lsof` finds a TCP listener on the given port.
 * Any failure of the `lsof` call is reported as "not in use".
 */
export async function isPortInUse(port: number): Promise<boolean> {
  try {
    const listenerQuery = `lsof -iTCP:${port} -sTCP:LISTEN -t`;
    await execAsync(listenerQuery);
  } catch {
    return false;
  }
  return true;
}
61
-
62
/**
 * Spawn a streaming command, read its first non-empty line of stdout, and kill it.
 * Useful for commands like 'macmon pipe' that stream indefinitely.
 * Ensures the process is killed to prevent leaks.
 *
 * @param command - Executable to spawn
 * @param args - Arguments passed to the executable
 * @param timeoutMs - How long to wait for a line before giving up (default 2000)
 * @returns The first non-empty line (trimmed). If the process ends without
 *          producing a complete non-empty line, the trimmed partial output.
 *          `null` on timeout, spawn error, or when there was no output at all.
 */
export async function spawnAndReadOneLine(
  command: string,
  args: string[],
  timeoutMs: number = 2000
): Promise<string | null> {
  return new Promise((resolve) => {
    const child = spawn(command, args, {
      stdio: ['ignore', 'pipe', 'ignore'],
      detached: false, // Keep in same process group for easier cleanup
    });

    let settled = false;
    let buffer = '';

    // Give up (and kill the child) if no line arrives in time
    const timer = setTimeout(() => settle(null, true), timeoutMs);

    // Resolve exactly once; optionally kill the child first
    function settle(value: string | null, killChild: boolean): void {
      if (settled) return;
      settled = true;
      clearTimeout(timer);

      if (killChild) {
        try {
          // SIGKILL immediately (SIGTERM may not work for macmon)
          child.kill('SIGKILL');
        } catch {
          // Process might already be dead
        }
      }

      resolve(value);
    }

    // Decode as a stream so multi-byte characters split across chunks stay intact
    child.stdout?.setEncoding('utf8');

    child.stdout?.on('data', (chunk: string) => {
      if (settled) return;

      buffer += chunk;

      // Consume complete lines one at a time. Blank lines are dropped from the
      // buffer so that a leading empty line cannot hide the lines that follow it.
      let newlineIndex = buffer.indexOf('\n');
      while (newlineIndex !== -1) {
        const line = buffer.substring(0, newlineIndex).trim();
        buffer = buffer.substring(newlineIndex + 1);

        if (line.length > 0) {
          settle(line, true);
          return;
        }

        newlineIndex = buffer.indexOf('\n');
      }
    });

    // Spawn failure (e.g. command not found): nothing to kill
    child.on('error', () => settle(null, false));

    // 'close' fires after the process has ended AND its stdio streams are
    // closed, so all buffered stdout has been delivered by now ('exit' can fire
    // while output is still in flight).
    child.on('close', () => {
      // Return partial output if we have any
      const remainder = buffer.trim();
      settle(remainder.length > 0 ? remainder : null, false);
    });
  });
}
141
-
142
// Process memory cache to prevent spawning too many 'top' processes
// Cache per PID with 3-second TTL
const processMemoryCache = new Map<number, { value: number | null; timestamp: number }>();
const PROCESS_MEMORY_CACHE_TTL = 3000; // 3 seconds

// Bytes per unit suffix used in top's MEM column
const TOP_MEMORY_UNIT_BYTES: { [unit: string]: number } = {
  B: 1,
  K: 1024,
  M: 1024 ** 2,
  G: 1024 ** 3,
  T: 1024 ** 4,
};

// One data row of `top -stats pid,mem`: "<pid> <number><unit>" (e.g., "1438 299M").
// top may also print byte-sized values ("0B") and append a +/- change marker
// ("12M+"); both forms are accepted so such rows are not silently dropped.
const TOP_MEMORY_ROW = /^(\d+)\s+([\d.]+)([BKMGT])[+-]?\s*$/;

/**
 * Batch get memory usage for multiple processes in one top call.
 * Much more efficient than calling getProcessMemory() multiple times.
 *
 * @param pids - Process IDs to look up (duplicates are queried once)
 * @returns Map<pid, bytes> with an entry for every requested PID; the value is
 *          null when the process was not in top's output, the PID is not a
 *          non-negative integer, or the top call failed
 */
export async function getBatchProcessMemory(pids: number[]): Promise<Map<number, number | null>> {
  const result = new Map<number, number | null>();
  const now = Date.now();

  // Check cache and collect PIDs that need fetching
  const pidsToFetch: number[] = [];
  for (const pid of new Set(pids)) {
    const cached = processMemoryCache.get(pid);
    if (cached && (now - cached.timestamp) < PROCESS_MEMORY_CACHE_TTL) {
      result.set(pid, cached.value);
    } else if (Number.isInteger(pid) && pid >= 0) {
      pidsToFetch.push(pid);
    } else {
      // Never hand a malformed PID to the shell: it would make the whole
      // top call fail and turn every other PID's result into null
      result.set(pid, null);
    }
  }

  // If nothing needs fetching, return early
  if (pidsToFetch.length === 0) {
    return result;
  }

  // Drop expired entries so the cache does not grow as PIDs come and go
  for (const [cachedPid, entry] of processMemoryCache) {
    if ((now - entry.timestamp) >= PROCESS_MEMORY_CACHE_TTL) {
      processMemoryCache.delete(cachedPid);
    }
  }

  try {
    // Build top command with all PIDs: top -l 1 -pid X -pid Y -pid Z -stats pid,mem
    const pidArgs = pidsToFetch.map(pid => `-pid ${pid}`).join(' ');
    const output = await execCommand(`top -l 1 ${pidArgs} -stats pid,mem 2>/dev/null`);

    for (const line of output.split('\n')) {
      const match = line.trim().match(TOP_MEMORY_ROW);
      if (!match) continue; // header/summary lines

      const pid = parseInt(match[1], 10);
      const bytes = Math.round(parseFloat(match[2]) * TOP_MEMORY_UNIT_BYTES[match[3]]);

      // Cache and store result
      processMemoryCache.set(pid, { value: bytes, timestamp: now });
      result.set(pid, bytes);
    }
  } catch {
    // Best effort: a failed top call is reported as null for every fetched PID below
  }

  // Any fetched PID still without a result (not in the output, or top failed) is cached as null
  for (const pid of pidsToFetch) {
    if (!result.has(pid)) {
      processMemoryCache.set(pid, { value: null, timestamp: now });
      result.set(pid, null);
    }
  }

  return result;
}
220
-
221
/**
 * Memory usage of one process, in bytes, or null when it could not be
 * determined (process not found or the lookup failed).
 *
 * Delegates to getBatchProcessMemory(), so the value comes from 'top' and is
 * served from the 3-second per-PID cache when available. 'top' on macOS
 * reports CPU memory only (NOT GPU/Metal memory).
 *
 * Note: For llama-server processes with GPU offloading, use ServerConfig.metalMemoryMB
 * to get GPU memory allocation (parsed from logs during server startup)
 *
 * Note: When querying several PIDs, call getBatchProcessMemory() directly
 * instead of calling this function once per PID.
 */
export async function getProcessMemory(pid: number): Promise<number | null> {
  const memoryByPid = await getBatchProcessMemory([pid]);
  const bytes = memoryByPid.get(pid);
  return bytes === undefined ? null : bytes;
}
236
-
237
// Process CPU cache to prevent spawning too many 'ps' processes
// Cache per PID with 3-second TTL
const processCpuCache = new Map<number, { value: number | null; timestamp: number }>();
const PROCESS_CPU_CACHE_TTL = 3000; // 3 seconds

// One row of `ps -o pid=,%cpu=`: "<pid> <percent>" (e.g., "1438 45.2").
// A comma is accepted as decimal separator too, because ps may format %cpu
// according to the user's locale (e.g., "1438 45,2").
const PS_CPU_ROW = /^(\d+)\s+(\d+(?:[.,]\d+)?)\s*$/;

/**
 * Batch get CPU usage for multiple processes in one ps call.
 * Much more efficient than calling getProcessCpu() multiple times.
 *
 * @param pids - Process IDs to look up (duplicates are queried once)
 * @returns Map<pid, percentage> with an entry for every requested PID; the
 *          value is null when the process was not in ps's output, the PID is
 *          not a non-negative integer, or the ps call failed
 */
export async function getBatchProcessCpu(pids: number[]): Promise<Map<number, number | null>> {
  const result = new Map<number, number | null>();
  const now = Date.now();

  // Check cache and collect PIDs that need fetching
  const pidsToFetch: number[] = [];
  for (const pid of new Set(pids)) {
    const cached = processCpuCache.get(pid);
    if (cached && (now - cached.timestamp) < PROCESS_CPU_CACHE_TTL) {
      result.set(pid, cached.value);
    } else if (Number.isInteger(pid) && pid >= 0) {
      pidsToFetch.push(pid);
    } else {
      // Never hand a malformed PID to the shell: it would make the whole
      // ps call fail and turn every other PID's result into null
      result.set(pid, null);
    }
  }

  // If nothing needs fetching, return early
  if (pidsToFetch.length === 0) {
    return result;
  }

  // Drop expired entries so the cache does not grow as PIDs come and go
  for (const [cachedPid, entry] of processCpuCache) {
    if ((now - entry.timestamp) >= PROCESS_CPU_CACHE_TTL) {
      processCpuCache.delete(cachedPid);
    }
  }

  try {
    // Build ps command with all PIDs: ps -p X,Y,Z -o pid=,%cpu=
    const pidList = pidsToFetch.join(',');
    const output = await execCommand(`ps -p ${pidList} -o pid=,%cpu= 2>/dev/null`);

    for (const line of output.split('\n')) {
      const match = line.trim().match(PS_CPU_ROW);
      if (!match) continue;

      const pid = parseInt(match[1], 10);
      // Normalize a locale decimal comma before parsing
      const cpuPercent = parseFloat(match[2].replace(',', '.'));

      // Cache and store result
      processCpuCache.set(pid, { value: cpuPercent, timestamp: now });
      result.set(pid, cpuPercent);
    }
  } catch {
    // Best effort: a failed ps call is reported as null for every fetched PID below
  }

  // Any fetched PID still without a result (process not running, or ps failed) is cached as null
  for (const pid of pidsToFetch) {
    if (!result.has(pid)) {
      processCpuCache.set(pid, { value: null, timestamp: now });
      result.set(pid, null);
    }
  }

  return result;
}
304
-
305
/**
 * CPU usage of one process as a percentage (0-100+), or null when it could
 * not be determined (process not found or the lookup failed).
 *
 * Delegates to getBatchProcessCpu(), so the value comes from 'ps -o %cpu' and
 * is served from the 3-second per-PID cache when available.
 *
 * Note: When querying several PIDs, call getBatchProcessCpu() directly
 * instead of calling this function once per PID.
 */
export async function getProcessCpu(pid: number): Promise<number | null> {
  const cpuByPid = await getBatchProcessCpu([pid]);
  const percent = cpuByPid.get(pid);
  return percent === undefined ? null : percent;
}
@@ -1,47 +0,0 @@
1
- import * as readline from 'readline';
2
-
3
/**
 * Prompt user for input
 *
 * @param question - Text shown to the user; ": " is appended
 * @param defaultValue - Shown in brackets after the question and returned when
 *                       the user enters nothing
 * @returns The trimmed answer; `defaultValue` (or '' when there is none) if the
 *          answer is empty or input ends before an answer is given
 */
export function prompt(question: string, defaultValue?: string): Promise<string> {
  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
  });

  const promptText = defaultValue
    ? `${question} [${defaultValue}]: `
    : `${question}: `;

  return new Promise((resolve) => {
    let settled = false;

    // Resolve exactly once, then release stdin
    const finish = (value: string) => {
      if (settled) return;
      settled = true;
      resolve(value);
      rl.close();
    };

    // If input ends (EOF) before an answer arrives, the question callback is
    // never invoked; fall back to the default instead of leaving the promise pending
    rl.once('close', () => finish(defaultValue || ''));

    rl.question(promptText, (answer) => {
      finish(answer.trim() || defaultValue || '');
    });
  });
}
23
-
24
/**
 * Prompt user for yes/no confirmation
 *
 * @param question - Text shown to the user, followed by "[Y/n]" or "[y/N]"
 * @param defaultYes - Result for an empty answer (default true)
 * @returns true for "y"/"yes" (case-insensitive), `defaultYes` for an empty
 *          answer, false for anything else. Also false if input ends before
 *          an answer is given, since nothing was confirmed
 */
export function confirm(question: string, defaultYes = true): Promise<boolean> {
  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
  });

  const suffix = defaultYes ? '[Y/n]' : '[y/N]';

  return new Promise((resolve) => {
    let settled = false;

    // Resolve exactly once, then release stdin
    const finish = (value: boolean) => {
      if (settled) return;
      settled = true;
      resolve(value);
      rl.close();
    };

    // If input ends (EOF) before an answer arrives, the question callback is
    // never invoked. Treat that as "not confirmed" rather than hanging or
    // silently applying a "yes" default to an action nobody approved
    rl.once('close', () => finish(false));

    rl.question(`${question} ${suffix}: `, (answer) => {
      const input = answer.trim().toLowerCase();

      if (input === '') {
        finish(defaultYes);
      } else {
        finish(input === 'y' || input === 'yes');
      }
    });
  });
}
package/test-load.sh DELETED
@@ -1,100 +0,0 @@
1
#!/bin/bash

# Test script for parallel chat requests to multiple llama servers.
# Keeps a fixed number of requests in flight until interrupted.
# Usage: ./test-load.sh
#        MAX_PARALLEL=5 ./test-load.sh   # override the number of concurrent requests
# Stop with CTRL-C

set -e

# Available ports
PORTS=(9001 9002 9004 9005)

# Number of requests kept running concurrently (default 3)
MAX_PARALLEL=${MAX_PARALLEL:-3}

# Test prompts with varying complexity
PROMPTS=(
    "Write a hello world program in Python"
    "Explain quantum computing in simple terms"
    "Write a flappy bird game in Python"
    "What are the benefits of functional programming?"
    "Create a REST API example using FastAPI"
    "Explain the difference between processes and threads"
    "Write a binary search algorithm in JavaScript"
    "What is the difference between HTTP and HTTPS?"
    "Create a simple todo list app in React"
    "Explain Docker containers to a beginner"
    "Write a quicksort implementation in C++"
    "What are the SOLID principles?"
    "Create a SQL query to find duplicate records"
    "Explain async/await in JavaScript"
    "Write a Fibonacci sequence generator in any language"
)

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
BLUE='\033[0;34m'
MAGENTA='\033[0;35m'
CYAN='\033[0;36m'
NC='\033[0m' # No Color

# Counter for requests
REQUEST_COUNT=0

# Run a single chat request and report whether it succeeded.
# Arguments: $1 = port, $2 = prompt, $3 = request number (for log lines)
run_chat() {
    local port=$1
    local prompt=$2
    local request_id=$3

    echo -e "${CYAN}[Request #${request_id}]${NC} ${YELLOW}Port ${port}:${NC} ${prompt}"

    # Run the chat request (all of its output is discarded; only the exit status is used)
    if npm run dev -- server run -m "$prompt" "$port" > /dev/null 2>&1; then
        echo -e "${CYAN}[Request #${request_id}]${NC} ${GREEN}✓ Completed${NC} (port ${port})"
    else
        echo -e "${CYAN}[Request #${request_id}]${NC} ${RED}✗ Failed${NC} (port ${port})"
    fi
}

# Count running background jobs
count_running_jobs() {
    jobs -r | wc -l | tr -d ' '
}

# Start one request in the background with a random port and prompt.
# Must be called in the main shell (not a subshell) so that the counter
# update and the background job are visible to the main loop.
launch_request() {
    local port=${PORTS[$RANDOM % ${#PORTS[@]}]}
    local prompt=${PROMPTS[$RANDOM % ${#PROMPTS[@]}]}

    # Plain assignment instead of ((REQUEST_COUNT++)): the post-increment
    # evaluates to 0 on the first request, which gives (( )) a non-zero exit
    # status and makes 'set -e' abort the script before anything is sent.
    REQUEST_COUNT=$((REQUEST_COUNT + 1))

    run_chat "$port" "$prompt" "$REQUEST_COUNT" &
}

# Trap CTRL-C for clean exit
trap 'echo -e "\n${YELLOW}Stopping test load script...${NC}"; echo -e "${GREEN}Total requests sent: ${REQUEST_COUNT}${NC}"; exit 0' INT

echo -e "${MAGENTA}========================================${NC}"
echo -e "${MAGENTA}  Llama Server Load Test${NC}"
echo -e "${MAGENTA}========================================${NC}"
echo -e "${BLUE}Ports: ${PORTS[*]}${NC}"
echo -e "${BLUE}Parallel requests: ${MAX_PARALLEL}${NC}"
echo -e "${BLUE}Press CTRL-C to stop${NC}"
echo -e "${MAGENTA}========================================${NC}\n"

# Main loop - keep MAX_PARALLEL requests running at all times.
# The first pass starts the initial batch; later passes replace finished requests.
while true; do
    RUNNING=$(count_running_jobs)

    while [ "$RUNNING" -lt "$MAX_PARALLEL" ]; do
        launch_request
        RUNNING=$(count_running_jobs)
    done

    # Small sleep to avoid busy-waiting
    sleep 0.5
done
package/tsconfig.json DELETED
@@ -1,20 +0,0 @@
1
- {
2
- "compilerOptions": {
3
- "target": "ES2020",
4
- "module": "commonjs",
5
- "lib": ["ES2020"],
6
- "outDir": "./dist",
7
- "rootDir": "./src",
8
- "strict": true,
9
- "esModuleInterop": true,
10
- "skipLibCheck": true,
11
- "forceConsistentCasingInFileNames": true,
12
- "resolveJsonModule": true,
13
- "declaration": true,
14
- "declarationMap": true,
15
- "sourceMap": true,
16
- "moduleResolution": "node"
17
- },
18
- "include": ["src/**/*"],
19
- "exclude": ["node_modules", "dist"]
20
- }
@@ -1,23 +0,0 @@
1
- import js from '@eslint/js'
2
- import globals from 'globals'
3
- import reactHooks from 'eslint-plugin-react-hooks'
4
- import reactRefresh from 'eslint-plugin-react-refresh'
5
- import tseslint from 'typescript-eslint'
6
- import { defineConfig, globalIgnores } from 'eslint/config'
7
-
8
// Lint rules for the TypeScript/TSX sources: the recommended JS and
// typescript-eslint presets plus the React Hooks and React Refresh (Vite)
// presets, with browser globals enabled.
const typescriptSources = {
  files: ['**/*.{ts,tsx}'],
  extends: [
    js.configs.recommended,
    tseslint.configs.recommended,
    reactHooks.configs.flat.recommended,
    reactRefresh.configs.vite,
  ],
  languageOptions: {
    ecmaVersion: 2020,
    globals: globals.browser,
  },
}

// The 'dist' directory is ignored globally
export default defineConfig([globalIgnores(['dist']), typescriptSources])
Binary file