@appkit/llamacpp-cli 1.12.0 → 1.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. package/README.md +217 -168
  2. package/package.json +10 -2
  3. package/web/dist/assets/index-Bin89Lwr.css +1 -0
  4. package/web/dist/assets/index-CVmonw3T.js +17 -0
  5. package/web/{index.html → dist/index.html} +2 -1
  6. package/.versionrc.json +0 -16
  7. package/CHANGELOG.md +0 -213
  8. package/docs/images/.gitkeep +0 -1
  9. package/docs/images/web-ui-servers.png +0 -0
  10. package/src/cli.ts +0 -523
  11. package/src/commands/admin/config.ts +0 -121
  12. package/src/commands/admin/logs.ts +0 -91
  13. package/src/commands/admin/restart.ts +0 -26
  14. package/src/commands/admin/start.ts +0 -27
  15. package/src/commands/admin/status.ts +0 -84
  16. package/src/commands/admin/stop.ts +0 -16
  17. package/src/commands/config-global.ts +0 -38
  18. package/src/commands/config.ts +0 -323
  19. package/src/commands/create.ts +0 -183
  20. package/src/commands/delete.ts +0 -74
  21. package/src/commands/list.ts +0 -37
  22. package/src/commands/logs-all.ts +0 -251
  23. package/src/commands/logs.ts +0 -345
  24. package/src/commands/monitor.ts +0 -110
  25. package/src/commands/ps.ts +0 -84
  26. package/src/commands/pull.ts +0 -44
  27. package/src/commands/rm.ts +0 -107
  28. package/src/commands/router/config.ts +0 -116
  29. package/src/commands/router/logs.ts +0 -256
  30. package/src/commands/router/restart.ts +0 -36
  31. package/src/commands/router/start.ts +0 -60
  32. package/src/commands/router/status.ts +0 -119
  33. package/src/commands/router/stop.ts +0 -33
  34. package/src/commands/run.ts +0 -233
  35. package/src/commands/search.ts +0 -107
  36. package/src/commands/server-show.ts +0 -161
  37. package/src/commands/show.ts +0 -207
  38. package/src/commands/start.ts +0 -101
  39. package/src/commands/stop.ts +0 -39
  40. package/src/commands/tui.ts +0 -25
  41. package/src/lib/admin-manager.ts +0 -435
  42. package/src/lib/admin-server.ts +0 -1243
  43. package/src/lib/config-generator.ts +0 -130
  44. package/src/lib/download-job-manager.ts +0 -213
  45. package/src/lib/history-manager.ts +0 -172
  46. package/src/lib/launchctl-manager.ts +0 -225
  47. package/src/lib/metrics-aggregator.ts +0 -257
  48. package/src/lib/model-downloader.ts +0 -328
  49. package/src/lib/model-scanner.ts +0 -157
  50. package/src/lib/model-search.ts +0 -114
  51. package/src/lib/models-dir-setup.ts +0 -46
  52. package/src/lib/port-manager.ts +0 -80
  53. package/src/lib/router-logger.ts +0 -201
  54. package/src/lib/router-manager.ts +0 -414
  55. package/src/lib/router-server.ts +0 -538
  56. package/src/lib/state-manager.ts +0 -206
  57. package/src/lib/status-checker.ts +0 -113
  58. package/src/lib/system-collector.ts +0 -315
  59. package/src/tui/ConfigApp.ts +0 -1085
  60. package/src/tui/HistoricalMonitorApp.ts +0 -587
  61. package/src/tui/ModelsApp.ts +0 -368
  62. package/src/tui/MonitorApp.ts +0 -386
  63. package/src/tui/MultiServerMonitorApp.ts +0 -1833
  64. package/src/tui/RootNavigator.ts +0 -74
  65. package/src/tui/SearchApp.ts +0 -511
  66. package/src/tui/SplashScreen.ts +0 -149
  67. package/src/types/admin-config.ts +0 -25
  68. package/src/types/global-config.ts +0 -26
  69. package/src/types/history-types.ts +0 -39
  70. package/src/types/model-info.ts +0 -8
  71. package/src/types/monitor-types.ts +0 -162
  72. package/src/types/router-config.ts +0 -25
  73. package/src/types/server-config.ts +0 -46
  74. package/src/utils/downsample-utils.ts +0 -128
  75. package/src/utils/file-utils.ts +0 -146
  76. package/src/utils/format-utils.ts +0 -98
  77. package/src/utils/log-parser.ts +0 -284
  78. package/src/utils/log-utils.ts +0 -178
  79. package/src/utils/process-utils.ts +0 -316
  80. package/src/utils/prompt-utils.ts +0 -47
  81. package/test-load.sh +0 -100
  82. package/tsconfig.json +0 -20
  83. package/web/eslint.config.js +0 -23
  84. package/web/llamacpp-web-dist.tar.gz +0 -0
  85. package/web/package-lock.json +0 -4017
  86. package/web/package.json +0 -38
  87. package/web/postcss.config.js +0 -6
  88. package/web/src/App.css +0 -42
  89. package/web/src/App.tsx +0 -86
  90. package/web/src/assets/react.svg +0 -1
  91. package/web/src/components/ApiKeyPrompt.tsx +0 -71
  92. package/web/src/components/CreateServerModal.tsx +0 -372
  93. package/web/src/components/DownloadProgress.tsx +0 -123
  94. package/web/src/components/Nav.tsx +0 -89
  95. package/web/src/components/RouterConfigModal.tsx +0 -240
  96. package/web/src/components/SearchModal.tsx +0 -306
  97. package/web/src/components/ServerConfigModal.tsx +0 -291
  98. package/web/src/hooks/useApi.ts +0 -259
  99. package/web/src/index.css +0 -42
  100. package/web/src/lib/api.ts +0 -226
  101. package/web/src/main.tsx +0 -10
  102. package/web/src/pages/Dashboard.tsx +0 -103
  103. package/web/src/pages/Models.tsx +0 -258
  104. package/web/src/pages/Router.tsx +0 -270
  105. package/web/src/pages/RouterLogs.tsx +0 -201
  106. package/web/src/pages/ServerLogs.tsx +0 -553
  107. package/web/src/pages/Servers.tsx +0 -358
  108. package/web/src/types/api.ts +0 -140
  109. package/web/tailwind.config.js +0 -31
  110. package/web/tsconfig.app.json +0 -28
  111. package/web/tsconfig.json +0 -7
  112. package/web/tsconfig.node.json +0 -26
  113. package/web/vite.config.ts +0 -25
  114. /package/web/{public → dist}/vite.svg +0 -0
@@ -1,225 +0,0 @@
1
- import * as path from 'path';
2
- import * as fs from 'fs/promises';
3
- import { ServerConfig } from '../types/server-config';
4
- import { execCommand, execAsync } from '../utils/process-utils';
5
- import { writeFileAtomic, fileExists } from '../utils/file-utils';
6
-
7
/**
 * Snapshot of a launchd service as reported by `launchctl list`.
 */
export interface ServiceStatus {
  /** True when launchctl lists a pid for the service. */
  isRunning: boolean;
  /** Process id from the listing, or null when none is shown. */
  pid: number | null;
  /** Status/exit-code column from the listing, or null when none is shown. */
  exitCode: number | null;
  /** Human-readable explanation of a non-zero exit code, when there is one. */
  lastExitReason?: string;
}
13
-
14
/**
 * Thin wrapper around macOS `launchctl` for llama-server services:
 * generates the launchd plist, loads/unloads it, starts/stops the job and
 * polls its status.
 */
export class LaunchctlManager {
  /**
   * Escape the XML-reserved characters so an arbitrary value (model path,
   * label, custom flag, ...) can be embedded inside a plist `<string>` element.
   */
  private static escapeXml(value: unknown): string {
    return String(value)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&apos;');
  }

  /**
   * Single-quote a value for interpolation into a POSIX shell command line.
   * Embedded single quotes are closed, escaped and re-opened (`'\''`).
   */
  private static shellQuote(value: string): string {
    return `'${String(value).replace(/'/g, "'\\''")}'`;
  }

  /**
   * Generate plist XML content for a server.
   *
   * @param config - Server configuration supplying the label, model path,
   *   network settings, tuning flags and log paths.
   * @returns The complete plist document as a string.
   */
  generatePlist(config: ServerConfig): string {
    // Build program arguments array
    const args: string[] = [
      '/opt/homebrew/bin/llama-server',
      '--model', config.modelPath,
      // Legacy configs may carry a null host; fall back to loopback instead of
      // emitting the literal string "null".
      '--host', config.host || '127.0.0.1',
      '--port', config.port.toString(),
      '--threads', config.threads.toString(),
      '--ctx-size', config.ctxSize.toString(),
      '--gpu-layers', config.gpuLayers.toString(),
    ];

    // Add flags
    if (config.embeddings) args.push('--embeddings');
    if (config.jinja) args.push('--jinja');

    // Conditionally enable verbose HTTP logging for detailed request/response info
    if (config.verbose) {
      args.push('--log-verbose');
    }

    // Add custom flags
    if (config.customFlags && config.customFlags.length > 0) {
      args.push(...config.customFlags);
    }

    // Every interpolated value is XML-escaped: paths and labels may contain
    // characters such as '&' or '<' that would otherwise corrupt the plist.
    const esc = LaunchctlManager.escapeXml;
    const argsXml = args.map((arg) => `      <string>${esc(arg)}</string>`).join('\n');

    return `<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN"
  "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
  <dict>
    <key>Label</key>
    <string>${esc(config.label)}</string>

    <key>ProgramArguments</key>
    <array>
${argsXml}
    </array>

    <key>RunAtLoad</key>
    <false/>

    <key>KeepAlive</key>
    <dict>
      <key>Crashed</key>
      <true/>
      <key>SuccessfulExit</key>
      <false/>
    </dict>

    <key>StandardOutPath</key>
    <string>${esc(config.stdoutPath)}</string>

    <key>StandardErrorPath</key>
    <string>${esc(config.stderrPath)}</string>

    <key>WorkingDirectory</key>
    <string>/tmp</string>

    <key>ThrottleInterval</key>
    <integer>10</integer>
  </dict>
</plist>
`;
  }

  /**
   * Create and write plist file to `config.plistPath`.
   */
  async createPlist(config: ServerConfig): Promise<void> {
    const plistContent = this.generatePlist(config);
    await writeFileAtomic(config.plistPath, plistContent);
  }

  /**
   * Delete plist file. A file that is already gone is not an error.
   */
  async deletePlist(plistPath: string): Promise<void> {
    try {
      await fs.unlink(plistPath);
    } catch (err: unknown) {
      // Unlink directly instead of check-then-delete so a concurrent removal
      // cannot make this throw; any other failure is still reported.
      if ((err as { code?: string } | null)?.code !== 'ENOENT') {
        throw err;
      }
    }
  }

  /**
   * Load service (register with launchctl).
   */
  async loadService(plistPath: string): Promise<void> {
    await execCommand(`launchctl load ${LaunchctlManager.shellQuote(plistPath)}`);
  }

  /**
   * Unload service (unregister from launchctl). Best effort: failures are
   * ignored because unloading a service that is not loaded is expected.
   */
  async unloadService(plistPath: string): Promise<void> {
    try {
      await execCommand(`launchctl unload ${LaunchctlManager.shellQuote(plistPath)}`);
    } catch {
      // Ignore errors if service is not loaded
    }
  }

  /**
   * Start service.
   */
  async startService(label: string): Promise<void> {
    await execCommand(`launchctl start ${LaunchctlManager.shellQuote(label)}`);
  }

  /**
   * Stop service.
   */
  async stopService(label: string): Promise<void> {
    await execCommand(`launchctl stop ${LaunchctlManager.shellQuote(label)}`);
  }

  /**
   * Get service status from launchctl.
   *
   * @param label - Exact launchd label to look up.
   * @returns The parsed status, or a "not running" status when the label is
   *   not listed or the command fails.
   */
  async getServiceStatus(label: string): Promise<ServiceStatus> {
    const notLoaded: ServiceStatus = { isRunning: false, pid: null, exitCode: null };

    try {
      // -F: treat the label as a fixed string (labels contain '.'), and quote
      // it so it can never be interpreted by the shell.
      const { stdout } = await execAsync(
        `launchctl list | grep -F -- ${LaunchctlManager.shellQuote(label)}`
      );

      for (const line of stdout.trim().split('\n')) {
        // Expected columns: pid, status, label ("-" marks an absent value).
        const [pidStr, exitCodeStr, serviceLabel] = line.trim().split(/\s+/);
        if (serviceLabel === undefined || serviceLabel !== label) {
          // grep matches substrings; only accept the exact label.
          continue;
        }

        const pid = LaunchctlManager.parseColumn(pidStr);
        const exitCode = LaunchctlManager.parseColumn(exitCodeStr);

        return {
          isRunning: pid !== null,
          pid,
          exitCode,
          lastExitReason: this.interpretExitCode(exitCode),
        };
      }

      // Service not found
      return notLoaded;
    } catch {
      // Service not found or not loaded (grep exits non-zero on no match)
      return notLoaded;
    }
  }

  /**
   * Parse a numeric `launchctl list` column; "-" or a non-numeric value
   * yields null.
   */
  private static parseColumn(raw: string): number | null {
    if (raw === '-') return null;
    const parsed = parseInt(raw, 10);
    return Number.isNaN(parsed) ? null : parsed;
  }

  /**
   * Interpret exit code to human-readable reason.
   *
   * @returns undefined for null or 0; otherwise a short description.
   */
  private interpretExitCode(code: number | null): string | undefined {
    if (code === null || code === 0) return undefined;
    if (code === -9) return 'Force killed (SIGKILL)';
    if (code === -15) return 'Terminated (SIGTERM)';
    return `Exit code: ${code}`;
  }

  /**
   * Poll the service every 500 ms until its running state equals
   * `wantRunning` or `timeoutMs` elapses.
   */
  private async pollRunningState(
    label: string,
    wantRunning: boolean,
    timeoutMs: number
  ): Promise<boolean> {
    const startTime = Date.now();
    while (Date.now() - startTime < timeoutMs) {
      const status = await this.getServiceStatus(label);
      if (status.isRunning === wantRunning) {
        return true;
      }
      await new Promise((resolve) => setTimeout(resolve, 500));
    }
    return false;
  }

  /**
   * Wait for service to start (with timeout).
   *
   * @returns true if the service was seen running before the timeout.
   */
  async waitForServiceStart(label: string, timeoutMs = 5000): Promise<boolean> {
    return this.pollRunningState(label, true, timeoutMs);
  }

  /**
   * Wait for service to stop (with timeout).
   *
   * @returns true if the service was seen stopped before the timeout.
   */
  async waitForServiceStop(label: string, timeoutMs = 5000): Promise<boolean> {
    return this.pollRunningState(label, false, timeoutMs);
  }
}

// Export singleton instance
export const launchctlManager = new LaunchctlManager();
@@ -1,257 +0,0 @@
1
- import { ServerConfig } from '../types/server-config.js';
2
- import { ServerMetrics, SlotInfo, MonitorData } from '../types/monitor-types.js';
3
- import { statusChecker } from './status-checker.js';
4
- import { systemCollector } from './system-collector.js';
5
- import { getProcessMemory, getProcessCpu } from '../utils/process-utils.js';
6
-
7
/**
 * Aggregates metrics from llama.cpp server API endpoints.
 * Combines server health, slot status, and model properties, and derives
 * per-slot generation speed by comparing consecutive polls.
 */
export class MetricsAggregator {
  private serverUrl: string;
  private timeout: number;
  /** Last observed decode count per slot id, used to derive tokens/second. */
  private previousSlots: Map<number, { n_decoded: number; timestamp: number }> = new Map();

  /**
   * @param server - Server whose HTTP API is polled.
   * @param timeout - Default per-request timeout in milliseconds.
   */
  constructor(server: ServerConfig, timeout: number = 5000) {
    // Handle null host (legacy configs) by defaulting to 127.0.0.1
    const host = server.host || '127.0.0.1';
    this.serverUrl = `http://${host}:${server.port}`;
    this.timeout = timeout;
  }

  /**
   * Fetch data from llama.cpp API with timeout.
   *
   * @param endpoint - Path appended to the server base URL (e.g. `/health`).
   * @param customTimeout - Overrides the instance timeout for this call.
   * @returns The parsed JSON body, or null on a non-2xx response, network
   *   error, timeout, or parse error.
   */
  private async fetchWithTimeout(
    endpoint: string,
    customTimeout?: number
  ): Promise<any | null> {
    const controller = new AbortController();
    const timeoutMs = customTimeout ?? this.timeout;
    const timeoutId = setTimeout(() => controller.abort(), timeoutMs);

    try {
      const response = await fetch(`${this.serverUrl}${endpoint}`, {
        signal: controller.signal,
      });

      if (!response.ok) {
        return null;
      }

      // Body parsing stays inside the timeout window: the timer is only
      // cleared in `finally`, after the JSON has been read.
      return await response.json();
    } catch {
      // Network error, timeout, or parse error
      return null;
    } finally {
      // Always release the timer, including when fetch rejects, so a failed
      // request does not leave a pending timeout behind.
      clearTimeout(timeoutId);
    }
  }

  /**
   * Get server health status.
   *
   * @returns true only when `/health` responds with `status === 'ok'`.
   */
  private async getHealth(): Promise<boolean> {
    const health = await this.fetchWithTimeout('/health');
    return health !== null && health.status === 'ok';
  }

  /**
   * Get server properties (model info, context size, etc.).
   *
   * @returns The `/props` payload, or null when it could not be fetched.
   */
  private async getProps(): Promise<any> {
    return await this.fetchWithTimeout('/props');
  }

  /**
   * Get active slots information with calculated tok/s.
   *
   * The speed is the difference in `n_decoded` between this poll and the
   * previous one for the same slot, divided by the elapsed wall-clock time.
   *
   * @returns One entry per slot, or an empty array when `/slots` is
   *   unavailable or not an array.
   */
  private async getSlots(): Promise<SlotInfo[]> {
    const data = await this.fetchWithTimeout('/slots');
    if (!data || !Array.isArray(data)) {
      return [];
    }

    const now = Date.now();

    return data.map((slot: any) => {
      const slotId = slot.id;
      const n_decoded = slot.next_token?.[0]?.n_decoded || 0;
      const isProcessing = slot.is_processing;

      // Calculate tokens per second by comparing with previous poll
      let predicted_per_second: number | undefined;

      if (isProcessing && n_decoded > 0) {
        const previous = this.previousSlots.get(slotId);

        if (previous && previous.n_decoded < n_decoded) {
          const tokensGenerated = n_decoded - previous.n_decoded;
          const timeElapsed = (now - previous.timestamp) / 1000; // Convert to seconds

          if (timeElapsed > 0) {
            predicted_per_second = tokensGenerated / timeElapsed;
          }
        }

        // Store current state for next comparison
        this.previousSlots.set(slotId, { n_decoded, timestamp: now });
      } else if (!isProcessing) {
        // Clear history when slot becomes idle
        this.previousSlots.delete(slotId);
      }

      return {
        id: slotId,
        state: isProcessing ? 'processing' : 'idle',
        n_prompt_tokens: slot.n_prompt_tokens,
        n_decoded,
        n_ctx: slot.n_ctx || 0,
        // Only the generation rate is derived here; the remaining timing
        // fields are filled with zero placeholders.
        timings: predicted_per_second
          ? {
              prompt_n: 0,
              prompt_ms: 0,
              prompt_per_token_ms: 0,
              prompt_per_second: 0,
              predicted_n: n_decoded,
              predicted_ms: 0,
              predicted_per_token_ms: 0,
              predicted_per_second,
            }
          : undefined,
      };
    });
  }

  /**
   * Format the time elapsed since `lastStarted` as `"<h>h <m>m <s>s"`.
   *
   * @returns undefined when `lastStarted` cannot be parsed as a date; a start
   *   time in the future is reported as zero uptime.
   */
  private formatUptime(lastStarted: string | number | Date, now: number): string | undefined {
    const startTime = new Date(lastStarted).getTime();
    if (!Number.isFinite(startTime)) {
      return undefined;
    }

    const uptimeSeconds = Math.max(0, Math.floor((now - startTime) / 1000));
    const hours = Math.floor(uptimeSeconds / 3600);
    const minutes = Math.floor((uptimeSeconds % 3600) / 60);
    const seconds = uptimeSeconds % 60;
    return `${hours}h ${minutes}m ${seconds}s`;
  }

  /**
   * Aggregate all server metrics.
   *
   * @param server - Server configuration
   * @param processMemory - Optional pre-fetched process memory (for batch collection)
   * @param processCpuUsage - Optional pre-fetched process CPU usage (for batch collection)
   * @returns A metrics snapshot; minimal (no slots, unhealthy) when the
   *   server is not running.
   */
  async collectServerMetrics(
    server: ServerConfig,
    processMemory?: number | null,
    processCpuUsage?: number | null
  ): Promise<ServerMetrics> {
    const now = Date.now();

    // Check basic server status first
    const status = await statusChecker.checkServer(server);

    // If server not running, return minimal data
    if (!status.isRunning) {
      return {
        server,
        healthy: false,
        modelLoaded: false,
        modelName: server.modelName,
        contextSize: server.ctxSize,
        totalSlots: 0,
        activeSlots: 0,
        idleSlots: 0,
        slots: [],
        timestamp: now,
        stale: false,
      };
    }

    // Uptime is only meaningful for a running server with a recorded start.
    const uptime = server.lastStarted
      ? this.formatUptime(server.lastStarted, now)
      : undefined;

    // Fetch detailed metrics in parallel
    // If processMemory/CPU were pre-fetched (batch mode), use them; otherwise fetch individually
    const [healthy, props, slots, fetchedMemory, fetchedCpu] = await Promise.all([
      this.getHealth(),
      this.getProps(),
      this.getSlots(),
      processMemory !== undefined
        ? Promise.resolve(processMemory)
        : (server.pid ? getProcessMemory(server.pid) : Promise.resolve(null)),
      processCpuUsage !== undefined
        ? Promise.resolve(processCpuUsage)
        : (server.pid ? getProcessCpu(server.pid) : Promise.resolve(null)),
    ]);

    // Calculate slot statistics
    const activeSlots = slots.filter((s) => s.state === 'processing').length;
    const idleSlots = slots.filter((s) => s.state === 'idle').length;
    const totalSlots = props?.total_slots || slots.length;

    // Calculate average speeds (only from processing slots that have timings)
    const processingSlots = slots.filter((s) => s.state === 'processing' && s.timings);

    // NOTE: getSlots() always reports prompt_per_second as 0, so this average
    // is 0 whenever at least one processing slot carries timings.
    const avgPromptSpeed =
      processingSlots.length > 0
        ? processingSlots.reduce(
            (sum, s) => sum + (s.timings?.prompt_per_second || 0),
            0
          ) / processingSlots.length
        : undefined;

    const avgGenerateSpeed =
      processingSlots.length > 0
        ? processingSlots.reduce(
            (sum, s) => sum + (s.timings?.predicted_per_second || 0),
            0
          ) / processingSlots.length
        : undefined;

    // Calculate total memory (CPU + Metal GPU memory if available)
    let totalMemory = fetchedMemory ?? undefined;
    if (totalMemory !== undefined && server.metalMemoryMB) {
      // Add Metal memory (convert MB to bytes)
      totalMemory += server.metalMemoryMB * 1024 * 1024;
    }

    return {
      server,
      healthy,
      uptime,
      modelLoaded: props !== null,
      modelName: server.modelName,
      contextSize: props?.default_generation_settings?.n_ctx || server.ctxSize,
      totalSlots,
      activeSlots,
      idleSlots,
      slots,
      avgPromptSpeed,
      avgGenerateSpeed,
      processMemory: totalMemory,
      processCpuUsage: fetchedCpu ?? undefined,
      timestamp: now,
      stale: false,
    };
  }

  /**
   * Collect complete monitoring data (server + system metrics).
   *
   * @param server - Server configuration
   * @param updateInterval - Polling interval in milliseconds, echoed back in
   *   the result.
   */
  async collectMonitorData(
    server: ServerConfig,
    updateInterval: number = 2000
  ): Promise<MonitorData> {
    // Collect server and system metrics in parallel
    const [serverMetrics, systemMetrics] = await Promise.all([
      this.collectServerMetrics(server),
      systemCollector.collectSystemMetrics(),
    ]);

    return {
      server: serverMetrics,
      system: systemMetrics,
      lastUpdated: new Date(),
      updateInterval,
      consecutiveFailures: 0,
    };
  }
}