@appkit/llamacpp-cli 1.12.0 → 1.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. package/README.md +294 -168
  2. package/dist/cli.js +35 -0
  3. package/dist/cli.js.map +1 -1
  4. package/dist/commands/launch/claude.d.ts +6 -0
  5. package/dist/commands/launch/claude.d.ts.map +1 -0
  6. package/dist/commands/launch/claude.js +277 -0
  7. package/dist/commands/launch/claude.js.map +1 -0
  8. package/dist/lib/integration-checker.d.ts +26 -0
  9. package/dist/lib/integration-checker.d.ts.map +1 -0
  10. package/dist/lib/integration-checker.js +77 -0
  11. package/dist/lib/integration-checker.js.map +1 -0
  12. package/dist/lib/router-manager.d.ts +4 -0
  13. package/dist/lib/router-manager.d.ts.map +1 -1
  14. package/dist/lib/router-manager.js +10 -0
  15. package/dist/lib/router-manager.js.map +1 -1
  16. package/dist/lib/router-server.d.ts +13 -0
  17. package/dist/lib/router-server.d.ts.map +1 -1
  18. package/dist/lib/router-server.js +267 -7
  19. package/dist/lib/router-server.js.map +1 -1
  20. package/dist/types/integration-config.d.ts +28 -0
  21. package/dist/types/integration-config.d.ts.map +1 -0
  22. package/dist/types/integration-config.js +3 -0
  23. package/dist/types/integration-config.js.map +1 -0
  24. package/package.json +10 -2
  25. package/web/dist/assets/index-Bin89Lwr.css +1 -0
  26. package/web/dist/assets/index-CVmonw3T.js +17 -0
  27. package/web/{index.html → dist/index.html} +2 -1
  28. package/.versionrc.json +0 -16
  29. package/CHANGELOG.md +0 -213
  30. package/docs/images/.gitkeep +0 -1
  31. package/docs/images/web-ui-servers.png +0 -0
  32. package/src/cli.ts +0 -523
  33. package/src/commands/admin/config.ts +0 -121
  34. package/src/commands/admin/logs.ts +0 -91
  35. package/src/commands/admin/restart.ts +0 -26
  36. package/src/commands/admin/start.ts +0 -27
  37. package/src/commands/admin/status.ts +0 -84
  38. package/src/commands/admin/stop.ts +0 -16
  39. package/src/commands/config-global.ts +0 -38
  40. package/src/commands/config.ts +0 -323
  41. package/src/commands/create.ts +0 -183
  42. package/src/commands/delete.ts +0 -74
  43. package/src/commands/list.ts +0 -37
  44. package/src/commands/logs-all.ts +0 -251
  45. package/src/commands/logs.ts +0 -345
  46. package/src/commands/monitor.ts +0 -110
  47. package/src/commands/ps.ts +0 -84
  48. package/src/commands/pull.ts +0 -44
  49. package/src/commands/rm.ts +0 -107
  50. package/src/commands/router/config.ts +0 -116
  51. package/src/commands/router/logs.ts +0 -256
  52. package/src/commands/router/restart.ts +0 -36
  53. package/src/commands/router/start.ts +0 -60
  54. package/src/commands/router/status.ts +0 -119
  55. package/src/commands/router/stop.ts +0 -33
  56. package/src/commands/run.ts +0 -233
  57. package/src/commands/search.ts +0 -107
  58. package/src/commands/server-show.ts +0 -161
  59. package/src/commands/show.ts +0 -207
  60. package/src/commands/start.ts +0 -101
  61. package/src/commands/stop.ts +0 -39
  62. package/src/commands/tui.ts +0 -25
  63. package/src/lib/admin-manager.ts +0 -435
  64. package/src/lib/admin-server.ts +0 -1243
  65. package/src/lib/config-generator.ts +0 -130
  66. package/src/lib/download-job-manager.ts +0 -213
  67. package/src/lib/history-manager.ts +0 -172
  68. package/src/lib/launchctl-manager.ts +0 -225
  69. package/src/lib/metrics-aggregator.ts +0 -257
  70. package/src/lib/model-downloader.ts +0 -328
  71. package/src/lib/model-scanner.ts +0 -157
  72. package/src/lib/model-search.ts +0 -114
  73. package/src/lib/models-dir-setup.ts +0 -46
  74. package/src/lib/port-manager.ts +0 -80
  75. package/src/lib/router-logger.ts +0 -201
  76. package/src/lib/router-manager.ts +0 -414
  77. package/src/lib/router-server.ts +0 -538
  78. package/src/lib/state-manager.ts +0 -206
  79. package/src/lib/status-checker.ts +0 -113
  80. package/src/lib/system-collector.ts +0 -315
  81. package/src/tui/ConfigApp.ts +0 -1085
  82. package/src/tui/HistoricalMonitorApp.ts +0 -587
  83. package/src/tui/ModelsApp.ts +0 -368
  84. package/src/tui/MonitorApp.ts +0 -386
  85. package/src/tui/MultiServerMonitorApp.ts +0 -1833
  86. package/src/tui/RootNavigator.ts +0 -74
  87. package/src/tui/SearchApp.ts +0 -511
  88. package/src/tui/SplashScreen.ts +0 -149
  89. package/src/types/admin-config.ts +0 -25
  90. package/src/types/global-config.ts +0 -26
  91. package/src/types/history-types.ts +0 -39
  92. package/src/types/model-info.ts +0 -8
  93. package/src/types/monitor-types.ts +0 -162
  94. package/src/types/router-config.ts +0 -25
  95. package/src/types/server-config.ts +0 -46
  96. package/src/utils/downsample-utils.ts +0 -128
  97. package/src/utils/file-utils.ts +0 -146
  98. package/src/utils/format-utils.ts +0 -98
  99. package/src/utils/log-parser.ts +0 -284
  100. package/src/utils/log-utils.ts +0 -178
  101. package/src/utils/process-utils.ts +0 -316
  102. package/src/utils/prompt-utils.ts +0 -47
  103. package/test-load.sh +0 -100
  104. package/tsconfig.json +0 -20
  105. package/web/eslint.config.js +0 -23
  106. package/web/llamacpp-web-dist.tar.gz +0 -0
  107. package/web/package-lock.json +0 -4017
  108. package/web/package.json +0 -38
  109. package/web/postcss.config.js +0 -6
  110. package/web/src/App.css +0 -42
  111. package/web/src/App.tsx +0 -86
  112. package/web/src/assets/react.svg +0 -1
  113. package/web/src/components/ApiKeyPrompt.tsx +0 -71
  114. package/web/src/components/CreateServerModal.tsx +0 -372
  115. package/web/src/components/DownloadProgress.tsx +0 -123
  116. package/web/src/components/Nav.tsx +0 -89
  117. package/web/src/components/RouterConfigModal.tsx +0 -240
  118. package/web/src/components/SearchModal.tsx +0 -306
  119. package/web/src/components/ServerConfigModal.tsx +0 -291
  120. package/web/src/hooks/useApi.ts +0 -259
  121. package/web/src/index.css +0 -42
  122. package/web/src/lib/api.ts +0 -226
  123. package/web/src/main.tsx +0 -10
  124. package/web/src/pages/Dashboard.tsx +0 -103
  125. package/web/src/pages/Models.tsx +0 -258
  126. package/web/src/pages/Router.tsx +0 -270
  127. package/web/src/pages/RouterLogs.tsx +0 -201
  128. package/web/src/pages/ServerLogs.tsx +0 -553
  129. package/web/src/pages/Servers.tsx +0 -358
  130. package/web/src/types/api.ts +0 -140
  131. package/web/tailwind.config.js +0 -31
  132. package/web/tsconfig.app.json +0 -28
  133. package/web/tsconfig.json +0 -7
  134. package/web/tsconfig.node.json +0 -26
  135. package/web/vite.config.ts +0 -25
  136. /package/web/{public → dist}/vite.svg +0 -0
@@ -1,225 +0,0 @@
1
- import * as path from 'path';
2
- import * as fs from 'fs/promises';
3
- import { ServerConfig } from '../types/server-config';
4
- import { execCommand, execAsync } from '../utils/process-utils';
5
- import { writeFileAtomic, fileExists } from '../utils/file-utils';
6
-
7
/**
 * Snapshot of a single launchd job as parsed from `launchctl list` output.
 */
export interface ServiceStatus {
  /** True when a PID was reported for the job. */
  isRunning: boolean;
  /** PID of the job, or null when the PID column was `-` or the job was not found. */
  pid: number | null;
  /** Status column of the job, or null when it was `-` or the job was not found. */
  exitCode: number | null;
  /** Human-readable description of a non-zero exit code; absent otherwise. */
  lastExitReason?: string;
}
13
-
14
/**
 * Renders launchd property lists for llama-server instances and drives the
 * `launchctl` CLI to load, unload, start, stop and inspect them.
 */
export class LaunchctlManager {
  /**
   * Generate plist XML content for a server.
   *
   * Every interpolated value (label, arguments, log paths) is XML-escaped so
   * that characters such as `&` or `<` in a model path or custom flag cannot
   * produce a malformed property list.
   *
   * @param config - Server definition the plist is rendered from.
   * @returns The complete plist document.
   */
  generatePlist(config: ServerConfig): string {
    // Build program arguments array
    const args = [
      '/opt/homebrew/bin/llama-server',
      '--model', config.modelPath,
      '--host', config.host,
      '--port', config.port.toString(),
      '--threads', config.threads.toString(),
      '--ctx-size', config.ctxSize.toString(),
      '--gpu-layers', config.gpuLayers.toString(),
    ];

    // Add flags
    if (config.embeddings) args.push('--embeddings');
    if (config.jinja) args.push('--jinja');

    // Conditionally enable verbose HTTP logging for detailed request/response info
    if (config.verbose) {
      args.push('--log-verbose');
    }

    // Add custom flags
    if (config.customFlags && config.customFlags.length > 0) {
      args.push(...config.customFlags);
    }

    // Generate XML array elements
    const argsXml = args
      .map((arg) => `      <string>${this.escapeXml(arg)}</string>`)
      .join('\n');

    return `<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN"
  "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
  <dict>
    <key>Label</key>
    <string>${this.escapeXml(config.label)}</string>

    <key>ProgramArguments</key>
    <array>
${argsXml}
    </array>

    <key>RunAtLoad</key>
    <false/>

    <key>KeepAlive</key>
    <dict>
      <key>Crashed</key>
      <true/>
      <key>SuccessfulExit</key>
      <false/>
    </dict>

    <key>StandardOutPath</key>
    <string>${this.escapeXml(config.stdoutPath)}</string>

    <key>StandardErrorPath</key>
    <string>${this.escapeXml(config.stderrPath)}</string>

    <key>WorkingDirectory</key>
    <string>/tmp</string>

    <key>ThrottleInterval</key>
    <integer>10</integer>
  </dict>
</plist>
`;
  }

  /**
   * Create and write plist file
   */
  async createPlist(config: ServerConfig): Promise<void> {
    const plistContent = this.generatePlist(config);
    await writeFileAtomic(config.plistPath, plistContent);
  }

  /**
   * Delete plist file (no-op when it does not exist)
   */
  async deletePlist(plistPath: string): Promise<void> {
    if (await fileExists(plistPath)) {
      await fs.unlink(plistPath);
    }
  }

  /**
   * Load service (register with launchctl)
   */
  async loadService(plistPath: string): Promise<void> {
    await execCommand(`launchctl load ${this.quoteForShell(plistPath)}`);
  }

  /**
   * Unload service (unregister from launchctl).
   * Best-effort: failures are deliberately ignored.
   */
  async unloadService(plistPath: string): Promise<void> {
    try {
      await execCommand(`launchctl unload ${this.quoteForShell(plistPath)}`);
    } catch {
      // Ignore errors if service is not loaded
    }
  }

  /**
   * Start service
   */
  async startService(label: string): Promise<void> {
    await execCommand(`launchctl start ${this.quoteForShell(label)}`);
  }

  /**
   * Stop service
   */
  async stopService(label: string): Promise<void> {
    await execCommand(`launchctl stop ${this.quoteForShell(label)}`);
  }

  /**
   * Get service status from launchctl.
   *
   * Runs a plain `launchctl list` and matches the label column exactly in
   * process, so the label is never interpolated into a command line and is
   * never treated as a grep pattern.
   *
   * @param label - launchd job label to look up.
   * @returns The parsed status; a not-running status when the job is not
   *   listed or the command fails.
   */
  async getServiceStatus(label: string): Promise<ServiceStatus> {
    const notLoaded: ServiceStatus = { isRunning: false, pid: null, exitCode: null };

    let stdout: string;
    try {
      ({ stdout } = await execAsync('launchctl list'));
    } catch {
      // Service not found or not loaded
      return notLoaded;
    }

    for (const line of stdout.split('\n')) {
      // Columns are: PID, status, label
      const [pidField, exitField, serviceLabel] = line.trim().split(/\s+/);

      // Match the exact label
      if (serviceLabel !== label) continue;

      const pid = this.parseListColumn(pidField);
      const exitCode = this.parseListColumn(exitField);
      return {
        isRunning: pid !== null,
        pid,
        exitCode,
        lastExitReason: this.interpretExitCode(exitCode),
      };
    }

    // Service not found
    return notLoaded;
  }

  /**
   * Interpret exit code to human-readable reason
   */
  private interpretExitCode(code: number | null): string | undefined {
    if (code === null || code === 0) return undefined;
    if (code === -9) return 'Force killed (SIGKILL)';
    if (code === -15) return 'Terminated (SIGTERM)';
    return `Exit code: ${code}`;
  }

  /**
   * Wait for service to start (with timeout)
   *
   * @returns true once the service reports running, false on timeout.
   */
  async waitForServiceStart(label: string, timeoutMs = 5000): Promise<boolean> {
    return this.waitForRunningState(label, true, timeoutMs);
  }

  /**
   * Wait for service to stop (with timeout)
   *
   * @returns true once the service no longer reports running, false on timeout.
   */
  async waitForServiceStop(label: string, timeoutMs = 5000): Promise<boolean> {
    return this.waitForRunningState(label, false, timeoutMs);
  }

  /** Poll the service status every 500 ms until `isRunning` equals `wantRunning` or the timeout elapses. */
  private async waitForRunningState(
    label: string,
    wantRunning: boolean,
    timeoutMs: number
  ): Promise<boolean> {
    const startTime = Date.now();
    while (Date.now() - startTime < timeoutMs) {
      const { isRunning } = await this.getServiceStatus(label);
      if (isRunning === wantRunning) {
        return true;
      }
      await new Promise<void>((resolve) => setTimeout(resolve, 500));
    }
    return false;
  }

  /** Parse one numeric column of `launchctl list`; `-`, missing or non-numeric values become null. */
  private parseListColumn(field: string | undefined): number | null {
    if (field === undefined || field === '-') return null;
    const parsed = parseInt(field, 10);
    return Number.isNaN(parsed) ? null : parsed;
  }

  /** Escape the five XML special characters so a value is safe inside a plist element. */
  private escapeXml(value: unknown): string {
    return String(value)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&apos;');
  }

  /**
   * Wrap a value in double quotes, backslash-escaping the characters that stay
   * special inside them (`"`, `\`, `$`, backtick).
   * NOTE(review): assumes execCommand hands the string to a POSIX shell — confirm.
   */
  private quoteForShell(value: string): string {
    const escaped = value.replace(/["\\$`]/g, '\\$&');
    return `"${escaped}"`;
  }
}

// Export singleton instance
export const launchctlManager = new LaunchctlManager();
@@ -1,257 +0,0 @@
1
- import { ServerConfig } from '../types/server-config.js';
2
- import { ServerMetrics, SlotInfo, MonitorData } from '../types/monitor-types.js';
3
- import { statusChecker } from './status-checker.js';
4
- import { systemCollector } from './system-collector.js';
5
- import { getProcessMemory, getProcessCpu } from '../utils/process-utils.js';
6
-
7
/**
 * Aggregates metrics from llama.cpp server API endpoints.
 * Combines server health, slot status, and model properties.
 */
export class MetricsAggregator {
  private serverUrl: string;
  private timeout: number;
  /** Last observed decode counter per slot id, used to derive tokens/second between polls. */
  private previousSlots: Map<number, { n_decoded: number; timestamp: number }> = new Map();

  /**
   * @param server - Server whose HTTP API is polled.
   * @param timeout - Default per-request timeout in milliseconds.
   */
  constructor(server: ServerConfig, timeout: number = 5000) {
    // Handle null host (legacy configs) by defaulting to 127.0.0.1
    const host = server.host || '127.0.0.1';
    // IPv6 literals must be bracketed inside a URL authority.
    const urlHost = host.includes(':') && !host.startsWith('[') ? `[${host}]` : host;
    this.serverUrl = `http://${urlHost}:${server.port}`;
    this.timeout = timeout;
  }

  /**
   * Fetch data from llama.cpp API with timeout.
   *
   * The abort timer is always cleared in `finally`, so a failed request does
   * not leave a live timer behind, and the timeout also covers reading the
   * response body.
   *
   * @returns The parsed JSON body, or null on a non-2xx response, network
   *   error, timeout or parse error.
   */
  private async fetchWithTimeout(
    endpoint: string,
    customTimeout?: number
  ): Promise<any | null> {
    const controller = new AbortController();
    const timeoutMs = customTimeout ?? this.timeout;
    const timeoutId = setTimeout(() => controller.abort(), timeoutMs);

    try {
      const response = await fetch(`${this.serverUrl}${endpoint}`, {
        signal: controller.signal,
      });

      if (!response.ok) {
        return null;
      }

      return await response.json();
    } catch {
      // Network error, timeout, or parse error
      return null;
    } finally {
      clearTimeout(timeoutId);
    }
  }

  /**
   * Get server health status
   */
  private async getHealth(): Promise<boolean> {
    const health = await this.fetchWithTimeout('/health');
    return health !== null && health.status === 'ok';
  }

  /**
   * Get server properties (model info, context size, etc.)
   */
  private async getProps(): Promise<any> {
    return await this.fetchWithTimeout('/props');
  }

  /**
   * Get active slots information with calculated tok/s.
   *
   * Tokens/second is derived from the change in `n_decoded` between two
   * consecutive polls of the same slot.
   */
  private async getSlots(): Promise<SlotInfo[]> {
    const data = await this.fetchWithTimeout('/slots');
    if (!Array.isArray(data)) {
      return [];
    }

    const now = Date.now();

    return data.map((slot: any) => {
      const slotId = slot.id;
      const n_decoded = slot.next_token?.[0]?.n_decoded || 0;
      const isProcessing = slot.is_processing;

      // Calculate tokens per second by comparing with previous poll
      let predicted_per_second: number | undefined;

      if (isProcessing && n_decoded > 0) {
        const previous = this.previousSlots.get(slotId);

        if (previous && previous.n_decoded < n_decoded) {
          const tokensGenerated = n_decoded - previous.n_decoded;
          const timeElapsed = (now - previous.timestamp) / 1000; // Convert to seconds

          if (timeElapsed > 0) {
            predicted_per_second = tokensGenerated / timeElapsed;
          }
        }

        // Store current state for next comparison
        this.previousSlots.set(slotId, { n_decoded, timestamp: now });
      } else {
        // Slot is idle, or is processing but has decoded nothing yet (a new
        // request has started): drop the old sample so the next rate is not
        // computed against a previous request's counter.
        this.previousSlots.delete(slotId);
      }

      return {
        id: slotId,
        state: isProcessing ? 'processing' : 'idle',
        n_prompt_tokens: slot.n_prompt_tokens,
        n_decoded,
        n_ctx: slot.n_ctx || 0,
        timings:
          predicted_per_second !== undefined
            ? {
                prompt_n: 0,
                prompt_ms: 0,
                prompt_per_token_ms: 0,
                prompt_per_second: 0,
                predicted_n: n_decoded,
                predicted_ms: 0,
                predicted_per_token_ms: 0,
                predicted_per_second,
              }
            : undefined,
      };
    });
  }

  /**
   * Aggregate all server metrics
   * @param server - Server configuration
   * @param processMemory - Optional pre-fetched process memory (for batch collection)
   * @param processCpuUsage - Optional pre-fetched process CPU usage (for batch collection)
   * @returns Metrics for the server; a minimal record when it is not running.
   */
  async collectServerMetrics(
    server: ServerConfig,
    processMemory?: number | null,
    processCpuUsage?: number | null
  ): Promise<ServerMetrics> {
    const now = Date.now();

    // Check basic server status first
    const status = await statusChecker.checkServer(server);

    // If server not running, return minimal data
    if (!status.isRunning) {
      return {
        server,
        healthy: false,
        modelLoaded: false,
        modelName: server.modelName,
        contextSize: server.ctxSize,
        totalSlots: 0,
        activeSlots: 0,
        idleSlots: 0,
        slots: [],
        timestamp: now,
        stale: false,
      };
    }

    // Calculate uptime if the server has lastStarted
    const uptime = server.lastStarted
      ? this.formatUptime(server.lastStarted, now)
      : undefined;

    // Fetch detailed metrics in parallel
    // If processMemory/CPU were pre-fetched (batch mode), use them; otherwise fetch individually
    const [healthy, props, slots, fetchedMemory, fetchedCpu] = await Promise.all([
      this.getHealth(),
      this.getProps(),
      this.getSlots(),
      processMemory !== undefined
        ? Promise.resolve(processMemory)
        : (server.pid ? getProcessMemory(server.pid) : Promise.resolve(null)),
      processCpuUsage !== undefined
        ? Promise.resolve(processCpuUsage)
        : (server.pid ? getProcessCpu(server.pid) : Promise.resolve(null)),
    ]);

    // Calculate slot statistics
    const activeSlots = slots.filter((s) => s.state === 'processing').length;
    const idleSlots = slots.filter((s) => s.state === 'idle').length;
    const totalSlots = props?.total_slots || slots.length;

    // Calculate average speeds (only from processing slots that carry timings)
    const processingSlots = slots.filter((s) => s.state === 'processing' && s.timings);
    const avgPromptSpeed = this.averageOf(
      processingSlots.map((s) => s.timings?.prompt_per_second || 0)
    );
    const avgGenerateSpeed = this.averageOf(
      processingSlots.map((s) => s.timings?.predicted_per_second || 0)
    );

    // Calculate total memory (CPU + Metal GPU memory if available)
    let totalMemory = fetchedMemory ?? undefined;
    if (totalMemory !== undefined && server.metalMemoryMB) {
      // Add Metal memory (convert MB to bytes)
      totalMemory += server.metalMemoryMB * 1024 * 1024;
    }

    return {
      server,
      healthy,
      uptime,
      modelLoaded: props !== null,
      modelName: server.modelName,
      contextSize: props?.default_generation_settings?.n_ctx || server.ctxSize,
      totalSlots,
      activeSlots,
      idleSlots,
      slots,
      avgPromptSpeed,
      avgGenerateSpeed,
      processMemory: totalMemory,
      processCpuUsage: fetchedCpu ?? undefined,
      timestamp: now,
      stale: false,
    };
  }

  /**
   * Collect complete monitoring data (server + system metrics)
   */
  async collectMonitorData(
    server: ServerConfig,
    updateInterval: number = 2000
  ): Promise<MonitorData> {
    // Collect server and system metrics in parallel
    const [serverMetrics, systemMetrics] = await Promise.all([
      this.collectServerMetrics(server),
      systemCollector.collectSystemMetrics(),
    ]);

    return {
      server: serverMetrics,
      system: systemMetrics,
      lastUpdated: new Date(),
      updateInterval,
      consecutiveFailures: 0,
    };
  }

  /**
   * Render the time since `lastStarted` as `"<h>h <m>m <s>s"`.
   * Returns undefined for an unparseable start time; a start time in the
   * future (clock skew) is clamped to zero instead of going negative.
   */
  private formatUptime(lastStarted: string | number | Date, now: number): string | undefined {
    const startTime = new Date(lastStarted).getTime();
    if (!Number.isFinite(startTime)) {
      return undefined;
    }
    const uptimeSeconds = Math.max(0, Math.floor((now - startTime) / 1000));
    const hours = Math.floor(uptimeSeconds / 3600);
    const minutes = Math.floor((uptimeSeconds % 3600) / 60);
    const seconds = uptimeSeconds % 60;
    return `${hours}h ${minutes}m ${seconds}s`;
  }

  /** Arithmetic mean of the values, or undefined for an empty list. */
  private averageOf(values: number[]): number | undefined {
    if (values.length === 0) {
      return undefined;
    }
    return values.reduce((sum, value) => sum + value, 0) / values.length;
  }
}