@appkit/llamacpp-cli 1.4.1 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. package/CHANGELOG.md +21 -0
  2. package/MONITORING-ACCURACY-FIX.md +199 -0
  3. package/PER-PROCESS-METRICS.md +190 -0
  4. package/README.md +136 -1
  5. package/dist/cli.js +21 -4
  6. package/dist/cli.js.map +1 -1
  7. package/dist/commands/create.d.ts.map +1 -1
  8. package/dist/commands/create.js +12 -3
  9. package/dist/commands/create.js.map +1 -1
  10. package/dist/commands/monitor.d.ts +2 -0
  11. package/dist/commands/monitor.d.ts.map +1 -0
  12. package/dist/commands/monitor.js +126 -0
  13. package/dist/commands/monitor.js.map +1 -0
  14. package/dist/commands/ps.d.ts +3 -1
  15. package/dist/commands/ps.d.ts.map +1 -1
  16. package/dist/commands/ps.js +75 -5
  17. package/dist/commands/ps.js.map +1 -1
  18. package/dist/commands/server-show.d.ts.map +1 -1
  19. package/dist/commands/server-show.js +10 -3
  20. package/dist/commands/server-show.js.map +1 -1
  21. package/dist/commands/start.d.ts.map +1 -1
  22. package/dist/commands/start.js +14 -2
  23. package/dist/commands/start.js.map +1 -1
  24. package/dist/lib/history-manager.d.ts +46 -0
  25. package/dist/lib/history-manager.d.ts.map +1 -0
  26. package/dist/lib/history-manager.js +157 -0
  27. package/dist/lib/history-manager.js.map +1 -0
  28. package/dist/lib/metrics-aggregator.d.ts +40 -0
  29. package/dist/lib/metrics-aggregator.d.ts.map +1 -0
  30. package/dist/lib/metrics-aggregator.js +211 -0
  31. package/dist/lib/metrics-aggregator.js.map +1 -0
  32. package/dist/lib/system-collector.d.ts +80 -0
  33. package/dist/lib/system-collector.d.ts.map +1 -0
  34. package/dist/lib/system-collector.js +311 -0
  35. package/dist/lib/system-collector.js.map +1 -0
  36. package/dist/tui/HistoricalMonitorApp.d.ts +5 -0
  37. package/dist/tui/HistoricalMonitorApp.d.ts.map +1 -0
  38. package/dist/tui/HistoricalMonitorApp.js +490 -0
  39. package/dist/tui/HistoricalMonitorApp.js.map +1 -0
  40. package/dist/tui/MonitorApp.d.ts +4 -0
  41. package/dist/tui/MonitorApp.d.ts.map +1 -0
  42. package/dist/tui/MonitorApp.js +315 -0
  43. package/dist/tui/MonitorApp.js.map +1 -0
  44. package/dist/tui/MultiServerMonitorApp.d.ts +4 -0
  45. package/dist/tui/MultiServerMonitorApp.d.ts.map +1 -0
  46. package/dist/tui/MultiServerMonitorApp.js +712 -0
  47. package/dist/tui/MultiServerMonitorApp.js.map +1 -0
  48. package/dist/types/history-types.d.ts +30 -0
  49. package/dist/types/history-types.d.ts.map +1 -0
  50. package/dist/types/history-types.js +11 -0
  51. package/dist/types/history-types.js.map +1 -0
  52. package/dist/types/monitor-types.d.ts +123 -0
  53. package/dist/types/monitor-types.d.ts.map +1 -0
  54. package/dist/types/monitor-types.js +3 -0
  55. package/dist/types/monitor-types.js.map +1 -0
  56. package/dist/types/server-config.d.ts +1 -0
  57. package/dist/types/server-config.d.ts.map +1 -1
  58. package/dist/types/server-config.js.map +1 -1
  59. package/dist/utils/downsample-utils.d.ts +35 -0
  60. package/dist/utils/downsample-utils.d.ts.map +1 -0
  61. package/dist/utils/downsample-utils.js +107 -0
  62. package/dist/utils/downsample-utils.js.map +1 -0
  63. package/dist/utils/file-utils.d.ts +6 -0
  64. package/dist/utils/file-utils.d.ts.map +1 -1
  65. package/dist/utils/file-utils.js +38 -0
  66. package/dist/utils/file-utils.js.map +1 -1
  67. package/dist/utils/process-utils.d.ts +35 -2
  68. package/dist/utils/process-utils.d.ts.map +1 -1
  69. package/dist/utils/process-utils.js +220 -25
  70. package/dist/utils/process-utils.js.map +1 -1
  71. package/docs/images/.gitkeep +1 -0
  72. package/package.json +5 -1
  73. package/src/cli.ts +21 -4
  74. package/src/commands/create.ts +14 -4
  75. package/src/commands/monitor.ts +110 -0
  76. package/src/commands/ps.ts +88 -5
  77. package/src/commands/server-show.ts +10 -3
  78. package/src/commands/start.ts +15 -2
  79. package/src/lib/history-manager.ts +172 -0
  80. package/src/lib/metrics-aggregator.ts +257 -0
  81. package/src/lib/system-collector.ts +315 -0
  82. package/src/tui/HistoricalMonitorApp.ts +548 -0
  83. package/src/tui/MonitorApp.ts +386 -0
  84. package/src/tui/MultiServerMonitorApp.ts +792 -0
  85. package/src/types/history-types.ts +39 -0
  86. package/src/types/monitor-types.ts +162 -0
  87. package/src/types/server-config.ts +1 -0
  88. package/src/utils/downsample-utils.ts +128 -0
  89. package/src/utils/file-utils.ts +40 -0
  90. package/src/utils/process-utils.ts +243 -25
  91. package/test-load.sh +100 -0
package/src/commands/start.ts

@@ -2,6 +2,7 @@ import chalk from 'chalk';
 import { stateManager } from '../lib/state-manager';
 import { launchctlManager } from '../lib/launchctl-manager';
 import { statusChecker } from '../lib/status-checker';
+import { parseMetalMemoryFromLog } from '../utils/file-utils';

 export async function startCommand(identifier: string): Promise<void> {
   // Initialize state manager
@@ -61,9 +62,21 @@ export async function startCommand(identifier: string): Promise<void> {
   }

   // 7. Update server status
-  await statusChecker.updateServerStatus(server);
+  let updatedServer = await statusChecker.updateServerStatus(server);

-  // 8. Display success
+  // 8. Parse Metal (GPU) memory allocation if not already captured
+  if (!updatedServer.metalMemoryMB) {
+    console.log(chalk.dim('Detecting Metal (GPU) memory allocation...'));
+    await new Promise(resolve => setTimeout(resolve, 8000)); // 8 second delay
+    const metalMemoryMB = await parseMetalMemoryFromLog(updatedServer.stderrPath);
+    if (metalMemoryMB) {
+      updatedServer = { ...updatedServer, metalMemoryMB };
+      await stateManager.saveServerConfig(updatedServer);
+      console.log(chalk.dim(`Metal memory: ${metalMemoryMB.toFixed(0)} MB`));
+    }
+  }
+
+  // 9. Display success
   console.log();
   console.log(chalk.green('✅ Server started successfully!'));
   console.log();
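
Note: the hunk above relies on parseMetalMemoryFromLog from package/src/utils/file-utils.ts (+40 lines in this release), whose body is not included in this excerpt. Below is a minimal sketch of what such a helper could look like, assuming llama.cpp's stderr log reports Metal buffer allocations as "... size = <n> MiB" lines; the function name, regex, and totalling logic are illustrative assumptions, not the package's actual implementation.

import { readFile } from 'fs/promises';

// Hypothetical sketch only - the shipped helper lives in src/utils/file-utils.ts and may differ.
// Assumes stderr lines such as "ggml_metal_init: ... buffer ... size = 1234.56 MiB".
export async function parseMetalMemoryFromLogSketch(stderrPath?: string): Promise<number | null> {
  if (!stderrPath) return null;
  try {
    const log = await readFile(stderrPath, 'utf-8');
    const sizes = log.match(/size\s*=\s*([\d.]+)\s*MiB/g) ?? [];
    let totalMiB = 0;
    for (const entry of sizes) {
      const value = /([\d.]+)\s*MiB/.exec(entry)?.[1];
      if (value) totalMiB += parseFloat(value);
    }
    // Caller stores this as metalMemoryMB; returning null means "not detected yet".
    return totalMiB > 0 ? totalMiB : null;
  } catch {
    return null;
  }
}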
package/src/lib/history-manager.ts

@@ -0,0 +1,172 @@
+import { mkdir, readFile, writeFile, access, rename } from 'fs/promises';
+import { join } from 'path';
+import { homedir } from 'os';
+import { ServerMetrics, SystemMetrics } from '../types/monitor-types.js';
+import { HistoryData, HistorySnapshot, TIME_WINDOW_HOURS, TimeWindow } from '../types/history-types.js';
+
+export class HistoryManager {
+  private serverId: string;
+  private historyDir: string;
+  private historyPath: string;
+  private readonly MAX_AGE_MS = 24 * 60 * 60 * 1000; // 24 hours
+
+  constructor(serverId: string) {
+    this.serverId = serverId;
+    this.historyDir = join(homedir(), '.llamacpp', 'history');
+    this.historyPath = join(this.historyDir, `${serverId}.json`);
+  }
+
+  /**
+   * Append a new snapshot to history (with auto-pruning)
+   */
+  async appendSnapshot(serverMetrics: ServerMetrics, systemMetrics?: SystemMetrics): Promise<void> {
+    try {
+      // Ensure history directory exists
+      await mkdir(this.historyDir, { recursive: true });
+
+      // Load existing history
+      const historyData = await this.loadHistoryData();
+
+      // Create new snapshot
+      const snapshot: HistorySnapshot = {
+        timestamp: Date.now(),
+        server: {
+          healthy: serverMetrics.healthy,
+          uptime: serverMetrics.uptime,
+          activeSlots: serverMetrics.activeSlots,
+          idleSlots: serverMetrics.idleSlots,
+          totalSlots: serverMetrics.totalSlots,
+          avgPromptSpeed: serverMetrics.avgPromptSpeed,
+          avgGenerateSpeed: serverMetrics.avgGenerateSpeed,
+          processMemory: serverMetrics.processMemory,
+          processCpuUsage: serverMetrics.processCpuUsage,
+        },
+        system: systemMetrics ? {
+          gpuUsage: systemMetrics.gpuUsage,
+          cpuUsage: systemMetrics.cpuUsage,
+          aneUsage: systemMetrics.aneUsage,
+          temperature: systemMetrics.temperature,
+          memoryUsed: systemMetrics.memoryUsed,
+          memoryTotal: systemMetrics.memoryTotal,
+        } : undefined,
+      };
+
+      // Append new snapshot
+      historyData.snapshots.push(snapshot);
+
+      // Prune old snapshots (keep only last 24h)
+      historyData.snapshots = this.pruneOldSnapshots(historyData.snapshots, this.MAX_AGE_MS);
+
+      // Atomic write: write to temp file in same directory, then rename
+      // This prevents read collisions during concurrent access
+      // IMPORTANT: temp file MUST be in same directory as destination for rename to work across filesystems
+      const tempPath = join(this.historyDir, `.${this.serverId}-${Date.now()}.tmp`);
+      await writeFile(tempPath, JSON.stringify(historyData, null, 2), 'utf-8');
+      await rename(tempPath, this.historyPath);
+    } catch (error) {
+      // Silent failure - don't interrupt monitoring
+      // Don't throw - just return silently to avoid polluting console
+      return;
+    }
+  }
+
+  /**
+   * Load all snapshots within specified time window
+   */
+  async loadHistory(windowHours: number): Promise<HistorySnapshot[]> {
+    // Retry logic for file I/O collisions during concurrent read/write
+    const maxRetries = 3;
+    let lastError: Error | null = null;
+
+    for (let attempt = 0; attempt < maxRetries; attempt++) {
+      try {
+        const historyData = await this.loadHistoryData();
+        return this.filterByTimeWindow(historyData.snapshots, windowHours);
+      } catch (error) {
+        lastError = error as Error;
+        // Wait briefly before retry (exponential backoff)
+        if (attempt < maxRetries - 1) {
+          await new Promise(resolve => setTimeout(resolve, 50 * Math.pow(2, attempt)));
+        }
+      }
+    }
+
+    // All retries failed - throw error so it can be handled upstream
+    throw new Error(`Failed to load history after ${maxRetries} attempts: ${lastError?.message || 'Unknown error'}`);
+  }
+
+  /**
+   * Load history for specific time window type
+   */
+  async loadHistoryByWindow(window: TimeWindow): Promise<HistorySnapshot[]> {
+    return this.loadHistory(TIME_WINDOW_HOURS[window]);
+  }
+
+  /**
+   * Get file path for server history
+   */
+  getHistoryPath(): string {
+    return this.historyPath;
+  }
+
+  /**
+   * Check if history file exists
+   */
+  async hasHistory(): Promise<boolean> {
+    try {
+      await access(this.historyPath);
+      return true;
+    } catch {
+      return false;
+    }
+  }
+
+  /**
+   * Clear all history for server
+   */
+  async clearHistory(): Promise<void> {
+    const emptyHistory: HistoryData = {
+      serverId: this.serverId,
+      snapshots: [],
+    };
+
+    await mkdir(this.historyDir, { recursive: true });
+
+    // Atomic write - temp file in same directory as destination
+    const tempPath = join(this.historyDir, `.${this.serverId}-${Date.now()}.tmp`);
+    await writeFile(tempPath, JSON.stringify(emptyHistory, null, 2), 'utf-8');
+    await rename(tempPath, this.historyPath);
+  }
+
+  /**
+   * Load full history data from file
+   */
+  private async loadHistoryData(): Promise<HistoryData> {
+    try {
+      const content = await readFile(this.historyPath, 'utf-8');
+      return JSON.parse(content) as HistoryData;
+    } catch (error) {
+      // File doesn't exist or is corrupted, return empty history
+      return {
+        serverId: this.serverId,
+        snapshots: [],
+      };
+    }
+  }
+
+  /**
+   * Prune snapshots older than maxAge
+   */
+  private pruneOldSnapshots(snapshots: HistorySnapshot[], maxAgeMs: number): HistorySnapshot[] {
+    const cutoff = Date.now() - maxAgeMs;
+    return snapshots.filter(s => s.timestamp >= cutoff);
+  }
+
+  /**
+   * Filter snapshots by time window
+   */
+  private filterByTimeWindow(snapshots: HistorySnapshot[], windowHours: number): HistorySnapshot[] {
+    const cutoff = Date.now() - (windowHours * 60 * 60 * 1000);
+    return snapshots.filter(s => s.timestamp >= cutoff);
+  }
+}
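
The intended call pattern for the class above is straightforward: one HistoryManager per server id, appendSnapshot on every monitoring poll, and loadHistory / loadHistoryByWindow when rendering charts. Below is a usage sketch based only on the methods shown here; 'my-server' is a placeholder id, the import paths assume a caller inside src/, and the two metrics objects are assumed to come from the aggregator and system collector elsewhere in this release.

import { HistoryManager } from './lib/history-manager.js';
import { ServerMetrics, SystemMetrics } from './types/monitor-types.js';

// Usage sketch - 'my-server' is a placeholder server id.
async function recordAndQuery(serverMetrics: ServerMetrics, systemMetrics?: SystemMetrics) {
  const history = new HistoryManager('my-server');

  // Writes one snapshot to ~/.llamacpp/history/my-server.json, pruning anything older than 24h.
  await history.appendSnapshot(serverMetrics, systemMetrics);

  // Read back the last hour of snapshots; loadHistory takes the window in hours.
  if (await history.hasHistory()) {
    const lastHour = await history.loadHistory(1);
    console.log(`${lastHour.length} snapshots in ${history.getHistoryPath()}`);
  }
}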
package/src/lib/metrics-aggregator.ts

@@ -0,0 +1,257 @@
+import { ServerConfig } from '../types/server-config.js';
+import { ServerMetrics, SlotInfo, MonitorData } from '../types/monitor-types.js';
+import { statusChecker } from './status-checker.js';
+import { systemCollector } from './system-collector.js';
+import { getProcessMemory, getProcessCpu } from '../utils/process-utils.js';
+
+/**
+ * Aggregates metrics from llama.cpp server API endpoints
+ * Combines server health, slot status, and model properties
+ */
+export class MetricsAggregator {
+  private serverUrl: string;
+  private timeout: number;
+  private previousSlots: Map<number, { n_decoded: number; timestamp: number }> = new Map();
+
+  constructor(server: ServerConfig, timeout: number = 5000) {
+    // Handle null host (legacy configs) by defaulting to 127.0.0.1
+    const host = server.host || '127.0.0.1';
+    this.serverUrl = `http://${host}:${server.port}`;
+    this.timeout = timeout;
+  }
+
+  /**
+   * Fetch data from llama.cpp API with timeout
+   */
+  private async fetchWithTimeout(
+    endpoint: string,
+    customTimeout?: number
+  ): Promise<any | null> {
+    try {
+      const controller = new AbortController();
+      const timeoutMs = customTimeout ?? this.timeout;
+      const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
+
+      const response = await fetch(`${this.serverUrl}${endpoint}`, {
+        signal: controller.signal,
+      });
+
+      clearTimeout(timeoutId);
+
+      if (!response.ok) {
+        return null;
+      }
+
+      return await response.json();
+    } catch (err) {
+      // Network error, timeout, or parse error
+      return null;
+    }
+  }
+
+  /**
+   * Get server health status
+   */
+  private async getHealth(): Promise<boolean> {
+    const health = await this.fetchWithTimeout('/health');
+    return health !== null && health.status === 'ok';
+  }
+
+  /**
+   * Get server properties (model info, context size, etc.)
+   */
+  private async getProps(): Promise<any> {
+    return await this.fetchWithTimeout('/props');
+  }
+
+  /**
+   * Get active slots information with calculated tok/s
+   */
+  private async getSlots(): Promise<SlotInfo[]> {
+    const data = await this.fetchWithTimeout('/slots');
+    if (!data || !Array.isArray(data)) {
+      return [];
+    }
+
+    const now = Date.now();
+
+    return data.map((slot: any) => {
+      const slotId = slot.id;
+      const n_decoded = slot.next_token?.[0]?.n_decoded || 0;
+      const isProcessing = slot.is_processing;
+
+      // Calculate tokens per second by comparing with previous poll
+      let predicted_per_second: number | undefined;
+
+      if (isProcessing && n_decoded > 0) {
+        const previous = this.previousSlots.get(slotId);
+
+        if (previous && previous.n_decoded < n_decoded) {
+          const tokensGenerated = n_decoded - previous.n_decoded;
+          const timeElapsed = (now - previous.timestamp) / 1000; // Convert to seconds
+
+          if (timeElapsed > 0) {
+            predicted_per_second = tokensGenerated / timeElapsed;
+          }
+        }
+
+        // Store current state for next comparison
+        this.previousSlots.set(slotId, { n_decoded, timestamp: now });
+      } else if (!isProcessing) {
+        // Clear history when slot becomes idle
+        this.previousSlots.delete(slotId);
+      }
+
+      return {
+        id: slotId,
+        state: isProcessing ? 'processing' : 'idle',
+        n_prompt_tokens: slot.n_prompt_tokens,
+        n_decoded,
+        n_ctx: slot.n_ctx || 0,
+        timings: predicted_per_second
+          ? {
+              prompt_n: 0,
+              prompt_ms: 0,
+              prompt_per_token_ms: 0,
+              prompt_per_second: 0,
+              predicted_n: n_decoded,
+              predicted_ms: 0,
+              predicted_per_token_ms: 0,
+              predicted_per_second,
+            }
+          : undefined,
+      };
+    });
+  }
+
+  /**
+   * Aggregate all server metrics
+   * @param server - Server configuration
+   * @param processMemory - Optional pre-fetched process memory (for batch collection)
+   * @param processCpuUsage - Optional pre-fetched process CPU usage (for batch collection)
+   */
+  async collectServerMetrics(
+    server: ServerConfig,
+    processMemory?: number | null,
+    processCpuUsage?: number | null
+  ): Promise<ServerMetrics> {
+    const now = Date.now();
+
+    // Check basic server status first
+    const status = await statusChecker.checkServer(server);
+
+    // Calculate uptime if server is running and has lastStarted
+    let uptime: string | undefined;
+    if (status.isRunning && server.lastStarted) {
+      const startTime = new Date(server.lastStarted).getTime();
+      const uptimeSeconds = Math.floor((now - startTime) / 1000);
+      const hours = Math.floor(uptimeSeconds / 3600);
+      const minutes = Math.floor((uptimeSeconds % 3600) / 60);
+      const seconds = uptimeSeconds % 60;
+      uptime = `${hours}h ${minutes}m ${seconds}s`;
+    }
+
+    // If server not running, return minimal data
+    if (!status.isRunning) {
+      return {
+        server,
+        healthy: false,
+        modelLoaded: false,
+        modelName: server.modelName,
+        contextSize: server.ctxSize,
+        totalSlots: 0,
+        activeSlots: 0,
+        idleSlots: 0,
+        slots: [],
+        timestamp: now,
+        stale: false,
+      };
+    }
+
+    // Fetch detailed metrics in parallel
+    // If processMemory/CPU were pre-fetched (batch mode), use them; otherwise fetch individually
+    const [healthy, props, slots, fetchedMemory, fetchedCpu] = await Promise.all([
+      this.getHealth(),
+      this.getProps(),
+      this.getSlots(),
+      processMemory !== undefined
+        ? Promise.resolve(processMemory)
+        : (server.pid ? getProcessMemory(server.pid) : Promise.resolve(null)),
+      processCpuUsage !== undefined
+        ? Promise.resolve(processCpuUsage)
+        : (server.pid ? getProcessCpu(server.pid) : Promise.resolve(null)),
+    ]);
+
+    // Calculate slot statistics
+    const activeSlots = slots.filter((s) => s.state === 'processing').length;
+    const idleSlots = slots.filter((s) => s.state === 'idle').length;
+    const totalSlots = props?.total_slots || slots.length;
+
+    // Calculate average speeds (only from processing slots)
+    const processingSlots = slots.filter((s) => s.state === 'processing' && s.timings);
+
+    const avgPromptSpeed =
+      processingSlots.length > 0
+        ? processingSlots.reduce(
+            (sum, s) => sum + (s.timings?.prompt_per_second || 0),
+            0
+          ) / processingSlots.length
+        : undefined;
+
+    const avgGenerateSpeed =
+      processingSlots.length > 0
+        ? processingSlots.reduce(
+            (sum, s) => sum + (s.timings?.predicted_per_second || 0),
+            0
+          ) / processingSlots.length
+        : undefined;
+
+    // Calculate total memory (CPU + Metal GPU memory if available)
+    let totalMemory = fetchedMemory ?? undefined;
+    if (totalMemory !== undefined && server.metalMemoryMB) {
+      // Add Metal memory (convert MB to bytes)
+      totalMemory += server.metalMemoryMB * 1024 * 1024;
+    }
+
+    return {
+      server,
+      healthy,
+      uptime,
+      modelLoaded: props !== null,
+      modelName: server.modelName,
+      contextSize: props?.default_generation_settings?.n_ctx || server.ctxSize,
+      totalSlots,
+      activeSlots,
+      idleSlots,
+      slots,
+      avgPromptSpeed,
+      avgGenerateSpeed,
+      processMemory: totalMemory,
+      processCpuUsage: fetchedCpu ?? undefined,
+      timestamp: now,
+      stale: false,
+    };
+  }
+
+  /**
+   * Collect complete monitoring data (server + system metrics)
+   */
+  async collectMonitorData(
+    server: ServerConfig,
+    updateInterval: number = 2000
+  ): Promise<MonitorData> {
+    // Collect server and system metrics in parallel
+    const [serverMetrics, systemMetrics] = await Promise.all([
+      this.collectServerMetrics(server),
+      systemCollector.collectSystemMetrics(),
+    ]);
+
+    return {
+      server: serverMetrics,
+      system: systemMetrics,
+      lastUpdated: new Date(),
+      updateInterval,
+      consecutiveFailures: 0,
+    };
+  }
+}
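
The monitor TUIs added in this release presumably drive this class by constructing one aggregator per server and polling collectMonitorData on a timer; because per-slot tok/s is derived by diffing n_decoded between consecutive /slots reads, the same aggregator instance must be reused across polls and the speed fields only become defined from the second poll onward. Below is a polling sketch grounded in the methods shown above; the import paths assume a caller inside src/, and the server argument is assumed to be loaded elsewhere (e.g. from the CLI's saved server config).

import { MetricsAggregator } from './lib/metrics-aggregator.js';
import { ServerConfig } from './types/server-config.js';

// Polling sketch - `server` is assumed to come from the CLI's state manager.
async function pollOnce(server: ServerConfig) {
  const aggregator = new MetricsAggregator(server, 5000); // 5s per-request timeout

  // One combined sample of server + system metrics, tagged with a 2s nominal update interval.
  const data = await aggregator.collectMonitorData(server, 2000);

  const { healthy, activeSlots, totalSlots, avgGenerateSpeed } = data.server;
  console.log(
    `healthy=${healthy} slots=${activeSlots}/${totalSlots}` +
      (avgGenerateSpeed !== undefined ? ` gen=${avgGenerateSpeed.toFixed(1)} tok/s` : '')
  );
}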