@appkit/llamacpp-cli 1.5.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/CHANGELOG.md +20 -0
  2. package/MONITORING-ACCURACY-FIX.md +199 -0
  3. package/PER-PROCESS-METRICS.md +190 -0
  4. package/README.md +124 -9
  5. package/dist/cli.js +32 -7
  6. package/dist/cli.js.map +1 -1
  7. package/dist/commands/config.d.ts.map +1 -1
  8. package/dist/commands/config.js +15 -1
  9. package/dist/commands/config.js.map +1 -1
  10. package/dist/commands/create.d.ts.map +1 -1
  11. package/dist/commands/create.js +12 -4
  12. package/dist/commands/create.js.map +1 -1
  13. package/dist/commands/delete.js +12 -10
  14. package/dist/commands/delete.js.map +1 -1
  15. package/dist/commands/logs-all.d.ts +9 -0
  16. package/dist/commands/logs-all.d.ts.map +1 -0
  17. package/dist/commands/logs-all.js +209 -0
  18. package/dist/commands/logs-all.js.map +1 -0
  19. package/dist/commands/logs.d.ts +4 -0
  20. package/dist/commands/logs.d.ts.map +1 -1
  21. package/dist/commands/logs.js +108 -2
  22. package/dist/commands/logs.js.map +1 -1
  23. package/dist/commands/monitor.d.ts.map +1 -1
  24. package/dist/commands/monitor.js +51 -1
  25. package/dist/commands/monitor.js.map +1 -1
  26. package/dist/commands/ps.d.ts +3 -1
  27. package/dist/commands/ps.d.ts.map +1 -1
  28. package/dist/commands/ps.js +75 -5
  29. package/dist/commands/ps.js.map +1 -1
  30. package/dist/commands/rm.d.ts.map +1 -1
  31. package/dist/commands/rm.js +5 -12
  32. package/dist/commands/rm.js.map +1 -1
  33. package/dist/commands/server-show.d.ts.map +1 -1
  34. package/dist/commands/server-show.js +30 -3
  35. package/dist/commands/server-show.js.map +1 -1
  36. package/dist/commands/start.d.ts.map +1 -1
  37. package/dist/commands/start.js +34 -7
  38. package/dist/commands/start.js.map +1 -1
  39. package/dist/commands/stop.js +3 -3
  40. package/dist/commands/stop.js.map +1 -1
  41. package/dist/lib/history-manager.d.ts +46 -0
  42. package/dist/lib/history-manager.d.ts.map +1 -0
  43. package/dist/lib/history-manager.js +157 -0
  44. package/dist/lib/history-manager.js.map +1 -0
  45. package/dist/lib/metrics-aggregator.d.ts +2 -1
  46. package/dist/lib/metrics-aggregator.d.ts.map +1 -1
  47. package/dist/lib/metrics-aggregator.js +15 -4
  48. package/dist/lib/metrics-aggregator.js.map +1 -1
  49. package/dist/lib/system-collector.d.ts +9 -4
  50. package/dist/lib/system-collector.d.ts.map +1 -1
  51. package/dist/lib/system-collector.js +29 -28
  52. package/dist/lib/system-collector.js.map +1 -1
  53. package/dist/tui/HistoricalMonitorApp.d.ts +5 -0
  54. package/dist/tui/HistoricalMonitorApp.d.ts.map +1 -0
  55. package/dist/tui/HistoricalMonitorApp.js +490 -0
  56. package/dist/tui/HistoricalMonitorApp.js.map +1 -0
  57. package/dist/tui/MonitorApp.d.ts.map +1 -1
  58. package/dist/tui/MonitorApp.js +84 -62
  59. package/dist/tui/MonitorApp.js.map +1 -1
  60. package/dist/tui/MultiServerMonitorApp.d.ts +1 -1
  61. package/dist/tui/MultiServerMonitorApp.d.ts.map +1 -1
  62. package/dist/tui/MultiServerMonitorApp.js +293 -77
  63. package/dist/tui/MultiServerMonitorApp.js.map +1 -1
  64. package/dist/types/history-types.d.ts +30 -0
  65. package/dist/types/history-types.d.ts.map +1 -0
  66. package/dist/types/history-types.js +11 -0
  67. package/dist/types/history-types.js.map +1 -0
  68. package/dist/types/monitor-types.d.ts +1 -0
  69. package/dist/types/monitor-types.d.ts.map +1 -1
  70. package/dist/types/server-config.d.ts +1 -0
  71. package/dist/types/server-config.d.ts.map +1 -1
  72. package/dist/types/server-config.js.map +1 -1
  73. package/dist/utils/downsample-utils.d.ts +35 -0
  74. package/dist/utils/downsample-utils.d.ts.map +1 -0
  75. package/dist/utils/downsample-utils.js +107 -0
  76. package/dist/utils/downsample-utils.js.map +1 -0
  77. package/dist/utils/file-utils.d.ts +6 -0
  78. package/dist/utils/file-utils.d.ts.map +1 -1
  79. package/dist/utils/file-utils.js +38 -0
  80. package/dist/utils/file-utils.js.map +1 -1
  81. package/dist/utils/log-utils.d.ts +43 -0
  82. package/dist/utils/log-utils.d.ts.map +1 -0
  83. package/dist/utils/log-utils.js +190 -0
  84. package/dist/utils/log-utils.js.map +1 -0
  85. package/dist/utils/process-utils.d.ts +19 -1
  86. package/dist/utils/process-utils.d.ts.map +1 -1
  87. package/dist/utils/process-utils.js +79 -1
  88. package/dist/utils/process-utils.js.map +1 -1
  89. package/docs/images/.gitkeep +1 -0
  90. package/package.json +3 -1
  91. package/src/cli.ts +32 -7
  92. package/src/commands/config.ts +15 -1
  93. package/src/commands/create.ts +14 -5
  94. package/src/commands/delete.ts +10 -10
  95. package/src/commands/logs-all.ts +251 -0
  96. package/src/commands/logs.ts +138 -2
  97. package/src/commands/monitor.ts +21 -1
  98. package/src/commands/ps.ts +88 -5
  99. package/src/commands/rm.ts +5 -12
  100. package/src/commands/server-show.ts +35 -3
  101. package/src/commands/start.ts +35 -7
  102. package/src/commands/stop.ts +3 -3
  103. package/src/lib/history-manager.ts +172 -0
  104. package/src/lib/metrics-aggregator.ts +18 -5
  105. package/src/lib/system-collector.ts +31 -28
  106. package/src/tui/HistoricalMonitorApp.ts +548 -0
  107. package/src/tui/MonitorApp.ts +89 -64
  108. package/src/tui/MultiServerMonitorApp.ts +348 -103
  109. package/src/types/history-types.ts +39 -0
  110. package/src/types/monitor-types.ts +1 -0
  111. package/src/types/server-config.ts +1 -0
  112. package/src/utils/downsample-utils.ts +128 -0
  113. package/src/utils/file-utils.ts +40 -0
  114. package/src/utils/log-utils.ts +178 -0
  115. package/src/utils/process-utils.ts +85 -1
  116. package/test-load.sh +100 -0
  117. package/dist/tui/components/ErrorState.d.ts +0 -8
  118. package/dist/tui/components/ErrorState.d.ts.map +0 -1
  119. package/dist/tui/components/ErrorState.js +0 -22
  120. package/dist/tui/components/ErrorState.js.map +0 -1
  121. package/dist/tui/components/LoadingState.d.ts +0 -8
  122. package/dist/tui/components/LoadingState.d.ts.map +0 -1
  123. package/dist/tui/components/LoadingState.js +0 -21
  124. package/dist/tui/components/LoadingState.js.map +0 -1
@@ -0,0 +1,172 @@
1
+ import { mkdir, readFile, writeFile, access, rename } from 'fs/promises';
2
+ import { join } from 'path';
3
+ import { homedir } from 'os';
4
+ import { ServerMetrics, SystemMetrics } from '../types/monitor-types.js';
5
+ import { HistoryData, HistorySnapshot, TIME_WINDOW_HOURS, TimeWindow } from '../types/history-types.js';
6
+
7
/**
 * Persists monitoring snapshots for one server in a JSON file located at
 * `~/.llamacpp/history/<serverId>.json`.
 *
 * Snapshots older than 24 hours are pruned on every append. Every write goes
 * to a uniquely named temp file first and is then renamed over the history
 * file, so readers never observe a partially written file.
 */
export class HistoryManager {
  /** Process-wide sequence number that keeps temp file names unique. */
  private static tempFileSequence = 0;

  private serverId: string;
  private historyDir: string;
  private historyPath: string;
  private readonly MAX_AGE_MS = 24 * 60 * 60 * 1000; // 24 hours

  /**
   * @param serverId - Identifier of the server; used as the history file name
   */
  constructor(serverId: string) {
    this.serverId = serverId;
    this.historyDir = join(homedir(), '.llamacpp', 'history');
    this.historyPath = join(this.historyDir, `${serverId}.json`);
  }

  /**
   * Append a new snapshot to history (with auto-pruning).
   *
   * This is deliberately best-effort: any failure is swallowed so that
   * monitoring is never interrupted by history persistence problems.
   *
   * @param serverMetrics - Current server metrics to record
   * @param systemMetrics - Optional system metrics to record alongside
   */
  async appendSnapshot(serverMetrics: ServerMetrics, systemMetrics?: SystemMetrics): Promise<void> {
    try {
      // Ensure history directory exists
      await mkdir(this.historyDir, { recursive: true });

      // Load existing history. A read failure other than "file missing" throws
      // here, so a transient I/O error skips this snapshot instead of
      // overwriting the stored history with a single entry.
      const historyData = await this.loadHistoryData();

      const snapshot: HistorySnapshot = {
        timestamp: Date.now(),
        server: {
          healthy: serverMetrics.healthy,
          uptime: serverMetrics.uptime,
          activeSlots: serverMetrics.activeSlots,
          idleSlots: serverMetrics.idleSlots,
          totalSlots: serverMetrics.totalSlots,
          avgPromptSpeed: serverMetrics.avgPromptSpeed,
          avgGenerateSpeed: serverMetrics.avgGenerateSpeed,
          processMemory: serverMetrics.processMemory,
          processCpuUsage: serverMetrics.processCpuUsage,
        },
        system: systemMetrics
          ? {
              gpuUsage: systemMetrics.gpuUsage,
              cpuUsage: systemMetrics.cpuUsage,
              aneUsage: systemMetrics.aneUsage,
              temperature: systemMetrics.temperature,
              memoryUsed: systemMetrics.memoryUsed,
              memoryTotal: systemMetrics.memoryTotal,
            }
          : undefined,
      };

      historyData.snapshots.push(snapshot);

      // Prune old snapshots (keep only last 24h)
      historyData.snapshots = this.pruneOldSnapshots(historyData.snapshots, this.MAX_AGE_MS);

      await this.writeHistoryAtomic(historyData);
    } catch {
      // Silent failure - don't interrupt monitoring or pollute the console
    }
  }

  /**
   * Load all snapshots within the specified time window.
   *
   * Read failures are retried up to three times with exponential backoff
   * (50ms, 100ms) before giving up.
   *
   * @param windowHours - Size of the window, in hours, counted back from now
   * @returns Snapshots whose timestamp falls inside the window
   * @throws Error when the history file cannot be read after all retries
   */
  async loadHistory(windowHours: number): Promise<HistorySnapshot[]> {
    const maxRetries = 3;
    let lastError: Error | null = null;

    for (let attempt = 0; attempt < maxRetries; attempt++) {
      try {
        const historyData = await this.loadHistoryData();
        return this.filterByTimeWindow(historyData.snapshots, windowHours);
      } catch (error) {
        lastError = error instanceof Error ? error : new Error(String(error));
        // Wait briefly before retry (exponential backoff)
        if (attempt < maxRetries - 1) {
          await new Promise(resolve => setTimeout(resolve, 50 * Math.pow(2, attempt)));
        }
      }
    }

    // All retries failed - throw error so it can be handled upstream
    throw new Error(`Failed to load history after ${maxRetries} attempts: ${lastError?.message || 'Unknown error'}`);
  }

  /**
   * Load history for a specific time window type.
   *
   * @param window - Key into TIME_WINDOW_HOURS selecting the window size
   */
  async loadHistoryByWindow(window: TimeWindow): Promise<HistorySnapshot[]> {
    return this.loadHistory(TIME_WINDOW_HOURS[window]);
  }

  /**
   * Get file path for server history.
   */
  getHistoryPath(): string {
    return this.historyPath;
  }

  /**
   * Check if the history file exists (more precisely: is accessible).
   */
  async hasHistory(): Promise<boolean> {
    try {
      await access(this.historyPath);
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Clear all history for the server by replacing the file with an empty history.
   *
   * Unlike appendSnapshot, failures here are propagated to the caller.
   */
  async clearHistory(): Promise<void> {
    await mkdir(this.historyDir, { recursive: true });
    await this.writeHistoryAtomic(this.createEmptyHistory());
  }

  /**
   * Build an empty history record for this server.
   */
  private createEmptyHistory(): HistoryData {
    return {
      serverId: this.serverId,
      snapshots: [],
    };
  }

  /**
   * Build a unique temp file path next to the history file.
   *
   * The timestamp alone is not unique: two writes within the same millisecond
   * (from this process or another one monitoring the same server) would share
   * a temp file. The process id and a process-wide counter remove that overlap.
   */
  private createTempPath(): string {
    HistoryManager.tempFileSequence += 1;
    const uniqueSuffix = `${Date.now()}-${process.pid}-${HistoryManager.tempFileSequence}`;
    return join(this.historyDir, `.${this.serverId}-${uniqueSuffix}.tmp`);
  }

  /**
   * Write history atomically: write a temp file, then rename it over the
   * destination.
   *
   * The temp file lives in the same directory as the destination so the
   * rename never crosses a filesystem boundary; rename is only atomic within
   * a single filesystem.
   */
  private async writeHistoryAtomic(historyData: HistoryData): Promise<void> {
    const tempPath = this.createTempPath();
    await writeFile(tempPath, JSON.stringify(historyData, null, 2), 'utf-8');
    await rename(tempPath, this.historyPath);
  }

  /**
   * Coerce parsed JSON into a usable HistoryData.
   *
   * Anything that is not an object with a `snapshots` array is treated as
   * corrupted and replaced by an empty history. Entries without a numeric
   * `timestamp` are dropped, since they cannot be pruned or filtered.
   */
  private normalizeHistoryData(parsed: unknown): HistoryData {
    if (typeof parsed !== 'object' || parsed === null) {
      return this.createEmptyHistory();
    }

    const candidate = parsed as { serverId?: unknown; snapshots?: unknown };
    if (!Array.isArray(candidate.snapshots)) {
      return this.createEmptyHistory();
    }

    const snapshots = candidate.snapshots.filter(
      (entry): entry is HistorySnapshot =>
        typeof entry === 'object' &&
        entry !== null &&
        typeof (entry as { timestamp?: unknown }).timestamp === 'number',
    );

    return {
      ...(parsed as HistoryData),
      serverId: typeof candidate.serverId === 'string' ? candidate.serverId : this.serverId,
      snapshots,
    };
  }

  /**
   * Load full history data from file.
   *
   * - Missing file: returns an empty history.
   * - Unparseable or wrongly shaped content: treated as corrupted, returns an
   *   empty history so the next append rewrites the file.
   * - Any other read error: rethrown, so callers can retry and so an append
   *   does not replace existing history after a transient failure.
   */
  private async loadHistoryData(): Promise<HistoryData> {
    let content: string;
    try {
      content = await readFile(this.historyPath, 'utf-8');
    } catch (error) {
      const isMissingFile =
        typeof error === 'object' &&
        error !== null &&
        (error as { code?: unknown }).code === 'ENOENT';
      if (isMissingFile) {
        return this.createEmptyHistory();
      }
      throw error;
    }

    try {
      return this.normalizeHistoryData(JSON.parse(content));
    } catch {
      // Corrupted JSON - start over with an empty history
      return this.createEmptyHistory();
    }
  }

  /**
   * Prune snapshots older than maxAgeMs (relative to now).
   */
  private pruneOldSnapshots(snapshots: HistorySnapshot[], maxAgeMs: number): HistorySnapshot[] {
    const cutoff = Date.now() - maxAgeMs;
    return snapshots.filter(s => s.timestamp >= cutoff);
  }

  /**
   * Keep only snapshots taken within the last windowHours hours.
   */
  private filterByTimeWindow(snapshots: HistorySnapshot[], windowHours: number): HistorySnapshot[] {
    const cutoff = Date.now() - (windowHours * 60 * 60 * 1000);
    return snapshots.filter(s => s.timestamp >= cutoff);
  }
}
@@ -2,7 +2,7 @@ import { ServerConfig } from '../types/server-config.js';
2
2
  import { ServerMetrics, SlotInfo, MonitorData } from '../types/monitor-types.js';
3
3
  import { statusChecker } from './status-checker.js';
4
4
  import { systemCollector } from './system-collector.js';
5
- import { getProcessMemory } from '../utils/process-utils.js';
5
+ import { getProcessMemory, getProcessCpu } from '../utils/process-utils.js';
6
6
 
7
7
  /**
8
8
  * Aggregates metrics from llama.cpp server API endpoints
@@ -128,10 +128,12 @@ export class MetricsAggregator {
128
128
  * Aggregate all server metrics
129
129
  * @param server - Server configuration
130
130
  * @param processMemory - Optional pre-fetched process memory (for batch collection)
131
+ * @param processCpuUsage - Optional pre-fetched process CPU usage (for batch collection)
131
132
  */
132
133
  async collectServerMetrics(
133
134
  server: ServerConfig,
134
- processMemory?: number | null
135
+ processMemory?: number | null,
136
+ processCpuUsage?: number | null
135
137
  ): Promise<ServerMetrics> {
136
138
  const now = Date.now();
137
139
 
@@ -167,14 +169,17 @@ export class MetricsAggregator {
167
169
  }
168
170
 
169
171
  // Fetch detailed metrics in parallel
170
- // If processMemory was pre-fetched (batch mode), use it; otherwise fetch individually
171
- const [healthy, props, slots, fetchedMemory] = await Promise.all([
172
+ // If processMemory/CPU were pre-fetched (batch mode), use them; otherwise fetch individually
173
+ const [healthy, props, slots, fetchedMemory, fetchedCpu] = await Promise.all([
172
174
  this.getHealth(),
173
175
  this.getProps(),
174
176
  this.getSlots(),
175
177
  processMemory !== undefined
176
178
  ? Promise.resolve(processMemory)
177
179
  : (server.pid ? getProcessMemory(server.pid) : Promise.resolve(null)),
180
+ processCpuUsage !== undefined
181
+ ? Promise.resolve(processCpuUsage)
182
+ : (server.pid ? getProcessCpu(server.pid) : Promise.resolve(null)),
178
183
  ]);
179
184
 
180
185
  // Calculate slot statistics
@@ -201,6 +206,13 @@ export class MetricsAggregator {
201
206
  ) / processingSlots.length
202
207
  : undefined;
203
208
 
209
+ // Calculate total memory (CPU + Metal GPU memory if available)
210
+ let totalMemory = fetchedMemory ?? undefined;
211
+ if (totalMemory !== undefined && server.metalMemoryMB) {
212
+ // Add Metal memory (convert MB to bytes)
213
+ totalMemory += server.metalMemoryMB * 1024 * 1024;
214
+ }
215
+
204
216
  return {
205
217
  server,
206
218
  healthy,
@@ -214,7 +226,8 @@ export class MetricsAggregator {
214
226
  slots,
215
227
  avgPromptSpeed,
216
228
  avgGenerateSpeed,
217
- processMemory: fetchedMemory ?? undefined,
229
+ processMemory: totalMemory,
230
+ processCpuUsage: fetchedCpu ?? undefined,
218
231
  timestamp: now,
219
232
  stale: false,
220
233
  };
@@ -178,16 +178,12 @@ export class SystemCollector {
178
178
  */
179
179
  private parseVmStatOutput(output: string): {
180
180
  memoryUsed: number;
181
- memoryTotal: number;
182
181
  } {
183
182
  const lines = output.split('\n');
184
183
  const pageSize = 16384; // 16KB on Apple Silicon
185
184
  let pagesActive = 0;
186
185
  let pagesWired = 0;
187
186
  let pagesCompressed = 0;
188
- let pagesFree = 0;
189
- let pagesInactive = 0;
190
- let pagesSpeculative = 0;
191
187
 
192
188
  for (const line of lines) {
193
189
  const match = line.match(/Pages (.*?):\s+(\d+)\./);
@@ -198,49 +194,56 @@ export class SystemCollector {
198
194
  if (name === 'active') pagesActive = value;
199
195
  else if (name === 'wired down') pagesWired = value;
200
196
  else if (name === 'compressed') pagesCompressed = value;
201
- else if (name === 'free') pagesFree = value;
202
- else if (name === 'inactive') pagesInactive = value;
203
- else if (name === 'speculative') pagesSpeculative = value;
204
197
  }
205
198
  }
206
199
 
207
200
  // Calculate used memory (active + wired + compressed)
201
+ // This matches what Activity Monitor and macmon report as "used"
208
202
  const usedPages = pagesActive + pagesWired + pagesCompressed;
209
203
  const memoryUsed = usedPages * pageSize;
210
204
 
211
- // Calculate total memory (used + free + inactive + speculative)
212
- const totalPages =
213
- pagesActive +
214
- pagesWired +
215
- pagesCompressed +
216
- pagesFree +
217
- pagesInactive +
218
- pagesSpeculative;
219
- const memoryTotal = totalPages * pageSize;
220
-
221
- return { memoryUsed, memoryTotal };
205
+ return { memoryUsed };
206
+ }
207
+
208
+ /**
209
+ * Get total system memory from sysctl
210
+ * Returns installed RAM size in bytes
211
+ */
212
+ private async getTotalMemory(): Promise<number> {
213
+ try {
214
+ const output = await execCommand('sysctl -n hw.memsize 2>/dev/null');
215
+ return parseInt(output.trim(), 10) || 0;
216
+ } catch {
217
+ return 0;
218
+ }
222
219
  }
223
220
 
224
221
  /**
225
- * Collect vm_stat memory metrics
222
+ * Collect vm_stat memory metrics + total system memory from sysctl
226
223
  */
227
- private async getVmStatMetrics(): Promise<{
224
+ private async getMemoryMetrics(): Promise<{
228
225
  memoryUsed: number;
229
226
  memoryTotal: number;
230
227
  }> {
231
228
  try {
232
- const output = await execCommand('vm_stat 2>/dev/null');
233
- return this.parseVmStatOutput(output);
229
+ // Get used memory from vm_stat
230
+ const vmStatOutput = await execCommand('vm_stat 2>/dev/null');
231
+ const { memoryUsed } = this.parseVmStatOutput(vmStatOutput);
232
+
233
+ // Get total installed RAM from sysctl (this is accurate)
234
+ const memoryTotal = await this.getTotalMemory();
235
+
236
+ return { memoryUsed, memoryTotal };
234
237
  } catch {
235
- // Fallback to zeros if vm_stat fails
238
+ // Fallback to zeros if commands fail
236
239
  return { memoryUsed: 0, memoryTotal: 0 };
237
240
  }
238
241
  }
239
242
 
240
243
  /**
241
244
  * Collect all system metrics
242
- * Attempts macmon first (GPU/CPU/ANE + memory), falls back to vm_stat (memory only)
243
- * Caches results for 1.5s to prevent spawning multiple macmon processes
245
+ * Attempts macmon first (GPU/CPU/ANE), always gets memory from vm_stat + sysctl
246
+ * Caches results for 4s to prevent spawning multiple macmon processes
244
247
  */
245
248
  async collectSystemMetrics(): Promise<SystemMetrics> {
246
249
  const now = Date.now();
@@ -276,11 +279,11 @@ export class SystemCollector {
276
279
  const warnings: string[] = [];
277
280
  const now = Date.now();
278
281
 
279
- // Try macmon first
282
+ // Try macmon first for GPU/CPU/ANE
280
283
  const macmonMetrics = await this.getMacmonMetrics();
281
284
 
282
- // Always get memory from vm_stat (more reliable than macmon)
283
- const memoryMetrics = await this.getVmStatMetrics();
285
+ // Always get memory from vm_stat + sysctl (accurate total from sysctl)
286
+ const memoryMetrics = await this.getMemoryMetrics();
284
287
 
285
288
  // Determine source and add warnings
286
289
  let source: 'macmon' | 'vm_stat' | 'none';