@appkit/llamacpp-cli 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49):
  1. package/CHANGELOG.md +15 -0
  2. package/README.md +87 -1
  3. package/dist/cli.js +14 -0
  4. package/dist/cli.js.map +1 -1
  5. package/dist/commands/monitor.d.ts +2 -0
  6. package/dist/commands/monitor.d.ts.map +1 -0
  7. package/dist/commands/monitor.js +76 -0
  8. package/dist/commands/monitor.js.map +1 -0
  9. package/dist/lib/metrics-aggregator.d.ts +39 -0
  10. package/dist/lib/metrics-aggregator.d.ts.map +1 -0
  11. package/dist/lib/metrics-aggregator.js +200 -0
  12. package/dist/lib/metrics-aggregator.js.map +1 -0
  13. package/dist/lib/system-collector.d.ts +75 -0
  14. package/dist/lib/system-collector.d.ts.map +1 -0
  15. package/dist/lib/system-collector.js +310 -0
  16. package/dist/lib/system-collector.js.map +1 -0
  17. package/dist/tui/MonitorApp.d.ts +4 -0
  18. package/dist/tui/MonitorApp.d.ts.map +1 -0
  19. package/dist/tui/MonitorApp.js +293 -0
  20. package/dist/tui/MonitorApp.js.map +1 -0
  21. package/dist/tui/MultiServerMonitorApp.d.ts +4 -0
  22. package/dist/tui/MultiServerMonitorApp.d.ts.map +1 -0
  23. package/dist/tui/MultiServerMonitorApp.js +496 -0
  24. package/dist/tui/MultiServerMonitorApp.js.map +1 -0
  25. package/dist/tui/components/ErrorState.d.ts +8 -0
  26. package/dist/tui/components/ErrorState.d.ts.map +1 -0
  27. package/dist/tui/components/ErrorState.js +22 -0
  28. package/dist/tui/components/ErrorState.js.map +1 -0
  29. package/dist/tui/components/LoadingState.d.ts +8 -0
  30. package/dist/tui/components/LoadingState.d.ts.map +1 -0
  31. package/dist/tui/components/LoadingState.js +21 -0
  32. package/dist/tui/components/LoadingState.js.map +1 -0
  33. package/dist/types/monitor-types.d.ts +122 -0
  34. package/dist/types/monitor-types.d.ts.map +1 -0
  35. package/dist/types/monitor-types.js +3 -0
  36. package/dist/types/monitor-types.js.map +1 -0
  37. package/dist/utils/process-utils.d.ts +16 -1
  38. package/dist/utils/process-utils.d.ts.map +1 -1
  39. package/dist/utils/process-utils.js +144 -27
  40. package/dist/utils/process-utils.js.map +1 -1
  41. package/package.json +4 -2
  42. package/src/cli.ts +14 -0
  43. package/src/commands/monitor.ts +90 -0
  44. package/src/lib/metrics-aggregator.ts +244 -0
  45. package/src/lib/system-collector.ts +312 -0
  46. package/src/tui/MonitorApp.ts +361 -0
  47. package/src/tui/MultiServerMonitorApp.ts +547 -0
  48. package/src/types/monitor-types.ts +161 -0
  49. package/src/utils/process-utils.ts +160 -26
@@ -0,0 +1,361 @@
1
+ import blessed from 'blessed';
2
+ import { ServerConfig } from '../types/server-config.js';
3
+ import { MetricsAggregator } from '../lib/metrics-aggregator.js';
4
+ import { MonitorData } from '../types/monitor-types.js';
5
+
6
+ export async function createMonitorUI(
7
+ screen: blessed.Widgets.Screen,
8
+ server: ServerConfig
9
+ ): Promise<void> {
10
+ let updateInterval = 2000;
11
+ let intervalId: NodeJS.Timeout | null = null;
12
+ let consecutiveFailures = 0;
13
+ let lastGoodData: MonitorData | null = null;
14
+ const STALE_THRESHOLD = 5;
15
+ const metricsAggregator = new MetricsAggregator(server);
16
+
17
// One box anchored at the top-left corner and sized to 100% of the screen.
// All dashboard text is written into it; scrolling is enabled with key,
// vi-style and mouse input.
const scrollbarOptions = {
  ch: '█',
  style: { fg: 'blue' },
};

const contentBox = blessed.box({
  top: 0,
  left: 0,
  width: '100%',
  height: '100%',
  tags: true,
  scrollable: true,
  alwaysScroll: true,
  keys: true,
  vi: true,
  mouse: true,
  scrollbar: scrollbarOptions,
});

screen.append(contentBox);
37
+
38
/**
 * Render a fixed-width textual progress bar such as `[███░░░]`.
 *
 * The bar is always exactly `width` cells wide (plus the two brackets), even
 * when the caller passes an out-of-range or non-numeric percentage.
 *
 * @param percentage - Fill level in percent. Values outside 0–100 are clamped;
 *   non-finite values (NaN, ±Infinity) are rendered as an empty bar.
 * @param width - Number of cells between the brackets (default 30). Fractional
 *   widths are floored; negative or non-finite widths yield `[]`.
 * @returns The bar text wrapped in square brackets.
 */
function createProgressBar(percentage: number, width: number = 30): string {
  // Sanitise the width first so String.prototype.repeat never sees a negative
  // or infinite count (both throw a RangeError).
  const cells = Number.isFinite(width) ? Math.max(0, Math.floor(width)) : 0;

  // Clamp the percentage so the filled part can neither overflow the bar nor
  // push the empty part past `width`.
  const pct = Number.isFinite(percentage) ? Math.min(100, Math.max(0, percentage)) : 0;

  const filled = Math.round((pct / 100) * cells);
  return '[' + '█'.repeat(filled) + '░'.repeat(cells - filled) + ']';
}
44
+
45
/**
 * Build the horizontal rule used between dashboard sections.
 *
 * The rule is two columns narrower than the screen (falling back to 80
 * columns when the width is unavailable) and never shorter than zero, because
 * String.prototype.repeat throws a RangeError on a negative count.
 */
function buildDivider(): string {
  const termWidth = (screen.width as number) || 80;
  return '─'.repeat(Math.max(0, termWidth - 2)); // Account for padding
}

/**
 * Escape curly braces so arbitrary text cannot be interpreted as blessed
 * markup inside a tag-enabled box.
 */
function escapeTags(text: string): string {
  return text.replace(/[{}]/g, (ch) => (ch === '{' ? '{open}' : '{close}'));
}

/**
 * Render the complete dashboard text for one snapshot.
 *
 * @param data - Snapshot to display.
 * @param stale - When true the snapshot is the last successful one: a
 *   "connection lost" banner is added, the status line reads STALE, the
 *   metric section headings carry a "(stale)" marker and the footer reports
 *   the failure count instead of the update time.
 * @returns Tagged content ready for `contentBox.setContent`.
 */
function renderDashboard(data: MonitorData, stale: boolean): string {
  const divider = buildDivider();
  const staleSuffix = stale ? ' {yellow-fg}(stale){/yellow-fg}' : '';
  const stats = data.server;

  let content = '';

  // Header (with stale warning when applicable)
  content += '{bold}{blue-fg}═══ llama.cpp Server Monitor ═══{/blue-fg}{/bold}\n';
  content += stale
    ? '{bold}{yellow-fg}⚠ CONNECTION LOST - SHOWING STALE DATA{/yellow-fg}{/bold}\n\n'
    : '\n';

  // Server Info
  content += '{bold}Server Information{/bold}\n';
  content += divider + '\n';

  let statusIcon: string;
  let statusText: string;
  if (stale) {
    statusIcon = '{yellow-fg}●{/yellow-fg}';
    statusText = 'STALE';
  } else if (stats.healthy) {
    statusIcon = '{green-fg}●{/green-fg}';
    statusText = 'RUNNING';
  } else {
    statusIcon = '{red-fg}●{/red-fg}';
    statusText = 'UNHEALTHY';
  }
  content += `Status: ${statusIcon} ${statusText}\n`;

  if (stats.uptime) {
    content += `Uptime: ${stats.uptime}\n`;
  }

  content += `Model: ${server.modelName}\n`;
  // Handle null host (legacy configs) by defaulting to 127.0.0.1
  const displayHost = server.host || '127.0.0.1';
  content += `Endpoint: http://${displayHost}:${server.port}\n`;
  content += `Slots: ${stats.activeSlots} active / ${stats.totalSlots} total\n`;
  content += '\n';

  // Request Metrics
  if (stats.totalSlots > 0) {
    content += `{bold}Request Metrics{/bold}${staleSuffix}\n`;
    content += divider + '\n';
    content += `Active: ${stats.activeSlots} / ${stats.totalSlots}\n`;
    content += `Idle: ${stats.idleSlots} / ${stats.totalSlots}\n`;

    if (stats.avgPromptSpeed !== undefined && stats.avgPromptSpeed > 0) {
      content += `Prompt: ${Math.round(stats.avgPromptSpeed)} tokens/sec\n`;
    }

    if (stats.avgGenerateSpeed !== undefined && stats.avgGenerateSpeed > 0) {
      content += `Generate: ${Math.round(stats.avgGenerateSpeed)} tokens/sec\n`;
    }

    content += '\n';
  }

  // Active Slots Detail (section is omitted when nothing is processing)
  const activeSlots = stats.slots.filter((s) => s.state === 'processing');
  if (activeSlots.length > 0) {
    content += `{bold}Active Slots{/bold}${staleSuffix}\n`;
    content += divider + '\n';

    for (const slot of activeSlots) {
      content += `Slot #${slot.id}: {yellow-fg}PROCESSING{/yellow-fg}`;

      if (slot.timings?.predicted_per_second) {
        content += ` - ${Math.round(slot.timings.predicted_per_second)} tok/s`;
      }

      if (slot.n_decoded !== undefined) {
        content += ` - ${slot.n_decoded} tokens`;
      }

      content += '\n';
    }

    content += '\n';
  }

  // System Resources
  content += `{bold}System Resources{/bold}${staleSuffix}\n`;
  content += divider + '\n';

  const system = data.system;
  if (system) {
    if (system.gpuUsage !== undefined) {
      const bar = createProgressBar(system.gpuUsage);
      content += `GPU: {cyan-fg}${bar}{/cyan-fg} ${Math.round(system.gpuUsage)}%`;

      if (system.temperature !== undefined) {
        content += ` - ${Math.round(system.temperature)}°C`;
      }

      content += '\n';
    }

    if (system.cpuUsage !== undefined) {
      const bar = createProgressBar(system.cpuUsage);
      content += `CPU: {cyan-fg}${bar}{/cyan-fg} ${Math.round(system.cpuUsage)}%\n`;
    }

    // The ANE row is only shown when usage is above 1%.
    if (system.aneUsage !== undefined && system.aneUsage > 1) {
      const bar = createProgressBar(system.aneUsage);
      content += `ANE: {cyan-fg}${bar}{/cyan-fg} ${Math.round(system.aneUsage)}%\n`;
    }

    if (system.memoryTotal > 0) {
      const memoryUsedGB = system.memoryUsed / (1024 ** 3);
      const memoryTotalGB = system.memoryTotal / (1024 ** 3);
      const memoryPercentage = (system.memoryUsed / system.memoryTotal) * 100;
      const bar = createProgressBar(memoryPercentage);
      content += `Memory: {cyan-fg}${bar}{/cyan-fg} ${Math.round(memoryPercentage)}% `;
      content += `(${memoryUsedGB.toFixed(1)} / ${memoryTotalGB.toFixed(1)} GB)\n`;
    }

    if (system.warnings && system.warnings.length > 0) {
      content += `\n{yellow-fg}⚠ ${system.warnings.join(', ')}{/yellow-fg}\n`;
    }
  }

  content += '\n';

  // Footer
  content += divider + '\n';
  if (stale) {
    content += `{yellow-fg}Last good data: ${data.lastUpdated.toLocaleTimeString()}{/yellow-fg}\n`;
    content += `{yellow-fg}Connection failures: ${consecutiveFailures}{/yellow-fg}\n`;
    content += `{gray-fg}Interval: ${updateInterval}ms | [R]efresh [+/-]Speed [Q]uit{/gray-fg}`;
  } else {
    content += `{gray-fg}Updated: ${data.lastUpdated.toLocaleTimeString()} | `;
    content += `Interval: ${updateInterval}ms | `;
    content += `[R]efresh [+/-]Speed [Q]uit{/gray-fg}`;
  }

  return content;
}

/**
 * Fetch one snapshot from the metrics aggregator and redraw the screen.
 *
 * On success the failure counter is reset, the snapshot is remembered as the
 * last good data and the live dashboard is drawn. On failure the counter is
 * incremented; once it reaches `STALE_THRESHOLD` and a previous snapshot
 * exists, that snapshot is redrawn in stale mode. Otherwise a connection
 * error screen with the retry count is shown.
 *
 * Never rejects for fetch errors: they are caught and reported on screen.
 */
async function fetchData(): Promise<void> {
  try {
    const data = await metricsAggregator.collectMonitorData(server, updateInterval);

    // Reset failure count on success
    consecutiveFailures = 0;
    lastGoodData = data;

    contentBox.setContent(renderDashboard(data, false));
    screen.render();
  } catch (err) {
    consecutiveFailures++;

    // If we have last good data and we're stale, show it with an indicator.
    const snapshot = lastGoodData;
    if (snapshot !== null && consecutiveFailures >= STALE_THRESHOLD) {
      contentBox.setContent(renderDashboard(snapshot, true));
      screen.render();
      return;
    }

    // Show connection error (either no last data or not stale yet).
    // The message is escaped because the box interprets {...} as markup.
    const errorMsg = escapeTags(err instanceof Error ? err.message : 'Unknown error');
    const retryMsg = consecutiveFailures < STALE_THRESHOLD
      ? `Retrying... (${consecutiveFailures}/${STALE_THRESHOLD})`
      : 'Connection lost';

    contentBox.setContent(
      '{bold}{red-fg}Connection Error{/red-fg}{/bold}\n\n' +
      `{red-fg}${errorMsg}{/red-fg}\n\n` +
      `{yellow-fg}${retryMsg}{/yellow-fg}\n\n` +
      '{gray-fg}Press [R] to retry or [Q] to quit{/gray-fg}'
    );
    screen.render();
  }
}
320
+
321
/**
 * (Re)start the refresh loop.
 *
 * Cancels any timer that is already running, triggers one fetch right away,
 * then schedules further fetches every `updateInterval` milliseconds.
 */
function startPolling(): void {
  if (intervalId) {
    clearInterval(intervalId);
  }

  // Fire-and-forget: the immediate refresh is not awaited.
  void fetchData();

  intervalId = setInterval(() => {
    void fetchData();
  }, updateInterval);
}
327
+
328
// Keyboard shortcuts

// Shift the polling interval by `deltaMs`, keep it within 500–10000 ms and
// restart polling so the new interval takes effect.
const adjustInterval = (deltaMs: number): void => {
  updateInterval = Math.min(10000, Math.max(500, updateInterval + deltaMs));
  startPolling();
};

// r / R: refresh immediately; the polling timer is left untouched.
screen.key(['r', 'R'], () => {
  void fetchData();
});

// + / =: poll faster (interval shrinks by 500 ms).
screen.key(['+', '='], () => {
  adjustInterval(-500);
});

// - / _: poll slower (interval grows by 500 ms).
screen.key(['-', '_'], () => {
  adjustInterval(500);
});

// q / Q / Ctrl-C: stop polling, tear down the screen and exit the process.
screen.key(['q', 'Q', 'C-c'], () => {
  if (intervalId) {
    clearInterval(intervalId);
  }
  screen.destroy();
  process.exit(0);
});
348
+
349
// Show a placeholder until the first fetch has completed.
const connectingMessage = '{cyan-fg}⏳ Connecting to server...{/cyan-fg}';
contentBox.setContent(connectingMessage);
screen.render();

startPolling();

// Cleanup: stop the polling timer whenever the screen is destroyed.
const stopPollingOnDestroy = (): void => {
  if (intervalId) {
    clearInterval(intervalId);
  }
  // Note (original author): macmon child processes will automatically die
  // when the parent exits since they're spawned with detached: false.
  // That spawning code is not in this file — verify there.
};
screen.on('destroy', stopPollingOnDestroy);
361
+ }