@appkit/llamacpp-cli 1.4.1 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/README.md +87 -1
- package/dist/cli.js +14 -0
- package/dist/cli.js.map +1 -1
- package/dist/commands/monitor.d.ts +2 -0
- package/dist/commands/monitor.d.ts.map +1 -0
- package/dist/commands/monitor.js +76 -0
- package/dist/commands/monitor.js.map +1 -0
- package/dist/lib/metrics-aggregator.d.ts +39 -0
- package/dist/lib/metrics-aggregator.d.ts.map +1 -0
- package/dist/lib/metrics-aggregator.js +200 -0
- package/dist/lib/metrics-aggregator.js.map +1 -0
- package/dist/lib/system-collector.d.ts +75 -0
- package/dist/lib/system-collector.d.ts.map +1 -0
- package/dist/lib/system-collector.js +310 -0
- package/dist/lib/system-collector.js.map +1 -0
- package/dist/tui/MonitorApp.d.ts +4 -0
- package/dist/tui/MonitorApp.d.ts.map +1 -0
- package/dist/tui/MonitorApp.js +293 -0
- package/dist/tui/MonitorApp.js.map +1 -0
- package/dist/tui/MultiServerMonitorApp.d.ts +4 -0
- package/dist/tui/MultiServerMonitorApp.d.ts.map +1 -0
- package/dist/tui/MultiServerMonitorApp.js +496 -0
- package/dist/tui/MultiServerMonitorApp.js.map +1 -0
- package/dist/tui/components/ErrorState.d.ts +8 -0
- package/dist/tui/components/ErrorState.d.ts.map +1 -0
- package/dist/tui/components/ErrorState.js +22 -0
- package/dist/tui/components/ErrorState.js.map +1 -0
- package/dist/tui/components/LoadingState.d.ts +8 -0
- package/dist/tui/components/LoadingState.d.ts.map +1 -0
- package/dist/tui/components/LoadingState.js +21 -0
- package/dist/tui/components/LoadingState.js.map +1 -0
- package/dist/types/monitor-types.d.ts +122 -0
- package/dist/types/monitor-types.d.ts.map +1 -0
- package/dist/types/monitor-types.js +3 -0
- package/dist/types/monitor-types.js.map +1 -0
- package/dist/utils/process-utils.d.ts +16 -1
- package/dist/utils/process-utils.d.ts.map +1 -1
- package/dist/utils/process-utils.js +144 -27
- package/dist/utils/process-utils.js.map +1 -1
- package/package.json +3 -1
- package/src/cli.ts +14 -0
- package/src/commands/monitor.ts +90 -0
- package/src/lib/metrics-aggregator.ts +244 -0
- package/src/lib/system-collector.ts +312 -0
- package/src/tui/MonitorApp.ts +361 -0
- package/src/tui/MultiServerMonitorApp.ts +547 -0
- package/src/types/monitor-types.ts +161 -0
- package/src/utils/process-utils.ts +160 -26
|
@@ -0,0 +1,547 @@
|
|
|
1
|
+
import blessed from 'blessed';
|
|
2
|
+
import { ServerConfig } from '../types/server-config.js';
|
|
3
|
+
import { MetricsAggregator } from '../lib/metrics-aggregator.js';
|
|
4
|
+
import { SystemCollector } from '../lib/system-collector.js';
|
|
5
|
+
import { MonitorData, SystemMetrics } from '../types/monitor-types.js';
|
|
6
|
+
|
|
7
|
+
/** Which screen the monitor is currently showing: the server table or a single server's details. */
type ViewMode = 'list' | 'detail';
|
|
8
|
+
|
|
9
|
+
/** Per-server entry kept in the monitor's data map. */
interface ServerMonitorData {
  /** Configuration of the server this entry belongs to. */
  server: ServerConfig;
  /** Monitor data from the latest fetch; `null` before the first fetch and after a failed one. */
  data: MonitorData | null;
  /** Message of the latest fetch failure; `null` when there is none. */
  error: string | null;
}
|
|
14
|
+
|
|
15
|
+
export async function createMultiServerMonitorUI(
|
|
16
|
+
screen: blessed.Widgets.Screen,
|
|
17
|
+
servers: ServerConfig[]
|
|
18
|
+
): Promise<void> {
|
|
19
|
+
// --- Mutable UI state shared by the nested helpers -------------------------
let updateInterval = 2000; // polling period in milliseconds
let intervalId: NodeJS.Timeout | null = null; // handle of the polling timer
let viewMode: ViewMode = 'list';
let selectedServerIndex = 0; // index into `servers` used by the detail view
let isLoading = false;
let lastSystemMetrics: SystemMetrics | null = null; // reused while a refresh is in flight

// --- Spinner animation -----------------------------------------------------
const spinnerFrames = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
let spinnerFrameIndex = 0;
let spinnerIntervalId: NodeJS.Timeout | null = null;

// --- Collectors and per-server bookkeeping ---------------------------------
const systemCollector = new SystemCollector();
const aggregators = new Map<string, MetricsAggregator>();
const serverDataMap = new Map<string, ServerMonitorData>();

// One aggregator and one (initially empty) data entry per configured server.
servers.forEach((server) => {
  aggregators.set(server.id, new MetricsAggregator(server));
  serverDataMap.set(server.id, { server, data: null, error: null });
});

// --- Single full-screen scrollable content box -----------------------------
const contentBox = blessed.box({
  top: 0,
  left: 0,
  width: '100%',
  height: '100%',
  tags: true,
  scrollable: true,
  alwaysScroll: true,
  keys: true,
  vi: true,
  mouse: true,
  scrollbar: { ch: '█', style: { fg: 'blue' } },
});
screen.append(contentBox);
|
|
65
|
+
|
|
66
|
+
// Helper to create progress bar
|
|
67
|
+
/**
 * Builds a fixed-width text progress bar such as `[█████░░░░░]`.
 *
 * The percentage is clamped to the 0–100 range (NaN is treated as 0) so the
 * bar can never grow past `width` cells or collapse to `[]`.
 *
 * @param percentage - Fill level in percent.
 * @param width - Number of cells between the brackets (default 30).
 * @returns The bar; for a non-negative integer `width` it is `width + 2` characters long.
 */
function createProgressBar(percentage: number, width: number = 30): string {
  const cells = Math.max(0, Math.floor(width));
  const clamped = Number.isNaN(percentage) ? 0 : Math.min(100, Math.max(0, percentage));
  const filled = Math.round((clamped / 100) * cells);
  return '[' + '█'.repeat(filled) + '░'.repeat(cells - filled) + ']';
}
|
|
72
|
+
|
|
73
|
+
// Render system resources section
|
|
74
|
+
/**
 * Renders the "System Resources" section as a blessed-tagged string.
 *
 * @param systemMetrics - Latest system metrics, or `null` when none have been collected yet.
 * @returns Heading, divider, then one line per available metric (GPU, CPU, ANE, memory)
 *          plus an optional warnings line; a placeholder line when `systemMetrics` is `null`.
 */
function renderSystemResources(systemMetrics: SystemMetrics | null): string {
  const screenWidth = (screen.width as number) || 80;
  const heading = '{bold}System Resources{/bold}\n' + '─'.repeat(screenWidth - 2) + '\n';

  if (!systemMetrics) {
    return heading + '{gray-fg}Collecting system metrics...{/gray-fg}\n';
  }

  // "<label>: [bar] NN%" without a trailing newline, shared by every gauge row.
  const gauge = (label: string, value: number): string =>
    `${label}: {cyan-fg}${createProgressBar(value)}{/cyan-fg} ${Math.round(value)}%`;

  const { gpuUsage, cpuUsage, aneUsage, temperature, memoryUsed, memoryTotal, warnings } =
    systemMetrics;
  let section = heading;

  if (gpuUsage !== undefined) {
    section += gauge('GPU', gpuUsage);
    // Temperature is appended to the GPU row only.
    if (temperature !== undefined) {
      section += ` - ${Math.round(temperature)}°C`;
    }
    section += '\n';
  }

  if (cpuUsage !== undefined) {
    section += gauge('CPU', cpuUsage) + '\n';
  }

  // The ANE row is shown only when usage is above 1%.
  if (aneUsage !== undefined && aneUsage > 1) {
    section += gauge('ANE', aneUsage) + '\n';
  }

  if (memoryTotal > 0) {
    const bytesPerGB = 1024 ** 3;
    const usedGB = memoryUsed / bytesPerGB;
    const totalGB = memoryTotal / bytesPerGB;
    section += gauge('Memory', (memoryUsed / memoryTotal) * 100) + ' ';
    section += `(${usedGB.toFixed(1)} / ${totalGB.toFixed(1)} GB)\n`;
  }

  if (warnings && warnings.length > 0) {
    section += `\n{yellow-fg}⚠ ${warnings.join(', ')}{/yellow-fg}\n`;
  }

  return section;
}
|
|
122
|
+
|
|
123
|
+
// Show loading spinner
|
|
124
|
+
/**
 * Enters the loading state: starts the spinner animation and repaints the
 * current view immediately. Does nothing if a loading state is already active.
 */
function showLoading(): void {
  if (isLoading) {
    return; // Already loading
  }

  // Repaint whichever view is active, using the last known system metrics.
  const repaint = (): void => {
    const body =
      viewMode === 'list'
        ? renderListView(lastSystemMetrics)
        : renderDetailView(lastSystemMetrics);
    contentBox.setContent(body);
    screen.render();
  };

  isLoading = true;
  spinnerFrameIndex = 0;

  // Advance one spinner frame every 80 ms and repaint with it.
  spinnerIntervalId = setInterval(() => {
    spinnerFrameIndex = (spinnerFrameIndex + 1) % spinnerFrames.length;
    repaint();
  }, 80);

  // Immediate first render so the spinner appears without waiting for the first tick.
  repaint();
}
|
|
155
|
+
|
|
156
|
+
// Hide loading spinner
|
|
157
|
+
/** Leaves the loading state and stops the spinner timer if one is running. */
function hideLoading(): void {
  isLoading = false;
  if (!spinnerIntervalId) {
    return;
  }
  clearInterval(spinnerIntervalId);
  spinnerIntervalId = null;
}
|
|
164
|
+
|
|
165
|
+
// Render list view
|
|
166
|
+
/**
 * Renders the list view: title, key hints (with spinner while loading),
 * the system resources section, and one table row per configured server.
 *
 * @param systemMetrics - Metrics passed through to the system resources section.
 * @returns The complete blessed-tagged content for the list view.
 */
function renderListView(systemMetrics: SystemMetrics | null): string {
  const screenWidth = (screen.width as number) || 80;
  const rule = '─'.repeat(screenWidth - 2);
  const bytesPerGB = 1024 * 1024 * 1024;
  const bytesPerMB = 1024 * 1024;

  const spinner = isLoading ? spinnerFrames[spinnerFrameIndex] : '';
  const spinnerSuffix = spinner ? ` {cyan-fg}${spinner}{/cyan-fg}` : '';

  const running = servers.filter((s) => s.status === 'running').length;
  const stopped = servers.length - running;

  const chunks: string[] = [
    // Header
    '{bold}{blue-fg}═══ llama.cpp Multi-Server Monitor ═══{/blue-fg}{/bold}\n',
    // Key hints with optional spinner
    `{gray-fg}Press 1-9 for details | [F] Filter | [Q] Quit${spinnerSuffix}{/gray-fg}\n\n`,
    // System resources
    renderSystemResources(systemMetrics),
    '\n',
    // Server list header
    `{bold}Servers (${running} running, ${stopped} stopped){/bold}\n`,
    '{gray-fg}Press number for details{/gray-fg}\n',
    rule + '\n',
    // Table header
    '{bold}# │ Server ID │ Port │ Status │ Slots │ tok/s │ Memory{/bold}\n',
    rule + '\n',
  ];

  servers.forEach((server, index) => {
    const monitor = serverDataMap.get(server.id)?.data;
    const metrics = monitor?.server;

    // Fixed-width identity columns (the id is padded, then cut to 16 characters).
    const idCell = server.id.padEnd(16).substring(0, 16);
    const portCell = server.port.toString().padStart(4);

    // Status: fetched data wins; otherwise fall back to the configured status.
    let statusCell: string;
    if (monitor) {
      statusCell = monitor.server.healthy
        ? '{green-fg}● RUN{/green-fg} '
        : '{red-fg}● ERR{/red-fg} ';
    } else if (server.status === 'running') {
      statusCell = '{yellow-fg}● ...{/yellow-fg} ';
    } else {
      statusCell = '{gray-fg}○ STOP{/gray-fg}';
    }

    const slotsCell = metrics
      ? `${metrics.activeSlots}/${metrics.totalSlots}`.padStart(5)
      : '- ';

    const speed = metrics?.avgGenerateSpeed;
    const speedCell =
      speed !== undefined && speed > 0 ? Math.round(speed).toString().padStart(6) : '- ';

    // Process memory, shown in GB from 1 GiB upward and in MB below that.
    let memoryCell = '- ';
    const bytes = metrics?.processMemory;
    if (bytes) {
      memoryCell =
        bytes >= bytesPerGB
          ? `${(bytes / bytesPerGB).toFixed(1)} GB`.padStart(7)
          : `${Math.round(bytes / bytesPerMB)} MB`.padStart(7);
    }

    chunks.push(
      `${index + 1} │ ${idCell} │ ${portCell} │ ${statusCell} │ ${slotsCell} │ ${speedCell} │ ${memoryCell}\n`
    );
  });

  // Footer
  chunks.push('\n' + rule + '\n');
  chunks.push(`{gray-fg}Updated: ${new Date().toLocaleTimeString()} | `);
  chunks.push(`Interval: ${updateInterval}ms | [R]efresh [+/-]Speed{/gray-fg}`);

  return chunks.join('');
}
|
|
260
|
+
|
|
261
|
+
// Render detail view for selected server
|
|
262
|
+
/**
 * Renders the detail view for the server at `selectedServerIndex`.
 *
 * Shows the title and key hints, the system resources section, and - once
 * data for the server is available - server information, request metrics,
 * the slots currently processing, and a footer with the last update time.
 *
 * @param systemMetrics - Metrics passed through to the system resources section.
 * @returns The complete blessed-tagged content for the detail view.
 */
function renderDetailView(systemMetrics: SystemMetrics | null): string {
  const server = servers[selectedServerIndex];
  const entry = serverDataMap.get(server.id);
  const screenWidth = (screen.width as number) || 80;
  const rule = '─'.repeat(screenWidth - 2);

  const spinner = isLoading ? spinnerFrames[spinnerFrameIndex] : '';
  const spinnerSuffix = spinner ? ` {cyan-fg}${spinner}{/cyan-fg}` : '';

  const chunks: string[] = [
    // Header
    `{bold}{blue-fg}═══ Server #${selectedServerIndex + 1}: ${server.id} (${server.port}) ═══{/blue-fg}{/bold}\n`,
    // Key hints with optional spinner
    `{gray-fg}[ESC] Back to list | [Q] Quit${spinnerSuffix}{/gray-fg}\n\n`,
    // System resources
    renderSystemResources(systemMetrics),
    '\n',
  ];

  // Nothing fetched for this server yet: show a placeholder and stop here.
  if (!entry?.data) {
    chunks.push('{yellow-fg}Loading server data...{/yellow-fg}\n');
    return chunks.join('');
  }

  const monitor = entry.data;
  const metrics = monitor.server;

  // ---- Server Information ----
  chunks.push('{bold}Server Information{/bold}\n', rule + '\n');

  const statusLine = metrics.healthy
    ? 'Status: {green-fg}●{/green-fg} RUNNING'
    : 'Status: {red-fg}●{/red-fg} UNHEALTHY';
  chunks.push(statusLine, metrics.uptime ? ` Uptime: ${metrics.uptime}` : '', '\n');

  chunks.push(
    `Model: ${server.modelName}`,
    metrics.contextSize ? ` Context: ${metrics.contextSize} tokens` : '',
    '\n'
  );

  // Handle null host (legacy configs) by defaulting to 127.0.0.1
  const endpointHost = server.host || '127.0.0.1';
  chunks.push(`Endpoint: http://${endpointHost}:${server.port}`);

  // Process memory (if available), in GB from 1 GiB upward and in MB below that.
  if (metrics.processMemory) {
    const bytes = metrics.processMemory;
    const bytesPerGB = 1024 * 1024 * 1024;
    const formatted =
      bytes >= bytesPerGB
        ? `${(bytes / bytesPerGB).toFixed(1)} GB`
        : `${Math.round(bytes / (1024 * 1024))} MB`;
    chunks.push(` Memory: ${formatted}\n`);
  } else {
    chunks.push('\n');
  }

  chunks.push(`Slots: ${metrics.activeSlots} active / ${metrics.totalSlots} total\n`, '\n');

  // ---- Request Metrics ----
  if (metrics.totalSlots > 0) {
    chunks.push(
      '{bold}Request Metrics{/bold}\n',
      rule + '\n',
      `Active: ${metrics.activeSlots} / ${metrics.totalSlots}\n`,
      `Idle: ${metrics.idleSlots} / ${metrics.totalSlots}\n`
    );

    if (metrics.avgPromptSpeed !== undefined && metrics.avgPromptSpeed > 0) {
      chunks.push(`Prompt: ${Math.round(metrics.avgPromptSpeed)} tokens/sec\n`);
    }
    if (metrics.avgGenerateSpeed !== undefined && metrics.avgGenerateSpeed > 0) {
      chunks.push(`Generate: ${Math.round(metrics.avgGenerateSpeed)} tokens/sec\n`);
    }

    chunks.push('\n');
  }

  // ---- Active Slots ---- (only slots whose state is 'processing')
  const busySlots = metrics.slots.filter((slot) => slot.state === 'processing');
  if (busySlots.length > 0) {
    chunks.push('{bold}Active Slots{/bold}\n', rule + '\n');

    for (const slot of busySlots) {
      let line = `Slot #${slot.id}: {yellow-fg}PROCESSING{/yellow-fg}`;

      if (slot.timings?.predicted_per_second) {
        line += ` - ${Math.round(slot.timings.predicted_per_second)} tok/s`;
      }

      if (slot.n_decoded !== undefined) {
        line += ` - ${slot.n_decoded}`;
        if (slot.n_ctx) {
          line += ` / ${slot.n_ctx}`;
        }
        line += ' tokens';
      }

      chunks.push(line + '\n');
    }

    chunks.push('\n');
  }

  // ---- Footer ----
  chunks.push(rule + '\n');
  chunks.push(`{gray-fg}Updated: ${monitor.lastUpdated.toLocaleTimeString()} | `);
  chunks.push(`Interval: ${updateInterval}ms | [R]efresh [+/-]Speed{/gray-fg}`);

  return chunks.join('');
}
|
|
385
|
+
|
|
386
|
+
// Fetch and update display
|
|
387
|
+
/**
 * Runs one refresh cycle and repaints the active view.
 *
 * Steps:
 *  1. Start collecting system metrics (once, shared by every server).
 *  2. Fetch process memory for all servers that have a pid in a single batch call.
 *  3. Collect server metrics for every server in parallel; a failure for one
 *     server is recorded on its `serverDataMap` entry and does not abort the cycle.
 *  4. Attach the shared system metrics to every entry, render, and leave the loading state.
 *
 * Any error outside the per-server collection is shown as a full-screen error
 * message. The loading state is always left through `hideLoading()` so the
 * spinner timer is stopped on the error path as well; otherwise the timer
 * would keep repainting over the error message and its handle would be lost
 * on the next `showLoading()`.
 */
async function fetchData(): Promise<void> {
  try {
    // Collect system metrics ONCE for all servers (not per-server).
    // This prevents spawning multiple macmon processes.
    const systemMetricsPromise = systemCollector.collectSystemMetrics();

    // Batch collect process memory for ALL servers in one top call.
    // This prevents spawning multiple top processes.
    const { getBatchProcessMemory } = await import('../utils/process-utils.js');
    const pids = servers.flatMap((s) => (s.pid ? [s.pid] : []));
    const memoryMapPromise = pids.length > 0
      ? getBatchProcessMemory(pids)
      : Promise.resolve(new Map<number, number | null>());

    // Wait for memory batch to complete
    const memoryMap = await memoryMapPromise;

    // Collect server metrics only (NOT system metrics) for each server
    const promises = servers.map(async (server) => {
      const aggregator = aggregators.get(server.id)!;
      try {
        // Use collectServerMetrics instead of collectMonitorData
        // to avoid spawning macmon per server.
        // Pass pre-fetched memory to avoid spawning top per server.
        const serverMetrics = await aggregator.collectServerMetrics(
          server,
          server.pid ? memoryMap.get(server.pid) ?? null : null
        );

        // Build MonitorData manually with shared system metrics
        const data: MonitorData = {
          server: serverMetrics,
          system: undefined, // Will be set after system metrics resolve
          lastUpdated: new Date(),
          updateInterval,
          consecutiveFailures: 0,
        };

        serverDataMap.set(server.id, {
          server,
          data,
          error: null,
        });
      } catch (err) {
        // Keep the failure on this server's entry; other servers are unaffected.
        serverDataMap.set(server.id, {
          server,
          data: null,
          error: err instanceof Error ? err.message : 'Unknown error',
        });
      }
    });

    // Wait for both system metrics and server metrics to complete
    const systemMetrics = await systemMetricsPromise;
    await Promise.all(promises);

    // Store system metrics for loading state
    lastSystemMetrics = systemMetrics;

    // Update all server data with shared system metrics
    for (const serverData of serverDataMap.values()) {
      if (serverData.data) {
        serverData.data.system = systemMetrics;
      }
    }

    // Render once with complete data
    let content = '';
    if (viewMode === 'list') {
      content = renderListView(systemMetrics);
    } else {
      content = renderDetailView(systemMetrics);
    }

    contentBox.setContent(content);
    screen.render();

    // Clear loading state
    hideLoading();

  } catch (err) {
    const errorMsg = err instanceof Error ? err.message : 'Unknown error';
    contentBox.setContent(
      '{bold}{red-fg}Error{/red-fg}{/bold}\n\n' +
      `{red-fg}${errorMsg}{/red-fg}\n\n` +
      '{gray-fg}Press [R] to retry or [Q] to quit{/gray-fg}'
    );
    screen.render();

    // Clear loading state on error too, including the spinner timer, so the
    // error message is not overwritten by the next spinner tick.
    hideLoading();
  }
}
|
|
480
|
+
|
|
481
|
+
// Polling
|
|
482
|
+
/**
 * (Re)starts polling: cancels any existing polling timer, fetches once
 * immediately, then schedules `fetchData` every `updateInterval` milliseconds.
 */
function startPolling() {
  if (intervalId) {
    clearInterval(intervalId);
  }

  fetchData();
  intervalId = setInterval(fetchData, updateInterval);
}
|
|
487
|
+
|
|
488
|
+
// Keyboard shortcuts - List view
|
|
489
|
+
// Digits 1-9: open the detail view for the server with that number (if it exists).
screen.key(['1', '2', '3', '4', '5', '6', '7', '8', '9'], (ch) => {
  const target = parseInt(ch, 10) - 1;
  if (target < 0 || target >= servers.length) {
    return;
  }
  // The spinner is started before the view switches, so its first paint still shows the old view.
  showLoading();
  selectedServerIndex = target;
  viewMode = 'detail';
  fetchData();
});

// Escape: return from the detail view to the list.
screen.key(['escape'], () => {
  if (viewMode !== 'detail') {
    return;
  }
  showLoading();
  viewMode = 'list';
  fetchData();
});

// R: refresh immediately.
screen.key(['r', 'R'], () => {
  showLoading();
  fetchData();
});

// + / =: poll faster, in 500 ms steps, never below 500 ms.
screen.key(['+', '='], () => {
  updateInterval = Math.max(500, updateInterval - 500);
  startPolling();
});

// - / _: poll slower, in 500 ms steps, never above 10000 ms.
screen.key(['-', '_'], () => {
  updateInterval = Math.min(10000, updateInterval + 500);
  startPolling();
});

// Q / Ctrl-C: stop all timers, then tear down the screen and exit.
screen.key(['q', 'Q', 'C-c'], () => {
  showLoading();
  if (intervalId) {
    clearInterval(intervalId);
  }
  if (spinnerIntervalId) {
    clearInterval(spinnerIntervalId);
  }
  // Small delay to show the loading state before exit
  setTimeout(() => {
    screen.destroy();
    process.exit(0);
  }, 100);
});
|
|
534
|
+
|
|
535
|
+
// Initial display
|
|
536
|
+
// Initial display shown until the first fetch completes.
contentBox.setContent('{cyan-fg}⏳ Connecting to servers...{/cyan-fg}');
screen.render();

startPolling();

// Cleanup: stop every timer owned by this UI when the screen is destroyed,
// regardless of which code path destroyed it. Without stopping the spinner
// timer here, it would keep calling into a destroyed screen.
screen.on('destroy', () => {
  if (intervalId) {
    clearInterval(intervalId);
    intervalId = null;
  }
  hideLoading();
  // Note: macmon child processes will automatically die when parent exits
  // since they're spawned with detached: false
});
|
|
547
|
+
}
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
import { ServerConfig } from './server-config.js';
|
|
2
|
+
|
|
3
|
+
// llama.cpp API response types
|
|
4
|
+
|
|
5
|
+
/** Health response as returned by the llama.cpp server API. */
export interface HealthResponse {
  /** Status string reported by the server. */
  status: string;
  /** Error text, when the server reports one. */
  error?: string;
}
|
|
9
|
+
|
|
10
|
+
/** Server properties response as returned by the llama.cpp server API. */
export interface PropsResponse {
  /** Generation settings the server applies by default. */
  default_generation_settings: {
    n_ctx: number;
    n_predict: number;
    model: string;
    seed: number;
    temperature: number;
    top_k: number;
    top_p: number;
    min_p: number;
    n_keep: number;
    stream: boolean;
  };
  /** Number of slots the server exposes. */
  total_slots: number;
  /** Whether a model is loaded. */
  model_loaded: boolean;
  /** Path of the model. */
  model_path: string;
  /** Optional alias of the model. */
  model_alias?: string;
}
|
|
28
|
+
|
|
29
|
+
/** One slot entry as returned by the llama.cpp server API. */
export interface SlotInfo {
  id: number;
  /** Whether the slot is idle or currently processing a request. */
  state: 'idle' | 'processing';
  task_id?: number;
  prompt?: string;
  n_prompt_tokens?: number;
  n_decoded?: number;
  n_ctx: number;
  truncated?: boolean;
  stopped_eos?: boolean;
  stopped_word?: boolean;
  stopped_limit?: boolean;
  stopping_word?: string;
  tokens_predicted?: number;
  tokens_evaluated?: number;
  /** Generation settings reported for this slot, when present. */
  generation_settings?: {
    n_ctx: number;
    n_predict: number;
    seed: number;
    temperature: number;
    top_k: number;
    top_p: number;
  };
  prompt_tokens_processed?: number;
  /** Time in ms */
  t_prompt_processing?: number;
  /** Time in ms */
  t_token_generation?: number;
  /** Prompt and prediction timing figures, when present. */
  timings?: {
    prompt_n: number;
    prompt_ms: number;
    prompt_per_token_ms: number;
    prompt_per_second: number;
    predicted_n: number;
    predicted_ms: number;
    predicted_per_token_ms: number;
    predicted_per_second: number;
  };
}
|
|
66
|
+
|
|
67
|
+
/** Wrapper holding the list of slot entries. */
export interface SlotsResponse {
  /** One entry per slot. */
  slots: SlotInfo[];
}
|
|
70
|
+
|
|
71
|
+
// System metrics types
|
|
72
|
+
|
|
73
|
+
/** Host-level metrics shown in the "System Resources" section. */
export interface SystemMetrics {
  // ---- GPU/CPU/ANE (from macmon if available) ----

  /** Percentage (0-100) */
  gpuUsage?: number;
  /** Percentage (0-100) */
  cpuUsage?: number;
  /** Number of cores */
  cpuCores?: number;
  /** Apple Neural Engine percentage (0-100) */
  aneUsage?: number;
  /** GPU temperature in Celsius */
  temperature?: number;

  // ---- Memory (from vm_stat or macmon) ----

  /** Bytes */
  memoryUsed: number;
  /** Bytes */
  memoryTotal: number;
  /** Bytes */
  swapUsed?: number;
  /** Bytes (specific to llama-server process) */
  processMemory?: number;

  // ---- Metadata ----

  timestamp: number;
  /** Which collector produced these values. */
  source: 'macmon' | 'vm_stat' | 'none';
  /** e.g., "macmon not available, showing memory only" */
  warnings?: string[];
}
|
|
92
|
+
|
|
93
|
+
// Aggregated metrics for TUI display
|
|
94
|
+
|
|
95
|
+
/** Aggregated per-server metrics for TUI display. */
export interface ServerMetrics {
  // ---- Server identification ----

  server: ServerConfig;

  // ---- Health status ----

  healthy: boolean;
  /** Human-readable (e.g., "2h 34m 12s") */
  uptime?: string;
  error?: string;

  // ---- Model information ----

  modelLoaded: boolean;
  modelName: string;
  contextSize: number;
  totalSlots: number;

  // ---- Request metrics ----

  activeSlots: number;
  idleSlots: number;
  slots: SlotInfo[];

  // ---- Performance metrics (derived from slots) ----

  /** Tokens per second */
  avgPromptSpeed?: number;
  /** Tokens per second */
  avgGenerateSpeed?: number;
  /** Estimated from slot activity */
  requestsPerMinute?: number;
  /** Milliseconds */
  avgLatency?: number;

  // ---- Cache metrics (if available from /metrics endpoint) ----

  /** Percentage */
  cacheHitRate?: number;

  // ---- Process metrics ----

  /** Bytes (actual RSS from top command) */
  processMemory?: number;

  // ---- Timestamp ----

  timestamp: number;
  /** True if data is from last successful fetch */
  stale: boolean;
}
|
|
131
|
+
|
|
132
|
+
/** Everything the monitor UI holds for one server at one point in time. */
export interface MonitorData {
  /** Metrics of the server itself. */
  server: ServerMetrics;
  /** Host-level metrics, when available. */
  system?: SystemMetrics;
  /** When this data was built. */
  lastUpdated: Date;
  /** Milliseconds */
  updateInterval: number;
  consecutiveFailures: number;
}
|
|
139
|
+
|
|
140
|
+
// Error and loading states
|
|
141
|
+
|
|
142
|
+
/** Describes an error to present to the user. */
export interface ErrorState {
  /** Error text. */
  error: string;
  /** Whether a retry can be offered. */
  canRetry: boolean;
  /** Optional suggestions to show alongside the error. */
  suggestions?: string[];
}
|
|
147
|
+
|
|
148
|
+
/** Describes a loading indicator to present to the user. */
export interface LoadingState {
  /** Text to show while loading. */
  message: string;
  /** 0-100 if determinate */
  progress?: number;
}
|
|
152
|
+
|
|
153
|
+
// Collection result (for graceful degradation)
|
|
154
|
+
|
|
155
|
+
/**
 * Collection result (for graceful degradation).
 *
 * @typeParam T - Type of the collected payload.
 */
export interface CollectionResult<T> {
  /** Whether the collection succeeded. */
  success: boolean;
  /** The collected payload, or `null` when there is none. */
  data: T | null;
  /** Error text, when there is one. */
  error?: string;
  /** Optional warnings to surface alongside the result. */
  warnings?: string[];
  /** Optional staleness flag for `data`. */
  stale?: boolean;
}
|