@appkit/llamacpp-cli 1.12.0 → 1.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +294 -168
- package/dist/cli.js +35 -0
- package/dist/cli.js.map +1 -1
- package/dist/commands/launch/claude.d.ts +6 -0
- package/dist/commands/launch/claude.d.ts.map +1 -0
- package/dist/commands/launch/claude.js +277 -0
- package/dist/commands/launch/claude.js.map +1 -0
- package/dist/lib/integration-checker.d.ts +26 -0
- package/dist/lib/integration-checker.d.ts.map +1 -0
- package/dist/lib/integration-checker.js +77 -0
- package/dist/lib/integration-checker.js.map +1 -0
- package/dist/lib/router-manager.d.ts +4 -0
- package/dist/lib/router-manager.d.ts.map +1 -1
- package/dist/lib/router-manager.js +10 -0
- package/dist/lib/router-manager.js.map +1 -1
- package/dist/lib/router-server.d.ts +13 -0
- package/dist/lib/router-server.d.ts.map +1 -1
- package/dist/lib/router-server.js +267 -7
- package/dist/lib/router-server.js.map +1 -1
- package/dist/types/integration-config.d.ts +28 -0
- package/dist/types/integration-config.d.ts.map +1 -0
- package/dist/types/integration-config.js +3 -0
- package/dist/types/integration-config.js.map +1 -0
- package/package.json +10 -2
- package/web/dist/assets/index-Bin89Lwr.css +1 -0
- package/web/dist/assets/index-CVmonw3T.js +17 -0
- package/web/{index.html → dist/index.html} +2 -1
- package/.versionrc.json +0 -16
- package/CHANGELOG.md +0 -213
- package/docs/images/.gitkeep +0 -1
- package/docs/images/web-ui-servers.png +0 -0
- package/src/cli.ts +0 -523
- package/src/commands/admin/config.ts +0 -121
- package/src/commands/admin/logs.ts +0 -91
- package/src/commands/admin/restart.ts +0 -26
- package/src/commands/admin/start.ts +0 -27
- package/src/commands/admin/status.ts +0 -84
- package/src/commands/admin/stop.ts +0 -16
- package/src/commands/config-global.ts +0 -38
- package/src/commands/config.ts +0 -323
- package/src/commands/create.ts +0 -183
- package/src/commands/delete.ts +0 -74
- package/src/commands/list.ts +0 -37
- package/src/commands/logs-all.ts +0 -251
- package/src/commands/logs.ts +0 -345
- package/src/commands/monitor.ts +0 -110
- package/src/commands/ps.ts +0 -84
- package/src/commands/pull.ts +0 -44
- package/src/commands/rm.ts +0 -107
- package/src/commands/router/config.ts +0 -116
- package/src/commands/router/logs.ts +0 -256
- package/src/commands/router/restart.ts +0 -36
- package/src/commands/router/start.ts +0 -60
- package/src/commands/router/status.ts +0 -119
- package/src/commands/router/stop.ts +0 -33
- package/src/commands/run.ts +0 -233
- package/src/commands/search.ts +0 -107
- package/src/commands/server-show.ts +0 -161
- package/src/commands/show.ts +0 -207
- package/src/commands/start.ts +0 -101
- package/src/commands/stop.ts +0 -39
- package/src/commands/tui.ts +0 -25
- package/src/lib/admin-manager.ts +0 -435
- package/src/lib/admin-server.ts +0 -1243
- package/src/lib/config-generator.ts +0 -130
- package/src/lib/download-job-manager.ts +0 -213
- package/src/lib/history-manager.ts +0 -172
- package/src/lib/launchctl-manager.ts +0 -225
- package/src/lib/metrics-aggregator.ts +0 -257
- package/src/lib/model-downloader.ts +0 -328
- package/src/lib/model-scanner.ts +0 -157
- package/src/lib/model-search.ts +0 -114
- package/src/lib/models-dir-setup.ts +0 -46
- package/src/lib/port-manager.ts +0 -80
- package/src/lib/router-logger.ts +0 -201
- package/src/lib/router-manager.ts +0 -414
- package/src/lib/router-server.ts +0 -538
- package/src/lib/state-manager.ts +0 -206
- package/src/lib/status-checker.ts +0 -113
- package/src/lib/system-collector.ts +0 -315
- package/src/tui/ConfigApp.ts +0 -1085
- package/src/tui/HistoricalMonitorApp.ts +0 -587
- package/src/tui/ModelsApp.ts +0 -368
- package/src/tui/MonitorApp.ts +0 -386
- package/src/tui/MultiServerMonitorApp.ts +0 -1833
- package/src/tui/RootNavigator.ts +0 -74
- package/src/tui/SearchApp.ts +0 -511
- package/src/tui/SplashScreen.ts +0 -149
- package/src/types/admin-config.ts +0 -25
- package/src/types/global-config.ts +0 -26
- package/src/types/history-types.ts +0 -39
- package/src/types/model-info.ts +0 -8
- package/src/types/monitor-types.ts +0 -162
- package/src/types/router-config.ts +0 -25
- package/src/types/server-config.ts +0 -46
- package/src/utils/downsample-utils.ts +0 -128
- package/src/utils/file-utils.ts +0 -146
- package/src/utils/format-utils.ts +0 -98
- package/src/utils/log-parser.ts +0 -284
- package/src/utils/log-utils.ts +0 -178
- package/src/utils/process-utils.ts +0 -316
- package/src/utils/prompt-utils.ts +0 -47
- package/test-load.sh +0 -100
- package/tsconfig.json +0 -20
- package/web/eslint.config.js +0 -23
- package/web/llamacpp-web-dist.tar.gz +0 -0
- package/web/package-lock.json +0 -4017
- package/web/package.json +0 -38
- package/web/postcss.config.js +0 -6
- package/web/src/App.css +0 -42
- package/web/src/App.tsx +0 -86
- package/web/src/assets/react.svg +0 -1
- package/web/src/components/ApiKeyPrompt.tsx +0 -71
- package/web/src/components/CreateServerModal.tsx +0 -372
- package/web/src/components/DownloadProgress.tsx +0 -123
- package/web/src/components/Nav.tsx +0 -89
- package/web/src/components/RouterConfigModal.tsx +0 -240
- package/web/src/components/SearchModal.tsx +0 -306
- package/web/src/components/ServerConfigModal.tsx +0 -291
- package/web/src/hooks/useApi.ts +0 -259
- package/web/src/index.css +0 -42
- package/web/src/lib/api.ts +0 -226
- package/web/src/main.tsx +0 -10
- package/web/src/pages/Dashboard.tsx +0 -103
- package/web/src/pages/Models.tsx +0 -258
- package/web/src/pages/Router.tsx +0 -270
- package/web/src/pages/RouterLogs.tsx +0 -201
- package/web/src/pages/ServerLogs.tsx +0 -553
- package/web/src/pages/Servers.tsx +0 -358
- package/web/src/types/api.ts +0 -140
- package/web/tailwind.config.js +0 -31
- package/web/tsconfig.app.json +0 -28
- package/web/tsconfig.json +0 -7
- package/web/tsconfig.node.json +0 -26
- package/web/vite.config.ts +0 -25
- /package/web/{public → dist}/vite.svg +0 -0
|
@@ -1,225 +0,0 @@
|
|
|
1
|
-
import * as path from 'path';
|
|
2
|
-
import * as fs from 'fs/promises';
|
|
3
|
-
import { ServerConfig } from '../types/server-config';
|
|
4
|
-
import { execCommand, execAsync } from '../utils/process-utils';
|
|
5
|
-
import { writeFileAtomic, fileExists } from '../utils/file-utils';
|
|
6
|
-
|
|
7
|
-
export interface ServiceStatus {
|
|
8
|
-
isRunning: boolean;
|
|
9
|
-
pid: number | null;
|
|
10
|
-
exitCode: number | null;
|
|
11
|
-
lastExitReason?: string;
|
|
12
|
-
}
|
|
13
|
-
|
|
14
|
-
export class LaunchctlManager {
|
|
15
|
-
/**
|
|
16
|
-
* Generate plist XML content for a server
|
|
17
|
-
*/
|
|
18
|
-
generatePlist(config: ServerConfig): string {
|
|
19
|
-
// Build program arguments array
|
|
20
|
-
const args = [
|
|
21
|
-
'/opt/homebrew/bin/llama-server',
|
|
22
|
-
'--model', config.modelPath,
|
|
23
|
-
'--host', config.host,
|
|
24
|
-
'--port', config.port.toString(),
|
|
25
|
-
'--threads', config.threads.toString(),
|
|
26
|
-
'--ctx-size', config.ctxSize.toString(),
|
|
27
|
-
'--gpu-layers', config.gpuLayers.toString(),
|
|
28
|
-
];
|
|
29
|
-
|
|
30
|
-
// Add flags
|
|
31
|
-
if (config.embeddings) args.push('--embeddings');
|
|
32
|
-
if (config.jinja) args.push('--jinja');
|
|
33
|
-
|
|
34
|
-
// Conditionally enable verbose HTTP logging for detailed request/response info
|
|
35
|
-
if (config.verbose) {
|
|
36
|
-
args.push('--log-verbose');
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
// Add custom flags
|
|
40
|
-
if (config.customFlags && config.customFlags.length > 0) {
|
|
41
|
-
args.push(...config.customFlags);
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
// Generate XML array elements
|
|
45
|
-
const argsXml = args.map(arg => ` <string>${arg}</string>`).join('\n');
|
|
46
|
-
|
|
47
|
-
return `<?xml version="1.0" encoding="UTF-8"?>
|
|
48
|
-
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN"
|
|
49
|
-
"http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
|
50
|
-
<plist version="1.0">
|
|
51
|
-
<dict>
|
|
52
|
-
<key>Label</key>
|
|
53
|
-
<string>${config.label}</string>
|
|
54
|
-
|
|
55
|
-
<key>ProgramArguments</key>
|
|
56
|
-
<array>
|
|
57
|
-
${argsXml}
|
|
58
|
-
</array>
|
|
59
|
-
|
|
60
|
-
<key>RunAtLoad</key>
|
|
61
|
-
<false/>
|
|
62
|
-
|
|
63
|
-
<key>KeepAlive</key>
|
|
64
|
-
<dict>
|
|
65
|
-
<key>Crashed</key>
|
|
66
|
-
<true/>
|
|
67
|
-
<key>SuccessfulExit</key>
|
|
68
|
-
<false/>
|
|
69
|
-
</dict>
|
|
70
|
-
|
|
71
|
-
<key>StandardOutPath</key>
|
|
72
|
-
<string>${config.stdoutPath}</string>
|
|
73
|
-
|
|
74
|
-
<key>StandardErrorPath</key>
|
|
75
|
-
<string>${config.stderrPath}</string>
|
|
76
|
-
|
|
77
|
-
<key>WorkingDirectory</key>
|
|
78
|
-
<string>/tmp</string>
|
|
79
|
-
|
|
80
|
-
<key>ThrottleInterval</key>
|
|
81
|
-
<integer>10</integer>
|
|
82
|
-
</dict>
|
|
83
|
-
</plist>
|
|
84
|
-
`;
|
|
85
|
-
}
|
|
86
|
-
|
|
87
|
-
/**
|
|
88
|
-
* Create and write plist file
|
|
89
|
-
*/
|
|
90
|
-
async createPlist(config: ServerConfig): Promise<void> {
|
|
91
|
-
const plistContent = this.generatePlist(config);
|
|
92
|
-
await writeFileAtomic(config.plistPath, plistContent);
|
|
93
|
-
}
|
|
94
|
-
|
|
95
|
-
/**
|
|
96
|
-
* Delete plist file
|
|
97
|
-
*/
|
|
98
|
-
async deletePlist(plistPath: string): Promise<void> {
|
|
99
|
-
if (await fileExists(plistPath)) {
|
|
100
|
-
await fs.unlink(plistPath);
|
|
101
|
-
}
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
/**
|
|
105
|
-
* Load service (register with launchctl)
|
|
106
|
-
*/
|
|
107
|
-
async loadService(plistPath: string): Promise<void> {
|
|
108
|
-
await execCommand(`launchctl load "${plistPath}"`);
|
|
109
|
-
}
|
|
110
|
-
|
|
111
|
-
/**
|
|
112
|
-
* Unload service (unregister from launchctl)
|
|
113
|
-
*/
|
|
114
|
-
async unloadService(plistPath: string): Promise<void> {
|
|
115
|
-
try {
|
|
116
|
-
await execCommand(`launchctl unload "${plistPath}"`);
|
|
117
|
-
} catch (error) {
|
|
118
|
-
// Ignore errors if service is not loaded
|
|
119
|
-
}
|
|
120
|
-
}
|
|
121
|
-
|
|
122
|
-
/**
|
|
123
|
-
* Start service
|
|
124
|
-
*/
|
|
125
|
-
async startService(label: string): Promise<void> {
|
|
126
|
-
await execCommand(`launchctl start ${label}`);
|
|
127
|
-
}
|
|
128
|
-
|
|
129
|
-
/**
|
|
130
|
-
* Stop service
|
|
131
|
-
*/
|
|
132
|
-
async stopService(label: string): Promise<void> {
|
|
133
|
-
await execCommand(`launchctl stop ${label}`);
|
|
134
|
-
}
|
|
135
|
-
|
|
136
|
-
/**
|
|
137
|
-
* Get service status from launchctl
|
|
138
|
-
*/
|
|
139
|
-
async getServiceStatus(label: string): Promise<ServiceStatus> {
|
|
140
|
-
try {
|
|
141
|
-
const { stdout } = await execAsync(`launchctl list | grep ${label}`);
|
|
142
|
-
const lines = stdout.trim().split('\n');
|
|
143
|
-
|
|
144
|
-
for (const line of lines) {
|
|
145
|
-
const parts = line.split(/\s+/);
|
|
146
|
-
if (parts.length >= 3) {
|
|
147
|
-
const pidStr = parts[0].trim();
|
|
148
|
-
const exitCodeStr = parts[1].trim();
|
|
149
|
-
const serviceLabel = parts[2].trim();
|
|
150
|
-
|
|
151
|
-
// Match the exact label
|
|
152
|
-
if (serviceLabel === label) {
|
|
153
|
-
const pid = pidStr !== '-' ? parseInt(pidStr, 10) : null;
|
|
154
|
-
const exitCode = exitCodeStr !== '-' ? parseInt(exitCodeStr, 10) : null;
|
|
155
|
-
const isRunning = pid !== null;
|
|
156
|
-
|
|
157
|
-
return {
|
|
158
|
-
isRunning,
|
|
159
|
-
pid,
|
|
160
|
-
exitCode,
|
|
161
|
-
lastExitReason: this.interpretExitCode(exitCode),
|
|
162
|
-
};
|
|
163
|
-
}
|
|
164
|
-
}
|
|
165
|
-
}
|
|
166
|
-
|
|
167
|
-
// Service not found
|
|
168
|
-
return {
|
|
169
|
-
isRunning: false,
|
|
170
|
-
pid: null,
|
|
171
|
-
exitCode: null,
|
|
172
|
-
};
|
|
173
|
-
} catch (error) {
|
|
174
|
-
// Service not found or not loaded
|
|
175
|
-
return {
|
|
176
|
-
isRunning: false,
|
|
177
|
-
pid: null,
|
|
178
|
-
exitCode: null,
|
|
179
|
-
};
|
|
180
|
-
}
|
|
181
|
-
}
|
|
182
|
-
|
|
183
|
-
/**
|
|
184
|
-
* Interpret exit code to human-readable reason
|
|
185
|
-
*/
|
|
186
|
-
private interpretExitCode(code: number | null): string | undefined {
|
|
187
|
-
if (code === null || code === 0) return undefined;
|
|
188
|
-
if (code === -9) return 'Force killed (SIGKILL)';
|
|
189
|
-
if (code === -15) return 'Terminated (SIGTERM)';
|
|
190
|
-
return `Exit code: ${code}`;
|
|
191
|
-
}
|
|
192
|
-
|
|
193
|
-
/**
|
|
194
|
-
* Wait for service to start (with timeout)
|
|
195
|
-
*/
|
|
196
|
-
async waitForServiceStart(label: string, timeoutMs = 5000): Promise<boolean> {
|
|
197
|
-
const startTime = Date.now();
|
|
198
|
-
while (Date.now() - startTime < timeoutMs) {
|
|
199
|
-
const status = await this.getServiceStatus(label);
|
|
200
|
-
if (status.isRunning) {
|
|
201
|
-
return true;
|
|
202
|
-
}
|
|
203
|
-
await new Promise((resolve) => setTimeout(resolve, 500));
|
|
204
|
-
}
|
|
205
|
-
return false;
|
|
206
|
-
}
|
|
207
|
-
|
|
208
|
-
/**
|
|
209
|
-
* Wait for service to stop (with timeout)
|
|
210
|
-
*/
|
|
211
|
-
async waitForServiceStop(label: string, timeoutMs = 5000): Promise<boolean> {
|
|
212
|
-
const startTime = Date.now();
|
|
213
|
-
while (Date.now() - startTime < timeoutMs) {
|
|
214
|
-
const status = await this.getServiceStatus(label);
|
|
215
|
-
if (!status.isRunning) {
|
|
216
|
-
return true;
|
|
217
|
-
}
|
|
218
|
-
await new Promise((resolve) => setTimeout(resolve, 500));
|
|
219
|
-
}
|
|
220
|
-
return false;
|
|
221
|
-
}
|
|
222
|
-
}
|
|
223
|
-
|
|
224
|
-
// Export singleton instance
|
|
225
|
-
export const launchctlManager = new LaunchctlManager();
|
|
@@ -1,257 +0,0 @@
|
|
|
1
|
-
import { ServerConfig } from '../types/server-config.js';
|
|
2
|
-
import { ServerMetrics, SlotInfo, MonitorData } from '../types/monitor-types.js';
|
|
3
|
-
import { statusChecker } from './status-checker.js';
|
|
4
|
-
import { systemCollector } from './system-collector.js';
|
|
5
|
-
import { getProcessMemory, getProcessCpu } from '../utils/process-utils.js';
|
|
6
|
-
|
|
7
|
-
/**
|
|
8
|
-
* Aggregates metrics from llama.cpp server API endpoints
|
|
9
|
-
* Combines server health, slot status, and model properties
|
|
10
|
-
*/
|
|
11
|
-
export class MetricsAggregator {
|
|
12
|
-
private serverUrl: string;
|
|
13
|
-
private timeout: number;
|
|
14
|
-
private previousSlots: Map<number, { n_decoded: number; timestamp: number }> = new Map();
|
|
15
|
-
|
|
16
|
-
constructor(server: ServerConfig, timeout: number = 5000) {
|
|
17
|
-
// Handle null host (legacy configs) by defaulting to 127.0.0.1
|
|
18
|
-
const host = server.host || '127.0.0.1';
|
|
19
|
-
this.serverUrl = `http://${host}:${server.port}`;
|
|
20
|
-
this.timeout = timeout;
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
/**
|
|
24
|
-
* Fetch data from llama.cpp API with timeout
|
|
25
|
-
*/
|
|
26
|
-
private async fetchWithTimeout(
|
|
27
|
-
endpoint: string,
|
|
28
|
-
customTimeout?: number
|
|
29
|
-
): Promise<any | null> {
|
|
30
|
-
try {
|
|
31
|
-
const controller = new AbortController();
|
|
32
|
-
const timeoutMs = customTimeout ?? this.timeout;
|
|
33
|
-
const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
|
|
34
|
-
|
|
35
|
-
const response = await fetch(`${this.serverUrl}${endpoint}`, {
|
|
36
|
-
signal: controller.signal,
|
|
37
|
-
});
|
|
38
|
-
|
|
39
|
-
clearTimeout(timeoutId);
|
|
40
|
-
|
|
41
|
-
if (!response.ok) {
|
|
42
|
-
return null;
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
return await response.json();
|
|
46
|
-
} catch (err) {
|
|
47
|
-
// Network error, timeout, or parse error
|
|
48
|
-
return null;
|
|
49
|
-
}
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
/**
|
|
53
|
-
* Get server health status
|
|
54
|
-
*/
|
|
55
|
-
private async getHealth(): Promise<boolean> {
|
|
56
|
-
const health = await this.fetchWithTimeout('/health');
|
|
57
|
-
return health !== null && health.status === 'ok';
|
|
58
|
-
}
|
|
59
|
-
|
|
60
|
-
/**
|
|
61
|
-
* Get server properties (model info, context size, etc.)
|
|
62
|
-
*/
|
|
63
|
-
private async getProps(): Promise<any> {
|
|
64
|
-
return await this.fetchWithTimeout('/props');
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
/**
|
|
68
|
-
* Get active slots information with calculated tok/s
|
|
69
|
-
*/
|
|
70
|
-
private async getSlots(): Promise<SlotInfo[]> {
|
|
71
|
-
const data = await this.fetchWithTimeout('/slots');
|
|
72
|
-
if (!data || !Array.isArray(data)) {
|
|
73
|
-
return [];
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
const now = Date.now();
|
|
77
|
-
|
|
78
|
-
return data.map((slot: any) => {
|
|
79
|
-
const slotId = slot.id;
|
|
80
|
-
const n_decoded = slot.next_token?.[0]?.n_decoded || 0;
|
|
81
|
-
const isProcessing = slot.is_processing;
|
|
82
|
-
|
|
83
|
-
// Calculate tokens per second by comparing with previous poll
|
|
84
|
-
let predicted_per_second: number | undefined;
|
|
85
|
-
|
|
86
|
-
if (isProcessing && n_decoded > 0) {
|
|
87
|
-
const previous = this.previousSlots.get(slotId);
|
|
88
|
-
|
|
89
|
-
if (previous && previous.n_decoded < n_decoded) {
|
|
90
|
-
const tokensGenerated = n_decoded - previous.n_decoded;
|
|
91
|
-
const timeElapsed = (now - previous.timestamp) / 1000; // Convert to seconds
|
|
92
|
-
|
|
93
|
-
if (timeElapsed > 0) {
|
|
94
|
-
predicted_per_second = tokensGenerated / timeElapsed;
|
|
95
|
-
}
|
|
96
|
-
}
|
|
97
|
-
|
|
98
|
-
// Store current state for next comparison
|
|
99
|
-
this.previousSlots.set(slotId, { n_decoded, timestamp: now });
|
|
100
|
-
} else if (!isProcessing) {
|
|
101
|
-
// Clear history when slot becomes idle
|
|
102
|
-
this.previousSlots.delete(slotId);
|
|
103
|
-
}
|
|
104
|
-
|
|
105
|
-
return {
|
|
106
|
-
id: slotId,
|
|
107
|
-
state: isProcessing ? 'processing' : 'idle',
|
|
108
|
-
n_prompt_tokens: slot.n_prompt_tokens,
|
|
109
|
-
n_decoded,
|
|
110
|
-
n_ctx: slot.n_ctx || 0,
|
|
111
|
-
timings: predicted_per_second
|
|
112
|
-
? {
|
|
113
|
-
prompt_n: 0,
|
|
114
|
-
prompt_ms: 0,
|
|
115
|
-
prompt_per_token_ms: 0,
|
|
116
|
-
prompt_per_second: 0,
|
|
117
|
-
predicted_n: n_decoded,
|
|
118
|
-
predicted_ms: 0,
|
|
119
|
-
predicted_per_token_ms: 0,
|
|
120
|
-
predicted_per_second,
|
|
121
|
-
}
|
|
122
|
-
: undefined,
|
|
123
|
-
};
|
|
124
|
-
});
|
|
125
|
-
}
|
|
126
|
-
|
|
127
|
-
/**
|
|
128
|
-
* Aggregate all server metrics
|
|
129
|
-
* @param server - Server configuration
|
|
130
|
-
* @param processMemory - Optional pre-fetched process memory (for batch collection)
|
|
131
|
-
* @param processCpuUsage - Optional pre-fetched process CPU usage (for batch collection)
|
|
132
|
-
*/
|
|
133
|
-
async collectServerMetrics(
|
|
134
|
-
server: ServerConfig,
|
|
135
|
-
processMemory?: number | null,
|
|
136
|
-
processCpuUsage?: number | null
|
|
137
|
-
): Promise<ServerMetrics> {
|
|
138
|
-
const now = Date.now();
|
|
139
|
-
|
|
140
|
-
// Check basic server status first
|
|
141
|
-
const status = await statusChecker.checkServer(server);
|
|
142
|
-
|
|
143
|
-
// Calculate uptime if server is running and has lastStarted
|
|
144
|
-
let uptime: string | undefined;
|
|
145
|
-
if (status.isRunning && server.lastStarted) {
|
|
146
|
-
const startTime = new Date(server.lastStarted).getTime();
|
|
147
|
-
const uptimeSeconds = Math.floor((now - startTime) / 1000);
|
|
148
|
-
const hours = Math.floor(uptimeSeconds / 3600);
|
|
149
|
-
const minutes = Math.floor((uptimeSeconds % 3600) / 60);
|
|
150
|
-
const seconds = uptimeSeconds % 60;
|
|
151
|
-
uptime = `${hours}h ${minutes}m ${seconds}s`;
|
|
152
|
-
}
|
|
153
|
-
|
|
154
|
-
// If server not running, return minimal data
|
|
155
|
-
if (!status.isRunning) {
|
|
156
|
-
return {
|
|
157
|
-
server,
|
|
158
|
-
healthy: false,
|
|
159
|
-
modelLoaded: false,
|
|
160
|
-
modelName: server.modelName,
|
|
161
|
-
contextSize: server.ctxSize,
|
|
162
|
-
totalSlots: 0,
|
|
163
|
-
activeSlots: 0,
|
|
164
|
-
idleSlots: 0,
|
|
165
|
-
slots: [],
|
|
166
|
-
timestamp: now,
|
|
167
|
-
stale: false,
|
|
168
|
-
};
|
|
169
|
-
}
|
|
170
|
-
|
|
171
|
-
// Fetch detailed metrics in parallel
|
|
172
|
-
// If processMemory/CPU were pre-fetched (batch mode), use them; otherwise fetch individually
|
|
173
|
-
const [healthy, props, slots, fetchedMemory, fetchedCpu] = await Promise.all([
|
|
174
|
-
this.getHealth(),
|
|
175
|
-
this.getProps(),
|
|
176
|
-
this.getSlots(),
|
|
177
|
-
processMemory !== undefined
|
|
178
|
-
? Promise.resolve(processMemory)
|
|
179
|
-
: (server.pid ? getProcessMemory(server.pid) : Promise.resolve(null)),
|
|
180
|
-
processCpuUsage !== undefined
|
|
181
|
-
? Promise.resolve(processCpuUsage)
|
|
182
|
-
: (server.pid ? getProcessCpu(server.pid) : Promise.resolve(null)),
|
|
183
|
-
]);
|
|
184
|
-
|
|
185
|
-
// Calculate slot statistics
|
|
186
|
-
const activeSlots = slots.filter((s) => s.state === 'processing').length;
|
|
187
|
-
const idleSlots = slots.filter((s) => s.state === 'idle').length;
|
|
188
|
-
const totalSlots = props?.total_slots || slots.length;
|
|
189
|
-
|
|
190
|
-
// Calculate average speeds (only from processing slots)
|
|
191
|
-
const processingSlots = slots.filter((s) => s.state === 'processing' && s.timings);
|
|
192
|
-
|
|
193
|
-
const avgPromptSpeed =
|
|
194
|
-
processingSlots.length > 0
|
|
195
|
-
? processingSlots.reduce(
|
|
196
|
-
(sum, s) => sum + (s.timings?.prompt_per_second || 0),
|
|
197
|
-
0
|
|
198
|
-
) / processingSlots.length
|
|
199
|
-
: undefined;
|
|
200
|
-
|
|
201
|
-
const avgGenerateSpeed =
|
|
202
|
-
processingSlots.length > 0
|
|
203
|
-
? processingSlots.reduce(
|
|
204
|
-
(sum, s) => sum + (s.timings?.predicted_per_second || 0),
|
|
205
|
-
0
|
|
206
|
-
) / processingSlots.length
|
|
207
|
-
: undefined;
|
|
208
|
-
|
|
209
|
-
// Calculate total memory (CPU + Metal GPU memory if available)
|
|
210
|
-
let totalMemory = fetchedMemory ?? undefined;
|
|
211
|
-
if (totalMemory !== undefined && server.metalMemoryMB) {
|
|
212
|
-
// Add Metal memory (convert MB to bytes)
|
|
213
|
-
totalMemory += server.metalMemoryMB * 1024 * 1024;
|
|
214
|
-
}
|
|
215
|
-
|
|
216
|
-
return {
|
|
217
|
-
server,
|
|
218
|
-
healthy,
|
|
219
|
-
uptime,
|
|
220
|
-
modelLoaded: props !== null,
|
|
221
|
-
modelName: server.modelName,
|
|
222
|
-
contextSize: props?.default_generation_settings?.n_ctx || server.ctxSize,
|
|
223
|
-
totalSlots,
|
|
224
|
-
activeSlots,
|
|
225
|
-
idleSlots,
|
|
226
|
-
slots,
|
|
227
|
-
avgPromptSpeed,
|
|
228
|
-
avgGenerateSpeed,
|
|
229
|
-
processMemory: totalMemory,
|
|
230
|
-
processCpuUsage: fetchedCpu ?? undefined,
|
|
231
|
-
timestamp: now,
|
|
232
|
-
stale: false,
|
|
233
|
-
};
|
|
234
|
-
}
|
|
235
|
-
|
|
236
|
-
/**
|
|
237
|
-
* Collect complete monitoring data (server + system metrics)
|
|
238
|
-
*/
|
|
239
|
-
async collectMonitorData(
|
|
240
|
-
server: ServerConfig,
|
|
241
|
-
updateInterval: number = 2000
|
|
242
|
-
): Promise<MonitorData> {
|
|
243
|
-
// Collect server and system metrics in parallel
|
|
244
|
-
const [serverMetrics, systemMetrics] = await Promise.all([
|
|
245
|
-
this.collectServerMetrics(server),
|
|
246
|
-
systemCollector.collectSystemMetrics(),
|
|
247
|
-
]);
|
|
248
|
-
|
|
249
|
-
return {
|
|
250
|
-
server: serverMetrics,
|
|
251
|
-
system: systemMetrics,
|
|
252
|
-
lastUpdated: new Date(),
|
|
253
|
-
updateInterval,
|
|
254
|
-
consecutiveFailures: 0,
|
|
255
|
-
};
|
|
256
|
-
}
|
|
257
|
-
}
|