@appkit/llamacpp-cli 1.11.0 → 1.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. package/README.md +572 -170
  2. package/dist/cli.js +99 -0
  3. package/dist/cli.js.map +1 -1
  4. package/dist/commands/admin/config.d.ts +10 -0
  5. package/dist/commands/admin/config.d.ts.map +1 -0
  6. package/dist/commands/admin/config.js +100 -0
  7. package/dist/commands/admin/config.js.map +1 -0
  8. package/dist/commands/admin/logs.d.ts +10 -0
  9. package/dist/commands/admin/logs.d.ts.map +1 -0
  10. package/dist/commands/admin/logs.js +114 -0
  11. package/dist/commands/admin/logs.js.map +1 -0
  12. package/dist/commands/admin/restart.d.ts +2 -0
  13. package/dist/commands/admin/restart.d.ts.map +1 -0
  14. package/dist/commands/admin/restart.js +29 -0
  15. package/dist/commands/admin/restart.js.map +1 -0
  16. package/dist/commands/admin/start.d.ts +2 -0
  17. package/dist/commands/admin/start.d.ts.map +1 -0
  18. package/dist/commands/admin/start.js +30 -0
  19. package/dist/commands/admin/start.js.map +1 -0
  20. package/dist/commands/admin/status.d.ts +2 -0
  21. package/dist/commands/admin/status.d.ts.map +1 -0
  22. package/dist/commands/admin/status.js +82 -0
  23. package/dist/commands/admin/status.js.map +1 -0
  24. package/dist/commands/admin/stop.d.ts +2 -0
  25. package/dist/commands/admin/stop.d.ts.map +1 -0
  26. package/dist/commands/admin/stop.js +21 -0
  27. package/dist/commands/admin/stop.js.map +1 -0
  28. package/dist/commands/logs.d.ts +1 -0
  29. package/dist/commands/logs.d.ts.map +1 -1
  30. package/dist/commands/logs.js +22 -0
  31. package/dist/commands/logs.js.map +1 -1
  32. package/dist/lib/admin-manager.d.ts +111 -0
  33. package/dist/lib/admin-manager.d.ts.map +1 -0
  34. package/dist/lib/admin-manager.js +413 -0
  35. package/dist/lib/admin-manager.js.map +1 -0
  36. package/dist/lib/admin-server.d.ts +148 -0
  37. package/dist/lib/admin-server.d.ts.map +1 -0
  38. package/dist/lib/admin-server.js +1161 -0
  39. package/dist/lib/admin-server.js.map +1 -0
  40. package/dist/lib/download-job-manager.d.ts +64 -0
  41. package/dist/lib/download-job-manager.d.ts.map +1 -0
  42. package/dist/lib/download-job-manager.js +164 -0
  43. package/dist/lib/download-job-manager.js.map +1 -0
  44. package/dist/tui/MultiServerMonitorApp.js +1 -1
  45. package/dist/types/admin-config.d.ts +19 -0
  46. package/dist/types/admin-config.d.ts.map +1 -0
  47. package/dist/types/admin-config.js +3 -0
  48. package/dist/types/admin-config.js.map +1 -0
  49. package/dist/utils/log-parser.d.ts +9 -0
  50. package/dist/utils/log-parser.d.ts.map +1 -1
  51. package/dist/utils/log-parser.js +11 -0
  52. package/dist/utils/log-parser.js.map +1 -1
  53. package/package.json +10 -2
  54. package/web/README.md +429 -0
  55. package/web/dist/assets/index-Bin89Lwr.css +1 -0
  56. package/web/dist/assets/index-CVmonw3T.js +17 -0
  57. package/web/dist/index.html +14 -0
  58. package/web/dist/vite.svg +1 -0
  59. package/.versionrc.json +0 -16
  60. package/CHANGELOG.md +0 -203
  61. package/MONITORING-ACCURACY-FIX.md +0 -199
  62. package/PER-PROCESS-METRICS.md +0 -190
  63. package/docs/images/.gitkeep +0 -1
  64. package/src/cli.ts +0 -423
  65. package/src/commands/config-global.ts +0 -38
  66. package/src/commands/config.ts +0 -323
  67. package/src/commands/create.ts +0 -183
  68. package/src/commands/delete.ts +0 -74
  69. package/src/commands/list.ts +0 -37
  70. package/src/commands/logs-all.ts +0 -251
  71. package/src/commands/logs.ts +0 -321
  72. package/src/commands/monitor.ts +0 -110
  73. package/src/commands/ps.ts +0 -84
  74. package/src/commands/pull.ts +0 -44
  75. package/src/commands/rm.ts +0 -107
  76. package/src/commands/router/config.ts +0 -116
  77. package/src/commands/router/logs.ts +0 -256
  78. package/src/commands/router/restart.ts +0 -36
  79. package/src/commands/router/start.ts +0 -60
  80. package/src/commands/router/status.ts +0 -119
  81. package/src/commands/router/stop.ts +0 -33
  82. package/src/commands/run.ts +0 -233
  83. package/src/commands/search.ts +0 -107
  84. package/src/commands/server-show.ts +0 -161
  85. package/src/commands/show.ts +0 -207
  86. package/src/commands/start.ts +0 -101
  87. package/src/commands/stop.ts +0 -39
  88. package/src/commands/tui.ts +0 -25
  89. package/src/lib/config-generator.ts +0 -130
  90. package/src/lib/history-manager.ts +0 -172
  91. package/src/lib/launchctl-manager.ts +0 -225
  92. package/src/lib/metrics-aggregator.ts +0 -257
  93. package/src/lib/model-downloader.ts +0 -328
  94. package/src/lib/model-scanner.ts +0 -157
  95. package/src/lib/model-search.ts +0 -114
  96. package/src/lib/models-dir-setup.ts +0 -46
  97. package/src/lib/port-manager.ts +0 -80
  98. package/src/lib/router-logger.ts +0 -201
  99. package/src/lib/router-manager.ts +0 -414
  100. package/src/lib/router-server.ts +0 -538
  101. package/src/lib/state-manager.ts +0 -206
  102. package/src/lib/status-checker.ts +0 -113
  103. package/src/lib/system-collector.ts +0 -315
  104. package/src/tui/ConfigApp.ts +0 -1085
  105. package/src/tui/HistoricalMonitorApp.ts +0 -587
  106. package/src/tui/ModelsApp.ts +0 -368
  107. package/src/tui/MonitorApp.ts +0 -386
  108. package/src/tui/MultiServerMonitorApp.ts +0 -1833
  109. package/src/tui/RootNavigator.ts +0 -74
  110. package/src/tui/SearchApp.ts +0 -511
  111. package/src/tui/SplashScreen.ts +0 -149
  112. package/src/types/global-config.ts +0 -26
  113. package/src/types/history-types.ts +0 -39
  114. package/src/types/model-info.ts +0 -8
  115. package/src/types/monitor-types.ts +0 -162
  116. package/src/types/router-config.ts +0 -25
  117. package/src/types/server-config.ts +0 -46
  118. package/src/utils/downsample-utils.ts +0 -128
  119. package/src/utils/file-utils.ts +0 -146
  120. package/src/utils/format-utils.ts +0 -98
  121. package/src/utils/log-parser.ts +0 -271
  122. package/src/utils/log-utils.ts +0 -178
  123. package/src/utils/process-utils.ts +0 -316
  124. package/src/utils/prompt-utils.ts +0 -47
  125. package/test-load.sh +0 -100
  126. package/tsconfig.json +0 -20
@@ -1,172 +0,0 @@
1
- import { mkdir, readFile, writeFile, access, rename } from 'fs/promises';
2
- import { join } from 'path';
3
- import { homedir } from 'os';
4
- import { ServerMetrics, SystemMetrics } from '../types/monitor-types.js';
5
- import { HistoryData, HistorySnapshot, TIME_WINDOW_HOURS, TimeWindow } from '../types/history-types.js';
6
-
7
/**
 * Persists monitoring snapshots for a single server as JSON at
 * `~/.llamacpp/history/<serverId>.json` and reads them back by time window.
 *
 * Writes go through a temp file + rename so readers never observe a
 * half-written file.
 */
export class HistoryManager {
  private serverId: string;
  private historyDir: string;
  private historyPath: string;
  private readonly MAX_AGE_MS = 24 * 60 * 60 * 1000; // 24 hours

  /**
   * @param serverId - Identifier used as the history file's base name.
   */
  constructor(serverId: string) {
    this.serverId = serverId;
    this.historyDir = join(homedir(), '.llamacpp', 'history');
    this.historyPath = join(this.historyDir, `${serverId}.json`);
  }

  /**
   * Append a new snapshot to history and drop snapshots older than 24 hours.
   *
   * Best-effort: any failure is swallowed so that monitoring is never
   * interrupted. If the existing file cannot be read for a reason other than
   * "missing" or "corrupted", nothing is written, so existing history is not
   * replaced by a single-snapshot file.
   *
   * @param serverMetrics - Server metrics to record.
   * @param systemMetrics - Optional system metrics recorded alongside.
   */
  async appendSnapshot(serverMetrics: ServerMetrics, systemMetrics?: SystemMetrics): Promise<void> {
    try {
      await mkdir(this.historyDir, { recursive: true });

      const historyData = await this.loadHistoryData();

      const snapshot: HistorySnapshot = {
        timestamp: Date.now(),
        server: {
          healthy: serverMetrics.healthy,
          uptime: serverMetrics.uptime,
          activeSlots: serverMetrics.activeSlots,
          idleSlots: serverMetrics.idleSlots,
          totalSlots: serverMetrics.totalSlots,
          avgPromptSpeed: serverMetrics.avgPromptSpeed,
          avgGenerateSpeed: serverMetrics.avgGenerateSpeed,
          processMemory: serverMetrics.processMemory,
          processCpuUsage: serverMetrics.processCpuUsage,
        },
        system: systemMetrics
          ? {
              gpuUsage: systemMetrics.gpuUsage,
              cpuUsage: systemMetrics.cpuUsage,
              aneUsage: systemMetrics.aneUsage,
              temperature: systemMetrics.temperature,
              memoryUsed: systemMetrics.memoryUsed,
              memoryTotal: systemMetrics.memoryTotal,
            }
          : undefined,
      };

      historyData.snapshots.push(snapshot);
      historyData.snapshots = this.pruneOldSnapshots(historyData.snapshots, this.MAX_AGE_MS);

      await this.writeHistoryData(historyData);
    } catch {
      // Deliberately silent: history is auxiliary and must not break monitoring
      // or pollute the console.
      return;
    }
  }

  /**
   * Load all snapshots whose timestamp falls within the last `windowHours`.
   *
   * A missing, corrupted or malformed history file yields an empty array.
   * Other read failures are retried with exponential backoff (50ms, 100ms).
   *
   * @param windowHours - Size of the window, in hours, counted back from now.
   * @returns Snapshots inside the window, in file order.
   * @throws Error when every attempt to read the file fails.
   */
  async loadHistory(windowHours: number): Promise<HistorySnapshot[]> {
    const maxRetries = 3;
    let lastError: Error | null = null;

    for (let attempt = 0; attempt < maxRetries; attempt++) {
      try {
        const historyData = await this.loadHistoryData();
        return this.filterByTimeWindow(historyData.snapshots, windowHours);
      } catch (error) {
        lastError = error as Error;
        if (attempt < maxRetries - 1) {
          await new Promise(resolve => setTimeout(resolve, 50 * Math.pow(2, attempt)));
        }
      }
    }

    // All retries failed - throw so it can be handled upstream
    throw new Error(`Failed to load history after ${maxRetries} attempts: ${lastError?.message || 'Unknown error'}`);
  }

  /**
   * Load history for a named time window, using `TIME_WINDOW_HOURS` to
   * translate the window into hours.
   */
  async loadHistoryByWindow(window: TimeWindow): Promise<HistorySnapshot[]> {
    return this.loadHistory(TIME_WINDOW_HOURS[window]);
  }

  /**
   * Get the file path of this server's history file.
   */
  getHistoryPath(): string {
    return this.historyPath;
  }

  /**
   * Check whether the history file is accessible.
   */
  async hasHistory(): Promise<boolean> {
    try {
      await access(this.historyPath);
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Replace the history file with an empty history for this server.
   * Unlike `appendSnapshot`, failures propagate to the caller.
   */
  async clearHistory(): Promise<void> {
    await mkdir(this.historyDir, { recursive: true });
    await this.writeHistoryData(this.emptyHistory());
  }

  /** Build an empty history record for this server. */
  private emptyHistory(): HistoryData {
    return {
      serverId: this.serverId,
      snapshots: [],
    };
  }

  /**
   * Atomically replace the history file: write a temp file, then rename it
   * over the destination.
   *
   * The temp file lives in the history directory itself so the rename stays
   * on a single filesystem. The process id is part of the name so two
   * processes writing in the same millisecond do not share a temp file.
   */
  private async writeHistoryData(data: HistoryData): Promise<void> {
    const tempPath = join(this.historyDir, `.${this.serverId}-${process.pid}-${Date.now()}.tmp`);
    await writeFile(tempPath, JSON.stringify(data, null, 2), 'utf-8');
    await rename(tempPath, this.historyPath);
  }

  /**
   * Load and validate the full history file.
   *
   * - Missing file, unparsable JSON, or a payload without a `snapshots`
   *   array: returns an empty history.
   * - Entries without a numeric `timestamp` are dropped, since they cannot be
   *   placed in any time window.
   * - Any other read error (permissions, path is a directory, ...) is
   *   rethrown so callers can retry or skip writing.
   */
  private async loadHistoryData(): Promise<HistoryData> {
    let content: string;
    try {
      content = await readFile(this.historyPath, 'utf-8');
    } catch (error) {
      if ((error as { code?: unknown } | null)?.code === 'ENOENT') {
        return this.emptyHistory();
      }
      throw error;
    }

    let parsed: unknown;
    try {
      parsed = JSON.parse(content);
    } catch {
      // Corrupted file: start over with an empty history
      return this.emptyHistory();
    }

    const snapshots = (parsed as { snapshots?: unknown } | null)?.snapshots;
    if (!Array.isArray(snapshots)) {
      return this.emptyHistory();
    }

    return {
      ...(parsed as HistoryData),
      snapshots: snapshots.filter(
        (s): s is HistorySnapshot => s != null && typeof s.timestamp === 'number'
      ),
    };
  }

  /**
   * Keep only snapshots no older than `maxAgeMs` relative to now.
   */
  private pruneOldSnapshots(snapshots: HistorySnapshot[], maxAgeMs: number): HistorySnapshot[] {
    const cutoff = Date.now() - maxAgeMs;
    return snapshots.filter(s => s.timestamp >= cutoff);
  }

  /**
   * Keep only snapshots taken within the last `windowHours` hours.
   */
  private filterByTimeWindow(snapshots: HistorySnapshot[], windowHours: number): HistorySnapshot[] {
    return this.pruneOldSnapshots(snapshots, windowHours * 60 * 60 * 1000);
  }
}
@@ -1,225 +0,0 @@
1
- import * as path from 'path';
2
- import * as fs from 'fs/promises';
3
- import { ServerConfig } from '../types/server-config';
4
- import { execCommand, execAsync } from '../utils/process-utils';
5
- import { writeFileAtomic, fileExists } from '../utils/file-utils';
6
-
7
/**
 * Snapshot of a launchd service as reported by `launchctl list`.
 */
export interface ServiceStatus {
  /** True when the listing shows a PID for the service. */
  isRunning: boolean;
  /** PID from the listing, or null when none is shown. */
  pid: number | null;
  /** Status column from the listing, or null when none is shown. */
  exitCode: number | null;
  /** Human-readable interpretation of a non-zero `exitCode`, if any. */
  lastExitReason?: string;
}

/**
 * Generates launchd plist files for llama-server instances and drives
 * `launchctl` to load, unload, start, stop and inspect them.
 */
export class LaunchctlManager {
  /**
   * Generate plist XML content for a server.
   *
   * Every interpolated value (label, arguments, log paths) is XML-escaped so
   * that characters such as `&` or `<` in a model path or custom flag cannot
   * produce an invalid plist.
   *
   * @param config - Server configuration to render.
   * @returns The complete plist document as a string.
   */
  generatePlist(config: ServerConfig): string {
    // Build program arguments array
    const args: string[] = [
      '/opt/homebrew/bin/llama-server',
      '--model', config.modelPath,
      '--host', config.host,
      '--port', config.port.toString(),
      '--threads', config.threads.toString(),
      '--ctx-size', config.ctxSize.toString(),
      '--gpu-layers', config.gpuLayers.toString(),
    ];

    // Optional boolean flags
    if (config.embeddings) args.push('--embeddings');
    if (config.jinja) args.push('--jinja');

    // Conditionally enable verbose logging
    if (config.verbose) {
      args.push('--log-verbose');
    }

    // User-supplied extra flags go last
    if (config.customFlags && config.customFlags.length > 0) {
      args.push(...config.customFlags);
    }

    const argsXml = args
      .map(arg => `      <string>${this.escapeXml(arg)}</string>`)
      .join('\n');

    return `<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN"
  "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
  <dict>
    <key>Label</key>
    <string>${this.escapeXml(config.label)}</string>

    <key>ProgramArguments</key>
    <array>
${argsXml}
    </array>

    <key>RunAtLoad</key>
    <false/>

    <key>KeepAlive</key>
    <dict>
      <key>Crashed</key>
      <true/>
      <key>SuccessfulExit</key>
      <false/>
    </dict>

    <key>StandardOutPath</key>
    <string>${this.escapeXml(config.stdoutPath)}</string>

    <key>StandardErrorPath</key>
    <string>${this.escapeXml(config.stderrPath)}</string>

    <key>WorkingDirectory</key>
    <string>/tmp</string>

    <key>ThrottleInterval</key>
    <integer>10</integer>
  </dict>
</plist>
`;
  }

  /**
   * Render the plist for `config` and write it to `config.plistPath`.
   */
  async createPlist(config: ServerConfig): Promise<void> {
    const plistContent = this.generatePlist(config);
    await writeFileAtomic(config.plistPath, plistContent);
  }

  /**
   * Delete the plist file if it exists.
   */
  async deletePlist(plistPath: string): Promise<void> {
    if (await fileExists(plistPath)) {
      await fs.unlink(plistPath);
    }
  }

  /**
   * Load service (register with launchctl).
   */
  async loadService(plistPath: string): Promise<void> {
    await execCommand(`launchctl load ${this.shellQuote(plistPath)}`);
  }

  /**
   * Unload service (unregister from launchctl). Errors are ignored so that
   * unloading a service that is not loaded is a no-op.
   */
  async unloadService(plistPath: string): Promise<void> {
    try {
      await execCommand(`launchctl unload ${this.shellQuote(plistPath)}`);
    } catch {
      // Ignore errors if service is not loaded
    }
  }

  /**
   * Start service by label.
   */
  async startService(label: string): Promise<void> {
    await execCommand(`launchctl start ${this.shellQuote(label)}`);
  }

  /**
   * Stop service by label.
   */
  async stopService(label: string): Promise<void> {
    await execCommand(`launchctl stop ${this.shellQuote(label)}`);
  }

  /**
   * Get service status from `launchctl list`.
   *
   * The label is passed to grep as a fixed string (`-F`), so dots and other
   * regex metacharacters in it are matched literally. The exact-label
   * comparison then happens on the parsed rows.
   *
   * @returns The parsed status, or a "not running" status when the service
   *   is not listed or the command fails.
   */
  async getServiceStatus(label: string): Promise<ServiceStatus> {
    const notRunning: ServiceStatus = {
      isRunning: false,
      pid: null,
      exitCode: null,
    };

    try {
      const { stdout } = await execAsync(`launchctl list | grep -F -- ${this.shellQuote(label)}`);
      return this.parseListOutput(stdout, label) ?? notRunning;
    } catch {
      // Service not found or not loaded (grep exits non-zero on no match)
      return notRunning;
    }
  }

  /**
   * Find the row for `label` in `launchctl list`-style output.
   *
   * Each row is expected to hold three whitespace-separated columns:
   * PID, status, label, with `-` standing for "no value".
   *
   * @returns The status for the exactly-matching label, or null if absent.
   */
  private parseListOutput(stdout: string, label: string): ServiceStatus | null {
    for (const rawLine of stdout.split('\n')) {
      // Trim each row so leading whitespace cannot shift the columns
      const parts = rawLine.trim().split(/\s+/);
      if (parts.length < 3) continue;

      const [pidStr, exitCodeStr, serviceLabel] = parts;
      if (serviceLabel !== label) continue;

      const pid = this.parseIntOrNull(pidStr);
      const exitCode = this.parseIntOrNull(exitCodeStr);

      return {
        isRunning: pid !== null,
        pid,
        exitCode,
        lastExitReason: this.interpretExitCode(exitCode),
      };
    }

    return null;
  }

  /**
   * Parse a `launchctl list` column; `-` or anything non-numeric yields null
   * so that an unparsable PID is never reported as a running process.
   */
  private parseIntOrNull(value: string): number | null {
    if (value === '-') return null;
    const parsed = parseInt(value, 10);
    return Number.isNaN(parsed) ? null : parsed;
  }

  /**
   * Interpret exit code to human-readable reason.
   */
  private interpretExitCode(code: number | null): string | undefined {
    if (code === null || code === 0) return undefined;
    if (code === -9) return 'Force killed (SIGKILL)';
    if (code === -15) return 'Terminated (SIGTERM)';
    return `Exit code: ${code}`;
  }

  /**
   * Wait for service to start (with timeout).
   *
   * @returns True once the service reports running, false on timeout.
   */
  async waitForServiceStart(label: string, timeoutMs = 5000): Promise<boolean> {
    return this.waitForRunningState(label, true, timeoutMs);
  }

  /**
   * Wait for service to stop (with timeout).
   *
   * @returns True once the service reports not running, false on timeout.
   */
  async waitForServiceStop(label: string, timeoutMs = 5000): Promise<boolean> {
    return this.waitForRunningState(label, false, timeoutMs);
  }

  /**
   * Poll the service status every 500ms until `isRunning` equals
   * `desiredRunning` or `timeoutMs` has elapsed.
   */
  private async waitForRunningState(
    label: string,
    desiredRunning: boolean,
    timeoutMs: number
  ): Promise<boolean> {
    const startTime = Date.now();
    while (Date.now() - startTime < timeoutMs) {
      const status = await this.getServiceStatus(label);
      if (status.isRunning === desiredRunning) {
        return true;
      }
      await new Promise((resolve) => setTimeout(resolve, 500));
    }
    return false;
  }

  /**
   * Escape the five XML special characters for use in plist text content.
   */
  private escapeXml(value: string): string {
    return String(value)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&apos;');
  }

  /**
   * Quote a value as a single POSIX shell word.
   * NOTE(review): assumes execCommand/execAsync run the string through a
   * POSIX shell, as the original quoting and `|` pipeline imply - confirm.
   */
  private shellQuote(value: string): string {
    return `'${String(value).replace(/'/g, `'\\''`)}'`;
  }
}

// Export singleton instance
export const launchctlManager = new LaunchctlManager();
@@ -1,257 +0,0 @@
1
- import { ServerConfig } from '../types/server-config.js';
2
- import { ServerMetrics, SlotInfo, MonitorData } from '../types/monitor-types.js';
3
- import { statusChecker } from './status-checker.js';
4
- import { systemCollector } from './system-collector.js';
5
- import { getProcessMemory, getProcessCpu } from '../utils/process-utils.js';
6
-
7
/**
 * Aggregates metrics from llama.cpp server API endpoints.
 * Combines server health (`/health`), slot status (`/slots`) and model
 * properties (`/props`) with process memory/CPU figures.
 */
export class MetricsAggregator {
  private serverUrl: string;
  private timeout: number;
  // Last observed decode counter per slot id, used to derive tokens/second
  private previousSlots: Map<number, { n_decoded: number; timestamp: number }> = new Map();

  /**
   * @param server - Server whose host/port form the base URL.
   * @param timeout - Default per-request timeout in milliseconds.
   */
  constructor(server: ServerConfig, timeout: number = 5000) {
    // Handle null host (legacy configs) by defaulting to 127.0.0.1
    const host = server.host || '127.0.0.1';
    this.serverUrl = `http://${host}:${server.port}`;
    this.timeout = timeout;
  }

  /**
   * Fetch JSON from a llama.cpp API endpoint with a timeout.
   *
   * The abort timer covers both the request and the body read, and is always
   * cleared afterwards, including when the request fails.
   *
   * @param endpoint - Path appended to the server base URL.
   * @param customTimeout - Overrides the instance timeout for this call.
   * @returns Parsed JSON, or null on non-2xx status, network error, timeout
   *   or parse error.
   */
  private async fetchWithTimeout(
    endpoint: string,
    customTimeout?: number
  ): Promise<any | null> {
    const controller = new AbortController();
    const timeoutMs = customTimeout ?? this.timeout;
    const timeoutId = setTimeout(() => controller.abort(), timeoutMs);

    try {
      const response = await fetch(`${this.serverUrl}${endpoint}`, {
        signal: controller.signal,
      });

      if (!response.ok) {
        return null;
      }

      return await response.json();
    } catch {
      // Network error, timeout, or parse error
      return null;
    } finally {
      clearTimeout(timeoutId);
    }
  }

  /**
   * Get server health status: true only when `/health` answers with
   * `status === 'ok'`.
   */
  private async getHealth(): Promise<boolean> {
    const health = await this.fetchWithTimeout('/health');
    return health !== null && health.status === 'ok';
  }

  /**
   * Get server properties from `/props` (null when unavailable).
   */
  private async getProps(): Promise<any> {
    return await this.fetchWithTimeout('/props');
  }

  /**
   * Get slot information from `/slots`, with tokens/second derived by
   * comparing each slot's decode counter against the previous poll.
   *
   * @returns One entry per slot; empty when the endpoint is unavailable or
   *   does not return an array.
   */
  private async getSlots(): Promise<SlotInfo[]> {
    const data = await this.fetchWithTimeout('/slots');
    if (!data || !Array.isArray(data)) {
      return [];
    }

    const now = Date.now();

    return data.map((slot: any) => {
      const slotId = slot.id;
      const n_decoded = slot.next_token?.[0]?.n_decoded || 0;
      const isProcessing = slot.is_processing;

      // Tokens per second since the previous poll of this slot
      let predicted_per_second: number | undefined;

      if (isProcessing && n_decoded > 0) {
        const previous = this.previousSlots.get(slotId);

        if (previous && previous.n_decoded < n_decoded) {
          const tokensGenerated = n_decoded - previous.n_decoded;
          const timeElapsed = (now - previous.timestamp) / 1000; // seconds

          if (timeElapsed > 0) {
            predicted_per_second = tokensGenerated / timeElapsed;
          }
        }

        // Store current state for next comparison
        this.previousSlots.set(slotId, { n_decoded, timestamp: now });
      } else if (!isProcessing) {
        // Clear history when slot becomes idle
        this.previousSlots.delete(slotId);
      }

      return {
        id: slotId,
        state: isProcessing ? 'processing' : 'idle',
        n_prompt_tokens: slot.n_prompt_tokens,
        n_decoded,
        n_ctx: slot.n_ctx || 0,
        // Only the generation rate is derived here; the other timing fields
        // are filled with zeros.
        timings: predicted_per_second
          ? {
              prompt_n: 0,
              prompt_ms: 0,
              prompt_per_token_ms: 0,
              prompt_per_second: 0,
              predicted_n: n_decoded,
              predicted_ms: 0,
              predicted_per_token_ms: 0,
              predicted_per_second,
            }
          : undefined,
      };
    });
  }

  /**
   * Format elapsed time since `lastStarted` as `"<h>h <m>m <s>s"`.
   *
   * @param lastStarted - Value accepted by the `Date` constructor.
   * @param now - Reference time in epoch milliseconds.
   * @returns The formatted uptime, or undefined when `lastStarted` cannot be
   *   parsed. A start time in the future is clamped to zero.
   */
  private formatUptime(lastStarted: string | number | Date, now: number): string | undefined {
    const startTime = new Date(lastStarted).getTime();
    if (!Number.isFinite(startTime)) {
      return undefined;
    }

    const uptimeSeconds = Math.max(0, Math.floor((now - startTime) / 1000));
    const hours = Math.floor(uptimeSeconds / 3600);
    const minutes = Math.floor((uptimeSeconds % 3600) / 60);
    const seconds = uptimeSeconds % 60;
    return `${hours}h ${minutes}m ${seconds}s`;
  }

  /**
   * Aggregate all server metrics.
   *
   * @param server - Server configuration
   * @param processMemory - Optional pre-fetched process memory (for batch collection)
   * @param processCpuUsage - Optional pre-fetched process CPU usage (for batch collection)
   * @returns Full metrics when the server is running, otherwise a minimal
   *   record with `healthy: false` and zeroed slot counts.
   */
  async collectServerMetrics(
    server: ServerConfig,
    processMemory?: number | null,
    processCpuUsage?: number | null
  ): Promise<ServerMetrics> {
    const now = Date.now();

    // Check basic server status first
    const status = await statusChecker.checkServer(server);

    // If server not running, return minimal data
    if (!status.isRunning) {
      return {
        server,
        healthy: false,
        modelLoaded: false,
        modelName: server.modelName,
        contextSize: server.ctxSize,
        totalSlots: 0,
        activeSlots: 0,
        idleSlots: 0,
        slots: [],
        timestamp: now,
        stale: false,
      };
    }

    // Uptime is only known when the server recorded a start time
    const uptime = server.lastStarted ? this.formatUptime(server.lastStarted, now) : undefined;

    // Fetch detailed metrics in parallel.
    // If processMemory/CPU were pre-fetched (batch mode), use them; otherwise fetch individually.
    const [healthy, props, slots, fetchedMemory, fetchedCpu] = await Promise.all([
      this.getHealth(),
      this.getProps(),
      this.getSlots(),
      processMemory !== undefined
        ? Promise.resolve(processMemory)
        : (server.pid ? getProcessMemory(server.pid) : Promise.resolve(null)),
      processCpuUsage !== undefined
        ? Promise.resolve(processCpuUsage)
        : (server.pid ? getProcessCpu(server.pid) : Promise.resolve(null)),
    ]);

    // Calculate slot statistics
    const activeSlots = slots.filter((s) => s.state === 'processing').length;
    const idleSlots = slots.filter((s) => s.state === 'idle').length;
    const totalSlots = props?.total_slots || slots.length;

    // Average speeds are taken over processing slots that have timings
    const processingSlots = slots.filter((s) => s.state === 'processing' && s.timings);

    const avgPromptSpeed =
      processingSlots.length > 0
        ? processingSlots.reduce(
            (sum, s) => sum + (s.timings?.prompt_per_second || 0),
            0
          ) / processingSlots.length
        : undefined;

    const avgGenerateSpeed =
      processingSlots.length > 0
        ? processingSlots.reduce(
            (sum, s) => sum + (s.timings?.predicted_per_second || 0),
            0
          ) / processingSlots.length
        : undefined;

    // Total memory = process memory plus Metal memory when configured
    let totalMemory = fetchedMemory ?? undefined;
    if (totalMemory !== undefined && server.metalMemoryMB) {
      // Add Metal memory (convert MB to bytes)
      totalMemory += server.metalMemoryMB * 1024 * 1024;
    }

    return {
      server,
      healthy,
      uptime,
      modelLoaded: props !== null,
      modelName: server.modelName,
      contextSize: props?.default_generation_settings?.n_ctx || server.ctxSize,
      totalSlots,
      activeSlots,
      idleSlots,
      slots,
      avgPromptSpeed,
      avgGenerateSpeed,
      processMemory: totalMemory,
      processCpuUsage: fetchedCpu ?? undefined,
      timestamp: now,
      stale: false,
    };
  }

  /**
   * Collect complete monitoring data (server + system metrics).
   *
   * @param server - Server configuration
   * @param updateInterval - Interval in milliseconds, echoed into the result.
   */
  async collectMonitorData(
    server: ServerConfig,
    updateInterval: number = 2000
  ): Promise<MonitorData> {
    // Collect server and system metrics in parallel
    const [serverMetrics, systemMetrics] = await Promise.all([
      this.collectServerMetrics(server),
      systemCollector.collectSystemMetrics(),
    ]);

    return {
      server: serverMetrics,
      system: systemMetrics,
      lastUpdated: new Date(),
      updateInterval,
      consecutiveFailures: 0,
    };
  }
}