@appkit/llamacpp-cli 1.12.0 → 1.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +294 -168
- package/dist/cli.js +35 -0
- package/dist/cli.js.map +1 -1
- package/dist/commands/launch/claude.d.ts +6 -0
- package/dist/commands/launch/claude.d.ts.map +1 -0
- package/dist/commands/launch/claude.js +277 -0
- package/dist/commands/launch/claude.js.map +1 -0
- package/dist/lib/integration-checker.d.ts +26 -0
- package/dist/lib/integration-checker.d.ts.map +1 -0
- package/dist/lib/integration-checker.js +77 -0
- package/dist/lib/integration-checker.js.map +1 -0
- package/dist/lib/router-manager.d.ts +4 -0
- package/dist/lib/router-manager.d.ts.map +1 -1
- package/dist/lib/router-manager.js +10 -0
- package/dist/lib/router-manager.js.map +1 -1
- package/dist/lib/router-server.d.ts +13 -0
- package/dist/lib/router-server.d.ts.map +1 -1
- package/dist/lib/router-server.js +267 -7
- package/dist/lib/router-server.js.map +1 -1
- package/dist/types/integration-config.d.ts +28 -0
- package/dist/types/integration-config.d.ts.map +1 -0
- package/dist/types/integration-config.js +3 -0
- package/dist/types/integration-config.js.map +1 -0
- package/package.json +10 -2
- package/web/dist/assets/index-Bin89Lwr.css +1 -0
- package/web/dist/assets/index-CVmonw3T.js +17 -0
- package/web/{index.html → dist/index.html} +2 -1
- package/.versionrc.json +0 -16
- package/CHANGELOG.md +0 -213
- package/docs/images/.gitkeep +0 -1
- package/docs/images/web-ui-servers.png +0 -0
- package/src/cli.ts +0 -523
- package/src/commands/admin/config.ts +0 -121
- package/src/commands/admin/logs.ts +0 -91
- package/src/commands/admin/restart.ts +0 -26
- package/src/commands/admin/start.ts +0 -27
- package/src/commands/admin/status.ts +0 -84
- package/src/commands/admin/stop.ts +0 -16
- package/src/commands/config-global.ts +0 -38
- package/src/commands/config.ts +0 -323
- package/src/commands/create.ts +0 -183
- package/src/commands/delete.ts +0 -74
- package/src/commands/list.ts +0 -37
- package/src/commands/logs-all.ts +0 -251
- package/src/commands/logs.ts +0 -345
- package/src/commands/monitor.ts +0 -110
- package/src/commands/ps.ts +0 -84
- package/src/commands/pull.ts +0 -44
- package/src/commands/rm.ts +0 -107
- package/src/commands/router/config.ts +0 -116
- package/src/commands/router/logs.ts +0 -256
- package/src/commands/router/restart.ts +0 -36
- package/src/commands/router/start.ts +0 -60
- package/src/commands/router/status.ts +0 -119
- package/src/commands/router/stop.ts +0 -33
- package/src/commands/run.ts +0 -233
- package/src/commands/search.ts +0 -107
- package/src/commands/server-show.ts +0 -161
- package/src/commands/show.ts +0 -207
- package/src/commands/start.ts +0 -101
- package/src/commands/stop.ts +0 -39
- package/src/commands/tui.ts +0 -25
- package/src/lib/admin-manager.ts +0 -435
- package/src/lib/admin-server.ts +0 -1243
- package/src/lib/config-generator.ts +0 -130
- package/src/lib/download-job-manager.ts +0 -213
- package/src/lib/history-manager.ts +0 -172
- package/src/lib/launchctl-manager.ts +0 -225
- package/src/lib/metrics-aggregator.ts +0 -257
- package/src/lib/model-downloader.ts +0 -328
- package/src/lib/model-scanner.ts +0 -157
- package/src/lib/model-search.ts +0 -114
- package/src/lib/models-dir-setup.ts +0 -46
- package/src/lib/port-manager.ts +0 -80
- package/src/lib/router-logger.ts +0 -201
- package/src/lib/router-manager.ts +0 -414
- package/src/lib/router-server.ts +0 -538
- package/src/lib/state-manager.ts +0 -206
- package/src/lib/status-checker.ts +0 -113
- package/src/lib/system-collector.ts +0 -315
- package/src/tui/ConfigApp.ts +0 -1085
- package/src/tui/HistoricalMonitorApp.ts +0 -587
- package/src/tui/ModelsApp.ts +0 -368
- package/src/tui/MonitorApp.ts +0 -386
- package/src/tui/MultiServerMonitorApp.ts +0 -1833
- package/src/tui/RootNavigator.ts +0 -74
- package/src/tui/SearchApp.ts +0 -511
- package/src/tui/SplashScreen.ts +0 -149
- package/src/types/admin-config.ts +0 -25
- package/src/types/global-config.ts +0 -26
- package/src/types/history-types.ts +0 -39
- package/src/types/model-info.ts +0 -8
- package/src/types/monitor-types.ts +0 -162
- package/src/types/router-config.ts +0 -25
- package/src/types/server-config.ts +0 -46
- package/src/utils/downsample-utils.ts +0 -128
- package/src/utils/file-utils.ts +0 -146
- package/src/utils/format-utils.ts +0 -98
- package/src/utils/log-parser.ts +0 -284
- package/src/utils/log-utils.ts +0 -178
- package/src/utils/process-utils.ts +0 -316
- package/src/utils/prompt-utils.ts +0 -47
- package/test-load.sh +0 -100
- package/tsconfig.json +0 -20
- package/web/eslint.config.js +0 -23
- package/web/llamacpp-web-dist.tar.gz +0 -0
- package/web/package-lock.json +0 -4017
- package/web/package.json +0 -38
- package/web/postcss.config.js +0 -6
- package/web/src/App.css +0 -42
- package/web/src/App.tsx +0 -86
- package/web/src/assets/react.svg +0 -1
- package/web/src/components/ApiKeyPrompt.tsx +0 -71
- package/web/src/components/CreateServerModal.tsx +0 -372
- package/web/src/components/DownloadProgress.tsx +0 -123
- package/web/src/components/Nav.tsx +0 -89
- package/web/src/components/RouterConfigModal.tsx +0 -240
- package/web/src/components/SearchModal.tsx +0 -306
- package/web/src/components/ServerConfigModal.tsx +0 -291
- package/web/src/hooks/useApi.ts +0 -259
- package/web/src/index.css +0 -42
- package/web/src/lib/api.ts +0 -226
- package/web/src/main.tsx +0 -10
- package/web/src/pages/Dashboard.tsx +0 -103
- package/web/src/pages/Models.tsx +0 -258
- package/web/src/pages/Router.tsx +0 -270
- package/web/src/pages/RouterLogs.tsx +0 -201
- package/web/src/pages/ServerLogs.tsx +0 -553
- package/web/src/pages/Servers.tsx +0 -358
- package/web/src/types/api.ts +0 -140
- package/web/tailwind.config.js +0 -31
- package/web/tsconfig.app.json +0 -28
- package/web/tsconfig.json +0 -7
- package/web/tsconfig.node.json +0 -26
- package/web/vite.config.ts +0 -25
- /package/web/{public → dist}/vite.svg +0 -0
package/src/tui/SplashScreen.ts
DELETED
|
@@ -1,149 +0,0 @@
|
|
|
1
|
-
import blessed from "blessed";
|
|
2
|
-
import { readFileSync } from "fs";
|
|
3
|
-
import { join } from "path";
|
|
4
|
-
|
|
5
|
-
/**
 * Resolve the package version at runtime by reading package.json.
 *
 * Candidate locations are tried in order, relative to this module's
 * directory. The first file that can be read, parses as JSON and carries a
 * non-empty string `version` wins. Unreadable or malformed candidates are
 * skipped rather than treated as errors.
 *
 * @returns The version string, or "1.0.0" when no candidate provides one.
 */
function getVersion(): string {
  const fallbackVersion = "1.0.0";

  try {
    // Works both in src/ (dev) and dist/ (production)
    const candidatePaths = [
      join(__dirname, "../../package.json"), // From dist/tui/
      join(__dirname, "../../../package.json"), // Fallback
    ];

    for (const pkgPath of candidatePaths) {
      try {
        // JSON.parse returns arbitrary data; validate before trusting it.
        const parsed: unknown = JSON.parse(readFileSync(pkgPath, "utf-8"));
        if (typeof parsed !== "object" || parsed === null) continue;

        const version = (parsed as { version?: unknown }).version;
        if (typeof version === "string" && version.length > 0) return version;
      } catch {
        // Missing or malformed file: move on to the next candidate.
      }
    }
  } catch {
    // Anything thrown while building the candidate list (for example
    // `__dirname` not being defined) also falls back to the default.
  }

  return fallbackVersion;
}
|
|
29
|
-
|
|
30
|
-
// ASCII art logo for llama.cpp: each row is wrapped in {cyan-fg} tags and the surrounding blank lines are trimmed. NOTE(review): the spacing between glyphs looks collapsed in this listing — confirm against the original file before editing.
|
|
31
|
-
const LOGO = `
|
|
32
|
-
{cyan-fg} ██╗ ██╗ █████╗ ███╗ ███╗ █████╗ ██████╗██████╗ ██████╗{/cyan-fg}
|
|
33
|
-
{cyan-fg}██║ ██║ ██╔══██╗████╗ ████║██╔══██╗ ██╔════╝██╔══██╗██╔══██╗{/cyan-fg}
|
|
34
|
-
{cyan-fg}██║ ██║ ███████║██╔████╔██║███████║ ██║ ██████╔╝██████╔╝{/cyan-fg}
|
|
35
|
-
{cyan-fg}██║ ██║ ██╔══██║██║╚██╔╝██║██╔══██║ ██║ ██╔═══╝ ██╔═══╝{/cyan-fg}
|
|
36
|
-
{cyan-fg}███████╗███████╗██║ ██║██║ ╚═╝ ██║██║ ██║ ╚██████╗██║ ██║{/cyan-fg}
|
|
37
|
-
{cyan-fg}╚══════╝╚══════╝╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚═════╝╚═╝ ╚═╝{/cyan-fg}
|
|
38
|
-
`.trim();
|
|
39
|
-
|
|
40
|
-
/**
 * Loading hooks awaited by `createSplashScreen`, one after another.
 * Each hook resolves once its stage has finished.
 */
export interface SplashCallbacks {
  /** Awaited while the "Loading server configurations..." line is active. */
  onLoadConfigs: () => Promise<void>;
  /** Awaited while the "Checking launchctl services..." line is active. */
  onCheckServices: () => Promise<void>;
  /** Awaited while the "Initializing metrics collectors..." line is active. */
  onInitMetrics: () => Promise<void>;
}

/**
 * Handle for driving a splash screen from the outside.
 *
 * NOTE(review): nothing visible in this file produces or consumes this type;
 * confirm it is still needed.
 */
export interface SplashScreenControls {
  /** Set status line `line` to `text`; `done` flags the line as finished. */
  updateStatus: (line: number, text: string, done?: boolean) => void;
  /** Presumably signals that loading is finished — verify against callers. */
  complete: () => void;
}
|
|
50
|
-
|
|
51
|
-
/**
 * Show the splash screen, run the loading callbacks in order, and hand back
 * a cleanup function.
 *
 * The three callbacks are awaited sequentially (configs, services, metrics).
 * Each status line switches to a green check mark once its callback resolves.
 * The final "Loading UI..." line stays active until the returned cleanup
 * function is called.
 *
 * If a callback rejects, the splash container is detached from the screen
 * before the rejection is propagated, so a failed start-up does not leave the
 * splash behind with no way to remove it.
 *
 * @param screen - blessed screen the splash is appended to.
 * @param callbacks - Loading hooks, awaited one after another.
 * @returns Function that removes the splash and re-renders the screen; the
 *   caller decides when to invoke it.
 * @throws Whatever a loading callback rejects with.
 */
export async function createSplashScreen(
  screen: blessed.Widgets.Screen,
  callbacks: SplashCallbacks,
): Promise<() => void> {
  const version = getVersion();

  // Top-left aligned container holding the logo and the info box.
  const container = blessed.box({
    top: 0,
    left: 0,
    width: 75,
    height: 19,
    tags: true,
  });
  screen.append(container);

  // Logo box: attached through `parent`, so no reference needs to be kept.
  blessed.box({
    parent: container,
    top: 0,
    left: 0,
    width: 73,
    height: 7,
    tags: true,
    content: LOGO,
  });

  // Bordered info box below the logo.
  const infoBox = blessed.box({
    parent: container,
    top: 7,
    left: 0,
    width: 73,
    height: 10,
    tags: true,
    border: {
      type: "line",
    },
    style: {
      border: {
        fg: "blue",
      },
    },
  });

  // The first two entries are the title and a blank spacer; status rows follow.
  const statusOffset = 2;
  const statusLines = [
    "{bold}Local LLM Server Manager{/bold} v" + version,
    "",
    "{gray-fg}>{/gray-fg} Loading server configurations...",
    "{gray-fg}>{/gray-fg} Checking launchctl services...",
    "{gray-fg}>{/gray-fg} Initializing metrics collectors...",
    "{gray-fg}>{/gray-fg} Loading UI...",
  ];

  const updateDisplay = (): void => {
    infoBox.setContent(statusLines.join("\n"));
    screen.render();
  };

  const updateStatus = (line: number, text: string, done = false): void => {
    const prefix = done ? "{green-fg}✓{/green-fg}" : "{yellow-fg}>{/yellow-fg}";
    statusLines[line + statusOffset] = `${prefix} ${text}`;
    updateDisplay();
  };

  const removeSplash = (): void => {
    screen.remove(container);
    screen.render();
  };

  // Initial render
  updateDisplay();

  // Loading sequence. Callbacks are invoked through `callbacks.` so that any
  // `this` binding they rely on is preserved.
  const steps = [
    {
      pending: "Loading server configurations...",
      finished: "Server configurations loaded",
      run: () => callbacks.onLoadConfigs(),
    },
    {
      pending: "Checking launchctl services...",
      finished: "Launchctl services checked",
      run: () => callbacks.onCheckServices(),
    },
    {
      pending: "Initializing metrics collectors...",
      finished: "Metrics collectors ready",
      run: () => callbacks.onInitMetrics(),
    },
  ];

  let line = 0;
  try {
    for (const step of steps) {
      updateStatus(line, step.pending, false);
      await step.run();
      updateStatus(line, step.finished, true);
      line += 1;
    }
  } catch (error: unknown) {
    // Do not leave the splash attached when start-up fails.
    removeSplash();
    throw error;
  }

  // Final step stays active until the caller runs the cleanup function.
  updateStatus(line, "Loading UI...", false);

  return removeSplash;
}
|
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
/** Status values tracked for the admin service. */
export type AdminStatus = 'running' | 'stopped' | 'crashed';

/** Configuration and tracked state of the admin service. */
export interface AdminConfig {
  id: 'admin';
  port: number;
  host: string;
  /** Auto-generated on first start. */
  apiKey: string;

  // --- State tracking ---
  status: AdminStatus;
  pid?: number;
  createdAt: string;
  lastStarted?: string;
  lastStopped?: string;

  // --- launchctl metadata ---
  plistPath: string;
  label: 'com.llama.admin';
  stdoutPath: string;
  stderrPath: string;

  // --- Admin settings ---
  /** Timeout in ms for API requests (default: 30000). */
  requestTimeout: number;
  /** Enable verbose logging to file (default: false). */
  verbose: boolean;
}
|
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
/** Global settings shared by all servers. */
export interface GlobalConfig {
  version: string;
  defaultPort: number;
  /** ~/models expanded to full path. */
  modelsDirectory: string;
  /** e.g. /opt/homebrew/bin/llama-server */
  llamaServerBinary: string;
  /** Default values for threads, context size and GPU layers. */
  defaults: {
    threads: number;
    ctxSize: number;
    gpuLayers: number;
  };
}

/**
 * Default global configuration.
 *
 * `modelsDirectory` is left empty here and is set at runtime.
 */
export const DEFAULT_GLOBAL_CONFIG: GlobalConfig = {
  version: '1.0.0',
  defaultPort: 9000,
  modelsDirectory: '',
  llamaServerBinary: '/opt/homebrew/bin/llama-server',
  defaults: { threads: 8, ctxSize: 8192, gpuLayers: 60 },
};
|
|
@@ -1,39 +0,0 @@
|
|
|
1
|
-
// Historical monitoring data types

/** One recorded sample of server metrics and, optionally, system metrics. */
export interface HistorySnapshot {
  /** Unix timestamp in milliseconds. */
  timestamp: number;
  server: {
    healthy: boolean;
    uptime?: string;
    activeSlots: number;
    idleSlots: number;
    totalSlots: number;
    /** Tokens per second. */
    avgPromptSpeed?: number;
    /** Tokens per second. */
    avgGenerateSpeed?: number;
    /** Bytes (RSS). */
    processMemory?: number;
    /** Percentage (0-100+) from ps. */
    processCpuUsage?: number;
  };
  system?: {
    /** Percentage (0-100). */
    gpuUsage?: number;
    /** Percentage (0-100). */
    cpuUsage?: number;
    /** Percentage (0-100). */
    aneUsage?: number;
    /** Celsius. */
    temperature?: number;
    /** Bytes. */
    memoryUsed: number;
    /** Bytes. */
    memoryTotal: number;
  };
}

/** All snapshots recorded for a single server. */
export interface HistoryData {
  serverId: string;
  snapshots: HistorySnapshot[];
}
|
|
30
|
-
|
|
31
|
-
/** Identifiers of the supported history windows. */
export type TimeWindow = '1h' | '6h' | '24h';

/** Length of each window, in hours. */
export const TIME_WINDOW_HOURS: Record<TimeWindow, number> = { '1h': 1, '6h': 6, '24h': 24 };

/** Every window identifier, from shortest to longest. */
export const TIME_WINDOWS: TimeWindow[] = [
  '1h',
  '6h',
  '24h',
];
|
package/src/types/model-info.ts
DELETED
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
/** Description of a model file. */
export interface ModelInfo {
  /** Original filename. */
  filename: string;
  /** Full absolute path. */
  path: string;
  /** File size in bytes. */
  size: number;
  /** Human-readable size (e.g., "1.9 GB"). */
  sizeFormatted: string;
  /** Last modified date. */
  modified: Date;
  /** File exists and is readable. */
  exists: boolean;
}
|
|
@@ -1,162 +0,0 @@
|
|
|
1
|
-
import { ServerConfig } from './server-config.js';
|
|
2
|
-
|
|
3
|
-
// llama.cpp API response types

/** Health response: a status string plus an optional error message. */
export interface HealthResponse {
  status: string;
  error?: string;
}

/** Server properties response: default generation settings and model details. */
export interface PropsResponse {
  default_generation_settings: {
    n_ctx: number;
    n_predict: number;
    model: string;
    seed: number;
    temperature: number;
    top_k: number;
    top_p: number;
    min_p: number;
    n_keep: number;
    stream: boolean;
  };
  total_slots: number;
  model_loaded: boolean;
  model_path: string;
  model_alias?: string;
}
|
|
28
|
-
|
|
29
|
-
/** State of a single slot as listed in a `SlotsResponse`. */
export interface SlotInfo {
  id: number;
  state: 'idle' | 'processing';
  task_id?: number;
  prompt?: string;
  n_prompt_tokens?: number;
  n_decoded?: number;
  n_ctx: number;
  truncated?: boolean;
  stopped_eos?: boolean;
  stopped_word?: boolean;
  stopped_limit?: boolean;
  stopping_word?: string;
  tokens_predicted?: number;
  tokens_evaluated?: number;
  generation_settings?: {
    n_ctx: number;
    n_predict: number;
    seed: number;
    temperature: number;
    top_k: number;
    top_p: number;
  };
  prompt_tokens_processed?: number;
  /** Time in ms. */
  t_prompt_processing?: number;
  /** Time in ms. */
  t_token_generation?: number;
  timings?: {
    prompt_n: number;
    prompt_ms: number;
    prompt_per_token_ms: number;
    prompt_per_second: number;
    predicted_n: number;
    predicted_ms: number;
    predicted_per_token_ms: number;
    predicted_per_second: number;
  };
}

/** Wrapper around the list of slots. */
export interface SlotsResponse {
  slots: SlotInfo[];
}
|
|
70
|
-
|
|
71
|
-
// System metrics types

/** System-wide metrics sample. */
export interface SystemMetrics {
  // --- GPU/CPU/ANE (from macmon if available) ---
  /** Percentage (0-100). */
  gpuUsage?: number;
  /** Percentage (0-100). */
  cpuUsage?: number;
  /** Number of cores. */
  cpuCores?: number;
  /** Apple Neural Engine percentage (0-100). */
  aneUsage?: number;
  /** GPU temperature in Celsius. */
  temperature?: number;

  // --- Memory (from vm_stat or macmon) ---
  /** Bytes. */
  memoryUsed: number;
  /** Bytes. */
  memoryTotal: number;
  /** Bytes. */
  swapUsed?: number;
  /** Bytes (specific to llama-server process). */
  processMemory?: number;

  // --- Metadata ---
  timestamp: number;
  source: 'macmon' | 'vm_stat' | 'none';
  /** e.g., "macmon not available, showing memory only". */
  warnings?: string[];
}
|
|
92
|
-
|
|
93
|
-
// Aggregated metrics for TUI display

/** Metrics for one server, combined into a single record. */
export interface ServerMetrics {
  // --- Server identification ---
  server: ServerConfig;

  // --- Health status ---
  healthy: boolean;
  /** Human-readable (e.g., "2h 34m 12s"). */
  uptime?: string;
  error?: string;

  // --- Model information ---
  modelLoaded: boolean;
  modelName: string;
  contextSize: number;
  totalSlots: number;

  // --- Request metrics ---
  activeSlots: number;
  idleSlots: number;
  slots: SlotInfo[];

  // --- Performance metrics (derived from slots) ---
  /** Tokens per second. */
  avgPromptSpeed?: number;
  /** Tokens per second. */
  avgGenerateSpeed?: number;
  /** Estimated from slot activity. */
  requestsPerMinute?: number;
  /** Milliseconds. */
  avgLatency?: number;

  // --- Cache metrics (if available from /metrics endpoint) ---
  /** Percentage. */
  cacheHitRate?: number;

  // --- Process metrics ---
  /** Bytes (actual RSS from top command). */
  processMemory?: number;
  /** Percentage (0-100+) from ps command. */
  processCpuUsage?: number;

  // --- Timestamp ---
  timestamp: number;
  /** True if data is from last successful fetch. */
  stale: boolean;
}

/** Monitoring record pairing server metrics with optional system metrics. */
export interface MonitorData {
  server: ServerMetrics;
  system?: SystemMetrics;
  lastUpdated: Date;
  /** Milliseconds. */
  updateInterval: number;
  consecutiveFailures: number;
}
|
|
140
|
-
|
|
141
|
-
// Error and loading states

/** An error message with a retry flag and optional suggestions. */
export interface ErrorState {
  error: string;
  canRetry: boolean;
  suggestions?: string[];
}

/** A loading message with optional progress. */
export interface LoadingState {
  message: string;
  /** 0-100 if determinate. */
  progress?: number;
}

// Collection result (for graceful degradation)

/** Result of a collection attempt; `data` is `null` when nothing is available. */
export interface CollectionResult<T> {
  success: boolean;
  data: T | null;
  error?: string;
  warnings?: string[];
  stale?: boolean;
}
|
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
/** Status values tracked for the router service. */
export type RouterStatus = 'running' | 'stopped' | 'crashed';

/** Configuration and tracked state of the router service. */
export interface RouterConfig {
  id: 'router';
  port: number;
  host: string;

  // --- State tracking ---
  status: RouterStatus;
  pid?: number;
  createdAt: string;
  lastStarted?: string;
  lastStopped?: string;

  // --- launchctl metadata ---
  plistPath: string;
  label: 'com.llama.router';
  stdoutPath: string;
  stderrPath: string;

  // --- Router settings ---
  /** Time in ms between health checks (default: 5000). */
  healthCheckInterval: number;
  /** Timeout in ms for backend requests (default: 120000). */
  requestTimeout: number;
  /** Enable verbose logging to file (default: false). */
  verbose: boolean;
}
|
|
@@ -1,46 +0,0 @@
|
|
|
1
|
-
/** Status values tracked for a model server. */
export type ServerStatus = 'running' | 'stopped' | 'crashed';

/** Configuration and tracked state of a single llama-server instance. */
export interface ServerConfig {
  /** Sanitized model name (unique identifier). */
  id: string;
  /** Full path to GGUF file. */
  modelPath: string;
  /** Display name (original filename). */
  modelName: string;
  /** Server port. */
  port: number;
  /** Bind address (default: 127.0.0.1). */
  host: string;

  // --- llama-server configuration ---
  threads: number;
  ctxSize: number;
  gpuLayers: number;
  /** Always true. */
  embeddings: boolean;
  /** Always true. */
  jinja: boolean;
  /** Enable verbose HTTP logging (--log-verbose flag). */
  verbose: boolean;
  /** Additional llama-server flags (e.g., ["--pooling", "mean"]). */
  customFlags?: string[];

  // --- State tracking ---
  status: ServerStatus;
  pid?: number;
  /** ISO timestamp. */
  createdAt: string;
  /** ISO timestamp. */
  lastStarted?: string;
  /** ISO timestamp. */
  lastStopped?: string;
  /** Metal (GPU) memory allocated in MB (parsed from logs). */
  metalMemoryMB?: number;

  // --- launchctl metadata ---
  /** Full path to plist file. */
  plistPath: string;
  /** launchctl service label (com.llama.<id>). */
  label: string;

  // --- Logging ---
  /** Path to stdout log. */
  stdoutPath: string;
  /** Path to stderr log. */
  stderrPath: string;
}
|
|
35
|
-
|
|
36
|
-
/**
 * Turn a model filename into a server ID.
 *
 * A trailing `.gguf` extension (any letter case) is dropped, every run of
 * characters other than ASCII letters and digits becomes a single hyphen,
 * the result is lowercased, and no hyphen is left at either end.
 *
 * Example: "llama-3.2-3b-instruct-q4_k_m.gguf" → "llama-3-2-3b-instruct-q4-k-m"
 *
 * @param modelName - Model filename to convert.
 * @returns The sanitized ID; empty when the name has no ASCII letters or digits.
 */
export function sanitizeModelName(modelName: string): string {
  const withoutExtension = modelName.replace(/\.gguf$/i, '');

  // Splitting on separator runs and discarding empty pieces collapses the
  // runs and removes leading/trailing separators in one pass.
  const words = withoutExtension
    .split(/[^a-zA-Z0-9]+/)
    .filter((word) => word.length > 0);

  return words.join('-').toLowerCase();
}
|
|
@@ -1,128 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Downsampling utilities for time-series chart data
|
|
3
|
-
* Uses time-aligned buckets to ensure stable charts as new data arrives
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
/**
 * A single sample of a time series.
 *
 * The full-hour helpers in this file compare `timestamp` against
 * `Date.now()`, so it is epoch milliseconds on that path.
 */
export interface TimeSeriesPoint {
  timestamp: number;
  value: number;
}

/** How the values inside one bucket are reduced to a single number. */
type AggregationMethod = 'max' | 'mean';

/** One hour expressed in milliseconds. */
const ONE_HOUR_MS = 1000 * 60 * 60;
|
|
14
|
-
|
|
15
|
-
/**
 * Core bucketing logic shared by all downsampling functions.
 *
 * Bucket boundaries are multiples of the bucket duration (ABSOLUTE
 * boundaries), so they do not shift as the window moves; this keeps charts
 * stable as new data arrives.
 *
 * Because the first boundary is rounded down from `startTime`, the fixed
 * number of buckets can end before `endTime`. Samples past the last boundary
 * are placed in the last bucket instead of being discarded, so the most
 * recent samples are never lost.
 *
 * @param data - Samples to distribute; samples outside
 *   `[startTime, endTime]` are ignored.
 * @param targetPoints - Number of buckets to create. Fractions are floored;
 *   zero, negative or non-finite values produce an empty result.
 * @param startTime - Inclusive lower bound of the window.
 * @param endTime - Inclusive upper bound of the window.
 * @returns One array of sample values per bucket, in time order.
 */
function createTimeBuckets(
  data: TimeSeriesPoint[],
  targetPoints: number,
  startTime: number,
  endTime: number
): number[][] {
  const bucketCount = Math.floor(targetPoints);
  if (!Number.isFinite(bucketCount) || bucketCount <= 0) return [];

  const timeRange = endTime - startTime;
  // An empty (or invalid) range would give a duration of 0 or NaN and turn
  // every index below into NaN; fall back to a duration of one unit.
  const rawDuration = Math.ceil(timeRange / bucketCount);
  const bucketDuration = rawDuration >= 1 ? rawDuration : 1;
  const alignedStart = Math.floor(startTime / bucketDuration) * bucketDuration;
  const lastBucket = bucketCount - 1;
  const buckets: number[][] = Array.from({ length: bucketCount }, () => []);

  for (const point of data) {
    if (point.timestamp < startTime || point.timestamp > endTime) continue;

    const rawIndex = Math.floor((point.timestamp - alignedStart) / bucketDuration);
    // Written as a negated comparison so that NaN is rejected as well.
    if (!(rawIndex >= 0)) continue;

    const bucket = buckets[Math.min(rawIndex, lastBucket)];
    if (bucket) bucket.push(point.value);
  }

  return buckets;
}
|
|
40
|
-
|
|
41
|
-
/**
 * Aggregate bucket values using the specified method.
 *
 * - `'max'`: largest finite value greater than 0 in the bucket.
 * - `'mean'`: arithmetic mean of the finite values in the bucket.
 *
 * NaN and infinite values are ignored by both methods. A bucket with no
 * usable value aggregates to 0. Values are scanned in a plain loop rather
 * than spread into `Math.max`, so very large buckets cannot exceed the
 * engine's argument limit.
 *
 * @param buckets - One array of sample values per bucket.
 * @param method - Reduction to apply to each bucket.
 * @returns One aggregated number per bucket, in the same order.
 */
function aggregateBuckets(buckets: number[][], method: AggregationMethod): number[] {
  return buckets.map((bucket) => {
    if (method === 'max') {
      // Starting from 0 reproduces the "only values above 0 count" rule.
      let peak = 0;
      for (const value of bucket) {
        if (Number.isFinite(value) && value > peak) peak = value;
      }
      return peak;
    }

    let sum = 0;
    let count = 0;
    for (const value of bucket) {
      if (Number.isFinite(value)) {
        sum += value;
        count += 1;
      }
    }
    return count === 0 ? 0 : sum / count;
  });
}
|
|
58
|
-
|
|
59
|
-
/**
 * Reduce a series to `targetPoints` values, keeping the peak of every
 * time-aligned bucket.
 * Best for: GPU/CPU usage, token speeds where peaks matter.
 *
 * The window runs from the first sample's timestamp to the last sample's
 * timestamp. Series that already fit within `targetPoints` are returned as
 * their raw values.
 *
 * @param data - Samples to reduce.
 * @param targetPoints - Desired number of output values.
 * @returns The reduced values, or `[]` for empty input.
 */
export function downsampleMaxTime(data: TimeSeriesPoint[], targetPoints: number): number[] {
  const sampleCount = data.length;
  if (sampleCount === 0) {
    return [];
  }
  if (sampleCount <= targetPoints) {
    return data.map(({ value }) => value);
  }

  const windowStart = data[0].timestamp;
  const windowEnd = data[sampleCount - 1].timestamp;
  const grouped = createTimeBuckets(data, targetPoints, windowStart, windowEnd);
  return aggregateBuckets(grouped, 'max');
}
|
|
75
|
-
|
|
76
|
-
/**
 * Reduce a series to `targetPoints` values, keeping the mean of every
 * time-aligned bucket.
 * Best for: Memory usage where average is meaningful.
 *
 * The window runs from the first sample's timestamp to the last sample's
 * timestamp. Series that already fit within `targetPoints` are returned as
 * their raw values.
 *
 * @param data - Samples to reduce.
 * @param targetPoints - Desired number of output values.
 * @returns The reduced values, or `[]` for empty input.
 */
export function downsampleMeanTime(data: TimeSeriesPoint[], targetPoints: number): number[] {
  const sampleCount = data.length;
  if (sampleCount === 0) {
    return [];
  }
  if (sampleCount <= targetPoints) {
    return data.map(({ value }) => value);
  }

  const windowStart = data[0].timestamp;
  const windowEnd = data[sampleCount - 1].timestamp;
  const grouped = createTimeBuckets(data, targetPoints, windowStart, windowEnd);
  return aggregateBuckets(grouped, 'mean');
}
|
|
92
|
-
|
|
93
|
-
/**
 * Format the downsampling ratio for display.
 *
 * @param originalCount - Number of points before downsampling.
 * @param targetCount - Number of points after downsampling.
 * @returns `'1:1'` when nothing is reduced, otherwise `'<n>:1'` where `<n>`
 *   is the rounded quotient of the two counts.
 */
export function getDownsampleRatio(originalCount: number, targetCount: number): string {
  return originalCount <= targetCount
    ? '1:1'
    : `${Math.round(originalCount / targetCount)}:1`;
}
|
|
101
|
-
|
|
102
|
-
/**
 * Max-aggregated downsampling over the hour ending now.
 * Buckets span the whole hour (60 minutes); buckets without samples are 0.
 * Best for: Hour view where we want to show the full time range.
 *
 * @param data - Samples to reduce.
 * @param targetPoints - Number of output values.
 * @returns `targetPoints` values; all zeros when `data` is empty.
 */
export function downsampleMaxTimeWithFullHour(data: TimeSeriesPoint[], targetPoints: number): number[] {
  if (data.length === 0) {
    return new Array<number>(targetPoints).fill(0);
  }

  const windowEnd = Date.now();
  const windowStart = windowEnd - ONE_HOUR_MS;
  return aggregateBuckets(
    createTimeBuckets(data, targetPoints, windowStart, windowEnd),
    'max'
  );
}
|
|
115
|
-
|
|
116
|
-
/**
 * Mean-aggregated downsampling over the hour ending now.
 * Buckets span the whole hour (60 minutes); buckets without samples are 0.
 * Best for: Hour view where we want to show the full time range.
 *
 * @param data - Samples to reduce.
 * @param targetPoints - Number of output values.
 * @returns `targetPoints` values; all zeros when `data` is empty.
 */
export function downsampleMeanTimeWithFullHour(data: TimeSeriesPoint[], targetPoints: number): number[] {
  if (data.length === 0) {
    return new Array<number>(targetPoints).fill(0);
  }

  const windowEnd = Date.now();
  const windowStart = windowEnd - ONE_HOUR_MS;
  return aggregateBuckets(
    createTimeBuckets(data, targetPoints, windowStart, windowEnd),
    'mean'
  );
}
|