groove-dev 0.25.20 → 0.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/node_modules/@groove-dev/daemon/src/agent-loop.js +479 -0
  2. package/node_modules/@groove-dev/daemon/src/api.js +104 -5
  3. package/node_modules/@groove-dev/daemon/src/index.js +6 -1
  4. package/node_modules/@groove-dev/daemon/src/llama-server.js +268 -0
  5. package/node_modules/@groove-dev/daemon/src/model-manager.js +411 -0
  6. package/node_modules/@groove-dev/daemon/src/process.js +179 -11
  7. package/node_modules/@groove-dev/daemon/src/providers/codex.js +51 -1
  8. package/node_modules/@groove-dev/daemon/src/providers/gemini.js +3 -2
  9. package/node_modules/@groove-dev/daemon/src/providers/index.js +4 -0
  10. package/node_modules/@groove-dev/daemon/src/providers/local.js +183 -0
  11. package/node_modules/@groove-dev/daemon/src/registry.js +1 -1
  12. package/node_modules/@groove-dev/daemon/src/tool-executor.js +367 -0
  13. package/node_modules/@groove-dev/gui/dist/assets/index-BQnZrh4f.css +1 -0
  14. package/node_modules/@groove-dev/gui/dist/assets/index-BqL4GcgZ.js +633 -0
  15. package/node_modules/@groove-dev/gui/dist/index.html +2 -2
  16. package/node_modules/@groove-dev/gui/src/app.jsx +2 -0
  17. package/node_modules/@groove-dev/gui/src/components/agents/agent-config.jsx +7 -2
  18. package/node_modules/@groove-dev/gui/src/components/layout/activity-bar.jsx +2 -1
  19. package/node_modules/@groove-dev/gui/src/stores/groove.js +6 -2
  20. package/node_modules/@groove-dev/gui/src/views/models.jsx +380 -0
  21. package/package.json +2 -2
  22. package/packages/daemon/src/agent-loop.js +479 -0
  23. package/packages/daemon/src/api.js +104 -5
  24. package/packages/daemon/src/index.js +6 -1
  25. package/packages/daemon/src/llama-server.js +268 -0
  26. package/packages/daemon/src/model-manager.js +411 -0
  27. package/packages/daemon/src/process.js +179 -11
  28. package/packages/daemon/src/providers/codex.js +51 -1
  29. package/packages/daemon/src/providers/gemini.js +3 -2
  30. package/packages/daemon/src/providers/index.js +4 -0
  31. package/packages/daemon/src/providers/local.js +183 -0
  32. package/packages/daemon/src/registry.js +1 -1
  33. package/packages/daemon/src/tool-executor.js +367 -0
  34. package/packages/gui/dist/assets/index-BQnZrh4f.css +1 -0
  35. package/packages/gui/dist/assets/index-BqL4GcgZ.js +633 -0
  36. package/packages/gui/dist/index.html +2 -2
  37. package/packages/gui/src/app.jsx +2 -0
  38. package/packages/gui/src/components/agents/agent-config.jsx +7 -2
  39. package/packages/gui/src/components/layout/activity-bar.jsx +2 -1
  40. package/packages/gui/src/stores/groove.js +6 -2
  41. package/packages/gui/src/views/models.jsx +380 -0
  42. package/node_modules/@groove-dev/gui/dist/assets/index-GYcMwmjs.css +0 -1
  43. package/node_modules/@groove-dev/gui/dist/assets/index-H_e3KvZp.js +0 -623
  44. package/packages/gui/dist/assets/index-GYcMwmjs.css +0 -1
  45. package/packages/gui/dist/assets/index-H_e3KvZp.js +0 -623
@@ -0,0 +1,268 @@
1
+ // GROOVE — llama-server Process Manager
2
+ // FSL-1.1-Apache-2.0 — see LICENSE
3
+ //
4
+ // Manages llama-server (llama.cpp) inference server instances.
5
+ // Each model gets its own server on a unique port.
6
+ // Auto-starts when an agent needs a GGUF model, auto-stops when idle.
7
+
8
+ import { spawn, execSync } from 'child_process';
9
+
10
const BASE_PORT = 8081;           // First port tried; _allocatePort scans upward from here
const MAX_SERVERS = 5;            // Hard cap on concurrently running llama-server processes
const HEALTH_TIMEOUT = 30000;     // 30s for model loading
const HEALTH_POLL_INTERVAL = 500; // Check every 500ms
const IDLE_TIMEOUT = 300000;      // 5 minutes before auto-stop
15
+
16
export class LlamaServerManager {
  /**
   * @param {object} daemon - Daemon handle used only for broadcast()
   *   notifications to connected clients; may be null/undefined.
   */
  constructor(daemon) {
    this.daemon = daemon;
    // modelPath -> { proc, port, modelPath, users, startedAt, lastUsed, ready }
    this.servers = new Map();
    this.nextPort = BASE_PORT;
  }

  /** True when a `llama-server` binary is resolvable on PATH. */
  static isInstalled() {
    try {
      execSync('which llama-server', { stdio: 'ignore' });
      return true;
    } catch {
      return false;
    }
  }

  // --- Server Lifecycle ---

  /**
   * Ensure a llama-server is running for the given model.
   * Returns the OpenAI-compatible API base URL (http://127.0.0.1:<port>/v1).
   * Starts the server if not already running, evicting the least-used
   * server when at MAX_SERVERS capacity.
   *
   * NOTE(review): if a second caller arrives while a first caller is still
   * waiting on the health check, the reuse path below returns the URL before
   * the server is ready — callers should tolerate initial connection errors.
   *
   * @param {string} modelPath - Path to the GGUF model file.
   * @param {object} [options] - { contextWindow, gpuLayers, parallel, flashAttention }
   * @throws {Error} when spawn fails or the health check times out
   *   (the stderr tail is included in the message).
   */
  async ensureServer(modelPath, options = {}) {
    // Reuse an existing server for this model.
    if (this.servers.has(modelPath)) {
      const server = this.servers.get(modelPath);
      server.users++;
      server.lastUsed = Date.now();
      return `http://127.0.0.1:${server.port}/v1`;
    }

    // Check capacity: stop the least-recently-used server first.
    if (this.servers.size >= MAX_SERVERS) {
      await this._evictLRU();
    }

    const port = this._allocatePort();

    const ctxSize = options.contextWindow || 32768;
    const nGpuLayers = options.gpuLayers ?? -1; // -1 = auto (offload all to GPU)
    const parallel = options.parallel || 2;

    const args = [
      '--model', modelPath,
      '--port', String(port),
      '--host', '127.0.0.1',
      '--ctx-size', String(ctxSize),
      '--n-gpu-layers', String(nGpuLayers),
      '--parallel', String(parallel),
      '--log-disable', // Reduce noise
    ];

    // Flash attention for better memory efficiency (if supported)
    if (options.flashAttention !== false) {
      args.push('--flash-attn');
    }

    const proc = spawn('llama-server', args, {
      stdio: ['ignore', 'pipe', 'pipe'],
      detached: false,
    });

    if (!proc.pid) {
      throw new Error('Failed to start llama-server — check installation');
    }

    const server = {
      proc,
      port,
      modelPath,
      users: 1,
      startedAt: Date.now(),
      lastUsed: Date.now(),
      ready: false,
    };

    this.servers.set(modelPath, server);

    // Keep a bounded tail of stderr for diagnostics on startup failure.
    const stderrBuf = [];
    proc.stderr.on('data', (chunk) => {
      stderrBuf.push(chunk.toString());
      if (stderrBuf.join('').length > 4096) stderrBuf.shift();
    });

    proc.on('exit', (code, signal) => {
      this.servers.delete(modelPath);
      this.daemon?.broadcast({
        type: 'llama:server:stopped',
        data: { modelPath, port, code, signal },
      });
    });

    // Wait for the server to come up before handing out its URL.
    try {
      await this._waitForHealth(port);
      server.ready = true;

      this.daemon?.broadcast({
        type: 'llama:server:ready',
        data: { modelPath, port },
      });

      return `http://127.0.0.1:${port}/v1`;
    } catch (err) {
      // Server failed to start — surface the captured stderr tail.
      await this.stopServer(modelPath);
      const stderr = stderrBuf.join('').slice(-500);
      throw new Error(`llama-server failed to start: ${stderr || err.message}`);
    }
  }

  /**
   * Release a server (decrement user count).
   * Server auto-stops after IDLE_TIMEOUT if no users remain.
   */
  releaseServer(modelPath) {
    const server = this.servers.get(modelPath);
    if (!server) return;

    server.users = Math.max(0, server.users - 1);
    server.lastUsed = Date.now();

    // Schedule an auto-stop check once the last user is gone.
    if (server.users === 0) {
      const timer = setTimeout(() => {
        const s = this.servers.get(modelPath);
        if (s && s.users === 0 && Date.now() - s.lastUsed >= IDLE_TIMEOUT) {
          this.stopServer(modelPath);
        }
      }, IDLE_TIMEOUT + 1000);
      // BUGFIX: don't let a pending idle check keep the daemon process alive.
      timer.unref?.();
    }
  }

  /**
   * Stop the server for a model: SIGTERM, escalating to SIGKILL after 5s.
   * Resolves true once the process is gone, false when no server is tracked.
   */
  async stopServer(modelPath) {
    const server = this.servers.get(modelPath);
    if (!server) return false;

    // BUGFIX: if the process already exited, its 'exit' event has fired and
    // will never fire again — waiting on it below would hang forever.
    if (server.proc.exitCode !== null || server.proc.signalCode !== null) {
      this.servers.delete(modelPath);
      return true;
    }

    return new Promise((resolve) => {
      const timeout = setTimeout(() => {
        try { server.proc.kill('SIGKILL'); } catch { /* already dead */ }
      }, 5000);
      timeout.unref?.();

      // once(): the listener is only ever needed a single time.
      server.proc.once('exit', () => {
        clearTimeout(timeout);
        this.servers.delete(modelPath);
        resolve(true);
      });

      try {
        server.proc.kill('SIGTERM');
      } catch {
        // kill() threw: process unreachable — clean up immediately.
        clearTimeout(timeout);
        this.servers.delete(modelPath);
        resolve(true);
      }
    });
  }

  /** Stop every running server in parallel. */
  async stopAll() {
    const paths = Array.from(this.servers.keys());
    await Promise.all(paths.map((p) => this.stopServer(p)));
  }

  // --- Health Check ---

  /**
   * Poll GET /health until the server reports ready or HEALTH_TIMEOUT elapses.
   * @throws {Error} on timeout.
   */
  async _waitForHealth(port) {
    const start = Date.now();
    while (Date.now() - start < HEALTH_TIMEOUT) {
      try {
        const res = await fetch(`http://127.0.0.1:${port}/health`, {
          signal: AbortSignal.timeout(2000),
        });
        if (res.ok) {
          const data = await res.json().catch(() => ({}));
          // 'no slot available' still means the model finished loading.
          if (data.status === 'ok' || data.status === 'no slot available') {
            return true;
          }
        }
      } catch {
        // Server still loading — keep polling.
      }
      await new Promise((r) => setTimeout(r, HEALTH_POLL_INTERVAL));
    }
    throw new Error(`llama-server health check timed out after ${HEALTH_TIMEOUT / 1000}s`);
  }

  /** One-shot health probe for a tracked server; { running: false } if untracked. */
  async healthCheck(modelPath) {
    const server = this.servers.get(modelPath);
    if (!server) return { running: false };

    try {
      const res = await fetch(`http://127.0.0.1:${server.port}/health`, {
        signal: AbortSignal.timeout(3000),
      });
      const data = await res.json().catch(() => ({}));
      return { running: true, ready: server.ready, port: server.port, status: data.status };
    } catch {
      return { running: true, ready: false, port: server.port, status: 'unreachable' };
    }
  }

  // --- Port Management ---

  /** Lowest port in [BASE_PORT, BASE_PORT + 100) not used by a tracked server. */
  _allocatePort() {
    const usedPorts = new Set(Array.from(this.servers.values()).map((s) => s.port));
    let port = BASE_PORT;
    while (usedPorts.has(port) && port < BASE_PORT + 100) {
      port++;
    }
    return port;
  }

  /** Stop the server with the fewest users, ties broken by oldest lastUsed. */
  async _evictLRU() {
    let lru = null;
    for (const [path, server] of this.servers) {
      if (!lru || server.users < lru.users ||
          (server.users === lru.users && server.lastUsed < lru.lastUsed)) {
        lru = { path, ...server };
      }
    }
    if (lru) {
      await this.stopServer(lru.path);
    }
  }

  // --- Status ---

  /** Snapshot of all tracked servers for status APIs / the GUI. */
  getRunningServers() {
    return Array.from(this.servers.entries()).map(([modelPath, s]) => ({
      modelPath,
      port: s.port,
      users: s.users,
      ready: s.ready,
      uptime: Date.now() - s.startedAt,
      lastUsed: s.lastUsed,
    }));
  }

  /** Overall manager status: installation, capacity, and running servers. */
  getStatus() {
    return {
      installed: LlamaServerManager.isInstalled(),
      running: this.servers.size,
      maxServers: MAX_SERVERS,
      servers: this.getRunningServers(),
    };
  }
}
@@ -0,0 +1,411 @@
1
+ // GROOVE — Model Manager (HuggingFace + Local GGUF Storage)
2
+ // FSL-1.1-Apache-2.0 — see LICENSE
3
+ //
4
+ // Manages local model storage, HuggingFace integration for searching/downloading
5
+ // GGUF models, and metadata indexing. Models live in ~/.groove/models/.
6
+
7
+ import { existsSync, mkdirSync, readFileSync, writeFileSync, unlinkSync, statSync, createWriteStream } from 'fs';
8
+ import { resolve, basename } from 'path';
9
+ import { homedir } from 'os';
10
+ import { pipeline } from 'stream/promises';
11
+ import { Readable } from 'stream';
12
+
13
// Downloaded models and the metadata index live under ~/.groove/models/.
const MODELS_DIR = resolve(homedir(), '.groove', 'models');
const INDEX_PATH = resolve(MODELS_DIR, 'models.json');

// Known context windows for popular model families, keyed by a substring
// matched against the lower-cased GGUF filename (see guessContextWindow).
const CONTEXT_WINDOWS = {
  'qwen2.5-coder': 32768,
  'qwen3': 32768,
  'deepseek': 65536,
  'llama': 131072,
  'mistral': 32768,
  'codestral': 32768,
  'gemma': 32768,
  'phi': 128000,
  'starcoder': 8192,
};

// Approximate RAM requirements (GB) per billion parameters at different
// quantization levels; used by estimateRam and recommendQuantization.
const RAM_PER_BILLION = {
  Q2_K: 0.5, Q3_K_S: 0.55, Q3_K_M: 0.6, Q3_K_L: 0.65,
  Q4_0: 0.7, Q4_K_S: 0.75, Q4_K_M: 0.8,
  Q5_0: 0.85, Q5_K_S: 0.9, Q5_K_M: 0.95,
  Q6_K: 1.05, Q8_0: 1.2,
  F16: 2.0, F32: 4.0,
};
37
+
38
export class ModelManager {
  /**
   * @param {object} daemon - Daemon handle used for broadcast() GUI
   *   notifications; may be null/undefined.
   */
  constructor(daemon) {
    this.daemon = daemon;
    this.modelsDir = MODELS_DIR;
    this.indexPath = INDEX_PATH;
    this.downloads = new Map(); // filename -> { progress, controller, ... }
    this.index = { models: [] };

    mkdirSync(this.modelsDir, { recursive: true });
    this.load();
  }

  // --- Index Persistence ---

  /** Load models.json; a missing or corrupt index resets to empty. */
  load() {
    if (existsSync(this.indexPath)) {
      try {
        this.index = JSON.parse(readFileSync(this.indexPath, 'utf8'));
        if (!Array.isArray(this.index.models)) this.index.models = [];
      } catch {
        this.index = { models: [] };
      }
    }
  }

  /** Persist the in-memory index to models.json. */
  save() {
    writeFileSync(this.indexPath, JSON.stringify(this.index, null, 2));
  }

  // --- HuggingFace API ---

  /**
   * Search HuggingFace for GGUF models.
   * @param {string} query
   * @param {object} [opts] - { limit = 20, sort = 'downloads' }
   * @returns {Promise<Array>} simplified model summaries
   * @throws {Error} on a non-2xx API response (or the 15s timeout)
   */
  async search(query, { limit = 20, sort = 'downloads' } = {}) {
    const params = new URLSearchParams({
      search: query,
      filter: 'gguf',
      sort,
      direction: '-1',
      limit: String(limit),
    });

    const res = await fetch(`https://huggingface.co/api/models?${params}`, {
      headers: { 'User-Agent': 'GROOVE-ModelManager/1.0' },
      signal: AbortSignal.timeout(15000),
    });

    if (!res.ok) throw new Error(`HuggingFace API error: ${res.status}`);
    const models = await res.json();

    return models.map((m) => ({
      id: m.modelId || m.id,
      name: m.modelId?.split('/').pop() || m.id,
      author: m.modelId?.split('/')[0] || '',
      downloads: m.downloads || 0,
      likes: m.likes || 0,
      tags: m.tags || [],
      lastModified: m.lastModified,
    }));
  }

  /**
   * List the .gguf files in a HuggingFace repo, smallest first, with parsed
   * quantization/parameter metadata and a rough RAM estimate per file.
   * @throws {Error} when the repo does not exist
   */
  async getModelFiles(repoId) {
    const res = await fetch(`https://huggingface.co/api/models/${repoId}`, {
      headers: { 'User-Agent': 'GROOVE-ModelManager/1.0' },
      signal: AbortSignal.timeout(15000),
    });

    if (!res.ok) throw new Error(`Model not found: ${repoId}`);
    const data = await res.json();

    const files = (data.siblings || [])
      .filter((f) => f.rfilename.endsWith('.gguf'))
      .map((f) => {
        const filename = f.rfilename;
        const quant = parseQuantization(filename);
        const params = parseParameters(filename);
        return {
          filename,
          size: f.size || 0,
          quantization: quant,
          parameters: params,
          estimatedRamGb: estimateRam(params, quant),
        };
      })
      .sort((a, b) => a.size - b.size);

    return {
      repoId,
      name: data.modelId?.split('/').pop() || repoId,
      author: data.modelId?.split('/')[0] || '',
      files,
    };
  }

  // --- Download Management ---

  /**
   * Download a GGUF file into the models dir with resume support (partial
   * content kept in "<file>.part"). Progress is reported via the optional
   * onProgress callback and via daemon broadcasts.
   * Resolves with the new index entry, or null when the download was
   * cancelled via cancelDownload().
   * @throws {Error} when the file is already downloading or on HTTP failure
   */
  async download(repoId, filename, onProgress) {
    if (this.downloads.has(filename)) {
      // BUGFIX: interpolate the actual filename into the message.
      throw new Error(`Already downloading: ${filename}`);
    }

    // BUGFIX: interpolate the filename into the download URL.
    const url = `https://huggingface.co/${repoId}/resolve/main/${encodeURIComponent(filename)}`;
    const destPath = resolve(this.modelsDir, filename);
    const tempPath = destPath + '.part';
    const controller = new AbortController();

    // Check for a partial download (resume support).
    let startByte = 0;
    if (existsSync(tempPath)) {
      try { startByte = statSync(tempPath).size; } catch { startByte = 0; }
    }

    const headers = { 'User-Agent': 'GROOVE-ModelManager/1.0' };
    if (startByte > 0) {
      headers.Range = `bytes=${startByte}-`;
    }

    const downloadState = {
      filename,
      repoId,
      downloaded: startByte,
      totalBytes: 0,
      percent: 0,
      speed: 0,
      startedAt: Date.now(),
      controller,
    };
    this.downloads.set(filename, downloadState);

    let fileStream;
    try {
      const res = await fetch(url, { headers, signal: controller.signal });

      if (!res.ok && res.status !== 206) {
        throw new Error(`Download failed: HTTP ${res.status}`);
      }

      // BUGFIX: a 200 response means the server ignored our Range header —
      // appending to the .part file would corrupt it, so restart from byte 0.
      if (res.status === 200 && startByte > 0) {
        startByte = 0;
        downloadState.downloaded = 0;
      }

      const contentLength = Number(res.headers.get('content-length') || 0);
      const totalBytes = startByte + contentLength;
      downloadState.totalBytes = totalBytes;

      fileStream = createWriteStream(tempPath, {
        flags: startByte > 0 ? 'a' : 'w',
      });

      let lastProgressTime = Date.now();
      let lastProgressBytes = startByte;

      // Stream the download with progress tracking.
      const reader = res.body.getReader();
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;

        // BUGFIX: respect backpressure so multi-GB models don't buffer
        // entirely in memory when the disk is slower than the network.
        if (!fileStream.write(Buffer.from(value))) {
          await new Promise((resolveDrain) => fileStream.once('drain', resolveDrain));
        }
        downloadState.downloaded += value.length;
        downloadState.percent = totalBytes > 0 ? downloadState.downloaded / totalBytes : 0;

        // Recompute speed and publish progress at most every 500ms.
        const now = Date.now();
        if (now - lastProgressTime > 500) {
          const elapsed = (now - lastProgressTime) / 1000;
          downloadState.speed = (downloadState.downloaded - lastProgressBytes) / elapsed;
          lastProgressTime = now;
          lastProgressBytes = downloadState.downloaded;

          if (onProgress) onProgress({ ...downloadState });

          // Broadcast progress to the GUI.
          this.daemon?.broadcast({
            type: 'model:download:progress',
            data: {
              filename, repoId,
              downloaded: downloadState.downloaded,
              totalBytes, percent: downloadState.percent,
              speed: downloadState.speed,
            },
          });
        }
      }

      // Flush and close the file before renaming.
      await new Promise((resolveEnd, rejectEnd) => {
        fileStream.on('error', rejectEnd);
        fileStream.end(() => resolveEnd());
      });

      // Rename .part to the final filename.
      const { renameSync } = await import('fs');
      renameSync(tempPath, destPath);

      // Index the model.
      const quant = parseQuantization(filename);
      const params = parseParameters(filename);
      const contextWindow = guessContextWindow(filename);

      const modelEntry = {
        id: filename.replace('.gguf', ''),
        filename,
        repoId,
        parameters: params,
        quantization: quant,
        contextWindow,
        sizeBytes: totalBytes,
        category: filename.toLowerCase().includes('code') ? 'code' : 'general',
        tier: classifyTier(params, quant),
        downloadedAt: new Date().toISOString(),
      };

      // Remove any existing entry if re-downloading.
      this.index.models = this.index.models.filter((m) => m.filename !== filename);
      this.index.models.push(modelEntry);
      this.save();

      this.downloads.delete(filename);

      this.daemon?.broadcast({
        type: 'model:download:complete',
        data: { filename, repoId, model: modelEntry },
      });

      return modelEntry;
    } catch (err) {
      this.downloads.delete(filename);
      // BUGFIX: release the file descriptor; keep the .part file for resume.
      fileStream?.destroy();
      if (err.name === 'AbortError') {
        this.daemon?.broadcast({ type: 'model:download:cancelled', data: { filename } });
        return null;
      }
      this.daemon?.broadcast({ type: 'model:download:error', data: { filename, error: err.message } });
      throw err;
    }
  }

  /** Abort an in-flight download; true when one existed for the filename. */
  cancelDownload(filename) {
    const download = this.downloads.get(filename);
    if (download) {
      download.controller.abort();
      this.downloads.delete(filename);
      return true;
    }
    return false;
  }

  /** Progress snapshots for all in-flight downloads (controller omitted). */
  getActiveDownloads() {
    return Array.from(this.downloads.values()).map((d) => ({
      filename: d.filename,
      repoId: d.repoId,
      downloaded: d.downloaded,
      totalBytes: d.totalBytes,
      percent: d.percent,
      speed: d.speed,
    }));
  }

  // --- Installed Model Management ---

  /** All indexed models, each flagged with whether its file still exists. */
  getInstalled() {
    return this.index.models.map((m) => ({
      ...m,
      exists: existsSync(resolve(this.modelsDir, m.filename)),
    }));
  }

  /** Look up an index entry by id or filename; null when not indexed. */
  getModel(id) {
    return this.index.models.find((m) => m.id === id || m.filename === id) || null;
  }

  /** Absolute path of a model's file, or null if unindexed or missing on disk. */
  getModelPath(id) {
    const model = this.getModel(id);
    if (!model) return null;
    const p = resolve(this.modelsDir, model.filename);
    return existsSync(p) ? p : null;
  }

  /**
   * Delete a model's file (and any leftover .part) and drop it from the
   * index. Returns false when the id is not indexed.
   */
  deleteModel(id) {
    const model = this.getModel(id);
    if (!model) return false;

    const p = resolve(this.modelsDir, model.filename);
    if (existsSync(p)) {
      try { unlinkSync(p); } catch { /* ignore */ }
    }

    // Also remove .part files.
    const partPath = p + '.part';
    if (existsSync(partPath)) {
      try { unlinkSync(partPath); } catch { /* ignore */ }
    }

    this.index.models = this.index.models.filter((m) => m.id !== model.id);
    this.save();
    return true;
  }

  // --- Hardware Recommendations ---

  /**
   * Pick the highest-quality quantization whose estimated RAM footprint
   * (plus 1GB overhead) fits in 85% of the available RAM.
   * @param {string} modelParams - e.g. "7B"
   * @param {number} availableRamGb
   */
  recommendQuantization(modelParams, availableRamGb) {
    // Try quantizations from best quality to most compressed.
    const preferences = ['Q8_0', 'Q6_K', 'Q5_K_M', 'Q5_K_S', 'Q4_K_M', 'Q4_K_S', 'Q3_K_M', 'Q2_K'];
    const params = parseParamsBillions(modelParams);
    if (!params) return 'Q4_K_M'; // Safe default

    for (const quant of preferences) {
      const ramNeeded = params * (RAM_PER_BILLION[quant] || 1) + 1; // +1GB overhead
      if (ramNeeded <= availableRamGb * 0.85) { // Leave 15% headroom
        return quant;
      }
    }
    return 'Q2_K'; // Smallest if nothing else fits
  }

  /** Summary counts for status APIs. */
  getStatus() {
    return {
      modelsDir: this.modelsDir,
      installedCount: this.index.models.length,
      activeDownloads: this.downloads.size,
    };
  }
}
353
+
354
+ // --- Parsing Utilities ---
355
+
356
// Extract the quantization label (e.g. "Q4_K_M", "F16", "IQ2_XS") from a GGUF
// filename. Returns the label upper-cased, or null when none is recognized.
function parseQuantization(filename) {
  const lower = filename.toLowerCase();
  const patterns = [
    /[_-](q[2-8]_k_[sml])/i,
    /[_-](q[2-8]_k)/i,
    // GENERALIZED: legacy quants come in _0 AND _1 variants (Q4_1, Q5_1).
    /[_-](q[2-8]_[01])/i,
    /[_-](f16)/i,
    /[_-](f32)/i,
    /[_-](iq[1-4]_[a-z]+)/i,
  ];
  for (const p of patterns) {
    const match = lower.match(p);
    if (match) return match[1].toUpperCase();
  }
  return null;
}
372
+
373
// Extract the parameter count (e.g. "7B", "1.5B") from a GGUF filename.
// Returns null when no "<number>b" token is present.
// NOTE: the former hard-coded fallbacks (/7b/, /14b/, /32b/, /70b/) were
// unreachable — the regex below already matches any digits followed by b/B.
function parseParameters(filename) {
  const match = filename.match(/(\d+\.?\d*)[bB]/);
  return match ? `${match[1]}B` : null;
}
383
+
384
// Convert a parameter-count string like "7B" or "1.5B" into its numeric
// value in billions; null for empty input or when no digits are present.
function parseParamsBillions(paramStr) {
  if (!paramStr) return null;
  const digits = /([\d.]+)/.exec(paramStr);
  if (!digits) return null;
  return parseFloat(digits[1]);
}
389
+
390
// Rough RAM estimate in GB for running a model: per-billion-parameter cost
// of the quantization level, plus ~1GB runtime overhead, rounded to one
// decimal place. Returns null when either input cannot be interpreted.
function estimateRam(params, quant) {
  const billions = parseParamsBillions(params);
  if (!billions || !quant) return null;
  const gbPerBillion = RAM_PER_BILLION[quant] ?? RAM_PER_BILLION.Q4_K_M;
  const totalGb = billions * gbPerBillion + 1;
  return Math.round(totalGb * 10) / 10;
}
396
+
397
// Guess a model's context window by matching known family names as
// substrings of the lower-cased filename; 32768 is the safe default.
function guessContextWindow(filename) {
  const lower = filename.toLowerCase();
  const hit = Object.entries(CONTEXT_WINDOWS).find(([family]) => lower.includes(family));
  return hit ? hit[1] : 32768;
}
404
+
405
// Bucket a model into a resource tier by parameter count:
// >= 25B -> 'heavy', >= 10B -> 'medium', otherwise 'light'.
// Unparseable counts default to 'medium'. (quant is currently unused.)
function classifyTier(params, quant) {
  const billions = parseParamsBillions(params);
  if (!billions) return 'medium';
  const tiers = [
    [25, 'heavy'],
    [10, 'medium'],
  ];
  for (const [minBillions, tier] of tiers) {
    if (billions >= minBillions) return tier;
  }
  return 'light';
}