groove-dev 0.25.20 → 0.26.0
This diff compares the contents of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
- package/node_modules/@groove-dev/daemon/src/agent-loop.js +479 -0
- package/node_modules/@groove-dev/daemon/src/api.js +104 -5
- package/node_modules/@groove-dev/daemon/src/index.js +6 -1
- package/node_modules/@groove-dev/daemon/src/llama-server.js +268 -0
- package/node_modules/@groove-dev/daemon/src/model-manager.js +411 -0
- package/node_modules/@groove-dev/daemon/src/process.js +179 -11
- package/node_modules/@groove-dev/daemon/src/providers/codex.js +51 -1
- package/node_modules/@groove-dev/daemon/src/providers/gemini.js +3 -2
- package/node_modules/@groove-dev/daemon/src/providers/index.js +4 -0
- package/node_modules/@groove-dev/daemon/src/providers/local.js +183 -0
- package/node_modules/@groove-dev/daemon/src/registry.js +1 -1
- package/node_modules/@groove-dev/daemon/src/tool-executor.js +367 -0
- package/node_modules/@groove-dev/gui/dist/assets/index-BQnZrh4f.css +1 -0
- package/node_modules/@groove-dev/gui/dist/assets/index-BqL4GcgZ.js +633 -0
- package/node_modules/@groove-dev/gui/dist/index.html +2 -2
- package/node_modules/@groove-dev/gui/src/app.jsx +2 -0
- package/node_modules/@groove-dev/gui/src/components/agents/agent-config.jsx +7 -2
- package/node_modules/@groove-dev/gui/src/components/layout/activity-bar.jsx +2 -1
- package/node_modules/@groove-dev/gui/src/stores/groove.js +6 -2
- package/node_modules/@groove-dev/gui/src/views/models.jsx +380 -0
- package/package.json +2 -2
- package/packages/daemon/src/agent-loop.js +479 -0
- package/packages/daemon/src/api.js +104 -5
- package/packages/daemon/src/index.js +6 -1
- package/packages/daemon/src/llama-server.js +268 -0
- package/packages/daemon/src/model-manager.js +411 -0
- package/packages/daemon/src/process.js +179 -11
- package/packages/daemon/src/providers/codex.js +51 -1
- package/packages/daemon/src/providers/gemini.js +3 -2
- package/packages/daemon/src/providers/index.js +4 -0
- package/packages/daemon/src/providers/local.js +183 -0
- package/packages/daemon/src/registry.js +1 -1
- package/packages/daemon/src/tool-executor.js +367 -0
- package/packages/gui/dist/assets/index-BQnZrh4f.css +1 -0
- package/packages/gui/dist/assets/index-BqL4GcgZ.js +633 -0
- package/packages/gui/dist/index.html +2 -2
- package/packages/gui/src/app.jsx +2 -0
- package/packages/gui/src/components/agents/agent-config.jsx +7 -2
- package/packages/gui/src/components/layout/activity-bar.jsx +2 -1
- package/packages/gui/src/stores/groove.js +6 -2
- package/packages/gui/src/views/models.jsx +380 -0
- package/node_modules/@groove-dev/gui/dist/assets/index-GYcMwmjs.css +0 -1
- package/node_modules/@groove-dev/gui/dist/assets/index-H_e3KvZp.js +0 -623
- package/packages/gui/dist/assets/index-GYcMwmjs.css +0 -1
- package/packages/gui/dist/assets/index-H_e3KvZp.js +0 -623
package/packages/daemon/src/llama-server.js
@@ -0,0 +1,268 @@
// GROOVE — llama-server Process Manager
// FSL-1.1-Apache-2.0 — see LICENSE
//
// Manages llama-server (llama.cpp) inference server instances.
// Each model gets its own server on a unique port.
// Auto-starts when an agent needs a GGUF model, auto-stops when idle.

import { spawn, execSync } from 'child_process';

const BASE_PORT = 8081;
const MAX_SERVERS = 5;
const HEALTH_TIMEOUT = 30000; // 30s for model loading
const HEALTH_POLL_INTERVAL = 500; // Check every 500ms
const IDLE_TIMEOUT = 300000; // 5 minutes before auto-stop

export class LlamaServerManager {
  constructor(daemon) {
    this.daemon = daemon;
    this.servers = new Map(); // modelPath -> { proc, port, users, startedAt, lastUsed }
    this.nextPort = BASE_PORT;
  }

  static isInstalled() {
    try {
      execSync('which llama-server', { stdio: 'ignore' });
      return true;
    } catch {
      return false;
    }
  }

  // --- Server Lifecycle ---

  /**
   * Ensure a llama-server is running for the given model.
   * Returns the OpenAI-compatible API base URL.
   * Starts the server if not already running.
   */
  async ensureServer(modelPath, options = {}) {
    // Reuse existing server
    if (this.servers.has(modelPath)) {
      const server = this.servers.get(modelPath);
      server.users++;
      server.lastUsed = Date.now();
      return `http://127.0.0.1:${server.port}/v1`;
    }

    // Check capacity
    if (this.servers.size >= MAX_SERVERS) {
      // Stop least recently used server
      await this._evictLRU();
    }

    // Allocate port
    const port = this._allocatePort();

    // Start llama-server
    const ctxSize = options.contextWindow || 32768;
    const nGpuLayers = options.gpuLayers ?? -1; // -1 = auto (offload all to GPU)
    const parallel = options.parallel || 2;

    const args = [
      '--model', modelPath,
      '--port', String(port),
      '--host', '127.0.0.1',
      '--ctx-size', String(ctxSize),
      '--n-gpu-layers', String(nGpuLayers),
      '--parallel', String(parallel),
      '--log-disable', // Reduce noise
    ];

    // Flash attention for better memory efficiency (if supported)
    if (options.flashAttention !== false) {
      args.push('--flash-attn');
    }

    const proc = spawn('llama-server', args, {
      stdio: ['ignore', 'pipe', 'pipe'],
      detached: false,
    });

    if (!proc.pid) {
      throw new Error('Failed to start llama-server — check installation');
    }

    const server = {
      proc,
      port,
      modelPath,
      users: 1,
      startedAt: Date.now(),
      lastUsed: Date.now(),
      ready: false,
    };

    this.servers.set(modelPath, server);

    // Capture stderr for debugging
    const stderrBuf = [];
    proc.stderr.on('data', (chunk) => {
      stderrBuf.push(chunk.toString());
      if (stderrBuf.join('').length > 4096) stderrBuf.shift();
    });

    proc.on('exit', (code, signal) => {
      this.servers.delete(modelPath);
      this.daemon?.broadcast({
        type: 'llama:server:stopped',
        data: { modelPath, port, code, signal },
      });
    });

    // Wait for server to be ready
    try {
      await this._waitForHealth(port);
      server.ready = true;

      this.daemon?.broadcast({
        type: 'llama:server:ready',
        data: { modelPath, port },
      });

      return `http://127.0.0.1:${port}/v1`;
    } catch (err) {
      // Server failed to start
      await this.stopServer(modelPath);
      const stderr = stderrBuf.join('').slice(-500);
      throw new Error(`llama-server failed to start: ${stderr || err.message}`);
    }
  }

  /**
   * Release a server (decrement user count).
   * Server auto-stops after IDLE_TIMEOUT if no users.
   */
  releaseServer(modelPath) {
    const server = this.servers.get(modelPath);
    if (!server) return;

    server.users = Math.max(0, server.users - 1);
    server.lastUsed = Date.now();

    // Schedule auto-stop check
    if (server.users === 0) {
      setTimeout(() => {
        const s = this.servers.get(modelPath);
        if (s && s.users === 0 && Date.now() - s.lastUsed >= IDLE_TIMEOUT) {
          this.stopServer(modelPath);
        }
      }, IDLE_TIMEOUT + 1000);
    }
  }

  async stopServer(modelPath) {
    const server = this.servers.get(modelPath);
    if (!server) return false;

    return new Promise((resolve) => {
      const timeout = setTimeout(() => {
        try { server.proc.kill('SIGKILL'); } catch {}
      }, 5000);

      server.proc.on('exit', () => {
        clearTimeout(timeout);
        this.servers.delete(modelPath);
        resolve(true);
      });

      try {
        server.proc.kill('SIGTERM');
      } catch {
        clearTimeout(timeout);
        this.servers.delete(modelPath);
        resolve(true);
      }
    });
  }

  async stopAll() {
    const paths = Array.from(this.servers.keys());
    await Promise.all(paths.map((p) => this.stopServer(p)));
  }

  // --- Health Check ---

  async _waitForHealth(port) {
    const start = Date.now();
    while (Date.now() - start < HEALTH_TIMEOUT) {
      try {
        const res = await fetch(`http://127.0.0.1:${port}/health`, {
          signal: AbortSignal.timeout(2000),
        });
        if (res.ok) {
          const data = await res.json().catch(() => ({}));
          if (data.status === 'ok' || data.status === 'no slot available') {
            return true;
          }
        }
      } catch {
        // Server still loading
      }
      await new Promise((r) => setTimeout(r, HEALTH_POLL_INTERVAL));
    }
    throw new Error(`llama-server health check timed out after ${HEALTH_TIMEOUT / 1000}s`);
  }

  async healthCheck(modelPath) {
    const server = this.servers.get(modelPath);
    if (!server) return { running: false };

    try {
      const res = await fetch(`http://127.0.0.1:${server.port}/health`, {
        signal: AbortSignal.timeout(3000),
      });
      const data = await res.json().catch(() => ({}));
      return { running: true, ready: server.ready, port: server.port, status: data.status };
    } catch {
      return { running: true, ready: false, port: server.port, status: 'unreachable' };
    }
  }

  // --- Port Management ---

  _allocatePort() {
    const usedPorts = new Set(Array.from(this.servers.values()).map((s) => s.port));
    let port = BASE_PORT;
    while (usedPorts.has(port) && port < BASE_PORT + 100) {
      port++;
    }
    return port;
  }

  async _evictLRU() {
    // Find the server with fewest users, then oldest lastUsed
    let lru = null;
    for (const [path, server] of this.servers) {
      if (!lru || server.users < lru.users ||
          (server.users === lru.users && server.lastUsed < lru.lastUsed)) {
        lru = { path, ...server };
      }
    }
    if (lru) {
      await this.stopServer(lru.path);
    }
  }

  // --- Status ---

  getRunningServers() {
    return Array.from(this.servers.entries()).map(([modelPath, s]) => ({
      modelPath,
      port: s.port,
      users: s.users,
      ready: s.ready,
      uptime: Date.now() - s.startedAt,
      lastUsed: s.lastUsed,
    }));
  }

  getStatus() {
    return {
      installed: LlamaServerManager.isInstalled(),
      running: this.servers.size,
      maxServers: MAX_SERVERS,
      servers: this.getRunningServers(),
    };
  }
}
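The sketch below illustrates how a caller might drive the new LlamaServerManager. It is not code from the package: the import path, the example model filename, and the null daemon argument are placeholders, and the real callers in this release (agent-loop.js and providers/local.js) are not reproduced here.

// Illustrative sketch only; import path and model filename are assumptions.
import { LlamaServerManager } from './llama-server.js';

const manager = new LlamaServerManager(null); // null daemon: broadcasts are skipped via optional chaining
const modelPath = `${process.env.HOME}/.groove/models/qwen2.5-coder-7b-instruct-q4_k_m.gguf`; // hypothetical file

if (LlamaServerManager.isInstalled()) {
  // Spins up llama-server for the GGUF file (or reuses a running instance) and
  // resolves to an OpenAI-compatible base URL such as http://127.0.0.1:8081/v1.
  const baseUrl = await manager.ensureServer(modelPath, { contextWindow: 32768, parallel: 2 });

  // Any OpenAI-style client can now talk to the local model.
  const res = await fetch(`${baseUrl}/chat/completions`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ messages: [{ role: 'user', content: 'Hello' }] }),
  });
  console.log((await res.json()).choices?.[0]?.message?.content);

  // Decrement the user count; the server auto-stops after 5 idle minutes (IDLE_TIMEOUT).
  manager.releaseServer(modelPath);
}

The ensureServer/releaseServer pairing is what keeps resource use bounded: the per-server user count plus the five-minute idle timer lets the daemon keep at most MAX_SERVERS (5) models resident without leaking llama-server processes.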
package/packages/daemon/src/model-manager.js
@@ -0,0 +1,411 @@
// GROOVE — Model Manager (HuggingFace + Local GGUF Storage)
// FSL-1.1-Apache-2.0 — see LICENSE
//
// Manages local model storage, HuggingFace integration for searching/downloading
// GGUF models, and metadata indexing. Models live in ~/.groove/models/.

import { existsSync, mkdirSync, readFileSync, writeFileSync, unlinkSync, statSync, createWriteStream } from 'fs';
import { resolve, basename } from 'path';
import { homedir } from 'os';
import { pipeline } from 'stream/promises';
import { Readable } from 'stream';

const MODELS_DIR = resolve(homedir(), '.groove', 'models');
const INDEX_PATH = resolve(MODELS_DIR, 'models.json');

// Known context windows for popular model families
const CONTEXT_WINDOWS = {
  'qwen2.5-coder': 32768,
  'qwen3': 32768,
  'deepseek': 65536,
  'llama': 131072,
  'mistral': 32768,
  'codestral': 32768,
  'gemma': 32768,
  'phi': 128000,
  'starcoder': 8192,
};

// Approximate RAM requirements per billion parameters at different quantization levels
const RAM_PER_BILLION = {
  Q2_K: 0.5, Q3_K_S: 0.55, Q3_K_M: 0.6, Q3_K_L: 0.65,
  Q4_0: 0.7, Q4_K_S: 0.75, Q4_K_M: 0.8,
  Q5_0: 0.85, Q5_K_S: 0.9, Q5_K_M: 0.95,
  Q6_K: 1.05, Q8_0: 1.2,
  F16: 2.0, F32: 4.0,
};

export class ModelManager {
  constructor(daemon) {
    this.daemon = daemon;
    this.modelsDir = MODELS_DIR;
    this.indexPath = INDEX_PATH;
    this.downloads = new Map(); // filename -> { progress, controller, ... }
    this.index = { models: [] };

    mkdirSync(this.modelsDir, { recursive: true });
    this.load();
  }

  // --- Index Persistence ---

  load() {
    if (existsSync(this.indexPath)) {
      try {
        this.index = JSON.parse(readFileSync(this.indexPath, 'utf8'));
        if (!Array.isArray(this.index.models)) this.index.models = [];
      } catch {
        this.index = { models: [] };
      }
    }
  }

  save() {
    writeFileSync(this.indexPath, JSON.stringify(this.index, null, 2));
  }

  // --- HuggingFace API ---

  async search(query, { limit = 20, sort = 'downloads' } = {}) {
    const params = new URLSearchParams({
      search: query,
      filter: 'gguf',
      sort,
      direction: '-1',
      limit: String(limit),
    });

    const res = await fetch(`https://huggingface.co/api/models?${params}`, {
      headers: { 'User-Agent': 'GROOVE-ModelManager/1.0' },
      signal: AbortSignal.timeout(15000),
    });

    if (!res.ok) throw new Error(`HuggingFace API error: ${res.status}`);
    const models = await res.json();

    return models.map((m) => ({
      id: m.modelId || m.id,
      name: m.modelId?.split('/').pop() || m.id,
      author: m.modelId?.split('/')[0] || '',
      downloads: m.downloads || 0,
      likes: m.likes || 0,
      tags: m.tags || [],
      lastModified: m.lastModified,
    }));
  }

  async getModelFiles(repoId) {
    const res = await fetch(`https://huggingface.co/api/models/${repoId}`, {
      headers: { 'User-Agent': 'GROOVE-ModelManager/1.0' },
      signal: AbortSignal.timeout(15000),
    });

    if (!res.ok) throw new Error(`Model not found: ${repoId}`);
    const data = await res.json();

    const files = (data.siblings || [])
      .filter((f) => f.rfilename.endsWith('.gguf'))
      .map((f) => {
        const filename = f.rfilename;
        const quant = parseQuantization(filename);
        const params = parseParameters(filename);
        return {
          filename,
          size: f.size || 0,
          quantization: quant,
          parameters: params,
          estimatedRamGb: estimateRam(params, quant),
        };
      })
      .sort((a, b) => a.size - b.size);

    return {
      repoId,
      name: data.modelId?.split('/').pop() || repoId,
      author: data.modelId?.split('/')[0] || '',
      files,
    };
  }

  // --- Download Management ---

  async download(repoId, filename, onProgress) {
    if (this.downloads.has(filename)) {
      throw new Error(`Already downloading: ${filename}`);
    }

    const url = `https://huggingface.co/${repoId}/resolve/main/${filename}`;
    const destPath = resolve(this.modelsDir, filename);
    const tempPath = destPath + '.part';
    const controller = new AbortController();

    // Check for partial download (resume support)
    let startByte = 0;
    if (existsSync(tempPath)) {
      try { startByte = statSync(tempPath).size; } catch { startByte = 0; }
    }

    const headers = { 'User-Agent': 'GROOVE-ModelManager/1.0' };
    if (startByte > 0) {
      headers.Range = `bytes=${startByte}-`;
    }

    const downloadState = {
      filename,
      repoId,
      downloaded: startByte,
      totalBytes: 0,
      percent: 0,
      speed: 0,
      startedAt: Date.now(),
      controller,
    };
    this.downloads.set(filename, downloadState);

    try {
      const res = await fetch(url, { headers, signal: controller.signal });

      if (!res.ok && res.status !== 206) {
        throw new Error(`Download failed: HTTP ${res.status}`);
      }

      const contentLength = Number(res.headers.get('content-length') || 0);
      const totalBytes = startByte + contentLength;
      downloadState.totalBytes = totalBytes;

      const fileStream = createWriteStream(tempPath, {
        flags: startByte > 0 ? 'a' : 'w',
      });

      let lastProgressTime = Date.now();
      let lastProgressBytes = startByte;

      // Stream the download with progress tracking
      const reader = res.body.getReader();
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;

        fileStream.write(Buffer.from(value));
        downloadState.downloaded += value.length;
        downloadState.percent = totalBytes > 0 ? downloadState.downloaded / totalBytes : 0;

        // Calculate speed every 500ms
        const now = Date.now();
        if (now - lastProgressTime > 500) {
          const elapsed = (now - lastProgressTime) / 1000;
          downloadState.speed = (downloadState.downloaded - lastProgressBytes) / elapsed;
          lastProgressTime = now;
          lastProgressBytes = downloadState.downloaded;

          if (onProgress) onProgress({ ...downloadState });

          // Broadcast progress to GUI
          this.daemon?.broadcast({
            type: 'model:download:progress',
            data: {
              filename, repoId,
              downloaded: downloadState.downloaded,
              totalBytes, percent: downloadState.percent,
              speed: downloadState.speed,
            },
          });
        }
      }

      await new Promise((res, rej) => {
        fileStream.end(() => res());
        fileStream.on('error', rej);
      });

      // Rename .part to final filename
      const { renameSync } = await import('fs');
      renameSync(tempPath, destPath);

      // Index the model
      const quant = parseQuantization(filename);
      const params = parseParameters(filename);
      const contextWindow = guessContextWindow(filename);

      const modelEntry = {
        id: filename.replace('.gguf', ''),
        filename,
        repoId,
        parameters: params,
        quantization: quant,
        contextWindow,
        sizeBytes: totalBytes,
        category: filename.toLowerCase().includes('code') ? 'code' : 'general',
        tier: classifyTier(params, quant),
        downloadedAt: new Date().toISOString(),
      };

      // Remove existing entry if re-downloading
      this.index.models = this.index.models.filter((m) => m.filename !== filename);
      this.index.models.push(modelEntry);
      this.save();

      this.downloads.delete(filename);

      this.daemon?.broadcast({
        type: 'model:download:complete',
        data: { filename, repoId, model: modelEntry },
      });

      return modelEntry;
    } catch (err) {
      this.downloads.delete(filename);
      if (err.name === 'AbortError') {
        this.daemon?.broadcast({ type: 'model:download:cancelled', data: { filename } });
        return null;
      }
      this.daemon?.broadcast({ type: 'model:download:error', data: { filename, error: err.message } });
      throw err;
    }
  }

  cancelDownload(filename) {
    const download = this.downloads.get(filename);
    if (download) {
      download.controller.abort();
      this.downloads.delete(filename);
      return true;
    }
    return false;
  }

  getActiveDownloads() {
    return Array.from(this.downloads.values()).map((d) => ({
      filename: d.filename,
      repoId: d.repoId,
      downloaded: d.downloaded,
      totalBytes: d.totalBytes,
      percent: d.percent,
      speed: d.speed,
    }));
  }

  // --- Installed Model Management ---

  getInstalled() {
    return this.index.models.map((m) => ({
      ...m,
      exists: existsSync(resolve(this.modelsDir, m.filename)),
    }));
  }

  getModel(id) {
    return this.index.models.find((m) => m.id === id || m.filename === id) || null;
  }

  getModelPath(id) {
    const model = this.getModel(id);
    if (!model) return null;
    const p = resolve(this.modelsDir, model.filename);
    return existsSync(p) ? p : null;
  }

  deleteModel(id) {
    const model = this.getModel(id);
    if (!model) return false;

    const p = resolve(this.modelsDir, model.filename);
    if (existsSync(p)) {
      try { unlinkSync(p); } catch { /* ignore */ }
    }

    // Also remove .part files
    const partPath = p + '.part';
    if (existsSync(partPath)) {
      try { unlinkSync(partPath); } catch { /* ignore */ }
    }

    this.index.models = this.index.models.filter((m) => m.id !== model.id);
    this.save();
    return true;
  }

  // --- Hardware Recommendations ---

  recommendQuantization(modelParams, availableRamGb) {
    // Try quantizations from best quality to most compressed
    const preferences = ['Q8_0', 'Q6_K', 'Q5_K_M', 'Q5_K_S', 'Q4_K_M', 'Q4_K_S', 'Q3_K_M', 'Q2_K'];
    const params = parseParamsBillions(modelParams);
    if (!params) return 'Q4_K_M'; // Safe default

    for (const quant of preferences) {
      const ramNeeded = params * (RAM_PER_BILLION[quant] || 1) + 1; // +1GB overhead
      if (ramNeeded <= availableRamGb * 0.85) { // Leave 15% headroom
        return quant;
      }
    }
    return 'Q2_K'; // Smallest if nothing else fits
  }

  getStatus() {
    return {
      modelsDir: this.modelsDir,
      installedCount: this.index.models.length,
      activeDownloads: this.downloads.size,
    };
  }
}

// --- Parsing Utilities ---

function parseQuantization(filename) {
  const lower = filename.toLowerCase();
  const patterns = [
    /[_-](q[2-8]_k_[sml])/i,
    /[_-](q[2-8]_k)/i,
    /[_-](q[2-8]_0)/i,
    /[_-](f16)/i,
    /[_-](f32)/i,
    /[_-](iq[1-4]_[a-z]+)/i,
  ];
  for (const p of patterns) {
    const match = lower.match(p);
    if (match) return match[1].toUpperCase();
  }
  return null;
}

function parseParameters(filename) {
  const match = filename.match(/(\d+\.?\d*)[bB]/);
  if (match) return `${match[1]}B`;
  // Try word forms
  if (/7b/i.test(filename)) return '7B';
  if (/14b/i.test(filename)) return '14B';
  if (/32b/i.test(filename)) return '32B';
  if (/70b/i.test(filename)) return '70B';
  return null;
}

function parseParamsBillions(paramStr) {
  if (!paramStr) return null;
  const match = paramStr.match(/([\d.]+)/);
  return match ? parseFloat(match[1]) : null;
}

function estimateRam(params, quant) {
  const billions = parseParamsBillions(params);
  if (!billions || !quant) return null;
  const perB = RAM_PER_BILLION[quant] || RAM_PER_BILLION.Q4_K_M;
  return Math.round((billions * perB + 1) * 10) / 10; // +1GB overhead, round to 1 decimal
}

function guessContextWindow(filename) {
  const lower = filename.toLowerCase();
  for (const [prefix, ctx] of Object.entries(CONTEXT_WINDOWS)) {
    if (lower.includes(prefix)) return ctx;
  }
  return 32768; // Safe default
}

function classifyTier(params, quant) {
  const billions = parseParamsBillions(params);
  if (!billions) return 'medium';
  if (billions >= 25) return 'heavy';
  if (billions >= 10) return 'medium';
  return 'light';
}
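To show how the pieces fit together, here is an end-to-end sketch of the flow the new local-model support enables: search HuggingFace for GGUF repos, pick a quantization that fits local RAM, download with resume support, then hand the file to LlamaServerManager. None of this is package code; the search query, file choice, and import paths are assumptions.

// Illustrative sketch only; import paths and the search query are assumptions.
import { ModelManager } from './model-manager.js';
import { LlamaServerManager } from './llama-server.js';

const models = new ModelManager(null);

// 1. Find GGUF repos on HuggingFace (sorted by downloads).
const hits = await models.search('qwen2.5 coder', { limit: 5 });

// 2. List the .gguf files in the top repo; each entry carries parsed quantization,
//    parameter count, and an estimated RAM footprint.
const { files } = await models.getModelFiles(hits[0].id);

// 3. Pick a quantization that fits the machine. Using the RAM_PER_BILLION table above,
//    a 7B model at Q8_0 is estimated at 7 * 1.2 + 1 = 9.4 GB, which fits within
//    85% of 16 GB (13.6 GB), so recommendQuantization('7B', 16) returns 'Q8_0'.
const quant = models.recommendQuantization('7B', 16);
const file = files.find((f) => f.quantization === quant) || files[0];

// 4. Download with resume support and progress callbacks, then serve it locally.
const entry = await models.download(hits[0].id, file.filename, (p) =>
  console.log(`${file.filename}: ${(p.percent * 100).toFixed(1)}%`)
);
const llama = new LlamaServerManager(null);
const baseUrl = await llama.ensureServer(models.getModelPath(entry.id), {
  contextWindow: entry.contextWindow,
});
console.log('Local model ready at', baseUrl);

When constructed with the real daemon, both managers broadcast lifecycle events (model:download:progress, model:download:complete, llama:server:ready, and so on), which is presumably what the new models.jsx view in the GUI subscribes to.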