npm - groove-dev - Versions diffs - 0.27.151 → 0.27.153 - Mend

groove-dev 0.27.151 → 0.27.153

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

package/node_modules/@groove-dev/cli/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@groove-dev/cli",
-  "version": "0.27.151",
+  "version": "0.27.153",
   "description": "GROOVE CLI — manage AI coding agents from your terminal",
   "license": "FSL-1.1-Apache-2.0",
   "type": "module",

package/node_modules/@groove-dev/daemon/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@groove-dev/daemon",
-  "version": "0.27.151",
+  "version": "0.27.153",
   "description": "GROOVE daemon — agent orchestration engine",
   "license": "FSL-1.1-Apache-2.0",
   "type": "module",

package/node_modules/@groove-dev/daemon/src/agent-loop.js CHANGED Viewed

@@ -10,6 +10,11 @@ import { existsSync, readFileSync, writeFileSync, mkdirSync, unlinkSync } from '
 import { resolve, dirname } from 'path';
 import { TOOL_DEFINITIONS, ToolExecutor } from './tool-executor.js';
+/**
+ * Remove every complete <think>…</think> span from a piece of model output.
+ * Falsy input (null, undefined, '') is handed back untouched; any other input
+ * is returned with the spans removed and surrounding whitespace trimmed.
+ */
+function stripThinkTags(text) {
+  return text ? text.replace(/<think>[\s\S]*?<\/think>/g, '').trim() : text;
+}
 export class AgentLoop extends EventEmitter {
   constructor({ daemon, agent, loopConfig, logStream }) {
     super();
@@ -145,6 +150,9 @@ export class AgentLoop extends EventEmitter {
         this._updateTokens(usage);
       }
+      // Strip thinking tags from display content (keep raw in conversation history)
+      const displayContent = stripThinkTags(content);
       // In prompt-based mode, parse tool calls from the model's text
       if (this.toolMode === 'prompt' && content) {
         const parsed = this._parseToolCallsFromText(content);
@@ -153,7 +161,7 @@ export class AgentLoop extends EventEmitter {
         }
       }
-      // Append assistant message to conversation history
+      // Append assistant message to conversation history (raw content with thinking preserved)
       const assistantMsg = { role: 'assistant' };
       if (content) assistantMsg.content = content;
       if (this.toolMode === 'native' && toolCalls?.length > 0) {
@@ -167,20 +175,21 @@ export class AgentLoop extends EventEmitter {
       // No tool calls → turn complete, broadcast final text and go idle
       if (!toolCalls || toolCalls.length === 0) {
-        if (content) {
-          this._writeLog({ type: 'assistant', content: content.slice(0, 2000) });
+        if (displayContent) {
+          this._writeLog({ type: 'assistant', content: displayContent.slice(0, 2000) });
         }
-        this.emit('output', { type: 'result', subtype: 'assistant', data: content || 'Turn complete', turns: this.turns });
+        this.emit('output', { type: 'result', subtype: 'assistant', data: displayContent || 'Turn complete', turns: this.turns });
         break;
       }
       // Has tool calls — broadcast text before executing tools (if model sent text + tools)
-      const displayContent = this.toolMode === 'prompt'
-        ? (content || '').replace(/<tool_call>[\s\S]*?<\/tool_call>/g, '').trim()
-        : content;
-      if (displayContent) {
-        this._writeLog({ type: 'assistant', content: displayContent.slice(0, 2000) });
-        this.emit('output', { type: 'activity', subtype: 'assistant', data: displayContent });
+      let preToolText = displayContent;
+      if (this.toolMode === 'prompt') {
+        preToolText = stripThinkTags((content || '').replace(/<tool_call>[\s\S]*?<\/tool_call>/g, ''));
+      }
+      if (preToolText) {
+        this._writeLog({ type: 'assistant', content: preToolText.slice(0, 2000) });
+        this.emit('output', { type: 'activity', subtype: 'assistant', data: preToolText });
       }
       // Execute each tool call
@@ -188,13 +197,29 @@ export class AgentLoop extends EventEmitter {
         if (!this.running) break;
         let args;
+        let parseError = null;
         try {
           args = JSON.parse(call.function.arguments);
-        } catch {
+        } catch (e) {
+          parseError = e.message;
           args = {};
         }
         const toolName = call.function.name;
+        // Report malformed JSON back to the model instead of silently failing
+        if (parseError) {
+          const errMsg = `Invalid JSON in tool arguments: ${parseError}. Raw: ${call.function.arguments.slice(0, 200)}`;
+          this._writeLog({ type: 'tool_result', tool: toolName, success: false, output: errMsg });
+          this.emit('output', { type: 'activity', subtype: 'tool_result', data: [{ type: 'tool_result', name: toolName, success: false, output: errMsg }] });
+          if (this.toolMode === 'native') {
+            this.messages.push({ role: 'tool', tool_call_id: call.id, content: `Error: ${errMsg}` });
+          } else {
+            this.messages.push({ role: 'user', content: `<tool_result name="${toolName}">\nError: ${errMsg}\n</tool_result>` });
+          }
+          continue;
+        }
         const inputSummary = this._summarizeToolInput(toolName, args);
         // Log + broadcast tool invocation
@@ -226,8 +251,12 @@ export class AgentLoop extends EventEmitter {
           this.daemon.classifier.addEvent(this.agent.id, { type: 'error', text: result.error });
         }
-        // Append tool result to conversation for the model
-        const resultContent = result.success ? (result.result || 'Done.') : `Error: ${result.error}`;
+        // Append tool result to conversation — cap size to protect context window
+        const MAX_RESULT_CHARS = 30000;
+        let resultContent = result.success ? (result.result || 'Done.') : `Error: ${result.error}`;
+        if (resultContent.length > MAX_RESULT_CHARS) {
+          resultContent = resultContent.slice(0, MAX_RESULT_CHARS) + '\n... (result truncated — use offset/limit for large files, or pipe commands through head/tail)';
+        }
         if (this.toolMode === 'native') {
           this.messages.push({
             role: 'tool',
@@ -359,6 +388,10 @@ export class AgentLoop extends EventEmitter {
     let finishReason = null;
     let buffer = '';
+    // State machine for suppressing <think> blocks during streaming
+    let insideThink = false;
+    let streamBuf = '';
     const reader = response.body.getReader();
     const decoder = new TextDecoder();
@@ -388,10 +421,56 @@ export class AgentLoop extends EventEmitter {
           if (choice.finish_reason) finishReason = choice.finish_reason;
           const delta = choice.delta || {};
-          // Stream text tokens to GUI in real-time
+          // reasoning_content: separate thinking field (vLLM, some OpenAI-compat servers)
+          // Capture for logging but don't stream to GUI
+          if (delta.reasoning_content || delta.reasoning) {
+            // Accumulate in content so it's in the conversation history
+            // but don't stream it to the GUI
+            content += delta.reasoning_content || delta.reasoning;
+          }
+          // Stream text tokens to GUI in real-time, suppressing <think> blocks
           if (delta.content) {
             content += delta.content;
-            this.emit('output', { type: 'activity', subtype: 'stream', data: delta.content });
+            streamBuf += delta.content;
+            // Process buffer — emit non-think content, suppress think content
+            let safety = 0;
+            while (streamBuf.length > 0 && safety++ < 100) {
+              if (insideThink) {
+                const closeIdx = streamBuf.indexOf('</think>');
+                if (closeIdx >= 0) {
+                  insideThink = false;
+                  streamBuf = streamBuf.slice(closeIdx + 8);
+                } else {
+                  break; // wait for more data
+                }
+              } else {
+                const openIdx = streamBuf.indexOf('<think>');
+                if (openIdx >= 0) {
+                  const before = streamBuf.slice(0, openIdx);
+                  if (before) {
+                    this.emit('output', { type: 'activity', subtype: 'stream', data: before });
+                  }
+                  insideThink = true;
+                  streamBuf = streamBuf.slice(openIdx + 7);
+                } else {
+                  // Hold back bytes that could be the start of a <think> tag
+                  let safeEnd = streamBuf.length;
+                  for (let i = Math.min(6, streamBuf.length); i >= 1; i--) {
+                    if ('<think>'.startsWith(streamBuf.slice(-i))) {
+                      safeEnd = streamBuf.length - i;
+                      break;
+                    }
+                  }
+                  if (safeEnd > 0) {
+                    this.emit('output', { type: 'activity', subtype: 'stream', data: streamBuf.slice(0, safeEnd) });
+                  }
+                  streamBuf = streamBuf.slice(safeEnd);
+                  break;
+                }
+              }
+            }
           }
           // Accumulate tool call deltas
@@ -419,6 +498,11 @@ export class AgentLoop extends EventEmitter {
       return null;
     }
+    // Flush remaining stream buffer (e.g. unclosed <think> — treat as display content)
+    if (streamBuf) {
+      this.emit('output', { type: 'activity', subtype: 'stream', data: streamBuf });
+    }
     return {
       content: content || null,
       toolCalls: toolCalls.size > 0 ? Array.from(toolCalls.values()) : null,

package/node_modules/@groove-dev/daemon/src/llama-server.js CHANGED Viewed

@@ -5,7 +5,10 @@
 // Each model gets its own server on a unique port.
 // Auto-starts when an agent needs a GGUF model, auto-stops when idle.
-import { spawn, execSync } from 'child_process';
+import { spawn, execSync, execFileSync } from 'child_process';
+import { existsSync, mkdirSync, chmodSync } from 'fs';
+import { resolve } from 'path';
+import { homedir } from 'os';
 const BASE_PORT = 8081;
 const MAX_SERVERS = 5;
@@ -25,10 +28,98 @@ export class LlamaServerManager {
       execSync('which llama-server', { stdio: 'ignore' });
       return true;
     } catch {
-      return false;
+      // Check common manual install locations
+      const paths = [
+        resolve(homedir(), '.local', 'bin', 'llama-server'),
+        resolve(homedir(), '.groove', 'bin', 'llama-server'),
+        '/usr/local/bin/llama-server',
+      ];
+      return paths.some(p => existsSync(p));
     }
   }
+  /**
+   * Resolve the llama-server executable to launch.
+   * `which llama-server` is tried first; if that command fails, the first
+   * existing file among the known manual-install locations is returned, and
+   * the bare command name 'llama-server' is the last resort.
+   */
+  static getLlamaServerPath() {
+    try {
+      const located = execSync('which llama-server', { stdio: 'pipe', encoding: 'utf8' });
+      return located.trim();
+    } catch {
+      const home = homedir();
+      const candidates = [
+        resolve(home, '.local', 'bin', 'llama-server'),
+        resolve(home, '.groove', 'bin', 'llama-server'),
+        '/usr/local/bin/llama-server',
+      ];
+      for (const candidate of candidates) {
+        if (existsSync(candidate)) return candidate;
+      }
+      return 'llama-server';
+    }
+  }
+  /**
+   * Install llama-server for the current platform.
+   *
+   * - darwin: installs the `llama.cpp` Homebrew formula (Homebrew must already be present).
+   * - linux: downloads a zip from the latest ggml-org/llama.cpp GitHub release for this
+   *   architecture, copies `llama-server` into ~/.local/bin and any bundled shared
+   *   libraries into ~/.local/lib.
+   *
+   * Every external command is run through execFileSync with an argv array, so the
+   * download URL (which comes from a remote API response) and the filesystem paths are
+   * never interpreted by a shell. Temporary files are removed even when a step fails.
+   *
+   * @returns {Promise<{method: string, path: string, cuda?: boolean, release?: string}>}
+   * @throws {Error} when Homebrew is missing, the GitHub API call fails, no matching
+   *   release asset or binary is found, a command fails, or the platform is unsupported.
+   */
+  static async install() {
+    const platform = process.platform;
+    if (platform === 'darwin') {
+      try {
+        execFileSync('which', ['brew'], { stdio: 'ignore' });
+      } catch {
+        throw new Error('Homebrew not found. Install it from https://brew.sh then retry.');
+      }
+      execFileSync('brew', ['install', 'llama.cpp'], { stdio: 'pipe', timeout: 600000 });
+      return { method: 'brew', path: execFileSync('which', ['llama-server'], { encoding: 'utf8', stdio: 'pipe' }).trim() };
+    }
+    if (platform === 'linux') {
+      const installDir = resolve(homedir(), '.local', 'bin');
+      mkdirSync(installDir, { recursive: true });
+      const arch = process.arch === 'arm64' ? 'arm64' : 'x64';
+      const hasCuda = (() => { try { execFileSync('which', ['nvidia-smi'], { stdio: 'ignore' }); return true; } catch { return false; } })();
+      const resp = await fetch('https://api.github.com/repos/ggml-org/llama.cpp/releases/latest', {
+        headers: { 'User-Agent': 'groove-dev' },
+      });
+      if (!resp.ok) throw new Error(`GitHub API error: ${resp.status}`);
+      const release = await resp.json();
+      // Tolerate an unexpected response shape instead of crashing on `.find` of undefined.
+      const zips = (Array.isArray(release?.assets) ? release.assets : [])
+        .filter(a => typeof a?.name === 'string' && a.name.endsWith('.zip'));
+      const suffix = hasCuda ? `ubuntu-${arch}-cuda` : `ubuntu-${arch}`;
+      // Preference order: exact suffix, then (CUDA hosts only) the plain build, then any ubuntu build for this arch.
+      const asset = zips.find(a => a.name.includes(suffix))
+        || (hasCuda ? zips.find(a => a.name.includes(`ubuntu-${arch}`)) : undefined)
+        || zips.find(a => a.name.includes('ubuntu') && a.name.includes(arch));
+      if (!asset) throw new Error(`No pre-built binary found for linux-${arch}. Build from source: https://github.com/ggml-org/llama.cpp#build`);
+      // The URL is remote-controlled data: require https so it cannot be read as a curl option or a local scheme.
+      const downloadUrl = String(asset.browser_download_url);
+      if (!downloadUrl.startsWith('https://')) throw new Error(`Unexpected download URL for release asset ${asset.name}`);
+      const stamp = Date.now();
+      const tmpZip = `/tmp/groove-llama-${stamp}.zip`;
+      const tmpDir = `/tmp/groove-llama-extract-${stamp}`;
+      try {
+        execFileSync('curl', ['-fSL', downloadUrl, '-o', tmpZip], { stdio: 'pipe', timeout: 600000 });
+        execFileSync('unzip', ['-o', tmpZip, '-d', tmpDir], { stdio: 'pipe', timeout: 60000 });
+        const findResult = execFileSync('find', [tmpDir, '-name', 'llama-server', '-type', 'f'], { encoding: 'utf8', stdio: 'pipe' }).trim();
+        const binPath = findResult.split('\n')[0];
+        if (!binPath) throw new Error('llama-server binary not found in release archive');
+        const destPath = resolve(installDir, 'llama-server');
+        execFileSync('cp', [binPath, destPath], { stdio: 'pipe' });
+        chmodSync(destPath, 0o755);
+        // Copy shared libraries if present
+        try {
+          const libDir = resolve(binPath, '..', '..', 'lib');
+          if (existsSync(libDir)) {
+            const userLibDir = resolve(homedir(), '.local', 'lib');
+            mkdirSync(userLibDir, { recursive: true });
+            // "<dir>/." copies the directory's contents without needing a shell glob.
+            execFileSync('cp', ['-r', `${libDir}/.`, userLibDir], { stdio: 'pipe' });
+          }
+        } catch { /* libs are optional */ }
+        return { method: 'github-release', path: destPath, cuda: hasCuda, release: release.tag_name };
+      } finally {
+        // Cleanup runs on success and on failure so partial downloads do not pile up in /tmp.
+        try { execFileSync('rm', ['-rf', tmpZip, tmpDir], { stdio: 'ignore' }); } catch { /* best-effort */ }
+      }
+    }
+    throw new Error(`Automatic install not supported on ${platform}. Install llama-server manually: https://github.com/ggml-org/llama.cpp#build`);
+  }
   // --- Server Lifecycle ---
   /**
@@ -74,9 +165,11 @@ export class LlamaServerManager {
       args.push('--flash-attn', 'auto');
     }
-    const proc = spawn('llama-server', args, {
+    const serverBin = LlamaServerManager.getLlamaServerPath();
+    const proc = spawn(serverBin, args, {
       stdio: ['ignore', 'pipe', 'pipe'],
       detached: false,
+      env: { ...process.env, LD_LIBRARY_PATH: [resolve(homedir(), '.local', 'lib'), process.env.LD_LIBRARY_PATH].filter(Boolean).join(':') },
     });
     if (!proc.pid) {

package/node_modules/@groove-dev/daemon/src/model-manager.js CHANGED Viewed

@@ -69,7 +69,6 @@ export class ModelManager {
   async search(query, { limit = 20, sort = 'downloads' } = {}) {
     const params = new URLSearchParams({
       search: query,
-      filter: 'gguf',
       sort,
       direction: '-1',
       limit: String(limit),
@@ -83,15 +82,20 @@ export class ModelManager {
     if (!res.ok) throw new Error(`HuggingFace API error: ${res.status}`);
     const models = await res.json();
-    return models.map((m) => ({
-      id: m.modelId || m.id,
-      name: m.modelId?.split('/').pop() || m.id,
-      author: m.modelId?.split('/')[0] || '',
-      downloads: m.downloads || 0,
-      likes: m.likes || 0,
-      tags: m.tags || [],
-      lastModified: m.lastModified,
-    }));
+    return models.map((m) => {
+      const id = m.modelId || m.id;
+      const tags = m.tags || [];
+      return {
+        id,
+        name: id.split('/').pop() || id,
+        author: id.split('/')[0] || '',
+        downloads: m.downloads || 0,
+        likes: m.likes || 0,
+        tags,
+        lastModified: m.lastModified,
+        recommendedRuntimes: inferRuntimes(id, tags),
+      };
+    });
   }
   async getModelFiles(repoId) {
@@ -409,3 +413,41 @@ function classifyTier(params, quant) {
   if (billions >= 10) return 'medium';
   return 'light';
 }
+/**
+ * Guess which inference runtimes can serve a HuggingFace model, based on the
+ * repo id and its tags. Matching is case-insensitive.
+ *
+ * @param {string} repoId - Repo id such as "author/model-GGUF"; a missing value is treated as ''.
+ * @param {string[]} tags - Model tags; non-string entries are ignored.
+ * @returns {string[]} Runtime names in the order they were inferred, without duplicates.
+ */
+function inferRuntimes(repoId, tags) {
+  const lower = String(repoId ?? '').toLowerCase();
+  const tagSet = new Set(
+    (Array.isArray(tags) ? tags : [])
+      .filter((t) => typeof t === 'string')
+      .map((t) => t.toLowerCase()),
+  );
+  // A format counts as present when it is a tag or appears in the repo name after '-' or '_'.
+  const hasFormat = (marker) =>
+    tagSet.has(marker) || lower.includes(`-${marker}`) || lower.includes(`_${marker}`);
+  const runtimes = new Set();
+  // GGUF → llama.cpp
+  if (hasFormat('gguf')) {
+    runtimes.add('llama.cpp');
+  }
+  // MLX-optimized models
+  if (hasFormat('mlx')) {
+    runtimes.add('MLX');
+  }
+  // GPTQ / AWQ quantized → vLLM handles these well
+  if (hasFormat('gptq') || hasFormat('awq')) {
+    runtimes.add('vLLM');
+  }
+  // SafeTensors / standard transformer weights → vLLM, TGI, MLX
+  if (tagSet.has('safetensors') || tagSet.has('transformers')) {
+    runtimes.add('vLLM');
+    runtimes.add('TGI');
+    runtimes.add('MLX'); // Set.add is a no-op when MLX was already inferred
+  }
+  // If nothing matched, infer from general model traits
+  if (runtimes.size === 0 && (tagSet.has('pytorch') || tagSet.has('tf') || tagSet.has('jax'))) {
+    runtimes.add('vLLM');
+    runtimes.add('TGI');
+  }
+  return [...runtimes];
+}

package/node_modules/@groove-dev/daemon/src/routes/providers.js CHANGED Viewed

@@ -713,6 +713,17 @@ export function registerProviderRoutes(app, daemon) {
     res.json(daemon.llamaServer.getStatus());
   });
+  app.post('/api/llama/install', async (req, res) => {
+    try {
+      const { LlamaServerManager } = await import('../llama-server.js');
+      const result = await LlamaServerManager.install();
+      daemon.modelLab.refreshInstalledTools();
+      res.json({ success: true, ...result });
+    } catch (err) {
+      res.status(500).json({ error: err.message });
+    }
+  });
   app.get('/api/mlx/status', (req, res) => {
     res.json(daemon.mlxServer.getStatus());
   });

package/node_modules/@groove-dev/daemon/src/tool-executor.js CHANGED Viewed

@@ -1,7 +1,7 @@
 // GROOVE — Tool Executor for Local Agent Loop
 // FSL-1.1-Apache-2.0 — see LICENSE
-import { readFileSync, writeFileSync, readdirSync, statSync, mkdirSync, existsSync } from 'fs';
+import { readFileSync, writeFileSync, readdirSync, statSync, mkdirSync, existsSync, openSync, readSync, closeSync } from 'fs';
 import { execSync } from 'child_process';
 import { resolve, relative, dirname, sep } from 'path';
 import { minimatch } from 'minimatch';
@@ -190,11 +190,24 @@ export class ToolExecutor {
     if (stat.isDirectory()) {
       return { success: false, error: `Path is a directory, not a file: ${filePath}` };
     }
-    // Guard against huge files
     if (stat.size > 5 * 1024 * 1024) {
       return { success: false, error: `File too large (${formatBytes(stat.size)}). Use offset/limit to read a section.` };
     }
+    // Detect binary files — check first 8KB for null bytes
+    if (stat.size > 0) {
+      const probe = Buffer.alloc(Math.min(8192, stat.size));
+      const fd = openSync(resolved, 'r');
+      try {
+        readSync(fd, probe, 0, probe.length, 0);
+      } finally {
+        closeSync(fd);
+      }
+      if (probe.includes(0)) {
+        return { success: false, error: `Binary file (${formatBytes(stat.size)}). Cannot read non-text files.` };
+      }
+    }
     const content = readFileSync(resolved, 'utf8');
     let lines = content.split('\n');
     const totalLines = lines.length;
@@ -207,8 +220,19 @@ export class ToolExecutor {
       lines = lines.slice(0, limit);
     }
+    // Auto-limit large files when no explicit limit was provided
+    const MAX_LINES = 2000;
+    let autoTruncated = false;
+    if (!limit && lines.length > MAX_LINES) {
+      lines = lines.slice(0, MAX_LINES);
+      autoTruncated = true;
+    }
     const numbered = lines.map((line, i) => `${startLine + i}\t${line}`).join('\n');
-    return { success: true, result: numbered, meta: { totalLines } };
+    const result = autoTruncated
+      ? numbered + `\n\n... (showing ${MAX_LINES} of ${totalLines} lines — use offset/limit to read more)`
+      : numbered;
+    return { success: true, result, meta: { totalLines } };
   }
   writeFile({ path: filePath, content }) {