npm - groove-dev - Versions diffs - 0.27.134 → 0.27.136 - Mend

groove-dev 0.27.134 → 0.27.136

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (85)

package/packages/cli/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@groove-dev/cli",
-  "version": "0.27.134",
+  "version": "0.27.136",
   "description": "GROOVE CLI — manage AI coding agents from your terminal",
   "license": "FSL-1.1-Apache-2.0",
   "type": "module",

package/packages/daemon/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@groove-dev/daemon",
-  "version": "0.27.134",
+  "version": "0.27.136",
   "description": "GROOVE daemon — agent orchestration engine",
   "license": "FSL-1.1-Apache-2.0",
   "type": "module",

package/packages/daemon/src/adaptive.js CHANGED Viewed

@@ -137,6 +137,20 @@ export class AdaptiveThresholds {
     const filesWritten = signals.filesWritten || 0;
     score += Math.min(filesWritten * 2, 10); // Cap at +10
+    // Output length decay: assistant responses shrinking dramatically
+    if (signals.outputLengthDecay) score -= 10;
+    // Tool output volume: bloated context from large tool results
+    const toolVol = signals.toolOutputVolume || 0;
+    if (toolVol === 2) score -= 10;
+    else if (toolVol === 1) score -= 5;
+    // Turn latency trend: agent slowing down significantly
+    if (signals.turnLatencyTrend) score -= 5;
+    // Bash repetition: agent stuck running identical commands
+    if (signals.bashRepetition) score -= 8;
     // Clamp to 0-100
     return Math.max(0, Math.min(100, score));
   }
@@ -165,20 +179,43 @@ export class AdaptiveThresholds {
       filesWritten: 0,
       fileChurn: 0,         // same file written 3+ times → possible circular refactoring
       errorTrend: 0,        // errors increasing in recent window → degradation signal
+      outputLengthDecay: 0, // last 5 assistant turns avg <50% of first 5 → declining output
+      toolOutputVolume: 0,  // cumulative tool result chars (>300KB = bloated context)
+      turnLatencyTrend: 0,  // avg gap in last 10 entries >2x first 10 → slowing down
+      bashRepetition: 0,    // 3+ identical consecutive Bash commands → stuck in loop
     };
     const writtenFiles = new Set();
     const fileWriteCounts = {};
     const writeEditOps = [];
+    const assistantOutputLengths = [];
+    let toolOutputBytes = 0;
+    const entryTimestamps = [];
+    const bashCommands = [];
     for (const entry of entries) {
+      if (entry.timestamp) entryTimestamps.push(new Date(entry.timestamp).getTime());
       if (entry.type === 'error') {
         signals.errorCount++;
       }
+      // Track assistant output lengths for decay detection
+      if (entry.type === 'thinking' && entry.text) {
+        assistantOutputLengths.push(entry.text.length);
+      }
       if (entry.type === 'tool') {
         signals.toolCalls++;
+        // Track tool result output volume
+        if (entry.output) toolOutputBytes += entry.output.length;
+        // Track Bash commands for repetition detection
+        if (entry.tool === 'Bash' && entry.input) {
+          bashCommands.push(entry.input);
+        }
         if (entry.tool === 'Write' || entry.tool === 'Edit') {
           if (entry.input) {
             writtenFiles.add(entry.input);
@@ -245,6 +282,46 @@ export class AdaptiveThresholds {
       signals.errorTrend = secondHalfErrors - firstHalfErrors;
     }
+    // Output length decay: if last 5 assistant outputs avg <50% of first 5
+    if (assistantOutputLengths.length >= 10) {
+      const first5 = assistantOutputLengths.slice(0, 5);
+      const last5 = assistantOutputLengths.slice(-5);
+      const firstAvg = first5.reduce((a, b) => a + b, 0) / 5;
+      const lastAvg = last5.reduce((a, b) => a + b, 0) / 5;
+      if (firstAvg > 0 && lastAvg < firstAvg * 0.5) signals.outputLengthDecay = 1;
+    }
+    // Tool output volume: cumulative tool result size
+    if (toolOutputBytes > 600_000) signals.toolOutputVolume = 2;
+    else if (toolOutputBytes > 300_000) signals.toolOutputVolume = 1;
+    // Turn latency trend: avg gap in last 10 entries >2x first 10
+    if (entryTimestamps.length >= 20) {
+      const gaps = (ts) => {
+        const g = [];
+        for (let i = 1; i < ts.length; i++) g.push(ts[i] - ts[i - 1]);
+        return g;
+      };
+      const firstGaps = gaps(entryTimestamps.slice(0, 11));
+      const lastGaps = gaps(entryTimestamps.slice(-11));
+      const avgFirst = firstGaps.reduce((a, b) => a + b, 0) / firstGaps.length;
+      const avgLast = lastGaps.reduce((a, b) => a + b, 0) / lastGaps.length;
+      if (avgFirst > 0 && avgLast > avgFirst * 2) signals.turnLatencyTrend = 1;
+    }
+    // Bash repetition: 3+ identical consecutive Bash commands
+    let maxConsecutive = 0;
+    let streak = 1;
+    for (let i = 1; i < bashCommands.length; i++) {
+      if (bashCommands[i] === bashCommands[i - 1]) {
+        streak++;
+        if (streak > maxConsecutive) maxConsecutive = streak;
+      } else {
+        streak = 1;
+      }
+    }
+    if (maxConsecutive >= 3) signals.bashRepetition = 1;
     return signals;
   }

package/packages/daemon/src/api.js CHANGED Viewed

@@ -124,6 +124,38 @@ export function createApi(app, daemon) {
     res.json({ status: 'ok', uptime: process.uptime() });
   });
+  // Debug: test fetch to llama-server from daemon runtime
+  app.get('/api/lab/debug-fetch', async (req, res) => {
+    const target = req.query.url || 'http://localhost:8081/v1/chat/completions';
+    const log = [];
+    try {
+      log.push(`fetch → ${target}`);
+      log.push(`node ${process.version}, electron ${process.versions.electron || 'N/A'}`);
+      const start = Date.now();
+      const r = await fetch(target, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({ model: 'Qwen3-0.6B-Q8_0.gguf', messages: [{ role: 'user', content: 'Say ok' }], stream: true, max_tokens: 10 }),
+        signal: AbortSignal.timeout(10000),
+      });
+      log.push(`status=${r.status} in ${Date.now() - start}ms`);
+      const reader = r.body.getReader();
+      let chunks = 0;
+      while (chunks < 5) {
+        const { done, value } = await reader.read();
+        if (done) break;
+        chunks++;
+        log.push(`chunk ${chunks}: ${new TextDecoder().decode(value).slice(0, 120)}`);
+      }
+      reader.cancel();
+      log.push(`total chunks read: ${chunks}`);
+      res.json({ ok: true, log });
+    } catch (err) {
+      log.push(`ERROR: ${err.message}`);
+      res.json({ ok: false, log, error: err.message });
+    }
+  });
   // List all agents
   app.get('/api/agents', (req, res) => {
     res.json(daemon.registry.getAll());
@@ -6703,11 +6735,9 @@ Keep responses concise. Help them think, don't lecture them about the system the
       let closed = false;
       req.on('close', () => { closed = true; });
-      const stream = daemon.modelLab.streamInference(params);
-      for await (const event of stream) {
-        if (closed) break;
-        res.write(`data: ${JSON.stringify(event)}\n\n`);
-      }
+      await daemon.modelLab.streamInference(params, (event) => {
+        if (!closed) res.write(`data: ${JSON.stringify(event)}\n\n`);
+      });
       if (!closed) {
         res.write('data: [DONE]\n\n');

package/packages/daemon/src/journalist.js CHANGED Viewed

@@ -460,7 +460,7 @@ export class Journalist {
       '(What was completed. Name files, functions, and line numbers.)',
       '',
       'Be specific. Name files, functions, and line numbers. Do not summarize vaguely.',
-      'Keep your response under 2000 characters.',
+      'Keep your response under 1500 characters.',
       '',
       '---',
       '',
@@ -469,7 +469,7 @@ export class Journalist {
     ];
     let totalChars = 0;
-    const cap = 30_000;
+    const cap = 15_000;
     for (const entry of entries.slice(-200)) {
       const line = this.formatEntry(entry);
       if (totalChars + line.length > cap) break;
@@ -853,15 +853,15 @@ export class Journalist {
     const agentLog = filteredLogs[agent.id];
     const entries = agentLog?.entries || [];
-    // Layer 7 memory: discoveries, constraints, specializations
-    const discoveries = this.daemon.memory?.getDiscoveriesMarkdown(agent.role, 10, 2000) || '';
+    // Layer 7 memory: discoveries (inline, not pointer — agents lose context with pointers), constraints, specializations
+    const discoveries = this.daemon.memory?.getDiscoveriesMarkdown(agent.role, 10, 1500) || '';
     const constraints = this.daemon.memory?.getConstraintsMarkdown(2000) || '';
     const specialization = this.daemon.memory?.getSpecialization(agent.id);
     const specLine = specialization?.avgQualityScore != null
       ? `- Quality profile: ${specialization.avgQualityScore}/100 across ${specialization.sessionCount} sessions`
       : '';
-    const recentChain = this.daemon.memory?.getRecentHandoffMarkdown(agent.role, 3, 3000, agent.workingDir, agent.teamId) || '';
+    const recentChain = this.daemon.memory?.getRecentHandoffMarkdown(agent.role, 1, 1500, agent.workingDir, agent.teamId) || '';
     const agentFeedback = this.getUserFeedback(agent.id).slice(-5);
     const conversationSummary = agentFeedback.length > 0
@@ -871,7 +871,7 @@ export class Journalist {
     const recentTools = entries
       .filter((e) => e.type === 'tool' || e.type === 'error')
       .slice(-5)
-      .map((e) => `- ${e.type === 'error' ? 'ERROR ' : ''}${e.tool}: ${(e.input || e.text || '').slice(0, 80)}`)
+      .map((e) => `- ${e.type === 'error' ? 'ERROR ' : ''}${e.tool}: ${(e.input || e.text || '').slice(0, 200)}`)
       .join('\n');
     // Try AI-synthesized session summary
@@ -908,7 +908,7 @@ export class Journalist {
       const fallbackRecentTools = entries
         .filter((e) => e.type === 'tool' || e.type === 'error')
         .slice(-5)
-        .map((e) => `- ${e.type === 'error' ? 'ERROR ' : ''}${e.tool}: ${(e.input || '').slice(0, 80)}`)
+        .map((e) => `- ${e.type === 'error' ? 'ERROR ' : ''}${e.tool}: ${(e.input || '').slice(0, 200)}`)
         .join('\n');
       const fallbackParts = [];
@@ -919,7 +919,13 @@ export class Journalist {
       sessionSummary = fallbackParts.join('\n\n');
     }
-    return [
+    // For quality_degradation rotations, drop user messages (already in session summary)
+    const includeUserMessages = options.reason !== 'quality_degradation';
+    // Cap Original Task to 1000 chars — task descriptions for debugging can be long
+    const originalTask = agent.prompt ? agent.prompt.slice(0, 1000) + (agent.prompt.length > 1000 ? '…' : '') : '';
+    let brief = [
       `# Handoff Brief — ${agent.name} (${agent.role})`,
       ``,
       `Role: ${agent.role} | Scope: ${agent.scope?.join(', ') || 'unrestricted'} | Provider: ${agent.provider}`,
@@ -927,17 +933,27 @@ export class Journalist {
       `Rotation: ${options.reason || 'manual'}${options.qualityScore ? ` (quality: ${options.qualityScore}/100)` : ''} | Tokens: ${agent.tokensUsed}`,
       specLine,
       ``,
-      discoveries ? `## Known Issues & Fixes\n\n${discoveries}\n` : '',
+      // Priority order: session summary (contains unresolved errors) first,
+      // then constraints, then discoveries, then tools — so the most critical
+      // debugging context survives even if the brief hits the hard cap.
+      sessionSummary ? `## Session Summary\n\n${sessionSummary}\n` : '',
       constraints ? `## Project Constraints (must follow)\n\n${constraints}\n` : '',
+      discoveries ? `## Known Issues & Fixes\n\n${discoveries}\n` : '',
       recentTools ? `## Last 5 Tool Calls\n\n${recentTools}\n` : '',
-      sessionSummary ? `## Session Summary\n\n${sessionSummary}\n` : '',
-      conversationSummary ? `## Recent User Messages\n\n${conversationSummary}\n` : '',
+      includeUserMessages && conversationSummary ? `## Recent User Messages\n\n${conversationSummary}\n` : '',
       recentChain ? `## Rotation History\n\n${recentChain}\n` : '',
-      agent.prompt ? `## Original Task\n\n${agent.prompt}\n` : '',
+      originalTask ? `## Original Task\n\n${originalTask}\n` : '',
       ``,
       agent.role === 'planner' ? 'CRITICAL: You are a PLANNING ONLY agent. Do NOT implement code. Route all work to your team via .groove/recommended-team.json.\n' : '',
       `Continue seamlessly — finish the work and deliver the output.`,
     ].filter(Boolean).join('\n');
+    // Hard cap: 8000 chars — enough for debugging context without overwhelming the new agent
+    if (brief.length > 8000) {
+      brief = brief.slice(0, 7950) + '\n\n[Brief truncated — see session logs for full context]';
+    }
+    return brief;
   }
   // --- Workspace Grouping ---

package/packages/daemon/src/model-lab.js CHANGED Viewed

@@ -4,6 +4,7 @@
 import { resolve } from 'path';
 import { existsSync, readFileSync, writeFileSync, mkdirSync, readdirSync, unlinkSync } from 'fs';
 import { randomUUID } from 'crypto';
+import { Readable } from 'stream';
 const RUNTIME_TYPES = ['ollama', 'vllm', 'llama-cpp', 'tgi', 'openai-compatible'];
 const DEFAULT_OLLAMA_ENDPOINT = 'http://localhost:11434';
@@ -208,7 +209,7 @@ export class ModelLab {
   // ─── Inference ──────────────────────────────────────────────
-  async *streamInference({ runtimeId, model, messages, parameters, sessionId }) {
+  async streamInference({ runtimeId, model, messages, parameters, sessionId }, onEvent) {
     const rt = this.runtimes.get(runtimeId);
     if (!rt) throw new Error('Runtime not found');
     if (!model) throw new Error('Model is required');
@@ -216,7 +217,6 @@ export class ModelLab {
       throw new Error('Messages array is required');
     }
-    // Build request body — all runtimes use OpenAI-compatible format
     const body = {
       model,
       messages,
@@ -224,12 +224,9 @@ export class ModelLab {
       ...this._buildParameterBody(parameters || {}),
     };
-    const endpoint = rt.type === 'ollama'
-      ? `${rt.endpoint}/v1/chat/completions`
-      : `${rt.endpoint}/v1/chat/completions`;
-    const headers = { 'Content-Type': 'application/json' };
-    if (rt.apiKey) headers['Authorization'] = `Bearer ${rt.apiKey}`;
+    const endpoint = rt.endpoint.replace('localhost', '127.0.0.1');
+    const reqHeaders = { 'Content-Type': 'application/json' };
+    if (rt.apiKey) reqHeaders['Authorization'] = `Bearer ${rt.apiKey}`;
     const requestStart = Date.now();
     let ttft = null;
@@ -239,91 +236,64 @@ export class ModelLab {
     let generationStart = null;
     let fullContent = '';
-    const resp = await fetch(endpoint, {
+    const resp = await fetch(`${endpoint}/v1/chat/completions`, {
       method: 'POST',
-      headers,
+      headers: reqHeaders,
       body: JSON.stringify(body),
       signal: AbortSignal.timeout(300000),
     });
     if (!resp.ok) {
-      let errorMsg;
-      try { errorMsg = (await resp.json()).error?.message || `HTTP ${resp.status}`; } catch { errorMsg = `HTTP ${resp.status}`; }
-      throw new Error(errorMsg);
+      let errMsg = `HTTP ${resp.status}`;
+      try { const e = await resp.json(); errMsg = e.error?.message || errMsg; } catch { /* ignore */ }
+      throw new Error(errMsg);
     }
-    const reader = resp.body.getReader();
-    const decoder = new TextDecoder();
+    const nodeStream = Readable.fromWeb(resp.body);
     let buffer = '';
-    try {
-      while (true) {
-        const { done, value } = await reader.read();
-        if (done) break;
-        buffer += decoder.decode(value, { stream: true });
-        const lines = buffer.split('\n');
-        buffer = lines.pop() || '';
-        for (const line of lines) {
-          const trimmed = line.trim();
-          if (!trimmed || !trimmed.startsWith('data: ')) continue;
-          const payload = trimmed.slice(6);
-          if (payload === '[DONE]') continue;
-          try {
-            const chunk = JSON.parse(payload);
-            const delta = chunk.choices?.[0]?.delta;
-            if (delta?.reasoning_content) {
-              if (ttft === null) {
-                ttft = Date.now() - requestStart;
-                generationStart = Date.now();
-              }
-              completionTokens++;
-              yield { type: 'reasoning', content: delta.reasoning_content };
-            }
-            if (delta?.content) {
-              if (ttft === null) {
-                ttft = Date.now() - requestStart;
-                generationStart = Date.now();
-              }
-              fullContent += delta.content;
-              completionTokens++;
-              yield { type: 'token', content: delta.content };
-            }
-            // Capture usage from final chunk if provided
-            if (chunk.usage) {
-              promptTokens = chunk.usage.prompt_tokens || 0;
-              totalTokens = chunk.usage.total_tokens || 0;
-              if (chunk.usage.completion_tokens) {
-                completionTokens = chunk.usage.completion_tokens;
-              }
-            }
-          } catch { /* skip malformed chunk */ }
-        }
+    for await (const chunk of nodeStream) {
+      buffer += typeof chunk === 'string' ? chunk : chunk.toString('utf8');
+      const lines = buffer.split('\n');
+      buffer = lines.pop() || '';
+      for (const line of lines) {
+        const trimmed = line.trim();
+        if (!trimmed || !trimmed.startsWith('data: ')) continue;
+        const data = trimmed.slice(6);
+        if (data === '[DONE]') continue;
+        try {
+          const parsed = JSON.parse(data);
+          const delta = parsed.choices?.[0]?.delta;
+          if (delta?.reasoning_content) {
+            if (ttft === null) { ttft = Date.now() - requestStart; generationStart = Date.now(); }
+            completionTokens++;
+            onEvent({ type: 'reasoning', content: delta.reasoning_content });
+          }
+          if (delta?.content) {
+            if (ttft === null) { ttft = Date.now() - requestStart; generationStart = Date.now(); }
+            fullContent += delta.content;
+            completionTokens++;
+            onEvent({ type: 'token', content: delta.content });
+          }
+          if (parsed.usage) {
+            promptTokens = parsed.usage.prompt_tokens || 0;
+            totalTokens = parsed.usage.total_tokens || 0;
+            if (parsed.usage.completion_tokens) completionTokens = parsed.usage.completion_tokens;
+          }
+        } catch { /* skip malformed chunk */ }
       }
-    } finally {
-      reader.releaseLock();
     }
     const generationTime = generationStart ? Date.now() - generationStart : Date.now() - requestStart;
     const tokensPerSec = generationTime > 0 ? (completionTokens / (generationTime / 1000)) : 0;
-    // Ollama memory usage
-    let memoryUsage = null;
-    if (rt.type === 'ollama') {
-      memoryUsage = await this.getOllamaMemoryUsage(rt.endpoint);
-    }
-    // Persist to session if sessionId provided
     if (sessionId) {
-      this._appendToSession(sessionId, messages, {
-        role: 'assistant',
-        content: fullContent,
-      });
+      this._appendToSession(sessionId, messages, { role: 'assistant', content: fullContent });
     }
-    yield {
+    onEvent({
       type: 'done',
       metrics: {
         ttft,
@@ -332,9 +302,16 @@ export class ModelLab {
         promptTokens,
         completionTokens,
         generationTime,
-        memoryUsage,
+        memoryUsage: null,
       },
-    };
+    });
+    if (rt.type === 'ollama') {
+      try {
+        const mem = await this.getOllamaMemoryUsage(rt.endpoint);
+        if (mem) onEvent({ type: 'memory', usage: mem });
+      } catch { /* ignore */ }
+    }
   }
   _buildParameterBody(params) {

package/packages/daemon/src/process.js CHANGED Viewed

@@ -342,6 +342,9 @@ export class ProcessManager {
     this._stalledAgents = new Set(); // agentIds already flagged as stalled (avoids duplicate broadcasts)
     this._exitHandled = new Set();
     this._resultReceived = new Set();
+    this._truncationFlagged = new Set(); // agentIds that have had any truncation in their session
+    this._lastAssistantBlocks = new Map(); // agentId -> last assistant content blocks (for abandoned tool_use detection)
+    this._previousCacheReadTokens = new Map(); // agentId -> previous turn's cacheReadTokens
     this._stallWatchdog = setInterval(() => this._checkStalls(), STALL_CHECK_INTERVAL_MS);
     if (this._stallWatchdog.unref) this._stallWatchdog.unref();
@@ -355,7 +358,8 @@ export class ProcessManager {
       if (!agent || agent.status !== 'running') continue;
       const lastActivity = agent.lastActivity ? new Date(agent.lastActivity).getTime() : now;
       const silentMs = now - lastActivity;
-      if (silentMs < STALL_THRESHOLD_MS) {
+      const effectiveStallMs = this._truncationFlagged.has(agentId) ? 2 * 60_000 : STALL_THRESHOLD_MS;
+      if (silentMs < effectiveStallMs) {
         if (this._stalledAgents.has(agentId)) {
           this._stalledAgents.delete(agentId);
           registry.update(agentId, { stalled: false });
@@ -402,6 +406,9 @@ export class ProcessManager {
         setTimeout(() => this._exitHandled.delete(agentId), 30_000);
         this._stalledAgents.delete(agentId);
         this._resultReceived.delete(agentId);
+        this._truncationFlagged.delete(agentId);
+        this._lastAssistantBlocks.delete(agentId);
+        this._previousCacheReadTokens.delete(agentId);
         const throttle = this._streamThrottle.get(agentId);
         if (throttle?.timer) clearTimeout(throttle.timer);
         this._streamThrottle.delete(agentId);
@@ -435,6 +442,9 @@ export class ProcessManager {
     this.peakContextUsage.delete(agent.id);
     this.pendingMessages.delete(agent.id);
     this._stalledAgents.delete(agent.id);
+    this._truncationFlagged.delete(agent.id);
+    this._lastAssistantBlocks.delete(agent.id);
+    this._previousCacheReadTokens.delete(agent.id);
     if (this.daemon.locks) this.daemon.locks.release(agent.id);
@@ -567,6 +577,16 @@ export class ProcessManager {
     this.handles.delete(agent.id);
     this._stalledAgents.delete(agent.id);
+    this._truncationFlagged.delete(agent.id);
+    this._lastAssistantBlocks.delete(agent.id);
+    this._previousCacheReadTokens.delete(agent.id);
+    const throttle = this._streamThrottle.get(agent.id);
+    if (throttle?.timer) clearTimeout(throttle.timer);
+    this._streamThrottle.delete(agent.id);
+    this.peakContextUsage.delete(agent.id);
+    this.pendingMessages.delete(agent.id);
     if (this.daemon.locks) this.daemon.locks.release(agent.id);
@@ -884,7 +904,19 @@ export class ProcessManager {
     // Handoffs are injected only when the agent has a real task or is a rotation.
     const hasTask = !!(config.prompt && config.prompt.trim().length > 0);
     const isRotation = !!(config.isRotation);
-    const introContext = introducer.generateContext(agent, { taskNegotiation, hasTask, isRotation });
+    let introContext = introducer.generateContext(agent, { taskNegotiation, hasTask, isRotation });
+    // Intro context size warning and optional truncation (Change 7)
+    if (introContext) {
+      const introLen = introContext.length;
+      const maxIntroChars = this.daemon.config?.maxIntroContextChars || 10000;
+      if (introLen > 8000) {
+        console.warn(`[Groove] Intro context for ${agent.name} is ${introLen} chars — consider reducing CLAUDE.md.`);
+      }
+      if (introLen > maxIntroChars) {
+        introContext = introContext.slice(0, maxIntroChars) + '\n\n[Intro context truncated at ' + maxIntroChars + ' chars]';
+      }
+    }
     // Ensure the project map is fresh before the new agent reads CLAUDE.md
     if (this.daemon.journalist) {
@@ -1050,6 +1082,9 @@ For normal file edits within your scope, proceed without review.
         this.handles.delete(agent.id);
         this._stalledAgents.delete(agent.id);
         this._resultReceived.delete(agent.id);
+        this._truncationFlagged.delete(agent.id);
+        this._lastAssistantBlocks.delete(agent.id);
+        this._previousCacheReadTokens.delete(agent.id);
         // Clean up stream throttle so pending timers don't fire for dead agents
         const throttle = this._streamThrottle.get(agent.id);
@@ -1338,6 +1373,60 @@ For normal file edits within your scope, proceed without review.
       updates.stalled = false;
     }
+    // --- Incomplete response / truncation detection (Change 1) ---
+    if (output.type === 'activity' && output.subtype === 'assistant' && Array.isArray(output.data)) {
+      const blocks = output.data;
+      let truncated = false;
+      // Check 1: last text block ends mid-sentence (no terminal punctuation).
+      // Skip short responses (<40 chars) — "OK", "Done", "Sure" are legitimate.
+      const textBlocks = blocks.filter(b => b.type === 'text' && b.text);
+      if (textBlocks.length > 0) {
+        const lastText = textBlocks[textBlocks.length - 1].text.trimEnd();
+        if (lastText.length >= 40 && !/[.?!}\])`'"]$/.test(lastText) && !/```\s*$/.test(lastText)) {
+          truncated = true;
+        }
+      }
+      // Check 2: previous turn had tool_use blocks but this turn is near-empty
+      // (<20 chars of text, no new tool calls). In normal flow the assistant
+      // processes tool results and produces a substantive follow-up; a near-empty
+      // response suggests the tool call was abandoned or its result was lost.
+      const prevBlocks = this._lastAssistantBlocks.get(agentId);
+      if (prevBlocks && prevBlocks.some(b => b.type === 'tool_use')) {
+        const totalCurrentText = textBlocks.reduce((sum, b) => sum + (b.text?.length || 0), 0);
+        if (totalCurrentText < 20 && !blocks.some(b => b.type === 'tool_use')) {
+          truncated = true;
+        }
+      }
+      this._lastAssistantBlocks.set(agentId, blocks);
+      if (truncated) {
+        this._truncationFlagged.add(agentId);
+        const prev = agent.consecutiveTruncations || 0;
+        updates.truncationSuspected = true;
+        updates.consecutiveTruncations = prev + 1;
+        classifier.addEvent(agentId, { type: 'error', subtype: 'truncated_response', timestamp: Date.now() });
+      } else if (agent.truncationSuspected) {
+        updates.truncationSuspected = false;
+        updates.consecutiveTruncations = 0;
+      }
+    }
+    // --- Cache reset detection (Change 5) ---
+    if (output.cacheReadTokens !== undefined) {
+      const prevCache = this._previousCacheReadTokens.get(agentId);
+      if (prevCache !== undefined && prevCache > 50_000) {
+        const drop = prevCache - output.cacheReadTokens;
+        if (drop > prevCache * 0.5) {
+          classifier.addEvent(agentId, { type: 'error', subtype: 'cache_reset', timestamp: Date.now() });
+          updates.cacheResetDetected = true;
+        }
+      }
+      this._previousCacheReadTokens.set(agentId, output.cacheReadTokens);
+    }
     // Token tracking — feed subsystems with full breakdown
     if (output.tokensUsed !== undefined && output.tokensUsed > 0) {
       updates.tokensUsed = agent.tokensUsed + output.tokensUsed;