groove-dev 0.27.134 → 0.27.136

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
Files changed (85)
  1. package/moe-training/client/domain-tagger.js +1 -1
  2. package/moe-training/scripts/retag-delegate-yield.js +303 -0
  3. package/moe-training/test/shared/envelope-schema.test.js +3 -3
  4. package/node_modules/@groove-dev/cli/package.json +1 -1
  5. package/node_modules/@groove-dev/daemon/package.json +1 -1
  6. package/node_modules/@groove-dev/daemon/src/adaptive.js +77 -0
  7. package/node_modules/@groove-dev/daemon/src/api.js +35 -5
  8. package/node_modules/@groove-dev/daemon/src/journalist.js +28 -12
  9. package/node_modules/@groove-dev/daemon/src/model-lab.js +53 -76
  10. package/node_modules/@groove-dev/daemon/src/process.js +91 -2
  11. package/node_modules/@groove-dev/daemon/src/rotator.js +45 -3
  12. package/node_modules/@groove-dev/gui/dist/assets/{index-Dozp69tK.js → index-BrZHF7pK.js} +1770 -1766
  13. package/node_modules/@groove-dev/gui/dist/assets/index-DIfiwdKl.css +1 -0
  14. package/node_modules/@groove-dev/gui/dist/index.html +2 -2
  15. package/node_modules/@groove-dev/gui/package.json +1 -1
  16. package/node_modules/@groove-dev/gui/src/components/agents/agent-chat.jsx +60 -18
  17. package/node_modules/@groove-dev/gui/src/components/agents/agent-feed.jsx +42 -20
  18. package/node_modules/@groove-dev/gui/src/components/agents/agent-file-tree.jsx +1 -1
  19. package/node_modules/@groove-dev/gui/src/components/agents/workspace-mode.jsx +1 -1
  20. package/node_modules/@groove-dev/gui/src/components/chat/chat-messages.jsx +2 -22
  21. package/node_modules/@groove-dev/gui/src/components/editor/code-editor.jsx +9 -9
  22. package/node_modules/@groove-dev/gui/src/components/editor/file-tree.jsx +1 -1
  23. package/node_modules/@groove-dev/gui/src/components/editor/terminal.jsx +7 -0
  24. package/node_modules/@groove-dev/gui/src/components/lab/chat-playground.jsx +59 -51
  25. package/node_modules/@groove-dev/gui/src/components/lab/lab-assistant.jsx +48 -48
  26. package/node_modules/@groove-dev/gui/src/components/lab/metrics-panel.jsx +39 -38
  27. package/node_modules/@groove-dev/gui/src/components/lab/parameter-panel.jsx +4 -5
  28. package/node_modules/@groove-dev/gui/src/components/lab/preset-manager.jsx +11 -11
  29. package/node_modules/@groove-dev/gui/src/components/lab/runtime-config.jsx +66 -62
  30. package/node_modules/@groove-dev/gui/src/components/lab/system-prompt-editor.jsx +13 -13
  31. package/node_modules/@groove-dev/gui/src/components/layout/breadcrumb-bar.jsx +1 -1
  32. package/node_modules/@groove-dev/gui/src/components/preview/preview-workspace.jsx +62 -22
  33. package/node_modules/@groove-dev/gui/src/components/ui/slider.jsx +16 -17
  34. package/node_modules/@groove-dev/gui/src/components/ui/table-tree.jsx +38 -0
  35. package/node_modules/@groove-dev/gui/src/stores/groove.js +23 -9
  36. package/node_modules/@groove-dev/gui/src/views/editor.jsx +1 -1
  37. package/node_modules/@groove-dev/gui/src/views/model-lab.jsx +101 -87
  38. package/node_modules/moe-training/client/domain-tagger.js +1 -1
  39. package/node_modules/moe-training/scripts/retag-delegate-yield.js +303 -0
  40. package/node_modules/moe-training/test/shared/envelope-schema.test.js +3 -3
  41. package/package.json +1 -1
  42. package/packages/cli/package.json +1 -1
  43. package/packages/daemon/package.json +1 -1
  44. package/packages/daemon/src/adaptive.js +77 -0
  45. package/packages/daemon/src/api.js +35 -5
  46. package/packages/daemon/src/journalist.js +28 -12
  47. package/packages/daemon/src/model-lab.js +53 -76
  48. package/packages/daemon/src/process.js +91 -2
  49. package/packages/daemon/src/rotator.js +45 -3
  50. package/packages/gui/dist/assets/{index-Dozp69tK.js → index-BrZHF7pK.js} +1770 -1766
  51. package/packages/gui/dist/assets/index-DIfiwdKl.css +1 -0
  52. package/packages/gui/dist/index.html +2 -2
  53. package/packages/gui/package.json +1 -1
  54. package/packages/gui/src/components/agents/agent-chat.jsx +60 -18
  55. package/packages/gui/src/components/agents/agent-feed.jsx +42 -20
  56. package/packages/gui/src/components/agents/agent-file-tree.jsx +1 -1
  57. package/packages/gui/src/components/agents/workspace-mode.jsx +1 -1
  58. package/packages/gui/src/components/chat/chat-messages.jsx +2 -22
  59. package/packages/gui/src/components/editor/code-editor.jsx +9 -9
  60. package/packages/gui/src/components/editor/file-tree.jsx +1 -1
  61. package/packages/gui/src/components/editor/terminal.jsx +7 -0
  62. package/packages/gui/src/components/lab/chat-playground.jsx +59 -51
  63. package/packages/gui/src/components/lab/lab-assistant.jsx +48 -48
  64. package/packages/gui/src/components/lab/metrics-panel.jsx +39 -38
  65. package/packages/gui/src/components/lab/parameter-panel.jsx +4 -5
  66. package/packages/gui/src/components/lab/preset-manager.jsx +11 -11
  67. package/packages/gui/src/components/lab/runtime-config.jsx +66 -62
  68. package/packages/gui/src/components/lab/system-prompt-editor.jsx +13 -13
  69. package/packages/gui/src/components/layout/breadcrumb-bar.jsx +1 -1
  70. package/packages/gui/src/components/preview/preview-workspace.jsx +62 -22
  71. package/packages/gui/src/components/ui/slider.jsx +16 -17
  72. package/packages/gui/src/components/ui/table-tree.jsx +38 -0
  73. package/packages/gui/src/stores/groove.js +23 -9
  74. package/packages/gui/src/views/editor.jsx +1 -1
  75. package/packages/gui/src/views/model-lab.jsx +101 -87
  76. package/plan_files/DELEGATE_YIELD_TRAINING_TAGS.md +135 -0
  77. package/plan_files/session-quality-rotation-fixes.md +218 -0
  78. package/test.py +571 -0
  79. package/node_modules/@groove-dev/gui/dist/assets/index-BgQL4bNl.css +0 -1
  80. package/packages/gui/dist/assets/index-BgQL4bNl.css +0 -1
  81. /package/{AGENT_ORCHESTRATION.md → plan_files/AGENT_ORCHESTRATION.md} +0 -0
  82. /package/{DYNAMIC_LEAF_ARCH.md → plan_files/DYNAMIC_LEAF_ARCH.md} +0 -0
  83. /package/{EMBEDDING_DIAGNOSTIC.md → plan_files/EMBEDDING_DIAGNOSTIC.md} +0 -0
  84. /package/{EMBEDDING_SERVICE_BUILD_PLAN.md → plan_files/EMBEDDING_SERVICE_BUILD_PLAN.md} +0 -0
  85. /package/{MOE_TRAINING_PIPELINE.md → plan_files/MOE_TRAINING_PIPELINE.md} +0 -0
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@groove-dev/cli",
3
- "version": "0.27.134",
3
+ "version": "0.27.136",
4
4
  "description": "GROOVE CLI — manage AI coding agents from your terminal",
5
5
  "license": "FSL-1.1-Apache-2.0",
6
6
  "type": "module",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@groove-dev/daemon",
3
- "version": "0.27.134",
3
+ "version": "0.27.136",
4
4
  "description": "GROOVE daemon — agent orchestration engine",
5
5
  "license": "FSL-1.1-Apache-2.0",
6
6
  "type": "module",
@@ -137,6 +137,20 @@ export class AdaptiveThresholds {
137
137
  const filesWritten = signals.filesWritten || 0;
138
138
  score += Math.min(filesWritten * 2, 10); // Cap at +10
139
139
 
140
+ // Output length decay: assistant responses shrinking dramatically
141
+ if (signals.outputLengthDecay) score -= 10;
142
+
143
+ // Tool output volume: bloated context from large tool results
144
+ const toolVol = signals.toolOutputVolume || 0;
145
+ if (toolVol === 2) score -= 10;
146
+ else if (toolVol === 1) score -= 5;
147
+
148
+ // Turn latency trend: agent slowing down significantly
149
+ if (signals.turnLatencyTrend) score -= 5;
150
+
151
+ // Bash repetition: agent stuck running identical commands
152
+ if (signals.bashRepetition) score -= 8;
153
+
140
154
  // Clamp to 0-100
141
155
  return Math.max(0, Math.min(100, score));
142
156
  }
@@ -165,20 +179,43 @@ export class AdaptiveThresholds {
165
179
  filesWritten: 0,
166
180
  fileChurn: 0, // same file written 3+ times → possible circular refactoring
167
181
  errorTrend: 0, // errors increasing in recent window → degradation signal
182
+ outputLengthDecay: 0, // last 5 assistant turns avg <50% of first 5 → declining output
183
+ toolOutputVolume: 0, // cumulative tool result chars (>300KB = bloated context)
184
+ turnLatencyTrend: 0, // avg gap in last 10 entries >2x first 10 → slowing down
185
+ bashRepetition: 0, // 3+ identical consecutive Bash commands → stuck in loop
168
186
  };
169
187
 
170
188
  const writtenFiles = new Set();
171
189
  const fileWriteCounts = {};
172
190
  const writeEditOps = [];
191
+ const assistantOutputLengths = [];
192
+ let toolOutputBytes = 0;
193
+ const entryTimestamps = [];
194
+ const bashCommands = [];
173
195
 
174
196
  for (const entry of entries) {
197
+ if (entry.timestamp) entryTimestamps.push(new Date(entry.timestamp).getTime());
198
+
175
199
  if (entry.type === 'error') {
176
200
  signals.errorCount++;
177
201
  }
178
202
 
203
+ // Track assistant output lengths for decay detection
204
+ if (entry.type === 'thinking' && entry.text) {
205
+ assistantOutputLengths.push(entry.text.length);
206
+ }
207
+
179
208
  if (entry.type === 'tool') {
180
209
  signals.toolCalls++;
181
210
 
211
+ // Track tool result output volume
212
+ if (entry.output) toolOutputBytes += entry.output.length;
213
+
214
+ // Track Bash commands for repetition detection
215
+ if (entry.tool === 'Bash' && entry.input) {
216
+ bashCommands.push(entry.input);
217
+ }
218
+
182
219
  if (entry.tool === 'Write' || entry.tool === 'Edit') {
183
220
  if (entry.input) {
184
221
  writtenFiles.add(entry.input);
@@ -245,6 +282,46 @@ export class AdaptiveThresholds {
245
282
  signals.errorTrend = secondHalfErrors - firstHalfErrors;
246
283
  }
247
284
 
285
+ // Output length decay: if last 5 assistant outputs avg <50% of first 5
286
+ if (assistantOutputLengths.length >= 10) {
287
+ const first5 = assistantOutputLengths.slice(0, 5);
288
+ const last5 = assistantOutputLengths.slice(-5);
289
+ const firstAvg = first5.reduce((a, b) => a + b, 0) / 5;
290
+ const lastAvg = last5.reduce((a, b) => a + b, 0) / 5;
291
+ if (firstAvg > 0 && lastAvg < firstAvg * 0.5) signals.outputLengthDecay = 1;
292
+ }
293
+
294
+ // Tool output volume: cumulative tool result size
295
+ if (toolOutputBytes > 600_000) signals.toolOutputVolume = 2;
296
+ else if (toolOutputBytes > 300_000) signals.toolOutputVolume = 1;
297
+
298
+ // Turn latency trend: avg gap in last 10 entries >2x first 10
299
+ if (entryTimestamps.length >= 20) {
300
+ const gaps = (ts) => {
301
+ const g = [];
302
+ for (let i = 1; i < ts.length; i++) g.push(ts[i] - ts[i - 1]);
303
+ return g;
304
+ };
305
+ const firstGaps = gaps(entryTimestamps.slice(0, 11));
306
+ const lastGaps = gaps(entryTimestamps.slice(-11));
307
+ const avgFirst = firstGaps.reduce((a, b) => a + b, 0) / firstGaps.length;
308
+ const avgLast = lastGaps.reduce((a, b) => a + b, 0) / lastGaps.length;
309
+ if (avgFirst > 0 && avgLast > avgFirst * 2) signals.turnLatencyTrend = 1;
310
+ }
311
+
312
+ // Bash repetition: 3+ identical consecutive Bash commands
313
+ let maxConsecutive = 0;
314
+ let streak = 1;
315
+ for (let i = 1; i < bashCommands.length; i++) {
316
+ if (bashCommands[i] === bashCommands[i - 1]) {
317
+ streak++;
318
+ if (streak > maxConsecutive) maxConsecutive = streak;
319
+ } else {
320
+ streak = 1;
321
+ }
322
+ }
323
+ if (maxConsecutive >= 3) signals.bashRepetition = 1;
324
+
248
325
  return signals;
249
326
  }
250
327
 
@@ -124,6 +124,38 @@ export function createApi(app, daemon) {
124
124
  res.json({ status: 'ok', uptime: process.uptime() });
125
125
  });
126
126
 
127
+ // Debug: test fetch to llama-server from daemon runtime
128
+ app.get('/api/lab/debug-fetch', async (req, res) => {
129
+ const target = req.query.url || 'http://localhost:8081/v1/chat/completions';
130
+ const log = [];
131
+ try {
132
+ log.push(`fetch → ${target}`);
133
+ log.push(`node ${process.version}, electron ${process.versions.electron || 'N/A'}`);
134
+ const start = Date.now();
135
+ const r = await fetch(target, {
136
+ method: 'POST',
137
+ headers: { 'Content-Type': 'application/json' },
138
+ body: JSON.stringify({ model: 'Qwen3-0.6B-Q8_0.gguf', messages: [{ role: 'user', content: 'Say ok' }], stream: true, max_tokens: 10 }),
139
+ signal: AbortSignal.timeout(10000),
140
+ });
141
+ log.push(`status=${r.status} in ${Date.now() - start}ms`);
142
+ const reader = r.body.getReader();
143
+ let chunks = 0;
144
+ while (chunks < 5) {
145
+ const { done, value } = await reader.read();
146
+ if (done) break;
147
+ chunks++;
148
+ log.push(`chunk ${chunks}: ${new TextDecoder().decode(value).slice(0, 120)}`);
149
+ }
150
+ reader.cancel();
151
+ log.push(`total chunks read: ${chunks}`);
152
+ res.json({ ok: true, log });
153
+ } catch (err) {
154
+ log.push(`ERROR: ${err.message}`);
155
+ res.json({ ok: false, log, error: err.message });
156
+ }
157
+ });
158
+
127
159
  // List all agents
128
160
  app.get('/api/agents', (req, res) => {
129
161
  res.json(daemon.registry.getAll());
@@ -6703,11 +6735,9 @@ Keep responses concise. Help them think, don't lecture them about the system the
6703
6735
  let closed = false;
6704
6736
  req.on('close', () => { closed = true; });
6705
6737
 
6706
- const stream = daemon.modelLab.streamInference(params);
6707
- for await (const event of stream) {
6708
- if (closed) break;
6709
- res.write(`data: ${JSON.stringify(event)}\n\n`);
6710
- }
6738
+ await daemon.modelLab.streamInference(params, (event) => {
6739
+ if (!closed) res.write(`data: ${JSON.stringify(event)}\n\n`);
6740
+ });
6711
6741
 
6712
6742
  if (!closed) {
6713
6743
  res.write('data: [DONE]\n\n');
@@ -460,7 +460,7 @@ export class Journalist {
460
460
  '(What was completed. Name files, functions, and line numbers.)',
461
461
  '',
462
462
  'Be specific. Name files, functions, and line numbers. Do not summarize vaguely.',
463
- 'Keep your response under 2000 characters.',
463
+ 'Keep your response under 1500 characters.',
464
464
  '',
465
465
  '---',
466
466
  '',
@@ -469,7 +469,7 @@ export class Journalist {
469
469
  ];
470
470
 
471
471
  let totalChars = 0;
472
- const cap = 30_000;
472
+ const cap = 15_000;
473
473
  for (const entry of entries.slice(-200)) {
474
474
  const line = this.formatEntry(entry);
475
475
  if (totalChars + line.length > cap) break;
@@ -853,15 +853,15 @@ export class Journalist {
853
853
  const agentLog = filteredLogs[agent.id];
854
854
  const entries = agentLog?.entries || [];
855
855
 
856
- // Layer 7 memory: discoveries, constraints, specializations
857
- const discoveries = this.daemon.memory?.getDiscoveriesMarkdown(agent.role, 10, 2000) || '';
856
+ // Layer 7 memory: discoveries (inline, not pointer — agents lose context with pointers), constraints, specializations
857
+ const discoveries = this.daemon.memory?.getDiscoveriesMarkdown(agent.role, 10, 1500) || '';
858
858
  const constraints = this.daemon.memory?.getConstraintsMarkdown(2000) || '';
859
859
  const specialization = this.daemon.memory?.getSpecialization(agent.id);
860
860
  const specLine = specialization?.avgQualityScore != null
861
861
  ? `- Quality profile: ${specialization.avgQualityScore}/100 across ${specialization.sessionCount} sessions`
862
862
  : '';
863
863
 
864
- const recentChain = this.daemon.memory?.getRecentHandoffMarkdown(agent.role, 3, 3000, agent.workingDir, agent.teamId) || '';
864
+ const recentChain = this.daemon.memory?.getRecentHandoffMarkdown(agent.role, 1, 1500, agent.workingDir, agent.teamId) || '';
865
865
 
866
866
  const agentFeedback = this.getUserFeedback(agent.id).slice(-5);
867
867
  const conversationSummary = agentFeedback.length > 0
@@ -871,7 +871,7 @@ export class Journalist {
871
871
  const recentTools = entries
872
872
  .filter((e) => e.type === 'tool' || e.type === 'error')
873
873
  .slice(-5)
874
- .map((e) => `- ${e.type === 'error' ? 'ERROR ' : ''}${e.tool}: ${(e.input || e.text || '').slice(0, 80)}`)
874
+ .map((e) => `- ${e.type === 'error' ? 'ERROR ' : ''}${e.tool}: ${(e.input || e.text || '').slice(0, 200)}`)
875
875
  .join('\n');
876
876
 
877
877
  // Try AI-synthesized session summary
@@ -908,7 +908,7 @@ export class Journalist {
908
908
  const fallbackRecentTools = entries
909
909
  .filter((e) => e.type === 'tool' || e.type === 'error')
910
910
  .slice(-5)
911
- .map((e) => `- ${e.type === 'error' ? 'ERROR ' : ''}${e.tool}: ${(e.input || '').slice(0, 80)}`)
911
+ .map((e) => `- ${e.type === 'error' ? 'ERROR ' : ''}${e.tool}: ${(e.input || '').slice(0, 200)}`)
912
912
  .join('\n');
913
913
 
914
914
  const fallbackParts = [];
@@ -919,7 +919,13 @@ export class Journalist {
919
919
  sessionSummary = fallbackParts.join('\n\n');
920
920
  }
921
921
 
922
- return [
922
+ // For quality_degradation rotations, drop user messages (already in session summary)
923
+ const includeUserMessages = options.reason !== 'quality_degradation';
924
+
925
+ // Cap Original Task to 1000 chars — task descriptions for debugging can be long
926
+ const originalTask = agent.prompt ? agent.prompt.slice(0, 1000) + (agent.prompt.length > 1000 ? '…' : '') : '';
927
+
928
+ let brief = [
923
929
  `# Handoff Brief — ${agent.name} (${agent.role})`,
924
930
  ``,
925
931
  `Role: ${agent.role} | Scope: ${agent.scope?.join(', ') || 'unrestricted'} | Provider: ${agent.provider}`,
@@ -927,17 +933,27 @@ export class Journalist {
927
933
  `Rotation: ${options.reason || 'manual'}${options.qualityScore ? ` (quality: ${options.qualityScore}/100)` : ''} | Tokens: ${agent.tokensUsed}`,
928
934
  specLine,
929
935
  ``,
930
- discoveries ? `## Known Issues & Fixes\n\n${discoveries}\n` : '',
936
+ // Priority order: session summary (contains unresolved errors) first,
937
+ // then constraints, then discoveries, then tools — so the most critical
938
+ // debugging context survives even if the brief hits the hard cap.
939
+ sessionSummary ? `## Session Summary\n\n${sessionSummary}\n` : '',
931
940
  constraints ? `## Project Constraints (must follow)\n\n${constraints}\n` : '',
941
+ discoveries ? `## Known Issues & Fixes\n\n${discoveries}\n` : '',
932
942
  recentTools ? `## Last 5 Tool Calls\n\n${recentTools}\n` : '',
933
- sessionSummary ? `## Session Summary\n\n${sessionSummary}\n` : '',
934
- conversationSummary ? `## Recent User Messages\n\n${conversationSummary}\n` : '',
943
+ includeUserMessages && conversationSummary ? `## Recent User Messages\n\n${conversationSummary}\n` : '',
935
944
  recentChain ? `## Rotation History\n\n${recentChain}\n` : '',
936
- agent.prompt ? `## Original Task\n\n${agent.prompt}\n` : '',
945
+ originalTask ? `## Original Task\n\n${originalTask}\n` : '',
937
946
  ``,
938
947
  agent.role === 'planner' ? 'CRITICAL: You are a PLANNING ONLY agent. Do NOT implement code. Route all work to your team via .groove/recommended-team.json.\n' : '',
939
948
  `Continue seamlessly — finish the work and deliver the output.`,
940
949
  ].filter(Boolean).join('\n');
950
+
951
+ // Hard cap: 8000 chars — enough for debugging context without overwhelming the new agent
952
+ if (brief.length > 8000) {
953
+ brief = brief.slice(0, 7950) + '\n\n[Brief truncated — see session logs for full context]';
954
+ }
955
+
956
+ return brief;
941
957
  }
942
958
 
943
959
  // --- Workspace Grouping ---
@@ -4,6 +4,7 @@
4
4
  import { resolve } from 'path';
5
5
  import { existsSync, readFileSync, writeFileSync, mkdirSync, readdirSync, unlinkSync } from 'fs';
6
6
  import { randomUUID } from 'crypto';
7
+ import { Readable } from 'stream';
7
8
 
8
9
  const RUNTIME_TYPES = ['ollama', 'vllm', 'llama-cpp', 'tgi', 'openai-compatible'];
9
10
  const DEFAULT_OLLAMA_ENDPOINT = 'http://localhost:11434';
@@ -208,7 +209,7 @@ export class ModelLab {
208
209
 
209
210
  // ─── Inference ──────────────────────────────────────────────
210
211
 
211
- async *streamInference({ runtimeId, model, messages, parameters, sessionId }) {
212
+ async streamInference({ runtimeId, model, messages, parameters, sessionId }, onEvent) {
212
213
  const rt = this.runtimes.get(runtimeId);
213
214
  if (!rt) throw new Error('Runtime not found');
214
215
  if (!model) throw new Error('Model is required');
@@ -216,7 +217,6 @@ export class ModelLab {
216
217
  throw new Error('Messages array is required');
217
218
  }
218
219
 
219
- // Build request body — all runtimes use OpenAI-compatible format
220
220
  const body = {
221
221
  model,
222
222
  messages,
@@ -224,12 +224,9 @@ export class ModelLab {
224
224
  ...this._buildParameterBody(parameters || {}),
225
225
  };
226
226
 
227
- const endpoint = rt.type === 'ollama'
228
- ? `${rt.endpoint}/v1/chat/completions`
229
- : `${rt.endpoint}/v1/chat/completions`;
230
-
231
- const headers = { 'Content-Type': 'application/json' };
232
- if (rt.apiKey) headers['Authorization'] = `Bearer ${rt.apiKey}`;
227
+ const endpoint = rt.endpoint.replace('localhost', '127.0.0.1');
228
+ const reqHeaders = { 'Content-Type': 'application/json' };
229
+ if (rt.apiKey) reqHeaders['Authorization'] = `Bearer ${rt.apiKey}`;
233
230
 
234
231
  const requestStart = Date.now();
235
232
  let ttft = null;
@@ -239,91 +236,64 @@ export class ModelLab {
239
236
  let generationStart = null;
240
237
  let fullContent = '';
241
238
 
242
- const resp = await fetch(endpoint, {
239
+ const resp = await fetch(`${endpoint}/v1/chat/completions`, {
243
240
  method: 'POST',
244
- headers,
241
+ headers: reqHeaders,
245
242
  body: JSON.stringify(body),
246
243
  signal: AbortSignal.timeout(300000),
247
244
  });
248
245
 
249
246
  if (!resp.ok) {
250
- let errorMsg;
251
- try { errorMsg = (await resp.json()).error?.message || `HTTP ${resp.status}`; } catch { errorMsg = `HTTP ${resp.status}`; }
252
- throw new Error(errorMsg);
247
+ let errMsg = `HTTP ${resp.status}`;
248
+ try { const e = await resp.json(); errMsg = e.error?.message || errMsg; } catch { /* ignore */ }
249
+ throw new Error(errMsg);
253
250
  }
254
251
 
255
- const reader = resp.body.getReader();
256
- const decoder = new TextDecoder();
252
+ const nodeStream = Readable.fromWeb(resp.body);
257
253
  let buffer = '';
258
254
 
259
- try {
260
- while (true) {
261
- const { done, value } = await reader.read();
262
- if (done) break;
263
-
264
- buffer += decoder.decode(value, { stream: true });
265
- const lines = buffer.split('\n');
266
- buffer = lines.pop() || '';
267
-
268
- for (const line of lines) {
269
- const trimmed = line.trim();
270
- if (!trimmed || !trimmed.startsWith('data: ')) continue;
271
- const payload = trimmed.slice(6);
272
- if (payload === '[DONE]') continue;
273
-
274
- try {
275
- const chunk = JSON.parse(payload);
276
- const delta = chunk.choices?.[0]?.delta;
277
- if (delta?.reasoning_content) {
278
- if (ttft === null) {
279
- ttft = Date.now() - requestStart;
280
- generationStart = Date.now();
281
- }
282
- completionTokens++;
283
- yield { type: 'reasoning', content: delta.reasoning_content };
284
- }
285
- if (delta?.content) {
286
- if (ttft === null) {
287
- ttft = Date.now() - requestStart;
288
- generationStart = Date.now();
289
- }
290
- fullContent += delta.content;
291
- completionTokens++;
292
- yield { type: 'token', content: delta.content };
293
- }
294
- // Capture usage from final chunk if provided
295
- if (chunk.usage) {
296
- promptTokens = chunk.usage.prompt_tokens || 0;
297
- totalTokens = chunk.usage.total_tokens || 0;
298
- if (chunk.usage.completion_tokens) {
299
- completionTokens = chunk.usage.completion_tokens;
300
- }
301
- }
302
- } catch { /* skip malformed chunk */ }
303
- }
255
+ for await (const chunk of nodeStream) {
256
+ buffer += typeof chunk === 'string' ? chunk : chunk.toString('utf8');
257
+ const lines = buffer.split('\n');
258
+ buffer = lines.pop() || '';
259
+
260
+ for (const line of lines) {
261
+ const trimmed = line.trim();
262
+ if (!trimmed || !trimmed.startsWith('data: ')) continue;
263
+ const data = trimmed.slice(6);
264
+ if (data === '[DONE]') continue;
265
+
266
+ try {
267
+ const parsed = JSON.parse(data);
268
+ const delta = parsed.choices?.[0]?.delta;
269
+ if (delta?.reasoning_content) {
270
+ if (ttft === null) { ttft = Date.now() - requestStart; generationStart = Date.now(); }
271
+ completionTokens++;
272
+ onEvent({ type: 'reasoning', content: delta.reasoning_content });
273
+ }
274
+ if (delta?.content) {
275
+ if (ttft === null) { ttft = Date.now() - requestStart; generationStart = Date.now(); }
276
+ fullContent += delta.content;
277
+ completionTokens++;
278
+ onEvent({ type: 'token', content: delta.content });
279
+ }
280
+ if (parsed.usage) {
281
+ promptTokens = parsed.usage.prompt_tokens || 0;
282
+ totalTokens = parsed.usage.total_tokens || 0;
283
+ if (parsed.usage.completion_tokens) completionTokens = parsed.usage.completion_tokens;
284
+ }
285
+ } catch { /* skip malformed chunk */ }
304
286
  }
305
- } finally {
306
- reader.releaseLock();
307
287
  }
308
288
 
309
289
  const generationTime = generationStart ? Date.now() - generationStart : Date.now() - requestStart;
310
290
  const tokensPerSec = generationTime > 0 ? (completionTokens / (generationTime / 1000)) : 0;
311
291
 
312
- // Ollama memory usage
313
- let memoryUsage = null;
314
- if (rt.type === 'ollama') {
315
- memoryUsage = await this.getOllamaMemoryUsage(rt.endpoint);
316
- }
317
-
318
- // Persist to session if sessionId provided
319
292
  if (sessionId) {
320
- this._appendToSession(sessionId, messages, {
321
- role: 'assistant',
322
- content: fullContent,
323
- });
293
+ this._appendToSession(sessionId, messages, { role: 'assistant', content: fullContent });
324
294
  }
325
295
 
326
- yield {
296
+ onEvent({
327
297
  type: 'done',
328
298
  metrics: {
329
299
  ttft,
@@ -332,9 +302,16 @@ export class ModelLab {
332
302
  promptTokens,
333
303
  completionTokens,
334
304
  generationTime,
335
- memoryUsage,
305
+ memoryUsage: null,
336
306
  },
337
- };
307
+ });
308
+
309
+ if (rt.type === 'ollama') {
310
+ try {
311
+ const mem = await this.getOllamaMemoryUsage(rt.endpoint);
312
+ if (mem) onEvent({ type: 'memory', usage: mem });
313
+ } catch { /* ignore */ }
314
+ }
338
315
  }
339
316
 
340
317
  _buildParameterBody(params) {
@@ -342,6 +342,9 @@ export class ProcessManager {
342
342
  this._stalledAgents = new Set(); // agentIds already flagged as stalled (avoids duplicate broadcasts)
343
343
  this._exitHandled = new Set();
344
344
  this._resultReceived = new Set();
345
+ this._truncationFlagged = new Set(); // agentIds that have had any truncation in their session
346
+ this._lastAssistantBlocks = new Map(); // agentId -> last assistant content blocks (for abandoned tool_use detection)
347
+ this._previousCacheReadTokens = new Map(); // agentId -> previous turn's cacheReadTokens
345
348
 
346
349
  this._stallWatchdog = setInterval(() => this._checkStalls(), STALL_CHECK_INTERVAL_MS);
347
350
  if (this._stallWatchdog.unref) this._stallWatchdog.unref();
@@ -355,7 +358,8 @@ export class ProcessManager {
355
358
  if (!agent || agent.status !== 'running') continue;
356
359
  const lastActivity = agent.lastActivity ? new Date(agent.lastActivity).getTime() : now;
357
360
  const silentMs = now - lastActivity;
358
- if (silentMs < STALL_THRESHOLD_MS) {
361
+ const effectiveStallMs = this._truncationFlagged.has(agentId) ? 2 * 60_000 : STALL_THRESHOLD_MS;
362
+ if (silentMs < effectiveStallMs) {
359
363
  if (this._stalledAgents.has(agentId)) {
360
364
  this._stalledAgents.delete(agentId);
361
365
  registry.update(agentId, { stalled: false });
@@ -402,6 +406,9 @@ export class ProcessManager {
402
406
  setTimeout(() => this._exitHandled.delete(agentId), 30_000);
403
407
  this._stalledAgents.delete(agentId);
404
408
  this._resultReceived.delete(agentId);
409
+ this._truncationFlagged.delete(agentId);
410
+ this._lastAssistantBlocks.delete(agentId);
411
+ this._previousCacheReadTokens.delete(agentId);
405
412
  const throttle = this._streamThrottle.get(agentId);
406
413
  if (throttle?.timer) clearTimeout(throttle.timer);
407
414
  this._streamThrottle.delete(agentId);
@@ -435,6 +442,9 @@ export class ProcessManager {
435
442
  this.peakContextUsage.delete(agent.id);
436
443
  this.pendingMessages.delete(agent.id);
437
444
  this._stalledAgents.delete(agent.id);
445
+ this._truncationFlagged.delete(agent.id);
446
+ this._lastAssistantBlocks.delete(agent.id);
447
+ this._previousCacheReadTokens.delete(agent.id);
438
448
 
439
449
  if (this.daemon.locks) this.daemon.locks.release(agent.id);
440
450
 
@@ -567,6 +577,16 @@ export class ProcessManager {
567
577
 
568
578
  this.handles.delete(agent.id);
569
579
  this._stalledAgents.delete(agent.id);
580
+ this._truncationFlagged.delete(agent.id);
581
+ this._lastAssistantBlocks.delete(agent.id);
582
+ this._previousCacheReadTokens.delete(agent.id);
583
+
584
+ const throttle = this._streamThrottle.get(agent.id);
585
+ if (throttle?.timer) clearTimeout(throttle.timer);
586
+ this._streamThrottle.delete(agent.id);
587
+
588
+ this.peakContextUsage.delete(agent.id);
589
+ this.pendingMessages.delete(agent.id);
570
590
 
571
591
  if (this.daemon.locks) this.daemon.locks.release(agent.id);
572
592
 
@@ -884,7 +904,19 @@ export class ProcessManager {
884
904
  // Handoffs are injected only when the agent has a real task or is a rotation.
885
905
  const hasTask = !!(config.prompt && config.prompt.trim().length > 0);
886
906
  const isRotation = !!(config.isRotation);
887
- const introContext = introducer.generateContext(agent, { taskNegotiation, hasTask, isRotation });
907
+ let introContext = introducer.generateContext(agent, { taskNegotiation, hasTask, isRotation });
908
+
909
+ // Intro context size warning and optional truncation (Change 7)
910
+ if (introContext) {
911
+ const introLen = introContext.length;
912
+ const maxIntroChars = this.daemon.config?.maxIntroContextChars || 10000;
913
+ if (introLen > 8000) {
914
+ console.warn(`[Groove] Intro context for ${agent.name} is ${introLen} chars — consider reducing CLAUDE.md.`);
915
+ }
916
+ if (introLen > maxIntroChars) {
917
+ introContext = introContext.slice(0, maxIntroChars) + '\n\n[Intro context truncated at ' + maxIntroChars + ' chars]';
918
+ }
919
+ }
888
920
 
889
921
  // Ensure the project map is fresh before the new agent reads CLAUDE.md
890
922
  if (this.daemon.journalist) {
@@ -1050,6 +1082,9 @@ For normal file edits within your scope, proceed without review.
1050
1082
  this.handles.delete(agent.id);
1051
1083
  this._stalledAgents.delete(agent.id);
1052
1084
  this._resultReceived.delete(agent.id);
1085
+ this._truncationFlagged.delete(agent.id);
1086
+ this._lastAssistantBlocks.delete(agent.id);
1087
+ this._previousCacheReadTokens.delete(agent.id);
1053
1088
 
1054
1089
  // Clean up stream throttle so pending timers don't fire for dead agents
1055
1090
  const throttle = this._streamThrottle.get(agent.id);
@@ -1338,6 +1373,60 @@ For normal file edits within your scope, proceed without review.
1338
1373
  updates.stalled = false;
1339
1374
  }
1340
1375
 
1376
+ // --- Incomplete response / truncation detection (Change 1) ---
1377
+ if (output.type === 'activity' && output.subtype === 'assistant' && Array.isArray(output.data)) {
1378
+ const blocks = output.data;
1379
+ let truncated = false;
1380
+
1381
+ // Check 1: last text block ends mid-sentence (no terminal punctuation).
1382
+ // Skip short responses (<40 chars) — "OK", "Done", "Sure" are legitimate.
1383
+ const textBlocks = blocks.filter(b => b.type === 'text' && b.text);
1384
+ if (textBlocks.length > 0) {
1385
+ const lastText = textBlocks[textBlocks.length - 1].text.trimEnd();
1386
+ if (lastText.length >= 40 && !/[.?!}\])`'"]$/.test(lastText) && !/```\s*$/.test(lastText)) {
1387
+ truncated = true;
1388
+ }
1389
+ }
1390
+
1391
+ // Check 2: previous turn had tool_use blocks but this turn is near-empty
1392
+ // (<20 chars of text, no new tool calls). In normal flow the assistant
1393
+ // processes tool results and produces a substantive follow-up; a near-empty
1394
+ // response suggests the tool call was abandoned or its result was lost.
1395
+ const prevBlocks = this._lastAssistantBlocks.get(agentId);
1396
+ if (prevBlocks && prevBlocks.some(b => b.type === 'tool_use')) {
1397
+ const totalCurrentText = textBlocks.reduce((sum, b) => sum + (b.text?.length || 0), 0);
1398
+ if (totalCurrentText < 20 && !blocks.some(b => b.type === 'tool_use')) {
1399
+ truncated = true;
1400
+ }
1401
+ }
1402
+
1403
+ this._lastAssistantBlocks.set(agentId, blocks);
1404
+
1405
+ if (truncated) {
1406
+ this._truncationFlagged.add(agentId);
1407
+ const prev = agent.consecutiveTruncations || 0;
1408
+ updates.truncationSuspected = true;
1409
+ updates.consecutiveTruncations = prev + 1;
1410
+ classifier.addEvent(agentId, { type: 'error', subtype: 'truncated_response', timestamp: Date.now() });
1411
+ } else if (agent.truncationSuspected) {
1412
+ updates.truncationSuspected = false;
1413
+ updates.consecutiveTruncations = 0;
1414
+ }
1415
+ }
1416
+
1417
+ // --- Cache reset detection (Change 5) ---
1418
+ if (output.cacheReadTokens !== undefined) {
1419
+ const prevCache = this._previousCacheReadTokens.get(agentId);
1420
+ if (prevCache !== undefined && prevCache > 50_000) {
1421
+ const drop = prevCache - output.cacheReadTokens;
1422
+ if (drop > prevCache * 0.5) {
1423
+ classifier.addEvent(agentId, { type: 'error', subtype: 'cache_reset', timestamp: Date.now() });
1424
+ updates.cacheResetDetected = true;
1425
+ }
1426
+ }
1427
+ this._previousCacheReadTokens.set(agentId, output.cacheReadTokens);
1428
+ }
1429
+
1341
1430
  // Token tracking — feed subsystems with full breakdown
1342
1431
  if (output.tokensUsed !== undefined && output.tokensUsed > 0) {
1343
1432
  updates.tokensUsed = agent.tokensUsed + output.tokensUsed;