groove-dev 0.27.134 → 0.27.136
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/moe-training/client/domain-tagger.js +1 -1
- package/moe-training/scripts/retag-delegate-yield.js +303 -0
- package/moe-training/test/shared/envelope-schema.test.js +3 -3
- package/node_modules/@groove-dev/cli/package.json +1 -1
- package/node_modules/@groove-dev/daemon/package.json +1 -1
- package/node_modules/@groove-dev/daemon/src/adaptive.js +77 -0
- package/node_modules/@groove-dev/daemon/src/api.js +35 -5
- package/node_modules/@groove-dev/daemon/src/journalist.js +28 -12
- package/node_modules/@groove-dev/daemon/src/model-lab.js +53 -76
- package/node_modules/@groove-dev/daemon/src/process.js +91 -2
- package/node_modules/@groove-dev/daemon/src/rotator.js +45 -3
- package/node_modules/@groove-dev/gui/dist/assets/{index-Dozp69tK.js → index-BrZHF7pK.js} +1770 -1766
- package/node_modules/@groove-dev/gui/dist/assets/index-DIfiwdKl.css +1 -0
- package/node_modules/@groove-dev/gui/dist/index.html +2 -2
- package/node_modules/@groove-dev/gui/package.json +1 -1
- package/node_modules/@groove-dev/gui/src/components/agents/agent-chat.jsx +60 -18
- package/node_modules/@groove-dev/gui/src/components/agents/agent-feed.jsx +42 -20
- package/node_modules/@groove-dev/gui/src/components/agents/agent-file-tree.jsx +1 -1
- package/node_modules/@groove-dev/gui/src/components/agents/workspace-mode.jsx +1 -1
- package/node_modules/@groove-dev/gui/src/components/chat/chat-messages.jsx +2 -22
- package/node_modules/@groove-dev/gui/src/components/editor/code-editor.jsx +9 -9
- package/node_modules/@groove-dev/gui/src/components/editor/file-tree.jsx +1 -1
- package/node_modules/@groove-dev/gui/src/components/editor/terminal.jsx +7 -0
- package/node_modules/@groove-dev/gui/src/components/lab/chat-playground.jsx +59 -51
- package/node_modules/@groove-dev/gui/src/components/lab/lab-assistant.jsx +48 -48
- package/node_modules/@groove-dev/gui/src/components/lab/metrics-panel.jsx +39 -38
- package/node_modules/@groove-dev/gui/src/components/lab/parameter-panel.jsx +4 -5
- package/node_modules/@groove-dev/gui/src/components/lab/preset-manager.jsx +11 -11
- package/node_modules/@groove-dev/gui/src/components/lab/runtime-config.jsx +66 -62
- package/node_modules/@groove-dev/gui/src/components/lab/system-prompt-editor.jsx +13 -13
- package/node_modules/@groove-dev/gui/src/components/layout/breadcrumb-bar.jsx +1 -1
- package/node_modules/@groove-dev/gui/src/components/preview/preview-workspace.jsx +62 -22
- package/node_modules/@groove-dev/gui/src/components/ui/slider.jsx +16 -17
- package/node_modules/@groove-dev/gui/src/components/ui/table-tree.jsx +38 -0
- package/node_modules/@groove-dev/gui/src/stores/groove.js +23 -9
- package/node_modules/@groove-dev/gui/src/views/editor.jsx +1 -1
- package/node_modules/@groove-dev/gui/src/views/model-lab.jsx +101 -87
- package/node_modules/moe-training/client/domain-tagger.js +1 -1
- package/node_modules/moe-training/scripts/retag-delegate-yield.js +303 -0
- package/node_modules/moe-training/test/shared/envelope-schema.test.js +3 -3
- package/package.json +1 -1
- package/packages/cli/package.json +1 -1
- package/packages/daemon/package.json +1 -1
- package/packages/daemon/src/adaptive.js +77 -0
- package/packages/daemon/src/api.js +35 -5
- package/packages/daemon/src/journalist.js +28 -12
- package/packages/daemon/src/model-lab.js +53 -76
- package/packages/daemon/src/process.js +91 -2
- package/packages/daemon/src/rotator.js +45 -3
- package/packages/gui/dist/assets/{index-Dozp69tK.js → index-BrZHF7pK.js} +1770 -1766
- package/packages/gui/dist/assets/index-DIfiwdKl.css +1 -0
- package/packages/gui/dist/index.html +2 -2
- package/packages/gui/package.json +1 -1
- package/packages/gui/src/components/agents/agent-chat.jsx +60 -18
- package/packages/gui/src/components/agents/agent-feed.jsx +42 -20
- package/packages/gui/src/components/agents/agent-file-tree.jsx +1 -1
- package/packages/gui/src/components/agents/workspace-mode.jsx +1 -1
- package/packages/gui/src/components/chat/chat-messages.jsx +2 -22
- package/packages/gui/src/components/editor/code-editor.jsx +9 -9
- package/packages/gui/src/components/editor/file-tree.jsx +1 -1
- package/packages/gui/src/components/editor/terminal.jsx +7 -0
- package/packages/gui/src/components/lab/chat-playground.jsx +59 -51
- package/packages/gui/src/components/lab/lab-assistant.jsx +48 -48
- package/packages/gui/src/components/lab/metrics-panel.jsx +39 -38
- package/packages/gui/src/components/lab/parameter-panel.jsx +4 -5
- package/packages/gui/src/components/lab/preset-manager.jsx +11 -11
- package/packages/gui/src/components/lab/runtime-config.jsx +66 -62
- package/packages/gui/src/components/lab/system-prompt-editor.jsx +13 -13
- package/packages/gui/src/components/layout/breadcrumb-bar.jsx +1 -1
- package/packages/gui/src/components/preview/preview-workspace.jsx +62 -22
- package/packages/gui/src/components/ui/slider.jsx +16 -17
- package/packages/gui/src/components/ui/table-tree.jsx +38 -0
- package/packages/gui/src/stores/groove.js +23 -9
- package/packages/gui/src/views/editor.jsx +1 -1
- package/packages/gui/src/views/model-lab.jsx +101 -87
- package/plan_files/DELEGATE_YIELD_TRAINING_TAGS.md +135 -0
- package/plan_files/session-quality-rotation-fixes.md +218 -0
- package/test.py +571 -0
- package/node_modules/@groove-dev/gui/dist/assets/index-BgQL4bNl.css +0 -1
- package/packages/gui/dist/assets/index-BgQL4bNl.css +0 -1
- /package/{AGENT_ORCHESTRATION.md → plan_files/AGENT_ORCHESTRATION.md} +0 -0
- /package/{DYNAMIC_LEAF_ARCH.md → plan_files/DYNAMIC_LEAF_ARCH.md} +0 -0
- /package/{EMBEDDING_DIAGNOSTIC.md → plan_files/EMBEDDING_DIAGNOSTIC.md} +0 -0
- /package/{EMBEDDING_SERVICE_BUILD_PLAN.md → plan_files/EMBEDDING_SERVICE_BUILD_PLAN.md} +0 -0
- /package/{MOE_TRAINING_PIPELINE.md → plan_files/MOE_TRAINING_PIPELINE.md} +0 -0
|
@@ -137,6 +137,20 @@ export class AdaptiveThresholds {
|
|
|
137
137
|
const filesWritten = signals.filesWritten || 0;
|
|
138
138
|
score += Math.min(filesWritten * 2, 10); // Cap at +10
|
|
139
139
|
|
|
140
|
+
// Output length decay: assistant responses shrinking dramatically
|
|
141
|
+
if (signals.outputLengthDecay) score -= 10;
|
|
142
|
+
|
|
143
|
+
// Tool output volume: bloated context from large tool results
|
|
144
|
+
const toolVol = signals.toolOutputVolume || 0;
|
|
145
|
+
if (toolVol === 2) score -= 10;
|
|
146
|
+
else if (toolVol === 1) score -= 5;
|
|
147
|
+
|
|
148
|
+
// Turn latency trend: agent slowing down significantly
|
|
149
|
+
if (signals.turnLatencyTrend) score -= 5;
|
|
150
|
+
|
|
151
|
+
// Bash repetition: agent stuck running identical commands
|
|
152
|
+
if (signals.bashRepetition) score -= 8;
|
|
153
|
+
|
|
140
154
|
// Clamp to 0-100
|
|
141
155
|
return Math.max(0, Math.min(100, score));
|
|
142
156
|
}
|
|
@@ -165,20 +179,43 @@ export class AdaptiveThresholds {
|
|
|
165
179
|
filesWritten: 0,
|
|
166
180
|
fileChurn: 0, // same file written 3+ times → possible circular refactoring
|
|
167
181
|
errorTrend: 0, // errors increasing in recent window → degradation signal
|
|
182
|
+
outputLengthDecay: 0, // last 5 assistant turns avg <50% of first 5 → declining output
|
|
183
|
+
toolOutputVolume: 0, // cumulative tool result chars (>300KB = bloated context)
|
|
184
|
+
turnLatencyTrend: 0, // avg gap in last 10 entries >2x first 10 → slowing down
|
|
185
|
+
bashRepetition: 0, // 3+ identical consecutive Bash commands → stuck in loop
|
|
168
186
|
};
|
|
169
187
|
|
|
170
188
|
const writtenFiles = new Set();
|
|
171
189
|
const fileWriteCounts = {};
|
|
172
190
|
const writeEditOps = [];
|
|
191
|
+
const assistantOutputLengths = [];
|
|
192
|
+
let toolOutputBytes = 0;
|
|
193
|
+
const entryTimestamps = [];
|
|
194
|
+
const bashCommands = [];
|
|
173
195
|
|
|
174
196
|
for (const entry of entries) {
|
|
197
|
+
if (entry.timestamp) entryTimestamps.push(new Date(entry.timestamp).getTime());
|
|
198
|
+
|
|
175
199
|
if (entry.type === 'error') {
|
|
176
200
|
signals.errorCount++;
|
|
177
201
|
}
|
|
178
202
|
|
|
203
|
+
// Track assistant output lengths for decay detection
|
|
204
|
+
if (entry.type === 'thinking' && entry.text) {
|
|
205
|
+
assistantOutputLengths.push(entry.text.length);
|
|
206
|
+
}
|
|
207
|
+
|
|
179
208
|
if (entry.type === 'tool') {
|
|
180
209
|
signals.toolCalls++;
|
|
181
210
|
|
|
211
|
+
// Track tool result output volume
|
|
212
|
+
if (entry.output) toolOutputBytes += entry.output.length;
|
|
213
|
+
|
|
214
|
+
// Track Bash commands for repetition detection
|
|
215
|
+
if (entry.tool === 'Bash' && entry.input) {
|
|
216
|
+
bashCommands.push(entry.input);
|
|
217
|
+
}
|
|
218
|
+
|
|
182
219
|
if (entry.tool === 'Write' || entry.tool === 'Edit') {
|
|
183
220
|
if (entry.input) {
|
|
184
221
|
writtenFiles.add(entry.input);
|
|
@@ -245,6 +282,46 @@ export class AdaptiveThresholds {
|
|
|
245
282
|
signals.errorTrend = secondHalfErrors - firstHalfErrors;
|
|
246
283
|
}
|
|
247
284
|
|
|
285
|
+
// Output length decay: if last 5 assistant outputs avg <50% of first 5
|
|
286
|
+
if (assistantOutputLengths.length >= 10) {
|
|
287
|
+
const first5 = assistantOutputLengths.slice(0, 5);
|
|
288
|
+
const last5 = assistantOutputLengths.slice(-5);
|
|
289
|
+
const firstAvg = first5.reduce((a, b) => a + b, 0) / 5;
|
|
290
|
+
const lastAvg = last5.reduce((a, b) => a + b, 0) / 5;
|
|
291
|
+
if (firstAvg > 0 && lastAvg < firstAvg * 0.5) signals.outputLengthDecay = 1;
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
// Tool output volume: cumulative tool result size
|
|
295
|
+
if (toolOutputBytes > 600_000) signals.toolOutputVolume = 2;
|
|
296
|
+
else if (toolOutputBytes > 300_000) signals.toolOutputVolume = 1;
|
|
297
|
+
|
|
298
|
+
// Turn latency trend: avg gap in last 10 entries >2x first 10
|
|
299
|
+
if (entryTimestamps.length >= 20) {
|
|
300
|
+
const gaps = (ts) => {
|
|
301
|
+
const g = [];
|
|
302
|
+
for (let i = 1; i < ts.length; i++) g.push(ts[i] - ts[i - 1]);
|
|
303
|
+
return g;
|
|
304
|
+
};
|
|
305
|
+
const firstGaps = gaps(entryTimestamps.slice(0, 11));
|
|
306
|
+
const lastGaps = gaps(entryTimestamps.slice(-11));
|
|
307
|
+
const avgFirst = firstGaps.reduce((a, b) => a + b, 0) / firstGaps.length;
|
|
308
|
+
const avgLast = lastGaps.reduce((a, b) => a + b, 0) / lastGaps.length;
|
|
309
|
+
if (avgFirst > 0 && avgLast > avgFirst * 2) signals.turnLatencyTrend = 1;
|
|
310
|
+
}
|
|
311
|
+
|
|
312
|
+
// Bash repetition: 3+ identical consecutive Bash commands
|
|
313
|
+
let maxConsecutive = 0;
|
|
314
|
+
let streak = 1;
|
|
315
|
+
for (let i = 1; i < bashCommands.length; i++) {
|
|
316
|
+
if (bashCommands[i] === bashCommands[i - 1]) {
|
|
317
|
+
streak++;
|
|
318
|
+
if (streak > maxConsecutive) maxConsecutive = streak;
|
|
319
|
+
} else {
|
|
320
|
+
streak = 1;
|
|
321
|
+
}
|
|
322
|
+
}
|
|
323
|
+
if (maxConsecutive >= 3) signals.bashRepetition = 1;
|
|
324
|
+
|
|
248
325
|
return signals;
|
|
249
326
|
}
|
|
250
327
|
|
|
@@ -124,6 +124,38 @@ export function createApi(app, daemon) {
|
|
|
124
124
|
res.json({ status: 'ok', uptime: process.uptime() });
|
|
125
125
|
});
|
|
126
126
|
|
|
127
|
+
// Debug: test a streaming fetch to llama-server from inside the daemon runtime.
//
// GET /api/lab/debug-fetch?url=<http(s) URL>
//   url — optional; defaults to the local llama-server chat-completions endpoint.
//
// Responds with { ok, log } on success or { ok: false, log, error } on failure,
// where `log` is a step-by-step trace (target, runtime versions, status, first chunks).
//
// NOTE(review): this endpoint POSTs to a caller-supplied URL from the daemon host,
// which is an SSRF surface. Only http/https targets are accepted here; consider
// restricting it to loopback hosts or disabling it outside development builds.
app.get('/api/lab/debug-fetch', async (req, res) => {
  const DEFAULT_TARGET = 'http://localhost:8081/v1/chat/completions';
  const MAX_CHUNKS = 5;
  const log = [];

  // Express can hand back an array/object for repeated or nested query params,
  // so validate before the value reaches fetch().
  const target = req.query.url || DEFAULT_TARGET;
  try {
    if (typeof target !== 'string') throw new TypeError('url must be a single string');
    const { protocol } = new URL(target);
    if (protocol !== 'http:' && protocol !== 'https:') {
      throw new TypeError(`unsupported protocol ${protocol}`);
    }
  } catch (err) {
    log.push(`ERROR: ${err.message}`);
    res.status(400).json({ ok: false, log, error: err.message });
    return;
  }

  let reader = null;
  try {
    log.push(`fetch → ${target}`);
    log.push(`node ${process.version}, electron ${process.versions.electron || 'N/A'}`);
    const start = Date.now();
    const r = await fetch(target, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ model: 'Qwen3-0.6B-Q8_0.gguf', messages: [{ role: 'user', content: 'Say ok' }], stream: true, max_tokens: 10 }),
      signal: AbortSignal.timeout(10000),
    });
    log.push(`status=${r.status} in ${Date.now() - start}ms`);

    let chunks = 0;
    if (r.body) {
      reader = r.body.getReader();
      // One decoder in streaming mode so multi-byte characters split across
      // chunk boundaries are not turned into replacement characters.
      const decoder = new TextDecoder();
      while (chunks < MAX_CHUNKS) {
        const { done, value } = await reader.read();
        if (done) break;
        chunks++;
        log.push(`chunk ${chunks}: ${decoder.decode(value, { stream: true }).slice(0, 120)}`);
      }
    } else {
      // Response.body is null for body-less responses.
      log.push('response has no body');
    }
    log.push(`total chunks read: ${chunks}`);
    res.json({ ok: true, log });
  } catch (err) {
    log.push(`ERROR: ${err.message}`);
    res.json({ ok: false, log, error: err.message });
  } finally {
    // cancel() returns a promise; await it so the upstream connection is released
    // on every path (including a failed read). Cleanup is best-effort: a rejection
    // here must not mask the response that was already sent.
    if (reader) await reader.cancel().catch(() => {});
  }
});
|
|
158
|
+
|
|
127
159
|
// List all agents
|
|
128
160
|
app.get('/api/agents', (req, res) => {
|
|
129
161
|
res.json(daemon.registry.getAll());
|
|
@@ -6703,11 +6735,9 @@ Keep responses concise. Help them think, don't lecture them about the system the
|
|
|
6703
6735
|
let closed = false;
|
|
6704
6736
|
req.on('close', () => { closed = true; });
|
|
6705
6737
|
|
|
6706
|
-
|
|
6707
|
-
|
|
6708
|
-
|
|
6709
|
-
res.write(`data: ${JSON.stringify(event)}\n\n`);
|
|
6710
|
-
}
|
|
6738
|
+
await daemon.modelLab.streamInference(params, (event) => {
|
|
6739
|
+
if (!closed) res.write(`data: ${JSON.stringify(event)}\n\n`);
|
|
6740
|
+
});
|
|
6711
6741
|
|
|
6712
6742
|
if (!closed) {
|
|
6713
6743
|
res.write('data: [DONE]\n\n');
|
|
@@ -460,7 +460,7 @@ export class Journalist {
|
|
|
460
460
|
'(What was completed. Name files, functions, and line numbers.)',
|
|
461
461
|
'',
|
|
462
462
|
'Be specific. Name files, functions, and line numbers. Do not summarize vaguely.',
|
|
463
|
-
'Keep your response under
|
|
463
|
+
'Keep your response under 1500 characters.',
|
|
464
464
|
'',
|
|
465
465
|
'---',
|
|
466
466
|
'',
|
|
@@ -469,7 +469,7 @@ export class Journalist {
|
|
|
469
469
|
];
|
|
470
470
|
|
|
471
471
|
let totalChars = 0;
|
|
472
|
-
const cap =
|
|
472
|
+
const cap = 15_000;
|
|
473
473
|
for (const entry of entries.slice(-200)) {
|
|
474
474
|
const line = this.formatEntry(entry);
|
|
475
475
|
if (totalChars + line.length > cap) break;
|
|
@@ -853,15 +853,15 @@ export class Journalist {
|
|
|
853
853
|
const agentLog = filteredLogs[agent.id];
|
|
854
854
|
const entries = agentLog?.entries || [];
|
|
855
855
|
|
|
856
|
-
// Layer 7 memory: discoveries, constraints, specializations
|
|
857
|
-
const discoveries = this.daemon.memory?.getDiscoveriesMarkdown(agent.role, 10,
|
|
856
|
+
// Layer 7 memory: discoveries (inline, not pointer — agents lose context with pointers), constraints, specializations
|
|
857
|
+
const discoveries = this.daemon.memory?.getDiscoveriesMarkdown(agent.role, 10, 1500) || '';
|
|
858
858
|
const constraints = this.daemon.memory?.getConstraintsMarkdown(2000) || '';
|
|
859
859
|
const specialization = this.daemon.memory?.getSpecialization(agent.id);
|
|
860
860
|
const specLine = specialization?.avgQualityScore != null
|
|
861
861
|
? `- Quality profile: ${specialization.avgQualityScore}/100 across ${specialization.sessionCount} sessions`
|
|
862
862
|
: '';
|
|
863
863
|
|
|
864
|
-
const recentChain = this.daemon.memory?.getRecentHandoffMarkdown(agent.role,
|
|
864
|
+
const recentChain = this.daemon.memory?.getRecentHandoffMarkdown(agent.role, 1, 1500, agent.workingDir, agent.teamId) || '';
|
|
865
865
|
|
|
866
866
|
const agentFeedback = this.getUserFeedback(agent.id).slice(-5);
|
|
867
867
|
const conversationSummary = agentFeedback.length > 0
|
|
@@ -871,7 +871,7 @@ export class Journalist {
|
|
|
871
871
|
const recentTools = entries
|
|
872
872
|
.filter((e) => e.type === 'tool' || e.type === 'error')
|
|
873
873
|
.slice(-5)
|
|
874
|
-
.map((e) => `- ${e.type === 'error' ? 'ERROR ' : ''}${e.tool}: ${(e.input || e.text || '').slice(0,
|
|
874
|
+
.map((e) => `- ${e.type === 'error' ? 'ERROR ' : ''}${e.tool}: ${(e.input || e.text || '').slice(0, 200)}`)
|
|
875
875
|
.join('\n');
|
|
876
876
|
|
|
877
877
|
// Try AI-synthesized session summary
|
|
@@ -908,7 +908,7 @@ export class Journalist {
|
|
|
908
908
|
const fallbackRecentTools = entries
|
|
909
909
|
.filter((e) => e.type === 'tool' || e.type === 'error')
|
|
910
910
|
.slice(-5)
|
|
911
|
-
.map((e) => `- ${e.type === 'error' ? 'ERROR ' : ''}${e.tool}: ${(e.input || '').slice(0,
|
|
911
|
+
.map((e) => `- ${e.type === 'error' ? 'ERROR ' : ''}${e.tool}: ${(e.input || '').slice(0, 200)}`)
|
|
912
912
|
.join('\n');
|
|
913
913
|
|
|
914
914
|
const fallbackParts = [];
|
|
@@ -919,7 +919,13 @@ export class Journalist {
|
|
|
919
919
|
sessionSummary = fallbackParts.join('\n\n');
|
|
920
920
|
}
|
|
921
921
|
|
|
922
|
-
|
|
922
|
+
// For quality_degradation rotations, drop user messages (already in session summary)
|
|
923
|
+
const includeUserMessages = options.reason !== 'quality_degradation';
|
|
924
|
+
|
|
925
|
+
// Cap Original Task to 1000 chars — task descriptions for debugging can be long
|
|
926
|
+
const originalTask = agent.prompt ? agent.prompt.slice(0, 1000) + (agent.prompt.length > 1000 ? '…' : '') : '';
|
|
927
|
+
|
|
928
|
+
let brief = [
|
|
923
929
|
`# Handoff Brief — ${agent.name} (${agent.role})`,
|
|
924
930
|
``,
|
|
925
931
|
`Role: ${agent.role} | Scope: ${agent.scope?.join(', ') || 'unrestricted'} | Provider: ${agent.provider}`,
|
|
@@ -927,17 +933,27 @@ export class Journalist {
|
|
|
927
933
|
`Rotation: ${options.reason || 'manual'}${options.qualityScore ? ` (quality: ${options.qualityScore}/100)` : ''} | Tokens: ${agent.tokensUsed}`,
|
|
928
934
|
specLine,
|
|
929
935
|
``,
|
|
930
|
-
|
|
936
|
+
// Priority order: session summary (contains unresolved errors) first,
|
|
937
|
+
// then constraints, then discoveries, then tools — so the most critical
|
|
938
|
+
// debugging context survives even if the brief hits the hard cap.
|
|
939
|
+
sessionSummary ? `## Session Summary\n\n${sessionSummary}\n` : '',
|
|
931
940
|
constraints ? `## Project Constraints (must follow)\n\n${constraints}\n` : '',
|
|
941
|
+
discoveries ? `## Known Issues & Fixes\n\n${discoveries}\n` : '',
|
|
932
942
|
recentTools ? `## Last 5 Tool Calls\n\n${recentTools}\n` : '',
|
|
933
|
-
|
|
934
|
-
conversationSummary ? `## Recent User Messages\n\n${conversationSummary}\n` : '',
|
|
943
|
+
includeUserMessages && conversationSummary ? `## Recent User Messages\n\n${conversationSummary}\n` : '',
|
|
935
944
|
recentChain ? `## Rotation History\n\n${recentChain}\n` : '',
|
|
936
|
-
|
|
945
|
+
originalTask ? `## Original Task\n\n${originalTask}\n` : '',
|
|
937
946
|
``,
|
|
938
947
|
agent.role === 'planner' ? 'CRITICAL: You are a PLANNING ONLY agent. Do NOT implement code. Route all work to your team via .groove/recommended-team.json.\n' : '',
|
|
939
948
|
`Continue seamlessly — finish the work and deliver the output.`,
|
|
940
949
|
].filter(Boolean).join('\n');
|
|
950
|
+
|
|
951
|
+
// Hard cap: 8000 chars — enough for debugging context without overwhelming the new agent
|
|
952
|
+
if (brief.length > 8000) {
|
|
953
|
+
brief = brief.slice(0, 7950) + '\n\n[Brief truncated — see session logs for full context]';
|
|
954
|
+
}
|
|
955
|
+
|
|
956
|
+
return brief;
|
|
941
957
|
}
|
|
942
958
|
|
|
943
959
|
// --- Workspace Grouping ---
|
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
import { resolve } from 'path';
|
|
5
5
|
import { existsSync, readFileSync, writeFileSync, mkdirSync, readdirSync, unlinkSync } from 'fs';
|
|
6
6
|
import { randomUUID } from 'crypto';
|
|
7
|
+
import { Readable } from 'stream';
|
|
7
8
|
|
|
8
9
|
const RUNTIME_TYPES = ['ollama', 'vllm', 'llama-cpp', 'tgi', 'openai-compatible'];
|
|
9
10
|
const DEFAULT_OLLAMA_ENDPOINT = 'http://localhost:11434';
|
|
@@ -208,7 +209,7 @@ export class ModelLab {
|
|
|
208
209
|
|
|
209
210
|
// ─── Inference ──────────────────────────────────────────────
|
|
210
211
|
|
|
211
|
-
async
|
|
212
|
+
async streamInference({ runtimeId, model, messages, parameters, sessionId }, onEvent) {
|
|
212
213
|
const rt = this.runtimes.get(runtimeId);
|
|
213
214
|
if (!rt) throw new Error('Runtime not found');
|
|
214
215
|
if (!model) throw new Error('Model is required');
|
|
@@ -216,7 +217,6 @@ export class ModelLab {
|
|
|
216
217
|
throw new Error('Messages array is required');
|
|
217
218
|
}
|
|
218
219
|
|
|
219
|
-
// Build request body — all runtimes use OpenAI-compatible format
|
|
220
220
|
const body = {
|
|
221
221
|
model,
|
|
222
222
|
messages,
|
|
@@ -224,12 +224,9 @@ export class ModelLab {
|
|
|
224
224
|
...this._buildParameterBody(parameters || {}),
|
|
225
225
|
};
|
|
226
226
|
|
|
227
|
-
const endpoint = rt.
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
const headers = { 'Content-Type': 'application/json' };
|
|
232
|
-
if (rt.apiKey) headers['Authorization'] = `Bearer ${rt.apiKey}`;
|
|
227
|
+
const endpoint = rt.endpoint.replace('localhost', '127.0.0.1');
|
|
228
|
+
const reqHeaders = { 'Content-Type': 'application/json' };
|
|
229
|
+
if (rt.apiKey) reqHeaders['Authorization'] = `Bearer ${rt.apiKey}`;
|
|
233
230
|
|
|
234
231
|
const requestStart = Date.now();
|
|
235
232
|
let ttft = null;
|
|
@@ -239,91 +236,64 @@ export class ModelLab {
|
|
|
239
236
|
let generationStart = null;
|
|
240
237
|
let fullContent = '';
|
|
241
238
|
|
|
242
|
-
const resp = await fetch(endpoint
|
|
239
|
+
const resp = await fetch(`${endpoint}/v1/chat/completions`, {
|
|
243
240
|
method: 'POST',
|
|
244
|
-
headers,
|
|
241
|
+
headers: reqHeaders,
|
|
245
242
|
body: JSON.stringify(body),
|
|
246
243
|
signal: AbortSignal.timeout(300000),
|
|
247
244
|
});
|
|
248
245
|
|
|
249
246
|
if (!resp.ok) {
|
|
250
|
-
let
|
|
251
|
-
try {
|
|
252
|
-
throw new Error(
|
|
247
|
+
let errMsg = `HTTP ${resp.status}`;
|
|
248
|
+
try { const e = await resp.json(); errMsg = e.error?.message || errMsg; } catch { /* ignore */ }
|
|
249
|
+
throw new Error(errMsg);
|
|
253
250
|
}
|
|
254
251
|
|
|
255
|
-
const
|
|
256
|
-
const decoder = new TextDecoder();
|
|
252
|
+
const nodeStream = Readable.fromWeb(resp.body);
|
|
257
253
|
let buffer = '';
|
|
258
254
|
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
const
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
const
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
fullContent += delta.content;
|
|
291
|
-
completionTokens++;
|
|
292
|
-
yield { type: 'token', content: delta.content };
|
|
293
|
-
}
|
|
294
|
-
// Capture usage from final chunk if provided
|
|
295
|
-
if (chunk.usage) {
|
|
296
|
-
promptTokens = chunk.usage.prompt_tokens || 0;
|
|
297
|
-
totalTokens = chunk.usage.total_tokens || 0;
|
|
298
|
-
if (chunk.usage.completion_tokens) {
|
|
299
|
-
completionTokens = chunk.usage.completion_tokens;
|
|
300
|
-
}
|
|
301
|
-
}
|
|
302
|
-
} catch { /* skip malformed chunk */ }
|
|
303
|
-
}
|
|
255
|
+
for await (const chunk of nodeStream) {
|
|
256
|
+
buffer += typeof chunk === 'string' ? chunk : chunk.toString('utf8');
|
|
257
|
+
const lines = buffer.split('\n');
|
|
258
|
+
buffer = lines.pop() || '';
|
|
259
|
+
|
|
260
|
+
for (const line of lines) {
|
|
261
|
+
const trimmed = line.trim();
|
|
262
|
+
if (!trimmed || !trimmed.startsWith('data: ')) continue;
|
|
263
|
+
const data = trimmed.slice(6);
|
|
264
|
+
if (data === '[DONE]') continue;
|
|
265
|
+
|
|
266
|
+
try {
|
|
267
|
+
const parsed = JSON.parse(data);
|
|
268
|
+
const delta = parsed.choices?.[0]?.delta;
|
|
269
|
+
if (delta?.reasoning_content) {
|
|
270
|
+
if (ttft === null) { ttft = Date.now() - requestStart; generationStart = Date.now(); }
|
|
271
|
+
completionTokens++;
|
|
272
|
+
onEvent({ type: 'reasoning', content: delta.reasoning_content });
|
|
273
|
+
}
|
|
274
|
+
if (delta?.content) {
|
|
275
|
+
if (ttft === null) { ttft = Date.now() - requestStart; generationStart = Date.now(); }
|
|
276
|
+
fullContent += delta.content;
|
|
277
|
+
completionTokens++;
|
|
278
|
+
onEvent({ type: 'token', content: delta.content });
|
|
279
|
+
}
|
|
280
|
+
if (parsed.usage) {
|
|
281
|
+
promptTokens = parsed.usage.prompt_tokens || 0;
|
|
282
|
+
totalTokens = parsed.usage.total_tokens || 0;
|
|
283
|
+
if (parsed.usage.completion_tokens) completionTokens = parsed.usage.completion_tokens;
|
|
284
|
+
}
|
|
285
|
+
} catch { /* skip malformed chunk */ }
|
|
304
286
|
}
|
|
305
|
-
} finally {
|
|
306
|
-
reader.releaseLock();
|
|
307
287
|
}
|
|
308
288
|
|
|
309
289
|
const generationTime = generationStart ? Date.now() - generationStart : Date.now() - requestStart;
|
|
310
290
|
const tokensPerSec = generationTime > 0 ? (completionTokens / (generationTime / 1000)) : 0;
|
|
311
291
|
|
|
312
|
-
// Ollama memory usage
|
|
313
|
-
let memoryUsage = null;
|
|
314
|
-
if (rt.type === 'ollama') {
|
|
315
|
-
memoryUsage = await this.getOllamaMemoryUsage(rt.endpoint);
|
|
316
|
-
}
|
|
317
|
-
|
|
318
|
-
// Persist to session if sessionId provided
|
|
319
292
|
if (sessionId) {
|
|
320
|
-
this._appendToSession(sessionId, messages, {
|
|
321
|
-
role: 'assistant',
|
|
322
|
-
content: fullContent,
|
|
323
|
-
});
|
|
293
|
+
this._appendToSession(sessionId, messages, { role: 'assistant', content: fullContent });
|
|
324
294
|
}
|
|
325
295
|
|
|
326
|
-
|
|
296
|
+
onEvent({
|
|
327
297
|
type: 'done',
|
|
328
298
|
metrics: {
|
|
329
299
|
ttft,
|
|
@@ -332,9 +302,16 @@ export class ModelLab {
|
|
|
332
302
|
promptTokens,
|
|
333
303
|
completionTokens,
|
|
334
304
|
generationTime,
|
|
335
|
-
memoryUsage,
|
|
305
|
+
memoryUsage: null,
|
|
336
306
|
},
|
|
337
|
-
};
|
|
307
|
+
});
|
|
308
|
+
|
|
309
|
+
if (rt.type === 'ollama') {
|
|
310
|
+
try {
|
|
311
|
+
const mem = await this.getOllamaMemoryUsage(rt.endpoint);
|
|
312
|
+
if (mem) onEvent({ type: 'memory', usage: mem });
|
|
313
|
+
} catch { /* ignore */ }
|
|
314
|
+
}
|
|
338
315
|
}
|
|
339
316
|
|
|
340
317
|
_buildParameterBody(params) {
|
|
@@ -342,6 +342,9 @@ export class ProcessManager {
|
|
|
342
342
|
this._stalledAgents = new Set(); // agentIds already flagged as stalled (avoids duplicate broadcasts)
|
|
343
343
|
this._exitHandled = new Set();
|
|
344
344
|
this._resultReceived = new Set();
|
|
345
|
+
this._truncationFlagged = new Set(); // agentIds that have had any truncation in their session
|
|
346
|
+
this._lastAssistantBlocks = new Map(); // agentId -> last assistant content blocks (for abandoned tool_use detection)
|
|
347
|
+
this._previousCacheReadTokens = new Map(); // agentId -> previous turn's cacheReadTokens
|
|
345
348
|
|
|
346
349
|
this._stallWatchdog = setInterval(() => this._checkStalls(), STALL_CHECK_INTERVAL_MS);
|
|
347
350
|
if (this._stallWatchdog.unref) this._stallWatchdog.unref();
|
|
@@ -355,7 +358,8 @@ export class ProcessManager {
|
|
|
355
358
|
if (!agent || agent.status !== 'running') continue;
|
|
356
359
|
const lastActivity = agent.lastActivity ? new Date(agent.lastActivity).getTime() : now;
|
|
357
360
|
const silentMs = now - lastActivity;
|
|
358
|
-
|
|
361
|
+
const effectiveStallMs = this._truncationFlagged.has(agentId) ? 2 * 60_000 : STALL_THRESHOLD_MS;
|
|
362
|
+
if (silentMs < effectiveStallMs) {
|
|
359
363
|
if (this._stalledAgents.has(agentId)) {
|
|
360
364
|
this._stalledAgents.delete(agentId);
|
|
361
365
|
registry.update(agentId, { stalled: false });
|
|
@@ -402,6 +406,9 @@ export class ProcessManager {
|
|
|
402
406
|
setTimeout(() => this._exitHandled.delete(agentId), 30_000);
|
|
403
407
|
this._stalledAgents.delete(agentId);
|
|
404
408
|
this._resultReceived.delete(agentId);
|
|
409
|
+
this._truncationFlagged.delete(agentId);
|
|
410
|
+
this._lastAssistantBlocks.delete(agentId);
|
|
411
|
+
this._previousCacheReadTokens.delete(agentId);
|
|
405
412
|
const throttle = this._streamThrottle.get(agentId);
|
|
406
413
|
if (throttle?.timer) clearTimeout(throttle.timer);
|
|
407
414
|
this._streamThrottle.delete(agentId);
|
|
@@ -435,6 +442,9 @@ export class ProcessManager {
|
|
|
435
442
|
this.peakContextUsage.delete(agent.id);
|
|
436
443
|
this.pendingMessages.delete(agent.id);
|
|
437
444
|
this._stalledAgents.delete(agent.id);
|
|
445
|
+
this._truncationFlagged.delete(agent.id);
|
|
446
|
+
this._lastAssistantBlocks.delete(agent.id);
|
|
447
|
+
this._previousCacheReadTokens.delete(agent.id);
|
|
438
448
|
|
|
439
449
|
if (this.daemon.locks) this.daemon.locks.release(agent.id);
|
|
440
450
|
|
|
@@ -567,6 +577,16 @@ export class ProcessManager {
|
|
|
567
577
|
|
|
568
578
|
this.handles.delete(agent.id);
|
|
569
579
|
this._stalledAgents.delete(agent.id);
|
|
580
|
+
this._truncationFlagged.delete(agent.id);
|
|
581
|
+
this._lastAssistantBlocks.delete(agent.id);
|
|
582
|
+
this._previousCacheReadTokens.delete(agent.id);
|
|
583
|
+
|
|
584
|
+
const throttle = this._streamThrottle.get(agent.id);
|
|
585
|
+
if (throttle?.timer) clearTimeout(throttle.timer);
|
|
586
|
+
this._streamThrottle.delete(agent.id);
|
|
587
|
+
|
|
588
|
+
this.peakContextUsage.delete(agent.id);
|
|
589
|
+
this.pendingMessages.delete(agent.id);
|
|
570
590
|
|
|
571
591
|
if (this.daemon.locks) this.daemon.locks.release(agent.id);
|
|
572
592
|
|
|
@@ -884,7 +904,19 @@ export class ProcessManager {
|
|
|
884
904
|
// Handoffs are injected only when the agent has a real task or is a rotation.
|
|
885
905
|
const hasTask = !!(config.prompt && config.prompt.trim().length > 0);
|
|
886
906
|
const isRotation = !!(config.isRotation);
|
|
887
|
-
|
|
907
|
+
let introContext = introducer.generateContext(agent, { taskNegotiation, hasTask, isRotation });
|
|
908
|
+
|
|
909
|
+
// Intro context size warning and optional truncation (Change 7)
|
|
910
|
+
if (introContext) {
|
|
911
|
+
const introLen = introContext.length;
|
|
912
|
+
const maxIntroChars = this.daemon.config?.maxIntroContextChars || 10000;
|
|
913
|
+
if (introLen > 8000) {
|
|
914
|
+
console.warn(`[Groove] Intro context for ${agent.name} is ${introLen} chars — consider reducing CLAUDE.md.`);
|
|
915
|
+
}
|
|
916
|
+
if (introLen > maxIntroChars) {
|
|
917
|
+
introContext = introContext.slice(0, maxIntroChars) + '\n\n[Intro context truncated at ' + maxIntroChars + ' chars]';
|
|
918
|
+
}
|
|
919
|
+
}
|
|
888
920
|
|
|
889
921
|
// Ensure the project map is fresh before the new agent reads CLAUDE.md
|
|
890
922
|
if (this.daemon.journalist) {
|
|
@@ -1050,6 +1082,9 @@ For normal file edits within your scope, proceed without review.
|
|
|
1050
1082
|
this.handles.delete(agent.id);
|
|
1051
1083
|
this._stalledAgents.delete(agent.id);
|
|
1052
1084
|
this._resultReceived.delete(agent.id);
|
|
1085
|
+
this._truncationFlagged.delete(agent.id);
|
|
1086
|
+
this._lastAssistantBlocks.delete(agent.id);
|
|
1087
|
+
this._previousCacheReadTokens.delete(agent.id);
|
|
1053
1088
|
|
|
1054
1089
|
// Clean up stream throttle so pending timers don't fire for dead agents
|
|
1055
1090
|
const throttle = this._streamThrottle.get(agent.id);
|
|
@@ -1338,6 +1373,60 @@ For normal file edits within your scope, proceed without review.
|
|
|
1338
1373
|
updates.stalled = false;
|
|
1339
1374
|
}
|
|
1340
1375
|
|
|
1376
|
+
// --- Incomplete response / truncation detection (Change 1) ---
|
|
1377
|
+
if (output.type === 'activity' && output.subtype === 'assistant' && Array.isArray(output.data)) {
|
|
1378
|
+
const blocks = output.data;
|
|
1379
|
+
let truncated = false;
|
|
1380
|
+
|
|
1381
|
+
// Check 1: last text block ends mid-sentence (no terminal punctuation).
|
|
1382
|
+
// Skip short responses (<40 chars) — "OK", "Done", "Sure" are legitimate.
|
|
1383
|
+
const textBlocks = blocks.filter(b => b.type === 'text' && b.text);
|
|
1384
|
+
if (textBlocks.length > 0) {
|
|
1385
|
+
const lastText = textBlocks[textBlocks.length - 1].text.trimEnd();
|
|
1386
|
+
if (lastText.length >= 40 && !/[.?!}\])`'"]$/.test(lastText) && !/```\s*$/.test(lastText)) {
|
|
1387
|
+
truncated = true;
|
|
1388
|
+
}
|
|
1389
|
+
}
|
|
1390
|
+
|
|
1391
|
+
// Check 2: previous turn had tool_use blocks but this turn is near-empty
|
|
1392
|
+
// (<20 chars of text, no new tool calls). In normal flow the assistant
|
|
1393
|
+
// processes tool results and produces a substantive follow-up; a near-empty
|
|
1394
|
+
// response suggests the tool call was abandoned or its result was lost.
|
|
1395
|
+
const prevBlocks = this._lastAssistantBlocks.get(agentId);
|
|
1396
|
+
if (prevBlocks && prevBlocks.some(b => b.type === 'tool_use')) {
|
|
1397
|
+
const totalCurrentText = textBlocks.reduce((sum, b) => sum + (b.text?.length || 0), 0);
|
|
1398
|
+
if (totalCurrentText < 20 && !blocks.some(b => b.type === 'tool_use')) {
|
|
1399
|
+
truncated = true;
|
|
1400
|
+
}
|
|
1401
|
+
}
|
|
1402
|
+
|
|
1403
|
+
this._lastAssistantBlocks.set(agentId, blocks);
|
|
1404
|
+
|
|
1405
|
+
if (truncated) {
|
|
1406
|
+
this._truncationFlagged.add(agentId);
|
|
1407
|
+
const prev = agent.consecutiveTruncations || 0;
|
|
1408
|
+
updates.truncationSuspected = true;
|
|
1409
|
+
updates.consecutiveTruncations = prev + 1;
|
|
1410
|
+
classifier.addEvent(agentId, { type: 'error', subtype: 'truncated_response', timestamp: Date.now() });
|
|
1411
|
+
} else if (agent.truncationSuspected) {
|
|
1412
|
+
updates.truncationSuspected = false;
|
|
1413
|
+
updates.consecutiveTruncations = 0;
|
|
1414
|
+
}
|
|
1415
|
+
}
|
|
1416
|
+
|
|
1417
|
+
// --- Cache reset detection (Change 5) ---
|
|
1418
|
+
if (output.cacheReadTokens !== undefined) {
|
|
1419
|
+
const prevCache = this._previousCacheReadTokens.get(agentId);
|
|
1420
|
+
if (prevCache !== undefined && prevCache > 50_000) {
|
|
1421
|
+
const drop = prevCache - output.cacheReadTokens;
|
|
1422
|
+
if (drop > prevCache * 0.5) {
|
|
1423
|
+
classifier.addEvent(agentId, { type: 'error', subtype: 'cache_reset', timestamp: Date.now() });
|
|
1424
|
+
updates.cacheResetDetected = true;
|
|
1425
|
+
}
|
|
1426
|
+
}
|
|
1427
|
+
this._previousCacheReadTokens.set(agentId, output.cacheReadTokens);
|
|
1428
|
+
}
|
|
1429
|
+
|
|
1341
1430
|
// Token tracking — feed subsystems with full breakdown
|
|
1342
1431
|
if (output.tokensUsed !== undefined && output.tokensUsed > 0) {
|
|
1343
1432
|
updates.tokensUsed = agent.tokensUsed + output.tokensUsed;
|