groove-dev 0.27.26 → 0.27.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +0 -10
- package/SECURITY_SWEEP.md +228 -0
- package/node_modules/@groove-dev/cli/package.json +1 -1
- package/node_modules/@groove-dev/daemon/package.json +1 -1
- package/node_modules/@groove-dev/daemon/src/introducer.js +7 -7
- package/node_modules/@groove-dev/daemon/src/journalist.js +36 -6
- package/node_modules/@groove-dev/daemon/src/memory.js +29 -10
- package/node_modules/@groove-dev/daemon/src/process.js +17 -12
- package/node_modules/@groove-dev/daemon/src/providers/codex.js +34 -11
- package/node_modules/@groove-dev/daemon/src/rotator.js +24 -1
- package/node_modules/@groove-dev/daemon/test/introducer.test.js +63 -0
- package/node_modules/@groove-dev/daemon/test/journalist.test.js +106 -0
- package/node_modules/@groove-dev/daemon/test/memory.test.js +49 -0
- package/node_modules/@groove-dev/daemon/test/rotator.test.js +99 -0
- package/node_modules/@groove-dev/gui/package.json +1 -1
- package/package.json +1 -1
- package/packages/cli/package.json +1 -1
- package/packages/daemon/package.json +1 -1
- package/packages/daemon/src/introducer.js +7 -7
- package/packages/daemon/src/journalist.js +36 -6
- package/packages/daemon/src/memory.js +29 -10
- package/packages/daemon/src/process.js +17 -12
- package/packages/daemon/src/providers/codex.js +34 -11
- package/packages/daemon/src/rotator.js +24 -1
- package/packages/gui/package.json +1 -1
- package/MUST_FIX_ISSUES.md +0 -305
|
@@ -427,12 +427,18 @@ export class ProcessManager {
|
|
|
427
427
|
taskNegotiation = await this.negotiateTaskSplit(agent, sameRole);
|
|
428
428
|
}
|
|
429
429
|
|
|
430
|
-
//
|
|
431
|
-
//
|
|
432
|
-
//
|
|
433
|
-
//
|
|
434
|
-
|
|
435
|
-
const
|
|
430
|
+
// Compute hasTask from actual prompt content — agents spawned without a
|
|
431
|
+
// prompt should NOT receive handoff history (prevents cross-team contamination).
|
|
432
|
+
// Discoveries + constraints are always injected (project knowledge).
|
|
433
|
+
// Handoffs are injected only when the agent has a real task or is a rotation.
|
|
434
|
+
const hasTask = !!(config.prompt && config.prompt.trim().length > 0);
|
|
435
|
+
const isRotation = !!(config.isRotation);
|
|
436
|
+
const introContext = introducer.generateContext(agent, { taskNegotiation, hasTask, isRotation });
|
|
437
|
+
|
|
438
|
+
// Ensure the project map is fresh before the new agent reads CLAUDE.md
|
|
439
|
+
if (this.daemon.journalist) {
|
|
440
|
+
await this.daemon.journalist.ensureFresh(30000);
|
|
441
|
+
}
|
|
436
442
|
|
|
437
443
|
// Track cold-start savings — agent gets context from planner/journalist/team
|
|
438
444
|
// instead of exploring the codebase from scratch
|
|
@@ -597,7 +603,7 @@ For normal file edits within your scope, proceed without review.
|
|
|
597
603
|
|
|
598
604
|
this.daemon.broadcast({ type: 'agent:exit', agentId: agent.id, code: code || 0, signal, status });
|
|
599
605
|
if (this.daemon.integrations) this.daemon.integrations.refreshMcpJson();
|
|
600
|
-
if (status === 'completed' && this.daemon.journalist) this.daemon.journalist.
|
|
606
|
+
if (status === 'completed' && this.daemon.journalist) this.daemon.journalist.requestSynthesis('completion');
|
|
601
607
|
this._checkPhase2(agent.id);
|
|
602
608
|
|
|
603
609
|
// Auto-trigger idle QC + process cross-scope handoffs
|
|
@@ -783,10 +789,9 @@ For normal file edits within your scope, proceed without review.
|
|
|
783
789
|
}
|
|
784
790
|
}
|
|
785
791
|
|
|
786
|
-
// Trigger journalist synthesis
|
|
787
|
-
// map is fresh for the next agent that spawns (don't wait for 120s cycle)
|
|
792
|
+
// Trigger journalist synthesis on completion (event-driven, debounced)
|
|
788
793
|
if (finalStatus === 'completed' && this.daemon.journalist) {
|
|
789
|
-
this.daemon.journalist.
|
|
794
|
+
this.daemon.journalist.requestSynthesis('completion');
|
|
790
795
|
}
|
|
791
796
|
|
|
792
797
|
// Phase 2 auto-spawn: check if all phase 1 agents for a team are done
|
|
@@ -1168,7 +1173,7 @@ For normal file edits within your scope, proceed without review.
|
|
|
1168
1173
|
oldTokens: agentData?.tokensUsed || 0,
|
|
1169
1174
|
contextUsage: agentData?.contextUsage || 0,
|
|
1170
1175
|
brief: brief.slice(0, 4000),
|
|
1171
|
-
}, agent.workingDir);
|
|
1176
|
+
}, agent.workingDir, agent.teamId);
|
|
1172
1177
|
} catch { /* best-effort */ }
|
|
1173
1178
|
}
|
|
1174
1179
|
|
|
@@ -1369,7 +1374,7 @@ For normal file edits within your scope, proceed without review.
|
|
|
1369
1374
|
registry.update(newAgent.id, { status: finalStatus, pid: null });
|
|
1370
1375
|
this.daemon.broadcast({ type: 'agent:exit', agentId: newAgent.id, code, signal, status: finalStatus });
|
|
1371
1376
|
if (finalStatus === 'completed' && this.daemon.journalist) {
|
|
1372
|
-
this.daemon.journalist.
|
|
1377
|
+
this.daemon.journalist.requestSynthesis('completion');
|
|
1373
1378
|
}
|
|
1374
1379
|
});
|
|
1375
1380
|
|
|
@@ -95,6 +95,7 @@ export class CodexProvider extends Provider {
|
|
|
95
95
|
if (agent.prompt) args.push(agent.prompt);
|
|
96
96
|
|
|
97
97
|
this._currentModel = agent.model;
|
|
98
|
+
this._sessionInputTokens = 0;
|
|
98
99
|
|
|
99
100
|
return {
|
|
100
101
|
command: 'codex',
|
|
@@ -109,6 +110,11 @@ export class CodexProvider extends Provider {
|
|
|
109
110
|
return { command: 'codex', args, env: {} };
|
|
110
111
|
}
|
|
111
112
|
|
|
113
|
+
_getMaxContext() {
|
|
114
|
+
const model = CodexProvider.models.find((m) => m.id === this._currentModel);
|
|
115
|
+
return model?.maxContext || 200000;
|
|
116
|
+
}
|
|
117
|
+
|
|
112
118
|
switchModel(agent, newModel) {
|
|
113
119
|
return false; // Codex doesn't support mid-session model switch
|
|
114
120
|
}
|
|
@@ -175,36 +181,48 @@ export class CodexProvider extends Provider {
|
|
|
175
181
|
|
|
176
182
|
case 'item.completed': {
|
|
177
183
|
const item = event.item || {};
|
|
184
|
+
|
|
185
|
+
// Accumulate usage for intermediate context estimation.
|
|
186
|
+
// Codex only reports full contextUsage at turn.completed — without this,
|
|
187
|
+
// the rotator sees stale contextUsage between turns and never triggers.
|
|
188
|
+
if (event.usage) {
|
|
189
|
+
this._sessionInputTokens += event.usage.input_tokens || 0;
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
let result = null;
|
|
178
193
|
if (item.type === 'agent_message') {
|
|
179
|
-
|
|
194
|
+
result = {
|
|
180
195
|
type: 'activity', subtype: 'assistant',
|
|
181
196
|
data: [{ type: 'text', text: item.text || '' }],
|
|
182
197
|
};
|
|
183
|
-
}
|
|
184
|
-
if (item.type === 'command_execution') {
|
|
198
|
+
} else if (item.type === 'command_execution') {
|
|
185
199
|
const output = (item.aggregated_output || '').slice(0, 2000);
|
|
186
|
-
|
|
200
|
+
result = {
|
|
187
201
|
type: 'activity', subtype: 'assistant',
|
|
188
202
|
data: [
|
|
189
203
|
{ type: 'tool_use', id: item.id || 'exec', name: 'Bash', input: { command: item.command } },
|
|
190
204
|
...(output ? [{ type: 'text', text: output }] : []),
|
|
191
205
|
],
|
|
192
206
|
};
|
|
193
|
-
}
|
|
194
|
-
if (item.type === 'todo_list') {
|
|
207
|
+
} else if (item.type === 'todo_list') {
|
|
195
208
|
const steps = (item.items || []).map((s) => `${s.completed ? '✓' : '○'} ${s.text}`).join('\n');
|
|
196
|
-
|
|
209
|
+
result = {
|
|
197
210
|
type: 'activity', subtype: 'assistant',
|
|
198
211
|
data: [{ type: 'text', text: steps }],
|
|
199
212
|
};
|
|
200
|
-
}
|
|
201
|
-
|
|
202
|
-
return {
|
|
213
|
+
} else if (item.type === 'file_edit' || item.type === 'file_write' || item.type === 'file_read') {
|
|
214
|
+
result = {
|
|
203
215
|
type: 'activity', subtype: 'assistant',
|
|
204
216
|
data: [{ type: 'tool_use', id: item.id || 'file', name: item.type === 'file_read' ? 'Read' : item.type === 'file_write' ? 'Write' : 'Edit', input: { path: item.path || item.file || '' } }],
|
|
205
217
|
};
|
|
206
218
|
}
|
|
207
|
-
|
|
219
|
+
|
|
220
|
+
// Attach intermediate context estimate so all 7 layers see Codex progress
|
|
221
|
+
if (result && this._sessionInputTokens > 0) {
|
|
222
|
+
result.contextUsage = this._sessionInputTokens / this._getMaxContext();
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
return result;
|
|
208
226
|
}
|
|
209
227
|
|
|
210
228
|
case 'turn.completed': {
|
|
@@ -215,11 +233,15 @@ export class CodexProvider extends Provider {
|
|
|
215
233
|
const outputTokens = usage.output_tokens || 0;
|
|
216
234
|
const cachedTokens = usage.cached_input_tokens || 0;
|
|
217
235
|
const totalTokens = inputTokens + outputTokens;
|
|
236
|
+
const cacheCreationTokens = cachedTokens > 0 ? Math.max(0, inputTokens - cachedTokens) : 0;
|
|
218
237
|
|
|
219
238
|
const model = CodexProvider.models.find((m) => m.id === this._currentModel);
|
|
220
239
|
const pricing = model?.pricing;
|
|
221
240
|
const maxContext = model?.maxContext || 200000;
|
|
222
241
|
|
|
242
|
+
// Sync accumulator to actual cumulative value from turn completion
|
|
243
|
+
this._sessionInputTokens = inputTokens;
|
|
244
|
+
|
|
223
245
|
let estimatedCostUsd = 0;
|
|
224
246
|
if (pricing) {
|
|
225
247
|
const newInput = inputTokens - cachedTokens;
|
|
@@ -235,6 +257,7 @@ export class CodexProvider extends Provider {
|
|
|
235
257
|
inputTokens,
|
|
236
258
|
outputTokens,
|
|
237
259
|
cacheReadTokens: cachedTokens,
|
|
260
|
+
cacheCreationTokens,
|
|
238
261
|
contextUsage: inputTokens / maxContext,
|
|
239
262
|
estimatedCostUsd,
|
|
240
263
|
costSource: pricing ? 'calculated' : 'estimated',
|
|
@@ -31,6 +31,7 @@ export class Rotator extends EventEmitter {
|
|
|
31
31
|
this.rotationHistory = [];
|
|
32
32
|
this.rotating = new Set();
|
|
33
33
|
this.lastRotationTime = new Map(); // agentId -> timestamp of last rotation
|
|
34
|
+
this._lastContextState = new Map(); // agentId -> { contextUsage, timestamp }
|
|
34
35
|
this.enabled = false;
|
|
35
36
|
this.liveScores = {};
|
|
36
37
|
this.scoreHistory = {};
|
|
@@ -180,6 +181,25 @@ export class Rotator extends EventEmitter {
|
|
|
180
181
|
continue;
|
|
181
182
|
}
|
|
182
183
|
|
|
184
|
+
// Stale context fallback — safety net for providers (like Codex) that don't
|
|
185
|
+
// report intermediate contextUsage. If contextUsage hasn't changed in 120+
|
|
186
|
+
// seconds but tokens are being consumed, estimate from total tokens.
|
|
187
|
+
const knownCtx = this._lastContextState.get(agent.id);
|
|
188
|
+
if (!knownCtx || knownCtx.contextUsage !== agent.contextUsage) {
|
|
189
|
+
this._lastContextState.set(agent.id, { contextUsage: agent.contextUsage, timestamp: Date.now() });
|
|
190
|
+
} else if (agent.tokensUsed > 0 && (Date.now() - knownCtx.timestamp) >= 120_000) {
|
|
191
|
+
const providerClass = getProvider(agent.provider)?.constructor;
|
|
192
|
+
const models = providerClass?.models || [];
|
|
193
|
+
const model = models.find((m) => m.id === agent.model) || models[0];
|
|
194
|
+
const maxContext = model?.maxContext || 200000;
|
|
195
|
+
const estimatedContext = agent.tokensUsed / maxContext;
|
|
196
|
+
if (estimatedContext >= HARD_CEILING) {
|
|
197
|
+
console.log(` Rotator: ${agent.name} estimated context ${Math.round(estimatedContext * 100)}% (stale contextUsage fallback)`);
|
|
198
|
+
await this.rotate(agent.id, { reason: 'estimated_context_ceiling' });
|
|
199
|
+
continue;
|
|
200
|
+
}
|
|
201
|
+
}
|
|
202
|
+
|
|
183
203
|
// Cooldown — skip threshold/quality rotation if recently rotated
|
|
184
204
|
if (this._isOnCooldown(agent.id)) continue;
|
|
185
205
|
|
|
@@ -274,7 +294,7 @@ export class Rotator extends EventEmitter {
|
|
|
274
294
|
oldTokens: agent.tokensUsed,
|
|
275
295
|
contextUsage: agent.contextUsage,
|
|
276
296
|
brief: brief.slice(0, 4000),
|
|
277
|
-
}, agent.workingDir);
|
|
297
|
+
}, agent.workingDir, agent.teamId);
|
|
278
298
|
}
|
|
279
299
|
|
|
280
300
|
const record = {
|
|
@@ -312,6 +332,7 @@ export class Rotator extends EventEmitter {
|
|
|
312
332
|
workingDir: agent.workingDir,
|
|
313
333
|
name: agent.name,
|
|
314
334
|
teamId: agent.teamId,
|
|
335
|
+
isRotation: true,
|
|
315
336
|
});
|
|
316
337
|
} catch (spawnErr) {
|
|
317
338
|
// Spawn failed — re-add old agent so the user can see and retry.
|
|
@@ -499,6 +520,7 @@ export class Rotator extends EventEmitter {
|
|
|
499
520
|
const naturalCompactions = this.rotationHistory.filter((r) => r.reason === 'natural_compaction').length;
|
|
500
521
|
const hardCeilingRotations = this.rotationHistory.filter((r) => r.reason === 'hard_ceiling').length;
|
|
501
522
|
const tokenCeilingRotations = this.rotationHistory.filter((r) => r.reason === 'token_ceiling').length;
|
|
523
|
+
const estimatedCeilingRotations = this.rotationHistory.filter((r) => r.reason === 'estimated_context_ceiling').length;
|
|
502
524
|
return {
|
|
503
525
|
enabled: this.enabled,
|
|
504
526
|
totalRotations,
|
|
@@ -508,6 +530,7 @@ export class Rotator extends EventEmitter {
|
|
|
508
530
|
naturalCompactions,
|
|
509
531
|
hardCeilingRotations,
|
|
510
532
|
tokenCeilingRotations,
|
|
533
|
+
estimatedCeilingRotations,
|
|
511
534
|
rotating: Array.from(this.rotating),
|
|
512
535
|
liveScores: this.liveScores,
|
|
513
536
|
scoreHistory: this.scoreHistory,
|
package/MUST_FIX_ISSUES.md
DELETED
|
@@ -1,305 +0,0 @@
|
|
|
1
|
-
LAYER 1: ELECTRON SHELL (packages/desktop/main.js) ----------------------------------------------------
|
|
2
|
-
|
|
3
|
-
The desktop app itself creates memory pressure before any agent work begins:
|
|
4
|
-
|
|
5
|
-
CRITICAL - Daemon heap capped at 512MB (line 178): The daemon process managing ALL agents, the GUI server, WebSocket broadcasts, journalist synthesis, token tracking, and state persistence gets only 512MB. When GC hits this ceiling, the event loop blocks for 100-300ms and the Electron app freezes waiting for daemon responses. This is why button clicks take 5s to register.
|
|
6
|
-
|
|
7
|
-
CRITICAL - Listener accumulation on window reuse (lines 862-914): When reopening a project, the home window is reused. Lines 872-873 remove the close/closed listeners, but the console-message, render-process-gone, and permission handlers are re-registered without removing the old ones. Each reuse adds duplicate listeners, so N window reuses leave N duplicate handlers firing per event.
|
|
8
|
-
|
|
9
|
-
CRITICAL - IPC broadcast flooding (process.js lines 926-950) Every agent output is broadcast to all Electron windows. Throttled to 4/sec per agent, but with 4+ agents that's 16+ JSON serializations per second over IPC with no backpressure. The renderer can't keep up, and the IPC message queue grows unbounded in the main process.
|
|
10
|
-
|
|
11
|
-
HIGH - Subscription polling on every window focus (lines 301, 886) Every time you click the Electron window, it triggers a subscription check to the daemon. With multiple windows, this creates constant HTTP traffic even when idle.
|
|
12
|
-
|
|
13
|
-
HIGH - stdout buffer string operations (process.js lines 844-854) Per-agent stdout buffer grows to 1MB, then the "oldest half" is discarded via .indexOf() and .slice() on a multi-MB string. This runs on EVERY chunk. With streaming agents, it's constant multi-MB string allocations and GC pressure.
|
|
14
|
-
|
|
15
|
-
LAYER 2: PROCESS MANAGER (packages/daemon/src/process.js) -----------------------------------------------------------
|
|
16
|
-
|
|
17
|
-
HIGH - Stream throttle timers leak (lines 928-948) Each agent gets a throttle object in a Map. If agent crashes or is killed while a timer is pending, the callback still fires, broadcasting to a dead agent ID. The closure over output data never gets GC'd. Long sessions with many spawns = unbounded map growth.
|
|
18
|
-
|
|
19
|
-
HIGH - Resume doesn't clean up maps (lines 1251-1367) When resuming a completed agent, peakContextUsage, pendingMessages, and _streamThrottle entries from the old agent ID are never cleared. These maps grow with every resume cycle.
|
|
20
|
-
|
|
21
|
-
HIGH - Log stream file descriptor leaks (lines 545, 1284) Each spawn creates a write stream. On exit, logStream.end() is called but file descriptors aren't guaranteed released if I/O is slow. Resume creates new streams for the same agent. Long sessions accumulate hundreds of open .log file descriptors.
|
|
22
|
-
|
|
23
|
-
LAYER 3: JOURNALIST (packages/daemon/src/journalist.js) ---------------------------------------------------------
|
|
24
|
-
|
|
25
|
-
This is where "infinite sessions" should work. It doesn't fully deliver.
|
|
26
|
-
|
|
27
|
-
CRITICAL - 40K char budget is actually 3-5K tokens (line 10) The comment says "~10k tokens budget" but 40,000 UTF-8 chars of chat-style logs is closer to 3-5K tokens. The synthesis prompt can only fit the last 20-30 agent events. In active sessions, 60-70% of recent activity is dropped from synthesis.
|
|
28
|
-
|
|
29
|
-
CRITICAL - Exploration tools excluded from synthesis (lines 211-218) Read, Glob, and Grep events are filtered into a separate explorationEntries array and never included in the synthesis prompt. New agents don't know what files were examined. They re-explore the same files, wasting tokens.
|
|
30
|
-
|
|
31
|
-
HIGH - Handoff brief is only 7K tokens total (lines 706-784) The brief pulls from 9 sources, each truncated:
|
|
32
|
-
|
|
33
|
-
-
|
|
34
|
-
Errors: 10 entries x 300 chars = 3K chars
|
|
35
|
-
-
|
|
36
|
-
Results: last 3 joined = 3K chars
|
|
37
|
-
-
|
|
38
|
-
File changes: 20 files = 2K chars
|
|
39
|
-
-
|
|
40
|
-
Discoveries: 10 entries = 2K chars
|
|
41
|
-
-
|
|
42
|
-
Constraints: 2K chars
|
|
43
|
-
-
|
|
44
|
-
User feedback: 5 x 500 = 2.5K chars
|
|
45
|
-
-
|
|
46
|
-
Recent tools: 5 x 80 = 400 chars
|
|
47
|
-
-
|
|
48
|
-
Rotation history: 3K chars
|
|
49
|
-
-
|
|
50
|
-
Original task: 2K chars
|
|
51
|
-
Total ~20K chars (the caps listed above sum to 19.9K) = ~7K tokens. The original agent had full context; the new agent gets only a structured digest. Nuance, intermediate reasoning, and failed experiments are gone.
|
|
52
|
-
|
|
53
|
-
HIGH - Only last 5 tool calls in rotation history (lines 755-760): A complex multi-file refactor needs 10-20 tool calls for context. Only the last 5 survive, each truncated to 80 chars. The handoff says "agent was editing files" but not "agent tried approach A, it failed because X, then tried B."
|
|
54
|
-
|
|
55
|
-
MEDIUM - Synthesis cycle is 2 min, rotation check is 15 sec (line 9; rotator.js line 11): The rotator runs 8 checks per synthesis cycle, so a rotation can land well before the next synthesis. If an agent rotates at 0:15, the journalist doesn't synthesize until 2:00, and the new agent starts without 1:45 of unsynthesized activity.
|
|
56
|
-
|
|
57
|
-
LAYER 4: ROTATOR (packages/daemon/src/rotator.js) ---------------------------------------------------
|
|
58
|
-
|
|
59
|
-
CRITICAL - 5-minute cooldown prevents fast recovery (line 16) After rotation, the new agent is locked in for 5 minutes regardless of quality. If the handoff was bad and the new agent is immediately degraded, nothing can be done until cooldown expires. Total degradation window: 8-10 minutes (3 min to detect + 5 min cooldown).
|
|
60
|
-
|
|
61
|
-
HIGH - No post-rotation validation (lines 233-237) Pre-rotation session is scored and recorded. But there is NO measurement of post-rotation quality. System assumes rotation improved things without verifying. Could be making things worse.
|
|
62
|
-
|
|
63
|
-
HIGH - Kill+respawn loses in-flight state (lines 278-301): Between kill and respawn, in-flight variables, cached results, the classifier window, score history, and live quality tracking are all destroyed. The new agent starts from the 7K-token brief only. This is a warm start, not a true zero cold start.
|
|
64
|
-
|
|
65
|
-
HIGH - Idle check blocks quality rotation (lines 201-213): Quality rotation only fires if the agent has been idle for 10+ seconds. If the user is actively sending messages, an agent scoring 30/100 won't rotate because it's "busy." A planner agent producing bad plans while the user reads its output is degradation that the system ignores.
|
|
66
|
-
|
|
67
|
-
MEDIUM - Handoff chain keeps only 10 rotations (memory.js line 18) At 5-minute rotations, 10 rotations = 50 minutes of history. Multi-hour tasks lose their full lineage. Agent #11 can't see what agent #1 learned.
|
|
68
|
-
|
|
69
|
-
LAYER 5: ADAPTIVE THRESHOLDS (packages/daemon/src/adaptive.js) ----------------------------------------------------------------
|
|
70
|
-
|
|
71
|
-
HIGH - Session scoring baseline is arbitrary (line 96) Score starts at 70. Errors cost 5 points each, repetitions cost 6. But these weights have no empirical calibration. A fatal error and a benign warning both cost 5 points. The scoring doesn't distinguish.
|
|
72
|
-
|
|
73
|
-
HIGH - Asymmetric drift (+2%/-5%) (lines 8-9) Good sessions nudge threshold up slowly (+2%). Bad sessions yank it down fast (-5%). Over many sessions, thresholds oscillate unpredictably rather than converging.
|
|
74
|
-
|
|
75
|
-
HIGH - Convergence detection is set but never read (lines 139-149): The profile.converged flag is computed and stored, but the rotator never checks it. It's dead data.
|
|
76
|
-
|
|
77
|
-
MEDIUM - No per-role quality thresholds (rotator.js line 12) QUALITY_THRESHOLD=40 applies to all roles. Planners naturally have higher error rates (exploring, revising). Backend agents on stable code have lower rates. Same threshold for both.
|
|
78
|
-
|
|
79
|
-
LAYER 6: TOKEN TRACKER (packages/daemon/src/tokentracker.js) --------------------------------------------------------------
|
|
80
|
-
|
|
81
|
-
HIGH - Planner token ceiling is 50M tokens (rotator.js lines 111-112) Role multiplier for planner is 10x the 5M base = 50M tokens. That's ~180K output tokens. Degradation happens long before 50M, but the safety ceiling never triggers. Planners can burn 50M tokens of garbage before forced rotation.
|
|
82
|
-
|
|
83
|
-
MEDIUM - Cache hit rate is global, not per-agent (lines 205-216) If planner has 80% hit rate and fullstack has 20%, global rate is ~50%. Can't detect which agents benefit from caching.
|
|
84
|
-
|
|
85
|
-
MEDIUM - Cold-start savings estimation is inflated (lines 8-14, 182-187) Per-file cost is flat (15 tokens). A 50-line config and a 10K-line monolith cost the same. Reported savings are overstated.
|
|
86
|
-
|
|
87
|
-
MEDIUM - Journalist synthesis cost not tracked separately (lines 237-258) Internal overhead is tracked by __ prefix IDs, but journalist synthesis tokens may not be properly attributed.
|
|
88
|
-
|
|
89
|
-
LAYER 7: MEMORY (packages/daemon/src/memory.js) -------------------------------------------------
|
|
90
|
-
|
|
91
|
-
CRITICAL - Layer 7 skips first-generation agents (process.js line 431): Discoveries and handoff history are only injected when hasTask=true, where hasTask = !!config.prompt. Agents spawned with just a role and no prompt never receive prior discoveries, so they repeat mistakes that Layer 7 already captured.
|
|
92
|
-
|
|
93
|
-
HIGH - Specializations recorded but never used (lines 310-389) Quality profiles per agent/role are stored. They are NEVER read for routing decisions. The router doesn't check "which agent specialization scores best for this file domain." It's write-only data.
|
|
94
|
-
|
|
95
|
-
HIGH - Handoff briefs degrade over generations (lines 154-219) By rotation 5-10, the brief is summarizing summaries. Content degradation compounds. There's no quality scoring to flag "this was a clean rotation" vs "this was an emergency rotation."
|
|
96
|
-
|
|
97
|
-
MEDIUM - No mid-session memory injection (introducer.js lines 345-381) Memory is injected once at spawn. If journalist discovers a critical fix during the agent's session, the running agent never learns it. Only the next rotated agent sees it.
|
|
98
|
-
|
|
99
|
-
LAYER 8: ROUTER + CLASSIFIER (router.js, classifier.js) ---------------------------------------------------------
|
|
100
|
-
|
|
101
|
-
CRITICAL - Classifier and Router are disconnected (entire design): The classifier produces tier classifications (light/medium/heavy) and the router makes model decisions, but router.recommend() is called ONCE at spawn and the classifier broadcasts its updates to the UI only. There is NO feedback loop from classifier to router mid-session. An agent can drift from heavy to light work and stay on the expensive model for the entire session.
|
|
102
|
-
|
|
103
|
-
HIGH - Default routing mode is FIXED, not AUTO (router.js line 73) Unless explicitly set, agents spawn in FIXED mode with no adaptive routing. Most agents never get auto-routing.
|
|
104
|
-
|
|
105
|
-
HIGH - Cost tracking is dead code (router.js lines 125-136) recordUsage() is called, costLog accumulates data, but nothing reads the cost log. Cost data exists but never feeds back into routing decisions.
|
|
106
|
-
|
|
107
|
-
MEDIUM - 200-event classifier window averages away trends (classifier.js line 39) If task complexity changes at event 190, classifier still averages across events 1-189. Recent changes are diluted.
|
|
108
|
-
|
|
109
|
-
BACKBONE: API + REGISTRY + STATE (api.js, registry.js, state.js, timeline.js) -------------------------------------------------------------------------------
|
|
110
|
-
|
|
111
|
-
HIGH - O(n^2) broadcast on every registry change (api.js line 224) Every agent spawn/update/remove triggers enrichAgents() on ALL agents, then broadcasts full state to ALL WebSocket clients. With 10 agents and frequent updates, this is expensive.
|
|
112
|
-
|
|
113
|
-
HIGH - State saved every 30 seconds (index.js lines 518-521) If daemon crashes 29 seconds after an agent spawn, that agent is lost. State persistence should be 5s max for critical data.
|
|
114
|
-
|
|
115
|
-
HIGH - Synchronous state write blocks event loop (state.js line 24) writeFileSync on every save. Blocks all other operations for the duration of the write.
|
|
116
|
-
|
|
117
|
-
MEDIUM - Timeline caps at 16.7 hours (timeline.js lines 8-10) 2000 snapshots x 30s = 16.7 hours. After that, oldest snapshots are silently deleted. For multi-day sessions, early history is gone.
|
|
118
|
-
|
|
119
|
-
MEDIUM - AGENTS_REGISTRY.md rewritten every 2 seconds (index.js lines 210-220) On every registry change, debounced to 2s. Also rewrites the GROOVE section of CLAUDE.md. Constant file I/O.
|
|
120
|
-
|
|
121
|
-
THE DEGRADATION CASCADE -------------------------
|
|
122
|
-
|
|
123
|
-
Here's what actually happens when a planner agent runs for 30+ minutes:
|
|
124
|
-
|
|
125
|
-
T=0:00 - Agent spawns with role only, no prompt
|
|
126
|
-
|
|
127
|
-
-
|
|
128
|
-
hasTask=false, so Layer 7 discoveries are SKIPPED
|
|
129
|
-
-
|
|
130
|
-
Router defaults to FIXED mode, no auto-routing
|
|
131
|
-
-
|
|
132
|
-
Agent starts blind to all prior learnings
|
|
133
|
-
T=0:30 - Agent reads files, builds mental model
|
|
134
|
-
|
|
135
|
-
-
|
|
136
|
-
Exploration events (Read, Glob, Grep) are filtered OUT of
|
|
137
|
-
journalist synthesis
|
|
138
|
-
|
|
139
|
-
-
|
|
140
|
-
Classifier window starts filling (200-event buffer)
|
|
141
|
-
T=2:00 - First journalist synthesis cycle
|
|
142
|
-
|
|
143
|
-
-
|
|
144
|
-
Only captures 3-5K tokens of activity (40K char budget)
|
|
145
|
-
-
|
|
146
|
-
Exploration context excluded
|
|
147
|
-
T=5:00 - Agent is productive but context window filling
|
|
148
|
-
|
|
149
|
-
-
|
|
150
|
-
Quality score: 65 (decent but in dead zone 40-70)
|
|
151
|
-
-
|
|
152
|
-
No rotation triggered (quality threshold is 40)
|
|
153
|
-
-
|
|
154
|
-
No model downshift (classifier needs 40+ events)
|
|
155
|
-
-
|
|
156
|
-
Adaptive threshold nudges up (+2%)
|
|
157
|
-
T=10:00 - Context usage at 40-50%
|
|
158
|
-
|
|
159
|
-
-
|
|
160
|
-
Agent slowing down, re-reading files
|
|
161
|
-
-
|
|
162
|
-
Quality score drops to 55 (still in dead zone)
|
|
163
|
-
-
|
|
164
|
-
Classifier broadcasts tier update to UI only
|
|
165
|
-
-
|
|
166
|
-
Router does nothing (FIXED mode, no mid-session routing)
|
|
167
|
-
-
|
|
168
|
-
Token velocity increasing but below 1.5M/5min spike
|
|
169
|
-
T=15:00 - Quality score drops to 38 (below threshold!)
|
|
170
|
-
|
|
171
|
-
-
|
|
172
|
-
But agent is "active" (user reading output), idle check fails
|
|
173
|
-
-
|
|
174
|
-
Rotation blocked because agent isn't idle 10+ seconds
|
|
175
|
-
-
|
|
176
|
-
Agent continues degrading
|
|
177
|
-
T=18:00 - Agent finally idle, quality rotation fires
|
|
178
|
-
|
|
179
|
-
-
|
|
180
|
-
40K char synthesis captures last 20-30 events
|
|
181
|
-
-
|
|
182
|
-
Exploration context lost
|
|
183
|
-
-
|
|
184
|
-
Handoff brief generated: 7K tokens
|
|
185
|
-
-
|
|
186
|
-
Old agent killed, state destroyed
|
|
187
|
-
T=18:30 - New agent spawns with brief
|
|
188
|
-
|
|
189
|
-
-
|
|
190
|
-
Gets discoveries this time (hasTask=true from brief)
|
|
191
|
-
-
|
|
192
|
-
But only last 10 discoveries, 4K chars
|
|
193
|
-
-
|
|
194
|
-
Doesn't know what files previous agent explored
|
|
195
|
-
-
|
|
196
|
-
Spends first 2-3 turns re-reading same files
|
|
197
|
-
-
|
|
198
|
-
Cooldown: can't rotate for 5 minutes even if degraded
|
|
199
|
-
T=20:00 - New agent productive but missing context
|
|
200
|
-
|
|
201
|
-
-
|
|
202
|
-
Doesn't have previous agent's intermediate reasoning
|
|
203
|
-
-
|
|
204
|
-
Doesn't know which approaches were tried and failed
|
|
205
|
-
-
|
|
206
|
-
Quality score defaults to 70 (not enough data yet)
|
|
207
|
-
T=30:00 - Second rotation
|
|
208
|
-
|
|
209
|
-
-
|
|
210
|
-
Handoff brief now summarizes a summary
|
|
211
|
-
-
|
|
212
|
-
3K chars of rotation history (3 briefs compressed)
|
|
213
|
-
-
|
|
214
|
-
Context loss compounds
|
|
215
|
-
This cycle repeats. Each rotation loses ~50-70% of useful context. By rotation 5 (roughly 90 minutes), the agent is working with heavily compressed summaries and repeating exploration the first agent already did.
|
|
216
|
-
|
|
217
|
-
PRIORITY FIXES FOR TRUE INFINITE SESSIONS -------------------------------------------
|
|
218
|
-
|
|
219
|
-
TIER 1 - IMMEDIATE (stop the bleeding)
|
|
220
|
-
|
|
221
|
-
1.
|
|
222
|
-
Increase daemon heap to 2048MB (main.js line 178)
|
|
223
|
-
Change: --max-old-space-size=2048. Impact: Eliminates GC-induced freezes, fixes desktop lag.
|
|
224
|
-
|
|
225
|
-
1.
|
|
226
|
-
Force Layer 7 injection regardless of hasTask (process.js line 431)
|
|
227
|
-
Change: Always pass hasTask=true to generateContext, or remove the gate. Impact: First-generation agents get prior discoveries immediately.
|
|
228
|
-
|
|
229
|
-
1.
|
|
230
|
-
Include exploration events in synthesis (journalist.js lines 211-218)
|
|
231
|
-
Change: Don't filter Read/Glob/Grep into separate array; include in main entries with lower priority Impact: New agents know what files were examined
|
|
232
|
-
|
|
233
|
-
1.
|
|
234
|
-
Lower planner token ceiling from 50M to 10M (rotator.js)
|
|
235
|
-
Change: Reduce ROLE_MULTIPLIERS.planner from 10 to 2 Impact: Planners rotate before burning excessive tokens
|
|
236
|
-
|
|
237
|
-
1.
|
|
238
|
-
Clean up maps on agent removal (process.js)
|
|
239
|
-
Change: Clear peakContextUsage, pendingMessages, _streamThrottle when registry.remove() is called Impact: Stops unbounded memory growth
|
|
240
|
-
|
|
241
|
-
TIER 2 - HIGH PRIORITY (fix the feedback loops)
|
|
242
|
-
|
|
243
|
-
1.
|
|
244
|
-
Connect classifier to router mid-session
|
|
245
|
-
Change: When classifier detects tier change, call router.recommend() and auto-apply if confidence is high Impact: Agents auto-downshift to cheaper models when task simplifies
|
|
246
|
-
|
|
247
|
-
1.
|
|
248
|
-
Reduce rotation cooldown from 5 min to 2 min for quality rotation
|
|
249
|
-
Change: Separate QUALITY_COOLDOWN_MS = 2 * 60 * 1000 Impact: Degraded agents recover faster
|
|
250
|
-
|
|
251
|
-
1.
|
|
252
|
-
Remove idle check for severe degradation
|
|
253
|
-
Change: If quality score < 25, rotate immediately regardless of idle Impact: Severely degraded agents don't persist
|
|
254
|
-
|
|
255
|
-
1.
|
|
256
|
-
Increase synthesis budget to 100K chars (~25K tokens)
|
|
257
|
-
Change: MAX_LOG_CHARS = 100_000 Impact: Captures 80-90% of recent activity instead of 30-40%
|
|
258
|
-
|
|
259
|
-
1.
|
|
260
|
-
Add post-rotation validation
|
|
261
|
-
Change: Compare the quality of the new agent's first 10 events against the old agent's last 10. If worse, flag the rotation as a cold start. Impact: System knows when rotation helped vs hurt.
|
|
262
|
-
|
|
263
|
-
TIER 3 - MEDIUM PRIORITY (optimize the flow)
|
|
264
|
-
|
|
265
|
-
1.
|
|
266
|
-
Delta-only WebSocket broadcasts (api.js line 224)
|
|
267
|
-
Change: Send only changed agent data, not full state Impact: Reduces IPC pressure by 80%+
|
|
268
|
-
|
|
269
|
-
1.
|
|
270
|
-
Use specializations for routing
|
|
271
|
-
Change: When task involves files agent previously excelled at, prefer that agent Impact: Agents build on proven strengths
|
|
272
|
-
|
|
273
|
-
1.
|
|
274
|
-
Increase handoff chain from 10 to 25 rotations
|
|
275
|
-
Change: MAX_HANDOFF_ROTATIONS = 25 Impact: 2+ hours of causal history preserved
|
|
276
|
-
|
|
277
|
-
1.
|
|
278
|
-
State save every 5 seconds instead of 30
|
|
279
|
-
Change: setInterval at 5000 Impact: Max 5s of data loss on crash instead of 30
|
|
280
|
-
|
|
281
|
-
1.
|
|
282
|
-
Pre-compile minimatch patterns (lockmanager.js)
|
|
283
|
-
Change: Use minimatch.makeRe() at registration, cache compiled Impact: O(n*m) regex compilation per check reduced to O(n) lookup
|
|
284
|
-
|
|
285
|
-
1.
|
|
286
|
-
Fix listener cleanup on window reuse (main.js)
|
|
287
|
-
Change: Remove all webContents listeners before re-registering Impact: Stops listener accumulation, reduces memory pressure
|
|
288
|
-
|
|
289
|
-
TIER 4 - ARCHITECTURAL (true infinite sessions)
|
|
290
|
-
|
|
291
|
-
1.
|
|
292
|
-
Implement incremental context patching at rotation
|
|
293
|
-
Instead of regenerating full context from scratch, patch only what changed since last rotation
|
|
294
|
-
|
|
295
|
-
1.
|
|
296
|
-
Add mid-session memory injection
|
|
297
|
-
When journalist discovers new fix, inject into running agent via instruct endpoint
|
|
298
|
-
|
|
299
|
-
1.
|
|
300
|
-
Implement per-role quality baselines
|
|
301
|
-
Calibrate scoring weights and thresholds per role type
|
|
302
|
-
|
|
303
|
-
1.
|
|
304
|
-
Add degradation cascade detection
|
|
305
|
-
Track if downstream agents inherit errors from upstream rotations
|