groove-dev 0.27.26 → 0.27.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -427,12 +427,18 @@ export class ProcessManager {
427
427
  taskNegotiation = await this.negotiateTaskSplit(agent, sameRole);
428
428
  }
429
429
 
430
- // Generate introduction context (team awareness + negotiation)
431
- // Always pass hasTask: true so Layer 7 discoveries and handoff history
432
- // are injected for ALL agents, not just those with explicit prompts.
433
- // Without this, first-generation agents spawned with just a role never
434
- // receive prior discoveries and repeat mistakes Layer 7 already captured.
435
- const introContext = introducer.generateContext(agent, { taskNegotiation, hasTask: true });
430
+ // Compute hasTask from actual prompt content — agents spawned without a
431
+ // prompt should NOT receive handoff history (prevents cross-team contamination).
432
+ // Discoveries + constraints are always injected (project knowledge).
433
+ // Handoffs are injected only when the agent has a real task or is a rotation.
434
+ const hasTask = !!(config.prompt && config.prompt.trim().length > 0);
435
+ const isRotation = !!(config.isRotation);
436
+ const introContext = introducer.generateContext(agent, { taskNegotiation, hasTask, isRotation });
437
+
438
+ // Ensure the project map is fresh before the new agent reads CLAUDE.md
439
+ if (this.daemon.journalist) {
440
+ await this.daemon.journalist.ensureFresh(30000);
441
+ }
436
442
 
437
443
  // Track cold-start savings — agent gets context from planner/journalist/team
438
444
  // instead of exploring the codebase from scratch
@@ -597,7 +603,7 @@ For normal file edits within your scope, proceed without review.
597
603
 
598
604
  this.daemon.broadcast({ type: 'agent:exit', agentId: agent.id, code: code || 0, signal, status });
599
605
  if (this.daemon.integrations) this.daemon.integrations.refreshMcpJson();
600
- if (status === 'completed' && this.daemon.journalist) this.daemon.journalist.cycle().catch(() => {});
606
+ if (status === 'completed' && this.daemon.journalist) this.daemon.journalist.requestSynthesis('completion');
601
607
  this._checkPhase2(agent.id);
602
608
 
603
609
  // Auto-trigger idle QC + process cross-scope handoffs
@@ -783,10 +789,9 @@ For normal file edits within your scope, proceed without review.
783
789
  }
784
790
  }
785
791
 
786
- // Trigger journalist synthesis immediately on completion so the project
787
- // map is fresh for the next agent that spawns (don't wait for 120s cycle)
792
+ // Trigger journalist synthesis on completion (event-driven, debounced)
788
793
  if (finalStatus === 'completed' && this.daemon.journalist) {
789
- this.daemon.journalist.cycle().catch(() => {});
794
+ this.daemon.journalist.requestSynthesis('completion');
790
795
  }
791
796
 
792
797
  // Phase 2 auto-spawn: check if all phase 1 agents for a team are done
@@ -1168,7 +1173,7 @@ For normal file edits within your scope, proceed without review.
1168
1173
  oldTokens: agentData?.tokensUsed || 0,
1169
1174
  contextUsage: agentData?.contextUsage || 0,
1170
1175
  brief: brief.slice(0, 4000),
1171
- }, agent.workingDir);
1176
+ }, agent.workingDir, agent.teamId);
1172
1177
  } catch { /* best-effort */ }
1173
1178
  }
1174
1179
 
@@ -1369,7 +1374,7 @@ For normal file edits within your scope, proceed without review.
1369
1374
  registry.update(newAgent.id, { status: finalStatus, pid: null });
1370
1375
  this.daemon.broadcast({ type: 'agent:exit', agentId: newAgent.id, code, signal, status: finalStatus });
1371
1376
  if (finalStatus === 'completed' && this.daemon.journalist) {
1372
- this.daemon.journalist.cycle().catch(() => {});
1377
+ this.daemon.journalist.requestSynthesis('completion');
1373
1378
  }
1374
1379
  });
1375
1380
 
@@ -95,6 +95,7 @@ export class CodexProvider extends Provider {
95
95
  if (agent.prompt) args.push(agent.prompt);
96
96
 
97
97
  this._currentModel = agent.model;
98
+ this._sessionInputTokens = 0;
98
99
 
99
100
  return {
100
101
  command: 'codex',
@@ -109,6 +110,11 @@ export class CodexProvider extends Provider {
109
110
  return { command: 'codex', args, env: {} };
110
111
  }
111
112
 
113
+ _getMaxContext() {
114
+ const model = CodexProvider.models.find((m) => m.id === this._currentModel);
115
+ return model?.maxContext || 200000;
116
+ }
117
+
112
118
  switchModel(agent, newModel) {
113
119
  return false; // Codex doesn't support mid-session model switch
114
120
  }
@@ -175,36 +181,48 @@ export class CodexProvider extends Provider {
175
181
 
176
182
  case 'item.completed': {
177
183
  const item = event.item || {};
184
+
185
+ // Accumulate usage for intermediate context estimation.
186
+ // Codex only reports full contextUsage at turn.completed — without this,
187
+ // the rotator sees stale contextUsage between turns and never triggers.
188
+ if (event.usage) {
189
+ this._sessionInputTokens += event.usage.input_tokens || 0;
190
+ }
191
+
192
+ let result = null;
178
193
  if (item.type === 'agent_message') {
179
- return {
194
+ result = {
180
195
  type: 'activity', subtype: 'assistant',
181
196
  data: [{ type: 'text', text: item.text || '' }],
182
197
  };
183
- }
184
- if (item.type === 'command_execution') {
198
+ } else if (item.type === 'command_execution') {
185
199
  const output = (item.aggregated_output || '').slice(0, 2000);
186
- return {
200
+ result = {
187
201
  type: 'activity', subtype: 'assistant',
188
202
  data: [
189
203
  { type: 'tool_use', id: item.id || 'exec', name: 'Bash', input: { command: item.command } },
190
204
  ...(output ? [{ type: 'text', text: output }] : []),
191
205
  ],
192
206
  };
193
- }
194
- if (item.type === 'todo_list') {
207
+ } else if (item.type === 'todo_list') {
195
208
  const steps = (item.items || []).map((s) => `${s.completed ? '✓' : '○'} ${s.text}`).join('\n');
196
- return {
209
+ result = {
197
210
  type: 'activity', subtype: 'assistant',
198
211
  data: [{ type: 'text', text: steps }],
199
212
  };
200
- }
201
- if (item.type === 'file_edit' || item.type === 'file_write' || item.type === 'file_read') {
202
- return {
213
+ } else if (item.type === 'file_edit' || item.type === 'file_write' || item.type === 'file_read') {
214
+ result = {
203
215
  type: 'activity', subtype: 'assistant',
204
216
  data: [{ type: 'tool_use', id: item.id || 'file', name: item.type === 'file_read' ? 'Read' : item.type === 'file_write' ? 'Write' : 'Edit', input: { path: item.path || item.file || '' } }],
205
217
  };
206
218
  }
207
- return null;
219
+
220
+ // Attach intermediate context estimate so all 7 layers see Codex progress
221
+ if (result && this._sessionInputTokens > 0) {
222
+ result.contextUsage = this._sessionInputTokens / this._getMaxContext();
223
+ }
224
+
225
+ return result;
208
226
  }
209
227
 
210
228
  case 'turn.completed': {
@@ -215,11 +233,15 @@ export class CodexProvider extends Provider {
215
233
  const outputTokens = usage.output_tokens || 0;
216
234
  const cachedTokens = usage.cached_input_tokens || 0;
217
235
  const totalTokens = inputTokens + outputTokens;
236
+ const cacheCreationTokens = cachedTokens > 0 ? Math.max(0, inputTokens - cachedTokens) : 0;
218
237
 
219
238
  const model = CodexProvider.models.find((m) => m.id === this._currentModel);
220
239
  const pricing = model?.pricing;
221
240
  const maxContext = model?.maxContext || 200000;
222
241
 
242
+ // Sync accumulator to actual cumulative value from turn completion
243
+ this._sessionInputTokens = inputTokens;
244
+
223
245
  let estimatedCostUsd = 0;
224
246
  if (pricing) {
225
247
  const newInput = inputTokens - cachedTokens;
@@ -235,6 +257,7 @@ export class CodexProvider extends Provider {
235
257
  inputTokens,
236
258
  outputTokens,
237
259
  cacheReadTokens: cachedTokens,
260
+ cacheCreationTokens,
238
261
  contextUsage: inputTokens / maxContext,
239
262
  estimatedCostUsd,
240
263
  costSource: pricing ? 'calculated' : 'estimated',
@@ -31,6 +31,7 @@ export class Rotator extends EventEmitter {
31
31
  this.rotationHistory = [];
32
32
  this.rotating = new Set();
33
33
  this.lastRotationTime = new Map(); // agentId -> timestamp of last rotation
34
+ this._lastContextState = new Map(); // agentId -> { contextUsage, timestamp }
34
35
  this.enabled = false;
35
36
  this.liveScores = {};
36
37
  this.scoreHistory = {};
@@ -180,6 +181,25 @@ export class Rotator extends EventEmitter {
180
181
  continue;
181
182
  }
182
183
 
184
+ // Stale context fallback — safety net for providers (like Codex) that don't
185
+ // report intermediate contextUsage. If contextUsage hasn't changed in 120+
186
+ // seconds and the agent has a non-zero tokensUsed, estimate context as tokensUsed / maxContext.
187
+ const knownCtx = this._lastContextState.get(agent.id);
188
+ if (!knownCtx || knownCtx.contextUsage !== agent.contextUsage) {
189
+ this._lastContextState.set(agent.id, { contextUsage: agent.contextUsage, timestamp: Date.now() });
190
+ } else if (agent.tokensUsed > 0 && (Date.now() - knownCtx.timestamp) >= 120_000) {
191
+ const providerClass = getProvider(agent.provider)?.constructor;
192
+ const models = providerClass?.models || [];
193
+ const model = models.find((m) => m.id === agent.model) || models[0];
194
+ const maxContext = model?.maxContext || 200000;
195
+ const estimatedContext = agent.tokensUsed / maxContext;
196
+ if (estimatedContext >= HARD_CEILING) {
197
+ console.log(` Rotator: ${agent.name} estimated context ${Math.round(estimatedContext * 100)}% (stale contextUsage fallback)`);
198
+ await this.rotate(agent.id, { reason: 'estimated_context_ceiling' });
199
+ continue;
200
+ }
201
+ }
202
+
183
203
  // Cooldown — skip threshold/quality rotation if recently rotated
184
204
  if (this._isOnCooldown(agent.id)) continue;
185
205
 
@@ -274,7 +294,7 @@ export class Rotator extends EventEmitter {
274
294
  oldTokens: agent.tokensUsed,
275
295
  contextUsage: agent.contextUsage,
276
296
  brief: brief.slice(0, 4000),
277
- }, agent.workingDir);
297
+ }, agent.workingDir, agent.teamId);
278
298
  }
279
299
 
280
300
  const record = {
@@ -312,6 +332,7 @@ export class Rotator extends EventEmitter {
312
332
  workingDir: agent.workingDir,
313
333
  name: agent.name,
314
334
  teamId: agent.teamId,
335
+ isRotation: true,
315
336
  });
316
337
  } catch (spawnErr) {
317
338
  // Spawn failed — re-add old agent so the user can see and retry.
@@ -499,6 +520,7 @@ export class Rotator extends EventEmitter {
499
520
  const naturalCompactions = this.rotationHistory.filter((r) => r.reason === 'natural_compaction').length;
500
521
  const hardCeilingRotations = this.rotationHistory.filter((r) => r.reason === 'hard_ceiling').length;
501
522
  const tokenCeilingRotations = this.rotationHistory.filter((r) => r.reason === 'token_ceiling').length;
523
+ const estimatedCeilingRotations = this.rotationHistory.filter((r) => r.reason === 'estimated_context_ceiling').length;
502
524
  return {
503
525
  enabled: this.enabled,
504
526
  totalRotations,
@@ -508,6 +530,7 @@ export class Rotator extends EventEmitter {
508
530
  naturalCompactions,
509
531
  hardCeilingRotations,
510
532
  tokenCeilingRotations,
533
+ estimatedCeilingRotations,
511
534
  rotating: Array.from(this.rotating),
512
535
  liveScores: this.liveScores,
513
536
  scoreHistory: this.scoreHistory,
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@groove-dev/gui",
3
- "version": "0.27.26",
3
+ "version": "0.27.27",
4
4
  "description": "GROOVE GUI — visual agent control plane",
5
5
  "license": "FSL-1.1-Apache-2.0",
6
6
  "type": "module",
@@ -1,305 +0,0 @@
1
- LAYER 1: ELECTRON SHELL (packages/desktop/main.js) ----------------------------------------------------
2
-
3
- The desktop app itself creates memory pressure before any agent work begins:
4
-
5
- CRITICAL - Daemon heap capped at 512MB (line 178) The daemon process managing ALL agents, the GUI server, WebSocket broadcasts, journalist synthesis, token tracking, and state persistence gets only 512MB. When GC hits this ceiling, the event loop blocks for 100-300ms. The Electron app freezes waiting for daemon responses. This is why button clicks take 5s to register.
6
-
7
- CRITICAL - Listener accumulation on window reuse (lines 862-914) When reopening a project, the home window is reused. Lines 872-873 remove close/closed listeners, but console-message, render-process-gone, and permission handlers are re-registered without removing the old ones. Each reuse adds duplicate listeners. N window reuses = N duplicate handlers firing per event.
8
-
9
- CRITICAL - IPC broadcast flooding (process.js lines 926-950) Every agent output is broadcast to all Electron windows. Throttled to 4/sec per agent, but with 4+ agents that's 16+ JSON serializations per second over IPC with no backpressure. The renderer can't keep up, and the IPC message queue grows unbounded in the main process.
10
-
11
- HIGH - Subscription polling on every window focus (lines 301, 886) Every time you click the Electron window, it triggers a subscription check to the daemon. With multiple windows, this creates constant HTTP traffic even when idle.
12
-
13
- HIGH - stdout buffer string operations (process.js lines 844-854) Per-agent stdout buffer grows to 1MB, then the "oldest half" is discarded via .indexOf() and .slice() on a multi-MB string. This runs on EVERY chunk. With streaming agents, it's constant multi-MB string allocations and GC pressure.
14
-
15
- LAYER 2: PROCESS MANAGER (packages/daemon/src/process.js) -----------------------------------------------------------
16
-
17
- HIGH - Stream throttle timers leak (lines 928-948) Each agent gets a throttle object in a Map. If agent crashes or is killed while a timer is pending, the callback still fires, broadcasting to a dead agent ID. The closure over output data never gets GC'd. Long sessions with many spawns = unbounded map growth.
18
-
19
- HIGH - Resume doesn't clean up maps (lines 1251-1367) When resuming a completed agent, peakContextUsage, pendingMessages, and _streamThrottle entries from the old agent ID are never cleared. These maps grow with every resume cycle.
20
-
21
- HIGH - Log stream file descriptor leaks (lines 545, 1284) Each spawn creates a write stream. On exit, logStream.end() is called but file descriptors aren't guaranteed released if I/O is slow. Resume creates new streams for the same agent. Long sessions accumulate hundreds of open .log file descriptors.
22
-
23
- LAYER 3: JOURNALIST (packages/daemon/src/journalist.js) ---------------------------------------------------------
24
-
25
- This is where "infinite sessions" should work. It doesn't fully deliver.
26
-
27
- CRITICAL - 40K char budget is actually 3-5K tokens (line 10) The comment says "~10k tokens budget" but 40,000 UTF-8 chars of chat-style logs is closer to 3-5K tokens. The synthesis prompt can only fit the last 20-30 agent events. In active sessions, 60-70% of recent activity is dropped from synthesis.
28
-
29
- CRITICAL - Exploration tools excluded from synthesis (lines 211-218) Read, Glob, and Grep events are filtered into a separate explorationEntries array and never included in the synthesis prompt. New agents don't know what files were examined. They re-explore the same files, wasting tokens.
30
-
31
- HIGH - Handoff brief is only 7K tokens total (lines 706-784) The brief pulls from 9 sources, each truncated:
32
-
33
- -
34
- Errors: 10 entries x 300 chars = 3K chars
35
- -
36
- Results: last 3 joined = 3K chars
37
- -
38
- File changes: 20 files = 2K chars
39
- -
40
- Discoveries: 10 entries = 2K chars
41
- -
42
- Constraints: 2K chars
43
- -
44
- User feedback: 5 x 500 = 2.5K chars
45
- -
46
- Recent tools: 5 x 80 = 400 chars
47
- -
48
- Rotation history: 3K chars
49
- -
50
- Original task: 2K chars
51
- Total ~22K chars = ~7K tokens. The original agent had full context. The new agent gets a structured digest. Nuance, intermediate reasoning, and failed experiments are gone.
52
-
53
- HIGH - Only last 5 tool calls in rotation history (lines 755-760) A complex multi-file refactor needs 10-20 tool calls for context. Only the last 5 survive, each truncated to 80 chars. The handoff says "agent was editing files" but not "agent tried approach A, it failed because X, then tried B."
54
-
55
- MEDIUM - Synthesis cycle is 2 min, rotation check is 15 sec (lines 9, rotator.js:11) Rotations can happen 8x per synthesis cycle. Agent rotates at 0:15, journalist doesn't synthesize until 2:00. New agent has 1:45 of unsynthesized activity.
56
-
57
- LAYER 4: ROTATOR (packages/daemon/src/rotator.js) ---------------------------------------------------
58
-
59
- CRITICAL - 5-minute cooldown prevents fast recovery (line 16) After rotation, the new agent is locked in for 5 minutes regardless of quality. If the handoff was bad and the new agent is immediately degraded, nothing can be done until cooldown expires. Total degradation window: 8-10 minutes (3 min to detect + 5 min cooldown).
60
-
61
- HIGH - No post-rotation validation (lines 233-237) Pre-rotation session is scored and recorded. But there is NO measurement of post-rotation quality. System assumes rotation improved things without verifying. Could be making things worse.
62
-
63
- HIGH - Kill+respawn loses in-flight state (lines 278-301) Between kill and respawn: in-flight variables, cached results, classifier window, score history, and live quality tracking are all destroyed. The new agent starts from the 7K-token brief only. This is a warm start, not a true zero-cold-start.
64
-
65
- HIGH - Idle check blocks quality rotation (lines 201-213) Quality rotation only fires if agent is idle 10+ seconds. If user is actively sending messages, an agent scoring 30/100 won't rotate because it's "busy." Planner agents producing bad plans while user reads output = degradation that the system ignores.
66
-
67
- MEDIUM - Handoff chain keeps only 10 rotations (memory.js line 18) At 5-minute rotations, 10 rotations = 50 minutes of history. Multi-hour tasks lose their full lineage. Agent #11 can't see what agent #1 learned.
68
-
69
- LAYER 5: ADAPTIVE THRESHOLDS (packages/daemon/src/adaptive.js) ----------------------------------------------------------------
70
-
71
- HIGH - Session scoring baseline is arbitrary (line 96) Score starts at 70. Errors cost 5 points each, repetitions cost 6. But these weights have no empirical calibration. A fatal error and a benign warning both cost 5 points; the scoring doesn't distinguish between them by severity.
72
-
73
- HIGH - Asymmetric drift (+2%/-5%) (lines 8-9) Good sessions nudge threshold up slowly (+2%). Bad sessions yank it down fast (-5%). Over many sessions, thresholds oscillate unpredictably rather than converging.
74
-
75
- HIGH - Convergence detection is set but never read (lines 139-149) The profile.converged flag is computed and stored. The rotator never checks it. It's dead data.
76
-
77
- MEDIUM - No per-role quality thresholds (rotator.js line 12) QUALITY_THRESHOLD=40 applies to all roles. Planners naturally have higher error rates (exploring, revising). Backend agents on stable code have lower rates. Same threshold for both.
78
-
79
- LAYER 6: TOKEN TRACKER (packages/daemon/src/tokentracker.js) --------------------------------------------------------------
80
-
81
- HIGH - Planner token ceiling is 50M tokens (rotator.js lines 111-112) Role multiplier for planner is 10x the 5M base = 50M tokens. That's ~180K output tokens. Degradation happens long before 50M, but the safety ceiling never triggers. Planners can burn 50M tokens of garbage before forced rotation.
82
-
83
- MEDIUM - Cache hit rate is global, not per-agent (lines 205-216) If planner has 80% hit rate and fullstack has 20%, global rate is ~50%. Can't detect which agents benefit from caching.
84
-
85
- MEDIUM - Cold-start savings estimation is inflated (lines 8-14, 182-187) Per-file cost is flat (15 tokens). A 50-line config and a 10K-line monolith cost the same. Reported savings are overstated.
86
-
87
- MEDIUM - Journalist synthesis cost not tracked separately (lines 237-258) Internal overhead is tracked by __ prefix IDs, but journalist synthesis tokens may not be properly attributed.
88
-
89
- LAYER 7: MEMORY (packages/daemon/src/memory.js) -------------------------------------------------
90
-
91
- CRITICAL - Layer 7 skips first-generation agents (process.js line 431) Discoveries and handoff history are only injected when hasTask=true. hasTask = !!config.prompt. Agents spawned with just a role and no prompt never receive prior discoveries. They repeat mistakes that Layer 7 already captured.
92
-
93
- HIGH - Specializations recorded but never used (lines 310-389) Quality profiles per agent/role are stored. They are NEVER read for routing decisions. The router doesn't check "which agent specialization scores best for this file domain." It's write-only data.
94
-
95
- HIGH - Handoff briefs degrade over generations (lines 154-219) By rotation 5-10, the brief is summarizing summaries. Content degradation compounds. There's no quality scoring to flag "this was a clean rotation" vs "this was an emergency rotation."
96
-
97
- MEDIUM - No mid-session memory injection (introducer.js lines 345-381) Memory is injected once at spawn. If journalist discovers a critical fix during the agent's session, the running agent never learns it. Only the next rotated agent sees it.
98
-
99
- LAYER 8: ROUTER + CLASSIFIER (router.js, classifier.js) ---------------------------------------------------------
100
-
101
- CRITICAL - Classifier and Router are disconnected (entire design) Classifier produces tier classifications (light/medium/heavy). Router makes model decisions. But router.recommend() is called ONCE at spawn. Classifier broadcasts updates to UI only. There is NO feedback loop from classifier to router mid-session. An agent can drift from heavy to light work and stay on the expensive model for the entire session.
102
-
103
- HIGH - Default routing mode is FIXED, not AUTO (router.js line 73) Unless explicitly set, agents spawn in FIXED mode with no adaptive routing. Most agents never get auto-routing.
104
-
105
- HIGH - Cost tracking is dead code (router.js lines 125-136) recordUsage() is called, costLog accumulates data, but nothing reads the cost log. Cost data exists but never feeds back into routing decisions.
106
-
107
- MEDIUM - 200-event classifier window averages away trends (classifier.js line 39) If task complexity changes at event 190, classifier still averages across events 1-189. Recent changes are diluted.
108
-
109
- BACKBONE: API + REGISTRY + STATE (api.js, registry.js, state.js, timeline.js) -------------------------------------------------------------------------------
110
-
111
- HIGH - O(n^2) broadcast on every registry change (api.js line 224) Every agent spawn/update/remove triggers enrichAgents() on ALL agents, then broadcasts full state to ALL WebSocket clients. With 10 agents and frequent updates, this is expensive.
112
-
113
- HIGH - State saved every 30 seconds (index.js lines 518-521) If daemon crashes 29 seconds after an agent spawn, that agent is lost. State persistence should be 5s max for critical data.
114
-
115
- HIGH - Synchronous state write blocks event loop (state.js line 24) writeFileSync on every save. Blocks all other operations for the duration of the write.
116
-
117
- MEDIUM - Timeline caps at 16.7 hours (timeline.js lines 8-10) 2000 snapshots x 30s = 16.7 hours. After that, oldest snapshots are silently deleted. For multi-day sessions, early history is gone.
118
-
119
- MEDIUM - AGENTS_REGISTRY.md rewritten every 2 seconds (index.js lines 210-220) On every registry change, debounced to 2s. Also rewrites the GROOVE section of CLAUDE.md. Constant file I/O.
120
-
121
- THE DEGRADATION CASCADE -------------------------
122
-
123
- Here's what actually happens when a planner agent runs for 30+ minutes:
124
-
125
- T=0:00 - Agent spawns with role only, no prompt
126
-
127
- -
128
- hasTask=false, so Layer 7 discoveries are SKIPPED
129
- -
130
- Router defaults to FIXED mode, no auto-routing
131
- -
132
- Agent starts blind to all prior learnings
133
- T=0:30 - Agent reads files, builds mental model
134
-
135
- -
136
- Exploration events (Read, Glob, Grep) are filtered OUT of
137
- journalist synthesis
138
-
139
- -
140
- Classifier window starts filling (200-event buffer)
141
- T=2:00 - First journalist synthesis cycle
142
-
143
- -
144
- Only captures 3-5K tokens of activity (40K char budget)
145
- -
146
- Exploration context excluded
147
- T=5:00 - Agent is productive but context window filling
148
-
149
- -
150
- Quality score: 65 (decent but in dead zone 40-70)
151
- -
152
- No rotation triggered (quality threshold is 40)
153
- -
154
- No model downshift (classifier needs 40+ events)
155
- -
156
- Adaptive threshold nudges up (+2%)
157
- T=10:00 - Context usage at 40-50%
158
-
159
- -
160
- Agent slowing down, re-reading files
161
- -
162
- Quality score drops to 55 (still in dead zone)
163
- -
164
- Classifier broadcasts tier update to UI only
165
- -
166
- Router does nothing (FIXED mode, no mid-session routing)
167
- -
168
- Token velocity increasing but below 1.5M/5min spike
169
- T=15:00 - Quality score drops to 38 (below threshold!)
170
-
171
- -
172
- But agent is "active" (user reading output), idle check fails
173
- -
174
- Rotation blocked because agent isn't idle 10+ seconds
175
- -
176
- Agent continues degrading
177
- T=18:00 - Agent finally idle, quality rotation fires
178
-
179
- -
180
- 40K char synthesis captures last 20-30 events
181
- -
182
- Exploration context lost
183
- -
184
- Handoff brief generated: 7K tokens
185
- -
186
- Old agent killed, state destroyed
187
- T=18:30 - New agent spawns with brief
188
-
189
- -
190
- Gets discoveries this time (hasTask=true from brief)
191
- -
192
- But only last 10 discoveries, 4K chars
193
- -
194
- Doesn't know what files previous agent explored
195
- -
196
- Spends first 2-3 turns re-reading same files
197
- -
198
- Cooldown: can't rotate for 5 minutes even if degraded
199
- T=20:00 - New agent productive but missing context
200
-
201
- -
202
- Doesn't have previous agent's intermediate reasoning
203
- -
204
- Doesn't know which approaches were tried and failed
205
- -
206
- Quality score defaults to 70 (not enough data yet)
207
- T=30:00 - Second rotation
208
-
209
- -
210
- Handoff brief now summarizes a summary
211
- -
212
- 3K chars of rotation history (3 briefs compressed)
213
- -
214
- Context loss compounds
215
- This cycle repeats. Each rotation loses ~50-70% of useful context. By rotation 5 (roughly 90 minutes), the agent is working with heavily compressed summaries and repeating exploration the first agent already did.
216
-
217
- PRIORITY FIXES FOR TRUE INFINITE SESSIONS -------------------------------------------
218
-
219
- TIER 1 - IMMEDIATE (stop the bleeding)
220
-
221
- 1.
222
- Increase daemon heap to 2048MB (main.js line 178)
223
- Change: --max-old-space-size=2048 Impact: Eliminates GC-induced freezes, fixes desktop lag
224
-
225
- 1.
226
- Force Layer 7 injection regardless of hasTask (process.js line 431)
227
- Change: Always pass hasTask=true to generateContext, or remove the gate Impact: First-generation agents get prior discoveries immediately
228
-
229
- 1.
230
- Include exploration events in synthesis (journalist.js lines 211-218)
231
- Change: Don't filter Read/Glob/Grep into separate array; include in main entries with lower priority Impact: New agents know what files were examined
232
-
233
- 1.
234
- Lower planner token ceiling from 50M to 10M (rotator.js)
235
- Change: Reduce ROLE_MULTIPLIERS.planner from 10 to 2 Impact: Planners rotate before burning excessive tokens
236
-
237
- 1.
238
- Clean up maps on agent removal (process.js)
239
- Change: Clear peakContextUsage, pendingMessages, _streamThrottle when registry.remove() is called Impact: Stops unbounded memory growth
240
-
241
- TIER 2 - HIGH PRIORITY (fix the feedback loops)
242
-
243
- 1.
244
- Connect classifier to router mid-session
245
- Change: When classifier detects tier change, call router.recommend() and auto-apply if confidence is high Impact: Agents auto-downshift to cheaper models when task simplifies
246
-
247
- 1.
248
- Reduce rotation cooldown from 5 min to 2 min for quality rotation
249
- Change: Separate QUALITY_COOLDOWN_MS = 2 * 60 * 1000 Impact: Degraded agents recover faster
250
-
251
- 1.
252
- Remove idle check for severe degradation
253
- Change: If quality score < 25, rotate immediately regardless of idle Impact: Severely degraded agents don't persist
254
-
255
- 1.
256
- Increase synthesis budget to 100K chars (~25K tokens)
257
- Change: MAX_LOG_CHARS = 100_000 Impact: Captures 80-90% of recent activity instead of 30-40%
258
-
259
- 1.
260
- Add post-rotation validation
261
- Change: Compare first-10-events quality of new agent vs last-10 of old agent. If worse, flag as cold start. Impact: System knows when rotation helped vs hurt
262
-
263
- TIER 3 - MEDIUM PRIORITY (optimize the flow)
264
-
265
- 1.
266
- Delta-only WebSocket broadcasts (api.js line 224)
267
- Change: Send only changed agent data, not full state Impact: Reduces IPC pressure by 80%+
268
-
269
- 1.
270
- Use specializations for routing
271
- Change: When task involves files agent previously excelled at, prefer that agent Impact: Agents build on proven strengths
272
-
273
- 1.
274
- Increase handoff chain from 10 to 25 rotations
275
- Change: MAX_HANDOFF_ROTATIONS = 25 Impact: 2+ hours of causal history preserved
276
-
277
- 1.
278
- State save every 5 seconds instead of 30
279
- Change: setInterval at 5000 Impact: Max 5s of data loss on crash instead of 30
280
-
281
- 1.
282
- Pre-compile minimatch patterns (lockmanager.js)
283
- Change: Use minimatch.makeRe() at registration, cache compiled Impact: O(n*m) regex compilation per check reduced to O(n) lookup
284
-
285
- 1.
286
- Fix listener cleanup on window reuse (main.js)
287
- Change: Remove all webContents listeners before re-registering Impact: Stops listener accumulation, reduces memory pressure
288
-
289
- TIER 4 - ARCHITECTURAL (true infinite sessions)
290
-
291
- 1.
292
- Implement incremental context patching at rotation
293
- Instead of regenerating full context from scratch, patch only what changed since last rotation
294
-
295
- 1.
296
- Add mid-session memory injection
297
- When journalist discovers new fix, inject into running agent via instruct endpoint
298
-
299
- 1.
300
- Implement per-role quality baselines
301
- Calibrate scoring weights and thresholds per role type
302
-
303
- 1.
304
- Add degradation cascade detection
305
- Track if downstream agents inherit errors from upstream rotations