claude-code-session-manager 0.21.2 → 0.21.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/bin/cli.cjs +5 -0
  2. package/dist/assets/{TiptapBody-CepFtp62.js → TiptapBody-CZLSQ6pj.js} +2 -2
  3. package/dist/assets/cssMode-DfqZGMQs.js +1 -0
  4. package/dist/assets/{freemarker2-DqQlU_4i.js → freemarker2-XTPYh37h.js} +1 -1
  5. package/dist/assets/handlebars-DKUF5VyH.js +1 -0
  6. package/dist/assets/html-uqoqsIeI.js +1 -0
  7. package/dist/assets/htmlMode-aMTQs1su.js +1 -0
  8. package/dist/assets/index-BUrrcj7x.js +3525 -0
  9. package/dist/assets/index-DeQI4oVI.css +32 -0
  10. package/dist/assets/javascript-BVxRZMds.js +1 -0
  11. package/dist/assets/{jsonMode-CFEryxme.js → jsonMode-D04xP2s5.js} +4 -4
  12. package/dist/assets/liquid-BkQHTH2P.js +1 -0
  13. package/dist/assets/lspLanguageFeatures-By9uLznH.js +4 -0
  14. package/dist/assets/mdx-Du1IlbjV.js +1 -0
  15. package/dist/assets/{index-CrE67_1W.css → monaco-editor-BTnBOi8r.css} +1 -32
  16. package/dist/assets/monaco-editor-BW5C4Iv1.js +908 -0
  17. package/dist/assets/python-DSlImqXd.js +1 -0
  18. package/dist/assets/razor-BmUVyvSK.js +1 -0
  19. package/dist/assets/{tsMode-CNLm8WAZ.js → tsMode-Btj0TTH7.js} +1 -1
  20. package/dist/assets/typescript-Bzelq9vO.js +1 -0
  21. package/dist/assets/xml-Whd9EaSd.js +1 -0
  22. package/dist/assets/yaml-QYf0-IN8.js +1 -0
  23. package/dist/index.html +4 -2
  24. package/package.json +1 -1
  25. package/src/main/__tests__/runVerify.test.cjs +138 -0
  26. package/src/main/config.cjs +36 -4
  27. package/src/main/historyAggregator.cjs +400 -149
  28. package/src/main/index.cjs +8 -0
  29. package/src/main/ipcSchemas.cjs +42 -13
  30. package/src/main/kg.cjs +87 -30
  31. package/src/main/lib/credentials.cjs +7 -0
  32. package/src/main/lib/e2eStateMachine.cjs +39 -0
  33. package/src/main/runVerify.cjs +51 -5
  34. package/src/main/scheduler/prdParser.cjs +16 -1
  35. package/src/main/scheduler.cjs +171 -13
  36. package/src/main/transcripts.cjs +141 -19
  37. package/src/main/usageMatrix.cjs +7 -3
  38. package/src/main/webRemote.cjs +196 -31
  39. package/src/preload/api.d.ts +40 -0
  40. package/src/preload/index.cjs +7 -0
  41. package/dist/assets/cssMode-8hR_Zezu.js +0 -1
  42. package/dist/assets/handlebars-Ts2NzFcS.js +0 -1
  43. package/dist/assets/html-QjLxt2p6.js +0 -1
  44. package/dist/assets/htmlMode-Dst38sy3.js +0 -1
  45. package/dist/assets/index-XKsJ4Pk3.js +0 -4431
  46. package/dist/assets/javascript-CNxLjNGz.js +0 -1
  47. package/dist/assets/liquid-BBfKLTB_.js +0 -1
  48. package/dist/assets/lspLanguageFeatures-BNyh7ouG.js +0 -4
  49. package/dist/assets/mdx-SaTyS1xC.js +0 -1
  50. package/dist/assets/python-C84TNhMd.js +0 -1
  51. package/dist/assets/razor-BaVJM3L8.js +0 -1
  52. package/dist/assets/typescript-BdrDpzPy.js +0 -1
  53. package/dist/assets/xml-CHJ3Xjjj.js +0 -1
  54. package/dist/assets/yaml-Cg2-K8t3.js +0 -1
@@ -180,12 +180,16 @@ const HEARTBEAT_MAX_BYTES = 1024 * 1024;
180
180
  // DEFAULT_PROJECT_CWD imported from lib/schedulerBatch.cjs (single source of truth).
181
181
 
182
182
  const ENV_CAP = process.env.SM_SCHEDULER_MAX_CONCURRENCY
183
- ? Math.max(1, Math.min(20, parseInt(process.env.SM_SCHEDULER_MAX_CONCURRENCY, 10) || 4))
183
+ ? Math.max(1, Math.min(20, parseInt(process.env.SM_SCHEDULER_MAX_CONCURRENCY, 10) || 3))
184
184
  : null;
185
185
 
186
+ // Each headless claude -p process can grow past 1 GB; require 1.5 GB headroom
187
+ // per running+pending slot to avoid OOM (incident 2026-06-10).
188
+ const MIN_FREE_MB_PER_JOB = 1500;
189
+
186
190
  const DEFAULT_CONFIG = {
187
191
  offsetMinutes: 15,
188
- concurrencyCap: ENV_CAP ?? 4,
192
+ concurrencyCap: ENV_CAP ?? 3,
189
193
  defaultCwd: DEFAULT_PROJECT_CWD,
190
194
  // 'when-available' = poll usage and fire whenever utilization < threshold.
191
195
  // 'on-reset' = fire offsetMinutes after the next 5h reset (legacy).
@@ -202,6 +206,39 @@ const DEFAULT_CONFIG = {
202
206
  },
203
207
  };
204
208
 
209
+ // ---------- memory gate ----------
210
+
211
/**
 * Report how much memory the OS currently considers available, in whole MB.
 *
 * Only Linux is measured, via the `MemAvailable` row of /proc/meminfo. Every
 * other platform, a missing `MemAvailable` row, and any read error all yield
 * `Infinity`, so the memory gate fails open and never blocks scheduling when
 * the figure cannot be obtained.
 *
 * @returns {number} Available memory in MB (rounded down), or Infinity when unknown.
 */
function getAvailableMemMb() {
  const UNKNOWN = Infinity;
  if (process.platform === 'linux') {
    try {
      const meminfo = fs.readFileSync('/proc/meminfo', 'utf8');
      const hit = /^MemAvailable:\s+(\d+)\s+kB/m.exec(meminfo);
      if (hit) {
        const kilobytes = parseInt(hit[1], 10);
        return Math.floor(kilobytes / 1024);
      }
    } catch {
      // Unreadable /proc/meminfo: fall through to the fail-open value.
    }
  }
  return UNKNOWN;
}
227
+
228
/**
 * Pure helper: work out how many jobs of a candidate batch may start without
 * available memory falling below `minPerJob` MB for every active slot
 * (already-running jobs plus the ones about to launch). Exported for unit tests.
 *
 * @param {number} availableMb  Available memory in MB; Infinity disables the gate.
 * @param {number} minPerJob    Required headroom per slot, in MB.
 * @param {number} runningCount Jobs already running.
 * @param {number} batchLen     Size of the batch the caller would like to launch.
 * @returns {number} Largest count <= batchLen that fits; 0 when nothing fits.
 */
function memoryLimitedBatchSize(availableMb, minPerJob, runningCount, batchLen) {
  if (availableMb === Infinity) {
    return batchLen;
  }
  // True while launching `count` jobs would need more memory than is available.
  const exceedsBudget = (count) => availableMb < minPerJob * (runningCount + count);
  let count = batchLen;
  for (; count > 0; count--) {
    if (!exceedsBudget(count)) break;
  }
  return count;
}
241
+
205
242
  // ---------- fs helpers ----------
206
243
 
207
244
  /**
@@ -539,6 +576,8 @@ let heartbeatInterval = null;
539
576
  // double-spawn when runDueJobs() is called while jobs are in flight.
540
577
  const runningSet = new Set();
541
578
  let cancelToken = { cancelled: false };
579
+ // Last memory-gate observation; included in snapshot for renderer visibility.
580
+ let lastMemGate = null;
542
581
 
543
582
  function attachWindow(w) { mainWindow = w; }
544
583
 
@@ -557,6 +596,13 @@ function buildScheduleStatePayload(state, { withPaths = false } = {}) {
557
596
  nextReset: getNextResetCached(),
558
597
  paused: state.paused,
559
598
  utilization: cachedUtilization,
599
+ pollHealth: {
600
+ lastPollAt,
601
+ lastPollOk,
602
+ consecutiveFailures,
603
+ lastFailureKind,
604
+ },
605
+ memGate: lastMemGate,
560
606
  };
561
607
  if (withPaths) {
562
608
  payload.paths = { root: ROOT, prds: PRDS_DIR, runs: RUNS_DIR, queue: QUEUE_PATH };
@@ -743,7 +789,7 @@ async function executeJob(job, runDir, defaultCwd, onPid) {
743
789
  // before handing it to the child process.
744
790
  try { fs.accessSync(cwd, fs.constants.X_OK); }
745
791
  catch {
746
- const errMsg = `cwd no longer exists: ${cwd}`;
792
+ const errMsg = `cwd does not exist on this machine: ${cwd}`;
747
793
  safeLog(`[scheduler] ${errMsg}\n`);
748
794
  closeFd();
749
795
  // Sync write: this is an early-exit error path inside an async function,
@@ -1356,11 +1402,29 @@ function tickQueue() {
1356
1402
  const batch = pickNextBatch(state.jobs, runningSet, cap);
1357
1403
  if (batch.length === 0) return;
1358
1404
 
1405
+ const availableMb = getAvailableMemMb();
1406
+ const allowed = memoryLimitedBatchSize(availableMb, MIN_FREE_MB_PER_JOB, runningSet.size, batch.length);
1407
+ if (allowed === 0) {
1408
+ const threshold = MIN_FREE_MB_PER_JOB * (runningSet.size + 1);
1409
+ console.log(`[scheduler] memory gate: available=${availableMb} MB < threshold=${threshold} MB — deferring ${batch.length} job(s)`);
1410
+ lastMemGate = { availableMb, threshold, deferred: true, at: new Date().toISOString() };
1411
+ return;
1412
+ }
1413
+ const gatedBatch = batch.slice(0, allowed);
1414
+ if (gatedBatch.length < batch.length) {
1415
+ console.log(`[scheduler] memory gate: available=${availableMb} MB — clamped batch ${batch.length} → ${gatedBatch.length}`);
1416
+ lastMemGate = { availableMb, threshold: MIN_FREE_MB_PER_JOB * (runningSet.size + gatedBatch.length), deferred: false, clamped: true, at: new Date().toISOString() };
1417
+ } else {
1418
+ // Ungated full batch: clear stale gate snapshot so status doesn't show
1419
+ // a stale deferral from a previous tick.
1420
+ lastMemGate = null;
1421
+ }
1422
+
1359
1423
  await mutate((s) => { s.lastRunAt = new Date().toISOString(); });
1360
1424
  await broadcast();
1361
1425
 
1362
1426
  const { runId, dir: runDir } = pickRunDir();
1363
- for (const job of batch) {
1427
+ for (const job of gatedBatch) {
1364
1428
  if (cancelToken.cancelled) break;
1365
1429
  // spawnJob is fire-and-forget; it calls tickQueue() on completion.
1366
1430
  spawnJob(job, runId, runDir, state.config.defaultCwd).catch(() => {});
@@ -1450,6 +1514,18 @@ async function reapDeadRunningJobs() {
1450
1514
 
1451
1515
  // ---------- poll loop with exponential backoff ----------
1452
1516
 
1517
/**
 * Pure: map the current pause reason to the clearPause source that should be
 * used after a successful billing poll. Exported for unit testing.
 *
 * @param {string|null} pauseReason    Reason of the active pause, if any.
 * @param {boolean}     hasCachedReset Whether a reset timestamp is cached.
 * @returns {string|null} 'network-recovered', 'auth-recovered',
 *   'reset-recovered' (only when a reset is cached), or null when the pause
 *   should stay in place.
 */
function pollRecoveryClearSource(pauseReason, hasCachedReset) {
  switch (pauseReason) {
    case 'network':
      return 'network-recovered';
    case 'auth':
      return 'auth-recovered';
    case 'reset_failure':
      // A reset_failure pause only lifts once a reset timestamp is available.
      return hasCachedReset ? 'reset-recovered' : null;
    default:
      return null;
  }
}
1528
+
1453
1529
  async function pollLoop() {
1454
1530
  try {
1455
1531
  await reapDeadRunningJobs().catch(() => {});
@@ -1468,15 +1544,10 @@ async function pollLoop() {
1468
1544
  lastPollOk = true;
1469
1545
  persistSchedulerState();
1470
1546
 
1471
- // If a 'network' pause resolved, clear it now that we have a good reading.
1547
+ // Clear any pause that was waiting for a successful billing read.
1472
1548
  const cur = await readQueue();
1473
- if (cur.paused?.reason === 'network') {
1474
- await clearPause('network-recovered');
1475
- }
1476
- // If 'reset_failure' was set and we now have a valid reset, clear it.
1477
- if (cur.paused?.reason === 'reset_failure' && cachedNextReset) {
1478
- await clearPause('reset-recovered');
1479
- }
1549
+ const clearSrc = pollRecoveryClearSource(cur.paused?.reason ?? null, !!cachedNextReset);
1550
+ if (clearSrc) await clearPause(clearSrc);
1480
1551
 
1481
1552
  await maybeLaunchWhenAvailable(cur);
1482
1553
  await broadcast();
@@ -1558,6 +1629,66 @@ function selectHistoryJobs(jobs, limit) {
1558
1629
  .slice(0, cap);
1559
1630
  }
1560
1631
 
1632
// Verdicts produced by the transcript scan, which a fresh verifyRun pass can
// re-evaluate. 'uncommitted_changes' is deliberately absent: it comes from the
// git commit-guard, which verifyRun does not inspect, so re-scanning it would
// always come back 'clean' and wrongly heal a genuinely-unfinished job.
const RESCANNABLE_VERDICTS = new Set(['transcript_errors', 'verify_unavailable']);

/**
 * Pure predicate: may this job go through the re-verify self-heal pass?
 * Eligible jobs are in `needs_review`, have a `runId`, and carry a verdict
 * listed in RESCANNABLE_VERDICTS. Exported for tests.
 *
 * @param {object|null|undefined} job Queue entry to inspect.
 * @returns {boolean} True only when every eligibility condition holds.
 */
function isRescanCandidate(job) {
  if (!job) return false;
  if (job.status !== 'needs_review') return false;
  if (!job.runId) return false;
  return RESCANNABLE_VERDICTS.has(job.verifierVerdict);
}
1650
+
1651
/**
 * Self-healing pass over needs_review jobs. The verifier runs in-process, so a
 * fix to runVerify.cjs only takes effect for jobs verified AFTER an app
 * restart — jobs flagged by the old (buggy) verifier stay stuck in needs_review
 * forever. This pass re-runs the CURRENT verifier over every transcript-scan
 * needs_review job and auto-completes the ones that now pass clean, so verifier
 * improvements retroactively clear their own false positives (2026-06-10:
 * anchored ImportError detectors + harness-tool-error exemption healed 8 jobs).
 *
 * Heals are keyed by (slug, runId) and eligibility is re-checked inside the
 * mutation: verification awaits between the queue snapshot and the write, so a
 * job may have been re-run or re-flagged (e.g. as 'uncommitted_changes') in the
 * meantime, and such a job must not be completed on the strength of a stale scan.
 *
 * @returns {Promise<{rescanned:number, healed:string[]}>} `rescanned` is the
 *   number of candidates scanned; `healed` lists the slugs whose re-scan came
 *   back clean.
 */
async function reverifyNeedsReview() {
  const snap = await readQueue();
  const candidates = snap.jobs.filter(isRescanCandidate);
  const healed = [];
  // Identity of the exact run that was re-verified, not just the slug.
  const healKey = (j) => JSON.stringify([j.slug, j.runId]);
  const healKeys = new Set();
  for (const job of candidates) {
    const runDir = path.join(RUNS_DIR, job.runId);
    const prdPath = path.join(PRDS_DIR, `${job.slug}.md`);
    let v = null;
    try {
      v = await verifyRun({ runDir, prdPath, queueEntry: job, allJobs: snap.jobs });
    } catch { continue; } // unreadable log etc. — leave for human review
    if (v && v.verdict === 'clean') {
      healed.push(job.slug);
      healKeys.add(healKey(job));
    }
  }
  if (healed.length) {
    await mutate((s) => {
      for (const j of s.jobs) {
        // Re-assert candidacy against the live queue entry: only the run that
        // was actually re-scanned, and only if it is still rescannable.
        if (isRescanCandidate(j) && healKeys.has(healKey(j))) {
          j.status = 'completed';
          j.error = null;
          delete j.verifierVerdict;
        }
      }
    });
    console.log(`[scheduler] boot reverify: healed ${healed.length} stale needs_review → completed (${healed.join(', ')})`);
    await broadcast();
  }
  return { rescanned: candidates.length, healed };
}
1691
+
1561
1692
  function registerScheduleHandlers() {
1562
1693
  ensureDirs();
1563
1694
  supervisor.registerHandlers();
@@ -1595,6 +1726,13 @@ function registerScheduleHandlers() {
1595
1726
  };
1596
1727
  });
1597
1728
 
1729
+ ipcMain.handle('schedule:reverify-needs-review', async () => {
1730
+ // Manual trigger for the boot self-heal pass — re-scan needs_review jobs
1731
+ // with the current verifier and auto-complete the ones that now pass clean.
1732
+ const result = await reverifyNeedsReview();
1733
+ return { ok: true, ...result };
1734
+ });
1735
+
1598
1736
  ipcMain.handle('schedule:force-tick', async () => {
1599
1737
  // Bypass the billing-poll gate entirely — fire pending jobs immediately regardless of meter state.
1600
1738
  // Clears any existing pause first (same semantics as run-now).
@@ -1850,6 +1988,13 @@ async function init() {
1850
1988
  await setPaused(boot.paused.reason, boot.paused.resumeAt);
1851
1989
  }
1852
1990
 
1991
+ // Self-heal stale needs_review flags using the current verifier (see
1992
+ // reverifyNeedsReview). Runs once on boot so a shipped verifier fix clears
1993
+ // its own historical false positives without manual retagging.
1994
+ await reverifyNeedsReview().catch((e) => {
1995
+ console.error(`[scheduler] boot reverify failed: ${e?.message ?? e}`);
1996
+ });
1997
+
1853
1998
  await rescheduleTimer();
1854
1999
  // Refresh next-reset every 10 minutes — billing window can shift if usage
1855
2000
  // resets early or the auth token rotates. Tracked so re-init doesn't leak.
@@ -1961,6 +2106,19 @@ const remote = {
1961
2106
  const resolved = safeSlugPath(slug);
1962
2107
  if (!resolved) return { ok: false, error: 'invalid slug' };
1963
2108
  try {
2109
+ // Symlink defense, matching readPrd/readLog: safeSlugPath is lexical and
2110
+ // does NOT resolve symlinks, so a rogue job could plant prds/x.md → an
2111
+ // arbitrary $HOME path and have writeTextAtomic clobber it. Resolve the
2112
+ // real parent dir (the file itself may not exist yet) and re-assert
2113
+ // containment; also reject the target if it is already a symlink.
2114
+ const realParent = await fsp.realpath(path.dirname(resolved));
2115
+ if (realParent !== PRDS_DIR && !realParent.startsWith(PRDS_DIR + path.sep)) {
2116
+ return { ok: false, error: 'invalid slug' };
2117
+ }
2118
+ const existing = await fsp.lstat(resolved).catch(() => null);
2119
+ if (existing && existing.isSymbolicLink()) {
2120
+ return { ok: false, error: 'invalid slug' };
2121
+ }
1964
2122
  await config.writeTextAtomic(resolved, body);
1965
2123
  const stat = await fsp.stat(resolved);
1966
2124
  return { ok: true, bytesWritten: stat.size };
@@ -2005,4 +2163,4 @@ const remote = {
2005
2163
  },
2006
2164
  };
2007
2165
 
2008
- module.exports = { registerScheduleHandlers, attachWindow, init, ROOT, PRDS_DIR, selectHistoryJobs, parsePorcelain, FINISH_PROTOCOL, remote, pickNextBatch, pickForProject, reapDeadRunningJobs };
2166
+ module.exports = { registerScheduleHandlers, attachWindow, init, ROOT, PRDS_DIR, selectHistoryJobs, parsePorcelain, FINISH_PROTOCOL, remote, pickNextBatch, pickForProject, reapDeadRunningJobs, pollRecoveryClearSource, memoryLimitedBatchSize, reverifyNeedsReview, isRescanCandidate };
@@ -43,6 +43,44 @@ function transcriptPath(cwd, sessionUuid) {
43
43
  return path.join(os.homedir(), '.claude', 'projects', encodeCwd(cwd), `${sessionUuid}.jsonl`);
44
44
  }
45
45
 
46
// Upper bound, in UTF-16 code units, for any single string kept in a raw projection.
const MAX_RAW_STR = 4096;

// Block types whose text/content fields are parsed structurally by
// orchestrator.ts / race.ts — truncating them produces mid-token "…" and
// unparseable JSON, so they are exempt from the size cap.
const EXEMPT_TYPES = new Set(['tool_result', 'tool_use']);

/**
 * Truncate `value` to at most MAX_RAW_STR code units and append "…".
 * If the cut would fall between the two halves of a surrogate pair, the cut
 * moves back one unit so the result never ends in a lone high surrogate.
 *
 * @param {string} value String to cap.
 * @returns {string} `value` unchanged when short enough, else the capped copy.
 */
function capRawString(value) {
  if (value.length <= MAX_RAW_STR) return value;
  let end = MAX_RAW_STR;
  const last = value.charCodeAt(end - 1);
  const next = value.charCodeAt(end);
  const splitsPair = last >= 0xd800 && last <= 0xdbff && next >= 0xdc00 && next <= 0xdfff;
  if (splitsPair) end -= 1;
  return `${value.slice(0, end)}…`;
}

/**
 * Cap string fields in a content block array so arbitrary tool output doesn't
 * bloat the ring buffer. Blocks whose type is in EXEMPT_TYPES are passed
 * through intact (same object reference) so that structured result payloads
 * survive to the digest parsers in race.ts / orchestrator.ts. Other blocks are
 * shallow-copied before trimming, so the input is never mutated.
 *
 * @param {*} content A message `content` value; non-arrays are returned as-is.
 * @returns {*} A new array with `text` / `content` strings capped and nested
 *   `content` arrays trimmed recursively, or the original non-array value.
 */
function trimContentArray(content) {
  if (!Array.isArray(content)) return content;
  return content.map((block) => {
    if (!block || typeof block !== 'object') return block;
    if (EXEMPT_TYPES.has(block.type)) return block;
    const b = { ...block };
    if (typeof b.text === 'string') {
      b.text = capRawString(b.text);
    }
    if (typeof b.content === 'string') {
      b.content = capRawString(b.content);
    } else if (Array.isArray(b.content)) {
      b.content = trimContentArray(b.content);
    }
    return b;
  });
}
77
+
78
/**
 * Build the slim `raw` projection used by race.ts and orchestrator.ts: only
 * `message.content` is kept, after it has been passed through trimContentArray.
 *
 * @param {object|null|undefined} obj Parsed transcript line.
 * @returns {{message: {content: *}}} Projection holding the trimmed content.
 */
function makeRaw(obj) {
  const content = trimContentArray(obj?.message?.content);
  return { message: { content } };
}
83
+
46
84
  /**
47
85
  * Parse one JSONL line defensively. Real schema drifts, so we pass through
48
86
  * anything that parses and tag a coarse `kind`.
@@ -56,7 +94,7 @@ function classifyLine(obj) {
56
94
 
57
95
  // Usage rollups arrive as summary events.
58
96
  if (obj.usage || msg?.usage) {
59
- return { kind: 'usage', data: obj.usage || msg.usage, raw: obj };
97
+ return { kind: 'usage', data: obj.usage || msg.usage, raw: makeRaw(obj) };
60
98
  }
61
99
 
62
100
  // Tool uses: scan content array for tool_use blocks.
@@ -64,31 +102,31 @@ function classifyLine(obj) {
64
102
  for (const block of content) {
65
103
  if (block?.type === 'tool_use') {
66
104
  if (block.name === 'TodoWrite') {
67
- return { kind: 'todo_write', data: block.input?.todos || block.input || [], raw: obj };
105
+ return { kind: 'todo_write', data: block.input?.todos || block.input || [], raw: makeRaw(obj) };
68
106
  }
69
107
  if (block.name === 'ExitPlanMode' || block.name === 'EnterPlanMode') {
70
- return { kind: 'plan', data: block.input, raw: obj };
108
+ return { kind: 'plan', data: block.input, raw: makeRaw(obj) };
71
109
  }
72
110
  if (block.name === 'Agent' || block.name === 'Task') {
73
111
  // Include block.id as toolUseId so the live store can match the
74
112
  // corresponding tool_result and update per-agent lastActivityAt.
75
- return { kind: 'agent_spawn', data: { ...block.input, toolUseId: block.id }, raw: obj };
113
+ return { kind: 'agent_spawn', data: { ...block.input, toolUseId: block.id }, raw: makeRaw(obj) };
76
114
  }
77
115
  return {
78
116
  kind: 'tool_use',
79
117
  data: { name: block.name, input: block.input, id: block.id },
80
- raw: obj,
118
+ raw: makeRaw(obj),
81
119
  };
82
120
  }
83
121
  // tool_result carries the tool_use_id of the completed Task/Agent call.
84
122
  // The live store uses this to update the agent's lastActivityAt bookend.
85
123
  if (block?.type === 'tool_result' && block.tool_use_id) {
86
- return { kind: 'tool_result', data: { toolUseId: block.tool_use_id }, raw: obj };
124
+ return { kind: 'tool_result', data: { toolUseId: block.tool_use_id }, raw: makeRaw(obj) };
87
125
  }
88
126
  }
89
127
  }
90
128
 
91
- return { kind: type || 'message', data: obj, raw: obj };
129
+ return { kind: type || 'message', data: obj, raw: makeRaw(obj) };
92
130
  }
93
131
 
94
132
  /**
@@ -129,7 +167,7 @@ async function readDelta(sub) {
129
167
  }
130
168
  }
131
169
 
132
- async function flush(sub, { emit = true } = {}) {
170
+ async function doFlush(sub, { emit = true, replay = false } = {}) {
133
171
  const lines = await readDelta(sub);
134
172
  for (const line of lines) {
135
173
  let obj;
@@ -150,6 +188,7 @@ async function flush(sub, { emit = true } = {}) {
150
188
  cwd: sub.cwd,
151
189
  sessionUuid: sub.sessionUuid,
152
190
  ev,
191
+ replay,
153
192
  });
154
193
  if (emit) sendIfAlive(window, `transcript:event:${sub.tabId}`, ev);
155
194
  // Mirror to OTEL — no-op when disabled. We emit on the initial drain too
@@ -164,10 +203,86 @@ async function flush(sub, { emit = true } = {}) {
164
203
  }
165
204
  }
166
205
 
206
/**
 * Serialised flush scheduler: at most one flush loop per sub is in flight.
 *
 * Every call marks the sub dirty. If a loop is already running, its promise is
 * returned and the dirty flag makes that loop run one more pass once the
 * current doFlush settles (trailing-edge re-run), so a watcher event arriving
 * mid-flush is not dropped. Otherwise a new loop is started. Flush errors are
 * logged as warnings rather than propagated.
 *
 * @param {object} sub Subscription record carrying `dirty` and `flushing`.
 * @returns {Promise<void>} Promise of the in-flight (or newly started) loop.
 */
function scheduleFlush(sub) {
  sub.dirty = true;
  if (!sub.flushing) {
    // `dirty` was set just above, so the first pass always runs.
    const drain = async () => {
      do {
        sub.dirty = false;
        await doFlush(sub);
      } while (sub.dirty);
    };
    const reportFailure = (e) => {
      logs.writeLine({ level: 'warn', scope: 'transcripts', message: 'flush error', meta: { error: e?.message } });
    };
    const markIdle = () => {
      sub.flushing = null;
    };
    sub.flushing = drain().catch(reportFailure).finally(markIdle);
  }
  return sub.flushing;
}
227
+
167
228
  const MAX_TRANSCRIPT_SUBS = 20;
168
229
 
230
/**
 * LRU pool of released-but-cached subscriptions. A sub that the renderer has
 * released stays alive (offset + buffer preserved), so switching back to the
 * tab resumes from the current offset instead of re-reading the transcript
 * from byte 0. Once the pool grows past LRU_CAP the oldest entries are evicted.
 */
const LRU_CAP = 6;
const lruReleased = []; // tabIds with no active consumer, ordered oldest→newest

/**
 * Tear down the subscription for `tabId`, if one exists: close its watcher
 * (close errors are ignored), drop it from `subs`, remove the tab from the
 * usage matrix, and remove its entry from the released-LRU list.
 *
 * @param {string} tabId Tab whose subscription should be destroyed.
 */
function _closeSub(tabId) {
  const entry = subs.get(tabId);
  if (entry) {
    entry.watcher?.close().catch(() => {});
    subs.delete(tabId);
    usageMatrix.removeTab(tabId);
    const pos = lruReleased.indexOf(tabId);
    if (pos !== -1) {
      lruReleased.splice(pos, 1);
    }
  }
}
249
+
250
/**
 * release(tabId) — the renderer's last consumer for this tab went away (view
 * switch). The sub is kept alive and queued in the released-LRU list so a
 * quick revisit can resume from the preserved offset; whenever the list grows
 * past LRU_CAP, the oldest cached subs are closed. Unknown tabIds are ignored.
 *
 * @param {string} tabId Tab whose subscription is being released.
 */
function release(tabId) {
  if (!subs.has(tabId)) {
    return;
  }
  const alreadyQueued = lruReleased.includes(tabId);
  if (!alreadyQueued) {
    lruReleased.push(tabId);
  }
  // Evict from the front (oldest) until the pool is back within its cap.
  while (lruReleased.length > LRU_CAP) {
    _closeSub(lruReleased.shift());
  }
}
265
+
266
/**
 * closeTab(tabId) — the tab itself was closed. Unlike release(), nothing is
 * cached: the subscription is destroyed straight away.
 *
 * @param {string} tabId Tab that was closed.
 */
function closeTab(tabId) {
  _closeSub(tabId);
}
270
+
169
271
  async function subscribe({ tabId, cwd, sessionUuid }) {
170
- if (subs.has(tabId)) return { ok: true, path: subs.get(tabId).filePath };
272
+ if (subs.has(tabId)) {
273
+ // Tab is in the LRU cache — promote it back to active.
274
+ const i = lruReleased.indexOf(tabId);
275
+ if (i !== -1) lruReleased.splice(i, 1);
276
+ return { ok: true, path: subs.get(tabId).filePath };
277
+ }
278
+ if (subs.size >= MAX_TRANSCRIPT_SUBS) {
279
+ // Before rejecting a genuinely new subscription, evict an idle LRU-cached
280
+ // entry — it occupies a slot but has no active consumer. Only reject if no
281
+ // idle entries are available to free.
282
+ if (lruReleased.length > 0) {
283
+ _closeSub(lruReleased[0]);
284
+ }
285
+ }
171
286
  if (subs.size >= MAX_TRANSCRIPT_SUBS) {
172
287
  logs.writeLine({
173
288
  level: 'warn',
@@ -189,34 +304,33 @@ async function subscribe({ tabId, cwd, sessionUuid }) {
189
304
  pending: '',
190
305
  buffer: [],
191
306
  watcher: null,
307
+ flushing: null,
308
+ dirty: false,
192
309
  };
193
310
  // If the file already exists, read current content as replay. Do not emit
194
311
  // during this initial drain — the renderer drains sub.buffer via
195
312
  // `transcript:buffer` after `transcript:subscribe` resolves. Emitting here
196
313
  // would race the renderer's onEvent listener registration and drop events.
314
+ // replay:true prevents historical usage events from entering the 5-min window.
197
315
  if (fs.existsSync(filePath)) {
198
- await flush(sub, { emit: false });
316
+ await doFlush(sub, { emit: false, replay: true });
199
317
  }
200
318
  const watcher = chokidar.watch(filePath, {
201
319
  ignoreInitial: false,
202
320
  persistent: true,
203
321
  awaitWriteFinish: { stabilityThreshold: 30, pollInterval: 20 },
204
322
  });
205
- watcher.on('add', () => flush(sub).catch(() => {}));
206
- watcher.on('change', () => flush(sub).catch(() => {}));
323
+ watcher.on('add', () => scheduleFlush(sub));
324
+ watcher.on('change', () => scheduleFlush(sub));
207
325
  watcher.on('error', (err) => logs.writeLine({ level: 'warn', scope: 'transcripts', message: 'chokidar watcher error', meta: { error: err?.message } }));
208
326
  sub.watcher = watcher;
209
327
  subs.set(tabId, sub);
210
328
  return { ok: true, path: filePath };
211
329
  }
212
330
 
331
+ /** @deprecated Use release() for view-switch, closeTab() for genuine close. */
213
332
  function unsubscribe(tabId) {
214
- const sub = subs.get(tabId);
215
- if (!sub) return;
216
- sub.watcher?.close().catch(() => {});
217
- subs.delete(tabId);
218
- // Drop the tab from the AgOps matrix — "active sessions" only.
219
- usageMatrix.removeTab(tabId);
333
+ release(tabId);
220
334
  }
221
335
 
222
336
  function getBuffer(tabId) {
@@ -233,8 +347,14 @@ function closeAll() {
233
347
  function registerTranscriptHandlers() {
234
348
  const { schemas: s, validated: v } = require('./ipcSchemas.cjs');
235
349
  ipcMain.handle('transcript:subscribe', v(s.transcriptSubscribe, (payload) => subscribe(payload)));
350
+ // transcript:unsubscribe is now an alias for release (view-switch, not close).
236
351
  ipcMain.handle('transcript:unsubscribe', v(s.transcriptTabId, ({ tabId }) => {
237
- unsubscribe(tabId);
352
+ release(tabId);
353
+ return { ok: true };
354
+ }));
355
+ // transcript:close is the genuine close used when a tab is removed.
356
+ ipcMain.handle('transcript:close', v(s.transcriptTabId, ({ tabId }) => {
357
+ closeTab(tabId);
238
358
  return { ok: true };
239
359
  }));
240
360
  ipcMain.handle('transcript:buffer', v(s.transcriptTabId, ({ tabId }) => getBuffer(tabId)));
@@ -245,6 +365,8 @@ module.exports = {
245
365
  attachWindow,
246
366
  registerTranscriptHandlers,
247
367
  closeAll,
368
+ release,
369
+ closeTab,
248
370
  encodeCwd,
249
371
  transcriptPath,
250
372
  classifyLine,
@@ -85,7 +85,7 @@ function ensureTab(tabId, cwd, sessionUuid) {
85
85
  * Feed one classified transcript event into the per-tab aggregator. Called
86
86
  * from transcripts.cjs for every event, both during replay and live.
87
87
  */
88
- function recordEvent({ tabId, cwd, sessionUuid, ev }) {
88
+ function recordEvent({ tabId, cwd, sessionUuid, ev, replay = false }) {
89
89
  if (!tabId || !ev) return;
90
90
  const t = ensureTab(tabId, cwd, sessionUuid);
91
91
  const now = Date.now();
@@ -109,8 +109,12 @@ function recordEvent({ tabId, cwd, sessionUuid, ev }) {
109
109
  t.perTurnInputTokens.push(inTok);
110
110
  if (t.perTurnInputTokens.length > TURN_RING) t.perTurnInputTokens.shift();
111
111
 
112
- t.tokenWindow.push({ ts: now, tokens: inTok + outTok });
113
- pruneWindow(t.tokenWindow, now);
112
+ // Historical events must not enter the 5-min sliding window — doing so
113
+ // would make tokensPerMin spike to "critical" on every tab switch.
114
+ if (!replay) {
115
+ t.tokenWindow.push({ ts: now, tokens: inTok + outTok });
116
+ pruneWindow(t.tokenWindow, now);
117
+ }
114
118
  dirty = true;
115
119
  break;
116
120
  }