npm - @semalt-ai/code - Versions diffs - 1.20.0 → 1.20.1 - Mend

@semalt-ai/code 1.20.0 → 1.20.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/lib/commands/chat-session.js +2 -1
package/lib/commands/chat-turn.js +61 -17
package/lib/ui/file-activity.js +11 -18
package/package.json +1 -1
package/test/file-activity.test.js +70 -50
package/test/narration-ordering.test.js +309 -0
package/test/permission-flush.test.js +302 -0

package/lib/commands/chat-session.js CHANGED Viewed

@@ -127,7 +127,8 @@ function createChatSession(ctx) {
     // its own per-op line via the SAME `_display` render the live path uses —
     // byte-identical to a fresh per-op commit. read_file and list_dir share ONE
     // group (mirroring the live merged key): a mixed run re-groups into the same
-    // single summary, with fileSummaryState picking the homogeneous-vs-mixed verb.
+    // single summary, rendered with the single "explored ×N" verb (fileSummaryState
+    // no longer branches on composition).
     let fileBuf = [];   // [{ core, ts }]
     function flushFile() {
       if (!fileBuf.length) return;

package/lib/commands/chat-turn.js CHANGED Viewed

@@ -254,6 +254,23 @@ function createTurnHandler(ctx, slashHandlers) {
         statusBar.update('streaming', 'Streaming response');
       },
       onPermissionAsk: (tag, input) => {
+        // Flush any open file/web activity group BEFORE the permission picker
+        // opens. The permission gate fires ahead of onToolStart (agent.js — the
+        // "ask before onToolStart" comment), so the non-groupable flush that
+        // onToolStart normally performs (below) is sequenced AFTER the modal and
+        // can't fire while it's open — leaving an open group stranded LIVE in the
+        // writer's activity region beside the prompt for the modal's whole life.
+        // Flush here so the group commits to scrollback ABOVE the prompt instead.
+        // This is safe to do unconditionally: groupable tools (read_file/list_dir)
+        // are read-only with a NULL permission descriptor, so onPermissionAsk
+        // NEVER fires for them — by the time we get here the prompting tool is by
+        // definition non-groupable, exactly the case onToolStart already flushes.
+        // flush() is idempotent (isOpen()/groupId===null guard), so the later
+        // onToolStart flush, the turn-end finally flush, or the deny path all
+        // become no-ops — no double commit. Covers the deny case too: a denied
+        // tool's group is committed here rather than stranded until the finally.
+        if (webTracker.isOpen()) webTracker.flush();
+        if (fileTracker.isOpen()) fileTracker.flush();
         // Status-bar update fires while the permission picker is open so
         // the user can see what's pending in the side label, not just
         // inside the modal. Mirrors the labels onToolStart uses post-grant
@@ -496,6 +513,28 @@ function createTurnHandler(ctx, slashHandlers) {
         // branch opened the gate (eager-open or showThink). The StreamParser emits
         // these verbatim, so once the gate is open they would otherwise stream live.
         if (ORPHAN_CLOSE_TAG_RE.test(token.trim())) return;
+        // Ordering fix (Option b) — flush any open file/web activity group BEFORE
+        // the FIRST content-bearing narration token commits to scrollback. Streamed
+        // narration commits INCREMENTALLY: streamToken() emits the "▸ AI-agent"
+        // header and each complete line to immutable scrollback (chat-history.js)
+        // BEFORE onAssistantMessage/finalizeLastMessage ever fires. So flushing only
+        // at onAssistantMessage is too LATE for the streamed path — the narration
+        // lines are already above a still-open group, which then commits BELOW the
+        // conclusion it's based on (the "list ×3 below 'directory almost empty'"
+        // bug). Flushing here, at streaming-start, guarantees the group's summary
+        // commits ABOVE the first visible narration line.
+        //
+        // Gate strictly: only when the stream has NOT yet started (so we flush once,
+        // before the header) AND this token carries non-whitespace content — pure
+        // whitespace streaming artifacts in a silent read,read,read run must NOT
+        // flush, so such runs still collapse to one "explored ×N". flush() is
+        // idempotent (groupId===null guard), so the later onAssistantMessage,
+        // onToolStart, onPermissionAsk, and turn-end finally flushes all no-op —
+        // exactly one commit.
+        if (token && token.trim() && !chatHistory.isStreaming?.()) {
+          if (webTracker.isOpen()) webTracker.flush();
+          if (fileTracker.isOpen()) fileTracker.flush();
+        }
         chatHistory.streamToken(token);
         statusBar.onToken();
       },
@@ -516,26 +555,31 @@ function createTurnHandler(ctx, slashHandlers) {
         const terminal = meta && typeof meta.terminal === 'boolean'
           ? meta.terminal
           : !!(cleanContent && cleanContent.trim());
-        // Web-activity ordering (W.3 regression fix): commit any still-open web
-        // group BEFORE the answer is finalized, so the collapsed "✓ web · …"
-        // summary lands ABOVE the answer in scrollback (pre-W.3 ordering).
+        // Ordering fix (Option b) — commit any still-open file/web activity group
+        // BEFORE the answer is finalized, so the collapsed summary lands ABOVE the
+        // narration in scrollback (correct chronological ordering: a conclusion has
+        // the group it's based on committed above it).
+        //
+        // Flush on the TERMINAL signal (the final no-tool answer) OR on any
+        // CONTENT-BEARING intermediate narration. This is the deliberate Option-(b)
+        // tradeoff: an intermediate narration that carries visible content now
+        // flushes the open group, so a "chatty" multi-read run FRAGMENTS into
+        // correctly-ordered sub-groups (each "explored ×N" above its narration)
+        // rather than collapsing across a conclusion that was based on it. A SILENT
+        // multi-read run (empty/whitespace-only interim narration — pure streaming
+        // artifacts) does NOT flush, so it still collapses fully to one summary.
         //
-        // Guard on the TERMINAL signal (no tool calls this iteration). Intermediate
-        // web-tool iterations are non-terminal — they keep the group open so a
-        // multi-step search→fetch still collapses into a single line (the W.3
-        // guarantee). Pre-live-narration this used "cleanContent is empty" as the
-        // proxy for intermediate; now intermediate iterations carry narration too,
-        // so we rely on the explicit `terminal` flag instead — otherwise an
-        // intermediate narration would flush the group early and split the line.
-        // Empty/interrupted turns (no terminal message ever arrives) fall back to
-        // the turn-end `finally` flush, which is the safety net.
-        if (terminal && webTracker.isOpen()) {
+        // For the STREAMED path the open group is already committed above by the
+        // onToken streaming-start flush; this is the backstop for the non-streaming
+        // / finalize-only path (and the direct-callback unit tests). flush() is
+        // idempotent, so when both fire only one commit results. Empty/interrupted
+        // turns (no terminal message, no content) fall back to the turn-end
+        // `finally` flush, which is the safety net.
+        const contentful = !!(cleanContent && cleanContent.trim());
+        if ((terminal || contentful) && webTracker.isOpen()) {
           webTracker.flush();
         }
-        // Same terminal-gating for the file group: only the explicit terminal
-        // signal flushes, so intermediate-iteration narration does NOT split a
-        // multi-iteration read run — it still collapses to one summary.
-        if (terminal && fileTracker.isOpen()) {
+        if ((terminal || contentful) && fileTracker.isOpen()) {
           fileTracker.flush();
         }
         chatHistory.finalizeLastMessage(cleanContent);

package/lib/ui/file-activity.js CHANGED Viewed

@@ -9,7 +9,7 @@
 // collapses a run of CONSECUTIVE same-type file ops into a SINGLE compact
 // process-summary line —
 //
-//     ✓ file · read ×10 (index.html, battlecity.js, …)
+//     ✓ file · explored ×10 (index.html, battlecity.js, …)
 //
 // — exactly the way `web-activity.js` collapses web_search/http_get. It is a
 // parallel, independent instance of `createWebActivityTracker`; the web tracker
@@ -24,9 +24,9 @@
 // scrollback model:
 //   • GROUP KEY = a single shared key for BOTH read_file and list_dir, so a
 //     mixed read/list exploration phase collapses into ONE summary instead of
-//     fragmenting on every read↔list switch. A homogeneous run keeps its specific
-//     verb ("read ×N" / "list ×N"); a genuinely mixed run uses the neutral "file
-//     ×N". Any OTHER tool still breaks the run. The web tracker has a single key.
+//     fragmenting on every read↔list switch. Every group renders the SAME verb
+//     ("explored ×N", live "exploring… ×N") regardless of composition. Any OTHER
+//     tool still breaks the run. The web tracker has a single key.
 //   • THRESHOLD decided at flush time. A group of 1–2 ops commits each op as its
 //     own normal result line (byte-identical to today); a group of 3+ commits ONE
 //     summary line. The web tracker always collapses. We can't retroactively pull
@@ -81,21 +81,14 @@ function _basename(p) {
 // Pure: fold a list of file ops (ToolOperation descriptors OR persisted cores —
 // both expose `tag`/`target`) into the fields the summary needs. read_file and
-// list_dir now share one group, so a group may be MIXED. The verb reflects the
-// group's composition: homogeneous reads → "read"/"reading…", homogeneous lists
-// → "list"/"listing…", a genuinely mixed group → the neutral "file"/"accessing…".
+// list_dir share one group, so a group may be MIXED — but the verb is a SINGLE
+// "explored"/"exploring…" regardless of composition (read-only, list-only, and
+// mixed all read the same). No more homogeneous-vs-mixed branching.
 function fileSummaryState(ops) {
   const list = (ops || []).filter(Boolean);
-  let hasRead = false, hasList = false;
-  for (const o of list) {
-    if (normalizeFileTag(o.tag) === 'list_dir') hasList = true;
-    else hasRead = true;
-  }
-  const mixed = hasRead && hasList;
-  const isList = hasList && !hasRead;
   return {
-    verb: mixed ? 'file' : (isList ? 'list' : 'read'),
-    gerund: mixed ? 'accessing…' : (isList ? 'listing…' : 'reading…'),
+    verb: 'explored',
+    gerund: 'exploring…',
     count: list.length,
     basenames: list.map((o) => _basename(o.target)),
   };
@@ -172,8 +165,8 @@ function createFileActivityTracker(deps) {
     // share one key, so a read↔list switch does NOT flush — both accumulate into
     // the same group (the key only changes for a different category, which never
     // reaches here). The live row is a growing web-style aggregate: "● file ·
-    // reading… ×N (a, b, …)" (or "accessing… ×N" once mixed). `input` is the op's
-    // path (used for the live basename).
+    // exploring… ×N (a, b, …)". `input` is the op's path (used for the live
+    // basename).
     start(tag, input) {
       const key = fileGroupKey(tag);
       if (groupId !== null && key !== currentKey) api.flush();

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@semalt-ai/code",
-  "version": "1.20.0",
+  "version": "1.20.1",
   "description": "Self-hosted AI Coding Assistant CLI",
   "main": "./lib/sdk.js",
   "//exports": "Two-tier embedding surface (Task 5.2): '.' is the STABLE createAgent facade; './internals' is the UNSTABLE building blocks (no semver guarantee). The boundary is enforced here, not just in docs. Works for both require() and import.",

package/test/file-activity.test.js CHANGED Viewed

@@ -3,8 +3,8 @@
 // File-activity grouping — a SECOND INSTANCE of the web-activity collapser for
 // consecutive pure file reads/lists (read_file / list_dir). read_file and
 // list_dir share ONE group key, so a mixed read/list exploration phase collapses
-// into a single summary (neutral "file ×N" verb) instead of fragmenting; a
-// homogeneous run keeps its specific "read ×N" / "list ×N" verb. Covers the live
+// into a single summary instead of fragmenting; EVERY group (read-only,
+// list-only, or mixed) renders the same single "explored ×N" verb. Covers the live
 // flush sites (driven through the REAL createTurnHandler callbacks, exactly as
 // web-activity-ordering.test.js does for web ops), the flush-time THRESHOLD
 // (1–2 individual lines, 3+ collapsed summary), the merged read/list group, the
@@ -89,12 +89,12 @@ function fileOp(cb, tag, path, bytes) {
 }
 const commits = (events) => events.filter((e) => e.kind === 'commit');
-// Matches a collapsed file summary for any verb: homogeneous (read/reading,
-// list/listing) or the neutral mixed verb (file/accessing).
-const summaries = (events) => commits(events).filter((e) => /file .*(read|reading|list|listing|file|accessing) ×\d+/.test(e.line));
+// Matches a collapsed file summary — a single "explored ×N" verb for every group
+// composition (read-only, list-only, or mixed).
+const summaries = (events) => commits(events).filter((e) => /file .* explored ×\d+/.test(e.line));
 // ───────────────────────────────────────────────────────────────────────────
-// (a) 10 consecutive read_file ops → ONE "✓ file · read ×10 (…)" summary line.
+// (a) 10 consecutive read_file ops → ONE "✓ file · explored ×10 (…)" summary line.
 // ───────────────────────────────────────────────────────────────────────────
 test('(a) 10 reads collapse to ONE summary; ×10 always present; basenames truncated to width', async () => {
   const prevCols = process.stdout.columns;
@@ -111,7 +111,7 @@ test('(a) 10 reads collapse to ONE summary; ×10 always present; basenames trunc
     const s = summaries(h.events);
     assert.strictEqual(s.length, 1, 'exactly one collapsed summary');
-    assert.match(s[0].line, /file .* read ×10 \(/, 'shows the read verb and the ×10 count');
+    assert.match(s[0].line, /file .* explored ×10 \(/, 'shows the explored verb and the ×10 count');
     assert.ok(s[0].line.includes('…'), 'the basename list is truncated to width');
     assert.match(s[0].line, /×10/, 'the ×10 count survives truncation (it is in the fixed prefix)');
     // Single physical row at the render width.
@@ -145,10 +145,10 @@ test('(b) 2 reads commit as two individual lines, no summary', async () => {
 // ───────────────────────────────────────────────────────────────────────────
 // (c) reads and lists INTERLEAVED (read, list, read, list, read) → ONE merged
-//     summary with the neutral "file ×5" verb (was: two separate summaries —
+//     summary with the single "explored ×5" verb (was: two separate summaries —
 //     CHANGED by the key-merge: read_file + list_dir now share one group).
 // ───────────────────────────────────────────────────────────────────────────
-test('(c) interleaved reads+lists collapse to ONE merged summary with the neutral verb', async () => {
+test('(c) interleaved reads+lists collapse to ONE merged summary with the explored verb', async () => {
   const h = harness();
   h.setScenario(async (cb) => {
     cb.onAssistantMessage('');
@@ -163,8 +163,8 @@ test('(c) interleaved reads+lists collapse to ONE merged summary with the neutra
   const s = summaries(h.events);
   assert.strictEqual(s.length, 1, 'one merged summary — read and list share a group now');
-  assert.match(s[0].line, /file .* file ×5 \(/, 'mixed group uses the neutral "file ×5" verb');
-  assert.doesNotMatch(s[0].line, /read ×|list ×/, 'no homogeneous verb for a mixed group');
+  assert.match(s[0].line, /file .* explored ×5 \(/, 'mixed group uses the single "explored ×5" verb');
+  assert.doesNotMatch(s[0].line, /read ×|list ×|file ×/, 'no read/list/file verb for the merged group');
   // All five basenames/dirs listed once in the merged summary.
   for (const b of ['a.js', 'd0', 'b.js', 'd1', 'c.js']) {
     assert.ok(s[0].line.includes(b), `merged summary lists ${b}`);
@@ -172,9 +172,9 @@ test('(c) interleaved reads+lists collapse to ONE merged summary with the neutra
 });
 // ───────────────────────────────────────────────────────────────────────────
-// (c2) homogeneous list run (5 list_dir, no reads) → still "list ×5".
+// (c2) list-only run (5 list_dir, no reads) → "explored ×5" (single verb).
 // ───────────────────────────────────────────────────────────────────────────
-test('(c2) 5 list_dir ops only → homogeneous "list ×5" summary (specific verb kept)', async () => {
+test('(c2) 5 list_dir ops only → "explored ×5" summary (single verb, no list branch)', async () => {
   const h = harness();
   h.setScenario(async (cb) => {
     cb.onAssistantMessage('');
@@ -185,8 +185,8 @@ test('(c2) 5 list_dir ops only → homogeneous "list ×5" summary (specific verb
   const s = summaries(h.events);
   assert.strictEqual(s.length, 1, 'one summary');
-  assert.match(s[0].line, /list ×5/, 'homogeneous lists keep the specific "list" verb');
-  assert.doesNotMatch(s[0].line, /file ×|read ×/, 'no neutral/read verb for an all-list group');
+  assert.match(s[0].line, /explored ×5/, 'list-only group uses the single "explored" verb');
+  assert.doesNotMatch(s[0].line, /file ×|read ×|list ×/, 'no read/list/file verb for an all-list group');
 });
 // ───────────────────────────────────────────────────────────────────────────
@@ -214,9 +214,9 @@ test('(c3) a grep between two mixed read/list runs splits them into two summarie
   const s = summaries(h.events);
   assert.strictEqual(s.length, 2, 'the grep splits the run into two merged summaries');
-  assert.match(s[0].line, /file ×3/, 'first mixed group is file ×3');
-  assert.match(s[1].line, /file ×3/, 'second mixed group is file ×3');
-  const iS0 = h.events.findIndex((e) => e.kind === 'commit' && /file ×3/.test(e.line));
+  assert.match(s[0].line, /explored ×3/, 'first mixed group is explored ×3');
+  assert.match(s[1].line, /explored ×3/, 'second mixed group is explored ×3');
+  const iS0 = h.events.findIndex((e) => e.kind === 'commit' && /explored ×3/.test(e.line));
   const iGrep = h.events.findIndex((e) => e.kind === 'commit' && /TODO/.test(e.line));
   assert.ok(iS0 >= 0 && iGrep >= 0 && iS0 < iGrep, 'the first summary lands above the grep line');
 });
@@ -259,7 +259,7 @@ test('(d) a non-file tool after a read run flushes the summary before its own li
   });
   await h.handler('read then shell');
-  const iSummary = h.events.findIndex((e) => e.kind === 'commit' && /read ×3/.test(e.line));
+  const iSummary = h.events.findIndex((e) => e.kind === 'commit' && /explored ×3/.test(e.line));
   const iShell = h.events.findIndex((e) => e.kind === 'commit' && /ls -la/.test(e.line));
   assert.ok(iSummary >= 0, 'the read summary committed');
   assert.ok(iShell >= 0, 'the shell line committed');
@@ -267,7 +267,7 @@ test('(d) a non-file tool after a read run flushes the summary before its own li
 });
 // ───────────────────────────────────────────────────────────────────────────
-// (e) read run with op #5 erroring → "read ×4" summary, then standalone error +
+// (e) read run with op #5 erroring → "explored ×4" summary, then standalone error +
 //     body, then a fresh group for the subsequent reads.
 // ───────────────────────────────────────────────────────────────────────────
 test('(e) a mid-run error flushes the success-group, renders error standalone, then a new group starts', async () => {
@@ -286,13 +286,13 @@ test('(e) a mid-run error flushes the success-group, renders error standalone, t
   const s = summaries(h.events);
   assert.strictEqual(s.length, 2, 'the 4 successes and the 3 later successes form two summaries');
-  assert.match(s[0].line, /read ×4/, 'the errored op did NOT join the group → ×4 not ×5');
-  assert.match(s[1].line, /read ×3/, 'a new group started after the error');
+  assert.match(s[0].line, /explored ×4/, 'the errored op did NOT join the group → ×4 not ×5');
+  assert.match(s[1].line, /explored ×3/, 'a new group started after the error');
-  const iSummary4 = h.events.findIndex((e) => e.kind === 'commit' && /read ×4/.test(e.line));
+  const iSummary4 = h.events.findIndex((e) => e.kind === 'commit' && /explored ×4/.test(e.line));
   const iErrLine = h.events.findIndex((e) => e.kind === 'commit' && /read \/bad\.js/.test(e.line));
   const iErrBody = h.events.findIndex((e) => e.kind === 'error-body');
-  const iSummary3 = h.events.findIndex((e) => e.kind === 'commit' && /read ×3/.test(e.line));
+  const iSummary3 = h.events.findIndex((e) => e.kind === 'commit' && /explored ×3/.test(e.line));
   assert.ok(iSummary4 >= 0 && iErrLine >= 0 && iErrBody >= 0 && iSummary3 >= 0, 'all four landmarks present');
   assert.ok(iSummary4 < iErrLine, 'success summary before the error line (never above the reads it followed)');
   assert.ok(iErrLine < iErrBody, 'error line before its expandable body');
@@ -300,27 +300,47 @@ test('(e) a mid-run error flushes the success-group, renders error standalone, t
 });
 // ───────────────────────────────────────────────────────────────────────────
-// (f) multi-iteration read run with intermediate narration → still ONE summary
-//     (terminal-flag gating: intermediate narration must NOT split the group).
+// (f) INTENTIONAL BEHAVIOR CHANGE (Option b — "fix: flush activity groups before
+//     content-bearing narration for correct ordering").
+//
+//     PREVIOUSLY this asserted that content-bearing INTERMEDIATE narration did
+//     NOT split the group (terminal-flag gating → one "explored ×4"). That left
+//     the narration committed ABOVE a still-open group → the summary later landed
+//     BELOW the conclusion it was based on.
+//
+//     NEW behavior: content-bearing intermediate narration flushes the open group
+//     FIRST, so a chatty multi-read run FRAGMENTS into correctly-ordered
+//     sub-groups (each "explored ×N" above its narration). Silent runs (empty
+//     interim narration) still collapse fully — see narration-ordering.test.js.
+//     Three reads per fragment so each crosses the ≥3 summary threshold.
 // ───────────────────────────────────────────────────────────────────────────
-test('(f) intermediate-iteration narration does NOT split a multi-iteration read run', async () => {
+test('(f) content-bearing intermediate narration FRAGMENTS a multi-iteration read run (correctly ordered)', async () => {
   const h = harness();
   h.setScenario(async (cb) => {
-    // iter 1: two reads, then a NON-empty but NON-terminal narration.
+    // iter 1: three reads, then a NON-empty NON-terminal narration → flushes #1.
     cb.onAssistantMessage('');
     fileOp(cb, 'read', '/i1a.js');
     fileOp(cb, 'read', '/i1b.js');
+    fileOp(cb, 'read', '/i1c.js');
     cb.onAssistantMessage('Let me read a couple more files.', { terminal: false });
-    // iter 2: two more reads, then the terminal answer.
+    // iter 2: three more reads, then the terminal answer → flushes #2.
     fileOp(cb, 'read', '/i2a.js');
     fileOp(cb, 'read', '/i2b.js');
+    fileOp(cb, 'read', '/i2c.js');
     cb.onAssistantMessage('All read.', { terminal: true });
   });
-  await h.handler('multi-iteration reads');
+  await h.handler('multi-iteration reads with interim narration');
   const s = summaries(h.events);
-  assert.strictEqual(s.length, 1, 'the four reads across two iterations collapse to ONE summary');
-  assert.match(s[0].line, /read ×4/, 'all four reads counted');
+  assert.strictEqual(s.length, 2, 'content-bearing interim narration split the run into TWO summaries');
+  assert.ok(s.every((e) => /explored ×3/.test(e.line)), 'each fragment is explored ×3');
+  // Ordering: fragment #1 above the interim narration, fragment #2 below it.
+  const iSum1 = h.events.findIndex((e) => e.kind === 'commit' && /explored ×3/.test(e.line));
+  const iNarr1 = h.events.findIndex((e) => e.kind === 'answer' && e.content === 'Let me read a couple more files.');
+  const iSum2 = h.events.findIndex((e, idx) => idx > iSum1 && e.kind === 'commit' && /explored ×3/.test(e.line));
+  assert.ok(iSum1 < iNarr1, 'fragment #1 commits ABOVE the interim narration');
+  assert.ok(iNarr1 < iSum2, 'fragment #2 commits below the interim narration');
 });
 // ───────────────────────────────────────────────────────────────────────────
@@ -363,7 +383,7 @@ function replayCommits(loadedMessages, cols) {
 const fileLineOf = (commitsArr) => commitsArr
   .map((c) => stripAnsi(c))
-  .filter((c) => /file .* read ×\d+/.test(c));
+  .filter((c) => /file .* explored ×\d+/.test(c));
 test('(g) replay re-groups to the same summary at the replay width; narrower re-truncates; ≥3 threshold applied', () => {
   const files = Array.from({ length: 6 }, (_, i) => `/proj/module-${i}/index-file-${i}.js`);
@@ -398,7 +418,7 @@ test('(g) replay re-groups to the same summary at the replay width; narrower re-
 // ───────────────────────────────────────────────────────────────────────────
 // (g2) replay of a MIXED read/list run re-groups into the SAME single merged
-//      summary (neutral "file ×N" verb), byte-identical to the live oracle.
+//      summary (single "explored ×N" verb), byte-identical to the live oracle.
 // ───────────────────────────────────────────────────────────────────────────
 test('(g2) replay of a mixed read/list run → identical merged summary at the replay width', () => {
   // read, list, read, list, read — interleaved, persisted as native cores.
@@ -418,11 +438,11 @@ test('(g2) replay of a mixed read/list run → identical merged summary at the r
   const mixedLineOf = (commitsArr) => commitsArr
     .map((c) => stripAnsi(c))
-    .filter((c) => /file .* file ×\d+/.test(c));
+    .filter((c) => /file .* explored ×\d+/.test(c));
   const oracle = liveFileSummary(ops, 200);
   assert.strictEqual(oracle.length, 1, 'live commits one merged summary for the mixed run');
-  assert.match(stripAnsi(oracle[0]), /file ×5/, 'live oracle uses the neutral verb for the mixed run');
+  assert.match(stripAnsi(oracle[0]), /explored ×5/, 'live oracle uses the single explored verb for the mixed run');
   const replay = mixedLineOf(replayCommits(loaded, 200));
   assert.strictEqual(replay.length, 1, 'replay commits exactly one merged file summary');
@@ -468,10 +488,10 @@ test('(i) the web tracker is unaffected: a web run alongside a file run still yi
   const fileS = summaries(h.events);
   assert.strictEqual(fileS.length, 1, 'one file summary');
-  assert.match(fileS[0].line, /read ×3/);
+  assert.match(fileS[0].line, /explored ×3/);
   const webS = commits(h.events).filter((e) => / web /.test(e.line) && /source/.test(e.line));
   assert.strictEqual(webS.length, 1, 'the web tracker still commits its own summary, unaffected');
-  const iFile = h.events.findIndex((e) => e.kind === 'commit' && /read ×3/.test(e.line));
+  const iFile = h.events.findIndex((e) => e.kind === 'commit' && /explored ×3/.test(e.line));
   const iWeb = h.events.findIndex((e) => e.kind === 'commit' && / web /.test(e.line) && /source/.test(e.line));
   assert.ok(iFile < iWeb, 'the file summary lands above the web summary it preceded');
 });
@@ -492,31 +512,31 @@ test('isGroupableFileCore / normalizeFileTag / fileSummaryState predicates', ()
   assert.ok(!isGroupableFileCore({ v: 1, kind: 'web', tag: 'http_get' }), 'a web core is not a file core');
   assert.ok(!isGroupableFileCore(null), 'null is tolerated');
-  // read_file and list_dir normalize to DISTINCT tags (used by fileSummaryState
-  // to decide the verb) …
+  // read_file and list_dir normalize to DISTINCT tags …
   assert.notStrictEqual(normalizeFileTag(readCore.tag), normalizeFileTag(listCore.tag));
   assert.strictEqual(normalizeFileTag('read'), 'read_file');
   assert.strictEqual(normalizeFileTag('list_dir'), 'list_dir');
-  // … but they now share ONE group KEY, so a read↔list switch never flushes.
+  // … but they share ONE group KEY, so a read↔list switch never flushes.
   assert.strictEqual(fileGroupKey('read'), fileGroupKey('list_dir'));
   assert.strictEqual(fileGroupKey('read_file'), fileGroupKey('list_dir'));
-  // Homogeneous read group → specific "read" verb.
+  // Every group composition → the SAME single "explored" / "exploring…" verb,
+  // regardless of read-only, list-only, or mixed.
   const st = fileSummaryState([readCore, readCore]);
-  assert.strictEqual(st.verb, 'read');
-  assert.strictEqual(st.gerund, 'reading…');
+  assert.strictEqual(st.verb, 'explored');
+  assert.strictEqual(st.gerund, 'exploring…');
   assert.strictEqual(st.count, 2);
   assert.deepStrictEqual(st.basenames, ['a.js', 'a.js']);
-  // Homogeneous list group → specific "list" verb.
+  // List-only group → still "explored".
   const stList = fileSummaryState([listCore, listCore]);
-  assert.strictEqual(stList.verb, 'list');
-  assert.strictEqual(stList.gerund, 'listing…');
+  assert.strictEqual(stList.verb, 'explored');
+  assert.strictEqual(stList.gerund, 'exploring…');
-  // Mixed group → neutral "file" / "accessing…" verb.
+  // Mixed group → still "explored" (no composition branching).
   const stMixed = fileSummaryState([readCore, listCore]);
-  assert.strictEqual(stMixed.verb, 'file');
-  assert.strictEqual(stMixed.gerund, 'accessing…');
+  assert.strictEqual(stMixed.verb, 'explored');
+  assert.strictEqual(stMixed.gerund, 'exploring…');
   assert.strictEqual(stMixed.count, 2);
   assert.deepStrictEqual(stMixed.basenames, ['a.js', 'd']);
 });

package/test/narration-ordering.test.js ADDED Viewed

@@ -0,0 +1,309 @@
+'use strict';
+// Activity-group ordering vs assistant narration (Option b — "fix: flush activity
+// groups before content-bearing narration for correct ordering").
+//
+// THE BUG: assistant narration commits to immutable scrollback immediately
+// (streamed token-by-token via streamToken, finalized via finalizeLastMessage),
+// while an open file/web activity group lives in the redrawable activity region
+// BELOW scrollback and only commits at a later boundary. When narration was
+// INTERMEDIATE (a tool follows), the old terminal-only flush gate was skipped, so
+// the narration committed ABOVE the still-open group and the group flushed later,
+// landing BELOW the conclusion it was based on (the "list ×3 below 'directory
+// almost empty'" screenshot).
+//
+// THE FIX (Option b): flush the open group BEFORE any content-bearing narration.
+//   • Streamed path  — flush at streaming-START (onToken, before the first
+//     content-bearing token commits the "▸ AI-agent" header + line to scrollback).
+//   • Finalize path  — flush at onAssistantMessage on terminal OR content-bearing
+//     narration (backstop for the non-streamed path).
+//   • Silent runs (empty/whitespace-only interim narration) still fully collapse.
+//
+// Tests drive the REAL createTurnHandler callbacks (same harness shape as
+// permission-flush.test.js / file-activity.test.js), recording every committed
+// scrollback line and every narration line in ONE ordered log so ordering can be
+// asserted directly.
+const { test } = require('node:test');
+const assert = require('node:assert');
+process.stdout.isTTY = true;
+delete process.env.NO_COLOR;
+const { stripAnsi } = require('../lib/ui/utils');
+const { createTurnHandler } = require('../lib/commands/chat-turn');
+// ── Live harness ──────────────────────────────────────────────────────────────
+// `turnOpts` becomes ctx.opts. Pass { showThink: true } to disable the
+// implicit-think buffering gate so onToken streams tokens straight to
+// chatHistory.streamToken (exercising the streamed-narration path).
+function harness(turnOpts) {
+  const events = [];
+  // streaming flag mirrors chat-history's _streamActive: set on the first
+  // streamToken, cleared on clear/finalize. isStreaming() drives the
+  // streaming-start flush gate in onToken.
+  let streaming = false;
+  const writerModule = {
+    startActivity() {}, updateActivity() {},
+    endActivity(id, line) {
+      for (const raw of String(line == null ? '' : line).split('\n')) {
+        if (raw === '') continue;
+        events.push({ kind: 'commit', line: stripAnsi(raw) });
+      }
+    },
+    scrollback(line) { events.push({ kind: 'scrollback', line: stripAnsi(String(line)) }); },
+  };
+  const chatHistory = {
+    addMessage(m) { if (m && m.isError) events.push({ kind: 'error', output: m.output }); },
+    streamToken(t) {
+      if (!t) return;
+      // Mirror the real per-line commit: the header on the first token, then the
+      // token text as a committed narration line. (We don't need byte-fidelity —
+      // only that a narration line lands in the ordered log when the model speaks.)
+      if (!streaming) { streaming = true; events.push({ kind: 'narration', line: '▸ AI-agent' }); }
+      events.push({ kind: 'narration', line: t });
+    },
+    isStreaming() { return streaming; },
+    clearStreamingContent() { streaming = false; },
+    deferToolOutput() {}, commitDeferredDetail() {},
+    finalizeLastMessage(content) {
+      streaming = false;
+      if (content && content.trim()) events.push({ kind: 'answer', content });
+    },
+  };
+  const statusBar = { update() {}, onToken() {}, addPendingTokens() {}, updateMetrics() {}, setCost() {} };
+  const inputField = { on() {}, removeListener() {}, releaseNavigation() {}, setDisabled() {} };
+  let scenario = async () => {};
+  const runAgentLoop = async (messages, model, maxIter, limit, loopOpts) => {
+    await scenario(loopOpts.callbacks);
+    return { messages, metrics: { turns: [] }, withheldActions: [] };
+  };
+  const ctx = {
+    inputField, statusBar, chatHistory, writerModule, runAgentLoop,
+    getConfig: () => ({ auth_token: 'tok', max_iterations: 50, show_cost: false, system_prompt_mode: 'system_role' }),
+    approxTokens: () => 0,
+    resolveCommand: () => null,
+    opts: turnOpts || {},
+    TAG_REGISTRY: {},
+    collapseListMsg() {}, handlePendingSelection() {}, showPendingStep() {},
+    activateNavCapture() {}, finalizeListMsg() {},
+    createChatIfNeeded: async () => {}, saveTurnToDashboard: async () => {}, saveSession() {},
+    messages: [], currentModel: 'm', debugMode: false, pendingImages: [],
+    chatSync: async () => '', resolvedSystemPrompt: '', resolvedTokenLimit: null, planMode: false,
+  };
+  const handler = createTurnHandler(ctx, {});
+  return { events, handler, setScenario: (fn) => { scenario = fn; } };
+}
+// One fully-successful groupable file op (read / list_dir).
+function fileOp(cb, tag, path) {
+  cb.onToolStart(tag, path, { id: `${tag}-${path}`, attrs: { path } });
+  cb.onToolEnd(tag, 'contents', 5, { id: `${tag}-${path}`, attrs: { path }, meta: { bytes: 10 }, error: null });
+}
+// One fully-successful web op (http_get) — leaves the web group OPEN.
+function webOp(cb, url) {
+  cb.onToolStart('http_get', url, { id: `g-${url}`, attrs: { url } });
+  cb.onToolEnd('http_get', {}, 120, { id: `g-${url}`, attrs: { url }, meta: { status_code: 200, bytes: 1000 }, error: null });
+}
+const commits = (events) => events.filter((e) => e.kind === 'commit');
+const fileSummaries = (events) => commits(events).filter((e) => /file .* explored ×\d+/.test(e.line));
+const webSummaries = (events) => commits(events).filter((e) => / web /.test(e.line) && /source/.test(e.line));
+// ───────────────────────────────────────────────────────────────────────────
+// (a) The SCREENSHOT scenario: a group of list/read ops, then a content-bearing
+//     INTERMEDIATE narration (the conclusion based on them), then a non-groupable
+//     tool (write_file). The group must commit ABOVE the narration.
+// ───────────────────────────────────────────────────────────────────────────
+test('(a) group of list ops → intermediate conclusion narration → write_file: group commits ABOVE the narration', async () => {
+  const h = harness();
+  h.setScenario(async (cb) => {
+    cb.onAssistantMessage('');                                   // empty pre-tool narration — must NOT flush
+    fileOp(cb, 'list_dir', '/proj');
+    fileOp(cb, 'list_dir', '/proj/src');
+    fileOp(cb, 'list_dir', '/proj/test');
+    // The conclusion drawn FROM the listings — content-bearing, intermediate (a
+    // tool follows). This is what must NOT sit above the listings it summarizes.
+    cb.onAssistantMessage('The directory is almost empty.', { terminal: false });
+    // A non-groupable effectful tool follows.
+    cb.onToolStart('write_file', '/proj/new.js', { id: 'w1', attrs: { path: '/proj/new.js' } });
+    cb.onToolEnd('write_file', 'ok', 4, { id: 'w1', attrs: { path: '/proj/new.js' }, meta: { bytes: 3 }, error: null });
+    cb.onAssistantMessage('Done.', { terminal: true });
+  });
+  await h.handler('list a few dirs, conclude, then write');
+  const s = fileSummaries(h.events);
+  assert.strictEqual(s.length, 1, 'the three listings collapse to one summary');
+  assert.match(s[0].line, /explored ×3/);
+  const iGroup = h.events.findIndex((e) => e.kind === 'commit' && /explored ×3/.test(e.line));
+  const iNarration = h.events.findIndex((e) => e.kind === 'answer' && e.content === 'The directory is almost empty.');
+  assert.ok(iGroup >= 0 && iNarration >= 0, 'both group and narration present');
+  assert.ok(iGroup < iNarration, 'the explored ×3 group commits ABOVE the conclusion narration (the screenshot fix)');
+});
+// ───────────────────────────────────────────────────────────────────────────
+// (b) SILENT multi-read run — only empty interim narration. Still ONE summary.
+// ───────────────────────────────────────────────────────────────────────────
+test('(b) silent multi-read run (empty interim narration) still collapses to ONE explored ×N', async () => {
+  const h = harness();
+  h.setScenario(async (cb) => {
+    cb.onAssistantMessage('');
+    fileOp(cb, 'read', '/a.js');
+    fileOp(cb, 'read', '/b.js');
+    cb.onAssistantMessage('', { terminal: false });   // silent intermediate — must NOT flush
+    fileOp(cb, 'read', '/c.js');
+    fileOp(cb, 'read', '/d.js');
+    cb.onAssistantMessage('', { terminal: false });   // silent intermediate — must NOT flush
+    fileOp(cb, 'read', '/e.js');
+    cb.onAssistantMessage('Read everything.', { terminal: true });
+  });
+  await h.handler('silent multi-read run');
+  const s = fileSummaries(h.events);
+  assert.strictEqual(s.length, 1, 'a silent run collapses to exactly ONE summary across all iterations');
+  assert.match(s[0].line, /explored ×5/, 'all five reads counted — empty interim narration did not split the group');
+});
+// ───────────────────────────────────────────────────────────────────────────
+// (c) CHATTY run — content-bearing narration between reads. Fragments into
+//     correctly-ordered sub-groups, each ABOVE its narration.
+// ───────────────────────────────────────────────────────────────────────────
+test('(c) chatty run: content-bearing interim narration fragments into correctly-ordered sub-groups', async () => {
+  const h = harness();
+  h.setScenario(async (cb) => {
+    cb.onAssistantMessage('');
+    fileOp(cb, 'read', '/a.js');
+    fileOp(cb, 'read', '/b.js');
+    fileOp(cb, 'read', '/c.js');
+    cb.onAssistantMessage('First batch looks fine.', { terminal: false });  // flushes group #1
+    fileOp(cb, 'read', '/d.js');
+    fileOp(cb, 'read', '/e.js');
+    fileOp(cb, 'read', '/f.js');
+    cb.onAssistantMessage('Second batch too.', { terminal: true });         // flushes group #2
+  });
+  await h.handler('chatty multi-read run');
+  const s = fileSummaries(h.events);
+  assert.strictEqual(s.length, 2, 'two content-bearing narrations → two fragments');
+  assert.ok(s.every((e) => /explored ×3/.test(e.line)), 'each fragment is explored ×3');
+  const iSum1 = h.events.findIndex((e) => e.kind === 'commit' && /explored ×3/.test(e.line));
+  const iNarr1 = h.events.findIndex((e) => e.kind === 'answer' && e.content === 'First batch looks fine.');
+  const iSum2 = h.events.findIndex((e, idx) => idx > iSum1 && e.kind === 'commit' && /explored ×3/.test(e.line));
+  const iNarr2 = h.events.findIndex((e) => e.kind === 'answer' && e.content === 'Second batch too.');
+  assert.ok(iSum1 < iNarr1, 'fragment #1 above its narration');
+  assert.ok(iNarr1 < iSum2, 'fragment #2 starts after narration #1');
+  assert.ok(iSum2 < iNarr2, 'fragment #2 above its narration');
+});
+// ───────────────────────────────────────────────────────────────────────────
+// (d) STREAMED narration (token path) — the group must commit ABOVE the first
+//     streamed narration line. This is the load-bearing case: streamToken commits
+//     the header + line to scrollback BEFORE onAssistantMessage, so the flush must
+//     happen at streaming-START (onToken), not at onAssistantMessage.
+//     showThink:true disables the implicit-think buffer so onToken streams live.
+// ───────────────────────────────────────────────────────────────────────────
+test('(d) streamed content-bearing narration: group commits ABOVE the first narration line', async () => {
+  const h = harness({ showThink: true });
+  h.setScenario(async (cb) => {
+    fileOp(cb, 'list_dir', '/proj');
+    fileOp(cb, 'list_dir', '/proj/src');
+    fileOp(cb, 'list_dir', '/proj/test');
+    // Narration STREAMS in token-by-token (what really happens). The very first
+    // content token must flush the open group first.
+    for (const tok of ['The ', 'directory ', 'is ', 'almost ', 'empty.', '\n']) cb.onToken(tok);
+    cb.onAssistantMessage('The directory is almost empty.', { terminal: true });
+  });
+  await h.handler('stream a conclusion after listing');
+  const s = fileSummaries(h.events);
+  assert.strictEqual(s.length, 1, 'one summary');
+  assert.match(s[0].line, /explored ×3/);
+  const iGroup = h.events.findIndex((e) => e.kind === 'commit' && /explored ×3/.test(e.line));
+  const iFirstNarration = h.events.findIndex((e) => e.kind === 'narration');
+  assert.ok(iGroup >= 0 && iFirstNarration >= 0, 'group and streamed narration both present');
+  assert.ok(iGroup < iFirstNarration, 'the group commits ABOVE the FIRST streamed narration line (streaming-start flush)');
+});
+// ───────────────────────────────────────────────────────────────────────────
+// (e) DOUBLE-FLUSH idempotency — the streaming-start flush, then the
+//     onAssistantMessage flush, then the turn-end finally flush all call flush();
+//     the group commits EXACTLY once.
+// ───────────────────────────────────────────────────────────────────────────
+test('(e) double-flush idempotency: streaming-start + onAssistantMessage + finally → exactly one commit', async () => {
+  const h = harness({ showThink: true });
+  h.setScenario(async (cb) => {
+    fileOp(cb, 'read', '/a.js');
+    fileOp(cb, 'read', '/b.js');
+    fileOp(cb, 'read', '/c.js');
+    for (const tok of ['All ', 'good.', '\n']) cb.onToken(tok);   // flush #1 (streaming-start)
+    cb.onAssistantMessage('All good.', { terminal: true });        // flush #2 (no-op) + finally flush (no-op)
+  });
+  await h.handler('one group, many flush opportunities');
+  const s = fileSummaries(h.events);
+  assert.strictEqual(s.length, 1, 'the group committed EXACTLY once despite multiple flush() calls');
+  assert.match(s[0].line, /explored ×3/);
+});
+// ───────────────────────────────────────────────────────────────────────────
+// (f) WEB group parity — the open web group also flushes before content-bearing
+//     narration, symmetrically with the file group.
+// ───────────────────────────────────────────────────────────────────────────
+test('(f) web group flushes before content-bearing narration (symmetry with the file group)', async () => {
+  const h = harness();
+  h.setScenario(async (cb) => {
+    cb.onAssistantMessage('');
+    webOp(cb, 'https://a.example');
+    webOp(cb, 'https://b.example');
+    // Content-bearing intermediate narration must flush the web group above it.
+    cb.onAssistantMessage('Both pages confirm the API shape.', { terminal: false });
+    cb.onToolStart('write_file', '/notes.md', { id: 'w1', attrs: { path: '/notes.md' } });
+    cb.onToolEnd('write_file', 'ok', 4, { id: 'w1', attrs: { path: '/notes.md' }, meta: { bytes: 3 }, error: null });
+    cb.onAssistantMessage('Done.', { terminal: true });
+  });
+  await h.handler('fetch two pages, conclude, then write');
+  const w = webSummaries(h.events);
+  assert.strictEqual(w.length, 1, 'the two fetches collapse to one web summary');
+  const iWeb = h.events.findIndex((e) => e.kind === 'commit' && / web /.test(e.line) && /source/.test(e.line));
+  const iNarration = h.events.findIndex((e) => e.kind === 'answer' && e.content === 'Both pages confirm the API shape.');
+  assert.ok(iWeb >= 0 && iNarration >= 0, 'web summary and narration present');
+  assert.ok(iWeb < iNarration, 'the web summary commits ABOVE the content-bearing narration');
+});
+// ───────────────────────────────────────────────────────────────────────────
+// (g) WHITESPACE-only interim narration does NOT flush (silent collapse preserved
+//     even when pure streaming artifacts arrive between reads).
+// ───────────────────────────────────────────────────────────────────────────
+test('(g) whitespace-only interim narration does NOT flush — silent collapse preserved', async () => {
+  const h = harness({ showThink: true });
+  let commitsAfterWhitespace = -1;
+  h.setScenario(async (cb) => {
+    fileOp(cb, 'read', '/a.js');
+    fileOp(cb, 'read', '/b.js');
+    // Pure streaming artifacts between reads — whitespace tokens + a whitespace
+    // finalize. NONE of these may flush the open group (token.trim() is empty so
+    // the streaming-start gate is skipped; the onAssistantMessage gate sees no
+    // content). The group stays open and uncommitted.
+    cb.onToken('   ');
+    cb.onToken('\n');
+    cb.onAssistantMessage('   ', { terminal: false });
+    commitsAfterWhitespace = commits(h.events).length;   // snapshot: must be 0 — nothing flushed
+    fileOp(cb, 'read', '/c.js');
+    cb.onAssistantMessage('Read all three.', { terminal: true });   // terminal → the single flush
+  });
+  await h.handler('whitespace artifacts between reads');
+  assert.strictEqual(commitsAfterWhitespace, 0, 'whitespace interim narration committed NOTHING — the group was not flushed');
+  const s = fileSummaries(h.events);
+  assert.strictEqual(s.length, 1, 'whitespace interim narration did NOT split the group');
+  assert.match(s[0].line, /explored ×3/, 'all three reads collapsed into ONE summary');
+});

package/test/permission-flush.test.js ADDED Viewed

@@ -0,0 +1,302 @@
+'use strict';
+// Permission-prompt flush — open file/web activity groups must be committed to
+// scrollback when a permission-gated (effectful) tool triggers a prompt, NOT
+// left rendering LIVE in the writer's activity region beside the modal.
+//
+// Root cause this guards: the agent loop asks permission BEFORE onToolStart
+// (agent.js), so onToolStart's "flush the other group before this non-groupable
+// op" step is sequenced AFTER the modal and cannot fire while it is open. The fix
+// adds an unconditional flush of both trackers at the TOP of the onPermissionAsk
+// handler (chat-turn.js). This is safe because groupable tools (read_file /
+// list_dir) are read-only with a NULL permission descriptor, so onPermissionAsk
+// never fires for them — by the time it fires the prompting tool is non-groupable.
+//
+// Tests drive the REAL createTurnHandler callbacks (same harness shape as
+// file-activity.test.js / web-activity-ordering.test.js), simulating the
+// loop's onPermissionAsk → (grant ⇒ onToolStart/onToolEnd | deny ⇒ nothing)
+// sequence by hand.
+const { test } = require('node:test');
+const assert = require('node:assert');
+// Stable colour env for byte comparisons (node:test isolates each file's process).
+process.stdout.isTTY = true;
+delete process.env.NO_COLOR;
+const { stripAnsi } = require('../lib/ui/utils');
+const { createTurnHandler } = require('../lib/commands/chat-turn');
+const { TOOL_REGISTRY } = require('../lib/tool_registry');
+// ── Live harness: drive the real createTurnHandler callbacks ──────────────────
+// Mirrors file-activity.test.js's harness. Records every committed line in one
+// ordered log so we can assert flush ORDERING (group above the prompting tool).
+function harness(opts) {
+  const events = [];
+  const writerModule = {
+    startActivity() {}, updateActivity() {},
+    endActivity(id, line) {
+      for (const raw of String(line == null ? '' : line).split('\n')) {
+        if (raw === '') continue;
+        events.push({ kind: 'commit', line: stripAnsi(raw) });
+      }
+    },
+    scrollback(line) { events.push({ kind: 'scrollback', line: stripAnsi(String(line)) }); },
+  };
+  const chatHistory = {
+    addMessage(m) { if (m && m.isError) events.push({ kind: 'error-body', output: m.output }); },
+    streamToken() {}, clearStreamingContent() {},
+    deferToolOutput() {}, commitDeferredDetail() {},
+    finalizeLastMessage(content) { if (content && content.trim()) events.push({ kind: 'answer', content }); },
+  };
+  const statusBar = { update() {}, onToken() {}, addPendingTokens() {}, updateMetrics() {}, setCost() {} };
+  const inputField = { on() {}, removeListener() {}, releaseNavigation() {}, setDisabled() {} };
+  let scenario = async () => {};
+  const runAgentLoop = async (messages, model, maxIter, limit, loopOpts) => {
+    await scenario(loopOpts.callbacks);
+    return { messages, metrics: { turns: [] }, withheldActions: [] };
+  };
+  const ctx = {
+    inputField, statusBar, chatHistory, writerModule, runAgentLoop,
+    getConfig: () => ({ auth_token: 'tok', max_iterations: 50, show_cost: false, system_prompt_mode: 'system_role' }),
+    approxTokens: () => 0,
+    resolveCommand: () => null,
+    opts: {},
+    TAG_REGISTRY: {},
+    collapseListMsg() {}, handlePendingSelection() {}, showPendingStep() {},
+    activateNavCapture() {}, finalizeListMsg() {},
+    createChatIfNeeded: async () => {}, saveTurnToDashboard: async () => {}, saveSession() {},
+    messages: [], currentModel: 'm', debugMode: (opts && opts.debugMode) || false, pendingImages: [],
+    chatSync: async () => '', resolvedSystemPrompt: '', resolvedTokenLimit: null, planMode: false,
+  };
+  const handler = createTurnHandler(ctx, {});
+  return { events, handler, setScenario: (fn) => { scenario = fn; } };
+}
+// One fully-successful groupable file op (read / list_dir).
+function fileOp(cb, tag, path, bytes) {
+  cb.onToolStart(tag, path, { id: `${tag}-${path}`, attrs: { path } });
+  cb.onToolEnd(tag, 'contents', 5, { id: `${tag}-${path}`, attrs: { path }, meta: { bytes: bytes || 10 }, error: null });
+}
+// One fully-successful web op (http_get) — leaves the web group OPEN until a permission
+// prompt, a non-web tool start, terminal/content-bearing narration, or turn end.
+function webOp(cb, url) {
+  cb.onToolStart('http_get', url, { id: `g-${url}`, attrs: { url } });
+  cb.onToolEnd('http_get', {}, 120, { id: `g-${url}`, attrs: { url }, meta: { status_code: 200, bytes: 1000 }, error: null });
+}
+const commits = (events) => events.filter((e) => e.kind === 'commit');
+const fileSummaries = (events) => commits(events).filter((e) => /file .* explored ×\d+/.test(e.line));
+const webSummaries = (events) => commits(events).filter((e) => / web /.test(e.line) && /source/.test(e.line));
+// ───────────────────────────────────────────────────────────────────────────
+// (a) 2-read group (below threshold) + permission-gated write_file → the group
+//     flushes as TWO individual lines at onPermissionAsk, BEFORE the prompt; no
+//     stale live group remains during the modal. (grant path)
+// ───────────────────────────────────────────────────────────────────────────
+test('(a) <3 file group flushes as individual lines at onPermissionAsk, above the prompting tool', async () => {
+  const h = harness();
+  let commitsAtAsk = -1;
+  h.setScenario(async (cb) => {
+    cb.onAssistantMessage('');
+    fileOp(cb, 'read', '/a.js');
+    fileOp(cb, 'read', '/b.js');
+    // Effectful tool triggers a permission prompt — fires BEFORE onToolStart.
+    cb.onPermissionAsk('write_file', '/out.js');
+    commitsAtAsk = commits(h.events).length;            // snapshot at the ask
+    // Grant → the tool now starts and ends.
+    cb.onToolStart('write_file', '/out.js', { id: 'w1', attrs: { path: '/out.js' } });
+    cb.onToolEnd('write_file', 'ok', 4, { id: 'w1', attrs: { path: '/out.js' }, meta: { bytes: 3 }, error: null });
+    cb.onAssistantMessage('done');
+  });
+  await h.handler('two reads then a write');
+  // The two reads were committed at the moment the prompt opened — not stranded.
+  assert.strictEqual(commitsAtAsk, 2, 'both read lines committed AT onPermissionAsk, before the modal');
+  assert.strictEqual(fileSummaries(h.events).length, 0, 'a 2-op group stays individual lines (no summary)');
+  const reads = commits(h.events).filter((e) => /read \//.test(e.line));
+  assert.strictEqual(reads.length, 2, 'two individual read lines');
+  // Ordering: the read lines land ABOVE the write_file line.
+  const iLastRead = h.events.map((e) => e).reduce((acc, e, i) => (e.kind === 'commit' && /read \//.test(e.line) ? i : acc), -1);
+  const iWrite = h.events.findIndex((e) => e.kind === 'commit' && /out\.js/.test(e.line) && !/read/.test(e.line));
+  assert.ok(iLastRead >= 0 && iWrite >= 0 && iLastRead < iWrite, 'read group commits ABOVE the write_file row');
+});
+// ───────────────────────────────────────────────────────────────────────────
+// (b) ≥3-read group + permission-gated write_file → the group flushes as ONE
+//     summary at onPermissionAsk, BEFORE the prompt, above the tool row.
+// ───────────────────────────────────────────────────────────────────────────
+test('(b) ≥3 file group flushes as a summary at onPermissionAsk, above the prompting tool', async () => {
+  const h = harness();
+  let summariesAtAsk = -1;
+  h.setScenario(async (cb) => {
+    cb.onAssistantMessage('');
+    for (let i = 0; i < 3; i++) fileOp(cb, 'read', `/r${i}.js`);
+    cb.onPermissionAsk('write_file', '/out.js');
+    summariesAtAsk = fileSummaries(h.events).length;     // snapshot at the ask
+    cb.onToolStart('write_file', '/out.js', { id: 'w1', attrs: { path: '/out.js' } });
+    cb.onToolEnd('write_file', 'ok', 4, { id: 'w1', attrs: { path: '/out.js' }, meta: { bytes: 3 }, error: null });
+    cb.onAssistantMessage('done');
+  });
+  await h.handler('three reads then a write');
+  assert.strictEqual(summariesAtAsk, 1, 'the summary committed AT onPermissionAsk');
+  const s = fileSummaries(h.events);
+  assert.strictEqual(s.length, 1, 'exactly one summary overall');
+  assert.match(s[0].line, /explored ×3/, 'collapsed explored ×3 summary');
+  const iSummary = h.events.findIndex((e) => e.kind === 'commit' && /explored ×3/.test(e.line));
+  const iWrite = h.events.findIndex((e) => e.kind === 'commit' && /out\.js/.test(e.line) && !/read/.test(e.line));
+  assert.ok(iSummary >= 0 && iWrite >= 0 && iSummary < iWrite, 'summary lands ABOVE the write_file row');
+});
+// ───────────────────────────────────────────────────────────────────────────
+// (c) open WEB group + permission-gated write_file → the web group flushes at
+//     onPermissionAsk (the IDENTICAL latent gap on the web tracker).
+// ───────────────────────────────────────────────────────────────────────────
+test('(c) open web group flushes at onPermissionAsk, above the prompting tool', async () => {
+  const h = harness();
+  let webAtAsk = -1;
+  h.setScenario(async (cb) => {
+    cb.onAssistantMessage('');
+    webOp(cb, 'https://x.example');                      // web group left OPEN
+    cb.onPermissionAsk('write_file', '/out.js');
+    webAtAsk = webSummaries(h.events).length;            // snapshot at the ask
+    cb.onToolStart('write_file', '/out.js', { id: 'w1', attrs: { path: '/out.js' } });
+    cb.onToolEnd('write_file', 'ok', 4, { id: 'w1', attrs: { path: '/out.js' }, meta: { bytes: 3 }, error: null });
+    cb.onAssistantMessage('done');
+  });
+  await h.handler('a fetch then a write');
+  assert.strictEqual(webAtAsk, 1, 'the web summary committed AT onPermissionAsk (latent web gap fixed)');
+  const w = webSummaries(h.events);
+  assert.strictEqual(w.length, 1, 'exactly one web summary');
+  const iWeb = h.events.findIndex((e) => e.kind === 'commit' && / web /.test(e.line) && /source/.test(e.line));
+  const iWrite = h.events.findIndex((e) => e.kind === 'commit' && /out\.js/.test(e.line) && !/web/.test(e.line));
+  assert.ok(iWeb >= 0 && iWrite >= 0 && iWeb < iWrite, 'web summary lands ABOVE the write_file row');
+});
+// ───────────────────────────────────────────────────────────────────────────
+// (d) DENIAL path — onToolStart never runs. The group must still be flushed at
+//     onPermissionAsk, not stranded live until the turn-end finally.
+// ───────────────────────────────────────────────────────────────────────────
+test('(d) denial path: the group is flushed at onPermissionAsk, not stranded until the finally', async () => {
+  const h = harness();
+  let commitsAtAsk = -1;
+  h.setScenario(async (cb) => {
+    cb.onAssistantMessage('');
+    for (let i = 0; i < 3; i++) fileOp(cb, 'read', `/r${i}.js`);
+    cb.onPermissionAsk('write_file', '/out.js');
+    commitsAtAsk = fileSummaries(h.events).length;       // snapshot at the ask
+    // DENY: agent.js breaks the loop — NO onToolStart, NO onToolEnd for the tool.
+    cb.onAssistantMessage('I was denied, stopping.');
+  });
+  await h.handler('three reads then a denied write');
+  assert.strictEqual(commitsAtAsk, 1, 'the read group was committed AT onPermissionAsk, before deny — not stranded');
+  // And there is exactly one summary in total (the finally flush is a no-op).
+  assert.strictEqual(fileSummaries(h.events).length, 1, 'still exactly one summary after the finally');
+});
+// ───────────────────────────────────────────────────────────────────────────
+// (e) DOUBLE-FLUSH guard — onPermissionAsk flush, then the post-grant onToolStart
+//     flush, then the turn-end finally flush all call flush(); the group must
+//     commit EXACTLY ONCE (idempotent isOpen()/groupId===null guard).
+// ───────────────────────────────────────────────────────────────────────────
+test('(e) double-flush guard: onPermissionAsk + onToolStart + finally → exactly one commit', async () => {
+  const h = harness();
+  h.setScenario(async (cb) => {
+    cb.onAssistantMessage('');
+    for (let i = 0; i < 3; i++) fileOp(cb, 'read', `/r${i}.js`);     // ≥3 → one summary line
+    cb.onPermissionAsk('write_file', '/out.js');                     // flush #1 (commits)
+    cb.onToolStart('write_file', '/out.js', { id: 'w1', attrs: { path: '/out.js' } }); // flush #2 (no-op)
+    cb.onToolEnd('write_file', 'ok', 4, { id: 'w1', attrs: { path: '/out.js' }, meta: { bytes: 3 }, error: null });
+    cb.onAssistantMessage('done');                                   // finally flush (no-op)
+  });
+  await h.handler('idempotent double flush');
+  const s = fileSummaries(h.events);
+  assert.strictEqual(s.length, 1, 'the group committed EXACTLY once despite three flush() calls');
+  assert.match(s[0].line, /explored ×3/);
+});
+// ───────────────────────────────────────────────────────────────────────────
+// (f) INTENTIONAL BEHAVIOR CHANGE (Option b — "fix: flush activity groups before
+//     content-bearing narration for correct ordering").
+//
+//     PREVIOUSLY this test asserted that content-bearing INTERMEDIATE narration
+//     ("Reading a couple more.") did NOT split the group, collapsing all four
+//     reads into one "explored ×4" summary. That ordering was chronologically
+//     WRONG: the narration committed to scrollback ABOVE a still-open group, so
+//     the group's summary later landed BELOW the conclusion it was based on.
+//
+//     NEW behavior: any content-bearing intermediate narration flushes the open
+//     group FIRST, so each sub-group commits ABOVE its narration. A chatty
+//     multi-read run therefore FRAGMENTS into correctly-ordered sub-groups
+//     ("explored ×3" / narration / "explored ×3") instead of one "explored ×6".
+//     This is the deliberate Option-(b) tradeoff — each fragment is chronologically
+//     truthful. (Silent runs with empty interim narration STILL fully collapse —
+//     see narration-ordering.test.js cases (b) and (g).)
+//
+//     Uses 3 reads per fragment so each crosses GROUP_THRESHOLD and emits a
+//     summary line (a <3 fragment would render individual per-op lines instead).
+// ───────────────────────────────────────────────────────────────────────────
+test('(f) content-bearing interim narration FRAGMENTS the read run into correctly-ordered sub-groups', async () => {
+  const h = harness();                                          // test harness; h.events is inspected after the run
+  h.setScenario(async (cb) => {                                 // scripted turn: 3 reads → interim narration → 3 reads → terminal answer
+    cb.onAssistantMessage('');                                  // empty pre-tool narration — must NOT flush
+    fileOp(cb, 'read', '/i1a.js');
+    fileOp(cb, 'read', '/i1b.js');
+    fileOp(cb, 'read', '/i1c.js');
+    cb.onAssistantMessage('Reading a couple more.', { terminal: false }); // content-bearing → FLUSHES group #1
+    fileOp(cb, 'read', '/i2a.js');
+    fileOp(cb, 'read', '/i2b.js');
+    fileOp(cb, 'read', '/i2c.js');
+    cb.onAssistantMessage('All read.', { terminal: true });     // terminal → flushes group #2
+    // onPermissionAsk is intentionally never called for this read-only run.
+  });
+  await h.handler('multi-iteration reads, content-bearing interim narration'); // presumably drives the scenario set above — confirm against harness
+  const s = fileSummaries(h.events);                            // summary entries taken from the recorded events; each exposes .line
+  assert.strictEqual(s.length, 2, 'content-bearing interim narration split the run into TWO summaries');
+  assert.match(s[0].line, /explored ×3/, 'first fragment: the three reads before the interim narration');
+  assert.match(s[1].line, /explored ×3/, 'second fragment: the three reads after it');
+  // Ordering: each summary lands ABOVE its narration (the Option-(b) guarantee). findIndex yields -1 for a missing event, which also fails the checks below.
+  const iSum1 = h.events.findIndex((e) => e.kind === 'commit' && /explored ×3/.test(e.line)); // first "explored ×3" commit
+  const iNarr1 = h.events.findIndex((e) => e.kind === 'answer' && e.content === 'Reading a couple more.'); // the interim narration
+  const iSum2 = h.events.findIndex((e, idx) => idx > iSum1 && e.kind === 'commit' && /explored ×3/.test(e.line)); // next "explored ×3" commit strictly after the first
+  const iNarr2 = h.events.findIndex((e) => e.kind === 'answer' && e.content === 'All read.'); // the terminal answer
+  assert.ok(iSum1 >= 0 && iNarr1 > iSum1, 'group #1 commits ABOVE the interim narration');
+  assert.ok(iSum2 > iNarr1 && iNarr2 > iSum2, 'group #2 commits below the interim narration and ABOVE the terminal answer');
+});
+// ───────────────────────────────────────────────────────────────────────────
+// (g) read_file / list_dir have NULL permission descriptors → onPermissionAsk is
+//     never invoked for them, so the unconditional flush can never wrongly break
+//     an in-progress read/list group. (Groupable ⇒ null descriptor invariant.)
+// ───────────────────────────────────────────────────────────────────────────
+test('(g) read_file and list_dir have null permission descriptors (groupable ⇒ never reaches onPermissionAsk)', async () => {
+  const byTag = (t) => TOOL_REGISTRY.find((e) => Array.isArray(e.tags) && e.tags.includes(t)); // first registry entry whose tags array includes t (undefined if none)
+  const readEntry = byTag('read_file');
+  const listEntry = byTag('list_dir');
+  const writeEntry = byTag('write_file');
+  assert.ok(readEntry && typeof readEntry.permission === 'function', 'read_file entry present with a permission fn');
+  assert.ok(listEntry && typeof listEntry.permission === 'function', 'list_dir entry present with a permission fn');
+  assert.ok(writeEntry && typeof writeEntry.permission === 'function', 'write_file entry present with a permission fn');
+  // Groupable read-only tools: null descriptor → the loop's askGate is false →
+  // onPermissionAsk is NOT invoked for them.
+  assert.strictEqual(readEntry.permission({}, ['/a.js']), null, 'read_file descriptor is null'); // not awaited: the direct return value must be exactly null
+  assert.strictEqual(listEntry.permission({}, ['/d']), null, 'list_dir descriptor is null');     // same synchronous-null expectation
+  // Contrast: write_file (the prompting tool above) returns a NON-null descriptor.
+  // (_uiActive:true skips the headless diff branch, which would touch ctx.writer.)
+  const writeDesc = await writeEntry.permission({ _uiActive: true }, ['/out.js', 'x']); // awaited, so a promise-returning permission fn is handled too
+  assert.ok(writeDesc && typeof writeDesc === 'object' && writeDesc.tag === 'write_file',
+    'write_file returns a non-null permission descriptor');
+});