@semalt-ai/code 1.20.0 → 1.20.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -127,7 +127,8 @@ function createChatSession(ctx) {
127
127
  // its own per-op line via the SAME `_display` render the live path uses —
128
128
  // byte-identical to a fresh per-op commit. read_file and list_dir share ONE
129
129
  // group (mirroring the live merged key): a mixed run re-groups into the same
130
- // single summary, with fileSummaryState picking the homogeneous-vs-mixed verb.
130
+ // single summary, rendered with the single "explored ×N" verb (fileSummaryState
131
+ // no longer branches on composition).
131
132
  let fileBuf = []; // [{ core, ts }]
132
133
  function flushFile() {
133
134
  if (!fileBuf.length) return;
@@ -254,6 +254,23 @@ function createTurnHandler(ctx, slashHandlers) {
254
254
  statusBar.update('streaming', 'Streaming response');
255
255
  },
256
256
  onPermissionAsk: (tag, input) => {
257
+ // Flush any open file/web activity group BEFORE the permission picker
258
+ // opens. The permission gate fires ahead of onToolStart (agent.js — the
259
+ // "ask before onToolStart" comment), so the non-groupable flush that
260
+ // onToolStart normally performs (below) is sequenced AFTER the modal and
261
+ // can't fire while it's open — leaving an open group stranded LIVE in the
262
+ // writer's activity region beside the prompt for the modal's whole life.
263
+ // Flush here so the group commits to scrollback ABOVE the prompt instead.
264
+ // This is safe to do unconditionally: groupable tools (read_file/list_dir)
265
+ // are read-only with a NULL permission descriptor, so onPermissionAsk
266
+ // NEVER fires for them — by the time we get here the prompting tool is by
267
+ // definition non-groupable, exactly the case onToolStart already flushes.
268
+ // flush() is idempotent (isOpen()/groupId===null guard), so the later
269
+ // onToolStart flush, the turn-end finally flush, or the deny path all
270
+ // become no-ops — no double commit. Covers the deny case too: a denied
271
+ // tool's group is committed here rather than stranded until the finally.
272
+ if (webTracker.isOpen()) webTracker.flush();
273
+ if (fileTracker.isOpen()) fileTracker.flush();
257
274
  // Status-bar update fires while the permission picker is open so
258
275
  // the user can see what's pending in the side label, not just
259
276
  // inside the modal. Mirrors the labels onToolStart uses post-grant
@@ -496,6 +513,28 @@ function createTurnHandler(ctx, slashHandlers) {
496
513
  // branch opened the gate (eager-open or showThink). The StreamParser emits
497
514
  // these verbatim, so once the gate is open they would otherwise stream live.
498
515
  if (ORPHAN_CLOSE_TAG_RE.test(token.trim())) return;
516
+ // Ordering fix (Option b) — flush any open file/web activity group BEFORE
517
+ // the FIRST content-bearing narration token commits to scrollback. Streamed
518
+ // narration commits INCREMENTALLY: streamToken() emits the "▸ AI-agent"
519
+ // header and each complete line to immutable scrollback (chat-history.js)
520
+ // BEFORE onAssistantMessage/finalizeLastMessage ever fires. So flushing only
521
+ // at onAssistantMessage is too LATE for the streamed path — the narration
522
+ // lines are already above a still-open group, which then commits BELOW the
523
+ // conclusion it's based on (the "list ×3 below 'directory almost empty'"
524
+ // bug). Flushing here, at streaming-start, guarantees the group's summary
525
+ // commits ABOVE the first visible narration line.
526
+ //
527
+ // Gate strictly: only when the stream has NOT yet started (so we flush once,
528
+ // before the header) AND this token carries non-whitespace content — pure
529
+ // whitespace streaming artifacts in a silent read,read,read run must NOT
530
+ // flush, so such runs still collapse to one "explored ×N". flush() is
531
+ // idempotent (groupId===null guard), so the later onAssistantMessage,
532
+ // onToolStart, onPermissionAsk, and turn-end finally flushes all no-op —
533
+ // exactly one commit.
534
+ if (token && token.trim() && !chatHistory.isStreaming?.()) {
535
+ if (webTracker.isOpen()) webTracker.flush();
536
+ if (fileTracker.isOpen()) fileTracker.flush();
537
+ }
499
538
  chatHistory.streamToken(token);
500
539
  statusBar.onToken();
501
540
  },
@@ -516,26 +555,31 @@ function createTurnHandler(ctx, slashHandlers) {
516
555
  const terminal = meta && typeof meta.terminal === 'boolean'
517
556
  ? meta.terminal
518
557
  : !!(cleanContent && cleanContent.trim());
519
- // Web-activity ordering (W.3 regression fix): commit any still-open web
520
- // group BEFORE the answer is finalized, so the collapsed "✓ web · …"
521
- // summary lands ABOVE the answer in scrollback (pre-W.3 ordering).
558
+ // Ordering fix (Option b) — commit any still-open file/web activity group
559
+ // BEFORE the answer is finalized, so the collapsed summary lands ABOVE the
560
+ // narration in scrollback (correct chronological ordering: a conclusion has
561
+ // the group it's based on committed above it).
562
+ //
563
+ // Flush on the TERMINAL signal (the final no-tool answer) OR on any
564
+ // CONTENT-BEARING intermediate narration. This is the deliberate Option-(b)
565
+ // tradeoff: an intermediate narration that carries visible content now
566
+ // flushes the open group, so a "chatty" multi-read run FRAGMENTS into
567
+ // correctly-ordered sub-groups (each "explored ×N" above its narration)
568
+ // rather than collapsing across a conclusion that was based on it. A SILENT
569
+ // multi-read run (empty/whitespace-only interim narration — pure streaming
570
+ // artifacts) does NOT flush, so it still collapses fully to one summary.
522
571
  //
523
- // Guard on the TERMINAL signal (no tool calls this iteration). Intermediate
524
- // web-tool iterations are non-terminal — they keep the group open so a
525
- // multi-step search→fetch still collapses into a single line (the W.3
526
- // guarantee). Pre-live-narration this used "cleanContent is empty" as the
527
- // proxy for intermediate; now intermediate iterations carry narration too,
528
- // so we rely on the explicit `terminal` flag instead — otherwise an
529
- // intermediate narration would flush the group early and split the line.
530
- // Empty/interrupted turns (no terminal message ever arrives) fall back to
531
- // the turn-end `finally` flush, which is the safety net.
532
- if (terminal && webTracker.isOpen()) {
572
+ // For the STREAMED path the open group is already committed above by the
573
+ // onToken streaming-start flush; this is the backstop for the non-streaming
574
+ // / finalize-only path (and the direct-callback unit tests). flush() is
575
+ // idempotent, so when both fire only one commit results. Empty/interrupted
576
+ // turns (no terminal message, no content) fall back to the turn-end
577
+ // `finally` flush, which is the safety net.
578
+ const contentful = !!(cleanContent && cleanContent.trim());
579
+ if ((terminal || contentful) && webTracker.isOpen()) {
533
580
  webTracker.flush();
534
581
  }
535
- // Same terminal-gating for the file group: only the explicit terminal
536
- // signal flushes, so intermediate-iteration narration does NOT split a
537
- // multi-iteration read run — it still collapses to one summary.
538
- if (terminal && fileTracker.isOpen()) {
582
+ if ((terminal || contentful) && fileTracker.isOpen()) {
539
583
  fileTracker.flush();
540
584
  }
541
585
  chatHistory.finalizeLastMessage(cleanContent);
@@ -9,7 +9,7 @@
9
9
  // collapses a run of CONSECUTIVE same-type file ops into a SINGLE compact
10
10
  // process-summary line —
11
11
  //
12
- // ✓ file · read ×10 (index.html, battlecity.js, …)
12
+ // ✓ file · explored ×10 (index.html, battlecity.js, …)
13
13
  //
14
14
  // — exactly the way `web-activity.js` collapses web_search/http_get. It is a
15
15
  // parallel, independent instance of `createWebActivityTracker`; the web tracker
@@ -24,9 +24,9 @@
24
24
  // scrollback model:
25
25
  // • GROUP KEY = a single shared key for BOTH read_file and list_dir, so a
26
26
  // mixed read/list exploration phase collapses into ONE summary instead of
27
- // fragmenting on every read↔list switch. A homogeneous run keeps its specific
28
- // verb ("read ×N" / "list ×N"); a genuinely mixed run uses the neutral "file
29
- // ×N". Any OTHER tool still breaks the run. The web tracker has a single key.
27
+ // fragmenting on every read↔list switch. Every group renders the SAME verb
28
+ // ("explored ×N", live "exploring… ×N") regardless of composition. Any OTHER
29
+ // tool still breaks the run. The web tracker has a single key.
30
30
  // • THRESHOLD decided at flush time. A group of 1–2 ops commits each op as its
31
31
  // own normal result line (byte-identical to today); a group of 3+ commits ONE
32
32
  // summary line. The web tracker always collapses. We can't retroactively pull
@@ -81,21 +81,14 @@ function _basename(p) {
81
81
 
82
82
  // Pure: fold a list of file ops (ToolOperation descriptors OR persisted cores —
83
83
  // both expose `tag`/`target`) into the fields the summary needs. read_file and
84
- // list_dir now share one group, so a group may be MIXED. The verb reflects the
85
- // group's composition: homogeneous reads → "read"/"reading…", homogeneous lists
86
- // → "list"/"listing…", a genuinely mixed group → the neutral "file"/"accessing…".
84
+ // list_dir share one group, so a group may be MIXED — but the verb is a SINGLE
85
+ // "explored"/"exploring…" regardless of composition (read-only, list-only, and
86
+ // mixed all read the same). No more homogeneous-vs-mixed branching.
87
87
  function fileSummaryState(ops) {
88
88
  const list = (ops || []).filter(Boolean);
89
- let hasRead = false, hasList = false;
90
- for (const o of list) {
91
- if (normalizeFileTag(o.tag) === 'list_dir') hasList = true;
92
- else hasRead = true;
93
- }
94
- const mixed = hasRead && hasList;
95
- const isList = hasList && !hasRead;
96
89
  return {
97
- verb: mixed ? 'file' : (isList ? 'list' : 'read'),
98
- gerund: mixed ? 'accessing…' : (isList ? 'listing…' : 'reading…'),
90
+ verb: 'explored',
91
+ gerund: 'exploring…',
99
92
  count: list.length,
100
93
  basenames: list.map((o) => _basename(o.target)),
101
94
  };
@@ -172,8 +165,8 @@ function createFileActivityTracker(deps) {
172
165
  // share one key, so a read↔list switch does NOT flush — both accumulate into
173
166
  // the same group (the key only changes for a different category, which never
174
167
  // reaches here). The live row is a growing web-style aggregate: "● file ·
175
- // reading… ×N (a, b, …)" (or "accessing… ×N" once mixed). `input` is the op's
176
- // path (used for the live basename).
168
+ // exploring… ×N (a, b, …)". `input` is the op's path (used for the live
169
+ // basename).
177
170
  start(tag, input) {
178
171
  const key = fileGroupKey(tag);
179
172
  if (groupId !== null && key !== currentKey) api.flush();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@semalt-ai/code",
3
- "version": "1.20.0",
3
+ "version": "1.20.1",
4
4
  "description": "Self-hosted AI Coding Assistant CLI",
5
5
  "main": "./lib/sdk.js",
6
6
  "//exports": "Two-tier embedding surface (Task 5.2): '.' is the STABLE createAgent facade; './internals' is the UNSTABLE building blocks (no semver guarantee). The boundary is enforced here, not just in docs. Works for both require() and import.",
@@ -3,8 +3,8 @@
3
3
  // File-activity grouping — a SECOND INSTANCE of the web-activity collapser for
4
4
  // consecutive pure file reads/lists (read_file / list_dir). read_file and
5
5
  // list_dir share ONE group key, so a mixed read/list exploration phase collapses
6
- // into a single summary (neutral "file ×N" verb) instead of fragmenting; a
7
- // homogeneous run keeps its specific "read ×N" / "list ×N" verb. Covers the live
6
+ // into a single summary instead of fragmenting; EVERY group (read-only,
7
+ // list-only, or mixed) renders the same single "explored ×N" verb. Covers the live
8
8
  // flush sites (driven through the REAL createTurnHandler callbacks, exactly as
9
9
  // web-activity-ordering.test.js does for web ops), the flush-time THRESHOLD
10
10
  // (1–2 individual lines, 3+ collapsed summary), the merged read/list group, the
@@ -89,12 +89,12 @@ function fileOp(cb, tag, path, bytes) {
89
89
  }
90
90
 
91
91
  const commits = (events) => events.filter((e) => e.kind === 'commit');
92
- // Matches a collapsed file summary for any verb: homogeneous (read/reading,
93
- // list/listing) or the neutral mixed verb (file/accessing).
94
- const summaries = (events) => commits(events).filter((e) => /file .*(read|reading|list|listing|file|accessing) ×\d+/.test(e.line));
92
+ // Matches a collapsed file summary — a single "explored ×N" verb for every group
93
+ // composition (read-only, list-only, or mixed).
94
+ const summaries = (events) => commits(events).filter((e) => /file .* explored ×\d+/.test(e.line));
95
95
 
96
96
  // ───────────────────────────────────────────────────────────────────────────
97
- // (a) 10 consecutive read_file ops → ONE "✓ file · read ×10 (…)" summary line.
97
+ // (a) 10 consecutive read_file ops → ONE "✓ file · explored ×10 (…)" summary line.
98
98
  // ───────────────────────────────────────────────────────────────────────────
99
99
  test('(a) 10 reads collapse to ONE summary; ×10 always present; basenames truncated to width', async () => {
100
100
  const prevCols = process.stdout.columns;
@@ -111,7 +111,7 @@ test('(a) 10 reads collapse to ONE summary; ×10 always present; basenames trunc
111
111
 
112
112
  const s = summaries(h.events);
113
113
  assert.strictEqual(s.length, 1, 'exactly one collapsed summary');
114
- assert.match(s[0].line, /file .* read ×10 \(/, 'shows the read verb and the ×10 count');
114
+ assert.match(s[0].line, /file .* explored ×10 \(/, 'shows the explored verb and the ×10 count');
115
115
  assert.ok(s[0].line.includes('…'), 'the basename list is truncated to width');
116
116
  assert.match(s[0].line, /×10/, 'the ×10 count survives truncation (it is in the fixed prefix)');
117
117
  // Single physical row at the render width.
@@ -145,10 +145,10 @@ test('(b) 2 reads commit as two individual lines, no summary', async () => {
145
145
 
146
146
  // ───────────────────────────────────────────────────────────────────────────
147
147
  // (c) reads and lists INTERLEAVED (read, list, read, list, read) → ONE merged
148
- // summary with the neutral "file ×5" verb (was: two separate summaries —
148
+ // summary with the single "explored ×5" verb (was: two separate summaries —
149
149
  // CHANGED by the key-merge: read_file + list_dir now share one group).
150
150
  // ───────────────────────────────────────────────────────────────────────────
151
- test('(c) interleaved reads+lists collapse to ONE merged summary with the neutral verb', async () => {
151
+ test('(c) interleaved reads+lists collapse to ONE merged summary with the explored verb', async () => {
152
152
  const h = harness();
153
153
  h.setScenario(async (cb) => {
154
154
  cb.onAssistantMessage('');
@@ -163,8 +163,8 @@ test('(c) interleaved reads+lists collapse to ONE merged summary with the neutra
163
163
 
164
164
  const s = summaries(h.events);
165
165
  assert.strictEqual(s.length, 1, 'one merged summary — read and list share a group now');
166
- assert.match(s[0].line, /file .* file ×5 \(/, 'mixed group uses the neutral "file ×5" verb');
167
- assert.doesNotMatch(s[0].line, /read ×|list ×/, 'no homogeneous verb for a mixed group');
166
+ assert.match(s[0].line, /file .* explored ×5 \(/, 'mixed group uses the single "explored ×5" verb');
167
+ assert.doesNotMatch(s[0].line, /read ×|list ×|file ×/, 'no read/list/file verb for the merged group');
168
168
  // All five basenames/dirs listed once in the merged summary.
169
169
  for (const b of ['a.js', 'd0', 'b.js', 'd1', 'c.js']) {
170
170
  assert.ok(s[0].line.includes(b), `merged summary lists ${b}`);
@@ -172,9 +172,9 @@ test('(c) interleaved reads+lists collapse to ONE merged summary with the neutra
172
172
  });
173
173
 
174
174
  // ───────────────────────────────────────────────────────────────────────────
175
- // (c2) homogeneous list run (5 list_dir, no reads) → still "list ×5".
175
+ // (c2) list-only run (5 list_dir, no reads) → "explored ×5" (single verb).
176
176
  // ───────────────────────────────────────────────────────────────────────────
177
- test('(c2) 5 list_dir ops only → homogeneous "list ×5" summary (specific verb kept)', async () => {
177
+ test('(c2) 5 list_dir ops only → "explored ×5" summary (single verb, no list branch)', async () => {
178
178
  const h = harness();
179
179
  h.setScenario(async (cb) => {
180
180
  cb.onAssistantMessage('');
@@ -185,8 +185,8 @@ test('(c2) 5 list_dir ops only → homogeneous "list ×5" summary (specific verb
185
185
 
186
186
  const s = summaries(h.events);
187
187
  assert.strictEqual(s.length, 1, 'one summary');
188
- assert.match(s[0].line, /list ×5/, 'homogeneous lists keep the specific "list" verb');
189
- assert.doesNotMatch(s[0].line, /file ×|read ×/, 'no neutral/read verb for an all-list group');
188
+ assert.match(s[0].line, /explored ×5/, 'list-only group uses the single "explored" verb');
189
+ assert.doesNotMatch(s[0].line, /file ×|read ×|list ×/, 'no read/list/file verb for an all-list group');
190
190
  });
191
191
 
192
192
  // ───────────────────────────────────────────────────────────────────────────
@@ -214,9 +214,9 @@ test('(c3) a grep between two mixed read/list runs splits them into two summarie
214
214
 
215
215
  const s = summaries(h.events);
216
216
  assert.strictEqual(s.length, 2, 'the grep splits the run into two merged summaries');
217
- assert.match(s[0].line, /file ×3/, 'first mixed group is file ×3');
218
- assert.match(s[1].line, /file ×3/, 'second mixed group is file ×3');
219
- const iS0 = h.events.findIndex((e) => e.kind === 'commit' && /file ×3/.test(e.line));
217
+ assert.match(s[0].line, /explored ×3/, 'first mixed group is explored ×3');
218
+ assert.match(s[1].line, /explored ×3/, 'second mixed group is explored ×3');
219
+ const iS0 = h.events.findIndex((e) => e.kind === 'commit' && /explored ×3/.test(e.line));
220
220
  const iGrep = h.events.findIndex((e) => e.kind === 'commit' && /TODO/.test(e.line));
221
221
  assert.ok(iS0 >= 0 && iGrep >= 0 && iS0 < iGrep, 'the first summary lands above the grep line');
222
222
  });
@@ -259,7 +259,7 @@ test('(d) a non-file tool after a read run flushes the summary before its own li
259
259
  });
260
260
  await h.handler('read then shell');
261
261
 
262
- const iSummary = h.events.findIndex((e) => e.kind === 'commit' && /read ×3/.test(e.line));
262
+ const iSummary = h.events.findIndex((e) => e.kind === 'commit' && /explored ×3/.test(e.line));
263
263
  const iShell = h.events.findIndex((e) => e.kind === 'commit' && /ls -la/.test(e.line));
264
264
  assert.ok(iSummary >= 0, 'the read summary committed');
265
265
  assert.ok(iShell >= 0, 'the shell line committed');
@@ -267,7 +267,7 @@ test('(d) a non-file tool after a read run flushes the summary before its own li
267
267
  });
268
268
 
269
269
  // ───────────────────────────────────────────────────────────────────────────
270
- // (e) read run with op #5 erroring → "read ×4" summary, then standalone error +
270
+ // (e) read run with op #5 erroring → "explored ×4" summary, then standalone error +
271
271
  // body, then a fresh group for the subsequent reads.
272
272
  // ───────────────────────────────────────────────────────────────────────────
273
273
  test('(e) a mid-run error flushes the success-group, renders error standalone, then a new group starts', async () => {
@@ -286,13 +286,13 @@ test('(e) a mid-run error flushes the success-group, renders error standalone, t
286
286
 
287
287
  const s = summaries(h.events);
288
288
  assert.strictEqual(s.length, 2, 'the 4 successes and the 3 later successes form two summaries');
289
- assert.match(s[0].line, /read ×4/, 'the errored op did NOT join the group → ×4 not ×5');
290
- assert.match(s[1].line, /read ×3/, 'a new group started after the error');
289
+ assert.match(s[0].line, /explored ×4/, 'the errored op did NOT join the group → ×4 not ×5');
290
+ assert.match(s[1].line, /explored ×3/, 'a new group started after the error');
291
291
 
292
- const iSummary4 = h.events.findIndex((e) => e.kind === 'commit' && /read ×4/.test(e.line));
292
+ const iSummary4 = h.events.findIndex((e) => e.kind === 'commit' && /explored ×4/.test(e.line));
293
293
  const iErrLine = h.events.findIndex((e) => e.kind === 'commit' && /read \/bad\.js/.test(e.line));
294
294
  const iErrBody = h.events.findIndex((e) => e.kind === 'error-body');
295
- const iSummary3 = h.events.findIndex((e) => e.kind === 'commit' && /read ×3/.test(e.line));
295
+ const iSummary3 = h.events.findIndex((e) => e.kind === 'commit' && /explored ×3/.test(e.line));
296
296
  assert.ok(iSummary4 >= 0 && iErrLine >= 0 && iErrBody >= 0 && iSummary3 >= 0, 'all four landmarks present');
297
297
  assert.ok(iSummary4 < iErrLine, 'success summary before the error line (never above the reads it followed)');
298
298
  assert.ok(iErrLine < iErrBody, 'error line before its expandable body');
@@ -300,27 +300,47 @@ test('(e) a mid-run error flushes the success-group, renders error standalone, t
300
300
  });
301
301
 
302
302
  // ───────────────────────────────────────────────────────────────────────────
303
- // (f) multi-iteration read run with intermediate narration → still ONE summary
304
- // (terminal-flag gating: intermediate narration must NOT split the group).
303
+ // (f) INTENTIONAL BEHAVIOR CHANGE (Option b — "fix: flush activity groups before
304
+ // content-bearing narration for correct ordering").
305
+ //
306
+ // PREVIOUSLY this asserted that content-bearing INTERMEDIATE narration did
307
+ // NOT split the group (terminal-flag gating → one "explored ×4"). That left
308
+ // the narration committed ABOVE a still-open group → the summary later landed
309
+ // BELOW the conclusion it was based on.
310
+ //
311
+ // NEW behavior: content-bearing intermediate narration flushes the open group
312
+ // FIRST, so a chatty multi-read run FRAGMENTS into correctly-ordered
313
+ // sub-groups (each "explored ×N" above its narration). Silent runs (empty
314
+ // interim narration) still collapse fully — see narration-ordering.test.js.
315
+ // Three reads per fragment so each crosses the ≥3 summary threshold.
305
316
  // ───────────────────────────────────────────────────────────────────────────
306
- test('(f) intermediate-iteration narration does NOT split a multi-iteration read run', async () => {
317
+ test('(f) content-bearing intermediate narration FRAGMENTS a multi-iteration read run (correctly ordered)', async () => {
307
318
  const h = harness();
308
319
  h.setScenario(async (cb) => {
309
- // iter 1: two reads, then a NON-empty but NON-terminal narration.
320
+ // iter 1: three reads, then a NON-empty NON-terminal narration → flushes #1.
310
321
  cb.onAssistantMessage('');
311
322
  fileOp(cb, 'read', '/i1a.js');
312
323
  fileOp(cb, 'read', '/i1b.js');
324
+ fileOp(cb, 'read', '/i1c.js');
313
325
  cb.onAssistantMessage('Let me read a couple more files.', { terminal: false });
314
- // iter 2: two more reads, then the terminal answer.
326
+ // iter 2: three more reads, then the terminal answer → flushes #2.
315
327
  fileOp(cb, 'read', '/i2a.js');
316
328
  fileOp(cb, 'read', '/i2b.js');
329
+ fileOp(cb, 'read', '/i2c.js');
317
330
  cb.onAssistantMessage('All read.', { terminal: true });
318
331
  });
319
- await h.handler('multi-iteration reads');
332
+ await h.handler('multi-iteration reads with interim narration');
320
333
 
321
334
  const s = summaries(h.events);
322
- assert.strictEqual(s.length, 1, 'the four reads across two iterations collapse to ONE summary');
323
- assert.match(s[0].line, /read ×4/, 'all four reads counted');
335
+ assert.strictEqual(s.length, 2, 'content-bearing interim narration split the run into TWO summaries');
336
+ assert.ok(s.every((e) => /explored ×3/.test(e.line)), 'each fragment is explored ×3');
337
+
338
+ // Ordering: fragment #1 above the interim narration, fragment #2 below it.
339
+ const iSum1 = h.events.findIndex((e) => e.kind === 'commit' && /explored ×3/.test(e.line));
340
+ const iNarr1 = h.events.findIndex((e) => e.kind === 'answer' && e.content === 'Let me read a couple more files.');
341
+ const iSum2 = h.events.findIndex((e, idx) => idx > iSum1 && e.kind === 'commit' && /explored ×3/.test(e.line));
342
+ assert.ok(iSum1 < iNarr1, 'fragment #1 commits ABOVE the interim narration');
343
+ assert.ok(iNarr1 < iSum2, 'fragment #2 commits below the interim narration');
324
344
  });
325
345
 
326
346
  // ───────────────────────────────────────────────────────────────────────────
@@ -363,7 +383,7 @@ function replayCommits(loadedMessages, cols) {
363
383
 
364
384
  const fileLineOf = (commitsArr) => commitsArr
365
385
  .map((c) => stripAnsi(c))
366
- .filter((c) => /file .* read ×\d+/.test(c));
386
+ .filter((c) => /file .* explored ×\d+/.test(c));
367
387
 
368
388
  test('(g) replay re-groups to the same summary at the replay width; narrower re-truncates; ≥3 threshold applied', () => {
369
389
  const files = Array.from({ length: 6 }, (_, i) => `/proj/module-${i}/index-file-${i}.js`);
@@ -398,7 +418,7 @@ test('(g) replay re-groups to the same summary at the replay width; narrower re-
398
418
 
399
419
  // ───────────────────────────────────────────────────────────────────────────
400
420
  // (g2) replay of a MIXED read/list run re-groups into the SAME single merged
401
- // summary (neutral "file ×N" verb), byte-identical to the live oracle.
421
+ // summary (single "explored ×N" verb), byte-identical to the live oracle.
402
422
  // ───────────────────────────────────────────────────────────────────────────
403
423
  test('(g2) replay of a mixed read/list run → identical merged summary at the replay width', () => {
404
424
  // read, list, read, list, read — interleaved, persisted as native cores.
@@ -418,11 +438,11 @@ test('(g2) replay of a mixed read/list run → identical merged summary at the r
418
438
 
419
439
  const mixedLineOf = (commitsArr) => commitsArr
420
440
  .map((c) => stripAnsi(c))
421
- .filter((c) => /file .* file ×\d+/.test(c));
441
+ .filter((c) => /file .* explored ×\d+/.test(c));
422
442
 
423
443
  const oracle = liveFileSummary(ops, 200);
424
444
  assert.strictEqual(oracle.length, 1, 'live commits one merged summary for the mixed run');
425
- assert.match(stripAnsi(oracle[0]), /file ×5/, 'live oracle uses the neutral verb for the mixed run');
445
+ assert.match(stripAnsi(oracle[0]), /explored ×5/, 'live oracle uses the single explored verb for the mixed run');
426
446
 
427
447
  const replay = mixedLineOf(replayCommits(loaded, 200));
428
448
  assert.strictEqual(replay.length, 1, 'replay commits exactly one merged file summary');
@@ -468,10 +488,10 @@ test('(i) the web tracker is unaffected: a web run alongside a file run still yi
468
488
 
469
489
  const fileS = summaries(h.events);
470
490
  assert.strictEqual(fileS.length, 1, 'one file summary');
471
- assert.match(fileS[0].line, /read ×3/);
491
+ assert.match(fileS[0].line, /explored ×3/);
472
492
  const webS = commits(h.events).filter((e) => / web /.test(e.line) && /source/.test(e.line));
473
493
  assert.strictEqual(webS.length, 1, 'the web tracker still commits its own summary, unaffected');
474
- const iFile = h.events.findIndex((e) => e.kind === 'commit' && /read ×3/.test(e.line));
494
+ const iFile = h.events.findIndex((e) => e.kind === 'commit' && /explored ×3/.test(e.line));
475
495
  const iWeb = h.events.findIndex((e) => e.kind === 'commit' && / web /.test(e.line) && /source/.test(e.line));
476
496
  assert.ok(iFile < iWeb, 'the file summary lands above the web summary it preceded');
477
497
  });
@@ -492,31 +512,31 @@ test('isGroupableFileCore / normalizeFileTag / fileSummaryState predicates', ()
492
512
  assert.ok(!isGroupableFileCore({ v: 1, kind: 'web', tag: 'http_get' }), 'a web core is not a file core');
493
513
  assert.ok(!isGroupableFileCore(null), 'null is tolerated');
494
514
 
495
- // read_file and list_dir normalize to DISTINCT tags (used by fileSummaryState
496
- // to decide the verb) …
515
+ // read_file and list_dir normalize to DISTINCT tags
497
516
  assert.notStrictEqual(normalizeFileTag(readCore.tag), normalizeFileTag(listCore.tag));
498
517
  assert.strictEqual(normalizeFileTag('read'), 'read_file');
499
518
  assert.strictEqual(normalizeFileTag('list_dir'), 'list_dir');
500
- // … but they now share ONE group KEY, so a read↔list switch never flushes.
519
+ // … but they share ONE group KEY, so a read↔list switch never flushes.
501
520
  assert.strictEqual(fileGroupKey('read'), fileGroupKey('list_dir'));
502
521
  assert.strictEqual(fileGroupKey('read_file'), fileGroupKey('list_dir'));
503
522
 
504
- // Homogeneous read group → specific "read" verb.
523
+ // Every group composition → the SAME single "explored" / "exploring…" verb,
524
+ // regardless of read-only, list-only, or mixed.
505
525
  const st = fileSummaryState([readCore, readCore]);
506
- assert.strictEqual(st.verb, 'read');
507
- assert.strictEqual(st.gerund, 'reading…');
526
+ assert.strictEqual(st.verb, 'explored');
527
+ assert.strictEqual(st.gerund, 'exploring…');
508
528
  assert.strictEqual(st.count, 2);
509
529
  assert.deepStrictEqual(st.basenames, ['a.js', 'a.js']);
510
530
 
511
- // Homogeneous list group → specific "list" verb.
531
+ // List-only group → still "explored".
512
532
  const stList = fileSummaryState([listCore, listCore]);
513
- assert.strictEqual(stList.verb, 'list');
514
- assert.strictEqual(stList.gerund, 'listing…');
533
+ assert.strictEqual(stList.verb, 'explored');
534
+ assert.strictEqual(stList.gerund, 'exploring…');
515
535
 
516
- // Mixed group → neutral "file" / "accessing…" verb.
536
+ // Mixed group → still "explored" (no composition branching).
517
537
  const stMixed = fileSummaryState([readCore, listCore]);
518
- assert.strictEqual(stMixed.verb, 'file');
519
- assert.strictEqual(stMixed.gerund, 'accessing…');
538
+ assert.strictEqual(stMixed.verb, 'explored');
539
+ assert.strictEqual(stMixed.gerund, 'exploring…');
520
540
  assert.strictEqual(stMixed.count, 2);
521
541
  assert.deepStrictEqual(stMixed.basenames, ['a.js', 'd']);
522
542
  });
@@ -0,0 +1,309 @@
1
+ 'use strict';
2
+
3
+ // Activity-group ordering vs assistant narration (Option b — "fix: flush activity
4
+ // groups before content-bearing narration for correct ordering").
5
+ //
6
+ // THE BUG: assistant narration commits to immutable scrollback immediately
7
+ // (streamed token-by-token via streamToken, finalized via finalizeLastMessage),
8
+ // while an open file/web activity group lives in the redrawable activity region
9
+ // BELOW scrollback and only commits at a later boundary. When narration was
10
+ // INTERMEDIATE (a tool follows), the old terminal-only flush gate was skipped, so
11
+ // the narration committed ABOVE the still-open group and the group flushed later,
12
+ // landing BELOW the conclusion it was based on (the "list ×3 below 'directory
13
+ // almost empty'" screenshot).
14
+ //
15
+ // THE FIX (Option b): flush the open group BEFORE any content-bearing narration.
16
+ // • Streamed path — flush at streaming-START (onToken, before the first
17
+ // content-bearing token commits the "▸ AI-agent" header + line to scrollback).
18
+ // • Finalize path — flush at onAssistantMessage on terminal OR content-bearing
19
+ // narration (backstop for the non-streamed path).
20
+ // • Silent runs (empty/whitespace-only interim narration) still fully collapse.
21
+ //
22
+ // Tests drive the REAL createTurnHandler callbacks (same harness shape as
23
+ // permission-flush.test.js / file-activity.test.js), recording every committed
24
+ // scrollback line and every narration line in ONE ordered log so ordering can be
25
+ // asserted directly.
26
+
27
+ const { test } = require('node:test');
28
+ const assert = require('node:assert');
29
+
30
+ process.stdout.isTTY = true;
31
+ delete process.env.NO_COLOR;
32
+
33
+ const { stripAnsi } = require('../lib/ui/utils');
34
+ const { createTurnHandler } = require('../lib/commands/chat-turn');
35
+
36
+ // ── Live harness ──────────────────────────────────────────────────────────────
37
+ // `turnOpts` becomes ctx.opts. Pass { showThink: true } to disable the
38
+ // implicit-think buffering gate so onToken streams tokens straight to
39
+ // chatHistory.streamToken (exercising the streamed-narration path).
40
+ function harness(turnOpts) {
41
+ const events = [];
42
+ // streaming flag mirrors chat-history's _streamActive: set on the first
43
+ // streamToken, cleared on clear/finalize. isStreaming() drives the
44
+ // streaming-start flush gate in onToken.
45
+ let streaming = false;
46
+
47
+ const writerModule = {
48
+ startActivity() {}, updateActivity() {},
49
+ endActivity(id, line) {
50
+ for (const raw of String(line == null ? '' : line).split('\n')) {
51
+ if (raw === '') continue;
52
+ events.push({ kind: 'commit', line: stripAnsi(raw) });
53
+ }
54
+ },
55
+ scrollback(line) { events.push({ kind: 'scrollback', line: stripAnsi(String(line)) }); },
56
+ };
57
+ const chatHistory = {
58
+ addMessage(m) { if (m && m.isError) events.push({ kind: 'error', output: m.output }); },
59
+ streamToken(t) {
60
+ if (!t) return;
61
+ // Mirror the real per-line commit: the header on the first token, then the
62
+ // token text as a committed narration line. (We don't need byte-fidelity —
63
+ // only that a narration line lands in the ordered log when the model speaks.)
64
+ if (!streaming) { streaming = true; events.push({ kind: 'narration', line: '▸ AI-agent' }); }
65
+ events.push({ kind: 'narration', line: t });
66
+ },
67
+ isStreaming() { return streaming; },
68
+ clearStreamingContent() { streaming = false; },
69
+ deferToolOutput() {}, commitDeferredDetail() {},
70
+ finalizeLastMessage(content) {
71
+ streaming = false;
72
+ if (content && content.trim()) events.push({ kind: 'answer', content });
73
+ },
74
+ };
75
+ const statusBar = { update() {}, onToken() {}, addPendingTokens() {}, updateMetrics() {}, setCost() {} };
76
+ const inputField = { on() {}, removeListener() {}, releaseNavigation() {}, setDisabled() {} };
77
+
78
+ let scenario = async () => {};
79
+ const runAgentLoop = async (messages, model, maxIter, limit, loopOpts) => {
80
+ await scenario(loopOpts.callbacks);
81
+ return { messages, metrics: { turns: [] }, withheldActions: [] };
82
+ };
83
+
84
+ const ctx = {
85
+ inputField, statusBar, chatHistory, writerModule, runAgentLoop,
86
+ getConfig: () => ({ auth_token: 'tok', max_iterations: 50, show_cost: false, system_prompt_mode: 'system_role' }),
87
+ approxTokens: () => 0,
88
+ resolveCommand: () => null,
89
+ opts: turnOpts || {},
90
+ TAG_REGISTRY: {},
91
+ collapseListMsg() {}, handlePendingSelection() {}, showPendingStep() {},
92
+ activateNavCapture() {}, finalizeListMsg() {},
93
+ createChatIfNeeded: async () => {}, saveTurnToDashboard: async () => {}, saveSession() {},
94
+ messages: [], currentModel: 'm', debugMode: false, pendingImages: [],
95
+ chatSync: async () => '', resolvedSystemPrompt: '', resolvedTokenLimit: null, planMode: false,
96
+ };
97
+
98
+ const handler = createTurnHandler(ctx, {});
99
+ return { events, handler, setScenario: (fn) => { scenario = fn; } };
100
+ }
101
+
102
+ // One fully-successful groupable file op (read / list_dir).
103
+ function fileOp(cb, tag, path) {
104
+ cb.onToolStart(tag, path, { id: `${tag}-${path}`, attrs: { path } });
105
+ cb.onToolEnd(tag, 'contents', 5, { id: `${tag}-${path}`, attrs: { path }, meta: { bytes: 10 }, error: null });
106
+ }
107
+
108
+ // One fully-successful web op (http_get) — leaves the web group OPEN.
109
+ function webOp(cb, url) {
110
+ cb.onToolStart('http_get', url, { id: `g-${url}`, attrs: { url } });
111
+ cb.onToolEnd('http_get', {}, 120, { id: `g-${url}`, attrs: { url }, meta: { status_code: 200, bytes: 1000 }, error: null });
112
+ }
113
+
114
+ const commits = (events) => events.filter((e) => e.kind === 'commit');
115
+ const fileSummaries = (events) => commits(events).filter((e) => /file .* explored ×\d+/.test(e.line));
116
+ const webSummaries = (events) => commits(events).filter((e) => / web /.test(e.line) && /source/.test(e.line));
117
+
118
+ // ───────────────────────────────────────────────────────────────────────────
119
+ // (a) The SCREENSHOT scenario: a group of list/read ops, then a content-bearing
120
+ // INTERMEDIATE narration (the conclusion based on them), then a non-groupable
121
+ // tool (write_file). The group must commit ABOVE the narration.
122
+ // ───────────────────────────────────────────────────────────────────────────
123
+ test('(a) group of list ops → intermediate conclusion narration → write_file: group commits ABOVE the narration', async () => {
124
+ const h = harness();
125
+ h.setScenario(async (cb) => {
126
+ cb.onAssistantMessage(''); // empty pre-tool narration — must NOT flush
127
+ fileOp(cb, 'list_dir', '/proj');
128
+ fileOp(cb, 'list_dir', '/proj/src');
129
+ fileOp(cb, 'list_dir', '/proj/test');
130
+ // The conclusion drawn FROM the listings — content-bearing, intermediate (a
131
+ // tool follows). This is what must NOT sit above the listings it summarizes.
132
+ cb.onAssistantMessage('The directory is almost empty.', { terminal: false });
133
+ // A non-groupable effectful tool follows.
134
+ cb.onToolStart('write_file', '/proj/new.js', { id: 'w1', attrs: { path: '/proj/new.js' } });
135
+ cb.onToolEnd('write_file', 'ok', 4, { id: 'w1', attrs: { path: '/proj/new.js' }, meta: { bytes: 3 }, error: null });
136
+ cb.onAssistantMessage('Done.', { terminal: true });
137
+ });
138
+ await h.handler('list a few dirs, conclude, then write');
139
+
140
+ const s = fileSummaries(h.events);
141
+ assert.strictEqual(s.length, 1, 'the three listings collapse to one summary');
142
+ assert.match(s[0].line, /explored ×3/);
143
+
144
+ const iGroup = h.events.findIndex((e) => e.kind === 'commit' && /explored ×3/.test(e.line));
145
+ const iNarration = h.events.findIndex((e) => e.kind === 'answer' && e.content === 'The directory is almost empty.');
146
+ assert.ok(iGroup >= 0 && iNarration >= 0, 'both group and narration present');
147
+ assert.ok(iGroup < iNarration, 'the explored ×3 group commits ABOVE the conclusion narration (the screenshot fix)');
148
+ });
149
+
150
+ // ───────────────────────────────────────────────────────────────────────────
151
+ // (b) SILENT multi-read run — only empty interim narration. Still ONE summary.
152
+ // ───────────────────────────────────────────────────────────────────────────
153
+ test('(b) silent multi-read run (empty interim narration) still collapses to ONE explored ×N', async () => {
154
+ const h = harness();
155
+ h.setScenario(async (cb) => {
156
+ cb.onAssistantMessage('');
157
+ fileOp(cb, 'read', '/a.js');
158
+ fileOp(cb, 'read', '/b.js');
159
+ cb.onAssistantMessage('', { terminal: false }); // silent intermediate — must NOT flush
160
+ fileOp(cb, 'read', '/c.js');
161
+ fileOp(cb, 'read', '/d.js');
162
+ cb.onAssistantMessage('', { terminal: false }); // silent intermediate — must NOT flush
163
+ fileOp(cb, 'read', '/e.js');
164
+ cb.onAssistantMessage('Read everything.', { terminal: true });
165
+ });
166
+ await h.handler('silent multi-read run');
167
+
168
+ const s = fileSummaries(h.events);
169
+ assert.strictEqual(s.length, 1, 'a silent run collapses to exactly ONE summary across all iterations');
170
+ assert.match(s[0].line, /explored ×5/, 'all five reads counted — empty interim narration did not split the group');
171
+ });
172
+
173
+ // ───────────────────────────────────────────────────────────────────────────
174
+ // (c) CHATTY run — content-bearing narration between reads. Fragments into
175
+ // correctly-ordered sub-groups, each ABOVE its narration.
176
+ // ───────────────────────────────────────────────────────────────────────────
177
+ test('(c) chatty run: content-bearing interim narration fragments into correctly-ordered sub-groups', async () => {
178
+ const h = harness();
179
+ h.setScenario(async (cb) => {
180
+ cb.onAssistantMessage('');
181
+ fileOp(cb, 'read', '/a.js');
182
+ fileOp(cb, 'read', '/b.js');
183
+ fileOp(cb, 'read', '/c.js');
184
+ cb.onAssistantMessage('First batch looks fine.', { terminal: false }); // flushes group #1
185
+ fileOp(cb, 'read', '/d.js');
186
+ fileOp(cb, 'read', '/e.js');
187
+ fileOp(cb, 'read', '/f.js');
188
+ cb.onAssistantMessage('Second batch too.', { terminal: true }); // flushes group #2
189
+ });
190
+ await h.handler('chatty multi-read run');
191
+
192
+ const s = fileSummaries(h.events);
193
+ assert.strictEqual(s.length, 2, 'two content-bearing narrations → two fragments');
194
+ assert.ok(s.every((e) => /explored ×3/.test(e.line)), 'each fragment is explored ×3');
195
+
196
+ const iSum1 = h.events.findIndex((e) => e.kind === 'commit' && /explored ×3/.test(e.line));
197
+ const iNarr1 = h.events.findIndex((e) => e.kind === 'answer' && e.content === 'First batch looks fine.');
198
+ const iSum2 = h.events.findIndex((e, idx) => idx > iSum1 && e.kind === 'commit' && /explored ×3/.test(e.line));
199
+ const iNarr2 = h.events.findIndex((e) => e.kind === 'answer' && e.content === 'Second batch too.');
200
+ assert.ok(iSum1 < iNarr1, 'fragment #1 above its narration');
201
+ assert.ok(iNarr1 < iSum2, 'fragment #2 starts after narration #1');
202
+ assert.ok(iSum2 < iNarr2, 'fragment #2 above its narration');
203
+ });
204
+
205
+ // ───────────────────────────────────────────────────────────────────────────
206
+ // (d) STREAMED narration (token path) — the group must commit ABOVE the first
207
+ // streamed narration line. This is the load-bearing case: streamToken commits
208
+ // the header + line to scrollback BEFORE onAssistantMessage, so the flush must
209
+ // happen at streaming-START (onToken), not at onAssistantMessage.
210
+ // showThink:true disables the implicit-think buffer so onToken streams live.
211
+ // ───────────────────────────────────────────────────────────────────────────
212
+ test('(d) streamed content-bearing narration: group commits ABOVE the first narration line', async () => {
213
+ const h = harness({ showThink: true });
214
+ h.setScenario(async (cb) => {
215
+ fileOp(cb, 'list_dir', '/proj');
216
+ fileOp(cb, 'list_dir', '/proj/src');
217
+ fileOp(cb, 'list_dir', '/proj/test');
218
+ // Narration STREAMS in token-by-token (what really happens). The very first
219
+ // content token must flush the open group first.
220
+ for (const tok of ['The ', 'directory ', 'is ', 'almost ', 'empty.', '\n']) cb.onToken(tok);
221
+ cb.onAssistantMessage('The directory is almost empty.', { terminal: true });
222
+ });
223
+ await h.handler('stream a conclusion after listing');
224
+
225
+ const s = fileSummaries(h.events);
226
+ assert.strictEqual(s.length, 1, 'one summary');
227
+ assert.match(s[0].line, /explored ×3/);
228
+
229
+ const iGroup = h.events.findIndex((e) => e.kind === 'commit' && /explored ×3/.test(e.line));
230
+ const iFirstNarration = h.events.findIndex((e) => e.kind === 'narration');
231
+ assert.ok(iGroup >= 0 && iFirstNarration >= 0, 'group and streamed narration both present');
232
+ assert.ok(iGroup < iFirstNarration, 'the group commits ABOVE the FIRST streamed narration line (streaming-start flush)');
233
+ });
234
+
235
+ // ───────────────────────────────────────────────────────────────────────────
236
+ // (e) DOUBLE-FLUSH idempotency — the streaming-start flush, then the
237
+ // onAssistantMessage flush, then the turn-end finally flush all call flush();
238
+ // the group commits EXACTLY once.
239
+ // ───────────────────────────────────────────────────────────────────────────
240
+ test('(e) double-flush idempotency: streaming-start + onAssistantMessage + finally → exactly one commit', async () => {
241
+ const h = harness({ showThink: true });
242
+ h.setScenario(async (cb) => {
243
+ fileOp(cb, 'read', '/a.js');
244
+ fileOp(cb, 'read', '/b.js');
245
+ fileOp(cb, 'read', '/c.js');
246
+ for (const tok of ['All ', 'good.', '\n']) cb.onToken(tok); // flush #1 (streaming-start)
247
+ cb.onAssistantMessage('All good.', { terminal: true }); // flush #2 (no-op) + finally flush (no-op)
248
+ });
249
+ await h.handler('one group, many flush opportunities');
250
+
251
+ const s = fileSummaries(h.events);
252
+ assert.strictEqual(s.length, 1, 'the group committed EXACTLY once despite multiple flush() calls');
253
+ assert.match(s[0].line, /explored ×3/);
254
+ });
255
+
256
+ // ───────────────────────────────────────────────────────────────────────────
257
+ // (f) WEB group parity — the open web group also flushes before content-bearing
258
+ // narration, symmetrically with the file group.
259
+ // ───────────────────────────────────────────────────────────────────────────
260
+ test('(f) web group flushes before content-bearing narration (symmetry with the file group)', async () => {
261
+ const h = harness();
262
+ h.setScenario(async (cb) => {
263
+ cb.onAssistantMessage('');
264
+ webOp(cb, 'https://a.example');
265
+ webOp(cb, 'https://b.example');
266
+ // Content-bearing intermediate narration must flush the web group above it.
267
+ cb.onAssistantMessage('Both pages confirm the API shape.', { terminal: false });
268
+ cb.onToolStart('write_file', '/notes.md', { id: 'w1', attrs: { path: '/notes.md' } });
269
+ cb.onToolEnd('write_file', 'ok', 4, { id: 'w1', attrs: { path: '/notes.md' }, meta: { bytes: 3 }, error: null });
270
+ cb.onAssistantMessage('Done.', { terminal: true });
271
+ });
272
+ await h.handler('fetch two pages, conclude, then write');
273
+
274
+ const w = webSummaries(h.events);
275
+ assert.strictEqual(w.length, 1, 'the two fetches collapse to one web summary');
276
+ const iWeb = h.events.findIndex((e) => e.kind === 'commit' && / web /.test(e.line) && /source/.test(e.line));
277
+ const iNarration = h.events.findIndex((e) => e.kind === 'answer' && e.content === 'Both pages confirm the API shape.');
278
+ assert.ok(iWeb >= 0 && iNarration >= 0, 'web summary and narration present');
279
+ assert.ok(iWeb < iNarration, 'the web summary commits ABOVE the content-bearing narration');
280
+ });
281
+
282
+ // ───────────────────────────────────────────────────────────────────────────
283
+ // (g) WHITESPACE-only interim narration does NOT flush (silent collapse preserved
284
+ // even when pure streaming artifacts arrive between reads).
285
+ // ───────────────────────────────────────────────────────────────────────────
286
+ test('(g) whitespace-only interim narration does NOT flush — silent collapse preserved', async () => {
287
+ const h = harness({ showThink: true });
288
+ let commitsAfterWhitespace = -1;
289
+ h.setScenario(async (cb) => {
290
+ fileOp(cb, 'read', '/a.js');
291
+ fileOp(cb, 'read', '/b.js');
292
+ // Pure streaming artifacts between reads — whitespace tokens + a whitespace
293
+ // finalize. NONE of these may flush the open group (token.trim() is empty so
294
+ // the streaming-start gate is skipped; the onAssistantMessage gate sees no
295
+ // content). The group stays open and uncommitted.
296
+ cb.onToken(' ');
297
+ cb.onToken('\n');
298
+ cb.onAssistantMessage(' ', { terminal: false });
299
+ commitsAfterWhitespace = commits(h.events).length; // snapshot: must be 0 — nothing flushed
300
+ fileOp(cb, 'read', '/c.js');
301
+ cb.onAssistantMessage('Read all three.', { terminal: true }); // terminal → the single flush
302
+ });
303
+ await h.handler('whitespace artifacts between reads');
304
+
305
+ assert.strictEqual(commitsAfterWhitespace, 0, 'whitespace interim narration committed NOTHING — the group was not flushed');
306
+ const s = fileSummaries(h.events);
307
+ assert.strictEqual(s.length, 1, 'whitespace interim narration did NOT split the group');
308
+ assert.match(s[0].line, /explored ×3/, 'all three reads collapsed into ONE summary');
309
+ });
@@ -0,0 +1,302 @@
1
+ 'use strict';
2
+
3
+ // Permission-prompt flush — open file/web activity groups must be committed to
4
+ // scrollback when a permission-gated (effectful) tool triggers a prompt, NOT
5
+ // left rendering LIVE in the writer's activity region beside the modal.
6
+ //
7
+ // Root cause this guards: the agent loop asks permission BEFORE onToolStart
8
+ // (agent.js), so onToolStart's "flush the other group before this non-groupable
9
+ // op" step is sequenced AFTER the modal and cannot fire while it is open. The fix
10
+ // adds an unconditional flush of both trackers at the TOP of the onPermissionAsk
11
+ // handler (chat-turn.js). This is safe because groupable tools (read_file /
12
+ // list_dir) are read-only with a NULL permission descriptor, so onPermissionAsk
13
+ // never fires for them — by the time it fires the prompting tool is non-groupable.
14
+ //
15
+ // Tests drive the REAL createTurnHandler callbacks (same harness shape as
16
+ // file-activity.test.js / web-activity-ordering.test.js), simulating the
17
+ // loop's onPermissionAsk → (grant ⇒ onToolStart/onToolEnd | deny ⇒ nothing)
18
+ // sequence by hand.
19
+
20
+ const { test } = require('node:test');
21
+ const assert = require('node:assert');
22
+
23
+ // Stable colour env for byte comparisons (node:test isolates each file's process).
24
+ process.stdout.isTTY = true;
25
+ delete process.env.NO_COLOR;
26
+
27
+ const { stripAnsi } = require('../lib/ui/utils');
28
+ const { createTurnHandler } = require('../lib/commands/chat-turn');
29
+ const { TOOL_REGISTRY } = require('../lib/tool_registry');
30
+
31
+ // ── Live harness: drive the real createTurnHandler callbacks ──────────────────
32
+ // Mirrors file-activity.test.js's harness. Records every committed line in one
33
+ // ordered log so we can assert flush ORDERING (group above the prompting tool).
34
+ function harness(opts) {
35
+ const events = [];
36
+ const writerModule = {
37
+ startActivity() {}, updateActivity() {},
38
+ endActivity(id, line) {
39
+ for (const raw of String(line == null ? '' : line).split('\n')) {
40
+ if (raw === '') continue;
41
+ events.push({ kind: 'commit', line: stripAnsi(raw) });
42
+ }
43
+ },
44
+ scrollback(line) { events.push({ kind: 'scrollback', line: stripAnsi(String(line)) }); },
45
+ };
46
+ const chatHistory = {
47
+ addMessage(m) { if (m && m.isError) events.push({ kind: 'error-body', output: m.output }); },
48
+ streamToken() {}, clearStreamingContent() {},
49
+ deferToolOutput() {}, commitDeferredDetail() {},
50
+ finalizeLastMessage(content) { if (content && content.trim()) events.push({ kind: 'answer', content }); },
51
+ };
52
+ const statusBar = { update() {}, onToken() {}, addPendingTokens() {}, updateMetrics() {}, setCost() {} };
53
+ const inputField = { on() {}, removeListener() {}, releaseNavigation() {}, setDisabled() {} };
54
+
55
+ let scenario = async () => {};
56
+ const runAgentLoop = async (messages, model, maxIter, limit, loopOpts) => {
57
+ await scenario(loopOpts.callbacks);
58
+ return { messages, metrics: { turns: [] }, withheldActions: [] };
59
+ };
60
+
61
+ const ctx = {
62
+ inputField, statusBar, chatHistory, writerModule, runAgentLoop,
63
+ getConfig: () => ({ auth_token: 'tok', max_iterations: 50, show_cost: false, system_prompt_mode: 'system_role' }),
64
+ approxTokens: () => 0,
65
+ resolveCommand: () => null,
66
+ opts: {},
67
+ TAG_REGISTRY: {},
68
+ collapseListMsg() {}, handlePendingSelection() {}, showPendingStep() {},
69
+ activateNavCapture() {}, finalizeListMsg() {},
70
+ createChatIfNeeded: async () => {}, saveTurnToDashboard: async () => {}, saveSession() {},
71
+ messages: [], currentModel: 'm', debugMode: (opts && opts.debugMode) || false, pendingImages: [],
72
+ chatSync: async () => '', resolvedSystemPrompt: '', resolvedTokenLimit: null, planMode: false,
73
+ };
74
+
75
+ const handler = createTurnHandler(ctx, {});
76
+ return { events, handler, setScenario: (fn) => { scenario = fn; } };
77
+ }
78
+
79
+ // One fully-successful groupable file op (read / list_dir).
80
+ function fileOp(cb, tag, path, bytes) {
81
+ cb.onToolStart(tag, path, { id: `${tag}-${path}`, attrs: { path } });
82
+ cb.onToolEnd(tag, 'contents', 5, { id: `${tag}-${path}`, attrs: { path }, meta: { bytes: bytes || 10 }, error: null });
83
+ }
84
+
85
+ // One fully-successful web op (http_get) — leaves the web group OPEN (it only
86
+ // flushes on a non-web tool start, terminal narration, or turn end).
87
+ function webOp(cb, url) {
88
+ cb.onToolStart('http_get', url, { id: `g-${url}`, attrs: { url } });
89
+ cb.onToolEnd('http_get', {}, 120, { id: `g-${url}`, attrs: { url }, meta: { status_code: 200, bytes: 1000 }, error: null });
90
+ }
91
+
92
+ const commits = (events) => events.filter((e) => e.kind === 'commit');
93
+ const fileSummaries = (events) => commits(events).filter((e) => /file .* explored ×\d+/.test(e.line));
94
+ const webSummaries = (events) => commits(events).filter((e) => / web /.test(e.line) && /source/.test(e.line));
95
+
96
+ // ───────────────────────────────────────────────────────────────────────────
97
+ // (a) 2-read group (below threshold) + permission-gated write_file → the group
98
+ // flushes as TWO individual lines at onPermissionAsk, BEFORE the prompt; no
99
+ // stale live group remains during the modal. (grant path)
100
+ // ───────────────────────────────────────────────────────────────────────────
101
+ test('(a) <3 file group flushes as individual lines at onPermissionAsk, above the prompting tool', async () => {
102
+ const h = harness();
103
+ let commitsAtAsk = -1;
104
+ h.setScenario(async (cb) => {
105
+ cb.onAssistantMessage('');
106
+ fileOp(cb, 'read', '/a.js');
107
+ fileOp(cb, 'read', '/b.js');
108
+ // Effectful tool triggers a permission prompt — fires BEFORE onToolStart.
109
+ cb.onPermissionAsk('write_file', '/out.js');
110
+ commitsAtAsk = commits(h.events).length; // snapshot at the ask
111
+ // Grant → the tool now starts and ends.
112
+ cb.onToolStart('write_file', '/out.js', { id: 'w1', attrs: { path: '/out.js' } });
113
+ cb.onToolEnd('write_file', 'ok', 4, { id: 'w1', attrs: { path: '/out.js' }, meta: { bytes: 3 }, error: null });
114
+ cb.onAssistantMessage('done');
115
+ });
116
+ await h.handler('two reads then a write');
117
+
118
+ // The two reads were committed at the moment the prompt opened — not stranded.
119
+ assert.strictEqual(commitsAtAsk, 2, 'both read lines committed AT onPermissionAsk, before the modal');
120
+ assert.strictEqual(fileSummaries(h.events).length, 0, 'a 2-op group stays individual lines (no summary)');
121
+ const reads = commits(h.events).filter((e) => /read \//.test(e.line));
122
+ assert.strictEqual(reads.length, 2, 'two individual read lines');
123
+ // Ordering: the read lines land ABOVE the write_file line.
124
+ const iLastRead = h.events.map((e) => e).reduce((acc, e, i) => (e.kind === 'commit' && /read \//.test(e.line) ? i : acc), -1);
125
+ const iWrite = h.events.findIndex((e) => e.kind === 'commit' && /out\.js/.test(e.line) && !/read/.test(e.line));
126
+ assert.ok(iLastRead >= 0 && iWrite >= 0 && iLastRead < iWrite, 'read group commits ABOVE the write_file row');
127
+ });
128
+
129
+ // ───────────────────────────────────────────────────────────────────────────
130
+ // (b) ≥3-read group + permission-gated write_file → the group flushes as ONE
131
+ // summary at onPermissionAsk, BEFORE the prompt, above the tool row.
132
+ // ───────────────────────────────────────────────────────────────────────────
133
+ test('(b) ≥3 file group flushes as a summary at onPermissionAsk, above the prompting tool', async () => {
134
+ const h = harness();
135
+ let summariesAtAsk = -1;
136
+ h.setScenario(async (cb) => {
137
+ cb.onAssistantMessage('');
138
+ for (let i = 0; i < 3; i++) fileOp(cb, 'read', `/r${i}.js`);
139
+ cb.onPermissionAsk('write_file', '/out.js');
140
+ summariesAtAsk = fileSummaries(h.events).length; // snapshot at the ask
141
+ cb.onToolStart('write_file', '/out.js', { id: 'w1', attrs: { path: '/out.js' } });
142
+ cb.onToolEnd('write_file', 'ok', 4, { id: 'w1', attrs: { path: '/out.js' }, meta: { bytes: 3 }, error: null });
143
+ cb.onAssistantMessage('done');
144
+ });
145
+ await h.handler('three reads then a write');
146
+
147
+ assert.strictEqual(summariesAtAsk, 1, 'the summary committed AT onPermissionAsk');
148
+ const s = fileSummaries(h.events);
149
+ assert.strictEqual(s.length, 1, 'exactly one summary overall');
150
+ assert.match(s[0].line, /explored ×3/, 'collapsed explored ×3 summary');
151
+ const iSummary = h.events.findIndex((e) => e.kind === 'commit' && /explored ×3/.test(e.line));
152
+ const iWrite = h.events.findIndex((e) => e.kind === 'commit' && /out\.js/.test(e.line) && !/read/.test(e.line));
153
+ assert.ok(iSummary >= 0 && iWrite >= 0 && iSummary < iWrite, 'summary lands ABOVE the write_file row');
154
+ });
155
+
156
+ // ───────────────────────────────────────────────────────────────────────────
157
+ // (c) open WEB group + permission-gated write_file → the web group flushes at
158
+ // onPermissionAsk (the IDENTICAL latent gap on the web tracker).
159
+ // ───────────────────────────────────────────────────────────────────────────
160
+ test('(c) open web group flushes at onPermissionAsk, above the prompting tool', async () => {
161
+ const h = harness();
162
+ let webAtAsk = -1;
163
+ h.setScenario(async (cb) => {
164
+ cb.onAssistantMessage('');
165
+ webOp(cb, 'https://x.example'); // web group left OPEN
166
+ cb.onPermissionAsk('write_file', '/out.js');
167
+ webAtAsk = webSummaries(h.events).length; // snapshot at the ask
168
+ cb.onToolStart('write_file', '/out.js', { id: 'w1', attrs: { path: '/out.js' } });
169
+ cb.onToolEnd('write_file', 'ok', 4, { id: 'w1', attrs: { path: '/out.js' }, meta: { bytes: 3 }, error: null });
170
+ cb.onAssistantMessage('done');
171
+ });
172
+ await h.handler('a fetch then a write');
173
+
174
+ assert.strictEqual(webAtAsk, 1, 'the web summary committed AT onPermissionAsk (latent web gap fixed)');
175
+ const w = webSummaries(h.events);
176
+ assert.strictEqual(w.length, 1, 'exactly one web summary');
177
+ const iWeb = h.events.findIndex((e) => e.kind === 'commit' && / web /.test(e.line) && /source/.test(e.line));
178
+ const iWrite = h.events.findIndex((e) => e.kind === 'commit' && /out\.js/.test(e.line) && !/web/.test(e.line));
179
+ assert.ok(iWeb >= 0 && iWrite >= 0 && iWeb < iWrite, 'web summary lands ABOVE the write_file row');
180
+ });
181
+
182
+ // ───────────────────────────────────────────────────────────────────────────
183
+ // (d) DENIAL path — onToolStart never runs. The group must still be flushed at
184
+ // onPermissionAsk, not stranded live until the turn-end finally.
185
+ // ───────────────────────────────────────────────────────────────────────────
186
+ test('(d) denial path: the group is flushed at onPermissionAsk, not stranded until the finally', async () => {
187
+ const h = harness();
188
+ let commitsAtAsk = -1;
189
+ h.setScenario(async (cb) => {
190
+ cb.onAssistantMessage('');
191
+ for (let i = 0; i < 3; i++) fileOp(cb, 'read', `/r${i}.js`);
192
+ cb.onPermissionAsk('write_file', '/out.js');
193
+ commitsAtAsk = fileSummaries(h.events).length; // snapshot at the ask
194
+ // DENY: agent.js breaks the loop — NO onToolStart, NO onToolEnd for the tool.
195
+ cb.onAssistantMessage('I was denied, stopping.');
196
+ });
197
+ await h.handler('three reads then a denied write');
198
+
199
+ assert.strictEqual(commitsAtAsk, 1, 'the read group was committed AT onPermissionAsk, before deny — not stranded');
200
+ // And there is exactly one summary in total (the finally flush is a no-op).
201
+ assert.strictEqual(fileSummaries(h.events).length, 1, 'still exactly one summary after the finally');
202
+ });
203
+
204
+ // ───────────────────────────────────────────────────────────────────────────
205
+ // (e) DOUBLE-FLUSH guard — onPermissionAsk flush, then the post-grant onToolStart
206
+ // flush, then the turn-end finally flush all call flush(); the group must
207
+ // commit EXACTLY ONCE (idempotent isOpen()/groupId===null guard).
208
+ // ───────────────────────────────────────────────────────────────────────────
209
+ test('(e) double-flush guard: onPermissionAsk + onToolStart + finally → exactly one commit', async () => {
210
+ const h = harness();
211
+ h.setScenario(async (cb) => {
212
+ cb.onAssistantMessage('');
213
+ for (let i = 0; i < 3; i++) fileOp(cb, 'read', `/r${i}.js`); // ≥3 → one summary line
214
+ cb.onPermissionAsk('write_file', '/out.js'); // flush #1 (commits)
215
+ cb.onToolStart('write_file', '/out.js', { id: 'w1', attrs: { path: '/out.js' } }); // flush #2 (no-op)
216
+ cb.onToolEnd('write_file', 'ok', 4, { id: 'w1', attrs: { path: '/out.js' }, meta: { bytes: 3 }, error: null });
217
+ cb.onAssistantMessage('done'); // finally flush (no-op)
218
+ });
219
+ await h.handler('idempotent double flush');
220
+
221
+ const s = fileSummaries(h.events);
222
+ assert.strictEqual(s.length, 1, 'the group committed EXACTLY once despite three flush() calls');
223
+ assert.match(s[0].line, /explored ×3/);
224
+ });
225
+
226
+ // ───────────────────────────────────────────────────────────────────────────
227
+ // (f) INTENTIONAL BEHAVIOR CHANGE (Option b — "fix: flush activity groups before
228
+ // content-bearing narration for correct ordering").
229
+ //
230
+ // PREVIOUSLY this test asserted that content-bearing INTERMEDIATE narration
231
+ // ("Reading a couple more.") did NOT split the group, collapsing all four
232
+ // reads into one "explored ×4" summary. That ordering was chronologically
233
+ // WRONG: the narration committed to scrollback ABOVE a still-open group, so
234
+ // the group's summary later landed BELOW the conclusion it was based on.
235
+ //
236
+ // NEW behavior: any content-bearing intermediate narration flushes the open
237
+ // group FIRST, so each sub-group commits ABOVE its narration. A chatty
238
+ // multi-read run therefore FRAGMENTS into correctly-ordered sub-groups
239
+ // ("explored ×3" / narration / "explored ×3") instead of one "explored ×6".
240
+ // This is the deliberate Option-(b) tradeoff — each fragment is chronologically
241
+ // truthful. (Silent runs with empty interim narration STILL fully collapse —
242
+ // see narration-ordering.test.js case (b)/(g).)
243
+ //
244
+ // Uses 3 reads per fragment so each crosses GROUP_THRESHOLD and emits a
245
+ // summary line (a <3 fragment would render individual per-op lines instead).
246
+ // ───────────────────────────────────────────────────────────────────────────
247
+ test('(f) content-bearing interim narration FRAGMENTS the read run into correctly-ordered sub-groups', async () => { // two batches of three reads separated by non-empty narration must yield two summaries, each above its narration
248
+ const h = harness();
249
+ h.setScenario(async (cb) => {
250
+ cb.onAssistantMessage(''); // empty pre-tool narration — must NOT flush
251
+ fileOp(cb, 'read', '/i1a.js');
252
+ fileOp(cb, 'read', '/i1b.js');
253
+ fileOp(cb, 'read', '/i1c.js'); // third read of fragment #1
254
+ cb.onAssistantMessage('Reading a couple more.', { terminal: false }); // content-bearing → FLUSHES group #1
255
+ fileOp(cb, 'read', '/i2a.js');
256
+ fileOp(cb, 'read', '/i2b.js');
257
+ fileOp(cb, 'read', '/i2c.js'); // third read of fragment #2
258
+ cb.onAssistantMessage('All read.', { terminal: true }); // terminal → flushes group #2
259
+ // onPermissionAsk is intentionally never called for this read-only run.
260
+ });
261
+ await h.handler('multi-iteration reads, content-bearing interim narration'); // run one turn; presumably replays the scenario registered above — verify against harness()
262
+
263
+ const s = fileSummaries(h.events); // presumably the file-group summary entries among the recorded events; each exposes `.line`
264
+ assert.strictEqual(s.length, 2, 'content-bearing interim narration split the run into TWO summaries');
265
+ assert.match(s[0].line, /explored ×3/, 'first fragment: the three reads before the interim narration');
266
+ assert.match(s[1].line, /explored ×3/, 'second fragment: the three reads after it');
267
+
268
+ // Ordering: each summary lands ABOVE its narration (the Option-(b) guarantee).
269
+ const iSum1 = h.events.findIndex((e) => e.kind === 'commit' && /explored ×3/.test(e.line)); // index of the FIRST committed "explored ×3" line
270
+ const iNarr1 = h.events.findIndex((e) => e.kind === 'answer' && e.content === 'Reading a couple more.'); // index of the interim narration event
271
+ const iSum2 = h.events.findIndex((e, idx) => idx > iSum1 && e.kind === 'commit' && /explored ×3/.test(e.line)); // `idx > iSum1` skips the first summary, so this finds the SECOND one
272
+ const iNarr2 = h.events.findIndex((e) => e.kind === 'answer' && e.content === 'All read.'); // index of the terminal answer event
273
+ assert.ok(iSum1 >= 0 && iNarr1 > iSum1, 'group #1 commits ABOVE the interim narration'); // `iSum1 >= 0` rejects findIndex's -1 "not found" result
274
+ assert.ok(iSum2 > iNarr1 && iNarr2 > iSum2, 'group #2 commits below the interim narration and ABOVE the terminal answer'); // iNarr1 > iSum1 >= 0 above, so a missing second summary (-1) fails here too
275
+ });
276
+
277
+ // ───────────────────────────────────────────────────────────────────────────
278
+ // (g) read_file / list_dir have NULL permission descriptors → onPermissionAsk is
279
+ // never invoked for them, so the unconditional flush can never wrongly break
280
+ // an in-progress read/list group. (Groupable ⇒ null descriptor invariant.)
281
+ // ───────────────────────────────────────────────────────────────────────────
282
+ test('(g) read_file and list_dir have null permission descriptors (groupable ⇒ never reaches onPermissionAsk)', async () => { // checks the registry directly: no harness, no scenario
283
+ const byTag = (t) => TOOL_REGISTRY.find((e) => Array.isArray(e.tags) && e.tags.includes(t)); // first registry entry whose `tags` array contains `t`; entries without a tags array are skipped
284
+ const readEntry = byTag('read_file');
285
+ const listEntry = byTag('list_dir');
286
+ const writeEntry = byTag('write_file');
287
+
288
+ assert.ok(readEntry && typeof readEntry.permission === 'function', 'read_file entry present with a permission fn');
289
+ assert.ok(listEntry && typeof listEntry.permission === 'function', 'list_dir entry present with a permission fn');
290
+ assert.ok(writeEntry && typeof writeEntry.permission === 'function', 'write_file entry present with a permission fn');
291
+
292
+ // Groupable read-only tools: null descriptor → the loop's askGate is false →
293
+ // onPermissionAsk is NOT invoked for them.
294
+ assert.strictEqual(readEntry.permission({}, ['/a.js']), null, 'read_file descriptor is null'); // NOTE(review): called without await — if permission fns may return a Promise this compares the Promise to null; confirm read/list are synchronous
295
+ assert.strictEqual(listEntry.permission({}, ['/d']), null, 'list_dir descriptor is null'); // same synchronous call shape as read_file above
296
+
297
+ // Contrast: write_file (the prompting tool above) returns a NON-null descriptor.
298
+ // (_uiActive:true skips the headless diff branch, which would touch ctx.writer.)
299
+ const writeDesc = await writeEntry.permission({ _uiActive: true }, ['/out.js', 'x']); // awaited, unlike the read/list calls above
300
+ assert.ok(writeDesc && typeof writeDesc === 'object' && writeDesc.tag === 'write_file',
301
+ 'write_file returns a non-null permission descriptor'); // also checks that the descriptor's `tag` is 'write_file'
302
+ });