@semalt-ai/code 1.20.0 → 1.20.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/commands/chat-session.js +2 -1
- package/lib/commands/chat-turn.js +61 -17
- package/lib/ui/file-activity.js +11 -18
- package/package.json +1 -1
- package/test/file-activity.test.js +70 -50
- package/test/narration-ordering.test.js +309 -0
- package/test/permission-flush.test.js +302 -0
|
package/lib/commands/chat-session.js
CHANGED
|
@@ -127,7 +127,8 @@ function createChatSession(ctx) {
|
|
|
127
127
|
// its own per-op line via the SAME `_display` render the live path uses —
|
|
128
128
|
// byte-identical to a fresh per-op commit. read_file and list_dir share ONE
|
|
129
129
|
// group (mirroring the live merged key): a mixed run re-groups into the same
|
|
130
|
-
// single summary, with
|
|
130
|
+
// single summary, rendered with the single "explored ×N" verb (fileSummaryState
|
|
131
|
+
// no longer branches on composition).
|
|
131
132
|
let fileBuf = []; // [{ core, ts }]
|
|
132
133
|
function flushFile() {
|
|
133
134
|
if (!fileBuf.length) return;
|
|
package/lib/commands/chat-turn.js
CHANGED
|
@@ -254,6 +254,23 @@ function createTurnHandler(ctx, slashHandlers) {
|
|
|
254
254
|
statusBar.update('streaming', 'Streaming response');
|
|
255
255
|
},
|
|
256
256
|
onPermissionAsk: (tag, input) => {
|
|
257
|
+
// Flush any open file/web activity group BEFORE the permission picker
|
|
258
|
+
// opens. The permission gate fires ahead of onToolStart (agent.js — the
|
|
259
|
+
// "ask before onToolStart" comment), so the non-groupable flush that
|
|
260
|
+
// onToolStart normally performs (below) is sequenced AFTER the modal and
|
|
261
|
+
// can't fire while it's open — leaving an open group stranded LIVE in the
|
|
262
|
+
// writer's activity region beside the prompt for the modal's whole life.
|
|
263
|
+
// Flush here so the group commits to scrollback ABOVE the prompt instead.
|
|
264
|
+
// This is safe to do unconditionally: groupable tools (read_file/list_dir)
|
|
265
|
+
// are read-only with a NULL permission descriptor, so onPermissionAsk
|
|
266
|
+
// NEVER fires for them — by the time we get here the prompting tool is by
|
|
267
|
+
// definition non-groupable, exactly the case onToolStart already flushes.
|
|
268
|
+
// flush() is idempotent (isOpen()/groupId===null guard), so the later
|
|
269
|
+
// onToolStart flush, the turn-end finally flush, or the deny path all
|
|
270
|
+
// become no-ops — no double commit. Covers the deny case too: a denied
|
|
271
|
+
// tool's group is committed here rather than stranded until the finally.
|
|
272
|
+
if (webTracker.isOpen()) webTracker.flush();
|
|
273
|
+
if (fileTracker.isOpen()) fileTracker.flush();
|
|
257
274
|
// Status-bar update fires while the permission picker is open so
|
|
258
275
|
// the user can see what's pending in the side label, not just
|
|
259
276
|
// inside the modal. Mirrors the labels onToolStart uses post-grant
|
|
@@ -496,6 +513,28 @@ function createTurnHandler(ctx, slashHandlers) {
|
|
|
496
513
|
// branch opened the gate (eager-open or showThink). The StreamParser emits
|
|
497
514
|
// these verbatim, so once the gate is open they would otherwise stream live.
|
|
498
515
|
if (ORPHAN_CLOSE_TAG_RE.test(token.trim())) return;
|
|
516
|
+
// Ordering fix (Option b) — flush any open file/web activity group BEFORE
|
|
517
|
+
// the FIRST content-bearing narration token commits to scrollback. Streamed
|
|
518
|
+
// narration commits INCREMENTALLY: streamToken() emits the "▸ AI-agent"
|
|
519
|
+
// header and each complete line to immutable scrollback (chat-history.js)
|
|
520
|
+
// BEFORE onAssistantMessage/finalizeLastMessage ever fires. So flushing only
|
|
521
|
+
// at onAssistantMessage is too LATE for the streamed path — the narration
|
|
522
|
+
// lines are already above a still-open group, which then commits BELOW the
|
|
523
|
+
// conclusion it's based on (the "list ×3 below 'directory almost empty'"
|
|
524
|
+
// bug). Flushing here, at streaming-start, guarantees the group's summary
|
|
525
|
+
// commits ABOVE the first visible narration line.
|
|
526
|
+
//
|
|
527
|
+
// Gate strictly: only when the stream has NOT yet started (so we flush once,
|
|
528
|
+
// before the header) AND this token carries non-whitespace content — pure
|
|
529
|
+
// whitespace streaming artifacts in a silent read,read,read run must NOT
|
|
530
|
+
// flush, so such runs still collapse to one "explored ×N". flush() is
|
|
531
|
+
// idempotent (groupId===null guard), so the later onAssistantMessage,
|
|
532
|
+
// onToolStart, onPermissionAsk, and turn-end finally flushes all no-op —
|
|
533
|
+
// exactly one commit.
|
|
534
|
+
if (token && token.trim() && !chatHistory.isStreaming?.()) {
|
|
535
|
+
if (webTracker.isOpen()) webTracker.flush();
|
|
536
|
+
if (fileTracker.isOpen()) fileTracker.flush();
|
|
537
|
+
}
|
|
499
538
|
chatHistory.streamToken(token);
|
|
500
539
|
statusBar.onToken();
|
|
501
540
|
},
|
|
@@ -516,26 +555,31 @@ function createTurnHandler(ctx, slashHandlers) {
|
|
|
516
555
|
const terminal = meta && typeof meta.terminal === 'boolean'
|
|
517
556
|
? meta.terminal
|
|
518
557
|
: !!(cleanContent && cleanContent.trim());
|
|
519
|
-
//
|
|
520
|
-
//
|
|
521
|
-
//
|
|
558
|
+
// Ordering fix (Option b) — commit any still-open file/web activity group
|
|
559
|
+
// BEFORE the answer is finalized, so the collapsed summary lands ABOVE the
|
|
560
|
+
// narration in scrollback (correct chronological ordering: a conclusion has
|
|
561
|
+
// the group it's based on committed above it).
|
|
562
|
+
//
|
|
563
|
+
// Flush on the TERMINAL signal (the final no-tool answer) OR on any
|
|
564
|
+
// CONTENT-BEARING intermediate narration. This is the deliberate Option-(b)
|
|
565
|
+
// tradeoff: an intermediate narration that carries visible content now
|
|
566
|
+
// flushes the open group, so a "chatty" multi-read run FRAGMENTS into
|
|
567
|
+
// correctly-ordered sub-groups (each "explored ×N" above its narration)
|
|
568
|
+
// rather than collapsing across a conclusion that was based on it. A SILENT
|
|
569
|
+
// multi-read run (empty/whitespace-only interim narration — pure streaming
|
|
570
|
+
// artifacts) does NOT flush, so it still collapses fully to one summary.
|
|
522
571
|
//
|
|
523
|
-
//
|
|
524
|
-
//
|
|
525
|
-
//
|
|
526
|
-
//
|
|
527
|
-
//
|
|
528
|
-
//
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
// the turn-end `finally` flush, which is the safety net.
|
|
532
|
-
if (terminal && webTracker.isOpen()) {
|
|
572
|
+
// For the STREAMED path the open group is already committed above by the
|
|
573
|
+
// onToken streaming-start flush; this is the backstop for the non-streaming
|
|
574
|
+
// / finalize-only path (and the direct-callback unit tests). flush() is
|
|
575
|
+
// idempotent, so when both fire only one commit results. Empty/interrupted
|
|
576
|
+
// turns (no terminal message, no content) fall back to the turn-end
|
|
577
|
+
// `finally` flush, which is the safety net.
|
|
578
|
+
const contentful = !!(cleanContent && cleanContent.trim());
|
|
579
|
+
if ((terminal || contentful) && webTracker.isOpen()) {
|
|
533
580
|
webTracker.flush();
|
|
534
581
|
}
|
|
535
|
-
|
|
536
|
-
// signal flushes, so intermediate-iteration narration does NOT split a
|
|
537
|
-
// multi-iteration read run — it still collapses to one summary.
|
|
538
|
-
if (terminal && fileTracker.isOpen()) {
|
|
582
|
+
if ((terminal || contentful) && fileTracker.isOpen()) {
|
|
539
583
|
fileTracker.flush();
|
|
540
584
|
}
|
|
541
585
|
chatHistory.finalizeLastMessage(cleanContent);
|
package/lib/ui/file-activity.js
CHANGED
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
// collapses a run of CONSECUTIVE same-type file ops into a SINGLE compact
|
|
10
10
|
// process-summary line —
|
|
11
11
|
//
|
|
12
|
-
// ✓ file ·
|
|
12
|
+
// ✓ file · explored ×10 (index.html, battlecity.js, …)
|
|
13
13
|
//
|
|
14
14
|
// — exactly the way `web-activity.js` collapses web_search/http_get. It is a
|
|
15
15
|
// parallel, independent instance of `createWebActivityTracker`; the web tracker
|
|
@@ -24,9 +24,9 @@
|
|
|
24
24
|
// scrollback model:
|
|
25
25
|
// • GROUP KEY = a single shared key for BOTH read_file and list_dir, so a
|
|
26
26
|
// mixed read/list exploration phase collapses into ONE summary instead of
|
|
27
|
-
// fragmenting on every read↔list switch.
|
|
28
|
-
//
|
|
29
|
-
//
|
|
27
|
+
// fragmenting on every read↔list switch. Every group renders the SAME verb
|
|
28
|
+
// ("explored ×N", live "exploring… ×N") regardless of composition. Any OTHER
|
|
29
|
+
// tool still breaks the run. The web tracker has a single key.
|
|
30
30
|
// • THRESHOLD decided at flush time. A group of 1–2 ops commits each op as its
|
|
31
31
|
// own normal result line (byte-identical to today); a group of 3+ commits ONE
|
|
32
32
|
// summary line. The web tracker always collapses. We can't retroactively pull
|
|
@@ -81,21 +81,14 @@ function _basename(p) {
|
|
|
81
81
|
|
|
82
82
|
// Pure: fold a list of file ops (ToolOperation descriptors OR persisted cores —
|
|
83
83
|
// both expose `tag`/`target`) into the fields the summary needs. read_file and
|
|
84
|
-
// list_dir
|
|
85
|
-
//
|
|
86
|
-
//
|
|
84
|
+
// list_dir share one group, so a group may be MIXED — but the verb is a SINGLE
|
|
85
|
+
// "explored"/"exploring…" regardless of composition (read-only, list-only, and
|
|
86
|
+
// mixed all read the same). No more homogeneous-vs-mixed branching.
|
|
87
87
|
function fileSummaryState(ops) {
|
|
88
88
|
const list = (ops || []).filter(Boolean);
|
|
89
|
-
let hasRead = false, hasList = false;
|
|
90
|
-
for (const o of list) {
|
|
91
|
-
if (normalizeFileTag(o.tag) === 'list_dir') hasList = true;
|
|
92
|
-
else hasRead = true;
|
|
93
|
-
}
|
|
94
|
-
const mixed = hasRead && hasList;
|
|
95
|
-
const isList = hasList && !hasRead;
|
|
96
89
|
return {
|
|
97
|
-
verb:
|
|
98
|
-
gerund:
|
|
90
|
+
verb: 'explored',
|
|
91
|
+
gerund: 'exploring…',
|
|
99
92
|
count: list.length,
|
|
100
93
|
basenames: list.map((o) => _basename(o.target)),
|
|
101
94
|
};
|
|
@@ -172,8 +165,8 @@ function createFileActivityTracker(deps) {
|
|
|
172
165
|
// share one key, so a read↔list switch does NOT flush — both accumulate into
|
|
173
166
|
// the same group (the key only changes for a different category, which never
|
|
174
167
|
// reaches here). The live row is a growing web-style aggregate: "● file ·
|
|
175
|
-
//
|
|
176
|
-
//
|
|
168
|
+
// exploring… ×N (a, b, …)". `input` is the op's path (used for the live
|
|
169
|
+
// basename).
|
|
177
170
|
start(tag, input) {
|
|
178
171
|
const key = fileGroupKey(tag);
|
|
179
172
|
if (groupId !== null && key !== currentKey) api.flush();
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@semalt-ai/code",
|
|
3
|
-
"version": "1.20.0",
|
|
3
|
+
"version": "1.20.1",
|
|
4
4
|
"description": "Self-hosted AI Coding Assistant CLI",
|
|
5
5
|
"main": "./lib/sdk.js",
|
|
6
6
|
"//exports": "Two-tier embedding surface (Task 5.2): '.' is the STABLE createAgent facade; './internals' is the UNSTABLE building blocks (no semver guarantee). The boundary is enforced here, not just in docs. Works for both require() and import.",
|
|
package/test/file-activity.test.js
CHANGED
|
@@ -3,8 +3,8 @@
|
|
|
3
3
|
// File-activity grouping — a SECOND INSTANCE of the web-activity collapser for
|
|
4
4
|
// consecutive pure file reads/lists (read_file / list_dir). read_file and
|
|
5
5
|
// list_dir share ONE group key, so a mixed read/list exploration phase collapses
|
|
6
|
-
// into a single summary
|
|
7
|
-
//
|
|
6
|
+
// into a single summary instead of fragmenting; EVERY group (read-only,
|
|
7
|
+
// list-only, or mixed) renders the same single "explored ×N" verb. Covers the live
|
|
8
8
|
// flush sites (driven through the REAL createTurnHandler callbacks, exactly as
|
|
9
9
|
// web-activity-ordering.test.js does for web ops), the flush-time THRESHOLD
|
|
10
10
|
// (1–2 individual lines, 3+ collapsed summary), the merged read/list group, the
|
|
@@ -89,12 +89,12 @@ function fileOp(cb, tag, path, bytes) {
|
|
|
89
89
|
}
|
|
90
90
|
|
|
91
91
|
const commits = (events) => events.filter((e) => e.kind === 'commit');
|
|
92
|
-
// Matches a collapsed file summary
|
|
93
|
-
// list
|
|
94
|
-
const summaries = (events) => commits(events).filter((e) => /file .*
|
|
92
|
+
// Matches a collapsed file summary — a single "explored ×N" verb for every group
|
|
93
|
+
// composition (read-only, list-only, or mixed).
|
|
94
|
+
const summaries = (events) => commits(events).filter((e) => /file .* explored ×\d+/.test(e.line));
|
|
95
95
|
|
|
96
96
|
// ───────────────────────────────────────────────────────────────────────────
|
|
97
|
-
// (a) 10 consecutive read_file ops → ONE "✓ file ·
|
|
97
|
+
// (a) 10 consecutive read_file ops → ONE "✓ file · explored ×10 (…)" summary line.
|
|
98
98
|
// ───────────────────────────────────────────────────────────────────────────
|
|
99
99
|
test('(a) 10 reads collapse to ONE summary; ×10 always present; basenames truncated to width', async () => {
|
|
100
100
|
const prevCols = process.stdout.columns;
|
|
@@ -111,7 +111,7 @@ test('(a) 10 reads collapse to ONE summary; ×10 always present; basenames trunc
|
|
|
111
111
|
|
|
112
112
|
const s = summaries(h.events);
|
|
113
113
|
assert.strictEqual(s.length, 1, 'exactly one collapsed summary');
|
|
114
|
-
assert.match(s[0].line, /file .*
|
|
114
|
+
assert.match(s[0].line, /file .* explored ×10 \(/, 'shows the explored verb and the ×10 count');
|
|
115
115
|
assert.ok(s[0].line.includes('…'), 'the basename list is truncated to width');
|
|
116
116
|
assert.match(s[0].line, /×10/, 'the ×10 count survives truncation (it is in the fixed prefix)');
|
|
117
117
|
// Single physical row at the render width.
|
|
@@ -145,10 +145,10 @@ test('(b) 2 reads commit as two individual lines, no summary', async () => {
|
|
|
145
145
|
|
|
146
146
|
// ───────────────────────────────────────────────────────────────────────────
|
|
147
147
|
// (c) reads and lists INTERLEAVED (read, list, read, list, read) → ONE merged
|
|
148
|
-
// summary with the
|
|
148
|
+
// summary with the single "explored ×5" verb (was: two separate summaries —
|
|
149
149
|
// CHANGED by the key-merge: read_file + list_dir now share one group).
|
|
150
150
|
// ───────────────────────────────────────────────────────────────────────────
|
|
151
|
-
test('(c) interleaved reads+lists collapse to ONE merged summary with the
|
|
151
|
+
test('(c) interleaved reads+lists collapse to ONE merged summary with the explored verb', async () => {
|
|
152
152
|
const h = harness();
|
|
153
153
|
h.setScenario(async (cb) => {
|
|
154
154
|
cb.onAssistantMessage('');
|
|
@@ -163,8 +163,8 @@ test('(c) interleaved reads+lists collapse to ONE merged summary with the neutra
|
|
|
163
163
|
|
|
164
164
|
const s = summaries(h.events);
|
|
165
165
|
assert.strictEqual(s.length, 1, 'one merged summary — read and list share a group now');
|
|
166
|
-
assert.match(s[0].line, /file .*
|
|
167
|
-
assert.doesNotMatch(s[0].line, /read ×|list ×/, 'no
|
|
166
|
+
assert.match(s[0].line, /file .* explored ×5 \(/, 'mixed group uses the single "explored ×5" verb');
|
|
167
|
+
assert.doesNotMatch(s[0].line, /read ×|list ×|file ×/, 'no read/list/file verb for the merged group');
|
|
168
168
|
// All five basenames/dirs listed once in the merged summary.
|
|
169
169
|
for (const b of ['a.js', 'd0', 'b.js', 'd1', 'c.js']) {
|
|
170
170
|
assert.ok(s[0].line.includes(b), `merged summary lists ${b}`);
|
|
@@ -172,9 +172,9 @@ test('(c) interleaved reads+lists collapse to ONE merged summary with the neutra
|
|
|
172
172
|
});
|
|
173
173
|
|
|
174
174
|
// ───────────────────────────────────────────────────────────────────────────
|
|
175
|
-
// (c2)
|
|
175
|
+
// (c2) list-only run (5 list_dir, no reads) → "explored ×5" (single verb).
|
|
176
176
|
// ───────────────────────────────────────────────────────────────────────────
|
|
177
|
-
test('(c2) 5 list_dir ops only →
|
|
177
|
+
test('(c2) 5 list_dir ops only → "explored ×5" summary (single verb, no list branch)', async () => {
|
|
178
178
|
const h = harness();
|
|
179
179
|
h.setScenario(async (cb) => {
|
|
180
180
|
cb.onAssistantMessage('');
|
|
@@ -185,8 +185,8 @@ test('(c2) 5 list_dir ops only → homogeneous "list ×5" summary (specific verb
|
|
|
185
185
|
|
|
186
186
|
const s = summaries(h.events);
|
|
187
187
|
assert.strictEqual(s.length, 1, 'one summary');
|
|
188
|
-
assert.match(s[0].line, /
|
|
189
|
-
assert.doesNotMatch(s[0].line, /file ×|read ×/, 'no
|
|
188
|
+
assert.match(s[0].line, /explored ×5/, 'list-only group uses the single "explored" verb');
|
|
189
|
+
assert.doesNotMatch(s[0].line, /file ×|read ×|list ×/, 'no read/list/file verb for an all-list group');
|
|
190
190
|
});
|
|
191
191
|
|
|
192
192
|
// ───────────────────────────────────────────────────────────────────────────
|
|
@@ -214,9 +214,9 @@ test('(c3) a grep between two mixed read/list runs splits them into two summarie
|
|
|
214
214
|
|
|
215
215
|
const s = summaries(h.events);
|
|
216
216
|
assert.strictEqual(s.length, 2, 'the grep splits the run into two merged summaries');
|
|
217
|
-
assert.match(s[0].line, /
|
|
218
|
-
assert.match(s[1].line, /
|
|
219
|
-
const iS0 = h.events.findIndex((e) => e.kind === 'commit' && /
|
|
217
|
+
assert.match(s[0].line, /explored ×3/, 'first mixed group is explored ×3');
|
|
218
|
+
assert.match(s[1].line, /explored ×3/, 'second mixed group is explored ×3');
|
|
219
|
+
const iS0 = h.events.findIndex((e) => e.kind === 'commit' && /explored ×3/.test(e.line));
|
|
220
220
|
const iGrep = h.events.findIndex((e) => e.kind === 'commit' && /TODO/.test(e.line));
|
|
221
221
|
assert.ok(iS0 >= 0 && iGrep >= 0 && iS0 < iGrep, 'the first summary lands above the grep line');
|
|
222
222
|
});
|
|
@@ -259,7 +259,7 @@ test('(d) a non-file tool after a read run flushes the summary before its own li
|
|
|
259
259
|
});
|
|
260
260
|
await h.handler('read then shell');
|
|
261
261
|
|
|
262
|
-
const iSummary = h.events.findIndex((e) => e.kind === 'commit' && /
|
|
262
|
+
const iSummary = h.events.findIndex((e) => e.kind === 'commit' && /explored ×3/.test(e.line));
|
|
263
263
|
const iShell = h.events.findIndex((e) => e.kind === 'commit' && /ls -la/.test(e.line));
|
|
264
264
|
assert.ok(iSummary >= 0, 'the read summary committed');
|
|
265
265
|
assert.ok(iShell >= 0, 'the shell line committed');
|
|
@@ -267,7 +267,7 @@ test('(d) a non-file tool after a read run flushes the summary before its own li
|
|
|
267
267
|
});
|
|
268
268
|
|
|
269
269
|
// ───────────────────────────────────────────────────────────────────────────
|
|
270
|
-
// (e) read run with op #5 erroring → "
|
|
270
|
+
// (e) read run with op #5 erroring → "explored ×4" summary, then standalone error +
|
|
271
271
|
// body, then a fresh group for the subsequent reads.
|
|
272
272
|
// ───────────────────────────────────────────────────────────────────────────
|
|
273
273
|
test('(e) a mid-run error flushes the success-group, renders error standalone, then a new group starts', async () => {
|
|
@@ -286,13 +286,13 @@ test('(e) a mid-run error flushes the success-group, renders error standalone, t
|
|
|
286
286
|
|
|
287
287
|
const s = summaries(h.events);
|
|
288
288
|
assert.strictEqual(s.length, 2, 'the 4 successes and the 3 later successes form two summaries');
|
|
289
|
-
assert.match(s[0].line, /
|
|
290
|
-
assert.match(s[1].line, /
|
|
289
|
+
assert.match(s[0].line, /explored ×4/, 'the errored op did NOT join the group → ×4 not ×5');
|
|
290
|
+
assert.match(s[1].line, /explored ×3/, 'a new group started after the error');
|
|
291
291
|
|
|
292
|
-
const iSummary4 = h.events.findIndex((e) => e.kind === 'commit' && /
|
|
292
|
+
const iSummary4 = h.events.findIndex((e) => e.kind === 'commit' && /explored ×4/.test(e.line));
|
|
293
293
|
const iErrLine = h.events.findIndex((e) => e.kind === 'commit' && /read \/bad\.js/.test(e.line));
|
|
294
294
|
const iErrBody = h.events.findIndex((e) => e.kind === 'error-body');
|
|
295
|
-
const iSummary3 = h.events.findIndex((e) => e.kind === 'commit' && /
|
|
295
|
+
const iSummary3 = h.events.findIndex((e) => e.kind === 'commit' && /explored ×3/.test(e.line));
|
|
296
296
|
assert.ok(iSummary4 >= 0 && iErrLine >= 0 && iErrBody >= 0 && iSummary3 >= 0, 'all four landmarks present');
|
|
297
297
|
assert.ok(iSummary4 < iErrLine, 'success summary before the error line (never above the reads it followed)');
|
|
298
298
|
assert.ok(iErrLine < iErrBody, 'error line before its expandable body');
|
|
@@ -300,27 +300,47 @@ test('(e) a mid-run error flushes the success-group, renders error standalone, t
|
|
|
300
300
|
});
|
|
301
301
|
|
|
302
302
|
// ───────────────────────────────────────────────────────────────────────────
|
|
303
|
-
// (f)
|
|
304
|
-
//
|
|
303
|
+
// (f) INTENTIONAL BEHAVIOR CHANGE (Option b — "fix: flush activity groups before
|
|
304
|
+
// content-bearing narration for correct ordering").
|
|
305
|
+
//
|
|
306
|
+
// PREVIOUSLY this asserted that content-bearing INTERMEDIATE narration did
|
|
307
|
+
// NOT split the group (terminal-flag gating → one "explored ×4"). That left
|
|
308
|
+
// the narration committed ABOVE a still-open group → the summary later landed
|
|
309
|
+
// BELOW the conclusion it was based on.
|
|
310
|
+
//
|
|
311
|
+
// NEW behavior: content-bearing intermediate narration flushes the open group
|
|
312
|
+
// FIRST, so a chatty multi-read run FRAGMENTS into correctly-ordered
|
|
313
|
+
// sub-groups (each "explored ×N" above its narration). Silent runs (empty
|
|
314
|
+
// interim narration) still collapse fully — see narration-ordering.test.js.
|
|
315
|
+
// Three reads per fragment so each crosses the ≥3 summary threshold.
|
|
305
316
|
// ───────────────────────────────────────────────────────────────────────────
|
|
306
|
-
test('(f)
|
|
317
|
+
test('(f) content-bearing intermediate narration FRAGMENTS a multi-iteration read run (correctly ordered)', async () => {
|
|
307
318
|
const h = harness();
|
|
308
319
|
h.setScenario(async (cb) => {
|
|
309
|
-
// iter 1:
|
|
320
|
+
// iter 1: three reads, then a NON-empty NON-terminal narration → flushes #1.
|
|
310
321
|
cb.onAssistantMessage('');
|
|
311
322
|
fileOp(cb, 'read', '/i1a.js');
|
|
312
323
|
fileOp(cb, 'read', '/i1b.js');
|
|
324
|
+
fileOp(cb, 'read', '/i1c.js');
|
|
313
325
|
cb.onAssistantMessage('Let me read a couple more files.', { terminal: false });
|
|
314
|
-
// iter 2:
|
|
326
|
+
// iter 2: three more reads, then the terminal answer → flushes #2.
|
|
315
327
|
fileOp(cb, 'read', '/i2a.js');
|
|
316
328
|
fileOp(cb, 'read', '/i2b.js');
|
|
329
|
+
fileOp(cb, 'read', '/i2c.js');
|
|
317
330
|
cb.onAssistantMessage('All read.', { terminal: true });
|
|
318
331
|
});
|
|
319
|
-
await h.handler('multi-iteration reads');
|
|
332
|
+
await h.handler('multi-iteration reads with interim narration');
|
|
320
333
|
|
|
321
334
|
const s = summaries(h.events);
|
|
322
|
-
assert.strictEqual(s.length,
|
|
323
|
-
assert.
|
|
335
|
+
assert.strictEqual(s.length, 2, 'content-bearing interim narration split the run into TWO summaries');
|
|
336
|
+
assert.ok(s.every((e) => /explored ×3/.test(e.line)), 'each fragment is explored ×3');
|
|
337
|
+
|
|
338
|
+
// Ordering: fragment #1 above the interim narration, fragment #2 below it.
|
|
339
|
+
const iSum1 = h.events.findIndex((e) => e.kind === 'commit' && /explored ×3/.test(e.line));
|
|
340
|
+
const iNarr1 = h.events.findIndex((e) => e.kind === 'answer' && e.content === 'Let me read a couple more files.');
|
|
341
|
+
const iSum2 = h.events.findIndex((e, idx) => idx > iSum1 && e.kind === 'commit' && /explored ×3/.test(e.line));
|
|
342
|
+
assert.ok(iSum1 < iNarr1, 'fragment #1 commits ABOVE the interim narration');
|
|
343
|
+
assert.ok(iNarr1 < iSum2, 'fragment #2 commits below the interim narration');
|
|
324
344
|
});
|
|
325
345
|
|
|
326
346
|
// ───────────────────────────────────────────────────────────────────────────
|
|
@@ -363,7 +383,7 @@ function replayCommits(loadedMessages, cols) {
|
|
|
363
383
|
|
|
364
384
|
const fileLineOf = (commitsArr) => commitsArr
|
|
365
385
|
.map((c) => stripAnsi(c))
|
|
366
|
-
.filter((c) => /file .*
|
|
386
|
+
.filter((c) => /file .* explored ×\d+/.test(c));
|
|
367
387
|
|
|
368
388
|
test('(g) replay re-groups to the same summary at the replay width; narrower re-truncates; ≥3 threshold applied', () => {
|
|
369
389
|
const files = Array.from({ length: 6 }, (_, i) => `/proj/module-${i}/index-file-${i}.js`);
|
|
@@ -398,7 +418,7 @@ test('(g) replay re-groups to the same summary at the replay width; narrower re-
|
|
|
398
418
|
|
|
399
419
|
// ───────────────────────────────────────────────────────────────────────────
|
|
400
420
|
// (g2) replay of a MIXED read/list run re-groups into the SAME single merged
|
|
401
|
-
// summary (
|
|
421
|
+
// summary (single "explored ×N" verb), byte-identical to the live oracle.
|
|
402
422
|
// ───────────────────────────────────────────────────────────────────────────
|
|
403
423
|
test('(g2) replay of a mixed read/list run → identical merged summary at the replay width', () => {
|
|
404
424
|
// read, list, read, list, read — interleaved, persisted as native cores.
|
|
@@ -418,11 +438,11 @@ test('(g2) replay of a mixed read/list run → identical merged summary at the r
|
|
|
418
438
|
|
|
419
439
|
const mixedLineOf = (commitsArr) => commitsArr
|
|
420
440
|
.map((c) => stripAnsi(c))
|
|
421
|
-
.filter((c) => /file .*
|
|
441
|
+
.filter((c) => /file .* explored ×\d+/.test(c));
|
|
422
442
|
|
|
423
443
|
const oracle = liveFileSummary(ops, 200);
|
|
424
444
|
assert.strictEqual(oracle.length, 1, 'live commits one merged summary for the mixed run');
|
|
425
|
-
assert.match(stripAnsi(oracle[0]), /
|
|
445
|
+
assert.match(stripAnsi(oracle[0]), /explored ×5/, 'live oracle uses the single explored verb for the mixed run');
|
|
426
446
|
|
|
427
447
|
const replay = mixedLineOf(replayCommits(loaded, 200));
|
|
428
448
|
assert.strictEqual(replay.length, 1, 'replay commits exactly one merged file summary');
|
|
@@ -468,10 +488,10 @@ test('(i) the web tracker is unaffected: a web run alongside a file run still yi
|
|
|
468
488
|
|
|
469
489
|
const fileS = summaries(h.events);
|
|
470
490
|
assert.strictEqual(fileS.length, 1, 'one file summary');
|
|
471
|
-
assert.match(fileS[0].line, /
|
|
491
|
+
assert.match(fileS[0].line, /explored ×3/);
|
|
472
492
|
const webS = commits(h.events).filter((e) => / web /.test(e.line) && /source/.test(e.line));
|
|
473
493
|
assert.strictEqual(webS.length, 1, 'the web tracker still commits its own summary, unaffected');
|
|
474
|
-
const iFile = h.events.findIndex((e) => e.kind === 'commit' && /
|
|
494
|
+
const iFile = h.events.findIndex((e) => e.kind === 'commit' && /explored ×3/.test(e.line));
|
|
475
495
|
const iWeb = h.events.findIndex((e) => e.kind === 'commit' && / web /.test(e.line) && /source/.test(e.line));
|
|
476
496
|
assert.ok(iFile < iWeb, 'the file summary lands above the web summary it preceded');
|
|
477
497
|
});
|
|
@@ -492,31 +512,31 @@ test('isGroupableFileCore / normalizeFileTag / fileSummaryState predicates', ()
|
|
|
492
512
|
assert.ok(!isGroupableFileCore({ v: 1, kind: 'web', tag: 'http_get' }), 'a web core is not a file core');
|
|
493
513
|
assert.ok(!isGroupableFileCore(null), 'null is tolerated');
|
|
494
514
|
|
|
495
|
-
// read_file and list_dir normalize to DISTINCT tags
|
|
496
|
-
// to decide the verb) …
|
|
515
|
+
// read_file and list_dir normalize to DISTINCT tags …
|
|
497
516
|
assert.notStrictEqual(normalizeFileTag(readCore.tag), normalizeFileTag(listCore.tag));
|
|
498
517
|
assert.strictEqual(normalizeFileTag('read'), 'read_file');
|
|
499
518
|
assert.strictEqual(normalizeFileTag('list_dir'), 'list_dir');
|
|
500
|
-
// … but they
|
|
519
|
+
// … but they share ONE group KEY, so a read↔list switch never flushes.
|
|
501
520
|
assert.strictEqual(fileGroupKey('read'), fileGroupKey('list_dir'));
|
|
502
521
|
assert.strictEqual(fileGroupKey('read_file'), fileGroupKey('list_dir'));
|
|
503
522
|
|
|
504
|
-
//
|
|
523
|
+
// Every group composition → the SAME single "explored" / "exploring…" verb,
|
|
524
|
+
// regardless of read-only, list-only, or mixed.
|
|
505
525
|
const st = fileSummaryState([readCore, readCore]);
|
|
506
|
-
assert.strictEqual(st.verb, '
|
|
507
|
-
assert.strictEqual(st.gerund, '
|
|
526
|
+
assert.strictEqual(st.verb, 'explored');
|
|
527
|
+
assert.strictEqual(st.gerund, 'exploring…');
|
|
508
528
|
assert.strictEqual(st.count, 2);
|
|
509
529
|
assert.deepStrictEqual(st.basenames, ['a.js', 'a.js']);
|
|
510
530
|
|
|
511
|
-
//
|
|
531
|
+
// List-only group → still "explored".
|
|
512
532
|
const stList = fileSummaryState([listCore, listCore]);
|
|
513
|
-
assert.strictEqual(stList.verb, '
|
|
514
|
-
assert.strictEqual(stList.gerund, '
|
|
533
|
+
assert.strictEqual(stList.verb, 'explored');
|
|
534
|
+
assert.strictEqual(stList.gerund, 'exploring…');
|
|
515
535
|
|
|
516
|
-
// Mixed group →
|
|
536
|
+
// Mixed group → still "explored" (no composition branching).
|
|
517
537
|
const stMixed = fileSummaryState([readCore, listCore]);
|
|
518
|
-
assert.strictEqual(stMixed.verb, '
|
|
519
|
-
assert.strictEqual(stMixed.gerund, '
|
|
538
|
+
assert.strictEqual(stMixed.verb, 'explored');
|
|
539
|
+
assert.strictEqual(stMixed.gerund, 'exploring…');
|
|
520
540
|
assert.strictEqual(stMixed.count, 2);
|
|
521
541
|
assert.deepStrictEqual(stMixed.basenames, ['a.js', 'd']);
|
|
522
542
|
});
|
|
package/test/narration-ordering.test.js
ADDED
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Activity-group ordering vs assistant narration (Option b — "fix: flush activity
|
|
4
|
+
// groups before content-bearing narration for correct ordering").
|
|
5
|
+
//
|
|
6
|
+
// THE BUG: assistant narration commits to immutable scrollback immediately
|
|
7
|
+
// (streamed token-by-token via streamToken, finalized via finalizeLastMessage),
|
|
8
|
+
// while an open file/web activity group lives in the redrawable activity region
|
|
9
|
+
// BELOW scrollback and only commits at a later boundary. When narration was
|
|
10
|
+
// INTERMEDIATE (a tool follows), the old terminal-only flush gate was skipped, so
|
|
11
|
+
// the narration committed ABOVE the still-open group and the group flushed later,
|
|
12
|
+
// landing BELOW the conclusion it was based on (the "list ×3 below 'directory
|
|
13
|
+
// almost empty'" screenshot).
|
|
14
|
+
//
|
|
15
|
+
// THE FIX (Option b): flush the open group BEFORE any content-bearing narration.
|
|
16
|
+
// • Streamed path — flush at streaming-START (onToken, before the first
|
|
17
|
+
// content-bearing token commits the "▸ AI-agent" header + line to scrollback).
|
|
18
|
+
// • Finalize path — flush at onAssistantMessage on terminal OR content-bearing
|
|
19
|
+
// narration (backstop for the non-streamed path).
|
|
20
|
+
// • Silent runs (empty/whitespace-only interim narration) still fully collapse.
|
|
21
|
+
//
|
|
22
|
+
// Tests drive the REAL createTurnHandler callbacks (same harness shape as
|
|
23
|
+
// permission-flush.test.js / file-activity.test.js), recording every committed
|
|
24
|
+
// scrollback line and every narration line in ONE ordered log so ordering can be
|
|
25
|
+
// asserted directly.
|
|
26
|
+
|
|
27
|
+
const { test } = require('node:test');
|
|
28
|
+
const assert = require('node:assert');
|
|
29
|
+
|
|
30
|
+
process.stdout.isTTY = true;
|
|
31
|
+
delete process.env.NO_COLOR;
|
|
32
|
+
|
|
33
|
+
const { stripAnsi } = require('../lib/ui/utils');
|
|
34
|
+
const { createTurnHandler } = require('../lib/commands/chat-turn');
|
|
35
|
+
|
|
36
|
+
// ── Live harness ──────────────────────────────────────────────────────────────
|
|
37
|
+
// `turnOpts` becomes ctx.opts. Pass { showThink: true } to disable the
|
|
38
|
+
// implicit-think buffering gate so onToken streams tokens straight to
|
|
39
|
+
// chatHistory.streamToken (exercising the streamed-narration path).
|
|
40
|
+
function harness(turnOpts) {
|
|
41
|
+
const events = [];
|
|
42
|
+
// streaming flag mirrors chat-history's _streamActive: set on the first
|
|
43
|
+
// streamToken, cleared on clear/finalize. isStreaming() drives the
|
|
44
|
+
// streaming-start flush gate in onToken.
|
|
45
|
+
let streaming = false;
|
|
46
|
+
|
|
47
|
+
const writerModule = {
|
|
48
|
+
startActivity() {}, updateActivity() {},
|
|
49
|
+
endActivity(id, line) {
|
|
50
|
+
for (const raw of String(line == null ? '' : line).split('\n')) {
|
|
51
|
+
if (raw === '') continue;
|
|
52
|
+
events.push({ kind: 'commit', line: stripAnsi(raw) });
|
|
53
|
+
}
|
|
54
|
+
},
|
|
55
|
+
scrollback(line) { events.push({ kind: 'scrollback', line: stripAnsi(String(line)) }); },
|
|
56
|
+
};
|
|
57
|
+
const chatHistory = {
|
|
58
|
+
addMessage(m) { if (m && m.isError) events.push({ kind: 'error', output: m.output }); },
|
|
59
|
+
streamToken(t) {
|
|
60
|
+
if (!t) return;
|
|
61
|
+
// Mirror the real per-line commit: the header on the first token, then the
|
|
62
|
+
// token text as a committed narration line. (We don't need byte-fidelity —
|
|
63
|
+
// only that a narration line lands in the ordered log when the model speaks.)
|
|
64
|
+
if (!streaming) { streaming = true; events.push({ kind: 'narration', line: '▸ AI-agent' }); }
|
|
65
|
+
events.push({ kind: 'narration', line: t });
|
|
66
|
+
},
|
|
67
|
+
isStreaming() { return streaming; },
|
|
68
|
+
clearStreamingContent() { streaming = false; },
|
|
69
|
+
deferToolOutput() {}, commitDeferredDetail() {},
|
|
70
|
+
finalizeLastMessage(content) {
|
|
71
|
+
streaming = false;
|
|
72
|
+
if (content && content.trim()) events.push({ kind: 'answer', content });
|
|
73
|
+
},
|
|
74
|
+
};
|
|
75
|
+
const statusBar = { update() {}, onToken() {}, addPendingTokens() {}, updateMetrics() {}, setCost() {} };
|
|
76
|
+
const inputField = { on() {}, removeListener() {}, releaseNavigation() {}, setDisabled() {} };
|
|
77
|
+
|
|
78
|
+
let scenario = async () => {};
|
|
79
|
+
const runAgentLoop = async (messages, model, maxIter, limit, loopOpts) => {
|
|
80
|
+
await scenario(loopOpts.callbacks);
|
|
81
|
+
return { messages, metrics: { turns: [] }, withheldActions: [] };
|
|
82
|
+
};
|
|
83
|
+
|
|
84
|
+
const ctx = {
|
|
85
|
+
inputField, statusBar, chatHistory, writerModule, runAgentLoop,
|
|
86
|
+
getConfig: () => ({ auth_token: 'tok', max_iterations: 50, show_cost: false, system_prompt_mode: 'system_role' }),
|
|
87
|
+
approxTokens: () => 0,
|
|
88
|
+
resolveCommand: () => null,
|
|
89
|
+
opts: turnOpts || {},
|
|
90
|
+
TAG_REGISTRY: {},
|
|
91
|
+
collapseListMsg() {}, handlePendingSelection() {}, showPendingStep() {},
|
|
92
|
+
activateNavCapture() {}, finalizeListMsg() {},
|
|
93
|
+
createChatIfNeeded: async () => {}, saveTurnToDashboard: async () => {}, saveSession() {},
|
|
94
|
+
messages: [], currentModel: 'm', debugMode: false, pendingImages: [],
|
|
95
|
+
chatSync: async () => '', resolvedSystemPrompt: '', resolvedTokenLimit: null, planMode: false,
|
|
96
|
+
};
|
|
97
|
+
|
|
98
|
+
const handler = createTurnHandler(ctx, {});
|
|
99
|
+
return { events, handler, setScenario: (fn) => { scenario = fn; } };
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
// One fully-successful groupable file op (read / list_dir).
|
|
103
|
+
function fileOp(cb, tag, path) {
|
|
104
|
+
cb.onToolStart(tag, path, { id: `${tag}-${path}`, attrs: { path } });
|
|
105
|
+
cb.onToolEnd(tag, 'contents', 5, { id: `${tag}-${path}`, attrs: { path }, meta: { bytes: 10 }, error: null });
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
// One fully-successful web op (http_get) — leaves the web group OPEN.
|
|
109
|
+
function webOp(cb, url) {
|
|
110
|
+
cb.onToolStart('http_get', url, { id: `g-${url}`, attrs: { url } });
|
|
111
|
+
cb.onToolEnd('http_get', {}, 120, { id: `g-${url}`, attrs: { url }, meta: { status_code: 200, bytes: 1000 }, error: null });
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
// Keep only the activity lines committed through writerModule.endActivity (kind === 'commit').
const commits = (events) => events.filter(({ kind }) => kind === 'commit');
|
|
115
|
+
// Committed lines that look like a collapsed file-group summary ("file … explored ×N").
const fileSummaries = (events) => {
  const summaryRow = /file .* explored ×\d+/;
  return commits(events).filter(({ line }) => summaryRow.test(line));
};
|
|
116
|
+
// Committed lines that contain both " web " and "source" (the web-group summary row).
const webSummaries = (events) =>
  commits(events).filter(({ line }) => [/ web /, /source/].every((re) => re.test(line)));
|
|
117
|
+
|
|
118
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
119
|
+
// (a) The SCREENSHOT scenario: a group of list/read ops, then a content-bearing
|
|
120
|
+
// INTERMEDIATE narration (the conclusion based on them), then a non-groupable
|
|
121
|
+
// tool (write_file). The group must commit ABOVE the narration.
|
|
122
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
123
|
+
test('(a) group of list ops → intermediate conclusion narration → write_file: group commits ABOVE the narration', async () => {
|
|
124
|
+
const h = harness();
|
|
125
|
+
h.setScenario(async (cb) => {
|
|
126
|
+
cb.onAssistantMessage(''); // empty pre-tool narration — must NOT flush
|
|
127
|
+
fileOp(cb, 'list_dir', '/proj');
|
|
128
|
+
fileOp(cb, 'list_dir', '/proj/src');
|
|
129
|
+
fileOp(cb, 'list_dir', '/proj/test');
|
|
130
|
+
// The conclusion drawn FROM the listings — content-bearing, intermediate (a
|
|
131
|
+
// tool follows). This is what must NOT sit above the listings it summarizes.
|
|
132
|
+
cb.onAssistantMessage('The directory is almost empty.', { terminal: false });
|
|
133
|
+
// A non-groupable effectful tool follows.
|
|
134
|
+
cb.onToolStart('write_file', '/proj/new.js', { id: 'w1', attrs: { path: '/proj/new.js' } });
|
|
135
|
+
cb.onToolEnd('write_file', 'ok', 4, { id: 'w1', attrs: { path: '/proj/new.js' }, meta: { bytes: 3 }, error: null });
|
|
136
|
+
cb.onAssistantMessage('Done.', { terminal: true });
|
|
137
|
+
});
|
|
138
|
+
await h.handler('list a few dirs, conclude, then write');
|
|
139
|
+
|
|
140
|
+
const s = fileSummaries(h.events);
|
|
141
|
+
assert.strictEqual(s.length, 1, 'the three listings collapse to one summary');
|
|
142
|
+
assert.match(s[0].line, /explored ×3/);
|
|
143
|
+
|
|
144
|
+
const iGroup = h.events.findIndex((e) => e.kind === 'commit' && /explored ×3/.test(e.line));
|
|
145
|
+
const iNarration = h.events.findIndex((e) => e.kind === 'answer' && e.content === 'The directory is almost empty.');
|
|
146
|
+
assert.ok(iGroup >= 0 && iNarration >= 0, 'both group and narration present');
|
|
147
|
+
assert.ok(iGroup < iNarration, 'the explored ×3 group commits ABOVE the conclusion narration (the screenshot fix)');
|
|
148
|
+
});
|
|
149
|
+
|
|
150
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
151
|
+
// (b) SILENT multi-read run — only empty interim narration. Still ONE summary.
|
|
152
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
153
|
+
test('(b) silent multi-read run (empty interim narration) still collapses to ONE explored ×N', async () => {
|
|
154
|
+
const h = harness();
|
|
155
|
+
h.setScenario(async (cb) => {
|
|
156
|
+
cb.onAssistantMessage('');
|
|
157
|
+
fileOp(cb, 'read', '/a.js');
|
|
158
|
+
fileOp(cb, 'read', '/b.js');
|
|
159
|
+
cb.onAssistantMessage('', { terminal: false }); // silent intermediate — must NOT flush
|
|
160
|
+
fileOp(cb, 'read', '/c.js');
|
|
161
|
+
fileOp(cb, 'read', '/d.js');
|
|
162
|
+
cb.onAssistantMessage('', { terminal: false }); // silent intermediate — must NOT flush
|
|
163
|
+
fileOp(cb, 'read', '/e.js');
|
|
164
|
+
cb.onAssistantMessage('Read everything.', { terminal: true });
|
|
165
|
+
});
|
|
166
|
+
await h.handler('silent multi-read run');
|
|
167
|
+
|
|
168
|
+
const s = fileSummaries(h.events);
|
|
169
|
+
assert.strictEqual(s.length, 1, 'a silent run collapses to exactly ONE summary across all iterations');
|
|
170
|
+
assert.match(s[0].line, /explored ×5/, 'all five reads counted — empty interim narration did not split the group');
|
|
171
|
+
});
|
|
172
|
+
|
|
173
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
174
|
+
// (c) CHATTY run — content-bearing narration between reads. Fragments into
|
|
175
|
+
// correctly-ordered sub-groups, each ABOVE its narration.
|
|
176
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
177
|
+
test('(c) chatty run: content-bearing interim narration fragments into correctly-ordered sub-groups', async () => {
|
|
178
|
+
const h = harness();
|
|
179
|
+
h.setScenario(async (cb) => {
|
|
180
|
+
cb.onAssistantMessage('');
|
|
181
|
+
fileOp(cb, 'read', '/a.js');
|
|
182
|
+
fileOp(cb, 'read', '/b.js');
|
|
183
|
+
fileOp(cb, 'read', '/c.js');
|
|
184
|
+
cb.onAssistantMessage('First batch looks fine.', { terminal: false }); // flushes group #1
|
|
185
|
+
fileOp(cb, 'read', '/d.js');
|
|
186
|
+
fileOp(cb, 'read', '/e.js');
|
|
187
|
+
fileOp(cb, 'read', '/f.js');
|
|
188
|
+
cb.onAssistantMessage('Second batch too.', { terminal: true }); // flushes group #2
|
|
189
|
+
});
|
|
190
|
+
await h.handler('chatty multi-read run');
|
|
191
|
+
|
|
192
|
+
const s = fileSummaries(h.events);
|
|
193
|
+
assert.strictEqual(s.length, 2, 'two content-bearing narrations → two fragments');
|
|
194
|
+
assert.ok(s.every((e) => /explored ×3/.test(e.line)), 'each fragment is explored ×3');
|
|
195
|
+
|
|
196
|
+
const iSum1 = h.events.findIndex((e) => e.kind === 'commit' && /explored ×3/.test(e.line));
|
|
197
|
+
const iNarr1 = h.events.findIndex((e) => e.kind === 'answer' && e.content === 'First batch looks fine.');
|
|
198
|
+
const iSum2 = h.events.findIndex((e, idx) => idx > iSum1 && e.kind === 'commit' && /explored ×3/.test(e.line));
|
|
199
|
+
const iNarr2 = h.events.findIndex((e) => e.kind === 'answer' && e.content === 'Second batch too.');
|
|
200
|
+
assert.ok(iSum1 < iNarr1, 'fragment #1 above its narration');
|
|
201
|
+
assert.ok(iNarr1 < iSum2, 'fragment #2 starts after narration #1');
|
|
202
|
+
assert.ok(iSum2 < iNarr2, 'fragment #2 above its narration');
|
|
203
|
+
});
|
|
204
|
+
|
|
205
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
206
|
+
// (d) STREAMED narration (token path) — the group must commit ABOVE the first
|
|
207
|
+
// streamed narration line. This is the load-bearing case: streamToken commits
|
|
208
|
+
// the header + line to scrollback BEFORE onAssistantMessage, so the flush must
|
|
209
|
+
// happen at streaming-START (onToken), not at onAssistantMessage.
|
|
210
|
+
// showThink:true disables the implicit-think buffer so onToken streams live.
|
|
211
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
212
|
+
test('(d) streamed content-bearing narration: group commits ABOVE the first narration line', async () => {
|
|
213
|
+
const h = harness({ showThink: true });
|
|
214
|
+
h.setScenario(async (cb) => {
|
|
215
|
+
fileOp(cb, 'list_dir', '/proj');
|
|
216
|
+
fileOp(cb, 'list_dir', '/proj/src');
|
|
217
|
+
fileOp(cb, 'list_dir', '/proj/test');
|
|
218
|
+
// Narration STREAMS in token-by-token (what really happens). The very first
|
|
219
|
+
// content token must flush the open group first.
|
|
220
|
+
for (const tok of ['The ', 'directory ', 'is ', 'almost ', 'empty.', '\n']) cb.onToken(tok);
|
|
221
|
+
cb.onAssistantMessage('The directory is almost empty.', { terminal: true });
|
|
222
|
+
});
|
|
223
|
+
await h.handler('stream a conclusion after listing');
|
|
224
|
+
|
|
225
|
+
const s = fileSummaries(h.events);
|
|
226
|
+
assert.strictEqual(s.length, 1, 'one summary');
|
|
227
|
+
assert.match(s[0].line, /explored ×3/);
|
|
228
|
+
|
|
229
|
+
const iGroup = h.events.findIndex((e) => e.kind === 'commit' && /explored ×3/.test(e.line));
|
|
230
|
+
const iFirstNarration = h.events.findIndex((e) => e.kind === 'narration');
|
|
231
|
+
assert.ok(iGroup >= 0 && iFirstNarration >= 0, 'group and streamed narration both present');
|
|
232
|
+
assert.ok(iGroup < iFirstNarration, 'the group commits ABOVE the FIRST streamed narration line (streaming-start flush)');
|
|
233
|
+
});
|
|
234
|
+
|
|
235
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
236
|
+
// (e) DOUBLE-FLUSH idempotency — the streaming-start flush, then the
|
|
237
|
+
// onAssistantMessage flush, then the turn-end finally flush all call flush();
|
|
238
|
+
// the group commits EXACTLY once.
|
|
239
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
240
|
+
test('(e) double-flush idempotency: streaming-start + onAssistantMessage + finally → exactly one commit', async () => {
|
|
241
|
+
const h = harness({ showThink: true });
|
|
242
|
+
h.setScenario(async (cb) => {
|
|
243
|
+
fileOp(cb, 'read', '/a.js');
|
|
244
|
+
fileOp(cb, 'read', '/b.js');
|
|
245
|
+
fileOp(cb, 'read', '/c.js');
|
|
246
|
+
for (const tok of ['All ', 'good.', '\n']) cb.onToken(tok); // flush #1 (streaming-start)
|
|
247
|
+
cb.onAssistantMessage('All good.', { terminal: true }); // flush #2 (no-op) + finally flush (no-op)
|
|
248
|
+
});
|
|
249
|
+
await h.handler('one group, many flush opportunities');
|
|
250
|
+
|
|
251
|
+
const s = fileSummaries(h.events);
|
|
252
|
+
assert.strictEqual(s.length, 1, 'the group committed EXACTLY once despite multiple flush() calls');
|
|
253
|
+
assert.match(s[0].line, /explored ×3/);
|
|
254
|
+
});
|
|
255
|
+
|
|
256
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
257
|
+
// (f) WEB group parity — the open web group also flushes before content-bearing
|
|
258
|
+
// narration, symmetrically with the file group.
|
|
259
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
260
|
+
test('(f) web group flushes before content-bearing narration (symmetry with the file group)', async () => {
|
|
261
|
+
const h = harness();
|
|
262
|
+
h.setScenario(async (cb) => {
|
|
263
|
+
cb.onAssistantMessage('');
|
|
264
|
+
webOp(cb, 'https://a.example');
|
|
265
|
+
webOp(cb, 'https://b.example');
|
|
266
|
+
// Content-bearing intermediate narration must flush the web group above it.
|
|
267
|
+
cb.onAssistantMessage('Both pages confirm the API shape.', { terminal: false });
|
|
268
|
+
cb.onToolStart('write_file', '/notes.md', { id: 'w1', attrs: { path: '/notes.md' } });
|
|
269
|
+
cb.onToolEnd('write_file', 'ok', 4, { id: 'w1', attrs: { path: '/notes.md' }, meta: { bytes: 3 }, error: null });
|
|
270
|
+
cb.onAssistantMessage('Done.', { terminal: true });
|
|
271
|
+
});
|
|
272
|
+
await h.handler('fetch two pages, conclude, then write');
|
|
273
|
+
|
|
274
|
+
const w = webSummaries(h.events);
|
|
275
|
+
assert.strictEqual(w.length, 1, 'the two fetches collapse to one web summary');
|
|
276
|
+
const iWeb = h.events.findIndex((e) => e.kind === 'commit' && / web /.test(e.line) && /source/.test(e.line));
|
|
277
|
+
const iNarration = h.events.findIndex((e) => e.kind === 'answer' && e.content === 'Both pages confirm the API shape.');
|
|
278
|
+
assert.ok(iWeb >= 0 && iNarration >= 0, 'web summary and narration present');
|
|
279
|
+
assert.ok(iWeb < iNarration, 'the web summary commits ABOVE the content-bearing narration');
|
|
280
|
+
});
|
|
281
|
+
|
|
282
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
283
|
+
// (g) WHITESPACE-only interim narration does NOT flush (silent collapse preserved
|
|
284
|
+
// even when pure streaming artifacts arrive between reads).
|
|
285
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
286
|
+
test('(g) whitespace-only interim narration does NOT flush — silent collapse preserved', async () => {
|
|
287
|
+
const h = harness({ showThink: true });
|
|
288
|
+
let commitsAfterWhitespace = -1;
|
|
289
|
+
h.setScenario(async (cb) => {
|
|
290
|
+
fileOp(cb, 'read', '/a.js');
|
|
291
|
+
fileOp(cb, 'read', '/b.js');
|
|
292
|
+
// Pure streaming artifacts between reads — whitespace tokens + a whitespace
|
|
293
|
+
// finalize. NONE of these may flush the open group (token.trim() is empty so
|
|
294
|
+
// the streaming-start gate is skipped; the onAssistantMessage gate sees no
|
|
295
|
+
// content). The group stays open and uncommitted.
|
|
296
|
+
cb.onToken(' ');
|
|
297
|
+
cb.onToken('\n');
|
|
298
|
+
cb.onAssistantMessage(' ', { terminal: false });
|
|
299
|
+
commitsAfterWhitespace = commits(h.events).length; // snapshot: must be 0 — nothing flushed
|
|
300
|
+
fileOp(cb, 'read', '/c.js');
|
|
301
|
+
cb.onAssistantMessage('Read all three.', { terminal: true }); // terminal → the single flush
|
|
302
|
+
});
|
|
303
|
+
await h.handler('whitespace artifacts between reads');
|
|
304
|
+
|
|
305
|
+
assert.strictEqual(commitsAfterWhitespace, 0, 'whitespace interim narration committed NOTHING — the group was not flushed');
|
|
306
|
+
const s = fileSummaries(h.events);
|
|
307
|
+
assert.strictEqual(s.length, 1, 'whitespace interim narration did NOT split the group');
|
|
308
|
+
assert.match(s[0].line, /explored ×3/, 'all three reads collapsed into ONE summary');
|
|
309
|
+
});
|
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Permission-prompt flush — open file/web activity groups must be committed to
|
|
4
|
+
// scrollback when a permission-gated (effectful) tool triggers a prompt, NOT
|
|
5
|
+
// left rendering LIVE in the writer's activity region beside the modal.
|
|
6
|
+
//
|
|
7
|
+
// Root cause this guards: the agent loop asks permission BEFORE onToolStart
|
|
8
|
+
// (agent.js), so onToolStart's "flush the other group before this non-groupable
|
|
9
|
+
// op" step is sequenced AFTER the modal and cannot fire while it is open. The fix
|
|
10
|
+
// adds an unconditional flush of both trackers at the TOP of the onPermissionAsk
|
|
11
|
+
// handler (chat-turn.js). This is safe because groupable tools (read_file /
|
|
12
|
+
// list_dir) are read-only with a NULL permission descriptor, so onPermissionAsk
|
|
13
|
+
// never fires for them — by the time it fires the prompting tool is non-groupable.
|
|
14
|
+
//
|
|
15
|
+
// Tests drive the REAL createTurnHandler callbacks (same harness shape as
|
|
16
|
+
// file-activity.test.js / web-activity-ordering.test.js), simulating the
|
|
17
|
+
// loop's onPermissionAsk → (grant ⇒ onToolStart/onToolEnd | deny ⇒ nothing)
|
|
18
|
+
// sequence by hand.
|
|
19
|
+
|
|
20
|
+
const { test } = require('node:test');
|
|
21
|
+
const assert = require('node:assert');
|
|
22
|
+
|
|
23
|
+
// Stable colour env for byte comparisons (node:test isolates each file's process).
|
|
24
|
+
process.stdout.isTTY = true;
|
|
25
|
+
delete process.env.NO_COLOR;
|
|
26
|
+
|
|
27
|
+
const { stripAnsi } = require('../lib/ui/utils');
|
|
28
|
+
const { createTurnHandler } = require('../lib/commands/chat-turn');
|
|
29
|
+
const { TOOL_REGISTRY } = require('../lib/tool_registry');
|
|
30
|
+
|
|
31
|
+
// ── Live harness: drive the real createTurnHandler callbacks ──────────────────
|
|
32
|
+
// Mirrors file-activity.test.js's harness. Records every committed line in one
|
|
33
|
+
// ordered log so we can assert flush ORDERING (group above the prompting tool).
|
|
34
|
+
function harness(opts) {
|
|
35
|
+
const events = [];
|
|
36
|
+
const writerModule = {
|
|
37
|
+
startActivity() {}, updateActivity() {},
|
|
38
|
+
endActivity(id, line) {
|
|
39
|
+
for (const raw of String(line == null ? '' : line).split('\n')) {
|
|
40
|
+
if (raw === '') continue;
|
|
41
|
+
events.push({ kind: 'commit', line: stripAnsi(raw) });
|
|
42
|
+
}
|
|
43
|
+
},
|
|
44
|
+
scrollback(line) { events.push({ kind: 'scrollback', line: stripAnsi(String(line)) }); },
|
|
45
|
+
};
|
|
46
|
+
const chatHistory = {
|
|
47
|
+
addMessage(m) { if (m && m.isError) events.push({ kind: 'error-body', output: m.output }); },
|
|
48
|
+
streamToken() {}, clearStreamingContent() {},
|
|
49
|
+
deferToolOutput() {}, commitDeferredDetail() {},
|
|
50
|
+
finalizeLastMessage(content) { if (content && content.trim()) events.push({ kind: 'answer', content }); },
|
|
51
|
+
};
|
|
52
|
+
const statusBar = { update() {}, onToken() {}, addPendingTokens() {}, updateMetrics() {}, setCost() {} };
|
|
53
|
+
const inputField = { on() {}, removeListener() {}, releaseNavigation() {}, setDisabled() {} };
|
|
54
|
+
|
|
55
|
+
let scenario = async () => {};
|
|
56
|
+
const runAgentLoop = async (messages, model, maxIter, limit, loopOpts) => {
|
|
57
|
+
await scenario(loopOpts.callbacks);
|
|
58
|
+
return { messages, metrics: { turns: [] }, withheldActions: [] };
|
|
59
|
+
};
|
|
60
|
+
|
|
61
|
+
const ctx = {
|
|
62
|
+
inputField, statusBar, chatHistory, writerModule, runAgentLoop,
|
|
63
|
+
getConfig: () => ({ auth_token: 'tok', max_iterations: 50, show_cost: false, system_prompt_mode: 'system_role' }),
|
|
64
|
+
approxTokens: () => 0,
|
|
65
|
+
resolveCommand: () => null,
|
|
66
|
+
opts: {},
|
|
67
|
+
TAG_REGISTRY: {},
|
|
68
|
+
collapseListMsg() {}, handlePendingSelection() {}, showPendingStep() {},
|
|
69
|
+
activateNavCapture() {}, finalizeListMsg() {},
|
|
70
|
+
createChatIfNeeded: async () => {}, saveTurnToDashboard: async () => {}, saveSession() {},
|
|
71
|
+
messages: [], currentModel: 'm', debugMode: (opts && opts.debugMode) || false, pendingImages: [],
|
|
72
|
+
chatSync: async () => '', resolvedSystemPrompt: '', resolvedTokenLimit: null, planMode: false,
|
|
73
|
+
};
|
|
74
|
+
|
|
75
|
+
const handler = createTurnHandler(ctx, {});
|
|
76
|
+
return { events, handler, setScenario: (fn) => { scenario = fn; } };
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
// One fully-successful groupable file op (read / list_dir).
|
|
80
|
+
function fileOp(cb, tag, path, bytes) {
|
|
81
|
+
cb.onToolStart(tag, path, { id: `${tag}-${path}`, attrs: { path } });
|
|
82
|
+
cb.onToolEnd(tag, 'contents', 5, { id: `${tag}-${path}`, attrs: { path }, meta: { bytes: bytes || 10 }, error: null });
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
// One fully-successful web op (http_get) — leaves the web group OPEN (it only
|
|
86
|
+
// flushes on a non-web tool start, a permission prompt, content-bearing or terminal narration, or turn end).
|
|
87
|
+
function webOp(cb, url) {
|
|
88
|
+
cb.onToolStart('http_get', url, { id: `g-${url}`, attrs: { url } });
|
|
89
|
+
cb.onToolEnd('http_get', {}, 120, { id: `g-${url}`, attrs: { url }, meta: { status_code: 200, bytes: 1000 }, error: null });
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
// Keep only the activity lines committed through writerModule.endActivity (kind === 'commit').
const commits = (events) => events.filter(({ kind }) => kind === 'commit');
|
|
93
|
+
// Committed lines that look like a collapsed file-group summary ("file … explored ×N").
const fileSummaries = (events) => {
  const summaryRow = /file .* explored ×\d+/;
  return commits(events).filter(({ line }) => summaryRow.test(line));
};
|
|
94
|
+
// Committed lines that contain both " web " and "source" (the web-group summary row).
const webSummaries = (events) =>
  commits(events).filter(({ line }) => [/ web /, /source/].every((re) => re.test(line)));
|
|
95
|
+
|
|
96
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
97
|
+
// (a) 2-read group (below threshold) + permission-gated write_file → the group
|
|
98
|
+
// flushes as TWO individual lines at onPermissionAsk, BEFORE the prompt; no
|
|
99
|
+
// stale live group remains during the modal. (grant path)
|
|
100
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
101
|
+
test('(a) <3 file group flushes as individual lines at onPermissionAsk, above the prompting tool', async () => {
|
|
102
|
+
const h = harness();
|
|
103
|
+
let commitsAtAsk = -1;
|
|
104
|
+
h.setScenario(async (cb) => {
|
|
105
|
+
cb.onAssistantMessage('');
|
|
106
|
+
fileOp(cb, 'read', '/a.js');
|
|
107
|
+
fileOp(cb, 'read', '/b.js');
|
|
108
|
+
// Effectful tool triggers a permission prompt — fires BEFORE onToolStart.
|
|
109
|
+
cb.onPermissionAsk('write_file', '/out.js');
|
|
110
|
+
commitsAtAsk = commits(h.events).length; // snapshot at the ask
|
|
111
|
+
// Grant → the tool now starts and ends.
|
|
112
|
+
cb.onToolStart('write_file', '/out.js', { id: 'w1', attrs: { path: '/out.js' } });
|
|
113
|
+
cb.onToolEnd('write_file', 'ok', 4, { id: 'w1', attrs: { path: '/out.js' }, meta: { bytes: 3 }, error: null });
|
|
114
|
+
cb.onAssistantMessage('done');
|
|
115
|
+
});
|
|
116
|
+
await h.handler('two reads then a write');
|
|
117
|
+
|
|
118
|
+
// The two reads were committed at the moment the prompt opened — not stranded.
|
|
119
|
+
assert.strictEqual(commitsAtAsk, 2, 'both read lines committed AT onPermissionAsk, before the modal');
|
|
120
|
+
assert.strictEqual(fileSummaries(h.events).length, 0, 'a 2-op group stays individual lines (no summary)');
|
|
121
|
+
const reads = commits(h.events).filter((e) => /read \//.test(e.line));
|
|
122
|
+
assert.strictEqual(reads.length, 2, 'two individual read lines');
|
|
123
|
+
// Ordering: the read lines land ABOVE the write_file line.
|
|
124
|
+
// Index of the LAST committed read line, or -1 when there is none: every later match overwrites
// the accumulator. reduce() never mutates h.events, so no defensive copy is needed first.
const iLastRead = h.events.reduce((acc, e, i) => (e.kind === 'commit' && /read \//.test(e.line) ? i : acc), -1);
|
|
125
|
+
const iWrite = h.events.findIndex((e) => e.kind === 'commit' && /out\.js/.test(e.line) && !/read/.test(e.line));
|
|
126
|
+
assert.ok(iLastRead >= 0 && iWrite >= 0 && iLastRead < iWrite, 'read group commits ABOVE the write_file row');
|
|
127
|
+
});
|
|
128
|
+
|
|
129
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
130
|
+
// (b) ≥3-read group + permission-gated write_file → the group flushes as ONE
|
|
131
|
+
// summary at onPermissionAsk, BEFORE the prompt, above the tool row.
|
|
132
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
133
|
+
test('(b) ≥3 file group flushes as a summary at onPermissionAsk, above the prompting tool', async () => {
|
|
134
|
+
const h = harness();
|
|
135
|
+
let summariesAtAsk = -1;
|
|
136
|
+
h.setScenario(async (cb) => {
|
|
137
|
+
cb.onAssistantMessage('');
|
|
138
|
+
for (let i = 0; i < 3; i++) fileOp(cb, 'read', `/r${i}.js`);
|
|
139
|
+
cb.onPermissionAsk('write_file', '/out.js');
|
|
140
|
+
summariesAtAsk = fileSummaries(h.events).length; // snapshot at the ask
|
|
141
|
+
cb.onToolStart('write_file', '/out.js', { id: 'w1', attrs: { path: '/out.js' } });
|
|
142
|
+
cb.onToolEnd('write_file', 'ok', 4, { id: 'w1', attrs: { path: '/out.js' }, meta: { bytes: 3 }, error: null });
|
|
143
|
+
cb.onAssistantMessage('done');
|
|
144
|
+
});
|
|
145
|
+
await h.handler('three reads then a write');
|
|
146
|
+
|
|
147
|
+
assert.strictEqual(summariesAtAsk, 1, 'the summary committed AT onPermissionAsk');
|
|
148
|
+
const s = fileSummaries(h.events);
|
|
149
|
+
assert.strictEqual(s.length, 1, 'exactly one summary overall');
|
|
150
|
+
assert.match(s[0].line, /explored ×3/, 'collapsed explored ×3 summary');
|
|
151
|
+
const iSummary = h.events.findIndex((e) => e.kind === 'commit' && /explored ×3/.test(e.line));
|
|
152
|
+
const iWrite = h.events.findIndex((e) => e.kind === 'commit' && /out\.js/.test(e.line) && !/read/.test(e.line));
|
|
153
|
+
assert.ok(iSummary >= 0 && iWrite >= 0 && iSummary < iWrite, 'summary lands ABOVE the write_file row');
|
|
154
|
+
});
|
|
155
|
+
|
|
156
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
157
|
+
// (c) open WEB group + permission-gated write_file → the web group flushes at
|
|
158
|
+
// onPermissionAsk (the IDENTICAL latent gap on the web tracker).
|
|
159
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
160
|
+
test('(c) open web group flushes at onPermissionAsk, above the prompting tool', async () => {
|
|
161
|
+
const h = harness();
|
|
162
|
+
let webAtAsk = -1;
|
|
163
|
+
h.setScenario(async (cb) => {
|
|
164
|
+
cb.onAssistantMessage('');
|
|
165
|
+
webOp(cb, 'https://x.example'); // web group left OPEN
|
|
166
|
+
cb.onPermissionAsk('write_file', '/out.js');
|
|
167
|
+
webAtAsk = webSummaries(h.events).length; // snapshot at the ask
|
|
168
|
+
cb.onToolStart('write_file', '/out.js', { id: 'w1', attrs: { path: '/out.js' } });
|
|
169
|
+
cb.onToolEnd('write_file', 'ok', 4, { id: 'w1', attrs: { path: '/out.js' }, meta: { bytes: 3 }, error: null });
|
|
170
|
+
cb.onAssistantMessage('done');
|
|
171
|
+
});
|
|
172
|
+
await h.handler('a fetch then a write');
|
|
173
|
+
|
|
174
|
+
assert.strictEqual(webAtAsk, 1, 'the web summary committed AT onPermissionAsk (latent web gap fixed)');
|
|
175
|
+
const w = webSummaries(h.events);
|
|
176
|
+
assert.strictEqual(w.length, 1, 'exactly one web summary');
|
|
177
|
+
const iWeb = h.events.findIndex((e) => e.kind === 'commit' && / web /.test(e.line) && /source/.test(e.line));
|
|
178
|
+
const iWrite = h.events.findIndex((e) => e.kind === 'commit' && /out\.js/.test(e.line) && !/web/.test(e.line));
|
|
179
|
+
assert.ok(iWeb >= 0 && iWrite >= 0 && iWeb < iWrite, 'web summary lands ABOVE the write_file row');
|
|
180
|
+
});
|
|
181
|
+
|
|
182
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
183
|
+
// (d) DENIAL path — onToolStart never runs. The group must still be flushed at
|
|
184
|
+
// onPermissionAsk, not stranded live until the turn-end finally.
|
|
185
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
186
|
+
test('(d) denial path: the group is flushed at onPermissionAsk, not stranded until the finally', async () => {
|
|
187
|
+
const h = harness();
|
|
188
|
+
let commitsAtAsk = -1;
|
|
189
|
+
h.setScenario(async (cb) => {
|
|
190
|
+
cb.onAssistantMessage('');
|
|
191
|
+
for (let i = 0; i < 3; i++) fileOp(cb, 'read', `/r${i}.js`);
|
|
192
|
+
cb.onPermissionAsk('write_file', '/out.js');
|
|
193
|
+
commitsAtAsk = fileSummaries(h.events).length; // snapshot at the ask
|
|
194
|
+
// DENY: agent.js breaks the loop — NO onToolStart, NO onToolEnd for the tool.
|
|
195
|
+
cb.onAssistantMessage('I was denied, stopping.');
|
|
196
|
+
});
|
|
197
|
+
await h.handler('three reads then a denied write');
|
|
198
|
+
|
|
199
|
+
assert.strictEqual(commitsAtAsk, 1, 'the read group was committed AT onPermissionAsk, before deny — not stranded');
|
|
200
|
+
// And there is exactly one summary in total (the finally flush is a no-op).
|
|
201
|
+
assert.strictEqual(fileSummaries(h.events).length, 1, 'still exactly one summary after the finally');
|
|
202
|
+
});
|
|
203
|
+
|
|
204
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
205
|
+
// (e) DOUBLE-FLUSH guard — onPermissionAsk flush, then the post-grant onToolStart
|
|
206
|
+
// flush, then the turn-end finally flush all call flush(); the group must
|
|
207
|
+
// commit EXACTLY ONCE (idempotent isOpen()/groupId===null guard).
|
|
208
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
209
|
+
test('(e) double-flush guard: onPermissionAsk + onToolStart + finally → exactly one commit', async () => {
|
|
210
|
+
const h = harness();
|
|
211
|
+
h.setScenario(async (cb) => {
|
|
212
|
+
cb.onAssistantMessage('');
|
|
213
|
+
for (let i = 0; i < 3; i++) fileOp(cb, 'read', `/r${i}.js`); // ≥3 → one summary line
|
|
214
|
+
cb.onPermissionAsk('write_file', '/out.js'); // flush #1 (commits)
|
|
215
|
+
cb.onToolStart('write_file', '/out.js', { id: 'w1', attrs: { path: '/out.js' } }); // flush #2 (no-op)
|
|
216
|
+
cb.onToolEnd('write_file', 'ok', 4, { id: 'w1', attrs: { path: '/out.js' }, meta: { bytes: 3 }, error: null });
|
|
217
|
+
cb.onAssistantMessage('done'); // finally flush (no-op)
|
|
218
|
+
});
|
|
219
|
+
await h.handler('idempotent double flush');
|
|
220
|
+
|
|
221
|
+
const s = fileSummaries(h.events);
|
|
222
|
+
assert.strictEqual(s.length, 1, 'the group committed EXACTLY once despite three flush() calls');
|
|
223
|
+
assert.match(s[0].line, /explored ×3/);
|
|
224
|
+
});
|
|
225
|
+
|
|
226
|
+
// ───────────────────────────────────────────────────────────────────────────
// (f) INTENTIONAL BEHAVIOR CHANGE (Option b — "fix: flush activity groups before
//     content-bearing narration for correct ordering").
//
//     Before: this test expected content-bearing INTERMEDIATE narration
//     ("Reading a couple more.") to leave the group intact, so all four reads
//     collapsed into a single "explored ×4" summary. That was chronologically
//     WRONG — the narration reached scrollback ABOVE a group that was still
//     open, and the group's summary then appeared BELOW the conclusion that
//     had been drawn from it.
//
//     Now: content-bearing intermediate narration flushes the open group
//     FIRST, which puts every sub-group ABOVE the narration that follows it.
//     The cost is that a chatty multi-read run FRAGMENTS into ordered
//     sub-groups ("explored ×3" / narration / "explored ×3") rather than one
//     "explored ×6". That is the deliberate Option-(b) tradeoff: each fragment
//     is chronologically truthful. (Silent runs with empty interim narration
//     STILL fully collapse — see narration-ordering.test.js case (b)/(g).)
//
//     Each fragment uses 3 reads so it crosses GROUP_THRESHOLD and produces a
//     summary line; a fragment of fewer than 3 would render individual per-op
//     lines instead.
// ───────────────────────────────────────────────────────────────────────────
test('(f) content-bearing interim narration FRAGMENTS the read run into correctly-ordered sub-groups', async () => {
  const rig = harness();
  const interimText = 'Reading a couple more.';
  const terminalText = 'All read.';
  const firstBatch = ['/i1a.js', '/i1b.js', '/i1c.js'];
  const secondBatch = ['/i2a.js', '/i2b.js', '/i2c.js'];

  rig.setScenario(async (cb) => {
    // Empty pre-tool narration — must NOT flush anything.
    cb.onAssistantMessage('');
    for (const readPath of firstBatch) {
      fileOp(cb, 'read', readPath);
    }
    // Content-bearing and non-terminal → FLUSHES group #1.
    cb.onAssistantMessage(interimText, { terminal: false });
    for (const readPath of secondBatch) {
      fileOp(cb, 'read', readPath);
    }
    // Terminal answer → flushes group #2.
    cb.onAssistantMessage(terminalText, { terminal: true });
    // onPermissionAsk is intentionally never called for this read-only run.
  });
  await rig.handler('multi-iteration reads, content-bearing interim narration');

  const summaries = fileSummaries(rig.events);
  assert.strictEqual(summaries.length, 2, 'content-bearing interim narration split the run into TWO summaries');
  const [firstSummary, secondSummary] = summaries;
  assert.match(firstSummary.line, /explored ×3/, 'first fragment: the three reads before the interim narration');
  assert.match(secondSummary.line, /explored ×3/, 'second fragment: the three reads after it');

  // Ordering: each summary lands ABOVE its narration (the Option-(b) guarantee).
  const isSummaryCommit = (ev) => ev.kind === 'commit' && /explored ×3/.test(ev.line);
  const indexOfAnswer = (text) => rig.events.findIndex((ev) => ev.kind === 'answer' && ev.content === text);

  const iSum1 = rig.events.findIndex((ev) => isSummaryCommit(ev));
  const iNarr1 = indexOfAnswer(interimText);
  // Second summary: the first matching commit strictly after the first one.
  const iSum2 = rig.events.findIndex((ev, idx) => idx > iSum1 && isSummaryCommit(ev));
  const iNarr2 = indexOfAnswer(terminalText);

  assert.ok(iSum1 >= 0 && iNarr1 > iSum1, 'group #1 commits ABOVE the interim narration');
  assert.ok(iSum2 > iNarr1 && iNarr2 > iSum2, 'group #2 commits below the interim narration and ABOVE the terminal answer');
});
|
|
276
|
+
|
|
277
|
+
// ───────────────────────────────────────────────────────────────────────────
// (g) read_file / list_dir yield NULL permission descriptors, which means
//     onPermissionAsk is never invoked for them — so the unconditional flush
//     there can never wrongly break an in-progress read/list group.
//     (Invariant: groupable ⇒ null descriptor.)
// ───────────────────────────────────────────────────────────────────────────
test('(g) read_file and list_dir have null permission descriptors (groupable ⇒ never reaches onPermissionAsk)', async () => {
  // Look up the registry entry that advertises the given tool tag.
  const entryFor = (tag) => TOOL_REGISTRY.find((entry) => Array.isArray(entry.tags) && entry.tags.includes(tag));

  const tags = ['read_file', 'list_dir', 'write_file'];
  const [readEntry, listEntry, writeEntry] = tags.map((tag) => entryFor(tag));

  // Every entry must exist and expose a callable permission hook.
  const found = [readEntry, listEntry, writeEntry];
  tags.forEach((tag, i) => {
    const entry = found[i];
    assert.ok(entry && typeof entry.permission === 'function', `${tag} entry present with a permission fn`);
  });

  // Groupable read-only tools: null descriptor → the loop's askGate is false →
  // onPermissionAsk is NOT invoked for them.
  const readDesc = readEntry.permission({}, ['/a.js']);
  assert.strictEqual(readDesc, null, 'read_file descriptor is null');
  const listDesc = listEntry.permission({}, ['/d']);
  assert.strictEqual(listDesc, null, 'list_dir descriptor is null');

  // Contrast: write_file (the prompting tool above) returns a NON-null descriptor.
  // (_uiActive:true skips the headless diff branch, which would touch ctx.writer.)
  const writeDesc = await writeEntry.permission({ _uiActive: true }, ['/out.js', 'x']);
  const isWriteDescriptor = writeDesc && typeof writeDesc === 'object' && writeDesc.tag === 'write_file';
  assert.ok(isWriteDescriptor, 'write_file returns a non-null permission descriptor');
});
|