@semalt-ai/code 1.19.0 → 1.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. package/.claude/settings.local.json +2 -1
  2. package/ARCHITECTURE.md +6 -95
  3. package/CLAUDE.md +196 -1874
  4. package/README.md +1 -1
  5. package/docs/ARCHITECTURE.md +1321 -0
  6. package/docs/CONFIG.md +340 -0
  7. package/docs/HISTORY.md +245 -0
  8. package/index.js +1 -1
  9. package/lib/agent.js +145 -16
  10. package/lib/api.js +28 -3
  11. package/lib/commands/chat-session.js +187 -4
  12. package/lib/commands/chat-slash.js +16 -0
  13. package/lib/commands/chat-turn.js +272 -49
  14. package/lib/commands/chat.js +12 -8
  15. package/lib/config.js +27 -0
  16. package/lib/constants.js +30 -1
  17. package/lib/headless.js +36 -1
  18. package/lib/images.js +8 -2
  19. package/lib/permissions.js +23 -16
  20. package/lib/prompts.js +15 -3
  21. package/lib/tool_registry.js +357 -53
  22. package/lib/tool_specs.js +42 -8
  23. package/lib/tools.js +80 -19
  24. package/lib/ui/anim.js +86 -0
  25. package/lib/ui/ansi.js +17 -27
  26. package/lib/ui/chat-history.js +253 -71
  27. package/lib/ui/create-ui.js +67 -24
  28. package/lib/ui/diff.js +90 -25
  29. package/lib/ui/file-activity.js +236 -0
  30. package/lib/ui/format.js +173 -28
  31. package/lib/ui/input-field.js +5 -4
  32. package/lib/ui/md-stream.js +234 -0
  33. package/lib/ui/render-operation.js +113 -0
  34. package/lib/ui/select.js +1 -4
  35. package/lib/ui/status-bar.js +99 -57
  36. package/lib/ui/stream.js +20 -13
  37. package/lib/ui/theme.js +190 -45
  38. package/lib/ui/tool-operation.js +190 -0
  39. package/lib/ui/utils.js +9 -5
  40. package/lib/ui/web-activity.js +58 -6
  41. package/lib/ui/writer.js +159 -45
  42. package/lib/ui.js +1 -1
  43. package/package.json +1 -1
  44. package/test/anim-driver.test.js +153 -0
  45. package/test/ask-user-display.test.js +226 -0
  46. package/test/ask-user-gate.test.js +231 -0
  47. package/test/chat-history-nocolor.test.js +155 -0
  48. package/test/chat-relogin.test.js +207 -0
  49. package/test/defer-detail-band.test.js +403 -0
  50. package/test/detail-band-tab-flatten.test.js +242 -0
  51. package/test/exec-diff.test.js +268 -0
  52. package/test/executors.test.js +250 -13
  53. package/test/extract-tool-calls.test.js +37 -3
  54. package/test/file-activity.test.js +522 -0
  55. package/test/grep-path-target.test.js +227 -0
  56. package/test/harness/chat-harness.js +2 -1
  57. package/test/headless.test.js +146 -1
  58. package/test/input-field-ctrl-o.test.js +37 -0
  59. package/test/live-height-physical.test.js +281 -0
  60. package/test/max-iterations.test.js +9 -7
  61. package/test/md-stream.test.js +183 -0
  62. package/test/native-dispatch.test.js +53 -0
  63. package/test/native-live-narration.test.js +254 -0
  64. package/test/output-heredoc-leak.test.js +195 -0
  65. package/test/output-preview.test.js +245 -0
  66. package/test/permissions.test.js +199 -0
  67. package/test/read-paginate.test.js +1 -1
  68. package/test/render-operation.test.js +317 -0
  69. package/test/replay-descriptor-xml.test.js +216 -0
  70. package/test/replay-descriptor.test.js +189 -0
  71. package/test/replay-web-aggregate.test.js +291 -0
  72. package/test/replay-web-persist.test.js +241 -0
  73. package/test/running-glyph-anim.test.js +111 -0
  74. package/test/status-bar-driver.test.js +93 -0
  75. package/test/status-bar-resync.test.js +188 -0
  76. package/test/stream-parser.test.js +24 -0
  77. package/test/theme-palette.test.js +166 -0
  78. package/test/truncate-visible.test.js +78 -0
  79. package/test/view-image.test.js +199 -0
  80. package/test/web-activity-ordering.test.js +12 -3
  81. package/path +0 -1
package/test/replay-descriptor.test.js
@@ -0,0 +1,189 @@
1
+ 'use strict';
2
+
3
+ // Output Refactor — Phase 6a: native-rail replay parity via a persisted descriptor.
4
+ //
5
+ // Replay (/history, /chats, --resume) historically rendered tool lines through
6
+ // the lossy summarizeToolResult one-liner (no diff, no real duration, no real
7
+ // status). Phase 6a persists the terminal-state descriptor core as a sibling
8
+ // `_display` key on the native `{role:'tool'}` result message and replays it
9
+ // through the SAME renderOperation a fresh turn uses. These tests pin:
10
+ // 1. POSITIVE — a saved native tool message with `_display` replays byte-identical
11
+ // to a fresh renderOperation of the same descriptor (result line + diff body),
12
+ // across ok / error / diff cases.
13
+ // 2. NEGATIVE — a legacy message (no `_display`) and an unknown-version `_display`
14
+ // both fall back to summarizeToolResult, unchanged from today, no crash.
15
+ // 3. INV.1 — persisting `_display` leaves the model-facing `content` byte-identical
16
+ // and never feeds `_display` to the bound (model-facing) tool output.
17
+ // 4. ANTI-PING-PONG — the Phase 1 fresh-render characterization bytes are unchanged.
18
+
19
+ const { test } = require('node:test');
20
+ const assert = require('node:assert');
21
+
22
+ // Phase 2.5: colour is gated on `isTTY && !NO_COLOR`. Force a colour-capable env
23
+ // so the byte comparisons below are stable regardless of how the runner wires
24
+ // stdout (node:test runs each file in its own process — no leak to other suites).
25
+ process.stdout.isTTY = true;
26
+ delete process.env.NO_COLOR;
27
+
28
+ const { buildToolOperation, serializeOperation, descriptorFromStored } = require('../lib/ui/tool-operation');
29
+ const { renderOperation } = require('../lib/ui/render-operation');
30
+ const { ChatHistory } = require('../lib/ui/chat-history');
31
+ const { summarizeToolResult } = require('../lib/ui/format');
32
+
33
+ const stripAnsi = (s) => String(s).replace(/\x1b\[[0-9;]*m/g, '');
34
+
35
+ // Mirror output-preview.test.js: intercept _commit to capture exactly what
36
+ // addMessage commits to scrollback. Returns the array the stub appends to.
37
+ function capture(ch) {
38
+ const out = []; // receives every value passed to ch._commit, in call order
39
+ ch._commit = (t) => out.push(t); // overwrites the instance's _commit; nothing is forwarded to the previous one
40
+ return out;
41
+ }
42
+
43
+ // Representative COMPLETED operations spanning the fidelity dimensions that
44
+ // summarizeToolResult drops: real duration, real exit/status meta, an error
45
+ // state, and a file-edit diff body.
46
+ const SHELL_OK = buildToolOperation({
47
+ id: 'tool-1', tag: 'shell', arg: 'npm test', attrs: { command: 'npm test' },
48
+ status: 'ok', durationMs: 2300, meta: { exit_code: 0 },
49
+ });
50
+ const SHELL_ERR = buildToolOperation({
51
+ id: 'tool-2', tag: 'shell', arg: 'npm test', attrs: { command: 'npm test' },
52
+ status: 'error', durationMs: 800, meta: { exit_code: 1 }, error: { message: 'exit 1', code: 1 },
53
+ });
54
+ const EDIT_DIFF = buildToolOperation({
55
+ id: 'tool-3', tag: 'edit_file', arg: 'lib/x.js', attrs: { path: 'lib/x.js' },
56
+ status: 'ok', durationMs: 12, diff: { before: 'a\nb\nc\n', after: 'a\nB\nc\n', path: 'lib/x.js' },
57
+ });
58
+
59
+ // ───────────────────────────────────────────────────────────────────────────
60
+ // §3.1 backbone — the round-trip is the spine of test 1: a stored core rebuilds
61
+ // a descriptor that renders byte-identical to the live one, for result + detail.
62
+ // ───────────────────────────────────────────────────────────────────────────
63
+ for (const [name, op] of [['shell ok', SHELL_OK], ['shell error', SHELL_ERR], ['edit diff', EDIT_DIFF]]) {
64
+ test(`round-trip: descriptorFromStored(serializeOperation(op)) renders == op — '${name}' (guards replay parity backbone)`, () => {
65
+ const restored = descriptorFromStored(serializeOperation(op));
66
+ assert.ok(restored, 'a v:1 core rebuilds a descriptor');
67
+ // result line (the default phase) — duration + meta + status preserved.
68
+ assert.strictEqual(
69
+ renderOperation(restored, { mode: 'ansi' }),
70
+ renderOperation(op, { mode: 'ansi' }),
71
+ 'result line byte-identical after round-trip',
72
+ );
73
+ // detail body — the diff renders identically (empty for non-diff ops).
74
+ assert.strictEqual(
75
+ renderOperation(restored, { mode: 'ansi', phase: 'detail', maxLines: 50 }),
76
+ renderOperation(op, { mode: 'ansi', phase: 'detail', maxLines: 50 }),
77
+ 'detail (diff) body byte-identical after round-trip',
78
+ );
79
+ });
80
+ }
81
+
82
+ test("round-trip: a real diff actually renders a non-empty body (the parity isn't vacuous)", () => {
83
+ const restored = descriptorFromStored(serializeOperation(EDIT_DIFF));
84
+ const body = renderOperation(restored, { mode: 'ansi', phase: 'detail', maxLines: 50 });
85
+ assert.ok(body.length > 0, 'the edit diff body is present, not empty');
86
+ });
87
+
88
+ // ───────────────────────────────────────────────────────────────────────────
89
+ // 1. POSITIVE — native replay parity. A saved {role:'tool'} message carrying
90
+ // `_display` replays through addMessage byte-identical to a fresh render.
91
+ // ───────────────────────────────────────────────────────────────────────────
92
+ test('replay: native tool message with _display renders the result line == fresh renderOperation (ok, with duration/exit) (guards positive parity)', () => {
93
+ const ch = new ChatHistory();
94
+ const out = capture(ch);
95
+ ch.addMessage({ role: 'tool', tag: 'tool', content: 'Command `npm test`:\nExit code: 0\nok', _display: serializeOperation(SHELL_OK) });
96
+ const committed = out.join('');
97
+ const expected = renderOperation(descriptorFromStored(serializeOperation(SHELL_OK)), { mode: 'ansi' }) + '\n';
98
+ // The first committed line is the descriptor result line, verbatim (duration
99
+ // "2.3s" and "exit 0" present — both dropped by summarizeToolResult).
100
+ assert.ok(committed.startsWith(expected), 'replayed result line is byte-identical to a fresh render');
101
+ assert.match(stripAnsi(committed), /2\.3s/, 'real duration shown (not dropped)');
102
+ assert.match(stripAnsi(committed), /exit 0/, 'real exit meta shown');
103
+ });
104
+
105
+ test('replay: native tool message with _display shows the diff body and error styling (guards full fidelity)', () => {
106
+ // Error op: status must survive as an error glyph, not a generic ✓.
107
+ const chErr = new ChatHistory();
108
+ const outErr = capture(chErr);
109
+ chErr.addMessage({ role: 'tool', tag: 'tool', content: 'Command `npm test`:\nExit code: 1\nFAIL', _display: serializeOperation(SHELL_ERR) });
110
+ const errLine = renderOperation(descriptorFromStored(serializeOperation(SHELL_ERR)), { mode: 'ansi' }) + '\n';
111
+ assert.ok(outErr.join('').startsWith(errLine), 'error op replays byte-identical (error-styled)');
112
+ assert.match(stripAnsi(outErr.join('')), /exit 1/, 'error meta preserved');
113
+
114
+ // Diff op: the diff body must be replayed, not just the one-line header.
115
+ const chDiff = new ChatHistory();
116
+ const outDiff = capture(chDiff);
117
+ chDiff.addMessage({ role: 'tool', tag: 'tool', content: 'Updated lib/x.js', _display: serializeOperation(EDIT_DIFF), diffMaxLines: 50 });
118
+ const committed = outDiff.join('');
119
+ const resultLine = renderOperation(descriptorFromStored(serializeOperation(EDIT_DIFF)), { mode: 'ansi' });
120
+ const diffBody = renderOperation(descriptorFromStored(serializeOperation(EDIT_DIFF)), { mode: 'ansi', phase: 'detail', maxLines: 50 });
121
+ assert.ok(diffBody.length > 0, 'precondition: there is a diff body to show');
122
+ assert.strictEqual(committed, resultLine + '\n' + diffBody + '\n', 'replay commits result line + diff body, byte-identical to fresh');
123
+ });
124
+
125
+ // ───────────────────────────────────────────────────────────────────────────
126
+ // 2. NEGATIVE / fallback guard — legacy (no _display) and unknown-version
127
+ // _display both take the summarizeToolResult path, unchanged, no crash.
128
+ // ───────────────────────────────────────────────────────────────────────────
129
+ test('replay: a legacy tool message (no _display) falls back to summarizeToolResult, unchanged (guards fallback)', () => {
130
+ const content = 'Command `npm test`:\nExit code: 0\nall good';
131
+ // Snapshot of today's behavior: the legacy path collapses via summarizeToolResult.
132
+ const expectedSummary = summarizeToolResult(content);
133
+ const ch = new ChatHistory();
134
+ const out = capture(ch);
135
+ ch.addMessage({ role: 'tool', tag: 'tool', content });
136
+ const text = stripAnsi(out.join(''));
137
+ assert.match(text, new RegExp(expectedSummary.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')), 'legacy summary unchanged');
138
+ assert.doesNotMatch(text, /2\.3s/, 'legacy path carries no descriptor duration');
139
+ });
140
+
141
+ test('replay: an unknown-version _display (v:999) falls back, does not crash (guards version gate)', () => {
142
+ assert.strictEqual(descriptorFromStored({ v: 999, tag: 'shell', status: 'ok' }), null, 'unknown version → null descriptor');
143
+ const content = 'Command `npm test`:\nExit code: 0\nall good';
144
+ const ch = new ChatHistory();
145
+ const out = capture(ch);
146
+ // Must NOT throw, and must produce the same fallback as the legacy case.
147
+ ch.addMessage({ role: 'tool', tag: 'tool', content, _display: { v: 999, tag: 'shell', status: 'ok' } });
148
+ const text = stripAnsi(out.join(''));
149
+ assert.match(text, new RegExp(summarizeToolResult(content).replace(/[.*+?^${}()|[\]\\]/g, '\\$&')), 'unknown-version takes the same fallback path');
150
+ });
151
+
152
+ // ───────────────────────────────────────────────────────────────────────────
153
+ // 3. INV.1 — content byte-identical; _display is additive and never fed to the
154
+ // model-facing bound output. We model the agent.js persist step: the result
155
+ // string handed to boundToolOutput becomes the message `content`; `_display`
156
+ // is attached as a sibling and must not alter `content`.
157
+ // ───────────────────────────────────────────────────────────────────────────
158
+ test('inv.1: attaching _display leaves content byte-identical and is additive only (guards the model-facing chokepoint)', () => {
159
+ const resultStr = 'Command `npm test`:\nExit code: 0\nall good';
160
+ // The persist site (agent.js): content = the exact bound result string; the
161
+ // descriptor core rides alongside as a sibling key. NOTE(review): `msg` is built by hand here, so the content assertions below hold by construction — they pin the message shape, not agent.js itself.
162
+ const msg = { role: 'tool', tool_call_id: 'call_1', content: resultStr, _display: serializeOperation(SHELL_OK) };
163
+ assert.strictEqual(msg.content, resultStr, 'content equals the original bound result string, byte-for-byte');
164
+ // The bound (model-facing) value is `content`; nothing from `_display` leaks in.
165
+ assert.ok(!msg.content.includes('2.3s'), 'descriptor duration is NOT in the model-facing content');
166
+ assert.ok(!msg.content.includes('_display'), 'no descriptor framing in content');
167
+ // _display carries the display-only core, separate from content. These two checks do exercise serializeOperation's output.
168
+ assert.strictEqual(msg._display.v, 1);
169
+ assert.strictEqual(msg._display.durationMs, 2300);
170
+ });
171
+
172
+ // ───────────────────────────────────────────────────────────────────────────
173
+ // 4. ANTI-PING-PONG — the Phase 1 fresh-render characterization bytes are still
174
+ // byte-identical (threading the descriptor through persistence changed
175
+ // nothing the live path renders).
176
+ // ───────────────────────────────────────────────────────────────────────────
177
+ test('anti-ping-pong: Phase 1 fresh-render bytes unchanged (guards no live-path regression)', () => {
178
+ // Re-pin a representative subset of the Phase 1 PINNED bytes (see
179
+ // render-operation.test.js). If descriptor threading altered the live render,
180
+ // these break.
181
+ const shellOk = renderOperation(
182
+ buildToolOperation({ status: 'success', tag: 'shell', arg: 'npm install', attrs: { command: 'npm install' }, durationMs: 2300, meta: { exit_code: 0 } }),
183
+ { mode: 'ansi', phase: 'result' },
184
+ );
185
+ assert.strictEqual(
186
+ shellOk,
187
+ ' \x1b[38;5;40m✓\x1b[0m \x1b[38;5;214mshell\x1b[0m \x1b[2m·\x1b[0m \x1b[38;5;214mnpm install\x1b[0m \x1b[2m·\x1b[0m \x1b[38;5;244m2.3s\x1b[0m \x1b[2m·\x1b[0m \x1b[38;5;244mexit 0\x1b[0m',
188
+ );
189
+ });
package/test/replay-web-aggregate.test.js
@@ -0,0 +1,291 @@
1
+ 'use strict';
2
+
3
+ // Output Refactor — Phase 6c-ii: two-rail web replay AGGREGATION.
4
+ //
5
+ // This is the only web commit with a VISIBLE replay change: replayed web activity
6
+ // renders as the aggregated `✓ web · …` committed summary — byte-identical to the
7
+ // LIVE committed output — on the native rail, the XML rail, and mixed sessions,
8
+ // including multi-group interleaving and cross-iteration/cross-blob/cross-message
9
+ // COALESCING into a single summary.
10
+ //
11
+ // THE ORACLE is the live committed output, NOT a bare aggregateWebOps re-call:
12
+ // each test drives the real createWebActivityTracker (with a fake writerModule
13
+ // that records endActivity(id, line)) through start()/end(), triggering flush()
14
+ // exactly where the live runtime would — a non-web onToolStart (chat-turn.js:222),
15
+ // a terminal onAssistantMessage with content (chat-turn.js:389-391), and the
16
+ // turn-end `finally` (chat-turn.js:501). The recorded endActivity line(s), in
17
+ // order, are the oracle. The SAME op sequence's persisted messages are then run
18
+ // through chat-session.displayLoadedMessages against a fake writer, and the
19
+ // committed scrollback lines must be byte-identical AND in the same order.
20
+
21
+ const { test } = require('node:test');
22
+ const assert = require('node:assert');
23
+
24
+ // Stable colour env for byte comparisons (node:test isolates each file's process).
25
+ process.stdout.isTTY = true;
26
+ delete process.env.NO_COLOR;
27
+
28
+ const { buildToolOperation, serializeOperation } = require('../lib/ui/tool-operation');
29
+ const { renderOperation } = require('../lib/ui/render-operation');
30
+ const { ChatHistory } = require('../lib/ui/chat-history');
31
+ const { createChatSession } = require('../lib/commands/chat-session');
32
+ const {
33
+ serializeWebOp,
34
+ createWebActivityTracker,
35
+ } = require('../lib/ui/web-activity');
36
+
37
+ const stripAnsi = (s) => String(s).replace(/\x1b\[[0-9;]*m/g, '');
38
+ const CFG = { diff_max_lines: 50, shell_preview_lines: 5 };
39
+
40
+ // ── The live ORACLE driver ──────────────────────────────────────────────────
41
+ // `steps` is a list of step objects describing the live runtime, in order:
42
+ // { web: { tag, input, ctx, durationMs } } → tracker.start()/end()
43
+ // { flush: 'tool' | 'assistant' | 'finally' } → tracker.flush() (a live trigger)
44
+ // Returns the ordered list of committed summary LINES (each endActivity arg).
45
+ function liveOracleLines(steps) {
46
+ const lines = [];
47
+ const tracker = createWebActivityTracker({ writerModule: {
48
+ startActivity: () => {}, updateActivity: () => {}, // no-op stubs: only endActivity's line argument is recorded
49
+ endActivity: (_id, line) => lines.push(line),
50
+ } });
51
+ for (const s of steps) {
52
+ if (s.web) {
53
+ tracker.start(s.web.tag, s.web.input);
54
+ tracker.end(s.web.tag, 'done', s.web.durationMs, s.web.ctx);
55
+ } else if (s.flush && tracker.isOpen()) { // the flush value is only a label (any truthy value works); skipped unless tracker.isOpen()
56
+ tracker.flush();
57
+ }
58
+ }
59
+ return lines;
60
+ }
61
+
62
+ // ── The REAL replay path, capturing every committed scrollback chunk in order ──
63
+ function replayCommits(loadedMessages, cfg) {
64
+ const ch = new ChatHistory();
65
+ const out = [];
66
+ ch._commit = (t) => out.push(t); // stub _commit so each committed chunk is appended to `out`
67
+ const session = createChatSession({ chatHistory: ch, getConfig: () => cfg || CFG }); // uses the module-level CFG when cfg is falsy
68
+ session.displayLoadedMessages(loadedMessages);
69
+ return out;
70
+ }
71
+
72
+ // Faithful models of agent.js per-rail persistence (mirrors replay-web-persist):
73
+ // native attaches `_display` only when truthy; XML pushes a `displayCore || null`
74
+ // per slot into the {role:'user'} feedback blob's `_display[]`.
75
+ function nativeToolMsg(content, core) { // builds a {role:'tool'} message with an optional `_display` core
76
+ const msg = { role: 'tool', content };
77
+ if (core) msg._display = core; // the `_display` key is omitted entirely when core is falsy
78
+ return msg;
79
+ }
80
+ function assistantMsg(content) { return { role: 'assistant', content }; } // plain assistant message; never carries `_display`
81
+ function xmlBlob(results, cores) { // builds the {role:'user'} tool-results feedback message with an optional per-slot `_display` array
82
+ const m = {
83
+ role: 'user',
84
+ content: `Tool execution results:\n\n${results.join('\n\n')}\n\nContinue with the task. If everything is done, summarize what was accomplished.`, // results joined by blank lines between a fixed header and trailer
85
+ };
86
+ if (cores !== undefined) m._display = cores.map((c) => c || null); // falsy slots become null; omitting `cores` leaves no `_display` key
87
+ return m;
88
+ }
89
+
90
+ // Op fixtures — ctx shape exactly as onToolEnd receives it ({ attrs, meta, error }).
91
+ const SEARCH = {
92
+ input: 'коррупционные скандалы 2024',
93
+ ctx: { attrs: { query: 'коррупционные скандалы 2024' }, meta: { status_code: 200, bytes: 4096 }, error: null },
94
+ };
95
+ const FETCH_OK = {
96
+ input: 'https://example.com/article',
97
+ ctx: { attrs: { url: 'https://example.com/article' }, meta: { status_code: 200, bytes: 2048 }, error: null },
98
+ };
99
+ const EDIT_DIFF = buildToolOperation({
100
+ id: 'tool-1', tag: 'edit_file', arg: 'lib/x.js', attrs: { path: 'lib/x.js' },
101
+ status: 'ok', durationMs: 12, diff: { before: 'a\nb\nc\n', after: 'a\nB\nc\n', path: 'lib/x.js' },
102
+ });
103
+
104
+ // The committed web summaries among a replay's scrollback chunks: a chunk is kept when its
105
+ // ANSI-stripped text contains ` web ` and a ✓ or ● glyph (absence of \n is expected but not checked here).
106
+ function webLines(commits) {
107
+ return commits.filter((c) => / web /.test(stripAnsi(c)) && /[✓●]/.test(stripAnsi(c))); // returns the original (unstripped) chunks, in order
108
+ }
109
+
110
+ // ───────────────────────────────────────────────────────────────────────────
111
+ // 1. INTERLEAVING — web_search, edit_file, http_get → [web 1][edit][web 2], in
112
+ // order, each committed at its position (proves per-position commit; the
113
+ // non-web op breaks the run, so the two web ops do NOT coalesce).
114
+ // ───────────────────────────────────────────────────────────────────────────
115
+ test('interleaving: web · non-web · web commits two summaries straddling the edit line, byte-identical and in order', () => {
116
+ // Oracle: web_search opens; the edit's onToolStart flushes summary 1; http_get
117
+ // opens a new group; turn-end `finally` flushes summary 2.
118
+ const oracle = liveOracleLines([
119
+ { web: { tag: 'web_search', input: SEARCH.input, ctx: SEARCH.ctx, durationMs: 300 } },
120
+ { flush: 'tool' },
121
+ { web: { tag: 'http_get', input: FETCH_OK.input, ctx: FETCH_OK.ctx, durationMs: 120 } },
122
+ { flush: 'finally' },
123
+ ]);
124
+ assert.strictEqual(oracle.length, 2, 'live commits two separate web summaries');
125
+
126
+ // Native rail: tool(web) · tool(edit) · tool(web).
127
+ const commits = replayCommits([
128
+ nativeToolMsg('web search', serializeWebOp(SEARCH.ctx, 'web_search', 300)),
129
+ nativeToolMsg('edited', serializeOperation(EDIT_DIFF)),
130
+ nativeToolMsg('fetched', serializeWebOp(FETCH_OK.ctx, 'http_get', 120)),
131
+ ]);
132
+ const wl = webLines(commits);
133
+ assert.deepStrictEqual(wl, oracle, 'two web summaries replay byte-identical to the oracle, in order');
134
+ // Order proof: summary 1 lands BEFORE the edit line, summary 2 AFTER it.
135
+ const editLine = renderOperation(EDIT_DIFF, { mode: 'ansi', phase: 'result' });
136
+ const editIdx = commits.findIndex((c) => c.includes(editLine));
137
+ const s1 = commits.indexOf(oracle[0]);
138
+ const s2 = commits.lastIndexOf(oracle[1]);
139
+ assert.ok(s1 >= 0 && editIdx >= 0 && s2 >= 0, 'all three landmarks present');
140
+ assert.ok(s1 < editIdx && editIdx < s2, 'order is [web 1][edit][web 2]');
141
+ });
142
+
143
+ // ───────────────────────────────────────────────────────────────────────────
144
+ // 2. CROSS-ITERATION COALESCING (the key test) — web_search(iter1) → http_get
145
+ // (iter2) → terminal answer commits ONE summary, on BOTH rails. The ops live
146
+ // in separate {role:'user'} blobs (XML) / separate {role:'tool'} messages
147
+ // (native); the intermediate iteration must not flush.
148
+ // ───────────────────────────────────────────────────────────────────────────
149
+ test('cross-iteration coalescing — XML rail (two separate {role:user} blobs) → ONE summary, byte-identical', () => {
150
+ // Oracle: both web ops in one open group; only the terminal assistant flushes.
151
+ const oracle = liveOracleLines([
152
+ { web: { tag: 'web_search', input: SEARCH.input, ctx: SEARCH.ctx, durationMs: 300 } },
153
+ { web: { tag: 'http_get', input: FETCH_OK.input, ctx: FETCH_OK.ctx, durationMs: 120 } },
154
+ { flush: 'assistant' },
155
+ ]);
156
+ assert.strictEqual(oracle.length, 1, 'live commits exactly one coalesced summary');
157
+
158
+ const commits = replayCommits([
159
+ assistantMsg(''), // iter1 tool-call-only (empty)
160
+ xmlBlob(['search results…'], [serializeWebOp(SEARCH.ctx, 'web_search', 300)]),
161
+ assistantMsg(''), // iter2 tool-call-only (empty)
162
+ xmlBlob(['page body…'], [serializeWebOp(FETCH_OK.ctx, 'http_get', 120)]),
163
+ assistantMsg('Here is the answer.'), // terminal answer
164
+ ]);
165
+ assert.deepStrictEqual(webLines(commits), oracle, 'XML rail coalesces two cross-blob iterations into one summary');
166
+ });
167
+
168
+ test('cross-iteration coalescing — native rail (two separate {role:tool} messages) → ONE summary, byte-identical', () => {
169
+ const oracle = liveOracleLines([
170
+ { web: { tag: 'web_search', input: SEARCH.input, ctx: SEARCH.ctx, durationMs: 300 } },
171
+ { web: { tag: 'http_get', input: FETCH_OK.input, ctx: FETCH_OK.ctx, durationMs: 120 } },
172
+ { flush: 'assistant' },
173
+ ]);
174
+ const commits = replayCommits([
175
+ assistantMsg(''), // iter1 tool-call-only
176
+ nativeToolMsg('web search', serializeWebOp(SEARCH.ctx, 'web_search', 300)),
177
+ assistantMsg(''), // iter2 tool-call-only
178
+ nativeToolMsg('fetched', serializeWebOp(FETCH_OK.ctx, 'http_get', 120)),
179
+ assistantMsg('Here is the answer.'), // terminal answer
180
+ ]);
181
+ assert.deepStrictEqual(webLines(commits), oracle, 'native rail coalesces two cross-message iterations into one summary');
182
+ });
183
+
184
+ // ───────────────────────────────────────────────────────────────────────────
185
+ // 3. NO PREMATURE FLUSH ON INTERMEDIATE ASSISTANT — an empty-content assistant
186
+ // message sitting BETWEEN two web iterations must not split the group; still
187
+ // ONE summary. Guards the §1 flush condition (terminal-with-content only).
188
+ // ───────────────────────────────────────────────────────────────────────────
189
+ test('no premature flush: an empty-content assistant between two web iterations yields ONE summary', () => {
190
+ const oracle = liveOracleLines([
191
+ { web: { tag: 'web_search', input: SEARCH.input, ctx: SEARCH.ctx, durationMs: 300 } },
192
+ { web: { tag: 'http_get', input: FETCH_OK.input, ctx: FETCH_OK.ctx, durationMs: 120 } },
193
+ { flush: 'finally' },
194
+ ]);
195
+ const commits = replayCommits([
196
+ nativeToolMsg('web search', serializeWebOp(SEARCH.ctx, 'web_search', 300)),
197
+ assistantMsg(' '), // whitespace-only / empty display content → MUST NOT flush
198
+ nativeToolMsg('fetched', serializeWebOp(FETCH_OK.ctx, 'http_get', 120)),
199
+ ]);
200
+ assert.deepStrictEqual(webLines(commits), oracle, 'the intermediate empty assistant does not split the group');
201
+ // And it really is a single line, not two.
202
+ assert.strictEqual(webLines(commits).length, 1);
203
+ });
204
+
205
+ // ───────────────────────────────────────────────────────────────────────────
206
+ // 4. GATE FAIL-SAFE — an XML blob with a null/unknown slot still drops to the
207
+ // legacy whole-blob summary (after flushing any open run); non-null slots are
208
+ // NOT partially rendered. Preserves the 6b/6c-i invariant.
209
+ // ───────────────────────────────────────────────────────────────────────────
210
+ test('gate fail-safe: a null slot still drops the whole blob to the legacy summary after flushing the open run', () => {
211
+ const results = ['edited lib/x.js', 'web search results…'];
212
+ const blob = xmlBlob(results, [serializeOperation(EDIT_DIFF), null]); // one null slot → gate fails
213
+ // A web run is open from a PRIOR native web op; the failing blob must flush it
214
+ // first, then render the legacy whole-blob summary (no per-slot render).
215
+ const commits = replayCommits([
216
+ nativeToolMsg('web search', serializeWebOp(SEARCH.ctx, 'web_search', 300)),
217
+ blob,
218
+ ]);
219
+ // The open run flushed → exactly one web summary committed (the prior op).
220
+ assert.strictEqual(webLines(commits).length, 1, 'the open web run is flushed before the fallback');
221
+ // Whole-blob legacy summary present; the normal slot's diff body is NOT rendered.
222
+ const body = blob.content
223
+ .replace(/^Tool execution results[^\n]*\n+/, '')
224
+ .replace(/\n+Continue with the task\.[\s\S]*$/, '')
225
+ .trim();
226
+ const ch = new ChatHistory();
227
+ const legacy = [];
228
+ ch._commit = (t) => legacy.push(t);
229
+ ch.addMessage({ role: 'tool', tag: 'tool', content: body });
230
+ const joined = commits.join('');
231
+ assert.ok(joined.includes(legacy.join('')), 'the whole-blob legacy summary is rendered');
232
+ const diffBody = renderOperation(EDIT_DIFF, { mode: 'ansi', phase: 'detail', maxLines: 50 });
233
+ assert.ok(diffBody.length > 0 && !joined.includes(diffBody), 'the non-web slot is NOT partially rendered');
234
+ });
235
+
236
+ // ───────────────────────────────────────────────────────────────────────────
237
+ // 5. SCROLL-FIX SAFETY — the replay path NEVER instantiates the live web tracker
238
+ // and never calls a live-region method. Guards idle-pause/startup-resync/anim.
239
+ // ───────────────────────────────────────────────────────────────────────────
240
+ test('scroll-fix safety: replay never instantiates createWebActivityTracker nor calls a live-region method', () => {
241
+ const webMod = require('../lib/ui/web-activity');
242
+ const realTracker = webMod.createWebActivityTracker;
243
+ let trackerCalls = 0;
244
+ webMod.createWebActivityTracker = (...a) => { trackerCalls++; return realTracker(...a); }; // NOTE(review): counts only callers that read the export at call time; a consumer that destructured it at require time bypasses this counter — confirm how chat-session imports it
245
+
246
+ // Spy the writer's ACTIVITY-REGION methods — the ones the live web tracker
247
+ // drives (startActivity/updateActivity/endActivity, plus cancelActivity). Replay
248
+ // must touch none of them. (redrawLive/setModal are general teardown — clearMessages
249
+ // calls redrawLive — and are NOT part of the web live region, so not spied.)
250
+ const writer = require('../lib/ui/writer'); // NOTE(review): same caveat — spies on this exports object miss references captured before patching
251
+ const liveMethods = ['startActivity', 'updateActivity', 'endActivity', 'cancelActivity'];
252
+ const saved = {};
253
+ const hits = [];
254
+ for (const name of liveMethods) {
255
+ saved[name] = writer[name];
256
+ writer[name] = (...a) => { hits.push(name); if (saved[name]) return saved[name](...a); }; // records the call, then forwards to the original if one existed (forwarded without a `this` receiver)
257
+ }
258
+ try {
259
+ const ch = new ChatHistory();
260
+ ch._commit = () => {}; // swallow scrollback; we only watch the live region
261
+ const session = createChatSession({ chatHistory: ch, getConfig: () => CFG });
262
+ session.displayLoadedMessages([
263
+ nativeToolMsg('web search', serializeWebOp(SEARCH.ctx, 'web_search', 300)),
264
+ assistantMsg(''),
265
+ nativeToolMsg('fetched', serializeWebOp(FETCH_OK.ctx, 'http_get', 120)),
266
+ assistantMsg('Here is the answer.'),
267
+ ]);
268
+ assert.strictEqual(trackerCalls, 0, 'replay does NOT instantiate the live web tracker');
269
+ assert.deepStrictEqual(hits, [], 'replay calls no live-region method');
270
+ } finally {
271
+ webMod.createWebActivityTracker = realTracker;
272
+ for (const name of liveMethods) writer[name] = saved[name]; // restores originals; a name the writer never defined ends up as an own property set to undefined
273
+ }
274
+ });
275
+
276
+ // ───────────────────────────────────────────────────────────────────────────
277
+ // 6. ANTI-PING-PONG — fresh LIVE web display unchanged; the pure aggregator the
278
+ // replay path reuses is the SAME one the live tracker commits, so a single
279
+ // web op replays exactly as it renders live.
280
+ // ───────────────────────────────────────────────────────────────────────────
281
+ test('anti-ping-pong: a single web op replays byte-identical to its fresh LIVE committed summary', () => {
282
+ const oracle = liveOracleLines([
283
+ { web: { tag: 'web_search', input: SEARCH.input, ctx: SEARCH.ctx, durationMs: 300 } },
284
+ { flush: 'finally' },
285
+ ]);
286
+ const commits = replayCommits([nativeToolMsg('web search', serializeWebOp(SEARCH.ctx, 'web_search', 300))]);
287
+ assert.deepStrictEqual(webLines(commits), oracle, 'one web op replays identically to its live committed line');
288
+ // Live render is untouched by 6c-ii — the committed line still carries `web` + `search`.
289
+ assert.match(stripAnsi(oracle[0]), /web/);
290
+ assert.match(stripAnsi(oracle[0]), /search/);
291
+ });