@semalt-ai/code 1.19.0 → 1.20.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. package/.claude/settings.local.json +2 -1
  2. package/ARCHITECTURE.md +6 -95
  3. package/CLAUDE.md +196 -1874
  4. package/README.md +1 -1
  5. package/docs/ARCHITECTURE.md +1321 -0
  6. package/docs/CONFIG.md +340 -0
  7. package/docs/HISTORY.md +245 -0
  8. package/index.js +1 -1
  9. package/lib/agent.js +145 -16
  10. package/lib/api.js +28 -3
  11. package/lib/commands/chat-session.js +188 -4
  12. package/lib/commands/chat-slash.js +16 -0
  13. package/lib/commands/chat-turn.js +319 -52
  14. package/lib/commands/chat.js +12 -8
  15. package/lib/config.js +27 -0
  16. package/lib/constants.js +30 -1
  17. package/lib/headless.js +36 -1
  18. package/lib/images.js +8 -2
  19. package/lib/permissions.js +23 -16
  20. package/lib/prompts.js +15 -3
  21. package/lib/tool_registry.js +357 -53
  22. package/lib/tool_specs.js +42 -8
  23. package/lib/tools.js +80 -19
  24. package/lib/ui/anim.js +86 -0
  25. package/lib/ui/ansi.js +17 -27
  26. package/lib/ui/chat-history.js +253 -71
  27. package/lib/ui/create-ui.js +67 -24
  28. package/lib/ui/diff.js +90 -25
  29. package/lib/ui/file-activity.js +229 -0
  30. package/lib/ui/format.js +173 -28
  31. package/lib/ui/input-field.js +5 -4
  32. package/lib/ui/md-stream.js +234 -0
  33. package/lib/ui/render-operation.js +113 -0
  34. package/lib/ui/select.js +1 -4
  35. package/lib/ui/status-bar.js +99 -57
  36. package/lib/ui/stream.js +20 -13
  37. package/lib/ui/theme.js +190 -45
  38. package/lib/ui/tool-operation.js +190 -0
  39. package/lib/ui/utils.js +9 -5
  40. package/lib/ui/web-activity.js +58 -6
  41. package/lib/ui/writer.js +159 -45
  42. package/lib/ui.js +1 -1
  43. package/package.json +1 -1
  44. package/test/anim-driver.test.js +153 -0
  45. package/test/ask-user-display.test.js +226 -0
  46. package/test/ask-user-gate.test.js +231 -0
  47. package/test/chat-history-nocolor.test.js +155 -0
  48. package/test/chat-relogin.test.js +207 -0
  49. package/test/defer-detail-band.test.js +403 -0
  50. package/test/detail-band-tab-flatten.test.js +242 -0
  51. package/test/exec-diff.test.js +268 -0
  52. package/test/executors.test.js +250 -13
  53. package/test/extract-tool-calls.test.js +37 -3
  54. package/test/file-activity.test.js +542 -0
  55. package/test/grep-path-target.test.js +227 -0
  56. package/test/harness/chat-harness.js +2 -1
  57. package/test/headless.test.js +146 -1
  58. package/test/input-field-ctrl-o.test.js +37 -0
  59. package/test/live-height-physical.test.js +281 -0
  60. package/test/max-iterations.test.js +9 -7
  61. package/test/md-stream.test.js +183 -0
  62. package/test/narration-ordering.test.js +309 -0
  63. package/test/native-dispatch.test.js +53 -0
  64. package/test/native-live-narration.test.js +254 -0
  65. package/test/output-heredoc-leak.test.js +195 -0
  66. package/test/output-preview.test.js +245 -0
  67. package/test/permission-flush.test.js +302 -0
  68. package/test/permissions.test.js +199 -0
  69. package/test/read-paginate.test.js +1 -1
  70. package/test/render-operation.test.js +317 -0
  71. package/test/replay-descriptor-xml.test.js +216 -0
  72. package/test/replay-descriptor.test.js +189 -0
  73. package/test/replay-web-aggregate.test.js +291 -0
  74. package/test/replay-web-persist.test.js +241 -0
  75. package/test/running-glyph-anim.test.js +111 -0
  76. package/test/status-bar-driver.test.js +93 -0
  77. package/test/status-bar-resync.test.js +188 -0
  78. package/test/stream-parser.test.js +24 -0
  79. package/test/theme-palette.test.js +166 -0
  80. package/test/truncate-visible.test.js +78 -0
  81. package/test/view-image.test.js +199 -0
  82. package/test/web-activity-ordering.test.js +12 -3
  83. package/path +0 -1
@@ -0,0 +1,268 @@
1
+ 'use strict';
2
+
3
+ // Execution-time file-edit diffs.
4
+ //
5
+ // The fix moves file-edit diff rendering OUT of the permission modal and INTO
6
+ // the execution-result path, so the diff renders for EVERY edit regardless of
7
+ // approval state (manual-approved, auto-approved) or entry mode (fresh /
8
+ // --resume / /history / /chats). These tests pin the three layers of that fix:
9
+ //
10
+ // 1. lib/ui/diff.js renderDiff — the changed-line cap (head+tail + notice for
11
+ // a large edit; full render for a small edit or a series of small edits).
12
+ // 2. lib/ui/diff.js buildExecutionDiff — the once-per-edit decision: renders
13
+ // when a payload is present, returns null on error / no payload (a loaded
14
+ // history turn carries none, so past turns are NOT replayed) / no-op edit.
15
+ // 3. The mutating executors (write/append/edit_file/replace_in_file) attach
16
+ // the before/after payload UNCONDITIONALLY — not gated by the permission
17
+ // modal — which is what makes an auto-approved edit show its diff.
18
+ //
19
+ // Home-based paths are redirected into a temp dir BEFORE any lib module loads,
20
+ // matching test/readonly-tools.test.js.
21
+
22
+ const os = require('node:os');
23
+ const fs = require('node:fs');
24
+ const path = require('node:path');
25
+
26
+ const TMP_HOME = fs.mkdtempSync(path.join(os.tmpdir(), 'semalt-diff-home-'));
27
+ const PREV_HOME = process.env.HOME;
28
+ const PREV_USERPROFILE = process.env.USERPROFILE;
29
+ process.env.HOME = TMP_HOME;
30
+ process.env.USERPROFILE = TMP_HOME;
31
+
32
+ const { test, before, after } = require('node:test');
33
+ const assert = require('node:assert');
34
+
35
+ const ui = require('../lib/ui');
36
+ const { renderDiff, buildExecutionDiff } = require('../lib/ui/diff');
37
+ const { summarizeToolResult } = require('../lib/ui/format');
38
+ const { createPermissionManager } = require('../lib/permissions');
39
+ const { createToolExecutor } = require('../lib/tools');
40
+ const { DEFAULT_CONFIG } = require('../lib/constants');
41
+
42
+ // renderDiff styles with ANSI only when stdout.isTTY; under `node --test` it is
43
+ // false, so output is already plain. stripAnsi defends against a TTY harness.
44
+ const stripAnsi = (s) => String(s).replace(/\x1b\[[0-9;]*m/g, '');
45
+
46
+ function mkExec({ config = {}, pmOpts = {} } = {}) {
47
+ const pm = createPermissionManager(ui, pmOpts);
48
+ return createToolExecutor(pm, ui, () => ({
49
+ max_file_size_kb: 512,
50
+ command_timeout_ms: 30000,
51
+ max_output_lines: 50,
52
+ ...config,
53
+ }));
54
+ }
55
+
56
+ let CWD;
57
+ let PREV_CWD;
58
+
59
+ before(() => {
60
+ PREV_CWD = process.cwd();
61
+ CWD = fs.mkdtempSync(path.join(os.tmpdir(), 'semalt-diff-cwd-'));
62
+ process.chdir(CWD);
63
+ });
64
+
65
+ after(() => {
66
+ process.chdir(PREV_CWD);
67
+ if (PREV_HOME === undefined) delete process.env.HOME; else process.env.HOME = PREV_HOME;
68
+ if (PREV_USERPROFILE === undefined) delete process.env.USERPROFILE; else process.env.USERPROFILE = PREV_USERPROFILE;
69
+ });
70
+
71
+ const tmpFile = (name, content) => {
72
+ const p = path.join(CWD, name);
73
+ fs.writeFileSync(p, content);
74
+ return p;
75
+ };
76
+
77
+ // ── 1. renderDiff cap ───────────────────────────────────────────────────────
78
+
79
+ test('renderDiff: small edit renders fully with no truncation notice', () => {
80
+ const out = stripAnsi(renderDiff('a\nb\nc', 'a\nB\nc', 'f.txt', { maxLines: 50 }));
81
+ assert.match(out, /B/, 'the changed line is shown');
82
+ assert.doesNotMatch(out, /more changed lines/, 'no cap notice for a small edit');
83
+ });
84
+
85
+ test('renderDiff: edit exceeding diff_max_lines renders head+tail + notice', () => {
86
+ const before = Array.from({ length: 100 }, (_, i) => `old ${i}`).join('\n');
87
+ const after = Array.from({ length: 100 }, (_, i) => `new ${i}`).join('\n');
88
+ const out = stripAnsi(renderDiff(before, after, 'big.txt', { maxLines: 10 }));
89
+ // 100 removed + 100 added = 200 changed lines, capped at 10 (head 6 + tail 4).
90
+ assert.match(out, /…\s*190 more changed lines \(200 total\)/, 'notice reports elided + total changed lines');
91
+ // Head shows the first removed line; tail shows the last added line.
92
+ assert.match(out, /old 0/, 'head of the diff is present');
93
+ assert.match(out, /new 99/, 'tail of the diff is present');
94
+ // Bounded: head(6) + notice + tail(4) = 11 lines, far below the input.
95
+ assert.ok(out.split('\n').length <= 14, 'capped output is bounded');
96
+ });
97
+
98
+ test('renderDiff: a series of small edits each render fully (no spurious truncation)', () => {
99
+ for (let i = 0; i < 5; i++) {
100
+ const before = `line one\nline two\nline three`;
101
+ const after = `line one\nline two CHANGED ${i}\nline three`;
102
+ const out = stripAnsi(renderDiff(before, after, `s${i}.txt`, { maxLines: 50 }));
103
+ assert.match(out, new RegExp(`CHANGED ${i}`), `edit ${i} renders its change`);
104
+ assert.doesNotMatch(out, /more changed lines/, `edit ${i} is not truncated`);
105
+ }
106
+ });
107
+
108
+ test('renderDiff: a large NEW file is also capped on changed (added) lines', () => {
109
+ const after = Array.from({ length: 80 }, (_, i) => `gen ${i}`).join('\n');
110
+ const out = stripAnsi(renderDiff('', after, 'new.txt', { maxLines: 10 }));
111
+ assert.match(out, /more changed lines \(80 total\)/, 'new-file additions count as changed lines');
112
+ });
113
+
114
+ // ── 1b. D3 (Output Refactor Phase 2): the diff body no longer repeats the path ─
115
+ // The result line above the diff already states the file path (the descriptor's
116
+ // `target`). The diff body therefore drops its redundant path header — the path
117
+ // is stated once, on the result line. The @@ hunk ranges and the (new file)
118
+ // marker remain; only the bare path-header line is gone.
119
+
120
+ test('D3: an existing-file diff body does NOT restate the file path', () => {
121
+ const out = stripAnsi(renderDiff('a\nb\nc', 'a\nB\nc', 'unique/path/to/file.js', { maxLines: 50 }));
122
+ assert.doesNotMatch(out, /unique\/path\/to\/file\.js/, 'the path is not repeated in the diff body');
123
+ assert.match(out, /B/, 'the changed line is still shown');
124
+ assert.match(out, /@@/, 'the hunk range header remains');
125
+ });
126
+
127
+ test('D3: the capped diff body also omits the redundant path header', () => {
128
+ const before = Array.from({ length: 100 }, (_, i) => `old ${i}`).join('\n');
129
+ const after = Array.from({ length: 100 }, (_, i) => `new ${i}`).join('\n');
130
+ const out = stripAnsi(renderDiff(before, after, 'capped/unique-path.txt', { maxLines: 10 }));
131
+ assert.doesNotMatch(out, /capped\/unique-path\.txt/, 'no path header on the capped path');
132
+ // The cap notice + head + tail are unchanged.
133
+ assert.match(out, /…\s*190 more changed lines \(200 total\)/);
134
+ assert.match(out, /old 0/, 'head of the diff is present');
135
+ assert.match(out, /new 99/, 'tail of the diff is present');
136
+ });
137
+
138
+ test('D3: a new-file diff keeps its @@ (new file) marker but not a path header', () => {
139
+ const out = stripAnsi(renderDiff('', 'one\ntwo\nthree', 'brand/new-file.js', { maxLines: 50 }));
140
+ assert.doesNotMatch(out, /brand\/new-file\.js/, 'no path header for a new file');
141
+ assert.match(out, /\(new file\)/, 'the new-file hunk marker remains');
142
+ assert.match(out, /@@ -0,0 \+1,3 @@/, 'the new-file hunk range remains');
143
+ });
144
+
145
+ // ── 2. buildExecutionDiff (the once-per-edit decision) ───────────────────────
146
+
147
+ test('buildExecutionDiff: renders the diff when a payload is present', () => {
148
+ const out = buildExecutionDiff({ diff: { before: 'a\nb', after: 'a\nB', path: 'f.txt' }, maxLines: 50 });
149
+ assert.ok(out, 'a diff string is produced');
150
+ assert.match(stripAnsi(out), /B/);
151
+ });
152
+
153
+ test('buildExecutionDiff: returns null on a failed edit (no diff under an error)', () => {
154
+ const out = buildExecutionDiff({ diff: { before: 'a', after: 'b', path: 'f' }, error: { message: 'boom' }, maxLines: 50 });
155
+ assert.strictEqual(out, null);
156
+ });
157
+
158
+ test('buildExecutionDiff: returns null with no payload — loaded turns are not replayed', () => {
159
+ // A loaded-history tool result reaches the UI without a before/after payload
160
+ // (diffs are never persisted), so nothing replays for past turns.
161
+ assert.strictEqual(buildExecutionDiff({ diff: null, maxLines: 50 }), null);
162
+ assert.strictEqual(buildExecutionDiff({ maxLines: 50 }), null);
163
+ assert.strictEqual(buildExecutionDiff({ diff: { before: 'a' }, maxLines: 50 }), null);
164
+ });
165
+
166
+ test('buildExecutionDiff: returns null for a no-op edit (before === after)', () => {
167
+ assert.strictEqual(buildExecutionDiff({ diff: { before: 'x\ny', after: 'x\ny', path: 'f' }, maxLines: 50 }), null);
168
+ });
169
+
170
+ test('buildExecutionDiff: honors diff_max_lines for the cap', () => {
171
+ const before = Array.from({ length: 60 }, (_, i) => `o${i}`).join('\n');
172
+ const after = Array.from({ length: 60 }, (_, i) => `n${i}`).join('\n');
173
+ const out = stripAnsi(buildExecutionDiff({ diff: { before, after, path: 'p' }, maxLines: 8 }));
174
+ assert.match(out, /more changed lines \(120 total\)/);
175
+ });
176
+
177
+ // ── 3. Executors attach the before/after payload unconditionally ─────────────
178
+ // The payload is produced by the executor itself, NOT by the permission modal —
179
+ // so it is present whether the call was manual-approved, auto-approved, or run
180
+ // in any entry mode. (The agent loop forwards it to onToolEnd, which renders it
181
+ // exactly once — the modal no longer carries the full diff.)
182
+
183
+ test('write (new file): payload captures empty before → full content after', async () => {
184
+ const exec = mkExec();
185
+ const f = path.join(CWD, 'created.txt');
186
+ const r = await exec.agentExecFile('write', f, 'hello\nworld');
187
+ assert.strictEqual(r.status, 'ok');
188
+ assert.strictEqual(r._diffBefore, '');
189
+ assert.strictEqual(r._diffAfter, 'hello\nworld');
190
+ });
191
+
192
+ test('write (existing file): payload captures prior content as before', async () => {
193
+ const exec = mkExec();
194
+ const f = tmpFile('over.txt', 'old text');
195
+ const r = await exec.agentExecFile('write', f, 'new text');
196
+ assert.strictEqual(r._diffBefore, 'old text');
197
+ assert.strictEqual(r._diffAfter, 'new text');
198
+ });
199
+
200
+ test('append: payload after is prior + appended content', async () => {
201
+ const exec = mkExec();
202
+ const f = tmpFile('log.txt', 'line1\n');
203
+ const r = await exec.agentExecFile('append', f, 'line2\n');
204
+ assert.strictEqual(r._diffBefore, 'line1\n');
205
+ assert.strictEqual(r._diffAfter, 'line1\nline2\n');
206
+ });
207
+
208
+ test('edit_file (single line): payload reflects the one-line swap', async () => {
209
+ const exec = mkExec();
210
+ const f = tmpFile('e.txt', 'a\nb\nc');
211
+ const r = await exec.agentExecFile('edit_file', f, 2, 'B');
212
+ assert.strictEqual(r._diffBefore, 'a\nb\nc');
213
+ assert.strictEqual(r._diffAfter, 'a\nB\nc');
214
+ });
215
+
216
+ test('edit_file (line range): payload reflects the block replacement', async () => {
217
+ const exec = mkExec();
218
+ const f = tmpFile('r.txt', 'a\nb\nc\nd');
219
+ const r = await exec.agentExecFile('edit_file', f, 2, 'X\nY\nZ', 3);
220
+ assert.strictEqual(r._diffBefore, 'a\nb\nc\nd');
221
+ assert.strictEqual(r._diffAfter, 'a\nX\nY\nZ\nd');
222
+ });
223
+
224
+ test('replace_in_file: payload captures before/after of the replacement', async () => {
225
+ const exec = mkExec();
226
+ const f = tmpFile('rep.txt', 'foo bar foo');
227
+ const r = await exec.agentExecFile('replace_in_file', f, 'bar', 'baz', '');
228
+ assert.strictEqual(r._diffBefore, 'foo bar foo');
229
+ assert.strictEqual(r._diffAfter, 'foo baz foo');
230
+ });
231
+
232
+ test('a failed edit attaches NO payload (out-of-range line)', async () => {
233
+ const exec = mkExec();
234
+ const f = tmpFile('oor.txt', 'one\ntwo');
235
+ const r = await exec.agentExecFile('edit_file', f, 99, 'X');
236
+ assert.ok(r.error, 'the edit failed');
237
+ assert.strictEqual(r._diffBefore, undefined, 'no diff payload on a failed edit');
238
+ });
239
+
240
+ // End-to-end through buildExecutionDiff: the executor payload renders a real diff
241
+ // for an auto-approve-style call (no modal involved in this harness at all).
242
+ test('parity: an executor payload renders a diff with no modal in the loop', async () => {
243
+ const exec = mkExec();
244
+ const f = tmpFile('parity.txt', 'keep\nold\nkeep');
245
+ const r = await exec.agentExecFile('edit_file', f, 2, 'new');
246
+ const out = stripAnsi(buildExecutionDiff({
247
+ diff: { before: r._diffBefore, after: r._diffAfter, path: f },
248
+ maxLines: DEFAULT_CONFIG.diff_max_lines,
249
+ }));
250
+ assert.match(out, /new/, 'the new content shows in the rendered diff');
251
+ assert.match(out, /old/, 'the removed content shows in the rendered diff');
252
+ });
253
+
254
+ // ── 4. No replay: loaded history collapses to summaries, not diffs ───────────
255
+
256
+ test('loaded-history file results summarize to a one-liner (never a diff)', () => {
257
+ // displayLoadedMessages renders stored tool results through summarizeToolResult.
258
+ // A persisted write result is a short "Wrote …" line — there is no diff to replay.
259
+ const summary = summarizeToolResult('Wrote 42 bytes to /tmp/x.txt');
260
+ assert.strictEqual(summary, 'Wrote 42 bytes to /tmp/x.txt');
261
+ assert.doesNotMatch(summary, /more changed lines/);
262
+ });
263
+
264
+ // ── config default ───────────────────────────────────────────────────────────
265
+
266
+ test('diff_max_lines has a sane default', () => {
267
+ assert.strictEqual(DEFAULT_CONFIG.diff_max_lines, 50);
268
+ });
@@ -186,6 +186,63 @@ test('edit_file out-of-range line returns an error', async () => {
186
186
  assert.ok(r.error && /out of range/i.test(r.error));
187
187
  });
188
188
 
189
+ // Part 2: line-range edit_file (regex-free block replacement) ----------------
190
+
191
+ test('edit_file replaces a contiguous line range with multi-line content', async () => {
192
+ await ef('write', 'range.txt', 'l1\nl2\nl3\nl4\nl5');
193
+ const r = await ef('edit_file', 'range.txt', 2, 'A\nB\nC', 4);
194
+ assert.strictEqual(r.status, 'ok');
195
+ assert.strictEqual(r.line, 2);
196
+ assert.strictEqual(r.end_line, 4);
197
+ assert.strictEqual(read('range.txt'), 'l1\nA\nB\nC\nl5', 'lines 2-4 replaced wholesale');
198
+ });
199
+
200
+ test('edit_file line-range collapses several lines into one', async () => {
201
+ await ef('write', 'range2.txt', 'keep\nx\ny\nz\nkeep2');
202
+ const r = await ef('edit_file', 'range2.txt', 2, 'ONE', 4);
203
+ assert.strictEqual(r.status, 'ok');
204
+ assert.strictEqual(read('range2.txt'), 'keep\nONE\nkeep2');
205
+ });
206
+
207
+ test('edit_file line-range out of range returns an error (no mutation)', async () => {
208
+ await ef('write', 'range3.txt', 'a\nb\nc');
209
+ const r = await ef('edit_file', 'range3.txt', 2, 'X', 99);
210
+ assert.ok(r.error && /out of range/i.test(r.error));
211
+ assert.strictEqual(read('range3.txt'), 'a\nb\nc');
212
+ });
213
+
214
+ test('edit_file rejects an inverted range (end before start)', async () => {
215
+ await ef('write', 'range4.txt', 'a\nb\nc\nd');
216
+ const r = await ef('edit_file', 'range4.txt', 3, 'X', 2);
217
+ assert.ok(r.error && /out of range/i.test(r.error));
218
+ assert.strictEqual(read('range4.txt'), 'a\nb\nc\nd');
219
+ });
220
+
221
+ test('edit_file single-line (no end_line) is unchanged (paired no-regression)', async () => {
222
+ await ef('write', 'single.txt', 'l1\nl2\nl3');
223
+ const r = await ef('edit_file', 'single.txt', 2, 'REPLACED');
224
+ assert.strictEqual(r.status, 'ok');
225
+ assert.strictEqual(r.line, 2);
226
+ assert.strictEqual(r.end_line, undefined, 'no end_line reported for a single-line edit');
227
+ assert.strictEqual(read('single.txt'), 'l1\nREPLACED\nl3');
228
+ });
229
+
230
+ test('edit_file round-trip: read a file, then replace an exact line range by number', async () => {
231
+ // Mirrors the intended large-edit workflow: read_file (a numbered slice tells
232
+ // the agent the 1-based line numbers), then edit_file with line..end_line.
233
+ const src = ['def race(self):', ' speed = 0', ' # broken', ' pass', 'done()'].join('\n');
234
+ await ef('write', 'cligames.py', src);
235
+ const rd = await ef('read', 'cligames.py', null, null, true);
236
+ const lines = rd.content.split('\n');
237
+ // Locate the broken body (lines 2..4, 1-based) the way an agent would from numbers.
238
+ const start = lines.findIndex((l) => l.includes('speed = 0')) + 1;
239
+ const end = lines.findIndex((l) => l.includes('pass')) + 1;
240
+ assert.deepStrictEqual([start, end], [2, 4]);
241
+ const r = await ef('edit_file', 'cligames.py', start, ' speed = 10\n move()', end);
242
+ assert.strictEqual(r.status, 'ok');
243
+ assert.strictEqual(read('cligames.py'), 'def race(self):\n speed = 10\n move()\ndone()');
244
+ });
245
+
189
246
  test('search_in_file returns matching lines with 1-based numbers', async () => {
190
247
  await ef('write', 's.txt', 'alpha\nbeta\ngamma beta');
191
248
  const r = await ef('search_in_file', 's.txt', 'beta');
@@ -200,34 +257,214 @@ test('search_in_file refuses a protected secret path', async () => {
200
257
  assert.ok(r.error && /secrets|credentials/i.test(r.error));
201
258
  });
202
259
 
203
- test('replace_in_file with explicit "g" flag replaces all and reports the count', async () => {
260
+ test('replace_in_file with replace_all replaces all occurrences and reports the count', async () => {
261
+ // Literal is now the default; replacing more than one occurrence requires the
262
+ // explicit replace_all flag (positional arg 6) — without it the >1-match guard
263
+ // would refuse (see the ambiguity test below).
204
264
  await ef('write', 'repg.txt', 'a a a');
205
- const r = await ef('replace_in_file', 'repg.txt', 'a', 'b', 'g');
265
+ const r = await ef('replace_in_file', 'repg.txt', 'a', 'b', '', false, true);
206
266
  assert.strictEqual(r.status, 'ok');
207
267
  assert.strictEqual(r.count, 3);
208
268
  assert.strictEqual(read('repg.txt'), 'b b b');
209
269
  });
210
270
 
211
- test('replace_in_file without "g" flag replaces only the first match and reports count 1', async () => {
212
- // Fixed in Task 1.4c (was a count bug pinned in 1.4b): the replacement
213
- // semantics are unchanged without "g", only the first occurrence is replaced
214
- // — but the returned count now reflects the replacements ACTUALLY performed
215
- // (1) instead of the always-global match total (3).
271
+ test('replace_in_file REFUSES an ambiguous (>1) match when replace_all is not set, file unchanged', async () => {
272
+ // Was: silently replaced only the FIRST of three (wrong-span corruption risk).
273
+ // Now: the uniqueness guard refuses and names the count, file untouched.
216
274
  await ef('write', 'rep.txt', 'a a a');
217
275
  const r = await ef('replace_in_file', 'rep.txt', 'a', 'b', '');
218
- assert.strictEqual(r.status, 'ok');
219
- assert.strictEqual(read('rep.txt'), 'b a a', 'only the first occurrence is replaced (semantics unchanged)');
220
- assert.strictEqual(r.count, 1, 'count equals the actual number of replacements');
276
+ assert.ok(r.error && /found 3 matches/i.test(r.error), `expected ambiguity error, got ${JSON.stringify(r)}`);
277
+ assert.ok(/replace_all/i.test(r.error));
278
+ assert.strictEqual(read('rep.txt'), 'a a a', 'file must be unchanged on refusal');
221
279
  });
222
280
 
223
- test('replace_in_file reports count 0 when there is no match (no file change)', async () => {
281
+ test('replace_in_file REFUSES when there is no match (count 0 is an error, file unchanged)', async () => {
282
+ // Was: returned {status:'ok', count:0} — masking a no-op as success. Now errors.
224
283
  await ef('write', 'rep0.txt', 'xyz');
225
284
  const r = await ef('replace_in_file', 'rep0.txt', 'q', 'b', '');
226
- assert.strictEqual(r.status, 'ok');
227
- assert.strictEqual(r.count, 0);
285
+ assert.ok(r.error && /not found/i.test(r.error), `expected not-found error, got ${JSON.stringify(r)}`);
228
286
  assert.strictEqual(read('rep0.txt'), 'xyz');
229
287
  });
230
288
 
289
+ // ---------------------------------------------------------------------------
290
+ // replace_in_file: LITERAL-by-default + uniqueness guard (Claude Code Edit model)
291
+ // Literal is the DEFAULT for ALL searches — a block with ( ) { } . [ ] is matched
292
+ // byte-for-byte, never as a regex (so "Nothing to repeat" can't happen on real
293
+ // code). Regex is opt-in via regex:true and keeps the ReDoS guard. The match must
294
+ // be UNIQUE: 0 → error, >1 → error unless replace_all. These tests are PAIRED:
295
+ // the now-allowed literals and the still-blocked regex bombs share one mechanism.
296
+
297
+ test('replace_in_file: a ~5,000-char literal block on a 40 KB file is allowed and replaces correctly (the reported bug)', async () => {
298
+ // A plain block, NO regex metacharacters — only words, spaces, newlines.
299
+ const block = Array.from({ length: 250 }, (_, i) => `line number ${i} of the copied block`).join('\n');
300
+ assert.ok(block.length >= 5000, `block is ${block.length} chars`);
301
+ const filler = ('x'.repeat(79) + '\n').repeat(500); // ~40 KB of surrounding file
302
+ await ef('write', 'big-literal.txt', filler + block + '\n' + filler);
303
+ const r = await ef('replace_in_file', 'big-literal.txt', block, 'REPLACED_BLOCK', '');
304
+ assert.strictEqual(r.status, 'ok', `expected ok, got ${JSON.stringify(r)}`);
305
+ assert.strictEqual(r.count, 1);
306
+ assert.ok(read('big-literal.txt').includes('REPLACED_BLOCK'));
307
+ assert.ok(!read('big-literal.txt').includes('line number 0 of the copied block'));
308
+ });
309
+
310
+ test('replace_in_file: a ~1,500-char literal block is allowed (default literal, no flag needed)', async () => {
311
+ const block = Array.from({ length: 60 }, (_, i) => `const value_${i} = compute(${i});`).join('\n');
312
+ // contains ( ) ; — matched verbatim by default (no regex, no literal flag needed)
313
+ assert.ok(block.length >= 1500, `block is ${block.length} chars`);
314
+ await ef('write', 'mid-literal.txt', 'header\n' + block + '\nfooter');
315
+ const r = await ef('replace_in_file', 'mid-literal.txt', block, 'X', '');
316
+ assert.strictEqual(r.status, 'ok');
317
+ assert.strictEqual(r.count, 1);
318
+ assert.strictEqual(read('mid-literal.txt'), 'header\nX\nfooter');
319
+ });
320
+
321
+ test('replace_in_file: dangerous regexes stay BLOCKED with regex:true (ReDoS protection intact)', async () => {
322
+ await ef('write', 'redos.txt', 'aaaaaaaaaaaaaaaaaaaa,');
323
+ for (const bomb of ['(a+)+$', '(.*,)*']) {
324
+ const r = await ef('replace_in_file', 'redos.txt', bomb, 'x', 'g', true);
325
+ assert.ok(r.error && /backtracking/i.test(r.error), `${bomb} must be rejected, got ${JSON.stringify(r)}`);
326
+ }
327
+ });
328
+
329
+ test('replace_in_file: a ~2,000-char metacharacter-heavy regex stays BLOCKED with regex:true', async () => {
330
+ // Heavy with active metacharacters → regex path → length cap rejects it.
331
+ const heavy = 'a.*b+c?'.repeat(300); // ~2,100 chars, full of . * + ?
332
+ assert.ok(heavy.length >= 2000, `heavy is ${heavy.length} chars`);
333
+ await ef('write', 'heavy.txt', 'abc');
334
+ const r = await ef('replace_in_file', 'heavy.txt', heavy, 'x', 'g', true);
335
+ assert.ok(r.error && /exceeds 1000 chars/i.test(r.error), `expected length rejection, got ${JSON.stringify(r)}`);
336
+ });
337
+
338
+ test('replace_in_file: a metacharacter-heavy string is matched LITERALLY by default (no length bound, no ReDoS check)', async () => {
339
+ // Same string that is length-rejected as a regex is matched VERBATIM by default
340
+ // — literal cannot backtrack, so its length is irrelevant. This is the trap the
341
+ // old auto-detect created: it routed this to the regex path and rejected it.
342
+ const heavy = 'a.*b+c?'.repeat(300);
343
+ await ef('write', 'litheavy.txt', 'head\n' + heavy + '\ntail');
344
+ const r = await ef('replace_in_file', 'litheavy.txt', heavy, 'Z', '');
345
+ assert.strictEqual(r.status, 'ok', `default-literal must match verbatim: ${JSON.stringify(r).slice(0, 160)}`);
346
+ assert.strictEqual(r.count, 1);
347
+ assert.strictEqual(read('litheavy.txt'), 'head\nZ\ntail');
348
+ });
349
+
350
+ test('replace_in_file: a literal block containing regex-special chars (parens) is matched literally, not as a group', async () => {
351
+ // foo(x) as a REGEX would match "foox" capturing x — never the literal text.
352
+ // By default (literal) it must match the verbatim "foo(x)".
353
+ await ef('write', 'parens.txt', 'before foo(x) after');
354
+ const r = await ef('replace_in_file', 'parens.txt', 'foo(x)', 'bar(y)', '');
355
+ assert.strictEqual(r.status, 'ok');
356
+ assert.strictEqual(r.count, 1);
357
+ assert.strictEqual(read('parens.txt'), 'before bar(y) after');
358
+ });
359
+
360
+ test('replace_in_file: default-literal with brackets/quantifier chars replaces a copied code line verbatim', async () => {
361
+ const line = 'arr[i] = items.map(x => x * 2);';
362
+ await ef('write', 'code.txt', 'a\n' + line + '\nb');
363
+ const r = await ef('replace_in_file', 'code.txt', line, 'arr[i] = items;', '');
364
+ assert.strictEqual(r.status, 'ok');
365
+ assert.strictEqual(r.count, 1);
366
+ assert.strictEqual(read('code.txt'), 'a\narr[i] = items;\nb');
367
+ });
368
+
369
+ test('replace_in_file: regex mode works (back-references) only when regex:true is set', async () => {
370
+ // Single group, so it does NOT trip the nested-quantifier guard. $1 honored.
371
+ await ef('write', 'rx.txt', 'key=value');
372
+ const r = await ef('replace_in_file', 'rx.txt', '(value)', '[$1]', '', true);
373
+ assert.strictEqual(r.status, 'ok');
374
+ assert.strictEqual(read('rx.txt'), 'key=[value]');
375
+ });
376
+
377
+ // ---------------------------------------------------------------------------
378
+ // replace_in_file: focused new tests for the literal-default + uniqueness guard
379
+ // (Claude Code Edit model — change set for this task).
380
+
381
+ test('replace_in_file (a): a literal block with ( ) { } . [ ] is matched VERBATIM (no "Nothing to repeat")', async () => {
382
+ const block = 'if (a[i].fn({x: 1}) && (b||c)) { return *p; }';
383
+ await ef('write', 'meta.js', 'top\n' + block + '\nbottom');
384
+ const r = await ef('replace_in_file', 'meta.js', block, 'noop();', '');
385
+ assert.ok(!r.error, `must not error on metacharacters: ${JSON.stringify(r)}`);
386
+ assert.strictEqual(r.status, 'ok');
387
+ assert.strictEqual(r.count, 1);
388
+ assert.strictEqual(read('meta.js'), 'top\nnoop();\nbottom');
389
+ });
390
+
391
+ test('replace_in_file (b): search not found → ERROR, file unchanged', async () => {
392
+ await ef('write', 'nf.txt', 'hello world');
393
+ const r = await ef('replace_in_file', 'nf.txt', 'goodbye', 'x', '');
394
+ assert.ok(r.error && /not found/i.test(r.error));
395
+ assert.strictEqual(read('nf.txt'), 'hello world');
396
+ });
397
+
398
+ test('replace_in_file (c): 2+ matches without replace_all → ERROR naming the count, file unchanged', async () => {
399
+ await ef('write', 'amb.txt', 'foo\nbar\nfoo\nbaz\nfoo');
400
+ const r = await ef('replace_in_file', 'amb.txt', 'foo', 'qux', '');
401
+ assert.ok(r.error && /found 3 matches/i.test(r.error), `got ${JSON.stringify(r)}`);
402
+ assert.ok(/line/i.test(r.error), 'error should surface match line numbers for disambiguation');
403
+ assert.strictEqual(read('amb.txt'), 'foo\nbar\nfoo\nbaz\nfoo');
404
+ });
405
+
406
+ test('replace_in_file (d): unique match → replaced, honest count 1', async () => {
407
+ await ef('write', 'uniq.txt', 'alpha\nbeta\ngamma');
408
+ const r = await ef('replace_in_file', 'uniq.txt', 'beta', 'BETA', '');
409
+ assert.strictEqual(r.status, 'ok');
410
+ assert.strictEqual(r.count, 1);
411
+ assert.strictEqual(read('uniq.txt'), 'alpha\nBETA\ngamma');
412
+ });
413
+
414
+ test('replace_in_file (e): replace_all:true with N matches → all replaced, honest count N', async () => {
415
+ await ef('write', 'all.txt', 'x x x x');
416
+ const r = await ef('replace_in_file', 'all.txt', 'x', 'y', '', false, true);
417
+ assert.strictEqual(r.status, 'ok');
418
+ assert.strictEqual(r.count, 4);
419
+ assert.strictEqual(read('all.txt'), 'y y y y');
420
+ });
421
+
422
+ test('replace_in_file (f): regex:true with a real regex works and the ReDoS guard is still active', async () => {
423
+ await ef('write', 'rxf.txt', 'id=42; id=7;');
424
+ const ok = await ef('replace_in_file', 'rxf.txt', 'id=\\d+', 'id=0', '', true, true);
425
+ assert.strictEqual(ok.status, 'ok');
426
+ assert.strictEqual(ok.count, 2);
427
+ assert.strictEqual(read('rxf.txt'), 'id=0; id=0;');
428
+ // ReDoS guard still fires in regex mode
429
+ await ef('write', 'rxbomb.txt', 'aaaaaaaaaa!');
430
+ const bomb = await ef('replace_in_file', 'rxbomb.txt', '(a+)+$', 'x', 'g', true);
431
+ assert.ok(bomb.error && /backtracking/i.test(bomb.error));
432
+ });
433
+
434
+ test('replace_in_file (g): the original multi-line corruption repro now errors instead of silently corrupting', async () => {
435
+ // The reported "newBullet duplication": a block that appears more than once
436
+ // used to silently replace only the first span (wrong-span corruption). Now the
437
+ // uniqueness guard refuses; adding context makes it unique and it replaces.
438
+ const dup = ' const newBullet = makeBullet();\n list.push(newBullet);';
439
+ await ef('write', 'bullets.js', 'function a() {\n' + dup + '\n}\nfunction b() {\n' + dup + '\n}\n');
440
+ const ambiguous = await ef('replace_in_file', 'bullets.js', dup, ' // removed', '');
441
+ assert.ok(ambiguous.error && /found 2 matches/i.test(ambiguous.error), `should refuse, got ${JSON.stringify(ambiguous)}`);
442
+ assert.ok(read('bullets.js').split(dup).length - 1 === 2, 'file unchanged — both copies intact');
443
+ // Disambiguate with surrounding context → unique → replaces correctly.
444
+ const unique = await ef('replace_in_file', 'bullets.js', 'function b() {\n' + dup + '\n}', 'function b() {}', '');
445
+ assert.strictEqual(unique.status, 'ok');
446
+ assert.strictEqual(unique.count, 1);
447
+ assert.ok(read('bullets.js').includes('function a() {\n' + dup), 'function a copy preserved');
448
+ });
449
+
450
+ test('replace_in_file (h): post-replace verification warns when the search string still remains', async () => {
451
+ // Replacement CONTAINS the search string → after replacing, it still appears.
452
+ await ef('write', 'warn.txt', 'value');
453
+ const r = await ef('replace_in_file', 'warn.txt', 'value', '[value]', '');
454
+ assert.strictEqual(r.status, 'ok');
455
+ assert.strictEqual(r.count, 1);
456
+ assert.ok(r.warning && /still appears/i.test(r.warning), `expected warning, got ${JSON.stringify(r)}`);
457
+ assert.strictEqual(read('warn.txt'), '[value]');
458
+ });
459
+
460
+ test('search_in_file: a long literal pattern is no longer rejected for length', async () => {
461
+ const block = 'token '.repeat(400).trimEnd(); // 2,399 chars, no metacharacters
462
+ await ef('write', 'searchbig.txt', 'noise\n' + block + '\nnoise');
463
+ const r = await ef('search_in_file', 'searchbig.txt', block);
464
+ assert.ok(!r.error, `expected matches, got ${JSON.stringify(r).slice(0, 120)}`);
465
+ assert.deepStrictEqual(r.matches, [{ line: 2, content: block }]);
466
+ });
467
+
231
468
  test('search_files finds files by glob', async () => {
232
469
  await ef('write', 'find/x.ts', '1');
233
470
  await ef('write', 'find/y.js', '2');
@@ -60,8 +60,9 @@ const ATTR_TAG_CASES = [
60
60
  { name: 'move_file', tmpl: (q) => `<move_file src=${q}a${q} dst=${q}b${q}/>`, expected: [['move_file', 'a', 'b']] },
61
61
  { name: 'copy_file', tmpl: (q) => `<copy_file src=${q}a${q} dst=${q}b${q}/>`, expected: [['copy_file', 'a', 'b']] },
62
62
  { name: 'edit_file', tmpl: (q) => `<edit_file path=${q}a.js${q} line=${q}42${q}>x = 1</edit_file>`, expected: [['edit_file', 'a.js', 42, 'x = 1']] },
63
+ { name: 'edit_file range', tmpl: (q) => `<edit_file path=${q}a.js${q} line=${q}10${q} end_line=${q}12${q}>block</edit_file>`, expected: [['edit_file', 'a.js', 10, 'block', 12]] },
63
64
  { name: 'search_in_file', tmpl: (q) => `<search_in_file path=${q}a.js${q}>TODO</search_in_file>`, expected: [['search_in_file', 'a.js', 'TODO']] },
64
- { name: 'replace_in_file', tmpl: (q) => `<replace_in_file path=${q}a.js${q} search=${q}old${q} replace=${q}new${q}>g</replace_in_file>`, expected: [['replace_in_file', 'a.js', 'old', 'new', 'g']] },
65
+ { name: 'replace_in_file', tmpl: (q) => `<replace_in_file path=${q}a.js${q} search=${q}old${q} replace=${q}new${q}>g</replace_in_file>`, expected: [['replace_in_file', 'a.js', 'old', 'new', 'g', false, false]] },
65
66
  { name: 'upload', tmpl: (q) => `<upload path=${q}a.bin${q}>QUJD</upload>`, expected: [['upload', 'a.bin', 'QUJD']] },
66
67
  { name: 'http_get attr', tmpl: (q) => `<http_get url=${q}http://x/api${q}/>`, expected: [['http_get', 'http://x/api', {}]] },
67
68
  { name: 'ask_user', tmpl: (q) => `<ask_user question=${q}Which lang?${q}/>`, expected: [['ask_user', 'Which lang?']] },
@@ -138,6 +139,36 @@ test('fenced shell block: each non-comment line becomes a shell call', () => {
138
139
  assert.deepStrictEqual(extractToolCalls(fx.SHELL_FENCE), [['shell', 'echo hi'], ['shell', 'ls -la']]);
139
140
  });
140
141
 
142
+ // ---------------------------------------------------------------------------
143
+ // skipTextHeuristics (P1, native-rail safety). The bare-code-fence pass is the
144
+ // ONLY text heuristic that infers commands from untagged prose; on the native
145
+ // rail it must be skipped so an illustrative ```bash block in a final answer is
146
+ // never executed. EXPLICIT tool tags are unaffected — they run on both rails.
147
+ // ---------------------------------------------------------------------------
148
+
149
+ test('skipTextHeuristics: the bare ```bash fence is NOT extracted (native-rail guard)', () => {
150
+ // Same fixture the XML rail extracts above — with the flag, zero calls.
151
+ assert.deepStrictEqual(extractToolCalls(fx.SHELL_FENCE, { skipTextHeuristics: true }), []);
152
+ // A realistic illustrative block (the incident shape) yields nothing.
153
+ const prose = 'Here is an example you could run:\n```bash\nsu nobody\necho $TOKEN\n```\nThat would do it.';
154
+ assert.deepStrictEqual(extractToolCalls(prose, { skipTextHeuristics: true }), []);
155
+ });
156
+
157
+ test('skipTextHeuristics: EXPLICIT tool tags STILL extract (category A unaffected)', () => {
158
+ // <shell>/<exec> and every wrapper/registered tag are deliberate dispatch, so
159
+ // the flag leaves them intact — the native rail dispatches tools this way too.
160
+ assert.deepStrictEqual(extractToolCalls('<shell>echo HI</shell>', { skipTextHeuristics: true }), [['shell', 'echo HI']]);
161
+ assert.deepStrictEqual(extractToolCalls('<read_file>a.txt</read_file>', { skipTextHeuristics: true }), [['read', 'a.txt', null, null, false]]);
162
+ assert.deepStrictEqual(extractToolCalls(fx.MINIMAX_WRAPPER, { skipTextHeuristics: true }), [['write', 'a.json', '{"k":1}']]);
163
+ });
164
+
165
+ test('skipTextHeuristics OFF (XML rail): bare fence extraction is byte-identical to default', () => {
166
+ // Regression guard for the rail we keep: omitting the flag (XML rail) parses
167
+ // the fence exactly as before — same as the no-options characterization above.
168
+ assert.deepStrictEqual(extractToolCalls(fx.SHELL_FENCE, {}), [['shell', 'echo hi'], ['shell', 'ls -la']]);
169
+ assert.deepStrictEqual(extractToolCalls(fx.SHELL_FENCE, { skipTextHeuristics: false }), [['shell', 'echo hi'], ['shell', 'ls -la']]);
170
+ });
171
+
141
172
  // ---------------------------------------------------------------------------
142
173
  // Multiple calls, ordering, nesting, equivalence.
143
174
  // ---------------------------------------------------------------------------
@@ -265,8 +296,10 @@ const MAP_CASES = [
265
296
  ['copy_file', { src: 'a', dst: 'b' }, ['copy_file', 'a', 'b']],
266
297
  ['file_stat', { path: 'a' }, ['file_stat', 'a']],
267
298
  ['search_in_file', { path: 'a', pattern: 'p' }, ['search_in_file', 'a', 'p']],
268
- ['replace_in_file', { path: 'a', search: 'o', replace: 'n' }, ['replace_in_file', 'a', 'o', 'n', '']],
269
- ['replace_in_file', { path: 'a', search: 'o', replace: 'n', flags: 'g' }, ['replace_in_file', 'a', 'o', 'n', 'g']],
299
+ ['replace_in_file', { path: 'a', search: 'o', replace: 'n' }, ['replace_in_file', 'a', 'o', 'n', '', false, false]],
300
+ ['replace_in_file', { path: 'a', search: 'o', replace: 'n', flags: 'g' }, ['replace_in_file', 'a', 'o', 'n', 'g', false, false]],
301
+ ['replace_in_file', { path: 'a', search: 'o', replace: 'n', regex: true }, ['replace_in_file', 'a', 'o', 'n', '', true, false]],
302
+ ['replace_in_file', { path: 'a', search: 'o', replace: 'n', replace_all: true }, ['replace_in_file', 'a', 'o', 'n', '', false, true]],
270
303
  ['get_env', { name: 'X' }, ['get_env', 'X']],
271
304
  ['set_env', { name: 'X', value: 'v' }, ['set_env', 'X', 'v']],
272
305
  ['set_env', { name: 'X' }, ['set_env', 'X', '']],
@@ -295,6 +328,7 @@ test('mapInvokeToCall: more missing-required-param guards return null', () => {
295
328
 
296
329
  test('mapInvokeToCall: edit_file coerces line to int', () => {
297
330
  assert.deepStrictEqual(mapInvokeToCall('edit_file', { path: 'a', line: '7', content: 'x' }), ['edit_file', 'a', 7, 'x']);
331
+ assert.deepStrictEqual(mapInvokeToCall('edit_file', { path: 'a', line: '7', content: 'x', end_line: '9' }), ['edit_file', 'a', 7, 'x', 9]);
298
332
  });
299
333
 
300
334
  // ---------------------------------------------------------------------------