@semalt-ai/code 1.19.0 → 1.20.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. package/.claude/settings.local.json +2 -1
  2. package/ARCHITECTURE.md +6 -95
  3. package/CLAUDE.md +196 -1874
  4. package/README.md +1 -1
  5. package/docs/ARCHITECTURE.md +1321 -0
  6. package/docs/CONFIG.md +340 -0
  7. package/docs/HISTORY.md +245 -0
  8. package/index.js +1 -1
  9. package/lib/agent.js +145 -16
  10. package/lib/api.js +28 -3
  11. package/lib/commands/chat-session.js +188 -4
  12. package/lib/commands/chat-slash.js +16 -0
  13. package/lib/commands/chat-turn.js +319 -52
  14. package/lib/commands/chat.js +12 -8
  15. package/lib/config.js +27 -0
  16. package/lib/constants.js +30 -1
  17. package/lib/headless.js +36 -1
  18. package/lib/images.js +8 -2
  19. package/lib/permissions.js +23 -16
  20. package/lib/prompts.js +15 -3
  21. package/lib/tool_registry.js +357 -53
  22. package/lib/tool_specs.js +42 -8
  23. package/lib/tools.js +80 -19
  24. package/lib/ui/anim.js +86 -0
  25. package/lib/ui/ansi.js +17 -27
  26. package/lib/ui/chat-history.js +253 -71
  27. package/lib/ui/create-ui.js +67 -24
  28. package/lib/ui/diff.js +90 -25
  29. package/lib/ui/file-activity.js +229 -0
  30. package/lib/ui/format.js +173 -28
  31. package/lib/ui/input-field.js +5 -4
  32. package/lib/ui/md-stream.js +234 -0
  33. package/lib/ui/render-operation.js +113 -0
  34. package/lib/ui/select.js +1 -4
  35. package/lib/ui/status-bar.js +99 -57
  36. package/lib/ui/stream.js +20 -13
  37. package/lib/ui/theme.js +190 -45
  38. package/lib/ui/tool-operation.js +190 -0
  39. package/lib/ui/utils.js +9 -5
  40. package/lib/ui/web-activity.js +58 -6
  41. package/lib/ui/writer.js +159 -45
  42. package/lib/ui.js +1 -1
  43. package/package.json +1 -1
  44. package/test/anim-driver.test.js +153 -0
  45. package/test/ask-user-display.test.js +226 -0
  46. package/test/ask-user-gate.test.js +231 -0
  47. package/test/chat-history-nocolor.test.js +155 -0
  48. package/test/chat-relogin.test.js +207 -0
  49. package/test/defer-detail-band.test.js +403 -0
  50. package/test/detail-band-tab-flatten.test.js +242 -0
  51. package/test/exec-diff.test.js +268 -0
  52. package/test/executors.test.js +250 -13
  53. package/test/extract-tool-calls.test.js +37 -3
  54. package/test/file-activity.test.js +542 -0
  55. package/test/grep-path-target.test.js +227 -0
  56. package/test/harness/chat-harness.js +2 -1
  57. package/test/headless.test.js +146 -1
  58. package/test/input-field-ctrl-o.test.js +37 -0
  59. package/test/live-height-physical.test.js +281 -0
  60. package/test/max-iterations.test.js +9 -7
  61. package/test/md-stream.test.js +183 -0
  62. package/test/narration-ordering.test.js +309 -0
  63. package/test/native-dispatch.test.js +53 -0
  64. package/test/native-live-narration.test.js +254 -0
  65. package/test/output-heredoc-leak.test.js +195 -0
  66. package/test/output-preview.test.js +245 -0
  67. package/test/permission-flush.test.js +302 -0
  68. package/test/permissions.test.js +199 -0
  69. package/test/read-paginate.test.js +1 -1
  70. package/test/render-operation.test.js +317 -0
  71. package/test/replay-descriptor-xml.test.js +216 -0
  72. package/test/replay-descriptor.test.js +189 -0
  73. package/test/replay-web-aggregate.test.js +291 -0
  74. package/test/replay-web-persist.test.js +241 -0
  75. package/test/running-glyph-anim.test.js +111 -0
  76. package/test/status-bar-driver.test.js +93 -0
  77. package/test/status-bar-resync.test.js +188 -0
  78. package/test/stream-parser.test.js +24 -0
  79. package/test/theme-palette.test.js +166 -0
  80. package/test/truncate-visible.test.js +78 -0
  81. package/test/view-image.test.js +199 -0
  82. package/test/web-activity-ordering.test.js +12 -3
  83. package/path +0 -1
@@ -87,6 +87,30 @@ test('reasoning_content is collected separately from content', async () => {
87
87
  assert.strictEqual(res.reasoning, 'let me think');
88
88
  });
89
89
 
90
+ test('onReasoning fires once, before content, when reasoning_content is first seen', async () => {
91
+ // Live-narration safety signal (a): the UI eager-open gate needs to know the
92
+ // structured reasoning channel is in use BEFORE the first content token.
93
+ const order = [];
94
+ const srv = await startSseServer({ chunks: [
95
+ sse({ choices: [{ delta: { reasoning_content: 'thinking a' } }] }),
96
+ sse({ choices: [{ delta: { reasoning_content: ' thinking b' } }] }),
97
+ sse({ choices: [{ delta: { content: 'answer' } }] }),
98
+ DONE,
99
+ ] });
100
+ try {
101
+ const api = clientFor(srv.base);
102
+ await api.chatStream([{ role: 'user', content: 'hi' }], {
103
+ silent: true,
104
+ onReasoning: () => order.push('reasoning'),
105
+ onToken: (t) => order.push(`token:${t}`),
106
+ });
107
+ } finally {
108
+ await srv.close();
109
+ }
110
+ // Exactly one onReasoning, and it precedes the first content token.
111
+ assert.deepStrictEqual(order, ['reasoning', 'token:answer']);
112
+ });
113
+
90
114
  test('native tool_calls deltas accumulate by index into structured calls', async () => {
91
115
  const { res } = await run([
92
116
  sse({ choices: [{ delta: { tool_calls: [{ index: 0, id: 'call_1', type: 'function', function: { name: 'read_file', arguments: '' } }] } }] }),
@@ -0,0 +1,166 @@
1
+ 'use strict';
2
+
3
+ // Output Refactor — Phase 2.5: saturated palette + one theme table + NO_COLOR.
4
+ //
5
+ // These tests pin the INTENT of the phase (not just bytes): colour resolves
6
+ // through a single table (theme.js), the palette is saturated + differentiated,
7
+ // statuses are vivid, the gratuitous dim is gone, and NO_COLOR / non-TTY emit
8
+ // no ANSI. The byte-level characterization lives in render-operation.test.js.
9
+
10
+ const { test } = require('node:test');
11
+ const assert = require('node:assert');
12
+
13
+ // Colour is gated on `isTTY && !NO_COLOR`. Force colour ON for the palette
14
+ // assertions; the NO_COLOR test flips it back off explicitly.
15
+ process.stdout.isTTY = true;
16
+ delete process.env.NO_COLOR;
17
+
18
+ const theme = require('../lib/ui/theme');
19
+ const ansi = require('../lib/ui/ansi');
20
+ const { resolveLineColors, categoryForTag, colorEnabled, UI_THEME } = theme;
21
+ const { formatToolLine } = require('../lib/ui/format');
22
+ const { FullStatusBar } = require('../lib/ui/status-bar');
23
+
24
+ const fg = (n) => `\x1b[38;5;${n}m`;
25
+
26
+ // ---------------------------------------------------------------------------
27
+ // One table — colour is defined in theme.js; ansi.js only re-exports it.
28
+ // ---------------------------------------------------------------------------
29
+ test('one table: ansi.js re-exports the palette from theme.js (same object refs)', () => {
30
+ // Same references prove ansi.js no longer DEFINES a competing palette — it
31
+ // hands back exactly what theme.js owns.
32
+ assert.strictEqual(ansi.THEME, theme.THEME, 'THEME is the same object on both surfaces');
33
+ assert.strictEqual(ansi.FG_RED, theme.FG_RED);
34
+ assert.strictEqual(ansi.FG_DARK, theme.FG_DARK);
35
+ assert.strictEqual(ansi.FG_CODE_BG, theme.FG_CODE_BG);
36
+ // theme.js is the home of the resolver + category map (the chrome seam).
37
+ assert.strictEqual(typeof theme.resolveLineColors, 'function');
38
+ assert.strictEqual(typeof theme.categoryForTag, 'function');
39
+ });
40
+
41
+ // ---------------------------------------------------------------------------
42
+ // Saturation applied — categories distinct and vivid; git/mcp first-class.
43
+ // ---------------------------------------------------------------------------
44
+ test('saturation: category labels use the new saturated codes', () => {
45
+ assert.strictEqual(resolveLineColors('shell', 'ok').label, fg(214));
46
+ assert.strictEqual(resolveLineColors('file', 'ok').label, fg(77));
47
+ assert.strictEqual(resolveLineColors('net', 'ok').label, fg(39));
48
+ assert.strictEqual(resolveLineColors('web', 'ok').label, fg(44));
49
+ assert.strictEqual(resolveLineColors('git', 'ok').label, fg(170));
50
+ assert.strictEqual(resolveLineColors('mcp', 'ok').label, fg(141));
51
+ });
52
+
53
+ test('saturation: git, mcp and the tool fallback are three DISTINCT colours (gap closed)', () => {
54
+ const git = resolveLineColors('git', 'ok').label;
55
+ const mcp = resolveLineColors('mcp', 'ok').label;
56
+ const tool = resolveLineColors('tool', 'ok').label;
57
+ assert.notStrictEqual(git, mcp);
58
+ assert.notStrictEqual(mcp, tool);
59
+ assert.notStrictEqual(git, tool);
60
+ });
61
+
62
+ test('saturation: the operation text is painted in the category colour, not terminal default', () => {
63
+ const c = resolveLineColors('shell', 'ok');
64
+ assert.strictEqual(c.op, fg(214));
65
+ assert.notStrictEqual(c.op, UI_THEME.default);
66
+ });
67
+
68
+ test('category map: git_* and mcp__* resolve to their own categories', () => {
69
+ assert.strictEqual(categoryForTag('git_commit'), 'git');
70
+ assert.strictEqual(categoryForTag('git_status'), 'git');
71
+ assert.strictEqual(categoryForTag('mcp__server__lookup'), 'mcp');
72
+ assert.strictEqual(categoryForTag('read'), 'file');
73
+ assert.strictEqual(categoryForTag('exec'), 'shell');
74
+ assert.strictEqual(categoryForTag('spawn_agent'), 'tool');
75
+ });
76
+
77
+ // ---------------------------------------------------------------------------
78
+ // Status colours — saturated; the running glyph is never gray.
79
+ // ---------------------------------------------------------------------------
80
+ test('status: ok glyph 40, error glyph 203', () => {
81
+ assert.strictEqual(resolveLineColors('shell', 'ok').glyph, fg(40));
82
+ assert.strictEqual(resolveLineColors('shell', 'error').glyph, fg(203));
83
+ assert.strictEqual(resolveLineColors('shell', 'success').glyph, fg(40));
84
+ assert.strictEqual(resolveLineColors('shell', 'failure').glyph, fg(203));
85
+ });
86
+
87
+ test('status: the running/pending glyph is never gray (240) — category-tinted, or cyan for fallbacks', () => {
88
+ // A vivid category tints its own running glyph…
89
+ assert.strictEqual(resolveLineColors('file', 'pending').glyph, fg(77));
90
+ assert.strictEqual(resolveLineColors('shell', 'running').glyph, fg(214));
91
+ // …and the gray fallback categories use cyan 39 instead of their gray tint.
92
+ assert.strictEqual(resolveLineColors('tool', 'pending').glyph, fg(39));
93
+ assert.strictEqual(resolveLineColors('debug', 'running').glyph, fg(39));
94
+ // Never the old muted gray.
95
+ for (const cat of ['file', 'shell', 'tool', 'debug', 'net']) {
96
+ assert.notStrictEqual(resolveLineColors(cat, 'pending').glyph, fg(240));
97
+ }
98
+ });
99
+
100
+ // ---------------------------------------------------------------------------
101
+ // Dim removed — durations are subtle (244), not muted (240).
102
+ // ---------------------------------------------------------------------------
103
+ test('dim removed: duration/meta resolve to subtle 244 on success, red 203 on error', () => {
104
+ const ok = resolveLineColors('shell', 'ok');
105
+ assert.strictEqual(ok.dur, fg(244));
106
+ assert.strictEqual(ok.meta, fg(244));
107
+ assert.notStrictEqual(ok.dur, fg(240));
108
+ const err = resolveLineColors('shell', 'error');
109
+ assert.strictEqual(err.dur, fg(203));
110
+ assert.strictEqual(err.meta, fg(203));
111
+ });
112
+
113
+ test('dim removed: a pending tool line glyph is not the old muted gray', () => {
114
+ // Phase 3: the running (pending, non-blocking) glyph is an animated spinner
115
+ // frame (tool SPINNER_DEF), not the static dot. At durationMs 10 the frame is
116
+ // index 0 ('⣾'). The colour is still category-tinted (214), never gray.
117
+ const { SPINNER_DEFS } = require('../lib/ui/ansi');
118
+ const frame0 = SPINNER_DEFS.tool.frames[0];
119
+ const line = formatToolLine({ status: 'pending', tag: 'shell', arg: 'x', attrs: { command: 'x' }, durationMs: 10 });
120
+ assert.ok(!line.includes(fg(240)), 'no muted-240 anywhere in a pending line');
121
+ assert.ok(line.startsWith(` ${fg(214)}${frame0}`), 'pending glyph is category-tinted (214) spinner frame, not gray');
122
+ });
123
+
124
+ // ---------------------------------------------------------------------------
125
+ // NO_COLOR + non-TTY — the resolver emits no ANSI; lines render as plain text.
126
+ // ---------------------------------------------------------------------------
127
+ test('NO_COLOR: the resolver emits no ANSI and formatToolLine is plain text', () => {
128
+ process.env.NO_COLOR = '1';
129
+ try {
130
+ assert.strictEqual(colorEnabled(), false);
131
+ const c = resolveLineColors('shell', 'ok');
132
+ for (const k of ['glyph', 'label', 'op', 'dur', 'meta']) {
133
+ assert.strictEqual(c[k], '', `resolver.${k} is empty under NO_COLOR`);
134
+ }
135
+ const line = formatToolLine({ status: 'success', tag: 'shell', arg: 'npm install', attrs: { command: 'npm install' }, durationMs: 2300, meta: { exit_code: 0 } });
136
+ assert.ok(!line.includes('\x1b'), 'no ANSI escapes leak under NO_COLOR');
137
+ assert.strictEqual(line, ' ✓ shell · npm install · 2.3s · exit 0');
138
+ } finally {
139
+ delete process.env.NO_COLOR;
140
+ }
141
+ });
142
+
143
+ // ---------------------------------------------------------------------------
144
+ // Dim removed — the status-bar right fields are no longer wholesale-DIM.
145
+ // ---------------------------------------------------------------------------
146
+ test('status bar: the model field renders in accent, not a wholesale DIM wrap', () => {
147
+ process.stdout.isTTY = true;
148
+ const bar = new FullStatusBar({ cols: 200 }, () => {});
149
+ bar.setModel('claude-opus-4-8');
150
+ const line = bar.renderLine();
151
+ assert.ok(line.includes('claude-opus-4-8'), 'model name is present');
152
+ assert.ok(line.includes(`${UI_THEME.accent}claude-opus-4-8`), 'model rendered in accent');
153
+ assert.ok(!line.includes('\x1b[2mclaude'), 'model is not DIM-wrapped');
154
+ });
155
+
156
+ test('non-TTY: with stdout not a TTY, colour is off and lines are plain', () => {
157
+ const orig = process.stdout.isTTY;
158
+ process.stdout.isTTY = false;
159
+ try {
160
+ assert.strictEqual(colorEnabled(), false);
161
+ const line = formatToolLine({ status: 'success', tag: 'file', arg: 'x', attrs: { path: 'x' }, durationMs: 5 });
162
+ assert.ok(!line.includes('\x1b'), 'no ANSI escapes in non-TTY output');
163
+ } finally {
164
+ process.stdout.isTTY = orig;
165
+ }
166
+ });
@@ -0,0 +1,78 @@
1
+ 'use strict';
2
+
3
+ // Direct unit tests for truncateVisible's trailing-reset decision and width
4
+ // math. The trailing `\x1b[0m` must be CONTENT-conditional: appended only when
5
+ // the (possibly truncated) output actually contains an escape, so escape-free
6
+ // output stays escape-free (no NO_COLOR leak) while a cut-open SGR span is
7
+ // still defensively closed (no color bleed).
8
+
9
+ const { test } = require('node:test');
10
+ const assert = require('node:assert');
11
+
12
+ const { truncateVisible, termWidth } = require('../lib/ui/utils');
13
+
14
+ const RST = '\x1b[0m';
15
+ const RED = '\x1b[31m';
16
+
17
+ test('escape-free string within budget has no trailing reset', () => {
18
+ const out = truncateVisible('hello world', 80);
19
+ assert.strictEqual(out, 'hello world');
20
+ assert.strictEqual(out.indexOf('\x1b'), -1);
21
+ });
22
+
23
+ test('escape-free string truncated mid-string has no trailing reset', () => {
24
+ const out = truncateVisible('hello world', 5);
25
+ assert.strictEqual(out, 'hello');
26
+ assert.strictEqual(out.indexOf('\x1b'), -1);
27
+ });
28
+
29
+ test('SGR cut mid-span ends with a reset (bleed-safe)', () => {
30
+ // Opening SGR is copied through (0 width); the next over-budget glyph breaks
31
+ // the loop BEFORE that span's own reset → unclosed color span → RST required.
32
+ const out = truncateVisible(RED + 'colored text', 4);
33
+ assert.ok(out.indexOf('\x1b') !== -1, 'opener should survive');
34
+ assert.ok(out.endsWith(RST), 'cut-open span must be closed with a reset');
35
+ assert.strictEqual(out, RED + 'colo' + RST);
36
+ });
37
+
38
+ test('fully-paired SGR within budget round-trips, reset present from content', () => {
39
+ // opener + content + closer all fit. Because an escape is present we append a
40
+ // trailing RST (defensive — idempotent on already-reset content).
41
+ const input = RED + 'hi' + RST;
42
+ const out = truncateVisible(input, 80);
43
+ assert.ok(out.startsWith(RED + 'hi' + RST), 'full span preserved');
44
+ assert.ok(out.endsWith(RST));
45
+ assert.strictEqual(out, input + RST);
46
+ });
47
+
48
+ test('max === 0 returns empty string (no bare reset)', () => {
49
+ assert.strictEqual(truncateVisible('anything', 0), '');
50
+ assert.strictEqual(truncateVisible(RED + 'x', 0), '');
51
+ });
52
+
53
+ test('empty / falsy input returns empty string', () => {
54
+ assert.strictEqual(truncateVisible('', 10), '');
55
+ assert.strictEqual(truncateVisible(null, 10), '');
56
+ assert.strictEqual(truncateVisible(undefined, 10), '');
57
+ });
58
+
59
+ test('CJK glyphs count as width 2 in the truncation column math', () => {
60
+ // Three ideographs (U+65E5 U+672C U+8A9E), each 2 cols. Budget 4 fits exactly
61
+ // two; output is escape-free → no trailing reset.
62
+ const cjk = '日本語';
63
+ const out = truncateVisible(cjk, 4);
64
+ assert.strictEqual(out, '日本');
65
+ assert.strictEqual(out.indexOf('\x1b'), -1);
66
+ assert.strictEqual(termWidth('日本'), 4);
67
+ });
68
+
69
+ test('combining marks count as width 0', () => {
70
+ // 'e' + combining acute (U+0301): 1 visible column total. Budget 1 keeps both
71
+ // the base glyph and the zero-width mark. Built from explicit codepoints so
72
+ // the test does not depend on the source file's Unicode normalization.
73
+ const input = 'e\u0301';
74
+ assert.strictEqual(termWidth(input), 1);
75
+ const out = truncateVisible(input, 1);
76
+ assert.strictEqual(out, input);
77
+ assert.strictEqual(out.indexOf('\x1b'), -1);
78
+ });
@@ -0,0 +1,199 @@
1
+ 'use strict';
2
+
3
+ // Coverage for the model-callable view_image tool: it stages a LOCAL image into
4
+ // the model's vision context via the SAME readImage→images[]→buildProviderMessages
5
+ // path the /image slash command uses — no parallel encoder. Asserts:
6
+ // (a) a valid PNG staged through the real agent loop reaches buildProviderMessages
7
+ // (b) both transport rails converge on the same ['view_image', path] tuple
8
+ // (c) unsupported / missing / oversized inputs return a clean text error (no crash)
9
+ // (d) an out-of-sandbox path is refused like any other file read
10
+ // (e) minimax now resolves vision-capable (true, not null)
11
+ // (f) view_image needs NO permission gate, while an effectful tool still does
12
+
13
+ const { test, before, after } = require('node:test');
14
+ const assert = require('node:assert');
15
+ const fs = require('fs');
16
+ const path = require('path');
17
+
18
+ const ui = require('../lib/ui');
19
+ const { createApiClient } = require('../lib/api');
20
+ const {
21
+ createToolExecutor, extractToolCalls, mapInvokeToCall,
22
+ } = require('../lib/tools');
23
+ const { createPermissionManager } = require('../lib/permissions');
24
+ const { createAgentRunner } = require('../lib/agent');
25
+ const { buildProviderMessages, resolveVisionCapability } = require('../lib/images');
26
+ const { startMockLLM } = require('./harness/mock-llm');
27
+
28
+ // Minimal PNG stub (not a decodable image): the 8-byte signature plus the 4-byte IHDR length field — 12 bytes in total, enough for detectMediaType to lock on.
29
+ const PNG = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a, 0, 0, 0, 13]);
30
+
31
+ let prevKey;
32
+ before(() => { prevKey = process.env.SEMALT_API_KEY; process.env.SEMALT_API_KEY = 'test-key'; });
33
+ after(() => {
34
+ if (prevKey === undefined) delete process.env.SEMALT_API_KEY;
35
+ else process.env.SEMALT_API_KEY = prevKey;
36
+ });
37
+
38
+ // view_image reads through the real isPathSafe (CWD-confined), so test images must
39
+ // live inside process.cwd(). Create a unique throwaway file there and clean it up.
40
+ async function withCwdFile(name, buf, fn) {
41
+ const p = path.join(process.cwd(), `._vimg_test_${process.pid}_${name}`);
42
+ fs.writeFileSync(p, buf);
43
+ try { return await fn(p); } finally { try { fs.unlinkSync(p); } catch {} }
44
+ }
45
+
46
+ function buildRunner(base, extraConfig = {}) {
47
+ const config = {
48
+ api_base: base, api_key: 'test-key', default_model: 'test-model',
49
+ temperature: 0.5, request_timeout_ms: 5000, stream: true, models: [],
50
+ image_max_bytes: 5 * 1024 * 1024,
51
+ sandbox: { mode: 'off' },
52
+ ...extraConfig,
53
+ };
54
+ const getConfig = () => config;
55
+ const saveConfig = (c) => Object.assign(config, c);
56
+ const api = createApiClient({ getConfig, saveConfig, ui });
57
+ const pm = createPermissionManager(ui, { skipPermissions: true });
58
+ pm.setUICallbacks({ onAddMessage: () => {}, onShowModal: () => {}, onCloseModal: () => {}, onCaptureNavigation: () => () => {} });
59
+ const { agentExecShell, agentExecFile, describePermission } = createToolExecutor(pm, ui, getConfig);
60
+ const runner = createAgentRunner({
61
+ chatStream: api.chatStream, extractToolCalls, agentExecShell, agentExecFile,
62
+ describePermission, permissionManager: pm, ui, getConfig,
63
+ });
64
+ return { runner, agentExecFile, describePermission };
65
+ }
66
+
67
+ const noopCb = {
68
+ onToken: () => {}, onToolStart: () => {}, onToolEnd: () => {},
69
+ onError: () => {}, onRetry: () => {}, onAssistantMessage: () => {},
70
+ };
71
+
72
+ // ---------------------------------------------------------------------------
73
+ // (a) Valid PNG staged through the real loop reaches buildProviderMessages.
74
+ // ---------------------------------------------------------------------------
75
+ test('view_image stages a valid PNG into vision context via the /image wire path', async () => {
76
+ const mock = await startMockLLM();
77
+ await withCwdFile('a.png', PNG, async (imgPath) => {
78
+ mock.replyWith(`<view_image>${imgPath}</view_image>`);
79
+ mock.replyWith('It is a red square.');
80
+ try {
81
+ const { runner } = buildRunner(mock.base);
82
+ const messages = [{ role: 'user', content: 'what is in the image?' }];
83
+ await runner.runAgentLoop(messages, 'test-model', 10, null, { callbacks: noopCb });
84
+
85
+ // The encoded image is attached to a message's images[] (same field /image sets).
86
+ const imgMsg = messages.find((m) => Array.isArray(m.images) && m.images.length);
87
+ assert.ok(imgMsg, 'a message carries the staged image');
88
+ assert.strictEqual(imgMsg.images[0].media_type, 'image/png');
89
+ assert.ok(typeof imgMsg.images[0].data === 'string' && imgMsg.images[0].data.length > 0, 'base64 bytes present');
90
+
91
+ // The model-facing text confirms attach without claiming the user can see it.
92
+ const toolMsg = messages.find((m) => typeof m.content === 'string' && /attached to your vision context/.test(m.content));
93
+ assert.ok(toolMsg, 'tool result text confirms the vision attach');
94
+ assert.match(toolMsg.content, /NOT displayed to the user/);
95
+
96
+ // buildProviderMessages (the api.js wire transform) turns it into an OpenAI image_url part.
97
+ const wire = buildProviderMessages(messages, 'openai');
98
+ const wireImg = wire.find((m) => Array.isArray(m.content) && m.content.some((p) => p.type === 'image_url'));
99
+ assert.ok(wireImg, 'buildProviderMessages produced an image_url content part');
100
+ const part = wireImg.content.find((p) => p.type === 'image_url');
101
+ assert.match(part.image_url.url, /^data:image\/png;base64,/);
102
+ } finally {
103
+ await mock.close();
104
+ }
105
+ });
106
+ });
107
+
108
+ // ---------------------------------------------------------------------------
109
+ // (b) Both rails converge on the same tuple.
110
+ // ---------------------------------------------------------------------------
111
+ test('view_image: native params and XML tags converge on ["view_image", path]', () => {
112
+ // Native function-calling rail.
113
+ assert.deepStrictEqual(mapInvokeToCall('view_image', { path: '/a/b.png' }), ['view_image', '/a/b.png']);
114
+ // XML inline form.
115
+ assert.deepStrictEqual(extractToolCalls('<view_image>/a/b.png</view_image>'), [['view_image', '/a/b.png']]);
116
+ // XML attribute form (self-closing and paired).
117
+ assert.deepStrictEqual(extractToolCalls('<view_image path="/a/b.png"/>'), [['view_image', '/a/b.png']]);
118
+ assert.deepStrictEqual(extractToolCalls('<view_image path="/a/b.png"></view_image>'), [['view_image', '/a/b.png']]);
119
+ // Missing path → no call (native), like every other tool.
120
+ assert.strictEqual(mapInvokeToCall('view_image', {}), null);
121
+ });
122
+
123
+ // ---------------------------------------------------------------------------
124
+ // (c) Unsupported / missing / oversized → clean text error, loop not crashed.
125
+ // ---------------------------------------------------------------------------
126
+ test('view_image: unsupported/missing/oversized return a clean error, never crash', async () => {
127
+ const { agentExecFile } = buildRunner('http://127.0.0.1:1'); // base unused here
128
+
129
+ // Missing file.
130
+ const missing = await agentExecFile('view_image', path.join(process.cwd(), 'definitely-not-here.png'));
131
+ assert.ok(missing.error && /not found|unreadable/i.test(missing.error), 'missing file → error');
132
+ assert.ok(!missing.image, 'no image staged on error');
133
+
134
+ // Unsupported format (a .txt with no image magic bytes, inside CWD).
135
+ await withCwdFile('notimg.txt', Buffer.from('hello, not an image'), async (p) => {
136
+ const bad = await agentExecFile('view_image', p);
137
+ assert.ok(bad.error && /Unsupported image format/i.test(bad.error), 'non-image → unsupported error');
138
+ });
139
+
140
+ // Oversized: a valid PNG under a deliberately tiny cap.
141
+ await withCwdFile('big.png', PNG, async (p) => {
142
+ const { agentExecFile: execTiny } = buildRunner('http://127.0.0.1:1', { image_max_bytes: 4 });
143
+ const big = await execTiny('view_image', p);
144
+ assert.ok(big.error && /too large/i.test(big.error), 'oversized → too-large error');
145
+ });
146
+ });
147
+
148
+ test('view_image: a missing-file call inside the loop ends cleanly without crashing', async () => {
149
+ const mock = await startMockLLM();
150
+ mock.replyWith(`<view_image>${path.join(process.cwd(), 'nope.png')}</view_image>`);
151
+ mock.replyWith('Could not load the image; stopping.');
152
+ try {
153
+ const { runner } = buildRunner(mock.base);
154
+ const messages = [{ role: 'user', content: 'view it' }];
155
+ const out = await runner.runAgentLoop(messages, 'test-model', 10, null, { callbacks: noopCb });
156
+ assert.ok(out && out.messages, 'loop returned normally (no crash)');
157
+ const toolMsg = messages.find((m) => typeof m.content === 'string' && /Tool execution results/.test(m.content));
158
+ assert.match(toolMsg.content, /Error —/);
159
+ assert.ok(!messages.some((m) => Array.isArray(m.images) && m.images.length), 'no image staged when the read failed');
160
+ assert.ok(messages.some((m) => m.role === 'assistant' && /stopping/i.test(m.content)), 'final answer recorded');
161
+ } finally {
162
+ await mock.close();
163
+ }
164
+ });
165
+
166
+ // ---------------------------------------------------------------------------
167
+ // (d) Path safety: out-of-sandbox path refused like any file read.
168
+ // ---------------------------------------------------------------------------
169
+ test('view_image: an out-of-sandbox path is refused', async () => {
170
+ const { agentExecFile } = buildRunner('http://127.0.0.1:1');
171
+ const res = await agentExecFile('view_image', '/etc/hostname');
172
+ assert.ok(res.error && /outside allowed area/i.test(res.error), 'path outside CWD refused');
173
+ assert.ok(!res.image, 'no image staged for a refused path');
174
+ });
175
+
176
+ // ---------------------------------------------------------------------------
177
+ // (e) minimax now resolves vision-capable; unconfirmed qwen coder stays null.
178
+ // ---------------------------------------------------------------------------
179
+ test('resolveVisionCapability: minimax is now true; plain qwen coder stays null', () => {
180
+ assert.strictEqual(resolveVisionCapability({}, 'minimax-m3'), true);
181
+ assert.strictEqual(resolveVisionCapability({}, 'MiniMax-M3'), true);
182
+ // Unconfirmed model families must NOT be silently marked vision-capable.
183
+ assert.strictEqual(resolveVisionCapability({}, 'qwen2.5-coder-32b'), null);
184
+ // The pre-existing VL signal is unaffected.
185
+ assert.strictEqual(resolveVisionCapability({}, 'qwen2-vl-7b'), true);
186
+ });
187
+
188
+ // ---------------------------------------------------------------------------
189
+ // (f) No permission gate for view_image; an effectful tool still gates.
190
+ // ---------------------------------------------------------------------------
191
+ test('view_image is read-only (no permission descriptor); an effectful tool still gates', async () => {
192
+ const { describePermission } = buildRunner('http://127.0.0.1:1');
193
+ const viewDesc = await describePermission(['view_image', '/x.png']);
194
+ assert.strictEqual(viewDesc, null, 'view_image resolves to no permission gate (read-only)');
195
+
196
+ // Isolation: an effectful network tool still produces a gate descriptor.
197
+ const dlDesc = await describePermission(['download', 'https://example.com/a.png']);
198
+ assert.ok(dlDesc && dlDesc.actionType, 'download still requires a permission descriptor');
199
+ });
@@ -26,8 +26,10 @@ const { createTurnHandler } = require('../lib/commands/chat-turn');
26
26
  // A fake writer + chatHistory that push into ONE shared ordered log. The web
27
27
  // summary commits via writerModule.endActivity (from webTracker.flush); the
28
28
  // answer commits via chatHistory.finalizeLastMessage. A non-web tool line also
29
- // commits via endActivity — distinguished by content (formatToolLine is mocked
30
- // to a recognizable "TOOL:<tag>" string).
29
+ // commits via endActivity — distinguished by content. As of Phase 1 (Output
30
+ // Refactor) the core tool line renders via the real descriptor→renderer (no
31
+ // longer the injected formatToolLine seam), so web vs tool lines are told apart
32
+ // by the web summary's wording, not a synthetic marker.
31
33
  function harness() {
32
34
  const events = [];
33
35
  const writerModule = {
@@ -47,6 +49,10 @@ function harness() {
47
49
  addMessage() {},
48
50
  streamToken() {},
49
51
  clearStreamingContent() {},
52
+ // Phase 7b boundary calls (chat-turn onToolStart / turn-end finally). No
53
+ // output-preview deferral is driven in these web-ordering scenarios, so a
54
+ // no-op keeps the harness focused on the web-summary ordering it tests.
55
+ deferToolOutput() {}, commitDeferredDetail() {},
50
56
  // An empty finalize (the suppressed intermediate iteration) commits no
51
57
  // visible answer bubble — only record the non-empty terminal answer, which
52
58
  // is what must land below the web summary.
@@ -187,7 +193,10 @@ test('non-web tool after web ops: summary flushed before the non-web tool line',
187
193
  const summaries = h.events.filter((e) => e.kind === 'web-summary');
188
194
  assert.strictEqual(summaries.length, 1, 'one web summary');
189
195
  const iSummary = indexOfKind(h.events, 'web-summary');
190
- const iToolLine = h.events.findIndex((e) => e.kind === 'tool-line' && /read_file/.test(e.line));
196
+ // Phase 1 (Output Refactor): the core tool line now renders via the real
197
+ // descriptor→renderer (read_file → "read /x"), not the injected formatToolLine
198
+ // marker — match the rendered operation rather than the tag name.
199
+ const iToolLine = h.events.findIndex((e) => e.kind === 'tool-line' && /read \/x/.test(e.line));
191
200
  const iAnswer = indexOfKind(h.events, 'answer');
192
201
  assert.ok(iSummary < iToolLine, 'web summary precedes the non-web tool line (flushed by onToolStart)');
193
202
  assert.ok(iToolLine < iAnswer, 'and both precede the answer');
package/path DELETED
@@ -1 +0,0 @@
1
- contenttext