@semalt-ai/code 1.8.5 → 1.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192):
  1. package/.claude/settings.local.json +7 -1
  2. package/.github/workflows/ci.yml +69 -0
  3. package/ARCHITECTURE.md +6 -95
  4. package/CLAUDE.md +196 -316
  5. package/README.md +148 -4
  6. package/docs/ARCHITECTURE.md +1321 -0
  7. package/docs/CONFIG.md +340 -0
  8. package/docs/HISTORY.md +245 -0
  9. package/examples/embed.js +74 -0
  10. package/index.js +251 -10
  11. package/lib/agent.js +856 -120
  12. package/lib/api.js +239 -50
  13. package/lib/args.js +74 -2
  14. package/lib/audit.js +23 -1
  15. package/lib/background.js +584 -0
  16. package/lib/checkpoints.js +757 -0
  17. package/lib/commands/auth.js +94 -0
  18. package/lib/commands/chat-session.js +489 -0
  19. package/lib/commands/chat-slash.js +415 -0
  20. package/lib/commands/chat-turn.js +669 -0
  21. package/lib/commands/chat.js +407 -0
  22. package/lib/commands/custom.js +157 -0
  23. package/lib/commands/history-utils.js +66 -0
  24. package/lib/commands/index.js +268 -0
  25. package/lib/commands/mcp.js +113 -0
  26. package/lib/commands/oneshot.js +193 -0
  27. package/lib/commands/registry.js +269 -0
  28. package/lib/commands/tasks.js +89 -0
  29. package/lib/compact.js +87 -0
  30. package/lib/config.js +360 -11
  31. package/lib/constants.js +401 -3
  32. package/lib/deny.js +199 -0
  33. package/lib/doctor.js +160 -0
  34. package/lib/headless.js +202 -0
  35. package/lib/hooks.js +286 -0
  36. package/lib/images.js +270 -0
  37. package/lib/internals.js +49 -0
  38. package/lib/mcp/boundary.js +131 -0
  39. package/lib/mcp/client.js +270 -0
  40. package/lib/mcp/oauth.js +134 -0
  41. package/lib/memory.js +209 -0
  42. package/lib/metrics.js +37 -2
  43. package/lib/payload.js +54 -0
  44. package/lib/permission-rules.js +401 -0
  45. package/lib/permissions.js +123 -26
  46. package/lib/pricing.js +67 -0
  47. package/lib/proc.js +62 -0
  48. package/lib/prompts.js +99 -8
  49. package/lib/sandbox.js +568 -0
  50. package/lib/sdk.js +328 -0
  51. package/lib/secrets.js +211 -0
  52. package/lib/skills.js +223 -0
  53. package/lib/subagents.js +516 -0
  54. package/lib/tool_registry.js +2862 -0
  55. package/lib/tool_specs.js +263 -9
  56. package/lib/tools.js +352 -1039
  57. package/lib/ui/anim.js +86 -0
  58. package/lib/ui/ansi.js +17 -27
  59. package/lib/ui/chat-history.js +253 -71
  60. package/lib/ui/create-ui.js +67 -24
  61. package/lib/ui/diff.js +90 -25
  62. package/lib/ui/file-activity.js +236 -0
  63. package/lib/ui/format.js +195 -29
  64. package/lib/ui/input-field.js +21 -11
  65. package/lib/ui/md-stream.js +234 -0
  66. package/lib/ui/render-operation.js +113 -0
  67. package/lib/ui/select.js +1 -4
  68. package/lib/ui/status-bar.js +146 -36
  69. package/lib/ui/stream.js +20 -13
  70. package/lib/ui/theme.js +190 -44
  71. package/lib/ui/tool-operation.js +190 -0
  72. package/lib/ui/utils.js +9 -5
  73. package/lib/ui/web-activity.js +270 -0
  74. package/lib/ui/writer.js +159 -45
  75. package/lib/ui.js +1 -1
  76. package/lib/verify.js +229 -0
  77. package/lib/web-extract.js +213 -0
  78. package/lib/web-summarize.js +68 -0
  79. package/package.json +19 -4
  80. package/scripts/lint.js +57 -0
  81. package/test/agent-loop.test.js +389 -0
  82. package/test/anim-driver.test.js +153 -0
  83. package/test/ask-user-display.test.js +226 -0
  84. package/test/ask-user-gate.test.js +231 -0
  85. package/test/background.test.js +414 -0
  86. package/test/chat-history-nocolor.test.js +155 -0
  87. package/test/chat-relogin.test.js +207 -0
  88. package/test/chat.test.js +114 -0
  89. package/test/checkpoints-agent.test.js +181 -0
  90. package/test/checkpoints.test.js +650 -0
  91. package/test/command-registry.test.js +160 -0
  92. package/test/compact.test.js +116 -0
  93. package/test/completion-lazy.test.js +52 -0
  94. package/test/config-merge.test.js +324 -0
  95. package/test/config-quarantine.test.js +128 -0
  96. package/test/config-write-guard-allow-anywhere.test.js +56 -0
  97. package/test/config-write-guard-skip.test.js +46 -0
  98. package/test/config-write-guard.test.js +153 -0
  99. package/test/context-split.test.js +215 -0
  100. package/test/cost-doctor.test.js +142 -0
  101. package/test/custom-commands-chat.test.js +106 -0
  102. package/test/custom-commands.test.js +230 -0
  103. package/test/defer-detail-band.test.js +403 -0
  104. package/test/deny-windows.test.js +120 -0
  105. package/test/deny.test.js +83 -0
  106. package/test/detail-band-tab-flatten.test.js +242 -0
  107. package/test/download-allow-anywhere.test.js +66 -0
  108. package/test/download-confine.test.js +153 -0
  109. package/test/exec-diff.test.js +268 -0
  110. package/test/executors.test.js +599 -0
  111. package/test/extract-tool-calls.test.js +349 -0
  112. package/test/fetch-url-validation.test.js +219 -0
  113. package/test/file-activity.test.js +522 -0
  114. package/test/fixtures/tool-calls.js +57 -0
  115. package/test/fixtures/web-page.js +91 -0
  116. package/test/git-tools.test.js +384 -0
  117. package/test/grep-glob-serialize.test.js +242 -0
  118. package/test/grep-glob.test.js +268 -0
  119. package/test/grep-path-target.test.js +227 -0
  120. package/test/harness/README.md +57 -0
  121. package/test/harness/chat-harness.js +143 -0
  122. package/test/harness/memwarn-headless-child.js +65 -0
  123. package/test/harness/mock-llm.js +120 -0
  124. package/test/harness/mock-mcp-server.js +142 -0
  125. package/test/harness/sse-server.js +69 -0
  126. package/test/headless.test.js +348 -0
  127. package/test/history-utils.test.js +88 -0
  128. package/test/hooks-agent.test.js +238 -0
  129. package/test/hooks-verify-sandbox.test.js +232 -0
  130. package/test/hooks.test.js +216 -0
  131. package/test/http-get-user-agent.test.js +142 -0
  132. package/test/images-api.test.js +208 -0
  133. package/test/images.test.js +238 -0
  134. package/test/input-field-ctrl-o.test.js +37 -0
  135. package/test/live-height-physical.test.js +281 -0
  136. package/test/max-iterations.test.js +218 -0
  137. package/test/mcp-boundary.test.js +57 -0
  138. package/test/mcp-client.test.js +267 -0
  139. package/test/mcp-oauth.test.js +86 -0
  140. package/test/md-stream.test.js +183 -0
  141. package/test/memory-truncation-warning.test.js +222 -0
  142. package/test/memory.test.js +198 -0
  143. package/test/native-dispatch.test.js +409 -0
  144. package/test/native-live-narration.test.js +254 -0
  145. package/test/output-chokepoint.test.js +188 -0
  146. package/test/output-heredoc-leak.test.js +195 -0
  147. package/test/output-preview.test.js +245 -0
  148. package/test/path-guards.test.js +134 -0
  149. package/test/payload.test.js +99 -0
  150. package/test/permission-rules-agent.test.js +210 -0
  151. package/test/permission-rules.test.js +297 -0
  152. package/test/permissions.test.js +362 -0
  153. package/test/plan-mode.test.js +167 -0
  154. package/test/read-paginate.test.js +275 -0
  155. package/test/readonly-tools.test.js +177 -0
  156. package/test/render-operation.test.js +317 -0
  157. package/test/replay-descriptor-xml.test.js +216 -0
  158. package/test/replay-descriptor.test.js +189 -0
  159. package/test/replay-web-aggregate.test.js +291 -0
  160. package/test/replay-web-persist.test.js +241 -0
  161. package/test/result-cap.test.js +233 -0
  162. package/test/running-glyph-anim.test.js +111 -0
  163. package/test/sandbox-agent.test.js +147 -0
  164. package/test/sandbox-integration.test.js +216 -0
  165. package/test/sandbox.test.js +408 -0
  166. package/test/sdk.test.js +234 -0
  167. package/test/shell-output-cap.test.js +181 -0
  168. package/test/skills-chat.test.js +110 -0
  169. package/test/skills.test.js +295 -0
  170. package/test/smoke.test.js +68 -0
  171. package/test/status-bar-driver.test.js +93 -0
  172. package/test/status-bar-pause.test.js +164 -0
  173. package/test/status-bar-resync.test.js +188 -0
  174. package/test/stream-parser.test.js +171 -0
  175. package/test/subagents-agent.test.js +178 -0
  176. package/test/subagents.test.js +222 -0
  177. package/test/theme-palette.test.js +166 -0
  178. package/test/tool-registry.test.js +85 -0
  179. package/test/trim-budget.test.js +101 -0
  180. package/test/truncate-visible.test.js +78 -0
  181. package/test/verify-agent.test.js +317 -0
  182. package/test/verify.test.js +141 -0
  183. package/test/view-image.test.js +199 -0
  184. package/test/web-activity-ordering.test.js +203 -0
  185. package/test/web-activity.test.js +207 -0
  186. package/test/web-data-extraction-guidance.test.js +71 -0
  187. package/test/web-extract.test.js +185 -0
  188. package/test/web-fetch-agent.test.js +291 -0
  189. package/test/web-fetch-mode.test.js +193 -0
  190. package/test/web-search.test.js +380 -0
  191. package/lib/commands.js +0 -1438
  192. package/path +0 -1
@@ -0,0 +1,199 @@
1
+ 'use strict';
2
+
3
+ // Coverage for the model-callable view_image tool: it stages a LOCAL image into
4
+ // the model's vision context via the SAME readImage→images[]→buildProviderMessages
5
+ // path the /image slash command uses — no parallel encoder. Asserts:
6
+ // (a) a valid PNG staged through the real agent loop reaches buildProviderMessages
7
+ // (b) both transport rails converge on the same ['view_image', path] tuple
8
+ // (c) unsupported / missing / oversized inputs return a clean text error (no crash)
9
+ // (d) an out-of-sandbox path is refused like any other file read
10
+ // (e) minimax now resolves vision-capable (true, not null)
11
+ // (f) view_image needs NO permission gate, while an effectful tool still does
12
+
13
+ const { test, before, after } = require('node:test');
14
+ const assert = require('node:assert');
15
+ const fs = require('fs');
16
+ const path = require('path');
17
+
18
+ const ui = require('../lib/ui');
19
+ const { createApiClient } = require('../lib/api');
20
+ const {
21
+ createToolExecutor, extractToolCalls, mapInvokeToCall,
22
+ } = require('../lib/tools');
23
+ const { createPermissionManager } = require('../lib/permissions');
24
+ const { createAgentRunner } = require('../lib/agent');
25
+ const { buildProviderMessages, resolveVisionCapability } = require('../lib/images');
26
+ const { startMockLLM } = require('./harness/mock-llm');
27
+
28
+ // Minimal valid PNG (magic bytes + ≥12 bytes so detectMediaType locks on).
29
+ const PNG = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a, 0, 0, 0, 13]);
30
+
31
+ let prevKey;
32
+ before(() => { prevKey = process.env.SEMALT_API_KEY; process.env.SEMALT_API_KEY = 'test-key'; });
33
+ after(() => {
34
+ if (prevKey === undefined) delete process.env.SEMALT_API_KEY;
35
+ else process.env.SEMALT_API_KEY = prevKey;
36
+ });
37
+
38
+ // view_image reads through the real isPathSafe (CWD-confined), so test images must
39
+ // live inside process.cwd(). Create a unique throwaway file there and clean it up.
40
/**
 * Create a throwaway file inside the current working directory, hand its path
 * to `fn`, and remove the file afterwards whether `fn` resolves or rejects.
 *
 * @param {string} name - Suffix appended to the `._vimg_test_<pid>_` prefix.
 * @param {Buffer|string} buf - Content written to the file before `fn` runs.
 * @param {(filePath: string) => any} fn - Callback that receives the file path.
 * @returns {Promise<any>} Whatever `fn` resolves to.
 */
async function withCwdFile(name, buf, fn) {
  const scratch = path.join(process.cwd(), `._vimg_test_${process.pid}_${name}`);
  const discard = () => {
    try {
      fs.unlinkSync(scratch);
    } catch {
      // Best-effort cleanup: a failed unlink must not mask the callback's outcome.
    }
  };

  fs.writeFileSync(scratch, buf);
  try {
    const outcome = await fn(scratch);
    return outcome;
  } finally {
    discard();
  }
}
45
+
46
/**
 * Wire an agent runner against the given API base using the real api client,
 * permission manager, tool executor and agent runner factories.
 *
 * @param {string} base - Value used for the `api_base` config key.
 * @param {object} [extraConfig={}] - Keys that override the default test config.
 * @returns {{ runner: object, agentExecFile: Function, describePermission: Function }}
 *   The runner plus the two executor entry points the tests call directly.
 */
function buildRunner(base, extraConfig = {}) {
  const defaults = {
    api_base: base,
    api_key: 'test-key',
    default_model: 'test-model',
    temperature: 0.5,
    request_timeout_ms: 5000,
    stream: true,
    models: [],
    image_max_bytes: 5 * 1024 * 1024,
    sandbox: { mode: 'off' },
  };
  // Later keys win, so anything in extraConfig replaces the default above.
  const config = { ...defaults, ...extraConfig };

  const getConfig = () => config;
  const saveConfig = (patch) => Object.assign(config, patch);

  const api = createApiClient({ getConfig, saveConfig, ui });

  const permissionManager = createPermissionManager(ui, { skipPermissions: true });
  permissionManager.setUICallbacks({
    onAddMessage: () => {},
    onShowModal: () => {},
    onCloseModal: () => {},
    onCaptureNavigation: () => () => {},
  });

  const {
    agentExecShell,
    agentExecFile,
    describePermission,
  } = createToolExecutor(permissionManager, ui, getConfig);

  const runner = createAgentRunner({
    chatStream: api.chatStream,
    extractToolCalls,
    agentExecShell,
    agentExecFile,
    describePermission,
    permissionManager,
    ui,
    getConfig,
  });

  return { runner, agentExecFile, describePermission };
}
66
+
67
// Callback set passed to runAgentLoop when a test does not observe any of the
// loop's events: every hook is present and does nothing.
const noopCb = Object.fromEntries(
  ['onToken', 'onToolStart', 'onToolEnd', 'onError', 'onRetry', 'onAssistantMessage']
    .map((hook) => [hook, () => {}]),
);
71
+
72
+ // ---------------------------------------------------------------------------
73
+ // (a) Valid PNG staged through the real loop reaches buildProviderMessages.
74
+ // ---------------------------------------------------------------------------
75
test('view_image stages a valid PNG into vision context via the /image wire path', async () => {
  const mock = await startMockLLM();
  // Close the mock server in the OUTER finally so it is released even when
  // creating the fixture file throws before the callback ever runs.
  try {
    await withCwdFile('a.png', PNG, async (imgPath) => {
      mock.replyWith(`<view_image>${imgPath}</view_image>`);
      mock.replyWith('It is a red square.');

      const { runner } = buildRunner(mock.base);
      const messages = [{ role: 'user', content: 'what is in the image?' }];
      await runner.runAgentLoop(messages, 'test-model', 10, null, { callbacks: noopCb });

      // The encoded image is attached to a message's images[] (same field /image sets).
      const imgMsg = messages.find((m) => Array.isArray(m.images) && m.images.length);
      assert.ok(imgMsg, 'a message carries the staged image');
      assert.strictEqual(imgMsg.images[0].media_type, 'image/png');
      assert.ok(typeof imgMsg.images[0].data === 'string' && imgMsg.images[0].data.length > 0, 'base64 bytes present');

      // The model-facing text confirms attach without claiming the user can see it.
      const toolMsg = messages.find((m) => typeof m.content === 'string' && /attached to your vision context/.test(m.content));
      assert.ok(toolMsg, 'tool result text confirms the vision attach');
      assert.match(toolMsg.content, /NOT displayed to the user/);

      // buildProviderMessages (the api.js wire transform) turns it into an OpenAI image_url part.
      const wire = buildProviderMessages(messages, 'openai');
      const wireImg = wire.find((m) => Array.isArray(m.content) && m.content.some((p) => p.type === 'image_url'));
      assert.ok(wireImg, 'buildProviderMessages produced an image_url content part');
      const part = wireImg.content.find((p) => p.type === 'image_url');
      assert.match(part.image_url.url, /^data:image\/png;base64,/);
    });
  } finally {
    await mock.close();
  }
});
107
+
108
+ // ---------------------------------------------------------------------------
109
+ // (b) Both rails converge on the same tuple.
110
+ // ---------------------------------------------------------------------------
111
+ test('view_image: native params and XML tags converge on ["view_image", path]', () => {
112
+ // Native function-calling rail.
113
+ assert.deepStrictEqual(mapInvokeToCall('view_image', { path: '/a/b.png' }), ['view_image', '/a/b.png']);
114
+ // XML inline form.
115
+ assert.deepStrictEqual(extractToolCalls('<view_image>/a/b.png</view_image>'), [['view_image', '/a/b.png']]);
116
+ // XML attribute form (self-closing and paired).
117
+ assert.deepStrictEqual(extractToolCalls('<view_image path="/a/b.png"/>'), [['view_image', '/a/b.png']]);
118
+ assert.deepStrictEqual(extractToolCalls('<view_image path="/a/b.png"></view_image>'), [['view_image', '/a/b.png']]);
119
+ // Missing path → no call (native), like every other tool.
120
+ assert.strictEqual(mapInvokeToCall('view_image', {}), null);
121
+ });
122
+
123
+ // ---------------------------------------------------------------------------
124
+ // (c) Unsupported / missing / oversized → clean text error, loop not crashed.
125
+ // ---------------------------------------------------------------------------
126
+ test('view_image: unsupported/missing/oversized return a clean error, never crash', async () => {
127
+ const { agentExecFile } = buildRunner('http://127.0.0.1:1'); // base unused here
128
+
129
+ // Missing file.
130
+ const missing = await agentExecFile('view_image', path.join(process.cwd(), 'definitely-not-here.png'));
131
+ assert.ok(missing.error && /not found|unreadable/i.test(missing.error), 'missing file → error');
132
+ assert.ok(!missing.image, 'no image staged on error');
133
+
134
+ // Unsupported format (a .txt with no image magic bytes, inside CWD).
135
+ await withCwdFile('notimg.txt', Buffer.from('hello, not an image'), async (p) => {
136
+ const bad = await agentExecFile('view_image', p);
137
+ assert.ok(bad.error && /Unsupported image format/i.test(bad.error), 'non-image → unsupported error');
138
+ });
139
+
140
+ // Oversized: a valid PNG under a deliberately tiny cap.
141
+ await withCwdFile('big.png', PNG, async (p) => {
142
+ const { agentExecFile: execTiny } = buildRunner('http://127.0.0.1:1', { image_max_bytes: 4 });
143
+ const big = await execTiny('view_image', p);
144
+ assert.ok(big.error && /too large/i.test(big.error), 'oversized → too-large error');
145
+ });
146
+ });
147
+
148
test('view_image: a missing-file call inside the loop ends cleanly without crashing', async () => {
  const mock = await startMockLLM();
  mock.replyWith(`<view_image>${path.join(process.cwd(), 'nope.png')}</view_image>`);
  mock.replyWith('Could not load the image; stopping.');
  try {
    const { runner } = buildRunner(mock.base);
    const messages = [{ role: 'user', content: 'view it' }];
    const out = await runner.runAgentLoop(messages, 'test-model', 10, null, { callbacks: noopCb });
    assert.ok(out && out.messages, 'loop returned normally (no crash)');

    const toolMsg = messages.find((m) => typeof m.content === 'string' && /Tool execution results/.test(m.content));
    // Guard first: without it a missing message fails as a TypeError on
    // `toolMsg.content` instead of a readable assertion failure.
    assert.ok(toolMsg, 'a tool-results message was recorded');
    assert.match(toolMsg.content, /Error —/);

    assert.ok(!messages.some((m) => Array.isArray(m.images) && m.images.length), 'no image staged when the read failed');
    assert.ok(messages.some((m) => m.role === 'assistant' && /stopping/i.test(m.content)), 'final answer recorded');
  } finally {
    await mock.close();
  }
});
165
+
166
+ // ---------------------------------------------------------------------------
167
+ // (d) Path safety: out-of-sandbox path refused like any file read.
168
+ // ---------------------------------------------------------------------------
169
+ test('view_image: an out-of-sandbox path is refused', async () => {
170
+ const { agentExecFile } = buildRunner('http://127.0.0.1:1');
171
+ const res = await agentExecFile('view_image', '/etc/hostname');
172
+ assert.ok(res.error && /outside allowed area/i.test(res.error), 'path outside CWD refused');
173
+ assert.ok(!res.image, 'no image staged for a refused path');
174
+ });
175
+
176
+ // ---------------------------------------------------------------------------
177
+ // (e) minimax now resolves vision-capable; unconfirmed qwen coder stays null.
178
+ // ---------------------------------------------------------------------------
179
+ test('resolveVisionCapability: minimax is now true; plain qwen coder stays null', () => {
180
+ assert.strictEqual(resolveVisionCapability({}, 'minimax-m3'), true);
181
+ assert.strictEqual(resolveVisionCapability({}, 'MiniMax-M3'), true);
182
+ // Unconfirmed model families must NOT be silently marked vision-capable.
183
+ assert.strictEqual(resolveVisionCapability({}, 'qwen2.5-coder-32b'), null);
184
+ // The pre-existing VL signal is unaffected.
185
+ assert.strictEqual(resolveVisionCapability({}, 'qwen2-vl-7b'), true);
186
+ });
187
+
188
+ // ---------------------------------------------------------------------------
189
+ // (f) No permission gate for view_image; an effectful tool still gates.
190
+ // ---------------------------------------------------------------------------
191
+ test('view_image is read-only (no permission descriptor); an effectful tool still gates', async () => {
192
+ const { describePermission } = buildRunner('http://127.0.0.1:1');
193
+ const viewDesc = await describePermission(['view_image', '/x.png']);
194
+ assert.strictEqual(viewDesc, null, 'view_image resolves to no permission gate (read-only)');
195
+
196
+ // Isolation: an effectful network tool still produces a gate descriptor.
197
+ const dlDesc = await describePermission(['download', 'https://example.com/a.png']);
198
+ assert.ok(dlDesc && dlDesc.actionType, 'download still requires a permission descriptor');
199
+ });
@@ -0,0 +1,203 @@
1
+ 'use strict';
2
+
3
+ // Web-activity ordering (W.3 regression fix). The collapsed "✓ web · …" summary
4
+ // must commit to scrollback BEFORE the agent's answer, not after it.
5
+ //
6
+ // The W.3 regression: http_get/web_search deferred their scrollback commit from
7
+ // "tool end" to webTracker.flush(), and in a "web-op(s) → answer" turn the only
8
+ // flush that fired was the turn-end `finally` — which runs AFTER runAgentLoop
9
+ // returns, i.e. after the answer was already committed. The fix flushes the open
10
+ // web group in onAssistantMessage when cleanContent is non-empty (the terminal
11
+ // response signal), while intermediate empty-content iterations keep the group
12
+ // open so multi-step search→fetch still collapses to one line.
13
+ //
14
+ // These tests drive the REAL createTurnHandler callbacks (chat-turn.js) with a
15
+ // mock runAgentLoop that invokes them in the order agent.js does — per iteration
16
+ // onAssistantMessage(displayReply) fires first (empty '' when the iteration
17
+ // carried tool calls, non-empty on the final answer), then the tools execute —
18
+ // recording an ordered event log so we can assert "summary before answer".
19
+
20
+ const { test } = require('node:test');
21
+ const assert = require('node:assert');
22
+
23
+ const { stripAnsi } = require('../lib/ui/utils');
24
+ const { createTurnHandler } = require('../lib/commands/chat-turn');
25
+
26
+ // A fake writer + chatHistory that push into ONE shared ordered log. The web
27
+ // summary commits via writerModule.endActivity (from webTracker.flush); the
28
+ // answer commits via chatHistory.finalizeLastMessage. A non-web tool line also
29
+ // commits via endActivity — distinguished by content. As of Phase 1 (Output
30
+ // Refactor) the core tool line renders via the real descriptor→renderer (no
31
+ // longer the injected formatToolLine seam), so web vs tool lines are told apart
32
+ // by the web summary's wording, not a synthetic marker.
33
/**
 * Build a turn handler wired to fakes that all append to one ordered event log.
 *
 * Lines passed to `writerModule.endActivity` are logged as `web-summary` or
 * `tool-line` depending on their wording; a non-empty
 * `chatHistory.finalizeLastMessage` is logged as `answer`.
 *
 * @returns {{ events: object[], handler: Function, setScenario: Function }}
 *   The shared log, the handler from createTurnHandler, and a setter for the
 *   scenario the mock runAgentLoop will play against the handler's callbacks.
 */
function harness() {
  const events = [];
  const noop = () => {};

  const writerModule = {
    startActivity: noop,
    updateActivity: noop,
    endActivity(id, line) {
      const plain = stripAnsi(String(line));
      const isWebSummary = /web\b/.test(plain)
        && /(source|search|web)/.test(plain)
        && !plain.startsWith('TOOL:');
      events.push({ kind: isWebSummary ? 'web-summary' : 'tool-line', line: plain });
    },
    scrollback(line) {
      events.push({ kind: 'scrollback', line: String(line) });
    },
  };

  const chatHistory = {
    addMessage: noop,
    streamToken: noop,
    clearStreamingContent: noop,
    // Phase 7b boundary calls (chat-turn onToolStart / turn-end finally). No
    // output-preview deferral is driven in these web-ordering scenarios, so
    // no-ops keep the harness focused on the web-summary ordering it tests.
    deferToolOutput: noop,
    commitDeferredDetail: noop,
    // An empty finalize (the suppressed intermediate iteration) commits no
    // visible answer bubble — only the non-empty terminal answer is recorded,
    // which is what must land below the web summary.
    finalizeLastMessage(content) {
      if (!content || !content.trim()) return;
      events.push({ kind: 'answer', content });
    },
  };

  const statusBar = {
    update: noop,
    onToken: noop,
    addPendingTokens: noop,
    updateMetrics: noop,
    setCost: noop,
  };

  const inputField = {
    on: noop,
    removeListener: noop,
    releaseNavigation: noop,
    setDisabled: noop,
  };

  // Replaced by each test (via setScenario) before it invokes the handler.
  let scenario = async () => {};
  const runAgentLoop = async (messages, model, maxIter, limit, loopOpts) => {
    await scenario(loopOpts.callbacks);
    return { messages, metrics: { turns: [] }, withheldActions: [] };
  };

  const ctx = {
    inputField,
    statusBar,
    chatHistory,
    writerModule,
    runAgentLoop,
    getConfig: () => ({
      auth_token: 'tok',
      max_iterations: 50,
      show_cost: false,
      system_prompt_mode: 'system_role',
    }),
    approxTokens: () => 0,
    resolveCommand: () => null,
    opts: {},
    TAG_REGISTRY: {},
    formatToolLine: (o) => `TOOL:${o && o.tag}`,
    collapseListMsg: noop,
    handlePendingSelection: noop,
    showPendingStep: noop,
    activateNavCapture: noop,
    finalizeListMsg: noop,
    createChatIfNeeded: async () => {},
    saveTurnToDashboard: async () => {},
    saveSession: noop,
    messages: [],
    currentModel: 'm',
    debugMode: false,
    pendingImages: [],
    chatSync: async () => '',
    resolvedSystemPrompt: '',
    resolvedTokenLimit: null,
    planMode: false,
  };

  const handler = createTurnHandler(ctx, {});
  const setScenario = (fn) => {
    scenario = fn;
  };
  return { events, handler, setScenario };
}
93
+
94
+ // Helpers to simulate the agent.js per-iteration callback order.
95
/**
 * Play one tool-carrying iteration against the callbacks: an empty assistant
 * message, then the tool's start and end events.
 *
 * @param {object} cb - Callbacks exposing onAssistantMessage/onToolStart/onToolEnd.
 * @param {string} tag - Tool tag; 'web_search' carries `query`, anything else `url`.
 * @param {string} input - The query or URL handed to the tool.
 * @param {object|null} meta - Result metadata forwarded on the end event.
 */
function webToolIteration(cb, tag, input, meta) {
  const opId = `${tag}-1`;
  // Built per call so the start and end events never share one attrs object.
  const attrsFor = () => (tag === 'web_search' ? { query: input } : { url: input });

  cb.onAssistantMessage(''); // suppressed (this iteration had a tool call)
  cb.onToolStart(tag, input, { id: opId, attrs: attrsFor() });
  cb.onToolEnd(tag, {}, 120, { id: opId, attrs: attrsFor(), meta, error: null });
}
100
+
101
/**
 * Position of the first logged event of the given kind.
 *
 * @param {Array<{kind: string}>} events - Ordered event log.
 * @param {string} kind - Event kind to look for.
 * @returns {number} Zero-based index, or -1 when no event has that kind.
 */
function indexOfKind(events, kind) {
  return events.findIndex(({ kind: candidate }) => candidate === kind);
}
102
+
103
+ // ---------------------------------------------------------------------------
104
+ // The regression: single http_get → answer commits the summary BEFORE the answer
105
+ // ---------------------------------------------------------------------------
106
+
107
+ test('single http_get → answer: web summary commits before the answer', async () => {
108
+ const h = harness();
109
+ h.setScenario(async (cb) => {
110
+ webToolIteration(cb, 'http_get', 'https://a.example', { status_code: 200, bytes: 1000 });
111
+ cb.onAssistantMessage('Here is the synthesized answer.'); // final answer iteration
112
+ });
113
+
114
+ await h.handler('summarize https://a.example');
115
+
116
+ const summaries = h.events.filter((e) => e.kind === 'web-summary');
117
+ assert.strictEqual(summaries.length, 1, 'exactly one collapsed summary');
118
+ const iSummary = indexOfKind(h.events, 'web-summary');
119
+ const iAnswer = indexOfKind(h.events, 'answer');
120
+ assert.ok(iSummary >= 0 && iAnswer >= 0, 'both committed');
121
+ assert.ok(iSummary < iAnswer, 'the web summary precedes the answer (the bug being fixed)');
122
+ assert.match(summaries[0].line, /1 source read/);
123
+ });
124
+
125
+ // ---------------------------------------------------------------------------
126
+ // The W.3 guarantee preserved: multi-step search→fetch still collapses to ONE line
127
+ // ---------------------------------------------------------------------------
128
+
129
+ test('web_search → http_get → answer: one collapsed line, before the answer; intermediate iteration does NOT flush', async () => {
130
+ const h = harness();
131
+ h.setScenario(async (cb) => {
132
+ // Iteration 1: web_search (separate LLM round-trip from the fetch).
133
+ webToolIteration(cb, 'web_search', 'corruption scandals', null);
134
+ // Iteration 2: http_get — its onAssistantMessage('') must NOT flush, else the
135
+ // single collapsed line would split into two.
136
+ webToolIteration(cb, 'http_get', 'https://a.example', { status_code: 200, bytes: 1000 });
137
+ // Iteration 3: the final answer.
138
+ cb.onAssistantMessage('Final answer with citations.');
139
+ });
140
+
141
+ await h.handler('research corruption scandals');
142
+
143
+ const summaries = h.events.filter((e) => e.kind === 'web-summary');
144
+ assert.strictEqual(summaries.length, 1, 'multi-step web activity collapses to exactly ONE line (W.3 guarantee)');
145
+ const iSummary = indexOfKind(h.events, 'web-summary');
146
+ const iAnswer = indexOfKind(h.events, 'answer');
147
+ assert.ok(iSummary < iAnswer, 'the single collapsed summary precedes the answer');
148
+ // Both the search and the read are reflected in the one line.
149
+ assert.match(summaries[0].line, /search "corruption scandals"/);
150
+ assert.match(summaries[0].line, /1 source read/);
151
+ });
152
+
153
+ // ---------------------------------------------------------------------------
154
+ // Safety net: an empty / interrupted turn still flushes via the turn-end finally
155
+ // ---------------------------------------------------------------------------
156
+
157
+ test('empty/interrupted answer: summary still committed via the turn-end finally', async () => {
158
+ const h = harness();
159
+ h.setScenario(async (cb) => {
160
+ // A turn that did web work but never produced a non-empty assistant message
161
+ // (e.g. hit the iteration cap, or was interrupted). No final flush in
162
+ // onAssistantMessage — the `finally` is the safety net.
163
+ webToolIteration(cb, 'http_get', 'https://a.example', { status_code: 200, bytes: 1000 });
164
+ });
165
+
166
+ await h.handler('fetch https://a.example');
167
+
168
+ const summaries = h.events.filter((e) => e.kind === 'web-summary');
169
+ assert.strictEqual(summaries.length, 1, 'the summary is not lost — flushed in finally');
170
+ assert.strictEqual(indexOfKind(h.events, 'answer'), -1, 'no non-empty answer was finalized');
171
+ });
172
+
173
+ // ---------------------------------------------------------------------------
174
+ // Non-web tool after web ops: still flushes via onToolStart (unregressed)
175
+ // ---------------------------------------------------------------------------
176
+
177
+ test('non-web tool after web ops: summary flushed before the non-web tool line', async () => {
178
+ const h = harness();
179
+ h.setScenario(async (cb) => {
180
+ // Iteration 1: http_get.
181
+ webToolIteration(cb, 'http_get', 'https://a.example', { status_code: 200, bytes: 1000 });
182
+ // Iteration 2: a non-web tool (read_file). Its onToolStart closes the open
183
+ // web group first (chat-turn.js line 211) so the summary lands above its line.
184
+ cb.onAssistantMessage('');
185
+ cb.onToolStart('read_file', '/x', { id: 'rf-1', attrs: { path: '/x' } });
186
+ cb.onToolEnd('read_file', 'contents', 5, { id: 'rf-1', attrs: { path: '/x' }, meta: null, error: null });
187
+ // Iteration 3: the answer.
188
+ cb.onAssistantMessage('Done.');
189
+ });
190
+
191
+ await h.handler('fetch then read');
192
+
193
+ const summaries = h.events.filter((e) => e.kind === 'web-summary');
194
+ assert.strictEqual(summaries.length, 1, 'one web summary');
195
+ const iSummary = indexOfKind(h.events, 'web-summary');
196
+ // Phase 1 (Output Refactor): the core tool line now renders via the real
197
+ // descriptor→renderer (read_file → "read /x"), not the injected formatToolLine
198
+ // marker — match the rendered operation rather than the tag name.
199
+ const iToolLine = h.events.findIndex((e) => e.kind === 'tool-line' && /read \/x/.test(e.line));
200
+ const iAnswer = indexOfKind(h.events, 'answer');
201
+ assert.ok(iSummary < iToolLine, 'web summary precedes the non-web tool line (flushed by onToolStart)');
202
+ assert.ok(iToolLine < iAnswer, 'and both precede the answer');
203
+ });
@@ -0,0 +1,207 @@
1
+ 'use strict';
2
+
3
+ // Web-activity process summary (Task W.3, Part 1). The default view collapses a
4
+ // run of web ops (web_search → http_get) into ONE compact summary line; --debug
5
+ // keeps the full per-operation lines. These tests pin the pure renderer (counts:
6
+ // queries / sources read / failures), the debug-vs-default branch, that a failed
7
+ // fetch (403/timeout) is reflected (not dropped), that non-web tools are out of
8
+ // scope, and the stateful tracker's collapse-to-one-committed-line behaviour.
9
+
10
+ const { test } = require('node:test');
11
+ const assert = require('node:assert');
12
+
13
+ const { stripAnsi } = require('../lib/ui/utils');
14
+ const { formatToolLine } = require('../lib/ui/format');
15
+ const {
16
+ isWebTool,
17
+ opSucceeded,
18
+ aggregateWebOps,
19
+ webSummaryText,
20
+ formatWebSummaryLine,
21
+ renderWebActivity,
22
+ createWebActivityTracker,
23
+ } = require('../lib/ui/web-activity');
24
+
25
+ // ---------------------------------------------------------------------------
26
+ // Scope: which tools are collapsed
27
+ // ---------------------------------------------------------------------------
28
+
29
+ test('isWebTool: only web_search and http_get are in scope', () => {
30
+ assert.strictEqual(isWebTool('web_search'), true);
31
+ assert.strictEqual(isWebTool('http_get'), true);
32
+ // download writes a file, not a page read — keeps its own line.
33
+ assert.strictEqual(isWebTool('download'), false);
34
+ assert.strictEqual(isWebTool('shell'), false);
35
+ assert.strictEqual(isWebTool('read_file'), false);
36
+ assert.strictEqual(isWebTool('write_file'), false);
37
+ });
38
+
39
+ // ---------------------------------------------------------------------------
40
+ // Success classification (the 403/406 "blocked" rule)
41
+ // ---------------------------------------------------------------------------
42
+
43
+ test('opSucceeded: http_get >= 400 is a failure even with no transport error', () => {
44
+ assert.strictEqual(opSucceeded({ tag: 'http_get', status: 200 }), true);
45
+ assert.strictEqual(opSucceeded({ tag: 'http_get', status: 403 }), false);
46
+ assert.strictEqual(opSucceeded({ tag: 'http_get', status: 406 }), false);
47
+ // A transport error (timeout/DNS) is a failure regardless of status.
48
+ assert.strictEqual(opSucceeded({ tag: 'http_get', error: 'Request timeout' }), false);
49
+ // web_search is ok unless the backend errored.
50
+ assert.strictEqual(opSucceeded({ tag: 'web_search' }), true);
51
+ assert.strictEqual(opSucceeded({ tag: 'web_search', error: 'web search unavailable' }), false);
52
+ });
53
+
54
+ // ---------------------------------------------------------------------------
55
+ // Pure summary text — reflects queries, sources read, failures
56
+ // ---------------------------------------------------------------------------
57
+
58
+ test('webSummaryText: reflects query count, sources read, and blocked count', () => {
59
+ const ops = [
60
+ { tag: 'web_search', query: 'коррупционные скандалы 2024' },
61
+ { tag: 'web_search', query: 'follow-up query' },
62
+ { tag: 'http_get', url: 'https://a.example/1', status: 200 },
63
+ { tag: 'http_get', url: 'https://b.example/2', status: 200 },
64
+ { tag: 'http_get', url: 'https://ru.wikipedia.org/x', status: 403 },
65
+ ];
66
+ const text = webSummaryText(aggregateWebOps(ops));
67
+ assert.match(text, /search "коррупционные/); // leads with the query
68
+ assert.match(text, /2 queries/); // query count visible
69
+ assert.match(text, /2 sources read/); // successful reads
70
+ assert.match(text, /1 blocked/); // the 403 is surfaced, not dropped
71
+ });
72
+
73
+ test('webSummaryText: a timeout counts as blocked, not silently dropped', () => {
74
+ const ops = [
75
+ { tag: 'http_get', url: 'https://slow.example', error: 'Request timeout' },
76
+ { tag: 'http_get', url: 'https://ok.example', status: 200 },
77
+ ];
78
+ const text = webSummaryText(aggregateWebOps(ops));
79
+ assert.match(text, /1 source read/);
80
+ assert.match(text, /1 blocked/);
81
+ });
82
+
83
+ test('webSummaryText: a failed web_search is surfaced', () => {
84
+ const ops = [{ tag: 'web_search', query: 'q', error: 'web search unavailable: backend down' }];
85
+ const text = webSummaryText(aggregateWebOps(ops));
86
+ assert.match(text, /search failed/);
87
+ });
88
+
89
+ test('webSummaryText: fetch-only flow (no search) still reads cleanly', () => {
90
+ const ops = [{ tag: 'http_get', url: 'https://x', status: 200 }];
91
+ assert.match(webSummaryText(aggregateWebOps(ops)), /1 source read/);
92
+ });
93
+
94
+ test('aggregateWebOps: counts are exact', () => {
95
+ const s = aggregateWebOps([
96
+ { tag: 'web_search', query: 'a' },
97
+ { tag: 'http_get', status: 200 },
98
+ { tag: 'http_get', status: 200 },
99
+ { tag: 'http_get', status: 500 },
100
+ ]);
101
+ assert.deepStrictEqual(
102
+ { searchCount: s.searchCount, fetchCount: s.fetchCount, fetchOk: s.fetchOk, fetchFailed: s.fetchFailed },
103
+ { searchCount: 1, fetchCount: 3, fetchOk: 2, fetchFailed: 1 },
104
+ );
105
+ });
106
+
107
+ // ---------------------------------------------------------------------------
108
+ // renderWebActivity — debug branch keeps full per-op detail; default collapses
109
+ // ---------------------------------------------------------------------------
110
+
111
+ const SAMPLE_OPS = [
112
+ { tag: 'web_search', query: 'how do tariffs work', durationMs: 941 },
113
+ { tag: 'http_get', url: 'https://24tv.ua/article', status: 200, bytes: 406 * 1024, durationMs: 171 },
114
+ { tag: 'http_get', url: 'https://ru.wikipedia.org/page', status: 403, bytes: 126, durationMs: 25 },
115
+ ];
116
+
117
+ test('renderWebActivity (default): a sequence of web ops → ONE compact summary line', () => {
118
+ const lines = renderWebActivity(SAMPLE_OPS, { debug: false, formatToolLine });
119
+ assert.strictEqual(lines.length, 1, 'collapsed to a single line');
120
+ const plain = stripAnsi(lines[0]);
121
+ assert.match(plain, /web/);
122
+ assert.match(plain, /search "how do tariffs work"/);
123
+ assert.match(plain, /1 source read/);
124
+ assert.match(plain, /1 blocked/);
125
+ });
126
+
127
+ test('renderWebActivity (--debug): full per-operation lines, nothing hidden', () => {
128
+ const lines = renderWebActivity(SAMPLE_OPS, { debug: true, formatToolLine });
129
+ assert.strictEqual(lines.length, SAMPLE_OPS.length, 'one line per op');
130
+ const all = lines.map(stripAnsi);
131
+ // The query and both URLs survive in the detailed view.
132
+ assert.ok(all.some((l) => /how do tariffs work/.test(l)));
133
+ assert.ok(all.some((l) => /24tv\.ua/.test(l)));
134
+ assert.ok(all.some((l) => /ru\.wikipedia\.org/.test(l)));
135
+ // The HTTP status codes (200 / 403) are present in the per-op meta.
136
+ assert.ok(all.some((l) => /\b200\b/.test(l)));
137
+ assert.ok(all.some((l) => /\b403\b/.test(l)));
138
+ });
139
+
140
+ // ---------------------------------------------------------------------------
141
+ // Styled line: glyph + failures coloured, plain text correct
142
+ // ---------------------------------------------------------------------------
143
+
144
+ test('formatWebSummaryLine: pending shows ●, committed shows ✓', () => {
145
+ const state = aggregateWebOps(SAMPLE_OPS);
146
+ assert.match(formatWebSummaryLine(state, { pending: true, durationMs: 500 }), /●/);
147
+ assert.match(formatWebSummaryLine(state, { pending: false }), /✓/);
148
+ });
149
+
150
+ // ---------------------------------------------------------------------------
151
+ // Stateful tracker — collapse a multi-op group into one committed line
152
+ // ---------------------------------------------------------------------------
153
+
154
+ function fakeWriter() {
155
+ const calls = { start: [], update: 0, end: [] };
156
+ return {
157
+ calls,
158
+ startActivity(id) { calls.start.push(id); },
159
+ updateActivity() { calls.update += 1; },
160
+ endActivity(id, line) { calls.end.push({ id, line }); },
161
+ };
162
+ }
163
+
164
+ test('tracker: a run of web ops commits exactly ONE summary line on flush', () => {
165
+ const w = fakeWriter();
166
+ const t = createWebActivityTracker({ writerModule: w });
167
+
168
+ t.start('web_search', 'коррупционные скандалы');
169
+ t.end('web_search', { results: [] }, 900, { attrs: { query: 'коррупционные скандалы' } });
170
+ t.start('http_get', 'https://a.example');
171
+ t.end('http_get', {}, 170, { attrs: { url: 'https://a.example' }, meta: { status_code: 200, bytes: 1000 } });
172
+ t.start('http_get', 'https://ru.wikipedia.org/x');
173
+ t.end('http_get', {}, 25, { attrs: { url: 'https://ru.wikipedia.org/x' }, meta: { status_code: 403, bytes: 126 } });
174
+
175
+ assert.strictEqual(w.calls.start.length, 1, 'one activity opened for the whole group');
176
+ assert.strictEqual(t.isOpen(), true);
177
+
178
+ t.flush();
179
+ assert.strictEqual(t.isOpen(), false);
180
+ assert.strictEqual(w.calls.end.length, 1, 'one committed summary line');
181
+ const plain = stripAnsi(w.calls.end[0].line);
182
+ assert.match(plain, /search "коррупционные скандалы"/);
183
+ assert.match(plain, /1 source read/);
184
+ assert.match(plain, /1 blocked/);
185
+ });
186
+
187
+ test('tracker: flush with no open group is a no-op', () => {
188
+ const w = fakeWriter();
189
+ const t = createWebActivityTracker({ writerModule: w });
190
+ t.flush();
191
+ assert.strictEqual(w.calls.start.length, 0);
192
+ assert.strictEqual(w.calls.end.length, 0);
193
+ });
194
+
195
+ test('tracker: a second group after flush opens a fresh activity', () => {
196
+ const w = fakeWriter();
197
+ const t = createWebActivityTracker({ writerModule: w });
198
+ t.start('web_search', 'q1');
199
+ t.end('web_search', {}, 10, { attrs: { query: 'q1' } });
200
+ t.flush();
201
+ t.start('http_get', 'https://x');
202
+ t.end('http_get', {}, 10, { attrs: { url: 'https://x' }, meta: { status_code: 200 } });
203
+ t.flush();
204
+ assert.strictEqual(w.calls.start.length, 2, 'two distinct groups');
205
+ assert.strictEqual(w.calls.end.length, 2);
206
+ assert.notStrictEqual(w.calls.start[0], w.calls.start[1], 'distinct group ids');
207
+ });