@semalt-ai/code 1.8.5 → 1.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192)
  1. package/.claude/settings.local.json +7 -1
  2. package/.github/workflows/ci.yml +69 -0
  3. package/ARCHITECTURE.md +6 -95
  4. package/CLAUDE.md +196 -316
  5. package/README.md +148 -4
  6. package/docs/ARCHITECTURE.md +1321 -0
  7. package/docs/CONFIG.md +340 -0
  8. package/docs/HISTORY.md +245 -0
  9. package/examples/embed.js +74 -0
  10. package/index.js +251 -10
  11. package/lib/agent.js +856 -120
  12. package/lib/api.js +239 -50
  13. package/lib/args.js +74 -2
  14. package/lib/audit.js +23 -1
  15. package/lib/background.js +584 -0
  16. package/lib/checkpoints.js +757 -0
  17. package/lib/commands/auth.js +94 -0
  18. package/lib/commands/chat-session.js +489 -0
  19. package/lib/commands/chat-slash.js +415 -0
  20. package/lib/commands/chat-turn.js +669 -0
  21. package/lib/commands/chat.js +407 -0
  22. package/lib/commands/custom.js +157 -0
  23. package/lib/commands/history-utils.js +66 -0
  24. package/lib/commands/index.js +268 -0
  25. package/lib/commands/mcp.js +113 -0
  26. package/lib/commands/oneshot.js +193 -0
  27. package/lib/commands/registry.js +269 -0
  28. package/lib/commands/tasks.js +89 -0
  29. package/lib/compact.js +87 -0
  30. package/lib/config.js +360 -11
  31. package/lib/constants.js +401 -3
  32. package/lib/deny.js +199 -0
  33. package/lib/doctor.js +160 -0
  34. package/lib/headless.js +202 -0
  35. package/lib/hooks.js +286 -0
  36. package/lib/images.js +270 -0
  37. package/lib/internals.js +49 -0
  38. package/lib/mcp/boundary.js +131 -0
  39. package/lib/mcp/client.js +270 -0
  40. package/lib/mcp/oauth.js +134 -0
  41. package/lib/memory.js +209 -0
  42. package/lib/metrics.js +37 -2
  43. package/lib/payload.js +54 -0
  44. package/lib/permission-rules.js +401 -0
  45. package/lib/permissions.js +123 -26
  46. package/lib/pricing.js +67 -0
  47. package/lib/proc.js +62 -0
  48. package/lib/prompts.js +99 -8
  49. package/lib/sandbox.js +568 -0
  50. package/lib/sdk.js +328 -0
  51. package/lib/secrets.js +211 -0
  52. package/lib/skills.js +223 -0
  53. package/lib/subagents.js +516 -0
  54. package/lib/tool_registry.js +2862 -0
  55. package/lib/tool_specs.js +263 -9
  56. package/lib/tools.js +352 -1039
  57. package/lib/ui/anim.js +86 -0
  58. package/lib/ui/ansi.js +17 -27
  59. package/lib/ui/chat-history.js +253 -71
  60. package/lib/ui/create-ui.js +67 -24
  61. package/lib/ui/diff.js +90 -25
  62. package/lib/ui/file-activity.js +236 -0
  63. package/lib/ui/format.js +195 -29
  64. package/lib/ui/input-field.js +21 -11
  65. package/lib/ui/md-stream.js +234 -0
  66. package/lib/ui/render-operation.js +113 -0
  67. package/lib/ui/select.js +1 -4
  68. package/lib/ui/status-bar.js +146 -36
  69. package/lib/ui/stream.js +20 -13
  70. package/lib/ui/theme.js +190 -44
  71. package/lib/ui/tool-operation.js +190 -0
  72. package/lib/ui/utils.js +9 -5
  73. package/lib/ui/web-activity.js +270 -0
  74. package/lib/ui/writer.js +159 -45
  75. package/lib/ui.js +1 -1
  76. package/lib/verify.js +229 -0
  77. package/lib/web-extract.js +213 -0
  78. package/lib/web-summarize.js +68 -0
  79. package/package.json +19 -4
  80. package/scripts/lint.js +57 -0
  81. package/test/agent-loop.test.js +389 -0
  82. package/test/anim-driver.test.js +153 -0
  83. package/test/ask-user-display.test.js +226 -0
  84. package/test/ask-user-gate.test.js +231 -0
  85. package/test/background.test.js +414 -0
  86. package/test/chat-history-nocolor.test.js +155 -0
  87. package/test/chat-relogin.test.js +207 -0
  88. package/test/chat.test.js +114 -0
  89. package/test/checkpoints-agent.test.js +181 -0
  90. package/test/checkpoints.test.js +650 -0
  91. package/test/command-registry.test.js +160 -0
  92. package/test/compact.test.js +116 -0
  93. package/test/completion-lazy.test.js +52 -0
  94. package/test/config-merge.test.js +324 -0
  95. package/test/config-quarantine.test.js +128 -0
  96. package/test/config-write-guard-allow-anywhere.test.js +56 -0
  97. package/test/config-write-guard-skip.test.js +46 -0
  98. package/test/config-write-guard.test.js +153 -0
  99. package/test/context-split.test.js +215 -0
  100. package/test/cost-doctor.test.js +142 -0
  101. package/test/custom-commands-chat.test.js +106 -0
  102. package/test/custom-commands.test.js +230 -0
  103. package/test/defer-detail-band.test.js +403 -0
  104. package/test/deny-windows.test.js +120 -0
  105. package/test/deny.test.js +83 -0
  106. package/test/detail-band-tab-flatten.test.js +242 -0
  107. package/test/download-allow-anywhere.test.js +66 -0
  108. package/test/download-confine.test.js +153 -0
  109. package/test/exec-diff.test.js +268 -0
  110. package/test/executors.test.js +599 -0
  111. package/test/extract-tool-calls.test.js +349 -0
  112. package/test/fetch-url-validation.test.js +219 -0
  113. package/test/file-activity.test.js +522 -0
  114. package/test/fixtures/tool-calls.js +57 -0
  115. package/test/fixtures/web-page.js +91 -0
  116. package/test/git-tools.test.js +384 -0
  117. package/test/grep-glob-serialize.test.js +242 -0
  118. package/test/grep-glob.test.js +268 -0
  119. package/test/grep-path-target.test.js +227 -0
  120. package/test/harness/README.md +57 -0
  121. package/test/harness/chat-harness.js +143 -0
  122. package/test/harness/memwarn-headless-child.js +65 -0
  123. package/test/harness/mock-llm.js +120 -0
  124. package/test/harness/mock-mcp-server.js +142 -0
  125. package/test/harness/sse-server.js +69 -0
  126. package/test/headless.test.js +348 -0
  127. package/test/history-utils.test.js +88 -0
  128. package/test/hooks-agent.test.js +238 -0
  129. package/test/hooks-verify-sandbox.test.js +232 -0
  130. package/test/hooks.test.js +216 -0
  131. package/test/http-get-user-agent.test.js +142 -0
  132. package/test/images-api.test.js +208 -0
  133. package/test/images.test.js +238 -0
  134. package/test/input-field-ctrl-o.test.js +37 -0
  135. package/test/live-height-physical.test.js +281 -0
  136. package/test/max-iterations.test.js +218 -0
  137. package/test/mcp-boundary.test.js +57 -0
  138. package/test/mcp-client.test.js +267 -0
  139. package/test/mcp-oauth.test.js +86 -0
  140. package/test/md-stream.test.js +183 -0
  141. package/test/memory-truncation-warning.test.js +222 -0
  142. package/test/memory.test.js +198 -0
  143. package/test/native-dispatch.test.js +409 -0
  144. package/test/native-live-narration.test.js +254 -0
  145. package/test/output-chokepoint.test.js +188 -0
  146. package/test/output-heredoc-leak.test.js +195 -0
  147. package/test/output-preview.test.js +245 -0
  148. package/test/path-guards.test.js +134 -0
  149. package/test/payload.test.js +99 -0
  150. package/test/permission-rules-agent.test.js +210 -0
  151. package/test/permission-rules.test.js +297 -0
  152. package/test/permissions.test.js +362 -0
  153. package/test/plan-mode.test.js +167 -0
  154. package/test/read-paginate.test.js +275 -0
  155. package/test/readonly-tools.test.js +177 -0
  156. package/test/render-operation.test.js +317 -0
  157. package/test/replay-descriptor-xml.test.js +216 -0
  158. package/test/replay-descriptor.test.js +189 -0
  159. package/test/replay-web-aggregate.test.js +291 -0
  160. package/test/replay-web-persist.test.js +241 -0
  161. package/test/result-cap.test.js +233 -0
  162. package/test/running-glyph-anim.test.js +111 -0
  163. package/test/sandbox-agent.test.js +147 -0
  164. package/test/sandbox-integration.test.js +216 -0
  165. package/test/sandbox.test.js +408 -0
  166. package/test/sdk.test.js +234 -0
  167. package/test/shell-output-cap.test.js +181 -0
  168. package/test/skills-chat.test.js +110 -0
  169. package/test/skills.test.js +295 -0
  170. package/test/smoke.test.js +68 -0
  171. package/test/status-bar-driver.test.js +93 -0
  172. package/test/status-bar-pause.test.js +164 -0
  173. package/test/status-bar-resync.test.js +188 -0
  174. package/test/stream-parser.test.js +171 -0
  175. package/test/subagents-agent.test.js +178 -0
  176. package/test/subagents.test.js +222 -0
  177. package/test/theme-palette.test.js +166 -0
  178. package/test/tool-registry.test.js +85 -0
  179. package/test/trim-budget.test.js +101 -0
  180. package/test/truncate-visible.test.js +78 -0
  181. package/test/verify-agent.test.js +317 -0
  182. package/test/verify.test.js +141 -0
  183. package/test/view-image.test.js +199 -0
  184. package/test/web-activity-ordering.test.js +203 -0
  185. package/test/web-activity.test.js +207 -0
  186. package/test/web-data-extraction-guidance.test.js +71 -0
  187. package/test/web-extract.test.js +185 -0
  188. package/test/web-fetch-agent.test.js +291 -0
  189. package/test/web-fetch-mode.test.js +193 -0
  190. package/test/web-search.test.js +380 -0
  191. package/lib/commands.js +0 -1438
  192. package/path +0 -1
@@ -0,0 +1,216 @@
1
+ 'use strict';
2
+
3
+ // Unit tests for lib/hooks.js (Task 3.4). Cover the pure normalization/matching
4
+ // helpers and the dispatcher with an INJECTED spawn so exit-code semantics,
5
+ // deny-list enforcement, timeout handling, and failure containment are tested
6
+ // deterministically with no real subprocesses.
7
+
8
+ const { test } = require('node:test');
9
+ const assert = require('node:assert');
10
+
11
+ const {
12
+ HOOK_EVENTS,
13
+ normalizeHooks,
14
+ normalizeHookDef,
15
+ hookMatches,
16
+ wrapUntrusted,
17
+ createHookRunner: _createHookRunner,
18
+ } = require('../lib/hooks');
19
+
20
+ // These tests exercise hook ORCHESTRATION (deny-list, exit semantics, matcher,
21
+ // timeout/failure containment, payload) with an injected spawn — NOT the OS
22
+ // sandbox, which has its own dedicated tests (hooks-verify-sandbox.test.js).
23
+ // Inject a pass-through sandbox resolver so the command runs plain via the
24
+ // 2-arg spawn(command, opts) form the injected stubs assume. The real sandbox
25
+ // routing is proven separately.
26
+ const NO_SANDBOX = (command) => ({ run: true, useShell: true, file: command, args: [], sandbox: 'off' });
27
+ const createHookRunner = (opts = {}) => _createHookRunner({ sandbox: NO_SANDBOX, ...opts });
28
+
29
+ // ---------------------------------------------------------------------------
30
+ // normalizeHooks / normalizeHookDef
31
+ // ---------------------------------------------------------------------------
32
+
33
+ test('normalizeHooks always returns one array per known event', () => {
34
+ const out = normalizeHooks(undefined);
35
+ assert.deepStrictEqual(Object.keys(out).sort(), [...HOOK_EVENTS].sort());
36
+ for (const ev of HOOK_EVENTS) assert.deepStrictEqual(out[ev], []);
37
+ });
38
+
39
+ test('normalizeHooks drops malformed entries and unknown events', () => {
40
+ const out = normalizeHooks({
41
+ PreToolUse: [
42
+ { command: 'echo ok' },
43
+ { type: 'command' }, // no command → dropped
44
+ { type: 'prompt', prompt: '' }, // empty prompt → dropped
45
+ 'not-an-object', // dropped
46
+ { type: 'prompt', prompt: 'hi' },
47
+ ],
48
+ BogusEvent: [{ command: 'echo nope' }], // unknown event key → ignored
49
+ });
50
+ assert.strictEqual(out.PreToolUse.length, 2);
51
+ assert.deepStrictEqual(out.PreToolUse[0], { type: 'command', command: 'echo ok' });
52
+ assert.deepStrictEqual(out.PreToolUse[1], { type: 'prompt', prompt: 'hi' });
53
+ assert.ok(!('BogusEvent' in out));
54
+ });
55
+
56
+ test('normalizeHookDef keeps matcher and positive integer timeout', () => {
57
+ const def = normalizeHookDef({ command: 'x', matcher: ' shell ', timeout_ms: 1500 });
58
+ assert.deepStrictEqual(def, { type: 'command', command: 'x', matcher: 'shell', timeout_ms: 1500 });
59
+ // Non-positive / non-integer timeouts are dropped.
60
+ assert.strictEqual(normalizeHookDef({ command: 'x', timeout_ms: 0 }).timeout_ms, undefined);
61
+ assert.strictEqual(normalizeHookDef({ command: 'x', timeout_ms: 1.5 }).timeout_ms, undefined);
62
+ });
63
+
64
+ // ---------------------------------------------------------------------------
65
+ // hookMatches
66
+ // ---------------------------------------------------------------------------
67
+
68
+ test('hookMatches: no matcher / * matches everything', () => {
69
+ assert.ok(hookMatches({}, 'shell'));
70
+ assert.ok(hookMatches({ matcher: '*' }, 'anything'));
71
+ });
72
+
73
+ test('hookMatches: exact, pipe-list, and regex', () => {
74
+ assert.ok(hookMatches({ matcher: 'shell' }, 'shell'));
75
+ assert.ok(!hookMatches({ matcher: 'shell' }, 'read'));
76
+ assert.ok(hookMatches({ matcher: 'shell|exec' }, 'exec'));
77
+ assert.ok(hookMatches({ matcher: 'mcp__.*' }, 'mcp__fs__read'));
78
+ assert.ok(!hookMatches({ matcher: 'mcp__.*' }, 'shell'));
79
+ // Anchored: a partial name does not match.
80
+ assert.ok(!hookMatches({ matcher: 'read' }, 'read_file'));
81
+ });
82
+
83
+ test('wrapUntrusted fences text in the shared delimiter', () => {
84
+ const w = wrapUntrusted('payload', '[hook X]');
85
+ assert.match(w, /<<<UNTRUSTED_EXTERNAL_CONTENT/);
86
+ assert.match(w, /<<<END_UNTRUSTED_EXTERNAL_CONTENT>>>/);
87
+ assert.match(w, /payload/);
88
+ assert.match(w, /\[hook X\]/);
89
+ });
90
+
91
+ // ---------------------------------------------------------------------------
92
+ // createHookRunner — dispatch with an injected spawn
93
+ // ---------------------------------------------------------------------------
94
+
95
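+ // A spawn stub: `map` is either an object keyed by command string or a function (command, opts); the stub returns a spawnSync-shaped result (defaulting to a clean exit 0 with empty output) and records every call on `.calls`.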
96
+ function fakeSpawn(map) {
97
+ const calls = [];
98
+ const fn = (command, opts) => {
99
+ calls.push({ command, opts });
100
+ const r = typeof map === 'function' ? map(command, opts) : map[command];
101
+ return r || { status: 0, stdout: '', stderr: '' };
102
+ };
103
+ fn.calls = calls;
104
+ return fn;
105
+ }
106
+
107
+ test('PreToolUse: non-zero exit BLOCKS and surfaces the reason', async () => {
108
+ const getConfig = () => ({ hooks: { PreToolUse: [{ type: 'command', command: 'guard' }] } });
109
+ const spawn = fakeSpawn({ guard: { status: 1, stdout: 'not allowed to touch prod', stderr: '' } });
110
+ const runner = createHookRunner({ getConfig, spawn });
111
+ const r = await runner.run('PreToolUse', { tool: 'shell', input: { command: 'deploy' } });
112
+ assert.strictEqual(r.blocked, true);
113
+ assert.match(r.blockReason, /not allowed to touch prod/);
114
+ assert.strictEqual(r.feedback.length, 0, 'a blocking hook does not also emit feedback');
115
+ });
116
+
117
+ test('PreToolUse: exit 0 ALLOWS, and stdout is surfaced as untrusted feedback', async () => {
118
+ const getConfig = () => ({ hooks: { PreToolUse: [{ type: 'command', command: 'note' }] } });
119
+ const spawn = fakeSpawn({ note: { status: 0, stdout: 'fyi: linting first', stderr: '' } });
120
+ const runner = createHookRunner({ getConfig, spawn });
121
+ const r = await runner.run('PreToolUse', { tool: 'shell' });
122
+ assert.strictEqual(r.blocked, false);
123
+ assert.strictEqual(r.feedback.length, 1);
124
+ assert.match(r.feedback[0], /fyi: linting first/);
125
+ assert.match(r.feedback[0], /UNTRUSTED_EXTERNAL_CONTENT/);
126
+ });
127
+
128
+ test('matcher filters which hooks run for a tool event', async () => {
129
+ const getConfig = () => ({ hooks: { PreToolUse: [
130
+ { type: 'command', command: 'only-shell', matcher: 'shell' },
131
+ ] } });
132
+ const spawn = fakeSpawn({ 'only-shell': { status: 1, stdout: 'blocked', stderr: '' } });
133
+ const runner = createHookRunner({ getConfig, spawn });
134
+
135
+ const blocked = await runner.run('PreToolUse', { tool: 'shell' });
136
+ assert.strictEqual(blocked.blocked, true);
137
+
138
+ const other = await runner.run('PreToolUse', { tool: 'read' });
139
+ assert.strictEqual(other.blocked, false, 'non-matching tool is unaffected');
140
+ assert.strictEqual(spawn.calls.length, 1, 'the hook only ran for the matching tool');
141
+ });
142
+
143
+ test('deny-listed hook command is NOT run (skipped, contained)', async () => {
144
+ const getConfig = () => ({ hooks: { PreToolUse: [{ type: 'command', command: 'rm -rf /' }] } });
145
+ const spawn = fakeSpawn(() => { throw new Error('spawn must not be called for a denied hook'); });
146
+ const logs = [];
147
+ const runner = createHookRunner({ getConfig, spawn, log: (m) => logs.push(m) });
148
+ const r = await runner.run('PreToolUse', { tool: 'shell' });
149
+ assert.strictEqual(spawn.calls.length, 0, 'deny-listed command never spawned');
150
+ assert.strictEqual(r.blocked, false, 'a denied hook does not block the tool');
151
+ assert.strictEqual(r.ran[0].denied && typeof r.ran[0].denied, 'string');
152
+ assert.ok(logs.some((l) => /deny-list/i.test(l)));
153
+ });
154
+
155
+ test('timeout is contained: not a block, not feedback, just logged', async () => {
156
+ const getConfig = () => ({ hooks: { PreToolUse: [{ type: 'command', command: 'slow', timeout_ms: 10 }] } });
157
+ // spawnSync surfaces a timeout via error.code ETIMEDOUT + signal SIGTERM.
158
+ const spawn = fakeSpawn({ slow: { status: null, signal: 'SIGTERM', stdout: '', stderr: '', error: { code: 'ETIMEDOUT', message: 'spawnSync timed out' } } });
159
+ const logs = [];
160
+ const runner = createHookRunner({ getConfig, spawn, log: (m) => logs.push(m) });
161
+ const r = await runner.run('PreToolUse', { tool: 'shell' });
162
+ assert.strictEqual(r.blocked, false, 'a timed-out PreToolUse hook does not block');
163
+ assert.strictEqual(r.feedback.length, 0);
164
+ assert.strictEqual(r.ran[0].timedOut, true);
165
+ assert.ok(logs.some((l) => /timed out/i.test(l)));
166
+ });
167
+
168
+ test('a spawn that throws is contained (no crash, recorded as failed)', async () => {
169
+ const getConfig = () => ({ hooks: { PostToolUse: [{ type: 'command', command: 'boom' }] } });
170
+ const spawn = fakeSpawn(() => { throw new Error('kaboom'); });
171
+ const runner = createHookRunner({ getConfig, spawn });
172
+ const r = await runner.run('PostToolUse', { tool: 'shell', result: 'x' });
173
+ assert.strictEqual(r.blocked, false);
174
+ assert.strictEqual(r.ran[0].ok, false);
175
+ assert.match(r.ran[0].error, /kaboom/);
176
+ });
177
+
178
+ test('prompt hook injects its text as untrusted feedback (no shell)', async () => {
179
+ const getConfig = () => ({ hooks: { UserPromptSubmit: [{ type: 'prompt', prompt: 'Follow the style guide.' }] } });
180
+ const spawn = fakeSpawn(() => { throw new Error('prompt hooks never spawn'); });
181
+ const runner = createHookRunner({ getConfig, spawn });
182
+ const r = await runner.run('UserPromptSubmit', { prompt: 'do a thing' });
183
+ assert.strictEqual(spawn.calls.length, 0);
184
+ assert.strictEqual(r.feedback.length, 1);
185
+ assert.match(r.feedback[0], /Follow the style guide/);
186
+ });
187
+
188
+ test('payload reaches the hook via env vars and stdin', async () => {
189
+ const getConfig = () => ({ hooks: { PostToolUse: [{ type: 'command', command: 'capture' }] } });
190
+ let seen = null;
191
+ const spawn = fakeSpawn((command, opts) => { seen = opts; return { status: 0, stdout: '', stderr: '' }; });
192
+ const runner = createHookRunner({ getConfig, spawn });
193
+ await runner.run('PostToolUse', { tool: 'read', input: { path: '/a' }, result: 'contents' });
194
+ assert.strictEqual(seen.env.SEMALT_HOOK_EVENT, 'PostToolUse');
195
+ assert.strictEqual(seen.env.SEMALT_TOOL_NAME, 'read');
196
+ assert.strictEqual(seen.env.SEMALT_TOOL_INPUT, JSON.stringify({ path: '/a' }));
197
+ assert.strictEqual(seen.env.SEMALT_TOOL_RESULT, 'contents');
198
+ const stdin = JSON.parse(seen.input);
199
+ assert.strictEqual(stdin.event, 'PostToolUse');
200
+ assert.strictEqual(stdin.tool, 'read');
201
+ });
202
+
203
+ test('unknown event name is a no-op', async () => {
204
+ const getConfig = () => ({ hooks: {} });
205
+ const runner = createHookRunner({ getConfig, spawn: fakeSpawn({}) });
206
+ const r = await runner.run('NotARealEvent', {});
207
+ assert.deepStrictEqual(r.feedback, []);
208
+ assert.strictEqual(r.blocked, false);
209
+ });
210
+
211
+ test('a getConfig that throws is contained (no hooks, no crash)', async () => {
212
+ const runner = createHookRunner({ getConfig: () => { throw new Error('boom'); }, spawn: fakeSpawn({}) });
213
+ const r = await runner.run('PreToolUse', { tool: 'shell' });
214
+ assert.strictEqual(r.blocked, false);
215
+ assert.deepStrictEqual(r.ran, []);
216
+ });
@@ -0,0 +1,142 @@
1
+ 'use strict';
2
+
3
+ // http_get / download User-Agent (Task W.3 Part 2). The fetch tools must send a
4
+ // fixed, realistic browser User-Agent so sites that reject empty/curl-like UAs
5
+ // (Wikipedia 403, the Guardian 406) are less likely to block. It is:
6
+ // - operator-overridable via config.web.user_agent,
7
+ // - NOT model-selectable (no UA parameter in the tool spec the model sees),
8
+ // - applied uniformly to both http_get and download.
9
+ //
10
+ // Home-based paths are redirected to a temp dir BEFORE any lib module loads so
11
+ // the secret-file/config guards resolve against the temp config path.
12
+
13
+ const os = require('node:os');
14
+ const fs = require('node:fs');
15
+ const path = require('node:path');
16
+
17
+ const TMP_HOME = fs.mkdtempSync(path.join(os.tmpdir(), 'semalt-ua-home-'));
18
+ const PREV_HOME = process.env.HOME;
19
+ const PREV_USERPROFILE = process.env.USERPROFILE;
20
+ process.env.HOME = TMP_HOME;
21
+ process.env.USERPROFILE = TMP_HOME;
22
+
23
+ const { test, before, after } = require('node:test');
24
+ const assert = require('node:assert');
25
+ const http = require('node:http');
26
+
27
+ const ui = require('../lib/ui');
28
+ const { createPermissionManager } = require('../lib/permissions');
29
+ const { createToolExecutor } = require('../lib/tools');
30
+ const { DEFAULT_USER_AGENT } = require('../lib/constants');
31
+ const { normalizeConfig } = require('../lib/config');
32
+ const { TOOL_SPECS } = require('../lib/tool_specs');
33
+
34
+ let CWD;
35
+ let PREV_CWD;
36
+ let server;
37
+ let baseUrl;
38
+ let lastUserAgent; // captured from the most recent inbound request
39
+
40
+ function mkExec({ web } = {}) {
41
+ const pm = createPermissionManager(ui, {});
42
+ const getConfig = () => ({
43
+ max_file_size_kb: 512,
44
+ command_timeout_ms: 30000,
45
+ http_fetch_max_bytes: 262144,
46
+ download_max_bytes: 1048576,
47
+ // summarize off → predictable pass-through, no summarizer needed.
48
+ web: { summarize: false, summary_model: '', max_content_tokens: 6000, ...(web || {}) },
49
+ });
50
+ return createToolExecutor(pm, ui, getConfig, {});
51
+ }
52
+
53
+ before(async () => {
54
+ PREV_CWD = process.cwd();
55
+ CWD = fs.mkdtempSync(path.join(os.tmpdir(), 'semalt-ua-cwd-'));
56
+ process.chdir(CWD);
57
+ server = http.createServer((req, res) => {
58
+ lastUserAgent = req.headers['user-agent'];
59
+ res.writeHead(200, { 'Content-Type': 'text/plain' });
60
+ res.end('hello body');
61
+ });
62
+ await new Promise((r) => server.listen(0, '127.0.0.1', r));
63
+ baseUrl = `http://127.0.0.1:${server.address().port}`;
64
+ });
65
+
66
+ after(async () => {
67
+ await new Promise((r) => server.close(r));
68
+ process.chdir(PREV_CWD);
69
+ if (PREV_HOME === undefined) delete process.env.HOME; else process.env.HOME = PREV_HOME;
70
+ if (PREV_USERPROFILE === undefined) delete process.env.USERPROFILE; else process.env.USERPROFILE = PREV_USERPROFILE;
71
+ });
72
+
73
+ // ---------------------------------------------------------------------------
74
+ // http_get sends the default / configured UA
75
+ // ---------------------------------------------------------------------------
76
+
77
+ test('http_get: sends the default browser User-Agent', async () => {
78
+ const exec = mkExec();
79
+ lastUserAgent = undefined;
80
+ const r = await exec.agentExecFile('http_get', `${baseUrl}/page`, {}, { signal: null });
81
+ assert.ok(!r.error, `valid URL should not error: ${r.error}`);
82
+ assert.strictEqual(lastUserAgent, DEFAULT_USER_AGENT);
83
+ // The default looks like a real browser, not curl / empty.
84
+ assert.match(lastUserAgent, /Mozilla\/5\.0/);
85
+ assert.match(lastUserAgent, /Chrome\/\d+/);
86
+ });
87
+
88
+ test('http_get: a config web.user_agent override is honored', async () => {
89
+ const exec = mkExec({ web: { user_agent: 'AcmeCorpBot/2.0 (+https://acme.example/bot)' } });
90
+ lastUserAgent = undefined;
91
+ await exec.agentExecFile('http_get', `${baseUrl}/page`, {}, { signal: null });
92
+ assert.strictEqual(lastUserAgent, 'AcmeCorpBot/2.0 (+https://acme.example/bot)');
93
+ });
94
+
95
+ // ---------------------------------------------------------------------------
96
+ // download carries the same UA
97
+ // ---------------------------------------------------------------------------
98
+
99
+ test('download: sends the default browser User-Agent', async () => {
100
+ const exec = mkExec();
101
+ lastUserAgent = undefined;
102
+ const r = await exec.agentExecFile('download', `${baseUrl}/file.txt`, 'file.txt');
103
+ assert.strictEqual(r.status, 'ok');
104
+ assert.strictEqual(lastUserAgent, DEFAULT_USER_AGENT);
105
+ });
106
+
107
+ test('download: a config web.user_agent override is honored', async () => {
108
+ const exec = mkExec({ web: { user_agent: 'AcmeCorpBot/2.0' } });
109
+ lastUserAgent = undefined;
110
+ await exec.agentExecFile('download', `${baseUrl}/file2.txt`, 'file2.txt');
111
+ assert.strictEqual(lastUserAgent, 'AcmeCorpBot/2.0');
112
+ });
113
+
114
+ // ---------------------------------------------------------------------------
115
+ // The UA is operator-only — never exposed to the model
116
+ // ---------------------------------------------------------------------------
117
+
118
+ test('the tool spec exposes NO user-agent parameter (not model-selectable)', () => {
119
+ for (const tool of ['http_get', 'download']) {
120
+ const spec = TOOL_SPECS[tool];
121
+ const props = (spec && spec.parameters && spec.parameters.properties) || {};
122
+ for (const key of Object.keys(props)) {
123
+ assert.ok(!/user.?agent|^ua$|headers?/i.test(key),
124
+ `${tool} spec must not expose a UA/header parameter, found "${key}"`);
125
+ }
126
+ // Belt-and-suspenders: the whole spec JSON never mentions a user-agent knob.
127
+ assert.ok(!/user.?agent/i.test(JSON.stringify(spec)),
128
+ `${tool} spec text must not mention user-agent`);
129
+ }
130
+ });
131
+
132
+ // ---------------------------------------------------------------------------
133
+ // Normalization: config.web.user_agent defaults to the fixed UA
134
+ // ---------------------------------------------------------------------------
135
+
136
+ test('config normalization: web.user_agent defaults to DEFAULT_USER_AGENT, override trims', () => {
137
+ assert.strictEqual(normalizeConfig({}).web.user_agent, DEFAULT_USER_AGENT);
138
+ assert.strictEqual(normalizeConfig({ web: {} }).web.user_agent, DEFAULT_USER_AGENT);
139
+ assert.strictEqual(normalizeConfig({ web: { user_agent: ' CustomUA/1 ' } }).web.user_agent, 'CustomUA/1');
140
+ // An empty/whitespace override falls back to the default, not ''.
141
+ assert.strictEqual(normalizeConfig({ web: { user_agent: ' ' } }).web.user_agent, DEFAULT_USER_AGENT);
142
+ });
@@ -0,0 +1,208 @@
1
+ 'use strict';
2
+
3
+ // Integration tests for multimodal image input on the wire (Task 5.4). They
4
+ // drive the REAL api client / SDK against the mock-LLM harness and assert the
5
+ // PROVIDER-SPECIFIC content-part shape that actually leaves the client, the
6
+ // fail-loud vision check (image is NEVER silently dropped), and that the
7
+ // headless/SDK surface accepts images with the rest of the loop unaffected.
8
+
9
+ const { test } = require('node:test');
10
+ const assert = require('node:assert');
11
+ const fs = require('fs');
12
+ const os = require('os');
13
+ const path = require('path');
14
+
15
+ const { createApiClient } = require('../lib/api');
16
+ const { createAgent } = require('../lib/sdk');
17
+ const ui = require('../lib/ui');
18
+ const { normalizeConfig } = require('../lib/config');
19
+ const { startMockLLM } = require('./harness/mock-llm');
20
+
21
+ const PNG_BUF = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a, 0, 0, 0, 13]); // 8-byte PNG signature plus four more bytes; a 12-byte fixture, not a complete decodable image
22
+ const IMG = { media_type: 'image/png', data: PNG_BUF.toString('base64') }; // in-memory image attachment (base64 payload) passed via the `images` field in the tests below
23
+
24
+ function clientFor(mock, cfgOverride = {}) { // builds a real API client whose config points at the given mock server
25
+ let config = normalizeConfig({ api_base: mock.base, api_key: 'k', default_model: 'gpt-4o', ...cfgOverride }); // cfgOverride is spread last, so its keys replace the defaults listed before it
26
+ const getConfig = () => config;
27
+ const saveConfig = (c) => { config = normalizeConfig(c); }; // re-normalizes and reassigns the closed-over config variable
28
+ return createApiClient({ getConfig, saveConfig, ui });
29
+ }
30
+
31
+ function lastRequestMessages(mock) { // returns the `messages` field of the most recent request recorded by the mock
32
+ const req = mock.requests[mock.requests.length - 1]; // undefined when nothing was recorded, in which case the next line throws a TypeError
33
+ return JSON.parse(req.body).messages; // req.body is parsed as JSON text
34
+ }
35
+
36
+ // ── provider-specific content-part shape (constraint #1) ─────────────────────
37
+
38
+ test('OpenAI-style image_url is sent for an OpenAI-compatible vision model', async () => { // checks the outgoing user message carries a text part plus an image_url data-URL part
39
+ const mock = await startMockLLM();
40
+ mock.replyWith('It is a diagram.');
41
+ const client = clientFor(mock); // default model gpt-4o → vision via heuristic
42
+ try {
43
+ await client.chatStream(
44
+ [{ role: 'user', content: 'what is this', images: [IMG] }],
45
+ { model: 'gpt-4o', silent: true },
46
+ );
47
+ const msgs = lastRequestMessages(mock); // inspect what was actually sent, not what was passed in
48
+ const user = msgs.find((m) => m.role === 'user');
49
+ assert.ok(Array.isArray(user.content), 'content is a multimodal array');
50
+ const textPart = user.content.find((p) => p.type === 'text');
51
+ const imgPart = user.content.find((p) => p.type === 'image_url');
52
+ assert.strictEqual(textPart.text, 'what is this'); // the original prompt text survives as its own part
53
+ assert.ok(imgPart, 'image_url part present');
54
+ assert.strictEqual(imgPart.image_url.url, `data:image/png;base64,${IMG.data}`); // the image must travel as a base64 data URL built from IMG
55
+ assert.strictEqual(user.images, undefined, 'internal images field not on the wire');
56
+ } finally {
57
+ await mock.close(); // close the mock even when an assertion above throws
58
+ }
59
+ });
60
+
61
+ test('Anthropic-style image source block is sent for an anthropic-format profile', async () => { // checks that a profile with image_format 'anthropic' produces an { type: 'image', source } part
62
+ const mock = await startMockLLM();
63
+ mock.replyWith('ack');
64
+ const client = clientFor(mock, {
65
+ default_model: 'claude-vision',
66
+ models: [{ api_base: mock.base, api_key: 'k', model: 'claude-vision', image_format: 'anthropic', vision: true }], // model profile routed to the mock, with the anthropic image format and vision enabled
67
+ });
68
+ try {
69
+ await client.chatStream(
70
+ [{ role: 'user', content: 'describe', images: [IMG] }],
71
+ { model: 'claude-vision', silent: true },
72
+ );
73
+ const msgs = lastRequestMessages(mock);
74
+ const user = msgs.find((m) => m.role === 'user');
75
+ const imgPart = user.content.find((p) => p.type === 'image');
76
+ assert.ok(imgPart, 'anthropic image part present');
77
+ assert.deepStrictEqual(imgPart.source, { type: 'base64', media_type: 'image/png', data: IMG.data }); // exact shape: no extra keys allowed on source
78
+ assert.ok(!user.content.some((p) => p.type === 'image_url'), 'no OpenAI-style part'); // the two provider shapes must not be mixed in one message
79
+ } finally {
80
+ await mock.close(); // close the mock even when an assertion above throws
81
+ }
82
+ });
83
+
84
+ // ── vision capability: fail loud, NEVER silently drop (constraint #2) ─────────
85
+
86
+ test('text-only model → clear error, image NOT sent (no silent drop)', async () => { // an image sent to a vision:false profile must reject before any HTTP request is made
87
+ const mock = await startMockLLM();
88
+ // Deliberately queue NOTHING — a request would 500. We assert none is made.
89
+ const client = clientFor(mock, {
90
+ default_model: 'text-only',
91
+ models: [{ api_base: mock.base, api_key: 'k', model: 'text-only', vision: false }], // profile explicitly marked as not vision-capable
92
+ });
93
+ try {
94
+ await assert.rejects(
95
+ () => client.chatStream(
96
+ [{ role: 'user', content: 'see this', images: [IMG] }],
97
+ { model: 'text-only', silent: true },
98
+ ),
99
+ /not vision-capable/i, // the rejection message must name the cause
100
+ );
101
+ assert.strictEqual(mock.requestCount(), 0, 'no request left the client — image not silently dropped');
102
+ } finally {
103
+ await mock.close(); // close the mock even when an assertion above throws
104
+ }
105
+ });
106
+
107
+ test('paired positive: a vision model accepts the same image', async () => { // same prompt and image as the text-only case, but the profile sets vision: true
108
+ const mock = await startMockLLM();
109
+ mock.replyWith('I see a cat.');
110
+ const client = clientFor(mock, {
111
+ default_model: 'vision-ok',
112
+ models: [{ api_base: mock.base, api_key: 'k', model: 'vision-ok', vision: true }],
113
+ });
114
+ try {
115
+ const res = await client.chatStream(
116
+ [{ role: 'user', content: 'see this', images: [IMG] }],
117
+ { model: 'vision-ok', silent: true },
118
+ );
119
+ assert.match(res.content, /cat/); // the queued mock reply comes back through res.content
120
+ assert.strictEqual(mock.requestCount(), 1); // exactly one request was sent
121
+ } finally {
122
+ await mock.close(); // close the mock even when an assertion above throws
123
+ }
124
+ });
125
+
126
+ // ── the rest of the loop is unaffected when images are present ───────────────
127
+
128
+ test('a plain text turn (no images) still sends string content', async () => { // regression guard: without images the user content must stay a plain string, not a parts array
129
+ const mock = await startMockLLM();
130
+ mock.replyWith('hi');
131
+ const client = clientFor(mock);
132
+ try {
133
+ await client.chatStream([{ role: 'user', content: 'hello' }], { model: 'gpt-4o', silent: true });
134
+ const msgs = lastRequestMessages(mock);
135
+ const user = msgs.find((m) => m.role === 'user');
136
+ assert.strictEqual(user.content, 'hello', 'string content unchanged without images');
137
+ } finally {
138
+ await mock.close(); // close the mock even when an assertion above throws
139
+ }
140
+ });
141
+
142
+ // ── SDK / headless surface accepts images, envelope unaffected ───────────────
143
+
144
+ test('SDK run({ images }) reads a real file, attaches it, returns the envelope', async () => { // end-to-end through createAgent: an image path inside the CWD is read and sent as an image_url part
145
+ const prevKey = process.env.SEMALT_API_KEY; // saved so the finally block can restore or delete it
146
+ process.env.SEMALT_API_KEY = 'test-key';
147
+ const prevCwd = process.cwd();
148
+ const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'images-sdk-'));
149
+ process.chdir(tmpDir); // NOTE(review): env and cwd are changed before the try block, so a throw from startMockLLM() or createAgent() would skip the restore in finally
150
+ const imgPath = path.join(tmpDir, 'shot.png'); // the image lives inside the directory that is now the CWD
151
+ fs.writeFileSync(imgPath, PNG_BUF);
152
+
153
+ const mock = await startMockLLM();
154
+ mock.replyWith('A screenshot of a form.');
155
+ const agent = createAgent({
156
+ apiBase: mock.base,
157
+ apiKey: 'test-key',
158
+ model: 'gpt-4o',
159
+ sandbox: { mode: 'off' },
160
+ });
161
+ try {
162
+ const res = await agent.run('what is in this screenshot', { images: [imgPath] }); // images are given as file paths here, not as base64 objects
163
+ assert.match(res.result, /screenshot/i);
164
+ assert.ok(Array.isArray(res.toolCalls));
165
+ assert.strictEqual(res.stopReason, 'end_turn');
166
+ // The image actually rode the request as an OpenAI-style part.
167
+ const msgs = lastRequestMessages(mock);
168
+ const user = msgs.find((m) => Array.isArray(m.content) && m.content.some((p) => p.type === 'image_url')); // matched by content shape rather than by role
169
+ assert.ok(user, 'image_url part sent from a real file path');
170
+ } finally {
171
+ await agent.close();
172
+ await mock.close();
173
+ process.chdir(prevCwd); // leave tmpDir before it is removed below
174
+ if (prevKey === undefined) delete process.env.SEMALT_API_KEY; // the variable was unset before the test, so unset it again
175
+ else process.env.SEMALT_API_KEY = prevKey;
176
+ try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch {} // best-effort cleanup; removal errors are deliberately ignored
177
+ }
178
+ });
179
+
180
+ test('SDK run({ images }) refuses an out-of-CWD path via isPathSafe', async () => { // an image path outside the CWD must reject before any request is sent
181
+ const prevKey = process.env.SEMALT_API_KEY; // saved so the finally block can restore or delete it
182
+ process.env.SEMALT_API_KEY = 'test-key';
183
+ const prevCwd = process.cwd();
184
+ const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'images-sdk-cwd-'));
185
+ // Write the image OUTSIDE the CWD we will chdir into.
186
+ const outsideDir = fs.mkdtempSync(path.join(os.tmpdir(), 'images-outside-')); // a sibling temp directory, not nested under tmpDir
187
+ const outsidePath = path.join(outsideDir, 'secret.png');
188
+ fs.writeFileSync(outsidePath, PNG_BUF);
189
+ process.chdir(tmpDir); // NOTE(review): env and cwd are changed before the try block, so a throw from startMockLLM() or createAgent() would skip the restore in finally
190
+
191
+ const mock = await startMockLLM(); // no reply is queued; the test expects zero requests
192
+ const agent = createAgent({ apiBase: mock.base, apiKey: 'test-key', model: 'gpt-4o', sandbox: { mode: 'off' } });
193
+ try {
194
+ await assert.rejects(
195
+ () => agent.run('peek', { images: [outsidePath] }),
196
+ /outside allowed area/i, // the rejection message must name the path-safety cause
197
+ );
198
+ assert.strictEqual(mock.requestCount(), 0, 'refused before any request');
199
+ } finally {
200
+ await agent.close();
201
+ await mock.close();
202
+ process.chdir(prevCwd); // leave tmpDir before it is removed below
203
+ if (prevKey === undefined) delete process.env.SEMALT_API_KEY; // the variable was unset before the test, so unset it again
204
+ else process.env.SEMALT_API_KEY = prevKey;
205
+ try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch {} // best-effort cleanup; removal errors are deliberately ignored
206
+ try { fs.rmSync(outsideDir, { recursive: true, force: true }); } catch {} // separate try so one failed removal does not skip the other
207
+ }
208
+ });