@semalt-ai/code 1.8.5 → 1.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192) hide show
  1. package/.claude/settings.local.json +7 -1
  2. package/.github/workflows/ci.yml +69 -0
  3. package/ARCHITECTURE.md +6 -95
  4. package/CLAUDE.md +196 -316
  5. package/README.md +148 -4
  6. package/docs/ARCHITECTURE.md +1321 -0
  7. package/docs/CONFIG.md +340 -0
  8. package/docs/HISTORY.md +245 -0
  9. package/examples/embed.js +74 -0
  10. package/index.js +251 -10
  11. package/lib/agent.js +856 -120
  12. package/lib/api.js +239 -50
  13. package/lib/args.js +74 -2
  14. package/lib/audit.js +23 -1
  15. package/lib/background.js +584 -0
  16. package/lib/checkpoints.js +757 -0
  17. package/lib/commands/auth.js +94 -0
  18. package/lib/commands/chat-session.js +489 -0
  19. package/lib/commands/chat-slash.js +415 -0
  20. package/lib/commands/chat-turn.js +669 -0
  21. package/lib/commands/chat.js +407 -0
  22. package/lib/commands/custom.js +157 -0
  23. package/lib/commands/history-utils.js +66 -0
  24. package/lib/commands/index.js +268 -0
  25. package/lib/commands/mcp.js +113 -0
  26. package/lib/commands/oneshot.js +193 -0
  27. package/lib/commands/registry.js +269 -0
  28. package/lib/commands/tasks.js +89 -0
  29. package/lib/compact.js +87 -0
  30. package/lib/config.js +360 -11
  31. package/lib/constants.js +401 -3
  32. package/lib/deny.js +199 -0
  33. package/lib/doctor.js +160 -0
  34. package/lib/headless.js +202 -0
  35. package/lib/hooks.js +286 -0
  36. package/lib/images.js +270 -0
  37. package/lib/internals.js +49 -0
  38. package/lib/mcp/boundary.js +131 -0
  39. package/lib/mcp/client.js +270 -0
  40. package/lib/mcp/oauth.js +134 -0
  41. package/lib/memory.js +209 -0
  42. package/lib/metrics.js +37 -2
  43. package/lib/payload.js +54 -0
  44. package/lib/permission-rules.js +401 -0
  45. package/lib/permissions.js +123 -26
  46. package/lib/pricing.js +67 -0
  47. package/lib/proc.js +62 -0
  48. package/lib/prompts.js +99 -8
  49. package/lib/sandbox.js +568 -0
  50. package/lib/sdk.js +328 -0
  51. package/lib/secrets.js +211 -0
  52. package/lib/skills.js +223 -0
  53. package/lib/subagents.js +516 -0
  54. package/lib/tool_registry.js +2862 -0
  55. package/lib/tool_specs.js +263 -9
  56. package/lib/tools.js +352 -1039
  57. package/lib/ui/anim.js +86 -0
  58. package/lib/ui/ansi.js +17 -27
  59. package/lib/ui/chat-history.js +253 -71
  60. package/lib/ui/create-ui.js +67 -24
  61. package/lib/ui/diff.js +90 -25
  62. package/lib/ui/file-activity.js +236 -0
  63. package/lib/ui/format.js +195 -29
  64. package/lib/ui/input-field.js +21 -11
  65. package/lib/ui/md-stream.js +234 -0
  66. package/lib/ui/render-operation.js +113 -0
  67. package/lib/ui/select.js +1 -4
  68. package/lib/ui/status-bar.js +146 -36
  69. package/lib/ui/stream.js +20 -13
  70. package/lib/ui/theme.js +190 -44
  71. package/lib/ui/tool-operation.js +190 -0
  72. package/lib/ui/utils.js +9 -5
  73. package/lib/ui/web-activity.js +270 -0
  74. package/lib/ui/writer.js +159 -45
  75. package/lib/ui.js +1 -1
  76. package/lib/verify.js +229 -0
  77. package/lib/web-extract.js +213 -0
  78. package/lib/web-summarize.js +68 -0
  79. package/package.json +19 -4
  80. package/scripts/lint.js +57 -0
  81. package/test/agent-loop.test.js +389 -0
  82. package/test/anim-driver.test.js +153 -0
  83. package/test/ask-user-display.test.js +226 -0
  84. package/test/ask-user-gate.test.js +231 -0
  85. package/test/background.test.js +414 -0
  86. package/test/chat-history-nocolor.test.js +155 -0
  87. package/test/chat-relogin.test.js +207 -0
  88. package/test/chat.test.js +114 -0
  89. package/test/checkpoints-agent.test.js +181 -0
  90. package/test/checkpoints.test.js +650 -0
  91. package/test/command-registry.test.js +160 -0
  92. package/test/compact.test.js +116 -0
  93. package/test/completion-lazy.test.js +52 -0
  94. package/test/config-merge.test.js +324 -0
  95. package/test/config-quarantine.test.js +128 -0
  96. package/test/config-write-guard-allow-anywhere.test.js +56 -0
  97. package/test/config-write-guard-skip.test.js +46 -0
  98. package/test/config-write-guard.test.js +153 -0
  99. package/test/context-split.test.js +215 -0
  100. package/test/cost-doctor.test.js +142 -0
  101. package/test/custom-commands-chat.test.js +106 -0
  102. package/test/custom-commands.test.js +230 -0
  103. package/test/defer-detail-band.test.js +403 -0
  104. package/test/deny-windows.test.js +120 -0
  105. package/test/deny.test.js +83 -0
  106. package/test/detail-band-tab-flatten.test.js +242 -0
  107. package/test/download-allow-anywhere.test.js +66 -0
  108. package/test/download-confine.test.js +153 -0
  109. package/test/exec-diff.test.js +268 -0
  110. package/test/executors.test.js +599 -0
  111. package/test/extract-tool-calls.test.js +349 -0
  112. package/test/fetch-url-validation.test.js +219 -0
  113. package/test/file-activity.test.js +522 -0
  114. package/test/fixtures/tool-calls.js +57 -0
  115. package/test/fixtures/web-page.js +91 -0
  116. package/test/git-tools.test.js +384 -0
  117. package/test/grep-glob-serialize.test.js +242 -0
  118. package/test/grep-glob.test.js +268 -0
  119. package/test/grep-path-target.test.js +227 -0
  120. package/test/harness/README.md +57 -0
  121. package/test/harness/chat-harness.js +143 -0
  122. package/test/harness/memwarn-headless-child.js +65 -0
  123. package/test/harness/mock-llm.js +120 -0
  124. package/test/harness/mock-mcp-server.js +142 -0
  125. package/test/harness/sse-server.js +69 -0
  126. package/test/headless.test.js +348 -0
  127. package/test/history-utils.test.js +88 -0
  128. package/test/hooks-agent.test.js +238 -0
  129. package/test/hooks-verify-sandbox.test.js +232 -0
  130. package/test/hooks.test.js +216 -0
  131. package/test/http-get-user-agent.test.js +142 -0
  132. package/test/images-api.test.js +208 -0
  133. package/test/images.test.js +238 -0
  134. package/test/input-field-ctrl-o.test.js +37 -0
  135. package/test/live-height-physical.test.js +281 -0
  136. package/test/max-iterations.test.js +218 -0
  137. package/test/mcp-boundary.test.js +57 -0
  138. package/test/mcp-client.test.js +267 -0
  139. package/test/mcp-oauth.test.js +86 -0
  140. package/test/md-stream.test.js +183 -0
  141. package/test/memory-truncation-warning.test.js +222 -0
  142. package/test/memory.test.js +198 -0
  143. package/test/native-dispatch.test.js +409 -0
  144. package/test/native-live-narration.test.js +254 -0
  145. package/test/output-chokepoint.test.js +188 -0
  146. package/test/output-heredoc-leak.test.js +195 -0
  147. package/test/output-preview.test.js +245 -0
  148. package/test/path-guards.test.js +134 -0
  149. package/test/payload.test.js +99 -0
  150. package/test/permission-rules-agent.test.js +210 -0
  151. package/test/permission-rules.test.js +297 -0
  152. package/test/permissions.test.js +362 -0
  153. package/test/plan-mode.test.js +167 -0
  154. package/test/read-paginate.test.js +275 -0
  155. package/test/readonly-tools.test.js +177 -0
  156. package/test/render-operation.test.js +317 -0
  157. package/test/replay-descriptor-xml.test.js +216 -0
  158. package/test/replay-descriptor.test.js +189 -0
  159. package/test/replay-web-aggregate.test.js +291 -0
  160. package/test/replay-web-persist.test.js +241 -0
  161. package/test/result-cap.test.js +233 -0
  162. package/test/running-glyph-anim.test.js +111 -0
  163. package/test/sandbox-agent.test.js +147 -0
  164. package/test/sandbox-integration.test.js +216 -0
  165. package/test/sandbox.test.js +408 -0
  166. package/test/sdk.test.js +234 -0
  167. package/test/shell-output-cap.test.js +181 -0
  168. package/test/skills-chat.test.js +110 -0
  169. package/test/skills.test.js +295 -0
  170. package/test/smoke.test.js +68 -0
  171. package/test/status-bar-driver.test.js +93 -0
  172. package/test/status-bar-pause.test.js +164 -0
  173. package/test/status-bar-resync.test.js +188 -0
  174. package/test/stream-parser.test.js +171 -0
  175. package/test/subagents-agent.test.js +178 -0
  176. package/test/subagents.test.js +222 -0
  177. package/test/theme-palette.test.js +166 -0
  178. package/test/tool-registry.test.js +85 -0
  179. package/test/trim-budget.test.js +101 -0
  180. package/test/truncate-visible.test.js +78 -0
  181. package/test/verify-agent.test.js +317 -0
  182. package/test/verify.test.js +141 -0
  183. package/test/view-image.test.js +199 -0
  184. package/test/web-activity-ordering.test.js +203 -0
  185. package/test/web-activity.test.js +207 -0
  186. package/test/web-data-extraction-guidance.test.js +71 -0
  187. package/test/web-extract.test.js +185 -0
  188. package/test/web-fetch-agent.test.js +291 -0
  189. package/test/web-fetch-mode.test.js +193 -0
  190. package/test/web-search.test.js +380 -0
  191. package/lib/commands.js +0 -1438
  192. package/path +0 -1
@@ -0,0 +1,408 @@
1
+ 'use strict';
2
+
3
+ // Unit tests for the OS sandbox layer (Task 4.4). These cover the PURE pieces —
4
+ // config normalization, platform detection (with injected tooling so they run on
5
+ // any OS), policy/argv generation, command wrapping, the config×detection
6
+ // decision, and the fallback rules — without requiring a real bwrap/sandbox-exec
7
+ // on the runner. Kernel-level enforcement is exercised separately in
8
+ // test/sandbox-integration.test.js (which skips gracefully when the primitive is
9
+ // absent, mirroring the ripgrep-parity pattern).
10
+
11
+ const { test } = require('node:test');
12
+ const assert = require('node:assert');
13
+ const fs = require('fs');
14
+ const os = require('os');
15
+ const path = require('path');
16
+
17
+ const sandbox = require('../lib/sandbox');
18
+ const {
19
+ normalizeSandbox, protectedPaths, buildSeatbeltPolicy, buildBwrapArgs,
20
+ detectSandbox, _resetSandboxDetection, wrapCommand, decideSandbox, sandboxStatusReport,
21
+ } = sandbox;
22
+
23
+ // ---------------------------------------------------------------------------
24
+ // normalizeSandbox
25
+ // ---------------------------------------------------------------------------
26
+
27
+ test('normalizeSandbox defaults to auto + no hard gate + network on', () => {
28
+ assert.deepStrictEqual(normalizeSandbox(undefined), { mode: 'auto', failIfUnavailable: false, network: 'on' });
29
+ assert.deepStrictEqual(normalizeSandbox({}), { mode: 'auto', failIfUnavailable: false, network: 'on' });
30
+ assert.deepStrictEqual(normalizeSandbox(null), { mode: 'auto', failIfUnavailable: false, network: 'on' });
31
+ assert.deepStrictEqual(normalizeSandbox('off'), { mode: 'auto', failIfUnavailable: false, network: 'on' });
32
+ });
33
+
34
+ test('normalizeSandbox honors off + failIfUnavailable', () => {
35
+ assert.deepStrictEqual(normalizeSandbox({ mode: 'off' }), { mode: 'off', failIfUnavailable: false, network: 'on' });
36
+ assert.deepStrictEqual(normalizeSandbox({ failIfUnavailable: true }), { mode: 'auto', failIfUnavailable: true, network: 'on' });
37
+ assert.deepStrictEqual(normalizeSandbox({ mode: 'off', failIfUnavailable: true }), { mode: 'off', failIfUnavailable: true, network: 'on' });
38
+ });
39
+
40
+ test('normalizeSandbox rejects unknown modes / truthy-but-not-true flags', () => {
41
+ assert.strictEqual(normalizeSandbox({ mode: 'on' }).mode, 'auto');
42
+ assert.strictEqual(normalizeSandbox({ mode: 'disabled' }).mode, 'auto');
43
+ assert.strictEqual(normalizeSandbox({ failIfUnavailable: 'yes' }).failIfUnavailable, false);
44
+ assert.strictEqual(normalizeSandbox({ failIfUnavailable: 1 }).failIfUnavailable, false);
45
+ });
46
+
47
+ // --- network: binary on/off + anti-fail-open (Task 4.4b, CVE-2025-66479 lesson) ---
48
+
49
+ test('normalizeSandbox: network defaults ON only when the key is ABSENT', () => {
50
+ assert.strictEqual(normalizeSandbox({}).network, 'on');
51
+ assert.strictEqual(normalizeSandbox({ mode: 'auto' }).network, 'on');
52
+ assert.strictEqual(normalizeSandbox({ network: 'on' }).network, 'on');
53
+ });
54
+
55
+ test('normalizeSandbox: explicit network off resolves to off', () => {
56
+ assert.strictEqual(normalizeSandbox({ network: 'off' }).network, 'off');
57
+ });
58
+
59
+ test('ANTI-FAIL-OPEN: a PRESENT-but-malformed network value resolves to off (never silently on)', () => {
60
+ // The allowedDomains:[] → "allow all" trap (CVE-2025-66479): once the human has
61
+ // TOUCHED the network key, anything we do not recognize as exactly 'on' must be
62
+ // the SAFE isolated state. An input meant to restrict must never resolve to
63
+ // the permissive outcome.
64
+ for (const bad of ['', 'On', 'ON', 'enabled', 'true', 'yes', '0', 'allow', {}, [], null, false, true, 0, 1]) {
65
+ assert.strictEqual(normalizeSandbox({ network: bad }).network, 'off',
66
+ `network=${JSON.stringify(bad)} must fail toward isolation (off)`);
67
+ }
68
+ });
69
+
70
+ // ---------------------------------------------------------------------------
71
+ // protectedPaths (constraint #2)
72
+ // ---------------------------------------------------------------------------
73
+
74
+ test('protectedPaths includes ~/.semalt-ai, secret dirs, and /etc', () => {
75
+ const home = '/home/tester';
76
+ const ps = protectedPaths({ home });
77
+ // /home/tester need not exist: fs.realpathSync throws on a missing path, so this presumably relies on protectedPaths falling back to path.resolve (verify in lib/sandbox).
78
+ assert.ok(ps.includes(path.join(home, '.semalt-ai')), 'must protect the config dir');
79
+ assert.ok(ps.includes(path.join(home, '.ssh')), 'must protect ~/.ssh');
80
+ assert.ok(ps.includes(path.join(home, '.aws')), 'must protect ~/.aws');
81
+ assert.ok(ps.includes(path.join(home, '.gnupg')), 'must protect ~/.gnupg');
82
+ assert.ok(ps.includes('/etc'), 'must protect /etc');
83
+ });
84
+
85
+ test('protectedPaths includes the project .semalt dir for the given cwd (Pre-Task 5.0b)', () => {
86
+ // A real cwd (no .git) so the walk halts at the filesystem root; the .semalt
87
+ // dir directly under cwd must be among the protected paths so a sandboxed
88
+ // shell cannot write .semalt/config.json even though cwd is writable.
89
+ const cwd = fs.realpathSync(fs.mkdtempSync(path.join(os.tmpdir(), 'semalt-sbx-cfg-')));
90
+ const ps = protectedPaths({ home: '/home/tester', cwd });
91
+ assert.ok(ps.includes(path.join(cwd, '.semalt')), 'must protect the project .semalt dir');
92
+ });
93
+
94
+ // ---------------------------------------------------------------------------
95
+ // buildSeatbeltPolicy (macOS)
96
+ // ---------------------------------------------------------------------------
97
+
98
+ test('buildSeatbeltPolicy denies all writes then re-allows working dir, re-denies protected', () => {
99
+ const policy = buildSeatbeltPolicy({
100
+ writableRoots: ['/work/dir'],
101
+ protectedPaths: ['/home/tester/.semalt-ai', '/etc'],
102
+ rootWritable: false,
103
+ });
104
+ assert.match(policy, /\(version 1\)/);
105
+ assert.match(policy, /\(allow default\)/);
106
+ assert.match(policy, /\(deny file-write\* \(subpath "\/"\)\)/);
107
+ assert.match(policy, /\(allow file-write\* \(subpath "\/work\/dir"\)\)/);
108
+ // protected re-denials come AFTER the allow so last-match-wins denies them.
109
+ const denyAll = policy.indexOf('(deny file-write* (subpath "/"))');
110
+ const allowWork = policy.indexOf('(allow file-write* (subpath "/work/dir"))');
111
+ const denySemalt = policy.indexOf('(deny file-write* (subpath "/home/tester/.semalt-ai"))');
112
+ assert.ok(denyAll < allowWork, 'deny-all precedes allow-work');
113
+ assert.ok(allowWork < denySemalt, 'protected re-deny comes last (wins)');
114
+ });
115
+
116
+ test('buildSeatbeltPolicy: network off adds a (deny network*) clause; on does not', () => {
117
+ const off = buildSeatbeltPolicy({ writableRoots: ['/w'], protectedPaths: [], network: 'off' });
118
+ assert.match(off, /\(deny network\*\)/);
119
+ // The network deny must come after (allow default) so last-match-wins keeps it (only relative order is asserted).
120
+ assert.ok(off.indexOf('(allow default)') < off.indexOf('(deny network*)'));
121
+ const on = buildSeatbeltPolicy({ writableRoots: ['/w'], protectedPaths: [], network: 'on' });
122
+ assert.doesNotMatch(on, /\(deny network\*\)/);
123
+ // Default (omitted) is network on — no deny clause.
124
+ assert.doesNotMatch(buildSeatbeltPolicy({ writableRoots: ['/w'], protectedPaths: [] }), /\(deny network\*\)/);
125
+ });
126
+
127
+ test('buildSeatbeltPolicy with rootWritable skips the blanket deny but still re-denies protected', () => {
128
+ const policy = buildSeatbeltPolicy({
129
+ writableRoots: [],
130
+ protectedPaths: ['/home/tester/.semalt-ai'],
131
+ rootWritable: true,
132
+ });
133
+ assert.doesNotMatch(policy, /\(deny file-write\* \(subpath "\/"\)\)/);
134
+ assert.match(policy, /\(deny file-write\* \(subpath "\/home\/tester\/\.semalt-ai"\)\)/);
135
+ });
136
+
137
+ // ---------------------------------------------------------------------------
138
+ // buildBwrapArgs (Linux)
139
+ // ---------------------------------------------------------------------------
140
+
141
+ test('buildBwrapArgs ro-binds root, fresh /proc, rw working dir, ro protected, chdir', () => {
142
+ const args = buildBwrapArgs({
143
+ writableRoots: ['/work'],
144
+ protectedPaths: ['/prot'],
145
+ rootWritable: false,
146
+ chdir: '/work',
147
+ fsExists: () => true,
148
+ });
149
+ const j = args.join(' ');
150
+ assert.match(j, /--ro-bind \/ \//);
151
+ assert.match(j, /--proc \/proc/); // fresh procfs ⇒ /proc/self/root confined (constraint #3)
152
+ assert.match(j, /--bind \/work \/work/);
153
+ assert.match(j, /--ro-bind \/prot \/prot/);
154
+ assert.match(j, /--chdir \/work/);
155
+ // protected ro-bind must come AFTER the writable bind so it wins on overlap.
156
+ assert.ok(j.indexOf('--bind /work /work') < j.indexOf('--ro-bind /prot /prot'));
157
+ });
158
+
159
+ test('buildBwrapArgs: network off adds --unshare-net; on (and default) does not', () => {
160
+ const off = buildBwrapArgs({ writableRoots: ['/work'], protectedPaths: [], chdir: '/work', fsExists: () => true, network: 'off' });
161
+ assert.ok(off.includes('--unshare-net'), 'no-network jail must unshare the network namespace');
162
+ const on = buildBwrapArgs({ writableRoots: ['/work'], protectedPaths: [], chdir: '/work', fsExists: () => true, network: 'on' });
163
+ assert.ok(!on.includes('--unshare-net'), 'network-on jail keeps the host network');
164
+ const dflt = buildBwrapArgs({ writableRoots: ['/work'], protectedPaths: [], chdir: '/work', fsExists: () => true });
165
+ assert.ok(!dflt.includes('--unshare-net'), 'default is network on');
166
+ });
167
+
168
+ test('buildBwrapArgs rootWritable binds / read-write (for --allow-anywhere) but keeps protected ro', () => {
169
+ const args = buildBwrapArgs({
170
+ writableRoots: [],
171
+ protectedPaths: ['/prot'],
172
+ rootWritable: true,
173
+ chdir: '/work',
174
+ fsExists: () => true,
175
+ });
176
+ const j = args.join(' ');
177
+ assert.match(j, /--bind \/ \//);
178
+ assert.doesNotMatch(j, /--ro-bind \/ \//);
179
+ assert.match(j, /--ro-bind \/prot \/prot/); // protected stays read-only even under allow-anywhere
180
+ });
181
+
182
+ test('buildBwrapArgs skips binds for paths that do not exist', () => {
183
+ const args = buildBwrapArgs({
184
+ writableRoots: ['/missing'],
185
+ protectedPaths: ['/also-missing'],
186
+ rootWritable: false,
187
+ chdir: '/work',
188
+ fsExists: (p) => p === '/', // only root exists
189
+ });
190
+ const j = args.join(' ');
191
+ assert.doesNotMatch(j, /--bind \/missing/);
192
+ assert.doesNotMatch(j, /--ro-bind \/also-missing/);
193
+ });
194
+
195
+ // ---------------------------------------------------------------------------
196
+ // detectSandbox — every platform path, with injected tooling
197
+ // ---------------------------------------------------------------------------
198
+
199
+ test('detectSandbox: macOS uses sandbox-exec when present', () => {
200
+ _resetSandboxDetection();
201
+ const d = detectSandbox({ platform: 'darwin', which: () => '/usr/bin/sandbox-exec', force: true, noCache: true });
202
+ assert.strictEqual(d.supported, true);
203
+ assert.strictEqual(d.tool, 'sandbox-exec');
204
+ assert.strictEqual(d.available, true);
205
+ });
206
+
207
+ test('detectSandbox: Linux with usable bwrap is available', () => {
208
+ _resetSandboxDetection();
209
+ const d = detectSandbox({
210
+ platform: 'linux',
211
+ which: (n) => (n === 'bwrap' ? '/usr/bin/bwrap' : null),
212
+ probe: () => true,
213
+ readFile: () => 'Linux version 6.0',
214
+ force: true, noCache: true,
215
+ });
216
+ assert.strictEqual(d.tool, 'bwrap');
217
+ assert.strictEqual(d.available, true);
218
+ });
219
+
220
+ test('detectSandbox: Linux without bwrap is unavailable with an install hint', () => {
221
+ _resetSandboxDetection();
222
+ const d = detectSandbox({
223
+ platform: 'linux',
224
+ which: () => null,
225
+ readFile: () => 'Linux version 6.0',
226
+ force: true, noCache: true,
227
+ });
228
+ assert.strictEqual(d.available, false);
229
+ assert.match(d.reason, /not found/);
230
+ assert.match(d.installHint, /bubblewrap/);
231
+ });
232
+
233
+ test('detectSandbox: Linux with bwrap present but unusable (probe fails) is unavailable', () => {
234
+ _resetSandboxDetection();
235
+ const d = detectSandbox({
236
+ platform: 'linux',
237
+ which: () => '/usr/bin/bwrap',
238
+ probe: () => false,
239
+ readFile: () => 'Linux version 6.0',
240
+ force: true, noCache: true,
241
+ });
242
+ assert.strictEqual(d.available, false);
243
+ assert.match(d.reason, /could not start a jail|namespaces/);
244
+ });
245
+
246
+ test('detectSandbox: WSL1 is detected and explained even if bwrap is installed', () => {
247
+ _resetSandboxDetection();
248
+ const d = detectSandbox({
249
+ platform: 'linux',
250
+ which: () => '/usr/bin/bwrap',
251
+ probe: () => false,
252
+ readFile: () => 'Linux version 4.4.0-19041-Microsoft (WSL)',
253
+ force: true, noCache: true,
254
+ });
255
+ assert.strictEqual(d.available, false);
256
+ assert.match(d.reason, /WSL1/);
257
+ assert.match(d.installHint, /WSL2/);
258
+ });
259
+
260
+ test('detectSandbox: native Windows is unsupported, not crashing', () => {
261
+ _resetSandboxDetection();
262
+ const d = detectSandbox({ platform: 'win32', force: true, noCache: true });
263
+ assert.strictEqual(d.supported, false);
264
+ assert.strictEqual(d.available, false);
265
+ assert.match(d.reason, /Windows/);
266
+ });
267
+
268
+ test('detectSandbox caches by default and force re-evaluates', () => {
269
+ _resetSandboxDetection();
270
+ let calls = 0;
271
+ const which = () => { calls++; return '/usr/bin/sandbox-exec'; };
272
+ detectSandbox({ platform: 'darwin', which, force: true });
273
+ detectSandbox({ platform: 'darwin', which }); // cached
274
+ assert.strictEqual(calls, 1, 'second call should hit the cache');
275
+ detectSandbox({ platform: 'darwin', which, force: true });
276
+ assert.strictEqual(calls, 2, 'force re-evaluates');
277
+ _resetSandboxDetection();
278
+ });
279
+
280
+ // ---------------------------------------------------------------------------
281
+ // wrapCommand
282
+ // ---------------------------------------------------------------------------
283
+
284
+ test('wrapCommand builds a bwrap argv that runs the command via /bin/sh -c', () => {
285
+ const w = wrapCommand('echo hi', { tool: 'bwrap', cwd: os.tmpdir(), home: '/home/tester' });
286
+ assert.strictEqual(w.file, 'bwrap');
287
+ assert.deepStrictEqual(w.args.slice(-3), ['/bin/sh', '-c', 'echo hi']);
288
+ assert.ok(w.args.includes('--proc'));
289
+ });
290
+
291
+ test('wrapCommand builds a sandbox-exec argv carrying the policy', () => {
292
+ const w = wrapCommand('echo hi', { tool: 'sandbox-exec', cwd: os.tmpdir(), home: '/home/tester' });
293
+ assert.strictEqual(w.file, 'sandbox-exec');
294
+ assert.strictEqual(w.args[0], '-p');
295
+ assert.match(w.args[1], /\(version 1\)/);
296
+ assert.deepStrictEqual(w.args.slice(-3), ['/bin/sh', '-c', 'echo hi']);
297
+ });
298
+
299
+ test('wrapCommand returns null for an unknown tool or empty command', () => {
300
+ assert.strictEqual(wrapCommand('echo hi', { tool: 'nope' }), null);
301
+ assert.strictEqual(wrapCommand('', { tool: 'bwrap' }), null);
302
+ });
303
+
304
+ test('wrapCommand threads the network mode into the bwrap argv', () => {
305
+ const off = wrapCommand('echo hi', { tool: 'bwrap', cwd: os.tmpdir(), home: '/home/tester', network: 'off' });
306
+ assert.ok(off.args.includes('--unshare-net'));
307
+ const on = wrapCommand('echo hi', { tool: 'bwrap', cwd: os.tmpdir(), home: '/home/tester', network: 'on' });
308
+ assert.ok(!on.args.includes('--unshare-net'));
309
+ });
310
+
311
+ test('wrapCommand threads the network mode into the sandbox-exec policy', () => {
312
+ const off = wrapCommand('echo hi', { tool: 'sandbox-exec', cwd: os.tmpdir(), home: '/home/tester', network: 'off' });
313
+ assert.match(off.args[1], /\(deny network\*\)/);
314
+ const on = wrapCommand('echo hi', { tool: 'sandbox-exec', cwd: os.tmpdir(), home: '/home/tester', network: 'on' });
315
+ assert.doesNotMatch(on.args[1], /\(deny network\*\)/);
316
+ });
317
+
318
+ // ---------------------------------------------------------------------------
319
+ // decideSandbox — config × detection
320
+ // ---------------------------------------------------------------------------
321
+
322
+ const detAvail = { platform: 'linux', supported: true, tool: 'bwrap', available: true };
323
+ const detUnavail = { platform: 'linux', supported: true, tool: 'bwrap', available: false, reason: 'no bwrap', installHint: 'install it' };
324
+
325
+ test('decideSandbox: auto + available → on', () => {
326
+ const d = decideSandbox({ getConfig: () => ({ sandbox: { mode: 'auto' } }), detection: detAvail });
327
+ assert.strictEqual(d.status, 'on');
328
+ assert.strictEqual(d.tool, 'bwrap');
329
+ });
330
+
331
+ test('decideSandbox: mode off → off (deliberate human opt-out, even when available)', () => {
332
+ const d = decideSandbox({ getConfig: () => ({ sandbox: { mode: 'off' } }), detection: detAvail });
333
+ assert.strictEqual(d.status, 'off');
334
+ });
335
+
336
+ test('decideSandbox: auto + unavailable → unavailable, carrying failIfUnavailable + hint', () => {
337
+ const d = decideSandbox({ getConfig: () => ({ sandbox: { mode: 'auto', failIfUnavailable: true } }), detection: detUnavail });
338
+ assert.strictEqual(d.status, 'unavailable');
339
+ assert.strictEqual(d.failIfUnavailable, true);
340
+ assert.match(d.installHint, /install it/);
341
+ });
342
+
343
+ test('decideSandbox: a throwing getConfig is contained (defaults to auto)', () => {
344
+ const d = decideSandbox({ getConfig: () => { throw new Error('boom'); }, detection: detAvail });
345
+ assert.strictEqual(d.status, 'on');
346
+ });
347
+
348
+ test('decideSandbox: network defaults on; carries the kernel-enforced network mode on the decision', () => {
349
+ // noNetwork passed explicitly so the test never depends on the real process argv.
350
+ const onByDefault = decideSandbox({ getConfig: () => ({ sandbox: { mode: 'auto' } }), detection: detAvail, noNetwork: false });
351
+ assert.strictEqual(onByDefault.status, 'on');
352
+ assert.strictEqual(onByDefault.network, 'on');
353
+
354
+ const offByConfig = decideSandbox({ getConfig: () => ({ sandbox: { mode: 'auto', network: 'off' } }), detection: detAvail, noNetwork: false });
355
+ assert.strictEqual(offByConfig.network, 'off');
356
+
357
+ const offByFlag = decideSandbox({ getConfig: () => ({ sandbox: { mode: 'auto' } }), detection: detAvail, noNetwork: true });
358
+ assert.strictEqual(offByFlag.network, 'off', '--no-network forces off even when config says nothing');
359
+ });
360
+
361
+ test('decideSandbox: ANTI-FAIL-OPEN — a malformed network config yields net off on an on decision', () => {
362
+ const d = decideSandbox({ getConfig: () => ({ sandbox: { mode: 'auto', network: '' } }), detection: detAvail, noNetwork: false });
363
+ assert.strictEqual(d.status, 'on');
364
+ assert.strictEqual(d.network, 'off', 'present-but-malformed network must resolve to the isolated state');
365
+ });
366
+
367
+ // ---------------------------------------------------------------------------
368
+ // sandboxStatusReport
369
+ // ---------------------------------------------------------------------------
370
+
371
+ test('sandboxStatusReport reflects an available auto sandbox as effective ON', () => {
372
+ const r = sandboxStatusReport({ getConfig: () => ({ sandbox: { mode: 'auto' } }), detection: detAvail });
373
+ assert.match(r, /mode:\s+auto/);
374
+ assert.match(r, /effective:\s+ON/);
375
+ });
376
+
377
+ test('sandboxStatusReport shows the hard-block when unavailable + failIfUnavailable', () => {
378
+ const r = sandboxStatusReport({ getConfig: () => ({ sandbox: { mode: 'auto', failIfUnavailable: true } }), detection: detUnavail });
379
+ assert.match(r, /HARD-BLOCKED/);
380
+ assert.match(r, /install it/);
381
+ });
382
+
383
+ test('sandboxStatusReport shows the approval fallback when unavailable without the hard gate', () => {
384
+ const r = sandboxStatusReport({ getConfig: () => ({ sandbox: { mode: 'auto' } }), detection: detUnavail });
385
+ assert.match(r, /require human approval/);
386
+ });
387
+
388
+ test('sandboxStatusReport shows OFF when mode is off', () => {
389
+ const r = sandboxStatusReport({ getConfig: () => ({ sandbox: { mode: 'off' } }), detection: detAvail });
390
+ assert.match(r, /effective:\s+OFF/);
391
+ });
392
+
393
+ test('sandboxStatusReport surfaces the network mode (net:on by default, net:off when isolated)', () => {
394
+ const on = sandboxStatusReport({ getConfig: () => ({ sandbox: { mode: 'auto' } }), detection: detAvail, noNetwork: false });
395
+ assert.match(on, /network:\s+on/);
396
+ assert.match(on, /effective:\s+ON \(net:on\)/);
397
+
398
+ const off = sandboxStatusReport({ getConfig: () => ({ sandbox: { mode: 'auto', network: 'off' } }), detection: detAvail, noNetwork: false });
399
+ assert.match(off, /network:\s+off/);
400
+ assert.match(off, /effective:\s+ON \(net:off\)/);
401
+ // The report must spell out the binary on/off semantics explicitly (there is no domain allowlist).
402
+ assert.match(off, /no host proxy, no domain allowlist, no TLS interception/);
403
+ });
404
+
405
+ test('sandboxStatusReport notes that no-network needs an active sandbox when the sandbox is off', () => {
406
+ const r = sandboxStatusReport({ getConfig: () => ({ sandbox: { mode: 'off', network: 'off' } }), detection: detAvail, noNetwork: false });
407
+ assert.match(r, /no-network requires an active sandbox/);
408
+ });
@@ -0,0 +1,234 @@
1
+ 'use strict';
2
+
3
+ // Integration tests for the embedding SDK facade (Task 5.2). They drive the
4
+ // REAL createAgent() against the mock-LLM harness, exercising:
5
+ // * run() returns the documented { result, toolCalls, usage, cost, stopReason,
6
+ // verifyStatus } envelope;
7
+ // * the SAFE EMBEDDED DEFAULT — a mutating tool is refused with no policy —
8
+ // paired with the positive case (an approver/allow-rule lets it proceed);
9
+ // * the deny-list stays active in embedded mode (not disabled by absence of a
10
+ // TTY) and the sandbox opt-out is via explicit config only;
11
+ // * close() releases resources (a real MCP stdio server is disconnected);
12
+ // * two instances don't collide on per-instance config state;
13
+ // * the package `exports` map resolves the facade and the /internals subpath.
14
+
15
+ const { test, before, after } = require('node:test');
16
+ const assert = require('node:assert');
17
+ const fs = require('fs');
18
+ const os = require('os');
19
+ const path = require('path');
20
+
21
+ const { createAgent } = require('../lib/sdk');
22
+ const { startMockLLM } = require('./harness/mock-llm');
23
+
24
// Script passed as the argument of the stdio MCP server spawned by the
// close() test.
const MOCK_MCP = path.join(__dirname, 'harness', 'mock-mcp-server.js');

// Process-wide state captured in before() and put back in after().
let prevKey; // SEMALT_API_KEY as it was before the suite ran
let prevCwd; // working directory before the suite ran
let tmpDir; // throwaway directory the suite runs inside
29
+
30
// Suite setup: pin a test API key and move the process into a fresh temp dir.
before(() => {
  // Capture the pre-suite state first so after() can restore it.
  prevCwd = process.cwd();
  prevKey = process.env.SEMALT_API_KEY;
  process.env.SEMALT_API_KEY = 'test-key';

  // File writes that pass the gate must land in a throwaway CWD
  // (isPathSafe confines writes to process.cwd()).
  const scratchPrefix = path.join(os.tmpdir(), 'sdk-test-');
  tmpDir = fs.mkdtempSync(scratchPrefix);
  process.chdir(tmpDir);
});
39
+
40
// Suite teardown: restore the API key and CWD, then remove the temp dir.
after(() => {
  // A key that was originally unset has to be deleted: assigning undefined to
  // process.env would store the string "undefined".
  if (prevKey !== undefined) {
    process.env.SEMALT_API_KEY = prevKey;
  } else {
    delete process.env.SEMALT_API_KEY;
  }

  // Leave the temp dir before removing it.
  process.chdir(prevCwd);
  try {
    fs.rmSync(tmpDir, { recursive: true, force: true });
  } catch {
    // Best-effort cleanup: a leftover temp dir must not fail the suite.
  }
});
46
+
47
/**
 * Build an SDK agent wired to the given mock LLM.
 *
 * @param {{ base: string }} mock - Mock LLM handle; `mock.base` is used as the API base.
 * @param {object} [extra] - Extra createAgent options; they override the defaults below.
 * @returns {*} Whatever createAgent() returns for the merged options.
 */
function agentFor(mock, extra = {}) {
  const defaults = {
    apiBase: mock.base,
    apiKey: 'test-key',
    model: 'test-model',
    // These suites exercise the facade and the permission perimeter, not the OS
    // sandbox (the sandbox suites cover that). The sandbox is off here for
    // determinism across the CI matrix; a dedicated test proves the default is on.
    sandbox: { mode: 'off' },
  };
  return createAgent({ ...defaults, ...extra });
}
59
+
60
// A plain text reply (no tool calls) must come back in the documented
// envelope shape.
test('run() returns the documented headless-shaped envelope', async () => {
  const mock = await startMockLLM();
  mock.replyWith('The answer is 42.');
  const agent = agentFor(mock);

  try {
    const envelope = await agent.run('what is the answer');

    assert.strictEqual(typeof envelope.result, 'string');
    assert.match(envelope.result, /42/);
    assert.ok(Array.isArray(envelope.toolCalls), 'toolCalls is an array');

    const usageOk = envelope.usage && typeof envelope.usage.total_tokens === 'number';
    assert.ok(usageOk, 'usage present');

    // Only the presence of the key is checked; its value may be null.
    assert.ok('cost' in envelope, 'cost key present (may be null)');
    assert.strictEqual(envelope.stopReason, 'end_turn');
    assert.strictEqual(envelope.verifyStatus, 'skipped');
    assert.ok(Array.isArray(envelope.messages), 'messages returned for continuation');
  } finally {
    await agent.close();
    await mock.close();
  }
});
79
+
80
// With neither an approver nor allow rules, a write requested by the model
// must be refused and the denial must be visible in the returned messages.
test('SAFE DEFAULT: a mutating tool is refused with no policy', async () => {
  const mock = await startMockLLM();
  // Only one reply is queued: the refusal aborts the turn, so no further LLM
  // request is needed.
  mock.replyWith('<write_file path="should-not-exist.txt">secret</write_file>');
  // No `approve` and no `rules`: mutations must be refused.
  const agent = agentFor(mock);

  try {
    const outcome = await agent.run('please write the file');

    const forbiddenFile = path.join(tmpDir, 'should-not-exist.txt');
    assert.strictEqual(
      fs.existsSync(forbiddenFile),
      false,
      'mutating write must NOT happen without an explicit policy',
    );

    const transcript = JSON.stringify(outcome.messages);
    assert.match(transcript, /Permission denied/i, 'denial surfaced to the host');
  } finally {
    await agent.close();
    await mock.close();
  }
});
99
+
100
// Counterpart of the safe-default test: the same kind of write goes through
// once the host supplies an approver that says yes.
test('PAIRED POSITIVE: an approver lets the same mutating tool proceed', async () => {
  const mock = await startMockLLM();
  mock.replyWith('<write_file path="allowed.txt">hello sdk</write_file>');
  mock.replyWith('Done writing.');

  // Record every descriptor the approver is asked about, then approve it.
  const calls = [];
  const approve = (call) => {
    calls.push(call);
    return true;
  };
  const agent = agentFor(mock, { approve });

  try {
    const outcome = await agent.run('write the file');

    const writtenFile = path.join(tmpDir, 'allowed.txt');
    assert.strictEqual(fs.existsSync(writtenFile), true, 'approved write happened');
    assert.strictEqual(fs.readFileSync(writtenFile, 'utf8'), 'hello sdk');

    assert.ok(calls.length >= 1, 'approver was consulted');
    const firstIsDescriptor = calls[0] && typeof calls[0].description === 'string';
    assert.ok(firstIsDescriptor, 'approver gets a call descriptor');

    // The envelope records the tool under 'write', the canonical action for
    // <write_file>.
    const writeRecorded = outcome.toolCalls.some((t) => t.tool === 'write' && t.ok);
    assert.ok(writeRecorded, 'write recorded as ok');
  } finally {
    await agent.close();
    await mock.close();
  }
});
123
+
124
// Second positive case: no approver at all, only a static allow rule for
// write_file covering every path.
test('PAIRED POSITIVE: an allow rule lets a mutating tool proceed (no approver)', async () => {
  const mock = await startMockLLM();
  mock.replyWith('<write_file path="by-rule.txt">rule data</write_file>');
  mock.replyWith('Done.');

  const allowAllWrites = { tool: 'write_file', path: '**', action: 'allow' };
  const agent = agentFor(mock, { rules: [allowAllWrites] });

  try {
    await agent.run('write it');

    const writtenFile = path.join(tmpDir, 'by-rule.txt');
    assert.strictEqual(fs.existsSync(writtenFile), true, 'allow rule permitted the write');
  } finally {
    await agent.close();
    await mock.close();
  }
});
139
+
140
// Even an approver that says yes to everything must not get a destructive
// command past the deny-list.
test('deny-list stays active in embedded mode even with an approver', async () => {
  const mock = await startMockLLM();
  mock.replyWith('<exec>rm -rf /</exec>');
  mock.replyWith('Stopped.');

  // The approver waves the permission gate through; 'tool' events are
  // collected so the deny-list result can be inspected afterwards.
  const toolEvents = [];
  const agent = agentFor(mock, { approve: () => true });
  agent.on('tool', (event) => toolEvents.push(event));

  try {
    await agent.run('wipe everything');

    // The destructive-command deny-list cannot be bypassed by the approver, so
    // some tool event must carry a deny-list result.
    const mentionsDenyList = (event) => /deny-list/i.test(String(event.result || ''));
    const blocked = toolEvents.some(mentionsDenyList);
    assert.ok(blocked, 'rm -rf / blocked by the deny-list despite gate approval');
  } finally {
    await agent.close();
    await mock.close();
  }
});
158
+
159
// Built without the agentFor() helper on purpose: that helper forces the
// sandbox off, and this test needs to observe the real default.
test('sandbox defaults ON (auto); opt-out is explicit config only', async () => {
  const mock = await startMockLLM();
  const baseOptions = { apiBase: mock.base, apiKey: 'k', model: 'm' };
  const onByDefault = createAgent({ ...baseOptions });
  const offByOptIn = createAgent({ ...baseOptions, sandbox: { mode: 'off' } });

  try {
    const defaultMode = onByDefault.getConfig().sandbox.mode;
    assert.strictEqual(defaultMode, 'auto', 'sandbox on by default in embedded mode');

    const optOutMode = offByOptIn.getConfig().sandbox.mode;
    assert.strictEqual(optOutMode, 'off', 'explicit opt-out honored');
  } finally {
    await onByDefault.close();
    await offByOptIn.close();
    await mock.close();
  }
});
172
+
173
// Two agents created side by side must each report their own model and must
// not hand out the same config object.
test('two instances keep independent config (no global-state collision)', async () => {
  const mock = await startMockLLM();
  const makeAgent = (model) => createAgent({ apiBase: mock.base, apiKey: 'k', model });
  const a = makeAgent('model-a');
  const b = makeAgent('model-b');

  try {
    assert.strictEqual(a.getConfig().default_model, 'model-a');
    assert.strictEqual(b.getConfig().default_model, 'model-b');
    assert.notStrictEqual(a.getConfig(), b.getConfig(), 'configs are distinct objects');
  } finally {
    await a.close();
    await b.close();
    await mock.close();
  }
});
187
+
188
// close() must tear down what run() set up: the tools registered for the
// 'sdkfs' MCP server have to disappear from the shared tool registry.
test('close() releases resources: a real MCP server is disconnected', async () => {
  const { toolRegistry } = require('../lib/internals');

  // Snapshot of the dynamically registered tool names at call time.
  const dynamicToolNames = () => toolRegistry.dynamicToolEntries().map((entry) => entry.tool);
  const isSdkfsTool = (toolName) => toolName.startsWith('mcp__sdkfs__');

  const mock = await startMockLLM();
  // One reply for the single run() that triggers the MCP connect.
  mock.replyWith('Hi.');

  const sdkfsServer = { transport: 'stdio', command: process.execPath, args: [MOCK_MCP], allowAll: true };
  const agent = createAgent({
    apiBase: mock.base,
    apiKey: 'k',
    model: 'm',
    sandbox: { mode: 'off' },
    config: {
      mcp: { servers: { sdkfs: sdkfsServer } },
    },
  });

  try {
    // run() lazily connects MCP and registers its tools.
    await agent.run('hello');
    const namesConnected = dynamicToolNames();
    assert.ok(namesConnected.some(isSdkfsTool), 'MCP tools registered after run()');

    await agent.close();
    const namesAfter = dynamicToolNames();
    assert.ok(!namesAfter.some(isSdkfsTool), 'MCP tools unregistered after close()');
  } finally {
    // Close here only if the flag says the happy path did not already do it.
    if (!agent.closed) await agent.close();
    await mock.close();
  }
});
213
+
214
// A closed agent must reject further run() calls with an error that mentions
// "close".
test('run() after close() throws', async () => {
  const mock = await startMockLLM();
  // try/finally, like the sibling tests: the mock server is shut down even when
  // agent construction, close(), or the rejection assertion fails.
  try {
    const agent = agentFor(mock);
    await agent.close();
    await assert.rejects(() => agent.run('anything'), /close/i);
  } finally {
    await mock.close();
  }
});
221
+
222
// The package is required by its own name, so this checks the `exports` map
// in package.json rather than relative file paths.
test('exports map resolves the facade and the /internals subpath', () => {
  const facade = require('@semalt-ai/code');
  assert.strictEqual(typeof facade.createAgent, 'function', 'main entry exposes createAgent');

  const internals = require('@semalt-ai/code/internals');
  assert.strictEqual(internals.__unstable__, true, 'internals carries the instability marker');
  for (const exportedName of ['createAgentRunner', 'createApiClient']) {
    assert.strictEqual(typeof internals[exportedName], 'function');
  }

  // Pin the exact subpath targets declared in package.json.
  const pkg = require('../package.json');
  const expectedTargets = [
    ['.', './lib/sdk.js', 'main subpath → facade'],
    ['./internals', './lib/internals.js', 'internals subpath → building blocks'],
  ];
  for (const [subpath, target, why] of expectedTargets) {
    assert.strictEqual(pkg.exports[subpath], target, why);
  }
});