@semalt-ai/code 1.8.5 → 1.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192) hide show
  1. package/.claude/settings.local.json +7 -1
  2. package/.github/workflows/ci.yml +69 -0
  3. package/ARCHITECTURE.md +6 -95
  4. package/CLAUDE.md +196 -316
  5. package/README.md +148 -4
  6. package/docs/ARCHITECTURE.md +1321 -0
  7. package/docs/CONFIG.md +340 -0
  8. package/docs/HISTORY.md +245 -0
  9. package/examples/embed.js +74 -0
  10. package/index.js +251 -10
  11. package/lib/agent.js +856 -120
  12. package/lib/api.js +239 -50
  13. package/lib/args.js +74 -2
  14. package/lib/audit.js +23 -1
  15. package/lib/background.js +584 -0
  16. package/lib/checkpoints.js +757 -0
  17. package/lib/commands/auth.js +94 -0
  18. package/lib/commands/chat-session.js +489 -0
  19. package/lib/commands/chat-slash.js +415 -0
  20. package/lib/commands/chat-turn.js +669 -0
  21. package/lib/commands/chat.js +407 -0
  22. package/lib/commands/custom.js +157 -0
  23. package/lib/commands/history-utils.js +66 -0
  24. package/lib/commands/index.js +268 -0
  25. package/lib/commands/mcp.js +113 -0
  26. package/lib/commands/oneshot.js +193 -0
  27. package/lib/commands/registry.js +269 -0
  28. package/lib/commands/tasks.js +89 -0
  29. package/lib/compact.js +87 -0
  30. package/lib/config.js +360 -11
  31. package/lib/constants.js +401 -3
  32. package/lib/deny.js +199 -0
  33. package/lib/doctor.js +160 -0
  34. package/lib/headless.js +202 -0
  35. package/lib/hooks.js +286 -0
  36. package/lib/images.js +270 -0
  37. package/lib/internals.js +49 -0
  38. package/lib/mcp/boundary.js +131 -0
  39. package/lib/mcp/client.js +270 -0
  40. package/lib/mcp/oauth.js +134 -0
  41. package/lib/memory.js +209 -0
  42. package/lib/metrics.js +37 -2
  43. package/lib/payload.js +54 -0
  44. package/lib/permission-rules.js +401 -0
  45. package/lib/permissions.js +123 -26
  46. package/lib/pricing.js +67 -0
  47. package/lib/proc.js +62 -0
  48. package/lib/prompts.js +99 -8
  49. package/lib/sandbox.js +568 -0
  50. package/lib/sdk.js +328 -0
  51. package/lib/secrets.js +211 -0
  52. package/lib/skills.js +223 -0
  53. package/lib/subagents.js +516 -0
  54. package/lib/tool_registry.js +2862 -0
  55. package/lib/tool_specs.js +263 -9
  56. package/lib/tools.js +352 -1039
  57. package/lib/ui/anim.js +86 -0
  58. package/lib/ui/ansi.js +17 -27
  59. package/lib/ui/chat-history.js +253 -71
  60. package/lib/ui/create-ui.js +67 -24
  61. package/lib/ui/diff.js +90 -25
  62. package/lib/ui/file-activity.js +236 -0
  63. package/lib/ui/format.js +195 -29
  64. package/lib/ui/input-field.js +21 -11
  65. package/lib/ui/md-stream.js +234 -0
  66. package/lib/ui/render-operation.js +113 -0
  67. package/lib/ui/select.js +1 -4
  68. package/lib/ui/status-bar.js +146 -36
  69. package/lib/ui/stream.js +20 -13
  70. package/lib/ui/theme.js +190 -44
  71. package/lib/ui/tool-operation.js +190 -0
  72. package/lib/ui/utils.js +9 -5
  73. package/lib/ui/web-activity.js +270 -0
  74. package/lib/ui/writer.js +159 -45
  75. package/lib/ui.js +1 -1
  76. package/lib/verify.js +229 -0
  77. package/lib/web-extract.js +213 -0
  78. package/lib/web-summarize.js +68 -0
  79. package/package.json +19 -4
  80. package/scripts/lint.js +57 -0
  81. package/test/agent-loop.test.js +389 -0
  82. package/test/anim-driver.test.js +153 -0
  83. package/test/ask-user-display.test.js +226 -0
  84. package/test/ask-user-gate.test.js +231 -0
  85. package/test/background.test.js +414 -0
  86. package/test/chat-history-nocolor.test.js +155 -0
  87. package/test/chat-relogin.test.js +207 -0
  88. package/test/chat.test.js +114 -0
  89. package/test/checkpoints-agent.test.js +181 -0
  90. package/test/checkpoints.test.js +650 -0
  91. package/test/command-registry.test.js +160 -0
  92. package/test/compact.test.js +116 -0
  93. package/test/completion-lazy.test.js +52 -0
  94. package/test/config-merge.test.js +324 -0
  95. package/test/config-quarantine.test.js +128 -0
  96. package/test/config-write-guard-allow-anywhere.test.js +56 -0
  97. package/test/config-write-guard-skip.test.js +46 -0
  98. package/test/config-write-guard.test.js +153 -0
  99. package/test/context-split.test.js +215 -0
  100. package/test/cost-doctor.test.js +142 -0
  101. package/test/custom-commands-chat.test.js +106 -0
  102. package/test/custom-commands.test.js +230 -0
  103. package/test/defer-detail-band.test.js +403 -0
  104. package/test/deny-windows.test.js +120 -0
  105. package/test/deny.test.js +83 -0
  106. package/test/detail-band-tab-flatten.test.js +242 -0
  107. package/test/download-allow-anywhere.test.js +66 -0
  108. package/test/download-confine.test.js +153 -0
  109. package/test/exec-diff.test.js +268 -0
  110. package/test/executors.test.js +599 -0
  111. package/test/extract-tool-calls.test.js +349 -0
  112. package/test/fetch-url-validation.test.js +219 -0
  113. package/test/file-activity.test.js +522 -0
  114. package/test/fixtures/tool-calls.js +57 -0
  115. package/test/fixtures/web-page.js +91 -0
  116. package/test/git-tools.test.js +384 -0
  117. package/test/grep-glob-serialize.test.js +242 -0
  118. package/test/grep-glob.test.js +268 -0
  119. package/test/grep-path-target.test.js +227 -0
  120. package/test/harness/README.md +57 -0
  121. package/test/harness/chat-harness.js +143 -0
  122. package/test/harness/memwarn-headless-child.js +65 -0
  123. package/test/harness/mock-llm.js +120 -0
  124. package/test/harness/mock-mcp-server.js +142 -0
  125. package/test/harness/sse-server.js +69 -0
  126. package/test/headless.test.js +348 -0
  127. package/test/history-utils.test.js +88 -0
  128. package/test/hooks-agent.test.js +238 -0
  129. package/test/hooks-verify-sandbox.test.js +232 -0
  130. package/test/hooks.test.js +216 -0
  131. package/test/http-get-user-agent.test.js +142 -0
  132. package/test/images-api.test.js +208 -0
  133. package/test/images.test.js +238 -0
  134. package/test/input-field-ctrl-o.test.js +37 -0
  135. package/test/live-height-physical.test.js +281 -0
  136. package/test/max-iterations.test.js +218 -0
  137. package/test/mcp-boundary.test.js +57 -0
  138. package/test/mcp-client.test.js +267 -0
  139. package/test/mcp-oauth.test.js +86 -0
  140. package/test/md-stream.test.js +183 -0
  141. package/test/memory-truncation-warning.test.js +222 -0
  142. package/test/memory.test.js +198 -0
  143. package/test/native-dispatch.test.js +409 -0
  144. package/test/native-live-narration.test.js +254 -0
  145. package/test/output-chokepoint.test.js +188 -0
  146. package/test/output-heredoc-leak.test.js +195 -0
  147. package/test/output-preview.test.js +245 -0
  148. package/test/path-guards.test.js +134 -0
  149. package/test/payload.test.js +99 -0
  150. package/test/permission-rules-agent.test.js +210 -0
  151. package/test/permission-rules.test.js +297 -0
  152. package/test/permissions.test.js +362 -0
  153. package/test/plan-mode.test.js +167 -0
  154. package/test/read-paginate.test.js +275 -0
  155. package/test/readonly-tools.test.js +177 -0
  156. package/test/render-operation.test.js +317 -0
  157. package/test/replay-descriptor-xml.test.js +216 -0
  158. package/test/replay-descriptor.test.js +189 -0
  159. package/test/replay-web-aggregate.test.js +291 -0
  160. package/test/replay-web-persist.test.js +241 -0
  161. package/test/result-cap.test.js +233 -0
  162. package/test/running-glyph-anim.test.js +111 -0
  163. package/test/sandbox-agent.test.js +147 -0
  164. package/test/sandbox-integration.test.js +216 -0
  165. package/test/sandbox.test.js +408 -0
  166. package/test/sdk.test.js +234 -0
  167. package/test/shell-output-cap.test.js +181 -0
  168. package/test/skills-chat.test.js +110 -0
  169. package/test/skills.test.js +295 -0
  170. package/test/smoke.test.js +68 -0
  171. package/test/status-bar-driver.test.js +93 -0
  172. package/test/status-bar-pause.test.js +164 -0
  173. package/test/status-bar-resync.test.js +188 -0
  174. package/test/stream-parser.test.js +171 -0
  175. package/test/subagents-agent.test.js +178 -0
  176. package/test/subagents.test.js +222 -0
  177. package/test/theme-palette.test.js +166 -0
  178. package/test/tool-registry.test.js +85 -0
  179. package/test/trim-budget.test.js +101 -0
  180. package/test/truncate-visible.test.js +78 -0
  181. package/test/verify-agent.test.js +317 -0
  182. package/test/verify.test.js +141 -0
  183. package/test/view-image.test.js +199 -0
  184. package/test/web-activity-ordering.test.js +203 -0
  185. package/test/web-activity.test.js +207 -0
  186. package/test/web-data-extraction-guidance.test.js +71 -0
  187. package/test/web-extract.test.js +185 -0
  188. package/test/web-fetch-agent.test.js +291 -0
  189. package/test/web-fetch-mode.test.js +193 -0
  190. package/test/web-search.test.js +380 -0
  191. package/lib/commands.js +0 -1438
  192. package/path +0 -1
@@ -0,0 +1,147 @@
1
+ 'use strict';
2
+
3
+ // Integration tests for the OS-sandbox FALLBACK rules wired into agentExecShell
4
+ // (Task 4.4). These exercise the config×detection decision at the executor
5
+ // chokepoint without needing a real bwrap/sandbox-exec: the detection cache is
6
+ // primed to "unavailable" (or the runner genuinely lacks the tool), and we
7
+ // assert the fail-safe behavior — never a silent unsandboxed run.
8
+
9
+ const { test } = require('node:test');
10
+ const assert = require('node:assert');
11
+
12
+ const fs = require('fs');
13
+ const os = require('os');
14
+ const path = require('path');
15
+
16
+ const ui = require('../lib/ui');
17
+ const { createPermissionManager } = require('../lib/permissions');
18
+ const { createToolExecutor } = require('../lib/tools');
19
+ const { detectSandbox, _resetSandboxDetection } = require('../lib/sandbox');
20
+
// Prime the shared sandbox-detection cache with an "unavailable" verdict so the
// unavailable-path tests behave the same on ANY runner — including macOS or a
// Linux box with bubblewrap installed, where detection would otherwise succeed.
function primeUnavailable() {
  _resetSandboxDetection();
  const fakeLinuxWithoutBwrap = {
    platform: 'linux',
    which() { return null; }, // no bwrap
    readFile() { return 'Linux version 6.0'; },
    force: true,
  };
  detectSandbox(fakeLinuxWithoutBwrap);
}
33
+
// Build a tool executor wired to a permission manager that allows the `exec`
// tier and has inert UI callbacks. `config` is served unchanged through the
// getConfig callback; `onUnsandboxed` is forwarded as the optional approver.
function buildExec({ config, onUnsandboxed } = {}) {
  const permissions = createPermissionManager(ui, {
    skipPermissions: false,
    allowedTiers: ['exec'],
  });
  const inertCallbacks = {
    onAddMessage: () => {},
    onShowModal: () => {},
    onCloseModal: () => {},
    onCaptureNavigation: () => () => {},
  };
  permissions.setUICallbacks(inertCallbacks);
  return createToolExecutor(permissions, ui, () => config, { onUnsandboxed });
}
40
+
// With detection primed to "unavailable", mode `auto` and no approver wired in,
// the executor must refuse the command rather than run it unsandboxed.
test('sandbox unavailable + auto + NO approver → REFUSED (never a silent unsandboxed run)', async () => {
  primeUnavailable();
  try {
    const { agentExecShell } = buildExec({ config: { sandbox: { mode: 'auto' } } });
    const r = await agentExecShell('echo SHOULD_NOT_RUN');
    assert.strictEqual(r.blocked, true);
    assert.strictEqual(r.sandbox, 'unavailable');
    assert.match(r.stderr, /refused to run unsandboxed/i);
    assert.doesNotMatch(r.stdout || '', /SHOULD_NOT_RUN/);
  } finally {
    // Drop the primed verdict even when an assertion above throws, so a failure
    // here cannot leak a fake "unavailable" cache into later tests.
    _resetSandboxDetection();
  }
});
51
+
// `failIfUnavailable: true` is the strict gate: an unavailable sandbox is a hard
// error whose stderr names the setting.
test('sandbox unavailable + failIfUnavailable → HARD ERROR (strict gate)', async () => {
  primeUnavailable();
  try {
    const { agentExecShell } = buildExec({ config: { sandbox: { mode: 'auto', failIfUnavailable: true } } });
    const r = await agentExecShell('echo SHOULD_NOT_RUN');
    assert.strictEqual(r.blocked, true);
    assert.strictEqual(r.sandbox, 'unavailable');
    assert.match(r.stderr, /failIfUnavailable/);
  } finally {
    // Reset the primed detection cache even if an assertion failed.
    _resetSandboxDetection();
  }
});
61
+
// An approver that resolves to `false` must leave the command blocked.
test('sandbox unavailable + auto + approver says NO → refused', async () => {
  primeUnavailable();
  try {
    const { agentExecShell } = buildExec({ config: { sandbox: { mode: 'auto' } }, onUnsandboxed: async () => false });
    const r = await agentExecShell('echo SHOULD_NOT_RUN');
    assert.strictEqual(r.blocked, true);
  } finally {
    // Reset the primed detection cache even if the assertion failed.
    _resetSandboxDetection();
  }
});
69
+
// An approver that resolves to `true` lets the command run, but the result must
// still report `sandbox: 'unavailable'` and the approver must have been handed a
// non-empty reason string.
test('sandbox unavailable + auto + human approver says YES → runs unsandboxed (status reflects it)', async () => {
  primeUnavailable();
  try {
    let asked = null;
    const { agentExecShell } = buildExec({
      config: { sandbox: { mode: 'auto' }, command_timeout_ms: 5000 },
      onUnsandboxed: async (info) => { asked = info; return true; },
    });
    const r = await agentExecShell('echo RAN_UNSANDBOXED');
    assert.ok(asked && typeof asked.reason === 'string' && asked.reason.length > 0, 'approver receives the reason');
    assert.match(asked.reason, /bwrap|bubblewrap|not found/i);
    assert.strictEqual(r.exit_code, 0);
    assert.match(r.stdout, /RAN_UNSANDBOXED/);
    assert.strictEqual(r.sandbox, 'unavailable'); // ran, but without kernel confinement
  } finally {
    // Reset the primed detection cache even if an assertion failed.
    _resetSandboxDetection();
  }
});
85
+
// mode:off short-circuits before detection, so this is deterministic on every
// runner regardless of the cache. The cache is primed anyway to prove that.
test('mode off → runs unsandboxed deterministically (human opt-out), status off', async () => {
  primeUnavailable();
  try {
    const { agentExecShell } = buildExec({ config: { sandbox: { mode: 'off' }, command_timeout_ms: 5000 } });
    const r = await agentExecShell('echo MODE_OFF_RAN');
    assert.strictEqual(r.exit_code, 0);
    assert.match(r.stdout, /MODE_OFF_RAN/);
    assert.strictEqual(r.sandbox, 'off');
    // With no jail the command keeps the host network — surfaced honestly (Task 4.4b).
    assert.strictEqual(r.network, 'on');
  } finally {
    // Reset the primed detection cache even if an assertion failed.
    _resetSandboxDetection();
  }
});
99
+
// A model only ever controls the command string + framework {signal}. Even if
// call options carried sandbox-ish keys (they never do in the real schema),
// they must NOT disable the gate — the decision reads only human config.
test('no MODEL-reachable path disables the sandbox: call-level options cannot flip the decision', async () => {
  primeUnavailable();
  try {
    const { agentExecShell } = buildExec({ config: { sandbox: { mode: 'auto' } } });
    const r = await agentExecShell('echo SHOULD_NOT_RUN', { sandbox: 'off', mode: 'off', skipSandbox: true, failIfUnavailable: false });
    assert.strictEqual(r.blocked, true, 'call-level options cannot disable the sandbox');
    assert.strictEqual(r.sandbox, 'unavailable');
  } finally {
    // Reset the primed detection cache even if an assertion failed.
    _resetSandboxDetection();
  }
});
111
+
// Binary network isolation surfaced through the executor (Task 4.4b). The test
// below needs the REAL sandbox so the result's `network` field reflects an
// actual kernel jail; it is skipped when bwrap is not the detected tool.

// Run one forced, real detection with the shared cache cleared before and after,
// so this probe neither reads nor leaves behind a cached verdict.
function probeRealSandbox() {
  _resetSandboxDetection();
  const detected = detectSandbox({ force: true });
  _resetSandboxDetection();
  return detected;
}

const _realDet = probeRealSandbox();
const hasRealBwrap = _realDet.available && _realDet.tool === 'bwrap';
// `false` runs the test; a string skips it and doubles as the skip reason.
const NET_SKIP = hasRealBwrap
  ? false
  : `OS sandbox (bwrap) unavailable on this runner (${_realDet.reason || _realDet.platform})`;
119
+
// Runs a small Node probe through the executor with `sandbox.network: 'off'`.
// The probe exits 0 when it sees a non-loopback interface and 7 when it sees
// only loopback, so exit code 7 is the evidence of kernel-level isolation.
test('sandbox.network off → the shell result reports net OFF and the command has no network (real jail)', { skip: NET_SKIP }, async () => {
  _resetSandboxDetection();
  detectSandbox({ force: true }); // real detection, available
  const probePath = path.join(os.tmpdir(), `semalt-agent-netprobe-${process.pid}.js`);
  const probeSource = "const i=require('os').networkInterfaces();const nonLo=Object.keys(i).filter(n=>n!=='lo'&&n!=='lo0');process.exit(nonLo.length>0?0:7);";
  fs.writeFileSync(probePath, probeSource);
  try {
    const execConfig = { sandbox: { mode: 'auto', network: 'off' }, command_timeout_ms: 10000 };
    const { agentExecShell } = buildExec({ config: execConfig });
    const command = [process.execPath, probePath].map((part) => JSON.stringify(part)).join(' ');
    // Pass model-reachable-looking call options — they must NOT re-enable the network.
    const result = await agentExecShell(command, { network: 'on', noNetwork: false });
    assert.strictEqual(result.sandbox, 'on', 'ran inside a real jail');
    assert.strictEqual(result.network, 'off', 'the result reports kernel-level no-network');
    assert.strictEqual(result.exit_code, 7, 'the jailed command genuinely had no network (only loopback)');
  } finally {
    try { fs.unlinkSync(probePath); } catch {}
    _resetSandboxDetection();
  }
});
137
+
// Even with the sandbox off, the deny-list chokepoint must still hard-block a
// destructive agent command before it ever reaches the spawn/sandbox path.
test('the deny-list still fires UNDER the sandbox layer (defense in depth, agent-initiated)', async () => {
  primeUnavailable();
  try {
    const { agentExecShell } = buildExec({ config: { sandbox: { mode: 'off' } } });
    const r = await agentExecShell('rm -rf /');
    assert.strictEqual(r.blocked, true);
    assert.match(r.stderr, /deny-list/i);
  } finally {
    // Reset the primed detection cache even if an assertion failed.
    _resetSandboxDetection();
  }
});
@@ -0,0 +1,216 @@
1
+ 'use strict';
2
+
3
+ // Kernel-level enforcement tests for the OS sandbox (Task 4.4). These run REAL
4
+ // bwrap (Linux/WSL2) or sandbox-exec (macOS) jails and assert the OS — not our
5
+ // pattern-matching — blocks the writes. They SKIP gracefully when the primitive
6
+ // is absent on the runner (mirroring the ripgrep-parity pattern), so the suite
7
+ // stays green on a Windows/WSL1 box or a Linux box without bubblewrap.
8
+
9
+ const { test } = require('node:test');
10
+ const assert = require('node:assert');
11
+ const fs = require('fs');
12
+ const os = require('os');
13
+ const path = require('path');
14
+ const { spawnSync } = require('child_process');
15
+
16
+ const { detectSandbox, buildBwrapArgs, buildSeatbeltPolicy } = require('../lib/sandbox');
17
+
// Detect the real OS sandbox once for the whole file. SKIP is `false` when a
// tool is available (tests run) and otherwise a string that node:test reports
// as the skip reason.
const det = detectSandbox({ force: true });
const SKIP = !det.available
  && `OS sandbox tool unavailable on this runner (${det.reason || det.platform})`;
20
+
// Directories handed out by mkdir(), remembered so they can be removed when the
// test process exits instead of piling up in os.tmpdir() run after run.
const tempDirs = [];

// Remove every directory mkdir() has created so far. Best-effort: a directory
// that cannot be removed must not change the outcome of the test run.
function cleanupTempDirs() {
  for (const dir of tempDirs.splice(0)) {
    try {
      fs.rmSync(dir, { recursive: true, force: true });
    } catch {
      // Deliberately ignored — leftover temp data is harmless, a thrown error
      // inside an 'exit' handler is not.
    }
  }
}

process.once('exit', cleanupTempDirs);

/**
 * Create a fresh temporary directory under os.tmpdir().
 *
 * @param {string} prefix - Name prefix passed to fs.mkdtempSync.
 * @returns {string} The symlink-resolved (realpath) absolute path of the new
 *   directory. The directory is removed automatically at process exit.
 */
function mkdir(prefix) {
  const dir = fs.realpathSync(fs.mkdtempSync(path.join(os.tmpdir(), prefix)));
  tempDirs.push(dir);
  return dir;
}
24
+
/**
 * Run `cmd` via `/bin/sh -c` inside a real jail whose only writable roots are
 * `writableRoots`, with `protectedPaths` forced read-only.
 *
 * @param {string} cmd - Shell command line to execute inside the jail.
 * @param {object} opts
 * @param {string[]} opts.writableRoots - Directories the jail may write to.
 * @param {string[]} opts.protectedPaths - Paths that stay read-only.
 * @param {string} [opts.chdir] - Working directory for the jailed command.
 * @returns {object} The spawnSync result ({ status, stdout, stderr, ... }).
 * @throws {Error} When the jail process could not be spawned or timed out.
 *   Without this, `status` would be null and every "must be blocked" assertion
 *   (`status !== 0`, file absent) would pass without the jail ever running.
 */
function runJailed(cmd, { writableRoots, protectedPaths, chdir }) {
  const spawnOpts = { encoding: 'utf8', timeout: 10000 };
  let result;
  if (det.tool === 'bwrap') {
    const args = buildBwrapArgs({
      writableRoots,
      protectedPaths,
      rootWritable: false,
      chdir,
      fsExists: (p) => { try { return fs.existsSync(p); } catch { return false; } },
    });
    result = spawnSync(det.binPath || 'bwrap', [...args, '/bin/sh', '-c', cmd], spawnOpts);
  } else {
    // sandbox-exec (macOS Seatbelt). The policy builder is not given `chdir`, so
    // apply it through the spawn's cwd rather than silently dropping it.
    const policy = buildSeatbeltPolicy({ writableRoots, protectedPaths, rootWritable: false });
    result = spawnSync(det.binPath || 'sandbox-exec', ['-p', policy, '/bin/sh', '-c', cmd], { ...spawnOpts, cwd: chdir });
  }
  if (result.error) {
    throw result.error;
  }
  return result;
}
42
+
// Positive control: the working dir is the jail's writable root, so a write
// there must succeed and the file must appear on the host.
test('a write INSIDE the working dir succeeds under the jail', { skip: SKIP }, () => {
  const workDir = mkdir('semalt-sbx-work-');
  const jail = { writableRoots: [workDir], protectedPaths: [], chdir: workDir };
  const { status, stderr } = runJailed(`echo ok > ${workDir}/inside.txt`, jail);
  assert.strictEqual(status, 0, `expected success, stderr: ${stderr}`);
  const wasWritten = fs.existsSync(path.join(workDir, 'inside.txt'));
  assert.ok(wasWritten, 'file inside the working dir should be written');
});
49
+
// A second temp dir that is NOT a writable root stands in for "everything else
// on the host"; the jailed redirect into it must fail and leave no file.
test('a write OUTSIDE the working dir is blocked by the kernel layer', { skip: SKIP }, () => {
  const workDir = mkdir('semalt-sbx-work-');
  const outsideDir = mkdir('semalt-sbx-out-');
  const escapeTarget = path.join(outsideDir, 'escaped.txt');
  const jail = { writableRoots: [workDir], protectedPaths: [], chdir: workDir };
  const { status } = runJailed(`echo pwned > ${escapeTarget}`, jail);
  assert.notStrictEqual(status, 0, 'write outside the working dir must fail');
  assert.ok(!fs.existsSync(escapeTarget), 'the out-of-jail file must NOT exist');
});
58
+
// Simulates ~/.semalt-ai: the directory exists but config.json does NOT. The
// directory is passed as a protected path, so the jailed process must be unable
// to CREATE the missing file and inject host-privileged hooks through it.
test('writes to a protected dir are denied — including a NOT-YET-EXISTING config (CVE-2026-25725)', { skip: SKIP }, () => {
  const workDir = mkdir('semalt-sbx-work-');
  const protectedDir = mkdir('semalt-sbx-prot-');
  const configPath = path.join(protectedDir, 'config.json');
  assert.ok(!fs.existsSync(configPath), 'precondition: config.json does not exist yet');
  const jail = { writableRoots: [workDir], protectedPaths: [protectedDir], chdir: workDir };
  const { status } = runJailed(`echo '{"hooks":1}' > ${configPath}`, jail);
  assert.notStrictEqual(status, 0, 'creating a missing config in a protected dir must fail');
  assert.ok(!fs.existsSync(configPath), 'the not-yet-existing config must NOT have been created');
});
71
+
// cwd == $HOME edge case: the writable root CONTAINS the protected directory,
// and the protected (read-only) treatment must still take precedence.
test('a protected dir nested inside a writable root still wins (read-only)', { skip: SKIP }, () => {
  const homeLike = mkdir('semalt-sbx-home-');
  const nestedProtected = path.join(homeLike, '.semalt-ai');
  fs.mkdirSync(nestedProtected);
  const configPath = path.join(nestedProtected, 'config.json');
  const jail = { writableRoots: [homeLike], protectedPaths: [nestedProtected], chdir: homeLike };
  const { status } = runJailed(`echo x > ${configPath}`, jail);
  assert.notStrictEqual(status, 0, 'protected dir nested in a writable root must stay read-only');
  assert.ok(!fs.existsSync(configPath), 'config.json under the nested protected dir must NOT be created');
});
83
+
// The project .semalt dir lives INSIDE the writable working dir, yet protecting
// it must still win — a sandboxed shell must not be able to create or modify
// .semalt/config.json (or agents/hooks) to drive host-privileged execution.
// Same shape as the CVE-2026-25725 case: the dir exists, the config file does not.
test('a project .semalt/config.json write is blocked by the kernel layer, incl. not-yet-existing (Pre-Task 5.0b)', { skip: SKIP }, () => {
  const projectDir = mkdir('semalt-sbx-proj-');
  const dotSemalt = path.join(projectDir, '.semalt');
  fs.mkdirSync(dotSemalt);
  const configPath = path.join(dotSemalt, 'config.json');
  assert.ok(!fs.existsSync(configPath), 'precondition: .semalt/config.json does not exist yet');
  const jail = { writableRoots: [projectDir], protectedPaths: [dotSemalt], chdir: projectDir };
  const { status } = runJailed(`echo '{"hooks":1}' > ${configPath}`, jail);
  assert.notStrictEqual(status, 0, 'creating .semalt/config.json inside the jail must fail');
  assert.ok(!fs.existsSync(configPath), 'the not-yet-existing .semalt/config.json must NOT have been created');
});
99
+
// Addresses the out-of-jail file through a /proc/self/root prefix. Per the
// original author's note, bwrap mounts a fresh /proc so the prefix resolves to
// the jail root, and Seatbelt enforces on the resolved vnode — either way the
// write must be blocked.
test('a /proc/self/root rewrite is confined on the RESOLVED path (the Ona bypass)', { skip: SKIP }, () => {
  const workDir = mkdir('semalt-sbx-work-');
  const outsideDir = mkdir('semalt-sbx-out-');
  const realTarget = path.join(outsideDir, 'rewrite.txt');
  const viaProc = `/proc/self/root${realTarget}`;
  const jail = { writableRoots: [workDir], protectedPaths: [], chdir: workDir };
  const { status } = runJailed(`echo pwned > ${viaProc}`, jail);
  assert.notStrictEqual(status, 0, 'a /proc/self/root rewrite must be confined on the resolved path');
  assert.ok(!fs.existsSync(path.join(outsideDir, 'rewrite.txt')), 'the rewritten target must NOT be written');
});
111
+
// The outer sh spawns an inner sh that attempts the escape — the boundary must
// cover the whole subprocess tree, not just the first process.
test('child processes inherit the jail (a spawned subprocess is confined)', { skip: SKIP }, () => {
  const workDir = mkdir('semalt-sbx-work-');
  const outsideDir = mkdir('semalt-sbx-out-');
  const escapeTarget = path.join(outsideDir, 'child.txt');
  const jail = { writableRoots: [workDir], protectedPaths: [], chdir: workDir };
  const { status } = runJailed(`sh -c "echo pwned > ${escapeTarget}"`, jail);
  assert.notStrictEqual(status, 0, 'a child process must not be able to escape the jail');
  assert.ok(!fs.existsSync(escapeTarget), 'the child-written out-of-jail file must NOT exist');
});
122
+
// Confinement is about writes: a file under /etc (itself passed as a protected
// path) must still be readable and copyable into the writable working dir.
test('reads are allowed broadly even when writes are confined', { skip: SKIP }, () => {
  const workDir = mkdir('semalt-sbx-work-');
  // /etc/hostname exists on Linux; on macOS fall back to /etc/hosts.
  let readable = '/etc/hosts';
  if (fs.existsSync('/etc/hostname')) {
    readable = '/etc/hostname';
  }
  const jail = { writableRoots: [workDir], protectedPaths: ['/etc'], chdir: workDir };
  const { status, stderr } = runJailed(`cat ${readable} > ${workDir}/copy.txt`, jail);
  assert.strictEqual(status, 0, `reading ${readable} should succeed, stderr: ${stderr}`);
});
130
+
131
+ // ---------------------------------------------------------------------------
132
+ // Binary network isolation (Task 4.4b) — REAL kernel enforcement.
133
+ // ---------------------------------------------------------------------------
134
+ //
135
+ // Discriminator (no external connectivity required, so it is reliable in CI):
136
+ // * bwrap (Linux/WSL2): --unshare-net gives the jail a fresh network namespace
137
+ // with NO real interfaces — only loopback. We count non-loopback interfaces.
138
+ // * sandbox-exec (macOS): (deny network*) blocks socket operations — we test
139
+ // whether a TCP bind is permitted. (Not exercised on Linux runners, which take the bwrap branch.)
140
+ // Exit 0 ⇒ the probe HAS network; exit 7 ⇒ it does NOT.
141
+
const NODE = process.execPath;

// Write the network probe script into `dir` as netprobe.js and return its path.
// The probe exits 0 when it finds network and 7 when it does not:
//   * bwrap: looks for any interface other than lo / lo0;
//   * otherwise (sandbox-exec): tries to listen on a TCP socket.
function writeNetProbe(dir) {
  let source;
  if (det.tool === 'bwrap') {
    source = "const i=require('os').networkInterfaces();const nonLo=Object.keys(i).filter(n=>n!=='lo'&&n!=='lo0');process.exit(nonLo.length>0?0:7);";
  } else {
    source = "const s=require('net').createServer();s.on('error',()=>process.exit(7));s.listen(0,'0.0.0.0',()=>{s.close();process.exit(0);});";
  }
  const probePath = path.join(dir, 'netprobe.js');
  fs.writeFileSync(probePath, source);
  return probePath;
}
152
+
/**
 * Run `cmd` via `/bin/sh -c` inside a jail built with the given network mode.
 *
 * @param {object} opts
 * @param {string} opts.network - Network mode forwarded to the jail builder
 *   (the tests in this file pass 'on' or 'off').
 * @param {string[]} opts.writableRoots - Directories the jail may write to.
 * @param {string[]} [opts.protectedPaths=[]] - Paths that stay read-only.
 * @param {string} [opts.chdir] - Working directory for the jailed command.
 * @param {string} opts.cmd - Shell command line to run. Required: there is no
 *   default, callers pass either the network probe or a filesystem command.
 * @returns {object} The spawnSync result ({ status, stdout, stderr, ... }).
 * @throws {Error} When the jail process could not be spawned or timed out, so
 *   a jail that never started cannot be mistaken for "blocked".
 */
function runJailedNet({ network, writableRoots, protectedPaths = [], chdir, cmd }) {
  const spawnOpts = { encoding: 'utf8', timeout: 10000 };
  let result;
  if (det.tool === 'bwrap') {
    const args = buildBwrapArgs({
      writableRoots, protectedPaths, rootWritable: false, chdir, network,
      fsExists: (p) => { try { return fs.existsSync(p); } catch { return false; } },
    });
    result = spawnSync(det.binPath || 'bwrap', [...args, '/bin/sh', '-c', cmd], spawnOpts);
  } else {
    // sandbox-exec (macOS Seatbelt). The policy builder is not given `chdir`, so
    // apply it through the spawn's cwd rather than silently dropping it.
    const policy = buildSeatbeltPolicy({ writableRoots, protectedPaths, rootWritable: false, network });
    result = spawnSync(det.binPath || 'sandbox-exec', ['-p', policy, '/bin/sh', '-c', cmd], { ...spawnOpts, cwd: chdir });
  }
  if (result.error) {
    throw result.error;
  }
  return result;
}
166
+
// Runs the network probe directly on the host (no jail) and reports whether it
// exited 0, i.e. whether the probe can see network at all. Guards the
// "network on" positive test against a runner that only has loopback, where
// that test would be vacuous.
function hostHasNetwork() {
  const probePath = writeNetProbe(mkdir('semalt-sbx-host-'));
  const { status } = spawnSync(NODE, [probePath], { encoding: 'utf8', timeout: 10000 });
  return status === 0;
}
175
+
// The probe exits 7 when it finds no network, so status 7 from inside a
// `network: 'off'` jail is the evidence of isolation.
test('network OFF: a sandboxed command CANNOT reach the network (kernel-enforced)', { skip: SKIP }, () => {
  const workDir = mkdir('semalt-sbx-net-off-');
  const probePath = writeNetProbe(workDir);
  const { status, stderr } = runJailedNet({
    network: 'off',
    writableRoots: [workDir],
    chdir: workDir,
    cmd: `${NODE} ${probePath}`,
  });
  assert.strictEqual(status, 7, `no-network jail must have no network, got status ${status} stderr: ${stderr}`);
});
182
+
// Paired positive for the network-off test: the SAME probe in the SAME jail
// shape must see the network when `network: 'on'`, and must not when 'off'.
// On a host where the probe itself sees no network there is nothing to compare
// against, so the test is reported as skipped rather than as a (vacuous) pass.
test('PAIRED positive — network ON: the same sandboxed command CAN reach the network', { skip: SKIP }, (t) => {
  if (!hostHasNetwork()) {
    t.skip('degenerate runner with only loopback — nothing to pair against');
    return;
  }
  const work = mkdir('semalt-sbx-net-on-');
  const probe = writeNetProbe(work);
  const on = runJailedNet({ network: 'on', writableRoots: [work], chdir: work, cmd: `${NODE} ${probe}` });
  assert.strictEqual(on.status, 0, `network-on jail must keep the host network, stderr: ${on.stderr}`);
  // And the negative side of the pair, for an unambiguous on≠off difference.
  const off = runJailedNet({ network: 'off', writableRoots: [work], chdir: work, cmd: `${NODE} ${probe}` });
  assert.strictEqual(off.status, 7, 'network-off must differ from network-on');
});
193
+
// One no-network jail configuration, two checks: the filesystem boundary and
// the network boundary must both hold under it.
test('network isolation COMPOSES with filesystem confinement (both apply)', { skip: SKIP }, () => {
  const workDir = mkdir('semalt-sbx-compose-');
  const outsideDir = mkdir('semalt-sbx-compose-out-');
  const probePath = writeNetProbe(workDir);
  const offJail = { network: 'off', writableRoots: [workDir], chdir: workDir };

  // (a) filesystem: an out-of-CWD write is still blocked under the no-network jail.
  const escapeTarget = path.join(outsideDir, 'escaped.txt');
  const writeAttempt = runJailedNet({ ...offJail, cmd: `echo pwned > ${escapeTarget}` });
  assert.notStrictEqual(writeAttempt.status, 0, 'fs confinement still applies under network-off');
  assert.ok(!fs.existsSync(escapeTarget), 'the out-of-jail write must not land');

  // (b) network: the same jail config has no network.
  const netAttempt = runJailedNet({ ...offJail, cmd: `${NODE} ${probePath}` });
  assert.strictEqual(netAttempt.status, 7, 'network confinement applies in the same jail');
});
208
+
// The outer sh spawns an INNER sh that runs the probe — the no-network boundary
// must cover the whole subprocess tree, not just the first process.
test('child processes inherit the no-network jail', { skip: SKIP }, () => {
  const workDir = mkdir('semalt-sbx-net-child-');
  const probePath = writeNetProbe(workDir);
  const nestedCommand = `sh -c "${NODE} ${probePath}"`;
  const { status } = runJailedNet({
    network: 'off',
    writableRoots: [workDir],
    chdir: workDir,
    cmd: nestedCommand,
  });
  assert.strictEqual(status, 7, 'a child process must also have no network');
});