@semalt-ai/code 1.8.5 → 1.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192) hide show
  1. package/.claude/settings.local.json +7 -1
  2. package/.github/workflows/ci.yml +69 -0
  3. package/ARCHITECTURE.md +6 -95
  4. package/CLAUDE.md +196 -316
  5. package/README.md +148 -4
  6. package/docs/ARCHITECTURE.md +1321 -0
  7. package/docs/CONFIG.md +340 -0
  8. package/docs/HISTORY.md +245 -0
  9. package/examples/embed.js +74 -0
  10. package/index.js +251 -10
  11. package/lib/agent.js +856 -120
  12. package/lib/api.js +239 -50
  13. package/lib/args.js +74 -2
  14. package/lib/audit.js +23 -1
  15. package/lib/background.js +584 -0
  16. package/lib/checkpoints.js +757 -0
  17. package/lib/commands/auth.js +94 -0
  18. package/lib/commands/chat-session.js +489 -0
  19. package/lib/commands/chat-slash.js +415 -0
  20. package/lib/commands/chat-turn.js +669 -0
  21. package/lib/commands/chat.js +407 -0
  22. package/lib/commands/custom.js +157 -0
  23. package/lib/commands/history-utils.js +66 -0
  24. package/lib/commands/index.js +268 -0
  25. package/lib/commands/mcp.js +113 -0
  26. package/lib/commands/oneshot.js +193 -0
  27. package/lib/commands/registry.js +269 -0
  28. package/lib/commands/tasks.js +89 -0
  29. package/lib/compact.js +87 -0
  30. package/lib/config.js +360 -11
  31. package/lib/constants.js +401 -3
  32. package/lib/deny.js +199 -0
  33. package/lib/doctor.js +160 -0
  34. package/lib/headless.js +202 -0
  35. package/lib/hooks.js +286 -0
  36. package/lib/images.js +270 -0
  37. package/lib/internals.js +49 -0
  38. package/lib/mcp/boundary.js +131 -0
  39. package/lib/mcp/client.js +270 -0
  40. package/lib/mcp/oauth.js +134 -0
  41. package/lib/memory.js +209 -0
  42. package/lib/metrics.js +37 -2
  43. package/lib/payload.js +54 -0
  44. package/lib/permission-rules.js +401 -0
  45. package/lib/permissions.js +123 -26
  46. package/lib/pricing.js +67 -0
  47. package/lib/proc.js +62 -0
  48. package/lib/prompts.js +99 -8
  49. package/lib/sandbox.js +568 -0
  50. package/lib/sdk.js +328 -0
  51. package/lib/secrets.js +211 -0
  52. package/lib/skills.js +223 -0
  53. package/lib/subagents.js +516 -0
  54. package/lib/tool_registry.js +2862 -0
  55. package/lib/tool_specs.js +263 -9
  56. package/lib/tools.js +352 -1039
  57. package/lib/ui/anim.js +86 -0
  58. package/lib/ui/ansi.js +17 -27
  59. package/lib/ui/chat-history.js +253 -71
  60. package/lib/ui/create-ui.js +67 -24
  61. package/lib/ui/diff.js +90 -25
  62. package/lib/ui/file-activity.js +236 -0
  63. package/lib/ui/format.js +195 -29
  64. package/lib/ui/input-field.js +21 -11
  65. package/lib/ui/md-stream.js +234 -0
  66. package/lib/ui/render-operation.js +113 -0
  67. package/lib/ui/select.js +1 -4
  68. package/lib/ui/status-bar.js +146 -36
  69. package/lib/ui/stream.js +20 -13
  70. package/lib/ui/theme.js +190 -44
  71. package/lib/ui/tool-operation.js +190 -0
  72. package/lib/ui/utils.js +9 -5
  73. package/lib/ui/web-activity.js +270 -0
  74. package/lib/ui/writer.js +159 -45
  75. package/lib/ui.js +1 -1
  76. package/lib/verify.js +229 -0
  77. package/lib/web-extract.js +213 -0
  78. package/lib/web-summarize.js +68 -0
  79. package/package.json +19 -4
  80. package/scripts/lint.js +57 -0
  81. package/test/agent-loop.test.js +389 -0
  82. package/test/anim-driver.test.js +153 -0
  83. package/test/ask-user-display.test.js +226 -0
  84. package/test/ask-user-gate.test.js +231 -0
  85. package/test/background.test.js +414 -0
  86. package/test/chat-history-nocolor.test.js +155 -0
  87. package/test/chat-relogin.test.js +207 -0
  88. package/test/chat.test.js +114 -0
  89. package/test/checkpoints-agent.test.js +181 -0
  90. package/test/checkpoints.test.js +650 -0
  91. package/test/command-registry.test.js +160 -0
  92. package/test/compact.test.js +116 -0
  93. package/test/completion-lazy.test.js +52 -0
  94. package/test/config-merge.test.js +324 -0
  95. package/test/config-quarantine.test.js +128 -0
  96. package/test/config-write-guard-allow-anywhere.test.js +56 -0
  97. package/test/config-write-guard-skip.test.js +46 -0
  98. package/test/config-write-guard.test.js +153 -0
  99. package/test/context-split.test.js +215 -0
  100. package/test/cost-doctor.test.js +142 -0
  101. package/test/custom-commands-chat.test.js +106 -0
  102. package/test/custom-commands.test.js +230 -0
  103. package/test/defer-detail-band.test.js +403 -0
  104. package/test/deny-windows.test.js +120 -0
  105. package/test/deny.test.js +83 -0
  106. package/test/detail-band-tab-flatten.test.js +242 -0
  107. package/test/download-allow-anywhere.test.js +66 -0
  108. package/test/download-confine.test.js +153 -0
  109. package/test/exec-diff.test.js +268 -0
  110. package/test/executors.test.js +599 -0
  111. package/test/extract-tool-calls.test.js +349 -0
  112. package/test/fetch-url-validation.test.js +219 -0
  113. package/test/file-activity.test.js +522 -0
  114. package/test/fixtures/tool-calls.js +57 -0
  115. package/test/fixtures/web-page.js +91 -0
  116. package/test/git-tools.test.js +384 -0
  117. package/test/grep-glob-serialize.test.js +242 -0
  118. package/test/grep-glob.test.js +268 -0
  119. package/test/grep-path-target.test.js +227 -0
  120. package/test/harness/README.md +57 -0
  121. package/test/harness/chat-harness.js +143 -0
  122. package/test/harness/memwarn-headless-child.js +65 -0
  123. package/test/harness/mock-llm.js +120 -0
  124. package/test/harness/mock-mcp-server.js +142 -0
  125. package/test/harness/sse-server.js +69 -0
  126. package/test/headless.test.js +348 -0
  127. package/test/history-utils.test.js +88 -0
  128. package/test/hooks-agent.test.js +238 -0
  129. package/test/hooks-verify-sandbox.test.js +232 -0
  130. package/test/hooks.test.js +216 -0
  131. package/test/http-get-user-agent.test.js +142 -0
  132. package/test/images-api.test.js +208 -0
  133. package/test/images.test.js +238 -0
  134. package/test/input-field-ctrl-o.test.js +37 -0
  135. package/test/live-height-physical.test.js +281 -0
  136. package/test/max-iterations.test.js +218 -0
  137. package/test/mcp-boundary.test.js +57 -0
  138. package/test/mcp-client.test.js +267 -0
  139. package/test/mcp-oauth.test.js +86 -0
  140. package/test/md-stream.test.js +183 -0
  141. package/test/memory-truncation-warning.test.js +222 -0
  142. package/test/memory.test.js +198 -0
  143. package/test/native-dispatch.test.js +409 -0
  144. package/test/native-live-narration.test.js +254 -0
  145. package/test/output-chokepoint.test.js +188 -0
  146. package/test/output-heredoc-leak.test.js +195 -0
  147. package/test/output-preview.test.js +245 -0
  148. package/test/path-guards.test.js +134 -0
  149. package/test/payload.test.js +99 -0
  150. package/test/permission-rules-agent.test.js +210 -0
  151. package/test/permission-rules.test.js +297 -0
  152. package/test/permissions.test.js +362 -0
  153. package/test/plan-mode.test.js +167 -0
  154. package/test/read-paginate.test.js +275 -0
  155. package/test/readonly-tools.test.js +177 -0
  156. package/test/render-operation.test.js +317 -0
  157. package/test/replay-descriptor-xml.test.js +216 -0
  158. package/test/replay-descriptor.test.js +189 -0
  159. package/test/replay-web-aggregate.test.js +291 -0
  160. package/test/replay-web-persist.test.js +241 -0
  161. package/test/result-cap.test.js +233 -0
  162. package/test/running-glyph-anim.test.js +111 -0
  163. package/test/sandbox-agent.test.js +147 -0
  164. package/test/sandbox-integration.test.js +216 -0
  165. package/test/sandbox.test.js +408 -0
  166. package/test/sdk.test.js +234 -0
  167. package/test/shell-output-cap.test.js +181 -0
  168. package/test/skills-chat.test.js +110 -0
  169. package/test/skills.test.js +295 -0
  170. package/test/smoke.test.js +68 -0
  171. package/test/status-bar-driver.test.js +93 -0
  172. package/test/status-bar-pause.test.js +164 -0
  173. package/test/status-bar-resync.test.js +188 -0
  174. package/test/stream-parser.test.js +171 -0
  175. package/test/subagents-agent.test.js +178 -0
  176. package/test/subagents.test.js +222 -0
  177. package/test/theme-palette.test.js +166 -0
  178. package/test/tool-registry.test.js +85 -0
  179. package/test/trim-budget.test.js +101 -0
  180. package/test/truncate-visible.test.js +78 -0
  181. package/test/verify-agent.test.js +317 -0
  182. package/test/verify.test.js +141 -0
  183. package/test/view-image.test.js +199 -0
  184. package/test/web-activity-ordering.test.js +203 -0
  185. package/test/web-activity.test.js +207 -0
  186. package/test/web-data-extraction-guidance.test.js +71 -0
  187. package/test/web-extract.test.js +185 -0
  188. package/test/web-fetch-agent.test.js +291 -0
  189. package/test/web-fetch-mode.test.js +193 -0
  190. package/test/web-search.test.js +380 -0
  191. package/lib/commands.js +0 -1438
  192. package/path +0 -1
@@ -0,0 +1,227 @@
1
+ 'use strict';
2
+
3
+ // Focused coverage for the path-aware grep fix: `path` now denotes a FILE, a
4
+ // DIRECTORY, or a GLOB filter, and an unresolvable path returns a diagnostic
5
+ // error instead of a silent {count:0}. The original bug: `path` was treated ONLY
6
+ // as a glob over a cwd-rooted walk, so a path pointing at a file/dir OUTSIDE the
7
+ // cwd subtree was never reached → {count:0}, indistinguishable from a true
8
+ // negative. See ROOT CAUSE in the task brief.
9
+ //
10
+ // Isolation mirrors grep-glob.test.js: temp $HOME (audit log + protected config
11
+ // resolve there) set BEFORE any lib require, plus a temp cwd.
12
+
13
+ const os = require('node:os');
14
+ const fs = require('node:fs');
15
+ const path = require('node:path');
16
+
17
+ const TMP_HOME = fs.mkdtempSync(path.join(os.tmpdir(), 'semalt-home-'));
18
+ const PREV_HOME = process.env.HOME;
19
+ const PREV_USERPROFILE = process.env.USERPROFILE;
20
+ process.env.HOME = TMP_HOME;
21
+ process.env.USERPROFILE = TMP_HOME;
22
+
23
+ const { test, before, after } = require('node:test');
24
+ const assert = require('node:assert');
25
+
26
+ const ui = require('../lib/ui');
27
+ const { createPermissionManager } = require('../lib/permissions');
28
+ const { createToolExecutor } = require('../lib/tools');
29
+ const {
30
+ _grepSearch,
31
+ _resolveGrepPath,
32
+ _detectRipgrep,
33
+ } = require('../lib/tool_registry');
34
+
35
+ const HAVE_RG = !!_detectRipgrep();
36
+ const rgOpts = HAVE_RG ? {} : { skip: 'ripgrep (rg) not on PATH' };
37
+
38
+ let exec;
39
+ let CWD; // the agent's working directory (search root for the legacy walk)
40
+ let OUTSIDE; // a sibling directory OUTSIDE CWD — the heart of the original bug
41
+ let OUTFILE; // an explicit file under OUTSIDE that contains the needle
42
+ let PREV_CWD;
43
+
44
+ const NEEDLE = 'T.PICKUP'; // used as a regex: the unescaped '.' matches any character, so it hits the literal "T.PICKUP" and also e.g. "T_PICKUP"
45
+
46
+ before(() => {
47
+ PREV_CWD = process.cwd();
48
+ CWD = fs.realpathSync(fs.mkdtempSync(path.join(os.tmpdir(), 'semalt-cwd-')));
49
+ OUTSIDE = fs.realpathSync(fs.mkdtempSync(path.join(os.tmpdir(), 'semalt-out-')));
50
+ process.chdir(CWD);
51
+
52
+ // Inside the cwd tree: an ordinary JS file with the needle, plus a decoy.
53
+ fs.writeFileSync(path.join(CWD, 'in.js'), 'const x = 1;\nconst T_PICKUP = handler;\n');
54
+ fs.mkdirSync(path.join(CWD, 'sub'), { recursive: true });
55
+ fs.writeFileSync(path.join(CWD, 'sub', 'deep.js'), 'noop\nT.PICKUP here\nmore\n');
56
+ fs.writeFileSync(path.join(CWD, 'plain.txt'), 'just text, no needle\n');
57
+
58
+ // OUTSIDE the cwd subtree: the file the model would address by absolute path.
59
+ OUTFILE = path.join(OUTSIDE, 'registry.js');
60
+ fs.writeFileSync(OUTFILE, 'line one\nreturn T.PICKUP(args)\nline three\n');
61
+ fs.writeFileSync(path.join(OUTSIDE, 'other.js'), 'T.PICKUP again\n');
62
+
63
+ const pm = createPermissionManager(ui, {});
64
+ exec = createToolExecutor(pm, ui, () => ({ max_file_size_kb: 512, command_timeout_ms: 30000 }));
65
+ });
66
+
67
+ after(() => { // undo the process-wide state this file changed: the cwd and the HOME/USERPROFILE override
68
+ process.chdir(PREV_CWD);
69
+ if (PREV_HOME === undefined) delete process.env.HOME; else process.env.HOME = PREV_HOME; // delete when originally unset: assigning undefined to process.env stores the string "undefined"
70
+ if (PREV_USERPROFILE === undefined) delete process.env.USERPROFILE; else process.env.USERPROFILE = PREV_USERPROFILE;
71
+ }); // NOTE(review): the mkdtempSync directories (TMP_HOME, CWD, OUTSIDE) are not removed here — confirm that is intended
72
+
73
+ // ---------------------------------------------------------------------------
74
+ // _resolveGrepPath — the file/dir/glob/omitted decision table
75
+ // ---------------------------------------------------------------------------
76
+
77
+ test('_resolveGrepPath: existing FILE → file mode', () => {
78
+ assert.deepStrictEqual(_resolveGrepPath(OUTFILE), { mode: 'file', file: OUTFILE, display: OUTFILE });
79
+ });
80
+
81
+ test('_resolveGrepPath: existing DIRECTORY → dir mode with that dir as baseDir', () => {
82
+ assert.deepStrictEqual(_resolveGrepPath(OUTSIDE), { mode: 'dir', baseDir: OUTSIDE, pathGlob: null });
83
+ });
84
+
85
+ test('_resolveGrepPath: non-existent path / glob → glob mode (legacy cwd walk)', () => {
86
+ assert.deepStrictEqual(_resolveGrepPath('*.js'), { mode: 'glob', baseDir: '.', pathGlob: '*.js' });
87
+ assert.deepStrictEqual(_resolveGrepPath('does/not/exist.js'), { mode: 'glob', baseDir: '.', pathGlob: 'does/not/exist.js' });
88
+ });
89
+
90
+ test('_resolveGrepPath: omitted → no filter', () => {
91
+ assert.deepStrictEqual(_resolveGrepPath(null), { mode: 'none', baseDir: '.', pathGlob: null });
92
+ assert.deepStrictEqual(_resolveGrepPath(''), { mode: 'none', baseDir: '.', pathGlob: null });
93
+ });
94
+
95
+ // ---------------------------------------------------------------------------
96
+ // (a) THE BUG: an absolute FILE path OUTSIDE cwd now finds its matches.
97
+ // ---------------------------------------------------------------------------
98
+
99
+ test('(a) grep on an absolute FILE path outside cwd finds the matches (was {count:0})', () => {
100
+ const r = _grepSearch({ pattern: NEEDLE, path: OUTFILE, engine: 'node' });
101
+ assert.strictEqual(r.error, undefined);
102
+ assert.strictEqual(r.count, 1);
103
+ assert.deepStrictEqual(r.matches, [{ file: _posix(OUTFILE), line: 2, text: 'return T.PICKUP(args)' }]);
104
+ });
105
+
106
+ // ---------------------------------------------------------------------------
107
+ // (b) a DIRECTORY path searches that directory recursively.
108
+ // ---------------------------------------------------------------------------
109
+
110
+ test('(b) grep on a DIRECTORY path searches that dir', () => {
111
+ const r = _grepSearch({ pattern: NEEDLE, path: OUTSIDE, engine: 'node' });
112
+ assert.strictEqual(r.error, undefined);
113
+ // dir mode → paths are relative to the directory used as the walk root
114
+ const files = r.matches.map((m) => m.file).sort();
115
+ assert.deepStrictEqual(files, ['other.js', 'registry.js']);
116
+ });
117
+
118
+ // ---------------------------------------------------------------------------
119
+ // (c) a glob with no existing-path collision still glob-filters from cwd.
120
+ // ---------------------------------------------------------------------------
121
+
122
+ test('(c) grep with glob "*.js" still glob-filters the cwd walk (back-compat)', () => {
123
+ const r = _grepSearch({ pattern: NEEDLE, path: '*.js', engine: 'node' });
124
+ assert.strictEqual(r.error, undefined);
125
+ // in.js (T_PICKUP — '.' wildcard matches '_') and sub/deep.js, NOT plain.txt
126
+ const files = r.matches.map((m) => m.file).sort();
127
+ assert.deepStrictEqual(files, ['in.js', 'sub/deep.js']);
128
+ });
129
+
130
+ // ---------------------------------------------------------------------------
131
+ // (d) a path that doesn't exist anywhere → diagnostic ERROR, not {count:0}.
132
+ // ---------------------------------------------------------------------------
133
+
134
+ test('(d) grep on a path that resolves to nothing returns a diagnostic error', () => {
135
+ const r = _grepSearch({ pattern: NEEDLE, path: '/no/such/place/file.js', engine: 'node' });
136
+ assert.ok(r.error, 'must be an error, not a silent count:0');
137
+ assert.match(r.error, /did not resolve to any file/);
138
+ assert.strictEqual(r.count, undefined);
139
+ });
140
+
141
+ test('(d2) grep on a glob matching zero files → diagnostic error (nothing to search)', () => {
142
+ const r = _grepSearch({ pattern: NEEDLE, path: '*.nonexistentext', engine: 'node' });
143
+ assert.ok(r.error);
144
+ assert.match(r.error, /did not resolve to any file/);
145
+ });
146
+
147
+ // ---------------------------------------------------------------------------
148
+ // (e) THE GATING PROOF: a valid file/glob where the pattern is genuinely absent
149
+ // returns {count:0} — a true negative, NOT an error.
150
+ // ---------------------------------------------------------------------------
151
+
152
+ test('(e1) grep an existing FILE with no match → {count:0}, NOT an error (true negative)', () => {
153
+ const r = _grepSearch({ pattern: 'ZZZ_ABSENT_TOKEN', path: OUTFILE, engine: 'node' });
154
+ assert.strictEqual(r.error, undefined);
155
+ assert.strictEqual(r.count, 0);
156
+ assert.deepStrictEqual(r.matches, []);
157
+ });
158
+
159
+ test('(e2) grep a glob matching real files but pattern absent → {count:0}, NOT an error', () => {
160
+ // "*.js" matches in.js + sub/deep.js (real candidates) but the token is absent.
161
+ const r = _grepSearch({ pattern: 'ZZZ_ABSENT_TOKEN', path: '*.js', engine: 'node' });
162
+ assert.strictEqual(r.error, undefined, 'real candidate files exist → true negative, never an error');
163
+ assert.strictEqual(r.count, 0);
164
+ });
165
+
166
+ test('(e3) grep a DIRECTORY that exists but lacks the pattern → {count:0}, NOT an error', () => {
167
+ const r = _grepSearch({ pattern: 'ZZZ_ABSENT_TOKEN', path: OUTSIDE, engine: 'node' });
168
+ assert.strictEqual(r.error, undefined);
169
+ assert.strictEqual(r.count, 0);
170
+ });
171
+
172
+ // ---------------------------------------------------------------------------
173
+ // (f) BOTH ENGINES consistent for the path-resolution path.
174
+ // ---------------------------------------------------------------------------
175
+
176
+ test('(f) file-target: node and rg agree', rgOpts, () => {
177
+ const viaNode = _grepSearch({ pattern: NEEDLE, path: OUTFILE, engine: 'node' });
178
+ const viaRg = _grepSearch({ pattern: NEEDLE, path: OUTFILE, engine: 'rg' });
179
+ assert.deepStrictEqual(viaRg, viaNode);
180
+ });
181
+
182
+ test('(f) dir-target: node and rg agree', rgOpts, () => {
183
+ const viaNode = _grepSearch({ pattern: NEEDLE, path: OUTSIDE, engine: 'node' });
184
+ const viaRg = _grepSearch({ pattern: NEEDLE, path: OUTSIDE, engine: 'rg' });
185
+ assert.deepStrictEqual(viaRg, viaNode);
186
+ });
187
+
188
+ test('(f) unresolvable-path error is identical across engines', rgOpts, () => {
189
+ const viaNode = _grepSearch({ pattern: NEEDLE, path: '/no/such/place.js', engine: 'node' });
190
+ const viaRg = _grepSearch({ pattern: NEEDLE, path: '/no/such/place.js', engine: 'rg' });
191
+ assert.deepStrictEqual(viaRg, viaNode);
192
+ });
193
+
194
+ // ---------------------------------------------------------------------------
195
+ // (g) SANDBOX / path-safety: grep refuses a protected secret path exactly like
196
+ // read_file / search_in_file (isProtectedSecretPath). The OS sandbox remains
197
+ // the outer confinement; this is the name-based guard parity check.
198
+ // ---------------------------------------------------------------------------
199
+
200
+ test('(g) grep on a protected secret path is refused like other file reads', async () => {
201
+ const cfg = path.join(TMP_HOME, '.semalt-ai', 'config.json'); // == CONFIG_PATH under temp HOME
202
+ const r = await exec.agentExecFile('grep', NEEDLE, cfg);
203
+ assert.ok(r.error, 'must be refused');
204
+ assert.match(r.error, /secrets|credentials|cannot be read/i);
205
+ // parity: search_in_file refuses the same path the same way
206
+ const r2 = await exec.agentExecFile('search_in_file', cfg, NEEDLE);
207
+ assert.ok(r2.error);
208
+ assert.match(r2.error, /secrets|credentials|cannot be read/i);
209
+ });
210
+
211
+ // ---------------------------------------------------------------------------
212
+ // End-to-end through the executor (auto engine) — the real model-facing path.
213
+ // ---------------------------------------------------------------------------
214
+
215
+ test('executor: agentExecFile grep on an out-of-cwd file finds the needle', async () => {
216
+ const r = await exec.agentExecFile('grep', NEEDLE, OUTFILE);
217
+ assert.strictEqual(r.error, undefined);
218
+ assert.strictEqual(r.count, 1);
219
+ });
220
+
221
+ test('executor: agentExecFile grep on a vanished path returns the diagnostic error', async () => {
222
+ const r = await exec.agentExecFile('grep', NEEDLE, '/no/such/place/file.js');
223
+ assert.ok(r.error);
224
+ assert.match(r.error, /did not resolve to any file/);
225
+ });
226
+
227
+ function _posix(p) { return p.split(path.sep).join('/'); } // rewrite OS path separators as '/'; used when comparing against the `file` field of grep matches (presumably reported '/'-separated — verify in tool_registry)
@@ -0,0 +1,57 @@
1
+ # Test harness — mock LLM
2
+
3
+ Deterministic, zero-dependency fakes for testing the streaming client and the
4
+ agent loop without a real model or network. Built on Node's `http` module only.
5
+
6
+ ## Files
7
+
8
+ | File | Purpose |
9
+ |------|---------|
10
+ | `sse-server.js` | Low-level scriptable SSE server + `sse()` / `DONE` helpers. Serves one scripted response per request. Used by the streaming-parser tests (1.1). |
11
+ | `mock-llm.js` | Queue-based mock LLM built on `sse-server`. Serves a FIFO queue of scripted responses — one per inbound POST — so a multi-turn agent loop or a retry sequence runs deterministically. Used by the agent-loop tests (1.2). |
12
+
13
+ ## `startMockLLM()`
14
+
15
+ ```js
16
+ const { startMockLLM } = require('./harness/mock-llm');
17
+
18
+ const mock = await startMockLLM(); // listens on 127.0.0.1:<random port>
19
+ // Point config.api_base at mock.base, then script responses (FIFO):
20
+
21
+ mock.replyWith('<exec>echo hi</exec>'); // streamed assistant content
22
+ mock.replyWith('All done.'); // next turn's reply
23
+ mock.replyWithToolCall('read_file', { path: 'a.txt' }); // native tool_calls
24
+ mock.streamChunks([...customSSE]); // full control over the SSE stream
25
+ mock.failWith(429, { headers: { 'retry-after': '0' } }); // an error response
26
+ mock.failWith(400, { body: JSON.stringify({ error: { message: 'context length is only 100' } }) });
27
+
28
+ // Introspection:
29
+ mock.pending(); // queued-but-unserved responses
30
+ mock.requestCount(); // requests received so far
31
+ mock.requests; // [{ url, body, headers }] recorded per request
32
+
33
+ await mock.close(); // always close in a finally / after()
34
+ ```
35
+
36
+ ### Response ordering
37
+
38
+ Responses are served strictly FIFO, one per request. A single `chatStream` call
39
+ is normally one request — **except** the 400/413 self-healing path in `api.js`,
40
+ which transparently retries, consuming **two** queued responses (the error, then
41
+ the recovery). The agent loop's own retry/backoff (429, 5xx) also consumes one
42
+ queued response per attempt.
43
+
44
+ ### Determinism / timing
45
+
46
+ - `gapMs` (default 2ms) controls the delay between SSE chunks. Raise it to keep a
47
+ stream open long enough to test mid-stream abort.
48
+ - For retry tests, use `Retry-After: 0` so backoff is ~instant; this both keeps
49
+ the test fast and proves the header is honored (the base backoff is 1000ms).
50
+ - No `Math.random`/wall-clock dependence; every run is reproducible.
51
+
52
+ ## Wiring the agent loop
53
+
54
+ Inject `mock.base` through the existing `config.api_base` seam — no production
55
+ code changes are needed. Build the real `chatStream` (`createApiClient`) and a real tool
56
+ executor, then `createAgentRunner(...)`. See `test/agent-loop.test.js` for the
57
+ `buildRunner()` helper used across all scenarios.
@@ -0,0 +1,143 @@
1
+ 'use strict';
2
+
3
+ // Chat-loop smoke harness for cmdChat (Task 1.5, tests-first). Drives the real
4
+ // createCommands().cmdChat() with a fully mocked UI so the interactive chat
5
+ // closure can be characterized WITHOUT a TTY: the mock inputField captures the
6
+ // onSubmit callback, and submit(text) invokes it exactly as a keypress would.
7
+ //
8
+ // Home-based paths (saved sessions, audit log) are redirected to a temp dir
9
+ // before any lib module loads, so a chat session writes nothing real.
10
+
11
+ const os = require('node:os');
12
+ const fs = require('node:fs');
13
+ const path = require('node:path');
14
+
15
+ const TMP_HOME = fs.mkdtempSync(path.join(os.tmpdir(), 'semalt-chat-home-'));
16
+ process.env.HOME = TMP_HOME;
17
+ process.env.USERPROFILE = TMP_HOME;
18
+
19
+ const { createCommands } = require('../../lib/commands');
20
+ const tools = require('../../lib/tools');
21
+
22
+ const delay = (ms) => new Promise((r) => setTimeout(r, ms));
23
+
24
+ // The mock UI handles returned by createUI(): chatHistory records messages, statusBar records update() calls, inputField captures the onSubmit callback and event handlers. Everything else is a no-op stub.
25
+ function makeChatUI() {
26
+ const chatHistory = {
27
+ messages: [],
28
+ addMessage(m) { this.messages.push(m); },
29
+ clearMessages() { this.messages = []; },
30
+ clearStreamingContent() {},
31
+ collapseById() {}, removeById() {}, rerenderById() {},
32
+ finalizeLastMessage() {}, streamToken() {},
33
+ texts() { return this.messages.map((m) => (typeof m.content === 'string' ? m.content : '')); }, // test helper: message contents as strings; non-string content becomes ''
34
+ last() { return this.messages[this.messages.length - 1]; },
35
+ find(re) { return this.texts().find((t) => re.test(t)); }, // test helper: first message text matching the regex
36
+ };
37
+ const statusBar = {
38
+ states: [],
39
+ update(...a) { this.states.push(a); },
40
+ setModel() {}, setContextLimit() {}, updateMetrics() {}, addPendingTokens() {}, onToken() {},
41
+ };
42
+ const inputField = {
43
+ _submit: null, _handlers: {}, _nav: null,
44
+ onSubmit(cb) { this._submit = cb; },
45
+ on(ev, cb) { (this._handlers[ev] = this._handlers[ev] || []).push(cb); },
46
+ removeListener(ev, cb) { const a = this._handlers[ev] || []; const i = a.indexOf(cb); if (i >= 0) a.splice(i, 1); },
47
+ captureNavigation(h) { this._nav = h; },
48
+ releaseNavigation() { this._nav = null; },
49
+ setDisabled() {}, setSearchItems() {}, captureSelect() { return Promise.resolve(null); },
50
+ isIdle() { return false; },
51
+ async submit(text) { if (!this._submit) throw new Error('onSubmit not registered'); return this._submit(text); }, // test driver: invokes the captured onSubmit callback with `text`
52
+ emit(ev, ...args) { for (const cb of (this._handlers[ev] || [])) cb(...args); }, // test driver: fires every handler registered via on(ev, ...)
53
+ };
54
+ return { chatHistory, statusBar, inputField, layout: null, destroy: () => {}, redrawFixed: () => {} };
55
+ }
56
+
57
+ // The `ui` object createCommands destructures: color constants (all set to ''), fixed-value helpers, and a createUI() that returns the supplied mock chat UI.
58
+ function makeUI(chatUI) {
59
+ const ui = { createUI: () => chatUI };
60
+ for (const k of ['BOLD', 'BG_SELECTED', 'FG_BLUE', 'FG_CYAN', 'FG_DARK', 'FG_GRAY', 'FG_GREEN', 'FG_RED', 'FG_TEAL', 'FG_YELLOW', 'RST', 'DIM']) ui[k] = '';
61
+ ui.approxTokens = (s) => Math.ceil(((s || '').length) / 4);
62
+ ui.getCols = () => 80;
63
+ ui.boxLine = (s) => s;
64
+ ui.interactiveSelect = async () => null;
65
+ return ui;
66
+ }
67
+
68
+ function makeDeps(chatUI, overrides = {}) { // builds the dependency bag passed to createCommands(), plus the mutable `config` and a `calls` log; `overrides` may replace config fields, apiClient methods, runAgentLoop, readFileContext and agentExecShell
69
+ const config = {
70
+ auth_token: '', default_model: 'test-model', dashboard_model_id: null,
71
+ dashboard_url: 'http://dash', api_base: 'http://api', models: [],
72
+ system_prompt_mode: 'system_role', temperature: 0.7,
73
+ ...(overrides.config || {}),
74
+ };
75
+ const getConfig = () => config;
76
+ const setConfig = (c) => Object.assign(config, c);
77
+
78
+ const calls = { runAgentLoop: [], chatStream: [], shell: [], permissionClear: 0 }; // call log written by the default stubs below
79
+
80
+ const permissionManager = {
81
+ setUICallbacks() {}, clear() { calls.permissionClear++; }, toggleAll() { return true; },
82
+ askPermission: async () => true, readonlyBlock: () => null, captureSelect: async () => null, state: {},
83
+ };
84
+
85
+ const apiClient = {
86
+ chatStream: async (...a) => { calls.chatStream.push(a); return { content: '', usage: null }; },
87
+ chatSync: async () => '',
88
+ dashboardCreateChat: async () => ({ chat: { id: 1 } }),
89
+ dashboardGetChat: async () => ({ chat: { title: 't' }, messages: [] }),
90
+ dashboardSaveMessages: async () => ({}),
91
+ dashboardListChats: async () => ({ chats: [] }),
92
+ dashboardListModels: async () => ({ models: [] }),
93
+ dashboardGetModelForCli: async () => ({ model: null }),
94
+ dashboardWhoAmI: async () => ({ user: null }),
95
+ dashboardLogout: async () => ({}),
96
+ estimateTokens: (s) => Math.ceil((s || '').length / 4),
97
+ getCliLoginStatus: async () => ({ status: 'authorized' }),
98
+ requestCliLogin: async () => ({ id: 1, hash: 'h', token: 'tok', verification_url: 'http://v' }),
99
+ setActiveModelProfile: () => {},
100
+ ...(overrides.apiClient || {}), // spread last so a test's stub wins over the default of the same name
101
+ };
102
+
103
+ const runAgentLoop = overrides.runAgentLoop
104
+ || (async (messages, model, maxIter, limit, opts) => {
105
+ calls.runAgentLoop.push({ messages: messages.map((m) => ({ ...m })), model, opts }); // records a shallow copy of each message — presumably so later mutation by the chat loop does not alter the log
106
+ return { messages, metrics: { summary: () => 'metrics-summary' } };
107
+ });
108
+ const readFileContext = overrides.readFileContext || ((f) => `ctx:${JSON.stringify(f)}`);
109
+ const agentExecShell = overrides.agentExecShell
110
+ || (async (cmd, o) => { calls.shell.push({ cmd, o }); return { exit_code: 0, stdout: `out:${cmd}`, stderr: '' }; });
111
+
112
+ return {
113
+ deps: { getConfig, setConfig, permissionManager, ui: makeUI(chatUI), apiClient, runAgentLoop, readFileContext, agentExecShell },
114
+ config, calls,
115
+ };
116
+ }
117
+
118
+ // Start a chat session. Returns once onSubmit is registered. `submit(text)` runs
119
+ // a turn; `submit('exit')` ends the session (await `done` to confirm teardown). Throws if onSubmit is never registered.
120
+ async function startChat(overrides = {}) {
121
+ const chatUI = makeChatUI();
122
+ const { deps, config, calls } = makeDeps(chatUI, overrides);
123
+ const commands = createCommands(deps);
124
+ const done = commands.cmdChat({ model: undefined, ...(overrides.opts || {}) });
125
+
126
+ // Wait for cmdChat's async setup (ensureDefaultModel + resolveTokenLimit) to
127
+ // register the submit handler: poll every 2 ms, at most 200 times, then give up.
128
+ for (let i = 0; i < 200 && !chatUI.inputField._submit; i++) await delay(2);
129
+ if (!chatUI.inputField._submit) throw new Error('cmdChat did not register onSubmit');
130
+
131
+ return {
132
+ chatHistory: chatUI.chatHistory,
133
+ statusBar: chatUI.statusBar,
134
+ inputField: chatUI.inputField,
135
+ config,
136
+ calls,
137
+ submit: (text) => chatUI.inputField.submit(text),
138
+ done, // cmdChat()'s return value, kept un-awaited so the caller decides when to wait for teardown
139
+ cleanup: () => { try { tools.setUIActive(false); } catch {} }, // best-effort: any error from setUIActive is deliberately ignored
140
+ };
141
+ }
142
+
143
+ module.exports = { startChat };
@@ -0,0 +1,65 @@
1
+ 'use strict';
2
+
3
+ // Child process for the memory-truncation headless test. Runs cmdCode in json
4
+ // mode with an oversized project AGENTS.md so the PARENT can capture this
5
+ // process's stdout (the JSON envelope) and stderr (the truncation warning)
6
+ // cleanly — running in a child avoids swapping the parent's global
7
+ // process.stdout, which would collide with the node:test TAP reporter.
8
+
9
+ const os = require('os');
10
+ const fs = require('fs');
11
+ const path = require('path');
12
+
13
const ROOT = path.resolve(__dirname, '..', '..'); // project root

// Scratch directories created below. They are removed when the process exits
// so repeated test runs do not pile up `memwarn-child-*` folders in the OS
// temp dir.
const scratchDirs = [];
process.on('exit', () => {
  // Leave the scratch repo first: some platforms refuse to delete the cwd.
  try {
    process.chdir(os.tmpdir());
  } catch {
    // Best-effort only. Nothing is reported here because the parent test
    // asserts on this process's stderr.
  }
  for (const dir of scratchDirs) {
    try {
      fs.rmSync(dir, { recursive: true, force: true });
    } catch {
      // Best-effort only, for the same reason as above.
    }
  }
});

// Point HOME / USERPROFILE at a fresh temp dir and set a dummy API key before
// any project module is loaded.
const home = fs.mkdtempSync(path.join(os.tmpdir(), 'memwarn-child-home-'));
scratchDirs.push(home);
process.env.HOME = home;
process.env.USERPROFILE = home;
process.env.SEMALT_API_KEY = 'test-key';

// Fake repository: an (empty) `.git` dir plus an AGENTS.md that is 4000 bytes
// larger than DEFAULT_MEMORY_MAX_BYTES, then make it the working directory.
const repo = fs.mkdtempSync(path.join(os.tmpdir(), 'memwarn-child-repo-'));
scratchDirs.push(repo);
fs.mkdirSync(path.join(repo, '.git'), { recursive: true });
const { DEFAULT_MEMORY_MAX_BYTES } = require(path.join(ROOT, 'lib', 'memory'));
fs.writeFileSync(path.join(repo, 'AGENTS.md'), 'Z'.repeat(DEFAULT_MEMORY_MAX_BYTES + 4000));
process.chdir(repo);
25
+
26
+ const ui = require(path.join(ROOT, 'lib', 'ui'));
27
+ const { createApiClient } = require(path.join(ROOT, 'lib', 'api'));
28
+ const { createToolExecutor, extractToolCalls } = require(path.join(ROOT, 'lib', 'tools'));
29
+ const { createPermissionManager } = require(path.join(ROOT, 'lib', 'permissions'));
30
+ const { createAgentRunner } = require(path.join(ROOT, 'lib', 'agent'));
31
+ const { createOneshotCommands } = require(path.join(ROOT, 'lib', 'commands', 'oneshot'));
32
+ const { startMockLLM } = require(path.join(ROOT, 'test', 'harness', 'mock-llm'));
33
+
34
// Runs one headless `cmdCode` invocation in json mode against the mock LLM,
// then exits with code 0. Any failure is reported on stderr with a `CHILDERR:`
// prefix and exit code 1.
async function runChild() {
  const noop = () => {};

  // The mock answers the single expected request with a plain assistant reply.
  const mock = await startMockLLM();
  mock.replyWith('All done.');

  const config = {
    api_base: mock.base,
    api_key: 'test-key',
    auth_token: 'tok',
    default_model: 'test-model',
    temperature: 0.5,
    request_timeout_ms: 5000,
    stream: true,
    models: [],
    max_iterations: 10,
    system_prompt_mode: 'system_role',
    pricing: {},
  };
  const getConfig = () => config;

  const api = createApiClient({ getConfig, saveConfig: noop, ui });

  // Permission manager with prompts skipped and inert UI callbacks.
  const pm = createPermissionManager(ui, { skipPermissions: true });
  pm.setUICallbacks({
    onAddMessage: noop,
    onShowModal: noop,
    onCloseModal: noop,
    onCaptureNavigation: () => () => {},
  });

  const executor = createToolExecutor(pm, ui, getConfig);
  const { agentExecShell, agentExecFile, describePermission } = executor;

  const runner = createAgentRunner({
    chatStream: api.chatStream,
    extractToolCalls,
    agentExecShell,
    agentExecFile,
    describePermission,
    permissionManager: pm,
    ui,
    getConfig,
  });

  const msgs = require(path.join(ROOT, 'lib', 'ui', 'messages'));
  const dbg = require(path.join(ROOT, 'lib', 'debug'));
  const shared = {
    ...ui,
    writer: { scrollback: noop },
    getConfig,
    setConfig: noop,
    runAgentLoop: runner.runAgentLoop,
    readFileContext: () => '',
    ensureDefaultModel: async () => {},
    msgs,
    dbg,
  };

  const oneshot = createOneshotCommands(shared);
  await oneshot.cmdCode({ outputFormat: 'json' }, ['do something']);
  await mock.close();
  process.exit(0);
}

runChild().catch((err) => {
  const detail = (err && err.stack) || err;
  process.stderr.write('CHILDERR:' + detail + '\n');
  process.exit(1);
});
@@ -0,0 +1,120 @@
1
+ 'use strict';
2
+
3
+ // Scriptable mock LLM server for agent-loop integration tests (Task 1.2).
4
+ // Built on the SSE primitives in ./sse-server. A single server instance serves
5
+ // a FIFO queue of scripted responses — one per inbound POST — so a multi-turn
6
+ // agent loop (or a retry sequence) is driven deterministically with no real
7
+ // network and no real model.
8
+ //
9
+ // Each queued response is either:
10
+ // * a streamed 200 SSE reply (assistant content and/or native tool_calls), or
11
+ // * an error (non-200) with an optional JSON body and headers (e.g.
12
+ // Retry-After) — used to exercise the retry/backoff and 400/413 paths.
13
+ //
14
+ // See ./README.md for the full contract and examples.
15
+
16
+ const http = require('http');
17
+ const { sse, DONE } = require('./sse-server');
18
+
19
/**
 * Build the SSE chunk list for a plain assistant message that streams `content`.
 *
 * @param {string|string[]} content - a string is sent as a single delta; an
 *   array is sent as successive deltas (token-by-token handling, mid-stream
 *   abort). A falsy non-array value produces no content delta at all.
 * @param {object} [options]
 * @param {string} [options.finish='stop'] - finish_reason of the closing chunk.
 * @param {object|null} [options.usage=null] - when truthy, emitted as an extra
 *   `{ usage }` chunk after the finish chunk.
 * @returns {Array} content deltas, the finish chunk, the optional usage chunk,
 *   then the DONE sentinel.
 */
function contentChunks(content, { finish = 'stop', usage = null } = {}) {
  let deltas;
  if (Array.isArray(content)) {
    deltas = content;
  } else if (content) {
    deltas = [content];
  } else {
    deltas = [];
  }

  const deltaChunks = deltas.map((text) => sse({ choices: [{ delta: { content: text } }] }));
  const finishChunk = sse({ choices: [{ finish_reason: finish, delta: {} }] });
  const usageChunks = usage ? [sse({ usage })] : [];

  return [...deltaChunks, finishChunk, ...usageChunks, DONE];
}
30
+
31
/**
 * Build the SSE chunk list for a native OpenAI tool_calls response.
 *
 * @param {string} name - function name placed in the tool call.
 * @param {*} [args={}] - tool arguments; serialized with JSON.stringify. The
 *   default matters: JSON.stringify(undefined) is `undefined`, which would
 *   drop the `arguments` field from the emitted chunk altogether, so a call
 *   without arguments is sent as the string "{}".
 * @param {object} [options]
 * @param {string} [options.id='call_1'] - tool call id.
 * @returns {Array} the tool_calls delta, a `finish_reason: 'tool_calls'`
 *   chunk, then the DONE sentinel.
 */
function toolCallChunks(name, args = {}, { id = 'call_1' } = {}) {
  const toolCall = {
    index: 0,
    id,
    type: 'function',
    function: { name, arguments: JSON.stringify(args) },
  };
  return [
    sse({ choices: [{ delta: { tool_calls: [toolCall] } }] }),
    sse({ choices: [{ finish_reason: 'tool_calls', delta: {} }] }),
    DONE,
  ];
}
39
+
40
/**
 * Start a scriptable mock LLM HTTP server on an ephemeral 127.0.0.1 port.
 *
 * Every inbound request is recorded and answered with the next queued
 * response (FIFO). A request that arrives while the queue is empty gets a 500
 * with a JSON error body.
 *
 * @returns {Promise<object>} resolves once the server is listening, with
 *   `base`, `port`, the live `requests` array, the enqueue helpers
 *   (`replyWith`, `replyWithToolCall`, `streamChunks`, `failWith` — each
 *   returns the handle for chaining), `pending()`, `requestCount()` and
 *   `close()`. Rejects if the server fails to start listening.
 */
function startMockLLM() {
  const queue = [];
  const requests = [];

  // Write one queued response: a 200 is streamed as SSE chunks, anything else
  // is sent as a single JSON body.
  function serve(res, spec) {
    const status = spec.status || 200;
    const isSse = status === 200;
    res.writeHead(status, {
      'Content-Type': isSse ? 'text/event-stream' : 'application/json',
      ...(spec.headers || {}),
    });
    if (!isSse) {
      res.end(spec.body != null ? spec.body : '{}');
      return;
    }

    const chunks = spec.chunks || [];
    const gap = spec.gapMs == null ? 2 : spec.gapMs;
    let i = 0;
    const next = () => {
      // Stop once the response is finished or the client has gone away
      // (mid-stream abort); otherwise timers would keep firing for nothing.
      if (res.writableEnded || res.destroyed) return;
      // A loop rather than recursion, so a gap of 0 cannot grow the stack by
      // one frame per chunk.
      while (i < chunks.length) {
        res.write(chunks[i++]);
        if (gap > 0) {
          setTimeout(next, gap);
          return;
        }
      }
      res.end();
    };
    next();
  }

  const server = http.createServer((req, res) => {
    let body = '';
    req.setEncoding('utf8');
    req.on('data', (c) => { body += c; });
    req.on('end', () => {
      requests.push({ url: req.url, body, headers: req.headers });
      const spec = queue.shift();
      if (!spec) {
        res.writeHead(500, { 'Content-Type': 'application/json' });
        res.end('{"error":"mock-llm: response queue empty"}');
        return;
      }
      serve(res, spec);
    });
  });

  return new Promise((resolve, reject) => {
    // Surface a failed listen() as a rejection instead of a promise that
    // never settles.
    server.once('error', reject);
    server.listen(0, '127.0.0.1', () => {
      server.removeListener('error', reject);
      const { port } = server.address();
      resolve({
        base: `http://127.0.0.1:${port}`,
        port,
        requests,

        // Enqueue a streamed assistant message (string or string[] of deltas).
        replyWith(content, opts = {}) {
          queue.push({ status: 200, chunks: contentChunks(content, opts), gapMs: opts.gapMs });
          return this;
        },
        // Enqueue a native tool_calls response.
        replyWithToolCall(name, args, opts = {}) {
          queue.push({ status: 200, chunks: toolCallChunks(name, args, opts), gapMs: opts.gapMs });
          return this;
        },
        // Enqueue raw SSE chunks (full control).
        streamChunks(chunks, opts = {}) {
          queue.push({ status: 200, chunks, gapMs: opts.gapMs });
          return this;
        },
        // Enqueue an error (non-200) response.
        failWith(status, { body, headers } = {}) {
          queue.push({ status, body, headers });
          return this;
        },

        pending() { return queue.length; },
        requestCount() { return requests.length; },
        // Stop the server. Idle keep-alive sockets are dropped explicitly
        // where the runtime offers closeIdleConnections(), because on older
        // Node versions server.close() alone waits for them to time out.
        close() {
          return new Promise((r) => {
            server.close(r);
            if (typeof server.closeIdleConnections === 'function') {
              server.closeIdleConnections();
            }
          });
        },
      });
    });
  });
}
119
+
120
+ module.exports = { startMockLLM, contentChunks, toolCallChunks };