@semalt-ai/code 1.8.5 → 1.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192)
  1. package/.claude/settings.local.json +7 -1
  2. package/.github/workflows/ci.yml +69 -0
  3. package/ARCHITECTURE.md +6 -95
  4. package/CLAUDE.md +196 -316
  5. package/README.md +148 -4
  6. package/docs/ARCHITECTURE.md +1321 -0
  7. package/docs/CONFIG.md +340 -0
  8. package/docs/HISTORY.md +245 -0
  9. package/examples/embed.js +74 -0
  10. package/index.js +251 -10
  11. package/lib/agent.js +856 -120
  12. package/lib/api.js +239 -50
  13. package/lib/args.js +74 -2
  14. package/lib/audit.js +23 -1
  15. package/lib/background.js +584 -0
  16. package/lib/checkpoints.js +757 -0
  17. package/lib/commands/auth.js +94 -0
  18. package/lib/commands/chat-session.js +489 -0
  19. package/lib/commands/chat-slash.js +415 -0
  20. package/lib/commands/chat-turn.js +669 -0
  21. package/lib/commands/chat.js +407 -0
  22. package/lib/commands/custom.js +157 -0
  23. package/lib/commands/history-utils.js +66 -0
  24. package/lib/commands/index.js +268 -0
  25. package/lib/commands/mcp.js +113 -0
  26. package/lib/commands/oneshot.js +193 -0
  27. package/lib/commands/registry.js +269 -0
  28. package/lib/commands/tasks.js +89 -0
  29. package/lib/compact.js +87 -0
  30. package/lib/config.js +360 -11
  31. package/lib/constants.js +401 -3
  32. package/lib/deny.js +199 -0
  33. package/lib/doctor.js +160 -0
  34. package/lib/headless.js +202 -0
  35. package/lib/hooks.js +286 -0
  36. package/lib/images.js +270 -0
  37. package/lib/internals.js +49 -0
  38. package/lib/mcp/boundary.js +131 -0
  39. package/lib/mcp/client.js +270 -0
  40. package/lib/mcp/oauth.js +134 -0
  41. package/lib/memory.js +209 -0
  42. package/lib/metrics.js +37 -2
  43. package/lib/payload.js +54 -0
  44. package/lib/permission-rules.js +401 -0
  45. package/lib/permissions.js +123 -26
  46. package/lib/pricing.js +67 -0
  47. package/lib/proc.js +62 -0
  48. package/lib/prompts.js +99 -8
  49. package/lib/sandbox.js +568 -0
  50. package/lib/sdk.js +328 -0
  51. package/lib/secrets.js +211 -0
  52. package/lib/skills.js +223 -0
  53. package/lib/subagents.js +516 -0
  54. package/lib/tool_registry.js +2862 -0
  55. package/lib/tool_specs.js +263 -9
  56. package/lib/tools.js +352 -1039
  57. package/lib/ui/anim.js +86 -0
  58. package/lib/ui/ansi.js +17 -27
  59. package/lib/ui/chat-history.js +253 -71
  60. package/lib/ui/create-ui.js +67 -24
  61. package/lib/ui/diff.js +90 -25
  62. package/lib/ui/file-activity.js +236 -0
  63. package/lib/ui/format.js +195 -29
  64. package/lib/ui/input-field.js +21 -11
  65. package/lib/ui/md-stream.js +234 -0
  66. package/lib/ui/render-operation.js +113 -0
  67. package/lib/ui/select.js +1 -4
  68. package/lib/ui/status-bar.js +146 -36
  69. package/lib/ui/stream.js +20 -13
  70. package/lib/ui/theme.js +190 -44
  71. package/lib/ui/tool-operation.js +190 -0
  72. package/lib/ui/utils.js +9 -5
  73. package/lib/ui/web-activity.js +270 -0
  74. package/lib/ui/writer.js +159 -45
  75. package/lib/ui.js +1 -1
  76. package/lib/verify.js +229 -0
  77. package/lib/web-extract.js +213 -0
  78. package/lib/web-summarize.js +68 -0
  79. package/package.json +19 -4
  80. package/scripts/lint.js +57 -0
  81. package/test/agent-loop.test.js +389 -0
  82. package/test/anim-driver.test.js +153 -0
  83. package/test/ask-user-display.test.js +226 -0
  84. package/test/ask-user-gate.test.js +231 -0
  85. package/test/background.test.js +414 -0
  86. package/test/chat-history-nocolor.test.js +155 -0
  87. package/test/chat-relogin.test.js +207 -0
  88. package/test/chat.test.js +114 -0
  89. package/test/checkpoints-agent.test.js +181 -0
  90. package/test/checkpoints.test.js +650 -0
  91. package/test/command-registry.test.js +160 -0
  92. package/test/compact.test.js +116 -0
  93. package/test/completion-lazy.test.js +52 -0
  94. package/test/config-merge.test.js +324 -0
  95. package/test/config-quarantine.test.js +128 -0
  96. package/test/config-write-guard-allow-anywhere.test.js +56 -0
  97. package/test/config-write-guard-skip.test.js +46 -0
  98. package/test/config-write-guard.test.js +153 -0
  99. package/test/context-split.test.js +215 -0
  100. package/test/cost-doctor.test.js +142 -0
  101. package/test/custom-commands-chat.test.js +106 -0
  102. package/test/custom-commands.test.js +230 -0
  103. package/test/defer-detail-band.test.js +403 -0
  104. package/test/deny-windows.test.js +120 -0
  105. package/test/deny.test.js +83 -0
  106. package/test/detail-band-tab-flatten.test.js +242 -0
  107. package/test/download-allow-anywhere.test.js +66 -0
  108. package/test/download-confine.test.js +153 -0
  109. package/test/exec-diff.test.js +268 -0
  110. package/test/executors.test.js +599 -0
  111. package/test/extract-tool-calls.test.js +349 -0
  112. package/test/fetch-url-validation.test.js +219 -0
  113. package/test/file-activity.test.js +522 -0
  114. package/test/fixtures/tool-calls.js +57 -0
  115. package/test/fixtures/web-page.js +91 -0
  116. package/test/git-tools.test.js +384 -0
  117. package/test/grep-glob-serialize.test.js +242 -0
  118. package/test/grep-glob.test.js +268 -0
  119. package/test/grep-path-target.test.js +227 -0
  120. package/test/harness/README.md +57 -0
  121. package/test/harness/chat-harness.js +143 -0
  122. package/test/harness/memwarn-headless-child.js +65 -0
  123. package/test/harness/mock-llm.js +120 -0
  124. package/test/harness/mock-mcp-server.js +142 -0
  125. package/test/harness/sse-server.js +69 -0
  126. package/test/headless.test.js +348 -0
  127. package/test/history-utils.test.js +88 -0
  128. package/test/hooks-agent.test.js +238 -0
  129. package/test/hooks-verify-sandbox.test.js +232 -0
  130. package/test/hooks.test.js +216 -0
  131. package/test/http-get-user-agent.test.js +142 -0
  132. package/test/images-api.test.js +208 -0
  133. package/test/images.test.js +238 -0
  134. package/test/input-field-ctrl-o.test.js +37 -0
  135. package/test/live-height-physical.test.js +281 -0
  136. package/test/max-iterations.test.js +218 -0
  137. package/test/mcp-boundary.test.js +57 -0
  138. package/test/mcp-client.test.js +267 -0
  139. package/test/mcp-oauth.test.js +86 -0
  140. package/test/md-stream.test.js +183 -0
  141. package/test/memory-truncation-warning.test.js +222 -0
  142. package/test/memory.test.js +198 -0
  143. package/test/native-dispatch.test.js +409 -0
  144. package/test/native-live-narration.test.js +254 -0
  145. package/test/output-chokepoint.test.js +188 -0
  146. package/test/output-heredoc-leak.test.js +195 -0
  147. package/test/output-preview.test.js +245 -0
  148. package/test/path-guards.test.js +134 -0
  149. package/test/payload.test.js +99 -0
  150. package/test/permission-rules-agent.test.js +210 -0
  151. package/test/permission-rules.test.js +297 -0
  152. package/test/permissions.test.js +362 -0
  153. package/test/plan-mode.test.js +167 -0
  154. package/test/read-paginate.test.js +275 -0
  155. package/test/readonly-tools.test.js +177 -0
  156. package/test/render-operation.test.js +317 -0
  157. package/test/replay-descriptor-xml.test.js +216 -0
  158. package/test/replay-descriptor.test.js +189 -0
  159. package/test/replay-web-aggregate.test.js +291 -0
  160. package/test/replay-web-persist.test.js +241 -0
  161. package/test/result-cap.test.js +233 -0
  162. package/test/running-glyph-anim.test.js +111 -0
  163. package/test/sandbox-agent.test.js +147 -0
  164. package/test/sandbox-integration.test.js +216 -0
  165. package/test/sandbox.test.js +408 -0
  166. package/test/sdk.test.js +234 -0
  167. package/test/shell-output-cap.test.js +181 -0
  168. package/test/skills-chat.test.js +110 -0
  169. package/test/skills.test.js +295 -0
  170. package/test/smoke.test.js +68 -0
  171. package/test/status-bar-driver.test.js +93 -0
  172. package/test/status-bar-pause.test.js +164 -0
  173. package/test/status-bar-resync.test.js +188 -0
  174. package/test/stream-parser.test.js +171 -0
  175. package/test/subagents-agent.test.js +178 -0
  176. package/test/subagents.test.js +222 -0
  177. package/test/theme-palette.test.js +166 -0
  178. package/test/tool-registry.test.js +85 -0
  179. package/test/trim-budget.test.js +101 -0
  180. package/test/truncate-visible.test.js +78 -0
  181. package/test/verify-agent.test.js +317 -0
  182. package/test/verify.test.js +141 -0
  183. package/test/view-image.test.js +199 -0
  184. package/test/web-activity-ordering.test.js +203 -0
  185. package/test/web-activity.test.js +207 -0
  186. package/test/web-data-extraction-guidance.test.js +71 -0
  187. package/test/web-extract.test.js +185 -0
  188. package/test/web-fetch-agent.test.js +291 -0
  189. package/test/web-fetch-mode.test.js +193 -0
  190. package/test/web-search.test.js +380 -0
  191. package/lib/commands.js +0 -1438
  192. package/path +0 -1
@@ -0,0 +1,389 @@
1
+ 'use strict';
2
+
3
+ // Agent-loop integration tests (Task 1.2). These drive the real runAgentLoop
4
+ // against the mock LLM harness, wired through the production config/api_base
5
+ // seam (no production code changes). Covers the six scenarios in the task:
6
+ // clean tool turn, multi-iteration termination, 429 retry honoring Retry-After,
7
+ // 400/413 self-healing, mid-stream abort, and retryable-vs-non-retryable errors.
8
+
9
+ const { test, before, after } = require('node:test');
10
+ const assert = require('node:assert');
11
+ const fs = require('fs');
12
+ const os = require('os');
13
+ const path = require('path');
14
+
15
+ const ui = require('../lib/ui');
16
+ const { createApiClient } = require('../lib/api');
17
+ const { createToolExecutor, extractToolCalls } = require('../lib/tools');
18
+ const { createPermissionManager } = require('../lib/permissions');
19
+ const { createAgentRunner } = require('../lib/agent');
20
+ const { startMockLLM } = require('./harness/mock-llm');
21
+
// Suite-wide environment fixture: force SEMALT_API_KEY to a dummy value while
// the tests run, then restore whatever the caller had (including "unset").
let prevKey;

before(() => {
  prevKey = process.env.SEMALT_API_KEY;
  process.env.SEMALT_API_KEY = 'test-key';
});

after(() => {
  if (prevKey !== undefined) {
    process.env.SEMALT_API_KEY = prevKey;
    return;
  }
  // The variable did not exist before the suite — remove it rather than
  // leaving the string "undefined" behind.
  delete process.env.SEMALT_API_KEY;
});
28
+
/**
 * Assemble a real agent runner whose chatStream is pointed at `base`.
 *
 * The permission manager is created with skipPermissions so tool calls are
 * auto-approved and the loop can run unattended; every UI callback is a no-op
 * so nothing is printed while a test runs.
 *
 * @param {string} base - value used as `api_base` in the runner's config.
 * @returns {{runner: object, getSaved: function(): (object|null), config: object}}
 *   the runner, an accessor for the last snapshot passed to saveConfig
 *   (null until saveConfig is called), and the live config object.
 */
function buildRunner(base) {
  const config = {
    api_base: base,
    api_key: 'test-key',
    default_model: 'test-model',
    temperature: 0.5,
    request_timeout_ms: 5000,
    stream: true,
    models: [],
    // This suite exercises the agent loop, not the OS sandbox (Task 4.4). It is
    // switched off so real `echo` test commands run unsandboxed regardless of
    // whether the host has bwrap/sandbox-exec; the sandbox has dedicated tests.
    sandbox: { mode: 'off' },
  };

  let lastSaved = null;
  function getConfig() {
    return config;
  }
  function saveConfig(next) {
    // Keep a shallow snapshot for assertions, then fold it into the live config.
    lastSaved = { ...next };
    Object.assign(config, next);
  }

  const api = createApiClient({ getConfig, saveConfig, ui });

  const permissionManager = createPermissionManager(ui, { skipPermissions: true });
  permissionManager.setUICallbacks({
    onAddMessage: () => {},
    onShowModal: () => {},
    onCloseModal: () => {},
    onCaptureNavigation: () => () => {},
  });

  const executor = createToolExecutor(permissionManager, ui, getConfig);
  const { agentExecShell, agentExecFile, describePermission } = executor;

  const runner = createAgentRunner({
    chatStream: api.chatStream,
    extractToolCalls,
    agentExecShell,
    agentExecFile,
    describePermission,
    permissionManager,
    ui,
    getConfig,
  });

  return {
    runner,
    getSaved() {
      return lastSaved;
    },
    config,
  };
}
55
+
/**
 * Build a recording set of agent-loop callbacks.
 *
 * @param {object} [extra] - callbacks that override or extend the defaults.
 * @returns {{ev: object, cb: object}} `ev` holds the recorded events
 *   (tokens, tools, errors, retries, assistants); `cb` is the callback object
 *   to hand to runAgentLoop.
 */
function collector(extra = {}) {
  const ev = {
    tokens: [],
    tools: [],
    errors: [],
    retries: [],
    assistants: [],
  };

  const defaults = {
    onToken(token) {
      return ev.tokens.push(token);
    },
    onToolStart() {},
    onToolEnd(tag, result) {
      return ev.tools.push({ tag, result });
    },
    onError(err) {
      return ev.errors.push(err);
    },
    onRetry(next, max) {
      return ev.retries.push({ next, max });
    },
    onAssistantMessage(message) {
      return ev.assistants.push(message);
    },
  };

  // Caller-supplied callbacks win over the recording defaults.
  const cb = Object.assign(defaults, extra);
  return { ev, cb };
}
69
+
70
+ // ---------------------------------------------------------------------------
71
+ // 1. Clean single-tool-call turn
72
+ // ---------------------------------------------------------------------------
73
+
test('clean single-tool-call turn: executes the tool then ends on the final reply', async () => {
  const llm = await startMockLLM();
  // Script: one shell tool call, then a plain reply that ends the loop.
  llm.replyWith('<exec>echo HELLO_S1</exec>');
  llm.replyWith('All done.');
  try {
    const { runner } = buildRunner(llm.base);
    const { ev: events, cb: callbacks } = collector();
    const conversation = [{ role: 'user', content: 'do it' }];
    const outcome = await runner.runAgentLoop(conversation, 'test-model', 10, null, { callbacks });
    const { metrics } = outcome;

    assert.strictEqual(metrics.turns.length, 2, 'one tool turn + one final turn');
    assert.strictEqual(llm.pending(), 0, 'both scripted responses consumed');

    // The shell output must have been fed back to the model as a user message.
    const isToolFeedback = (msg) => msg.role === 'user' && /Tool execution results/.test(msg.content);
    const feedback = conversation.find(isToolFeedback);
    assert.ok(feedback, 'tool results fed back to the model');
    assert.match(feedback.content, /HELLO_S1/);
    assert.match(feedback.content, /Exit code: 0/);

    const isFinalAnswer = (msg) => msg.role === 'assistant' && msg.content === 'All done.';
    assert.ok(conversation.some(isFinalAnswer), 'final answer recorded');
    assert.strictEqual(events.tools.length, 1);
    assert.strictEqual(events.tools[0].tag, 'shell');
  } finally {
    await llm.close();
  }
});
99
+
100
+ // ---------------------------------------------------------------------------
101
+ // 2. Multi-iteration loop terminating correctly
102
+ // ---------------------------------------------------------------------------
103
+
test('multi-iteration loop runs each tool turn then terminates on the no-tool reply', async () => {
  const llm = await startMockLLM();
  // Script: two tool-emitting turns followed by a plain reply.
  const script = ['<exec>echo step1</exec>', '<exec>echo step2</exec>', 'Finished.'];
  for (const reply of script) {
    llm.replyWith(reply);
  }
  try {
    const { runner } = buildRunner(llm.base);
    const { ev: events, cb: callbacks } = collector();
    const conversation = [{ role: 'user', content: 'multi' }];
    const outcome = await runner.runAgentLoop(conversation, 'test-model', 10, null, { callbacks });
    const { metrics } = outcome;

    assert.strictEqual(metrics.turns.length, 3);
    assert.strictEqual(events.tools.length, 2, 'two tools executed across two iterations');
    assert.strictEqual(llm.requestCount(), 3);

    const isFinalAnswer = (msg) => msg.role === 'assistant' && msg.content === 'Finished.';
    assert.ok(conversation.some(isFinalAnswer));
  } finally {
    await llm.close();
  }
});
123
+
test('maxIterations caps the loop even when the model keeps emitting tools', async () => {
  const llm = await startMockLLM();
  llm.replyWith('<exec>echo a</exec>');
  llm.replyWith('<exec>echo b</exec>');
  // Third scripted reply exists only to prove the cap: it must never be requested.
  llm.replyWith('<exec>echo c</exec>');
  try {
    const { runner } = buildRunner(llm.base);
    const { cb: callbacks } = collector();
    const conversation = [{ role: 'user', content: 'loop' }];
    const iterationCap = 2;
    const outcome = await runner.runAgentLoop(conversation, 'test-model', iterationCap, null, { callbacks });
    const { metrics } = outcome;

    assert.strictEqual(metrics.turns.length, 2, 'stopped at the iteration cap');
    assert.strictEqual(llm.requestCount(), 2, 'no third request was made');
    assert.strictEqual(llm.pending(), 1, 'third scripted reply left unused');
  } finally {
    await llm.close();
  }
});
142
+
143
+ // ---------------------------------------------------------------------------
144
+ // 3. Retry/backoff on 429 honoring Retry-After
145
+ // ---------------------------------------------------------------------------
146
+
test('429 is retried, and Retry-After: 0 is honored (near-instant, not the 1s base backoff)', async () => {
  const llm = await startMockLLM();
  // First request is rate-limited with an explicit zero wait; the retry succeeds.
  llm.failWith(429, { headers: { 'retry-after': '0' } });
  llm.replyWith('recovered after 429');
  try {
    const { runner } = buildRunner(llm.base);
    const { ev: events, cb: callbacks } = collector();
    const conversation = [{ role: 'user', content: 'retry me' }];

    // Wall-clock measurement: the whole loop must finish well under the
    // ~1000ms the default backoff would have cost.
    const startedAt = Date.now();
    const outcome = await runner.runAgentLoop(conversation, 'test-model', 5, null, { callbacks });
    const elapsedMs = Date.now() - startedAt;
    const { metrics } = outcome;

    assert.deepStrictEqual(events.retries, [{ next: 2, max: 3 }], 'one retry announced');
    assert.ok(elapsedMs < 800, `retry honored Retry-After:0 (elapsed ${elapsedMs}ms, base backoff would be ~1000ms)`);

    const isRecovery = (msg) => msg.role === 'assistant' && msg.content === 'recovered after 429';
    assert.ok(conversation.some(isRecovery));
    assert.strictEqual(metrics.turns.length, 1);
    assert.strictEqual(llm.requestCount(), 2, 'failed attempt + successful retry');
    assert.strictEqual(events.errors.length, 0, 'transient error not surfaced after recovery');
  } finally {
    await llm.close();
  }
});
170
+
171
+ // ---------------------------------------------------------------------------
172
+ // 4. 400/413 context-overflow self-healing (inside api.js, transparent to loop)
173
+ // ---------------------------------------------------------------------------
174
+
test('400 context-overflow self-heals: parses the window, persists it, retries, succeeds', async () => {
  const llm = await startMockLLM();
  // The 400 body advertises a 100-token window; the follow-up request succeeds.
  const overflowBody = JSON.stringify({
    error: { message: 'This model context length is only 100 tokens, but the request used more.' },
  });
  llm.failWith(400, { body: overflowBody });
  llm.replyWith('healed 400');
  try {
    const { runner, getSaved } = buildRunner(llm.base);
    const { ev: events, cb: callbacks } = collector();
    const conversation = [{ role: 'user', content: 'x'.repeat(2000) }];
    await runner.runAgentLoop(conversation, 'test-model', 5, null, { callbacks });

    const isHealedReply = (msg) => msg.role === 'assistant' && msg.content === 'healed 400';
    assert.ok(conversation.some(isHealedReply));
    assert.strictEqual(llm.requestCount(), 2, 'overflow request + trimmed retry');
    assert.strictEqual(events.retries.length, 0, 'self-heal is not an agent-level retry');

    const isTrimWarning = (e) => e.isWarning && /Context trimmed \(overflow-400\)/.test(e.message);
    const trimWarning = events.errors.find(isTrimWarning);
    assert.ok(trimWarning, 'a context-trim warning was surfaced');

    // saveConfig must have been called with the window parsed from the error body.
    const persisted = getSaved();
    assert.strictEqual(persisted && persisted.context_length, 100, 'learned context window persisted');
  } finally {
    await llm.close();
  }
});
195
+
test('413 self-heals: trims and retries without an agent-level retry', async () => {
  const llm = await startMockLLM();
  // A bare 413 (no body) followed by a successful reply.
  llm.failWith(413);
  llm.replyWith('healed 413');
  try {
    const { runner } = buildRunner(llm.base);
    const { ev: events, cb: callbacks } = collector();
    const conversation = [{ role: 'user', content: 'y'.repeat(2000) }];
    await runner.runAgentLoop(conversation, 'test-model', 5, null, { callbacks });

    const isHealedReply = (msg) => msg.role === 'assistant' && msg.content === 'healed 413';
    assert.ok(conversation.some(isHealedReply));
    assert.strictEqual(llm.requestCount(), 2);

    const isTrimWarning = (e) => e.isWarning && /Context trimmed \(overflow-413\)/.test(e.message);
    assert.ok(events.errors.some(isTrimWarning));
  } finally {
    await llm.close();
  }
});
213
+
214
+ // ---------------------------------------------------------------------------
215
+ // 5. Abort propagation mid-stream
216
+ // ---------------------------------------------------------------------------
217
+
test('aborting mid-stream stops the turn without recording an assistant message', async () => {
  const llm = await startMockLLM();
  // Five deltas spaced 60ms apart keep the stream open long enough for the
  // abort flag to flip while it is still being read.
  const deltas = ['part-a ', 'part-b ', 'part-c ', 'part-d ', 'part-e'];
  llm.replyWith(deltas, { gapMs: 60 });
  try {
    const { runner } = buildRunner(llm.base);

    let abortRequested = false;
    // Replace the recording onToken: the first streamed token requests the abort.
    const { ev: events, cb: callbacks } = collector({
      onToken: () => { abortRequested = true; },
    });
    const conversation = [{ role: 'user', content: 'abort me' }];

    const outcome = await runner.runAgentLoop(conversation, 'test-model', 5, null, {
      callbacks,
      getAbortFlag: () => abortRequested,
    });
    const { metrics } = outcome;

    assert.strictEqual(conversation.length, 1, 'no assistant/tool messages appended after abort');
    assert.strictEqual(events.assistants.length, 0, 'no final assistant message emitted');
    assert.strictEqual(metrics.turns.length, 1, 'the turn started but did not complete');
  } finally {
    await llm.close();
  }
});
242
+
243
+ // ---------------------------------------------------------------------------
244
+ // 6. Retryable vs non-retryable error distinction
245
+ // ---------------------------------------------------------------------------
246
+
test('a non-retryable error (401) breaks immediately with no retry', async () => {
  const llm = await startMockLLM();
  const unauthorizedBody = JSON.stringify({ error: 'unauthorized' });
  llm.failWith(401, { body: unauthorizedBody });
  try {
    const { runner } = buildRunner(llm.base);
    const { ev: events, cb: callbacks } = collector();
    const conversation = [{ role: 'user', content: 'nope' }];
    await runner.runAgentLoop(conversation, 'test-model', 5, null, { callbacks });

    assert.strictEqual(events.retries.length, 0, '401 is not retried');
    assert.strictEqual(llm.requestCount(), 1, 'only one request made');

    // The error object handed to onError must carry the HTTP status.
    const reported = events.errors.find((e) => e.statusCode === 401);
    assert.ok(reported, 'the 401 error was surfaced to the caller');
  } finally {
    await llm.close();
  }
});
264
+
test('a retryable error (429) is retried up to the limit, then surfaced', async () => {
  const llm = await startMockLLM();
  // Three consecutive rate-limit failures, each with a zero Retry-After so the
  // test does not wait on backoff.
  for (let attempt = 0; attempt < 3; attempt += 1) {
    llm.failWith(429, { headers: { 'retry-after': '0' } });
  }
  try {
    const { runner } = buildRunner(llm.base);
    const { ev: events, cb: callbacks } = collector();
    const conversation = [{ role: 'user', content: 'keep failing' }];
    await runner.runAgentLoop(conversation, 'test-model', 5, null, { callbacks });

    assert.strictEqual(events.retries.length, 2, 'two retries before exhausting MAX_RETRIES=3');
    const expectedAnnouncements = [{ next: 2, max: 3 }, { next: 3, max: 3 }];
    assert.deepStrictEqual(events.retries, expectedAnnouncements);
    assert.strictEqual(llm.requestCount(), 3, 'three attempts total');

    const sawRateLimit = events.errors.some((e) => e.statusCode === 429);
    assert.ok(sawRateLimit, 'final error surfaced after exhausting retries');
  } finally {
    await llm.close();
  }
});
284
+
285
+ // ---------------------------------------------------------------------------
286
+ // 7. A BUILT-IN tool through BOTH dispatch paths (Task 3.3b)
287
+ //
288
+ // Task 3.3 fixed a latent bug: a native function-calling response with empty
289
+ // text content + structured tool_calls was mistaken for a dropped/empty
290
+ // response, so native dispatch silently broke (the `!reply && !hasNativeToolCalls`
291
+ // guard in lib/agent.js). That fix was regression-tested only via an MCP tool —
292
+ // not via a BUILT-IN tool, which is the most-used path and the one that was
293
+ // actually broken. These two tests lock the built-in path on both rails:
294
+ // (a) native function-calling (tool_calls, EMPTY text) — the regression, and
295
+ // (b) the XML tag path — the long-standing path,
296
+ // proving both flow through the SAME runAgentLoop end-to-end (the extraction-level
297
+ // equivalence Task 1.4 asserted, now asserted at loop level).
298
+ // `read_file` is used because it is read-only (auto-runs unattended), maps from
299
+ // both `{path}` (native) and `<read_file>…</read_file>` (XML), and returns
300
+ // deterministic content from a temp file.
301
+ // ---------------------------------------------------------------------------
302
+
/**
 * Run `fn` against a throwaway fixture file and always remove it afterwards.
 *
 * A fresh directory is created under the OS temp dir, `contents` is written to
 * `fixture.txt` inside it, and `fn(file)` is invoked with that path.
 *
 * @param {string} contents - text written to the fixture file (UTF-8).
 * @param {function(string): *} fn - callback receiving the fixture path; may
 *   be sync or async.
 * @returns {Promise<*>} resolves with whatever `fn` returns/resolves to;
 *   rejects with whatever `fn` (or the file write) throws.
 */
async function withTempFile(contents, fn) {
  const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'semalt-agentloop-'));
  try {
    const file = path.join(dir, 'fixture.txt');
    fs.writeFileSync(file, contents, 'utf8');
    // `await` inside the try so cleanup waits for async callbacks, and so a
    // synchronous throw from `fn` (or a failed write) still reaches `finally`
    // instead of leaking the directory.
    return await fn(file);
  } finally {
    fs.rmSync(dir, { recursive: true, force: true });
  }
}
311
+
test('built-in tool via the NATIVE path: empty-text tool_calls response dispatches (regression locked)', async () => {
  const MARKER = 'NATIVE_PATH_FILE_CONTENT_42';
  await withTempFile(MARKER, async (fixturePath) => {
    const llm = await startMockLLM();
    // Native function-calling: the first response carries tool_calls and NO
    // text content. Before the fix, the empty `content` made the loop treat it
    // as an empty/dropped response and break before dispatching.
    llm.replyWithToolCall('read_file', { path: fixturePath });
    llm.replyWith('Read it.');
    try {
      const { runner } = buildRunner(llm.base);
      const { ev: events, cb: callbacks } = collector();
      const conversation = [{ role: 'user', content: 'read the file' }];
      const outcome = await runner.runAgentLoop(conversation, 'test-model', 5, null, { callbacks });
      const { metrics } = outcome;

      // Symptom 1 of the regression: a bogus "empty response" warning.
      const sawEmptyResponseWarning = events.errors.some((e) => /empty response/i.test(e.message || ''));
      assert.ok(
        !sawEmptyResponseWarning,
        'native tool_calls with empty text content is NOT mistaken for an empty response',
      );
      // Symptom 2: the tool never ran.
      assert.strictEqual(events.tools.length, 1, 'the built-in read tool was dispatched');
      assert.strictEqual(events.tools[0].tag, 'read');

      // The assistant turn is recorded with structured tool_calls and empty text…
      const callTurn = conversation.find((msg) => msg.role === 'assistant' && Array.isArray(msg.tool_calls));
      assert.ok(callTurn, 'assistant message recorded the native tool_calls');
      assert.strictEqual(callTurn.content, '', 'native tool-call turn has empty text content');
      const [firstCall] = callTurn.tool_calls;
      assert.strictEqual(firstCall.function.name, 'read_file');

      // …and the result returns on a role:'tool' message (the native shape)
      // that carries the real file content and points back at the call id.
      const toolReply = conversation.find((msg) => msg.role === 'tool');
      assert.ok(toolReply, 'native path appends a role:"tool" result message');
      assert.match(toolReply.content, new RegExp(MARKER), 'file content flowed back to the model');
      assert.strictEqual(toolReply.tool_call_id, firstCall.id, 'result rooted to its tool_call id');

      // Reaching the final plain reply proves the loop did not stall.
      assert.strictEqual(metrics.turns.length, 2, 'tool turn + final turn');
      const isFinalAnswer = (msg) => msg.role === 'assistant' && msg.content === 'Read it.';
      assert.ok(conversation.some(isFinalAnswer), 'final answer recorded');
      assert.strictEqual(llm.pending(), 0, 'both scripted responses consumed');
    } finally {
      await llm.close();
    }
  });
});
358
+
test('built-in tool via the XML path: <read_file> tag dispatches the same tool through the loop', async () => {
  const MARKER = 'XML_PATH_FILE_CONTENT_99';
  await withTempFile(MARKER, async (fixturePath) => {
    const llm = await startMockLLM();
    // XML tag path: the first response is plain assistant text holding the tag.
    llm.replyWith(`<read_file>${fixturePath}</read_file>`);
    llm.replyWith('Read it.');
    try {
      const { runner } = buildRunner(llm.base);
      const { ev: events, cb: callbacks } = collector();
      const conversation = [{ role: 'user', content: 'read the file' }];
      const outcome = await runner.runAgentLoop(conversation, 'test-model', 5, null, { callbacks });
      const { metrics } = outcome;

      assert.strictEqual(events.tools.length, 1, 'the built-in read tool was dispatched');
      assert.strictEqual(events.tools[0].tag, 'read');

      // XML results are fed back as a role:'user' "Tool execution results"
      // message — never as a role:'tool' message.
      const isToolFeedback = (msg) => msg.role === 'user' && /Tool execution results/.test(msg.content);
      const feedback = conversation.find(isToolFeedback);
      assert.ok(feedback, 'XML path feeds results back as a user message');
      assert.match(feedback.content, new RegExp(MARKER), 'file content flowed back to the model');
      const hasToolRoleMessage = conversation.some((msg) => msg.role === 'tool');
      assert.ok(!hasToolRoleMessage, 'XML path does not use role:"tool" messages');

      assert.strictEqual(metrics.turns.length, 2, 'tool turn + final turn');
      const isFinalAnswer = (msg) => msg.role === 'assistant' && msg.content === 'Read it.';
      assert.ok(conversation.some(isFinalAnswer), 'final answer recorded');
      assert.strictEqual(llm.pending(), 0, 'both scripted responses consumed');
    } finally {
      await llm.close();
    }
  });
});
@@ -0,0 +1,153 @@
1
+ 'use strict';
2
+
3
+ // Single animation driver (Output Refactor — Phase 3).
4
+ //
5
+ // THE CHANGE: the status bar used to own TWO independent setIntervals — a 1 Hz
6
+ // clock tick and a 100 ms spinner glyph cycle — that each repainted the whole
7
+ // live region without coordinating. Phase 3 replaces both with ONE driver
8
+ // (lib/ui/anim.js). The clock and spinner are now subscribers: one timer, one
9
+ // frame counter, and at most ONE coordinated repaint per tick.
10
+ //
11
+ // These tests drive the single timer via node:test mock timers and assert that
12
+ // (a) only one interval is ever created by a constructed status bar, (b)
13
+ // advancing it updates BOTH the clock field and the spinner glyph, and (c) a
14
+ // tick that fires more than one subscriber still produces exactly one repaint.
15
+
16
+ const { test, mock } = require('node:test');
17
+ const assert = require('node:assert');
18
+
19
+ const { AnimDriver, BASE_INTERVAL_MS, TICKS_PER_SECOND } = require('../lib/ui/anim');
20
+ const { FullStatusBar } = require('../lib/ui/status-bar');
21
+
22
+ const layout = { cols: 200 };
23
+
24
+ // ---------------------------------------------------------------------------
25
+ // Exactly one interval is created — the clock and spinner share it.
26
+ // ---------------------------------------------------------------------------
27
+
test('a constructed status bar creates exactly one setInterval (one driver, not two)', () => {
  mock.timers.enable({ apis: ['setInterval'] });
  // Declared outside the try so the cleanup path can reach it even when the
  // assertion below throws.
  let bar;
  try {
    let intervalsCreated = 0;
    // Wrap whatever setInterval is currently installed (the mocked one) with a
    // counter, and always put it back before asserting.
    const realSetInterval = global.setInterval;
    global.setInterval = (...args) => { intervalsCreated++; return realSetInterval(...args); };
    try {
      bar = new FullStatusBar(layout, () => {});
      // Entering an animating state must NOT create a second timer — the
      // spinner is a subscriber to the one driver, not its own interval.
      bar.update('tool', 'running');
    } finally {
      global.setInterval = realSetInterval;
    }
    assert.strictEqual(intervalsCreated, 1, 'only one interval for clock + spinner');
  } finally {
    // Tear the bar down on both the passing and the failing path, then restore
    // the real timers regardless of whether destroy() itself throws.
    try {
      if (bar) bar.destroy();
    } finally {
      mock.timers.reset();
    }
  }
});
49
+
50
+ // ---------------------------------------------------------------------------
51
+ // Advancing the one driver updates BOTH the clock and the spinner glyph.
52
+ // ---------------------------------------------------------------------------
53
+
test('advancing the single driver updates both the clock and the spinner', () => {
  mock.timers.enable({ apis: ['setInterval'] });
  // Declared outside the try so the cleanup path can reach it even when an
  // assertion below throws.
  let bar;
  try {
    let redraws = 0;
    bar = new FullStatusBar(layout, () => { redraws++; });

    // Animating state → the spinner glyph cycles. Capture the rendered glyph
    // before and after one base-interval tick; it must change.
    bar.update('thinking', 'Thinking');
    const glyphAt = () => {
      const line = bar.renderLine();
      // First non-space visible char after stripping ANSI colour sequences is
      // taken to be the spinner glyph.
      const stripped = line.replace(/\x1b\[[0-9;]*m/g, '');
      return stripped.trimStart()[0];
    };
    const g0 = glyphAt();
    mock.timers.tick(BASE_INTERVAL_MS);
    const g1 = glyphAt();
    assert.notStrictEqual(g0, g1, 'spinner glyph advances on a driver tick');

    // Over a full second the same driver keeps firing repaints. Only the
    // redraw count is checked here (at least the spinner cadence); the clock
    // field's text is not inspected by this test.
    redraws = 0;
    mock.timers.tick(1000);
    assert.ok(redraws >= TICKS_PER_SECOND - 1, 'driver repaints at the spinner cadence while animating');
  } finally {
    // Tear the bar down on both the passing and the failing path, then restore
    // the real timers regardless of whether destroy() itself throws.
    try {
      if (bar) bar.destroy();
    } finally {
      mock.timers.reset();
    }
  }
});
86
+
87
+ // ---------------------------------------------------------------------------
88
+ // One coordinated repaint per tick — even when multiple subscribers fire.
89
+ // ---------------------------------------------------------------------------
90
+
test('a tick that fires multiple subscribers still yields exactly one repaint', () => {
  // Driver-only check: two subscribers both ask for a repaint on the same
  // frame, and the repaint callback must still run once per tick.
  mock.timers.enable({ apis: ['setInterval'] });
  try {
    const driver = new AnimDriver();
    let repaintCount = 0;
    driver.onRepaint(() => {
      repaintCount++;
    });
    // Two separate function objects, each always requesting a repaint.
    driver.subscribe(() => true);
    driver.subscribe(() => true);
    driver.start();

    mock.timers.tick(BASE_INTERVAL_MS);
    assert.strictEqual(repaintCount, 1, 'two truthy subscribers → one coordinated repaint');

    mock.timers.tick(BASE_INTERVAL_MS);
    assert.strictEqual(repaintCount, 2, 'one repaint per subsequent tick too');

    driver.stop();
  } finally {
    mock.timers.reset();
  }
});
111
+
112
+ // ---------------------------------------------------------------------------
113
+ // A tick where no subscriber wants a repaint produces none (idle clock gap).
114
+ // ---------------------------------------------------------------------------
115
+
test('a tick with no truthy subscriber produces no repaint', () => {
  mock.timers.enable({ apis: ['setInterval'] });
  try {
    const driver = new AnimDriver();
    let repaintCount = 0;
    driver.onRepaint(() => {
      repaintCount++;
    });
    // The only subscriber never asks for a repaint.
    driver.subscribe(() => false);
    driver.start();

    // Five base intervals' worth of mocked time.
    mock.timers.tick(BASE_INTERVAL_MS * 5);
    assert.strictEqual(repaintCount, 0, 'no repaint when nothing requests one');

    driver.stop();
  } finally {
    mock.timers.reset();
  }
});
131
+
132
+ // ---------------------------------------------------------------------------
133
+ // start()/stop() are idempotent — no stacked timers (the 5404bd0 lesson).
134
+ // ---------------------------------------------------------------------------
135
+
test('AnimDriver start()/stop() are idempotent and never stack timers', () => {
  mock.timers.enable({ apis: ['setInterval'] });
  try {
    const driver = new AnimDriver();
    let tickCount = 0;
    driver.subscribe(() => {
      tickCount++;
      return false;
    });

    // Three consecutive starts must still leave a single timer running.
    driver.start();
    driver.start();
    driver.start();
    mock.timers.tick(BASE_INTERVAL_MS);
    assert.strictEqual(tickCount, 1, 'one tick per interval despite repeated start()');

    // A repeated stop must not throw and must leave the driver fully stopped.
    driver.stop();
    driver.stop();
    tickCount = 0;
    mock.timers.tick(BASE_INTERVAL_MS * 5);
    assert.strictEqual(tickCount, 0, 'no ticks after stop()');
    assert.strictEqual(driver.isRunning(), false, 'isRunning() reflects stopped state');
  } finally {
    mock.timers.reset();
  }
});