@semalt-ai/code 1.8.5 → 1.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +7 -1
- package/.github/workflows/ci.yml +69 -0
- package/ARCHITECTURE.md +6 -95
- package/CLAUDE.md +196 -316
- package/README.md +148 -4
- package/docs/ARCHITECTURE.md +1321 -0
- package/docs/CONFIG.md +340 -0
- package/docs/HISTORY.md +245 -0
- package/examples/embed.js +74 -0
- package/index.js +251 -10
- package/lib/agent.js +856 -120
- package/lib/api.js +239 -50
- package/lib/args.js +74 -2
- package/lib/audit.js +23 -1
- package/lib/background.js +584 -0
- package/lib/checkpoints.js +757 -0
- package/lib/commands/auth.js +94 -0
- package/lib/commands/chat-session.js +489 -0
- package/lib/commands/chat-slash.js +415 -0
- package/lib/commands/chat-turn.js +669 -0
- package/lib/commands/chat.js +407 -0
- package/lib/commands/custom.js +157 -0
- package/lib/commands/history-utils.js +66 -0
- package/lib/commands/index.js +268 -0
- package/lib/commands/mcp.js +113 -0
- package/lib/commands/oneshot.js +193 -0
- package/lib/commands/registry.js +269 -0
- package/lib/commands/tasks.js +89 -0
- package/lib/compact.js +87 -0
- package/lib/config.js +360 -11
- package/lib/constants.js +401 -3
- package/lib/deny.js +199 -0
- package/lib/doctor.js +160 -0
- package/lib/headless.js +202 -0
- package/lib/hooks.js +286 -0
- package/lib/images.js +270 -0
- package/lib/internals.js +49 -0
- package/lib/mcp/boundary.js +131 -0
- package/lib/mcp/client.js +270 -0
- package/lib/mcp/oauth.js +134 -0
- package/lib/memory.js +209 -0
- package/lib/metrics.js +37 -2
- package/lib/payload.js +54 -0
- package/lib/permission-rules.js +401 -0
- package/lib/permissions.js +123 -26
- package/lib/pricing.js +67 -0
- package/lib/proc.js +62 -0
- package/lib/prompts.js +99 -8
- package/lib/sandbox.js +568 -0
- package/lib/sdk.js +328 -0
- package/lib/secrets.js +211 -0
- package/lib/skills.js +223 -0
- package/lib/subagents.js +516 -0
- package/lib/tool_registry.js +2862 -0
- package/lib/tool_specs.js +263 -9
- package/lib/tools.js +352 -1039
- package/lib/ui/anim.js +86 -0
- package/lib/ui/ansi.js +17 -27
- package/lib/ui/chat-history.js +253 -71
- package/lib/ui/create-ui.js +67 -24
- package/lib/ui/diff.js +90 -25
- package/lib/ui/file-activity.js +236 -0
- package/lib/ui/format.js +195 -29
- package/lib/ui/input-field.js +21 -11
- package/lib/ui/md-stream.js +234 -0
- package/lib/ui/render-operation.js +113 -0
- package/lib/ui/select.js +1 -4
- package/lib/ui/status-bar.js +146 -36
- package/lib/ui/stream.js +20 -13
- package/lib/ui/theme.js +190 -44
- package/lib/ui/tool-operation.js +190 -0
- package/lib/ui/utils.js +9 -5
- package/lib/ui/web-activity.js +270 -0
- package/lib/ui/writer.js +159 -45
- package/lib/ui.js +1 -1
- package/lib/verify.js +229 -0
- package/lib/web-extract.js +213 -0
- package/lib/web-summarize.js +68 -0
- package/package.json +19 -4
- package/scripts/lint.js +57 -0
- package/test/agent-loop.test.js +389 -0
- package/test/anim-driver.test.js +153 -0
- package/test/ask-user-display.test.js +226 -0
- package/test/ask-user-gate.test.js +231 -0
- package/test/background.test.js +414 -0
- package/test/chat-history-nocolor.test.js +155 -0
- package/test/chat-relogin.test.js +207 -0
- package/test/chat.test.js +114 -0
- package/test/checkpoints-agent.test.js +181 -0
- package/test/checkpoints.test.js +650 -0
- package/test/command-registry.test.js +160 -0
- package/test/compact.test.js +116 -0
- package/test/completion-lazy.test.js +52 -0
- package/test/config-merge.test.js +324 -0
- package/test/config-quarantine.test.js +128 -0
- package/test/config-write-guard-allow-anywhere.test.js +56 -0
- package/test/config-write-guard-skip.test.js +46 -0
- package/test/config-write-guard.test.js +153 -0
- package/test/context-split.test.js +215 -0
- package/test/cost-doctor.test.js +142 -0
- package/test/custom-commands-chat.test.js +106 -0
- package/test/custom-commands.test.js +230 -0
- package/test/defer-detail-band.test.js +403 -0
- package/test/deny-windows.test.js +120 -0
- package/test/deny.test.js +83 -0
- package/test/detail-band-tab-flatten.test.js +242 -0
- package/test/download-allow-anywhere.test.js +66 -0
- package/test/download-confine.test.js +153 -0
- package/test/exec-diff.test.js +268 -0
- package/test/executors.test.js +599 -0
- package/test/extract-tool-calls.test.js +349 -0
- package/test/fetch-url-validation.test.js +219 -0
- package/test/file-activity.test.js +522 -0
- package/test/fixtures/tool-calls.js +57 -0
- package/test/fixtures/web-page.js +91 -0
- package/test/git-tools.test.js +384 -0
- package/test/grep-glob-serialize.test.js +242 -0
- package/test/grep-glob.test.js +268 -0
- package/test/grep-path-target.test.js +227 -0
- package/test/harness/README.md +57 -0
- package/test/harness/chat-harness.js +143 -0
- package/test/harness/memwarn-headless-child.js +65 -0
- package/test/harness/mock-llm.js +120 -0
- package/test/harness/mock-mcp-server.js +142 -0
- package/test/harness/sse-server.js +69 -0
- package/test/headless.test.js +348 -0
- package/test/history-utils.test.js +88 -0
- package/test/hooks-agent.test.js +238 -0
- package/test/hooks-verify-sandbox.test.js +232 -0
- package/test/hooks.test.js +216 -0
- package/test/http-get-user-agent.test.js +142 -0
- package/test/images-api.test.js +208 -0
- package/test/images.test.js +238 -0
- package/test/input-field-ctrl-o.test.js +37 -0
- package/test/live-height-physical.test.js +281 -0
- package/test/max-iterations.test.js +218 -0
- package/test/mcp-boundary.test.js +57 -0
- package/test/mcp-client.test.js +267 -0
- package/test/mcp-oauth.test.js +86 -0
- package/test/md-stream.test.js +183 -0
- package/test/memory-truncation-warning.test.js +222 -0
- package/test/memory.test.js +198 -0
- package/test/native-dispatch.test.js +409 -0
- package/test/native-live-narration.test.js +254 -0
- package/test/output-chokepoint.test.js +188 -0
- package/test/output-heredoc-leak.test.js +195 -0
- package/test/output-preview.test.js +245 -0
- package/test/path-guards.test.js +134 -0
- package/test/payload.test.js +99 -0
- package/test/permission-rules-agent.test.js +210 -0
- package/test/permission-rules.test.js +297 -0
- package/test/permissions.test.js +362 -0
- package/test/plan-mode.test.js +167 -0
- package/test/read-paginate.test.js +275 -0
- package/test/readonly-tools.test.js +177 -0
- package/test/render-operation.test.js +317 -0
- package/test/replay-descriptor-xml.test.js +216 -0
- package/test/replay-descriptor.test.js +189 -0
- package/test/replay-web-aggregate.test.js +291 -0
- package/test/replay-web-persist.test.js +241 -0
- package/test/result-cap.test.js +233 -0
- package/test/running-glyph-anim.test.js +111 -0
- package/test/sandbox-agent.test.js +147 -0
- package/test/sandbox-integration.test.js +216 -0
- package/test/sandbox.test.js +408 -0
- package/test/sdk.test.js +234 -0
- package/test/shell-output-cap.test.js +181 -0
- package/test/skills-chat.test.js +110 -0
- package/test/skills.test.js +295 -0
- package/test/smoke.test.js +68 -0
- package/test/status-bar-driver.test.js +93 -0
- package/test/status-bar-pause.test.js +164 -0
- package/test/status-bar-resync.test.js +188 -0
- package/test/stream-parser.test.js +171 -0
- package/test/subagents-agent.test.js +178 -0
- package/test/subagents.test.js +222 -0
- package/test/theme-palette.test.js +166 -0
- package/test/tool-registry.test.js +85 -0
- package/test/trim-budget.test.js +101 -0
- package/test/truncate-visible.test.js +78 -0
- package/test/verify-agent.test.js +317 -0
- package/test/verify.test.js +141 -0
- package/test/view-image.test.js +199 -0
- package/test/web-activity-ordering.test.js +203 -0
- package/test/web-activity.test.js +207 -0
- package/test/web-data-extraction-guidance.test.js +71 -0
- package/test/web-extract.test.js +185 -0
- package/test/web-fetch-agent.test.js +291 -0
- package/test/web-fetch-mode.test.js +193 -0
- package/test/web-search.test.js +380 -0
- package/lib/commands.js +0 -1438
- package/path +0 -1
|
@@ -0,0 +1,389 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Agent-loop integration tests (Task 1.2). These drive the real runAgentLoop
|
|
4
|
+
// against the mock LLM harness, wired through the production config/api_base
|
|
5
|
+
// seam (no production code changes). Covers the six scenarios in the task:
|
|
6
|
+
// clean tool turn, multi-iteration termination, 429 retry honoring Retry-After,
|
|
7
|
+
// 400/413 self-healing, mid-stream abort, and retryable-vs-non-retryable errors; section 7 (Task 3.3b) adds built-in tool dispatch via both the native and XML paths.
|
|
8
|
+
|
|
9
|
+
const { test, before, after } = require('node:test');
|
|
10
|
+
const assert = require('node:assert');
|
|
11
|
+
const fs = require('fs');
|
|
12
|
+
const os = require('os');
|
|
13
|
+
const path = require('path');
|
|
14
|
+
|
|
15
|
+
const ui = require('../lib/ui');
|
|
16
|
+
const { createApiClient } = require('../lib/api');
|
|
17
|
+
const { createToolExecutor, extractToolCalls } = require('../lib/tools');
|
|
18
|
+
const { createPermissionManager } = require('../lib/permissions');
|
|
19
|
+
const { createAgentRunner } = require('../lib/agent');
|
|
20
|
+
const { startMockLLM } = require('./harness/mock-llm');
|
|
21
|
+
|
|
22
|
+
// Suite-level environment setup: remember whatever SEMALT_API_KEY held, swap in
// a fixed test key, and put the original value (or its absence) back afterwards.
let prevKey;

before(() => {
  prevKey = process.env.SEMALT_API_KEY;
  process.env.SEMALT_API_KEY = 'test-key';
});

after(() => {
  if (prevKey !== undefined) {
    process.env.SEMALT_API_KEY = prevKey;
    return;
  }
  // The variable was not set before the suite ran, so remove it entirely.
  delete process.env.SEMALT_API_KEY;
});
|
|
28
|
+
|
|
29
|
+
// Assemble a real agent runner whose API client is configured with `base` as
// its api_base. The permission manager is created with skipPermissions: true
// (auto-approving tool calls so the loop runs unattended) and its UI callbacks
// are no-op stubs so nothing prints during the test.
//
// Returns { runner, getSaved, config }: the runner, a getter for the last
// config snapshot handed to saveConfig (null until saveConfig is called), and
// the live config object.
function buildRunner(base) {
  const config = {
    api_base: base,
    api_key: 'test-key',
    default_model: 'test-model',
    temperature: 0.5,
    request_timeout_ms: 5000,
    stream: true,
    models: [],
    // The OS sandbox (Task 4.4) has its own dedicated tests; this suite is about
    // the agent loop. Switching it off lets the real `echo` test commands run
    // unsandboxed whether or not the machine has bwrap/sandbox-exec.
    sandbox: { mode: 'off' },
  };

  // Shallow copy of the most recent config passed to saveConfig.
  let saved = null;
  const getConfig = () => config;
  const saveConfig = (next) => {
    saved = { ...next };
    Object.assign(config, next);
  };

  const api = createApiClient({ getConfig, saveConfig, ui });

  const noop = () => {};
  const pm = createPermissionManager(ui, { skipPermissions: true });
  pm.setUICallbacks({
    onAddMessage: noop,
    onShowModal: noop,
    onCloseModal: noop,
    onCaptureNavigation: () => noop,
  });

  const executor = createToolExecutor(pm, ui, getConfig);
  const runner = createAgentRunner({
    chatStream: api.chatStream,
    extractToolCalls,
    agentExecShell: executor.agentExecShell,
    agentExecFile: executor.agentExecFile,
    describePermission: executor.describePermission,
    permissionManager: pm,
    ui,
    getConfig,
  });

  return {
    runner,
    getSaved: () => saved,
    config,
  };
}
|
|
55
|
+
|
|
56
|
+
// Build a set of agent-loop callbacks that record what they receive.
//
// `ev` holds one array per recorded event kind (tokens, tools, errors, retries,
// assistants). `cb` is the callback object; any key supplied in `extra`
// replaces the recording default of the same name, and additional keys are
// appended. Returns { ev, cb }.
function collector(extra = {}) {
  const ev = {
    tokens: [],
    tools: [],
    errors: [],
    retries: [],
    assistants: [],
  };

  const recorders = {
    onToken: (token) => ev.tokens.push(token),
    // Tool starts are deliberately not recorded.
    onToolStart: () => {},
    onToolEnd: (tag, result) => ev.tools.push({ tag, result }),
    onError: (err) => ev.errors.push(err),
    onRetry: (next, max) => ev.retries.push({ next, max }),
    onAssistantMessage: (message) => ev.assistants.push(message),
  };

  // Caller-supplied callbacks win over the recording defaults.
  const cb = Object.assign(recorders, extra);
  return { ev, cb };
}
|
|
69
|
+
|
|
70
|
+
// ---------------------------------------------------------------------------
|
|
71
|
+
// 1. Clean single-tool-call turn
|
|
72
|
+
// ---------------------------------------------------------------------------
|
|
73
|
+
|
|
74
|
+
test('clean single-tool-call turn: executes the tool then ends on the final reply', async () => {
  // Script: one shell tool call, then a plain-text reply with no tool in it.
  const llm = await startMockLLM();
  llm.replyWith('<exec>echo HELLO_S1</exec>');
  llm.replyWith('All done.');

  try {
    const { runner } = buildRunner(llm.base);
    const { ev: events, cb: callbacks } = collector();
    const history = [{ role: 'user', content: 'do it' }];

    const outcome = await runner.runAgentLoop(history, 'test-model', 10, null, { callbacks });
    const { turns } = outcome.metrics;

    assert.strictEqual(turns.length, 2, 'one tool turn + one final turn');
    assert.strictEqual(llm.pending(), 0, 'both scripted responses consumed');

    // The shell output must have been appended to the conversation as a user message.
    const isToolFeedback = (m) => m.role === 'user' && /Tool execution results/.test(m.content);
    const toolResult = history.find(isToolFeedback);
    assert.ok(toolResult, 'tool results fed back to the model');
    assert.match(toolResult.content, /HELLO_S1/);
    assert.match(toolResult.content, /Exit code: 0/);

    const isFinalAnswer = (m) => m.role === 'assistant' && m.content === 'All done.';
    assert.ok(history.some(isFinalAnswer), 'final answer recorded');

    assert.strictEqual(events.tools.length, 1);
    const [onlyTool] = events.tools;
    assert.strictEqual(onlyTool.tag, 'shell');
  } finally {
    await llm.close();
  }
});
|
|
99
|
+
|
|
100
|
+
// ---------------------------------------------------------------------------
|
|
101
|
+
// 2. Multi-iteration loop terminating correctly
|
|
102
|
+
// ---------------------------------------------------------------------------
|
|
103
|
+
|
|
104
|
+
test('multi-iteration loop runs each tool turn then terminates on the no-tool reply', async () => {
  // Script: two tool turns followed by a plain reply.
  const llm = await startMockLLM();
  for (const scripted of ['<exec>echo step1</exec>', '<exec>echo step2</exec>', 'Finished.']) {
    llm.replyWith(scripted);
  }

  try {
    const { runner } = buildRunner(llm.base);
    const { ev: events, cb: callbacks } = collector();
    const history = [{ role: 'user', content: 'multi' }];

    const outcome = await runner.runAgentLoop(history, 'test-model', 10, null, { callbacks });

    assert.strictEqual(outcome.metrics.turns.length, 3);
    assert.strictEqual(events.tools.length, 2, 'two tools executed across two iterations');
    assert.strictEqual(llm.requestCount(), 3);

    const isFinalAnswer = (m) => m.role === 'assistant' && m.content === 'Finished.';
    assert.ok(history.some(isFinalAnswer));
  } finally {
    await llm.close();
  }
});
|
|
123
|
+
|
|
124
|
+
test('maxIterations caps the loop even when the model keeps emitting tools', async () => {
  const llm = await startMockLLM();
  llm.replyWith('<exec>echo a</exec>');
  llm.replyWith('<exec>echo b</exec>');
  // Scripted for a 3rd turn that the cap of 2 must prevent from ever being requested.
  llm.replyWith('<exec>echo c</exec>');

  try {
    const { runner } = buildRunner(llm.base);
    const { cb: callbacks } = collector();
    const history = [{ role: 'user', content: 'loop' }];

    const outcome = await runner.runAgentLoop(history, 'test-model', 2, null, { callbacks });

    assert.strictEqual(outcome.metrics.turns.length, 2, 'stopped at the iteration cap');
    assert.strictEqual(llm.requestCount(), 2, 'no third request was made');
    assert.strictEqual(llm.pending(), 1, 'third scripted reply left unused');
  } finally {
    await llm.close();
  }
});
|
|
142
|
+
|
|
143
|
+
// ---------------------------------------------------------------------------
|
|
144
|
+
// 3. Retry/backoff on 429 honoring Retry-After
|
|
145
|
+
// ---------------------------------------------------------------------------
|
|
146
|
+
|
|
147
|
+
test('429 is retried, and Retry-After: 0 is honored (near-instant, not the 1s base backoff)', async () => {
  // Script: a 429 carrying Retry-After: 0, then a successful reply.
  const llm = await startMockLLM();
  llm.failWith(429, { headers: { 'retry-after': '0' } });
  llm.replyWith('recovered after 429');

  try {
    const { runner } = buildRunner(llm.base);
    const { ev: events, cb: callbacks } = collector();
    const history = [{ role: 'user', content: 'retry me' }];

    // Wall-clock time around the whole loop, used to tell a 0s wait from the base backoff.
    const t0 = Date.now();
    const outcome = await runner.runAgentLoop(history, 'test-model', 5, null, { callbacks });
    const elapsed = Date.now() - t0;

    assert.deepStrictEqual(events.retries, [{ next: 2, max: 3 }], 'one retry announced');
    assert.ok(elapsed < 800, `retry honored Retry-After:0 (elapsed ${elapsed}ms, base backoff would be ~1000ms)`);

    const isRecovered = (m) => m.role === 'assistant' && m.content === 'recovered after 429';
    assert.ok(history.some(isRecovered));

    assert.strictEqual(outcome.metrics.turns.length, 1);
    assert.strictEqual(llm.requestCount(), 2, 'failed attempt + successful retry');
    assert.strictEqual(events.errors.length, 0, 'transient error not surfaced after recovery');
  } finally {
    await llm.close();
  }
});
|
|
170
|
+
|
|
171
|
+
// ---------------------------------------------------------------------------
|
|
172
|
+
// 4. 400/413 context-overflow self-healing (inside api.js, transparent to loop)
|
|
173
|
+
// ---------------------------------------------------------------------------
|
|
174
|
+
|
|
175
|
+
test('400 context-overflow self-heals: parses the window, persists it, retries, succeeds', async () => {
  const llm = await startMockLLM();
  // Error body whose message states a 100-token context window.
  const overflowBody = JSON.stringify({
    error: { message: 'This model context length is only 100 tokens, but the request used more.' },
  });
  llm.failWith(400, { body: overflowBody });
  llm.replyWith('healed 400');

  try {
    const { runner, getSaved } = buildRunner(llm.base);
    const { ev: events, cb: callbacks } = collector();
    const history = [{ role: 'user', content: 'x'.repeat(2000) }];

    await runner.runAgentLoop(history, 'test-model', 5, null, { callbacks });

    const isHealed = (m) => m.role === 'assistant' && m.content === 'healed 400';
    assert.ok(history.some(isHealed));
    assert.strictEqual(llm.requestCount(), 2, 'overflow request + trimmed retry');
    assert.strictEqual(events.retries.length, 0, 'self-heal is not an agent-level retry');

    const isTrimWarning = (e) => e.isWarning && /Context trimmed \(overflow-400\)/.test(e.message);
    const trimWarn = events.errors.find(isTrimWarning);
    assert.ok(trimWarn, 'a context-trim warning was surfaced');

    // getSaved() yields null when saveConfig was never called; the short-circuit
    // keeps that case a failed comparison instead of a TypeError.
    const snapshot = getSaved();
    assert.strictEqual(snapshot && snapshot.context_length, 100, 'learned context window persisted');
  } finally {
    await llm.close();
  }
});
|
|
195
|
+
|
|
196
|
+
test('413 self-heals: trims and retries without an agent-level retry', async () => {
  // Script: a bare 413, then a successful reply.
  const llm = await startMockLLM();
  llm.failWith(413);
  llm.replyWith('healed 413');

  try {
    const { runner } = buildRunner(llm.base);
    const { ev: events, cb: callbacks } = collector();
    const history = [{ role: 'user', content: 'y'.repeat(2000) }];

    await runner.runAgentLoop(history, 'test-model', 5, null, { callbacks });

    const isHealed = (m) => m.role === 'assistant' && m.content === 'healed 413';
    assert.ok(history.some(isHealed));
    assert.strictEqual(llm.requestCount(), 2);

    const isTrimWarning = (e) => e.isWarning && /Context trimmed \(overflow-413\)/.test(e.message);
    assert.ok(events.errors.some(isTrimWarning));
  } finally {
    await llm.close();
  }
});
|
|
213
|
+
|
|
214
|
+
// ---------------------------------------------------------------------------
|
|
215
|
+
// 5. Abort propagation mid-stream
|
|
216
|
+
// ---------------------------------------------------------------------------
|
|
217
|
+
|
|
218
|
+
test('aborting mid-stream stops the turn without recording an assistant message', async () => {
  const llm = await startMockLLM();
  // Five deltas spaced 60ms apart, so the stream is still open when the abort flag flips.
  const deltas = ['part-a ', 'part-b ', 'part-c ', 'part-d ', 'part-e'];
  llm.replyWith(deltas, { gapMs: 60 });

  try {
    const { runner } = buildRunner(llm.base);

    // Replaces the recording onToken: the first streamed token requests the abort.
    let stopRequested = false;
    const { ev: events, cb: callbacks } = collector({
      onToken: () => { stopRequested = true; },
    });
    const history = [{ role: 'user', content: 'abort me' }];

    const outcome = await runner.runAgentLoop(history, 'test-model', 5, null, {
      callbacks,
      getAbortFlag: () => stopRequested,
    });

    assert.strictEqual(history.length, 1, 'no assistant/tool messages appended after abort');
    assert.strictEqual(events.assistants.length, 0, 'no final assistant message emitted');
    assert.strictEqual(outcome.metrics.turns.length, 1, 'the turn started but did not complete');
  } finally {
    await llm.close();
  }
});
|
|
242
|
+
|
|
243
|
+
// ---------------------------------------------------------------------------
|
|
244
|
+
// 6. Retryable vs non-retryable error distinction
|
|
245
|
+
// ---------------------------------------------------------------------------
|
|
246
|
+
|
|
247
|
+
test('a non-retryable error (401) breaks immediately with no retry', async () => {
  // Script: a single 401 and nothing after it.
  const llm = await startMockLLM();
  const unauthorizedBody = JSON.stringify({ error: 'unauthorized' });
  llm.failWith(401, { body: unauthorizedBody });

  try {
    const { runner } = buildRunner(llm.base);
    const { ev: events, cb: callbacks } = collector();
    const history = [{ role: 'user', content: 'nope' }];

    await runner.runAgentLoop(history, 'test-model', 5, null, { callbacks });

    assert.strictEqual(events.retries.length, 0, '401 is not retried');
    assert.strictEqual(llm.requestCount(), 1, 'only one request made');

    const surfaced = events.errors.find((e) => e.statusCode === 401);
    assert.ok(surfaced, 'the 401 error was surfaced to the caller');
  } finally {
    await llm.close();
  }
});
|
|
264
|
+
|
|
265
|
+
test('a retryable error (429) is retried up to the limit, then surfaced', async () => {
  // Script: three consecutive 429s, each with Retry-After: 0, and no success.
  const llm = await startMockLLM();
  for (let scripted = 0; scripted < 3; scripted += 1) {
    llm.failWith(429, { headers: { 'retry-after': '0' } });
  }

  try {
    const { runner } = buildRunner(llm.base);
    const { ev: events, cb: callbacks } = collector();
    const history = [{ role: 'user', content: 'keep failing' }];

    await runner.runAgentLoop(history, 'test-model', 5, null, { callbacks });

    assert.strictEqual(events.retries.length, 2, 'two retries before exhausting MAX_RETRIES=3');
    const expectedRetries = [{ next: 2, max: 3 }, { next: 3, max: 3 }];
    assert.deepStrictEqual(events.retries, expectedRetries);
    assert.strictEqual(llm.requestCount(), 3, 'three attempts total');

    const hit429 = (e) => e.statusCode === 429;
    assert.ok(events.errors.some(hit429), 'final error surfaced after exhausting retries');
  } finally {
    await llm.close();
  }
});
|
|
284
|
+
|
|
285
|
+
// ---------------------------------------------------------------------------
|
|
286
|
+
// 7. A BUILT-IN tool through BOTH dispatch paths (Task 3.3b)
|
|
287
|
+
//
|
|
288
|
+
// Task 3.3 fixed a latent bug: a native function-calling response with empty
|
|
289
|
+
// text content + structured tool_calls was mistaken for a dropped/empty
|
|
290
|
+
// response, so native dispatch silently broke (the `!reply && !hasNativeToolCalls`
|
|
291
|
+
// guard in lib/agent.js). That fix was regression-tested only via an MCP tool —
|
|
292
|
+
// not via a BUILT-IN tool, which is the most-used path and the one that was
|
|
293
|
+
// actually broken. These two tests lock the built-in path on both rails:
|
|
294
|
+
// (a) native function-calling (tool_calls, EMPTY text) — the regression, and
|
|
295
|
+
// (b) the XML tag path — the long-standing path,
|
|
296
|
+
// proving both flow through the SAME runAgentLoop end-to-end (the extraction-level
|
|
297
|
+
// equivalence Task 1.4 asserted, now asserted at loop level).
|
|
298
|
+
// `read_file` is used because it is read-only (auto-runs unattended), maps from
|
|
299
|
+
// both `{path}` (native) and `<read_file>…</read_file>` (XML), and returns
|
|
300
|
+
// deterministic content from a temp file.
|
|
301
|
+
// ---------------------------------------------------------------------------
|
|
302
|
+
|
|
303
|
+
/**
 * Run `fn` against a throwaway fixture file and always clean up afterwards.
 *
 * Creates a fresh directory under the OS temp dir (prefix `semalt-agentloop-`),
 * writes `contents` to `fixture.txt` inside it as UTF-8, and calls `fn` with
 * that file's path.
 *
 * @param {string} contents - Text written to the fixture file.
 * @param {(file: string) => any} fn - Callback receiving the fixture path; may
 *   be sync or async.
 * @returns {Promise<any>} Resolves with whatever `fn` returns/resolves to;
 *   rejects with whatever `fn` (or the fixture write) throws.
 */
async function withTempFile(contents, fn) {
  const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'semalt-agentloop-'));
  // try/finally (rather than Promise.resolve(fn(file)).finally(...)) so the
  // directory is removed even when `fn` throws synchronously or the fixture
  // write fails — in the chained form those errors escape before the cleanup
  // handler is ever attached, leaking the directory.
  try {
    const file = path.join(dir, 'fixture.txt');
    fs.writeFileSync(file, contents, 'utf8');
    return await fn(file);
  } finally {
    fs.rmSync(dir, { recursive: true, force: true });
  }
}
|
|
311
|
+
|
|
312
|
+
test('built-in tool via the NATIVE path: empty-text tool_calls response dispatches (regression locked)', async () => {
  const MARKER = 'NATIVE_PATH_FILE_CONTENT_42';

  await withTempFile(MARKER, async (file) => {
    const llm = await startMockLLM();
    // Native function-calling reply: structured tool_calls and NO text content.
    // Before the fix, the empty `content` was taken for an empty/dropped
    // response and the loop broke before dispatching the tool.
    llm.replyWithToolCall('read_file', { path: file });
    llm.replyWith('Read it.');

    try {
      const { runner } = buildRunner(llm.base);
      const { ev: events, cb: callbacks } = collector();
      const history = [{ role: 'user', content: 'read the file' }];

      const outcome = await runner.runAgentLoop(history, 'test-model', 5, null, { callbacks });

      // Symptoms of the regression: a spurious "empty response" warning and a
      // tool that never ran. Both are ruled out here.
      const falseEmptyWarning = events.errors.some((e) => /empty response/i.test(e.message || ''));
      assert.ok(
        !falseEmptyWarning,
        'native tool_calls with empty text content is NOT mistaken for an empty response',
      );
      assert.strictEqual(events.tools.length, 1, 'the built-in read tool was dispatched');
      assert.strictEqual(events.tools[0].tag, 'read');

      // The assistant turn must carry the structured tool_calls with empty text…
      const carriesToolCalls = (m) => m.role === 'assistant' && Array.isArray(m.tool_calls);
      const assistantWithCall = history.find(carriesToolCalls);
      assert.ok(assistantWithCall, 'assistant message recorded the native tool_calls');
      assert.strictEqual(assistantWithCall.content, '', 'native tool-call turn has empty text content');
      const firstCall = assistantWithCall.tool_calls[0];
      assert.strictEqual(firstCall.function.name, 'read_file');

      // …and its result must arrive as a role:'tool' message (the native shape)
      // containing the real file content, tied to the originating call id.
      const toolMsg = history.find((m) => m.role === 'tool');
      assert.ok(toolMsg, 'native path appends a role:"tool" result message');
      assert.match(toolMsg.content, new RegExp(MARKER), 'file content flowed back to the model');
      assert.strictEqual(toolMsg.tool_call_id, firstCall.id, 'result rooted to its tool_call id');

      // Reaching the final non-tool reply shows the loop did not stall.
      assert.strictEqual(outcome.metrics.turns.length, 2, 'tool turn + final turn');
      const isFinalAnswer = (m) => m.role === 'assistant' && m.content === 'Read it.';
      assert.ok(history.some(isFinalAnswer), 'final answer recorded');
      assert.strictEqual(llm.pending(), 0, 'both scripted responses consumed');
    } finally {
      await llm.close();
    }
  });
});
|
|
358
|
+
|
|
359
|
+
test('built-in tool via the XML path: <read_file> tag dispatches the same tool through the loop', async () => {
  const MARKER = 'XML_PATH_FILE_CONTENT_99';

  await withTempFile(MARKER, async (file) => {
    const llm = await startMockLLM();
    // XML tag path: the reply is ordinary assistant text that contains the tag.
    llm.replyWith(`<read_file>${file}</read_file>`);
    llm.replyWith('Read it.');

    try {
      const { runner } = buildRunner(llm.base);
      const { ev: events, cb: callbacks } = collector();
      const history = [{ role: 'user', content: 'read the file' }];

      const outcome = await runner.runAgentLoop(history, 'test-model', 5, null, { callbacks });

      assert.strictEqual(events.tools.length, 1, 'the built-in read tool was dispatched');
      assert.strictEqual(events.tools[0].tag, 'read');

      // On the XML path results return as a role:'user' "Tool execution results"
      // message — the long-standing XML shape — and never as a role:'tool' message.
      const isToolFeedback = (m) => m.role === 'user' && /Tool execution results/.test(m.content);
      const toolResult = history.find(isToolFeedback);
      assert.ok(toolResult, 'XML path feeds results back as a user message');
      assert.match(toolResult.content, new RegExp(MARKER), 'file content flowed back to the model');
      const usesToolRole = history.some((m) => m.role === 'tool');
      assert.ok(!usesToolRole, 'XML path does not use role:"tool" messages');

      assert.strictEqual(outcome.metrics.turns.length, 2, 'tool turn + final turn');
      const isFinalAnswer = (m) => m.role === 'assistant' && m.content === 'Read it.';
      assert.ok(history.some(isFinalAnswer), 'final answer recorded');
      assert.strictEqual(llm.pending(), 0, 'both scripted responses consumed');
    } finally {
      await llm.close();
    }
  });
});
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Single animation driver (Output Refactor — Phase 3).
|
|
4
|
+
//
|
|
5
|
+
// THE CHANGE: the status bar used to own TWO independent setIntervals — a 1 Hz
|
|
6
|
+
// clock tick and a 100 ms spinner glyph cycle — that each repainted the whole
|
|
7
|
+
// live region without coordinating. Phase 3 replaces both with ONE driver
|
|
8
|
+
// (lib/ui/anim.js). The clock and spinner are now subscribers: one timer, one
|
|
9
|
+
// frame counter, and at most ONE coordinated repaint per tick.
|
|
10
|
+
//
|
|
11
|
+
// These tests drive the single timer via node:test mock timers and assert that
|
|
12
|
+
// (a) only one interval is ever created by a constructed status bar, (b)
|
|
13
|
+
// advancing it updates BOTH the clock field and the spinner glyph, and (c) a
|
|
14
|
+
// tick that fires more than one subscriber still produces exactly one repaint.
|
|
15
|
+
|
|
16
|
+
const { test, mock } = require('node:test');
|
|
17
|
+
const assert = require('node:assert');
|
|
18
|
+
|
|
19
|
+
const { AnimDriver, BASE_INTERVAL_MS, TICKS_PER_SECOND } = require('../lib/ui/anim');
|
|
20
|
+
const { FullStatusBar } = require('../lib/ui/status-bar');
|
|
21
|
+
|
|
22
|
+
const layout = { cols: 200 };
|
|
23
|
+
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
25
|
+
// Exactly one interval is created — the clock and spinner share it.
|
|
26
|
+
// ---------------------------------------------------------------------------
|
|
27
|
+
|
|
28
|
+
test('a constructed status bar creates exactly one setInterval (one driver, not two)', () => {
  // Only setInterval is mocked, so the bar never schedules a real timer.
  mock.timers.enable({ apis: ['setInterval'] });
  // Declared outside the try so the finally block can tear it down on any exit.
  let bar;
  try {
    let intervalsCreated = 0;
    // Wrap whatever setInterval is currently installed (the mocked one) so
    // every interval creation is counted while still being delegated.
    const realSetInterval = global.setInterval;
    global.setInterval = (...args) => { intervalsCreated++; return realSetInterval(...args); };
    try {
      bar = new FullStatusBar(layout, () => {});
      // Entering an animating state must NOT create a second timer — the
      // spinner is a subscriber to the one driver, not its own interval.
      bar.update('tool', 'running');
    } finally {
      // Always remove the counting wrapper, even if construction/update throws.
      global.setInterval = realSetInterval;
    }
    assert.strictEqual(intervalsCreated, 1, 'only one interval for clock + spinner');
  } finally {
    // Tear the bar down even when the assertion fails; previously destroy()
    // ran only on the success path. `bar` is undefined if the constructor threw.
    try {
      if (bar) bar.destroy();
    } finally {
      // Restore the timer mocks no matter what destroy() did.
      mock.timers.reset();
    }
  }
});
|
|
49
|
+
|
|
50
|
+
// ---------------------------------------------------------------------------
|
|
51
|
+
// Advancing the one driver updates BOTH the clock and the spinner glyph.
|
|
52
|
+
// ---------------------------------------------------------------------------
|
|
53
|
+
|
|
54
|
+
test('advancing the single driver updates both the clock and the spinner', () => {
  mock.timers.enable({ apis: ['setInterval'] });
  // Declared outside the try so the finally block can tear it down on any exit.
  let bar;
  try {
    let redraws = 0;
    // The second constructor argument is used here as a redraw counter.
    bar = new FullStatusBar(layout, () => { redraws++; });

    // Animating state → the spinner glyph cycles. Capture the rendered glyph
    // before and after ONE base-interval tick; it must change.
    bar.update('thinking', 'Thinking');
    const glyphAt = () => {
      const line = bar.renderLine();
      // First non-space visible char after stripping ANSI is the spinner glyph.
      const stripped = line.replace(/\x1b\[[0-9;]*m/g, '');
      return stripped.trimStart()[0];
    };
    const g0 = glyphAt();
    mock.timers.tick(BASE_INTERVAL_MS);
    const g1 = glyphAt();
    assert.notStrictEqual(g0, g1, 'spinner glyph advances on a driver tick');

    // And the clock still ticks once per second off the SAME driver. Over a
    // full second the driver fires repaints (the clock subscriber gates on
    // frame % TICKS_PER_SECOND); assert at least the spinner cadence redraws.
    // NOTE(review): the clock field itself is not inspected here — only the
    // redraw count is asserted.
    redraws = 0;
    mock.timers.tick(1000);
    assert.ok(redraws >= TICKS_PER_SECOND - 1, 'driver repaints at the spinner cadence while animating');
  } finally {
    // Tear the bar down even when an assertion fails; previously destroy()
    // ran only on the success path. `bar` is undefined if the constructor threw.
    try {
      if (bar) bar.destroy();
    } finally {
      // Restore the timer mocks no matter what destroy() did.
      mock.timers.reset();
    }
  }
});
|
|
86
|
+
|
|
87
|
+
// ---------------------------------------------------------------------------
|
|
88
|
+
// One coordinated repaint per tick — even when multiple subscribers fire.
|
|
89
|
+
// ---------------------------------------------------------------------------
|
|
90
|
+
|
|
91
|
+
test('a tick that fires multiple subscribers still yields exactly one repaint', () => {
  // Driver-only scenario: both subscribers ask for a repaint on every frame,
  // and the repaint callback must still run once per tick.
  mock.timers.enable({ apis: ['setInterval'] });
  try {
    const driver = new AnimDriver();
    let repaintCount = 0;
    driver.onRepaint(() => {
      repaintCount += 1;
    });
    // Two distinct subscriber functions, each always requesting a repaint.
    driver.subscribe(() => true);
    driver.subscribe(() => true);
    driver.start();

    // First tick: one repaint in total.
    mock.timers.tick(BASE_INTERVAL_MS);
    assert.strictEqual(repaintCount, 1, 'two truthy subscribers → one coordinated repaint');

    // Second tick: the running total grows by exactly one more.
    mock.timers.tick(BASE_INTERVAL_MS);
    assert.strictEqual(repaintCount, 2, 'one repaint per subsequent tick too');

    driver.stop();
  } finally {
    mock.timers.reset();
  }
});
|
|
111
|
+
|
|
112
|
+
// ---------------------------------------------------------------------------
|
|
113
|
+
// A tick where no subscriber wants a repaint produces none (idle clock gap).
|
|
114
|
+
// ---------------------------------------------------------------------------
|
|
115
|
+
|
|
116
|
+
test('a tick with no truthy subscriber produces no repaint', () => {
  mock.timers.enable({ apis: ['setInterval'] });
  try {
    const driver = new AnimDriver();
    let repaintCount = 0;
    driver.onRepaint(() => {
      repaintCount += 1;
    });
    // The only subscriber never asks for a repaint.
    driver.subscribe(() => false);
    driver.start();

    // Advance five base intervals; the repaint callback must stay untouched.
    const fiveTicksMs = BASE_INTERVAL_MS * 5;
    mock.timers.tick(fiveTicksMs);
    assert.strictEqual(repaintCount, 0, 'no repaint when nothing requests one');

    driver.stop();
  } finally {
    mock.timers.reset();
  }
});
|
|
131
|
+
|
|
132
|
+
// ---------------------------------------------------------------------------
|
|
133
|
+
// start()/stop() are idempotent — no stacked timers (the 5404bd0 lesson).
|
|
134
|
+
// ---------------------------------------------------------------------------
|
|
135
|
+
|
|
136
|
+
test('AnimDriver start()/stop() are idempotent and never stack timers', () => {
  mock.timers.enable({ apis: ['setInterval'] });
  try {
    const driver = new AnimDriver();
    let tickCount = 0;
    // Count every subscriber invocation; never request a repaint.
    driver.subscribe(() => {
      tickCount += 1;
      return false;
    });

    // Three consecutive starts must leave a single timer running.
    for (let attempt = 0; attempt < 3; attempt += 1) {
      driver.start();
    }
    mock.timers.tick(BASE_INTERVAL_MS);
    assert.strictEqual(tickCount, 1, 'one tick per interval despite repeated start()');

    // Two consecutive stops must not throw and must leave the driver stopped.
    driver.stop();
    driver.stop();
    tickCount = 0;
    mock.timers.tick(BASE_INTERVAL_MS * 5);
    assert.strictEqual(tickCount, 0, 'no ticks after stop()');
    assert.strictEqual(driver.isRunning(), false, 'isRunning() reflects stopped state');
  } finally {
    mock.timers.reset();
  }
});
|