@semalt-ai/code 1.8.5 → 1.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +6 -1
- package/.github/workflows/ci.yml +69 -0
- package/CLAUDE.md +1584 -26
- package/README.md +147 -3
- package/examples/embed.js +74 -0
- package/index.js +251 -10
- package/lib/agent.js +711 -104
- package/lib/api.js +213 -49
- package/lib/args.js +74 -2
- package/lib/audit.js +23 -1
- package/lib/background.js +584 -0
- package/lib/checkpoints.js +757 -0
- package/lib/commands/auth.js +94 -0
- package/lib/commands/chat-session.js +306 -0
- package/lib/commands/chat-slash.js +399 -0
- package/lib/commands/chat-turn.js +446 -0
- package/lib/commands/chat.js +403 -0
- package/lib/commands/custom.js +157 -0
- package/lib/commands/history-utils.js +66 -0
- package/lib/commands/index.js +268 -0
- package/lib/commands/mcp.js +113 -0
- package/lib/commands/oneshot.js +193 -0
- package/lib/commands/registry.js +269 -0
- package/lib/commands/tasks.js +89 -0
- package/lib/compact.js +87 -0
- package/lib/config.js +333 -11
- package/lib/constants.js +372 -3
- package/lib/deny.js +199 -0
- package/lib/doctor.js +160 -0
- package/lib/headless.js +167 -0
- package/lib/hooks.js +286 -0
- package/lib/images.js +264 -0
- package/lib/internals.js +49 -0
- package/lib/mcp/boundary.js +131 -0
- package/lib/mcp/client.js +270 -0
- package/lib/mcp/oauth.js +134 -0
- package/lib/memory.js +209 -0
- package/lib/metrics.js +37 -2
- package/lib/payload.js +54 -0
- package/lib/permission-rules.js +401 -0
- package/lib/permissions.js +100 -10
- package/lib/pricing.js +67 -0
- package/lib/proc.js +62 -0
- package/lib/prompts.js +84 -5
- package/lib/sandbox.js +568 -0
- package/lib/sdk.js +328 -0
- package/lib/secrets.js +211 -0
- package/lib/skills.js +223 -0
- package/lib/subagents.js +516 -0
- package/lib/tool_registry.js +2558 -0
- package/lib/tool_specs.js +222 -2
- package/lib/tools.js +272 -1020
- package/lib/ui/format.js +22 -1
- package/lib/ui/input-field.js +16 -7
- package/lib/ui/status-bar.js +79 -11
- package/lib/ui/theme.js +1 -0
- package/lib/ui/web-activity.js +218 -0
- package/lib/verify.js +229 -0
- package/lib/web-extract.js +213 -0
- package/lib/web-summarize.js +68 -0
- package/package.json +19 -4
- package/scripts/lint.js +57 -0
- package/test/agent-loop.test.js +389 -0
- package/test/background.test.js +414 -0
- package/test/chat.test.js +114 -0
- package/test/checkpoints-agent.test.js +181 -0
- package/test/checkpoints.test.js +650 -0
- package/test/command-registry.test.js +160 -0
- package/test/compact.test.js +116 -0
- package/test/completion-lazy.test.js +52 -0
- package/test/config-merge.test.js +324 -0
- package/test/config-quarantine.test.js +128 -0
- package/test/config-write-guard-allow-anywhere.test.js +56 -0
- package/test/config-write-guard-skip.test.js +46 -0
- package/test/config-write-guard.test.js +153 -0
- package/test/context-split.test.js +215 -0
- package/test/cost-doctor.test.js +142 -0
- package/test/custom-commands-chat.test.js +106 -0
- package/test/custom-commands.test.js +230 -0
- package/test/deny-windows.test.js +120 -0
- package/test/deny.test.js +83 -0
- package/test/download-allow-anywhere.test.js +66 -0
- package/test/download-confine.test.js +153 -0
- package/test/executors.test.js +362 -0
- package/test/extract-tool-calls.test.js +315 -0
- package/test/fetch-url-validation.test.js +219 -0
- package/test/fixtures/tool-calls.js +57 -0
- package/test/fixtures/web-page.js +91 -0
- package/test/git-tools.test.js +384 -0
- package/test/grep-glob-serialize.test.js +242 -0
- package/test/grep-glob.test.js +268 -0
- package/test/harness/README.md +57 -0
- package/test/harness/chat-harness.js +142 -0
- package/test/harness/memwarn-headless-child.js +65 -0
- package/test/harness/mock-llm.js +120 -0
- package/test/harness/mock-mcp-server.js +142 -0
- package/test/harness/sse-server.js +69 -0
- package/test/headless.test.js +203 -0
- package/test/history-utils.test.js +88 -0
- package/test/hooks-agent.test.js +238 -0
- package/test/hooks-verify-sandbox.test.js +232 -0
- package/test/hooks.test.js +216 -0
- package/test/http-get-user-agent.test.js +142 -0
- package/test/images-api.test.js +208 -0
- package/test/images.test.js +238 -0
- package/test/max-iterations.test.js +216 -0
- package/test/mcp-boundary.test.js +57 -0
- package/test/mcp-client.test.js +267 -0
- package/test/mcp-oauth.test.js +86 -0
- package/test/memory-truncation-warning.test.js +222 -0
- package/test/memory.test.js +198 -0
- package/test/native-dispatch.test.js +356 -0
- package/test/output-chokepoint.test.js +188 -0
- package/test/path-guards.test.js +134 -0
- package/test/payload.test.js +99 -0
- package/test/permission-rules-agent.test.js +210 -0
- package/test/permission-rules.test.js +297 -0
- package/test/permissions.test.js +163 -0
- package/test/plan-mode.test.js +167 -0
- package/test/read-paginate.test.js +275 -0
- package/test/readonly-tools.test.js +177 -0
- package/test/result-cap.test.js +233 -0
- package/test/sandbox-agent.test.js +147 -0
- package/test/sandbox-integration.test.js +216 -0
- package/test/sandbox.test.js +408 -0
- package/test/sdk.test.js +234 -0
- package/test/shell-output-cap.test.js +181 -0
- package/test/skills-chat.test.js +110 -0
- package/test/skills.test.js +295 -0
- package/test/smoke.test.js +68 -0
- package/test/status-bar-pause.test.js +164 -0
- package/test/stream-parser.test.js +147 -0
- package/test/subagents-agent.test.js +178 -0
- package/test/subagents.test.js +222 -0
- package/test/tool-registry.test.js +85 -0
- package/test/trim-budget.test.js +101 -0
- package/test/verify-agent.test.js +317 -0
- package/test/verify.test.js +141 -0
- package/test/web-activity-ordering.test.js +194 -0
- package/test/web-activity.test.js +207 -0
- package/test/web-data-extraction-guidance.test.js +71 -0
- package/test/web-extract.test.js +185 -0
- package/test/web-fetch-agent.test.js +291 -0
- package/test/web-fetch-mode.test.js +193 -0
- package/test/web-search.test.js +380 -0
- package/lib/commands.js +0 -1438
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Unit tests for subagents (Task 3.6) — the pure / injectable surface:
|
|
4
|
+
// * agent-definition discovery + frontmatter (name / model / tools / prompt)
|
|
5
|
+
// * allowed-tool resolution (constrains tools; never includes spawn_agent)
|
|
6
|
+
// * resolveSpec applies a named definition's model / prompt / tools
|
|
7
|
+
// * bounded-concurrency pool (injected runChild records peak concurrency)
|
|
8
|
+
// * spawn single vs. parallel `tasks`
|
|
9
|
+
// * the spawn_agent dynamic-tool entry (permission gate, fromParams, parseXml)
|
|
10
|
+
//
|
|
11
|
+
// The real isolated-child-loop behavior is covered by subagents-agent.test.js.
|
|
12
|
+
|
|
13
|
+
const { test } = require('node:test');
|
|
14
|
+
const assert = require('node:assert');
|
|
15
|
+
const fs = require('fs');
|
|
16
|
+
const os = require('os');
|
|
17
|
+
const path = require('path');
|
|
18
|
+
|
|
19
|
+
const {
|
|
20
|
+
parseAgentFrontmatter,
|
|
21
|
+
discoverAgentDefs,
|
|
22
|
+
loadAgentDefsFromDir,
|
|
23
|
+
resolveAllowedActions,
|
|
24
|
+
createSubagentManager,
|
|
25
|
+
buildSpawnAgentEntry,
|
|
26
|
+
SPAWN_AGENT_TOOL,
|
|
27
|
+
} = require('../lib/subagents');
|
|
28
|
+
|
|
29
|
+
/**
 * Create a fresh, uniquely named scratch directory for a test.
 *
 * The directory is made under the OS temp dir with the `semalt-subagents-`
 * prefix. This helper does not remove it afterwards.
 *
 * @returns {string} Path of the newly created directory.
 */
function tmpdir() {
  const prefix = path.join(os.tmpdir(), 'semalt-subagents-');
  return fs.mkdtempSync(prefix);
}
|
|
30
|
+
|
|
31
|
+
// ---------------------------------------------------------------------------
|
|
32
|
+
// 1. Frontmatter parsing
|
|
33
|
+
// ---------------------------------------------------------------------------
|
|
34
|
+
|
|
35
|
+
test('parseAgentFrontmatter reads name/model/tools and returns the body as the system prompt', () => {
|
|
36
|
+
const text = [
|
|
37
|
+
'---',
|
|
38
|
+
'name: Code Reviewer',
|
|
39
|
+
'model: gpt-mini',
|
|
40
|
+
'tools: read_file, grep glob',
|
|
41
|
+
'description: reviews diffs',
|
|
42
|
+
'---',
|
|
43
|
+
'You are a meticulous reviewer.',
|
|
44
|
+
'Focus on correctness.',
|
|
45
|
+
].join('\n');
|
|
46
|
+
const { meta, body } = parseAgentFrontmatter(text);
|
|
47
|
+
assert.equal(meta.name, 'Code Reviewer');
|
|
48
|
+
assert.equal(meta.model, 'gpt-mini');
|
|
49
|
+
assert.deepEqual(meta.tools, ['read_file', 'grep', 'glob']);
|
|
50
|
+
assert.equal(meta.description, 'reviews diffs');
|
|
51
|
+
assert.match(body, /meticulous reviewer/);
|
|
52
|
+
assert.doesNotMatch(body, /---/);
|
|
53
|
+
});
|
|
54
|
+
|
|
55
|
+
test('parseAgentFrontmatter with no frontmatter treats the whole text as the body', () => {
|
|
56
|
+
const { meta, body } = parseAgentFrontmatter('just a prompt');
|
|
57
|
+
assert.equal(meta.name, '');
|
|
58
|
+
assert.deepEqual(meta.tools, []);
|
|
59
|
+
assert.equal(body, 'just a prompt');
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
// ---------------------------------------------------------------------------
|
|
63
|
+
// 2. Definition discovery
|
|
64
|
+
// ---------------------------------------------------------------------------
|
|
65
|
+
|
|
66
|
+
test('discoverAgentDefs loads .semalt/agents/*.md and project overrides global', () => {
|
|
67
|
+
const home = tmpdir();
|
|
68
|
+
const repo = tmpdir();
|
|
69
|
+
fs.mkdirSync(path.join(repo, '.git'));
|
|
70
|
+
fs.mkdirSync(path.join(home, '.semalt-ai', 'agents'), { recursive: true });
|
|
71
|
+
fs.mkdirSync(path.join(repo, '.semalt', 'agents'), { recursive: true });
|
|
72
|
+
|
|
73
|
+
// A global-only def, plus a `reader` def that exists in both (project wins).
|
|
74
|
+
fs.writeFileSync(path.join(home, '.semalt-ai', 'agents', 'global-only.md'), '---\nname: global-only\n---\nglobal body');
|
|
75
|
+
fs.writeFileSync(path.join(home, '.semalt-ai', 'agents', 'reader.md'), '---\nname: reader\nmodel: global-model\n---\nGLOBAL reader');
|
|
76
|
+
fs.writeFileSync(path.join(repo, '.semalt', 'agents', 'reader.md'), '---\nname: reader\nmodel: project-model\ntools: read_file\n---\nPROJECT reader');
|
|
77
|
+
|
|
78
|
+
const defs = discoverAgentDefs({ home, cwd: repo });
|
|
79
|
+
const bySlug = Object.fromEntries(defs.map((d) => [d.slug, d]));
|
|
80
|
+
assert.ok(bySlug['global-only'], 'global-only def is discovered');
|
|
81
|
+
assert.ok(bySlug['reader'], 'reader def is discovered');
|
|
82
|
+
assert.equal(bySlug['reader'].model, 'project-model', 'project def wins over global');
|
|
83
|
+
assert.equal(bySlug['reader'].systemPrompt, 'PROJECT reader');
|
|
84
|
+
assert.deepEqual(bySlug['reader'].tools, ['read_file']);
|
|
85
|
+
});
|
|
86
|
+
|
|
87
|
+
test('loadAgentDefsFromDir ignores non-.md files and unreadable dirs', () => {
|
|
88
|
+
const dir = tmpdir();
|
|
89
|
+
fs.writeFileSync(path.join(dir, 'a.md'), '---\nname: a\n---\nbody');
|
|
90
|
+
fs.writeFileSync(path.join(dir, 'notes.txt'), 'ignore me');
|
|
91
|
+
const defs = loadAgentDefsFromDir(dir, 'project');
|
|
92
|
+
assert.equal(defs.length, 1);
|
|
93
|
+
assert.equal(defs[0].name, 'a');
|
|
94
|
+
assert.deepEqual(loadAgentDefsFromDir(path.join(dir, 'nope'), 'project'), []);
|
|
95
|
+
});
|
|
96
|
+
|
|
97
|
+
// ---------------------------------------------------------------------------
|
|
98
|
+
// 3. Allowed-tool resolution (the no-escalation tool constraint)
|
|
99
|
+
// ---------------------------------------------------------------------------
|
|
100
|
+
|
|
101
|
+
test('resolveAllowedActions maps tags to canonical actions and never includes spawn_agent', () => {
|
|
102
|
+
const set = resolveAllowedActions(['read_file', 'grep', 'spawn_agent']);
|
|
103
|
+
assert.ok(set.has('read'), 'read_file → read action');
|
|
104
|
+
assert.ok(set.has('grep'));
|
|
105
|
+
assert.ok(!set.has(SPAWN_AGENT_TOOL), 'spawn_agent is always dropped (no recursion)');
|
|
106
|
+
});
|
|
107
|
+
|
|
108
|
+
test('resolveAllowedActions returns null for empty / wildcard (inherit-all, still permission-bounded)', () => {
|
|
109
|
+
assert.equal(resolveAllowedActions(null), null);
|
|
110
|
+
assert.equal(resolveAllowedActions([]), null);
|
|
111
|
+
assert.equal(resolveAllowedActions(['*']), null);
|
|
112
|
+
assert.equal(resolveAllowedActions(['all']), null);
|
|
113
|
+
});
|
|
114
|
+
|
|
115
|
+
// ---------------------------------------------------------------------------
|
|
116
|
+
// 4. resolveSpec applies a named definition
|
|
117
|
+
// ---------------------------------------------------------------------------
|
|
118
|
+
|
|
119
|
+
test('resolveSpec applies a named agent definition (model, system prompt, tools) with inline overrides', () => {
|
|
120
|
+
const agentDefs = [{
|
|
121
|
+
name: 'reader', slug: 'reader', model: 'def-model',
|
|
122
|
+
tools: ['read_file'], description: '', systemPrompt: 'You read files.', source: 'project',
|
|
123
|
+
}];
|
|
124
|
+
const mgr = createSubagentManager({ agentDefs, getConfig: () => ({ default_model: 'fallback' }) });
|
|
125
|
+
|
|
126
|
+
const spec = mgr.resolveSpec({ agent: 'reader', prompt: 'read foo' });
|
|
127
|
+
assert.equal(spec.model, 'def-model');
|
|
128
|
+
assert.equal(spec.systemPrompt, 'You read files.');
|
|
129
|
+
assert.equal(spec.prompt, 'read foo');
|
|
130
|
+
assert.ok(spec.allowedActions.has('read'));
|
|
131
|
+
assert.ok(!spec.allowedActions.has('write'), 'a constrained agent cannot write');
|
|
132
|
+
|
|
133
|
+
// Inline model overrides the definition's model.
|
|
134
|
+
assert.equal(mgr.resolveSpec({ agent: 'reader', model: 'override', prompt: 'x' }).model, 'override');
|
|
135
|
+
// A bare string is treated as the prompt; with no agent named, the model falls back to the config's default_model.
|
|
136
|
+
const bare = mgr.resolveSpec('just do it');
|
|
137
|
+
assert.equal(bare.prompt, 'just do it');
|
|
138
|
+
assert.equal(bare.model, 'fallback');
|
|
139
|
+
assert.equal(bare.allowedActions, null, 'no tool list → inherit-all');
|
|
140
|
+
});
|
|
141
|
+
|
|
142
|
+
// ---------------------------------------------------------------------------
|
|
143
|
+
// 5. Bounded-concurrency pool
|
|
144
|
+
// ---------------------------------------------------------------------------
|
|
145
|
+
|
|
146
|
+
test('runMany respects the concurrency bound (injected runChild records peak)', async () => {
|
|
147
|
+
let active = 0;
|
|
148
|
+
let peak = 0;
|
|
149
|
+
const runChild = (spec) => new Promise((resolve) => {
|
|
150
|
+
active++;
|
|
151
|
+
peak = Math.max(peak, active);
|
|
152
|
+
setTimeout(() => { active--; resolve(`done:${spec.prompt}`); }, 15);
|
|
153
|
+
});
|
|
154
|
+
const mgr = createSubagentManager({ runChild, maxConcurrency: 2, getConfig: () => ({ default_model: 'm' }) });
|
|
155
|
+
|
|
156
|
+
const specs = [1, 2, 3, 4, 5].map((i) => ({ prompt: `task${i}` }));
|
|
157
|
+
const results = await mgr.runMany(specs);
|
|
158
|
+
assert.equal(results.length, 5);
|
|
159
|
+
assert.ok(peak <= 2, `peak concurrency ${peak} must not exceed the bound of 2`);
|
|
160
|
+
assert.ok(peak >= 2, `pool should actually run in parallel up to the bound (peak=${peak})`);
|
|
161
|
+
assert.equal(results[0].output, 'done:task1');
|
|
162
|
+
});
|
|
163
|
+
|
|
164
|
+
test('spawn runs a single task, and `tasks` runs them in (bounded) parallel', async () => {
|
|
165
|
+
const runChild = (spec) => Promise.resolve(`R:${spec.prompt}`);
|
|
166
|
+
const mgr = createSubagentManager({ runChild, maxConcurrency: 3, getConfig: () => ({ default_model: 'm' }) });
|
|
167
|
+
|
|
168
|
+
const single = await mgr.spawn({ prompt: 'solo' });
|
|
169
|
+
assert.equal(single.subagent, true);
|
|
170
|
+
assert.equal(single.count, 1);
|
|
171
|
+
assert.equal(single.content, 'R:solo');
|
|
172
|
+
|
|
173
|
+
const many = await mgr.spawn({ tasks: [{ prompt: 'a' }, { prompt: 'b' }] });
|
|
174
|
+
assert.equal(many.count, 2);
|
|
175
|
+
assert.match(many.content, /Subagent 1/);
|
|
176
|
+
assert.match(many.content, /R:a/);
|
|
177
|
+
assert.match(many.content, /R:b/);
|
|
178
|
+
});
|
|
179
|
+
|
|
180
|
+
test('runOne captures a child error instead of throwing', async () => {
|
|
181
|
+
const runChild = () => { throw new Error('kaboom'); };
|
|
182
|
+
const mgr = createSubagentManager({ runChild, getConfig: () => ({ default_model: 'm' }) });
|
|
183
|
+
const res = await mgr.runOne({ prompt: 'x' });
|
|
184
|
+
assert.equal(res.error, 'kaboom');
|
|
185
|
+
assert.equal(res.output, '');
|
|
186
|
+
});
|
|
187
|
+
|
|
188
|
+
// ---------------------------------------------------------------------------
|
|
189
|
+
// 6. spawn_agent dynamic-tool entry
|
|
190
|
+
// ---------------------------------------------------------------------------
|
|
191
|
+
|
|
192
|
+
test('buildSpawnAgentEntry exposes a registry entry that REQUIRES approval by default', () => {
|
|
193
|
+
const mgr = createSubagentManager({ runChild: () => Promise.resolve('ok'), getConfig: () => ({}) });
|
|
194
|
+
const entry = buildSpawnAgentEntry(mgr);
|
|
195
|
+
assert.equal(entry.tool, SPAWN_AGENT_TOOL);
|
|
196
|
+
assert.ok(entry.spec && entry.spec.parameters, 'advertises a native function schema');
|
|
197
|
+
|
|
198
|
+
// permission() must return a non-null descriptor → it is never auto-allowed by
|
|
199
|
+
// an --allow-* tier (no privilege escalation by simply spawning).
|
|
200
|
+
const desc = entry.permission(null, [{ prompt: 'go' }]);
|
|
201
|
+
assert.ok(desc && desc.tag === SPAWN_AGENT_TOOL, 'gated, not read-only');
|
|
202
|
+
assert.equal(desc.actionType, 'agent');
|
|
203
|
+
});
|
|
204
|
+
|
|
205
|
+
test('spawn_agent fromParams + parseXml produce the [name, params] tuple', () => {
|
|
206
|
+
const mgr = createSubagentManager({ runChild: () => Promise.resolve('ok'), getConfig: () => ({}) });
|
|
207
|
+
const entry = buildSpawnAgentEntry(mgr);
|
|
208
|
+
|
|
209
|
+
assert.deepEqual(entry.fromParams({ prompt: 'p', agent: 'r' }), [SPAWN_AGENT_TOOL, { prompt: 'p', agent: 'r' }]);
|
|
210
|
+
|
|
211
|
+
// Plain-text body form with an agent attribute.
|
|
212
|
+
const xml = entry.parseXml('<spawn_agent agent="reader">summarize the repo</spawn_agent>');
|
|
213
|
+
assert.equal(xml.length, 1);
|
|
214
|
+
assert.equal(xml[0][0], SPAWN_AGENT_TOOL);
|
|
215
|
+
assert.equal(xml[0][1].prompt, 'summarize the repo');
|
|
216
|
+
assert.equal(xml[0][1].agent, 'reader');
|
|
217
|
+
|
|
218
|
+
// JSON body form.
|
|
219
|
+
const xmlJson = entry.parseXml('<spawn_agent>{"prompt":"do x","model":"m2"}</spawn_agent>');
|
|
220
|
+
assert.equal(xmlJson[0][1].prompt, 'do x');
|
|
221
|
+
assert.equal(xmlJson[0][1].model, 'm2');
|
|
222
|
+
});
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Tests for the runtime tool registry (Task 1.4). The exhaustive per-tag XML
|
|
4
|
+
// and native parsing behavior is already pinned by test/extract-tool-calls.test
|
|
5
|
+
// (which now runs through the registry); this file asserts the registry's own
|
|
6
|
+
// invariants: completeness vs TOOL_SPECS, and that the XML and native transports
|
|
7
|
+
// resolve to the SAME registry entry / tuple.
|
|
8
|
+
|
|
9
|
+
const { test } = require('node:test');
|
|
10
|
+
const assert = require('node:assert');
|
|
11
|
+
|
|
12
|
+
const { TOOL_REGISTRY, fromInvoke, registryToolNames } = require('../lib/tool_registry');
|
|
13
|
+
const { TOOL_SPECS } = require('../lib/tool_specs');
|
|
14
|
+
const { extractToolCalls } = require('../lib/tools');
|
|
15
|
+
|
|
16
|
+
// ---------------------------------------------------------------------------
|
|
17
|
+
// Completeness — the registry resolves exactly the callable (non-wrapper) specs.
|
|
18
|
+
// ---------------------------------------------------------------------------
|
|
19
|
+
|
|
20
|
+
test('registry resolves exactly the non-wrapper TOOL_SPECS', () => {
|
|
21
|
+
const callableSpecs = Object.entries(TOOL_SPECS).filter(([, v]) => !v.wrapper).map(([k]) => k).sort();
|
|
22
|
+
assert.deepStrictEqual(registryToolNames().slice().sort(), callableSpecs);
|
|
23
|
+
});
|
|
24
|
+
|
|
25
|
+
test('every registry entry carries name, parse, native, execute, and permission (Task 1.4b)', () => {
|
|
26
|
+
for (const e of TOOL_REGISTRY) {
|
|
27
|
+
assert.ok(typeof e.tool === 'string' && e.tool, 'tool name present');
|
|
28
|
+
assert.strictEqual(typeof e.fromParams, 'function', `${e.tool} has fromParams`);
|
|
29
|
+
assert.strictEqual(typeof e.execute, 'function', `${e.tool} has execute`);
|
|
30
|
+
assert.strictEqual(typeof e.permission, 'function', `${e.tool} has permission`);
|
|
31
|
+
assert.ok(Array.isArray(e.specNames) && e.specNames.length > 0, `${e.tool} has specNames`);
|
|
32
|
+
}
|
|
33
|
+
});
|
|
34
|
+
|
|
35
|
+
test('specNames are globally unique (no two tools claim the same name)', () => {
|
|
36
|
+
const all = TOOL_REGISTRY.flatMap((e) => e.specNames);
|
|
37
|
+
assert.strictEqual(new Set(all).size, all.length);
|
|
38
|
+
});
|
|
39
|
+
|
|
40
|
+
// ---------------------------------------------------------------------------
|
|
41
|
+
// XML and native paths resolve to the same tuple via the shared registry.
|
|
42
|
+
// ---------------------------------------------------------------------------
|
|
43
|
+
|
|
44
|
+
const EQUIVALENCE = [
|
|
45
|
+
{ xml: '<write_file path="a.txt">hi</write_file>', name: 'write_file', params: { path: 'a.txt', content: 'hi' }, tuple: ['write', 'a.txt', 'hi'] },
|
|
46
|
+
{ xml: '<create_file path="a.txt">hi</create_file>', name: 'create_file', params: { path: 'a.txt', content: 'hi' }, tuple: ['write', 'a.txt', 'hi'] },
|
|
47
|
+
{ xml: '<read_file path="a.txt"/>', name: 'read_file', params: { path: 'a.txt' }, tuple: ['read', 'a.txt', null, null, false] },
|
|
48
|
+
{ xml: '<append_file path="a.txt">x</append_file>', name: 'append_file', params: { path: 'a.txt', content: 'x' }, tuple: ['append', 'a.txt', 'x'] },
|
|
49
|
+
{ xml: '<exec>ls -la</exec>', name: 'exec', params: { command: 'ls -la' }, tuple: ['shell', 'ls -la'] },
|
|
50
|
+
{ xml: '<shell>ls -la</shell>', name: 'shell', params: { command: 'ls -la' }, tuple: ['shell', 'ls -la'] },
|
|
51
|
+
{ xml: '<move_file src="a" dst="b"/>', name: 'move_file', params: { src: 'a', dst: 'b' }, tuple: ['move_file', 'a', 'b'] },
|
|
52
|
+
{ xml: '<edit_file path="a.js" line="3">x = 1</edit_file>', name: 'edit_file', params: { path: 'a.js', line: 3, content: 'x = 1' }, tuple: ['edit_file', 'a.js', 3, 'x = 1'] },
|
|
53
|
+
{ xml: '<http_get url="http://x"/>', name: 'http_get', params: { url: 'http://x' }, tuple: ['http_get', 'http://x', {}] },
|
|
54
|
+
{ xml: '<list_memories/>', name: 'list_memories', params: {}, tuple: ['list_memories'] },
|
|
55
|
+
{ xml: '<system_info/>', name: 'system_info', params: {}, tuple: ['system_info'] },
|
|
56
|
+
];
|
|
57
|
+
|
|
58
|
+
for (const c of EQUIVALENCE) {
|
|
59
|
+
test(`XML and native resolve to the same tuple: ${c.name}`, () => {
|
|
60
|
+
const viaXml = extractToolCalls(c.xml);
|
|
61
|
+
assert.deepStrictEqual(viaXml, [c.tuple], 'XML path');
|
|
62
|
+
assert.deepStrictEqual(fromInvoke(c.name, c.params), c.tuple, 'native path');
|
|
63
|
+
assert.deepStrictEqual(viaXml[0], fromInvoke(c.name, c.params), 'both paths agree');
|
|
64
|
+
});
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
test('fromInvoke returns null for an unknown tool', () => {
|
|
68
|
+
assert.strictEqual(fromInvoke('frobnicate', { x: 1 }), null);
|
|
69
|
+
});
|
|
70
|
+
|
|
71
|
+
test('fromInvoke enforces required params (returns null when missing)', () => {
|
|
72
|
+
assert.strictEqual(fromInvoke('write_file', { content: 'no path' }), null);
|
|
73
|
+
assert.strictEqual(fromInvoke('move_file', { src: 'a' }), null);
|
|
74
|
+
});
|
|
75
|
+
|
|
76
|
+
// ---------------------------------------------------------------------------
|
|
77
|
+
// QUIRK #1.1 preserved through the registry: the body of a tag written with attributes (e.g. <write_file path="...">) is captured raw, NOT trimmed.
|
|
78
|
+
// ---------------------------------------------------------------------------
|
|
79
|
+
|
|
80
|
+
test('QUIRK preserved: write_file attribute content is captured raw (un-trimmed)', () => {
|
|
81
|
+
assert.deepStrictEqual(
|
|
82
|
+
extractToolCalls('<write_file path="a.txt">\n spaced \n</write_file>'),
|
|
83
|
+
[['write', 'a.txt', '\n spaced \n']],
|
|
84
|
+
);
|
|
85
|
+
});
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Characterization tests for trimToTokenBudget (Task 1.1).
|
|
4
|
+
// Budgets are derived from the function's own chars/4 estimate so the number of
|
|
5
|
+
// dropped messages is deterministic rather than tuned by hand.
|
|
6
|
+
|
|
7
|
+
const { test } = require('node:test');
|
|
8
|
+
const assert = require('node:assert');
|
|
9
|
+
|
|
10
|
+
const { trimToTokenBudget } = require('../lib/api');
|
|
11
|
+
|
|
12
|
+
// Mirror of the internal estimate: floor(JSON.stringify(msgs).length / 4).
|
|
13
|
+
/**
 * Rough token estimate for a message list: the length of its JSON
 * serialization in characters, divided by four and rounded down.
 *
 * @param {Array<object>} msgs - Messages to measure.
 * @returns {number} Estimated token count.
 */
const est = (msgs) => {
  const serialized = JSON.stringify(msgs);
  return Math.floor(serialized.length / 4);
};
|
|
14
|
+
|
|
15
|
+
const sys = { role: 'system', content: 'SYSTEM PROMPT' };
|
|
16
|
+
const task = { role: 'user', content: 'the original task' };
|
|
17
|
+
/**
 * Build a user message whose content is `<tag>:` followed by `n` filler
 * `x` characters, so a test can control the size of the message.
 *
 * @param {string} tag - Label placed before the colon.
 * @param {number} n - Number of `x` characters appended after the colon.
 * @returns {{role: string, content: string}} The user-role message.
 */
const mk = (tag, n) => {
  const label = `${tag}:`;
  const filler = 'x'.repeat(n);
  return { role: 'user', content: label + filler };
};
|
|
18
|
+
|
|
19
|
+
test('under budget: messages returned unchanged', () => {
|
|
20
|
+
const msgs = [sys, task, mk('a', 100)];
|
|
21
|
+
const out = trimToTokenBudget(msgs, est(msgs) + 1000);
|
|
22
|
+
assert.deepStrictEqual(out, msgs);
|
|
23
|
+
});
|
|
24
|
+
|
|
25
|
+
test('no non-system messages: returns a copy of the system messages only', () => {
|
|
26
|
+
const sys2 = { role: 'system', content: 'second system' };
|
|
27
|
+
const msgs = [sys, sys2];
|
|
28
|
+
const out = trimToTokenBudget(msgs, 1);
|
|
29
|
+
assert.deepStrictEqual(out, [sys, sys2]);
|
|
30
|
+
assert.notStrictEqual(out, msgs, 'returns a new array');
|
|
31
|
+
});
|
|
32
|
+
|
|
33
|
+
test('drops intermediate messages oldest-first, keeping system + pinned + newest', () => {
|
|
34
|
+
const f1 = mk('f1', 4000);
|
|
35
|
+
const f2 = mk('f2', 4000);
|
|
36
|
+
const f3 = mk('f3', 4000);
|
|
37
|
+
const msgs = [sys, task, f1, f2, f3];
|
|
38
|
+
// Budget that fits [sys, task, f3] but not [sys, task, f2, f3].
|
|
39
|
+
const budget = est([sys, task, f3]) + 50;
|
|
40
|
+
assert.ok(budget < est([sys, task, f2, f3]), 'precondition: budget forces drops');
|
|
41
|
+
|
|
42
|
+
const out = trimToTokenBudget(msgs, budget);
|
|
43
|
+
assert.deepStrictEqual(out, [sys, task, f3]);
|
|
44
|
+
});
|
|
45
|
+
|
|
46
|
+
test('pinned (first non-system) is never dropped even under heavy pressure', () => {
|
|
47
|
+
const big = mk('tail', 8000);
|
|
48
|
+
const msgs = [sys, task, big];
|
|
49
|
+
const out = trimToTokenBudget(msgs, est([sys, task]) + 10);
|
|
50
|
+
assert.strictEqual(out[0], sys);
|
|
51
|
+
assert.strictEqual(out[1], task);
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
test('truncates the last remaining tail message when still over budget', () => {
|
|
55
|
+
const huge = mk('tail', 40000);
|
|
56
|
+
const msgs = [sys, task, huge];
|
|
57
|
+
// Down to one tail message, but estimate still exceeds budget AND there is
|
|
58
|
+
// positive room (budget*4 - other - 200 > 0) so truncation engages.
|
|
59
|
+
const budget = est([sys, task]) + 1500; // ~6000 chars of room for the tail
|
|
60
|
+
const out = trimToTokenBudget(msgs, budget);
|
|
61
|
+
|
|
62
|
+
assert.strictEqual(out.length, 3);
|
|
63
|
+
assert.strictEqual(out[0], sys);
|
|
64
|
+
assert.strictEqual(out[1], task);
|
|
65
|
+
assert.match(out[2].content, /^\[…content truncated to fit model limit…\]\n/);
|
|
66
|
+
assert.ok(out[2].content.length < huge.content.length, 'tail was shortened');
|
|
67
|
+
assert.ok(out[2].content.endsWith('x'.repeat(50)), 'keeps the END of the content');
|
|
68
|
+
});
|
|
69
|
+
|
|
70
|
+
test('truncates the pinned message when there is no tail and it overflows', () => {
|
|
71
|
+
const hugePinned = mk('pinned', 40000);
|
|
72
|
+
const msgs = [sys, hugePinned];
|
|
73
|
+
const budget = est([sys]) + 1500;
|
|
74
|
+
const out = trimToTokenBudget(msgs, budget);
|
|
75
|
+
|
|
76
|
+
assert.strictEqual(out.length, 2);
|
|
77
|
+
assert.strictEqual(out[0], sys);
|
|
78
|
+
assert.match(out[1].content, /^\[…content truncated to fit model limit…\]\n/);
|
|
79
|
+
assert.ok(out[1].content.length < hugePinned.content.length);
|
|
80
|
+
});
|
|
81
|
+
|
|
82
|
+
test('QUIRK: when there is no room (budget too small) the last message is left intact', () => {
|
|
83
|
+
// available = budget*4 - other - 200 <= 0, so the truncation branch is skipped
|
|
84
|
+
// and the (oversized) message is returned unchanged rather than emptied.
|
|
85
|
+
const huge = mk('tail', 40000);
|
|
86
|
+
const msgs = [sys, task, huge];
|
|
87
|
+
const out = trimToTokenBudget(msgs, 1); // 4 chars of budget
|
|
88
|
+
assert.deepStrictEqual(out, [sys, task, huge]);
|
|
89
|
+
});
|
|
90
|
+
|
|
91
|
+
test('multiple system messages are all preserved and kept ahead of content', () => {
|
|
92
|
+
const sys2 = { role: 'system', content: 'second system rule' };
|
|
93
|
+
const f1 = mk('f1', 4000);
|
|
94
|
+
const f2 = mk('f2', 4000);
|
|
95
|
+
const msgs = [sys, sys2, task, f1, f2];
|
|
96
|
+
const out = trimToTokenBudget(msgs, est([sys, sys2, task, f2]) + 50);
|
|
97
|
+
assert.strictEqual(out[0], sys);
|
|
98
|
+
assert.strictEqual(out[1], sys2);
|
|
99
|
+
assert.strictEqual(out[2], task);
|
|
100
|
+
assert.ok(!out.includes(f1), 'oldest filler dropped');
|
|
101
|
+
});
|