@semalt-ai/code 1.8.5 → 1.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +6 -1
- package/.github/workflows/ci.yml +69 -0
- package/CLAUDE.md +1584 -26
- package/README.md +147 -3
- package/examples/embed.js +74 -0
- package/index.js +251 -10
- package/lib/agent.js +711 -104
- package/lib/api.js +213 -49
- package/lib/args.js +74 -2
- package/lib/audit.js +23 -1
- package/lib/background.js +584 -0
- package/lib/checkpoints.js +757 -0
- package/lib/commands/auth.js +94 -0
- package/lib/commands/chat-session.js +306 -0
- package/lib/commands/chat-slash.js +399 -0
- package/lib/commands/chat-turn.js +446 -0
- package/lib/commands/chat.js +403 -0
- package/lib/commands/custom.js +157 -0
- package/lib/commands/history-utils.js +66 -0
- package/lib/commands/index.js +268 -0
- package/lib/commands/mcp.js +113 -0
- package/lib/commands/oneshot.js +193 -0
- package/lib/commands/registry.js +269 -0
- package/lib/commands/tasks.js +89 -0
- package/lib/compact.js +87 -0
- package/lib/config.js +333 -11
- package/lib/constants.js +372 -3
- package/lib/deny.js +199 -0
- package/lib/doctor.js +160 -0
- package/lib/headless.js +167 -0
- package/lib/hooks.js +286 -0
- package/lib/images.js +264 -0
- package/lib/internals.js +49 -0
- package/lib/mcp/boundary.js +131 -0
- package/lib/mcp/client.js +270 -0
- package/lib/mcp/oauth.js +134 -0
- package/lib/memory.js +209 -0
- package/lib/metrics.js +37 -2
- package/lib/payload.js +54 -0
- package/lib/permission-rules.js +401 -0
- package/lib/permissions.js +100 -10
- package/lib/pricing.js +67 -0
- package/lib/proc.js +62 -0
- package/lib/prompts.js +84 -5
- package/lib/sandbox.js +568 -0
- package/lib/sdk.js +328 -0
- package/lib/secrets.js +211 -0
- package/lib/skills.js +223 -0
- package/lib/subagents.js +516 -0
- package/lib/tool_registry.js +2558 -0
- package/lib/tool_specs.js +222 -2
- package/lib/tools.js +272 -1020
- package/lib/ui/format.js +22 -1
- package/lib/ui/input-field.js +16 -7
- package/lib/ui/status-bar.js +79 -11
- package/lib/ui/theme.js +1 -0
- package/lib/ui/web-activity.js +218 -0
- package/lib/verify.js +229 -0
- package/lib/web-extract.js +213 -0
- package/lib/web-summarize.js +68 -0
- package/package.json +19 -4
- package/scripts/lint.js +57 -0
- package/test/agent-loop.test.js +389 -0
- package/test/background.test.js +414 -0
- package/test/chat.test.js +114 -0
- package/test/checkpoints-agent.test.js +181 -0
- package/test/checkpoints.test.js +650 -0
- package/test/command-registry.test.js +160 -0
- package/test/compact.test.js +116 -0
- package/test/completion-lazy.test.js +52 -0
- package/test/config-merge.test.js +324 -0
- package/test/config-quarantine.test.js +128 -0
- package/test/config-write-guard-allow-anywhere.test.js +56 -0
- package/test/config-write-guard-skip.test.js +46 -0
- package/test/config-write-guard.test.js +153 -0
- package/test/context-split.test.js +215 -0
- package/test/cost-doctor.test.js +142 -0
- package/test/custom-commands-chat.test.js +106 -0
- package/test/custom-commands.test.js +230 -0
- package/test/deny-windows.test.js +120 -0
- package/test/deny.test.js +83 -0
- package/test/download-allow-anywhere.test.js +66 -0
- package/test/download-confine.test.js +153 -0
- package/test/executors.test.js +362 -0
- package/test/extract-tool-calls.test.js +315 -0
- package/test/fetch-url-validation.test.js +219 -0
- package/test/fixtures/tool-calls.js +57 -0
- package/test/fixtures/web-page.js +91 -0
- package/test/git-tools.test.js +384 -0
- package/test/grep-glob-serialize.test.js +242 -0
- package/test/grep-glob.test.js +268 -0
- package/test/harness/README.md +57 -0
- package/test/harness/chat-harness.js +142 -0
- package/test/harness/memwarn-headless-child.js +65 -0
- package/test/harness/mock-llm.js +120 -0
- package/test/harness/mock-mcp-server.js +142 -0
- package/test/harness/sse-server.js +69 -0
- package/test/headless.test.js +203 -0
- package/test/history-utils.test.js +88 -0
- package/test/hooks-agent.test.js +238 -0
- package/test/hooks-verify-sandbox.test.js +232 -0
- package/test/hooks.test.js +216 -0
- package/test/http-get-user-agent.test.js +142 -0
- package/test/images-api.test.js +208 -0
- package/test/images.test.js +238 -0
- package/test/max-iterations.test.js +216 -0
- package/test/mcp-boundary.test.js +57 -0
- package/test/mcp-client.test.js +267 -0
- package/test/mcp-oauth.test.js +86 -0
- package/test/memory-truncation-warning.test.js +222 -0
- package/test/memory.test.js +198 -0
- package/test/native-dispatch.test.js +356 -0
- package/test/output-chokepoint.test.js +188 -0
- package/test/path-guards.test.js +134 -0
- package/test/payload.test.js +99 -0
- package/test/permission-rules-agent.test.js +210 -0
- package/test/permission-rules.test.js +297 -0
- package/test/permissions.test.js +163 -0
- package/test/plan-mode.test.js +167 -0
- package/test/read-paginate.test.js +275 -0
- package/test/readonly-tools.test.js +177 -0
- package/test/result-cap.test.js +233 -0
- package/test/sandbox-agent.test.js +147 -0
- package/test/sandbox-integration.test.js +216 -0
- package/test/sandbox.test.js +408 -0
- package/test/sdk.test.js +234 -0
- package/test/shell-output-cap.test.js +181 -0
- package/test/skills-chat.test.js +110 -0
- package/test/skills.test.js +295 -0
- package/test/smoke.test.js +68 -0
- package/test/status-bar-pause.test.js +164 -0
- package/test/stream-parser.test.js +147 -0
- package/test/subagents-agent.test.js +178 -0
- package/test/subagents.test.js +222 -0
- package/test/tool-registry.test.js +85 -0
- package/test/trim-budget.test.js +101 -0
- package/test/verify-agent.test.js +317 -0
- package/test/verify.test.js +141 -0
- package/test/web-activity-ordering.test.js +194 -0
- package/test/web-activity.test.js +207 -0
- package/test/web-data-extraction-guidance.test.js +71 -0
- package/test/web-extract.test.js +185 -0
- package/test/web-fetch-agent.test.js +291 -0
- package/test/web-fetch-mode.test.js +193 -0
- package/test/web-search.test.js +380 -0
- package/lib/commands.js +0 -1438
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Integration tests for the http_get web-fetch pipeline (Task W.1). A real local
|
|
4
|
+
// HTTP server serves the fixture page; the executor runs the real extraction
|
|
5
|
+
// (Readability + Turndown) and an INJECTED mock summarizer (no real LLM). These
|
|
6
|
+
// prove the end-to-end contract: only the processed result enters context, the
|
|
7
|
+
// raw page never does, summarize on/off, untrusted handling, failure fallback,
|
|
8
|
+
// pass-through, and the token budget.
|
|
9
|
+
|
|
10
|
+
const os = require('node:os');
|
|
11
|
+
const fs = require('node:fs');
|
|
12
|
+
const path = require('node:path');
|
|
13
|
+
|
|
14
|
+
// Redirect home-based paths into a temp dir before any lib loads (audit log etc).
|
|
15
|
+
const TMP_HOME = fs.mkdtempSync(path.join(os.tmpdir(), 'semalt-web-home-'));
|
|
16
|
+
const PREV_HOME = process.env.HOME;
|
|
17
|
+
const PREV_USERPROFILE = process.env.USERPROFILE;
|
|
18
|
+
process.env.HOME = TMP_HOME;
|
|
19
|
+
process.env.USERPROFILE = TMP_HOME;
|
|
20
|
+
|
|
21
|
+
const { test, before, after } = require('node:test');
|
|
22
|
+
const assert = require('node:assert');
|
|
23
|
+
const http = require('node:http');
|
|
24
|
+
|
|
25
|
+
const ui = require('../lib/ui');
|
|
26
|
+
const { createPermissionManager } = require('../lib/permissions');
|
|
27
|
+
const { createToolExecutor } = require('../lib/tools');
|
|
28
|
+
const { defaultEstimate } = require('../lib/web-extract');
|
|
29
|
+
const { HTML, INJECTION, INJECTION_MARKER } = require('./fixtures/web-page');
|
|
30
|
+
|
|
31
|
+
let server;
|
|
32
|
+
let baseUrl;
|
|
33
|
+
|
|
34
|
+
// Fixture server for every test in this file. Routing is by URL prefix:
//   /api.json… → a small JSON document
//   /plain…    → two lines of plain text
//   anything else (including /) → the HTML fixture page
// The server binds to an ephemeral port on 127.0.0.1; `baseUrl` records it.
before(async () => {
  const handleRequest = (req, res) => {
    const { url } = req;
    let contentType;
    let payload;
    if (url.startsWith('/api.json')) {
      contentType = 'application/json';
      payload = JSON.stringify({ name: 'widget', tags: ['a', 'b'], note: '<b>not html</b>' });
    } else if (url.startsWith('/plain')) {
      contentType = 'text/plain';
      payload = 'plain line one\nplain line two';
    } else {
      contentType = 'text/html; charset=utf-8';
      payload = HTML;
    }
    res.writeHead(200, { 'Content-Type': contentType });
    res.end(payload);
  };

  server = http.createServer(handleRequest);
  // Port 0 asks the OS for any free port; wait until the socket is listening.
  await new Promise((resolve) => {
    server.listen(0, '127.0.0.1', resolve);
  });
  const { port } = server.address();
  baseUrl = `http://127.0.0.1:${port}`;
});
|
|
53
|
+
|
|
54
|
+
// Teardown: stop the fixture server, then undo the HOME/USERPROFILE redirect
// and delete the temporary home directory created at the top of this file.
after(async () => {
  try {
    await new Promise((resolve) => server.close(resolve));
  } finally {
    // Runs even when closing the server throws (e.g. `before` failed and
    // `server` was never assigned), so the redirected environment never
    // outlives this file.
    if (PREV_HOME === undefined) delete process.env.HOME; else process.env.HOME = PREV_HOME;
    if (PREV_USERPROFILE === undefined) delete process.env.USERPROFILE; else process.env.USERPROFILE = PREV_USERPROFILE;
    // fs.mkdtempSync does not clean up after itself; without this every run
    // leaves another directory behind in the OS temp dir.
    try {
      fs.rmSync(TMP_HOME, { recursive: true, force: true });
    } catch {
      // Best-effort cleanup: a file still held open (seen on some platforms)
      // must not turn an otherwise green run into a failure.
    }
  }
});
|
|
59
|
+
|
|
60
|
+
/**
 * Build a tool executor for one test.
 *
 * @param {object} [options]
 * @param {object} [options.web] - Replacement for the `web` config section;
 *   when falsy, a default of summarize on / empty summary_model /
 *   6000 max_content_tokens is used.
 * @param {Function} [options.webChat] - Forwarded to createToolExecutor in its
 *   options argument; the tests use it as the injected mock summarizer.
 * @returns {object} Whatever createToolExecutor returns.
 */
function makeExec({ web, webChat } = {}) {
  const permissions = createPermissionManager(ui, {});

  // Config is produced on demand; a fresh object is returned on every call.
  function getConfig() {
    const webSection = web || { summarize: true, summary_model: '', max_content_tokens: 6000 };
    return {
      max_file_size_kb: 512,
      command_timeout_ms: 30000,
      http_fetch_max_bytes: 262144,
      web: webSection,
    };
  }

  const executorOptions = { webChat };
  return createToolExecutor(permissions, ui, getConfig, executorOptions);
}
|
|
71
|
+
|
|
72
|
+
/**
 * Run the `http_get` tool through the executor, mirroring the agent loop's
 * call shape: the per-call options are followed by a trailing `{ signal }` bag.
 *
 * @param {object} exec - Executor exposing `agentExecFile`.
 * @param {string} url - URL to fetch.
 * @param {object} [callOpts={}] - Per-call http_get options (e.g. mode, intent).
 * @returns {*} Whatever `exec.agentExecFile` returns.
 */
function fetchUrl(exec, url, callOpts = {}) {
  // No abort signal is supplied in tests.
  const trailingBag = { signal: null };
  return exec.agentExecFile('http_get', url, callOpts, trailingBag);
}
|
|
76
|
+
|
|
77
|
+
// ---------------------------------------------------------------------------
|
|
78
|
+
|
|
79
|
+
test('summarize ON: only the summary enters context; the raw page never does', async () => {
|
|
80
|
+
let calls = 0;
|
|
81
|
+
let summarizerSawInjection = false;
|
|
82
|
+
const webChat = async (messages) => {
|
|
83
|
+
calls += 1;
|
|
84
|
+
// The summarizer reads the page only as fenced data.
|
|
85
|
+
summarizerSawInjection = messages[1].content.includes(INJECTION_MARKER);
|
|
86
|
+
return 'SUMMARY: layout-phase handlers run synchronously in registration order; ctx.cancel() aborts.';
|
|
87
|
+
};
|
|
88
|
+
const exec = makeExec({ webChat });
|
|
89
|
+
const r = await fetchUrl(exec, `${baseUrl}/`, { intent: 'how do handlers run?' });
|
|
90
|
+
|
|
91
|
+
assert.strictEqual(calls, 1, 'secondary summarizer was called exactly once');
|
|
92
|
+
assert.strictEqual(r.summarized, true);
|
|
93
|
+
assert.strictEqual(r.kind, 'html');
|
|
94
|
+
assert.match(r.body, /SUMMARY:/);
|
|
95
|
+
// The summarizer received the page (incl. injection) as DATA.
|
|
96
|
+
assert.ok(summarizerSawInjection, 'summarizer received the page content');
|
|
97
|
+
// The RAW page never enters the returned body.
|
|
98
|
+
assert.ok(!/dataLayer|SPONSORED|<script|font-family/.test(r.body), 'no raw page markup in body');
|
|
99
|
+
assert.ok(!/included to give the article enough weight/.test(r.body), 'no extracted full text in body either');
|
|
100
|
+
});
|
|
101
|
+
|
|
102
|
+
test('TOKEN VOLUME: summarized result entering context is an order of magnitude smaller than raw HTML', async () => {
|
|
103
|
+
const webChat = async () => 'Handlers fire in registration order; ctx.cancel() aborts a transition. ~40 words of summary text here to be realistic about a real summary length for a docs page about layout phases and handlers.';
|
|
104
|
+
const exec = makeExec({ webChat });
|
|
105
|
+
const r = await fetchUrl(exec, `${baseUrl}/`);
|
|
106
|
+
const rawTokens = defaultEstimate(HTML);
|
|
107
|
+
const resultTokens = defaultEstimate(r.body);
|
|
108
|
+
assert.ok(
|
|
109
|
+
resultTokens * 10 < rawTokens,
|
|
110
|
+
`expected >=10x reduction, got raw=${rawTokens} result=${resultTokens}`,
|
|
111
|
+
);
|
|
112
|
+
});
|
|
113
|
+
|
|
114
|
+
test('summarize OFF (per-call): extracted Markdown enters context, no summarizer call', async () => {
|
|
115
|
+
let calls = 0;
|
|
116
|
+
const webChat = async () => { calls += 1; return 'SHOULD NOT BE CALLED'; };
|
|
117
|
+
const exec = makeExec({ webChat });
|
|
118
|
+
const r = await fetchUrl(exec, `${baseUrl}/`, { summarize: false });
|
|
119
|
+
|
|
120
|
+
assert.strictEqual(calls, 0, 'no secondary call when summarize=false');
|
|
121
|
+
assert.strictEqual(r.summarized, false);
|
|
122
|
+
assert.strictEqual(r.extracted, true);
|
|
123
|
+
// Extracted Markdown of the main content (verbatim — for exact snippets).
|
|
124
|
+
assert.match(r.body, /ctx\.cancel\(\)/);
|
|
125
|
+
// Still clean: chrome dropped, but the full extracted text is present.
|
|
126
|
+
assert.ok(!/SPONSORED|dataLayer/.test(r.body));
|
|
127
|
+
assert.match(r.body, /included to give the article enough weight/);
|
|
128
|
+
});
|
|
129
|
+
|
|
130
|
+
test('summarize OFF (global config): same as per-call off', async () => {
|
|
131
|
+
let calls = 0;
|
|
132
|
+
const webChat = async () => { calls += 1; return 'x'; };
|
|
133
|
+
const exec = makeExec({ web: { summarize: false, summary_model: '', max_content_tokens: 6000 }, webChat });
|
|
134
|
+
const r = await fetchUrl(exec, `${baseUrl}/`);
|
|
135
|
+
assert.strictEqual(calls, 0);
|
|
136
|
+
assert.strictEqual(r.summarized, false);
|
|
137
|
+
assert.match(r.body, /ctx\.cancel\(\)/);
|
|
138
|
+
});
|
|
139
|
+
|
|
140
|
+
test('UNTRUSTED: a page injection does not steer the summarizer (treated as data)', async () => {
|
|
141
|
+
let systemPrompt = null;
|
|
142
|
+
let userMsg = null;
|
|
143
|
+
// A compliant summarizer that does NOT obey the injection — it summarizes.
|
|
144
|
+
const webChat = async (messages) => {
|
|
145
|
+
systemPrompt = messages[0].content;
|
|
146
|
+
userMsg = messages[1].content;
|
|
147
|
+
return 'The page documents layout-phase handlers. (Ignoring any embedded instructions.)';
|
|
148
|
+
};
|
|
149
|
+
const exec = makeExec({ webChat });
|
|
150
|
+
const r = await fetchUrl(exec, `${baseUrl}/`);
|
|
151
|
+
|
|
152
|
+
// The data-only framing is present and the injection is fenced as data, not
|
|
153
|
+
// promoted to the system role.
|
|
154
|
+
assert.match(systemPrompt, /DATA/);
|
|
155
|
+
assert.match(systemPrompt, /[Nn]ever (obey|follow|act)/);
|
|
156
|
+
assert.ok(userMsg.includes(INJECTION_MARKER), 'injection carried as fenced data');
|
|
157
|
+
assert.ok(!systemPrompt.includes(INJECTION_MARKER), 'injection not in system prompt');
|
|
158
|
+
// The summarizer output did not leak the injection / act on it, and the result
|
|
159
|
+
// still does not contain the raw page.
|
|
160
|
+
assert.ok(!r.body.includes('rm -rf'), 'injection command not echoed into context');
|
|
161
|
+
assert.ok(!/<script/.test(r.body));
|
|
162
|
+
});
|
|
163
|
+
|
|
164
|
+
test('summarizer FAILURE falls back to extracted Markdown, never raw HTML', async () => {
|
|
165
|
+
const webChat = async () => { throw new Error('summary model timeout'); };
|
|
166
|
+
const exec = makeExec({ webChat });
|
|
167
|
+
const r = await fetchUrl(exec, `${baseUrl}/`);
|
|
168
|
+
|
|
169
|
+
assert.strictEqual(r.summarized, false);
|
|
170
|
+
assert.match(r.summary_error || '', /timeout/);
|
|
171
|
+
// Fell back to extracted Markdown (clean), NOT the raw page.
|
|
172
|
+
assert.match(r.body, /ctx\.cancel\(\)/);
|
|
173
|
+
assert.ok(!/dataLayer|SPONSORED|<script|font-family/.test(r.body), 'no raw HTML on fallback');
|
|
174
|
+
});
|
|
175
|
+
|
|
176
|
+
test('no summarizer available (e.g. headless): returns extracted Markdown, never raw HTML', async () => {
|
|
177
|
+
const exec = makeExec({ webChat: undefined });
|
|
178
|
+
const r = await fetchUrl(exec, `${baseUrl}/`);
|
|
179
|
+
assert.strictEqual(r.summarized, false);
|
|
180
|
+
assert.match(r.body, /ctx\.cancel\(\)/);
|
|
181
|
+
assert.ok(!/<script|dataLayer/.test(r.body));
|
|
182
|
+
});
|
|
183
|
+
|
|
184
|
+
test('JSON passes through without mangling and is not summarized', async () => {
|
|
185
|
+
let calls = 0;
|
|
186
|
+
const webChat = async () => { calls += 1; return 'x'; };
|
|
187
|
+
const exec = makeExec({ webChat });
|
|
188
|
+
const r = await fetchUrl(exec, `${baseUrl}/api.json`);
|
|
189
|
+
assert.strictEqual(r.kind, 'json');
|
|
190
|
+
assert.strictEqual(r.summarized, false);
|
|
191
|
+
assert.strictEqual(calls, 0, 'JSON is not sent to the summarizer');
|
|
192
|
+
const parsed = JSON.parse(r.body);
|
|
193
|
+
assert.strictEqual(parsed.name, 'widget');
|
|
194
|
+
assert.strictEqual(parsed.note, '<b>not html</b>');
|
|
195
|
+
});
|
|
196
|
+
|
|
197
|
+
test('plain text passes through without mangling', async () => {
|
|
198
|
+
const exec = makeExec({ webChat: async () => 'x' });
|
|
199
|
+
const r = await fetchUrl(exec, `${baseUrl}/plain`);
|
|
200
|
+
assert.strictEqual(r.kind, 'text');
|
|
201
|
+
assert.strictEqual(r.body, 'plain line one\nplain line two');
|
|
202
|
+
});
|
|
203
|
+
|
|
204
|
+
test('token budget caps oversized extracted content with a notice', async () => {
|
|
205
|
+
// Tiny budget forces truncation of the extracted Markdown; summarize off so we
|
|
206
|
+
// observe the capped content directly.
|
|
207
|
+
const exec = makeExec({ web: { summarize: false, summary_model: '', max_content_tokens: 300 } });
|
|
208
|
+
const r = await fetchUrl(exec, `${baseUrl}/`, { summarize: false });
|
|
209
|
+
assert.strictEqual(r.content_truncated, true);
|
|
210
|
+
assert.match(r.body, /\[\.\.\. truncated/);
|
|
211
|
+
assert.ok(defaultEstimate(r.body) <= 300 + 60, 'capped near the token budget');
|
|
212
|
+
});
|
|
213
|
+
|
|
214
|
+
// ---------------------------------------------------------------------------
|
|
215
|
+
// Mode enum (Task W.1b): raw / extracted / summarized end-to-end
|
|
216
|
+
// ---------------------------------------------------------------------------
|
|
217
|
+
|
|
218
|
+
test('mode="raw" REGRESSION: original HTML reaches context (markup intact), no summarizer call', async () => {
|
|
219
|
+
let calls = 0;
|
|
220
|
+
// A generous budget so the WHOLE fixture is retained — this test isolates
|
|
221
|
+
// "raw returns the original markup" from the token cap (covered just below).
|
|
222
|
+
// The default 6000-token budget now trims the fixture's tail under the
|
|
223
|
+
// markup-aware estimate (Task W.4 Part 2), so SPONSORED (deep in the body)
|
|
224
|
+
// would otherwise be capped away.
|
|
225
|
+
const exec = makeExec({ web: { summarize: true, summary_model: '', max_content_tokens: 100000 }, webChat: async () => { calls += 1; return 'NOPE'; } });
|
|
226
|
+
const r = await fetchUrl(exec, `${baseUrl}/`, { mode: 'raw' });
|
|
227
|
+
assert.strictEqual(calls, 0, 'raw never summarizes');
|
|
228
|
+
assert.strictEqual(r.mode, 'raw');
|
|
229
|
+
assert.strictEqual(r.summarized, false);
|
|
230
|
+
assert.strictEqual(r.extracted, false);
|
|
231
|
+
assert.strictEqual(r.kind, 'html');
|
|
232
|
+
// The things extraction destroys are present — raw HTML access is restored.
|
|
233
|
+
assert.match(r.body, /<script/);
|
|
234
|
+
assert.match(r.body, /<style/);
|
|
235
|
+
assert.match(r.body, /font-family/);
|
|
236
|
+
assert.match(r.body, /dataLayer/);
|
|
237
|
+
assert.match(r.body, /SPONSORED/);
|
|
238
|
+
});
|
|
239
|
+
|
|
240
|
+
test('mode="raw" still applies the token budget (raw HTML is heavier, capped)', async () => {
|
|
241
|
+
const exec = makeExec({ web: { summarize: true, summary_model: '', max_content_tokens: 300 } });
|
|
242
|
+
const r = await fetchUrl(exec, `${baseUrl}/`, { mode: 'raw' });
|
|
243
|
+
assert.strictEqual(r.content_truncated, true);
|
|
244
|
+
assert.match(r.body, /\[\.\.\. truncated/);
|
|
245
|
+
assert.ok(defaultEstimate(r.body) <= 300 + 60, 'capped near the token budget');
|
|
246
|
+
});
|
|
247
|
+
|
|
248
|
+
test('mode="extracted" == legacy summarize=false (Markdown, no summary)', async () => {
|
|
249
|
+
let calls = 0;
|
|
250
|
+
const exec = makeExec({ webChat: async () => { calls += 1; return 'x'; } });
|
|
251
|
+
const a = await fetchUrl(exec, `${baseUrl}/`, { mode: 'extracted' });
|
|
252
|
+
const b = await fetchUrl(exec, `${baseUrl}/`, { summarize: false });
|
|
253
|
+
assert.strictEqual(calls, 0);
|
|
254
|
+
assert.strictEqual(a.summarized, false);
|
|
255
|
+
assert.strictEqual(a.extracted, true);
|
|
256
|
+
assert.strictEqual(b.summarized, false);
|
|
257
|
+
assert.strictEqual(a.body, b.body, 'extracted mode and legacy summarize=false are identical');
|
|
258
|
+
assert.match(a.body, /ctx\.cancel\(\)/);
|
|
259
|
+
assert.ok(!/<script|SPONSORED/.test(a.body));
|
|
260
|
+
});
|
|
261
|
+
|
|
262
|
+
test('mode="summarized" == the default behavior', async () => {
|
|
263
|
+
const webChat = async () => 'SUMMARY: handlers fire in registration order.';
|
|
264
|
+
const exec = makeExec({ webChat });
|
|
265
|
+
const a = await fetchUrl(exec, `${baseUrl}/`, { mode: 'summarized' });
|
|
266
|
+
const b = await fetchUrl(exec, `${baseUrl}/`); // default
|
|
267
|
+
assert.strictEqual(a.summarized, true);
|
|
268
|
+
assert.strictEqual(b.summarized, true);
|
|
269
|
+
assert.match(a.body, /SUMMARY:/);
|
|
270
|
+
assert.strictEqual(a.body, b.body);
|
|
271
|
+
});
|
|
272
|
+
|
|
273
|
+
test('back-compat: legacy raw="true" resolves to extracted (NOT raw HTML)', async () => {
|
|
274
|
+
const exec = makeExec({ webChat: async () => 'x' });
|
|
275
|
+
const r = await fetchUrl(exec, `${baseUrl}/`, { raw: true });
|
|
276
|
+
// The deprecated raw=true alias still means "extracted Markdown", as in W.1.
|
|
277
|
+
assert.notStrictEqual(r.mode, 'raw');
|
|
278
|
+
assert.strictEqual(r.summarized, false);
|
|
279
|
+
assert.strictEqual(r.extracted, true);
|
|
280
|
+
assert.match(r.body, /ctx\.cancel\(\)/);
|
|
281
|
+
assert.ok(!/<script|font-family/.test(r.body), 'raw=true alias does NOT leak HTML');
|
|
282
|
+
});
|
|
283
|
+
|
|
284
|
+
test('precedence: explicit mode="raw" beats a legacy summarize=true', async () => {
|
|
285
|
+
let calls = 0;
|
|
286
|
+
const exec = makeExec({ webChat: async () => { calls += 1; return 'x'; } });
|
|
287
|
+
const r = await fetchUrl(exec, `${baseUrl}/`, { mode: 'raw', summarize: true });
|
|
288
|
+
assert.strictEqual(calls, 0);
|
|
289
|
+
assert.strictEqual(r.mode, 'raw');
|
|
290
|
+
assert.match(r.body, /<script/);
|
|
291
|
+
});
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Unit tests for the three-level web-fetch `mode` enum (Task W.1b):
|
|
4
|
+
// summarized | extracted | raw. Cover the opts alias-resolution precedence (XML
|
|
5
|
+
// + native), the raw short-circuit in processWebContent (the regression fix —
|
|
6
|
+
// extraction-bypassing original HTML), and the model-facing spec content.
|
|
7
|
+
// Network-free: drives the parser/pipeline helpers directly over the fixture.
|
|
8
|
+
|
|
9
|
+
const { test } = require('node:test');
|
|
10
|
+
const assert = require('node:assert');
|
|
11
|
+
|
|
12
|
+
const {
|
|
13
|
+
_httpGetOpts,
|
|
14
|
+
_httpGetOptsFromParams,
|
|
15
|
+
processWebContent,
|
|
16
|
+
WEB_FETCH_MODES,
|
|
17
|
+
} = require('../lib/tool_registry');
|
|
18
|
+
const { TOOL_SPECS } = require('../lib/tool_specs');
|
|
19
|
+
const { defaultEstimate } = require('../lib/web-extract');
|
|
20
|
+
const { HTML } = require('./fixtures/web-page');
|
|
21
|
+
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
// Opts resolution — XML (parseXml) path
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
25
|
+
|
|
26
|
+
// Each of the three enum values given explicitly in the XML attribute string
// must come back unchanged on `.mode`. The values are primitive strings, so
// strictEqual is used, matching the other opts-resolution tests in this file.
test('XML opts: explicit mode resolves to the enum value', () => {
  assert.strictEqual(_httpGetOpts('url="http://x" mode="raw"').mode, 'raw');
  assert.strictEqual(_httpGetOpts('url="http://x" mode="extracted"').mode, 'extracted');
  assert.strictEqual(_httpGetOpts('url="http://x" mode="summarized"').mode, 'summarized');
});
|
|
31
|
+
|
|
32
|
+
test('XML opts: an unknown mode is ignored (falls back to config default later)', () => {
|
|
33
|
+
assert.strictEqual(_httpGetOpts('url="http://x" mode="bogus"').mode, undefined);
|
|
34
|
+
});
|
|
35
|
+
|
|
36
|
+
test('XML opts: legacy booleans map to modes (back-compat)', () => {
|
|
37
|
+
assert.strictEqual(_httpGetOpts('url="http://x" summarize="false"').mode, 'extracted');
|
|
38
|
+
assert.strictEqual(_httpGetOpts('url="http://x" summarize="true"').mode, 'summarized');
|
|
39
|
+
assert.strictEqual(_httpGetOpts('url="http://x" raw="true"').mode, 'extracted');
|
|
40
|
+
assert.strictEqual(_httpGetOpts('url="http://x" raw="false"').mode, 'summarized');
|
|
41
|
+
});
|
|
42
|
+
|
|
43
|
+
test('XML opts: explicit mode WINS over a legacy boolean if both are given', () => {
|
|
44
|
+
// mode=raw beats summarize=true.
|
|
45
|
+
assert.strictEqual(_httpGetOpts('url="http://x" mode="raw" summarize="true"').mode, 'raw');
|
|
46
|
+
// mode=summarized beats raw=true.
|
|
47
|
+
assert.strictEqual(_httpGetOpts('url="http://x" raw="true" mode="summarized"').mode, 'summarized');
|
|
48
|
+
});
|
|
49
|
+
|
|
50
|
+
// ---------------------------------------------------------------------------
|
|
51
|
+
// Opts resolution — native (fromParams) path
|
|
52
|
+
// ---------------------------------------------------------------------------
|
|
53
|
+
|
|
54
|
+
test('native opts: explicit mode resolves; unknown ignored', () => {
|
|
55
|
+
assert.strictEqual(_httpGetOptsFromParams({ url: 'x', mode: 'raw' }).mode, 'raw');
|
|
56
|
+
assert.strictEqual(_httpGetOptsFromParams({ url: 'x', mode: 'EXTRACTED' }).mode, 'extracted');
|
|
57
|
+
assert.strictEqual(_httpGetOptsFromParams({ url: 'x', mode: 'nope' }).mode, undefined);
|
|
58
|
+
});
|
|
59
|
+
|
|
60
|
+
test('native opts: legacy booleans map to modes (back-compat)', () => {
|
|
61
|
+
assert.strictEqual(_httpGetOptsFromParams({ url: 'x', summarize: false }).mode, 'extracted');
|
|
62
|
+
assert.strictEqual(_httpGetOptsFromParams({ url: 'x', summarize: true }).mode, 'summarized');
|
|
63
|
+
assert.strictEqual(_httpGetOptsFromParams({ url: 'x', raw: true }).mode, 'extracted');
|
|
64
|
+
assert.strictEqual(_httpGetOptsFromParams({ url: 'x', raw: false }).mode, 'summarized');
|
|
65
|
+
});
|
|
66
|
+
|
|
67
|
+
test('native opts: explicit mode WINS over a legacy boolean', () => {
|
|
68
|
+
assert.strictEqual(_httpGetOptsFromParams({ url: 'x', mode: 'raw', summarize: true }).mode, 'raw');
|
|
69
|
+
});
|
|
70
|
+
|
|
71
|
+
// ---------------------------------------------------------------------------
|
|
72
|
+
// processWebContent — RAW mode is the regression fix (original HTML reachable)
|
|
73
|
+
// ---------------------------------------------------------------------------
|
|
74
|
+
|
|
75
|
+
test('RAW REGRESSION: mode="raw" returns the ORIGINAL HTML, markup intact', async () => {
|
|
76
|
+
const r = await processWebContent({
|
|
77
|
+
rawBody: HTML, contentType: 'text/html', url: 'http://x/docs', statusCode: 200,
|
|
78
|
+
totalBytes: HTML.length, transferCapped: false,
|
|
79
|
+
mode: 'raw', maxContentTokens: 100000, webChat: undefined,
|
|
80
|
+
});
|
|
81
|
+
assert.strictEqual(r.mode, 'raw');
|
|
82
|
+
assert.strictEqual(r.extracted, false);
|
|
83
|
+
assert.strictEqual(r.summarized, false);
|
|
84
|
+
assert.strictEqual(r.kind, 'html');
|
|
85
|
+
// The exact things extraction DESTROYS are present in raw mode:
|
|
86
|
+
assert.match(r.body, /<script/, 'script tags preserved');
|
|
87
|
+
assert.match(r.body, /<style/, 'style block preserved');
|
|
88
|
+
assert.match(r.body, /font-family/, 'CSS preserved');
|
|
89
|
+
assert.match(r.body, /dataLayer/, 'inline script body preserved');
|
|
90
|
+
assert.match(r.body, /class="ad-sidebar"/, 'chrome + attributes preserved');
|
|
91
|
+
assert.match(r.body, /SPONSORED/, 'ad copy preserved (would be dropped by extraction)');
|
|
92
|
+
assert.match(r.body, /<!doctype html>/i, 'doctype preserved');
|
|
93
|
+
});
|
|
94
|
+
|
|
95
|
+
test('RAW: still token-capped (context protection holds) with a notice', async () => {
|
|
96
|
+
const r = await processWebContent({
|
|
97
|
+
rawBody: HTML, contentType: 'text/html', url: 'http://x/docs', statusCode: 200,
|
|
98
|
+
totalBytes: HTML.length, transferCapped: false,
|
|
99
|
+
mode: 'raw', maxContentTokens: 200, webChat: undefined,
|
|
100
|
+
});
|
|
101
|
+
assert.strictEqual(r.content_truncated, true);
|
|
102
|
+
assert.match(r.body, /\[\.\.\. truncated/);
|
|
103
|
+
assert.ok(defaultEstimate(r.body) <= 200 + 60, 'capped near the token budget');
|
|
104
|
+
});
|
|
105
|
+
|
|
106
|
+
test('RAW markup is capped by the DENSER markup estimate (Task W.4 Part 2)', async () => {
|
|
107
|
+
const { markupEstimate, MARKUP_CHARS_PER_TOKEN } = require('../lib/web-extract');
|
|
108
|
+
// A CSS-dense page: the markup estimate counts these tokens more densely than
|
|
109
|
+
// char/4, so a given token budget keeps fewer chars than the prose path would.
|
|
110
|
+
const css = '<style>' + '.x{color:#ffffff;background:#000000;margin:0;padding:0}\n'.repeat(2000) + '</style>';
|
|
111
|
+
const html = '<!doctype html><html><head>' + css + '</head><body>hi</body></html>';
|
|
112
|
+
const budget = 500;
|
|
113
|
+
const r = await processWebContent({
|
|
114
|
+
rawBody: html, contentType: 'text/html', url: 'http://x', statusCode: 200,
|
|
115
|
+
totalBytes: html.length, transferCapped: false,
|
|
116
|
+
mode: 'raw', maxContentTokens: budget, webChat: undefined,
|
|
117
|
+
});
|
|
118
|
+
assert.strictEqual(r.kind, 'html');
|
|
119
|
+
assert.strictEqual(r.content_truncated, true);
|
|
120
|
+
// content_tokens is the MARKUP estimate of the full body (denser than char/4).
|
|
121
|
+
assert.strictEqual(r.content_tokens, markupEstimate(html));
|
|
122
|
+
assert.ok(markupEstimate(html) > defaultEstimate(html), 'markup estimate is denser');
|
|
123
|
+
// Kept chars reflect the markup divisor (~budget*2.5), NOT char/4 (budget*4) —
|
|
124
|
+
// i.e. raw markup is trimmed more aggressively for the same token budget.
|
|
125
|
+
const keptChars = r.body.split('\n\n[... truncated')[0].length;
|
|
126
|
+
assert.strictEqual(keptChars, Math.floor(budget * MARKUP_CHARS_PER_TOKEN));
|
|
127
|
+
assert.ok(keptChars < budget * 4, 'fewer chars than the prose char/4 budget would keep');
|
|
128
|
+
});
|
|
129
|
+
|
|
130
|
+
test('RAW: never calls the summarizer', async () => {
|
|
131
|
+
let calls = 0;
|
|
132
|
+
const r = await processWebContent({
|
|
133
|
+
rawBody: HTML, contentType: 'text/html', url: 'http://x', statusCode: 200,
|
|
134
|
+
totalBytes: HTML.length, transferCapped: false,
|
|
135
|
+
mode: 'raw', maxContentTokens: 100000, webChat: async () => { calls += 1; return 'x'; },
|
|
136
|
+
});
|
|
137
|
+
assert.strictEqual(calls, 0);
|
|
138
|
+
assert.strictEqual(r.summarized, false);
|
|
139
|
+
});
|
|
140
|
+
|
|
141
|
+
// ---------------------------------------------------------------------------
|
|
142
|
+
// processWebContent — extracted vs summarized still behave as W.1
|
|
143
|
+
// ---------------------------------------------------------------------------
|
|
144
|
+
|
|
145
|
+
test('extracted: Markdown of main content, chrome dropped, no summary', async () => {
|
|
146
|
+
let calls = 0;
|
|
147
|
+
const r = await processWebContent({
|
|
148
|
+
rawBody: HTML, contentType: 'text/html', url: 'http://x', statusCode: 200,
|
|
149
|
+
totalBytes: HTML.length, transferCapped: false,
|
|
150
|
+
mode: 'extracted', maxContentTokens: 6000, webChat: async () => { calls += 1; return 'x'; },
|
|
151
|
+
});
|
|
152
|
+
assert.strictEqual(calls, 0, 'extracted never summarizes');
|
|
153
|
+
assert.strictEqual(r.summarized, false);
|
|
154
|
+
assert.strictEqual(r.extracted, true);
|
|
155
|
+
assert.match(r.body, /ctx\.cancel\(\)/, 'main content kept');
|
|
156
|
+
assert.ok(!/<script|dataLayer|SPONSORED/.test(r.body), 'chrome/markup dropped by extraction');
|
|
157
|
+
});
|
|
158
|
+
|
|
159
|
+
test('summarized: secondary summary is the body; extraction ran', async () => {
|
|
160
|
+
const r = await processWebContent({
|
|
161
|
+
rawBody: HTML, contentType: 'text/html', url: 'http://x', statusCode: 200,
|
|
162
|
+
totalBytes: HTML.length, transferCapped: false,
|
|
163
|
+
mode: 'summarized', maxContentTokens: 6000, webChat: async () => 'SUMMARY: phases run in order.',
|
|
164
|
+
});
|
|
165
|
+
assert.strictEqual(r.summarized, true);
|
|
166
|
+
assert.match(r.body, /SUMMARY:/);
|
|
167
|
+
assert.ok(!/<script|ctx\.cancel/.test(r.body), 'neither raw markup nor full extracted text');
|
|
168
|
+
});
|
|
169
|
+
|
|
170
|
+
// ---------------------------------------------------------------------------
|
|
171
|
+
// The model-facing spec lists the three modes with guidance
|
|
172
|
+
// ---------------------------------------------------------------------------
|
|
173
|
+
|
|
174
|
+
test('spec: http_get exposes the mode enum and describes all three modes', () => {
|
|
175
|
+
const spec = TOOL_SPECS.http_get;
|
|
176
|
+
const modeProp = spec.parameters.properties.mode;
|
|
177
|
+
assert.ok(modeProp, 'mode parameter is present');
|
|
178
|
+
assert.deepStrictEqual(modeProp.enum, ['summarized', 'extracted', 'raw']);
|
|
179
|
+
// Guidance for each mode is described to the model.
|
|
180
|
+
const blob = (spec.description + ' ' + modeProp.description).toLowerCase();
|
|
181
|
+
assert.match(blob, /summarized/);
|
|
182
|
+
assert.match(blob, /extracted/);
|
|
183
|
+
assert.match(blob, /raw/);
|
|
184
|
+
// The raw use-case (analyze markup/structure) is called out.
|
|
185
|
+
assert.match(blob, /html|css|markup|structure/);
|
|
186
|
+
// Legacy booleans documented as deprecated aliases.
|
|
187
|
+
assert.match(spec.parameters.properties.summarize.description.toLowerCase(), /deprecat/);
|
|
188
|
+
assert.match(spec.parameters.properties.raw.description.toLowerCase(), /deprecat/);
|
|
189
|
+
});
|
|
190
|
+
|
|
191
|
+
test('WEB_FETCH_MODES is the canonical enum', () => {
|
|
192
|
+
assert.deepStrictEqual(WEB_FETCH_MODES, ['summarized', 'extracted', 'raw']);
|
|
193
|
+
});
|