@semalt-ai/code 1.8.5 → 1.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +7 -1
- package/.github/workflows/ci.yml +69 -0
- package/ARCHITECTURE.md +6 -95
- package/CLAUDE.md +196 -316
- package/README.md +148 -4
- package/docs/ARCHITECTURE.md +1321 -0
- package/docs/CONFIG.md +340 -0
- package/docs/HISTORY.md +245 -0
- package/examples/embed.js +74 -0
- package/index.js +251 -10
- package/lib/agent.js +856 -120
- package/lib/api.js +239 -50
- package/lib/args.js +74 -2
- package/lib/audit.js +23 -1
- package/lib/background.js +584 -0
- package/lib/checkpoints.js +757 -0
- package/lib/commands/auth.js +94 -0
- package/lib/commands/chat-session.js +489 -0
- package/lib/commands/chat-slash.js +415 -0
- package/lib/commands/chat-turn.js +669 -0
- package/lib/commands/chat.js +407 -0
- package/lib/commands/custom.js +157 -0
- package/lib/commands/history-utils.js +66 -0
- package/lib/commands/index.js +268 -0
- package/lib/commands/mcp.js +113 -0
- package/lib/commands/oneshot.js +193 -0
- package/lib/commands/registry.js +269 -0
- package/lib/commands/tasks.js +89 -0
- package/lib/compact.js +87 -0
- package/lib/config.js +360 -11
- package/lib/constants.js +401 -3
- package/lib/deny.js +199 -0
- package/lib/doctor.js +160 -0
- package/lib/headless.js +202 -0
- package/lib/hooks.js +286 -0
- package/lib/images.js +270 -0
- package/lib/internals.js +49 -0
- package/lib/mcp/boundary.js +131 -0
- package/lib/mcp/client.js +270 -0
- package/lib/mcp/oauth.js +134 -0
- package/lib/memory.js +209 -0
- package/lib/metrics.js +37 -2
- package/lib/payload.js +54 -0
- package/lib/permission-rules.js +401 -0
- package/lib/permissions.js +123 -26
- package/lib/pricing.js +67 -0
- package/lib/proc.js +62 -0
- package/lib/prompts.js +99 -8
- package/lib/sandbox.js +568 -0
- package/lib/sdk.js +328 -0
- package/lib/secrets.js +211 -0
- package/lib/skills.js +223 -0
- package/lib/subagents.js +516 -0
- package/lib/tool_registry.js +2862 -0
- package/lib/tool_specs.js +263 -9
- package/lib/tools.js +352 -1039
- package/lib/ui/anim.js +86 -0
- package/lib/ui/ansi.js +17 -27
- package/lib/ui/chat-history.js +253 -71
- package/lib/ui/create-ui.js +67 -24
- package/lib/ui/diff.js +90 -25
- package/lib/ui/file-activity.js +236 -0
- package/lib/ui/format.js +195 -29
- package/lib/ui/input-field.js +21 -11
- package/lib/ui/md-stream.js +234 -0
- package/lib/ui/render-operation.js +113 -0
- package/lib/ui/select.js +1 -4
- package/lib/ui/status-bar.js +146 -36
- package/lib/ui/stream.js +20 -13
- package/lib/ui/theme.js +190 -44
- package/lib/ui/tool-operation.js +190 -0
- package/lib/ui/utils.js +9 -5
- package/lib/ui/web-activity.js +270 -0
- package/lib/ui/writer.js +159 -45
- package/lib/ui.js +1 -1
- package/lib/verify.js +229 -0
- package/lib/web-extract.js +213 -0
- package/lib/web-summarize.js +68 -0
- package/package.json +19 -4
- package/scripts/lint.js +57 -0
- package/test/agent-loop.test.js +389 -0
- package/test/anim-driver.test.js +153 -0
- package/test/ask-user-display.test.js +226 -0
- package/test/ask-user-gate.test.js +231 -0
- package/test/background.test.js +414 -0
- package/test/chat-history-nocolor.test.js +155 -0
- package/test/chat-relogin.test.js +207 -0
- package/test/chat.test.js +114 -0
- package/test/checkpoints-agent.test.js +181 -0
- package/test/checkpoints.test.js +650 -0
- package/test/command-registry.test.js +160 -0
- package/test/compact.test.js +116 -0
- package/test/completion-lazy.test.js +52 -0
- package/test/config-merge.test.js +324 -0
- package/test/config-quarantine.test.js +128 -0
- package/test/config-write-guard-allow-anywhere.test.js +56 -0
- package/test/config-write-guard-skip.test.js +46 -0
- package/test/config-write-guard.test.js +153 -0
- package/test/context-split.test.js +215 -0
- package/test/cost-doctor.test.js +142 -0
- package/test/custom-commands-chat.test.js +106 -0
- package/test/custom-commands.test.js +230 -0
- package/test/defer-detail-band.test.js +403 -0
- package/test/deny-windows.test.js +120 -0
- package/test/deny.test.js +83 -0
- package/test/detail-band-tab-flatten.test.js +242 -0
- package/test/download-allow-anywhere.test.js +66 -0
- package/test/download-confine.test.js +153 -0
- package/test/exec-diff.test.js +268 -0
- package/test/executors.test.js +599 -0
- package/test/extract-tool-calls.test.js +349 -0
- package/test/fetch-url-validation.test.js +219 -0
- package/test/file-activity.test.js +522 -0
- package/test/fixtures/tool-calls.js +57 -0
- package/test/fixtures/web-page.js +91 -0
- package/test/git-tools.test.js +384 -0
- package/test/grep-glob-serialize.test.js +242 -0
- package/test/grep-glob.test.js +268 -0
- package/test/grep-path-target.test.js +227 -0
- package/test/harness/README.md +57 -0
- package/test/harness/chat-harness.js +143 -0
- package/test/harness/memwarn-headless-child.js +65 -0
- package/test/harness/mock-llm.js +120 -0
- package/test/harness/mock-mcp-server.js +142 -0
- package/test/harness/sse-server.js +69 -0
- package/test/headless.test.js +348 -0
- package/test/history-utils.test.js +88 -0
- package/test/hooks-agent.test.js +238 -0
- package/test/hooks-verify-sandbox.test.js +232 -0
- package/test/hooks.test.js +216 -0
- package/test/http-get-user-agent.test.js +142 -0
- package/test/images-api.test.js +208 -0
- package/test/images.test.js +238 -0
- package/test/input-field-ctrl-o.test.js +37 -0
- package/test/live-height-physical.test.js +281 -0
- package/test/max-iterations.test.js +218 -0
- package/test/mcp-boundary.test.js +57 -0
- package/test/mcp-client.test.js +267 -0
- package/test/mcp-oauth.test.js +86 -0
- package/test/md-stream.test.js +183 -0
- package/test/memory-truncation-warning.test.js +222 -0
- package/test/memory.test.js +198 -0
- package/test/native-dispatch.test.js +409 -0
- package/test/native-live-narration.test.js +254 -0
- package/test/output-chokepoint.test.js +188 -0
- package/test/output-heredoc-leak.test.js +195 -0
- package/test/output-preview.test.js +245 -0
- package/test/path-guards.test.js +134 -0
- package/test/payload.test.js +99 -0
- package/test/permission-rules-agent.test.js +210 -0
- package/test/permission-rules.test.js +297 -0
- package/test/permissions.test.js +362 -0
- package/test/plan-mode.test.js +167 -0
- package/test/read-paginate.test.js +275 -0
- package/test/readonly-tools.test.js +177 -0
- package/test/render-operation.test.js +317 -0
- package/test/replay-descriptor-xml.test.js +216 -0
- package/test/replay-descriptor.test.js +189 -0
- package/test/replay-web-aggregate.test.js +291 -0
- package/test/replay-web-persist.test.js +241 -0
- package/test/result-cap.test.js +233 -0
- package/test/running-glyph-anim.test.js +111 -0
- package/test/sandbox-agent.test.js +147 -0
- package/test/sandbox-integration.test.js +216 -0
- package/test/sandbox.test.js +408 -0
- package/test/sdk.test.js +234 -0
- package/test/shell-output-cap.test.js +181 -0
- package/test/skills-chat.test.js +110 -0
- package/test/skills.test.js +295 -0
- package/test/smoke.test.js +68 -0
- package/test/status-bar-driver.test.js +93 -0
- package/test/status-bar-pause.test.js +164 -0
- package/test/status-bar-resync.test.js +188 -0
- package/test/stream-parser.test.js +171 -0
- package/test/subagents-agent.test.js +178 -0
- package/test/subagents.test.js +222 -0
- package/test/theme-palette.test.js +166 -0
- package/test/tool-registry.test.js +85 -0
- package/test/trim-budget.test.js +101 -0
- package/test/truncate-visible.test.js +78 -0
- package/test/verify-agent.test.js +317 -0
- package/test/verify.test.js +141 -0
- package/test/view-image.test.js +199 -0
- package/test/web-activity-ordering.test.js +203 -0
- package/test/web-activity.test.js +207 -0
- package/test/web-data-extraction-guidance.test.js +71 -0
- package/test/web-extract.test.js +185 -0
- package/test/web-fetch-agent.test.js +291 -0
- package/test/web-fetch-mode.test.js +193 -0
- package/test/web-search.test.js +380 -0
- package/lib/commands.js +0 -1438
- package/path +0 -1
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Task W.4 Part 1 — guidance: teach fetch+grep for data-extraction tasks.
|
|
4
|
+
//
|
|
5
|
+
// "Extract specific values from a page" (colors/hex, versions, IDs, URLs,
|
|
6
|
+
// counts) is a different task class from "read a page": the right pattern is
|
|
7
|
+
// targeted matching (download→grep / curl|grep) so only the matches enter
|
|
8
|
+
// context — NOT loading page content via any http_get mode. These tests pin
|
|
9
|
+
// that the guidance text is present (the spec drives the model) and that the
|
|
10
|
+
// stale prompts.js http_get description ("byte cap body") is corrected.
|
|
11
|
+
//
|
|
12
|
+
// NOTE: Part 1 is a guidance change; its real effect is behavioral (a live
|
|
13
|
+
// re-run of the "what colors" task), not a unit assertion — these tests only
|
|
14
|
+
// pin that the guidance exists and is discoverable to the model.
|
|
15
|
+
|
|
16
|
+
const { test } = require('node:test');
|
|
17
|
+
const assert = require('node:assert');
|
|
18
|
+
|
|
19
|
+
const { TOOL_SPECS } = require('../lib/tool_specs');
|
|
20
|
+
const { getSystemPrompt } = require('../lib/prompts');
|
|
21
|
+
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
// The http_get spec guides the agent to fetch+grep for extracting values
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
25
|
+
|
|
26
|
+
test('http_get spec guides fetch+grep for extracting specific values', () => {
  // Lower-cased once so every pattern below can be written in lower case.
  const blob = TOOL_SPECS.http_get.description.toLowerCase();
  // It points at download/grep (or curl|grep) as the extraction pattern.
  assert.match(blob, /grep/, 'mentions grep as the extraction tool');
  assert.match(blob, /download|curl/, 'points at download/curl to disk first');
  // It frames the task class — extracting specific values. `id` is anchored on
  // word boundaries: a bare /id/ also matches inside "provide", "valid",
  // "inside", … which would let this assertion pass on almost any description.
  assert.match(blob, /extract|specific value|color|version|\bids?\b/, 'names the extract-values task');
  // Only pins that the raw mode is still mentioned; the exact wording of any
  // "raw is expensive" warning is not asserted here.
  assert.match(blob, /raw/, 'still describes raw');
});
|
|
36
|
+
|
|
37
|
+
test('http_get spec notes the SPA / linked-asset case', () => {
  // Any one of these terms shows the spec covers values that live in linked
  // assets rather than in the top-level HTML document.
  const linkedAssetPattern = /asset|_nuxt|stylesheet|bundle|\.css|\.js/;
  const description = TOOL_SPECS.http_get.description.toLowerCase();
  assert.match(description, linkedAssetPattern, 'mentions linked assets / SPA case');
});
|
|
42
|
+
|
|
43
|
+
// ---------------------------------------------------------------------------
|
|
44
|
+
// The system prompt carries the same guidance (covers the XML tag rail)
|
|
45
|
+
// ---------------------------------------------------------------------------
|
|
46
|
+
|
|
47
|
+
test('system prompt teaches fetch+grep for value extraction (XML + native)', () => {
  // Each pattern must appear in the prompt for both tool-calling rails.
  const expectations = [
    [/grep/, 'mentions grep'],
    [/download|curl/, 'mentions download/curl'],
    [/extract/, 'names the extract task'],
  ];
  [false, true].forEach((native) => {
    const promptText = getSystemPrompt(native, '', '').toLowerCase();
    for (const [pattern, label] of expectations) {
      assert.match(promptText, pattern, `native=${native}: ${label}`);
    }
  });
});
|
|
55
|
+
|
|
56
|
+
// ---------------------------------------------------------------------------
|
|
57
|
+
// The stale prompts.js http_get description is corrected
|
|
58
|
+
// ---------------------------------------------------------------------------
|
|
59
|
+
|
|
60
|
+
test('http_get tag purpose no longer claims a raw byte-cap response body', () => {
  const prompts = require('../lib/prompts');
  const purpose = (prompts.TOOL_TAG_SPECS.http_get.purpose || '').toLowerCase();

  // Pre-W.1 wording: "returns the response body (truncated to a byte cap …)".
  // http_get now runs the extract/summarize pipeline, so that text must be gone.
  const staleWording = /response body \(truncated to a byte cap/;
  const stillStale = staleWording.test(purpose);
  assert.ok(!stillStale, 'stale byte-cap-body description must be gone');

  // The accurate description references the pipeline / modes / token cap.
  const pipelineWording = /summari|extract|mode|token|pipeline/;
  assert.match(purpose, pipelineWording, 'describes the W.1 pipeline');
});
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Unit tests for the web-fetch extraction + summary-request stages (Task W.1).
|
|
4
|
+
// Network-free: they run Readability + Turndown over a fixture HTML page and the
|
|
5
|
+
// pure summary-message builder. The pipeline integration (http_get end-to-end
|
|
6
|
+
// with a mock fetch + mock summarizer) lives in test/web-fetch-agent.test.js.
|
|
7
|
+
|
|
8
|
+
const { test } = require('node:test');
|
|
9
|
+
const assert = require('node:assert');
|
|
10
|
+
|
|
11
|
+
const {
|
|
12
|
+
classifyContentType,
|
|
13
|
+
extractContent,
|
|
14
|
+
capToTokens,
|
|
15
|
+
defaultEstimate,
|
|
16
|
+
} = require('../lib/web-extract');
|
|
17
|
+
const { buildSummaryMessages, summarizeWebContent, FENCE_OPEN } = require('../lib/web-summarize');
|
|
18
|
+
const { HTML, INJECTION } = require('./fixtures/web-page');
|
|
19
|
+
|
|
20
|
+
// ---------------------------------------------------------------------------
|
|
21
|
+
// Content-type classification
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
|
|
24
|
+
test('classifyContentType: html / json / text / markdown by content-type', () => {
  // Rows are [content-type header, url, body, expected kind].
  const cases = [
    ['text/html; charset=utf-8', 'http://x', '', 'html'],
    ['application/json', 'http://x/api', '{}', 'json'],
    ['application/vnd.api+json', 'http://x', '{}', 'json'],
    ['text/plain', 'http://x/readme.txt', 'hi', 'text'],
    ['text/markdown', 'http://x/r.md', '# hi', 'markdown'],
    // .md served as text/plain is still markdown
    ['text/plain', 'http://x/README.md', '# hi', 'markdown'],
  ];
  for (const [contentType, url, body, expected] of cases) {
    assert.strictEqual(classifyContentType(contentType, url, body), expected);
  }
});
|
|
33
|
+
|
|
34
|
+
test('classifyContentType: sniffs HTML when content-type is absent/generic', () => {
  // Rows are [content-type header, body, expected kind]; the URL is constant.
  const cases = [
    ['', '<!doctype html><html><body>hi</body></html>', 'html'],
    ['application/octet-stream', '<div>x</div>', 'html'],
    ['', 'just some plain text, no tags here', 'text'],
  ];
  cases.forEach(([contentType, body, expected]) => {
    assert.strictEqual(classifyContentType(contentType, 'http://x', body), expected);
  });
});
|
|
39
|
+
|
|
40
|
+
// ---------------------------------------------------------------------------
|
|
41
|
+
// Extraction: HTML → clean Markdown of the MAIN content
|
|
42
|
+
// ---------------------------------------------------------------------------
|
|
43
|
+
|
|
44
|
+
test('extractContent: HTML yields clean Markdown of the article; nav/scripts/ads gone', () => {
  const result = extractContent({ body: HTML, contentType: 'text/html', url: 'http://x/docs' });
  const { markdown } = result;
  assert.strictEqual(result.kind, 'html');
  assert.strictEqual(result.extracted, true);

  // Title is recovered as metadata, and the main content survives.
  assert.match(result.title || '', /HyperWidget Layout Phases/);
  assert.match(markdown, /layout phases/i);
  assert.match(markdown, /ctx\.cancel\(\)/);

  // Page chrome and script bodies must not appear in the extracted Markdown.
  const mustBeGone = [
    [/SPONSORED/, 'ad copy must be dropped'],
    [/Accept all/, 'cookie banner must be dropped'],
    [/All rights reserved/, 'footer must be dropped'],
    [/Sign up/, 'nav must be dropped'],
    [/dataLayer/, 'inline script body must be dropped'],
    [/footer analytics beacon/, 'trailing script must be dropped'],
  ];
  for (const [pattern, message] of mustBeGone) {
    assert.ok(!pattern.test(markdown), message);
  }

  // No executable markup leaks through (script tags, style rules).
  const leaksMarkup = /<script/i.test(markdown) || /font-family/.test(markdown);
  assert.ok(!leaksMarkup, 'no script/style markup');
});
|
|
66
|
+
|
|
67
|
+
test('TOKEN VOLUME: extraction alone yields a large reduction vs raw HTML (chrome dropped)', () => {
  const extraction = extractContent({ body: HTML, contentType: 'text/html', url: 'http://x/docs' });
  const rawTokens = defaultEstimate(HTML);
  const extractedTokens = defaultEstimate(extraction.markdown);

  // Extraction alone (before summarization) is expected to shrink the page by
  // at least 3x. The order-of-magnitude reduction of the full summarized
  // pipeline is asserted in test/web-fetch-agent.test.js.
  assert.ok(extractedTokens > 0, 'extraction produced content');
  const reducedEnough = extractedTokens * 3 < rawTokens;
  assert.ok(
    reducedEnough,
    `expected >=3x token reduction from extraction, got raw=${rawTokens} extracted=${extractedTokens}`,
  );
});
|
|
81
|
+
|
|
82
|
+
test('extractContent: JSON passes through verbatim (no mangling)', () => {
  // The payload embeds an HTML-looking string to show it is not treated as markup.
  const payload = JSON.stringify({ a: 1, b: [2, 3], c: { d: '<not html>' } });
  const result = extractContent({ body: payload, contentType: 'application/json', url: 'http://x/api' });
  assert.strictEqual(result.kind, 'json');
  assert.strictEqual(result.extracted, false);
  assert.strictEqual(result.markdown, payload);
});
|
|
89
|
+
|
|
90
|
+
test('extractContent: plain text passes through verbatim', () => {
  const plain = 'line one\nline two\n indented three';
  const result = extractContent({ body: plain, contentType: 'text/plain', url: 'http://x/f.txt' });
  // Both the detected kind and the untouched body are pinned.
  assert.strictEqual(result.kind, 'text');
  assert.strictEqual(result.markdown, plain);
});
|
|
96
|
+
|
|
97
|
+
// ---------------------------------------------------------------------------
|
|
98
|
+
// Token budget
|
|
99
|
+
// ---------------------------------------------------------------------------
|
|
100
|
+
|
|
101
|
+
test('capToTokens: under budget is unchanged; over budget is truncated with a notice', () => {
  // Under budget: text comes back untouched.
  const small = 'hello world';
  const underBudget = capToTokens(small, 6000, defaultEstimate);
  assert.strictEqual(underBudget.truncated, false);
  assert.strictEqual(underBudget.text, small);

  // Over budget: 40000 chars against a 1000-token budget.
  const big = 'x'.repeat(40000);
  const overBudget = capToTokens(big, 1000, defaultEstimate);
  assert.strictEqual(overBudget.truncated, true);
  assert.match(overBudget.text, /\[\.\.\. truncated/);
  // Capped result (including the notice) is far below the original length.
  const ceiling = big.length / 5;
  assert.ok(overBudget.text.length < ceiling);
});
|
|
114
|
+
|
|
115
|
+
// ---------------------------------------------------------------------------
|
|
116
|
+
// Markup-aware token estimate (Task W.4 Part 2)
|
|
117
|
+
// ---------------------------------------------------------------------------
|
|
118
|
+
|
|
119
|
+
test('markupEstimate: denser than the prose char/4 estimate', () => {
  const webExtract = require('../lib/web-extract');
  const { markupEstimate, MARKUP_CHARS_PER_TOKEN, DEFAULT_CHARS_PER_TOKEN } = webExtract;
  assert.ok(MARKUP_CHARS_PER_TOKEN < DEFAULT_CHARS_PER_TOKEN, 'markup divisor is smaller');

  const css = '.x{color:#ffffff;margin:0}'.repeat(1000);
  const asMarkup = markupEstimate(css);
  const asProse = defaultEstimate(css);
  assert.ok(asMarkup > asProse, 'markup estimates MORE tokens for the same chars');

  // The estimate is exactly ceil(length / divisor).
  const sampleLength = 2500;
  const expected = Math.ceil(2500 / MARKUP_CHARS_PER_TOKEN);
  assert.strictEqual(markupEstimate('x'.repeat(sampleLength)), expected);
});
|
|
127
|
+
|
|
128
|
+
test('capToTokens: markup charsPerToken trims more aggressively than prose for the SAME budget', () => {
  const webExtract = require('../lib/web-extract');
  const { markupEstimate, MARKUP_CHARS_PER_TOKEN } = webExtract;
  const budget = 1000;
  const big = '.x{color:#fff;background:#000}'.repeat(5000); // dense markup, well over budget

  const prose = capToTokens(big, budget, defaultEstimate); // default divisor
  const markup = capToTokens(big, budget, markupEstimate, MARKUP_CHARS_PER_TOKEN); // markup divisor
  assert.ok(prose.truncated && markup.truncated, 'both truncate');

  // Length of the kept text, i.e. everything before the truncation notice.
  const keptChars = (r) => {
    const [kept] = r.text.split('\n\n[... truncated');
    return kept.length;
  };
  const keptMarkup = keptChars(markup);
  const keptProse = keptChars(prose);

  // The kept char budget is budget * divisor, so markup keeps fewer chars.
  assert.ok(keptMarkup < keptProse, `markup ${keptMarkup} < prose ${keptProse}`);
  assert.strictEqual(keptMarkup, Math.floor(budget * MARKUP_CHARS_PER_TOKEN));
  assert.strictEqual(keptProse, Math.floor(budget * 4));
});
|
|
144
|
+
|
|
145
|
+
test('capToTokens: prose path (no charsPerToken) is byte-for-byte unchanged', () => {
  // With no explicit divisor, a 1000-token budget must keep exactly 4000 chars.
  const capped = capToTokens('x'.repeat(40000), 1000, defaultEstimate);
  const [kept] = capped.text.split('\n\n[... truncated');
  assert.strictEqual(kept.length, 4000);
});
|
|
151
|
+
|
|
152
|
+
// ---------------------------------------------------------------------------
|
|
153
|
+
// Summary request builder — untrusted, data-only framing
|
|
154
|
+
// ---------------------------------------------------------------------------
|
|
155
|
+
|
|
156
|
+
test('buildSummaryMessages: page text is fenced as data-only and never framed as instructions', () => {
  const pageText = `some content with ${INJECTION}`;
  const messages = buildSummaryMessages(pageText, 'what are layout phases?');
  assert.strictEqual(messages.length, 2);
  const systemText = messages[0].content;
  const userText = messages[1].content;

  // System prompt tells the model to treat the block as data and ignore injections.
  assert.match(systemText, /DATA/);
  assert.match(systemText, /[Nn]ever (obey|follow|act)/);

  // The injection text sits in the fenced user message, not in the system role.
  assert.ok(userText.includes(FENCE_OPEN), 'content is fenced');
  assert.ok(userText.includes(INJECTION), 'injection carried as data');
  assert.ok(!systemText.includes(INJECTION), 'injection must not be in the system prompt');

  // The caller's intent is threaded into the user message.
  assert.match(userText, /layout phases/);
});
|
|
171
|
+
|
|
172
|
+
test('summarizeWebContent: passes data-only messages to the injected chat and returns its text', async () => {
  // Recording stub: captures the messages and returns a whitespace-padded summary.
  let captured = null;
  async function chat(messages) {
    captured = messages;
    return ' SUMMARY: phases fire in order. ';
  }

  const summary = await summarizeWebContent({ markdown: `body ${INJECTION}`, intent: 'x', chat });
  // The returned text is the chat output without surrounding whitespace.
  assert.strictEqual(summary, 'SUMMARY: phases fire in order.');

  // The summarizer received the injection only in the user (data) message.
  const [systemMessage, userMessage] = [captured[0], captured[1]];
  assert.ok(userMessage.content.includes(INJECTION));
  assert.match(systemMessage.content, /DATA/);
});
|
|
181
|
+
|
|
182
|
+
test('summarizeWebContent: empty result throws (so the caller can fall back)', async () => {
  // A chat that yields only whitespace must reject.
  const blankChat = async () => ' ';
  await assert.rejects(() => summarizeWebContent({ markdown: 'x', chat: blankChat }));
  // A missing chat function must reject as well.
  await assert.rejects(() => summarizeWebContent({ markdown: 'x', chat: null }));
});
|
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Integration tests for the http_get web-fetch pipeline (Task W.1). A real local
|
|
4
|
+
// HTTP server serves the fixture page; the executor runs the real extraction
|
|
5
|
+
// (Readability + Turndown) and an INJECTED mock summarizer (no real LLM). These
|
|
6
|
+
// prove the end-to-end contract: only the processed result enters context, the
|
|
7
|
+
// raw page never does, summarize on/off, untrusted handling, failure fallback,
|
|
8
|
+
// pass-through, and the token budget.
|
|
9
|
+
|
|
10
|
+
const os = require('node:os');
|
|
11
|
+
const fs = require('node:fs');
|
|
12
|
+
const path = require('node:path');
|
|
13
|
+
|
|
14
|
+
// Redirect home-based paths into a temp dir before any lib loads (audit log etc).
|
|
15
|
+
const TMP_HOME = fs.mkdtempSync(path.join(os.tmpdir(), 'semalt-web-home-'));
|
|
16
|
+
const PREV_HOME = process.env.HOME;
|
|
17
|
+
const PREV_USERPROFILE = process.env.USERPROFILE;
|
|
18
|
+
process.env.HOME = TMP_HOME;
|
|
19
|
+
process.env.USERPROFILE = TMP_HOME;
|
|
20
|
+
|
|
21
|
+
const { test, before, after } = require('node:test');
|
|
22
|
+
const assert = require('node:assert');
|
|
23
|
+
const http = require('node:http');
|
|
24
|
+
|
|
25
|
+
const ui = require('../lib/ui');
|
|
26
|
+
const { createPermissionManager } = require('../lib/permissions');
|
|
27
|
+
const { createToolExecutor } = require('../lib/tools');
|
|
28
|
+
const { defaultEstimate } = require('../lib/web-extract');
|
|
29
|
+
const { HTML, INJECTION, INJECTION_MARKER } = require('./fixtures/web-page');
|
|
30
|
+
|
|
31
|
+
let server;
|
|
32
|
+
let baseUrl;
|
|
33
|
+
|
|
34
|
+
// Start a fixture server: GET / → the HTML page; /api.json → JSON; /plain → text.
|
|
35
|
+
before(async () => {
  // Fixture server: /api.json → JSON, /plain → text, anything else → the HTML page.
  server = http.createServer((req, res) => {
    if (req.url.startsWith('/api.json')) {
      res.writeHead(200, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify({ name: 'widget', tags: ['a', 'b'], note: '<b>not html</b>' }));
      return;
    }
    if (req.url.startsWith('/plain')) {
      res.writeHead(200, { 'Content-Type': 'text/plain' });
      res.end('plain line one\nplain line two');
      return;
    }
    res.writeHead(200, { 'Content-Type': 'text/html; charset=utf-8' });
    res.end(HTML);
  });

  // Port 0 lets the OS pick a free port. A bind failure is emitted as an
  // 'error' event; without a listener the promise would never settle and the
  // error would surface as an uncaught exception instead of a hook failure.
  await new Promise((resolve, reject) => {
    server.once('error', reject);
    server.listen(0, '127.0.0.1', () => {
      // Startup succeeded: drop the one-shot handler so it cannot swallow a
      // later, unrelated server error.
      server.off('error', reject);
      resolve();
    });
  });
  baseUrl = `http://127.0.0.1:${server.address().port}`;
});
|
|
53
|
+
|
|
54
|
+
after(async () => {
  try {
    // `server` is unset if the before hook failed before creating it.
    if (server) {
      await new Promise((resolve) => server.close(resolve));
    }
  } finally {
    // Restore the environment even if closing the server threw, so later test
    // files in the same process do not inherit the temporary home directory.
    if (PREV_HOME === undefined) delete process.env.HOME; else process.env.HOME = PREV_HOME;
    if (PREV_USERPROFILE === undefined) delete process.env.USERPROFILE; else process.env.USERPROFILE = PREV_USERPROFILE;
    // Remove the temp home created at load time; previously it was leaked on
    // every run. `force` tolerates an already-missing directory and
    // `maxRetries` rides out transient EBUSY/EPERM errors.
    fs.rmSync(TMP_HOME, { recursive: true, force: true, maxRetries: 3 });
  }
});
|
|
59
|
+
|
|
60
|
+
// Build an executor with a configurable web config + a recording mock summarizer.
|
|
61
|
+
function makeExec({ web, webChat } = {}) {
  // Config getter handed to the executor. The default web config (summarize
  // on, 6000-token cap) is built fresh on each call when `web` is not given.
  function getConfig() {
    const webConfig = web || { summarize: true, summary_model: '', max_content_tokens: 6000 };
    return {
      max_file_size_kb: 512,
      command_timeout_ms: 30000,
      http_fetch_max_bytes: 262144,
      web: webConfig,
    };
  }

  const permissions = createPermissionManager(ui, {});
  // `webChat` is the injected summarizer; it may be undefined.
  return createToolExecutor(permissions, ui, getConfig, { webChat });
}
|
|
71
|
+
|
|
72
|
+
// Invoke http_get the way the agent loop does: trailing { signal } options bag.
|
|
73
|
+
function fetchUrl(exec, url, callOpts = {}) {
  // Trailing options bag passed as the last argument; no abort signal is used.
  const loopOptions = { signal: null };
  return exec.agentExecFile('http_get', url, callOpts, loopOptions);
}
|
|
76
|
+
|
|
77
|
+
// ---------------------------------------------------------------------------
|
|
78
|
+
|
|
79
|
+
test('summarize ON: only the summary enters context; the raw page never does', async () => {
  // Recording mock summarizer: counts calls and notes whether the page's
  // injection marker reached it in the user message.
  const seen = { calls: 0, injection: false };
  const webChat = async (messages) => {
    seen.calls += 1;
    seen.injection = messages[1].content.includes(INJECTION_MARKER);
    return 'SUMMARY: layout-phase handlers run synchronously in registration order; ctx.cancel() aborts.';
  };

  const result = await fetchUrl(makeExec({ webChat }), `${baseUrl}/`, { intent: 'how do handlers run?' });

  assert.strictEqual(seen.calls, 1, 'secondary summarizer was called exactly once');
  assert.strictEqual(result.summarized, true);
  assert.strictEqual(result.kind, 'html');
  assert.match(result.body, /SUMMARY:/);
  // The summarizer received the page (incl. injection) as DATA.
  assert.ok(seen.injection, 'summarizer received the page content');

  // Neither raw page markup nor the extracted full text appears in the body.
  const rawMarkup = /dataLayer|SPONSORED|<script|font-family/;
  const fullText = /included to give the article enough weight/;
  assert.ok(!rawMarkup.test(result.body), 'no raw page markup in body');
  assert.ok(!fullText.test(result.body), 'no extracted full text in body either');
});
|
|
101
|
+
|
|
102
|
+
test('TOKEN VOLUME: summarized result entering context is an order of magnitude smaller than raw HTML', async () => {
  // Fixed summary text of roughly realistic length returned by the mock.
  const summaryText = 'Handlers fire in registration order; ctx.cancel() aborts a transition. ~40 words of summary text here to be realistic about a real summary length for a docs page about layout phases and handlers.';
  const webChat = async () => summaryText;

  const result = await fetchUrl(makeExec({ webChat }), `${baseUrl}/`);
  const rawTokens = defaultEstimate(HTML);
  const resultTokens = defaultEstimate(result.body);

  const tenfoldSmaller = resultTokens * 10 < rawTokens;
  assert.ok(tenfoldSmaller, `expected >=10x reduction, got raw=${rawTokens} result=${resultTokens}`);
});
|
|
113
|
+
|
|
114
|
+
test('summarize OFF (per-call): extracted Markdown enters context, no summarizer call', async () => {
  // The mock only counts invocations; it must never run in this test.
  let summarizerCalls = 0;
  const webChat = async () => {
    summarizerCalls += 1;
    return 'SHOULD NOT BE CALLED';
  };

  const result = await fetchUrl(makeExec({ webChat }), `${baseUrl}/`, { summarize: false });

  assert.strictEqual(summarizerCalls, 0, 'no secondary call when summarize=false');
  assert.strictEqual(result.summarized, false);
  assert.strictEqual(result.extracted, true);

  const { body } = result;
  // Extracted Markdown of the main content is returned verbatim.
  assert.match(body, /ctx\.cancel\(\)/);
  // Chrome is dropped, but the full extracted text is present.
  assert.ok(!/SPONSORED|dataLayer/.test(body));
  assert.match(body, /included to give the article enough weight/);
});
|
|
129
|
+
|
|
130
|
+
test('summarize OFF (global config): same as per-call off', async () => {
  let summarizerCalls = 0;
  const webChat = async () => {
    summarizerCalls += 1;
    return 'x';
  };
  // Summarization is disabled through the executor's web config, not per call.
  const web = { summarize: false, summary_model: '', max_content_tokens: 6000 };

  const result = await fetchUrl(makeExec({ web, webChat }), `${baseUrl}/`);

  assert.strictEqual(summarizerCalls, 0);
  assert.strictEqual(result.summarized, false);
  assert.match(result.body, /ctx\.cancel\(\)/);
});
|
|
139
|
+
|
|
140
|
+
test('UNTRUSTED: a page injection does not steer the summarizer (treated as data)', async () => {
  // Mock summarizer that records both messages and returns a plain summary
  // instead of acting on anything embedded in the page.
  const captured = { system: null, user: null };
  const webChat = async (messages) => {
    captured.system = messages[0].content;
    captured.user = messages[1].content;
    return 'The page documents layout-phase handlers. (Ignoring any embedded instructions.)';
  };

  const result = await fetchUrl(makeExec({ webChat }), `${baseUrl}/`);

  // Data-only framing is present; the injection is carried in the user
  // message and is not promoted to the system role.
  assert.match(captured.system, /DATA/);
  assert.match(captured.system, /[Nn]ever (obey|follow|act)/);
  assert.ok(captured.user.includes(INJECTION_MARKER), 'injection carried as fenced data');
  assert.ok(!captured.system.includes(INJECTION_MARKER), 'injection not in system prompt');

  // The returned body contains neither the injected command nor raw markup.
  const echoedCommand = result.body.includes('rm -rf');
  assert.ok(!echoedCommand, 'injection command not echoed into context');
  assert.ok(!/<script/.test(result.body));
});
|
|
163
|
+
|
|
164
|
+
test('summarizer FAILURE falls back to extracted Markdown, never raw HTML', async () => {
  // Summarizer stub that always rejects.
  const exec = makeExec({
    webChat: async () => {
      throw new Error('summary model timeout');
    },
  });

  const result = await fetchUrl(exec, `${baseUrl}/`);

  assert.strictEqual(result.summarized, false);
  // The failure reason is reported on the result (empty string if absent).
  const reportedError = result.summary_error || '';
  assert.match(reportedError, /timeout/);

  // The fallback body is the clean extracted Markdown, not the original page.
  assert.match(result.body, /ctx\.cancel\(\)/);
  const leaksRawPage = /dataLayer|SPONSORED|<script|font-family/.test(result.body);
  assert.ok(!leaksRawPage, 'no raw HTML on fallback');
});
|
|
175
|
+
|
|
176
|
+
test('no summarizer available (e.g. headless): returns extracted Markdown, never raw HTML', async () => {
  // No summarizer callback is supplied at all.
  const execOptions = { webChat: undefined };
  const result = await fetchUrl(makeExec(execOptions), `${baseUrl}/`);

  assert.strictEqual(result.summarized, false);
  assert.match(result.body, /ctx\.cancel\(\)/);

  const leaksRawPage = /<script|dataLayer/.test(result.body);
  assert.ok(!leaksRawPage);
});
|
|
183
|
+
|
|
184
|
+
test('JSON passes through without mangling and is not summarized', async () => {
  // Spy summarizer that records how often it is invoked.
  let summarizerHits = 0;
  const exec = makeExec({
    webChat: async () => {
      summarizerHits += 1;
      return 'x';
    },
  });

  const result = await fetchUrl(exec, `${baseUrl}/api.json`);

  assert.strictEqual(result.kind, 'json');
  assert.strictEqual(result.summarized, false);
  assert.strictEqual(summarizerHits, 0, 'JSON is not sent to the summarizer');

  // The body must still parse, and an HTML-looking string value must come
  // through untouched.
  const payload = JSON.parse(result.body);
  assert.strictEqual(payload.name, 'widget');
  assert.strictEqual(payload.note, '<b>not html</b>');
});
|
|
196
|
+
|
|
197
|
+
test('plain text passes through without mangling', async () => {
  const webChat = async () => 'x';
  const exec = makeExec({ webChat });

  const result = await fetchUrl(exec, `${baseUrl}/plain`);

  // The text body is expected back exactly, including its line break.
  assert.strictEqual(result.kind, 'text');
  assert.strictEqual(result.body, 'plain line one\nplain line two');
});
|
|
203
|
+
|
|
204
|
+
test('token budget caps oversized extracted content with a notice', async () => {
  // A deliberately small budget so the extracted Markdown has to be cut;
  // summarization is switched off so the capped content itself is inspected.
  const budget = 300;
  const webConfig = { summarize: false, summary_model: '', max_content_tokens: budget };
  const exec = makeExec({ web: webConfig });

  const result = await fetchUrl(exec, `${baseUrl}/`, { summarize: false });

  assert.strictEqual(result.content_truncated, true);
  assert.match(result.body, /\[\.\.\. truncated/);

  // The estimate may overshoot the budget by at most 60 tokens.
  const estimated = defaultEstimate(result.body);
  assert.ok(estimated <= budget + 60, 'capped near the token budget');
});
|
|
213
|
+
|
|
214
|
+
// ---------------------------------------------------------------------------
|
|
215
|
+
// Mode enum (Task W.1b): raw / extracted / summarized end-to-end
|
|
216
|
+
// ---------------------------------------------------------------------------
|
|
217
|
+
|
|
218
|
+
test('mode="raw" REGRESSION: original HTML reaches context (markup intact), no summarizer call', async () => {
  // Large budget on purpose: the whole fixture has to survive so this test only
  // checks "raw returns the original markup"; the token cap has its own test.
  // With the default 6000-token budget the markup-aware estimate (Task W.4
  // Part 2) cuts the fixture's tail, and SPONSORED, which sits deep in the
  // body, would be trimmed off.
  const webConfig = { summarize: true, summary_model: '', max_content_tokens: 100000 };

  // Spy summarizer that records how often it is invoked.
  let summarizerHits = 0;
  const webChat = async () => {
    summarizerHits += 1;
    return 'NOPE';
  };

  const exec = makeExec({ web: webConfig, webChat });
  const result = await fetchUrl(exec, `${baseUrl}/`, { mode: 'raw' });

  assert.strictEqual(summarizerHits, 0, 'raw never summarizes');
  assert.strictEqual(result.mode, 'raw');
  assert.strictEqual(result.summarized, false);
  assert.strictEqual(result.extracted, false);
  assert.strictEqual(result.kind, 'html');

  // Everything extraction would strip must be present in the raw body.
  assert.match(result.body, /<script/);
  assert.match(result.body, /<style/);
  assert.match(result.body, /font-family/);
  assert.match(result.body, /dataLayer/);
  assert.match(result.body, /SPONSORED/);
});
|
|
239
|
+
|
|
240
|
+
test('mode="raw" still applies the token budget (raw HTML is heavier, capped)', async () => {
  // Small budget so the raw body has to be truncated.
  const budget = 300;
  const webConfig = { summarize: true, summary_model: '', max_content_tokens: budget };
  const exec = makeExec({ web: webConfig });

  const result = await fetchUrl(exec, `${baseUrl}/`, { mode: 'raw' });

  assert.strictEqual(result.content_truncated, true);
  assert.match(result.body, /\[\.\.\. truncated/);

  // The estimate may overshoot the budget by at most 60 tokens.
  const estimated = defaultEstimate(result.body);
  assert.ok(estimated <= budget + 60, 'capped near the token budget');
});
|
|
247
|
+
|
|
248
|
+
test('mode="extracted" == legacy summarize=false (Markdown, no summary)', async () => {
  // Spy summarizer that records how often it is invoked.
  let summarizerHits = 0;
  const webChat = async () => {
    summarizerHits += 1;
    return 'x';
  };
  const exec = makeExec({ webChat });

  // Same URL fetched twice: once with the mode enum, once with the legacy flag.
  const viaMode = await fetchUrl(exec, `${baseUrl}/`, { mode: 'extracted' });
  const viaLegacy = await fetchUrl(exec, `${baseUrl}/`, { summarize: false });

  assert.strictEqual(summarizerHits, 0);
  assert.strictEqual(viaMode.summarized, false);
  assert.strictEqual(viaMode.extracted, true);
  assert.strictEqual(viaLegacy.summarized, false);
  assert.strictEqual(viaMode.body, viaLegacy.body, 'extracted mode and legacy summarize=false are identical');

  assert.match(viaMode.body, /ctx\.cancel\(\)/);
  const leaksRawPage = /<script|SPONSORED/.test(viaMode.body);
  assert.ok(!leaksRawPage);
});
|
|
261
|
+
|
|
262
|
+
test('mode="summarized" == the default behavior', async () => {
  // Summarizer stub with a fixed, recognizable reply.
  const exec = makeExec({
    webChat: async () => 'SUMMARY: handlers fire in registration order.',
  });

  const explicit = await fetchUrl(exec, `${baseUrl}/`, { mode: 'summarized' });
  const implicit = await fetchUrl(exec, `${baseUrl}/`); // default

  assert.strictEqual(explicit.summarized, true);
  assert.strictEqual(implicit.summarized, true);
  assert.match(explicit.body, /SUMMARY:/);
  assert.strictEqual(explicit.body, implicit.body);
});
|
|
272
|
+
|
|
273
|
+
test('back-compat: legacy raw="true" resolves to extracted (NOT raw HTML)', async () => {
  const webChat = async () => 'x';
  const exec = makeExec({ webChat });

  const result = await fetchUrl(exec, `${baseUrl}/`, { raw: true });

  // The deprecated raw=true alias keeps its W.1 meaning: extracted Markdown.
  assert.notStrictEqual(result.mode, 'raw');
  assert.strictEqual(result.summarized, false);
  assert.strictEqual(result.extracted, true);
  assert.match(result.body, /ctx\.cancel\(\)/);

  const leaksHtml = /<script|font-family/.test(result.body);
  assert.ok(!leaksHtml, 'raw=true alias does NOT leak HTML');
});
|
|
283
|
+
|
|
284
|
+
test('precedence: explicit mode="raw" beats a legacy summarize=true', async () => {
  // Spy summarizer that records how often it is invoked.
  let summarizerHits = 0;
  const webChat = async () => {
    summarizerHits += 1;
    return 'x';
  };
  const exec = makeExec({ webChat });

  // Both options are passed together; the mode enum is expected to win.
  const options = { mode: 'raw', summarize: true };
  const result = await fetchUrl(exec, `${baseUrl}/`, options);

  assert.strictEqual(summarizerHits, 0);
  assert.strictEqual(result.mode, 'raw');
  assert.match(result.body, /<script/);
});
|