@semalt-ai/code 1.8.5 → 1.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +7 -1
- package/.github/workflows/ci.yml +69 -0
- package/ARCHITECTURE.md +6 -95
- package/CLAUDE.md +196 -316
- package/README.md +148 -4
- package/docs/ARCHITECTURE.md +1321 -0
- package/docs/CONFIG.md +340 -0
- package/docs/HISTORY.md +245 -0
- package/examples/embed.js +74 -0
- package/index.js +251 -10
- package/lib/agent.js +856 -120
- package/lib/api.js +239 -50
- package/lib/args.js +74 -2
- package/lib/audit.js +23 -1
- package/lib/background.js +584 -0
- package/lib/checkpoints.js +757 -0
- package/lib/commands/auth.js +94 -0
- package/lib/commands/chat-session.js +489 -0
- package/lib/commands/chat-slash.js +415 -0
- package/lib/commands/chat-turn.js +669 -0
- package/lib/commands/chat.js +407 -0
- package/lib/commands/custom.js +157 -0
- package/lib/commands/history-utils.js +66 -0
- package/lib/commands/index.js +268 -0
- package/lib/commands/mcp.js +113 -0
- package/lib/commands/oneshot.js +193 -0
- package/lib/commands/registry.js +269 -0
- package/lib/commands/tasks.js +89 -0
- package/lib/compact.js +87 -0
- package/lib/config.js +360 -11
- package/lib/constants.js +401 -3
- package/lib/deny.js +199 -0
- package/lib/doctor.js +160 -0
- package/lib/headless.js +202 -0
- package/lib/hooks.js +286 -0
- package/lib/images.js +270 -0
- package/lib/internals.js +49 -0
- package/lib/mcp/boundary.js +131 -0
- package/lib/mcp/client.js +270 -0
- package/lib/mcp/oauth.js +134 -0
- package/lib/memory.js +209 -0
- package/lib/metrics.js +37 -2
- package/lib/payload.js +54 -0
- package/lib/permission-rules.js +401 -0
- package/lib/permissions.js +123 -26
- package/lib/pricing.js +67 -0
- package/lib/proc.js +62 -0
- package/lib/prompts.js +99 -8
- package/lib/sandbox.js +568 -0
- package/lib/sdk.js +328 -0
- package/lib/secrets.js +211 -0
- package/lib/skills.js +223 -0
- package/lib/subagents.js +516 -0
- package/lib/tool_registry.js +2862 -0
- package/lib/tool_specs.js +263 -9
- package/lib/tools.js +352 -1039
- package/lib/ui/anim.js +86 -0
- package/lib/ui/ansi.js +17 -27
- package/lib/ui/chat-history.js +253 -71
- package/lib/ui/create-ui.js +67 -24
- package/lib/ui/diff.js +90 -25
- package/lib/ui/file-activity.js +236 -0
- package/lib/ui/format.js +195 -29
- package/lib/ui/input-field.js +21 -11
- package/lib/ui/md-stream.js +234 -0
- package/lib/ui/render-operation.js +113 -0
- package/lib/ui/select.js +1 -4
- package/lib/ui/status-bar.js +146 -36
- package/lib/ui/stream.js +20 -13
- package/lib/ui/theme.js +190 -44
- package/lib/ui/tool-operation.js +190 -0
- package/lib/ui/utils.js +9 -5
- package/lib/ui/web-activity.js +270 -0
- package/lib/ui/writer.js +159 -45
- package/lib/ui.js +1 -1
- package/lib/verify.js +229 -0
- package/lib/web-extract.js +213 -0
- package/lib/web-summarize.js +68 -0
- package/package.json +19 -4
- package/scripts/lint.js +57 -0
- package/test/agent-loop.test.js +389 -0
- package/test/anim-driver.test.js +153 -0
- package/test/ask-user-display.test.js +226 -0
- package/test/ask-user-gate.test.js +231 -0
- package/test/background.test.js +414 -0
- package/test/chat-history-nocolor.test.js +155 -0
- package/test/chat-relogin.test.js +207 -0
- package/test/chat.test.js +114 -0
- package/test/checkpoints-agent.test.js +181 -0
- package/test/checkpoints.test.js +650 -0
- package/test/command-registry.test.js +160 -0
- package/test/compact.test.js +116 -0
- package/test/completion-lazy.test.js +52 -0
- package/test/config-merge.test.js +324 -0
- package/test/config-quarantine.test.js +128 -0
- package/test/config-write-guard-allow-anywhere.test.js +56 -0
- package/test/config-write-guard-skip.test.js +46 -0
- package/test/config-write-guard.test.js +153 -0
- package/test/context-split.test.js +215 -0
- package/test/cost-doctor.test.js +142 -0
- package/test/custom-commands-chat.test.js +106 -0
- package/test/custom-commands.test.js +230 -0
- package/test/defer-detail-band.test.js +403 -0
- package/test/deny-windows.test.js +120 -0
- package/test/deny.test.js +83 -0
- package/test/detail-band-tab-flatten.test.js +242 -0
- package/test/download-allow-anywhere.test.js +66 -0
- package/test/download-confine.test.js +153 -0
- package/test/exec-diff.test.js +268 -0
- package/test/executors.test.js +599 -0
- package/test/extract-tool-calls.test.js +349 -0
- package/test/fetch-url-validation.test.js +219 -0
- package/test/file-activity.test.js +522 -0
- package/test/fixtures/tool-calls.js +57 -0
- package/test/fixtures/web-page.js +91 -0
- package/test/git-tools.test.js +384 -0
- package/test/grep-glob-serialize.test.js +242 -0
- package/test/grep-glob.test.js +268 -0
- package/test/grep-path-target.test.js +227 -0
- package/test/harness/README.md +57 -0
- package/test/harness/chat-harness.js +143 -0
- package/test/harness/memwarn-headless-child.js +65 -0
- package/test/harness/mock-llm.js +120 -0
- package/test/harness/mock-mcp-server.js +142 -0
- package/test/harness/sse-server.js +69 -0
- package/test/headless.test.js +348 -0
- package/test/history-utils.test.js +88 -0
- package/test/hooks-agent.test.js +238 -0
- package/test/hooks-verify-sandbox.test.js +232 -0
- package/test/hooks.test.js +216 -0
- package/test/http-get-user-agent.test.js +142 -0
- package/test/images-api.test.js +208 -0
- package/test/images.test.js +238 -0
- package/test/input-field-ctrl-o.test.js +37 -0
- package/test/live-height-physical.test.js +281 -0
- package/test/max-iterations.test.js +218 -0
- package/test/mcp-boundary.test.js +57 -0
- package/test/mcp-client.test.js +267 -0
- package/test/mcp-oauth.test.js +86 -0
- package/test/md-stream.test.js +183 -0
- package/test/memory-truncation-warning.test.js +222 -0
- package/test/memory.test.js +198 -0
- package/test/native-dispatch.test.js +409 -0
- package/test/native-live-narration.test.js +254 -0
- package/test/output-chokepoint.test.js +188 -0
- package/test/output-heredoc-leak.test.js +195 -0
- package/test/output-preview.test.js +245 -0
- package/test/path-guards.test.js +134 -0
- package/test/payload.test.js +99 -0
- package/test/permission-rules-agent.test.js +210 -0
- package/test/permission-rules.test.js +297 -0
- package/test/permissions.test.js +362 -0
- package/test/plan-mode.test.js +167 -0
- package/test/read-paginate.test.js +275 -0
- package/test/readonly-tools.test.js +177 -0
- package/test/render-operation.test.js +317 -0
- package/test/replay-descriptor-xml.test.js +216 -0
- package/test/replay-descriptor.test.js +189 -0
- package/test/replay-web-aggregate.test.js +291 -0
- package/test/replay-web-persist.test.js +241 -0
- package/test/result-cap.test.js +233 -0
- package/test/running-glyph-anim.test.js +111 -0
- package/test/sandbox-agent.test.js +147 -0
- package/test/sandbox-integration.test.js +216 -0
- package/test/sandbox.test.js +408 -0
- package/test/sdk.test.js +234 -0
- package/test/shell-output-cap.test.js +181 -0
- package/test/skills-chat.test.js +110 -0
- package/test/skills.test.js +295 -0
- package/test/smoke.test.js +68 -0
- package/test/status-bar-driver.test.js +93 -0
- package/test/status-bar-pause.test.js +164 -0
- package/test/status-bar-resync.test.js +188 -0
- package/test/stream-parser.test.js +171 -0
- package/test/subagents-agent.test.js +178 -0
- package/test/subagents.test.js +222 -0
- package/test/theme-palette.test.js +166 -0
- package/test/tool-registry.test.js +85 -0
- package/test/trim-budget.test.js +101 -0
- package/test/truncate-visible.test.js +78 -0
- package/test/verify-agent.test.js +317 -0
- package/test/verify.test.js +141 -0
- package/test/view-image.test.js +199 -0
- package/test/web-activity-ordering.test.js +203 -0
- package/test/web-activity.test.js +207 -0
- package/test/web-data-extraction-guidance.test.js +71 -0
- package/test/web-extract.test.js +185 -0
- package/test/web-fetch-agent.test.js +291 -0
- package/test/web-fetch-mode.test.js +193 -0
- package/test/web-search.test.js +380 -0
- package/lib/commands.js +0 -1438
- package/path +0 -1
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Task W.5 — grep/glob result serialization + output modes + head_limit.
|
|
4
|
+
//
|
|
5
|
+
// THE BUG these tests pin: formatFileResult had no `grep`/`glob` case, so both
|
|
6
|
+
// fell through to the default and the model received "grep: done" / "glob: done"
|
|
7
|
+
// — the engine computed the matches (and the UI even printed a count) but the
|
|
8
|
+
// STRUCTURED RESULT WAS DROPPED before it entered context. These tests assert
|
|
9
|
+
// what the MODEL receives (the audit's empirical method), not just what the
|
|
10
|
+
// engine returns: the pure serializers AND the real agent loop feeding the tool
|
|
11
|
+
// result back to the model.
|
|
12
|
+
|
|
13
|
+
const { test, before, after } = require('node:test');
|
|
14
|
+
const assert = require('node:assert');
|
|
15
|
+
const fs = require('fs');
|
|
16
|
+
const os = require('os');
|
|
17
|
+
const path = require('path');
|
|
18
|
+
|
|
19
|
+
const ui = require('../lib/ui');
|
|
20
|
+
const { createApiClient } = require('../lib/api');
|
|
21
|
+
const { createToolExecutor, extractToolCalls } = require('../lib/tools');
|
|
22
|
+
const { createPermissionManager } = require('../lib/permissions');
|
|
23
|
+
const { createAgentRunner, formatGrepResult, formatGlobResult } = require('../lib/agent');
|
|
24
|
+
const { startMockLLM } = require('./harness/mock-llm');
|
|
25
|
+
|
|
26
|
+
// ---------------------------------------------------------------------------
|
|
27
|
+
// Part A — pure model-facing serialization (formatGrepResult / formatGlobResult)
|
|
28
|
+
// ---------------------------------------------------------------------------
|
|
29
|
+
|
|
30
|
+
/**
 * Build a grep-result fixture object for the formatGrepResult tests below.
 *
 * Defaults applied: pattern 'TODO', count = matches.length, output_mode 'content'.
 *
 * @param {Array} matches - match records; the tests in this file pass
 *   `{ file, line, text }` objects.
 * @param {object} [extra] - extra fields (e.g. `output_mode`, `head_limit`);
 *   spread last, so they override the defaults above.
 * @returns {object} the assembled result object.
 */
function grepResult(matches, extra = {}) {
|
|
31
|
+
return { matches, pattern: 'TODO', count: matches.length, output_mode: 'content', ...extra };
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
test('content mode (default): serializes file:line:text per match — NOT "grep: done"', () => {
|
|
35
|
+
const out = formatGrepResult(grepResult([
|
|
36
|
+
{ file: 'a.js', line: 3, text: ' // TODO fix' },
|
|
37
|
+
{ file: 'b.js', line: 9, text: 'x = 1 // TODO' },
|
|
38
|
+
]));
|
|
39
|
+
assert.match(out, /a\.js:3: \/\/ TODO fix/);
|
|
40
|
+
assert.match(out, /b\.js:9:x = 1 \/\/ TODO/);
|
|
41
|
+
assert.doesNotMatch(out, /grep: done/);
|
|
42
|
+
assert.match(out, /2 match\(es\)/);
|
|
43
|
+
});
|
|
44
|
+
|
|
45
|
+
test('count mode: per-file + total counts, NO line text', () => {
|
|
46
|
+
const out = formatGrepResult(grepResult([
|
|
47
|
+
{ file: 'a.js', line: 3, text: 'TODO one' },
|
|
48
|
+
{ file: 'a.js', line: 7, text: 'TODO two' },
|
|
49
|
+
{ file: 'b.js', line: 1, text: 'TODO three' },
|
|
50
|
+
], { output_mode: 'count' }));
|
|
51
|
+
assert.match(out, /3 match\(es\) in 2 file\(s\)/);
|
|
52
|
+
assert.match(out, /a\.js: 2/);
|
|
53
|
+
assert.match(out, /b\.js: 1/);
|
|
54
|
+
// count mode must not leak the line content
|
|
55
|
+
assert.doesNotMatch(out, /TODO one/);
|
|
56
|
+
});
|
|
57
|
+
|
|
58
|
+
test('files_with_matches mode: unique paths only, no line content', () => {
|
|
59
|
+
const out = formatGrepResult(grepResult([
|
|
60
|
+
{ file: 'a.js', line: 3, text: 'TODO one' },
|
|
61
|
+
{ file: 'a.js', line: 7, text: 'TODO two' },
|
|
62
|
+
{ file: 'b.js', line: 1, text: 'TODO three' },
|
|
63
|
+
], { output_mode: 'files_with_matches' }));
|
|
64
|
+
assert.match(out, /2 file\(s\) with matches/);
|
|
65
|
+
assert.match(out, /^a\.js$/m);
|
|
66
|
+
assert.match(out, /^b\.js$/m);
|
|
67
|
+
// no per-line text and no file:line:text references (header echoes the pattern,
|
|
68
|
+
// so we check for line-content shape, not the literal pattern string)
|
|
69
|
+
assert.doesNotMatch(out, /:\d+:/);
|
|
70
|
+
assert.doesNotMatch(out, /TODO (one|two|three)/);
|
|
71
|
+
});
|
|
72
|
+
|
|
73
|
+
test('head_limit BOUNDS context (content): over-limit shows only N + truncation notice', () => {
|
|
74
|
+
const matches = [];
|
|
75
|
+
for (let i = 1; i <= 50; i++) matches.push({ file: 'f.js', line: i, text: `TODO ${i}` });
|
|
76
|
+
const out = formatGrepResult(grepResult(matches, { head_limit: 10 }));
|
|
77
|
+
// exactly 10 file:line:text lines serialized
|
|
78
|
+
const shownLines = out.split('\n').filter((l) => /^f\.js:\d+:/.test(l));
|
|
79
|
+
assert.strictEqual(shownLines.length, 10, 'serialized lines bounded to head_limit');
|
|
80
|
+
assert.match(out, /40 more match\(es\) not shown/);
|
|
81
|
+
assert.match(out, /head_limit/);
|
|
82
|
+
// the full count is still reported honestly
|
|
83
|
+
assert.match(out, /50 match\(es\)/);
|
|
84
|
+
});
|
|
85
|
+
|
|
86
|
+
test('head_limit PAIRED POSITIVE: under-limit shows all with NO truncation notice', () => {
|
|
87
|
+
const matches = [];
|
|
88
|
+
for (let i = 1; i <= 5; i++) matches.push({ file: 'f.js', line: i, text: `TODO ${i}` });
|
|
89
|
+
const out = formatGrepResult(grepResult(matches, { head_limit: 10 }));
|
|
90
|
+
const shownLines = out.split('\n').filter((l) => /^f\.js:\d+:/.test(l));
|
|
91
|
+
assert.strictEqual(shownLines.length, 5, 'all matches shown');
|
|
92
|
+
assert.doesNotMatch(out, /more match\(es\) not shown/);
|
|
93
|
+
assert.doesNotMatch(out, /not shown/);
|
|
94
|
+
});
|
|
95
|
+
|
|
96
|
+
test('grep no-match → clear no-matches line, not "done"', () => {
|
|
97
|
+
const out = formatGrepResult(grepResult([]));
|
|
98
|
+
assert.match(out, /no matches/);
|
|
99
|
+
assert.doesNotMatch(out, /grep: done/);
|
|
100
|
+
});
|
|
101
|
+
|
|
102
|
+
test('glob: serializes the file list — NOT "glob: done"', () => {
|
|
103
|
+
const out = formatGlobResult({ files: [{ path: 'a.ts' }, { path: 'src/b.ts' }], pattern: '*.ts', count: 2, head_limit: 100 });
|
|
104
|
+
assert.match(out, /^a\.ts$/m);
|
|
105
|
+
assert.match(out, /^src\/b\.ts$/m);
|
|
106
|
+
assert.doesNotMatch(out, /glob: done/);
|
|
107
|
+
assert.match(out, /2 file\(s\)/);
|
|
108
|
+
});
|
|
109
|
+
|
|
110
|
+
test('glob head_limit bounds + paired under-limit no notice', () => {
|
|
111
|
+
const many = [];
|
|
112
|
+
for (let i = 0; i < 40; i++) many.push({ path: `f${i}.ts` });
|
|
113
|
+
const bounded = formatGlobResult({ files: many, pattern: '*.ts', count: 40, head_limit: 10 });
|
|
114
|
+
const shown = bounded.split('\n').filter((l) => /^f\d+\.ts$/.test(l));
|
|
115
|
+
assert.strictEqual(shown.length, 10);
|
|
116
|
+
assert.match(bounded, /30 more file\(s\) not shown/);
|
|
117
|
+
|
|
118
|
+
const few = [{ path: 'a.ts' }, { path: 'b.ts' }];
|
|
119
|
+
const full = formatGlobResult({ files: few, pattern: '*.ts', count: 2, head_limit: 10 });
|
|
120
|
+
assert.doesNotMatch(full, /not shown/);
|
|
121
|
+
});
|
|
122
|
+
|
|
123
|
+
test('spec + prompt accurately describe the modes and what is returned', () => {
|
|
124
|
+
const { TOOL_SPECS } = require('../lib/tool_specs');
|
|
125
|
+
const { TOOL_TAG_SPECS: TAG_DOCS } = require('../lib/prompts');
|
|
126
|
+
// tool_specs grep: documents all three modes + head_limit, and the modes are
|
|
127
|
+
// model-selectable (enum present).
|
|
128
|
+
const grepDesc = TOOL_SPECS.grep.description;
|
|
129
|
+
for (const m of ['content', 'files_with_matches', 'count']) assert.ok(grepDesc.includes(m), `grep desc mentions ${m}`);
|
|
130
|
+
assert.ok(grepDesc.includes('head_limit'), 'grep desc mentions head_limit');
|
|
131
|
+
assert.deepStrictEqual(TOOL_SPECS.grep.parameters.properties.output_mode.enum, ['content', 'files_with_matches', 'count']);
|
|
132
|
+
assert.ok(TOOL_SPECS.grep.parameters.properties.head_limit, 'grep spec exposes head_limit');
|
|
133
|
+
assert.ok(TOOL_SPECS.glob.parameters.properties.head_limit, 'glob spec exposes head_limit');
|
|
134
|
+
// prompts.js no longer lies: grep purpose advertises the real serialized shape.
|
|
135
|
+
if (TAG_DOCS && TAG_DOCS.grep) {
|
|
136
|
+
assert.ok(/file:line:text/.test(TAG_DOCS.grep.purpose), 'prompt grep purpose names the real return shape');
|
|
137
|
+
assert.ok(/output_mode/.test(TAG_DOCS.grep.purpose));
|
|
138
|
+
}
|
|
139
|
+
});
|
|
140
|
+
|
|
141
|
+
// ---------------------------------------------------------------------------
|
|
142
|
+
// Part B — end-to-end through the REAL agent loop (the regression):
|
|
143
|
+
// a <grep>/<glob> tag must feed the actual matches back to the model.
|
|
144
|
+
// ---------------------------------------------------------------------------
|
|
145
|
+
|
|
146
|
+
let prevKey;
|
|
147
|
+
let prevCwd;
|
|
148
|
+
let tmpDir;
|
|
149
|
+
|
|
150
|
+
before(() => {
|
|
151
|
+
prevKey = process.env.SEMALT_API_KEY;
|
|
152
|
+
process.env.SEMALT_API_KEY = 'test-key';
|
|
153
|
+
prevCwd = process.cwd();
|
|
154
|
+
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'semalt-w5-'));
|
|
155
|
+
fs.writeFileSync(path.join(tmpDir, 'alpha.js'), 'line one\nconst NEEDLE = 1; // here\nplain\n');
|
|
156
|
+
fs.writeFileSync(path.join(tmpDir, 'beta.js'), 'no match here\nanother NEEDLE token\n');
|
|
157
|
+
fs.writeFileSync(path.join(tmpDir, 'gamma.ts'), 'irrelevant\n');
|
|
158
|
+
process.chdir(tmpDir);
|
|
159
|
+
});
|
|
160
|
+
|
|
161
|
+
after(() => {
|
|
162
|
+
process.chdir(prevCwd);
|
|
163
|
+
if (prevKey === undefined) delete process.env.SEMALT_API_KEY;
|
|
164
|
+
else process.env.SEMALT_API_KEY = prevKey;
|
|
165
|
+
try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch {}
|
|
166
|
+
});
|
|
167
|
+
|
|
168
|
+
/**
 * Wire up an agent runner whose API client talks to the endpoint at `base`.
 *
 * Creates, in order: an in-memory config object, an API client, a permission
 * manager (constructed with `skipPermissions: true` and no-op UI callbacks),
 * a tool executor, and finally the agent runner that ties them together.
 *
 * @param {string} base - value stored as `api_base` in the config; callers in
 *   this file pass `mock.base` from the mock LLM harness.
 * @returns {object} the object returned by createAgentRunner.
 */
function buildRunner(base) {
|
|
169
|
+
const config = {
|
|
170
|
+
api_base: base, api_key: 'test-key', default_model: 'test-model',
|
|
171
|
+
temperature: 0.5, request_timeout_ms: 5000, stream: true, models: [],
|
|
172
|
+
sandbox: { mode: 'off' },
|
|
173
|
+
};
|
|
174
|
+
// getConfig/saveConfig close over the single `config` object above;
|
|
// saveConfig merges into it in place rather than persisting anything.
const getConfig = () => config;
|
|
175
|
+
const saveConfig = (c) => Object.assign(config, c);
|
|
176
|
+
const api = createApiClient({ getConfig, saveConfig, ui });
|
|
177
|
+
const pm = createPermissionManager(ui, { skipPermissions: true });
|
|
178
|
+
// All UI callbacks are no-ops; onCaptureNavigation returns a no-op function.
pm.setUICallbacks({ onAddMessage: () => {}, onShowModal: () => {}, onCloseModal: () => {}, onCaptureNavigation: () => () => {} });
|
|
179
|
+
const { agentExecShell, agentExecFile, describePermission } = createToolExecutor(pm, ui, getConfig);
|
|
180
|
+
const runner = createAgentRunner({
|
|
181
|
+
chatStream: api.chatStream, extractToolCalls, agentExecShell, agentExecFile,
|
|
182
|
+
describePermission, permissionManager: pm, ui, getConfig,
|
|
183
|
+
});
|
|
184
|
+
return runner;
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
/**
 * Run the agent loop once against a fresh mock LLM that replies first with
 * `toolTag` and then with 'Done.'.
 *
 * @param {string} toolTag - the first queued mock reply (an XML tool tag such
 *   as '<grep pattern="NEEDLE"/>' in the tests below).
 * @returns {Promise<{tools: Array<{tag: *, result: *}>, fedBack: (object|undefined)}>}
 *   `tools` holds every (tag, result) pair reported through the onToolEnd
 *   callback; `fedBack` is the first user-role message in the conversation
 *   whose content matches /Tool execution results/, or undefined if none.
 */
async function runOneToolTurn(toolTag) {
|
|
188
|
+
const mock = await startMockLLM();
|
|
189
|
+
mock.replyWith(toolTag);
|
|
190
|
+
mock.replyWith('Done.');
|
|
191
|
+
try {
|
|
192
|
+
const runner = buildRunner(mock.base);
|
|
193
|
+
const tools = [];
|
|
194
|
+
// Only onToolEnd records anything; every other callback is a no-op.
const cb = {
|
|
195
|
+
onToken: () => {}, onToolStart: () => {},
|
|
196
|
+
onToolEnd: (tag, result) => tools.push({ tag, result }),
|
|
197
|
+
onError: () => {}, onRetry: () => {}, onAssistantMessage: () => {},
|
|
198
|
+
};
|
|
199
|
+
const messages = [{ role: 'user', content: 'go' }];
|
|
200
|
+
// NOTE(review): the meaning of the positional `10` and `null` arguments is
|
|
// defined by runAgentLoop in lib/agent and is not visible here — confirm there.
await runner.runAgentLoop(messages, 'test-model', 10, null, { callbacks: cb });
|
|
201
|
+
// `messages` is searched after the loop returns, i.e. the loop is expected
|
|
// to have appended the tool-result message to this same array.
const fedBack = messages.find((m) => m.role === 'user' && /Tool execution results/.test(m.content));
|
|
202
|
+
return { tools, fedBack };
|
|
203
|
+
} finally {
|
|
204
|
+
// Always shut the mock down, even if the loop threw; `close` is optional.
await mock.close?.();
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
test('REGRESSION: real loop — grep feeds actual matches (file:line:text) to the model, not "grep: done"', async () => {
|
|
209
|
+
const { tools, fedBack } = await runOneToolTurn('<grep pattern="NEEDLE"/>');
|
|
210
|
+
assert.strictEqual(tools.length, 1);
|
|
211
|
+
const result = tools[0].result;
|
|
212
|
+
assert.doesNotMatch(result, /grep: done/, 'the W.5 bug (undelivered result) must be closed');
|
|
213
|
+
assert.match(result, /alpha\.js:2:.*NEEDLE/);
|
|
214
|
+
assert.match(result, /beta\.js:2:.*NEEDLE/);
|
|
215
|
+
// and it actually reached the messages array the model sees next turn
|
|
216
|
+
assert.ok(fedBack && /NEEDLE/.test(fedBack.content), 'matches present in the fed-back tool message');
|
|
217
|
+
});
|
|
218
|
+
|
|
219
|
+
// End-to-end regression: a <glob> tag run through the real agent loop must
// report the matched file list to onToolEnd instead of the placeholder
// "glob: done".
test('REGRESSION: real loop — glob feeds the file list to the model, not "glob: done"', async () => {
  const { tools } = await runOneToolTurn('<glob pattern="*.js"/>');
  // Guard before indexing: if no tool ran, fail with a readable assertion
  // rather than a TypeError on `tools[0].result`.
  assert.strictEqual(tools.length, 1, 'exactly one tool call should have completed');
  const result = tools[0].result;
  assert.doesNotMatch(result, /glob: done/);
  assert.match(result, /alpha\.js/);
  assert.match(result, /beta\.js/);
  assert.doesNotMatch(result, /gamma\.ts/); // *.js glob excludes the .ts file
});
|
|
227
|
+
|
|
228
|
+
// End-to-end: output_mode="count" on the tag must produce per-file and total
// counts and must not include any matched source line text.
test('real loop — grep output_mode="count" yields counts, no line text', async () => {
  const { tools } = await runOneToolTurn('<grep pattern="NEEDLE" output_mode="count"/>');
  // Guard before indexing: if no tool ran, fail with a readable assertion
  // rather than a TypeError on `tools[0].result`.
  assert.strictEqual(tools.length, 1, 'exactly one tool call should have completed');
  const result = tools[0].result;
  assert.match(result, /2 match\(es\) in 2 file\(s\)/);
  // The fixture has exactly one NEEDLE line in each of alpha.js and beta.js.
  assert.match(result, /alpha\.js: 1/);
  assert.match(result, /beta\.js: 1/);
  assert.doesNotMatch(result, /const NEEDLE/); // no source line text in count mode
});
|
|
235
|
+
|
|
236
|
+
// End-to-end: head_limit on the tag must reach the serializer, so only one of
// the two NEEDLE matches is shown and the remainder is reported as omitted.
test('real loop — head_limit threads end-to-end and bounds the model-facing result', async () => {
  const { tools } = await runOneToolTurn('<grep pattern="NEEDLE" head_limit="1"/>');
  // Guard before indexing: if no tool ran, fail with a readable assertion
  // rather than a TypeError on `tools[0].result`.
  assert.strictEqual(tools.length, 1, 'exactly one tool call should have completed');
  const result = tools[0].result;
  const shown = result.split('\n').filter((l) => /\.js:\d+:/.test(l));
  assert.strictEqual(shown.length, 1, 'bounded to head_limit=1');
  assert.match(result, /1 more match\(es\) not shown/);
});
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Characterization tests for the first-class `grep` and `glob` tools (Task 2.1).
|
|
4
|
+
// Written tests-first. All fs work is isolated under a temp $HOME (so the audit
|
|
5
|
+
// log resolves there) and a temp working directory (so isPathSafe permits the
|
|
6
|
+
// search root and grep/glob confine to it).
|
|
7
|
+
//
|
|
8
|
+
// The central guarantee proven here: grep produces BYTE-IDENTICAL output whether
|
|
9
|
+
// it runs through ripgrep (`rg`) or the pure-Node fallback. The two engines are
|
|
10
|
+
// exercised explicitly via the exported `_grepSearch({ engine })` seam and their
|
|
11
|
+
// results are deep-compared. The rg branch is skipped (visibly, not silently)
|
|
12
|
+
// when ripgrep is not on PATH.
|
|
13
|
+
|
|
14
|
+
const os = require('node:os');
|
|
15
|
+
const fs = require('node:fs');
|
|
16
|
+
const path = require('node:path');
|
|
17
|
+
|
|
18
|
+
// Redirect home-based paths (audit log) into a temp dir BEFORE any lib module is
|
|
19
|
+
// required — those paths are computed at module load.
|
|
20
|
+
const TMP_HOME = fs.mkdtempSync(path.join(os.tmpdir(), 'semalt-home-'));
|
|
21
|
+
const PREV_HOME = process.env.HOME;
|
|
22
|
+
const PREV_USERPROFILE = process.env.USERPROFILE;
|
|
23
|
+
process.env.HOME = TMP_HOME;
|
|
24
|
+
process.env.USERPROFILE = TMP_HOME;
|
|
25
|
+
|
|
26
|
+
const { test, before, after } = require('node:test');
|
|
27
|
+
const assert = require('node:assert');
|
|
28
|
+
|
|
29
|
+
const ui = require('../lib/ui');
|
|
30
|
+
const { createPermissionManager } = require('../lib/permissions');
|
|
31
|
+
const { createToolExecutor } = require('../lib/tools');
|
|
32
|
+
const {
|
|
33
|
+
fromInvoke,
|
|
34
|
+
registryToolNames,
|
|
35
|
+
_grepSearch,
|
|
36
|
+
_globSearch,
|
|
37
|
+
_detectRipgrep,
|
|
38
|
+
} = require('../lib/tool_registry');
|
|
39
|
+
const { extractToolCalls } = require('../lib/tools');
|
|
40
|
+
|
|
41
|
+
const HAVE_RG = !!_detectRipgrep();
|
|
42
|
+
|
|
43
|
+
let exec;
|
|
44
|
+
let CWD;
|
|
45
|
+
let PREV_CWD;
|
|
46
|
+
|
|
47
|
+
// ---------------------------------------------------------------------------
|
|
48
|
+
// Fixture tree (mirrors the rg empirical probe used to pin canonical semantics)
|
|
49
|
+
// ---------------------------------------------------------------------------
|
|
50
|
+
|
|
51
|
+
/**
 * Populate `root` with the fixture tree the grep/glob tests search.
 *
 * The tree contains files that hold "TODO" and are expected to be found
 * (a.txt, sub/c.txt, f.md), a file without it (b.txt), and files the trailing
 * comments mark as expected to be skipped: one containing a NUL byte, one
 * under node_modules/, and three covered by the .gitignore written here.
 *
 * @param {string} root - directory to write into; parent directories of each
 *   fixture file are created as needed.
 * @returns {void}
 */
function writeFixture(root) {
|
|
52
|
+
// w(rel, data): write `data` to root/rel, creating parent directories first.
const w = (rel, data) => {
|
|
53
|
+
const p = path.join(root, rel);
|
|
54
|
+
fs.mkdirSync(path.dirname(p), { recursive: true });
|
|
55
|
+
fs.writeFileSync(p, data);
|
|
56
|
+
};
|
|
57
|
+
w('a.txt', 'hello TODO world\nsecond line\nTODO again\n');
|
|
58
|
+
w('b.txt', 'nothing here\n');
|
|
59
|
+
w('sub/c.txt', 'deep TODO\n');
|
|
60
|
+
w('f.md', 'TODO md\n');
|
|
61
|
+
w('bin.dat', Buffer.from('TODO\x00binary\n', 'binary')); // NUL → binary, must be skipped
|
|
62
|
+
w('node_modules/d.txt', 'TODO in node_modules\n'); // large dir → skipped
|
|
63
|
+
w('.gitignore', 'ignored.txt\n*.log\nskip/\n');
|
|
64
|
+
w('ignored.txt', 'TODO ignored\n'); // gitignore name → skipped
|
|
65
|
+
w('x.log', 'TODO log\n'); // gitignore *.log → skipped
|
|
66
|
+
w('skip/e.txt', 'TODO skipdir\n'); // gitignore skip/ → skipped
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
// Per-file setup: switch into a fresh temp working directory holding the
// fixture tree, then build the tool executor used by the dispatch tests.
before(() => {
  PREV_CWD = process.cwd();
  // realpathSync resolves any symlinked components of the temp path, so CWD
  // holds the canonical path that process.cwd() reports after the chdir.
  // (The original called realpathSync twice and discarded the first result.)
  CWD = fs.realpathSync(fs.mkdtempSync(path.join(os.tmpdir(), 'semalt-cwd-')));
  process.chdir(CWD);
  writeFixture(CWD);
  const pm = createPermissionManager(ui, {});
  exec = createToolExecutor(pm, ui, () => ({ max_file_size_kb: 512, command_timeout_ms: 30000 }));
});
|
|
79
|
+
|
|
80
|
+
// Per-file teardown: restore the working directory and the HOME/USERPROFILE
// overrides, then remove the temp directories this file created.
after(() => {
  // Leave the temp cwd first so it can be deleted (a process cannot reliably
  // remove its own working directory on every platform).
  process.chdir(PREV_CWD);
  if (PREV_HOME === undefined) delete process.env.HOME; else process.env.HOME = PREV_HOME;
  if (PREV_USERPROFILE === undefined) delete process.env.USERPROFILE; else process.env.USERPROFILE = PREV_USERPROFILE;
  // Best-effort cleanup so repeated runs do not accumulate semalt-cwd-* and
  // semalt-home-* directories under the OS temp dir. CWD is unset if the
  // before() hook failed early, hence the guard.
  for (const dir of [CWD, TMP_HOME]) {
    if (!dir) continue;
    try {
      fs.rmSync(dir, { recursive: true, force: true });
    } catch {
      // Deliberately ignored: a failed cleanup must not fail the test run.
    }
  }
});
|
|
85
|
+
|
|
86
|
+
const EXPECTED_TODO = [
|
|
87
|
+
{ file: 'a.txt', line: 1, text: 'hello TODO world' },
|
|
88
|
+
{ file: 'a.txt', line: 3, text: 'TODO again' },
|
|
89
|
+
{ file: 'f.md', line: 1, text: 'TODO md' },
|
|
90
|
+
{ file: 'sub/c.txt', line: 1, text: 'deep TODO' },
|
|
91
|
+
];
|
|
92
|
+
|
|
93
|
+
// ---------------------------------------------------------------------------
|
|
94
|
+
// grep — Node engine (always runs, the reference behavior)
|
|
95
|
+
// ---------------------------------------------------------------------------
|
|
96
|
+
|
|
97
|
+
test('grep (node): regex match across the tree returns file/line/text', () => {
|
|
98
|
+
const r = _grepSearch({ pattern: 'TODO', baseDir: CWD, engine: 'node' });
|
|
99
|
+
assert.deepStrictEqual(r.matches, EXPECTED_TODO);
|
|
100
|
+
assert.strictEqual(r.pattern, 'TODO');
|
|
101
|
+
assert.strictEqual(r.count, 4);
|
|
102
|
+
});
|
|
103
|
+
|
|
104
|
+
test('grep (node): gitignored, binary, hidden, and node_modules files are skipped', () => {
|
|
105
|
+
const r = _grepSearch({ pattern: 'TODO', baseDir: CWD, engine: 'node' });
|
|
106
|
+
const files = r.matches.map((m) => m.file);
|
|
107
|
+
for (const skipped of ['ignored.txt', 'x.log', 'skip/e.txt', 'bin.dat', 'node_modules/d.txt']) {
|
|
108
|
+
assert.ok(!files.includes(skipped), `${skipped} must be skipped`);
|
|
109
|
+
}
|
|
110
|
+
});
|
|
111
|
+
|
|
112
|
+
test('grep (node): zero matches returns an empty list, not an error', () => {
|
|
113
|
+
const r = _grepSearch({ pattern: 'ZZZ_NO_SUCH_TOKEN', baseDir: CWD, engine: 'node' });
|
|
114
|
+
assert.deepStrictEqual(r.matches, []);
|
|
115
|
+
assert.strictEqual(r.count, 0);
|
|
116
|
+
assert.strictEqual(r.error, undefined);
|
|
117
|
+
});
|
|
118
|
+
|
|
119
|
+
test('grep (node): case-insensitive flag widens the match', () => {
|
|
120
|
+
const sensitive = _grepSearch({ pattern: 'todo', baseDir: CWD, engine: 'node' });
|
|
121
|
+
assert.deepStrictEqual(sensitive.matches, []);
|
|
122
|
+
const insensitive = _grepSearch({ pattern: 'todo', baseDir: CWD, engine: 'node', ignoreCase: true });
|
|
123
|
+
assert.deepStrictEqual(insensitive.matches, EXPECTED_TODO);
|
|
124
|
+
});
|
|
125
|
+
|
|
126
|
+
test('grep (node): regex metacharacters work', () => {
|
|
127
|
+
const r = _grepSearch({ pattern: 'TODO\\s\\w+', baseDir: CWD, engine: 'node' });
|
|
128
|
+
assert.deepStrictEqual(r.matches, [
|
|
129
|
+
{ file: 'a.txt', line: 1, text: 'hello TODO world' },
|
|
130
|
+
{ file: 'a.txt', line: 3, text: 'TODO again' },
|
|
131
|
+
{ file: 'f.md', line: 1, text: 'TODO md' },
|
|
132
|
+
]);
|
|
133
|
+
});
|
|
134
|
+
|
|
135
|
+
test('grep (node): path glob filter restricts which files are searched', () => {
|
|
136
|
+
const r = _grepSearch({ pattern: 'TODO', baseDir: CWD, engine: 'node', pathGlob: '*.md' });
|
|
137
|
+
assert.deepStrictEqual(r.matches, [{ file: 'f.md', line: 1, text: 'TODO md' }]);
|
|
138
|
+
});
|
|
139
|
+
|
|
140
|
+
test('grep (node): an invalid regex is reported as an error (not a throw)', () => {
|
|
141
|
+
const r = _grepSearch({ pattern: '(', baseDir: CWD, engine: 'node' });
|
|
142
|
+
assert.ok(r.error && /regex|pattern/i.test(r.error));
|
|
143
|
+
});
|
|
144
|
+
|
|
145
|
+
// ---------------------------------------------------------------------------
|
|
146
|
+
// grep — rg vs Node parity (the headline guarantee). Skipped without ripgrep.
|
|
147
|
+
// ---------------------------------------------------------------------------
|
|
148
|
+
|
|
149
|
+
const rgOpts = HAVE_RG ? {} : { skip: 'ripgrep (rg) not on PATH' };
|
|
150
|
+
|
|
151
|
+
for (const c of [
|
|
152
|
+
{ label: 'plain literal', params: { pattern: 'TODO' } },
|
|
153
|
+
{ label: 'regex metachars', params: { pattern: 'TODO\\s\\w+' } },
|
|
154
|
+
{ label: 'case-insensitive', params: { pattern: 'todo', ignoreCase: true } },
|
|
155
|
+
{ label: 'path glob filter', params: { pattern: 'TODO', pathGlob: '*.md' } },
|
|
156
|
+
{ label: 'zero matches', params: { pattern: 'ZZZ_NO_SUCH_TOKEN' } },
|
|
157
|
+
]) {
|
|
158
|
+
test(`grep parity (rg === node): ${c.label}`, rgOpts, () => {
|
|
159
|
+
const viaRg = _grepSearch({ ...c.params, baseDir: CWD, engine: 'rg' });
|
|
160
|
+
const viaNode = _grepSearch({ ...c.params, baseDir: CWD, engine: 'node' });
|
|
161
|
+
assert.strictEqual(viaRg.error, undefined, 'rg path produced no error');
|
|
162
|
+
assert.deepStrictEqual(viaRg, viaNode, 'rg and node outputs are byte-identical');
|
|
163
|
+
});
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
test('grep auto-engine resolves to rg when present and matches node', rgOpts, () => {
|
|
167
|
+
const auto = _grepSearch({ pattern: 'TODO', baseDir: CWD, engine: 'auto' });
|
|
168
|
+
const node = _grepSearch({ pattern: 'TODO', baseDir: CWD, engine: 'node' });
|
|
169
|
+
assert.deepStrictEqual(auto, node);
|
|
170
|
+
});
|
|
171
|
+
|
|
172
|
+
// ---------------------------------------------------------------------------
|
|
173
|
+
// glob
|
|
174
|
+
// ---------------------------------------------------------------------------
|
|
175
|
+
|
|
176
|
+
test('glob: basename pattern matches at any depth with metadata', () => {
|
|
177
|
+
const r = _globSearch({ pattern: '*.txt', baseDir: CWD });
|
|
178
|
+
const paths = r.files.map((f) => f.path).sort();
|
|
179
|
+
assert.deepStrictEqual(paths, ['a.txt', 'b.txt', 'ignored.txt', 'skip/e.txt', 'sub/c.txt']);
|
|
180
|
+
const a = r.files.find((f) => f.path === 'a.txt');
|
|
181
|
+
assert.strictEqual(a.size, fs.statSync(path.join(CWD, 'a.txt')).size);
|
|
182
|
+
assert.ok(!Number.isNaN(Date.parse(a.mtime)));
|
|
183
|
+
});
|
|
184
|
+
|
|
185
|
+
test('glob: node_modules and .git are skipped', () => {
|
|
186
|
+
const r = _globSearch({ pattern: '*.txt', baseDir: CWD });
|
|
187
|
+
assert.ok(!r.files.some((f) => f.path.includes('node_modules')));
|
|
188
|
+
});
|
|
189
|
+
|
|
190
|
+
// A pattern containing a slash is resolved relative to baseDir, so only the
// file directly under `sub/` is expected.
test('glob: a slashed pattern is anchored to the base', () => {
  const { files } = _globSearch({ pattern: 'sub/*.txt', baseDir: CWD });
  const matchedPaths = files.map(({ path: relPath }) => relPath);
  assert.deepStrictEqual(matchedPaths, ['sub/c.txt']);
});
|
|
194
|
+
|
|
195
|
+
// A pattern nothing matches yields an empty `files` array and a zero `count`.
test('glob: zero matches returns an empty list', () => {
  const query = { pattern: '*.nonexistent', baseDir: CWD };
  const outcome = _globSearch(query);

  assert.deepStrictEqual(outcome.files, []);
  assert.strictEqual(outcome.count, 0);
});
|
|
200
|
+
|
|
201
|
+
// ---------------------------------------------------------------------------
|
|
202
|
+
// Registry wiring: both tools register and dispatch via XML and native paths
|
|
203
|
+
// ---------------------------------------------------------------------------
|
|
204
|
+
|
|
205
|
+
// Both search tools must appear in the registry's list of tool names.
test('grep and glob register as callable tools', () => {
  const registered = registryToolNames();
  for (const toolName of ['grep', 'glob']) {
    assert.ok(registered.includes(toolName));
  }
});
|
|
210
|
+
|
|
211
|
+
// Loading the constants module must not throw.
// NOTE(review): presumably the module runs its parity guard at load time —
// confirm in lib/constants.
test('constants parity guard still passes with grep/glob added', () => {
  const loadConstants = () => require('../lib/constants');
  assert.doesNotThrow(loadConstants);
});
|
|
214
|
+
|
|
215
|
+
// The XML rail (extractToolCalls) and the native rail (fromInvoke) must build
// the same grep tuple from equivalent input.
test('grep dispatches identically via XML and native', () => {
  // Tuple now carries output_mode/head_limit/offset (Task W.5); absent → null in both rails.
  const basicTuple = ['grep', 'TODO', '*.js', true, null, null, null];

  const xmlBasic = extractToolCalls('<grep pattern="TODO" path="*.js" ignore_case="true"/>');
  assert.deepStrictEqual(xmlBasic, [basicTuple]);

  const nativeBasic = fromInvoke('grep', { pattern: 'TODO', path: '*.js', ignore_case: true });
  assert.deepStrictEqual(nativeBasic, basicTuple);

  // inline-content form: body is the pattern
  const xmlInline = extractToolCalls('<grep>TODO</grep>');
  assert.deepStrictEqual(xmlInline, [['grep', 'TODO', null, false, null, null, null]]);

  // W.5 params parse through both rails identically.
  // The expected tuples show XML attribute values staying strings ('5', '2')
  // while the native rail keeps the numbers it was given (5, 2).
  const xmlPaged = extractToolCalls('<grep pattern="TODO" output_mode="count" head_limit="5" offset="2"/>');
  assert.deepStrictEqual(xmlPaged, [['grep', 'TODO', null, false, 'count', '5', '2']]);

  const nativePaged = fromInvoke('grep', { pattern: 'TODO', output_mode: 'count', head_limit: 5, offset: 2 });
  assert.deepStrictEqual(nativePaged, ['grep', 'TODO', null, false, 'count', 5, 2]);
});
|
|
237
|
+
|
|
238
|
+
// Table-driven check of the glob tuple produced by each rail. Each row pairs
// a thunk that produces the actual value with the expected value, and rows run
// in the listed order.
test('glob dispatches identically via XML and native', () => {
  const cases = [
    // path defaults to '.' when omitted
    [() => extractToolCalls('<glob pattern="*.ts"/>'), [['glob', '*.ts', '.', null, null]]],
    [() => extractToolCalls('<glob pattern="*.ts" path="src"/>'), [['glob', '*.ts', 'src', null, null]]],
    // native rail returns the bare tuple rather than a list of tuples
    [() => fromInvoke('glob', { pattern: '*.ts' }), ['glob', '*.ts', '.', null, null]],
    [() => extractToolCalls('<glob pattern="*.ts" head_limit="3"/>'), [['glob', '*.ts', '.', '3', null]]],
  ];

  for (const [produce, expected] of cases) {
    assert.deepStrictEqual(produce(), expected);
  }
});
|
|
244
|
+
|
|
245
|
+
// describePermission must resolve to null for both search tools, i.e. no
// permission description is produced for them.
test('grep/glob are read-only (no permission gate)', async () => {
  const grepGate = await exec.describePermission(['grep', 'TODO', null, false]);
  assert.strictEqual(grepGate, null);

  const globGate = await exec.describePermission(['glob', '*.ts', '.']);
  assert.strictEqual(globGate, null);
});
|
|
249
|
+
|
|
250
|
+
// ---------------------------------------------------------------------------
|
|
251
|
+
// Integration through the executor (auto engine)
|
|
252
|
+
// ---------------------------------------------------------------------------
|
|
253
|
+
|
|
254
|
+
// End-to-end through the executor: grep for TODO must yield the expected
// match list.
test('agentExecFile grep returns matches', async () => {
  const { matches } = await exec.agentExecFile('grep', 'TODO');
  assert.deepStrictEqual(matches, EXPECTED_TODO);
});
|
|
258
|
+
|
|
259
|
+
// End-to-end through the executor: the only markdown file expected is f.md.
test('agentExecFile glob returns files', async () => {
  const { files } = await exec.agentExecFile('glob', '*.md');
  const listedPaths = files.map((entry) => entry.path);
  assert.deepStrictEqual(listedPaths, ['f.md']);
});
|
|
263
|
+
|
|
264
|
+
// A pathGlob that points above baseDir must not surface files from outside it.
test('grep confines a path-escaping glob to the working tree (no escape)', () => {
  const escaping = _grepSearch({ pattern: 'TODO', baseDir: CWD, engine: 'node', pathGlob: '../**' });
  const staysInside = (hit) => !hit.file.startsWith('..');
  // nothing outside the base is reachable; the worst case is an empty result
  for (const hit of escaping.matches) {
    assert.ok(staysInside(hit));
  }
});
|