@semalt-ai/code 1.19.0 → 1.20.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +2 -1
- package/ARCHITECTURE.md +6 -95
- package/CLAUDE.md +196 -1874
- package/README.md +1 -1
- package/docs/ARCHITECTURE.md +1321 -0
- package/docs/CONFIG.md +340 -0
- package/docs/HISTORY.md +245 -0
- package/index.js +1 -1
- package/lib/agent.js +145 -16
- package/lib/api.js +28 -3
- package/lib/commands/chat-session.js +188 -4
- package/lib/commands/chat-slash.js +16 -0
- package/lib/commands/chat-turn.js +319 -52
- package/lib/commands/chat.js +12 -8
- package/lib/config.js +27 -0
- package/lib/constants.js +30 -1
- package/lib/headless.js +36 -1
- package/lib/images.js +8 -2
- package/lib/permissions.js +23 -16
- package/lib/prompts.js +15 -3
- package/lib/tool_registry.js +357 -53
- package/lib/tool_specs.js +42 -8
- package/lib/tools.js +80 -19
- package/lib/ui/anim.js +86 -0
- package/lib/ui/ansi.js +17 -27
- package/lib/ui/chat-history.js +253 -71
- package/lib/ui/create-ui.js +67 -24
- package/lib/ui/diff.js +90 -25
- package/lib/ui/file-activity.js +229 -0
- package/lib/ui/format.js +173 -28
- package/lib/ui/input-field.js +5 -4
- package/lib/ui/md-stream.js +234 -0
- package/lib/ui/render-operation.js +113 -0
- package/lib/ui/select.js +1 -4
- package/lib/ui/status-bar.js +99 -57
- package/lib/ui/stream.js +20 -13
- package/lib/ui/theme.js +190 -45
- package/lib/ui/tool-operation.js +190 -0
- package/lib/ui/utils.js +9 -5
- package/lib/ui/web-activity.js +58 -6
- package/lib/ui/writer.js +159 -45
- package/lib/ui.js +1 -1
- package/package.json +1 -1
- package/test/anim-driver.test.js +153 -0
- package/test/ask-user-display.test.js +226 -0
- package/test/ask-user-gate.test.js +231 -0
- package/test/chat-history-nocolor.test.js +155 -0
- package/test/chat-relogin.test.js +207 -0
- package/test/defer-detail-band.test.js +403 -0
- package/test/detail-band-tab-flatten.test.js +242 -0
- package/test/exec-diff.test.js +268 -0
- package/test/executors.test.js +250 -13
- package/test/extract-tool-calls.test.js +37 -3
- package/test/file-activity.test.js +542 -0
- package/test/grep-path-target.test.js +227 -0
- package/test/harness/chat-harness.js +2 -1
- package/test/headless.test.js +146 -1
- package/test/input-field-ctrl-o.test.js +37 -0
- package/test/live-height-physical.test.js +281 -0
- package/test/max-iterations.test.js +9 -7
- package/test/md-stream.test.js +183 -0
- package/test/narration-ordering.test.js +309 -0
- package/test/native-dispatch.test.js +53 -0
- package/test/native-live-narration.test.js +254 -0
- package/test/output-heredoc-leak.test.js +195 -0
- package/test/output-preview.test.js +245 -0
- package/test/permission-flush.test.js +302 -0
- package/test/permissions.test.js +199 -0
- package/test/read-paginate.test.js +1 -1
- package/test/render-operation.test.js +317 -0
- package/test/replay-descriptor-xml.test.js +216 -0
- package/test/replay-descriptor.test.js +189 -0
- package/test/replay-web-aggregate.test.js +291 -0
- package/test/replay-web-persist.test.js +241 -0
- package/test/running-glyph-anim.test.js +111 -0
- package/test/status-bar-driver.test.js +93 -0
- package/test/status-bar-resync.test.js +188 -0
- package/test/stream-parser.test.js +24 -0
- package/test/theme-palette.test.js +166 -0
- package/test/truncate-visible.test.js +78 -0
- package/test/view-image.test.js +199 -0
- package/test/web-activity-ordering.test.js +12 -3
- package/path +0 -1
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@semalt-ai/code",
|
|
3
|
-
"version": "1.19.0",
|
|
3
|
+
"version": "1.20.1",
|
|
4
4
|
"description": "Self-hosted AI Coding Assistant CLI",
|
|
5
5
|
"main": "./lib/sdk.js",
|
|
6
6
|
"//exports": "Two-tier embedding surface (Task 5.2): '.' is the STABLE createAgent facade; './internals' is the UNSTABLE building blocks (no semver guarantee). The boundary is enforced here, not just in docs. Works for both require() and import.",
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Single animation driver (Output Refactor — Phase 3).
|
|
4
|
+
//
|
|
5
|
+
// THE CHANGE: the status bar used to own TWO independent setIntervals — a 1 Hz
|
|
6
|
+
// clock tick and a 100 ms spinner glyph cycle — that each repainted the whole
|
|
7
|
+
// live region without coordinating. Phase 3 replaces both with ONE driver
|
|
8
|
+
// (lib/ui/anim.js). The clock and spinner are now subscribers: one timer, one
|
|
9
|
+
// frame counter, and at most ONE coordinated repaint per tick.
|
|
10
|
+
//
|
|
11
|
+
// These tests drive the single timer via node:test mock timers and assert that
|
|
12
|
+
// (a) only one interval is ever created by a constructed status bar, (b)
|
|
13
|
+
// advancing it updates BOTH the clock field and the spinner glyph, and (c) a
|
|
14
|
+
// tick that fires more than one subscriber still produces exactly one repaint.
|
|
15
|
+
|
|
16
|
+
const { test, mock } = require('node:test');
|
|
17
|
+
const assert = require('node:assert');
|
|
18
|
+
|
|
19
|
+
const { AnimDriver, BASE_INTERVAL_MS, TICKS_PER_SECOND } = require('../lib/ui/anim');
|
|
20
|
+
const { FullStatusBar } = require('../lib/ui/status-bar');
|
|
21
|
+
|
|
22
|
+
const layout = { cols: 200 };
|
|
23
|
+
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
25
|
+
// Exactly one interval is created — the clock and spinner share it.
|
|
26
|
+
// ---------------------------------------------------------------------------
|
|
27
|
+
|
|
28
|
+
// Counts every setInterval call made while a status bar is constructed and
// moved into an animating state; exactly one call is expected.
test('a constructed status bar creates exactly one setInterval (one driver, not two)', () => {
  mock.timers.enable({ apis: ['setInterval'] });
  let bar;
  try {
    // Captured AFTER mock timers are enabled, so the counting wrapper below
    // delegates to whatever setInterval the mock installed.
    const realSetInterval = global.setInterval;
    let intervalsCreated = 0;
    global.setInterval = (...args) => { intervalsCreated++; return realSetInterval(...args); };
    try {
      bar = new FullStatusBar(layout, () => {});
      // Entering an animating state must NOT create a second timer — the
      // spinner is a subscriber to the one driver, not its own interval.
      bar.update('tool', 'running');
    } finally {
      // Undo the counting wrapper before asserting.
      global.setInterval = realSetInterval;
    }
    assert.strictEqual(intervalsCreated, 1, 'only one interval for clock + spinner');
  } finally {
    // Tear the bar down even when the assertion above throws, and always
    // reset the mocked timers afterwards.
    try {
      if (bar) bar.destroy();
    } finally {
      mock.timers.reset();
    }
  }
});
|
|
49
|
+
|
|
50
|
+
// ---------------------------------------------------------------------------
|
|
51
|
+
// Advancing the one driver updates BOTH the clock and the spinner glyph.
|
|
52
|
+
// ---------------------------------------------------------------------------
|
|
53
|
+
|
|
54
|
+
// Drives the mocked interval forward and checks that (1) the first visible
// glyph of the rendered status line changes after one base tick and (2) the
// redraw callback keeps firing over a full second.
test('advancing the single driver updates both the clock and the spinner', () => {
  mock.timers.enable({ apis: ['setInterval'] });
  let bar;
  try {
    let redraws = 0;
    bar = new FullStatusBar(layout, () => { redraws++; });

    // Animating state → the spinner glyph cycles. Capture the rendered glyph
    // across a base-interval tick; it must change.
    bar.update('thinking', 'Thinking');
    const glyphAt = () => {
      const line = bar.renderLine();
      // First non-space visible char after stripping ANSI is the spinner glyph.
      const stripped = line.replace(/\x1b\[[0-9;]*m/g, '');
      return stripped.trimStart()[0];
    };
    const g0 = glyphAt();
    mock.timers.tick(BASE_INTERVAL_MS);
    const g1 = glyphAt();
    assert.notStrictEqual(g0, g1, 'spinner glyph advances on a driver tick');

    // Over a full second the same driver keeps requesting repaints; assert at
    // least the spinner cadence of redraws was observed.
    redraws = 0;
    mock.timers.tick(1000);
    assert.ok(redraws >= TICKS_PER_SECOND - 1, 'driver repaints at the spinner cadence while animating');
  } finally {
    // Tear the bar down even when an assertion above throws, and always
    // reset the mocked timers afterwards.
    try {
      if (bar) bar.destroy();
    } finally {
      mock.timers.reset();
    }
  }
});
|
|
86
|
+
|
|
87
|
+
// ---------------------------------------------------------------------------
|
|
88
|
+
// One coordinated repaint per tick — even when multiple subscribers fire.
|
|
89
|
+
// ---------------------------------------------------------------------------
|
|
90
|
+
|
|
91
|
+
// Driver-level check: two subscribers that both return true on the same frame
// must result in a single repaint callback per tick.
test('a tick that fires multiple subscribers still yields exactly one repaint', () => {
  mock.timers.enable({ apis: ['setInterval'] });
  try {
    const driver = new AnimDriver();
    let repaintCount = 0;
    driver.onRepaint(() => {
      repaintCount += 1;
    });
    // Two DISTINCT subscriber functions, each always asking for a repaint.
    driver.subscribe(() => true);
    driver.subscribe(() => true);
    driver.start();

    const expectations = [
      [1, 'two truthy subscribers → one coordinated repaint'],
      [2, 'one repaint per subsequent tick too'],
    ];
    for (const [expected, message] of expectations) {
      mock.timers.tick(BASE_INTERVAL_MS);
      assert.strictEqual(repaintCount, expected, message);
    }

    driver.stop();
  } finally {
    mock.timers.reset();
  }
});
|
|
111
|
+
|
|
112
|
+
// ---------------------------------------------------------------------------
|
|
113
|
+
// A tick where no subscriber wants a repaint produces none (idle clock gap).
|
|
114
|
+
// ---------------------------------------------------------------------------
|
|
115
|
+
|
|
116
|
+
// A subscriber that always returns false must never trigger the repaint
// callback, however many ticks elapse.
test('a tick with no truthy subscriber produces no repaint', () => {
  mock.timers.enable({ apis: ['setInterval'] });
  try {
    const driver = new AnimDriver();
    let repaintCount = 0;
    driver.onRepaint(() => {
      repaintCount += 1;
    });
    driver.subscribe(() => false);
    driver.start();

    const fiveTicks = BASE_INTERVAL_MS * 5;
    mock.timers.tick(fiveTicks);
    assert.strictEqual(repaintCount, 0, 'no repaint when nothing requests one');

    driver.stop();
  } finally {
    mock.timers.reset();
  }
});
|
|
131
|
+
|
|
132
|
+
// ---------------------------------------------------------------------------
|
|
133
|
+
// start()/stop() are idempotent — no stacked timers (the 5404bd0 lesson).
|
|
134
|
+
// ---------------------------------------------------------------------------
|
|
135
|
+
|
|
136
|
+
// Repeated start() calls must leave a single timer running, and repeated
// stop() calls must be harmless and leave the driver fully stopped.
test('AnimDriver start()/stop() are idempotent and never stack timers', () => {
  mock.timers.enable({ apis: ['setInterval'] });
  try {
    const driver = new AnimDriver();
    let tickCount = 0;
    driver.subscribe(() => {
      tickCount += 1;
      return false;
    });

    // Three starts → one timer.
    for (let i = 0; i < 3; i += 1) {
      driver.start();
    }
    mock.timers.tick(BASE_INTERVAL_MS);
    assert.strictEqual(tickCount, 1, 'one tick per interval despite repeated start()');

    // Two stops → no error, fully stopped.
    for (let i = 0; i < 2; i += 1) {
      driver.stop();
    }
    tickCount = 0;
    mock.timers.tick(BASE_INTERVAL_MS * 5);
    assert.strictEqual(tickCount, 0, 'no ticks after stop()');
    assert.strictEqual(driver.isRunning(), false, 'isRunning() reflects stopped state');
  } finally {
    mock.timers.reset();
  }
});
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// ask_user UX improvements (display-only):
|
|
4
|
+
// 1. The full QUESTION renders as wrapped, clamped header rows above the menu.
|
|
5
|
+
// 2. The prompt header shows ONLY prose; options live ONLY in the menu (no dup).
|
|
6
|
+
// 3. The chosen ANSWER is surfaced as a meta segment on the result line and
|
|
7
|
+
// survives serialize→replay.
|
|
8
|
+
//
|
|
9
|
+
// All three are DISPLAY-ONLY: the model-facing tool result still carries the
|
|
10
|
+
// FULL original question + answer. These tests lock that split in place and
|
|
11
|
+
// prove interactiveSelect (model/rewind/permission pickers) is untouched.
|
|
12
|
+
|
|
13
|
+
const { test, before, after } = require('node:test');
|
|
14
|
+
const assert = require('node:assert');
|
|
15
|
+
const os = require('node:os');
|
|
16
|
+
const fs = require('node:fs');
|
|
17
|
+
const path = require('node:path');
|
|
18
|
+
|
|
19
|
+
const ui = require('../lib/ui');
|
|
20
|
+
const { createApiClient } = require('../lib/api');
|
|
21
|
+
const { createToolExecutor, extractToolCalls, parseAskMenu } = require('../lib/tools');
|
|
22
|
+
const { createPermissionManager } = require('../lib/permissions');
|
|
23
|
+
const { createAgentRunner } = require('../lib/agent');
|
|
24
|
+
const { wrapPromptLines } = require('../lib/ui/format');
|
|
25
|
+
const { stripAnsi } = require('../lib/ui/utils');
|
|
26
|
+
const { buildToolOperation, serializeOperation, descriptorFromStored } = require('../lib/ui/tool-operation');
|
|
27
|
+
const { renderOperation } = require('../lib/ui/render-operation');
|
|
28
|
+
const { interactiveSelect } = require('../lib/ui/select');
|
|
29
|
+
const { startMockLLM } = require('./harness/mock-llm');
|
|
30
|
+
|
|
31
|
+
// Suite-wide state captured in before() and restored in after().
let prevKey;
let CWD;
let PREV_CWD;

// Pin SEMALT_API_KEY to a fixed value and move the process into a freshly
// created scratch directory under the OS temp dir for the whole suite.
before(() => {
  prevKey = process.env.SEMALT_API_KEY;
  process.env.SEMALT_API_KEY = 'test-key';
  PREV_CWD = process.cwd();
  CWD = fs.realpathSync(fs.mkdtempSync(path.join(os.tmpdir(), 'semalt-askdisp-')));
  process.chdir(CWD);
});

// Restore the original working directory and environment, then delete the
// scratch directory so repeated runs do not accumulate temp folders.
after(() => {
  process.chdir(PREV_CWD);
  if (prevKey === undefined) delete process.env.SEMALT_API_KEY; else process.env.SEMALT_API_KEY = prevKey;
  // CWD stays undefined if before() failed before creating the directory.
  if (CWD) fs.rmSync(CWD, { recursive: true, force: true });
});
|
|
44
|
+
|
|
45
|
+
// Runner whose interactive menu is spied: `selectCalls` records the FULL menu
|
|
46
|
+
// object handed to captureSelect ({ prompt, options }) so we can assert the
|
|
47
|
+
// display-only split. Returns selectPick (or options[0] in non-TTY auto-answer).
|
|
48
|
+
/**
 * Assemble an agent runner that talks to the LLM endpoint at `base` and whose
 * select-menu callback is spied.
 *
 * @param {string} base - API base URL placed in the runner's config.
 * @param {{ selectPick?: (string|null) }} [opts] - `selectPick` is the value the
 *   spied `captureSelect` returns; when null/undefined it returns the first
 *   entry of the menu's `options` instead.
 * @returns {{ runner: object, selectCalls: object[] }} The runner plus the list
 *   of every menu object handed to `captureSelect`, in call order.
 */
function buildRunner(base, { selectPick = null } = {}) {
  const config = {
    api_base: base,
    api_key: 'test-key',
    default_model: 'test-model',
    temperature: 0.5,
    request_timeout_ms: 5000,
    stream: true,
    models: [],
  };
  const getConfig = () => config;
  const selectCalls = [];

  const api = createApiClient({ getConfig, saveConfig: () => {}, ui });

  // Permission manager whose programmatic approver approves everything.
  const pm = createPermissionManager(ui, { approver: () => true });
  pm.setUICallbacks({
    onAddMessage: () => {},
    onShowModal: () => {},
    onCloseModal: () => {},
    onCaptureNavigation: () => () => {},
    captureSelect: (menu) => {
      selectCalls.push(menu);
      if (selectPick != null) {
        return selectPick;
      }
      return menu.options && menu.options[0];
    },
  });

  const executor = createToolExecutor(pm, ui, getConfig);
  const runner = createAgentRunner({
    chatStream: api.chatStream,
    extractToolCalls,
    agentExecShell: executor.agentExecShell,
    agentExecFile: executor.agentExecFile,
    describePermission: executor.describePermission,
    permissionManager: pm,
    ui,
    getConfig,
  });

  return { runner, selectCalls };
}
|
|
67
|
+
|
|
68
|
+
/**
 * Concatenate the text of every tool-result message in a conversation.
 *
 * A message counts as a tool result when its role is 'tool', or when it is a
 * user-role message whose content mentions "Tool execution results".
 * Non-string content is JSON-stringified FIRST, so the marker check and the
 * returned text both operate on the same representation (structured user
 * content is no longer silently skipped by the marker check).
 *
 * @param {Array<{role: string, content: *}>} messages - Conversation messages.
 * @returns {string} Matching message texts joined with '\n' ('' when none).
 */
function allToolText(messages) {
  const texts = [];
  for (const m of messages) {
    const text = typeof m.content === 'string' ? m.content : JSON.stringify(m.content);
    const isToolResult = m.role === 'tool'
      || (m.role === 'user' && /Tool execution results/.test(text || ''));
    if (isToolResult) {
      texts.push(text);
    }
  }
  return texts.join('\n');
}
|
|
74
|
+
|
|
75
|
+
// ── (a) parse helper split: prose → prompt, numbered → options, no duplication ──
|
|
76
|
+
|
|
77
|
+
// parseAskMenu must put the prose lines in `prompt` and the numbered entries
// (both the "1." and "2)" forms) in `options`, with no overlap.
test('(a) parseAskMenu splits prose prompt from numbered options — no duplication', () => {
  const parsed = parseAskMenu('Which fruit do you prefer?\nPick one:\n1. Apples\n2) Bananas');
  const expectedOptions = ['Apples', 'Bananas'];
  assert.deepStrictEqual(parsed.options, expectedOptions, 'both numbered options parsed (1. and 2) forms)');
  assert.strictEqual(parsed.prompt, 'Which fruit do you prefer?\nPick one:', 'prompt is the prose only');
  const leaksOption = /Apples|Bananas/.test(parsed.prompt);
  assert.ok(leaksOption === false, 'option labels never appear in the prompt header');
});
|
|
83
|
+
|
|
84
|
+
// End-to-end through the runner: the menu object handed to captureSelect must
// carry the options separately from a prose-only prompt.
test('(a) executor menu gets options only; header gets prompt only (display-only split)', async () => {
  const llm = await startMockLLM();
  llm.replyWithToolCall('ask_user', { question: 'Which fruit?\nChoose:\n1. Apples\n2. Bananas' });
  llm.replyWith('done');
  try {
    const { runner, selectCalls } = buildRunner(llm.base, { selectPick: 'Bananas' });
    const conversation = [{ role: 'user', content: 'ask me' }];
    await runner.runAgentLoop(conversation, 'test-model', 10, null, {});

    assert.strictEqual(selectCalls.length, 1);
    const [menu] = selectCalls;
    assert.deepStrictEqual(menu.options, ['Apples', 'Bananas'], 'menu gets the options');
    assert.strictEqual(menu.prompt, 'Which fruit?\nChoose:', 'header gets the prose only');
    const duplicated = /Apples|Bananas/.test(menu.prompt);
    assert.ok(duplicated === false, 'no option duplication in the header');
  } finally {
    await llm.close();
  }
});
|
|
100
|
+
|
|
101
|
+
// ── (b) model-facing result still carries the FULL original question + answer ──
|
|
102
|
+
|
|
103
|
+
// The text fed back to the model must still contain the whole original
// question (prose and numbered options) together with the chosen answer.
test('(b) model-facing result keeps the FULL original question (prose + options) + answer', async () => {
  const llm = await startMockLLM();
  llm.replyWithToolCall('ask_user', { question: 'Which fruit?\n1. Apples\n2. Bananas' });
  llm.replyWith('done');
  try {
    const { runner } = buildRunner(llm.base, { selectPick: 'Bananas' });
    const conversation = [{ role: 'user', content: 'ask me' }];
    await runner.runAgentLoop(conversation, 'test-model', 10, null, {});

    const toolText = allToolText(conversation);
    assert.match(toolText, /User answered/, 'model sees the answered string');
    assert.ok(toolText.includes('Which fruit?'), 'full prose present for the model');
    const hasEveryOption = ['1. Apples', '2. Bananas'].every((label) => toolText.includes(label));
    assert.ok(hasEveryOption, 'full numbered options present for the model (NOT stripped)');
    assert.match(toolText, /:\s*Bananas/, 'the chosen answer is present');
  } finally {
    await llm.close();
  }
});
|
|
120
|
+
|
|
121
|
+
// ── (c) long question → header wraps + clamps; never overflows the modal band ──
|
|
122
|
+
|
|
123
|
+
// A 40-line prompt clamped to 12 rows must yield 12 visible rows plus one tail
// row announcing the 28 hidden lines.
test('(c) wrapPromptLines clamps a long question to the cap with a "… N more lines" tail', () => {
  const rows = [];
  for (let i = 0; i < 40; i += 1) {
    rows.push(`prompt line number ${i}`);
  }
  const clamped = wrapPromptLines(rows.join('\n'), { cols: 80, maxLines: 12 });
  assert.strictEqual(clamped.length, 13, '12 visible lines + 1 tail row');
  assert.match(clamped[12], /… 28 more lines/, 'tail counts exactly the hidden lines');
});
|
|
129
|
+
|
|
130
|
+
// Wrapping must respect the column budget for ordinary words and for a single
// over-long token, and a whitespace-only prompt must produce no rows.
test('(c) wrapPromptLines word-wraps to the column budget and hard-breaks long tokens', () => {
  const fitsWithin = (limit) => (row) => row.length <= limit;

  const wordRows = wrapPromptLines('alpha beta gamma delta', { cols: 12, maxLines: 12 });
  assert.ok(wordRows.every(fitsWithin(12)), 'every wrapped line fits the column budget');
  assert.ok(wordRows.length > 1, 'wrapped across multiple rows');

  const tokenRows = wrapPromptLines('x'.repeat(30), { cols: 10, maxLines: 12 });
  assert.ok(tokenRows.every(fitsWithin(10)), 'an over-long single token is hard-broken to the budget');

  const blankRows = wrapPromptLines(' \n ', { cols: 80 });
  assert.deepStrictEqual(blankRows, [], 'whitespace-only prompt yields no header rows');
});
|
|
138
|
+
|
|
139
|
+
// ── (d) the chosen answer rides the result line as meta, and survives replay ──
|
|
140
|
+
|
|
141
|
+
// The rendered result line must contain "→ <answer>", both for a freshly built
// operation and for one rebuilt from its serialized form.
test('(d) the result line shows the chosen answer as a meta segment, and it survives replay', () => {
  const renderResult = (operation) => stripAnsi(renderOperation(operation, { mode: 'ansi', phase: 'result' }));

  const op = buildToolOperation({
    tag: 'ask_user',
    arg: 'Pick',
    attrs: { question: 'Pick' },
    status: 'ok',
    durationMs: 0,
    meta: { answer: 'Bananas' },
    noDuration: true,
  });
  const freshLine = renderResult(op);
  assert.ok(freshLine.includes('→ Bananas'), 'fresh result line carries "→ <answer>"');

  // Replay: serialize → descriptorFromStored → render again.
  const stored = serializeOperation(op);
  assert.deepStrictEqual(stored.meta, { answer: 'Bananas' }, 'meta is persisted in the serialized core');
  const replayedLine = renderResult(descriptorFromStored(stored));
  assert.ok(replayedLine.includes('→ Bananas'), 'answer renders identically on reload');
});
|
|
154
|
+
|
|
155
|
+
// A 100-character answer must be shortened with an ellipsis, and the " · "
// separated segment holding the arrow must stay at 45 characters or fewer.
test('(d) a long chosen answer is truncated so the result line stays one row', () => {
  const spec = {
    tag: 'ask_user',
    arg: 'q',
    attrs: { question: 'q' },
    status: 'ok',
    durationMs: 0,
    meta: { answer: 'y'.repeat(100) },
    noDuration: true,
  };
  const rendered = renderOperation(buildToolOperation(spec), { mode: 'ansi', phase: 'result' });
  const line = stripAnsi(rendered);
  assert.ok(line.includes('…'), 'long answer is truncated with an ellipsis');

  // The answer segment itself stays well under a terminal row.
  const segments = line.split(' · ');
  const answerSegment = segments.find((part) => part.includes('→'));
  assert.ok(answerSegment && stripAnsi(answerSegment).length <= 45, 'answer meta segment is bounded');
});
|
|
163
|
+
|
|
164
|
+
// ── (e) edge: no prose before the numbers → fall back to the raw question ──
|
|
165
|
+
|
|
166
|
+
// When the question consists only of numbered lines, the menu prompt passed to
// captureSelect must be the raw question, while parseAskMenu itself reports an
// empty prompt.
test('(e) question with no prose → empty prompt → header falls back to the raw question', async () => {
  const llm = await startMockLLM();
  llm.replyWith('<ask_user question="1. Red\n2. Blue"/>');
  llm.replyWith('done');
  try {
    const { runner, selectCalls } = buildRunner(llm.base, { selectPick: 'Blue' });
    const conversation = [{ role: 'user', content: 'ask me' }];
    await runner.runAgentLoop(conversation, 'test-model', 10, null, {});

    assert.strictEqual(selectCalls.length, 1);
    const [menu] = selectCalls;
    assert.deepStrictEqual(menu.options, ['Red', 'Blue']);
    assert.strictEqual(menu.prompt, '1. Red\n2. Blue', 'no prose → raw question used as header (never empty)');

    // parseAskMenu itself reports the empty prompt; the fallback is the executor's.
    const parsed = parseAskMenu('1. Red\n2. Blue');
    assert.strictEqual(parsed.prompt, '', 'parse helper reports empty prose');
  } finally {
    await llm.close();
  }
});
|
|
183
|
+
|
|
184
|
+
// ── (f) headless auto-answer path still works (no crash from the split) ──
|
|
185
|
+
|
|
186
|
+
// With no configured pick, the spied captureSelect returns the first option;
// the tool text fed back to the model must then contain ": First".
test('(f) headless auto-answer (non-TTY): the split helper does not crash; first option answered', async () => {
  const llm = await startMockLLM();
  llm.replyWithToolCall('ask_user', { question: 'Prose?\n1. First\n2. Second' });
  llm.replyWith('done');
  try {
    // No pick → the spy falls back to options[0].
    const { runner } = buildRunner(llm.base, { selectPick: null });
    const conversation = [{ role: 'user', content: 'ask me' }];
    await runner.runAgentLoop(conversation, 'test-model', 10, null, {});

    const toolText = allToolText(conversation);
    assert.match(toolText, /:\s*First/, 'auto-answer resolved to the first option without crashing');
  } finally {
    await llm.close();
  }
});
|
|
199
|
+
|
|
200
|
+
// ── (g) interactiveSelect (model/rewind/permission pickers) is UNCHANGED ──
|
|
201
|
+
|
|
202
|
+
// Drives interactiveSelect purely through the handler it registers via
// captureNavigation: two 'next' actions plus 'select' must resolve to index 2,
// and 'cancel' must resolve to null.
test('(g) interactiveSelect still returns the selected index via captureNavigation (signature intact)', async () => {
  // Silence stdout while the select runs so test output stays clean; the
  // contract under test is the returned value, not the rendering.
  const savedWrite = process.stdout.write;
  process.stdout.write = () => true;
  try {
    let navigate;
    const renderItem = (item, sel) => (sel ? '> ' : ' ') + item;
    const pending = interactiveSelect(['A', 'B', 'C'], renderItem, {
      initialIndex: 0,
      captureNavigation: (h) => {
        navigate = h;
        return () => {};
      },
    });
    for (const action of ['next', 'next', 'select']) {
      navigate(action);
    }
    const chosenIndex = await pending;
    assert.strictEqual(chosenIndex, 2, 'navigation + select returns the selected index — behavior unchanged');

    let navigateSecond;
    const pendingCancel = interactiveSelect(['A', 'B'], (i) => i, {
      captureNavigation: (h) => {
        navigateSecond = h;
        return () => {};
      },
    });
    navigateSecond('cancel');
    const cancelled = await pendingCancel;
    assert.strictEqual(cancelled, null, 'cancel still returns null');
  } finally {
    // Yield one macrotask before putting the real stdout.write back.
    await new Promise((resolve) => setImmediate(resolve));
    process.stdout.write = savedWrite;
  }
});
|
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// ask_user gate + menu tests.
|
|
4
|
+
//
|
|
5
|
+
// Covers two coupled changes:
|
|
6
|
+
// 1. The model is told (in prompts.js) that a numbered list in the question
|
|
7
|
+
// renders an interactive menu. (Wording is asserted at the bottom.)
|
|
8
|
+
// 2. ask_user carries a NULL permission descriptor, so:
|
|
9
|
+
// - it never fires the redundant "may I ask you?" approval gate;
|
|
10
|
+
// - it is available during plan mode (read-only-style, not withheld);
|
|
11
|
+
// - in non-TTY it reaches its executor and auto-answers (not refused).
|
|
12
|
+
//
|
|
13
|
+
// Isolation is proven by running a real effectful tool (exec) through the SAME
|
|
14
|
+
// runner: its gate STILL fires. The gate is observed via a programmatic
|
|
15
|
+
// `approver` (records every gate consultation) and the menu via a spied
|
|
16
|
+
// `captureSelect` (records every interactive select).
|
|
17
|
+
|
|
18
|
+
const { test, before, after } = require('node:test');
|
|
19
|
+
const assert = require('node:assert');
|
|
20
|
+
const os = require('node:os');
|
|
21
|
+
const fs = require('node:fs');
|
|
22
|
+
const path = require('node:path');
|
|
23
|
+
|
|
24
|
+
const ui = require('../lib/ui');
|
|
25
|
+
const { createApiClient } = require('../lib/api');
|
|
26
|
+
const { createToolExecutor, extractToolCalls } = require('../lib/tools');
|
|
27
|
+
const { createPermissionManager } = require('../lib/permissions');
|
|
28
|
+
const { createAgentRunner } = require('../lib/agent');
|
|
29
|
+
const { startMockLLM } = require('./harness/mock-llm');
|
|
30
|
+
|
|
31
|
+
// Suite-wide state captured in before() and restored in after().
let prevKey;
let CWD;
let PREV_CWD;

// Pin SEMALT_API_KEY to a fixed value and move the process into a freshly
// created scratch directory under the OS temp dir for the whole suite.
before(() => {
  prevKey = process.env.SEMALT_API_KEY;
  process.env.SEMALT_API_KEY = 'test-key';
  PREV_CWD = process.cwd();
  CWD = fs.realpathSync(fs.mkdtempSync(path.join(os.tmpdir(), 'semalt-askuser-')));
  process.chdir(CWD);
});

// Restore the original working directory and environment, then delete the
// scratch directory so repeated runs do not accumulate temp folders.
after(() => {
  process.chdir(PREV_CWD);
  if (prevKey === undefined) delete process.env.SEMALT_API_KEY; else process.env.SEMALT_API_KEY = prevKey;
  // CWD stays undefined if before() failed before creating the directory.
  if (CWD) fs.rmSync(CWD, { recursive: true, force: true });
});
|
|
44
|
+
|
|
45
|
+
// Build a runner whose permission manager is observable. `approver` records
|
|
46
|
+
// every gate consultation; `captureSelect` records every menu render and
|
|
47
|
+
// returns the configured pick. When `approver` is null, NO programmatic
|
|
48
|
+
// approver is wired (so a gate that fires in non-TTY refuses — exactly headless).
|
|
49
|
+
/**
 * Assemble an agent runner whose permission gate and select menu are both
 * observable.
 *
 * @param {string} base - API base URL placed in the runner's config.
 * @param {{ approverFn?: (Function|null), selectPick?: (string|null) }} [opts]
 *   `approverFn` — when provided, it is wrapped so every call is recorded in
 *   `gateCalls` before being delegated; when null, the permission manager is
 *   created with a null approver.
 *   `selectPick` — value returned by the spied `captureSelect`; when
 *   null/undefined the first entry of the menu's `options` is returned.
 * @returns {{ runner: object, gateCalls: object[], selectCalls: object[] }}
 */
function buildRunner(base, { approverFn = null, selectPick = null } = {}) {
  const config = {
    api_base: base,
    api_key: 'test-key',
    default_model: 'test-model',
    temperature: 0.5,
    request_timeout_ms: 5000,
    stream: true,
    models: [],
  };
  const getConfig = () => config;
  const gateCalls = [];
  const selectCalls = [];

  const api = createApiClient({ getConfig, saveConfig: () => {}, ui });

  // Only wire a recording approver when the caller supplied one.
  let approver = null;
  if (approverFn) {
    approver = (info) => {
      gateCalls.push(info);
      return approverFn(info);
    };
  }

  const pm = createPermissionManager(ui, { approver });
  pm.setUICallbacks({
    onAddMessage: () => {},
    onShowModal: () => {},
    onCloseModal: () => {},
    onCaptureNavigation: () => () => {},
    captureSelect: (menu) => {
      selectCalls.push(menu);
      if (selectPick != null) {
        return selectPick;
      }
      return menu.options && menu.options[0];
    },
  });

  const executor = createToolExecutor(pm, ui, getConfig);
  const runner = createAgentRunner({
    chatStream: api.chatStream,
    extractToolCalls,
    agentExecShell: executor.agentExecShell,
    agentExecFile: executor.agentExecFile,
    describePermission: executor.describePermission,
    permissionManager: pm,
    ui,
    getConfig,
  });

  return { runner, gateCalls, selectCalls };
}
|
|
72
|
+
|
|
73
|
+
// The two rails feed tool output back differently — the XML rail bundles it in a
|
|
74
|
+
// user-role "Tool execution results" block, the native rail emits role:'tool'
|
|
75
|
+
// messages. Scan every message's content so an assertion works on both rails.
|
|
76
|
+
/**
 * Concatenate the text of every tool-result message in a conversation.
 *
 * A message counts as a tool result when its role is 'tool', or when it is a
 * user-role message whose content mentions "Tool execution results".
 * Non-string content is JSON-stringified FIRST, so the marker check and the
 * returned text both operate on the same representation (structured user
 * content is no longer silently skipped by the marker check).
 *
 * @param {Array<{role: string, content: *}>} messages - Conversation messages.
 * @returns {string} Matching message texts joined with '\n' ('' when none).
 */
function allToolText(messages) {
  const texts = [];
  for (const m of messages) {
    const text = typeof m.content === 'string' ? m.content : JSON.stringify(m.content);
    const isToolResult = m.role === 'tool'
      || (m.role === 'user' && /Tool execution results/.test(text || ''));
    if (isToolResult) {
      texts.push(text);
    }
  }
  return texts.join('\n');
}
|
|
82
|
+
|
|
83
|
+
// ---------------------------------------------------------------------------
|
|
84
|
+
// (a) numbered-list question → menu path, NO permission gate
|
|
85
|
+
// ---------------------------------------------------------------------------
|
|
86
|
+
|
|
87
|
+
// A numbered-list question must reach the select menu exactly once, without
// the recording approver ever being consulted, and the picked option must show
// up in the tool text fed back to the model.
test('ask_user with a numbered list renders the menu and fires NO permission gate', async () => {
  const llm = await startMockLLM();
  llm.replyWithToolCall('ask_user', { question: '1. Apples\n2. Bananas' });
  llm.replyWith('done');
  try {
    const harness = buildRunner(llm.base, {
      approverFn: () => true, // would approve IF asked
      selectPick: 'Bananas',
    });
    const conversation = [{ role: 'user', content: 'ask me' }];
    await harness.runner.runAgentLoop(conversation, 'test-model', 10, null, {});

    const { gateCalls, selectCalls } = harness;
    assert.strictEqual(selectCalls.length, 1, 'the interactive menu was rendered exactly once');
    assert.deepStrictEqual(selectCalls[0].options, ['Apples', 'Bananas'], 'parsed both numbered options');
    assert.strictEqual(gateCalls.length, 0, 'NO permission gate fired before the menu');
    assert.match(allToolText(conversation), /Bananas/, 'the chosen option was fed back to the model');
  } finally {
    await llm.close();
  }
});
|
|
107
|
+
|
|
108
|
+
// ---------------------------------------------------------------------------
|
|
109
|
+
// (b) free-text question → free-text path, NO permission gate
|
|
110
|
+
// ---------------------------------------------------------------------------
|
|
111
|
+
|
|
112
|
+
// A free-text question must neither consult the recording approver nor open a
// menu, and the tool text fed back must show an answer of "y".
test('ask_user free-text question fires NO permission gate and auto-answers in non-TTY', async () => {
  const llm = await startMockLLM();
  llm.replyWithToolCall('ask_user', { question: 'What is your name?' });
  llm.replyWith('done');
  try {
    const harness = buildRunner(llm.base, { approverFn: () => true });
    const conversation = [{ role: 'user', content: 'ask me' }];
    await harness.runner.runAgentLoop(conversation, 'test-model', 10, null, {});

    const { gateCalls, selectCalls } = harness;
    assert.strictEqual(gateCalls.length, 0, 'NO permission gate fired for free-text ask_user');
    assert.strictEqual(selectCalls.length, 0, 'no menu (free-text)');
    const toolText = allToolText(conversation);
    assert.match(toolText, /answered[^\n]*:\s*y\b/, 'non-TTY auto-answer reached the executor (answer "y")');
  } finally {
    await llm.close();
  }
});
|
|
128
|
+
|
|
129
|
+
// ---------------------------------------------------------------------------
|
|
130
|
+
// (c) ISOLATION: a real effectful tool STILL fires its gate
|
|
131
|
+
// ---------------------------------------------------------------------------
|
|
132
|
+
|
|
133
|
+
// Control case: an <exec> action run through the same runner must still go
// through the permission gate, and the gate must see a 'shell' action type.
test('isolation: exec STILL fires the permission gate through the same runner', async () => {
  const llm = await startMockLLM();
  llm.replyWith('<exec>echo hi</exec>');
  llm.replyWith('done');
  try {
    const { runner: agent, gateCalls: gateLog } = buildRunner(llm.base, {
      approverFn: () => true,
    });
    const transcript = [{ role: 'user', content: 'run it' }];
    await agent.runAgentLoop(transcript, 'test-model', 10, null, {});

    assert.strictEqual(gateLog.length, 1, 'exec consulted the permission gate exactly once');
    const recordedGate = gateLog[0];
    assert.strictEqual(recordedGate.actionType, 'shell', 'gate saw the shell action type — unchanged');
  } finally {
    await llm.close();
  }
});
|
|
148
|
+
|
|
149
|
+
// ---------------------------------------------------------------------------
|
|
150
|
+
// (d) non-TTY / headless: ask_user is NOT refused by the gate (regression test for
|
|
151
|
+
// the old wart, where the non-null descriptor made the gate refuse in
|
|
152
|
+
// non-TTY before the executor's auto-answer could run). No approver, not
|
|
153
|
+
// skip-permissions — i.e. an effectful tool here WOULD be refused.
|
|
154
|
+
// ---------------------------------------------------------------------------
|
|
155
|
+
|
|
156
|
+
// Headless case with no approver configured: ask_user must bypass the gate
// entirely and the transcript must still contain the "y" answer.
test('non-TTY without an approver: ask_user still reaches its executor (not gate-refused)', async () => {
  const llm = await startMockLLM();
  llm.replyWithToolCall('ask_user', { question: 'Proceed?' });
  llm.replyWith('done');
  try {
    // approverFn is explicitly null here: nothing is available to approve.
    const { runner: agent, gateCalls: gateLog } = buildRunner(llm.base, { approverFn: null });
    const transcript = [{ role: 'user', content: 'ask me' }];
    await agent.runAgentLoop(transcript, 'test-model', 10, null, {});

    assert.strictEqual(gateLog.length, 0, 'no gate (and so no non-TTY refusal) for ask_user');

    const autoAnswerPattern = /answered[^\n]*:\s*y\b/;
    const toolText = allToolText(transcript);
    assert.ok(autoAnswerPattern.test(toolText), 'executor auto-answered "y" — unchanged headless behavior');
  } finally {
    await llm.close();
  }
});
|
|
171
|
+
|
|
172
|
+
// ---------------------------------------------------------------------------
|
|
173
|
+
// (e) PLAN MODE: ask_user is now allowed (runs) while an effectful tool in the
|
|
174
|
+
// SAME response is withheld. Proves the null descriptor lifts the plan-mode
|
|
175
|
+
// withhold for ask_user only, not for effectful tools.
|
|
176
|
+
// ---------------------------------------------------------------------------
|
|
177
|
+
|
|
178
|
+
// Plan mode: one response carries both a write_file and an ask_user action.
// The write must be reported as withheld (and leave no file behind) while the
// ask_user menu is still rendered without consulting the approval gate.
test('plan mode: ask_user runs (clarifying question allowed) while write_file is withheld', async () => {
  const llm = await startMockLLM();
  llm.replyWith('<write_file path="planned.txt">x</write_file>\n<ask_user question="1. Yes\n2. No"/>');
  llm.replyWith('plan ready');
  try {
    const runnerOptions = { approverFn: () => true, selectPick: 'Yes' };
    const { runner: agent, gateCalls: gateLog, selectCalls: menuLog } = buildRunner(llm.base, runnerOptions);
    const transcript = [{ role: 'user', content: 'plan it' }];
    const outcome = await agent.runAgentLoop(transcript, 'test-model', 10, null, { planMode: true });

    // Tags of every action the loop reported as withheld.
    const withheldTags = outcome.withheldActions.map(({ tag }) => tag);

    const writeWithheld = withheldTags.includes('write');
    assert.ok(writeWithheld, 'write_file is still withheld in plan mode');

    const askUserWithheld = withheldTags.includes('ask_user');
    assert.ok(!askUserWithheld, 'ask_user is NOT withheld in plan mode');

    assert.strictEqual(menuLog.length, 1, 'ask_user actually rendered its menu during planning');
    assert.strictEqual(gateLog.length, 0, 'ask_user did not consult the approval gate while planning');

    // The withheld write must not have created its target under CWD.
    const plannedPath = path.join(CWD, 'planned.txt');
    assert.ok(!fs.existsSync(plannedPath), 'the withheld write did not touch the disk');
  } finally {
    await llm.close();
  }
});
|
|
197
|
+
|
|
198
|
+
// ---------------------------------------------------------------------------
|
|
199
|
+
// Convergence: the XML rail produces the same ['ask_user', question] tuple and
|
|
200
|
+
// behaves identically (menu, no gate).
|
|
201
|
+
// ---------------------------------------------------------------------------
|
|
202
|
+
|
|
203
|
+
// XML rail: the same numbered-list question expressed as an <ask_user/> tag
// must yield the same observable outcome — one menu, no gate, pick echoed back.
test('XML rail converges: <ask_user> numbered list also renders the menu with no gate', async () => {
  const llm = await startMockLLM();
  llm.replyWith('<ask_user question="1. Red\n2. Blue"/>');
  llm.replyWith('done');
  try {
    const runnerOptions = { approverFn: () => true, selectPick: 'Blue' };
    const { runner: agent, gateCalls: gateLog, selectCalls: menuLog } = buildRunner(llm.base, runnerOptions);
    const transcript = [{ role: 'user', content: 'ask me' }];
    await agent.runAgentLoop(transcript, 'test-model', 10, null, {});

    assert.strictEqual(menuLog.length, 1, 'XML rail rendered the menu');
    const renderedOptions = menuLog[0].options;
    assert.deepStrictEqual(renderedOptions, ['Red', 'Blue']);
    assert.strictEqual(gateLog.length, 0, 'XML rail fired no gate either');

    const toolText = allToolText(transcript);
    assert.ok(/Blue/.test(toolText));
  } finally {
    await llm.close();
  }
});
|
|
220
|
+
|
|
221
|
+
// ---------------------------------------------------------------------------
|
|
222
|
+
// Prompt wording: the inventory now documents the numbered-list menu trigger so
|
|
223
|
+
// the model knows how to surface a choice.
|
|
224
|
+
// ---------------------------------------------------------------------------
|
|
225
|
+
|
|
226
|
+
// Prompt inventory: the ask_user purpose text must mention both the
// numbered-list trigger and the menu it produces.
test('prompts.js ask_user inventory documents the numbered-list menu', () => {
  const purpose = require('../lib/prompts').TOOL_TAG_SPECS.ask_user.purpose;

  // Each entry pairs a required phrase with the message shown if it is missing.
  const expectations = [
    [/numbered list/i, 'mentions a numbered list'],
    [/menu/i, 'mentions the menu rendering'],
  ];
  for (const [pattern, failureMessage] of expectations) {
    assert.ok(pattern.test(purpose), failureMessage);
  }
});
|