@semalt-ai/code 1.8.5 → 1.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +7 -1
- package/.github/workflows/ci.yml +69 -0
- package/ARCHITECTURE.md +6 -95
- package/CLAUDE.md +196 -316
- package/README.md +148 -4
- package/docs/ARCHITECTURE.md +1321 -0
- package/docs/CONFIG.md +340 -0
- package/docs/HISTORY.md +245 -0
- package/examples/embed.js +74 -0
- package/index.js +251 -10
- package/lib/agent.js +856 -120
- package/lib/api.js +239 -50
- package/lib/args.js +74 -2
- package/lib/audit.js +23 -1
- package/lib/background.js +584 -0
- package/lib/checkpoints.js +757 -0
- package/lib/commands/auth.js +94 -0
- package/lib/commands/chat-session.js +489 -0
- package/lib/commands/chat-slash.js +415 -0
- package/lib/commands/chat-turn.js +669 -0
- package/lib/commands/chat.js +407 -0
- package/lib/commands/custom.js +157 -0
- package/lib/commands/history-utils.js +66 -0
- package/lib/commands/index.js +268 -0
- package/lib/commands/mcp.js +113 -0
- package/lib/commands/oneshot.js +193 -0
- package/lib/commands/registry.js +269 -0
- package/lib/commands/tasks.js +89 -0
- package/lib/compact.js +87 -0
- package/lib/config.js +360 -11
- package/lib/constants.js +401 -3
- package/lib/deny.js +199 -0
- package/lib/doctor.js +160 -0
- package/lib/headless.js +202 -0
- package/lib/hooks.js +286 -0
- package/lib/images.js +270 -0
- package/lib/internals.js +49 -0
- package/lib/mcp/boundary.js +131 -0
- package/lib/mcp/client.js +270 -0
- package/lib/mcp/oauth.js +134 -0
- package/lib/memory.js +209 -0
- package/lib/metrics.js +37 -2
- package/lib/payload.js +54 -0
- package/lib/permission-rules.js +401 -0
- package/lib/permissions.js +123 -26
- package/lib/pricing.js +67 -0
- package/lib/proc.js +62 -0
- package/lib/prompts.js +99 -8
- package/lib/sandbox.js +568 -0
- package/lib/sdk.js +328 -0
- package/lib/secrets.js +211 -0
- package/lib/skills.js +223 -0
- package/lib/subagents.js +516 -0
- package/lib/tool_registry.js +2862 -0
- package/lib/tool_specs.js +263 -9
- package/lib/tools.js +352 -1039
- package/lib/ui/anim.js +86 -0
- package/lib/ui/ansi.js +17 -27
- package/lib/ui/chat-history.js +253 -71
- package/lib/ui/create-ui.js +67 -24
- package/lib/ui/diff.js +90 -25
- package/lib/ui/file-activity.js +236 -0
- package/lib/ui/format.js +195 -29
- package/lib/ui/input-field.js +21 -11
- package/lib/ui/md-stream.js +234 -0
- package/lib/ui/render-operation.js +113 -0
- package/lib/ui/select.js +1 -4
- package/lib/ui/status-bar.js +146 -36
- package/lib/ui/stream.js +20 -13
- package/lib/ui/theme.js +190 -44
- package/lib/ui/tool-operation.js +190 -0
- package/lib/ui/utils.js +9 -5
- package/lib/ui/web-activity.js +270 -0
- package/lib/ui/writer.js +159 -45
- package/lib/ui.js +1 -1
- package/lib/verify.js +229 -0
- package/lib/web-extract.js +213 -0
- package/lib/web-summarize.js +68 -0
- package/package.json +19 -4
- package/scripts/lint.js +57 -0
- package/test/agent-loop.test.js +389 -0
- package/test/anim-driver.test.js +153 -0
- package/test/ask-user-display.test.js +226 -0
- package/test/ask-user-gate.test.js +231 -0
- package/test/background.test.js +414 -0
- package/test/chat-history-nocolor.test.js +155 -0
- package/test/chat-relogin.test.js +207 -0
- package/test/chat.test.js +114 -0
- package/test/checkpoints-agent.test.js +181 -0
- package/test/checkpoints.test.js +650 -0
- package/test/command-registry.test.js +160 -0
- package/test/compact.test.js +116 -0
- package/test/completion-lazy.test.js +52 -0
- package/test/config-merge.test.js +324 -0
- package/test/config-quarantine.test.js +128 -0
- package/test/config-write-guard-allow-anywhere.test.js +56 -0
- package/test/config-write-guard-skip.test.js +46 -0
- package/test/config-write-guard.test.js +153 -0
- package/test/context-split.test.js +215 -0
- package/test/cost-doctor.test.js +142 -0
- package/test/custom-commands-chat.test.js +106 -0
- package/test/custom-commands.test.js +230 -0
- package/test/defer-detail-band.test.js +403 -0
- package/test/deny-windows.test.js +120 -0
- package/test/deny.test.js +83 -0
- package/test/detail-band-tab-flatten.test.js +242 -0
- package/test/download-allow-anywhere.test.js +66 -0
- package/test/download-confine.test.js +153 -0
- package/test/exec-diff.test.js +268 -0
- package/test/executors.test.js +599 -0
- package/test/extract-tool-calls.test.js +349 -0
- package/test/fetch-url-validation.test.js +219 -0
- package/test/file-activity.test.js +522 -0
- package/test/fixtures/tool-calls.js +57 -0
- package/test/fixtures/web-page.js +91 -0
- package/test/git-tools.test.js +384 -0
- package/test/grep-glob-serialize.test.js +242 -0
- package/test/grep-glob.test.js +268 -0
- package/test/grep-path-target.test.js +227 -0
- package/test/harness/README.md +57 -0
- package/test/harness/chat-harness.js +143 -0
- package/test/harness/memwarn-headless-child.js +65 -0
- package/test/harness/mock-llm.js +120 -0
- package/test/harness/mock-mcp-server.js +142 -0
- package/test/harness/sse-server.js +69 -0
- package/test/headless.test.js +348 -0
- package/test/history-utils.test.js +88 -0
- package/test/hooks-agent.test.js +238 -0
- package/test/hooks-verify-sandbox.test.js +232 -0
- package/test/hooks.test.js +216 -0
- package/test/http-get-user-agent.test.js +142 -0
- package/test/images-api.test.js +208 -0
- package/test/images.test.js +238 -0
- package/test/input-field-ctrl-o.test.js +37 -0
- package/test/live-height-physical.test.js +281 -0
- package/test/max-iterations.test.js +218 -0
- package/test/mcp-boundary.test.js +57 -0
- package/test/mcp-client.test.js +267 -0
- package/test/mcp-oauth.test.js +86 -0
- package/test/md-stream.test.js +183 -0
- package/test/memory-truncation-warning.test.js +222 -0
- package/test/memory.test.js +198 -0
- package/test/native-dispatch.test.js +409 -0
- package/test/native-live-narration.test.js +254 -0
- package/test/output-chokepoint.test.js +188 -0
- package/test/output-heredoc-leak.test.js +195 -0
- package/test/output-preview.test.js +245 -0
- package/test/path-guards.test.js +134 -0
- package/test/payload.test.js +99 -0
- package/test/permission-rules-agent.test.js +210 -0
- package/test/permission-rules.test.js +297 -0
- package/test/permissions.test.js +362 -0
- package/test/plan-mode.test.js +167 -0
- package/test/read-paginate.test.js +275 -0
- package/test/readonly-tools.test.js +177 -0
- package/test/render-operation.test.js +317 -0
- package/test/replay-descriptor-xml.test.js +216 -0
- package/test/replay-descriptor.test.js +189 -0
- package/test/replay-web-aggregate.test.js +291 -0
- package/test/replay-web-persist.test.js +241 -0
- package/test/result-cap.test.js +233 -0
- package/test/running-glyph-anim.test.js +111 -0
- package/test/sandbox-agent.test.js +147 -0
- package/test/sandbox-integration.test.js +216 -0
- package/test/sandbox.test.js +408 -0
- package/test/sdk.test.js +234 -0
- package/test/shell-output-cap.test.js +181 -0
- package/test/skills-chat.test.js +110 -0
- package/test/skills.test.js +295 -0
- package/test/smoke.test.js +68 -0
- package/test/status-bar-driver.test.js +93 -0
- package/test/status-bar-pause.test.js +164 -0
- package/test/status-bar-resync.test.js +188 -0
- package/test/stream-parser.test.js +171 -0
- package/test/subagents-agent.test.js +178 -0
- package/test/subagents.test.js +222 -0
- package/test/theme-palette.test.js +166 -0
- package/test/tool-registry.test.js +85 -0
- package/test/trim-budget.test.js +101 -0
- package/test/truncate-visible.test.js +78 -0
- package/test/verify-agent.test.js +317 -0
- package/test/verify.test.js +141 -0
- package/test/view-image.test.js +199 -0
- package/test/web-activity-ordering.test.js +203 -0
- package/test/web-activity.test.js +207 -0
- package/test/web-data-extraction-guidance.test.js +71 -0
- package/test/web-extract.test.js +185 -0
- package/test/web-fetch-agent.test.js +291 -0
- package/test/web-fetch-mode.test.js +193 -0
- package/test/web-search.test.js +380 -0
- package/lib/commands.js +0 -1438
- package/path +0 -1
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Coverage for the relogin chat-context reset + 404 self-heal on save.
|
|
4
|
+
// Root cause: /logout and /login did NOT reset ctx.currentChatId, so a stale id
|
|
5
|
+
// survived a relogin and suppressed fresh-chat creation; the next save POSTed to
|
|
6
|
+
// a chat the new principal can't see → 404 "Chat not found" → warn-and-drop.
|
|
7
|
+
//
|
|
8
|
+
// The two factories (createChatSession / createSlashHandlers) both take a plain
|
|
9
|
+
// `ctx`; in production cmdChat hands them the SAME ctx. We mirror that by passing
|
|
10
|
+
// one shared ctx, so a slash handler's reset is visible to the session helpers.
|
|
11
|
+
|
|
12
|
+
const test = require('node:test');
|
|
13
|
+
const assert = require('node:assert');
|
|
14
|
+
|
|
15
|
+
const { createChatSession } = require('../lib/commands/chat-session');
|
|
16
|
+
const { createSlashHandlers } = require('../lib/commands/chat-slash');
|
|
17
|
+
|
|
18
|
+
// Factory for the one context object handed to both createChatSession and
// createSlashHandlers in these tests. Collaborators are recording stubs or inert
// no-ops; `overrides` may replace the session state (messages, currentChatId,
// savedUpTo), extend the config, or swap any of the dashboard*/loginFlow/
// ensureDefaultModel stubs.
//
// Recorded activity is exposed on the returned object:
//   _warns        – every message passed to msgs.sysWarn
//   _saves        – { chatId, count } per default dashboardSaveMessages call
//   _created      – ids minted by the default dashboardCreateChat
//   _createdCount – live getter for _created.length (non-enumerable)
//   _permClears   – number of permissionManager.clear() calls (unset until the first)
function makeCtx(overrides = {}) {
  const warns = [];
  const saves = [];
  const created = [];
  let createSeq = 0;

  const configOverrides = overrides.config || {};
  const config = { auth_token: 'tok', dashboard_model_id: 'm1', ...configOverrides };

  // Default dashboard stubs: mint sequential chat ids / record each save.
  const defaultCreateChat = async () => {
    createSeq += 1;
    const id = `chat-${createSeq}`;
    created.push(id);
    return { chat: { id } };
  };
  const defaultSaveMessages = async (chatId, msgs) => {
    saves.push({ chatId, count: msgs.length });
    return {};
  };

  // An explicit `undefined` (or absent) currentChatId means "no chat yet".
  const initialChatId = overrides.currentChatId === undefined ? null : overrides.currentChatId;

  const ctx = {
    // session state
    messages: overrides.messages || [],
    currentChatId: initialChatId,
    savedUpTo: overrides.savedUpTo || 0,
    currentModel: 'test-model',
    resolvedTokenLimit: 1000,

    // collaborators used by chat-session helpers
    msgs: {
      sysWarn: (m) => warns.push(m),
    },
    getConfig: () => config,
    setConfig: (c) => Object.assign(config, c),
    storage: {
      list: () => [],
      save() {},
      load: () => null,
    },
    approxTokens: (s) => Math.ceil((s || '').length / 4),
    PAGE_SIZE: 10,
    sessionStart: 0,
    getCols: () => 80,
    FG_GRAY: '',
    RST: '',
    cleanOrphanedToolMessages: (m) => ({
      messages: m,
      droppedTool: 0,
      droppedAssistantCalls: 0,
      droppedAssistantMsgs: 0,
    }),
    reconstructLoadedMessage: (m) => m,
    resolveTokenLimit: async () => 1000,

    dashboardCreateChat: overrides.dashboardCreateChat || defaultCreateChat,
    dashboardSaveMessages: overrides.dashboardSaveMessages || defaultSaveMessages,
    dashboardGetChat: async () => ({ messages: [] }),
    dashboardGetModelForCli: async () => ({ model: null }),

    // collaborators used by slash handlers
    chatHistory: { addMessage() {} },
    statusBar: { update() {}, setModel() {}, setContextLimit() {} },
    inputField: { setDisabled() {} },
    permissionManager: {
      // Counts clears on the ctx itself so tests can assert on it.
      clear() {
        const soFar = ctx._permClears || 0;
        ctx._permClears = soFar + 1;
      },
    },
    loginFlow: overrides.loginFlow || (async () => {}),
    ensureDefaultModel: overrides.ensureDefaultModel || (async () => null),
    dashboardLogout: overrides.dashboardLogout || (async () => ({})),

    _warns: warns,
    _saves: saves,
    _created: created,
  };

  // Live count of chats minted by the default create stub.
  Object.defineProperty(ctx, '_createdCount', {
    get() {
      return created.length;
    },
  });

  return ctx;
}
|
|
75
|
+
|
|
76
|
+
// (b) /login must drop the per-chat state (currentChatId, savedUpTo) and, the
// same way /new does, empty the message list and clear approvals.
test('/login resets chat context (currentChatId, savedUpTo, messages)', async () => {
  const staleState = {
    currentChatId: 'stale',
    savedUpTo: 5,
    messages: [{ role: 'user', content: 'x' }],
  };
  const ctx = makeCtx(staleState);
  const slash = createSlashHandlers(ctx);

  await slash['/login']();

  assert.strictEqual(ctx.currentChatId, null);
  assert.strictEqual(ctx.savedUpTo, 0);
  assert.deepStrictEqual(ctx.messages, []);
  assert.strictEqual(ctx._permClears, 1, 'approvals cleared on principal change');
});
|
|
86
|
+
|
|
87
|
+
// (c) /logout performs the same reset, and the reset must hold even when the
// dashboardLogout HTTP call itself rejects.
test('/logout resets chat context even when dashboardLogout HTTP fails', async () => {
  const serviceDown = new Error('Service Unavailable');
  serviceDown.statusCode = 503;
  const rejectingLogout = async () => {
    throw serviceDown;
  };

  const ctx = makeCtx({
    currentChatId: 'stale',
    savedUpTo: 5,
    dashboardLogout: rejectingLogout,
  });
  const slash = createSlashHandlers(ctx);

  // The handler is expected to bail out on the 503; the state must be reset anyway.
  await slash['/logout']();

  assert.strictEqual(ctx.currentChatId, null);
  assert.strictEqual(ctx.savedUpTo, 0);
});
|
|
99
|
+
|
|
100
|
+
// (a) Whole flow inside one process: /logout, then /login, then a new turn.
// The turn has to create a NEW chat and the save has to go to that chat.
test('logout then login: next turn creates a fresh chat and save targets it', async () => {
  const ctx = makeCtx({
    currentChatId: 'chat-old',
    savedUpTo: 3,
    // Stand-in for a login that stores a token for the new principal.
    loginFlow: async () => {
      ctx.setConfig({ auth_token: 'newtok' });
    },
  });
  const session = createChatSession(ctx);
  const slash = createSlashHandlers(ctx);

  await slash['/logout']();
  assert.strictEqual(ctx.currentChatId, null, 'logout dropped the stale id');

  await slash['/login']();
  assert.strictEqual(ctx.currentChatId, null, 'login left it null for lazy create');

  // Next turn: one user message and the assistant's reply.
  const nextTurn = [
    { role: 'user', content: 'hello' },
    { role: 'assistant', content: 'hi' },
  ];
  ctx.messages.push(...nextTurn);
  await session.createChatIfNeeded('hello');
  await session.saveTurnToDashboard();

  assert.strictEqual(ctx._createdCount, 1, 'exactly one fresh chat created');
  assert.strictEqual(ctx._saves.length, 1);
  const [firstSave] = ctx._saves;
  assert.strictEqual(firstSave.chatId, 'chat-1', 'save went to the new chat, not chat-old');
  assert.deepStrictEqual(ctx._warns, [], 'no "Chat not found" warning');
});
|
|
124
|
+
|
|
125
|
+
// (d) A 404 from the save triggers the self-heal path: create a replacement
// chat, send the same pending slice to it, and only then advance savedUpTo.
test('saveTurnToDashboard self-heals on 404 (recreate + re-save, once)', async () => {
  let chatsCreated = 0;
  const saveCalls = [];

  const createFreshChat = async () => {
    chatsCreated += 1;
    return { chat: { id: `fresh-${chatsCreated}` } };
  };
  // Rejects with a 404 for the stale chat id, succeeds for anything else.
  const saveUnlessStale = async (chatId, msgs) => {
    saveCalls.push({ chatId, count: msgs.length });
    if (chatId === 'stale') {
      throw Object.assign(new Error('Chat not found'), { statusCode: 404 });
    }
    return {};
  };

  const ctx = makeCtx({
    currentChatId: 'stale',
    savedUpTo: 0,
    messages: [
      { role: 'user', content: 'hi' },
      { role: 'assistant', content: 'yo' },
    ],
    dashboardCreateChat: createFreshChat,
    dashboardSaveMessages: saveUnlessStale,
  });
  const session = createChatSession(ctx);

  await session.saveTurnToDashboard();

  assert.strictEqual(chatsCreated, 1, 'exactly one new chat created (no retry loop)');
  assert.strictEqual(ctx.currentChatId, 'fresh-1');
  const targetedChats = saveCalls.map((call) => call.chatId);
  assert.deepStrictEqual(targetedChats, ['stale', 'fresh-1'], 'retried on the fresh chat');
  assert.strictEqual(saveCalls[1].count, 2, 're-saved the SAME pending slice');
  assert.strictEqual(ctx.savedUpTo, 2, 'savedUpTo advanced only after re-save succeeded');
  const warnedAboutFailure = ctx._warns.some((w) => /history save failed/.test(w));
  assert.ok(!warnedAboutFailure, 'no failure warning on success');
});
|
|
149
|
+
|
|
150
|
+
// (e) Any status other than 404 (a 503 here) must NOT trigger recreation: the
// failure is warned about and savedUpTo stays put.
test('saveTurnToDashboard does not recover on non-404 (503)', async () => {
  let createCalls = 0;

  const countingCreate = async () => {
    createCalls += 1;
    return { chat: { id: 'x' } };
  };
  const alwaysUnavailable = async () => {
    throw Object.assign(new Error('Service Unavailable'), { statusCode: 503 });
  };

  const ctx = makeCtx({
    currentChatId: 'stale',
    savedUpTo: 0,
    messages: [
      { role: 'user', content: 'hi' },
      { role: 'assistant', content: 'yo' },
    ],
    dashboardCreateChat: countingCreate,
    dashboardSaveMessages: alwaysUnavailable,
  });
  const session = createChatSession(ctx);

  await session.saveTurnToDashboard();

  assert.strictEqual(createCalls, 0, 'transient error must NOT spawn a new chat');
  assert.strictEqual(ctx.currentChatId, 'stale', 'chat id untouched');
  assert.strictEqual(ctx.savedUpTo, 0, 'savedUpTo unadvanced so a later turn retries');
  const warnedAboutFailure = ctx._warns.some((w) => /history save failed/.test(w));
  assert.ok(warnedAboutFailure, 'warned');
});
|
|
167
|
+
|
|
168
|
+
// (f) When chat creation rejects, createChatIfNeeded must report it through a
// warning rather than dropping the error silently.
test('createChatIfNeeded surfaces a creation failure (not silent)', async () => {
  const explodingCreate = async () => {
    throw new Error('boom');
  };
  const ctx = makeCtx({ currentChatId: null, dashboardCreateChat: explodingCreate });
  const session = createChatSession(ctx);

  await session.createChatIfNeeded('hello');

  assert.strictEqual(ctx.currentChatId, null, 'still no chat (non-fatal)');
  const surfaced = ctx._warns.some((w) => /could not create dashboard chat/.test(w));
  assert.ok(surfaced, 'failure surfaced');
});
|
|
179
|
+
|
|
180
|
+
// (g) Regression guard: both a normal session and a --resume session must keep
// saving to the correct chat.
test('regression: normal session lazily creates and saves; --resume saves to resumed id', async () => {
  // Normal session: no chat id yet, so the first turn creates one and saves to it.
  const fresh = makeCtx({
    currentChatId: null,
    savedUpTo: 0,
    messages: [
      { role: 'user', content: 'q' },
      { role: 'assistant', content: 'a' },
    ],
  });
  const freshSession = createChatSession(fresh);
  await freshSession.createChatIfNeeded('q');
  await freshSession.saveTurnToDashboard();

  assert.strictEqual(fresh._created.length, 1);
  assert.strictEqual(fresh._saves[0].chatId, 'chat-1');
  assert.strictEqual(fresh.savedUpTo, 2);
  assert.deepStrictEqual(fresh._warns, []);

  // --resume session: the chat id is already set and savedUpTo equals the
  // resumed history length, so a new turn saves only the increment, to the
  // SAME chat, without creating anything.
  const resumed = makeCtx({
    currentChatId: 'resumed-id',
    savedUpTo: 2,
    messages: [
      { role: 'user', content: 'q1' },
      { role: 'assistant', content: 'a1' },
    ],
  });
  const resumedSession = createChatSession(resumed);
  await resumedSession.createChatIfNeeded('ignored'); // expected no-op: id already set
  const secondTurn = [
    { role: 'user', content: 'q2' },
    { role: 'assistant', content: 'a2' },
  ];
  resumed.messages.push(...secondTurn);
  await resumedSession.saveTurnToDashboard();

  assert.strictEqual(resumed._created.length, 0, '--resume must not create a new chat');
  assert.strictEqual(resumed._saves.length, 1);
  const [onlySave] = resumed._saves;
  assert.strictEqual(onlySave.chatId, 'resumed-id');
  assert.strictEqual(onlySave.count, 2, 'only the incremental slice saved');
  assert.strictEqual(resumed.savedUpTo, 4);
});
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Characterization tests for cmdChat (Task 1.5, tests-first). These lock in the
|
|
4
|
+
// chat loop's observable behavior — slash dispatch, the agent path, session
|
|
5
|
+
// reset, and teardown — BEFORE cmdChat is decomposed into lib/commands/ modules,
|
|
6
|
+
// so the split can be proven behavior-preserving.
|
|
7
|
+
|
|
8
|
+
const { test } = require('node:test');
|
|
9
|
+
const assert = require('node:assert');
|
|
10
|
+
|
|
11
|
+
const { helpText } = require('../lib/commands/registry');
|
|
12
|
+
const { startChat } = require('./harness/chat-harness');
|
|
13
|
+
|
|
14
|
+
// A freshly started chat shows the welcome banner and the /help hint.
test('startup shows the welcome banner message', async () => {
  const chat = await startChat();
  try {
    const bannerPattern = /Semalt\.AI/;
    const hintPattern = /Type \/help for commands/;
    assert.ok(chat.chatHistory.find(bannerPattern), 'welcome message present');
    assert.ok(chat.chatHistory.find(hintPattern), 'help hint present');
  } finally {
    // End the session, wait for it to finish, then release the harness.
    await chat.submit('exit');
    await chat.done;
    chat.cleanup();
  }
});
|
|
23
|
+
|
|
24
|
+
// /help must emit exactly the text produced by the command registry.
test('/help renders the registry help text', async () => {
  const chat = await startChat();
  try {
    await chat.submit('/help');
    const emitted = chat.chatHistory.texts();
    const expected = helpText();
    assert.ok(emitted.includes(expected), 'help text emitted verbatim');
  } finally {
    await chat.submit('exit');
    await chat.done;
    chat.cleanup();
  }
});
|
|
33
|
+
|
|
34
|
+
// With an empty auth token a plain message is refused and never reaches the agent.
test('a normal message is blocked when not logged in', async () => {
  const loggedOut = { config: { auth_token: '' } };
  const chat = await startChat(loggedOut);
  try {
    await chat.submit('hello there');
    assert.ok(chat.chatHistory.find(/Not logged in/), 'unauthenticated message refused');
    const agentRuns = chat.calls.runAgentLoop.length;
    assert.strictEqual(agentRuns, 0, 'agent not invoked');
  } finally {
    await chat.submit('exit');
    await chat.done;
    chat.cleanup();
  }
});
|
|
44
|
+
|
|
45
|
+
// With a token set, a plain message runs the agent loop once and the user text
// appears both in the loop's messages and in the visible history.
test('a normal message runs the agent loop when authenticated', async () => {
  const chat = await startChat({ config: { auth_token: 'tok' } });
  try {
    await chat.submit('do the thing');
    assert.strictEqual(chat.calls.runAgentLoop.length, 1, 'agent invoked once');

    const isTheUserPrompt = (m) => m.role === 'user' && m.content === 'do the thing';
    const [turn] = chat.calls.runAgentLoop;
    assert.ok(turn.messages.some(isTheUserPrompt), 'user message threaded into the loop');
    assert.ok(chat.chatHistory.messages.some(isTheUserPrompt), 'user message shown in history');
  } finally {
    await chat.submit('exit');
    await chat.done;
    chat.cleanup();
  }
});
|
|
57
|
+
|
|
58
|
+
// After /new, the next agent turn must see only messages sent after the reset.
test('/new resets the conversation so the next turn starts fresh', async () => {
  const chat = await startChat({ config: { auth_token: 'tok' } });
  try {
    await chat.submit('first message');
    await chat.submit('/new');
    assert.ok(chat.chatHistory.find(/Started new conversation/));

    await chat.submit('second message');
    const turns = chat.calls.runAgentLoop;
    const lastTurn = turns[turns.length - 1];
    const userContents = lastTurn.messages
      .filter((m) => m.role === 'user')
      .map((m) => m.content);
    assert.deepStrictEqual(userContents, ['second message'], 'history reset — only the post-/new message remains');
  } finally {
    await chat.submit('exit');
    await chat.done;
    chat.cleanup();
  }
});
|
|
72
|
+
|
|
73
|
+
// /clear reports that it cleared and bumps the permission-clear counter by one.
test('/clear resets conversation and clears approvals', async () => {
  const chat = await startChat({ config: { auth_token: 'tok' } });
  try {
    await chat.submit('a message');
    const clearsBefore = chat.calls.permissionClear;

    await chat.submit('/clear');

    assert.ok(chat.chatHistory.find(/cleared/i));
    assert.strictEqual(chat.calls.permissionClear, clearsBefore + 1, 'permission approvals cleared');
  } finally {
    await chat.submit('exit');
    await chat.done;
    chat.cleanup();
  }
});
|
|
85
|
+
|
|
86
|
+
// Bare /model prints the active model; /model <name> switches to <name>.
test('/model with no arg shows the current model; with an arg switches it', async () => {
  const startConfig = { auth_token: 'tok', default_model: 'm-one' };
  const chat = await startChat({ config: startConfig });
  try {
    await chat.submit('/model');
    assert.ok(chat.chatHistory.find(/Current model: m-one/));

    await chat.submit('/model m-two');
    assert.ok(chat.chatHistory.find(/Model → m-two/));
  } finally {
    await chat.submit('exit');
    await chat.done;
    chat.cleanup();
  }
});
|
|
97
|
+
|
|
98
|
+
// Text that merely looks like a slash command (no such command exists) is
// treated as a normal message and handed to the agent.
test('an unknown slash-looking message falls through to the agent (authed)', async () => {
  const chat = await startChat({ config: { auth_token: 'tok' } });
  try {
    await chat.submit('/notacommand please');
    const agentRuns = chat.calls.runAgentLoop.length;
    assert.strictEqual(agentRuns, 1, 'non-command text reaches the agent');
  } finally {
    await chat.submit('exit');
    await chat.done;
    chat.cleanup();
  }
});
|
|
107
|
+
|
|
108
|
+
// Submitting `exit` must end the session: the harness's `done` promise settles.
// The test passes by `done` resolving; a rejection (or a hang until the runner's
// timeout) fails it, so no extra assertion is needed.
test('exit ends the chat session (the cmdChat promise resolves)', async () => {
  const c = await startChat();
  try {
    await c.submit('exit');
    await c.done; // per the original note: resolves only if the exit handler ran resolveExit
  } finally {
    // Always release the harness, matching every other test in this file —
    // previously a rejected submit/done skipped cleanup entirely.
    c.cleanup();
  }
});
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Integration tests for checkpoints (Task 4.3) driving the REAL executor +
|
|
4
|
+
// runAgentLoop (and a REAL subagent loop) against the mock-LLM harness. Covers:
|
|
5
|
+
// * a write through the real loop is checkpointed post-gate/pre-mutation, with
|
|
6
|
+
// turn linkage, and is rewindable;
|
|
7
|
+
// * a DENIED tool call produces NO checkpoint;
|
|
8
|
+
// * a SUBAGENT's mutation is checkpointed into the PARENT session and is
|
|
9
|
+
// rewindable (subagents reuse the parent's agentExecFile).
|
|
10
|
+
|
|
11
|
+
const { test, before, after, afterEach } = require('node:test');
|
|
12
|
+
const assert = require('node:assert');
|
|
13
|
+
const fs = require('fs');
|
|
14
|
+
const os = require('os');
|
|
15
|
+
const path = require('path');
|
|
16
|
+
|
|
17
|
+
const ui = require('../lib/ui');
|
|
18
|
+
const { createApiClient } = require('../lib/api');
|
|
19
|
+
const { createToolExecutor, extractToolCalls } = require('../lib/tools');
|
|
20
|
+
const { createPermissionManager } = require('../lib/permissions');
|
|
21
|
+
const { createAgentRunner } = require('../lib/agent');
|
|
22
|
+
const { createCheckpointStore } = require('../lib/checkpoints');
|
|
23
|
+
const toolRegistry = require('../lib/tool_registry');
|
|
24
|
+
const { createSubagentManager, buildSpawnAgentEntry } = require('../lib/subagents');
|
|
25
|
+
const { startMockLLM } = require('./harness/mock-llm');
|
|
26
|
+
|
|
27
|
+
// Suite-level fixtures: pin SEMALT_API_KEY to a known value for the whole file
// and put the environment back afterwards; drop dynamically registered tools
// after each test so one test's registrations are not seen by the next.
let apiKeyBeforeSuite;

before(() => {
  apiKeyBeforeSuite = process.env.SEMALT_API_KEY;
  process.env.SEMALT_API_KEY = 'test-key';
});

after(() => {
  // Distinguish "was unset" from "had a value" so the variable is removed, not
  // set to the string "undefined".
  if (apiKeyBeforeSuite !== undefined) {
    process.env.SEMALT_API_KEY = apiKeyBeforeSuite;
    return;
  }
  delete process.env.SEMALT_API_KEY;
});

afterEach(() => {
  toolRegistry.clearDynamicTools();
});
|
|
34
|
+
|
|
35
|
+
// Creates a unique scratch directory under the OS temp dir and returns its
// canonical (symlink-free) absolute path.
//
// tag – short label embedded in the directory name (`semalt-cpa-<tag>-XXXXXX`).
//
// The path is canonicalised because the tests chdir() into it and then compare
// against / build paths from it: where the temp dir is itself a symlink (e.g.
// /var → /private/var on macOS) process.cwd() reports the resolved location, so
// an unresolved path would no longer be textually "inside" the CWD.
function tmpdir(tag = 'work') {
  const created = fs.mkdtempSync(path.join(os.tmpdir(), `semalt-cpa-${tag}-`));
  return fs.realpathSync(created);
}
|
|
36
|
+
|
|
37
|
+
// Assembles the full stack used by these tests: API client, permission manager,
// a checkpoint store rooted in a fresh temp dir, the tool executor wired to that
// store, and the agent runner.
//
// base            – base URL of the mock LLM (stored as config.api_base)
// skipPermissions – forwarded to createPermissionManager
// withSubagents   – when true, also creates a subagent manager that shares the
//                   SAME executor functions (so a child's mutations go through
//                   the same checkpoint store) and registers its spawn_agent
//                   entry as a dynamic tool
//
// Returns { runner, manager, checkpointStore, root, config }; `manager` is null
// unless withSubagents was set.
function buildStack(base, { skipPermissions = false, withSubagents = false } = {}) {
  const config = {
    api_base: base,
    api_key: 'test-key',
    default_model: 'test-model',
    temperature: 0.5,
    request_timeout_ms: 5000,
    stream: true,
    models: [],
    checkpoints: { enabled: true },
  };
  const getConfig = () => config;
  const saveConfig = (c) => Object.assign(config, c);

  const api = createApiClient({ getConfig, saveConfig, ui });

  // Permission manager with inert UI callbacks — nothing is rendered in tests.
  const pm = createPermissionManager(ui, { skipPermissions });
  pm.setUICallbacks({
    onAddMessage: () => {},
    onShowModal: () => {},
    onCloseModal: () => {},
    onCaptureNavigation: () => () => {},
  });

  const root = tmpdir('cproot');
  const checkpointStore = createCheckpointStore({
    getConfig,
    sessionId: 'agent-sess',
    rootDir: root,
    audit: { logCheckpoint: () => {} },
  });

  const executor = createToolExecutor(pm, ui, getConfig, { checkpointStore });
  const { agentExecShell, agentExecFile, describePermission } = executor;

  // Dependencies common to the top-level runner and the subagent manager.
  const loopDeps = {
    chatStream: api.chatStream,
    extractToolCalls,
    agentExecShell,
    agentExecFile,
    describePermission,
    permissionManager: pm,
    ui,
    getConfig,
  };

  const runner = createAgentRunner({ ...loopDeps, checkpoints: checkpointStore });

  let manager = null;
  if (withSubagents) {
    manager = createSubagentManager({ ...loopDeps, agentDefs: [] });
    toolRegistry.registerDynamicTool(buildSpawnAgentEntry(manager));
  }

  return { runner, manager, checkpointStore, root, config };
}
|
|
72
|
+
|
|
73
|
+
// ---------------------------------------------------------------------------
|
|
74
|
+
// 1. A write through the real loop is checkpointed (post-gate) and rewindable
|
|
75
|
+
// ---------------------------------------------------------------------------
|
|
76
|
+
|
|
77
|
+
// Drives one write through the real agent loop and checks that it was
// checkpointed (prior content captured, turn linkage recorded) and that
// rewinding restores the file.
test('a top-level write is checkpointed with turn linkage and is rewindable', async () => {
  const dir = tmpdir();
  const file = path.join(dir, 'note.txt');
  fs.writeFileSync(file, 'ORIGINAL');

  const cwd0 = process.cwd();
  let mock = null;
  try {
    // isPathSafe confines writes to the CWD — point it at the work dir. This is
    // done inside the try so that any failure from here on (including the mock
    // failing to start) still restores the original CWD for later tests.
    process.chdir(dir);
    mock = await startMockLLM();
    mock.replyWith(`<write_file path="${file}">REWRITTEN</write_file>`); // iter 0: the mutation
    mock.replyWith('Done.'); // iter 1: final

    const { runner, checkpointStore } = buildStack(mock.base, { skipPermissions: true });
    const messages = [{ role: 'user', content: 'rewrite the note' }];
    await runner.runAgentLoop(messages, 'test-model', 5, null, { callbacks: { onError: () => {} } });

    assert.strictEqual(fs.readFileSync(file, 'utf8'), 'REWRITTEN', 'the write happened');

    const items = checkpointStore.list();
    assert.strictEqual(items.length, 1, 'one checkpoint captured for the write');
    const rec = checkpointStore._loadRecord(items[0].seq);
    assert.strictEqual(rec.action, 'write');
    assert.strictEqual(rec.targets[0].path, file);
    // prior state was captured BEFORE the mutation
    assert.strictEqual(Buffer.from(rec.targets[0].priorContentB64, 'base64').toString('utf8'), 'ORIGINAL');
    // turn linkage recorded (forward-compat for conversation-rewind, Task 4.3b)
    assert.strictEqual(rec.turn.turnId, 'turn-1');
    assert.strictEqual(typeof rec.turn.messageCountAtStart, 'number');

    // and it actually rewinds
    const res = checkpointStore.rewind('last');
    assert.ok(res.ok);
    assert.strictEqual(fs.readFileSync(file, 'utf8'), 'ORIGINAL', 'rewind restored prior content');
  } finally {
    // Restore the CWD first: it is process-global state, and it must be put
    // back even if closing the mock server rejects.
    process.chdir(cwd0);
    if (mock) await mock.close();
  }
});
|
|
115
|
+
|
|
116
|
+
// ---------------------------------------------------------------------------
|
|
117
|
+
// 2. A denied tool call produces NO checkpoint
|
|
118
|
+
// ---------------------------------------------------------------------------
|
|
119
|
+
|
|
120
|
+
// A write refused at the permission gate must leave no file and no checkpoint.
test('a denied write (non-TTY, no skip) is NOT checkpointed', async () => {
  const workDir = tmpdir();
  const target = path.join(workDir, 'blocked.txt');

  const llm = await startMockLLM();
  llm.replyWith(`<write_file path="${target}">SHOULD NOT WRITE</write_file>`); // denied at the gate
  llm.replyWith('ok');
  try {
    // With skipPermissions off in a non-TTY environment the gate is expected to
    // REFUSE the write, so the executor never runs and nothing is checkpointed.
    const stack = buildStack(llm.base, { skipPermissions: false });
    const conversation = [{ role: 'user', content: 'try to write' }];
    const loopOptions = { callbacks: { onError: () => {} } };
    await stack.runner.runAgentLoop(conversation, 'test-model', 5, null, loopOptions);

    assert.ok(!fs.existsSync(target), 'the write was denied');
    const checkpoints = stack.checkpointStore.list();
    assert.strictEqual(checkpoints.length, 0, 'no checkpoint for a denied call');
  } finally {
    await llm.close();
  }
});
|
|
140
|
+
|
|
141
|
+
// ---------------------------------------------------------------------------
|
|
142
|
+
// 3. A subagent's mutation is checkpointed in the PARENT session, rewindable
|
|
143
|
+
// ---------------------------------------------------------------------------
|
|
144
|
+
|
|
145
|
+
// A write performed by a spawned subagent must be checkpointed in the parent's
// store, linked to the parent's turn, and be rewindable from the parent.
test('a subagent mutation is checkpointed in the parent session and is rewindable', async () => {
  const dir = tmpdir();
  const file = path.join(dir, 'child.txt');
  fs.writeFileSync(file, 'BEFORE');

  const cwd0 = process.cwd();
  let mock = null;
  try {
    // chdir happens inside the try so the original CWD is restored even when
    // the mock server fails to start or anything below throws.
    process.chdir(dir);
    mock = await startMockLLM();
    // Parent dispatches spawn_agent; the child performs the write; child + parent
    // each conclude. The child reuses the parent's agentExecFile, so its mutation
    // flows through the SAME checkpoint store.
    mock.replyWithToolCall('spawn_agent', { prompt: 'rewrite child.txt' }); // parent iter 0
    mock.replyWith(`<write_file path="${file}">AFTER</write_file>`); // child iter 0 (the mutation)
    mock.replyWith('child done'); // child iter 1 (final)
    mock.replyWith('parent done'); // parent iter 1 (final)

    const { runner, checkpointStore } = buildStack(mock.base, { skipPermissions: true, withSubagents: true });
    const messages = [{ role: 'user', content: 'delegate a write' }];
    await runner.runAgentLoop(messages, 'test-model', 6, null, { callbacks: { onError: () => {} } });

    assert.strictEqual(fs.readFileSync(file, 'utf8'), 'AFTER', 'the child performed the write');

    const items = checkpointStore.list();
    assert.strictEqual(items.length, 1, "the child's write is checkpointed in the parent session");
    const rec = checkpointStore._loadRecord(items[0].seq);
    assert.strictEqual(rec.targets[0].path, file);
    // linked to the parent's turn (the parent never reset turn context for the child)
    assert.strictEqual(rec.turn.turnId, 'turn-1');

    const res = checkpointStore.rewind('last');
    assert.ok(res.ok);
    assert.strictEqual(fs.readFileSync(file, 'utf8'), 'BEFORE', "a subagent's mutation is rewindable from the parent");
  } finally {
    // CWD is process-global: restore it before anything that might reject.
    process.chdir(cwd0);
    if (mock) await mock.close();
  }
});
|