@semalt-ai/code 1.8.4 → 1.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +8 -1
- package/.github/workflows/ci.yml +69 -0
- package/CLAUDE.md +1588 -27
- package/README.md +147 -3
- package/TECHNICAL_DEBT.md +66 -0
- package/examples/embed.js +74 -0
- package/index.js +259 -11
- package/lib/agent.js +935 -181
- package/lib/api.js +308 -55
- package/lib/args.js +96 -2
- package/lib/audit.js +23 -1
- package/lib/background.js +584 -0
- package/lib/checkpoints.js +757 -0
- package/lib/commands/auth.js +94 -0
- package/lib/commands/chat-session.js +306 -0
- package/lib/commands/chat-slash.js +399 -0
- package/lib/commands/chat-turn.js +446 -0
- package/lib/commands/chat.js +403 -0
- package/lib/commands/custom.js +157 -0
- package/lib/commands/history-utils.js +66 -0
- package/lib/commands/index.js +268 -0
- package/lib/commands/mcp.js +113 -0
- package/lib/commands/oneshot.js +193 -0
- package/lib/commands/registry.js +269 -0
- package/lib/commands/tasks.js +89 -0
- package/lib/compact.js +87 -0
- package/lib/config.js +346 -11
- package/lib/constants.js +372 -3
- package/lib/debug.js +106 -0
- package/lib/deny.js +199 -0
- package/lib/doctor.js +160 -0
- package/lib/headless.js +167 -0
- package/lib/hooks.js +286 -0
- package/lib/images.js +264 -0
- package/lib/internals.js +49 -0
- package/lib/mcp/boundary.js +131 -0
- package/lib/mcp/client.js +270 -0
- package/lib/mcp/oauth.js +134 -0
- package/lib/memory.js +209 -0
- package/lib/metrics.js +37 -2
- package/lib/payload.js +54 -0
- package/lib/permission-rules.js +401 -0
- package/lib/permissions.js +100 -10
- package/lib/pricing.js +67 -0
- package/lib/proc.js +158 -0
- package/lib/prompts.js +88 -8
- package/lib/sandbox.js +568 -0
- package/lib/sdk.js +328 -0
- package/lib/secrets.js +211 -0
- package/lib/skills.js +223 -0
- package/lib/subagents.js +516 -0
- package/lib/tool_registry.js +2558 -0
- package/lib/tool_specs.js +236 -9
- package/lib/tools.js +370 -944
- package/lib/ui/chat-history.js +19 -1
- package/lib/ui/format.js +101 -6
- package/lib/ui/input-field.js +16 -7
- package/lib/ui/status-bar.js +79 -11
- package/lib/ui/terminal.js +10 -4
- package/lib/ui/theme.js +1 -0
- package/lib/ui/web-activity.js +218 -0
- package/lib/ui/writer.js +7 -9
- package/lib/verify.js +229 -0
- package/lib/web-extract.js +213 -0
- package/lib/web-summarize.js +68 -0
- package/package.json +19 -4
- package/scripts/lint.js +57 -0
- package/test/agent-loop.test.js +389 -0
- package/test/background.test.js +414 -0
- package/test/chat.test.js +114 -0
- package/test/checkpoints-agent.test.js +181 -0
- package/test/checkpoints.test.js +650 -0
- package/test/command-registry.test.js +160 -0
- package/test/compact.test.js +116 -0
- package/test/completion-lazy.test.js +52 -0
- package/test/config-merge.test.js +324 -0
- package/test/config-quarantine.test.js +128 -0
- package/test/config-write-guard-allow-anywhere.test.js +56 -0
- package/test/config-write-guard-skip.test.js +46 -0
- package/test/config-write-guard.test.js +153 -0
- package/test/context-split.test.js +215 -0
- package/test/cost-doctor.test.js +142 -0
- package/test/custom-commands-chat.test.js +106 -0
- package/test/custom-commands.test.js +230 -0
- package/test/deny-windows.test.js +120 -0
- package/test/deny.test.js +83 -0
- package/test/download-allow-anywhere.test.js +66 -0
- package/test/download-confine.test.js +153 -0
- package/test/executors.test.js +362 -0
- package/test/extract-tool-calls.test.js +315 -0
- package/test/fetch-url-validation.test.js +219 -0
- package/test/fixtures/tool-calls.js +57 -0
- package/test/fixtures/web-page.js +91 -0
- package/test/git-tools.test.js +384 -0
- package/test/grep-glob-serialize.test.js +242 -0
- package/test/grep-glob.test.js +268 -0
- package/test/harness/README.md +57 -0
- package/test/harness/chat-harness.js +142 -0
- package/test/harness/memwarn-headless-child.js +65 -0
- package/test/harness/mock-llm.js +120 -0
- package/test/harness/mock-mcp-server.js +142 -0
- package/test/harness/sse-server.js +69 -0
- package/test/headless.test.js +203 -0
- package/test/history-utils.test.js +88 -0
- package/test/hooks-agent.test.js +238 -0
- package/test/hooks-verify-sandbox.test.js +232 -0
- package/test/hooks.test.js +216 -0
- package/test/http-get-user-agent.test.js +142 -0
- package/test/images-api.test.js +208 -0
- package/test/images.test.js +238 -0
- package/test/max-iterations.test.js +216 -0
- package/test/mcp-boundary.test.js +57 -0
- package/test/mcp-client.test.js +267 -0
- package/test/mcp-oauth.test.js +86 -0
- package/test/memory-truncation-warning.test.js +222 -0
- package/test/memory.test.js +198 -0
- package/test/native-dispatch.test.js +356 -0
- package/test/output-chokepoint.test.js +188 -0
- package/test/path-guards.test.js +134 -0
- package/test/payload.test.js +99 -0
- package/test/permission-rules-agent.test.js +210 -0
- package/test/permission-rules.test.js +297 -0
- package/test/permissions.test.js +163 -0
- package/test/plan-mode.test.js +167 -0
- package/test/read-paginate.test.js +275 -0
- package/test/readonly-tools.test.js +177 -0
- package/test/result-cap.test.js +233 -0
- package/test/sandbox-agent.test.js +147 -0
- package/test/sandbox-integration.test.js +216 -0
- package/test/sandbox.test.js +408 -0
- package/test/sdk.test.js +234 -0
- package/test/shell-output-cap.test.js +181 -0
- package/test/skills-chat.test.js +110 -0
- package/test/skills.test.js +295 -0
- package/test/smoke.test.js +68 -0
- package/test/status-bar-pause.test.js +164 -0
- package/test/stream-parser.test.js +147 -0
- package/test/subagents-agent.test.js +178 -0
- package/test/subagents.test.js +222 -0
- package/test/tool-registry.test.js +85 -0
- package/test/trim-budget.test.js +101 -0
- package/test/verify-agent.test.js +317 -0
- package/test/verify.test.js +141 -0
- package/test/web-activity-ordering.test.js +194 -0
- package/test/web-activity.test.js +207 -0
- package/test/web-data-extraction-guidance.test.js +71 -0
- package/test/web-extract.test.js +185 -0
- package/test/web-fetch-agent.test.js +291 -0
- package/test/web-fetch-mode.test.js +193 -0
- package/test/web-search.test.js +380 -0
- package/lib/commands.js +0 -1288
package/lib/api.js
CHANGED
|
@@ -6,8 +6,159 @@ const { URL } = require('url');
|
|
|
6
6
|
|
|
7
7
|
const { buildToolsSchema, isUIActive } = require('./tools');
|
|
8
8
|
const { TOOL_SPECS } = require('./tool_specs');
|
|
9
|
+
const { dynamicToolSpecs } = require('./tool_registry');
|
|
10
|
+
const { resolveApiKey } = require('./secrets');
|
|
11
|
+
const { applyPromptCaching, applyReasoningEffort } = require('./payload');
|
|
12
|
+
const {
|
|
13
|
+
messagesHaveImages,
|
|
14
|
+
countImages,
|
|
15
|
+
selectImageFormat,
|
|
16
|
+
resolveVisionCapability,
|
|
17
|
+
buildProviderMessages,
|
|
18
|
+
} = require('./images');
|
|
9
19
|
const writer = require('./ui/writer');
|
|
10
20
|
const messages = require('./ui/messages');
|
|
21
|
+
const dbg = require('./debug');
|
|
22
|
+
|
|
23
|
+
// Strict precondition for any payload that includes role:tool messages or
// assistant.tool_calls: every tool_call_id must reference a non-empty id from
// a PRIOR assistant tool_calls entry. Catches the upstream "tool result's tool
// id() not found" 400 before it leaves the client and points at the exact
// violating message instead of a cryptic provider error.
//
// The check is a single forward pass: ids are registered as each assistant
// message is encountered, so a tool result that appears BEFORE the assistant
// turn announcing its id is rejected (collecting ids from the whole list first
// would let such an out-of-order result through, contradicting "prior").
//
// msgs   — array of chat messages ({ role, content, tool_calls?, tool_call_id? }).
// throws — Error naming the first offending message index; returns undefined
//          when the invariant holds.
function validateToolCallInvariant(msgs) {
  const announcedIds = new Set();
  for (let idx = 0; idx < msgs.length; idx++) {
    const m = msgs[idx];

    if (m.role === 'assistant' && Array.isArray(m.tool_calls)) {
      for (let j = 0; j < m.tool_calls.length; j++) {
        const tc = m.tool_calls[j];
        if (!tc || !tc.id) {
          throw new Error(
            `Invalid tool_calls invariant: messages[${idx}] role=assistant tool_calls[${j}] has empty id`
          );
        }
        announcedIds.add(tc.id);
      }
      continue;
    }

    if (m.role !== 'tool') continue;

    if (!m.tool_call_id) {
      // Short, whitespace-collapsed preview so the offending result can be
      // identified without dumping its whole content into the error.
      const preview = String(m.content || '').slice(0, 80).replace(/\s+/g, ' ');
      throw new Error(
        `Invalid tool_calls invariant: messages[${idx}] role=tool has empty tool_call_id (content_preview="${preview}")`
      );
    }
    // Only ids announced by an EARLIER assistant message are in the set here.
    if (!announcedIds.has(m.tool_call_id)) {
      throw new Error(
        `Invalid tool_calls invariant: messages[${idx}] role=tool tool_call_id=${m.tool_call_id} has no matching prior assistant tool_calls`
      );
    }
  }
}
|
|
60
|
+
|
|
61
|
+
// Writes a one-line summary of every outgoing message to the extended debug
// log: index, role, the tool_call_id it answers (if the field is present), the
// ids of any tool_calls it announces, and the content length in characters.
// Missing ids are rendered as <EMPTY> so they stand out in the log.
function debugDumpMessages(msgs) {
  dbg.logExtended('[messages dump before API request]');
  for (let position = 0; position < msgs.length; position++) {
    const entry = msgs[position];
    const segments = [];

    if (entry.tool_call_id !== undefined) {
      segments.push(` tool_call_id=${entry.tool_call_id || '<EMPTY>'}`);
    }

    if (Array.isArray(entry.tool_calls)) {
      const announced = entry.tool_calls
        .map((call) => (call && call.id) || '<EMPTY>')
        .join(',');
      // An empty joined list (no tool_calls entries) is omitted entirely.
      if (announced) segments.push(` tool_calls=[${announced}]`);
    }

    if (entry.content !== undefined && entry.content !== null) {
      segments.push(` content_chars=${(entry.content + '').length}`);
    }

    dbg.logExtended(` [${position}] role=${entry.role}${segments.join('')}`);
  }
}
|
|
78
|
+
|
|
79
|
+
// Fit messages into tokenBudget tokens.
// Uses chars/4 — aligned with estimateTokens; a deliberate under-estimate
// for token-dense content (code, JSON, HTML) but consistent across the
// codebase.
//
// Always keeps: system prompt + first non-system message (original task).
// Drops intermediate messages oldest-first, then truncates the last tail
// message (typically a large tool result) if still over budget.
//
// Pure function (no closure dependencies) — lives at module scope so it can be
// unit-tested in isolation. Called from chatStream's proactive-trim and
// 400/413 self-healing paths.
//
// msgs        — array of chat messages; never mutated (truncation copies).
// tokenBudget — target size in estimated tokens.
// returns     — a new array: system messages, the pinned first non-system
//               message, then the surviving tail. May still exceed the budget
//               when nothing truncatable remains (non-string content, or the
//               fixed prefix alone leaves no room).
//
// Sizes are derived from each message's serialized length computed ONCE,
// rather than re-serializing the whole list on every drop; the estimate is
// numerically identical to JSON.stringify(list).length / 4.
//
// NOTE(review): oldest-first dropping can remove an assistant tool_calls
// message while keeping its role:tool result; validateToolCallInvariant would
// presumably reject such a list — confirm the callers account for this.
function trimToTokenBudget(msgs, tokenBudget) {
  const CHARS_PER_TOKEN = 4;
  const TRUNCATION_SLACK_CHARS = 200; // headroom for the notice + JSON framing
  const TRUNCATION_NOTICE = '[…content truncated to fit model limit…]\n';

  const system = msgs.filter((m) => m.role === 'system');
  const nonSystem = msgs.filter((m) => m.role !== 'system');
  if (nonSystem.length === 0) return [...system];

  const pinned = nonSystem[0]; // original task — never dropped
  let tail = nonSystem.slice(1);

  // Serialized length of one array element. Values JSON cannot represent are
  // written as `null` (4 chars) inside an array.
  const jsonLength = (m) => {
    const text = JSON.stringify(m);
    return text === undefined ? 4 : text.length;
  };
  const sumLengths = (list) => list.reduce((total, m) => total + jsonLength(m), 0);
  // Length of JSON.stringify(array): elements + (count - 1) commas + 2 brackets.
  const arrayChars = (elementChars, count) => (count === 0 ? 2 : elementChars + count + 1);

  const systemElementChars = sumLengths(system);
  const fixedElementChars = systemElementChars + jsonLength(pinned);
  const fixedCount = system.length + 1;

  const tailLengths = tail.map(jsonLength);
  let tailElementChars = tailLengths.reduce((total, len) => total + len, 0);
  let dropped = 0;

  const estimate = () => Math.floor(
    arrayChars(fixedElementChars + tailElementChars, fixedCount + tailLengths.length - dropped)
      / CHARS_PER_TOKEN
  );

  // Drop oldest tail messages, always keeping the most recent one.
  while (tailLengths.length - dropped > 1 && estimate() > tokenBudget) {
    tailElementChars -= tailLengths[dropped];
    dropped++;
  }
  if (dropped > 0) tail = tail.slice(dropped);

  // One tail message left and still over budget: keep only the END of its
  // content, sized to what remains after the fixed prefix.
  if (tail.length === 1 && estimate() > tokenBudget) {
    const msg = tail[0];
    const otherChars = arrayChars(fixedElementChars, fixedCount);
    const available = tokenBudget * CHARS_PER_TOKEN - otherChars - TRUNCATION_SLACK_CHARS;
    if (available > 0 && typeof msg.content === 'string' && msg.content.length > available) {
      return [...system, pinned, { ...msg, content: TRUNCATION_NOTICE + msg.content.slice(-available) }];
    }
  }

  // No tail at all (the pinned task is the only non-system message): truncate
  // the pinned message itself as a last resort.
  if (tail.length === 0 && estimate() > tokenBudget) {
    const systemChars = arrayChars(systemElementChars, system.length);
    const available = tokenBudget * CHARS_PER_TOKEN - systemChars - TRUNCATION_SLACK_CHARS;
    if (available > 0 && typeof pinned.content === 'string' && pinned.content.length > available) {
      return [...system, { ...pinned, content: TRUNCATION_NOTICE + pinned.content.slice(-available) }];
    }
  }

  return [...system, pinned, ...tail];
}
|
|
128
|
+
|
|
129
|
+
// Estimate the context split for the counter (Variant B, display-only).
//
// The API returns usage.prompt_tokens PRE-SUMMED — it never separates base
// (system prompt + tool specs) from working (history + tool results), so the
// split is ESTIMATED here from the assembled payload. Both halves go through
// the SAME chars/4 estimator so they sum consistently; the real prompt_tokens
// stays the authoritative figure shown next to this split.
//
//   base    = estimate(system messages) + estimate(serialized tool schema)
//   working = estimate(every non-system message)   ← the part that grows
//
// Recomputed PER REQUEST (cheap) because the base is not constant: it moves
// with native-vs-XML mode (tools in payload.tools vs inside the system
// prompt), dynamic tools (MCP connecting/failing mid-session), and plan-mode
// toggling (PLAN_MODE_NOTICE). In XML mode `tools` is absent and the tool
// weight is carried by the system prompt string, so base is never silently
// zero. Pure; unit-tested.
//
// msgs  — message array; a non-array is treated as empty, and a null/undefined
//         entry is counted as an empty string on the working side.
// tools — tool schema to serialize, or a falsy value when there is none.
// returns { base, working } in estimated tokens (floored).
function estimateContextSplit(msgs, tools) {
  const CHARS_PER_TOKEN = 4;
  const list = Array.isArray(msgs) ? msgs : [];
  const chars = { system: 0, working: 0 };

  for (let i = 0; i < list.length; i++) {
    const message = list[i];
    const serialized = JSON.stringify(message == null ? '' : message);
    const bucket = message && message.role === 'system' ? 'system' : 'working';
    chars[bucket] += serialized.length;
  }

  if (tools) chars.system += JSON.stringify(tools).length;

  const toTokens = (count) => Math.floor(count / CHARS_PER_TOKEN);
  return { base: toTokens(chars.system), working: toTokens(chars.working) };
}
|
|
11
162
|
|
|
12
163
|
function createApiClient({ getConfig, saveConfig, ui }) {
|
|
13
164
|
const {
|
|
@@ -195,6 +346,26 @@ function createApiClient({ getConfig, saveConfig, ui }) {
|
|
|
195
346
|
});
|
|
196
347
|
}
|
|
197
348
|
|
|
349
|
+
// Web search (Task W.2b). Calls the backend POST /api/search, which
// authenticates the Bearer token, queries SearXNG, and returns
// { results: [{title,url,snippet}, …] } (or an {error} envelope on failure,
// mapped to a thrown Error by requestJson). Same shape as dashboardListModels:
// requireAuthToken() → requestJson(...).
//
// query   — search string, sent as body.query.
// count   — optional result count; forwarded only when not null/undefined so
//           the backend can clamp it.
// timeout — optional request timeout in ms; any falsy value falls back to 15000.
//
// Returns whatever requestJson returns. Nothing is special-cased here: the
// caller (the web_search tool) is responsible for catching every failure mode
// and surfacing a clean tool error.
function dashboardSearch(query, { count, timeout } = {}) {
  const DEFAULT_TIMEOUT_MS = 15000;
  // Resolve the token first so an unauthenticated call fails before any
  // request is assembled.
  const authToken = requireAuthToken();
  const body = count != null ? { query, count } : { query };
  const requestOptions = {
    method: 'POST',
    timeout: timeout || DEFAULT_TIMEOUT_MS,
    headers: { 'Authorization': `Bearer ${authToken}` },
    body,
  };
  return requestJson(dashboardUrl('/api/search'), requestOptions);
}
|
|
368
|
+
|
|
198
369
|
function dashboardGetModelForCli(id) {
|
|
199
370
|
const authToken = requireAuthToken();
|
|
200
371
|
return requestJson(dashboardUrl(`/api/models/${encodeURIComponent(String(id))}/cli`), {
|
|
@@ -251,6 +422,26 @@ function createApiClient({ getConfig, saveConfig, ui }) {
|
|
|
251
422
|
|
|
252
423
|
if (signal && signal.aborted) throw new Error('Aborted');
|
|
253
424
|
|
|
425
|
+
// Multimodal image input (Task 5.4). When any turn carries attached images,
|
|
426
|
+
// resolve the provider content-part shape (Anthropic-style vs OpenAI-style)
|
|
427
|
+
// and FAIL LOUD for a known text-only model — never silently drop the image
|
|
428
|
+
// from the payload (constraint #2). An unknown capability (null) proceeds and
|
|
429
|
+
// lets the endpoint reject cleanly.
|
|
430
|
+
const imagesPresent = messagesHaveImages(messages);
|
|
431
|
+
let imageFormat = null;
|
|
432
|
+
if (imagesPresent) {
|
|
433
|
+
imageFormat = selectImageFormat(config, resolvedModel);
|
|
434
|
+
const vision = resolveVisionCapability(config, resolvedModel);
|
|
435
|
+
if (vision === false) {
|
|
436
|
+
const n = countImages(messages);
|
|
437
|
+
throw new Error(
|
|
438
|
+
`Model "${resolvedModel}" is not vision-capable, but ${n} image${n === 1 ? '' : 's'} ` +
|
|
439
|
+
`${n === 1 ? 'was' : 'were'} attached. Select a vision-capable model, or set ` +
|
|
440
|
+
`vision:true on the model profile if this endpoint does accept images.`,
|
|
441
|
+
);
|
|
442
|
+
}
|
|
443
|
+
}
|
|
444
|
+
|
|
254
445
|
let trimNotified = false;
|
|
255
446
|
function notifyTrim(info) {
|
|
256
447
|
if (trimNotified) return;
|
|
@@ -260,51 +451,8 @@ function createApiClient({ getConfig, saveConfig, ui }) {
|
|
|
260
451
|
}
|
|
261
452
|
}
|
|
262
453
|
|
|
263
|
-
//
|
|
264
|
-
//
|
|
265
|
-
// for token-dense content (code, JSON, HTML) but consistent across the
|
|
266
|
-
// codebase.
|
|
267
|
-
//
|
|
268
|
-
// Always keeps: system prompt + first non-system message (original task).
|
|
269
|
-
// Drops intermediate messages oldest-first, then truncates the last tail
|
|
270
|
-
// message (typically a large tool result) if still over budget.
|
|
271
|
-
function trimToTokenBudget(msgs, tokenBudget) {
|
|
272
|
-
const CHARS_PER_TOKEN = 4;
|
|
273
|
-
const system = msgs.filter((m) => m.role === 'system');
|
|
274
|
-
const nonSystem = msgs.filter((m) => m.role !== 'system');
|
|
275
|
-
if (nonSystem.length === 0) return [...system];
|
|
276
|
-
|
|
277
|
-
const pinned = nonSystem[0]; // original task — never dropped
|
|
278
|
-
let tail = nonSystem.slice(1);
|
|
279
|
-
|
|
280
|
-
const estimate = () => {
|
|
281
|
-
const all = tail.length > 0 ? [...system, pinned, ...tail] : [...system, pinned];
|
|
282
|
-
return Math.floor(JSON.stringify(all).length / CHARS_PER_TOKEN);
|
|
283
|
-
};
|
|
284
|
-
|
|
285
|
-
while (tail.length > 1 && estimate() > tokenBudget) {
|
|
286
|
-
tail = tail.slice(1);
|
|
287
|
-
}
|
|
288
|
-
|
|
289
|
-
if (tail.length === 1 && estimate() > tokenBudget) {
|
|
290
|
-
const msg = tail[0];
|
|
291
|
-
const otherChars = JSON.stringify([...system, pinned]).length;
|
|
292
|
-
const available = tokenBudget * CHARS_PER_TOKEN - otherChars - 200;
|
|
293
|
-
if (available > 0 && typeof msg.content === 'string' && msg.content.length > available) {
|
|
294
|
-
tail = [{ ...msg, content: '[…content truncated to fit model limit…]\n' + msg.content.slice(-available) }];
|
|
295
|
-
}
|
|
296
|
-
}
|
|
297
|
-
|
|
298
|
-
if (tail.length === 0 && estimate() > tokenBudget) {
|
|
299
|
-
const systemChars = JSON.stringify(system).length;
|
|
300
|
-
const available = tokenBudget * CHARS_PER_TOKEN - systemChars - 200;
|
|
301
|
-
if (available > 0 && typeof pinned.content === 'string' && pinned.content.length > available) {
|
|
302
|
-
return [...system, { ...pinned, content: '[…content truncated to fit model limit…]\n' + pinned.content.slice(-available) }];
|
|
303
|
-
}
|
|
304
|
-
}
|
|
305
|
-
|
|
306
|
-
return tail.length > 0 ? [...system, pinned, ...tail] : [...system, pinned];
|
|
307
|
-
}
|
|
454
|
+
// trimToTokenBudget is a pure, module-scope helper (lifted out of this
|
|
455
|
+
// closure in Task 1.1 so it can be unit-tested directly; body unchanged).
|
|
308
456
|
|
|
309
457
|
// Proactive trim: prefer a limit learned from a prior 400 overflow; otherwise
|
|
310
458
|
// fall back to config.context_length (with a ~10% safety margin) as a hint.
|
|
@@ -352,21 +500,36 @@ function createApiClient({ getConfig, saveConfig, ui }) {
|
|
|
352
500
|
const callable = Object.fromEntries(
|
|
353
501
|
Object.entries(TOOL_SPECS).filter(([, spec]) => !spec.wrapper)
|
|
354
502
|
);
|
|
355
|
-
|
|
503
|
+
// Dynamic MCP tools (Task 3.3) advertise their schema here too, so the
|
|
504
|
+
// model can emit native tool_calls against `mcp__server__tool` names that
|
|
505
|
+
// dispatch through the same registry path as built-ins.
|
|
506
|
+
payload.tools = buildToolsSchema({ ...callable, ...dynamicToolSpecs() });
|
|
356
507
|
payload.tool_choice = 'auto';
|
|
357
508
|
}
|
|
358
509
|
|
|
359
510
|
const endpoint = apiUrl('/v1/chat/completions');
|
|
360
511
|
|
|
361
512
|
async function doRequest(msgs) {
|
|
362
|
-
|
|
513
|
+
if (dbg.isFile()) debugDumpMessages(msgs);
|
|
514
|
+
validateToolCallInvariant(msgs);
|
|
515
|
+
// Transform any image-bearing turn into the provider-specific multimodal
|
|
516
|
+
// content[] shape right before the wire (Task 5.4); the internal `images`
|
|
517
|
+
// field never leaves the client.
|
|
518
|
+
const wireMsgs = imagesPresent ? buildProviderMessages(msgs, imageFormat) : msgs;
|
|
519
|
+
const reqPayload = { ...payload, messages: wireMsgs };
|
|
520
|
+
// Optional payload augmentations (Task 2.7): reasoning_effort for models
|
|
521
|
+
// that support it, and prompt-caching markers on the stable prefix when
|
|
522
|
+
// the user has opted in (config.prompt_caching). Both no-op otherwise.
|
|
523
|
+
applyReasoningEffort(reqPayload, config.reasoning_effort, resolvedModel, { force: !!config.reasoning_effort_force });
|
|
524
|
+
applyPromptCaching(reqPayload, config.prompt_caching === true);
|
|
363
525
|
const reqBody = JSON.stringify(reqPayload);
|
|
364
526
|
const res = await httpRequest(endpoint, {
|
|
365
527
|
method: 'POST',
|
|
366
528
|
timeout: config.request_timeout_ms,
|
|
367
529
|
headers: {
|
|
368
530
|
'Content-Type': 'application/json',
|
|
369
|
-
|
|
531
|
+
// Precedence: SEMALT_API_KEY env → OS keychain → config.api_key.
|
|
532
|
+
'Authorization': `Bearer ${resolveApiKey(config)}`,
|
|
370
533
|
'Content-Length': Buffer.byteLength(reqBody),
|
|
371
534
|
},
|
|
372
535
|
signal,
|
|
@@ -516,6 +679,11 @@ function createApiClient({ getConfig, saveConfig, ui }) {
|
|
|
516
679
|
type: 'function',
|
|
517
680
|
function: { name: t.name, arguments: t.arguments || '{}' },
|
|
518
681
|
}));
|
|
682
|
+
dbg.logExtended(
|
|
683
|
+
`[tool_call finalize] acc_len=${toolCallAcc.length} ` +
|
|
684
|
+
`valid=${validToolCalls.length} nativeTools=${nativeTools} ` +
|
|
685
|
+
`acc=${JSON.stringify(toolCallAcc).slice(0, 400)}`
|
|
686
|
+
);
|
|
519
687
|
if (!nativeTools) appendToolCallsXml();
|
|
520
688
|
if (!silent) renderer.flush();
|
|
521
689
|
// Fallback for endpoints that don't honor stream_options.include_usage:
|
|
@@ -528,10 +696,18 @@ function createApiClient({ getConfig, saveConfig, ui }) {
|
|
|
528
696
|
};
|
|
529
697
|
}
|
|
530
698
|
const elapsedMs = Date.now() - startTime;
|
|
699
|
+
// Estimated base/working split (Variant B, display-only) computed from
|
|
700
|
+
// the payload ACTUALLY sent — trimmedMessages holds the final value
|
|
701
|
+
// after any 413/400-overflow retry, and payload.tools is present only in
|
|
702
|
+
// native mode (XML mode embeds tools in the system prompt, captured by
|
|
703
|
+
// the system-message estimate). Recomputed every request so it stays
|
|
704
|
+
// correct when MCP connects or plan mode toggles mid-session.
|
|
705
|
+
const contextEstimate = estimateContextSplit(trimmedMessages, payload.tools);
|
|
531
706
|
resolve({
|
|
532
707
|
content: fullText,
|
|
533
708
|
toolCalls: nativeTools ? validToolCalls : [],
|
|
534
709
|
usage,
|
|
710
|
+
context_estimate: contextEstimate,
|
|
535
711
|
usage_from_provider: !!streamUsage,
|
|
536
712
|
tool_calls_count: validToolCalls.length,
|
|
537
713
|
finish_reason: streamFinishReason,
|
|
@@ -564,6 +740,10 @@ function createApiClient({ getConfig, saveConfig, ui }) {
|
|
|
564
740
|
res.setEncoding('utf8');
|
|
565
741
|
|
|
566
742
|
res.on('data', (chunk) => {
|
|
743
|
+
if (dbg.isFile()) {
|
|
744
|
+
const raw = typeof chunk === 'string' ? chunk : chunk.toString('utf8');
|
|
745
|
+
dbg.logExtended(`[SSE raw] ${raw.slice(0, 500).replace(/\n/g, '\\n')}`);
|
|
746
|
+
}
|
|
567
747
|
lineBuffer += chunk;
|
|
568
748
|
const lines = lineBuffer.split('\n');
|
|
569
749
|
lineBuffer = lines.pop();
|
|
@@ -572,11 +752,14 @@ function createApiClient({ getConfig, saveConfig, ui }) {
|
|
|
572
752
|
if (!line.startsWith('data: ')) continue;
|
|
573
753
|
const data = line.slice(6).trim();
|
|
574
754
|
if (data === '[DONE]') {
|
|
755
|
+
dbg.logExtended(`[SSE event] [DONE]`);
|
|
575
756
|
finalize();
|
|
576
757
|
res.destroy();
|
|
577
758
|
return;
|
|
578
759
|
}
|
|
579
760
|
|
|
761
|
+
dbg.logExtended(`[SSE event] ${data.slice(0, 500)}`);
|
|
762
|
+
|
|
580
763
|
try {
|
|
581
764
|
const obj = JSON.parse(data);
|
|
582
765
|
if (obj.usage && (obj.usage.prompt_tokens !== undefined || obj.usage.completion_tokens !== undefined)) {
|
|
@@ -619,15 +802,31 @@ function createApiClient({ getConfig, saveConfig, ui }) {
|
|
|
619
802
|
}
|
|
620
803
|
}
|
|
621
804
|
|
|
805
|
+
// Standard OpenAI tool_call streaming: the announcement chunk
|
|
806
|
+
// carries id + type + function.name with arguments="", and one or
|
|
807
|
+
// more follow-up chunks stream arguments deltas (no id/name).
|
|
808
|
+
// Process every chunk that has delta.tool_calls and patch in
|
|
809
|
+
// whichever fields are present — never gate slot creation or
|
|
810
|
+
// field updates on arguments being non-empty, or the announcement
|
|
811
|
+
// (which carries the only id/name) gets dropped.
|
|
622
812
|
const toolCallsDelta = delta.tool_calls;
|
|
623
813
|
if (Array.isArray(toolCallsDelta)) {
|
|
624
814
|
for (const tc of toolCallsDelta) {
|
|
815
|
+
if (!tc || typeof tc !== 'object') continue;
|
|
625
816
|
const idx = typeof tc.index === 'number' ? tc.index : toolCallAcc.length;
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
if (tc.
|
|
817
|
+
if (!toolCallAcc[idx]) {
|
|
818
|
+
toolCallAcc[idx] = { id: '', name: '', arguments: '' };
|
|
819
|
+
}
|
|
820
|
+
const slot = toolCallAcc[idx];
|
|
821
|
+
if (tc.id) slot.id = tc.id;
|
|
822
|
+
const fnName = tc.function && tc.function.name;
|
|
823
|
+
if (typeof fnName === 'string' && fnName) slot.name = fnName;
|
|
824
|
+
const fnArgs = tc.function && tc.function.arguments;
|
|
825
|
+
if (typeof fnArgs === 'string') slot.arguments += fnArgs;
|
|
826
|
+
dbg.logExtended(
|
|
827
|
+
`[tool_call acc] idx=${idx} id=${slot.id || '<empty>'} ` +
|
|
828
|
+
`name=${slot.name || '<empty>'} args_len=${slot.arguments.length}`
|
|
829
|
+
);
|
|
631
830
|
}
|
|
632
831
|
}
|
|
633
832
|
|
|
@@ -649,7 +848,9 @@ function createApiClient({ getConfig, saveConfig, ui }) {
|
|
|
649
848
|
fullText += content;
|
|
650
849
|
tokenCount++;
|
|
651
850
|
}
|
|
652
|
-
} catch {
|
|
851
|
+
} catch (err) {
|
|
852
|
+
dbg.logExtended(`[SSE parse-error] ${err.message} :: ${data.slice(0, 200)}`);
|
|
853
|
+
}
|
|
653
854
|
}
|
|
654
855
|
});
|
|
655
856
|
|
|
@@ -684,7 +885,8 @@ function createApiClient({ getConfig, saveConfig, ui }) {
|
|
|
684
885
|
timeout: config.request_timeout_ms,
|
|
685
886
|
headers: {
|
|
686
887
|
'Content-Type': 'application/json',
|
|
687
|
-
|
|
888
|
+
// Precedence: SEMALT_API_KEY env → OS keychain → config.api_key.
|
|
889
|
+
'Authorization': `Bearer ${resolveApiKey(config)}`,
|
|
688
890
|
'Content-Length': Buffer.byteLength(body),
|
|
689
891
|
},
|
|
690
892
|
}, body);
|
|
@@ -723,9 +925,55 @@ function createApiClient({ getConfig, saveConfig, ui }) {
|
|
|
723
925
|
});
|
|
724
926
|
}
|
|
725
927
|
|
|
928
|
+
// Quiet, non-streaming completion. Unlike chatSync it does NOT write to
// scrollback or route errors through the UI — it returns the assistant text
// or THROWS, so a programmatic caller (the web-fetch secondary summarizer,
// Task W.1) can decide its own fallback. No native tools, no streaming chrome.
//
// messages    — chat messages sent as-is in the payload.
// model       — optional override; falls back to config.default_model.
// temperature — used only when it is a number; otherwise config.temperature.
// signal      — optional abort signal handed to httpRequest.
//
// Resolves with choices[0].message.content ('' when absent or falsy).
// Rejects with `HTTP <status> — <first 200 chars of body>` on a non-200
// response, `Parse error: …` when the body is not valid JSON, or the
// response stream's own error.
async function chatComplete(messages, { model, temperature, signal } = {}) {
  const config = getConfig();
  const useTemperature = typeof temperature === 'number' ? temperature : config.temperature;
  const body = JSON.stringify({
    model: model || config.default_model,
    messages,
    temperature: useTemperature,
    stream: false,
  });

  const headers = {
    'Content-Type': 'application/json',
    'Authorization': `Bearer ${resolveApiKey(config)}`,
    'Content-Length': Buffer.byteLength(body),
  };
  const res = await httpRequest(apiUrl('/v1/chat/completions'), {
    method: 'POST',
    timeout: config.request_timeout_ms,
    signal: signal || undefined,
    headers,
  }, body);

  return new Promise((resolve, reject) => {
    let received = '';

    // Called once the whole body has arrived: map status/JSON failures to
    // rejections and pull out the assistant text otherwise.
    const settle = () => {
      if (res.statusCode !== 200) {
        reject(new Error(`HTTP ${res.statusCode} — ${String(received).slice(0, 200)}`));
        return;
      }
      try {
        const parsed = JSON.parse(received);
        const firstChoice = parsed && parsed.choices && parsed.choices[0];
        const message = firstChoice && firstChoice.message;
        const content = message ? message.content : '';
        resolve(content || '');
      } catch (error) {
        reject(new Error(`Parse error: ${error.message}`));
      }
    };

    res.setEncoding('utf8');
    res.on('data', (chunk) => { received += chunk; });
    res.on('end', settle);
    res.on('error', reject);
  });
}
|
|
972
|
+
|
|
726
973
|
return {
|
|
727
974
|
chatStream,
|
|
728
975
|
chatSync,
|
|
976
|
+
chatComplete,
|
|
729
977
|
dashboardCreateChat,
|
|
730
978
|
dashboardGetChat,
|
|
731
979
|
dashboardGetModelForCli,
|
|
@@ -733,6 +981,7 @@ function createApiClient({ getConfig, saveConfig, ui }) {
|
|
|
733
981
|
dashboardListModels,
|
|
734
982
|
dashboardLogout,
|
|
735
983
|
dashboardSaveMessages,
|
|
984
|
+
dashboardSearch,
|
|
736
985
|
dashboardWhoAmI,
|
|
737
986
|
estimateTokens,
|
|
738
987
|
getCliLoginStatus,
|
|
@@ -743,4 +992,8 @@ function createApiClient({ getConfig, saveConfig, ui }) {
|
|
|
743
992
|
|
|
744
993
|
module.exports = {
|
|
745
994
|
createApiClient,
|
|
995
|
+
// Exported for unit testing (Task 1.1). Pure helper, no runtime behavior change.
|
|
996
|
+
trimToTokenBudget,
|
|
997
|
+
// Exported for unit testing the split-context counter (Variant B). Pure helper.
|
|
998
|
+
estimateContextSplit,
|
|
746
999
|
};
|
package/lib/args.js
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
+
const debug = require('./debug');
|
|
4
|
+
|
|
3
5
|
function parseArgs(argv) {
|
|
4
6
|
const opts = {};
|
|
5
7
|
const positional = [];
|
|
@@ -15,6 +17,12 @@ function parseArgs(argv) {
|
|
|
15
17
|
case '--file':
|
|
16
18
|
(opts.file = opts.file || []).push(argv[++i]);
|
|
17
19
|
break;
|
|
20
|
+
case '--image':
|
|
21
|
+
// Multimodal image input (Task 5.4). Repeatable: attach one or more
|
|
22
|
+
// images (PNG/JPEG/WebP/GIF) to the user turn. Read through isPathSafe,
|
|
23
|
+
// size-checked, base64-encoded by the entry point (lib/images.js).
|
|
24
|
+
(opts.image = opts.image || []).push(argv[++i]);
|
|
25
|
+
break;
|
|
18
26
|
case '-a':
|
|
19
27
|
case '--analyze':
|
|
20
28
|
opts.analyze = true;
|
|
@@ -22,6 +30,28 @@ function parseArgs(argv) {
|
|
|
22
30
|
case '--dry-run':
|
|
23
31
|
opts.dryRun = true;
|
|
24
32
|
break;
|
|
33
|
+
case '-p':
|
|
34
|
+
case '--print':
|
|
35
|
+
opts.print = true;
|
|
36
|
+
break;
|
|
37
|
+
case '-b':
|
|
38
|
+
case '--background':
|
|
39
|
+
// Launch the task as a detached background process (Task 5.3). Used by
|
|
40
|
+
// `semalt-code run --background <prompt>`. The permission policy is fixed
|
|
41
|
+
// from the other flags at launch and cannot change after detach.
|
|
42
|
+
opts.background = true;
|
|
43
|
+
break;
|
|
44
|
+
case '--output-format': {
|
|
45
|
+
const v = argv[++i];
|
|
46
|
+
const allowed = ['text', 'json', 'stream-json'];
|
|
47
|
+
if (!allowed.includes(v)) {
|
|
48
|
+
process.stderr.write(`Error: --output-format must be one of ${allowed.join(', ')}.\n`);
|
|
49
|
+
process.exit(1);
|
|
50
|
+
}
|
|
51
|
+
opts.outputFormat = v;
|
|
52
|
+
opts.print = true; // selecting a machine format implies headless
|
|
53
|
+
break;
|
|
54
|
+
}
|
|
25
55
|
case '--api-base':
|
|
26
56
|
opts.apiBase = argv[++i];
|
|
27
57
|
break;
|
|
@@ -53,8 +83,52 @@ function parseArgs(argv) {
|
|
|
53
83
|
case '--readonly':
|
|
54
84
|
opts.readonly = true;
|
|
55
85
|
break;
|
|
56
|
-
case '--
|
|
57
|
-
opts.
|
|
86
|
+
case '--plan':
|
|
87
|
+
opts.plan = true;
|
|
88
|
+
break;
|
|
89
|
+
case '--no-verify':
|
|
90
|
+
// One-off skip of self-verification (Task 4.2) for this invocation, in
|
|
91
|
+
// BOTH advisory and enforcing modes. Threaded into runAgentLoop opts.
|
|
92
|
+
opts.noVerify = true;
|
|
93
|
+
break;
|
|
94
|
+
case '--max-iterations': {
|
|
95
|
+
// Cap on agent-loop iterations per turn. A positive integer caps the
|
|
96
|
+
// loop; 0 or 'unlimited' removes the cap (power-user choice). The value
|
|
97
|
+
// also flows through flagsConfigLayer (config.js) into config.max_iterations;
|
|
98
|
+
// it's consumed here so it isn't mis-parsed as a positional.
|
|
99
|
+
const v = argv[++i];
|
|
100
|
+
const ok = v !== undefined && (v === 'unlimited' || /^\d+$/.test(v));
|
|
101
|
+
if (!ok) {
|
|
102
|
+
process.stderr.write(`Error: --max-iterations requires a non-negative integer or "unlimited".\n`);
|
|
103
|
+
process.exit(1);
|
|
104
|
+
}
|
|
105
|
+
opts.maxIterations = v;
|
|
106
|
+
break;
|
|
107
|
+
}
|
|
108
|
+
case '--reasoning-effort':
|
|
109
|
+
// Consumed here so the value isn't mis-parsed as a positional; the
|
|
110
|
+
// runtime override flows through flagsConfigLayer (config.js).
|
|
111
|
+
opts.reasoningEffort = argv[++i];
|
|
112
|
+
break;
|
|
113
|
+
case '--prompt-caching':
|
|
114
|
+
opts.promptCaching = true;
|
|
115
|
+
break;
|
|
116
|
+
case '--allow-anywhere':
|
|
117
|
+
opts.allowAnywhere = true;
|
|
118
|
+
break;
|
|
119
|
+
case '--no-network':
|
|
120
|
+
// Binary network isolation (Task 4.4b): force kernel-level no-network for
|
|
121
|
+
// sandboxed commands (bwrap --unshare-net / Seatbelt deny network*). A
|
|
122
|
+
// human-only opt-in; the model can never reach it. The sandbox decision
|
|
123
|
+
// (lib/sandbox.js resolveSandboxedSpawn) reads the flag from argv directly,
|
|
124
|
+
// so this just records intent + keeps it out of the positional args.
|
|
125
|
+
opts.noNetwork = true;
|
|
126
|
+
break;
|
|
127
|
+
case '--dangerously-skip-permissions':
|
|
128
|
+
// The single explicit opt-out of ALL safety: disables the destructive
|
|
129
|
+
// command deny-list and the config-file read guard, and fully
|
|
130
|
+
// auto-approves every tool call. Pre-scanned in index.js too.
|
|
131
|
+
opts.dangerouslySkipPermissions = true;
|
|
58
132
|
break;
|
|
59
133
|
case '--show-think':
|
|
60
134
|
opts.showThink = true;
|
|
@@ -62,6 +136,15 @@ function parseArgs(argv) {
|
|
|
62
136
|
case '--debug':
|
|
63
137
|
opts.debug = true;
|
|
64
138
|
break;
|
|
139
|
+
case '--debug-file': {
|
|
140
|
+
const v = argv[++i];
|
|
141
|
+
if (!v || v.startsWith('-')) {
|
|
142
|
+
process.stderr.write(`Error: --debug-file requires a path argument.\n`);
|
|
143
|
+
process.exit(1);
|
|
144
|
+
}
|
|
145
|
+
opts.debugFile = v;
|
|
146
|
+
break;
|
|
147
|
+
}
|
|
65
148
|
case '--system-prompt':
|
|
66
149
|
opts.systemPromptFile = argv[++i];
|
|
67
150
|
break;
|
|
@@ -71,6 +154,17 @@ function parseArgs(argv) {
|
|
|
71
154
|
i++;
|
|
72
155
|
}
|
|
73
156
|
|
|
157
|
+
if (opts.debug && opts.debugFile) {
|
|
158
|
+
process.stderr.write(
|
|
159
|
+
`Error: --debug and --debug-file are mutually exclusive.\n` +
|
|
160
|
+
` Use --debug for inline debug output, or --debug-file <path>\n` +
|
|
161
|
+
` for extended debug traces written to a file.\n`
|
|
162
|
+
);
|
|
163
|
+
process.exit(1);
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
debug.init({ debug: opts.debug, debugFile: opts.debugFile });
|
|
167
|
+
|
|
74
168
|
return { opts, positional };
|
|
75
169
|
}
|
|
76
170
|
|
package/lib/audit.js
CHANGED
|
@@ -28,4 +28,26 @@ function logToolCall(tag, input, approved, resultStatus) {
|
|
|
28
28
|
}
|
|
29
29
|
}
|
|
30
30
|
|
|
31
|
-
|
|
31
|
+
// Checkpoint activity (Task 4.3). Recorded as a `checkpoint` row so the audit
|
|
32
|
+
// log shows when prior file state was snapshotted before a mutation (and on
|
|
33
|
+
// rewind). `seq` is the per-session checkpoint sequence number; `note` carries
|
|
34
|
+
// the action + affected path(s) or the rewind outcome. Like logToolCall this
|
|
35
|
+
// never throws.
|
|
36
|
+
/**
 * Append a `checkpoint` row to the audit log.
 *
 * The row is a single JSON line with `ts`, `tag: 'checkpoint'`,
 * `input: "checkpoint:<seq> <note>"`, `approved: true` and `result: 'ok'`.
 * The note is capped at 200 characters (197 + '...').
 *
 * @param {*} seq  Checkpoint sequence number, interpolated into `input`.
 * @param {*} note Free-form note. Strings are used verbatim; anything else
 *                 is JSON-encoded, falling back to String(note) when it has
 *                 no JSON representation.
 * @returns {void} Never throws; any failure is swallowed on purpose.
 */
function logCheckpoint(seq, note) {
  try {
    let noteStr;
    if (typeof note === 'string') {
      noteStr = note;
    } else {
      try {
        noteStr = JSON.stringify(note);
      } catch {
        // Cyclic structures and BigInt make JSON.stringify throw.
        noteStr = undefined;
      }
      // JSON.stringify yields undefined (not a string) for undefined,
      // functions and symbols. Without this fallback the `.length` access
      // below would throw and the outer catch would silently drop the row.
      if (typeof noteStr !== 'string') noteStr = String(note);
    }
    if (noteStr.length > 200) noteStr = noteStr.slice(0, 197) + '...';
    const entry = JSON.stringify({
      ts: new Date().toISOString(),
      tag: 'checkpoint',
      input: `checkpoint:${seq} ${noteStr}`,
      approved: true,
      result: 'ok',
    });
    fs.appendFileSync(AUDIT_LOG, entry + '\n');
  } catch {
    // never throw
  }
}
|
|
52
|
+
|
|
53
|
+
module.exports = { AUDIT_LOG, logToolCall, logCheckpoint };
|