@semalt-ai/code 1.8.4 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151) hide show
  1. package/.claude/settings.local.json +8 -1
  2. package/.github/workflows/ci.yml +69 -0
  3. package/CLAUDE.md +1588 -27
  4. package/README.md +147 -3
  5. package/TECHNICAL_DEBT.md +66 -0
  6. package/examples/embed.js +74 -0
  7. package/index.js +259 -11
  8. package/lib/agent.js +935 -181
  9. package/lib/api.js +308 -55
  10. package/lib/args.js +96 -2
  11. package/lib/audit.js +23 -1
  12. package/lib/background.js +584 -0
  13. package/lib/checkpoints.js +757 -0
  14. package/lib/commands/auth.js +94 -0
  15. package/lib/commands/chat-session.js +306 -0
  16. package/lib/commands/chat-slash.js +399 -0
  17. package/lib/commands/chat-turn.js +446 -0
  18. package/lib/commands/chat.js +403 -0
  19. package/lib/commands/custom.js +157 -0
  20. package/lib/commands/history-utils.js +66 -0
  21. package/lib/commands/index.js +268 -0
  22. package/lib/commands/mcp.js +113 -0
  23. package/lib/commands/oneshot.js +193 -0
  24. package/lib/commands/registry.js +269 -0
  25. package/lib/commands/tasks.js +89 -0
  26. package/lib/compact.js +87 -0
  27. package/lib/config.js +346 -11
  28. package/lib/constants.js +372 -3
  29. package/lib/debug.js +106 -0
  30. package/lib/deny.js +199 -0
  31. package/lib/doctor.js +160 -0
  32. package/lib/headless.js +167 -0
  33. package/lib/hooks.js +286 -0
  34. package/lib/images.js +264 -0
  35. package/lib/internals.js +49 -0
  36. package/lib/mcp/boundary.js +131 -0
  37. package/lib/mcp/client.js +270 -0
  38. package/lib/mcp/oauth.js +134 -0
  39. package/lib/memory.js +209 -0
  40. package/lib/metrics.js +37 -2
  41. package/lib/payload.js +54 -0
  42. package/lib/permission-rules.js +401 -0
  43. package/lib/permissions.js +100 -10
  44. package/lib/pricing.js +67 -0
  45. package/lib/proc.js +158 -0
  46. package/lib/prompts.js +88 -8
  47. package/lib/sandbox.js +568 -0
  48. package/lib/sdk.js +328 -0
  49. package/lib/secrets.js +211 -0
  50. package/lib/skills.js +223 -0
  51. package/lib/subagents.js +516 -0
  52. package/lib/tool_registry.js +2558 -0
  53. package/lib/tool_specs.js +236 -9
  54. package/lib/tools.js +370 -944
  55. package/lib/ui/chat-history.js +19 -1
  56. package/lib/ui/format.js +101 -6
  57. package/lib/ui/input-field.js +16 -7
  58. package/lib/ui/status-bar.js +79 -11
  59. package/lib/ui/terminal.js +10 -4
  60. package/lib/ui/theme.js +1 -0
  61. package/lib/ui/web-activity.js +218 -0
  62. package/lib/ui/writer.js +7 -9
  63. package/lib/verify.js +229 -0
  64. package/lib/web-extract.js +213 -0
  65. package/lib/web-summarize.js +68 -0
  66. package/package.json +19 -4
  67. package/scripts/lint.js +57 -0
  68. package/test/agent-loop.test.js +389 -0
  69. package/test/background.test.js +414 -0
  70. package/test/chat.test.js +114 -0
  71. package/test/checkpoints-agent.test.js +181 -0
  72. package/test/checkpoints.test.js +650 -0
  73. package/test/command-registry.test.js +160 -0
  74. package/test/compact.test.js +116 -0
  75. package/test/completion-lazy.test.js +52 -0
  76. package/test/config-merge.test.js +324 -0
  77. package/test/config-quarantine.test.js +128 -0
  78. package/test/config-write-guard-allow-anywhere.test.js +56 -0
  79. package/test/config-write-guard-skip.test.js +46 -0
  80. package/test/config-write-guard.test.js +153 -0
  81. package/test/context-split.test.js +215 -0
  82. package/test/cost-doctor.test.js +142 -0
  83. package/test/custom-commands-chat.test.js +106 -0
  84. package/test/custom-commands.test.js +230 -0
  85. package/test/deny-windows.test.js +120 -0
  86. package/test/deny.test.js +83 -0
  87. package/test/download-allow-anywhere.test.js +66 -0
  88. package/test/download-confine.test.js +153 -0
  89. package/test/executors.test.js +362 -0
  90. package/test/extract-tool-calls.test.js +315 -0
  91. package/test/fetch-url-validation.test.js +219 -0
  92. package/test/fixtures/tool-calls.js +57 -0
  93. package/test/fixtures/web-page.js +91 -0
  94. package/test/git-tools.test.js +384 -0
  95. package/test/grep-glob-serialize.test.js +242 -0
  96. package/test/grep-glob.test.js +268 -0
  97. package/test/harness/README.md +57 -0
  98. package/test/harness/chat-harness.js +142 -0
  99. package/test/harness/memwarn-headless-child.js +65 -0
  100. package/test/harness/mock-llm.js +120 -0
  101. package/test/harness/mock-mcp-server.js +142 -0
  102. package/test/harness/sse-server.js +69 -0
  103. package/test/headless.test.js +203 -0
  104. package/test/history-utils.test.js +88 -0
  105. package/test/hooks-agent.test.js +238 -0
  106. package/test/hooks-verify-sandbox.test.js +232 -0
  107. package/test/hooks.test.js +216 -0
  108. package/test/http-get-user-agent.test.js +142 -0
  109. package/test/images-api.test.js +208 -0
  110. package/test/images.test.js +238 -0
  111. package/test/max-iterations.test.js +216 -0
  112. package/test/mcp-boundary.test.js +57 -0
  113. package/test/mcp-client.test.js +267 -0
  114. package/test/mcp-oauth.test.js +86 -0
  115. package/test/memory-truncation-warning.test.js +222 -0
  116. package/test/memory.test.js +198 -0
  117. package/test/native-dispatch.test.js +356 -0
  118. package/test/output-chokepoint.test.js +188 -0
  119. package/test/path-guards.test.js +134 -0
  120. package/test/payload.test.js +99 -0
  121. package/test/permission-rules-agent.test.js +210 -0
  122. package/test/permission-rules.test.js +297 -0
  123. package/test/permissions.test.js +163 -0
  124. package/test/plan-mode.test.js +167 -0
  125. package/test/read-paginate.test.js +275 -0
  126. package/test/readonly-tools.test.js +177 -0
  127. package/test/result-cap.test.js +233 -0
  128. package/test/sandbox-agent.test.js +147 -0
  129. package/test/sandbox-integration.test.js +216 -0
  130. package/test/sandbox.test.js +408 -0
  131. package/test/sdk.test.js +234 -0
  132. package/test/shell-output-cap.test.js +181 -0
  133. package/test/skills-chat.test.js +110 -0
  134. package/test/skills.test.js +295 -0
  135. package/test/smoke.test.js +68 -0
  136. package/test/status-bar-pause.test.js +164 -0
  137. package/test/stream-parser.test.js +147 -0
  138. package/test/subagents-agent.test.js +178 -0
  139. package/test/subagents.test.js +222 -0
  140. package/test/tool-registry.test.js +85 -0
  141. package/test/trim-budget.test.js +101 -0
  142. package/test/verify-agent.test.js +317 -0
  143. package/test/verify.test.js +141 -0
  144. package/test/web-activity-ordering.test.js +194 -0
  145. package/test/web-activity.test.js +207 -0
  146. package/test/web-data-extraction-guidance.test.js +71 -0
  147. package/test/web-extract.test.js +185 -0
  148. package/test/web-fetch-agent.test.js +291 -0
  149. package/test/web-fetch-mode.test.js +193 -0
  150. package/test/web-search.test.js +380 -0
  151. package/lib/commands.js +0 -1288
package/lib/api.js CHANGED
@@ -6,8 +6,159 @@ const { URL } = require('url');
6
6
 
7
7
  const { buildToolsSchema, isUIActive } = require('./tools');
8
8
  const { TOOL_SPECS } = require('./tool_specs');
9
+ const { dynamicToolSpecs } = require('./tool_registry');
10
+ const { resolveApiKey } = require('./secrets');
11
+ const { applyPromptCaching, applyReasoningEffort } = require('./payload');
12
+ const {
13
+ messagesHaveImages,
14
+ countImages,
15
+ selectImageFormat,
16
+ resolveVisionCapability,
17
+ buildProviderMessages,
18
+ } = require('./images');
9
19
  const writer = require('./ui/writer');
10
20
  const messages = require('./ui/messages');
21
+ const dbg = require('./debug');
22
+
23
+ // Strict precondition for any payload that includes role:tool messages or
24
+ // assistant.tool_calls: every tool_call_id must reference a non-empty id from
25
+ // a prior assistant tool_calls entry. Catches the upstream "tool result's tool
26
+ // id() not found" 400 before it leaves the client and points at the exact
27
+ // violating message instead of a cryptic provider error.
28
/**
 * Assert the tool-call pairing invariant on a message list before it is sent.
 *
 * The conversation is walked once, in order:
 *   - every entry of an assistant message's `tool_calls` must carry a
 *     non-empty `id`, which is then recorded as "announced";
 *   - every `role: 'tool'` message must carry a non-empty `tool_call_id`
 *     that was announced by an EARLIER assistant message.
 *
 * Collecting ids in the same pass that checks them is what makes "prior"
 * true: a tool result that appears before the assistant message announcing
 * its id is rejected instead of being matched against a later announcement.
 *
 * @param {Array<object>} msgs Chat messages in the order they will be sent.
 * @returns {void}
 * @throws {Error} Describing the first violating message (index, role and
 *   the offending id or a short content preview).
 */
function validateToolCallInvariant(msgs) {
  const announcedIds = new Set();
  for (let idx = 0; idx < msgs.length; idx++) {
    const m = msgs[idx];

    if (m.role === 'assistant' && Array.isArray(m.tool_calls)) {
      for (let j = 0; j < m.tool_calls.length; j++) {
        const tc = m.tool_calls[j];
        if (!tc || !tc.id) {
          throw new Error(
            `Invalid tool_calls invariant: messages[${idx}] role=assistant tool_calls[${j}] has empty id`
          );
        }
        announcedIds.add(tc.id);
      }
      continue;
    }

    if (m.role !== 'tool') continue;

    if (!m.tool_call_id) {
      // Collapse whitespace so the preview stays on one line of the error.
      const preview = String(m.content || '').slice(0, 80).replace(/\s+/g, ' ');
      throw new Error(
        `Invalid tool_calls invariant: messages[${idx}] role=tool has empty tool_call_id (content_preview="${preview}")`
      );
    }
    // Only ids announced so far count; later announcements do not satisfy "prior".
    if (!announcedIds.has(m.tool_call_id)) {
      throw new Error(
        `Invalid tool_calls invariant: messages[${idx}] role=tool tool_call_id=${m.tool_call_id} has no matching prior assistant tool_calls`
      );
    }
  }
}
60
+
61
/**
 * Write a one-line-per-message summary of `msgs` to the extended debug log.
 *
 * Each line shows the index and role, plus — only when present — the
 * message's `tool_call_id`, the ids of its `tool_calls`, and the character
 * length of its stringified content. Missing or empty ids are rendered as
 * `<EMPTY>` so they stand out in the trace.
 *
 * @param {Array<object>} msgs Messages about to be sent.
 * @returns {void}
 */
function debugDumpMessages(msgs) {
  dbg.logExtended('[messages dump before API request]');
  for (let i = 0; i < msgs.length; i++) {
    const entry = msgs[i];

    let idList = '';
    if (Array.isArray(entry.tool_calls)) {
      idList = entry.tool_calls.map((call) => (call && call.id) || '<EMPTY>').join(',');
    }

    let toolCallIdPart = '';
    if (entry.tool_call_id !== undefined) {
      toolCallIdPart = ` tool_call_id=${entry.tool_call_id || '<EMPTY>'}`;
    }

    // An empty tool_calls array yields an empty id list and is not printed.
    const toolCallsPart = idList ? ` tool_calls=[${idList}]` : '';

    let contentPart = '';
    if (entry.content !== undefined && entry.content !== null) {
      contentPart = ` content_chars=${(entry.content + '').length}`;
    }

    dbg.logExtended(` [${i}] role=${entry.role}${toolCallIdPart}${toolCallsPart}${contentPart}`);
  }
}
78
+
79
+ // Fit messages into tokenBudget tokens.
80
+ // Uses chars/4 — aligned with estimateTokens; a deliberate under-estimate
81
+ // for token-dense content (code, JSON, HTML) but consistent across the
82
+ // codebase.
83
+ //
84
+ // Always keeps: system prompt + first non-system message (original task).
85
+ // Drops intermediate messages oldest-first, then truncates the last tail
86
+ // message (typically a large tool result) if still over budget.
87
+ //
88
+ // Pure function (no closure dependencies) — lives at module scope so it can be
89
+ // unit-tested in isolation. Called from chatStream's proactive-trim and
90
+ // 400/413 self-healing paths.
91
/**
 * Fit a message list into an estimated token budget.
 *
 * Size is estimated as floor(JSON length / 4). System messages and the first
 * non-system message (the pinned original task) are always kept. Remaining
 * messages are dropped oldest-first until the estimate fits or only one is
 * left; if that last one is a string still over budget, only its newest
 * characters are kept behind a truncation marker. When there is no tail at
 * all, the same truncation is applied to the pinned message instead.
 *
 * The input array and its message objects are not modified; truncated
 * messages are shallow copies.
 *
 * @param {Array<object>} msgs Messages to fit.
 * @param {number} tokenBudget Maximum estimated tokens.
 * @returns {Array<object>} System messages first, then pinned, then kept tail.
 */
function trimToTokenBudget(msgs, tokenBudget) {
  const CHARS_PER_TOKEN = 4;
  const SLACK_CHARS = 200; // headroom for the marker and JSON framing
  const TRUNCATION_NOTE = '[…content truncated to fit model limit…]\n';

  const system = msgs.filter((entry) => entry.role === 'system');
  const conversation = msgs.filter((entry) => entry.role !== 'system');
  if (conversation.length === 0) return [...system];

  const [pinned, ...rest] = conversation;
  const charBudget = tokenBudget * CHARS_PER_TOKEN;

  const exceedsBudget = (candidateTail) => {
    const serialized = JSON.stringify([...system, pinned, ...candidateTail]);
    return Math.floor(serialized.length / CHARS_PER_TOKEN) > tokenBudget;
  };
  const keepNewest = (text, chars) => TRUNCATION_NOTE + text.slice(-chars);

  // Advance past the oldest tail messages while over budget, never past the last one.
  let firstKept = 0;
  while (rest.length - firstKept > 1 && exceedsBudget(rest.slice(firstKept))) {
    firstKept += 1;
  }
  let tail = rest.slice(firstKept);

  if (tail.length === 1 && exceedsBudget(tail)) {
    const [last] = tail;
    const room = charBudget - JSON.stringify([...system, pinned]).length - SLACK_CHARS;
    if (room > 0 && typeof last.content === 'string' && last.content.length > room) {
      tail = [{ ...last, content: keepNewest(last.content, room) }];
    }
  } else if (tail.length === 0 && exceedsBudget(tail)) {
    const room = charBudget - JSON.stringify(system).length - SLACK_CHARS;
    if (room > 0 && typeof pinned.content === 'string' && pinned.content.length > room) {
      return [...system, { ...pinned, content: keepNewest(pinned.content, room) }];
    }
  }

  return [...system, pinned, ...tail];
}
128
+
129
+ // Estimate the context split for the counter (Variant B, display-only).
130
+ //
131
+ // The API returns usage.prompt_tokens PRE-SUMMED — it never breaks the prompt
132
+ // into base (system prompt + tool specs) vs working (history + tool results).
133
+ // So the split cannot be measured; it is ESTIMATED here from the assembled
134
+ // payload. Both halves use the SAME char/4 estimator so they sum consistently
135
+ // (the point of Variant B — no "real minus estimate" mixing where working would
136
+ // look measured but secretly carry the base estimate's error). The real
137
+ // prompt_tokens remains the authoritative anchor shown alongside this split.
138
+ //
139
+ // base = estimate(system messages) + estimate(serialized tool schema)
140
+ // working = estimate(every non-system message) ← the part that grows
141
+ //
142
+ // Recompute PER REQUEST (cheap): the base is NOT eternally constant — it shifts
143
+ // with native-vs-XML mode (tools live in payload.tools vs inside the system
144
+ // prompt), dynamic tools (MCP connecting/failing mid-session), and plan-mode
145
+ // toggling (PLAN_MODE_NOTICE). In XML mode `tools` is absent and the tool weight
146
+ // lives inside the system prompt string, so estimating the actual system message
147
+ // still captures it — base is never silently zero. Pure; unit-tested.
148
/**
 * Estimate how the prompt divides into a "base" and a "working" share.
 *
 * Every message is measured by the length of its JSON serialization
 * (null/undefined entries are measured as an empty string). System messages
 * and the serialized `tools` value count toward `base`; all other messages
 * count toward `working`. Both totals are converted with the same
 * chars-per-token divisor and floored.
 *
 * @param {Array<object>} msgs Messages; a non-array is treated as empty.
 * @param {*} [tools] Tool schema; a falsy value contributes nothing.
 * @returns {{base: number, working: number}} Estimated token counts.
 */
function estimateContextSplit(msgs, tools) {
  const CHARS_PER_TOKEN = 4;
  const list = Array.isArray(msgs) ? msgs : [];
  const chars = { system: 0, working: 0 };

  for (const entry of list) {
    const size = JSON.stringify(entry == null ? '' : entry).length;
    const bucket = entry && entry.role === 'system' ? 'system' : 'working';
    chars[bucket] += size;
  }

  let schemaChars = 0;
  if (tools) schemaChars = JSON.stringify(tools).length;

  return {
    base: Math.floor((chars.system + schemaChars) / CHARS_PER_TOKEN),
    working: Math.floor(chars.working / CHARS_PER_TOKEN),
  };
}
11
162
 
12
163
  function createApiClient({ getConfig, saveConfig, ui }) {
13
164
  const {
@@ -195,6 +346,26 @@ function createApiClient({ getConfig, saveConfig, ui }) {
195
346
  });
196
347
  }
197
348
 
349
+ // Web search (Task W.2b). Calls the backend POST /api/search — which
350
+ // authenticates the Bearer token, queries SearXNG, and returns
351
+ // { results: [{title,url,snippet}, …] } (or an {error} envelope on failure,
352
+ // mapped to a thrown Error by requestJson). Modeled byte-for-byte on
353
+ // dashboardListModels: requireAuthToken() → requestJson(...). The optional
354
+ // `count` is forwarded so the backend can clamp it. The caller (the
355
+ // web_search tool) is responsible for catching every failure mode and
356
+ // surfacing a clean tool error — nothing here is special-cased.
357
/**
 * Run a web search through the dashboard's POST /api/search endpoint.
 *
 * The auth token is obtained first via requireAuthToken(); the request is
 * then delegated to requestJson and its result returned as-is. `count` is
 * included in the body only when it is neither null nor undefined. A falsy
 * `timeout` falls back to 15000 ms.
 *
 * @param {string} query Search query.
 * @param {{count?: number, timeout?: number}} [options]
 * @returns {*} Whatever requestJson returns for this request.
 */
function dashboardSearch(query, { count, timeout } = {}) {
  const authToken = requireAuthToken();
  const body = count != null ? { query, count } : { query };
  const options = {
    method: 'POST',
    timeout: timeout || 15000,
    headers: { 'Authorization': `Bearer ${authToken}` },
    body,
  };
  return requestJson(dashboardUrl('/api/search'), options);
}
368
+
198
369
  function dashboardGetModelForCli(id) {
199
370
  const authToken = requireAuthToken();
200
371
  return requestJson(dashboardUrl(`/api/models/${encodeURIComponent(String(id))}/cli`), {
@@ -251,6 +422,26 @@ function createApiClient({ getConfig, saveConfig, ui }) {
251
422
 
252
423
  if (signal && signal.aborted) throw new Error('Aborted');
253
424
 
425
+ // Multimodal image input (Task 5.4). When any turn carries attached images,
426
+ // resolve the provider content-part shape (Anthropic-style vs OpenAI-style)
427
+ // and FAIL LOUD for a known text-only model — never silently drop the image
428
+ // from the payload (constraint #2). An unknown capability (null) proceeds and
429
+ // lets the endpoint reject cleanly.
430
+ const imagesPresent = messagesHaveImages(messages);
431
+ let imageFormat = null;
432
+ if (imagesPresent) {
433
+ imageFormat = selectImageFormat(config, resolvedModel);
434
+ const vision = resolveVisionCapability(config, resolvedModel);
435
+ if (vision === false) {
436
+ const n = countImages(messages);
437
+ throw new Error(
438
+ `Model "${resolvedModel}" is not vision-capable, but ${n} image${n === 1 ? '' : 's'} ` +
439
+ `${n === 1 ? 'was' : 'were'} attached. Select a vision-capable model, or set ` +
440
+ `vision:true on the model profile if this endpoint does accept images.`,
441
+ );
442
+ }
443
+ }
444
+
254
445
  let trimNotified = false;
255
446
  function notifyTrim(info) {
256
447
  if (trimNotified) return;
@@ -260,51 +451,8 @@ function createApiClient({ getConfig, saveConfig, ui }) {
260
451
  }
261
452
  }
262
453
 
263
- // Fit messages into tokenBudget tokens.
264
- // Uses chars/4 aligned with estimateTokens; a deliberate under-estimate
265
- // for token-dense content (code, JSON, HTML) but consistent across the
266
- // codebase.
267
- //
268
- // Always keeps: system prompt + first non-system message (original task).
269
- // Drops intermediate messages oldest-first, then truncates the last tail
270
- // message (typically a large tool result) if still over budget.
271
- function trimToTokenBudget(msgs, tokenBudget) {
272
- const CHARS_PER_TOKEN = 4;
273
- const system = msgs.filter((m) => m.role === 'system');
274
- const nonSystem = msgs.filter((m) => m.role !== 'system');
275
- if (nonSystem.length === 0) return [...system];
276
-
277
- const pinned = nonSystem[0]; // original task — never dropped
278
- let tail = nonSystem.slice(1);
279
-
280
- const estimate = () => {
281
- const all = tail.length > 0 ? [...system, pinned, ...tail] : [...system, pinned];
282
- return Math.floor(JSON.stringify(all).length / CHARS_PER_TOKEN);
283
- };
284
-
285
- while (tail.length > 1 && estimate() > tokenBudget) {
286
- tail = tail.slice(1);
287
- }
288
-
289
- if (tail.length === 1 && estimate() > tokenBudget) {
290
- const msg = tail[0];
291
- const otherChars = JSON.stringify([...system, pinned]).length;
292
- const available = tokenBudget * CHARS_PER_TOKEN - otherChars - 200;
293
- if (available > 0 && typeof msg.content === 'string' && msg.content.length > available) {
294
- tail = [{ ...msg, content: '[…content truncated to fit model limit…]\n' + msg.content.slice(-available) }];
295
- }
296
- }
297
-
298
- if (tail.length === 0 && estimate() > tokenBudget) {
299
- const systemChars = JSON.stringify(system).length;
300
- const available = tokenBudget * CHARS_PER_TOKEN - systemChars - 200;
301
- if (available > 0 && typeof pinned.content === 'string' && pinned.content.length > available) {
302
- return [...system, { ...pinned, content: '[…content truncated to fit model limit…]\n' + pinned.content.slice(-available) }];
303
- }
304
- }
305
-
306
- return tail.length > 0 ? [...system, pinned, ...tail] : [...system, pinned];
307
- }
454
+ // trimToTokenBudget is a pure, module-scope helper (lifted out of this
455
+ // closure in Task 1.1 so it can be unit-tested directly; body unchanged).
308
456
 
309
457
  // Proactive trim: prefer a limit learned from a prior 400 overflow; otherwise
310
458
  // fall back to config.context_length (with a ~10% safety margin) as a hint.
@@ -352,21 +500,36 @@ function createApiClient({ getConfig, saveConfig, ui }) {
352
500
  const callable = Object.fromEntries(
353
501
  Object.entries(TOOL_SPECS).filter(([, spec]) => !spec.wrapper)
354
502
  );
355
- payload.tools = buildToolsSchema(callable);
503
+ // Dynamic MCP tools (Task 3.3) advertise their schema here too, so the
504
+ // model can emit native tool_calls against `mcp__server__tool` names that
505
+ // dispatch through the same registry path as built-ins.
506
+ payload.tools = buildToolsSchema({ ...callable, ...dynamicToolSpecs() });
356
507
  payload.tool_choice = 'auto';
357
508
  }
358
509
 
359
510
  const endpoint = apiUrl('/v1/chat/completions');
360
511
 
361
512
  async function doRequest(msgs) {
362
- const reqPayload = { ...payload, messages: msgs };
513
+ if (dbg.isFile()) debugDumpMessages(msgs);
514
+ validateToolCallInvariant(msgs);
515
+ // Transform any image-bearing turn into the provider-specific multimodal
516
+ // content[] shape right before the wire (Task 5.4); the internal `images`
517
+ // field never leaves the client.
518
+ const wireMsgs = imagesPresent ? buildProviderMessages(msgs, imageFormat) : msgs;
519
+ const reqPayload = { ...payload, messages: wireMsgs };
520
+ // Optional payload augmentations (Task 2.7): reasoning_effort for models
521
+ // that support it, and prompt-caching markers on the stable prefix when
522
+ // the user has opted in (config.prompt_caching). Both no-op otherwise.
523
+ applyReasoningEffort(reqPayload, config.reasoning_effort, resolvedModel, { force: !!config.reasoning_effort_force });
524
+ applyPromptCaching(reqPayload, config.prompt_caching === true);
363
525
  const reqBody = JSON.stringify(reqPayload);
364
526
  const res = await httpRequest(endpoint, {
365
527
  method: 'POST',
366
528
  timeout: config.request_timeout_ms,
367
529
  headers: {
368
530
  'Content-Type': 'application/json',
369
- 'Authorization': `Bearer ${config.api_key}`,
531
+ // Precedence: SEMALT_API_KEY env → OS keychain → config.api_key.
532
+ 'Authorization': `Bearer ${resolveApiKey(config)}`,
370
533
  'Content-Length': Buffer.byteLength(reqBody),
371
534
  },
372
535
  signal,
@@ -516,6 +679,11 @@ function createApiClient({ getConfig, saveConfig, ui }) {
516
679
  type: 'function',
517
680
  function: { name: t.name, arguments: t.arguments || '{}' },
518
681
  }));
682
+ dbg.logExtended(
683
+ `[tool_call finalize] acc_len=${toolCallAcc.length} ` +
684
+ `valid=${validToolCalls.length} nativeTools=${nativeTools} ` +
685
+ `acc=${JSON.stringify(toolCallAcc).slice(0, 400)}`
686
+ );
519
687
  if (!nativeTools) appendToolCallsXml();
520
688
  if (!silent) renderer.flush();
521
689
  // Fallback for endpoints that don't honor stream_options.include_usage:
@@ -528,10 +696,18 @@ function createApiClient({ getConfig, saveConfig, ui }) {
528
696
  };
529
697
  }
530
698
  const elapsedMs = Date.now() - startTime;
699
+ // Estimated base/working split (Variant B, display-only) computed from
700
+ // the payload ACTUALLY sent — trimmedMessages holds the final value
701
+ // after any 413/400-overflow retry, and payload.tools is present only in
702
+ // native mode (XML mode embeds tools in the system prompt, captured by
703
+ // the system-message estimate). Recomputed every request so it stays
704
+ // correct when MCP connects or plan mode toggles mid-session.
705
+ const contextEstimate = estimateContextSplit(trimmedMessages, payload.tools);
531
706
  resolve({
532
707
  content: fullText,
533
708
  toolCalls: nativeTools ? validToolCalls : [],
534
709
  usage,
710
+ context_estimate: contextEstimate,
535
711
  usage_from_provider: !!streamUsage,
536
712
  tool_calls_count: validToolCalls.length,
537
713
  finish_reason: streamFinishReason,
@@ -564,6 +740,10 @@ function createApiClient({ getConfig, saveConfig, ui }) {
564
740
  res.setEncoding('utf8');
565
741
 
566
742
  res.on('data', (chunk) => {
743
+ if (dbg.isFile()) {
744
+ const raw = typeof chunk === 'string' ? chunk : chunk.toString('utf8');
745
+ dbg.logExtended(`[SSE raw] ${raw.slice(0, 500).replace(/\n/g, '\\n')}`);
746
+ }
567
747
  lineBuffer += chunk;
568
748
  const lines = lineBuffer.split('\n');
569
749
  lineBuffer = lines.pop();
@@ -572,11 +752,14 @@ function createApiClient({ getConfig, saveConfig, ui }) {
572
752
  if (!line.startsWith('data: ')) continue;
573
753
  const data = line.slice(6).trim();
574
754
  if (data === '[DONE]') {
755
+ dbg.logExtended(`[SSE event] [DONE]`);
575
756
  finalize();
576
757
  res.destroy();
577
758
  return;
578
759
  }
579
760
 
761
+ dbg.logExtended(`[SSE event] ${data.slice(0, 500)}`);
762
+
580
763
  try {
581
764
  const obj = JSON.parse(data);
582
765
  if (obj.usage && (obj.usage.prompt_tokens !== undefined || obj.usage.completion_tokens !== undefined)) {
@@ -619,15 +802,31 @@ function createApiClient({ getConfig, saveConfig, ui }) {
619
802
  }
620
803
  }
621
804
 
805
+ // Standard OpenAI tool_call streaming: the announcement chunk
806
+ // carries id + type + function.name with arguments="", and one or
807
+ // more follow-up chunks stream arguments deltas (no id/name).
808
+ // Process every chunk that has delta.tool_calls and patch in
809
+ // whichever fields are present — never gate slot creation or
810
+ // field updates on arguments being non-empty, or the announcement
811
+ // (which carries the only id/name) gets dropped.
622
812
  const toolCallsDelta = delta.tool_calls;
623
813
  if (Array.isArray(toolCallsDelta)) {
624
814
  for (const tc of toolCallsDelta) {
815
+ if (!tc || typeof tc !== 'object') continue;
625
816
  const idx = typeof tc.index === 'number' ? tc.index : toolCallAcc.length;
626
- const isNew = !toolCallAcc[idx];
627
- if (isNew) toolCallAcc[idx] = { id: '', name: '', arguments: '' };
628
- if (tc.id) toolCallAcc[idx].id = tc.id;
629
- if (tc.function?.name) toolCallAcc[idx].name += tc.function.name;
630
- if (tc.function?.arguments) toolCallAcc[idx].arguments += tc.function.arguments;
817
+ if (!toolCallAcc[idx]) {
818
+ toolCallAcc[idx] = { id: '', name: '', arguments: '' };
819
+ }
820
+ const slot = toolCallAcc[idx];
821
+ if (tc.id) slot.id = tc.id;
822
+ const fnName = tc.function && tc.function.name;
823
+ if (typeof fnName === 'string' && fnName) slot.name = fnName;
824
+ const fnArgs = tc.function && tc.function.arguments;
825
+ if (typeof fnArgs === 'string') slot.arguments += fnArgs;
826
+ dbg.logExtended(
827
+ `[tool_call acc] idx=${idx} id=${slot.id || '<empty>'} ` +
828
+ `name=${slot.name || '<empty>'} args_len=${slot.arguments.length}`
829
+ );
631
830
  }
632
831
  }
633
832
 
@@ -649,7 +848,9 @@ function createApiClient({ getConfig, saveConfig, ui }) {
649
848
  fullText += content;
650
849
  tokenCount++;
651
850
  }
652
- } catch {}
851
+ } catch (err) {
852
+ dbg.logExtended(`[SSE parse-error] ${err.message} :: ${data.slice(0, 200)}`);
853
+ }
653
854
  }
654
855
  });
655
856
 
@@ -684,7 +885,8 @@ function createApiClient({ getConfig, saveConfig, ui }) {
684
885
  timeout: config.request_timeout_ms,
685
886
  headers: {
686
887
  'Content-Type': 'application/json',
687
- 'Authorization': `Bearer ${config.api_key}`,
888
+ // Precedence: SEMALT_API_KEY env → OS keychain → config.api_key.
889
+ 'Authorization': `Bearer ${resolveApiKey(config)}`,
688
890
  'Content-Length': Buffer.byteLength(body),
689
891
  },
690
892
  }, body);
@@ -723,9 +925,55 @@ function createApiClient({ getConfig, saveConfig, ui }) {
723
925
  });
724
926
  }
725
927
 
928
+ // Quiet, non-streaming completion. Unlike chatSync it does NOT write to
929
+ // scrollback or route errors through the UI — it returns the assistant text
930
+ // or THROWS, so a programmatic caller (the web-fetch secondary summarizer,
931
+ // Task W.1) can decide its own fallback. No native tools, no streaming chrome.
932
/**
 * Quiet, non-streaming chat completion.
 *
 * Posts `messages` to /v1/chat/completions with `stream: false` and resolves
 * with the first choice's message content (an empty string when it is
 * missing or falsy). Nothing is rendered; failures surface as rejections so
 * the caller can pick its own fallback.
 *
 * @param {Array<object>} messages Chat messages to send.
 * @param {{model?: string, temperature?: number, signal?: AbortSignal}} [options]
 *   `model` falls back to config.default_model; a non-numeric `temperature`
 *   falls back to config.temperature.
 * @returns {Promise<string>} Assistant text.
 * @throws {Error} `HTTP <status> — <body prefix>` on a non-200 response,
 *   `Parse error: …` when the body cannot be processed, or whatever the
 *   request/response stream raises.
 */
async function chatComplete(messages, { model, temperature, signal } = {}) {
  const config = getConfig();
  const body = JSON.stringify({
    model: model || config.default_model,
    messages,
    temperature: typeof temperature === 'number' ? temperature : config.temperature,
    stream: false,
  });

  const endpoint = apiUrl('/v1/chat/completions');
  const requestOptions = {
    method: 'POST',
    timeout: config.request_timeout_ms,
    signal: signal || undefined,
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${resolveApiKey(config)}`,
      'Content-Length': Buffer.byteLength(body),
    },
  };
  const res = await httpRequest(endpoint, requestOptions, body);

  return new Promise((resolve, reject) => {
    const chunks = [];
    res.setEncoding('utf8');
    res.on('error', reject);
    res.on('data', (chunk) => { chunks.push(chunk); });
    res.on('end', () => {
      const raw = chunks.join('');
      if (res.statusCode !== 200) {
        reject(new Error(`HTTP ${res.statusCode} — ${String(raw).slice(0, 200)}`));
        return;
      }

      let text;
      try {
        const parsed = JSON.parse(raw);
        const firstChoice = parsed && parsed.choices && parsed.choices[0];
        const message = firstChoice && firstChoice.message;
        text = message ? message.content : '';
      } catch (error) {
        reject(new Error(`Parse error: ${error.message}`));
        return;
      }
      resolve(text || '');
    });
  });
}
972
+
726
973
  return {
727
974
  chatStream,
728
975
  chatSync,
976
+ chatComplete,
729
977
  dashboardCreateChat,
730
978
  dashboardGetChat,
731
979
  dashboardGetModelForCli,
@@ -733,6 +981,7 @@ function createApiClient({ getConfig, saveConfig, ui }) {
733
981
  dashboardListModels,
734
982
  dashboardLogout,
735
983
  dashboardSaveMessages,
984
+ dashboardSearch,
736
985
  dashboardWhoAmI,
737
986
  estimateTokens,
738
987
  getCliLoginStatus,
@@ -743,4 +992,8 @@ function createApiClient({ getConfig, saveConfig, ui }) {
743
992
 
744
993
  module.exports = {
745
994
  createApiClient,
995
+ // Exported for unit testing (Task 1.1). Pure helper, no runtime behavior change.
996
+ trimToTokenBudget,
997
+ // Exported for unit testing the split-context counter (Variant B). Pure helper.
998
+ estimateContextSplit,
746
999
  };
package/lib/args.js CHANGED
@@ -1,5 +1,7 @@
1
1
  'use strict';
2
2
 
3
+ const debug = require('./debug');
4
+
3
5
  function parseArgs(argv) {
4
6
  const opts = {};
5
7
  const positional = [];
@@ -15,6 +17,12 @@ function parseArgs(argv) {
15
17
  case '--file':
16
18
  (opts.file = opts.file || []).push(argv[++i]);
17
19
  break;
20
+ case '--image':
21
+ // Multimodal image input (Task 5.4). Repeatable: attach one or more
22
+ // images (PNG/JPEG/WebP/GIF) to the user turn. Read through isPathSafe,
23
+ // size-checked, base64-encoded by the entry point (lib/images.js).
24
+ (opts.image = opts.image || []).push(argv[++i]);
25
+ break;
18
26
  case '-a':
19
27
  case '--analyze':
20
28
  opts.analyze = true;
@@ -22,6 +30,28 @@ function parseArgs(argv) {
22
30
  case '--dry-run':
23
31
  opts.dryRun = true;
24
32
  break;
33
+ case '-p':
34
+ case '--print':
35
+ opts.print = true;
36
+ break;
37
+ case '-b':
38
+ case '--background':
39
+ // Launch the task as a detached background process (Task 5.3). Used by
40
+ // `semalt-code run --background <prompt>`. The permission policy is fixed
41
+ // from the other flags at launch and cannot change after detach.
42
+ opts.background = true;
43
+ break;
44
+ case '--output-format': {
45
+ const v = argv[++i];
46
+ const allowed = ['text', 'json', 'stream-json'];
47
+ if (!allowed.includes(v)) {
48
+ process.stderr.write(`Error: --output-format must be one of ${allowed.join(', ')}.\n`);
49
+ process.exit(1);
50
+ }
51
+ opts.outputFormat = v;
52
+ opts.print = true; // selecting a machine format implies headless
53
+ break;
54
+ }
25
55
  case '--api-base':
26
56
  opts.apiBase = argv[++i];
27
57
  break;
@@ -53,8 +83,52 @@ function parseArgs(argv) {
53
83
  case '--readonly':
54
84
  opts.readonly = true;
55
85
  break;
56
- case '--new':
57
- opts.new = true;
86
+ case '--plan':
87
+ opts.plan = true;
88
+ break;
89
+ case '--no-verify':
90
+ // One-off skip of self-verification (Task 4.2) for this invocation, in
91
+ // BOTH advisory and enforcing modes. Threaded into runAgentLoop opts.
92
+ opts.noVerify = true;
93
+ break;
94
+ case '--max-iterations': {
95
+ // Cap on agent-loop iterations per turn. A positive integer caps the
96
+ // loop; 0 or 'unlimited' removes the cap (power-user choice). The value
97
+ // also flows through flagsConfigLayer (config.js) into config.max_iterations;
98
+ // it's consumed here so it isn't mis-parsed as a positional.
99
+ const v = argv[++i];
100
+ const ok = v !== undefined && (v === 'unlimited' || /^\d+$/.test(v));
101
+ if (!ok) {
102
+ process.stderr.write(`Error: --max-iterations requires a non-negative integer or "unlimited".\n`);
103
+ process.exit(1);
104
+ }
105
+ opts.maxIterations = v;
106
+ break;
107
+ }
108
+ case '--reasoning-effort':
109
+ // Consumed here so the value isn't mis-parsed as a positional; the
110
+ // runtime override flows through flagsConfigLayer (config.js).
111
+ opts.reasoningEffort = argv[++i];
112
+ break;
113
+ case '--prompt-caching':
114
+ opts.promptCaching = true;
115
+ break;
116
+ case '--allow-anywhere':
117
+ opts.allowAnywhere = true;
118
+ break;
119
+ case '--no-network':
120
+ // Binary network isolation (Task 4.4b): force kernel-level no-network for
121
+ // sandboxed commands (bwrap --unshare-net / Seatbelt deny network*). A
122
+ // human-only opt-in; the model can never reach it. The sandbox decision
123
+ // (lib/sandbox.js resolveSandboxedSpawn) reads the flag from argv directly,
124
+ // so this just records intent + keeps it out of the positional args.
125
+ opts.noNetwork = true;
126
+ break;
127
+ case '--dangerously-skip-permissions':
128
+ // The single explicit opt-out of ALL safety: disables the destructive
129
+ // command deny-list and the config-file read guard, and fully
130
+ // auto-approves every tool call. Pre-scanned in index.js too.
131
+ opts.dangerouslySkipPermissions = true;
58
132
  break;
59
133
  case '--show-think':
60
134
  opts.showThink = true;
@@ -62,6 +136,15 @@ function parseArgs(argv) {
62
136
  case '--debug':
63
137
  opts.debug = true;
64
138
  break;
139
+ case '--debug-file': {
140
+ const v = argv[++i];
141
+ if (!v || v.startsWith('-')) {
142
+ process.stderr.write(`Error: --debug-file requires a path argument.\n`);
143
+ process.exit(1);
144
+ }
145
+ opts.debugFile = v;
146
+ break;
147
+ }
65
148
  case '--system-prompt':
66
149
  opts.systemPromptFile = argv[++i];
67
150
  break;
@@ -71,6 +154,17 @@ function parseArgs(argv) {
71
154
  i++;
72
155
  }
73
156
 
157
+ if (opts.debug && opts.debugFile) {
158
+ process.stderr.write(
159
+ `Error: --debug and --debug-file are mutually exclusive.\n` +
160
+ ` Use --debug for inline debug output, or --debug-file <path>\n` +
161
+ ` for extended debug traces written to a file.\n`
162
+ );
163
+ process.exit(1);
164
+ }
165
+
166
+ debug.init({ debug: opts.debug, debugFile: opts.debugFile });
167
+
74
168
  return { opts, positional };
75
169
  }
76
170
 
package/lib/audit.js CHANGED
@@ -28,4 +28,26 @@ function logToolCall(tag, input, approved, resultStatus) {
28
28
  }
29
29
  }
30
30
 
31
- module.exports = { AUDIT_LOG, logToolCall };
31
+ // Checkpoint activity (Task 4.3). Recorded as a `checkpoint` row so the audit
32
+ // log shows when prior file state was snapshotted before a mutation (and on
33
+ // rewind). `seq` is the per-session checkpoint sequence number; `note` carries
34
+ // the action + affected path(s) or the rewind outcome. Like logToolCall this
35
+ // never throws.
36
/**
 * Append a `checkpoint` row to the audit log. Never throws.
 *
 * The row records the timestamp, the tag `checkpoint`, and an `input` of the
 * form `checkpoint:<seq> <note>`; `approved` is always true and `result` is
 * always `ok`. Notes longer than 200 characters are cut to 197 plus `...`.
 *
 * @param {number|string} seq Checkpoint sequence number.
 * @param {*} note Free-form note. Strings are used verbatim; other values
 *   are JSON-serialized. Values JSON cannot represent (undefined, functions,
 *   symbols, circular structures, BigInt) fall back to String(note), with
 *   undefined rendered as an empty note, so the row is still written rather
 *   than silently lost.
 * @returns {void}
 */
function logCheckpoint(seq, note) {
  try {
    let noteStr;
    if (typeof note === 'string') {
      noteStr = note;
    } else {
      try {
        // Yields undefined (not a string) for undefined/function/symbol input.
        noteStr = JSON.stringify(note);
      } catch {
        // Circular structure or BigInt: fall through to the String() fallback.
        noteStr = undefined;
      }
      if (typeof noteStr !== 'string') {
        noteStr = note === undefined ? '' : String(note);
      }
    }
    if (noteStr.length > 200) noteStr = noteStr.slice(0, 197) + '...';
    const entry = JSON.stringify({
      ts: new Date().toISOString(),
      tag: 'checkpoint',
      input: `checkpoint:${seq} ${noteStr}`,
      approved: true,
      result: 'ok',
    });
    fs.appendFileSync(AUDIT_LOG, entry + '\n');
  } catch {
    // Audit logging is best-effort: a failure here must never break the caller.
  }
}
52
+
53
+ module.exports = { AUDIT_LOG, logToolCall, logCheckpoint };