@semalt-ai/code 1.8.5 → 1.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192)
  1. package/.claude/settings.local.json +7 -1
  2. package/.github/workflows/ci.yml +69 -0
  3. package/ARCHITECTURE.md +6 -95
  4. package/CLAUDE.md +196 -316
  5. package/README.md +148 -4
  6. package/docs/ARCHITECTURE.md +1321 -0
  7. package/docs/CONFIG.md +340 -0
  8. package/docs/HISTORY.md +245 -0
  9. package/examples/embed.js +74 -0
  10. package/index.js +251 -10
  11. package/lib/agent.js +856 -120
  12. package/lib/api.js +239 -50
  13. package/lib/args.js +74 -2
  14. package/lib/audit.js +23 -1
  15. package/lib/background.js +584 -0
  16. package/lib/checkpoints.js +757 -0
  17. package/lib/commands/auth.js +94 -0
  18. package/lib/commands/chat-session.js +489 -0
  19. package/lib/commands/chat-slash.js +415 -0
  20. package/lib/commands/chat-turn.js +669 -0
  21. package/lib/commands/chat.js +407 -0
  22. package/lib/commands/custom.js +157 -0
  23. package/lib/commands/history-utils.js +66 -0
  24. package/lib/commands/index.js +268 -0
  25. package/lib/commands/mcp.js +113 -0
  26. package/lib/commands/oneshot.js +193 -0
  27. package/lib/commands/registry.js +269 -0
  28. package/lib/commands/tasks.js +89 -0
  29. package/lib/compact.js +87 -0
  30. package/lib/config.js +360 -11
  31. package/lib/constants.js +401 -3
  32. package/lib/deny.js +199 -0
  33. package/lib/doctor.js +160 -0
  34. package/lib/headless.js +202 -0
  35. package/lib/hooks.js +286 -0
  36. package/lib/images.js +270 -0
  37. package/lib/internals.js +49 -0
  38. package/lib/mcp/boundary.js +131 -0
  39. package/lib/mcp/client.js +270 -0
  40. package/lib/mcp/oauth.js +134 -0
  41. package/lib/memory.js +209 -0
  42. package/lib/metrics.js +37 -2
  43. package/lib/payload.js +54 -0
  44. package/lib/permission-rules.js +401 -0
  45. package/lib/permissions.js +123 -26
  46. package/lib/pricing.js +67 -0
  47. package/lib/proc.js +62 -0
  48. package/lib/prompts.js +99 -8
  49. package/lib/sandbox.js +568 -0
  50. package/lib/sdk.js +328 -0
  51. package/lib/secrets.js +211 -0
  52. package/lib/skills.js +223 -0
  53. package/lib/subagents.js +516 -0
  54. package/lib/tool_registry.js +2862 -0
  55. package/lib/tool_specs.js +263 -9
  56. package/lib/tools.js +352 -1039
  57. package/lib/ui/anim.js +86 -0
  58. package/lib/ui/ansi.js +17 -27
  59. package/lib/ui/chat-history.js +253 -71
  60. package/lib/ui/create-ui.js +67 -24
  61. package/lib/ui/diff.js +90 -25
  62. package/lib/ui/file-activity.js +236 -0
  63. package/lib/ui/format.js +195 -29
  64. package/lib/ui/input-field.js +21 -11
  65. package/lib/ui/md-stream.js +234 -0
  66. package/lib/ui/render-operation.js +113 -0
  67. package/lib/ui/select.js +1 -4
  68. package/lib/ui/status-bar.js +146 -36
  69. package/lib/ui/stream.js +20 -13
  70. package/lib/ui/theme.js +190 -44
  71. package/lib/ui/tool-operation.js +190 -0
  72. package/lib/ui/utils.js +9 -5
  73. package/lib/ui/web-activity.js +270 -0
  74. package/lib/ui/writer.js +159 -45
  75. package/lib/ui.js +1 -1
  76. package/lib/verify.js +229 -0
  77. package/lib/web-extract.js +213 -0
  78. package/lib/web-summarize.js +68 -0
  79. package/package.json +19 -4
  80. package/scripts/lint.js +57 -0
  81. package/test/agent-loop.test.js +389 -0
  82. package/test/anim-driver.test.js +153 -0
  83. package/test/ask-user-display.test.js +226 -0
  84. package/test/ask-user-gate.test.js +231 -0
  85. package/test/background.test.js +414 -0
  86. package/test/chat-history-nocolor.test.js +155 -0
  87. package/test/chat-relogin.test.js +207 -0
  88. package/test/chat.test.js +114 -0
  89. package/test/checkpoints-agent.test.js +181 -0
  90. package/test/checkpoints.test.js +650 -0
  91. package/test/command-registry.test.js +160 -0
  92. package/test/compact.test.js +116 -0
  93. package/test/completion-lazy.test.js +52 -0
  94. package/test/config-merge.test.js +324 -0
  95. package/test/config-quarantine.test.js +128 -0
  96. package/test/config-write-guard-allow-anywhere.test.js +56 -0
  97. package/test/config-write-guard-skip.test.js +46 -0
  98. package/test/config-write-guard.test.js +153 -0
  99. package/test/context-split.test.js +215 -0
  100. package/test/cost-doctor.test.js +142 -0
  101. package/test/custom-commands-chat.test.js +106 -0
  102. package/test/custom-commands.test.js +230 -0
  103. package/test/defer-detail-band.test.js +403 -0
  104. package/test/deny-windows.test.js +120 -0
  105. package/test/deny.test.js +83 -0
  106. package/test/detail-band-tab-flatten.test.js +242 -0
  107. package/test/download-allow-anywhere.test.js +66 -0
  108. package/test/download-confine.test.js +153 -0
  109. package/test/exec-diff.test.js +268 -0
  110. package/test/executors.test.js +599 -0
  111. package/test/extract-tool-calls.test.js +349 -0
  112. package/test/fetch-url-validation.test.js +219 -0
  113. package/test/file-activity.test.js +522 -0
  114. package/test/fixtures/tool-calls.js +57 -0
  115. package/test/fixtures/web-page.js +91 -0
  116. package/test/git-tools.test.js +384 -0
  117. package/test/grep-glob-serialize.test.js +242 -0
  118. package/test/grep-glob.test.js +268 -0
  119. package/test/grep-path-target.test.js +227 -0
  120. package/test/harness/README.md +57 -0
  121. package/test/harness/chat-harness.js +143 -0
  122. package/test/harness/memwarn-headless-child.js +65 -0
  123. package/test/harness/mock-llm.js +120 -0
  124. package/test/harness/mock-mcp-server.js +142 -0
  125. package/test/harness/sse-server.js +69 -0
  126. package/test/headless.test.js +348 -0
  127. package/test/history-utils.test.js +88 -0
  128. package/test/hooks-agent.test.js +238 -0
  129. package/test/hooks-verify-sandbox.test.js +232 -0
  130. package/test/hooks.test.js +216 -0
  131. package/test/http-get-user-agent.test.js +142 -0
  132. package/test/images-api.test.js +208 -0
  133. package/test/images.test.js +238 -0
  134. package/test/input-field-ctrl-o.test.js +37 -0
  135. package/test/live-height-physical.test.js +281 -0
  136. package/test/max-iterations.test.js +218 -0
  137. package/test/mcp-boundary.test.js +57 -0
  138. package/test/mcp-client.test.js +267 -0
  139. package/test/mcp-oauth.test.js +86 -0
  140. package/test/md-stream.test.js +183 -0
  141. package/test/memory-truncation-warning.test.js +222 -0
  142. package/test/memory.test.js +198 -0
  143. package/test/native-dispatch.test.js +409 -0
  144. package/test/native-live-narration.test.js +254 -0
  145. package/test/output-chokepoint.test.js +188 -0
  146. package/test/output-heredoc-leak.test.js +195 -0
  147. package/test/output-preview.test.js +245 -0
  148. package/test/path-guards.test.js +134 -0
  149. package/test/payload.test.js +99 -0
  150. package/test/permission-rules-agent.test.js +210 -0
  151. package/test/permission-rules.test.js +297 -0
  152. package/test/permissions.test.js +362 -0
  153. package/test/plan-mode.test.js +167 -0
  154. package/test/read-paginate.test.js +275 -0
  155. package/test/readonly-tools.test.js +177 -0
  156. package/test/render-operation.test.js +317 -0
  157. package/test/replay-descriptor-xml.test.js +216 -0
  158. package/test/replay-descriptor.test.js +189 -0
  159. package/test/replay-web-aggregate.test.js +291 -0
  160. package/test/replay-web-persist.test.js +241 -0
  161. package/test/result-cap.test.js +233 -0
  162. package/test/running-glyph-anim.test.js +111 -0
  163. package/test/sandbox-agent.test.js +147 -0
  164. package/test/sandbox-integration.test.js +216 -0
  165. package/test/sandbox.test.js +408 -0
  166. package/test/sdk.test.js +234 -0
  167. package/test/shell-output-cap.test.js +181 -0
  168. package/test/skills-chat.test.js +110 -0
  169. package/test/skills.test.js +295 -0
  170. package/test/smoke.test.js +68 -0
  171. package/test/status-bar-driver.test.js +93 -0
  172. package/test/status-bar-pause.test.js +164 -0
  173. package/test/status-bar-resync.test.js +188 -0
  174. package/test/stream-parser.test.js +171 -0
  175. package/test/subagents-agent.test.js +178 -0
  176. package/test/subagents.test.js +222 -0
  177. package/test/theme-palette.test.js +166 -0
  178. package/test/tool-registry.test.js +85 -0
  179. package/test/trim-budget.test.js +101 -0
  180. package/test/truncate-visible.test.js +78 -0
  181. package/test/verify-agent.test.js +317 -0
  182. package/test/verify.test.js +141 -0
  183. package/test/view-image.test.js +199 -0
  184. package/test/web-activity-ordering.test.js +203 -0
  185. package/test/web-activity.test.js +207 -0
  186. package/test/web-data-extraction-guidance.test.js +71 -0
  187. package/test/web-extract.test.js +185 -0
  188. package/test/web-fetch-agent.test.js +291 -0
  189. package/test/web-fetch-mode.test.js +193 -0
  190. package/test/web-search.test.js +380 -0
  191. package/lib/commands.js +0 -1438
  192. package/path +0 -1
package/lib/api.js CHANGED
@@ -6,6 +6,16 @@ const { URL } = require('url');
6
6
 
7
7
  const { buildToolsSchema, isUIActive } = require('./tools');
8
8
  const { TOOL_SPECS } = require('./tool_specs');
9
+ const { dynamicToolSpecs } = require('./tool_registry');
10
+ const { resolveApiKey } = require('./secrets');
11
+ const { applyPromptCaching, applyReasoningEffort } = require('./payload');
12
+ const {
13
+ messagesHaveImages,
14
+ countImages,
15
+ selectImageFormat,
16
+ resolveVisionCapability,
17
+ buildProviderMessages,
18
+ } = require('./images');
9
19
  const writer = require('./ui/writer');
10
20
  const messages = require('./ui/messages');
11
21
  const dbg = require('./debug');
@@ -66,6 +76,105 @@ function debugDumpMessages(msgs) {
66
76
  }
67
77
  }
68
78
 
79
// Remove client-only sibling keys from chat messages immediately before they
// are serialized for the wire. The only such key today is `_display`, the
// Phase 6a descriptor core persisted on native tool messages for replay.
//
// messages: the chat message array about to be sent.
// Returns the input itself when it is not an array or when no message carries
// `_display` (so the common path allocates nothing); otherwise a new array in
// which affected messages are shallow copies without `_display` and every
// other entry is passed through by reference.
function stripInternalKeys(messages) {
  const carriesDisplay = (msg) => Boolean(msg) && msg._display !== undefined;
  const withoutDisplay = ({ _display, ...wireSafe }) => wireSafe;

  if (!Array.isArray(messages)) return messages;
  if (!messages.some(carriesDisplay)) return messages;

  return messages.map((msg) => (carriesDisplay(msg) ? withoutDisplay(msg) : msg));
}
93
+
94
// Fit messages into tokenBudget tokens.
// Uses chars/4 — aligned with estimateTokens; a deliberate under-estimate
// for token-dense content (code, JSON, HTML) but consistent across the
// codebase.
//
// Always keeps: system messages + first non-system message (original task).
// Drops intermediate messages oldest-first, then truncates the last tail
// message (typically a large tool result) if still over budget. When the
// pinned task is the only non-system message and is itself too large, the
// pinned message is truncated instead. Truncation keeps the END of the text
// behind a fixed notice line.
//
// msgs:        array of chat messages ({ role, content, ... }).
// tokenBudget: maximum estimated tokens for the serialized conversation.
// Returns a new array (system messages first); input messages are never
// mutated — a truncated message is a shallow copy. The result can still be
// over budget when nothing truncatable remains (non-string content, or the
// kept prefix alone exceeds the budget).
//
// Pure function (no closure dependencies) — lives at module scope so it can be
// unit-tested in isolation. Called from chatStream's proactive-trim and
// 400/413 self-healing paths.
function trimToTokenBudget(msgs, tokenBudget) {
  const CHARS_PER_TOKEN = 4;
  const TRUNCATION_SLACK_CHARS = 200;
  const TRUNCATION_NOTICE = '[…content truncated to fit model limit…]\n';

  const system = msgs.filter((m) => m.role === 'system');
  const nonSystem = msgs.filter((m) => m.role !== 'system');
  if (nonSystem.length === 0) return [...system];

  const pinned = nonSystem[0]; // original task — never dropped
  const budgetChars = tokenBudget * CHARS_PER_TOKEN;

  // Serialized size of one array element. JSON.stringify yields undefined for
  // values an array would serialize as `null` (4 chars).
  const jsonLength = (value) => {
    const json = JSON.stringify(value);
    return json === undefined ? 4 : json.length;
  };

  // Keep the last `available` characters of `content` behind the notice.
  // `available` is floored so a fractional budget cannot turn slice(-0.x) into
  // "keep everything", and the cut is moved one unit forward when it would
  // land between the two halves of a surrogate pair.
  const keepEnd = (content, available) => {
    let start = content.length - Math.floor(available);
    const here = content.charCodeAt(start);
    const before = content.charCodeAt(start - 1);
    const splitsPair = here >= 0xdc00 && here <= 0xdfff && before >= 0xd800 && before <= 0xdbff;
    if (splitsPair) start += 1;
    return TRUNCATION_NOTICE + content.slice(start);
  };

  // Measure every message once. JSON.stringify(array).length equals the sum of
  // the element lengths plus (count - 1) commas plus 2 brackets, so the
  // estimate below matches serializing the whole array without redoing that
  // work on every loop iteration.
  const fixedChars = system.reduce((sum, m) => sum + jsonLength(m), 0) + jsonLength(pinned);
  const tailLengths = nonSystem.slice(1).map(jsonLength);
  let tailChars = tailLengths.reduce((sum, len) => sum + len, 0);
  let dropped = 0;

  const estimate = () => {
    const count = system.length + 1 + (tailLengths.length - dropped);
    return Math.floor((fixedChars + tailChars + count + 1) / CHARS_PER_TOKEN);
  };

  // Drop intermediate messages oldest-first, always keeping the newest one.
  while (tailLengths.length - dropped > 1 && estimate() > tokenBudget) {
    tailChars -= tailLengths[dropped];
    dropped += 1;
  }
  let tail = nonSystem.slice(1 + dropped);

  if (tail.length === 1 && estimate() > tokenBudget) {
    const msg = tail[0];
    // Same value as JSON.stringify([...system, pinned]).length.
    const otherChars = fixedChars + system.length + 2;
    const available = budgetChars - otherChars - TRUNCATION_SLACK_CHARS;
    if (available > 0 && typeof msg.content === 'string' && msg.content.length > available) {
      tail = [{ ...msg, content: keepEnd(msg.content, available) }];
    }
  }

  if (tail.length === 0 && estimate() > tokenBudget) {
    const systemChars = JSON.stringify(system).length;
    const available = budgetChars - systemChars - TRUNCATION_SLACK_CHARS;
    if (available > 0 && typeof pinned.content === 'string' && pinned.content.length > available) {
      return [...system, { ...pinned, content: keepEnd(pinned.content, available) }];
    }
  }

  return tail.length > 0 ? [...system, pinned, ...tail] : [...system, pinned];
}
143
+
144
// Estimate the context split for the counter (Variant B, display-only).
//
// The API reports usage.prompt_tokens as one pre-summed figure, so the
// base-vs-working breakdown cannot be measured and is estimated here from the
// assembled payload. Both halves go through the same chars/4 estimator so
// they stay mutually consistent; the real prompt_tokens remains the
// authoritative number shown alongside.
//
//   base    = estimate(system messages) + estimate(serialized tool schema)
//   working = estimate(every non-system message)   <- the part that grows
//
// Intended to be recomputed per request, because the base shifts with
// native-vs-XML mode, dynamic (MCP) tools and plan-mode toggling. When
// `tools` is absent (XML mode) the tool weight is carried by the system
// prompt text, which the system-message estimate already covers.
//
// msgs:  message array; anything that is not an array counts as empty.
//        null/undefined entries are counted as an empty string, as working.
// tools: serializable tool schema, or a falsy value when none is sent.
// Returns { base, working } in estimated tokens (each floored). Pure.
function estimateContextSplit(msgs, tools) {
  const CHARS_PER_TOKEN = 4;
  const list = Array.isArray(msgs) ? msgs : [];
  const chars = { system: 0, working: 0 };

  for (let i = 0; i < list.length; i += 1) {
    const entry = list[i];
    const size = JSON.stringify(entry == null ? '' : entry).length;
    const bucket = entry && entry.role === 'system' ? 'system' : 'working';
    chars[bucket] += size;
  }

  let toolChars = 0;
  if (tools) toolChars = JSON.stringify(tools).length;

  const base = Math.floor((chars.system + toolChars) / CHARS_PER_TOKEN);
  const working = Math.floor(chars.working / CHARS_PER_TOKEN);
  return { base, working };
}
177
+
69
178
  function createApiClient({ getConfig, saveConfig, ui }) {
70
179
  const {
71
180
  BOLD,
@@ -252,6 +361,26 @@ function createApiClient({ getConfig, saveConfig, ui }) {
252
361
  });
253
362
  }
254
363
 
364
// Web search (Task W.2b): POST the query to the dashboard's /api/search with
// the caller's Bearer token and return whatever requestJson resolves to. Per
// the backend contract that is { results: [{title,url,snippet}, …] }, with an
// {error} envelope surfacing as a thrown Error from requestJson.
//
// query:   search text, sent as `query`.
// count:   optional; sent only when not null/undefined, so the backend can
//          clamp it.
// timeout: optional request timeout in ms; any falsy value falls back to
//          15000.
//
// Nothing is caught here — the caller (the web_search tool) is responsible
// for turning every failure mode into a clean tool error.
function dashboardSearch(query, { count, timeout } = {}) {
  const authToken = requireAuthToken();
  const body = count != null ? { query, count } : { query };
  const requestOptions = {
    method: 'POST',
    timeout: timeout || 15000,
    headers: { 'Authorization': `Bearer ${authToken}` },
    body,
  };
  return requestJson(dashboardUrl('/api/search'), requestOptions);
}
383
+
255
384
  function dashboardGetModelForCli(id) {
256
385
  const authToken = requireAuthToken();
257
386
  return requestJson(dashboardUrl(`/api/models/${encodeURIComponent(String(id))}/cli`), {
@@ -301,13 +430,33 @@ function createApiClient({ getConfig, saveConfig, ui }) {
301
430
  });
302
431
  }
303
432
 
304
- async function chatStream(messages, { model, temperature, maxTokens, linePrefix = '', showThink = false, onToken = null, silent = false, signal = null, onTrim = null, nativeTools = true } = {}) {
433
+ async function chatStream(messages, { model, temperature, maxTokens, linePrefix = '', showThink = false, onToken = null, onReasoning = null, silent = false, signal = null, onTrim = null, nativeTools = true } = {}) {
305
434
  // nativeTools is plumbed through for downstream use (tools param + tool_calls parsing); no behavior change yet.
306
435
  const config = getConfig();
307
436
  const resolvedModel = model || config.default_model;
308
437
 
309
438
  if (signal && signal.aborted) throw new Error('Aborted');
310
439
 
440
+ // Multimodal image input (Task 5.4). When any turn carries attached images,
441
+ // resolve the provider content-part shape (Anthropic-style vs OpenAI-style)
442
+ // and FAIL LOUD for a known text-only model — never silently drop the image
443
+ // from the payload (constraint #2). An unknown capability (null) proceeds and
444
+ // lets the endpoint reject cleanly.
445
+ const imagesPresent = messagesHaveImages(messages);
446
+ let imageFormat = null;
447
+ if (imagesPresent) {
448
+ imageFormat = selectImageFormat(config, resolvedModel);
449
+ const vision = resolveVisionCapability(config, resolvedModel);
450
+ if (vision === false) {
451
+ const n = countImages(messages);
452
+ throw new Error(
453
+ `Model "${resolvedModel}" is not vision-capable, but ${n} image${n === 1 ? '' : 's'} ` +
454
+ `${n === 1 ? 'was' : 'were'} attached. Select a vision-capable model, or set ` +
455
+ `vision:true on the model profile if this endpoint does accept images.`,
456
+ );
457
+ }
458
+ }
459
+
311
460
  let trimNotified = false;
312
461
  function notifyTrim(info) {
313
462
  if (trimNotified) return;
@@ -317,51 +466,8 @@ function createApiClient({ getConfig, saveConfig, ui }) {
317
466
  }
318
467
  }
319
468
 
320
- // Fit messages into tokenBudget tokens.
321
- // Uses chars/4 aligned with estimateTokens; a deliberate under-estimate
322
- // for token-dense content (code, JSON, HTML) but consistent across the
323
- // codebase.
324
- //
325
- // Always keeps: system prompt + first non-system message (original task).
326
- // Drops intermediate messages oldest-first, then truncates the last tail
327
- // message (typically a large tool result) if still over budget.
328
- function trimToTokenBudget(msgs, tokenBudget) {
329
- const CHARS_PER_TOKEN = 4;
330
- const system = msgs.filter((m) => m.role === 'system');
331
- const nonSystem = msgs.filter((m) => m.role !== 'system');
332
- if (nonSystem.length === 0) return [...system];
333
-
334
- const pinned = nonSystem[0]; // original task — never dropped
335
- let tail = nonSystem.slice(1);
336
-
337
- const estimate = () => {
338
- const all = tail.length > 0 ? [...system, pinned, ...tail] : [...system, pinned];
339
- return Math.floor(JSON.stringify(all).length / CHARS_PER_TOKEN);
340
- };
341
-
342
- while (tail.length > 1 && estimate() > tokenBudget) {
343
- tail = tail.slice(1);
344
- }
345
-
346
- if (tail.length === 1 && estimate() > tokenBudget) {
347
- const msg = tail[0];
348
- const otherChars = JSON.stringify([...system, pinned]).length;
349
- const available = tokenBudget * CHARS_PER_TOKEN - otherChars - 200;
350
- if (available > 0 && typeof msg.content === 'string' && msg.content.length > available) {
351
- tail = [{ ...msg, content: '[…content truncated to fit model limit…]\n' + msg.content.slice(-available) }];
352
- }
353
- }
354
-
355
- if (tail.length === 0 && estimate() > tokenBudget) {
356
- const systemChars = JSON.stringify(system).length;
357
- const available = tokenBudget * CHARS_PER_TOKEN - systemChars - 200;
358
- if (available > 0 && typeof pinned.content === 'string' && pinned.content.length > available) {
359
- return [...system, { ...pinned, content: '[…content truncated to fit model limit…]\n' + pinned.content.slice(-available) }];
360
- }
361
- }
362
-
363
- return tail.length > 0 ? [...system, pinned, ...tail] : [...system, pinned];
364
- }
469
+ // trimToTokenBudget is a pure, module-scope helper (lifted out of this
470
+ // closure in Task 1.1 so it can be unit-tested directly; body unchanged).
365
471
 
366
472
  // Proactive trim: prefer a limit learned from a prior 400 overflow; otherwise
367
473
  // fall back to config.context_length (with a ~10% safety margin) as a hint.
@@ -409,7 +515,10 @@ function createApiClient({ getConfig, saveConfig, ui }) {
409
515
  const callable = Object.fromEntries(
410
516
  Object.entries(TOOL_SPECS).filter(([, spec]) => !spec.wrapper)
411
517
  );
412
- payload.tools = buildToolsSchema(callable);
518
+ // Dynamic MCP tools (Task 3.3) advertise their schema here too, so the
519
+ // model can emit native tool_calls against `mcp__server__tool` names that
520
+ // dispatch through the same registry path as built-ins.
521
+ payload.tools = buildToolsSchema({ ...callable, ...dynamicToolSpecs() });
413
522
  payload.tool_choice = 'auto';
414
523
  }
415
524
 
@@ -418,14 +527,26 @@ function createApiClient({ getConfig, saveConfig, ui }) {
418
527
  async function doRequest(msgs) {
419
528
  if (dbg.isFile()) debugDumpMessages(msgs);
420
529
  validateToolCallInvariant(msgs);
421
- const reqPayload = { ...payload, messages: msgs };
530
+ // Transform any image-bearing turn into the provider-specific multimodal
531
+ // content[] shape right before the wire (Task 5.4); the internal `images`
532
+ // field never leaves the client. The Phase 6a `_display` descriptor sibling
533
+ // (persisted on native tool messages for replay) is likewise client-only —
534
+ // strip it here so it is never fed to the model.
535
+ const wireMsgs = stripInternalKeys(imagesPresent ? buildProviderMessages(msgs, imageFormat) : msgs);
536
+ const reqPayload = { ...payload, messages: wireMsgs };
537
+ // Optional payload augmentations (Task 2.7): reasoning_effort for models
538
+ // that support it, and prompt-caching markers on the stable prefix when
539
+ // the user has opted in (config.prompt_caching). Both no-op otherwise.
540
+ applyReasoningEffort(reqPayload, config.reasoning_effort, resolvedModel, { force: !!config.reasoning_effort_force });
541
+ applyPromptCaching(reqPayload, config.prompt_caching === true);
422
542
  const reqBody = JSON.stringify(reqPayload);
423
543
  const res = await httpRequest(endpoint, {
424
544
  method: 'POST',
425
545
  timeout: config.request_timeout_ms,
426
546
  headers: {
427
547
  'Content-Type': 'application/json',
428
- 'Authorization': `Bearer ${config.api_key}`,
548
+ // Precedence: SEMALT_API_KEY env → OS keychain → config.api_key.
549
+ 'Authorization': `Bearer ${resolveApiKey(config)}`,
429
550
  'Content-Length': Buffer.byteLength(reqBody),
430
551
  },
431
552
  signal,
@@ -592,10 +713,18 @@ function createApiClient({ getConfig, saveConfig, ui }) {
592
713
  };
593
714
  }
594
715
  const elapsedMs = Date.now() - startTime;
716
+ // Estimated base/working split (Variant B, display-only) computed from
717
+ // the payload ACTUALLY sent — trimmedMessages holds the final value
718
+ // after any 413/400-overflow retry, and payload.tools is present only in
719
+ // native mode (XML mode embeds tools in the system prompt, captured by
720
+ // the system-message estimate). Recomputed every request so it stays
721
+ // correct when MCP connects or plan mode toggles mid-session.
722
+ const contextEstimate = estimateContextSplit(trimmedMessages, payload.tools);
595
723
  resolve({
596
724
  content: fullText,
597
725
  toolCalls: nativeTools ? validToolCalls : [],
598
726
  usage,
727
+ context_estimate: contextEstimate,
599
728
  usage_from_provider: !!streamUsage,
600
729
  tool_calls_count: validToolCalls.length,
601
730
  finish_reason: streamFinishReason,
@@ -676,6 +805,14 @@ function createApiClient({ getConfig, saveConfig, ui }) {
676
805
  const uiActive = isUIActive();
677
806
  if (!inReasoning) {
678
807
  inReasoning = true;
808
+ // Live-narration safety signal (a): the model demonstrably uses
809
+ // the structured reasoning_content channel this turn, so any
810
+ // delta.content that follows is narration, not inlined reasoning.
811
+ // Fire once per stream so the UI can eager-open its live gate on
812
+ // the native rail. Failures here must never break the stream.
813
+ if (typeof onReasoning === 'function') {
814
+ try { onReasoning(); } catch { /* UI signal is best-effort */ }
815
+ }
679
816
  if (showThink && !uiActive) {
680
817
  // audit: allowed — non-TUI thinking output, interleaves with StreamRenderer sync writes.
681
818
  process.stdout.write(`\n ${FG_DARK}${DIM}⟨thinking⟩${RST}`);
@@ -773,7 +910,8 @@ function createApiClient({ getConfig, saveConfig, ui }) {
773
910
  timeout: config.request_timeout_ms,
774
911
  headers: {
775
912
  'Content-Type': 'application/json',
776
- 'Authorization': `Bearer ${config.api_key}`,
913
+ // Precedence: SEMALT_API_KEY env → OS keychain → config.api_key.
914
+ 'Authorization': `Bearer ${resolveApiKey(config)}`,
777
915
  'Content-Length': Buffer.byteLength(body),
778
916
  },
779
917
  }, body);
@@ -812,9 +950,55 @@ function createApiClient({ getConfig, saveConfig, ui }) {
812
950
  });
813
951
  }
814
952
 
953
// Quiet, non-streaming completion. Unlike chatSync it writes nothing to
// scrollback and does not route errors through the UI: it resolves with the
// assistant text or rejects, leaving the fallback decision to a programmatic
// caller (the web-fetch secondary summarizer, Task W.1). No native tools, no
// streaming chrome.
//
// messages:    chat messages, sent as-is.
// model:       optional; defaults to config.default_model.
// temperature: used only when it is a number; otherwise config.temperature.
// signal:      optional abort signal handed to httpRequest.
//
// Resolves with choices[0].message.content, or '' when that path is missing
// or empty. Rejects with "HTTP <status> — <first 200 chars of body>" on a
// non-200 response, "Parse error: …" on malformed JSON, or the response
// stream's own error.
async function chatComplete(messages, { model, temperature, signal } = {}) {
  const config = getConfig();
  const effectiveTemperature = typeof temperature === 'number' ? temperature : config.temperature;
  const body = JSON.stringify({
    model: model || config.default_model,
    messages,
    temperature: effectiveTemperature,
    stream: false,
  });

  const res = await httpRequest(apiUrl('/v1/chat/completions'), {
    method: 'POST',
    timeout: config.request_timeout_ms,
    signal: signal || undefined,
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${resolveApiKey(config)}`,
      'Content-Length': Buffer.byteLength(body),
    },
  }, body);

  // Pull the assistant text out of a raw response body; throws on bad JSON.
  const readAssistantText = (raw) => {
    const parsed = JSON.parse(raw);
    const firstChoice = parsed && parsed.choices && parsed.choices[0];
    const message = firstChoice && firstChoice.message;
    const text = message ? message.content : '';
    return text || '';
  };

  return new Promise((resolve, reject) => {
    let raw = '';
    res.setEncoding('utf8');
    res.on('error', reject);
    res.on('data', (piece) => { raw += piece; });
    res.on('end', () => {
      if (res.statusCode !== 200) {
        reject(new Error(`HTTP ${res.statusCode} — ${String(raw).slice(0, 200)}`));
        return;
      }
      try {
        resolve(readAssistantText(raw));
      } catch (error) {
        reject(new Error(`Parse error: ${error.message}`));
      }
    });
  });
}
997
+
815
998
  return {
816
999
  chatStream,
817
1000
  chatSync,
1001
+ chatComplete,
818
1002
  dashboardCreateChat,
819
1003
  dashboardGetChat,
820
1004
  dashboardGetModelForCli,
@@ -822,6 +1006,7 @@ function createApiClient({ getConfig, saveConfig, ui }) {
822
1006
  dashboardListModels,
823
1007
  dashboardLogout,
824
1008
  dashboardSaveMessages,
1009
+ dashboardSearch,
825
1010
  dashboardWhoAmI,
826
1011
  estimateTokens,
827
1012
  getCliLoginStatus,
@@ -832,4 +1017,8 @@ function createApiClient({ getConfig, saveConfig, ui }) {
832
1017
 
833
1018
  module.exports = {
834
1019
  createApiClient,
1020
+ // Exported for unit testing (Task 1.1). Pure helper, no runtime behavior change.
1021
+ trimToTokenBudget,
1022
+ // Exported for unit testing the split-context counter (Variant B). Pure helper.
1023
+ estimateContextSplit,
835
1024
  };
package/lib/args.js CHANGED
@@ -17,6 +17,12 @@ function parseArgs(argv) {
17
17
  case '--file':
18
18
  (opts.file = opts.file || []).push(argv[++i]);
19
19
  break;
20
+ case '--image':
21
+ // Multimodal image input (Task 5.4). Repeatable: attach one or more
22
+ // images (PNG/JPEG/WebP/GIF) to the user turn. Read through isPathSafe,
23
+ // size-checked, base64-encoded by the entry point (lib/images.js).
24
+ (opts.image = opts.image || []).push(argv[++i]);
25
+ break;
20
26
  case '-a':
21
27
  case '--analyze':
22
28
  opts.analyze = true;
@@ -24,6 +30,28 @@ function parseArgs(argv) {
24
30
  case '--dry-run':
25
31
  opts.dryRun = true;
26
32
  break;
33
+ case '-p':
34
+ case '--print':
35
+ opts.print = true;
36
+ break;
37
+ case '-b':
38
+ case '--background':
39
+ // Launch the task as a detached background process (Task 5.3). Used by
40
+ // `semalt-code run --background <prompt>`. The permission policy is fixed
41
+ // from the other flags at launch and cannot change after detach.
42
+ opts.background = true;
43
+ break;
44
+ case '--output-format': {
45
+ const v = argv[++i];
46
+ const allowed = ['text', 'json', 'stream-json'];
47
+ if (!allowed.includes(v)) {
48
+ process.stderr.write(`Error: --output-format must be one of ${allowed.join(', ')}.\n`);
49
+ process.exit(1);
50
+ }
51
+ opts.outputFormat = v;
52
+ opts.print = true; // selecting a machine format implies headless
53
+ break;
54
+ }
27
55
  case '--api-base':
28
56
  opts.apiBase = argv[++i];
29
57
  break;
@@ -55,8 +83,52 @@ function parseArgs(argv) {
55
83
  case '--readonly':
56
84
  opts.readonly = true;
57
85
  break;
58
- case '--new':
59
- opts.new = true;
86
+ case '--plan':
87
+ opts.plan = true;
88
+ break;
89
+ case '--no-verify':
90
+ // One-off skip of self-verification (Task 4.2) for this invocation, in
91
+ // BOTH advisory and enforcing modes. Threaded into runAgentLoop opts.
92
+ opts.noVerify = true;
93
+ break;
94
+ case '--max-iterations': {
95
+ // Cap on agent-loop iterations per turn. A positive integer caps the
96
+ // loop; 0 or 'unlimited' removes the cap (power-user choice). The value
97
+ // also flows through flagsConfigLayer (config.js) into config.max_iterations;
98
+ // it's consumed here so it isn't mis-parsed as a positional.
99
+ const v = argv[++i];
100
+ const ok = v !== undefined && (v === 'unlimited' || /^\d+$/.test(v));
101
+ if (!ok) {
102
+ process.stderr.write(`Error: --max-iterations requires a non-negative integer or "unlimited".\n`);
103
+ process.exit(1);
104
+ }
105
+ opts.maxIterations = v;
106
+ break;
107
+ }
108
+ case '--reasoning-effort':
109
+ // Consumed here so the value isn't mis-parsed as a positional; the
110
+ // runtime override flows through flagsConfigLayer (config.js).
111
+ opts.reasoningEffort = argv[++i];
112
+ break;
113
+ case '--prompt-caching':
114
+ opts.promptCaching = true;
115
+ break;
116
+ case '--allow-anywhere':
117
+ opts.allowAnywhere = true;
118
+ break;
119
+ case '--no-network':
120
+ // Binary network isolation (Task 4.4b): force kernel-level no-network for
121
+ // sandboxed commands (bwrap --unshare-net / Seatbelt deny network*). A
122
+ // human-only opt-in; the model can never reach it. The sandbox decision
123
+ // (lib/sandbox.js resolveSandboxedSpawn) reads the flag from argv directly,
124
+ // so this just records intent + keeps it out of the positional args.
125
+ opts.noNetwork = true;
126
+ break;
127
+ case '--dangerously-skip-permissions':
128
+ // The single explicit opt-out of ALL safety: disables the destructive
129
+ // command deny-list and the config-file read guard, and fully
130
+ // auto-approves every tool call. Pre-scanned in index.js too.
131
+ opts.dangerouslySkipPermissions = true;
60
132
  break;
61
133
  case '--show-think':
62
134
  opts.showThink = true;
package/lib/audit.js CHANGED
@@ -28,4 +28,26 @@ function logToolCall(tag, input, approved, resultStatus) {
28
28
  }
29
29
  }
30
30
 
31
- module.exports = { AUDIT_LOG, logToolCall };
31
// Render a checkpoint note as text for the audit row. Strings pass through,
// a missing note becomes '', and anything JSON.stringify cannot render
// (circular structures, BigInt, functions) falls back to String(note) so the
// row is still written.
function describeCheckpointNote(note) {
  if (typeof note === 'string') return note;
  if (note === undefined) return '';
  try {
    const json = JSON.stringify(note);
    if (typeof json === 'string') return json;
  } catch {
    // not serializable — use the String() fallback below
  }
  return String(note);
}

// Checkpoint activity (Task 4.3). Appends one JSON line tagged `checkpoint`
// to AUDIT_LOG so the audit log shows when prior file state was snapshotted
// before a mutation (and on rewind).
//
// seq:  per-session checkpoint sequence number.
// note: the action + affected path(s) or the rewind outcome; a string is used
//       as-is, other values are JSON-encoded. Text longer than 200 characters
//       is cut to 197 plus '...'.
//
// Like logToolCall this never throws: any failure while building or writing
// the row is swallowed so auditing cannot break the operation being audited.
function logCheckpoint(seq, note) {
  try {
    let noteStr = describeCheckpointNote(note);
    if (noteStr.length > 200) noteStr = noteStr.slice(0, 197) + '...';
    const entry = JSON.stringify({
      ts: new Date().toISOString(),
      tag: 'checkpoint',
      input: `checkpoint:${seq} ${noteStr}`,
      approved: true,
      result: 'ok',
    });
    fs.appendFileSync(AUDIT_LOG, entry + '\n');
  } catch {
    // never throw
  }
}
52
+
53
+ module.exports = { AUDIT_LOG, logToolCall, logCheckpoint };