@semalt-ai/code 1.8.5 → 1.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192) hide show
  1. package/.claude/settings.local.json +7 -1
  2. package/.github/workflows/ci.yml +69 -0
  3. package/ARCHITECTURE.md +6 -95
  4. package/CLAUDE.md +196 -316
  5. package/README.md +148 -4
  6. package/docs/ARCHITECTURE.md +1321 -0
  7. package/docs/CONFIG.md +340 -0
  8. package/docs/HISTORY.md +245 -0
  9. package/examples/embed.js +74 -0
  10. package/index.js +251 -10
  11. package/lib/agent.js +856 -120
  12. package/lib/api.js +239 -50
  13. package/lib/args.js +74 -2
  14. package/lib/audit.js +23 -1
  15. package/lib/background.js +584 -0
  16. package/lib/checkpoints.js +757 -0
  17. package/lib/commands/auth.js +94 -0
  18. package/lib/commands/chat-session.js +489 -0
  19. package/lib/commands/chat-slash.js +415 -0
  20. package/lib/commands/chat-turn.js +669 -0
  21. package/lib/commands/chat.js +407 -0
  22. package/lib/commands/custom.js +157 -0
  23. package/lib/commands/history-utils.js +66 -0
  24. package/lib/commands/index.js +268 -0
  25. package/lib/commands/mcp.js +113 -0
  26. package/lib/commands/oneshot.js +193 -0
  27. package/lib/commands/registry.js +269 -0
  28. package/lib/commands/tasks.js +89 -0
  29. package/lib/compact.js +87 -0
  30. package/lib/config.js +360 -11
  31. package/lib/constants.js +401 -3
  32. package/lib/deny.js +199 -0
  33. package/lib/doctor.js +160 -0
  34. package/lib/headless.js +202 -0
  35. package/lib/hooks.js +286 -0
  36. package/lib/images.js +270 -0
  37. package/lib/internals.js +49 -0
  38. package/lib/mcp/boundary.js +131 -0
  39. package/lib/mcp/client.js +270 -0
  40. package/lib/mcp/oauth.js +134 -0
  41. package/lib/memory.js +209 -0
  42. package/lib/metrics.js +37 -2
  43. package/lib/payload.js +54 -0
  44. package/lib/permission-rules.js +401 -0
  45. package/lib/permissions.js +123 -26
  46. package/lib/pricing.js +67 -0
  47. package/lib/proc.js +62 -0
  48. package/lib/prompts.js +99 -8
  49. package/lib/sandbox.js +568 -0
  50. package/lib/sdk.js +328 -0
  51. package/lib/secrets.js +211 -0
  52. package/lib/skills.js +223 -0
  53. package/lib/subagents.js +516 -0
  54. package/lib/tool_registry.js +2862 -0
  55. package/lib/tool_specs.js +263 -9
  56. package/lib/tools.js +352 -1039
  57. package/lib/ui/anim.js +86 -0
  58. package/lib/ui/ansi.js +17 -27
  59. package/lib/ui/chat-history.js +253 -71
  60. package/lib/ui/create-ui.js +67 -24
  61. package/lib/ui/diff.js +90 -25
  62. package/lib/ui/file-activity.js +236 -0
  63. package/lib/ui/format.js +195 -29
  64. package/lib/ui/input-field.js +21 -11
  65. package/lib/ui/md-stream.js +234 -0
  66. package/lib/ui/render-operation.js +113 -0
  67. package/lib/ui/select.js +1 -4
  68. package/lib/ui/status-bar.js +146 -36
  69. package/lib/ui/stream.js +20 -13
  70. package/lib/ui/theme.js +190 -44
  71. package/lib/ui/tool-operation.js +190 -0
  72. package/lib/ui/utils.js +9 -5
  73. package/lib/ui/web-activity.js +270 -0
  74. package/lib/ui/writer.js +159 -45
  75. package/lib/ui.js +1 -1
  76. package/lib/verify.js +229 -0
  77. package/lib/web-extract.js +213 -0
  78. package/lib/web-summarize.js +68 -0
  79. package/package.json +19 -4
  80. package/scripts/lint.js +57 -0
  81. package/test/agent-loop.test.js +389 -0
  82. package/test/anim-driver.test.js +153 -0
  83. package/test/ask-user-display.test.js +226 -0
  84. package/test/ask-user-gate.test.js +231 -0
  85. package/test/background.test.js +414 -0
  86. package/test/chat-history-nocolor.test.js +155 -0
  87. package/test/chat-relogin.test.js +207 -0
  88. package/test/chat.test.js +114 -0
  89. package/test/checkpoints-agent.test.js +181 -0
  90. package/test/checkpoints.test.js +650 -0
  91. package/test/command-registry.test.js +160 -0
  92. package/test/compact.test.js +116 -0
  93. package/test/completion-lazy.test.js +52 -0
  94. package/test/config-merge.test.js +324 -0
  95. package/test/config-quarantine.test.js +128 -0
  96. package/test/config-write-guard-allow-anywhere.test.js +56 -0
  97. package/test/config-write-guard-skip.test.js +46 -0
  98. package/test/config-write-guard.test.js +153 -0
  99. package/test/context-split.test.js +215 -0
  100. package/test/cost-doctor.test.js +142 -0
  101. package/test/custom-commands-chat.test.js +106 -0
  102. package/test/custom-commands.test.js +230 -0
  103. package/test/defer-detail-band.test.js +403 -0
  104. package/test/deny-windows.test.js +120 -0
  105. package/test/deny.test.js +83 -0
  106. package/test/detail-band-tab-flatten.test.js +242 -0
  107. package/test/download-allow-anywhere.test.js +66 -0
  108. package/test/download-confine.test.js +153 -0
  109. package/test/exec-diff.test.js +268 -0
  110. package/test/executors.test.js +599 -0
  111. package/test/extract-tool-calls.test.js +349 -0
  112. package/test/fetch-url-validation.test.js +219 -0
  113. package/test/file-activity.test.js +522 -0
  114. package/test/fixtures/tool-calls.js +57 -0
  115. package/test/fixtures/web-page.js +91 -0
  116. package/test/git-tools.test.js +384 -0
  117. package/test/grep-glob-serialize.test.js +242 -0
  118. package/test/grep-glob.test.js +268 -0
  119. package/test/grep-path-target.test.js +227 -0
  120. package/test/harness/README.md +57 -0
  121. package/test/harness/chat-harness.js +143 -0
  122. package/test/harness/memwarn-headless-child.js +65 -0
  123. package/test/harness/mock-llm.js +120 -0
  124. package/test/harness/mock-mcp-server.js +142 -0
  125. package/test/harness/sse-server.js +69 -0
  126. package/test/headless.test.js +348 -0
  127. package/test/history-utils.test.js +88 -0
  128. package/test/hooks-agent.test.js +238 -0
  129. package/test/hooks-verify-sandbox.test.js +232 -0
  130. package/test/hooks.test.js +216 -0
  131. package/test/http-get-user-agent.test.js +142 -0
  132. package/test/images-api.test.js +208 -0
  133. package/test/images.test.js +238 -0
  134. package/test/input-field-ctrl-o.test.js +37 -0
  135. package/test/live-height-physical.test.js +281 -0
  136. package/test/max-iterations.test.js +218 -0
  137. package/test/mcp-boundary.test.js +57 -0
  138. package/test/mcp-client.test.js +267 -0
  139. package/test/mcp-oauth.test.js +86 -0
  140. package/test/md-stream.test.js +183 -0
  141. package/test/memory-truncation-warning.test.js +222 -0
  142. package/test/memory.test.js +198 -0
  143. package/test/native-dispatch.test.js +409 -0
  144. package/test/native-live-narration.test.js +254 -0
  145. package/test/output-chokepoint.test.js +188 -0
  146. package/test/output-heredoc-leak.test.js +195 -0
  147. package/test/output-preview.test.js +245 -0
  148. package/test/path-guards.test.js +134 -0
  149. package/test/payload.test.js +99 -0
  150. package/test/permission-rules-agent.test.js +210 -0
  151. package/test/permission-rules.test.js +297 -0
  152. package/test/permissions.test.js +362 -0
  153. package/test/plan-mode.test.js +167 -0
  154. package/test/read-paginate.test.js +275 -0
  155. package/test/readonly-tools.test.js +177 -0
  156. package/test/render-operation.test.js +317 -0
  157. package/test/replay-descriptor-xml.test.js +216 -0
  158. package/test/replay-descriptor.test.js +189 -0
  159. package/test/replay-web-aggregate.test.js +291 -0
  160. package/test/replay-web-persist.test.js +241 -0
  161. package/test/result-cap.test.js +233 -0
  162. package/test/running-glyph-anim.test.js +111 -0
  163. package/test/sandbox-agent.test.js +147 -0
  164. package/test/sandbox-integration.test.js +216 -0
  165. package/test/sandbox.test.js +408 -0
  166. package/test/sdk.test.js +234 -0
  167. package/test/shell-output-cap.test.js +181 -0
  168. package/test/skills-chat.test.js +110 -0
  169. package/test/skills.test.js +295 -0
  170. package/test/smoke.test.js +68 -0
  171. package/test/status-bar-driver.test.js +93 -0
  172. package/test/status-bar-pause.test.js +164 -0
  173. package/test/status-bar-resync.test.js +188 -0
  174. package/test/stream-parser.test.js +171 -0
  175. package/test/subagents-agent.test.js +178 -0
  176. package/test/subagents.test.js +222 -0
  177. package/test/theme-palette.test.js +166 -0
  178. package/test/tool-registry.test.js +85 -0
  179. package/test/trim-budget.test.js +101 -0
  180. package/test/truncate-visible.test.js +78 -0
  181. package/test/verify-agent.test.js +317 -0
  182. package/test/verify.test.js +141 -0
  183. package/test/view-image.test.js +199 -0
  184. package/test/web-activity-ordering.test.js +203 -0
  185. package/test/web-activity.test.js +207 -0
  186. package/test/web-data-extraction-guidance.test.js +71 -0
  187. package/test/web-extract.test.js +185 -0
  188. package/test/web-fetch-agent.test.js +291 -0
  189. package/test/web-fetch-mode.test.js +193 -0
  190. package/test/web-search.test.js +380 -0
  191. package/lib/commands.js +0 -1438
  192. package/path +0 -1
package/lib/constants.js CHANGED
@@ -1,5 +1,6 @@
1
1
  'use strict';
2
2
 
3
+ const fs = require('fs');
3
4
  const os = require('os');
4
5
  const path = require('path');
5
6
 
@@ -7,6 +8,170 @@ const PACKAGE_JSON = require('../package.json');
7
8
 
8
9
  const DEFAULT_API_TIMEOUT_MS = 15 * 60 * 1000;
9
10
 
11
+ // Default cap on agent-loop iterations per user turn. This is the single source
12
+ // of truth for the bound: it seeds DEFAULT_CONFIG.max_iterations (overridable via
13
+ // --max-iterations / config) and is also the factory default of runAgentLoop, so
14
+ // even a caller that omits the value gets a real cap rather than an unbounded
15
+ // loop. A config value of 0 (the "unlimited" sentinel) opts out — see
16
+ // resolveMaxIterations in lib/config.js.
17
+ const DEFAULT_MAX_ITERATIONS = 125;
18
+
19
+ // Self-verification (Task 4.2). When the agent declares a task done, an optional
20
+ // configured shell command (e.g. `npm test`) is run and its result fed back.
21
+ // These defaults seed DEFAULT_CONFIG.verify and lib/verify.js normalizeVerify.
22
+ // * DEFAULT_VERIFY_TIMEOUT_MS — a hung verify (e.g. a stuck `npm test`) must
23
+ // not hang the agent; on timeout the verify is treated as a failure.
24
+ // * DEFAULT_VERIFY_MAX_ATTEMPTS — in enforcing mode, the agent re-enters the
25
+ // loop on a failing verify, bounded by this many attempts (distinct from and
26
+ // much smaller than the coarse iteration cap) before terminating with the
27
+ // `verify_failed` stop reason.
28
+ const DEFAULT_VERIFY_TIMEOUT_MS = 120000;
29
+ const DEFAULT_VERIFY_MAX_ATTEMPTS = 3;
30
+
31
+ // Checkpoints & rewind (Task 4.3). Before each file-tool mutation the prior file
32
+ // state is snapshotted so `/rewind` (and `semalt-code rewind`) can restore it.
33
+ // These defaults seed DEFAULT_CONFIG.checkpoints and lib/checkpoints.js.
34
+ // * DEFAULT_CHECKPOINT_MAX_FILE_BYTES — a file larger than this is NOT
35
+ // snapshotted (recorded as rewind-unavailable) rather than silently
36
+ // exhausting disk. The mutation still proceeds.
37
+ // * DEFAULT_CHECKPOINT_MAX_PER_SESSION — retention cap; the oldest checkpoints
38
+ // in a session are pruned once this many exist.
39
+ const DEFAULT_CHECKPOINT_MAX_FILE_BYTES = 5 * 1024 * 1024;
40
+ const DEFAULT_CHECKPOINT_MAX_PER_SESSION = 100;
41
+
42
+ // Multimodal image input (Task 5.4). Cap on the RAW bytes of an attached image
43
+ // before base64-encoding (base64 inflates the payload ~33%). A clear pre-send
44
+ // error on exceed beats an opaque endpoint rejection of an oversized payload.
45
+ // 5 MB matches the common per-image ceiling of vision endpoints.
46
+ const DEFAULT_IMAGE_MAX_BYTES = 5 * 1024 * 1024;
47
+
48
+ // grep/glob context bound (Task W.5). The engine returns up to GREP_MAX_MATCHES
49
+ // (1000) / GLOB_MAX_FILES (5000) — internal caps that were NEVER a context bound
50
+ // (the structured result used to be dropped before reaching the model). These
51
+ // head_limit defaults are the real context bound: a fixed grep on a common
52
+ // pattern serializes at most this many items into context, with a truncation
53
+ // notice telling the agent how to narrow (refine the pattern, use
54
+ // output_mode="count"/"files_with_matches", or raise head_limit). Model-overridable
55
+ // per call via the head_limit parameter.
56
+ const DEFAULT_GREP_HEAD_LIMIT = 100;
57
+ const DEFAULT_GLOB_HEAD_LIMIT = 100;
58
+ // Token safety net for grep/glob serialized output (Task W.9). head_limit bounds
59
+ // the COUNT of matches/files, but — like the shell line cap (W.6) — a count bound
60
+ // does NOT bound tokens: 100 matches of a 5000-char minified line is ~125k tokens.
61
+ // Routing grep/glob through the shared boundToolOutput chokepoint adds this token
62
+ // backstop so a pathological huge-line result cannot blow context. A normal grep
63
+ // (at most head_limit short lines) is never clipped; this only catches the few-but-huge case.
64
+ const DEFAULT_GREP_GLOB_MAX_TOKENS = 10000;
65
+
66
+ // read_file pagination context bound (Task W.7). read_file used to dump the WHOLE
67
+ // file into context verbatim (the only guard was a hard byte refusal at
68
+ // max_file_size_kb) — worst case ~128k tokens for a 500 KB file. The fix mirrors
69
+ // the Claude Code standard: read the first page (a ~2000-LINE cap) + a PARTIAL
70
+ // notice telling the model the range shown, the total, and the start_line for the
71
+ // next page. start_line/end_line return an explicit slice (also line-capped, so a
72
+ // huge explicit range cannot dump everything). A token safety net (like W.6's)
73
+ // bounds the pathological few-but-enormous-lines case the line cap misses.
74
+ // - DEFAULT_READ_LINE_CAP: lines returned in one page (and the width of an
75
+ // explicit start_line window). Model-overridable by narrowing the range; the
76
+ // operator can tune via config.read_line_cap.
77
+ // - DEFAULT_READ_MAX_TOKENS: token ceiling on the page. Generous — a normal
78
+ // 2000-line source page (~10-20k tokens) is never clipped; only pages of
79
+ // pathologically long lines (minified JS, a single megabyte line) are.
80
+ // - DEFAULT_READ_MAX_FILE_KB: the BYTE BACKSTOP (max_file_size_kb default).
81
+ // Pagination — not this — is now the PRIMARY bound: a large line-readable
82
+ // file paginates instead of hard-refusing. This stays only as a sane upper
83
+ // ceiling so a multi-GB file is never slurped whole into memory. An operator
84
+ // can still lower it to hard-refuse smaller files.
85
+ const DEFAULT_READ_LINE_CAP = 2000;
86
+ const DEFAULT_READ_MAX_TOKENS = 25000;
87
+ const DEFAULT_READ_MAX_FILE_KB = 51200; // 50 MB
88
+
89
+ // Shell/exec output context bound (Task W.6). Shell stdout+stderr used to enter
90
+ // context VERBATIM and UNBOUNDED (`max_output_lines` was applied only in the UI
91
+ // renderer, never to the model-facing message) — the #1 context risk: one
92
+ // `seq 1 5000` / `cat` / test run / build could dump tens of thousands of tokens.
93
+ // The fix is a DOUBLE bound (like `download`'s byte-cap + path-guard):
94
+ // 1. Head+tail line cap of `max_output_lines` — keep the first OUTPUT_HEAD_RATIO
95
+ // of the budget and the last (1-ratio), eliding the middle. BOTH ends matter:
96
+ // the commands that ran at the top AND the pass/fail summary / error at the
97
+ // bottom. A head-only cap would drop the result — the most important part.
98
+ // 2. Token safety net (DEFAULT_OUTPUT_MAX_TOKENS) — a single line can be enormous
99
+ // (minified JS on one line, a `cat` of a binary), so the line cap alone does
100
+ // NOT bound tokens. Reuses the web pipeline's capToTokens after the line cap.
101
+ // The truncation notice teaches the now-working (Task W.5) redirect-to-file → grep
102
+ // pattern instead of re-running the command to see more. The exit code stays on
103
+ // its own line, so truncating output volume never hides the command's outcome.
104
+ const DEFAULT_MAX_OUTPUT_LINES = 50;
105
+ // Fraction of the line budget kept as HEAD (the rest is the tail). 0.6 → first 30
106
+ // + last 20 for the default 50-line budget.
107
+ const OUTPUT_HEAD_RATIO = 0.6;
108
+ // Token ceiling for shell output entering context. Comfortably above what a normal
109
+ // `max_output_lines` (50) run produces (~1-3k tokens), so it never interferes with
110
+ // line-bounded output — it only catches the pathological few-but-huge-lines case.
111
+ const DEFAULT_OUTPUT_MAX_TOKENS = 10000;
112
+
113
+ // File-edit diff display bound (execution-time diff rendering). Every mutating
114
+ // file edit (write/append/edit_file/replace_in_file) renders its diff at the
115
+ // moment it executes — decoupled from the permission modal, so an auto-approved
116
+ // edit shows its changes just like a manually-approved one. `diff_max_lines`
117
+ // caps the number of CHANGED (+/-) lines shown: a small edit (or a series of
118
+ // small edits) renders in full; one large edit shows head+tail of the changed
119
+ // lines with a `… K more changed lines (N total)` notice (mirrors the W.6
120
+ // shell head+tail discipline). Operator-overridable via config.diff_max_lines.
121
+ const DEFAULT_DIFF_MAX_LINES = 50;
122
+
123
+ // Collapsed output-preview bound (Output Refactor — Phase 5). Shell / MCP /
124
+ // subagent output is shown in MODERATION in the chrome: the first
125
+ // `shell_preview_lines` lines render below the result line, then a static
126
+ // `… N more lines` hint. There is no in-terminal way to expand — full viewing is
127
+ // deferred to the planned transcript viewer. This is DISPLAY-ONLY — the model
128
+ // still receives the full output via boundToolOutput; this cap never touches
129
+ // context.
130
+ // Diffs (file edits) are NOT subject to this — they render expanded to
131
+ // `diff_max_lines` (the user explicitly wants to see diffs). Operator-overridable
132
+ // via config.shell_preview_lines.
133
+ const DEFAULT_SHELL_PREVIEW_LINES = 5;
134
+
135
+ // MCP & subagent result context bounds (Task W.8). MCP tool results
136
+ // (lib/mcp/client.js mcpResultToText) and subagent final text (lib/subagents.js)
137
+ // were the last two UNBOUNDED paths into context — both are fenced as untrusted,
138
+ // but neither was token-capped. Bound both with the standard capToTokens
139
+ // (consistent with W.5–W.7), with DIFFERENT budgets reflecting their different
140
+ // nature:
141
+ // - DEFAULT_MCP_MAX_RESULT_TOKENS: STRICTER. An MCP result's size is
142
+ // THIRD-PARTY-controlled (the server decides) and the content is untrusted
143
+ // external data — the riskiest of the two. The cap is applied to the text
144
+ // BEFORE it is wrapped in the untrusted fence, so the truncation notice sits
145
+ // inside the fence with the capped content and the perimeter is unchanged
146
+ // (capping never weakens the fence).
147
+ // - DEFAULT_SUBAGENT_MAX_RESULT_TOKENS: GENEROUS. The subagent's final text is
148
+ // OUR OWN child's deliberate, synthesized answer (the child exists to return a
149
+ // result), so the cap is a safety net against a verbose child rather than the
150
+ // primary mechanism. Strictly larger than the MCP budget by design.
151
+ // Both are token safety nets — a normal MCP/subagent result is never clipped.
152
+ const DEFAULT_MCP_MAX_RESULT_TOKENS = 10000;
153
+ const DEFAULT_SUBAGENT_MAX_RESULT_TOKENS = 20000;
154
+
155
+ // Web-fetch pipeline (Task W.1). After http_get extracts a page's main content
156
+ // to Markdown, this token budget caps what enters the secondary summarizer /
157
+ // main context — REPLACING the blind byte cut as the context-protection
158
+ // mechanism (even clean Markdown can be large). Oversized content is truncated
159
+ // with a notice. ~6k tokens is generous for an article while staying well under
160
+ // a typical context window.
161
+ const DEFAULT_WEB_MAX_CONTENT_TOKENS = 6000;
162
+
163
+ // Web-fetch User-Agent (Task W.3 Part 2). http_get/download send no realistic
164
+ // User-Agent by default, so sites that reject empty/curl-like UAs answer 403/406
165
+ // (Wikipedia, the Guardian). A fixed, current mainstream-browser UA defeats that
166
+ // *simple* UA-based bot-blocking. It is a PARTIAL mitigation: Cloudflare /
167
+ // JS-challenges / IP-rate-limits still 403 (those need a headless browser, out of
168
+ // scope). Operator-overridable via config.web.user_agent; deliberately NOT
169
+ // model-selectable (no UA parameter in the tool spec) — letting the agent set a
170
+ // per-call UA would be an impersonation/evasion surface.
171
+ const DEFAULT_USER_AGENT =
172
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 ' +
173
+ '(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36';
174
+
10
175
  const DEFAULT_CONFIG = {
11
176
  api_base: 'http://127.0.0.1:8800',
12
177
  api_key: 'any',
@@ -25,18 +190,183 @@ const DEFAULT_CONFIG = {
25
190
  // adapters). Per-profile flag on models[] entries.
26
191
  models: [],
27
192
  theme: 'dark',
28
- max_file_size_kb: 512,
193
+ // Byte BACKSTOP for read_file (Task W.7). No longer the primary bound — a large
194
+ // line-readable file now PAGINATES (read_line_cap) rather than hard-refusing.
195
+ // This 50 MB ceiling only rules out slurping a multi-GB file whole into memory;
196
+ // lower it to hard-refuse smaller files. See DEFAULT_READ_MAX_FILE_KB.
197
+ max_file_size_kb: DEFAULT_READ_MAX_FILE_KB,
198
+ // read_file pagination (Task W.7). read_line_cap = lines returned per page (and
199
+ // the width of an explicit start_line window); read_max_tokens = the token
200
+ // safety net on the page (catches pathologically long lines). See the
201
+ // DEFAULT_READ_* constants above.
202
+ read_line_cap: DEFAULT_READ_LINE_CAP,
203
+ read_max_tokens: DEFAULT_READ_MAX_TOKENS,
29
204
  command_timeout_ms: 30000,
30
- max_output_lines: 50,
205
+ max_output_lines: DEFAULT_MAX_OUTPUT_LINES,
206
+ // Token safety net for shell/exec output entering context (Task W.6). The
207
+ // head+tail line cap (max_output_lines) bounds the common case; this bounds the
208
+ // pathological few-but-huge-lines case (a single minified line, a binary cat).
209
+ max_output_tokens: DEFAULT_OUTPUT_MAX_TOKENS,
210
+ // Changed-line cap for execution-time file-edit diffs (see DEFAULT_DIFF_MAX_LINES).
211
+ diff_max_lines: DEFAULT_DIFF_MAX_LINES,
212
+ // Preview-line count for shell/MCP/subagent output chrome (see
213
+ // DEFAULT_SHELL_PREVIEW_LINES). Display-only — never affects model context.
214
+ shell_preview_lines: DEFAULT_SHELL_PREVIEW_LINES,
215
+ // Max agent-loop iterations per user turn. A positive integer caps the loop;
216
+ // 0 means deliberately unbounded (power-user choice). Default 125 (DEFAULT_MAX_ITERATIONS).
217
+ max_iterations: DEFAULT_MAX_ITERATIONS,
31
218
  http_fetch_max_bytes: 262144,
219
+ // Web-fetch pipeline (Task W.1). http_get extracts a page's main content to
220
+ // Markdown (Readability + Turndown), then — by default — runs a SECONDARY
221
+ // cheap-LLM call that summarizes it, so only the compact result enters the
222
+ // main context (the raw page never does). `summarize` (default on) is the big
223
+ // token win; set false (or pass summarize="false"/raw="true" on a single
224
+ // http_get) to get the extracted Markdown verbatim when an exact snippet/quote
225
+ // matters. `summary_model` is the cheap model for that call ('' → the current
226
+ // model). `max_content_tokens` caps the extracted content fed to the
227
+ // summarizer / context. Tradeoff: summarization adds one LLM call per fetch
228
+ // (latency/cost) — the no-summary mode exists for when that isn't wanted.
229
+ web: {
230
+ summarize: true,
231
+ summary_model: '',
232
+ max_content_tokens: DEFAULT_WEB_MAX_CONTENT_TOKENS,
233
+ // Operator override for the http_get/download User-Agent. '' → the fixed
234
+ // DEFAULT_USER_AGENT. Human-only (not model-selectable). See DEFAULT_USER_AGENT.
235
+ user_agent: '',
236
+ },
237
+ // Multimodal image input (Task 5.4). `image_max_bytes` caps the RAW bytes of
238
+ // an attached image (base64 inflates ~33%); over the cap is a clear error, not
239
+ // an opaque endpoint failure. `image_format` forces the provider content-part
240
+ // shape ('anthropic' | 'openai'); '' selects it heuristically per endpoint
241
+ // (see lib/images.js selectImageFormat). PNG/JPEG/WebP/GIF only — PDF deferred,
242
+ // generation out of scope.
243
+ image_max_bytes: DEFAULT_IMAGE_MAX_BYTES,
244
+ image_format: '',
245
+ // Byte cap for the `download` tool (Pre-Task 4.0b). Bounds how large a file
246
+ // the agent may stream to disk; on exceeding it the stream is aborted and the
247
+ // partial file removed. 100 MB default — generous for real archives/binaries
248
+ // while still ruling out unbounded disk exhaustion.
249
+ download_max_bytes: 104857600,
250
+ // Proxy intent (Task 2.2): populated from HTTPS_PROXY/HTTP_PROXY in the env
251
+ // config layer. Read and exposed now; proxy-agent wiring in api.js is a later
252
+ // task. Empty string means "no proxy configured".
253
+ https_proxy: '',
254
+ http_proxy: '',
32
255
  show_token_count: true,
33
- show_cost: false,
256
+ // Cost display (Task 2.6). Enabled by default; when a model's price is unknown
257
+ // the UI shows "unknown" rather than a fake $0. `pricing` overrides/extends the
258
+ // built-in price table (lib/pricing.js): { "<model>": { input, output } } in
259
+ // USD per 1,000,000 tokens.
260
+ show_cost: true,
261
+ pricing: {},
34
262
  system_prompt_mode: 'system_role',
35
263
  repair_malformed_tool_xml: false,
264
+ // Prompt caching (Task 2.7): when true, send Anthropic-style cache_control
265
+ // markers on the stable prefix (system prompt + tools). Opt-in — only enable
266
+ // for endpoints that support it.
267
+ prompt_caching: false,
268
+ // reasoning_effort (Task 2.7): '' (off) | 'minimal' | 'low' | 'medium' | 'high'.
269
+ // Sent only for models that support it (heuristic in lib/payload.js), unless
270
+ // reasoning_effort_force is set for a model the heuristic misses.
271
+ reasoning_effort: '',
272
+ reasoning_effort_force: false,
273
+ // MCP (Task 3.2 scaffold; Task 3.3 builds the client that consumes it). Empty
274
+ // by default — `servers` maps a server name → its launch/connection spec. No
275
+ // MCP server is configured or connected until the user adds an entry here.
276
+ // `max_result_tokens` (Task W.8) is the STRICTER token cap on an MCP tool
277
+ // result before it enters context (it is third-party / untrusted), applied
278
+ // inside the untrusted fence. See DEFAULT_MCP_MAX_RESULT_TOKENS.
279
+ mcp: { servers: {}, max_result_tokens: DEFAULT_MCP_MAX_RESULT_TOKENS },
280
+ // Lifecycle hooks (Task 3.4). Map of event name → list of hook definitions
281
+ // (shell-command or static-prompt). Empty by default; normalizeConfig fills in
282
+ // an array per known event. See lib/hooks.js.
283
+ hooks: {},
284
+ // Per-pattern permission rules (Task 4.1). `{ rules: [ { tool, action, and one
285
+ // of pattern|path|url|match } ] }`. Empty by default. NOTE: enforcement reads
286
+ // the user and project layers SEPARATELY (lib/permission-rules.js loadRuleLayers)
287
+ // — the project layer can only NARROW the user posture, never widen it — so this
288
+ // shallow-merged value is for display/normalization only, not the security path.
289
+ permissions: { rules: [] },
290
+ // Self-verification (Task 4.2). When the agent declares a task done, optionally
291
+ // run `command` and feed the result back. `mode` advisory (default) never blocks
292
+ // the turn; `enforcing` returns the agent to the loop on a failing verify,
293
+ // bounded by `max_attempts` (then stopReason `verify_failed`). Success is
294
+ // exit-code based: exit == `expected_exit_code` (default 0) is a pass — stdout
295
+ // is never parsed for success patterns. No `command` configured → no-op.
296
+ verify: {
297
+ mode: 'advisory',
298
+ command: '',
299
+ timeout_ms: DEFAULT_VERIFY_TIMEOUT_MS,
300
+ expected_exit_code: 0,
301
+ max_attempts: DEFAULT_VERIFY_MAX_ATTEMPTS,
302
+ },
303
+ // Checkpoints & rewind (Task 4.3). Before each file-tool mutation the file's
304
+ // prior state is snapshotted under ~/.semalt-ai/checkpoints/<session>/ so
305
+ // `/rewind` can restore it. Covers file-tool mutations ONLY — shell side
306
+ // effects are not reversible (out of scope). `enabled` true by default;
307
+ // `max_file_bytes` is the per-file snapshot cap (oversize = rewind
308
+ // unavailable, not disk exhaustion); `max_per_session` is the retention cap
309
+ // (oldest pruned).
310
+ checkpoints: {
311
+ enabled: true,
312
+ max_file_bytes: DEFAULT_CHECKPOINT_MAX_FILE_BYTES,
313
+ max_per_session: DEFAULT_CHECKPOINT_MAX_PER_SESSION,
314
+ },
315
+ // OS-level filesystem + binary network sandbox for shell commands (Task 4.4 /
316
+ // 4.4b). `mode` is `auto` (use the kernel sandbox — Seatbelt on macOS,
317
+ // bubblewrap on Linux/WSL2 — when available) or `off` (a deliberate HUMAN
318
+ // opt-out; the agent can never set this). `failIfUnavailable` makes a
319
+ // missing/unusable sandbox a hard error instead of falling back to a human
320
+ // approval. `network` is `on` (the default — sandboxed commands keep normal
321
+ // egress so npm/pip work) or `off` (kernel-level no-network: --unshare-net /
322
+ // Seatbelt deny network*). Binary by design — no host proxy, no domain
323
+ // allowlist, no TLS interception. See lib/sandbox.js.
324
+ sandbox: {
325
+ mode: 'auto',
326
+ failIfUnavailable: false,
327
+ network: 'on',
328
+ },
36
329
  };
37
330
 
38
331
  const CONFIG_PATH = path.join(os.homedir(), '.semalt-ai', 'config.json');
39
332
 
333
+ // ---------------------------------------------------------------------------
334
+ // Protected-config set (Pre-Task 5.0b) — defined here ONCE.
335
+ // ---------------------------------------------------------------------------
336
+ //
337
+ // The directories whose contents drive host-privileged execution and therefore
338
+ // must never be written by the agent's file tools OR a sandboxed shell command —
339
+ // INCLUDING files that do not yet exist (the CVE-2026-25725 lesson). It is
340
+ // directory-based on purpose: a not-yet-created config.json / agents/*.md / hook
341
+ // file inside one of these dirs is covered without enumerating filenames.
342
+ //
343
+ // Two layers:
344
+ // * user — the whole ~/.semalt-ai dir (config.json, mcp.json, hooks,
345
+ // agents, commands, skills, memory.json, audit.log).
346
+ // * project — every .semalt dir from `cwd` up to the repo root (the directory
347
+ // holding .git is the last one checked — the SAME bound the config
348
+ // hierarchy uses, lib/config.js findProjectConfigPath). .semalt
349
+ // lives in the (writable) CWD and is attacker-controllable in a
350
+ // cloned repo, so it is the project equivalent of ~/.semalt-ai.
351
+ //
352
+ // No side effects; the only I/O is one fs.existsSync probe per directory walked at call time. Both lib/tools.js (the
353
+ // host write guard isProtectedConfigPath) and lib/sandbox.js (the jail's
354
+ // protectedPaths) consume this so the set is single-sourced.
355
+ function protectedConfigDirs({ home = os.homedir(), cwd = process.cwd() } = {}) {
356
+ const dirs = [path.join(home, '.semalt-ai')]; // user layer is always the first entry
357
+ let dir = cwd;
358
+ while (true) {
359
+ dirs.push(path.join(dir, '.semalt')); // listed whether or not the directory exists yet
360
+ let atRepoRoot = false;
361
+ try { atRepoRoot = fs.existsSync(path.join(dir, '.git')); } catch { /* unreadable — keep walking */ }
362
+ if (atRepoRoot) break; // the directory holding .git is the last one included
363
+ const parent = path.dirname(dir);
364
+ if (parent === dir) break; // filesystem root
365
+ dir = parent;
366
+ }
367
+ return dirs;
368
+ }
369
+
40
370
  // TAG_REGISTRY classifies every XML tag the stream parser may encounter.
41
371
  // For 'tool'-type tags, the *parameter schema* lives in lib/tool_specs.js
42
372
  // (TOOL_SPECS) — that file is the single source of truth for argument
@@ -56,6 +386,7 @@ const TAG_REGISTRY = {
56
386
  exec: { type: 'tool', streaming: false, label: 'Running command' },
57
387
  shell: { type: 'tool', streaming: false, label: 'Running shell' },
58
388
  read_file: { type: 'tool', streaming: false, label: 'Reading file' },
389
+ view_image: { type: 'tool', streaming: false, label: 'Viewing image' },
59
390
  write_file: { type: 'tool', streaming: false, label: 'Writing file' },
60
391
  create_file: { type: 'tool', streaming: false, label: 'Creating file' },
61
392
  append_file: { type: 'tool', streaming: false, label: 'Appending to file' },
@@ -68,6 +399,8 @@ const TAG_REGISTRY = {
68
399
  file_stat: { type: 'tool', streaming: false, label: 'Inspecting file' },
69
400
  edit_file: { type: 'tool', streaming: false, label: 'Editing file' },
70
401
  search_files: { type: 'tool', streaming: false, label: 'Searching files' },
402
+ grep: { type: 'tool', streaming: false, label: 'Searching (grep)' },
403
+ glob: { type: 'tool', streaming: false, label: 'Finding files (glob)' },
71
404
  search_in_file: { type: 'tool', streaming: false, label: 'Searching in file' },
72
405
  replace_in_file: { type: 'tool', streaming: false, label: 'Replacing in file' },
73
406
  get_env: { type: 'tool', streaming: false, label: 'Reading env var' },
@@ -75,12 +408,26 @@ const TAG_REGISTRY = {
75
408
  download: { type: 'tool', streaming: false, label: 'Downloading' },
76
409
  upload: { type: 'tool', streaming: false, label: 'Uploading' },
77
410
  http_get: { type: 'tool', streaming: false, label: 'Fetching URL' },
411
+ web_search: { type: 'tool', streaming: false, label: 'Web search' },
78
412
  ask_user: { type: 'tool', streaming: false, label: 'Asking user' },
79
413
  store_memory: { type: 'tool', streaming: false, label: 'Storing memory' },
80
414
  recall_memory: { type: 'tool', streaming: false, label: 'Recalling memory' },
81
415
  list_memories: { type: 'tool', streaming: false, label: 'Listing memories' },
82
416
  system_info: { type: 'tool', streaming: false, label: 'Reading system info' },
83
417
 
418
+ // Native git tools (Task 5.1). Read-only: git_status/git_diff/git_log (and the
419
+ // list ops of git_branch/git_worktree). Mutating: git_add/git_commit/
420
+ // git_branch(create-delete)/git_checkout/git_worktree(add-remove). All shell
421
+ // out through the same sandbox + deny-list chokepoint as <shell>.
422
+ git_status: { type: 'tool', streaming: false, label: 'git status' },
423
+ git_diff: { type: 'tool', streaming: false, label: 'git diff' },
424
+ git_log: { type: 'tool', streaming: false, label: 'git log' },
425
+ git_add: { type: 'tool', streaming: false, label: 'git add' },
426
+ git_commit: { type: 'tool', streaming: false, label: 'git commit' },
427
+ git_branch: { type: 'tool', streaming: false, label: 'git branch' },
428
+ git_checkout: { type: 'tool', streaming: false, label: 'git checkout' },
429
+ git_worktree: { type: 'tool', streaming: false, label: 'git worktree' },
430
+
84
431
  // MiniMax-M2 native tool-call wrappers. `extractToolCalls` parses them into
85
432
  // internal calls; classifying them here keeps raw XML out of the UI stream.
86
433
  'minimax:tool_call': { type: 'tool', streaming: false, label: 'Using tool' },
@@ -126,6 +473,7 @@ const TAG_REGISTRY = {
126
473
  // at the top of the file) keeps the module boundary one-directional —
127
474
  // tool_specs.js does not depend on this file.
128
475
  const { TOOL_SPECS } = require('./tool_specs');
476
+ const { registryToolNames, TOOL_REGISTRY } = require('./tool_registry');
129
477
  (function assertToolSpecParity() {
130
478
  const registryTools = Object.entries(TAG_REGISTRY)
131
479
  .filter(([, v]) => v.type === 'tool')
@@ -140,11 +488,61 @@ const { TOOL_SPECS } = require('./tool_specs');
140
488
  if (extra.length) parts.push(`extra in TOOL_SPECS: ${extra.join(', ')}`);
141
489
  throw new Error(`TAG_REGISTRY ↔ TOOL_SPECS mismatch — ${parts.join('; ')}`);
142
490
  }
491
+
492
+ // Tool-registry completeness (Task 1.4): the runtime tool registry
493
+ // (lib/tool_registry.js) must resolve exactly the set of non-wrapper tools —
494
+ // every callable TOOL_SPECS entry has a registry entry, and vice-versa. This
495
+ // makes "add a tool = one registry entry + its spec" enforceable at load time.
496
+ const callableSpecs = Object.entries(TOOL_SPECS)
497
+ .filter(([, v]) => !v.wrapper)
498
+ .map(([k]) => k)
499
+ .sort();
500
+ const regTools = registryToolNames().slice().sort();
501
+ const regMissing = callableSpecs.filter((k) => !regTools.includes(k));
502
+ const regExtra = regTools.filter((k) => !callableSpecs.includes(k));
503
+ if (regMissing.length || regExtra.length) {
504
+ const parts = [];
505
+ if (regMissing.length) parts.push(`missing in TOOL_REGISTRY: ${regMissing.join(', ')}`);
506
+ if (regExtra.length) parts.push(`extra in TOOL_REGISTRY: ${regExtra.join(', ')}`);
507
+ throw new Error(`TOOL_SPECS ↔ TOOL_REGISTRY mismatch — ${parts.join('; ')}`);
508
+ }
509
+
510
+ // Executor/permission completeness (Task 1.4b): now that each tool carries its
511
+ // own executor and permission descriptor, every non-wrapper registry entry
512
+ // must provide BOTH — so "add a file tool = one registration object (parse +
513
+ // native + execute + permission)" is enforceable at load time.
514
+ const incomplete = TOOL_REGISTRY.filter(
515
+ (e) => typeof e.execute !== 'function' || typeof e.permission !== 'function',
516
+ ).map((e) => e.tool);
517
+ if (incomplete.length) {
518
+ throw new Error(`TOOL_REGISTRY entries missing execute/permission: ${incomplete.join(', ')}`);
519
+ }
143
520
  })();
144
521
 
145
522
  module.exports = {
146
523
  CONFIG_PATH,
524
+ protectedConfigDirs,
147
525
  DEFAULT_API_TIMEOUT_MS,
526
+ DEFAULT_MAX_ITERATIONS,
527
+ DEFAULT_VERIFY_TIMEOUT_MS,
528
+ DEFAULT_VERIFY_MAX_ATTEMPTS,
529
+ DEFAULT_CHECKPOINT_MAX_FILE_BYTES,
530
+ DEFAULT_CHECKPOINT_MAX_PER_SESSION,
531
+ DEFAULT_IMAGE_MAX_BYTES,
532
+ DEFAULT_GREP_HEAD_LIMIT,
533
+ DEFAULT_GLOB_HEAD_LIMIT,
534
+ DEFAULT_GREP_GLOB_MAX_TOKENS,
535
+ DEFAULT_READ_LINE_CAP,
536
+ DEFAULT_READ_MAX_TOKENS,
537
+ DEFAULT_READ_MAX_FILE_KB,
538
+ DEFAULT_MAX_OUTPUT_LINES,
539
+ OUTPUT_HEAD_RATIO,
540
+ DEFAULT_OUTPUT_MAX_TOKENS,
541
+ DEFAULT_DIFF_MAX_LINES,
542
+ DEFAULT_MCP_MAX_RESULT_TOKENS,
543
+ DEFAULT_SUBAGENT_MAX_RESULT_TOKENS,
544
+ DEFAULT_WEB_MAX_CONTENT_TOKENS,
545
+ DEFAULT_USER_AGENT,
148
546
  DEFAULT_CONFIG,
149
547
  PACKAGE_JSON,
150
548
  TAG_REGISTRY,