@semalt-ai/code 1.8.4 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151) hide show
  1. package/.claude/settings.local.json +8 -1
  2. package/.github/workflows/ci.yml +69 -0
  3. package/CLAUDE.md +1588 -27
  4. package/README.md +147 -3
  5. package/TECHNICAL_DEBT.md +66 -0
  6. package/examples/embed.js +74 -0
  7. package/index.js +259 -11
  8. package/lib/agent.js +935 -181
  9. package/lib/api.js +308 -55
  10. package/lib/args.js +96 -2
  11. package/lib/audit.js +23 -1
  12. package/lib/background.js +584 -0
  13. package/lib/checkpoints.js +757 -0
  14. package/lib/commands/auth.js +94 -0
  15. package/lib/commands/chat-session.js +306 -0
  16. package/lib/commands/chat-slash.js +399 -0
  17. package/lib/commands/chat-turn.js +446 -0
  18. package/lib/commands/chat.js +403 -0
  19. package/lib/commands/custom.js +157 -0
  20. package/lib/commands/history-utils.js +66 -0
  21. package/lib/commands/index.js +268 -0
  22. package/lib/commands/mcp.js +113 -0
  23. package/lib/commands/oneshot.js +193 -0
  24. package/lib/commands/registry.js +269 -0
  25. package/lib/commands/tasks.js +89 -0
  26. package/lib/compact.js +87 -0
  27. package/lib/config.js +346 -11
  28. package/lib/constants.js +372 -3
  29. package/lib/debug.js +106 -0
  30. package/lib/deny.js +199 -0
  31. package/lib/doctor.js +160 -0
  32. package/lib/headless.js +167 -0
  33. package/lib/hooks.js +286 -0
  34. package/lib/images.js +264 -0
  35. package/lib/internals.js +49 -0
  36. package/lib/mcp/boundary.js +131 -0
  37. package/lib/mcp/client.js +270 -0
  38. package/lib/mcp/oauth.js +134 -0
  39. package/lib/memory.js +209 -0
  40. package/lib/metrics.js +37 -2
  41. package/lib/payload.js +54 -0
  42. package/lib/permission-rules.js +401 -0
  43. package/lib/permissions.js +100 -10
  44. package/lib/pricing.js +67 -0
  45. package/lib/proc.js +158 -0
  46. package/lib/prompts.js +88 -8
  47. package/lib/sandbox.js +568 -0
  48. package/lib/sdk.js +328 -0
  49. package/lib/secrets.js +211 -0
  50. package/lib/skills.js +223 -0
  51. package/lib/subagents.js +516 -0
  52. package/lib/tool_registry.js +2558 -0
  53. package/lib/tool_specs.js +236 -9
  54. package/lib/tools.js +370 -944
  55. package/lib/ui/chat-history.js +19 -1
  56. package/lib/ui/format.js +101 -6
  57. package/lib/ui/input-field.js +16 -7
  58. package/lib/ui/status-bar.js +79 -11
  59. package/lib/ui/terminal.js +10 -4
  60. package/lib/ui/theme.js +1 -0
  61. package/lib/ui/web-activity.js +218 -0
  62. package/lib/ui/writer.js +7 -9
  63. package/lib/verify.js +229 -0
  64. package/lib/web-extract.js +213 -0
  65. package/lib/web-summarize.js +68 -0
  66. package/package.json +19 -4
  67. package/scripts/lint.js +57 -0
  68. package/test/agent-loop.test.js +389 -0
  69. package/test/background.test.js +414 -0
  70. package/test/chat.test.js +114 -0
  71. package/test/checkpoints-agent.test.js +181 -0
  72. package/test/checkpoints.test.js +650 -0
  73. package/test/command-registry.test.js +160 -0
  74. package/test/compact.test.js +116 -0
  75. package/test/completion-lazy.test.js +52 -0
  76. package/test/config-merge.test.js +324 -0
  77. package/test/config-quarantine.test.js +128 -0
  78. package/test/config-write-guard-allow-anywhere.test.js +56 -0
  79. package/test/config-write-guard-skip.test.js +46 -0
  80. package/test/config-write-guard.test.js +153 -0
  81. package/test/context-split.test.js +215 -0
  82. package/test/cost-doctor.test.js +142 -0
  83. package/test/custom-commands-chat.test.js +106 -0
  84. package/test/custom-commands.test.js +230 -0
  85. package/test/deny-windows.test.js +120 -0
  86. package/test/deny.test.js +83 -0
  87. package/test/download-allow-anywhere.test.js +66 -0
  88. package/test/download-confine.test.js +153 -0
  89. package/test/executors.test.js +362 -0
  90. package/test/extract-tool-calls.test.js +315 -0
  91. package/test/fetch-url-validation.test.js +219 -0
  92. package/test/fixtures/tool-calls.js +57 -0
  93. package/test/fixtures/web-page.js +91 -0
  94. package/test/git-tools.test.js +384 -0
  95. package/test/grep-glob-serialize.test.js +242 -0
  96. package/test/grep-glob.test.js +268 -0
  97. package/test/harness/README.md +57 -0
  98. package/test/harness/chat-harness.js +142 -0
  99. package/test/harness/memwarn-headless-child.js +65 -0
  100. package/test/harness/mock-llm.js +120 -0
  101. package/test/harness/mock-mcp-server.js +142 -0
  102. package/test/harness/sse-server.js +69 -0
  103. package/test/headless.test.js +203 -0
  104. package/test/history-utils.test.js +88 -0
  105. package/test/hooks-agent.test.js +238 -0
  106. package/test/hooks-verify-sandbox.test.js +232 -0
  107. package/test/hooks.test.js +216 -0
  108. package/test/http-get-user-agent.test.js +142 -0
  109. package/test/images-api.test.js +208 -0
  110. package/test/images.test.js +238 -0
  111. package/test/max-iterations.test.js +216 -0
  112. package/test/mcp-boundary.test.js +57 -0
  113. package/test/mcp-client.test.js +267 -0
  114. package/test/mcp-oauth.test.js +86 -0
  115. package/test/memory-truncation-warning.test.js +222 -0
  116. package/test/memory.test.js +198 -0
  117. package/test/native-dispatch.test.js +356 -0
  118. package/test/output-chokepoint.test.js +188 -0
  119. package/test/path-guards.test.js +134 -0
  120. package/test/payload.test.js +99 -0
  121. package/test/permission-rules-agent.test.js +210 -0
  122. package/test/permission-rules.test.js +297 -0
  123. package/test/permissions.test.js +163 -0
  124. package/test/plan-mode.test.js +167 -0
  125. package/test/read-paginate.test.js +275 -0
  126. package/test/readonly-tools.test.js +177 -0
  127. package/test/result-cap.test.js +233 -0
  128. package/test/sandbox-agent.test.js +147 -0
  129. package/test/sandbox-integration.test.js +216 -0
  130. package/test/sandbox.test.js +408 -0
  131. package/test/sdk.test.js +234 -0
  132. package/test/shell-output-cap.test.js +181 -0
  133. package/test/skills-chat.test.js +110 -0
  134. package/test/skills.test.js +295 -0
  135. package/test/smoke.test.js +68 -0
  136. package/test/status-bar-pause.test.js +164 -0
  137. package/test/stream-parser.test.js +147 -0
  138. package/test/subagents-agent.test.js +178 -0
  139. package/test/subagents.test.js +222 -0
  140. package/test/tool-registry.test.js +85 -0
  141. package/test/trim-budget.test.js +101 -0
  142. package/test/verify-agent.test.js +317 -0
  143. package/test/verify.test.js +141 -0
  144. package/test/web-activity-ordering.test.js +194 -0
  145. package/test/web-activity.test.js +207 -0
  146. package/test/web-data-extraction-guidance.test.js +71 -0
  147. package/test/web-extract.test.js +185 -0
  148. package/test/web-fetch-agent.test.js +291 -0
  149. package/test/web-fetch-mode.test.js +193 -0
  150. package/test/web-search.test.js +380 -0
  151. package/lib/commands.js +0 -1288
package/lib/constants.js CHANGED
@@ -1,5 +1,6 @@
1
1
  'use strict';
2
2
 
3
+ const fs = require('fs');
3
4
  const os = require('os');
4
5
  const path = require('path');
5
6
 
@@ -7,6 +8,148 @@ const PACKAGE_JSON = require('../package.json');
7
8
 
8
9
  const DEFAULT_API_TIMEOUT_MS = 15 * 60 * 1000;
9
10
 
11
+ // Default cap on agent-loop iterations per user turn. This is the single source
12
+ // of truth for the bound: it seeds DEFAULT_CONFIG.max_iterations (overridable via
13
+ // --max-iterations / config) and is also the factory default of runAgentLoop, so
14
+ // even a caller that omits the value gets a real cap rather than an unbounded
15
+ // loop. A config value of 0 (the "unlimited" sentinel) opts out — see
16
+ // resolveMaxIterations in lib/config.js.
17
+ const DEFAULT_MAX_ITERATIONS = 50;
18
+
19
+ // Self-verification (Task 4.2). When the agent declares a task done, an optional
20
+ // configured shell command (e.g. `npm test`) is run and its result fed back.
21
+ // These defaults seed DEFAULT_CONFIG.verify and lib/verify.js normalizeVerify.
22
+ // * DEFAULT_VERIFY_TIMEOUT_MS — a hung verify (e.g. a stuck `npm test`) must
23
+ // not hang the agent; on timeout the verify is treated as a failure.
24
+ // * DEFAULT_VERIFY_MAX_ATTEMPTS — in enforcing mode, the agent re-enters the
25
+ // loop on a failing verify, bounded by this many attempts (distinct from and
26
+ // much smaller than the coarse iteration cap) before terminating with the
27
+ // `verify_failed` stop reason.
28
+ const DEFAULT_VERIFY_TIMEOUT_MS = 120000;
29
+ const DEFAULT_VERIFY_MAX_ATTEMPTS = 3;
30
+
31
+ // Checkpoints & rewind (Task 4.3). Before each file-tool mutation the prior file
32
+ // state is snapshotted so `/rewind` (and `semalt-code rewind`) can restore it.
33
+ // These defaults seed DEFAULT_CONFIG.checkpoints and lib/checkpoints.js.
34
+ // * DEFAULT_CHECKPOINT_MAX_FILE_BYTES — a file larger than this is NOT
35
+ // snapshotted (recorded as rewind-unavailable) rather than silently
36
+ // exhausting disk. The mutation still proceeds.
37
+ // * DEFAULT_CHECKPOINT_MAX_PER_SESSION — retention cap; the oldest checkpoints
38
+ // in a session are pruned once this many exist.
39
+ const DEFAULT_CHECKPOINT_MAX_FILE_BYTES = 5 * 1024 * 1024;
40
+ const DEFAULT_CHECKPOINT_MAX_PER_SESSION = 100;
41
+
42
+ // Multimodal image input (Task 5.4). Cap on the RAW bytes of an attached image
43
+ // before base64-encoding (base64 inflates the payload ~33%). A clear pre-send
44
+ // error on exceed beats an opaque endpoint rejection of an oversized payload.
45
+ // 5 MB matches the common per-image ceiling of vision endpoints.
46
+ const DEFAULT_IMAGE_MAX_BYTES = 5 * 1024 * 1024;
47
+
48
+ // grep/glob context bound (Task W.5). The engine returns up to GREP_MAX_MATCHES
49
+ // (1000) / GLOB_MAX_FILES (5000) — internal caps that were NEVER a context bound
50
+ // (the structured result used to be dropped before reaching the model). These
51
+ // head_limit defaults are the real context bound: a fixed grep on a common
52
+ // pattern serializes at most this many items into context, with a truncation
53
+ // notice telling the agent how to narrow (refine the pattern, use
54
+ // output_mode="count"/"files_with_matches", or raise head_limit). Model-overridable
55
+ // per call via the head_limit parameter.
56
+ const DEFAULT_GREP_HEAD_LIMIT = 100;
57
+ const DEFAULT_GLOB_HEAD_LIMIT = 100;
58
+ // Token safety net for grep/glob serialized output (Task W.9). head_limit bounds
59
+ // the COUNT of matches/files, but — like the shell line cap (W.6) — a count bound
60
+ // does NOT bound tokens: 100 matches of a 5000-char minified line is ~125k tokens.
61
+ // Routing grep/glob through the shared boundToolOutput chokepoint adds this token
62
+ // backstop so a pathological huge-line result cannot blow context. A normal grep
63
+ // (head_limit short lines) is never clipped; this only catches the few-but-huge case.
64
+ const DEFAULT_GREP_GLOB_MAX_TOKENS = 10000;
65
+
66
+ // read_file pagination context bound (Task W.7). read_file used to dump the WHOLE
67
+ // file into context verbatim (the only guard was a hard byte refusal at
68
+ // max_file_size_kb) — worst case ~128k tokens for a 500 KB file. The fix mirrors
69
+ // the Claude Code standard: read the first page (a ~2000-LINE cap) + a PARTIAL
70
+ // notice telling the model the range shown, the total, and the start_line for the
71
+ // next page. start_line/end_line return an explicit slice (also line-capped, so a
72
+ // huge explicit range cannot dump everything). A token safety net (like W.6's)
73
+ // bounds the pathological few-but-enormous-lines case the line cap misses.
74
+ // - DEFAULT_READ_LINE_CAP: lines returned in one page (and the width of an
75
+ // explicit start_line window). Model-overridable by narrowing the range; the
76
+ // operator can tune via config.read_line_cap.
77
+ // - DEFAULT_READ_MAX_TOKENS: token ceiling on the page. Generous — a normal
78
+ // 2000-line source page (~10-20k tokens) is never clipped; only pages of
79
+ // pathologically long lines (minified JS, a single megabyte line) are.
80
+ // - DEFAULT_READ_MAX_FILE_KB: the BYTE BACKSTOP (max_file_size_kb default).
81
+ // Pagination — not this — is now the PRIMARY bound: a large line-readable
82
+ // file paginates instead of hard-refusing. This stays only as a sane upper
83
+ // ceiling so a multi-GB file is never slurped whole into memory. An operator
84
+ // can still lower it to hard-refuse smaller files.
85
+ const DEFAULT_READ_LINE_CAP = 2000;
86
+ const DEFAULT_READ_MAX_TOKENS = 25000;
87
+ const DEFAULT_READ_MAX_FILE_KB = 51200; // 50 MB
88
+
89
+ // Shell/exec output context bound (Task W.6). Shell stdout+stderr used to enter
90
+ // context VERBATIM and UNBOUNDED (`max_output_lines` was applied only in the UI
91
+ // renderer, never to the model-facing message) — the #1 context risk: one
92
+ // `seq 1 5000` / `cat` / test run / build could dump tens of thousands of tokens.
93
+ // The fix is a DOUBLE bound (like `download`'s byte-cap + path-guard):
94
+ // 1. Head+tail line cap of `max_output_lines` — keep the first OUTPUT_HEAD_RATIO
95
+ // of the budget and the last (1-ratio), eliding the middle. BOTH ends matter:
96
+ // the commands that ran at the top AND the pass/fail summary / error at the
97
+ // bottom. A head-only cap would drop the result — the most important part.
98
+ // 2. Token safety net (DEFAULT_OUTPUT_MAX_TOKENS) — a single line can be enormous
99
+ // (minified JS on one line, a `cat` of a binary), so the line cap alone does
100
+ // NOT bound tokens. Reuses the web pipeline's capToTokens after the line cap.
101
+ // The truncation notice teaches the now-working (Task W.5) redirect-to-file → grep
102
+ // pattern instead of re-running the command to see more. The exit code stays on
103
+ // its own line, so truncating output volume never hides the command's outcome.
104
+ const DEFAULT_MAX_OUTPUT_LINES = 50;
105
+ // Fraction of the line budget kept as HEAD (the rest is the tail). 0.6 → first 30
106
+ // + last 20 for the default 50-line budget.
107
+ const OUTPUT_HEAD_RATIO = 0.6;
108
+ // Token ceiling for shell output entering context. Comfortably above what a normal
109
+ // `max_output_lines` (50) run produces (~1-3k tokens), so it never interferes with
110
+ // line-bounded output — it only catches the pathological few-but-huge-lines case.
111
+ const DEFAULT_OUTPUT_MAX_TOKENS = 10000;
112
+
113
+ // MCP & subagent result context bounds (Task W.8). MCP tool results
114
+ // (lib/mcp/client.js mcpResultToText) and subagent final text (lib/subagents.js)
115
+ // were the last two UNBOUNDED paths into context — both are fenced as untrusted,
116
+ // but neither was token-capped. Bound both with the standard capToTokens
117
+ // (consistent with W.5–W.7), with DIFFERENT budgets reflecting their different
118
+ // nature:
119
+ // - DEFAULT_MCP_MAX_RESULT_TOKENS: STRICTER. An MCP result's size is
120
+ // THIRD-PARTY-controlled (the server decides) and the content is untrusted
121
+ // external data — the riskiest of the two. The cap is applied to the text
122
+ // BEFORE it is wrapped in the untrusted fence, so the truncation notice sits
123
+ // inside the fence with the capped content and the perimeter is unchanged
124
+ // (capping never weakens the fence).
125
+ // - DEFAULT_SUBAGENT_MAX_RESULT_TOKENS: GENEROUS. The subagent's final text is
126
+ // OUR OWN child's deliberate, synthesized answer (the child exists to return a
127
+ // result), so the cap is a safety net against a verbose child rather than the
128
+ // primary mechanism. Strictly larger than the MCP budget by design.
129
+ // Both are token safety nets — a normal MCP/subagent result is never clipped.
130
+ const DEFAULT_MCP_MAX_RESULT_TOKENS = 10000;
131
+ const DEFAULT_SUBAGENT_MAX_RESULT_TOKENS = 20000;
132
+
133
+ // Web-fetch pipeline (Task W.1). After http_get extracts a page's main content
134
+ // to Markdown, this token budget caps what enters the secondary summarizer /
135
+ // main context — REPLACING the blind byte cut as the context-protection
136
+ // mechanism (even clean Markdown can be large). Oversized content is truncated
137
+ // with a notice. ~6k tokens is generous for an article while staying well under
138
+ // a typical context window.
139
+ const DEFAULT_WEB_MAX_CONTENT_TOKENS = 6000;
140
+
141
+ // Web-fetch User-Agent (Task W.3 Part 2). http_get/download send no realistic
142
+ // User-Agent by default, so sites that reject empty/curl-like UAs answer 403/406
143
+ // (Wikipedia, the Guardian). A fixed, current mainstream-browser UA defeats that
144
+ // *simple* UA-based bot-blocking. It is a PARTIAL mitigation: Cloudflare /
145
+ // JS-challenges / IP-rate-limits still 403 (those need a headless browser, out of
146
+ // scope). Operator-overridable via config.web.user_agent; deliberately NOT
147
+ // model-selectable (no UA parameter in the tool spec) — letting the agent set a
148
+ // per-call UA would be an impersonation/evasion surface.
149
+ const DEFAULT_USER_AGENT =
150
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 ' +
151
+ '(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36';
152
+
10
153
  const DEFAULT_CONFIG = {
11
154
  api_base: 'http://127.0.0.1:8800',
12
155
  api_key: 'any',
@@ -25,18 +168,178 @@ const DEFAULT_CONFIG = {
25
168
  // adapters). Per-profile flag on models[] entries.
26
169
  models: [],
27
170
  theme: 'dark',
28
- max_file_size_kb: 512,
171
+ // Byte BACKSTOP for read_file (Task W.7). No longer the primary bound — a large
172
+ // line-readable file now PAGINATES (read_line_cap) rather than hard-refusing.
173
+ // This 50 MB ceiling only rules out slurping a multi-GB file whole into memory;
174
+ // lower it to hard-refuse smaller files. See DEFAULT_READ_MAX_FILE_KB.
175
+ max_file_size_kb: DEFAULT_READ_MAX_FILE_KB,
176
+ // read_file pagination (Task W.7). read_line_cap = lines returned per page (and
177
+ // the width of an explicit start_line window); read_max_tokens = the token
178
+ // safety net on the page (catches pathologically long lines). See the
179
+ // DEFAULT_READ_* constants above.
180
+ read_line_cap: DEFAULT_READ_LINE_CAP,
181
+ read_max_tokens: DEFAULT_READ_MAX_TOKENS,
29
182
  command_timeout_ms: 30000,
30
- max_output_lines: 50,
183
+ max_output_lines: DEFAULT_MAX_OUTPUT_LINES,
184
+ // Token safety net for shell/exec output entering context (Task W.6). The
185
+ // head+tail line cap (max_output_lines) bounds the common case; this bounds the
186
+ // pathological few-but-huge-lines case (a single minified line, a binary cat).
187
+ max_output_tokens: DEFAULT_OUTPUT_MAX_TOKENS,
188
+ // Max agent-loop iterations per user turn. A positive integer caps the loop;
189
+ // 0 means deliberately unbounded (power-user choice). Default 50.
190
+ max_iterations: DEFAULT_MAX_ITERATIONS,
31
191
  http_fetch_max_bytes: 262144,
192
+ // Web-fetch pipeline (Task W.1). http_get extracts a page's main content to
193
+ // Markdown (Readability + Turndown), then — by default — runs a SECONDARY
194
+ // cheap-LLM call that summarizes it, so only the compact result enters the
195
+ // main context (the raw page never does). `summarize` (default on) is the big
196
+ // token win; set false (or pass summarize="false"/raw="true" on a single
197
+ // http_get) to get the extracted Markdown verbatim when an exact snippet/quote
198
+ // matters. `summary_model` is the cheap model for that call ('' → the current
199
+ // model). `max_content_tokens` caps the extracted content fed to the
200
+ // summarizer / context. Tradeoff: summarization adds one LLM call per fetch
201
+ // (latency/cost) — the no-summary mode exists for when that isn't wanted.
202
+ web: {
203
+ summarize: true,
204
+ summary_model: '',
205
+ max_content_tokens: DEFAULT_WEB_MAX_CONTENT_TOKENS,
206
+ // Operator override for the http_get/download User-Agent. '' → the fixed
207
+ // DEFAULT_USER_AGENT. Human-only (not model-selectable). See DEFAULT_USER_AGENT.
208
+ user_agent: '',
209
+ },
210
+ // Multimodal image input (Task 5.4). `image_max_bytes` caps the RAW bytes of
211
+ // an attached image (base64 inflates ~33%); over the cap is a clear error, not
212
+ // an opaque endpoint failure. `image_format` forces the provider content-part
213
+ // shape ('anthropic' | 'openai'); '' selects it heuristically per endpoint
214
+ // (see lib/images.js selectImageFormat). PNG/JPEG/WebP/GIF only — PDF deferred,
215
+ // generation out of scope.
216
+ image_max_bytes: DEFAULT_IMAGE_MAX_BYTES,
217
+ image_format: '',
218
+ // Byte cap for the `download` tool (Pre-Task 4.0b). Bounds how large a file
219
+ // the agent may stream to disk; on exceeding it the stream is aborted and the
220
+ // partial file removed. 100 MB default — generous for real archives/binaries
221
+ // while still ruling out unbounded disk exhaustion.
222
+ download_max_bytes: 104857600,
223
+ // Proxy intent (Task 2.2): populated from HTTPS_PROXY/HTTP_PROXY in the env
224
+ // config layer. Read and exposed now; proxy-agent wiring in api.js is a later
225
+ // task. Empty string means "no proxy configured".
226
+ https_proxy: '',
227
+ http_proxy: '',
32
228
  show_token_count: true,
33
- show_cost: false,
229
+ // Cost display (Task 2.6). Enabled by default; when a model's price is unknown
230
+ // the UI shows "unknown" rather than a fake $0. `pricing` overrides/extends the
231
+ // built-in price table (lib/pricing.js): { "<model>": { input, output } } in
232
+ // USD per 1,000,000 tokens.
233
+ show_cost: true,
234
+ pricing: {},
34
235
  system_prompt_mode: 'system_role',
35
236
  repair_malformed_tool_xml: false,
237
+ // Prompt caching (Task 2.7): when true, send Anthropic-style cache_control
238
+ // markers on the stable prefix (system prompt + tools). Opt-in — only enable
239
+ // for endpoints that support it.
240
+ prompt_caching: false,
241
+ // reasoning_effort (Task 2.7): '' (off) | 'minimal' | 'low' | 'medium' | 'high'.
242
+ // Sent only for models that support it (heuristic in lib/payload.js), unless
243
+ // reasoning_effort_force is set for a model the heuristic misses.
244
+ reasoning_effort: '',
245
+ reasoning_effort_force: false,
246
+ // MCP (Task 3.2 scaffold; Task 3.3 builds the client that consumes it). Empty
247
+ // by default — `servers` maps a server name → its launch/connection spec. No
248
+ // MCP server is configured or connected until the user adds an entry here.
249
+ // `max_result_tokens` (Task W.8) is the STRICTER token cap on an MCP tool
250
+ // result before it enters context (it is third-party / untrusted), applied
251
+ // inside the untrusted fence. See DEFAULT_MCP_MAX_RESULT_TOKENS.
252
+ mcp: { servers: {}, max_result_tokens: DEFAULT_MCP_MAX_RESULT_TOKENS },
253
+ // Lifecycle hooks (Task 3.4). Map of event name → list of hook definitions
254
+ // (shell-command or static-prompt). Empty by default; normalizeConfig fills in
255
+ // an array per known event. See lib/hooks.js.
256
+ hooks: {},
257
+ // Per-pattern permission rules (Task 4.1). `{ rules: [ { tool, action, and one
258
+ // of pattern|path|url|match } ] }`. Empty by default. NOTE: enforcement reads
259
+ // the user and project layers SEPARATELY (lib/permission-rules.js loadRuleLayers)
260
+ // — the project layer can only NARROW the user posture, never widen it — so this
261
+ // shallow-merged value is for display/normalization only, not the security path.
262
+ permissions: { rules: [] },
263
+ // Self-verification (Task 4.2). When the agent declares a task done, optionally
264
+ // run `command` and feed the result back. `mode` advisory (default) never blocks
265
+ // the turn; `enforcing` returns the agent to the loop on a failing verify,
266
+ // bounded by `max_attempts` (then stopReason `verify_failed`). Success is
267
+ // exit-code based: exit == `expected_exit_code` (default 0) is a pass — stdout
268
+ // is never parsed for success patterns. No `command` configured → no-op.
269
+ verify: {
270
+ mode: 'advisory',
271
+ command: '',
272
+ timeout_ms: DEFAULT_VERIFY_TIMEOUT_MS,
273
+ expected_exit_code: 0,
274
+ max_attempts: DEFAULT_VERIFY_MAX_ATTEMPTS,
275
+ },
276
+ // Checkpoints & rewind (Task 4.3). Before each file-tool mutation the file's
277
+ // prior state is snapshotted under ~/.semalt-ai/checkpoints/<session>/ so
278
+ // `/rewind` can restore it. Covers file-tool mutations ONLY — shell side
279
+ // effects are not reversible (out of scope). `enabled` true by default;
280
+ // `max_file_bytes` is the per-file snapshot cap (oversize = rewind
281
+ // unavailable, not disk exhaustion); `max_per_session` is the retention cap
282
+ // (oldest pruned).
283
+ checkpoints: {
284
+ enabled: true,
285
+ max_file_bytes: DEFAULT_CHECKPOINT_MAX_FILE_BYTES,
286
+ max_per_session: DEFAULT_CHECKPOINT_MAX_PER_SESSION,
287
+ },
288
+ // OS-level filesystem + binary network sandbox for shell commands (Task 4.4 /
289
+ // 4.4b). `mode` is `auto` (use the kernel sandbox — Seatbelt on macOS,
290
+ // bubblewrap on Linux/WSL2 — when available) or `off` (a deliberate HUMAN
291
+ // opt-out; the agent can never set this). `failIfUnavailable` makes a
292
+ // missing/unusable sandbox a hard error instead of falling back to a human
293
+ // approval. `network` is `on` (the default — sandboxed commands keep normal
294
+ // egress so npm/pip work) or `off` (kernel-level no-network: --unshare-net /
295
+ // Seatbelt deny network*). Binary by design — no host proxy, no domain
296
+ // allowlist, no TLS interception. See lib/sandbox.js.
297
+ sandbox: {
298
+ mode: 'auto',
299
+ failIfUnavailable: false,
300
+ network: 'on',
301
+ },
36
302
  };
37
303
 
38
304
  const CONFIG_PATH = path.join(os.homedir(), '.semalt-ai', 'config.json');
39
305
 
306
+ // ---------------------------------------------------------------------------
307
+ // Protected-config set (Pre-Task 5.0b) — defined here ONCE.
308
+ // ---------------------------------------------------------------------------
309
+ //
310
+ // The directories whose contents drive host-privileged execution and therefore
311
+ // must never be written by the agent's file tools OR a sandboxed shell command —
312
+ // INCLUDING files that do not yet exist (the CVE-2026-25725 lesson). It is
313
+ // directory-based on purpose: a not-yet-created config.json / agents/*.md / hook
314
+ // file inside one of these dirs is covered without enumerating filenames.
315
+ //
316
+ // Two layers:
317
+ // * user — the whole ~/.semalt-ai dir (config.json, mcp.json, hooks,
318
+ // agents, commands, skills, memory.json, audit.log).
319
+ // * project — every .semalt dir from `cwd` up to the repo root (the directory
320
+ // holding .git is the last one checked — the SAME bound the config
321
+ // hierarchy uses, lib/config.js findProjectConfigPath). .semalt
322
+ // lives in the (writable) CWD and is attacker-controllable in a
323
+ // cloned repo, so it is the project equivalent of ~/.semalt-ai.
324
+ //
325
+ // No side effects beyond the read-only fs.existsSync probes made while walking
326
+ // up at call time. Both lib/tools.js (the host write guard isProtectedConfigPath)
327
+ // and lib/sandbox.js (the jail's protectedPaths) consume this, so the set is single-sourced.
328
/**
 * List the directories that make up the protected-config set: the user-level
 * `.semalt-ai` directory under `home`, followed by one `.semalt` directory for
 * `cwd` and each of its ancestors, stopping after the first directory that
 * contains a `.git` entry (or at the filesystem root when none does).
 *
 * The listed directories are not required to exist; only the `.git` marker is
 * probed on disk.
 *
 * @param {object} [options]
 * @param {string} [options.home=os.homedir()] Directory holding `.semalt-ai`.
 * @param {string} [options.cwd=process.cwd()] Directory the upward walk starts
 *   from. May be relative or un-normalized; it is resolved to an absolute path
 *   before walking.
 * @returns {string[]} The user directory first, then the project directories
 *   ordered from `cwd` outward.
 */
function protectedConfigDirs({ home = os.homedir(), cwd = process.cwd() } = {}) {
  const dirs = [path.join(home, '.semalt-ai')];
  // Anchor the walk on an absolute, normalized path. With a relative `cwd`,
  // path.dirname() bottoms out at '.', which ends the walk before any real
  // ancestor is visited; with '..' segments it would emit directories that are
  // not ancestors at all. Already-absolute, normalized input is unaffected.
  let dir = path.resolve(cwd);
  while (true) {
    dirs.push(path.join(dir, '.semalt'));
    let atRepoRoot = false;
    try {
      atRepoRoot = fs.existsSync(path.join(dir, '.git'));
    } catch {
      // unreadable — keep walking
    }
    if (atRepoRoot) break;
    const parent = path.dirname(dir);
    if (parent === dir) break; // filesystem root
    dir = parent;
  }
  return dirs;
}
342
+
40
343
  // TAG_REGISTRY classifies every XML tag the stream parser may encounter.
41
344
  // For 'tool'-type tags, the *parameter schema* lives in lib/tool_specs.js
42
345
  // (TOOL_SPECS) — that file is the single source of truth for argument
@@ -68,6 +371,8 @@ const TAG_REGISTRY = {
68
371
  file_stat: { type: 'tool', streaming: false, label: 'Inspecting file' },
69
372
  edit_file: { type: 'tool', streaming: false, label: 'Editing file' },
70
373
  search_files: { type: 'tool', streaming: false, label: 'Searching files' },
374
+ grep: { type: 'tool', streaming: false, label: 'Searching (grep)' },
375
+ glob: { type: 'tool', streaming: false, label: 'Finding files (glob)' },
71
376
  search_in_file: { type: 'tool', streaming: false, label: 'Searching in file' },
72
377
  replace_in_file: { type: 'tool', streaming: false, label: 'Replacing in file' },
73
378
  get_env: { type: 'tool', streaming: false, label: 'Reading env var' },
@@ -75,12 +380,26 @@ const TAG_REGISTRY = {
75
380
  download: { type: 'tool', streaming: false, label: 'Downloading' },
76
381
  upload: { type: 'tool', streaming: false, label: 'Uploading' },
77
382
  http_get: { type: 'tool', streaming: false, label: 'Fetching URL' },
383
+ web_search: { type: 'tool', streaming: false, label: 'Web search' },
78
384
  ask_user: { type: 'tool', streaming: false, label: 'Asking user' },
79
385
  store_memory: { type: 'tool', streaming: false, label: 'Storing memory' },
80
386
  recall_memory: { type: 'tool', streaming: false, label: 'Recalling memory' },
81
387
  list_memories: { type: 'tool', streaming: false, label: 'Listing memories' },
82
388
  system_info: { type: 'tool', streaming: false, label: 'Reading system info' },
83
389
 
390
+ // Native git tools (Task 5.1). Read-only: git_status/git_diff/git_log (and the
391
+ // list ops of git_branch/git_worktree). Mutating: git_add/git_commit/
392
+ // git_branch(create-delete)/git_checkout/git_worktree(add-remove). All shell
393
+ // out through the same sandbox + deny-list chokepoint as <shell>.
394
+ git_status: { type: 'tool', streaming: false, label: 'git status' },
395
+ git_diff: { type: 'tool', streaming: false, label: 'git diff' },
396
+ git_log: { type: 'tool', streaming: false, label: 'git log' },
397
+ git_add: { type: 'tool', streaming: false, label: 'git add' },
398
+ git_commit: { type: 'tool', streaming: false, label: 'git commit' },
399
+ git_branch: { type: 'tool', streaming: false, label: 'git branch' },
400
+ git_checkout: { type: 'tool', streaming: false, label: 'git checkout' },
401
+ git_worktree: { type: 'tool', streaming: false, label: 'git worktree' },
402
+
84
403
  // MiniMax-M2 native tool-call wrappers. `extractToolCalls` parses them into
85
404
  // internal calls; classifying them here keeps raw XML out of the UI stream.
86
405
  'minimax:tool_call': { type: 'tool', streaming: false, label: 'Using tool' },
@@ -126,6 +445,7 @@ const TAG_REGISTRY = {
126
445
  // at the top of the file) keeps the module boundary one-directional —
127
446
  // tool_specs.js does not depend on this file.
128
447
  const { TOOL_SPECS } = require('./tool_specs');
448
+ const { registryToolNames, TOOL_REGISTRY } = require('./tool_registry');
129
449
  (function assertToolSpecParity() {
130
450
  const registryTools = Object.entries(TAG_REGISTRY)
131
451
  .filter(([, v]) => v.type === 'tool')
@@ -140,11 +460,60 @@ const { TOOL_SPECS } = require('./tool_specs');
140
460
  if (extra.length) parts.push(`extra in TOOL_SPECS: ${extra.join(', ')}`);
141
461
  throw new Error(`TAG_REGISTRY ↔ TOOL_SPECS mismatch — ${parts.join('; ')}`);
142
462
  }
463
+
464
+ // Tool-registry completeness (Task 1.4): the runtime tool registry
465
+ // (lib/tool_registry.js) must resolve exactly the set of non-wrapper tools —
466
+ // every callable TOOL_SPECS entry has a registry entry, and vice-versa. This
467
+ // makes "add a tool = one registry entry + its spec" enforceable at load time.
468
+ const callableSpecs = Object.entries(TOOL_SPECS)
469
+ .filter(([, v]) => !v.wrapper)
470
+ .map(([k]) => k)
471
+ .sort();
472
+ const regTools = registryToolNames().slice().sort();
473
+ const regMissing = callableSpecs.filter((k) => !regTools.includes(k));
474
+ const regExtra = regTools.filter((k) => !callableSpecs.includes(k));
475
+ if (regMissing.length || regExtra.length) {
476
+ const parts = [];
477
+ if (regMissing.length) parts.push(`missing in TOOL_REGISTRY: ${regMissing.join(', ')}`);
478
+ if (regExtra.length) parts.push(`extra in TOOL_REGISTRY: ${regExtra.join(', ')}`);
479
+ throw new Error(`TOOL_SPECS ↔ TOOL_REGISTRY mismatch — ${parts.join('; ')}`);
480
+ }
481
+
482
+ // Executor/permission completeness (Task 1.4b): now that each tool carries its
483
+ // own executor and permission descriptor, every non-wrapper registry entry
484
+ // must provide BOTH — so "add a file tool = one registration object (parse +
485
+ // native + execute + permission)" is enforceable at load time.
486
+ const incomplete = TOOL_REGISTRY.filter(
487
+ (e) => typeof e.execute !== 'function' || typeof e.permission !== 'function',
488
+ ).map((e) => e.tool);
489
+ if (incomplete.length) {
490
+ throw new Error(`TOOL_REGISTRY entries missing execute/permission: ${incomplete.join(', ')}`);
491
+ }
143
492
  })();
144
493
 
145
494
  module.exports = {
146
495
  CONFIG_PATH,
496
+ protectedConfigDirs,
147
497
  DEFAULT_API_TIMEOUT_MS,
498
+ DEFAULT_MAX_ITERATIONS,
499
+ DEFAULT_VERIFY_TIMEOUT_MS,
500
+ DEFAULT_VERIFY_MAX_ATTEMPTS,
501
+ DEFAULT_CHECKPOINT_MAX_FILE_BYTES,
502
+ DEFAULT_CHECKPOINT_MAX_PER_SESSION,
503
+ DEFAULT_IMAGE_MAX_BYTES,
504
+ DEFAULT_GREP_HEAD_LIMIT,
505
+ DEFAULT_GLOB_HEAD_LIMIT,
506
+ DEFAULT_GREP_GLOB_MAX_TOKENS,
507
+ DEFAULT_READ_LINE_CAP,
508
+ DEFAULT_READ_MAX_TOKENS,
509
+ DEFAULT_READ_MAX_FILE_KB,
510
+ DEFAULT_MAX_OUTPUT_LINES,
511
+ OUTPUT_HEAD_RATIO,
512
+ DEFAULT_OUTPUT_MAX_TOKENS,
513
+ DEFAULT_MCP_MAX_RESULT_TOKENS,
514
+ DEFAULT_SUBAGENT_MAX_RESULT_TOKENS,
515
+ DEFAULT_WEB_MAX_CONTENT_TOKENS,
516
+ DEFAULT_USER_AGENT,
148
517
  DEFAULT_CONFIG,
149
518
  PACKAGE_JSON,
150
519
  TAG_REGISTRY,
package/lib/debug.js ADDED
@@ -0,0 +1,106 @@
1
+ 'use strict';
2
+
3
+ // Two mutually-exclusive debug modes, configured once at startup from the
4
+ // CLI flags (--debug or --debug-file <path>).
5
+ //
6
+ // off — no debug output anywhere.
7
+ // simple — visible inline. Basic per-iteration info routed through
8
+ // writer.scrollback so the TUI keeps working (no SSE dumps,
9
+ // no per-chunk noise).
10
+ // file — every debug call (basic AND extended) is written to a file.
11
+ // Nothing debug-related goes to stdout. The TUI stays clean.
12
+ //
13
+ // Two log functions with a clear semantic split:
14
+ //
15
+ // log(line) — "always-on" debug. Visible in simple mode (scrollback)
16
+ // and file mode (file). Silent in off mode.
17
+ // logExtended(line) — extended traces (raw SSE, request bodies, delta
18
+ // accumulators). Visible only in file mode.
19
+ //
20
+ // File-mode lines are formatted as `[ISO-timestamp] <line>\n` so they're
21
+ // greppable and tail-friendly.
22
+
23
+ const fs = require('fs');
24
+
25
+ let mode = 'off';
26
+ let fileStream = null;
27
+
28
/**
 * Configure the debug mode for this process from the parsed CLI flags.
 *
 * Exactly one of the two options may be set; with neither set, debugging is
 * switched off.
 *
 * @param {object} [options]
 * @param {boolean} [options.debug] - Enable "simple" (inline) debug output.
 * @param {string} [options.debugFile] - Enable "file" mode, appending to this path.
 * @throws {Error} When both `debug` and `debugFile` are supplied.
 */
function init({ debug, debugFile } = {}) {
  if (debug && debugFile) {
    // Belt-and-braces: cli.js (args parser) errors out before this is ever
    // reached. Throw rather than silently coerce so any internal misuse is
    // surfaced loudly.
    throw new Error('debug and debugFile are mutually exclusive');
  }

  // A repeated init() must not leak the stream opened by an earlier call.
  if (fileStream) {
    try { fileStream.end(); } catch {}
    fileStream = null;
  }

  if (debugFile) {
    const stream = fs.createWriteStream(debugFile, { flags: 'a' });
    // Open/write failures arrive asynchronously as an 'error' event, which the
    // try/catch around write() cannot see. Without a listener Node treats the
    // event as an uncaught exception and kills the process, so a bad debug
    // path would take the whole CLI down. Debug output is best-effort: drop
    // the stream and fall back to "off" instead.
    stream.on('error', () => {
      // Only reset state if this stream is still the active one.
      if (fileStream === stream) {
        fileStream = null;
        mode = 'off';
      }
    });
    mode = 'file';
    fileStream = stream;
    const ts = new Date().toISOString();
    try {
      fileStream.write(`\n[${ts}] [session] semalt-code debug session start pid=${process.pid}\n`);
    } catch {}
  } else if (debug) {
    mode = 'simple';
  } else {
    mode = 'off';
  }
}
48
+
49
/** @returns {boolean} True when any debug mode other than 'off' is selected. */
function isActive() {
  return mode !== 'off';
}

/** @returns {boolean} True when the current mode is 'simple'. */
function isSimple() {
  return mode === 'simple';
}

/** @returns {boolean} True when the current mode is 'file'. */
function isFile() {
  return mode === 'file';
}

/** @returns {string} The current mode: 'off', 'simple' or 'file'. */
function getMode() {
  return mode;
}
53
+
54
/**
 * Append one timestamped line to the debug file, if a stream is open.
 *
 * @param {string} line - Text to write; a trailing newline is added here.
 */
function _writeFile(line) {
  if (fileStream) {
    const stamp = new Date().toISOString();
    try {
      fileStream.write(`[${stamp}] ${line}\n`);
    } catch {
      // Best-effort: a failed debug write is deliberately ignored.
    }
  }
}
59
+
60
+ // "Always-on" debug — visible in simple mode (scrollback) and file mode (file).
61
+ // Silent in off mode. Multi-line input gets one timestamp per line in file mode
62
+ // so each line stays greppable.
63
/**
 * @param {*} line - Value to log; it is converted with String() first.
 */
function log(line) {
  if (mode === 'off') return;

  const text = String(line);

  if (mode !== 'simple') {
    // File mode: one timestamped entry per input line.
    text.split('\n').forEach((entry) => _writeFile(entry));
    return;
  }

  // Lazy-require to avoid a require cycle: writer pulls in this module
  // for its own drift diagnostic.
  require('./ui/writer').scrollback(text);
}
75
+
76
+ // Extended-only debug — visible in file mode only. Used for high-volume
77
+ // per-chunk traces (raw SSE, request body dumps, accumulator state) that
78
+ // would shred the TUI if printed inline.
79
/**
 * @param {*} line - Value to log; it is converted with String() first.
 */
function logExtended(line) {
  if (mode === 'file') {
    // One timestamped entry per input line.
    String(line)
      .split('\n')
      .forEach((entry) => _writeFile(entry));
  }
}
84
+
85
/**
 * Shut debugging down: write a session-end marker and end the file stream if
 * one is open, then reset the mode to 'off'.
 */
function close() {
  const stream = fileStream;
  if (stream) {
    try {
      const stamp = new Date().toISOString();
      stream.write(`[${stamp}] [session] end pid=${process.pid}\n`);
      stream.end();
    } catch {
      // Best-effort: failures while closing the debug file are ignored.
    }
    fileStream = null;
  }
  mode = 'off';
}
96
+
97
+ module.exports = {
98
+ init,
99
+ isActive,
100
+ isSimple,
101
+ isFile,
102
+ getMode,
103
+ log,
104
+ logExtended,
105
+ close,
106
+ };