@semalt-ai/code 1.8.4 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151) hide show
  1. package/.claude/settings.local.json +8 -1
  2. package/.github/workflows/ci.yml +69 -0
  3. package/CLAUDE.md +1588 -27
  4. package/README.md +147 -3
  5. package/TECHNICAL_DEBT.md +66 -0
  6. package/examples/embed.js +74 -0
  7. package/index.js +259 -11
  8. package/lib/agent.js +935 -181
  9. package/lib/api.js +308 -55
  10. package/lib/args.js +96 -2
  11. package/lib/audit.js +23 -1
  12. package/lib/background.js +584 -0
  13. package/lib/checkpoints.js +757 -0
  14. package/lib/commands/auth.js +94 -0
  15. package/lib/commands/chat-session.js +306 -0
  16. package/lib/commands/chat-slash.js +399 -0
  17. package/lib/commands/chat-turn.js +446 -0
  18. package/lib/commands/chat.js +403 -0
  19. package/lib/commands/custom.js +157 -0
  20. package/lib/commands/history-utils.js +66 -0
  21. package/lib/commands/index.js +268 -0
  22. package/lib/commands/mcp.js +113 -0
  23. package/lib/commands/oneshot.js +193 -0
  24. package/lib/commands/registry.js +269 -0
  25. package/lib/commands/tasks.js +89 -0
  26. package/lib/compact.js +87 -0
  27. package/lib/config.js +346 -11
  28. package/lib/constants.js +372 -3
  29. package/lib/debug.js +106 -0
  30. package/lib/deny.js +199 -0
  31. package/lib/doctor.js +160 -0
  32. package/lib/headless.js +167 -0
  33. package/lib/hooks.js +286 -0
  34. package/lib/images.js +264 -0
  35. package/lib/internals.js +49 -0
  36. package/lib/mcp/boundary.js +131 -0
  37. package/lib/mcp/client.js +270 -0
  38. package/lib/mcp/oauth.js +134 -0
  39. package/lib/memory.js +209 -0
  40. package/lib/metrics.js +37 -2
  41. package/lib/payload.js +54 -0
  42. package/lib/permission-rules.js +401 -0
  43. package/lib/permissions.js +100 -10
  44. package/lib/pricing.js +67 -0
  45. package/lib/proc.js +158 -0
  46. package/lib/prompts.js +88 -8
  47. package/lib/sandbox.js +568 -0
  48. package/lib/sdk.js +328 -0
  49. package/lib/secrets.js +211 -0
  50. package/lib/skills.js +223 -0
  51. package/lib/subagents.js +516 -0
  52. package/lib/tool_registry.js +2558 -0
  53. package/lib/tool_specs.js +236 -9
  54. package/lib/tools.js +370 -944
  55. package/lib/ui/chat-history.js +19 -1
  56. package/lib/ui/format.js +101 -6
  57. package/lib/ui/input-field.js +16 -7
  58. package/lib/ui/status-bar.js +79 -11
  59. package/lib/ui/terminal.js +10 -4
  60. package/lib/ui/theme.js +1 -0
  61. package/lib/ui/web-activity.js +218 -0
  62. package/lib/ui/writer.js +7 -9
  63. package/lib/verify.js +229 -0
  64. package/lib/web-extract.js +213 -0
  65. package/lib/web-summarize.js +68 -0
  66. package/package.json +19 -4
  67. package/scripts/lint.js +57 -0
  68. package/test/agent-loop.test.js +389 -0
  69. package/test/background.test.js +414 -0
  70. package/test/chat.test.js +114 -0
  71. package/test/checkpoints-agent.test.js +181 -0
  72. package/test/checkpoints.test.js +650 -0
  73. package/test/command-registry.test.js +160 -0
  74. package/test/compact.test.js +116 -0
  75. package/test/completion-lazy.test.js +52 -0
  76. package/test/config-merge.test.js +324 -0
  77. package/test/config-quarantine.test.js +128 -0
  78. package/test/config-write-guard-allow-anywhere.test.js +56 -0
  79. package/test/config-write-guard-skip.test.js +46 -0
  80. package/test/config-write-guard.test.js +153 -0
  81. package/test/context-split.test.js +215 -0
  82. package/test/cost-doctor.test.js +142 -0
  83. package/test/custom-commands-chat.test.js +106 -0
  84. package/test/custom-commands.test.js +230 -0
  85. package/test/deny-windows.test.js +120 -0
  86. package/test/deny.test.js +83 -0
  87. package/test/download-allow-anywhere.test.js +66 -0
  88. package/test/download-confine.test.js +153 -0
  89. package/test/executors.test.js +362 -0
  90. package/test/extract-tool-calls.test.js +315 -0
  91. package/test/fetch-url-validation.test.js +219 -0
  92. package/test/fixtures/tool-calls.js +57 -0
  93. package/test/fixtures/web-page.js +91 -0
  94. package/test/git-tools.test.js +384 -0
  95. package/test/grep-glob-serialize.test.js +242 -0
  96. package/test/grep-glob.test.js +268 -0
  97. package/test/harness/README.md +57 -0
  98. package/test/harness/chat-harness.js +142 -0
  99. package/test/harness/memwarn-headless-child.js +65 -0
  100. package/test/harness/mock-llm.js +120 -0
  101. package/test/harness/mock-mcp-server.js +142 -0
  102. package/test/harness/sse-server.js +69 -0
  103. package/test/headless.test.js +203 -0
  104. package/test/history-utils.test.js +88 -0
  105. package/test/hooks-agent.test.js +238 -0
  106. package/test/hooks-verify-sandbox.test.js +232 -0
  107. package/test/hooks.test.js +216 -0
  108. package/test/http-get-user-agent.test.js +142 -0
  109. package/test/images-api.test.js +208 -0
  110. package/test/images.test.js +238 -0
  111. package/test/max-iterations.test.js +216 -0
  112. package/test/mcp-boundary.test.js +57 -0
  113. package/test/mcp-client.test.js +267 -0
  114. package/test/mcp-oauth.test.js +86 -0
  115. package/test/memory-truncation-warning.test.js +222 -0
  116. package/test/memory.test.js +198 -0
  117. package/test/native-dispatch.test.js +356 -0
  118. package/test/output-chokepoint.test.js +188 -0
  119. package/test/path-guards.test.js +134 -0
  120. package/test/payload.test.js +99 -0
  121. package/test/permission-rules-agent.test.js +210 -0
  122. package/test/permission-rules.test.js +297 -0
  123. package/test/permissions.test.js +163 -0
  124. package/test/plan-mode.test.js +167 -0
  125. package/test/read-paginate.test.js +275 -0
  126. package/test/readonly-tools.test.js +177 -0
  127. package/test/result-cap.test.js +233 -0
  128. package/test/sandbox-agent.test.js +147 -0
  129. package/test/sandbox-integration.test.js +216 -0
  130. package/test/sandbox.test.js +408 -0
  131. package/test/sdk.test.js +234 -0
  132. package/test/shell-output-cap.test.js +181 -0
  133. package/test/skills-chat.test.js +110 -0
  134. package/test/skills.test.js +295 -0
  135. package/test/smoke.test.js +68 -0
  136. package/test/status-bar-pause.test.js +164 -0
  137. package/test/stream-parser.test.js +147 -0
  138. package/test/subagents-agent.test.js +178 -0
  139. package/test/subagents.test.js +222 -0
  140. package/test/tool-registry.test.js +85 -0
  141. package/test/trim-budget.test.js +101 -0
  142. package/test/verify-agent.test.js +317 -0
  143. package/test/verify.test.js +141 -0
  144. package/test/web-activity-ordering.test.js +194 -0
  145. package/test/web-activity.test.js +207 -0
  146. package/test/web-data-extraction-guidance.test.js +71 -0
  147. package/test/web-extract.test.js +185 -0
  148. package/test/web-fetch-agent.test.js +291 -0
  149. package/test/web-fetch-mode.test.js +193 -0
  150. package/test/web-search.test.js +380 -0
  151. package/lib/commands.js +0 -1288
package/README.md CHANGED
@@ -18,7 +18,7 @@ It provides an interactive chat interface, one-shot code generation, AI-assisted
18
18
 
19
19
  ## Requirements
20
20
 
21
- - Node.js `>=16`
21
+ - Node.js `>=18` (Node 16 is end-of-life; `node --test` is unreliable on it)
22
22
  - An OpenAI-compatible API endpoint
23
23
 
24
24
  The default configuration expects a local API server at `http://127.0.0.1:8800`.
@@ -114,6 +114,14 @@ semalt-code [command] [options]
114
114
  - `semalt-code init`
115
115
  Creates or updates the local config file.
116
116
 
117
+ - `semalt-code mcp <list|status|add|remove|auth>`
118
+ Manage MCP servers. `add` registers a server (stdio `command`/`args` or remote `--url`),
119
+ `status` connects and reports each server's tools, `auth` runs the OAuth flow for a remote
120
+ server (tokens are stored in the OS keychain). Discovered tools register as
121
+ `mcp__<server>__<tool>` and run through the same approval-gated agent loop as built-ins —
122
+ MCP results are treated as untrusted external content, and MCP tools require approval by
123
+ default (opt in per server with `allow`/`allowAll`). In chat, `/mcp` shows the same status.
124
+
117
125
  ### Options
118
126
 
119
127
  - `-m, --model <name>`
@@ -246,6 +254,99 @@ Saved profiles can then be selected inside chat mode with `/model` or `/models`.
246
254
  semalt-code --version
247
255
  ```
248
256
 
257
+ ## Embedding SDK
258
+
259
+ `@semalt-ai/code` can be embedded in another program as a library, not just run as a
260
+ CLI. There are **two tiers**, physically separated by the package `exports` map:
261
+
262
+ | Import | Surface | Stability |
263
+ |--------|---------|-----------|
264
+ | `require('@semalt-ai/code')` | `createAgent` — the high-level facade | **Stable** (semver) |
265
+ | `require('@semalt-ai/code/internals')` | `createAgentRunner`, `createApiClient`, the registries, … | **Unstable** — no guarantee, may change in any release |
266
+
267
+ Both subpaths work for `require` and `import` (the package is CommonJS; ESM consumers
268
+ get the named exports via interop).
269
+
270
+ ### The facade
271
+
272
+ ```js
273
+ const { createAgent } = require('@semalt-ai/code');
274
+
275
+ const agent = createAgent({
276
+ apiBase: 'http://127.0.0.1:8800',
277
+ apiKey: process.env.SEMALT_API_KEY,
278
+ model: 'my-model',
279
+ // permission policy — see below
280
+ });
281
+
282
+ const res = await agent.run('Summarise README.md in three bullets');
283
+ // res = { result, toolCalls, usage, cost, stopReason, verifyStatus, messages }
284
+ console.log(res.result);
285
+
286
+ await agent.close(); // REQUIRED — releases MCP connections / processes
287
+ ```
288
+
289
+ `run(prompt, opts?)` executes a prompt to completion and returns the same structured
290
+ envelope headless mode produces, plus `messages` so you can continue the conversation
291
+ (`agent.run(next, { messages: res.messages })`). Stream events with
292
+ `agent.on('token' | 'assistant' | 'tool' | 'tool-start' | 'error' | 'warning' | 'done', cb)`.
293
+
294
+ ### Permission policy — safe by default
295
+
296
+ There is no terminal in embedded use, so the facade takes a **programmatic** permission
297
+ policy. With **no** policy provided, the default is to **refuse every mutating /
298
+ effectful tool** (read-only tools still run) — it never auto-approves:
299
+
300
+ ```js
301
+ // 1) an async approver callback
302
+ createAgent({ /* … */, approve: async (call) => {
303
+ // call = { actionType, description, tag, rule }
304
+ return call.tag !== 'delete_file'; // your decision
305
+ }});
306
+
307
+ // 2) preset allow/deny/ask rules (the same engine as the CLI's per-pattern rules)
308
+ createAgent({ /* … */, rules: [
309
+ { tool: 'write_file', path: 'src/**', action: 'allow' },
310
+ { tool: 'shell', pattern: 'git *', action: 'allow' },
311
+ { tool: 'shell', pattern: '/curl.*\\| *sh/', action: 'deny' },
312
+ ]});
313
+
314
+ // 3) coarse tiers (like --allow-fs/exec/net) and read-only
315
+ createAgent({ /* … */, allow: ['fs', 'net'], readonly: true });
316
+ ```
317
+
318
+ The **OS sandbox** and the **destructive-command deny-list** stay **on** even when
319
+ there is no TTY. The sandbox is opt-out **only** via explicit config
320
+ (`sandbox: { mode: 'off' }`); a host can permit running a command unsandboxed when the
321
+ kernel primitive is missing by supplying `onUnsandboxed`. A `deny` rule and the
322
+ deny-list are honored even under the deliberate `dangerouslySkipPermissions: true`
323
+ gate opt-out.
324
+
325
+ By default the SDK does **not** read your `~/.semalt-ai/config.json` (a server wants
326
+ isolation, not the operator's personal defaults) — pass `loadUserConfig: true` to layer
327
+ it in. Pass arbitrary config under `config: { … }`.
328
+
329
+ ### Lifecycle & multi-instance
330
+
331
+ - **Always call `await agent.close()`** when done. It disconnects MCP servers and frees
332
+ handles. `run()` after `close()` throws.
333
+ - Each `createAgent` instance keeps its **own config** — two instances never share
334
+ config state.
335
+ - **Process-global state (documented limitation).** A few things are process-wide, not
336
+ per-instance, because they were built for the single-process CLI:
337
+ - the **dynamic tool registry** (MCP + `spawn_agent`) is global — two instances with
338
+ *different* MCP servers would see each other's tools;
339
+ - file-path confinement (`isPathSafe`) and the deny-list/secret/config guards read
340
+ `process.cwd()` and `process.argv` **once at module load**, so they're shared by all
341
+ instances and the deny-list opt-out requires launching the host process with
342
+ `--dangerously-skip-permissions`;
343
+ - the stdout-chrome-suppression flag is process-wide.
344
+
345
+ For most embeddings (one agent per process, or instances sharing a CWD and MCP set)
346
+ none of this matters; run fully-isolated agents in separate processes if it does.
347
+
348
+ A runnable example lives in [`examples/embed.js`](examples/embed.js).
349
+
249
350
  ## How Responses Are Rendered
250
351
 
251
352
  The CLI formats streamed output for terminal readability:
@@ -259,13 +360,56 @@ The CLI formats streamed output for terminal readability:
259
360
 
260
361
  If the backend returns `reasoning_content`, the CLI also shows a lightweight `thinking` section during streaming.
261
362
 
363
+ ## Dependency Policy
364
+
365
+ This project keeps its runtime dependency surface **minimal, vetted, and pinned**. It
366
+ ran with **zero runtime dependencies** through its first phases; as of v1.9.0 it has a
367
+ single one — the official Model Context Protocol SDK,
368
+ [`@modelcontextprotocol/sdk`](https://www.npmjs.com/package/@modelcontextprotocol/sdk) —
369
+ adopted to implement MCP against its reference (rather than hand-rolling the protocol).
370
+
371
+ The policy for any runtime dependency:
372
+
373
+ - **Minimal & justified** — added only when a Node.js built-in genuinely cannot do the
374
+ job, with a recorded rationale.
375
+ - **Pinned to an exact version** — no `^`/`~` ranges in `package.json`. Upgrades are
376
+ deliberate, reviewed commits.
377
+ - **Reviewed with the lockfile** — `package-lock.json` is committed; adding or bumping a
378
+ dependency is a reviewed change.
379
+
380
+ **Supply-chain checks.** CI runs `npm ci` (lockfile integrity) and
381
+ `npm audit --omit=dev --audit-level=high` (fails on HIGH/CRITICAL advisories in the
382
+ runtime tree). The full audit-findings policy is documented in `CLAUDE.md`.
383
+
384
+ The SDK is ESM-only while this project is CommonJS, so it is loaded in exactly one
385
+ place — `lib/mcp/boundary.js`, via dynamic `import()` — and the rest of the codebase
386
+ stays CommonJS.
387
+
262
388
  ## Notes and Limitations
263
389
 
264
- - This project is currently a single-file CLI implementation centered in `index.js`.
265
- - It uses Node's built-in `http` and `https` modules and does not require extra runtime dependencies.
390
+ - It uses Node's built-in `http` and `https` modules for all networking; the only
391
+ runtime dependency is the MCP SDK (see **Dependency Policy** above).
266
392
  - The `edit` command writes the model output directly back to the target file, so review prompts and backend behavior carefully.
267
393
  - Shell and file operations are approval-based, but they still execute on the local system after approval.
268
394
 
395
+ ### Not yet implemented
396
+
397
+ A few capabilities are intentionally absent today — documented here so you don't build
398
+ on something that isn't wired up. See **Deferred / Not Yet Implemented** in `CLAUDE.md`
399
+ for the full list and roadmap status.
400
+
401
+ - **MCP tools are interactive-chat only** — they are not connected in the `code`/`edit`/`shell`
402
+ one-shot commands or headless `-p/--print` mode.
403
+ - **No session auto-resume** — there's no "resume your last session?" prompt at startup;
404
+ use `/history` (local sessions) or `--resume <chat-id>` (dashboard chats).
405
+ - **Proxy env vars are not consumed** — `HTTPS_PROXY`/`HTTP_PROXY` are read into config but
406
+ outbound HTTP does not yet route through a proxy agent (matters on corporate networks).
407
+ - Planned for Phase 4+: per-pattern permissions, self-verification, checkpoints/rewind, and an OS sandbox.
408
+
409
+ ## Contributing
410
+
411
+ PRs must pass the CI pipeline (`npm ci` + `npm audit` + lint + tests on Linux/macOS/Windows, Node 18 & 20) before they can be merged. Run `npm ci && npm run lint && npm test` locally first. Any dependency change must follow the **Dependency Policy** above (exact pin, committed lockfile, justification).
412
+
269
413
  ## License
270
414
 
271
415
  MIT
@@ -0,0 +1,66 @@
1
+ ## Activity region in-place update breaks when a modal is open
2
+
3
+ When a modal occupies screen lines below an active activity bubble, the
4
+ activity region's redraw mechanism appears to fall back to scrollback
5
+ append per tick instead of in-place rewrite. Surfaced via the `ask_user`
6
+ ticking-timer bug; mitigated by making `ask_user` a static bubble.
7
+
8
+ Latent: any future long-running tool that opens a modal concurrently
9
+ (none today) will reproduce the fragmentation. Fix likely involves
10
+ making the activity region modal-aware in `lib/ui/writer.js` — when a
11
+ modal region is active, route activity updates through a path that
12
+ clears modal, redraws activity, redraws modal — or reserves activity
13
+ above the modal in a way that survives modal lifecycle.
14
+
15
+ Not blocking. Revisit if a second use-case appears.
16
+
17
+ ## `cmdShell` and `chatStream` write to stdout bypassing the writer
18
+
19
+ Several call sites currently emit directly to `process.stdout.write`
20
+ without going through `lib/ui/writer.js`:
21
+
22
+ - `lib/commands.js` (`cmdShell`)
23
+ - `lib/api.js` (streaming output path)
24
+
25
+ These were flagged during the Phase 2 writer audit and annotated as
26
+ `// audit: allowed` because they need to interleave with synchronous
27
+ writes from `StreamRenderer`. Routing them through the writer today
28
+ would require buffering or sequencing changes that don't compose with
29
+ how `StreamRenderer` flushes content per chunk.
30
+
31
+ Resolves when: `StreamRenderer` itself is migrated to write through
32
+ the writer. After that, the bypass annotations can be removed and
33
+ these call sites become normal `writer.scrollback(...)` calls.
34
+
35
+ Not blocking. The audit annotation makes the bypass intentional and
36
+ greppable. Revisit when `StreamRenderer` migration is on the table.
37
+
38
+ ## Tool result storage: single `content` field used for both model and UI
39
+
40
+ Storage (PHP backend, MySQL `messages` table) holds one `content` field
41
+ per tool result. The full payload is required for the model on
42
+ subsequent turns, but the UI needs a compact summary (e.g. `net · GET
43
+ https://... · 200 · 256 KB`).
44
+
45
+ Today this is handled UI-side: `summarizeToolResult` in
46
+ `lib/ui/format.js` runs read-side heuristics on the raw `content` every
47
+ time `/history` renders. Heuristics cover HTTP, exec, file ops, with
48
+ a fallback for unknown shapes. They work in practice but are a
49
+ compromise — any tool whose output format drifts will fall through to
50
+ the generic fallback until the heuristic is updated.
51
+
52
+ Full fix: storage holds both `content` (full, model-bound) and
53
+ `display` (pre-rendered summary, UI-bound). Summary is generated
54
+ write-side at tool execution time, when the live activity bubble
55
+ already produces the right string — that string just needs to be
56
+ captured and persisted alongside the full content.
57
+
58
+ Resolves when: backend schema migration for native function calling
59
+ lands (Phase 2.2 of the native-tools plan, which already touches the
60
+ `messages` table). Adding a `display` column in the same migration is
61
+ cheap; doing it as a separate migration later is not. When this lands,
62
+ `summarizeToolResult` becomes unnecessary for new tool results; it
63
+ stays only as a fallback for legacy rows lacking `display`.
64
+
65
+ Not blocking — current heuristics cover all 33 tools' output shapes.
66
+ Track until Phase 2.2 lands.
@@ -0,0 +1,74 @@
1
+ #!/usr/bin/env node
2
+ 'use strict';
3
+
4
+ // ---------------------------------------------------------------------------
5
+ // Embedding SDK example (Task 5.2)
6
+ // ---------------------------------------------------------------------------
7
+ //
8
+ // Shows the supported, stable way to embed the agent in another program via the
9
+ // `createAgent` facade: a permission policy that defaults safe, streaming
10
+ // events, the structured run result, and the required close() teardown.
11
+ //
12
+ // Run it against any OpenAI-compatible endpoint:
13
+ //
14
+ // SEMALT_API_BASE=http://127.0.0.1:8800 \
15
+ // SEMALT_API_KEY=sk-… \
16
+ // SEMALT_MODEL=my-model \
17
+ // node examples/embed.js "List the files in this directory"
18
+ //
19
+ // (From outside this repo, `require('@semalt-ai/code')` instead of the relative
20
+ // path below.)
21
+
22
+ const { createAgent } = require('../lib/sdk'); // → require('@semalt-ai/code')
23
+
24
+ async function main() {
25
+ const prompt = process.argv.slice(2).join(' ') || 'Say hello and tell me what tools you have.';
26
+
27
+ const agent = createAgent({
28
+ apiBase: process.env.SEMALT_API_BASE || 'http://127.0.0.1:8800',
29
+ apiKey: process.env.SEMALT_API_KEY || 'any',
30
+ model: process.env.SEMALT_MODEL || 'default',
31
+
32
+ // Permission policy. With NONE of these, the SDK refuses every mutating
33
+ // tool (the safe default). Here we approve every request except file
34
+ // deletes, directory removals, and moves — your host decides.
35
+ approve: async ({ tag, description }) => {
36
+ const denied = new Set(['delete_file', 'remove_dir', 'move_file']);
37
+ const ok = !denied.has(tag);
38
+ console.error(`[approve] ${ok ? 'ALLOW' : 'DENY '} ${tag} — ${description}`);
39
+ return ok;
40
+ },
41
+
42
+ // The OS sandbox + deny-list stay ON by default. To run unsandboxed when the
43
+ // kernel primitive is missing you'd opt in explicitly, e.g.:
44
+ // sandbox: { mode: 'off' },
45
+ // onUnsandboxed: async () => true,
46
+ });
47
+
48
+ // Stream activity (advisory — the run result is authoritative).
49
+ agent.on('token', (t) => process.stdout.write(t));
50
+ agent.on('tool', (e) => console.error(`\n[tool] ${e.tag} (${e.ms}ms)`));
51
+ agent.on('warning', (m) => console.error(`[warn] ${m}`));
52
+
53
+ try {
54
+ const res = await agent.run(prompt);
55
+ console.log('\n\n--- result ---');
56
+ console.log(res.result);
57
+ console.log('--- meta ---');
58
+ console.log(JSON.stringify({
59
+ toolCalls: res.toolCalls.length,
60
+ usage: res.usage,
61
+ cost: res.cost,
62
+ stopReason: res.stopReason,
63
+ verifyStatus: res.verifyStatus,
64
+ }, null, 2));
65
+ } finally {
66
+ // ALWAYS close — releases MCP connections / spawned processes.
67
+ await agent.close();
68
+ }
69
+ }
70
+
71
+ main().catch((err) => {
72
+ console.error('embed example failed:', err.message);
73
+ process.exit(1);
74
+ });