npm - @semalt-ai/code - Versions diffs - 1.8.4 → 1.19.0 - Mend

@semalt-ai/code 1.8.4 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (151) hide show

package/.claude/settings.local.json +8 -1
package/.github/workflows/ci.yml +69 -0
package/CLAUDE.md +1588 -27
package/README.md +147 -3
package/TECHNICAL_DEBT.md +66 -0
package/examples/embed.js +74 -0
package/index.js +259 -11
package/lib/agent.js +935 -181
package/lib/api.js +308 -55
package/lib/args.js +96 -2
package/lib/audit.js +23 -1
package/lib/background.js +584 -0
package/lib/checkpoints.js +757 -0
package/lib/commands/auth.js +94 -0
package/lib/commands/chat-session.js +306 -0
package/lib/commands/chat-slash.js +399 -0
package/lib/commands/chat-turn.js +446 -0
package/lib/commands/chat.js +403 -0
package/lib/commands/custom.js +157 -0
package/lib/commands/history-utils.js +66 -0
package/lib/commands/index.js +268 -0
package/lib/commands/mcp.js +113 -0
package/lib/commands/oneshot.js +193 -0
package/lib/commands/registry.js +269 -0
package/lib/commands/tasks.js +89 -0
package/lib/compact.js +87 -0
package/lib/config.js +346 -11
package/lib/constants.js +372 -3
package/lib/debug.js +106 -0
package/lib/deny.js +199 -0
package/lib/doctor.js +160 -0
package/lib/headless.js +167 -0
package/lib/hooks.js +286 -0
package/lib/images.js +264 -0
package/lib/internals.js +49 -0
package/lib/mcp/boundary.js +131 -0
package/lib/mcp/client.js +270 -0
package/lib/mcp/oauth.js +134 -0
package/lib/memory.js +209 -0
package/lib/metrics.js +37 -2
package/lib/payload.js +54 -0
package/lib/permission-rules.js +401 -0
package/lib/permissions.js +100 -10
package/lib/pricing.js +67 -0
package/lib/proc.js +158 -0
package/lib/prompts.js +88 -8
package/lib/sandbox.js +568 -0
package/lib/sdk.js +328 -0
package/lib/secrets.js +211 -0
package/lib/skills.js +223 -0
package/lib/subagents.js +516 -0
package/lib/tool_registry.js +2558 -0
package/lib/tool_specs.js +236 -9
package/lib/tools.js +370 -944
package/lib/ui/chat-history.js +19 -1
package/lib/ui/format.js +101 -6
package/lib/ui/input-field.js +16 -7
package/lib/ui/status-bar.js +79 -11
package/lib/ui/terminal.js +10 -4
package/lib/ui/theme.js +1 -0
package/lib/ui/web-activity.js +218 -0
package/lib/ui/writer.js +7 -9
package/lib/verify.js +229 -0
package/lib/web-extract.js +213 -0
package/lib/web-summarize.js +68 -0
package/package.json +19 -4
package/scripts/lint.js +57 -0
package/test/agent-loop.test.js +389 -0
package/test/background.test.js +414 -0
package/test/chat.test.js +114 -0
package/test/checkpoints-agent.test.js +181 -0
package/test/checkpoints.test.js +650 -0
package/test/command-registry.test.js +160 -0
package/test/compact.test.js +116 -0
package/test/completion-lazy.test.js +52 -0
package/test/config-merge.test.js +324 -0
package/test/config-quarantine.test.js +128 -0
package/test/config-write-guard-allow-anywhere.test.js +56 -0
package/test/config-write-guard-skip.test.js +46 -0
package/test/config-write-guard.test.js +153 -0
package/test/context-split.test.js +215 -0
package/test/cost-doctor.test.js +142 -0
package/test/custom-commands-chat.test.js +106 -0
package/test/custom-commands.test.js +230 -0
package/test/deny-windows.test.js +120 -0
package/test/deny.test.js +83 -0
package/test/download-allow-anywhere.test.js +66 -0
package/test/download-confine.test.js +153 -0
package/test/executors.test.js +362 -0
package/test/extract-tool-calls.test.js +315 -0
package/test/fetch-url-validation.test.js +219 -0
package/test/fixtures/tool-calls.js +57 -0
package/test/fixtures/web-page.js +91 -0
package/test/git-tools.test.js +384 -0
package/test/grep-glob-serialize.test.js +242 -0
package/test/grep-glob.test.js +268 -0
package/test/harness/README.md +57 -0
package/test/harness/chat-harness.js +142 -0
package/test/harness/memwarn-headless-child.js +65 -0
package/test/harness/mock-llm.js +120 -0
package/test/harness/mock-mcp-server.js +142 -0
package/test/harness/sse-server.js +69 -0
package/test/headless.test.js +203 -0
package/test/history-utils.test.js +88 -0
package/test/hooks-agent.test.js +238 -0
package/test/hooks-verify-sandbox.test.js +232 -0
package/test/hooks.test.js +216 -0
package/test/http-get-user-agent.test.js +142 -0
package/test/images-api.test.js +208 -0
package/test/images.test.js +238 -0
package/test/max-iterations.test.js +216 -0
package/test/mcp-boundary.test.js +57 -0
package/test/mcp-client.test.js +267 -0
package/test/mcp-oauth.test.js +86 -0
package/test/memory-truncation-warning.test.js +222 -0
package/test/memory.test.js +198 -0
package/test/native-dispatch.test.js +356 -0
package/test/output-chokepoint.test.js +188 -0
package/test/path-guards.test.js +134 -0
package/test/payload.test.js +99 -0
package/test/permission-rules-agent.test.js +210 -0
package/test/permission-rules.test.js +297 -0
package/test/permissions.test.js +163 -0
package/test/plan-mode.test.js +167 -0
package/test/read-paginate.test.js +275 -0
package/test/readonly-tools.test.js +177 -0
package/test/result-cap.test.js +233 -0
package/test/sandbox-agent.test.js +147 -0
package/test/sandbox-integration.test.js +216 -0
package/test/sandbox.test.js +408 -0
package/test/sdk.test.js +234 -0
package/test/shell-output-cap.test.js +181 -0
package/test/skills-chat.test.js +110 -0
package/test/skills.test.js +295 -0
package/test/smoke.test.js +68 -0
package/test/status-bar-pause.test.js +164 -0
package/test/stream-parser.test.js +147 -0
package/test/subagents-agent.test.js +178 -0
package/test/subagents.test.js +222 -0
package/test/tool-registry.test.js +85 -0
package/test/trim-budget.test.js +101 -0
package/test/verify-agent.test.js +317 -0
package/test/verify.test.js +141 -0
package/test/web-activity-ordering.test.js +194 -0
package/test/web-activity.test.js +207 -0
package/test/web-data-extraction-guidance.test.js +71 -0
package/test/web-extract.test.js +185 -0
package/test/web-fetch-agent.test.js +291 -0
package/test/web-fetch-mode.test.js +193 -0
package/test/web-search.test.js +380 -0
package/lib/commands.js +0 -1288

package/README.md CHANGED Viewed

@@ -18,7 +18,7 @@ It provides an interactive chat interface, one-shot code generation, AI-assisted
 ## Requirements
-- Node.js `>=16`
+- Node.js `>=18` (Node 16 is end-of-life; `node --test` is unreliable on it)
 - An OpenAI-compatible API endpoint
 The default configuration expects a local API server at `http://127.0.0.1:8800`.
@@ -114,6 +114,14 @@ semalt-code [command] [options]
 - `semalt-code init`
   Creates or updates the local config file.
+- `semalt-code mcp <list|status|add|remove|auth>`
+  Manage MCP servers. `add` registers a server (stdio `command`/`args` or remote `--url`),
+  `status` connects and reports each server's tools, `auth` runs the OAuth flow for a remote
+  server (tokens are stored in the OS keychain). Discovered tools register as
+  `mcp__<server>__<tool>` and run through the same approval-gated agent loop as built-ins —
+  MCP results are treated as untrusted external content, and MCP tools require approval by
+  default (opt in per server with `allow`/`allowAll`). In chat, `/mcp` shows the same status.
 ### Options
 - `-m, --model <name>`
@@ -246,6 +254,99 @@ Saved profiles can then be selected inside chat mode with `/model` or `/models`.
 semalt-code --version
 ```
+## Embedding SDK
+`@semalt-ai/code` can be embedded in another program as a library, not just run as a
+CLI. There are **two tiers**, physically separated by the package `exports` map:
+| Import | Surface | Stability |
+|--------|---------|-----------|
+| `require('@semalt-ai/code')` | `createAgent` — the high-level facade | **Stable** (semver) |
+| `require('@semalt-ai/code/internals')` | `createAgentRunner`, `createApiClient`, the registries, … | **Unstable** — no guarantee, may change in any release |
+Both subpaths work for `require` and `import` (the package is CommonJS; ESM consumers
+get the named exports via interop).
+### The facade
+```js
+const { createAgent } = require('@semalt-ai/code');
+const agent = createAgent({
+  apiBase: 'http://127.0.0.1:8800',
+  apiKey:  process.env.SEMALT_API_KEY,
+  model:   'my-model',
+  // permission policy — see below
+});
+const res = await agent.run('Summarise README.md in three bullets');
+// res = { result, toolCalls, usage, cost, stopReason, verifyStatus, messages }
+console.log(res.result);
+await agent.close(); // REQUIRED — releases MCP connections / processes
+```
+`run(prompt, opts?)` executes a prompt to completion and returns the same structured
+envelope headless mode produces, plus `messages` so you can continue the conversation
+(`agent.run(next, { messages: res.messages })`). Stream events with
+`agent.on('token' | 'assistant' | 'tool' | 'tool-start' | 'error' | 'warning' | 'done', cb)`.
+### Permission policy — safe by default
+There is no terminal in embedded use, so the facade takes a **programmatic** permission
+policy. With **no** policy provided, the default is to **refuse every mutating /
+effectful tool** (read-only tools still run) — it never auto-approves:
+```js
+// 1) an async approver callback
+createAgent({ /* … */, approve: async (call) => {
+  // call = { actionType, description, tag, rule }
+  return call.tag !== 'delete_file'; // your decision
+}});
+// 2) preset allow/deny/ask rules (the same engine as the CLI's per-pattern rules)
+createAgent({ /* … */, rules: [
+  { tool: 'write_file', path: 'src/**', action: 'allow' },
+  { tool: 'shell',      pattern: 'git *', action: 'allow' },
+  { tool: 'shell',      pattern: '/curl.*\\| *sh/', action: 'deny' },
+]});
+// 3) coarse tiers (like --allow-fs/exec/net) and read-only
+createAgent({ /* … */, allow: ['fs', 'net'], readonly: true });
+```
+The **OS sandbox** and the **destructive-command deny-list** stay **on** regardless of
+there being no TTY. The sandbox is opt-out **only** via explicit config
+(`sandbox: { mode: 'off' }`); a host can permit running a command unsandboxed when the
+kernel primitive is missing by supplying `onUnsandboxed`. A `deny` rule and the
+deny-list are honored even under the deliberate `dangerouslySkipPermissions: true`
+gate opt-out.
+By default the SDK does **not** read your `~/.semalt-ai/config.json` (a server wants
+isolation, not the operator's personal defaults) — pass `loadUserConfig: true` to layer
+it in. Pass arbitrary config under `config: { … }`.
+### Lifecycle & multi-instance
+- **Always call `await agent.close()`** when done. It disconnects MCP servers and frees
+  handles. `run()` after `close()` throws.
+- Each `createAgent` instance keeps its **own config** — two instances never share
+  config state.
+- **Process-global state (documented limitation).** A few things are process-wide, not
+  per-instance, because they were built for the single-process CLI:
+  - the **dynamic tool registry** (MCP + `spawn_agent`) is global — two instances with
+    *different* MCP servers would see each other's tools;
+  - file-path confinement (`isPathSafe`) and the deny-list/secret/config guards read
+    `process.cwd()` and `process.argv` **once at module load**, so they're shared by all
+    instances and the deny-list opt-out requires launching the host process with
+    `--dangerously-skip-permissions`;
+  - the stdout-chrome-suppression flag is process-wide.
+  For most embeddings (one agent per process, or instances sharing a CWD and MCP set)
+  none of this matters; run fully-isolated agents in separate processes if it does.
+A runnable example lives in [`examples/embed.js`](examples/embed.js).
 ## How Responses Are Rendered
 The CLI formats streamed output for terminal readability:
@@ -259,13 +360,56 @@ The CLI formats streamed output for terminal readability:
 If the backend returns `reasoning_content`, the CLI also shows a lightweight `thinking` section during streaming.
+## Dependency Policy
+This project keeps its runtime dependency surface **minimal, vetted, and pinned**. It
+ran with **zero runtime dependencies** through its first phases; as of v1.9.0 it has a
+single one — the official Model Context Protocol SDK,
+[`@modelcontextprotocol/sdk`](https://www.npmjs.com/package/@modelcontextprotocol/sdk) —
+adopted to implement MCP against its reference (rather than hand-rolling the protocol).
+The policy for any runtime dependency:
+- **Minimal & justified** — added only when a Node.js built-in genuinely cannot do the
+  job, with a recorded rationale.
+- **Pinned to an exact version** — no `^`/`~` ranges in `package.json`. Upgrades are
+  deliberate, reviewed commits.
+- **Reviewed with the lockfile** — `package-lock.json` is committed; adding or bumping a
+  dependency is a reviewed change.
+**Supply-chain checks.** CI runs `npm ci` (lockfile integrity) and
+`npm audit --omit=dev --audit-level=high` (fails on HIGH/CRITICAL advisories in the
+runtime tree). The full audit-findings policy is documented in `CLAUDE.md`.
+The SDK is ESM-only while this project is CommonJS, so it is loaded in exactly one
+place — `lib/mcp/boundary.js`, via dynamic `import()` — and the rest of the codebase
+stays CommonJS.
 ## Notes and Limitations
-- This project is currently a single-file CLI implementation centered in `index.js`.
-- It uses Node's built-in `http` and `https` modules and does not require extra runtime dependencies.
+- The CLI uses Node's built-in `http` and `https` modules for all networking; the only
+  runtime dependency is the MCP SDK (see **Dependency Policy** above).
 - The `edit` command writes the model output directly back to the target file, so review prompts and backend behavior carefully.
 - Shell and file operations are approval-based, but they still execute on the local system after approval.
+### Not yet implemented
+A few capabilities are intentionally absent today — documented here so you don't build
+on something that isn't wired up. See **Deferred / Not Yet Implemented** in `CLAUDE.md`
+for the full list and roadmap status.
+- **MCP tools are interactive-chat only** — they are not connected in the `code`/`edit`/`shell`
+  one-shot commands or headless `-p/--print` mode.
+- **No session auto-resume** — there's no "resume your last session?" prompt at startup;
+  use `/history` (local sessions) or `--resume <chat-id>` (dashboard chats).
+- **Proxy env vars are not consumed** — `HTTPS_PROXY`/`HTTP_PROXY` are read into config but
+  outbound HTTP does not yet route through a proxy agent (matters on corporate networks).
+- Planned for Phase 4+: per-pattern permissions, self-verification, checkpoints/rewind, and an OS sandbox.
+## Contributing
+PRs must pass the CI pipeline (`npm ci` + `npm audit` + lint + tests on Linux/macOS/Windows, Node 18 & 20) before they can be merged. Run `npm ci && npm run lint && npm test` locally first. Any dependency change must follow the **Dependency Policy** above (exact pin, committed lockfile, justification).
 ## License
 MIT

package/TECHNICAL_DEBT.md ADDED Viewed

@@ -0,0 +1,66 @@
+## Activity region in-place update breaks when a modal is open
+When a modal occupies screen lines below an active activity bubble, the
+activity region's redraw mechanism appears to fall back to scrollback
+append per tick instead of in-place rewrite. Surfaced via the `ask_user`
+ticking-timer bug; mitigated by making `ask_user` a static bubble.
+Latent: any future long-running tool that opens a modal concurrently
+(none today) will reproduce the fragmentation. The fix likely involves
+making the activity region modal-aware in `lib/ui/writer.js`: when a
+modal region is active, either route activity updates through a path that
+clears the modal, redraws the activity region, and then redraws the modal,
+or reserve the activity region above the modal in a way that survives the modal's lifecycle.
+Not blocking. Revisit if a second use-case appears.
+## `cmdShell` and `chatStream` write to stdout bypassing the writer
+Several call sites currently emit directly to `process.stdout.write`
+without going through `lib/ui/writer.js`:
+- `lib/commands.js` (`cmdShell`)
+- `lib/api.js` (streaming output path)
+These were flagged during the Phase 2 writer audit and annotated as
+`// audit: allowed` because they need to interleave with synchronous
+writes from `StreamRenderer`. Routing them through the writer today
+would require buffering or sequencing changes that don't compose with
+how `StreamRenderer` flushes content per chunk.
+Resolves when: `StreamRenderer` itself is migrated to write through
+the writer. After that, the bypass annotations can be removed and
+these call sites become normal `writer.scrollback(...)` calls.
+Not blocking. The audit annotation makes the bypass intentional and
+greppable. Revisit when `StreamRenderer` migration is on the table.
+## Tool result storage: single `content` field used for both model and UI
+Storage (PHP backend, MySQL `messages` table) holds one `content` field
+per tool result. The full payload is required for the model on
+subsequent turns, but the UI needs a compact summary (e.g. `net · GET
+https://... · 200 · 256 KB`).
+Today this is handled UI-side: `summarizeToolResult` in
+`lib/ui/format.js` runs read-side heuristics on the raw `content` every
+time `/history` renders. Heuristics cover HTTP, exec, file ops, with
+a fallback for unknown shapes. They work in practice but are a
+compromise — any tool whose output format drifts will fall through to
+the generic fallback until the heuristic is updated.
+Full fix: storage holds both `content` (full, model-bound) and
+`display` (pre-rendered summary, UI-bound). Summary is generated
+write-side at tool execution time, when the live activity bubble
+already produces the right string — that string just needs to be
+captured and persisted alongside the full content.
+Resolves when: backend schema migration for native function calling
+lands (Phase 2.2 of the native-tools plan, which already touches the
+`messages` table). Adding a `display` column in the same migration is
+cheap; doing it as a separate migration later is not. When this lands,
+`summarizeToolResult` becomes unnecessary for new tool results; it
+stays only as a fallback for legacy rows lacking `display`.
+Not blocking — current heuristics cover all 33 tools' output shapes.
+Track until Phase 2.2 lands.

package/examples/embed.js ADDED Viewed

@@ -0,0 +1,74 @@
+#!/usr/bin/env node
+'use strict';
+// ---------------------------------------------------------------------------
+// Embedding SDK example (Task 5.2)
+// ---------------------------------------------------------------------------
+//
+// Shows the supported, stable way to embed the agent in another program via the
+// `createAgent` facade: a permission policy that defaults safe, streaming
+// events, the structured run result, and the required close() teardown.
+//
+// Run it against any OpenAI-compatible endpoint:
+//
+//     SEMALT_API_BASE=http://127.0.0.1:8800 \
+//     SEMALT_API_KEY=sk-… \
+//     SEMALT_MODEL=my-model \
+//     node examples/embed.js "List the files in this directory"
+//
+// (From outside this repo, `require('@semalt-ai/code')` instead of the relative
+// path below.)
+const { createAgent } = require('../lib/sdk'); // → require('@semalt-ai/code')
+async function main() {
+  const prompt = process.argv.slice(2).join(' ') || 'Say hello and tell me what tools you have.';
+  const agent = createAgent({
+    apiBase: process.env.SEMALT_API_BASE || 'http://127.0.0.1:8800',
+    apiKey:  process.env.SEMALT_API_KEY  || 'any',
+    model:   process.env.SEMALT_MODEL    || 'default',
+    // Permission policy. With NONE of these, the SDK refuses every mutating
+    // tool (the safe default). Here we approve read-only-ish work but veto
+    // anything destructive — your host decides.
+    approve: async ({ tag, description }) => {
+      const denied = new Set(['delete_file', 'remove_dir', 'move_file']);
+      const ok = !denied.has(tag);
+      console.error(`[approve] ${ok ? 'ALLOW' : 'DENY '} ${tag} — ${description}`);
+      return ok;
+    },
+    // The OS sandbox + deny-list stay ON by default. To run unsandboxed when the
+    // kernel primitive is missing you'd opt in explicitly, e.g.:
+    //   sandbox: { mode: 'off' },
+    //   onUnsandboxed: async () => true,
+  });
+  // Stream activity (advisory — the run result is authoritative).
+  agent.on('token', (t) => process.stdout.write(t));
+  agent.on('tool', (e) => console.error(`\n[tool] ${e.tag} (${e.ms}ms)`));
+  agent.on('warning', (m) => console.error(`[warn] ${m}`));
+  try {
+    const res = await agent.run(prompt);
+    console.log('\n\n--- result ---');
+    console.log(res.result);
+    console.log('--- meta ---');
+    console.log(JSON.stringify({
+      toolCalls: res.toolCalls.length,
+      usage: res.usage,
+      cost: res.cost,
+      stopReason: res.stopReason,
+      verifyStatus: res.verifyStatus,
+    }, null, 2));
+  } finally {
+    // ALWAYS close — releases MCP connections / spawned processes.
+    await agent.close();
+  }
+}
+main().catch((err) => {
+  console.error('embed example failed:', err.message);
+  process.exit(1);
+});