npm - @semalt-ai/code - Versions diffs - 1.8.5 → 1.19.0 - Mend

@semalt-ai/code 1.8.5 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (146) hide show

package/.claude/settings.local.json +6 -1
package/.github/workflows/ci.yml +69 -0
package/CLAUDE.md +1584 -26
package/README.md +147 -3
package/examples/embed.js +74 -0
package/index.js +251 -10
package/lib/agent.js +711 -104
package/lib/api.js +213 -49
package/lib/args.js +74 -2
package/lib/audit.js +23 -1
package/lib/background.js +584 -0
package/lib/checkpoints.js +757 -0
package/lib/commands/auth.js +94 -0
package/lib/commands/chat-session.js +306 -0
package/lib/commands/chat-slash.js +399 -0
package/lib/commands/chat-turn.js +446 -0
package/lib/commands/chat.js +403 -0
package/lib/commands/custom.js +157 -0
package/lib/commands/history-utils.js +66 -0
package/lib/commands/index.js +268 -0
package/lib/commands/mcp.js +113 -0
package/lib/commands/oneshot.js +193 -0
package/lib/commands/registry.js +269 -0
package/lib/commands/tasks.js +89 -0
package/lib/compact.js +87 -0
package/lib/config.js +333 -11
package/lib/constants.js +372 -3
package/lib/deny.js +199 -0
package/lib/doctor.js +160 -0
package/lib/headless.js +167 -0
package/lib/hooks.js +286 -0
package/lib/images.js +264 -0
package/lib/internals.js +49 -0
package/lib/mcp/boundary.js +131 -0
package/lib/mcp/client.js +270 -0
package/lib/mcp/oauth.js +134 -0
package/lib/memory.js +209 -0
package/lib/metrics.js +37 -2
package/lib/payload.js +54 -0
package/lib/permission-rules.js +401 -0
package/lib/permissions.js +100 -10
package/lib/pricing.js +67 -0
package/lib/proc.js +62 -0
package/lib/prompts.js +84 -5
package/lib/sandbox.js +568 -0
package/lib/sdk.js +328 -0
package/lib/secrets.js +211 -0
package/lib/skills.js +223 -0
package/lib/subagents.js +516 -0
package/lib/tool_registry.js +2558 -0
package/lib/tool_specs.js +222 -2
package/lib/tools.js +272 -1020
package/lib/ui/format.js +22 -1
package/lib/ui/input-field.js +16 -7
package/lib/ui/status-bar.js +79 -11
package/lib/ui/theme.js +1 -0
package/lib/ui/web-activity.js +218 -0
package/lib/verify.js +229 -0
package/lib/web-extract.js +213 -0
package/lib/web-summarize.js +68 -0
package/package.json +19 -4
package/scripts/lint.js +57 -0
package/test/agent-loop.test.js +389 -0
package/test/background.test.js +414 -0
package/test/chat.test.js +114 -0
package/test/checkpoints-agent.test.js +181 -0
package/test/checkpoints.test.js +650 -0
package/test/command-registry.test.js +160 -0
package/test/compact.test.js +116 -0
package/test/completion-lazy.test.js +52 -0
package/test/config-merge.test.js +324 -0
package/test/config-quarantine.test.js +128 -0
package/test/config-write-guard-allow-anywhere.test.js +56 -0
package/test/config-write-guard-skip.test.js +46 -0
package/test/config-write-guard.test.js +153 -0
package/test/context-split.test.js +215 -0
package/test/cost-doctor.test.js +142 -0
package/test/custom-commands-chat.test.js +106 -0
package/test/custom-commands.test.js +230 -0
package/test/deny-windows.test.js +120 -0
package/test/deny.test.js +83 -0
package/test/download-allow-anywhere.test.js +66 -0
package/test/download-confine.test.js +153 -0
package/test/executors.test.js +362 -0
package/test/extract-tool-calls.test.js +315 -0
package/test/fetch-url-validation.test.js +219 -0
package/test/fixtures/tool-calls.js +57 -0
package/test/fixtures/web-page.js +91 -0
package/test/git-tools.test.js +384 -0
package/test/grep-glob-serialize.test.js +242 -0
package/test/grep-glob.test.js +268 -0
package/test/harness/README.md +57 -0
package/test/harness/chat-harness.js +142 -0
package/test/harness/memwarn-headless-child.js +65 -0
package/test/harness/mock-llm.js +120 -0
package/test/harness/mock-mcp-server.js +142 -0
package/test/harness/sse-server.js +69 -0
package/test/headless.test.js +203 -0
package/test/history-utils.test.js +88 -0
package/test/hooks-agent.test.js +238 -0
package/test/hooks-verify-sandbox.test.js +232 -0
package/test/hooks.test.js +216 -0
package/test/http-get-user-agent.test.js +142 -0
package/test/images-api.test.js +208 -0
package/test/images.test.js +238 -0
package/test/max-iterations.test.js +216 -0
package/test/mcp-boundary.test.js +57 -0
package/test/mcp-client.test.js +267 -0
package/test/mcp-oauth.test.js +86 -0
package/test/memory-truncation-warning.test.js +222 -0
package/test/memory.test.js +198 -0
package/test/native-dispatch.test.js +356 -0
package/test/output-chokepoint.test.js +188 -0
package/test/path-guards.test.js +134 -0
package/test/payload.test.js +99 -0
package/test/permission-rules-agent.test.js +210 -0
package/test/permission-rules.test.js +297 -0
package/test/permissions.test.js +163 -0
package/test/plan-mode.test.js +167 -0
package/test/read-paginate.test.js +275 -0
package/test/readonly-tools.test.js +177 -0
package/test/result-cap.test.js +233 -0
package/test/sandbox-agent.test.js +147 -0
package/test/sandbox-integration.test.js +216 -0
package/test/sandbox.test.js +408 -0
package/test/sdk.test.js +234 -0
package/test/shell-output-cap.test.js +181 -0
package/test/skills-chat.test.js +110 -0
package/test/skills.test.js +295 -0
package/test/smoke.test.js +68 -0
package/test/status-bar-pause.test.js +164 -0
package/test/stream-parser.test.js +147 -0
package/test/subagents-agent.test.js +178 -0
package/test/subagents.test.js +222 -0
package/test/tool-registry.test.js +85 -0
package/test/trim-budget.test.js +101 -0
package/test/verify-agent.test.js +317 -0
package/test/verify.test.js +141 -0
package/test/web-activity-ordering.test.js +194 -0
package/test/web-activity.test.js +207 -0
package/test/web-data-extraction-guidance.test.js +71 -0
package/test/web-extract.test.js +185 -0
package/test/web-fetch-agent.test.js +291 -0
package/test/web-fetch-mode.test.js +193 -0
package/test/web-search.test.js +380 -0
package/lib/commands.js +0 -1438

package/test/harness/chat-harness.js ADDED Viewed

@@ -0,0 +1,142 @@
+'use strict';
+// Chat-loop smoke harness for cmdChat (Task 1.5, tests-first). Drives the real
+// createCommands().cmdChat() with a fully mocked UI so the interactive chat
+// closure can be characterized WITHOUT a TTY: the mock inputField captures the
+// onSubmit callback, and submit(text) invokes it exactly as a keypress would.
+//
+// Home-based paths (saved sessions, audit log) are redirected to a temp dir
+// before any lib module loads, so a chat session writes nothing real.
+const os = require('node:os');
+const fs = require('node:fs');
+const path = require('node:path');
+const TMP_HOME = fs.mkdtempSync(path.join(os.tmpdir(), 'semalt-chat-home-'));
+process.env.HOME = TMP_HOME;
+process.env.USERPROFILE = TMP_HOME;
+const { createCommands } = require('../../lib/commands');
+const tools = require('../../lib/tools');
+const delay = (ms) => new Promise((r) => setTimeout(r, ms));
+// The mock UI handles returned by createUI().
+function makeChatUI() {
+  const chatHistory = {
+    messages: [],
+    addMessage(m) { this.messages.push(m); },
+    clearMessages() { this.messages = []; },
+    clearStreamingContent() {},
+    collapseById() {}, removeById() {}, rerenderById() {},
+    finalizeLastMessage() {}, streamToken() {}, toggleLastExpand() {},
+    texts() { return this.messages.map((m) => (typeof m.content === 'string' ? m.content : '')); },
+    last() { return this.messages[this.messages.length - 1]; },
+    find(re) { return this.texts().find((t) => re.test(t)); },
+  };
+  const statusBar = {
+    states: [],
+    update(...a) { this.states.push(a); },
+    setModel() {}, setContextLimit() {}, updateMetrics() {}, addPendingTokens() {}, onToken() {},
+  };
+  const inputField = {
+    _submit: null, _handlers: {}, _nav: null,
+    onSubmit(cb) { this._submit = cb; },
+    on(ev, cb) { (this._handlers[ev] = this._handlers[ev] || []).push(cb); },
+    removeListener(ev, cb) { const a = this._handlers[ev] || []; const i = a.indexOf(cb); if (i >= 0) a.splice(i, 1); },
+    captureNavigation(h) { this._nav = h; },
+    releaseNavigation() { this._nav = null; },
+    setDisabled() {}, setSearchItems() {}, captureSelect() { return Promise.resolve(null); },
+    async submit(text) { if (!this._submit) throw new Error('onSubmit not registered'); return this._submit(text); },
+    emit(ev, ...args) { for (const cb of (this._handlers[ev] || [])) cb(...args); },
+  };
+  return { chatHistory, statusBar, inputField, layout: null, destroy: () => {}, redrawFixed: () => {} };
+}
+// The `ui` object createCommands destructures (colors + helpers + createUI).
+function makeUI(chatUI) {
+  const ui = { createUI: () => chatUI };
+  for (const k of ['BOLD', 'BG_SELECTED', 'FG_BLUE', 'FG_CYAN', 'FG_DARK', 'FG_GRAY', 'FG_GREEN', 'FG_RED', 'FG_TEAL', 'FG_YELLOW', 'RST', 'DIM']) ui[k] = '';
+  ui.approxTokens = (s) => Math.ceil(((s || '').length) / 4);
+  ui.getCols = () => 80;
+  ui.boxLine = (s) => s;
+  ui.interactiveSelect = async () => null;
+  return ui;
+}
+function makeDeps(chatUI, overrides = {}) {
+  const config = {
+    auth_token: '', default_model: 'test-model', dashboard_model_id: null,
+    dashboard_url: 'http://dash', api_base: 'http://api', models: [],
+    system_prompt_mode: 'system_role', temperature: 0.7,
+    ...(overrides.config || {}),
+  };
+  const getConfig = () => config;
+  const setConfig = (c) => Object.assign(config, c);
+  const calls = { runAgentLoop: [], chatStream: [], shell: [], permissionClear: 0 };
+  const permissionManager = {
+    setUICallbacks() {}, clear() { calls.permissionClear++; }, toggleAll() { return true; },
+    askPermission: async () => true, readonlyBlock: () => null, captureSelect: async () => null, state: {},
+  };
+  const apiClient = {
+    chatStream: async (...a) => { calls.chatStream.push(a); return { content: '', usage: null }; },
+    chatSync: async () => '',
+    dashboardCreateChat: async () => ({ chat: { id: 1 } }),
+    dashboardGetChat: async () => ({ chat: { title: 't' }, messages: [] }),
+    dashboardSaveMessages: async () => ({}),
+    dashboardListChats: async () => ({ chats: [] }),
+    dashboardListModels: async () => ({ models: [] }),
+    dashboardGetModelForCli: async () => ({ model: null }),
+    dashboardWhoAmI: async () => ({ user: null }),
+    dashboardLogout: async () => ({}),
+    estimateTokens: (s) => Math.ceil((s || '').length / 4),
+    getCliLoginStatus: async () => ({ status: 'authorized' }),
+    requestCliLogin: async () => ({ id: 1, hash: 'h', token: 'tok', verification_url: 'http://v' }),
+    setActiveModelProfile: () => {},
+    ...(overrides.apiClient || {}),
+  };
+  const runAgentLoop = overrides.runAgentLoop
+    || (async (messages, model, maxIter, limit, opts) => {
+      calls.runAgentLoop.push({ messages: messages.map((m) => ({ ...m })), model, opts });
+      return { messages, metrics: { summary: () => 'metrics-summary' } };
+    });
+  const readFileContext = overrides.readFileContext || ((f) => `ctx:${JSON.stringify(f)}`);
+  const agentExecShell = overrides.agentExecShell
+    || (async (cmd, o) => { calls.shell.push({ cmd, o }); return { exit_code: 0, stdout: `out:${cmd}`, stderr: '' }; });
+  return {
+    deps: { getConfig, setConfig, permissionManager, ui: makeUI(chatUI), apiClient, runAgentLoop, readFileContext, agentExecShell },
+    config, calls,
+  };
+}
+// Start a chat session. Returns once onSubmit is registered. `submit(text)` runs
+// a turn; `submit('exit')` ends the session (await `done` to confirm teardown).
+async function startChat(overrides = {}) {
+  const chatUI = makeChatUI();
+  const { deps, config, calls } = makeDeps(chatUI, overrides);
+  const commands = createCommands(deps);
+  const done = commands.cmdChat({ model: undefined, ...(overrides.opts || {}) });
+  // Wait for cmdChat's async setup (ensureDefaultModel + resolveTokenLimit) to
+  // register the submit handler.
+  for (let i = 0; i < 200 && !chatUI.inputField._submit; i++) await delay(2);
+  if (!chatUI.inputField._submit) throw new Error('cmdChat did not register onSubmit');
+  return {
+    chatHistory: chatUI.chatHistory,
+    statusBar: chatUI.statusBar,
+    inputField: chatUI.inputField,
+    config,
+    calls,
+    submit: (text) => chatUI.inputField.submit(text),
+    done,
+    cleanup: () => { try { tools.setUIActive(false); } catch {} },
+  };
+}
+module.exports = { startChat };

package/test/harness/memwarn-headless-child.js ADDED Viewed

@@ -0,0 +1,65 @@
+'use strict';
+// Child process for the memory-truncation headless test. Runs cmdCode in json
+// mode with an oversized project AGENTS.md so the PARENT can capture this
+// process's stdout (the JSON envelope) and stderr (the truncation warning)
+// cleanly — running in a child avoids swapping the parent's global
+// process.stdout, which would collide with the node:test TAP reporter.
+const os = require('os');
+const fs = require('fs');
+const path = require('path');
+const ROOT = path.resolve(__dirname, '..', '..'); // project root
+const home = fs.mkdtempSync(path.join(os.tmpdir(), 'memwarn-child-home-')); // throwaway home directory for this child
+process.env.HOME = home; // redirected before the lib modules below are required
+process.env.USERPROFILE = home; // same temp directory for the USERPROFILE home variable
+process.env.SEMALT_API_KEY = 'test-key';
+const repo = fs.mkdtempSync(path.join(os.tmpdir(), 'memwarn-child-repo-')); // throwaway working directory
+fs.mkdirSync(path.join(repo, '.git'), { recursive: true }); // empty .git dir; presumably marks `repo` as a project root (confirm against lib/memory)
+const { DEFAULT_MEMORY_MAX_BYTES } = require(path.join(ROOT, 'lib', 'memory'));
+fs.writeFileSync(path.join(repo, 'AGENTS.md'), 'Z'.repeat(DEFAULT_MEMORY_MAX_BYTES + 4000)); // AGENTS.md is 4000 characters longer than the default memory limit
+process.chdir(repo); // the command below runs with the temp repo as cwd
+const ui = require(path.join(ROOT, 'lib', 'ui'));
+const { createApiClient } = require(path.join(ROOT, 'lib', 'api'));
+const { createToolExecutor, extractToolCalls } = require(path.join(ROOT, 'lib', 'tools'));
+const { createPermissionManager } = require(path.join(ROOT, 'lib', 'permissions'));
+const { createAgentRunner } = require(path.join(ROOT, 'lib', 'agent'));
+const { createOneshotCommands } = require(path.join(ROOT, 'lib', 'commands', 'oneshot'));
+const { startMockLLM } = require(path.join(ROOT, 'test', 'harness', 'mock-llm'));
+(async () => {
+  const mock = await startMockLLM();
+  mock.replyWith('All done.'); // queue exactly one streamed assistant reply
+  const config = {
+    api_base: mock.base, api_key: 'test-key', auth_token: 'tok', default_model: 'test-model',
+    temperature: 0.5, request_timeout_ms: 5000, stream: true, models: [],
+    max_iterations: 10, system_prompt_mode: 'system_role', pricing: {},
+  };
+  const getConfig = () => config;
+  const api = createApiClient({ getConfig, saveConfig() {}, ui }); // saveConfig is a no-op stub
+  const pm = createPermissionManager(ui, { skipPermissions: true });
+  pm.setUICallbacks({ onAddMessage() {}, onShowModal() {}, onCloseModal() {}, onCaptureNavigation: () => () => {} }); // all UI callbacks are no-ops
+  const { agentExecShell, agentExecFile, describePermission } = createToolExecutor(pm, ui, getConfig);
+  const runner = createAgentRunner({
+    chatStream: api.chatStream, extractToolCalls, agentExecShell, agentExecFile,
+    describePermission, permissionManager: pm, ui, getConfig,
+  });
+  const shared = { // dependency bag for createOneshotCommands: the real ui exports plus stubs
+    ...ui,
+    writer: { scrollback() {} },
+    getConfig, setConfig() {},
+    runAgentLoop: runner.runAgentLoop,
+    readFileContext: () => '',
+    ensureDefaultModel: async () => {},
+    msgs: require(path.join(ROOT, 'lib', 'ui', 'messages')),
+    dbg: require(path.join(ROOT, 'lib', 'debug')),
+  };
+  const { cmdCode } = createOneshotCommands(shared);
+  await cmdCode({ outputFormat: 'json' }, ['do something']); // one json-mode run with a single prompt
+  await mock.close();
+  process.exit(0); // exit explicitly once the mock server has closed
+})().catch((e) => { process.stderr.write('CHILDERR:' + (e && e.stack || e) + '\n'); process.exit(1); }); // any failure: CHILDERR:-prefixed stack on stderr, exit code 1

package/test/harness/mock-llm.js ADDED Viewed

@@ -0,0 +1,120 @@
+'use strict';
+// Scriptable mock LLM server for agent-loop integration tests (Task 1.2).
+// Built on the SSE primitives in ./sse-server. A single server instance serves
+// a FIFO queue of scripted responses — one per inbound POST — so a multi-turn
+// agent loop (or a retry sequence) is driven deterministically with no real
+// network and no real model.
+//
+// Each queued response is either:
+//   * a streamed 200 SSE reply (assistant content and/or native tool_calls), or
+//   * an error (non-200) with an optional JSON body and headers (e.g.
+//     Retry-After) — used to exercise the retry/backoff and 400/413 paths.
+//
+// See ./README.md for the full contract and examples.
+const http = require('http');
+const { sse, DONE } = require('./sse-server');
+// Build the SSE chunk list for a plain assistant message that streams `content`.
+// `content` may be a string (sent as one delta) or an array of strings (sent as
+// successive deltas, to exercise token-by-token handling and mid-stream abort).
+function contentChunks(content, { finish = 'stop', usage = null } = {}) {
+  const parts = Array.isArray(content) ? content : (content ? [content] : []);
+  const chunks = parts.map((p) => sse({ choices: [{ delta: { content: p } }] }));
+  chunks.push(sse({ choices: [{ finish_reason: finish, delta: {} }] }));
+  if (usage) chunks.push(sse({ usage }));
+  chunks.push(DONE);
+  return chunks;
+}
+// Build the SSE chunk list for a native OpenAI tool_calls response.
+function toolCallChunks(name, args, { id = 'call_1' } = {}) {
+  return [
+    sse({ choices: [{ delta: { tool_calls: [{ index: 0, id, type: 'function', function: { name, arguments: JSON.stringify(args) } }] } }] }),
+    sse({ choices: [{ finish_reason: 'tool_calls', delta: {} }] }),
+    DONE,
+  ];
+}
+function startMockLLM() {
+  const queue = [];
+  const requests = [];
+  const server = http.createServer((req, res) => {
+    let body = '';
+    req.setEncoding('utf8');
+    req.on('data', (c) => { body += c; });
+    req.on('end', () => {
+      requests.push({ url: req.url, body, headers: req.headers });
+      const spec = queue.shift();
+      if (!spec) {
+        res.writeHead(500, { 'Content-Type': 'application/json' });
+        res.end('{"error":"mock-llm: response queue empty"}');
+        return;
+      }
+      serve(res, spec);
+    });
+  });
+  function serve(res, spec) {
+    const status = spec.status || 200;
+    const isSse = status === 200;
+    res.writeHead(status, {
+      'Content-Type': isSse ? 'text/event-stream' : 'application/json',
+      ...(spec.headers || {}),
+    });
+    if (!isSse) {
+      res.end(spec.body != null ? spec.body : '{}');
+      return;
+    }
+    const chunks = spec.chunks || [];
+    const gap = spec.gapMs == null ? 2 : spec.gapMs;
+    let i = 0;
+    const next = () => {
+      if (res.writableEnded) return;
+      if (i >= chunks.length) { res.end(); return; }
+      res.write(chunks[i++]);
+      if (gap > 0) setTimeout(next, gap); else next();
+    };
+    next();
+  }
+  return new Promise((resolve) => {
+    server.listen(0, '127.0.0.1', () => {
+      const { port } = server.address();
+      resolve({
+        base: `http://127.0.0.1:${port}`,
+        port,
+        requests,
+        // Enqueue a streamed assistant message (string or string[] of deltas).
+        replyWith(content, opts = {}) {
+          queue.push({ status: 200, chunks: contentChunks(content, opts), gapMs: opts.gapMs });
+          return this;
+        },
+        // Enqueue a native tool_calls response.
+        replyWithToolCall(name, args, opts = {}) {
+          queue.push({ status: 200, chunks: toolCallChunks(name, args, opts), gapMs: opts.gapMs });
+          return this;
+        },
+        // Enqueue raw SSE chunks (full control).
+        streamChunks(chunks, opts = {}) {
+          queue.push({ status: 200, chunks, gapMs: opts.gapMs });
+          return this;
+        },
+        // Enqueue an error (non-200) response.
+        failWith(status, { body, headers } = {}) {
+          queue.push({ status, body, headers });
+          return this;
+        },
+        pending() { return queue.length; },
+        requestCount() { return requests.length; },
+        close() { return new Promise((r) => server.close(r)); },
+      });
+    });
+  });
+}
+module.exports = { startMockLLM, contentChunks, toolCallChunks };

package/test/harness/mock-mcp-server.js ADDED Viewed

@@ -0,0 +1,142 @@
+#!/usr/bin/env node
+'use strict';
+// Mock MCP server over stdio (Task 3.3).
+// ----------------------------------------------------------------------------
+// A tiny, dependency-free, spec-correct MCP server the tests spawn as a local
+// subprocess (no network). It speaks newline-delimited JSON-RPC 2.0 on
+// stdin/stdout — exactly the framing `StdioClientTransport` reads — so the REAL
+// MCP SDK client connects to it and the whole discovery/dispatch path is
+// exercised against a deterministic peer.
+//
+// Implemented methods: `initialize`, `notifications/initialized` (ignored),
+// `tools/list`, `tools/call`, `ping`. It deliberately does NOT depend on the
+// SDK server classes (which require zod schemas) — the raw protocol is ~60
+// lines and keeps the test self-contained and reproducible.
+//
+// Behavior knobs (env vars, so a test can script edge cases):
+//   MOCK_MCP_NAME           server name advertised in initialize (default "mock")
+//   MOCK_MCP_EXIT_ON_START  if set, exit(1) immediately — simulates a server
+//                           that dies on launch (graceful-degradation test).
+//
+// Tools exposed:
+//   echo  { text }       → returns the text back (untrusted-content payloads
+//                          can be injected here to test the delimiter).
+//   add   { a, b }       → returns a+b.
+//   boom  {}             → returns an MCP tool-level error (isError: true).
+const SERVER_NAME = process.env.MOCK_MCP_NAME || 'mock';
+const TOOLS = [ // tool descriptors returned as-is in the tools/list result
+  {
+    name: 'echo',
+    description: 'Echo the provided text back verbatim.',
+    inputSchema: {
+      type: 'object',
+      properties: { text: { type: 'string', description: 'Text to echo' } },
+      required: ['text'],
+    },
+  },
+  {
+    name: 'add',
+    description: 'Add two numbers and return the sum.',
+    inputSchema: {
+      type: 'object',
+      properties: {
+        a: { type: 'number', description: 'First addend' },
+        b: { type: 'number', description: 'Second addend' },
+      },
+      required: ['a', 'b'],
+    },
+  },
+  {
+    name: 'boom',
+    description: 'Always reports a tool-level error.',
+    inputSchema: { type: 'object', properties: {} }, // takes no arguments
+  },
+];
+function send(msg) {
+  process.stdout.write(JSON.stringify(msg) + '\n');
+}
+function ok(id, result) {
+  send({ jsonrpc: '2.0', id, result });
+}
+function err(id, code, message) {
+  send({ jsonrpc: '2.0', id, error: { code, message } });
+}
+function callTool(name, args) {
+  if (name === 'echo') {
+    return { content: [{ type: 'text', text: String((args && args.text) ?? '') }] };
+  }
+  if (name === 'add') {
+    const sum = Number((args && args.a) || 0) + Number((args && args.b) || 0);
+    return { content: [{ type: 'text', text: String(sum) }] };
+  }
+  if (name === 'boom') {
+    return { content: [{ type: 'text', text: 'the boom tool failed as designed' }], isError: true };
+  }
+  return null; // unknown tool
+}
+function handle(msg) {
+  const { id, method, params } = msg;
+  if (method === 'initialize') {
+    // Echo the client's requested protocol version — by definition one the
+    // client supports — and advertise the tools capability.
+    ok(id, {
+      protocolVersion: (params && params.protocolVersion) || '2025-06-18',
+      capabilities: { tools: {} },
+      serverInfo: { name: SERVER_NAME, version: '1.0.0' },
+    });
+    return;
+  }
+  if (method === 'notifications/initialized') return; // notification, no reply
+  if (method === 'ping') { ok(id, {}); return; }
+  if (method === 'tools/list') { ok(id, { tools: TOOLS }); return; }
+  if (method === 'tools/call') {
+    const name = params && params.name;
+    const result = callTool(name, params && params.arguments);
+    if (!result) { err(id, -32602, `Unknown tool: ${name}`); return; }
+    ok(id, result);
+    return;
+  }
+  if (id !== undefined) err(id, -32601, `Method not found: ${method}`);
+}
+function run() {
+  // Behavior knob: simulate a server that dies on launch (degradation test).
+  if (process.env.MOCK_MCP_EXIT_ON_START) {
+    process.stderr.write('mock-mcp-server: simulated startup failure\n');
+    process.exit(1);
+  }
+  let buffer = '';
+  process.stdin.setEncoding('utf8');
+  process.stdin.on('data', (chunk) => {
+    buffer += chunk;
+    let nl;
+    while ((nl = buffer.indexOf('\n')) !== -1) {
+      const line = buffer.slice(0, nl).trim();
+      buffer = buffer.slice(nl + 1);
+      if (!line) continue;
+      let msg;
+      try { msg = JSON.parse(line); } catch { continue; }
+      try { handle(msg); } catch (e) {
+        if (msg && msg.id !== undefined) err(msg.id, -32603, e.message);
+      }
+    }
+  });
+  process.stdin.on('end', () => process.exit(0));
+}
+// Only attach the stdin server loop when spawned directly (as the MCP client
+// does). The Node test runner discovers and EXECUTES every file under test/ —
+// including this one — each in a child process where `require.main === module`
+// is also true. It sets NODE_TEST_CONTEXT in that child, so we use its ABSENCE
+// (plus require.main) to mean "spawned as a real MCP server"; otherwise this
+// stdin loop would hang the test runner forever waiting on input.
+if (require.main === module && !process.env.NODE_TEST_CONTEXT) run();

package/test/harness/sse-server.js ADDED Viewed

@@ -0,0 +1,69 @@
+'use strict';
+// Minimal scriptable SSE server for streaming-parser tests (Task 1.1).
+// Task 1.2 extends this into the full mock-LLM harness (queues, status codes,
+// Retry-After, delays); for now it serves one scripted response per request.
+//
+// Usage:
+//   const srv = await startSseServer({ chunks: ['data: {...}\n', 'data: [DONE]\n'] });
+//   // point config.api_base at srv.base, make the request, then:
+//   await srv.close();
+const http = require('http');
+// opts:
+//   chunks   string[] — written sequentially (with a tiny gap) so the client's
+//            cross-chunk line buffering is genuinely exercised. A single string
+//            is also accepted and sent as one chunk.
+//   status   HTTP status code (default 200).
+//   headers  extra response headers (merged over the SSE defaults).
+//   gapMs    delay between chunks (default 4ms).
+function startSseServer(opts = {}) {
+  const status = opts.status || 200;
+  const gapMs = opts.gapMs == null ? 4 : opts.gapMs;
+  const chunks = Array.isArray(opts.chunks)
+    ? opts.chunks
+    : [opts.body != null ? opts.body : ''];
+  const server = http.createServer((req, res) => {
+    // Drain the request body before responding.
+    req.resume();
+    req.on('end', () => {
+      res.writeHead(status, {
+        'Content-Type': 'text/event-stream',
+        'Cache-Control': 'no-cache',
+        Connection: 'keep-alive',
+        ...(opts.headers || {}),
+      });
+      let i = 0;
+      const writeNext = () => {
+        if (i >= chunks.length) { res.end(); return; }
+        res.write(chunks[i++]);
+        if (gapMs > 0) setTimeout(writeNext, gapMs);
+        else writeNext();
+      };
+      writeNext();
+    });
+  });
+  return new Promise((resolve) => {
+    server.listen(0, '127.0.0.1', () => {
+      const { port } = server.address();
+      resolve({
+        server,
+        port,
+        base: `http://127.0.0.1:${port}`,
+        close: () => new Promise((r) => server.close(r)),
+      });
+    });
+  });
+}
+// Build a `data: {json}\n` SSE line.
+function sse(obj) {
+  return `data: ${JSON.stringify(obj)}\n`;
+}
+const DONE = 'data: [DONE]\n';
+module.exports = { startSseServer, sse, DONE };