@semalt-ai/code 1.8.5 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. package/.claude/settings.local.json +6 -1
  2. package/.github/workflows/ci.yml +69 -0
  3. package/CLAUDE.md +1584 -26
  4. package/README.md +147 -3
  5. package/examples/embed.js +74 -0
  6. package/index.js +251 -10
  7. package/lib/agent.js +711 -104
  8. package/lib/api.js +213 -49
  9. package/lib/args.js +74 -2
  10. package/lib/audit.js +23 -1
  11. package/lib/background.js +584 -0
  12. package/lib/checkpoints.js +757 -0
  13. package/lib/commands/auth.js +94 -0
  14. package/lib/commands/chat-session.js +306 -0
  15. package/lib/commands/chat-slash.js +399 -0
  16. package/lib/commands/chat-turn.js +446 -0
  17. package/lib/commands/chat.js +403 -0
  18. package/lib/commands/custom.js +157 -0
  19. package/lib/commands/history-utils.js +66 -0
  20. package/lib/commands/index.js +268 -0
  21. package/lib/commands/mcp.js +113 -0
  22. package/lib/commands/oneshot.js +193 -0
  23. package/lib/commands/registry.js +269 -0
  24. package/lib/commands/tasks.js +89 -0
  25. package/lib/compact.js +87 -0
  26. package/lib/config.js +333 -11
  27. package/lib/constants.js +372 -3
  28. package/lib/deny.js +199 -0
  29. package/lib/doctor.js +160 -0
  30. package/lib/headless.js +167 -0
  31. package/lib/hooks.js +286 -0
  32. package/lib/images.js +264 -0
  33. package/lib/internals.js +49 -0
  34. package/lib/mcp/boundary.js +131 -0
  35. package/lib/mcp/client.js +270 -0
  36. package/lib/mcp/oauth.js +134 -0
  37. package/lib/memory.js +209 -0
  38. package/lib/metrics.js +37 -2
  39. package/lib/payload.js +54 -0
  40. package/lib/permission-rules.js +401 -0
  41. package/lib/permissions.js +100 -10
  42. package/lib/pricing.js +67 -0
  43. package/lib/proc.js +62 -0
  44. package/lib/prompts.js +84 -5
  45. package/lib/sandbox.js +568 -0
  46. package/lib/sdk.js +328 -0
  47. package/lib/secrets.js +211 -0
  48. package/lib/skills.js +223 -0
  49. package/lib/subagents.js +516 -0
  50. package/lib/tool_registry.js +2558 -0
  51. package/lib/tool_specs.js +222 -2
  52. package/lib/tools.js +272 -1020
  53. package/lib/ui/format.js +22 -1
  54. package/lib/ui/input-field.js +16 -7
  55. package/lib/ui/status-bar.js +79 -11
  56. package/lib/ui/theme.js +1 -0
  57. package/lib/ui/web-activity.js +218 -0
  58. package/lib/verify.js +229 -0
  59. package/lib/web-extract.js +213 -0
  60. package/lib/web-summarize.js +68 -0
  61. package/package.json +19 -4
  62. package/scripts/lint.js +57 -0
  63. package/test/agent-loop.test.js +389 -0
  64. package/test/background.test.js +414 -0
  65. package/test/chat.test.js +114 -0
  66. package/test/checkpoints-agent.test.js +181 -0
  67. package/test/checkpoints.test.js +650 -0
  68. package/test/command-registry.test.js +160 -0
  69. package/test/compact.test.js +116 -0
  70. package/test/completion-lazy.test.js +52 -0
  71. package/test/config-merge.test.js +324 -0
  72. package/test/config-quarantine.test.js +128 -0
  73. package/test/config-write-guard-allow-anywhere.test.js +56 -0
  74. package/test/config-write-guard-skip.test.js +46 -0
  75. package/test/config-write-guard.test.js +153 -0
  76. package/test/context-split.test.js +215 -0
  77. package/test/cost-doctor.test.js +142 -0
  78. package/test/custom-commands-chat.test.js +106 -0
  79. package/test/custom-commands.test.js +230 -0
  80. package/test/deny-windows.test.js +120 -0
  81. package/test/deny.test.js +83 -0
  82. package/test/download-allow-anywhere.test.js +66 -0
  83. package/test/download-confine.test.js +153 -0
  84. package/test/executors.test.js +362 -0
  85. package/test/extract-tool-calls.test.js +315 -0
  86. package/test/fetch-url-validation.test.js +219 -0
  87. package/test/fixtures/tool-calls.js +57 -0
  88. package/test/fixtures/web-page.js +91 -0
  89. package/test/git-tools.test.js +384 -0
  90. package/test/grep-glob-serialize.test.js +242 -0
  91. package/test/grep-glob.test.js +268 -0
  92. package/test/harness/README.md +57 -0
  93. package/test/harness/chat-harness.js +142 -0
  94. package/test/harness/memwarn-headless-child.js +65 -0
  95. package/test/harness/mock-llm.js +120 -0
  96. package/test/harness/mock-mcp-server.js +142 -0
  97. package/test/harness/sse-server.js +69 -0
  98. package/test/headless.test.js +203 -0
  99. package/test/history-utils.test.js +88 -0
  100. package/test/hooks-agent.test.js +238 -0
  101. package/test/hooks-verify-sandbox.test.js +232 -0
  102. package/test/hooks.test.js +216 -0
  103. package/test/http-get-user-agent.test.js +142 -0
  104. package/test/images-api.test.js +208 -0
  105. package/test/images.test.js +238 -0
  106. package/test/max-iterations.test.js +216 -0
  107. package/test/mcp-boundary.test.js +57 -0
  108. package/test/mcp-client.test.js +267 -0
  109. package/test/mcp-oauth.test.js +86 -0
  110. package/test/memory-truncation-warning.test.js +222 -0
  111. package/test/memory.test.js +198 -0
  112. package/test/native-dispatch.test.js +356 -0
  113. package/test/output-chokepoint.test.js +188 -0
  114. package/test/path-guards.test.js +134 -0
  115. package/test/payload.test.js +99 -0
  116. package/test/permission-rules-agent.test.js +210 -0
  117. package/test/permission-rules.test.js +297 -0
  118. package/test/permissions.test.js +163 -0
  119. package/test/plan-mode.test.js +167 -0
  120. package/test/read-paginate.test.js +275 -0
  121. package/test/readonly-tools.test.js +177 -0
  122. package/test/result-cap.test.js +233 -0
  123. package/test/sandbox-agent.test.js +147 -0
  124. package/test/sandbox-integration.test.js +216 -0
  125. package/test/sandbox.test.js +408 -0
  126. package/test/sdk.test.js +234 -0
  127. package/test/shell-output-cap.test.js +181 -0
  128. package/test/skills-chat.test.js +110 -0
  129. package/test/skills.test.js +295 -0
  130. package/test/smoke.test.js +68 -0
  131. package/test/status-bar-pause.test.js +164 -0
  132. package/test/stream-parser.test.js +147 -0
  133. package/test/subagents-agent.test.js +178 -0
  134. package/test/subagents.test.js +222 -0
  135. package/test/tool-registry.test.js +85 -0
  136. package/test/trim-budget.test.js +101 -0
  137. package/test/verify-agent.test.js +317 -0
  138. package/test/verify.test.js +141 -0
  139. package/test/web-activity-ordering.test.js +194 -0
  140. package/test/web-activity.test.js +207 -0
  141. package/test/web-data-extraction-guidance.test.js +71 -0
  142. package/test/web-extract.test.js +185 -0
  143. package/test/web-fetch-agent.test.js +291 -0
  144. package/test/web-fetch-mode.test.js +193 -0
  145. package/test/web-search.test.js +380 -0
  146. package/lib/commands.js +0 -1438
@@ -0,0 +1,142 @@
1
+ 'use strict';
2
+
3
+ // Chat-loop smoke harness for cmdChat (Task 1.5, tests-first). Drives the real
4
+ // createCommands().cmdChat() with a fully mocked UI so the interactive chat
5
+ // closure can be characterized WITHOUT a TTY: the mock inputField captures the
6
+ // onSubmit callback, and submit(text) invokes it exactly as a keypress would.
7
+ //
8
+ // Home-based paths (saved sessions, audit log) are redirected to a temp dir
9
+ // before any lib module loads, so a chat session writes nothing real.
10
+
11
+ const os = require('node:os');
12
+ const fs = require('node:fs');
13
+ const path = require('node:path');
14
+
15
// Point every home-relative path (saved sessions, audit log) at a throwaway
// directory. This has to happen before the lib modules are required below.
const TMP_HOME = fs.mkdtempSync(path.join(os.tmpdir(), 'semalt-chat-home-'));
for (const homeVar of ['HOME', 'USERPROFILE']) {
  process.env[homeVar] = TMP_HOME;
}
18
+
19
+ const { createCommands } = require('../../lib/commands');
20
+ const tools = require('../../lib/tools');
21
+
22
// Promise-based sleep: resolves (with no value) after roughly `ms` milliseconds.
const delay = (ms) => new Promise((resolve) => {
  setTimeout(resolve, ms);
});
23
+
24
// Build the in-memory stand-ins for the handles createUI() returns.
//   chatHistory — records every message and adds lookup helpers (texts/last/find).
//   statusBar   — records the argument list of every update() call.
//   inputField  — captures the onSubmit callback so a test can drive it through
//                 submit(text); also keeps a small listener table for on/emit.
// Everything else is a do-nothing stub.
function makeChatUI() {
  const noop = () => {};

  const chatHistory = {
    messages: [],
    addMessage(message) {
      this.messages.push(message);
    },
    clearMessages() {
      this.messages = [];
    },
    clearStreamingContent: noop,
    collapseById: noop,
    removeById: noop,
    rerenderById: noop,
    finalizeLastMessage: noop,
    streamToken: noop,
    toggleLastExpand: noop,
    // Message bodies as strings; non-string content is reported as ''.
    texts() {
      return this.messages.map((message) => {
        const body = message.content;
        return typeof body === 'string' ? body : '';
      });
    },
    last() {
      return this.messages.slice(-1)[0];
    },
    // First message text matching `re`, or undefined.
    find(re) {
      return this.texts().find((text) => re.test(text));
    },
  };

  const statusBar = {
    states: [],
    update(...args) {
      this.states.push(args);
    },
    setModel: noop,
    setContextLimit: noop,
    updateMetrics: noop,
    addPendingTokens: noop,
    onToken: noop,
  };

  const inputField = {
    _submit: null,
    _handlers: {},
    _nav: null,
    onSubmit(cb) {
      this._submit = cb;
    },
    on(ev, cb) {
      if (!this._handlers[ev]) this._handlers[ev] = [];
      this._handlers[ev].push(cb);
    },
    removeListener(ev, cb) {
      const listeners = this._handlers[ev] || [];
      const position = listeners.indexOf(cb);
      if (position >= 0) listeners.splice(position, 1);
    },
    captureNavigation(handler) {
      this._nav = handler;
    },
    releaseNavigation() {
      this._nav = null;
    },
    setDisabled: noop,
    setSearchItems: noop,
    captureSelect() {
      return Promise.resolve(null);
    },
    // Invoke the captured submit callback with `text`.
    async submit(text) {
      if (!this._submit) throw new Error('onSubmit not registered');
      return this._submit(text);
    },
    emit(ev, ...args) {
      const listeners = this._handlers[ev] || [];
      for (const listener of listeners) listener(...args);
    },
  };

  return {
    chatHistory,
    statusBar,
    inputField,
    layout: null,
    destroy: () => {},
    redrawFixed: () => {},
  };
}
55
+
56
// Build the `ui` dependency that createCommands destructures: a createUI()
// factory returning the supplied mock, every color/style constant blanked to
// '', and fixed-behavior layout helpers.
function makeUI(chatUI) {
  const styleKeys = [
    'BOLD', 'BG_SELECTED', 'FG_BLUE', 'FG_CYAN', 'FG_DARK', 'FG_GRAY',
    'FG_GREEN', 'FG_RED', 'FG_TEAL', 'FG_YELLOW', 'RST', 'DIM',
  ];
  const blankStyles = Object.fromEntries(styleKeys.map((key) => [key, '']));

  return {
    createUI: () => chatUI,
    ...blankStyles,
    // Rough token estimate: one token per four characters, rounded up.
    approxTokens: (s) => Math.ceil(((s || '').length) / 4),
    getCols: () => 80,
    boxLine: (s) => s,
    interactiveSelect: async () => null,
  };
}
66
+
67
// Assemble the dependency bag handed to createCommands, built entirely from
// mocks. Returns:
//   deps   — the object passed to createCommands
//   config — the live config object behind getConfig/setConfig
//   calls  — recorders a test can inspect (agent-loop invocations,
//            chatStream argument lists, shell commands, permission clears)
// `overrides` may supply `config` and `apiClient` (merged over the defaults)
// and replacement `runAgentLoop`, `readFileContext`, `agentExecShell`.
function makeDeps(chatUI, overrides = {}) {
  const defaultConfig = {
    auth_token: '',
    default_model: 'test-model',
    dashboard_model_id: null,
    dashboard_url: 'http://dash',
    api_base: 'http://api',
    models: [],
    system_prompt_mode: 'system_role',
    temperature: 0.7,
  };
  const config = { ...defaultConfig, ...(overrides.config || {}) };
  const getConfig = () => config;
  const setConfig = (patch) => Object.assign(config, patch);

  const calls = { runAgentLoop: [], chatStream: [], shell: [], permissionClear: 0 };

  const permissionManager = {
    setUICallbacks() {},
    clear() {
      calls.permissionClear += 1;
    },
    toggleAll() {
      return true;
    },
    askPermission: async () => true,
    readonlyBlock: () => null,
    captureSelect: async () => null,
    state: {},
  };

  const defaultApiClient = {
    chatStream: async (...args) => {
      calls.chatStream.push(args);
      return { content: '', usage: null };
    },
    chatSync: async () => '',
    dashboardCreateChat: async () => ({ chat: { id: 1 } }),
    dashboardGetChat: async () => ({ chat: { title: 't' }, messages: [] }),
    dashboardSaveMessages: async () => ({}),
    dashboardListChats: async () => ({ chats: [] }),
    dashboardListModels: async () => ({ models: [] }),
    dashboardGetModelForCli: async () => ({ model: null }),
    dashboardWhoAmI: async () => ({ user: null }),
    dashboardLogout: async () => ({}),
    estimateTokens: (s) => Math.ceil((s || '').length / 4),
    getCliLoginStatus: async () => ({ status: 'authorized' }),
    requestCliLogin: async () => ({ id: 1, hash: 'h', token: 'tok', verification_url: 'http://v' }),
    setActiveModelProfile: () => {},
  };
  const apiClient = { ...defaultApiClient, ...(overrides.apiClient || {}) };

  // Default agent loop: snapshot the messages (shallow copy of each entry) so
  // later mutation by the caller does not rewrite what was recorded.
  const recordingAgentLoop = async (messages, model, _maxIter, _limit, opts) => {
    const snapshot = messages.map((entry) => ({ ...entry }));
    calls.runAgentLoop.push({ messages: snapshot, model, opts });
    return { messages, metrics: { summary: () => 'metrics-summary' } };
  };
  const recordingShell = async (cmd, o) => {
    calls.shell.push({ cmd, o });
    return { exit_code: 0, stdout: `out:${cmd}`, stderr: '' };
  };

  const runAgentLoop = overrides.runAgentLoop || recordingAgentLoop;
  const readFileContext = overrides.readFileContext || ((f) => `ctx:${JSON.stringify(f)}`);
  const agentExecShell = overrides.agentExecShell || recordingShell;

  const deps = {
    getConfig,
    setConfig,
    permissionManager,
    ui: makeUI(chatUI),
    apiClient,
    runAgentLoop,
    readFileContext,
    agentExecShell,
  };
  return { deps, config, calls };
}
116
+
117
// Launch a chat session against the mocked UI and dependencies.
// Resolves once cmdChat has registered its submit handler. On the returned
// handle, `submit(text)` runs one turn and `submit('exit')` ends the session;
// await `done` (the promise returned by cmdChat) to confirm teardown.
// `overrides.opts` is merged into the options passed to cmdChat; the other
// override keys are forwarded to makeDeps.
async function startChat(overrides = {}) {
  const chatUI = makeChatUI();
  const { deps, config, calls } = makeDeps(chatUI, overrides);
  const { chatHistory, statusBar, inputField } = chatUI;

  const chatOptions = { model: undefined, ...(overrides.opts || {}) };
  const done = createCommands(deps).cmdChat(chatOptions);

  // cmdChat registers the submit handler only after its async setup
  // (ensureDefaultModel + resolveTokenLimit), so poll for it: up to 200
  // checks spaced 2 ms apart.
  let polls = 0;
  while (polls < 200 && !inputField._submit) {
    await delay(2);
    polls += 1;
  }
  if (!inputField._submit) throw new Error('cmdChat did not register onSubmit');

  return {
    chatHistory,
    statusBar,
    inputField,
    config,
    calls,
    submit: (text) => inputField.submit(text),
    done,
    cleanup: () => {
      // Best-effort reset of the tools module's UI flag; failures are ignored.
      try {
        tools.setUIActive(false);
      } catch {}
    },
  };
}
141
+
142
+ module.exports = { startChat };
@@ -0,0 +1,65 @@
1
+ 'use strict';
2
+
3
+ // Child process for the memory-truncation headless test. Runs cmdCode in json
4
+ // mode with an oversized project AGENTS.md so the PARENT can capture this
5
+ // process's stdout (the JSON envelope) and stderr (the truncation warning)
6
+ // cleanly — running in a child avoids swapping the parent's global
7
+ // process.stdout, which would collide with the node:test TAP reporter.
8
+
9
+ const os = require('os');
10
+ const fs = require('fs');
11
+ const path = require('path');
12
+
13
const ROOT = path.resolve(__dirname, '..', '..'); // project root
// Require a module by its path segments relative to the project root.
const fromRoot = (...segments) => require(path.join(ROOT, ...segments));

// Sandbox the home directory and provide a dummy API key before any lib
// module is loaded.
const home = fs.mkdtempSync(path.join(os.tmpdir(), 'memwarn-child-home-'));
Object.assign(process.env, { HOME: home, USERPROFILE: home, SEMALT_API_KEY: 'test-key' });

// Throwaway repo (marked by an empty .git directory) whose AGENTS.md is 4000
// characters longer than DEFAULT_MEMORY_MAX_BYTES, then make it the cwd.
const repo = fs.mkdtempSync(path.join(os.tmpdir(), 'memwarn-child-repo-'));
fs.mkdirSync(path.join(repo, '.git'), { recursive: true });
const { DEFAULT_MEMORY_MAX_BYTES } = fromRoot('lib', 'memory');
const oversizedMemory = 'Z'.repeat(DEFAULT_MEMORY_MAX_BYTES + 4000);
fs.writeFileSync(path.join(repo, 'AGENTS.md'), oversizedMemory);
process.chdir(repo);

// The remaining lib modules are loaded only after the chdir above.
const ui = fromRoot('lib', 'ui');
const { createApiClient } = fromRoot('lib', 'api');
const { createToolExecutor, extractToolCalls } = fromRoot('lib', 'tools');
const { createPermissionManager } = fromRoot('lib', 'permissions');
const { createAgentRunner } = fromRoot('lib', 'agent');
const { createOneshotCommands } = fromRoot('lib', 'commands', 'oneshot');
const { startMockLLM } = fromRoot('test', 'harness', 'mock-llm');

// Wire the real API client, tool executor and agent runner to a mock LLM that
// answers once with plain text, run cmdCode in JSON output mode, then exit 0.
async function main() {
  const mock = await startMockLLM();
  mock.replyWith('All done.');

  const config = {
    api_base: mock.base,
    api_key: 'test-key',
    auth_token: 'tok',
    default_model: 'test-model',
    temperature: 0.5,
    request_timeout_ms: 5000,
    stream: true,
    models: [],
    max_iterations: 10,
    system_prompt_mode: 'system_role',
    pricing: {},
  };
  const getConfig = () => config;

  const api = createApiClient({ getConfig, saveConfig() {}, ui });
  const pm = createPermissionManager(ui, { skipPermissions: true });
  pm.setUICallbacks({
    onAddMessage() {},
    onShowModal() {},
    onCloseModal() {},
    onCaptureNavigation: () => () => {},
  });

  const executor = createToolExecutor(pm, ui, getConfig);
  const { agentExecShell, agentExecFile, describePermission } = executor;
  const runner = createAgentRunner({
    chatStream: api.chatStream,
    extractToolCalls,
    agentExecShell,
    agentExecFile,
    describePermission,
    permissionManager: pm,
    ui,
    getConfig,
  });

  const shared = {
    ...ui,
    writer: { scrollback() {} },
    getConfig,
    setConfig() {},
    runAgentLoop: runner.runAgentLoop,
    readFileContext: () => '',
    ensureDefaultModel: async () => {},
    msgs: fromRoot('lib', 'ui', 'messages'),
    dbg: fromRoot('lib', 'debug'),
  };
  const { cmdCode } = createOneshotCommands(shared);

  await cmdCode({ outputFormat: 'json' }, ['do something']);
  await mock.close();
  process.exit(0);
}

// Any failure is reported on stderr with a CHILDERR: prefix and exit code 1.
main().catch((e) => {
  process.stderr.write('CHILDERR:' + (e && e.stack || e) + '\n');
  process.exit(1);
});
@@ -0,0 +1,120 @@
1
+ 'use strict';
2
+
3
+ // Scriptable mock LLM server for agent-loop integration tests (Task 1.2).
4
+ // Built on the SSE primitives in ./sse-server. A single server instance serves
5
+ // a FIFO queue of scripted responses — one per inbound POST — so a multi-turn
6
+ // agent loop (or a retry sequence) is driven deterministically with no real
7
+ // network and no real model.
8
+ //
9
+ // Each queued response is either:
10
+ // * a streamed 200 SSE reply (assistant content and/or native tool_calls), or
11
+ // * an error (non-200) with an optional JSON body and headers (e.g.
12
+ // Retry-After) — used to exercise the retry/backoff and 400/413 paths.
13
+ //
14
+ // See ./README.md for the full contract and examples.
15
+
16
+ const http = require('http');
17
+ const { sse, DONE } = require('./sse-server');
18
+
19
// Build the SSE frames for a plain assistant message.
// `content` is either one string (a single delta) or an array of strings
// (one delta each, for token-by-token and mid-stream-abort tests); any other
// falsy value yields no content deltas. After the deltas come a finish frame
// carrying `finish`, an optional `usage` frame, and the DONE sentinel.
function contentChunks(content, { finish = 'stop', usage = null } = {}) {
  let deltas;
  if (Array.isArray(content)) {
    deltas = content;
  } else if (content) {
    deltas = [content];
  } else {
    deltas = [];
  }

  return [
    ...deltas.map((text) => sse({ choices: [{ delta: { content: text } }] })),
    sse({ choices: [{ finish_reason: finish, delta: {} }] }),
    ...(usage ? [sse({ usage })] : []),
    DONE,
  ];
}
30
+
31
// Build the SSE frames for a native OpenAI-style tool_calls response: one
// delta carrying the whole call (arguments JSON-encoded), a finish frame with
// reason 'tool_calls', then the DONE sentinel.
function toolCallChunks(name, args, { id = 'call_1' } = {}) {
  const call = {
    index: 0,
    id,
    type: 'function',
    function: { name, arguments: JSON.stringify(args) },
  };
  const callFrame = sse({ choices: [{ delta: { tool_calls: [call] } }] });
  const finishFrame = sse({ choices: [{ finish_reason: 'tool_calls', delta: {} }] });
  return [callFrame, finishFrame, DONE];
}
39
+
40
// Start the scriptable mock LLM on an ephemeral loopback port.
// Every inbound request is recorded in `requests` ({ url, body, headers }) and
// answered with the next scripted response from a FIFO queue; an empty queue
// yields a 500 with a JSON error body. The promise resolves to a handle with:
//   base / port / requests
//   replyWith, replyWithToolCall, streamChunks, failWith — enqueue a response
//     (each returns `this`, so calls can be chained)
//   pending(), requestCount(), close()
function startMockLLM() {
  const scripted = [];
  const requests = [];

  // Stream `frames` one write at a time, pausing `gapMs` (default 2 ms)
  // between writes; a gap of 0 writes them back to back.
  const streamFrames = (res, frames, gapMs) => {
    const pause = gapMs == null ? 2 : gapMs;
    let cursor = 0;
    const pump = () => {
      if (res.writableEnded) return;
      if (cursor >= frames.length) {
        res.end();
        return;
      }
      res.write(frames[cursor++]);
      if (pause > 0) {
        setTimeout(pump, pause);
      } else {
        pump();
      }
    };
    pump();
  };

  // Status 200 is streamed as SSE; any other status sends a one-shot body
  // (default '{}') as JSON. Scripted headers override the Content-Type.
  const respond = (res, spec) => {
    const status = spec.status || 200;
    const streaming = status === 200;
    res.writeHead(status, {
      'Content-Type': streaming ? 'text/event-stream' : 'application/json',
      ...(spec.headers || {}),
    });
    if (streaming) {
      streamFrames(res, spec.chunks || [], spec.gapMs);
      return;
    }
    res.end(spec.body != null ? spec.body : '{}');
  };

  const server = http.createServer((req, res) => {
    const pieces = [];
    req.setEncoding('utf8');
    req.on('data', (piece) => {
      pieces.push(piece);
    });
    req.on('end', () => {
      requests.push({ url: req.url, body: pieces.join(''), headers: req.headers });
      const spec = scripted.shift();
      if (spec) {
        respond(res, spec);
        return;
      }
      res.writeHead(500, { 'Content-Type': 'application/json' });
      res.end('{"error":"mock-llm: response queue empty"}');
    });
  });

  const listening = new Promise((resolve) => {
    server.listen(0, '127.0.0.1', resolve);
  });

  return listening.then(() => {
    const { port } = server.address();
    return {
      base: `http://127.0.0.1:${port}`,
      port,
      requests,

      // Enqueue a streamed assistant message (string or string[] of deltas).
      replyWith(content, opts = {}) {
        scripted.push({ status: 200, chunks: contentChunks(content, opts), gapMs: opts.gapMs });
        return this;
      },
      // Enqueue a native tool_calls response.
      replyWithToolCall(name, args, opts = {}) {
        scripted.push({ status: 200, chunks: toolCallChunks(name, args, opts), gapMs: opts.gapMs });
        return this;
      },
      // Enqueue raw SSE chunks (full control).
      streamChunks(chunks, opts = {}) {
        scripted.push({ status: 200, chunks, gapMs: opts.gapMs });
        return this;
      },
      // Enqueue an error (non-200) response.
      failWith(status, { body, headers } = {}) {
        scripted.push({ status, body, headers });
        return this;
      },

      pending() {
        return scripted.length;
      },
      requestCount() {
        return requests.length;
      },
      close() {
        return new Promise((resolve) => server.close(resolve));
      },
    };
  });
}
119
+
120
+ module.exports = { startMockLLM, contentChunks, toolCallChunks };
@@ -0,0 +1,142 @@
1
+ #!/usr/bin/env node
2
+ 'use strict';
3
+
4
+ // Mock MCP server over stdio (Task 3.3).
5
+ // ----------------------------------------------------------------------------
6
+ // A tiny, dependency-free, spec-correct MCP server the tests spawn as a local
7
+ // subprocess (no network). It speaks newline-delimited JSON-RPC 2.0 on
8
+ // stdin/stdout — exactly the framing `StdioClientTransport` reads — so the REAL
9
+ // MCP SDK client connects to it and the whole discovery/dispatch path is
10
+ // exercised against a deterministic peer.
11
+ //
12
+ // Implemented methods: `initialize`, `notifications/initialized` (ignored),
13
+ // `tools/list`, `tools/call`, `ping`. It deliberately does NOT depend on the
14
+ // SDK server classes (which require zod schemas) — the raw protocol is ~60
15
+ // lines and keeps the test self-contained and reproducible.
16
+ //
17
+ // Behavior knobs (env vars, so a test can script edge cases):
18
+ // MOCK_MCP_NAME server name advertised in initialize (default "mock")
19
+ // MOCK_MCP_EXIT_ON_START if set, exit(1) immediately — simulates a server
20
+ // that dies on launch (graceful-degradation test).
21
+ //
22
+ // Tools exposed:
23
+ // echo { text } → returns the text back (untrusted-content payloads
24
+ // can be injected here to test the delimiter).
25
+ // add { a, b } → returns a+b.
26
+ // boom {} → returns an MCP tool-level error (isError: true).
27
+
28
+ const SERVER_NAME = process.env.MOCK_MCP_NAME || 'mock';
29
+
30
// Small builders for the JSON-schema fragments used by the tool catalog.
const describeProp = (type, description) => ({ type, description });
const objectSchema = (properties, required) => (
  required
    ? { type: 'object', properties, required }
    : { type: 'object', properties }
);

// Catalog returned verbatim by `tools/list`.
const TOOLS = [
  {
    name: 'echo',
    description: 'Echo the provided text back verbatim.',
    inputSchema: objectSchema({ text: describeProp('string', 'Text to echo') }, ['text']),
  },
  {
    name: 'add',
    description: 'Add two numbers and return the sum.',
    inputSchema: objectSchema(
      {
        a: describeProp('number', 'First addend'),
        b: describeProp('number', 'Second addend'),
      },
      ['a', 'b'],
    ),
  },
  {
    name: 'boom',
    description: 'Always reports a tool-level error.',
    inputSchema: objectSchema({}),
  },
];
58
+
59
// Write one message to stdout as a single newline-terminated JSON line.
function send(msg) {
  const line = `${JSON.stringify(msg)}\n`;
  process.stdout.write(line);
}

// Send a JSON-RPC 2.0 success response for request `id`.
function ok(id, result) {
  const reply = { jsonrpc: '2.0', id, result };
  send(reply);
}

// Send a JSON-RPC 2.0 error response for request `id`.
function err(id, code, message) {
  const failure = { code, message };
  send({ jsonrpc: '2.0', id, error: failure });
}
70
+
71
// Execute one of the mock tools and return its MCP result object, or null
// when `name` is not a known tool.
//   echo — returns args.text as a string ('' when it is null/undefined)
//   add  — returns Number(a) + Number(b) as a string; falsy operands count as 0
//   boom — returns a fixed message flagged with isError: true
function callTool(name, args) {
  const textResult = (text) => ({ content: [{ type: 'text', text }] });

  switch (name) {
    case 'echo': {
      const text = args && args.text;
      return textResult(String(text ?? ''));
    }
    case 'add': {
      const left = Number((args && args.a) || 0);
      const right = Number((args && args.b) || 0);
      return textResult(String(left + right));
    }
    case 'boom':
      return { ...textResult('the boom tool failed as designed'), isError: true };
    default:
      return null; // unknown tool
  }
}
84
+
85
// Dispatch one parsed JSON-RPC message to its handler.
// Requests are answered through ok()/err(). The `notifications/initialized`
// notification gets no reply, and an unrecognized method is answered with
// -32601 only when the message carries an id (i.e. it is a request).
function handle(msg) {
  const { id, method, params } = msg;

  switch (method) {
    case 'initialize': {
      // Echo back the protocol version the client asked for (by definition
      // one it supports) and advertise the tools capability.
      const protocolVersion = (params && params.protocolVersion) || '2025-06-18';
      ok(id, {
        protocolVersion,
        capabilities: { tools: {} },
        serverInfo: { name: SERVER_NAME, version: '1.0.0' },
      });
      return;
    }
    case 'notifications/initialized':
      return; // notification, no reply
    case 'ping':
      ok(id, {});
      return;
    case 'tools/list':
      ok(id, { tools: TOOLS });
      return;
    case 'tools/call': {
      const name = params && params.name;
      const result = callTool(name, params && params.arguments);
      if (result) {
        ok(id, result);
      } else {
        err(id, -32602, `Unknown tool: ${name}`);
      }
      return;
    }
    default:
      if (id !== undefined) err(id, -32601, `Method not found: ${method}`);
  }
}
109
+
110
// Run the stdio server loop: read newline-delimited JSON-RPC messages from
// stdin, dispatch each through handle(), and exit with code 0 when stdin ends.
function run() {
  // Behavior knob: simulate a server that dies on launch (degradation test).
  if (process.env.MOCK_MCP_EXIT_ON_START) {
    process.stderr.write('mock-mcp-server: simulated startup failure\n');
    process.exit(1);
  }

  // Parse and dispatch one non-empty line. Lines that are not valid JSON are
  // skipped; a handler exception becomes a -32603 error reply when the
  // message carried an id.
  const dispatchLine = (line) => {
    let msg;
    try {
      msg = JSON.parse(line);
    } catch {
      return;
    }
    try {
      handle(msg);
    } catch (e) {
      if (msg && msg.id !== undefined) err(msg.id, -32603, e.message);
    }
  };

  // Text received after the last newline; held until its line is completed.
  let pending = '';
  process.stdin.setEncoding('utf8');
  process.stdin.on('data', (chunk) => {
    const lines = (pending + chunk).split('\n');
    pending = lines.pop();
    for (const rawLine of lines) {
      const line = rawLine.trim();
      if (line) dispatchLine(line);
    }
  });
  process.stdin.on('end', () => process.exit(0));
}
134
+
135
+ // Only attach the stdin server loop when spawned directly (as the MCP client
136
+ // does). The Node test runner discovers and EXECUTES every file under test/ —
137
+ // including this one — each in a child process where `require.main === module`
138
+ // is also true. It sets NODE_TEST_CONTEXT in that child, so we use its ABSENCE
139
+ // (plus require.main) to mean "spawned as a real MCP server"; otherwise this
140
+ // stdin loop would hang the test runner forever waiting on input.
141
+ if (require.main === module && !process.env.NODE_TEST_CONTEXT) run();
142
+
@@ -0,0 +1,69 @@
1
+ 'use strict';
2
+
3
+ // Minimal scriptable SSE server for streaming-parser tests (Task 1.1).
4
+ // Task 1.2 extends this into the full mock-LLM harness (queues, status codes,
5
+ // Retry-After, delays); for now it serves one scripted response per request.
6
+ //
7
+ // Usage:
8
+ // const srv = await startSseServer({ chunks: ['data: {...}\n', 'data: [DONE]\n'] });
9
+ // // point config.api_base at srv.base, make the request, then:
10
+ // await srv.close();
11
+
12
+ const http = require('http');
13
+
14
// Start a scripted SSE server on an ephemeral loopback port. Every request is
// answered with the same scripted response once its body has been drained.
//
// opts:
//   chunks   string[] — written sequentially (with a tiny gap) so the client's
//            cross-chunk line buffering is genuinely exercised. A single string
//            is also accepted and sent as one chunk.
//   body     fallback single chunk used when `chunks` is not provided.
//   status   HTTP status code (default 200).
//   headers  extra response headers (merged over the SSE defaults).
//   gapMs    delay between chunks (default 4ms); 0 writes them back to back.
//
// Resolves to { server, port, base, close } once the server is listening, and
// rejects if the server fails to start listening.
function startSseServer(opts = {}) {
  const status = opts.status || 200;
  const gapMs = opts.gapMs == null ? 4 : opts.gapMs;

  // Honor the documented contract: an array is used as-is and a lone string
  // becomes a one-element script. Anything else falls back to `body`.
  let chunks;
  if (Array.isArray(opts.chunks)) {
    chunks = opts.chunks;
  } else if (typeof opts.chunks === 'string') {
    chunks = [opts.chunks];
  } else {
    chunks = [opts.body != null ? opts.body : ''];
  }

  const server = http.createServer((req, res) => {
    // Stop the chunk timer as soon as the response closes (finished, or the
    // client went away mid-stream) so no write is attempted afterwards.
    let closed = false;
    let timer = null;
    res.on('close', () => {
      closed = true;
      clearTimeout(timer);
    });

    // Drain the request body before responding.
    req.resume();
    req.on('end', () => {
      res.writeHead(status, {
        'Content-Type': 'text/event-stream',
        'Cache-Control': 'no-cache',
        Connection: 'keep-alive',
        ...(opts.headers || {}),
      });
      let i = 0;
      const writeNext = () => {
        if (closed) return;
        if (i >= chunks.length) {
          res.end();
          return;
        }
        res.write(chunks[i++]);
        if (gapMs > 0) {
          timer = setTimeout(writeNext, gapMs);
        } else {
          writeNext();
        }
      };
      writeNext();
    });
  });

  return new Promise((resolve, reject) => {
    // A failed listen() would otherwise surface as an unhandled 'error' event;
    // the listener is removed again once the server is up.
    server.once('error', reject);
    server.listen(0, '127.0.0.1', () => {
      server.removeListener('error', reject);
      const { port } = server.address();
      resolve({
        server,
        port,
        base: `http://127.0.0.1:${port}`,
        close: () => new Promise((r) => server.close(r)),
      });
    });
  });
}
61
+
62
// Format `obj` as one SSE data line: `data: {json}\n`.
function sse(obj) {
  const payload = JSON.stringify(obj);
  return `data: ${payload}\n`;
}
66
+
67
+ const DONE = 'data: [DONE]\n';
68
+
69
+ module.exports = { startSseServer, sse, DONE };