mulmoclaude 0.6.2 → 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182) hide show
  1. package/README.md +26 -0
  2. package/bin/mulmoclaude.js +11 -1
  3. package/client/assets/JsonEditor-D6WBWLoa.js +10 -0
  4. package/client/assets/JsonEditor-Di5xGeZY.css +1 -0
  5. package/client/assets/_plugin-vue_export-helper-BOai-rQB.js +1 -0
  6. package/client/assets/chunk-D8eiyYIV-LcKZGJv5.js +1 -0
  7. package/client/assets/{html2canvas-CDGcmOD3-Bkf2uOth.js → html2canvas-CDGcmOD3-XVrO-eyz.js} +1 -1
  8. package/client/assets/index-CyBr8Mkr.css +2 -0
  9. package/client/assets/index-zZIqEbNX.js +5106 -0
  10. package/client/assets/{index.es-DqtpmBm8-D9mAh_KQ.js → index.es-DqtpmBm8-DHT6q10o.js} +1 -1
  11. package/client/assets/material-symbols-outlined-DtIK7AQn.woff2 +0 -0
  12. package/client/assets/runtime-protocol-vue-D6kcV0wa.js +1 -0
  13. package/client/assets/{runtime-vue-BVUzgYGA.js → runtime-vue-fFYhnNg3.js} +1 -1
  14. package/client/assets/{vue-C8UuIO9J.js → vue-D4w8THF_.js} +1 -1
  15. package/client/assets/vue-i18n-CQbxVmNs.js +3 -0
  16. package/client/assets/vue.runtime.esm-bundler-BTyIdNAI.js +4 -0
  17. package/client/index.html +10 -10
  18. package/package.json +9 -8
  19. package/server/agent/backend/claude-code.ts +34 -0
  20. package/server/agent/backend/fake-echo.ts +370 -0
  21. package/server/agent/backend/index.ts +16 -1
  22. package/server/agent/config.ts +74 -24
  23. package/server/agent/index.ts +104 -80
  24. package/server/agent/mcpFailureMonitor.ts +167 -0
  25. package/server/agent/mcpPreflight.ts +185 -0
  26. package/server/agent/prompt.ts +50 -359
  27. package/server/agent/stdioHttpShim.ts +171 -0
  28. package/server/agent/stream.ts +12 -1
  29. package/server/api/routes/encore.ts +55 -0
  30. package/server/api/routes/files.ts +22 -0
  31. package/server/api/routes/mulmo-script.ts +19 -1
  32. package/server/api/routes/schedulerHandlers.ts +52 -4
  33. package/server/api/routes/sessions.ts +15 -0
  34. package/server/api/routes/skills.ts +263 -0
  35. package/server/build/dispatcher.mjs +299 -0
  36. package/server/encore/INVARIANTS.md +272 -0
  37. package/server/encore/boot.ts +39 -0
  38. package/server/encore/closure.ts +36 -0
  39. package/server/encore/cycle.ts +276 -0
  40. package/server/encore/dispatch.ts +103 -0
  41. package/server/encore/handlers/amend.ts +99 -0
  42. package/server/encore/handlers/appendNote.ts +74 -0
  43. package/server/encore/handlers/defineEncore.ts +42 -0
  44. package/server/encore/handlers/listTickets.ts +107 -0
  45. package/server/encore/handlers/markStepDone.ts +41 -0
  46. package/server/encore/handlers/markTargetSkipped.ts +33 -0
  47. package/server/encore/handlers/query.ts +138 -0
  48. package/server/encore/handlers/recordValues.ts +44 -0
  49. package/server/encore/handlers/resolveNotification.ts +121 -0
  50. package/server/encore/handlers/setup.ts +81 -0
  51. package/server/encore/handlers/shared.ts +137 -0
  52. package/server/encore/handlers/snooze.ts +87 -0
  53. package/server/encore/handlers/startObligationChat.ts +64 -0
  54. package/server/encore/handlers/startSetupChat.ts +50 -0
  55. package/server/encore/lock.ts +61 -0
  56. package/server/encore/notifier.ts +123 -0
  57. package/server/encore/obligation.ts +25 -0
  58. package/server/encore/paths.ts +78 -0
  59. package/server/encore/reconcile.ts +661 -0
  60. package/server/encore/tick.ts +191 -0
  61. package/server/encore/yaml-fm.ts +63 -0
  62. package/server/events/notifications.ts +19 -91
  63. package/server/index.ts +94 -9
  64. package/server/notifier/engine.ts +102 -1
  65. package/server/notifier/macosReminderAdapter.ts +30 -0
  66. package/server/notifier/runtime-api.ts +41 -1
  67. package/server/notifier/types.ts +15 -2
  68. package/server/plugins/runtime.ts +11 -2
  69. package/server/prompts/index.ts +39 -0
  70. package/server/prompts/system/journal-pointer.md +12 -0
  71. package/server/prompts/system/memory-management-atomic.md +33 -0
  72. package/server/prompts/system/memory-management-topic.md +60 -0
  73. package/server/prompts/system/news-concierge.md +24 -0
  74. package/server/prompts/system/sandbox-tools.md +10 -0
  75. package/server/prompts/system/sources-context.md +16 -0
  76. package/server/prompts/system/system.md +91 -0
  77. package/server/system/announceOptionalDeps.ts +57 -0
  78. package/server/system/appVersion.ts +34 -0
  79. package/server/system/config.ts +17 -1
  80. package/server/system/docker.ts +14 -6
  81. package/server/system/env.ts +18 -5
  82. package/server/system/optionalDeps.ts +129 -0
  83. package/server/utils/cli-flags.d.mts +14 -0
  84. package/server/utils/cli-flags.mjs +53 -0
  85. package/server/utils/files/encore-io.ts +111 -0
  86. package/server/utils/time.ts +6 -0
  87. package/server/workspace/helps/business.md +2 -2
  88. package/server/workspace/helps/encore-dsl.md +482 -0
  89. package/server/workspace/helps/index.md +15 -13
  90. package/server/workspace/helps/mulmoscript.md +3 -3
  91. package/server/workspace/helps/sandbox.md +2 -2
  92. package/server/workspace/hooks/dispatcher.ts +7 -5
  93. package/server/workspace/hooks/provision.ts +6 -3
  94. package/server/workspace/paths.ts +13 -4
  95. package/server/workspace/skills/catalog.ts +355 -0
  96. package/server/workspace/skills/external/catalog.ts +283 -0
  97. package/server/workspace/skills/external/clone.ts +129 -0
  98. package/server/workspace/skills/external/id.ts +194 -0
  99. package/server/workspace/skills/external/install.ts +417 -0
  100. package/server/workspace/skills/external/presets.ts +50 -0
  101. package/server/workspace/skills-preset.ts +29 -17
  102. package/server/workspace/workspace.ts +10 -5
  103. package/src/App.vue +37 -8
  104. package/src/components/FileContentRenderer.vue +102 -9
  105. package/src/components/JsonEditor.vue +160 -0
  106. package/src/components/NotificationBell.vue +35 -3
  107. package/src/components/PluginLauncher.vue +20 -41
  108. package/src/components/RightSidebar.vue +19 -0
  109. package/src/components/SettingsMcpTab.vue +58 -11
  110. package/src/components/SettingsModal.vue +22 -1
  111. package/src/components/StackView.vue +10 -1
  112. package/src/components/TodoExplorer.vue +16 -0
  113. package/src/components/todo/TodoKanbanView.vue +34 -6
  114. package/src/composables/useNotifications.ts +21 -1
  115. package/src/config/apiRoutes.ts +0 -6
  116. package/src/config/mcpCatalog.ts +12 -7
  117. package/src/config/mcpTypes.ts +5 -0
  118. package/src/config/roles.ts +52 -15
  119. package/src/config/systemFileDescriptors.ts +12 -0
  120. package/src/lang/de.ts +108 -12
  121. package/src/lang/en.ts +105 -11
  122. package/src/lang/es.ts +106 -11
  123. package/src/lang/fr.ts +106 -11
  124. package/src/lang/ja.ts +104 -11
  125. package/src/lang/ko.ts +105 -11
  126. package/src/lang/pt-BR.ts +106 -11
  127. package/src/lang/zh.ts +103 -11
  128. package/src/main.ts +1 -0
  129. package/src/plugins/_generated/metas.ts +4 -0
  130. package/src/plugins/_generated/registrations.ts +2 -0
  131. package/src/plugins/_generated/server-bindings.ts +5 -0
  132. package/src/plugins/encore/EncoreDashboard.vue +504 -0
  133. package/src/plugins/encore/EncoreRedirect.vue +116 -0
  134. package/src/plugins/encore/View.vue +36 -0
  135. package/src/plugins/encore/defineEncoreDefinition.ts +74 -0
  136. package/src/plugins/encore/defineEncoreMeta.ts +13 -0
  137. package/src/plugins/encore/index.ts +93 -0
  138. package/src/plugins/encore/manageEncoreDefinition.ts +100 -0
  139. package/src/plugins/encore/manageEncoreMeta.ts +36 -0
  140. package/src/plugins/manageSkills/View.vue +832 -30
  141. package/src/plugins/manageSkills/categories.ts +125 -0
  142. package/src/plugins/manageSkills/meta.ts +30 -0
  143. package/src/plugins/markdown/definition.ts +3 -3
  144. package/src/plugins/meta-types.ts +5 -0
  145. package/src/plugins/presentMulmoScript/Preview.vue +3 -3
  146. package/src/plugins/presentMulmoScript/View.vue +157 -33
  147. package/src/plugins/presentMulmoScript/meta.ts +4 -0
  148. package/src/plugins/scheduler/View.vue +45 -9
  149. package/src/plugins/scheduler/calendarDefinition.ts +6 -2
  150. package/src/plugins/scheduler/multiDayHelpers.ts +95 -0
  151. package/src/plugins/skill/View.vue +1 -5
  152. package/src/plugins/spreadsheet/View.vue +3 -3
  153. package/src/plugins/spreadsheet/definition.ts +1 -1
  154. package/src/plugins/textResponse/Preview.vue +14 -1
  155. package/src/plugins/textResponse/View.vue +39 -24
  156. package/src/plugins/wiki/components/WikiPageBody.vue +4 -0
  157. package/src/router/index.ts +11 -0
  158. package/src/router/pageRoutes.ts +1 -0
  159. package/src/types/encore-dsl/at-expression.ts +120 -0
  160. package/src/types/encore-dsl/at-resolver.ts +32 -0
  161. package/src/types/encore-dsl/cadence.ts +289 -0
  162. package/src/types/encore-dsl/schema.ts +288 -0
  163. package/src/types/notification.ts +2 -1
  164. package/src/types/session.ts +6 -0
  165. package/src/types/sse.ts +5 -0
  166. package/src/types/toolCallHistory.ts +7 -0
  167. package/src/utils/agent/eventDispatch.ts +26 -5
  168. package/src/utils/agent/mcpHint.ts +50 -0
  169. package/src/utils/image/htmlSrcAttrs.ts +117 -13
  170. package/src/utils/session/sessionEntries.ts +8 -32
  171. package/client/assets/PluginScopedRoot-YjvQq0Nn.js +0 -3
  172. package/client/assets/chunk-CernVdwh.js +0 -1
  173. package/client/assets/chunk-D8eiyYIV-CAXpUwLd.js +0 -1
  174. package/client/assets/index-BwrlMMHr.js +0 -5005
  175. package/client/assets/index-CvvNuegU.css +0 -2
  176. package/client/assets/material-symbols-outlined-BOZVWuR3.woff2 +0 -0
  177. package/client/assets/runtime-protocol-vue-C1To4M3t.js +0 -1
  178. package/client/assets/vue.runtime.esm-bundler-DQ8Kjjui.js +0 -4
  179. package/server/api/routes/notifications.ts +0 -195
  180. package/server/notifier/legacy-adapters.ts +0 -76
  181. package/server/workspace/hooks/dispatcher.mjs +0 -300
  182. package/src/composables/useSelectedResult.ts +0 -49
@@ -0,0 +1,370 @@
1
+ // Test-only LLM backend. Returned by `getActiveBackend()` only after
2
+ // boot wiring installs it (`MULMOCLAUDE_FAKE_AGENT=1` in CI), and re-usable
3
+ // from unit tests via `setFakeResponse()` / `resetFakeResponse()`.
4
+ //
5
+ // Default behavior:
6
+ // - emits a synthesized `claudeSessionId` so the orchestrator's
7
+ // resume bookkeeping sees the same shape as a real run
8
+ // - short-circuits `/<slug>` slash-command turns by reading the
9
+ // seeded SKILL.md and echoing the canary marker line
10
+ // - emits the concatenated per-session message history as the
11
+ // assistant text reply, so context-recall tests (session L-12)
12
+ // see prior turn content
13
+ //
14
+ // Tool dispatch: when the user prompt matches a known shape (see
15
+ // detectToolCalls), fake-echo emits the corresponding tool_call
16
+ // AND posts the args to the same internal plugin endpoint the MCP
17
+ // bridge would use under real Claude (see PLUGIN_ENDPOINTS). The
18
+ // handler runs unmodified, the artifact lands on disk, and the
19
+ // canvas mounts the plugin View — fake at the LLM seam only, real
20
+ // from the tool dispatch downward. Tests that need an LLM that
21
+ // actually reasons (presentForm field design, agent-driven slug
22
+ // choice in skill creation, etc.) still stay gated on
23
+ // `E2E_LIVE_NO_LLM=1`.
24
+
25
+ import { randomUUID } from "node:crypto";
26
+ import { readFile } from "node:fs/promises";
27
+ import path from "node:path";
28
+
29
+ import { getCurrentToken } from "../../api/auth/token.js";
30
+ import { makeUuid } from "../../utils/id.js";
31
+ import { API_ROUTES } from "../../../src/config/apiRoutes.js";
32
+ import { EVENT_TYPES } from "../../../src/types/events.js";
33
+ import { WORKSPACE_DIRS } from "../../workspace/paths.js";
34
+ import type { AgentEvent } from "../stream.js";
35
+ import type { AgentInput, LLMBackend } from "./types.js";
36
+
37
/**
 * Loose shape of the JSON body a plugin endpoint replies with. Only the
 * three named fields are inspected in this file; every other key is kept
 * as-is when the envelope is forwarded.
 */
interface PluginEnvelope {
  [key: string]: unknown;
  instructions?: unknown;
  message?: unknown;
  data?: unknown;
}
43
+
44
/** One tool invocation the fake backend should emit during a turn. */
export interface FakeToolCall {
  /** Tool name reported in the emitted `tool_call` event. */
  toolName: string;
  /** Arguments reported in the event; also the JSON body posted to the
   *  plugin endpoint when the tool has one mapped. */
  args: unknown;
  /** Explicit content for the matching `tool_call_result`. When omitted,
   *  the content comes from dispatching to the tool's plugin endpoint,
   *  or is the literal `{"ok":true}` JSON for tools with no endpoint. */
  result?: string;
}
51
+
52
/** Everything the fake backend emits for a single agent turn. */
export interface FakeResponse {
  /** Tool calls emitted before the text block. The default generator
   *  fills this from prompt-shape detection (`detectToolCalls`); tests
   *  can script their own through `setFakeResponse()`. */
  toolCalls?: readonly FakeToolCall[];
  /** Assistant text. Leave unset to skip the text event entirely. */
  text?: string;
  /** When set, a single `error` AgentEvent carrying this message is
   *  emitted and the turn stops; any tool calls / text are suppressed.
   *  Intended to mirror the claude-code backend's behavior on a
   *  non-zero CLI exit (`readAgentEvents`). */
  error?: string;
  /** Emit the `tool_call` for each `toolCalls` entry but NOT the paired
   *  `tool_call_result` — simulates a truncated / partial stream where
   *  the model died mid tool round-trip. */
  omitToolResult?: boolean;
}
69
+
70
/** Generator contract: maps one turn's input to the response to emit,
 *  either synchronously or as a promise. */
export type FakeResponseFn = (input: AgentInput) => Promise<FakeResponse> | FakeResponse;

// User messages seen so far, keyed by session id, so context-recall
// tests find earlier turns in the reply. Emptied by `resetFakeResponse()`.
const sessionTurns: Map<string, string[]> = new Map();
75
+
76
/**
 * Built-in response generator, used until `setFakeResponse()` swaps it.
 *
 * Checks, in order: the forced-error marker, a `/<slug>` slash-command
 * turn backed by a seeded SKILL.md, and finally the default echo (the
 * session's message history joined by blank lines, plus any tool calls
 * detected in the current message).
 */
async function defaultResponse(input: AgentInput): Promise<FakeResponse> {
  // Prompt-driven error trigger for e2e-live. The in-process
  // `setFakeResponse()` knob is unreachable from a browser-driven spec
  // (separate process), so the error-banner canary opts in by sending a
  // message containing this exact marker.
  const wantsForcedError = input.message.includes("__FAKE_ERROR__");
  if (wantsForcedError) {
    // The text is rendered through marked() in the chat card, so it must
    // stay free of markdown-significant characters (no `__`, `*`,
    // backticks) — the canary asserts on a literal substring of it.
    return { error: "fake-echo forced error for the e2e-live error-banner canary" };
  }

  // Slash-command turn: the message is nothing but `/<slug>`. Instead of
  // a real skill pipeline, read the seeded SKILL.md body and echo the
  // line it asks for. No usable skill → fall through to the echo below.
  const slashMatch = input.message.trim().match(/^\/([a-z0-9][a-z0-9-]*)$/i);
  if (slashMatch) {
    const skillReply = await replyFromSeededSkill(input.workspacePath, slashMatch[1]);
    if (skillReply !== null) return { text: skillReply };
  }

  // Default echo: record this turn, then reply with the whole history.
  let turns = sessionTurns.get(input.sessionId);
  if (turns == null) turns = [];
  turns.push(input.message);
  sessionTurns.set(input.sessionId, turns);

  return {
    toolCalls: detectToolCalls(input.message),
    text: turns.join("\n\n"),
  };
}
113
+
114
+ // ── Tool-call pattern detectors ───────────────────────────────────
115
+ //
116
+ // Each detector matches one e2e-live prompt shape. The fake-echo
117
+ // loop below dispatches each detected call to the matching server-
118
+ // side plugin endpoint (see PLUGIN_ENDPOINTS) so the real handler
119
+ // runs, the artifact lands on disk, and the canvas mounts the View.
120
+ // Production never reaches this code path — gated by
121
+ // MULMOCLAUDE_FAKE_AGENT=1 at server boot.
122
+
123
/**
 * Detect a prompt asking for `presentMulmoScript`.
 *
 * @param message Raw user prompt.
 * @returns A call carrying the quoted `filePath: "..."` value, or `null`
 *   when the tool name or the quoted path is missing.
 */
function detectPresentMulmoScript(message: string): FakeToolCall | null {
  const mentionsTool = /presentMulmoScript/i.test(message);
  const filePath = mentionsTool ? /filePath:\s*["']([^"']+)["']/.exec(message)?.[1] : undefined;
  if (filePath === undefined) return null;
  return { toolName: "presentMulmoScript", args: { filePath } };
}
129
+
130
/**
 * Detect a prompt asking for `presentHtml`.
 *
 * Everything from the first `<` onward is treated as the markup. The
 * handler expects a self-contained document, so:
 *   - markup that already starts with `<!DOCTYPE` is passed through;
 *   - markup that starts with an `<html>` element only gets a DOCTYPE
 *     prepended (wrapping it again would nest `<html>` inside `<body>`);
 *   - any other fragment is wrapped in a minimal document.
 *
 * @param message Raw user prompt.
 * @returns The tool call, or `null` when the tool name or any markup is
 *   missing from the prompt.
 */
function detectPresentHtml(message: string): FakeToolCall | null {
  if (!/presentHtml/i.test(message)) return null;
  const start = message.indexOf("<");
  if (start < 0) return null;
  const fragment = message.slice(start).trim();
  let html: string;
  if (/^<!DOCTYPE/i.test(fragment)) {
    html = fragment;
  } else if (/^<html[\s>]/i.test(fragment)) {
    html = `<!DOCTYPE html>${fragment}`;
  } else {
    html = `<!DOCTYPE html><html><body>${fragment}</body></html>`;
  }
  return { toolName: "presentHtml", args: { html } };
}
140
+
141
/**
 * Detect a prompt asking for `presentForm` and build a one-field form.
 *
 * The title, field id and field label are taken from `titled '...'`,
 * `id='...'` and `label='...'` in the prompt; each falls back to a fixed
 * placeholder. The field is marked required when the prompt contains the
 * word "required" anywhere.
 *
 * @param message Raw user prompt.
 * @returns The tool call, or `null` when the tool name is not mentioned.
 */
function detectPresentForm(message: string): FakeToolCall | null {
  if (!/presentForm/i.test(message)) return null;

  // First capture group of `pattern` in the prompt, or the fallback.
  const capture = (pattern: RegExp, fallback: string): string => pattern.exec(message)?.[1] ?? fallback;

  const title = capture(/titled\s+['"]([^'"]+)['"]/i, "Quick check");
  const onlyField = {
    id: capture(/id\s*=\s*['"]([^'"]+)['"]/i, "field1"),
    type: "text",
    label: capture(/label\s*=\s*['"]([^'"]+)['"]/i, "Field"),
    required: /required/i.test(message),
    description: "auto-generated by fake-echo",
  };
  return { toolName: "presentForm", args: { title, fields: [onlyField] } };
}
162
+
163
/**
 * Detect a prompt asking for `presentChart` and build a single bar chart.
 *
 * Every "<word of 3+ letters> <1-6 digit number>" pair in the prompt
 * becomes one bar; with no pairs the chart falls back to A/B/C = 1/2/3.
 * The title comes from `titled '...'`, defaulting to "Untitled".
 *
 * @param message Raw user prompt.
 * @returns The tool call, or `null` when the tool name is not mentioned.
 */
function detectPresentChart(message: string): FakeToolCall | null {
  if (!/presentChart/i.test(message)) return null;

  const foundLabels: string[] = [];
  const foundValues: number[] = [];
  for (const [, label, value] of message.matchAll(/\b([A-Za-z]{3,})\s+(\d{1,6})\b/g)) {
    foundLabels.push(label);
    foundValues.push(Number(value));
  }
  const hasPairs = foundLabels.length > 0;
  const labels = hasPairs ? foundLabels : ["A", "B", "C"];
  const values = hasPairs ? foundValues : [1, 2, 3];

  const title = /titled\s+['"]([^'"]+)['"]/i.exec(message)?.[1] ?? "Untitled";

  const barChart = {
    title,
    type: "bar",
    option: {
      xAxis: { type: "category", data: labels },
      yAxis: { type: "value" },
      series: [{ type: "bar", data: values }],
    },
  };
  return {
    toolName: "presentChart",
    args: { document: { title, charts: [barChart] } },
  };
}
190
+
191
/**
 * Run every prompt-shape detector over the message.
 *
 * @param message Raw user prompt.
 * @returns The detected calls in detector order, or `undefined` when no
 *   detector matched.
 */
function detectToolCalls(message: string): FakeToolCall[] | undefined {
  const detectors = [detectPresentMulmoScript, detectPresentHtml, detectPresentForm, detectPresentChart];
  const calls = detectors.map((detect) => detect(message)).filter((call): call is FakeToolCall => call !== null);
  return calls.length === 0 ? undefined : calls;
}
199
+
200
// ── Plugin dispatch ───────────────────────────────────────────────

/**
 * Tool name → internal API path that receives the tool's args. These are
 * meant to be the same endpoints the MCP bridge posts to in a real run,
 * so the actual server-side handler executes end-to-end. Tools missing
 * from this table get a synthesized success envelope instead (callers
 * can override that via `FakeToolCall.result`).
 */
const PLUGIN_ENDPOINTS: Readonly<Record<string, string>> = {
  presentForm: "/api/form",
  presentHtml: "/api/html",
  presentChart: "/api/chart",
  presentMulmoScript: "/api/mulmoScript/save",
};
213
+
214
/**
 * Produce the `tool_call_result` content for one fake tool call.
 *
 * Intended to mirror the real MCP bridge's tool-call handling:
 *   1. POST the args to the tool's plugin endpoint to get an envelope.
 *   2. If `envelope.data` is set, push the envelope (stamped with the
 *      tool name and a fresh uuid) to the internal tool-result route so
 *      the canvas can mount the plugin View.
 *   3. Return the envelope's `message` / `instructions` text so the
 *      tool-call history shows something meaningful.
 *
 * Never throws for HTTP or network failures: those come back as a JSON
 * string of the form `{"error": "..."}` so the chat turn survives and
 * the test can see what went wrong.
 *
 * @param call Tool call to execute; an explicit `call.result` is
 *   returned verbatim without contacting any endpoint.
 * @param port Local port the server is listening on.
 * @param chatSessionId Session the tool result is published to.
 * @returns Text content for the matching `tool_call_result` event.
 */
async function dispatchToPlugin(call: FakeToolCall, port: number, chatSessionId: string): Promise<string> {
  if (call.result !== undefined) return call.result;
  // Own-property check: a bare `PLUGIN_ENDPOINTS[name]` would also resolve
  // inherited keys ("constructor", "toString", …) to truthy non-string
  // values, which would then be interpolated into the request URL.
  const endpoint = Object.prototype.hasOwnProperty.call(PLUGIN_ENDPOINTS, call.toolName) ? PLUGIN_ENDPOINTS[call.toolName] : undefined;
  if (!endpoint) return '{"ok":true}';
  const token = getCurrentToken();
  const authHeaders: Record<string, string> = token ? { Authorization: `Bearer ${token}` } : {};
  try {
    const response = await fetch(`http://localhost:${port}${endpoint}`, {
      method: "POST",
      headers: { "Content-Type": "application/json", ...authHeaders },
      body: JSON.stringify(call.args),
    });
    if (!response.ok) {
      const errBody = await response.text();
      return JSON.stringify({ error: `plugin ${call.toolName} returned ${response.status}: ${errBody.slice(0, 200)}` });
    }
    const envelope = ((await response.json()) ?? {}) as PluginEnvelope;
    if (envelope.data !== undefined) {
      // Query key is `session`, not `chatSessionId` — per the original
      // note this matches the server-side `getSessionQuery(req)` reader
      // and what the MCP bridge passes.
      const toolResultUrl = `http://localhost:${port}${API_ROUTES.agent.internal.toolResult}?session=${encodeURIComponent(chatSessionId)}`;
      const pushRes = await fetch(toolResultUrl, {
        method: "POST",
        headers: { "Content-Type": "application/json", ...authHeaders },
        body: JSON.stringify({ ...envelope, toolName: call.toolName, uuid: makeUuid() }),
      });
      if (!pushRes.ok) {
        // Fail loudly: a swallowed publish would leave the canvas blank
        // while the chat reads "Done", masking a real wiring break.
        // Surfacing it as the tool result makes the test fail fast
        // instead of timing out on an absent View.
        const errBody = await pushRes.text();
        return JSON.stringify({
          error: `tool-result push failed for ${call.toolName}: ${pushRes.status} ${errBody.slice(0, 200)}`,
        });
      }
    }
    const text: string[] = [];
    if (typeof envelope.message === "string") text.push(envelope.message);
    if (typeof envelope.instructions === "string") text.push(envelope.instructions);
    return text.length > 0 ? text.join("\n") : "Done";
  } catch (err) {
    // Don't tear down the chat turn on plugin-dispatch failure —
    // surface the error in the tool result so the test sees it.
    return JSON.stringify({ error: err instanceof Error ? err.message : String(err) });
  }
}
273
+
274
/**
 * Read a seeded project-scope SKILL.md and pull out the canary line its
 * body asks the model to echo back ("respond with this exact line and
 * nothing else: X").
 *
 * @param workspacePath Workspace root the skills directory lives under.
 * @param slug Skill directory name.
 * @returns The trimmed text after the marker on the first matching line,
 *   or `null` when the file cannot be read or no line has the marker —
 *   the caller then falls through to the default echo.
 */
async function replyFromSeededSkill(workspacePath: string, slug: string): Promise<string | null> {
  const skillFile = path.join(workspacePath, WORKSPACE_DIRS.claudeSkills, slug, "SKILL.md");
  let lines: string[];
  try {
    lines = (await readFile(skillFile, "utf8")).split(/\r?\n/);
  } catch {
    return null;
  }
  // Matched one line at a time so the pattern never spans line breaks.
  const marker = /respond with this exact line(?: and nothing else)?:\s*(.+)/i;
  for (const line of lines) {
    const canary = marker.exec(line)?.[1];
    if (canary !== undefined) return canary.trim();
  }
  return null;
}
294
+
295
+ // ── Backend wiring ────────────────────────────────────────────────
296
+
297
// Generator consulted on every turn; starts out as the built-in echo.
let responseFn: FakeResponseFn = defaultResponse;

/**
 * Install a custom response generator in place of the default echo +
 * slash-command one. Meant for unit tests that need full control over
 * what the fake backend emits; call `resetFakeResponse()` in teardown so
 * later tests start clean.
 *
 * @param generator Function invoked once per agent turn.
 */
export function setFakeResponse(generator: FakeResponseFn): void {
  responseFn = generator;
}

/** Forget all per-session history AND go back to the default generator. */
export function resetFakeResponse(): void {
  sessionTurns.clear();
  responseFn = defaultResponse;
}
312
+
313
/**
 * Whether the turn's abort signal has fired. The generator consults this
 * before each yield: with no subprocess to kill, "stop emitting
 * immediately" is the stub's equivalent of an abort.
 *
 * @returns `true` only when a signal is present and already aborted.
 */
function aborted(input: AgentInput): boolean {
  const signal = input.abortSignal;
  if (signal == null) return false;
  return signal.aborted === true;
}
319
+
320
/**
 * Fake agent loop. Emits, in order: a synthesized session id, then
 * either a single error event or the scripted tool calls (each followed
 * by its result unless `omitToolResult` is set) and the optional text.
 * Stops silently as soon as the abort signal is observed.
 *
 * @param input Turn input; `port` and `sessionId` are used for plugin
 *   dispatch, `abortSignal` for early termination.
 */
async function* runFakeEchoAgent(input: AgentInput): AsyncGenerator<AgentEvent> {
  if (aborted(input)) return;
  yield { type: EVENT_TYPES.claudeSessionId, id: randomUUID() };

  const response = await responseFn(input);

  // Error short-circuit: report it and emit nothing else for this turn.
  if (response.error !== undefined) {
    if (!aborted(input)) {
      yield { type: EVENT_TYPES.error, message: response.error };
    }
    return;
  }

  const scriptedCalls = response.toolCalls ?? [];
  for (const call of scriptedCalls) {
    if (aborted(input)) return;
    const toolUseId = `fake-${randomUUID()}`;
    yield {
      type: EVENT_TYPES.toolCall,
      toolUseId,
      toolName: call.toolName,
      args: call.args,
    };
    if (!response.omitToolResult) {
      // Run the plugin handler and publish its envelope so the canvas
      // mounts the View, then report the textual result.
      const content = await dispatchToPlugin(call, input.port, input.sessionId);
      if (aborted(input)) return;
      yield {
        type: EVENT_TYPES.toolCallResult,
        toolUseId,
        content,
      };
    }
    // With omitToolResult set, the result half is skipped on purpose
    // (partial-stream simulation).
  }

  if (response.text === undefined || aborted(input)) return;
  yield { type: EVENT_TYPES.text, message: response.text };
}
361
+
362
/**
 * Backend descriptor for the fake echo agent. Session resume and MCP are
 * declared unsupported: a stub cannot replay them meaningfully, and
 * callers relying on real Claude semantics should opt out rather than
 * get silently wrong behavior.
 */
export const fakeEchoBackend: LLMBackend = {
  id: "fake-echo",
  capabilities: {
    sessionResume: false,
    mcp: false,
  },
  runAgent: runFakeEchoAgent,
};
@@ -3,12 +3,27 @@
3
3
  // env / settings. Callers go through getActiveBackend() rather than
4
4
  // importing a concrete adapter so adding a backend doesn't require
5
5
  // touching every call site.
6
+ //
7
+ // Tests / CI swap in `fakeEchoBackend` via setActiveBackend() at
8
+ // server bootstrap; the decision is made once and read with zero
9
+ // per-call overhead by the agent orchestrator.
6
10
 
7
11
  import { claudeCodeBackend } from "./claude-code.js";
8
12
  import type { LLMBackend } from "./types.js";
9
13
 
10
14
  export type { AgentInput, BackendCapabilities, LLMBackend } from "./types.js";
11
15
 
16
// Backend handed out by getActiveBackend(); the real Claude Code adapter
// unless boot wiring replaces it.
let activeBackend: LLMBackend = claudeCodeBackend;

/**
 * Swap the active backend. Intended for server-bootstrap wiring (e.g. CI
 * sets `MULMOCLAUDE_FAKE_AGENT=1` and the boot script passes
 * `fakeEchoBackend` here). Not safe mid-flight: agent generators already
 * running have captured the previous backend reference, and swapping
 * under them would race.
 *
 * @param backend Backend to return from subsequent `getActiveBackend()` calls.
 */
export function setActiveBackend(backend: LLMBackend): void {
  activeBackend = backend;
}

/** @returns The backend currently selected for agent runs. */
export function getActiveBackend(): LLMBackend {
  return activeBackend;
}
@@ -1,19 +1,28 @@
1
- import { dirname, join } from "path";
1
+ import { basename, dirname, join } from "path";
2
2
  import { homedir, tmpdir } from "os";
3
3
  import { createRequire } from "node:module";
4
4
  import type { Role } from "../../src/config/roles.js";
5
5
  import { mcpTools, isMcpToolEnabled } from "./mcp-tools/index.js";
6
6
  import { getActiveToolDescriptors } from "./activeTools.js";
7
7
  import type { EffortLevel, McpServerSpec } from "../system/config.js";
8
+ import { startStdioHttpShim, type ShimHandle } from "./stdioHttpShim.js";
8
9
  import { getCurrentToken } from "../api/auth/token.js";
9
10
  import type { Attachment } from "@mulmobridge/protocol";
10
11
  import { isImageMime, isNativeAttachmentMime } from "@mulmobridge/client";
11
12
  import { convertAttachment } from "./attachmentConverter.js";
12
13
  import { log } from "../system/logger/index.js";
14
+ import { preflightUserServers, logPreflightResult } from "./mcpPreflight.js";
13
15
 
14
16
  export const CONTAINER_WORKSPACE_PATH = "/home/node/mulmoclaude";
15
17
 
16
- const BASE_ALLOWED_TOOLS = ["Bash", "Read", "Write", "Edit", "Glob", "Grep", "WebFetch", "WebSearch"];
18
// Built-in tools every session may call. `Skill` is the tool Claude Code
// uses to execute a discovered `.claude/skills/<name>/SKILL.md`. Because
// `--allowedTools` is passed as a strict allowlist, leaving it out
// permission-denies every `Skill({skill:"…"})` call — the harness errors
// with `Execute skill: <name>` and the model falls back to Glob+Read.
// Bare `Skill` (no parens) permits all skills. See
// plans/fix-skill-tool-allowlist.md.
const BASE_ALLOWED_TOOLS = [
  "Bash",
  "Read",
  "Write",
  "Edit",
  "Glob",
  "Grep",
  "WebFetch",
  "WebSearch",
  "Skill",
];
17
26
 
18
27
  /** Tool names the agent is allowed to call this session. Drives
19
28
  * `PLUGIN_NAMES` env (the MCP child's filter) and the CLI's
@@ -72,33 +81,65 @@ function prepareUserStdioServer(spec: Extract<McpServerSpec, { type: "stdio" }>,
72
81
  return { ...spec, args };
73
82
  }
74
83
 
75
- export function prepareUserServers(userServers: Record<string, McpServerSpec>, useDocker: boolean, hostWorkspacePath: string): Record<string, McpServerSpec> {
84
/** Result of preparing the user-configured MCP servers for one agent turn. */
export interface PreparedUserServers {
  /** Server specs to hand to the agent, keyed by server id. */
  servers: Record<string, McpServerSpec>;
  /** Host-side stdio→HTTP gateways started for opted-in servers
   *  (#1421 Phase B). Ownership passes to the caller, which MUST
   *  `close()` each one when the agent turn ends — otherwise host
   *  processes / ports leak. */
  shims: ShimHandle[];
}
91
+
92
// Async because the opt-in stdio→HTTP path spawns a host gateway and
// waits for it to listen before the spec can be rewritten to http.
//
// Returns the agent-ready server specs plus every host gateway started
// along the way. If preparation throws part-way through, the gateways
// already started are closed before the error is rethrown: the caller
// never receives their handles in that case and could not close them.
export async function prepareUserServers(
  userServers: Record<string, McpServerSpec>,
  useDocker: boolean,
  hostWorkspacePath: string,
): Promise<PreparedUserServers> {
  // Drop catalog-known entries that are missing required config (#1352).
  // Per the original note, `logPreflightResult` de-duplicates its output
  // so repeated per-agent-run calls stay quiet.
  const preflight = preflightUserServers(userServers);
  logPreflightResult(preflight, "agent-run");
  const out: Record<string, McpServerSpec> = {};
  const shims: ShimHandle[] = [];
  try {
    for (const [serverId, spec] of Object.entries(preflight.ready)) {
      if (spec.enabled === false) continue;
      if (spec.type === "http") {
        out[serverId] = prepareUserHttpServer(spec, useDocker);
        continue;
      }
      if (!useDocker) {
        out[serverId] = prepareUserStdioServer(spec, useDocker, hostWorkspacePath);
        continue;
      }
      // Docker mode + stdio. Default: drop (the sandbox image can't
      // host arbitrary stdio runtimes — see docs/mcp-sandbox.md, #162 /
      // #1334). Exception: an explicit, UI-acknowledged opt-in
      // (#1421 Phase B) runs the server on the HOST behind a
      // stdio↔HTTP gateway and rewrites the spec to http so the
      // sandboxed agent can still reach it.
      if (spec.hostExecInDocker === true) {
        const shim = await startStdioHttpShim(serverId, spec, hostWorkspacePath);
        if (shim) {
          shims.push(shim);
          out[serverId] = { type: "http", url: rewriteLocalhostForDocker(shim.url, useDocker) };
          continue;
        }
        // Shim failed to come up — fall through to the safe default
        // (drop + log) rather than wiring a half-broken server.
      }
      log.info("mcp", "skipping stdio server in Docker sandbox", {
        serverId,
        transport: "stdio",
        reason:
          spec.hostExecInDocker === true
            ? "host-exec shim unavailable — see mcp-shim warnings"
            : "sandbox image is too minimal to host arbitrary stdio MCP runtimes",
      });
    }
  } catch (err) {
    // Best-effort cleanup; a failing close() must not mask the original
    // error. NOTE(review): assumes ShimHandle.close() takes no arguments
    // and may be sync or async — confirm against stdioHttpShim.ts.
    await Promise.allSettled(shims.map(async (shim) => shim.close()));
    throw err;
  }
  return { servers: out, shims };
}
103
144
 
104
145
  // When running in Docker the MCP server subprocess won't inherit the host
@@ -374,13 +415,22 @@ export interface McpConfigPaths {
374
415
  argPath: string;
375
416
  }
376
417
 
418
/**
 * Reduce a session id to a string that is safe to embed in a file name.
 * `basename` first drops any directory components (a crafted `../../x`
 * collapses to `x`); every remaining character outside `A-Z a-z 0-9 _ -`
 * is then replaced with `_` (CodeQL js/path-injection).
 */
function safeSessionSegment(sessionId: string): string {
  const lastComponent = basename(sessionId);
  const disallowed = /[^A-Za-z0-9_-]/g;
  return lastComponent.replace(disallowed, "_");
}
425
+
377
426
  export function resolveMcpConfigPaths(opts: { workspacePath: string; sessionId: string; useDocker: boolean }): McpConfigPaths {
427
+ const sid = safeSessionSegment(opts.sessionId);
378
428
  if (opts.useDocker) {
379
- const hostPath = join(opts.workspacePath, ".mulmoclaude", `mcp-${opts.sessionId}.json`);
380
- const argPath = `${CONTAINER_WORKSPACE_PATH}/.mulmoclaude/mcp-${opts.sessionId}.json`;
429
+ const hostPath = join(opts.workspacePath, ".mulmoclaude", `mcp-${sid}.json`);
430
+ const argPath = `${CONTAINER_WORKSPACE_PATH}/.mulmoclaude/mcp-${sid}.json`;
381
431
  return { hostPath, argPath };
382
432
  }
383
- const hostPath = join(tmpdir(), `mulmoclaude-mcp-${opts.sessionId}.json`);
433
+ const hostPath = join(tmpdir(), `mulmoclaude-mcp-${sid}.json`);
384
434
  return { hostPath, argPath: hostPath };
385
435
  }
386
436