mulmoclaude 0.6.2 → 0.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/README.md +26 -0
  2. package/bin/mulmoclaude.js +11 -1
  3. package/client/assets/chunk-D8eiyYIV-CW0rPbG2.js +1 -0
  4. package/client/assets/{html2canvas-CDGcmOD3-Bkf2uOth.js → html2canvas-CDGcmOD3-BjwfzAN8.js} +1 -1
  5. package/client/assets/index-Bp1owZ-i.js +5101 -0
  6. package/client/assets/index-c63H1pnd.css +2 -0
  7. package/client/assets/{index.es-DqtpmBm8-D9mAh_KQ.js → index.es-DqtpmBm8-DudYPW7R.js} +1 -1
  8. package/client/assets/material-symbols-outlined-C0dZ3SlO.woff2 +0 -0
  9. package/client/assets/runtime-protocol-vue-BUk5WXSy.js +1 -0
  10. package/client/assets/{runtime-vue-BVUzgYGA.js → runtime-vue-fFYhnNg3.js} +1 -1
  11. package/client/assets/{vue-C8UuIO9J.js → vue-Kqzpl9Vx.js} +1 -1
  12. package/client/assets/vue.runtime.esm-bundler-BTyIdNAI.js +4 -0
  13. package/client/index.html +9 -11
  14. package/package.json +5 -4
  15. package/server/agent/backend/claude-code.ts +34 -0
  16. package/server/agent/backend/fake-echo.ts +370 -0
  17. package/server/agent/backend/index.ts +16 -1
  18. package/server/agent/config.ts +8 -1
  19. package/server/agent/mcpFailureMonitor.ts +167 -0
  20. package/server/agent/mcpPreflight.ts +185 -0
  21. package/server/agent/stream.ts +12 -1
  22. package/server/api/routes/mulmo-script.ts +19 -1
  23. package/server/api/routes/schedulerHandlers.ts +52 -4
  24. package/server/api/routes/sessions.ts +15 -0
  25. package/server/api/routes/skills.ts +263 -0
  26. package/server/events/notifications.ts +19 -91
  27. package/server/index.ts +87 -9
  28. package/server/notifier/macosReminderAdapter.ts +30 -0
  29. package/server/system/announceOptionalDeps.ts +50 -0
  30. package/server/system/config.ts +8 -1
  31. package/server/system/docker.ts +14 -6
  32. package/server/system/env.ts +18 -5
  33. package/server/system/optionalDeps.ts +129 -0
  34. package/server/utils/cli-flags.d.mts +14 -0
  35. package/server/utils/cli-flags.mjs +53 -0
  36. package/server/utils/time.ts +6 -0
  37. package/server/workspace/helps/business.md +2 -2
  38. package/server/workspace/helps/mulmoscript.md +3 -3
  39. package/server/workspace/helps/sandbox.md +2 -2
  40. package/server/workspace/hooks/dispatcher.mjs +1 -1
  41. package/server/workspace/paths.ts +13 -4
  42. package/server/workspace/skills/catalog.ts +355 -0
  43. package/server/workspace/skills/external/catalog.ts +283 -0
  44. package/server/workspace/skills/external/clone.ts +129 -0
  45. package/server/workspace/skills/external/id.ts +194 -0
  46. package/server/workspace/skills/external/install.ts +417 -0
  47. package/server/workspace/skills/external/presets.ts +50 -0
  48. package/server/workspace/skills-preset.ts +29 -17
  49. package/server/workspace/workspace.ts +10 -5
  50. package/src/App.vue +19 -8
  51. package/src/components/RightSidebar.vue +19 -0
  52. package/src/components/StackView.vue +10 -1
  53. package/src/config/apiRoutes.ts +0 -6
  54. package/src/config/roles.ts +2 -0
  55. package/src/lang/de.ts +50 -1
  56. package/src/lang/en.ts +49 -1
  57. package/src/lang/es.ts +49 -1
  58. package/src/lang/fr.ts +49 -1
  59. package/src/lang/ja.ts +49 -1
  60. package/src/lang/ko.ts +49 -1
  61. package/src/lang/pt-BR.ts +49 -1
  62. package/src/lang/zh.ts +49 -1
  63. package/src/plugins/manageSkills/View.vue +795 -30
  64. package/src/plugins/manageSkills/categories.ts +125 -0
  65. package/src/plugins/manageSkills/meta.ts +30 -0
  66. package/src/plugins/markdown/definition.ts +3 -3
  67. package/src/plugins/meta-types.ts +5 -0
  68. package/src/plugins/presentMulmoScript/Preview.vue +3 -3
  69. package/src/plugins/presentMulmoScript/View.vue +157 -33
  70. package/src/plugins/presentMulmoScript/meta.ts +4 -0
  71. package/src/plugins/scheduler/View.vue +45 -9
  72. package/src/plugins/scheduler/calendarDefinition.ts +6 -2
  73. package/src/plugins/scheduler/multiDayHelpers.ts +95 -0
  74. package/src/plugins/spreadsheet/View.vue +3 -3
  75. package/src/types/notification.ts +1 -1
  76. package/src/types/session.ts +6 -0
  77. package/src/types/sse.ts +5 -0
  78. package/src/types/toolCallHistory.ts +7 -0
  79. package/src/utils/agent/eventDispatch.ts +26 -5
  80. package/src/utils/agent/mcpHint.ts +50 -0
  81. package/src/utils/session/sessionEntries.ts +8 -32
  82. package/client/assets/PluginScopedRoot-YjvQq0Nn.js +0 -3
  83. package/client/assets/chunk-CernVdwh.js +0 -1
  84. package/client/assets/chunk-D8eiyYIV-CAXpUwLd.js +0 -1
  85. package/client/assets/index-BwrlMMHr.js +0 -5005
  86. package/client/assets/index-CvvNuegU.css +0 -2
  87. package/client/assets/material-symbols-outlined-BOZVWuR3.woff2 +0 -0
  88. package/client/assets/runtime-protocol-vue-C1To4M3t.js +0 -1
  89. package/client/assets/vue.runtime.esm-bundler-DQ8Kjjui.js +0 -4
  90. package/server/api/routes/notifications.ts +0 -195
  91. package/server/notifier/legacy-adapters.ts +0 -76
  92. package/src/composables/useSelectedResult.ts +0 -49
  93. /package/client/assets/{purify.es-Fx1Nqyry-Dwtk-9WZ.js → purify.es-Fx1Nqyry-B3aL7Uvj.js} +0 -0
  94. /package/client/assets/{typeof-DBp4T-Ny-CSr8wx1e.js → typeof-DBp4T-Ny-Bef7RiR_.js} +0 -0
@@ -0,0 +1,370 @@
1
+ // Test-only LLM backend. Installed via `setActiveBackend()` only when
2
+ // `MULMOCLAUDE_FAKE_AGENT=1` (CI workflow boot wiring), and re-usable
3
+ // from unit tests via `setFakeResponse()` / `resetFakeResponse()`.
4
+ //
5
+ // Default behavior:
6
+ // - emits a synthesized `claudeSessionId` so the orchestrator's
7
+ // resume bookkeeping sees the same shape as a real run
8
+ // - short-circuits `/<slug>` slash-command turns by reading the
9
+ // seeded SKILL.md and echoing the canary marker line
10
+ // - emits the concatenated per-session message history as the
11
+ // assistant text reply, so context-recall tests (session L-12)
12
+ // see prior turn content
13
+ //
14
+ // Tool dispatch: when the user prompt matches a known shape (see
15
+ // detectToolCalls), fake-echo emits the corresponding tool_call
16
+ // AND posts the args to the same internal plugin endpoint the MCP
17
+ // bridge would use under real Claude (see PLUGIN_ENDPOINTS). The
18
+ // handler runs unmodified, the artifact lands on disk, and the
19
+ // canvas mounts the plugin View — fake at the LLM seam only, real
20
+ // from the tool dispatch downward. Tests that need an LLM that
21
+ // actually reasons (presentForm field design, agent-driven slug
22
+ // choice in skill creation, etc.) still stay gated on
23
+ // `E2E_LIVE_NO_LLM=1`.
24
+
25
+ import { randomUUID } from "node:crypto";
26
+ import { readFile } from "node:fs/promises";
27
+ import path from "node:path";
28
+
29
+ import { getCurrentToken } from "../../api/auth/token.js";
30
+ import { makeUuid } from "../../utils/id.js";
31
+ import { API_ROUTES } from "../../../src/config/apiRoutes.js";
32
+ import { EVENT_TYPES } from "../../../src/types/events.js";
33
+ import { WORKSPACE_DIRS } from "../../workspace/paths.js";
34
+ import type { AgentEvent } from "../stream.js";
35
+ import type { AgentInput, LLMBackend } from "./types.js";
36
+
37
// Loose shape of the JSON body a plugin endpoint answers with. Only the
// three fields fake-echo inspects are named; any other key is accepted
// through the index signature.
interface PluginEnvelope {
  [key: string]: unknown;
  // Text pieces joined into the `tool_call_result` content when they are strings.
  message?: unknown;
  instructions?: unknown;
  // When present (not `undefined`), the envelope is pushed on to the
  // internal tool-result route.
  data?: unknown;
}
43
+
44
export interface FakeToolCall {
  /** Tool name reported on the emitted `tool_call` event; also selects
   *  the plugin endpoint the args are posted to. */
  toolName: string;
  /** Arguments carried on the `tool_call` event and sent as the JSON
   *  body of the plugin request. */
  args: unknown;
  /** Canned `tool_call_result` content. When set it is used verbatim
   *  and no plugin endpoint is contacted; when omitted the result comes
   *  from the plugin dispatch (or `{"ok":true}` for unmapped tools). */
  result?: string;
}
51
+
52
export interface FakeResponse {
  /** Tool calls emitted, in order, ahead of the text block. The default
   *  generator fills this from prompt-shape detection; tests that need
   *  exact control supply their own via `setFakeResponse()`. */
  toolCalls?: ReadonlyArray<FakeToolCall>;
  /** Assistant text. Leave undefined to emit no text event at all. */
  text?: string;
  /** When defined, the run emits one `error` AgentEvent carrying this
   *  message and ends — the same outcome the claude-code backend
   *  produces on a non-zero CLI exit (`readAgentEvents`). Any tool
   *  calls / text on the same response are not emitted. */
  error?: string;
  /** Emit each `tool_call` without its paired `tool_call_result`, to
   *  simulate a truncated stream where the model died in the middle of
   *  a tool round-trip. */
  omitToolResult?: boolean;
}
69
+
70
+ export type FakeResponseFn = (input: AgentInput) => FakeResponse | Promise<FakeResponse>;
71
+
72
+ // Per-session conversation memory so context-recall tests see prior
73
+ // turn content in the reply. Cleared by `resetFakeResponse()`.
74
+ const sessionTurns = new Map<string, string[]>();
75
+
76
// Default generator: forced error, then slash-command short-circuit,
// then echo of the accumulated per-session history (plus any tool calls
// detected from the prompt shape).
async function defaultResponse(input: AgentInput): Promise<FakeResponse> {
  const { message, sessionId, workspacePath } = input;

  // Prompt-driven error trigger for e2e-live. A browser-driven spec runs
  // in a separate process and cannot reach the in-process
  // `setFakeResponse()` knob, so the error-banner canary opts in by
  // sending a message containing this exact marker. Prod runs the real
  // Claude backend and never reaches fake-echo, so this is inert there.
  if (message.includes("__FAKE_ERROR__")) {
    // The chat card renders message text through marked(); keep the
    // string free of markdown-significant characters (no `__`, `*`,
    // backticks) because the canary asserts on a literal substring.
    return { error: "fake-echo forced error for the e2e-live error-banner canary" };
  }

  // Slash-command turn: the SPA's "Run" button on a skill row (e2e-live
  // L-22) opens a chat whose only user message is `/<slug>`. Real Claude
  // resolves that through its skill pipeline with SKILL.md as system
  // prompt; here the seeded body is read directly and the "respond with
  // this exact line" heuristic is applied. No usable marker → fall
  // through to the plain echo below.
  const slashMatch = /^\/([a-z0-9][a-z0-9-]*)$/i.exec(message.trim());
  if (slashMatch !== null) {
    const skillReply = await replyFromSeededSkill(workspacePath, slashMatch[1]);
    if (skillReply !== null) return { text: skillReply };
  }

  // Append this turn to the session's running history.
  let turns = sessionTurns.get(sessionId);
  if (turns === undefined) {
    turns = [];
    sessionTurns.set(sessionId, turns);
  }
  turns.push(message);

  return {
    toolCalls: detectToolCalls(message),
    text: turns.join("\n\n"),
  };
}
113
+
114
+ // ── Tool-call pattern detectors ───────────────────────────────────
115
+ //
116
+ // Each detector matches one e2e-live prompt shape. The fake-echo
117
+ // loop below dispatches each detected call to the matching server-
118
+ // side plugin endpoint (see PLUGIN_ENDPOINTS) so the real handler
119
+ // runs, the artifact lands on disk, and the canvas mounts the View.
120
+ // Production never reaches this code path — gated by
121
+ // MULMOCLAUDE_FAKE_AGENT=1 at server boot.
122
+
123
// Matches prompts that name `presentMulmoScript` and carry a quoted
// `filePath: "…"` argument; anything else yields null.
function detectPresentMulmoScript(message: string): FakeToolCall | null {
  const mentionsTool = /presentMulmoScript/i.test(message);
  const filePath = mentionsTool ? /filePath:\s*["']([^"']+)["']/.exec(message)?.[1] : undefined;
  if (filePath === undefined) return null;
  return { toolName: "presentMulmoScript", args: { filePath } };
}
129
+
130
// Matches prompts that name `presentHtml` and contain markup. Everything
// from the first `<` to the end of the prompt (trimmed) is treated as the
// HTML payload. Returns null when the tool is not named or no `<` exists.
function detectPresentHtml(message: string): FakeToolCall | null {
  if (!/presentHtml/i.test(message)) return null;
  const idx = message.indexOf("<");
  if (idx < 0) return null;
  const fragment = message.slice(idx).trim();

  // The handler expects a self-contained document, so normalise the
  // payload into one:
  //   - already starts with a doctype      → pass through untouched
  //   - starts at the <html> root element  → only the doctype is missing;
  //     wrapping it again would nest <html> inside <html><body>
  //   - anything else is a fragment        → wrap in a minimal document
  let html: string;
  if (/^<!DOCTYPE/i.test(fragment)) {
    html = fragment;
  } else if (/^<html[\s>]/i.test(fragment)) {
    html = `<!DOCTYPE html>${fragment}`;
  } else {
    html = `<!DOCTYPE html><html><body>${fragment}</body></html>`;
  }
  return { toolName: "presentHtml", args: { html } };
}
140
+
141
// Matches prompts that name `presentForm` and builds a one-field text
// form. Title, field id and label are lifted from `titled "…"`,
// `id="…"` and `label="…"` in the prompt, each with a fixed fallback.
function detectPresentForm(message: string): FakeToolCall | null {
  if (!/presentForm/i.test(message)) return null;

  // First quoted capture of `pattern` in the prompt, or `fallback`.
  const pick = (pattern: RegExp, fallback: string): string => pattern.exec(message)?.[1] ?? fallback;

  const field = {
    id: pick(/id\s*=\s*['"]([^'"]+)['"]/i, "field1"),
    type: "text",
    label: pick(/label\s*=\s*['"]([^'"]+)['"]/i, "Field"),
    required: /required/i.test(message),
    description: "auto-generated by fake-echo",
  };
  const title = pick(/titled\s+['"]([^'"]+)['"]/i, "Quick check");

  return { toolName: "presentForm", args: { title, fields: [field] } };
}
162
+
163
// Matches prompts that name `presentChart` and builds a single bar
// chart. Every "<word of 3+ letters> <1–6 digit number>" pair in the
// prompt becomes one bar; with no pairs a fixed A/B/C = 1/2/3 series is
// used. The chart and its document share the `titled "…"` title.
function detectPresentChart(message: string): FakeToolCall | null {
  if (!/presentChart/i.test(message)) return null;

  const title = /titled\s+['"]([^'"]+)['"]/i.exec(message)?.[1] ?? "Untitled";
  const points = [...message.matchAll(/\b([A-Za-z]{3,})\s+(\d{1,6})\b/g)].map((hit) => ({
    label: hit[1],
    value: Number(hit[2]),
  }));
  const hasPoints = points.length > 0;
  const labels = hasPoints ? points.map((point) => point.label) : ["A", "B", "C"];
  const values = hasPoints ? points.map((point) => point.value) : [1, 2, 3];

  const chart = {
    title,
    type: "bar",
    option: {
      xAxis: { type: "category", data: labels },
      yAxis: { type: "value" },
      series: [{ type: "bar", data: values }],
    },
  };
  return { toolName: "presentChart", args: { document: { title, charts: [chart] } } };
}
190
+
191
// Runs every prompt-shape detector against the message, in a fixed
// order, and collects the calls that matched. Returns undefined (not an
// empty array) when nothing matched.
function detectToolCalls(message: string): FakeToolCall[] | undefined {
  const detectors = [detectPresentMulmoScript, detectPresentHtml, detectPresentForm, detectPresentChart];
  const matched = detectors.map((detect) => detect(message)).filter((call): call is FakeToolCall => call !== null);
  return matched.length === 0 ? undefined : matched;
}
199
+
200
+ // ── Plugin dispatch ───────────────────────────────────────────────
201
+ //
202
+ // Maps each fake-detected tool to the same internal API the MCP
203
+ // bridge would post to in a real run, so the actual server-side
204
+ // handler runs end-to-end: artifact saved, canvas slug returned.
205
+ // Anything not in this table falls back to a synthesized success
206
+ // envelope (caller can override via FakeToolCall.result).
207
+ const PLUGIN_ENDPOINTS: Readonly<Record<string, string>> = {
208
+ presentForm: "/api/form",
209
+ presentHtml: "/api/html",
210
+ presentChart: "/api/chart",
211
+ presentMulmoScript: "/api/mulmoScript/save",
212
+ };
213
+
214
// Mirrors what server/agent/mcp-server.ts#handleToolCall does for
// the real MCP bridge:
//   1. POST to the plugin endpoint to get the envelope back
//   2. If envelope.data is set, PUSH the envelope to
//      /api/internal/tool-result — this is what surfaces the result
//      to the canvas as a ToolResultComplete (toolName + uuid
//      stamped by the bridge so the plugin can't impersonate).
//   3. Return the text representation (message + instructions) so
//      the matching `tool_call_result` event carries something
//      meaningful for the tool-call history pane.
//
// Parameters:
//   call          — the fake tool call; a preset `call.result` is returned
//                   verbatim without any network traffic.
//   port          — local server port both requests are sent to.
//   chatSessionId — forwarded as the `session` query value on the
//                   tool-result push.
//
// Returns the string to use as `tool_call_result` content. Never
// rejects for HTTP / network problems: those come back as a JSON
// `{ error }` string so the turn keeps going and the test sees them.
async function dispatchToPlugin(call: FakeToolCall, port: number, chatSessionId: string): Promise<string> {
  if (call.result !== undefined) return call.result;

  // Own-property lookup only: a plain index would resolve inherited
  // Object.prototype members ("constructor", "toString", …) to truthy
  // functions and send the call to a garbage URL instead of taking the
  // synthesized-success fallback for unmapped tools.
  const endpoint = Object.prototype.hasOwnProperty.call(PLUGIN_ENDPOINTS, call.toolName) ? PLUGIN_ENDPOINTS[call.toolName] : undefined;
  if (!endpoint) return '{"ok":true}';

  const token = getCurrentToken();
  const authHeaders: Record<string, string> = token ? { Authorization: `Bearer ${token}` } : {};
  try {
    // Step 1: run the real plugin handler.
    const response = await fetch(`http://localhost:${port}${endpoint}`, {
      method: "POST",
      headers: { "Content-Type": "application/json", ...authHeaders },
      body: JSON.stringify(call.args),
    });
    if (!response.ok) {
      const errBody = await response.text();
      return JSON.stringify({ error: `plugin ${call.toolName} returned ${response.status}: ${errBody.slice(0, 200)}` });
    }
    const envelope = ((await response.json()) ?? {}) as PluginEnvelope;

    // Step 2: publish the envelope so the canvas mounts the View.
    if (envelope.data !== undefined) {
      // Query key is `session`, not `chatSessionId` — matches the
      // `getSessionQuery(req)` reader and what the MCP bridge's
      // postJson(...) helper passes (`?session=${SESSION_ID}`).
      const toolResultUrl = `http://localhost:${port}${API_ROUTES.agent.internal.toolResult}?session=${encodeURIComponent(chatSessionId)}`;
      const pushRes = await fetch(toolResultUrl, {
        method: "POST",
        headers: { "Content-Type": "application/json", ...authHeaders },
        body: JSON.stringify({ ...envelope, toolName: call.toolName, uuid: makeUuid() }),
      });
      if (!pushRes.ok) {
        // Fail loudly: a swallowed publish would leave the canvas blank
        // while the chat reads "Done", masking a real wiring break.
        // Surfacing it as the tool result makes the test fail fast
        // instead of timing out on an absent View.
        const errBody = await pushRes.text();
        return JSON.stringify({
          error: `tool-result push failed for ${call.toolName}: ${pushRes.status} ${errBody.slice(0, 200)}`,
        });
      }
    }

    // Step 3: text representation for the tool-call history pane.
    const text: string[] = [];
    if (typeof envelope.message === "string") text.push(envelope.message);
    if (typeof envelope.instructions === "string") text.push(envelope.instructions);
    return text.length > 0 ? text.join("\n") : "Done";
  } catch (err) {
    // Don't tear down the chat turn on plugin-dispatch failure —
    // surface the error in the tool_result so the test sees it.
    return JSON.stringify({ error: err instanceof Error ? err.message : String(err) });
  }
}
273
+
274
// Reads `<workspace>/<claudeSkills dir>/<slug>/SKILL.md` (the
// project-scope skill seeded by `placeProjectSkill`) and pulls out the
// canary line its body asks the model to echo ("respond with this exact
// line and nothing else: X"). Resolves to null when the file cannot be
// read or no line carries the marker, so the caller falls through to
// the default echo.
async function replyFromSeededSkill(workspacePath: string, slug: string): Promise<string | null> {
  const skillFile = path.join(workspacePath, WORKSPACE_DIRS.claudeSkills, slug, "SKILL.md");

  let lines: string[];
  try {
    lines = (await readFile(skillFile, "utf8")).split(/\r?\n/);
  } catch {
    return null;
  }

  // One line at a time keeps the regex input short — no backtracking surprises.
  const marker = /respond with this exact line(?: and nothing else)?:\s*(.+)/i;
  for (const line of lines) {
    const hit = marker.exec(line);
    if (hit !== null) return hit[1].trim();
  }
  return null;
}
294
+
295
+ // ── Backend wiring ────────────────────────────────────────────────
296
+
297
+ let responseFn: FakeResponseFn = defaultResponse;
298
+
299
/** Swap in a custom response generator in place of the default echo +
 *  slash-command one, giving a unit test full control over what the
 *  fake backend emits. Call `resetFakeResponse()` in teardown so the
 *  following test starts from a clean state. */
export function setFakeResponse(generator: FakeResponseFn): void {
  responseFn = generator;
}
306
+
307
/** Put the default generator back and wipe the per-session turn
 *  history, so nothing set up by a previous test carries over. */
export function resetFakeResponse(): void {
  sessionTurns.clear();
  responseFn = defaultResponse;
}
312
+
313
// True once the caller's abort signal has fired. The generator consults
// this before each yield: the real claude-code backend kills its
// subprocess on abort, and with no subprocess to kill the closest
// equivalent for the stub is to stop emitting right away.
function aborted(input: AgentInput): boolean {
  const signal = input.abortSignal;
  if (signal === undefined || signal === null) return false;
  return signal.aborted === true;
}
319
+
320
// The fake backend's agent loop. Event order for one turn:
//   claudeSessionId → (toolCall [→ toolCallResult])* → text
// or, when the generator returns an error:
//   claudeSessionId → error
// The abort signal is consulted before every yield; once it has fired
// the generator simply ends.
async function* runFakeEchoAgent(input: AgentInput): AsyncGenerator<AgentEvent> {
  if (aborted(input)) return;
  yield { type: EVENT_TYPES.claudeSessionId, id: randomUUID() };

  const reply = await responseFn(input);

  // Error short-circuit: report it and end the turn, as the claude-code
  // backend does on a non-zero CLI exit.
  if (reply.error !== undefined) {
    if (!aborted(input)) {
      yield { type: EVENT_TYPES.error, message: reply.error };
    }
    return;
  }

  for (const toolCall of reply.toolCalls ?? []) {
    if (aborted(input)) return;
    const toolUseId = `fake-${randomUUID()}`;
    yield {
      type: EVENT_TYPES.toolCall,
      toolUseId,
      toolName: toolCall.toolName,
      args: toolCall.args,
    };

    // Partial-stream simulation: the result half is never emitted.
    if (reply.omitToolResult) continue;

    // Run the actual plugin handler AND push the envelope to
    // /api/internal/tool-result so the canvas mounts the View — the
    // same two-step the MCP bridge performs for real Claude.
    const resultText = await dispatchToPlugin(toolCall, input.port, input.sessionId);
    if (aborted(input)) return;
    yield {
      type: EVENT_TYPES.toolCallResult,
      toolUseId,
      content: resultText,
    };
  }

  if (reply.text === undefined || aborted(input)) return;
  yield { type: EVENT_TYPES.text, message: reply.text };
}
361
+
362
export const fakeEchoBackend: LLMBackend = {
  id: "fake-echo",
  // A stub cannot meaningfully replay resume-by-token or MCP, so both
  // capabilities are advertised as unsupported. Callers that rely on
  // the real Claude semantics can then opt out rather than run against
  // silently wrong behavior.
  capabilities: { sessionResume: false, mcp: false },
  runAgent: runFakeEchoAgent,
};
@@ -3,12 +3,27 @@
3
3
  // env / settings. Callers go through getActiveBackend() rather than
4
4
  // importing a concrete adapter so adding a backend doesn't require
5
5
  // touching every call site.
6
+ //
7
+ // Tests / CI swap in `fakeEchoBackend` via setActiveBackend() at
8
+ // server bootstrap; the decision is made once and read with zero
9
+ // per-call overhead by the agent orchestrator.
6
10
 
7
11
  import { claudeCodeBackend } from "./claude-code.js";
8
12
  import type { LLMBackend } from "./types.js";
9
13
 
10
14
  export type { AgentInput, BackendCapabilities, LLMBackend } from "./types.js";
11
15
 
16
+ let activeBackend: LLMBackend = claudeCodeBackend;
17
+
18
+ /** Replace the active backend. Intended for server-bootstrap wiring
19
+ * (e.g. CI sets `MULMOCLAUDE_FAKE_AGENT=1`, the boot script then
20
+ * passes `fakeEchoBackend` here). Not safe to call mid-flight — the
21
+ * in-flight agent generators have already captured the previous
22
+ * backend reference, and swapping under them would race. */
23
+ export function setActiveBackend(backend: LLMBackend): void {
24
+ activeBackend = backend;
25
+ }
26
+
12
27
  export function getActiveBackend(): LLMBackend {
13
- return claudeCodeBackend;
28
+ return activeBackend;
14
29
  }
@@ -10,6 +10,7 @@ import type { Attachment } from "@mulmobridge/protocol";
10
10
  import { isImageMime, isNativeAttachmentMime } from "@mulmobridge/client";
11
11
  import { convertAttachment } from "./attachmentConverter.js";
12
12
  import { log } from "../system/logger/index.js";
13
+ import { preflightUserServers, logPreflightResult } from "./mcpPreflight.js";
13
14
 
14
15
  export const CONTAINER_WORKSPACE_PATH = "/home/node/mulmoclaude";
15
16
 
@@ -73,8 +74,14 @@ function prepareUserStdioServer(spec: Extract<McpServerSpec, { type: "stdio" }>,
73
74
  }
74
75
 
75
76
  export function prepareUserServers(userServers: Record<string, McpServerSpec>, useDocker: boolean, hostWorkspacePath: string): Record<string, McpServerSpec> {
77
+ // Drop catalog-known entries that are missing required config (#1352).
78
+ // The dedup cache inside `logPreflightResult` keeps per-agent-run
79
+ // calls quiet so a Settings UI fix only logs once when it transitions
80
+ // missing → ok.
81
+ const preflight = preflightUserServers(userServers);
82
+ logPreflightResult(preflight, "agent-run");
76
83
  const out: Record<string, McpServerSpec> = {};
77
- for (const [serverId, spec] of Object.entries(userServers)) {
84
+ for (const [serverId, spec] of Object.entries(preflight.ready)) {
78
85
  if (spec.enabled === false) continue;
79
86
  if (spec.type === "http") {
80
87
  out[serverId] = prepareUserHttpServer(spec, useDocker);
@@ -0,0 +1,167 @@
1
+ // Runtime failure monitor for external MCP servers (#1353).
2
+ //
3
+ // `mcpPreflight.ts` (#1352) handles the static case: servers that
4
+ // can't even start because of missing required config. This module
5
+ // handles the dynamic case: servers that start OK but fail to answer
6
+ // tool calls (API key rotated, upstream down, OAuth scope wrong …).
7
+ //
8
+ // Signal source is `tool_result.is_error: true` from Claude Code's
9
+ // stream-json. `stream.ts` now forwards that flag as
10
+ // `AgentEvent.toolCallResult.isError`; this monitor consumes those
11
+ // events, attributes errors to the originating MCP server (parsed
12
+ // from `mcp__<server>__<tool>` names cached at toolCall time), and
13
+ // fires a single warn + bell notification per server once a
14
+ // consecutive-failure threshold is crossed.
15
+ //
16
+ // What the monitor intentionally does NOT do:
17
+ // - Restart the MCP server (operator's call).
18
+ // - Capture MCP subprocess stderr (Claude Agent SDK holds that —
19
+ // out of scope for #1353).
20
+ // - Auto-dismiss the bell entry when calls recover. The notification
21
+ // engine has no dismissal API exposed yet; future work can wire
22
+ // it once that lands. For now: bell stays until user dismisses.
23
+
24
+ import { EVENT_TYPES } from "../../src/types/events.js";
25
+ import { NOTIFICATION_ACTION_TYPES, NOTIFICATION_PRIORITIES } from "../../src/types/notification.js";
26
+ import { publishNotification } from "../events/notifications.js";
27
+ import { log } from "../system/logger/index.js";
28
+
29
+ export const MCP_FAILURE_THRESHOLD = 3;
30
+
31
+ // Server-id contract — mirrors `isMcpServerId` in
32
+ // `server/system/config.ts`. Single `_` is allowed; consecutive
33
+ // `__` is forbidden because it would collide with the
34
+ // `mcp__<server>__<tool>` delimiter and make the parser ambiguous
35
+ // (Codex iter-2 on #1356). Both ends agreeing on this shape is
36
+ // what lets the monitor attribute failures back to the right
37
+ // server entry in `mcp.json`.
38
+ const MCP_SERVER_ID_PATTERN = /^[a-z][a-z0-9_-]{0,63}$/;
39
+ const MCP_PREFIX = "mcp__";
40
+ const MCP_DELIM = "__";
41
+
42
// A server id must fit MCP_SERVER_ID_PATTERN and must not contain the
// `__` sequence, which is reserved as the server↔tool delimiter.
function isValidServerId(value: string): boolean {
  if (value.includes("__")) return false;
  return MCP_SERVER_ID_PATTERN.test(value);
}
45
+
46
/** The slice of an agent event the monitor reads. Declared here rather
 *  than imported so this module avoids a circular import; the fields
 *  are meant to line up structurally with the real `AgentEvent` union. */
interface TrackableEvent {
  /** Event discriminator; compared against `EVENT_TYPES.toolCall` and
   *  `EVENT_TYPES.toolCallResult`. */
  type: string;
  /** Id used to pair a tool call with its result. */
  toolUseId?: string;
  /** Tool name; the server id is parsed from its `mcp__<server>__<tool>` form. */
  toolName?: string;
  content?: string;
  /** Only a literal `true` counts the result as a failure. */
  isError?: boolean;
}
56
+
57
// Per-server counters kept by the failure monitor.
interface ServerStats {
  // Error results seen in a row; reset to 0 by any non-error result.
  consecutiveFailures: number;
  // Error results seen in total.
  totalFailures: number;
  // Results (error or not) attributed to this server.
  totalCalls: number;
}
62
+
63
// Output side of the monitor. It is an interface so a test can inject a
// recording sink instead of the real notification engine and logger.
interface NotificationSink {
  // Same signature as the notification engine's `publishNotification`.
  publish: typeof publishNotification;
  // Structured warning: event tag, human-readable message, optional payload.
  warn: (event: string, message: string, data?: Record<string, unknown>) => void;
}
67
+
68
+ const defaultSink: NotificationSink = {
69
+ publish: publishNotification,
70
+ warn: (event, message, data) => log.warn(event, message, data),
71
+ };
72
+
73
/** Pure helper: extracts the server id from an `mcp__<server>__<tool>`
 *  tool name. Returns `null` for tools without the `mcp__` prefix, for
 *  names with an empty server or tool part, and for server ids that
 *  fail `isValidServerId`.
 *
 *  The name is taken apart with plain string operations, not one regex:
 *  - the first `__` after the prefix is the server↔tool delimiter, so
 *    ids with a single `_` attribute correctly —
 *    `mcp__a1_b2__do_thing` gives server `"a1_b2"`, tool part
 *    `"do_thing"`;
 *  - there is no backtracking surface (an earlier
 *    `[^_]+(?:_[^_]+)*` regex form was flagged for ReDoS). */
export function mcpServerFromToolName(toolName: string): string | null {
  if (!toolName.startsWith(MCP_PREFIX)) return null;

  const afterPrefix = toolName.slice(MCP_PREFIX.length);
  const delimAt = afterPrefix.indexOf(MCP_DELIM);
  const serverId = delimAt > 0 ? afterPrefix.slice(0, delimAt) : "";
  // The tool part may itself contain `__` (some MCP authors use it in
  // tool names); all that matters here is that it is non-empty.
  const toolPart = delimAt > 0 ? afterPrefix.slice(delimAt + MCP_DELIM.length) : "";

  if (serverId === "" || toolPart === "") return null;
  return isValidServerId(serverId) ? serverId : null;
}
97
+
98
/** Build a session-scoped monitor. Returns the same shape as
 *  `createMcpTracker` so the backend wires both in the same loop.
 *
 *  Feed every agent event to `track`. Tool calls are remembered by
 *  `toolUseId`; when the paired result arrives it is counted against
 *  the MCP server parsed from the tool name. Once a server reaches
 *  `threshold` consecutive error results, one failure notice is emitted
 *  for it — and never again for the lifetime of this monitor.
 *
 *  `opts.sink` is injectable so tests need not mock the notification
 *  engine / logger globally; `opts.threshold` defaults to
 *  `MCP_FAILURE_THRESHOLD`. */
export function createMcpFailureMonitor(opts: { sink?: NotificationSink; threshold?: number } = {}): {
  track: (event: TrackableEvent) => void;
} {
  const sink = opts.sink ?? defaultSink;
  const threshold = opts.threshold ?? MCP_FAILURE_THRESHOLD;
  const pendingCalls = new Map<string, string>(); // toolUseId → server id
  const statsByServer = new Map<string, ServerStats>();
  const alreadyNotified = new Set<string>();

  // Remember which server a call went to; non-MCP tools are ignored.
  const onToolCall = (toolUseId: string, toolName: string): void => {
    const server = mcpServerFromToolName(toolName);
    if (server !== null) pendingCalls.set(toolUseId, server);
  };

  // Count a result against its server and notify on crossing the threshold.
  const onToolResult = (toolUseId: string, failed: boolean): void => {
    const server = pendingCalls.get(toolUseId);
    if (server === undefined) return; // not an MCP call (or orphan result)
    pendingCalls.delete(toolUseId);

    const entry = statsByServer.get(server) ?? { consecutiveFailures: 0, totalFailures: 0, totalCalls: 0 };
    entry.totalCalls += 1;
    if (!failed) {
      entry.consecutiveFailures = 0;
    } else {
      entry.consecutiveFailures += 1;
      entry.totalFailures += 1;
      const crossed = entry.consecutiveFailures >= threshold;
      if (crossed && !alreadyNotified.has(server)) {
        alreadyNotified.add(server);
        emitFailureNotice(server, entry, sink);
      }
    }
    statsByServer.set(server, entry);
  };

  const track = (event: TrackableEvent): void => {
    const { type, toolUseId, toolName } = event;
    if (typeof toolUseId !== "string") return;
    if (type === EVENT_TYPES.toolCall && typeof toolName === "string") {
      onToolCall(toolUseId, toolName);
    } else if (type === EVENT_TYPES.toolCallResult) {
      onToolResult(toolUseId, event.isError === true);
    }
  };

  return { track };
}
147
+
148
// Reports a server that crossed the consecutive-failure threshold:
// first a structured warning with the counters, then a high-priority
// bell notification.
function emitFailureNotice(server: string, entry: ServerStats, sink: NotificationSink): void {
  const { consecutiveFailures, totalFailures, totalCalls } = entry;
  const body = `MCP server ${server} returned errors on ${String(consecutiveFailures)} consecutive tool calls; check API key, network, or upstream service health.`;

  const counters = { server, consecutiveFailures, totalFailures, totalCalls };
  sink.warn("mcp", "subprocess appears broken — consecutive tool errors crossed threshold", counters);

  sink.publish({
    // Deterministic id so the notification engine's legacyId dedup
    // matches across restarts — bell entries left by earlier boots for
    // the same broken server don't pile up.
    id: `mcp-failure-${server}`,
    kind: "system",
    title: "MCP server failing",
    body,
    action: { type: NOTIFICATION_ACTION_TYPES.none },
    priority: NOTIFICATION_PRIORITIES.high,
  });
}