mulmoclaude 0.6.2 → 0.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +26 -0
- package/bin/mulmoclaude.js +11 -1
- package/client/assets/chunk-D8eiyYIV-CW0rPbG2.js +1 -0
- package/client/assets/{html2canvas-CDGcmOD3-Bkf2uOth.js → html2canvas-CDGcmOD3-BjwfzAN8.js} +1 -1
- package/client/assets/index-Bp1owZ-i.js +5101 -0
- package/client/assets/index-c63H1pnd.css +2 -0
- package/client/assets/{index.es-DqtpmBm8-D9mAh_KQ.js → index.es-DqtpmBm8-DudYPW7R.js} +1 -1
- package/client/assets/material-symbols-outlined-C0dZ3SlO.woff2 +0 -0
- package/client/assets/runtime-protocol-vue-BUk5WXSy.js +1 -0
- package/client/assets/{runtime-vue-BVUzgYGA.js → runtime-vue-fFYhnNg3.js} +1 -1
- package/client/assets/{vue-C8UuIO9J.js → vue-Kqzpl9Vx.js} +1 -1
- package/client/assets/vue.runtime.esm-bundler-BTyIdNAI.js +4 -0
- package/client/index.html +9 -11
- package/package.json +5 -4
- package/server/agent/backend/claude-code.ts +34 -0
- package/server/agent/backend/fake-echo.ts +370 -0
- package/server/agent/backend/index.ts +16 -1
- package/server/agent/config.ts +8 -1
- package/server/agent/mcpFailureMonitor.ts +167 -0
- package/server/agent/mcpPreflight.ts +185 -0
- package/server/agent/stream.ts +12 -1
- package/server/api/routes/mulmo-script.ts +19 -1
- package/server/api/routes/schedulerHandlers.ts +52 -4
- package/server/api/routes/sessions.ts +15 -0
- package/server/api/routes/skills.ts +263 -0
- package/server/events/notifications.ts +19 -91
- package/server/index.ts +87 -9
- package/server/notifier/macosReminderAdapter.ts +30 -0
- package/server/system/announceOptionalDeps.ts +50 -0
- package/server/system/config.ts +8 -1
- package/server/system/docker.ts +14 -6
- package/server/system/env.ts +18 -5
- package/server/system/optionalDeps.ts +129 -0
- package/server/utils/cli-flags.d.mts +14 -0
- package/server/utils/cli-flags.mjs +53 -0
- package/server/utils/time.ts +6 -0
- package/server/workspace/helps/business.md +2 -2
- package/server/workspace/helps/mulmoscript.md +3 -3
- package/server/workspace/helps/sandbox.md +2 -2
- package/server/workspace/hooks/dispatcher.mjs +1 -1
- package/server/workspace/paths.ts +13 -4
- package/server/workspace/skills/catalog.ts +355 -0
- package/server/workspace/skills/external/catalog.ts +283 -0
- package/server/workspace/skills/external/clone.ts +129 -0
- package/server/workspace/skills/external/id.ts +194 -0
- package/server/workspace/skills/external/install.ts +417 -0
- package/server/workspace/skills/external/presets.ts +50 -0
- package/server/workspace/skills-preset.ts +29 -17
- package/server/workspace/workspace.ts +10 -5
- package/src/App.vue +19 -8
- package/src/components/RightSidebar.vue +19 -0
- package/src/components/StackView.vue +10 -1
- package/src/config/apiRoutes.ts +0 -6
- package/src/config/roles.ts +2 -0
- package/src/lang/de.ts +50 -1
- package/src/lang/en.ts +49 -1
- package/src/lang/es.ts +49 -1
- package/src/lang/fr.ts +49 -1
- package/src/lang/ja.ts +49 -1
- package/src/lang/ko.ts +49 -1
- package/src/lang/pt-BR.ts +49 -1
- package/src/lang/zh.ts +49 -1
- package/src/plugins/manageSkills/View.vue +795 -30
- package/src/plugins/manageSkills/categories.ts +125 -0
- package/src/plugins/manageSkills/meta.ts +30 -0
- package/src/plugins/markdown/definition.ts +3 -3
- package/src/plugins/meta-types.ts +5 -0
- package/src/plugins/presentMulmoScript/Preview.vue +3 -3
- package/src/plugins/presentMulmoScript/View.vue +157 -33
- package/src/plugins/presentMulmoScript/meta.ts +4 -0
- package/src/plugins/scheduler/View.vue +45 -9
- package/src/plugins/scheduler/calendarDefinition.ts +6 -2
- package/src/plugins/scheduler/multiDayHelpers.ts +95 -0
- package/src/plugins/spreadsheet/View.vue +3 -3
- package/src/types/notification.ts +1 -1
- package/src/types/session.ts +6 -0
- package/src/types/sse.ts +5 -0
- package/src/types/toolCallHistory.ts +7 -0
- package/src/utils/agent/eventDispatch.ts +26 -5
- package/src/utils/agent/mcpHint.ts +50 -0
- package/src/utils/session/sessionEntries.ts +8 -32
- package/client/assets/PluginScopedRoot-YjvQq0Nn.js +0 -3
- package/client/assets/chunk-CernVdwh.js +0 -1
- package/client/assets/chunk-D8eiyYIV-CAXpUwLd.js +0 -1
- package/client/assets/index-BwrlMMHr.js +0 -5005
- package/client/assets/index-CvvNuegU.css +0 -2
- package/client/assets/material-symbols-outlined-BOZVWuR3.woff2 +0 -0
- package/client/assets/runtime-protocol-vue-C1To4M3t.js +0 -1
- package/client/assets/vue.runtime.esm-bundler-DQ8Kjjui.js +0 -4
- package/server/api/routes/notifications.ts +0 -195
- package/server/notifier/legacy-adapters.ts +0 -76
- package/src/composables/useSelectedResult.ts +0 -49
- /package/client/assets/{purify.es-Fx1Nqyry-Dwtk-9WZ.js → purify.es-Fx1Nqyry-B3aL7Uvj.js} +0 -0
- /package/client/assets/{typeof-DBp4T-Ny-CSr8wx1e.js → typeof-DBp4T-Ny-Bef7RiR_.js} +0 -0
|
@@ -0,0 +1,370 @@
|
|
|
1
|
+
// Test-only LLM backend. Loaded by `getActiveBackend()` only when
|
|
2
|
+
// `MULMOCLAUDE_FAKE_AGENT=1` (CI workflow boot wiring), and re-usable
|
|
3
|
+
// from unit tests via `setFakeResponse()` / `resetFakeResponse()`.
|
|
4
|
+
//
|
|
5
|
+
// Default behavior:
|
|
6
|
+
// - emits a synthesized `claudeSessionId` so the orchestrator's
|
|
7
|
+
// resume bookkeeping sees the same shape as a real run
|
|
8
|
+
// - short-circuits `/<slug>` slash-command turns by reading the
|
|
9
|
+
// seeded SKILL.md and echoing the canary marker line
|
|
10
|
+
// - emits the concatenated per-session message history as the
|
|
11
|
+
// assistant text reply, so context-recall tests (session L-12)
|
|
12
|
+
// see prior turn content
|
|
13
|
+
//
|
|
14
|
+
// Tool dispatch: when the user prompt matches a known shape (see
|
|
15
|
+
// detectToolCalls), fake-echo emits the corresponding tool_call
|
|
16
|
+
// AND posts the args to the same internal plugin endpoint the MCP
|
|
17
|
+
// bridge would use under real Claude (see PLUGIN_ENDPOINTS). The
|
|
18
|
+
// handler runs unmodified, the artifact lands on disk, and the
|
|
19
|
+
// canvas mounts the plugin View — fake at the LLM seam only, real
|
|
20
|
+
// from the tool dispatch downward. Tests that need an LLM that
|
|
21
|
+
// actually reasons (presentForm field design, agent-driven slug
|
|
22
|
+
// choice in skill creation, etc.) still stay gated on
|
|
23
|
+
// `E2E_LIVE_NO_LLM=1`.
|
|
24
|
+
|
|
25
|
+
import { randomUUID } from "node:crypto";
|
|
26
|
+
import { readFile } from "node:fs/promises";
|
|
27
|
+
import path from "node:path";
|
|
28
|
+
|
|
29
|
+
import { getCurrentToken } from "../../api/auth/token.js";
|
|
30
|
+
import { makeUuid } from "../../utils/id.js";
|
|
31
|
+
import { API_ROUTES } from "../../../src/config/apiRoutes.js";
|
|
32
|
+
import { EVENT_TYPES } from "../../../src/types/events.js";
|
|
33
|
+
import { WORKSPACE_DIRS } from "../../workspace/paths.js";
|
|
34
|
+
import type { AgentEvent } from "../stream.js";
|
|
35
|
+
import type { AgentInput, LLMBackend } from "./types.js";
|
|
36
|
+
|
|
37
|
+
interface PluginEnvelope {
|
|
38
|
+
data?: unknown;
|
|
39
|
+
message?: unknown;
|
|
40
|
+
instructions?: unknown;
|
|
41
|
+
[key: string]: unknown;
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
export interface FakeToolCall {
|
|
45
|
+
toolName: string;
|
|
46
|
+
args: unknown;
|
|
47
|
+
/** Result string emitted in the matching `tool_call_result`. When
|
|
48
|
+
* omitted, the call is dispatched to its plugin endpoint; tools with no endpoint get `{"ok":true}`. */
|
|
49
|
+
result?: string;
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
export interface FakeResponse {
|
|
53
|
+
/** Tool calls emitted before the text block. The default generator
|
|
54
|
+
* only emits the ones `detectToolCalls()` recognizes in the prompt;
|
|
55
|
+
* tests that want arbitrary tool events use `setFakeResponse()`. */
|
|
56
|
+
toolCalls?: readonly FakeToolCall[];
|
|
57
|
+
/** Assistant text. Omit to skip the text event entirely. */
|
|
58
|
+
text?: string;
|
|
59
|
+
/** When set, emit a single `error` AgentEvent with this message
|
|
60
|
+
* and stop — mirrors what the claude-code backend does when the
|
|
61
|
+
* CLI exits non-zero (`readAgentEvents`). Tool calls / text that
|
|
62
|
+
* would otherwise follow are suppressed. */
|
|
63
|
+
error?: string;
|
|
64
|
+
/** Emit the `tool_call` for each `toolCalls` entry but NOT the
|
|
65
|
+
* paired `tool_call_result` — simulates a truncated / partial
|
|
66
|
+
* stream where the model died mid tool round-trip. */
|
|
67
|
+
omitToolResult?: boolean;
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
export type FakeResponseFn = (input: AgentInput) => FakeResponse | Promise<FakeResponse>;
|
|
71
|
+
|
|
72
|
+
// Per-session conversation memory so context-recall tests see prior
|
|
73
|
+
// turn content in the reply. Cleared by `resetFakeResponse()`.
|
|
74
|
+
const sessionTurns = new Map<string, string[]>();
|
|
75
|
+
|
|
76
|
+
/**
 * Built-in response generator (the initial value of `responseFn`).
 *
 * Resolution order for a turn:
 *   1. a message containing the `__FAKE_ERROR__` marker yields an error response;
 *   2. a bare `/<slug>` message is answered from the seeded SKILL.md, when
 *      `replyFromSeededSkill` finds a canary line;
 *   3. otherwise the message is appended to the per-session history and the
 *      whole history is echoed back, together with any detected tool calls.
 */
async function defaultResponse(input: AgentInput): Promise<FakeResponse> {
  const { message, sessionId, workspacePath } = input;

  // Prompt-driven error trigger for e2e-live. The in-process
  // `setFakeResponse()` knob is unreachable from a browser-driven
  // spec (separate process), so the error-banner UI canary opts in
  // by sending a message containing this exact marker. Prod never
  // reaches fake-echo (real Claude backend) so this is inert there.
  if (message.includes("__FAKE_ERROR__")) {
    // The message text is rendered through marked() in the chat card,
    // so it must stay free of markdown-significant characters (no `__`,
    // `*`, backticks) — the e2e-live canary asserts on a literal
    // substring of this string.
    return { error: "fake-echo forced error for the e2e-live error-banner canary" };
  }

  // Slash-command turn shape: the SPA's "Run" button on a skill row
  // (e2e-live L-22) starts a new chat with `/<slug>` as the only user
  // message. Real Claude resolves this through its skill pipeline and
  // uses the SKILL.md body as system prompt; here we read the seeded
  // body and apply the "respond with this exact line" heuristic the
  // e2e-live canaries rely on. No canary line means plain echo below.
  const slashTurn = /^\/([a-z0-9][a-z0-9-]*)$/i.exec(message.trim());
  if (slashTurn !== null) {
    const canary = await replyFromSeededSkill(workspacePath, slashTurn[1]);
    if (canary !== null) return { text: canary };
  }

  // Record this turn, creating the session's history on first use.
  let turns = sessionTurns.get(sessionId);
  if (turns === undefined) {
    turns = [];
    sessionTurns.set(sessionId, turns);
  }
  turns.push(message);

  return {
    toolCalls: detectToolCalls(message),
    text: turns.join("\n\n"),
  };
}
|
|
113
|
+
|
|
114
|
+
// ── Tool-call pattern detectors ───────────────────────────────────
|
|
115
|
+
//
|
|
116
|
+
// Each detector matches one e2e-live prompt shape. The fake-echo
|
|
117
|
+
// loop below dispatches each detected call to the matching server-
|
|
118
|
+
// side plugin endpoint (see PLUGIN_ENDPOINTS) so the real handler
|
|
119
|
+
// runs, the artifact lands on disk, and the canvas mounts the View.
|
|
120
|
+
// Production never reaches this code path — gated by
|
|
121
|
+
// MULMOCLAUDE_FAKE_AGENT=1 at server boot.
|
|
122
|
+
|
|
123
|
+
/**
 * Detects a presentMulmoScript prompt.
 *
 * The message must mention `presentMulmoScript` (case-insensitive) and
 * carry a quoted `filePath: "..."` value; otherwise returns null.
 */
function detectPresentMulmoScript(message: string): FakeToolCall | null {
  const mentionsTool = /presentMulmoScript/i.test(message);
  const filePath = mentionsTool ? /filePath:\s*["']([^"']+)["']/.exec(message)?.[1] : undefined;
  if (filePath === undefined) return null;
  return { toolName: "presentMulmoScript", args: { filePath } };
}
|
|
129
|
+
|
|
130
|
+
/**
 * Detects a presentHtml prompt and builds the `html` argument.
 *
 * The message must mention `presentHtml` (case-insensitive) and contain
 * a `<`; everything from the first `<` onward is treated as markup.
 * Returns null when either condition fails.
 *
 * The handler expects a self-contained document, so:
 *   - markup that already starts with `<!DOCTYPE` is passed through;
 *   - markup that starts with an `<html>` root only gets a DOCTYPE
 *     prepended (wrapping it again would nest `<html>` inside `<body>`);
 *   - anything else is a fragment and is wrapped in a minimal document.
 */
function detectPresentHtml(message: string): FakeToolCall | null {
  if (!/presentHtml/i.test(message)) return null;
  const start = message.indexOf("<");
  if (start < 0) return null;
  const markup = message.slice(start).trim();

  let html: string;
  if (/^<!DOCTYPE/i.test(markup)) {
    html = markup;
  } else if (/^<html[\s>]/i.test(markup)) {
    html = `<!DOCTYPE html>${markup}`;
  } else {
    html = `<!DOCTYPE html><html><body>${markup}</body></html>`;
  }
  return { toolName: "presentHtml", args: { html } };
}
|
|
140
|
+
|
|
141
|
+
/**
 * Detects a presentForm prompt and synthesizes a one-field text form.
 *
 * Title, field id and field label are taken from `titled '...'`,
 * `id='...'` and `label='...'` in the message, each with a fixed
 * fallback. The field is marked required when the word "required"
 * appears anywhere in the message. Returns null when the message does
 * not mention `presentForm`.
 */
function detectPresentForm(message: string): FakeToolCall | null {
  if (!/presentForm/i.test(message)) return null;

  // First capture group of `pattern`, or `fallback` when it does not match.
  const quoted = (pattern: RegExp, fallback: string): string => pattern.exec(message)?.[1] ?? fallback;

  const title = quoted(/titled\s+['"]([^'"]+)['"]/i, "Quick check");
  const field = {
    id: quoted(/id\s*=\s*['"]([^'"]+)['"]/i, "field1"),
    type: "text",
    label: quoted(/label\s*=\s*['"]([^'"]+)['"]/i, "Field"),
    required: /required/i.test(message),
    description: "auto-generated by fake-echo",
  };

  return { toolName: "presentForm", args: { title, fields: [field] } };
}
|
|
162
|
+
|
|
163
|
+
/**
 * Detects a presentChart prompt and synthesizes a single bar chart.
 *
 * Every `<word of 3+ letters> <1-6 digit number>` pair in the message
 * becomes one category/value; with no pairs the chart falls back to
 * A/B/C = 1/2/3. The title comes from `titled '...'` (default
 * "Untitled") and is used for both the document and the chart.
 * Returns null when the message does not mention `presentChart`.
 */
function detectPresentChart(message: string): FakeToolCall | null {
  if (!/presentChart/i.test(message)) return null;

  const title = /titled\s+['"]([^'"]+)['"]/i.exec(message)?.[1] ?? "Untitled";

  const labels: string[] = [];
  const values: number[] = [];
  for (const [, label, value] of message.matchAll(/\b([A-Za-z]{3,})\s+(\d{1,6})\b/g)) {
    labels.push(label);
    values.push(Number(value));
  }
  if (labels.length === 0) {
    labels.push("A", "B", "C");
    values.push(1, 2, 3);
  }

  const chart = {
    title,
    type: "bar",
    option: {
      xAxis: { type: "category", data: labels },
      yAxis: { type: "value" },
      series: [{ type: "bar", data: values }],
    },
  };
  return { toolName: "presentChart", args: { document: { title, charts: [chart] } } };
}
|
|
190
|
+
|
|
191
|
+
/**
 * Runs every tool-call detector against the message, in a fixed order
 * (mulmoScript, html, form, chart), and collects the calls that matched.
 * Returns `undefined` rather than an empty array when nothing matched.
 */
function detectToolCalls(message: string): FakeToolCall[] | undefined {
  const detectors = [detectPresentMulmoScript, detectPresentHtml, detectPresentForm, detectPresentChart];
  const matched = detectors.map((detect) => detect(message)).filter((call): call is FakeToolCall => call !== null);
  return matched.length === 0 ? undefined : matched;
}
|
|
199
|
+
|
|
200
|
+
// ── Plugin dispatch ───────────────────────────────────────────────
//
// Tool name → internal API path. These are the same endpoints the MCP
// bridge would post to in a real run, so the actual server-side handler
// runs end-to-end: artifact saved, canvas slug returned. A tool that is
// not listed here gets a synthesized success envelope instead (callers
// can override that via FakeToolCall.result).
const PLUGIN_ENDPOINTS: Readonly<Record<string, string>> = {
  presentChart: "/api/chart",
  presentForm: "/api/form",
  presentHtml: "/api/html",
  presentMulmoScript: "/api/mulmoScript/save",
};
|
|
213
|
+
|
|
214
|
+
// Mirrors what server/agent/mcp-server.ts#handleToolCall does for
// the real MCP bridge:
//   1. POST the tool args to the plugin endpoint to get the envelope back.
//   2. If envelope.data is set, PUSH the envelope to
//      /api/internal/tool-result — this is what surfaces the result
//      to the canvas as a ToolResultComplete (toolName + uuid are
//      stamped here so the plugin can't impersonate).
//   3. Return the text representation (message + instructions) so
//      the matching `tool_call_result` event carries something
//      meaningful for the tool-call history pane.
//
// Short-circuits: a caller-supplied `call.result` is returned verbatim,
// and a tool with no entry in PLUGIN_ENDPOINTS yields `{"ok":true}`.
// Never throws — HTTP failures and exceptions are returned as a JSON
// `{ error }` string so the chat turn survives and the test sees them.
async function dispatchToPlugin(call: FakeToolCall, port: number, chatSessionId: string): Promise<string> {
  if (call.result !== undefined) return call.result;
  // Own-property lookup only: PLUGIN_ENDPOINTS is a plain object, so a
  // tool named e.g. "toString" or "constructor" would otherwise resolve
  // to an inherited Object.prototype member and be treated as a route.
  const endpoint = Object.prototype.hasOwnProperty.call(PLUGIN_ENDPOINTS, call.toolName) ? PLUGIN_ENDPOINTS[call.toolName] : undefined;
  if (!endpoint) return '{"ok":true}';
  const token = getCurrentToken();
  const authHeaders: Record<string, string> = token ? { Authorization: `Bearer ${token}` } : {};
  try {
    const response = await fetch(`http://localhost:${port}${endpoint}`, {
      method: "POST",
      headers: { "Content-Type": "application/json", ...authHeaders },
      body: JSON.stringify(call.args),
    });
    if (!response.ok) {
      const errBody = await response.text();
      return JSON.stringify({ error: `plugin ${call.toolName} returned ${response.status}: ${errBody.slice(0, 200)}` });
    }
    const envelope = ((await response.json()) ?? {}) as PluginEnvelope;
    if (envelope.data !== undefined) {
      // Query key is `session`, not `chatSessionId` — matches the
      // `getSessionQuery(req)` reader and what the MCP bridge's
      // postJson(...) helper passes (`?session=${SESSION_ID}`).
      const toolResultUrl = `http://localhost:${port}${API_ROUTES.agent.internal.toolResult}?session=${encodeURIComponent(chatSessionId)}`;
      const pushRes = await fetch(toolResultUrl, {
        method: "POST",
        headers: { "Content-Type": "application/json", ...authHeaders },
        body: JSON.stringify({ ...envelope, toolName: call.toolName, uuid: makeUuid() }),
      });
      if (!pushRes.ok) {
        // Fail loudly — a swallowed publish would leave the canvas
        // blank while the chat reads "Done", which masks a real
        // wiring break. Surface the failure as the tool result so
        // the test fails loud instead of timing out on an absent View.
        const errBody = await pushRes.text();
        return JSON.stringify({
          error: `tool-result push failed for ${call.toolName}: ${pushRes.status} ${errBody.slice(0, 200)}`,
        });
      }
    }
    const text: string[] = [];
    if (typeof envelope.message === "string") text.push(envelope.message);
    if (typeof envelope.instructions === "string") text.push(envelope.instructions);
    return text.length > 0 ? text.join("\n") : "Done";
  } catch (err) {
    // Don't tear down the chat turn on plugin-dispatch failure —
    // surface the error in the tool_result so the test sees it.
    return JSON.stringify({ error: err instanceof Error ? err.message : String(err) });
  }
}
|
|
273
|
+
|
|
274
|
+
// Reads `<workspace>/<claudeSkills dir>/<slug>/SKILL.md` (a project-scope
// skill seeded by `placeProjectSkill`) and extracts the canary line the
// seeded body asks the model to echo back ("respond with this exact
// line and nothing else: X"). The first matching line wins and its
// captured text is returned trimmed.
// Returns null when the file cannot be read or no line carries the
// marker — the caller then falls through to the default echo.
async function replyFromSeededSkill(workspacePath: string, slug: string): Promise<string | null> {
  const skillFile = path.join(workspacePath, WORKSPACE_DIRS.claudeSkills, slug, "SKILL.md");

  let body: string;
  try {
    body = await readFile(skillFile, "utf8");
  } catch {
    return null;
  }

  // Matched one line at a time to avoid backtracking surprises.
  const marker = /respond with this exact line(?: and nothing else)?:\s*(.+)/i;
  const hit = body
    .split(/\r?\n/)
    .map((line) => marker.exec(line))
    .find((found) => found !== null);
  return hit ? hit[1].trim() : null;
}
|
|
294
|
+
|
|
295
|
+
// ── Backend wiring ────────────────────────────────────────────────
|
|
296
|
+
|
|
297
|
+
let responseFn: FakeResponseFn = defaultResponse;
|
|
298
|
+
|
|
299
|
+
/**
 * Installs `generator` as the function that produces every subsequent
 * fake response, replacing the default echo + slash-command generator.
 * Meant for unit tests that want full control over what the fake
 * backend emits; call `resetFakeResponse()` in teardown so the next
 * test starts from a clean state.
 */
export function setFakeResponse(generator: FakeResponseFn): void {
  responseFn = generator;
}
|
|
306
|
+
|
|
307
|
+
/**
 * Undoes `setFakeResponse()`: reinstates the default generator and
 * drops the conversation history recorded for every session.
 */
export function resetFakeResponse(): void {
  responseFn = defaultResponse;
  sessionTurns.clear();
}
|
|
312
|
+
|
|
313
|
+
// True once the caller's abort signal has fired; false when there is
// no signal at all. The agent loop checks this between every yield:
// real claude-code kills the subprocess on abort, and the echo stub has
// no subprocess, so the faithful equivalent is "stop emitting
// immediately".
function aborted(input: AgentInput): boolean {
  const signal = input.abortSignal;
  return signal != null && signal.aborted === true;
}
|
|
319
|
+
|
|
320
|
+
/**
 * Agent loop of the fake backend. Emits, in order:
 *   1. a synthesized `claudeSessionId`;
 *   2. either a single `error` event (when the response carries
 *      `error`), after which nothing else is emitted, or
 *   3. one `toolCall` per entry in `toolCalls`, each followed by its
 *      `toolCallResult` unless `omitToolResult` is set, and finally
 *   4. a `text` event when the response has text.
 *
 * The abort signal is re-checked before every yield; once it has fired
 * the generator returns without emitting anything further.
 */
async function* runFakeEchoAgent(input: AgentInput): AsyncGenerator<AgentEvent> {
  if (aborted(input)) return;
  yield { type: EVENT_TYPES.claudeSessionId, id: randomUUID() };

  const reply = await responseFn(input);

  // Error short-circuit: surface the error and stop, exactly like
  // the claude-code backend on a non-zero CLI exit.
  if (reply.error !== undefined) {
    if (!aborted(input)) {
      yield { type: EVENT_TYPES.error, message: reply.error };
    }
    return;
  }

  for (const call of reply.toolCalls ?? []) {
    if (aborted(input)) return;
    const toolUseId = `fake-${randomUUID()}`;
    yield { type: EVENT_TYPES.toolCall, toolUseId, toolName: call.toolName, args: call.args };

    // Partial-stream simulation leaves out the result half entirely.
    if (!reply.omitToolResult) {
      // Run the actual plugin handler AND push the envelope to
      // /api/internal/tool-result so the canvas mounts the View — the
      // same two-step the MCP bridge does for real Claude.
      const content = await dispatchToPlugin(call, input.port, input.sessionId);
      if (aborted(input)) return;
      yield { type: EVENT_TYPES.toolCallResult, toolUseId, content };
    }
  }

  if (reply.text === undefined || aborted(input)) return;
  yield { type: EVENT_TYPES.text, message: reply.text };
}
|
|
361
|
+
|
|
362
|
+
/** The fake backend, in the `LLMBackend` shape expected by the backend registry. */
export const fakeEchoBackend: LLMBackend = {
  id: "fake-echo",
  // A stub cannot meaningfully replay resume-by-token or MCP, so both
  // capabilities are reported as unsupported. Callers that depend on
  // the real Claude semantics can then opt out instead of getting
  // silently wrong behavior.
  capabilities: { sessionResume: false, mcp: false },
  runAgent: runFakeEchoAgent,
};
|
|
@@ -3,12 +3,27 @@
|
|
|
3
3
|
// env / settings. Callers go through getActiveBackend() rather than
|
|
4
4
|
// importing a concrete adapter so adding a backend doesn't require
|
|
5
5
|
// touching every call site.
|
|
6
|
+
//
|
|
7
|
+
// Tests / CI swap in `fakeEchoBackend` via setActiveBackend() at
|
|
8
|
+
// server bootstrap; the decision is made once and read with zero
|
|
9
|
+
// per-call overhead by the agent orchestrator.
|
|
6
10
|
|
|
7
11
|
import { claudeCodeBackend } from "./claude-code.js";
|
|
8
12
|
import type { LLMBackend } from "./types.js";
|
|
9
13
|
|
|
10
14
|
export type { AgentInput, BackendCapabilities, LLMBackend } from "./types.js";
|
|
11
15
|
|
|
16
|
+
let activeBackend: LLMBackend = claudeCodeBackend;
|
|
17
|
+
|
|
18
|
+
/** Replace the active backend. Intended for server-bootstrap wiring
|
|
19
|
+
* (e.g. CI sets `MULMOCLAUDE_FAKE_AGENT=1`, the boot script then
|
|
20
|
+
* passes `fakeEchoBackend` here). Not safe to call mid-flight — the
|
|
21
|
+
* in-flight agent generators have already captured the previous
|
|
22
|
+
* backend reference, and swapping under them would race. */
|
|
23
|
+
export function setActiveBackend(backend: LLMBackend): void {
|
|
24
|
+
activeBackend = backend;
|
|
25
|
+
}
|
|
26
|
+
|
|
12
27
|
export function getActiveBackend(): LLMBackend {
|
|
13
|
-
return
|
|
28
|
+
return activeBackend;
|
|
14
29
|
}
|
package/server/agent/config.ts
CHANGED
|
@@ -10,6 +10,7 @@ import type { Attachment } from "@mulmobridge/protocol";
|
|
|
10
10
|
import { isImageMime, isNativeAttachmentMime } from "@mulmobridge/client";
|
|
11
11
|
import { convertAttachment } from "./attachmentConverter.js";
|
|
12
12
|
import { log } from "../system/logger/index.js";
|
|
13
|
+
import { preflightUserServers, logPreflightResult } from "./mcpPreflight.js";
|
|
13
14
|
|
|
14
15
|
export const CONTAINER_WORKSPACE_PATH = "/home/node/mulmoclaude";
|
|
15
16
|
|
|
@@ -73,8 +74,14 @@ function prepareUserStdioServer(spec: Extract<McpServerSpec, { type: "stdio" }>,
|
|
|
73
74
|
}
|
|
74
75
|
|
|
75
76
|
export function prepareUserServers(userServers: Record<string, McpServerSpec>, useDocker: boolean, hostWorkspacePath: string): Record<string, McpServerSpec> {
|
|
77
|
+
// Drop catalog-known entries that are missing required config (#1352).
|
|
78
|
+
// The dedup cache inside `logPreflightResult` keeps per-agent-run
|
|
79
|
+
// calls quiet so a Settings UI fix only logs once when it transitions
|
|
80
|
+
// missing → ok.
|
|
81
|
+
const preflight = preflightUserServers(userServers);
|
|
82
|
+
logPreflightResult(preflight, "agent-run");
|
|
76
83
|
const out: Record<string, McpServerSpec> = {};
|
|
77
|
-
for (const [serverId, spec] of Object.entries(
|
|
84
|
+
for (const [serverId, spec] of Object.entries(preflight.ready)) {
|
|
78
85
|
if (spec.enabled === false) continue;
|
|
79
86
|
if (spec.type === "http") {
|
|
80
87
|
out[serverId] = prepareUserHttpServer(spec, useDocker);
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
// Runtime failure monitor for external MCP servers (#1353).
|
|
2
|
+
//
|
|
3
|
+
// `mcpPreflight.ts` (#1352) handles the static case: servers that
|
|
4
|
+
// can't even start because of missing required config. This module
|
|
5
|
+
// handles the dynamic case: servers that start OK but fail to answer
|
|
6
|
+
// tool calls (API key rotated, upstream down, OAuth scope wrong …).
|
|
7
|
+
//
|
|
8
|
+
// Signal source is `tool_result.is_error: true` from Claude Code's
|
|
9
|
+
// stream-json. `stream.ts` now forwards that flag as
|
|
10
|
+
// `AgentEvent.toolCallResult.isError`; this monitor consumes those
|
|
11
|
+
// events, attributes errors to the originating MCP server (parsed
|
|
12
|
+
// from `mcp__<server>__<tool>` names cached at toolCall time), and
|
|
13
|
+
// fires a single warn + bell notification per server once a
|
|
14
|
+
// consecutive-failure threshold is crossed.
|
|
15
|
+
//
|
|
16
|
+
// What the monitor intentionally does NOT do:
|
|
17
|
+
// - Restart the MCP server (operator's call).
|
|
18
|
+
// - Capture MCP subprocess stderr (Claude Agent SDK holds that —
|
|
19
|
+
// out of scope for #1353).
|
|
20
|
+
// - Auto-dismiss the bell entry when calls recover. The notification
|
|
21
|
+
// engine has no dismissal API exposed yet; future work can wire
|
|
22
|
+
// it once that lands. For now: bell stays until user dismisses.
|
|
23
|
+
|
|
24
|
+
import { EVENT_TYPES } from "../../src/types/events.js";
|
|
25
|
+
import { NOTIFICATION_ACTION_TYPES, NOTIFICATION_PRIORITIES } from "../../src/types/notification.js";
|
|
26
|
+
import { publishNotification } from "../events/notifications.js";
|
|
27
|
+
import { log } from "../system/logger/index.js";
|
|
28
|
+
|
|
29
|
+
export const MCP_FAILURE_THRESHOLD = 3;
|
|
30
|
+
|
|
31
|
+
// Server-id contract — mirrors `isMcpServerId` in
// `server/system/config.ts`: a lowercase letter followed by up to 63
// characters from `[a-z0-9_-]`. A single `_` is allowed; consecutive
// `__` is forbidden because it would collide with the
// `mcp__<server>__<tool>` delimiter and make the parser ambiguous.
// Both ends agreeing on this shape is what lets the monitor attribute
// failures back to the right server entry in `mcp.json`.
const MCP_SERVER_ID_PATTERN = /^[a-z][a-z0-9_-]{0,63}$/;
const MCP_PREFIX = "mcp__";
const MCP_DELIM = "__";

/** True when `value` satisfies the server-id contract described above. */
function isValidServerId(value: string): boolean {
  if (value.includes("__")) return false;
  return MCP_SERVER_ID_PATTERN.test(value);
}
|
|
45
|
+
|
|
46
|
+
/** Minimal AgentEvent surface the monitor needs. Defined locally to
|
|
47
|
+
* avoid a circular import; structurally matches the relevant fields
|
|
48
|
+
* on the real `AgentEvent` union. */
|
|
49
|
+
interface TrackableEvent {
|
|
50
|
+
type: string;
|
|
51
|
+
toolUseId?: string;
|
|
52
|
+
toolName?: string;
|
|
53
|
+
content?: string;
|
|
54
|
+
isError?: boolean;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
interface ServerStats {
|
|
58
|
+
consecutiveFailures: number;
|
|
59
|
+
totalFailures: number;
|
|
60
|
+
totalCalls: number;
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
interface NotificationSink {
|
|
64
|
+
publish: typeof publishNotification;
|
|
65
|
+
warn: (event: string, message: string, data?: Record<string, unknown>) => void;
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
const defaultSink: NotificationSink = {
|
|
69
|
+
publish: publishNotification,
|
|
70
|
+
warn: (event, message, data) => log.warn(event, message, data),
|
|
71
|
+
};
|
|
72
|
+
|
|
73
|
+
/**
 * Pure helper: extracts the server id from an MCP tool name of the form
 * `mcp__<server>__<tool>`.
 *
 * The name is cut at the first `__` that follows the `mcp__` prefix, so
 * server ids containing a single `_` attribute correctly
 * (`mcp__a1_b2__do_thing` → server `"a1_b2"`, tool part `"do_thing"`).
 * The tool part may contain further `__`; it only has to be non-empty.
 * Cutting with `indexOf`/`slice` and validating the id separately keeps
 * the parse free of backtracking-prone patterns.
 *
 * @returns the server id, or `null` when the name lacks the prefix, has
 *   an empty server or tool part, or the server part is not a valid id.
 */
export function mcpServerFromToolName(toolName: string): string | null {
  if (!toolName.startsWith(MCP_PREFIX)) return null;

  const qualified = toolName.slice(MCP_PREFIX.length);
  const cut = qualified.indexOf(MCP_DELIM);
  const serverId = qualified.slice(0, Math.max(cut, 0));
  const toolPart = cut < 0 ? "" : qualified.slice(cut + MCP_DELIM.length);

  if (serverId === "" || toolPart === "") return null;
  return isValidServerId(serverId) ? serverId : null;
}
|
|
97
|
+
|
|
98
|
+
/**
 * Builds a session-scoped failure monitor. It exposes a single
 * `track(event)` method — the same shape as `createMcpTracker`, so the
 * backend can wire both in the same loop.
 *
 * `track` remembers which MCP server each tool call belongs to, then
 * counts that server's results. When a server's consecutive failures
 * reach the threshold, one warning + notification is emitted; further
 * failures of the same server do not notify again for the lifetime of
 * this monitor. A successful result resets the consecutive counter.
 *
 * @param opts.sink       where warnings/notifications go; injectable so
 *                        tests need not mock the notification engine or
 *                        logger globally.
 * @param opts.threshold  consecutive failures required before
 *                        notifying; defaults to `MCP_FAILURE_THRESHOLD`.
 */
export function createMcpFailureMonitor(opts: { sink?: NotificationSink; threshold?: number } = {}): {
  track: (event: TrackableEvent) => void;
} {
  const sink = opts.sink ?? defaultSink;
  const threshold = opts.threshold ?? MCP_FAILURE_THRESHOLD;
  const pendingCalls = new Map<string, string>(); // toolUseId → server id
  const perServer = new Map<string, ServerStats>();
  const alreadyNotified = new Set<string>();

  // Remember the originating server of an MCP tool call; other tools are ignored.
  const onToolCall = (toolUseId: string, toolName: string): void => {
    const server = mcpServerFromToolName(toolName);
    if (server !== null) pendingCalls.set(toolUseId, server);
  };

  // Fold one tool result into its server's counters and notify once
  // the consecutive-failure threshold is reached.
  const onToolResult = (toolUseId: string, failed: boolean): void => {
    const server = pendingCalls.get(toolUseId);
    if (server === undefined) return; // not an MCP call (or orphan result)
    pendingCalls.delete(toolUseId);

    const entry = perServer.get(server) ?? { consecutiveFailures: 0, totalFailures: 0, totalCalls: 0 };
    entry.totalCalls += 1;
    if (failed) {
      entry.consecutiveFailures += 1;
      entry.totalFailures += 1;
    } else {
      entry.consecutiveFailures = 0;
    }

    const shouldNotify = failed && entry.consecutiveFailures >= threshold && !alreadyNotified.has(server);
    if (shouldNotify) {
      alreadyNotified.add(server);
      emitFailureNotice(server, entry, sink);
    }
    perServer.set(server, entry);
  };

  return {
    track(event: TrackableEvent): void {
      const { type, toolUseId, toolName } = event;
      if (typeof toolUseId !== "string") return;
      if (type === EVENT_TYPES.toolCall && typeof toolName === "string") {
        onToolCall(toolUseId, toolName);
        return;
      }
      if (type === EVENT_TYPES.toolCallResult) {
        onToolResult(toolUseId, event.isError === true);
      }
    },
  };
}
|
|
147
|
+
|
|
148
|
+
/**
 * Reports a failing MCP server through `sink`: first a warning carrying
 * the server's counters, then a high-priority "system" notification
 * whose body names the server and its consecutive-failure count.
 */
function emitFailureNotice(server: string, entry: ServerStats, sink: NotificationSink): void {
  const { consecutiveFailures, totalFailures, totalCalls } = entry;
  const body = `MCP server ${server} returned errors on ${String(consecutiveFailures)} consecutive tool calls; check API key, network, or upstream service health.`;

  sink.warn("mcp", "subprocess appears broken — consecutive tool errors crossed threshold", {
    server,
    consecutiveFailures,
    totalFailures,
    totalCalls,
  });

  sink.publish({
    // Deterministic id so the notification engine's legacyId dedup
    // matches across restarts — bell entries from previous boots
    // for the same broken server don't pile up.
    id: `mcp-failure-${server}`,
    kind: "system",
    title: "MCP server failing",
    body,
    action: { type: NOTIFICATION_ACTION_TYPES.none },
    priority: NOTIFICATION_PRIORITIES.high,
  });
}
|