@mandujs/mcp 0.28.2 → 0.30.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,154 +1,404 @@
1
- /**
2
- * `mandu_ate_run` — Phase A.2 agent-facing spec runner.
3
- *
4
- * Wraps `@mandujs/ate`'s `runSpec` behind the MCP tool surface.
5
- *
6
- * Semantics: execute a single spec file (Playwright or bun:test,
7
- * auto-detected from the path), then return the failure.v1-shaped
8
- * JSON — `{ status: "pass", ... }` on green, full failure envelope
9
- * on red. Shard argument is forwarded transparently.
10
- *
11
- * The handler validates the returned shape against the failure.v1
12
- * Zod schema on failure (cheap, catches translator regressions).
13
- * On pass we return the pass envelope as-is.
14
- *
15
- * Snake_case naming per §11 decision 4.
16
- */
17
- import type { Tool } from "@modelcontextprotocol/sdk/types.js";
18
- import { runSpec, failureV1Schema, type RunResult } from "@mandujs/ate";
19
-
20
- export const ateRunToolDefinitions: Tool[] = [
21
- {
22
- name: "mandu_ate_run",
23
- annotations: {
24
- readOnlyHint: false,
25
- },
26
- description:
27
- "Phase A.2 agent-native spec runner. Executes ONE spec file " +
28
- "(Playwright if the path matches tests/e2e/** or *.e2e.ts, otherwise bun:test) " +
29
- "and returns structured JSON. On pass: { status: 'pass', durationMs, assertions, graphVersion, runId }. " +
30
- "On fail: a failure.v1 envelope with discriminated `kind` (one of: selector_drift, " +
31
- "contract_mismatch, redirect_unexpected, hydration_timeout, rate_limit_exceeded, " +
32
- "csrf_invalid, fixture_missing, semantic_divergence), kind-specific `detail`, " +
33
- "`healing.auto[]` (deterministic replacements when confidence >= threshold), " +
34
- "`healing.requires_llm` (true for shape-level failures), `flakeScore`, `lastPassedAt`, " +
35
- "`graphVersion` (agent cache invalidation key), and trace/screenshot/dom artifacts " +
36
- "staged under .mandu/ate-artifacts/<runId>/. Use `shard: { current, total }` to " +
37
- "distribute across CI workers.",
38
- inputSchema: {
39
- type: "object",
40
- properties: {
41
- repoRoot: {
42
- type: "string",
43
- description: "Absolute path to the Mandu project root",
44
- },
45
- spec: {
46
- oneOf: [
47
- { type: "string" },
48
- {
49
- type: "object",
50
- properties: {
51
- path: { type: "string" },
52
- },
53
- required: ["path"],
54
- },
55
- ],
56
- description:
57
- "Spec file either a path string (relative to repoRoot) or { path }. " +
58
- "Runner is auto-detected from the path (Playwright vs bun:test).",
59
- },
60
- headed: {
61
- type: "boolean",
62
- description: "Playwright only run headed. Default: false (headless).",
63
- },
64
- trace: {
65
- type: "boolean",
66
- description: "Playwright only — capture trace. Default: true.",
67
- },
68
- shard: {
69
- type: "object",
70
- properties: {
71
- current: { type: "number", minimum: 1 },
72
- total: { type: "number", minimum: 1 },
73
- },
74
- required: ["current", "total"],
75
- description:
76
- "CI sharding — `current` is 1-based. Playwright receives --shard=current/total; " +
77
- "bun:test falls back to hash-based partitioning.",
78
- },
79
- },
80
- required: ["repoRoot", "spec"],
81
- },
82
- },
83
- ];
84
-
85
- export function ateRunTools(_projectRoot: string) {
86
- return {
87
- mandu_ate_run: async (args: Record<string, unknown>) => {
88
- const { repoRoot, spec, headed, trace, shard } = args as {
89
- repoRoot: string;
90
- spec: string | { path: string };
91
- headed?: boolean;
92
- trace?: boolean;
93
- shard?: { current: number; total: number };
94
- };
95
- if (!repoRoot || typeof repoRoot !== "string") {
96
- return { ok: false, error: "repoRoot is required" };
97
- }
98
- if (!spec) {
99
- return { ok: false, error: "spec is required" };
100
- }
101
- const specPath = typeof spec === "string" ? spec : spec?.path;
102
- if (!specPath || typeof specPath !== "string") {
103
- return { ok: false, error: "spec.path or spec string is required" };
104
- }
105
- if (shard) {
106
- if (
107
- typeof shard.current !== "number" ||
108
- typeof shard.total !== "number" ||
109
- shard.current < 1 ||
110
- shard.total < 1 ||
111
- shard.current > shard.total
112
- ) {
113
- return {
114
- ok: false,
115
- error: `invalid shard: ${JSON.stringify(shard)} (current must be 1..total)`,
116
- };
117
- }
118
- }
119
-
120
- let result: RunResult;
121
- try {
122
- result = await runSpec({
123
- repoRoot,
124
- spec: specPath,
125
- headed,
126
- trace,
127
- shard,
128
- });
129
- } catch (err) {
130
- return {
131
- ok: false,
132
- error: `runSpec failed: ${err instanceof Error ? err.message : String(err)}`,
133
- };
134
- }
135
-
136
- // On failure, re-validate the shape against failure.v1. The
137
- // runSpec path already does this, but re-checking at the MCP
138
- // boundary means a buggy translator is caught before the
139
- // payload crosses the wire.
140
- if (result.status === "fail") {
141
- const parsed = failureV1Schema.safeParse(result);
142
- if (!parsed.success) {
143
- return {
144
- ok: false,
145
- error: `runSpec emitted invalid failure.v1: ${parsed.error.issues[0]?.message ?? "schema mismatch"}`,
146
- result,
147
- };
148
- }
149
- return { ok: true, result: parsed.data };
150
- }
151
- return { ok: true, result };
152
- },
153
- };
154
- }
1
+ /**
2
+ * `mandu_ate_run` — Phase A.2 agent-facing spec runner.
3
+ *
4
+ * Wraps `@mandujs/ate`'s `runSpec` behind the MCP tool surface.
5
+ *
6
+ * Semantics: execute a single spec file (Playwright or bun:test,
7
+ * auto-detected from the path), then return the failure.v1-shaped
8
+ * JSON — `{ status: "pass", ... }` on green, full failure envelope
9
+ * on red. Shard argument is forwarded transparently.
10
+ *
11
+ * The handler validates the returned shape against the failure.v1
12
+ * Zod schema on failure (cheap, catches translator regressions).
13
+ * On pass we return the pass envelope as-is.
14
+ *
15
+ * Issue #238 wiring:
16
+ * - Subscribes to `eventBus.on("ate", ...)` for the duration of the
17
+ * run and forwards every `spec_done` (plus the final `run_end`) as an
17
+ * MCP `notifications/progress`. The progress total is captured from
18
+ * the `run_start` event. The progressToken falls back to the runId
19
+ * when the caller didn't supply a client token; the notification is
20
+ * still emitted through the server in that case, just without a
21
+ * client-actionable token.
23
+ * - On timeout / exec failure, writes a partial results.json under
24
+ * `.mandu/reports/run-<runId>/` so `mandu.ate.heal` stays reachable
25
+ * even when the 10-min watchdog killed the runner.
26
+ *
27
+ * Snake_case naming per §11 decision 4.
28
+ */
29
+ import type { Tool } from "@modelcontextprotocol/sdk/types.js";
30
+ import type { Server } from "@modelcontextprotocol/sdk/server/index.js";
31
+ import { mkdirSync, writeFileSync } from "node:fs";
32
+ import { join } from "node:path";
33
+ import {
34
+ runSpec,
35
+ failureV1Schema,
36
+ type RunResult,
37
+ type AteMonitorEvent,
38
+ type FailureV1,
39
+ } from "@mandujs/ate";
40
+ import { eventBus } from "@mandujs/core/observability";
41
+
42
+ export const ateRunToolDefinitions: Tool[] = [
43
+ {
44
+ name: "mandu_ate_run",
45
+ annotations: {
46
+ readOnlyHint: false,
47
+ },
48
+ description:
49
+ "Phase A.2 agent-native spec runner. Executes ONE spec file " +
50
+ "(Playwright if the path matches tests/e2e/** or *.e2e.ts, otherwise bun:test) " +
51
+ "and returns structured JSON. On pass: { status: 'pass', durationMs, assertions, graphVersion, runId }. " +
52
+ "On fail: a failure.v1 envelope with discriminated `kind` (one of: selector_drift, " +
53
+ "contract_mismatch, redirect_unexpected, hydration_timeout, rate_limit_exceeded, " +
54
+ "csrf_invalid, fixture_missing, semantic_divergence), kind-specific `detail`, " +
55
+ "`healing.auto[]` (deterministic replacements when confidence >= threshold), " +
56
+ "`healing.requires_llm` (true for shape-level failures), `flakeScore`, `lastPassedAt`, " +
57
+ "`graphVersion` (agent cache invalidation key), and trace/screenshot/dom artifacts " +
58
+ "staged under .mandu/ate-artifacts/<runId>/. Use `shard: { current, total }` to " +
59
+ "distribute across CI workers. Emits notifications/progress per spec_done event. " +
60
+ "On timeout / cancel, writes .mandu/reports/run-<runId>/results.json with partial state. " +
61
+ "Issue #237 — `grep` narrows execution to specific `test(...)` titles inside the " +
62
+ "selected spec (forwarded to Playwright --grep / bun:test --test-name-pattern). " +
63
+ "For batch / multi-spec runs use mandu.ate.run (which also accepts onlyFiles + onlyRoutes).",
64
+ inputSchema: {
65
+ type: "object",
66
+ properties: {
67
+ repoRoot: {
68
+ type: "string",
69
+ description: "Absolute path to the Mandu project root",
70
+ },
71
+ spec: {
72
+ oneOf: [
73
+ { type: "string" },
74
+ {
75
+ type: "object",
76
+ properties: {
77
+ path: { type: "string" },
78
+ },
79
+ required: ["path"],
80
+ },
81
+ ],
82
+ description:
83
+ "Spec file — either a path string (relative to repoRoot) or { path }. " +
84
+ "Runner is auto-detected from the path (Playwright vs bun:test).",
85
+ },
86
+ headed: {
87
+ type: "boolean",
88
+ description: "Playwright only run headed. Default: false (headless).",
89
+ },
90
+ trace: {
91
+ type: "boolean",
92
+ description: "Playwright only — capture trace. Default: true.",
93
+ },
94
+ grep: {
95
+ type: "string",
96
+ description:
97
+ "Issue #237 — pass-through to Playwright --grep / bun:test --test-name-pattern. " +
98
+ "Filters by test-block title within the selected spec.",
99
+ },
100
+ shard: {
101
+ type: "object",
102
+ properties: {
103
+ current: { type: "number", minimum: 1 },
104
+ total: { type: "number", minimum: 1 },
105
+ },
106
+ required: ["current", "total"],
107
+ description:
108
+ "CI sharding — `current` is 1-based. Playwright receives --shard=current/total; " +
109
+ "bun:test falls back to hash-based partitioning.",
110
+ },
111
+ progressToken: {
112
+ type: ["string", "number"],
113
+ description:
114
+ "Optional MCP progress token to associate with emitted notifications/progress. " +
115
+ "When omitted the runId is used as a fallback so progress events still correlate.",
116
+ },
117
+ },
118
+ required: ["repoRoot", "spec"],
119
+ },
120
+ },
121
+ ];
122
+
123
+ /**
124
+ * Partial-result envelope written to disk when a run is killed mid-way.
125
+ * Mirrors the shape heal/report consumers already know how to parse,
126
+ * plus the extra status/killedAt fields so downstream tooling can spot
127
+ * incomplete records without probing `mtime`.
128
+ */
129
+ export interface PartialRunResults {
130
+ runId: string;
131
+ status: "timed_out" | "cancelled" | "error";
132
+ graphVersion: string;
133
+ completedSpecs: Array<{
134
+ specPath: string;
135
+ status: "pass" | "fail" | "skip";
136
+ durationMs: number;
137
+ }>;
138
+ inProgressSpec: string | null;
139
+ failures: FailureV1[];
140
+ startedAt: string;
141
+ killedAt: string;
142
+ error?: string;
143
+ }
144
+
145
+ /**
146
+ * Write the partial-results record under `.mandu/reports/run-<runId>/`.
147
+ * Never throws — a write failure is swallowed and reported as a `null`
148
+ * return, since the caller has already decided the run is over.
149
+ */
150
+ export function writePartialResults(
151
+ repoRoot: string,
152
+ partial: PartialRunResults,
153
+ ): string | null {
154
+ try {
155
+ const dir = join(repoRoot, ".mandu", "reports", `run-${partial.runId}`);
156
+ mkdirSync(dir, { recursive: true });
157
+ const target = join(dir, "results.json");
158
+ writeFileSync(target, JSON.stringify(partial, null, 2), "utf8");
159
+ return target;
160
+ } catch {
161
+ return null;
162
+ }
163
+ }
164
+
165
+ /**
166
+ * Stateful accumulator + progress-notification pipe. Exposed as a
167
+ * factory so unit tests can drive the event handling path without
168
+ * depending on the timing of a live runSpec call.
169
+ *
170
+ * Feed it by calling `handle()` for each incoming AteMonitorEvent;
171
+ * `spec_done` and `run_end` events invoke `sendProgress` synchronously
172
+ * (throws and rejections are swallowed). Call `snapshot()` after a kill
173
+ * to build a PartialRunResults.
174
+ */
175
+ export interface AteProgressTracker {
176
+ handle: (data: AteMonitorEvent) => void;
177
+ snapshot: () => {
178
+ runId: string | null;
179
+ graphVersion: string;
180
+ completedSpecs: PartialRunResults["completedSpecs"];
181
+ inProgressSpec: string | null;
182
+ failures: FailureV1[];
183
+ };
184
+ }
185
+
186
+ export function createAteProgressTracker(options: {
187
+ progressToken?: string | number;
188
+ sendProgress: (progress: number, total: number, message: string) => void | Promise<void>;
189
+ }): AteProgressTracker {
190
+ let runId: string | null = null;
191
+ let graphVersion = "";
192
+ let specTotal = 1;
193
+ let completedCount = 0;
194
+ let inProgressSpec: string | null = null;
195
+ const completedSpecs: PartialRunResults["completedSpecs"] = [];
196
+ const failures: FailureV1[] = [];
197
+
198
+ const fire = (progress: number, total: number, message: string) => {
199
+ try {
200
+ const res = options.sendProgress(progress, total, message);
201
+ if (res && typeof (res as Promise<void>).then === "function") {
202
+ (res as Promise<void>).catch(() => {
203
+ /* swallow */
204
+ });
205
+ }
206
+ } catch {
207
+ /* swallow */
208
+ }
209
+ };
210
+
211
+ return {
212
+ handle(data: AteMonitorEvent) {
213
+ try {
214
+ if (data.kind === "run_start") {
215
+ runId = data.runId;
216
+ graphVersion = data.graphVersion;
217
+ specTotal = Math.max(1, data.specPaths.length);
218
+ return;
219
+ }
220
+ if (data.kind === "spec_progress" && data.phase === "executing") {
221
+ inProgressSpec = data.specPath;
222
+ return;
223
+ }
224
+ if (data.kind === "failure_captured") {
225
+ failures.push(data.failure);
226
+ return;
227
+ }
228
+ if (data.kind === "spec_done") {
229
+ completedCount += 1;
230
+ inProgressSpec = null;
231
+ completedSpecs.push({
232
+ specPath: data.specPath,
233
+ status: data.status,
234
+ durationMs: data.durationMs,
235
+ });
236
+ const basename = data.specPath.split(/[\\/]/).pop() ?? data.specPath;
237
+ fire(
238
+ completedCount,
239
+ specTotal,
240
+ `[${completedCount}/${specTotal}] ${basename} ${data.status}`,
241
+ );
242
+ return;
243
+ }
244
+ if (data.kind === "run_end") {
245
+ fire(
246
+ specTotal,
247
+ specTotal,
248
+ `done — ${data.passed} pass, ${data.failed} fail, ${data.skipped} skip`,
249
+ );
250
+ return;
251
+ }
252
+ } catch {
253
+ /* swallow */
254
+ }
255
+ },
256
+ snapshot() {
257
+ return {
258
+ runId,
259
+ graphVersion,
260
+ completedSpecs,
261
+ inProgressSpec,
262
+ failures,
263
+ };
264
+ },
265
+ };
266
+ }
267
+
268
+ /**
269
+ * Build the handler factory. `server` is optional — tests that don't
270
+ * instantiate an MCP server (e.g. unit-level invocations) can pass
271
+ * `undefined` and progress notifications are silently no-oped.
272
+ */
273
+ export function ateRunTools(_projectRoot: string, server?: Server) {
274
+ return {
275
+ mandu_ate_run: async (args: Record<string, unknown>) => {
276
+ const { repoRoot, spec, headed, trace, shard, grep, progressToken } = args as {
277
+ repoRoot: string;
278
+ spec: string | { path: string };
279
+ headed?: boolean;
280
+ trace?: boolean;
281
+ shard?: { current: number; total: number };
282
+ grep?: string;
283
+ progressToken?: string | number;
284
+ };
285
+ if (!repoRoot || typeof repoRoot !== "string") {
286
+ return { ok: false, error: "repoRoot is required" };
287
+ }
288
+ if (!spec) {
289
+ return { ok: false, error: "spec is required" };
290
+ }
291
+ const specPath = typeof spec === "string" ? spec : spec?.path;
292
+ if (!specPath || typeof specPath !== "string") {
293
+ return { ok: false, error: "spec.path or spec string is required" };
294
+ }
295
+ if (shard) {
296
+ if (
297
+ typeof shard.current !== "number" ||
298
+ typeof shard.total !== "number" ||
299
+ shard.current < 1 ||
300
+ shard.total < 1 ||
301
+ shard.current > shard.total
302
+ ) {
303
+ return {
304
+ ok: false,
305
+ error: `invalid shard: ${JSON.stringify(shard)} (current must be 1..total)`,
306
+ };
307
+ }
308
+ }
309
+
310
+ // ── Event accumulator for progress + partial-results on timeout.
311
+ const started = new Date().toISOString();
312
+
313
+ const tracker = createAteProgressTracker({
314
+ progressToken,
315
+ sendProgress: async (progress, total, message) => {
316
+ if (!server) return;
317
+ const snap = tracker.snapshot();
318
+ const token = progressToken ?? snap.runId;
319
+ if (!token) return;
320
+ try {
321
+ await server.notification({
322
+ method: "notifications/progress",
323
+ params: { progressToken: token, progress, total, message },
324
+ });
325
+ } catch {
326
+ // Transport may be offline — never fail the run.
327
+ }
328
+ },
329
+ });
330
+
331
+ const unsubscribe = eventBus.on("ate", (event) => {
332
+ try {
333
+ const data = event.data as unknown as AteMonitorEvent | undefined;
334
+ if (!data || typeof data.kind !== "string") return;
335
+ tracker.handle(data);
336
+ } catch {
337
+ // Listener errors must never propagate.
338
+ }
339
+ });
340
+
341
+ let result: RunResult;
342
+ try {
343
+ result = await runSpec({
344
+ repoRoot,
345
+ spec: specPath,
346
+ headed,
347
+ trace,
348
+ shard,
349
+ grep,
350
+ });
351
+ } catch (err) {
352
+ // Runner timeout / exec error — persist partial state so heal
353
+ // stays reachable.
354
+ const message = err instanceof Error ? err.message : String(err);
355
+ const isTimeout = /timed out/i.test(message);
356
+ const snap = tracker.snapshot();
357
+ const partial: PartialRunResults = {
358
+ runId: snap.runId ?? `unknown-${Date.now()}`,
359
+ status: isTimeout ? "timed_out" : "error",
360
+ graphVersion: snap.graphVersion,
361
+ completedSpecs: snap.completedSpecs,
362
+ inProgressSpec: snap.inProgressSpec,
363
+ failures: snap.failures,
364
+ startedAt: started,
365
+ killedAt: new Date().toISOString(),
366
+ error: message,
367
+ };
368
+ const resultsPath = writePartialResults(repoRoot, partial);
369
+ unsubscribe();
370
+ return {
371
+ ok: false,
372
+ error: `runSpec failed: ${message}`,
373
+ partial,
374
+ resultsPath,
375
+ runId: partial.runId,
376
+ };
377
+ } finally {
378
+ // Runtime-safe even on success — idempotent unsubscribe.
379
+ try {
380
+ unsubscribe();
381
+ } catch {
382
+ /* no-op */
383
+ }
384
+ }
385
+
386
+ // On failure, re-validate the shape against failure.v1. The
387
+ // runSpec path already does this, but re-checking at the MCP
388
+ // boundary means a buggy translator is caught before the
389
+ // payload crosses the wire.
390
+ if (result.status === "fail") {
391
+ const parsed = failureV1Schema.safeParse(result);
392
+ if (!parsed.success) {
393
+ return {
394
+ ok: false,
395
+ error: `runSpec emitted invalid failure.v1: ${parsed.error.issues[0]?.message ?? "schema mismatch"}`,
396
+ result,
397
+ };
398
+ }
399
+ return { ok: true, result: parsed.data };
400
+ }
401
+ return { ok: true, result };
402
+ },
403
+ };
404
+ }