@mandujs/mcp 0.28.2 → 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mandujs/mcp",
3
- "version": "0.28.2",
3
+ "version": "0.29.0",
4
4
  "description": "Mandu MCP Server - Agent-native interface for Mandu framework operations",
5
5
  "type": "module",
6
6
  "main": "./src/index.ts",
@@ -34,8 +34,8 @@
34
34
  "access": "public"
35
35
  },
36
36
  "dependencies": {
37
- "@mandujs/core": "^0.41.2",
38
- "@mandujs/ate": "^0.24.0",
37
+ "@mandujs/core": "^0.42.0",
38
+ "@mandujs/ate": "^0.25.0",
39
39
  "@mandujs/skills": "^0.18.0",
40
40
  "@modelcontextprotocol/sdk": "^1.25.3"
41
41
  },
@@ -9,6 +9,14 @@ import fs from "fs";
9
9
  import path from "path";
10
10
  import type { Subprocess } from "bun";
11
11
  import { eventBus } from "@mandujs/core/observability";
12
+ import type { AteMonitorEvent } from "@mandujs/ate";
13
+
14
+ /**
15
+ * Local alias — reserved in case we need to accept slightly looser
16
+ * shapes at the subscription boundary (forward-compat with events
17
+ * emitted by newer ATE versions). Today it is a direct re-export.
18
+ */
19
+ type AteMonitorEventShape = AteMonitorEvent;
12
20
 
13
21
  const TOOL_ICONS: Record<string, string> = {
14
22
  // Spec
@@ -60,6 +68,10 @@ const TOOL_ICONS: Record<string, string> = {
60
68
  mandu_add_client_slot: "CLIENT+",
61
69
  // Error
62
70
  mandu_analyze_error: "ERROR",
71
+ // ATE — display tokens for per-run/per-spec lifecycle events
72
+ "ate.run": "ATE-RUN",
73
+ "ate.pass": "ATE-PASS",
74
+ "ate.fail": "ATE-FAIL",
63
75
  };
64
76
 
65
77
  type MonitorSeverity = "info" | "warn" | "error";
@@ -281,6 +293,12 @@ export class ActivityMonitor {
281
293
  private toolStartTimes = new Map<string, number>();
282
294
  // Phase 5-1: 에이전트 세션 식별 (MCP 클라이언트별 추적)
283
295
  public sessionId: string = crypto.randomUUID();
296
+ // ATE monitor plumbing — subscription handle + per-run accumulator for
297
+ // artifacts (so run_end can summarize them) + per-spec failure kind
298
+ // cache (so spec_done can inline it).
299
+ private ateUnsubscribe: (() => void) | null = null;
300
+ private ateRunArtifacts = new Map<string, { count: number; dir?: string }>();
301
+ private ateSpecFailureKinds = new Map<string, string>();
284
302
 
285
303
  constructor(projectRoot: string) {
286
304
  this.projectRoot = projectRoot;
@@ -338,9 +356,25 @@ export class ActivityMonitor {
338
356
  if (this.config.openTerminal) {
339
357
  this.openTerminal();
340
358
  }
359
+
360
+ // Subscribe to ATE runner events — structured per-run progress,
361
+ // per-spec pass/fail, failure.v1 captures, artifact writes.
362
+ this.ateUnsubscribe = eventBus.on("ate", (event) => {
363
+ try {
364
+ const payload = event.data as unknown as AteMonitorEventShape | undefined;
365
+ if (!payload || typeof payload.kind !== "string") return;
366
+ this.handleAteEvent(payload);
367
+ } catch {
368
+ // Never let a bad payload tear the monitor down.
369
+ }
370
+ });
341
371
  }
342
372
 
343
373
  stop(): void {
374
+ if (this.ateUnsubscribe) {
375
+ this.ateUnsubscribe();
376
+ this.ateUnsubscribe = null;
377
+ }
344
378
  this.flush(true);
345
379
  if (this.tailProcess) {
346
380
  this.tailProcess.kill();
@@ -637,6 +671,148 @@ export class ActivityMonitor {
637
671
  }
638
672
  }
639
673
 
674
+ /**
675
+ * Render an ATE monitor event (run_start / spec_progress / spec_done /
676
+ * failure_captured / artifact_saved / run_end). Writes through the
677
+ * shared output path so both pretty + JSON modes work uniformly.
678
+ *
679
+ * Pretty mode policies:
680
+ * - `spec_progress` suppressed unless MANDU_ATE_VERBOSE=1 is set
681
+ * (too noisy by default).
682
+ * - `artifact_saved` collected silently and summarized in run_end.
683
+ * - `spec_done(fail)` inlines the `failure.v1` kind when a matching
684
+ * `failure_captured` fired within the same spec.
685
+ */
686
+ private handleAteEvent(data: AteMonitorEventShape): void {
687
+ if (!this.logStream) return;
688
+
689
+ // JSON mode → verbatim line per event.
690
+ if (this.outputFormat === "json") {
691
+ const payload: MonitorEvent = {
692
+ ts: new Date().toISOString(),
693
+ type: `ate.${data.kind}`,
694
+ severity: this.ateSeverityFor(data),
695
+ source: "ate",
696
+ data: data as unknown as Record<string, unknown>,
697
+ };
698
+ const line = this.formatEvent(payload);
699
+ if (line) {
700
+ this.write(line);
701
+ this.updateSummary(payload);
702
+ }
703
+ return;
704
+ }
705
+
706
+ // Pretty mode — route per-kind.
707
+ const verbose = process.env.MANDU_ATE_VERBOSE === "1";
708
+ const time = getTime();
709
+
710
+ switch (data.kind) {
711
+ case "run_start": {
712
+ this.ateRunArtifacts.set(data.runId, { count: 0 });
713
+ const runIdShort = data.runId.slice(-8);
714
+ const line = `${time} > [ATE-RUN] ${runIdShort} starting (${data.specPaths.length} specs)\n`;
715
+ this.write(line);
716
+ this.updateSummary({
717
+ ts: new Date().toISOString(),
718
+ type: "ate.run_start",
719
+ severity: "info",
720
+ source: "ate",
721
+ });
722
+ return;
723
+ }
724
+ case "spec_progress": {
725
+ // Suppressed by default — too noisy. Render only when
726
+ // MANDU_ATE_VERBOSE=1 is set.
727
+ if (!verbose) return;
728
+ const line = `${time} [ATE] ${data.specPath} (${data.phase})\n`;
729
+ this.write(line);
730
+ return;
731
+ }
732
+ case "failure_captured": {
733
+ // Cache the failure kind so `spec_done` can inline it. Render
734
+ // nothing here — the line is attached to the spec_done row.
735
+ this.ateSpecFailureKinds.set(
736
+ `${data.runId}:${data.specPath}`,
737
+ data.failure.kind,
738
+ );
739
+ return;
740
+ }
741
+ case "spec_done": {
742
+ const secs = (data.durationMs / 1000).toFixed(1);
743
+ const file = data.specPath.split(/[\\/]/).pop() ?? data.specPath;
744
+ if (data.status === "pass") {
745
+ const line = `${time} + [ATE] ${file} (${secs}s)\n`;
746
+ this.write(line);
747
+ this.updateSummary({
748
+ ts: new Date().toISOString(),
749
+ type: "ate.spec_done",
750
+ severity: "info",
751
+ source: "ate",
752
+ });
753
+ } else if (data.status === "fail") {
754
+ const kindKey = `${data.runId}:${data.specPath}`;
755
+ const failureKind = this.ateSpecFailureKinds.get(kindKey);
756
+ this.ateSpecFailureKinds.delete(kindKey);
757
+ const suffix = failureKind ? ` [${failureKind}]` : "";
758
+ const line = `${time} x [ATE] ${file} (${secs}s)${suffix}\n`;
759
+ this.write(line);
760
+ this.updateSummary({
761
+ ts: new Date().toISOString(),
762
+ type: "ate.spec_done",
763
+ severity: "error",
764
+ source: "ate",
765
+ });
766
+ } else {
767
+ // skip
768
+ if (verbose) {
769
+ const line = `${time} [ATE] ${file} skipped\n`;
770
+ this.write(line);
771
+ }
772
+ }
773
+ return;
774
+ }
775
+ case "artifact_saved": {
776
+ // Accumulate silently; run_end summarizes.
777
+ const entry = this.ateRunArtifacts.get(data.runId) ?? { count: 0 };
778
+ entry.count += 1;
779
+ if (!entry.dir) {
780
+ const dir = path.dirname(data.path);
781
+ entry.dir = dir;
782
+ }
783
+ this.ateRunArtifacts.set(data.runId, entry);
784
+ return;
785
+ }
786
+ case "run_end": {
787
+ const runIdShort = data.runId.slice(-8);
788
+ const secs = (data.durationMs / 1000).toFixed(1);
789
+ const artifactInfo = this.ateRunArtifacts.get(data.runId);
790
+ this.ateRunArtifacts.delete(data.runId);
791
+ const artifactSuffix = artifactInfo && artifactInfo.count > 0 && artifactInfo.dir
792
+ ? `. artifacts: ${artifactInfo.dir}`
793
+ : "";
794
+ const line =
795
+ `${time} * [ATE-RUN] ${runIdShort} done — ` +
796
+ `${data.passed} pass, ${data.failed} fail, ${data.skipped} skip (${secs}s)${artifactSuffix}\n`;
797
+ this.write(line);
798
+ this.updateSummary({
799
+ ts: new Date().toISOString(),
800
+ type: "ate.run_end",
801
+ severity: data.failed > 0 ? "error" : "info",
802
+ source: "ate",
803
+ });
804
+ return;
805
+ }
806
+ }
807
+ }
808
+
809
+ private ateSeverityFor(data: AteMonitorEventShape): MonitorSeverity {
810
+ if (data.kind === "failure_captured") return "error";
811
+ if (data.kind === "spec_done" && data.status === "fail") return "error";
812
+ if (data.kind === "run_end" && data.failed > 0) return "error";
813
+ return "info";
814
+ }
815
+
640
816
  private enqueue(event: MonitorEvent): void {
641
817
  if (!this.logStream) return;
642
818
  const now = Date.now();
@@ -1,154 +1,393 @@
1
- /**
2
- * `mandu_ate_run` — Phase A.2 agent-facing spec runner.
3
- *
4
- * Wraps `@mandujs/ate`'s `runSpec` behind the MCP tool surface.
5
- *
6
- * Semantics: execute a single spec file (Playwright or bun:test,
7
- * auto-detected from the path), then return the failure.v1-shaped
8
- * JSON — `{ status: "pass", ... }` on green, full failure envelope
9
- * on red. Shard argument is forwarded transparently.
10
- *
11
- * The handler validates the returned shape against the failure.v1
12
- * Zod schema on failure (cheap, catches translator regressions).
13
- * On pass we return the pass envelope as-is.
14
- *
15
- * Snake_case naming per §11 decision 4.
16
- */
17
- import type { Tool } from "@modelcontextprotocol/sdk/types.js";
18
- import { runSpec, failureV1Schema, type RunResult } from "@mandujs/ate";
19
-
20
- export const ateRunToolDefinitions: Tool[] = [
21
- {
22
- name: "mandu_ate_run",
23
- annotations: {
24
- readOnlyHint: false,
25
- },
26
- description:
27
- "Phase A.2 agent-native spec runner. Executes ONE spec file " +
28
- "(Playwright if the path matches tests/e2e/** or *.e2e.ts, otherwise bun:test) " +
29
- "and returns structured JSON. On pass: { status: 'pass', durationMs, assertions, graphVersion, runId }. " +
30
- "On fail: a failure.v1 envelope with discriminated `kind` (one of: selector_drift, " +
31
- "contract_mismatch, redirect_unexpected, hydration_timeout, rate_limit_exceeded, " +
32
- "csrf_invalid, fixture_missing, semantic_divergence), kind-specific `detail`, " +
33
- "`healing.auto[]` (deterministic replacements when confidence >= threshold), " +
34
- "`healing.requires_llm` (true for shape-level failures), `flakeScore`, `lastPassedAt`, " +
35
- "`graphVersion` (agent cache invalidation key), and trace/screenshot/dom artifacts " +
36
- "staged under .mandu/ate-artifacts/<runId>/. Use `shard: { current, total }` to " +
37
- "distribute across CI workers.",
38
- inputSchema: {
39
- type: "object",
40
- properties: {
41
- repoRoot: {
42
- type: "string",
43
- description: "Absolute path to the Mandu project root",
44
- },
45
- spec: {
46
- oneOf: [
47
- { type: "string" },
48
- {
49
- type: "object",
50
- properties: {
51
- path: { type: "string" },
52
- },
53
- required: ["path"],
54
- },
55
- ],
56
- description:
57
- "Spec file — either a path string (relative to repoRoot) or { path }. " +
58
- "Runner is auto-detected from the path (Playwright vs bun:test).",
59
- },
60
- headed: {
61
- type: "boolean",
62
- description: "Playwright only — run headed. Default: false (headless).",
63
- },
64
- trace: {
65
- type: "boolean",
66
- description: "Playwright only — capture trace. Default: true.",
67
- },
68
- shard: {
69
- type: "object",
70
- properties: {
71
- current: { type: "number", minimum: 1 },
72
- total: { type: "number", minimum: 1 },
73
- },
74
- required: ["current", "total"],
75
- description:
76
- "CI sharding — `current` is 1-based. Playwright receives --shard=current/total; " +
77
- "bun:test falls back to hash-based partitioning.",
78
- },
79
- },
80
- required: ["repoRoot", "spec"],
81
- },
82
- },
83
- ];
84
-
85
- export function ateRunTools(_projectRoot: string) {
86
- return {
87
- mandu_ate_run: async (args: Record<string, unknown>) => {
88
- const { repoRoot, spec, headed, trace, shard } = args as {
89
- repoRoot: string;
90
- spec: string | { path: string };
91
- headed?: boolean;
92
- trace?: boolean;
93
- shard?: { current: number; total: number };
94
- };
95
- if (!repoRoot || typeof repoRoot !== "string") {
96
- return { ok: false, error: "repoRoot is required" };
97
- }
98
- if (!spec) {
99
- return { ok: false, error: "spec is required" };
100
- }
101
- const specPath = typeof spec === "string" ? spec : spec?.path;
102
- if (!specPath || typeof specPath !== "string") {
103
- return { ok: false, error: "spec.path or spec string is required" };
104
- }
105
- if (shard) {
106
- if (
107
- typeof shard.current !== "number" ||
108
- typeof shard.total !== "number" ||
109
- shard.current < 1 ||
110
- shard.total < 1 ||
111
- shard.current > shard.total
112
- ) {
113
- return {
114
- ok: false,
115
- error: `invalid shard: ${JSON.stringify(shard)} (current must be 1..total)`,
116
- };
117
- }
118
- }
119
-
120
- let result: RunResult;
121
- try {
122
- result = await runSpec({
123
- repoRoot,
124
- spec: specPath,
125
- headed,
126
- trace,
127
- shard,
128
- });
129
- } catch (err) {
130
- return {
131
- ok: false,
132
- error: `runSpec failed: ${err instanceof Error ? err.message : String(err)}`,
133
- };
134
- }
135
-
136
- // On failure, re-validate the shape against failure.v1. The
137
- // runSpec path already does this, but re-checking at the MCP
138
- // boundary means a buggy translator is caught before the
139
- // payload crosses the wire.
140
- if (result.status === "fail") {
141
- const parsed = failureV1Schema.safeParse(result);
142
- if (!parsed.success) {
143
- return {
144
- ok: false,
145
- error: `runSpec emitted invalid failure.v1: ${parsed.error.issues[0]?.message ?? "schema mismatch"}`,
146
- result,
147
- };
148
- }
149
- return { ok: true, result: parsed.data };
150
- }
151
- return { ok: true, result };
152
- },
153
- };
154
- }
1
+ /**
2
+ * `mandu_ate_run` — Phase A.2 agent-facing spec runner.
3
+ *
4
+ * Wraps `@mandujs/ate`'s `runSpec` behind the MCP tool surface.
5
+ *
6
+ * Semantics: execute a single spec file (Playwright or bun:test,
7
+ * auto-detected from the path), then return the failure.v1-shaped
8
+ * JSON — `{ status: "pass", ... }` on green, full failure envelope
9
+ * on red. Shard argument is forwarded transparently.
10
+ *
11
+ * The handler validates the returned shape against the failure.v1
12
+ * Zod schema on failure (cheap, catches translator regressions).
13
+ * On pass we return the pass envelope as-is.
14
+ *
15
+ * Issue #238 wiring:
16
+ * - Subscribes to `eventBus.on("ate", ...)` for the duration of the
17
+ * run and forwards every `spec_done` as an MCP
18
+ * `notifications/progress`. Progress total is captured from the
19
+ * `run_start` event, progressToken defaults to the runId when the
20
+ * caller didn't supply a client token (graceful no-op in that
21
+ * case — the notification is still emitted through the server but
22
+ * without an actionable token).
23
+ * - On timeout / exec failure, writes a partial results.json under
24
+ * `.mandu/reports/run-<runId>/` so `mandu.ate.heal` stays reachable
25
+ * even when the 10-min watchdog killed the runner.
26
+ *
27
+ * Snake_case naming per §11 decision 4.
28
+ */
29
+ import type { Tool } from "@modelcontextprotocol/sdk/types.js";
30
+ import type { Server } from "@modelcontextprotocol/sdk/server/index.js";
31
+ import { mkdirSync, writeFileSync } from "node:fs";
32
+ import { join } from "node:path";
33
+ import {
34
+ runSpec,
35
+ failureV1Schema,
36
+ type RunResult,
37
+ type AteMonitorEvent,
38
+ type FailureV1,
39
+ } from "@mandujs/ate";
40
+ import { eventBus } from "@mandujs/core/observability";
41
+
42
+ export const ateRunToolDefinitions: Tool[] = [
43
+ {
44
+ name: "mandu_ate_run",
45
+ annotations: {
46
+ readOnlyHint: false,
47
+ },
48
+ description:
49
+ "Phase A.2 agent-native spec runner. Executes ONE spec file " +
50
+ "(Playwright if the path matches tests/e2e/** or *.e2e.ts, otherwise bun:test) " +
51
+ "and returns structured JSON. On pass: { status: 'pass', durationMs, assertions, graphVersion, runId }. " +
52
+ "On fail: a failure.v1 envelope with discriminated `kind` (one of: selector_drift, " +
53
+ "contract_mismatch, redirect_unexpected, hydration_timeout, rate_limit_exceeded, " +
54
+ "csrf_invalid, fixture_missing, semantic_divergence), kind-specific `detail`, " +
55
+ "`healing.auto[]` (deterministic replacements when confidence >= threshold), " +
56
+ "`healing.requires_llm` (true for shape-level failures), `flakeScore`, `lastPassedAt`, " +
57
+ "`graphVersion` (agent cache invalidation key), and trace/screenshot/dom artifacts " +
58
+ "staged under .mandu/ate-artifacts/<runId>/. Use `shard: { current, total }` to " +
59
+ "distribute across CI workers. Emits notifications/progress per spec_done event. " +
60
+ "On timeout / cancel, writes .mandu/reports/run-<runId>/results.json with partial state.",
61
+ inputSchema: {
62
+ type: "object",
63
+ properties: {
64
+ repoRoot: {
65
+ type: "string",
66
+ description: "Absolute path to the Mandu project root",
67
+ },
68
+ spec: {
69
+ oneOf: [
70
+ { type: "string" },
71
+ {
72
+ type: "object",
73
+ properties: {
74
+ path: { type: "string" },
75
+ },
76
+ required: ["path"],
77
+ },
78
+ ],
79
+ description:
80
+ "Spec file — either a path string (relative to repoRoot) or { path }. " +
81
+ "Runner is auto-detected from the path (Playwright vs bun:test).",
82
+ },
83
+ headed: {
84
+ type: "boolean",
85
+ description: "Playwright only — run headed. Default: false (headless).",
86
+ },
87
+ trace: {
88
+ type: "boolean",
89
+ description: "Playwright only — capture trace. Default: true.",
90
+ },
91
+ shard: {
92
+ type: "object",
93
+ properties: {
94
+ current: { type: "number", minimum: 1 },
95
+ total: { type: "number", minimum: 1 },
96
+ },
97
+ required: ["current", "total"],
98
+ description:
99
+ "CI sharding — `current` is 1-based. Playwright receives --shard=current/total; " +
100
+ "bun:test falls back to hash-based partitioning.",
101
+ },
102
+ progressToken: {
103
+ type: ["string", "number"],
104
+ description:
105
+ "Optional MCP progress token to associate with emitted notifications/progress. " +
106
+ "When omitted the runId is used as a fallback so progress events still correlate.",
107
+ },
108
+ },
109
+ required: ["repoRoot", "spec"],
110
+ },
111
+ },
112
+ ];
113
+
114
+ /**
115
+ * Partial-result envelope written to disk when a run is killed mid-way.
116
+ * Mirrors the shape heal/report consumers already know how to parse,
117
+ * plus the extra status/killedAt fields so downstream tooling can spot
118
+ * incomplete records without probing `mtime`.
119
+ */
120
+ export interface PartialRunResults {
121
+ runId: string;
122
+ status: "timed_out" | "cancelled" | "error";
123
+ graphVersion: string;
124
+ completedSpecs: Array<{
125
+ specPath: string;
126
+ status: "pass" | "fail" | "skip";
127
+ durationMs: number;
128
+ }>;
129
+ inProgressSpec: string | null;
130
+ failures: FailureV1[];
131
+ startedAt: string;
132
+ killedAt: string;
133
+ error?: string;
134
+ }
135
+
136
+ /**
137
+ * Write the partial-results record under `.mandu/reports/run-<runId>/`.
138
+ * Never throws — a write failure is swallowed and reported as a `null`
139
+ * return, since the caller has already decided the run is over.
140
+ */
141
+ export function writePartialResults(
142
+ repoRoot: string,
143
+ partial: PartialRunResults,
144
+ ): string | null {
145
+ try {
146
+ const dir = join(repoRoot, ".mandu", "reports", `run-${partial.runId}`);
147
+ mkdirSync(dir, { recursive: true });
148
+ const target = join(dir, "results.json");
149
+ writeFileSync(target, JSON.stringify(partial, null, 2), "utf8");
150
+ return target;
151
+ } catch {
152
+ return null;
153
+ }
154
+ }
155
+
156
+ /**
157
+ * Stateful accumulator + progress-notification pipe. Exposed as a
158
+ * factory so unit tests can drive the event handling path without
159
+ * depending on the timing of a live runSpec call.
160
+ *
161
+ * Subscribe by calling `handle()` for each incoming AteMonitorEvent;
162
+ * the corresponding progress notification fires synchronously via
163
+ * `sendProgress`. Snapshot the run state via `snapshot()` after kill
164
+ * to build a PartialRunResults.
165
+ */
166
+ export interface AteProgressTracker {
167
+ handle: (data: AteMonitorEvent) => void;
168
+ snapshot: () => {
169
+ runId: string | null;
170
+ graphVersion: string;
171
+ completedSpecs: PartialRunResults["completedSpecs"];
172
+ inProgressSpec: string | null;
173
+ failures: FailureV1[];
174
+ };
175
+ }
176
+
177
+ export function createAteProgressTracker(options: {
178
+ progressToken?: string | number;
179
+ sendProgress: (progress: number, total: number, message: string) => void | Promise<void>;
180
+ }): AteProgressTracker {
181
+ let runId: string | null = null;
182
+ let graphVersion = "";
183
+ let specTotal = 1;
184
+ let completedCount = 0;
185
+ let inProgressSpec: string | null = null;
186
+ const completedSpecs: PartialRunResults["completedSpecs"] = [];
187
+ const failures: FailureV1[] = [];
188
+
189
+ const fire = (progress: number, total: number, message: string) => {
190
+ try {
191
+ const res = options.sendProgress(progress, total, message);
192
+ if (res && typeof (res as Promise<void>).then === "function") {
193
+ (res as Promise<void>).catch(() => {
194
+ /* swallow */
195
+ });
196
+ }
197
+ } catch {
198
+ /* swallow */
199
+ }
200
+ };
201
+
202
+ return {
203
+ handle(data: AteMonitorEvent) {
204
+ try {
205
+ if (data.kind === "run_start") {
206
+ runId = data.runId;
207
+ graphVersion = data.graphVersion;
208
+ specTotal = Math.max(1, data.specPaths.length);
209
+ return;
210
+ }
211
+ if (data.kind === "spec_progress" && data.phase === "executing") {
212
+ inProgressSpec = data.specPath;
213
+ return;
214
+ }
215
+ if (data.kind === "failure_captured") {
216
+ failures.push(data.failure);
217
+ return;
218
+ }
219
+ if (data.kind === "spec_done") {
220
+ completedCount += 1;
221
+ inProgressSpec = null;
222
+ completedSpecs.push({
223
+ specPath: data.specPath,
224
+ status: data.status,
225
+ durationMs: data.durationMs,
226
+ });
227
+ const basename = data.specPath.split(/[\\/]/).pop() ?? data.specPath;
228
+ fire(
229
+ completedCount,
230
+ specTotal,
231
+ `[${completedCount}/${specTotal}] ${basename} ${data.status}`,
232
+ );
233
+ return;
234
+ }
235
+ if (data.kind === "run_end") {
236
+ fire(
237
+ specTotal,
238
+ specTotal,
239
+ `done — ${data.passed} pass, ${data.failed} fail, ${data.skipped} skip`,
240
+ );
241
+ return;
242
+ }
243
+ } catch {
244
+ /* swallow */
245
+ }
246
+ },
247
+ snapshot() {
248
+ return {
249
+ runId,
250
+ graphVersion,
251
+ completedSpecs,
252
+ inProgressSpec,
253
+ failures,
254
+ };
255
+ },
256
+ };
257
+ }
258
+
259
+ /**
260
+ * Build the handler factory. `server` is optional — tests that don't
261
+ * instantiate an MCP server (e.g. unit-level invocations) can pass
262
+ * `undefined` and progress notifications are silently no-oped.
263
+ */
264
+ export function ateRunTools(_projectRoot: string, server?: Server) {
265
+ return {
266
+ mandu_ate_run: async (args: Record<string, unknown>) => {
267
+ const { repoRoot, spec, headed, trace, shard, progressToken } = args as {
268
+ repoRoot: string;
269
+ spec: string | { path: string };
270
+ headed?: boolean;
271
+ trace?: boolean;
272
+ shard?: { current: number; total: number };
273
+ progressToken?: string | number;
274
+ };
275
+ if (!repoRoot || typeof repoRoot !== "string") {
276
+ return { ok: false, error: "repoRoot is required" };
277
+ }
278
+ if (!spec) {
279
+ return { ok: false, error: "spec is required" };
280
+ }
281
+ const specPath = typeof spec === "string" ? spec : spec?.path;
282
+ if (!specPath || typeof specPath !== "string") {
283
+ return { ok: false, error: "spec.path or spec string is required" };
284
+ }
285
+ if (shard) {
286
+ if (
287
+ typeof shard.current !== "number" ||
288
+ typeof shard.total !== "number" ||
289
+ shard.current < 1 ||
290
+ shard.total < 1 ||
291
+ shard.current > shard.total
292
+ ) {
293
+ return {
294
+ ok: false,
295
+ error: `invalid shard: ${JSON.stringify(shard)} (current must be 1..total)`,
296
+ };
297
+ }
298
+ }
299
+
300
+ // ── Event accumulator for progress + partial-results on timeout.
301
+ const started = new Date().toISOString();
302
+
303
+ const tracker = createAteProgressTracker({
304
+ progressToken,
305
+ sendProgress: async (progress, total, message) => {
306
+ if (!server) return;
307
+ const snap = tracker.snapshot();
308
+ const token = progressToken ?? snap.runId;
309
+ if (!token) return;
310
+ try {
311
+ await server.notification({
312
+ method: "notifications/progress",
313
+ params: { progressToken: token, progress, total, message },
314
+ });
315
+ } catch {
316
+ // Transport may be offline — never fail the run.
317
+ }
318
+ },
319
+ });
320
+
321
+ const unsubscribe = eventBus.on("ate", (event) => {
322
+ try {
323
+ const data = event.data as unknown as AteMonitorEvent | undefined;
324
+ if (!data || typeof data.kind !== "string") return;
325
+ tracker.handle(data);
326
+ } catch {
327
+ // Listener errors must never propagate.
328
+ }
329
+ });
330
+
331
+ let result: RunResult;
332
+ try {
333
+ result = await runSpec({
334
+ repoRoot,
335
+ spec: specPath,
336
+ headed,
337
+ trace,
338
+ shard,
339
+ });
340
+ } catch (err) {
341
+ // Runner timeout / exec error — persist partial state so heal
342
+ // stays reachable.
343
+ const message = err instanceof Error ? err.message : String(err);
344
+ const isTimeout = /timed out/i.test(message);
345
+ const snap = tracker.snapshot();
346
+ const partial: PartialRunResults = {
347
+ runId: snap.runId ?? `unknown-${Date.now()}`,
348
+ status: isTimeout ? "timed_out" : "error",
349
+ graphVersion: snap.graphVersion,
350
+ completedSpecs: snap.completedSpecs,
351
+ inProgressSpec: snap.inProgressSpec,
352
+ failures: snap.failures,
353
+ startedAt: started,
354
+ killedAt: new Date().toISOString(),
355
+ error: message,
356
+ };
357
+ const resultsPath = writePartialResults(repoRoot, partial);
358
+ unsubscribe();
359
+ return {
360
+ ok: false,
361
+ error: `runSpec failed: ${message}`,
362
+ partial,
363
+ resultsPath,
364
+ runId: partial.runId,
365
+ };
366
+ } finally {
367
+ // Runtime-safe even on success — idempotent unsubscribe.
368
+ try {
369
+ unsubscribe();
370
+ } catch {
371
+ /* no-op */
372
+ }
373
+ }
374
+
375
+ // On failure, re-validate the shape against failure.v1. The
376
+ // runSpec path already does this, but re-checking at the MCP
377
+ // boundary means a buggy translator is caught before the
378
+ // payload crosses the wire.
379
+ if (result.status === "fail") {
380
+ const parsed = failureV1Schema.safeParse(result);
381
+ if (!parsed.success) {
382
+ return {
383
+ ok: false,
384
+ error: `runSpec emitted invalid failure.v1: ${parsed.error.issues[0]?.message ?? "schema mismatch"}`,
385
+ result,
386
+ };
387
+ }
388
+ return { ok: true, result: parsed.data };
389
+ }
390
+ return { ok: true, result };
391
+ },
392
+ };
393
+ }
package/src/tools/ate.ts CHANGED
@@ -1,4 +1,5 @@
1
1
  import type { Tool } from "@modelcontextprotocol/sdk/types.js";
2
+ import type { Server } from "@modelcontextprotocol/sdk/server/index.js";
2
3
  import {
3
4
  ateExtract,
4
5
  ateGenerate,
@@ -13,7 +14,13 @@ import {
13
14
  detectCoverageGaps,
14
15
  precommitCheck,
15
16
  } from "@mandujs/ate";
16
- import type { OracleLevel } from "@mandujs/ate";
17
+ import type { OracleLevel, AteMonitorEvent, FailureV1 } from "@mandujs/ate";
18
+ import { eventBus } from "@mandujs/core/observability";
19
+ import {
20
+ writePartialResults,
21
+ createAteProgressTracker,
22
+ type PartialRunResults,
23
+ } from "./ate-run.js";
17
24
 
18
25
  export const ateToolDefinitions: Tool[] = [
19
26
  {
@@ -83,7 +90,10 @@ export const ateToolDefinitions: Tool[] = [
83
90
  "ATE Step 3 — Run: Execute the generated Playwright specs against a running Mandu dev server. " +
84
91
  "Collects test artifacts (screenshots, traces, results) in .mandu/ate/runs/{runId}/. " +
85
92
  "Requires the Mandu dev server to be running (use mandu_dev_start first). " +
86
- "Returns a runId for use with mandu.ate.report and mandu.ate.heal.",
93
+ "Returns a runId for use with mandu.ate.report and mandu.ate.heal. " +
94
+ "Streams notifications/progress per spec_done event (issue #238). " +
95
+ "On timeout / kill, persists partial state under .mandu/reports/run-<runId>/results.json " +
96
+ "so mandu.ate.heal remains reachable after the 10-min watchdog.",
87
97
  inputSchema: {
88
98
  type: "object",
89
99
  properties: {
@@ -99,6 +109,12 @@ export const ateToolDefinitions: Tool[] = [
99
109
  items: { type: "string", enum: ["chromium", "firefox", "webkit"] },
100
110
  description: "Browsers to test against (default: ['chromium'])",
101
111
  },
112
+ progressToken: {
113
+ type: ["string", "number"],
114
+ description:
115
+ "Optional MCP progress token. When present, per-spec progress notifications are " +
116
+ "sent with this token so the client can correlate them with the originating call.",
117
+ },
102
118
  },
103
119
  required: ["repoRoot"],
104
120
  },
@@ -288,7 +304,87 @@ export const ateToolDefinitions: Tool[] = [
288
304
  },
289
305
  ];
290
306
 
291
- export function ateTools(projectRoot: string) {
307
+ export function ateTools(projectRoot: string, server?: Server) {
308
+ /**
309
+ * Shared subscription helper for `mandu.ate.run`. Wraps ateRun (which
310
+ * drives Playwright) with eventBus listeners so per-spec progress
311
+ * notifications flow through the MCP transport and a partial
312
+ * results.json is persisted on timeout / kill. Downstream consumers
313
+ * can then hand the runId to `mandu.ate.heal` even when the 10-min
314
+ * watchdog fired mid-run.
315
+ */
316
+ const runWithObservability = async (
317
+ input: Parameters<typeof ateRun>[0],
318
+ opts: { progressToken?: string | number } = {},
319
+ ) => {
320
+ const started = new Date().toISOString();
321
+
322
+ const tracker = createAteProgressTracker({
323
+ progressToken: opts.progressToken,
324
+ sendProgress: async (progress, total, message) => {
325
+ if (!server) return;
326
+ const snap = tracker.snapshot();
327
+ const token = opts.progressToken ?? snap.runId;
328
+ if (!token) return;
329
+ try {
330
+ await server.notification({
331
+ method: "notifications/progress",
332
+ params: { progressToken: token, progress, total, message },
333
+ });
334
+ } catch {
335
+ /* transport offline — never fail the run */
336
+ }
337
+ },
338
+ });
339
+
340
+ const unsubscribe = eventBus.on("ate", (event) => {
341
+ try {
342
+ const data = event.data as unknown as AteMonitorEvent | undefined;
343
+ if (!data || typeof data.kind !== "string") return;
344
+ tracker.handle(data);
345
+ } catch {
346
+ /* swallow — never break the run */
347
+ }
348
+ });
349
+
350
+ try {
351
+ return await ateRun(input);
352
+ } catch (err) {
353
+ const message = err instanceof Error ? err.message : String(err);
354
+ const isTimeout = /timed out/i.test(message);
355
+ const snap = tracker.snapshot();
356
+ const partial: PartialRunResults = {
357
+ runId: snap.runId ?? `unknown-${Date.now()}`,
358
+ status: isTimeout ? "timed_out" : "error",
359
+ graphVersion: snap.graphVersion,
360
+ completedSpecs: snap.completedSpecs,
361
+ inProgressSpec: snap.inProgressSpec,
362
+ failures: snap.failures,
363
+ startedAt: started,
364
+ killedAt: new Date().toISOString(),
365
+ error: message,
366
+ };
367
+ const resultsPath = writePartialResults(input.repoRoot, partial);
368
+ return {
369
+ ok: false,
370
+ error: `ateRun failed: ${message}`,
371
+ partial,
372
+ resultsPath,
373
+ runId: partial.runId,
374
+ };
375
+ } finally {
376
+ try {
377
+ unsubscribe();
378
+ } catch {
379
+ /* no-op */
380
+ }
381
+ }
382
+ };
383
+ // Reserved for future use (progress capability detection). Not used
384
+ // during registration today but documented on the closure so the
385
+ // next caller understands the parameter shape.
386
+ void projectRoot;
387
+
292
388
  return {
293
389
  "mandu.ate.extract": async (args: Record<string, unknown>) => {
294
390
  const { repoRoot, tsconfigPath, routeGlobs, buildSalt } = args as {
@@ -308,14 +404,18 @@ export function ateTools(projectRoot: string) {
308
404
  return ateGenerate({ repoRoot, oracleLevel, onlyRoutes });
309
405
  },
310
406
  "mandu.ate.run": async (args: Record<string, unknown>) => {
311
- const { repoRoot, baseURL, ci, headless, browsers } = args as {
407
+ const { repoRoot, baseURL, ci, headless, browsers, progressToken } = args as {
312
408
  repoRoot: string;
313
409
  baseURL?: string;
314
410
  ci?: boolean;
315
411
  headless?: boolean;
316
412
  browsers?: ("chromium" | "firefox" | "webkit")[];
413
+ progressToken?: string | number;
317
414
  };
318
- return await ateRun({ repoRoot, baseURL, ci, headless, browsers });
415
+ return await runWithObservability(
416
+ { repoRoot, baseURL, ci, headless, browsers },
417
+ { progressToken },
418
+ );
319
419
  },
320
420
  "mandu.ate.report": async (args: Record<string, unknown>) => {
321
421
  const { repoRoot, runId, startedAt, finishedAt, exitCode, oracleLevel, format, impact } = args as {
@@ -160,7 +160,19 @@ interface ToolModule {
160
160
  server?: Server,
161
161
  monitor?: ActivityMonitor
162
162
  ) => Record<string, (args: Record<string, unknown>) => Promise<unknown>>;
163
+ /**
164
+ * Hard requirement: skip registration entirely when `server` is
165
+ * absent. Used for tools that cannot function without MCP transport
166
+ * access (e.g. brain, project).
167
+ */
163
168
  requiresServer?: boolean;
169
+ /**
170
+ * Soft requirement: forward the `Server` instance when one is
171
+ * available, but register the tool either way. Used for tools that
172
+ * gracefully degrade (e.g. notifications/progress silently no-ops
173
+ * when the transport isn't attached).
174
+ */
175
+ acceptsServer?: boolean;
164
176
  }
165
177
 
166
178
  /**
@@ -182,10 +194,14 @@ const TOOL_MODULES: ToolModule[] = [
182
194
  { category: "runtime", definitions: runtimeToolDefinitions, handlers: runtimeTools },
183
195
  { category: "seo", definitions: seoToolDefinitions, handlers: seoTools },
184
196
  { category: "project", definitions: projectToolDefinitions, handlers: projectTools as ToolModule["handlers"], requiresServer: true },
185
- { category: "ate", definitions: ateToolDefinitions, handlers: ateTools as ToolModule["handlers"] },
197
+ // ate + ate-run accept an optional Server so notifications/progress
198
+ // can flow (issue #238). `acceptsServer: true` forwards the server
199
+ // when available but still registers when it isn't — callers that
200
+ // boot without an MCP transport get progress no-oped silently.
201
+ { category: "ate", definitions: ateToolDefinitions, handlers: ateTools as ToolModule["handlers"], acceptsServer: true },
186
202
  { category: "ate-phase5", definitions: atePhase5ToolDefinitions, handlers: createAtePhase5Handlers as unknown as ToolModule["handlers"] },
187
203
  { category: "ate-context", definitions: ateContextToolDefinitions, handlers: ateContextTools },
188
- { category: "ate-run", definitions: ateRunToolDefinitions, handlers: ateRunTools },
204
+ { category: "ate-run", definitions: ateRunToolDefinitions, handlers: ateRunTools as ToolModule["handlers"], acceptsServer: true },
189
205
  { category: "ate-flakes", definitions: ateFlakesToolDefinitions, handlers: ateFlakesTools },
190
206
  { category: "ate-prompt", definitions: atePromptToolDefinitions, handlers: atePromptTools },
191
207
  { category: "ate-exemplar", definitions: ateExemplarToolDefinitions, handlers: ateExemplarTools },
@@ -290,13 +306,21 @@ export function registerBuiltinTools(
290
306
  }
291
307
 
292
308
  try {
293
- const handlers = module.requiresServer
294
- ? (module.handlers as (root: string, srv: Server, mon: ActivityMonitor) => Record<string, (args: Record<string, unknown>) => Promise<unknown>>)(
295
- projectRoot,
296
- server!,
297
- monitor!
298
- )
299
- : module.handlers(projectRoot);
309
+ let handlers: Record<string, (args: Record<string, unknown>) => Promise<unknown>>;
310
+ if (module.requiresServer) {
311
+ handlers = (module.handlers as (root: string, srv: Server, mon: ActivityMonitor) => Record<string, (args: Record<string, unknown>) => Promise<unknown>>)(
312
+ projectRoot,
313
+ server!,
314
+ monitor!,
315
+ );
316
+ } else if (module.acceptsServer) {
317
+ // Forward the Server when available; fall back to just projectRoot.
318
+ handlers = server
319
+ ? module.handlers(projectRoot, server)
320
+ : module.handlers(projectRoot);
321
+ } else {
322
+ handlers = module.handlers(projectRoot);
323
+ }
300
324
 
301
325
  const plugins = moduleToPlugins(module.definitions, handlers);
302
326
  mcpToolRegistry.registerAll(plugins, module.category);