npm - @mandujs/mcp - Versions diffs - 0.28.2 → 0.29.0 - Mend

@mandujs/mcp 0.28.2 → 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@mandujs/mcp",
-  "version": "0.28.2",
+  "version": "0.29.0",
   "description": "Mandu MCP Server - Agent-native interface for Mandu framework operations",
   "type": "module",
   "main": "./src/index.ts",
@@ -34,8 +34,8 @@
     "access": "public"
   },
   "dependencies": {
-    "@mandujs/core": "^0.41.2",
-    "@mandujs/ate": "^0.24.0",
+    "@mandujs/core": "^0.42.0",
+    "@mandujs/ate": "^0.25.0",
     "@mandujs/skills": "^0.18.0",
     "@modelcontextprotocol/sdk": "^1.25.3"
   },

package/src/activity-monitor.ts CHANGED Viewed

@@ -9,6 +9,14 @@ import fs from "fs";
 import path from "path";
 import type { Subprocess } from "bun";
 import { eventBus } from "@mandujs/core/observability";
+import type { AteMonitorEvent } from "@mandujs/ate";
+/**
+ * Module-local alias for the ATE monitor event type imported from
+ * `@mandujs/ate`. It exists as a separate name so the shape accepted at
+ * the event-bus subscription boundary can be loosened later
+ * (forward-compat with events emitted by newer ATE versions) without
+ * touching every use site. Today it is a plain alias of
+ * `AteMonitorEvent`; it is not exported from this module.
+ */
+type AteMonitorEventShape = AteMonitorEvent;
 const TOOL_ICONS: Record<string, string> = {
   // Spec
@@ -60,6 +68,10 @@ const TOOL_ICONS: Record<string, string> = {
   mandu_add_client_slot: "CLIENT+",
   // Error
   mandu_analyze_error: "ERROR",
+  // ATE — display tokens for per-run/per-spec lifecycle events
+  "ate.run": "ATE-RUN",
+  "ate.pass": "ATE-PASS",
+  "ate.fail": "ATE-FAIL",
 };
 type MonitorSeverity = "info" | "warn" | "error";
@@ -281,6 +293,12 @@ export class ActivityMonitor {
   private toolStartTimes = new Map<string, number>();
   // Phase 5-1: agent session identification (tracked per MCP client)
   public sessionId: string = crypto.randomUUID();
+  // ATE monitor plumbing:
+  //  - ateUnsubscribe: handle returned by eventBus.on("ate", ...); null
+  //    while no subscription is active.
+  //  - ateRunArtifacts: per-run accumulator keyed by runId (artifact count
+  //    plus the directory of the first artifact seen) so run_end can
+  //    summarize them.
+  //  - ateSpecFailureKinds: failure kind cache keyed by
+  //    "<runId>:<specPath>" so a failing spec_done row can inline it.
+  private ateUnsubscribe: (() => void) | null = null;
+  private ateRunArtifacts = new Map<string, { count: number; dir?: string }>();
+  private ateSpecFailureKinds = new Map<string, string>();
   constructor(projectRoot: string) {
     this.projectRoot = projectRoot;
@@ -338,9 +356,25 @@ export class ActivityMonitor {
     if (this.config.openTerminal) {
       this.openTerminal();
     }
+    // Subscribe to ATE runner events — structured per-run progress,
+    // per-spec pass/fail, failure.v1 captures, artifact writes.
+    this.ateUnsubscribe = eventBus.on("ate", (event) => {
+      try {
+        const payload = event.data as unknown as AteMonitorEventShape | undefined;
+        if (!payload || typeof payload.kind !== "string") return;
+        this.handleAteEvent(payload);
+      } catch {
+        // Never let a bad payload tear the monitor down.
+      }
+    });
   }
   stop(): void {
+    if (this.ateUnsubscribe) {
+      this.ateUnsubscribe();
+      this.ateUnsubscribe = null;
+    }
     this.flush(true);
     if (this.tailProcess) {
       this.tailProcess.kill();
@@ -637,6 +671,148 @@ export class ActivityMonitor {
     }
   }
+  /**
+   * Render one ATE monitor event (run_start / spec_progress / spec_done /
+   * failure_captured / artifact_saved / run_end) to the monitor log.
+   * Does nothing when no log stream is open.
+   *
+   * JSON mode: the event is wrapped in a `MonitorEvent` of type
+   * `ate.<kind>` (severity from `ateSeverityFor`) and written as one
+   * line; no per-run state is kept in this mode.
+   *
+   * Pretty mode policies:
+   *  - `run_start` prints the last 8 characters of the runId and the
+   *    number of specs, and opens an artifact accumulator for the run.
+   *  - `spec_progress` is suppressed unless MANDU_ATE_VERBOSE=1.
+   *  - `failure_captured` prints nothing; its `failure.kind` is cached
+   *    under `<runId>:<specPath>` for the matching `spec_done` row.
+   *  - `spec_done` prints a pass or fail row with the spec's basename and
+   *    duration; a fail row inlines the cached failure kind when one
+   *    exists. Skipped specs are printed only in verbose mode.
+   *  - `artifact_saved` is collected silently and summarized in run_end.
+   *  - `run_end` prints the pass/fail/skip totals, appends the artifact
+   *    directory when artifacts were recorded, and releases every piece
+   *    of state cached for that run.
+   *
+   * @param data The ATE event to render.
+   */
+  private handleAteEvent(data: AteMonitorEventShape): void {
+    if (!this.logStream) return;
+    // JSON mode → verbatim line per event.
+    if (this.outputFormat === "json") {
+      const payload: MonitorEvent = {
+        ts: new Date().toISOString(),
+        type: `ate.${data.kind}`,
+        severity: this.ateSeverityFor(data),
+        source: "ate",
+        data: data as unknown as Record<string, unknown>,
+      };
+      const line = this.formatEvent(payload);
+      if (line) {
+        this.write(line);
+        this.updateSummary(payload);
+      }
+      return;
+    }
+    // Pretty mode — route per-kind.
+    const verbose = process.env.MANDU_ATE_VERBOSE === "1";
+    const time = getTime();
+    switch (data.kind) {
+      case "run_start": {
+        this.ateRunArtifacts.set(data.runId, { count: 0 });
+        const runIdShort = data.runId.slice(-8);
+        const line = `${time} > [ATE-RUN] ${runIdShort} starting (${data.specPaths.length} specs)\n`;
+        this.write(line);
+        this.updateSummary({
+          ts: new Date().toISOString(),
+          type: "ate.run_start",
+          severity: "info",
+          source: "ate",
+        });
+        return;
+      }
+      case "spec_progress": {
+        // Suppressed by default — too noisy. Render only when
+        // MANDU_ATE_VERBOSE=1 is set.
+        if (!verbose) return;
+        const line = `${time}   [ATE] ${data.specPath} (${data.phase})\n`;
+        this.write(line);
+        return;
+      }
+      case "failure_captured": {
+        // Cache the failure kind so `spec_done` can inline it. Render
+        // nothing here — the line is attached to the spec_done row.
+        this.ateSpecFailureKinds.set(
+          `${data.runId}:${data.specPath}`,
+          data.failure.kind,
+        );
+        return;
+      }
+      case "spec_done": {
+        const secs = (data.durationMs / 1000).toFixed(1);
+        const file = data.specPath.split(/[\\/]/).pop() ?? data.specPath;
+        if (data.status === "pass") {
+          const line = `${time} + [ATE] ${file} (${secs}s)\n`;
+          this.write(line);
+          this.updateSummary({
+            ts: new Date().toISOString(),
+            type: "ate.spec_done",
+            severity: "info",
+            source: "ate",
+          });
+        } else if (data.status === "fail") {
+          const kindKey = `${data.runId}:${data.specPath}`;
+          const failureKind = this.ateSpecFailureKinds.get(kindKey);
+          this.ateSpecFailureKinds.delete(kindKey);
+          const suffix = failureKind ? ` [${failureKind}]` : "";
+          const line = `${time} x [ATE] ${file} (${secs}s)${suffix}\n`;
+          this.write(line);
+          this.updateSummary({
+            ts: new Date().toISOString(),
+            type: "ate.spec_done",
+            severity: "error",
+            source: "ate",
+          });
+        } else {
+          // skip
+          if (verbose) {
+            const line = `${time}   [ATE] ${file} skipped\n`;
+            this.write(line);
+          }
+        }
+        return;
+      }
+      case "artifact_saved": {
+        // Accumulate silently; run_end summarizes.
+        const entry = this.ateRunArtifacts.get(data.runId) ?? { count: 0 };
+        entry.count += 1;
+        if (!entry.dir) {
+          // Only the directory of the first artifact is remembered.
+          const dir = path.dirname(data.path);
+          entry.dir = dir;
+        }
+        this.ateRunArtifacts.set(data.runId, entry);
+        return;
+      }
+      case "run_end": {
+        const runIdShort = data.runId.slice(-8);
+        const secs = (data.durationMs / 1000).toFixed(1);
+        const artifactInfo = this.ateRunArtifacts.get(data.runId);
+        this.ateRunArtifacts.delete(data.runId);
+        // The run is over: drop failure kinds that no failing spec_done
+        // consumed, otherwise they pile up for the monitor's lifetime.
+        const runKeyPrefix = `${data.runId}:`;
+        for (const key of [...this.ateSpecFailureKinds.keys()]) {
+          if (key.startsWith(runKeyPrefix)) {
+            this.ateSpecFailureKinds.delete(key);
+          }
+        }
+        const artifactSuffix = artifactInfo && artifactInfo.count > 0 && artifactInfo.dir
+          ? `. artifacts: ${artifactInfo.dir}`
+          : "";
+        const line =
+          `${time} * [ATE-RUN] ${runIdShort} done — ` +
+          `${data.passed} pass, ${data.failed} fail, ${data.skipped} skip (${secs}s)${artifactSuffix}\n`;
+        this.write(line);
+        this.updateSummary({
+          ts: new Date().toISOString(),
+          type: "ate.run_end",
+          severity: data.failed > 0 ? "error" : "info",
+          source: "ate",
+        });
+        return;
+      }
+    }
+  }
+  /**
+   * Pick the monitor severity for an ATE event: "error" for
+   * `failure_captured`, for a `spec_done` whose status is "fail", and for
+   * a `run_end` that reports at least one failed spec; "info" for
+   * everything else.
+   */
+  private ateSeverityFor(data: AteMonitorEventShape): MonitorSeverity {
+    if (data.kind === "failure_captured") return "error";
+    if (data.kind === "spec_done" && data.status === "fail") return "error";
+    if (data.kind === "run_end" && data.failed > 0) return "error";
+    return "info";
+  }
   private enqueue(event: MonitorEvent): void {
     if (!this.logStream) return;
     const now = Date.now();

package/src/tools/ate-run.ts CHANGED Viewed

@@ -1,154 +1,393 @@
-/**
- * `mandu_ate_run` — Phase A.2 agent-facing spec runner.
- *
- * Wraps `@mandujs/ate`'s `runSpec` behind the MCP tool surface.
- *
- * Semantics: execute a single spec file (Playwright or bun:test,
- * auto-detected from the path), then return the failure.v1-shaped
- * JSON — `{ status: "pass", ... }` on green, full failure envelope
- * on red. Shard argument is forwarded transparently.
- *
- * The handler validates the returned shape against the failure.v1
- * Zod schema on failure (cheap, catches translator regressions).
- * On pass we return the pass envelope as-is.
- *
- * Snake_case naming per §11 decision 4.
- */
-import type { Tool } from "@modelcontextprotocol/sdk/types.js";
-import { runSpec, failureV1Schema, type RunResult } from "@mandujs/ate";
-export const ateRunToolDefinitions: Tool[] = [
-  {
-    name: "mandu_ate_run",
-    annotations: {
-      readOnlyHint: false,
-    },
-    description:
-      "Phase A.2 agent-native spec runner. Executes ONE spec file " +
-      "(Playwright if the path matches tests/e2e/** or *.e2e.ts, otherwise bun:test) " +
-      "and returns structured JSON. On pass: { status: 'pass', durationMs, assertions, graphVersion, runId }. " +
-      "On fail: a failure.v1 envelope with discriminated `kind` (one of: selector_drift, " +
-      "contract_mismatch, redirect_unexpected, hydration_timeout, rate_limit_exceeded, " +
-      "csrf_invalid, fixture_missing, semantic_divergence), kind-specific `detail`, " +
-      "`healing.auto[]` (deterministic replacements when confidence >= threshold), " +
-      "`healing.requires_llm` (true for shape-level failures), `flakeScore`, `lastPassedAt`, " +
-      "`graphVersion` (agent cache invalidation key), and trace/screenshot/dom artifacts " +
-      "staged under .mandu/ate-artifacts/<runId>/. Use `shard: { current, total }` to " +
-      "distribute across CI workers.",
-    inputSchema: {
-      type: "object",
-      properties: {
-        repoRoot: {
-          type: "string",
-          description: "Absolute path to the Mandu project root",
-        },
-        spec: {
-          oneOf: [
-            { type: "string" },
-            {
-              type: "object",
-              properties: {
-                path: { type: "string" },
-              },
-              required: ["path"],
-            },
-          ],
-          description:
-            "Spec file — either a path string (relative to repoRoot) or { path }. " +
-            "Runner is auto-detected from the path (Playwright vs bun:test).",
-        },
-        headed: {
-          type: "boolean",
-          description: "Playwright only — run headed. Default: false (headless).",
-        },
-        trace: {
-          type: "boolean",
-          description: "Playwright only — capture trace. Default: true.",
-        },
-        shard: {
-          type: "object",
-          properties: {
-            current: { type: "number", minimum: 1 },
-            total: { type: "number", minimum: 1 },
-          },
-          required: ["current", "total"],
-          description:
-            "CI sharding — `current` is 1-based. Playwright receives --shard=current/total; " +
-            "bun:test falls back to hash-based partitioning.",
-        },
-      },
-      required: ["repoRoot", "spec"],
-    },
-  },
-];
-export function ateRunTools(_projectRoot: string) {
-  return {
-    mandu_ate_run: async (args: Record<string, unknown>) => {
-      const { repoRoot, spec, headed, trace, shard } = args as {
-        repoRoot: string;
-        spec: string | { path: string };
-        headed?: boolean;
-        trace?: boolean;
-        shard?: { current: number; total: number };
-      };
-      if (!repoRoot || typeof repoRoot !== "string") {
-        return { ok: false, error: "repoRoot is required" };
-      }
-      if (!spec) {
-        return { ok: false, error: "spec is required" };
-      }
-      const specPath = typeof spec === "string" ? spec : spec?.path;
-      if (!specPath || typeof specPath !== "string") {
-        return { ok: false, error: "spec.path or spec string is required" };
-      }
-      if (shard) {
-        if (
-          typeof shard.current !== "number" ||
-          typeof shard.total !== "number" ||
-          shard.current < 1 ||
-          shard.total < 1 ||
-          shard.current > shard.total
-        ) {
-          return {
-            ok: false,
-            error: `invalid shard: ${JSON.stringify(shard)} (current must be 1..total)`,
-          };
-        }
-      }
-      let result: RunResult;
-      try {
-        result = await runSpec({
-          repoRoot,
-          spec: specPath,
-          headed,
-          trace,
-          shard,
-        });
-      } catch (err) {
-        return {
-          ok: false,
-          error: `runSpec failed: ${err instanceof Error ? err.message : String(err)}`,
-        };
-      }
-      // On failure, re-validate the shape against failure.v1. The
-      // runSpec path already does this, but re-checking at the MCP
-      // boundary means a buggy translator is caught before the
-      // payload crosses the wire.
-      if (result.status === "fail") {
-        const parsed = failureV1Schema.safeParse(result);
-        if (!parsed.success) {
-          return {
-            ok: false,
-            error: `runSpec emitted invalid failure.v1: ${parsed.error.issues[0]?.message ?? "schema mismatch"}`,
-            result,
-          };
-        }
-        return { ok: true, result: parsed.data };
-      }
-      return { ok: true, result };
-    },
-  };
-}
+/**
+ * `mandu_ate_run` — Phase A.2 agent-facing spec runner.
+ *
+ * Wraps `@mandujs/ate`'s `runSpec` behind the MCP tool surface.
+ *
+ * Semantics: execute a single spec file (Playwright or bun:test,
+ * auto-detected from the path), then return the failure.v1-shaped
+ * JSON — `{ status: "pass", ... }` on green, full failure envelope
+ * on red. Shard argument is forwarded transparently.
+ *
+ * The handler validates the returned shape against the failure.v1
+ * Zod schema on failure (cheap, catches translator regressions).
+ * On pass we return the pass envelope as-is.
+ *
+ * Issue #238 wiring:
+ *   - Subscribes to `eventBus.on("ate", ...)` for the duration of the
+ *     run and forwards every `spec_done` (plus a final `run_end`
+ *     summary) as an MCP `notifications/progress`. The progress total
+ *     is captured from the `run_start` event. When the caller did not
+ *     supply a `progressToken`, the runId is used as the token instead:
+ *     the notification is still emitted, but a client that never
+ *     registered that token may simply ignore it.
+ *   - On timeout / exec failure, writes a partial results.json under
+ *     `.mandu/reports/run-<runId>/` so `mandu.ate.heal` stays reachable
+ *     even when the 10-min watchdog killed the runner.
+ *
+ * Snake_case naming per §11 decision 4.
+ */
+import type { Tool } from "@modelcontextprotocol/sdk/types.js";
+import type { Server } from "@modelcontextprotocol/sdk/server/index.js";
+import { mkdirSync, writeFileSync } from "node:fs";
+import { join } from "node:path";
+import {
+  runSpec,
+  failureV1Schema,
+  type RunResult,
+  type AteMonitorEvent,
+  type FailureV1,
+} from "@mandujs/ate";
+import { eventBus } from "@mandujs/core/observability";
+/**
+ * MCP tool definition for `mandu_ate_run`: the agent-facing description
+ * plus the JSON input schema. `repoRoot` and `spec` are required;
+ * `headed`, `trace`, `shard` and `progressToken` are optional.
+ */
+export const ateRunToolDefinitions: Tool[] = [
+  {
+    name: "mandu_ate_run",
+    annotations: {
+      readOnlyHint: false,
+    },
+    description:
+      "Phase A.2 agent-native spec runner. Executes ONE spec file " +
+      "(Playwright if the path matches tests/e2e/** or *.e2e.ts, otherwise bun:test) " +
+      "and returns structured JSON. On pass: { status: 'pass', durationMs, assertions, graphVersion, runId }. " +
+      "On fail: a failure.v1 envelope with discriminated `kind` (one of: selector_drift, " +
+      "contract_mismatch, redirect_unexpected, hydration_timeout, rate_limit_exceeded, " +
+      "csrf_invalid, fixture_missing, semantic_divergence), kind-specific `detail`, " +
+      "`healing.auto[]` (deterministic replacements when confidence >= threshold), " +
+      "`healing.requires_llm` (true for shape-level failures), `flakeScore`, `lastPassedAt`, " +
+      "`graphVersion` (agent cache invalidation key), and trace/screenshot/dom artifacts " +
+      "staged under .mandu/ate-artifacts/<runId>/. Use `shard: { current, total }` to " +
+      "distribute across CI workers. Emits notifications/progress per spec_done event. " +
+      "On timeout / cancel, writes .mandu/reports/run-<runId>/results.json with partial state.",
+    inputSchema: {
+      type: "object",
+      properties: {
+        repoRoot: {
+          type: "string",
+          description: "Absolute path to the Mandu project root",
+        },
+        spec: {
+          oneOf: [
+            { type: "string" },
+            {
+              type: "object",
+              properties: {
+                path: { type: "string" },
+              },
+              required: ["path"],
+            },
+          ],
+          description:
+            "Spec file — either a path string (relative to repoRoot) or { path }. " +
+            "Runner is auto-detected from the path (Playwright vs bun:test).",
+        },
+        headed: {
+          type: "boolean",
+          description: "Playwright only — run headed. Default: false (headless).",
+        },
+        trace: {
+          type: "boolean",
+          description: "Playwright only — capture trace. Default: true.",
+        },
+        shard: {
+          type: "object",
+          properties: {
+            current: { type: "number", minimum: 1 },
+            total: { type: "number", minimum: 1 },
+          },
+          required: ["current", "total"],
+          description:
+            "CI sharding — `current` is 1-based. Playwright receives --shard=current/total; " +
+            "bun:test falls back to hash-based partitioning.",
+        },
+        progressToken: {
+          type: ["string", "number"],
+          description:
+            "Optional MCP progress token to associate with emitted notifications/progress. " +
+            "When omitted the runId is used as a fallback so progress events still correlate.",
+        },
+      },
+      required: ["repoRoot", "spec"],
+    },
+  },
+];
+/**
+ * Partial-result envelope written to disk when a run is killed mid-way.
+ * Mirrors the shape heal/report consumers already know how to parse,
+ * plus the extra status/killedAt fields so downstream tooling can spot
+ * incomplete records without probing `mtime`.
+ *
+ * NOTE(review): the `mandu_ate_run` handler in this file only ever sets
+ * `status` to "timed_out" or "error"; "cancelled" is presumably produced
+ * by another caller — confirm.
+ */
+export interface PartialRunResults {
+  /** Run identifier; also names the `run-<runId>` report directory. */
+  runId: string;
+  /** Why the run stopped before producing a normal result. */
+  status: "timed_out" | "cancelled" | "error";
+  /** Graph version associated with the run. */
+  graphVersion: string;
+  /** Specs that reported completion before the run stopped. */
+  completedSpecs: Array<{
+    specPath: string;
+    status: "pass" | "fail" | "skip";
+    durationMs: number;
+  }>;
+  /** Spec that was still running when the run stopped, if any. */
+  inProgressSpec: string | null;
+  /** failure.v1 records captured before the run stopped. */
+  failures: FailureV1[];
+  /** When the run began (the handler in this file writes an ISO-8601 string). */
+  startedAt: string;
+  /** When the run was declared over (the handler in this file writes an ISO-8601 string). */
+  killedAt: string;
+  /** Message of the error that ended the run, when available. */
+  error?: string;
+}
+/**
+ * Write the partial-results record to
+ * `<repoRoot>/.mandu/reports/run-<runId>/results.json`, creating the
+ * directory tree when needed. The record is serialized as JSON with
+ * 2-space indentation.
+ *
+ * Never throws: any error raised while creating the directory or
+ * writing the file is swallowed, because the caller has already decided
+ * the run is over.
+ *
+ * @param repoRoot Project root under which `.mandu/reports` is created.
+ * @param partial Record to persist; `partial.runId` names the directory.
+ * @returns Path of the written file, or `null` when the write failed.
+ */
+export function writePartialResults(
+  repoRoot: string,
+  partial: PartialRunResults,
+): string | null {
+  try {
+    const dir = join(repoRoot, ".mandu", "reports", `run-${partial.runId}`);
+    mkdirSync(dir, { recursive: true });
+    const target = join(dir, "results.json");
+    writeFileSync(target, JSON.stringify(partial, null, 2), "utf8");
+    return target;
+  } catch {
+    // Best effort only — report the failure through the null return.
+    return null;
+  }
+}
+/**
+ * Stateful accumulator + progress-notification pipe. Exposed as a
+ * factory so unit tests can drive the event handling path without
+ * depending on the timing of a live runSpec call.
+ *
+ * Feed it by calling `handle()` for each incoming AteMonitorEvent.
+ * Only `spec_done` and `run_end` events produce a progress
+ * notification; for those, `sendProgress` is invoked synchronously from
+ * within `handle()`. The other event kinds only update the tracked
+ * state. Call `snapshot()` after a kill to obtain the state needed to
+ * build a PartialRunResults.
+ */
+export interface AteProgressTracker {
+  /** Fold one ATE event into the tracked state; never throws. */
+  handle: (data: AteMonitorEvent) => void;
+  /**
+   * Report the state accumulated so far. `runId` is null and
+   * `graphVersion` is empty until a `run_start` event has been handled.
+   */
+  snapshot: () => {
+    runId: string | null;
+    graphVersion: string;
+    completedSpecs: PartialRunResults["completedSpecs"];
+    inProgressSpec: string | null;
+    failures: FailureV1[];
+  };
+}
+/**
+ * Create a tracker that folds ATE monitor events into run state and
+ * reports progress through `options.sendProgress`.
+ *
+ * Event handling:
+ *  - `run_start` records runId, graphVersion and the spec count
+ *    (treated as at least 1).
+ *  - `spec_progress` with phase "executing" records the running spec.
+ *  - `failure_captured` appends the failure record.
+ *  - `spec_done` records the completed spec and sends
+ *    `[done/total] <basename> <status>`.
+ *  - `run_end` sends a final `done — ...` summary.
+ *
+ * Errors thrown or rejected by `sendProgress` are swallowed so a broken
+ * transport can never fail the run.
+ *
+ * @param options.progressToken Accepted so callers can pass their
+ *   options through unchanged; the tracker itself does not read it.
+ * @param options.sendProgress Sink for (progress, total, message); may
+ *   be synchronous or return a promise.
+ * @returns The tracker; `snapshot()` returns copies, so a snapshot taken
+ *   earlier is not altered by events handled afterwards.
+ */
+export function createAteProgressTracker(options: {
+  progressToken?: string | number;
+  sendProgress: (progress: number, total: number, message: string) => void | Promise<void>;
+}): AteProgressTracker {
+  let runId: string | null = null;
+  let graphVersion = "";
+  let specTotal = 1;
+  let completedCount = 0;
+  let inProgressSpec: string | null = null;
+  const completedSpecs: PartialRunResults["completedSpecs"] = [];
+  const failures: FailureV1[] = [];
+  const fire = (progress: number, total: number, message: string) => {
+    try {
+      const res = options.sendProgress(progress, total, message);
+      if (res && typeof (res as Promise<void>).then === "function") {
+        (res as Promise<void>).catch(() => {
+          /* swallow — progress delivery is best effort */
+        });
+      }
+    } catch {
+      /* swallow — progress delivery is best effort */
+    }
+  };
+  return {
+    handle(data: AteMonitorEvent) {
+      try {
+        if (data.kind === "run_start") {
+          runId = data.runId;
+          graphVersion = data.graphVersion;
+          specTotal = Math.max(1, data.specPaths.length);
+          return;
+        }
+        if (data.kind === "spec_progress" && data.phase === "executing") {
+          inProgressSpec = data.specPath;
+          return;
+        }
+        if (data.kind === "failure_captured") {
+          failures.push(data.failure);
+          return;
+        }
+        if (data.kind === "spec_done") {
+          completedCount += 1;
+          inProgressSpec = null;
+          completedSpecs.push({
+            specPath: data.specPath,
+            status: data.status,
+            durationMs: data.durationMs,
+          });
+          // If run_start was missed or under-reported the specs, the
+          // completed count can overtake the recorded total; never
+          // report progress beyond the total.
+          const total = Math.max(specTotal, completedCount);
+          const basename = data.specPath.split(/[\\/]/).pop() ?? data.specPath;
+          fire(
+            completedCount,
+            total,
+            `[${completedCount}/${total}] ${basename} ${data.status}`,
+          );
+          return;
+        }
+        if (data.kind === "run_end") {
+          const total = Math.max(specTotal, completedCount);
+          fire(
+            total,
+            total,
+            `done — ${data.passed} pass, ${data.failed} fail, ${data.skipped} skip`,
+          );
+          return;
+        }
+      } catch {
+        /* swallow — a malformed event must not break the run */
+      }
+    },
+    snapshot() {
+      // Hand out copies: the internal arrays keep growing while events
+      // arrive, and a snapshot must stay a point-in-time view.
+      return {
+        runId,
+        graphVersion,
+        completedSpecs: completedSpecs.map((spec) => ({ ...spec })),
+        inProgressSpec,
+        failures: [...failures],
+      };
+    },
+  };
+}
+/**
+ * Build the handler factory for `mandu_ate_run`.
+ *
+ * The handler validates its arguments, subscribes to "ate" events for
+ * the duration of the run, calls `runSpec`, and returns
+ * `{ ok: true, result }` on completion. A failing result is re-validated
+ * against the failure.v1 schema before it is returned. When `runSpec`
+ * throws, the state gathered so far is persisted through
+ * `writePartialResults` and returned as `partial` next to `ok: false`.
+ *
+ * @param _projectRoot Unused by this handler.
+ * @param server Optional MCP server used to emit
+ *   `notifications/progress`. Tests that don't instantiate an MCP server
+ *   (e.g. unit-level invocations) can pass `undefined`; progress
+ *   notifications are then silently skipped.
+ */
+export function ateRunTools(_projectRoot: string, server?: Server) {
+  return {
+    mandu_ate_run: async (args: Record<string, unknown>) => {
+      const { repoRoot, spec, headed, trace, shard, progressToken } = args as {
+        repoRoot: string;
+        spec: string | { path: string };
+        headed?: boolean;
+        trace?: boolean;
+        shard?: { current: number; total: number };
+        progressToken?: string | number;
+      };
+      if (!repoRoot || typeof repoRoot !== "string") {
+        return { ok: false, error: "repoRoot is required" };
+      }
+      if (!spec) {
+        return { ok: false, error: "spec is required" };
+      }
+      const specPath = typeof spec === "string" ? spec : spec?.path;
+      if (!specPath || typeof specPath !== "string") {
+        return { ok: false, error: "spec.path or spec string is required" };
+      }
+      if (shard) {
+        if (
+          typeof shard.current !== "number" ||
+          typeof shard.total !== "number" ||
+          shard.current < 1 ||
+          shard.total < 1 ||
+          shard.current > shard.total
+        ) {
+          return {
+            ok: false,
+            error: `invalid shard: ${JSON.stringify(shard)} (current must be 1..total)`,
+          };
+        }
+      }
+      // ── Event accumulator for progress + partial-results on timeout.
+      const started = new Date().toISOString();
+      const tracker = createAteProgressTracker({
+        progressToken,
+        sendProgress: async (progress, total, message) => {
+          if (!server) return;
+          // Fall back to the runId when the caller supplied no token.
+          const snap = tracker.snapshot();
+          const token = progressToken ?? snap.runId;
+          if (!token) return;
+          try {
+            await server.notification({
+              method: "notifications/progress",
+              params: { progressToken: token, progress, total, message },
+            });
+          } catch {
+            // Transport may be offline — never fail the run.
+          }
+        },
+      });
+      const unsubscribe = eventBus.on("ate", (event) => {
+        try {
+          const data = event.data as unknown as AteMonitorEvent | undefined;
+          if (!data || typeof data.kind !== "string") return;
+          tracker.handle(data);
+        } catch {
+          // Listener errors must never propagate.
+        }
+      });
+      let result: RunResult;
+      try {
+        result = await runSpec({
+          repoRoot,
+          spec: specPath,
+          headed,
+          trace,
+          shard,
+        });
+      } catch (err) {
+        // Runner timeout / exec error — persist partial state so heal
+        // stays reachable.
+        const message = err instanceof Error ? err.message : String(err);
+        const isTimeout = /timed out/i.test(message);
+        const snap = tracker.snapshot();
+        const partial: PartialRunResults = {
+          runId: snap.runId ?? `unknown-${Date.now()}`,
+          status: isTimeout ? "timed_out" : "error",
+          graphVersion: snap.graphVersion,
+          completedSpecs: snap.completedSpecs,
+          inProgressSpec: snap.inProgressSpec,
+          failures: snap.failures,
+          startedAt: started,
+          killedAt: new Date().toISOString(),
+          error: message,
+        };
+        const resultsPath = writePartialResults(repoRoot, partial);
+        // The subscription is released by the `finally` below; a second
+        // call here would rely on unsubscribe being idempotent.
+        return {
+          ok: false,
+          error: `runSpec failed: ${message}`,
+          partial,
+          resultsPath,
+          runId: partial.runId,
+        };
+      } finally {
+        // Single teardown point for the success path and the error
+        // return above. Guarded so a failing unsubscribe can never
+        // replace the handler's result.
+        try {
+          unsubscribe();
+        } catch {
+          /* no-op */
+        }
+      }
+      // On failure, re-validate the shape against failure.v1. The
+      // runSpec path already does this, but re-checking at the MCP
+      // boundary means a buggy translator is caught before the
+      // payload crosses the wire.
+      if (result.status === "fail") {
+        const parsed = failureV1Schema.safeParse(result);
+        if (!parsed.success) {
+          return {
+            ok: false,
+            error: `runSpec emitted invalid failure.v1: ${parsed.error.issues[0]?.message ?? "schema mismatch"}`,
+            result,
+          };
+        }
+        return { ok: true, result: parsed.data };
+      }
+      return { ok: true, result };
+    },
+  };
+}

package/src/tools/ate.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import type { Tool } from "@modelcontextprotocol/sdk/types.js";
+import type { Server } from "@modelcontextprotocol/sdk/server/index.js";
 import {
   ateExtract,
   ateGenerate,
@@ -13,7 +14,13 @@ import {
   detectCoverageGaps,
   precommitCheck,
 } from "@mandujs/ate";
-import type { OracleLevel } from "@mandujs/ate";
+import type { OracleLevel, AteMonitorEvent, FailureV1 } from "@mandujs/ate";
+import { eventBus } from "@mandujs/core/observability";
+import {
+  writePartialResults,
+  createAteProgressTracker,
+  type PartialRunResults,
+} from "./ate-run.js";
 export const ateToolDefinitions: Tool[] = [
   {
@@ -83,7 +90,10 @@ export const ateToolDefinitions: Tool[] = [
       "ATE Step 3 — Run: Execute the generated Playwright specs against a running Mandu dev server. " +
       "Collects test artifacts (screenshots, traces, results) in .mandu/ate/runs/{runId}/. " +
       "Requires the Mandu dev server to be running (use mandu_dev_start first). " +
-      "Returns a runId for use with mandu.ate.report and mandu.ate.heal.",
+      "Returns a runId for use with mandu.ate.report and mandu.ate.heal. " +
+      "Streams notifications/progress per spec_done event (issue #238). " +
+      "On timeout / kill, persists partial state under .mandu/reports/run-<runId>/results.json " +
+      "so mandu.ate.heal remains reachable after the 10-min watchdog.",
     inputSchema: {
       type: "object",
       properties: {
@@ -99,6 +109,12 @@ export const ateToolDefinitions: Tool[] = [
           items: { type: "string", enum: ["chromium", "firefox", "webkit"] },
           description: "Browsers to test against (default: ['chromium'])",
         },
+        progressToken: {
+          type: ["string", "number"],
+          description:
+            "Optional MCP progress token. When present, per-spec progress notifications are " +
+            "sent with this token so the client can correlate them with the originating call.",
+        },
       },
       required: ["repoRoot"],
     },
@@ -288,7 +304,87 @@ export const ateToolDefinitions: Tool[] = [
   },
 ];
-export function ateTools(projectRoot: string) {
+export function ateTools(projectRoot: string, server?: Server) {
+  /**
+   * Shared subscription helper for `mandu.ate.run`. Wraps ateRun (which
+   * drives Playwright) with eventBus listeners so per-spec progress
+   * notifications flow through the MCP transport and a partial
+   * results.json is persisted on timeout / kill. Downstream consumers
+   * can then hand the runId to `mandu.ate.heal` even when the 10-min
+   * watchdog fired mid-run.
+   *
+   * Flow:
+   *  1. Create a progress tracker whose `sendProgress` callback emits
+   *     `notifications/progress` through `server` (when one was given).
+   *  2. Subscribe to "ate" events on the eventBus and feed each payload
+   *     that has a string `kind` into `tracker.handle`.
+   *  3. Await `ateRun(input)` and return its result unchanged on success.
+   *  4. If `ateRun` throws, build a `PartialRunResults` record from the
+   *     tracker snapshot, pass it to `writePartialResults`, and return a
+   *     structured failure object instead of rethrowing.
+   *  5. Always unsubscribe from the eventBus in `finally`.
+   *
+   * @param input - Options forwarded as-is to `ateRun`; `input.repoRoot`
+   *   is additionally handed to `writePartialResults` on failure.
+   * @param opts - `progressToken`, when provided, is attached to every
+   *   progress notification; otherwise the runId from the tracker
+   *   snapshot is used as the token.
+   * @returns The `ateRun` result on success, or
+   *   `{ ok: false, error, partial, resultsPath, runId }` when `ateRun`
+   *   threw.
+   *
+   * NOTE(review): `writePartialResults` is called inside the `catch`
+   * block without its own guard. If it can throw, that exception would
+   * propagate in place of the structured failure result — confirm
+   * against its implementation in ./ate-run.js.
+   */
+  const runWithObservability = async (
+    input: Parameters<typeof ateRun>[0],
+    opts: { progressToken?: string | number } = {},
+  ) => {
+    const started = new Date().toISOString(); // captured before the run starts; reported as startedAt on failure
+    const tracker = createAteProgressTracker({
+      progressToken: opts.progressToken,
+      sendProgress: async (progress, total, message) => {
+        if (!server) return; // no Server instance was passed in, so progress is a no-op
+        const snap = tracker.snapshot();
+        const token = opts.progressToken ?? snap.runId; // explicit token wins; otherwise fall back to the tracked runId
+        if (!token) return; // neither a token nor a runId is available to tag the notification
+        try {
+          await server.notification({
+            method: "notifications/progress",
+            params: { progressToken: token, progress, total, message },
+          });
+        } catch {
+          /* transport offline — never fail the run */
+        }
+      },
+    });
+    const unsubscribe = eventBus.on("ate", (event) => {
+      try {
+        const data = event.data as unknown as AteMonitorEvent | undefined;
+        if (!data || typeof data.kind !== "string") return; // ignore payloads that lack a string kind
+        tracker.handle(data);
+      } catch {
+        /* swallow — never break the run */
+      }
+    });
+    try {
+      return await ateRun(input);
+    } catch (err) {
+      const message = err instanceof Error ? err.message : String(err);
+      const isTimeout = /timed out/i.test(message); // timeout is inferred purely from the error message text
+      const snap = tracker.snapshot();
+      const partial: PartialRunResults = {
+        runId: snap.runId ?? `unknown-${Date.now()}`, // placeholder id when the snapshot has no runId
+        status: isTimeout ? "timed_out" : "error",
+        graphVersion: snap.graphVersion,
+        completedSpecs: snap.completedSpecs,
+        inProgressSpec: snap.inProgressSpec,
+        failures: snap.failures,
+        startedAt: started,
+        killedAt: new Date().toISOString(),
+        error: message,
+      };
+      const resultsPath = writePartialResults(input.repoRoot, partial); // NOTE(review): unguarded call inside catch; confirm it cannot throw
+      return {
+        ok: false,
+        error: `ateRun failed: ${message}`,
+        partial,
+        resultsPath,
+        runId: partial.runId,
+      };
+    } finally {
+      try {
+        unsubscribe();
+      } catch {
+        /* no-op */
+      }
+    }
+  };
+  // `projectRoot` is intentionally unused by this factory today. It is
+  // reserved for future use (progress capability detection); the `void`
+  // expression below only marks the parameter as deliberately unreferenced.
+  void projectRoot;
   return {
     "mandu.ate.extract": async (args: Record<string, unknown>) => {
       const { repoRoot, tsconfigPath, routeGlobs, buildSalt } = args as {
@@ -308,14 +404,18 @@ export function ateTools(projectRoot: string) {
       return ateGenerate({ repoRoot, oracleLevel, onlyRoutes });
     },
     "mandu.ate.run": async (args: Record<string, unknown>) => {
-      const { repoRoot, baseURL, ci, headless, browsers } = args as {
+      const { repoRoot, baseURL, ci, headless, browsers, progressToken } = args as {
         repoRoot: string;
         baseURL?: string;
         ci?: boolean;
         headless?: boolean;
         browsers?: ("chromium" | "firefox" | "webkit")[];
+        progressToken?: string | number;
       };
-      return await ateRun({ repoRoot, baseURL, ci, headless, browsers });
+      return await runWithObservability(
+        { repoRoot, baseURL, ci, headless, browsers },
+        { progressToken },
+      );
     },
     "mandu.ate.report": async (args: Record<string, unknown>) => {
       const { repoRoot, runId, startedAt, finishedAt, exitCode, oracleLevel, format, impact } = args as {

package/src/tools/index.ts CHANGED Viewed

@@ -160,7 +160,19 @@ interface ToolModule {
     server?: Server,
     monitor?: ActivityMonitor
   ) => Record<string, (args: Record<string, unknown>) => Promise<unknown>>;
+  /**
+   * Hard requirement: skip registration entirely when `server` is
+   * absent. Used for tools that cannot function without MCP transport
+   * access (e.g. brain, project).
+   */
   requiresServer?: boolean;
+  /**
+   * Soft requirement: forward the `Server` instance when one is
+   * available, but register the tool either way. Used for tools that
+   * gracefully degrade (e.g. notifications/progress silently no-ops
+   * when the transport isn't attached).
+   */
+  acceptsServer?: boolean;
 }
 /**
@@ -182,10 +194,14 @@ const TOOL_MODULES: ToolModule[] = [
   { category: "runtime", definitions: runtimeToolDefinitions, handlers: runtimeTools },
   { category: "seo", definitions: seoToolDefinitions, handlers: seoTools },
   { category: "project", definitions: projectToolDefinitions, handlers: projectTools as ToolModule["handlers"], requiresServer: true },
-  { category: "ate", definitions: ateToolDefinitions, handlers: ateTools as ToolModule["handlers"] },
+  // ate + ate-run accept an optional Server so notifications/progress
+  // can flow (issue #238). `acceptsServer: true` forwards the server
+  // when available but still registers when it isn't — callers that
+  // boot without an MCP transport get progress no-oped silently.
+  { category: "ate", definitions: ateToolDefinitions, handlers: ateTools as ToolModule["handlers"], acceptsServer: true },
   { category: "ate-phase5", definitions: atePhase5ToolDefinitions, handlers: createAtePhase5Handlers as unknown as ToolModule["handlers"] },
   { category: "ate-context", definitions: ateContextToolDefinitions, handlers: ateContextTools },
-  { category: "ate-run", definitions: ateRunToolDefinitions, handlers: ateRunTools },
+  { category: "ate-run", definitions: ateRunToolDefinitions, handlers: ateRunTools as ToolModule["handlers"], acceptsServer: true },
   { category: "ate-flakes", definitions: ateFlakesToolDefinitions, handlers: ateFlakesTools },
   { category: "ate-prompt", definitions: atePromptToolDefinitions, handlers: atePromptTools },
   { category: "ate-exemplar", definitions: ateExemplarToolDefinitions, handlers: ateExemplarTools },
@@ -290,13 +306,21 @@ export function registerBuiltinTools(
     }
     try {
-      const handlers = module.requiresServer
-        ? (module.handlers as (root: string, srv: Server, mon: ActivityMonitor) => Record<string, (args: Record<string, unknown>) => Promise<unknown>>)(
-            projectRoot,
-            server!,
-            monitor!
-          )
-        : module.handlers(projectRoot);
+      let handlers: Record<string, (args: Record<string, unknown>) => Promise<unknown>>;
+      if (module.requiresServer) {
+        handlers = (module.handlers as (root: string, srv: Server, mon: ActivityMonitor) => Record<string, (args: Record<string, unknown>) => Promise<unknown>>)(
+          projectRoot,
+          server!,
+          monitor!,
+        );
+      } else if (module.acceptsServer) {
+        // Forward the Server when available; fall back to just projectRoot.
+        handlers = server
+          ? module.handlers(projectRoot, server)
+          : module.handlers(projectRoot);
+      } else {
+        handlers = module.handlers(projectRoot);
+      }
       const plugins = moduleToPlugins(module.definitions, handlers);
       mcpToolRegistry.registerAll(plugins, module.category);