@mandujs/mcp 0.23.0 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/tools/ate-boundary-probe.ts +109 -0
- package/src/tools/ate-context.ts +159 -96
- package/src/tools/ate-coverage.ts +71 -0
- package/src/tools/ate-exemplar.ts +92 -0
- package/src/tools/ate-flakes.ts +90 -0
- package/src/tools/ate-prompt.ts +146 -0
- package/src/tools/ate-recall.ts +85 -0
- package/src/tools/ate-remember.ts +79 -0
- package/src/tools/ate-run.ts +154 -0
- package/src/tools/ate-save.ts +160 -0
- package/src/tools/ate.ts +34 -7
- package/src/tools/index.ts +39 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mandujs/mcp",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.25.0",
|
|
4
4
|
"description": "Mandu MCP Server - Agent-native interface for Mandu framework operations",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./src/index.ts",
|
|
@@ -35,7 +35,7 @@
|
|
|
35
35
|
},
|
|
36
36
|
"dependencies": {
|
|
37
37
|
"@mandujs/core": "^0.37.0",
|
|
38
|
-
"@mandujs/ate": "^0.
|
|
38
|
+
"@mandujs/ate": "^0.22.0",
|
|
39
39
|
"@mandujs/skills": "^16.0.0",
|
|
40
40
|
"@modelcontextprotocol/sdk": "^1.25.3"
|
|
41
41
|
},
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `mandu_ate_boundary_probe` — Phase B.1 deterministic boundary-value
|
|
3
|
+
* generator for Zod contracts.
|
|
4
|
+
*
|
|
5
|
+
* See docs/ate/phase-b-spec.md §B.1 for the full I/O shape. Agents
|
|
6
|
+
* feed the returned probe set into `mandu_ate_prompt({ kind:
|
|
7
|
+
* "property_based" })` to produce adversarial specs.
|
|
8
|
+
*
|
|
9
|
+
* Snake_case tool name (§11 decision #4). Read-only.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import type { Tool } from "@modelcontextprotocol/sdk/types.js";
|
|
13
|
+
import { generateBoundaryProbes } from "@mandujs/ate";
|
|
14
|
+
|
|
15
|
+
/**
 * MCP tool descriptor for `mandu_ate_boundary_probe`.
 *
 * Only `repoRoot` is required by the schema; the handler additionally
 * insists on one of `contractName` / `contractFile`.
 */
export const ateBoundaryProbeToolDefinitions: Tool[] = [
  {
    name: "mandu_ate_boundary_probe",
    annotations: { readOnlyHint: true },
    // Assembled from fragments purely to keep source lines short.
    description: [
      "Phase B.1 deterministic boundary probe for Zod contracts. Reads a ",
      "*.contract.ts file, parses request-body schemas per HTTP method, and ",
      "returns a deterministic set of probe values per field — one per ",
      "category (valid / invalid_format / boundary_min / boundary_max / ",
      "empty / null / type_mismatch / enum_reject / missing_required). ",
      "Every probe also carries the expectedStatus code derived from the ",
      "contract's response map (400/422 for invalid, 200/201 for valid). ",
      "The output is stamped with graphVersion for agent cache ",
      "invalidation. No LLM. No runtime Zod evaluation — source text is ",
      "parsed directly. Default depth 1, max 3.",
    ].join(""),
    inputSchema: {
      type: "object",
      properties: {
        repoRoot: {
          type: "string",
          description: "Absolute path to the Mandu project root.",
        },
        contractName: {
          type: "string",
          description:
            "Contract identifier. Usually the basename of the contract file (e.g. 'SignupContract' or 'api-signup').",
        },
        contractFile: {
          type: "string",
          description: "Direct absolute path to the contract file (bypasses name resolution).",
        },
        method: {
          type: "string",
          enum: ["GET", "POST", "PUT", "PATCH", "DELETE"],
          description: "Optional HTTP method filter. Omit to probe every declared method.",
        },
        depth: {
          type: "number",
          description: "Recursion depth for nested z.object() fields. Default 1, max 3.",
        },
      },
      required: ["repoRoot"],
    },
  },
];
|
|
62
|
+
|
|
63
|
+
/**
 * Builds the handler map for the `mandu_ate_boundary_probe` MCP tool.
 *
 * The handler narrows its loosely-typed MCP arguments, delegates to
 * `generateBoundaryProbes`, and always resolves to a plain result object:
 * `{ ok: true, ... }` on success or `{ ok: false, error }` on any failure
 * (it never rejects).
 *
 * @param _projectRoot Unused; every call names its own `repoRoot`.
 * @returns An object keyed by tool name whose value is the async handler.
 */
export function ateBoundaryProbeTools(_projectRoot: string) {
  const HTTP_METHODS = ["GET", "POST", "PUT", "PATCH", "DELETE"] as const;
  type ProbeMethod = (typeof HTTP_METHODS)[number];

  // MCP arguments arrive as `unknown`; accept only non-empty strings.
  const nonEmptyString = (value: unknown): string | undefined =>
    typeof value === "string" && value.length > 0 ? value : undefined;

  return {
    mandu_ate_boundary_probe: async (args: Record<string, unknown>) => {
      const repoRoot = nonEmptyString(args.repoRoot);
      const contractName = nonEmptyString(args.contractName);
      const contractFile = nonEmptyString(args.contractFile);

      if (!repoRoot) {
        return { ok: false, error: "repoRoot is required" };
      }
      if (!contractName && !contractFile) {
        return { ok: false, error: "contractName or contractFile is required" };
      }

      // A falsy method means "probe every declared method". Anything else
      // must match the enum advertised in the tool's input schema.
      let method: ProbeMethod | undefined;
      if (args.method) {
        const requested = args.method;
        const matched = HTTP_METHODS.find((candidate) => candidate === requested);
        if (!matched) {
          return {
            ok: false,
            error: `method must be one of: ${HTTP_METHODS.join(", ")}`,
          };
        }
        method = matched;
      }

      // NaN is not a usable depth; fall back to the generator's default.
      const depth =
        typeof args.depth === "number" && !Number.isNaN(args.depth)
          ? args.depth
          : undefined;

      try {
        const result = await generateBoundaryProbes({
          repoRoot,
          contractName,
          contractFile,
          ...(method ? { method } : {}),
          ...(depth !== undefined ? { depth } : {}),
        });
        return {
          ok: true,
          contractName: result.contractName,
          contractFile: result.contractFile,
          graphVersion: result.graphVersion,
          probes: result.probes,
          warnings: result.warnings,
        };
      } catch (err) {
        return {
          ok: false,
          error: err instanceof Error ? err.message : String(err),
        };
      }
    },
  };
}
|
package/src/tools/ate-context.ts
CHANGED
|
@@ -1,96 +1,159 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* `mandu_ate_context` — Phase A.1 agent-native context tool.
|
|
3
|
-
*
|
|
4
|
-
* See `docs/ate/roadmap-v2-agent-native.md` §4.1 for the full design
|
|
5
|
-
* and §11 decision 4 for the naming convention (snake_case).
|
|
6
|
-
*
|
|
7
|
-
* Semantics: return a single JSON blob that an LLM-driven agent
|
|
8
|
-
* (Cursor / Claude Code / Codex) can read *before* generating a test.
|
|
9
|
-
* The blob fuses:
|
|
10
|
-
*
|
|
11
|
-
* 1. Route metadata (pattern, file, isRedirect, static params)
|
|
12
|
-
* 2. Contract surface (request/response schemas + examples)
|
|
13
|
-
* 3. Middleware chain (canonical name + options + file)
|
|
14
|
-
* 4. Guard preset + suggested data-route-id selectors
|
|
15
|
-
* 5. Fixture recommendations (createTestSession, createTestDb, ...)
|
|
16
|
-
* 6. Existing specs (user-written vs ate-generated, last-run status)
|
|
17
|
-
* 7. Related routes (siblings + ui-entry-point pairing)
|
|
18
|
-
*
|
|
19
|
-
* The handler itself is deliberately thin — almost all work is done
|
|
20
|
-
* inside `@mandujs/ate`'s `buildContext` so the same logic is
|
|
21
|
-
* importable from non-MCP callers (CLI, tests).
|
|
22
|
-
*/
|
|
23
|
-
import type { Tool } from "@modelcontextprotocol/sdk/types.js";
|
|
24
|
-
import {
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
"
|
|
39
|
-
"
|
|
40
|
-
"
|
|
41
|
-
"
|
|
42
|
-
"
|
|
43
|
-
"
|
|
44
|
-
"
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
description:
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
description:
|
|
61
|
-
"
|
|
62
|
-
},
|
|
63
|
-
|
|
64
|
-
type: "string",
|
|
65
|
-
description:
|
|
66
|
-
"Route
|
|
67
|
-
},
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
}
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
}
|
|
1
|
+
/**
|
|
2
|
+
* `mandu_ate_context` — Phase A.1 agent-native context tool.
|
|
3
|
+
*
|
|
4
|
+
* See `docs/ate/roadmap-v2-agent-native.md` §4.1 for the full design
|
|
5
|
+
* and §11 decision 4 for the naming convention (snake_case).
|
|
6
|
+
*
|
|
7
|
+
* Semantics: return a single JSON blob that an LLM-driven agent
|
|
8
|
+
* (Cursor / Claude Code / Codex) can read *before* generating a test.
|
|
9
|
+
* The blob fuses:
|
|
10
|
+
*
|
|
11
|
+
* 1. Route metadata (pattern, file, isRedirect, static params)
|
|
12
|
+
* 2. Contract surface (request/response schemas + examples)
|
|
13
|
+
* 3. Middleware chain (canonical name + options + file)
|
|
14
|
+
* 4. Guard preset + suggested data-route-id selectors
|
|
15
|
+
* 5. Fixture recommendations (createTestSession, createTestDb, ...)
|
|
16
|
+
* 6. Existing specs (user-written vs ate-generated, last-run status)
|
|
17
|
+
* 7. Related routes (siblings + ui-entry-point pairing)
|
|
18
|
+
*
|
|
19
|
+
* The handler itself is deliberately thin — almost all work is done
|
|
20
|
+
* inside `@mandujs/ate`'s `buildContext` so the same logic is
|
|
21
|
+
* importable from non-MCP callers (CLI, tests).
|
|
22
|
+
*/
|
|
23
|
+
import type { Tool } from "@modelcontextprotocol/sdk/types.js";
|
|
24
|
+
import {
|
|
25
|
+
ateContext,
|
|
26
|
+
appendMemoryEvent,
|
|
27
|
+
nowTimestamp,
|
|
28
|
+
readMemoryEvents,
|
|
29
|
+
} from "@mandujs/ate";
|
|
30
|
+
|
|
31
|
+
/**
 * MCP tool descriptor for `mandu_ate_context`.
 *
 * `readOnlyHint` is deliberately `false`: besides reading the interaction
 * graph, the handler in this file may append a `coverage_snapshot` memory
 * event under `repoRoot` (via `appendMemoryEvent`), so the tool can modify
 * its environment and must not be advertised as read-only.
 */
export const ateContextToolDefinitions: Tool[] = [
  {
    name: "mandu_ate_context",
    annotations: {
      readOnlyHint: false,
    },
    description:
      "Phase A.1 agent-native context. Returns a single JSON blob containing the " +
      "Mandu-specific semantic context an LLM needs to write a correct test: " +
      "route metadata, contract (with examples), middleware chain, guard preset + " +
      "suggested [data-route-id] selectors, recommended @mandujs/core/testing fixtures, " +
      "existing specs (with last-run status when .mandu/ate-last-run.json is present), " +
      "and related routes (sibling + ui-entry-point pairing). " +
      "Scope values: " +
      "'project' = repo summary with route + coverage counts; " +
      "'route' = single-route deep view (requires id or route); " +
      "'filling' = server-handler view with middleware + actions (requires id); " +
      "'contract' = request/response + examples for a contract definition. " +
      "Run mandu.ate.extract first — this tool reads .mandu/interaction-graph.json.",
    inputSchema: {
      type: "object",
      properties: {
        repoRoot: {
          type: "string",
          description: "Absolute path to the Mandu project root",
        },
        scope: {
          type: "string",
          enum: ["project", "route", "filling", "contract"],
          description:
            "project (summary) | route (single route deep view) | filling (handler view) | contract (contract definition view)",
        },
        id: {
          type: "string",
          description:
            "Route id ('api-signup'), filling id ('filling:api-signup'), or contract name. Optional — supply id OR route.",
        },
        route: {
          type: "string",
          description:
            "Route pattern match (e.g. '/api/signup'). Optional — supply id OR route.",
        },
      },
      required: ["repoRoot", "scope"],
    },
  },
];
|
|
78
|
+
|
|
79
|
+
/**
 * Builds the handler map for the `mandu_ate_context` MCP tool.
 *
 * The handler validates `repoRoot` / `scope`, asks `ateContext` for the
 * context blob, and — at most once per day, best-effort — appends a
 * `coverage_snapshot` memory event derived from that blob.
 *
 * It always resolves to a plain result object: `{ ok: true, context }` on
 * success or `{ ok: false, error }` on failure, matching the other ATE
 * tool handlers. Failures inside `ateContext` are reported that way too
 * instead of rejecting the promise.
 *
 * @param _projectRoot Unused; every call names its own `repoRoot`.
 * @returns An object keyed by tool name whose value is the async handler.
 */
export function ateContextTools(_projectRoot: string) {
  return {
    mandu_ate_context: async (args: Record<string, unknown>) => {
      const { repoRoot, scope, id, route } = args as {
        repoRoot: string;
        scope: "project" | "route" | "filling" | "contract";
        id?: string;
        route?: string;
      };
      // Guard the two required inputs explicitly so a mis-invocation yields
      // a clear error rather than a cascading filesystem failure.
      if (!repoRoot || typeof repoRoot !== "string") {
        return { ok: false, error: "repoRoot is required" };
      }
      if (!scope) {
        return { ok: false, error: "scope is required" };
      }

      try {
        const blob = await ateContext({ repoRoot, scope, id, route });

        // Phase B.2 — the first call of the day writes a `coverage_snapshot`
        // event. Counts are only meaningful for the `project` scope; other
        // scopes record zeros (see countTotalRoutes / countWithSpec).
        try {
          if (!snapshottedToday(repoRoot)) {
            const withSpec = countWithSpec(blob);
            const withProperty = 0; // Phase B — property-test detection is part of `mandu_ate_coverage`.
            const totalRoutes = countTotalRoutes(blob);
            appendMemoryEvent(repoRoot, {
              kind: "coverage_snapshot",
              timestamp: nowTimestamp(),
              routes: totalRoutes,
              withSpec,
              withProperty,
            });
          }
        } catch {
          // Deliberately swallowed — the snapshot is best-effort and must
          // never turn a successful context lookup into a failure.
        }

        return { ok: true, context: blob };
      } catch (err) {
        return {
          ok: false,
          error: err instanceof Error ? err.message : String(err),
        };
      }
    },
  };
}
|
|
125
|
+
|
|
126
|
+
/**
 * Reports whether the memory log under `repoRoot` already holds a
 * `coverage_snapshot` event whose timestamp starts with today's
 * `YYYY-MM-DD` (taken from `Date#toISOString`, i.e. the UTC date).
 *
 * Any failure while reading or inspecting events counts as "not yet".
 */
function snapshottedToday(repoRoot: string): boolean {
  // First ten characters of an ISO-8601 string are the calendar date.
  const dayOf = (iso: string): string => iso.slice(0, 10);

  try {
    const history = readMemoryEvents(repoRoot);
    const currentDay = dayOf(new Date().toISOString());
    return history.some((event) => {
      if (event.kind !== "coverage_snapshot") {
        return false;
      }
      return dayOf(event.timestamp) === currentDay;
    });
  } catch {
    return false;
  }
}
|
|
137
|
+
|
|
138
|
+
/**
 * Extracts the total route count from a context blob.
 *
 * Only a `project`-scope blob with a numeric `summary.routes` yields a
 * count. Every other input yields 0, so a snapshot taken from a narrower
 * scope still records its timestamp without inventing totals.
 */
function countTotalRoutes(blob: unknown): number {
  if (blob === null || typeof blob !== "object") {
    return 0;
  }
  const view = blob as { scope?: string; summary?: { routes?: number } };
  if (view.scope !== "project") {
    return 0;
  }
  const summary = view.summary;
  return summary && typeof summary.routes === "number" ? summary.routes : 0;
}
|
|
148
|
+
|
|
149
|
+
/**
 * Counts the routes in a `project`-scope context blob that have at least
 * one existing spec (`existingSpecCount > 0`; a missing count is 0).
 *
 * Any other input — non-object, different scope, or no `routes` array —
 * yields 0.
 */
function countWithSpec(blob: unknown): number {
  if (blob === null || typeof blob !== "object") {
    return 0;
  }
  const view = blob as {
    scope?: string;
    routes?: Array<{ existingSpecCount: number }>;
  };
  if (view.scope !== "project" || !Array.isArray(view.routes)) {
    return 0;
  }
  return view.routes.reduce(
    (covered, entry) => ((entry.existingSpecCount ?? 0) > 0 ? covered + 1 : covered),
    0,
  );
}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `mandu_ate_coverage` — Phase B.4 quantified gap report.
|
|
3
|
+
*
|
|
4
|
+
* See docs/ate/phase-b-spec.md §B.5 for the output shape. Agents call
|
|
5
|
+
* this to discover `topGaps` and prioritize spec generation work.
|
|
6
|
+
*
|
|
7
|
+
* Snake_case (§11 decision #4). Read-only.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import type { Tool } from "@modelcontextprotocol/sdk/types.js";
|
|
11
|
+
import { computeCoverage } from "@mandujs/ate";
|
|
12
|
+
|
|
13
|
+
/**
 * MCP tool descriptor for `mandu_ate_coverage`.
 *
 * `repoRoot` is the only required input; `scope` and `target` narrow the
 * scan when supplied.
 */
export const ateCoverageToolDefinitions: Tool[] = [
  {
    name: "mandu_ate_coverage",
    annotations: { readOnlyHint: true },
    // Assembled from fragments purely to keep source lines short.
    description: [
      "Phase B.4 coverage metrics. Returns the 3-axis coverage report: ",
      "(1) routes with unit / integration / e2e spec; (2) contracts with ",
      "full / partial / no boundary-probe coverage; (3) middleware ",
      "invariants (csrf / rate-limit / session / auth / i18n) tagged as ",
      "covered / partial / missing. Also returns a `topGaps` list sorted ",
      "high → medium → low severity. Stamped with graphVersion for ",
      "agent cache invalidation.",
    ].join(""),
    inputSchema: {
      type: "object",
      properties: {
        repoRoot: {
          type: "string",
          description: "Absolute path to the Mandu project root.",
        },
        scope: {
          type: "string",
          enum: ["project", "route", "contract"],
          description:
            "Default 'project'. Use 'route' (with target=routeId) or 'contract' (with target=contractName) for narrow scans.",
        },
        target: {
          type: "string",
          description: "Route id or contract basename when scope is not 'project'.",
        },
      },
      required: ["repoRoot"],
    },
  },
];
|
|
49
|
+
|
|
50
|
+
/**
 * Builds the handler map for the `mandu_ate_coverage` MCP tool.
 *
 * The handler forwards `repoRoot`, a scope (defaulting to `"project"`) and
 * an optional non-empty string `target` to `computeCoverage`, then spreads
 * the returned metrics into `{ ok: true, ... }`. Failures are reported as
 * `{ ok: false, error }`.
 *
 * @param _projectRoot Unused; every call names its own `repoRoot`.
 * @returns An object keyed by tool name whose value is the async handler.
 */
export function ateCoverageTools(_projectRoot: string) {
  const mandu_ate_coverage = async (args: Record<string, unknown>) => {
    const root = args.repoRoot;
    if (typeof root !== "string" || root === "") {
      return { ok: false, error: "repoRoot is required" };
    }

    const requestedScope = args.scope as "project" | "route" | "contract" | undefined;
    const options: { scope: "project" | "route" | "contract"; target?: string } = {
      scope: requestedScope ?? "project",
    };
    // Only a non-empty string target is forwarded; anything else is dropped.
    if (typeof args.target === "string" && args.target) {
      options.target = args.target;
    }

    try {
      const metrics = await computeCoverage(root, options);
      return { ok: true, ...metrics };
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      return { ok: false, error: message };
    }
  };

  return { mandu_ate_coverage };
}
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `mandu_ate_exemplar` — Phase A.3 exemplar browser.
|
|
3
|
+
*
|
|
4
|
+
* See `docs/ate/roadmap-v2-agent-native.md` §4.3. Returns the
|
|
5
|
+
* `@ate-exemplar:` tagged tests for a given kind so an agent can
|
|
6
|
+
* few-shot against them without paying the "compose the whole prompt"
|
|
7
|
+
* token cost.
|
|
8
|
+
*
|
|
9
|
+
* Snake_case tool name (§11 decision #4). Read-only.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import type { Tool } from "@modelcontextprotocol/sdk/types.js";
|
|
13
|
+
import { scanExemplars, type Exemplar } from "@mandujs/ate";
|
|
14
|
+
|
|
15
|
+
/**
 * MCP tool descriptor for `mandu_ate_exemplar`.
 *
 * Requires `repoRoot` and `kind`; `limit` and `includeAnti` are optional
 * refinements.
 */
export const ateExemplarToolDefinitions: Tool[] = [
  {
    name: "mandu_ate_exemplar",
    annotations: { readOnlyHint: true },
    // Assembled from fragments purely to keep source lines short.
    description: [
      "Phase A.3 agent-native exemplar browser. Returns up to `limit` tests ",
      "tagged with `@ate-exemplar: kind=<kind>` from the repo. Each entry ",
      "carries the file path, start/end line, tags, and the full source of the ",
      "test() / it() / describe() call that follows the tag. Set ",
      "includeAnti:true to also surface `@ate-exemplar-anti:` (DO-NOT-do-this) ",
      "cases. Exemplars are manually curated (roadmap §11 decision 2) — no ",
      "auto-heuristic. Use this when you want few-shot examples without paying ",
      "for the full composed prompt.",
    ].join(""),
    inputSchema: {
      type: "object",
      properties: {
        repoRoot: {
          type: "string",
          description: "Absolute path to the Mandu project root.",
        },
        kind: {
          type: "string",
          description:
            "Match against the tag's `kind=` attribute. Examples: filling_unit, filling_integration, e2e_playwright.",
        },
        limit: {
          type: "number",
          description: "Max entries to return. Default 5.",
        },
        includeAnti: {
          type: "boolean",
          description:
            "Also include @ate-exemplar-anti markers (default false — only positive exemplars).",
        },
      },
      required: ["repoRoot", "kind"],
    },
  },
];
|
|
56
|
+
|
|
57
|
+
/**
 * Builds the handler map for the `mandu_ate_exemplar` MCP tool.
 *
 * The handler scans the repo with `scanExemplars`, keeps the entries whose
 * `kind` matches, and returns:
 *   - up to `limit` positive (non-anti) exemplars, and
 *   - when `includeAnti` is `true`, up to `max(1, floor(limit / 2))`
 *     anti-exemplars appended after the positives.
 *
 * `total` is the number of matching entries (positive and anti) before any
 * limiting. Failures are reported as `{ ok: false, error }`.
 *
 * @param _projectRoot Unused; every call names its own `repoRoot`.
 * @returns An object keyed by tool name whose value is the async handler.
 */
export function ateExemplarTools(_projectRoot: string) {
  return {
    mandu_ate_exemplar: async (args: Record<string, unknown>) => {
      const repoRoot = args.repoRoot as string | undefined;
      const kind = args.kind as string | undefined;
      // `limit` feeds `slice(0, limit)`. A negative value would count from
      // the END of the list and NaN would silently yield nothing, so
      // normalise: NaN / non-number -> default 5, negatives -> 0, and
      // fractions are floored.
      const requestedLimit =
        typeof args.limit === "number" && !Number.isNaN(args.limit) ? args.limit : 5;
      const limit = Math.max(0, Math.floor(requestedLimit));
      const includeAnti = args.includeAnti === true;

      if (!repoRoot || typeof repoRoot !== "string") {
        return { ok: false, error: "'repoRoot' is required" };
      }
      if (!kind || typeof kind !== "string") {
        return { ok: false, error: "'kind' is required" };
      }

      try {
        const all = await scanExemplars(repoRoot);
        const filtered = all.filter((e) => e.kind === kind);
        const selected: Exemplar[] = filtered.filter((e) => !e.anti).slice(0, limit);
        if (includeAnti) {
          // Antis are appended on top of the positives, capped at half the
          // limit (minimum one) so they never outnumber the positive budget.
          const antiBudget = Math.max(1, Math.floor(limit / 2));
          selected.push(...filtered.filter((e) => e.anti).slice(0, antiBudget));
        }

        return { ok: true, exemplars: selected, total: filtered.length };
      } catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        return { ok: false, error: msg };
      }
    },
  };
}
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `mandu_ate_flakes` — Phase A.2 flake detector surface.
|
|
3
|
+
*
|
|
4
|
+
* Returns every spec whose rolling pass/fail transition ratio exceeds
|
|
5
|
+
* `minScore` (default 0.1) within the last `windowSize` runs
|
|
6
|
+
* (default 20). Agents use this to prioritize stabilization work.
|
|
7
|
+
*
|
|
8
|
+
* Data source: `.mandu/ate-run-history.jsonl`, appended to by
|
|
9
|
+
* `runSpec`. When no history is present we return an empty array —
|
|
10
|
+
* not an error.
|
|
11
|
+
*
|
|
12
|
+
* Snake_case naming per §11 decision 4.
|
|
13
|
+
*/
|
|
14
|
+
import type { Tool } from "@modelcontextprotocol/sdk/types.js";
|
|
15
|
+
import { summarizeFlakes } from "@mandujs/ate";
|
|
16
|
+
|
|
17
|
+
/**
 * MCP tool descriptor for `mandu_ate_flakes`.
 *
 * `repoRoot` is required; `windowSize` (>= 2) and `minScore` (0..1) tune
 * the flake filter.
 */
export const ateFlakesToolDefinitions: Tool[] = [
  {
    name: "mandu_ate_flakes",
    annotations: { readOnlyHint: true },
    // Assembled from fragments purely to keep source lines short.
    description: [
      "Phase A.2 flake detector. Reads `.mandu/ate-run-history.jsonl` and returns specs ",
      "whose pass/fail status flips often within the rolling window. `flakeScore` = ",
      "status_transitions / (N - 1) over last `windowSize` non-skipped runs. ",
      "Pure-pass PPPPP = 0.0 (stable), pure-fail FFFFF = 0.0 (broken, NOT flaky), ",
      "alternating PFPF = 1.0. Returns an empty list when history is empty or no spec ",
      "clears `minScore`. Use this to prioritize which flaky tests to fix first — feed ",
      "a specPath from the result into mandu_ate_run for a re-run + full failure.v1 ",
      "diagnostic.",
    ].join(""),
    inputSchema: {
      type: "object",
      properties: {
        repoRoot: {
          type: "string",
          description: "Absolute path to the Mandu project root",
        },
        windowSize: {
          type: "number",
          minimum: 2,
          description: "Rolling window size. Default: 20.",
        },
        minScore: {
          type: "number",
          minimum: 0,
          maximum: 1,
          description: "Filter threshold for flakeScore. Default: 0.1.",
        },
      },
      required: ["repoRoot"],
    },
  },
];
|
|
55
|
+
|
|
56
|
+
/**
 * Builds the handler map for the `mandu_ate_flakes` MCP tool.
 *
 * The handler validates `repoRoot`, `windowSize` and `minScore`, then
 * returns whatever `summarizeFlakes` reports as `{ ok: true, flakyTests }`.
 * Validation problems and failures inside `summarizeFlakes` are reported as
 * `{ ok: false, error }`.
 *
 * @param _projectRoot Unused; every call names its own `repoRoot`.
 * @returns An object keyed by tool name whose value is the async handler.
 */
export function ateFlakesTools(_projectRoot: string) {
  return {
    mandu_ate_flakes: async (args: Record<string, unknown>) => {
      const { repoRoot, windowSize, minScore } = args as {
        repoRoot: string;
        windowSize?: number;
        minScore?: number;
      };
      if (!repoRoot || typeof repoRoot !== "string") {
        return { ok: false, error: "repoRoot is required" };
      }
      // `NaN < 2` is false, so NaN must be rejected explicitly — otherwise
      // it would slip past the lower bound and reach summarizeFlakes.
      if (
        typeof windowSize === "number" &&
        (Number.isNaN(windowSize) || windowSize < 2)
      ) {
        return { ok: false, error: "windowSize must be >= 2" };
      }
      if (
        typeof minScore === "number" &&
        (minScore < 0 || minScore > 1 || !Number.isFinite(minScore))
      ) {
        return { ok: false, error: "minScore must be in [0, 1]" };
      }
      try {
        const flakyTests = summarizeFlakes(repoRoot, {
          windowSize,
          minScore,
        });
        return { ok: true, flakyTests };
      } catch (err) {
        return {
          ok: false,
          error: `summarizeFlakes failed: ${err instanceof Error ? err.message : String(err)}`,
        };
      }
    },
  };
}
|