@roadmapperai/mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +885 -0
- package/README.md +111 -0
- package/package.json +35 -0
- package/server.mjs +4019 -0
package/server.mjs
ADDED
|
@@ -0,0 +1,4019 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Roadmapper MCP server — zero-dependency stdio JSON-RPC.
|
|
4
|
+
*
|
|
5
|
+
* Exposes a planning surface so an agent can read the roadmap and
|
|
6
|
+
* (when authorized) propose tasks or stamp acceptance grades:
|
|
7
|
+
*
|
|
8
|
+
* list_themes read
|
|
9
|
+
* list_capabilities read (optionally filtered by themeId)
|
|
10
|
+
* list_tasks read (optionally filtered by capabilityId / status)
|
|
11
|
+
* get_task read (full task detail, including acceptance + deps)
|
|
12
|
+
* get_agents_md read (the planning contract)
|
|
13
|
+
* propose_task write (requires SUPABASE_SERVICE_ROLE_KEY)
|
|
14
|
+
* submit_acceptance_grades write (requires SUPABASE_SERVICE_ROLE_KEY)
|
|
15
|
+
*
|
|
16
|
+
* Data sources, in order:
|
|
17
|
+
* 1. Local seed at src/data/roadmap.json (always read).
|
|
18
|
+
* 2. Workspace edits via Supabase REST, when SUPABASE_URL,
|
|
19
|
+
* SUPABASE_WORKSPACE_ID, and either SUPABASE_PUBLISHABLE_KEY or
|
|
20
|
+
* the legacy SUPABASE_ANON_KEY are set. Edits override / extend
|
|
21
|
+
* the seed exactly like the app does.
|
|
22
|
+
* 3. Writes require SUPABASE_SERVICE_ROLE_KEY (bypasses RLS). Without
|
|
23
|
+
* it the write tools return an error result and the read tools
|
|
24
|
+
* still work.
|
|
25
|
+
*
|
|
26
|
+
* Wire-up (Claude Code / Claude Desktop / any MCP client):
|
|
27
|
+
* {
|
|
28
|
+
* "mcpServers": {
|
|
29
|
+
* "roadmapper": {
|
|
30
|
+
* "command": "node",
|
|
31
|
+
* "args": ["/absolute/path/to/roadmap/mcp/server.mjs"],
|
|
32
|
+
* "env": {
|
|
33
|
+
* "SUPABASE_URL": "...",
|
|
34
|
+
* "SUPABASE_PUBLISHABLE_KEY": "sb_publishable_...",
|
|
35
|
+
* "SUPABASE_WORKSPACE_ID": "...",
|
|
36
|
+
* "SUPABASE_SERVICE_ROLE_KEY": "..."
|
|
37
|
+
* }
|
|
38
|
+
* }
|
|
39
|
+
* }
|
|
40
|
+
* }
|
|
41
|
+
*
|
|
42
|
+
* Self-test: `node mcp/server.mjs --selftest` exercises every tool
|
|
43
|
+
* against the local seed and exits 0 on success, 1 on failure. Useful
|
|
44
|
+
* for verifying the install without an MCP client.
|
|
45
|
+
*
|
|
46
|
+
* Speaks the MCP stdio protocol: newline-delimited JSON-RPC 2.0 on
|
|
47
|
+
* stdin/stdout. Logs go to stderr only.
|
|
48
|
+
*/
|
|
49
|
+
|
|
50
|
+
import { readFileSync, existsSync } from "node:fs";
|
|
51
|
+
import { dirname, join, resolve } from "node:path";
|
|
52
|
+
import { fileURLToPath } from "node:url";
|
|
53
|
+
|
|
54
|
+
// Directory containing this module, and its parent (the repo root in a
// development checkout).
const HERE = dirname(fileURLToPath(import.meta.url));
const REPO = resolve(HERE, "..");

// Seed roadmap location. A dev checkout ships it at src/data/roadmap.json
// (the SPA uses the same file). The npm-packaged build has no SPA source
// tree, so readSeed() falls back to an empty roadmap there and the real
// data arrives from Supabase as "edits" layered on top.
const SEED_PATH = join(REPO, "src", "data", "roadmap.json");

// AGENTS.md (the planning rubric) candidates. The copy bundled next to this
// module is preferred so npm consumers get the right rubric without the
// repo; the repo-root copy is the fallback for local development.
const BUNDLED_AGENTS_PATH = join(HERE, "AGENTS.md");
const REPO_AGENTS_PATH = join(REPO, "AGENTS.md");
|
|
67
|
+
|
|
68
|
+
// Identity this server reports to MCP clients.
const PROTOCOL_VERSION = "2024-11-05";
const SERVER_NAME = "roadmapper";
const SERVER_VERSION = "0.6.0";

// Effort size -> working days. Must match src/types.ts EFFORT_DAYS
// (AI-era calibration). The fractional sizes (XS=0.25, S=0.5) get rounded
// up when they are used to project a target date, because date strings
// only have day resolution.
const EFFORT_DAYS = {
  XS: 0.25,
  S: 0.5,
  M: 1,
  L: 3,
  XL: 8,
};

// Closed vocabularies accepted for the corresponding task/capability fields.
const VALID_PRIORITIES = new Set(["P0", "P1", "P2", "P3"]);
const VALID_EFFORTS = new Set(["XS", "S", "M", "L", "XL"]);
const VALID_KINDS = new Set(["feature", "bug", "chore", "spike"]);
const VALID_STATUSES = new Set([
  "delivered",
  "in_progress",
  "planned",
  "exploring",
]);
|
|
80
|
+
|
|
81
|
+
/**
 * Emit a diagnostic line on stderr, prefixed with the server tag.
 * stdout carries the JSON-RPC stream, so diagnostics never go there.
 *
 * @param {...unknown} args - Values forwarded to console.error.
 */
function log(...args) {
  const parts = ["[roadmapper-mcp]", ...args];
  console.error(...parts);
}
|
|
84
|
+
|
|
85
|
+
/**
 * Write one JSON-RPC message to stdout as a single newline-terminated line.
 *
 * @param {unknown} message - Value to serialise with JSON.stringify.
 */
function send(message) {
  const line = `${JSON.stringify(message)}\n`;
  process.stdout.write(line);
}
|
|
88
|
+
|
|
89
|
+
/**
 * Load and parse the seed roadmap at SEED_PATH.
 *
 * Any read or parse failure is logged and replaced with an empty roadmap
 * so the caller always receives a usable object.
 *
 * @returns {object} Parsed seed JSON, or the empty-roadmap fallback.
 */
function readSeed() {
  let seed;
  try {
    const text = readFileSync(SEED_PATH, "utf-8");
    seed = JSON.parse(text);
  } catch (err) {
    log("failed to read seed", err.message);
    seed = { product: { themes: [] }, capabilities: [], tasks: [], sprints: [] };
  }
  return seed;
}
|
|
97
|
+
|
|
98
|
+
/**
 * Return the text of AGENTS.md (the planning rubric).
 *
 * Candidates are tried in order: the copy bundled beside this module
 * (npm install case), then the repo-root copy (local dev). If neither can
 * be read, a fixed placeholder string is returned instead of throwing.
 *
 * @returns {string} Rubric text, or the "not found" placeholder.
 */
function readAgentsMd() {
  for (const candidate of [BUNDLED_AGENTS_PATH, REPO_AGENTS_PATH]) {
    try {
      return readFileSync(candidate, "utf-8");
    } catch {
      // Unreadable candidate: move on to the next one.
    }
  }
  return "AGENTS.md not found — rubric unavailable.";
}
|
|
113
|
+
|
|
114
|
+
/**
 * Pick the key used for read requests from the environment.
 *
 * SUPABASE_PUBLISHABLE_KEY (the new `sb_publishable_…` key) wins over the
 * legacy SUPABASE_ANON_KEY; unset or empty values are skipped.
 *
 * @returns {string|null} The first non-empty key, or null when neither is set.
 */
function readKey() {
  const {
    SUPABASE_PUBLISHABLE_KEY: publishable,
    SUPABASE_ANON_KEY: legacyAnon,
  } = process.env;
  if (publishable) return publishable;
  if (legacyAnon) return legacyAnon;
  return null;
}
|
|
125
|
+
|
|
126
|
+
/**
 * Snapshot the Supabase settings from the environment.
 * Every field is null when its variable is unset or empty.
 *
 * @returns {{url: string|null, readKey: string|null, writeKey: string|null, workspaceId: string|null}}
 */
function supabaseConfig() {
  const env = process.env;
  const orNull = (value) => value || null;
  return {
    url: orNull(env.SUPABASE_URL),
    readKey: readKey(),
    writeKey: orNull(env.SUPABASE_SERVICE_ROLE_KEY),
    workspaceId: orNull(env.SUPABASE_WORKSPACE_ID),
  };
}
|
|
134
|
+
|
|
135
|
+
/**
 * Workspace id named by `.roadmapper/snapshot.json` in the current working
 * directory.
 *
 * The file is read at most once per process; the result (including "absent
 * or unusable") is cached. The snapshot is committed by the
 * snapshot-roadmaps Edge Function into each connected repo's
 * roadmapper-snapshot branch, so when an agent runs from a checkout that
 * has it, the cwd names a workspace unambiguously. This is the safety net
 * against pushing to the wrong workspace.
 */
const SNAPSHOT_FILE = join(".roadmapper", "snapshot.json");

// Cache: undefined = not read yet; null = read, but missing or malformed.
let _snapshotWorkspace = undefined;

/**
 * @returns {string|null} The snapshot's non-empty `workspaceId`, or null
 *   when the file is missing, unreadable, malformed, or has no usable id.
 */
function snapshotWorkspaceId() {
  if (_snapshotWorkspace !== undefined) return _snapshotWorkspace;

  let found = null;
  try {
    const file = join(process.cwd(), SNAPSHOT_FILE);
    if (existsSync(file)) {
      const parsed = JSON.parse(readFileSync(file, "utf8"));
      const id = parsed?.workspaceId;
      if (typeof id === "string" && id.length > 0) found = id;
    }
  } catch {
    // An unreadable / malformed snapshot is non-fatal — the server keeps
    // serving with env + per-call defaults. Operators see the warning
    // when callTool surfaces "no workspace" or via --selftest.
    found = null;
  }

  _snapshotWorkspace = found;
  return _snapshotWorkspace;
}
|
|
171
|
+
|
|
172
|
+
/**
 * Selftest-only hook that overwrites the snapshot cache. It is
 * module-internal and not exposed through MCP.
 *
 * @param {string|null|undefined} value - `undefined` makes the next
 *   snapshotWorkspaceId() call re-read the file from disk; a string or
 *   null is served from the cache as-is.
 */
function __setSnapshotWorkspaceForTest(value) {
  _snapshotWorkspace = value;
}
|
|
178
|
+
|
|
179
|
+
/**
 * Work out which workspace a tool call targets. First truthy value wins:
 *   1. the explicit `workspaceId` argument of the call,
 *   2. `.roadmapper/snapshot.json` in the cwd (committed by the
 *      snapshot-roadmaps cron; names the workspace this repo belongs to),
 *   3. the `SUPABASE_WORKSPACE_ID` environment variable,
 *   4. null.
 *
 * The snapshot outranks the env because it reflects where the agent is
 * working right now, whereas the env reflects where the operator pointed
 * the MCP install at configuration time.
 *
 * Mutators whose explicit `workspaceId` conflicts with the cwd snapshot
 * are refused upstream in `callTool` (the cross-workspace guard).
 *
 * @param {string} [argWorkspaceId] - Workspace id passed on the call.
 * @returns {string|null}
 */
function resolveWorkspaceId(argWorkspaceId) {
  return (
    argWorkspaceId ||
    snapshotWorkspaceId() ||
    (supabaseConfig().workspaceId ?? null)
  );
}
|
|
202
|
+
|
|
203
|
+
/**
 * Fetch the workspace's current entities straight from the normalized
 * tables (Stage 3 Piece 6c — the `workspaces.edits` column was dropped).
 *
 * The service-role key is preferred when present so RLS does not narrow
 * agent reads down to the caller's visible_pillars allow-list.
 *
 * @param {string} [wsIdOverride] - Explicit workspace id for this call.
 * @returns {Promise<{themes: object[], capabilities: object[], tasks: object[]}|null>}
 *   Entities in the legacy camelCase shape, or null when configuration is
 *   incomplete or any request fails (callers fall back to the bundled seed).
 */
async function readWorkspaceProjected(wsIdOverride) {
  const cfg = supabaseConfig();
  const workspaceId = resolveWorkspaceId(wsIdOverride);
  const key = cfg.writeKey || cfg.readKey;
  if (!(cfg.url && key && workspaceId)) return null;

  const filter = `workspace_id=eq.${encodeURIComponent(workspaceId)}`;
  const headers = {
    apikey: key,
    Authorization: `Bearer ${key}`,
    Accept: "application/json",
  };

  // `path` already carries a query string, so the filter is appended with "&".
  async function fetchTable(path) {
    const res = await fetch(`${cfg.url}/rest/v1/${path}&${filter}`, { headers });
    if (res.ok) return res.json();
    const detail = await res.text().catch(() => "");
    throw new Error(`${path}: ${res.status} ${detail}`);
  }

  try {
    const [pillars, caps, tasks] = await Promise.all([
      fetchTable("pillars?select=*"),
      fetchTable("capabilities?select=*"),
      fetchTable("tasks?select=*"),
    ]);
    return {
      themes: pillars.map(rowToThemeProjected),
      capabilities: caps.map(rowToCapabilityProjected),
      tasks: tasks.map(rowToTaskProjected),
    };
  } catch (err) {
    log("supabase entity read failed:", err.message);
    return null;
  }
}
|
|
247
|
+
|
|
248
|
+
/** Row → camelCase projection helpers. Snake-case columns map to
|
|
249
|
+
* the same camelCase keys the SPA + agent surfaces have always
|
|
250
|
+
* used; the legacy JSONB shape and these table rows agree on
|
|
251
|
+
* every field. */
|
|
252
|
+
/**
 * Project a `pillars` table row onto the camelCase theme shape.
 * Null and undefined fields are dropped by stripUndefined.
 *
 * @param {object} r - Snake-case table row.
 * @returns {object} Theme record.
 */
function rowToThemeProjected(r) {
  // camelCase key -> source column, in output order.
  const columns = {
    id: "id",
    name: "name",
    description: "description",
    color: "color",
    targetRoi: "target_roi",
    ownerUserId: "owner_user_id",
    idempotencyKey: "idempotency_key",
    archived: "archived",
    archivedAt: "archived_at",
    createdAt: "created_at",
    updatedAt: "updated_at",
  };
  const theme = {};
  for (const [key, column] of Object.entries(columns)) {
    theme[key] = r[column];
  }
  return stripUndefined(theme);
}
|
|
267
|
+
/**
 * Project a `capabilities` table row onto the camelCase capability shape.
 * Null and undefined fields are dropped by stripUndefined.
 *
 * @param {object} r - Snake-case table row.
 * @returns {object} Capability record.
 */
function rowToCapabilityProjected(r) {
  // camelCase key -> source column, in output order.
  const columns = {
    id: "id",
    pillarId: "pillar_id",
    name: "name",
    description: "description",
    outcome: "outcome",
    reach: "reach",
    impact: "impact",
    confidence: "confidence",
    roi: "roi",
    color: "color",
    status: "status",
    start: "start_date",
    target: "target_date",
    delivered: "delivered_date",
    originalTarget: "original_target",
    laneRow: "lane_row",
    ownerUserId: "owner_user_id",
    specRef: "spec_ref",
    outcomeStatus: "outcome_status",
    outcomeCheckedAt: "outcome_checked_at",
    outcomeReadings: "outcome_readings",
    dependsOn: "depends_on",
    idempotencyKey: "idempotency_key",
    archived: "archived",
    archivedAt: "archived_at",
    createdAt: "created_at",
    updatedAt: "updated_at",
  };
  const capability = {};
  for (const [key, column] of Object.entries(columns)) {
    capability[key] = r[column];
  }
  return stripUndefined(capability);
}
|
|
298
|
+
/**
 * Project a `tasks` table row onto the camelCase task shape.
 * Null and undefined fields are dropped by stripUndefined.
 *
 * @param {object} r - Snake-case table row.
 * @returns {object} Task record.
 */
function rowToTaskProjected(r) {
  // camelCase key -> source column, in output order.
  const columns = {
    id: "id",
    capabilityId: "capability_id",
    pillarId: "pillar_id",
    title: "title",
    summary: "summary",
    status: "status",
    priority: "priority",
    effort: "effort",
    start: "start_date",
    target: "target_date",
    originalTarget: "original_target",
    delivered: "delivered_date",
    deliveredAt: "delivered_at",
    progress: "progress",
    owner: "owner",
    ownerGithub: "owner_github",
    ownerAvatarUrl: "owner_avatar_url",
    ownerUserId: "owner_user_id",
    laneRow: "lane_row",
    matrixDx: "matrix_dx",
    matrixDy: "matrix_dy",
    team: "team",
    kind: "kind",
    authorKind: "author_kind",
    expectedPRs: "expected_prs",
    expectedScope: "expected_scope",
    tags: "tags",
    prs: "prs",
    links: "links",
    acceptance: "acceptance",
    acceptanceGrades: "acceptance_grades",
    dependsOn: "depends_on",
    idempotencyKey: "idempotency_key",
    archived: "archived",
    archivedAt: "archived_at",
    createdAt: "created_at",
    updatedAt: "updated_at",
  };
  const task = {};
  for (const [key, column] of Object.entries(columns)) {
    task[key] = r[column];
  }
  return stripUndefined(task);
}
|
|
339
|
+
/**
 * Delete every own enumerable property whose value is undefined OR null.
 * Despite the name, nulls are removed too. Mutates and returns the same
 * object.
 *
 * @param {object} o - Object to prune in place.
 * @returns {object} The same object, for chaining.
 */
function stripUndefined(o) {
  Object.keys(o).forEach((key) => {
    // `== null` matches exactly null and undefined.
    if (o[key] == null) delete o[key];
  });
  return o;
}
|
|
343
|
+
|
|
344
|
+
/**
 * Call a Postgres function exposed through PostgREST (`/rest/v1/rpc/<fn>`).
 *
 * The write tools go through here so the read-modify-write runs inside one
 * Postgres transaction (with row-level locking on the workspace) instead
 * of two round-trips from the MCP; that is what keeps concurrent agent
 * writes safe. See migration 0006 for the function bodies.
 *
 * @param {string} fn - Name of the Postgres function.
 * @param {object} body - RPC arguments. Must already carry `p_workspace_id`;
 *   the per-tool resolver injects it so the override path works.
 * @returns {Promise<unknown>} Parsed JSON response body.
 * @throws {Error} When URL, service-role key or workspace id is missing, or
 *   when the response status is not OK (message includes the status and
 *   the first 300 characters of the body).
 */
async function rpcCall(fn, body) {
  const { url, writeKey } = supabaseConfig();
  const configured = Boolean(url && writeKey && body?.p_workspace_id);
  if (!configured) {
    throw new Error(
      "Write tools require SUPABASE_URL + SUPABASE_SERVICE_ROLE_KEY in env and a resolvable workspaceId (either SUPABASE_WORKSPACE_ID env or workspaceId arg)."
    );
  }

  const headers = {
    apikey: writeKey,
    Authorization: `Bearer ${writeKey}`,
    "content-type": "application/json",
    Accept: "application/json",
  };
  const res = await fetch(`${url}/rest/v1/rpc/${fn}`, {
    method: "POST",
    headers,
    body: JSON.stringify(body),
  });
  if (res.ok) return res.json();

  const txt = await res.text();
  throw new Error(`rpc ${fn} failed: ${res.status} ${txt.slice(0, 300)}`);
}
|
|
379
|
+
|
|
380
|
+
/**
 * Overlay workspace edits on the seed and return the flat view the tools
 * serve. This is the read-only subset of the src/lib/store.ts merge logic:
 * seed records are patched by id, new records are appended, and deleted
 * ids are dropped.
 *
 * @param {object} [seed] - Seed roadmap (`product.themes`, `capabilities`, `tasks`).
 * @param {object} [edits] - Workspace edits; missing keys count as empty.
 * @returns {{themes: object[], capabilities: object[], tasks: object[]}}
 */
function project(seed, edits) {
  const e = edits ?? {};
  return {
    // The app writes new themes to edits.newPillars and deletions to
    // edits.deletedPillarIds (Theme is still `Pillar` in the schema for
    // legacy reasons — see the src/types.ts comment). Reading the same
    // keys here means a theme created in the UI shows up in list_themes,
    // and propose_theme does not have to fight the app over which key
    // holds the truth.
    themes: mergeList(
      seed?.product?.themes ?? [],
      e.themes ?? {},
      e.newPillars ?? [],
      e.deletedPillarIds ?? []
    ),
    capabilities: mergeList(
      seed?.capabilities ?? [],
      e.capabilities ?? {},
      e.newCapabilities ?? [],
      e.deletedCapabilityIds ?? []
    ),
    tasks: mergeList(
      seed?.tasks ?? [],
      e.tasks ?? {},
      e.newTasks ?? [],
      e.deletedTaskIds ?? []
    ),
  };
}
|
|
414
|
+
|
|
415
|
+
/**
 * Status to report for a capability — mirrors effectiveCapabilityStatus +
 * deriveCapabilityStatus in src/lib/util.ts.
 *
 * An explicit `cap.status` always wins. Otherwise the status is derived
 * from the tasks linked to the capability:
 *   - no linked tasks            -> "exploring"
 *   - every task delivered       -> "delivered"
 *   - any task in progress       -> "in_progress"
 *   - every task exploring       -> "exploring"
 *   - anything else              -> "planned"
 *
 * Used to keep delivered capabilities out of agent-facing lists so plans
 * target work that is still in flight.
 *
 * @param {{id: string, status?: string}} cap
 * @param {Array<{capabilityId: string, status: string}>} tasks
 * @returns {string}
 */
function effectiveCapabilityStatus(cap, tasks) {
  if (cap.status) return cap.status;

  const statuses = tasks
    .filter((task) => task.capabilityId === cap.id)
    .map((task) => task.status);
  if (statuses.length === 0) return "exploring";

  const allAre = (wanted) => statuses.every((s) => s === wanted);
  if (allAre("delivered")) return "delivered";
  if (statuses.includes("in_progress")) return "in_progress";
  if (allAre("exploring")) return "exploring";
  return "planned";
}
|
|
431
|
+
|
|
432
|
+
/**
 * Merge one entity list: seed rows first, then additions, skipping any row
 * whose id is in `deletedIds` and shallow-merging `patches[row.id]` over
 * each surviving row.
 *
 * Patches apply to newly-created records too — this matches the app's
 * store merge, and it is what makes grade_acceptance writes visible on
 * tasks created via propose_task earlier in the same workspace.
 *
 * @param {Iterable<object>} seedList - Rows from the seed.
 * @param {Object<string, object>} patches - Partial overrides keyed by row id.
 * @param {Iterable<object>} additions - Rows created in the workspace.
 * @param {Iterable<string>} deletedIds - Ids to omit.
 * @returns {object[]} New array of new row objects.
 */
function mergeList(seedList, patches, additions, deletedIds) {
  const removed = new Set(deletedIds);
  const merged = [];
  for (const source of [seedList, additions]) {
    for (const row of source) {
      if (removed.has(row.id)) continue;
      const patch = patches[row.id] ?? {};
      merged.push({ ...row, ...patch });
    }
  }
  return merged;
}
|
|
447
|
+
|
|
448
|
+
/**
 * Today's date in UTC as `YYYY-MM-DD`.
 *
 * @returns {string}
 */
function todayISO() {
  const stamp = new Date().toISOString();
  return stamp.substring(0, 10);
}
|
|
451
|
+
/**
 * Shift a `YYYY-MM-DD` date by a number of days, computed in UTC.
 *
 * @param {string} iso - Date string, interpreted as midnight UTC.
 * @param {number} days - Days to add; may be negative.
 * @returns {string} The shifted date as `YYYY-MM-DD`.
 */
function addDays(iso, days) {
  const date = new Date(`${iso}T00:00:00Z`);
  const shifted = date.getUTCDate() + days;
  date.setUTCDate(shifted);
  return date.toISOString().substring(0, 10);
}
|
|
456
|
+
|
|
457
|
+
/**
 * Decode the HTML entities agents most often emit when they think they
 * are rendering into markup: `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&#39;`
 * and `&#x27;` (hex digits case-insensitive). Mirrors src/lib/text.ts.
 *
 * Agents sometimes propose names like `Sandbox &amp; Test Mode` — the
 * storage path stores that verbatim, then React renders it literally in
 * the UI. Apply this on every propose / update path so values land in
 * the database in their decoded form.
 *
 * Decoding is a single pass, so already-escaped text is unescaped exactly
 * one level: `&amp;lt;` becomes `&lt;`, not `<`.
 *
 * @param {unknown} input - Value to decode.
 * @returns {unknown} The decoded string; falsy or non-string input is
 *   returned unchanged.
 */
function decodeHtmlEntities(input) {
  if (!input || typeof input !== "string") return input;
  // Keyed by the entity name between "&" and ";". The pattern and the keys
  // deliberately never spell out a complete entity, so an HTML-unescaping
  // step applied to this source cannot turn the decoder into a no-op.
  const decoded = {
    amp: "&",
    lt: "<",
    gt: ">",
    quot: '"',
    "#39": "'",
    "#x27": "'",
  };
  return input.replace(
    /&(amp|lt|gt|quot|#39|#[xX]27);/g,
    (_match, name) => decoded[name.toLowerCase()]
  );
}
|
|
476
|
+
/**
 * Trim a value and then decode its HTML entities in one step.
 * null and undefined are treated as the empty string.
 *
 * @param {string|null|undefined} s
 * @returns {string}
 */
function cleanText(s) {
  const trimmed = (s ?? "").trim();
  return decodeHtmlEntities(trimmed);
}
|
|
480
|
+
/**
 * New task id of the form `TK-NNNNNN`: a random integer in
 * [0, 999999], zero-padded to six digits. Matches the app's format and
 * stays comfortably under collision risk for any realistic workspace.
 *
 * @returns {string}
 */
function randomTaskId() {
  const n = Math.floor(Math.random() * 1_000_000);
  const digits = String(n).padStart(6, "0");
  return `TK-${digits}`;
}
|
|
485
|
+
/**
 * New theme id of the form `TH-NNNNNN`: a random integer in
 * [100000, 999999], the same shape store.ts numericId6 emits.
 *
 * @returns {string}
 */
function randomThemeId() {
  const offset = Math.floor(Math.random() * 900000);
  const n = 100000 + offset;
  return `TH-${String(n)}`;
}
|
|
489
|
+
/**
 * New capability id of the form `CAP-XXXXXX`: six uppercase base36
 * characters (0-9, A-Z), the shape store.ts uid("CAP") emits.
 *
 * Each character is drawn independently. Slicing
 * `Math.random().toString(36)` is not reliable here: values with a short
 * base36 expansion (0.5 is "0.i", 0 is "0") produce fewer than six
 * characters, down to an empty suffix.
 *
 * @returns {string}
 */
function randomCapabilityId() {
  const alphabet = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
  let suffix = "";
  for (let i = 0; i < 6; i += 1) {
    // Math.random() is in [0, 1), so the index is always 0..35.
    suffix += alphabet[Math.floor(Math.random() * alphabet.length)];
  }
  return `CAP-${suffix}`;
}
|
|
493
|
+
|
|
494
|
+
// The only values accepted for a capability's `impact` field.
const VALID_IMPACTS = new Set([3, 2, 1, 0.5, 0.25]);
|
|
495
|
+
|
|
496
|
+
// ── Validators ────────────────────────────────────────────────────
|
|
497
|
+
// Server-side guardrails for the planning rubric in AGENTS.md.
|
|
498
|
+
// Every propose_* tool runs these before touching Supabase. With
|
|
499
|
+
// dryRun=true the caller sees the validation result without writing.
|
|
500
|
+
|
|
501
|
+
// A month name only counts as a temporal anchor when it is followed by
// whitespace and a digit. That avoids false positives on phrases such as
// "may have moved" or "September lifts", where the month token has
// nothing to do with a date.
const TEMPORAL_RE =
  /\b(20\d\d|q[1-4](\s*20\d\d)?|by\s+\d|(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\w*\s+\d)\b/i;
const NUMBER_RE = /\d/;

/**
 * Check that a capability outcome is "falsifiable": it must contain both
 * a number (the thing being moved) and a temporal anchor (when it will be
 * checked). Empty outcomes are rejected too — every capability needs one.
 *
 * The matching is intentionally loose so reasonable phrasings pass:
 * "32% to 55% by Q3 2026", "median 3/quarter by 2026-09" and
 * "5x by 2026" all clear.
 *
 * @param {string|null|undefined} outcome
 * @returns {string|null} An error message, or null when the outcome is valid.
 */
function validateOutcome(outcome) {
  const text = (outcome ?? "").trim();
  if (text === "") {
    return "outcome is required. Use the template: <metric> moves from <baseline> to <target> by <date>, measured by <source>.";
  }

  const missing = [];
  if (!NUMBER_RE.test(text)) missing.push("number");
  if (!TEMPORAL_RE.test(text)) missing.push("date/quarter");
  if (missing.length === 0) return null;

  return `outcome must include both a number (the metric) and a date or quarter (when you'll check). Missing: ${missing.join(" + ")}. See get_agents_md for examples.`;
}
|
|
533
|
+
|
|
534
|
+
/**
 * Check that a name is present and long enough (after trimming).
 *
 * @param {string|null|undefined} name
 * @param {number} [minLen=8] - Minimum trimmed length.
 * @returns {string|null} An error message, or null when the name is valid.
 */
function validateName(name, minLen = 8) {
  const trimmed = (name ?? "").trim();
  const length = trimmed.length;
  if (length === 0) return "name is required.";
  return length < minLen
    ? `name is too short (${length} chars) — aim for ${minLen}+ that describe the bet, not the verb.`
    : null;
}
|
|
541
|
+
|
|
542
|
+
/**
 * Validate an optional confidence percentage.
 *
 * Rules:
 *   - null / undefined: confidence is optional, so this is valid;
 *   - anything that is not a real number (a string, NaN, …) is rejected;
 *     without this check such values fail every range comparison below
 *     and would be reported as valid;
 *   - outside 0–100 is rejected;
 *   - above 95 is rejected: that band is reserved for work already shipped
 *     or behind a flag.
 *
 * @param {number|null|undefined} confidence
 * @returns {string|null} An error message, or null when the value is valid.
 */
function validateConfidence(confidence) {
  if (confidence == null) return null;
  if (typeof confidence !== "number" || Number.isNaN(confidence)) {
    return `confidence must be a number between 0 and 100, got ${String(confidence)}.`;
  }
  if (confidence < 0 || confidence > 100) {
    return `confidence must be 0–100, got ${confidence}.`;
  }
  if (confidence > 95) {
    return `confidence ${confidence} is too high. >95 is reserved for work that's already shipped or behind a flag. Cap at 95 unless you can point to the deployed flag.`;
  }
  return null;
}
|
|
550
|
+
|
|
551
|
+
/**
 * Produce a warning (not an error) when a capability's ROI is below 70%
 * of its parent theme's target ROI. The caller can still proceed; the
 * warning surfaces in dryRun output so the agent can rethink.
 *
 * @param {number|null|undefined} roi - Capability ROI.
 * @param {{name: string, targetRoi?: number}|null|undefined} theme - Parent theme.
 * @returns {string|null} Warning text, or null when either value is missing
 *   or the ROI is at or above the 70% floor.
 */
function warnRoiVsTheme(roi, theme) {
  const target = theme?.targetRoi;
  if (roi == null || target == null) return null;

  const floor = target * 0.7;
  return roi < floor
    ? `roi $${roi}M is well below 70% of theme "${theme.name}" target ($${theme.targetRoi}M). Justify the gap in your outcome, or rethink the parent theme.`
    : null;
}
|
|
564
|
+
|
|
565
|
+
// ── Lightweight Jaccard for suggest_capability_for ────────────────
|
|
566
|
+
//
|
|
567
|
+
// Kept in sync with src/lib/textMatch.ts and api/github-webhook.ts
|
|
568
|
+
// so the three places that score PR-to-capability overlap rank the
|
|
569
|
+
// same matches identically. The "Jaccard" name is a slight misnomer:
|
|
570
|
+
// we normalise by max(|a|,|b|) instead of |a ∪ b| (textbook Jaccard)
|
|
571
|
+
// so a tiny PR title doesn't trivially match a long capability
|
|
572
|
+
// description via two common tokens.
|
|
573
|
+
// Words excluded from tokenize() output.
const STOPWORDS = new Set([
  "the", "and", "for", "with", "this", "that", "from", "into", "onto", "upon",
  "fix", "fixes", "fixing", "add", "adds", "adding", "update", "updates", "updating",
  "remove", "removes", "removing", "refactor", "refactors", "refactoring",
  "feat", "feature", "bug", "bugfix", "chore", "wip", "draft",
  "use", "uses", "using", "make", "makes", "making", "support", "supports",
  "via", "out", "off", "now", "but", "any", "all", "new", "old",
  "pull", "request", "merge", "branch", "commit", "test", "tests", "testing",
]);

/**
 * Split text into a set of comparable tokens: lower-cased, everything
 * outside [a-z0-9 ] treated as a separator, words shorter than three
 * characters and stopwords discarded.
 *
 * @param {string|null|undefined} s
 * @returns {Set<string>} Distinct tokens in first-seen order.
 */
function tokenize(s) {
  const words = (s ?? "")
    .toLowerCase()
    .replace(/[^a-z0-9 ]+/g, " ")
    .split(/\s+/);
  const kept = words.filter((word) => word.length >= 3 && !STOPWORDS.has(word));
  return new Set(kept);
}
|
|
592
|
+
/**
 * Overlap score between two token sets: the number of shared tokens
 * divided by the size of the LARGER set (not by the union, as textbook
 * Jaccard would).
 *
 * @param {Set<string>} a
 * @param {Set<string>} b
 * @returns {number} A value in [0, 1]; 0 when either set is empty or
 *   nothing is shared.
 */
function jaccardScore(a, b) {
  if (a.size === 0 || b.size === 0) return 0;
  const shared = [...a].filter((token) => b.has(token)).length;
  if (shared === 0) return 0;
  const larger = Math.max(a.size, b.size);
  return shared / larger;
}
|
|
599
|
+
|
|
600
|
+
// ── Session state + enforcement gates ─────────────────────────────
|
|
601
|
+
//
|
|
602
|
+
// One process serves one MCP client (stdio). State below is the
|
|
603
|
+
// client's session-scoped memory: when get_agents_md was last
|
|
604
|
+
// fetched, how many mutator calls have been attempted without the
|
|
605
|
+
// rubric, etc.
|
|
606
|
+
//
|
|
607
|
+
// The point of this is to stop relying on the agent's discretion to
|
|
608
|
+
// follow the rubric. Tool descriptions catch most cases; the gate
|
|
609
|
+
// here catches the rest with a structured error whose `fix` field
|
|
610
|
+
// names the exact next call.
|
|
611
|
+
// Session-scoped state for the one client this process serves.
const session = {
  startedAt: Date.now(),
  rubricFetchedAt: null,
  // "Discovery" gates — the agent must have looked at the current
  // catalogue before proposing new theme/cap records. Prevents the
  // "agent invents Theme X when 'X-ish Theme' already exists"
  // failure mode that token-overlap alone catches inconsistently.
  themesListedAt: null,
  capsDiscoveredAt: null,
  mutatorAttempts: 0,
  mutatorBlocks: 0,
};

/**
 * Return the session to its just-started state: restamp `startedAt`,
 * clear the rubric/discovery timestamps and zero the mutator counters.
 * The `session` object is updated in place.
 */
function resetSession() {
  Object.assign(session, {
    startedAt: Date.now(),
    rubricFetchedAt: null,
    themesListedAt: null,
    capsDiscoveredAt: null,
    mutatorAttempts: 0,
    mutatorBlocks: 0,
  });
}
|
|
632
|
+
|
|
633
|
+
/**
 * Build the structured "prerequisite missing" tool result that mutators
 * return when the agent has not fetched the rubric this session.
 *
 * The shape matters: LLMs recover well from errors whose `fix` field
 * names the exact next call, and badly from prose. Mirrors the
 * recommendation in the MCP-effectiveness memo.
 *
 * @param {string} toolName - The tool the agent should retry afterwards.
 * @returns {{content: Array<{type: string, text: string}>, isError: boolean}}
 *   Error result whose text is pretty-printed JSON with `error`,
 *   `message` and `fix`.
 */
function rubricMissingResult(toolName) {
  const message =
    `Call get_agents_md first this session, then retry ${toolName}. ` +
    "The rubric defines acceptance criteria shape and grading dimensions — " +
    "proposals filed without it will not round-trip.";
  const payload = {
    error: "prerequisite_missing",
    message,
    fix: "get_agents_md()",
  };
  const text = JSON.stringify(payload, null, 2);
  return { content: [{ type: "text", text }], isError: true };
}
|
|
662
|
+
|
|
663
|
+
/**
 * Build the structured error returned by the per-tool discovery gates.
 * Same shape and rationale as the rubric-missing result: LLMs follow the
 * `fix` field reliably when it names the exact next call.
 *
 * @param {string} toolName - The tool the agent should retry afterwards.
 * @param {string} fixCall - The call to make first; echoed as `fix`.
 * @param {string} rationale - Why the discovery call is needed; appended
 *   to the message.
 * @returns {{content: Array<{type: string, text: string}>, isError: boolean}}
 */
function discoveryMissingResult(toolName, fixCall, rationale) {
  const payload = {
    error: "discovery_missing",
    message: `Call ${fixCall} first this session, then retry ${toolName}. ${rationale}`,
    fix: fixCall,
  };
  const text = JSON.stringify(payload, null, 2);
  return { content: [{ type: "text", text }], isError: true };
}
|
|
688
|
+
|
|
689
|
+
/**
 * Telemetry write — fire-and-forget POST to public.mcp_telemetry via
 * PostgREST when a service-role key is set. It never blocks or throws
 * into the caller; every failure is logged to stderr and swallowed.
 *
 * "Failure" covers three cases:
 *   - building the request throws synchronously (for example a payload
 *     JSON.stringify cannot serialise),
 *   - the request rejects (network error),
 *   - the server answers with a non-2xx status; fetch resolves normally
 *     in that case, so the status has to be checked explicitly.
 *
 * The point isn't real-time observability — it's accumulating signal
 * about *where* the rubric flow breaks so descriptions and gates can be
 * tuned against real failure patterns.
 *
 * @param {string} event - Event name stored in the row.
 * @param {unknown} [payload] - Event details; stored as null when omitted.
 * @param {string} [wsIdOverride] - Workspace the call acted on. Mutator
 *   calls resolve this once at the top of callTool so the row captures
 *   the workspace actually being acted on, not just the env default.
 */
function recordTelemetry(event, payload, wsIdOverride) {
  const { url, writeKey, workspaceId: envWsId } = supabaseConfig();
  if (!url || !writeKey) return; // self-hosted devs without service key get no-op

  const warn = (detail) => log("telemetry write failed (non-fatal)", detail);
  try {
    const body = {
      event,
      workspace_id: wsIdOverride ?? envWsId ?? null,
      server_version: SERVER_VERSION,
      session_started_at: new Date(session.startedAt).toISOString(),
      rubric_fetched_at: session.rubricFetchedAt
        ? new Date(session.rubricFetchedAt).toISOString()
        : null,
      payload: payload ?? null,
    };
    // Deliberately not awaited: telemetry must not delay the tool call.
    fetch(`${url}/rest/v1/mcp_telemetry`, {
      method: "POST",
      headers: {
        apikey: writeKey,
        Authorization: `Bearer ${writeKey}`,
        "content-type": "application/json",
        Prefer: "return=minimal",
      },
      body: JSON.stringify(body),
    })
      .then((res) => {
        if (!res.ok) warn(`HTTP ${res.status}`);
      })
      .catch((e) => {
        warn(e.message);
      });
  } catch (e) {
    // Synchronous failure while assembling the request.
    warn(e.message);
  }
}
|
|
728
|
+
|
|
729
|
+
/**
 * Tool catalogue: one entry per tool, each with a `name`, an
 * LLM-facing `description`, and a JSON Schema `inputSchema`.
 *
 * Most descriptions follow a fixed layout — one-line summary, then
 * USE WHEN / PREREQUISITE / ANTI-PATTERN / EXAMPLE — optionally
 * followed by operational notes (required env key, idempotency,
 * dryRun, workspaceId). The description text is runtime content read
 * by the client, so treat edits to it as behavior changes.
 *
 * Every schema written out here sets `additionalProperties: false`.
 * The archive, move and update tools are spliced in from
 * archiveLifecycleTools(), moveLifecycleTools() and
 * updateLifecycleTools().
 *
 * NOTE(review): presumably this array is what the server returns for
 * the MCP tool listing — confirm against the request handler.
 */
const TOOLS = [
  {
    name: "list_themes",
    description:
      "List active themes (strategic pillars). Excludes archived themes by default.\n\n" +
      "USE WHEN: orienting to the roadmap at session start, scoping which theme a capability belongs under, or answering 'what strategic bets are we tracking'.\n" +
      "PREREQUISITE: none — read-only.\n" +
      "ANTI-PATTERN: do not call to look up a single theme by id (use get_roadmap_snapshot or filter the response yourself). Do not call repeatedly in one session — theme catalogue is years-stable. Pass includeArchived=true only when reviewing closed bets — almost never in a planning session.\n" +
      "EXAMPLE: list_themes()",
    inputSchema: {
      type: "object",
      properties: {
        workspaceId: { type: "string" },
        includeArchived: { type: "boolean" },
      },
      additionalProperties: false,
    },
  },
  {
    name: "list_capabilities",
    description:
      "List active capabilities (quarterly bets). Excludes delivered and archived capabilities by default — agents should target work that's still in flight.\n\n" +
      "USE WHEN: planning a feature and need to find the right parent capability, reviewing in-flight bets, or scoping what's still on the table this quarter.\n" +
      "PREREQUISITE: none — read-only. For routing a specific work description, prefer suggest_capability_for which ranks by token overlap.\n" +
      "ANTI-PATTERN: do not call to find a capability when you already know its id (use get_roadmap_snapshot for richer context). Pass includeDelivered=true or includeArchived=true only when reviewing historical bets — almost never in a planning session.\n" +
      "EXAMPLE: list_capabilities({ themeId: 'TH-XXX' })",
    inputSchema: {
      type: "object",
      properties: {
        themeId: { type: "string" },
        includeDelivered: { type: "boolean" },
        includeArchived: { type: "boolean" },
        workspaceId: { type: "string" },
      },
      additionalProperties: false,
    },
  },
  {
    name: "list_tasks",
    description:
      "List tasks. Filter by capabilityId or status. Excludes archived tasks by default.\n\n" +
      "USE WHEN: surveying what already exists under a capability before proposing a new task (avoid duplicates), reviewing a status bucket (e.g. all in_progress), or answering 'what's open right now'.\n" +
      "PREREQUISITE: none — read-only.\n" +
      "ANTI-PATTERN: do not call to track in-progress work within a single conversation — use the harness TodoWrite tool. Do not call without a filter when the workspace has many tasks; scope by capabilityId or status. Pass includeArchived=true only when reviewing closed history.\n" +
      "EXAMPLE: list_tasks({ capabilityId: 'CAP-XXX', status: 'in_progress' })",
    inputSchema: {
      type: "object",
      properties: {
        capabilityId: { type: "string" },
        status: {
          type: "string",
          enum: ["delivered", "in_progress", "planned", "exploring"],
        },
        includeArchived: { type: "boolean" },
        workspaceId: { type: "string" },
      },
      additionalProperties: false,
    },
  },
  {
    name: "get_task",
    description:
      "Return one task by id with full detail: title, summary, status, owner, acceptance criteria, dependsOn, attached PRs, and acceptance grades.\n\n" +
      "USE WHEN: about to submit acceptance grades (need the criteria indexes), reviewing a specific task before linking a PR, or answering questions about a particular TK-XXXXXX.\n" +
      "PREREQUISITE: none — read-only.\n" +
      "ANTI-PATTERN: do not call to discover that a task exists — use list_tasks for discovery first. Don't loop over many ids; list_tasks returns the same shape in one round trip.\n" +
      "EXAMPLE: get_task({ id: 'TK-100201' })",
    inputSchema: {
      type: "object",
      properties: {
        id: { type: "string" },
        workspaceId: { type: "string" },
      },
      required: ["id"],
      additionalProperties: false,
    },
  },
  {
    name: "get_agents_md",
    description:
      "Return the AGENTS.md planning contract — task shape, acceptance criteria format, capability outcome rubric, grading dimensions.\n\n" +
      "USE WHEN: starting ANY planning session before calling propose_task / propose_capability / propose_theme / submit_acceptance_grades. Call this ONCE per session — the mutator tools refuse without it. Cache the result on your side.\n" +
      "PREREQUISITE: none.\n" +
      "ANTI-PATTERN: do not skip when the user says 'just plan some features' — the rubric IS the planning interface; proposals filed without it won't round-trip into the product.\n" +
      "EXAMPLE: get_agents_md()",
    inputSchema: { type: "object", properties: {}, additionalProperties: false },
  },
  {
    name: "get_roadmap_snapshot",
    description:
      "Single-call orient: themes + active capabilities + in-flight tasks for the workspace, plus the resolved workspaceId. Always live. Excludes archived entities by default.\n\n" +
      "USE WHEN: starting fresh in a workspace and need the whole canonical model in one read, or before opening a PR to confirm which workspace + capability to attach to.\n" +
      "PREREQUISITE: none — read-only. Often the very first call after get_agents_md.\n" +
      "ANTI-PATTERN: do not call repeatedly within one planning pass; the data doesn't change inside a single session. Use list_tasks / list_capabilities if you need just one slice. Pass includeArchived=true only when surveying historical state.\n" +
      "EXAMPLE: get_roadmap_snapshot()",
    inputSchema: {
      type: "object",
      properties: {
        workspaceId: {
          type: "string",
          description:
            "Optional. Override the env-default workspace. Useful when the agent is operating against a .roadmapper/snapshot.json that names its own workspace.",
        },
        includeArchived: { type: "boolean" },
      },
      additionalProperties: false,
    },
  },
  {
    name: "propose_task",
    description:
      "Propose a new task under an existing roadmapper capability. Server stamps authorKind='agent' + status='planned' + a TK-NNNNNN id.\n\n" +
      "USE WHEN: the user asks to plan features, design new work, sketch a roadmap, file a TODO that should persist beyond this conversation, or break a capability into deliverables.\n" +
      "PREREQUISITE: get_agents_md once this session (the server enforces this and returns an error with a `fix` field if missing). Call suggest_capability_for first to find the right parent capability — do not invent a new one.\n" +
      "ANTI-PATTERN: do not call to track in-progress work within a single conversation — use the harness TodoWrite tool. Do not call to log a bug discovered during implementation — file in the issue tracker, not roadmapper. Do not call when you don't know which capability the task belongs under; resolve that first.\n" +
      "EXAMPLE: propose_task({ capabilityId: 'CAP-XXX', title: 'Drag-and-drop block reorder', acceptance: ['Block can be dragged with mouse + keyboard', 'Order persists across reloads'], idempotencyKey: 'session-1-task-3' })\n\n" +
      "Requires SUPABASE_SERVICE_ROLE_KEY. Pass idempotencyKey so retries don't duplicate. Pass dryRun: true to validate without writing. Pass workspaceId to target a workspace other than the env default.",
    inputSchema: {
      type: "object",
      properties: {
        capabilityId: { type: "string" },
        title: { type: "string" },
        summary: { type: "string" },
        effort: { type: "string", enum: ["XS", "S", "M", "L", "XL"] },
        priority: { type: "string", enum: ["P0", "P1", "P2", "P3"] },
        kind: { type: "string", enum: ["feature", "bug", "chore", "spike"] },
        owner: { type: "string" },
        acceptance: { type: "array", items: { type: "string" } },
        dependsOn: { type: "array", items: { type: "string" } },
        expectedPRs: {
          type: "number",
          description:
            "Advisory cap on merged PRs for this task. Unset by default (no cap). Webhook records a scope_overrun audit row when the actual count exceeds this. Not enforced; this is a hint to track how often tasks blow their envelope.",
        },
        expectedScope: {
          type: "number",
          description:
            "Advisory cap on cumulative LoC (additions+deletions) across all PRs linked to this task. Webhook records a scope_overrun audit row when exceeded. Not enforced.",
        },
        idempotencyKey: { type: "string" },
        dryRun: { type: "boolean" },
        workspaceId: { type: "string" },
      },
      required: ["capabilityId", "title"],
      additionalProperties: false,
    },
  },
  {
    name: "propose_theme",
    description:
      "Propose a new strategic theme (pillar). Themes are years-stable — only propose one when nothing existing fits.\n\n" +
      "USE WHEN: the work the user is describing genuinely doesn't fit ANY existing theme, AND the user explicitly says they want a new strategic direction. Almost never the right answer in a planning session.\n" +
      "PREREQUISITE: get_agents_md once this session (enforced). Theme discovery once this session, satisfied by suggest_theme_for (preferred — returns ranked matches with a fit signal), list_themes, or get_roadmap_snapshot. Enforced — the server returns discovery_missing with a fix field if you skip it. Duplicating a theme is the most common failure mode; the gate stops it.\n" +
      "ANTI-PATTERN: do not call to organize a quarter of work — that's a capability, not a theme. Do not call because the existing themes feel too coarse — they're SUPPOSED to be coarse. Use propose_capability under an existing theme instead.\n" +
      "EXAMPLE: propose_theme({ name: 'AI Agent Reliability', description: 'Multi-year bet on making agent workflows reproducible.', targetRoi: 20, idempotencyKey: 'session-1-theme-1' })\n\n" +
      "Requires SUPABASE_SERVICE_ROLE_KEY. Pass idempotencyKey so retries don't duplicate. Pass dryRun: true to validate without writing. Pass workspaceId to target a workspace other than the env default.",
    inputSchema: {
      type: "object",
      properties: {
        name: { type: "string" },
        description: { type: "string" },
        color: { type: "string" },
        targetRoi: { type: "number" },
        idempotencyKey: { type: "string" },
        dryRun: { type: "boolean" },
        workspaceId: { type: "string" },
      },
      required: ["name"],
      additionalProperties: false,
    },
  },
  {
    name: "propose_capability",
    description:
      "Propose a new capability under an existing theme — a quarterly bet with a falsifiable outcome.\n\n" +
      "USE WHEN: planning a multi-task workstream that needs a shared outcome statement; the work is a coherent bet, not a single task; AND suggest_capability_for returned no strong match.\n" +
      "PREREQUISITE: get_agents_md once this session (enforced). suggest_capability_for (or list_capabilities / get_roadmap_snapshot / the roadmapper://capabilities/active resource) once this session (enforced — server returns discovery_missing with a fix field if you skip it). The server WILL also reject if token overlap with an existing capability is too high; the gate is upstream of that.\n" +
      "ANTI-PATTERN: do not call for a single deliverable — that's a task. Do not call when the outcome is fuzzy ('improve X') — the server rejects non-falsifiable outcomes. Do not call when an existing capability is close-enough; capabilities cost human attention to maintain.\n" +
      "EXAMPLE: propose_capability({ pillarId: 'TH-XXX', name: 'Self-serve landing page builder', outcome: 'Customers publish a landing page in under 5 minutes without engineering involvement.', reach: 200, impact: 1, confidence: 70, idempotencyKey: 'session-1-cap-1' })\n\n" +
      "Server rejects empty / non-falsifiable outcomes, confidence >95, and names <8 chars. Requires SUPABASE_SERVICE_ROLE_KEY. Pass idempotencyKey, dryRun, workspaceId as for propose_task.",
    inputSchema: {
      type: "object",
      properties: {
        name: { type: "string" },
        pillarId: { type: "string" },
        description: { type: "string" },
        outcome: { type: "string" },
        reach: { type: "number" },
        impact: { type: "number", enum: [3, 2, 1, 0.5, 0.25] },
        confidence: { type: "number", minimum: 0, maximum: 100 },
        roi: { type: "number" },
        specRef: { type: "string" },
        idempotencyKey: { type: "string" },
        dryRun: { type: "boolean" },
        workspaceId: { type: "string" },
      },
      required: ["name", "pillarId", "outcome"],
      additionalProperties: false,
    },
  },
  {
    name: "submit_acceptance_grades",
    description:
      "Stamp self-grade results onto a task's acceptanceGrades array. Each entry sets pass/fail on the criterion at the given index.\n\n" +
      "USE WHEN: you've finished implementing a task and verified its acceptance criteria. Always call before opening a PR — the rubric requires self-grading prior to human review.\n" +
      "PREREQUISITE: get_agents_md once this session (enforced — defines grading dimensions). Call get_task first to read the acceptance criteria in order — indexes are positional.\n" +
      "ANTI-PATTERN: do not call before the implementation actually works — fabricated passes destroy the trust this signal carries. Do not call without a note when status='fail' — the reviewer needs the failure mode.\n" +
      "EXAMPLE: submit_acceptance_grades({ taskId: 'TK-100201', grades: [{ index: 0, status: 'pass' }, { index: 1, status: 'fail', note: 'Reload-persistence is flaky on Firefox; tracked in TK-100202' }] })\n\n" +
      "Requires SUPABASE_SERVICE_ROLE_KEY. Pass workspaceId to target a workspace other than the env default.",
    inputSchema: {
      type: "object",
      properties: {
        taskId: { type: "string" },
        grades: {
          type: "array",
          items: {
            type: "object",
            properties: {
              index: { type: "integer", minimum: 0 },
              status: { type: "string", enum: ["pass", "fail"] },
              note: { type: "string" },
            },
            required: ["index", "status"],
            additionalProperties: false,
          },
        },
        workspaceId: { type: "string" },
      },
      required: ["taskId", "grades"],
      additionalProperties: false,
    },
  },
  {
    name: "suggest_capability_for",
    description:
      "Given a free-text description of work, return the top existing capabilities ranked by token overlap.\n\n" +
      "USE WHEN: about to propose tasks or a capability — call this FIRST to find an existing parent. If any returned score > 0.4, strongly prefer attaching tasks there over creating a new capability.\n" +
      "PREREQUISITE: none — read-only.\n" +
      "ANTI-PATTERN: do not call after you've already decided to create a new capability — that's the case this tool is meant to prevent. Do not interpret weak matches (<0.2) as fits; if nothing's close, propose_capability is the right next call (after confirming with the user).\n" +
      "EXAMPLE: suggest_capability_for({ description: 'multi-tenant landing page builder with drag-and-drop blocks' })",
    inputSchema: {
      type: "object",
      properties: {
        description: { type: "string" },
        limit: { type: "integer", minimum: 1, maximum: 25 },
        workspaceId: { type: "string" },
      },
      required: ["description"],
      additionalProperties: false,
    },
  },
  {
    name: "suggest_theme_for",
    description:
      "Given a free-text description of work, return the top existing themes ranked by token overlap. Mirror of suggest_capability_for but at the theme level — themes are years-stable, so the bar to create a new one is higher.\n\n" +
      "USE WHEN: about to plan a feature and you've decided you need a Theme/Capability/Task tree. Call this FIRST so you can decide whether to attach a new capability under an existing theme (the usual answer) or whether the work represents a genuinely new strategic direction worth a new theme.\n" +
      "INTERPRETATION: a top score above ~0.4 = existing theme fits, do NOT create a new one. 0.2-0.4 = weak overlap, almost always still better to use the existing theme. Below 0.2 OR empty matches = ask the user before calling propose_theme — themes are years-stable, not per-feature, and duplicates are the most common failure mode.\n" +
      "PREREQUISITE: none — read-only. Also satisfies the discovery gate for propose_theme.\n" +
      "ANTI-PATTERN: do not call after deciding to create a new theme — that's the case this tool is meant to prevent. Do not interpret weak matches as 'must create new' without explicit user confirmation that a new strategic direction is intended.\n" +
      "EXAMPLE: suggest_theme_for({ description: 'multi-channel marketing analytics dashboard with attribution modeling' })",
    inputSchema: {
      type: "object",
      properties: {
        description: { type: "string" },
        limit: { type: "integer", minimum: 1, maximum: 25 },
        workspaceId: { type: "string" },
      },
      required: ["description"],
      additionalProperties: false,
    },
  },
  {
    name: "link_pr",
    description:
      "Attach a PR to a task. Closes the deliver-loop gap so an agent that just opened a PR can stamp it onto the parent task without waiting for the next GitHub webhook.\n\n" +
      "USE WHEN: you just opened a PR for a task and want it visible in roadmapper immediately. Always call alongside submit_acceptance_grades when closing out a task.\n" +
      "PREREQUISITE: get_agents_md once this session (enforced). The task id must exist (get_task / list_tasks to confirm).\n" +
      "ANTI-PATTERN: do not call as a substitute for the Roadmapper-Task: PR-body trailer convention — the trailer is the durable contract; link_pr is the instant-feedback shortcut. Do not call for PRs that don't have a parent task in roadmapper.\n" +
      "EXAMPLE: link_pr({ taskId: 'TK-100201', repo: 'acme/frontend', number: 1234, title: 'Drag block reorder', authorGithub: 'octocat' })\n\n" +
      "Idempotent by (repo, number) — re-calling with an already-linked PR returns idempotent:true. Requires SUPABASE_SERVICE_ROLE_KEY. Pass workspaceId to target a workspace other than the env default.",
    inputSchema: {
      type: "object",
      properties: {
        taskId: { type: "string" },
        repo: { type: "string" },
        number: { type: "integer", minimum: 1 },
        title: { type: "string" },
        merged: { type: "boolean" },
        mergedAt: { type: "string" },
        authorGithub: { type: "string" },
        authorKind: { type: "string", enum: ["human", "agent"] },
        workspaceId: { type: "string" },
      },
      required: ["taskId", "repo", "number"],
      additionalProperties: false,
    },
  },
  // ── Archive lifecycle (Phase 2 of the upgrade) ──────────────
  // Soft delete: archived rows stay in the workspace's edits
  // blob; list views filter them out, by-id lookups still
  // resolve. Refuses with active children (forces bottom-up
  // archive). Reason required on every call (audit trail).
  ...archiveLifecycleTools(),
  // ── Move lifecycle (Phase 3 of the upgrade) ─────────────────
  // Re-parent tasks under capabilities and capabilities under
  // themes. IDs are stable across moves. Target parent must be
  // active (refuses move into archived parent). Moving an
  // archived entity into an active parent unarchives in one step.
  ...moveLifecycleTools(),
  // ── Update lifecycle (Phase 4 of the upgrade) ───────────────
  // Patch fields on an entity. Parent fields (capabilityId,
  // pillarId) and lifecycle flags (archived, archivedAt) are
  // forbidden — those go through move_/archive_/unarchive_ for
  // audit clarity. UP5 idempotent on identical input.
  ...updateLifecycleTools(),
  // ── Outcome readings ────────────────────────────────────────
  // Track empirical metric readings against capability outcomes.
  // Append-only; the RPC takes a row lock so concurrent writes
  // never clobber. list_stale_outcomes flags capabilities whose
  // most recent reading is older than N days.
  {
    name: "record_outcome_reading",
    description:
      "Record a metric reading against a capability's stated outcome. Captures the empirical signal between 'outcome declared' and 'outcome decided.'\n\n" +
      "USE WHEN: you have a fresh measurement of the metric the capability is moving. A weekly Mixpanel paste, a warehouse extract, a Datadog reading — any source. The reading append-only-ly augments the capability's history; it doesn't replace prior readings.\n" +
      "PREREQUISITE: get_agents_md once this session (enforced). The capability must exist.\n" +
      "ANTI-PATTERN: do not use to declare the FINAL outcome (use outcomeStatus via update_capability for that). Readings are observations along the way, not the verdict.\n" +
      "EXAMPLE: record_outcome_reading({ capabilityId: 'CAP-9F2C7E', value: 0.41, asOf: '2026-05-12', source: 'mixpanel: activated_within_7d weekly', note: 'sample size 4218' })\n\n" +
      "Requires SUPABASE_SERVICE_ROLE_KEY. Audit log records each reading as 'outcome_reading_recorded'.",
    inputSchema: {
      type: "object",
      properties: {
        capabilityId: { type: "string", description: "CAP-XXXXXX" },
        value: { type: "number", description: "The metric reading (cardinality matches the outcome statement)." },
        asOf: { type: "string", description: "ISO date or timestamp the reading was sampled (not recorded)." },
        source: { type: "string", description: "Where the reading came from (e.g. 'mixpanel weekly', 'warehouse:fact_orders')." },
        note: { type: "string" },
        workspaceId: { type: "string" },
      },
      required: ["capabilityId", "value", "asOf", "source"],
      additionalProperties: false,
    },
  },
  {
    name: "list_stale_outcomes",
    description:
      "List capabilities whose outcome metric hasn't been measured recently. Default threshold: 14 days. Surfaces bets that have lost the empirical loop — outcome was declared but nobody's checking.\n\n" +
      "USE WHEN: at quarterly review, weekly outcome check, or any time you want to spot capabilities that are running without a reading.\n" +
      "Returns each stale capability with its id, name, outcome, days since last reading (or null if never), and most recent reading if present.",
    inputSchema: {
      type: "object",
      properties: {
        thresholdDays: {
          type: "number",
          description: "Days since last reading to count as stale. Default 14.",
        },
        includeArchived: { type: "boolean" },
        workspaceId: { type: "string" },
      },
      additionalProperties: false,
    },
  },
];
|
|
1092
|
+
|
|
1093
|
+
/**
 * Six archive/unarchive tools share most of their schema — same
 * shape per entity kind (task, capability, theme), same required
 * inputs (entity id + reason), same optional knobs (idempotency,
 * dryRun, workspaceId). Build them via a factory so the surface
 * stays in sync if the contract changes.
 *
 * Themes are the one special case in the generated text: they have
 * no parent and there is no move_theme tool, so the "move it instead"
 * hints are reworded (archive) or dropped (unarchive) for themes
 * rather than pointing the agent at move_capability with a theme id.
 *
 * @returns {Array<{name: string, description: string, inputSchema: object}>}
 *   Tool definitions in the order archive_task, unarchive_task,
 *   archive_capability, unarchive_capability, archive_theme,
 *   unarchive_theme.
 */
function archiveLifecycleTools() {
  const kinds = [
    {
      kind: "task",
      idDoc: "TK-NNNNNN",
      idKey: "taskId",
      example:
        "archive_task({ taskId: 'TK-100201', reason: 'cut from this quarter; superseded by TK-100299' })",
    },
    {
      kind: "capability",
      idDoc: "CAP-XXXXXX",
      idKey: "capabilityId",
      example:
        "archive_capability({ capabilityId: 'CAP-9F2C7E', reason: 'bet was wrong; we're going a different direction' })",
    },
    {
      kind: "theme",
      idDoc: "TH-XXXXXX",
      idKey: "themeId",
      example:
        "archive_theme({ themeId: 'TH-OLD-AREA', reason: 'theme retired; remaining bets re-parented' })",
    },
  ];
  const out = [];
  for (const { kind, idDoc, idKey, example } of kinds) {
    const idSchema = { [idKey]: { type: "string", description: idDoc } };
    // Tasks and capabilities can be re-parented with move_<kind>; a theme cannot.
    const isMovable = kind !== "theme";
    const archiveAlternative = isMovable
      ? `prefer moving it (move_${kind})`
      : "prefer re-parenting its capabilities (move_capability)";
    const unarchiveMoveHint = isMovable
      ? ` To move an archived entity to a different parent, call move_${kind} instead — that path unarchives in one step.`
      : "";
    out.push({
      name: `archive_${kind}`,
      description:
        `Archive a ${kind} (soft delete). The row stays in the workspace; list views filter it out, by-id lookups still resolve.\n\n` +
        `USE WHEN: a ${kind} is no longer relevant — cut from scope, superseded, or retired. Soft delete preserves history without cluttering the active roadmap.\n` +
        "PREREQUISITE: get_agents_md once this session (enforced). For capabilities/themes, every active child must already be archived — the server refuses with a count of blocking children. For tasks, no child check.\n" +
        `ANTI-PATTERN: do not archive a ${kind} you might come back to within the same session — ${archiveAlternative} or updating its status. Archive is the right tool for "this is closed out, get it out of the picker."\n` +
        `EXAMPLE: ${example}\n\n` +
        "Idempotent: re-archiving an already-archived entity returns { idempotent: true } and emits no audit row. Requires SUPABASE_SERVICE_ROLE_KEY. Pass workspaceId to target a workspace other than the env default.",
      inputSchema: {
        type: "object",
        properties: {
          ...idSchema,
          reason: {
            type: "string",
            description:
              "Why this is being archived. Required — landed in the audit log so future readers know the rationale.",
          },
          idempotencyKey: { type: "string" },
          dryRun: { type: "boolean" },
          workspaceId: { type: "string" },
        },
        required: [idKey, "reason"],
        additionalProperties: false,
      },
    });
    out.push({
      name: `unarchive_${kind}`,
      description:
        `Unarchive a ${kind}. Reverses archive_${kind}.\n\n` +
        `USE WHEN: an archived ${kind} is being pulled back into scope.${unarchiveMoveHint}\n` +
        "PREREQUISITE: get_agents_md once this session (enforced). The parent (if any) must be active — cannot unarchive a task whose capability is archived, or a capability whose theme is archived. Unarchive the parent first.\n" +
        "ANTI-PATTERN: do not unarchive en masse without thinking — every unarchive re-floats noise into list views. If you're recovering from an over-aggressive archive sweep, work top-down.\n" +
        // Every example starts with "archive_<kind>(", so prefixing "un"
        // yields the matching unarchive call.
        `EXAMPLE: un${example}\n\n` +
        "Idempotent: unarchiving an already-active entity returns { idempotent: true }. Requires SUPABASE_SERVICE_ROLE_KEY.",
      inputSchema: {
        type: "object",
        properties: {
          ...idSchema,
          reason: { type: "string" },
          idempotencyKey: { type: "string" },
          dryRun: { type: "boolean" },
          workspaceId: { type: "string" },
        },
        required: [idKey, "reason"],
        additionalProperties: false,
      },
    });
  }
  return out;
}
|
|
1178
|
+
|
|
1179
|
+
/**
 * Build the MCP tool definitions for re-parenting roadmap entities.
 *
 * Four tools are returned, in this order: move_task, move_tasks,
 * move_capability, move_capabilities. The single tools re-parent one
 * entity; the bulk tools accept between 1 and 100 moves per call (see
 * the `moves` array bounds in the schema). Per the tool descriptions,
 * the server stamps a shared batchId on every audit row of a bulk call
 * so a reorg reads as one logical operation in history. Themes have no
 * parent, so there is no move_theme.
 *
 * @returns {Array<{name: string, description: string, inputSchema: object}>}
 *   Tool descriptors with a name, an agent-facing description and a
 *   JSON-Schema `inputSchema`.
 */
function moveLifecycleTools() {
  // One row per movable kind. `idDoc` / `parentDoc` are the id formats
  // shown to the agent; `example` / `bulkExample` are sample calls that
  // get embedded verbatim in the descriptions.
  const kinds = [
    {
      kind: "task",
      kindPlural: "tasks",
      idKey: "taskId",
      idDoc: "TK-NNNNNN",
      parentKey: "newCapabilityId",
      parentDoc: "CAP-XXXXXX",
      parentKind: "capability",
      example:
        "move_task({ taskId: 'TK-100201', newCapabilityId: 'CAP-7A2D9F', reason: 'belongs under the auth capability, not onboarding' })",
      bulkExample:
        "move_tasks({ moves: [{ taskId: 'TK-100201', newCapabilityId: 'CAP-7A2D9F' }, { taskId: 'TK-100202', newCapabilityId: 'CAP-7A2D9F' }], reason: 'auth reorg — pulling these under the new auth capability' })",
    },
    {
      kind: "capability",
      kindPlural: "capabilities",
      idKey: "capabilityId",
      idDoc: "CAP-XXXXXX",
      parentKey: "newThemeId",
      parentDoc: "TH-NNNNNN",
      parentKind: "theme",
      example:
        "move_capability({ capabilityId: 'CAP-9F2C7E', newThemeId: 'TH-100042', reason: 'theme reorg — moving under Platform' })",
      bulkExample:
        "move_capabilities({ moves: [{ capabilityId: 'CAP-9F2C7E', newThemeId: 'TH-100042' }, { capabilityId: 'CAP-9F2C7F', newThemeId: 'TH-100042' }], reason: 'theme reorg — moving under Platform' })",
    },
  ];

  return kinds.flatMap(
    ({ kind, kindPlural, idKey, idDoc, parentKey, parentDoc, parentKind, example, bulkExample }) => {
      const single = {
        name: `move_${kind}`,
        description:
          `Re-parent a ${kind} under a different ${parentKind}. The ${kind}'s id stays the same (${idDoc} never changes).\n\n` +
          `USE WHEN: a ${kind} is in the wrong place — wrong ${parentKind} after a reorg, or initially proposed under the wrong parent. To unarchive while moving, pass the archived entity's id with a DIFFERENT active target parent; if the new parent is active the entity unarchives in one step. To unarchive in place (without moving), call unarchive_${kind} directly — move to the SAME parent short-circuits as idempotent and won't unarchive.\n` +
          "PREREQUISITE: get_agents_md once this session (enforced). Target parent must exist AND be active — refuses move into an archived parent.\n" +
          // update_task / update_capability are real tools in this server,
          // so the pointer must not describe them as unreleased.
          `ANTI-PATTERN: do not use move to change anything other than the parent. To rename or rescope, use update_${kind}. To delete, use archive_${kind}.\n` +
          `EXAMPLE: ${example}\n\n` +
          "Idempotent: moving to the current parent returns { idempotent: true } and emits no audit row. Requires SUPABASE_SERVICE_ROLE_KEY.",
        inputSchema: {
          type: "object",
          properties: {
            [idKey]: { type: "string", description: idDoc },
            [parentKey]: { type: "string", description: `Target ${parentKind} id (${parentDoc}).` },
            reason: {
              type: "string",
              description:
                "Why this is being moved. Required — landed in the audit log so future readers know the rationale.",
            },
            dryRun: { type: "boolean" },
            workspaceId: { type: "string" },
          },
          required: [idKey, parentKey, "reason"],
          additionalProperties: false,
        },
      };

      const bulk = {
        name: `move_${kindPlural}`,
        description:
          `Bulk re-parent up to 100 ${kindPlural} in one call. Each item lists the entity id and its new ${parentKind}; the server stamps a shared batchId so the audit log groups the reorg as one logical operation.\n\n` +
          `USE WHEN: you have a planned reorg touching many ${kindPlural} at once — splitting a ${parentKind}, merging two, or rebalancing scope. The shared batchId is what makes a 50-row reorg show up as one event in history rather than 50 disconnected moves.\n` +
          "PREREQUISITE: get_agents_md once this session (enforced). Each move follows the same rules as the single tool — target parent active, reason required. Partial failures: the server processes moves one at a time and returns per-item ok/error; later failures do not roll back earlier successes.\n" +
          "ANTI-PATTERN: do not loop the single tool when you have a batch — you lose the batchId grouping in audit history. Conversely, do not use bulk for a single move; the single tool has a cleaner response shape.\n" +
          `EXAMPLE: ${bulkExample}`,
        inputSchema: {
          type: "object",
          properties: {
            moves: {
              type: "array",
              minItems: 1,
              maxItems: 100,
              items: {
                type: "object",
                properties: {
                  [idKey]: { type: "string", description: idDoc },
                  [parentKey]: { type: "string", description: parentDoc },
                },
                required: [idKey, parentKey],
                additionalProperties: false,
              },
              description: "Up to 100 moves to apply with a shared batchId.",
            },
            reason: {
              type: "string",
              description:
                "Shared rationale for the reorg — written to every audit row in the batch.",
            },
            dryRun: { type: "boolean" },
            workspaceId: { type: "string" },
          },
          required: ["moves", "reason"],
          additionalProperties: false,
        },
      };

      // Single tool first, then its bulk sibling, for each kind.
      return [single, bulk];
    }
  );
}
|
|
1284
|
+
|
|
1285
|
+
/**
 * Build the MCP tool definitions for patching roadmap entities.
 *
 * Three tools are returned, in this order: update_task,
 * update_capability, update_theme. Each takes a `patch` object whose
 * keys are the fields to change. The patch schema enumerates only the
 * mutable fields for that kind and sets `additionalProperties: false`,
 * so an agent can introspect what is settable. Parent fields and
 * lifecycle flags are deliberately left out of the schema; the tool
 * descriptions state that the server rejects them as well, and
 * omitting them here saves the agent a failed round-trip.
 *
 * @returns {Array<{name: string, description: string, inputSchema: object}>}
 *   Tool descriptors with a name, an agent-facing description and a
 *   JSON-Schema `inputSchema`.
 */
function updateLifecycleTools() {
  const kinds = [
    {
      kind: "task",
      idKey: "taskId",
      idDoc: "TK-NNNNNN",
      patchProps: {
        title: { type: "string", description: "Task title. Minimum 5 chars." },
        summary: { type: "string", description: "Free-form description." },
        status: {
          type: "string",
          enum: ["delivered", "in_progress", "planned", "exploring"],
        },
        priority: { type: "string", enum: ["P0", "P1", "P2", "P3"] },
        effort: { type: "string", enum: ["XS", "S", "M", "L", "XL"] },
        kind: { type: "string", enum: ["feature", "bug", "chore", "spike"] },
        start: { type: "string", description: "ISO date YYYY-MM-DD." },
        target: { type: "string", description: "ISO date YYYY-MM-DD." },
        progress: { type: "number", description: "0–100." },
        owner: { type: "string" },
        team: { type: "string" },
        tags: { type: "array", items: { type: "string" } },
        acceptance: { type: "array" },
        dependsOn: { type: "array", items: { type: "string" } },
        links: { type: "object", additionalProperties: { type: "string" } },
        expectedPRs: {
          type: "number",
          description: "Advisory: max merged PRs for this task (overrun → audit warning).",
        },
        expectedScope: {
          type: "number",
          description: "Advisory: cumulative LoC ceiling across linked PRs.",
        },
      },
      example:
        "update_task({ taskId: 'TK-100201', patch: { status: 'in_progress', progress: 25 }, reason: 'work started — kicking off this week' })",
    },
    {
      kind: "capability",
      idKey: "capabilityId",
      idDoc: "CAP-XXXXXX",
      patchProps: {
        name: { type: "string", description: "Capability name. Minimum 8 chars." },
        outcome: { type: "string", description: "Falsifiable outcome statement." },
        hypothesis: { type: "string" },
        owner: { type: "string" },
        team: { type: "string" },
        confidence: { type: "number", description: "0–95." },
        impact: { type: "number", description: "One of 0.25, 0.5, 1, 2, 3." },
        roi: { type: "number" },
        tags: { type: "array", items: { type: "string" } },
        links: { type: "object", additionalProperties: { type: "string" } },
      },
      example:
        "update_capability({ capabilityId: 'CAP-9F2C7E', patch: { confidence: 80, outcome: 'Activation moves from 32% to 55% by 2026-09-30, measured by mixpanel_activated_v2.' }, reason: 'sharper outcome after the leadership review' })",
    },
    {
      kind: "theme",
      idKey: "themeId",
      // Same format the move_capability tool advertises for theme ids,
      // and the shape of the example id below ('TH-100042').
      idDoc: "TH-NNNNNN",
      patchProps: {
        name: { type: "string", description: "Theme name. Minimum 5 chars." },
        description: { type: "string" },
        owner: { type: "string" },
        targetRoi: { type: "number" },
      },
      example:
        "update_theme({ themeId: 'TH-100042', patch: { name: 'Platform Reliability' }, reason: 'sharper name; same scope' })",
    },
  ];

  return kinds.map(({ kind, idKey, idDoc, patchProps, example }) => {
    // Themes are roots (no parent), so the "use move_*" guidance
    // doesn't apply to them. Per-kind reparenting hint:
    const reparentHint =
      kind === "task"
        ? "Reparenting must use move_task — passing capabilityId/id/archived in the patch is rejected server-side."
        : kind === "capability"
          ? "Reparenting must use move_capability — passing pillarId/id/archived in the patch is rejected server-side."
          : "Themes are top-level (no parent). Passing id/archived in the patch is rejected server-side; use archive_theme to retire.";

    return {
      name: `update_${kind}`,
      description:
        `Patch fields on a ${kind}. The patch is a partial object — only the keys you include are touched.\n\n` +
        `USE WHEN: a ${kind}'s details need to change. Renaming, sharpening outcomes, bumping confidence, fixing typos, advancing status, adding tags — all here.\n` +
        `PREREQUISITE: get_agents_md once this session (enforced). Reason required (audit trail). ${reparentHint}\n` +
        `ANTI-PATTERN: do not echo the entity back to the server — pass only the keys that changed. The server diffs against current state and a patch that matches everything returns { idempotent: true }.\n` +
        `EXAMPLE: ${example}\n\n` +
        "Idempotent: a patch where every key already matches current state returns { idempotent: true } and emits no audit row. Requires SUPABASE_SERVICE_ROLE_KEY.",
      inputSchema: {
        type: "object",
        properties: {
          [idKey]: { type: "string", description: idDoc },
          patch: {
            type: "object",
            description: `Partial ${kind} — keys to update. Parent fields and lifecycle flags are rejected.`,
            properties: patchProps,
            additionalProperties: false,
            // An empty patch is meaningless; require at least one key.
            minProperties: 1,
          },
          reason: {
            type: "string",
            description: "Why this is being updated. Required — landed in the audit log.",
          },
          dryRun: { type: "boolean" },
          workspaceId: { type: "string" },
        },
        required: [idKey, "patch", "reason"],
        additionalProperties: false,
      },
    };
  });
}
|
|
1405
|
+
|
|
1406
|
+
/**
 * Names of every tool that writes to the workspace. callTool checks
 * membership in this set and refuses the call until get_agents_md has
 * been fetched in the current session. Grouped by family; the
 * flattened order is the Set's insertion order.
 */
const MUTATOR_TOOLS = new Set(
  [
    ["propose_task", "propose_theme", "propose_capability"],
    ["submit_acceptance_grades", "link_pr"],
    ["archive_task", "archive_capability", "archive_theme"],
    ["unarchive_task", "unarchive_capability", "unarchive_theme"],
    ["move_task", "move_capability", "move_tasks", "move_capabilities"],
    ["update_task", "update_capability", "update_theme"],
    ["record_outcome_reading"],
  ].flat()
);
|
|
1428
|
+
|
|
1429
|
+
/**
 * Dispatch one MCP tool invocation.
 *
 * Resolves the workspace, loads the projected roadmap, runs the
 * mutator gates (rubric fetched, per-tool discovery, cross-workspace
 * guard) for any tool named in MUTATOR_TOOLS, then hands off to the
 * per-tool handler.
 *
 * @param {string} name  Tool name as sent by the client.
 * @param {object} [args]  Tool arguments; may be absent, so every read
 *   in this function goes through optional chaining.
 * @returns {Promise<object>} The handler's result, or an error result
 *   when a gate refuses the call or the tool name is unknown.
 */
async function callTool(name, args) {
  // A call may override the workspace through args.workspaceId. The
  // projection is workspace-scoped, so the resolved id is used for the
  // read and passed on to the handlers that need it.
  const wsId = resolveWorkspaceId(args?.workspaceId);

  // The entity tables are the canonical projection. The bundled seed
  // is only a fallback for when the workspace read yields nothing
  // (the original comment names offline / dev use).
  const fromWorkspace = await readWorkspaceProjected(wsId);
  const projected = fromWorkspace ?? project(readSeed(), {});

  if (MUTATOR_TOOLS.has(name)) {
    session.mutatorAttempts += 1;

    // Best-effort target id for telemetry. Argument names differ
    // across tools, and the bulk move tools nest ids in args.moves[],
    // so probe the candidates in priority order; the first non-empty
    // string wins. Null when the call carries no target id.
    const firstNonEmptyString = (holder, keys) => {
      for (const key of keys) {
        const candidate = holder?.[key];
        if (typeof candidate === "string" && candidate) return candidate;
      }
      return null;
    };
    const firstMove = Array.isArray(args?.moves) ? args.moves[0] : null;
    const targetId =
      firstNonEmptyString(args, ["id", "taskId", "capabilityId", "themeId", "pillarId"]) ??
      firstNonEmptyString(firstMove, ["taskId", "capabilityId"]);

    // Shared bookkeeping for a refused call: bump the block counter,
    // emit telemetry, and only then build the result that is returned.
    const refuse = (event, details, buildResult) => {
      session.mutatorBlocks += 1;
      recordTelemetry(event, details, wsId);
      return buildResult();
    };

    // Rubric gate: no mutator runs until get_agents_md has been called
    // this session. Read-only tools never reach this branch.
    if (session.rubricFetchedAt === null) {
      return refuse(
        "mutator_blocked_no_rubric",
        { tool: name, targetId },
        () => rubricMissingResult(name)
      );
    }

    // Discovery gates: a new theme / capability may only be proposed
    // after the existing catalogue has been looked at this session,
    // which guards against creating duplicates.
    if (name === "propose_theme" && session.themesListedAt === null) {
      return refuse(
        "mutator_blocked_no_discovery",
        { tool: name, missing: "suggest_theme_for", targetId },
        () =>
          discoveryMissingResult(
            name,
            'suggest_theme_for({ description: "<the work you are about to propose>" })',
            "Rank existing themes by relevance before proposing a new one — themes are years-stable, duplicates are the most common failure mode. Any returned top score >0.4 means an existing theme is a sensible home; re-use it. list_themes() or get_roadmap_snapshot() also satisfy this gate if you want the full catalogue."
          )
      );
    }
    if (name === "propose_capability" && session.capsDiscoveredAt === null) {
      return refuse(
        "mutator_blocked_no_discovery",
        { tool: name, missing: "suggest_capability_for", targetId },
        () =>
          discoveryMissingResult(
            name,
            'suggest_capability_for({ description: "<the work you are about to propose>" })',
            "Rank existing capabilities by relevance before proposing a new one. If any score is >0.4, attach tasks there instead."
          )
      );
    }

    // Cross-workspace guard: refuse when the cwd snapshot names one
    // workspace and the call explicitly names another, unless
    // ROADMAPPER_ALLOW_CROSS_WORKSPACE=1 is set in the environment.
    const snap = snapshotWorkspaceId();
    const argWs = args?.workspaceId;
    const hasExplicitWorkspace = typeof argWs === "string" && argWs.length > 0;
    const overrideEnabled = process.env.ROADMAPPER_ALLOW_CROSS_WORKSPACE === "1";
    if (snap && hasExplicitWorkspace && argWs !== snap && !overrideEnabled) {
      return refuse(
        "mutator_blocked_cross_workspace",
        { tool: name, targetId, cwdWorkspace: snap, argWorkspace: argWs },
        () =>
          errorResult(
            `Refusing cross-workspace write: cwd's .roadmapper/snapshot.json names workspace "${snap}" but ${name} call targets "${argWs}". Almost always a mistake — drop the workspaceId arg to use the cwd default, or set ROADMAPPER_ALLOW_CROSS_WORKSPACE=1 to override.`
          )
      );
    }

    recordTelemetry("mutator_attempted", { tool: name, targetId }, wsId);
  }

  switch (name) {
    case "list_themes": {
      // Listing themes satisfies the propose_theme discovery gate.
      session.themesListedAt = Date.now();
      const themes = args?.includeArchived
        ? projected.themes
        : projected.themes.filter((theme) => !theme.archived);
      return withReminder(
        "list_themes",
        projected,
        textResult(JSON.stringify(themes, null, 2))
      );
    }

    case "list_capabilities": {
      // Listing capabilities satisfies the propose_capability gate.
      session.capsDiscoveredAt = Date.now();
      const themeId = args?.themeId;
      const hideDelivered = !args?.includeDelivered;
      const hideArchived = !args?.includeArchived;
      // Checks run in the same order as the original filter chain:
      // theme match, then delivered status, then archived flag.
      const capabilities = projected.capabilities.filter((cap) => {
        if (themeId && cap.pillarId !== themeId) return false;
        if (
          hideDelivered &&
          effectiveCapabilityStatus(cap, projected.tasks) === "delivered"
        ) {
          return false;
        }
        return !(hideArchived && cap.archived);
      });
      return withReminder(
        "list_capabilities",
        projected,
        textResult(JSON.stringify(capabilities, null, 2))
      );
    }

    case "list_tasks": {
      const capabilityId = args?.capabilityId;
      const status = args?.status;
      const hideArchived = !args?.includeArchived;
      const tasks = projected.tasks.filter((task) => {
        if (capabilityId && task.capabilityId !== capabilityId) return false;
        if (status && task.status !== status) return false;
        return !(hideArchived && task.archived);
      });
      return withReminder(
        "list_tasks",
        projected,
        textResult(JSON.stringify(tasks, null, 2))
      );
    }

    case "get_task": {
      // Direct-by-id lookups do not filter on `archived`, so archived
      // tasks still resolve (cross-references such as PR links and
      // dependsOn rely on this, per the original note).
      const found = projected.tasks.find((task) => task.id === args?.id);
      return found
        ? textResult(JSON.stringify(found, null, 2))
        : errorResult(`Task ${args?.id} not found.`);
    }

    case "get_agents_md": {
      // Telemetry fires only on the first fetch of the session; the
      // timestamp itself is refreshed on every call.
      const firstFetch = session.rubricFetchedAt === null;
      session.rubricFetchedAt = Date.now();
      if (firstFetch) {
        recordTelemetry("rubric_fetched", { via: "tool" }, wsId);
      }
      return textResult(readAgentsMd(), {
        _meta: {
          roadmapper: {
            reminder:
              "Rubric loaded. You can now safely call propose_task, propose_capability, propose_theme, submit_acceptance_grades, link_pr.",
          },
        },
      });
    }

    case "get_roadmap_snapshot": {
      // The snapshot covers both catalogues in one response, so it
      // satisfies BOTH discovery gates with the same timestamp.
      const now = Date.now();
      session.themesListedAt = now;
      session.capsDiscoveredAt = now;
      return withReminder(
        "get_roadmap_snapshot",
        projected,
        getRoadmapSnapshot(projected, wsId, args?.includeArchived === true)
      );
    }

    case "propose_task":
      return proposeTask(args, projected, wsId);
    case "propose_theme":
      return proposeTheme(args, projected, wsId);
    case "propose_capability":
      return proposeCapability(args, projected, wsId);
    case "submit_acceptance_grades":
      return submitAcceptanceGrades(args, projected, wsId);

    case "suggest_capability_for":
      // Asking the server to rank existing capabilities counts as
      // capability discovery.
      session.capsDiscoveredAt = Date.now();
      return suggestCapabilityFor(args, projected);

    case "suggest_theme_for":
      // Mirror of the capability case: satisfies the propose_theme gate.
      session.themesListedAt = Date.now();
      return suggestThemeFor(args, projected);

    case "link_pr":
      // NOTE(review): `seed` is not declared inside callTool —
      // presumably a module-level binding; confirm it exists.
      return linkPR(args, projected, seed, wsId);

    case "archive_task":
    case "archive_capability":
    case "archive_theme":
    case "unarchive_task":
    case "unarchive_capability":
    case "unarchive_theme": {
      // Tool names in this family are exactly "<action>_<kind>".
      const [action, kind] = name.split("_");
      return archiveLifecycle(kind, action, args, wsId);
    }

    case "move_task":
      return moveEntity("task", args, wsId);
    case "move_capability":
      return moveEntity("capability", args, wsId);
    case "move_tasks":
      return moveBulk("task", args, wsId);
    case "move_capabilities":
      return moveBulk("capability", args, wsId);

    case "update_task":
    case "update_capability":
    case "update_theme":
      // The kind is whatever follows the "update_" prefix.
      return updateEntity(name.slice("update_".length), args, wsId, projected);

    case "record_outcome_reading":
      return recordOutcomeReading(args, wsId, projected);
    case "list_stale_outcomes":
      return listStaleOutcomes(args, projected);

    default:
      return errorResult(`Unknown tool: ${name}`);
  }
}
|
|
1686
|
+
|
|
1687
|
+
/**
 * Handle the propose_task tool: validate the proposal, build the task
 * record and persist it through the `propose_task` RPC.
 *
 * @param {object} [args]  Tool arguments (capabilityId, title, summary,
 *   effort, priority, kind, owner, acceptance, dependsOn, expectedPRs,
 *   expectedScope, dryRun, idempotencyKey). A missing bag is treated as
 *   empty so the call fails validation instead of throwing.
 * @param {object} projected  Projected roadmap; `projected.capabilities`
 *   is searched for the parent capability.
 * @param {string} wsId  Resolved workspace id passed to the RPC.
 * @returns {Promise<object>} A text result describing the created (or
 *   would-be-created, for dryRun) task, or an error result.
 */
async function proposeTask(args, projected, wsId) {
  // callTool forwards `args` untouched and itself treats it as possibly
  // absent; normalising here turns a TypeError into a regular
  // "Capability undefined not found." error result.
  const input = args ?? {};

  const cap = projected.capabilities.find((c) => c.id === input.capabilityId);
  if (!cap) return errorResult(`Capability ${input.capabilityId} not found.`);

  const titleErr = validateName(input.title, 5);
  if (titleErr) return errorResult(titleErr);

  // Optional enumerated fields are only checked when supplied.
  if (input.effort && !VALID_EFFORTS.has(input.effort)) {
    return errorResult(`Invalid effort ${input.effort}.`);
  }
  if (input.priority && !VALID_PRIORITIES.has(input.priority)) {
    return errorResult(`Invalid priority ${input.priority}.`);
  }
  if (input.kind && !VALID_KINDS.has(input.kind)) {
    return errorResult(`Invalid kind ${input.kind}.`);
  }
  if (
    input.expectedPRs !== undefined &&
    (typeof input.expectedPRs !== "number" || input.expectedPRs <= 0)
  ) {
    return errorResult(`expectedPRs must be a positive number, got ${input.expectedPRs}.`);
  }
  if (
    input.expectedScope !== undefined &&
    (typeof input.expectedScope !== "number" || input.expectedScope <= 0)
  ) {
    return errorResult(`expectedScope must be a positive number, got ${input.expectedScope}.`);
  }

  const effort = input.effort ?? "M";
  const start = todayISO();
  // Target dates are day-resolution; round up so sub-day estimates
  // (XS=0.25, S=0.5) still nudge the target at least one day out.
  const target = addDays(start, Math.max(1, Math.ceil(EFFORT_DAYS[effort])));
  const id = randomTaskId();
  const task = {
    id,
    capabilityId: cap.id,
    title: cleanText(input.title),
    summary: cleanText(input.summary),
    status: "planned",
    priority: input.priority ?? "P2",
    effort,
    kind: input.kind ?? "feature",
    start,
    target,
    originalTarget: target,
    progress: 0,
    owner: input.owner?.trim() ?? "",
    // The team is taken from the parent capability, not from the caller.
    team: cap.team ?? "",
    tags: [],
    prs: [],
    links: {},
    acceptance: input.acceptance ?? [],
    dependsOn: input.dependsOn ?? [],
    authorKind: "agent",
    // Advisory scope ceiling: the keys are only present when the caller
    // supplied them, so an undeclared envelope stays absent from the
    // record rather than being stored as undefined.
    ...(input.expectedPRs !== undefined ? { expectedPRs: input.expectedPRs } : {}),
    ...(input.expectedScope !== undefined ? { expectedScope: input.expectedScope } : {}),
  };

  // Dry run: report what would be written and return before the RPC.
  if (input.dryRun) {
    return textResult(
      JSON.stringify(
        {
          ok: true,
          dryRun: true,
          wouldCreate: task,
          warnings: [],
          message: `Would create task ${id} under ${cap.id} (${cap.name}). No record written.`,
        },
        null,
        2
      )
    );
  }

  let rpcResult;
  try {
    // Per the original note, the RPC performs the idempotency check and
    // the append under a row lock, so retries with the same key collapse
    // to one task while calls without a key insert distinct tasks.
    rpcResult = await rpcCall("propose_task", {
      p_workspace_id: wsId,
      p_task: task,
      p_idempotency_key: input.idempotencyKey ?? null,
    });
  } catch (e) {
    return errorResult(e.message);
  }

  // The RPC returns { task, idempotent }. When idempotent is true an
  // earlier call with the same key already created the task, so report
  // the stored record rather than the one built above.
  const stored = rpcResult?.task ?? task;
  const idempotent = rpcResult?.idempotent === true;

  return textResult(
    JSON.stringify(
      {
        ok: true,
        id: stored.id,
        capabilityId: stored.capabilityId,
        idempotent,
        message: idempotent
          ? `Task ${stored.id} already exists with idempotencyKey ${input.idempotencyKey}; returning existing task instead of creating a duplicate.`
          : `Created ${stored.id} under ${cap.id} (${cap.name}). status=planned, authorKind=agent.`,
      },
      null,
      2
    )
  );
}
|
|
1797
|
+
|
|
1798
|
+
/**
 * Handle the propose_theme tool: validate the name, build the theme
 * record and persist it through the `propose_theme` RPC.
 *
 * @param {object} [args]  Tool arguments (name, description, color,
 *   targetRoi, dryRun, idempotencyKey). A missing bag is treated as
 *   empty so the call fails name validation instead of throwing.
 * @param {object} _projected  Unused — themes carry no parent.
 * @param {string} wsId  Resolved workspace id passed to the RPC.
 * @returns {Promise<object>} A text result describing the created (or
 *   would-be-created, for dryRun) theme, or an error result.
 */
async function proposeTheme(args, _projected /* unused — themes carry no parent */, wsId) {
  // callTool forwards `args` untouched and itself treats it as possibly
  // absent; normalising here lets validateName report the problem.
  const input = args ?? {};

  const nameErr = validateName(input.name, 6);
  if (nameErr) return errorResult(nameErr);

  const name = cleanText(input.name);
  const id = randomThemeId();
  const theme = {
    id,
    name,
    description: cleanText(input.description),
    color: input.color || "#6366f1", // brand-indigo default; user can change
    // targetRoi is only stored when a number was supplied.
    ...(typeof input.targetRoi === "number" ? { targetRoi: input.targetRoi } : {}),
  };

  // Dry run: report what would be written and return before the RPC.
  if (input.dryRun) {
    return textResult(
      JSON.stringify(
        {
          ok: true,
          dryRun: true,
          wouldCreate: theme,
          warnings: [],
          message: `Would create theme ${id} (${name}). No record written.`,
        },
        null,
        2
      )
    );
  }

  let rpcResult;
  try {
    rpcResult = await rpcCall("propose_theme", {
      p_workspace_id: wsId,
      p_theme: theme,
      p_idempotency_key: input.idempotencyKey ?? null,
    });
  } catch (e) {
    return errorResult(e.message);
  }

  // Prefer the record echoed by the RPC; when idempotent is true it is
  // the theme created by an earlier call with the same key.
  const stored = rpcResult?.theme ?? theme;
  const idempotent = rpcResult?.idempotent === true;
  return textResult(
    JSON.stringify(
      {
        ok: true,
        id: stored.id,
        idempotent,
        message: idempotent
          ? `Theme ${stored.id} already exists with idempotencyKey ${input.idempotencyKey}; returning existing instead of duplicating.`
          : `Created theme ${stored.id} (${stored.name}).`,
      },
      null,
      2
    )
  );
}
|
|
1855
|
+
|
|
1856
|
+
/**
 * Validates a capability proposal and, unless `args.dryRun` is truthy,
 * hands it to the `propose_capability` RPC.
 *
 * Checks run in a fixed order and the first failure short-circuits with
 * an error result: name, pillarId presence, pillarId lookup in the
 * projected themes, impact, confidence, reach, then the outcome rubric.
 *
 * @param {object} args - Tool arguments. Reads name, pillarId,
 *   description, outcome, reach, impact, confidence, roi, specRef,
 *   dryRun and idempotencyKey.
 * @param {object} projected - Projected roadmap view; only `themes` is read.
 * @param {string} wsId - Workspace id forwarded to the RPC.
 * @returns {Promise<object>} Text result carrying a JSON payload, or an
 *   error result when validation or the RPC fails.
 */
async function proposeCapability(args, projected, wsId) {
  const render = (payload) => textResult(JSON.stringify(payload, null, 2));
  const pillarId = (args.pillarId || "").trim();

  // Cheap structural checks go first; the rubric checks follow.
  const nameProblem = validateName(args.name, 8);
  if (nameProblem) return errorResult(nameProblem);
  if (!pillarId) return errorResult("pillarId is required.");

  // The parent theme has to be present in the projected view. A theme
  // deleted mid-session is reportedly caught by the RPC as well.
  const parentTheme = projected.themes.find((candidate) => candidate.id === pillarId);
  if (!parentTheme) {
    return errorResult(
      `pillarId ${pillarId} doesn't match any known theme. Call list_themes first.`
    );
  }

  const { impact, reach, confidence, roi } = args;
  if (typeof impact === "number" && !VALID_IMPACTS.has(impact)) {
    return errorResult(
      `Invalid impact ${impact} — must be one of 3, 2, 1, 0.5, 0.25.`
    );
  }
  const confidenceProblem = validateConfidence(confidence);
  if (confidenceProblem) return errorResult(confidenceProblem);
  if (typeof reach === "number" && reach < 0) {
    return errorResult(`Invalid reach ${reach} — must be >= 0.`);
  }
  // The outcome is mandatory and is vetted by validateOutcome.
  const outcomeProblem = validateOutcome(args.outcome);
  if (outcomeProblem) return errorResult(outcomeProblem);

  const id = randomCapabilityId();
  const capability = {
    id,
    name: cleanText(args.name),
    pillarId,
    description: cleanText(args.description),
    outcome: cleanText(args.outcome),
    reach: typeof reach === "number" ? reach : 100,
    impact: typeof impact === "number" ? impact : 1,
    confidence: typeof confidence === "number" ? confidence : 70,
  };
  // Optional fields are appended last so the serialized key order is stable.
  if (typeof roi === "number") capability.roi = roi;
  if (args.specRef) capability.specRef = args.specRef;

  // Soft warnings are reported alongside the result, never as a rejection.
  const roiWarning = warnRoiVsTheme(capability.roi, parentTheme);
  const warnings = roiWarning ? [roiWarning] : [];

  if (args.dryRun) {
    return render({
      ok: true,
      dryRun: true,
      wouldCreate: capability,
      warnings,
      message: `Would create capability ${id} (${capability.name}) under ${parentTheme.id} (${parentTheme.name}). No record written.`,
    });
  }

  let rpcResult;
  try {
    rpcResult = await rpcCall("propose_capability", {
      p_workspace_id: wsId,
      p_capability: capability,
      p_idempotency_key: args.idempotencyKey ?? null,
    });
  } catch (err) {
    return errorResult(err.message);
  }

  // Prefer the record echoed by the RPC; fall back to what was sent.
  const stored = rpcResult?.capability ?? capability;
  const idempotent = rpcResult?.idempotent === true;
  let message;
  if (idempotent) {
    message = `Capability ${stored.id} already exists with idempotencyKey ${args.idempotencyKey}; returning existing instead of duplicating.`;
  } else {
    message = `Created capability ${stored.id} (${stored.name}) under ${stored.pillarId}.`;
  }
  return render({
    ok: true,
    id: stored.id,
    pillarId: stored.pillarId,
    idempotent,
    warnings,
    message,
  });
}
|
|
1951
|
+
|
|
1952
|
+
/**
 * One-shot snapshot for cold-start agents: themes, capabilities that are
 * not yet delivered, and in-flight tasks (status `in_progress` or
 * `planned`) bundled into a single response, so orienting does not take
 * three separate reads. `generatedAt` is stamped at call time.
 *
 * The resolved workspaceId is echoed back so the agent knows which id to
 * pass to later write tools (`propose_task`, `propose_capability`,
 * `propose_theme`).
 *
 * @param {object} projected - Projected roadmap view (themes,
 *   capabilities, tasks).
 * @param {string} wsId - Resolved workspace id, echoed in the payload.
 * @param {boolean} [includeArchived=false] - When true, archived
 *   entities are kept; by default they are dropped because the snapshot
 *   describes what an agent should plan against.
 * @returns {object} Text result carrying the snapshot as JSON.
 */
function getRoadmapSnapshot(projected, wsId, includeArchived = false) {
  // Archived rows pass only when the caller explicitly asked for them.
  const isVisible = (entity) => includeArchived || !entity.archived;
  const isInFlight = (task) =>
    task.status === "in_progress" || task.status === "planned";

  const themes = projected.themes.filter(isVisible);
  const activeCapabilities = projected.capabilities.filter(
    (cap) =>
      isVisible(cap) &&
      effectiveCapabilityStatus(cap, projected.tasks) !== "delivered"
  );
  const inFlightTasks = projected.tasks.filter(
    (task) => isVisible(task) && isInFlight(task)
  );

  const snapshot = {
    workspaceId: wsId,
    generatedAt: new Date().toISOString(),
    themes,
    capabilities: activeCapabilities,
    tasks: inFlightTasks,
    counts: {
      themes: themes.length,
      activeCapabilities: activeCapabilities.length,
      inFlightTasks: inFlightTasks.length,
      // Totals are taken from the unfiltered projected collections.
      totalCapabilities: projected.capabilities.length,
      totalTasks: projected.tasks.length,
    },
  };
  return textResult(JSON.stringify(snapshot, null, 2));
}
|
|
2004
|
+
|
|
2005
|
+
/**
 * Ranks existing capabilities by token overlap with a free-text
 * description so an agent can attach work to an existing bet instead of
 * creating a duplicate capability.
 *
 * @param {object} args - Tool arguments.
 * @param {string} args.description - Text to match; required.
 * @param {number} [args.limit=5] - Maximum matches, clamped to 1..25.
 *   A value that does not coerce to a number falls back to the default.
 * @param {object} projected - Projected roadmap view (capabilities, tasks).
 * @returns {object} Text result with `matches` and a `hint`; when the top
 *   score is below the strong-match threshold a `_meta` reminder is
 *   attached. An error result is returned when description is missing.
 */
function suggestCapabilityFor(args, projected) {
  // Scores at or above this value count as a strong match. The reminder
  // and the hint share this one threshold so they can never disagree.
  const STRONG_MATCH = 0.4;
  const DEFAULT_LIMIT = 5;
  const MAX_LIMIT = 25;

  // A non-string description is reported as missing rather than throwing.
  const desc =
    typeof args?.description === "string" ? args.description.trim() : "";
  if (!desc) return errorResult("description is required.");

  // A limit that coerces to NaN would make slice() return nothing and
  // produce a misleading "no overlap" hint, so fall back to the default.
  const requestedLimit = Number(args.limit ?? DEFAULT_LIMIT);
  const limit = Number.isNaN(requestedLimit)
    ? DEFAULT_LIMIT
    : Math.min(MAX_LIMIT, Math.max(1, Math.trunc(requestedLimit)));

  // Skip delivered capabilities — they're closed bets. A new PR mapping
  // to a delivered cap would either be wrong (work for a different bet)
  // or reopen the bet, which the user should do explicitly. Archived
  // capabilities are skipped for the same reason archived themes are
  // skipped in theme suggestions: retired bets should not attract work.
  const activeCaps = projected.capabilities.filter(
    (c) =>
      !c.archived &&
      effectiveCapabilityStatus(c, projected.tasks) !== "delivered"
  );
  const query = tokenize(desc);
  const ranked = activeCaps
    .map((c) => {
      const hay = tokenize(
        `${c.name} ${c.description ?? ""} ${c.outcome ?? ""}`
      );
      return { capability: c, score: jaccardScore(query, hay) };
    })
    .filter((r) => r.score > 0)
    .sort((a, b) => b.score - a.score)
    .slice(0, limit)
    .map(({ capability, score }) => ({
      id: capability.id,
      name: capability.name,
      pillarId: capability.pillarId,
      outcome: capability.outcome,
      score: Number(score.toFixed(3)),
    }));

  const hasMatches = ranked.length > 0;
  const topScore = ranked[0]?.score ?? 0;
  const hasStrongMatch = topScore >= STRONG_MATCH;

  // Reminder via _meta when nothing strong came back — the model should
  // pause and ask the user before inventing a new capability.
  const meta = hasStrongMatch
    ? undefined
    : {
        _meta: {
          roadmapper: {
            reminder: hasMatches
              ? "No strong match (top score < 0.4). If none of the listed capabilities fit, ask the user before calling propose_capability — the top match is often closer than it scores."
              : "No existing capability is a sensible parent. Before calling propose_capability, verify with the user that a brand-new capability is warranted — capabilities are quarterly bets, not single tasks.",
          },
        },
      };

  let hint;
  if (!hasMatches) {
    hint =
      "No existing capabilities overlap your description. propose_capability is likely the right next step.";
  } else if (hasStrongMatch) {
    hint = `Strong match: ${ranked[0].id} (${ranked[0].name}). Strongly consider attaching tasks here instead of creating a duplicate capability.`;
  } else {
    hint = `Weak overlap. If none of these fit, propose_capability is reasonable — but read the top match first.`;
  }

  return textResult(
    JSON.stringify({ ok: true, query: desc, matches: ranked, hint }, null, 2),
    meta
  );
}
|
|
2072
|
+
|
|
2073
|
+
/**
 * Theme-level mirror of suggestCapabilityFor — ranks existing themes by
 * token overlap with the description and signals whether any existing
 * theme is a sensible home for the work.
 *
 * Themes are years-stable, so the messaging is more conservative: even a
 * weak match should usually win over creating a new theme. Only an empty
 * or very-low-overlap result plus explicit user intent should lead to
 * propose_theme.
 *
 * @param {object} args - Tool arguments.
 * @param {string} args.description - Text to match; required.
 * @param {number} [args.limit=5] - Maximum matches, clamped to 1..25.
 *   A value that does not coerce to a number falls back to the default.
 * @param {object} projected - Projected roadmap view; only `themes` is read.
 * @returns {object} Text result with `matches` and a `hint`; when the top
 *   score is below the strong-match threshold a `_meta` reminder is
 *   attached. An error result is returned when description is missing.
 */
function suggestThemeFor(args, projected) {
  // Scores at or above this value count as a strong match. The reminder
  // and the hint share this one threshold so they can never disagree.
  const STRONG_MATCH = 0.4;
  const DEFAULT_LIMIT = 5;
  const MAX_LIMIT = 25;

  // A non-string description is reported as missing rather than throwing.
  const desc =
    typeof args?.description === "string" ? args.description.trim() : "";
  if (!desc) return errorResult("description is required.");

  // A limit that coerces to NaN would make slice() return nothing and
  // produce a misleading "no overlap" hint, so fall back to the default.
  const requestedLimit = Number(args.limit ?? DEFAULT_LIMIT);
  const limit = Number.isNaN(requestedLimit)
    ? DEFAULT_LIMIT
    : Math.min(MAX_LIMIT, Math.max(1, Math.trunc(requestedLimit)));

  // Skip archived themes — retired strategic bets shouldn't pull new
  // work into a closed mandate.
  const activeThemes = projected.themes.filter((t) => !t.archived);
  const query = tokenize(desc);
  const ranked = activeThemes
    .map((t) => {
      const hay = tokenize(`${t.name} ${t.description ?? ""}`);
      return { theme: t, score: jaccardScore(query, hay) };
    })
    .filter((r) => r.score > 0)
    .sort((a, b) => b.score - a.score)
    .slice(0, limit)
    .map(({ theme, score }) => ({
      id: theme.id,
      name: theme.name,
      description: theme.description ?? "",
      score: Number(score.toFixed(3)),
    }));

  const hasMatches = ranked.length > 0;
  const topScore = ranked[0]?.score ?? 0;
  const hasStrongMatch = topScore >= STRONG_MATCH;

  // Reminder when nothing matches strongly — theme creation is the
  // years-stable decision, so even a weak match deserves a pause.
  const meta = hasStrongMatch
    ? undefined
    : {
        _meta: {
          roadmapper: {
            reminder: hasMatches
              ? "No strong match (top score < 0.4). Re-using a 'close-enough' theme is almost always the right move; ask the user before calling propose_theme."
              : "No existing theme overlaps your description. Themes are years-stable, so creating a new one is a big decision — verify with the user that this represents a genuinely new strategic direction, not a reframing of an existing bet, before calling propose_theme.",
          },
        },
      };

  let hint;
  if (!hasMatches) {
    hint =
      "No existing theme overlaps. propose_theme MAY be appropriate, but only with explicit user confirmation that a new strategic direction is intended — themes are years-stable, not per-feature.";
  } else if (hasStrongMatch) {
    hint = `Strong match: ${ranked[0].id} (${ranked[0].name}). Attach capabilities under this theme instead of creating a new one.`;
  } else {
    hint = `Weak overlap. The top match is often closer than it scores; prefer that over creating a new theme unless the user explicitly asks for a new strategic direction.`;
  }

  return textResult(
    JSON.stringify({ ok: true, query: desc, matches: ranked, hint }, null, 2),
    meta
  );
}
|
|
2143
|
+
|
|
2144
|
+
/**
 * Attaches a pull request to a task through the `link_pr` RPC.
 *
 * @param {object} args - Tool arguments. Reads taskId, repo, number and
 *   the optional title, merged, mergedAt, authorGithub, authorKind.
 * @param {object} projected - Projected roadmap view; used to confirm
 *   that the task exists.
 * @param {object} seed - Seed data; the task's seed `prs` are forwarded
 *   to the RPC.
 * @param {string} wsId - Workspace id forwarded to the RPC.
 * @returns {Promise<object>} Text result describing the link, or an
 *   error result when validation or the RPC fails.
 */
async function linkPR(args, projected, seed, wsId) {
  const { taskId } = args;
  const knownTask = projected.tasks.some((candidate) => candidate.id === taskId);
  if (!knownTask) return errorResult(`Task ${taskId} not found.`);
  if (!args.repo || !args.number) {
    return errorResult("repo and number are required.");
  }

  // Assemble the PR record; optional fields are added only when supplied.
  const pr = { repo: args.repo, number: args.number };
  if (args.title) pr.title = args.title;
  if (typeof args.merged === "boolean") pr.merged = args.merged;
  if (args.mergedAt) pr.mergedAt = args.mergedAt;
  if (args.authorGithub) pr.authorGithub = args.authorGithub;
  if (args.authorKind) pr.authorKind = args.authorKind;

  // The RPC has no access to the seed JSON, so the task's seed PRs are
  // passed along; that lets the RPC add the new PR on top of them
  // instead of overwriting them.
  const seedTasks = seed?.tasks ?? [];
  const seedPrs = seedTasks.find((candidate) => candidate.id === taskId)?.prs ?? [];

  let rpcResult;
  try {
    rpcResult = await rpcCall("link_pr", {
      p_workspace_id: wsId,
      p_task_id: taskId,
      p_pr: pr,
      p_seed_prs: seedPrs,
    });
  } catch (err) {
    return errorResult(err.message);
  }

  const idempotent = rpcResult?.idempotent === true;
  const label = `${pr.repo}#${pr.number}`;
  const message = idempotent
    ? `${label} was already linked to ${taskId}; no change.`
    : `Attached ${label} to ${taskId}.`;
  return textResult(
    JSON.stringify({ ok: true, taskId, pr: label, idempotent, message }, null, 2)
  );
}
|
|
2195
|
+
|
|
2196
|
+
/**
 * Common handler behind the archive/unarchive tools. It checks the
 * required inputs, forwards the request to the `archive_entity` or
 * `unarchive_entity` RPC, and returns the RPC response as formatted JSON.
 * Per the original notes, the SQL side is expected to own
 * refuse-with-children, idempotency, parent-active checks and audit
 * attribution; this layer only validates and routes.
 *
 * @param {string} kind - "task", "capability", or anything else (treated
 *   as a theme when picking the id argument).
 * @param {string} action - "archive" selects archive_entity; any other
 *   value selects unarchive_entity.
 * @param {object} args - Tool arguments: the kind-specific id, reason,
 *   and optional idempotencyKey / dryRun.
 * @param {string} wsId - Resolved workspace id.
 * @returns {Promise<object>} Text result with the RPC response, or an
 *   error result.
 */
async function archiveLifecycle(kind, action, args, wsId) {
  // Pick the name of the argument that carries the entity id.
  let idArg;
  switch (kind) {
    case "task":
      idArg = "taskId";
      break;
    case "capability":
      idArg = "capabilityId";
      break;
    default:
      idArg = "themeId";
  }

  const entityId = (args?.[idArg] ?? "").trim();
  if (!entityId) return errorResult(`${idArg} is required.`);

  const reason = (args?.reason ?? "").trim();
  if (!reason) return errorResult("reason is required.");

  if (!wsId) {
    return errorResult(
      "workspaceId could not be resolved (pass workspaceId arg or set SUPABASE_WORKSPACE_ID)."
    );
  }

  const rpcName = action === "archive" ? "archive_entity" : "unarchive_entity";
  const params = {
    p_workspace_id: wsId,
    p_kind: kind,
    p_entity_id: entityId,
    p_reason: reason,
    p_actor_label: "mcp:agent",
    p_idempotency_key: args?.idempotencyKey ?? null,
    p_dry_run: args?.dryRun === true,
  };

  try {
    const response = await rpcCall(rpcName, params);
    return textResult(JSON.stringify(response, null, 2));
  } catch (err) {
    return errorResult(err.message);
  }
}
|
|
2233
|
+
|
|
2234
|
+
/**
 * Moves one entity to a new parent. It checks the required inputs, calls
 * the `move_entity` RPC, and returns the RPC response as formatted JSON.
 * Per the original notes, the SQL side is expected to handle the
 * target-active check, idempotency, unarchive-on-move (U5) and audit
 * attribution.
 *
 * @param {string} kind - "task" moves a task to a capability; any other
 *   value is treated as a capability being moved to a theme.
 * @param {object} args - Tool arguments: the entity id, the new parent
 *   id, reason, and optional dryRun.
 * @param {string} wsId - Resolved workspace id.
 * @returns {Promise<object>} Text result with the RPC response, or an
 *   error result.
 */
async function moveEntity(kind, args, wsId) {
  const movingTask = kind === "task";
  const [idArg, parentArg] = movingTask
    ? ["taskId", "newCapabilityId"]
    : ["capabilityId", "newThemeId"];

  const entityId = (args?.[idArg] ?? "").trim();
  if (!entityId) return errorResult(`${idArg} is required.`);

  const newParentId = (args?.[parentArg] ?? "").trim();
  if (!newParentId) return errorResult(`${parentArg} is required.`);

  const reason = (args?.reason ?? "").trim();
  if (!reason) return errorResult("reason is required.");

  if (!wsId) {
    return errorResult(
      "workspaceId could not be resolved (pass workspaceId arg or set SUPABASE_WORKSPACE_ID)."
    );
  }

  const params = {
    p_workspace_id: wsId,
    p_kind: kind,
    p_entity_id: entityId,
    p_new_parent_id: newParentId,
    p_reason: reason,
    p_actor_label: "mcp:agent",
    // A single move is not part of any batch.
    p_batch_id: null,
    p_dry_run: args?.dryRun === true,
  };

  try {
    const response = await rpcCall("move_entity", params);
    return textResult(JSON.stringify(response, null, 2));
  } catch (err) {
    return errorResult(err.message);
  }
}
|
|
2269
|
+
|
|
2270
|
+
/**
 * Moves many entities in one call. The batch is checked for size and
 * shape, a single batchId is generated, and `move_entity` is then called
 * once per item, in order, with that shared id. A failing item does not
 * undo the items before it: every move's outcome is reported so the
 * caller can retry the failures or show them to the user.
 *
 * @param {string} kind - "task" moves tasks to capabilities; any other
 *   value is treated as capabilities being moved to themes.
 * @param {object} args - Tool arguments: `moves` (1..100 items carrying
 *   the entity id and new parent id), `reason`, optional `dryRun`.
 * @param {string} wsId - Resolved workspace id.
 * @returns {Promise<object>} Text result with per-item outcomes and
 *   counts, or an error result when the batch is rejected up front.
 */
async function moveBulk(kind, args, wsId) {
  const movingTasks = kind === "task";
  const idArg = movingTasks ? "taskId" : "capabilityId";
  const parentArg = movingTasks ? "newCapabilityId" : "newThemeId";

  const moves = args?.moves;
  if (!Array.isArray(moves) || moves.length === 0) {
    return errorResult("moves must be a non-empty array.");
  }
  if (moves.length > 100) {
    return errorResult(`bulk move cap is 100 (got ${moves.length}).`);
  }

  const reason = (args?.reason ?? "").trim();
  if (!reason) return errorResult("reason is required.");

  if (!wsId) {
    return errorResult(
      "workspaceId could not be resolved (pass workspaceId arg or set SUPABASE_WORKSPACE_ID)."
    );
  }

  // Check the whole payload before issuing any RPC: a structurally broken
  // batch should be rejected outright, not applied halfway.
  const isBlank = (value) => typeof value !== "string" || !value.trim();
  for (const [position, item] of moves.entries()) {
    if (!item || typeof item !== "object") {
      return errorResult(`moves[${position}] must be an object.`);
    }
    if (isBlank(item[idArg])) {
      return errorResult(`moves[${position}].${idArg} is required.`);
    }
    if (isBlank(item[parentArg])) {
      return errorResult(`moves[${position}].${parentArg} is required.`);
    }
  }

  const batchId = `batch-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}`;
  const dryRun = args?.dryRun === true;

  // The moves are issued one at a time, in payload order.
  const results = [];
  for (const [position, item] of moves.entries()) {
    try {
      const response = await rpcCall("move_entity", {
        p_workspace_id: wsId,
        p_kind: kind,
        p_entity_id: item[idArg].trim(),
        p_new_parent_id: item[parentArg].trim(),
        p_reason: reason,
        p_actor_label: "mcp:agent",
        p_batch_id: batchId,
        p_dry_run: dryRun,
      });
      results.push({ index: position, ...response });
    } catch (err) {
      results.push({
        index: position,
        ok: false,
        error: err.message,
        entityId: item[idArg],
      });
    }
  }

  const total = results.length;
  const okCount = results.reduce((count, entry) => (entry.ok ? count + 1 : count), 0);
  const failCount = total - okCount;
  const summary = {
    ok: failCount === 0,
    batchId,
    total,
    okCount,
    failCount,
    dryRun,
    results,
  };
  return textResult(JSON.stringify(summary, null, 2));
}
|
|
2340
|
+
|
|
2341
|
+
/**
 * Per-kind field validators for update_*. Enums and ranges live here
 * (UP3 in the spec) so the SQL function can stay structural.
 *
 * Only fields present in the patch (not `undefined`) are checked; fields
 * with no rule here pass through unvalidated. Unknown kinds yield no
 * errors.
 *
 * @param {string} kind - "task", "capability", or "theme".
 * @param {object} patch - Sparse set of fields the caller wants to change.
 * @returns {string[]} Error messages; an empty array means the patch is
 *   valid.
 */
function validateUpdatePatch(kind, patch) {
  const errors = [];
  const isPositiveNumber = (value) => typeof value === "number" && value > 0;

  if (kind === "task") {
    if (patch.title !== undefined) {
      const e = validateName(patch.title, 5);
      if (e) errors.push(e);
    }
    if (patch.status !== undefined && !VALID_STATUSES.has(patch.status)) {
      errors.push(`invalid status: ${patch.status}`);
    }
    if (patch.priority !== undefined && !VALID_PRIORITIES.has(patch.priority)) {
      errors.push(`invalid priority: ${patch.priority}`);
    }
    if (patch.effort !== undefined && !VALID_EFFORTS.has(patch.effort)) {
      errors.push(`invalid effort: ${patch.effort}`);
    }
    if (patch.kind !== undefined && !VALID_KINDS.has(patch.kind)) {
      errors.push(`invalid kind: ${patch.kind}`);
    }
    if (
      patch.progress !== undefined &&
      (typeof patch.progress !== "number" || patch.progress < 0 || patch.progress > 100)
    ) {
      errors.push(`progress must be 0–100, got ${patch.progress}.`);
    }
    if (patch.expectedPRs !== undefined && !isPositiveNumber(patch.expectedPRs)) {
      errors.push(`expectedPRs must be a positive number, got ${patch.expectedPRs}.`);
    }
    if (patch.expectedScope !== undefined && !isPositiveNumber(patch.expectedScope)) {
      errors.push(`expectedScope must be a positive number, got ${patch.expectedScope}.`);
    }
  } else if (kind === "capability") {
    if (patch.name !== undefined) {
      const e = validateName(patch.name, 8);
      if (e) errors.push(e);
    }
    if (patch.outcome !== undefined) {
      const e = validateOutcome(patch.outcome);
      if (e) errors.push(e);
    }
    if (patch.confidence !== undefined) {
      const e = validateConfidence(patch.confidence);
      if (e) errors.push(e);
    }
    if (patch.impact !== undefined) {
      if (typeof patch.impact !== "number" || !VALID_IMPACTS.has(patch.impact)) {
        errors.push(`invalid impact: ${patch.impact} (must be 0.25, 0.5, 1, 2, or 3).`);
      }
    }
    // Capability creation rejects a negative reach; apply the same range
    // on update so an edit cannot store a value that creation would
    // refuse. The `>= 0` test also rejects NaN.
    if (
      patch.reach !== undefined &&
      (typeof patch.reach !== "number" || !(patch.reach >= 0))
    ) {
      errors.push(`reach must be a number >= 0, got ${patch.reach}.`);
    }
  } else if (kind === "theme") {
    if (patch.name !== undefined) {
      const e = validateName(patch.name, 5);
      if (e) errors.push(e);
    }
  }
  return errors;
}
|
|
2409
|
+
|
|
2410
|
+
/**
 * Structural equality for JSON-shaped values, used when diffing a patch
 * against the current entity. Primitives compare with `===`, `null` and
 * `undefined` are treated as equal to each other, arrays compare
 * element-by-element in order (tags/dependsOn order is meaningful), and
 * plain objects compare by own enumerable keys regardless of key order.
 *
 * @param {*} a - First value.
 * @param {*} b - Second value.
 * @returns {boolean} True when the two values are equivalent.
 */
function jsonEqual(a, b) {
  if (a === b) return true;

  // null and undefined are interchangeable, but match nothing else.
  const aMissing = a == null;
  const bMissing = b == null;
  if (aMissing || bMissing) return aMissing && bMissing;

  // Primitives that were not identical above can never be equal.
  if (typeof a !== "object" || typeof b !== "object") return false;

  const aIsList = Array.isArray(a);
  if (aIsList !== Array.isArray(b)) return false;

  if (aIsList) {
    if (a.length !== b.length) return false;
    const hasMismatch = a.some((item, index) => !jsonEqual(item, b[index]));
    return !hasMismatch;
  }

  const keys = Object.keys(a);
  if (keys.length !== Object.keys(b).length) return false;
  for (const key of keys) {
    if (!Object.prototype.hasOwnProperty.call(b, key)) return false;
    if (!jsonEqual(a[key], b[key])) return false;
  }
  return true;
}
|
|
2432
|
+
|
|
2433
|
+
/**
 * Updates a single entity. The patch is validated against the per-kind
 * rules (UP3) and then diffed against the *projected* entity (seed plus
 * edits), so values that only exist in the seed still count for
 * idempotency and for the audit before-snapshot. Only keys whose value
 * actually differs are sent to the `update_entity` RPC; when nothing
 * differs, an idempotent result is returned without calling the RPC.
 *
 * @param {string} kind - "task", "capability", or anything else
 *   (treated as a theme).
 * @param {object} args - Tool arguments: the kind-specific id, `patch`,
 *   `reason`, optional `dryRun`.
 * @param {string} wsId - Resolved workspace id.
 * @param {object} projected - Projected roadmap view used for the lookup
 *   and the diff.
 * @returns {Promise<object>} Text result with the RPC response (or the
 *   idempotent short-circuit payload), or an error result.
 */
async function updateEntity(kind, args, wsId, projected) {
  // Pick the name of the argument that carries the entity id.
  let idArg;
  switch (kind) {
    case "task":
      idArg = "taskId";
      break;
    case "capability":
      idArg = "capabilityId";
      break;
    default:
      idArg = "themeId";
  }

  const entityId = (args?.[idArg] ?? "").trim();
  if (!entityId) return errorResult(`${idArg} is required.`);

  const patch = args?.patch;
  const patchIsPlainObject =
    Boolean(patch) && typeof patch === "object" && !Array.isArray(patch);
  if (!patchIsPlainObject) {
    return errorResult("patch must be a non-empty object.");
  }
  if (Object.keys(patch).length === 0) {
    return errorResult("patch must include at least one field.");
  }

  const reason = (args?.reason ?? "").trim();
  if (!reason) return errorResult("reason is required.");

  const problems = validateUpdatePatch(kind, patch);
  if (problems.length) {
    return errorResult(`Invalid patch: ${problems.join("; ")}`);
  }
  if (!wsId) {
    return errorResult(
      "workspaceId could not be resolved (pass workspaceId arg or set SUPABASE_WORKSPACE_ID)."
    );
  }
  if (!projected) {
    return errorResult("internal: projected view not available.");
  }

  // Find the entity in the projected view (seed merged with edits).
  let collection;
  if (kind === "task") {
    collection = projected.tasks;
  } else if (kind === "capability") {
    collection = projected.capabilities;
  } else {
    collection = projected.themes;
  }
  const current = collection.find((candidate) => candidate.id === entityId);
  if (!current) {
    return errorResult(`${kind} ${entityId} not found in workspace.`);
  }

  // User-facing text fields are passed through cleanText before the
  // diff. Otherwise an agent re-sending `Sandbox &amp; Test Mode` to
  // "fix" a previously-encoded value would store another `&amp;` and
  // look idempotent against the encoded current value. Cleaning first
  // keeps both the comparison and the persisted value clean.
  const TEXT_FIELDS = new Set([
    "title", "summary", "name", "description", "outcome", "hypothesis",
    "owner", "team", "note",
  ]);
  const cleanedPatch = {};
  for (const key of Object.keys(patch)) {
    const value = patch[key];
    const isText = TEXT_FIELDS.has(key) && typeof value === "string";
    cleanedPatch[key] = isText ? cleanText(value) : value;
  }

  // Keep only the keys that really change, and record their prior
  // values. The before-snapshot is built here, where the seed overlay is
  // visible, and is handed to the RPC as p_before.
  const effectivePatch = {};
  const beforeSnapshot = {};
  for (const key of Object.keys(cleanedPatch)) {
    const next = cleanedPatch[key];
    if (jsonEqual(current[key], next)) continue;
    effectivePatch[key] = next;
    beforeSnapshot[key] = current[key] ?? null;
  }

  const dryRun = args?.dryRun === true;
  if (Object.keys(effectivePatch).length === 0) {
    // Nothing differs from the current state: report it without an RPC.
    return textResult(
      JSON.stringify({ ok: true, entityId, kind, idempotent: true, dryRun }, null, 2)
    );
  }

  try {
    const response = await rpcCall("update_entity", {
      p_workspace_id: wsId,
      p_kind: kind,
      p_entity_id: entityId,
      p_patch: effectivePatch,
      p_before: beforeSnapshot,
      p_reason: reason,
      p_actor_label: "mcp:agent",
      p_dry_run: dryRun,
    });
    return textResult(JSON.stringify(response, null, 2));
  } catch (err) {
    return errorResult(err.message);
  }
}
|
|
2535
|
+
|
|
2536
|
+
/**
 * Append a metric reading to a capability's outcomeReadings array via
 * the `record_outcome_reading` RPC. Per the original notes, the server
 * takes a row lock so concurrent writers (script + human) union safely
 * instead of clobbering.
 *
 * @param {object} args - Tool arguments.
 * @param {string} args.capabilityId - Capability the reading belongs to.
 * @param {number} args.value - Finite metric value.
 * @param {string} args.asOf - Date or timestamp the reading refers to;
 *   must be parseable by `Date.parse`.
 * @param {string} args.source - Where the reading came from.
 * @param {string} [args.note] - Optional free-text note.
 * @param {string} wsId - Resolved workspace id.
 * @param {object} [projected] - Projected roadmap view; when supplied,
 *   the capability must exist in it.
 * @returns {Promise<object>} Text result with the RPC response, or an
 *   error result when validation or the RPC fails.
 */
async function recordOutcomeReading(args, wsId, projected) {
  // Non-string inputs are treated as missing so the caller gets the
  // "required" error instead of a TypeError from .trim().
  const trimmed = (value) => (typeof value === "string" ? value.trim() : "");

  const capabilityId = trimmed(args?.capabilityId);
  if (!capabilityId) return errorResult("capabilityId is required.");
  if (typeof args?.value !== "number" || !Number.isFinite(args.value)) {
    return errorResult("value is required and must be a finite number.");
  }
  const asOf = trimmed(args?.asOf);
  if (!asOf) return errorResult("asOf is required (ISO date or timestamp).");
  // The stale-outcome reader discards readings whose asOf cannot be
  // parsed, so an unparseable value would be stored and then silently
  // ignored. Reject it at write time instead.
  // NOTE(review): assumes the reader parses asOf with the JS Date parser —
  // confirm against listStaleOutcomes.
  if (Number.isNaN(Date.parse(asOf))) {
    return errorResult(
      `asOf "${asOf}" is not a parseable date (use an ISO date or timestamp).`
    );
  }
  const source = trimmed(args?.source);
  if (!source) return errorResult("source is required.");
  if (args?.note !== undefined && args?.note !== null && typeof args.note !== "string") {
    return errorResult("note must be a string when supplied.");
  }
  if (!wsId) {
    return errorResult(
      "workspaceId could not be resolved (pass workspaceId arg or set SUPABASE_WORKSPACE_ID)."
    );
  }
  // Existence check — the RPC will happily write a patch entry against
  // any capabilityId, which would orphan the reading invisibly (the
  // projector iterates seed + newCapabilities, not the patches dict
  // alone). Refuse here so phantom readings never land.
  if (projected) {
    const exists = projected.capabilities.some((c) => c.id === capabilityId);
    if (!exists) return errorResult(`capability ${capabilityId} not found in workspace.`);
  }
  try {
    const result = await rpcCall("record_outcome_reading", {
      p_workspace_id: wsId,
      p_capability_id: capabilityId,
      p_value: args.value,
      p_as_of: asOf,
      p_source: source,
      p_note: args.note ?? null,
      p_actor_label: "mcp:agent",
    });
    return textResult(JSON.stringify(result, null, 2));
  } catch (e) {
    return errorResult(e.message);
  }
}
|
|
2582
|
+
|
|
2583
|
+
/**
 * Read tool — surface capabilities with no recent outcome reading.
 * Pure projection over the workspace edits; no SQL round-trip needed
 * since the readings live on each capability already.
 *
 * A capability is reported when it declares a non-blank outcome and
 * either has no parseable reading or its newest reading is older than
 * the threshold.
 *
 * @param {object} [args] - Optional `thresholdDays` (number, default
 *   14) and `includeArchived` (only literal `true` enables it).
 * @param {object} projected - Projected workspace; `capabilities` is
 *   iterated.
 * @returns {object} textResult whose JSON text is
 *   `{ thresholdDays, count, stale }`, never-measured entries first,
 *   then stalest first.
 */
function listStaleOutcomes(args, projected) {
  const DAY_MS = 24 * 60 * 60 * 1000;
  const thresholdDays = typeof args?.thresholdDays === "number" ? args.thresholdDays : 14;
  const includeArchived = args?.includeArchived === true;
  const nowMs = Date.now();
  const thresholdMs = thresholdDays * DAY_MS;

  const stale = [];
  for (const cap of projected.capabilities) {
    if (!includeArchived && cap.archived) continue;
    // Only flag capabilities that declared an outcome — bets without
    // a falsifiable outcome can't be stale-checked meaningfully.
    if (typeof cap.outcome !== "string" || cap.outcome.trim().length === 0) continue;
    const rawReadings = Array.isArray(cap.outcomeReadings) ? cap.outcomeReadings : [];

    // Single pass: skip readings whose asOf can't be parsed (they
    // can't anchor a staleness calculation) and track the newest of
    // the rest. Strict `>` keeps the earlier-listed reading on a tie.
    let latest = null;
    let latestMs = -Infinity;
    let validCount = 0;
    for (const reading of rawReadings) {
      if (!reading || typeof reading.asOf !== "string") continue;
      const asOfMs = Date.parse(reading.asOf);
      if (!Number.isFinite(asOfMs)) continue;
      validCount += 1;
      if (asOfMs > latestMs) {
        latest = reading;
        latestMs = asOfMs;
      }
    }

    const ageMs = latest === null ? null : nowMs - latestMs;
    // Whole days are for display only; the staleness decision uses the
    // exact age so a reading 14.5 days old is stale at a 14-day
    // threshold instead of being floored back to "14, not over".
    const daysSince = ageMs === null ? null : Math.floor(ageMs / DAY_MS);
    const isStale = latest === null || ageMs > thresholdMs;
    if (!isStale) continue;
    stale.push({
      id: cap.id,
      name: cap.name,
      outcome: cap.outcome,
      daysSinceLastReading: daysSince,
      latestReading: latest,
      readingCount: rawReadings.length,
      // `undefined` is dropped by JSON.stringify, so the key only
      // appears when at least one reading was discarded.
      malformedReadingCount: rawReadings.length - validCount || undefined,
    });
  }
  // Sort: never-measured first, then by stalest.
  stale.sort((a, b) => {
    if (a.daysSinceLastReading == null && b.daysSinceLastReading != null) return -1;
    if (b.daysSinceLastReading == null && a.daysSinceLastReading != null) return 1;
    return (b.daysSinceLastReading ?? 0) - (a.daysSinceLastReading ?? 0);
  });
  return textResult(
    JSON.stringify({ thresholdDays, count: stale.length, stale }, null, 2)
  );
}
|
|
2641
|
+
|
|
2642
|
+
/**
 * Stamp acceptance grades onto a task via the `grade_acceptance` RPC.
 *
 * Each grade's `index` must address one of the task's acceptance
 * criteria: an integer in `[0, criteria count)`. Every grade is
 * stamped with today's date and the `mcp:agent` grader label.
 *
 * @param {object} args - `taskId`, plus `grades`: an array of
 *   `{ index, status, note? }`.
 * @param {object} projected - Projected workspace; `tasks` is searched
 *   for `args.taskId`.
 * @param {string} wsId - Workspace id passed to the RPC.
 * @returns {Promise<object>} textResult with
 *   `{ ok, taskId, graded, of }` as JSON, or an errorResult.
 */
async function submitAcceptanceGrades(args, projected, wsId) {
  const task = projected.tasks.find((t) => t.id === args.taskId);
  if (!task) return errorResult(`Task ${args.taskId} not found.`);
  const max = (task.acceptance ?? []).length;
  if (max === 0) {
    return errorResult(
      `Task ${task.id} has no acceptance criteria to grade. Add some first.`
    );
  }
  if (!Array.isArray(args.grades)) {
    return errorResult("grades must be an array of { index, status } entries.");
  }
  for (const g of args.grades) {
    // Reject negative, fractional and missing indices too — checking
    // only the upper bound let them through to the RPC.
    if (!Number.isInteger(g?.index) || g.index < 0 || g.index >= max) {
      return errorResult(
        `Grade index ${g?.index} is out of range (task has ${max} criteria).`
      );
    }
  }

  const today = todayISO();
  const payload = args.grades.map((g) => ({
    index: g.index,
    status: g.status,
    gradedAt: today,
    gradedBy: "mcp:agent",
    ...(g.note ? { note: g.note } : {}),
  }));

  try {
    // RPC takes a row lock, re-reads existing grades under the lock,
    // merges these indices on top, writes back. Concurrent graders
    // for the same task queue cleanly — no clobber.
    await rpcCall("grade_acceptance", {
      p_workspace_id: wsId,
      p_task_id: task.id,
      p_grades: payload,
    });
  } catch (e) {
    return errorResult(e.message);
  }

  return textResult(
    JSON.stringify(
      {
        ok: true,
        taskId: task.id,
        graded: args.grades.length,
        of: max,
      },
      null,
      2
    )
  );
}
|
|
2692
|
+
|
|
2693
|
+
/**
 * Wrap a string in the MCP tool-result shape: a `content` array with
 * one text item. Fields of the optional `extra` object (for example
 * `_meta` or `structuredContent`) are copied onto the envelope after
 * `content`, so an `extra.content` would replace it.
 *
 * @param {string} text - Text payload of the single content item.
 * @param {object} [extra] - Additional top-level envelope fields.
 * @returns {object} The tool-result envelope.
 */
function textResult(text, extra) {
  const envelope = { content: [{ type: "text", text }] };
  if (extra === undefined || extra === null) return envelope;
  return { ...envelope, ...extra };
}
|
|
2702
|
+
/**
 * Build an MCP tool-result envelope flagged as an error.
 *
 * The text item always carries a string: callers commonly pass
 * `e.message`, which is undefined when something other than an Error
 * was thrown, and an undefined `text` would be dropped on
 * serialization.
 *
 * @param {*} message - Error description; non-strings are stringified
 *   and null/undefined becomes "Unknown error".
 * @returns {object} `{ content: [{ type: "text", text }], isError: true }`.
 */
function errorResult(message) {
  let text;
  if (typeof message === "string") {
    text = message;
  } else if (message === undefined || message === null) {
    text = "Unknown error";
  } else {
    text = String(message);
  }
  return { content: [{ type: "text", text }], isError: true };
}
|
|
2705
|
+
|
|
2706
|
+
/**
 * Build the nudge text we attach via _meta on certain read results.
 * Returns null when no nudge is warranted — keeps the response
 * clean for the common case.
 *
 * Reads `session.rubricFetchedAt` from module state.
 *
 * @param {string} toolName - Name of the tool whose result is being
 *   decorated.
 * @param {object} projected - Projected workspace; `tasks` is read
 *   only for list_tasks / get_roadmap_snapshot.
 * @returns {string|null} Reminder sentences joined by a space, or null.
 */
function buildReminder(toolName, projected) {
  const reminders = [];
  // If the rubric hasn't been fetched and the agent's reading list_*
  // / get_*, they're orienting and about to plan — get the rubric in.
  const orientingTools = [
    "list_capabilities",
    "list_tasks",
    "get_roadmap_snapshot",
    "list_themes",
  ];
  if (session.rubricFetchedAt === null && orientingTools.includes(toolName)) {
    reminders.push(
      "Call get_agents_md before any propose_* / submit_acceptance_grades / link_pr call — those tools refuse without it."
    );
  }
  // Tasks with merged PRs but no acceptance grades = ungraded
  // deliveries. The rubric requires self-grading before reviewer
  // approval.
  if (toolName === "list_tasks" || toolName === "get_roadmap_snapshot") {
    const ungraded = projected.tasks.filter((t) => {
      if (t.status !== "delivered") return false;
      const merged = (t.prs ?? []).some((p) => p.merged);
      if (!merged) return false;
      return !t.acceptanceGrades || t.acceptanceGrades.length === 0;
    });
    if (ungraded.length > 0) {
      // Name at most five ids and summarize the remainder.
      const ids = ungraded.slice(0, 5).map((t) => t.id).join(", ");
      const more = ungraded.length > 5 ? `, +${ungraded.length - 5} more` : "";
      // Verb agrees with the count: "1 delivered task has", "2 delivered tasks have".
      const subject = ungraded.length === 1 ? "delivered task has" : "delivered tasks have";
      reminders.push(
        `${ungraded.length} ${subject} merged PRs without submitted acceptance grades. ` +
          `Call submit_acceptance_grades for: ${ids}${more}.`
      );
    }
  }
  if (reminders.length === 0) return null;
  return reminders.join(" ");
}
|
|
2749
|
+
|
|
2750
|
+
/**
 * Decorate a tool result with the reminder for `toolName`, if any.
 *
 * When buildReminder yields text, it is stored at
 * `_meta.roadmapper.reminder` on a shallow copy of `payload`; existing
 * `_meta` and `_meta.roadmapper` fields are carried over. With no
 * reminder the original payload object is returned as-is.
 *
 * @param {string} toolName - Tool that produced the payload.
 * @param {object} projected - Projected workspace, forwarded to
 *   buildReminder.
 * @param {object} payload - Tool-result envelope to decorate.
 * @returns {object} The payload, possibly with a reminder attached.
 */
function withReminder(toolName, projected, payload) {
  const reminder = buildReminder(toolName, projected);
  if (!reminder) return payload;

  const existingMeta = payload._meta ?? {};
  const existingOurs = payload._meta?.roadmapper ?? {};
  const roadmapper = { ...existingOurs, reminder };
  const _meta = { ...existingMeta, roadmapper };
  return { ...payload, _meta };
}
|
|
2764
|
+
|
|
2765
|
+
// ── MCP resources + prompts ───────────────────────────────────────
|
|
2766
|
+
//
|
|
2767
|
+
// resources/* — content the client can pull without the model
|
|
2768
|
+
// deciding to call a tool. Some clients auto-subscribe on connect,
|
|
2769
|
+
// which sidesteps the "agent forgot to fetch the rubric" failure.
|
|
2770
|
+
// prompts/* — parameterized templates the user invokes directly
|
|
2771
|
+
// (e.g. "/roadmapper:plan-feature lp v2"). They orchestrate a flow
|
|
2772
|
+
// without depending on the model's judgment.
|
|
2773
|
+
//
|
|
2774
|
+
// Both arrays are intentionally small. Keeping the surface tight
|
|
2775
|
+
// is part of the contract — agents read all of this on connect.
|
|
2776
|
+
|
|
2777
|
+
// Catalogue returned verbatim by resources/list. Each entry carries
// uri, name, description and mimeType, in that key order.
const RESOURCES = (() => {
  const entry = (uri, name, mimeType, description) => ({
    uri,
    name,
    description,
    mimeType,
  });
  return [
    entry(
      "roadmapper://rubric",
      "Planning rubric (AGENTS.md)",
      "text/markdown",
      "The contract every planner must satisfy: task shape, acceptance criteria format, capability outcome rubric, grading dimensions. Same content as get_agents_md, exposed as a resource so MCP clients that auto-subscribe pull it at connect."
    ),
    entry(
      "roadmapper://capabilities/active",
      "Active capabilities (snapshot)",
      "application/json",
      "Live list of non-delivered capabilities for the env-default workspace. Read this before propose_task or propose_capability to find the right parent. Note: MCP resources don't accept arguments, so this always reads SUPABASE_WORKSPACE_ID's workspace — use list_capabilities({ workspaceId }) for cross-workspace reads."
    ),
    entry(
      "roadmapper://tasks/open",
      "Open tasks (snapshot)",
      "application/json",
      "Live list of in_progress + planned tasks for the env-default workspace. Same workspaceId caveat as roadmapper://capabilities/active — use list_tasks({ workspaceId }) for cross-workspace reads."
    ),
  ];
})();
|
|
2800
|
+
|
|
2801
|
+
/**
 * Serve a resources/read request for one of the RESOURCES URIs.
 *
 * - `roadmapper://rubric` returns the AGENTS.md text and, on the first
 *   read of a session, marks the rubric as fetched and records
 *   telemetry.
 * - `roadmapper://capabilities/active` returns capabilities whose
 *   effective status is not "delivered" and marks capability discovery
 *   as done for the session.
 * - `roadmapper://tasks/open` returns tasks that are in_progress or
 *   planned.
 *
 * @param {string} uri - Resource URI requested by the client.
 * @returns {Promise<object>} `{ contents: [{ uri, mimeType, text }] }`.
 * @throws {Error} "Unknown resource: <uri>" for any other URI.
 */
async function readResource(uri) {
  if (uri === "roadmapper://rubric") {
    // Side-effect: reading the rubric resource counts as fetching
    // it. Mutators after this don't get blocked even if the agent
    // never called the tool — the contract is "the rubric reached
    // the model," not "this specific call shape ran."
    if (session.rubricFetchedAt === null) {
      session.rubricFetchedAt = Date.now();
      // Pass the cwd snapshot's workspace id so the row is
      // visible in Settings → MCP activity. Without this the
      // resource-route fetch lands with workspace_id=NULL and
      // gets filtered out for non-operator viewers (per migration
      // 0038's NULL-workspace lock).
      recordTelemetry(
        "rubric_fetched",
        { via: "resource" },
        snapshotWorkspaceId() ?? undefined
      );
    }
    return {
      contents: [
        { uri, mimeType: "text/markdown", text: readAgentsMd() },
      ],
    };
  }

  const wantsCapabilities = uri === "roadmapper://capabilities/active";
  const wantsTasks = uri === "roadmapper://tasks/open";
  // Reject unknown URIs before projecting, so a bad request never
  // pays for a workspace read and a failing read cannot mask the
  // "Unknown resource" error.
  if (!wantsCapabilities && !wantsTasks) {
    throw new Error(`Unknown resource: ${uri}`);
  }

  // The two snapshot resources project workspace state on each read
  // so the response is always live; mirrors get_roadmap_snapshot.
  const projected =
    (await readWorkspaceProjected()) ?? project(readSeed(), {});

  let rows;
  if (wantsCapabilities) {
    // Counts as cap discovery for the propose_capability gate —
    // identical intent to suggest_capability_for / list_capabilities,
    // just delivered as an MCP resource.
    session.capsDiscoveredAt = Date.now();
    rows = projected.capabilities.filter(
      (c) => effectiveCapabilityStatus(c, projected.tasks) !== "delivered"
    );
  } else {
    rows = projected.tasks.filter(
      (t) => t.status === "in_progress" || t.status === "planned"
    );
  }
  return {
    contents: [
      {
        uri,
        mimeType: "application/json",
        text: JSON.stringify(rows, null, 2),
      },
    ],
  };
}
|
|
2865
|
+
|
|
2866
|
+
// Catalogue returned verbatim by prompts/list; renderPrompt expands
// each entry by name.
const PROMPTS = (() => {
  const argument = (name, description, required) => ({ name, description, required });
  const prompt = (name, description, args) => ({ name, description, arguments: args });
  return [
    prompt(
      "plan-feature",
      "Force the full planning flow: rubric → capability resolution → tasks under it. Use when the user says 'design features for X' / 'plan Y' — bypasses model judgment.",
      [
        argument(
          "description",
          "One-line description of the feature or workstream.",
          true
        ),
      ]
    ),
    prompt(
      "close-task",
      "Force the deliver-flow: load the task → self-grade against its acceptance criteria → link the PR. Use after implementing a TK-NNNNNN.",
      [
        argument("task_id", "TK-NNNNNN", true),
        argument("pr_url", "https://github.com/...", false),
      ]
    ),
    prompt(
      "weekly-review",
      "Walk through open tasks, stale capabilities, and ungraded deliveries. Use for a structured roadmap review pass.",
      []
    ),
  ];
})();
|
|
2895
|
+
|
|
2896
|
+
/**
 * Expand a named prompt template into a prompts/get result.
 *
 * @param {string} name - One of "plan-feature", "close-task",
 *   "weekly-review".
 * @param {object} args - Prompt arguments; `description` for
 *   plan-feature, `task_id` / `pr_url` for close-task. Missing values
 *   are replaced by placeholder text.
 * @returns {object} `{ description, messages }` with one user text
 *   message; `description` is taken from the matching PROMPTS entry
 *   (empty string if none).
 * @throws {Error} "Unknown prompt: <name>" for any other name.
 */
function renderPrompt(name, args) {
  // One builder per prompt. Each returns the template as a list of
  // lines that are joined with "\n"; an empty entry yields the blank
  // line after the heading.
  const builders = new Map([
    [
      "plan-feature",
      () => [
        `Plan a feature: "${args.description ?? "(no description provided)"}"`,
        "",
        "Follow this flow exactly:",
        "1. Call get_agents_md (or read roadmapper://rubric) to load the rubric for this session.",
        "2. Call suggest_capability_for with the description above. Read every returned candidate's outcome before deciding.",
        "3. If a returned candidate scores > 0.4 OR its outcome maps to what we're building, propose tasks under it via propose_task. Each task MUST include acceptance criteria per the rubric.",
        "4. If nothing fits, STOP and ask the user before calling propose_capability — capabilities are quarterly bets, not single tasks.",
        "5. After tasks are proposed, summarize: capabilityId chosen, task ids created, anything skipped and why.",
      ],
    ],
    [
      "close-task",
      () => {
        const linkStep = args.pr_url
          ? `5. Call link_pr to attach ${args.pr_url} to the task.`
          : "5. If you opened a PR, call link_pr to attach it.";
        return [
          `Close task ${args.task_id ?? "(missing task_id)"}.`,
          "",
          "Follow this flow exactly:",
          "1. Call get_agents_md (or read roadmapper://rubric) to load grading dimensions.",
          `2. Call get_task({ id: "${args.task_id ?? ""}" }) and read every acceptance criterion.`,
          "3. For each criterion, decide pass/fail. Fabricated passes destroy this signal — only mark pass if you verified.",
          "4. Call submit_acceptance_grades with the per-index results. Include a note on any fail.",
          linkStep,
          `6. Stamp Roadmapper-Task: ${args.task_id ?? "TK-NNNNNN"} in the PR body so the webhook routes future events back here.`,
        ];
      },
    ],
    [
      "weekly-review",
      () => [
        "Run a structured roadmap review.",
        "",
        "1. Call get_agents_md to load the rubric (or confirm rubric is current).",
        "2. Call get_roadmap_snapshot for the canonical model. Note any _meta reminders in the response.",
        "3. For each active capability, scan: are open tasks aging? Are any without acceptance criteria? Are there delivered tasks without acceptance grades?",
        "4. List capabilities whose outcomes are no longer falsifiable or whose tasks all delivered (close them or pivot).",
        "5. Report: ungraded deliveries, stale capabilities, capabilities ready to close, suggested next bets.",
      ],
    ],
  ]);

  const build = builders.get(name);
  if (build === undefined) {
    throw new Error(`Unknown prompt: ${name}`);
  }
  const text = build().join("\n");
  const catalogueEntry = PROMPTS.find((p) => p.name === name);
  return {
    description: catalogueEntry?.description ?? "",
    messages: [
      { role: "user", content: { type: "text", text } },
    ],
  };
}
|
|
2944
|
+
|
|
2945
|
+
/**
 * Dispatch one JSON-RPC request to the matching MCP handler.
 *
 * Supported methods: initialize, tools/list, tools/call,
 * resources/list, resources/read, prompts/list, prompts/get.
 *
 * @param {object} request - Parsed JSON-RPC message
 *   (`id`, `method`, `params`).
 * @returns {Promise<object|null>} A JSON-RPC response object, or null
 *   for an unrecognized method that carries no id (a notification).
 *   Unknown methods with an id get error -32601; an exception thrown
 *   by a handler is reported as error -32603.
 */
async function handle(request) {
  const { id, method, params } = request;
  try {
    if (method === "initialize") {
      // Snapshot counts so an MCP client showing server info
      // surfaces actual roadmap shape, not just "connected".
      const projected =
        (await readWorkspaceProjected()) ?? project(readSeed(), {});
      const openTasks = projected.tasks.filter(
        (t) => t.status !== "delivered"
      ).length;
      const stats = {
        themes: projected.themes.length,
        capabilities: projected.capabilities.length,
        openTasks,
      };
      // Fresh session — reset rubric-fetched state. The client
      // re-initializes when it reconnects, which is the right
      // boundary for "you need to fetch the rubric again."
      resetSession();
      recordTelemetry("session_initialized", { stats });
      const instructions =
        "Roadmapper online — " +
        `${stats.themes} theme${stats.themes === 1 ? "" : "s"}, ` +
        `${stats.capabilities} capabilit${stats.capabilities === 1 ? "y" : "ies"}, ` +
        `${stats.openTasks} open task${stats.openTasks === 1 ? "" : "s"}. ` +
        "Call get_agents_md before planning — the propose_* and submit_acceptance_grades tools refuse without it. " +
        "Use suggest_capability_for before propose_capability. " +
        "Slash-prompts available: roadmapper:plan-feature, roadmapper:close-task, roadmapper:weekly-review.";
      return {
        jsonrpc: "2.0",
        id,
        result: {
          protocolVersion: PROTOCOL_VERSION,
          // Declare every capability we support. resources +
          // prompts unlock the auto-pull / slash-command surfaces
          // some MCP clients expose; tools work for everyone.
          capabilities: {
            tools: {},
            resources: { listChanged: false },
            prompts: { listChanged: false },
          },
          serverInfo: {
            name: SERVER_NAME,
            version: SERVER_VERSION,
            stats,
            // Kept here as well for any consumer that already reads
            // it from serverInfo.
            instructions,
          },
          // MCP defines `instructions` as a top-level field of the
          // initialize result; clients do not look inside serverInfo.
          instructions,
        },
      };
    }
    if (method === "tools/list") {
      return { jsonrpc: "2.0", id, result: { tools: TOOLS } };
    }
    if (method === "tools/call") {
      const result = await callTool(params?.name, params?.arguments ?? {});
      return { jsonrpc: "2.0", id, result };
    }
    if (method === "resources/list") {
      return { jsonrpc: "2.0", id, result: { resources: RESOURCES } };
    }
    if (method === "resources/read") {
      const result = await readResource(params?.uri);
      return { jsonrpc: "2.0", id, result };
    }
    if (method === "prompts/list") {
      return { jsonrpc: "2.0", id, result: { prompts: PROMPTS } };
    }
    if (method === "prompts/get") {
      const result = renderPrompt(params?.name, params?.arguments ?? {});
      return { jsonrpc: "2.0", id, result };
    }
    // Notifications (no id) and unknown methods: ignore.
    if (id === undefined) return null;
    return {
      jsonrpc: "2.0",
      id,
      error: { code: -32601, message: `Method not found: ${method}` },
    };
  } catch (e) {
    return {
      jsonrpc: "2.0",
      id,
      error: { code: -32603, message: e.message || String(e) },
    };
  }
}
|
|
3031
|
+
|
|
3032
|
+
/**
|
|
3033
|
+
* `node mcp/server.mjs --selftest` — invokes each read tool against
|
|
3034
|
+
* the local seed and prints a pass/fail table. Useful for sanity-
|
|
3035
|
+
* checking the install without wiring it into an MCP client.
|
|
3036
|
+
*
|
|
3037
|
+
* Write tools (propose_task, submit_acceptance_grades) are smoke-
|
|
3038
|
+
* tested for argument validation only — they don't touch Supabase.
|
|
3039
|
+
*/
|
|
3040
|
+
async function runSelftest() {
|
|
3041
|
+
const seed = readSeed();
|
|
3042
|
+
const aTheme = seed?.product?.themes?.[0]?.id;
|
|
3043
|
+
const aCap = seed?.capabilities?.[0]?.id;
|
|
3044
|
+
const aTask = seed?.tasks?.[0]?.id;
|
|
3045
|
+
|
|
3046
|
+
const checks = [
|
|
3047
|
+
{
|
|
3048
|
+
name: "initialize",
|
|
3049
|
+
fn: () => handle({ id: 1, method: "initialize", params: {} }),
|
|
3050
|
+
pass: (r) =>
|
|
3051
|
+
r?.result?.serverInfo?.name === SERVER_NAME &&
|
|
3052
|
+
// New: capabilities advertise resources + prompts too.
|
|
3053
|
+
r?.result?.capabilities?.resources &&
|
|
3054
|
+
r?.result?.capabilities?.prompts,
|
|
3055
|
+
},
|
|
3056
|
+
{
|
|
3057
|
+
// Hitting a mutator with no rubric fetched must return the
|
|
3058
|
+
// structured prerequisite_missing error with a `fix` field,
|
|
3059
|
+
// not a successful write. This is the gate the effectiveness
|
|
3060
|
+
// memo specified. Explicit resetSession() so the check is
|
|
3061
|
+
// order-independent — if a prior check fetched the rubric,
|
|
3062
|
+
// this would otherwise silently pass for the wrong reason.
|
|
3063
|
+
name: "rubric gate blocks mutator before get_agents_md",
|
|
3064
|
+
fn: () => {
|
|
3065
|
+
resetSession();
|
|
3066
|
+
return handle({
|
|
3067
|
+
id: 11,
|
|
3068
|
+
method: "tools/call",
|
|
3069
|
+
params: {
|
|
3070
|
+
name: "propose_task",
|
|
3071
|
+
arguments: { capabilityId: aCap, title: "Should be blocked" },
|
|
3072
|
+
},
|
|
3073
|
+
});
|
|
3074
|
+
},
|
|
3075
|
+
pass: (r) => {
|
|
3076
|
+
if (!r?.result?.isError) return false;
|
|
3077
|
+
const text = r.result.content?.[0]?.text ?? "";
|
|
3078
|
+
return (
|
|
3079
|
+
text.includes("prerequisite_missing") && text.includes("get_agents_md")
|
|
3080
|
+
);
|
|
3081
|
+
},
|
|
3082
|
+
},
|
|
3083
|
+
{
|
|
3084
|
+
// After fetching the rubric, propose_theme should still be
|
|
3085
|
+
// blocked until the agent has actually listed themes. Asserts
|
|
3086
|
+
// the discovery gate fires with the right `fix` field.
|
|
3087
|
+
name: "discovery gate blocks propose_theme before list_themes",
|
|
3088
|
+
fn: () => {
|
|
3089
|
+
resetSession();
|
|
3090
|
+
session.rubricFetchedAt = Date.now(); // rubric satisfied
|
|
3091
|
+
return handle({
|
|
3092
|
+
id: 16,
|
|
3093
|
+
method: "tools/call",
|
|
3094
|
+
params: {
|
|
3095
|
+
name: "propose_theme",
|
|
3096
|
+
arguments: { name: "Some New Theme Idea" },
|
|
3097
|
+
},
|
|
3098
|
+
});
|
|
3099
|
+
},
|
|
3100
|
+
pass: (r) => {
|
|
3101
|
+
if (!r?.result?.isError) return false;
|
|
3102
|
+
const text = r.result.content?.[0]?.text ?? "";
|
|
3103
|
+
return (
|
|
3104
|
+
text.includes("discovery_missing") && text.includes("list_themes")
|
|
3105
|
+
);
|
|
3106
|
+
},
|
|
3107
|
+
},
|
|
3108
|
+
{
|
|
3109
|
+
// Same gate for propose_capability — requires suggest_capability_for
|
|
3110
|
+
// (or list_capabilities / get_roadmap_snapshot) first.
|
|
3111
|
+
name:
|
|
3112
|
+
"discovery gate blocks propose_capability before suggest_capability_for",
|
|
3113
|
+
fn: () => {
|
|
3114
|
+
resetSession();
|
|
3115
|
+
session.rubricFetchedAt = Date.now();
|
|
3116
|
+
return handle({
|
|
3117
|
+
id: 17,
|
|
3118
|
+
method: "tools/call",
|
|
3119
|
+
params: {
|
|
3120
|
+
name: "propose_capability",
|
|
3121
|
+
arguments: {
|
|
3122
|
+
name: "Brand new capability",
|
|
3123
|
+
pillarId: aTheme,
|
|
3124
|
+
outcome: "x",
|
|
3125
|
+
},
|
|
3126
|
+
},
|
|
3127
|
+
});
|
|
3128
|
+
},
|
|
3129
|
+
pass: (r) => {
|
|
3130
|
+
if (!r?.result?.isError) return false;
|
|
3131
|
+
const text = r.result.content?.[0]?.text ?? "";
|
|
3132
|
+
return (
|
|
3133
|
+
text.includes("discovery_missing") &&
|
|
3134
|
+
text.includes("suggest_capability_for")
|
|
3135
|
+
);
|
|
3136
|
+
},
|
|
3137
|
+
},
|
|
3138
|
+
{
|
|
3139
|
+
// get_roadmap_snapshot returns BOTH themes and caps in a single
|
|
3140
|
+
// response, so it satisfies BOTH discovery gates at once.
|
|
3141
|
+
name: "get_roadmap_snapshot satisfies both discovery gates",
|
|
3142
|
+
fn: async () => {
|
|
3143
|
+
resetSession();
|
|
3144
|
+
session.rubricFetchedAt = Date.now();
|
|
3145
|
+
await handle({
|
|
3146
|
+
id: 18,
|
|
3147
|
+
method: "tools/call",
|
|
3148
|
+
params: { name: "get_roadmap_snapshot", arguments: {} },
|
|
3149
|
+
});
|
|
3150
|
+
return {
|
|
3151
|
+
themesListedAt: session.themesListedAt,
|
|
3152
|
+
capsDiscoveredAt: session.capsDiscoveredAt,
|
|
3153
|
+
};
|
|
3154
|
+
},
|
|
3155
|
+
pass: (r) => r?.themesListedAt !== null && r?.capsDiscoveredAt !== null,
|
|
3156
|
+
},
|
|
3157
|
+
{
|
|
3158
|
+
name: "resources/list returns the three resources",
|
|
3159
|
+
fn: () => handle({ id: 12, method: "resources/list", params: {} }),
|
|
3160
|
+
pass: (r) =>
|
|
3161
|
+
Array.isArray(r?.result?.resources) &&
|
|
3162
|
+
r.result.resources.length === RESOURCES.length &&
|
|
3163
|
+
r.result.resources.some((x) => x.uri === "roadmapper://rubric"),
|
|
3164
|
+
},
|
|
3165
|
+
{
|
|
3166
|
+
name: "resources/read rubric counts as a fetched rubric",
|
|
3167
|
+
fn: () =>
|
|
3168
|
+
handle({
|
|
3169
|
+
id: 13,
|
|
3170
|
+
method: "resources/read",
|
|
3171
|
+
params: { uri: "roadmapper://rubric" },
|
|
3172
|
+
}),
|
|
3173
|
+
pass: (r) =>
|
|
3174
|
+
r?.result?.contents?.[0]?.text?.includes("# AGENTS.md") &&
|
|
3175
|
+
session.rubricFetchedAt !== null,
|
|
3176
|
+
},
|
|
3177
|
+
{
|
|
3178
|
+
name: "prompts/list returns the three prompts",
|
|
3179
|
+
fn: () => handle({ id: 14, method: "prompts/list", params: {} }),
|
|
3180
|
+
pass: (r) =>
|
|
3181
|
+
Array.isArray(r?.result?.prompts) &&
|
|
3182
|
+
r.result.prompts.length === PROMPTS.length &&
|
|
3183
|
+
r.result.prompts.some((p) => p.name === "plan-feature"),
|
|
3184
|
+
},
|
|
3185
|
+
{
|
|
3186
|
+
name: "prompts/get plan-feature expands the template",
|
|
3187
|
+
fn: () =>
|
|
3188
|
+
handle({
|
|
3189
|
+
id: 15,
|
|
3190
|
+
method: "prompts/get",
|
|
3191
|
+
params: {
|
|
3192
|
+
name: "plan-feature",
|
|
3193
|
+
arguments: { description: "demo description" },
|
|
3194
|
+
},
|
|
3195
|
+
}),
|
|
3196
|
+
pass: (r) =>
|
|
3197
|
+
r?.result?.messages?.[0]?.content?.text?.includes(
|
|
3198
|
+
"suggest_capability_for"
|
|
3199
|
+
) &&
|
|
3200
|
+
r.result.messages[0].content.text.includes("demo description"),
|
|
3201
|
+
},
|
|
3202
|
+
{
|
|
3203
|
+
name: "tools/list",
|
|
3204
|
+
fn: () => handle({ id: 2, method: "tools/list", params: {} }),
|
|
3205
|
+
pass: (r) =>
|
|
3206
|
+
Array.isArray(r?.result?.tools) && r.result.tools.length === TOOLS.length,
|
|
3207
|
+
},
|
|
3208
|
+
{
|
|
3209
|
+
name: "list_themes",
|
|
3210
|
+
fn: () =>
|
|
3211
|
+
handle({
|
|
3212
|
+
id: 3,
|
|
3213
|
+
method: "tools/call",
|
|
3214
|
+
params: { name: "list_themes", arguments: {} },
|
|
3215
|
+
}),
|
|
3216
|
+
pass: (r) => !!r?.result?.content?.[0]?.text?.includes(aTheme),
|
|
3217
|
+
},
|
|
3218
|
+
{
|
|
3219
|
+
name: `list_capabilities themeId=${aTheme}`,
|
|
3220
|
+
fn: () =>
|
|
3221
|
+
handle({
|
|
3222
|
+
id: 4,
|
|
3223
|
+
method: "tools/call",
|
|
3224
|
+
params: {
|
|
3225
|
+
name: "list_capabilities",
|
|
3226
|
+
arguments: { themeId: aTheme },
|
|
3227
|
+
},
|
|
3228
|
+
}),
|
|
3229
|
+
pass: (r) => r?.result && !r.result.isError,
|
|
3230
|
+
},
|
|
3231
|
+
{
|
|
3232
|
+
name: "list_tasks status=delivered",
|
|
3233
|
+
fn: () =>
|
|
3234
|
+
handle({
|
|
3235
|
+
id: 5,
|
|
3236
|
+
method: "tools/call",
|
|
3237
|
+
params: {
|
|
3238
|
+
name: "list_tasks",
|
|
3239
|
+
arguments: { status: "delivered" },
|
|
3240
|
+
},
|
|
3241
|
+
}),
|
|
3242
|
+
pass: (r) => r?.result && !r.result.isError,
|
|
3243
|
+
},
|
|
3244
|
+
{
|
|
3245
|
+
// Phase-1 archive smoke: with no archived rows in the seed,
|
|
3246
|
+
// includeArchived: true should still return the full set —
|
|
3247
|
+
// and crucially the code path doesn't throw. Once Phase 2
|
|
3248
|
+
// ships writes, a future check can assert that an archived
|
|
3249
|
+
// task is hidden by default and visible with the flag.
|
|
3250
|
+
name: "list_tasks includeArchived=true (smoke)",
|
|
3251
|
+
fn: () =>
|
|
3252
|
+
handle({
|
|
3253
|
+
id: 51,
|
|
3254
|
+
method: "tools/call",
|
|
3255
|
+
params: {
|
|
3256
|
+
name: "list_tasks",
|
|
3257
|
+
arguments: { includeArchived: true },
|
|
3258
|
+
},
|
|
3259
|
+
}),
|
|
3260
|
+
pass: (r) => r?.result && !r.result.isError,
|
|
3261
|
+
},
|
|
3262
|
+
{
|
|
3263
|
+
name: `get_task id=${aTask}`,
|
|
3264
|
+
fn: () =>
|
|
3265
|
+
handle({
|
|
3266
|
+
id: 6,
|
|
3267
|
+
method: "tools/call",
|
|
3268
|
+
params: { name: "get_task", arguments: { id: aTask } },
|
|
3269
|
+
}),
|
|
3270
|
+
pass: (r) =>
|
|
3271
|
+
!r?.result?.isError &&
|
|
3272
|
+
r?.result?.content?.[0]?.text?.includes(`"id": "${aTask}"`),
|
|
3273
|
+
},
|
|
3274
|
+
{
|
|
3275
|
+
name: "get_task (bogus id returns error result)",
|
|
3276
|
+
fn: () =>
|
|
3277
|
+
handle({
|
|
3278
|
+
id: 7,
|
|
3279
|
+
method: "tools/call",
|
|
3280
|
+
params: {
|
|
3281
|
+
name: "get_task",
|
|
3282
|
+
arguments: { id: "TK-NOPE" },
|
|
3283
|
+
},
|
|
3284
|
+
}),
|
|
3285
|
+
pass: (r) => r?.result?.isError === true,
|
|
3286
|
+
},
|
|
3287
|
+
{
|
|
3288
|
+
name: "get_agents_md",
|
|
3289
|
+
fn: () =>
|
|
3290
|
+
handle({
|
|
3291
|
+
id: 8,
|
|
3292
|
+
method: "tools/call",
|
|
3293
|
+
params: { name: "get_agents_md", arguments: {} },
|
|
3294
|
+
}),
|
|
3295
|
+
pass: (r) => r?.result?.content?.[0]?.text?.includes("# AGENTS.md"),
|
|
3296
|
+
},
|
|
3297
|
+
{
|
|
3298
|
+
name: "propose_task (bad capabilityId returns error result)",
|
|
3299
|
+
fn: () =>
|
|
3300
|
+
handle({
|
|
3301
|
+
id: 9,
|
|
3302
|
+
method: "tools/call",
|
|
3303
|
+
params: {
|
|
3304
|
+
name: "propose_task",
|
|
3305
|
+
arguments: {
|
|
3306
|
+
capabilityId: "CAP-NOPE",
|
|
3307
|
+
title: "Should fail",
|
|
3308
|
+
},
|
|
3309
|
+
},
|
|
3310
|
+
}),
|
|
3311
|
+
pass: (r) => r?.result?.isError === true,
|
|
3312
|
+
},
|
|
3313
|
+
{
|
|
3314
|
+
name: "propose_task (valid args, no service key) errors cleanly",
|
|
3315
|
+
fn: () =>
|
|
3316
|
+
handle({
|
|
3317
|
+
id: 10,
|
|
3318
|
+
method: "tools/call",
|
|
3319
|
+
params: {
|
|
3320
|
+
name: "propose_task",
|
|
3321
|
+
arguments: { capabilityId: aCap, title: "Selftest task" },
|
|
3322
|
+
},
|
|
3323
|
+
}),
|
|
3324
|
+
// Without SUPABASE_SERVICE_ROLE_KEY this must return an error result
|
|
3325
|
+
// (not throw). With the key set, this would actually write — so we
|
|
3326
|
+
// accept a non-error result in that case and assert the error only on the no-key path.
|
|
3327
|
+
pass: (r) =>
|
|
3328
|
+
process.env.SUPABASE_SERVICE_ROLE_KEY
|
|
3329
|
+
? r?.result && !r.result.isError
|
|
3330
|
+
: r?.result?.isError === true,
|
|
3331
|
+
},
|
|
3332
|
+
{
|
|
3333
|
+
name: "propose_theme (missing name returns error result)",
|
|
3334
|
+
fn: () =>
|
|
3335
|
+
handle({
|
|
3336
|
+
id: 11,
|
|
3337
|
+
method: "tools/call",
|
|
3338
|
+
params: { name: "propose_theme", arguments: { name: "" } },
|
|
3339
|
+
}),
|
|
3340
|
+
pass: (r) => r?.result?.isError === true,
|
|
3341
|
+
},
|
|
3342
|
+
{
|
|
3343
|
+
name: "propose_capability (unknown pillarId returns error result)",
|
|
3344
|
+
fn: () =>
|
|
3345
|
+
handle({
|
|
3346
|
+
id: 12,
|
|
3347
|
+
method: "tools/call",
|
|
3348
|
+
params: {
|
|
3349
|
+
name: "propose_capability",
|
|
3350
|
+
arguments: { name: "bogus", pillarId: "TH-DOES-NOT-EXIST" },
|
|
3351
|
+
},
|
|
3352
|
+
}),
|
|
3353
|
+
pass: (r) => r?.result?.isError === true,
|
|
3354
|
+
},
|
|
3355
|
+
{
|
|
3356
|
+
name: "propose_capability (invalid impact returns error result)",
|
|
3357
|
+
fn: () =>
|
|
3358
|
+
handle({
|
|
3359
|
+
id: 13,
|
|
3360
|
+
method: "tools/call",
|
|
3361
|
+
params: {
|
|
3362
|
+
name: "propose_capability",
|
|
3363
|
+
arguments: { name: "bad-impact", pillarId: aTheme, impact: 7 },
|
|
3364
|
+
},
|
|
3365
|
+
}),
|
|
3366
|
+
pass: (r) => r?.result?.isError === true,
|
|
3367
|
+
},
|
|
3368
|
+
{
|
|
3369
|
+
name: "propose_capability (valid args, no service key) errors cleanly",
|
|
3370
|
+
fn: () =>
|
|
3371
|
+
handle({
|
|
3372
|
+
id: 14,
|
|
3373
|
+
method: "tools/call",
|
|
3374
|
+
params: {
|
|
3375
|
+
name: "propose_capability",
|
|
3376
|
+
arguments: {
|
|
3377
|
+
name: "Selftest capability example",
|
|
3378
|
+
pillarId: aTheme,
|
|
3379
|
+
outcome:
|
|
3380
|
+
"Selftest metric moves from 0 to 10 by 2026-12-31, measured by selftest_event.",
|
|
3381
|
+
},
|
|
3382
|
+
},
|
|
3383
|
+
}),
|
|
3384
|
+
pass: (r) =>
|
|
3385
|
+
process.env.SUPABASE_SERVICE_ROLE_KEY
|
|
3386
|
+
? r?.result && !r.result.isError
|
|
3387
|
+
: r?.result?.isError === true,
|
|
3388
|
+
},
|
|
3389
|
+
{
|
|
3390
|
+
name: "propose_capability (empty outcome rejected by validator)",
|
|
3391
|
+
fn: () =>
|
|
3392
|
+
handle({
|
|
3393
|
+
id: 15,
|
|
3394
|
+
method: "tools/call",
|
|
3395
|
+
params: {
|
|
3396
|
+
name: "propose_capability",
|
|
3397
|
+
arguments: {
|
|
3398
|
+
name: "Selftest capability example",
|
|
3399
|
+
pillarId: aTheme,
|
|
3400
|
+
outcome: "",
|
|
3401
|
+
},
|
|
3402
|
+
},
|
|
3403
|
+
}),
|
|
3404
|
+
pass: (r) => r?.result?.isError === true,
|
|
3405
|
+
},
|
|
3406
|
+
{
|
|
3407
|
+
name: "propose_capability (non-falsifiable outcome rejected)",
|
|
3408
|
+
fn: () =>
|
|
3409
|
+
handle({
|
|
3410
|
+
id: 16,
|
|
3411
|
+
method: "tools/call",
|
|
3412
|
+
params: {
|
|
3413
|
+
name: "propose_capability",
|
|
3414
|
+
arguments: {
|
|
3415
|
+
name: "Selftest capability example",
|
|
3416
|
+
pillarId: aTheme,
|
|
3417
|
+
outcome: "Make the thing better.",
|
|
3418
|
+
},
|
|
3419
|
+
},
|
|
3420
|
+
}),
|
|
3421
|
+
pass: (r) => r?.result?.isError === true,
|
|
3422
|
+
},
|
|
3423
|
+
{
|
|
3424
|
+
// Regression guard for the over-lax month-name match. Outcome
|
|
3425
|
+
// contains a digit ("50%") but no real date — just the verb
|
|
3426
|
+
// "may". Should be rejected, not pass via the month branch.
|
|
3427
|
+
name: "propose_capability (bare month name without a digit-after rejected)",
|
|
3428
|
+
fn: () =>
|
|
3429
|
+
handle({
|
|
3430
|
+
id: 161,
|
|
3431
|
+
method: "tools/call",
|
|
3432
|
+
params: {
|
|
3433
|
+
name: "propose_capability",
|
|
3434
|
+
arguments: {
|
|
3435
|
+
name: "Selftest capability example",
|
|
3436
|
+
pillarId: aTheme,
|
|
3437
|
+
outcome: "We may improve activation from 30% to 50% if all goes well.",
|
|
3438
|
+
},
|
|
3439
|
+
},
|
|
3440
|
+
}),
|
|
3441
|
+
pass: (r) => r?.result?.isError === true,
|
|
3442
|
+
},
|
|
3443
|
+
{
|
|
3444
|
+
name: "propose_capability (confidence 100 rejected by validator)",
|
|
3445
|
+
fn: () =>
|
|
3446
|
+
handle({
|
|
3447
|
+
id: 17,
|
|
3448
|
+
method: "tools/call",
|
|
3449
|
+
params: {
|
|
3450
|
+
name: "propose_capability",
|
|
3451
|
+
arguments: {
|
|
3452
|
+
name: "Selftest capability example",
|
|
3453
|
+
pillarId: aTheme,
|
|
3454
|
+
outcome: "Metric moves from 0 to 5 by 2026-09-30, measured by event.",
|
|
3455
|
+
confidence: 100,
|
|
3456
|
+
},
|
|
3457
|
+
},
|
|
3458
|
+
}),
|
|
3459
|
+
pass: (r) => r?.result?.isError === true,
|
|
3460
|
+
},
|
|
3461
|
+
{
|
|
3462
|
+
name: "propose_capability dryRun returns wouldCreate without writing",
|
|
3463
|
+
fn: () =>
|
|
3464
|
+
handle({
|
|
3465
|
+
id: 18,
|
|
3466
|
+
method: "tools/call",
|
|
3467
|
+
params: {
|
|
3468
|
+
name: "propose_capability",
|
|
3469
|
+
arguments: {
|
|
3470
|
+
name: "Selftest dry run capability",
|
|
3471
|
+
pillarId: aTheme,
|
|
3472
|
+
outcome:
|
|
3473
|
+
"Metric moves from 0 to 5 by 2026-09-30, measured by event.",
|
|
3474
|
+
dryRun: true,
|
|
3475
|
+
},
|
|
3476
|
+
},
|
|
3477
|
+
}),
|
|
3478
|
+
// dryRun: works regardless of whether service key is set
|
|
3479
|
+
pass: (r) =>
|
|
3480
|
+
!r?.result?.isError &&
|
|
3481
|
+
r?.result?.content?.[0]?.text?.includes('"dryRun": true'),
|
|
3482
|
+
},
|
|
3483
|
+
{
|
|
3484
|
+
name: "suggest_capability_for (returns matches sorted by score)",
|
|
3485
|
+
fn: () =>
|
|
3486
|
+
handle({
|
|
3487
|
+
id: 19,
|
|
3488
|
+
method: "tools/call",
|
|
3489
|
+
params: {
|
|
3490
|
+
name: "suggest_capability_for",
|
|
3491
|
+
arguments: { description: "example capability" },
|
|
3492
|
+
},
|
|
3493
|
+
}),
|
|
3494
|
+
pass: (r) =>
|
|
3495
|
+
!r?.result?.isError &&
|
|
3496
|
+
r?.result?.content?.[0]?.text?.includes('"matches"'),
|
|
3497
|
+
},
|
|
3498
|
+
{
|
|
3499
|
+
name: "suggest_capability_for (empty description rejected)",
|
|
3500
|
+
fn: () =>
|
|
3501
|
+
handle({
|
|
3502
|
+
id: 20,
|
|
3503
|
+
method: "tools/call",
|
|
3504
|
+
params: {
|
|
3505
|
+
name: "suggest_capability_for",
|
|
3506
|
+
arguments: { description: "" },
|
|
3507
|
+
},
|
|
3508
|
+
}),
|
|
3509
|
+
pass: (r) => r?.result?.isError === true,
|
|
3510
|
+
},
|
|
3511
|
+
{
|
|
3512
|
+
// suggest_theme_for is the theme-level mirror — same shape,
|
|
3513
|
+
// returns ranked matches against an arbitrary description.
|
|
3514
|
+
name: "suggest_theme_for (returns matches sorted by score)",
|
|
3515
|
+
fn: () =>
|
|
3516
|
+
handle({
|
|
3517
|
+
id: 30,
|
|
3518
|
+
method: "tools/call",
|
|
3519
|
+
params: {
|
|
3520
|
+
name: "suggest_theme_for",
|
|
3521
|
+
arguments: { description: "example theme" },
|
|
3522
|
+
},
|
|
3523
|
+
}),
|
|
3524
|
+
pass: (r) =>
|
|
3525
|
+
!r?.result?.isError &&
|
|
3526
|
+
r?.result?.content?.[0]?.text?.includes('"matches"'),
|
|
3527
|
+
},
|
|
3528
|
+
{
|
|
3529
|
+
name: "suggest_theme_for (empty description rejected)",
|
|
3530
|
+
fn: () =>
|
|
3531
|
+
handle({
|
|
3532
|
+
id: 31,
|
|
3533
|
+
method: "tools/call",
|
|
3534
|
+
params: {
|
|
3535
|
+
name: "suggest_theme_for",
|
|
3536
|
+
arguments: { description: "" },
|
|
3537
|
+
},
|
|
3538
|
+
}),
|
|
3539
|
+
pass: (r) => r?.result?.isError === true,
|
|
3540
|
+
},
|
|
3541
|
+
{
|
|
3542
|
+
// suggest_theme_for satisfies the propose_theme discovery
|
|
3543
|
+
// gate the same way suggest_capability_for satisfies the
|
|
3544
|
+
// propose_capability gate. After the call, themesListedAt
|
|
3545
|
+
// should be populated.
|
|
3546
|
+
name: "suggest_theme_for satisfies propose_theme discovery gate",
|
|
3547
|
+
fn: async () => {
|
|
3548
|
+
resetSession();
|
|
3549
|
+
session.rubricFetchedAt = Date.now();
|
|
3550
|
+
await handle({
|
|
3551
|
+
id: 32,
|
|
3552
|
+
method: "tools/call",
|
|
3553
|
+
params: {
|
|
3554
|
+
name: "suggest_theme_for",
|
|
3555
|
+
arguments: { description: "any" },
|
|
3556
|
+
},
|
|
3557
|
+
});
|
|
3558
|
+
return { themesListedAt: session.themesListedAt };
|
|
3559
|
+
},
|
|
3560
|
+
pass: (r) => r?.themesListedAt !== null,
|
|
3561
|
+
},
|
|
3562
|
+
{
|
|
3563
|
+
name: "link_pr (unknown task rejected)",
|
|
3564
|
+
fn: () =>
|
|
3565
|
+
handle({
|
|
3566
|
+
id: 21,
|
|
3567
|
+
method: "tools/call",
|
|
3568
|
+
params: {
|
|
3569
|
+
name: "link_pr",
|
|
3570
|
+
arguments: { taskId: "TK-NOPE", repo: "x/y", number: 1 },
|
|
3571
|
+
},
|
|
3572
|
+
}),
|
|
3573
|
+
pass: (r) => r?.result?.isError === true,
|
|
3574
|
+
},
|
|
3575
|
+
{
|
|
3576
|
+
name: "link_pr (valid args, no service key errors cleanly)",
|
|
3577
|
+
fn: () =>
|
|
3578
|
+
handle({
|
|
3579
|
+
id: 22,
|
|
3580
|
+
method: "tools/call",
|
|
3581
|
+
params: {
|
|
3582
|
+
name: "link_pr",
|
|
3583
|
+
arguments: { taskId: aTask, repo: "x/y", number: 1 },
|
|
3584
|
+
},
|
|
3585
|
+
}),
|
|
3586
|
+
pass: (r) =>
|
|
3587
|
+
process.env.SUPABASE_SERVICE_ROLE_KEY
|
|
3588
|
+
? r?.result && !r.result.isError
|
|
3589
|
+
: r?.result?.isError === true,
|
|
3590
|
+
},
|
|
3591
|
+
{
|
|
3592
|
+
// Reason is required on archive/unarchive — empty reason
|
|
3593
|
+
// must return an error result regardless of service-key
|
|
3594
|
+
// availability.
|
|
3595
|
+
name: "archive_task (missing reason returns error result)",
|
|
3596
|
+
fn: () =>
|
|
3597
|
+
handle({
|
|
3598
|
+
id: 23,
|
|
3599
|
+
method: "tools/call",
|
|
3600
|
+
params: {
|
|
3601
|
+
name: "archive_task",
|
|
3602
|
+
arguments: { taskId: aTask, reason: "" },
|
|
3603
|
+
},
|
|
3604
|
+
}),
|
|
3605
|
+
pass: (r) => r?.result?.isError === true,
|
|
3606
|
+
},
|
|
3607
|
+
{
|
|
3608
|
+
name: "archive_capability (missing reason returns error result)",
|
|
3609
|
+
fn: () =>
|
|
3610
|
+
handle({
|
|
3611
|
+
id: 24,
|
|
3612
|
+
method: "tools/call",
|
|
3613
|
+
params: {
|
|
3614
|
+
name: "archive_capability",
|
|
3615
|
+
arguments: { capabilityId: aCap, reason: " " },
|
|
3616
|
+
},
|
|
3617
|
+
}),
|
|
3618
|
+
pass: (r) => r?.result?.isError === true,
|
|
3619
|
+
},
|
|
3620
|
+
{
|
|
3621
|
+
name: "unarchive_theme (missing themeId returns error result)",
|
|
3622
|
+
fn: () =>
|
|
3623
|
+
handle({
|
|
3624
|
+
id: 25,
|
|
3625
|
+
method: "tools/call",
|
|
3626
|
+
params: { name: "unarchive_theme", arguments: { reason: "x" } },
|
|
3627
|
+
}),
|
|
3628
|
+
pass: (r) => r?.result?.isError === true,
|
|
3629
|
+
},
|
|
3630
|
+
{
|
|
3631
|
+
// Move validation: missing newCapabilityId must error out.
|
|
3632
|
+
name: "move_task (missing newCapabilityId returns error result)",
|
|
3633
|
+
fn: () =>
|
|
3634
|
+
handle({
|
|
3635
|
+
id: 26,
|
|
3636
|
+
method: "tools/call",
|
|
3637
|
+
params: {
|
|
3638
|
+
name: "move_task",
|
|
3639
|
+
arguments: { taskId: aTask, reason: "reorg" },
|
|
3640
|
+
},
|
|
3641
|
+
}),
|
|
3642
|
+
pass: (r) => r?.result?.isError === true,
|
|
3643
|
+
},
|
|
3644
|
+
{
|
|
3645
|
+
name: "move_capability (missing reason returns error result)",
|
|
3646
|
+
fn: () =>
|
|
3647
|
+
handle({
|
|
3648
|
+
id: 27,
|
|
3649
|
+
method: "tools/call",
|
|
3650
|
+
params: {
|
|
3651
|
+
name: "move_capability",
|
|
3652
|
+
arguments: { capabilityId: aCap, newThemeId: aTheme, reason: "" },
|
|
3653
|
+
},
|
|
3654
|
+
}),
|
|
3655
|
+
pass: (r) => r?.result?.isError === true,
|
|
3656
|
+
},
|
|
3657
|
+
{
|
|
3658
|
+
// Bulk shape: oversize batch rejected pre-flight, doesn't call SQL.
|
|
3659
|
+
name: "move_tasks (over 100-item cap returns error result)",
|
|
3660
|
+
fn: () => {
|
|
3661
|
+
const moves = Array.from({ length: 101 }, (_, i) => ({
|
|
3662
|
+
taskId: `TK-${String(i).padStart(6, "0")}`,
|
|
3663
|
+
newCapabilityId: aCap,
|
|
3664
|
+
}));
|
|
3665
|
+
return handle({
|
|
3666
|
+
id: 28,
|
|
3667
|
+
method: "tools/call",
|
|
3668
|
+
params: { name: "move_tasks", arguments: { moves, reason: "reorg" } },
|
|
3669
|
+
});
|
|
3670
|
+
},
|
|
3671
|
+
pass: (r) => r?.result?.isError === true,
|
|
3672
|
+
},
|
|
3673
|
+
{
|
|
3674
|
+
name: "move_capabilities (empty moves returns error result)",
|
|
3675
|
+
fn: () =>
|
|
3676
|
+
handle({
|
|
3677
|
+
id: 29,
|
|
3678
|
+
method: "tools/call",
|
|
3679
|
+
params: {
|
|
3680
|
+
name: "move_capabilities",
|
|
3681
|
+
arguments: { moves: [], reason: "reorg" },
|
|
3682
|
+
},
|
|
3683
|
+
}),
|
|
3684
|
+
pass: (r) => r?.result?.isError === true,
|
|
3685
|
+
},
|
|
3686
|
+
{
|
|
3687
|
+
// Schema-level: tools/list must advertise the four move tools.
|
|
3688
|
+
name: "tools/list advertises four move tools",
|
|
3689
|
+
fn: () => handle({ id: 30, method: "tools/list", params: {} }),
|
|
3690
|
+
pass: (r) => {
|
|
3691
|
+
const names = (r?.result?.tools ?? []).map((t) => t.name);
|
|
3692
|
+
return ["move_task", "move_capability", "move_tasks", "move_capabilities"].every((n) =>
|
|
3693
|
+
names.includes(n)
|
|
3694
|
+
);
|
|
3695
|
+
},
|
|
3696
|
+
},
|
|
3697
|
+
{
|
|
3698
|
+
// Update validation: missing patch.
|
|
3699
|
+
name: "update_task (missing patch returns error result)",
|
|
3700
|
+
fn: () =>
|
|
3701
|
+
handle({
|
|
3702
|
+
id: 31,
|
|
3703
|
+
method: "tools/call",
|
|
3704
|
+
params: { name: "update_task", arguments: { taskId: aTask, reason: "r" } },
|
|
3705
|
+
}),
|
|
3706
|
+
pass: (r) => r?.result?.isError === true,
|
|
3707
|
+
},
|
|
3708
|
+
{
|
|
3709
|
+
// Update validation: empty patch.
|
|
3710
|
+
name: "update_capability (empty patch returns error result)",
|
|
3711
|
+
fn: () =>
|
|
3712
|
+
handle({
|
|
3713
|
+
id: 32,
|
|
3714
|
+
method: "tools/call",
|
|
3715
|
+
params: {
|
|
3716
|
+
name: "update_capability",
|
|
3717
|
+
arguments: { capabilityId: aCap, patch: {}, reason: "r" },
|
|
3718
|
+
},
|
|
3719
|
+
}),
|
|
3720
|
+
pass: (r) => r?.result?.isError === true,
|
|
3721
|
+
},
|
|
3722
|
+
{
|
|
3723
|
+
// UP3: invalid status rejected client-side before SQL.
|
|
3724
|
+
name: "update_task (invalid status rejected by validator)",
|
|
3725
|
+
fn: () =>
|
|
3726
|
+
handle({
|
|
3727
|
+
id: 33,
|
|
3728
|
+
method: "tools/call",
|
|
3729
|
+
params: {
|
|
3730
|
+
name: "update_task",
|
|
3731
|
+
arguments: {
|
|
3732
|
+
taskId: aTask,
|
|
3733
|
+
patch: { status: "shipped" },
|
|
3734
|
+
reason: "advance",
|
|
3735
|
+
},
|
|
3736
|
+
},
|
|
3737
|
+
}),
|
|
3738
|
+
pass: (r) => {
|
|
3739
|
+
const txt = r?.result?.content?.[0]?.text ?? "";
|
|
3740
|
+
return r?.result?.isError === true && txt.includes("invalid status");
|
|
3741
|
+
},
|
|
3742
|
+
},
|
|
3743
|
+
{
|
|
3744
|
+
// UP3: invalid confidence (>95) rejected by validator.
|
|
3745
|
+
name: "update_capability (confidence 99 rejected by validator)",
|
|
3746
|
+
fn: () =>
|
|
3747
|
+
handle({
|
|
3748
|
+
id: 34,
|
|
3749
|
+
method: "tools/call",
|
|
3750
|
+
params: {
|
|
3751
|
+
name: "update_capability",
|
|
3752
|
+
arguments: {
|
|
3753
|
+
capabilityId: aCap,
|
|
3754
|
+
patch: { confidence: 99 },
|
|
3755
|
+
reason: "bump",
|
|
3756
|
+
},
|
|
3757
|
+
},
|
|
3758
|
+
}),
|
|
3759
|
+
pass: (r) => r?.result?.isError === true,
|
|
3760
|
+
},
|
|
3761
|
+
{
|
|
3762
|
+
// Schema-level: parent fields are blocked at JSON-schema layer
|
|
3763
|
+
// (additionalProperties:false on patch). Without service key
|
|
3764
|
+
// we won't reach SQL, but the schema rejects it pre-call. This check itself only confirms that tools/list advertises the update tools.
|
|
3765
|
+
name: "tools/list advertises three update tools",
|
|
3766
|
+
fn: () => handle({ id: 35, method: "tools/list", params: {} }),
|
|
3767
|
+
pass: (r) => {
|
|
3768
|
+
const names = (r?.result?.tools ?? []).map((t) => t.name);
|
|
3769
|
+
return ["update_task", "update_capability", "update_theme"].every((n) =>
|
|
3770
|
+
names.includes(n)
|
|
3771
|
+
);
|
|
3772
|
+
},
|
|
3773
|
+
},
|
|
3774
|
+
{
|
|
3775
|
+
// Cross-workspace guard fires when snapshot.json names workspace
|
|
3776
|
+
// A and a mutator call carries workspaceId=B. Cleanup is in
|
|
3777
|
+
// finally so a thrown handle() doesn't leave the cache pinned.
|
|
3778
|
+
name: "cross-workspace write refused when snapshot conflicts with arg",
|
|
3779
|
+
fn: async () => {
|
|
3780
|
+
try {
|
|
3781
|
+
__setSnapshotWorkspaceForTest("ws-cwd");
|
|
3782
|
+
return await handle({
|
|
3783
|
+
id: 36,
|
|
3784
|
+
method: "tools/call",
|
|
3785
|
+
params: {
|
|
3786
|
+
name: "archive_task",
|
|
3787
|
+
arguments: {
|
|
3788
|
+
taskId: aTask,
|
|
3789
|
+
reason: "cross-workspace probe",
|
|
3790
|
+
workspaceId: "ws-other",
|
|
3791
|
+
},
|
|
3792
|
+
},
|
|
3793
|
+
});
|
|
3794
|
+
} finally {
|
|
3795
|
+
__setSnapshotWorkspaceForTest(undefined);
|
|
3796
|
+
}
|
|
3797
|
+
},
|
|
3798
|
+
pass: (r) => {
|
|
3799
|
+
if (!r?.result?.isError) return false;
|
|
3800
|
+
const txt = r.result.content?.[0]?.text ?? "";
|
|
3801
|
+
return (
|
|
3802
|
+
txt.includes("Refusing cross-workspace") &&
|
|
3803
|
+
txt.includes("ws-cwd") &&
|
|
3804
|
+
txt.includes("ws-other")
|
|
3805
|
+
);
|
|
3806
|
+
},
|
|
3807
|
+
},
|
|
3808
|
+
{
|
|
3809
|
+
// Matching workspaceId arg — guard passes, call continues. The
|
|
3810
|
+
// tool reaches archiveLifecycle / rpcCall which fails with the
|
|
3811
|
+
// missing-service-key error (or in selftest mode without env,
|
|
3812
|
+
// some other downstream error). The key assertion: NOT the
|
|
3813
|
+
// cross-workspace refusal — proves guard ran and let through.
|
|
3814
|
+
name: "matching workspaceId arg passes the cross-workspace guard",
|
|
3815
|
+
fn: async () => {
|
|
3816
|
+
try {
|
|
3817
|
+
__setSnapshotWorkspaceForTest("ws-cwd");
|
|
3818
|
+
return await handle({
|
|
3819
|
+
id: 37,
|
|
3820
|
+
method: "tools/call",
|
|
3821
|
+
params: {
|
|
3822
|
+
name: "archive_task",
|
|
3823
|
+
arguments: {
|
|
3824
|
+
taskId: aTask,
|
|
3825
|
+
reason: "same-workspace probe",
|
|
3826
|
+
workspaceId: "ws-cwd",
|
|
3827
|
+
},
|
|
3828
|
+
},
|
|
3829
|
+
});
|
|
3830
|
+
} finally {
|
|
3831
|
+
__setSnapshotWorkspaceForTest(undefined);
|
|
3832
|
+
}
|
|
3833
|
+
},
|
|
3834
|
+
pass: (r) => {
|
|
3835
|
+
// Must be an error result (no service key) but specifically
|
|
3836
|
+
// NOT the cross-workspace refusal. The two non-cross-workspace
|
|
3837
|
+
// errors we can land on are the missing-service-key error or
|
|
3838
|
+
// a workspace-resolution error — both prove the guard
|
|
3839
|
+
// accepted and the request reached downstream code.
|
|
3840
|
+
if (!r?.result?.isError) return false;
|
|
3841
|
+
const txt = r.result.content?.[0]?.text ?? "";
|
|
3842
|
+
if (txt.includes("Refusing cross-workspace")) return false;
|
|
3843
|
+
return (
|
|
3844
|
+
txt.includes("SUPABASE_SERVICE_ROLE_KEY") ||
|
|
3845
|
+
txt.includes("workspaceId could not be resolved") ||
|
|
3846
|
+
txt.includes("Write tools require")
|
|
3847
|
+
);
|
|
3848
|
+
},
|
|
3849
|
+
},
|
|
3850
|
+
{
|
|
3851
|
+
// record_outcome_reading rejects missing value.
|
|
3852
|
+
name: "record_outcome_reading (missing value returns error result)",
|
|
3853
|
+
fn: () =>
|
|
3854
|
+
handle({
|
|
3855
|
+
id: 39,
|
|
3856
|
+
method: "tools/call",
|
|
3857
|
+
params: {
|
|
3858
|
+
name: "record_outcome_reading",
|
|
3859
|
+
arguments: { capabilityId: aCap, asOf: "2026-05-12", source: "test" },
|
|
3860
|
+
},
|
|
3861
|
+
}),
|
|
3862
|
+
pass: (r) => r?.result?.isError === true,
|
|
3863
|
+
},
|
|
3864
|
+
{
|
|
3865
|
+
// record_outcome_reading rejects missing source.
|
|
3866
|
+
name: "record_outcome_reading (missing source returns error result)",
|
|
3867
|
+
fn: () =>
|
|
3868
|
+
handle({
|
|
3869
|
+
id: 40,
|
|
3870
|
+
method: "tools/call",
|
|
3871
|
+
params: {
|
|
3872
|
+
name: "record_outcome_reading",
|
|
3873
|
+
arguments: { capabilityId: aCap, value: 0.5, asOf: "2026-05-12" },
|
|
3874
|
+
},
|
|
3875
|
+
}),
|
|
3876
|
+
pass: (r) => r?.result?.isError === true,
|
|
3877
|
+
},
|
|
3878
|
+
{
|
|
3879
|
+
// list_stale_outcomes is a read tool — should return without
|
|
3880
|
+
// service-role and surface a count field.
|
|
3881
|
+
name: "list_stale_outcomes returns a structured stale list",
|
|
3882
|
+
fn: () =>
|
|
3883
|
+
handle({
|
|
3884
|
+
id: 41,
|
|
3885
|
+
method: "tools/call",
|
|
3886
|
+
params: { name: "list_stale_outcomes", arguments: {} },
|
|
3887
|
+
}),
|
|
3888
|
+
pass: (r) => {
|
|
3889
|
+
if (r?.result?.isError) return false;
|
|
3890
|
+
const txt = r?.result?.content?.[0]?.text ?? "";
|
|
3891
|
+
try {
|
|
3892
|
+
const parsed = JSON.parse(txt);
|
|
3893
|
+
return (
|
|
3894
|
+
typeof parsed.thresholdDays === "number" &&
|
|
3895
|
+
typeof parsed.count === "number" &&
|
|
3896
|
+
Array.isArray(parsed.stale)
|
|
3897
|
+
);
|
|
3898
|
+
} catch {
|
|
3899
|
+
return false;
|
|
3900
|
+
}
|
|
3901
|
+
},
|
|
3902
|
+
},
|
|
3903
|
+
{
|
|
3904
|
+
// ROADMAPPER_ALLOW_CROSS_WORKSPACE=1 disables the guard. Env
|
|
3905
|
+
// cleanup in finally so a thrown handle() doesn't leak the
|
|
3906
|
+
// permissive flag into subsequent tests.
|
|
3907
|
+
name: "ROADMAPPER_ALLOW_CROSS_WORKSPACE=1 disables the cross-workspace guard",
|
|
3908
|
+
fn: async () => {
|
|
3909
|
+
try {
|
|
3910
|
+
__setSnapshotWorkspaceForTest("ws-cwd");
|
|
3911
|
+
process.env.ROADMAPPER_ALLOW_CROSS_WORKSPACE = "1";
|
|
3912
|
+
return await handle({
|
|
3913
|
+
id: 38,
|
|
3914
|
+
method: "tools/call",
|
|
3915
|
+
params: {
|
|
3916
|
+
name: "archive_task",
|
|
3917
|
+
arguments: {
|
|
3918
|
+
taskId: aTask,
|
|
3919
|
+
reason: "override probe",
|
|
3920
|
+
workspaceId: "ws-other",
|
|
3921
|
+
},
|
|
3922
|
+
},
|
|
3923
|
+
});
|
|
3924
|
+
} finally {
|
|
3925
|
+
__setSnapshotWorkspaceForTest(undefined);
|
|
3926
|
+
delete process.env.ROADMAPPER_ALLOW_CROSS_WORKSPACE;
|
|
3927
|
+
}
|
|
3928
|
+
},
|
|
3929
|
+
pass: (r) => {
|
|
3930
|
+
const txt = r?.result?.content?.[0]?.text ?? "";
|
|
3931
|
+
return !txt.includes("Refusing cross-workspace");
|
|
3932
|
+
},
|
|
3933
|
+
},
|
|
3934
|
+
];
|
|
3935
|
+
|
|
3936
|
+
let passed = 0;
|
|
3937
|
+
for (const c of checks) {
|
|
3938
|
+
let ok = false;
|
|
3939
|
+
let err = null;
|
|
3940
|
+
try {
|
|
3941
|
+
const r = await c.fn();
|
|
3942
|
+
ok = !!c.pass(r);
|
|
3943
|
+
} catch (e) {
|
|
3944
|
+
err = e.message;
|
|
3945
|
+
}
|
|
3946
|
+
const mark = ok ? "PASS" : "FAIL";
|
|
3947
|
+
log(`${mark} ${c.name}${err ? " — " + err : ""}`);
|
|
3948
|
+
if (ok) passed++;
|
|
3949
|
+
}
|
|
3950
|
+
log(`---`);
|
|
3951
|
+
log(`${passed}/${checks.length} checks passed.`);
|
|
3952
|
+
if (passed === checks.length) {
|
|
3953
|
+
log("");
|
|
3954
|
+
log("Server is healthy. If your MCP client doesn't see the");
|
|
3955
|
+
log("roadmapper tools after this passes, the client almost");
|
|
3956
|
+
log("certainly needs a full process restart — MCP servers are");
|
|
3957
|
+
log("connected at client startup. For Claude Code:");
|
|
3958
|
+
log(" /exit → relaunch `claude` → /mcp to verify");
|
|
3959
|
+
}
|
|
3960
|
+
process.exit(passed === checks.length ? 0 : 1);
|
|
3961
|
+
}
|
|
3962
|
+
|
|
3963
|
+
/**
 * Process entry point.
 *
 * With `--selftest` on the command line, runs the in-process checks.
 * Otherwise serves newline-delimited JSON-RPC on stdin/stdout and logs a
 * one-line "ready" banner describing the data mode and roadmap size.
 */
if (process.argv.includes("--selftest")) {
  runSelftest();
} else {
  // Bytes received on stdin that do not yet form a complete line.
  let buf = "";

  // Tail of the processing chain. Every complete line is appended here so
  // messages are handled strictly in arrival order, independent of how
  // stdin happens to chunk them, and so the "end" handler can wait for
  // in-flight work before exiting.
  let queue = Promise.resolve();

  /**
   * Parse one line as JSON, dispatch it through handle(), and send the
   * response if there is one.
   * @param {string} line - A single trimmed, non-empty input line.
   * @returns {Promise<void>}
   */
  const processLine = async (line) => {
    let msg;
    try {
      msg = JSON.parse(line);
    } catch {
      log("bad json", line.slice(0, 200));
      return;
    }
    let response;
    try {
      response = await handle(msg);
    } catch (e) {
      const detail = e?.message ?? String(e);
      log("handler errored:", detail);
      // A message with an id is a request: answer with a JSON-RPC internal
      // error so the client is not left waiting. Messages without an id
      // are notifications and get no reply.
      // NOTE(review): assumes send() serializes the object as-is, the same
      // way it does for handle() responses.
      const isRequest =
        msg !== null &&
        typeof msg === "object" &&
        msg.id !== undefined &&
        msg.id !== null;
      if (isRequest) {
        send({
          jsonrpc: "2.0",
          id: msg.id,
          error: { code: -32603, message: `Internal error: ${detail}` },
        });
      }
      return;
    }
    if (response) send(response);
  };

  /**
   * Append a line to the processing chain. The trailing catch keeps the
   * chain alive (and the process free of unhandled rejections) if
   * send() itself throws.
   * @param {string} line - A single trimmed, non-empty input line.
   */
  const enqueue = (line) => {
    queue = queue
      .then(() => processLine(line))
      .catch((e) => log("failed to process message:", e?.message ?? String(e)));
  };

  process.stdin.setEncoding("utf-8");
  process.stdin.on("data", (chunk) => {
    buf += chunk;
    let nl;
    while ((nl = buf.indexOf("\n")) >= 0) {
      const line = buf.slice(0, nl).trim();
      buf = buf.slice(nl + 1);
      if (line) enqueue(line);
    }
  });

  process.stdin.on("end", () => {
    // A final message may arrive without a trailing newline; handle it
    // rather than dropping it.
    const rest = buf.trim();
    buf = "";
    if (rest) enqueue(rest);
    // Exit only after every queued message has been handled, so responses
    // to requests still in flight at EOF are not lost.
    queue.then(() => process.exit(0));
  });

  const { url, readKey: rk, writeKey } = supabaseConfig();
  const mode = url && rk
    ? writeKey
      ? "supabase (rw)"
      : "supabase (ro)"
    : "seed-only";

  // Boot-time snapshot of the roadmap shape — gives the operator a
  // fast sanity check that the MCP can read the right workspace.
  // Errors here are swallowed so a flaky network doesn't keep the
  // server from booting.
  (async () => {
    let stats = null;
    try {
      const projected =
        (await readWorkspaceProjected()) ?? project(readSeed(), {});
      stats = {
        themes: projected.themes.length,
        capabilities: projected.capabilities.length,
        openTasks: projected.tasks.filter((t) => t.status !== "delivered").length,
      };
    } catch (e) {
      log("ready-snapshot errored:", e.message);
    }
    const tail = stats
      ? `, ${stats.themes} themes, ${stats.capabilities} capabilities, ${stats.openTasks} open tasks`
      : "";
    const snap = snapshotWorkspaceId();
    const snapTail = snap ? `, snapshot-workspace=${snap}` : "";
    log(`ready (mode=${mode}${tail}${snapTail})`);
  })().catch((e) => {
    // The banner is informational only; a failure while building it must
    // not surface as an unhandled rejection and take the server down.
    log("ready-snapshot errored:", e?.message ?? String(e));
  });
}
|