@roadmapperai/mcp 0.7.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. package/AGENTS.md +47 -2
  2. package/README.md +22 -2
  3. package/package.json +1 -1
  4. package/server.mjs +1755 -99
package/server.mjs CHANGED
@@ -8,7 +8,12 @@
8
8
  * Customer-facing env vars (brand-named, no backend disclosure):
9
9
  * ROADMAPPER_BACKEND_URL — backend project URL
10
10
  * ROADMAPPER_PUBLISHABLE_KEY — public client key (RLS-scoped)
11
- * ROADMAPPER_WORKSPACE_ID — target workspace
11
+ * ROADMAPPER_WORKSPACE_ID — DEFAULT workspace (optional). With
12
+ * one install serving many repos, the
13
+ * server prefers the repo you're in
14
+ * (see "Workspace resolution" below);
15
+ * this env var is only the fallback for
16
+ * repos with no mapping / no roots.
12
17
  * ROADMAPPER_API_KEY — write auth (rmpr_… token from
13
18
  * Settings → MCP activity → API keys)
14
19
  * ROADMAPPER_BROKER_URL — optional override for the write
@@ -47,6 +52,17 @@
47
52
  * validated server-side at the broker) or ROADMAPPER_ADMIN_KEY
48
53
  * (operator path, bypasses RLS).
49
54
  *
55
+ * Workspace resolution (which workspace a tool call targets), in order:
56
+ * 1. Explicit `workspaceId` arg on the call.
57
+ * 2. The repo the agent is in: MCP `roots` → git origin → owner/repo →
58
+ * `repo_workspace_map` (the mapping you set in the Roadmapper
59
+ * GitHub-connect UI). This is what makes ONE install work across
60
+ * MANY repos/workspaces — no per-repo config or env juggling.
61
+ * 3. `.roadmapper/snapshot.json` in cwd (committed offline fallback).
62
+ * 4. ROADMAPPER_WORKSPACE_ID env (the install default).
63
+ * Mismatch between an explicit arg and #2/#3 is refused (cross-workspace
64
+ * guard) unless ROADMAPPER_ALLOW_CROSS_WORKSPACE=1.
65
+ *
50
66
  * Self-test: `node mcp/server.mjs --selftest` exercises every tool
51
67
  * against the local seed and exits 0 on success, 1 on failure. Useful
52
68
  * for verifying the install without an MCP client.
@@ -58,6 +74,10 @@
58
74
  import { readFileSync, existsSync } from "node:fs";
59
75
  import { dirname, join, resolve } from "node:path";
60
76
  import { fileURLToPath } from "node:url";
77
+ import { execFile } from "node:child_process";
78
+ import { promisify } from "node:util";
79
+
80
+ const execFileAsync = promisify(execFile);
61
81
 
62
82
  const HERE = dirname(fileURLToPath(import.meta.url));
63
83
  const REPO = resolve(HERE, "..");
@@ -94,6 +114,37 @@ function send(message) {
94
114
  process.stdout.write(JSON.stringify(message) + "\n");
95
115
  }
96
116
 
117
+ /**
118
+ * Ask the client for its current roots (workspace folders). Server→client
119
+ * request; the reply arrives as a normal JSON-RPC response in the main
120
+ * read loop, routed by id to handleClientResponse(). No-op if the client
121
+ * never declared roots support.
122
+ */
123
+ function requestClientRoots() {
124
+ if (!_clientSupportsRoots) return;
125
+ send({ jsonrpc: "2.0", id: ROOTS_LIST_REQUEST_ID, method: "roots/list" });
126
+ }
127
+
128
+ /**
129
+ * Handle a JSON-RPC *response* from the client (not a request). Only the
130
+ * roots/list reply matters today: record the roots, then re-resolve the
131
+ * root→workspace mapping and cache it for the sync resolvers.
132
+ */
133
+ async function handleClientResponse(msg) {
134
+ if (msg.id !== ROOTS_LIST_REQUEST_ID) return;
135
+ if (msg.error) {
136
+ log("roots/list failed:", msg.error?.message ?? msg.error);
137
+ return;
138
+ }
139
+ const roots = msg.result?.roots ?? [];
140
+ setClientRoots(roots);
141
+ await resolveRootWorkspace();
142
+ const { id: ws, source, repo } = resolveWorkspaceWithSource();
143
+ if (source === "repo") {
144
+ log(`workspace resolved from repo ${repo} → ${ws}`);
145
+ }
146
+ }
147
+
97
148
  function readSeed() {
98
149
  try {
99
150
  return JSON.parse(readFileSync(SEED_PATH, "utf-8"));
@@ -160,6 +211,49 @@ async function readAgentsMdForWorkspace() {
160
211
  return readAgentsMd();
161
212
  }
162
213
 
214
+ /**
215
+ * Fetch the workspace's roadmap entities (pillars + capabilities +
216
+ * tasks) via the mcp-broker — the AUTHENTICATED read path for the
217
+ * customer (rmpr_) install.
218
+ *
219
+ * Why this exists: RLS (pillars_select_visible et al.) only grants
220
+ * SELECT to an authenticated workspace member (auth.uid()). The bare
221
+ * publishable key the MCP holds is the anon role with no user, so a
222
+ * direct PostgREST read returns ZERO rows for every workspace. The
223
+ * broker validates the rmpr_ key server-side and reads with the
224
+ * service role, scoped to that key's workspace — same pattern as the
225
+ * rubric/labels reads.
226
+ *
227
+ * Returns `{ pillars, capabilities, tasks }` (raw rows) on success, or
228
+ * null on any error / when no API key or broker URL is set (operator path falls back
229
+ * to the direct service-role read in readWorkspaceProjected).
230
+ */
231
+ async function fetchWorkspaceEntitiesViaBroker() {
232
+ const { apiKey, brokerUrl } = supabaseConfig();
233
+ if (!apiKey || !brokerUrl) return null;
234
+ try {
235
+ const res = await fetch(brokerUrl, {
236
+ method: "POST",
237
+ headers: {
238
+ Authorization: `Bearer ${apiKey}`,
239
+ "content-type": "application/json",
240
+ Accept: "application/json",
241
+ },
242
+ body: JSON.stringify({ rpc: "get_workspace_entities", body: {} }),
243
+ });
244
+ if (!res.ok) return null;
245
+ const parsed = await res.json();
246
+ if (!parsed || typeof parsed !== "object") return null;
247
+ return {
248
+ pillars: Array.isArray(parsed.pillars) ? parsed.pillars : [],
249
+ capabilities: Array.isArray(parsed.capabilities) ? parsed.capabilities : [],
250
+ tasks: Array.isArray(parsed.tasks) ? parsed.tasks : [],
251
+ };
252
+ } catch {
253
+ return null;
254
+ }
255
+ }
256
+
163
257
  /**
164
258
  * Per-workspace label cache for tool descriptions.
165
259
  *
@@ -391,30 +485,228 @@ function __setSnapshotWorkspaceForTest(value) {
391
485
  _snapshotWorkspace = value;
392
486
  }
393
487
 
488
+ // ── MCP roots → per-repo workspace resolution ───────────────────────
489
+ //
490
+ // THE PROBLEM this solves: a stdio MCP server is spawned ONCE by the
491
+ // client (Claude Code) with a fixed process.cwd() — usually $HOME or
492
+ // the first project the client opened. That cwd does NOT change as the
493
+ // agent moves between repos. So the old `.roadmapper/snapshot.json in
494
+ // cwd` resolution never fired for the common case: one MCP install,
495
+ // many repos, each mapped to a different workspace. Everything fell
496
+ // through to the single env default — silently polluting one workspace
497
+ // with every repo's planning.
498
+ //
499
+ // THE FIX: MCP clients advertise `roots` (the workspace folders they're
500
+ // operating in) at initialize and via notifications/roots/list_changed.
501
+ // We capture those, derive each root's GitHub `owner/repo` from its git
502
+ // remote, and look the repo up in `repo_workspace_map` — the SAME table
503
+ // the Roadmapper GitHub-connect UI writes when you map a repo to a
504
+ // workspace. So "which workspace is this repo?" is answered by the
505
+ // server-side mapping the user already configured. Zero per-repo config.
506
+ //
507
+ // Resolution is async (DB lookup), but the per-call resolvers are sync,
508
+ // so we resolve on initialize / roots-change and cache the result here.
509
+ let _clientRoots = []; // array of absolute dir paths from the client
510
+ let _rootWorkspace = undefined; // undefined=unresolved, null=resolved-but-none, string=workspaceId
511
+ let _rootWorkspaceRepo = null; // the owner/repo that resolved (for diagnostics)
512
+ let _clientSupportsRoots = false; // set from initialize params.capabilities.roots
513
+ const ROOTS_LIST_REQUEST_ID = "roadmapper-roots-list"; // our id for the roots/list request we send
514
+
515
+ /** Convert a file:// root URI (or a plain path) to an absolute dir path. */
516
+ function rootUriToPath(uri) {
517
+ if (typeof uri !== "string" || !uri) return null;
518
+ if (uri.startsWith("file://")) {
519
+ try {
520
+ return fileURLToPath(uri);
521
+ } catch {
522
+ return null;
523
+ }
524
+ }
525
+ return uri; // some clients send a bare path
526
+ }
527
+
528
+ /** Record the client's advertised roots (called from initialize + roots/list). */
529
+ function setClientRoots(roots) {
530
+ if (!Array.isArray(roots)) return;
531
+ _clientRoots = roots
532
+ .map((r) => rootUriToPath(r?.uri ?? r))
533
+ .filter((p) => typeof p === "string" && p.length > 0);
534
+ // Invalidate the cached resolution so the next access re-derives it.
535
+ _rootWorkspace = undefined;
536
+ _rootWorkspaceRepo = null;
537
+ }
538
+
539
+ /**
540
+ * Derive `owner/repo` from a directory's git origin remote. Walks up
541
+ * to find the repo root implicitly via `git -C <dir>`. Returns null if
542
+ * the dir isn't a git repo, has no origin, or git isn't available.
543
+ */
544
+ async function repoSlugForDir(dir) {
545
+ try {
546
+ // Async so a slow/hanging git call never blocks the stdin event loop
547
+ // (this runs while handling the client's roots/list reply). 2s cap.
548
+ const out = (
549
+ await execFileAsync("git", ["-C", dir, "remote", "get-url", "origin"], {
550
+ encoding: "utf8",
551
+ timeout: 2000,
552
+ })
553
+ ).stdout.trim();
554
+ // Normalize https + ssh forms to owner/repo:
555
+ // https://github.com/owner/repo.git
556
+ // git@github.com:owner/repo.git
557
+ const m = out.match(/[/:]([^/:]+\/[^/]+?)(?:\.git)?$/);
558
+ return m ? m[1] : null;
559
+ } catch {
560
+ return null;
561
+ }
562
+ }
563
+
564
+ /**
565
+ * Look up a repo slug in repo_workspace_map (enabled rows only) and
566
+ * return its workspace_id, or null. Read via the server's existing
567
+ * Supabase REST access (service-role key preferred so RLS doesn't hide
568
+ * the row). Best-effort — any failure resolves to null and we fall
569
+ * through to snapshot/env.
570
+ */
571
+ async function workspaceForRepoSlug(slug) {
572
+ if (!slug) return null;
573
+ const { url, readKey: anonKey, writeKey } = supabaseConfig();
574
+ const key = writeKey || anonKey;
575
+ if (!url || !key) return null;
576
+ try {
577
+ const res = await fetch(
578
+ `${url}/rest/v1/repo_workspace_map?select=workspace_id&enabled=eq.true&repo=eq.${encodeURIComponent(
579
+ slug
580
+ )}&limit=1`,
581
+ { headers: { apikey: key, authorization: `Bearer ${key}` } }
582
+ );
583
+ if (!res.ok) return null;
584
+ const rows = await res.json();
585
+ return Array.isArray(rows) && rows[0]?.workspace_id
586
+ ? rows[0].workspace_id
587
+ : null;
588
+ } catch {
589
+ return null;
590
+ }
591
+ }
592
+
593
+ /**
594
+ * Resolve (and cache) the workspace implied by the client's roots, by
595
+ * mapping each root's git repo through repo_workspace_map. Async; call
596
+ * from initialize / roots-change. Sync resolvers read the cached
597
+ * `_rootWorkspace`.
598
+ *
599
+ * Collects ALL mapped roots rather than first-match, so we can detect
600
+ * the ambiguous case — two mapped repos open at once (e.g. meridian +
601
+ * outerjoyn). When that happens we pick the first but LOG a warning,
602
+ * because silently guessing a workspace is the exact footgun this whole
603
+ * feature exists to kill. (A future client that tells us the active
604
+ * root could disambiguate; today the protocol gives us an unordered set.)
605
+ */
606
+ async function resolveRootWorkspace() {
607
+ const matches = [];
608
+ for (const dir of _clientRoots) {
609
+ const slug = await repoSlugForDir(dir);
610
+ if (!slug) continue;
611
+ const ws = await workspaceForRepoSlug(slug);
612
+ if (ws) matches.push({ ws, slug });
613
+ }
614
+ const distinct = [...new Set(matches.map((m) => m.ws))];
615
+ if (distinct.length > 1) {
616
+ log(
617
+ `roots map to MULTIPLE workspaces (${matches
618
+ .map((m) => `${m.slug}→${m.ws}`)
619
+ .join(", ")}). Using "${matches[0].ws}". ` +
620
+ `Pass workspaceId explicitly on calls to target a specific one.`
621
+ );
622
+ }
623
+ if (matches.length > 0) {
624
+ _rootWorkspace = matches[0].ws;
625
+ _rootWorkspaceRepo = matches[0].slug;
626
+ return _rootWorkspace;
627
+ }
628
+ _rootWorkspace = null;
629
+ _rootWorkspaceRepo = null;
630
+ return null;
631
+ }
632
+
633
+ /** Cached root-derived workspace id (sync read). null if none/unresolved. */
634
+ function rootWorkspaceId() {
635
+ return _rootWorkspace ?? null;
636
+ }
637
+
638
+ // Test hook: seed the root-resolution cache without touching the client
639
+ // protocol or the network.
640
+ function __setRootWorkspaceForTest(id, repo = null) {
641
+ _rootWorkspace = id;
642
+ _rootWorkspaceRepo = repo;
643
+ }
644
+
394
645
  /**
395
646
  * Resolve the workspace id for a tool call. Resolution order:
396
647
  * 1. Explicit `workspaceId` arg on the call.
397
- * 2. `.roadmapper/snapshot.json` in the cwd (committed by the
398
- * snapshot-roadmaps cron names the workspace this repo
399
- * belongs to).
400
- * 3. Env-driven `SUPABASE_WORKSPACE_ID`.
401
- * 4. null.
648
+ * 2. Client roots git remote → repo_workspace_map (the repo the
649
+ * agent is actually working in, mapped via the GitHub-connect UI).
650
+ * 3. `.roadmapper/snapshot.json` in the cwd (offline fallback).
651
+ * 4. Env-driven `SUPABASE_WORKSPACE_ID` (the install default).
652
+ * 5. null.
402
653
  *
403
- * Snapshot beats env because the snapshot reflects "where the agent
404
- * is right now", while the env reflects "where the operator pointed
405
- * the MCP install when they configured it". Cwd-specific wins.
654
+ * Roots beat snapshot beat env: roots reflect "the repo open right now"
655
+ * (most specific), snapshot reflects "this checkout's committed
656
+ * workspace", env reflects "where the operator pointed the install".
406
657
  *
407
658
  * Mutators with an explicit `workspaceId` arg that conflicts with the
408
- * cwd snapshot are refused upstream in `callTool` — see the
409
- * cross-workspace guard there.
659
+ * resolved repo/snapshot workspace are refused upstream in `callTool` —
660
+ * see the cross-workspace guard there.
410
661
  */
411
662
  function resolveWorkspaceId(argWorkspaceId) {
412
663
  if (argWorkspaceId) return argWorkspaceId;
664
+ const root = rootWorkspaceId();
665
+ if (root) return root;
413
666
  const snap = snapshotWorkspaceId();
414
667
  if (snap) return snap;
415
668
  return supabaseConfig().workspaceId ?? null;
416
669
  }
417
670
 
671
+ // The workspace id a fresh install ships with — the bundled seed/demo
672
+ // data lives here ("delete it once you add your own"). Mirrors the
673
+ // VITE_SUPABASE_WORKSPACE_ID default in .env.example. Used by the
674
+ // seed-workspace write guard to catch accidental writes to demo data.
675
+ const SEED_WORKSPACE_ID = "default";
676
+
677
+ /**
678
+ * Same resolution as resolveWorkspaceId, but also reports WHERE the id
679
+ * came from. The silent fall-through to the env default is the #1
680
+ * wrong-workspace footgun: launch the agent outside a connected repo
681
+ * checkout and every call quietly targets the install's env default
682
+ * (often the seed workspace) with nothing saying so. Surfacing the
683
+ * source — "arg" / "repo" / "snapshot" / "env" / "none" — is the cheapest
684
+ * guardrail, and it feeds both get_active_workspace and the snapshot's
685
+ * resolvedFrom field.
686
+ */
687
+ function resolveWorkspaceWithSource(argWorkspaceId) {
688
+ if (argWorkspaceId) return { id: argWorkspaceId, source: "arg" };
689
+ const root = rootWorkspaceId();
690
+ if (root) return { id: root, source: "repo", repo: _rootWorkspaceRepo };
691
+ const snap = snapshotWorkspaceId();
692
+ if (snap) return { id: snap, source: "snapshot" };
693
+ const envWs = supabaseConfig().workspaceId;
694
+ if (envWs) return { id: envWs, source: "env" };
695
+ return { id: null, source: "none" };
696
+ }
697
+
698
+ /**
699
+ * Which write path is active, for diagnostics. The customer path
700
+ * (rmpr_ key → mcp-broker) keeps the service-role key off this machine;
701
+ * the operator path holds a service-role-equivalent key locally.
702
+ */
703
+ function writeMode() {
704
+ const { apiKey, writeKey } = supabaseConfig();
705
+ if (apiKey) return "broker"; // rmpr_ key, validated server-side
706
+ if (writeKey) return "operator"; // service-role-equivalent key, local
707
+ return "read-only";
708
+ }
709
+
418
710
  /**
419
711
  * Read the workspace's current entity state directly from the
420
712
  * normalized tables (Stage 3 Piece 6c — `workspaces.edits` column
@@ -426,8 +718,31 @@ function resolveWorkspaceId(argWorkspaceId) {
426
718
  * agent reads down to the caller's visible_pillars allow-list.
427
719
  */
428
720
  async function readWorkspaceProjected(wsIdOverride) {
429
- const { url, readKey: anonKey, writeKey } = supabaseConfig();
721
+ const { url, readKey: anonKey, writeKey, apiKey } = supabaseConfig();
430
722
  const workspaceId = resolveWorkspaceId(wsIdOverride);
723
+
724
+ // Customer path: when an rmpr_ API key is set, read through the broker.
725
+ // A direct PostgREST read with the publishable (anon) key returns zero
726
+ // rows — RLS only grants SELECT to authenticated workspace members. The
727
+ // broker authenticates the key server-side and reads (service role)
728
+ // scoped to THAT key's workspace. The key pins one workspace, so a
729
+ // wsIdOverride for a different workspace isn't readable on the customer
730
+ // path anyway — the broker correctly returns the key's workspace, and
731
+ // the cross-workspace guard upstream already blocks writes elsewhere.
732
+ if (apiKey) {
733
+ const ent = await fetchWorkspaceEntitiesViaBroker();
734
+ if (ent) {
735
+ return {
736
+ themes: ent.pillars.map(rowToThemeProjected),
737
+ capabilities: ent.capabilities.map(rowToCapabilityProjected),
738
+ tasks: ent.tasks.map(rowToTaskProjected),
739
+ };
740
+ }
741
+ // Broker failed — fall through to the direct read below. On a pure
742
+ // customer install (anon key only) that returns null; operator
743
+ // installs that ALSO set a service key still get a working read.
744
+ }
745
+
431
746
  const key = writeKey || anonKey;
432
747
  if (!url || !key || !workspaceId) return null;
433
748
  const filter = `workspace_id=eq.${encodeURIComponent(workspaceId)}`;
@@ -556,6 +871,91 @@ function stripUndefined(o) {
556
871
  return o;
557
872
  }
558
873
 
874
+ // ---- Token-efficiency: light projections + pagination ----------------
875
+ //
876
+ // Read tools return light rows BY DEFAULT (detail:true opts into full
877
+ // rows). The heavy fields — prs[], acceptance[], acceptanceGrades[],
878
+ // outcomeReadings[], dependsOn[], and long summary/description text —
879
+ // are ~95% of a row's token cost on a large workspace, so dropping
880
+ // them turns a naive list_tasks() from ~81KB into <1KB. The cap is a
881
+ // backstop, not the lever; the projection is.
882
+
883
+ const LIST_DEFAULT_LIMIT = 50;
884
+ const LIST_MAX_LIMIT = 200;
885
+
886
+ // Light task row: identity + the fields you triage on. No prs/
887
+ // acceptance/summary. summary is replaced by a presence flag so the
888
+ // agent knows detail exists without paying for it.
889
+ function taskLight(t) {
890
+ return stripUndefined({
891
+ id: t.id,
892
+ title: t.title,
893
+ status: t.status,
894
+ priority: t.priority,
895
+ effort: t.effort,
896
+ kind: t.kind,
897
+ capabilityId: t.capabilityId,
898
+ pillarId: t.pillarId,
899
+ owner: t.owner,
900
+ prCount: Array.isArray(t.prs) ? t.prs.length : undefined,
901
+ hasSummary: t.summary ? true : undefined,
902
+ archived: t.archived,
903
+ });
904
+ }
905
+
906
+ // Light capability row: identity + status signals. No
907
+ // outcomeReadings[]/dependsOn[]/description. outcome kept — it's the
908
+ // one field the agent needs to judge fit, and it's bounded text.
909
+ //
910
+ // status is the EFFECTIVE status (derived from child tasks when the
911
+ // row has no explicit status), so the light row agrees with how the
912
+ // snapshot/list filters decided to include it. Pass `tasks` to enable
913
+ // the derivation; without it we fall back to the raw column (which is
914
+ // often null — that's the bug this guards against).
915
+ function capabilityLight(c, tasks) {
916
+ return stripUndefined({
917
+ id: c.id,
918
+ pillarId: c.pillarId,
919
+ name: c.name,
920
+ status: tasks ? effectiveCapabilityStatus(c, tasks) : c.status,
921
+ outcome: c.outcome,
922
+ outcomeStatus: c.outcomeStatus,
923
+ roi: c.roi,
924
+ target: c.target,
925
+ archived: c.archived,
926
+ });
927
+ }
928
+
929
+ // Clamp a requested limit to [1, LIST_MAX_LIMIT], default 50.
930
+ function clampLimit(raw) {
931
+ const n = Number.isFinite(raw) ? Math.floor(raw) : LIST_DEFAULT_LIMIT;
932
+ return Math.min(LIST_MAX_LIMIT, Math.max(1, n));
933
+ }
934
+
935
+ // Apply limit + light/full projection to a row list and wrap with a
936
+ // {total, returned, truncated} envelope so the agent knows whether to
937
+ // narrow its filter rather than page blindly.
938
+ function paginateRows(rows, args, lightFn, ctx) {
939
+ const limit = clampLimit(args?.limit);
940
+ const detail = args?.detail === true;
941
+ const sliced = rows.slice(0, limit);
942
+ return {
943
+ total: rows.length,
944
+ returned: sliced.length,
945
+ truncated: rows.length > sliced.length,
946
+ // ctx is passed through to the mapper (capabilityLight uses it to
947
+ // derive effective status from tasks); taskLight ignores it.
948
+ items: detail ? sliced : sliced.map((r) => lightFn(r, ctx)),
949
+ };
950
+ }
951
+
952
+ // Compact JSON (no 2-space pretty-print) — pretty-printing is ~20-30%
953
+ // pure-whitespace tokens across every list return. Humans read these
954
+ // through a client that re-formats; the wire form should be compact.
955
+ function compactResult(obj) {
956
+ return textResult(JSON.stringify(obj));
957
+ }
958
+
559
959
  /**
560
960
  * Invoke a Postgres function exposed via PostgREST. Used by the
561
961
  * write tools so the read-modify-write happens inside a single
@@ -802,11 +1202,25 @@ function validateConfidence(confidence) {
802
1202
  * parent theme's target. Caller can still proceed — but the
803
1203
  * warning surfaces in dryRun output so the agent can rethink.
804
1204
  */
1205
+ // Compact dollar formatter (ROI is stored as RAW DOLLARS). Local copy
1206
+ // of src/lib/util.ts formatCompactMoney — the .mjs can't import the TS.
1207
+ function fmtMoney(dollars) {
1208
+ if (dollars == null || !Number.isFinite(dollars) || dollars <= 0) return "$0";
1209
+ const f = (n, s) => {
1210
+ const r = Math.round(n * 10) / 10;
1211
+ return `$${Number.isInteger(r) ? r.toFixed(0) : r.toFixed(1)}${s}`;
1212
+ };
1213
+ if (dollars < 1e3) return `$${Math.round(dollars)}`;
1214
+ if (dollars < 1e6) return f(dollars / 1e3, "K");
1215
+ if (dollars < 1e9) return f(dollars / 1e6, "M");
1216
+ return f(dollars / 1e9, "B");
1217
+ }
1218
+
805
1219
  function warnRoiVsTheme(roi, theme) {
806
1220
  if (roi == null || theme?.targetRoi == null) return null;
807
1221
  const floor = theme.targetRoi * 0.7;
808
1222
  if (roi < floor) {
809
- return `roi $${roi}M is well below 70% of theme "${theme.name}" target ($${theme.targetRoi}M). Justify the gap in your outcome, or rethink the parent theme.`;
1223
+ return `roi ${fmtMoney(roi)} is well below 70% of theme "${theme.name}" target (${fmtMoney(theme.targetRoi)}). Justify the gap in your outcome, or rethink the parent theme.`;
810
1224
  }
811
1225
  return null;
812
1226
  }
@@ -996,7 +1410,7 @@ const TOOLS = [
996
1410
  {
997
1411
  name: "list_capabilities",
998
1412
  description:
999
- "List active capabilities (quarterly bets). Excludes delivered and archived capabilities by default — agents should target work that's still in flight.\n\n" +
1413
+ "List active capabilities (quarterly bets). Excludes delivered and archived capabilities by default — agents should target work that's still in flight. Returns LIGHT rows by default (id/pillarId/name/status/outcome/outcomeStatus/roi/target), capped at 50; pass detail:true for full rows incl. outcomeReadings/dependsOn/description. Response envelope: { total, returned, truncated, items }.\n\n" +
1000
1414
  "USE WHEN: planning a feature and need to find the right parent capability, reviewing in-flight bets, or scoping what's still on the table this quarter.\n" +
1001
1415
  "PREREQUISITE: none — read-only. For routing a specific work description, prefer suggest_capability_for which ranks by token overlap.\n" +
1002
1416
  "ANTI-PATTERN: do not call to find a capability when you already know its id (use get_roadmap_snapshot for richer context). Pass includeDelivered=true or includeArchived=true only when reviewing historical bets — almost never in a planning session.\n" +
@@ -1007,6 +1421,17 @@ const TOOLS = [
1007
1421
  themeId: { type: "string" },
1008
1422
  includeDelivered: { type: "boolean" },
1009
1423
  includeArchived: { type: "boolean" },
1424
+ detail: {
1425
+ type: "boolean",
1426
+ description:
1427
+ "Return full capability rows (outcomeReadings, dependsOn, description) instead of light rows. Default false.",
1428
+ },
1429
+ limit: {
1430
+ type: "integer",
1431
+ minimum: 1,
1432
+ maximum: 200,
1433
+ description: "Max rows to return. Default 50, hard cap 200.",
1434
+ },
1010
1435
  workspaceId: { type: "string" },
1011
1436
  },
1012
1437
  additionalProperties: false,
@@ -1015,10 +1440,10 @@ const TOOLS = [
1015
1440
  {
1016
1441
  name: "list_tasks",
1017
1442
  description:
1018
- "List tasks. Filter by capabilityId or status. Excludes archived tasks by default.\n\n" +
1443
+ "List tasks. Filter by capabilityId or status. Excludes archived tasks by default. Returns LIGHT rows by default (id/title/status/priority/effort/kind/capabilityId/owner + prCount + hasSummary), capped at 50; pass detail:true for full rows incl. prs/acceptance/summary, and limit to raise the cap (max 200). The response is an envelope: { total, returned, truncated, items }.\n\n" +
1019
1444
  "USE WHEN: surveying what already exists under a capability before proposing a new task (avoid duplicates), reviewing a status bucket (e.g. all in_progress), or answering 'what's open right now'.\n" +
1020
1445
  "PREREQUISITE: none — read-only.\n" +
1021
- "ANTI-PATTERN: do not call to track in-progress work within a single conversation — use the harness TodoWrite tool. Do not call without a filter when the workspace has many tasks; scope by capabilityId or status. Pass includeArchived=true only when reviewing closed history.\n" +
1446
+ "ANTI-PATTERN: do not call to track in-progress work within a single conversation — use the harness TodoWrite tool. If truncated:true, NARROW the filter (capabilityId/status) rather than cranking limit light rows are cheap but full detail on hundreds of rows is not. Reach for detail:true only when you actually need prs/acceptance, ideally with a filter.\n" +
1022
1447
  "EXAMPLE: list_tasks({ capabilityId: 'CAP-XXX', status: 'in_progress' })",
1023
1448
  inputSchema: {
1024
1449
  type: "object",
@@ -1029,6 +1454,49 @@ const TOOLS = [
1029
1454
  enum: ["delivered", "in_progress", "planned", "exploring"],
1030
1455
  },
1031
1456
  includeArchived: { type: "boolean" },
1457
+ detail: {
1458
+ type: "boolean",
1459
+ description:
1460
+ "Return full task rows (prs, acceptance, acceptanceGrades, summary, dependsOn) instead of light rows. Default false.",
1461
+ },
1462
+ limit: {
1463
+ type: "integer",
1464
+ minimum: 1,
1465
+ maximum: 200,
1466
+ description: "Max rows to return. Default 50, hard cap 200.",
1467
+ },
1468
+ workspaceId: { type: "string" },
1469
+ },
1470
+ additionalProperties: false,
1471
+ },
1472
+ },
1473
+ {
1474
+ name: "list_uncategorized_tasks",
1475
+ description:
1476
+ "List tasks with no parent capability (capabilityId is null) — the orphans the GitHub webhook auto-created from PRs that carried no Roadmapper-Capability trailer and matched no capability. Excludes archived tasks by default.\n\n" +
1477
+ "USE WHEN: triaging the roadmap — finding work that shipped but never got filed under a quarterly bet, so it's invisible in capability rollups, burndown, and the outlook view. Pair with suggest_capability_for({ taskId }) to find each one's best-fit home, then move_task to file it.\n" +
1478
+ "PREREQUISITE: none — read-only.\n" +
1479
+ "ANTI-PATTERN: do not use to list ALL tasks — that's list_tasks. This is specifically the unparented backlog. A long result here is a signal that PRs aren't carrying capability trailers, not that you should ignore it.\n" +
1480
+ "EXAMPLE: list_uncategorized_tasks({ status: 'in_progress' })",
1481
+ inputSchema: {
1482
+ type: "object",
1483
+ properties: {
1484
+ status: {
1485
+ type: "string",
1486
+ enum: ["delivered", "in_progress", "planned", "exploring"],
1487
+ },
1488
+ includeArchived: { type: "boolean" },
1489
+ detail: {
1490
+ type: "boolean",
1491
+ description:
1492
+ "Return full task rows instead of light rows. Default false.",
1493
+ },
1494
+ limit: {
1495
+ type: "integer",
1496
+ minimum: 1,
1497
+ maximum: 200,
1498
+ description: "Max rows to return. Default 50, hard cap 200.",
1499
+ },
1032
1500
  workspaceId: { type: "string" },
1033
1501
  },
1034
1502
  additionalProperties: false,
@@ -1065,10 +1533,10 @@ const TOOLS = [
1065
1533
  {
1066
1534
  name: "get_roadmap_snapshot",
1067
1535
  description:
1068
- "Single-call orient: themes + active capabilities + in-flight tasks for the workspace, plus the resolved workspaceId. Always live. Excludes archived entities by default.\n\n" +
1536
+ "Single-call orient: themes + active capabilities + in-flight tasks for the workspace, plus the resolved workspaceId. Always live. Excludes archived entities by default. Returns LIGHT rows by default and caps the task list at 50 (the counts block always carries true totals); pass detail:true for full rows. Response carries mode ('summary'|'detail') and tasksTruncated.\n\n" +
1069
1537
  "USE WHEN: starting fresh in a workspace and need the whole canonical model in one read, or before opening a PR to confirm which workspace + capability to attach to.\n" +
1070
1538
  "PREREQUISITE: none — read-only. Often the very first call after get_agents_md.\n" +
1071
- "ANTI-PATTERN: do not call repeatedly within one planning pass; the data doesn't change inside a single session. Use list_tasks / list_capabilities if you need just one slice. Pass includeArchived=true only when surveying historical state.\n" +
1539
+ "ANTI-PATTERN: do not call repeatedly within one planning pass; the data doesn't change inside a single session. Avoid detail:true on large workspaces — use list_tasks with a filter for the rows you actually need. Pass includeArchived=true only when surveying historical state.\n" +
1072
1540
  "EXAMPLE: get_roadmap_snapshot()",
1073
1541
  inputSchema: {
1074
1542
  type: "object",
@@ -1079,6 +1547,31 @@ const TOOLS = [
1079
1547
  "Optional. Override the env-default workspace. Useful when the agent is operating against a .roadmapper/snapshot.json that names its own workspace.",
1080
1548
  },
1081
1549
  includeArchived: { type: "boolean" },
1550
+ detail: {
1551
+ type: "boolean",
1552
+ description:
1553
+ "Return full theme/capability/task rows instead of light ones. Default false. Can be large on big workspaces.",
1554
+ },
1555
+ },
1556
+ additionalProperties: false,
1557
+ },
1558
+ },
1559
+ {
1560
+ name: "get_active_workspace",
1561
+ description:
1562
+ "Report the workspace this server will act on RIGHT NOW and HOW it was resolved — arg / .roadmapper snapshot / env default — plus whether writes are enabled and via which path (broker vs operator). Cheap: no roadmap data, no DB read.\n\n" +
1563
+ "USE WHEN: you're unsure which workspace is active; before the FIRST mutating call in a session; after changing directories. Especially important when the agent was launched outside a connected repo checkout, where the env default (often the seed workspace) silently wins.\n" +
1564
+ "PREREQUISITE: none — read-only.\n" +
1565
+ "ANTI-PATTERN: don't use it to inspect roadmap contents — that's get_roadmap_snapshot. This only answers 'where am I pointed'.\n" +
1566
+ "EXAMPLE: get_active_workspace()",
1567
+ inputSchema: {
1568
+ type: "object",
1569
+ properties: {
1570
+ workspaceId: {
1571
+ type: "string",
1572
+ description:
1573
+ "Optional. Resolve as if this override were passed to a real call, to preview which workspace it would target.",
1574
+ },
1082
1575
  },
1083
1576
  additionalProperties: false,
1084
1577
  },
@@ -1092,7 +1585,7 @@ const TOOLS = [
1092
1585
  "ANTI-PATTERN: do not call to track in-progress work within a single conversation — use the harness TodoWrite tool. Do not call to log a bug discovered during implementation — file in the issue tracker, not roadmapper. Do not call when you don't know which capability the task belongs under; resolve that first.\n" +
1093
1586
  "REQUIRED FIELDS: capabilityId, title, effort. Always size the task — XS (≤2h) / S (≤1d) / M (~1-3d) / L (~1-2w) / XL (>2w). Effort drives capability % roll-up weighting; do not omit.\n" +
1094
1587
  "EXAMPLE: propose_task({ capabilityId: 'CAP-XXX', title: 'Drag-and-drop block reorder', effort: 'M', acceptance: ['Block can be dragged with mouse + keyboard', 'Order persists across reloads'], idempotencyKey: 'session-1-task-3' })\n\n" +
1095
- "Requires SUPABASE_SERVICE_ROLE_KEY. Pass idempotencyKey so retries don't duplicate. Pass dryRun: true to validate without writing. Pass workspaceId to target a workspace other than the env default.",
1588
+ "Requires write auth (set ROADMAPPER_API_KEY). Pass idempotencyKey so retries don't duplicate. Pass dryRun: true to validate without writing. Pass workspaceId to target a workspace other than the env default.",
1096
1589
  inputSchema: {
1097
1590
  type: "object",
1098
1591
  properties: {
@@ -1130,15 +1623,15 @@ const TOOLS = [
1130
1623
  "USE WHEN: the work the user is describing genuinely doesn't fit ANY existing theme, AND the user explicitly says they want a new strategic direction. Almost never the right answer in a planning session.\n" +
1131
1624
  "PREREQUISITE: get_agents_md once this session (enforced). Theme discovery once this session, satisfied by suggest_theme_for (preferred — returns ranked matches with a fit signal), list_themes, or get_roadmap_snapshot. Enforced — the server returns discovery_missing with a fix field if you skip it. Duplicating a theme is the most common failure mode; the gate stops it.\n" +
1132
1625
  "ANTI-PATTERN: do not call to organize a quarter of work — that's a capability, not a theme. Do not call because the existing themes feel too coarse — they're SUPPOSED to be coarse. Use propose_capability under an existing theme instead.\n" +
1133
- "EXAMPLE: propose_theme({ name: 'AI Agent Reliability', description: 'Multi-year bet on making agent workflows reproducible.', targetRoi: 20, idempotencyKey: 'session-1-theme-1' })\n\n" +
1134
- "Requires SUPABASE_SERVICE_ROLE_KEY. Pass idempotencyKey so retries don't duplicate. Pass dryRun: true to validate without writing. Pass workspaceId to target a workspace other than the env default.",
1626
+ "EXAMPLE: propose_theme({ name: 'AI Agent Reliability', description: 'Multi-year bet on making agent workflows reproducible.', targetRoi: 20000000, idempotencyKey: 'session-1-theme-1' })\n\n" +
1627
+ "Requires write auth (set ROADMAPPER_API_KEY). targetRoi is RAW ANNUAL DOLLARS (e.g. 20000000 = $20M), not millions. Pass idempotencyKey so retries don't duplicate. Pass dryRun: true to validate without writing. Pass workspaceId to target a workspace other than the env default.",
1135
1628
  inputSchema: {
1136
1629
  type: "object",
1137
1630
  properties: {
1138
1631
  name: { type: "string" },
1139
1632
  description: { type: "string" },
1140
1633
  color: { type: "string" },
1141
- targetRoi: { type: "number" },
1634
+ targetRoi: { type: "number", description: "Annual ROI target in raw dollars (e.g. 20000000 = $20M)." },
1142
1635
  idempotencyKey: { type: "string" },
1143
1636
  dryRun: { type: "boolean" },
1144
1637
  workspaceId: { type: "string" },
@@ -1155,7 +1648,7 @@ const TOOLS = [
1155
1648
  "PREREQUISITE: get_agents_md once this session (enforced). suggest_capability_for (or list_capabilities / get_roadmap_snapshot / the roadmapper://capabilities/active resource) once this session (enforced — server returns discovery_missing with a fix field if you skip it). The server WILL also reject if token overlap with an existing capability is too high; the gate is upstream of that.\n" +
1156
1649
  "ANTI-PATTERN: do not call for a single deliverable — that's a task. Do not call when the outcome is fuzzy ('improve X') — the server rejects non-falsifiable outcomes. Do not call when an existing capability is close-enough; capabilities cost human attention to maintain.\n" +
1157
1650
  "EXAMPLE: propose_capability({ pillarId: 'TH-XXX', name: 'Self-serve landing page builder', outcome: 'Customers publish a landing page in under 5 minutes without engineering involvement.', reach: 200, impact: 1, confidence: 70, idempotencyKey: 'session-1-cap-1' })\n\n" +
1158
- "Server rejects empty / non-falsifiable outcomes, confidence >95, and names <8 chars. Requires SUPABASE_SERVICE_ROLE_KEY. Pass idempotencyKey, dryRun, workspaceId as for propose_task.",
1651
+ "Server rejects empty / non-falsifiable outcomes, confidence >95, and names <8 chars. Requires write auth (set ROADMAPPER_API_KEY). Pass idempotencyKey, dryRun, workspaceId as for propose_task.",
1159
1652
  inputSchema: {
1160
1653
  type: "object",
1161
1654
  properties: {
@@ -1166,7 +1659,7 @@ const TOOLS = [
1166
1659
  reach: { type: "number" },
1167
1660
  impact: { type: "number", enum: [3, 2, 1, 0.5, 0.25] },
1168
1661
  confidence: { type: "number", minimum: 0, maximum: 100 },
1169
- roi: { type: "number" },
1662
+ roi: { type: "number", description: "Estimated annual ROI in raw dollars (e.g. 2500000 = $2.5M)." },
1170
1663
  specRef: { type: "string" },
1171
1664
  idempotencyKey: { type: "string" },
1172
1665
  dryRun: { type: "boolean" },
@@ -1184,7 +1677,7 @@ const TOOLS = [
1184
1677
  "PREREQUISITE: get_agents_md once this session (enforced — defines grading dimensions). Call get_task first to read the acceptance criteria in order — indexes are positional.\n" +
1185
1678
  "ANTI-PATTERN: do not call before the implementation actually works — fabricated passes destroy the trust this signal carries. Do not call without a note when status='fail' — the reviewer needs the failure mode.\n" +
1186
1679
  "EXAMPLE: submit_acceptance_grades({ taskId: 'TK-100201', grades: [{ index: 0, status: 'pass' }, { index: 1, status: 'fail', note: 'Reload-persistence is flaky on Firefox; tracked in TK-100202' }] })\n\n" +
1187
- "Requires SUPABASE_SERVICE_ROLE_KEY. Pass workspaceId to target a workspace other than the env default.",
1680
+ "Requires write auth (set ROADMAPPER_API_KEY). Pass workspaceId to target a workspace other than the env default.",
1188
1681
  inputSchema: {
1189
1682
  type: "object",
1190
1683
  properties: {
@@ -1211,19 +1704,23 @@ const TOOLS = [
1211
1704
  {
1212
1705
  name: "suggest_capability_for",
1213
1706
  description:
1214
- "Given a free-text description of work, return the top existing capabilities ranked by token overlap.\n\n" +
1215
- "USE WHEN: about to propose tasks or a capability — call this FIRST to find an existing parent. If any returned score > 0.4, strongly prefer attaching tasks there over creating a new capability.\n" +
1707
+ "Return the top existing capabilities ranked by token overlap with either a free-text description OR an existing task (pass taskId and the server synthesizes the query from the task's title + summary).\n\n" +
1708
+ "USE WHEN: about to propose tasks or a capability — call this FIRST to find an existing parent. If any returned score > 0.4, strongly prefer attaching tasks there over creating a new capability. With taskId, this is the triage companion to list_uncategorized_tasks: rank a home for an orphaned task, then move_task it.\n" +
1216
1709
  "PREREQUISITE: none — read-only.\n" +
1217
- "ANTI-PATTERN: do not call after you've already decided to create a new capability — that's the case this tool is meant to prevent. Do not interpret weak matches (<0.2) as fits; if nothing's close, propose_capability is the right next call (after confirming with the user).\n" +
1218
- "EXAMPLE: suggest_capability_for({ description: 'multi-tenant landing page builder with drag-and-drop blocks' })",
1710
+ "ANTI-PATTERN: do not call after you've already decided to create a new capability — that's the case this tool is meant to prevent. Do not interpret weak matches (<0.2) as fits; if nothing's close, propose_capability is the right next call (after confirming with the user). Pass exactly one of description / taskId.\n" +
1711
+ "EXAMPLE: suggest_capability_for({ description: 'multi-tenant landing page builder with drag-and-drop blocks' }) — or — suggest_capability_for({ taskId: 'TK-100201' })",
1219
1712
  inputSchema: {
1220
1713
  type: "object",
1221
1714
  properties: {
1222
1715
  description: { type: "string" },
1716
+ taskId: {
1717
+ type: "string",
1718
+ description:
1719
+ "TK-NNNNNN. When set, the query is built from the task's title + summary. Mutually exclusive with description.",
1720
+ },
1223
1721
  limit: { type: "integer", minimum: 1, maximum: 25 },
1224
1722
  workspaceId: { type: "string" },
1225
1723
  },
1226
- required: ["description"],
1227
1724
  additionalProperties: false,
1228
1725
  },
1229
1726
  },
@@ -1255,7 +1752,7 @@ const TOOLS = [
1255
1752
  "PREREQUISITE: get_agents_md once this session (enforced). The task id must exist (get_task / list_tasks to confirm).\n" +
1256
1753
  "ANTI-PATTERN: do not call as a substitute for the Roadmapper-Task: PR-body trailer convention — the trailer is the durable contract; link_pr is the instant-feedback shortcut. Do not call for PRs that don't have a parent task in roadmapper.\n" +
1257
1754
  "EXAMPLE: link_pr({ taskId: 'TK-100201', repo: 'acme/frontend', number: 1234, title: 'Drag block reorder', authorGithub: 'octocat' })\n\n" +
1258
- "Idempotent by (repo, number) — re-calling with an already-linked PR returns idempotent:true. Requires SUPABASE_SERVICE_ROLE_KEY. Pass workspaceId to target a workspace other than the env default.",
1755
+ "Idempotent by (repo, number) — re-calling with an already-linked PR returns idempotent:true. Requires write auth (set ROADMAPPER_API_KEY). Pass workspaceId to target a workspace other than the env default.",
1259
1756
  inputSchema: {
1260
1757
  type: "object",
1261
1758
  properties: {
@@ -1304,7 +1801,7 @@ const TOOLS = [
1304
1801
  "PREREQUISITE: get_agents_md once this session (enforced). The capability must exist.\n" +
1305
1802
  "ANTI-PATTERN: do not use to declare the FINAL outcome (use outcomeStatus via update_capability for that). Readings are observations along the way, not the verdict.\n" +
1306
1803
  "EXAMPLE: record_outcome_reading({ capabilityId: 'CAP-9F2C7E', value: 0.41, asOf: '2026-05-12', source: 'mixpanel: activated_within_7d weekly', note: 'sample size 4218' })\n\n" +
1307
- "Requires SUPABASE_SERVICE_ROLE_KEY. Audit log records each reading as 'outcome_reading_recorded'.",
1804
+ "Requires write auth (set ROADMAPPER_API_KEY). Audit log records each reading as 'outcome_reading_recorded'.",
1308
1805
  inputSchema: {
1309
1806
  type: "object",
1310
1807
  properties: {
@@ -1338,6 +1835,38 @@ const TOOLS = [
1338
1835
  additionalProperties: false,
1339
1836
  },
1340
1837
  },
1838
+ {
1839
+ name: "detect_capability_gaps",
1840
+ description:
1841
+ "Find CLUSTERS of uncategorized tasks that don't fit any existing capability — i.e. work that's accumulating with no quarterly bet to hold it. This is the 'a capability is missing' signal: not 'file this orphan under an existing cap' (that's suggest_capability_for + move_task), but 'these N orphans share a theme that no capability covers — consider proposing one.'\n\n" +
1842
+ "How it works: takes every uncategorized non-archived task, scores its best fit against active capabilities, keeps the ones with no decent fit ('homeless'), then clusters the homeless tasks by shared vocabulary. Each returned cluster has shared keywords, member task ids, and a suggested capability name.\n" +
1843
+ "USE WHEN: triaging a webhook-fed workspace (lots of orphans), at quarterly review, or any time you want to know whether the roadmap is missing a bet. Pair with list_uncategorized_tasks.\n" +
1844
+ "PREREQUISITE: none — read-only. Counts as capability discovery (it enumerates every active capability to score fit), so it satisfies the propose_capability gate.\n" +
1845
+ "ANTI-PATTERN: don't treat a cluster as an automatic mandate to create a capability — capabilities are quarterly bets, a human confirms. A single homeless task is not a gap; that's just an orphan to file. Tune minClusterSize/fitThreshold rather than acting on noise.\n" +
1846
+ "EXAMPLE: detect_capability_gaps({ minClusterSize: 3 })",
1847
+ inputSchema: {
1848
+ type: "object",
1849
+ properties: {
1850
+ minClusterSize: {
1851
+ type: "integer",
1852
+ minimum: 2,
1853
+ maximum: 50,
1854
+ description:
1855
+ "Min homeless tasks sharing a theme to report as a gap. Default 3. A cluster smaller than this is noise, not a missing bet.",
1856
+ },
1857
+ fitThreshold: {
1858
+ type: "number",
1859
+ minimum: 0,
1860
+ maximum: 1,
1861
+ description:
1862
+ "A task is 'homeless' when its best Jaccard fit against any active capability is below this. Default 0.2 (the 'medium' bar). Raise to be stricter about what counts as already-covered.",
1863
+ },
1864
+ includeArchived: { type: "boolean" },
1865
+ workspaceId: { type: "string" },
1866
+ },
1867
+ additionalProperties: false,
1868
+ },
1869
+ },
1341
1870
  ];
1342
1871
 
1343
1872
  /**
@@ -1382,7 +1911,7 @@ function archiveLifecycleTools() {
1382
1911
  "PREREQUISITE: get_agents_md once this session (enforced). For capabilities/themes, every active child must already be archived — the server refuses with a count of blocking children. For tasks, no child check.\n" +
1383
1912
  `ANTI-PATTERN: do not archive a ${kind} you might come back to within the same session — prefer moving it (move_${kind === "theme" ? "capability" : kind}) or updating its status. Archive is the right tool for "this is closed out, get it out of the picker."\n` +
1384
1913
  `EXAMPLE: ${example}\n\n` +
1385
- "Idempotent: re-archiving an already-archived entity returns { idempotent: true } and emits no audit row. Requires SUPABASE_SERVICE_ROLE_KEY. Pass workspaceId to target a workspace other than the env default.",
1914
+ "Idempotent: re-archiving an already-archived entity returns { idempotent: true } and emits no audit row. Requires write auth (set ROADMAPPER_API_KEY). Pass workspaceId to target a workspace other than the env default.",
1386
1915
  inputSchema: {
1387
1916
  type: "object",
1388
1917
  properties: {
@@ -1408,7 +1937,7 @@ function archiveLifecycleTools() {
1408
1937
  "PREREQUISITE: get_agents_md once this session (enforced). The parent (if any) must be active — cannot unarchive a task whose capability is archived, or a capability whose theme is archived. Unarchive the parent first.\n" +
1409
1938
  "ANTI-PATTERN: do not unarchive en masse without thinking — every unarchive re-floats noise into list views. If you're recovering from an over-aggressive archive sweep, work top-down.\n" +
1410
1939
  `EXAMPLE: un${example.replace("archive", "archive")}\n\n` +
1411
- "Idempotent: unarchiving an already-active entity returns { idempotent: true }. Requires SUPABASE_SERVICE_ROLE_KEY.",
1940
+ "Idempotent: unarchiving an already-active entity returns { idempotent: true }. Requires write auth (set ROADMAPPER_API_KEY).",
1412
1941
  inputSchema: {
1413
1942
  type: "object",
1414
1943
  properties: {
@@ -1468,7 +1997,7 @@ function moveLifecycleTools() {
1468
1997
  "PREREQUISITE: get_agents_md once this session (enforced). Target parent must exist AND be active — refuses move into an archived parent.\n" +
1469
1998
  `ANTI-PATTERN: do not use move to change anything other than the parent. To rename or rescope, use update_${kind} (coming soon). To delete, use archive_${kind}.\n` +
1470
1999
  `EXAMPLE: ${example}\n\n` +
1471
- "Idempotent: moving to the current parent returns { idempotent: true } and emits no audit row. Requires SUPABASE_SERVICE_ROLE_KEY.",
2000
+ "Idempotent: moving to the current parent returns { idempotent: true } and emits no audit row. Requires write auth (set ROADMAPPER_API_KEY).",
1472
2001
  inputSchema: {
1473
2002
  type: "object",
1474
2003
  properties: {
@@ -1589,7 +2118,7 @@ function updateLifecycleTools() {
1589
2118
  team: { type: "string" },
1590
2119
  confidence: { type: "number", description: "0–95." },
1591
2120
  impact: { type: "number", description: "One of 0.25, 0.5, 1, 2, 3." },
1592
- roi: { type: "number" },
2121
+ roi: { type: "number", description: "Estimated annual ROI in raw dollars (e.g. 2500000 = $2.5M)." },
1593
2122
  tags: { type: "array", items: { type: "string" } },
1594
2123
  links: { type: "object", additionalProperties: { type: "string" } },
1595
2124
  },
@@ -1604,7 +2133,7 @@ function updateLifecycleTools() {
1604
2133
  name: { type: "string", description: "Theme name. Minimum 5 chars." },
1605
2134
  description: { type: "string" },
1606
2135
  owner: { type: "string" },
1607
- targetRoi: { type: "number" },
2136
+ targetRoi: { type: "number", description: "Annual ROI target in raw dollars (e.g. 20000000 = $20M)." },
1608
2137
  },
1609
2138
  example:
1610
2139
  "update_theme({ themeId: 'TH-100042', patch: { name: 'Platform Reliability' }, reason: 'sharper name; same scope' })",
@@ -1627,7 +2156,7 @@ function updateLifecycleTools() {
1627
2156
  `PREREQUISITE: get_agents_md once this session (enforced). Reason required (audit trail). ${reparentHint}\n` +
1628
2157
  `ANTI-PATTERN: do not echo the entity back to the server — pass only the keys that changed. The server diffs against current state and a patch that matches everything returns { idempotent: true }.\n` +
1629
2158
  `EXAMPLE: ${example}\n\n` +
1630
- "Idempotent: a patch where every key already matches current state returns { idempotent: true } and emits no audit row. Requires SUPABASE_SERVICE_ROLE_KEY.",
2159
+ "Idempotent: a patch where every key already matches current state returns { idempotent: true } and emits no audit row. Requires write auth (set ROADMAPPER_API_KEY).",
1631
2160
  inputSchema: {
1632
2161
  type: "object",
1633
2162
  properties: {
@@ -1682,6 +2211,36 @@ async function callTool(name, args) {
1682
2211
  // read. Tools that need to know the resolved id later (write paths,
1683
2212
  // snapshot) read it back via resolveWorkspaceId(args?.workspaceId).
1684
2213
  const wsId = resolveWorkspaceId(args?.workspaceId);
2214
+
2215
+ // get_active_workspace answers "where am I pointed" without touching
2216
+ // the DB — return before the projection read below. Cheap by design:
2217
+ // agents should be able to spam it to confirm orientation.
2218
+ if (name === "get_active_workspace") {
2219
+ const { id, source } = resolveWorkspaceWithSource(args?.workspaceId);
2220
+ const { url } = supabaseConfig();
2221
+ let note;
2222
+ if (source === "env") {
2223
+ note =
2224
+ "Resolved from the MCP install's env default — NOT from the current directory. If you meant a specific repo's workspace, launch from that checkout (connected repos carry .roadmapper/snapshot.json) or pass workspaceId explicitly.";
2225
+ } else if (source === "none") {
2226
+ note =
2227
+ "No workspace resolved. Set ROADMAPPER_WORKSPACE_ID in env, run from a connected repo checkout, or pass workspaceId on the call.";
2228
+ }
2229
+ return textResult(
2230
+ JSON.stringify(
2231
+ {
2232
+ workspaceId: id,
2233
+ resolvedFrom: source, // "arg" | "snapshot" | "env" | "none"
2234
+ writeMode: writeMode(), // "broker" | "operator" | "read-only"
2235
+ backendConfigured: Boolean(url),
2236
+ ...(note ? { note } : {}),
2237
+ },
2238
+ null,
2239
+ 2
2240
+ )
2241
+ );
2242
+ }
2243
+
1685
2244
  // Post-Piece-6c, the entity tables ARE the canonical projection
1686
2245
  // — no edits blob, no seed-overlay merge. Fall back to the
1687
2246
  // bundled seed only when the DB is unreachable (offline / dev).
@@ -1767,28 +2326,62 @@ async function callTool(name, args) {
1767
2326
  "Rank existing capabilities by relevance before proposing a new one. If any score is >0.4, attach tasks there instead."
1768
2327
  );
1769
2328
  }
1770
- // Cross-workspace guard. If the cwd has a .roadmapper/snapshot.json
1771
- // naming a workspace, and the call carries an explicit workspaceId
1772
- // pointing somewhere else, refuse — almost always a mistake. An
1773
- // operator who really needs to write across workspaces can set
1774
- // ROADMAPPER_ALLOW_CROSS_WORKSPACE=1 in env to bypass.
1775
- const snap = snapshotWorkspaceId();
2329
+ // Cross-workspace guard. If the LOCAL context unambiguously names a
2330
+ // workspace — either the repo the agent is in (roots → repo_workspace_map)
2331
+ // or the cwd's .roadmapper/snapshot.json — and the call carries an
2332
+ // explicit workspaceId pointing somewhere else, refuse. Almost always
2333
+ // a mistake. The repo signal beats the snapshot (it's the more specific
2334
+ // "where am I right now"). An operator who really needs to write across
2335
+ // workspaces can set ROADMAPPER_ALLOW_CROSS_WORKSPACE=1 to bypass.
2336
+ // dryRun is non-destructive validation — let it through both the
2337
+ // cross-workspace and seed-workspace guards.
2338
+ const isDryRun = args?.dryRun === true;
2339
+ const localWs = rootWorkspaceId() ?? snapshotWorkspaceId();
2340
+ const localSource = rootWorkspaceId()
2341
+ ? `the repo you're in (${_rootWorkspaceRepo})`
2342
+ : "the cwd's .roadmapper/snapshot.json";
1776
2343
  const argWs = args?.workspaceId;
1777
2344
  if (
1778
- snap &&
2345
+ localWs &&
1779
2346
  typeof argWs === "string" &&
1780
2347
  argWs.length > 0 &&
1781
- argWs !== snap &&
2348
+ argWs !== localWs &&
2349
+ !isDryRun &&
1782
2350
  process.env.ROADMAPPER_ALLOW_CROSS_WORKSPACE !== "1"
1783
2351
  ) {
1784
2352
  session.mutatorBlocks += 1;
1785
2353
  recordTelemetry(
1786
2354
  "mutator_blocked_cross_workspace",
1787
- { tool: name, targetId, cwdWorkspace: snap, argWorkspace: argWs },
2355
+ { tool: name, targetId, localWorkspace: localWs, argWorkspace: argWs },
1788
2356
  wsId
1789
2357
  );
1790
2358
  return errorResult(
1791
- `Refusing cross-workspace write: cwd's .roadmapper/snapshot.json names workspace "${snap}" but ${name} call targets "${argWs}". Almost always a mistake — drop the workspaceId arg to use the cwd default, or set ROADMAPPER_ALLOW_CROSS_WORKSPACE=1 to override.`
2359
+ `Refusing cross-workspace write: ${localSource} names workspace "${localWs}" but ${name} call targets "${argWs}". Almost always a mistake — drop the workspaceId arg to use the local default, or set ROADMAPPER_ALLOW_CROSS_WORKSPACE=1 to override.`
2360
+ );
2361
+ }
2362
+ // Seed-workspace guard. The cross-workspace guard above only fires
2363
+ // when a repo mapping or snapshot exists to disagree with. The other half of the
2364
+ // wrong-workspace footgun is launching OUTSIDE any configured
2365
+ // checkout: no arg, no snapshot, so wsId falls through to the env
2366
+ // default — and on an unconfigured install that default is the
2367
+ // bundled seed/demo workspace. Writing real planning data there is
2368
+ // almost never intended. Refuse, unless the caller named "default"
2369
+ // explicitly (source "arg") or pointed env/snapshot at it
2370
+ // deliberately (source "snapshot").
2371
+ const { source: wsSource } = resolveWorkspaceWithSource(args?.workspaceId);
2372
+ if (
2373
+ wsId === SEED_WORKSPACE_ID &&
2374
+ wsSource === "env" &&
2375
+ !isDryRun &&
2376
+ // Parity with the cross-workspace guard: operators whose real
2377
+ // workspace is genuinely named "default" (or who otherwise mean
2378
+ // it) can opt out.
2379
+ process.env.ROADMAPPER_ALLOW_SEED_WORKSPACE !== "1"
2380
+ ) {
2381
+ session.mutatorBlocks += 1;
2382
+ recordTelemetry("mutator_blocked_seed_workspace", { tool: name, targetId }, wsId);
2383
+ return errorResult(
2384
+ `Refusing to write to the seed/demo workspace "${SEED_WORKSPACE_ID}": it was resolved from the env default, the call carries no workspaceId, and there's no .roadmapper/snapshot.json in the cwd — so the agent was likely launched outside a configured repo checkout and is about to pollute the placeholder data a fresh install ships with. Run get_active_workspace to confirm where you're pointed. To proceed deliberately, pass workspaceId explicitly, set ROADMAPPER_WORKSPACE_ID to the workspace you mean, or set ROADMAPPER_ALLOW_SEED_WORKSPACE=1.`
1792
2385
  );
1793
2386
  }
1794
2387
  recordTelemetry("mutator_attempted", { tool: name, targetId }, wsId);
@@ -1826,7 +2419,9 @@ async function callTool(name, args) {
1826
2419
  return withReminder(
1827
2420
  "list_capabilities",
1828
2421
  projected,
1829
- textResult(JSON.stringify(filtered, null, 2))
2422
+ compactResult(
2423
+ paginateRows(filtered, args, capabilityLight, projected.tasks)
2424
+ )
1830
2425
  );
1831
2426
  }
1832
2427
  case "list_tasks": {
@@ -1841,7 +2436,25 @@ async function callTool(name, args) {
1841
2436
  return withReminder(
1842
2437
  "list_tasks",
1843
2438
  projected,
1844
- textResult(JSON.stringify(filtered, null, 2))
2439
+ compactResult(paginateRows(filtered, args, taskLight))
2440
+ );
2441
+ }
2442
+ case "list_uncategorized_tasks": {
2443
+ // capabilityId == null catches both an explicit null and a
2444
+ // stripped-undefined key (unparented PRs auto-created by the
2445
+ // webhook with no Roadmapper-Capability trailer + no Jaccard
2446
+ // hit). A task may still carry a pillarId (direct theme
2447
+ // parenting); we only key on the capability link here.
2448
+ let filtered = projected.tasks.filter((t) => t.capabilityId == null);
2449
+ if (args?.status)
2450
+ filtered = filtered.filter((t) => t.status === args.status);
2451
+ if (!args?.includeArchived) {
2452
+ filtered = filtered.filter((t) => !t.archived);
2453
+ }
2454
+ return withReminder(
2455
+ "list_uncategorized_tasks",
2456
+ projected,
2457
+ compactResult(paginateRows(filtered, args, taskLight))
1845
2458
  );
1846
2459
  }
1847
2460
  case "get_task": {
@@ -1875,10 +2488,17 @@ async function callTool(name, args) {
1875
2488
  const ts = Date.now();
1876
2489
  session.themesListedAt = ts;
1877
2490
  session.capsDiscoveredAt = ts;
2491
+ const { source } = resolveWorkspaceWithSource(args?.workspaceId);
1878
2492
  return withReminder(
1879
2493
  "get_roadmap_snapshot",
1880
2494
  projected,
1881
- getRoadmapSnapshot(projected, wsId, args?.includeArchived === true)
2495
+ getRoadmapSnapshot(
2496
+ projected,
2497
+ wsId,
2498
+ args?.includeArchived === true,
2499
+ source,
2500
+ args?.detail === true
2501
+ )
1882
2502
  );
1883
2503
  }
1884
2504
  case "propose_task":
@@ -1902,7 +2522,14 @@ async function callTool(name, args) {
1902
2522
  session.themesListedAt = Date.now();
1903
2523
  return suggestThemeFor(args, projected);
1904
2524
  case "link_pr":
1905
- return linkPR(args, projected, seed, wsId);
2525
+ // Pass null as the seed reference. linkPR's only use of seed
2526
+ // is to look up legacy seed-bundled PR entries on a task; in
2527
+ // the live path the canonical PRs live on the projected task,
2528
+ // so seed is never the source of truth. Passing the previously-
2529
+ // undefined identifier `seed` raised "seed is not defined" at
2530
+ // runtime for every link_pr call — caught by the live test
2531
+ // drive that exposed bug #5.
2532
+ return linkPR(args, projected, null, wsId);
1906
2533
  case "archive_task":
1907
2534
  return archiveLifecycle("task", "archive", args, wsId);
1908
2535
  case "archive_capability":
@@ -1933,6 +2560,12 @@ async function callTool(name, args) {
1933
2560
  return recordOutcomeReading(args, wsId, projected);
1934
2561
  case "list_stale_outcomes":
1935
2562
  return listStaleOutcomes(args, projected);
2563
+ case "detect_capability_gaps":
2564
+ // Enumerates every active capability to score fit, so the agent
2565
+ // has effectively discovered the catalogue — satisfies the
2566
+ // propose_capability gate (the natural next step on a gap).
2567
+ session.capsDiscoveredAt = Date.now();
2568
+ return detectCapabilityGaps(args, projected);
1936
2569
  default:
1937
2570
  return errorResult(`Unknown tool: ${name}`);
1938
2571
  }
@@ -1969,6 +2602,61 @@ async function proposeTask(args, projected, wsId) {
1969
2602
  )
1970
2603
  return errorResult(`expectedScope must be a positive number, got ${args.expectedScope}.`);
1971
2604
 
2605
+ // Warn-on-skip (not block): if the agent never surveyed capabilities
2606
+ // this session, it may have picked the wrong parent. Rather than a
2607
+ // hard gate (which would false-positive on legit known-capability
2608
+ // filing and just get worked around), we compute a fit check and
2609
+ // attach an actionable _meta warning to the response. propose_task
2610
+ // stays allow; propose_capability keeps its hard discovery gate.
2611
+ //
2612
+ // Useful signal: score the task text against the CHOSEN capability
2613
+ // and against the best available one. If a different capability
2614
+ // scores materially higher, surface it — that's the likely-misfiled
2615
+ // case, the exact thing discovery would have caught.
2616
+ function buildSkipWarning() {
2617
+ if (session.capsDiscoveredAt !== null) return null; // discovery happened
2618
+ const taskToks = tokenize(
2619
+ [args.title ?? "", args.summary ?? ""].join(" ")
2620
+ );
2621
+ if (taskToks.size === 0) return null;
2622
+ const themeById = new Map(
2623
+ (projected.themes ?? []).map((t) => [t.id, t])
2624
+ );
2625
+ const hayFor = (c) => {
2626
+ const theme = themeById.get(c.pillarId);
2627
+ const titles = (projected.tasks ?? [])
2628
+ .filter((t) => t.capabilityId === c.id)
2629
+ .map((t) => t.title)
2630
+ .join(" ");
2631
+ return tokenize(
2632
+ [c.name, c.description ?? "", c.outcome ?? "", theme?.name ?? "", titles].join(" ")
2633
+ );
2634
+ };
2635
+ const chosenScore = jaccardScore(taskToks, hayFor(cap));
2636
+ // Best OTHER active, non-delivered capability.
2637
+ let best = null;
2638
+ for (const c of projected.capabilities) {
2639
+ if (c.id === cap.id || c.archived) continue;
2640
+ if (effectiveCapabilityStatus(c, projected.tasks) === "delivered") continue;
2641
+ const s = jaccardScore(taskToks, hayFor(c));
2642
+ if (!best || s > best.score) best = { id: c.id, name: c.name, score: s };
2643
+ }
2644
+ const base =
2645
+ "Heads up: you filed this task without calling suggest_capability_for / list_capabilities / get_roadmap_snapshot this session, so you may not have surveyed existing capabilities. ";
2646
+ // Only escalate to a concrete suggestion when another cap clearly
2647
+ // fits better than the chosen one — otherwise just a gentle note.
2648
+ if (best && best.score > 0.2 && best.score > chosenScore + 0.1) {
2649
+ return (
2650
+ base +
2651
+ `The task text fits ${best.id} (${best.name}) noticeably better (score ${best.score.toFixed(2)}) than the chosen ${cap.id} (${chosenScore.toFixed(2)}). If that's the right home, move_task it there.`
2652
+ );
2653
+ }
2654
+ return (
2655
+ base +
2656
+ "If you're confident in the parent, ignore this; otherwise call suggest_capability_for({ taskId }) to confirm."
2657
+ );
2658
+ }
2659
+
1972
2660
  const effort = args.effort;
1973
2661
  const start = todayISO();
1974
2662
  // Target dates are day-resolution; round up so sub-day estimates
@@ -2005,6 +2693,11 @@ async function proposeTask(args, projected, wsId) {
2005
2693
  ...(args.expectedScope !== undefined ? { expectedScope: args.expectedScope } : {}),
2006
2694
  };
2007
2695
 
2696
+ const skipWarning = buildSkipWarning();
2697
+ const skipMeta = skipWarning
2698
+ ? { _meta: { roadmapper: { reminder: skipWarning } } }
2699
+ : undefined;
2700
+
2008
2701
  if (args.dryRun) {
2009
2702
  return textResult(
2010
2703
  JSON.stringify(
@@ -2012,12 +2705,13 @@ async function proposeTask(args, projected, wsId) {
2012
2705
  ok: true,
2013
2706
  dryRun: true,
2014
2707
  wouldCreate: task,
2015
- warnings: [],
2708
+ warnings: skipWarning ? [skipWarning] : [],
2016
2709
  message: `Would create task ${id} under ${cap.id} (${cap.name}). No record written.`,
2017
2710
  },
2018
2711
  null,
2019
2712
  2
2020
- )
2713
+ ),
2714
+ skipMeta
2021
2715
  );
2022
2716
  }
2023
2717
 
@@ -2048,13 +2742,15 @@ async function proposeTask(args, projected, wsId) {
2048
2742
  id: stored.id,
2049
2743
  capabilityId: stored.capabilityId,
2050
2744
  idempotent,
2745
+ ...(skipWarning ? { warnings: [skipWarning] } : {}),
2051
2746
  message: idempotent
2052
2747
  ? `Task ${stored.id} already exists with idempotencyKey ${args.idempotencyKey}; returning existing task instead of creating a duplicate.`
2053
2748
  : `Created ${stored.id} under ${cap.id} (${cap.name}). status=planned, authorKind=agent.`,
2054
2749
  },
2055
2750
  null,
2056
2751
  2
2057
- )
2752
+ ),
2753
+ skipMeta
2058
2754
  );
2059
2755
  }
2060
2756
 
@@ -2226,7 +2922,13 @@ async function proposeCapability(args, projected, wsId) {
2226
2922
  * passes that id back on `propose_task` / `propose_capability` /
2227
2923
  * `propose_theme` calls.
2228
2924
  */
2229
- function getRoadmapSnapshot(projected, wsId, includeArchived = false) {
2925
+ function getRoadmapSnapshot(
2926
+ projected,
2927
+ wsId,
2928
+ includeArchived = false,
2929
+ source,
2930
+ detail = false
2931
+ ) {
2230
2932
  // Archived entities are filtered out by default — the snapshot
2231
2933
  // is meant to surface what an agent should plan against, and
2232
2934
  // archived rows are by definition not in scope. Pass
@@ -2243,36 +2945,72 @@ function getRoadmapSnapshot(projected, wsId, includeArchived = false) {
2243
2945
  if (!includeArchived && t.archived) return false;
2244
2946
  return t.status === "in_progress" || t.status === "planned";
2245
2947
  });
2246
- return textResult(
2247
- JSON.stringify(
2248
- {
2249
- workspaceId: wsId,
2250
- generatedAt: new Date().toISOString(),
2251
- themes,
2252
- capabilities: activeCapabilities,
2253
- tasks: inFlightTasks,
2254
- counts: {
2255
- themes: themes.length,
2256
- activeCapabilities: activeCapabilities.length,
2257
- inFlightTasks: inFlightTasks.length,
2258
- totalCapabilities: projected.capabilities.length,
2259
- totalTasks: projected.tasks.length,
2260
- },
2261
- },
2262
- null,
2263
- 2
2264
- )
2265
- );
2948
+
2949
+ // Light by default — this is the cold-start orient call, so it must
2950
+ // never blow the token budget on a large workspace (the 670-task
2951
+ // workspace produced an 81KB full-detail response). detail:true
2952
+ // restores full rows. Even light, we cap the task list: counts
2953
+ // below carries the true totals, and an agent that needs every row
2954
+ // should use list_tasks with a filter, not the snapshot.
2955
+ const capItems = detail
2956
+ ? activeCapabilities
2957
+ : activeCapabilities.map((c) => capabilityLight(c, projected.tasks));
2958
+ const taskCap = detail ? LIST_MAX_LIMIT : LIST_DEFAULT_LIMIT;
2959
+ const taskSlice = inFlightTasks.slice(0, taskCap);
2960
+ const taskItems = detail ? taskSlice : taskSlice.map(taskLight);
2961
+
2962
+ return compactResult({
2963
+ workspaceId: wsId,
2964
+ // How wsId was resolved (arg / snapshot / env / none). Lets the
2965
+ // agent catch a silent env-default fall-through before planning
2966
+ // against the wrong (often seed) workspace. Omitted when unknown.
2967
+ ...(source ? { resolvedFrom: source } : {}),
2968
+ generatedAt: new Date().toISOString(),
2969
+ mode: detail ? "detail" : "summary",
2970
+ themes,
2971
+ capabilities: capItems,
2972
+ tasks: taskItems,
2973
+ tasksTruncated: inFlightTasks.length > taskSlice.length,
2974
+ counts: {
2975
+ themes: themes.length,
2976
+ activeCapabilities: activeCapabilities.length,
2977
+ inFlightTasks: inFlightTasks.length,
2978
+ totalCapabilities: projected.capabilities.length,
2979
+ totalTasks: projected.tasks.length,
2980
+ },
2981
+ });
2266
2982
  }
2267
2983
 
2268
2984
  function suggestCapabilityFor(args, projected) {
2269
- const desc = (args.description || "").trim();
2270
- if (!desc) return errorResult("description is required.");
2271
- const limit = Math.min(25, Math.max(1, args.limit ?? 5));
2272
-
2273
- // Skip delivered capabilities — they're closed bets. A new PR
2274
- // mapping to a delivered cap would either be wrong (work for a
2275
- // different bet) or reopen-the-bet (which the user should do
2985
+ // Two query sources: free-text description, or an existing task
2986
+ // (title + summary). taskId is the triage path — rank a home for
2987
+ // an orphan surfaced by list_uncategorized_tasks. Exactly one.
2988
+ if (args.taskId && args.description) {
2989
+ return errorResult(
2990
+ "Pass exactly one of description / taskId, not both."
2991
+ );
2992
+ }
2993
+ let desc;
2994
+ let sourceTaskId;
2995
+ if (args.taskId) {
2996
+ const task = projected.tasks.find((t) => t.id === args.taskId);
2997
+ if (!task) return errorResult(`Task ${args.taskId} not found.`);
2998
+ sourceTaskId = task.id;
2999
+ desc = [task.title ?? "", task.summary ?? ""].join(" ").trim();
3000
+ if (!desc) {
3001
+ return errorResult(
3002
+ `Task ${args.taskId} has no title or summary to match on.`
3003
+ );
3004
+ }
3005
+ } else {
3006
+ desc = (args.description || "").trim();
3007
+ if (!desc) return errorResult("description or taskId is required.");
3008
+ }
3009
+ const limit = Math.min(25, Math.max(1, args.limit ?? 5));
3010
+
3011
+ // Skip delivered capabilities — they're closed bets. A new PR
3012
+ // mapping to a delivered cap would either be wrong (work for a
3013
+ // different bet) or reopen-the-bet (which the user should do
2276
3014
  // explicitly, not as a side effect of agent triage).
2277
3015
  const activeCaps = projected.capabilities.filter(
2278
3016
  (c) => effectiveCapabilityStatus(c, projected.tasks) !== "delivered"
@@ -2340,6 +3078,7 @@ function suggestCapabilityFor(args, projected) {
2340
3078
  {
2341
3079
  ok: true,
2342
3080
  query: desc,
3081
+ ...(sourceTaskId ? { taskId: sourceTaskId } : {}),
2343
3082
  matches: ranked,
2344
3083
  hint:
2345
3084
  ranked.length === 0
@@ -2924,6 +3663,162 @@ function listStaleOutcomes(args, projected) {
2924
3663
  );
2925
3664
  }
2926
3665
 
3666
/**
 * Find clusters of uncategorized tasks that no existing capability
 * covers — the "a bet is missing" signal. Two-stage:
 *   1. Homeless filter: a task is homeless when its BEST Jaccard fit
 *      against any active (non-delivered) capability is below
 *      fitThreshold. Tasks that fit an existing cap aren't gaps —
 *      they're just orphans to file (suggest_capability_for +
 *      move_task), so they're excluded here.
 *   2. Greedy clustering: seed a cluster from the first ungrouped
 *      homeless task, then (in one forward pass) pull in any later
 *      homeless task whose tokens overlap the cluster's accumulated
 *      tokens at >= fitThreshold. Repeat until everything is grouped.
 *      Clusters below minClusterSize are dropped as noise.
 *
 * Read-only and order-stable (iterates in array order, no Date/random
 * in this function) so a resumed/cached run reproduces. Returns a
 * suggested capability name per cluster (its top shared keywords) — a
 * HINT for propose_capability, not an auto-create.
 *
 * @param {object} [args]
 * @param {number} [args.minClusterSize] Floored, then clamped to
 *   [2, 50]. Anything that is not a finite number falls back to 3.
 * @param {number} [args.fitThreshold] Clamped to [0, 1]. Anything that
 *   is not a finite number falls back to 0.2.
 * @param {boolean} [args.includeArchived] Only the literal `true`
 *   includes archived capabilities and tasks.
 * @param {object} projected Projection with optional `themes`,
 *   `capabilities` and `tasks` arrays; a missing array is treated as
 *   empty.
 * @returns {object} Whatever `textResult` builds from the compact JSON
 *   payload `{ uncategorizedScanned, homelessCount, minClusterSize,
 *   fitThreshold, gapCount, gaps }`, plus a `_meta.roadmapper.reminder`
 *   nudge when at least one gap was found.
 */
function detectCapabilityGaps(args, projected) {
  // Guard against non-numeric input (a non-compliant client, or an
  // explicit null) — Math.floor(NaN) would propagate NaN through the
  // clamp and make `members.length >= NaN` always false, silently
  // returning zero gaps. Fall back to the default unless we got a
  // finite number, mirroring the fitThreshold guard below.
  const minClusterSize =
    typeof args?.minClusterSize === "number" &&
    Number.isFinite(args.minClusterSize)
      ? Math.min(50, Math.max(2, Math.floor(args.minClusterSize)))
      : 3;
  const fitThreshold =
    typeof args?.fitThreshold === "number" &&
    Number.isFinite(args.fitThreshold)
      ? Math.min(1, Math.max(0, args.fitThreshold))
      : 0.2;
  const includeArchived = args?.includeArchived === true;

  // Normalize the three collections once so every stage below treats a
  // missing array the same way (as empty) instead of some call sites
  // guarding and others throwing.
  const themes = projected.themes ?? [];
  const capabilities = projected.capabilities ?? [];
  const tasks = projected.tasks ?? [];

  // Group task titles by parent capability in a single pass, keeping
  // array order, so building the haystacks does not rescan the whole
  // task list once per capability.
  const titlesByCapId = new Map();
  for (const t of tasks) {
    const bucket = titlesByCapId.get(t.capabilityId);
    if (bucket) bucket.push(t.title);
    else titlesByCapId.set(t.capabilityId, [t.title]);
  }

  // Active capabilities = candidate homes. Build each one's haystack
  // once: name, description, outcome, theme name/description and the
  // titles of the tasks already filed under it.
  const themeById = new Map(themes.map((t) => [t.id, t]));
  const activeCaps = capabilities.filter(
    (c) =>
      (includeArchived || !c.archived) &&
      effectiveCapabilityStatus(c, tasks) !== "delivered"
  );
  const capHaystacks = activeCaps.map((c) => {
    const theme = themeById.get(c.pillarId);
    const taskTitles = (titlesByCapId.get(c.id) ?? []).join(" ");
    return tokenize(
      [
        c.name,
        c.description ?? "",
        c.outcome ?? "",
        theme?.name ?? "",
        theme?.description ?? "",
        taskTitles,
      ].join(" ")
    );
  });

  // Stage 1 — homeless uncategorized tasks (best cap fit < threshold).
  // `== null` deliberately matches both null and undefined parents.
  const uncategorized = tasks.filter(
    (t) => t.capabilityId == null && (includeArchived || !t.archived)
  );
  const homeless = [];
  for (const t of uncategorized) {
    const toks = tokenize([t.title ?? "", t.summary ?? ""].join(" "));
    if (toks.size === 0) continue; // nothing to match on
    let best = 0;
    for (const hay of capHaystacks) {
      const s = jaccardScore(toks, hay);
      if (s > best) best = s;
    }
    if (best < fitThreshold) homeless.push({ task: t, toks, bestFit: best });
  }

  // Stage 2 — greedy clustering by shared vocabulary. Deterministic:
  // iterate in array order, never random. Each seed makes one forward
  // pass; a task already claimed by an earlier cluster is never moved.
  const used = new Set();
  const clusters = [];
  for (let i = 0; i < homeless.length; i++) {
    if (used.has(i)) continue;
    used.add(i);
    const members = [homeless[i]];
    const clusterToks = new Set(homeless[i].toks);
    for (let j = i + 1; j < homeless.length; j++) {
      if (used.has(j)) continue;
      if (jaccardScore(homeless[j].toks, clusterToks) >= fitThreshold) {
        used.add(j);
        members.push(homeless[j]);
        for (const tk of homeless[j].toks) clusterToks.add(tk);
      }
    }
    if (members.length >= minClusterSize) clusters.push(members);
  }

  // Shape the output: shared keywords (most common tokens across the
  // cluster's members), a suggested name, and member task ids/titles.
  const shaped = clusters
    .map((members) => {
      // Rank tokens by how many members contain them — the shared
      // vocabulary is what names the bet.
      const freq = new Map();
      for (const m of members) {
        for (const tk of m.toks) freq.set(tk, (freq.get(tk) ?? 0) + 1);
      }
      // Array.prototype.sort is stable, so tokens with equal counts
      // keep first-seen order and the keyword list is reproducible.
      const keywords = [...freq.entries()]
        .filter(([, n]) => n >= 2) // shared by at least two members
        .sort((a, b) => b[1] - a[1])
        .slice(0, 6)
        .map(([tk]) => tk);
      return {
        size: members.length,
        keywords,
        suggestedCapabilityName:
          keywords.length > 0
            ? keywords.slice(0, 4).join(" ")
            : "(no shared keywords)",
        tasks: members.map((m) => ({
          id: m.task.id,
          title: m.task.title,
          bestExistingFit: Number(m.bestFit.toFixed(3)),
        })),
      };
    })
    .sort((a, b) => b.size - a.size);

  const meta =
    shaped.length > 0
      ? {
          _meta: {
            roadmapper: {
              reminder:
                `${shaped.length} capability gap(s) detected — clusters of uncategorized work no existing bet covers. ` +
                "Each is a CANDIDATE for propose_capability (confirm with the user — capabilities are quarterly bets, not auto-created), then move_tasks the members under it.",
            },
          },
        }
      : undefined;

  // Compact JSON + optional _meta nudge (textResult spreads `extra`).
  return textResult(
    JSON.stringify({
      uncategorizedScanned: uncategorized.length,
      homelessCount: homeless.length,
      minClusterSize,
      fitThreshold,
      gapCount: shaped.length,
      gaps: shaped,
    }),
    meta
  );
}
3821
+
2927
3822
  async function submitAcceptanceGrades(args, projected, wsId) {
2928
3823
  const task = projected.tasks.find((t) => t.id === args.taskId);
2929
3824
  if (!task) return errorResult(`Task ${args.taskId} not found.`);
@@ -3091,15 +3986,15 @@ async function readResource(uri) {
3091
3986
  // the model," not "this specific call shape ran."
3092
3987
  if (session.rubricFetchedAt === null) {
3093
3988
  session.rubricFetchedAt = Date.now();
3094
- // Pass the cwd snapshot's workspace id so the row is
3095
- // visible in Settings → MCP activity. Without this the
3096
- // resource-route fetch lands with workspace_id=NULL and
3097
- // gets filtered out for non-operator viewers (per migration
3098
- // 0038's NULL-workspace lock).
3989
+ // Pass the resolved workspace id (repo → snapshot → env) so the
3990
+ // row is visible in Settings → MCP activity under the right
3991
+ // workspace. Without this the resource-route fetch lands with
3992
+ // workspace_id=NULL and gets filtered out for non-operator
3993
+ // viewers (per migration 0038's NULL-workspace lock).
3099
3994
  recordTelemetry(
3100
3995
  "rubric_fetched",
3101
3996
  { via: "resource" },
3102
- snapshotWorkspaceId() ?? undefined
3997
+ resolveWorkspaceId() ?? undefined
3103
3998
  );
3104
3999
  }
3105
4000
  return {
@@ -3128,12 +4023,22 @@ async function readResource(uri) {
3128
4023
  const active = projected.capabilities.filter(
3129
4024
  (c) => effectiveCapabilityStatus(c, projected.tasks) !== "delivered"
3130
4025
  );
4026
+ // Resources auto-fire on client connect with NO args and no model
4027
+ // gate — so they MUST be bounded unconditionally. Light rows +
4028
+ // cap, with a total/truncated envelope. An agent that needs full
4029
+ // detail uses list_capabilities({ detail:true }).
4030
+ const capped = active.slice(0, LIST_MAX_LIMIT);
3131
4031
  return {
3132
4032
  contents: [
3133
4033
  {
3134
4034
  uri,
3135
4035
  mimeType: "application/json",
3136
- text: JSON.stringify(active, null, 2),
4036
+ text: JSON.stringify({
4037
+ total: active.length,
4038
+ returned: capped.length,
4039
+ truncated: active.length > capped.length,
4040
+ items: capped.map((c) => capabilityLight(c, projected.tasks)),
4041
+ }),
3137
4042
  },
3138
4043
  ],
3139
4044
  };
@@ -3142,12 +4047,20 @@ async function readResource(uri) {
3142
4047
  const open = projected.tasks.filter(
3143
4048
  (t) => t.status === "in_progress" || t.status === "planned"
3144
4049
  );
4050
+ // Same rationale as the capabilities resource — bounded + light,
4051
+ // because this fires on connect without anyone asking.
4052
+ const capped = open.slice(0, LIST_MAX_LIMIT);
3145
4053
  return {
3146
4054
  contents: [
3147
4055
  {
3148
4056
  uri,
3149
4057
  mimeType: "application/json",
3150
- text: JSON.stringify(open, null, 2),
4058
+ text: JSON.stringify({
4059
+ total: open.length,
4060
+ returned: capped.length,
4061
+ truncated: open.length > capped.length,
4062
+ items: capped.map(taskLight),
4063
+ }),
3151
4064
  },
3152
4065
  ],
3153
4066
  };
@@ -3238,6 +4151,11 @@ async function handle(request) {
3238
4151
  const { id, method, params } = request;
3239
4152
  try {
3240
4153
  if (method === "initialize") {
4154
+ // If the client declares roots support, request the root list
4155
+ // right after we respond (can't send mid-handler — the client
4156
+ // isn't listening until it gets our initialize result). The
4157
+ // main loop fires requestClientRoots() once this returns.
4158
+ _clientSupportsRoots = !!params?.capabilities?.roots;
3241
4159
  // Snapshot counts so an MCP client showing server info
3242
4160
  // surfaces actual roadmap shape, not just "connected".
3243
4161
  const projected =
@@ -3272,14 +4190,34 @@ async function handle(request) {
3272
4190
  name: SERVER_NAME,
3273
4191
  version: SERVER_VERSION,
3274
4192
  stats,
3275
- instructions:
3276
- "Roadmapper online " +
3277
- `${stats.themes} theme${stats.themes === 1 ? "" : "s"}, ` +
3278
- `${stats.capabilities} capabilit${stats.capabilities === 1 ? "y" : "ies"}, ` +
3279
- `${stats.openTasks} open task${stats.openTasks === 1 ? "" : "s"}. ` +
3280
- "Call get_agents_md before planning the propose_* and submit_acceptance_grades tools refuse without it. " +
3281
- "Use suggest_capability_for before propose_capability. " +
3282
- "Slash-prompts available: roadmapper:plan-feature, roadmapper:close-task, roadmapper:weekly-review.",
4193
+ instructions: (() => {
4194
+ // Name the workspace we resolve to RIGHT NOW + where it came
4195
+ // from, so the agent can trust where its writes land instead
4196
+ // of discovering an empty/wrong workspace later. Repo-based
4197
+ // resolution (roots → repo_workspace_map) finishes just after
4198
+ // this handshake, so if the client supports roots we say the
4199
+ // target may refine and to confirm via get_active_workspace.
4200
+ const { id: ws, source } = resolveWorkspaceWithSource();
4201
+ const wsLine = ws
4202
+ ? `Workspace: ${ws} (resolved from ${source}). `
4203
+ : "No workspace resolved yet. ";
4204
+ const rootsLine = _clientSupportsRoots
4205
+ ? "Detecting the repo you're in to pick its workspace; call get_active_workspace before your first write to confirm. "
4206
+ : ws
4207
+ ? ""
4208
+ : "Set ROADMAPPER_WORKSPACE_ID or open a connected repo. ";
4209
+ return (
4210
+ "Roadmapper online — " +
4211
+ wsLine +
4212
+ `${stats.themes} theme${stats.themes === 1 ? "" : "s"}, ` +
4213
+ `${stats.capabilities} capabilit${stats.capabilities === 1 ? "y" : "ies"}, ` +
4214
+ `${stats.openTasks} open task${stats.openTasks === 1 ? "" : "s"}. ` +
4215
+ rootsLine +
4216
+ "Call get_agents_md before planning — the propose_* and submit_acceptance_grades tools refuse without it. " +
4217
+ "Use suggest_capability_for before propose_capability. " +
4218
+ "Slash-prompts available: roadmapper:plan-feature, roadmapper:close-task, roadmapper:weekly-review."
4219
+ );
4220
+ })(),
3283
4221
  },
3284
4222
  },
3285
4223
  };
@@ -3317,6 +4255,11 @@ async function handle(request) {
3317
4255
  const result = renderPrompt(params?.name, params?.arguments ?? {});
3318
4256
  return { jsonrpc: "2.0", id, result };
3319
4257
  }
4258
+ if (method === "notifications/roots/list_changed") {
4259
+ // The client's open folders changed — re-pull and re-resolve.
4260
+ requestClientRoots();
4261
+ return null;
4262
+ }
3320
4263
  // Notifications (no id) and unknown methods: ignore.
3321
4264
  if (id === undefined) return null;
3322
4265
  return {
@@ -3509,6 +4452,27 @@ async function runSelftest() {
3509
4452
  pass: (r) =>
3510
4453
  Array.isArray(r?.result?.tools) && r.result.tools.length === TOOLS.length,
3511
4454
  },
4455
+ {
4456
+ name: "get_active_workspace reports a resolution source",
4457
+ fn: () =>
4458
+ handle({
4459
+ id: 22,
4460
+ method: "tools/call",
4461
+ params: { name: "get_active_workspace", arguments: {} },
4462
+ }),
4463
+ pass: (r) => {
4464
+ try {
4465
+ const out = JSON.parse(r?.result?.content?.[0]?.text ?? "{}");
4466
+ return (
4467
+ typeof out.resolvedFrom === "string" &&
4468
+ ["arg", "snapshot", "env", "none"].includes(out.resolvedFrom) &&
4469
+ ["broker", "operator", "read-only"].includes(out.writeMode)
4470
+ );
4471
+ } catch {
4472
+ return false;
4473
+ }
4474
+ },
4475
+ },
3512
4476
  {
3513
4477
  name: "list_themes",
3514
4478
  fn: () =>
@@ -3633,6 +4597,93 @@ async function runSelftest() {
3633
4597
  ? r?.result && !r.result.isError
3634
4598
  : r?.result?.isError === true,
3635
4599
  },
4600
+ {
4601
+ name: "propose_task (warn-on-skip: dryRun WITHOUT discovery attaches a capability-fit warning)",
4602
+ fn: async () => {
4603
+ // Fresh session + rubric (mutator gate) but NO capability
4604
+ // discovery → the warn-on-skip path should fire. dryRun avoids
4605
+ // needing write auth and returns before the RPC.
4606
+ resetSession();
4607
+ await handle({
4608
+ id: 300,
4609
+ method: "tools/call",
4610
+ params: { name: "get_agents_md", arguments: {} },
4611
+ });
4612
+ return handle({
4613
+ id: 301,
4614
+ method: "tools/call",
4615
+ params: {
4616
+ name: "propose_task",
4617
+ arguments: {
4618
+ capabilityId: aCap,
4619
+ title: "Selftest warn task",
4620
+ effort: "S",
4621
+ dryRun: true,
4622
+ },
4623
+ },
4624
+ });
4625
+ },
4626
+ pass: (r) => {
4627
+ if (r?.result?.isError) return false;
4628
+ let body;
4629
+ try {
4630
+ body = JSON.parse(r?.result?.content?.[0]?.text);
4631
+ } catch {
4632
+ return false;
4633
+ }
4634
+ // Warning present in both the warnings[] array and the _meta nudge.
4635
+ return (
4636
+ Array.isArray(body?.warnings) &&
4637
+ body.warnings.length === 1 &&
4638
+ typeof r?.result?._meta?.roadmapper?.reminder === "string"
4639
+ );
4640
+ },
4641
+ },
4642
+ {
4643
+ name: "propose_task (warn-on-skip: dryRun AFTER discovery has NO warning)",
4644
+ fn: async () => {
4645
+ resetSession();
4646
+ await handle({
4647
+ id: 310,
4648
+ method: "tools/call",
4649
+ params: { name: "get_agents_md", arguments: {} },
4650
+ });
4651
+ // get_roadmap_snapshot sets capsDiscoveredAt → discovery done.
4652
+ await handle({
4653
+ id: 311,
4654
+ method: "tools/call",
4655
+ params: { name: "get_roadmap_snapshot", arguments: {} },
4656
+ });
4657
+ return handle({
4658
+ id: 312,
4659
+ method: "tools/call",
4660
+ params: {
4661
+ name: "propose_task",
4662
+ arguments: {
4663
+ capabilityId: aCap,
4664
+ title: "Selftest no-warn task",
4665
+ effort: "S",
4666
+ dryRun: true,
4667
+ },
4668
+ },
4669
+ });
4670
+ },
4671
+ pass: (r) => {
4672
+ if (r?.result?.isError) return false;
4673
+ let body;
4674
+ try {
4675
+ body = JSON.parse(r?.result?.content?.[0]?.text);
4676
+ } catch {
4677
+ return false;
4678
+ }
4679
+ // Discovery happened → warnings empty, no _meta nudge.
4680
+ return (
4681
+ Array.isArray(body?.warnings) &&
4682
+ body.warnings.length === 0 &&
4683
+ !r?.result?._meta?.roadmapper?.reminder
4684
+ );
4685
+ },
4686
+ },
3636
4687
  {
3637
4688
  name: "propose_theme (missing name returns error result)",
3638
4689
  fn: () =>
@@ -3812,6 +4863,398 @@ async function runSelftest() {
3812
4863
  }),
3813
4864
  pass: (r) => r?.result?.isError === true,
3814
4865
  },
4866
+ {
4867
+ name: "suggest_capability_for (taskId path builds query from the task + echoes taskId)",
4868
+ fn: () =>
4869
+ handle({
4870
+ id: 201,
4871
+ method: "tools/call",
4872
+ params: {
4873
+ name: "suggest_capability_for",
4874
+ arguments: { taskId: "TK-DEMO" },
4875
+ },
4876
+ }),
4877
+ pass: (r) =>
4878
+ !r?.result?.isError &&
4879
+ r?.result?.content?.[0]?.text?.includes('"matches"') &&
4880
+ r?.result?.content?.[0]?.text?.includes('"taskId": "TK-DEMO"'),
4881
+ },
4882
+ {
4883
+ name: "suggest_capability_for (unknown taskId rejected)",
4884
+ fn: () =>
4885
+ handle({
4886
+ id: 202,
4887
+ method: "tools/call",
4888
+ params: {
4889
+ name: "suggest_capability_for",
4890
+ arguments: { taskId: "TK-000000" },
4891
+ },
4892
+ }),
4893
+ pass: (r) => r?.result?.isError === true,
4894
+ },
4895
+ {
4896
+ name: "suggest_capability_for (description + taskId together rejected)",
4897
+ fn: () =>
4898
+ handle({
4899
+ id: 203,
4900
+ method: "tools/call",
4901
+ params: {
4902
+ name: "suggest_capability_for",
4903
+ arguments: { taskId: "TK-DEMO", description: "x" },
4904
+ },
4905
+ }),
4906
+ pass: (r) => r?.result?.isError === true,
4907
+ },
4908
+ {
4909
+ name: "list_uncategorized_tasks (envelope shape, excludes parented seed task)",
4910
+ fn: () =>
4911
+ handle({
4912
+ id: 204,
4913
+ method: "tools/call",
4914
+ params: {
4915
+ name: "list_uncategorized_tasks",
4916
+ arguments: {},
4917
+ },
4918
+ }),
4919
+ pass: (r) => {
4920
+ if (r?.result?.isError) return false;
4921
+ const text = r?.result?.content?.[0]?.text;
4922
+ if (typeof text !== "string") return false;
4923
+ let env;
4924
+ try {
4925
+ env = JSON.parse(text);
4926
+ } catch {
4927
+ return false;
4928
+ }
4929
+ // New shape: { total, returned, truncated, items }. The seed's
4930
+ // only task (TK-DEMO) is parented under CAP-DEMO, so it must
4931
+ // NOT appear in the uncategorized items.
4932
+ return (
4933
+ Array.isArray(env?.items) &&
4934
+ typeof env.total === "number" &&
4935
+ typeof env.truncated === "boolean" &&
4936
+ !env.items.some((t) => t.id === "TK-DEMO")
4937
+ );
4938
+ },
4939
+ },
4940
+ {
4941
+ name: "list_tasks (default: envelope + LIGHT rows drop prs/acceptance/summary)",
4942
+ fn: () =>
4943
+ handle({
4944
+ id: 205,
4945
+ method: "tools/call",
4946
+ params: { name: "list_tasks", arguments: {} },
4947
+ }),
4948
+ pass: (r) => {
4949
+ if (r?.result?.isError) return false;
4950
+ let env;
4951
+ try {
4952
+ env = JSON.parse(r?.result?.content?.[0]?.text);
4953
+ } catch {
4954
+ return false;
4955
+ }
4956
+ if (!Array.isArray(env?.items)) return false;
4957
+ // Light rows must NOT carry the heavy arrays/text. (Seed has
4958
+ // at least TK-DEMO.) Every returned row is light.
4959
+ return env.items.every(
4960
+ (t) =>
4961
+ !("prs" in t) &&
4962
+ !("acceptance" in t) &&
4963
+ !("acceptanceGrades" in t) &&
4964
+ !("summary" in t) &&
4965
+ "id" in t &&
4966
+ "status" in t
4967
+ );
4968
+ },
4969
+ },
4970
+ {
4971
+ name: "list_tasks (detail:true restores full rows)",
4972
+ fn: () =>
4973
+ handle({
4974
+ id: 206,
4975
+ method: "tools/call",
4976
+ params: { name: "list_tasks", arguments: { detail: true } },
4977
+ }),
4978
+ pass: (r) => {
4979
+ if (r?.result?.isError) return false;
4980
+ let env;
4981
+ try {
4982
+ env = JSON.parse(r?.result?.content?.[0]?.text);
4983
+ } catch {
4984
+ return false;
4985
+ }
4986
+ // TK-DEMO in the seed carries acceptance criteria; detail mode
4987
+ // must surface them. Find it and confirm a heavy field is back.
4988
+ const demo = env?.items?.find((t) => t.id === "TK-DEMO");
4989
+ return !!demo && "acceptance" in demo;
4990
+ },
4991
+ },
4992
+ {
4993
+ name: "list_tasks (limit clamps to the requested cap)",
4994
+ fn: () =>
4995
+ handle({
4996
+ id: 207,
4997
+ method: "tools/call",
4998
+ params: { name: "list_tasks", arguments: { limit: 1 } },
4999
+ }),
5000
+ pass: (r) => {
5001
+ if (r?.result?.isError) return false;
5002
+ let env;
5003
+ try {
5004
+ env = JSON.parse(r?.result?.content?.[0]?.text);
5005
+ } catch {
5006
+ return false;
5007
+ }
5008
+ return (
5009
+ Array.isArray(env?.items) &&
5010
+ env.items.length <= 1 &&
5011
+ env.returned <= 1
5012
+ );
5013
+ },
5014
+ },
5015
+ {
5016
+ name: "list_capabilities (light row carries EFFECTIVE status, not the null column)",
5017
+ fn: () =>
5018
+ handle({
5019
+ id: 210,
5020
+ method: "tools/call",
5021
+ params: { name: "list_capabilities", arguments: {} },
5022
+ }),
5023
+ pass: (r) => {
5024
+ if (r?.result?.isError) return false;
5025
+ let env;
5026
+ try {
5027
+ env = JSON.parse(r?.result?.content?.[0]?.text);
5028
+ } catch {
5029
+ return false;
5030
+ }
5031
+ // CAP-DEMO has no explicit status column; its child task
5032
+ // TK-DEMO is 'planned', so the effective status must derive to
5033
+ // a non-empty value in the light row (regression guard: before
5034
+ // the fix, status was stripped and absent entirely).
5035
+ const demo = env?.items?.find((c) => c.id === "CAP-DEMO");
5036
+ return !!demo && typeof demo.status === "string" && demo.status.length > 0;
5037
+ },
5038
+ },
5039
+ {
5040
+ name: "list_capabilities (compact JSON — no pretty-print whitespace)",
5041
+ fn: () =>
5042
+ handle({
5043
+ id: 208,
5044
+ method: "tools/call",
5045
+ params: { name: "list_capabilities", arguments: {} },
5046
+ }),
5047
+ pass: (r) => {
5048
+ if (r?.result?.isError) return false;
5049
+ const text = r?.result?.content?.[0]?.text;
5050
+ if (typeof text !== "string") return false;
5051
+ // Pretty-print would emit '\n ' indentation. Compact must
5052
+ // not. Also confirm it parses to the envelope shape.
5053
+ if (/\n\s\s/.test(text)) return false;
5054
+ let env;
5055
+ try {
5056
+ env = JSON.parse(text);
5057
+ } catch {
5058
+ return false;
5059
+ }
5060
+ return Array.isArray(env?.items) && typeof env.total === "number";
5061
+ },
5062
+ },
5063
+ {
5064
+ name: "get_roadmap_snapshot (summary mode by default: light tasks + counts + mode flag)",
5065
+ fn: () =>
5066
+ handle({
5067
+ id: 209,
5068
+ method: "tools/call",
5069
+ params: { name: "get_roadmap_snapshot", arguments: {} },
5070
+ }),
5071
+ pass: (r) => {
5072
+ if (r?.result?.isError) return false;
5073
+ let snap;
5074
+ try {
5075
+ snap = JSON.parse(r?.result?.content?.[0]?.text);
5076
+ } catch {
5077
+ return false;
5078
+ }
5079
+ if (snap?.mode !== "summary") return false;
5080
+ if (!snap?.counts || typeof snap.counts.totalTasks !== "number")
5081
+ return false;
5082
+ // Light tasks in summary mode: no heavy arrays.
5083
+ return (snap.tasks ?? []).every(
5084
+ (t) => !("prs" in t) && !("acceptance" in t)
5085
+ );
5086
+ },
5087
+ },
5088
+ {
5089
+ name: "detect_capability_gaps (through handle: seed has no orphans → well-formed empty result)",
5090
+ fn: () =>
5091
+ handle({
5092
+ id: 211,
5093
+ method: "tools/call",
5094
+ params: { name: "detect_capability_gaps", arguments: {} },
5095
+ }),
5096
+ pass: (r) => {
5097
+ if (r?.result?.isError) return false;
5098
+ let out;
5099
+ try {
5100
+ out = JSON.parse(r?.result?.content?.[0]?.text);
5101
+ } catch {
5102
+ return false;
5103
+ }
5104
+ // Seed's only task is categorized → zero homeless, zero gaps,
5105
+ // but the envelope fields must all be present and numeric.
5106
+ return (
5107
+ Array.isArray(out?.gaps) &&
5108
+ out.gapCount === 0 &&
5109
+ typeof out.uncategorizedScanned === "number" &&
5110
+ typeof out.homelessCount === "number"
5111
+ );
5112
+ },
5113
+ },
5114
+ {
5115
+ name: "detect_capability_gaps (direct: clusters homeless tasks, names the cluster by shared keywords)",
5116
+ fn: () => {
5117
+ // Fixture: one active capability about 'billing invoices', plus
5118
+ // 4 uncategorized tasks — 3 clearly about 'wallet apple google
5119
+ // pass' (a missing bet) and 1 lone 'documentation typo'.
5120
+ const projected = {
5121
+ themes: [{ id: "TH-1", name: "Platform", description: "" }],
5122
+ capabilities: [
5123
+ {
5124
+ id: "CAP-BILL",
5125
+ pillarId: "TH-1",
5126
+ name: "Billing invoices",
5127
+ description: "invoice generation and billing runs",
5128
+ outcome: "invoices delivered",
5129
+ status: "in_progress",
5130
+ },
5131
+ ],
5132
+ tasks: [
5133
+ { id: "TK-1", capabilityId: null, title: "wallet apple pass provisioning", summary: "google wallet pass signing" },
5134
+ { id: "TK-2", capabilityId: null, title: "google wallet pass install telemetry", summary: "apple wallet pass rate" },
5135
+ { id: "TK-3", capabilityId: null, title: "apple google wallet pass dashboard", summary: "wallet pass provisioning ui" },
5136
+ { id: "TK-4", capabilityId: null, title: "fix documentation typo", summary: "" },
5137
+ ],
5138
+ };
5139
+ return detectCapabilityGaps({ minClusterSize: 3 }, projected);
5140
+ },
5141
+ pass: (r) => {
5142
+ if (r?.isError) return false;
5143
+ let out;
5144
+ try {
5145
+ out = JSON.parse(r?.content?.[0]?.text);
5146
+ } catch {
5147
+ return false;
5148
+ }
5149
+ // The 3 wallet-pass tasks cluster; the lone typo task does not
5150
+ // reach minClusterSize=3. So exactly one gap of size 3, named
5151
+ // from the shared 'wallet/pass' vocabulary.
5152
+ if (out.gapCount !== 1) return false;
5153
+ const gap = out.gaps[0];
5154
+ return (
5155
+ gap.size === 3 &&
5156
+ gap.keywords.includes("wallet") &&
5157
+ gap.keywords.includes("pass") &&
5158
+ !gap.tasks.some((t) => t.id === "TK-4")
5159
+ );
5160
+ },
5161
+ },
5162
+ {
5163
+ name: "detect_capability_gaps (direct: tasks that FIT an existing cap are not homeless)",
5164
+ fn: () => {
5165
+ // 3 tasks that clearly match the billing capability — they must
5166
+ // NOT be reported as a gap (they have a home; they just need
5167
+ // filing via move_task, not a new capability).
5168
+ const projected = {
5169
+ themes: [{ id: "TH-1", name: "Platform", description: "" }],
5170
+ capabilities: [
5171
+ {
5172
+ id: "CAP-BILL",
5173
+ pillarId: "TH-1",
5174
+ name: "Billing invoices generation",
5175
+ description: "invoice generation billing runs dunning",
5176
+ outcome: "invoices delivered to every customer",
5177
+ status: "in_progress",
5178
+ },
5179
+ ],
5180
+ tasks: [
5181
+ { id: "TK-1", capabilityId: null, title: "billing invoice generation bug", summary: "invoice runs" },
5182
+ { id: "TK-2", capabilityId: null, title: "invoice generation dunning", summary: "billing runs" },
5183
+ { id: "TK-3", capabilityId: null, title: "billing invoice dunning runs", summary: "invoice generation" },
5184
+ ],
5185
+ };
5186
+ return detectCapabilityGaps({ minClusterSize: 2 }, projected);
5187
+ },
5188
+ pass: (r) => {
5189
+ if (r?.isError) return false;
5190
+ let out;
5191
+ try {
5192
+ out = JSON.parse(r?.content?.[0]?.text);
5193
+ } catch {
5194
+ return false;
5195
+ }
5196
+ // They fit CAP-BILL, so zero homeless → zero gaps.
5197
+ return out.gapCount === 0 && out.homelessCount === 0;
5198
+ },
5199
+ },
5200
+ {
5201
+ name: "detect_capability_gaps (direct: minClusterSize filters clusters below the floor)",
5202
+ fn: () => {
5203
+ const projected = {
5204
+ themes: [{ id: "TH-1", name: "Platform", description: "" }],
5205
+ capabilities: [],
5206
+ tasks: [
5207
+ { id: "TK-1", capabilityId: null, title: "wallet apple pass", summary: "google wallet" },
5208
+ { id: "TK-2", capabilityId: null, title: "google wallet pass", summary: "apple wallet" },
5209
+ ],
5210
+ };
5211
+ // Two homeless tasks cluster, but minClusterSize=3 drops it.
5212
+ return detectCapabilityGaps({ minClusterSize: 3 }, projected);
5213
+ },
5214
+ pass: (r) => {
5215
+ if (r?.isError) return false;
5216
+ let out;
5217
+ try {
5218
+ out = JSON.parse(r?.content?.[0]?.text);
5219
+ } catch {
5220
+ return false;
5221
+ }
5222
+ return out.homelessCount === 2 && out.gapCount === 0;
5223
+ },
5224
+ },
5225
+ {
5226
+ name: "detect_capability_gaps (regression: non-numeric minClusterSize falls back to default, not NaN)",
5227
+ fn: () => {
5228
+ // 3 wallet-pass tasks that SHOULD cluster into a gap at the
5229
+ // default minClusterSize=3. Pass a non-numeric minClusterSize
5230
+ // (a non-compliant client / explicit null). Before the guard,
5231
+ // Math.floor(NaN) propagated → members.length >= NaN always
5232
+ // false → zero gaps (silent wrong answer). After: falls back to
5233
+ // default 3, so the gap is still detected.
5234
+ const projected = {
5235
+ themes: [{ id: "TH-1", name: "Platform", description: "" }],
5236
+ capabilities: [],
5237
+ tasks: [
5238
+ { id: "TK-1", capabilityId: null, title: "wallet apple pass provisioning", summary: "google wallet pass" },
5239
+ { id: "TK-2", capabilityId: null, title: "google wallet pass telemetry", summary: "apple wallet pass" },
5240
+ { id: "TK-3", capabilityId: null, title: "apple google wallet pass dashboard", summary: "wallet pass" },
5241
+ ],
5242
+ };
5243
+ return detectCapabilityGaps({ minClusterSize: null }, projected);
5244
+ },
5245
+ pass: (r) => {
5246
+ if (r?.isError) return false;
5247
+ let out;
5248
+ try {
5249
+ out = JSON.parse(r?.content?.[0]?.text);
5250
+ } catch {
5251
+ return false;
5252
+ }
5253
+ // minClusterSize coerced to default 3, NOT NaN → the 3-task
5254
+ // cluster is detected.
5255
+ return out.minClusterSize === 3 && out.gapCount === 1;
5256
+ },
5257
+ },
3815
5258
  {
3816
5259
  // suggest_theme_for is the theme-level mirror — same shape,
3817
5260
  // returns ranked matches against an arbitrary description.
@@ -4151,6 +5594,155 @@ async function runSelftest() {
4151
5594
  );
4152
5595
  },
4153
5596
  },
5597
+ {
5598
+ // Seed-workspace guard: no arg + no snapshot + env default ==
5599
+ // seed workspace "default" → refuse the mutator. Env is set and
5600
+ // restored around the call; snapshot forced absent.
5601
+ name: "seed-workspace write refused when resolved from env default",
5602
+ fn: async () => {
5603
+ const prevR = process.env.ROADMAPPER_WORKSPACE_ID;
5604
+ const prevS = process.env.SUPABASE_WORKSPACE_ID;
5605
+ try {
5606
+ resetSession();
5607
+ session.rubricFetchedAt = Date.now();
5608
+ __setSnapshotWorkspaceForTest(null);
5609
+ process.env.ROADMAPPER_WORKSPACE_ID = "default";
5610
+ delete process.env.SUPABASE_WORKSPACE_ID;
5611
+ return await handle({
5612
+ id: 38,
5613
+ method: "tools/call",
5614
+ params: {
5615
+ name: "propose_task",
5616
+ arguments: { capabilityId: "CAP-X", title: "x", effort: "M" },
5617
+ },
5618
+ });
5619
+ } finally {
5620
+ __setSnapshotWorkspaceForTest(undefined);
5621
+ if (prevR === undefined) delete process.env.ROADMAPPER_WORKSPACE_ID;
5622
+ else process.env.ROADMAPPER_WORKSPACE_ID = prevR;
5623
+ if (prevS !== undefined) process.env.SUPABASE_WORKSPACE_ID = prevS;
5624
+ }
5625
+ },
5626
+ pass: (r) => {
5627
+ if (!r?.result?.isError) return false;
5628
+ const txt = r.result.content?.[0]?.text ?? "";
5629
+ return txt.includes("seed/demo workspace");
5630
+ },
5631
+ },
5632
+ {
5633
+ // Explicit workspaceId="default" makes the source "arg", a
5634
+ // deliberate choice — the seed guard must NOT fire. The call is
5635
+ // expected to fail downstream instead (no write auth); the check
5636
+ // below only asserts that the seed-guard refusal text is absent.
5637
+ name: "explicit workspaceId arg bypasses the seed-workspace guard",
5638
+ fn: async () => {
5639
+ const prevR = process.env.ROADMAPPER_WORKSPACE_ID;
5640
+ try {
5641
+ resetSession();
5642
+ session.rubricFetchedAt = Date.now();
5643
+ __setSnapshotWorkspaceForTest(null);
5644
+ process.env.ROADMAPPER_WORKSPACE_ID = "default";
5645
+ return await handle({
5646
+ id: 39,
5647
+ method: "tools/call",
5648
+ params: {
5649
+ name: "propose_task",
5650
+ arguments: {
5651
+ capabilityId: "CAP-X",
5652
+ title: "x",
5653
+ effort: "M",
5654
+ workspaceId: "default",
5655
+ },
5656
+ },
5657
+ });
5658
+ } finally {
5659
+ __setSnapshotWorkspaceForTest(undefined);
5660
+ if (prevR === undefined) delete process.env.ROADMAPPER_WORKSPACE_ID;
5661
+ else process.env.ROADMAPPER_WORKSPACE_ID = prevR;
5662
+ }
5663
+ },
5664
+ pass: (r) => {
5665
+ const txt = r?.result?.content?.[0]?.text ?? "";
5666
+ return !txt.includes("seed/demo workspace");
5667
+ },
5668
+ },
5669
+ {
5670
+ // ROADMAPPER_ALLOW_SEED_WORKSPACE=1 opts out of the seed guard
5671
+ // (parity with the cross-workspace override). Guard must NOT fire
5672
+ // even when resolved from the env default.
5673
+ name: "ROADMAPPER_ALLOW_SEED_WORKSPACE=1 disables the seed-workspace guard",
5674
+ fn: async () => {
5675
+ const prevR = process.env.ROADMAPPER_WORKSPACE_ID;
5676
+ const prevS = process.env.SUPABASE_WORKSPACE_ID;
5677
+ const prevAllow = process.env.ROADMAPPER_ALLOW_SEED_WORKSPACE;
5678
+ try {
5679
+ resetSession();
5680
+ session.rubricFetchedAt = Date.now();
5681
+ __setSnapshotWorkspaceForTest(null);
5682
+ process.env.ROADMAPPER_WORKSPACE_ID = "default";
5683
+ delete process.env.SUPABASE_WORKSPACE_ID;
5684
+ process.env.ROADMAPPER_ALLOW_SEED_WORKSPACE = "1";
5685
+ return await handle({
5686
+ id: 138,
5687
+ method: "tools/call",
5688
+ params: {
5689
+ name: "propose_task",
5690
+ arguments: { capabilityId: "CAP-X", title: "x", effort: "M" },
5691
+ },
5692
+ });
5693
+ } finally {
5694
+ __setSnapshotWorkspaceForTest(undefined);
5695
+ if (prevR === undefined) delete process.env.ROADMAPPER_WORKSPACE_ID;
5696
+ else process.env.ROADMAPPER_WORKSPACE_ID = prevR;
5697
+ if (prevS !== undefined) process.env.SUPABASE_WORKSPACE_ID = prevS;
5698
+ if (prevAllow === undefined)
5699
+ delete process.env.ROADMAPPER_ALLOW_SEED_WORKSPACE;
5700
+ else process.env.ROADMAPPER_ALLOW_SEED_WORKSPACE = prevAllow;
5701
+ }
5702
+ },
5703
+ pass: (r) => {
5704
+ const txt = r?.result?.content?.[0]?.text ?? "";
5705
+ return !txt.includes("seed/demo workspace");
5706
+ },
5707
+ },
5708
+ {
5709
+ // dryRun is non-destructive validation — it must pass BOTH guards (seed and
5710
+ // cross-workspace) even when resolved from the seed env default.
5711
+ name: "dryRun bypasses the seed-workspace guard",
5712
+ fn: async () => {
5713
+ const prevR = process.env.ROADMAPPER_WORKSPACE_ID;
5714
+ const prevS = process.env.SUPABASE_WORKSPACE_ID;
5715
+ try {
5716
+ resetSession();
5717
+ session.rubricFetchedAt = Date.now();
5718
+ __setSnapshotWorkspaceForTest(null);
5719
+ process.env.ROADMAPPER_WORKSPACE_ID = "default";
5720
+ delete process.env.SUPABASE_WORKSPACE_ID;
5721
+ return await handle({
5722
+ id: 139,
5723
+ method: "tools/call",
5724
+ params: {
5725
+ name: "propose_task",
5726
+ arguments: {
5727
+ capabilityId: "CAP-X",
5728
+ title: "x",
5729
+ effort: "M",
5730
+ dryRun: true,
5731
+ },
5732
+ },
5733
+ });
5734
+ } finally {
5735
+ __setSnapshotWorkspaceForTest(undefined);
5736
+ if (prevR === undefined) delete process.env.ROADMAPPER_WORKSPACE_ID;
5737
+ else process.env.ROADMAPPER_WORKSPACE_ID = prevR;
5738
+ if (prevS !== undefined) process.env.SUPABASE_WORKSPACE_ID = prevS;
5739
+ }
5740
+ },
5741
+ pass: (r) => {
5742
+ const txt = r?.result?.content?.[0]?.text ?? "";
5743
+ return !txt.includes("seed/demo workspace");
5744
+ },
5745
+ },
4154
5746
  {
4155
5747
  // record_outcome_reading rejects missing value.
4156
5748
  name: "record_outcome_reading (missing value returns error result)",
@@ -4235,6 +5827,59 @@ async function runSelftest() {
4235
5827
  return !txt.includes("Refusing cross-workspace");
4236
5828
  },
4237
5829
  },
5830
+ {
5831
+ // Root-derived workspace (repo→repo_workspace_map) beats the cwd
5832
+ // snapshot AND the env default. This is the multi-repo fix: the
5833
+ // repo the agent is actually in wins.
5834
+ name: "root workspace (repo map) beats snapshot in resolution order",
5835
+ fn: () => {
5836
+ try {
5837
+ __setRootWorkspaceForTest("ws-from-repo", "owner/repo");
5838
+ __setSnapshotWorkspaceForTest("ws-from-snapshot");
5839
+ const { id, source } = resolveWorkspaceWithSource();
5840
+ return { result: { id, source } };
5841
+ } finally {
5842
+ __setRootWorkspaceForTest(undefined);
5843
+ __setSnapshotWorkspaceForTest(undefined);
5844
+ }
5845
+ },
5846
+ pass: (r) =>
5847
+ r?.result?.id === "ws-from-repo" && r?.result?.source === "repo",
5848
+ },
5849
+ {
5850
+ // With no root mapping, resolution falls back to the snapshot —
5851
+ // the existing offline path must still work.
5852
+ name: "resolution falls back to snapshot when no root mapping",
5853
+ fn: () => {
5854
+ try {
5855
+ __setRootWorkspaceForTest(null);
5856
+ __setSnapshotWorkspaceForTest("ws-from-snapshot");
5857
+ const { id, source } = resolveWorkspaceWithSource();
5858
+ return { result: { id, source } };
5859
+ } finally {
5860
+ __setRootWorkspaceForTest(undefined);
5861
+ __setSnapshotWorkspaceForTest(undefined);
5862
+ }
5863
+ },
5864
+ pass: (r) =>
5865
+ r?.result?.id === "ws-from-snapshot" && r?.result?.source === "snapshot",
5866
+ },
5867
+ {
5868
+ // setClientRoots is given a file:// URI and a bare path; it must
5869
+ // invalidate the cached resolution (the only thing asserted here).
5870
+ name: "setClientRoots parses file:// URIs and bare paths",
5871
+ fn: () => {
5872
+ __setRootWorkspaceForTest("stale"); // should be invalidated
5873
+ setClientRoots([
5874
+ { uri: "file:///Users/x/proj-a" },
5875
+ { uri: "/Users/x/proj-b" },
5876
+ ]);
5877
+ return { result: { cleared: rootWorkspaceId() } };
5878
+ },
5879
+ // After setClientRoots, the cache is reset to undefined → rootWorkspaceId()
5880
+ // returns null until resolveRootWorkspace() runs. So "cleared" must be null.
5881
+ pass: (r) => r?.result?.cleared === null,
5882
+ },
4238
5883
  ];
4239
5884
 
4240
5885
  let passed = 0;
@@ -4283,8 +5928,19 @@ if (process.argv.includes("--selftest")) {
4283
5928
  log("bad json", line.slice(0, 200));
4284
5929
  continue;
4285
5930
  }
5931
+ // A message with no `method` but a result/error is a RESPONSE to a
5932
+ // request WE sent (e.g. our roots/list). Route it, don't dispatch.
5933
+ if (msg.method === undefined && (msg.result !== undefined || msg.error !== undefined)) {
5934
+ await handleClientResponse(msg);
5935
+ continue;
5936
+ }
4286
5937
  const response = await handle(msg);
4287
5938
  if (response) send(response);
5939
+ // After answering initialize, ask the client for its roots so we
5940
+ // can resolve the per-repo workspace. Done here (not inside the
5941
+ // handler) because the client only starts listening for our
5942
+ // requests once it has our initialize result in hand.
5943
+ if (msg.method === "initialize") requestClientRoots();
4288
5944
  }
4289
5945
  });
4290
5946