@roadmapperai/mcp 0.7.1 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. package/AGENTS.md +47 -2
  2. package/README.md +37 -2
  3. package/package.json +1 -1
  4. package/server.mjs +1825 -94
package/server.mjs CHANGED
@@ -8,7 +8,12 @@
8
8
  * Customer-facing env vars (brand-named, no backend disclosure):
9
9
  * ROADMAPPER_BACKEND_URL — backend project URL
10
10
  * ROADMAPPER_PUBLISHABLE_KEY — public client key (RLS-scoped)
11
- * ROADMAPPER_WORKSPACE_ID — target workspace
11
+ * ROADMAPPER_WORKSPACE_ID — DEFAULT workspace (optional). With
12
+ * one install serving many repos, the
13
+ * server prefers the repo you're in
14
+ * (see "Workspace resolution" below);
15
+ * this env var is only the fallback for
16
+ * repos with no mapping / no roots.
12
17
  * ROADMAPPER_API_KEY — write auth (rmpr_… token from
13
18
  * Settings → MCP activity → API keys)
14
19
  * ROADMAPPER_BROKER_URL — optional override for the write
@@ -47,6 +52,17 @@
47
52
  * validated server-side at the broker) or ROADMAPPER_ADMIN_KEY
48
53
  * (operator path, bypasses RLS).
49
54
  *
55
+ * Workspace resolution (which workspace a tool call targets), in order:
56
+ * 1. Explicit `workspaceId` arg on the call.
57
+ * 2. The repo the agent is in: MCP `roots` → git origin → owner/repo →
58
+ * `repo_workspace_map` (the mapping you set in the Roadmapper
59
+ * GitHub-connect UI). This is what makes ONE install work across
60
+ * MANY repos/workspaces — no per-repo config or env juggling.
61
+ * 3. `.roadmapper/snapshot.json` in cwd (committed offline fallback).
62
+ * 4. ROADMAPPER_WORKSPACE_ID env (the install default).
63
+ * Mismatch between an explicit arg and #2/#3 is refused (cross-workspace
64
+ * guard) unless ROADMAPPER_ALLOW_CROSS_WORKSPACE=1.
65
+ *
50
66
  * Self-test: `node mcp/server.mjs --selftest` exercises every tool
51
67
  * against the local seed and exits 0 on success, 1 on failure. Useful
52
68
  * for verifying the install without an MCP client.
@@ -58,6 +74,10 @@
58
74
  import { readFileSync, existsSync } from "node:fs";
59
75
  import { dirname, join, resolve } from "node:path";
60
76
  import { fileURLToPath } from "node:url";
77
+ import { execFile } from "node:child_process";
78
+ import { promisify } from "node:util";
79
+
80
+ const execFileAsync = promisify(execFile);
61
81
 
62
82
  const HERE = dirname(fileURLToPath(import.meta.url));
63
83
  const REPO = resolve(HERE, "..");
@@ -75,7 +95,22 @@ const REPO_AGENTS_PATH = join(REPO, "AGENTS.md");
75
95
 
76
96
  const PROTOCOL_VERSION = "2024-11-05";
77
97
  const SERVER_NAME = "roadmapper";
78
- const SERVER_VERSION = "0.6.0";
98
// SERVER_VERSION is taken from the package.json that ships next to this
// file (it is in the npm `files` allow-list), so the reported version
// cannot drift from the published package. The previous hardcoded
// constant went stale ("0.6.0" while the package was at 0.9.0), which
// mis-stamped every audit-log row's server_version.
// scripts/check-mcp-version.mjs guards the dashboard's LATEST_MCP_VERSION
// the same way; this is the server-side counterpart.
// "0.0.0" is the fallback for an unreadable or malformed manifest, or a
// manifest without a string `version`.
const SERVER_VERSION = (() => {
  const UNKNOWN_VERSION = "0.0.0";
  try {
    const manifestPath = join(HERE, "package.json");
    const { version } = JSON.parse(readFileSync(manifestPath, "utf-8"));
    if (typeof version === "string") return version;
    return UNKNOWN_VERSION;
  } catch {
    // Missing file, bad JSON, or a non-object payload all land here.
    return UNKNOWN_VERSION;
  }
})();
79
114
 
80
115
  // Must match src/types.ts EFFORT_DAYS — AI-era calibration.
81
116
  // Fractional values (XS=0.25, S=0.5) get rounded up when used to
@@ -90,10 +125,104 @@ function log(...args) {
90
125
  console.error("[roadmapper-mcp]", ...args);
91
126
  }
92
127
 
128
/**
 * Compare two semver-ish strings ("1.2.3").
 *
 * Only the numeric major.minor.patch components are compared. A
 * prerelease/build suffix is ignored ("1.2.3-beta.1" compares equal to
 * "1.2.3"), missing components count as 0 ("1.2" equals "1.2.0"), and a
 * leading "v" plus surrounding whitespace are tolerated ("v1.2.3"). That
 * is enough for "is there a newer published release?"; the update probe
 * stays silent on a 0 or negative result, so an odd suffix never
 * produces a false "upgrade available" nag.
 *
 * @param {string} a - left-hand version.
 * @param {string} b - right-hand version.
 * @returns {number} 1 if a > b, -1 if a < b, 0 if equal.
 */
function compareVersions(a, b) {
  // Without stripping the "v", parseInt("v1") is NaN and the major
  // component would silently collapse to 0.
  const toParts = (version) =>
    String(version)
      .trim()
      .replace(/^v/i, "")
      .split(".")
      .map((part) => Number.parseInt(part, 10) || 0);

  const left = toParts(a);
  const right = toParts(b);
  for (let i = 0; i < 3; i++) {
    const l = left[i] ?? 0;
    const r = right[i] ?? 0;
    if (l > r) return 1;
    if (l < r) return -1;
  }
  return 0;
}
147
+
148
/**
 * Best-effort, advisory staleness check run at startup.
 *
 * Asks the npm registry for the `latest` dist-tag of @roadmapperai/mcp
 * and, when the running SERVER_VERSION is older, writes a one-line nudge
 * through log(). Nothing is updated automatically: the install is pinned
 * in the client's MCP config, so the message tells the user to re-copy
 * the install command from Settings → MCP.
 *
 * Every failure (offline, blocked, non-2xx, bad JSON, or the 2.5s abort)
 * is swallowed on purpose so a flaky network never spams the log.
 * Set ROADMAPPER_DISABLE_UPDATE_CHECK=1 to skip the check entirely.
 *
 * @returns {Promise<void>}
 */
async function checkForUpdate() {
  const optedOut = process.env.ROADMAPPER_DISABLE_UPDATE_CHECK === "1";
  // "0.0.0" means the version was unreadable; there is nothing to compare.
  if (optedOut || SERVER_VERSION === "0.0.0") return;

  const aborter = new AbortController();
  const deadline = setTimeout(() => aborter.abort(), 2500);
  try {
    const response = await fetch(
      "https://registry.npmjs.org/@roadmapperai/mcp/latest",
      { headers: { Accept: "application/json" }, signal: aborter.signal }
    );
    if (!response.ok) return;

    const manifest = await response.json();
    const published =
      manifest && typeof manifest.version === "string" ? manifest.version : null;
    if (!published) return;

    if (compareVersions(published, SERVER_VERSION) > 0) {
      log(
        `update available: v${SERVER_VERSION} installed, v${published} published. ` +
          `This install is pinned — re-copy the install command from ` +
          `Settings → MCP in the Roadmapper dashboard to upgrade.`
      );
    }
  } catch {
    // Offline, blocked, slow, or aborted — stay silent by design.
  } finally {
    clearTimeout(deadline);
  }
}
190
+
93
191
  function send(message) {
94
192
  process.stdout.write(JSON.stringify(message) + "\n");
95
193
  }
96
194
 
195
/**
 * Send a server→client `roots/list` request asking for the client's
 * current roots (workspace folders). The reply comes back as an ordinary
 * JSON-RPC response and is matched by ROOTS_LIST_REQUEST_ID in
 * handleClientResponse(). Does nothing when the client never declared
 * roots support.
 */
function requestClientRoots() {
  if (_clientSupportsRoots) {
    const request = {
      jsonrpc: "2.0",
      id: ROOTS_LIST_REQUEST_ID,
      method: "roots/list",
    };
    send(request);
  }
}
205
+
206
/**
 * Process a JSON-RPC *response* sent by the client (as opposed to a
 * request). Only the reply to our own roots/list request is handled;
 * anything with a different id is ignored.
 *
 * On success the advertised roots are recorded, the root→workspace
 * mapping is re-resolved (and cached for the synchronous resolvers), and
 * a diagnostic line is logged when the workspace came from a repo.
 *
 * @param {object} msg - the parsed JSON-RPC response.
 * @returns {Promise<void>}
 */
async function handleClientResponse(msg) {
  const isRootsReply = msg.id === ROOTS_LIST_REQUEST_ID;
  if (!isRootsReply) return;

  if (msg.error) {
    log("roots/list failed:", msg.error?.message ?? msg.error);
    return;
  }

  // A reply without `result.roots` is treated as "no roots".
  setClientRoots(msg.result?.roots ?? []);
  await resolveRootWorkspace();

  const resolved = resolveWorkspaceWithSource();
  if (resolved.source !== "repo") return;
  log(`workspace resolved from repo ${resolved.repo} → ${resolved.id}`);
}
225
+
97
226
  function readSeed() {
98
227
  try {
99
228
  return JSON.parse(readFileSync(SEED_PATH, "utf-8"));
@@ -160,6 +289,49 @@ async function readAgentsMdForWorkspace() {
160
289
  return readAgentsMd();
161
290
  }
162
291
 
292
/**
 * Read the workspace's roadmap entities (pillars, capabilities, tasks)
 * through the mcp-broker — the authenticated read path for a customer
 * (rmpr_ key) install.
 *
 * Rationale: RLS only grants SELECT to an authenticated workspace
 * member. The publishable key held by the MCP is the anon role with no
 * user, so a direct PostgREST read returns zero rows. The broker
 * validates the rmpr_ key server-side and reads with the service role,
 * scoped to that key's workspace (same pattern as the rubric/labels
 * reads).
 *
 * @returns {Promise<{pillars: Array, capabilities: Array, tasks: Array} | null>}
 *   Raw rows on success (a missing or non-array field becomes []), or
 *   null when no API key / broker URL is configured, on a non-2xx reply,
 *   on a non-object payload, or on any thrown error. On null the caller
 *   falls back to the direct read in readWorkspaceProjected.
 */
async function fetchWorkspaceEntitiesViaBroker() {
  const config = supabaseConfig();
  if (!config.apiKey || !config.brokerUrl) return null;

  const listOrEmpty = (value) => (Array.isArray(value) ? value : []);
  try {
    const requestInit = {
      method: "POST",
      headers: {
        Authorization: `Bearer ${config.apiKey}`,
        "content-type": "application/json",
        Accept: "application/json",
      },
      body: JSON.stringify({ rpc: "get_workspace_entities", body: {} }),
    };
    const response = await fetch(config.brokerUrl, requestInit);
    if (!response.ok) return null;

    const payload = await response.json();
    if (!payload || typeof payload !== "object") return null;

    return {
      pillars: listOrEmpty(payload.pillars),
      capabilities: listOrEmpty(payload.capabilities),
      tasks: listOrEmpty(payload.tasks),
    };
  } catch {
    return null;
  }
}
334
+
163
335
  /**
164
336
  * Per-workspace label cache for tool descriptions.
165
337
  *
@@ -391,30 +563,228 @@ function __setSnapshotWorkspaceForTest(value) {
391
563
  _snapshotWorkspace = value;
392
564
  }
393
565
 
566
+ // ── MCP roots → per-repo workspace resolution ───────────────────────
567
+ //
568
+ // THE PROBLEM this solves: a stdio MCP server is spawned ONCE by the
569
+ // client (Claude Code) with a fixed process.cwd() — usually $HOME or
570
+ // the first project the client opened. That cwd does NOT change as the
571
+ // agent moves between repos. So the old `.roadmapper/snapshot.json in
572
+ // cwd` resolution never fired for the common case: one MCP install,
573
+ // many repos, each mapped to a different workspace. Everything fell
574
+ // through to the single env default — silently polluting one workspace
575
+ // with every repo's planning.
576
+ //
577
+ // THE FIX: MCP clients advertise `roots` (the workspace folders they're
578
+ // operating in) at initialize and via notifications/roots/list_changed.
579
+ // We capture those, derive each root's GitHub `owner/repo` from its git
580
+ // remote, and look the repo up in `repo_workspace_map` — the SAME table
581
+ // the Roadmapper GitHub-connect UI writes when you map a repo to a
582
+ // workspace. So "which workspace is this repo?" is answered by the
583
+ // server-side mapping the user already configured. Zero per-repo config.
584
+ //
585
+ // Resolution is async (DB lookup), but the per-call resolvers are sync,
586
+ // so we resolve on initialize / roots-change and cache the result here.
587
+ let _clientRoots = []; // array of absolute dir paths from the client
588
+ let _rootWorkspace = undefined; // undefined=unresolved, null=resolved-but-none, string=workspaceId
589
+ let _rootWorkspaceRepo = null; // the owner/repo that resolved (for diagnostics)
590
+ let _clientSupportsRoots = false; // set from initialize params.capabilities.roots
591
+ const ROOTS_LIST_REQUEST_ID = "roadmapper-roots-list"; // our id for the roots/list request we send
592
+
593
/**
 * Turn a client root into a directory path.
 *
 * @param {unknown} uri - a `file://` URI, or a bare path (some clients
 *   send one).
 * @returns {string | null} the decoded path for a `file://` URI, the
 *   input unchanged for any other non-empty string, or null for a
 *   non-string, an empty string, or a `file://` URI that cannot be
 *   converted.
 */
function rootUriToPath(uri) {
  const usable = typeof uri === "string" && uri.length > 0;
  if (!usable) return null;

  // Anything that is not a file:// URI is passed through as a bare path.
  if (!uri.startsWith("file://")) return uri;

  try {
    return fileURLToPath(uri);
  } catch {
    return null;
  }
}
605
+
606
/**
 * Store the roots the client advertised (called from initialize and from
 * the roots/list reply). Each entry may be a `{ uri }` object or a plain
 * string; entries that do not convert to a non-empty path are dropped.
 * A non-array argument is ignored and leaves all state untouched.
 */
function setClientRoots(roots) {
  if (!Array.isArray(roots)) return;

  const paths = [];
  for (const entry of roots) {
    const path = rootUriToPath(entry?.uri ?? entry);
    if (typeof path === "string" && path.length > 0) paths.push(path);
  }
  _clientRoots = paths;

  // The cached resolution belongs to the old roots; mark it unresolved so
  // the next resolveRootWorkspace() derives it afresh.
  _rootWorkspace = undefined;
  _rootWorkspaceRepo = null;
}
616
+
617
/**
 * Derive `owner/repo` from a directory's git `origin` remote.
 *
 * `git -C <dir>` locates the enclosing repository itself, so `dir` may
 * be any directory inside the checkout.
 *
 * @param {string} dir - directory to inspect.
 * @returns {Promise<string | null>} the `owner/repo` slug, or null when
 *   the git call fails or times out (not a repo, no origin, git missing)
 *   or the remote URL does not end in an owner/repo pair.
 */
async function repoSlugForDir(dir) {
  try {
    // Async so a slow/hanging git call never blocks the stdin event loop
    // (this runs while handling the client's roots/list reply). 2s cap.
    const { stdout } = await execFileAsync(
      "git",
      ["-C", dir, "remote", "get-url", "origin"],
      { encoding: "utf8", timeout: 2000 }
    );
    const remote = stdout.trim();
    // Normalize https + ssh forms to owner/repo:
    //   https://github.com/owner/repo.git
    //   git@github.com:owner/repo.git
    // Trailing slashes are tolerated ("…/owner/repo/"): git accepts such
    // remotes, and rejecting them would silently drop the repo mapping.
    const match = remote.match(/[/:]([^/:]+\/[^/]+?)(?:\.git)?\/*$/);
    return match ? match[1] : null;
  } catch {
    return null;
  }
}
641
+
642
/**
 * Look up an `owner/repo` slug in repo_workspace_map (enabled rows only)
 * over the Supabase REST endpoint.
 *
 * The write key is preferred over the read key when both are configured
 * (service-role access, so RLS does not hide the row). Best-effort: any
 * failure yields null and the caller falls through to snapshot/env.
 *
 * @param {string} slug - `owner/repo`.
 * @returns {Promise<string | null>} the mapped workspace_id, or null for
 *   an empty slug, missing url/key, a non-2xx reply, no matching row, or
 *   any thrown error.
 */
async function workspaceForRepoSlug(slug) {
  if (!slug) return null;

  const config = supabaseConfig();
  const key = config.writeKey || config.readKey;
  if (!config.url || !key) return null;

  try {
    const endpoint = `${config.url}/rest/v1/repo_workspace_map?select=workspace_id&enabled=eq.true&repo=eq.${encodeURIComponent(
      slug
    )}&limit=1`;
    const response = await fetch(endpoint, {
      headers: { apikey: key, authorization: `Bearer ${key}` },
    });
    if (!response.ok) return null;

    const rows = await response.json();
    if (!Array.isArray(rows)) return null;
    const workspaceId = rows[0]?.workspace_id;
    return workspaceId ? workspaceId : null;
  } catch {
    return null;
  }
}
670
+
671
/**
 * Resolve (and cache) the workspace implied by the client's roots by
 * mapping each root's git repo through repo_workspace_map. Async; call
 * from initialize / roots-change. The synchronous resolvers read the
 * cached `_rootWorkspace`.
 *
 * ALL mapped roots are collected rather than stopping at the first, so
 * the ambiguous case — two mapped repos open at once — can be detected.
 * The first root (in the client's order) still wins, but a warning is
 * logged, because silently guessing a workspace is the footgun this
 * feature exists to remove.
 *
 * Roots are resolved concurrently: each lookup is an independent git
 * call (2s cap) plus a REST read, so running them one after another only
 * added latency. Results keep the roots' original order.
 *
 * If the roots are replaced while a resolution is in flight, its result
 * is discarded instead of cached, so an older, slower resolution cannot
 * overwrite the cache with a workspace for roots that are no longer open.
 *
 * @returns {Promise<string | null>} the resolved workspace id, or null
 *   when no root maps to a workspace. For a discarded (stale) run, the
 *   currently cached id, or null.
 */
async function resolveRootWorkspace() {
  // setClientRoots always installs a fresh array, so identity tells us
  // whether the roots changed during the awaits below.
  const roots = _clientRoots;

  const resolved = await Promise.all(
    roots.map(async (dir) => {
      const slug = await repoSlugForDir(dir);
      if (!slug) return null;
      const ws = await workspaceForRepoSlug(slug);
      return ws ? { ws, slug } : null;
    })
  );

  if (roots !== _clientRoots) return _rootWorkspace ?? null;

  const matches = resolved.filter((entry) => entry !== null);
  const distinct = new Set(matches.map((m) => m.ws));
  if (distinct.size > 1) {
    log(
      `roots map to MULTIPLE workspaces (${matches
        .map((m) => `${m.slug}→${m.ws}`)
        .join(", ")}). Using "${matches[0].ws}". ` +
        `Pass workspaceId explicitly on calls to target a specific one.`
    );
  }

  const [winner] = matches;
  _rootWorkspace = winner ? winner.ws : null;
  _rootWorkspaceRepo = winner ? winner.slug : null;
  return _rootWorkspace;
}
710
+
711
/**
 * Synchronous read of the cached root-derived workspace id. Both
 * "unresolved" (undefined) and "resolved, no match" (null) read as null.
 */
function rootWorkspaceId() {
  const cached = _rootWorkspace;
  return cached === undefined || cached === null ? null : cached;
}
715
+
716
// Test hook: writes the root-resolution cache directly, so tests need
// neither the client protocol nor the network. `repo` defaults to null.
function __setRootWorkspaceForTest(id, repo = null) {
  [_rootWorkspace, _rootWorkspaceRepo] = [id, repo];
}
722
+
394
723
  /**
395
724
  * Resolve the workspace id for a tool call. Resolution order:
396
725
  * 1. Explicit `workspaceId` arg on the call.
397
- * 2. `.roadmapper/snapshot.json` in the cwd (committed by the
398
- * snapshot-roadmaps cron names the workspace this repo
399
- * belongs to).
400
- * 3. Env-driven `SUPABASE_WORKSPACE_ID`.
401
- * 4. null.
726
+ * 2. Client roots git remote → repo_workspace_map (the repo the
727
+ * agent is actually working in, mapped via the GitHub-connect UI).
728
+ * 3. `.roadmapper/snapshot.json` in the cwd (offline fallback).
729
+ * 4. Env-driven `SUPABASE_WORKSPACE_ID` (the install default).
730
+ * 5. null.
402
731
  *
403
- * Snapshot beats env because the snapshot reflects "where the agent
404
- * is right now", while the env reflects "where the operator pointed
405
- * the MCP install when they configured it". Cwd-specific wins.
732
+ * Roots beat snapshot beat env: roots reflect "the repo open right now"
733
+ * (most specific), snapshot reflects "this checkout's committed
734
+ * workspace", env reflects "where the operator pointed the install".
406
735
  *
407
736
  * Mutators with an explicit `workspaceId` arg that conflicts with the
408
- * cwd snapshot are refused upstream in `callTool` — see the
409
- * cross-workspace guard there.
737
+ * resolved repo/snapshot workspace are refused upstream in `callTool` —
738
+ * see the cross-workspace guard there.
410
739
  */
411
740
  function resolveWorkspaceId(argWorkspaceId) {
412
741
  if (argWorkspaceId) return argWorkspaceId;
742
+ const root = rootWorkspaceId();
743
+ if (root) return root;
413
744
  const snap = snapshotWorkspaceId();
414
745
  if (snap) return snap;
415
746
  return supabaseConfig().workspaceId ?? null;
416
747
  }
417
748
 
749
+ // The workspace id a fresh install ships with — the bundled seed/demo
750
+ // data lives here ("delete it once you add your own"). Mirrors the
751
+ // VITE_SUPABASE_WORKSPACE_ID default in .env.example. Used by the
752
+ // seed-workspace write guard to catch accidental writes to demo data.
753
+ const SEED_WORKSPACE_ID = "default";
754
+
755
/**
 * Same precedence as resolveWorkspaceId, but also reports WHERE the id
 * came from.
 *
 * The silent fall-through to the env default is the main
 * wrong-workspace footgun: start the agent outside a connected repo
 * checkout and every call quietly targets the install default (often
 * the seed workspace). Reporting the source is the cheapest guardrail;
 * it feeds get_active_workspace and the snapshot's resolvedFrom field.
 *
 * @param {string} [argWorkspaceId] - explicit workspace id from the call.
 * @returns {{id: string | null, source: string, repo?: string | null}}
 *   `source` is one of "arg", "repo", "snapshot", "env" or "none";
 *   `repo` (the owner/repo that resolved) is present only for "repo".
 */
function resolveWorkspaceWithSource(argWorkspaceId) {
  // Ordered, lazy probes: each runs only if every earlier one came up empty.
  const probes = [
    () => (argWorkspaceId ? { id: argWorkspaceId, source: "arg" } : null),
    () => {
      const id = rootWorkspaceId();
      return id ? { id, source: "repo", repo: _rootWorkspaceRepo } : null;
    },
    () => {
      const id = snapshotWorkspaceId();
      return id ? { id, source: "snapshot" } : null;
    },
    () => {
      const id = supabaseConfig().workspaceId;
      return id ? { id, source: "env" } : null;
    },
  ];

  for (const probe of probes) {
    const hit = probe();
    if (hit) return hit;
  }
  return { id: null, source: "none" };
}
775
+
776
/**
 * Report which write path is active, for diagnostics.
 *
 * @returns {"broker" | "operator" | "read-only"}
 *   "broker"    — an rmpr_ API key is set; it is validated server-side,
 *                 so the service-role key stays off this machine.
 *   "operator"  — no API key, but a service-role-equivalent write key is
 *                 held locally.
 *   "read-only" — neither key is configured.
 */
function writeMode() {
  const config = supabaseConfig();
  // The API key takes precedence when both keys are present.
  return config.apiKey ? "broker" : config.writeKey ? "operator" : "read-only";
}
787
+
418
788
  /**
419
789
  * Read the workspace's current entity state directly from the
420
790
  * normalized tables (Stage 3 Piece 6c — `workspaces.edits` column
@@ -426,8 +796,31 @@ function resolveWorkspaceId(argWorkspaceId) {
426
796
  * agent reads down to the caller's visible_pillars allow-list.
427
797
  */
428
798
  async function readWorkspaceProjected(wsIdOverride) {
429
- const { url, readKey: anonKey, writeKey } = supabaseConfig();
799
+ const { url, readKey: anonKey, writeKey, apiKey } = supabaseConfig();
430
800
  const workspaceId = resolveWorkspaceId(wsIdOverride);
801
+
802
+ // Customer path: when an rmpr_ API key is set, read through the broker.
803
+ // A direct PostgREST read with the publishable (anon) key returns zero
804
+ // rows — RLS only grants SELECT to authenticated workspace members. The
805
+ // broker authenticates the key server-side and reads (service role)
806
+ // scoped to THAT key's workspace. The key pins one workspace, so a
807
+ // wsIdOverride for a different workspace isn't readable on the customer
808
+ // path anyway — the broker correctly returns the key's workspace, and
809
+ // the cross-workspace guard upstream already blocks writes elsewhere.
810
+ if (apiKey) {
811
+ const ent = await fetchWorkspaceEntitiesViaBroker();
812
+ if (ent) {
813
+ return {
814
+ themes: ent.pillars.map(rowToThemeProjected),
815
+ capabilities: ent.capabilities.map(rowToCapabilityProjected),
816
+ tasks: ent.tasks.map(rowToTaskProjected),
817
+ };
818
+ }
819
+ // Broker failed — fall through to the direct read below. On a pure
820
+ // customer install (anon key only) that returns null; operator
821
+ // installs that ALSO set a service key still get a working read.
822
+ }
823
+
431
824
  const key = writeKey || anonKey;
432
825
  if (!url || !key || !workspaceId) return null;
433
826
  const filter = `workspace_id=eq.${encodeURIComponent(workspaceId)}`;
@@ -556,6 +949,91 @@ function stripUndefined(o) {
556
949
  return o;
557
950
  }
558
951
 
952
+ // ---- Token-efficiency: light projections + pagination ----------------
953
+ //
954
+ // Read tools return light rows BY DEFAULT (detail:true opts into full
955
+ // rows). The heavy fields — prs[], acceptance[], acceptanceGrades[],
956
+ // outcomeReadings[], dependsOn[], and long summary/description text —
957
+ // are ~95% of a row's token cost on a large workspace, so dropping
958
+ // them turns a naive list_tasks() from ~81KB into <1KB. The cap is a
959
+ // backstop, not the lever; the projection is.
960
+
961
+ const LIST_DEFAULT_LIMIT = 50;
962
+ const LIST_MAX_LIMIT = 200;
963
+
964
// Light projection of a task: identity plus the fields used for triage.
// The heavy fields are left out — prs[] is reduced to a count and
// summary to a presence flag, so the agent can tell that detail exists
// without paying for it. Undefined fields are removed by stripUndefined.
function taskLight(t) {
  const {
    id,
    title,
    status,
    priority,
    effort,
    kind,
    capabilityId,
    pillarId,
    owner,
    archived,
  } = t;
  const prCount = Array.isArray(t.prs) ? t.prs.length : undefined;
  const hasSummary = t.summary ? true : undefined;

  return stripUndefined({
    id,
    title,
    status,
    priority,
    effort,
    kind,
    capabilityId,
    pillarId,
    owner,
    prCount,
    hasSummary,
    archived,
  });
}
983
+
984
// Light projection of a capability: identity plus status signals.
// outcomeReadings[], dependsOn[] and description are left out; `outcome`
// stays because the agent needs it to judge fit and it is bounded text.
//
// `status` is the EFFECTIVE status when `tasks` is supplied (derived via
// effectiveCapabilityStatus), so the light row agrees with how the
// snapshot/list filters chose to include it. Without `tasks` the raw
// column is used, which is often null.
function capabilityLight(c, tasks) {
  const status = tasks ? effectiveCapabilityStatus(c, tasks) : c.status;
  const { id, pillarId, name, outcome, outcomeStatus, roi, target, archived } = c;

  return stripUndefined({
    id,
    pillarId,
    name,
    status,
    outcome,
    outcomeStatus,
    roi,
    target,
    archived,
  });
}
1006
+
1007
// Normalize a requested row limit: anything that is not a finite number
// falls back to LIST_DEFAULT_LIMIT, fractions are floored, and the
// result is clamped to the range [1, LIST_MAX_LIMIT].
function clampLimit(raw) {
  let limit = LIST_DEFAULT_LIMIT;
  if (Number.isFinite(raw)) limit = Math.floor(raw);
  if (limit < 1) limit = 1;
  if (limit > LIST_MAX_LIMIT) limit = LIST_MAX_LIMIT;
  return limit;
}
1012
+
1013
// Cut a row list down to the requested limit and project each row, then
// wrap the page in a { total, returned, truncated, items } envelope so
// the agent can tell whether to narrow its filter instead of paging
// blindly. `args.detail === true` returns the rows untouched; otherwise
// each row goes through `lightFn(row, ctx)`. `ctx` is handed to the
// mapper as-is (capabilityLight uses it to derive the effective status
// from tasks; taskLight ignores it).
function paginateRows(rows, args, lightFn, ctx) {
  const page = rows.slice(0, clampLimit(args?.limit));
  const wantsDetail = args?.detail === true;
  const items = wantsDetail ? page : page.map((row) => lightFn(row, ctx));

  return {
    total: rows.length,
    returned: page.length,
    truncated: rows.length > page.length,
    items,
  };
}
1029
+
1030
// Serialize a result as compact JSON — no 2-space pretty-printing, which
// would be ~20-30% pure-whitespace tokens on every list return. Clients
// re-format for human readers, so the wire form stays compact.
function compactResult(obj) {
  const payload = JSON.stringify(obj);
  return textResult(payload);
}
1036
+
559
1037
  /**
560
1038
  * Invoke a Postgres function exposed via PostgREST. Used by the
561
1039
  * write tools so the read-modify-write happens inside a single
@@ -802,11 +1280,25 @@ function validateConfidence(confidence) {
802
1280
  * parent theme's target. Caller can still proceed — but the
803
1281
  * warning surfaces in dryRun output so the agent can rethink.
804
1282
  */
1283
// Compact dollar formatter (ROI is stored as RAW DOLLARS). Local copy
// of src/lib/util.ts formatCompactMoney — the .mjs can't import the TS.
//
// Output: "$0" for null/undefined, non-finite, zero or negative input;
// whole dollars below $1K ("$999"); otherwise one decimal at most with a
// K / M / B suffix ("$1.5K", "$2M", "$2.5B").
//
// The unit is chosen AFTER rounding, so a value that rounds up to a
// boundary moves to the next unit: 999.6 → "$1K" (not "$1000") and
// 999950 → "$1M" (not "$1000K"). B is the largest unit and is never
// promoted further.
function fmtMoney(dollars) {
  if (dollars == null || !Number.isFinite(dollars) || dollars <= 0) return "$0";

  const whole = Math.round(dollars);
  if (whole < 1e3) return `$${whole}`;

  const units = [
    [1e3, "K"],
    [1e6, "M"],
    [1e9, "B"],
  ];
  for (let i = 0; i < units.length; i++) {
    const [scale, suffix] = units[i];
    const rounded = Math.round((dollars / scale) * 10) / 10;
    const isLargestUnit = i === units.length - 1;
    if (rounded < 1000 || isLargestUnit) {
      const digits = Number.isInteger(rounded) ? rounded.toFixed(0) : rounded.toFixed(1);
      return `$${digits}${suffix}`;
    }
  }
  return "$0"; // unreachable: the largest unit always returns above
}
1296
+
805
1297
  function warnRoiVsTheme(roi, theme) {
806
1298
  if (roi == null || theme?.targetRoi == null) return null;
807
1299
  const floor = theme.targetRoi * 0.7;
808
1300
  if (roi < floor) {
809
- return `roi $${roi}M is well below 70% of theme "${theme.name}" target ($${theme.targetRoi}M). Justify the gap in your outcome, or rethink the parent theme.`;
1301
+ return `roi ${fmtMoney(roi)} is well below 70% of theme "${theme.name}" target (${fmtMoney(theme.targetRoi)}). Justify the gap in your outcome, or rethink the parent theme.`;
810
1302
  }
811
1303
  return null;
812
1304
  }
@@ -996,7 +1488,7 @@ const TOOLS = [
996
1488
  {
997
1489
  name: "list_capabilities",
998
1490
  description:
999
- "List active capabilities (quarterly bets). Excludes delivered and archived capabilities by default — agents should target work that's still in flight.\n\n" +
1491
+ "List active capabilities (quarterly bets). Excludes delivered and archived capabilities by default — agents should target work that's still in flight. Returns LIGHT rows by default (id/pillarId/name/status/outcome/outcomeStatus/roi/target), capped at 50; pass detail:true for full rows incl. outcomeReadings/dependsOn/description. Response envelope: { total, returned, truncated, items }.\n\n" +
1000
1492
  "USE WHEN: planning a feature and need to find the right parent capability, reviewing in-flight bets, or scoping what's still on the table this quarter.\n" +
1001
1493
  "PREREQUISITE: none — read-only. For routing a specific work description, prefer suggest_capability_for which ranks by token overlap.\n" +
1002
1494
  "ANTI-PATTERN: do not call to find a capability when you already know its id (use get_roadmap_snapshot for richer context). Pass includeDelivered=true or includeArchived=true only when reviewing historical bets — almost never in a planning session.\n" +
@@ -1007,6 +1499,17 @@ const TOOLS = [
1007
1499
  themeId: { type: "string" },
1008
1500
  includeDelivered: { type: "boolean" },
1009
1501
  includeArchived: { type: "boolean" },
1502
+ detail: {
1503
+ type: "boolean",
1504
+ description:
1505
+ "Return full capability rows (outcomeReadings, dependsOn, description) instead of light rows. Default false.",
1506
+ },
1507
+ limit: {
1508
+ type: "integer",
1509
+ minimum: 1,
1510
+ maximum: 200,
1511
+ description: "Max rows to return. Default 50, hard cap 200.",
1512
+ },
1010
1513
  workspaceId: { type: "string" },
1011
1514
  },
1012
1515
  additionalProperties: false,
@@ -1015,10 +1518,10 @@ const TOOLS = [
1015
1518
  {
1016
1519
  name: "list_tasks",
1017
1520
  description:
1018
- "List tasks. Filter by capabilityId or status. Excludes archived tasks by default.\n\n" +
1521
+ "List tasks. Filter by capabilityId or status. Excludes archived tasks by default. Returns LIGHT rows by default (id/title/status/priority/effort/kind/capabilityId/owner + prCount + hasSummary), capped at 50; pass detail:true for full rows incl. prs/acceptance/summary, and limit to raise the cap (max 200). The response is an envelope: { total, returned, truncated, items }.\n\n" +
1019
1522
  "USE WHEN: surveying what already exists under a capability before proposing a new task (avoid duplicates), reviewing a status bucket (e.g. all in_progress), or answering 'what's open right now'.\n" +
1020
1523
  "PREREQUISITE: none — read-only.\n" +
1021
- "ANTI-PATTERN: do not call to track in-progress work within a single conversation — use the harness TodoWrite tool. Do not call without a filter when the workspace has many tasks; scope by capabilityId or status. Pass includeArchived=true only when reviewing closed history.\n" +
1524
+ "ANTI-PATTERN: do not call to track in-progress work within a single conversation — use the harness TodoWrite tool. If truncated:true, NARROW the filter (capabilityId/status) rather than cranking limit light rows are cheap but full detail on hundreds of rows is not. Reach for detail:true only when you actually need prs/acceptance, ideally with a filter.\n" +
1022
1525
  "EXAMPLE: list_tasks({ capabilityId: 'CAP-XXX', status: 'in_progress' })",
1023
1526
  inputSchema: {
1024
1527
  type: "object",
@@ -1029,6 +1532,49 @@ const TOOLS = [
1029
1532
  enum: ["delivered", "in_progress", "planned", "exploring"],
1030
1533
  },
1031
1534
  includeArchived: { type: "boolean" },
1535
+ detail: {
1536
+ type: "boolean",
1537
+ description:
1538
+ "Return full task rows (prs, acceptance, acceptanceGrades, summary, dependsOn) instead of light rows. Default false.",
1539
+ },
1540
+ limit: {
1541
+ type: "integer",
1542
+ minimum: 1,
1543
+ maximum: 200,
1544
+ description: "Max rows to return. Default 50, hard cap 200.",
1545
+ },
1546
+ workspaceId: { type: "string" },
1547
+ },
1548
+ additionalProperties: false,
1549
+ },
1550
+ },
1551
+ {
1552
+ name: "list_uncategorized_tasks",
1553
+ description:
1554
+ "List tasks with no parent capability (capabilityId is null) — the orphans the GitHub webhook auto-created from PRs that carried no Roadmapper-Capability trailer and matched no capability. Excludes archived tasks by default.\n\n" +
1555
+ "USE WHEN: triaging the roadmap — finding work that shipped but never got filed under a quarterly bet, so it's invisible in capability rollups, burndown, and the outlook view. Pair with suggest_capability_for({ taskId }) to find each one's best-fit home, then move_task to file it.\n" +
1556
+ "PREREQUISITE: none — read-only.\n" +
1557
+ "ANTI-PATTERN: do not use to list ALL tasks — that's list_tasks. This is specifically the unparented backlog. A long result here is a signal that PRs aren't carrying capability trailers, not that you should ignore it.\n" +
1558
+ "EXAMPLE: list_uncategorized_tasks({ status: 'in_progress' })",
1559
+ inputSchema: {
1560
+ type: "object",
1561
+ properties: {
1562
+ status: {
1563
+ type: "string",
1564
+ enum: ["delivered", "in_progress", "planned", "exploring"],
1565
+ },
1566
+ includeArchived: { type: "boolean" },
1567
+ detail: {
1568
+ type: "boolean",
1569
+ description:
1570
+ "Return full task rows instead of light rows. Default false.",
1571
+ },
1572
+ limit: {
1573
+ type: "integer",
1574
+ minimum: 1,
1575
+ maximum: 200,
1576
+ description: "Max rows to return. Default 50, hard cap 200.",
1577
+ },
1032
1578
  workspaceId: { type: "string" },
1033
1579
  },
1034
1580
  additionalProperties: false,
@@ -1065,10 +1611,10 @@ const TOOLS = [
1065
1611
  {
1066
1612
  name: "get_roadmap_snapshot",
1067
1613
  description:
1068
- "Single-call orient: themes + active capabilities + in-flight tasks for the workspace, plus the resolved workspaceId. Always live. Excludes archived entities by default.\n\n" +
1614
+ "Single-call orient: themes + active capabilities + in-flight tasks for the workspace, plus the resolved workspaceId. Always live. Excludes archived entities by default. Returns LIGHT rows by default and caps the task list at 50 (the counts block always carries true totals); pass detail:true for full rows. Response carries mode ('summary'|'detail') and tasksTruncated.\n\n" +
1069
1615
  "USE WHEN: starting fresh in a workspace and need the whole canonical model in one read, or before opening a PR to confirm which workspace + capability to attach to.\n" +
1070
1616
  "PREREQUISITE: none — read-only. Often the very first call after get_agents_md.\n" +
1071
- "ANTI-PATTERN: do not call repeatedly within one planning pass; the data doesn't change inside a single session. Use list_tasks / list_capabilities if you need just one slice. Pass includeArchived=true only when surveying historical state.\n" +
1617
+ "ANTI-PATTERN: do not call repeatedly within one planning pass; the data doesn't change inside a single session. Avoid detail:true on large workspaces — use list_tasks with a filter for the rows you actually need. Pass includeArchived=true only when surveying historical state.\n" +
1072
1618
  "EXAMPLE: get_roadmap_snapshot()",
1073
1619
  inputSchema: {
1074
1620
  type: "object",
@@ -1079,6 +1625,31 @@ const TOOLS = [
1079
1625
  "Optional. Override the env-default workspace. Useful when the agent is operating against a .roadmapper/snapshot.json that names its own workspace.",
1080
1626
  },
1081
1627
  includeArchived: { type: "boolean" },
1628
+ detail: {
1629
+ type: "boolean",
1630
+ description:
1631
+ "Return full theme/capability/task rows instead of light ones. Default false. Can be large on big workspaces.",
1632
+ },
1633
+ },
1634
+ additionalProperties: false,
1635
+ },
1636
+ },
1637
+ {
1638
+ name: "get_active_workspace",
1639
+ description:
1640
+ "Report the workspace this server will act on RIGHT NOW and HOW it was resolved — arg / .roadmapper snapshot / env default — plus whether writes are enabled and via which path (broker vs operator). Cheap: no roadmap data, no DB read.\n\n" +
1641
+ "USE WHEN: you're unsure which workspace is active; before the FIRST mutating call in a session; after changing directories. Especially important when the agent was launched outside a connected repo checkout, where the env default (often the seed workspace) silently wins.\n" +
1642
+ "PREREQUISITE: none — read-only.\n" +
1643
+ "ANTI-PATTERN: don't use it to inspect roadmap contents — that's get_roadmap_snapshot. This only answers 'where am I pointed'.\n" +
1644
+ "EXAMPLE: get_active_workspace()",
1645
+ inputSchema: {
1646
+ type: "object",
1647
+ properties: {
1648
+ workspaceId: {
1649
+ type: "string",
1650
+ description:
1651
+ "Optional. Resolve as if this override were passed to a real call, to preview which workspace it would target.",
1652
+ },
1082
1653
  },
1083
1654
  additionalProperties: false,
1084
1655
  },
@@ -1092,7 +1663,7 @@ const TOOLS = [
1092
1663
  "ANTI-PATTERN: do not call to track in-progress work within a single conversation — use the harness TodoWrite tool. Do not call to log a bug discovered during implementation — file in the issue tracker, not roadmapper. Do not call when you don't know which capability the task belongs under; resolve that first.\n" +
1093
1664
  "REQUIRED FIELDS: capabilityId, title, effort. Always size the task — XS (≤2h) / S (≤1d) / M (~1-3d) / L (~1-2w) / XL (>2w). Effort drives capability % roll-up weighting; do not omit.\n" +
1094
1665
  "EXAMPLE: propose_task({ capabilityId: 'CAP-XXX', title: 'Drag-and-drop block reorder', effort: 'M', acceptance: ['Block can be dragged with mouse + keyboard', 'Order persists across reloads'], idempotencyKey: 'session-1-task-3' })\n\n" +
1095
- "Requires SUPABASE_SERVICE_ROLE_KEY. Pass idempotencyKey so retries don't duplicate. Pass dryRun: true to validate without writing. Pass workspaceId to target a workspace other than the env default.",
1666
+ "Requires write auth (set ROADMAPPER_API_KEY). Pass idempotencyKey so retries don't duplicate. Pass dryRun: true to validate without writing. Pass workspaceId to target a workspace other than the env default.",
1096
1667
  inputSchema: {
1097
1668
  type: "object",
1098
1669
  properties: {
@@ -1130,15 +1701,15 @@ const TOOLS = [
1130
1701
  "USE WHEN: the work the user is describing genuinely doesn't fit ANY existing theme, AND the user explicitly says they want a new strategic direction. Almost never the right answer in a planning session.\n" +
1131
1702
  "PREREQUISITE: get_agents_md once this session (enforced). Theme discovery once this session, satisfied by suggest_theme_for (preferred — returns ranked matches with a fit signal), list_themes, or get_roadmap_snapshot. Enforced — the server returns discovery_missing with a fix field if you skip it. Duplicating a theme is the most common failure mode; the gate stops it.\n" +
1132
1703
  "ANTI-PATTERN: do not call to organize a quarter of work — that's a capability, not a theme. Do not call because the existing themes feel too coarse — they're SUPPOSED to be coarse. Use propose_capability under an existing theme instead.\n" +
1133
- "EXAMPLE: propose_theme({ name: 'AI Agent Reliability', description: 'Multi-year bet on making agent workflows reproducible.', targetRoi: 20, idempotencyKey: 'session-1-theme-1' })\n\n" +
1134
- "Requires SUPABASE_SERVICE_ROLE_KEY. Pass idempotencyKey so retries don't duplicate. Pass dryRun: true to validate without writing. Pass workspaceId to target a workspace other than the env default.",
1704
+ "EXAMPLE: propose_theme({ name: 'AI Agent Reliability', description: 'Multi-year bet on making agent workflows reproducible.', targetRoi: 20000000, idempotencyKey: 'session-1-theme-1' })\n\n" +
1705
+ "Requires write auth (set ROADMAPPER_API_KEY). targetRoi is RAW ANNUAL DOLLARS (e.g. 20000000 = $20M), not millions. Pass idempotencyKey so retries don't duplicate. Pass dryRun: true to validate without writing. Pass workspaceId to target a workspace other than the env default.",
1135
1706
  inputSchema: {
1136
1707
  type: "object",
1137
1708
  properties: {
1138
1709
  name: { type: "string" },
1139
1710
  description: { type: "string" },
1140
1711
  color: { type: "string" },
1141
- targetRoi: { type: "number" },
1712
+ targetRoi: { type: "number", description: "Annual ROI target in raw dollars (e.g. 20000000 = $20M)." },
1142
1713
  idempotencyKey: { type: "string" },
1143
1714
  dryRun: { type: "boolean" },
1144
1715
  workspaceId: { type: "string" },
@@ -1155,7 +1726,7 @@ const TOOLS = [
1155
1726
  "PREREQUISITE: get_agents_md once this session (enforced). suggest_capability_for (or list_capabilities / get_roadmap_snapshot / the roadmapper://capabilities/active resource) once this session (enforced — server returns discovery_missing with a fix field if you skip it). The server WILL also reject if token overlap with an existing capability is too high; the gate is upstream of that.\n" +
1156
1727
  "ANTI-PATTERN: do not call for a single deliverable — that's a task. Do not call when the outcome is fuzzy ('improve X') — the server rejects non-falsifiable outcomes. Do not call when an existing capability is close-enough; capabilities cost human attention to maintain.\n" +
1157
1728
  "EXAMPLE: propose_capability({ pillarId: 'TH-XXX', name: 'Self-serve landing page builder', outcome: 'Customers publish a landing page in under 5 minutes without engineering involvement.', reach: 200, impact: 1, confidence: 70, idempotencyKey: 'session-1-cap-1' })\n\n" +
1158
- "Server rejects empty / non-falsifiable outcomes, confidence >95, and names <8 chars. Requires SUPABASE_SERVICE_ROLE_KEY. Pass idempotencyKey, dryRun, workspaceId as for propose_task.",
1729
+ "Server rejects empty / non-falsifiable outcomes, confidence >95, and names <8 chars. Requires write auth (set ROADMAPPER_API_KEY). Pass idempotencyKey, dryRun, workspaceId as for propose_task.",
1159
1730
  inputSchema: {
1160
1731
  type: "object",
1161
1732
  properties: {
@@ -1166,7 +1737,7 @@ const TOOLS = [
1166
1737
  reach: { type: "number" },
1167
1738
  impact: { type: "number", enum: [3, 2, 1, 0.5, 0.25] },
1168
1739
  confidence: { type: "number", minimum: 0, maximum: 100 },
1169
- roi: { type: "number" },
1740
+ roi: { type: "number", description: "Estimated annual ROI in raw dollars (e.g. 2500000 = $2.5M)." },
1170
1741
  specRef: { type: "string" },
1171
1742
  idempotencyKey: { type: "string" },
1172
1743
  dryRun: { type: "boolean" },
@@ -1184,7 +1755,7 @@ const TOOLS = [
1184
1755
  "PREREQUISITE: get_agents_md once this session (enforced — defines grading dimensions). Call get_task first to read the acceptance criteria in order — indexes are positional.\n" +
1185
1756
  "ANTI-PATTERN: do not call before the implementation actually works — fabricated passes destroy the trust this signal carries. Do not call without a note when status='fail' — the reviewer needs the failure mode.\n" +
1186
1757
  "EXAMPLE: submit_acceptance_grades({ taskId: 'TK-100201', grades: [{ index: 0, status: 'pass' }, { index: 1, status: 'fail', note: 'Reload-persistence is flaky on Firefox; tracked in TK-100202' }] })\n\n" +
1187
- "Requires SUPABASE_SERVICE_ROLE_KEY. Pass workspaceId to target a workspace other than the env default.",
1758
+ "Requires write auth (set ROADMAPPER_API_KEY). Pass workspaceId to target a workspace other than the env default.",
1188
1759
  inputSchema: {
1189
1760
  type: "object",
1190
1761
  properties: {
@@ -1211,19 +1782,23 @@ const TOOLS = [
1211
1782
  {
1212
1783
  name: "suggest_capability_for",
1213
1784
  description:
1214
- "Given a free-text description of work, return the top existing capabilities ranked by token overlap.\n\n" +
1215
- "USE WHEN: about to propose tasks or a capability — call this FIRST to find an existing parent. If any returned score > 0.4, strongly prefer attaching tasks there over creating a new capability.\n" +
1785
+ "Return the top existing capabilities ranked by token overlap with either a free-text description OR an existing task (pass taskId and the server synthesizes the query from the task's title + summary).\n\n" +
1786
+ "USE WHEN: about to propose tasks or a capability — call this FIRST to find an existing parent. If any returned score > 0.4, strongly prefer attaching tasks there over creating a new capability. With taskId, this is the triage companion to list_uncategorized_tasks: rank a home for an orphaned task, then move_task it.\n" +
1216
1787
  "PREREQUISITE: none — read-only.\n" +
1217
- "ANTI-PATTERN: do not call after you've already decided to create a new capability — that's the case this tool is meant to prevent. Do not interpret weak matches (<0.2) as fits; if nothing's close, propose_capability is the right next call (after confirming with the user).\n" +
1218
- "EXAMPLE: suggest_capability_for({ description: 'multi-tenant landing page builder with drag-and-drop blocks' })",
1788
+ "ANTI-PATTERN: do not call after you've already decided to create a new capability — that's the case this tool is meant to prevent. Do not interpret weak matches (<0.2) as fits; if nothing's close, propose_capability is the right next call (after confirming with the user). Pass exactly one of description / taskId.\n" +
1789
+ "EXAMPLE: suggest_capability_for({ description: 'multi-tenant landing page builder with drag-and-drop blocks' }) — or — suggest_capability_for({ taskId: 'TK-100201' })",
1219
1790
  inputSchema: {
1220
1791
  type: "object",
1221
1792
  properties: {
1222
1793
  description: { type: "string" },
1794
+ taskId: {
1795
+ type: "string",
1796
+ description:
1797
+ "TK-NNNNNN. When set, the query is built from the task's title + summary. Mutually exclusive with description.",
1798
+ },
1223
1799
  limit: { type: "integer", minimum: 1, maximum: 25 },
1224
1800
  workspaceId: { type: "string" },
1225
1801
  },
1226
- required: ["description"],
1227
1802
  additionalProperties: false,
1228
1803
  },
1229
1804
  },
@@ -1255,7 +1830,7 @@ const TOOLS = [
1255
1830
  "PREREQUISITE: get_agents_md once this session (enforced). The task id must exist (get_task / list_tasks to confirm).\n" +
1256
1831
  "ANTI-PATTERN: do not call as a substitute for the Roadmapper-Task: PR-body trailer convention — the trailer is the durable contract; link_pr is the instant-feedback shortcut. Do not call for PRs that don't have a parent task in roadmapper.\n" +
1257
1832
  "EXAMPLE: link_pr({ taskId: 'TK-100201', repo: 'acme/frontend', number: 1234, title: 'Drag block reorder', authorGithub: 'octocat' })\n\n" +
1258
- "Idempotent by (repo, number) — re-calling with an already-linked PR returns idempotent:true. Requires SUPABASE_SERVICE_ROLE_KEY. Pass workspaceId to target a workspace other than the env default.",
1833
+ "Idempotent by (repo, number) — re-calling with an already-linked PR returns idempotent:true. Requires write auth (set ROADMAPPER_API_KEY). Pass workspaceId to target a workspace other than the env default.",
1259
1834
  inputSchema: {
1260
1835
  type: "object",
1261
1836
  properties: {
@@ -1304,7 +1879,7 @@ const TOOLS = [
1304
1879
  "PREREQUISITE: get_agents_md once this session (enforced). The capability must exist.\n" +
1305
1880
  "ANTI-PATTERN: do not use to declare the FINAL outcome (use outcomeStatus via update_capability for that). Readings are observations along the way, not the verdict.\n" +
1306
1881
  "EXAMPLE: record_outcome_reading({ capabilityId: 'CAP-9F2C7E', value: 0.41, asOf: '2026-05-12', source: 'mixpanel: activated_within_7d weekly', note: 'sample size 4218' })\n\n" +
1307
- "Requires SUPABASE_SERVICE_ROLE_KEY. Audit log records each reading as 'outcome_reading_recorded'.",
1882
+ "Requires write auth (set ROADMAPPER_API_KEY). Audit log records each reading as 'outcome_reading_recorded'.",
1308
1883
  inputSchema: {
1309
1884
  type: "object",
1310
1885
  properties: {
@@ -1338,6 +1913,38 @@ const TOOLS = [
1338
1913
  additionalProperties: false,
1339
1914
  },
1340
1915
  },
1916
+ {
1917
+ name: "detect_capability_gaps",
1918
+ description:
1919
+ "Find CLUSTERS of uncategorized tasks that don't fit any existing capability — i.e. work that's accumulating with no quarterly bet to hold it. This is the 'a capability is missing' signal: not 'file this orphan under an existing cap' (that's suggest_capability_for + move_task), but 'these N orphans share a theme that no capability covers — consider proposing one.'\n\n" +
1920
+ "How it works: takes every uncategorized non-archived task, scores its best fit against active capabilities, keeps the ones with no decent fit ('homeless'), then clusters the homeless tasks by shared vocabulary. Each returned cluster has shared keywords, member task ids, and a suggested capability name.\n" +
1921
+ "USE WHEN: triaging a webhook-fed workspace (lots of orphans), at quarterly review, or any time you want to know whether the roadmap is missing a bet. Pair with list_uncategorized_tasks.\n" +
1922
+ "PREREQUISITE: none — read-only. Counts as capability discovery (it enumerates every active capability to score fit), so it satisfies the propose_capability gate.\n" +
1923
+ "ANTI-PATTERN: don't treat a cluster as an automatic mandate to create a capability — capabilities are quarterly bets, a human confirms. A single homeless task is not a gap; that's just an orphan to file. Tune minClusterSize/fitThreshold rather than acting on noise.\n" +
1924
+ "EXAMPLE: detect_capability_gaps({ minClusterSize: 3 })",
1925
+ inputSchema: {
1926
+ type: "object",
1927
+ properties: {
1928
+ minClusterSize: {
1929
+ type: "integer",
1930
+ minimum: 2,
1931
+ maximum: 50,
1932
+ description:
1933
+ "Min homeless tasks sharing a theme to report as a gap. Default 3. A cluster smaller than this is noise, not a missing bet.",
1934
+ },
1935
+ fitThreshold: {
1936
+ type: "number",
1937
+ minimum: 0,
1938
+ maximum: 1,
1939
+ description:
1940
+ "A task is 'homeless' when its best Jaccard fit against any active capability is below this. Default 0.2 (the 'medium' bar). Raise to be stricter about what counts as already-covered.",
1941
+ },
1942
+ includeArchived: { type: "boolean" },
1943
+ workspaceId: { type: "string" },
1944
+ },
1945
+ additionalProperties: false,
1946
+ },
1947
+ },
1341
1948
  ];
1342
1949
 
1343
1950
  /**
@@ -1382,7 +1989,7 @@ function archiveLifecycleTools() {
1382
1989
  "PREREQUISITE: get_agents_md once this session (enforced). For capabilities/themes, every active child must already be archived — the server refuses with a count of blocking children. For tasks, no child check.\n" +
1383
1990
  `ANTI-PATTERN: do not archive a ${kind} you might come back to within the same session — prefer moving it (move_${kind === "theme" ? "capability" : kind}) or updating its status. Archive is the right tool for "this is closed out, get it out of the picker."\n` +
1384
1991
  `EXAMPLE: ${example}\n\n` +
1385
- "Idempotent: re-archiving an already-archived entity returns { idempotent: true } and emits no audit row. Requires SUPABASE_SERVICE_ROLE_KEY. Pass workspaceId to target a workspace other than the env default.",
1992
+ "Idempotent: re-archiving an already-archived entity returns { idempotent: true } and emits no audit row. Requires write auth (set ROADMAPPER_API_KEY). Pass workspaceId to target a workspace other than the env default.",
1386
1993
  inputSchema: {
1387
1994
  type: "object",
1388
1995
  properties: {
@@ -1408,7 +2015,7 @@ function archiveLifecycleTools() {
1408
2015
  "PREREQUISITE: get_agents_md once this session (enforced). The parent (if any) must be active — cannot unarchive a task whose capability is archived, or a capability whose theme is archived. Unarchive the parent first.\n" +
1409
2016
  "ANTI-PATTERN: do not unarchive en masse without thinking — every unarchive re-floats noise into list views. If you're recovering from an over-aggressive archive sweep, work top-down.\n" +
1410
2017
  `EXAMPLE: un${example.replace("archive", "archive")}\n\n` +
1411
- "Idempotent: unarchiving an already-active entity returns { idempotent: true }. Requires SUPABASE_SERVICE_ROLE_KEY.",
2018
+ "Idempotent: unarchiving an already-active entity returns { idempotent: true }. Requires write auth (set ROADMAPPER_API_KEY).",
1412
2019
  inputSchema: {
1413
2020
  type: "object",
1414
2021
  properties: {
@@ -1468,7 +2075,7 @@ function moveLifecycleTools() {
1468
2075
  "PREREQUISITE: get_agents_md once this session (enforced). Target parent must exist AND be active — refuses move into an archived parent.\n" +
1469
2076
  `ANTI-PATTERN: do not use move to change anything other than the parent. To rename or rescope, use update_${kind} (coming soon). To delete, use archive_${kind}.\n` +
1470
2077
  `EXAMPLE: ${example}\n\n` +
1471
- "Idempotent: moving to the current parent returns { idempotent: true } and emits no audit row. Requires SUPABASE_SERVICE_ROLE_KEY.",
2078
+ "Idempotent: moving to the current parent returns { idempotent: true } and emits no audit row. Requires write auth (set ROADMAPPER_API_KEY).",
1472
2079
  inputSchema: {
1473
2080
  type: "object",
1474
2081
  properties: {
@@ -1589,7 +2196,7 @@ function updateLifecycleTools() {
1589
2196
  team: { type: "string" },
1590
2197
  confidence: { type: "number", description: "0–95." },
1591
2198
  impact: { type: "number", description: "One of 0.25, 0.5, 1, 2, 3." },
1592
- roi: { type: "number" },
2199
+ roi: { type: "number", description: "Estimated annual ROI in raw dollars (e.g. 2500000 = $2.5M)." },
1593
2200
  tags: { type: "array", items: { type: "string" } },
1594
2201
  links: { type: "object", additionalProperties: { type: "string" } },
1595
2202
  },
@@ -1604,7 +2211,7 @@ function updateLifecycleTools() {
1604
2211
  name: { type: "string", description: "Theme name. Minimum 5 chars." },
1605
2212
  description: { type: "string" },
1606
2213
  owner: { type: "string" },
1607
- targetRoi: { type: "number" },
2214
+ targetRoi: { type: "number", description: "Annual ROI target in raw dollars (e.g. 20000000 = $20M)." },
1608
2215
  },
1609
2216
  example:
1610
2217
  "update_theme({ themeId: 'TH-100042', patch: { name: 'Platform Reliability' }, reason: 'sharper name; same scope' })",
@@ -1627,7 +2234,7 @@ function updateLifecycleTools() {
1627
2234
  `PREREQUISITE: get_agents_md once this session (enforced). Reason required (audit trail). ${reparentHint}\n` +
1628
2235
  `ANTI-PATTERN: do not echo the entity back to the server — pass only the keys that changed. The server diffs against current state and a patch that matches everything returns { idempotent: true }.\n` +
1629
2236
  `EXAMPLE: ${example}\n\n` +
1630
- "Idempotent: a patch where every key already matches current state returns { idempotent: true } and emits no audit row. Requires SUPABASE_SERVICE_ROLE_KEY.",
2237
+ "Idempotent: a patch where every key already matches current state returns { idempotent: true } and emits no audit row. Requires write auth (set ROADMAPPER_API_KEY).",
1631
2238
  inputSchema: {
1632
2239
  type: "object",
1633
2240
  properties: {
@@ -1682,6 +2289,36 @@ async function callTool(name, args) {
1682
2289
  // read. Tools that need to know the resolved id later (write paths,
1683
2290
  // snapshot) read it back via resolveWorkspaceId(args?.workspaceId).
1684
2291
  const wsId = resolveWorkspaceId(args?.workspaceId);
2292
+
2293
+ // get_active_workspace answers "where am I pointed" without touching
2294
+ // the DB — return before the projection read below. Cheap by design:
2295
+ // agents should be able to spam it to confirm orientation.
2296
+ if (name === "get_active_workspace") {
2297
+ const { id, source } = resolveWorkspaceWithSource(args?.workspaceId);
2298
+ const { url } = supabaseConfig();
2299
+ let note;
2300
+ if (source === "env") {
2301
+ note =
2302
+ "Resolved from the MCP install's env default — NOT from the current directory. If you meant a specific repo's workspace, launch from that checkout (connected repos carry .roadmapper/snapshot.json) or pass workspaceId explicitly.";
2303
+ } else if (source === "none") {
2304
+ note =
2305
+ "No workspace resolved. Set ROADMAPPER_WORKSPACE_ID in env, run from a connected repo checkout, or pass workspaceId on the call.";
2306
+ }
2307
+ return textResult(
2308
+ JSON.stringify(
2309
+ {
2310
+ workspaceId: id,
2311
+ resolvedFrom: source, // "arg" | "snapshot" | "env" | "none"
2312
+ writeMode: writeMode(), // "broker" | "operator" | "read-only"
2313
+ backendConfigured: Boolean(url),
2314
+ ...(note ? { note } : {}),
2315
+ },
2316
+ null,
2317
+ 2
2318
+ )
2319
+ );
2320
+ }
2321
+
1685
2322
  // Post-Piece-6c, the entity tables ARE the canonical projection
1686
2323
  // — no edits blob, no seed-overlay merge. Fall back to the
1687
2324
  // bundled seed only when the DB is unreachable (offline / dev).
@@ -1767,28 +2404,62 @@ async function callTool(name, args) {
1767
2404
  "Rank existing capabilities by relevance before proposing a new one. If any score is >0.4, attach tasks there instead."
1768
2405
  );
1769
2406
  }
1770
- // Cross-workspace guard. If the cwd has a .roadmapper/snapshot.json
1771
- // naming a workspace, and the call carries an explicit workspaceId
1772
- // pointing somewhere else, refusealmost always a mistake. An
1773
- // operator who really needs to write across workspaces can set
1774
- // ROADMAPPER_ALLOW_CROSS_WORKSPACE=1 in env to bypass.
1775
- const snap = snapshotWorkspaceId();
2407
+ // Cross-workspace guard. If the LOCAL context unambiguously names a
2408
+ // workspace — either the repo the agent is in (roots → repo_workspace_map)
2409
+ // or the cwd's .roadmapper/snapshot.json — and the call carries an
2410
+ // explicit workspaceId pointing somewhere else, refuse. Almost always
2411
+ // a mistake. The repo signal beats the snapshot (it's the more specific
2412
+ // "where am I right now"). An operator who really needs to write across
2413
+ // workspaces can set ROADMAPPER_ALLOW_CROSS_WORKSPACE=1 to bypass.
2414
+ // dryRun is non-destructive validation — let it through both the
2415
+ // cross-workspace and seed-workspace guards.
2416
+ const isDryRun = args?.dryRun === true;
2417
+ const localWs = rootWorkspaceId() ?? snapshotWorkspaceId();
2418
+ const localSource = rootWorkspaceId()
2419
+ ? `the repo you're in (${_rootWorkspaceRepo})`
2420
+ : "the cwd's .roadmapper/snapshot.json";
1776
2421
  const argWs = args?.workspaceId;
1777
2422
  if (
1778
- snap &&
2423
+ localWs &&
1779
2424
  typeof argWs === "string" &&
1780
2425
  argWs.length > 0 &&
1781
- argWs !== snap &&
2426
+ argWs !== localWs &&
2427
+ !isDryRun &&
1782
2428
  process.env.ROADMAPPER_ALLOW_CROSS_WORKSPACE !== "1"
1783
2429
  ) {
1784
2430
  session.mutatorBlocks += 1;
1785
2431
  recordTelemetry(
1786
2432
  "mutator_blocked_cross_workspace",
1787
- { tool: name, targetId, cwdWorkspace: snap, argWorkspace: argWs },
2433
+ { tool: name, targetId, localWorkspace: localWs, argWorkspace: argWs },
1788
2434
  wsId
1789
2435
  );
1790
2436
  return errorResult(
1791
- `Refusing cross-workspace write: cwd's .roadmapper/snapshot.json names workspace "${snap}" but ${name} call targets "${argWs}". Almost always a mistake — drop the workspaceId arg to use the cwd default, or set ROADMAPPER_ALLOW_CROSS_WORKSPACE=1 to override.`
2437
+ `Refusing cross-workspace write: ${localSource} names workspace "${localWs}" but ${name} call targets "${argWs}". Almost always a mistake — drop the workspaceId arg to use the local default, or set ROADMAPPER_ALLOW_CROSS_WORKSPACE=1 to override.`
2438
+ );
2439
+ }
2440
+ // Seed-workspace guard. The cross-workspace guard above only fires
2441
+ // when a repo mapping or snapshot exists to disagree with. The other half of the
2442
+ // wrong-workspace footgun is launching OUTSIDE any configured
2443
+ // checkout: no arg, no snapshot, so wsId falls through to the env
2444
+ // default — and on an unconfigured install that default is the
2445
+ // bundled seed/demo workspace. Writing real planning data there is
2446
+ // almost never intended. Refuse, unless the caller named "default"
2447
+ // explicitly (source "arg") or pointed a snapshot at it
2448
+ // deliberately (source "snapshot").
2449
+ const { source: wsSource } = resolveWorkspaceWithSource(args?.workspaceId);
2450
+ if (
2451
+ wsId === SEED_WORKSPACE_ID &&
2452
+ wsSource === "env" &&
2453
+ !isDryRun &&
2454
+ // Parity with the cross-workspace guard: operators whose real
2455
+ // workspace is genuinely named "default" (or who otherwise mean
2456
+ // it) can opt out.
2457
+ process.env.ROADMAPPER_ALLOW_SEED_WORKSPACE !== "1"
2458
+ ) {
2459
+ session.mutatorBlocks += 1;
2460
+ recordTelemetry("mutator_blocked_seed_workspace", { tool: name, targetId }, wsId);
2461
+ return errorResult(
2462
+ `Refusing to write to the seed/demo workspace "${SEED_WORKSPACE_ID}": it was resolved from the env default, the call carries no workspaceId, and there's no .roadmapper/snapshot.json in the cwd — so the agent was likely launched outside a configured repo checkout and is about to pollute the placeholder data a fresh install ships with. Run get_active_workspace to confirm where you're pointed. To proceed deliberately, pass workspaceId explicitly, set ROADMAPPER_WORKSPACE_ID to the workspace you mean, or set ROADMAPPER_ALLOW_SEED_WORKSPACE=1.`
1792
2463
  );
1793
2464
  }
1794
2465
  recordTelemetry("mutator_attempted", { tool: name, targetId }, wsId);
@@ -1826,7 +2497,9 @@ async function callTool(name, args) {
1826
2497
  return withReminder(
1827
2498
  "list_capabilities",
1828
2499
  projected,
1829
- textResult(JSON.stringify(filtered, null, 2))
2500
+ compactResult(
2501
+ paginateRows(filtered, args, capabilityLight, projected.tasks)
2502
+ )
1830
2503
  );
1831
2504
  }
1832
2505
  case "list_tasks": {
@@ -1841,7 +2514,25 @@ async function callTool(name, args) {
1841
2514
  return withReminder(
1842
2515
  "list_tasks",
1843
2516
  projected,
1844
- textResult(JSON.stringify(filtered, null, 2))
2517
+ compactResult(paginateRows(filtered, args, taskLight))
2518
+ );
2519
+ }
2520
+ case "list_uncategorized_tasks": {
2521
+ // capabilityId == null catches both an explicit null and a
2522
+ // stripped-undefined key (unparented PRs auto-created by the
2523
+ // webhook with no Roadmapper-Capability trailer + no Jaccard
2524
+ // hit). A task may still carry a pillarId (direct theme
2525
+ // parenting); we only key on the capability link here.
2526
+ let filtered = projected.tasks.filter((t) => t.capabilityId == null);
2527
+ if (args?.status)
2528
+ filtered = filtered.filter((t) => t.status === args.status);
2529
+ if (!args?.includeArchived) {
2530
+ filtered = filtered.filter((t) => !t.archived);
2531
+ }
2532
+ return withReminder(
2533
+ "list_uncategorized_tasks",
2534
+ projected,
2535
+ compactResult(paginateRows(filtered, args, taskLight))
1845
2536
  );
1846
2537
  }
1847
2538
  case "get_task": {
@@ -1875,10 +2566,17 @@ async function callTool(name, args) {
1875
2566
  const ts = Date.now();
1876
2567
  session.themesListedAt = ts;
1877
2568
  session.capsDiscoveredAt = ts;
2569
+ const { source } = resolveWorkspaceWithSource(args?.workspaceId);
1878
2570
  return withReminder(
1879
2571
  "get_roadmap_snapshot",
1880
2572
  projected,
1881
- getRoadmapSnapshot(projected, wsId, args?.includeArchived === true)
2573
+ getRoadmapSnapshot(
2574
+ projected,
2575
+ wsId,
2576
+ args?.includeArchived === true,
2577
+ source,
2578
+ args?.detail === true
2579
+ )
1882
2580
  );
1883
2581
  }
1884
2582
  case "propose_task":
@@ -1940,6 +2638,12 @@ async function callTool(name, args) {
1940
2638
  return recordOutcomeReading(args, wsId, projected);
1941
2639
  case "list_stale_outcomes":
1942
2640
  return listStaleOutcomes(args, projected);
2641
+ case "detect_capability_gaps":
2642
+ // Enumerates every active capability to score fit, so the agent
2643
+ // has effectively discovered the catalogue — satisfies the
2644
+ // propose_capability gate (the natural next step on a gap).
2645
+ session.capsDiscoveredAt = Date.now();
2646
+ return detectCapabilityGaps(args, projected);
1943
2647
  default:
1944
2648
  return errorResult(`Unknown tool: ${name}`);
1945
2649
  }
@@ -1976,6 +2680,61 @@ async function proposeTask(args, projected, wsId) {
1976
2680
  )
1977
2681
  return errorResult(`expectedScope must be a positive number, got ${args.expectedScope}.`);
1978
2682
 
2683
+ // Warn-on-skip (not block): if the agent never surveyed capabilities
2684
+ // this session, it may have picked the wrong parent. Rather than a
2685
+ // hard gate (which would false-positive on legit known-capability
2686
+ // filing and just get worked around), we compute a fit check and
2687
+ // attach an actionable _meta warning to the response. propose_task
2688
+ // stays allow; propose_capability keeps its hard discovery gate.
2689
+ //
2690
+ // Useful signal: score the task text against the CHOSEN capability
2691
+ // and against the best available one. If a different capability
2692
+ // scores materially higher, surface it — that's the likely-misfiled
2693
+ // case, the exact thing discovery would have caught.
2694
+ function buildSkipWarning() {
2695
+ if (session.capsDiscoveredAt !== null) return null; // discovery happened
2696
+ const taskToks = tokenize(
2697
+ [args.title ?? "", args.summary ?? ""].join(" ")
2698
+ );
2699
+ if (taskToks.size === 0) return null;
2700
+ const themeById = new Map(
2701
+ (projected.themes ?? []).map((t) => [t.id, t])
2702
+ );
2703
+ const hayFor = (c) => {
2704
+ const theme = themeById.get(c.pillarId);
2705
+ const titles = (projected.tasks ?? [])
2706
+ .filter((t) => t.capabilityId === c.id)
2707
+ .map((t) => t.title)
2708
+ .join(" ");
2709
+ return tokenize(
2710
+ [c.name, c.description ?? "", c.outcome ?? "", theme?.name ?? "", titles].join(" ")
2711
+ );
2712
+ };
2713
+ const chosenScore = jaccardScore(taskToks, hayFor(cap));
2714
+ // Best OTHER active, non-delivered capability.
2715
+ let best = null;
2716
+ for (const c of projected.capabilities) {
2717
+ if (c.id === cap.id || c.archived) continue;
2718
+ if (effectiveCapabilityStatus(c, projected.tasks) === "delivered") continue;
2719
+ const s = jaccardScore(taskToks, hayFor(c));
2720
+ if (!best || s > best.score) best = { id: c.id, name: c.name, score: s };
2721
+ }
2722
+ const base =
2723
+ "Heads up: you filed this task without calling suggest_capability_for / list_capabilities / get_roadmap_snapshot this session, so you may not have surveyed existing capabilities. ";
2724
+ // Only escalate to a concrete suggestion when another cap clearly
2725
+ // fits better than the chosen one — otherwise just a gentle note.
2726
+ if (best && best.score > 0.2 && best.score > chosenScore + 0.1) {
2727
+ return (
2728
+ base +
2729
+ `The task text fits ${best.id} (${best.name}) noticeably better (score ${best.score.toFixed(2)}) than the chosen ${cap.id} (${chosenScore.toFixed(2)}). If that's the right home, move_task it there.`
2730
+ );
2731
+ }
2732
+ return (
2733
+ base +
2734
+ "If you're confident in the parent, ignore this; otherwise call suggest_capability_for({ taskId }) to confirm."
2735
+ );
2736
+ }
2737
+
1979
2738
  const effort = args.effort;
1980
2739
  const start = todayISO();
1981
2740
  // Target dates are day-resolution; round up so sub-day estimates
@@ -2012,6 +2771,11 @@ async function proposeTask(args, projected, wsId) {
2012
2771
  ...(args.expectedScope !== undefined ? { expectedScope: args.expectedScope } : {}),
2013
2772
  };
2014
2773
 
2774
+ const skipWarning = buildSkipWarning();
2775
+ const skipMeta = skipWarning
2776
+ ? { _meta: { roadmapper: { reminder: skipWarning } } }
2777
+ : undefined;
2778
+
2015
2779
  if (args.dryRun) {
2016
2780
  return textResult(
2017
2781
  JSON.stringify(
@@ -2019,12 +2783,13 @@ async function proposeTask(args, projected, wsId) {
2019
2783
  ok: true,
2020
2784
  dryRun: true,
2021
2785
  wouldCreate: task,
2022
- warnings: [],
2786
+ warnings: skipWarning ? [skipWarning] : [],
2023
2787
  message: `Would create task ${id} under ${cap.id} (${cap.name}). No record written.`,
2024
2788
  },
2025
2789
  null,
2026
2790
  2
2027
- )
2791
+ ),
2792
+ skipMeta
2028
2793
  );
2029
2794
  }
2030
2795
 
@@ -2055,13 +2820,15 @@ async function proposeTask(args, projected, wsId) {
2055
2820
  id: stored.id,
2056
2821
  capabilityId: stored.capabilityId,
2057
2822
  idempotent,
2823
+ ...(skipWarning ? { warnings: [skipWarning] } : {}),
2058
2824
  message: idempotent
2059
2825
  ? `Task ${stored.id} already exists with idempotencyKey ${args.idempotencyKey}; returning existing task instead of creating a duplicate.`
2060
2826
  : `Created ${stored.id} under ${cap.id} (${cap.name}). status=planned, authorKind=agent.`,
2061
2827
  },
2062
2828
  null,
2063
2829
  2
2064
- )
2830
+ ),
2831
+ skipMeta
2065
2832
  );
2066
2833
  }
2067
2834
 
@@ -2233,7 +3000,13 @@ async function proposeCapability(args, projected, wsId) {
2233
3000
  * passes that id back on `propose_task` / `propose_capability` /
2234
3001
  * `propose_theme` calls.
2235
3002
  */
2236
- function getRoadmapSnapshot(projected, wsId, includeArchived = false) {
3003
+ function getRoadmapSnapshot(
3004
+ projected,
3005
+ wsId,
3006
+ includeArchived = false,
3007
+ source,
3008
+ detail = false
3009
+ ) {
2237
3010
  // Archived entities are filtered out by default — the snapshot
2238
3011
  // is meant to surface what an agent should plan against, and
2239
3012
  // archived rows are by definition not in scope. Pass
@@ -2250,31 +3023,67 @@ function getRoadmapSnapshot(projected, wsId, includeArchived = false) {
2250
3023
  if (!includeArchived && t.archived) return false;
2251
3024
  return t.status === "in_progress" || t.status === "planned";
2252
3025
  });
2253
- return textResult(
2254
- JSON.stringify(
2255
- {
2256
- workspaceId: wsId,
2257
- generatedAt: new Date().toISOString(),
2258
- themes,
2259
- capabilities: activeCapabilities,
2260
- tasks: inFlightTasks,
2261
- counts: {
2262
- themes: themes.length,
2263
- activeCapabilities: activeCapabilities.length,
2264
- inFlightTasks: inFlightTasks.length,
2265
- totalCapabilities: projected.capabilities.length,
2266
- totalTasks: projected.tasks.length,
2267
- },
2268
- },
2269
- null,
2270
- 2
2271
- )
2272
- );
3026
+
3027
+ // Light by default — this is the cold-start orient call, so it must
3028
+ // never blow the token budget on a large workspace (the 670-task
3029
+ // workspace produced an 81KB full-detail response). detail:true
3030
+ // restores full rows. Even light, we cap the task list: counts
3031
+ // below carries the true totals, and an agent that needs every row
3032
+ // should use list_tasks with a filter, not the snapshot.
3033
+ const capItems = detail
3034
+ ? activeCapabilities
3035
+ : activeCapabilities.map((c) => capabilityLight(c, projected.tasks));
3036
+ const taskCap = detail ? LIST_MAX_LIMIT : LIST_DEFAULT_LIMIT;
3037
+ const taskSlice = inFlightTasks.slice(0, taskCap);
3038
+ const taskItems = detail ? taskSlice : taskSlice.map(taskLight);
3039
+
3040
+ return compactResult({
3041
+ workspaceId: wsId,
3042
+ // How wsId was resolved (arg / snapshot / env / none). Lets the
3043
+ // agent catch a silent env-default fall-through before planning
3044
+ // against the wrong (often seed) workspace. Omitted when unknown.
3045
+ ...(source ? { resolvedFrom: source } : {}),
3046
+ generatedAt: new Date().toISOString(),
3047
+ mode: detail ? "detail" : "summary",
3048
+ themes,
3049
+ capabilities: capItems,
3050
+ tasks: taskItems,
3051
+ tasksTruncated: inFlightTasks.length > taskSlice.length,
3052
+ counts: {
3053
+ themes: themes.length,
3054
+ activeCapabilities: activeCapabilities.length,
3055
+ inFlightTasks: inFlightTasks.length,
3056
+ totalCapabilities: projected.capabilities.length,
3057
+ totalTasks: projected.tasks.length,
3058
+ },
3059
+ });
2273
3060
  }
2274
3061
 
2275
3062
  function suggestCapabilityFor(args, projected) {
2276
- const desc = (args.description || "").trim();
2277
- if (!desc) return errorResult("description is required.");
3063
+ // Two query sources: free-text description, or an existing task
3064
+ // (title + summary). taskId is the triage path — rank a home for
3065
+ // an orphan surfaced by list_uncategorized_tasks. Exactly one.
3066
+ if (args.taskId && args.description) {
3067
+ return errorResult(
3068
+ "Pass exactly one of description / taskId, not both."
3069
+ );
3070
+ }
3071
+ let desc;
3072
+ let sourceTaskId;
3073
+ if (args.taskId) {
3074
+ const task = projected.tasks.find((t) => t.id === args.taskId);
3075
+ if (!task) return errorResult(`Task ${args.taskId} not found.`);
3076
+ sourceTaskId = task.id;
3077
+ desc = [task.title ?? "", task.summary ?? ""].join(" ").trim();
3078
+ if (!desc) {
3079
+ return errorResult(
3080
+ `Task ${args.taskId} has no title or summary to match on.`
3081
+ );
3082
+ }
3083
+ } else {
3084
+ desc = (args.description || "").trim();
3085
+ if (!desc) return errorResult("description or taskId is required.");
3086
+ }
2278
3087
  const limit = Math.min(25, Math.max(1, args.limit ?? 5));
2279
3088
 
2280
3089
  // Skip delivered capabilities — they're closed bets. A new PR
@@ -2347,6 +3156,7 @@ function suggestCapabilityFor(args, projected) {
2347
3156
  {
2348
3157
  ok: true,
2349
3158
  query: desc,
3159
+ ...(sourceTaskId ? { taskId: sourceTaskId } : {}),
2350
3160
  matches: ranked,
2351
3161
  hint:
2352
3162
  ranked.length === 0
@@ -2931,6 +3741,162 @@ function listStaleOutcomes(args, projected) {
2931
3741
  );
2932
3742
  }
2933
3743
 
3744
+ /**
3745
+ * Find clusters of uncategorized tasks that no existing capability
3746
+ * covers — the "a bet is missing" signal. Two-stage:
3747
+ * 1. Homeless filter: a task is homeless when its BEST Jaccard fit
3748
+ * against any active (non-delivered) capability is below
3749
+ * fitThreshold. Tasks that fit an existing cap aren't gaps —
3750
+ * they're just orphans to file (suggest_capability_for +
3751
+ * move_task), so they're excluded here.
3752
+ * 2. Greedy clustering: seed a cluster from the first ungrouped
3753
+ * homeless task, then pull in any other homeless task whose
3754
+ * tokens overlap the cluster's accumulated tokens at >=
3755
+ * fitThreshold. Repeat until everything is grouped. Clusters
3756
+ * below minClusterSize are dropped as noise.
3757
+ *
3758
+ * Read-only, deterministic (no Date/random), and order-stable so a
3759
+ * resumed/cached run reproduces. Returns a suggested capability name
3760
+ * per cluster (its top shared keywords) — a HINT for propose_capability,
3761
+ * not an auto-create.
3762
+ */
3763
+ function detectCapabilityGaps(args, projected) {
3764
+ // Guard against non-numeric input (a non-compliant client, or an
3765
+ // explicit null) — Math.floor(NaN) would propagate NaN through the
3766
+ // clamp and make `members.length >= NaN` always false, silently
3767
+ // returning zero gaps. Fall back to the default unless we got a
3768
+ // finite number, mirroring the fitThreshold guard below.
3769
+ const minClusterSize =
3770
+ typeof args?.minClusterSize === "number" &&
3771
+ Number.isFinite(args.minClusterSize)
3772
+ ? Math.min(50, Math.max(2, Math.floor(args.minClusterSize)))
3773
+ : 3;
3774
+ const fitThreshold =
3775
+ typeof args?.fitThreshold === "number" &&
3776
+ Number.isFinite(args.fitThreshold)
3777
+ ? Math.min(1, Math.max(0, args.fitThreshold))
3778
+ : 0.2;
3779
+ const includeArchived = args?.includeArchived === true;
3780
+
3781
+ // Active capabilities = candidate homes. Build each one's haystack
3782
+ // once (same vocabulary blend suggest_capability_for uses).
3783
+ const themeById = new Map((projected.themes ?? []).map((t) => [t.id, t]));
3784
+ const activeCaps = projected.capabilities.filter(
3785
+ (c) =>
3786
+ (includeArchived || !c.archived) &&
3787
+ effectiveCapabilityStatus(c, projected.tasks) !== "delivered"
3788
+ );
3789
+ const capHaystacks = activeCaps.map((c) => {
3790
+ const theme = themeById.get(c.pillarId);
3791
+ const taskTitles = (projected.tasks ?? [])
3792
+ .filter((t) => t.capabilityId === c.id)
3793
+ .map((t) => t.title)
3794
+ .join(" ");
3795
+ return tokenize(
3796
+ [
3797
+ c.name,
3798
+ c.description ?? "",
3799
+ c.outcome ?? "",
3800
+ theme?.name ?? "",
3801
+ theme?.description ?? "",
3802
+ taskTitles,
3803
+ ].join(" ")
3804
+ );
3805
+ });
3806
+
3807
+ // Stage 1 — homeless uncategorized tasks (best cap fit < threshold).
3808
+ const uncategorized = projected.tasks.filter(
3809
+ (t) => t.capabilityId == null && (includeArchived || !t.archived)
3810
+ );
3811
+ const homeless = [];
3812
+ for (const t of uncategorized) {
3813
+ const toks = tokenize([t.title ?? "", t.summary ?? ""].join(" "));
3814
+ if (toks.size === 0) continue; // nothing to match on
3815
+ let best = 0;
3816
+ for (const hay of capHaystacks) {
3817
+ const s = jaccardScore(toks, hay);
3818
+ if (s > best) best = s;
3819
+ }
3820
+ if (best < fitThreshold) homeless.push({ task: t, toks, bestFit: best });
3821
+ }
3822
+
3823
+ // Stage 2 — greedy clustering by shared vocabulary. Deterministic:
3824
+ // iterate in array order, never random.
3825
+ const used = new Set();
3826
+ const clusters = [];
3827
+ for (let i = 0; i < homeless.length; i++) {
3828
+ if (used.has(i)) continue;
3829
+ used.add(i);
3830
+ const members = [homeless[i]];
3831
+ const clusterToks = new Set(homeless[i].toks);
3832
+ for (let j = i + 1; j < homeless.length; j++) {
3833
+ if (used.has(j)) continue;
3834
+ if (jaccardScore(homeless[j].toks, clusterToks) >= fitThreshold) {
3835
+ used.add(j);
3836
+ members.push(homeless[j]);
3837
+ for (const tk of homeless[j].toks) clusterToks.add(tk);
3838
+ }
3839
+ }
3840
+ if (members.length >= minClusterSize) clusters.push({ members, clusterToks });
3841
+ }
3842
+
3843
+ // Shape the output: shared keywords (most common tokens across the
3844
+ // cluster's members), a suggested name, and member task ids/titles.
3845
+ const shaped = clusters
3846
+ .map(({ members, clusterToks }) => {
3847
+ // Rank tokens by how many members contain them — the shared
3848
+ // vocabulary is what names the bet.
3849
+ const freq = new Map();
3850
+ for (const m of members)
3851
+ for (const tk of m.toks) freq.set(tk, (freq.get(tk) ?? 0) + 1);
3852
+ const keywords = [...freq.entries()]
3853
+ .filter(([, n]) => n >= 2) // shared by at least two members
3854
+ .sort((a, b) => b[1] - a[1])
3855
+ .slice(0, 6)
3856
+ .map(([tk]) => tk);
3857
+ return {
3858
+ size: members.length,
3859
+ keywords,
3860
+ suggestedCapabilityName:
3861
+ keywords.length > 0
3862
+ ? keywords.slice(0, 4).join(" ")
3863
+ : "(no shared keywords)",
3864
+ tasks: members.map((m) => ({
3865
+ id: m.task.id,
3866
+ title: m.task.title,
3867
+ bestExistingFit: Number(m.bestFit.toFixed(3)),
3868
+ })),
3869
+ };
3870
+ })
3871
+ .sort((a, b) => b.size - a.size);
3872
+
3873
+ const meta =
3874
+ shaped.length > 0
3875
+ ? {
3876
+ _meta: {
3877
+ roadmapper: {
3878
+ reminder:
3879
+ `${shaped.length} capability gap(s) detected — clusters of uncategorized work no existing bet covers. ` +
3880
+ "Each is a CANDIDATE for propose_capability (confirm with the user — capabilities are quarterly bets, not auto-created), then move_tasks the members under it.",
3881
+ },
3882
+ },
3883
+ }
3884
+ : undefined;
3885
+
3886
+ // Compact JSON + optional _meta nudge (textResult spreads `extra`).
3887
+ return textResult(
3888
+ JSON.stringify({
3889
+ uncategorizedScanned: uncategorized.length,
3890
+ homelessCount: homeless.length,
3891
+ minClusterSize,
3892
+ fitThreshold,
3893
+ gapCount: shaped.length,
3894
+ gaps: shaped,
3895
+ }),
3896
+ meta
3897
+ );
3898
+ }
3899
+
2934
3900
  async function submitAcceptanceGrades(args, projected, wsId) {
2935
3901
  const task = projected.tasks.find((t) => t.id === args.taskId);
2936
3902
  if (!task) return errorResult(`Task ${args.taskId} not found.`);
@@ -3098,15 +4064,15 @@ async function readResource(uri) {
3098
4064
  // the model," not "this specific call shape ran."
3099
4065
  if (session.rubricFetchedAt === null) {
3100
4066
  session.rubricFetchedAt = Date.now();
3101
- // Pass the cwd snapshot's workspace id so the row is
3102
- // visible in Settings → MCP activity. Without this the
3103
- // resource-route fetch lands with workspace_id=NULL and
3104
- // gets filtered out for non-operator viewers (per migration
3105
- // 0038's NULL-workspace lock).
4067
+ // Pass the resolved workspace id (repo → snapshot → env) so the
4068
+ // row is visible in Settings → MCP activity under the right
4069
+ // workspace. Without this the resource-route fetch lands with
4070
+ // workspace_id=NULL and gets filtered out for non-operator
4071
+ // viewers (per migration 0038's NULL-workspace lock).
3106
4072
  recordTelemetry(
3107
4073
  "rubric_fetched",
3108
4074
  { via: "resource" },
3109
- snapshotWorkspaceId() ?? undefined
4075
+ resolveWorkspaceId() ?? undefined
3110
4076
  );
3111
4077
  }
3112
4078
  return {
@@ -3135,12 +4101,22 @@ async function readResource(uri) {
3135
4101
  const active = projected.capabilities.filter(
3136
4102
  (c) => effectiveCapabilityStatus(c, projected.tasks) !== "delivered"
3137
4103
  );
4104
+ // Resources auto-fire on client connect with NO args and no model
4105
+ // gate — so they MUST be bounded unconditionally. Light rows +
4106
+ // cap, with a total/truncated envelope. An agent that needs full
4107
+ // detail uses list_capabilities({ detail:true }).
4108
+ const capped = active.slice(0, LIST_MAX_LIMIT);
3138
4109
  return {
3139
4110
  contents: [
3140
4111
  {
3141
4112
  uri,
3142
4113
  mimeType: "application/json",
3143
- text: JSON.stringify(active, null, 2),
4114
+ text: JSON.stringify({
4115
+ total: active.length,
4116
+ returned: capped.length,
4117
+ truncated: active.length > capped.length,
4118
+ items: capped.map((c) => capabilityLight(c, projected.tasks)),
4119
+ }),
3144
4120
  },
3145
4121
  ],
3146
4122
  };
@@ -3149,12 +4125,20 @@ async function readResource(uri) {
3149
4125
  const open = projected.tasks.filter(
3150
4126
  (t) => t.status === "in_progress" || t.status === "planned"
3151
4127
  );
4128
+ // Same rationale as the capabilities resource — bounded + light,
4129
+ // because this fires on connect without anyone asking.
4130
+ const capped = open.slice(0, LIST_MAX_LIMIT);
3152
4131
  return {
3153
4132
  contents: [
3154
4133
  {
3155
4134
  uri,
3156
4135
  mimeType: "application/json",
3157
- text: JSON.stringify(open, null, 2),
4136
+ text: JSON.stringify({
4137
+ total: open.length,
4138
+ returned: capped.length,
4139
+ truncated: open.length > capped.length,
4140
+ items: capped.map(taskLight),
4141
+ }),
3158
4142
  },
3159
4143
  ],
3160
4144
  };
@@ -3245,6 +4229,11 @@ async function handle(request) {
3245
4229
  const { id, method, params } = request;
3246
4230
  try {
3247
4231
  if (method === "initialize") {
4232
+ // If the client declares roots support, request the root list
4233
+ // right after we respond (can't send mid-handler — the client
4234
+ // isn't listening until it gets our initialize result). The
4235
+ // main loop fires requestClientRoots() once this returns.
4236
+ _clientSupportsRoots = !!params?.capabilities?.roots;
3248
4237
  // Snapshot counts so an MCP client showing server info
3249
4238
  // surfaces actual roadmap shape, not just "connected".
3250
4239
  const projected =
@@ -3279,14 +4268,34 @@ async function handle(request) {
3279
4268
  name: SERVER_NAME,
3280
4269
  version: SERVER_VERSION,
3281
4270
  stats,
3282
- instructions:
3283
- "Roadmapper online " +
3284
- `${stats.themes} theme${stats.themes === 1 ? "" : "s"}, ` +
3285
- `${stats.capabilities} capabilit${stats.capabilities === 1 ? "y" : "ies"}, ` +
3286
- `${stats.openTasks} open task${stats.openTasks === 1 ? "" : "s"}. ` +
3287
- "Call get_agents_md before planning the propose_* and submit_acceptance_grades tools refuse without it. " +
3288
- "Use suggest_capability_for before propose_capability. " +
3289
- "Slash-prompts available: roadmapper:plan-feature, roadmapper:close-task, roadmapper:weekly-review.",
4271
+ instructions: (() => {
4272
+ // Name the workspace we resolve to RIGHT NOW + where it came
4273
+ // from, so the agent can trust where its writes land instead
4274
+ // of discovering an empty/wrong workspace later. Repo-based
4275
+ // resolution (roots → repo_workspace_map) finishes just after
4276
+ // this handshake, so if the client supports roots we say the
4277
+ // target may refine and to confirm via get_active_workspace.
4278
+ const { id: ws, source } = resolveWorkspaceWithSource();
4279
+ const wsLine = ws
4280
+ ? `Workspace: ${ws} (resolved from ${source}). `
4281
+ : "No workspace resolved yet. ";
4282
+ const rootsLine = _clientSupportsRoots
4283
+ ? "Detecting the repo you're in to pick its workspace; call get_active_workspace before your first write to confirm. "
4284
+ : ws
4285
+ ? ""
4286
+ : "Set ROADMAPPER_WORKSPACE_ID or open a connected repo. ";
4287
+ return (
4288
+ "Roadmapper online — " +
4289
+ wsLine +
4290
+ `${stats.themes} theme${stats.themes === 1 ? "" : "s"}, ` +
4291
+ `${stats.capabilities} capabilit${stats.capabilities === 1 ? "y" : "ies"}, ` +
4292
+ `${stats.openTasks} open task${stats.openTasks === 1 ? "" : "s"}. ` +
4293
+ rootsLine +
4294
+ "Call get_agents_md before planning — the propose_* and submit_acceptance_grades tools refuse without it. " +
4295
+ "Use suggest_capability_for before propose_capability. " +
4296
+ "Slash-prompts available: roadmapper:plan-feature, roadmapper:close-task, roadmapper:weekly-review."
4297
+ );
4298
+ })(),
3290
4299
  },
3291
4300
  },
3292
4301
  };
@@ -3324,6 +4333,11 @@ async function handle(request) {
3324
4333
  const result = renderPrompt(params?.name, params?.arguments ?? {});
3325
4334
  return { jsonrpc: "2.0", id, result };
3326
4335
  }
4336
+ if (method === "notifications/roots/list_changed") {
4337
+ // The client's open folders changed — re-pull and re-resolve.
4338
+ requestClientRoots();
4339
+ return null;
4340
+ }
3327
4341
  // Notifications (no id) and unknown methods: ignore.
3328
4342
  if (id === undefined) return null;
3329
4343
  return {
@@ -3516,6 +4530,27 @@ async function runSelftest() {
3516
4530
  pass: (r) =>
3517
4531
  Array.isArray(r?.result?.tools) && r.result.tools.length === TOOLS.length,
3518
4532
  },
4533
+ {
4534
+ name: "get_active_workspace reports a resolution source",
4535
+ fn: () =>
4536
+ handle({
4537
+ id: 22,
4538
+ method: "tools/call",
4539
+ params: { name: "get_active_workspace", arguments: {} },
4540
+ }),
4541
+ pass: (r) => {
4542
+ try {
4543
+ const out = JSON.parse(r?.result?.content?.[0]?.text ?? "{}");
4544
+ return (
4545
+ typeof out.resolvedFrom === "string" &&
4546
+ ["arg", "snapshot", "env", "none"].includes(out.resolvedFrom) &&
4547
+ ["broker", "operator", "read-only"].includes(out.writeMode)
4548
+ );
4549
+ } catch {
4550
+ return false;
4551
+ }
4552
+ },
4553
+ },
3519
4554
  {
3520
4555
  name: "list_themes",
3521
4556
  fn: () =>
@@ -3640,6 +4675,93 @@ async function runSelftest() {
3640
4675
  ? r?.result && !r.result.isError
3641
4676
  : r?.result?.isError === true,
3642
4677
  },
4678
+ {
4679
+ name: "propose_task (warn-on-skip: dryRun WITHOUT discovery attaches a capability-fit warning)",
4680
+ fn: async () => {
4681
+ // Fresh session + rubric (mutator gate) but NO capability
4682
+ // discovery → the warn-on-skip path should fire. dryRun avoids
4683
+ // needing write auth and returns before the RPC.
4684
+ resetSession();
4685
+ await handle({
4686
+ id: 300,
4687
+ method: "tools/call",
4688
+ params: { name: "get_agents_md", arguments: {} },
4689
+ });
4690
+ return handle({
4691
+ id: 301,
4692
+ method: "tools/call",
4693
+ params: {
4694
+ name: "propose_task",
4695
+ arguments: {
4696
+ capabilityId: aCap,
4697
+ title: "Selftest warn task",
4698
+ effort: "S",
4699
+ dryRun: true,
4700
+ },
4701
+ },
4702
+ });
4703
+ },
4704
+ pass: (r) => {
4705
+ if (r?.result?.isError) return false;
4706
+ let body;
4707
+ try {
4708
+ body = JSON.parse(r?.result?.content?.[0]?.text);
4709
+ } catch {
4710
+ return false;
4711
+ }
4712
+ // Warning present in both the warnings[] array and the _meta nudge.
4713
+ return (
4714
+ Array.isArray(body?.warnings) &&
4715
+ body.warnings.length === 1 &&
4716
+ typeof r?.result?._meta?.roadmapper?.reminder === "string"
4717
+ );
4718
+ },
4719
+ },
4720
+ {
4721
+ name: "propose_task (warn-on-skip: dryRun AFTER discovery has NO warning)",
4722
+ fn: async () => {
4723
+ resetSession();
4724
+ await handle({
4725
+ id: 310,
4726
+ method: "tools/call",
4727
+ params: { name: "get_agents_md", arguments: {} },
4728
+ });
4729
+ // get_roadmap_snapshot sets capsDiscoveredAt → discovery done.
4730
+ await handle({
4731
+ id: 311,
4732
+ method: "tools/call",
4733
+ params: { name: "get_roadmap_snapshot", arguments: {} },
4734
+ });
4735
+ return handle({
4736
+ id: 312,
4737
+ method: "tools/call",
4738
+ params: {
4739
+ name: "propose_task",
4740
+ arguments: {
4741
+ capabilityId: aCap,
4742
+ title: "Selftest no-warn task",
4743
+ effort: "S",
4744
+ dryRun: true,
4745
+ },
4746
+ },
4747
+ });
4748
+ },
4749
+ pass: (r) => {
4750
+ if (r?.result?.isError) return false;
4751
+ let body;
4752
+ try {
4753
+ body = JSON.parse(r?.result?.content?.[0]?.text);
4754
+ } catch {
4755
+ return false;
4756
+ }
4757
+ // Discovery happened → warnings empty, no _meta nudge.
4758
+ return (
4759
+ Array.isArray(body?.warnings) &&
4760
+ body.warnings.length === 0 &&
4761
+ !r?.result?._meta?.roadmapper?.reminder
4762
+ );
4763
+ },
4764
+ },
3643
4765
  {
3644
4766
  name: "propose_theme (missing name returns error result)",
3645
4767
  fn: () =>
@@ -3819,6 +4941,398 @@ async function runSelftest() {
3819
4941
  }),
3820
4942
  pass: (r) => r?.result?.isError === true,
3821
4943
  },
4944
+ {
4945
+ name: "suggest_capability_for (taskId path builds query from the task + echoes taskId)",
4946
+ fn: () =>
4947
+ handle({
4948
+ id: 201,
4949
+ method: "tools/call",
4950
+ params: {
4951
+ name: "suggest_capability_for",
4952
+ arguments: { taskId: "TK-DEMO" },
4953
+ },
4954
+ }),
4955
+ pass: (r) =>
4956
+ !r?.result?.isError &&
4957
+ r?.result?.content?.[0]?.text?.includes('"matches"') &&
4958
+ r?.result?.content?.[0]?.text?.includes('"taskId": "TK-DEMO"'),
4959
+ },
4960
+ {
4961
+ name: "suggest_capability_for (unknown taskId rejected)",
4962
+ fn: () =>
4963
+ handle({
4964
+ id: 202,
4965
+ method: "tools/call",
4966
+ params: {
4967
+ name: "suggest_capability_for",
4968
+ arguments: { taskId: "TK-000000" },
4969
+ },
4970
+ }),
4971
+ pass: (r) => r?.result?.isError === true,
4972
+ },
4973
+ {
4974
+ name: "suggest_capability_for (description + taskId together rejected)",
4975
+ fn: () =>
4976
+ handle({
4977
+ id: 203,
4978
+ method: "tools/call",
4979
+ params: {
4980
+ name: "suggest_capability_for",
4981
+ arguments: { taskId: "TK-DEMO", description: "x" },
4982
+ },
4983
+ }),
4984
+ pass: (r) => r?.result?.isError === true,
4985
+ },
4986
+ {
4987
+ name: "list_uncategorized_tasks (envelope shape, excludes parented seed task)",
4988
+ fn: () =>
4989
+ handle({
4990
+ id: 204,
4991
+ method: "tools/call",
4992
+ params: {
4993
+ name: "list_uncategorized_tasks",
4994
+ arguments: {},
4995
+ },
4996
+ }),
4997
+ pass: (r) => {
4998
+ if (r?.result?.isError) return false;
4999
+ const text = r?.result?.content?.[0]?.text;
5000
+ if (typeof text !== "string") return false;
5001
+ let env;
5002
+ try {
5003
+ env = JSON.parse(text);
5004
+ } catch {
5005
+ return false;
5006
+ }
5007
+ // New shape: { total, returned, truncated, items }. The seed's
5008
+ // only task (TK-DEMO) is parented under CAP-DEMO, so it must
5009
+ // NOT appear in the uncategorized items.
5010
+ return (
5011
+ Array.isArray(env?.items) &&
5012
+ typeof env.total === "number" &&
5013
+ typeof env.truncated === "boolean" &&
5014
+ !env.items.some((t) => t.id === "TK-DEMO")
5015
+ );
5016
+ },
5017
+ },
5018
+ {
5019
+ name: "list_tasks (default: envelope + LIGHT rows drop prs/acceptance/summary)",
5020
+ fn: () =>
5021
+ handle({
5022
+ id: 205,
5023
+ method: "tools/call",
5024
+ params: { name: "list_tasks", arguments: {} },
5025
+ }),
5026
+ pass: (r) => {
5027
+ if (r?.result?.isError) return false;
5028
+ let env;
5029
+ try {
5030
+ env = JSON.parse(r?.result?.content?.[0]?.text);
5031
+ } catch {
5032
+ return false;
5033
+ }
5034
+ if (!Array.isArray(env?.items)) return false;
5035
+ // Light rows must NOT carry the heavy arrays/text. (Seed has
5036
+ // at least TK-DEMO.) Every returned row is light.
5037
+ return env.items.every(
5038
+ (t) =>
5039
+ !("prs" in t) &&
5040
+ !("acceptance" in t) &&
5041
+ !("acceptanceGrades" in t) &&
5042
+ !("summary" in t) &&
5043
+ "id" in t &&
5044
+ "status" in t
5045
+ );
5046
+ },
5047
+ },
5048
+ {
5049
+ name: "list_tasks (detail:true restores full rows)",
5050
+ fn: () =>
5051
+ handle({
5052
+ id: 206,
5053
+ method: "tools/call",
5054
+ params: { name: "list_tasks", arguments: { detail: true } },
5055
+ }),
5056
+ pass: (r) => {
5057
+ if (r?.result?.isError) return false;
5058
+ let env;
5059
+ try {
5060
+ env = JSON.parse(r?.result?.content?.[0]?.text);
5061
+ } catch {
5062
+ return false;
5063
+ }
5064
+ // TK-DEMO in the seed carries acceptance criteria; detail mode
5065
+ // must surface them. Find it and confirm a heavy field is back.
5066
+ const demo = env?.items?.find((t) => t.id === "TK-DEMO");
5067
+ return !!demo && "acceptance" in demo;
5068
+ },
5069
+ },
5070
+ {
5071
+ name: "list_tasks (limit clamps to the requested cap)",
5072
+ fn: () =>
5073
+ handle({
5074
+ id: 207,
5075
+ method: "tools/call",
5076
+ params: { name: "list_tasks", arguments: { limit: 1 } },
5077
+ }),
5078
+ pass: (r) => {
5079
+ if (r?.result?.isError) return false;
5080
+ let env;
5081
+ try {
5082
+ env = JSON.parse(r?.result?.content?.[0]?.text);
5083
+ } catch {
5084
+ return false;
5085
+ }
5086
+ return (
5087
+ Array.isArray(env?.items) &&
5088
+ env.items.length <= 1 &&
5089
+ env.returned <= 1
5090
+ );
5091
+ },
5092
+ },
5093
+ {
5094
+ name: "list_capabilities (light row carries EFFECTIVE status, not the null column)",
5095
+ fn: () =>
5096
+ handle({
5097
+ id: 210,
5098
+ method: "tools/call",
5099
+ params: { name: "list_capabilities", arguments: {} },
5100
+ }),
5101
+ pass: (r) => {
5102
+ if (r?.result?.isError) return false;
5103
+ let env;
5104
+ try {
5105
+ env = JSON.parse(r?.result?.content?.[0]?.text);
5106
+ } catch {
5107
+ return false;
5108
+ }
5109
+ // CAP-DEMO has no explicit status column; its child task
5110
+ // TK-DEMO is 'planned', so the effective status must derive to
5111
+ // a non-empty value in the light row (regression guard: before
5112
+ // the fix, status was stripped and absent entirely).
5113
+ const demo = env?.items?.find((c) => c.id === "CAP-DEMO");
5114
+ return !!demo && typeof demo.status === "string" && demo.status.length > 0;
5115
+ },
5116
+ },
5117
+ {
5118
+ name: "list_capabilities (compact JSON — no pretty-print whitespace)",
5119
+ fn: () =>
5120
+ handle({
5121
+ id: 208,
5122
+ method: "tools/call",
5123
+ params: { name: "list_capabilities", arguments: {} },
5124
+ }),
5125
+ pass: (r) => {
5126
+ if (r?.result?.isError) return false;
5127
+ const text = r?.result?.content?.[0]?.text;
5128
+ if (typeof text !== "string") return false;
5129
+ // Pretty-printed JSON would contain a newline followed by two or more
5130
+ // whitespace characters (indentation); compact output must not. Also confirm it parses to the envelope shape.
5131
+ if (/\n\s\s/.test(text)) return false;
5132
+ let env;
5133
+ try {
5134
+ env = JSON.parse(text);
5135
+ } catch {
5136
+ return false;
5137
+ }
5138
+ return Array.isArray(env?.items) && typeof env.total === "number";
5139
+ },
5140
+ },
5141
+ {
5142
+ name: "get_roadmap_snapshot (summary mode by default: light tasks + counts + mode flag)",
5143
+ fn: () =>
5144
+ handle({
5145
+ id: 209,
5146
+ method: "tools/call",
5147
+ params: { name: "get_roadmap_snapshot", arguments: {} },
5148
+ }),
5149
+ pass: (r) => {
5150
+ if (r?.result?.isError) return false;
5151
+ let snap;
5152
+ try {
5153
+ snap = JSON.parse(r?.result?.content?.[0]?.text);
5154
+ } catch {
5155
+ return false;
5156
+ }
5157
+ if (snap?.mode !== "summary") return false;
5158
+ if (!snap?.counts || typeof snap.counts.totalTasks !== "number")
5159
+ return false;
5160
+ // Light tasks in summary mode: no heavy arrays.
5161
+ return (snap.tasks ?? []).every(
5162
+ (t) => !("prs" in t) && !("acceptance" in t)
5163
+ );
5164
+ },
5165
+ },
5166
+ {
5167
+ name: "detect_capability_gaps (through handle: seed has no orphans → well-formed empty result)",
5168
+ fn: () =>
5169
+ handle({
5170
+ id: 211,
5171
+ method: "tools/call",
5172
+ params: { name: "detect_capability_gaps", arguments: {} },
5173
+ }),
5174
+ pass: (r) => {
5175
+ if (r?.result?.isError) return false;
5176
+ let out;
5177
+ try {
5178
+ out = JSON.parse(r?.result?.content?.[0]?.text);
5179
+ } catch {
5180
+ return false;
5181
+ }
5182
+ // Seed's only task is categorized → zero homeless, zero gaps,
5183
+ // but the envelope fields must all be present and numeric.
5184
+ return (
5185
+ Array.isArray(out?.gaps) &&
5186
+ out.gapCount === 0 &&
5187
+ typeof out.uncategorizedScanned === "number" &&
5188
+ typeof out.homelessCount === "number"
5189
+ );
5190
+ },
5191
+ },
5192
+ {
5193
+ name: "detect_capability_gaps (direct: clusters homeless tasks, names the cluster by shared keywords)",
5194
+ fn: () => {
5195
+ // Fixture: one active capability about 'billing invoices', plus
5196
+ // 4 uncategorized tasks — 3 clearly about 'wallet apple google
5197
+ // pass' (a missing bet) and 1 lone 'documentation typo'.
5198
+ const projected = {
5199
+ themes: [{ id: "TH-1", name: "Platform", description: "" }],
5200
+ capabilities: [
5201
+ {
5202
+ id: "CAP-BILL",
5203
+ pillarId: "TH-1",
5204
+ name: "Billing invoices",
5205
+ description: "invoice generation and billing runs",
5206
+ outcome: "invoices delivered",
5207
+ status: "in_progress",
5208
+ },
5209
+ ],
5210
+ tasks: [
5211
+ { id: "TK-1", capabilityId: null, title: "wallet apple pass provisioning", summary: "google wallet pass signing" },
5212
+ { id: "TK-2", capabilityId: null, title: "google wallet pass install telemetry", summary: "apple wallet pass rate" },
5213
+ { id: "TK-3", capabilityId: null, title: "apple google wallet pass dashboard", summary: "wallet pass provisioning ui" },
5214
+ { id: "TK-4", capabilityId: null, title: "fix documentation typo", summary: "" },
5215
+ ],
5216
+ };
5217
+ return detectCapabilityGaps({ minClusterSize: 3 }, projected);
5218
+ },
5219
+ pass: (r) => {
5220
+ if (r?.isError) return false;
5221
+ let out;
5222
+ try {
5223
+ out = JSON.parse(r?.content?.[0]?.text);
5224
+ } catch {
5225
+ return false;
5226
+ }
5227
+ // The 3 wallet-pass tasks cluster; the lone typo task does not
5228
+ // reach minClusterSize=3. So exactly one gap of size 3, named
5229
+ // from the shared 'wallet/pass' vocabulary.
5230
+ if (out.gapCount !== 1) return false;
5231
+ const gap = out.gaps[0];
5232
+ return (
5233
+ gap.size === 3 &&
5234
+ gap.keywords.includes("wallet") &&
5235
+ gap.keywords.includes("pass") &&
5236
+ !gap.tasks.some((t) => t.id === "TK-4")
5237
+ );
5238
+ },
5239
+ },
5240
+ {
5241
+ name: "detect_capability_gaps (direct: tasks that FIT an existing cap are not homeless)",
5242
+ fn: () => {
5243
+ // 3 tasks that clearly match the billing capability — they must
5244
+ // NOT be reported as a gap (they have a home; they just need
5245
+ // filing via move_task, not a new capability).
5246
+ const projected = {
5247
+ themes: [{ id: "TH-1", name: "Platform", description: "" }],
5248
+ capabilities: [
5249
+ {
5250
+ id: "CAP-BILL",
5251
+ pillarId: "TH-1",
5252
+ name: "Billing invoices generation",
5253
+ description: "invoice generation billing runs dunning",
5254
+ outcome: "invoices delivered to every customer",
5255
+ status: "in_progress",
5256
+ },
5257
+ ],
5258
+ tasks: [
5259
+ { id: "TK-1", capabilityId: null, title: "billing invoice generation bug", summary: "invoice runs" },
5260
+ { id: "TK-2", capabilityId: null, title: "invoice generation dunning", summary: "billing runs" },
5261
+ { id: "TK-3", capabilityId: null, title: "billing invoice dunning runs", summary: "invoice generation" },
5262
+ ],
5263
+ };
5264
+ return detectCapabilityGaps({ minClusterSize: 2 }, projected);
5265
+ },
5266
+ pass: (r) => {
5267
+ if (r?.isError) return false;
5268
+ let out;
5269
+ try {
5270
+ out = JSON.parse(r?.content?.[0]?.text);
5271
+ } catch {
5272
+ return false;
5273
+ }
5274
+ // They fit CAP-BILL, so zero homeless → zero gaps.
5275
+ return out.gapCount === 0 && out.homelessCount === 0;
5276
+ },
5277
+ },
5278
+ {
5279
+ name: "detect_capability_gaps (direct: minClusterSize filters clusters below the floor)",
5280
+ fn: () => {
5281
+ const projected = {
5282
+ themes: [{ id: "TH-1", name: "Platform", description: "" }],
5283
+ capabilities: [],
5284
+ tasks: [
5285
+ { id: "TK-1", capabilityId: null, title: "wallet apple pass", summary: "google wallet" },
5286
+ { id: "TK-2", capabilityId: null, title: "google wallet pass", summary: "apple wallet" },
5287
+ ],
5288
+ };
5289
+ // Two homeless tasks cluster, but minClusterSize=3 drops it.
5290
+ return detectCapabilityGaps({ minClusterSize: 3 }, projected);
5291
+ },
5292
+ pass: (r) => {
5293
+ if (r?.isError) return false;
5294
+ let out;
5295
+ try {
5296
+ out = JSON.parse(r?.content?.[0]?.text);
5297
+ } catch {
5298
+ return false;
5299
+ }
5300
+ return out.homelessCount === 2 && out.gapCount === 0;
5301
+ },
5302
+ },
5303
+ {
5304
+ name: "detect_capability_gaps (regression: non-numeric minClusterSize falls back to default, not NaN)",
5305
+ fn: () => {
5306
+ // 3 wallet-pass tasks that SHOULD cluster into a gap at the
5307
+ // default minClusterSize=3. Pass a non-numeric minClusterSize
5308
+ // (a non-compliant client / explicit null). Before the guard,
5309
+ // Math.floor(NaN) propagated → members.length >= NaN always
5310
+ // false → zero gaps (silent wrong answer). After: falls back to
5311
+ // default 3, so the gap is still detected.
5312
+ const projected = {
5313
+ themes: [{ id: "TH-1", name: "Platform", description: "" }],
5314
+ capabilities: [],
5315
+ tasks: [
5316
+ { id: "TK-1", capabilityId: null, title: "wallet apple pass provisioning", summary: "google wallet pass" },
5317
+ { id: "TK-2", capabilityId: null, title: "google wallet pass telemetry", summary: "apple wallet pass" },
5318
+ { id: "TK-3", capabilityId: null, title: "apple google wallet pass dashboard", summary: "wallet pass" },
5319
+ ],
5320
+ };
5321
+ return detectCapabilityGaps({ minClusterSize: null }, projected);
5322
+ },
5323
+ pass: (r) => {
5324
+ if (r?.isError) return false;
5325
+ let out;
5326
+ try {
5327
+ out = JSON.parse(r?.content?.[0]?.text);
5328
+ } catch {
5329
+ return false;
5330
+ }
5331
+ // minClusterSize coerced to default 3, NOT NaN → the 3-task
5332
+ // cluster is detected.
5333
+ return out.minClusterSize === 3 && out.gapCount === 1;
5334
+ },
5335
+ },
3822
5336
  {
3823
5337
  // suggest_theme_for is the theme-level mirror — same shape,
3824
5338
  // returns ranked matches against an arbitrary description.
@@ -4158,6 +5672,155 @@ async function runSelftest() {
4158
5672
  );
4159
5673
  },
4160
5674
  },
5675
+ {
5676
+ // Seed-workspace guard: with no workspaceId arg, no snapshot, and the
5677
+ // env default equal to the seed workspace "default", the mutator must
5678
+ // be refused. Env is set and restored around the call; the snapshot is forced absent.
5679
+ name: "seed-workspace write refused when resolved from env default",
5680
+ fn: async () => {
5681
+ const prevR = process.env.ROADMAPPER_WORKSPACE_ID;
5682
+ const prevS = process.env.SUPABASE_WORKSPACE_ID;
5683
+ try {
5684
+ resetSession();
5685
+ session.rubricFetchedAt = Date.now();
5686
+ __setSnapshotWorkspaceForTest(null);
5687
+ process.env.ROADMAPPER_WORKSPACE_ID = "default";
5688
+ delete process.env.SUPABASE_WORKSPACE_ID;
5689
+ return await handle({
5690
+ id: 38,
5691
+ method: "tools/call",
5692
+ params: {
5693
+ name: "propose_task",
5694
+ arguments: { capabilityId: "CAP-X", title: "x", effort: "M" },
5695
+ },
5696
+ });
5697
+ } finally {
5698
+ __setSnapshotWorkspaceForTest(undefined);
5699
+ if (prevR === undefined) delete process.env.ROADMAPPER_WORKSPACE_ID;
5700
+ else process.env.ROADMAPPER_WORKSPACE_ID = prevR;
5701
+ if (prevS !== undefined) process.env.SUPABASE_WORKSPACE_ID = prevS;
5702
+ }
5703
+ },
5704
+ pass: (r) => {
5705
+ if (!r?.result?.isError) return false;
5706
+ const txt = r.result.content?.[0]?.text ?? "";
5707
+ return txt.includes("seed/demo workspace");
5708
+ },
5709
+ },
5710
+ {
5711
+ // Explicit workspaceId="default" makes the source "arg", a
5712
+ // deliberate choice — the seed guard must NOT fire. The call
5713
+ // then fails downstream (no write auth), proving the guard let
5714
+ // it through.
5715
+ name: "explicit workspaceId arg bypasses the seed-workspace guard",
5716
+ fn: async () => {
5717
+ const prevR = process.env.ROADMAPPER_WORKSPACE_ID;
5718
+ try {
5719
+ resetSession();
5720
+ session.rubricFetchedAt = Date.now();
5721
+ __setSnapshotWorkspaceForTest(null);
5722
+ process.env.ROADMAPPER_WORKSPACE_ID = "default";
5723
+ return await handle({
5724
+ id: 39,
5725
+ method: "tools/call",
5726
+ params: {
5727
+ name: "propose_task",
5728
+ arguments: {
5729
+ capabilityId: "CAP-X",
5730
+ title: "x",
5731
+ effort: "M",
5732
+ workspaceId: "default",
5733
+ },
5734
+ },
5735
+ });
5736
+ } finally {
5737
+ __setSnapshotWorkspaceForTest(undefined);
5738
+ if (prevR === undefined) delete process.env.ROADMAPPER_WORKSPACE_ID;
5739
+ else process.env.ROADMAPPER_WORKSPACE_ID = prevR;
5740
+ }
5741
+ },
5742
+ pass: (r) => {
5743
+ const txt = r?.result?.content?.[0]?.text ?? "";
5744
+ return !txt.includes("seed/demo workspace");
5745
+ },
5746
+ },
5747
+ {
5748
+ // ROADMAPPER_ALLOW_SEED_WORKSPACE=1 opts out of the seed guard
5749
+ // (parity with the cross-workspace override). Guard must NOT fire
5750
+ // even when resolved from the env default.
5751
+ name: "ROADMAPPER_ALLOW_SEED_WORKSPACE=1 disables the seed-workspace guard",
5752
+ fn: async () => {
5753
+ const prevR = process.env.ROADMAPPER_WORKSPACE_ID;
5754
+ const prevS = process.env.SUPABASE_WORKSPACE_ID;
5755
+ const prevAllow = process.env.ROADMAPPER_ALLOW_SEED_WORKSPACE;
5756
+ try {
5757
+ resetSession();
5758
+ session.rubricFetchedAt = Date.now();
5759
+ __setSnapshotWorkspaceForTest(null);
5760
+ process.env.ROADMAPPER_WORKSPACE_ID = "default";
5761
+ delete process.env.SUPABASE_WORKSPACE_ID;
5762
+ process.env.ROADMAPPER_ALLOW_SEED_WORKSPACE = "1";
5763
+ return await handle({
5764
+ id: 138,
5765
+ method: "tools/call",
5766
+ params: {
5767
+ name: "propose_task",
5768
+ arguments: { capabilityId: "CAP-X", title: "x", effort: "M" },
5769
+ },
5770
+ });
5771
+ } finally {
5772
+ __setSnapshotWorkspaceForTest(undefined);
5773
+ if (prevR === undefined) delete process.env.ROADMAPPER_WORKSPACE_ID;
5774
+ else process.env.ROADMAPPER_WORKSPACE_ID = prevR;
5775
+ if (prevS !== undefined) process.env.SUPABASE_WORKSPACE_ID = prevS;
5776
+ if (prevAllow === undefined)
5777
+ delete process.env.ROADMAPPER_ALLOW_SEED_WORKSPACE;
5778
+ else process.env.ROADMAPPER_ALLOW_SEED_WORKSPACE = prevAllow;
5779
+ }
5780
+ },
5781
+ pass: (r) => {
5782
+ const txt = r?.result?.content?.[0]?.text ?? "";
5783
+ return !txt.includes("seed/demo workspace");
5784
+ },
5785
+ },
5786
+ {
5787
+ // dryRun is non-destructive validation — it must pass BOTH guards
5788
+ // even when resolved from the seed env default. (This case asserts only that the seed-workspace refusal is absent.)
5789
+ name: "dryRun bypasses the seed-workspace guard",
5790
+ fn: async () => {
5791
+ const prevR = process.env.ROADMAPPER_WORKSPACE_ID;
5792
+ const prevS = process.env.SUPABASE_WORKSPACE_ID;
5793
+ try {
5794
+ resetSession();
5795
+ session.rubricFetchedAt = Date.now();
5796
+ __setSnapshotWorkspaceForTest(null);
5797
+ process.env.ROADMAPPER_WORKSPACE_ID = "default";
5798
+ delete process.env.SUPABASE_WORKSPACE_ID;
5799
+ return await handle({
5800
+ id: 139,
5801
+ method: "tools/call",
5802
+ params: {
5803
+ name: "propose_task",
5804
+ arguments: {
5805
+ capabilityId: "CAP-X",
5806
+ title: "x",
5807
+ effort: "M",
5808
+ dryRun: true,
5809
+ },
5810
+ },
5811
+ });
5812
+ } finally {
5813
+ __setSnapshotWorkspaceForTest(undefined);
5814
+ if (prevR === undefined) delete process.env.ROADMAPPER_WORKSPACE_ID;
5815
+ else process.env.ROADMAPPER_WORKSPACE_ID = prevR;
5816
+ if (prevS !== undefined) process.env.SUPABASE_WORKSPACE_ID = prevS;
5817
+ }
5818
+ },
5819
+ pass: (r) => {
5820
+ const txt = r?.result?.content?.[0]?.text ?? "";
5821
+ return !txt.includes("seed/demo workspace");
5822
+ },
5823
+ },
4161
5824
  {
4162
5825
  // record_outcome_reading rejects missing value.
4163
5826
  name: "record_outcome_reading (missing value returns error result)",
@@ -4242,6 +5905,59 @@ async function runSelftest() {
4242
5905
  return !txt.includes("Refusing cross-workspace");
4243
5906
  },
4244
5907
  },
5908
+ {
5909
+ // Root-derived workspace (repo→repo_workspace_map) beats the cwd
5910
+ // snapshot AND the env default. This is the multi-repo fix: the
5911
+ // repo the agent is actually in wins.
5912
+ name: "root workspace (repo map) beats snapshot in resolution order",
5913
+ fn: () => {
5914
+ try {
5915
+ __setRootWorkspaceForTest("ws-from-repo", "owner/repo");
5916
+ __setSnapshotWorkspaceForTest("ws-from-snapshot");
5917
+ const { id, source } = resolveWorkspaceWithSource();
5918
+ return { result: { id, source } };
5919
+ } finally {
5920
+ __setRootWorkspaceForTest(undefined);
5921
+ __setSnapshotWorkspaceForTest(undefined);
5922
+ }
5923
+ },
5924
+ pass: (r) =>
5925
+ r?.result?.id === "ws-from-repo" && r?.result?.source === "repo",
5926
+ },
5927
+ {
5928
+ // With no root mapping, resolution falls back to the snapshot —
5929
+ // the existing offline path must still work.
5930
+ name: "resolution falls back to snapshot when no root mapping",
5931
+ fn: () => {
5932
+ try {
5933
+ __setRootWorkspaceForTest(null);
5934
+ __setSnapshotWorkspaceForTest("ws-from-snapshot");
5935
+ const { id, source } = resolveWorkspaceWithSource();
5936
+ return { result: { id, source } };
5937
+ } finally {
5938
+ __setRootWorkspaceForTest(undefined);
5939
+ __setSnapshotWorkspaceForTest(undefined);
5940
+ }
5941
+ },
5942
+ pass: (r) =>
5943
+ r?.result?.id === "ws-from-snapshot" && r?.result?.source === "snapshot",
5944
+ },
5945
+ {
5946
+ // setClientRoots parses both file:// URIs and bare paths and
5947
+ // invalidates the cached resolution. This case feeds it one of each but asserts only the invalidation.
5948
+ name: "setClientRoots parses file:// URIs and bare paths",
5949
+ fn: () => {
5950
+ __setRootWorkspaceForTest("stale"); // should be invalidated
5951
+ setClientRoots([
5952
+ { uri: "file:///Users/x/proj-a" },
5953
+ { uri: "/Users/x/proj-b" },
5954
+ ]);
5955
+ return { result: { cleared: rootWorkspaceId() } };
5956
+ },
5957
+ // After setClientRoots, the cache is reset to undefined → rootWorkspaceId()
5958
+ // returns null until resolveRootWorkspace() runs. So "cleared" must be null.
5959
+ pass: (r) => r?.result?.cleared === null,
5960
+ },
4245
5961
  ];
4246
5962
 
4247
5963
  let passed = 0;
@@ -4290,8 +6006,19 @@ if (process.argv.includes("--selftest")) {
4290
6006
  log("bad json", line.slice(0, 200));
4291
6007
  continue;
4292
6008
  }
6009
+ // A message with no `method` but a result/error is a RESPONSE to a
6010
+ // request WE sent (e.g. our roots/list). Route it, don't dispatch.
6011
+ if (msg.method === undefined && (msg.result !== undefined || msg.error !== undefined)) {
6012
+ await handleClientResponse(msg);
6013
+ continue;
6014
+ }
4293
6015
  const response = await handle(msg);
4294
6016
  if (response) send(response);
6017
+ // After answering initialize, ask the client for its roots so we
6018
+ // can resolve the per-repo workspace. Done here (not inside the
6019
+ // handler) because the client only starts listening for our
6020
+ // requests once it has our initialize result in hand.
6021
+ if (msg.method === "initialize") requestClientRoots();
4295
6022
  }
4296
6023
  });
4297
6024
 
@@ -4327,4 +6054,8 @@ if (process.argv.includes("--selftest")) {
4327
6054
  const snapTail = snap ? `, snapshot-workspace=${snap}` : "";
4328
6055
  log(`ready (mode=${mode}${tail}${snapTail})`);
4329
6056
  })();
6057
+
6058
+ // Advisory: nudge the operator if a newer package is published. Runs
6059
+ // detached from boot — never blocks startup, never throws.
6060
+ checkForUpdate();
4330
6061
  }