@roadmapperai/mcp 0.9.1 → 0.9.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. package/AGENTS.md +95 -16
  2. package/README.md +7 -0
  3. package/package.json +1 -1
  4. package/server.mjs +1336 -34
package/server.mjs CHANGED
@@ -326,6 +326,13 @@ async function fetchWorkspaceEntitiesViaBroker() {
326
326
  pillars: Array.isArray(parsed.pillars) ? parsed.pillars : [],
327
327
  capabilities: Array.isArray(parsed.capabilities) ? parsed.capabilities : [],
328
328
  tasks: Array.isArray(parsed.tasks) ? parsed.tasks : [],
329
+ // Additive (migration 0108): the workspace_settings row, used to
330
+ // resolve agent_theme_autonomy. Absent on older backends → null,
331
+ // which the projection treats as "all defaults" (autonomy on).
332
+ settings:
333
+ parsed.settings && typeof parsed.settings === "object"
334
+ ? parsed.settings
335
+ : null,
329
336
  };
330
337
  } catch {
331
338
  return null;
@@ -587,6 +594,9 @@ function __setSnapshotWorkspaceForTest(value) {
587
594
  let _clientRoots = []; // array of absolute dir paths from the client
588
595
  let _rootWorkspace = undefined; // undefined=unresolved, null=resolved-but-none, string=workspaceId
589
596
  let _rootWorkspaceRepo = null; // the owner/repo that resolved (for diagnostics)
597
+ let _rootWorkspaceMatches = []; // [{ ws, slug }] for EVERY mapped open root — kept
598
+ // (not just the first) so get_active_workspace can report the ambiguous case
599
+ // instead of silently committing to matches[0].
590
600
  let _clientSupportsRoots = false; // set from initialize params.capabilities.roots
591
601
  const ROOTS_LIST_REQUEST_ID = "roadmapper-roots-list"; // our id for the roots/list request we send
592
602
 
@@ -612,6 +622,7 @@ function setClientRoots(roots) {
612
622
  // Invalidate the cached resolution so the next access re-derives it.
613
623
  _rootWorkspace = undefined;
614
624
  _rootWorkspaceRepo = null;
625
+ _rootWorkspaceMatches = [];
615
626
  }
616
627
 
617
628
  /**
@@ -619,7 +630,21 @@ function setClientRoots(roots) {
619
630
  * to find the repo root implicitly via `git -C <dir>`. Returns null if
620
631
  * the dir isn't a git repo, has no origin, or git isn't available.
621
632
  */
633
+ // Test seam: when set, repoSlugForDir returns this without shelling out
634
+ // to git. Lets the selftest exercise link_repo deterministically (the
635
+ // git call is environment-dependent and slow). Map of dir → slug, or a
636
+ // bare string applied to any dir. Only undefined restores real git resolution: repoSlugForDir tests `!== undefined`, so null (or a dir missing from the map) short-circuits to a null slug without running git.
637
+ let _repoSlugOverride = undefined;
638
+ function __setRepoSlugForTest(v) {
639
+ _repoSlugOverride = v;
640
+ }
641
+
622
642
  async function repoSlugForDir(dir) {
643
+ if (_repoSlugOverride !== undefined) {
644
+ return typeof _repoSlugOverride === "string"
645
+ ? _repoSlugOverride
646
+ : (_repoSlugOverride && _repoSlugOverride[dir]) || null;
647
+ }
623
648
  try {
624
649
  // Async so a slow/hanging git call never blocks the stdin event loop
625
650
  // (this runs while handling the client's roots/list reply). 2s cap.
@@ -698,6 +723,7 @@ async function resolveRootWorkspace() {
698
723
  `Pass workspaceId explicitly on calls to target a specific one.`
699
724
  );
700
725
  }
726
+ _rootWorkspaceMatches = matches;
701
727
  if (matches.length > 0) {
702
728
  _rootWorkspace = matches[0].ws;
703
729
  _rootWorkspaceRepo = matches[0].slug;
@@ -708,6 +734,24 @@ async function resolveRootWorkspace() {
708
734
  return null;
709
735
  }
710
736
 
737
+ /**
738
+ * Distinct workspaces the currently-open roots map to, as
739
+ * `[{ workspaceId, repo }]`. Length > 1 means the resolution is
740
+ * ambiguous (open roots map to more than one workspace) and
741
+ * resolveRootWorkspace committed to the first match —
742
+ * get_active_workspace surfaces this so the agent can pass an
743
+ * explicit workspaceId instead of trusting the silent first-match.
+ *
+ * Reads the module-level _rootWorkspaceMatches cache; does not re-resolve.
+ * De-duplicates by workspace id: when several roots map to the same
+ * workspace, only the first root's slug is reported for it. Order follows
+ * the cached matches.
+ *
+ * @returns {Array<{workspaceId: *, repo: *}>} one entry per distinct
+ *   workspace; empty when the cache is empty.
+ */
744
+ function rootWorkspaceCandidates() {
745
+ const seen = new Set();
746
+ const out = [];
747
+ for (const m of _rootWorkspaceMatches) {
748
+ if (seen.has(m.ws)) continue;
749
+ seen.add(m.ws);
750
+ out.push({ workspaceId: m.ws, repo: m.slug });
751
+ }
752
+ return out;
753
+ }
754
+
711
755
  /** Cached root-derived workspace id (sync read). null if none/unresolved. */
712
756
  function rootWorkspaceId() {
713
757
  return _rootWorkspace ?? null;
@@ -715,9 +759,11 @@ function rootWorkspaceId() {
715
759
 
716
760
  // Test hook: seed the root-resolution cache without touching the client
717
761
  // protocol or the network. `matches` optionally seeds the candidate list ([{ ws, slug }]) read by rootWorkspaceCandidates; when null/omitted it is derived as a single { ws: id, slug: repo } entry, or [] when id is falsy.
718
- function __setRootWorkspaceForTest(id, repo = null) {
762
+ function __setRootWorkspaceForTest(id, repo = null, matches = null) {
719
763
  _rootWorkspace = id;
720
764
  _rootWorkspaceRepo = repo;
765
+ _rootWorkspaceMatches =
766
+ matches ?? (id ? [{ ws: id, slug: repo }] : []);
721
767
  }
722
768
 
723
769
  /**
@@ -814,6 +860,7 @@ async function readWorkspaceProjected(wsIdOverride) {
814
860
  themes: ent.pillars.map(rowToThemeProjected),
815
861
  capabilities: ent.capabilities.map(rowToCapabilityProjected),
816
862
  tasks: ent.tasks.map(rowToTaskProjected),
863
+ settings: rowToSettingsProjected(ent.settings),
817
864
  };
818
865
  }
819
866
  // Broker failed — fall through to the direct read below. On a pure
@@ -837,15 +884,22 @@ async function readWorkspaceProjected(wsIdOverride) {
837
884
  return res.json();
838
885
  };
839
886
  try {
840
- const [pillars, caps, tasks] = await Promise.all([
887
+ const [pillars, caps, tasks, settingsRows] = await Promise.all([
841
888
  fetchTable("pillars?select=*"),
842
889
  fetchTable("capabilities?select=*"),
843
890
  fetchTable("tasks?select=*"),
891
+ // Operator path: workspace_settings is one row per workspace.
892
+ // Tolerate the table not existing on an older DB (404) — fall
893
+ // back to defaults rather than failing the whole read.
894
+ fetchTable("workspace_settings?select=*").catch(() => []),
844
895
  ]);
845
896
  return {
846
897
  themes: pillars.map(rowToThemeProjected),
847
898
  capabilities: caps.map(rowToCapabilityProjected),
848
899
  tasks: tasks.map(rowToTaskProjected),
900
+ settings: rowToSettingsProjected(
901
+ Array.isArray(settingsRows) ? settingsRows[0] : settingsRows
902
+ ),
849
903
  };
850
904
  } catch (e) {
851
905
  log("supabase entity read failed:", e.message);
@@ -857,6 +911,20 @@ async function readWorkspaceProjected(wsIdOverride) {
857
911
  * the same camelCase keys the SPA + agent surfaces have always
858
912
  * used; the legacy JSONB shape and these table rows agree on
859
913
  * every field. */
914
+ /**
915
+ * Project a workspace_settings row to the camelCase shape the server
916
+ * reads. Tolerant of null / {} (no row yet, or an older backend that
917
+ * doesn't return settings): every flag falls back to its product
918
+ * default. agent_theme_autonomy defaults TRUE — agents create themes
919
+ * autonomously unless a workspace explicitly turns it off.
+ *
+ * Only a strict boolean `false` switches the flag off; any other value
+ * (undefined, null, 0, "false") projects to true.
+ *
+ * NOTE(review): an array also satisfies `typeof r === "object"`, and has
+ * no agent_theme_autonomy property, so passing a row LIST yields the
+ * default (on). The direct-read caller unwraps settingsRows[0] before
+ * calling; the broker path passes ent.settings through as-is — confirm
+ * the broker always returns a single row object, never an array.
+ *
+ * @param {object|null|undefined} r - raw settings row (snake_case keys).
+ * @returns {{agentThemeAutonomy: boolean}} projected settings.
920
+ */
921
+ function rowToSettingsProjected(r) {
922
+ const row = r && typeof r === "object" ? r : {};
923
+ return {
924
+ // Default true: missing column / row / backend all mean "on".
925
+ agentThemeAutonomy: row.agent_theme_autonomy !== false,
926
+ };
927
+ }
860
928
  function rowToThemeProjected(r) {
861
929
  return stripUndefined({
862
930
  id: r.id,
@@ -1042,13 +1110,29 @@ function compactResult(obj) {
1042
1110
  * concurrent agent writes safe — see migration 0006 for the
1043
1111
  * function bodies.
1044
1112
  */
1113
+ // A few write tools forward to a differently-named SECURITY DEFINER RPC
1114
+ // (the *_for_api_key convention). The broker has the matching map
1115
+ // (WRITE_RPC_ALIASES); we mirror it here so the operator path (direct
1116
+ // PostgREST, no broker) hits the same function name. Tools not listed
1117
+ // forward to an RPC of the same name (the common case). NOTE(review): rpcCall resolves this with a plain property lookup (RPC_ALIASES[fn] ?? fn), which also sees inherited Object.prototype keys — harmless as long as fn is always one of our own tool names; confirm no caller passes an arbitrary string.
1118
+ const RPC_ALIASES = {
1119
+ link_repo: "link_repo_for_api_key",
1120
+ };
1121
+
1045
1122
  async function rpcCall(fn, body) {
1046
1123
  const { url, writeKey, apiKey, brokerUrl } = supabaseConfig();
1047
- // body must already carry p_workspace_id — the per-tool resolver
1048
- // injects it before calling rpcCall so the override path works.
1049
- if (!url || !body?.p_workspace_id) {
1124
+ if (!url) {
1125
+ throw new Error("Write tools require ROADMAPPER_BACKEND_URL in env.");
1126
+ }
1127
+ // The body must carry p_workspace_id EXCEPT on the broker path, where
1128
+ // the broker injects the validated workspace from the rmpr_ key (see
1129
+ // link_repo, which deliberately omits it so the key's workspace wins
1130
+ // rather than tripping the broker's cross-workspace guard). On the
1131
+ // operator path there's no broker to supply it, so it's required.
1132
+ const onBrokerPath = Boolean(apiKey && brokerUrl);
1133
+ if (!onBrokerPath && !body?.p_workspace_id) {
1050
1134
  throw new Error(
1051
- "Write tools require ROADMAPPER_BACKEND_URL in env and a resolvable workspaceId (either ROADMAPPER_WORKSPACE_ID env or workspaceId arg)."
1135
+ "Write tools require a resolvable workspaceId (either ROADMAPPER_WORKSPACE_ID env or workspaceId arg)."
1052
1136
  );
1053
1137
  }
1054
1138
 
@@ -1085,7 +1169,10 @@ async function rpcCall(fn, body) {
1085
1169
  "Write tools require either ROADMAPPER_API_KEY (customer path) or ROADMAPPER_ADMIN_KEY (operator path)."
1086
1170
  );
1087
1171
  }
1088
- const res = await fetch(`${url}/rest/v1/rpc/${fn}`, {
1172
+ // Resolve the alias for the direct PostgREST call (the broker does this
1173
+ // server-side for the customer path; the operator path goes direct).
1174
+ const rpcName = RPC_ALIASES[fn] ?? fn;
1175
+ const res = await fetch(`${url}/rest/v1/rpc/${rpcName}`, {
1089
1176
  method: "POST",
1090
1177
  headers: {
1091
1178
  apikey: writeKey,
@@ -1098,7 +1185,7 @@ async function rpcCall(fn, body) {
1098
1185
  if (!res.ok) {
1099
1186
  const txt = await res.text();
1100
1187
  throw new Error(
1101
- `rpc ${fn} failed: ${res.status} ${txt.slice(0, 300)}`
1188
+ `rpc ${rpcName} failed: ${res.status} ${txt.slice(0, 300)}`
1102
1189
  );
1103
1190
  }
1104
1191
  return res.json();
@@ -1338,6 +1425,27 @@ function jaccardScore(a, b) {
1338
1425
  return overlap / Math.max(a.size, b.size);
1339
1426
  }
1340
1427
 
1428
+ // ── Theme sprawl control ──────────────────────────────────────────
1429
+ //
1430
+ // With agent_theme_autonomy ON (the default), the old "stop and ask a
1431
+ // human before any new theme" guard is gone — so the sprawl guard has
1432
+ // to live server-side instead. A proposed theme whose name+description
1433
+ // overlaps an existing active theme at or above this bar is almost
1434
+ // certainly a near-duplicate ("Data Intelligence" vs "Data &
1435
+ // Intelligence"); propose_theme refuses it and points at the match so
1436
+ // the agent reuses/updates that theme instead of minting a sibling.
1437
+ // Set deliberately high: themes are coarse, so only a strong overlap
1438
+ // is a real duplicate. 0.6 blocks name-containment dups ("Data
1439
+ // Intelligence" ⊂ "Data Intelligence Platform" = 0.67) without
1440
+ // false-positiving on two distinct short themes that happen to share
1441
+ // ONE word ("Customer Loyalty" vs "Customer Retention" = 0.5 < 0.6).
1442
+ // force:true overrides for the rare legitimate case. NOTE(review): the worked scores (2/3 = 0.67, 1/2 = 0.5) are shared tokens divided by the LARGER token set — what jaccardScore returns (overlap / Math.max(a.size, b.size)) — not intersection-over-union, which would give 0.67 and 0.33. Presumably that is the scorer used against this bar; verify at the propose_theme call site before retuning.
1443
+ const THEME_SPRAWL_BLOCK = 0.6;
1444
+ // Two existing themes overlapping at/above this are flagged as a
1445
+ // consolidation candidate by detect_theme_sprawl (lower than the block
1446
+ // bar — we want to surface drift before it's an exact dup).
1447
+ const THEME_SPRAWL_WARN = 0.34;
1448
+
1341
1449
  // ── Session state + enforcement gates ─────────────────────────────
1342
1450
  //
1343
1451
  // One process serves one MCP client (stdio). State below is the
@@ -1427,6 +1535,88 @@ function discoveryMissingResult(toolName, fixCall, rationale) {
1427
1535
  };
1428
1536
  }
1429
1537
 
1538
+ /**
1539
+ * Block result for a mutator whose target workspace fell through to the
1540
+ * install's env default WHILE the agent is sitting in a git repo that
1541
+ * isn't mapped to any workspace. Same shape + self-heal rationale as the
1542
+ * rubric gate: name the exact fix so the LLM links the repo, then retries.
1543
+ *
1544
+ * Why this is repo-aware, not session-aware — a developer routinely has
1545
+ * SEVERAL repos open in one chat. The gate must only fire for the specific
1546
+ * unmapped repo, and must never brick a legitimate cross-repo write:
1547
+ * • An explicit `workspaceId` arg → caller is intentionally targeting a
1548
+ * workspace; never blocked (checked before this is reached).
1549
+ * • source === "repo"/"snapshot"/"arg" → already resolved to a real
1550
+ * mapping; this only fires on "env" (the silent install-default
1551
+ * fall-through), which — because resolveWorkspaceWithSource prefers a
1552
+ * repo_workspace_map hit — means THIS repo genuinely isn't mapped.
1553
+ * • No git slug (not in a repo) → nothing to link; fall through to the
1554
+ * env default rather than deadlock.
1555
+ * The message offers BOTH escape hatches so a multi-repo chat is never
1556
+ * stuck: link_repo (map this repo) OR pass workspaceId (target an existing
1557
+ * workspace without mapping the repo at all).
+ *
+ * This function only BUILDS the result; the decision whether to block is
+ * made by the caller.
+ *
+ * @param {string} toolName - the mutator being blocked; interpolated into
+ *   the message and the `alt` hint.
+ * @param {string} slug - the unmapped repo's slug; echoed as `repo`.
+ * @param {string} envWsId - the install-default workspace the write would
+ *   have hit; echoed as `envDefaultWorkspace`.
+ * @returns {{content: Array<{type: string, text: string}>, isError: boolean}}
+ *   a tool result with isError true whose single text item is the
+ *   pretty-printed JSON payload (error "repo_unmapped", message, repo,
+ *   envDefaultWorkspace, fix, alt).
1558
+ */
1559
+ function repoUnmappedResult(toolName, slug, envWsId) {
1560
+ return {
1561
+ content: [
1562
+ {
1563
+ type: "text",
1564
+ text: JSON.stringify(
1565
+ {
1566
+ error: "repo_unmapped",
1567
+ message:
1568
+ `"${slug}" isn't mapped to a workspace, so ${toolName} would land on the install-default workspace "${envWsId}" — probably not what you want. ` +
1569
+ `Map it once with link_repo (this repo → your key's workspace, resolves silently forever after), then retry ${toolName}. ` +
1570
+ `Or, if you meant a specific existing workspace, pass workspaceId on the call and it proceeds without mapping the repo.`,
1571
+ repo: slug,
1572
+ envDefaultWorkspace: envWsId,
1573
+ fix: "link_repo()",
1574
+ alt: `${toolName}({ workspaceId: "<target>", ... })`,
1575
+ },
1576
+ null,
1577
+ 2
1578
+ ),
1579
+ },
1580
+ ],
1581
+ isError: true,
1582
+ };
1583
+ }
1584
+
1585
+ /**
1586
+ * Decide whether a mutator should be blocked because the agent is in an
1587
+ * unmapped repo and the write would silently hit the env default. Resolves
1588
+ * to a block result, or null to proceed. NOT sync: this is an async
1589
+ * function that awaits repoSlugForDir for each open client root until one
1590
+ * yields a slug, so callers must await it. The per-repo "is it mapped"
1591
+ * question was already answered by resolveWorkspaceWithSource (a mapped
+ * repo resolves to source "repo", never "env"), so the only lookup left
+ * here is the slug of the first open root that has one (not specifically
+ * the cwd).
1592
+ *
1593
+ * Escape hatches, in the order the code checks them:
1594
+ * 1. Explicit workspaceId arg → intentional target, allow.
1595
+ * 2. Writes disabled (writeMode() === "read-only") → not our concern.
1596
+ * 3. Source isn't "env" → already resolved to a real mapping, allow.
1597
+ * 4. ROADMAPPER_ALLOW_UNMAPPED_REPO === "1" → allow (operator opt-out).
1598
+ * 5. No client roots → not in a repo at all, allow.
1599
+ * 6. No root yields a git slug → not in a linkable repo, allow
+ * (fall through to env default; blocking would deadlock).
1600
+ *
+ * @param {string} name - the mutator tool being gated.
+ * @param {object} [args] - the tool call arguments; only workspaceId is read.
+ * @param {string} source - how the workspace was resolved; only "env" can block.
+ * @param {string} envWsId - the env-default workspace id, echoed in the block result.
+ * @returns {Promise<object|null>} repoUnmappedResult(...) to block, or null to proceed.
+ */
1601
+ async function repoLinkGate(name, args, source, envWsId) {
1602
+ if (args?.workspaceId) return null; // explicit target — never block
1603
+ if (writeMode() === "read-only") return null; // different problem
1604
+ if (source !== "env") return null; // resolved via repo/snapshot/arg
1605
+ if (process.env.ROADMAPPER_ALLOW_UNMAPPED_REPO === "1") return null;
1606
+ if (_clientRoots.length === 0) return null; // not in a repo at all
1607
+
1608
+ // Find the first open root with a resolvable origin slug. If none, the
1609
+ // agent isn't in a linkable git repo — don't block (let env default win).
1610
+ let slug = null;
1611
+ for (const dir of _clientRoots) {
1612
+ slug = await repoSlugForDir(dir);
1613
+ if (slug) break;
1614
+ }
1615
+ if (!slug) return null;
1616
+
1617
+ return repoUnmappedResult(name, slug, envWsId);
1618
+ }
1619
+
1430
1620
  /**
1431
1621
  * Telemetry write — fire-and-forget POST to public.mcp_telemetry
1432
1622
  * via PostgREST when a service-role key is set. Never blocks the
@@ -1637,7 +1827,8 @@ const TOOLS = [
1637
1827
  {
1638
1828
  name: "get_active_workspace",
1639
1829
  description:
1640
- "Report the workspace this server will act on RIGHT NOW and HOW it was resolved — arg / .roadmapper snapshot / env default — plus whether writes are enabled and via which path (broker vs operator). Cheap: no roadmap data, no DB read.\n\n" +
1830
+ "Report the workspace this server will act on RIGHT NOW and HOW it was resolved — arg / repo (git origin → repo_workspace_map) / .roadmapper snapshot / env default — plus whether writes are enabled and via which path (broker vs operator). Cheap: no roadmap data, no DB read.\n\n" +
1831
+ "RETURNS a `status` (resolved | ambiguous | env_default | unresolved), `writesEnabled`, and a `next` action object (or null). Act only on status \"resolved\"; for any other status `next` carries the exact step to fix it (and `candidates` when a pick is needed) — surface it to the user as a one-tap choice rather than guessing.\n" +
1641
1832
  "USE WHEN: you're unsure which workspace is active; before the FIRST mutating call in a session; after changing directories. Especially important when the agent was launched outside a connected repo checkout, where the env default (often the seed workspace) silently wins.\n" +
1642
1833
  "PREREQUISITE: none — read-only.\n" +
1643
1834
  "ANTI-PATTERN: don't use it to inspect roadmap contents — that's get_roadmap_snapshot. This only answers 'where am I pointed'.\n" +
@@ -1654,6 +1845,21 @@ const TOOLS = [
1654
1845
  additionalProperties: false,
1655
1846
  },
1656
1847
  },
1848
+ {
1849
+ name: "link_repo",
1850
+ description:
1851
+ "Persist a mapping from the CURRENT git repo to your workspace, so future sessions in this repo resolve the workspace silently (no env default, no workspaceId arg). Your API key already pins ONE workspace, so this is a one-tap confirm — it links whatever repo you're in to that workspace.\n\n" +
1852
+ "USE WHEN: get_active_workspace returns status \"env_default\" or \"unresolved\" while you ARE in a git repo, and you want writes to land in the right workspace going forward. This is the `next.onChoice` action those statuses point at.\n" +
1853
+ "PREREQUISITE: the client must have shared a root (workspace folder) whose git origin resolves to an owner/name slug, and write auth must be configured (ROADMAPPER_API_KEY or operator key). The repo slug is derived server-side from your roots — you do NOT pass it.\n" +
1854
+ "ANTI-PATTERN: don't call to switch an already-resolved workspace (it can't — your key pins one workspace); don't call outside a git repo (returns an actionable error). If the repo is already mapped to a DIFFERENT workspace, the call returns a conflict rather than stealing it.\n" +
1855
+ "RETURNS status \"linked\" (mapping saved; resolution re-runs so the next call resolves from it), \"conflict\" (repo already maps elsewhere — surfaces the existing workspace), or an error result (no repo / no auth).\n" +
1856
+ "EXAMPLE: link_repo()",
1857
+ inputSchema: {
1858
+ type: "object",
1859
+ properties: {},
1860
+ additionalProperties: false,
1861
+ },
1862
+ },
1657
1863
  {
1658
1864
  name: "propose_task",
1659
1865
  description:
@@ -1694,13 +1900,70 @@ const TOOLS = [
1694
1900
  additionalProperties: false,
1695
1901
  },
1696
1902
  },
1903
+ {
1904
+ name: "propose_tasks",
1905
+ description:
1906
+ "Bulk-create MANY tasks under ONE capability in a single call. Token-efficient: prefer this over N separate propose_task calls when filing a plan — one request, one compact {id,title} array back instead of N round trips. When write tools are live, file directly via this tool; do NOT also paste the full JSON plan into chat (that pays for the plan twice).\n\n" +
1907
+ "USE WHEN: decomposing a capability into its 3-8 tasks, or importing a planned backlog. All tasks share the one capabilityId.\n" +
1908
+ "PREREQUISITE: get_agents_md once this session (enforced). The capability must already exist — propose_capability first if needed.\n" +
1909
+ "INTRA-BATCH DEPENDENCIES: give a task a `ref` (any alias string) and reference it in another task's `dependsOn` — refs are rewritten to the real TK ids after minting. dependsOn entries that aren't a sibling ref pass through as literal existing TK ids.\n" +
1910
+ "PARTIAL SUCCESS: a structural/validation error in any row fails the whole call before writing (fix the batch). Once validated, per-row RPC failures are reported in tasks[].error without sinking the rest.\n" +
1911
+ "ANTI-PATTERN: don't use for a single task (use propose_task); don't spread one capability's tasks across multiple capabilities (call once per capability).\n" +
1912
+ "EXAMPLE: propose_tasks({ capabilityId: 'CAP-018', tasks: [{ ref: 'a', title: 'Schema + migration', effort: 'M' }, { title: 'API endpoint', effort: 'M', dependsOn: ['a'] }] })\n\n" +
1913
+ "Requires write auth (set ROADMAPPER_API_KEY). Pass dryRun:true to validate + preview ids without writing. Pass workspaceId to target a workspace other than the env default.",
1914
+ inputSchema: {
1915
+ type: "object",
1916
+ properties: {
1917
+ capabilityId: { type: "string" },
1918
+ tasks: {
1919
+ type: "array",
1920
+ minItems: 1,
1921
+ maxItems: 100,
1922
+ description: "Task specs. Each needs title + effort; everything else is optional.",
1923
+ items: {
1924
+ type: "object",
1925
+ properties: {
1926
+ ref: {
1927
+ type: "string",
1928
+ description:
1929
+ "Optional caller alias for intra-batch dependsOn references. Not stored.",
1930
+ },
1931
+ title: { type: "string" },
1932
+ summary: { type: "string" },
1933
+ effort: { type: "string", enum: ["XS", "S", "M", "L", "XL"] },
1934
+ kind: { type: "string", enum: ["feature", "bug", "chore", "spike"] },
1935
+ priority: { type: "string", enum: ["P0", "P1", "P2", "P3"] },
1936
+ acceptance: { type: "array", items: { type: "string" } },
1937
+ dependsOn: {
1938
+ type: "array",
1939
+ items: { type: "string" },
1940
+ description:
1941
+ "Sibling refs (rewritten to real ids) and/or existing TK-NNNNNN ids.",
1942
+ },
1943
+ owner: { type: "string" },
1944
+ expectedPRs: { type: "number" },
1945
+ expectedScope: { type: "number" },
1946
+ idempotencyKey: { type: "string" },
1947
+ },
1948
+ required: ["title", "effort"],
1949
+ additionalProperties: false,
1950
+ },
1951
+ },
1952
+ dryRun: { type: "boolean" },
1953
+ workspaceId: { type: "string" },
1954
+ },
1955
+ required: ["capabilityId", "tasks"],
1956
+ additionalProperties: false,
1957
+ },
1958
+ },
1697
1959
  {
1698
1960
  name: "propose_theme",
1699
1961
  description:
1700
- "Propose a new strategic theme (pillar). Themes are years-stable — only propose one when nothing existing fits.\n\n" +
1701
- "USE WHEN: the work the user is describing genuinely doesn't fit ANY existing theme, AND the user explicitly says they want a new strategic direction. Almost never the right answer in a planning session.\n" +
1702
- "PREREQUISITE: get_agents_md once this session (enforced). Theme discovery once this session, satisfied by suggest_theme_for (preferred — returns ranked matches with a fit signal), list_themes, or get_roadmap_snapshot. Enforced the server returns discovery_missing with a fix field if you skip it. Duplicating a theme is the most common failure mode; the gate stops it.\n" +
1703
- "ANTI-PATTERN: do not call to organize a quarter of work — that's a capability, not a theme. Do not call because the existing themes feel too coarse they're SUPPOSED to be coarse. Use propose_capability under an existing theme instead.\n" +
1962
+ "Propose a new strategic theme (pillar). Themes are years-stable, coarse pillars the small top tier of the tree.\n\n" +
1963
+ "AUTONOMY: by default (agent_theme_autonomy ON) you may create a theme without human confirmation when the work genuinely needs a new pillar. The server controls sprawl for you — it REFUSES a near-duplicate of an existing theme (returns error:\"too_similar\" naming the match) so you reuse/update that one instead. If a workspace turned autonomy OFF, propose_theme returns error:\"confirmation_required\" until you surface the theme to the user and retry with confirm:true.\n" +
1964
+ "USE WHEN: the work doesn't fit any existing theme AND represents a distinct multi-year strategic direction. Most planning needs a capability under an existing theme, not a new theme.\n" +
1965
+ "PREREQUISITE: get_agents_md once this session (enforced). Theme discovery once this session, satisfied by suggest_theme_for (preferred returns ranked matches), list_themes, or get_roadmap_snapshot (enforceddiscovery_missing with a fix field otherwise).\n" +
1966
+ "ANTI-PATTERN: do not call to organize a quarter of work — that's a capability. Do not retry with force:true to bypass a too_similar block unless the overlap is a genuine false positive — that's the sprawl guard working.\n" +
1704
1967
  "EXAMPLE: propose_theme({ name: 'AI Agent Reliability', description: 'Multi-year bet on making agent workflows reproducible.', targetRoi: 20000000, idempotencyKey: 'session-1-theme-1' })\n\n" +
1705
1968
  "Requires write auth (set ROADMAPPER_API_KEY). targetRoi is RAW ANNUAL DOLLARS (e.g. 20000000 = $20M), not millions. Pass idempotencyKey so retries don't duplicate. Pass dryRun: true to validate without writing. Pass workspaceId to target a workspace other than the env default.",
1706
1969
  inputSchema: {
@@ -1710,6 +1973,16 @@ const TOOLS = [
1710
1973
  description: { type: "string" },
1711
1974
  color: { type: "string" },
1712
1975
  targetRoi: { type: "number", description: "Annual ROI target in raw dollars (e.g. 20000000 = $20M)." },
1976
+ force: {
1977
+ type: "boolean",
1978
+ description:
1979
+ "Override the too_similar sprawl block. Use ONLY when a flagged overlap with an existing theme is a genuine false positive and this is truly a distinct strategic pillar.",
1980
+ },
1981
+ confirm: {
1982
+ type: "boolean",
1983
+ description:
1984
+ "Set true to proceed when the workspace has agent theme-autonomy turned OFF — your attestation that the user explicitly approved this new theme. Ignored when autonomy is on (the default).",
1985
+ },
1713
1986
  idempotencyKey: { type: "string" },
1714
1987
  dryRun: { type: "boolean" },
1715
1988
  workspaceId: { type: "string" },
@@ -1945,6 +2218,31 @@ const TOOLS = [
1945
2218
  additionalProperties: false,
1946
2219
  },
1947
2220
  },
2221
+ {
2222
+ name: "detect_theme_sprawl",
2223
+ description:
2224
+ "Find pairs/clusters of EXISTING themes that overlap enough to be candidates for consolidation — the 'we have too many near-duplicate pillars' signal. The companion to agent_theme_autonomy: autonomy lets agents create themes freely, this is how you periodically detect and clean up the drift.\n\n" +
2225
+ "How it works: scores every active theme against every other by name+description token overlap, and reports pairs at or above the warn threshold (default 0.34). Each pair comes with the overlap score and a suggested action (merge via move_capabilities + archive_theme).\n" +
2226
+ "USE WHEN: quarterly review, or any time the theme list feels bloated. With autonomy on, run this occasionally to catch sibling themes that should be one.\n" +
2227
+ "PREREQUISITE: none — read-only. Enumerates every theme, so it satisfies the propose_theme discovery gate.\n" +
2228
+ "ANTI-PATTERN: don't auto-merge on a single weak overlap — a human owns theme structure. Tune threshold rather than acting on noise. Two themes CAN legitimately share vocabulary (e.g. 'Data Ingestion' vs 'Data Governance').\n" +
2229
+ "EXAMPLE: detect_theme_sprawl({ threshold: 0.34 })",
2230
+ inputSchema: {
2231
+ type: "object",
2232
+ properties: {
2233
+ threshold: {
2234
+ type: "number",
2235
+ minimum: 0,
2236
+ maximum: 1,
2237
+ description:
2238
+ "Min name+description Jaccard overlap between two themes to flag as a consolidation candidate. Default 0.34. Raise to surface only the most blatant duplicates.",
2239
+ },
2240
+ includeArchived: { type: "boolean" },
2241
+ workspaceId: { type: "string" },
2242
+ },
2243
+ additionalProperties: false,
2244
+ },
2245
+ },
1948
2246
  ];
1949
2247
 
1950
2248
  /**
@@ -2263,6 +2561,7 @@ function updateLifecycleTools() {
2263
2561
  /** Tools that mutate the workspace — all gated on rubric fetch. */
2264
2562
  const MUTATOR_TOOLS = new Set([
2265
2563
  "propose_task",
2564
+ "propose_tasks",
2266
2565
  "propose_theme",
2267
2566
  "propose_capability",
2268
2567
  "submit_acceptance_grades",
@@ -2296,21 +2595,111 @@ async function callTool(name, args) {
2296
2595
  if (name === "get_active_workspace") {
2297
2596
  const { id, source } = resolveWorkspaceWithSource(args?.workspaceId);
2298
2597
  const { url } = supabaseConfig();
2299
- let note;
2300
- if (source === "env") {
2301
- note =
2302
- "Resolved from the MCP install's env default — NOT from the current directory. If you meant a specific repo's workspace, launch from that checkout (connected repos carry .roadmapper/snapshot.json) or pass workspaceId explicitly.";
2303
- } else if (source === "none") {
2304
- note =
2305
- "No workspace resolved. Set ROADMAPPER_WORKSPACE_ID in env, run from a connected repo checkout, or pass workspaceId on the call.";
2598
+ const mode = writeMode(); // "broker" | "operator" | "read-only"
2599
+ const candidates = rootWorkspaceCandidates();
2600
+ const ambiguous = source === "repo" && candidates.length > 1;
2601
+
2602
+ // Map resolution into ONE explicit status. Every state has a defined
2603
+ // outcome and a `next` action the agent can act on directly — no prose
2604
+ // the LLM has to interpret, no state where the answer is "I silently
2605
+ // guessed". This is the contract a caller checks before its first write:
2606
+ // act only on "resolved"; anything else carries the steps to fix it.
2607
+ // resolved — arg / repo map / snapshot pinned it; safe to proceed
2608
+ // ambiguous — several open repos map to different workspaces
2609
+ // env_default — fell through to the install default (the #1 footgun)
2610
+ // unresolved — nothing named a workspace at all
2611
+ let status;
2612
+ if (ambiguous) status = "ambiguous";
2613
+ else if (source === "arg" || source === "repo" || source === "snapshot")
2614
+ status = "resolved";
2615
+ else if (source === "env") status = "env_default";
2616
+ else status = "unresolved";
2617
+
2618
+ // `next`: the single recommended action, machine-shaped so the agent
2619
+ // (or its harness) can turn it into a one-tap prompt. null when nothing
2620
+ // is needed. `prompt` is phrased for a human; `candidates` (when set) is
2621
+ // the pre-resolved pick list so the agent never has to ask open-ended.
2622
+ let next = null;
2623
+ if (status === "ambiguous") {
2624
+ next = {
2625
+ action: "pass_workspace_id",
2626
+ prompt:
2627
+ "Several open repos map to different workspaces. Which one do you mean?",
2628
+ candidates,
2629
+ detail:
2630
+ "Resolution picked the first match. Pass workspaceId explicitly on the call to target a specific workspace.",
2631
+ };
2632
+ } else if (status === "env_default") {
2633
+ next = {
2634
+ action: "confirm_or_relocate",
2635
+ prompt: `Act on the install default workspace "${id}"?`,
2636
+ detail:
2637
+ "Resolved from the MCP install's env default — NOT the current directory. If that's correct, proceed. Otherwise launch from the connected repo checkout (connected repos resolve via git origin → repo_workspace_map, or carry .roadmapper/snapshot.json) or pass workspaceId explicitly.",
2638
+ // If the client shared a git root and writes are enabled, the
2639
+ // agent can persist this repo → workspace in one tap so future
2640
+ // sessions resolve silently (link_repo derives the slug itself).
2641
+ ...(_clientRoots.length > 0 && mode !== "read-only"
2642
+ ? {
2643
+ onChoice: { tool: "link_repo", args: {} },
2644
+ onChoicePrompt: `Link this repo to "${id}" so it resolves automatically next time?`,
2645
+ }
2646
+ : {}),
2647
+ };
2648
+ } else if (status === "unresolved") {
2649
+ next = {
2650
+ action: "configure_workspace",
2651
+ prompt: "No workspace is resolvable. How should I target one?",
2652
+ detail:
2653
+ "Set ROADMAPPER_WORKSPACE_ID in env, run from a connected repo checkout, or pass workspaceId on the call.",
2654
+ ...(_clientRoots.length > 0 && mode !== "read-only"
2655
+ ? {
2656
+ onChoice: { tool: "link_repo", args: {} },
2657
+ onChoicePrompt:
2658
+ "Link the repo you're in to your workspace so it resolves automatically next time?",
2659
+ }
2660
+ : {}),
2661
+ };
2662
+ }
2663
+
2664
+ // Auth/write gate, reported separately from workspace resolution: a
2665
+ // read-only install can resolve a workspace fine but still can't write.
2666
+ // Surfacing it here lets the agent prompt for credentials BEFORE
2667
+ // attempting a mutator, rather than after it's refused.
2668
+ const writesEnabled = mode !== "read-only";
2669
+ if (!writesEnabled) {
2670
+ next = next ?? {
2671
+ action: "set_credentials",
2672
+ prompt: "Writes are disabled. Connect credentials to enable them?",
2673
+ detail:
2674
+ "Set ROADMAPPER_API_KEY (rmpr_ token from dashboard → Settings → MCP activity) to enable writes through the broker. The key pins exactly one workspace.",
2675
+ };
2306
2676
  }
2677
+
2678
+ // Back-compat prose for older callers that read `note`.
2679
+ const note =
2680
+ next && (status === "env_default" || status === "unresolved")
2681
+ ? next.detail
2682
+ : undefined;
2683
+
2307
2684
  return textResult(
2308
2685
  JSON.stringify(
2309
2686
  {
2310
2687
  workspaceId: id,
2311
- resolvedFrom: source, // "arg" | "snapshot" | "env" | "none"
2312
- writeMode: writeMode(), // "broker" | "operator" | "read-only"
2688
+ // "arg" | "repo" | "snapshot" | "env" | "none"
2689
+ resolvedFrom: source,
2690
+ status, // "resolved" | "ambiguous" | "env_default" | "unresolved"
2691
+ writeMode: mode, // "broker" | "operator" | "read-only"
2692
+ writesEnabled,
2313
2693
  backendConfigured: Boolean(url),
2694
+ // Only report the repo that actually resolved the workspace —
2695
+ // _rootWorkspaceRepo can hold a stale root match when an arg /
2696
+ // snapshot / env value is what won.
2697
+ repo: source === "repo" ? _rootWorkspaceRepo || null : null,
2698
+ // Candidates only make sense when resolution was ambiguous;
2699
+ // emitting them on a cleanly-resolved response reads as a
2700
+ // contradiction to the agent consuming the envelope.
2701
+ ...(status === "ambiguous" ? { candidates } : {}),
2702
+ next, // recommended action (or null), pre-shaped for a prompt
2314
2703
  ...(note ? { note } : {}),
2315
2704
  },
2316
2705
  null,
@@ -2319,6 +2708,97 @@ async function callTool(name, args) {
2319
2708
  );
2320
2709
  }
2321
2710
 
2711
+ // link_repo persists "this repo → my key's workspace" so future
2712
+ // sessions resolve silently. Plumbing, not a roadmap write — exempt
2713
+ // from the rubric/discovery gates (it's not in MUTATOR_TOOLS), and it
2714
+ // returns before the projection read since it touches no roadmap data.
2715
+ // The repo slug is derived server-side from the client's roots; the
2716
+ // agent supplies nothing.
2717
+ if (name === "link_repo") {
2718
+ if (writeMode() === "read-only") {
2719
+ return textResult(
2720
+ JSON.stringify({
2721
+ status: "no_auth",
2722
+ error:
2723
+ "Writes are disabled. Set ROADMAPPER_API_KEY (rmpr_ token from dashboard → Settings → MCP activity) to enable linking.",
2724
+ })
2725
+ );
2726
+ }
2727
+ // Find the first open root whose git origin resolves to a slug.
2728
+ let slug = null;
2729
+ for (const dir of _clientRoots) {
2730
+ slug = await repoSlugForDir(dir);
2731
+ if (slug) break;
2732
+ }
2733
+ if (!slug) {
2734
+ return textResult(
2735
+ JSON.stringify({
2736
+ status: "no_repo",
2737
+ error:
2738
+ "Not in a git repo with an 'origin' remote (no resolvable owner/name slug from the client's roots). Open the repo you want to link as a workspace folder and retry.",
2739
+ })
2740
+ );
2741
+ }
2742
+ try {
2743
+ // Workspace-id handling differs by path — link_repo is the one tool
2744
+ // where it MUST:
2745
+ // • Broker (customer rmpr_) path: do NOT send p_workspace_id. The
2746
+ // broker's cross-workspace guard 403s when the body's
2747
+ // p_workspace_id != the key's validated workspace — and link_repo
2748
+ // is invoked exactly when resolution is env_default/unresolved,
2749
+ // i.e. when the MCP's resolved wsId is a guess that usually does
2750
+ // NOT match the key. Omitting it lets the broker inject the
2751
+ // validated workspace (the key pins it — that's the whole point).
2752
+ // • Operator path: no broker to inject, so pass the resolved wsId
2753
+ // to satisfy rpcCall's required-field guard + target the right
2754
+ // workspace. rpcCall throws (→ "error" result) if it's unresolved.
2755
+ const body =
2756
+ writeMode() === "broker"
2757
+ ? { p_repo: slug }
2758
+ : { p_workspace_id: wsId, p_repo: slug };
2759
+ const res = await rpcCall("link_repo", body);
2760
+ const result = Array.isArray(res) ? res[0] : res;
2761
+ if (result?.status === "linked") {
2762
+ // Force the next resolution to re-read repo_workspace_map so the
2763
+ // freshly-linked mapping wins immediately (no stale cache).
2764
+ _rootWorkspace = undefined;
2765
+ _rootWorkspaceMatches = [];
2766
+ _rootWorkspaceRepo = null;
2767
+ return textResult(
2768
+ JSON.stringify({
2769
+ status: "linked",
2770
+ repo: slug,
2771
+ workspaceId: result.workspace_id ?? wsId,
2772
+ detail:
2773
+ "Mapping saved. This repo now resolves to your workspace on future calls.",
2774
+ })
2775
+ );
2776
+ }
2777
+ if (result?.status === "conflict") {
2778
+ return textResult(
2779
+ JSON.stringify({
2780
+ status: "conflict",
2781
+ repo: slug,
2782
+ existingWorkspace: result.existing_workspace ?? null,
2783
+ detail:
2784
+ "This repo is already linked to a different workspace. It was not changed. Pass workspaceId explicitly if you need to act on a specific workspace.",
2785
+ })
2786
+ );
2787
+ }
2788
+ return textResult(
2789
+ JSON.stringify({ status: "unknown", repo: slug, raw: result ?? null })
2790
+ );
2791
+ } catch (e) {
2792
+ return textResult(
2793
+ JSON.stringify({
2794
+ status: "error",
2795
+ repo: slug,
2796
+ error: e instanceof Error ? e.message : String(e),
2797
+ })
2798
+ );
2799
+ }
2800
+ }
2801
+
2322
2802
  // Post-Piece-6c, the entity tables ARE the canonical projection
2323
2803
  // — no edits blob, no seed-overlay merge. Fall back to the
2324
2804
  // bundled seed only when the DB is unreachable (offline / dev).
@@ -2364,6 +2844,27 @@ async function callTool(name, args) {
2364
2844
  );
2365
2845
  return rubricMissingResult(name);
2366
2846
  }
2847
+ // Repo-link gate. If the agent is in a git repo that isn't mapped to a
2848
+ // workspace, this write would silently land on the install's env
2849
+ // default. Block once with the link_repo fix (or the workspaceId escape
2850
+ // hatch) so the mapping gets done instead of writes scattering onto the
2851
+ // wrong workspace. Repo-aware so a multi-repo chat is never bricked —
2852
+ // see repoLinkGate / repoUnmappedResult for the full escape-hatch list.
2853
+ {
2854
+ const { source: wsSource } = resolveWorkspaceWithSource(
2855
+ args?.workspaceId
2856
+ );
2857
+ const linkBlock = await repoLinkGate(name, args, wsSource, wsId);
2858
+ if (linkBlock) {
2859
+ session.mutatorBlocks += 1;
2860
+ recordTelemetry(
2861
+ "mutator_blocked_repo_unmapped",
2862
+ { tool: name, targetId },
2863
+ wsId
2864
+ );
2865
+ return linkBlock;
2866
+ }
2867
+ }
2367
2868
  // Per-tool discovery gates. Block propose_theme until the agent
2368
2869
  // has actually inspected the existing theme catalogue, and
2369
2870
  // propose_capability until they've ranked existing caps for fit.
@@ -2581,6 +3082,8 @@ async function callTool(name, args) {
2581
3082
  }
2582
3083
  case "propose_task":
2583
3084
  return proposeTask(args, projected, wsId);
3085
+ case "propose_tasks":
3086
+ return proposeTasks(args, projected, wsId);
2584
3087
  case "propose_theme":
2585
3088
  return proposeTheme(args, projected, wsId);
2586
3089
  case "propose_capability":
@@ -2644,6 +3147,11 @@ async function callTool(name, args) {
2644
3147
  // propose_capability gate (the natural next step on a gap).
2645
3148
  session.capsDiscoveredAt = Date.now();
2646
3149
  return detectCapabilityGaps(args, projected);
3150
+ case "detect_theme_sprawl":
3151
+ // Enumerates every active theme, so it satisfies the propose_theme
3152
+ // discovery gate (consolidating or proposing is the natural next step).
3153
+ session.themesListedAt = Date.now();
3154
+ return detectThemeSprawl(args, projected);
2647
3155
  default:
2648
3156
  return errorResult(`Unknown tool: ${name}`);
2649
3157
  }
@@ -2832,16 +3340,235 @@ async function proposeTask(args, projected, wsId) {
2832
3340
  );
2833
3341
  }
2834
3342
 
2835
- async function proposeTheme(args, _projected /* unused — themes carry no parent */, wsId) {
3343
/**
 * Shared field validation for a single task spec (used by the bulk
 * propose_tasks path).
 *
 * Checks, in order: the spec is an object, the title passes
 * validateName(title, 5), effort is present and in VALID_EFFORTS, the
 * optional priority / kind are in their respective sets, and the optional
 * expectedPRs / expectedScope are strictly positive numbers.
 *
 * @param {object} t - One caller-supplied task spec.
 * @returns {string|null} A human-readable error for the first failed
 *   check, or null when the spec is acceptable.
 */
function taskSpecError(t) {
  // A null / primitive / array row would otherwise throw a TypeError on
  // the first property read instead of producing a reportable error.
  if (t === null || typeof t !== "object" || Array.isArray(t))
    return "task spec must be an object.";

  const titleErr = validateName(t.title, 5);
  if (titleErr) return titleErr;

  if (!t.effort)
    return "effort is required (one of XS, S, M, L, XL) on every task in the batch.";
  if (!VALID_EFFORTS.has(t.effort)) return `Invalid effort ${t.effort}.`;
  if (t.priority && !VALID_PRIORITIES.has(t.priority))
    return `Invalid priority ${t.priority}.`;
  if (t.kind && !VALID_KINDS.has(t.kind)) return `Invalid kind ${t.kind}.`;

  // `!(x > 0)` rather than `x <= 0`: NaN compares false both ways, so the
  // negated form is the one that actually rejects it.
  const isPositiveNumber = (v) => typeof v === "number" && v > 0;
  if (t.expectedPRs !== undefined && !isPositiveNumber(t.expectedPRs))
    return `expectedPRs must be a positive number, got ${t.expectedPRs}.`;
  if (t.expectedScope !== undefined && !isPositiveNumber(t.expectedScope))
    return `expectedScope must be a positive number, got ${t.expectedScope}.`;

  return null;
}
3363
+
3364
/**
 * Build a task record from a validated spec plus its pre-minted id.
 *
 * The record always starts as "planned" at 0% progress, authored by an
 * agent. `start` is today's date and `target` is `start` plus the
 * EFFORT_DAYS entry for the spec's effort, rounded up and never less than
 * one day; `originalTarget` is seeded with the same value.
 *
 * @param {object} t - Task spec (expected to have passed validation).
 * @param {object} cap - Parent capability; supplies `id` and optional `team`.
 * @param {string} id - Pre-minted task id.
 * @returns {object} The task record to send to the propose_task RPC.
 */
function buildTaskRecord(t, cap, id) {
  const start = todayISO();
  const durationDays = Math.max(1, Math.ceil(EFFORT_DAYS[t.effort]));
  const target = addDays(start, durationDays);

  return {
    id,
    capabilityId: cap.id,
    title: cleanText(t.title),
    summary: cleanText(t.summary),
    status: "planned",
    priority: t.priority ?? "P2",
    effort: t.effort,
    kind: t.kind ?? "feature",
    start,
    target,
    originalTarget: target,
    progress: 0,
    // Only strings can be trimmed; a numeric/boolean owner used to throw
    // "trim is not a function" and abort the whole build.
    owner: typeof t.owner === "string" ? t.owner.trim() : "",
    team: cap.team ?? "",
    tags: [],
    prs: [],
    links: {},
    acceptance: t.acceptance ?? [],
    dependsOn: t.dependsOn ?? [],
    authorKind: "agent",
    // Optional sizing hints are included only when the caller supplied them.
    ...(t.expectedPRs !== undefined ? { expectedPRs: t.expectedPRs } : {}),
    ...(t.expectedScope !== undefined ? { expectedScope: t.expectedScope } : {}),
  };
}
3394
+
3395
/**
 * propose_tasks — file MANY tasks under one capability in a single call.
 * This is the token-efficient path: instead of N round trips (each with
 * its own tool-call framing + result), the agent sends the whole batch
 * once and gets back one compact array of per-task results.
 *
 * Intra-batch dependencies: a task may carry a `ref` (a caller-chosen
 * alias) and other tasks may list that ref in `dependsOn`. Ids are minted
 * up front, and any dependsOn entry matching a sibling's ref (compared
 * after trimming) is rewritten to that sibling's task id. Entries that are
 * not a known ref pass through unchanged (assumed to be existing ids).
 *
 * Failure model:
 *   - Validation is all-or-nothing: the first invalid row fails the whole
 *     call with `tasks[i]: <reason>` and nothing is written.
 *   - Write (RPC) failures are per-row: the failing row is reported with
 *     ok:false and the remaining rows are still attempted.
 *
 * @param {object} args - { capabilityId, tasks, dryRun? }.
 * @param {object} projected - Workspace projection; `capabilities` is read.
 * @param {string} wsId - Workspace id forwarded to the RPC.
 * @returns {Promise<object>} A textResult / errorResult envelope.
 */
async function proposeTasks(args, projected, wsId) {
  const cap = projected.capabilities.find((c) => c.id === args.capabilityId);
  if (!cap) return errorResult(`Capability ${args.capabilityId} not found.`);
  const specs = Array.isArray(args.tasks) ? args.tasks : null;
  if (!specs || specs.length === 0)
    return errorResult("tasks must be a non-empty array of task specs.");
  if (specs.length > 100)
    return errorResult(`Too many tasks (${specs.length}); cap is 100 per call.`);

  // Reject non-object rows before anything reads a property off them;
  // a null row would otherwise throw while collecting refs below.
  const badIndex = specs.findIndex(
    (t) => t === null || typeof t !== "object" || Array.isArray(t)
  );
  if (badIndex !== -1)
    return errorResult(`tasks[${badIndex}]: task spec must be an object.`);

  // Normalised alias key: refs are stored trimmed, so lookups must trim too.
  const refKey = (value) => (typeof value === "string" ? value.trim() : "");

  // Mint ids up front so intra-batch dependsOn refs can resolve.
  const minted = specs.map((t) => ({ spec: t, id: randomTaskId() }));
  const refToId = new Map();
  for (const { spec, id } of minted) {
    const key = refKey(spec.ref);
    if (key) refToId.set(key, id);
  }
  const resolveDeps = (deps) =>
    Array.isArray(deps) ? deps.map((d) => refToId.get(refKey(d)) ?? d) : [];

  // Validate everything first; a structural error in any row fails the
  // whole call (cheaper to fix the batch than to half-apply it). RPC
  // errors below are the per-row, partial-success case.
  for (let i = 0; i < minted.length; i++) {
    const err = taskSpecError(minted[i].spec);
    if (err) return errorResult(`tasks[${i}]: ${err}`);
  }

  if (args.dryRun) {
    return textResult(
      JSON.stringify({
        ok: true,
        dryRun: true,
        capabilityId: cap.id,
        wouldCreate: minted.map(({ spec, id }) => ({
          id,
          title: cleanText(spec.title),
          effort: spec.effort,
        })),
        message: `Would create ${minted.length} task(s) under ${cap.id} (${cap.name}). No records written.`,
      }),
    );
  }

  const results = [];
  let created = 0;
  // Sequential on purpose: each row's dependencies are resolved against
  // the ids of the rows already stored.
  for (const { spec, id } of minted) {
    const record = buildTaskRecord(
      { ...spec, dependsOn: resolveDeps(spec.dependsOn) },
      cap,
      id
    );
    try {
      const rpcResult = await rpcCall("propose_task", {
        p_workspace_id: wsId,
        p_task: record,
        p_idempotency_key: spec.idempotencyKey ?? null,
      });
      const stored = rpcResult?.task ?? record;
      const storedId = stored.id ?? id;
      const idempotent = rpcResult?.idempotent === true;
      if (!idempotent) created += 1;

      // An idempotent replay can hand back a task stored under a different
      // id than the one minted here. Re-point this row's ref so later
      // siblings depend on the real task rather than an id that was never
      // written. Only touch the alias while it still belongs to this row.
      const key = refKey(spec.ref);
      if (key && refToId.get(key) === id) refToId.set(key, storedId);

      results.push({ ok: true, id: storedId, title: record.title, idempotent });
    } catch (e) {
      results.push({
        ok: false,
        title: record.title,
        // Non-Error throwables have no .message; stringify them instead of
        // emitting `error: undefined` (which JSON.stringify drops).
        error: e instanceof Error ? e.message : String(e),
      });
    }
  }

  const failed = results.filter((r) => !r.ok).length;
  return textResult(
    JSON.stringify({
      ok: failed === 0,
      capabilityId: cap.id,
      created,
      idempotent: results.filter((r) => r.ok && r.idempotent).length,
      failed,
      tasks: results,
      message:
        `Filed ${created} new task(s) under ${cap.id} (${cap.name})` +
        (failed ? `; ${failed} failed (see tasks[].error).` : "."),
    })
  );
}
3492
+
3493
+ async function proposeTheme(args, projected, wsId) {
2836
3494
  const nameErr = validateName(args.name, 6);
2837
3495
  if (nameErr) return errorResult(nameErr);
2838
3496
 
2839
3497
  const name = cleanText(args.name);
3498
+ const description = cleanText(args.description);
3499
+
3500
+ // ── Sprawl control (always on, independent of autonomy) ──────────
3501
+ // Refuse a near-duplicate of an existing active theme. This is the
3502
+ // server-side replacement for the human gate: instead of asking a
3503
+ // person every time, we only stop the agent when it's about to mint
3504
+ // a theme that overlaps one that already exists. Reuse/update beats
3505
+ // a sibling. force:true is the deliberate override.
3506
+ const activeThemes = (projected?.themes ?? []).filter((t) => !t.archived);
3507
+ const proposedTokens = tokenize(`${name} ${description ?? ""}`);
3508
+ let nearest = null;
3509
+ let nearestScore = 0;
3510
+ for (const t of activeThemes) {
3511
+ const s = jaccardScore(proposedTokens, tokenize(`${t.name} ${t.description ?? ""}`));
3512
+ if (s > nearestScore) {
3513
+ nearestScore = s;
3514
+ nearest = t;
3515
+ }
3516
+ }
3517
+ if (nearest && nearestScore >= THEME_SPRAWL_BLOCK && args.force !== true) {
3518
+ return textResult(
3519
+ JSON.stringify(
3520
+ {
3521
+ error: "too_similar",
3522
+ message:
3523
+ `"${name}" overlaps the existing theme ${nearest.id} (${nearest.name}) ` +
3524
+ `at ${nearestScore.toFixed(2)} (block bar ${THEME_SPRAWL_BLOCK}). Themes are the ` +
3525
+ "small, years-stable top tier — a near-duplicate fragments the strategic view. " +
3526
+ "Reuse it: file your work as a capability under it (propose_capability with " +
3527
+ `pillarId: "${nearest.id}"), or broaden its scope with update_theme. If this is ` +
3528
+ "genuinely a distinct strategic pillar, retry with force:true.",
3529
+ nearestTheme: { id: nearest.id, name: nearest.name, score: Number(nearestScore.toFixed(3)) },
3530
+ fix: `propose_capability({ pillarId: "${nearest.id}", ... })`,
3531
+ },
3532
+ null,
3533
+ 2
3534
+ ),
3535
+ { isError: true }
3536
+ );
3537
+ }
3538
+
3539
+ // ── Autonomy gate ────────────────────────────────────────────────
3540
+ // Default ON: agents create themes without confirmation. A workspace
3541
+ // that flips agent_theme_autonomy OFF re-imposes a human checkpoint —
3542
+ // propose_theme then refuses until the caller passes confirm:true
3543
+ // (the agent's signal that it surfaced the new theme to the user and
3544
+ // got an explicit yes). The sprawl block above still applies either way.
3545
+ const autonomy = projected?.settings?.agentThemeAutonomy !== false;
3546
+ if (!autonomy && args.confirm !== true && !args.dryRun) {
3547
+ return textResult(
3548
+ JSON.stringify(
3549
+ {
3550
+ error: "confirmation_required",
3551
+ message:
3552
+ `This workspace has agent theme-autonomy turned OFF, so a new theme ("${name}") ` +
3553
+ "needs explicit human sign-off. Surface the proposed theme to the user; if they " +
3554
+ "approve, retry with confirm:true. Otherwise file the work under an existing theme.",
3555
+ ...(nearest
3556
+ ? { closestExisting: { id: nearest.id, name: nearest.name, score: Number(nearestScore.toFixed(3)) } }
3557
+ : {}),
3558
+ fix: "propose_theme({ ...same args, confirm: true })",
3559
+ },
3560
+ null,
3561
+ 2
3562
+ ),
3563
+ { isError: true }
3564
+ );
3565
+ }
3566
+
2840
3567
  const id = randomThemeId();
2841
3568
  const theme = {
2842
3569
  id,
2843
3570
  name,
2844
- description: cleanText(args.description),
3571
+ description,
2845
3572
  color: args.color || "#6366f1", // brand-indigo default; user can change
2846
3573
  ...(typeof args.targetRoi === "number" ? { targetRoi: args.targetRoi } : {}),
2847
3574
  };
@@ -3206,18 +3933,25 @@ function suggestThemeFor(args, projected) {
3206
3933
  score: Number(score.toFixed(3)),
3207
3934
  }));
3208
3935
 
3209
- // Reminder when nothing matches strongly — theme creation is the
3210
- // years-stable decision, so even a weak match deserves a pause.
3936
+ // Autonomy-aware guidance. With agent_theme_autonomy ON (default),
3937
+ // the agent may create a theme on a weak/no match WITHOUT asking —
3938
+ // the server's too_similar block in propose_theme is the sprawl
3939
+ // guard, not a human checkpoint. With it OFF, fall back to the old
3940
+ // "confirm with the user first" framing.
3941
+ const autonomy = projected?.settings?.agentThemeAutonomy !== false;
3211
3942
  const topScore = ranked[0]?.score ?? 0;
3212
3943
  const meta =
3213
3944
  topScore < 0.4
3214
3945
  ? {
3215
3946
  _meta: {
3216
3947
  roadmapper: {
3217
- reminder:
3218
- ranked.length === 0
3219
- ? "No existing theme overlaps your description. Themes are years-stable, so creating a new one is a big decision verify with the user that this represents a genuinely new strategic direction, not a reframing of an existing bet, before calling propose_theme."
3220
- : "No strong match (top score < 0.4). Re-using a 'close-enough' theme is almost always the right move; ask the user before calling propose_theme.",
3948
+ reminder: autonomy
3949
+ ? ranked.length === 0
3950
+ ? "No existing theme overlaps. Theme-autonomy is ON, so you may call propose_theme directly if this is a genuinely new strategic pillar the server will refuse it only if it's a near-duplicate of an existing theme."
3951
+ : "No strong match (top score < 0.4). Prefer the closest existing theme if it fits; otherwise propose_theme is fine (autonomy is ON, sprawl is guarded server-side)."
3952
+ : ranked.length === 0
3953
+ ? "No existing theme overlaps. Theme-autonomy is OFF for this workspace — verify with the user that this is a genuinely new strategic direction before propose_theme, and pass confirm:true."
3954
+ : "No strong match (top score < 0.4). Re-using a 'close-enough' theme is almost always right; theme-autonomy is OFF, so confirm with the user before propose_theme.",
3221
3955
  },
3222
3956
  },
3223
3957
  }
@@ -3228,13 +3962,18 @@ function suggestThemeFor(args, projected) {
3228
3962
  {
3229
3963
  ok: true,
3230
3964
  query: desc,
3965
+ themeAutonomy: autonomy,
3231
3966
  matches: ranked,
3232
3967
  hint:
3233
3968
  ranked.length === 0
3234
- ? "No existing theme overlaps. propose_theme MAY be appropriate, but only with explicit user confirmation that a new strategic direction is intended — themes are years-stable, not per-feature."
3969
+ ? autonomy
3970
+ ? "No existing theme overlaps. propose_theme is appropriate if this is a distinct strategic pillar — autonomy is on; the server blocks only near-duplicates."
3971
+ : "No existing theme overlaps. propose_theme needs explicit user confirmation (autonomy off): pass confirm:true once the user approves."
3235
3972
  : ranked[0].score > 0.4
3236
3973
  ? `Strong match: ${ranked[0].id} (${ranked[0].name}). Attach capabilities under this theme instead of creating a new one.`
3237
- : `Weak overlap. The top match is often closer than it scores; prefer that over creating a new theme unless the user explicitly asks for a new strategic direction.`,
3974
+ : autonomy
3975
+ ? `Weak overlap. The top match is often closer than it scores — prefer it if it fits; otherwise propose_theme is fine (sprawl guarded server-side).`
3976
+ : `Weak overlap. Prefer the top match over a new theme unless the user explicitly asks for a new strategic direction (autonomy off).`,
3238
3977
  },
3239
3978
  null,
3240
3979
  2
@@ -3897,6 +4636,86 @@ function detectCapabilityGaps(args, projected) {
3897
4636
  );
3898
4637
  }
3899
4638
 
4639
/**
 * detect_theme_sprawl — the consolidation companion to
 * agent_theme_autonomy. Autonomy lets agents mint themes freely (with the
 * per-create too_similar block as a guard); over time, two themes created
 * from different sessions can still drift toward overlap. This surfaces
 * those pairs so a human can merge them.
 *
 * Every theme is scored against every other by jaccardScore over the
 * tokenized `name + description`; pairs scoring at or above the threshold
 * are reported with a merge suggestion. Read-only.
 *
 * O(n^2) over the scanned themes. Deterministic: themes are sorted by id
 * before pairing and the final sort is by score only, so equal-score
 * pairs keep their id order.
 *
 * @param {object} [args] - { threshold?: number in [0,1] (clamped; defaults
 *   to THEME_SPRAWL_WARN), includeArchived?: boolean }.
 * @param {object} [projected] - Workspace projection; `themes` and
 *   `capabilities` are read. Missing/empty is treated as "no themes".
 * @returns {object} textResult envelope with
 *   { themesScanned, threshold, sprawlPairCount, pairs }.
 */
function detectThemeSprawl(args, projected) {
  const threshold =
    typeof args?.threshold === "number" && Number.isFinite(args.threshold)
      ? Math.min(1, Math.max(0, args.threshold))
      : THEME_SPRAWL_WARN;
  const includeArchived = args?.includeArchived === true;

  // filter() already returns a fresh array, so sorting it in place cannot
  // disturb the projection.
  const themes = (projected?.themes ?? [])
    .filter((t) => includeArchived || !t.archived)
    .sort((a, b) => String(a.id).localeCompare(String(b.id)));

  // Live (non-archived) capability count per theme.
  const capCountByTheme = new Map();
  for (const c of projected?.capabilities ?? []) {
    if (c.archived) continue;
    capCountByTheme.set(c.pillarId, (capCountByTheme.get(c.pillarId) ?? 0) + 1);
  }

  const describe = (theme, capabilities) => ({
    id: theme.id,
    name: theme.name,
    capabilities,
    // Only present when includeArchived pulled an archived theme in.
    ...(theme.archived ? { archived: true } : {}),
  });

  const toks = themes.map((t) => tokenize(`${t.name} ${t.description ?? ""}`));
  const pairs = [];
  for (let i = 0; i < themes.length; i++) {
    for (let j = i + 1; j < themes.length; j++) {
      const score = jaccardScore(toks[i], toks[j]);
      if (score < threshold) continue;

      const a = themes[i];
      const b = themes[j];
      const aCaps = capCountByTheme.get(a.id) ?? 0;
      const bCaps = capCountByTheme.get(b.id) ?? 0;

      // Choose which theme survives. If exactly one side is archived, the
      // active one must be kept — folding live work into an archived theme
      // is never the right suggestion. Otherwise fold the lighter theme
      // (fewer capabilities) INTO the heavier one: the smaller bet is the
      // cheaper thing to re-parent.
      const aArchived = Boolean(a.archived);
      const bArchived = Boolean(b.archived);
      const keepA = aArchived !== bArchived ? bArchived : aCaps >= bCaps;
      const [keep, fold, foldCaps] = keepA ? [a, b, bCaps] : [b, a, aCaps];

      let suggestion;
      if (foldCaps > 0) {
        const move =
          `Likely duplicate. To consolidate: move_capabilities the ${foldCaps} ` +
          `capabilit${foldCaps === 1 ? "y" : "ies"} under ${fold.id} (${fold.name}) ` +
          `to ${keep.id} (${keep.name})`;
        suggestion = fold.archived
          ? `${move}; ${fold.id} is already archived, so no archive_theme is needed.`
          : `${move}, then archive_theme ${fold.id}.`;
      } else if (fold.archived) {
        suggestion =
          `Likely duplicate. ${fold.id} (${fold.name}) is already archived and has ` +
          `no capabilities — keep ${keep.id} (${keep.name}); nothing to consolidate.`;
      } else {
        suggestion = `Likely duplicate. ${fold.id} (${fold.name}) has no capabilities — archive_theme it and keep ${keep.id} (${keep.name}).`;
      }

      pairs.push({
        score: Number(score.toFixed(3)),
        themes: [describe(a, aCaps), describe(b, bCaps)],
        suggestion,
      });
    }
  }
  pairs.sort((x, y) => y.score - x.score);

  const meta =
    pairs.length > 0
      ? {
          _meta: {
            roadmapper: {
              reminder:
                `${pairs.length} theme pair(s) overlap at/above ${threshold} — candidate duplicates. ` +
                "Themes are the years-stable top tier; consolidating keeps the strategic view legible. A human should confirm each merge.",
            },
          },
        }
      : undefined;

  return textResult(
    JSON.stringify({
      themesScanned: themes.length,
      threshold,
      sprawlPairCount: pairs.length,
      pairs,
    }),
    meta
  );
}
4718
+
3900
4719
  async function submitAcceptanceGrades(args, projected, wsId) {
3901
4720
  const task = projected.tasks.find((t) => t.id === args.taskId);
3902
4721
  if (!task) return errorResult(`Task ${args.taskId} not found.`);
@@ -4479,6 +5298,147 @@ async function runSelftest() {
4479
5298
  },
4480
5299
  pass: (r) => r?.themesListedAt !== null && r?.capsDiscoveredAt !== null,
4481
5300
  },
5301
+ {
5302
+ // Sprawl control: a theme that overlaps an existing one above the
5303
+ // block bar is refused with too_similar, naming the match — even
5304
+ // on dryRun (the guard runs before the write/preview).
5305
+ name: "propose_theme blocks a near-duplicate theme (too_similar)",
5306
+ fn: () =>
5307
+ proposeTheme(
5308
+ { name: "Data Intelligence Platform Core", dryRun: true },
5309
+ {
5310
+ themes: [
5311
+ { id: "TH-DUP", name: "Data Intelligence Platform", description: "" },
5312
+ ],
5313
+ settings: { agentThemeAutonomy: true },
5314
+ },
5315
+ "ws-test"
5316
+ ),
5317
+ pass: (r) => {
5318
+ const t = r?.content?.[0]?.text ?? "";
5319
+ return t.includes("too_similar") && t.includes("TH-DUP");
5320
+ },
5321
+ },
5322
+ {
5323
+ // A distinct theme passes the sprawl guard, and with autonomy ON
5324
+ // (default) sails through to the (dryRun) create — no confirmation.
5325
+ name: "propose_theme allows a distinct theme when autonomy is on",
5326
+ fn: () =>
5327
+ proposeTheme(
5328
+ { name: "Customer Onboarding Automation", dryRun: true },
5329
+ {
5330
+ themes: [
5331
+ { id: "TH-DUP", name: "Data Intelligence Platform", description: "" },
5332
+ ],
5333
+ settings: { agentThemeAutonomy: true },
5334
+ },
5335
+ "ws-test"
5336
+ ),
5337
+ pass: (r) => {
5338
+ const t = r?.content?.[0]?.text ?? "";
5339
+ return t.includes("\"ok\": true") && t.includes("wouldCreate") && !t.includes("too_similar");
5340
+ },
5341
+ },
5342
+ {
5343
+ // force:true overrides a too_similar block for the rare genuine
5344
+ // false positive.
5345
+ name: "propose_theme force:true overrides the sprawl block",
5346
+ fn: () =>
5347
+ proposeTheme(
5348
+ { name: "Data Intelligence Platform Core", force: true, dryRun: true },
5349
+ {
5350
+ themes: [
5351
+ { id: "TH-DUP", name: "Data Intelligence Platform", description: "" },
5352
+ ],
5353
+ settings: { agentThemeAutonomy: true },
5354
+ },
5355
+ "ws-test"
5356
+ ),
5357
+ pass: (r) => {
5358
+ const t = r?.content?.[0]?.text ?? "";
5359
+ return t.includes("wouldCreate") && !t.includes("too_similar");
5360
+ },
5361
+ },
5362
+ {
5363
+ // With autonomy OFF, a brand-new theme needs confirm:true — the
5364
+ // server returns confirmation_required until the human signs off.
5365
+ name: "propose_theme requires confirm when autonomy is off",
5366
+ fn: () =>
5367
+ proposeTheme(
5368
+ { name: "Brand New Distinct Strategic Pillar" },
5369
+ { themes: [], settings: { agentThemeAutonomy: false } },
5370
+ "ws-test"
5371
+ ),
5372
+ pass: (r) => {
5373
+ const t = r?.content?.[0]?.text ?? "";
5374
+ return t.includes("confirmation_required") && t.includes("confirm");
5375
+ },
5376
+ },
5377
+ {
5378
+ // detect_theme_sprawl surfaces overlapping existing themes as
5379
+ // consolidation candidates.
5380
+ name: "detect_theme_sprawl flags overlapping themes",
5381
+ fn: () =>
5382
+ detectThemeSprawl(
5383
+ {},
5384
+ {
5385
+ themes: [
5386
+ { id: "TH-A", name: "Data Intelligence", description: "" },
5387
+ { id: "TH-B", name: "Data Intelligence Platform", description: "" },
5388
+ ],
5389
+ capabilities: [],
5390
+ }
5391
+ ),
5392
+ pass: (r) => {
5393
+ const t = r?.content?.[0]?.text ?? "";
5394
+ return t.includes("\"sprawlPairCount\": 1") || (t.includes("TH-A") && t.includes("TH-B"));
5395
+ },
5396
+ },
5397
+ {
5398
+ // propose_tasks bulk: dryRun previews all rows + mints an id each.
5399
+ name: "propose_tasks bulk previews the whole batch (dryRun)",
5400
+ fn: () =>
5401
+ proposeTasks(
5402
+ {
5403
+ capabilityId: "CAP-1",
5404
+ dryRun: true,
5405
+ tasks: [
5406
+ { ref: "a", title: "First bulk task here", effort: "M" },
5407
+ { title: "Second bulk task here", effort: "S", dependsOn: ["a"] },
5408
+ ],
5409
+ },
5410
+ { capabilities: [{ id: "CAP-1", name: "Test Cap" }], themes: [], tasks: [] },
5411
+ "ws-test"
5412
+ ),
5413
+ pass: (r) => {
5414
+ const t = r?.content?.[0]?.text ?? "";
5415
+ try {
5416
+ const j = JSON.parse(t);
5417
+ return j.dryRun === true && Array.isArray(j.wouldCreate) && j.wouldCreate.length === 2;
5418
+ } catch {
5419
+ return false;
5420
+ }
5421
+ },
5422
+ },
5423
+ {
5424
+ // propose_tasks rejects the whole batch on a per-row validation
5425
+ // error (missing effort), naming the offending index.
5426
+ name: "propose_tasks rejects a batch with a missing-effort row",
5427
+ fn: () =>
5428
+ proposeTasks(
5429
+ {
5430
+ capabilityId: "CAP-1",
5431
+ tasks: [{ title: "No effort on this task" }],
5432
+ },
5433
+ { capabilities: [{ id: "CAP-1", name: "Test Cap" }], themes: [], tasks: [] },
5434
+ "ws-test"
5435
+ ),
5436
+ pass: (r) => {
5437
+ if (!r?.isError) return false;
5438
+ const t = r?.content?.[0]?.text ?? "";
5439
+ return t.includes("tasks[0]") && t.includes("effort");
5440
+ },
5441
+ },
4482
5442
  {
4483
5443
  name: "resources/list returns the three resources",
4484
5444
  fn: () => handle({ id: 12, method: "resources/list", params: {} }),
@@ -4543,8 +5503,16 @@ async function runSelftest() {
4543
5503
  const out = JSON.parse(r?.result?.content?.[0]?.text ?? "{}");
4544
5504
  return (
4545
5505
  typeof out.resolvedFrom === "string" &&
4546
- ["arg", "snapshot", "env", "none"].includes(out.resolvedFrom) &&
4547
- ["broker", "operator", "read-only"].includes(out.writeMode)
5506
+ ["arg", "repo", "snapshot", "env", "none"].includes(
5507
+ out.resolvedFrom
5508
+ ) &&
5509
+ ["broker", "operator", "read-only"].includes(out.writeMode) &&
5510
+ ["resolved", "ambiguous", "env_default", "unresolved"].includes(
5511
+ out.status
5512
+ ) &&
5513
+ // `next` is either null (resolved) or a shaped action object.
5514
+ (out.next === null ||
5515
+ (out.next && typeof out.next.action === "string"))
4548
5516
  );
4549
5517
  } catch {
4550
5518
  return false;
@@ -5942,6 +6910,42 @@ async function runSelftest() {
5942
6910
  pass: (r) =>
5943
6911
  r?.result?.id === "ws-from-snapshot" && r?.result?.source === "snapshot",
5944
6912
  },
6913
+ {
6914
+ // Two open repos mapping to DIFFERENT workspaces must not resolve
6915
+ // silently to the first — get_active_workspace reports status
6916
+ // "ambiguous" and hands back the candidate list so the agent can
6917
+ // prompt for an explicit workspaceId instead of guessing.
6918
+ name: "get_active_workspace surfaces ambiguous multi-repo resolution",
6919
+ fn: () => {
6920
+ try {
6921
+ __setRootWorkspaceForTest("ws-a", "owner/repo-a", [
6922
+ { ws: "ws-a", slug: "owner/repo-a" },
6923
+ { ws: "ws-b", slug: "owner/repo-b" },
6924
+ ]);
6925
+ return handle({
6926
+ id: 23,
6927
+ method: "tools/call",
6928
+ params: { name: "get_active_workspace", arguments: {} },
6929
+ });
6930
+ } finally {
6931
+ __setRootWorkspaceForTest(undefined);
6932
+ }
6933
+ },
6934
+ pass: (r) => {
6935
+ try {
6936
+ const out = JSON.parse(r?.result?.content?.[0]?.text ?? "{}");
6937
+ return (
6938
+ out.status === "ambiguous" &&
6939
+ out.next?.action === "pass_workspace_id" &&
6940
+ Array.isArray(out.candidates) &&
6941
+ out.candidates.length === 2 &&
6942
+ out.candidates.some((c) => c.workspaceId === "ws-b")
6943
+ );
6944
+ } catch {
6945
+ return false;
6946
+ }
6947
+ },
6948
+ },
5945
6949
  {
5946
6950
  // setClientRoots parses both file:// URIs and bare paths and
5947
6951
  // invalidates the cached resolution.
@@ -5958,6 +6962,304 @@ async function runSelftest() {
5958
6962
  // returns null until resolveRootWorkspace() runs. So "cleared" must be null.
5959
6963
  pass: (r) => r?.result?.cleared === null,
5960
6964
  },
6965
+ {
6966
+ // link_repo with no resolvable repo slug returns an actionable
6967
+ // no_repo error rather than calling the broker.
6968
+ name: "link_repo returns no_repo when not in a git repo",
6969
+ fn: async () => {
6970
+ const savedKey = process.env.ROADMAPPER_API_KEY;
6971
+ const savedUrl = process.env.ROADMAPPER_BACKEND_URL;
6972
+ try {
6973
+ process.env.ROADMAPPER_API_KEY = "rmpr_selftest";
6974
+ process.env.ROADMAPPER_BACKEND_URL = "https://selftest.local";
6975
+ _clientRoots = ["/tmp/x"];
6976
+ __setRepoSlugForTest(null); // no slug resolves
6977
+ return await handle({
6978
+ id: 91,
6979
+ method: "tools/call",
6980
+ params: { name: "link_repo", arguments: {} },
6981
+ });
6982
+ } finally {
6983
+ __setRepoSlugForTest(undefined);
6984
+ _clientRoots = [];
6985
+ if (savedKey === undefined) delete process.env.ROADMAPPER_API_KEY;
6986
+ else process.env.ROADMAPPER_API_KEY = savedKey;
6987
+ if (savedUrl === undefined) delete process.env.ROADMAPPER_BACKEND_URL;
6988
+ else process.env.ROADMAPPER_BACKEND_URL = savedUrl;
6989
+ }
6990
+ },
6991
+ pass: (r) => {
6992
+ try {
6993
+ return JSON.parse(r?.result?.content?.[0]?.text ?? "{}").status === "no_repo";
6994
+ } catch {
6995
+ return false;
6996
+ }
6997
+ },
6998
+ },
6999
+ {
7000
+ // link_repo with a resolvable slug POSTs { p_repo } through the
7001
+ // broker and, on {status:"linked"}, invalidates the resolution
7002
+ // cache. Stub fetch to assert the body shape + the linked result.
7003
+ name: "link_repo posts slug to broker and reports linked",
7004
+ fn: async () => {
7005
+ const savedFetch = globalThis.fetch;
7006
+ const savedKey = process.env.ROADMAPPER_API_KEY;
7007
+ const savedUrl = process.env.ROADMAPPER_BACKEND_URL;
7008
+ let sentBody = null;
7009
+ try {
7010
+ process.env.ROADMAPPER_API_KEY = "rmpr_selftest";
7011
+ process.env.ROADMAPPER_BACKEND_URL = "https://selftest.local";
7012
+ _clientRoots = ["/tmp/proj"];
7013
+ __setRepoSlugForTest("acme/widget");
7014
+ __setRootWorkspaceForTest("stale-ws"); // must be invalidated on link
7015
+ globalThis.fetch = async (_u, opts) => {
7016
+ sentBody = JSON.parse(opts.body);
7017
+ return {
7018
+ ok: true,
7019
+ json: async () => ({ status: "linked", workspace_id: "ws-1", repo: "acme/widget" }),
7020
+ text: async () => "",
7021
+ };
7022
+ };
7023
+ const r = await handle({
7024
+ id: 92,
7025
+ method: "tools/call",
7026
+ params: { name: "link_repo", arguments: {} },
7027
+ });
7028
+ return { r, sentBody, cacheCleared: _rootWorkspace };
7029
+ } finally {
7030
+ globalThis.fetch = savedFetch;
7031
+ __setRepoSlugForTest(undefined);
7032
+ __setRootWorkspaceForTest(undefined);
7033
+ _clientRoots = [];
7034
+ if (savedKey === undefined) delete process.env.ROADMAPPER_API_KEY;
7035
+ else process.env.ROADMAPPER_API_KEY = savedKey;
7036
+ if (savedUrl === undefined) delete process.env.ROADMAPPER_BACKEND_URL;
7037
+ else process.env.ROADMAPPER_BACKEND_URL = savedUrl;
7038
+ }
7039
+ },
7040
+ pass: (r) => {
7041
+ try {
7042
+ const out = JSON.parse(r?.r?.result?.content?.[0]?.text ?? "{}");
7043
+ return (
7044
+ out.status === "linked" &&
7045
+ out.repo === "acme/widget" &&
7046
+ // broker body carries the derived slug as p_repo
7047
+ r?.sentBody?.rpc === "link_repo" &&
7048
+ r?.sentBody?.body?.p_repo === "acme/widget" &&
7049
+ // CRITICAL: on the broker path we must NOT send p_workspace_id —
7050
+ // the broker injects the key's validated workspace, and sending
7051
+ // a (likely mismatched) wsId would trip its cross-workspace
7052
+ // guard and 403. This assertion locks in that fix.
7053
+ r?.sentBody?.body?.p_workspace_id === undefined &&
7054
+ // resolution cache invalidated so the new mapping wins next call
7055
+ r?.cacheCleared === undefined
7056
+ );
7057
+ } catch {
7058
+ return false;
7059
+ }
7060
+ },
7061
+ },
7062
+ {
7063
+ // A repo already mapped to a different workspace returns the
7064
+ // conflict passthrough (not a silent steal).
7065
+ name: "link_repo surfaces conflict when repo maps elsewhere",
7066
+ fn: async () => {
7067
+ const savedFetch = globalThis.fetch;
7068
+ const savedKey = process.env.ROADMAPPER_API_KEY;
7069
+ const savedUrl = process.env.ROADMAPPER_BACKEND_URL;
7070
+ const savedWs = process.env.ROADMAPPER_WORKSPACE_ID;
7071
+ try {
7072
+ process.env.ROADMAPPER_API_KEY = "rmpr_selftest";
7073
+ process.env.ROADMAPPER_BACKEND_URL = "https://selftest.local";
7074
+ process.env.ROADMAPPER_WORKSPACE_ID = "ws-mine";
7075
+ _clientRoots = ["/tmp/proj"];
7076
+ __setRepoSlugForTest("acme/taken");
7077
+ globalThis.fetch = async () => ({
7078
+ ok: true,
7079
+ json: async () => ({ status: "conflict", existing_workspace: "ws-other" }),
7080
+ text: async () => "",
7081
+ });
7082
+ return await handle({
7083
+ id: 93,
7084
+ method: "tools/call",
7085
+ params: { name: "link_repo", arguments: {} },
7086
+ });
7087
+ } finally {
7088
+ globalThis.fetch = savedFetch;
7089
+ __setRepoSlugForTest(undefined);
7090
+ _clientRoots = [];
7091
+ if (savedKey === undefined) delete process.env.ROADMAPPER_API_KEY;
7092
+ else process.env.ROADMAPPER_API_KEY = savedKey;
7093
+ if (savedUrl === undefined) delete process.env.ROADMAPPER_BACKEND_URL;
7094
+ else process.env.ROADMAPPER_BACKEND_URL = savedUrl;
7095
+ if (savedWs === undefined) delete process.env.ROADMAPPER_WORKSPACE_ID;
7096
+ else process.env.ROADMAPPER_WORKSPACE_ID = savedWs;
7097
+ }
7098
+ },
7099
+ pass: (r) => {
7100
+ try {
7101
+ const out = JSON.parse(r?.result?.content?.[0]?.text ?? "{}");
7102
+ return out.status === "conflict" && out.existingWorkspace === "ws-other";
7103
+ } catch {
7104
+ return false;
7105
+ }
7106
+ },
7107
+ },
7108
+ {
7109
+ // Repo-link gate: a mutator in an UNMAPPED git repo (slug resolves
7110
+ // but no repo_workspace_map row, so resolution falls to env source)
7111
+ // is blocked with repo_unmapped naming the slug + the link_repo fix.
7112
+ name: "mutator blocked when in an unmapped repo (would hit env default)",
7113
+ fn: async () => {
7114
+ const savedWs = process.env.ROADMAPPER_WORKSPACE_ID;
7115
+ const savedKey = process.env.ROADMAPPER_API_KEY;
7116
+ const savedUrl = process.env.ROADMAPPER_BACKEND_URL;
7117
+ try {
7118
+ // Writes must be enabled or the gate defers to set_credentials.
7119
+ process.env.ROADMAPPER_API_KEY = "rmpr_selftest";
7120
+ process.env.ROADMAPPER_BACKEND_URL = "https://selftest.local";
7121
+ process.env.ROADMAPPER_WORKSPACE_ID = "ws-envdefault";
7122
+ session.rubricFetchedAt = Date.now(); // past the rubric gate
7123
+ _clientRoots = ["/tmp/unmapped"];
7124
+ __setRepoSlugForTest("acme/unmapped");
7125
+ __setRootWorkspaceForTest(null); // no repo_workspace_map hit → env source
7126
+ return await handle({
7127
+ id: 94,
7128
+ method: "tools/call",
7129
+ params: {
7130
+ name: "archive_task",
7131
+ arguments: { taskId: aTask, reason: "unmapped-repo probe" },
7132
+ },
7133
+ });
7134
+ } finally {
7135
+ __setRepoSlugForTest(undefined);
7136
+ __setRootWorkspaceForTest(undefined);
7137
+ _clientRoots = [];
7138
+ if (savedWs === undefined) delete process.env.ROADMAPPER_WORKSPACE_ID;
7139
+ else process.env.ROADMAPPER_WORKSPACE_ID = savedWs;
7140
+ if (savedKey === undefined) delete process.env.ROADMAPPER_API_KEY;
7141
+ else process.env.ROADMAPPER_API_KEY = savedKey;
7142
+ if (savedUrl === undefined) delete process.env.ROADMAPPER_BACKEND_URL;
7143
+ else process.env.ROADMAPPER_BACKEND_URL = savedUrl;
7144
+ }
7145
+ },
7146
+ pass: (r) => {
7147
+ try {
7148
+ const out = JSON.parse(r?.result?.content?.[0]?.text ?? "{}");
7149
+ return (
7150
+ out.error === "repo_unmapped" &&
7151
+ out.repo === "acme/unmapped" &&
7152
+ out.fix === "link_repo()" &&
7153
+ out.envDefaultWorkspace === "ws-envdefault"
7154
+ );
7155
+ } catch {
7156
+ return false;
7157
+ }
7158
+ },
7159
+ },
7160
+ {
7161
+ // ESCAPE HATCH 1 (the multi-repo case): an explicit workspaceId arg
7162
+ // means the caller is intentionally targeting a workspace — the gate
7163
+ // must NOT fire even in an unmapped repo. Proves a developer juggling
7164
+ // several repos in one chat is never bricked: pass workspaceId and the
7165
+ // write proceeds (lands downstream on the missing-service-key error in
7166
+ // selftest, NOT the repo_unmapped block — that's the assertion).
7167
+ name: "repo-link gate skipped when workspaceId passed explicitly",
7168
+ fn: async () => {
7169
+ try {
7170
+ session.rubricFetchedAt = Date.now();
7171
+ _clientRoots = ["/tmp/unmapped"];
7172
+ __setRepoSlugForTest("acme/unmapped");
7173
+ __setRootWorkspaceForTest(null);
7174
+ return await handle({
7175
+ id: 95,
7176
+ method: "tools/call",
7177
+ params: {
7178
+ name: "archive_task",
7179
+ arguments: {
7180
+ taskId: aTask,
7181
+ reason: "explicit-ws probe",
7182
+ workspaceId: "ws-explicit",
7183
+ },
7184
+ },
7185
+ });
7186
+ } finally {
7187
+ __setRepoSlugForTest(undefined);
7188
+ __setRootWorkspaceForTest(undefined);
7189
+ _clientRoots = [];
7190
+ }
7191
+ },
7192
+ pass: (r) => {
7193
+ // Must be an error result (no service key downstream) but NOT the
7194
+ // repo_unmapped block — proves the gate let the explicit target through.
7195
+ if (!r?.result?.isError) return false;
7196
+ const txt = r.result.content?.[0]?.text ?? "";
7197
+ return !txt.includes("repo_unmapped");
7198
+ },
7199
+ },
7200
+ {
7201
+ // ESCAPE HATCH 2: a MAPPED repo (resolution returns source "repo")
7202
+ // never trips the gate — the whole point. Seeding a root workspace
7203
+ // makes resolveWorkspaceWithSource return source:"repo", not "env".
7204
+ name: "repo-link gate skipped when repo IS mapped (source=repo)",
7205
+ fn: async () => {
7206
+ try {
7207
+ session.rubricFetchedAt = Date.now();
7208
+ _clientRoots = ["/tmp/mapped"];
7209
+ __setRepoSlugForTest("acme/mapped");
7210
+ __setRootWorkspaceForTest("ws-mapped", "acme/mapped"); // mapped → source "repo"
7211
+ return await handle({
7212
+ id: 96,
7213
+ method: "tools/call",
7214
+ params: {
7215
+ name: "archive_task",
7216
+ arguments: { taskId: aTask, reason: "mapped-repo probe" },
7217
+ },
7218
+ });
7219
+ } finally {
7220
+ __setRepoSlugForTest(undefined);
7221
+ __setRootWorkspaceForTest(undefined);
7222
+ _clientRoots = [];
7223
+ }
7224
+ },
7225
+ pass: (r) => {
7226
+ if (!r?.result?.isError) return false;
7227
+ const txt = r.result.content?.[0]?.text ?? "";
7228
+ return !txt.includes("repo_unmapped");
7229
+ },
7230
+ },
7231
+ {
7232
+ // ESCAPE HATCH 3: not in a git repo at all (no client roots) — nothing
7233
+ // to link, so the gate must fall through to the env default rather than
7234
+ // deadlock. Asserts NOT repo_unmapped.
7235
+ name: "repo-link gate skipped when not in a git repo (no deadlock)",
7236
+ fn: async () => {
7237
+ const savedWs = process.env.ROADMAPPER_WORKSPACE_ID;
7238
+ try {
7239
+ process.env.ROADMAPPER_WORKSPACE_ID = "ws-envdefault";
7240
+ session.rubricFetchedAt = Date.now();
7241
+ _clientRoots = []; // not in a repo
7242
+ __setRootWorkspaceForTest(null);
7243
+ return await handle({
7244
+ id: 97,
7245
+ method: "tools/call",
7246
+ params: {
7247
+ name: "archive_task",
7248
+ arguments: { taskId: aTask, reason: "no-repo probe" },
7249
+ },
7250
+ });
7251
+ } finally {
7252
+ __setRootWorkspaceForTest(undefined);
7253
+ if (savedWs === undefined) delete process.env.ROADMAPPER_WORKSPACE_ID;
7254
+ else process.env.ROADMAPPER_WORKSPACE_ID = savedWs;
7255
+ }
7256
+ },
7257
+ pass: (r) => {
7258
+ if (!r?.result?.isError) return false;
7259
+ const txt = r.result.content?.[0]?.text ?? "";
7260
+ return !txt.includes("repo_unmapped");
7261
+ },
7262
+ },
5961
7263
  ];
5962
7264
 
5963
7265
  let passed = 0;