@roadmapperai/mcp 0.9.4 → 0.9.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +71 -37
- package/README.md +51 -16
- package/package.json +1 -1
- package/server.mjs +887 -96
package/server.mjs
CHANGED
|
@@ -469,6 +469,75 @@ function tplDescription(text, labels) {
|
|
|
469
469
|
return out;
|
|
470
470
|
}
|
|
471
471
|
|
|
472
|
+
/**
 * Reduce a full tool description to its one-line summary: the segment
 * before the first blank line.
 *
 * The full tool descriptions carry the planning methodology
 * (USE WHEN / PREREQUISITE / ANTI-PATTERN / EXAMPLE) inline. That prose is
 * relocated to the server `instructions` field — sent once at connect —
 * plus the roadmapper://rubric resource, so the per-tool wire payload is
 * just the summary.
 *
 * Why: the 34 full descriptions cost ~15k tokens in every tools/list, in
 * every session, used or not. The summaries cost ~3-4k. The methodology
 * isn't lost — it moves to `instructions` (always sent, deduped) and the
 * rubric resource (on demand), and the contract is still enforced server
 * side (rubric/discovery gates + validateOutcome/validateName/etc. return
 * structured `fix` errors). inputSchema, including every per-field
 * description, is untouched — callers keep full argument-level guidance.
 *
 * @param {string} description Full tool description text.
 * @returns {string} The text before the first blank line; the whole
 *   description when it has no blank line; "" when `description` is not a
 *   string (so a tool entry without a description cannot crash the caller).
 */
function summaryOf(description) {
  if (typeof description !== "string") return "";
  // A "blank line" is two line breaks separated by nothing but spaces/tabs.
  // Accept CRLF as well as LF so descriptions authored with Windows line
  // endings are still cut at the summary instead of being sent in full.
  const blankLine = /\r?\n[ \t]*\r?\n/.exec(description);
  return blankLine === null ? description : description.slice(0, blankLine.index);
}
|
|
491
|
+
|
|
492
|
+
/**
|
|
493
|
+
* The minimal planning contract an agent needs to file a VALID proposal,
|
|
494
|
+
* sent once in the initialize `instructions` field. This is the CORE
|
|
495
|
+
* extract of AGENTS.md (~600 tokens vs the full ~12.5k doc): the gate
|
|
496
|
+
* sequence, the task/capability shapes, the server-enforced falsifiable
|
|
497
|
+
* outcome + confidence rules, the enums, IDs, and don'ts. The full doc
|
|
498
|
+
* (tool catalogue, PR/branch conventions, RICE narrative, GitHub wiring)
|
|
499
|
+
* stays on demand via get_agents_md / roadmapper://rubric — reading
|
|
500
|
+
* either also satisfies the rubric gate. Keep this in sync with AGENTS.md
|
|
501
|
+
* sections: TL;DR, The mental model, Required agent task, Required
|
|
502
|
+
* capability fields, Outcome statements, Impact/Confidence, ID
|
|
503
|
+
* conventions, Don'ts.
|
|
504
|
+
*/
|
|
505
|
+
const CORE_CONTRACT = `ROADMAPPER PLANNING CONTRACT (essentials — full version: the get_agents_md op, or read the roadmapper://rubric resource)
|
|
506
|
+
|
|
507
|
+
ACCESS — every operation runs through ONE tool
|
|
508
|
+
roadmap({ op, args }) executes an operation. roadmap_search(intent) lists/ranks the operations; roadmap_describe(op) returns an op's exact arguments. The op names used below (get_roadmap_snapshot, suggest_capability_for, propose_task, get_agents_md, ...) are values for op — e.g. roadmap({ op: "get_agents_md" }) or roadmap({ op: "propose_task", args: { capabilityId, title, effort } }).
|
|
509
|
+
|
|
510
|
+
PER-SESSION WORKFLOW
|
|
511
|
+
1. Orient first: get_roadmap_snapshot (or list_themes / list_capabilities). This also satisfies the discovery gates below.
|
|
512
|
+
2. Writing requires the rubric: every workspace-mutating tool (propose_*/update_*/archive_*/unarchive_*/move_*/record_outcome_reading/link_pr/submit_acceptance_grades) refuses until you call get_agents_md once this session (reading roadmapper://rubric also counts).
|
|
513
|
+
3. Reuse before creating: suggest_capability_for({description}) to find an existing home; only propose a new capability if nothing fits. suggest_theme_for / list_themes before proposing a theme.
|
|
514
|
+
4. Before your first write, call get_active_workspace. Proceed if status is "resolved", or if it's "env_default" and the named workspace is the one you intend (the common, correct case for an env-configured install — its \`next.detail\` says how to confirm). Stop and follow the \`next\` action only for "ambiguous" or "unresolved" (e.g. link_repo), so writes don't land in the wrong workspace.
|
|
515
|
+
5. dryRun:true validates any write without committing. Reference everything by stable ID, never by name.
|
|
516
|
+
|
|
517
|
+
MODEL (don't conflate the layers)
|
|
518
|
+
Theme (TH-NNNNNN · leadership · years) > Capability (CAP-XXXXXX · PM · quarters · a falsifiable bet) > Task (TK-NNNNNN · IC/agent · days) > PR (closes tasks). Sprints (SP-NNN) are 1-2 week buckets.
|
|
519
|
+
|
|
520
|
+
TASK fields
|
|
521
|
+
Required: capabilityId, title (>=5 chars), effort: XS|S|M|L|XL (XS=2h S=4h M=1d L=3d XL=8d).
|
|
522
|
+
Recommended (not enforced): kind: feature|bug|chore|spike, priority: P0|P1|P2|P3, acceptance: [checkable assertions], dependsOn: [TK-...].
|
|
523
|
+
Give any task an agent will pick up a non-empty acceptance list — an empty list is a stop signal (spike it or ask). Stamp authorKind:agent. Only set dependsOn when one task truly blocks another.
|
|
524
|
+
|
|
525
|
+
CAPABILITY fields
|
|
526
|
+
Required: name (>=8 chars), pillarId: TH-..., outcome (falsifiable — see below).
|
|
527
|
+
Optional (defaulted): reach: number >=0 (default 100), impact: 3|2|1|0.5|0.25 (default 1), confidence: 0-95 (default 70), specRef (spec link; supply before decomposing a capability into tasks so scope is pinned — convention, not enforced).
|
|
528
|
+
|
|
529
|
+
FALSIFIABLE OUTCOME (server-enforced — propose_capability rejects otherwise)
|
|
530
|
+
Template: <metric> moves from <baseline> to <target> by <date>, measured by <source>.
|
|
531
|
+
The outcome MUST contain both a number AND a temporal anchor. Use a 20XX year (e.g. 2026-09-30 or "Sep 2026") or a quarter (Q3, q1 2026) — a bare month name or "by <month>" is NOT accepted, so always include the year. Confidence: 100 is never accepted (server caps at 95); reserve 91-95 for work already shipped or behind a flag.
|
|
532
|
+
Good: "Activation rate moves from 32% to 55% by 2026-09-30, measured by the activated_user event."
|
|
533
|
+
Weak: "Improve builder UX" (no metric/baseline/date — rewrite, or file it as a task).
|
|
534
|
+
|
|
535
|
+
WHEN NOT TO CREATE A CAPABILITY
|
|
536
|
+
A one-off fix, infra under an existing bet, a refactor/rename, or anything that fits in one PR is a TASK under the existing capability — not a new bet. If you can't write a falsifiable outcome, it isn't a capability yet.
|
|
537
|
+
|
|
538
|
+
DON'TS
|
|
539
|
+
No capability-per-PR. No blank outcomes. Don't game RICE inputs. Don't edit theme IDs after creation. Don't self-promote a task to delivered — wait for the merged PR.`;
|
|
540
|
+
|
|
472
541
|
/**
|
|
473
542
|
* Resolve a config value from a primary `ROADMAPPER_*` env var,
|
|
474
543
|
* falling back to a legacy `SUPABASE_*` alias when the primary
|
|
@@ -1340,7 +1409,7 @@ function validateOutcome(outcome) {
|
|
|
1340
1409
|
!hasTemporal ? "date/quarter" : null,
|
|
1341
1410
|
]
|
|
1342
1411
|
.filter(Boolean)
|
|
1343
|
-
.join(" + ")}. See get_agents_md for examples.`;
|
|
1412
|
+
.join(" + ")}. See ${opCall("get_agents_md")} for examples.`;
|
|
1344
1413
|
}
|
|
1345
1414
|
return null;
|
|
1346
1415
|
}
|
|
@@ -1479,6 +1548,20 @@ function resetSession() {
|
|
|
1479
1548
|
session.mutatorBlocks = 0;
|
|
1480
1549
|
}
|
|
1481
1550
|
|
|
1551
|
+
/**
 * Render an agent-facing "next call" hint in the dispatch shape.
 *
 * After the tool-surface collapse the ONLY callable tool is `roadmap`; the
 * operations are `op` values. A fix field such as `get_agents_md()` would
 * name something a real MCP client can't invoke (it isn't in tools/list),
 * so this helper always produces the reachable form
 * `roadmap({ op: "<op>"[, args: {...}] })`.
 *
 * @param {string} op Operation name to place in the `op` slot.
 * @param {string} [argsHint] Preformatted args literal, inserted verbatim.
 *   Any falsy value (omitted, "", null) yields the no-args form.
 * @returns {string} The call text to show the agent.
 */
function opCall(op, argsHint) {
  // Build the optional `, args: ...` fragment first so a single template
  // covers both the with-args and the no-args rendering.
  const argsFragment = argsHint ? `, args: ${argsHint}` : "";
  return `roadmap({ op: "${op}"${argsFragment} })`;
}
|
|
1564
|
+
|
|
1482
1565
|
/**
|
|
1483
1566
|
* Build the structured "prereq missing" result the mutators return
|
|
1484
1567
|
* when the agent hasn't fetched the rubric this session. The shape
|
|
@@ -1495,10 +1578,10 @@ function rubricMissingResult(toolName) {
|
|
|
1495
1578
|
{
|
|
1496
1579
|
error: "prerequisite_missing",
|
|
1497
1580
|
message:
|
|
1498
|
-
`Call get_agents_md first this session, then retry ${toolName}. ` +
|
|
1581
|
+
`Call ${opCall("get_agents_md")} first this session, then retry your ${toolName} call. ` +
|
|
1499
1582
|
"The rubric defines acceptance criteria shape and grading dimensions — " +
|
|
1500
1583
|
"proposals filed without it will not round-trip.",
|
|
1501
|
-
fix: "get_agents_md
|
|
1584
|
+
fix: opCall("get_agents_md"),
|
|
1502
1585
|
},
|
|
1503
1586
|
null,
|
|
1504
1587
|
2
|
|
@@ -1523,7 +1606,7 @@ function discoveryMissingResult(toolName, fixCall, rationale) {
|
|
|
1523
1606
|
{
|
|
1524
1607
|
error: "discovery_missing",
|
|
1525
1608
|
message:
|
|
1526
|
-
`Call ${fixCall} first this session, then retry ${toolName}. ${rationale}`,
|
|
1609
|
+
`Call ${fixCall} first this session, then retry your ${toolName} call. ${rationale}`,
|
|
1527
1610
|
fix: fixCall,
|
|
1528
1611
|
},
|
|
1529
1612
|
null,
|
|
@@ -1566,12 +1649,12 @@ function repoUnmappedResult(toolName, slug, envWsId) {
|
|
|
1566
1649
|
error: "repo_unmapped",
|
|
1567
1650
|
message:
|
|
1568
1651
|
`"${slug}" isn't mapped to a workspace, so ${toolName} would land on the install-default workspace "${envWsId}" — probably not what you want. ` +
|
|
1569
|
-
`Map it once with link_repo (this repo → your key's workspace, resolves silently forever after), then retry ${toolName}. ` +
|
|
1570
|
-
`Or, if you meant a specific existing workspace, pass workspaceId
|
|
1652
|
+
`Map it once with ${opCall("link_repo")} (this repo → your key's workspace, resolves silently forever after), then retry your ${toolName} call. ` +
|
|
1653
|
+
`Or, if you meant a specific existing workspace, pass workspaceId in the op's args and it proceeds without mapping the repo.`,
|
|
1571
1654
|
repo: slug,
|
|
1572
1655
|
envDefaultWorkspace: envWsId,
|
|
1573
|
-
fix: "link_repo
|
|
1574
|
-
alt:
|
|
1656
|
+
fix: opCall("link_repo"),
|
|
1657
|
+
alt: opCall(toolName, '{ workspaceId: "<target>", ... }'),
|
|
1575
1658
|
},
|
|
1576
1659
|
null,
|
|
1577
1660
|
2
|
|
@@ -2009,7 +2092,7 @@ const TOOLS = [
|
|
|
2009
2092
|
outcome: { type: "string" },
|
|
2010
2093
|
reach: { type: "number" },
|
|
2011
2094
|
impact: { type: "number", enum: [3, 2, 1, 0.5, 0.25] },
|
|
2012
|
-
confidence: { type: "number", minimum: 0, maximum:
|
|
2095
|
+
confidence: { type: "number", minimum: 0, maximum: 95 },
|
|
2013
2096
|
roi: { type: "number", description: "Estimated annual ROI in raw dollars (e.g. 2500000 = $2.5M)." },
|
|
2014
2097
|
specRef: { type: "string" },
|
|
2015
2098
|
idempotencyKey: { type: "string" },
|
|
@@ -2040,7 +2123,11 @@ const TOOLS = [
|
|
|
2040
2123
|
properties: {
|
|
2041
2124
|
index: { type: "integer", minimum: 0 },
|
|
2042
2125
|
status: { type: "string", enum: ["pass", "fail"] },
|
|
2043
|
-
note: {
|
|
2126
|
+
note: {
|
|
2127
|
+
type: "string",
|
|
2128
|
+
description:
|
|
2129
|
+
"Required when status=fail — the failure mode the reviewer needs. Call this before opening the PR.",
|
|
2130
|
+
},
|
|
2044
2131
|
},
|
|
2045
2132
|
required: ["index", "status"],
|
|
2046
2133
|
additionalProperties: false,
|
|
@@ -2582,7 +2669,206 @@ const MUTATOR_TOOLS = new Set([
|
|
|
2582
2669
|
"record_outcome_reading",
|
|
2583
2670
|
]);
|
|
2584
2671
|
|
|
2672
|
+
// --- Dispatch surface -------------------------------------------------
|
|
2673
|
+
// Token-efficiency collapse: instead of advertising all 34 tools (their
|
|
2674
|
+
// inputSchemas alone are ~5k tokens in every tools/list), the wire surface
|
|
2675
|
+
// is three dispatch tools. The 34 operations are routed by name through
|
|
2676
|
+
// callTool (see the roadmap/roadmap_search/roadmap_describe early returns),
|
|
2677
|
+
// and their schemas are served on demand via roadmap_describe. This keeps
|
|
2678
|
+
// tools/list tiny while every operation, gate, and validator is unchanged.
|
|
2679
|
+
const OP_NAMES = new Set(TOOLS.map((t) => t.name));
|
|
2680
|
+
const DISPATCH_TOOLS = new Set(["roadmap", "roadmap_search", "roadmap_describe"]);
|
|
2681
|
+
|
|
2682
|
+
const META_TOOLS = [
|
|
2683
|
+
{
|
|
2684
|
+
name: "roadmap_search",
|
|
2685
|
+
description:
|
|
2686
|
+
"Find the right roadmap operation for what you want to do. Returns operation names with one-line summaries, ranked by your intent (or all of them if you omit intent). Then call roadmap_describe(op) for an op's arguments and roadmap({op, args}) to run it.",
|
|
2687
|
+
inputSchema: {
|
|
2688
|
+
type: "object",
|
|
2689
|
+
properties: {
|
|
2690
|
+
intent: {
|
|
2691
|
+
type: "string",
|
|
2692
|
+
description:
|
|
2693
|
+
"Free-text description of the task, e.g. 'file a new bet' or 'mark a task done'. Omit to list every operation.",
|
|
2694
|
+
},
|
|
2695
|
+
},
|
|
2696
|
+
additionalProperties: false,
|
|
2697
|
+
},
|
|
2698
|
+
},
|
|
2699
|
+
{
|
|
2700
|
+
name: "roadmap_describe",
|
|
2701
|
+
description:
|
|
2702
|
+
"Return the input schema and summary for one roadmap operation (op, e.g. 'propose_task'). Call before roadmap({op, args}) when you need the exact argument shape; this is the same schema the operation validates against.",
|
|
2703
|
+
inputSchema: {
|
|
2704
|
+
type: "object",
|
|
2705
|
+
properties: {
|
|
2706
|
+
op: { type: "string", description: "Operation name to describe, e.g. propose_task." },
|
|
2707
|
+
},
|
|
2708
|
+
required: ["op"],
|
|
2709
|
+
additionalProperties: false,
|
|
2710
|
+
},
|
|
2711
|
+
},
|
|
2712
|
+
{
|
|
2713
|
+
name: "roadmap",
|
|
2714
|
+
description:
|
|
2715
|
+
"Execute any roadmap operation: roadmap({ op, args }). op is an operation name such as get_roadmap_snapshot, list_capabilities, suggest_capability_for, propose_task, or update_capability — discover them with roadmap_search, get their arguments with roadmap_describe. All reads, planning, and writes go through here; the server enforces the rubric/discovery gates and per-op validation. See the server instructions for the planning contract.",
|
|
2716
|
+
inputSchema: {
|
|
2717
|
+
type: "object",
|
|
2718
|
+
properties: {
|
|
2719
|
+
op: {
|
|
2720
|
+
type: "string",
|
|
2721
|
+
description:
|
|
2722
|
+
"Operation name, e.g. get_roadmap_snapshot, suggest_capability_for, propose_task. Call roadmap_search to discover ops.",
|
|
2723
|
+
},
|
|
2724
|
+
args: {
|
|
2725
|
+
type: "object",
|
|
2726
|
+
description:
|
|
2727
|
+
"Arguments for the op (see roadmap_describe(op)). Omit for ops that take none.",
|
|
2728
|
+
additionalProperties: true,
|
|
2729
|
+
},
|
|
2730
|
+
},
|
|
2731
|
+
required: ["op"],
|
|
2732
|
+
additionalProperties: false,
|
|
2733
|
+
},
|
|
2734
|
+
},
|
|
2735
|
+
];
|
|
2736
|
+
|
|
2737
|
+
// roadmap_search: rank every op in TOOLS by token overlap with the intent
// and return {op, summary} rows. Each summary is the description text before
// its first blank line (summaryOf), run through tplDescription with the
// current labels so custom workspace labels (theme -> initiative) stay
// consistent with what tool descriptions show.
//
// Ranking: the intent is lower-cased and split on non-alphanumerics; words of
// 1-2 characters are ignored and each remaining DISTINCT word scores 1 when
// it occurs anywhere in "<op> <summary>". Distinct matters: without it a
// repeated word ("theme theme theme") would outrank an op that matches more
// of the intent's separate terms. Ops are never filtered out — a zero score
// only pushes them down — and ties keep catalogue order because
// Array.prototype.sort is stable.
//
// A missing, empty, or non-string intent returns the catalogue unranked and
// echoes `intent: null`.
function roadmapSearchResult(intent) {
  const labels = currentLabels();
  const ops = TOOLS.map((t) => ({
    op: t.name,
    summary: tplDescription(summaryOf(t.description), labels),
  }));

  // Guard the type here as well as in the caller: a non-string intent must
  // degrade to "list everything", not throw on .toLowerCase().
  const query = typeof intent === "string" ? intent.toLowerCase().trim() : "";
  const terms = [
    ...new Set(query.split(/[^a-z0-9]+/).filter((w) => w.length > 2)),
  ];

  let operations = ops;
  if (terms.length > 0) {
    operations = ops
      .map((o) => {
        const hay = `${o.op} ${o.summary}`.toLowerCase();
        const score = terms.reduce((n, w) => n + (hay.includes(w) ? 1 : 0), 0);
        return { o, score };
      })
      .sort((a, b) => b.score - a.score)
      .map((ranked) => ranked.o);
  }

  return textResult(
    JSON.stringify(
      {
        intent: typeof intent === "string" && intent ? intent : null,
        note: "Call roadmap_describe({ op }) for an op's arguments, then roadmap({ op, args }) to run it.",
        total: operations.length,
        operations,
      },
      null,
      2
    )
  );
}
|
|
2775
|
+
|
|
2776
|
+
// roadmap_describe: answer "what arguments does this op take?" on demand.
// Looks the op up by exact name in TOOLS and returns, as pretty-printed JSON
// text, its name, its summary (summaryOf + tplDescription with the current
// labels) and its inputSchema — the schema that is no longer advertised in
// tools/list. A missing/non-string op and an unknown op each come back as an
// errorResult that tells the caller what to do next.
function roadmapDescribeResult(op) {
  const hasOpName = typeof op === "string" && op.length > 0;
  if (!hasOpName) {
    return errorResult(
      "roadmap_describe requires an 'op' string, e.g. roadmap_describe({ op: 'propose_task' })."
    );
  }

  const tool = TOOLS.find((candidate) => candidate.name === op);
  if (!tool) {
    return errorResult(
      `Unknown op '${op}'. Call roadmap_search to list operations.`
    );
  }

  const payload = {
    op: tool.name,
    summary: tplDescription(summaryOf(tool.description), currentLabels()),
    inputSchema: tool.inputSchema,
  };
  return textResult(JSON.stringify(payload, null, 2));
}
|
|
2802
|
+
|
|
2585
2803
|
async function callTool(name, args) {
|
|
2804
|
+
// Dispatch surface. roadmap_search / roadmap_describe answer here without
|
|
2805
|
+
// touching the workspace. roadmap({op, args}) re-enters callTool(op, args)
|
|
2806
|
+
// so the operation runs through the IDENTICAL pipeline below — workspace
|
|
2807
|
+
// resolution, the MUTATOR_TOOLS gates, validators, session-flag side
|
|
2808
|
+
// effects — keyed off the real op name, with nothing duplicated. The ops
|
|
2809
|
+
// also stay directly callable (back-compat + what the selftest drives).
|
|
2810
|
+
if (name === "roadmap_search") {
|
|
2811
|
+
return roadmapSearchResult(typeof args?.intent === "string" ? args.intent : "");
|
|
2812
|
+
}
|
|
2813
|
+
if (name === "roadmap_describe") {
|
|
2814
|
+
return roadmapDescribeResult(args?.op);
|
|
2815
|
+
}
|
|
2816
|
+
if (name === "roadmap") {
|
|
2817
|
+
const op = args?.op;
|
|
2818
|
+
if (typeof op !== "string" || !op) {
|
|
2819
|
+
return errorResult(
|
|
2820
|
+
"roadmap requires an 'op', e.g. roadmap({ op: 'get_roadmap_snapshot' }). Call roadmap_search to discover operations."
|
|
2821
|
+
);
|
|
2822
|
+
}
|
|
2823
|
+
if (DISPATCH_TOOLS.has(op)) {
|
|
2824
|
+
return errorResult(
|
|
2825
|
+
`'${op}' is a dispatch tool, not an operation. Pass a real op such as get_roadmap_snapshot — call roadmap_search to list them.`
|
|
2826
|
+
);
|
|
2827
|
+
}
|
|
2828
|
+
if (!OP_NAMES.has(op)) {
|
|
2829
|
+
return errorResult(
|
|
2830
|
+
`Unknown op '${op}'. Call roadmap_search to list operations, or roadmap_describe({ op }) for one.`
|
|
2831
|
+
);
|
|
2832
|
+
}
|
|
2833
|
+
// Accept BOTH the documented nested shape { op, args: {...} } AND the flat
|
|
2834
|
+
// shape { op, ...fields } that LLM clients routinely emit when they hoist
|
|
2835
|
+
// scalar arguments to the top level. Flat siblings fill in keys the nested
|
|
2836
|
+
// object omits; on conflict the nested (documented) shape wins. This means
|
|
2837
|
+
// a top-level workspaceId/dryRun is never silently dropped — which would
|
|
2838
|
+
// mis-target a write or turn a validate-only call into a real one. No op
|
|
2839
|
+
// uses 'op'/'args' as an argument name, so stripping them here is safe.
|
|
2840
|
+
const { op: _op, args: nested, ...flat } = args ?? {};
|
|
2841
|
+
let inner;
|
|
2842
|
+
if (nested == null) {
|
|
2843
|
+
inner = flat; // flat shape (or no args at all)
|
|
2844
|
+
} else if (typeof nested === "object" && !Array.isArray(nested)) {
|
|
2845
|
+
inner = { ...flat, ...nested };
|
|
2846
|
+
} else if (typeof nested === "string") {
|
|
2847
|
+
// Some clients JSON-encode the args object into a string. Parse and
|
|
2848
|
+
// merge when it's an object; otherwise surface the real cause rather
|
|
2849
|
+
// than silently dropping it (which produced a misleading downstream
|
|
2850
|
+
// "X is required" from the inner op).
|
|
2851
|
+
let parsed;
|
|
2852
|
+
try {
|
|
2853
|
+
parsed = JSON.parse(nested);
|
|
2854
|
+
} catch {
|
|
2855
|
+
parsed = undefined;
|
|
2856
|
+
}
|
|
2857
|
+
if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
|
|
2858
|
+
inner = { ...flat, ...parsed };
|
|
2859
|
+
} else {
|
|
2860
|
+
return errorResult(
|
|
2861
|
+
`roadmap 'args' must be an object — got a string that isn't a JSON object. Call as roadmap({ op: "${op}", args: { ... } }) (or hoist the fields to the top level).`
|
|
2862
|
+
);
|
|
2863
|
+
}
|
|
2864
|
+
} else {
|
|
2865
|
+
return errorResult(
|
|
2866
|
+
`roadmap 'args' must be an object — got ${Array.isArray(nested) ? "an array" : typeof nested}. Call as roadmap({ op: "${op}", args: { ... } }).`
|
|
2867
|
+
);
|
|
2868
|
+
}
|
|
2869
|
+
return callTool(op, inner);
|
|
2870
|
+
}
|
|
2871
|
+
|
|
2586
2872
|
// Each tool may override the workspace via args.workspaceId. The
|
|
2587
2873
|
// projection is workspace-scoped, so we pass that through to the
|
|
2588
2874
|
// read. Tools that need to know the resolved id later (write paths,
|
|
@@ -2881,8 +3167,8 @@ async function callTool(name, args) {
|
|
|
2881
3167
|
);
|
|
2882
3168
|
return discoveryMissingResult(
|
|
2883
3169
|
name,
|
|
2884
|
-
'
|
|
2885
|
-
"Rank existing themes by relevance before proposing a new one — themes are years-stable, duplicates are the most common failure mode. Any returned top score >0.4 means an existing theme is a sensible home; re-use it. list_themes
|
|
3170
|
+
opCall("suggest_theme_for", '{ description: "<the work you are about to propose>" }'),
|
|
3171
|
+
"Rank existing themes by relevance before proposing a new one — themes are years-stable, duplicates are the most common failure mode. Any returned top score >0.4 means an existing theme is a sensible home; re-use it. The list_themes or get_roadmap_snapshot ops also satisfy this gate if you want the full catalogue."
|
|
2886
3172
|
);
|
|
2887
3173
|
}
|
|
2888
3174
|
if (
|
|
@@ -2901,7 +3187,7 @@ async function callTool(name, args) {
|
|
|
2901
3187
|
);
|
|
2902
3188
|
return discoveryMissingResult(
|
|
2903
3189
|
name,
|
|
2904
|
-
'
|
|
3190
|
+
opCall("suggest_capability_for", '{ description: "<the work you are about to propose>" }'),
|
|
2905
3191
|
"Rank existing capabilities by relevance before proposing a new one. If any score is >0.4, attach tasks there instead."
|
|
2906
3192
|
);
|
|
2907
3193
|
}
|
|
@@ -3055,7 +3341,7 @@ async function callTool(name, args) {
|
|
|
3055
3341
|
_meta: {
|
|
3056
3342
|
roadmapper: {
|
|
3057
3343
|
reminder:
|
|
3058
|
-
|
|
3344
|
+
'Rubric loaded. You can now safely run the write ops via roadmap({ op, args }) — e.g. roadmap({ op: "propose_task", args: {...} }), propose_capability, propose_theme, submit_acceptance_grades, link_pr.',
|
|
3059
3345
|
},
|
|
3060
3346
|
},
|
|
3061
3347
|
});
|
|
@@ -3234,12 +3520,12 @@ async function proposeTask(args, projected, wsId) {
|
|
|
3234
3520
|
if (best && best.score > 0.2 && best.score > chosenScore + 0.1) {
|
|
3235
3521
|
return (
|
|
3236
3522
|
base +
|
|
3237
|
-
`The task text fits ${best.id} (${best.name}) noticeably better (score ${best.score.toFixed(2)}) than the chosen ${cap.id} (${chosenScore.toFixed(2)}). If that's the right home,
|
|
3523
|
+
`The task text fits ${best.id} (${best.name}) noticeably better (score ${best.score.toFixed(2)}) than the chosen ${cap.id} (${chosenScore.toFixed(2)}). If that's the right home, move it there with ${opCall("move_task")}.`
|
|
3238
3524
|
);
|
|
3239
3525
|
}
|
|
3240
3526
|
return (
|
|
3241
3527
|
base +
|
|
3242
|
-
|
|
3528
|
+
`If you're confident in the parent, ignore this; otherwise call ${opCall("suggest_capability_for", "{ taskId }")} to confirm.`
|
|
3243
3529
|
);
|
|
3244
3530
|
}
|
|
3245
3531
|
|
|
@@ -3514,7 +3800,9 @@ async function proposeTheme(args, projected, wsId) {
|
|
|
3514
3800
|
nearest = t;
|
|
3515
3801
|
}
|
|
3516
3802
|
}
|
|
3517
|
-
|
|
3803
|
+
// dryRun is exempt (like the autonomy gate below) so a validate-only call
|
|
3804
|
+
// always returns a preview; the overlap is surfaced as a warning instead.
|
|
3805
|
+
if (nearest && nearestScore >= THEME_SPRAWL_BLOCK && args.force !== true && !args.dryRun) {
|
|
3518
3806
|
return textResult(
|
|
3519
3807
|
JSON.stringify(
|
|
3520
3808
|
{
|
|
@@ -3523,11 +3811,11 @@ async function proposeTheme(args, projected, wsId) {
|
|
|
3523
3811
|
`"${name}" overlaps the existing theme ${nearest.id} (${nearest.name}) ` +
|
|
3524
3812
|
`at ${nearestScore.toFixed(2)} (block bar ${THEME_SPRAWL_BLOCK}). Themes are the ` +
|
|
3525
3813
|
"small, years-stable top tier — a near-duplicate fragments the strategic view. " +
|
|
3526
|
-
"Reuse it: file your work as a capability under it (propose_capability with " +
|
|
3527
|
-
`pillarId: "${nearest.id}"), or broaden its scope with update_theme. If this is ` +
|
|
3814
|
+
"Reuse it: file your work as a capability under it (the propose_capability op with " +
|
|
3815
|
+
`pillarId: "${nearest.id}"), or broaden its scope with the update_theme op. If this is ` +
|
|
3528
3816
|
"genuinely a distinct strategic pillar, retry with force:true.",
|
|
3529
3817
|
nearestTheme: { id: nearest.id, name: nearest.name, score: Number(nearestScore.toFixed(3)) },
|
|
3530
|
-
fix: `
|
|
3818
|
+
fix: opCall("propose_capability", `{ pillarId: "${nearest.id}", ... }`),
|
|
3531
3819
|
},
|
|
3532
3820
|
null,
|
|
3533
3821
|
2
|
|
@@ -3555,7 +3843,7 @@ async function proposeTheme(args, projected, wsId) {
|
|
|
3555
3843
|
...(nearest
|
|
3556
3844
|
? { closestExisting: { id: nearest.id, name: nearest.name, score: Number(nearestScore.toFixed(3)) } }
|
|
3557
3845
|
: {}),
|
|
3558
|
-
fix: "propose_theme
|
|
3846
|
+
fix: opCall("propose_theme", "{ ...same args, confirm: true }"),
|
|
3559
3847
|
},
|
|
3560
3848
|
null,
|
|
3561
3849
|
2
|
|
@@ -3574,13 +3862,19 @@ async function proposeTheme(args, projected, wsId) {
|
|
|
3574
3862
|
};
|
|
3575
3863
|
|
|
3576
3864
|
if (args.dryRun) {
|
|
3865
|
+
const warnings =
|
|
3866
|
+
nearest && nearestScore >= THEME_SPRAWL_BLOCK && args.force !== true
|
|
3867
|
+
? [
|
|
3868
|
+
`Overlaps existing theme ${nearest.id} (${nearest.name}) at ${nearestScore.toFixed(2)} (block bar ${THEME_SPRAWL_BLOCK}). A real call would be refused as too_similar unless force:true — prefer filing under it via ${opCall("propose_capability", `{ pillarId: "${nearest.id}", ... }`)}.`,
|
|
3869
|
+
]
|
|
3870
|
+
: [];
|
|
3577
3871
|
return textResult(
|
|
3578
3872
|
JSON.stringify(
|
|
3579
3873
|
{
|
|
3580
3874
|
ok: true,
|
|
3581
3875
|
dryRun: true,
|
|
3582
3876
|
wouldCreate: theme,
|
|
3583
|
-
warnings
|
|
3877
|
+
warnings,
|
|
3584
3878
|
message: `Would create theme ${id} (${name}). No record written.`,
|
|
3585
3879
|
},
|
|
3586
3880
|
null,
|
|
@@ -3631,7 +3925,7 @@ async function proposeCapability(args, projected, wsId) {
|
|
|
3631
3925
|
const theme = projected.themes.find((t) => t.id === pillarId);
|
|
3632
3926
|
if (!theme) {
|
|
3633
3927
|
return errorResult(
|
|
3634
|
-
`pillarId ${pillarId} doesn't match any known theme.
|
|
3928
|
+
`pillarId ${pillarId} doesn't match any known theme. Run ${opCall("list_themes")} first.`
|
|
3635
3929
|
);
|
|
3636
3930
|
}
|
|
3637
3931
|
if (typeof args.impact === "number" && !VALID_IMPACTS.has(args.impact)) {
|
|
@@ -3871,7 +4165,7 @@ function suggestCapabilityFor(args, projected) {
|
|
|
3871
4165
|
roadmapper: {
|
|
3872
4166
|
reminder:
|
|
3873
4167
|
ranked.length === 0
|
|
3874
|
-
?
|
|
4168
|
+
? `No existing capability is a sensible parent. Before ${opCall("propose_capability")}, verify with the user that a brand-new capability is warranted — capabilities are quarterly bets, not single tasks.`
|
|
3875
4169
|
: "No strong match (top score < 0.4). If none of the listed capabilities fit, ask the user before calling propose_capability — the top match is often closer than it scores.",
|
|
3876
4170
|
},
|
|
3877
4171
|
},
|
|
@@ -3947,7 +4241,7 @@ function suggestThemeFor(args, projected) {
|
|
|
3947
4241
|
roadmapper: {
|
|
3948
4242
|
reminder: autonomy
|
|
3949
4243
|
? ranked.length === 0
|
|
3950
|
-
?
|
|
4244
|
+
? `No existing theme overlaps. Theme-autonomy is ON, so you may run ${opCall("propose_theme")} directly if this is a genuinely new strategic pillar — the server will refuse it only if it's a near-duplicate of an existing theme.`
|
|
3951
4245
|
: "No strong match (top score < 0.4). Prefer the closest existing theme if it fits; otherwise propose_theme is fine (autonomy is ON, sprawl is guarded server-side)."
|
|
3952
4246
|
: ranked.length === 0
|
|
3953
4247
|
? "No existing theme overlaps. Theme-autonomy is OFF for this workspace — verify with the user that this is a genuinely new strategic direction before propose_theme, and pass confirm:true."
|
|
@@ -4357,6 +4651,28 @@ async function updateEntity(kind, args, wsId, projected) {
|
|
|
4357
4651
|
);
|
|
4358
4652
|
}
|
|
4359
4653
|
|
|
4654
|
+
// Acceptance gate (MCP-path stopgap). The SQL gate added in migration 0096
|
|
4655
|
+
// ("a task can't transition to in_progress without >=1 acceptance criterion")
|
|
4656
|
+
// landed on a different update_entity overload than the one this JS path
|
|
4657
|
+
// calls (an overload collision — the durable fix is a SQL migration that
|
|
4658
|
+
// reconciles them). Enforce it here too so the rule holds on the MCP path:
|
|
4659
|
+
// we have the merged `current` and the patch, so we know the post-update
|
|
4660
|
+
// acceptance and the real status transition.
|
|
4661
|
+
if (
|
|
4662
|
+
kind === "task" &&
|
|
4663
|
+
effectivePatch.status === "in_progress" &&
|
|
4664
|
+
current.status !== "in_progress"
|
|
4665
|
+
) {
|
|
4666
|
+
const accAfter = Array.isArray(cleanedPatch.acceptance)
|
|
4667
|
+
? cleanedPatch.acceptance
|
|
4668
|
+
: current.acceptance;
|
|
4669
|
+
if (!Array.isArray(accAfter) || accAfter.length === 0) {
|
|
4670
|
+
return errorResult(
|
|
4671
|
+
"Cannot move a task to in_progress without at least one acceptance criterion — add acceptance in the same patch (an empty acceptance list is a stop signal)."
|
|
4672
|
+
);
|
|
4673
|
+
}
|
|
4674
|
+
}
|
|
4675
|
+
|
|
4360
4676
|
try {
|
|
4361
4677
|
const result = await rpcCall("update_entity", {
|
|
4362
4678
|
p_workspace_id: wsId,
|
|
@@ -4616,7 +4932,7 @@ function detectCapabilityGaps(args, projected) {
|
|
|
4616
4932
|
roadmapper: {
|
|
4617
4933
|
reminder:
|
|
4618
4934
|
`${shaped.length} capability gap(s) detected — clusters of uncategorized work no existing bet covers. ` +
|
|
4619
|
-
|
|
4935
|
+
`Each is a CANDIDATE for ${opCall("propose_capability")} (confirm with the user — capabilities are quarterly bets, not auto-created), then ${opCall("move_tasks")} the members under it.`,
|
|
4620
4936
|
},
|
|
4621
4937
|
},
|
|
4622
4938
|
}
|
|
@@ -4724,10 +5040,27 @@ async function submitAcceptanceGrades(args, projected, wsId) {
|
|
|
4724
5040
|
return errorResult(
|
|
4725
5041
|
`Task ${task.id} has no acceptance criteria to grade. Add some first.`
|
|
4726
5042
|
);
|
|
5043
|
+
// Validate every grade BEFORE the RPC. The per-op inputSchema
|
|
5044
|
+
// (index integer >= 0, status enum) is advisory only — nothing enforces it
|
|
5045
|
+
// server-side, and since the tool-surface collapse the schema isn't even on
|
|
5046
|
+
// the wire for a client to check. So guard here: a non-array drops to an
|
|
5047
|
+
// opaque -32603 from the for-of, and a negative/float index reaches the SQL
|
|
5048
|
+
// jsonb_set with Postgres negative-index-from-end semantics, silently
|
|
5049
|
+
// overwriting an UNRELATED criterion's grade.
|
|
5050
|
+
if (!Array.isArray(args.grades) || args.grades.length === 0)
|
|
5051
|
+
return errorResult(
|
|
5052
|
+
"grades must be a non-empty array of { index, status, note? } objects."
|
|
5053
|
+
);
|
|
4727
5054
|
for (const g of args.grades) {
|
|
4728
|
-
if (g
|
|
5055
|
+
if (!g || typeof g !== "object" || Array.isArray(g))
|
|
5056
|
+
return errorResult("each grade must be an object { index, status, note? }.");
|
|
5057
|
+
if (!Number.isInteger(g.index) || g.index < 0 || g.index >= max)
|
|
4729
5058
|
return errorResult(
|
|
4730
|
-
`Grade index ${g.index} is
|
|
5059
|
+
`Grade index ${JSON.stringify(g.index)} is invalid — must be an integer in 0..${max - 1} (task has ${max} criteria).`
|
|
5060
|
+
);
|
|
5061
|
+
if (g.status !== "pass" && g.status !== "fail")
|
|
5062
|
+
return errorResult(
|
|
5063
|
+
`Grade status for index ${g.index} must be "pass" or "fail" (got ${JSON.stringify(g.status)}).`
|
|
4731
5064
|
);
|
|
4732
5065
|
}
|
|
4733
5066
|
|
|
@@ -4797,7 +5130,7 @@ function buildReminder(toolName, projected) {
|
|
|
4797
5130
|
toolName === "list_themes")
|
|
4798
5131
|
) {
|
|
4799
5132
|
reminders.push(
|
|
4800
|
-
|
|
5133
|
+
`Call ${opCall("get_agents_md")} before any write op (propose_* / submit_acceptance_grades / link_pr) — they refuse without it.`
|
|
4801
5134
|
);
|
|
4802
5135
|
}
|
|
4803
5136
|
// Tasks with merged PRs but no acceptance grades = ungraded
|
|
@@ -4816,7 +5149,7 @@ function buildReminder(toolName, projected) {
|
|
|
4816
5149
|
reminders.push(
|
|
4817
5150
|
`${ungraded.length} delivered task${ungraded.length === 1 ? "" : "s"} ` +
|
|
4818
5151
|
`have merged PRs without submitted acceptance grades. ` +
|
|
4819
|
-
`Call submit_acceptance_grades for: ${ids}${more}.`
|
|
5152
|
+
`Call ${opCall("submit_acceptance_grades")} for: ${ids}${more}.`
|
|
4820
5153
|
);
|
|
4821
5154
|
}
|
|
4822
5155
|
}
|
|
@@ -4863,14 +5196,14 @@ const RESOURCES = [
|
|
|
4863
5196
|
uri: "roadmapper://capabilities/active",
|
|
4864
5197
|
name: "Active capabilities (snapshot)",
|
|
4865
5198
|
description:
|
|
4866
|
-
|
|
5199
|
+
`Live list of non-delivered capabilities for the env-default workspace. Read this before proposing tasks or capabilities to find the right parent. Note: MCP resources don't accept arguments, so this always reads SUPABASE_WORKSPACE_ID's workspace — use roadmap({ op: "list_capabilities", args: { workspaceId } }) for cross-workspace reads.`,
|
|
4867
5200
|
mimeType: "application/json",
|
|
4868
5201
|
},
|
|
4869
5202
|
{
|
|
4870
5203
|
uri: "roadmapper://tasks/open",
|
|
4871
5204
|
name: "Open tasks (snapshot)",
|
|
4872
5205
|
description:
|
|
4873
|
-
|
|
5206
|
+
`Live list of in_progress + planned tasks for the env-default workspace. Same workspaceId caveat as roadmapper://capabilities/active — use roadmap({ op: "list_tasks", args: { workspaceId } }) for cross-workspace reads.`,
|
|
4874
5207
|
mimeType: "application/json",
|
|
4875
5208
|
},
|
|
4876
5209
|
];
|
|
@@ -5001,33 +5334,33 @@ function renderPrompt(name, args) {
|
|
|
5001
5334
|
case "plan-feature":
|
|
5002
5335
|
return (
|
|
5003
5336
|
`Plan a feature: "${args.description ?? "(no description provided)"}"\n\n` +
|
|
5004
|
-
"Follow this flow exactly:\n" +
|
|
5005
|
-
|
|
5006
|
-
|
|
5007
|
-
|
|
5008
|
-
|
|
5337
|
+
"Every operation runs through one tool: roadmap({ op, args }). Follow this flow exactly:\n" +
|
|
5338
|
+
'1. roadmap({ op: "get_agents_md" }) (or read the roadmapper://rubric resource) to load the rubric for this session.\n' +
|
|
5339
|
+
'2. roadmap({ op: "suggest_capability_for", args: { description } }) with the description above. Read every returned candidate\'s outcome before deciding.\n' +
|
|
5340
|
+
'3. If a returned candidate scores > 0.4 OR its outcome maps to what we\'re building, propose tasks under it via roadmap({ op: "propose_tasks", args: { capabilityId, tasks: [...] } }). Each task MUST include acceptance criteria per the rubric.\n' +
|
|
5341
|
+
'4. If nothing fits, STOP and ask the user before roadmap({ op: "propose_capability", args }) — capabilities are quarterly bets, not single tasks.\n' +
|
|
5009
5342
|
"5. After tasks are proposed, summarize: capabilityId chosen, task ids created, anything skipped and why."
|
|
5010
5343
|
);
|
|
5011
5344
|
case "close-task":
|
|
5012
5345
|
return (
|
|
5013
5346
|
`Close task ${args.task_id ?? "(missing task_id)"}.\n\n` +
|
|
5014
|
-
"Follow this flow exactly:\n" +
|
|
5015
|
-
|
|
5016
|
-
`2.
|
|
5347
|
+
"Every operation runs through one tool: roadmap({ op, args }). Follow this flow exactly:\n" +
|
|
5348
|
+
'1. roadmap({ op: "get_agents_md" }) (or read the roadmapper://rubric resource) to load grading dimensions.\n' +
|
|
5349
|
+
`2. roadmap({ op: "get_task", args: { id: "${args.task_id ?? ""}" } }) and read every acceptance criterion.\n` +
|
|
5017
5350
|
"3. For each criterion, decide pass/fail. Fabricated passes destroy this signal — only mark pass if you verified.\n" +
|
|
5018
|
-
|
|
5351
|
+
'4. roadmap({ op: "submit_acceptance_grades", args: { taskId, grades } }) with the per-index results. Include a note on any fail.\n' +
|
|
5019
5352
|
(args.pr_url
|
|
5020
|
-
? `5.
|
|
5021
|
-
:
|
|
5353
|
+
? `5. roadmap({ op: "link_pr", args: {...} }) to attach ${args.pr_url} to the task.\n`
|
|
5354
|
+
: '5. If you opened a PR, roadmap({ op: "link_pr", args: {...} }) to attach it.\n') +
|
|
5022
5355
|
"6. Stamp Roadmapper-Task: " +
|
|
5023
5356
|
(args.task_id ?? "TK-NNNNNN") +
|
|
5024
5357
|
" in the PR body so the webhook routes future events back here."
|
|
5025
5358
|
);
|
|
5026
5359
|
case "weekly-review":
|
|
5027
5360
|
return (
|
|
5028
|
-
"Run a structured roadmap review.\n\n" +
|
|
5029
|
-
|
|
5030
|
-
|
|
5361
|
+
"Run a structured roadmap review. Every operation runs through one tool: roadmap({ op, args }).\n\n" +
|
|
5362
|
+
'1. roadmap({ op: "get_agents_md" }) to load the rubric (or confirm rubric is current).\n' +
|
|
5363
|
+
'2. roadmap({ op: "get_roadmap_snapshot" }) for the canonical model. Note any _meta reminders in the response.\n' +
|
|
5031
5364
|
"3. For each active capability, scan: are open tasks aging? Are any without acceptance criteria? Are there delivered tasks without acceptance grades?\n" +
|
|
5032
5365
|
"4. List capabilities whose outcomes are no longer falsifiable or whose tasks all delivered (close them or pivot).\n" +
|
|
5033
5366
|
"5. Report: ungraded deliveries, stale capabilities, capabilities ready to close, suggested next bets."
|
|
@@ -5070,6 +5403,36 @@ async function handle(request) {
|
|
|
5070
5403
|
// boundary for "you need to fetch the rubric again."
|
|
5071
5404
|
resetSession();
|
|
5072
5405
|
recordTelemetry("session_initialized", { stats });
|
|
5406
|
+
// Build the server instructions once. A dynamic preamble (resolved
|
|
5407
|
+
// workspace + where it came from + live counts, so the agent can
|
|
5408
|
+
// trust where its writes land instead of discovering an empty/wrong
|
|
5409
|
+
// workspace later) followed by the static CORE planning contract.
|
|
5410
|
+
// Surfaced at the TOP LEVEL of the result — the MCP-spec
|
|
5411
|
+
// `instructions` channel that compliant clients (Claude Code,
|
|
5412
|
+
// Cursor) inject into context. It previously lived only inside
|
|
5413
|
+
// serverInfo, where the spec doesn't define it, so spec-reading
|
|
5414
|
+
// clients silently dropped it. The gate/suggest reminders that used
|
|
5415
|
+
// to sit here are now folded into CORE_CONTRACT's workflow section.
|
|
5416
|
+
const instructions = (() => {
|
|
5417
|
+
const { id: ws, source } = resolveWorkspaceWithSource();
|
|
5418
|
+
const wsLine = ws
|
|
5419
|
+
? `Workspace: ${ws} (resolved from ${source}). `
|
|
5420
|
+
: "No workspace resolved yet. ";
|
|
5421
|
+
const rootsLine = _clientSupportsRoots
|
|
5422
|
+
? "Detecting the repo you're in to pick its workspace; call get_active_workspace before your first write to confirm. "
|
|
5423
|
+
: ws
|
|
5424
|
+
? ""
|
|
5425
|
+
: "Set ROADMAPPER_WORKSPACE_ID or open a connected repo. ";
|
|
5426
|
+
const preamble =
|
|
5427
|
+
"Roadmapper online — " +
|
|
5428
|
+
wsLine +
|
|
5429
|
+
`${stats.themes} theme${stats.themes === 1 ? "" : "s"}, ` +
|
|
5430
|
+
`${stats.capabilities} capabilit${stats.capabilities === 1 ? "y" : "ies"}, ` +
|
|
5431
|
+
`${stats.openTasks} open task${stats.openTasks === 1 ? "" : "s"}. ` +
|
|
5432
|
+
rootsLine +
|
|
5433
|
+
"Slash-prompts available: roadmapper:plan-feature, roadmapper:close-task, roadmapper:weekly-review.";
|
|
5434
|
+
return preamble + "\n\n" + CORE_CONTRACT;
|
|
5435
|
+
})();
|
|
5073
5436
|
return {
|
|
5074
5437
|
jsonrpc: "2.0",
|
|
5075
5438
|
id,
|
|
@@ -5083,38 +5446,14 @@ async function handle(request) {
|
|
|
5083
5446
|
resources: { listChanged: false },
|
|
5084
5447
|
prompts: { listChanged: false },
|
|
5085
5448
|
},
|
|
5449
|
+
// Top-level instructions: the spec-defined channel. serverInfo
|
|
5450
|
+
// keeps only name/version/stats (stats is a non-standard extra
|
|
5451
|
+
// some clients surface as "server info").
|
|
5452
|
+
instructions,
|
|
5086
5453
|
serverInfo: {
|
|
5087
5454
|
name: SERVER_NAME,
|
|
5088
5455
|
version: SERVER_VERSION,
|
|
5089
5456
|
stats,
|
|
5090
|
-
instructions: (() => {
|
|
5091
|
-
// Name the workspace we resolve to RIGHT NOW + where it came
|
|
5092
|
-
// from, so the agent can trust where its writes land instead
|
|
5093
|
-
// of discovering an empty/wrong workspace later. Repo-based
|
|
5094
|
-
// resolution (roots → repo_workspace_map) finishes just after
|
|
5095
|
-
// this handshake, so if the client supports roots we say the
|
|
5096
|
-
// target may refine and to confirm via get_active_workspace.
|
|
5097
|
-
const { id: ws, source } = resolveWorkspaceWithSource();
|
|
5098
|
-
const wsLine = ws
|
|
5099
|
-
? `Workspace: ${ws} (resolved from ${source}). `
|
|
5100
|
-
: "No workspace resolved yet. ";
|
|
5101
|
-
const rootsLine = _clientSupportsRoots
|
|
5102
|
-
? "Detecting the repo you're in to pick its workspace; call get_active_workspace before your first write to confirm. "
|
|
5103
|
-
: ws
|
|
5104
|
-
? ""
|
|
5105
|
-
: "Set ROADMAPPER_WORKSPACE_ID or open a connected repo. ";
|
|
5106
|
-
return (
|
|
5107
|
-
"Roadmapper online — " +
|
|
5108
|
-
wsLine +
|
|
5109
|
-
`${stats.themes} theme${stats.themes === 1 ? "" : "s"}, ` +
|
|
5110
|
-
`${stats.capabilities} capabilit${stats.capabilities === 1 ? "y" : "ies"}, ` +
|
|
5111
|
-
`${stats.openTasks} open task${stats.openTasks === 1 ? "" : "s"}. ` +
|
|
5112
|
-
rootsLine +
|
|
5113
|
-
"Call get_agents_md before planning — the propose_* and submit_acceptance_grades tools refuse without it. " +
|
|
5114
|
-
"Use suggest_capability_for before propose_capability. " +
|
|
5115
|
-
"Slash-prompts available: roadmapper:plan-feature, roadmapper:close-task, roadmapper:weekly-review."
|
|
5116
|
-
);
|
|
5117
|
-
})(),
|
|
5118
5457
|
},
|
|
5119
5458
|
},
|
|
5120
5459
|
};
|
|
@@ -5128,7 +5467,11 @@ async function handle(request) {
|
|
|
5128
5467
|
// so the timing usually works out.
|
|
5129
5468
|
startLabelLoad();
|
|
5130
5469
|
const labels = currentLabels();
|
|
5131
|
-
|
|
5470
|
+
// Advertise the three dispatch tools, not the 34 operations. The ops
|
|
5471
|
+
// (and their schemas) are reachable via roadmap_search / roadmap_describe
|
|
5472
|
+
// / roadmap — see META_TOOLS and the callTool dispatch. tplDescription
|
|
5473
|
+
// still runs so custom workspace labels apply.
|
|
5474
|
+
const tools = META_TOOLS.map((t) => ({
|
|
5132
5475
|
...t,
|
|
5133
5476
|
description: tplDescription(t.description, labels),
|
|
5134
5477
|
}));
|
|
@@ -5197,6 +5540,23 @@ async function runSelftest() {
|
|
|
5197
5540
|
r?.result?.capabilities?.resources &&
|
|
5198
5541
|
r?.result?.capabilities?.prompts,
|
|
5199
5542
|
},
|
|
5543
|
+
{
|
|
5544
|
+
// The CORE planning contract must ride on the TOP-LEVEL `instructions`
|
|
5545
|
+
// field (the spec channel clients read), not buried in serverInfo, and
|
|
5546
|
+
// must carry the server-enforced falsifiable-outcome rule so an agent
|
|
5547
|
+
// can file a valid proposal without first fetching the full AGENTS.md.
|
|
5548
|
+
name: "initialize returns top-level instructions with the core contract",
|
|
5549
|
+
fn: () => handle({ id: 2, method: "initialize", params: {} }),
|
|
5550
|
+
pass: (r) => {
|
|
5551
|
+
const instr = r?.result?.instructions;
|
|
5552
|
+
return (
|
|
5553
|
+
typeof instr === "string" &&
|
|
5554
|
+
instr.length > 0 &&
|
|
5555
|
+
instr.includes("FALSIFIABLE OUTCOME") &&
|
|
5556
|
+
instr.includes("get_agents_md")
|
|
5557
|
+
);
|
|
5558
|
+
},
|
|
5559
|
+
},
|
|
5200
5560
|
{
|
|
5201
5561
|
// Hitting a mutator with no rubric fetched must return the
|
|
5202
5562
|
// structured prerequisite_missing error with a `fix` field,
|
|
@@ -5485,10 +5845,317 @@ async function runSelftest() {
|
|
|
5485
5845
|
r.result.messages[0].content.text.includes("demo description"),
|
|
5486
5846
|
},
|
|
5487
5847
|
{
|
|
5488
|
-
|
|
5848
|
+
// The wire surface is the three dispatch tools, NOT the 34 ops.
|
|
5849
|
+
name: "tools/list advertises exactly the three dispatch tools",
|
|
5489
5850
|
fn: () => handle({ id: 2, method: "tools/list", params: {} }),
|
|
5490
|
-
pass: (r) =>
|
|
5491
|
-
|
|
5851
|
+
pass: (r) => {
|
|
5852
|
+
const names = (r?.result?.tools ?? []).map((t) => t.name).sort();
|
|
5853
|
+
return (
|
|
5854
|
+
names.length === META_TOOLS.length &&
|
|
5855
|
+
["roadmap", "roadmap_describe", "roadmap_search"].every((n) =>
|
|
5856
|
+
names.includes(n)
|
|
5857
|
+
)
|
|
5858
|
+
);
|
|
5859
|
+
},
|
|
5860
|
+
},
|
|
5861
|
+
{
|
|
5862
|
+
// tools/list must serve TRIMMED descriptions (summary only): every
|
|
5863
|
+
// tool keeps a non-empty one-line summary, and the methodology blocks
|
|
5864
|
+
// (USE WHEN / PREREQUISITE / ANTI-PATTERN / EXAMPLE) must be gone from
|
|
5865
|
+
// the wire payload — they now live in `instructions` + the rubric.
|
|
5866
|
+
// Guards against a regression that re-serves the full descriptions.
|
|
5867
|
+
name: "tools/list serves trimmed one-line descriptions",
|
|
5868
|
+
fn: () => handle({ id: 23, method: "tools/list", params: {} }),
|
|
5869
|
+
pass: (r) => {
|
|
5870
|
+
const tools = r?.result?.tools;
|
|
5871
|
+
if (!Array.isArray(tools) || tools.length === 0) return false;
|
|
5872
|
+
return tools.every(
|
|
5873
|
+
(t) =>
|
|
5874
|
+
typeof t.description === "string" &&
|
|
5875
|
+
t.description.length > 0 &&
|
|
5876
|
+
!t.description.includes("\n\n") &&
|
|
5877
|
+
!t.description.includes("ANTI-PATTERN:") &&
|
|
5878
|
+
!t.description.includes("PREREQUISITE:")
|
|
5879
|
+
);
|
|
5880
|
+
},
|
|
5881
|
+
},
|
|
5882
|
+
{
|
|
5883
|
+
// roadmap_search returns the op catalogue (all 34 when no intent),
|
|
5884
|
+
// each row carrying a trimmed summary (no methodology blocks).
|
|
5885
|
+
name: "roadmap_search lists operations with trimmed summaries",
|
|
5886
|
+
fn: () =>
|
|
5887
|
+
handle({
|
|
5888
|
+
id: 24,
|
|
5889
|
+
method: "tools/call",
|
|
5890
|
+
params: { name: "roadmap_search", arguments: {} },
|
|
5891
|
+
}),
|
|
5892
|
+
pass: (r) => {
|
|
5893
|
+
if (r?.result?.isError) return false;
|
|
5894
|
+
const text = r?.result?.content?.[0]?.text ?? "";
|
|
5895
|
+
let body;
|
|
5896
|
+
try {
|
|
5897
|
+
body = JSON.parse(text);
|
|
5898
|
+
} catch {
|
|
5899
|
+
return false;
|
|
5900
|
+
}
|
|
5901
|
+
const ops = body?.operations ?? [];
|
|
5902
|
+
return (
|
|
5903
|
+
ops.length === TOOLS.length &&
|
|
5904
|
+
ops.some((o) => o.op === "propose_task") &&
|
|
5905
|
+
ops.every(
|
|
5906
|
+
(o) =>
|
|
5907
|
+
typeof o.summary === "string" &&
|
|
5908
|
+
o.summary.length > 0 &&
|
|
5909
|
+
!o.summary.includes("ANTI-PATTERN:")
|
|
5910
|
+
)
|
|
5911
|
+
);
|
|
5912
|
+
},
|
|
5913
|
+
},
|
|
5914
|
+
{
|
|
5915
|
+
// roadmap_describe serves an op's inputSchema on demand (the bulk
|
|
5916
|
+
// evicted from tools/list). move_* / update_* live here now.
|
|
5917
|
+
name: "roadmap_describe returns inputSchema for move/update ops",
|
|
5918
|
+
fn: () =>
|
|
5919
|
+
handle({
|
|
5920
|
+
id: 25,
|
|
5921
|
+
method: "tools/call",
|
|
5922
|
+
params: { name: "roadmap_describe", arguments: { op: "move_task" } },
|
|
5923
|
+
}),
|
|
5924
|
+
pass: (r) => {
|
|
5925
|
+
if (r?.result?.isError) return false;
|
|
5926
|
+
let body;
|
|
5927
|
+
try {
|
|
5928
|
+
body = JSON.parse(r?.result?.content?.[0]?.text ?? "");
|
|
5929
|
+
} catch {
|
|
5930
|
+
return false;
|
|
5931
|
+
}
|
|
5932
|
+
return (
|
|
5933
|
+
body?.op === "move_task" &&
|
|
5934
|
+
body?.inputSchema?.type === "object" &&
|
|
5935
|
+
!!body?.inputSchema?.properties?.newCapabilityId
|
|
5936
|
+
);
|
|
5937
|
+
},
|
|
5938
|
+
},
|
|
5939
|
+
{
|
|
5940
|
+
name: "roadmap_describe rejects an unknown op",
|
|
5941
|
+
fn: () =>
|
|
5942
|
+
handle({
|
|
5943
|
+
id: 26,
|
|
5944
|
+
method: "tools/call",
|
|
5945
|
+
params: { name: "roadmap_describe", arguments: { op: "no_such_op" } },
|
|
5946
|
+
}),
|
|
5947
|
+
pass: (r) => r?.result?.isError === true,
|
|
5948
|
+
},
|
|
5949
|
+
{
|
|
5950
|
+
name: "roadmap rejects a missing op",
|
|
5951
|
+
fn: () =>
|
|
5952
|
+
handle({
|
|
5953
|
+
id: 27,
|
|
5954
|
+
method: "tools/call",
|
|
5955
|
+
params: { name: "roadmap", arguments: {} },
|
|
5956
|
+
}),
|
|
5957
|
+
pass: (r) => r?.result?.isError === true,
|
|
5958
|
+
},
|
|
5959
|
+
{
|
|
5960
|
+
name: "roadmap rejects an unknown op",
|
|
5961
|
+
fn: () =>
|
|
5962
|
+
handle({
|
|
5963
|
+
id: 28,
|
|
5964
|
+
method: "tools/call",
|
|
5965
|
+
params: { name: "roadmap", arguments: { op: "no_such_op" } },
|
|
5966
|
+
}),
|
|
5967
|
+
pass: (r) => r?.result?.isError === true,
|
|
5968
|
+
},
|
|
5969
|
+
{
|
|
5970
|
+
// Dispatch must run the SAME gates as a direct call: hitting a mutator
|
|
5971
|
+
// op through roadmap() before the rubric is fetched returns the
|
|
5972
|
+
// structured prerequisite_missing error, proving gates key off the op.
|
|
5973
|
+
name: "roadmap dispatch enforces the rubric gate on the inner op",
|
|
5974
|
+
fn: () => {
|
|
5975
|
+
resetSession();
|
|
5976
|
+
return handle({
|
|
5977
|
+
id: 29,
|
|
5978
|
+
method: "tools/call",
|
|
5979
|
+
params: {
|
|
5980
|
+
name: "roadmap",
|
|
5981
|
+
arguments: {
|
|
5982
|
+
op: "propose_task",
|
|
5983
|
+
args: { capabilityId: aCap, title: "Should be blocked via dispatch" },
|
|
5984
|
+
},
|
|
5985
|
+
},
|
|
5986
|
+
});
|
|
5987
|
+
},
|
|
5988
|
+
pass: (r) => {
|
|
5989
|
+
if (!r?.result?.isError) return false;
|
|
5990
|
+
try {
|
|
5991
|
+
// Parse and assert the fix FIELD directly (not the whole blob) so a
|
|
5992
|
+
// regression reverting fix to a bare uncallable get_agents_md() —
|
|
5993
|
+
// which still appears in the message — can't pass on substring luck.
|
|
5994
|
+
const out = JSON.parse(r.result.content?.[0]?.text ?? "");
|
|
5995
|
+
return (
|
|
5996
|
+
out.error === "prerequisite_missing" &&
|
|
5997
|
+
out.fix === 'roadmap({ op: "get_agents_md" })'
|
|
5998
|
+
);
|
|
5999
|
+
} catch {
|
|
6000
|
+
return false;
|
|
6001
|
+
}
|
|
6002
|
+
},
|
|
6003
|
+
},
|
|
6004
|
+
{
|
|
6005
|
+
// Dispatch reaches the inner op's argument validation identically to a
|
|
6006
|
+
// direct call. Tightened: assert the message is move_task's OWN
|
|
6007
|
+
// validator (newCapabilityId), so a regression that swallowed args.args
|
|
6008
|
+
// (yielding a generic 'taskId is required') can't pass this.
|
|
6009
|
+
name: "roadmap dispatch reaches inner-op validation",
|
|
6010
|
+
fn: () => {
|
|
6011
|
+
resetSession();
|
|
6012
|
+
return handle({
|
|
6013
|
+
id: 30,
|
|
6014
|
+
method: "tools/call",
|
|
6015
|
+
params: { name: "get_agents_md", arguments: {} },
|
|
6016
|
+
}).then(() =>
|
|
6017
|
+
handle({
|
|
6018
|
+
id: 31,
|
|
6019
|
+
method: "tools/call",
|
|
6020
|
+
params: {
|
|
6021
|
+
name: "roadmap",
|
|
6022
|
+
arguments: { op: "move_task", args: { taskId: "TK-1" } },
|
|
6023
|
+
},
|
|
6024
|
+
})
|
|
6025
|
+
);
|
|
6026
|
+
},
|
|
6027
|
+
pass: (r) => {
|
|
6028
|
+
if (!r?.result?.isError) return false;
|
|
6029
|
+
const text = r.result.content?.[0]?.text ?? "";
|
|
6030
|
+
return text.includes("newCapabilityId") && text.includes("required");
|
|
6031
|
+
},
|
|
6032
|
+
},
|
|
6033
|
+
{
|
|
6034
|
+
// POSITIVE path: a successful read THROUGH roadmap returns the inner
|
|
6035
|
+
// op's real data verbatim (proves the unwrap + passthrough, which the
|
|
6036
|
+
// error-path checks above never exercise).
|
|
6037
|
+
name: "roadmap dispatch returns real data on the happy path",
|
|
6038
|
+
fn: () =>
|
|
6039
|
+
handle({
|
|
6040
|
+
id: 32,
|
|
6041
|
+
method: "tools/call",
|
|
6042
|
+
params: { name: "roadmap", arguments: { op: "get_roadmap_snapshot" } },
|
|
6043
|
+
}),
|
|
6044
|
+
pass: (r) => {
|
|
6045
|
+
if (r?.result?.isError) return false;
|
|
6046
|
+
try {
|
|
6047
|
+
const body = JSON.parse(r.result.content?.[0]?.text ?? "");
|
|
6048
|
+
// Real snapshot shape (workspaceId may be null in seed-only mode).
|
|
6049
|
+
return (
|
|
6050
|
+
Array.isArray(body?.themes) &&
|
|
6051
|
+
Array.isArray(body?.capabilities) &&
|
|
6052
|
+
typeof body?.counts === "object"
|
|
6053
|
+
);
|
|
6054
|
+
} catch {
|
|
6055
|
+
return false;
|
|
6056
|
+
}
|
|
6057
|
+
},
|
|
6058
|
+
},
|
|
6059
|
+
{
|
|
6060
|
+
// Discovery/rubric session flags must be SET through dispatch, not just
|
|
6061
|
+
// blocked when unset — satisfy both gates purely via roadmap({op}) and
|
|
6062
|
+
// confirm the flags flipped (guards a regression where dispatch stopped
|
|
6063
|
+
// re-entering the switch that writes them).
|
|
6064
|
+
name: "roadmap dispatch sets the rubric + discovery session flags",
|
|
6065
|
+
fn: async () => {
|
|
6066
|
+
resetSession();
|
|
6067
|
+
await handle({
|
|
6068
|
+
id: 33,
|
|
6069
|
+
method: "tools/call",
|
|
6070
|
+
params: { name: "roadmap", arguments: { op: "get_agents_md" } },
|
|
6071
|
+
});
|
|
6072
|
+
await handle({
|
|
6073
|
+
id: 34,
|
|
6074
|
+
method: "tools/call",
|
|
6075
|
+
params: { name: "roadmap", arguments: { op: "get_roadmap_snapshot" } },
|
|
6076
|
+
});
|
|
6077
|
+
return {
|
|
6078
|
+
rubric: session.rubricFetchedAt,
|
|
6079
|
+
themes: session.themesListedAt,
|
|
6080
|
+
caps: session.capsDiscoveredAt,
|
|
6081
|
+
};
|
|
6082
|
+
},
|
|
6083
|
+
pass: (r) => r?.rubric !== null && r?.themes !== null && r?.caps !== null,
|
|
6084
|
+
},
|
|
6085
|
+
{
|
|
6086
|
+
// Flat (un-nested) args must NOT be silently dropped — the dangerous
|
|
6087
|
+
// case being a dropped workspaceId/dryRun. Use a gate-free read: a flat
|
|
6088
|
+
// get_task carrying the id at the TOP level (not under args) must reach
|
|
6089
|
+
// the op and resolve the real task. If the flat id were dropped, it
|
|
6090
|
+
// would 404 instead. Proves the flat-merge in the roadmap dispatch.
|
|
6091
|
+
name: "roadmap tolerates flat (un-nested) args",
|
|
6092
|
+
fn: () =>
|
|
6093
|
+
handle({
|
|
6094
|
+
id: 36,
|
|
6095
|
+
method: "tools/call",
|
|
6096
|
+
params: { name: "roadmap", arguments: { op: "get_task", id: aTask } },
|
|
6097
|
+
}),
|
|
6098
|
+
pass: (r) => {
|
|
6099
|
+
if (r?.result?.isError) return false;
|
|
6100
|
+
const text = r.result.content?.[0]?.text ?? "";
|
|
6101
|
+
return typeof aTask === "string" && text.includes(aTask);
|
|
6102
|
+
},
|
|
6103
|
+
},
|
|
6104
|
+
{
|
|
6105
|
+
// When both a flat sibling and a nested args carry the same key, the
|
|
6106
|
+
// nested (documented) value must win. Flat id is a bogus task; nested
|
|
6107
|
+
// id is the real one — resolving the real task proves nested precedence.
|
|
6108
|
+
name: "roadmap merge: nested args win over flat siblings on conflict",
|
|
6109
|
+
fn: () =>
|
|
6110
|
+
handle({
|
|
6111
|
+
id: 37,
|
|
6112
|
+
method: "tools/call",
|
|
6113
|
+
params: {
|
|
6114
|
+
name: "roadmap",
|
|
6115
|
+
arguments: { op: "get_task", id: "TK-DOES-NOT-EXIST", args: { id: aTask } },
|
|
6116
|
+
},
|
|
6117
|
+
}),
|
|
6118
|
+
pass: (r) => {
|
|
6119
|
+
if (r?.result?.isError) return false;
|
|
6120
|
+
const text = r.result.content?.[0]?.text ?? "";
|
|
6121
|
+
return text.includes(aTask) && !text.includes("TK-DOES-NOT-EXIST");
|
|
6122
|
+
},
|
|
6123
|
+
},
|
|
6124
|
+
{
|
|
6125
|
+
// args passed as a JSON STRING (a real LLM failure mode) must be parsed
|
|
6126
|
+
// and honored, not silently dropped — a flat get_task with stringified
|
|
6127
|
+
// args resolves the real task.
|
|
6128
|
+
name: "roadmap parses JSON-string args",
|
|
6129
|
+
fn: () =>
|
|
6130
|
+
handle({
|
|
6131
|
+
id: 38,
|
|
6132
|
+
method: "tools/call",
|
|
6133
|
+
params: {
|
|
6134
|
+
name: "roadmap",
|
|
6135
|
+
arguments: { op: "get_task", args: JSON.stringify({ id: aTask }) },
|
|
6136
|
+
},
|
|
6137
|
+
}),
|
|
6138
|
+
pass: (r) => {
|
|
6139
|
+
if (r?.result?.isError) return false;
|
|
6140
|
+
const text = r.result.content?.[0]?.text ?? "";
|
|
6141
|
+
return typeof aTask === "string" && text.includes(aTask);
|
|
6142
|
+
},
|
|
6143
|
+
},
|
|
6144
|
+
{
|
|
6145
|
+
// A non-object, non-parseable args must produce a clear boundary error
|
|
6146
|
+
// naming the cause — not a misleading downstream 'X is required'.
|
|
6147
|
+
name: "roadmap rejects non-object args with a clear error",
|
|
6148
|
+
fn: () =>
|
|
6149
|
+
handle({
|
|
6150
|
+
id: 39,
|
|
6151
|
+
method: "tools/call",
|
|
6152
|
+
params: { name: "roadmap", arguments: { op: "get_task", args: "not json at all" } },
|
|
6153
|
+
}),
|
|
6154
|
+
pass: (r) => {
|
|
6155
|
+
if (!r?.result?.isError) return false;
|
|
6156
|
+
const text = r.result.content?.[0]?.text ?? "";
|
|
6157
|
+
return text.includes("args") && text.includes("must be an object");
|
|
6158
|
+
},
|
|
5492
6159
|
},
|
|
5493
6160
|
{
|
|
5494
6161
|
name: "get_active_workspace reports a resolution source",
|
|
@@ -6477,15 +7144,37 @@ async function runSelftest() {
|
|
|
6477
7144
|
pass: (r) => r?.result?.isError === true,
|
|
6478
7145
|
},
|
|
6479
7146
|
{
|
|
6480
|
-
//
|
|
6481
|
-
|
|
6482
|
-
|
|
6483
|
-
|
|
6484
|
-
const
|
|
6485
|
-
|
|
6486
|
-
|
|
6487
|
-
|
|
7147
|
+
// The four move ops are no longer advertised by name (the surface is
|
|
7148
|
+
// the three dispatch tools) but must remain reachable + describable.
|
|
7149
|
+
name: "roadmap_describe resolves all four move ops",
|
|
7150
|
+
fn: async () => {
|
|
7151
|
+
const ops = ["move_task", "move_capability", "move_tasks", "move_capabilities"];
|
|
7152
|
+
const out = [];
|
|
7153
|
+
for (const op of ops) {
|
|
7154
|
+
out.push(
|
|
7155
|
+
await handle({
|
|
7156
|
+
id: 30,
|
|
7157
|
+
method: "tools/call",
|
|
7158
|
+
params: { name: "roadmap_describe", arguments: { op } },
|
|
7159
|
+
})
|
|
7160
|
+
);
|
|
7161
|
+
}
|
|
7162
|
+
return out;
|
|
6488
7163
|
},
|
|
7164
|
+
pass: (results) =>
|
|
7165
|
+
Array.isArray(results) &&
|
|
7166
|
+
results.length === 4 &&
|
|
7167
|
+
results.every((r) => {
|
|
7168
|
+
if (r?.result?.isError) return false;
|
|
7169
|
+
try {
|
|
7170
|
+
return (
|
|
7171
|
+
JSON.parse(r.result.content?.[0]?.text ?? "")?.inputSchema?.type ===
|
|
7172
|
+
"object"
|
|
7173
|
+
);
|
|
7174
|
+
} catch {
|
|
7175
|
+
return false;
|
|
7176
|
+
}
|
|
7177
|
+
}),
|
|
6489
7178
|
},
|
|
6490
7179
|
{
|
|
6491
7180
|
// Update validation: missing patch.
|
|
@@ -6552,18 +7241,120 @@ async function runSelftest() {
|
|
|
6552
7241
|
pass: (r) => r?.result?.isError === true,
|
|
6553
7242
|
},
|
|
6554
7243
|
{
|
|
6555
|
-
//
|
|
6556
|
-
//
|
|
6557
|
-
//
|
|
6558
|
-
|
|
6559
|
-
|
|
7244
|
+
// submit_acceptance_grades: a negative index must be rejected up front —
|
|
7245
|
+
// otherwise it reaches the SQL jsonb_set and (negative-index-from-end)
|
|
7246
|
+
// overwrites an UNRELATED criterion's grade. Direct call so the test
|
|
7247
|
+
// targets the validator, not the rubric/seed gates.
|
|
7248
|
+
name: "submit_acceptance_grades rejects a negative index",
|
|
7249
|
+
fn: () =>
|
|
7250
|
+
submitAcceptanceGrades(
|
|
7251
|
+
{ taskId: "TK-G", grades: [{ index: -1, status: "pass" }] },
|
|
7252
|
+
{ tasks: [{ id: "TK-G", acceptance: ["a", "b"] }], capabilities: [], themes: [] },
|
|
7253
|
+
"ws-test"
|
|
7254
|
+
),
|
|
7255
|
+
pass: (r) =>
|
|
7256
|
+
r?.isError === true && (r?.content?.[0]?.text ?? "").includes("invalid"),
|
|
7257
|
+
},
|
|
7258
|
+
{
|
|
7259
|
+
name: "submit_acceptance_grades rejects a non-array grades arg",
|
|
7260
|
+
fn: () =>
|
|
7261
|
+
submitAcceptanceGrades(
|
|
7262
|
+
{ taskId: "TK-G", grades: "not-an-array" },
|
|
7263
|
+
{ tasks: [{ id: "TK-G", acceptance: ["a", "b"] }], capabilities: [], themes: [] },
|
|
7264
|
+
"ws-test"
|
|
7265
|
+
),
|
|
7266
|
+
pass: (r) =>
|
|
7267
|
+
r?.isError === true &&
|
|
7268
|
+
(r?.content?.[0]?.text ?? "").includes("non-empty array"),
|
|
7269
|
+
},
|
|
7270
|
+
{
|
|
7271
|
+
name: "submit_acceptance_grades rejects a status that isn't pass/fail",
|
|
7272
|
+
fn: () =>
|
|
7273
|
+
submitAcceptanceGrades(
|
|
7274
|
+
{ taskId: "TK-G", grades: [{ index: 0, status: "maybe" }] },
|
|
7275
|
+
{ tasks: [{ id: "TK-G", acceptance: ["a", "b"] }], capabilities: [], themes: [] },
|
|
7276
|
+
"ws-test"
|
|
7277
|
+
),
|
|
7278
|
+
pass: (r) =>
|
|
7279
|
+
r?.isError === true && (r?.content?.[0]?.text ?? "").includes("pass"),
|
|
7280
|
+
},
|
|
7281
|
+
{
|
|
7282
|
+
// MCP-path stopgap for the 0096 SQL-overload gate: a task can't move to
|
|
7283
|
+
// in_progress with an empty acceptance list. Direct call with a synthetic
|
|
7284
|
+
// empty-acceptance task (the gate fires before any RPC).
|
|
7285
|
+
name: "update_task in_progress gate blocks a task with no acceptance",
|
|
7286
|
+
fn: () =>
|
|
7287
|
+
updateEntity(
|
|
7288
|
+
"task",
|
|
7289
|
+
{ taskId: "TK-EMPTYACC", patch: { status: "in_progress" }, reason: "starting" },
|
|
7290
|
+
"ws-test",
|
|
7291
|
+
{
|
|
7292
|
+
tasks: [{ id: "TK-EMPTYACC", status: "planned", acceptance: [] }],
|
|
7293
|
+
capabilities: [],
|
|
7294
|
+
themes: [],
|
|
7295
|
+
}
|
|
7296
|
+
),
|
|
7297
|
+
pass: (r) =>
|
|
7298
|
+
r?.isError === true && (r?.content?.[0]?.text ?? "").includes("acceptance"),
|
|
7299
|
+
},
|
|
7300
|
+
{
|
|
7301
|
+
// propose_theme dryRun must PREVIEW a near-duplicate (with a warning),
|
|
7302
|
+
// not hard-block — identical tokens force jaccard >= block bar.
|
|
7303
|
+
name: "propose_theme dryRun previews a near-duplicate with a warning",
|
|
7304
|
+
fn: () =>
|
|
7305
|
+
proposeTheme(
|
|
7306
|
+
{ name: "Duplicate Pillar Name", description: "identical tokens here", dryRun: true },
|
|
7307
|
+
{
|
|
7308
|
+
themes: [{ id: "TH-DUP", name: "Duplicate Pillar Name", description: "identical tokens here" }],
|
|
7309
|
+
capabilities: [],
|
|
7310
|
+
tasks: [],
|
|
7311
|
+
settings: {},
|
|
7312
|
+
},
|
|
7313
|
+
"ws-test"
|
|
7314
|
+
),
|
|
6560
7315
|
pass: (r) => {
|
|
6561
|
-
|
|
6562
|
-
|
|
6563
|
-
|
|
6564
|
-
|
|
7316
|
+
if (r?.isError) return false;
|
|
7317
|
+
try {
|
|
7318
|
+
const b = JSON.parse(r?.content?.[0]?.text ?? "{}");
|
|
7319
|
+
return b.dryRun === true && Array.isArray(b.warnings) && b.warnings.length > 0;
|
|
7320
|
+
} catch {
|
|
7321
|
+
return false;
|
|
7322
|
+
}
|
|
6565
7323
|
},
|
|
6566
7324
|
},
|
|
7325
|
+
{
|
|
7326
|
+
// The three update ops are reachable + describable via the dispatch
|
|
7327
|
+
// surface (not advertised by name in tools/list anymore).
|
|
7328
|
+
name: "roadmap_describe resolves all three update ops",
|
|
7329
|
+
fn: async () => {
|
|
7330
|
+
const ops = ["update_task", "update_capability", "update_theme"];
|
|
7331
|
+
const out = [];
|
|
7332
|
+
for (const op of ops) {
|
|
7333
|
+
out.push(
|
|
7334
|
+
await handle({
|
|
7335
|
+
id: 35,
|
|
7336
|
+
method: "tools/call",
|
|
7337
|
+
params: { name: "roadmap_describe", arguments: { op } },
|
|
7338
|
+
})
|
|
7339
|
+
);
|
|
7340
|
+
}
|
|
7341
|
+
return out;
|
|
7342
|
+
},
|
|
7343
|
+
pass: (results) =>
|
|
7344
|
+
Array.isArray(results) &&
|
|
7345
|
+
results.length === 3 &&
|
|
7346
|
+
results.every((r) => {
|
|
7347
|
+
if (r?.result?.isError) return false;
|
|
7348
|
+
try {
|
|
7349
|
+
return (
|
|
7350
|
+
JSON.parse(r.result.content?.[0]?.text ?? "")?.inputSchema?.type ===
|
|
7351
|
+
"object"
|
|
7352
|
+
);
|
|
7353
|
+
} catch {
|
|
7354
|
+
return false;
|
|
7355
|
+
}
|
|
7356
|
+
}),
|
|
7357
|
+
},
|
|
6567
7358
|
{
|
|
6568
7359
|
// Cross-workspace guard fires when snapshot.json names workspace
|
|
6569
7360
|
// A and a mutator call carries workspaceId=B. Cleanup is in
|
|
@@ -7149,7 +7940,7 @@ async function runSelftest() {
|
|
|
7149
7940
|
return (
|
|
7150
7941
|
out.error === "repo_unmapped" &&
|
|
7151
7942
|
out.repo === "acme/unmapped" &&
|
|
7152
|
-
out.fix === "link_repo
|
|
7943
|
+
out.fix === 'roadmap({ op: "link_repo" })' &&
|
|
7153
7944
|
out.envDefaultWorkspace === "ws-envdefault"
|
|
7154
7945
|
);
|
|
7155
7946
|
} catch {
|