@hegemonart/get-design-done 1.22.0 → 1.23.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,14 +5,14 @@
5
5
  },
6
6
  "metadata": {
7
7
  "description": "Get Design Done — 5-stage agent-orchestrated design pipeline with 9 connections, handoff-first workflow, bidirectional Figma write-back, 22+ specialized agents, queryable knowledge layer (intel store, dependency analysis, learnings extraction), and a self-improvement loop (reflector, frontmatter + budget feedback, global-skills layer). v1.20.0 ships the SDK foundation: gdd-state MCP server (11 typed tools), lockfile-safe STATE.md mutations, event stream, and resilience primitives (jittered-backoff, rate-guard, error-classifier, iteration-budget) for rate-limit + 429 + context-overflow recovery. Full CI/CD pipeline (Node 22/24 × Linux/macOS/Windows) and release automation (auto-tag + GitHub Release + release-time smoke test).",
8
- "version": "1.22.0"
8
+ "version": "1.23.5"
9
9
  },
10
10
  "plugins": [
11
11
  {
12
12
  "name": "get-design-done",
13
13
  "source": "./",
14
14
  "description": "Agent-orchestrated 5-stage design pipeline: Brief → Explore → Plan → Design → Verify. 22+ specialized agents, 9 connections (Figma, Refero, Preview, Storybook, Chromatic, Figma Writer, Graphify, Pinterest, Claude Design), Claude Design handoff, bidirectional Figma write-back, and a queryable intel store (.design/intel/) for dependency and learnings queries. Standalone commands: style, darkmode, compare, figma-write, graphify, handoff, analyze-dependencies, skill-manifest, extract-learnings. Embeds NNG heuristics, WCAG thresholds, typographic systems, motion framework, and anti-pattern catalog. Ships with a full CI/CD pipeline (Node 22/24 × Linux/macOS/Windows) and release automation. Optimization layer (v1.0.4.1, retroactive): gdd-router + gdd-cache-manager skills, PreToolUse budget-enforcer hook, tier-aware agent frontmatter, lazy checker gates, streaming synthesizer, /gdd:warm-cache + /gdd:optimize commands, and cost telemetry at .design/telemetry/costs.jsonl — targeting 50-70% per-task token-cost reduction with no quality-floor regression. v1.20.0 SDK foundation: gdd-state MCP server (11 typed tools), lockfile-safe STATE.md mutations, event stream at .design/telemetry/events.jsonl, resilience primitives (jittered-backoff, rate-guard, error-classifier, iteration-budget) with rate-limit + 429 + context-overflow recovery, and TypeScript toolchain.",
15
- "version": "1.22.0",
15
+ "version": "1.23.5",
16
16
  "author": {
17
17
  "name": "hegemonart"
18
18
  },
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "get-design-done",
3
3
  "short_name": "gdd",
4
- "version": "1.22.0",
4
+ "version": "1.23.5",
5
5
  "description": "Agent-orchestrated 5-stage design pipeline: Brief → Explore → Plan → Design → Verify. 22+ specialized agents, 9 connections (Figma, Refero, Preview, Storybook, Chromatic, Figma Writer, Graphify, Pinterest, Claude Design), handoff-first workflow via Claude Design bundles, bidirectional Figma write-back (annotations, Code Connect), queryable intel store (`.design/intel/`) for O(1) design surface lookups, and self-improvement loop (reflector agent, frontmatter + budget feedback, global-skills layer at `~/.claude/gdd/global-skills/`). Standalone commands: style, darkmode, compare, figma-write, graphify, handoff, analyze-dependencies, skill-manifest, extract-learnings, reflect, apply-reflections. Embeds NNG heuristics, WCAG thresholds, typographic systems, motion framework, and anti-pattern catalog. Ships with a full CI/CD pipeline (Node 22/24 × Linux/macOS/Windows, lint + schema + frontmatter + stale-ref + shellcheck + gitleaks + injection-scan + blocking size-budget) and release automation (auto-tag + GitHub Release + release-time smoke test). Optimization layer (v1.0.4.1, retroactive): gdd-router + gdd-cache-manager skills, PreToolUse budget-enforcer hook, tier-aware agent frontmatter, lazy checker gates, streaming synthesizer, /gdd:warm-cache + /gdd:optimize commands, and cost telemetry at .design/telemetry/costs.jsonl — targeting 50-70% per-task token-cost reduction with no quality-floor regression. v1.20.0 SDK foundation: gdd-state MCP server (11 typed tools), lockfile-safe STATE.md mutations, event stream at .design/telemetry/events.jsonl, resilience primitives (jittered-backoff, rate-guard, error-classifier, iteration-budget) with rate-limit + 429 + context-overflow recovery, and TypeScript toolchain.",
6
6
  "author": {
7
7
  "name": "hegemonart",
package/CHANGELOG.md CHANGED
@@ -4,6 +4,118 @@ All notable changes to get-design-done are documented here. Versions follow [sem
4
4
 
5
5
  ---
6
6
 
7
+ ## [1.23.5] — 2026-04-25
8
+
9
+ Phase 23.5 No-Regret Adaptive Layer milestone — turns the passive Phase 22–23 observability + validation infrastructure into a closed self-tuning loop. Three tightly-scoped no-regret algorithms sharing one feature-flag ladder. Single-user viable via informed Beta-prior bootstrap (no shared telemetry required). Ships as a decimal patch on the v1.23 minor — does NOT shift Phase 24 → v1.24.0.
10
+
11
+ ### Added
12
+
13
+ - **Bandit router** — `scripts/lib/bandit-router.cjs` implements contextual Thompson sampling over `(agent_type, touches_size_bin) → {haiku, sonnet, opus}`. Per-arm `Beta(α, β)` posterior at `.design/telemetry/posterior.json` (atomic .tmp + rename). Discounted Thompson via per-arm time-decay factor `ρ^days_since_last_use` applied at sample time (default ρ=0.988 → 60-day half-life). Informed bootstrap: each arm starts at `Beta(α, β)` with α weighted toward the historical tier success rate (haiku=0.6, sonnet=0.8, opus=0.85) and `α + β ≈ 10` so 5–10 local samples shift the posterior. Two-stage lexicographic reward: `if !solidify_pass: 0; elif user_undo_in_session: 0; else: 1 − λ · normalize(cost + ε · wall_time)`. API: `pull / update / reset / loadPosterior / savePosterior / computeReward`. Touches-size bins: `tiny <5`, `small 5–15`, `medium 16–50`, `large >50` globs. (Plan 23.5-01)
14
+
15
+ - **Hedge ensemble** — `scripts/lib/hedge-ensemble.cjs` implements parameter-free AdaNormalHedge weighted-majority over verifier + checker agents. Weights persist at `.design/telemetry/hedge-weights.json`. Update rule: `η_i = sqrt(ln(N) / max(1, cumLoss2_i))` per-agent learning rate; `w_i *= exp(-η_i * loss_i)`; renormalise. Vote semantics: weighted sum normalised by the SUM of voting agents' weights — agents in the pool that didn't vote this round don't dilute the verdict. API: `loss / vote / weights / loadWeights / saveWeights`. Default vote threshold 0.5. (Plan 23.5-02)
16
+
17
+ - **MMR re-rank** — `scripts/lib/mmr-rerank.cjs` implements Maximal Marginal Relevance over scored items. Solves the "all 5 surfaced learnings are about the same thing" failure mode in the Phase 14.5 decision-injector. Greedy criterion: `next = argmax_{i ∉ selected} λ * relevance(i) − (1 − λ) * max_sim(i, selected)`. Similarity = Jaccard on word n-grams (default n=2). λ default 0.7. No external deps, no embedding API. API: `rerank / similarity / tokenize / ngrams / jaccard`. (Plan 23.5-03)
18
+
19
+ - **Adaptive-mode feature flag ladder** — `scripts/lib/adaptive-mode.cjs` is the single source of truth for which Phase 23.5 components are active. Three modes:
20
+ - `static` (DEFAULT) — Phase 10.1 behaviour. Static `tier_overrides` applies. No posterior writes / no hedge updates / no MMR.
21
+ - `hedge` — Adds AdaNormalHedge consensus + MMR re-rank. Routing still static (bandit OFF). Safest intro level.
22
+ - `full` — Adds bandit Thompson-sampling routing + reflector confidence-interval proposals. Both posterior + hedge persist.
23
+ Read from `.design/budget.json.adaptive_mode` with safe fallback to `static` on missing/malformed/unknown values. API: `getMode / setMode / caps / isBanditEnabled / isHedgeEnabled / isMmrEnabled / isReflectorProposalsEnabled`. (Plan 23.5-04)
24
+
25
+ ### Changed
26
+
27
+ - `tests/semver-compare.test.cjs` `OFF_CADENCE_VERSIONS` gains `1.23.5`.
28
+ - `test-fixture/baselines/phase-20/resilience-primitives.txt` regenerated alphabetically with all four new `.cjs` modules added.
29
+
30
+ ### Tests
31
+
32
+ - `tests/bandit-router.test.cjs` — bin partitioning, prior elevation per tier, beta sampling, pull persistence, missing-input throws, update/reward/clamp, reset, decay-toward-prior, all reward branches, load+save, 60-round convergence smoke test (18 tests)
33
+ - `tests/hedge-ensemble.test.cjs` — init uniform, high-loss penalty, normalisation, simple vs weighted vote, boolean=numeric equivalence, custom threshold, empty votes, loss clamp, NaN throw, round-trip (14)
34
+ - `tests/mmr-rerank.test.cjs` — tokenize edges, ngram size+fallback, similarity properties, λ=1 pure relevance, λ=0 pure diversity, textOf/relevanceOf overrides, empty input, non-array throw, k>length truncation, defaults, jaccard guards, canonical "5 D-13 hits" scenario (18)
35
+ - `tests/adaptive-mode.test.cjs` — missing-file fallback, malformed-JSON fallback, unknown-mode quiet fallback, all 3 capability matrices, all 4 predicates, setMode preserves other fields, parent-dir creation, mode rejection, exports, absolute-path support (13)
36
+ - `tests/phase-23.5-baseline.test.cjs` — Phase 23.5 regression baseline (8)
37
+
38
+ Total: 71 new tests. All Phase 20/21/22/23 tests still green.
39
+
40
+ ### Reflector integration
41
+
42
+ The Phase 22 code-level reflector reads `posterior.json` + hedge weights at run time (under `adaptive_mode: "full"`). When `stddev(Beta(α, β)) < 0.05` for a single-arm dominant tier, it proposes `tier_overrides` updates via `/gdd:apply-reflections`. The reflector is pure-read and never auto-writes — the same proposal pattern as the Plan 23-06 touches pattern miner.
43
+
44
+ ### Deferred (evidence-gated, per ROADMAP)
45
+
46
+ - Hierarchical shared prior (revisit after 20+ cycles of single-user convergence data)
47
+ - Dense-embedding retrieval (revisit if MMR-only miss-rate exceeds 15%)
48
+ - Offline policy evaluation harness (bootstraps from bandit's stochastic logs once accumulated)
49
+ - Auto changepoint detection (manual `/gdd:bandit-reset --reason "<msg>"` covers v1)
50
+
51
+ ### Explicitly out of scope
52
+
53
+ - HDBSCAN auto-crystallization, BOCPD changepoint detection, Personalized PageRank, MinHash/LSH dedup, Borda/Kemeny rank aggregation, submodular greedy — each rejected with rationale in ROADMAP.md.
54
+
55
+ ---
56
+
57
+ ## [1.23.0] — 2026-04-25
58
+
59
+ Phase 23 GDD SDK Domain Primitives milestone — lands the highest-leverage code primitives from the ROADMAP "GDD SDK Domain Primitives" entry as typed Node modules with tests. 10 atomic plans (23-01 through 23-10), additive — every Phase 20/21/22 consumer keeps working unchanged. Distribution as separate `@hegemonart/gdd-sdk` npm package and screenshot-capture orchestration are explicitly deferred to follow-up phases.
60
+
61
+ ### Added
62
+
63
+ - **JSON output contracts** — `reference/output-contracts/planner-decision.schema.json` + `verifier-decision.schema.json` (Draft-07). `scripts/lib/parse-contract.cjs` gains `parsePlannerDecision()` and `parseVerifierDecision()` riding the same extract→parse→validate pipeline as the existing `parseMotionMap`. Lets `/gdd:synthesize` consume planner output without regex-parsing markdown headings, and lets executor↔verifier ping-pong on a typed envelope. (Plan 23-01)
64
+
65
+ - **Solidify-with-rollback gate** — `scripts/lib/design-solidify.mjs` runs the typecheck/build/targeted-test triplet for a task and, on any failure, rolls the working tree back via `git stash` (configurable: `stash` | `hard` | `none`) and emits a `solidify.rollback` event onto the Phase 22 causal chain. Optional `emit()` callback for event-stream telemetry sink. Authored as `.mjs` to sidestep the Phase 22 Node 24 + Windows + .mjs↔.ts loader bug. (Plan 23-02)
66
+
67
+ - **Touches: analyzer + parallelism decision engine** — `scripts/lib/touches-analyzer/index.cjs` parses `Touches:` lines from task markdown into glob lists and produces a pairwise verdict (`parallel` | `sequential`) for any two tasks. Encodes today's prompt-only heuristic into auditable code. Verdict rules (first match wins): empty → unknown-touches; literal equality → shared-glob; shared component dir → shared-component-dir; resolved-file overlap → shared-file; otherwise → disjoint. (Plan 23-03)
68
+
69
+ - **Audit aggregator** — `scripts/lib/audit-aggregator/index.cjs` takes `Array<Finding>` from N audit-agents, dedups by `{file, line, rule_id}`, scores via severity-weighted formula (P0:8/P1:4/P2:2/P3:1), and returns sorted top-N + tally summary. Default merge picks higher-confidence → higher-severity → lex-earliest agent → first-seen. Confidence outside `[0, 1]` clamped with one `process.emitWarning` per call. Cross-platform path normalization. (Plan 23-04)
70
+
71
+ - **Reference resolver** — `scripts/lib/reference-resolver.cjs` adds the resolution direction on top of the Phase 14.5 reference-registry. `resolve('forms')` / `resolve('type:forms')` → `{name, path, type, excerpt}` with a 200-char excerpt suitable for inlining into agent prompts. Lookup order: exact name → slug match → singularize fuzzy → type-only-when-unique. Ambiguous match throws `RangeError` with candidates. `resolveAll(keys, {ignoreMissing})` for bulk. `excerptOf(path, {maxChars})` strips frontmatter / fenced code / HTML comments / markdown headers. (Plan 23-05)
72
+
73
+ - **Touches pattern miner** — `scripts/lib/touches-pattern-miner.cjs` scans `.design/archive/cycle-*/tasks/*.md` after `/gdd:complete-cycle`, canonicalizes signatures (lowercase + backslash-normalize + cycle-slug strip + dedup + sort), and proposes crystallization candidates when a signature recurs in ≥3 tasks across ≥2 cycles. Writes `.design/learnings/touches-patterns.json` atomically (`.tmp` + rename). **NEVER auto-applies** — `/gdd:apply-reflections` is the materialization gate. (Plan 23-06)
74
+
75
+ - **Image diff + visual baseline manager** — `scripts/lib/visual-baseline/diff.cjs` compares two PNG buffers. With `pngjs` installed (probeOptional), decodes both and counts pixels whose R/G/B/A channels differ beyond the tolerance (default 4). Without `pngjs`, falls back to bytewise SHA-256 equality. `scripts/lib/visual-baseline/index.cjs` exposes `compareToBaseline(key, pngBuffer)` and `applyBaseline(key, pngBuffer)`; reads/writes `.design/baselines/<key>.png`; rejects path-traversal keys. `pngjs@7` declared as optional dep. Defers Playwright/Preview MCP screenshot capture orchestration to a later phase. (Plan 23-07)
76
+
77
+ - **Design-token reader (multi-source)** — `scripts/lib/design-tokens/index.cjs` facades over four pure-JS readers producing the uniform `{tokens, source, format, warnings}` shape:
78
+ - `css-vars.cjs` — extracts `--token: value;` from CSS/SCSS, last-write-wins, strips block comments, warns on `$scss-vars`
79
+ - `js-const.cjs` — spawn-node harness evaluates CJS/ESM exports, recognises `{tokens: …}` / default / direct bag, flattens nested with `.` separator
80
+ - `tailwind.cjs` — same harness, walks `theme` + `theme.extend` per scale (extend overrides base)
81
+ - `figma.cjs` — parses `{variableCollections}` shape OR already-flattened bag; emits `rgb(R, G, B)` for color values, per-mode tokens for multi-mode variables
82
+ Auto-detection by extension + content sniff. (Plan 23-08)
83
+
84
+ - **Domain primitives bundle** — three checkers sharing a single hit shape (`{rule_id, severity P0-P3, summary, evidence?, line?, file}`):
85
+ - `domain-primitives/nng.cjs` — runs grep-style heuristic rules loaded from `reference/heuristics.md` fenced yaml blocks; caller may inject `opts.rules` to bypass file-load
86
+ - `domain-primitives/anti-patterns.cjs` — same yaml extractor against `reference/anti-patterns.md`
87
+ - `domain-primitives/wcag.cjs` (no axe-core dep) — `contrastRatio()` (WCAG 1.4.3 luminance), `checkContrast({fg, bg, level: AA|AAA})`, `checkTapTarget({width, height, level})` (AA 24×24, AAA 44×44), `checkAriaLabels({content})` (interactive elements without text + aria-label)
88
+ Both the NNG and anti-pattern reference files may contain no parseable yaml today (treated as an empty registry), so the checkers stay robust while rules are populated gradually. (Plan 23-09)
89
+
90
+ ### Changed
91
+
92
+ - `tests/semver-compare.test.cjs` `OFF_CADENCE_VERSIONS` gains `1.23.0`.
93
+ - `test-fixture/baselines/phase-20/resilience-primitives.txt` gains `reference-resolver.cjs` (alphabetical between `reference-registry.cjs` and `relevance-counter.cjs`) and `touches-pattern-miner.cjs` (alphabetical at end).
94
+
95
+ ### Tests
96
+
97
+ - `tests/output-contracts-23-01.test.cjs` — planner + verifier contracts (14 tests)
98
+ - `tests/design-solidify.test.cjs` — solidify gate, all rollback modes (6)
99
+ - `tests/touches-analyzer.test.cjs` — parser + verdict + matrix (17)
100
+ - `tests/audit-aggregator.test.cjs` — dedup + score + tallies (15)
101
+ - `tests/reference-resolver.test.cjs` — resolution rules + excerpts (12)
102
+ - `tests/touches-pattern-miner.test.cjs` — canonicalize + thresholds + atomic write (10)
103
+ - `tests/visual-baseline.test.cjs` — diff modes + baseline round-trip (14, pngjs-optional path skipped when absent)
104
+ - `tests/design-tokens.test.cjs` — 4 readers + facade auto-detect (15)
105
+ - `tests/domain-primitives.test.cjs` — NNG + anti-pattern + WCAG checkers (18)
106
+ - `tests/phase-23-baseline.test.cjs` — Phase 23 regression baseline (12)
107
+
108
+ Total: 133 new tests. All Phase 20/21/22 tests still green.
109
+
110
+ ### Deferred
111
+
112
+ - **`@hegemonart/gdd-sdk` separate npm package** — out of scope; build/packaging project of its own.
113
+ - **Screenshot capture orchestration** (Playwright + Claude Preview MCP wrapper) — needs live MCP infra to validate.
114
+ - **Spec↔code↔visual triangulation verifier** — depends on Phase 16/17 component specs being fleshed out.
115
+ - **Knowledge-graph typed query layer** + cycle/workstream model SDK + pause/resume context serializer SDK + per-stage budget allocator SDK + git operations primitive + handoff bundle parser + intel store typed reader/writer — each is roadmap-text-sized and warrants its own phase scope.
116
+
117
+ ---
118
+
7
119
  ## [1.22.0] — 2026-04-25
8
120
 
9
121
  Phase 22 GDD SDK Observability milestone — the single-typed `BaseEvent` envelope from Phase 20 grows into a queryable, redacted, transport-able observability layer with tail/grep/WebSocket consumers and a causal event chain. 10 plans (22-01 through 22-10), additive — every Phase 20 + Phase 21 consumer keeps working unchanged.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hegemonart/get-design-done",
3
- "version": "1.22.0",
3
+ "version": "1.23.5",
4
4
  "description": "A Claude Code plugin for systematic design improvement",
5
5
  "author": "Hegemon",
6
6
  "homepage": "https://github.com/hegemonart/get-design-done",
@@ -87,6 +87,7 @@
87
87
  "@modelcontextprotocol/sdk": "^1.0.0"
88
88
  },
89
89
  "optionalDependencies": {
90
+ "pngjs": "^7.0.0",
90
91
  "ws": "^8.20.0"
91
92
  }
92
93
  }
@@ -0,0 +1,94 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "$id": "planner-decision.schema.json",
4
+ "title": "Planner Decision Output Contract",
5
+ "description": "Schema for the structured JSON block emitted by design-planner. Lets /gdd:synthesize and downstream consumers (executor, audit aggregator) read planner output without regex-parsing markdown.",
6
+ "type": "object",
7
+ "required": ["schema_version", "plan_id", "tasks", "waves"],
8
+ "additionalProperties": false,
9
+ "properties": {
10
+ "schema_version": {
11
+ "type": "string",
12
+ "const": "1.0.0"
13
+ },
14
+ "plan_id": {
15
+ "type": "string",
16
+ "description": "Stable identifier — e.g. '23-04' or 'PLAN.md'.",
17
+ "minLength": 1
18
+ },
19
+ "generated_at": {
20
+ "type": "string",
21
+ "format": "date-time"
22
+ },
23
+ "tasks": {
24
+ "type": "array",
25
+ "minItems": 1,
26
+ "items": {
27
+ "type": "object",
28
+ "required": ["task_id", "summary", "touches"],
29
+ "additionalProperties": false,
30
+ "properties": {
31
+ "task_id": {
32
+ "type": "string",
33
+ "minLength": 1,
34
+ "description": "Stable per-plan task identifier (e.g. T-01, 23-04-T-1)."
35
+ },
36
+ "summary": {
37
+ "type": "string",
38
+ "minLength": 3
39
+ },
40
+ "touches": {
41
+ "type": "array",
42
+ "items": { "type": "string" },
43
+ "description": "File globs the task is expected to read or write."
44
+ },
45
+ "dependencies": {
46
+ "type": "array",
47
+ "items": { "type": "string" },
48
+ "description": "Other task_ids that must complete first.",
49
+ "default": []
50
+ },
51
+ "parallel_safe": {
52
+ "type": "boolean",
53
+ "description": "Hint from the planner — the parallelism decision engine confirms via Touches: analysis.",
54
+ "default": false
55
+ },
56
+ "estimated_minutes": {
57
+ "type": "number",
58
+ "minimum": 0,
59
+ "description": "Rough wall-clock estimate; consumed by budget allocator."
60
+ },
61
+ "acceptance": {
62
+ "type": "string",
63
+ "description": "Free-form acceptance criteria copy."
64
+ }
65
+ }
66
+ }
67
+ },
68
+ "waves": {
69
+ "type": "array",
70
+ "minItems": 1,
71
+ "items": {
72
+ "type": "object",
73
+ "required": ["wave", "task_ids"],
74
+ "additionalProperties": false,
75
+ "properties": {
76
+ "wave": {
77
+ "type": "string",
78
+ "minLength": 1,
79
+ "description": "Wave label (e.g. 'A', 'B', 'C')."
80
+ },
81
+ "task_ids": {
82
+ "type": "array",
83
+ "items": { "type": "string" },
84
+ "minItems": 1
85
+ }
86
+ }
87
+ }
88
+ },
89
+ "rationale": {
90
+ "type": "string",
91
+ "description": "Free-form planner-side rationale — not consumed by code."
92
+ }
93
+ }
94
+ }
@@ -0,0 +1,66 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "$id": "verifier-decision.schema.json",
4
+ "title": "Verifier Decision Output Contract",
5
+ "description": "Schema for the structured JSON block emitted by design-verifier. Drives executor↔verifier ping-pong with a typed envelope rather than free-form prose.",
6
+ "type": "object",
7
+ "required": ["schema_version", "verdict", "gaps", "must_fix_before_ship", "confidence"],
8
+ "additionalProperties": false,
9
+ "properties": {
10
+ "schema_version": {
11
+ "type": "string",
12
+ "const": "1.0.0"
13
+ },
14
+ "generated_at": {
15
+ "type": "string",
16
+ "format": "date-time"
17
+ },
18
+ "verdict": {
19
+ "type": "string",
20
+ "enum": ["pass", "fail", "gap"],
21
+ "description": "pass = ship-ready, gap = remediable, fail = re-plan."
22
+ },
23
+ "gaps": {
24
+ "type": "array",
25
+ "items": {
26
+ "type": "object",
27
+ "required": ["id", "severity", "area", "summary"],
28
+ "additionalProperties": false,
29
+ "properties": {
30
+ "id": { "type": "string", "minLength": 1 },
31
+ "severity": {
32
+ "type": "string",
33
+ "enum": ["P0", "P1", "P2", "P3"]
34
+ },
35
+ "area": {
36
+ "type": "string",
37
+ "description": "Free-form domain tag — e.g. 'a11y', 'motion', 'tokens'."
38
+ },
39
+ "summary": { "type": "string", "minLength": 3 },
40
+ "evidence": {
41
+ "type": "string",
42
+ "description": "Citation: file:line reference or audit excerpt."
43
+ },
44
+ "remediation": {
45
+ "type": "string",
46
+ "description": "One-line proposed fix."
47
+ }
48
+ }
49
+ }
50
+ },
51
+ "must_fix_before_ship": {
52
+ "type": "array",
53
+ "items": { "type": "string" },
54
+ "description": "Subset of gap.id values that block ship — typically the P0/P1 ones."
55
+ },
56
+ "confidence": {
57
+ "type": "string",
58
+ "enum": ["high", "med", "low"],
59
+ "description": "Verifier's self-rated confidence — drives whether to escalate to a second pass."
60
+ },
61
+ "rationale": {
62
+ "type": "string",
63
+ "description": "Free-form notes — not code-consumed."
64
+ }
65
+ }
66
+ }
@@ -0,0 +1,170 @@
1
+ /**
2
+ * adaptive-mode.cjs — feature-flag ladder facade for the Phase 23.5
3
+ * no-regret stack (Plan 23.5-04).
4
+ *
5
+ * Three modes, ladder-shaped:
6
+ *
7
+ * "static" — Phase 10.1 behaviour. Static tier_overrides map applies;
8
+ * no posterior writes; no hedge weight updates; no MMR.
9
+ * Default for all installs.
10
+ *
11
+ * "hedge" — Adds AdaNormalHedge consensus thresholding to verifier
12
+ * + checker pools, and MMR re-rank. Routing still static. Safest intro
13
+ * level — bandit routing is NOT enabled, so the model
14
+ * choice for any agent is unchanged.
15
+ *
16
+ * "full" — Adds bandit Thompson-sampling routing on top of hedge.
17
+ * Both posterior + hedge weights persist. Reflector
18
+ * proposals based on confidence intervals enabled.
19
+ *
20
+ * The ladder is read from `.design/budget.json.adaptive_mode`. Fallback
21
+ * default = "static". Unknown values clamp to "static" with a stderr
22
+ * warning (silent if `quiet: true`).
23
+ *
24
+ * This module owns the SINGLE source of truth for "is bandit on / is
25
+ * hedge on" — every consumer (router, hedge, MMR, reflector, the
26
+ * Phase 22 budget-enforcer hook) reads from `getMode(opts)`.
27
+ *
28
+ * No external deps. CommonJS.
29
+ */
30
+
31
+ 'use strict';
32
+
33
+ const fs = require('node:fs');
34
+ const path = require('node:path');
35
+
36
/** Location of the budget config, relative to the base directory. */
const DEFAULT_BUDGET_PATH = '.design/budget.json';

/** Every recognised mode, in ladder order. Frozen so consumers cannot extend it. */
const VALID_MODES = Object.freeze(['static', 'hedge', 'full']);

/** Mode reported whenever no valid mode is configured. */
const DEFAULT_MODE = 'static';

/**
 * Capability matrix per mode — each entry is a frozen bag of booleans that
 * callers read as simple on/off switches.
 */
const MODE_CAPS = Object.freeze({
  static: Object.freeze({
    bandit: false,
    hedge: false,
    mmr: false,
    reflector_proposals: false,
  }),
  hedge: Object.freeze({
    bandit: false,
    hedge: true,
    mmr: true,
    reflector_proposals: false,
  }),
  full: Object.freeze({
    bandit: true,
    hedge: true,
    mmr: true,
    reflector_proposals: true,
  }),
});
46
+
47
/**
 * Work out the absolute path of the budget config file.
 *
 * An absolute `opts.budgetPath` is returned untouched. A relative
 * `opts.budgetPath`, or the default `.design/budget.json` when none is
 * given, is resolved against `opts.baseDir`, which itself defaults to the
 * current working directory.
 *
 * @param {{baseDir?: string, budgetPath?: string}} [opts]
 * @returns {string} absolute path of the budget file
 */
function resolveBudgetPath(opts = {}) {
  const { budgetPath, baseDir } = opts;
  if (budgetPath && path.isAbsolute(budgetPath)) {
    return budgetPath;
  }
  const root = baseDir ?? process.cwd();
  return path.resolve(root, budgetPath || DEFAULT_BUDGET_PATH);
}
55
+
56
/**
 * Look up the configured adaptive_mode in the budget file.
 *
 * Returns "static" when the file does not exist, cannot be read or parsed,
 * has no non-empty string `adaptive_mode`, or names a mode outside the
 * known ladder. Only the last case writes a warning to stderr, and only
 * when `opts.quiet` is not set.
 *
 * @param {{baseDir?: string, budgetPath?: string, quiet?: boolean}} [opts]
 * @returns {'static'|'hedge'|'full'}
 */
function getMode(opts = {}) {
  const budgetFile = resolveBudgetPath(opts);
  if (!fs.existsSync(budgetFile)) {
    return DEFAULT_MODE;
  }

  /** @type {{adaptive_mode?: string}} */
  let parsed;
  try {
    parsed = JSON.parse(fs.readFileSync(budgetFile, 'utf8'));
  } catch {
    // Unreadable or malformed config is treated the same as a missing one.
    return DEFAULT_MODE;
  }

  const configured = parsed ? parsed.adaptive_mode : undefined;
  if (typeof configured !== 'string' || configured === '') {
    return DEFAULT_MODE;
  }
  if (VALID_MODES.includes(configured)) {
    return /** @type {'static'|'hedge'|'full'} */ (configured);
  }

  if (!opts.quiet) {
    try {
      process.stderr.write(
        `[adaptive-mode] unknown adaptive_mode "${configured}" in ${budgetFile}; falling back to "static"\n`,
      );
    } catch {
      /* a failed warning must not break mode resolution */
    }
  }
  return DEFAULT_MODE;
}
90
+
91
/**
 * Capability flags for whichever mode is currently configured.
 *
 * @param {{baseDir?: string, budgetPath?: string, quiet?: boolean}} [opts]
 *   Forwarded unchanged to `getMode`.
 * @returns {{bandit: boolean, hedge: boolean, mmr: boolean, reflector_proposals: boolean}}
 */
function caps(opts = {}) {
  const activeMode = getMode(opts);
  return MODE_CAPS[activeMode];
}
100
+
101
/**
 * Set the adaptive_mode on disk. Atomic write (.tmp + rename). Creates
 * the budget.json file (and its parent directory) if missing.
 *
 * Existing fields are preserved when the file holds a JSON object. If the
 * file is unreadable, malformed, or holds any other JSON value (null, an
 * array, a string, a number, a boolean), the config is rebuilt from `{}`:
 * such a value cannot carry an `adaptive_mode` field, so keeping it would
 * either throw or silently drop the mode on serialisation.
 *
 * @param {'static'|'hedge'|'full'} mode
 * @param {{baseDir?: string, budgetPath?: string}} [opts]
 * @returns {string} absolute path written
 * @throws {RangeError} when `mode` is not one of the valid modes
 */
function setMode(mode, opts = {}) {
  if (!VALID_MODES.includes(mode)) {
    throw new RangeError(
      `adaptive-mode.setMode: mode must be one of [${VALID_MODES.join('|')}], got ${JSON.stringify(mode)}`,
    );
  }
  const p = resolveBudgetPath(opts);
  /** @type {Record<string, unknown>} */
  let cfg = {};
  if (fs.existsSync(p)) {
    try {
      const parsed = JSON.parse(fs.readFileSync(p, 'utf8'));
      // JSON.parse can legitimately return null, an array or a primitive;
      // only a plain object can hold the adaptive_mode field.
      if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
        cfg = parsed;
      }
    } catch {
      cfg = {};
    }
  }
  cfg.adaptive_mode = mode;
  fs.mkdirSync(path.dirname(p), { recursive: true });
  const tmp = `${p}.tmp`;
  try {
    fs.writeFileSync(tmp, JSON.stringify(cfg, null, 2));
    fs.renameSync(tmp, p);
  } catch (err) {
    // Do not leave a stray temp file behind; the original error still wins.
    try {
      fs.rmSync(tmp, { force: true });
    } catch {
      /* best-effort cleanup */
    }
    throw err;
  }
  return p;
}
133
+
134
/**
 * Predicate: is bandit routing switched on in the current mode? Intended
 * to replace scattered `if (mode === 'full' || …)` checks.
 *
 * @param {{baseDir?: string, budgetPath?: string}} [opts] Forwarded to `caps`.
 * @returns {boolean}
 */
function isBanditEnabled(opts = {}) {
  const { bandit } = caps(opts);
  return bandit;
}
144
+
145
/**
 * Predicate: is the hedge capability switched on in the current mode?
 *
 * @param {{baseDir?: string, budgetPath?: string}} [opts] Forwarded to `caps`.
 * @returns {boolean}
 */
function isHedgeEnabled(opts = {}) {
  const { hedge } = caps(opts);
  return hedge;
}
148
+
149
/**
 * Predicate: is the MMR capability switched on in the current mode?
 *
 * @param {{baseDir?: string, budgetPath?: string}} [opts] Forwarded to `caps`.
 * @returns {boolean}
 */
function isMmrEnabled(opts = {}) {
  const { mmr } = caps(opts);
  return mmr;
}
152
+
153
/**
 * Predicate: are reflector proposals switched on in the current mode?
 *
 * @param {{baseDir?: string, budgetPath?: string}} [opts] Forwarded to `caps`.
 * @returns {boolean}
 */
function isReflectorProposalsEnabled(opts = {}) {
  const { reflector_proposals: reflectorProposals } = caps(opts);
  return reflectorProposals;
}
156
+
157
+ module.exports = { // public surface of the adaptive-mode facade
158
+ getMode, // read the configured mode, falling back to "static"
159
+ setMode, // validate and persist a mode via .tmp + rename
160
+ caps, // capability matrix entry for the current mode
161
+ isBanditEnabled, // predicate over caps().bandit
162
+ isHedgeEnabled, // predicate over caps().hedge
163
+ isMmrEnabled, // predicate over caps().mmr
164
+ isReflectorProposalsEnabled, // predicate over caps().reflector_proposals
165
+ resolveBudgetPath, // absolute budget.json path from baseDir / budgetPath
166
+ DEFAULT_BUDGET_PATH, // '.design/budget.json'
167
+ DEFAULT_MODE, // 'static'
168
+ VALID_MODES, // frozen list: static, hedge, full
169
+ MODE_CAPS, // frozen per-mode capability matrix
170
+ };