@hegemonart/get-design-done 1.23.0 → 1.23.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/CHANGELOG.md +50 -0
- package/package.json +1 -1
- package/scripts/lib/adaptive-mode.cjs +170 -0
- package/scripts/lib/bandit-router.cjs +368 -0
- package/scripts/lib/hedge-ensemble.cjs +217 -0
- package/scripts/lib/mmr-rerank.cjs +154 -0
|
@@ -5,14 +5,14 @@
|
|
|
5
5
|
},
|
|
6
6
|
"metadata": {
|
|
7
7
|
"description": "Get Design Done — 5-stage agent-orchestrated design pipeline with 9 connections, handoff-first workflow, bidirectional Figma write-back, 22+ specialized agents, queryable knowledge layer (intel store, dependency analysis, learnings extraction), and a self-improvement loop (reflector, frontmatter + budget feedback, global-skills layer). v1.20.0 ships the SDK foundation: gdd-state MCP server (11 typed tools), lockfile-safe STATE.md mutations, event stream, and resilience primitives (jittered-backoff, rate-guard, error-classifier, iteration-budget) for rate-limit + 429 + context-overflow recovery. Full CI/CD pipeline (Node 22/24 × Linux/macOS/Windows) and release automation (auto-tag + GitHub Release + release-time smoke test).",
|
|
8
|
-
"version": "1.23.0"
|
|
8
|
+
"version": "1.23.5"
|
|
9
9
|
},
|
|
10
10
|
"plugins": [
|
|
11
11
|
{
|
|
12
12
|
"name": "get-design-done",
|
|
13
13
|
"source": "./",
|
|
14
14
|
"description": "Agent-orchestrated 5-stage design pipeline: Brief → Explore → Plan → Design → Verify. 22+ specialized agents, 9 connections (Figma, Refero, Preview, Storybook, Chromatic, Figma Writer, Graphify, Pinterest, Claude Design), Claude Design handoff, bidirectional Figma write-back, and a queryable intel store (.design/intel/) for dependency and learnings queries. Standalone commands: style, darkmode, compare, figma-write, graphify, handoff, analyze-dependencies, skill-manifest, extract-learnings. Embeds NNG heuristics, WCAG thresholds, typographic systems, motion framework, and anti-pattern catalog. Ships with a full CI/CD pipeline (Node 22/24 × Linux/macOS/Windows) and release automation. Optimization layer (v1.0.4.1, retroactive): gdd-router + gdd-cache-manager skills, PreToolUse budget-enforcer hook, tier-aware agent frontmatter, lazy checker gates, streaming synthesizer, /gdd:warm-cache + /gdd:optimize commands, and cost telemetry at .design/telemetry/costs.jsonl — targeting 50-70% per-task token-cost reduction with no quality-floor regression. v1.20.0 SDK foundation: gdd-state MCP server (11 typed tools), lockfile-safe STATE.md mutations, event stream at .design/telemetry/events.jsonl, resilience primitives (jittered-backoff, rate-guard, error-classifier, iteration-budget) with rate-limit + 429 + context-overflow recovery, and TypeScript toolchain.",
|
|
15
|
-
"version": "1.23.0",
|
|
15
|
+
"version": "1.23.5",
|
|
16
16
|
"author": {
|
|
17
17
|
"name": "hegemonart"
|
|
18
18
|
},
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "get-design-done",
|
|
3
3
|
"short_name": "gdd",
|
|
4
|
-
"version": "1.23.0",
|
|
4
|
+
"version": "1.23.5",
|
|
5
5
|
"description": "Agent-orchestrated 5-stage design pipeline: Brief → Explore → Plan → Design → Verify. 22+ specialized agents, 9 connections (Figma, Refero, Preview, Storybook, Chromatic, Figma Writer, Graphify, Pinterest, Claude Design), handoff-first workflow via Claude Design bundles, bidirectional Figma write-back (annotations, Code Connect), queryable intel store (`.design/intel/`) for O(1) design surface lookups, and self-improvement loop (reflector agent, frontmatter + budget feedback, global-skills layer at `~/.claude/gdd/global-skills/`). Standalone commands: style, darkmode, compare, figma-write, graphify, handoff, analyze-dependencies, skill-manifest, extract-learnings, reflect, apply-reflections. Embeds NNG heuristics, WCAG thresholds, typographic systems, motion framework, and anti-pattern catalog. Ships with a full CI/CD pipeline (Node 22/24 × Linux/macOS/Windows, lint + schema + frontmatter + stale-ref + shellcheck + gitleaks + injection-scan + blocking size-budget) and release automation (auto-tag + GitHub Release + release-time smoke test). Optimization layer (v1.0.4.1, retroactive): gdd-router + gdd-cache-manager skills, PreToolUse budget-enforcer hook, tier-aware agent frontmatter, lazy checker gates, streaming synthesizer, /gdd:warm-cache + /gdd:optimize commands, and cost telemetry at .design/telemetry/costs.jsonl — targeting 50-70% per-task token-cost reduction with no quality-floor regression. v1.20.0 SDK foundation: gdd-state MCP server (11 typed tools), lockfile-safe STATE.md mutations, event stream at .design/telemetry/events.jsonl, resilience primitives (jittered-backoff, rate-guard, error-classifier, iteration-budget) with rate-limit + 429 + context-overflow recovery, and TypeScript toolchain.",
|
|
6
6
|
"author": {
|
|
7
7
|
"name": "hegemonart",
|
package/CHANGELOG.md
CHANGED
|
@@ -4,6 +4,56 @@ All notable changes to get-design-done are documented here. Versions follow [sem
|
|
|
4
4
|
|
|
5
5
|
---
|
|
6
6
|
|
|
7
|
+
## [1.23.5] — 2026-04-25
|
|
8
|
+
|
|
9
|
+
Phase 23.5 No-Regret Adaptive Layer milestone — turns the passive Phase 22–23 observability + validation infrastructure into a closed self-tuning loop. Three tightly-scoped no-regret algorithms sharing one feature-flag ladder. Single-user viable via informed Beta-prior bootstrap (no shared telemetry required). Ships as a decimal patch on the v1.23 minor — does NOT shift Phase 24 → v1.24.0.
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
|
|
13
|
+
- **Bandit router** — `scripts/lib/bandit-router.cjs` implements contextual Thompson sampling over `(agent_type, touches_size_bin) → {haiku, sonnet, opus}`. Per-arm `Beta(α, β)` posterior at `.design/telemetry/posterior.json` (atomic .tmp + rename). Discounted Thompson via per-arm time-decay factor `ρ^days_since_last_use` applied at sample time (default ρ=0.988 → 60-day half-life). Informed bootstrap: each arm starts at `Beta(α, β)` with α weighted toward the historical tier success rate (haiku=0.6, sonnet=0.8, opus=0.85) and `α + β ≈ 10` so 5–10 local samples shift the posterior. Two-stage lexicographic reward: `if !solidify_pass: 0; elif user_undo_in_session: 0; else: 1 − λ · normalize(cost + ε · wall_time)`. API: `pull / update / reset / loadPosterior / savePosterior / computeReward`. Touches-size bins: `tiny <5`, `small 5–15`, `medium 16–50`, `large >50` globs. (Plan 23.5-01)
|
|
14
|
+
|
|
15
|
+
- **Hedge ensemble** — `scripts/lib/hedge-ensemble.cjs` implements parameter-free AdaNormalHedge weighted-majority over verifier + checker agents. Weights persist at `.design/telemetry/hedge-weights.json`. Update rule: `η_i = sqrt(ln(N) / max(1, cumLoss2_i))` per-agent learning rate; `w_i *= exp(-η_i * loss_i)`; renormalise. Vote semantics: weighted sum normalised by the SUM of voting agents' weights — agents in the pool that didn't vote this round don't dilute the verdict. API: `loss / vote / weights / loadWeights / saveWeights`. Default vote threshold 0.5. (Plan 23.5-02)
|
|
16
|
+
|
|
17
|
+
- **MMR re-rank** — `scripts/lib/mmr-rerank.cjs` implements Maximal Marginal Relevance over scored items. Solves the "all 5 surfaced learnings are about the same thing" failure mode in the Phase 14.5 decision-injector. Greedy criterion: `next = argmax_{i ∉ selected} λ * relevance(i) − (1 − λ) * max_sim(i, selected)`. Similarity = Jaccard on word n-grams (default n=2). λ default 0.7. No external deps, no embedding API. API: `rerank / similarity / tokenize / ngrams / jaccard`. (Plan 23.5-03)
|
|
18
|
+
|
|
19
|
+
- **Adaptive-mode feature flag ladder** — `scripts/lib/adaptive-mode.cjs` is the single source of truth for which Phase 23.5 components are active. Three modes:
|
|
20
|
+
- `static` (DEFAULT) — Phase 10.1 behaviour. Static `tier_overrides` applies. No posterior writes / no hedge updates / no MMR.
|
|
21
|
+
- `hedge` — Adds AdaNormalHedge consensus + MMR re-rank. Routing still static (bandit OFF). Safest intro level.
|
|
22
|
+
- `full` — Adds bandit Thompson-sampling routing + reflector confidence-interval proposals. Both posterior + hedge persist.
|
|
23
|
+
Read from `.design/budget.json.adaptive_mode` with safe fallback to `static` on missing/malformed/unknown values. API: `getMode / setMode / caps / isBanditEnabled / isHedgeEnabled / isMmrEnabled / isReflectorProposalsEnabled`. (Plan 23.5-04)
|
|
24
|
+
|
|
25
|
+
### Changed
|
|
26
|
+
|
|
27
|
+
- `tests/semver-compare.test.cjs` `OFF_CADENCE_VERSIONS` gains `1.23.5`.
|
|
28
|
+
- `test-fixture/baselines/phase-20/resilience-primitives.txt` regenerated alphabetically with all four new `.cjs` modules added.
|
|
29
|
+
|
|
30
|
+
### Tests
|
|
31
|
+
|
|
32
|
+
- `tests/bandit-router.test.cjs` — bin partitioning, prior elevation per tier, beta sampling, pull persistence, missing-input throws, update/reward/clamp, reset, decay-toward-prior, all reward branches, load+save, 60-round convergence smoke test (18 tests)
|
|
33
|
+
- `tests/hedge-ensemble.test.cjs` — init uniform, high-loss penalty, normalisation, simple vs weighted vote, boolean=numeric equivalence, custom threshold, empty votes, loss clamp, NaN throw, round-trip (14)
|
|
34
|
+
- `tests/mmr-rerank.test.cjs` — tokenize edges, ngram size+fallback, similarity properties, λ=1 pure relevance, λ=0 pure diversity, textOf/relevanceOf overrides, empty input, non-array throw, k>length truncation, defaults, jaccard guards, canonical "5 D-13 hits" scenario (18)
|
|
35
|
+
- `tests/adaptive-mode.test.cjs` — missing-file fallback, malformed-JSON fallback, unknown-mode quiet fallback, all 3 capability matrices, all 4 predicates, setMode preserves other fields, parent-dir creation, mode rejection, exports, absolute-path support (13)
|
|
36
|
+
- `tests/phase-23.5-baseline.test.cjs` — Phase 23.5 regression baseline (8)
|
|
37
|
+
|
|
38
|
+
Total: 71 new tests. All Phase 20/21/22/23 tests still green.
|
|
39
|
+
|
|
40
|
+
### Reflector integration
|
|
41
|
+
|
|
42
|
+
The Phase 22 code-level reflector reads `posterior.json` + hedge weights at run time (under `adaptive_mode: "full"`). When `stddev(Beta(α, β)) < 0.05` for a single-arm dominant tier, it proposes `tier_overrides` updates via `/gdd:apply-reflections`. Pure-read; never auto-writes. Same proposal pattern as Plan 23-06 touches pattern miner.
|
|
43
|
+
|
|
44
|
+
### Deferred (evidence-gated, per ROADMAP)
|
|
45
|
+
|
|
46
|
+
- Hierarchical shared prior (revisit after 20+ cycles of single-user convergence data)
|
|
47
|
+
- Dense-embedding retrieval (revisit if MMR-only miss-rate exceeds 15%)
|
|
48
|
+
- Offline policy evaluation harness (bootstraps from bandit's stochastic logs once accumulated)
|
|
49
|
+
- Auto changepoint detection (manual `/gdd:bandit-reset --reason "<msg>"` covers v1)
|
|
50
|
+
|
|
51
|
+
### Explicitly out of scope
|
|
52
|
+
|
|
53
|
+
- HDBSCAN auto-crystallization, BOCPD changepoint detection, Personalized PageRank, MinHash/LSH dedup, Borda/Kemeny rank aggregation, submodular greedy — each rejected with rationale in ROADMAP.md.
|
|
54
|
+
|
|
55
|
+
---
|
|
56
|
+
|
|
7
57
|
## [1.23.0] — 2026-04-25
|
|
8
58
|
|
|
9
59
|
Phase 23 GDD SDK Domain Primitives milestone — lands the highest-leverage code primitives from the ROADMAP "GDD SDK Domain Primitives" entry as typed Node modules with tests. 10 atomic plans (23-01 through 23-10), additive — every Phase 20/21/22 consumer keeps working unchanged. Distribution as separate `@hegemonart/gdd-sdk` npm package and screenshot-capture orchestration are explicitly deferred to follow-up phases.
|
package/package.json
CHANGED
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* adaptive-mode.cjs — feature-flag ladder facade for the Phase 23.5
|
|
3
|
+
* no-regret stack (Plan 23.5-04).
|
|
4
|
+
*
|
|
5
|
+
* Three modes, ladder-shaped:
|
|
6
|
+
*
|
|
7
|
+
* "static" — Phase 10.1 behaviour. Static tier_overrides map applies;
|
|
8
|
+
* no posterior writes; no hedge weight updates; no MMR.
|
|
9
|
+
* Default for all installs.
|
|
10
|
+
*
|
|
11
|
+
* "hedge" — Adds AdaNormalHedge consensus thresholding to verifier
|
|
12
|
+
* + checker pools. Routing still static. Safest intro
|
|
13
|
+
* level — bandit routing is NOT enabled, so the model
|
|
14
|
+
* choice for any agent is unchanged.
|
|
15
|
+
*
|
|
16
|
+
* "full" — Adds bandit Thompson-sampling routing on top of hedge.
|
|
17
|
+
* Both posterior + hedge weights persist. Reflector
|
|
18
|
+
* proposals based on confidence intervals enabled.
|
|
19
|
+
*
|
|
20
|
+
* The ladder is read from `.design/budget.json.adaptive_mode`. Fallback
|
|
21
|
+
* default = "static". Unknown values clamp to "static" with a stderr
|
|
22
|
+
* warning (silent if `quiet: true`).
|
|
23
|
+
*
|
|
24
|
+
* This module owns the SINGLE source of truth for "is bandit on / is
|
|
25
|
+
* hedge on" — every consumer (router, hedge, MMR, reflector, the
|
|
26
|
+
* Phase 22 budget-enforcer hook) reads from `getMode(opts)`.
|
|
27
|
+
*
|
|
28
|
+
* No external deps. CommonJS.
|
|
29
|
+
*/
|
|
30
|
+
|
|
31
|
+
'use strict';
|
|
32
|
+
|
|
33
|
+
const fs = require('node:fs');
|
|
34
|
+
const path = require('node:path');
|
|
35
|
+
|
|
36
|
+
/** Location of the budget config, relative to the resolved base directory. */
const DEFAULT_BUDGET_PATH = '.design/budget.json';

/** Every recognised ladder rung, lowest capability first. */
const VALID_MODES = Object.freeze(['static', 'hedge', 'full']);

/** Mode used whenever the config is absent, unreadable or unrecognised. */
const DEFAULT_MODE = 'static';

/**
 * Which adaptive components each mode switches on. Both the table and
 * every row are frozen so callers can share them without copying.
 */
const MODE_CAPS = Object.freeze({
  static: Object.freeze({
    bandit: false,
    hedge: false,
    mmr: false,
    reflector_proposals: false,
  }),
  hedge: Object.freeze({
    bandit: false,
    hedge: true,
    mmr: true,
    reflector_proposals: false,
  }),
  full: Object.freeze({
    bandit: true,
    hedge: true,
    mmr: true,
    reflector_proposals: true,
  }),
});
|
|
46
|
+
|
|
47
|
+
/**
 * Work out the absolute path of the budget config.
 *
 * An absolute `opts.budgetPath` is returned untouched. A relative one, or
 * the default location when none is given, is resolved against
 * `opts.baseDir`, falling back to the current working directory.
 *
 * @param {{baseDir?: string, budgetPath?: string}} [opts]
 * @returns {string} absolute path
 */
function resolveBudgetPath(opts = {}) {
  const target = opts.budgetPath || DEFAULT_BUDGET_PATH;
  if (opts.budgetPath && path.isAbsolute(target)) {
    return target;
  }
  // The working directory is only consulted when no baseDir was supplied.
  return path.resolve(opts.baseDir ?? process.cwd(), target);
}
|
|
55
|
+
|
|
56
|
+
/**
|
|
57
|
+
* Read the current adaptive_mode from .design/budget.json. Falls back
|
|
58
|
+
* to "static" when the file is absent, malformed, or holds an
|
|
59
|
+
* unrecognised value.
|
|
60
|
+
*
|
|
61
|
+
* @param {{baseDir?: string, budgetPath?: string, quiet?: boolean}} [opts]
|
|
62
|
+
* @returns {'static'|'hedge'|'full'}
|
|
63
|
+
*/
|
|
64
|
+
function getMode(opts = {}) {
  const budgetFile = resolveBudgetPath(opts);
  if (!fs.existsSync(budgetFile)) return DEFAULT_MODE;

  let parsed;
  try {
    parsed = JSON.parse(fs.readFileSync(budgetFile, 'utf8'));
  } catch {
    // Unreadable or malformed config: behave as if nothing was configured.
    return DEFAULT_MODE;
  }

  // Only a non-empty string counts as a configured mode.
  const requested = parsed ? parsed.adaptive_mode : undefined;
  if (typeof requested !== 'string' || requested === '') return DEFAULT_MODE;

  if (VALID_MODES.includes(requested)) {
    return /** @type {'static'|'hedge'|'full'} */ (requested);
  }

  // Unknown value: warn (unless silenced) and clamp to the default.
  if (!opts.quiet) {
    try {
      process.stderr.write(
        `[adaptive-mode] unknown adaptive_mode "${requested}" in ${budgetFile}; falling back to "static"\n`,
      );
    } catch {
      /* swallow */
    }
  }
  return DEFAULT_MODE;
}
|
|
90
|
+
|
|
91
|
+
/**
|
|
92
|
+
* Convenience: capability matrix for the current mode.
|
|
93
|
+
*
|
|
94
|
+
* @param {{baseDir?: string, budgetPath?: string, quiet?: boolean}} [opts]
|
|
95
|
+
* @returns {{bandit: boolean, hedge: boolean, mmr: boolean, reflector_proposals: boolean}}
|
|
96
|
+
*/
|
|
97
|
+
function caps(opts = {}) {
  // getMode() only ever yields a key of MODE_CAPS, so the lookup cannot miss.
  const activeMode = getMode(opts);
  return MODE_CAPS[activeMode];
}
|
|
100
|
+
|
|
101
|
+
/**
|
|
102
|
+
* Set the adaptive_mode on disk. Atomic write (.tmp + rename). Creates
|
|
103
|
+
* the budget.json file if missing — the rest of the budget config
|
|
104
|
+
* defaults to {} so other readers see "no caps configured".
|
|
105
|
+
*
|
|
106
|
+
* @param {'static'|'hedge'|'full'} mode
|
|
107
|
+
* @param {{baseDir?: string, budgetPath?: string}} [opts]
|
|
108
|
+
* @returns {string} absolute path written
|
|
109
|
+
*/
|
|
110
|
+
function setMode(mode, opts = {}) {
  if (!VALID_MODES.includes(mode)) {
    throw new RangeError(
      `adaptive-mode.setMode: mode must be one of [${VALID_MODES.join('|')}], got ${JSON.stringify(mode)}`,
    );
  }
  const p = resolveBudgetPath(opts);
  /** @type {Record<string, unknown>} */
  let cfg = {};
  if (fs.existsSync(p)) {
    try {
      const parsed = JSON.parse(fs.readFileSync(p, 'utf8'));
      // Only a plain JSON object can carry the flag. `null` or a primitive
      // would make the assignment below throw in strict mode, and an array
      // would silently drop the property when serialised. Treat all of
      // those like a malformed file and start from an empty config.
      if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
        cfg = parsed;
      }
    } catch {
      // Deliberate best-effort: an unreadable/malformed config is replaced.
      cfg = {};
    }
  }
  cfg.adaptive_mode = mode;
  fs.mkdirSync(path.dirname(p), { recursive: true });

  // Atomic publish: write a sibling temp file, then rename over the target.
  const tmp = p + '.tmp';
  try {
    fs.writeFileSync(tmp, JSON.stringify(cfg, null, 2));
    fs.renameSync(tmp, p);
  } catch (err) {
    // Do not leave a stale temp file behind; the original error still wins.
    try {
      fs.rmSync(tmp, { force: true });
    } catch {
      /* cleanup is best-effort */
    }
    throw err;
  }
  return p;
}
|
|
133
|
+
|
|
134
|
+
/**
|
|
135
|
+
* High-level "should bandit route this agent?" predicate. Replaces ad-
|
|
136
|
+
* hoc `if (mode === 'full' || …)` checks across the codebase.
|
|
137
|
+
*
|
|
138
|
+
* @param {{baseDir?: string, budgetPath?: string}} [opts]
|
|
139
|
+
* @returns {boolean}
|
|
140
|
+
*/
|
|
141
|
+
function isBanditEnabled(opts = {}) {
  // Reads the `bandit` flag from the active mode's capability row.
  const { bandit } = caps(opts);
  return bandit;
}
|
|
144
|
+
|
|
145
|
+
/**
 * Whether the active mode's capability row has `hedge` switched on.
 *
 * @param {{baseDir?: string, budgetPath?: string}} [opts]
 * @returns {boolean}
 */
function isHedgeEnabled(opts = {}) {
  const { hedge } = caps(opts);
  return hedge;
}
|
|
148
|
+
|
|
149
|
+
/**
 * Whether the active mode's capability row has `mmr` switched on.
 *
 * @param {{baseDir?: string, budgetPath?: string}} [opts]
 * @returns {boolean}
 */
function isMmrEnabled(opts = {}) {
  const { mmr } = caps(opts);
  return mmr;
}
|
|
152
|
+
|
|
153
|
+
/**
 * Whether the active mode's capability row has `reflector_proposals`
 * switched on.
 *
 * @param {{baseDir?: string, budgetPath?: string}} [opts]
 * @returns {boolean}
 */
function isReflectorProposalsEnabled(opts = {}) {
  const { reflector_proposals: enabled } = caps(opts);
  return enabled;
}
|
|
156
|
+
|
|
157
|
+
module.exports = {
|
|
158
|
+
getMode,
|
|
159
|
+
setMode,
|
|
160
|
+
caps,
|
|
161
|
+
isBanditEnabled,
|
|
162
|
+
isHedgeEnabled,
|
|
163
|
+
isMmrEnabled,
|
|
164
|
+
isReflectorProposalsEnabled,
|
|
165
|
+
resolveBudgetPath,
|
|
166
|
+
DEFAULT_BUDGET_PATH,
|
|
167
|
+
DEFAULT_MODE,
|
|
168
|
+
VALID_MODES,
|
|
169
|
+
MODE_CAPS,
|
|
170
|
+
};
|
|
@@ -0,0 +1,368 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* bandit-router.cjs — contextual Thompson-sampling bandit over
|
|
3
|
+
* (agent_type, touches_size_bin) → {haiku, sonnet, opus} (Plan 23.5-01).
|
|
4
|
+
*
|
|
5
|
+
* Replaces Phase 10.1's static tier_overrides map when the user opts
|
|
6
|
+
* into adaptive_mode = "full". The static map continues to apply when
|
|
7
|
+
* adaptive_mode = "static" (default).
|
|
8
|
+
*
|
|
9
|
+
* Posterior persistence:
|
|
10
|
+
* .design/telemetry/posterior.json
|
|
11
|
+
* { schema_version: '1.0.0',
|
|
12
|
+
* generated_at: ISO,
|
|
13
|
+
* arms: [{agent, bin, tier, alpha, beta, last_used, count}] }
|
|
14
|
+
*
|
|
15
|
+
* Atomic .tmp + rename. Discounted Thompson via per-arm time-decay
|
|
16
|
+
* factor `rho^days_since_last_use` applied at sample time, not stored.
|
|
17
|
+
*
|
|
18
|
+
* Reward computation (D-06): two-stage lexicographic
|
|
19
|
+
* if !solidify_pass: reward = 0
|
|
20
|
+
* elif user_undo_in_session: reward = 0
|
|
21
|
+
* else: reward = 1 - lambda * normalize(cost + epsilon * wall_time)
|
|
22
|
+
*
|
|
23
|
+
* No external deps. CommonJS to match scripts/lib/ siblings.
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
'use strict';
|
|
27
|
+
|
|
28
|
+
const fs = require('node:fs');
|
|
29
|
+
const path = require('node:path');
|
|
30
|
+
|
|
31
|
+
/** Posterior file location, relative to the resolved base directory. */
const DEFAULT_POSTERIOR_PATH = '.design/telemetry/posterior.json';
/** Value written to / expected in the envelope's `schema_version`. */
const SCHEMA_VERSION = '1.0.0';

// Per-day discount factor ρ. Evidence is scaled by ρ^days, so the actual
// half-life is ln(0.5) / ln(0.988) ≈ 57.4 days (nominally "60-day").
const DEFAULT_DECAY = 0.988;

// Informed prior success rates per tier (D-03). Combined with
// PRIOR_STRENGTH below (alpha + beta ≈ 10), 5–10 local samples will
// visibly shift the posterior.
const TIER_PRIOR = Object.freeze({
  haiku: 0.6,
  sonnet: 0.8,
  opus: 0.85,
});

/** Pseudo-observation count of a fresh arm (alpha + beta). */
const PRIOR_STRENGTH = 10;
/** Tiers sampled when the caller does not supply its own list. */
const DEFAULT_TIERS = Object.freeze(['haiku', 'sonnet', 'opus']);

/** Bundle of the default tuning knobs above. */
const DEFAULT_PRIORS = Object.freeze({
  decay: DEFAULT_DECAY,
  strength: PRIOR_STRENGTH,
  tiers: DEFAULT_TIERS,
  perTier: TIER_PRIOR,
});

// Touches-size bins, ascending by inclusive upper bound on the glob count.
// Each entry is frozen too (Object.freeze is shallow), matching how the
// other shared lookup tables are protected against accidental mutation.
const TOUCHES_BINS = Object.freeze([
  Object.freeze({ name: 'tiny', max: 4 }),
  Object.freeze({ name: 'small', max: 15 }),
  Object.freeze({ name: 'medium', max: 50 }),
  Object.freeze({ name: 'large', max: Infinity }),
]);
|
|
61
|
+
|
|
62
|
+
/**
|
|
63
|
+
* Resolve a touches-size bin from a glob count.
|
|
64
|
+
* @param {number} globCount
|
|
65
|
+
* @returns {string}
|
|
66
|
+
*/
|
|
67
|
+
function binForGlobCount(globCount) {
  // Bins are ordered by ascending inclusive upper bound, so the first one
  // whose bound is not exceeded is the match.
  const match = TOUCHES_BINS.find((candidate) => globCount <= candidate.max);
  // Nothing compares true for NaN-like input; that falls through to 'large'.
  return match ? match.name : 'large';
}
|
|
73
|
+
|
|
74
|
+
/**
|
|
75
|
+
* Load the posterior file or return a fresh envelope.
|
|
76
|
+
* @param {{baseDir?: string, posteriorPath?: string}} [opts]
|
|
77
|
+
* @returns {{schema_version: string, generated_at: string, arms: object[]}}
|
|
78
|
+
*/
|
|
79
|
+
function loadPosterior(opts = {}) {
  // A brand-new, empty envelope: used when nothing usable is on disk.
  const freshEnvelope = () => ({
    schema_version: SCHEMA_VERSION,
    generated_at: new Date().toISOString(),
    arms: [],
  });

  const p = resolvePath(opts);
  if (!fs.existsSync(p)) {
    return freshEnvelope();
  }
  try {
    const data = JSON.parse(fs.readFileSync(p, 'utf8'));
    // The envelope must be a plain object. An array root would accept an
    // `arms` property here but lose it again on JSON.stringify, so the
    // posterior would never persist.
    if (data === null || typeof data !== 'object' || Array.isArray(data)) {
      return freshEnvelope();
    }
    // Keep only object entries: a null/primitive arm would make every
    // later property lookup on it throw.
    data.arms = Array.isArray(data.arms)
      ? data.arms.filter((arm) => arm !== null && typeof arm === 'object')
      : [];
    return data;
  } catch {
    // Unreadable or malformed file: start over rather than fail the caller.
    return freshEnvelope();
  }
}
|
|
94
|
+
|
|
95
|
+
/**
 * Work out the absolute path of the posterior file.
 *
 * An absolute `opts.posteriorPath` is returned untouched. A relative one,
 * or the default location when none is given, is resolved against
 * `opts.baseDir`, falling back to the current working directory.
 *
 * @param {{baseDir?: string, posteriorPath?: string}} [opts]
 * @returns {string} absolute path
 */
function resolvePath(opts = {}) {
  const target = opts.posteriorPath || DEFAULT_POSTERIOR_PATH;
  if (opts.posteriorPath && path.isAbsolute(target)) {
    return target;
  }
  // The working directory is only consulted when no baseDir was supplied.
  return path.resolve(opts.baseDir ?? process.cwd(), target);
}
|
|
103
|
+
|
|
104
|
+
/**
|
|
105
|
+
* Persist the posterior atomically.
|
|
106
|
+
* @param {object} posterior
|
|
107
|
+
* @param {{baseDir?: string, posteriorPath?: string}} [opts]
|
|
108
|
+
* @returns {string} absolute path written
|
|
109
|
+
*/
|
|
110
|
+
function savePosterior(posterior, opts = {}) {
  const target = resolvePath(opts);
  const staging = `${target}.tmp`;

  fs.mkdirSync(path.dirname(target), { recursive: true });

  // The caller's object is stamped in place with the write time.
  posterior.generated_at = new Date().toISOString();
  const serialised = JSON.stringify(posterior, null, 2);

  // Write to a sibling temp file first, then rename it over the target.
  fs.writeFileSync(staging, serialised);
  fs.renameSync(staging, target);
  return target;
}
|
|
119
|
+
|
|
120
|
+
/**
|
|
121
|
+
* Reset the posterior — deletes the file. Next call rebootstraps.
|
|
122
|
+
*
|
|
123
|
+
* @param {{baseDir?: string, posteriorPath?: string, reason?: string}} [opts]
|
|
124
|
+
* @returns {{deleted: boolean, path: string, reason?: string}}
|
|
125
|
+
*/
|
|
126
|
+
function reset(opts = {}) {
  const target = resolvePath(opts);
  const deleted = fs.existsSync(target);
  if (deleted) {
    fs.unlinkSync(target);
  }
  // `reason` is echoed back verbatim; it is not stored anywhere here.
  return { deleted, path: target, reason: opts.reason };
}
|
|
132
|
+
|
|
133
|
+
/**
 * Build the starting Beta(alpha, beta) for a tier.
 *
 * Known tiers get 2 pseudo-counts on each side plus the remaining
 * `strength - 4` split according to the tier's prior success rate, so
 * alpha + beta === strength. Unknown tiers get a symmetric prior.
 *
 * @param {string} tier
 * @param {number} strength total pseudo-observations (alpha + beta)
 * @returns {{alpha: number, beta: number}}
 */
function priorFor(tier, strength) {
  const prior = TIER_PRIOR[tier];
  // Require an actual number: a plain `=== undefined` test lets inherited
  // keys such as "constructor" or "toString" through, and those would turn
  // alpha/beta into NaN.
  if (typeof prior !== 'number') {
    return { alpha: strength / 2, beta: strength / 2 };
  }
  const spread = strength - 4;
  return {
    alpha: 2 + prior * spread,
    beta: 2 + (1 - prior) * spread,
  };
}
|
|
143
|
+
|
|
144
|
+
/**
 * Return the first arm matching (agent, bin, tier) exactly, or `undefined`
 * when there is none.
 *
 * @param {object[]} arms
 * @param {string} agent
 * @param {string} bin
 * @param {string} tier
 * @returns {object|undefined}
 */
function findArm(arms, agent, bin, tier) {
  for (const candidate of arms) {
    if (candidate.agent !== agent) continue;
    if (candidate.bin !== bin) continue;
    if (candidate.tier === tier) return candidate;
  }
  return undefined;
}
|
|
147
|
+
|
|
148
|
+
/**
 * Look up the (agent, bin, tier) arm, creating it from the tier prior and
 * appending it to `posterior.arms` when it does not exist yet.
 *
 * @param {{arms: object[]}} posterior mutated when a new arm is added
 * @param {string} agent
 * @param {string} bin
 * @param {string} tier
 * @param {number} strength prior strength used for a newly created arm
 * @returns {object} the existing or newly created arm
 */
function ensureArm(posterior, agent, bin, tier, strength) {
  const existing = findArm(posterior.arms, agent, bin, tier);
  if (existing) return existing;

  const seed = priorFor(tier, strength);
  const created = {
    agent,
    bin,
    tier,
    alpha: seed.alpha,
    beta: seed.beta,
    last_used: null, // never pulled yet
    count: 0,
  };
  posterior.arms.push(created);
  return created;
}
|
|
164
|
+
|
|
165
|
+
/**
|
|
166
|
+
* Sample from a Beta(alpha, beta) distribution via the gamma-ratio
|
|
167
|
+
* trick: X = G(alpha, 1) / (G(alpha, 1) + G(beta, 1)).
|
|
168
|
+
*
|
|
169
|
+
* Gamma(k, 1) sampled via Marsaglia-Tsang (k>=1) or
|
|
170
|
+
* the boost G(k+1) · U^(1/k) (k<1). For our priors alpha/beta ∈ [2, ~10] so the
|
|
171
|
+
* k>=1 branch dominates.
|
|
172
|
+
*
|
|
173
|
+
* @param {number} alpha
|
|
174
|
+
* @param {number} beta
|
|
175
|
+
* @returns {number}
|
|
176
|
+
*/
|
|
177
|
+
function sampleBeta(alpha, beta) {
  // `!(x > 0)` rejects zero, negatives AND NaN/undefined. A bare `<= 0`
  // test lets NaN through (e.g. from a corrupted posterior file), which
  // would burn the sampler's whole rejection loop and return NaN.
  if (!(alpha > 0) || !(beta > 0)) return 0.5;
  const x = sampleGamma(alpha);
  const y = sampleGamma(beta);
  const draw = x / (x + y);
  // 0/0 (both gammas underflowed) or any other undefined ratio: fall back
  // to the uninformative midpoint rather than leaking NaN to the caller.
  return Number.isNaN(draw) ? 0.5 : draw;
}
|
|
184
|
+
|
|
185
|
+
// Math.random() is intentional here. Bandit sampling needs uniform
|
|
186
|
+
// noise, not cryptographic randomness — using crypto + arithmetic is
|
|
187
|
+
// what CodeQL js/biased-cryptographic-random flags. Math.random is
|
|
188
|
+
// uniform-enough for Thompson sampling; security is not a concern.
|
|
189
|
+
function randn() {
  // Box-Muller transform: one standard-normal draw from two uniforms.
  const radiusSeed = Math.random() || 1e-12; // avoid log(0)
  const angleSeed = Math.random();
  const radius = Math.sqrt(-2 * Math.log(radiusSeed));
  const angle = 2 * Math.PI * angleSeed;
  return radius * Math.cos(angle);
}
|
|
194
|
+
|
|
195
|
+
/**
 * One uniform draw from `Math.random()`.
 *
 * @returns {number}
 */
function rand01() {
  const draw = Math.random();
  return draw;
}
|
|
198
|
+
|
|
199
|
+
/**
 * Draw from Gamma(shape = k, scale = 1).
 *
 * k >= 1 uses Marsaglia-Tsang squeeze/rejection. 0 < k < 1 uses the boost
 * Gamma(k) = Gamma(k + 1) * U^(1/k). A non-numeric/NaN shape yields NaN and
 * a non-positive shape yields 0, both without consuming random draws.
 *
 * @param {number} k shape parameter
 * @returns {number}
 */
function sampleGamma(k) {
  // Without these guards a NaN shape spins the full rejection loop before
  // returning NaN, and -Infinity recurses without bound.
  if (typeof k !== 'number' || Number.isNaN(k)) return NaN;
  if (k <= 0) return 0;
  if (k < 1) {
    const u = rand01();
    return sampleGamma(k + 1) * Math.pow(u, 1 / k);
  }
  const d = k - 1 / 3;
  const c = 1 / Math.sqrt(9 * d);
  // Marsaglia-Tsang.
  // Loop until accepted; bounded iterations for safety.
  for (let i = 0; i < 1000; i++) {
    const x = randn();
    const v = Math.pow(1 + c * x, 3);
    if (v <= 0) continue;
    const u = rand01();
    // Cheap squeeze test first; the exact log test only when it fails.
    if (u < 1 - 0.0331 * Math.pow(x, 4)) return d * v;
    if (Math.log(u) < 0.5 * x * x + d * (1 - v + Math.log(v))) return d * v;
  }
  // Practically unreachable. Fall back to the distribution mean, which is
  // k for Gamma(k, 1), not d = k - 1/3.
  return k;
}
|
|
218
|
+
|
|
219
|
+
/**
|
|
220
|
+
* Compute the discounted parameters of an arm without mutating it. Returns the (alpha, beta)
|
|
221
|
+
* after decay — does NOT persist.
|
|
222
|
+
*
|
|
223
|
+
* @param {object} arm
|
|
224
|
+
* @param {{decay?: number, now?: Date}} [opts]
|
|
225
|
+
* @returns {{alpha: number, beta: number}}
|
|
226
|
+
*/
|
|
227
|
+
function decayArm(arm, opts = {}) {
  const stored = { alpha: arm.alpha, beta: arm.beta };
  // Never pulled: there is no elapsed time to discount.
  if (!arm.last_used) return stored;

  const decay = opts.decay ?? DEFAULT_DECAY;
  const now = opts.now ?? new Date();
  const elapsedMs = now.getTime() - new Date(arm.last_used).getTime();
  const days = Math.max(0, elapsedMs / 86_400_000);
  const factor = Math.pow(decay, days);
  // An unparseable `last_used` (or an out-of-range decay) makes the factor
  // NaN/Infinity. Treat that like "no usable timestamp" and return the
  // stored values instead of propagating NaN into the sampler.
  if (!Number.isFinite(factor)) return stored;

  // Only the evidence above the prior is discounted, so the result shrinks
  // toward the prior and never below it. `opts.strength` selects the prior
  // strength and defaults to PRIOR_STRENGTH.
  const { alpha: pa, beta: pb } = priorFor(arm.tier, opts.strength ?? PRIOR_STRENGTH);
  return {
    alpha: pa + factor * Math.max(0, arm.alpha - pa),
    beta: pb + factor * Math.max(0, arm.beta - pb),
  };
}
|
|
242
|
+
|
|
243
|
+
/**
|
|
244
|
+
* Pull an arm — sample each tier's Beta posterior (with decay) and
|
|
245
|
+
* pick the argmax. Persists the chosen arm's `last_used` + `count`
|
|
246
|
+
* counters. Bandit pull does NOT update the success/fail counters —
|
|
247
|
+
* that happens in `update()` once the outcome is known.
|
|
248
|
+
*
|
|
249
|
+
* @param {{agent: string, bin: string, tiers?: string[], baseDir?: string, posteriorPath?: string, decay?: number, strength?: number, now?: Date}} input
|
|
250
|
+
* @returns {{tier: string, samples: Record<string, number>, posteriorPath: string}}
|
|
251
|
+
*/
|
|
252
|
+
function pull(input) {
  if (!input || typeof input.agent !== 'string' || input.agent.length === 0) {
    throw new TypeError('bandit-router.pull: agent (string) required');
  }
  if (typeof input.bin !== 'string' || input.bin.length === 0) {
    throw new TypeError('bandit-router.pull: bin (string) required');
  }
  const tiers = input.tiers ?? DEFAULT_TIERS;
  // An empty or malformed tier list used to yield `tier: undefined` and
  // persist a junk arm; reject it up front like the other inputs.
  if (
    !Array.isArray(tiers) ||
    tiers.length === 0 ||
    tiers.some((t) => typeof t !== 'string' || t.length === 0)
  ) {
    throw new TypeError('bandit-router.pull: tiers must be a non-empty array of strings');
  }
  const strength = input.strength ?? PRIOR_STRENGTH;
  const now = input.now ?? new Date();

  const posterior = loadPosterior(input);
  /** @type {Record<string, number>} */
  const samples = {};
  let bestTier = tiers[0];
  let bestArm = null;
  let bestSample = -1;
  for (const tier of tiers) {
    const arm = ensureArm(posterior, input.agent, input.bin, tier, strength);
    // Default to the first tier's arm in case no sample beats the sentinel.
    if (bestArm === null) bestArm = arm;
    const decayed = decayArm(arm, { decay: input.decay, now, strength });
    const s = sampleBeta(decayed.alpha, decayed.beta);
    samples[tier] = s;
    if (s > bestSample) {
      bestSample = s;
      bestTier = tier;
      bestArm = arm;
    }
  }
  // Bump counters on the chosen arm (tracked above; no second lookup needed).
  // NOTE(review): decay is only applied to the sampled values; the stored
  // alpha/beta are never discounted, so stamping last_used here restarts
  // the decay clock with the full historical evidence intact — confirm
  // this is the intended "applied at sample time, not stored" behaviour.
  bestArm.last_used = now.toISOString();
  bestArm.count += 1;
  const written = savePosterior(posterior, input);
  return { tier: bestTier, samples, posteriorPath: written };
}
|
|
285
|
+
|
|
286
|
+
/**
|
|
287
|
+
* Update the posterior with a reward signal. Reward is applied as a
|
|
288
|
+
* fractional Bernoulli observation: α += reward, β += (1 − reward), with reward clamped to [0, 1].
|
|
289
|
+
*
|
|
290
|
+
* @param {{agent: string, bin: string, tier: string, reward: number, baseDir?: string, posteriorPath?: string, strength?: number}} input
|
|
291
|
+
* @returns {{alpha: number, beta: number, posteriorPath: string}}
|
|
292
|
+
*/
|
|
293
|
+
function update(input) {
  if (!input) throw new TypeError('bandit-router.update: input required');

  // The arm identity must be three non-empty strings; report the first bad one.
  const badKey = ['agent', 'bin', 'tier'].find(
    (key) => typeof input[key] !== 'string' || input[key].length === 0,
  );
  if (badKey !== undefined) {
    throw new TypeError(`bandit-router.update: ${badKey} (string) required`);
  }

  const { reward } = input;
  if (typeof reward !== 'number' || Number.isNaN(reward)) {
    throw new TypeError('bandit-router.update: reward (number) required');
  }
  // Clamp into [0, 1] so the two increments below always sum to one.
  const clamped = Math.max(0, Math.min(1, reward));

  const posterior = loadPosterior(input);
  const strength = input.strength ?? PRIOR_STRENGTH;
  const arm = ensureArm(posterior, input.agent, input.bin, input.tier, strength);
  arm.alpha += clamped;
  arm.beta += 1 - clamped;

  const posteriorPath = savePosterior(posterior, input);
  return { alpha: arm.alpha, beta: arm.beta, posteriorPath };
}
|
|
312
|
+
|
|
313
|
+
/**
 * Two-stage lexicographic reward (D-06).
 *
 *   if !solidify_pass:            0
 *   elif user_undo_in_session:    0
 *   else: 1 - lambda * normalize(cost_usd + epsilon * wall_time_ms / 1000)
 *
 * Cost is normalised via the supplied `costNormalizer` (defaults to
 * mapping [0, 5 USD] → [0, 1], capped at 1).
 *
 * The result is always a number in [0, 1], never NaN:
 *   - a non-finite `lambda` / `epsilon` falls back to its default
 *     (0.3 / 0.05), exactly as if it had been omitted;
 *   - a NaN `cost_usd` / `wall_time_ms` is treated as missing (0);
 *   - if the cost penalty still cannot be computed (e.g. a custom
 *     normaliser returns NaN) the penalty is ignored, which matches how a
 *     missing cost is already handled.
 *
 * @param {{
 *   solidify_pass: boolean,
 *   user_undo_in_session?: boolean,
 *   cost_usd?: number,
 *   wall_time_ms?: number,
 *   lambda?: number,
 *   epsilon?: number,
 *   costNormalizer?: (n: number) => number,
 * }} input
 * @returns {number} reward in [0, 1]
 */
function computeReward(input) {
  if (!input || typeof input !== 'object') return 0;
  if (!input.solidify_pass) return 0;
  if (input.user_undo_in_session === true) return 0;

  // `typeof NaN === 'number'`, so a plain typeof check lets NaN through and
  // poisons the whole expression; treat NaN like an absent value instead.
  const numberOr = (value, fallback) =>
    typeof value === 'number' && !Number.isNaN(value) ? value : fallback;

  // Weights must be finite: Infinity * 0 is NaN.
  const lambda = Number.isFinite(input.lambda) ? input.lambda : 0.3;
  const epsilon = Number.isFinite(input.epsilon) ? input.epsilon : 0.05;
  const norm =
    typeof input.costNormalizer === 'function'
      ? input.costNormalizer
      : (n) => Math.min(1, Math.max(0, n / 5));

  const wallSeconds = numberOr(input.wall_time_ms, 0) / 1000;
  const raw = numberOr(input.cost_usd, 0) + epsilon * wallSeconds;

  const penalty = lambda * norm(raw);
  // An uncomputable penalty is dropped rather than propagated as NaN.
  const reward = Number.isNaN(penalty) ? 1 : 1 - penalty;
  return Math.min(1, Math.max(0, reward));
}
|
|
349
|
+
|
|
350
|
+
module.exports = {
|
|
351
|
+
pull,
|
|
352
|
+
update,
|
|
353
|
+
reset,
|
|
354
|
+
loadPosterior,
|
|
355
|
+
savePosterior,
|
|
356
|
+
computeReward,
|
|
357
|
+
binForGlobCount,
|
|
358
|
+
decayArm,
|
|
359
|
+
sampleBeta,
|
|
360
|
+
priorFor,
|
|
361
|
+
DEFAULT_PRIORS,
|
|
362
|
+
DEFAULT_TIERS,
|
|
363
|
+
TIER_PRIOR,
|
|
364
|
+
PRIOR_STRENGTH,
|
|
365
|
+
TOUCHES_BINS,
|
|
366
|
+
DEFAULT_POSTERIOR_PATH,
|
|
367
|
+
SCHEMA_VERSION,
|
|
368
|
+
};
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* hedge-ensemble.cjs — AdaNormalHedge weighted-majority over verifier
|
|
3
|
+
* + checker agents (Plan 23.5-02).
|
|
4
|
+
*
|
|
5
|
+
* Parameter-free: no manual learning rate. Weights self-adapt via
|
|
6
|
+
* the AdaNormalHedge regret-bound trick — η is recomputed each round
|
|
7
|
+
* from cumulative loss variance, eliminating the typical "tune η or
|
|
8
|
+
* suffer" tax.
|
|
9
|
+
*
|
|
10
|
+
* Weights persist at `.design/telemetry/hedge-weights.json` (atomic
|
|
11
|
+
* .tmp + rename). Schema:
|
|
12
|
+
* { schema_version: '1.0.0',
|
|
13
|
+
* generated_at: ISO,
|
|
14
|
+
* pools: { <poolId>: { agents: { <agentId>: {weight, cumLoss, cumLoss2, rounds} } } } }
|
|
15
|
+
*
|
|
16
|
+
* Reused by adaptive_mode = "hedge" or "full" — see Plan 23.5-04.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
'use strict';
|
|
20
|
+
|
|
21
|
+
const fs = require('node:fs');
|
|
22
|
+
const path = require('node:path');
|
|
23
|
+
|
|
24
|
+
const DEFAULT_WEIGHTS_PATH = '.design/telemetry/hedge-weights.json';
|
|
25
|
+
const SCHEMA_VERSION = '1.0.0';
|
|
26
|
+
const DEFAULT_VOTE_THRESHOLD = 0.5;
|
|
27
|
+
|
|
28
|
+
/**
 * Work out where the hedge weights file lives.
 *
 * An absolute `opts.weightsPath` is returned untouched. A relative one, or
 * the default location when none is given, is resolved against
 * `opts.baseDir`, falling back to the current working directory.
 *
 * @param {{baseDir?: string, weightsPath?: string}} [opts]
 * @returns {string} absolute path of the weights file
 */
function resolvePath(opts = {}) {
  // Looked up lazily so an absolute weightsPath never touches process.cwd().
  const root = () => opts.baseDir ?? process.cwd();
  const custom = opts.weightsPath;
  if (!custom) return path.resolve(root(), DEFAULT_WEIGHTS_PATH);
  if (path.isAbsolute(custom)) return custom;
  return path.resolve(root(), custom);
}
|
|
36
|
+
|
|
37
|
+
/**
 * Read the persisted hedge state.
 *
 * Best-effort by design: when the file does not exist, or reading/parsing
 * it throws, a fresh empty state is returned instead of an error. A parsed
 * document whose `pools` field is missing or not an object gets an empty
 * `pools` map.
 *
 * @param {{baseDir?: string, weightsPath?: string}} [opts]
 * @returns {{schema_version: string, generated_at: string, pools: object}}
 */
function loadWeights(opts = {}) {
  const emptyState = () => ({
    schema_version: SCHEMA_VERSION,
    generated_at: new Date().toISOString(),
    pools: {},
  });

  const file = resolvePath(opts);
  if (!fs.existsSync(file)) return emptyState();

  try {
    const parsed = JSON.parse(fs.readFileSync(file, 'utf8'));
    const hasPools = parsed.pools && typeof parsed.pools === 'object';
    if (!hasPools) parsed.pools = {};
    return parsed;
  } catch {
    // Unreadable or corrupt file: start over rather than fail the caller.
    return emptyState();
  }
}
|
|
53
|
+
|
|
54
|
+
/**
 * Persist the hedge state to the weights file.
 *
 * The JSON is first written to a sibling `<file>.tmp` and then renamed over
 * the target. `state.generated_at` is overwritten with the current time, so
 * the caller's object is mutated.
 *
 * @param {object} state hedge state to serialise
 * @param {{baseDir?: string, weightsPath?: string}} [opts]
 * @returns {string} path the state was written to
 */
function saveWeights(state, opts = {}) {
  const target = resolvePath(opts);
  const staging = `${target}.tmp`;

  fs.mkdirSync(path.dirname(target), { recursive: true });
  state.generated_at = new Date().toISOString();

  const serialized = JSON.stringify(state, null, 2);
  fs.writeFileSync(staging, serialized);
  fs.renameSync(staging, target);
  return target;
}
|
|
63
|
+
|
|
64
|
+
/**
 * Return the pool record for `poolId`, creating it when absent.
 *
 * Only OWN properties of `state.pools` count as existing pools. A plain
 * truthiness lookup would resolve ids such as "constructor" or "__proto__"
 * to members inherited from Object.prototype and hand those back as if they
 * were pool records. A stored pool that lacks a usable `agents` map (for
 * example from a hand-edited file) is repaired in place.
 *
 * @param {{pools: object}} state hedge state (mutated when a pool is created or repaired)
 * @param {string} poolId
 * @returns {{agents: object}}
 */
function ensurePool(state, poolId) {
  const existing = Object.hasOwn(state.pools, poolId) ? state.pools[poolId] : undefined;
  if (existing && typeof existing === 'object') {
    if (!existing.agents || typeof existing.agents !== 'object') existing.agents = {};
    return existing;
  }
  const created = { agents: {} };
  // defineProperty, not assignment: `obj['__proto__'] = x` would swap the
  // prototype instead of adding an enumerable own key.
  Object.defineProperty(state.pools, poolId, {
    value: created,
    writable: true,
    enumerable: true,
    configurable: true,
  });
  return created;
}
|
|
68
|
+
|
|
69
|
+
/**
 * Return the bookkeeping record for `agentId` in `pool`, creating a fresh
 * one (weight 1, zeroed counters) when the pool has none.
 *
 * Only OWN properties of `pool.agents` count. With a plain truthiness
 * lookup, ids such as "constructor" or "__proto__" resolve to inherited
 * Object.prototype members; callers that then do `record.cumLoss += …`
 * would write onto `Object` / `Object.prototype` instead of a pool record.
 * A stored entry that is falsy (e.g. `null`) is replaced, as before.
 *
 * @param {{agents: object}} pool pool record (mutated when an agent is created)
 * @param {string} agentId
 * @returns {{weight: number, cumLoss: number, cumLoss2: number, rounds: number}}
 */
function ensureAgent(pool, agentId) {
  const existing = Object.hasOwn(pool.agents, agentId) ? pool.agents[agentId] : undefined;
  if (existing) return existing;

  const created = {
    weight: 1, // uniform start; normalised on read
    cumLoss: 0,
    cumLoss2: 0,
    rounds: 0,
  };
  // defineProperty, not assignment: `obj['__proto__'] = x` would swap the
  // prototype instead of adding an enumerable own key.
  Object.defineProperty(pool.agents, agentId, {
    value: created,
    writable: true,
    enumerable: true,
    configurable: true,
  });
  return created;
}
|
|
80
|
+
|
|
81
|
+
/**
 * Apply one round of losses to a pool and persist the result.
 * `losses` maps agentId → loss; each value is clamped to [0, 1].
 *
 * Update performed for every agent i that reported a loss this round:
 *   mean   = average of this round's clamped losses
 *   C_i   += (loss_i - mean)^2                 (stored as `cumLoss2`)
 *   η_i    = input.eta, when supplied as a number, otherwise
 *            sqrt(ln(max(2, N)) / max(1, C_i)), N = agents in the pool
 *   w_i   *= exp(-η_i * loss_i)
 * A weight that becomes non-finite or non-positive is floored at 1e-9.
 * Afterwards all weights in the pool (including agents that did not report
 * this round) are renormalised to sum to 1.
 *
 * This is an exponential-weights simplification; no regret term is tracked.
 * NOTE(review): the original header claimed parity with the full
 * AdaNormalHedge regret bound for binary losses — not verifiable from this
 * code; confirm against the plan document before relying on it.
 *
 * The mean and the per-agent deviation are both computed from the CLAMPED
 * losses, so an out-of-range input cannot inflate the variance term.
 *
 * @param {{poolId: string, losses: Record<string, number>, baseDir?: string, weightsPath?: string, eta?: number}} input
 * @returns {{weights: Record<string, number>, weightsPath: string}}
 * @throws {TypeError} when poolId is not a non-empty string, losses is not
 *   an object, or any loss is not a non-NaN number.
 */
function loss(input) {
  if (!input || typeof input.poolId !== 'string' || input.poolId.length === 0) {
    throw new TypeError('hedge-ensemble.loss: poolId (string) required');
  }
  if (!input.losses || typeof input.losses !== 'object') {
    throw new TypeError('hedge-ensemble.loss: losses (Record<string, number>) required');
  }

  // Validate and clamp every reported loss before touching persisted state.
  const clamped = Object.entries(input.losses).map(([agentId, rawLoss]) => {
    if (typeof rawLoss !== 'number' || Number.isNaN(rawLoss)) {
      throw new TypeError(`hedge-ensemble.loss: losses.${agentId} must be a number`);
    }
    return [agentId, Math.min(1, Math.max(0, rawLoss))];
  });

  const state = loadWeights(input);
  const pool = ensurePool(state, input.poolId);

  // Make sure every reporting agent has a record before N is counted.
  const touched = clamped.map(([agentId, lossVal]) => [ensureAgent(pool, agentId), lossVal]);
  const N = Object.keys(pool.agents).length;

  // Mean loss this round, over the agents that reported a value.
  const meanLoss =
    touched.length > 0 ? touched.reduce((sum, [, lossVal]) => sum + lossVal, 0) / touched.length : 0;

  for (const [a, lossVal] of touched) {
    const dev = lossVal - meanLoss;
    a.cumLoss += lossVal;
    a.cumLoss2 += dev * dev;
    a.rounds += 1;
    const eta =
      typeof input.eta === 'number'
        ? input.eta
        : Math.sqrt(Math.log(Math.max(2, N)) / Math.max(1, a.cumLoss2));
    a.weight *= Math.exp(-eta * lossVal);
    // Underflow / bad eta guard: keep the agent alive with a tiny weight.
    if (!Number.isFinite(a.weight) || a.weight <= 0) a.weight = 1e-9;
  }

  // Renormalize over the whole pool.
  const total = Object.values(pool.agents).reduce((s, x) => s + x.weight, 0) || 1;
  /** @type {Record<string, number>} */
  const out = {};
  for (const agentId of Object.keys(pool.agents)) {
    pool.agents[agentId].weight /= total;
    out[agentId] = pool.agents[agentId].weight;
  }

  const writtenPath = saveWeights(state, input);
  return { weights: out, weightsPath: writtenPath };
}
|
|
149
|
+
|
|
150
|
+
/**
 * Weighted-majority verdict for a pool.
 *
 * Each entry of `votes` counts as a pass only when it is exactly `true` or
 * `1`; anything else counts as a fail. The verdict passes when the
 * pass-weighted share of the voting agents' weight is greater than OR EQUAL
 * to the threshold (default 0.5). Only agents that voted contribute to the
 * denominator; with no voting weight at all the share is 0.
 *
 * Agents not yet known to the pool are given a default record for the
 * duration of the call; nothing is written back to disk here.
 * NOTE(review): that default record has weight 1, while weights stored by
 * `loss()` are normalised to sum to 1 — a first-time voter can therefore
 * outweigh established agents. Confirm this is intended.
 *
 * @param {{poolId: string, votes: Record<string, 0|1|boolean>, threshold?: number, baseDir?: string, weightsPath?: string}} input
 * @returns {{passes: boolean, weighted: number, threshold: number, perAgent: Record<string, {weight: number, vote: number}>}}
 * @throws {TypeError} when poolId is not a string or votes is not an object.
 */
function vote(input) {
  if (!input || typeof input.poolId !== 'string') {
    throw new TypeError('hedge-ensemble.vote: poolId required');
  }
  if (!input.votes || typeof input.votes !== 'object') {
    throw new TypeError('hedge-ensemble.vote: votes required');
  }

  const pool = ensurePool(loadWeights(input), input.poolId);
  const threshold = typeof input.threshold === 'number' ? input.threshold : DEFAULT_VOTE_THRESHOLD;

  /** @type {Record<string, {weight: number, vote: number}>} */
  const perAgent = {};
  let passMass = 0;
  let votingMass = 0;

  for (const agentId of Object.keys(input.votes)) {
    const ballot = input.votes[agentId];
    const asNumber = ballot === true || ballot === 1 ? 1 : 0;
    const { weight } = ensureAgent(pool, agentId);
    perAgent[agentId] = { weight, vote: asNumber };
    passMass += weight * asNumber;
    votingMass += weight;
  }

  // Share of the VOTING agents' weight that voted pass; silent pool members
  // are left out of the denominator.
  const weighted = votingMass > 0 ? passMass / votingMass : 0;
  return { passes: weighted >= threshold, weighted, threshold, perAgent };
}
|
|
185
|
+
|
|
186
|
+
/**
 * Report a pool's current weights, rescaled so they sum to 1 across the
 * pool's agents. An unknown pool yields an empty object; if the stored
 * weights do not sum to a positive number every agent is reported as 0.
 *
 * @param {{poolId: string, baseDir?: string, weightsPath?: string}} input
 * @returns {Record<string, number>}
 * @throws {TypeError} when poolId is not a string.
 */
function weights(input) {
  if (!input || typeof input.poolId !== 'string') {
    throw new TypeError('hedge-ensemble.weights: poolId required');
  }

  const pool = loadWeights(input).pools[input.poolId];
  if (!pool) return {};

  const records = Object.entries(pool.agents);
  let mass = 0;
  for (const [, record] of records) mass += record.weight;

  /** @type {Record<string, number>} */
  const normalised = {};
  for (const [agentId, record] of records) {
    normalised[agentId] = mass > 0 ? record.weight / mass : 0;
  }
  return normalised;
}
|
|
207
|
+
|
|
208
|
+
module.exports = {
|
|
209
|
+
loss,
|
|
210
|
+
vote,
|
|
211
|
+
weights,
|
|
212
|
+
loadWeights,
|
|
213
|
+
saveWeights,
|
|
214
|
+
DEFAULT_VOTE_THRESHOLD,
|
|
215
|
+
DEFAULT_WEIGHTS_PATH,
|
|
216
|
+
SCHEMA_VERSION,
|
|
217
|
+
};
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* mmr-rerank.cjs — Maximal Marginal Relevance post-pass on top-K
|
|
3
|
+
* (Plan 23.5-03).
|
|
4
|
+
*
|
|
5
|
+
* Solves the "all 5 surfaced learnings are about the same thing"
|
|
6
|
+
* failure mode in the Phase 14.5 decision-injector. Greedy selection
|
|
7
|
+
* with the standard MMR criterion:
|
|
8
|
+
*
|
|
9
|
+
* nextItem = argmax_{i ∉ selected} λ * relevance(i) − (1 − λ) * max_sim(i, selected)
|
|
10
|
+
*
|
|
11
|
+
* Similarity is token-overlap (Jaccard on case-folded word n-grams,
|
|
12
|
+
* default n=2). No external deps, no embedding API.
|
|
13
|
+
*
|
|
14
|
+
* Pure helper — caller supplies the candidates and a relevance score
|
|
15
|
+
* already computed by upstream (e.g. grep hit count, BM25, or the
|
|
16
|
+
* decision-injector's existing rank function). MMR re-ranks ONLY.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
'use strict';
|
|
20
|
+
|
|
21
|
+
const DEFAULT_LAMBDA = 0.7;
|
|
22
|
+
const DEFAULT_NGRAM = 2;
|
|
23
|
+
const TOKEN_RE = /[\p{L}\p{N}_-]+/gu;
|
|
24
|
+
|
|
25
|
+
/**
 * Split text into lower-cased tokens, where a token is a maximal run
 * matched by `TOKEN_RE` (letters, digits, underscore and dash).
 * Anything that is not a non-empty string produces no tokens.
 *
 * @param {string} text
 * @returns {string[]}
 */
function tokenize(text) {
  const usable = typeof text === 'string' && text.length > 0;
  if (!usable) return [];
  return text.toLowerCase().match(TOKEN_RE) ?? [];
}
|
|
36
|
+
|
|
37
|
+
/**
 * Build a Set of word n-grams from a string.
 *
 * Tokens come from `tokenize(text)`; each gram is `n` consecutive tokens
 * joined by a single space. When the text has fewer than `n` tokens the
 * individual tokens are returned instead, so short texts still compare.
 *
 * `n` is sanitised: a finite value >= 1 is floored to an integer, anything
 * else (missing, NaN, zero, negative, non-number) falls back to
 * `DEFAULT_NGRAM`. Without this, such values silently produced an empty set
 * or a set containing only the empty string.
 *
 * @param {string} text
 * @param {number} [n] gram size; defaults to DEFAULT_NGRAM
 * @returns {Set<string>}
 */
function ngrams(text, n = DEFAULT_NGRAM) {
  const size = Number.isFinite(n) && n >= 1 ? Math.floor(n) : DEFAULT_NGRAM;
  const toks = tokenize(text);
  if (toks.length < size) return new Set(toks);
  const out = new Set();
  for (let i = 0; i + size <= toks.length; i++) {
    out.add(toks.slice(i, i + size).join(' '));
  }
  return out;
}
|
|
53
|
+
|
|
54
|
+
/**
 * Jaccard similarity between two strings on word n-grams.
 *
 * Builds the n-gram set of each string and delegates the set arithmetic to
 * `jaccard()`, so the string-level and set-level entry points cannot drift
 * apart. Returns 0 when either string yields no grams.
 *
 * @param {string} a
 * @param {string} b
 * @param {number} [n] gram size; defaults to DEFAULT_NGRAM
 * @returns {number} 0..1
 */
function similarity(a, b, n = DEFAULT_NGRAM) {
  return jaccard(ngrams(a, n), ngrams(b, n));
}
|
|
71
|
+
|
|
72
|
+
/**
 * Re-rank an array of items using the MMR criterion.
 *
 * Greedy selection: at each step pick the not-yet-selected item maximising
 *   lambda * relevance(i) - (1 - lambda) * max_sim(i, selected)
 * where similarity is Jaccard over word n-grams of the item text. Ties go
 * to the item that appears first in `items`.
 *
 * Defaults: text is the item itself when it is a string, else `item.text`
 * (or '' when absent); relevance is `item.relevance`, else `item.score`,
 * else 1. `k` is capped at `items.length`; a missing or non-positive `k`
 * selects everything.
 *
 * Robustness: a non-finite `lambda` falls back to DEFAULT_LAMBDA, and a
 * candidate whose score evaluates to NaN (e.g. NaN relevance) is ranked
 * last rather than being unselectable, so the result always contains `k`
 * items. Previously those inputs silently shortened or emptied the result.
 *
 * NOTE(review): relevance is used as-is while similarity lies in [0, 1];
 * relevance on a much larger scale will dominate the diversity term —
 * confirm callers pass comparable scores.
 *
 * @param {Array<{text: string, relevance?: number}>} items
 * @param {{lambda?: number, k?: number, ngram?: number, textOf?: (item: object) => string, relevanceOf?: (item: object) => number}} [opts]
 * @returns {Array<object>} subset of input in MMR-selected order
 * @throws {TypeError} when `items` is not an array.
 */
function rerank(items, opts = {}) {
  if (!Array.isArray(items)) {
    throw new TypeError('mmr-rerank.rerank: items must be an array');
  }
  if (items.length === 0) return [];
  const lambda = Number.isFinite(opts.lambda) ? opts.lambda : DEFAULT_LAMBDA;
  const ngram = typeof opts.ngram === 'number' ? opts.ngram : DEFAULT_NGRAM;
  const k = typeof opts.k === 'number' && opts.k > 0 ? Math.min(opts.k, items.length) : items.length;
  const textOf =
    typeof opts.textOf === 'function'
      ? opts.textOf
      : (it) => (typeof it === 'string' ? it : (it && typeof it.text === 'string' ? it.text : ''));
  const relOf =
    typeof opts.relevanceOf === 'function'
      ? opts.relevanceOf
      : (it) => {
          if (it && typeof it.relevance === 'number') return it.relevance;
          if (it && typeof it.score === 'number') return it.score;
          return 1;
        };

  // Pre-compute per-candidate gram sets and relevance once.
  const grams = items.map((it) => ngrams(textOf(it), ngram));
  const relevance = items.map((it) => relOf(it));
  // taken[i] marks candidates already selected; scanning indices in order
  // keeps the "earliest item wins ties" behaviour.
  const taken = new Array(items.length).fill(false);
  /** @type {number[]} */
  const selected = [];

  while (selected.length < k) {
    let bestIdx = -1;
    let bestScore = -Infinity;
    for (let i = 0; i < items.length; i++) {
      if (taken[i]) continue;
      let maxSim = 0;
      for (const j of selected) {
        const sim = jaccard(grams[i], grams[j]);
        if (sim > maxSim) maxSim = sim;
      }
      const rawScore = lambda * relevance[i] - (1 - lambda) * maxSim;
      // NaN compares false against everything; rank it last instead.
      const score = Number.isNaN(rawScore) ? -Infinity : rawScore;
      if (bestIdx === -1 || score > bestScore) {
        bestScore = score;
        bestIdx = i;
      }
    }
    if (bestIdx === -1) break; // no candidates left
    taken[bestIdx] = true;
    selected.push(bestIdx);
  }
  return selected.map((i) => items[i]);
}
|
|
129
|
+
|
|
130
|
+
/**
 * Jaccard index |A ∩ B| / |A ∪ B| of two already-built n-gram sets.
 * Operates on sets directly so the rerank loop does not have to rebuild
 * grams on every comparison. An empty set on either side gives 0.
 *
 * @param {Set<string>} A
 * @param {Set<string>} B
 * @returns {number}
 */
function jaccard(A, B) {
  const sizeA = A.size;
  const sizeB = B.size;
  if (sizeA === 0 || sizeB === 0) return 0;

  let shared = 0;
  A.forEach((gram) => {
    if (B.has(gram)) shared += 1;
  });

  const unionSize = sizeA + sizeB - shared;
  if (unionSize > 0) return shared / unionSize;
  return 0;
}
|
|
145
|
+
|
|
146
|
+
module.exports = {
|
|
147
|
+
rerank,
|
|
148
|
+
similarity,
|
|
149
|
+
tokenize,
|
|
150
|
+
ngrams,
|
|
151
|
+
jaccard,
|
|
152
|
+
DEFAULT_LAMBDA,
|
|
153
|
+
DEFAULT_NGRAM,
|
|
154
|
+
};
|