@hegemonart/get-design-done 1.23.0 → 1.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/CHANGELOG.md +97 -0
- package/README.md +36 -7
- package/package.json +2 -1
- package/scripts/install.cjs +164 -116
- package/scripts/lib/adaptive-mode.cjs +170 -0
- package/scripts/lib/bandit-router.cjs +368 -0
- package/scripts/lib/hedge-ensemble.cjs +217 -0
- package/scripts/lib/install/config-dir.cjs +55 -0
- package/scripts/lib/install/installer.cjs +244 -0
- package/scripts/lib/install/interactive.cjs +142 -0
- package/scripts/lib/install/merge.cjs +103 -0
- package/scripts/lib/install/runtimes.cjs +172 -0
- package/scripts/lib/mmr-rerank.cjs +154 -0
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* adaptive-mode.cjs — feature-flag ladder facade for the Phase 23.5
|
|
3
|
+
* no-regret stack (Plan 23.5-04).
|
|
4
|
+
*
|
|
5
|
+
* Three modes, ladder-shaped:
|
|
6
|
+
*
|
|
7
|
+
* "static" — Phase 10.1 behaviour. Static tier_overrides map applies;
|
|
8
|
+
* no posterior writes; no hedge weight updates; no MMR.
|
|
9
|
+
* Default for all installs.
|
|
10
|
+
*
|
|
11
|
+
* "hedge" — Adds AdaNormalHedge consensus thresholding to verifier
|
|
12
|
+
* + checker pools. Routing still static. Safest intro
|
|
13
|
+
* level — bandit routing is NOT enabled, so the model
|
|
14
|
+
* choice for any agent is unchanged.
|
|
15
|
+
*
|
|
16
|
+
* "full" — Adds bandit Thompson-sampling routing on top of hedge.
|
|
17
|
+
* Both posterior + hedge weights persist. Reflector
|
|
18
|
+
* proposals based on confidence intervals enabled.
|
|
19
|
+
*
|
|
20
|
+
* The ladder is read from `.design/budget.json.adaptive_mode`. Fallback
|
|
21
|
+
* default = "static". Unknown values clamp to "static" with a stderr
|
|
22
|
+
* warning (silent if `quiet: true`).
|
|
23
|
+
*
|
|
24
|
+
* This module owns the SINGLE source of truth for "is bandit on / is
|
|
25
|
+
* hedge on" — every consumer (router, hedge, MMR, reflector, the
|
|
26
|
+
* Phase 22 budget-enforcer hook) reads from `getMode(opts)`.
|
|
27
|
+
*
|
|
28
|
+
* No external deps. CommonJS.
|
|
29
|
+
*/
|
|
30
|
+
|
|
31
|
+
'use strict';
|
|
32
|
+
|
|
33
|
+
const fs = require('node:fs');
|
|
34
|
+
const path = require('node:path');
|
|
35
|
+
|
|
36
|
+
/** Location of the budget config, relative to the resolved base directory. */
const DEFAULT_BUDGET_PATH = '.design/budget.json';

/** Accepted `adaptive_mode` values, in ladder order. */
const VALID_MODES = Object.freeze(['static', 'hedge', 'full']);

/** Mode used whenever no valid mode is configured: the first rung. */
const DEFAULT_MODE = VALID_MODES[0];

/**
 * Capability matrix per mode. Each row says which adaptive features are
 * switched on for that mode; callers read individual flags as booleans.
 */
const MODE_CAPS = Object.freeze({
  static: Object.freeze({
    bandit: false,
    hedge: false,
    mmr: false,
    reflector_proposals: false,
  }),
  hedge: Object.freeze({
    bandit: false,
    hedge: true,
    mmr: true,
    reflector_proposals: false,
  }),
  full: Object.freeze({
    bandit: true,
    hedge: true,
    mmr: true,
    reflector_proposals: true,
  }),
});
|
|
46
|
+
|
|
47
|
+
/**
 * Work out the absolute path of the budget config file.
 *
 * An absolute `opts.budgetPath` is returned as-is. A relative one — or the
 * default relative path when none is given — is resolved against
 * `opts.baseDir`, falling back to the process working directory when
 * `baseDir` is null or undefined.
 *
 * @param {{baseDir?: string, budgetPath?: string}} [opts]
 * @returns {string} absolute path
 */
function resolveBudgetPath(opts = {}) {
  const target = opts.budgetPath || DEFAULT_BUDGET_PATH;
  if (path.isAbsolute(target)) {
    return target;
  }
  const root = opts.baseDir ?? process.cwd();
  return path.resolve(root, target);
}
|
|
55
|
+
|
|
56
|
+
/**
 * Look up the configured adaptive mode in the budget config file.
 *
 * Returns the default mode when the file does not exist, cannot be read
 * or parsed, has no string `adaptive_mode`, or names a mode that is not
 * in VALID_MODES. Only the last case writes a warning to stderr, and
 * only when `opts.quiet` is falsy; a failure to write the warning is
 * ignored.
 *
 * @param {{baseDir?: string, budgetPath?: string, quiet?: boolean}} [opts]
 * @returns {'static'|'hedge'|'full'}
 */
function getMode(opts = {}) {
  const budgetFile = resolveBudgetPath(opts);
  if (!fs.existsSync(budgetFile)) {
    return DEFAULT_MODE;
  }

  /** @type {{adaptive_mode?: string}} */
  let parsed;
  try {
    const raw = fs.readFileSync(budgetFile, 'utf8');
    parsed = JSON.parse(raw);
  } catch {
    return DEFAULT_MODE;
  }

  // Anything that is not a non-empty string counts as "not configured".
  const hasString = Boolean(parsed) && typeof parsed.adaptive_mode === 'string';
  const configured = hasString ? parsed.adaptive_mode : '';
  if (configured === '') {
    return DEFAULT_MODE;
  }

  if (VALID_MODES.includes(configured)) {
    return /** @type {'static'|'hedge'|'full'} */ (configured);
  }

  if (!opts.quiet) {
    try {
      process.stderr.write(
        `[adaptive-mode] unknown adaptive_mode "${configured}" in ${budgetFile}; falling back to "static"\n`,
      );
    } catch {
      /* swallow */
    }
  }
  return DEFAULT_MODE;
}
|
|
90
|
+
|
|
91
|
+
/**
 * Capability flags for whichever mode `getMode(opts)` currently reports.
 *
 * @param {{baseDir?: string, budgetPath?: string, quiet?: boolean}} [opts]
 * @returns {{bandit: boolean, hedge: boolean, mmr: boolean, reflector_proposals: boolean}}
 */
function caps(opts = {}) {
  const activeMode = getMode(opts);
  return MODE_CAPS[activeMode];
}
|
|
100
|
+
|
|
101
|
+
/**
 * Set the adaptive_mode on disk, preserving every other key already in
 * the budget config.
 *
 * The write goes to `<path>.tmp` first and is then renamed over the
 * target. The parent directory is created when missing. If the existing
 * file is unreadable, is not valid JSON, or its JSON root is not a plain
 * object (null, array, string, number…), its content is discarded and
 * the file is rewritten as `{ "adaptive_mode": mode }` — a non-object
 * root cannot carry the key, so keeping it would either throw or drop
 * the mode when serialised.
 *
 * @param {'static'|'hedge'|'full'} mode
 * @param {{baseDir?: string, budgetPath?: string}} [opts]
 * @returns {string} absolute path written
 * @throws {RangeError} when `mode` is not one of VALID_MODES
 */
function setMode(mode, opts = {}) {
  if (!VALID_MODES.includes(mode)) {
    throw new RangeError(
      `adaptive-mode.setMode: mode must be one of [${VALID_MODES.join('|')}], got ${JSON.stringify(mode)}`,
    );
  }
  const p = resolveBudgetPath(opts);
  /** @type {Record<string, unknown>} */
  let cfg = {};
  if (fs.existsSync(p)) {
    try {
      const parsed = JSON.parse(fs.readFileSync(p, 'utf8'));
      // Only a plain object can hold adaptive_mode and survive JSON.stringify.
      if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
        cfg = parsed;
      }
    } catch {
      cfg = {};
    }
  }
  cfg.adaptive_mode = mode;
  fs.mkdirSync(path.dirname(p), { recursive: true });
  const tmp = p + '.tmp';
  try {
    fs.writeFileSync(tmp, JSON.stringify(cfg, null, 2));
    fs.renameSync(tmp, p);
  } catch (err) {
    // Best-effort cleanup so a failed write does not leave a stale temp file;
    // the original error is what the caller needs to see.
    try {
      fs.unlinkSync(tmp);
    } catch {
      /* temp file was never created or is already gone */
    }
    throw err;
  }
  return p;
}
|
|
133
|
+
|
|
134
|
+
/**
 * Predicate: is the `bandit` capability on for the current mode?
 * Meant to replace scattered `mode === 'full'` style comparisons.
 *
 * @param {{baseDir?: string, budgetPath?: string}} [opts]
 * @returns {boolean}
 */
function isBanditEnabled(opts = {}) {
  const { bandit } = caps(opts);
  return bandit;
}
|
|
144
|
+
|
|
145
|
+
/**
 * Predicate: is the `hedge` capability on for the current mode?
 *
 * @param {{baseDir?: string, budgetPath?: string}} [opts]
 * @returns {boolean}
 */
function isHedgeEnabled(opts = {}) {
  const { hedge } = caps(opts);
  return hedge;
}
|
|
148
|
+
|
|
149
|
+
/**
 * Predicate: is the `mmr` capability on for the current mode?
 *
 * @param {{baseDir?: string, budgetPath?: string}} [opts]
 * @returns {boolean}
 */
function isMmrEnabled(opts = {}) {
  const { mmr } = caps(opts);
  return mmr;
}
|
|
152
|
+
|
|
153
|
+
/**
 * Predicate: is the `reflector_proposals` capability on for the current mode?
 *
 * @param {{baseDir?: string, budgetPath?: string}} [opts]
 * @returns {boolean}
 */
function isReflectorProposalsEnabled(opts = {}) {
  const flags = caps(opts);
  return flags.reflector_proposals;
}
|
|
156
|
+
|
|
157
|
+
module.exports = {
|
|
158
|
+
getMode,
|
|
159
|
+
setMode,
|
|
160
|
+
caps,
|
|
161
|
+
isBanditEnabled,
|
|
162
|
+
isHedgeEnabled,
|
|
163
|
+
isMmrEnabled,
|
|
164
|
+
isReflectorProposalsEnabled,
|
|
165
|
+
resolveBudgetPath,
|
|
166
|
+
DEFAULT_BUDGET_PATH,
|
|
167
|
+
DEFAULT_MODE,
|
|
168
|
+
VALID_MODES,
|
|
169
|
+
MODE_CAPS,
|
|
170
|
+
};
|
|
@@ -0,0 +1,368 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* bandit-router.cjs — contextual Thompson-sampling bandit over
|
|
3
|
+
* (agent_type, touches_size_bin) → {haiku, sonnet, opus} (Plan 23.5-01).
|
|
4
|
+
*
|
|
5
|
+
* Replaces Phase 10.1's static tier_overrides map when the user opts
|
|
6
|
+
* into adaptive_mode = "full". The static map continues to apply when
|
|
7
|
+
* adaptive_mode = "static" (default).
|
|
8
|
+
*
|
|
9
|
+
* Posterior persistence:
|
|
10
|
+
* .design/telemetry/posterior.json
|
|
11
|
+
* { schema_version: '1.0.0',
|
|
12
|
+
* generated_at: ISO,
|
|
13
|
+
* arms: [{agent, bin, tier, alpha, beta, last_used, count}] }
|
|
14
|
+
*
|
|
15
|
+
* Atomic .tmp + rename. Discounted Thompson via per-arm time-decay
|
|
16
|
+
* factor `rho^days_since_last_use` applied at sample time, not stored.
|
|
17
|
+
*
|
|
18
|
+
* Reward computation (D-06): two-stage lexicographic
|
|
19
|
+
* if !solidify_pass: reward = 0
|
|
20
|
+
* elif user_undo_in_session: reward = 0
|
|
21
|
+
* else: reward = 1 - lambda * normalize(cost + epsilon * wall_time)
|
|
22
|
+
*
|
|
23
|
+
* No external deps. CommonJS to match scripts/lib/ siblings.
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
'use strict';
|
|
27
|
+
|
|
28
|
+
const fs = require('node:fs');
|
|
29
|
+
const path = require('node:path');
|
|
30
|
+
|
|
31
|
+
/** Posterior file location, relative to the resolved base directory. */
const DEFAULT_POSTERIOR_PATH = '.design/telemetry/posterior.json';

/** Schema version stamped into a freshly created posterior envelope. */
const SCHEMA_VERSION = '1.0.0';

// Per-day decay factor; evidence halves after roughly two months
// (0.988^d = 0.5 at d ≈ 57).
const DEFAULT_DECAY = 0.988;

// Total pseudo-count (alpha + beta) given to an informed prior (D-03).
// At ≈ 10, some 5–10 local samples will visibly shift the posterior.
const PRIOR_STRENGTH = 10;

/** Tiers sampled by default, in evaluation order. */
const DEFAULT_TIERS = Object.freeze(['haiku', 'sonnet', 'opus']);

// Informed prior success rate per tier (D-03).
const TIER_PRIOR = Object.freeze({
  haiku: 0.6,
  sonnet: 0.8,
  opus: 0.85,
});

/** The defaults above bundled into one exported object. */
const DEFAULT_PRIORS = Object.freeze({
  decay: DEFAULT_DECAY,
  strength: PRIOR_STRENGTH,
  tiers: DEFAULT_TIERS,
  perTier: TIER_PRIOR,
});

/**
 * Touches-size bins in ascending order; `max` is the inclusive upper
 * bound of a bin. The final bin is unbounded.
 */
const TOUCHES_BINS = Object.freeze([
  { name: 'tiny', max: 4 },
  { name: 'small', max: 15 },
  { name: 'medium', max: 50 },
  { name: 'large', max: Infinity },
]);
|
|
61
|
+
|
|
62
|
+
/**
 * Map a glob count onto the name of its touches-size bin.
 *
 * Picks the first bin whose inclusive `max` is not exceeded. A count
 * that satisfies no bin (for instance NaN, which fails every
 * comparison) falls back to 'large'.
 *
 * @param {number} globCount
 * @returns {string}
 */
function binForGlobCount(globCount) {
  const match = TOUCHES_BINS.find((bin) => globCount <= bin.max);
  return match ? match.name : 'large';
}
|
|
73
|
+
|
|
74
|
+
/**
 * Load the posterior file or return a fresh, empty envelope.
 *
 * A fresh envelope is returned when the file is missing, unreadable,
 * not valid JSON, or when its JSON root is not a plain object. Arrays
 * are rejected too: `arms` attached to an array would be dropped by
 * JSON.stringify on the next save, so the posterior would never
 * persist. A plain-object root whose `arms` is not an array has `arms`
 * reset to `[]`; its other fields are returned untouched.
 *
 * @param {{baseDir?: string, posteriorPath?: string}} [opts]
 * @returns {{schema_version: string, generated_at: string, arms: object[]}}
 */
function loadPosterior(opts = {}) {
  const freshEnvelope = () => ({
    schema_version: SCHEMA_VERSION,
    generated_at: new Date().toISOString(),
    arms: [],
  });
  const p = resolvePath(opts);
  if (!fs.existsSync(p)) {
    return freshEnvelope();
  }
  try {
    const data = JSON.parse(fs.readFileSync(p, 'utf8'));
    if (data === null || typeof data !== 'object' || Array.isArray(data)) {
      return freshEnvelope();
    }
    if (!Array.isArray(data.arms)) {
      data.arms = [];
    }
    return data;
  } catch {
    // Unreadable or malformed file: start over rather than fail the caller.
    return freshEnvelope();
  }
}
|
|
94
|
+
|
|
95
|
+
/**
 * Work out the absolute path of the posterior file.
 *
 * An absolute `opts.posteriorPath` is returned as-is. A relative one —
 * or the default relative path when none is given — is resolved against
 * `opts.baseDir`, falling back to the process working directory when
 * `baseDir` is null or undefined.
 *
 * @param {{baseDir?: string, posteriorPath?: string}} [opts]
 * @returns {string} absolute path
 */
function resolvePath(opts = {}) {
  const target = opts.posteriorPath || DEFAULT_POSTERIOR_PATH;
  if (path.isAbsolute(target)) {
    return target;
  }
  const root = opts.baseDir ?? process.cwd();
  return path.resolve(root, target);
}
|
|
103
|
+
|
|
104
|
+
/**
 * Write the posterior to disk via a temp file plus rename.
 *
 * Creates the parent directory when needed and stamps
 * `posterior.generated_at` with the current time — the object passed in
 * is mutated — before serialising it as 2-space-indented JSON.
 *
 * @param {object} posterior
 * @param {{baseDir?: string, posteriorPath?: string}} [opts]
 * @returns {string} absolute path written
 */
function savePosterior(posterior, opts = {}) {
  const target = resolvePath(opts);
  const staging = `${target}.tmp`;
  fs.mkdirSync(path.dirname(target), { recursive: true });
  posterior.generated_at = new Date().toISOString();
  const payload = JSON.stringify(posterior, null, 2);
  fs.writeFileSync(staging, payload);
  fs.renameSync(staging, target);
  return target;
}
|
|
119
|
+
|
|
120
|
+
/**
 * Reset the posterior by deleting its file, if there is one.
 *
 * `opts.reason` is not interpreted; it is echoed back in the result.
 *
 * @param {{baseDir?: string, posteriorPath?: string, reason?: string}} [opts]
 * @returns {{deleted: boolean, path: string, reason?: string}} `deleted`
 *   is true when the file existed and was removed
 */
function reset(opts = {}) {
  const target = resolvePath(opts);
  const deleted = fs.existsSync(target);
  if (deleted) {
    fs.unlinkSync(target);
  }
  return { deleted, path: target, reason: opts.reason };
}
|
|
132
|
+
|
|
133
|
+
/**
 * Build the Beta prior (alpha, beta) for a tier.
 *
 * Tiers listed in TIER_PRIOR get an informed prior: 2 pseudo-counts on
 * each side plus the remaining `strength - 4` split according to the
 * tier's prior rate, so alpha + beta equals `strength`. Any other tier
 * gets a symmetric prior of `strength / 2` each.
 *
 * The lookup is restricted to TIER_PRIOR's own keys. A plain
 * `TIER_PRIOR[tier]` would also find inherited members such as
 * `constructor` or `toString`, and feeding those into the arithmetic
 * yields NaN.
 *
 * @param {string} tier
 * @param {number} strength total pseudo-count (alpha + beta)
 * @returns {{alpha: number, beta: number}}
 */
function priorFor(tier, strength) {
  const known = Object.prototype.hasOwnProperty.call(TIER_PRIOR, tier);
  if (!known) {
    return { alpha: strength / 2, beta: strength / 2 };
  }
  const prior = TIER_PRIOR[tier];
  return {
    alpha: 2 + prior * (strength - 4),
    beta: 2 + (1 - prior) * (strength - 4),
  };
}
|
|
143
|
+
|
|
144
|
+
/**
 * Find the arm matching the (agent, bin, tier) triple.
 *
 * @param {object[]} arms
 * @param {string} agent
 * @param {string} bin
 * @param {string} tier
 * @returns {object|undefined} the first matching arm, or undefined
 */
function findArm(arms, agent, bin, tier) {
  return arms.find((candidate) => {
    if (candidate.agent !== agent) return false;
    if (candidate.bin !== bin) return false;
    return candidate.tier === tier;
  });
}
|
|
147
|
+
|
|
148
|
+
/**
 * Return the arm for (agent, bin, tier), creating it when absent.
 *
 * A new arm is seeded with the tier's prior from `priorFor`, has never
 * been used (`last_used: null`, `count: 0`), and is appended to
 * `posterior.arms`.
 *
 * @param {{arms: object[]}} posterior
 * @param {string} agent
 * @param {string} bin
 * @param {string} tier
 * @param {number} strength prior strength for a newly created arm
 * @returns {object} the existing or newly created arm
 */
function ensureArm(posterior, agent, bin, tier, strength) {
  const existing = findArm(posterior.arms, agent, bin, tier);
  if (existing) {
    return existing;
  }
  const seed = priorFor(tier, strength);
  const created = {
    agent,
    bin,
    tier,
    alpha: seed.alpha,
    beta: seed.beta,
    last_used: null,
    count: 0,
  };
  posterior.arms.push(created);
  return created;
}
|
|
164
|
+
|
|
165
|
+
/**
 * Sample from a Beta(alpha, beta) distribution via the gamma-ratio
 * trick: X = G(alpha, 1) / (G(alpha, 1) + G(beta, 1)).
 *
 * Returns the neutral value 0.5 when either parameter is not a finite
 * positive number, and when both gamma draws come back as 0. NaN and
 * Infinity need the explicit check: they pass a plain `<= 0` test and
 * would turn the ratio into NaN, which silently loses every `>`
 * comparison made against it.
 *
 * @param {number} alpha
 * @param {number} beta
 * @returns {number} a sample in [0, 1]
 */
function sampleBeta(alpha, beta) {
  const usable = (shape) => Number.isFinite(shape) && shape > 0;
  if (!usable(alpha) || !usable(beta)) return 0.5;
  const x = sampleGamma(alpha);
  const y = sampleGamma(beta);
  if (x + y === 0) return 0.5;
  return x / (x + y);
}
|
|
184
|
+
|
|
185
|
+
// Math.random() is deliberate. Bandit sampling only needs uniform noise,
// not cryptographic randomness — combining crypto output with arithmetic
// is exactly what CodeQL js/biased-cryptographic-random flags. Security
// is not a concern for Thompson sampling.

/**
 * Draw one standard-normal variate with the Box-Muller transform.
 *
 * @returns {number}
 */
function randn() {
  let radiusSeed = Math.random();
  if (radiusSeed === 0) {
    radiusSeed = 1e-12; // avoid log(0)
  }
  const angleSeed = Math.random();
  const radius = Math.sqrt(-2 * Math.log(radiusSeed));
  return radius * Math.cos(2 * Math.PI * angleSeed);
}
|
|
194
|
+
|
|
195
|
+
/**
 * Draw one uniform variate; a thin wrapper over Math.random().
 *
 * @returns {number}
 */
function rand01() {
  const draw = Math.random();
  return draw;
}
|
|
198
|
+
|
|
199
|
+
/**
 * Sample from Gamma(k, 1).
 *
 * For k >= 1 this is Marsaglia-Tsang rejection sampling, capped at 1000
 * attempts. For k < 1 it uses the boost identity
 * Gamma(k) = Gamma(k + 1) * U^(1/k), drawing U first and then recursing.
 *
 * @param {number} k shape parameter
 * @returns {number}
 */
function sampleGamma(k) {
  if (k < 1) {
    const u = rand01();
    const boost = Math.pow(u, 1 / k);
    return sampleGamma(k + 1) * boost;
  }

  const d = k - 1 / 3;
  const c = 1 / Math.sqrt(9 * d);

  let attempts = 0;
  while (attempts < 1000) {
    attempts += 1;
    const x = randn();
    const v = Math.pow(1 + c * x, 3);
    if (v > 0) {
      const u = rand01();
      // Cheap squeeze test first; the exact log test runs only when it fails.
      const squeezed = u < 1 - 0.0331 * Math.pow(x, 4);
      if (squeezed || Math.log(u) < 0.5 * x * x + d * (1 - v + Math.log(v))) {
        return d * v;
      }
    }
  }

  // Attempt cap reached: return d = k - 1/3, the value an x = 0 draw yields.
  // Note this is not the distribution mean, which is k.
  return d;
}
|
|
218
|
+
|
|
219
|
+
/**
 * Compute an arm's time-decayed (alpha, beta). The arm itself is not
 * modified and nothing is persisted.
 *
 * Evidence above the tier's prior is scaled by `decay ^ days`, where
 * `days` is the (non-negative) time since `arm.last_used`. The result
 * never drops below the prior. An arm that has never been used — or
 * whose `last_used` cannot be parsed as a date — is returned undecayed,
 * because an unparseable timestamp would otherwise turn both
 * parameters into NaN.
 *
 * @param {object} arm
 * @param {{decay?: number, now?: Date, strength?: number}} [opts]
 * @returns {{alpha: number, beta: number}}
 */
function decayArm(arm, opts = {}) {
  const decay = opts.decay ?? DEFAULT_DECAY;
  const now = opts.now ?? new Date();
  if (!arm.last_used) return { alpha: arm.alpha, beta: arm.beta };
  const lastDate = new Date(arm.last_used);
  const elapsedMs = now.getTime() - lastDate.getTime();
  // Invalid Date → NaN elapsed time; skip decay instead of emitting NaN.
  if (Number.isNaN(elapsedMs)) return { alpha: arm.alpha, beta: arm.beta };
  // A last_used in the future counts as zero days.
  const days = Math.max(0, elapsedMs / 86_400_000);
  const factor = Math.pow(decay, days);
  // Decay shrinks both α and β toward the prior and never below it —
  // callers can rebuild a fresh prior via reset().
  const { alpha: pa, beta: pb } = priorFor(arm.tier, opts.strength ?? PRIOR_STRENGTH);
  return {
    alpha: pa + factor * Math.max(0, arm.alpha - pa),
    beta: pb + factor * Math.max(0, arm.beta - pb),
  };
}
|
|
242
|
+
|
|
243
|
+
/**
 * Pull an arm: sample every candidate tier's decayed Beta posterior and
 * pick the tier with the highest sample.
 *
 * The chosen arm's `last_used` and `count` are updated and the
 * posterior is saved. The success/fail counters (alpha, beta) are not
 * touched here — that happens in `update()` once the outcome is known.
 * Arms missing for any candidate tier are created with their prior and
 * saved too. If no sample beats the others (for example all are NaN),
 * the first tier is chosen.
 *
 * `tiers` must yield at least one non-empty string. An empty list used
 * to select `undefined` and persist an arm with an undefined tier; it
 * is now rejected before anything is loaded or written.
 *
 * @param {{agent: string, bin: string, tiers?: string[], baseDir?: string, posteriorPath?: string, decay?: number, strength?: number, now?: Date}} input
 * @returns {{tier: string, samples: Record<string, number>, posteriorPath: string}}
 * @throws {TypeError} when agent, bin or tiers are missing or malformed
 */
function pull(input) {
  if (!input || typeof input.agent !== 'string' || input.agent.length === 0) {
    throw new TypeError('bandit-router.pull: agent (string) required');
  }
  if (typeof input.bin !== 'string' || input.bin.length === 0) {
    throw new TypeError('bandit-router.pull: bin (string) required');
  }
  // Array.from keeps any iterable of tiers working, as the for...of loop did.
  const tiers = Array.from(input.tiers ?? DEFAULT_TIERS);
  if (tiers.length === 0 || tiers.some((t) => typeof t !== 'string' || t.length === 0)) {
    throw new TypeError('bandit-router.pull: tiers must be a non-empty list of strings');
  }
  const strength = input.strength ?? PRIOR_STRENGTH;
  const now = input.now ?? new Date();

  const posterior = loadPosterior(input);
  /** @type {Record<string, number>} */
  const samples = {};
  let bestTier = tiers[0];
  let bestSample = -1;
  for (const tier of tiers) {
    const arm = ensureArm(posterior, input.agent, input.bin, tier, strength);
    const decayed = decayArm(arm, { decay: input.decay, now, strength });
    const s = sampleBeta(decayed.alpha, decayed.beta);
    samples[tier] = s;
    if (s > bestSample) {
      bestSample = s;
      bestTier = tier;
    }
  }
  // Bump counters on the chosen arm.
  const chosen = ensureArm(posterior, input.agent, input.bin, bestTier, strength);
  chosen.last_used = now.toISOString();
  chosen.count += 1;
  const written = savePosterior(posterior, input);
  return { tier: bestTier, samples, posteriorPath: written };
}
|
|
285
|
+
|
|
286
|
+
/**
 * Fold a reward into an arm's posterior and save it.
 *
 * The reward is clamped to [0, 1] and applied as a fractional Bernoulli
 * observation: α += reward, β += (1 - reward). The arm is created from
 * its prior first when it does not exist yet.
 *
 * @param {{agent: string, bin: string, tier: string, reward: number, baseDir?: string, posteriorPath?: string, strength?: number}} input
 * @returns {{alpha: number, beta: number, posteriorPath: string}}
 * @throws {TypeError} when input is missing, agent/bin/tier is not a
 *   non-empty string, or reward is not a number (NaN included)
 */
function update(input) {
  if (!input) {
    throw new TypeError('bandit-router.update: input required');
  }
  const badKey = ['agent', 'bin', 'tier'].find(
    (key) => typeof input[key] !== 'string' || input[key].length === 0,
  );
  if (badKey !== undefined) {
    throw new TypeError(`bandit-router.update: ${badKey} (string) required`);
  }
  if (typeof input.reward !== 'number' || Number.isNaN(input.reward)) {
    throw new TypeError('bandit-router.update: reward (number) required');
  }

  // Reward must be in [0, 1].
  const clamped = Math.min(1, Math.max(0, input.reward));
  const strength = input.strength ?? PRIOR_STRENGTH;

  const posterior = loadPosterior(input);
  const target = ensureArm(posterior, input.agent, input.bin, input.tier, strength);
  target.alpha += clamped;
  target.beta += 1 - clamped;

  const posteriorPath = savePosterior(posterior, input);
  return { alpha: target.alpha, beta: target.beta, posteriorPath };
}
|
|
312
|
+
|
|
313
|
+
/**
 * Two-stage lexicographic reward (D-06).
 *
 *   if !solidify_pass:           0
 *   elif user_undo_in_session:   0
 *   else: 1 - lambda * normalize(cost_usd + epsilon * wall_time_ms / 1000)
 *
 * Cost is normalised via the supplied `costNormalizer` (defaults to
 * mapping [0, 5 USD] → [0, 1], capped at 1). Defaults: lambda = 0.3,
 * epsilon = 0.05, cost_usd = 0, wall_time_ms = 0.
 *
 * A NaN in any numeric field is treated as "not supplied" and replaced
 * by that field's default. If the penalty still comes out as NaN (for
 * example a custom normaliser returned NaN), no penalty is applied.
 * Without this a NaN would pass through the final clamp and break the
 * [0, 1] contract below.
 *
 * @param {{
 *   solidify_pass: boolean,
 *   user_undo_in_session?: boolean,
 *   cost_usd?: number,
 *   wall_time_ms?: number,
 *   lambda?: number,
 *   epsilon?: number,
 *   costNormalizer?: (n: number) => number,
 * }} input
 * @returns {number} reward in [0, 1]
 */
function computeReward(input) {
  if (!input || typeof input !== 'object') return 0;
  if (!input.solidify_pass) return 0;
  if (input.user_undo_in_session === true) return 0;

  // NaN has typeof 'number', so it must be excluded explicitly. Infinity is
  // kept on purpose: an infinite cost should still earn the maximum penalty.
  const numberOr = (value, fallback) =>
    typeof value === 'number' && !Number.isNaN(value) ? value : fallback;

  const lambda = numberOr(input.lambda, 0.3);
  const epsilon = numberOr(input.epsilon, 0.05);
  const norm =
    typeof input.costNormalizer === 'function'
      ? input.costNormalizer
      : (n) => Math.min(1, Math.max(0, n / 5));
  const wall = numberOr(input.wall_time_ms, 0) / 1000;
  const raw = numberOr(input.cost_usd, 0) + epsilon * wall;
  const penalty = lambda * norm(raw);
  const reward = Number.isNaN(penalty) ? 1 : 1 - penalty;
  return Math.min(1, Math.max(0, reward));
}
|
|
349
|
+
|
|
350
|
+
module.exports = {
|
|
351
|
+
pull,
|
|
352
|
+
update,
|
|
353
|
+
reset,
|
|
354
|
+
loadPosterior,
|
|
355
|
+
savePosterior,
|
|
356
|
+
computeReward,
|
|
357
|
+
binForGlobCount,
|
|
358
|
+
decayArm,
|
|
359
|
+
sampleBeta,
|
|
360
|
+
priorFor,
|
|
361
|
+
DEFAULT_PRIORS,
|
|
362
|
+
DEFAULT_TIERS,
|
|
363
|
+
TIER_PRIOR,
|
|
364
|
+
PRIOR_STRENGTH,
|
|
365
|
+
TOUCHES_BINS,
|
|
366
|
+
DEFAULT_POSTERIOR_PATH,
|
|
367
|
+
SCHEMA_VERSION,
|
|
368
|
+
};
|