@hegemonart/get-design-done 1.27.1 → 1.27.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/CHANGELOG.md +95 -0
- package/SKILL.md +1 -0
- package/agents/design-reflector.md +52 -0
- package/agents/perf-analyzer.md +166 -0
- package/hooks/budget-enforcer.ts +249 -5
- package/hooks/gdd-precompact-snapshot.js +334 -0
- package/hooks/gdd-sessionstart-recap.js +281 -0
- package/hooks/hooks.json +18 -0
- package/package.json +2 -2
- package/reference/bandit-integration.md +163 -0
- package/reference/perf-budget.md +142 -0
- package/reference/registry.json +14 -0
- package/reference/retrieval-contract.md +16 -0
- package/scripts/lib/bandit-arbitrage.cjs +423 -0
- package/scripts/lib/bandit-router/integration.cjs +309 -0
- package/scripts/lib/cache/gdd-cache-manager.cjs +292 -0
- package/scripts/lib/discuss-parallel-runner/index.ts +5 -1
- package/scripts/lib/explore-parallel-runner/index.ts +5 -1
- package/scripts/lib/parallelism-engine/concurrency-tuner.cjs +259 -0
- package/scripts/lib/parallelism-engine/concurrency-tuner.d.cts +53 -0
- package/scripts/lib/perf-analyzer/cost-regression.cjs +299 -0
- package/scripts/lib/perf-analyzer/index.cjs +139 -0
- package/scripts/lib/prompt-dedup/index.cjs +161 -0
- package/scripts/lib/session-runner/index.ts +206 -0
- package/skills/bandit-status/SKILL.md +129 -0
- package/skills/peers/SKILL.md +27 -8
|
@@ -25,6 +25,22 @@ A `/gdd:recall "term"` query that returns 5 Layer-1 hits ≈ 400 tokens. Opening
|
|
|
25
25
|
|
|
26
26
|
Layer 1 becomes `scripts/lib/design-search.cjs` — same protocol, same output shape, but backed by `.design/search.db` instead of grep. Agents do not need to change anything; the backend swap is transparent.
|
|
27
27
|
|
|
28
|
+
## Phase 27.6 — Shared-Context Dedup (D-11)
|
|
29
|
+
|
|
30
|
+
When >= 3 distinct agents in the same cycle read the same `reference/*.md` file, the Phase 14.5 retrieval-contract preamble is extended with a "shared context loaded once" marker — subsequent agents see a content-hash reference instead of the full file body. This reduces redundant token consumption per cycle.
|
|
31
|
+
|
|
32
|
+
The detection lives in `scripts/lib/prompt-dedup/index.cjs::detectDuplicateReferenceReads` and runs at retrieval-contract injection time. The threshold (3 agents) matches Phase 27.6 D-11 and is tunable via the `threshold` argument to `detectDuplicateReferenceReads`.
|
|
33
|
+
|
|
34
|
+
Operator opt-out: set `GDD_DEDUP_OPT_OUT=1` in the spawning agent's environment to bypass dedup for that read.
|
|
35
|
+
|
|
36
|
+
Event emission: each dedup decision emits a `dedup.injection` event via `appendEvent` so the Phase 27.6-01 perf-analyzer can surface "the same file is read by N agents per cycle" as a `[CONTEXT-WASTE]` proposal.
|
|
37
|
+
|
|
38
|
+
Cross-references:
|
|
39
|
+
|
|
40
|
+
- `scripts/lib/prompt-dedup/index.cjs` — analyzer + injection text builder.
|
|
41
|
+
- `tests/prompt-dedup.test.cjs` — detection rule tests.
|
|
42
|
+
- `agents/perf-analyzer.md` — consumes `dedup.injection` events for cross-cycle analysis.
|
|
43
|
+
|
|
28
44
|
---
|
|
29
45
|
|
|
30
46
|
*Imported by every skill that reads `.design/` artifacts: `/gdd:progress`, `/gdd:resume`, `/gdd:reflect`, `/gdd:pause`, `/gdd:recall` (Phase 19.5+), `/gdd:timeline` (Phase 19.5+). Tier: preamble. Phase: 14.5.*
|
|
@@ -0,0 +1,423 @@
|
|
|
1
|
+
// scripts/lib/bandit-arbitrage.cjs
|
|
2
|
+
//
|
|
3
|
+
// Plan 27.5-04 — design-reflector bandit-arbitrage analysis (D-10).
|
|
4
|
+
//
|
|
5
|
+
// Pure function: given a bandit posterior (as produced by
|
|
6
|
+
// `bandit-router.cjs`'s `loadPosterior()`) plus a map of each agent's
|
|
7
|
+
// declared frontmatter `default-tier:`, surface structured proposals
|
|
8
|
+
// when the bandit's measured best-arm tier for an `(agent, bin)` slice
|
|
9
|
+
// has drifted from the frontmatter default. This is the "stale
|
|
10
|
+
// frontmatter" signal described in Phase 27.5 CONTEXT D-10 — it mirrors
|
|
11
|
+
// the cross-runtime `cost-arbitrage.cjs` module from Phase 26-06 so
|
|
12
|
+
// `/gdd:apply-reflections` can iterate both arbitrage sources
|
|
13
|
+
// homogeneously.
|
|
14
|
+
//
|
|
15
|
+
// Contract:
|
|
16
|
+
// analyze(posterior, options) → proposals[]
|
|
17
|
+
//
|
|
18
|
+
// Inputs:
|
|
19
|
+
// * `posterior` — JSON object as returned by
|
|
20
|
+
// `bandit-router.loadPosterior()`:
|
|
21
|
+
// {
|
|
22
|
+
// schema_version: '1.0.0',
|
|
23
|
+
// generated_at: ISO-8601,
|
|
24
|
+
// arms: [
|
|
25
|
+
// { agent, bin, tier, delegate?, alpha, beta, last_used, count },
|
|
26
|
+
// ...
|
|
27
|
+
// ]
|
|
28
|
+
// }
|
|
29
|
+
// Malformed input (non-object, missing `arms`, non-array `arms`) is
|
|
30
|
+
// treated as "no signal" and returns `[]` rather than throwing.
|
|
31
|
+
//
|
|
32
|
+
// * `options.frontmatters` — REQUIRED map `{ agent: defaultTier }`.
|
|
33
|
+
// Without this, no stale-frontmatter signal can be computed (we
|
|
34
|
+
// would not know what the current declared default is); analyze()
|
|
35
|
+
// stays silent and returns `[]`. The caller (reflector agent)
|
|
36
|
+
// builds this map by parsing each `agents/*.md`'s frontmatter
|
|
37
|
+
// `default-tier:` value.
|
|
38
|
+
//
|
|
39
|
+
// * `options.pullCountThreshold` — minimum total pull count across
|
|
40
|
+
// the slice's tier arms required before any proposal can fire.
|
|
41
|
+
// Default 3 (D-10's "3+ cycles" proxy — early in life the
|
|
42
|
+
// posterior is too thin to disagree with frontmatter).
|
|
43
|
+
//
|
|
44
|
+
// * `options.stddevThreshold` — exclusive upper bound on stddev(Beta(α,β)) the best
|
|
45
|
+
// tier may have while still being considered "credible interval
|
|
46
|
+
// narrow enough". Default 0.05 — matches CONTEXT.md research-tail
|
|
47
|
+
// guidance that credible intervals should narrow to ≤0.05 on
|
|
48
|
+
// heavily-used slices within ~50 cycles.
|
|
49
|
+
//
|
|
50
|
+
// * `options.deltaPct` — relative delta the best mean must exceed
|
|
51
|
+
// the second-best mean by, before the signal fires. Default 0.5
|
|
52
|
+
// (50%) — matches D-09/D-10's 50% heuristic. Smaller deltas are
|
|
53
|
+
// noise / measurement variance, not actionable drift.
|
|
54
|
+
//
|
|
55
|
+
// * `options.delegateFilter` — which delegate slice of the posterior
|
|
56
|
+
// to consider. Default `'none'` (matches both the Phase 23.5 legacy
|
|
57
|
+
// slice where `delegate === undefined` AND Plan 27-07's explicit
|
|
58
|
+
// `delegate === 'none'` slice — both represent the local-call
|
|
59
|
+
// routing slice). Pass `null` to disable filtering entirely.
|
|
60
|
+
// Future: pass a specific peer (`'codex'`, `'gemini'`, …) once
|
|
61
|
+
// peer-side posterior coverage is dense enough to credibly disagree
|
|
62
|
+
// with frontmatter.
|
|
63
|
+
//
|
|
64
|
+
// Output:
|
|
65
|
+
// Array of structured proposals, each shaped like:
|
|
66
|
+
// {
|
|
67
|
+
// type: 'bandit_arbitrage',
|
|
68
|
+
// agent: 'design-verifier',
|
|
69
|
+
// bin: 'medium',
|
|
70
|
+
// current_frontmatter_tier: 'sonnet',
|
|
71
|
+
// posterior_best_tier: 'opus',
|
|
72
|
+
// posterior_mean: { haiku: 0.50, sonnet: 0.62, opus: 0.95 },
|
|
73
|
+
// posterior_stddev: { haiku: 0.04, sonnet: 0.03, opus: 0.02 },
|
|
74
|
+
// pull_count: 18,
|
|
75
|
+
// proposal: '<human-readable narrative>',
|
|
76
|
+
// evidence: 'posterior_cred_int_narrow'
|
|
77
|
+
// }
|
|
78
|
+
// Proposals are sorted deterministically by (agent, bin) ascending,
|
|
79
|
+
// matching cost-arbitrage.cjs's discipline — output ordering is
|
|
80
|
+
// stable across runs and platforms for snapshot tests and
|
|
81
|
+
// reproducible reflection files.
|
|
82
|
+
//
|
|
83
|
+
// Design notes:
|
|
84
|
+
// - The 50% delta + 3+ pulls + stddev<0.05 thresholds are starting
|
|
85
|
+
// heuristics, NOT learned values. Bandit-style learning over which
|
|
86
|
+
// arbitrage proposals were ACTED ON (was the frontmatter updated?
|
|
87
|
+
// did the posterior subsequently match?) is future work; this
|
|
88
|
+
// module's job is to surface measurement signals deterministically.
|
|
89
|
+
// - Single-tier-only slices are silent — no comparison is possible
|
|
90
|
+
// when only one tier has been pulled.
|
|
91
|
+
// - The default `delegateFilter='none'` focuses on the local-call
|
|
92
|
+
// slice. Arbitrage on peer-delegate slices is out of scope for
|
|
93
|
+
// v1.27.5 (CONTEXT D-10 explicitly notes peer-side coverage is
|
|
94
|
+
// still too sparse).
|
|
95
|
+
// - Pure: no I/O, no global state, no `require('fs')` /
|
|
96
|
+
// `require('path')`. Tests inject synthetic posterior objects;
|
|
97
|
+
// production callers (the reflector agent) load the on-disk
|
|
98
|
+
// posterior via `bandit-router.loadPosterior()` and pass the
|
|
99
|
+
// returned object in.
|
|
100
|
+
|
|
101
|
+
'use strict';
|
|
102
|
+
|
|
103
|
+
/** Minimum summed pull count a slice needs before any proposal may fire. */
const DEFAULT_PULL_COUNT_THRESHOLD = 3;

/** The best tier's posterior stddev must be strictly below this value. */
const DEFAULT_STDDEV_THRESHOLD = 0.05;

/** Relative lead (0.5 = 50%) the best mean needs over the second-best mean. */
const DEFAULT_DELTA_PCT = 0.5;

/** Delegate slice examined when the caller does not name one. */
const DEFAULT_DELEGATE_FILTER = 'none';

/** Canonical tier names; proposals always report a mean/stddev for each. */
const TIERS = Object.freeze(['haiku', 'sonnet', 'opus']);
|
|
108
|
+
|
|
109
|
+
/**
 * Posterior mean of Beta(α, β), i.e. α / (α + β).
 *
 * Arms that cannot describe a Beta distribution — a non-finite or negative
 * parameter, or α + β === 0 — yield 0 rather than NaN (or an out-of-range
 * value) so callers can always compare the result numerically.
 *
 * @param {number} alpha
 * @param {number} beta
 * @returns {number} the mean, or 0 for degenerate input
 */
function mean(alpha, beta) {
  if (!Number.isFinite(alpha) || !Number.isFinite(beta)) return 0;
  if (alpha < 0 || beta < 0) return 0;
  const total = alpha + beta;
  // Two very large finite parameters can still overflow to Infinity when summed.
  if (total === 0 || !Number.isFinite(total)) return 0;
  return alpha / total;
}
|
|
123
|
+
|
|
124
|
+
/**
 * Posterior standard deviation of Beta(α, β):
 *   sqrt( αβ / ((α+β)² · (α+β+1)) ).
 *
 * Serves as the credible-interval-width proxy (CONTEXT D-10 / research
 * tail). Degenerate input — a non-finite or negative parameter, or
 * α + β === 0 — yields 0 rather than NaN, so the result is always a
 * finite, non-negative number.
 *
 * @param {number} alpha
 * @param {number} beta
 * @returns {number} the standard deviation, or 0 for degenerate input
 */
function stddev(alpha, beta) {
  if (!Number.isFinite(alpha) || !Number.isFinite(beta)) return 0;
  // Negative parameters would make the variance negative (sqrt → NaN) and
  // α + β === -1 would divide by zero.
  if (alpha < 0 || beta < 0) return 0;
  const total = alpha + beta;
  if (total === 0 || !Number.isFinite(total)) return 0;
  const spread = Math.sqrt((alpha * beta) / (total * total * (total + 1)));
  // Intermediate products can overflow for huge parameters; keep the
  // "never NaN" guarantee in that case too.
  return Number.isFinite(spread) ? spread : 0;
}
|
|
141
|
+
|
|
142
|
+
/**
 * Narrow an arm list to one `(agent, bin, delegate-slice)` slice.
 *
 * `delegateFilter` semantics:
 *   - `null`                 → no delegate filtering; every arm for
 *                              (agent, bin) is returned.
 *   - `'none'` (the default) → arms whose `delegate` is `'none'` OR
 *                              `undefined`. The latter covers the Phase 23.5
 *                              legacy slice recorded before the `delegate`
 *                              field existed — both represent the local-call
 *                              routing slice.
 *   - any other value        → arms whose `delegate` is strictly equal to
 *                              `delegateFilter`.
 *
 * A non-array `arms` is treated as "no signal" and yields `[]` instead of
 * throwing, in line with how the module handles malformed posteriors.
 *
 * @param {object[]} arms
 * @param {string} agent
 * @param {string} bin
 * @param {string|null} delegateFilter
 * @returns {object[]} a new array; the input is never mutated
 */
function findArmsForSlice(arms, agent, bin, delegateFilter) {
  if (!Array.isArray(arms)) return [];

  const inDelegateSlice = (arm) => {
    if (delegateFilter === null) return true;
    if (delegateFilter === 'none') {
      return arm.delegate === undefined || arm.delegate === 'none';
    }
    return arm.delegate === delegateFilter;
  };

  // Falsy entries (null / undefined holes) are dropped before any property access.
  return arms.filter(
    (arm) => Boolean(arm) && arm.agent === agent && arm.bin === bin && inDelegateSlice(arm),
  );
}
|
|
174
|
+
|
|
175
|
+
/**
 * Render the human-readable proposal sentence for one stale-frontmatter
 * finding. The wording is fixed so assertions on it stay stable.
 *
 * @param {string} agent
 * @param {string} bin
 * @param {string} currentTier  tier declared in frontmatter
 * @param {string} bestTier     tier with the highest posterior mean
 * @param {number} meanBest
 * @param {number} meanCurrent
 * @param {number} pullCount
 * @param {number} stddevBest
 * @returns {string}
 */
function buildProposalText(
  agent,
  bin,
  currentTier,
  bestTier,
  meanBest,
  meanCurrent,
  pullCount,
  stddevBest,
) {
  // All statistics are shown with three decimal places.
  const threeDp = (value) => value.toFixed(3);
  const pieces = [
    `${agent} (${bin} bin) frontmatter says ${currentTier} but bandit picks ${bestTier} `,
    `(posterior mean ${threeDp(meanBest)} vs ${threeDp(meanCurrent)}, `,
    `${pullCount} pulls, stddev ${threeDp(stddevBest)}) — `,
    `update frontmatter or add tier_override: ${currentTier} if intentional`,
  ];
  return pieces.join('');
}
|
|
206
|
+
|
|
207
|
+
/**
 * Normalise the caller's options, substituting the module defaults for
 * anything missing or invalid, so `analyze()` can destructure the result
 * without further checks.
 *
 * - Numeric thresholds must be numbers greater than 0; `pullCountThreshold`
 *   is additionally floored.
 * - `delegateFilter` is kept when it is `null` (no filtering) or a non-empty
 *   string; anything else selects the default slice.
 * - `frontmatters` is kept when it is a truthy object, otherwise `null`.
 *
 * @param {object} [options]
 * @returns {{
 *   pullCountThreshold: number,
 *   stddevThreshold: number,
 *   deltaPct: number,
 *   delegateFilter: string|null,
 *   frontmatters: object|null,
 * }}
 */
function resolveOptions(options) {
  const given = options && typeof options === 'object' ? options : {};
  const isPositiveNumber = (value) => typeof value === 'number' && value > 0;

  let pullCountThreshold;
  if (isPositiveNumber(given.pullCountThreshold)) {
    pullCountThreshold = Math.floor(given.pullCountThreshold);
  } else {
    pullCountThreshold = DEFAULT_PULL_COUNT_THRESHOLD;
  }

  let stddevThreshold;
  if (isPositiveNumber(given.stddevThreshold)) {
    stddevThreshold = given.stddevThreshold;
  } else {
    stddevThreshold = DEFAULT_STDDEV_THRESHOLD;
  }

  let deltaPct;
  if (isPositiveNumber(given.deltaPct)) {
    deltaPct = given.deltaPct;
  } else {
    deltaPct = DEFAULT_DELTA_PCT;
  }

  // Three accepted shapes: null (no filtering), a non-empty string (a
  // specific delegate slice), or anything else (fall back to the default).
  const requestedDelegate = given.delegateFilter;
  const keepRequested =
    requestedDelegate === null ||
    (typeof requestedDelegate === 'string' && requestedDelegate.length > 0);
  let delegateFilter;
  if (keepRequested) {
    delegateFilter = requestedDelegate;
  } else {
    delegateFilter = DEFAULT_DELEGATE_FILTER;
  }

  let frontmatters = null;
  if (given.frontmatters && typeof given.frontmatters === 'object') {
    frontmatters = given.frontmatters;
  }

  return { pullCountThreshold, stddevThreshold, deltaPct, delegateFilter, frontmatters };
}
|
|
247
|
+
|
|
248
|
+
/**
 * Bucket arms by their `(agent, bin)` pair.
 *
 * The returned Map is keyed by `<agent>::<bin>`; each value carries the
 * agent, the bin and every arm of that pair regardless of tier or delegate
 * (delegate filtering is applied later by `findArmsForSlice`). Entries that
 * are not objects, or whose `agent` / `bin` is not a non-empty string, are
 * ignored.
 *
 * @param {Iterable<object>} arms
 * @returns {Map<string, {agent: string, bin: string, arms: object[]}>}
 */
function groupByAgentBin(arms) {
  const isNonEmptyString = (value) => typeof value === 'string' && value.length > 0;

  /** @type {Map<string, {agent: string, bin: string, arms: object[]}>} */
  const buckets = new Map();
  for (const arm of arms) {
    const usable =
      Boolean(arm) &&
      typeof arm === 'object' &&
      isNonEmptyString(arm.agent) &&
      isNonEmptyString(arm.bin);
    if (!usable) continue;

    const bucketKey = `${arm.agent}::${arm.bin}`;
    if (!buckets.has(bucketKey)) {
      buckets.set(bucketKey, { agent: arm.agent, bin: arm.bin, arms: [] });
    }
    buckets.get(bucketKey).arms.push(arm);
  }
  return buckets;
}
|
|
271
|
+
|
|
272
|
+
/**
 * Main entry point: compare the bandit posterior against each agent's
 * declared frontmatter tier and emit one `bandit_arbitrage` proposal per
 * `(agent, bin)` slice whose measured best tier has credibly drifted.
 *
 * A slice produces a proposal only when ALL of the following hold:
 *   - at least two tiers are present after delegate filtering;
 *   - the summed pull count reaches `pullCountThreshold`;
 *   - the second-best mean is > 0 and the best mean leads it by at least
 *     `deltaPct` (relative);
 *   - the best tier's stddev is strictly below `stddevThreshold` (a NaN
 *     stddev never qualifies);
 *   - `frontmatters[agent]` is a non-empty string different from the best
 *     tier.
 *
 * Malformed posteriors and a missing `frontmatters` map yield `[]`.
 * Proposals are ordered by agent, then bin (UTF-16 code-unit order).
 *
 * @param {{schema_version?: string, generated_at?: string, arms?: object[]}} posterior
 * @param {{
 *   frontmatters: Record<string, string>,
 *   pullCountThreshold?: number,
 *   stddevThreshold?: number,
 *   deltaPct?: number,
 *   delegateFilter?: string|null,
 * }} options
 * @returns {object[]}
 */
function analyze(posterior, options) {
  if (!posterior || typeof posterior !== 'object') return [];
  if (!Array.isArray(posterior.arms) || posterior.arms.length === 0) return [];

  const {
    pullCountThreshold,
    stddevThreshold,
    deltaPct,
    delegateFilter,
    frontmatters,
  } = resolveOptions(options);

  // Without declared tiers there is nothing to call stale; stay silent
  // rather than emit proposals tagged "unknown current".
  if (frontmatters === null) return [];

  // Anything that is not a finite, non-negative number counts as 0. Negative
  // Beta parameters would otherwise produce a NaN stddev / out-of-range mean.
  const nonNegative = (value) =>
    typeof value === 'number' && Number.isFinite(value) && value >= 0 ? value : 0;

  // Order by agent first, then bin. Sorting the joined "agent::bin" key is
  // not equivalent: "design-verifier::m" sorts before "design::m" because
  // '-' precedes ':'.
  const slices = Array.from(groupByAgentBin(posterior.arms).values()).sort((p, q) => {
    if (p.agent !== q.agent) return p.agent < q.agent ? -1 : 1;
    if (p.bin !== q.bin) return p.bin < q.bin ? -1 : 1;
    return 0;
  });

  const proposals = [];
  for (const { agent, bin, arms } of slices) {
    const sliceArms = findArmsForSlice(arms, agent, bin, delegateFilter);

    // Per-tier statistics for whatever tier names the slice contains. A Map
    // (not a plain object) keeps tier names such as "constructor" from
    // colliding with inherited properties.
    // NOTE(review): when a slice holds several arms for one tier (possible
    // with delegateFilter=null, or legacy + explicit 'none' arms) the last
    // arm's posterior wins while pull counts are summed — confirm whether
    // such arms should be pooled instead.
    /** @type {Map<string, {mean: number, stddev: number}>} */
    const statsByTier = new Map();
    let totalPulls = 0;
    for (const arm of sliceArms) {
      if (typeof arm.tier !== 'string' || arm.tier.length === 0) continue;
      const alpha = nonNegative(arm.alpha);
      const beta = nonNegative(arm.beta);
      statsByTier.set(arm.tier, { mean: mean(alpha, beta), stddev: stddev(alpha, beta) });
      totalPulls += nonNegative(arm.count);
    }

    // A single tier leaves nothing to compare against.
    if (statsByTier.size < 2) continue;

    // Posterior too thin to credibly disagree with frontmatter.
    if (totalPulls < pullCountThreshold) continue;

    // Rank tiers by posterior mean, highest first.
    const ranked = Array.from(statsByTier.entries()).sort(
      (x, y) => y[1].mean - x[1].mean,
    );
    const [bestTier, bestStats] = ranked[0];
    const bestMean = bestStats.mean;
    const secondMean = ranked[1][1].mean;

    // A zero runner-up would mean division by zero / an Infinity% delta.
    if (secondMean <= 0) continue;
    if ((bestMean - secondMean) / secondMean < deltaPct) continue;

    // Written as a negated "<" so that a NaN stddev is rejected as well.
    if (!(bestStats.stddev < stddevThreshold)) continue;

    // Silent when the agent has no declared tier or it already matches.
    const currentTier = frontmatters[agent];
    if (typeof currentTier !== 'string' || currentTier.length === 0) continue;
    if (currentTier === bestTier) continue;

    // Report the canonical TIERS set, using 0 for tiers absent from the
    // slice, so every proposal has the same shape.
    /** @type {Record<string, number>} */
    const posteriorMean = {};
    /** @type {Record<string, number>} */
    const posteriorStddev = {};
    for (const tier of TIERS) {
      const stats = statsByTier.get(tier);
      posteriorMean[tier] = stats === undefined ? 0 : stats.mean;
      posteriorStddev[tier] = stats === undefined ? 0 : stats.stddev;
    }

    // The narrative quotes the frontmatter tier's own mean when that tier
    // was pulled in this slice, and the second-best mean otherwise.
    const currentStats = statsByTier.get(currentTier);
    const meanCurrent = currentStats === undefined ? secondMean : currentStats.mean;

    proposals.push({
      type: 'bandit_arbitrage',
      agent,
      bin,
      current_frontmatter_tier: currentTier,
      posterior_best_tier: bestTier,
      posterior_mean: posteriorMean,
      posterior_stddev: posteriorStddev,
      pull_count: totalPulls,
      proposal: buildProposalText(
        agent,
        bin,
        currentTier,
        bestTier,
        bestMean,
        meanCurrent,
        totalPulls,
        bestStats.stddev,
      ),
      evidence: 'posterior_cred_int_narrow',
    });
  }

  return proposals;
}
|
|
412
|
+
|
|
413
|
+
module.exports = {
|
|
414
|
+
analyze,
|
|
415
|
+
mean,
|
|
416
|
+
stddev,
|
|
417
|
+
findArmsForSlice,
|
|
418
|
+
DEFAULT_PULL_COUNT_THRESHOLD,
|
|
419
|
+
DEFAULT_STDDEV_THRESHOLD,
|
|
420
|
+
DEFAULT_DELTA_PCT,
|
|
421
|
+
DEFAULT_DELEGATE_FILTER,
|
|
422
|
+
TIERS,
|
|
423
|
+
};
|