@hegemonart/get-design-done 1.27.1 → 1.27.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,6 +25,22 @@ A `/gdd:recall "term"` query that returns 5 Layer-1 hits ≈ 400 tokens. Opening
25
25
 
26
26
  Layer 1 becomes `scripts/lib/design-search.cjs` — same protocol, same output shape, but backed by `.design/search.db` instead of grep. Agents do not need to change anything; the backend swap is transparent.
27
27
 
28
+ ## Phase 27.6 — Shared-Context Dedup (D-11)
29
+
30
+ When >= 3 distinct agents in the same cycle read the same `reference/*.md` file, the Phase 14.5 retrieval-contract preamble is extended with a "shared context loaded once" marker — subsequent agents see a content-hash reference instead of the full file body. This reduces redundant token consumption per cycle.
31
+
32
+ The detection lives in `scripts/lib/prompt-dedup/index.cjs::detectDuplicateReferenceReads` and runs at retrieval-contract injection time. The threshold (3 agents) matches Phase 27.6 D-11 and is tunable via the `threshold` argument to `detectDuplicateReferenceReads`.
33
+
34
+ Operator opt-out: set `GDD_DEDUP_OPT_OUT=1` in the spawning agent's environment to bypass dedup for that read.
35
+
36
+ Event emission: each dedup decision emits a `dedup.injection` event via `appendEvent` so the Phase 27.6-01 perf-analyzer can surface "the same file is read by N agents per cycle" as a `[CONTEXT-WASTE]` proposal.
37
+
38
+ Cross-references:
39
+
40
+ - `scripts/lib/prompt-dedup/index.cjs` — analyzer + injection text builder.
41
+ - `tests/prompt-dedup.test.cjs` — detection rule tests.
42
+ - `agents/perf-analyzer.md` — consumes `dedup.injection` events for cross-cycle analysis.
43
+
28
44
  ---
29
45
 
30
46
  *Imported by every skill that reads `.design/` artifacts: `/gdd:progress`, `/gdd:resume`, `/gdd:reflect`, `/gdd:pause`, `/gdd:recall` (Phase 19.5+), `/gdd:timeline` (Phase 19.5+). Tier: preamble. Phase: 14.5.*
@@ -0,0 +1,423 @@
1
+ // scripts/lib/bandit-arbitrage.cjs
2
+ //
3
+ // Plan 27.5-04 — design-reflector bandit-arbitrage analysis (D-10).
4
+ //
5
+ // Pure function: given a bandit posterior (as produced by
6
+ // `bandit-router.cjs`'s `loadPosterior()`) plus a map of each agent's
7
+ // declared frontmatter `default-tier:`, surface structured proposals
8
+ // when the bandit's measured best-arm tier for an `(agent, bin)` slice
9
+ // has drifted from the frontmatter default. This is the "stale
10
+ // frontmatter" signal described in Phase 27.5 CONTEXT D-10 — it mirrors
11
+ // the cross-runtime `cost-arbitrage.cjs` module from Phase 26-06 so
12
+ // `/gdd:apply-reflections` can iterate both arbitrage sources
13
+ // homogeneously.
14
+ //
15
+ // Contract:
16
+ // analyze(posterior, options) → proposals[]
17
+ //
18
+ // Inputs:
19
+ // * `posterior` — JSON object as returned by
20
+ // `bandit-router.loadPosterior()`:
21
+ // {
22
+ // schema_version: '1.0.0',
23
+ // generated_at: ISO-8601,
24
+ // arms: [
25
+ // { agent, bin, tier, delegate?, alpha, beta, last_used, count },
26
+ // ...
27
+ // ]
28
+ // }
29
+ // Malformed input (non-object, missing `arms`, non-array `arms`) is
30
+ // treated as "no signal" and returns `[]` rather than throwing.
31
+ //
32
+ // * `options.frontmatters` — REQUIRED map `{ agent: defaultTier }`.
33
+ // Without this, no stale-frontmatter signal can be computed (we
34
+ // would not know what the current declared default is); analyze()
35
+ // stays silent and returns `[]`. The caller (reflector agent)
36
+ // builds this map by parsing each `agents/*.md`'s frontmatter
37
+ // `default-tier:` value.
38
+ //
39
+ // * `options.pullCountThreshold` — minimum total pull count across
40
+ // the slice's tier arms required before any proposal can fire.
41
+ // Default 3 (D-10's "3+ cycles" proxy — early in life the
42
+ // posterior is too thin to disagree with frontmatter).
43
+ //
44
+ // * `options.stddevThreshold` — exclusive upper bound on the stddev(Beta(α,β)) the best
45
+ // tier may have while still being considered "credible interval
46
+ // narrow enough". Default 0.05 — matches CONTEXT.md research-tail
47
+ // guidance that credible intervals should narrow to ≤0.05 on
48
+ // heavily-used slices within ~50 cycles.
49
+ //
50
+ // * `options.deltaPct` — relative delta the best mean must exceed
51
+ // the second-best mean by, before the signal fires. Default 0.5
52
+ // (50%) — matches D-09/D-10's 50% heuristic. Smaller deltas are
53
+ // noise / measurement variance, not actionable drift.
54
+ //
55
+ // * `options.delegateFilter` — which delegate slice of the posterior
56
+ // to consider. Default `'none'` (matches both the Phase 23.5 legacy
57
+ // slice where `delegate === undefined` AND Plan 27-07's explicit
58
+ // `delegate === 'none'` slice — both represent the local-call
59
+ // routing slice). Pass `null` to disable filtering entirely.
60
+ // Future: pass a specific peer (`'codex'`, `'gemini'`, …) once
61
+ // peer-side posterior coverage is dense enough to credibly disagree
62
+ // with frontmatter.
63
+ //
64
+ // Output:
65
+ // Array of structured proposals, each shaped like:
66
+ // {
67
+ // type: 'bandit_arbitrage',
68
+ // agent: 'design-verifier',
69
+ // bin: 'medium',
70
+ // current_frontmatter_tier: 'sonnet',
71
+ // posterior_best_tier: 'opus',
72
+ // posterior_mean: { haiku: 0.50, sonnet: 0.62, opus: 0.95 },
73
+ // posterior_stddev: { haiku: 0.04, sonnet: 0.03, opus: 0.02 },
74
+ // pull_count: 18,
75
+ // proposal: '<human-readable narrative>',
76
+ // evidence: 'posterior_cred_int_narrow'
77
+ // }
78
+ // Proposals are sorted deterministically by their `<agent>::<bin>` group key
78
+ // (plain string sort), matching cost-arbitrage.cjs's discipline — output
79
+ // ordering is stable across runs and platforms for snapshot tests and
80
+ // reproducible reflection files.
82
+ //
83
+ // Design notes:
84
+ // - The 50% delta + 3+ pulls + stddev<0.05 thresholds are starting
85
+ // heuristics, NOT learned values. Bandit-style learning over which
86
+ // arbitrage proposals were ACTED ON (was the frontmatter updated?
87
+ // did the posterior subsequently match?) is future work; this
88
+ // module's job is to surface measurement signals deterministically.
89
+ // - Single-tier-only slices are silent — no comparison is possible
90
+ // when only one tier has been pulled.
91
+ // - The default `delegateFilter='none'` focuses on the local-call
92
+ // slice. Arbitrage on peer-delegate slices is out of scope for
93
+ // v1.27.5 (CONTEXT D-10 explicitly notes peer-side coverage is
94
+ // still too sparse).
95
+ // - Pure: no I/O, no global state, no `require('fs')` /
96
+ // `require('path')`. Tests inject synthetic posterior objects;
97
+ // production callers (the reflector agent) load the on-disk
98
+ // posterior via `bandit-router.loadPosterior()` and pass the
99
+ // returned object in.
100
+
101
+ 'use strict';
102
+
103
+ const DEFAULT_PULL_COUNT_THRESHOLD = 3; // minimum summed `count` across a slice before analyze() may emit a proposal
104
+ const DEFAULT_STDDEV_THRESHOLD = 0.05; // the best tier's Beta stddev must be strictly below this
105
+ const DEFAULT_DELTA_PCT = 0.5; // best mean must lead the second-best mean by at least this relative margin
106
+ const DEFAULT_DELEGATE_FILTER = 'none'; // selects arms whose `delegate` is 'none' or undefined
107
+ const TIERS = Object.freeze(['haiku', 'sonnet', 'opus']); // canonical tiers always rendered in posterior_mean / posterior_stddev
108
+
109
+ /**
110
+ * Posterior mean of Beta(α, β) is α / (α + β). When α + β === 0 (a
111
+ * pathological / impossible arm), return 0 rather than NaN so callers
112
+ * can compare numerically.
113
+ * No other validation happens here: only an exactly-zero sum is special-cased.
114
+ * @param {number} alpha - α parameter of the arm's Beta posterior
115
+ * @param {number} beta - β parameter of the arm's Beta posterior
116
+ * @returns {number} α / (α + β), or 0 when α + β is exactly 0
117
+ */
118
+ function mean(alpha, beta) {
119
+ const sum = alpha + beta;
120
+ if (sum === 0) return 0;
121
+ return alpha / sum;
122
+ }
123
+
124
+ /**
125
+ * Posterior stddev of Beta(α, β) is
126
+ * sqrt( αβ / ((α+β)² · (α+β+1)) ).
127
+ *
128
+ * Used as the credible-interval-width proxy (CONTEXT D-10 / research
129
+ * tail). When α + β === 0, return 0 rather than NaN.
130
+ * Inputs are assumed non-negative; a negative variance would make Math.sqrt return NaN.
131
+ * @param {number} alpha - α parameter of the arm's Beta posterior
132
+ * @param {number} beta - β parameter of the arm's Beta posterior
133
+ * @returns {number} square root of the Beta variance, or 0 when α + β is exactly 0
134
+ */
135
+ function stddev(alpha, beta) {
136
+ const sum = alpha + beta;
137
+ if (sum === 0) return 0;
138
+ const variance = (alpha * beta) / (sum * sum * (sum + 1));
139
+ return Math.sqrt(variance);
140
+ }
141
+
142
+ /**
143
+ * Filter an arm list down to a single `(agent, bin, delegate-slice)`
144
+ * slice. Falsy entries in `arms` are dropped; a new array is returned and `arms` is not mutated.
145
+ *
146
+ * delegateFilter semantics:
147
+ * - `null` → no delegate filtering; all arms for
148
+ * (agent, bin) are returned.
149
+ * - `'none'` (the default) → match arms where `delegate === 'none'`
150
+ * OR `delegate === undefined`. The
151
+ * latter covers the Phase 23.5 legacy
152
+ * slice where the `delegate` field had
153
+ * not yet been added — both represent
154
+ * the local-call routing slice.
155
+ * - any other string → match arms where `delegate ===
156
+ * delegateFilter` exactly.
157
+ * Note: an arm whose `delegate` is `null` is returned only when delegateFilter is `null`; `'none'` does not match it.
158
+ * @param {object[]} arms - candidate arms; `agent`, `bin` and `delegate` are the only fields read
159
+ * @param {string} agent - agent name compared with `===`
160
+ * @param {string} bin - bin name compared with `===`
161
+ * @param {string|null} delegateFilter - see the semantics list above
162
+ * @returns {object[]} the arms that belong to the requested slice, in their original order
163
+ */
164
+ function findArmsForSlice(arms, agent, bin, delegateFilter) {
165
+ const filtered = arms.filter((a) => a && a.agent === agent && a.bin === bin);
166
+ if (delegateFilter === null) return filtered;
167
+ if (delegateFilter === 'none') {
168
+ return filtered.filter(
169
+ (a) => a.delegate === undefined || a.delegate === 'none',
170
+ );
171
+ }
172
+ return filtered.filter((a) => a.delegate === delegateFilter);
173
+ }
174
+
175
+ /**
176
+ * Build the proposal sentence. Fixed phrasing keeps test assertions
177
+ * stable across cycles. The three numeric statistics are rendered with three decimals via toFixed(3), so they must be numbers.
178
+ *
179
+ * @param {string} agent - agent name, rendered verbatim
180
+ * @param {string} bin - bin name, rendered verbatim
181
+ * @param {string} currentTier - tier named in the "frontmatter says" clause and in the `tier_override:` hint
182
+ * @param {string} bestTier - tier named in the "bandit picks" clause
183
+ * @param {number} meanBest - first posterior mean shown (before "vs")
184
+ * @param {number} meanCurrent - second posterior mean shown (after "vs")
185
+ * @param {number} pullCount - pull count, interpolated without formatting
186
+ * @param {number} stddevBest - stddev shown at the end of the parenthesised clause
187
+ * @returns {string} a single-line narrative sentence
188
+ */
189
+ function buildProposalText(
190
+ agent,
191
+ bin,
192
+ currentTier,
193
+ bestTier,
194
+ meanBest,
195
+ meanCurrent,
196
+ pullCount,
197
+ stddevBest,
198
+ ) {
199
+ return (
200
+ `${agent} (${bin} bin) frontmatter says ${currentTier} but bandit picks ${bestTier} ` +
201
+ `(posterior mean ${meanBest.toFixed(3)} vs ${meanCurrent.toFixed(3)}, ` +
202
+ `${pullCount} pulls, stddev ${stddevBest.toFixed(3)}) — ` +
203
+ `update frontmatter or add tier_override: ${currentTier} if intentional`
204
+ );
205
+ }
206
+
207
+ /**
208
+ * Resolve options with defaults. Centralised so the analyze() body
209
+ * can stay readable. A non-object `options` is treated as `{}`. Each numeric threshold falls back to its default unless it is a number greater than 0 (pullCountThreshold is additionally floored, so a value in (0, 1) becomes 0). `frontmatters` resolves to null unless it is a truthy object. Returns `{ pullCountThreshold, stddevThreshold, deltaPct, delegateFilter, frontmatters }`.
210
+ */
211
+ function resolveOptions(options) {
212
+ const opts = options && typeof options === 'object' ? options : {};
213
+ const pullCountThreshold =
214
+ typeof opts.pullCountThreshold === 'number' && opts.pullCountThreshold > 0
215
+ ? Math.floor(opts.pullCountThreshold)
216
+ : DEFAULT_PULL_COUNT_THRESHOLD;
217
+ const stddevThreshold =
218
+ typeof opts.stddevThreshold === 'number' && opts.stddevThreshold > 0
219
+ ? opts.stddevThreshold
220
+ : DEFAULT_STDDEV_THRESHOLD;
221
+ const deltaPct =
222
+ typeof opts.deltaPct === 'number' && opts.deltaPct > 0
223
+ ? opts.deltaPct
224
+ : DEFAULT_DELTA_PCT;
225
+ // delegateFilter has three valid shapes: undefined (use default),
226
+ // null (no filtering), or a non-empty string (specific delegate slice). Anything else, including '', also falls back to the default.
227
+ let delegateFilter;
228
+ if (opts.delegateFilter === null) {
229
+ delegateFilter = null;
230
+ } else if (typeof opts.delegateFilter === 'string' && opts.delegateFilter.length > 0) {
231
+ delegateFilter = opts.delegateFilter;
232
+ } else {
233
+ delegateFilter = DEFAULT_DELEGATE_FILTER;
234
+ }
235
+ const frontmatters =
236
+ opts.frontmatters && typeof opts.frontmatters === 'object'
237
+ ? opts.frontmatters
238
+ : null;
239
+ return {
240
+ pullCountThreshold,
241
+ stddevThreshold,
242
+ deltaPct,
243
+ delegateFilter,
244
+ frontmatters,
245
+ };
246
+ }
247
+
248
+ /**
249
+ * Group arms by `(agent, bin)` key. Returns a Map keyed by
250
+ * `<agent>::<bin>` whose values are `{ agent, bin, arms }` records holding the arms in that slice
251
+ * (across all tiers / delegates — `findArmsForSlice` applies the
252
+ * delegate filter downstream). Entries that are not objects, or whose `agent` / `bin` is not a non-empty string, are skipped. NOTE(review): the key is a plain concatenation, so names that themselves contain '::' could collide.
253
+ */
254
+ function groupByAgentBin(arms) {
255
+ /** @type {Map<string, {agent: string, bin: string, arms: object[]}>} */
256
+ const groups = new Map();
257
+ for (const a of arms) {
258
+ if (!a || typeof a !== 'object') continue;
259
+ if (typeof a.agent !== 'string' || a.agent.length === 0) continue;
260
+ if (typeof a.bin !== 'string' || a.bin.length === 0) continue;
261
+ const key = a.agent + '::' + a.bin;
262
+ let group = groups.get(key);
263
+ if (group === undefined) {
264
+ group = { agent: a.agent, bin: a.bin, arms: [] };
265
+ groups.set(key, group);
266
+ }
267
+ group.arms.push(a);
268
+ }
269
+ return groups;
270
+ }
271
+
272
+ /**
273
+ * Main entry point. See module-level header for contract. Returns [] when `posterior` is not an object, when `posterior.arms` is missing / not an array / empty, or when no `frontmatters` map is supplied; otherwise emits at most one proposal per (agent, bin) group.
274
+ *
275
+ * @param {{schema_version?: string, generated_at?: string, arms?: object[]}} posterior
276
+ * @param {{
277
+ * frontmatters: Record<string, string>,
278
+ * pullCountThreshold?: number,
279
+ * stddevThreshold?: number,
280
+ * deltaPct?: number,
281
+ * delegateFilter?: string|null,
282
+ * }} options
283
+ * @returns {object[]} proposal objects in sorted group-key order (possibly empty)
284
+ */
285
+ function analyze(posterior, options) {
286
+ if (!posterior || typeof posterior !== 'object') return [];
287
+ if (!Array.isArray(posterior.arms) || posterior.arms.length === 0) return [];
288
+
289
+ const {
290
+ pullCountThreshold,
291
+ stddevThreshold,
292
+ deltaPct,
293
+ delegateFilter,
294
+ frontmatters,
295
+ } = resolveOptions(options);
296
+
297
+ // No frontmatters → no stale-frontmatter signal can be computed.
298
+ // Silent rather than emit garbage proposals tagged "unknown current".
299
+ if (frontmatters === null) return [];
300
+
301
+ const groups = groupByAgentBin(posterior.arms);
302
+
303
+ // Iterate groups in sorted order of their `<agent>::<bin>` key so output
304
+ // ordering is stable across runs and platforms — matches cost-arbitrage.cjs
305
+ // discipline; useful for snapshot tests and reproducible reflection files.
306
+ // NOTE(review): this is the default string sort on the composite key, so e.g. 'a-b::x' sorts before 'a::x' because '-' < ':'.
307
+ const sortedKeys = Array.from(groups.keys()).sort();
308
+
309
+ const proposals = [];
310
+ for (const key of sortedKeys) {
311
+ const group = groups.get(key);
312
+ if (group === undefined) continue;
313
+ const { agent, bin } = group;
314
+ const sliceArms = findArmsForSlice(group.arms, agent, bin, delegateFilter);
315
+
316
+ // Compute per-tier mean / stddev / count, restricted to the tiers
317
+ // actually present in the slice. The standard tier set is
318
+ // {haiku, sonnet, opus} but we accept any tier names the posterior
319
+ // happens to contain. Non-finite or non-numeric alpha / beta / count are read as 0. NOTE(review): if the slice holds several arms for one tier (e.g. with delegateFilter=null), the last arm's stats overwrite earlier ones while totalPulls still sums every arm — confirm intended.
320
+ /** @type {Record<string, number>} */
321
+ const meansPerTier = {};
322
+ /** @type {Record<string, number>} */
323
+ const stddevsPerTier = {};
324
+ /** @type {Record<string, number>} */
325
+ const countsPerTier = {};
326
+ let totalPulls = 0;
327
+ for (const arm of sliceArms) {
328
+ if (typeof arm.tier !== 'string' || arm.tier.length === 0) continue;
329
+ const a = typeof arm.alpha === 'number' && Number.isFinite(arm.alpha) ? arm.alpha : 0;
330
+ const b = typeof arm.beta === 'number' && Number.isFinite(arm.beta) ? arm.beta : 0;
331
+ const c = typeof arm.count === 'number' && Number.isFinite(arm.count) ? arm.count : 0;
332
+ meansPerTier[arm.tier] = mean(a, b);
333
+ stddevsPerTier[arm.tier] = stddev(a, b);
334
+ countsPerTier[arm.tier] = c;
335
+ totalPulls += c;
336
+ }
337
+
338
+ // Skip if fewer than 2 tiers represented — no comparison possible.
339
+ const tiersPresent = Object.keys(meansPerTier);
340
+ if (tiersPresent.length < 2) continue;
341
+
342
+ // Skip if total pulls below threshold (posterior too thin to
343
+ // credibly disagree with frontmatter).
344
+ if (totalPulls < pullCountThreshold) continue;
345
+
346
+ // Identify best and second-best tier by posterior mean (descending sort on a copy of the tier list).
347
+ const sortedByMean = tiersPresent
348
+ .slice()
349
+ .sort((x, y) => meansPerTier[y] - meansPerTier[x]);
350
+ const bestTier = sortedByMean[0];
351
+ const secondTier = sortedByMean[1];
352
+ const bestMean = meansPerTier[bestTier];
353
+ const secondMean = meansPerTier[secondTier];
354
+
355
+ // Skip if zero-mean second-best (avoid division-by-zero and
356
+ // misleading Infinity% deltas).
357
+ if (secondMean <= 0) continue;
358
+
359
+ const delta = (bestMean - secondMean) / secondMean;
360
+ if (delta < deltaPct) continue;
361
+
362
+ // Skip if best-tier credible interval too wide (stddev at or above the threshold).
363
+ if (stddevsPerTier[bestTier] >= stddevThreshold) continue;
364
+
365
+ // Look up frontmatter; silent if missing or already matches best.
366
+ const currentTier = frontmatters[agent];
367
+ if (typeof currentTier !== 'string' || currentTier.length === 0) continue;
368
+ if (currentTier === bestTier) continue;
369
+
370
+ // Render posterior means/stddevs across the canonical TIERS set
371
+ // (filling in undefined tiers with 0 for stable proposal shape); tier names outside TIERS are not rendered in these two maps.
372
+ /** @type {Record<string, number>} */
373
+ const posteriorMean = {};
374
+ /** @type {Record<string, number>} */
375
+ const posteriorStddev = {};
376
+ for (const t of TIERS) {
377
+ posteriorMean[t] = meansPerTier[t] === undefined ? 0 : meansPerTier[t];
378
+ posteriorStddev[t] = stddevsPerTier[t] === undefined ? 0 : stddevsPerTier[t];
379
+ }
380
+
381
+ // Use the current frontmatter tier's mean (if present in slice)
382
+ // when building the proposal text; fall back to second-best mean
383
+ // when the frontmatter tier was not pulled at all in the slice.
384
+ const meanCurrent =
385
+ meansPerTier[currentTier] !== undefined ? meansPerTier[currentTier] : secondMean;
386
+
387
+ proposals.push({
388
+ type: 'bandit_arbitrage',
389
+ agent,
390
+ bin,
391
+ current_frontmatter_tier: currentTier,
392
+ posterior_best_tier: bestTier,
393
+ posterior_mean: posteriorMean,
394
+ posterior_stddev: posteriorStddev,
395
+ pull_count: totalPulls,
396
+ proposal: buildProposalText(
397
+ agent,
398
+ bin,
399
+ currentTier,
400
+ bestTier,
401
+ bestMean,
402
+ meanCurrent,
403
+ totalPulls,
404
+ stddevsPerTier[bestTier],
405
+ ),
406
+ evidence: 'posterior_cred_int_narrow',
407
+ });
408
+ }
409
+
410
+ return proposals;
411
+ }
412
+
413
+ module.exports = { // exported surface: analyze() plus the mean / stddev / findArmsForSlice helpers and the default constants; resolveOptions, groupByAgentBin and buildProposalText are not exported
414
+ analyze,
415
+ mean,
416
+ stddev,
417
+ findArmsForSlice,
418
+ DEFAULT_PULL_COUNT_THRESHOLD,
419
+ DEFAULT_STDDEV_THRESHOLD,
420
+ DEFAULT_DELTA_PCT,
421
+ DEFAULT_DELEGATE_FILTER,
422
+ TIERS,
423
+ };