@hegemonart/get-design-done 1.27.1 → 1.27.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/CHANGELOG.md +95 -0
- package/SKILL.md +1 -0
- package/agents/design-reflector.md +52 -0
- package/agents/perf-analyzer.md +166 -0
- package/hooks/budget-enforcer.ts +249 -5
- package/hooks/gdd-precompact-snapshot.js +334 -0
- package/hooks/gdd-sessionstart-recap.js +281 -0
- package/hooks/hooks.json +18 -0
- package/package.json +2 -2
- package/reference/bandit-integration.md +163 -0
- package/reference/perf-budget.md +142 -0
- package/reference/registry.json +14 -0
- package/reference/retrieval-contract.md +16 -0
- package/scripts/lib/bandit-arbitrage.cjs +423 -0
- package/scripts/lib/bandit-router/integration.cjs +309 -0
- package/scripts/lib/cache/gdd-cache-manager.cjs +292 -0
- package/scripts/lib/discuss-parallel-runner/index.ts +5 -1
- package/scripts/lib/explore-parallel-runner/index.ts +5 -1
- package/scripts/lib/parallelism-engine/concurrency-tuner.cjs +259 -0
- package/scripts/lib/parallelism-engine/concurrency-tuner.d.cts +53 -0
- package/scripts/lib/perf-analyzer/cost-regression.cjs +299 -0
- package/scripts/lib/perf-analyzer/index.cjs +139 -0
- package/scripts/lib/prompt-dedup/index.cjs +161 -0
- package/scripts/lib/session-runner/index.ts +206 -0
- package/skills/bandit-status/SKILL.md +129 -0
- package/skills/peers/SKILL.md +27 -8
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* scripts/lib/perf-analyzer/index.cjs — Plan 27.6-01
|
|
3
|
+
*
|
|
4
|
+
* Telemetry reader for the Phase 27.6 perf-analyzer reflector agent.
|
|
5
|
+
* Reads `.design/telemetry/costs.jsonl` (cost rows, Phase 10.1) and
|
|
6
|
+
* `.design/telemetry/trajectories/<cycle>.jsonl` files (agent trace
|
|
7
|
+
* lines per Phase 22).
|
|
8
|
+
*
|
|
9
|
+
* JSONL discipline (same as scripts/lib/event-stream/reader.ts):
|
|
10
|
+
* - One JSON object per line.
|
|
11
|
+
* - Blank lines / whitespace-only lines ignored silently.
|
|
12
|
+
* - Malformed lines tolerated — counted in skipped_count, NOT thrown.
|
|
13
|
+
*
|
|
14
|
+
* No external deps. Stateless. Safe to require from CommonJS callers
|
|
15
|
+
* (agents, hooks, CI gates) without dragging the gdd-state MCP graph.
|
|
16
|
+
*/
|
|
17
|
+
'use strict';
|
|
18
|
+
|
|
19
|
+
const fs = require('node:fs');
|
|
20
|
+
const path = require('node:path');
|
|
21
|
+
|
|
22
|
+
const DEFAULT_COSTS_PATH = '.design/telemetry/costs.jsonl';
|
|
23
|
+
const DEFAULT_TRAJECTORIES_DIR = '.design/telemetry/trajectories';
|
|
24
|
+
|
|
25
|
+
/**
 * Anchor a relative path to `baseDir` when one is supplied.
 *
 * An absolute `p` is returned untouched. A relative `p` is joined onto
 * `baseDir` when `baseDir` is truthy, and otherwise returned as given.
 *
 * @param {string} p
 * @param {string|undefined} baseDir
 * @returns {string}
 */
function resolvePath(p, baseDir) {
  const shouldAnchor = !path.isAbsolute(p) && Boolean(baseDir);
  return shouldAnchor ? path.join(baseDir, p) : p;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Parse JSONL text tolerantly.
 *
 * - A leading UTF-8 byte-order mark is stripped so it cannot corrupt the
 *   first row.
 * - Blank / whitespace-only lines are ignored and NOT counted.
 * - Lines that fail `JSON.parse` are counted in `skipped`, never thrown.
 * - Lines that parse to something other than a JSON object (`null`,
 *   numbers, strings, booleans, arrays) are also counted in `skipped`,
 *   so every entry of `rows` is a non-null, non-array object.
 *
 * @param {string} contents - raw file contents (utf-8)
 * @returns {{ rows: object[], skipped: number }}
 */
function parseJsonl(contents) {
  const rows = [];
  let skipped = 0;

  // JSON.parse rejects U+FEFF, so a BOM-prefixed file would otherwise lose
  // its first row to the "malformed" bucket.
  const text = contents.charCodeAt(0) === 0xfeff ? contents.slice(1) : contents;

  for (const line of text.split(/\r?\n/)) {
    if (line.trim() === '') continue;

    let value;
    try {
      value = JSON.parse(line);
    } catch {
      skipped += 1;
      continue;
    }

    // Valid JSON but not an object: violates the one-object-per-line
    // discipline, and a `null` row would crash property access downstream.
    if (value === null || typeof value !== 'object' || Array.isArray(value)) {
      skipped += 1;
      continue;
    }

    rows.push(value);
  }

  return { rows, skipped };
}
|
|
59
|
+
|
|
60
|
+
/**
 * Read `.design/telemetry/costs.jsonl` (or an override path) into row objects.
 *
 * A missing file yields an empty result rather than an error. This also
 * covers the case where the file disappears between the existence check
 * and the read. Any other read error (for example the path being a
 * directory) still propagates to the caller.
 *
 * @param {object} [opts]
 * @param {string} [opts.path]        Override (default: DEFAULT_COSTS_PATH)
 * @param {string} [opts.sinceCycle]  Keep only rows whose `cycle` is a string
 *                                    that compares >= this value (plain string
 *                                    comparison); rows without a string
 *                                    `cycle` are dropped when this is set
 * @param {string} [opts.baseDir]     Resolve relative paths against this dir
 * @returns {{ rows: object[], parsed_count: number, skipped_count: number }}
 *   `parsed_count` is the length of the returned `rows` (i.e. counted after
 *   the `sinceCycle` filter); `skipped_count` is the number of lines the
 *   JSONL parser skipped.
 */
function loadCosts(opts) {
  const o = opts || {};
  const rawPath = o.path !== undefined ? o.path : DEFAULT_COSTS_PATH;
  const targetPath = resolvePath(rawPath, o.baseDir);

  if (!fs.existsSync(targetPath)) {
    return { rows: [], parsed_count: 0, skipped_count: 0 };
  }

  let contents;
  try {
    contents = fs.readFileSync(targetPath, 'utf8');
  } catch (err) {
    // The file can be removed between existsSync() and readFileSync();
    // treat that race exactly like the "file absent" case above.
    if (err && err.code === 'ENOENT') {
      return { rows: [], parsed_count: 0, skipped_count: 0 };
    }
    throw err;
  }

  const { rows: parsed, skipped: skipped_count } = parseJsonl(contents);

  let rows = parsed;
  if (o.sinceCycle !== undefined) {
    const since = o.sinceCycle;
    rows = parsed.filter(
      (row) => row && typeof row.cycle === 'string' && row.cycle >= since,
    );
  }

  return { rows, parsed_count: rows.length, skipped_count };
}
|
|
91
|
+
|
|
92
|
+
/**
 * Read every `*.jsonl` entry of the trajectories directory (default
 * `.design/telemetry/trajectories`, or an override) into a per-cycle map
 * keyed by the entry's basename without the `.jsonl` extension.
 *
 * A missing directory yields an empty result. Entries that cannot be read
 * are skipped without aborting the rest of the scan and are not counted in
 * `files_read`. Lines the JSONL parser skips are not reported here.
 *
 * @param {object} [opts]
 * @param {string} [opts.dir]      Override (default: DEFAULT_TRAJECTORIES_DIR)
 * @param {string} [opts.baseDir]  Resolve relative paths against this dir
 * @returns {{ byCycle: Record<string, object[]>, files_read: number }}
 */
function loadTrajectories(opts) {
  const o = opts || {};
  const rawDir = o.dir !== undefined ? o.dir : DEFAULT_TRAJECTORIES_DIR;
  const targetDir = resolvePath(rawDir, o.baseDir);

  if (!fs.existsSync(targetDir)) {
    return { byCycle: {}, files_read: 0 };
  }

  let entries;
  try {
    entries = fs.readdirSync(targetDir);
  } catch (err) {
    // The directory can be removed between existsSync() and readdirSync();
    // treat that race exactly like the "directory absent" case above.
    if (err && err.code === 'ENOENT') {
      return { byCycle: {}, files_read: 0 };
    }
    throw err;
  }

  /** @type {Record<string, object[]>} */
  const byCycle = {};
  let files_read = 0;

  for (const entry of entries) {
    if (!entry.endsWith('.jsonl')) continue;
    const filePath = path.join(targetDir, entry);
    let contents;
    try {
      contents = fs.readFileSync(filePath, 'utf8');
    } catch {
      // Permission / IO error on one file should not abort the whole read.
      continue;
    }
    const cycleSlug = path.basename(entry, '.jsonl');
    const { rows } = parseJsonl(contents);
    // defineProperty instead of `byCycle[cycleSlug] = rows`: a file named
    // `__proto__.jsonl` would otherwise invoke the `__proto__` setter and
    // replace the map's prototype rather than adding an own key.
    Object.defineProperty(byCycle, cycleSlug, {
      value: rows,
      writable: true,
      enumerable: true,
      configurable: true,
    });
    files_read += 1;
  }

  return { byCycle, files_read };
}
|
|
133
|
+
|
|
134
|
+
module.exports = {
|
|
135
|
+
loadCosts,
|
|
136
|
+
loadTrajectories,
|
|
137
|
+
DEFAULT_COSTS_PATH,
|
|
138
|
+
DEFAULT_TRAJECTORIES_DIR,
|
|
139
|
+
};
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* scripts/lib/prompt-dedup/index.cjs — Plan 27.6-06
|
|
3
|
+
*
|
|
4
|
+
* Phase 27.6 D-11 prompt-deduplication analyzer. Detects cases where
|
|
5
|
+
* >= 3 distinct agents in the same cycle read the same reference/*.md
|
|
6
|
+
* file. Produces a preamble injection that gets prepended to the
|
|
7
|
+
* Phase 14.5 retrieval-contract preamble during cycle execution.
|
|
8
|
+
*
|
|
9
|
+
* v1.27.6 ships the analyzer + injection text builder. The event-
|
|
10
|
+
* emission side-effect is wired here for downstream consumers. The
|
|
11
|
+
* actual `reference.read` event emission from agent-read paths is
|
|
12
|
+
* deferred to a follow-up phase (this library is ready to consume
|
|
13
|
+
* those events when they exist).
|
|
14
|
+
*
|
|
15
|
+
* No external deps. Pure analyzer + lazy event-stream require.
|
|
16
|
+
*/
|
|
17
|
+
'use strict';
|
|
18
|
+
|
|
19
|
+
const DEFAULT_THRESHOLD = 3; // D-11 — '>= 3 agents'
|
|
20
|
+
|
|
21
|
+
/**
 * Resolve the event-stream `appendEvent` helper on demand.
 *
 * The sibling `../event-stream` module is required lazily. If the require
 * throws, or the module does not expose an `appendEvent` function, a
 * do-nothing function is returned instead, so callers can always invoke
 * the result without guarding.
 *
 * @returns {(ev: object) => void}
 */
function getAppendEvent() {
  let candidate;
  try {
    const mod = require('../event-stream');
    candidate = mod ? mod.appendEvent : undefined;
  } catch {
    // event-stream not on path: fall through to the no-op below
    candidate = undefined;
  }
  if (typeof candidate === 'function') return candidate;
  return function noopAppend(_ev) {};
}
|
|
35
|
+
|
|
36
|
+
/**
 * Find reference files read by at least `threshold` distinct agents within
 * the same cycle. Pure: consumes an in-memory events array and returns a
 * structured result without side effects.
 *
 * Only events with `type === 'reference.read'` and string
 * `payload.ref_path` / `payload.agent` are considered. The event's cycle is
 * `ev.cycle` when it is a string, else `ev.payload.cycle` when that is a
 * string, else the empty string. The first string `payload.content_hash`
 * seen for a (cycle, ref_path) group is reported as `hash`.
 *
 * @param {object} [opts]
 * @param {Array<object>} [opts.events]    Event-stream entries (any shape)
 * @param {number}        [opts.threshold] Minimum distinct agents; values
 *                                         that are not numbers >= 1 fall back
 *                                         to DEFAULT_THRESHOLD, fractional
 *                                         values are floored
 * @param {string}        [opts.cycle]     Filter: only consider events whose
 *                                         resolved cycle === this value
 * @returns {{duplicates: Array<{ref_path: string, agents: string[], hash?: string, cycle?: string}>}}
 *   Sorted by `ref_path`; each `agents` list is sorted. `cycle` is
 *   `undefined` when the group's cycle resolved to the empty string.
 */
function detectDuplicateReferenceReads({ events, threshold, cycle } = {}) {
  const list = Array.isArray(events) ? events : [];
  const N = typeof threshold === 'number' && threshold >= 1
    ? Math.floor(threshold)
    : DEFAULT_THRESHOLD;
  const cycleFilter = typeof cycle === 'string' && cycle.length > 0 ? cycle : null;

  // Group by (cycle, ref_path) → Set<agent>
  const groups = new Map();
  for (const ev of list) {
    if (!ev || ev.type !== 'reference.read') continue;
    if (!ev.payload || typeof ev.payload.ref_path !== 'string' || typeof ev.payload.agent !== 'string') continue;
    const evCycle = typeof ev.cycle === 'string'
      ? ev.cycle
      : (typeof ev.payload.cycle === 'string' ? ev.payload.cycle : '');
    if (cycleFilter !== null && evCycle !== cycleFilter) continue;

    // JSON-encode the pair so the key is unambiguous. Plain concatenation
    // with a separator lets ("a b", "c") collide with ("a", "b c") and
    // merge the agents of unrelated groups.
    const key = JSON.stringify([evCycle, ev.payload.ref_path]);
    let group = groups.get(key);
    if (!group) {
      group = { cycle: evCycle, ref_path: ev.payload.ref_path, agents: new Set(), hash: undefined };
      groups.set(key, group);
    }
    group.agents.add(ev.payload.agent);
    if (typeof ev.payload.content_hash === 'string' && !group.hash) {
      group.hash = ev.payload.content_hash;
    }
  }

  const duplicates = [];
  for (const group of groups.values()) {
    if (group.agents.size >= N) {
      duplicates.push({
        ref_path: group.ref_path,
        agents: [...group.agents].sort(),
        hash: group.hash,
        cycle: group.cycle || undefined,
      });
    }
  }
  duplicates.sort((a, b) => a.ref_path.localeCompare(b.ref_path));
  return { duplicates };
}
|
|
90
|
+
|
|
91
|
+
/**
 * Render the markdown "Shared Context" preamble for a set of duplicate
 * reference reads.
 *
 * Produces a heading, an explanatory paragraph, one bullet per duplicate
 * (path, reading agents, and the content hash when present) and an opt-out
 * hint. When a non-empty string `sessionId` is given, it is appended as a
 * trailing `<!-- dedup-session: … -->` HTML comment.
 *
 * @param {object} [opts]
 * @param {Array<object>} [opts.duplicates]  From detectDuplicateReferenceReads
 * @param {string}        [opts.sessionId]   Optional breadcrumb
 * @returns {string} The preamble text, or '' when there are no duplicates.
 */
function buildPreambleInjection({ duplicates, sessionId } = {}) {
  if (!Array.isArray(duplicates) || duplicates.length === 0) return '';

  const bullets = duplicates.map((dup) => {
    const hashSuffix = dup.hash ? ` [hash: ${dup.hash}]` : '';
    return `- \`${dup.ref_path}\` (read by: ${dup.agents.join(', ')})${hashSuffix}`;
  });

  const out = [
    '## Shared Context (Phase 27.6 dedup)',
    '',
    'The following reference files have been read by >= 3 agents in this cycle and are now loaded ONCE as shared context. Subsequent agents see a content-hash reference instead of the full file body:',
    '',
    ...bullets,
    '',
    "To opt out of dedup for a specific read, set `GDD_DEDUP_OPT_OUT=1` in the agent's environment.",
    '',
  ];

  // The session id is carried only as an HTML comment breadcrumb, so it
  // does not show up in rendered markdown.
  const hasSession = typeof sessionId === 'string' && sessionId.length > 0;
  if (hasSession) {
    out.push(`<!-- dedup-session: ${sessionId} -->`, '');
  }

  return out.join('\n');
}
|
|
125
|
+
|
|
126
|
+
/**
 * Append one `dedup.injection` event per duplicate through the appender
 * returned by getAppendEvent().
 *
 * Does nothing when `duplicates` is not a non-empty array; the appender is
 * only resolved once there is something to emit. Each event carries a
 * fresh ISO timestamp, the given `sessionId` (or the literal
 * 'prompt-dedup' when it is missing or empty) and a payload describing
 * the duplicate.
 *
 * @param {object} [opts]
 * @param {Array<object>} [opts.duplicates]
 * @param {string}        [opts.sessionId]
 * @returns {void}
 */
function emitDedupInjection({ duplicates, sessionId } = {}) {
  if (!Array.isArray(duplicates) || duplicates.length === 0) return;

  const append = getAppendEvent();
  const hasSession = typeof sessionId === 'string' && sessionId.length > 0;

  // Build the event for a single duplicate; the timestamp is taken per
  // event, at the moment it is emitted.
  const toEvent = (dup) => ({
    type: 'dedup.injection',
    timestamp: new Date().toISOString(),
    sessionId: hasSession ? sessionId : 'prompt-dedup',
    payload: {
      ref_path: dup.ref_path,
      agents: dup.agents,
      agent_count: dup.agents.length,
      content_hash: dup.hash,
      cycle: dup.cycle,
    },
  });

  for (const dup of duplicates) {
    append(toEvent(dup));
  }
}
|
|
155
|
+
|
|
156
|
+
module.exports = {
|
|
157
|
+
detectDuplicateReferenceReads,
|
|
158
|
+
buildPreambleInjection,
|
|
159
|
+
emitDedupInjection,
|
|
160
|
+
DEFAULT_THRESHOLD,
|
|
161
|
+
};
|
|
@@ -80,12 +80,130 @@ const rateGuard = _nodeRequire(
|
|
|
80
80
|
ingestHeaders: (provider: string, headers: unknown) => Promise<unknown>;
|
|
81
81
|
};
|
|
82
82
|
|
|
83
|
+
// ── Plan 27.5-03 — Bandit posterior feedback loop ────────────────────────────
|
|
84
|
+
//
|
|
85
|
+
// `integration.cjs` is the Phase 27.5-01 production-integration shim for the
|
|
86
|
+
// Phase 23.5 bandit posterior. It exposes `recordOutcome({agent, bin, delegate,
|
|
87
|
+
// tier, status, costUsd, adaptiveMode, baseDir?, posteriorPath?})` which writes
|
|
88
|
+
// the (status + cost) reward back to the posterior arm for the (agent × bin ×
|
|
89
|
+
// tier × delegate) joint. Per CONTEXT D-04, the call fires AFTER every
|
|
90
|
+
// `emit('session.completed', …)` site so the posterior reflects the measured
|
|
91
|
+
// signal — correctness + cost.
|
|
92
|
+
//
|
|
93
|
+
// The shim is no-throw (best-effort write). The session-runner wraps each
|
|
94
|
+
// `recordOutcome` call in its own try/catch as a defensive belt-and-braces
|
|
95
|
+
// guard against future shim changes.
|
|
96
|
+
const banditIntegration = _nodeRequire(
|
|
97
|
+
_resolve(_REPO_ROOT, 'scripts/lib/bandit-router/integration.cjs'),
|
|
98
|
+
) as {
|
|
99
|
+
recordOutcome: (input: {
|
|
100
|
+
agent: string;
|
|
101
|
+
bin: string;
|
|
102
|
+
delegate?: string;
|
|
103
|
+
tier: string;
|
|
104
|
+
status: string;
|
|
105
|
+
costUsd?: number;
|
|
106
|
+
adaptiveMode?: 'static' | 'hedge' | 'full';
|
|
107
|
+
baseDir?: string;
|
|
108
|
+
posteriorPath?: string;
|
|
109
|
+
}) => void;
|
|
110
|
+
DELEGATE_NONE: string;
|
|
111
|
+
};
|
|
112
|
+
|
|
113
|
+
// ── Plan 27.5-03 — adaptive-mode read once per run ──────────────────────────
|
|
114
|
+
//
|
|
115
|
+
// `adaptive-mode.cjs.getMode({quiet: true})` reads `.design/budget.json` and
|
|
116
|
+
// returns `'static' | 'hedge' | 'full'`. We cache the resolved mode locally
|
|
117
|
+
// on each `run()` invocation so the recordOutcome calls at the 4 terminal
|
|
118
|
+
// emit sites all see the same value (consistent gating per session).
|
|
119
|
+
const adaptiveModeLib = _nodeRequire(
|
|
120
|
+
_resolve(_REPO_ROOT, 'scripts/lib/adaptive-mode.cjs'),
|
|
121
|
+
) as {
|
|
122
|
+
getMode: (opts?: { baseDir?: string; budgetPath?: string; quiet?: boolean }) => 'static' | 'hedge' | 'full';
|
|
123
|
+
};
|
|
124
|
+
|
|
83
125
|
/** Rate-guard provider key for the Anthropic Agent SDK. */
|
|
84
126
|
const RATE_GUARD_PROVIDER = 'anthropic';
|
|
85
127
|
|
|
86
128
|
/** Default retries (first attempt + 1 retry). */
|
|
87
129
|
const DEFAULT_MAX_RETRIES = 2;
|
|
88
130
|
|
|
131
|
+
/**
|
|
132
|
+
* Default bin marker for bandit posterior writes from session-runner.
|
|
133
|
+
*
|
|
134
|
+
* Per CONTEXT D-12, session-runner uses a deterministic placeholder bin
|
|
135
|
+
* (`'medium'`) for now; real complexity-class-based bin selection is
|
|
136
|
+
* deferred to a later plan. This matches the 27.5-02 budget-enforcer
|
|
137
|
+
* convention so the (agent × bin) posterior slices stay consistent
|
|
138
|
+
* across both write paths.
|
|
139
|
+
*/
|
|
140
|
+
const SESSION_RUNNER_DEFAULT_BIN = 'medium';
|
|
141
|
+
|
|
142
|
+
/**
 * Map a model identifier to one of the tiers 'opus' | 'sonnet' | 'haiku'.
 *
 * The match is a case-insensitive substring test: a name containing
 * "opus" maps to 'opus' (checked first), one containing "haiku" maps to
 * 'haiku'. Everything else, including `null`, `undefined`, non-strings
 * and the empty string, maps to 'sonnet'.
 */
function tierFromModel(modelName: string | null | undefined): 'opus' | 'sonnet' | 'haiku' {
  if (typeof modelName === 'string' && modelName.length > 0) {
    const normalized = modelName.toLowerCase();
    // Order matters: 'opus' is tested before 'haiku'.
    for (const tier of ['opus', 'haiku'] as const) {
      if (normalized.includes(tier)) return tier;
    }
  }
  return 'sonnet';
}
|
|
158
|
+
|
|
159
|
+
/**
 * Best-effort forwarder to `banditIntegration.recordOutcome`.
 *
 * Passes exactly the seven fields below to the shim and never lets a
 * failure escape: any exception raised while reading `input` or calling
 * the shim is swallowed. When the environment variable
 * `GDD_BANDIT_DEBUG` is exactly '1', the swallowed error's message is
 * written to stderr (and a failure of that write is swallowed too).
 *
 * NOTE(review): other comments in this file describe the shim as no-throw
 * and as a no-op in static/hedge mode, which would make the guard here
 * purely defensive. Confirm against `integration.cjs`.
 */
function _recordBanditOutcome(input: {
  agent: string;
  bin: string;
  delegate: string;
  tier: string;
  status: string;
  costUsd: number;
  adaptiveMode: 'static' | 'hedge' | 'full';
}): void {
  try {
    // Destructure inside the try so a malformed `input` is swallowed too.
    const { agent, bin, delegate, tier, status, costUsd, adaptiveMode } = input;
    banditIntegration.recordOutcome({ agent, bin, delegate, tier, status, costUsd, adaptiveMode });
  } catch (err) {
    // Telemetry failure must never break a session; only report on request.
    if (process.env['GDD_BANDIT_DEBUG'] !== '1') return;
    try {
      const reason = err instanceof Error ? err.message : String(err);
      process.stderr.write(`[session-runner] _recordBanditOutcome swallowed: ${reason}\n`);
    } catch {
      /* swallow */
    }
  }
}
|
|
206
|
+
|
|
89
207
|
// ── Plan 27-06 — Peer-CLI delegation primitives ─────────────────────────────
|
|
90
208
|
//
|
|
91
209
|
// Lazy registry loader: the registry is a .cjs module under scripts/lib/peer-cli
|
|
@@ -680,6 +798,22 @@ export async function run(opts: SessionRunnerOptions): Promise<SessionResult> {
|
|
|
680
798
|
const toolCalls: SessionResult['tool_calls'] = [];
|
|
681
799
|
const usage = { input: 0, output: 0, model: null as string | null };
|
|
682
800
|
let turns = 0;
|
|
801
|
+
|
|
802
|
+
// -- 3a. Resolve adaptive-mode once for the entire session (Plan 27.5-03). --
|
|
803
|
+
// Cached locally so all four `recordOutcome()` call sites below see the
|
|
804
|
+
// same gating decision (consistent posterior-write semantics across
|
|
805
|
+
// rate-limit, peer, turnCap=0, and terminal-completion paths).
|
|
806
|
+
//
|
|
807
|
+
// Wrapped in try/catch because adaptive-mode.getMode reads
|
|
808
|
+
// `.design/budget.json`; a broken fs.readFile / JSON.parse must not
|
|
809
|
+
// crash the session before it even starts. Fallback = 'static' which
|
|
810
|
+
// short-circuits the recordOutcome shim (no-op).
|
|
811
|
+
let adaptiveMode: 'static' | 'hedge' | 'full' = 'static';
|
|
812
|
+
try {
|
|
813
|
+
adaptiveMode = adaptiveModeLib.getMode({ quiet: true });
|
|
814
|
+
} catch {
|
|
815
|
+
// swallow — fallback to 'static' means no posterior writes
|
|
816
|
+
}
|
|
683
817
|
let finalText: string | undefined;
|
|
684
818
|
|
|
685
819
|
// -- 4. Emit session.started. -------------------------------------------
|
|
@@ -719,6 +853,20 @@ export async function run(opts: SessionRunnerOptions): Promise<SessionResult> {
|
|
|
719
853
|
transcript_path: transcriptPath,
|
|
720
854
|
sanitizer: { applied: [...result.sanitizer.applied], removedSections: [...result.sanitizer.removedSections] },
|
|
721
855
|
});
|
|
856
|
+
// Plan 27.5-03 — feedback loop. Posterior records the
|
|
857
|
+
// measured outcome (status + cost) for the (agent × bin × tier × delegate)
|
|
858
|
+
// slice. The rate-limit preflight failure path has no peer dispatch and no
|
|
859
|
+
// usage data (zero cost), so delegate=DELEGATE_NONE and tier falls back to
|
|
860
|
+
// 'sonnet' via tierFromModel(null). Shim no-ops in static/hedge mode.
|
|
861
|
+
_recordBanditOutcome({
|
|
862
|
+
agent: opts.stage,
|
|
863
|
+
bin: SESSION_RUNNER_DEFAULT_BIN,
|
|
864
|
+
delegate: banditIntegration.DELEGATE_NONE,
|
|
865
|
+
tier: tierFromModel(usage.model),
|
|
866
|
+
status: result.status,
|
|
867
|
+
costUsd: result.usage.usd_cost,
|
|
868
|
+
adaptiveMode,
|
|
869
|
+
});
|
|
722
870
|
transcript.close();
|
|
723
871
|
return result;
|
|
724
872
|
}
|
|
@@ -765,6 +913,37 @@ export async function run(opts: SessionRunnerOptions): Promise<SessionResult> {
|
|
|
765
913
|
transcript_path: transcriptPath,
|
|
766
914
|
sanitizer: { applied: [...peerResult.sanitizer.applied], removedSections: [...peerResult.sanitizer.removedSections] },
|
|
767
915
|
});
|
|
916
|
+
// Plan 27.5-03 — feedback loop, peer path. The
|
|
917
|
+
// delegate dimension is the peer name parsed from opts.delegateTo (e.g.
|
|
918
|
+
// 'gemini-research' → 'gemini'). Per CONTEXT D-04 we use the peer name
|
|
919
|
+
// for the delegate slice of the posterior so peer-success arms get the
|
|
920
|
+
// reward signal separately from local arms. Tier is 'sonnet' by default
|
|
921
|
+
// since the peer adapter doesn't surface a model identifier in v1.27.
|
|
922
|
+
// Re-parse opts.delegateTo here — tryDelegate already validated it but
|
|
923
|
+
// didn't expose the peer name on the returned SessionResult.
|
|
924
|
+
const _peerParsed = parseDelegateTo(opts.delegateTo);
|
|
925
|
+
const _delegate = _peerParsed !== null
|
|
926
|
+
? _peerParsed.peer
|
|
927
|
+
: banditIntegration.DELEGATE_NONE;
|
|
928
|
+
// Tier resolution priority for the peer path:
|
|
929
|
+
// 1. opts.delegateTier when it's a bare tier name (opus/sonnet/haiku)
|
|
930
|
+
// 2. tierFromModel(opts.delegateTier) when it's a model id string
|
|
931
|
+
// 3. tierFromModel(usage.model) fallback
|
|
932
|
+
// tierFromModel() is safe for any string and returns 'sonnet' on miss,
|
|
933
|
+
// so the second branch covers both bare-tier and model-id inputs.
|
|
934
|
+
const _peerTier: 'opus' | 'sonnet' | 'haiku' =
|
|
935
|
+
typeof opts.delegateTier === 'string' && opts.delegateTier.length > 0
|
|
936
|
+
? tierFromModel(opts.delegateTier)
|
|
937
|
+
: tierFromModel(usage.model);
|
|
938
|
+
_recordBanditOutcome({
|
|
939
|
+
agent: opts.stage,
|
|
940
|
+
bin: SESSION_RUNNER_DEFAULT_BIN,
|
|
941
|
+
delegate: _delegate,
|
|
942
|
+
tier: _peerTier,
|
|
943
|
+
status: peerResult.status,
|
|
944
|
+
costUsd: peerResult.usage.usd_cost,
|
|
945
|
+
adaptiveMode,
|
|
946
|
+
});
|
|
768
947
|
transcript.close();
|
|
769
948
|
if (opts.signal !== undefined) opts.signal.removeEventListener('abort', onExternalAbort);
|
|
770
949
|
return peerResult;
|
|
@@ -796,6 +975,18 @@ export async function run(opts: SessionRunnerOptions): Promise<SessionResult> {
|
|
|
796
975
|
transcript_path: transcriptPath,
|
|
797
976
|
sanitizer: { applied: [...sanResult.applied], removedSections: [...sanResult.removedSections] },
|
|
798
977
|
});
|
|
978
|
+
// Plan 27.5-03 — feedback loop, turnCap=0 path. No
|
|
979
|
+
// SDK call was ever made, so no peer involvement and no model id was
|
|
980
|
+
// ever observed. Reward will be 0 (status !== 'completed') with cost 0.
|
|
981
|
+
_recordBanditOutcome({
|
|
982
|
+
agent: opts.stage,
|
|
983
|
+
bin: SESSION_RUNNER_DEFAULT_BIN,
|
|
984
|
+
delegate: banditIntegration.DELEGATE_NONE,
|
|
985
|
+
tier: tierFromModel(usage.model),
|
|
986
|
+
status,
|
|
987
|
+
costUsd: result.usage.usd_cost,
|
|
988
|
+
adaptiveMode,
|
|
989
|
+
});
|
|
799
990
|
transcript.close();
|
|
800
991
|
if (opts.signal !== undefined) opts.signal.removeEventListener('abort', onExternalAbort);
|
|
801
992
|
return result;
|
|
@@ -890,6 +1081,21 @@ export async function run(opts: SessionRunnerOptions): Promise<SessionResult> {
|
|
|
890
1081
|
transcript_path: transcriptPath,
|
|
891
1082
|
sanitizer: { applied: [...sanResult.applied], removedSections: [...sanResult.removedSections] },
|
|
892
1083
|
});
|
|
1084
|
+
// Plan 27.5-03 — feedback loop, terminal main-loop path.
|
|
1085
|
+
// This is the dominant write site: covers natural completion, budget cap,
|
|
1086
|
+
// turn cap (after first turn), abort, and error (post-retry-exhaustion).
|
|
1087
|
+
// Tier is inferred from the model actually observed during the run
|
|
1088
|
+
// (usage.model). Delegate=DELEGATE_NONE because tryDelegate either returned
|
|
1089
|
+
// null (we wouldn't be here otherwise) or wasn't invoked at all.
|
|
1090
|
+
_recordBanditOutcome({
|
|
1091
|
+
agent: opts.stage,
|
|
1092
|
+
bin: SESSION_RUNNER_DEFAULT_BIN,
|
|
1093
|
+
delegate: banditIntegration.DELEGATE_NONE,
|
|
1094
|
+
tier: tierFromModel(usage.model),
|
|
1095
|
+
status: result.status,
|
|
1096
|
+
costUsd: result.usage.usd_cost,
|
|
1097
|
+
adaptiveMode,
|
|
1098
|
+
});
|
|
893
1099
|
|
|
894
1100
|
return result;
|
|
895
1101
|
}
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: gdd-bandit-status
|
|
3
|
+
description: "Surface read-only per-(agent, bin, delegate) bandit posterior snapshot — alpha/beta/mean/stddev/count/last-used per arm. Phase 27.5 (v1.27.5) diagnostic. Use when investigating 'why did the bandit pick tier X for agent Y?' or when verifying posterior convergence after enabling adaptive_mode: full."
|
|
4
|
+
argument-hint: ""
|
|
5
|
+
tools: Read, Bash
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
# gdd-bandit-status
|
|
9
|
+
|
|
10
|
+
## Role
|
|
11
|
+
|
|
12
|
+
You are a deterministic, read-only diagnostic skill. You do not spawn agents and do not modify the bandit posterior. You read `.design/telemetry/posterior.json` (the path declared by `scripts/lib/bandit-router.cjs`'s `DEFAULT_POSTERIOR_PATH` constant), aggregate per-`(agent, bin, delegate, tier)` arm state, and emit a single Markdown table summarizing the posterior. The user runs this when they want to inspect bandit decisions without touching the posterior.
|
|
13
|
+
|
|
14
|
+
Strictly read-only per Phase 27.5 D-11. To reset the posterior, use `/gdd:bandit-reset` from Phase 23.5.
|
|
15
|
+
|
|
16
|
+
## Invocation Contract
|
|
17
|
+
|
|
18
|
+
- **Input**: none. The skill takes no arguments.
|
|
19
|
+
- **Output**: a Markdown bandit-status table, followed by the brief best-arm summary described in Section 3, to stdout. No JSON wrapper and no other output.
|
|
20
|
+
|
|
21
|
+
## Procedure
|
|
22
|
+
|
|
23
|
+
### 1. Locate the posterior file
|
|
24
|
+
|
|
25
|
+
Read `.design/telemetry/posterior.json`. If the file does not exist:
|
|
26
|
+
|
|
27
|
+
- Emit the empty-state message:
|
|
28
|
+
|
|
29
|
+
```
|
|
30
|
+
## Bandit Posterior Snapshot
|
|
31
|
+
|
|
32
|
+
No posterior data yet — run a few pipeline cycles with `adaptive_mode: full` first.
|
|
33
|
+
|
|
34
|
+
No posterior data found at `.design/telemetry/posterior.json`.
|
|
35
|
+
|
|
36
|
+
Possible reasons:
|
|
37
|
+
- `adaptive_mode` is `static` or `hedge` (bandit is silent — see `.design/budget.json` `adaptive_mode` setting).
|
|
38
|
+
- No spawns have fired since Phase 27.5 wiring landed.
|
|
39
|
+
- Posterior was cleared via `/gdd:bandit-reset`.
|
|
40
|
+
|
|
41
|
+
See `reference/bandit-integration.md` for setup guidance.
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
- Skip to Section 4 (Record).
|
|
45
|
+
|
|
46
|
+
### 2. Parse the posterior
|
|
47
|
+
|
|
48
|
+
Parse the file as JSON. If parsing fails (truncated/corrupted file), emit the message below and skip to Section 4 (Record):
|
|
49
|
+
|
|
50
|
+
```
|
|
51
|
+
## Bandit Posterior Snapshot
|
|
52
|
+
|
|
53
|
+
Posterior file at `.design/telemetry/posterior.json` exists but is unparseable (truncated or corrupted).
|
|
54
|
+
|
|
55
|
+
Run `/gdd:bandit-reset` to start fresh, or restore from a backup.
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
The posterior schema is:
|
|
59
|
+
|
|
60
|
+
```json
|
|
61
|
+
{
|
|
62
|
+
"schema_version": "1.0.0",
|
|
63
|
+
"generated_at": "<ISO timestamp>",
|
|
64
|
+
"arms": [
|
|
65
|
+
{ "agent": "...", "bin": "...", "tier": "...", "delegate": "...", "alpha": N, "beta": N, "last_used": "...", "count": N }
|
|
66
|
+
]
|
|
67
|
+
}
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
The `delegate` field is optional — when absent, the arm is the Phase 23.5 legacy slice (equivalent to `delegate: 'none'`). The status output renders `delegate: '-'` for legacy arms to distinguish them visually from explicit `'none'` arms.
|
|
71
|
+
|
|
72
|
+
### 3. Render the table
|
|
73
|
+
|
|
74
|
+
Compute per arm:
|
|
75
|
+
|
|
76
|
+
- `mean = alpha / (alpha + beta)` (rounded to 3 decimals)
|
|
77
|
+
- `stddev = sqrt(alpha * beta / ((alpha + beta)^2 * (alpha + beta + 1)))` (rounded to 3 decimals)
|
|
78
|
+
|
|
79
|
+
Sort arms by `(agent ascending, bin ascending, delegate ascending where '-' sorts first, tier ascending where opus < sonnet < haiku is the canonical tier ordering, last_used descending tiebreaker)`. Group rows by agent for readability.
|
|
80
|
+
|
|
81
|
+
Emit:
|
|
82
|
+
|
|
83
|
+
```
|
|
84
|
+
## Bandit Posterior Snapshot
|
|
85
|
+
|
|
86
|
+
Per-(agent, bin, delegate, tier) posterior state. Read-only — to reset the posterior, use `/gdd:bandit-reset` (Phase 23.5).
|
|
87
|
+
|
|
88
|
+
Posterior file: `.design/telemetry/posterior.json` (last updated: <generated_at>)
|
|
89
|
+
Total arms: <count>
|
|
90
|
+
|
|
91
|
+
| Agent | Bin | Delegate | Tier | Alpha | Beta | Mean | Stddev | Count | Last Used |
|
|
92
|
+
|-----------------|--------|----------|--------|-------|-------|-------|--------|-------|----------------------|
|
|
93
|
+
| <agent> | <bin> | <deleg> | <tier> | <a> | <b> | <m> | <s> | <c> | <last_used or '-'> |
|
|
94
|
+
|
|
95
|
+
> Mean = alpha / (alpha + beta). Stddev = sqrt(alpha*beta / ((alpha+beta)^2 * (alpha+beta+1))).
|
|
96
|
+
> Delegate '-' = Phase 23.5 legacy slice (equivalent to 'none').
|
|
97
|
+
> See `reference/bandit-integration.md` for interpretation.
|
|
98
|
+
> Read-only — use `/gdd:bandit-reset` to clear posterior state.
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
Format numbers to fixed precision: alpha/beta to 2 decimals, mean/stddev to 3 decimals, count as integer, last_used truncated to the minute precision (`YYYY-MM-DDTHH:MM`).
|
|
102
|
+
|
|
103
|
+
When `last_used` is null (arm exists but never selected — possible if the arm was created by `ensureArm` without a subsequent `pull`), render `-` in the Last Used column.
|
|
104
|
+
|
|
105
|
+
After the table, surface a brief best-arm summary per `(agent, bin)` slice — for each unique `(agent, bin)` pair, identify the arm with the highest `mean` (tie-broken by `count` descending) and display it as the "best-arm" recommendation. This helps the operator answer "why did the bandit pick tier X?" at a glance.
|
|
106
|
+
|
|
107
|
+
### 4. Record
|
|
108
|
+
|
|
109
|
+
After execution, append one JSONL line to `.design/skill-records.jsonl`:
|
|
110
|
+
|
|
111
|
+
```json
|
|
112
|
+
{"skill": "gdd-bandit-status", "ts": "<ISO timestamp>", "arms_seen": <count>, "posterior_present": <bool>}
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
The skill writes ONLY to `.design/skill-records.jsonl` for telemetry purposes. It never touches `.design/telemetry/posterior.json`.
|
|
116
|
+
|
|
117
|
+
## Cross-references
|
|
118
|
+
|
|
119
|
+
- `scripts/lib/bandit-router.cjs` (Phase 23.5) — posterior shape, `DEFAULT_POSTERIOR_PATH` constant, `loadPosterior()` helper.
|
|
120
|
+
- `scripts/lib/bandit-router/integration.cjs` (Phase 27.5-01) — production-integration shim.
|
|
121
|
+
- `hooks/budget-enforcer.ts` (Phase 27.5-02) — bandit consultation site.
|
|
122
|
+
- `scripts/lib/session-runner/index.ts` (Phase 27.5-03) — outcome recording site.
|
|
123
|
+
- `scripts/lib/bandit-arbitrage.cjs` (Phase 27.5-04) — automated stale-frontmatter analysis.
|
|
124
|
+
- `reference/bandit-integration.md` (Phase 27.5-06) — operator guide.
|
|
125
|
+
- `/gdd:bandit-reset` (Phase 23.5) — the ONLY surface that mutates the posterior.
|
|
126
|
+
|
|
127
|
+
## Record
|
|
128
|
+
|
|
129
|
+
See Section 4 above.
|