@hegemonart/get-design-done 1.27.1 → 1.27.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/CHANGELOG.md +95 -0
- package/SKILL.md +1 -0
- package/agents/design-reflector.md +52 -0
- package/agents/perf-analyzer.md +166 -0
- package/hooks/budget-enforcer.ts +249 -5
- package/hooks/gdd-precompact-snapshot.js +334 -0
- package/hooks/gdd-sessionstart-recap.js +281 -0
- package/hooks/hooks.json +18 -0
- package/package.json +2 -2
- package/reference/bandit-integration.md +163 -0
- package/reference/perf-budget.md +142 -0
- package/reference/registry.json +14 -0
- package/reference/retrieval-contract.md +16 -0
- package/scripts/lib/bandit-arbitrage.cjs +423 -0
- package/scripts/lib/bandit-router/integration.cjs +309 -0
- package/scripts/lib/cache/gdd-cache-manager.cjs +292 -0
- package/scripts/lib/discuss-parallel-runner/index.ts +5 -1
- package/scripts/lib/explore-parallel-runner/index.ts +5 -1
- package/scripts/lib/parallelism-engine/concurrency-tuner.cjs +259 -0
- package/scripts/lib/parallelism-engine/concurrency-tuner.d.cts +53 -0
- package/scripts/lib/perf-analyzer/cost-regression.cjs +299 -0
- package/scripts/lib/perf-analyzer/index.cjs +139 -0
- package/scripts/lib/prompt-dedup/index.cjs +161 -0
- package/scripts/lib/session-runner/index.ts +206 -0
- package/skills/bandit-status/SKILL.md +129 -0
- package/skills/peers/SKILL.md +27 -8
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* scripts/lib/parallelism-engine/concurrency-tuner.cjs — Plan 27.6-04
|
|
3
|
+
*
|
|
4
|
+
* Data-driven concurrency resolver per Phase 27.6 D-07. Reads the
|
|
5
|
+
* most-recent `parallelism.verdict` event from .design/telemetry/
|
|
6
|
+
* events.jsonl (Phase 22 stream) and computes:
|
|
7
|
+
*
|
|
8
|
+
* resolveConcurrency = max(1, min(min(cpu-1, last_observed), ceiling))
|
|
9
|
+
*
|
|
10
|
+
* where:
|
|
11
|
+
* cpu = os.cpus().length (override via cpuCount opt)
|
|
12
|
+
* last_observed = payload.observed_concurrency from the latest
|
|
13
|
+
* parallelism.verdict event (null if absent)
|
|
14
|
+
* ceiling = process.env.GDD_CONCURRENCY_CEILING (default 8)
|
|
15
|
+
*
|
|
16
|
+
* Hard ceiling of 8 prevents pathological process-spawn storms on
|
|
17
|
+
* high-core machines (D-07 wording: "Hard ceiling prevents pathological
|
|
18
|
+
* process-spawn storms").
|
|
19
|
+
*
|
|
20
|
+
* Public surface:
|
|
21
|
+
* * resolveConcurrency({cpuCount?, lastObservedOptimum?, hardCeiling?,
|
|
22
|
+
* eventsPath?, baseDir?}) -> number (>=1)
|
|
23
|
+
* * readLastObservedOptimum({eventsPath?, baseDir?}) -> number|null
|
|
24
|
+
* * emitParallelismVerdict({task_ids, verdict, reason,
|
|
25
|
+
* intended_concurrency?, observed_concurrency?,
|
|
26
|
+
* contention_detected?, wall_clock_ms?,
|
|
27
|
+
* sessionId?}) -> void
|
|
28
|
+
* * DEFAULT_HARD_CEILING (=8)
|
|
29
|
+
* * DEFAULT_EVENTS_PATH (='.design/telemetry/events.jsonl')
|
|
30
|
+
*
|
|
31
|
+
* The `parallelism.verdict` payload extension is purely additive
|
|
32
|
+
* (`intended_concurrency`, `observed_concurrency`, `contention_detected`,
|
|
33
|
+
* `wall_clock_ms` are all optional). Existing consumers that only read
|
|
34
|
+
* `{task_ids, verdict, reason}` keep working unchanged.
|
|
35
|
+
*
|
|
36
|
+
* No external deps. Lazy event-stream require for emit (best-effort
|
|
37
|
+
* telemetry — a failed event-stream load must not break the resolver).
|
|
38
|
+
*/
|
|
39
|
+
'use strict';
|
|
40
|
+
|
|
41
|
+
const fs = require('node:fs');
|
|
42
|
+
const path = require('node:path');
|
|
43
|
+
const os = require('node:os');
|
|
44
|
+
|
|
45
|
+
// Fallback concurrency ceiling, returned by resolveCeiling() when neither
// an explicit override nor GDD_CONCURRENCY_CEILING supplies a usable value.
const DEFAULT_HARD_CEILING = 8;

// Events file used when the caller passes no `eventsPath`. It is relative,
// so resolvePath() joins it onto `baseDir` when one is given.
const DEFAULT_EVENTS_PATH = '.design/telemetry/events.jsonl';
|
|
47
|
+
|
|
48
|
+
/**
 * Obtain the telemetry appender without making event-stream a hard
 * dependency. The module is required lazily from `../event-stream`
 * (relative to this file); if the require throws, or the module does not
 * expose an `appendEvent` function, a do-nothing function is returned
 * instead so callers can always invoke the result.
 *
 * @returns {(ev: object) => void}
 */
function getAppendEvent() {
  let appender = null;
  try {
    // Everything that may throw stays inside the try: telemetry is
    // best-effort and must never break concurrency resolution.
    const eventStream = require('../event-stream');
    const candidate = eventStream ? eventStream.appendEvent : undefined;
    if (typeof candidate === 'function') {
      appender = candidate;
    }
  } catch {
    // Module missing or not loadable on this runtime — use the no-op.
    appender = null;
  }
  if (appender !== null) {
    return appender;
  }
  return function noopAppend(_ev) {
    /* event-stream unavailable */
  };
}
|
|
72
|
+
|
|
73
|
+
/**
 * Pick the hard concurrency ceiling. Precedence, highest first:
 *   1. `override`, when it is a number >= 1 (floored);
 *   2. the GDD_CONCURRENCY_CEILING env var, when it parses (base 10) to a
 *      finite integer >= 1;
 *   3. DEFAULT_HARD_CEILING.
 *
 * @param {number|undefined} override
 * @returns {number}
 */
function resolveCeiling(override) {
  const overrideUsable = typeof override === 'number' && override >= 1;
  if (overrideUsable) {
    return Math.floor(override);
  }

  const raw = process.env.GDD_CONCURRENCY_CEILING;
  if (typeof raw !== 'string' || raw.length === 0) {
    return DEFAULT_HARD_CEILING;
  }

  const fromEnv = parseInt(raw, 10);
  return Number.isFinite(fromEnv) && fromEnv >= 1 ? fromEnv : DEFAULT_HARD_CEILING;
}
|
|
90
|
+
|
|
91
|
+
/**
 * Work out which events file to read. Uses `eventsPath` when it is a
 * non-empty string, otherwise DEFAULT_EVENTS_PATH. A relative result is
 * joined onto `baseDir` when `baseDir` is truthy; an absolute result, or
 * any result when no `baseDir` is given, is returned untouched.
 *
 * @param {{eventsPath?: string, baseDir?: string}} opts
 * @returns {string}
 */
function resolvePath({ eventsPath, baseDir }) {
  const hasExplicitPath = typeof eventsPath === 'string' && eventsPath.length > 0;
  const chosen = hasExplicitPath ? eventsPath : DEFAULT_EVENTS_PATH;
  if (!baseDir || path.isAbsolute(chosen)) {
    return chosen;
  }
  return path.join(baseDir, chosen);
}
|
|
105
|
+
|
|
106
|
+
/**
 * Scan the events JSONL file top to bottom and return the floored
 * `payload.observed_concurrency` of the last `parallelism.verdict` event
 * that carries a numeric value for it. Blank lines and lines that are
 * not valid JSON are skipped. Returns null when the file does not exist,
 * cannot be read, or contains no matching event.
 *
 * @param {object} [opts]
 * @param {string} [opts.eventsPath]  override events.jsonl path
 * @param {string} [opts.baseDir]     base for relative eventsPath
 * @returns {number|null}
 */
function readLastObservedOptimum({ eventsPath, baseDir } = {}) {
  const file = resolvePath({ eventsPath, baseDir });
  if (!fs.existsSync(file)) {
    return null;
  }

  let contents;
  try {
    contents = fs.readFileSync(file, 'utf8');
  } catch {
    return null;
  }

  // Returns the floored observed concurrency for one line, or undefined
  // when the line is malformed or is not a usable verdict event.
  const observedFrom = (text) => {
    try {
      const event = JSON.parse(text);
      const isVerdict = Boolean(event) && event.type === 'parallelism.verdict';
      if (
        isVerdict
        && event.payload
        && typeof event.payload.observed_concurrency === 'number'
      ) {
        return Math.floor(event.payload.observed_concurrency);
      }
    } catch {
      // Tolerate malformed line — JSONL is best-effort.
    }
    return undefined;
  };

  let latest = null;
  for (const rawLine of contents.split(/\r?\n/)) {
    const text = rawLine.trim();
    if (text.length === 0) continue;
    const observed = observedFrom(text);
    // Later lines overwrite earlier ones, so the final value is the newest.
    if (observed !== undefined) latest = observed;
  }
  return latest;
}
|
|
146
|
+
|
|
147
|
+
/**
 * Compute the recommended concurrency (always an integer >= 1).
 *
 *   headroom = max(1, cores - 1)
 *   observed = lastObservedOptimum, or the value read from the events
 *              file when that option is null/undefined
 *   wanted   = min(headroom, floor(observed)) when observed is a number
 *              >= 1, otherwise headroom
 *   result   = max(1, min(wanted, ceiling))
 *
 * `cores` is `cpuCount` (floored) when it is a number >= 1, otherwise
 * `os.cpus().length`. `ceiling` comes from resolveCeiling(hardCeiling).
 *
 * @param {object} [opts]
 * @param {number} [opts.cpuCount]                  override os.cpus().length
 * @param {number|null} [opts.lastObservedOptimum]  explicit value; null/undefined triggers the JSONL read
 * @param {number} [opts.hardCeiling]               override the env/default ceiling
 * @param {string} [opts.eventsPath]                override events.jsonl path
 * @param {string} [opts.baseDir]                   base for relative eventsPath
 * @returns {number} integer >= 1
 */
function resolveConcurrency({
  cpuCount,
  lastObservedOptimum,
  hardCeiling,
  eventsPath,
  baseDir,
} = {}) {
  const hasCpuOverride = typeof cpuCount === 'number' && cpuCount >= 1;
  const cores = hasCpuOverride ? Math.floor(cpuCount) : os.cpus().length;
  // Leave one core free, but never drop below a single worker.
  const headroom = Math.max(1, cores - 1);

  const observed = lastObservedOptimum === undefined || lastObservedOptimum === null
    ? readLastObservedOptimum({ eventsPath, baseDir })
    : lastObservedOptimum;

  let wanted = headroom;
  if (typeof observed === 'number' && observed >= 1) {
    wanted = Math.min(headroom, Math.floor(observed));
  }

  const cap = resolveCeiling(hardCeiling);
  return Math.max(1, Math.min(wanted, cap));
}
|
|
186
|
+
|
|
187
|
+
/**
 * Append one `parallelism.verdict` event through the lazily loaded
 * event-stream appender. The payload always carries `task_ids` (an empty
 * array when the argument is not an array), `verdict` (anything other
 * than 'parallel'/'sequential' becomes 'sequential') and `reason`
 * ('unspecified' when not a string). The four optional fields are
 * copied into the payload only when they have the expected primitive
 * type. Any exception thrown while appending is swallowed on purpose:
 * telemetry must not interrupt the caller.
 *
 * @param {object} opts
 * @param {string[]} opts.task_ids
 * @param {'parallel'|'sequential'} opts.verdict
 * @param {string} opts.reason
 * @param {number} [opts.intended_concurrency]
 * @param {number} [opts.observed_concurrency]
 * @param {boolean} [opts.contention_detected]
 * @param {number} [opts.wall_clock_ms]
 * @param {string} [opts.sessionId]  defaults to 'concurrency-tuner'
 * @returns {void}
 */
function emitParallelismVerdict({
  task_ids,
  verdict,
  reason,
  intended_concurrency,
  observed_concurrency,
  contention_detected,
  wall_clock_ms,
  sessionId,
} = {}) {
  const append = getAppendEvent();

  /** @type {Record<string, unknown>} */
  const payload = {
    task_ids: Array.isArray(task_ids) ? task_ids : [],
    verdict: ['parallel', 'sequential'].includes(verdict) ? verdict : 'sequential',
    reason: typeof reason === 'string' ? reason : 'unspecified',
  };

  // Optional fields as [key, value, required typeof]. Unset or mistyped
  // values are left out so no `undefined` keys reach the wire.
  const optionalFields = [
    ['intended_concurrency', intended_concurrency, 'number'],
    ['observed_concurrency', observed_concurrency, 'number'],
    ['contention_detected', contention_detected, 'boolean'],
    ['wall_clock_ms', wall_clock_ms, 'number'],
  ];
  for (const [key, value, expectedType] of optionalFields) {
    if (typeof value === expectedType) {
      payload[key] = value;
    }
  }

  try {
    const hasSession = typeof sessionId === 'string' && sessionId.length > 0;
    append({
      type: 'parallelism.verdict',
      timestamp: new Date().toISOString(),
      sessionId: hasSession ? sessionId : 'concurrency-tuner',
      payload,
    });
  } catch {
    // Best-effort telemetry: a failed write must not break the caller.
  }
}
|
|
252
|
+
|
|
253
|
+
module.exports = {
|
|
254
|
+
resolveConcurrency,
|
|
255
|
+
readLastObservedOptimum,
|
|
256
|
+
emitParallelismVerdict,
|
|
257
|
+
DEFAULT_HARD_CEILING,
|
|
258
|
+
DEFAULT_EVENTS_PATH,
|
|
259
|
+
};
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
// scripts/lib/parallelism-engine/concurrency-tuner.d.cts — types for concurrency-tuner.cjs (Phase 27.6 D-07).
|
|
2
|
+
|
|
3
|
+
/** Options accepted by `resolveConcurrency`. Every field is optional. */
export interface ResolveConcurrencyOptions {
  /** Override CPU count detection (defaults to `os.cpus().length`). */
  cpuCount?: number;
  /**
   * Explicit last-observed optimum. `null` (like omitting the field)
   * makes the resolver read the value from the events file instead.
   */
  lastObservedOptimum?: number | null;
  /**
   * Hard ceiling cap. When omitted, the `GDD_CONCURRENCY_CEILING` env var
   * is consulted, then `DEFAULT_HARD_CEILING` (8).
   */
  hardCeiling?: number;
  /** Events file path override (else `DEFAULT_EVENTS_PATH`). */
  eventsPath?: string;
  /**
   * Directory that a relative events path is joined onto. When omitted,
   * a relative path is used as-is.
   */
  baseDir?: string;
}
|
|
15
|
+
|
|
16
|
+
/** Options accepted by `readLastObservedOptimum`. */
export interface ReadLastObservedOptimumOptions {
  /** Events file path override (else `DEFAULT_EVENTS_PATH`). */
  eventsPath?: string;
  /** Directory that a relative events path is joined onto. */
  baseDir?: string;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Argument of `emitParallelismVerdict`. `task_ids`, `verdict` and `reason`
 * form the original payload; the remaining fields are additive and are
 * emitted only when supplied.
 */
export interface EmitParallelismVerdictPayload {
  /** Identifiers of the tasks the verdict applies to. */
  task_ids?: string[];
  /** `'parallel'` or `'sequential'`; any other value is emitted as `'sequential'`. */
  verdict?: string;
  /** Free-form explanation; emitted as `'unspecified'` when omitted. */
  reason?: string;
  intended_concurrency?: number;
  observed_concurrency?: number;
  contention_detected?: boolean;
  wall_clock_ms?: number;
  /** Session id stamped on the event; defaults to `'concurrency-tuner'`. */
  sessionId?: string;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Resolve the concurrency default per D-07:
 * `max(1, min(cpu - 1, last_observed_optimum, hard_ceiling))`.
 * When no prior verdict is available the result is `cpu - 1` capped at
 * the hard ceiling. The returned value is always an integer >= 1.
 */
export function resolveConcurrency(opts?: ResolveConcurrencyOptions): number;
|
|
36
|
+
|
|
37
|
+
/**
 * Read `observed_concurrency` from the most recent `parallelism.verdict`
 * event in the events file that carries one.
 * Returns `null` when the file is missing or holds no such event.
 */
export function readLastObservedOptimum(opts?: ReadLastObservedOptimumOptions): number | null;
|
|
44
|
+
|
|
45
|
+
/**
 * Emit a `parallelism.verdict` event. The payload extension is additive,
 * so consumers that only read `{task_ids, verdict, reason}` are unaffected.
 * Never throws: a failed append is ignored.
 */
export function emitParallelismVerdict(payload?: EmitParallelismVerdictPayload): void;
|
|
51
|
+
|
|
52
|
+
/** Ceiling applied when no override or env value is usable (8). */
export const DEFAULT_HARD_CEILING: number;

/** Default events file path (`.design/telemetry/events.jsonl`). */
export const DEFAULT_EVENTS_PATH: string;
|
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* scripts/lib/perf-analyzer/cost-regression.cjs — Plan 27.6-01
|
|
3
|
+
*
|
|
4
|
+
* Stateless detection rules over the telemetry row arrays returned by
|
|
5
|
+
* scripts/lib/perf-analyzer/index.cjs. Three pure functions:
|
|
6
|
+
*
|
|
7
|
+
* detectCostRegressions — top-3 agents whose p50 USD-cost has
|
|
8
|
+
* regressed >= thresholdPct (default 25%
|
|
9
|
+
* per Phase 27.6 D-01) vs baseline across
|
|
10
|
+
* cyclesRequired distinct cycles (default 3).
|
|
11
|
+
* computeCacheHitDelta — per-agent current hit rate vs baseline.
|
|
12
|
+
* computeP95Spikes — per-agent p95 wall-time multiplier vs
|
|
13
|
+
* baseline. Flag when multiplier >= 1.5.
|
|
14
|
+
*
|
|
15
|
+
* All inputs are plain arrays / objects. No I/O. No external deps.
|
|
16
|
+
*/
|
|
17
|
+
'use strict';
|
|
18
|
+
|
|
19
|
+
/**
 * Median of a list of numbers. The input is copied before sorting, so
 * the caller's array is left untouched. An odd-length list yields its
 * middle element; an even-length list yields the average of the two
 * middle elements. Missing or empty input yields 0.
 *
 * @param {number[]} arr
 * @returns {number}
 */
function p50(arr) {
  if (!arr || arr.length === 0) return 0;

  const ascending = [...arr];
  ascending.sort((x, y) => x - y);

  const count = ascending.length;
  const upper = Math.floor(count / 2);
  if (count % 2 !== 0) {
    return ascending[upper];
  }
  return (ascending[upper - 1] + ascending[upper]) / 2;
}
|
|
32
|
+
|
|
33
|
+
/**
 * 95th-percentile pick from a list of numbers. The input is copied and
 * sorted ascending, then the element at index `floor(length * 0.95)` is
 * returned, with the index clamped to the last element. Missing or
 * empty input yields 0.
 *
 * @param {number[]} arr
 * @returns {number}
 */
function p95(arr) {
  if (!arr || arr.length === 0) return 0;

  const ascending = [...arr];
  ascending.sort((x, y) => x - y);

  const lastIndex = ascending.length - 1;
  const rank = Math.floor(ascending.length * 0.95);
  return ascending[rank > lastIndex ? lastIndex : rank];
}
|
|
46
|
+
|
|
47
|
+
/**
 * Bucket cost rows first by agent, then by cycle, collecting each row's
 * `est_cost_usd` in arrival order.
 *
 * Rows are dropped when they are falsy, when `agent` or `cycle` is not a
 * string, or when `est_cost_usd` is not a finite number. NaN and
 * +/-Infinity have `typeof 'number'` but would corrupt the numeric sort
 * and median computed from these buckets, so they are rejected here.
 *
 * @param {object[]} rows
 * @returns {Map<string, { cycles: Map<string, number[]> }>}
 */
function groupRowsByAgentCycle(rows) {
  const byAgent = new Map();
  for (const row of rows || []) {
    if (
      !row ||
      typeof row.agent !== 'string' ||
      typeof row.cycle !== 'string' ||
      // Number.isFinite is false for non-numbers as well as NaN/Infinity.
      !Number.isFinite(row.est_cost_usd)
    ) {
      continue;
    }
    let bucket = byAgent.get(row.agent);
    if (!bucket) {
      bucket = { cycles: new Map() };
      byAgent.set(row.agent, bucket);
    }
    let cycleCosts = bucket.cycles.get(row.cycle);
    if (!cycleCosts) {
      cycleCosts = [];
      bucket.cycles.set(row.cycle, cycleCosts);
    }
    cycleCosts.push(row.est_cost_usd);
  }
  return byAgent;
}
|
|
79
|
+
|
|
80
|
+
/**
 * Find the agents whose median cost has regressed against their baseline
 * and return the three largest by `delta_pct`.
 *
 * Per agent, the newest `cyclesRequired` cycle keys are selected (keys are
 * sorted as strings, descending). An agent is skipped, and counted in
 * `agents_skipped_insufficient_data`, when it has fewer cycles than
 * required or has no numeric `p50_usd` baseline. Otherwise the median of
 * all costs in the selected cycles is compared with the baseline:
 *   - baseline 0: any positive median is a regression (delta Infinity);
 *   - otherwise: regression when median >= baseline * (1 + thresholdPct/100).
 * The multiplicative comparison is used, rather than comparing `delta_pct`
 * with the threshold, so the exact-boundary case is not lost to
 * floating-point rounding.
 *
 * `summary.regressions_count` counts the returned (at most three) entries.
 *
 * @param {object} opts
 * @param {object[]} opts.rows - cost rows
 * @param {Record<string, {p50_usd:number, hit_rate?:number, p95_ms?:number}>} opts.baseline
 * @param {number} [opts.thresholdPct=25] - regression threshold in percent
 * @param {number} [opts.cyclesRequired=3] - distinct cycles needed per agent
 * @returns {{
 *   regressions: Array<{agent:string, baseline_p50_usd:number, current_p50_usd:number, delta_pct:number, cycles_observed:number}>,
 *   summary: {agents_evaluated:number, agents_skipped_insufficient_data:number, regressions_count:number, threshold_pct:number, cycles_required:number}
 * }}
 */
function detectCostRegressions({ rows, baseline, thresholdPct, cyclesRequired } = {}) {
  const pct = thresholdPct ?? 25;
  const windowSize = cyclesRequired ?? 3;
  const known = baseline || {};

  /** @type {Array<{agent:string, baseline_p50_usd:number, current_p50_usd:number, delta_pct:number, cycles_observed:number}>} */
  const flagged = [];
  let evaluated = 0;
  let skipped = 0;

  groupRowsByAgentCycle(rows).forEach((bucket, agent) => {
    const newestFirst = [...bucket.cycles.keys()].sort().reverse().slice(0, windowSize);
    const reference = known[agent];

    const insufficient =
      newestFirst.length < windowSize ||
      !reference ||
      typeof reference.p50_usd !== 'number';
    if (insufficient) {
      skipped += 1;
      return;
    }

    const observed = p50(newestFirst.flatMap((cycle) => bucket.cycles.get(cycle)));
    const base = reference.p50_usd;
    evaluated += 1;

    let delta_pct;
    let regressed;
    if (base !== 0) {
      delta_pct = ((observed - base) / base) * 100;
      regressed = observed >= base * (1 + pct / 100);
    } else {
      // No relative delta exists against a zero baseline.
      delta_pct = observed === 0 ? 0 : Infinity;
      regressed = observed > 0;
    }

    if (!regressed) return;
    flagged.push({
      agent,
      baseline_p50_usd: base,
      current_p50_usd: observed,
      delta_pct,
      cycles_observed: newestFirst.length,
    });
  });

  flagged.sort((left, right) => right.delta_pct - left.delta_pct);
  const regressions = flagged.slice(0, 3);

  const summary = {
    agents_evaluated: evaluated,
    agents_skipped_insufficient_data: skipped,
    regressions_count: regressions.length,
    threshold_pct: pct,
    cycles_required: windowSize,
  };
  return { regressions, summary };
}
|
|
169
|
+
|
|
170
|
+
/**
 * Compare each agent's recent cache hit rate with its baseline.
 *
 * Rows need a string `agent` and a string `cycle`; a row counts as a hit
 * only when `cache_hit === true`. Per agent, the newest `cyclesRequired`
 * cycle keys (sorted as strings, descending) are pooled into a single
 * hits/total ratio. A missing or non-numeric baseline `hit_rate` is
 * treated as 0; against a zero baseline `delta_pct` is 0 when the current
 * rate is also 0 and Infinity otherwise.
 *
 * @param {object} opts
 * @param {object[]} opts.rows
 * @param {Record<string, {hit_rate?:number}>} opts.baseline
 * @param {number} [opts.cyclesRequired=3]
 * @returns {{ perAgent: Array<{agent:string, baseline_hit_rate:number, current_hit_rate:number, delta_pct:number, cycles_observed:number}> }}
 */
function computeCacheHitDelta({ rows, baseline, cyclesRequired } = {}) {
  const windowSize = cyclesRequired ?? 3;
  const known = baseline || {};

  // agent -> Map<cycle, { hits, total }>
  const tallies = new Map();
  for (const row of rows || []) {
    const usable =
      Boolean(row) && typeof row.agent === 'string' && typeof row.cycle === 'string';
    if (!usable) continue;

    if (!tallies.has(row.agent)) {
      tallies.set(row.agent, new Map());
    }
    const cycles = tallies.get(row.agent);
    const tally = cycles.get(row.cycle) ?? { hits: 0, total: 0 };
    tally.total += 1;
    if (row.cache_hit === true) tally.hits += 1;
    cycles.set(row.cycle, tally);
  }

  /** @type {Array<{agent:string, baseline_hit_rate:number, current_hit_rate:number, delta_pct:number, cycles_observed:number}>} */
  const perAgent = [];
  tallies.forEach((cycles, agent) => {
    const recent = [...cycles.keys()].sort().reverse().slice(0, windowSize);
    if (recent.length === 0) return;

    const pooled = recent.reduce(
      (sum, cycle) => {
        const tally = cycles.get(cycle);
        return { hits: sum.hits + tally.hits, total: sum.total + tally.total };
      },
      { hits: 0, total: 0 },
    );
    if (pooled.total === 0) return;

    const current_hit_rate = pooled.hits / pooled.total;
    const reference = known[agent];
    const baseline_hit_rate =
      reference && typeof reference.hit_rate === 'number' ? reference.hit_rate : 0;

    let delta_pct;
    if (baseline_hit_rate !== 0) {
      delta_pct = ((current_hit_rate - baseline_hit_rate) / baseline_hit_rate) * 100;
    } else {
      delta_pct = current_hit_rate === 0 ? 0 : Infinity;
    }

    perAgent.push({
      agent,
      baseline_hit_rate,
      current_hit_rate,
      delta_pct,
      cycles_observed: recent.length,
    });
  });

  return { perAgent };
}
|
|
241
|
+
|
|
242
|
+
/**
 * Flag agents whose wall-time p95 has grown against the baseline.
 *
 * All `wall_time_ms` samples for an agent are pooled across every cycle
 * in `byCycle`. Entries need a string `agent` and a numeric
 * `wall_time_ms`. Agents with no numeric baseline `p95_ms`, or with a
 * baseline of 0, are ignored. An agent is reported when
 * `current_p95 / baseline_p95 >= multiplierThreshold`.
 *
 * @param {object} opts
 * @param {Record<string, object[]>} opts.byCycle
 * @param {Record<string, {p95_ms?:number}>} opts.baseline
 * @param {number} [opts.multiplierThreshold=1.5]
 * @returns {{ spikes: Array<{agent:string, baseline_p95_ms:number, current_p95_ms:number, multiplier:number, cycles_observed:number}> }}
 */
function computeP95Spikes({ byCycle, baseline, multiplierThreshold } = {}) {
  const minMultiplier = multiplierThreshold ?? 1.5;
  const known = baseline || {};

  /** @type {Map<string, { walls: number[], cycles: Set<string> }>} */
  const samples = new Map();
  Object.entries(byCycle || {}).forEach(([cycle, entries]) => {
    for (const entry of entries || []) {
      const usable =
        Boolean(entry) &&
        typeof entry.agent === 'string' &&
        typeof entry.wall_time_ms === 'number';
      if (!usable) continue;

      if (!samples.has(entry.agent)) {
        samples.set(entry.agent, { walls: [], cycles: new Set() });
      }
      const pooled = samples.get(entry.agent);
      pooled.walls.push(entry.wall_time_ms);
      pooled.cycles.add(cycle);
    }
  });

  /** @type {Array<{agent:string, baseline_p95_ms:number, current_p95_ms:number, multiplier:number, cycles_observed:number}>} */
  const spikes = [];
  for (const [agent, pooled] of samples) {
    const reference = known[agent];
    if (!reference || typeof reference.p95_ms !== 'number') continue;

    const base = reference.p95_ms;
    if (base === 0) continue; // no ratio can be formed against zero

    const current_p95_ms = p95(pooled.walls);
    const multiplier = current_p95_ms / base;
    // Kept as a positive `>=` test so a NaN ratio is never reported.
    if (multiplier >= minMultiplier) {
      spikes.push({
        agent,
        baseline_p95_ms: base,
        current_p95_ms,
        multiplier,
        cycles_observed: pooled.cycles.size,
      });
    }
  }

  return { spikes };
}
|
|
298
|
+
|
|
299
|
+
module.exports = { detectCostRegressions, computeCacheHitDelta, computeP95Spikes };
|