@tekyzinc/gsd-t 3.10.13 → 3.10.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -0
- package/bin/gsd-t-unattended-platform.cjs +381 -0
- package/bin/gsd-t-unattended-safety.cjs +766 -0
- package/bin/gsd-t-unattended.cjs +1259 -0
- package/bin/gsd-t.js +7 -1
- package/bin/handoff-lock.cjs +249 -0
- package/bin/headless-auto-spawn.cjs +328 -0
- package/bin/runway-estimator.cjs +242 -0
- package/bin/token-optimizer.cjs +471 -0
- package/bin/token-telemetry.cjs +246 -0
- package/commands/gsd-t-backlog-list.md +2 -2
- package/commands/gsd-t-complete-milestone.md +1 -1
- package/commands/gsd-t-debug.md +5 -5
- package/commands/gsd-t-doc-ripple.md +1 -1
- package/commands/gsd-t-execute.md +3 -3
- package/commands/gsd-t-integrate.md +3 -3
- package/commands/gsd-t-optimization-apply.md +3 -3
- package/commands/gsd-t-optimization-reject.md +3 -3
- package/commands/gsd-t-quick.md +3 -3
- package/commands/gsd-t-resume.md +1 -1
- package/commands/gsd-t-status.md +1 -1
- package/commands/gsd-t-unattended.md +2 -2
- package/commands/gsd-t-wave.md +3 -3
- package/package.json +1 -1
- package/scripts/context-meter/transcript-parser.js +63 -1
- package/scripts/context-meter/transcript-parser.test.js +53 -3
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* GSD-T Runway Estimator — Pre-flight context runway projection
|
|
5
|
+
*
|
|
6
|
+
* Reads current context percentage from the M34 context meter state file and
|
|
7
|
+
* historical token-telemetry records (token-metrics.jsonl) to project whether
|
|
8
|
+
* a command about to run will complete before the v3.0.0 stop band (85%).
|
|
9
|
+
*
|
|
10
|
+
* Confidence-weighted: high ≥50 records, medium ≥10, low <10. Low confidence
|
|
11
|
+
* applies a 1.25x conservative skew. On missing history a constant fallback
|
|
12
|
+
* is used (4%/task sonnet-default, 8%/task opus-default). On refusal the
|
|
13
|
+
* estimator never prompts the user — callers hand off to headless-auto-spawn.
|
|
14
|
+
*
|
|
15
|
+
* Zero external dependencies (Node.js built-ins only).
|
|
16
|
+
*
|
|
17
|
+
* Contract: .gsd-t/contracts/runway-estimator-contract.md v1.0.0
|
|
18
|
+
* Consumers: bin/gsd-t.js, commands/gsd-t-execute|wave|integrate|quick|debug.md
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
const fs = require("fs");
|
|
22
|
+
const path = require("path");
|
|
23
|
+
|
|
24
|
+
// ── Constants ────────────────────────────────────────────────────────────────
|
|
25
|
+
|
|
26
|
+
// Mirrors token-budget-contract v3.0.0 — must stay in sync.
|
|
27
|
+
const STOP_THRESHOLD_PCT = 85;
|
|
28
|
+
|
|
29
|
+
// Confidence grading thresholds (frozen in runway-estimator-contract v1.0.0).
|
|
30
|
+
const CONFIDENCE_HIGH_MIN = 50;
|
|
31
|
+
const CONFIDENCE_MEDIUM_MIN = 10;
|
|
32
|
+
|
|
33
|
+
// Conservative skew multiplier applied to low-confidence projections.
|
|
34
|
+
const LOW_CONFIDENCE_SKEW = 1.25;
|
|
35
|
+
|
|
36
|
+
// Conservative constant fallback when no history exists at all.
|
|
37
|
+
const FALLBACK_PCT_PER_TASK_SONNET = 4;
|
|
38
|
+
const FALLBACK_PCT_PER_TASK_OPUS = 8;
|
|
39
|
+
|
|
40
|
+
// Opus-default phases — used when picking a constant fallback for a command
|
|
41
|
+
// with no historical telemetry. Commands not listed default to sonnet.
|
|
42
|
+
const OPUS_DEFAULT_COMMANDS = new Set([
|
|
43
|
+
"gsd-t-debug",
|
|
44
|
+
"gsd-t-integrate",
|
|
45
|
+
]);
|
|
46
|
+
|
|
47
|
+
const STATE_FILE_REL = path.join(".gsd-t", ".context-meter-state.json");
|
|
48
|
+
const METRICS_FILE_REL = path.join(".gsd-t", "token-metrics.jsonl");
|
|
49
|
+
|
|
50
|
+
// ── Exports ──────────────────────────────────────────────────────────────────
|
|
51
|
+
|
|
52
|
+
module.exports = {
|
|
53
|
+
estimateRunway,
|
|
54
|
+
STOP_THRESHOLD_PCT,
|
|
55
|
+
CONFIDENCE_HIGH_MIN,
|
|
56
|
+
CONFIDENCE_MEDIUM_MIN,
|
|
57
|
+
LOW_CONFIDENCE_SKEW,
|
|
58
|
+
};
|
|
59
|
+
|
|
60
|
+
// ── estimateRunway ───────────────────────────────────────────────────────────
|
|
61
|
+
|
|
62
|
+
/**
 * Project where the context window will end up if `command` runs its
 * remaining tasks, and say whether it is safe to start.
 *
 * The projection is `current_pct + pct_per_task * remaining_tasks * skew`,
 * where skew is LOW_CONFIDENCE_SKEW for low-confidence estimates and 1.0
 * otherwise. The run may start only when the projection stays strictly
 * below STOP_THRESHOLD_PCT.
 *
 * @param {{
 *   command: string,
 *   domain_type?: string,
 *   remaining_tasks: number,
 *   projectDir?: string,
 *   headlessAvailable?: boolean
 * }} opts  `remaining_tasks` is clamped to >= 0 (non-numeric becomes 0);
 *          `projectDir` defaults to process.cwd(); `headlessAvailable` is
 *          treated as true unless it is exactly `false`.
 * @returns {{
 *   can_start: boolean,
 *   current_pct: number,
 *   projected_end_pct: number,
 *   confidence: 'low'|'medium'|'high',
 *   confidence_basis: number,
 *   pct_per_task: number,
 *   recommendation: 'proceed'|'headless'|'clear-and-resume',
 *   reason: string
 * }}
 */
function estimateRunway(opts) {
  const { command } = opts;
  const domainType = opts.domain_type || "";
  const taskCount = Math.max(0, Number(opts.remaining_tasks) || 0);
  const rootDir = opts.projectDir || process.cwd();
  const headlessAvailable = opts.headlessAvailable !== false;

  const current_pct = readCurrentPct(rootDir);
  const history = readMetrics(rootDir);
  const estimate = computePctPerTask(history, command, domainType);

  // Low-confidence estimates are inflated so the projection errs on the safe side.
  const multiplier = estimate.confidence === "low" ? LOW_CONFIDENCE_SKEW : 1.0;
  const projected_end_pct = round1(
    current_pct + estimate.pct_per_task * taskCount * multiplier,
  );
  const can_start = projected_end_pct < STOP_THRESHOLD_PCT;

  let recommendation = "proceed";
  let reason = `Projected end ${projected_end_pct}% < ${STOP_THRESHOLD_PCT}% stop threshold`;
  if (!can_start) {
    if (headlessAvailable) {
      recommendation = "headless";
      reason = `Projected end ${projected_end_pct}% ≥ ${STOP_THRESHOLD_PCT}% — auto-spawn headless`;
    } else {
      recommendation = "clear-and-resume";
      reason = `Projected end ${projected_end_pct}% ≥ ${STOP_THRESHOLD_PCT}% — headless unavailable, clear-and-resume`;
    }
  }

  return {
    can_start,
    current_pct,
    projected_end_pct,
    confidence: estimate.confidence,
    confidence_basis: estimate.confidence_basis,
    pct_per_task: round2(estimate.pct_per_task),
    recommendation,
    reason,
  };
}
|
|
126
|
+
|
|
127
|
+
// ── Internal: read current pct from M34 state file ──────────────────────────
|
|
128
|
+
|
|
129
|
+
/**
 * Read the current context percentage from the context-meter state file.
 *
 * @param {string} projectDir  Directory that contains STATE_FILE_REL.
 * @returns {number} `pct` from the state file rounded to one decimal, or 0
 *   when the file cannot be read/parsed or `pct` is not a finite number.
 */
function readCurrentPct(projectDir) {
  try {
    const statePath = path.join(projectDir, STATE_FILE_REL);
    const { pct } = JSON.parse(fs.readFileSync(statePath, "utf8"));
    if (typeof pct === "number" && Number.isFinite(pct)) {
      return round1(pct);
    }
  } catch (_) {
    // Read/parse failure: emit a warning, then fall back to 0 below.
    const warning = `runway-estimator: ${STATE_FILE_REL} missing or unreadable — assuming current_pct=0\n`;
    try {
      process.stderr.write(warning);
    } catch (_) {
      /* a failing stderr must not break the estimate */
    }
  }
  return 0;
}
|
|
149
|
+
|
|
150
|
+
// ── Internal: read token-metrics.jsonl ──────────────────────────────────────
|
|
151
|
+
|
|
152
|
+
/**
 * Load the token-telemetry history (one JSON object per line).
 *
 * Blank lines and lines that are not valid JSON are skipped. Lines that
 * parse to something other than a plain object (null, numbers, strings,
 * arrays) are skipped too: downstream code reads properties off every
 * record, so a bare `null` line would otherwise crash the estimator.
 *
 * @param {string} projectDir  Directory that contains METRICS_FILE_REL.
 * @returns {Array<object>} Parsed records in file order; [] when the file
 *   is missing or unreadable.
 */
function readMetrics(projectDir) {
  let raw;
  try {
    raw = fs.readFileSync(path.join(projectDir, METRICS_FILE_REL), "utf8");
  } catch (_) {
    return [];
  }

  const records = [];
  for (const line of raw.split("\n")) {
    const text = line.trim();
    if (!text) continue;
    let parsed;
    try {
      parsed = JSON.parse(text);
    } catch (_) {
      continue; // malformed line — skip
    }
    if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
      records.push(parsed);
    }
  }
  return records;
}
|
|
170
|
+
|
|
171
|
+
// ── Internal: compute pct-per-task with confidence grading ──────────────────
|
|
172
|
+
|
|
173
|
+
/**
 * Pick a per-task context cost for `command`, with a confidence grade.
 *
 * Tiers, tried in order:
 *   1. records matching both command and domain_type (only when domain_type
 *      is non-empty) — used when at least CONFIDENCE_MEDIUM_MIN exist;
 *   2. records matching the command — same minimum;
 *   3. constant fallback, always graded "low"; its basis is the number of
 *      command records found (fewer than CONFIDENCE_MEDIUM_MIN).
 *
 * @param {Array<object>} records
 * @param {string} command
 * @param {string} domain_type
 * @returns {{pct_per_task: number, confidence: string, confidence_basis: number}}
 */
function computePctPerTask(records, command, domain_type) {
  const fromSample = (sample) => ({
    pct_per_task: meanPctDelta(sample),
    confidence: gradeConfidence(sample.length),
    confidence_basis: sample.length,
  });
  const isSameCommand = (r) => r.command === command;

  if (domain_type) {
    const pairRecords = records.filter(
      (r) => isSameCommand(r) && r.domain_type === domain_type,
    );
    if (pairRecords.length >= CONFIDENCE_MEDIUM_MIN) return fromSample(pairRecords);
  }

  const commandRecords = records.filter(isSameCommand);
  if (commandRecords.length >= CONFIDENCE_MEDIUM_MIN) return fromSample(commandRecords);

  return {
    pct_per_task: fallbackPctPerTask(command),
    confidence: "low",
    confidence_basis: commandRecords.length,
  };
}
|
|
205
|
+
|
|
206
|
+
/**
 * Mean context-window growth (after − before) across telemetry records.
 *
 * A record is left out of the mean when either measurement is missing or
 * not numeric, or when the delta is negative. Measurements must be finite
 * numbers or non-blank numeric strings; `null`, `""`, booleans and arrays
 * count as missing, because coercing them with Number() would read them
 * as 0% and inflate the delta.
 *
 * @param {Array<object>} records  Records carrying context_window_pct_before
 *   and context_window_pct_after.
 * @returns {number} Mean delta of the usable records, or 0 when none are usable.
 */
function meanPctDelta(records) {
  let total = 0;
  let count = 0;
  for (const r of records) {
    if (!r) continue;
    const before = toFinitePct(r.context_window_pct_before);
    const after = toFinitePct(r.context_window_pct_after);
    if (!Number.isFinite(before) || !Number.isFinite(after)) continue;
    const delta = after - before;
    if (delta < 0) continue; // pathological — exclude from the mean
    total += delta;
    count += 1;
  }
  return count === 0 ? 0 : total / count;
}

// Convert a raw measurement to a finite number, or NaN when it is not one.
function toFinitePct(value) {
  if (typeof value === "number") return Number.isFinite(value) ? value : NaN;
  if (typeof value === "string" && value.trim() !== "") {
    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : NaN;
  }
  return NaN;
}
|
|
222
|
+
|
|
223
|
+
/**
 * Grade a sample size: "high" at CONFIDENCE_HIGH_MIN or more, "medium" at
 * CONFIDENCE_MEDIUM_MIN or more, otherwise "low".
 *
 * @param {number} n  Number of records behind the estimate.
 * @returns {'high'|'medium'|'low'}
 */
function gradeConfidence(n) {
  if (n >= CONFIDENCE_HIGH_MIN) {
    return "high";
  }
  return n >= CONFIDENCE_MEDIUM_MIN ? "medium" : "low";
}
|
|
228
|
+
|
|
229
|
+
/**
 * Constant per-task cost used when there is no usable history: the opus
 * constant for commands in OPUS_DEFAULT_COMMANDS, the sonnet constant for
 * every other command.
 *
 * @param {string} command
 * @returns {number}
 */
function fallbackPctPerTask(command) {
  return OPUS_DEFAULT_COMMANDS.has(command)
    ? FALLBACK_PCT_PER_TASK_OPUS
    : FALLBACK_PCT_PER_TASK_SONNET;
}
|
|
233
|
+
|
|
234
|
+
// ── Internal: rounding helpers ──────────────────────────────────────────────
|
|
235
|
+
|
|
236
|
+
/** Round `n` to one decimal place (Math.round semantics). */
function round1(n) {
  const tenths = Math.round(n * 10);
  return tenths / 10;
}
|
|
239
|
+
|
|
240
|
+
/** Round `n` to two decimal places (Math.round semantics). */
function round2(n) {
  const hundredths = Math.round(n * 100);
  return hundredths / 100;
}
|
|
@@ -0,0 +1,471 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* GSD-T Token Optimizer — Retrospective tier calibration detector
|
|
5
|
+
*
|
|
6
|
+
* Reads .gsd-t/token-metrics.jsonl (v1.0.0 frozen schema), applies a set
|
|
7
|
+
* of declarative detection rules, and appends recommendations to
|
|
8
|
+
* .gsd-t/optimization-backlog.md. Invoked at complete-milestone. Never
|
|
9
|
+
* blocks, never prompts, never auto-applies a recommendation.
|
|
10
|
+
*
|
|
11
|
+
* Four detection rules:
|
|
12
|
+
* - demote — opus phases (≥3 spawns) with ≥90% success (fix_cycle is not in token telemetry, so it is not checked)
|
|
13
|
+
* - escalate — sonnet phases (≥5 spawns) with failure rate ≥30% (proxy for fix-cycle rate)
|
|
14
|
+
* - runway-tune — runway estimator over-estimate >15 pts (no-op until records carry projected_end_pct/actual_end_pct)
|
|
15
|
+
* - investigate — per-phase (≥10 spawns) p95 consumption > 2× median (outlier signal)
|
|
16
|
+
*
|
|
17
|
+
* Rejected recommendations honor a 5-milestone cooldown so the same
|
|
18
|
+
* signal doesn't re-surface immediately.
|
|
19
|
+
*
|
|
20
|
+
* Zero external dependencies (Node.js built-ins only).
|
|
21
|
+
*
|
|
22
|
+
* Contract: .gsd-t/contracts/token-telemetry-contract.md v1.0.0 (read)
|
|
23
|
+
* .gsd-t/contracts/model-selection-contract.md v1.0.0 (read)
|
|
24
|
+
* Consumers: commands/gsd-t-complete-milestone.md
|
|
25
|
+
* commands/gsd-t-optimization-apply.md
|
|
26
|
+
* commands/gsd-t-optimization-reject.md
|
|
27
|
+
*/
|
|
28
|
+
|
|
29
|
+
const fs = require("fs");
|
|
30
|
+
const path = require("path");
|
|
31
|
+
|
|
32
|
+
const METRICS_REL = path.join(".gsd-t", "token-metrics.jsonl");
|
|
33
|
+
const BACKLOG_REL = path.join(".gsd-t", "optimization-backlog.md");
|
|
34
|
+
const REJECTION_COOLDOWN_MILESTONES = 5;
|
|
35
|
+
|
|
36
|
+
module.exports = {
|
|
37
|
+
detectRecommendations,
|
|
38
|
+
appendToBacklog,
|
|
39
|
+
readBacklog,
|
|
40
|
+
writeBacklog,
|
|
41
|
+
parseBacklog,
|
|
42
|
+
setRecommendationStatus,
|
|
43
|
+
DETECTION_RULES: getDetectionRules(),
|
|
44
|
+
REJECTION_COOLDOWN_MILESTONES,
|
|
45
|
+
};
|
|
46
|
+
|
|
47
|
+
// ── detectRecommendations ───────────────────────────────────────────────────
|
|
48
|
+
|
|
49
|
+
/**
 * Run every detection rule over the recent telemetry and return the new
 * recommendations, leaving out any hit whose fingerprint matches a rejected
 * backlog entry that still has a positive rejection cooldown.
 *
 * Nothing is written; callers persist the result themselves.
 *
 * @param {{projectDir?: string, lookbackMilestones?: number}} opts
 *   projectDir defaults to process.cwd(); lookbackMilestones defaults to 3
 *   and is clamped to at least 1.
 * @returns {Array<object>} recommendation objects (status "pending")
 */
function detectRecommendations(opts) {
  const projectDir = (opts && opts.projectDir) || process.cwd();
  const lookbackMilestones = Math.max(
    1,
    Number((opts && opts.lookbackMilestones) || 3),
  );

  const scopedRecords = filterByRecentMilestones(
    readMetrics(projectDir),
    lookbackMilestones,
  );
  const backlogEntries = parseBacklog(readBacklog(projectDir));

  // Fingerprints of rejected entries whose cooldown has not run out yet.
  const coolingDown = new Set();
  for (const entry of backlogEntries) {
    const isRejected = entry.status === "rejected";
    if (isRejected && Number(entry.rejection_cooldown || 0) > 0) {
      coolingDown.add(fingerprintFromEntry(entry));
    }
  }

  const rules = getDetectionRules();
  let nextId = nextIdCounter(backlogEntries);
  const recommendations = [];

  for (const rule of rules) {
    for (const hit of rule.detect(scopedRecords)) {
      const fingerprint = makeFingerprint(rule.type, hit);
      if (coolingDown.has(fingerprint)) continue; // still in cooldown — don't resurface
      recommendations.push({
        id: formatId(nextId++),
        type: rule.type,
        detected_at: new Date().toISOString(),
        evidence: hit.evidence,
        projected_savings: hit.projected_savings,
        proposed_change: hit.proposed_change,
        risk: hit.risk,
        status: "pending",
        rejection_cooldown: 0,
        fingerprint,
      });
    }
  }

  return recommendations;
}
|
|
104
|
+
|
|
105
|
+
// ── Detection rules ─────────────────────────────────────────────────────────
|
|
106
|
+
|
|
107
|
+
/**
 * Build the declarative detection rules. Each rule is `{type, detect}`;
 * `detect(records)` returns hits of the shape
 * `{evidence, projected_savings, proposed_change, risk, key}`, where `key`
 * is the identity used to fingerprint the hit.
 *
 * Rules, in order:
 *   - demote      — opus {command, phase} groups with ≥3 records and a
 *                   success rate ≥ 90%.
 *   - escalate    — sonnet {command, phase} groups with ≥5 records and a
 *                   failure rate ≥ 30%.
 *   - runway-tune — records whose numeric projected_end_pct exceeds the
 *                   numeric actual_end_pct by more than 15 points.
 *   - investigate — {command, phase} groups with ≥10 records whose p95
 *                   tokens_consumed is more than twice a positive median.
 *
 * @returns {Array<{type: string, detect: function(Array<object>): Array<object>}>}
 */
function getDetectionRules() {
  // Bucket key shared by the per-phase rules; a missing phase is keyed as "".
  const phaseKey = (r) => `${r.command}|${r.phase || ""}`;
  const tokensOf = (r) => r.tokens_consumed || 0;
  const countWithOutcome = (group, outcome) =>
    group.filter((r) => r.outcome === outcome).length;

  return [
    {
      type: "demote",
      detect(records) {
        const groups = groupBy(
          records.filter((r) => r.model === "opus"),
          phaseKey,
        );
        const hits = [];
        for (const [key, group] of groups) {
          if (group.length < 3) continue; // need signal
          const successRate = countWithOutcome(group, "success") / group.length;
          if (successRate < 0.9) continue;
          const meanTokens = mean(group.map(tokensOf));
          const [command, phase] = key.split("|");
          hits.push({
            evidence: `${group.length} ${command}/${phase} spawns on opus, ${Math.round(successRate * 100)}% success, avg ${Math.round(meanTokens)} tokens`,
            projected_savings: `~45% tokens (${Math.round(meanTokens * 0.45)} per spawn)`,
            proposed_change: `bin/model-selector.js — add {command:"${command}", phase:"${phase}"} to sonnet tier`,
            risk: "Low — equivalent success rate at sonnet tier; /advisor escalation available as safety net.",
            key: { command, phase },
          });
        }
        return hits;
      },
    },
    {
      type: "escalate",
      detect(records) {
        // Token-telemetry records don't carry fix_cycle_count (that lives in
        // task-metrics), so the failure rate is used as a conservative proxy.
        const groups = groupBy(
          records.filter((r) => r.model === "sonnet"),
          phaseKey,
        );
        const hits = [];
        for (const [key, group] of groups) {
          if (group.length < 5) continue;
          const failureRate = countWithOutcome(group, "failure") / group.length;
          if (failureRate < 0.3) continue;
          const [command, phase] = key.split("|");
          hits.push({
            evidence: `${group.length} ${command}/${phase} spawns on sonnet, ${Math.round(failureRate * 100)}% failure rate — exceeds 30% escalation threshold`,
            projected_savings: "Negative tokens, positive correctness",
            proposed_change: `bin/model-selector.js — escalate {command:"${command}", phase:"${phase}"} to opus OR wire /advisor hook`,
            risk: "Low — escalation is additive; opus fallback preserves behavior.",
            key: { command, phase },
          });
        }
        return hits;
      },
    },
    {
      type: "runway-tune",
      detect(records) {
        // Needs projected_end_pct and actual_end_pct on the records. They
        // are not in the v1.0.0 frozen 18-field schema, so this rule is a
        // no-op until an additive v1.x bump adds them; it stays wired so it
        // fires as soon as the fields exist.
        const hits = [];
        for (const r of records) {
          if (
            typeof r.projected_end_pct !== "number" ||
            typeof r.actual_end_pct !== "number"
          ) {
            continue;
          }
          const overEstimate = r.projected_end_pct - r.actual_end_pct;
          if (overEstimate > 15) {
            hits.push({
              evidence: `${r.command} at ${r.timestamp}: projected ${r.projected_end_pct}% vs actual ${r.actual_end_pct}% — ${Math.round(overEstimate)} pt over-estimate`,
              projected_savings: "Tighter runway projections, fewer unnecessary headless handoffs",
              // The estimator ships as a .cjs module; point at the real file.
              proposed_change: `bin/runway-estimator.cjs — reduce LOW_CONFIDENCE_SKEW or tune fallback constants for ${r.command}`,
              risk: "Low — conservative tuning; easy to reverse if regressions appear.",
              key: { command: r.command, timestamp: r.timestamp },
            });
          }
        }
        return hits;
      },
    },
    {
      type: "investigate",
      detect(records) {
        const groups = groupBy(records, phaseKey);
        const hits = [];
        for (const [key, group] of groups) {
          if (group.length < 10) continue;
          const vals = group.map(tokensOf).sort((a, b) => a - b);
          const med = percentile(vals, 50);
          const p95 = percentile(vals, 95);
          if (med > 0 && p95 > med * 2) {
            const [command, phase] = key.split("|");
            hits.push({
              evidence: `${group.length} ${command}/${phase} spawns: p95=${Math.round(p95)}, median=${Math.round(med)} (${(p95 / med).toFixed(1)}× ratio — outlier signal)`,
              projected_savings: "Unknown until investigation completes",
              proposed_change: `Investigate why ${command}/${phase} has outlier consumption — check for runaway subagents, context leaks, or improper tool use`,
              risk: "Low — investigation only, no code change proposed.",
              key: { command, phase },
            });
          }
        }
        return hits;
      },
    },
  ];
}
|
|
227
|
+
|
|
228
|
+
// ── appendToBacklog ─────────────────────────────────────────────────────────
|
|
229
|
+
|
|
230
|
+
/**
 * Append recommendations to the optimization backlog file, creating the
 * file (and its directory) when needed and making sure it starts with the
 * "# Token Optimization Backlog" title.
 *
 * With no recommendations, a "no recommendations" review marker carrying
 * the current milestone and today's date is appended instead.
 *
 * @param {Array<object>} recommendations
 * @param {string} projectDir  Defaults to process.cwd() when falsy.
 */
function appendToBacklog(recommendations, projectDir) {
  const rootDir = projectDir || process.cwd();
  const backlogPath = path.join(rootDir, BACKLOG_REL);
  ensureDir(path.dirname(backlogPath));

  const header = "# Token Optimization Backlog\n";
  const previous = fs.existsSync(backlogPath)
    ? fs.readFileSync(backlogPath, "utf8")
    : "";
  const milestone = readCurrentMilestone(rootDir) || "M?";
  const today = formatDateYmd(new Date());

  const titled = previous.startsWith("# Token Optimization Backlog")
    ? previous
    : header + "\n" + previous;
  // Trailing whitespace is dropped so spacing before the appended text is uniform.
  const trimmed = titled.replace(/\s+$/, "");

  const hasRecommendations =
    Boolean(recommendations) && recommendations.length !== 0;

  let appended;
  if (hasRecommendations) {
    const blocks = recommendations
      .map((r) => formatRecommendation(r, milestone))
      .join("\n");
    appended = "\n\n" + blocks + "\n";
  } else {
    const marker = `\n## Complete-milestone review — no recommendations (${milestone})\n**Detected**: ${today}\n`;
    appended = "\n" + marker;
  }

  fs.writeFileSync(backlogPath, trimmed + appended);
}
|
|
262
|
+
|
|
263
|
+
/**
 * Render one recommendation as a markdown backlog section: an
 * "## [id] summary" header followed by one "**Field**: value" line per
 * field. The Fingerprint line is written only when a fingerprint is present.
 *
 * @param {object} r  Recommendation object.
 * @param {string} milestone  Milestone label recorded on the Detected line.
 * @returns {string} Markdown block ending with a newline.
 */
function formatRecommendation(r, milestone) {
  const rows = [
    `## [${r.id}] ${summarizeRecommendation(r)}`,
    `**Type**: ${r.type}`,
    `**Detected**: ${r.detected_at} at complete-milestone ${milestone}`,
    `**Evidence**: ${r.evidence}`,
    `**Projected savings**: ${r.projected_savings}`,
    `**Proposed change**: ${r.proposed_change}`,
    `**Risk**: ${r.risk}`,
    `**Status**: ${r.status}`,
    `**Rejection cooldown**: ${r.rejection_cooldown}`,
  ];
  if (r.fingerprint) {
    rows.push(`**Fingerprint**: ${r.fingerprint}`);
  }
  return rows.join("\n") + "\n";
}
|
|
277
|
+
|
|
278
|
+
/**
 * One-line title for a recommendation, chosen by its `type`; unknown types
 * get a generic title.
 *
 * @param {{type: string}} r
 * @returns {string}
 */
function summarizeRecommendation(r) {
  const titles = new Map([
    ["demote", "Demote phase from opus → sonnet"],
    ["escalate", "Escalate phase from sonnet → opus"],
    ["runway-tune", "Tune runway estimator — over-projection detected"],
    ["investigate", "Investigate outlier consumption"],
  ]);
  const title = titles.get(r.type);
  return title === undefined ? "Optimization recommendation" : title;
}
|
|
292
|
+
|
|
293
|
+
// ── Backlog read/write + parse ─────────────────────────────────────────────
|
|
294
|
+
|
|
295
|
+
/**
 * Read the optimization backlog as text.
 *
 * @param {string} [projectDir]  Defaults to process.cwd() when falsy.
 * @returns {string} File contents, or "" when the file does not exist.
 */
function readBacklog(projectDir) {
  const backlogPath = path.join(projectDir || process.cwd(), BACKLOG_REL);
  return fs.existsSync(backlogPath) ? fs.readFileSync(backlogPath, "utf8") : "";
}
|
|
300
|
+
|
|
301
|
+
/**
 * Overwrite the optimization backlog with `content`, creating the parent
 * directory first when it is missing.
 *
 * @param {string} [projectDir]  Defaults to process.cwd() when falsy.
 * @param {string} content
 */
function writeBacklog(projectDir, content) {
  const rootDir = projectDir || process.cwd();
  const backlogPath = path.join(rootDir, BACKLOG_REL);
  ensureDir(path.dirname(backlogPath));
  fs.writeFileSync(backlogPath, content);
}
|
|
306
|
+
|
|
307
|
+
/**
 * Parse a backlog markdown string into entry objects.
 *
 * Only sections whose H2 header has the form "## [ID] ..." become entries;
 * other "## " sections (e.g. the no-recommendation marker) are skipped.
 * Each "**Field name**: value" line in a section becomes a property whose
 * key is the lower-cased field name with whitespace runs replaced by "_"
 * (e.g. "Rejection cooldown" → rejection_cooldown). rejection_cooldown is
 * converted to a number (0 when not numeric).
 *
 * Line endings are normalised first, so a backlog saved with CRLF parses
 * the same as an LF one; otherwise the trailing "\r" stops the field
 * pattern from matching and every field is silently dropped.
 *
 * @param {string} content
 * @returns {Array<object>} Entries in file order; [] on empty input.
 */
function parseBacklog(content) {
  if (!content || !content.trim()) return [];

  const entries = [];
  const sections = content.replace(/\r\n?/g, "\n").split(/\n(?=## )/);
  for (const section of sections) {
    const header = section.match(/^## \[([^\]]+)\]/);
    if (!header) continue;

    const entry = { id: header[1] };
    for (const line of section.split("\n")) {
      const field = line.match(/^\*\*([^*]+)\*\*:\s*(.*)$/);
      if (!field) continue;
      const key = field[1].toLowerCase().replace(/\s+/g, "_");
      entry[key] = field[2].trim();
    }
    if (entry.rejection_cooldown !== undefined) {
      entry.rejection_cooldown = Number(entry.rejection_cooldown) || 0;
    }
    entries.push(entry);
  }
  return entries;
}
|
|
338
|
+
|
|
339
|
+
/**
 * Rewrite fields of a single backlog entry and return the updated content
 * (the caller writes it back).
 *
 * Inside the section headed "## [id] ...", every "**Field name**: value"
 * line whose normalised key (lower-case, whitespace → "_") is an own
 * property of `updates` with a defined value is replaced. Fields the entry
 * does not already contain are not added.
 *
 * The target section ends at the next H2 header of any kind, so the fields
 * of a following ID-less section (such as the no-recommendation marker)
 * are never touched. A trailing "\r" on a line is tolerated and kept, so
 * CRLF files are updated without changing their line endings.
 *
 * @param {string} content  Backlog markdown.
 * @param {string} id       Entry id as it appears between the brackets.
 * @param {object} updates  Map of normalised field key → new value.
 * @returns {string} Updated markdown.
 */
function setRecommendationStatus(content, id, updates) {
  const changes = updates || {};
  const hasChange = (key) =>
    Object.prototype.hasOwnProperty.call(changes, key) &&
    changes[key] !== undefined;

  let inTarget = false;
  const rewritten = content.split("\n").map((rawLine) => {
    const carriage = rawLine.endsWith("\r") ? "\r" : "";
    const line = carriage ? rawLine.slice(0, -1) : rawLine;

    if (line.startsWith("## ")) {
      // Any H2 closes the previous section; only "## [id]" opens the target.
      const header = line.match(/^## \[([^\]]+)\]/);
      inTarget = header !== null && header[1] === id;
      return rawLine;
    }
    if (!inTarget) return rawLine;

    const field = line.match(/^\*\*([^*]+)\*\*:\s*(.*)$/);
    if (!field) return rawLine;
    const key = field[1].toLowerCase().replace(/\s+/g, "_");
    if (!hasChange(key)) return rawLine;
    return `**${field[1]}**: ${changes[key]}${carriage}`;
  });
  return rewritten.join("\n");
}
|
|
368
|
+
|
|
369
|
+
// ── Internals ───────────────────────────────────────────────────────────────
|
|
370
|
+
|
|
371
|
+
/**
 * Load the token-telemetry records (one JSON object per line).
 *
 * Never throws: a missing or unreadable file yields []. Blank lines,
 * malformed JSON and lines that parse to anything but a plain object
 * (null, numbers, strings, arrays) are skipped, because the detection
 * rules read properties off every record and a bare `null` line would
 * otherwise crash them.
 *
 * @param {string} projectDir  Directory that contains METRICS_REL.
 * @returns {Array<object>} Parsed records in file order.
 */
function readMetrics(projectDir) {
  let raw;
  try {
    raw = fs.readFileSync(path.join(projectDir, METRICS_REL), "utf8");
  } catch (_) {
    return [];
  }

  const records = [];
  for (const line of raw.split("\n")) {
    const text = line.trim();
    if (!text) continue;
    let parsed;
    try {
      parsed = JSON.parse(text);
    } catch (_) {
      continue; // skip malformed
    }
    if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
      records.push(parsed);
    }
  }
  return records;
}
|
|
387
|
+
|
|
388
|
+
/**
 * Keep only the records that belong to the `n` most recent milestones.
 *
 * Milestones are ordered by the first number in their label ("M9" before
 * "M10" before "M35"), with plain string order as a tie-breaker. A default
 * lexicographic sort would rank "M9" after "M35" and select the wrong
 * milestones as recent. Labels without any digits sort first (oldest).
 *
 * Records without a milestone are dropped whenever at least one record has
 * one; when no record has one, the input is returned unchanged.
 *
 * @param {Array<object>} records
 * @param {number} n  How many of the latest milestones to keep.
 * @returns {Array<object>}
 */
function filterByRecentMilestones(records, n) {
  if (!records.length) return [];
  const milestones = Array.from(
    new Set(records.map((r) => r.milestone).filter(Boolean)),
  );
  milestones.sort(compareMilestones);
  const recent = new Set(milestones.slice(-n));
  if (recent.size === 0) return records;
  return records.filter((r) => recent.has(r.milestone));
}

// Order milestone labels by their first embedded number, then by text.
function compareMilestones(a, b) {
  const byNumber = milestoneNumber(a) - milestoneNumber(b);
  if (byNumber !== 0) return byNumber;
  const textA = String(a);
  const textB = String(b);
  if (textA < textB) return -1;
  return textA > textB ? 1 : 0;
}

// First run of digits in a label as an integer; -1 when there is none.
function milestoneNumber(label) {
  const digits = String(label).match(/\d+/);
  return digits ? Number.parseInt(digits[0], 10) : -1;
}
|
|
398
|
+
|
|
399
|
+
/**
 * Find a milestone label in `.gsd-t/progress.md`: the first token in the
 * file that matches `M` followed by digits.
 *
 * @param {string} projectDir
 * @returns {string|null} The label (e.g. "M35"), or null when the file is
 *   missing, unreadable, or contains no such token.
 */
function readCurrentMilestone(projectDir) {
  try {
    const progressPath = path.join(projectDir, ".gsd-t", "progress.md");
    if (!fs.existsSync(progressPath)) {
      return null;
    }
    const found = /\bM\d+\b/.exec(fs.readFileSync(progressPath, "utf8"));
    return found === null ? null : found[0];
  } catch (_) {
    return null;
  }
}
|
|
410
|
+
|
|
411
|
+
/**
 * Next free sequence number for recommendation ids: one more than the
 * highest "OPT-<n>" number found among the existing entries' ids, or 1
 * when none carries such a number.
 *
 * @param {Array<{id?: string}>} existingEntries
 * @returns {number}
 */
function nextIdCounter(existingEntries) {
  let highest = 0;
  for (const entry of existingEntries) {
    const found = (entry.id || "").match(/OPT-(\d+)/);
    if (found) highest = Math.max(highest, Number.parseInt(found[1], 10));
  }
  return highest + 1;
}
|
|
420
|
+
|
|
421
|
+
/**
 * Format a sequence number as a recommendation id: "M35-OPT-" followed by
 * the number left-padded with zeros to at least three digits.
 *
 * @param {number} n
 * @returns {string}
 */
function formatId(n) {
  const sequence = String(n).padStart(3, "0");
  return `M35-OPT-${sequence}`;
}
|
|
424
|
+
|
|
425
|
+
/**
 * Build a stable identity string for a hit: the rule type followed by the
 * hit's `key` properties as "name=value" pairs in alphabetical key order,
 * all joined with "|".
 *
 * @param {string} type  Rule type.
 * @param {{key?: object}} hit
 * @returns {string}
 */
function makeFingerprint(type, hit) {
  const identity = hit.key || {};
  const pairs = Object.keys(identity)
    .sort()
    .map((name) => `${name}=${identity[name]}`);
  return [type, ...pairs].join("|");
}
|
|
431
|
+
|
|
432
|
+
/**
 * Fingerprint of a parsed backlog entry: its stored `fingerprint` field,
 * or "" when the entry has none.
 *
 * @param {{fingerprint?: string}} entry
 * @returns {string}
 */
function fingerprintFromEntry(entry) {
  const { fingerprint } = entry;
  return fingerprint ? fingerprint : "";
}
|
|
437
|
+
|
|
438
|
+
/**
 * Bucket records by the key that `keyFn` returns for each of them.
 *
 * @param {Array<object>} records
 * @param {function(object): *} keyFn
 * @returns {Map<*, Array<object>>} Buckets in first-seen key order; records
 *   keep their input order inside each bucket.
 */
function groupBy(records, keyFn) {
  const buckets = new Map();
  for (const record of records) {
    const bucketKey = keyFn(record);
    const bucket = buckets.get(bucketKey);
    if (bucket === undefined) {
      buckets.set(bucketKey, [record]);
    } else {
      bucket.push(record);
    }
  }
  return buckets;
}
|
|
447
|
+
|
|
448
|
+
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param {Array<number>} vals
 * @returns {number} The mean, or 0 for an empty list.
 */
function mean(vals) {
  const count = vals.length;
  if (!count) return 0;
  let total = 0;
  for (let i = 0; i < count; i += 1) {
    total += vals[i];
  }
  return total / count;
}
|
|
454
|
+
|
|
455
|
+
/**
 * Pick the p-th percentile from an ascending-sorted list: the element at
 * index floor(p/100 × length), capped at the last index.
 *
 * @param {Array<number>} sortedVals  Values already sorted ascending.
 * @param {number} p  Percentile, 0–100.
 * @returns {number} The selected element, or 0 for an empty list.
 */
function percentile(sortedVals, p) {
  const count = sortedVals.length;
  if (!count) return 0;
  const rank = Math.floor((p / 100) * count);
  const index = Math.min(count - 1, rank);
  return sortedVals[index];
}
|
|
463
|
+
|
|
464
|
+
/**
 * Create directory `d` (including missing parents) unless the path
 * already exists.
 *
 * @param {string} d
 */
function ensureDir(d) {
  if (fs.existsSync(d)) return;
  fs.mkdirSync(d, { recursive: true });
}
|
|
467
|
+
|
|
468
|
+
/**
 * Format a Date as "YYYY-MM-DD" using its local-time year, month and day;
 * month and day are zero-padded to two digits.
 *
 * @param {Date} d
 * @returns {string}
 */
function formatDateYmd(d) {
  const twoDigits = (value) => String(value).padStart(2, "0");
  const year = d.getFullYear();
  const month = twoDigits(d.getMonth() + 1); // getMonth() is zero-based
  const day = twoDigits(d.getDate());
  return `${year}-${month}-${day}`;
}
|