@tekyzinc/gsd-t 2.74.13 → 3.10.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +165 -0
- package/README.md +117 -1
- package/bin/advisor-integration.js +93 -0
- package/bin/check-headless-sessions.js +140 -0
- package/bin/context-meter-config.cjs +101 -0
- package/bin/context-meter-config.test.cjs +101 -0
- package/bin/gsd-t-unattended-platform.js +381 -0
- package/bin/gsd-t-unattended-safety.js +766 -0
- package/bin/gsd-t-unattended.js +1259 -0
- package/bin/gsd-t.js +723 -19
- package/bin/handoff-lock.js +249 -0
- package/bin/headless-auto-spawn.js +328 -0
- package/bin/model-selector.js +224 -0
- package/bin/runway-estimator.js +242 -0
- package/bin/token-budget.js +96 -89
- package/bin/token-optimizer.js +471 -0
- package/bin/token-telemetry.js +246 -0
- package/commands/gsd-t-audit.md +3 -3
- package/commands/gsd-t-backlog-list.md +38 -0
- package/commands/gsd-t-brainstorm.md +3 -3
- package/commands/gsd-t-complete-milestone.md +24 -0
- package/commands/gsd-t-debug.md +124 -7
- package/commands/gsd-t-discuss.md +10 -3
- package/commands/gsd-t-doc-ripple.md +32 -4
- package/commands/gsd-t-execute.md +107 -52
- package/commands/gsd-t-help.md +22 -0
- package/commands/gsd-t-integrate.md +67 -4
- package/commands/gsd-t-optimization-apply.md +91 -0
- package/commands/gsd-t-optimization-reject.md +94 -0
- package/commands/gsd-t-partition.md +7 -0
- package/commands/gsd-t-pause.md +3 -0
- package/commands/gsd-t-plan.md +10 -3
- package/commands/gsd-t-prd.md +3 -3
- package/commands/gsd-t-quick.md +71 -9
- package/commands/gsd-t-reflect.md +3 -7
- package/commands/gsd-t-resume.md +86 -1
- package/commands/gsd-t-status.md +31 -0
- package/commands/gsd-t-test-sync.md +7 -0
- package/commands/gsd-t-unattended-stop.md +83 -0
- package/commands/gsd-t-unattended-watch.md +290 -0
- package/commands/gsd-t-unattended.md +414 -0
- package/commands/gsd-t-verify.md +12 -5
- package/commands/gsd-t-visualize.md +3 -7
- package/commands/gsd-t-wave.md +82 -18
- package/docs/GSD-T-README.md +69 -0
- package/docs/architecture.md +176 -4
- package/docs/infrastructure.md +221 -0
- package/docs/methodology.md +44 -0
- package/docs/prd-harness-evolution.md +51 -37
- package/docs/requirements.md +95 -0
- package/docs/unattended-windows-caveats.md +245 -0
- package/package.json +2 -2
- package/scripts/context-meter/count-tokens-client.js +221 -0
- package/scripts/context-meter/count-tokens-client.test.js +308 -0
- package/scripts/context-meter/test-injector.js +55 -0
- package/scripts/context-meter/threshold.js +88 -0
- package/scripts/context-meter/threshold.test.js +255 -0
- package/scripts/context-meter/transcript-parser.js +252 -0
- package/scripts/context-meter/transcript-parser.test.js +320 -0
- package/scripts/gsd-t-context-meter.e2e.test.js +415 -0
- package/scripts/gsd-t-context-meter.js +350 -0
- package/scripts/gsd-t-context-meter.test.js +417 -0
- package/scripts/gsd-t-heartbeat.js +2 -2
- package/scripts/gsd-t-statusline.js +23 -8
- package/templates/CLAUDE-global.md +17 -1
- package/templates/CLAUDE-project.md +26 -6
- package/templates/context-meter-config.json +10 -0
- package/templates/prompts/README.md +1 -1
- package/bin/task-counter.cjs +0 -161
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* GSD-T Model Selector — surgical per-phase model tier assignment
|
|
5
|
+
*
|
|
6
|
+
* Replaces the v2.x "silent downgrade under context pressure" behavior with
|
|
7
|
+
* declarative per-phase tier assignments. Callers ask `selectModel({phase, ...})`
|
|
8
|
+
* and get back `{model, reason, escalation_hook}` — the tier decision is
|
|
9
|
+
* deterministic, driven by the rules table below, and does NOT depend on
|
|
10
|
+
* session context percentage.
|
|
11
|
+
*
|
|
12
|
+
* Contract: .gsd-t/contracts/model-selection-contract.md v1.0.0 (M35 T4)
|
|
13
|
+
* Findings: .gsd-t/M35-advisor-findings.md (convention-based /advisor fallback)
|
|
14
|
+
*
|
|
15
|
+
* Zero external dependencies.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
// ── Tiers ───────────────────────────────────────────────────────────────────

/**
 * Model tier identifiers. These are the values reported in the `model` field
 * of a `selectModel()` result. The object is frozen so the set of tiers cannot
 * be altered at runtime.
 */
const TIERS = Object.freeze({ HAIKU: "haiku", SONNET: "sonnet", OPUS: "opus" });

/** Tier handed back when no phase rule can be applied. */
const DEFAULT_TIER = TIERS.SONNET;
|
|
27
|
+
|
|
28
|
+
// ── Escalation hook block (convention-based /advisor fallback) ──────────────
//
// `.gsd-t/M35-advisor-findings.md` records that Claude Code's native /advisor
// exposes no programmable API at subagent scope. As a substitute, this text is
// injected into the subagent prompt at declared escalation points on
// sonnet-tier phases where the orchestrator has flagged a high-stakes
// sub-decision.
//
// It lives in a single constant so every consumer (command files,
// advisor-integration.js, M35-advisor-findings.md) refers to one canonical copy.
// The lines are separated by "\n" and there is no trailing newline.

const ESCALATION_HOOK =
  "## Escalation Hook — /advisor convention-based fallback\n" +
  "\n" +
  "Before finalizing your answer for this phase, stop and consider:\n" +
  "1. Is this decision high-stakes? (architecture, contract design, security boundary,\n" +
  " data-loss risk, cross-module refactor, adversarial QA verdict)\n" +
  "2. Would a more capable model produce a materially better answer?\n" +
  "3. Are you confident in the assumptions you're making?\n" +
  "\n" +
  "If YES to any of the above, do ONE of the following:\n" +
  "- Escalate internally: spend an extra reasoning pass re-examining the decision\n" +
  " from first principles. Document the re-examination in your output.\n" +
  "- Spawn a nested opus subagent: use the Task tool with\n" +
  " `subagent_type: \"general-purpose\"` and include `model: opus` in the spawn.\n" +
  "\n" +
  "Record in your output whether you escalated: set `ESCALATED_VIA_ADVISOR=true` or\n" +
  "`ESCALATED_VIA_ADVISOR=false` on a line by itself near the end of your report.";
|
|
56
|
+
|
|
57
|
+
// ── Declarative phase rules table ───────────────────────────────────────────
//
// Each rule maps (phase, task_type) → tier, and the first matching rule wins.
// A rule without `task_type` matches every task in its phase, so within a
// phase the rules must run from most-specific to least-specific.
//
// Tier assignments mirror `.gsd-t/M35-definition.md` Part B and the Model
// Assignments section of the GSD-T global CLAUDE template:
//   - haiku:  strictly mechanical — test runners, branch guards, file checks,
//             JSON validation, no judgment
//   - sonnet: routine code work — execute step 2, test-sync, doc-ripple wiring,
//             quick fixes, integration wiring, debug fix-apply
//   - opus:   high-stakes reasoning — partition, discuss, Red Team, verify
//             judgment, debug root-cause, contract/architecture design
//
// `hasEscalation: true` marks sonnet rules whose result carries the escalation
// hook text.
//
// Object.freeze is shallow, so each rule is frozen individually as well as the
// array. Without that, any consumer of this exported table could silently
// rewrite a tier assignment for the whole process.

const PHASE_RULES = Object.freeze(
  [
    // Phase: execute
    { phase: "execute", task_type: "test_runner", model: TIERS.HAIKU, reason: "Mechanical test-suite runner — zero judgment" },
    { phase: "execute", task_type: "branch_guard", model: TIERS.HAIKU, reason: "Mechanical branch-name check — zero judgment" },
    { phase: "execute", task_type: "file_check", model: TIERS.HAIKU, reason: "Mechanical file-existence check — zero judgment" },
    { phase: "execute", task_type: "qa", model: TIERS.SONNET, reason: "QA evaluation needs judgment per M31 tier refinement" },
    { phase: "execute", task_type: "red_team", model: TIERS.OPUS, reason: "Adversarial QA benefits most from top tier" },
    { phase: "execute", model: TIERS.SONNET, reason: "Routine task execution — sonnet is the M35 default for routine work", hasEscalation: true },

    // Phase: wave (the wave orchestrator itself)
    { phase: "wave", model: TIERS.SONNET, reason: "Wave orchestration dispatches per-phase subagents; the orchestrator itself is routine coordination", hasEscalation: true },

    // Phase: quick
    { phase: "quick", task_type: "test_runner", model: TIERS.HAIKU, reason: "Mechanical test-suite runner — zero judgment" },
    { phase: "quick", model: TIERS.SONNET, reason: "Routine one-off task — sonnet default" },

    // Phase: integrate
    { phase: "integrate", task_type: "test_runner", model: TIERS.HAIKU, reason: "Mechanical integration test runner — zero judgment" },
    { phase: "integrate", model: TIERS.SONNET, reason: "Integration wiring is routine coordination work" },

    // Phase: debug
    { phase: "debug", task_type: "fix_apply", model: TIERS.SONNET, reason: "Applying a known fix is routine code work" },
    { phase: "debug", task_type: "root_cause", model: TIERS.OPUS, reason: "Root-cause analysis is high-stakes reasoning" },
    { phase: "debug", model: TIERS.OPUS, reason: "Debug default is high-stakes — prefer opus unless the task_type says otherwise" },

    // Phase: partition — high-stakes architectural decomposition
    { phase: "partition", model: TIERS.OPUS, reason: "Domain partitioning is architectural reasoning — high stakes" },

    // Phase: discuss — multi-perspective design exploration
    { phase: "discuss", model: TIERS.OPUS, reason: "Design exploration benefits from top-tier reasoning" },

    // Phase: plan — task-list authoring
    { phase: "plan", model: TIERS.SONNET, reason: "Task decomposition is structured work — sonnet with escalation hook", hasEscalation: true },

    // Phase: verify — final quality judgment before milestone complete
    { phase: "verify", model: TIERS.OPUS, reason: "Milestone verification is the final quality gate — high stakes" },

    // Phase: test-sync — keeping tests aligned with code
    { phase: "test-sync", model: TIERS.SONNET, reason: "Test alignment is routine refactoring work" },

    // Phase: doc-ripple — downstream document updates
    { phase: "doc-ripple", model: TIERS.SONNET, reason: "Documentation updates are routine prose editing" },

    // Phase: red_team — explicit adversarial QA phase (separate from execute task_type)
    { phase: "red_team", model: TIERS.OPUS, reason: "Adversarial QA — always opus, the incentive is to find bugs" },

    // Phase: qa — explicit standalone QA phase
    { phase: "qa", model: TIERS.SONNET, reason: "QA per M31 refinement — sonnet produces fewer false negatives than haiku" },
  ].map((rule) => Object.freeze(rule))
);
|
|
121
|
+
|
|
122
|
+
// Complexity-signal overrides. Each key is a signal name a caller may set in
// `complexity_signals`; the value is the tier a sonnet decision is bumped to
// when that signal is truthy, whatever the phase rule said.
const COMPLEXITY_OVERRIDES = Object.freeze({
  // All four currently escalate to the same tier.
  cross_module_refactor: TIERS.OPUS,
  security_boundary: TIERS.OPUS,
  data_loss_risk: TIERS.OPUS,
  contract_design: TIERS.OPUS,
});
|
|
130
|
+
|
|
131
|
+
// ── Public API ──────────────────────────────────────────────────────────────
|
|
132
|
+
|
|
133
|
+
/**
 * Select the model tier for a subagent spawn.
 *
 * Resolution order:
 *   1. Missing/non-object `args`, or a missing/non-string `phase` → DEFAULT_TIER.
 *   2. First entry in PHASE_RULES whose `phase` matches and whose `task_type`
 *      is either absent or equal to `args.task_type`. No match → DEFAULT_TIER.
 *   3. If the matched tier is sonnet, the first truthy key of
 *      `complexity_signals` that is an own key of COMPLEXITY_OVERRIDES replaces
 *      the tier and appends an explanation to `reason`.
 *   4. The escalation hook text is attached only when the final tier is still
 *      sonnet and the matched rule has `hasEscalation`.
 *
 * @param {object} args
 * @param {string} args.phase — required; one of the phase names in PHASE_RULES
 * @param {string} [args.task_type] — optional task_type for finer-grained rules
 * @param {string} [args.domain_type] — optional, currently unused (reserved for future per-domain rules)
 * @param {object} [args.complexity_signals] — optional object, keys matching COMPLEXITY_OVERRIDES escalate sonnet→opus
 * @returns {{model: string, reason: string, escalation_hook: string|null}}
 */
function selectModel(args) {
  if (!args || typeof args !== "object") {
    return {
      model: DEFAULT_TIER,
      reason: "No args provided — default to routine tier (sonnet)",
      escalation_hook: null,
    };
  }

  const { phase, task_type, complexity_signals } = args;

  if (!phase || typeof phase !== "string") {
    return {
      model: DEFAULT_TIER,
      reason: "No phase provided — default to routine tier (sonnet)",
      escalation_hook: null,
    };
  }

  // The table is ordered most-specific first, so the first hit is the answer.
  const matched = PHASE_RULES.find(
    (rule) => rule.phase === phase && (!rule.task_type || rule.task_type === task_type)
  );

  if (!matched) {
    return {
      model: DEFAULT_TIER,
      reason: `Unknown phase "${phase}" — fallback to routine tier (sonnet)`,
      escalation_hook: null,
    };
  }

  let model = matched.model;
  let reason = matched.reason;

  // Complexity-signal overrides: bump sonnet → opus if any flagged signal is truthy.
  if (model === TIERS.SONNET && complexity_signals && typeof complexity_signals === "object") {
    for (const key of Object.keys(complexity_signals)) {
      if (!complexity_signals[key]) continue;
      // Own-property check: a caller-supplied key such as "constructor",
      // "toString" or a JSON-parsed "__proto__" must not resolve to something
      // inherited from Object.prototype and be returned as the model.
      if (!Object.prototype.hasOwnProperty.call(COMPLEXITY_OVERRIDES, key)) continue;
      const override = COMPLEXITY_OVERRIDES[key];
      if (override && override !== model) {
        model = override;
        reason = `${reason} (escalated to ${override} by complexity signal: ${key})`;
        break;
      }
    }
  }

  // Escalation hook is only injected on sonnet phases flagged as high-stakes-adjacent.
  // Haiku phases have no hook (mechanical, no judgment). Opus phases have no hook
  // (already at top tier — nowhere to escalate).
  const escalation_hook =
    model === TIERS.SONNET && matched.hasEscalation ? ESCALATION_HOOK : null;

  return { model, reason, escalation_hook };
}
|
|
205
|
+
|
|
206
|
+
/**
 * List every distinct phase name that appears in PHASE_RULES.
 * Intended for tests and documentation tooling that assert coverage.
 *
 * @returns {string[]} unique phase names in default `Array.prototype.sort` order
 */
function listPhases() {
  const uniquePhases = new Set(PHASE_RULES.map((rule) => rule.phase));
  return Array.from(uniquePhases).sort();
}
|
|
215
|
+
|
|
216
|
+
module.exports = {
|
|
217
|
+
selectModel,
|
|
218
|
+
listPhases,
|
|
219
|
+
TIERS,
|
|
220
|
+
DEFAULT_TIER,
|
|
221
|
+
ESCALATION_HOOK,
|
|
222
|
+
PHASE_RULES,
|
|
223
|
+
COMPLEXITY_OVERRIDES,
|
|
224
|
+
};
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* GSD-T Runway Estimator — Pre-flight context runway projection
|
|
5
|
+
*
|
|
6
|
+
* Reads current context percentage from the M34 context meter state file and
|
|
7
|
+
* historical token-telemetry records (token-metrics.jsonl) to project whether
|
|
8
|
+
* a command about to run will complete before the v3.0.0 stop band (85%).
|
|
9
|
+
*
|
|
10
|
+
* Confidence-weighted: high ≥50 records, medium ≥10, low <10. Low confidence
|
|
11
|
+
* applies a 1.25x conservative skew. On missing history a constant fallback
|
|
12
|
+
* is used (4%/task sonnet-default, 8%/task opus-default). On refusal the
|
|
13
|
+
* estimator never prompts the user — callers hand off to headless-auto-spawn.
|
|
14
|
+
*
|
|
15
|
+
* Zero external dependencies (Node.js built-ins only).
|
|
16
|
+
*
|
|
17
|
+
* Contract: .gsd-t/contracts/runway-estimator-contract.md v1.0.0
|
|
18
|
+
* Consumers: bin/gsd-t.js, commands/gsd-t-execute|wave|integrate|quick|debug.md
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
const fs = require("fs");
|
|
22
|
+
const path = require("path");
|
|
23
|
+
|
|
24
|
+
// ── Constants ────────────────────────────────────────────────────────────────

// Context percentage at which work must stop. Mirrors token-budget-contract
// v3.0.0 and has to be kept in sync with it.
const STOP_THRESHOLD_PCT = 85;

// Minimum record counts for each confidence grade (frozen in
// runway-estimator-contract v1.0.0). Fewer than the medium minimum is "low".
const CONFIDENCE_HIGH_MIN = 50;
const CONFIDENCE_MEDIUM_MIN = 10;

// Low-confidence projections are multiplied by this factor to err on the
// conservative side.
const LOW_CONFIDENCE_SKEW = 1.25;

// Per-task context cost (in percentage points) assumed when there is no usable
// history for a command.
const FALLBACK_PCT_PER_TASK_SONNET = 4;
const FALLBACK_PCT_PER_TASK_OPUS = 8;

// Commands that take the opus fallback constant; anything not listed here takes
// the sonnet one.
// NOTE(review): model-selector.js gives the integrate phase a sonnet default —
// confirm that listing "gsd-t-integrate" here is intentional.
const OPUS_DEFAULT_COMMANDS = new Set(["gsd-t-debug", "gsd-t-integrate"]);

// Project-relative locations of the context-meter state file and the
// token-telemetry log.
const STATE_FILE_REL = path.join(".gsd-t", ".context-meter-state.json");
const METRICS_FILE_REL = path.join(".gsd-t", "token-metrics.jsonl");
|
|
49
|
+
|
|
50
|
+
// ── Exports ──────────────────────────────────────────────────────────────────
|
|
51
|
+
|
|
52
|
+
module.exports = {
|
|
53
|
+
estimateRunway,
|
|
54
|
+
STOP_THRESHOLD_PCT,
|
|
55
|
+
CONFIDENCE_HIGH_MIN,
|
|
56
|
+
CONFIDENCE_MEDIUM_MIN,
|
|
57
|
+
LOW_CONFIDENCE_SKEW,
|
|
58
|
+
};
|
|
59
|
+
|
|
60
|
+
// ── estimateRunway ───────────────────────────────────────────────────────────
|
|
61
|
+
|
|
62
|
+
/**
 * Project whether a command can finish before the stop threshold is reached.
 *
 * The projection is `current_pct + pct_per_task * remaining_tasks * skew`,
 * rounded to one decimal. `skew` is LOW_CONFIDENCE_SKEW when confidence is
 * "low" and 1.0 otherwise. The command may start only when the rounded
 * projection is strictly below STOP_THRESHOLD_PCT.
 *
 * A missing or non-object `opts` is treated as `{}` rather than throwing, so
 * the estimator degrades to its defaults instead of crashing the caller.
 *
 * @param {{
 *   command: string,
 *   domain_type?: string,
 *   remaining_tasks: number,
 *   projectDir?: string,
 *   headlessAvailable?: boolean
 * }} opts
 *   - `remaining_tasks`: non-numeric or negative values count as 0
 *   - `projectDir`: defaults to `process.cwd()`
 *   - `headlessAvailable`: treated as true unless exactly `false`
 * @returns {{
 *   can_start: boolean,
 *   current_pct: number,
 *   projected_end_pct: number,
 *   confidence: 'low'|'medium'|'high',
 *   confidence_basis: number,
 *   pct_per_task: number,
 *   recommendation: 'proceed'|'headless'|'clear-and-resume',
 *   reason: string
 * }}
 */
function estimateRunway(opts) {
  const safeOpts = opts && typeof opts === "object" ? opts : {};

  const command = safeOpts.command;
  const domain_type = safeOpts.domain_type || "";
  const remaining_tasks = Math.max(0, Number(safeOpts.remaining_tasks) || 0);
  const projectDir = safeOpts.projectDir || process.cwd();
  const headlessAvailable = safeOpts.headlessAvailable !== false;

  const current_pct = readCurrentPct(projectDir);
  const records = readMetrics(projectDir);
  const { pct_per_task, confidence, confidence_basis } = computePctPerTask(
    records,
    command,
    domain_type,
  );

  // Thin history is padded so the estimate errs towards refusing.
  const skew = confidence === "low" ? LOW_CONFIDENCE_SKEW : 1.0;
  const projected_end_pct = round1(
    current_pct + pct_per_task * remaining_tasks * skew,
  );
  const can_start = projected_end_pct < STOP_THRESHOLD_PCT;

  let recommendation;
  let reason;
  if (can_start) {
    recommendation = "proceed";
    reason = `Projected end ${projected_end_pct}% < ${STOP_THRESHOLD_PCT}% stop threshold`;
  } else if (headlessAvailable) {
    recommendation = "headless";
    reason = `Projected end ${projected_end_pct}% ≥ ${STOP_THRESHOLD_PCT}% — auto-spawn headless`;
  } else {
    recommendation = "clear-and-resume";
    reason = `Projected end ${projected_end_pct}% ≥ ${STOP_THRESHOLD_PCT}% — headless unavailable, clear-and-resume`;
  }

  return {
    can_start,
    current_pct,
    projected_end_pct,
    confidence,
    confidence_basis,
    pct_per_task: round2(pct_per_task),
    recommendation,
    reason,
  };
}
|
|
126
|
+
|
|
127
|
+
// ── Internal: read current pct from M34 state file ──────────────────────────

/**
 * Read the current context percentage from the context-meter state file.
 *
 * @param {string} projectDir — directory the state file path is resolved against
 * @returns {number} `pct` from the state file rounded to one decimal when it is
 *   a finite number; otherwise 0. A warning goes to stderr only when reading or
 *   parsing throws. A file that parses but lacks a usable `pct` yields 0
 *   without a warning.
 */
function readCurrentPct(projectDir) {
  try {
    const statePath = path.join(projectDir, STATE_FILE_REL);
    const state = JSON.parse(fs.readFileSync(statePath, "utf8"));
    const pct = state.pct;
    const usable = typeof pct === "number" && Number.isFinite(pct);
    if (usable) return round1(pct);
  } catch (readErr) {
    // Missing or unreadable — warn and fall through.
    try {
      process.stderr.write(
        `runway-estimator: ${STATE_FILE_REL} missing or unreadable — assuming current_pct=0\n`,
      );
    } catch (writeErr) {
      /* a failed warning must never break the estimate */
    }
  }
  return 0;
}
|
|
149
|
+
|
|
150
|
+
// ── Internal: read token-metrics.jsonl ──────────────────────────────────────

/**
 * Load telemetry records from the project's token-metrics JSONL file.
 *
 * Blank lines and lines that are not valid JSON are skipped. Lines that parse
 * to something other than an object (`null`, numbers, strings, booleans) are
 * skipped too: downstream code reads properties off every record, and a `null`
 * entry would throw there.
 *
 * @param {string} projectDir — directory the metrics file path is resolved against
 * @returns {object[]} parsed records in file order; `[]` when the file is
 *   missing or unreadable
 */
function readMetrics(projectDir) {
  try {
    const fp = path.join(projectDir, METRICS_FILE_REL);
    const raw = fs.readFileSync(fp, "utf8");
    const out = [];
    for (const line of raw.split("\n")) {
      if (!line.trim()) continue;
      let record;
      try {
        record = JSON.parse(line);
      } catch (_) {
        continue; // malformed line — skip it, keep the rest
      }
      if (record !== null && typeof record === "object") out.push(record);
    }
    return out;
  } catch (_) {
    // No history is an expected state (first run), not an error.
    return [];
  }
}
|
|
170
|
+
|
|
171
|
+
// ── Internal: compute pct-per-task with confidence grading ──────────────────

/**
 * Pick the per-task context cost for a command from the sharpest sample that
 * has enough records.
 *
 * Samples are tried in order: records matching both `command` and
 * `domain_type` (only when `domain_type` is non-empty), then all records for
 * `command`. The first sample with at least CONFIDENCE_MEDIUM_MIN records is
 * used. If neither qualifies, a constant fallback is returned with low
 * confidence and the command-level record count as its basis.
 *
 * @param {object[]} records — telemetry records
 * @param {string} command
 * @param {string} domain_type — empty string disables the pair-level sample
 * @returns {{pct_per_task: number, confidence: string, confidence_basis: number}}
 */
function computePctPerTask(records, command, domain_type) {
  const forCommand = records.filter((rec) => rec.command === command);
  const forPair = domain_type
    ? forCommand.filter((rec) => rec.domain_type === domain_type)
    : [];

  for (const sample of [forPair, forCommand]) {
    if (sample.length < CONFIDENCE_MEDIUM_MIN) continue;
    return {
      pct_per_task: meanPctDelta(sample),
      confidence: gradeConfidence(sample.length),
      confidence_basis: sample.length,
    };
  }

  // Not enough history at either level — constant fallback.
  return {
    pct_per_task: fallbackPctPerTask(command),
    confidence: "low",
    confidence_basis: forCommand.length,
  };
}
|
|
205
|
+
|
|
206
|
+
/**
 * Average context growth per record, in percentage points.
 *
 * For each record the delta is `context_window_pct_after -
 * context_window_pct_before`. A record is left out of the average (not counted
 * as zero) when:
 *   - it is not an object,
 *   - either field is missing or not a finite number / numeric string, or
 *   - the delta is negative.
 *
 * `null`, `""` and booleans are treated as missing. `Number()` would coerce
 * them to 0 or 1, so a record with a null "before" would otherwise contribute
 * its entire "after" value as the delta.
 *
 * @param {object[]} records
 * @returns {number} mean of the usable deltas, or 0 when there are none
 */
function meanPctDelta(records) {
  const toPct = (value) => {
    if (typeof value === "number") return value;
    if (typeof value === "string" && value.trim() !== "") return Number(value);
    return NaN;
  };

  let sum = 0;
  let n = 0;
  for (const r of records) {
    if (r === null || typeof r !== "object") continue;
    const before = toPct(r.context_window_pct_before);
    const after = toPct(r.context_window_pct_after);
    if (!Number.isFinite(before) || !Number.isFinite(after)) continue;
    const delta = after - before;
    if (delta < 0) continue; // pathological — excluded from the mean
    sum += delta;
    n += 1;
  }
  return n === 0 ? 0 : sum / n;
}
|
|
222
|
+
|
|
223
|
+
/**
 * Map a record count to a confidence grade.
 *
 * @param {number} n — number of records backing the estimate
 * @returns {'high'|'medium'|'low'} "high" at or above CONFIDENCE_HIGH_MIN,
 *   "medium" at or above CONFIDENCE_MEDIUM_MIN, otherwise "low"
 */
function gradeConfidence(n) {
  return n >= CONFIDENCE_HIGH_MIN
    ? "high"
    : n >= CONFIDENCE_MEDIUM_MIN
      ? "medium"
      : "low";
}
|
|
228
|
+
|
|
229
|
+
/**
 * Constant per-task cost used when a command has no usable history.
 *
 * @param {string} command
 * @returns {number} the opus constant for commands in OPUS_DEFAULT_COMMANDS,
 *   the sonnet constant for everything else
 */
function fallbackPctPerTask(command) {
  return OPUS_DEFAULT_COMMANDS.has(command)
    ? FALLBACK_PCT_PER_TASK_OPUS
    : FALLBACK_PCT_PER_TASK_SONNET;
}
|
|
233
|
+
|
|
234
|
+
// ── Internal: rounding helpers ──────────────────────────────────────────────

/** Round `n` to the nearest multiple of `1 / scale` using Math.round. */
function roundToScale(n, scale) {
  return Math.round(n * scale) / scale;
}

/** Round to one decimal place. */
function round1(n) {
  return roundToScale(n, 10);
}

/** Round to two decimal places. */
function round2(n) {
  return roundToScale(n, 100);
}
|