@tekyzinc/gsd-t 2.73.25 → 2.74.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +28 -0
- package/bin/archive-progress.js +335 -0
- package/bin/context-budget-audit.js +432 -0
- package/bin/gsd-t.js +79 -1
- package/bin/log-tail.js +81 -0
- package/bin/orchestrator.js +233 -47
- package/commands/gsd-t-design-decompose.md +26 -2
- package/docs/context-budget-recovery-plan.md +170 -0
- package/package.json +1 -1
- package/scripts/gsd-t-design-review-server.js +157 -3
- package/scripts/gsd-t-design-review.html +676 -14
package/bin/orchestrator.js
CHANGED
|
@@ -108,6 +108,7 @@ class Orchestrator {
|
|
|
108
108
|
constructor(workflow) {
|
|
109
109
|
this.wf = workflow;
|
|
110
110
|
this.pids = [];
|
|
111
|
+
this._childPids = new Set();
|
|
111
112
|
}
|
|
112
113
|
|
|
113
114
|
// ─── CLI ─────────────────────────────────────────────────────────────
|
|
@@ -136,7 +137,7 @@ class Orchestrator {
|
|
|
136
137
|
case "--skip-measure": opts.skipMeasure = true; break;
|
|
137
138
|
case "--clean": opts.clean = true; break;
|
|
138
139
|
case "--verbose": case "-v": opts.verbose = true; break;
|
|
139
|
-
case "--parallel": opts.parallel = parseInt(argv[++i], 10) ||
|
|
140
|
+
case "--parallel": opts.parallel = parseInt(argv[++i], 10) || 15; break;
|
|
140
141
|
case "--help":
|
|
141
142
|
case "-h":
|
|
142
143
|
if (this.wf.showUsage) this.wf.showUsage();
|
|
@@ -164,7 +165,7 @@ ${BOLD}Options:${RESET}
|
|
|
164
165
|
--timeout <sec> Claude timeout per phase in seconds (default: 600)
|
|
165
166
|
--skip-measure Skip automated measurement (human-review only)
|
|
166
167
|
--clean Clear all artifacts from previous runs + delete build output
|
|
167
|
-
--parallel <N> Run N items concurrently (default:
|
|
168
|
+
--parallel <N> Run N items concurrently (default: 15)
|
|
168
169
|
--verbose, -v Show Claude's tool calls and prompts in terminal
|
|
169
170
|
--help Show this help
|
|
170
171
|
|
|
@@ -188,18 +189,15 @@ ${BOLD}Phases:${RESET} ${this.wf.phases.join(" → ")}
|
|
|
188
189
|
}
|
|
189
190
|
|
|
190
191
|
spawnClaude(projectDir, prompt, timeout, opts = {}) {
|
|
192
|
+
// Synchronous wrapper around async spawn — uses a temp file signal so
|
|
193
|
+
// the event loop stays alive and SIGINT (Ctrl+C) can be handled
|
|
191
194
|
const start = Date.now();
|
|
192
|
-
let output = "";
|
|
193
|
-
let exitCode = 0;
|
|
194
195
|
const verbose = this._verbose;
|
|
195
196
|
|
|
196
|
-
// Build args: -p for print mode, --dangerously-skip-permissions so spawned
|
|
197
|
-
// Claude can write files without interactive permission prompts
|
|
198
197
|
const args = ["-p", "--dangerously-skip-permissions", "--output-format", "stream-json"];
|
|
199
198
|
if (verbose) args.push("--verbose");
|
|
200
199
|
args.push(prompt);
|
|
201
200
|
|
|
202
|
-
// Log prompt to file for debugging
|
|
203
201
|
if (verbose) {
|
|
204
202
|
const logDir = path.join(this.getReviewDir(projectDir), "build-logs");
|
|
205
203
|
ensureDir(logDir);
|
|
@@ -210,31 +208,38 @@ ${BOLD}Phases:${RESET} ${this.wf.phases.join(" → ")}
|
|
|
210
208
|
);
|
|
211
209
|
}
|
|
212
210
|
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
timeout: timeout || this.wf.defaults?.timeout || 600_000,
|
|
217
|
-
stdio: ["pipe", "pipe", "pipe"],
|
|
218
|
-
cwd: projectDir,
|
|
219
|
-
maxBuffer: 10 * 1024 * 1024,
|
|
220
|
-
});
|
|
221
|
-
// Parse stream-json: each line is a JSON event, extract assistant text
|
|
222
|
-
output = this._parseStreamJson(raw, verbose);
|
|
223
|
-
} catch (e) {
|
|
224
|
-
// On timeout/error, still parse any partial stream-json output we got
|
|
225
|
-
const rawOut = (e.stdout || "") + (e.stderr || "");
|
|
226
|
-
output = this._parseStreamJson(rawOut, verbose);
|
|
227
|
-
exitCode = e.status || 1;
|
|
228
|
-
if (e.killed) warn(`Claude timed out after ${(timeout || 600_000) / 1000}s`);
|
|
229
|
-
}
|
|
211
|
+
const effectiveTimeout = timeout || this.wf.defaults?.timeout || 600_000;
|
|
212
|
+
const signalFile = path.join(this.getReviewDir(projectDir), `_sync-done-${Date.now()}.json`);
|
|
213
|
+
let result = { output: "", exitCode: 1, duration: 0 };
|
|
230
214
|
|
|
231
|
-
const
|
|
215
|
+
const child = execFile("claude", args, {
|
|
216
|
+
encoding: "utf8",
|
|
217
|
+
timeout: effectiveTimeout,
|
|
218
|
+
cwd: projectDir,
|
|
219
|
+
maxBuffer: 10 * 1024 * 1024,
|
|
220
|
+
}, (err, stdout, stderr) => {
|
|
221
|
+
this.untrackChild(child.pid);
|
|
222
|
+
const raw = err ? ((err.stdout || "") + (err.stderr || "")) : (stdout || "");
|
|
223
|
+
const output = this._parseStreamJson(raw, verbose);
|
|
224
|
+
const exitCode = err ? (err.status || 1) : 0;
|
|
225
|
+
const duration = Math.round((Date.now() - start) / 1000);
|
|
226
|
+
if (err && err.killed) warn(`Claude timed out after ${effectiveTimeout / 1000}s`);
|
|
227
|
+
result = { output, exitCode, duration };
|
|
228
|
+
try { fs.writeFileSync(signalFile, "done"); } catch { /* ignore */ }
|
|
229
|
+
});
|
|
230
|
+
this.trackChild(child.pid);
|
|
231
|
+
|
|
232
|
+
// Block until child finishes, but keep event loop alive for SIGINT
|
|
233
|
+
while (!fs.existsSync(signalFile) && !this._interrupted) {
|
|
234
|
+
syncSleep(200);
|
|
235
|
+
}
|
|
236
|
+
try { fs.unlinkSync(signalFile); } catch { /* ignore */ }
|
|
232
237
|
|
|
233
238
|
if (verbose) {
|
|
234
|
-
dim(`Claude finished: exit=${exitCode}, duration=${duration}s, output=${output.length} chars`);
|
|
239
|
+
dim(`Claude finished: exit=${result.exitCode}, duration=${result.duration}s, output=${result.output.length} chars`);
|
|
235
240
|
}
|
|
236
241
|
|
|
237
|
-
return
|
|
242
|
+
return result;
|
|
238
243
|
}
|
|
239
244
|
|
|
240
245
|
// ─── Server Management ───────────────────────────────────────────────
|
|
@@ -266,6 +271,7 @@ ${BOLD}Phases:${RESET} ${this.wf.phases.join(" → ")}
|
|
|
266
271
|
cwd: projectDir,
|
|
267
272
|
maxBuffer: 10 * 1024 * 1024,
|
|
268
273
|
}, (err, stdout, stderr) => {
|
|
274
|
+
this.untrackChild(child.pid);
|
|
269
275
|
const raw = err ? ((err.stdout || "") + (err.stderr || "")) : (stdout || "");
|
|
270
276
|
const output = this._parseStreamJson(raw, false);
|
|
271
277
|
const exitCode = err ? (err.code === "ERR_CHILD_PROCESS_STDIO_MAXBUFFER" ? 1 : (err.killed ? 143 : (err.code || 1))) : 0;
|
|
@@ -277,6 +283,7 @@ ${BOLD}Phases:${RESET} ${this.wf.phases.join(" → ")}
|
|
|
277
283
|
|
|
278
284
|
resolve({ output, exitCode, duration });
|
|
279
285
|
});
|
|
286
|
+
this.trackChild(child.pid);
|
|
280
287
|
});
|
|
281
288
|
}
|
|
282
289
|
|
|
@@ -505,9 +512,9 @@ ${BOLD}Phases:${RESET} ${this.wf.phases.join(" → ")}
|
|
|
505
512
|
|
|
506
513
|
openBrowser(`http://localhost:${reviewPort}/review`);
|
|
507
514
|
|
|
508
|
-
// IRONCLAD GATE — JavaScript polling loop
|
|
515
|
+
// IRONCLAD GATE — JavaScript polling loop (breaks on Ctrl+C via _interrupted flag)
|
|
509
516
|
let healthCheckCounter = 0;
|
|
510
|
-
while (
|
|
517
|
+
while (!this._interrupted) {
|
|
511
518
|
if (fs.existsSync(signalPath)) {
|
|
512
519
|
try {
|
|
513
520
|
const data = JSON.parse(fs.readFileSync(signalPath, "utf8"));
|
|
@@ -602,6 +609,16 @@ ${BOLD}Phases:${RESET} ${this.wf.phases.join(" → ")}
|
|
|
602
609
|
}
|
|
603
610
|
}
|
|
604
611
|
|
|
612
|
+
// ─── Child process tracking ──────────────────────────────────────────
|
|
613
|
+
|
|
614
|
+
trackChild(pid) {
|
|
615
|
+
if (pid) this._childPids.add(pid);
|
|
616
|
+
}
|
|
617
|
+
|
|
618
|
+
untrackChild(pid) {
|
|
619
|
+
if (pid) this._childPids.delete(pid);
|
|
620
|
+
}
|
|
621
|
+
|
|
605
622
|
// ─── Cleanup ─────────────────────────────────────────────────────────
|
|
606
623
|
|
|
607
624
|
cleanup(projectDir) {
|
|
@@ -610,13 +627,20 @@ ${BOLD}Phases:${RESET} ${this.wf.phases.join(" → ")}
|
|
|
610
627
|
fs.writeFileSync(shutdownPath, JSON.stringify({ shutdown: true, at: new Date().toISOString() }));
|
|
611
628
|
} catch { /* ignore */ }
|
|
612
629
|
|
|
630
|
+
// Kill all tracked child processes (Claude spawns)
|
|
631
|
+
for (const pid of this._childPids) {
|
|
632
|
+
try { process.kill(pid, "SIGTERM"); } catch { /* already dead */ }
|
|
633
|
+
}
|
|
634
|
+
this._childPids.clear();
|
|
635
|
+
|
|
636
|
+
// Kill server processes
|
|
613
637
|
for (const pid of this.pids) {
|
|
614
638
|
if (pid) {
|
|
615
639
|
try { process.kill(pid); } catch { /* already dead */ }
|
|
616
640
|
try { process.kill(-pid); } catch { /* ignore */ }
|
|
617
641
|
}
|
|
618
642
|
}
|
|
619
|
-
dim("
|
|
643
|
+
dim("All processes stopped");
|
|
620
644
|
}
|
|
621
645
|
|
|
622
646
|
// ─── Main Pipeline ──────────────────────────────────────────────────
|
|
@@ -701,9 +725,10 @@ ${BOLD}Phases:${RESET} ${this.wf.phases.join(" → ")}
|
|
|
701
725
|
this._activeDevPort = devPort;
|
|
702
726
|
}
|
|
703
727
|
|
|
704
|
-
// Register cleanup on exit
|
|
705
|
-
|
|
706
|
-
process.on("
|
|
728
|
+
// Register cleanup on exit — set flag so sync loops can break
|
|
729
|
+
this._interrupted = false;
|
|
730
|
+
process.on("SIGINT", () => { this._interrupted = true; this.cleanup(projectDir); process.exit(0); });
|
|
731
|
+
process.on("SIGTERM", () => { this._interrupted = true; this.cleanup(projectDir); process.exit(0); });
|
|
707
732
|
|
|
708
733
|
// 5. Determine starting phase
|
|
709
734
|
let startIdx = 0;
|
|
@@ -732,6 +757,88 @@ ${BOLD}Phases:${RESET} ${this.wf.phases.join(" → ")}
|
|
|
732
757
|
|
|
733
758
|
heading(`Phase ${i + 1}/${phases.length}: ${phase} (${items.length} items)`);
|
|
734
759
|
|
|
760
|
+
// ── Element inventory validation (widgets/pages only) ─────────────
|
|
761
|
+
// Before building widgets or pages, validate that contracts only reference
|
|
762
|
+
// elements that actually exist. Auto-correct mismatches.
|
|
763
|
+
if (phase !== phases[0]) { // skip for the first phase (elements themselves)
|
|
764
|
+
const elemDir = path.join(projectDir, "src", "components", phases[0]);
|
|
765
|
+
const contractDir = path.join(projectDir, ".gsd-t", "contracts", "design", phase);
|
|
766
|
+
if (fs.existsSync(elemDir) && fs.existsSync(contractDir)) {
|
|
767
|
+
// Build inventory of available element kebab names
|
|
768
|
+
const availableElements = new Set();
|
|
769
|
+
try {
|
|
770
|
+
for (const f of fs.readdirSync(elemDir)) {
|
|
771
|
+
if (!f.endsWith(".vue") && !f.endsWith(".tsx")) continue;
|
|
772
|
+
const name = f.replace(/\.\w+$/, "");
|
|
773
|
+
const kebab = name.replace(/([a-z0-9])([A-Z])/g, "$1-$2").toLowerCase();
|
|
774
|
+
availableElements.add(kebab);
|
|
775
|
+
}
|
|
776
|
+
} catch { /* ignore */ }
|
|
777
|
+
|
|
778
|
+
if (availableElements.size > 0) {
|
|
779
|
+
info(`Validating ${phase} contracts against element inventory (${availableElements.size} elements)`);
|
|
780
|
+
let corrections = 0;
|
|
781
|
+
|
|
782
|
+
for (const cf of fs.readdirSync(contractDir)) {
|
|
783
|
+
if (!cf.endsWith(".contract.md")) continue;
|
|
784
|
+
const cfPath = path.join(contractDir, cf);
|
|
785
|
+
let content;
|
|
786
|
+
try { content = fs.readFileSync(cfPath, "utf8"); } catch { continue; }
|
|
787
|
+
|
|
788
|
+
// Find all element contract references in table cells
|
|
789
|
+
const refPattern = /\|\s*(chart-[a-z-]+|legend-[a-z-]+|stat-[a-z-]+|table-[a-z-]+|select-[a-z-]+|tabs-[a-z-]+|date-[a-z-]+|pagination|icon|tooltip)\s*\|/g;
|
|
790
|
+
let match;
|
|
791
|
+
const missing = [];
|
|
792
|
+
while ((match = refPattern.exec(content)) !== null) {
|
|
793
|
+
const ref = match[1].trim();
|
|
794
|
+
if (!availableElements.has(ref)) {
|
|
795
|
+
missing.push(ref);
|
|
796
|
+
}
|
|
797
|
+
}
|
|
798
|
+
|
|
799
|
+
if (missing.length > 0) {
|
|
800
|
+
// Find closest available element for each missing ref
|
|
801
|
+
const availArr = Array.from(availableElements);
|
|
802
|
+
for (const miss of missing) {
|
|
803
|
+
// Simple similarity: count shared words
|
|
804
|
+
const missWords = miss.split("-");
|
|
805
|
+
let bestMatch = null;
|
|
806
|
+
let bestScore = 0;
|
|
807
|
+
for (const avail of availArr) {
|
|
808
|
+
const availWords = avail.split("-");
|
|
809
|
+
// Count shared words
|
|
810
|
+
let shared = 0;
|
|
811
|
+
for (const w of missWords) {
|
|
812
|
+
if (availWords.includes(w)) shared++;
|
|
813
|
+
}
|
|
814
|
+
// Prefer same prefix (chart→chart, legend→legend)
|
|
815
|
+
if (missWords[0] === availWords[0]) shared += 2;
|
|
816
|
+
if (shared > bestScore) {
|
|
817
|
+
bestScore = shared;
|
|
818
|
+
bestMatch = avail;
|
|
819
|
+
}
|
|
820
|
+
}
|
|
821
|
+
if (bestMatch && bestScore >= 2) {
|
|
822
|
+
content = content.split(miss).join(bestMatch);
|
|
823
|
+
warn(` ${cf}: ${miss} → ${bestMatch} (auto-corrected)`);
|
|
824
|
+
corrections++;
|
|
825
|
+
} else {
|
|
826
|
+
warn(` ${cf}: ${miss} not found, no close match available`);
|
|
827
|
+
}
|
|
828
|
+
}
|
|
829
|
+
try { fs.writeFileSync(cfPath, content); } catch { /* ignore */ }
|
|
830
|
+
}
|
|
831
|
+
}
|
|
832
|
+
|
|
833
|
+
if (corrections > 0) {
|
|
834
|
+
success(`Auto-corrected ${corrections} element reference(s) in ${phase} contracts`);
|
|
835
|
+
} else {
|
|
836
|
+
info(`All ${phase} contracts reference valid elements`);
|
|
837
|
+
}
|
|
838
|
+
}
|
|
839
|
+
}
|
|
840
|
+
}
|
|
841
|
+
|
|
735
842
|
state.currentPhase = phase;
|
|
736
843
|
this.saveState(projectDir, state);
|
|
737
844
|
|
|
@@ -1029,11 +1136,12 @@ ${BOLD}Phases:${RESET} ${this.wf.phases.join(" → ")}
|
|
|
1029
1136
|
}
|
|
1030
1137
|
}
|
|
1031
1138
|
|
|
1032
|
-
// 6e. Human review cycle
|
|
1139
|
+
// 6e. Human review cycle — unlimited (human decides when to approve)
|
|
1140
|
+
// After each human fix, auto-review runs again with a fresh cycle counter
|
|
1033
1141
|
let reviewCycle = 0;
|
|
1034
1142
|
let allApproved = false;
|
|
1035
1143
|
|
|
1036
|
-
while (
|
|
1144
|
+
while (!allApproved) {
|
|
1037
1145
|
const queueCount = this.queuePhaseItems(projectDir, phase, items, measurements);
|
|
1038
1146
|
this.waitForReview(projectDir, phase, queueCount, reviewPort);
|
|
1039
1147
|
|
|
@@ -1046,19 +1154,97 @@ ${BOLD}Phases:${RESET} ${this.wf.phases.join(" → ")}
|
|
|
1046
1154
|
success(`All ${phase} approved!`);
|
|
1047
1155
|
} else {
|
|
1048
1156
|
reviewCycle++;
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1157
|
+
info(`Human review cycle ${reviewCycle} — applying ${feedback.needsWork.length} fixes...`);
|
|
1158
|
+
const fixPrompt = this.wf.buildFixPrompt
|
|
1159
|
+
? this.wf.buildFixPrompt(phase, feedback.needsWork)
|
|
1160
|
+
: this._defaultFixPrompt(phase, feedback.needsWork);
|
|
1161
|
+
const fixResult = this.spawnClaude(projectDir, fixPrompt, opts.timeout || 600_000, { label: `${phase}-human-fix-c${reviewCycle}` });
|
|
1162
|
+
if (fixResult.exitCode === 0) success("Fixes applied");
|
|
1163
|
+
else warn(`Fix attempt returned code ${fixResult.exitCode}`);
|
|
1164
|
+
|
|
1165
|
+
// Re-measure after human fix
|
|
1166
|
+
if (!skipMeasure && this.wf.measure) {
|
|
1167
|
+
info("Re-measuring after human fix...");
|
|
1168
|
+
measurements = this.wf.measure(projectDir, phase, items, { devPort, reviewPort }) || {};
|
|
1169
|
+
}
|
|
1170
|
+
|
|
1171
|
+
// Re-run auto-review with fresh cycle counter
|
|
1172
|
+
if (this.wf.buildReviewPrompt || this.wf.buildSingleItemReviewPrompt) {
|
|
1173
|
+
let autoReviewCycle2 = 0;
|
|
1174
|
+
let autoReviewClean2 = false;
|
|
1175
|
+
|
|
1176
|
+
while (autoReviewCycle2 < maxAutoReviewCycles && !autoReviewClean2) {
|
|
1177
|
+
autoReviewCycle2++;
|
|
1178
|
+
heading(`Post-Fix Automated Review — ${phase} (cycle ${autoReviewCycle2}/${maxAutoReviewCycles})`);
|
|
1179
|
+
let issues = [];
|
|
1180
|
+
|
|
1181
|
+
if (this.wf.buildSingleItemReviewPrompt) {
|
|
1182
|
+
const reviewTimeout = this.wf.defaults?.perItemReviewTimeout || 120_000;
|
|
1183
|
+
const perItemTimeout = this.wf.defaults?.perItemTimeout || 300_000;
|
|
1184
|
+
let totalDuration = 0;
|
|
1185
|
+
for (let idx = 0; idx < items.length; idx++) {
|
|
1186
|
+
const item = items[idx];
|
|
1187
|
+
const itemMeasurements = { [item.id]: measurements[item.id] || [] };
|
|
1188
|
+
const reviewPrompt = this.wf.buildSingleItemReviewPrompt(phase, item, itemMeasurements, projectDir, { devPort, reviewPort });
|
|
1189
|
+
dim(` [${idx + 1}/${items.length}] ${item.componentName}...`);
|
|
1190
|
+
const reviewResult = this.spawnClaude(projectDir, reviewPrompt, Math.min(reviewTimeout, perItemTimeout), { label: `${phase}-postreview-c${autoReviewCycle2}-${item.id}` });
|
|
1191
|
+
totalDuration += reviewResult.duration;
|
|
1192
|
+
|
|
1193
|
+
const isCrash = reviewResult.exitCode !== 0 && reviewResult.duration < 10;
|
|
1194
|
+
const isKilled = [143, 137].includes(reviewResult.exitCode);
|
|
1195
|
+
const isEmptyFail = reviewResult.exitCode !== 0 && !reviewResult.output.trim();
|
|
1196
|
+
|
|
1197
|
+
if (isCrash || isKilled || isEmptyFail) {
|
|
1198
|
+
issues.push({ component: item.componentName, severity: "critical", description: `Reviewer ${isCrash ? "crashed" : isKilled ? "killed/timed out" : "failed"} — review not performed` });
|
|
1199
|
+
} else {
|
|
1200
|
+
const itemIssues = this.wf.parseReviewResult
|
|
1201
|
+
? this.wf.parseReviewResult(reviewResult.output, phase)
|
|
1202
|
+
: this._parseDefaultReviewResult(reviewResult.output);
|
|
1203
|
+
if (itemIssues.length > 0) {
|
|
1204
|
+
warn(` ${item.componentName}: ${itemIssues.length} issue(s) (${reviewResult.duration}s)`);
|
|
1205
|
+
issues.push(...itemIssues);
|
|
1206
|
+
} else {
|
|
1207
|
+
success(` ${item.componentName}: clean (${reviewResult.duration}s)`);
|
|
1208
|
+
}
|
|
1209
|
+
}
|
|
1210
|
+
}
|
|
1211
|
+
log(`\n Total review time: ${totalDuration}s for ${items.length} items`);
|
|
1212
|
+
} else {
|
|
1213
|
+
const reviewPrompt = this.wf.buildReviewPrompt(phase, items, measurements, projectDir, { devPort, reviewPort });
|
|
1214
|
+
const reviewResult = this.spawnClaude(projectDir, reviewPrompt, this.wf.defaults?.reviewTimeout || 300_000, { label: `${phase}-postreview-cycle${autoReviewCycle2}` });
|
|
1215
|
+
const isCrash = reviewResult.exitCode !== 0 && reviewResult.duration < 10;
|
|
1216
|
+
const isKilled = [143, 137].includes(reviewResult.exitCode);
|
|
1217
|
+
const isEmptyFail = reviewResult.exitCode !== 0 && !reviewResult.output.trim();
|
|
1218
|
+
if (isCrash || isKilled || isEmptyFail) {
|
|
1219
|
+
issues = [{ component: "ALL", severity: "critical", description: `Reviewer failed with exit code ${reviewResult.exitCode}` }];
|
|
1220
|
+
} else {
|
|
1221
|
+
issues = this.wf.parseReviewResult
|
|
1222
|
+
? this.wf.parseReviewResult(reviewResult.output, phase)
|
|
1223
|
+
: this._parseDefaultReviewResult(reviewResult.output);
|
|
1224
|
+
}
|
|
1225
|
+
}
|
|
1226
|
+
|
|
1227
|
+
if (issues.length === 0) {
|
|
1228
|
+
autoReviewClean2 = true;
|
|
1229
|
+
success(`Post-fix automated review passed — no issues found`);
|
|
1230
|
+
} else {
|
|
1231
|
+
warn(`Post-fix review found ${issues.length} issue(s)`);
|
|
1232
|
+
if (autoReviewCycle2 < maxAutoReviewCycles) {
|
|
1233
|
+
const fixPrompt = this.wf.buildAutoFixPrompt
|
|
1234
|
+
? this.wf.buildAutoFixPrompt(phase, issues, items, projectDir)
|
|
1235
|
+
: this._defaultAutoFixPrompt(phase, issues);
|
|
1236
|
+
log(`\n${CYAN} ⚙${RESET} Spawning fixer for ${issues.length} issue(s)...`);
|
|
1237
|
+
this.spawnClaude(projectDir, fixPrompt, opts.timeout || 600_000, { label: `${phase}-postfix-cycle${autoReviewCycle2}` });
|
|
1238
|
+
if (!skipMeasure && this.wf.measure) {
|
|
1239
|
+
measurements = this.wf.measure(projectDir, phase, items, { devPort, reviewPort }) || {};
|
|
1240
|
+
}
|
|
1241
|
+
} else {
|
|
1242
|
+
warn(`Max post-fix auto-review cycles reached — remaining issues go to next human review`);
|
|
1243
|
+
}
|
|
1244
|
+
}
|
|
1245
|
+
}
|
|
1061
1246
|
}
|
|
1247
|
+
// Loop continues → re-queue for human review
|
|
1062
1248
|
}
|
|
1063
1249
|
}
|
|
1064
1250
|
|
|
@@ -151,9 +151,33 @@ Icons, badges, chips, dividers, avatars, status dots, spinners — every small a
|
|
|
151
151
|
|
|
152
152
|
## Step 3: Identify Widgets
|
|
153
153
|
|
|
154
|
-
A **widget** is a
|
|
154
|
+
A **widget** is a self-contained card with ONE headline job: one title, one body, optional header controls, optional footer/legend. Examples: "Revenue Breakdown" (donut + legend + title + filter), "Device Type" (donut + legend), "Number of Tools" (KPI + bar + legend).
|
|
155
155
|
|
|
156
|
-
|
|
156
|
+
A **section** is a visual grouping of MULTIPLE widgets that share a common heading or layout container. Sections live in the page contract's layout — they are NOT widgets.
|
|
157
|
+
|
|
158
|
+
### The Sub-Card Rule (MANDATORY)
|
|
159
|
+
|
|
160
|
+
**If a visual grouping contains multiple titled sub-cards (each with its own h3/header and its own body), each sub-card is its own widget. The grouping is a section handled in the page layout phase.**
|
|
161
|
+
|
|
162
|
+
```
|
|
163
|
+
WRONG — one widget conflating three cards:
|
|
164
|
+
device-browser-widget
|
|
165
|
+
├── sub-card "Device Type" (donut)
|
|
166
|
+
├── sub-card "Operating System" (bar)
|
|
167
|
+
└── sub-card "Browser" (bar)
|
|
168
|
+
|
|
169
|
+
RIGHT — three widgets + a page-level section:
|
|
170
|
+
device-type-widget ← widget
|
|
171
|
+
operating-system-widget ← widget
|
|
172
|
+
browser-widget ← widget
|
|
173
|
+
device-browser-section ← page-layout section grouping the 3 widgets
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
**Test**: Count the number of distinct titled headers (h3 / card title) inside the visual group. If > 1, it is a section, not a widget. Split it.
|
|
177
|
+
|
|
178
|
+
### Widget vs. Page-Internal Composition
|
|
179
|
+
|
|
180
|
+
For each candidate widget, determine:
|
|
157
181
|
- Does it appear on ≥2 pages, OR is it clearly a reusable unit conceptually?
|
|
158
182
|
- Yes → widget contract
|
|
159
183
|
- No → page-internal composition (no widget contract needed)
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
# Context Budget Recovery Plan
|
|
2
|
+
|
|
3
|
+
**Generated**: 2026-04-13
|
|
4
|
+
**Tool**: `bin/context-budget-audit.js`
|
|
5
|
+
**Symptom**: Manual `/compact` prompts started ~2026-04-10, now constant. Long-running unattended tasks stop mid-build with no notification.
|
|
6
|
+
|
|
7
|
+
## Calibrated baseline
|
|
8
|
+
|
|
9
|
+
| Layer | Tokens | % of 200K window |
|
|
10
|
+
|-------|--------|------------------|
|
|
11
|
+
| Claude Code system prompt + tool schemas | 15,600 | 7.8% |
|
|
12
|
+
| Global `~/.claude/CLAUDE.md` | 9,679 | 4.8% |
|
|
13
|
+
| Project `CLAUDE.md` | 3,506 | 1.8% |
|
|
14
|
+
| Auto-memory (10 files) | 3,201 | 1.6% |
|
|
15
|
+
| Skill manifest (112 commands × 200 chars) | 5,600 | 2.8% |
|
|
16
|
+
| MCP tool manifest | 660 | 0.3% |
|
|
17
|
+
| **Static preamble total** | **~38,250** | **19.1%** |
|
|
18
|
+
|
|
19
|
+
**Conclusion**: Preamble itself is healthy. The problem is **per-invocation cost**.
|
|
20
|
+
|
|
21
|
+
## Per-invocation cost (where the regression actually hides)
|
|
22
|
+
|
|
23
|
+
A typical `/user:gsd-t-execute` invocation loads:
|
|
24
|
+
|
|
25
|
+
| Item | Tokens | Notes |
|
|
26
|
+
|------|--------|-------|
|
|
27
|
+
| `gsd-t-execute.md` body | 16,875 | Loaded on skill invocation |
|
|
28
|
+
| `.gsd-t/progress.md` | **51,098** | ⚠️ exceeds Read's 10K limit; the agent has to read it in chunks |
|
|
29
|
+
| `.gsd-t/contracts/*.md` | ~5,000-10,000 | Varies by milestone |
|
|
30
|
+
| Domain `scope.md` + `tasks.md` + `constraints.md` | ~3,000-8,000 | Per active domain |
|
|
31
|
+
| `docs/architecture.md` | ~5,000-15,000 | Re-read on every step |
|
|
32
|
+
| `docs/requirements.md` | ~5,000-15,000 | Re-read on every step |
|
|
33
|
+
| Subagent spawn (Task tool) overhead | ~5,000-10,000 | Per spawn |
|
|
34
|
+
| Bash output forwarding (test runs, builds) | ~5,000-50,000 | Wide variance |
|
|
35
|
+
| **Per-invocation typical** | **~95,000-175,000** | + 38K preamble = 133K-213K |
|
|
36
|
+
|
|
37
|
+
**This is why you hit compaction.** A single execute call can blow past 200K in one phase, even with healthy preamble.
|
|
38
|
+
|
|
39
|
+
## The two highest-leverage cuts
|
|
40
|
+
|
|
41
|
+
### CUT #1: Archive old milestones from `progress.md` (saves ~40,000 tokens per invocation)
|
|
42
|
+
|
|
43
|
+
**File**: `.gsd-t/progress.md`
|
|
44
|
+
**Current size**: 51,098 tokens
|
|
45
|
+
**Target size**: <10,000 tokens
|
|
46
|
+
**Method**:
|
|
47
|
+
- Move all COMPLETED milestones older than the last 2 to `.gsd-t/milestones/archive-2026-04.md`
|
|
48
|
+
- Keep only: current active milestone, most recent 2 completed (for context), and the Decision Log for the last 30 days
|
|
49
|
+
- Older Decision Log entries → `.gsd-t/decision-log-archive.md`
|
|
50
|
+
|
|
51
|
+
**Script to write**: `bin/archive-progress.js` (one-shot, idempotent)
|
|
52
|
+
|
|
53
|
+
**Estimated reclaim**: 40,000 tokens per invocation × every command = enormous. This single change probably solves 80% of the problem.
|
|
54
|
+
|
|
55
|
+
### CUT #2: Slim `gsd-t-execute.md` (saves ~10,000 tokens per execute call)
|
|
56
|
+
|
|
57
|
+
**File**: `commands/gsd-t-execute.md`
|
|
58
|
+
**Current size**: 16,875 tokens (largest command file)
|
|
59
|
+
**Target size**: ~6,000 tokens
|
|
60
|
+
**Method**:
|
|
61
|
+
- Extract the OBSERVABILITY LOGGING block (~2,000 tokens, repeated 5+ times in the file) into `templates/observability-logging-snippet.md` and reference it once
|
|
62
|
+
- Extract the QA Subagent prompt block (~1,500 tokens, copy-pasted in 4 commands) into `templates/qa-subagent-prompt.md`
|
|
63
|
+
- Extract the Red Team prompt block (~1,500 tokens) into `templates/red-team-prompt.md`
|
|
64
|
+
- Extract the Design Verification prompt block (~2,000 tokens) into `templates/design-verification-prompt.md`
|
|
65
|
+
- Replace duplications in `gsd-t-execute.md`, `gsd-t-quick.md`, `gsd-t-integrate.md`, `gsd-t-debug.md`, `gsd-t-wave.md`, `gsd-t-complete-milestone.md` with single-line references like `> See: templates/qa-subagent-prompt.md`
|
|
66
|
+
- Convert step-by-step prose into terse bullet form where possible
|
|
67
|
+
- Remove explanatory paragraphs that duplicate `docs/methodology.md`
|
|
68
|
+
|
|
69
|
+
**Estimated reclaim**:
|
|
70
|
+
- `gsd-t-execute.md`: 16,875 → 6,000 = **−10,875 tokens**
|
|
71
|
+
- `gsd-t-quick.md`: 6,553 → 3,000 = **−3,553 tokens**
|
|
72
|
+
- `gsd-t-integrate.md`: 5,005 → 2,500 = **−2,505 tokens**
|
|
73
|
+
- `gsd-t-debug.md`: 7,015 → 3,500 = **−3,515 tokens**
|
|
74
|
+
- `gsd-t-wave.md`: 6,111 → 3,000 = **−3,111 tokens**
|
|
75
|
+
- `gsd-t-complete-milestone.md`: 6,362 → 3,000 = **−3,362 tokens**
|
|
76
|
+
- **Total per session if these commands invoked**: −26,921 tokens
|
|
77
|
+
|
|
78
|
+
Note: these savings only apply when a command is invoked. If you only ever run `quick`, you save 3,553 tokens; if you run `wave` you save up to ~27K across the cascade.
|
|
79
|
+
|
|
80
|
+
## Medium-leverage cuts
|
|
81
|
+
|
|
82
|
+
### CUT #3: Slim global `~/.claude/CLAUDE.md` (saves 5,000 tokens always-on)
|
|
83
|
+
|
|
84
|
+
**File**: `~/.claude/CLAUDE.md`
|
|
85
|
+
**Current size**: 9,679 tokens (4.8% of window — always loaded)
|
|
86
|
+
**Target size**: ~4,500 tokens
|
|
87
|
+
**Method**:
|
|
88
|
+
- The entire "Commands Reference" table (51 rows) duplicates `commands/gsd-t-help.md` — DELETE the table, replace with `Run /user:gsd-t-help for the full command list.`
|
|
89
|
+
- The "Update Notices" / "Auto-Init Guard" / "Playwright Readiness Guard" / "QA Agent" / "Design Verification Agent" / "Red Team" sections (~3,000 tokens combined) are duplicated in the relevant command files. Move them to the command files only and replace with one-line summaries here.
|
|
90
|
+
- The "Pre-Commit Gate" decision tree (~1,500 tokens) is also in project CLAUDE.md — keep one, link from the other
|
|
91
|
+
- The "Document Ripple Completion Gate" (~1,500 tokens) is duplicated in `gsd-t-doc-ripple.md` — reference only
|
|
92
|
+
|
|
93
|
+
**Estimated reclaim**: −5,000 tokens permanent baseline. Drops static preamble from 19.1% → 16.6%.
|
|
94
|
+
|
|
95
|
+
### CUT #4: Slim `docs/architecture.md` and `docs/requirements.md` reads
|
|
96
|
+
|
|
97
|
+
**Problem**: These are read on every command, but only sections relevant to the current domain are needed.
|
|
98
|
+
**Method**:
|
|
99
|
+
- Add table-of-contents anchors at the top of each file
|
|
100
|
+
- Update command files to use `Read` with `offset`/`limit` to load only the relevant section, not the whole file
|
|
101
|
+
- For very large architecture docs, split into `docs/architecture/` directory with one file per subsystem
|
|
102
|
+
|
|
103
|
+
**Estimated reclaim**: 5,000-15,000 tokens per invocation, project-dependent.
|
|
104
|
+
|
|
105
|
+
### CUT #5: Bash output truncation in command files
|
|
106
|
+
|
|
107
|
+
**Problem**: When a command runs `npm test` or `playwright test`, the entire stdout (often 5,000-50,000 tokens) gets forwarded into context.
|
|
108
|
+
**Method**:
|
|
109
|
+
- Add a `bin/log-tail.js` helper that runs `{ command }` with its full output written to `.gsd-t/last-build.log` and forwards only the tail: `tail -100 .gsd-t/last-build.log`
|
|
110
|
+
- Update test/build steps in command files to write full output to a log file and only forward the tail
|
|
111
|
+
- On failure, increase the tail to 500 lines
|
|
112
|
+
|
|
113
|
+
**Estimated reclaim**: 5,000-30,000 tokens per build cycle.
|
|
114
|
+
|
|
115
|
+
## Low-leverage but easy
|
|
116
|
+
|
|
117
|
+
### CUT #6: Trim `gsd-t-help.md` (saves 4,000 tokens when invoked)
|
|
118
|
+
|
|
119
|
+
**File**: `commands/gsd-t-help.md`
|
|
120
|
+
**Current size**: 7,067 tokens
|
|
121
|
+
**Method**: The full command table with summaries duplicates the project CLAUDE.md table and the README table. Pick one source of truth.
|
|
122
|
+
|
|
123
|
+
### CUT #7: Auto-memory hygiene
|
|
124
|
+
|
|
125
|
+
**Files**: `~/.claude/projects/-Users-david-projects-GSD-T/memory/*.md`
|
|
126
|
+
**Current size**: 3,201 tokens (10 files, always-on)
|
|
127
|
+
**Method**: Already well-managed; no action needed unless it grows past ~5,000 tokens.
|
|
128
|
+
|
|
129
|
+
## Total potential reclaim
|
|
130
|
+
|
|
131
|
+
| Type | Always-on savings | Per-invocation savings |
|
|
132
|
+
|------|-------------------|------------------------|
|
|
133
|
+
| CUT #1 (archive progress.md) | — | **−40,000** |
|
|
134
|
+
| CUT #2 (slim execute & friends) | — | **−27,000** (if wave) |
|
|
135
|
+
| CUT #3 (slim global CLAUDE.md) | **−5,000** | — |
|
|
136
|
+
| CUT #4 (chunked doc reads) | — | −10,000 |
|
|
137
|
+
| CUT #5 (bash output truncation) | — | −15,000 |
|
|
138
|
+
| CUT #6 (slim gsd-t-help.md) | — | −4,000 |
|
|
139
|
+
| **Subtotal** | **−5,000** | **−96,000** |
|
|
140
|
+
|
|
141
|
+
**Net effect**: A typical `gsd-t-execute` call drops from ~133K tokens to ~37K tokens. Compaction prompts become highly unlikely for normal workflows.
|
|
142
|
+
|
|
143
|
+
## Order of operations (recommended)
|
|
144
|
+
|
|
145
|
+
1. **Build `bin/archive-progress.js`** (1 hour) — gives 40K reclaim immediately
|
|
146
|
+
2. **Slim global CLAUDE.md** (30 min) — gives 5K permanent reclaim
|
|
147
|
+
3. **Extract observability/QA/red-team templates** (2 hours) — gives ~11K reclaim per execute, up to ~27K across a full `wave` cascade
|
|
148
|
+
4. **Bash output truncation helper** (1 hour) — gives 15K reclaim per build
|
|
149
|
+
5. **Chunked doc reads** (2 hours) — gives 10K reclaim per command
|
|
150
|
+
6. **Slim gsd-t-help.md** (15 min) — gives 4K reclaim when invoked
|
|
151
|
+
|
|
152
|
+
Total time: ~6.75 hours of focused work.
|
|
153
|
+
|
|
154
|
+
## What this plan does NOT solve
|
|
155
|
+
|
|
156
|
+
The user's stated requirement was: **"long-running unattended tasks must not silently stop."** Even with all cuts above applied, you can still hit compaction on a sufficiently large job. The plan above buys you 3-4× more headroom — but the durable fix for unattended runs is still:
|
|
157
|
+
|
|
158
|
+
- **Loud-stop hook**: write a sentinel file + audible alert when context > 75%, so the user knows on return
|
|
159
|
+
- **Subprocess orchestrator**: route long builds through `bin/orchestrator.js` with `claude -p` subprocesses (each gets a fresh context); parent coordinates via files
|
|
160
|
+
|
|
161
|
+
These should be a separate milestone after the cuts above are applied.
|
|
162
|
+
|
|
163
|
+
## Verification
|
|
164
|
+
|
|
165
|
+
After each cut, re-run:
|
|
166
|
+
```
|
|
167
|
+
node bin/context-budget-audit.js --top 15 --threshold 3000
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
Track the "Static preamble cost" line over time. Target: keep it below 20% always; keep `progress.md` below 10K tokens always.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tekyzinc/gsd-t",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.74.10",
|
|
4
4
|
"description": "GSD-T: Contract-Driven Development for Claude Code — 56 slash commands with headless CI/CD mode, graph-powered code analysis, real-time agent dashboard, execution intelligence, task telemetry, doc-ripple enforcement, backlog management, impact analysis, test sync, milestone archival, and PRD generation",
|
|
5
5
|
"author": "Tekyz, Inc.",
|
|
6
6
|
"license": "MIT",
|