@tekyzinc/gsd-t 2.71.15 → 2.71.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,21 @@
2
2
 
3
3
  All notable changes to GSD-T are documented here. Updated with each release.
4
4
 
5
+ ## [2.71.17] - 2026-04-08
6
+
7
+ ### Fixed (orchestrator — auto-review cycle limit)
8
+ - **Bumped maxAutoReviewCycles from 2 to 4** — 2 cycles was too conservative for complex components (e.g., charts with multiple contract properties). 4 cycles gives the reviewer/fixer loop enough iterations to converge.
9
+
10
+ ## [2.71.16] - 2026-04-08
11
+
12
+ ### Added (orchestrator — automated AI review loop)
13
+ - **Automated review before human review** — orchestrator now spawns an independent reviewer Claude (no builder context) that compares built components against design contracts. If issues found, spawns a fixer Claude, re-measures, and re-reviews (max 2 cycles). Only after automated review passes do items reach human review. This is the Term 2 equivalent, running deterministically in JavaScript.
14
+ - **Review report persistence** — each auto-review cycle writes results to `.gsd-t/design-review/auto-review/`. Unresolved issues are written to `{phase}-unresolved.json` for human visibility.
15
+ - **Structured review output** — reviewer uses `[REVIEW_ISSUES]` markers for reliable parsing. Fallback parser catches DEVIATION/FAIL/CRITICAL keywords.
16
+
17
+ ### Pipeline (updated)
18
+ Build → Measure → **Automated AI Review** (reviewer → fixer → re-review loop) → Human Review → Next Tier
19
+
5
20
  ## [2.71.15] - 2026-04-08
6
21
 
7
22
  ### Changed (design-build command → orchestrator delegate)
@@ -300,6 +300,91 @@ function buildFixPrompt(phase, needsWork) {
300
300
  return `Apply these specific fixes to ${phase} components:\n\n${fixes}\n\nApply the changes and EXIT. Do not rebuild anything else.`;
301
301
  }
302
302
 
303
+ // ─── Automated AI Review (Term 2 equivalent) ───────────────────────────────
304
+
305
+ function buildReviewPrompt(phase, items, measurements, projectDir, ports) {
306
+ const singular = PHASE_SINGULAR[phase];
307
+ const contractsDir = path.join(projectDir, CONTRACTS_DIR);
308
+
309
+ const componentList = items.map(c => {
310
+ const sourcePath = c.sourcePath || guessPaths(phase, c);
311
+ return `- **${c.componentName}** — contract: ${c.fullContractPath}, source: ${sourcePath}, selector: \`${c.selector || "." + c.id}\``;
312
+ }).join("\n");
313
+
314
+ // Include any measurement failures for context
315
+ const failedMeasurements = [];
316
+ for (const item of items) {
317
+ const m = measurements[item.id] || [];
318
+ const failures = m.filter(x => !x.pass);
319
+ if (failures.length > 0) {
320
+ failedMeasurements.push(`- ${item.componentName}: ${failures.map(f => `${f.property}: expected ${f.expected}, got ${f.actual}`).join("; ")}`);
321
+ }
322
+ }
323
+ const measurementContext = failedMeasurements.length > 0
324
+ ? `\n## Known Measurement Failures\nPlaywright already detected these — verify they are real issues:\n${failedMeasurements.join("\n")}\n`
325
+ : "";
326
+
327
+ return `You are an INDEPENDENT design reviewer. You have NO knowledge of how these components were built. Your job is to compare the built ${phase} against their design contracts and find deviations.
328
+
329
+ ## Components to Review
330
+ ${componentList}
331
+
332
+ ${measurementContext}
333
+ ## Review Process
334
+
335
+ For EACH component:
336
+ 1. Read the design contract file (path given above) — note every specified property value
337
+ 2. Read the source file — check that specified values are implemented correctly
338
+ 3. Use Playwright to render the component at http://localhost:${ports.reviewPort}/ and measure:
339
+ - Does the component render and have correct dimensions?
340
+ - Do colors, fonts, spacing, border-radius match the contract?
341
+ - For charts: correct chart type, orientation, axis labels, legend position, data format?
342
+ - For layouts: correct grid columns, gap, padding, child count and arrangement?
343
+ - For interactive elements: correct states, hover effects, click behavior?
344
+ 4. Compare contract values against actual rendered values — be SPECIFIC (exact px, hex, counts)
345
+
346
+ ## Output Format
347
+
348
+ Output your findings between these markers. Each issue must have component, severity (critical/high/medium/low), and description with SPECIFIC contract vs. actual values:
349
+
350
+ [REVIEW_ISSUES]
351
+ [
352
+ {"component": "ComponentName", "severity": "critical", "description": "Contract specifies donut chart but rendered as pie chart (no inner radius)"},
353
+ {"component": "ComponentName", "severity": "high", "description": "Grid gap: contract 16px, actual 24px"}
354
+ ]
355
+ [/REVIEW_ISSUES]
356
+
357
+ If ALL components match their contracts, output:
358
+ [REVIEW_ISSUES]
359
+ []
360
+ [/REVIEW_ISSUES]
361
+
362
+ ## Rules
363
+ - You write ZERO code. You ONLY review.
364
+ - Be HARSH. Your value is in catching what the builder missed.
365
+ - NEVER say "looks close" or "appears to match" — give SPECIFIC values.
366
+ - Every contract property must be verified. Missing verification = missed issue.
367
+ - Severity guide: critical = wrong component type, missing element, broken render. high = wrong dimensions, colors, layout. medium = spacing/padding off. low = minor visual difference.`;
368
+ }
369
+
370
+ function buildAutoFixPrompt(phase, issues, items, projectDir) {
371
+ const issueList = issues.map((issue, i) => {
372
+ const item = items.find(c => c.componentName === issue.component);
373
+ const contractPath = item ? item.fullContractPath : "check .gsd-t/contracts/design/";
374
+ return `${i + 1}. [${issue.severity}] **${issue.component}** — ${issue.description}\n Contract: ${contractPath}`;
375
+ }).join("\n");
376
+
377
+ return `The automated design reviewer found these issues. Fix each one by reading the design contract and correcting the implementation.
378
+
379
+ ## Issues to Fix
380
+ ${issueList}
381
+
382
+ ## Rules
383
+ - Read each component's design contract for the correct values — do NOT guess
384
+ - Fix ONLY the listed issues — do not modify other components or add features
385
+ - After fixing all issues, EXIT. Do not start servers or ask for review.`;
386
+ }
387
+
303
388
  // ─── Summary ────────────────────────────────────────────────────────────────
304
389
 
305
390
  function formatSummary(phase, result) {
@@ -329,11 +414,13 @@ ${BOLD}Pipeline:${RESET}
329
414
  1. Read contracts from .gsd-t/contracts/design/
330
415
  2. Start dev server + review server
331
416
  3. For each tier (elements → widgets → pages):
332
- a. Spawn Claude to build components
417
+ a. Spawn Claude (builder) to build components from contracts
333
418
  b. Measure with Playwright
334
- c. Queue for human review
335
- d. Wait for review submission (blocks until human approves)
336
- e. Process feedback, proceed to next tier
419
+ c. Spawn Claude (reviewer) to compare against contracts — independent, no builder context
420
+ d. If reviewer finds issues → spawn Claude (fixer) → re-measure → re-review (max 4 cycles)
421
+ e. Queue for human review (only after automated review passes)
422
+ f. Wait for human review submission (blocks until human approves)
423
+ g. Process feedback, proceed to next tier
337
424
  `);
338
425
  }
339
426
 
@@ -351,11 +438,15 @@ const designBuildWorkflow = {
351
438
  timeout: 600_000,
352
439
  devServerTimeout: 30_000,
353
440
  maxReviewCycles: 3,
441
+ maxAutoReviewCycles: 4,
442
+ reviewTimeout: 300_000,
354
443
  },
355
444
  completionMessage: "All done. Run your app to verify: npm run dev",
356
445
 
357
446
  discoverWork,
358
447
  buildPrompt,
448
+ buildReviewPrompt,
449
+ buildAutoFixPrompt,
359
450
  measure,
360
451
  buildQueueItem,
361
452
  buildFixPrompt,
@@ -576,7 +576,78 @@ ${BOLD}Phases:${RESET} ${this.wf.phases.join(" → ")}
576
576
  measurements = this.wf.measure(projectDir, phase, items, { devPort, reviewPort }) || {};
577
577
  }
578
578
 
579
- // 6e. Review cycle
579
+ // 6d.5. Automated AI review loop (Term 2 equivalent)
580
+ // Spawns an independent reviewer Claude that compares built output against contracts.
581
+ // If issues found → spawn fixer Claude → re-measure → re-review until clean.
582
+ const maxAutoReviewCycles = this.wf.defaults?.maxAutoReviewCycles || 4;
583
+ if (this.wf.buildReviewPrompt) {
584
+ let autoReviewCycle = 0;
585
+ let autoReviewClean = false;
586
+
587
+ while (autoReviewCycle < maxAutoReviewCycles && !autoReviewClean) {
588
+ autoReviewCycle++;
589
+ heading(`Automated Review — ${phase} (cycle ${autoReviewCycle}/${maxAutoReviewCycles})`);
590
+
591
+ // Spawn reviewer Claude — independent, no builder context
592
+ const reviewPrompt = this.wf.buildReviewPrompt(phase, items, measurements, projectDir, { devPort, reviewPort });
593
+ log(`\n${CYAN} ⚙${RESET} Spawning reviewer Claude for ${phase}...`);
594
+ const reviewTimeout = this.wf.defaults?.reviewTimeout || 300_000;
595
+ const reviewResult = this.spawnClaude(projectDir, reviewPrompt, reviewTimeout);
596
+
597
+ // Parse reviewer output for issues
598
+ const issues = this.wf.parseReviewResult
599
+ ? this.wf.parseReviewResult(reviewResult.output, phase)
600
+ : this._parseDefaultReviewResult(reviewResult.output);
601
+
602
+ if (reviewResult.exitCode === 0) {
603
+ success(`Reviewer finished in ${reviewResult.duration}s`);
604
+ } else {
605
+ warn(`Reviewer exited with code ${reviewResult.exitCode} after ${reviewResult.duration}s`);
606
+ }
607
+
608
+ // Write review report
609
+ const reportDir = path.join(this.getReviewDir(projectDir), "auto-review");
610
+ ensureDir(reportDir);
611
+ fs.writeFileSync(
612
+ path.join(reportDir, `${phase}-cycle-${autoReviewCycle}.json`),
613
+ JSON.stringify({ cycle: autoReviewCycle, issues, output: reviewResult.output.slice(0, 5000) }, null, 2)
614
+ );
615
+
616
+ if (issues.length === 0) {
617
+ autoReviewClean = true;
618
+ success(`Automated review passed — no issues found in ${phase}`);
619
+ } else {
620
+ warn(`Automated review found ${issues.length} issue(s) in ${phase}`);
621
+ for (const issue of issues) {
622
+ dim(`${issue.component || "?"}: ${issue.description || issue.reason || "issue"} [${issue.severity || "medium"}]`);
623
+ }
624
+
625
+ if (autoReviewCycle < maxAutoReviewCycles) {
626
+ // Spawn fixer Claude with the issues
627
+ const fixPrompt = this.wf.buildAutoFixPrompt
628
+ ? this.wf.buildAutoFixPrompt(phase, issues, items, projectDir)
629
+ : this._defaultAutoFixPrompt(phase, issues);
630
+
631
+ log(`\n${CYAN} ⚙${RESET} Spawning fixer Claude for ${issues.length} issue(s)...`);
632
+ const fixResult = this.spawnClaude(projectDir, fixPrompt, 120_000);
633
+ if (fixResult.exitCode === 0) success(`Fixer finished in ${fixResult.duration}s`);
634
+ else warn(`Fixer exited with code ${fixResult.exitCode}`);
635
+
636
+ // Re-measure after fixes
637
+ if (!skipMeasure && this.wf.measure) {
638
+ measurements = this.wf.measure(projectDir, phase, items, { devPort, reviewPort }) || {};
639
+ }
640
+ } else {
641
+ warn(`Max auto-review cycles reached — ${issues.length} issue(s) will go to human review`);
642
+ // Attach unresolved issues to measurements for human visibility
643
+ const issueFile = path.join(this.getReviewDir(projectDir), "auto-review", `${phase}-unresolved.json`);
644
+ fs.writeFileSync(issueFile, JSON.stringify(issues, null, 2));
645
+ }
646
+ }
647
+ }
648
+ }
649
+
650
+ // 6e. Human review cycle
580
651
  let reviewCycle = 0;
581
652
  let allApproved = false;
582
653
 
@@ -655,6 +726,43 @@ ${BOLD}Phases:${RESET} ${this.wf.phases.join(" → ")}
655
726
  };
656
727
  }
657
728
 
729
+ _parseDefaultReviewResult(output) {
730
+ // Try to parse JSON issues array from reviewer output
731
+ // Reviewer is instructed to output JSON between markers
732
+ const jsonMatch = output.match(/\[REVIEW_ISSUES\]([\s\S]*?)\[\/REVIEW_ISSUES\]/);
733
+ if (jsonMatch) {
734
+ try { return JSON.parse(jsonMatch[1].trim()); } catch { /* fall through */ }
735
+ }
736
+ // Fallback: look for PASS/FAIL verdict
737
+ if (/\bGRUDGING PASS\b/i.test(output) || /\bPASS\b.*\b0 issues\b/i.test(output) || /no issues found/i.test(output)) {
738
+ return [];
739
+ }
740
+ // If we see FAIL or DEVIATION keywords, extract what we can
741
+ const issues = [];
742
+ const deviationRegex = /(?:DEVIATION|FAIL|CRITICAL|ISSUE)[:\s—-]+(.+)/gi;
743
+ let match;
744
+ while ((match = deviationRegex.exec(output)) !== null) {
745
+ issues.push({ description: match[1].trim(), severity: "medium" });
746
+ }
747
+ return issues;
748
+ }
749
+
750
+ _defaultAutoFixPrompt(phase, issues) {
751
+ const issueList = issues.map((issue, i) =>
752
+ `${i + 1}. [${issue.severity || "medium"}] ${issue.component || "unknown"}: ${issue.description || issue.reason || "fix needed"}`
753
+ ).join("\n");
754
+
755
+ return `The automated reviewer found these issues in the ${phase} components. Fix each one.
756
+
757
+ ## Issues
758
+ ${issueList}
759
+
760
+ ## Rules
761
+ - Read the relevant design contract for each component to verify the correct values
762
+ - Fix ONLY the listed issues — do not modify other components
763
+ - After fixing, EXIT. Do not start servers or ask for review.`;
764
+ }
765
+
658
766
  _defaultFixPrompt(phase, needsWork) {
659
767
  const fixes = needsWork.map(item => {
660
768
  const parts = [`Fix ${item.id}:`];
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tekyzinc/gsd-t",
3
- "version": "2.71.15",
3
+ "version": "2.71.17",
4
4
  "description": "GSD-T: Contract-Driven Development for Claude Code — 56 slash commands with headless CI/CD mode, graph-powered code analysis, real-time agent dashboard, execution intelligence, task telemetry, doc-ripple enforcement, backlog management, impact analysis, test sync, milestone archival, and PRD generation",
5
5
  "author": "Tekyz, Inc.",
6
6
  "license": "MIT",