@tekyzinc/gsd-t 2.71.15 → 2.71.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +10 -0
- package/bin/design-orchestrator.js +95 -4
- package/bin/orchestrator.js +109 -1
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,16 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to GSD-T are documented here. Updated with each release.
|
|
4
4
|
|
|
5
|
+
## [2.71.16] - 2026-04-08
|
|
6
|
+
|
|
7
|
+
### Added (orchestrator — automated AI review loop)
|
|
8
|
+
- **Automated review before human review** — orchestrator now spawns an independent reviewer Claude (no builder context) that compares built components against design contracts. If issues found, spawns a fixer Claude, re-measures, and re-reviews (max 2 cycles). Only after automated review passes do items reach human review. This is the Term 2 equivalent, running deterministically in JavaScript.
|
|
9
|
+
- **Review report persistence** — each auto-review cycle writes results to `.gsd-t/design-review/auto-review/`. Unresolved issues are written to `{phase}-unresolved.json` for human visibility.
|
|
10
|
+
- **Structured review output** — reviewer uses `[REVIEW_ISSUES]` markers for reliable parsing. Fallback parser catches DEVIATION/FAIL/CRITICAL keywords.
|
|
11
|
+
|
|
12
|
+
### Pipeline (updated)
|
|
13
|
+
Build → Measure → **Automated AI Review** (reviewer → fixer → re-review loop) → Human Review → Next Tier
|
|
14
|
+
|
|
5
15
|
## [2.71.15] - 2026-04-08
|
|
6
16
|
|
|
7
17
|
### Changed (design-build command → orchestrator delegate)
|
|
@@ -300,6 +300,91 @@ function buildFixPrompt(phase, needsWork) {
|
|
|
300
300
|
return `Apply these specific fixes to ${phase} components:\n\n${fixes}\n\nApply the changes and EXIT. Do not rebuild anything else.`;
|
|
301
301
|
}
|
|
302
302
|
|
|
303
|
+
// ─── Automated AI Review (Term 2 equivalent) ───────────────────────────────
|
|
304
|
+
|
|
305
|
+
/**
 * Build the prompt for an independent reviewer Claude that compares built
 * components against their design contracts (the automated "Term 2" review).
 *
 * @param {string} phase - Current tier name (e.g. "elements", "widgets", "pages").
 * @param {Array<object>} items - Queue items; each provides componentName,
 *   fullContractPath, and optionally sourcePath / selector / id.
 * @param {Object<string, Array<object>>} measurements - Playwright results keyed
 *   by item id; entries with `pass === false` are surfaced to the reviewer.
 * @param {string} projectDir - Project root. Currently unused in the prompt body;
 *   kept in the signature for caller compatibility.
 * @param {{devPort: number, reviewPort: number}} ports - Server ports; the
 *   reviewer renders components against `reviewPort`.
 * @returns {string} The complete reviewer prompt.
 */
function buildReviewPrompt(phase, items, measurements, projectDir, ports) {
  // NOTE: removed dead locals `singular` (PHASE_SINGULAR[phase]) and
  // `contractsDir` (path.join(projectDir, CONTRACTS_DIR)) — neither was
  // referenced anywhere in the returned template.
  const componentList = items.map(c => {
    const sourcePath = c.sourcePath || guessPaths(phase, c);
    return `- **${c.componentName}** — contract: ${c.fullContractPath}, source: ${sourcePath}, selector: \`${c.selector || "." + c.id}\``;
  }).join("\n");

  // Include any measurement failures for context, so the reviewer verifies
  // known problems instead of rediscovering them from scratch.
  const failedMeasurements = [];
  for (const item of items) {
    const m = measurements[item.id] || [];
    const failures = m.filter(x => !x.pass);
    if (failures.length > 0) {
      failedMeasurements.push(`- ${item.componentName}: ${failures.map(f => `${f.property}: expected ${f.expected}, got ${f.actual}`).join("; ")}`);
    }
  }
  const measurementContext = failedMeasurements.length > 0
    ? `\n## Known Measurement Failures\nPlaywright already detected these — verify they are real issues:\n${failedMeasurements.join("\n")}\n`
    : "";

  return `You are an INDEPENDENT design reviewer. You have NO knowledge of how these components were built. Your job is to compare the built ${phase} against their design contracts and find deviations.

## Components to Review
${componentList}

${measurementContext}
## Review Process

For EACH component:
1. Read the design contract file (path given above) — note every specified property value
2. Read the source file — check that specified values are implemented correctly
3. Use Playwright to render the component at http://localhost:${ports.reviewPort}/ and measure:
   - Does the component render and have correct dimensions?
   - Do colors, fonts, spacing, border-radius match the contract?
   - For charts: correct chart type, orientation, axis labels, legend position, data format?
   - For layouts: correct grid columns, gap, padding, child count and arrangement?
   - For interactive elements: correct states, hover effects, click behavior?
4. Compare contract values against actual rendered values — be SPECIFIC (exact px, hex, counts)

## Output Format

Output your findings between these markers. Each issue must have component, severity (critical/high/medium/low), and description with SPECIFIC contract vs. actual values:

[REVIEW_ISSUES]
[
  {"component": "ComponentName", "severity": "critical", "description": "Contract specifies donut chart but rendered as pie chart (no inner radius)"},
  {"component": "ComponentName", "severity": "high", "description": "Grid gap: contract 16px, actual 24px"}
]
[/REVIEW_ISSUES]

If ALL components match their contracts, output:
[REVIEW_ISSUES]
[]
[/REVIEW_ISSUES]

## Rules
- You write ZERO code. You ONLY review.
- Be HARSH. Your value is in catching what the builder missed.
- NEVER say "looks close" or "appears to match" — give SPECIFIC values.
- Every contract property must be verified. Missing verification = missed issue.
- Severity guide: critical = wrong component type, missing element, broken render. high = wrong dimensions, colors, layout. medium = spacing/padding off. low = minor visual difference.`;
}
|
|
369
|
+
|
|
370
|
+
/**
 * Build the prompt for the fixer Claude spawned after the automated reviewer
 * reports issues. Each issue is numbered and paired with its component's
 * design contract path so the fixer corrects against the contract, not guesses.
 *
 * @param {string} phase - Current tier name (unused in the prompt body; kept
 *   for interface symmetry with buildReviewPrompt).
 * @param {Array<{component: string, severity: string, description: string}>} issues
 *   - Issues parsed from the reviewer's output.
 * @param {Array<object>} items - Queue items, used to resolve each issue's
 *   component to its fullContractPath.
 * @param {string} projectDir - Project root (unused; kept for interface symmetry).
 * @returns {string} The complete fixer prompt.
 */
function buildAutoFixPrompt(phase, issues, items, projectDir) {
  const numbered = [];
  for (let idx = 0; idx < issues.length; idx++) {
    const reported = issues[idx];
    const matched = items.find(entry => entry.componentName === reported.component);
    // Fall back to a directory hint when the reviewer named an unknown component.
    const contractRef = matched ? matched.fullContractPath : "check .gsd-t/contracts/design/";
    numbered.push(`${idx + 1}. [${reported.severity}] **${reported.component}** — ${reported.description}\n Contract: ${contractRef}`);
  }
  const issueList = numbered.join("\n");

  return `The automated design reviewer found these issues. Fix each one by reading the design contract and correcting the implementation.

## Issues to Fix
${issueList}

## Rules
- Read each component's design contract for the correct values — do NOT guess
- Fix ONLY the listed issues — do not modify other components or add features
- After fixing all issues, EXIT. Do not start servers or ask for review.`;
}
|
|
387
|
+
|
|
303
388
|
// ─── Summary ────────────────────────────────────────────────────────────────
|
|
304
389
|
|
|
305
390
|
function formatSummary(phase, result) {
|
|
@@ -329,11 +414,13 @@ ${BOLD}Pipeline:${RESET}
|
|
|
329
414
|
1. Read contracts from .gsd-t/contracts/design/
|
|
330
415
|
2. Start dev server + review server
|
|
331
416
|
3. For each tier (elements → widgets → pages):
|
|
332
|
-
a. Spawn Claude to build components
|
|
417
|
+
a. Spawn Claude (builder) to build components from contracts
|
|
333
418
|
b. Measure with Playwright
|
|
334
|
-
c.
|
|
335
|
-
d.
|
|
336
|
-
e.
|
|
419
|
+
c. Spawn Claude (reviewer) to compare against contracts — independent, no builder context
|
|
420
|
+
d. If reviewer finds issues → spawn Claude (fixer) → re-measure → re-review (max 2 cycles)
|
|
421
|
+
e. Queue for human review (only after automated review passes)
|
|
422
|
+
f. Wait for human review submission (blocks until human approves)
|
|
423
|
+
g. Process feedback, proceed to next tier
|
|
337
424
|
`);
|
|
338
425
|
}
|
|
339
426
|
|
|
@@ -351,11 +438,15 @@ const designBuildWorkflow = {
|
|
|
351
438
|
timeout: 600_000,
|
|
352
439
|
devServerTimeout: 30_000,
|
|
353
440
|
maxReviewCycles: 3,
|
|
441
|
+
maxAutoReviewCycles: 2,
|
|
442
|
+
reviewTimeout: 300_000,
|
|
354
443
|
},
|
|
355
444
|
completionMessage: "All done. Run your app to verify: npm run dev",
|
|
356
445
|
|
|
357
446
|
discoverWork,
|
|
358
447
|
buildPrompt,
|
|
448
|
+
buildReviewPrompt,
|
|
449
|
+
buildAutoFixPrompt,
|
|
359
450
|
measure,
|
|
360
451
|
buildQueueItem,
|
|
361
452
|
buildFixPrompt,
|
package/bin/orchestrator.js
CHANGED
|
@@ -576,7 +576,78 @@ ${BOLD}Phases:${RESET} ${this.wf.phases.join(" → ")}
|
|
|
576
576
|
measurements = this.wf.measure(projectDir, phase, items, { devPort, reviewPort }) || {};
|
|
577
577
|
}
|
|
578
578
|
|
|
579
|
-
//
|
|
579
|
+
// 6d.5. Automated AI review loop (Term 2 equivalent)
|
|
580
|
+
// Spawns an independent reviewer Claude that compares built output against contracts.
|
|
581
|
+
// If issues found → spawn fixer Claude → re-measure → re-review until clean.
|
|
582
|
+
const maxAutoReviewCycles = this.wf.defaults?.maxAutoReviewCycles || 2;
|
|
583
|
+
if (this.wf.buildReviewPrompt) {
|
|
584
|
+
let autoReviewCycle = 0;
|
|
585
|
+
let autoReviewClean = false;
|
|
586
|
+
|
|
587
|
+
while (autoReviewCycle < maxAutoReviewCycles && !autoReviewClean) {
|
|
588
|
+
autoReviewCycle++;
|
|
589
|
+
heading(`Automated Review — ${phase} (cycle ${autoReviewCycle}/${maxAutoReviewCycles})`);
|
|
590
|
+
|
|
591
|
+
// Spawn reviewer Claude — independent, no builder context
|
|
592
|
+
const reviewPrompt = this.wf.buildReviewPrompt(phase, items, measurements, projectDir, { devPort, reviewPort });
|
|
593
|
+
log(`\n${CYAN} ⚙${RESET} Spawning reviewer Claude for ${phase}...`);
|
|
594
|
+
const reviewTimeout = this.wf.defaults?.reviewTimeout || 300_000;
|
|
595
|
+
const reviewResult = this.spawnClaude(projectDir, reviewPrompt, reviewTimeout);
|
|
596
|
+
|
|
597
|
+
// Parse reviewer output for issues
|
|
598
|
+
const issues = this.wf.parseReviewResult
|
|
599
|
+
? this.wf.parseReviewResult(reviewResult.output, phase)
|
|
600
|
+
: this._parseDefaultReviewResult(reviewResult.output);
|
|
601
|
+
|
|
602
|
+
if (reviewResult.exitCode === 0) {
|
|
603
|
+
success(`Reviewer finished in ${reviewResult.duration}s`);
|
|
604
|
+
} else {
|
|
605
|
+
warn(`Reviewer exited with code ${reviewResult.exitCode} after ${reviewResult.duration}s`);
|
|
606
|
+
}
|
|
607
|
+
|
|
608
|
+
// Write review report
|
|
609
|
+
const reportDir = path.join(this.getReviewDir(projectDir), "auto-review");
|
|
610
|
+
ensureDir(reportDir);
|
|
611
|
+
fs.writeFileSync(
|
|
612
|
+
path.join(reportDir, `${phase}-cycle-${autoReviewCycle}.json`),
|
|
613
|
+
JSON.stringify({ cycle: autoReviewCycle, issues, output: reviewResult.output.slice(0, 5000) }, null, 2)
|
|
614
|
+
);
|
|
615
|
+
|
|
616
|
+
if (issues.length === 0) {
|
|
617
|
+
autoReviewClean = true;
|
|
618
|
+
success(`Automated review passed — no issues found in ${phase}`);
|
|
619
|
+
} else {
|
|
620
|
+
warn(`Automated review found ${issues.length} issue(s) in ${phase}`);
|
|
621
|
+
for (const issue of issues) {
|
|
622
|
+
dim(`${issue.component || "?"}: ${issue.description || issue.reason || "issue"} [${issue.severity || "medium"}]`);
|
|
623
|
+
}
|
|
624
|
+
|
|
625
|
+
if (autoReviewCycle < maxAutoReviewCycles) {
|
|
626
|
+
// Spawn fixer Claude with the issues
|
|
627
|
+
const fixPrompt = this.wf.buildAutoFixPrompt
|
|
628
|
+
? this.wf.buildAutoFixPrompt(phase, issues, items, projectDir)
|
|
629
|
+
: this._defaultAutoFixPrompt(phase, issues);
|
|
630
|
+
|
|
631
|
+
log(`\n${CYAN} ⚙${RESET} Spawning fixer Claude for ${issues.length} issue(s)...`);
|
|
632
|
+
const fixResult = this.spawnClaude(projectDir, fixPrompt, 120_000);
|
|
633
|
+
if (fixResult.exitCode === 0) success(`Fixer finished in ${fixResult.duration}s`);
|
|
634
|
+
else warn(`Fixer exited with code ${fixResult.exitCode}`);
|
|
635
|
+
|
|
636
|
+
// Re-measure after fixes
|
|
637
|
+
if (!skipMeasure && this.wf.measure) {
|
|
638
|
+
measurements = this.wf.measure(projectDir, phase, items, { devPort, reviewPort }) || {};
|
|
639
|
+
}
|
|
640
|
+
} else {
|
|
641
|
+
warn(`Max auto-review cycles reached — ${issues.length} issue(s) will go to human review`);
|
|
642
|
+
// Attach unresolved issues to measurements for human visibility
|
|
643
|
+
const issueFile = path.join(this.getReviewDir(projectDir), "auto-review", `${phase}-unresolved.json`);
|
|
644
|
+
fs.writeFileSync(issueFile, JSON.stringify(issues, null, 2));
|
|
645
|
+
}
|
|
646
|
+
}
|
|
647
|
+
}
|
|
648
|
+
}
|
|
649
|
+
|
|
650
|
+
// 6e. Human review cycle
|
|
580
651
|
let reviewCycle = 0;
|
|
581
652
|
let allApproved = false;
|
|
582
653
|
|
|
@@ -655,6 +726,43 @@ ${BOLD}Phases:${RESET} ${this.wf.phases.join(" → ")}
|
|
|
655
726
|
};
|
|
656
727
|
}
|
|
657
728
|
|
|
729
|
+
_parseDefaultReviewResult(output) {
|
|
730
|
+
// Try to parse JSON issues array from reviewer output
|
|
731
|
+
// Reviewer is instructed to output JSON between markers
|
|
732
|
+
const jsonMatch = output.match(/\[REVIEW_ISSUES\]([\s\S]*?)\[\/REVIEW_ISSUES\]/);
|
|
733
|
+
if (jsonMatch) {
|
|
734
|
+
try { return JSON.parse(jsonMatch[1].trim()); } catch { /* fall through */ }
|
|
735
|
+
}
|
|
736
|
+
// Fallback: look for PASS/FAIL verdict
|
|
737
|
+
if (/\bGRUDGING PASS\b/i.test(output) || /\bPASS\b.*\b0 issues\b/i.test(output) || /no issues found/i.test(output)) {
|
|
738
|
+
return [];
|
|
739
|
+
}
|
|
740
|
+
// If we see FAIL or DEVIATION keywords, extract what we can
|
|
741
|
+
const issues = [];
|
|
742
|
+
const deviationRegex = /(?:DEVIATION|FAIL|CRITICAL|ISSUE)[:\s—-]+(.+)/gi;
|
|
743
|
+
let match;
|
|
744
|
+
while ((match = deviationRegex.exec(output)) !== null) {
|
|
745
|
+
issues.push({ description: match[1].trim(), severity: "medium" });
|
|
746
|
+
}
|
|
747
|
+
return issues;
|
|
748
|
+
}
|
|
749
|
+
|
|
750
|
+
_defaultAutoFixPrompt(phase, issues) {
|
|
751
|
+
const issueList = issues.map((issue, i) =>
|
|
752
|
+
`${i + 1}. [${issue.severity || "medium"}] ${issue.component || "unknown"}: ${issue.description || issue.reason || "fix needed"}`
|
|
753
|
+
).join("\n");
|
|
754
|
+
|
|
755
|
+
return `The automated reviewer found these issues in the ${phase} components. Fix each one.
|
|
756
|
+
|
|
757
|
+
## Issues
|
|
758
|
+
${issueList}
|
|
759
|
+
|
|
760
|
+
## Rules
|
|
761
|
+
- Read the relevant design contract for each component to verify the correct values
|
|
762
|
+
- Fix ONLY the listed issues — do not modify other components
|
|
763
|
+
- After fixing, EXIT. Do not start servers or ask for review.`;
|
|
764
|
+
}
|
|
765
|
+
|
|
658
766
|
_defaultFixPrompt(phase, needsWork) {
|
|
659
767
|
const fixes = needsWork.map(item => {
|
|
660
768
|
const parts = [`Fix ${item.id}:`];
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tekyzinc/gsd-t",
|
|
3
|
-
"version": "2.71.15",
|
|
3
|
+
"version": "2.71.16",
|
|
4
4
|
"description": "GSD-T: Contract-Driven Development for Claude Code — 56 slash commands with headless CI/CD mode, graph-powered code analysis, real-time agent dashboard, execution intelligence, task telemetry, doc-ripple enforcement, backlog management, impact analysis, test sync, milestone archival, and PRD generation",
|
|
5
5
|
"author": "Tekyz, Inc.",
|
|
6
6
|
"license": "MIT",
|