npm - @sun-asterisk/sungen - Versions diffs - 3.2.2-beta.1 → 3.2.2-beta.11 - Mend

@sun-asterisk/sungen 3.2.2-beta.1 → 3.2.2-beta.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

package/src/cli/commands/depth-lint.ts ADDED Viewed

@@ -0,0 +1,51 @@
+import { Command } from 'commander';
+import * as path from 'path';
+import * as fs from 'fs';
+import { runDepthLint, renderDepthLint } from '../../harness/depth-lint';
+import { reportSlug } from '../../harness/unit-paths';
+/**
+ * Resolve the on-disk directory of a QA unit.
+ *
+ * Probes `qa/screens/<name>`, `qa/flows/<name>` and `qa/api/<name>` under the current working
+ * directory, in that order.
+ *
+ * @param name Unit name, joined onto each root as-is.
+ * @returns The first candidate that exists, or `null` when none does.
+ */
+function findScreenDir(name: string): string | null {
+  const cwd = process.cwd();
+  const hit = ['screens', 'flows', 'api']
+    .map((kind) => path.join(cwd, 'qa', kind, name))
+    .find((dir) => fs.existsSync(dir));
+  return hit ?? null;
+}
+/**
+ * Register the `depth-lint` sub-command on the CLI program.
+ *
+ * The action resolves the unit directory, runs the depth lint, always writes the JSON report to
+ * `.sungen/reports/<slug>-depth-lint.json`, then prints either the raw JSON (`--json`) or the
+ * rendered summary plus the report path.
+ *
+ * Exit codes: 0 = no deepen-in-place candidates, 2 = deepen-in-place candidates remain,
+ * 1 = usage or runtime error.
+ *
+ * @param program The commander program the command is attached to.
+ */
+export function registerDepthLintCommand(program: Command): void {
+  program
+    .command('depth-lint')
+    .description('Harness: classify shallow business-critical scenarios → deepen-in-place (with the data-assertion template) vs cross-screen (route to a flow). Generation-time depth self-check (#384).')
+    .option('-s, --screen <name>', 'Screen / flow / api unit name to lint')
+    .option('--json', 'Output the raw JSON report')
+    .action((options) => {
+      try {
+        const name = options.screen;
+        if (!name) throw new Error('Provide --screen <name>');
+        const dir = findScreenDir(name);
+        // findScreenDir probes screens, flows AND api — name all three so the message matches the lookup.
+        if (!dir) throw new Error(`Not found: qa/screens/${name}, qa/flows/${name}, or qa/api/${name}`);
+        const report = runDepthLint(dir, name);
+        const outDir = path.join(process.cwd(), '.sungen', 'reports');
+        fs.mkdirSync(outDir, { recursive: true });
+        const outPath = path.join(outDir, `${reportSlug(name)}-depth-lint.json`);
+        const serialized = JSON.stringify(report, null, 2);
+        fs.writeFileSync(outPath, serialized, 'utf-8');
+        if (options.json) {
+          console.log(serialized);
+        } else {
+          renderDepthLint(report);
+          console.log(`  Report: ${path.relative(process.cwd(), outPath)}`);
+          console.log('');
+        }
+        // Non-zero when there are deepen-in-place candidates the generator should fix before audit.
+        process.exit(report.deepen.length > 0 ? 2 : 0);
+      } catch (error) {
+        console.error('Error:', error instanceof Error ? error.message : error);
+        process.exit(1);
+      }
+    });
+}

package/src/cli/commands/gate.ts ADDED Viewed

@@ -0,0 +1,44 @@
+import { Command } from 'commander';
+import * as path from 'path';
+import * as fs from 'fs';
+import { runGate, renderGate, GatePhase } from '../../harness/journey';
+/**
+ * Find where a unit lives on disk.
+ *
+ * @param name Unit name looked up under `qa/screens`, `qa/flows`, then `qa/api` (relative to the
+ *   current working directory).
+ * @returns The first of those directories that exists, or `null` if none exists.
+ */
+function findScreenDir(name: string): string | null {
+  const roots = ['screens', 'flows', 'api'];
+  for (const root of roots) {
+    const candidate = path.join(process.cwd(), 'qa', root, name);
+    if (fs.existsSync(candidate)) {
+      return candidate;
+    }
+  }
+  return null;
+}
+/** Phase boundaries accepted by `--phase`. */
+const PHASES: GatePhase[] = ['create', 'run', 'deliver'];
+/**
+ * Attach the `gate` sub-command to the CLI program.
+ *
+ * The action validates `--screen` and `--phase`, checks that the unit directory exists, evaluates
+ * the gate and prints the verdict (raw JSON with `--json`, rendered text otherwise).
+ *
+ * Exit codes: 0 = pass, 2 = HALT, 1 = usage or runtime error.
+ *
+ * @param program The commander program the command is attached to.
+ */
+export function registerGateCommand(program: Command): void {
+  const gate = program
+    .command('gate')
+    .description('Inter-phase HALT gate (#381): a phase boundary passes only when its required obligations are satisfied or explicitly waived. Exit 2 = HALT (no silent bad output crosses the boundary).')
+    .option('-s, --screen <name>', 'Screen / flow / api unit name')
+    .option('-p, --phase <phase>', `Phase boundary: ${PHASES.join(' | ')}`)
+    .option('--json', 'Output the raw verdict');
+  gate.action((options) => {
+    try {
+      const unit = options.screen;
+      if (!unit) {
+        throw new Error('Provide --screen <name>');
+      }
+      const phase = options.phase as GatePhase;
+      if (PHASES.indexOf(phase) === -1) {
+        throw new Error(`Provide --phase <${PHASES.join('|')}>`);
+      }
+      if (findScreenDir(unit) === null) {
+        throw new Error(`Not found: qa/screens/${unit}, qa/flows/${unit}, or qa/api/${unit}`);
+      }
+      const verdict = runGate(process.cwd(), unit, phase);
+      const output = options.json ? JSON.stringify(verdict, null, 2) : renderGate(verdict);
+      console.log(output);
+      // Exit 2 on HALT — usable in CI / the orchestration loop to block the next phase.
+      const halted = verdict.status === 'halt';
+      process.exit(halted ? 2 : 0);
+    } catch (error) {
+      const message = error instanceof Error ? error.message : error;
+      console.error('Error:', message);
+      process.exit(1);
+    }
+  });
+}

package/src/cli/commands/journey.ts ADDED Viewed

@@ -0,0 +1,59 @@
+import { Command } from 'commander';
+import * as path from 'path';
+import * as fs from 'fs';
+import { runJourney, waive, signoff, renderJourneyBoard } from '../../harness/journey';
+import { reportSlug } from '../../harness/unit-paths';
+/**
+ * Look up the directory that holds a unit's QA artifacts.
+ *
+ * Checks `qa/screens/<name>`, then `qa/flows/<name>`, then `qa/api/<name>` below the current
+ * working directory and stops at the first one that exists.
+ *
+ * @param name Unit name.
+ * @returns The matching directory path, or `null` when none of the three exists.
+ */
+function findScreenDir(name: string): string | null {
+  const under = (kind: string): string => path.join(process.cwd(), 'qa', kind, name);
+  const screens = under('screens');
+  const flows = under('flows');
+  const api = under('api');
+  if (fs.existsSync(screens)) return screens;
+  if (fs.existsSync(flows)) return flows;
+  if (fs.existsSync(api)) return api;
+  return null;
+}
+/**
+ * Register the `journey` sub-command on the CLI program.
+ *
+ * Depending on the flags the action waives an obligation (`--waive` + `--reason`), records the
+ * sign-off (`--signoff`, optionally `--by`), or just computes the journey. In every case it writes
+ * `.sungen/journey/<slug>.json` and `.sungen/journey/<slug>.board.md`, then prints either the raw
+ * JSON (`--json`) or the rendered board.
+ *
+ * `--waive` and `--signoff` are mutually exclusive: previously the sign-off was silently dropped
+ * when both were given, leaving the caller believing the suite had been signed off.
+ *
+ * Exits with code 1 on any usage or runtime error.
+ *
+ * @param program The commander program the command is attached to.
+ */
+export function registerJourneyCommand(program: Command): void {
+  program
+    .command('journey')
+    .description('Durable "you are here" board (#381): obligations + what-to-review + next, synthesised read-only from the audit report + ledger already on disk.')
+    .option('-s, --screen <name>', 'Screen / flow / api unit name')
+    .option('--waive <obligation>', 'Waive an obligation (e.g. OB-coverage) — requires --reason')
+    .option('--reason <text>', 'The reason a waived obligation is acceptable (mandatory with --waive)')
+    .option('--signoff', 'Sign off the review queue — the single human gate (allowed only when every other obligation is satisfied/waived)')
+    .option('--by <name>', 'Who is signing off (recorded with --signoff)')
+    .option('--json', 'Output the raw JSON report')
+    .action((options) => {
+      try {
+        const name = options.screen;
+        if (!name) throw new Error('Provide --screen <name>');
+        if (!findScreenDir(name)) throw new Error(`Not found: qa/screens/${name}, qa/flows/${name}, or qa/api/${name}`);
+        // One decision per invocation: never let a requested sign-off be ignored without notice.
+        if (options.waive && options.signoff) {
+          throw new Error('Use either --waive or --signoff, not both in one invocation.');
+        }
+        const report = options.waive
+          ? waive(process.cwd(), name, options.waive, options.reason || '')
+          : options.signoff
+            ? signoff(process.cwd(), name, options.by)
+            : runJourney(process.cwd(), name);
+        const outDir = path.join(process.cwd(), '.sungen', 'journey');
+        fs.mkdirSync(outDir, { recursive: true });
+        const slug = reportSlug(name);
+        const serialized = JSON.stringify(report, null, 2);
+        fs.writeFileSync(path.join(outDir, `${slug}.json`), serialized, 'utf-8');
+        const board = renderJourneyBoard(report);
+        const boardPath = path.join(outDir, `${slug}.board.md`);
+        fs.writeFileSync(boardPath, board, 'utf-8');
+        if (options.json) {
+          console.log(serialized);
+        } else {
+          console.log('');
+          console.log(board);
+          console.log(`  Board: ${path.relative(process.cwd(), boardPath)}`);
+          console.log('');
+        }
+      } catch (error) {
+        console.error('Error:', error instanceof Error ? error.message : error);
+        process.exit(1);
+      }
+    });
+}

package/src/cli/index.ts CHANGED Viewed

@@ -15,6 +15,9 @@ import { registerFigmaCommand } from './commands/figma';
 import { registerAddFlowCommand } from './commands/add-flow';
 import { registerDashboardCommand } from './commands/dashboard';
 import { registerAuditCommand } from './commands/audit';
+import { registerDepthLintCommand } from './commands/depth-lint';
+import { registerJourneyCommand } from './commands/journey';
+import { registerGateCommand } from './commands/gate';
 import { registerIngestCommand } from './commands/ingest';
 import { registerEvalCommand } from './commands/eval';
 import { registerManifestCommand } from './commands/manifest';
@@ -57,6 +60,9 @@ async function main() {
   registerAddFlowCommand(program);
   registerDashboardCommand(program);
   registerAuditCommand(program);
+  registerDepthLintCommand(program);
+  registerJourneyCommand(program);
+  registerGateCommand(program);
   registerManifestCommand(program);
   registerLedgerCommand(program);
   registerFeedbackCommand(program);

package/src/harness/depth-lint.ts ADDED Viewed

@@ -0,0 +1,122 @@
+/**
+ * Depth lint (issue #384) — a deterministic, generation-time depth self-check.
+ *
+ * The audit's `assertionDepth` sensor decides WHICH business-critical scenarios are shallow
+ * (the authoritative set). This lint adds the missing half: for each shallow business-critical
+ * scenario it classifies the *fix* using the catalog's per-theme `depth` metadata —
+ *   • cross_screen:false  → DEEPEN in place (emit the theme's `depth.template` value assertion)
+ *   • cross_screen:true   → DEFER (flow-own, or @manual:Mx with a reason) — leaves the depth denominator
+ * so a generator (or the create-test repair step) can act mechanically BEFORE the first audit,
+ * instead of churning the 3-round repair budget on scenarios that can't be deepened on-screen.
+ *
+ * Reuses the audit plumbing verbatim (parse + catalog + assertionDepth) → same verdict as `sungen audit`.
+ */
+import * as fs from 'fs';
+import * as path from 'path';
+import { loadScenarios, parseViewpointOverview, ScenarioInfo, ViewpointEntry } from './parse';
+import { loadCatalog, viewpointGate, assertionDepth, dataThemesFor, CatalogTheme } from './sensors';
+/** How a shallow business-critical scenario gets fixed: deepened on-screen, or deferred elsewhere. */
+export type DepthAction = 'deepen' | 'defer';
+/** One shallow business-critical scenario together with its classified fix. */
+export interface DepthLintItem {
+  /** name of the shallow scenario, as reported by the depth sensor */
+  scenario: string;
+  /** name of the matched catalog theme, or null when no theme matched */
+  theme: string | null;
+  /** which bucket (`deepen` / `defer`) the scenario was classified into */
+  action: DepthAction;
+  /** the exact deep step to emit (deepen) or the routing hint (defer) */
+  fix: string;
+}
+/** Result of one depth-lint run over a single unit. */
+export interface DepthLintReport {
+  /** the unit name the lint was run for */
+  screen: string;
+  /** page type taken from the viewpoint gate (rendered as "unknown" when null) */
+  pageType: string | null;
+  /** focus handed to the depth sensor (defaults to 'functional') */
+  focus: string;
+  /** depth threshold reported by the sensor; rendered as a percentage */
+  threshold: number;
+  /** business-critical depth ratio reported by the sensor; rendered as a percentage */
+  bcDepthRatio: number;
+  /** the depth sensor's verdict, passed through unchanged */
+  verdict: 'pass' | 'warn' | 'fail';
+  /** count of business-critical scenarios, as reported by the sensor */
+  businessCriticalTotal: number;
+  /** count of shallow business-critical scenarios, as reported by the sensor */
+  shallowTotal: number;
+  /** shallow business-critical scenarios that CAN be deepened on-screen (actionable now) */
+  deepen: DepthLintItem[];
+  /** shallow business-critical scenarios that are cross-screen → route to a flow / @manual */
+  defer: DepthLintItem[];
+}
+/**
+ * Pick the first data theme whose keywords occur in the scenario's haystack.
+ *
+ * A theme's `depth.keywords` take precedence when that list is non-empty; otherwise the theme's
+ * general `keywords` are used. Each keyword is lower-cased before the substring test.
+ *
+ * @param s Scenario whose `haystack` text is searched.
+ * @param dataThemes Candidate themes, tried in order.
+ * @returns The first matching theme, or `undefined` when none matches.
+ */
+function matchTheme(s: ScenarioInfo, dataThemes: CatalogTheme[]): CatalogTheme | undefined {
+  for (const candidate of dataThemes) {
+    const precise = candidate.depth?.keywords;
+    const keywords = precise?.length ? precise : candidate.keywords;
+    const found = keywords.some((keyword) => s.haystack.includes(keyword.toLowerCase()));
+    if (found) return candidate;
+  }
+  return undefined;
+}
+/**
+ * Run the depth lint for one unit and classify every shallow business-critical scenario.
+ *
+ * Reads `<screenDir>/features/<last path segment>.feature` and
+ * `<screenDir>/requirements/test-viewpoint.md`, feeds them through the viewpoint gate and the
+ * assertion-depth sensor, then sorts each shallow business-critical scenario into `deepen`
+ * (matched theme is not cross-screen, or no theme matched) or `defer` (matched theme is
+ * cross-screen).
+ *
+ * @param screenDir Directory of the unit.
+ * @param screenName Unit name; only the segment after the last `/` names the feature file.
+ * @param focus Focus handed to the depth sensor.
+ * @returns The lint report with both buckets and the sensor's totals.
+ */
+export function runDepthLint(screenDir: string, screenName: string, focus = 'functional'): DepthLintReport {
+  const segments = screenName.split('/');
+  const leaf = segments[segments.length - 1] || screenName;
+  const scenarios: ScenarioInfo[] = loadScenarios(path.join(screenDir, 'features', `${leaf}.feature`));
+  const viewpoints: ViewpointEntry[] = parseViewpointOverview(path.join(screenDir, 'requirements', 'test-viewpoint.md'));
+  const catalog = loadCatalog();
+  const gate = viewpointGate(scenarios, viewpoints, catalog);
+  const dataThemes = dataThemesFor(catalog, gate.pageType);
+  const depth = assertionDepth(scenarios, dataThemes, focus);
+  // Index scenarios by name; a later duplicate name overrides an earlier one.
+  const scenarioByName = new Map<string, ScenarioInfo>();
+  for (const sc of scenarios) scenarioByName.set(sc.name, sc);
+  const buckets: Record<DepthAction, DepthLintItem[]> = { deepen: [], defer: [] };
+  for (const shallow of depth.shallowBusinessCritical) {
+    const info = scenarioByName.get(shallow.name);
+    const matched = info ? matchTheme(info, dataThemes) : undefined;
+    const themeName = matched?.theme ?? null;
+    const isCrossScreen = matched?.depth?.cross_screen ?? false;
+    if (isCrossScreen) {
+      buckets.defer.push({
+        scenario: shallow.name,
+        theme: themeName,
+        action: 'defer',
+        fix: `cross-screen — own it in a flow (sungen add-flow) or tag @manual:Mx with a reason; do not fake an on-screen data assertion`,
+      });
+      continue;
+    }
+    buckets.deepen.push({
+      scenario: shallow.name,
+      theme: themeName,
+      action: 'deepen',
+      fix: matched?.depth?.template ?? `add a data assertion (\`... with {{value}}\` or \`see all ... contain {{v}}\`)`,
+    });
+  }
+  const report: DepthLintReport = {
+    screen: screenName,
+    pageType: gate.pageType,
+    focus,
+    threshold: depth.threshold,
+    bcDepthRatio: depth.bcDepthRatio,
+    verdict: depth.verdict,
+    businessCriticalTotal: depth.businessCriticalTotal,
+    shallowTotal: depth.businessCriticalShallow,
+    deepen: buckets.deepen,
+    defer: buckets.defer,
+  };
+  return report;
+}
+/**
+ * Print a human-readable summary of a depth-lint report to stdout.
+ *
+ * Shows the depth ratio against its threshold, the bucket counts, and one line per scenario in
+ * each non-empty bucket. A scenario with no matched theme is labelled `[unmatched]` rather than
+ * leaking the literal `[null]` into the output.
+ *
+ * @param r The report to render.
+ */
+export function renderDepthLint(r: DepthLintReport): void {
+  const pct = (n: number) => `${Math.round(n * 100)}%`;
+  // `theme` is `string | null`; never interpolate the null directly.
+  const label = (theme: string | null) => theme ?? 'unmatched';
+  console.log('');
+  console.log(`━━━ Depth lint: ${r.screen} (page-type ${r.pageType ?? 'unknown'}) ━━━`);
+  console.log('');
+  console.log(`  businessDepth ${pct(r.bcDepthRatio)} (threshold ${pct(r.threshold)} · focus ${r.focus}) → ${r.verdict.toUpperCase()}`);
+  console.log(`  ${r.businessCriticalTotal} business-critical · ${r.shallowTotal} shallow → ${r.deepen.length} deepen-in-place · ${r.defer.length} cross-screen`);
+  if (r.deepen.length) {
+    console.log('');
+    console.log('  ── DEEPEN IN PLACE (fix before audit) ──');
+    for (const d of r.deepen) console.log(`   • ${d.scenario}\n       [${label(d.theme)}] → ${d.fix}`);
+  }
+  if (r.defer.length) {
+    console.log('');
+    console.log('  ── CROSS-SCREEN (route to flow / @manual:Mx) ──');
+    for (const d of r.defer) console.log(`   • ${d.scenario}  [${label(d.theme)}]`);
+  }
+  if (!r.deepen.length && !r.defer.length) console.log('  ✓ no shallow business-critical scenarios');
+  console.log('');
+}

package/src/harness/journey.ts ADDED Viewed

@@ -0,0 +1,333 @@
+/**
+ * Journey board (epic #381, story S1) — the durable, read-only "you are here" view.
+ *
+ * Reconstructs the QA's position from artifacts ALREADY on disk (no recomputation, no context
+ * needed): the audit report (`.sungen/reports/<slug>-audit.json`) supplies the obligation status
+ * via its calibration axes + gate + findings; the ledger (`.sungen/ledger/<slug>.jsonl`) supplies
+ * the phase history ("you are here"). The output answers the three QA questions — what's next /
+ * what to review / what's doubtful — and persists `.sungen/journey/<slug>.{json,board.md}`.
+ *
+ * S1 = the read-only synthesis. S2 (this file) adds the **writable lifecycle**: persisted
+ * waivers (reason-required, anti-amnesia), reconcile (auto-close satisfied; re-surface a waiver
+ * when its evidence changed), via `runJourney` + `waive`. Gate-bound predicates + inter-phase
+ * gates are S3. Pure-deterministic, no AI.
+ */
+import * as fs from 'fs';
+import * as path from 'path';
+import * as crypto from 'crypto';
+import { reportSlug } from './unit-paths';
+/** Lifecycle state of one obligation. */
+export type ObStatus = 'satisfied' | 'needs-work' | 'pending' | 'waived';
+/** One thing that must hold (or be waived) for the unit. */
+export interface Obligation {
+  id: string;              // stable identifier, e.g. 'OB-coverage'
+  title: string;           // short human-readable name shown on the board
+  status: ObStatus;
+  detail: string;          // free-text explanation rendered next to the title
+  waivedReason?: string;   // S2 — set when the QA explicitly waived this obligation
+}
+/** S5 — a parsed anchor so an IDE can jump straight to what needs review. */
+export interface Anchor { vpId?: string; file?: string; line?: number; }
+/** One review-queue entry: the finding text plus whatever anchor could be parsed from it. */
+export interface ReviewItem { text: string; anchor: Anchor; }
+/** The synthesised journey for one unit; serialised to `.sungen/journey/<slug>.json`. */
+export interface JourneyReport {
+  unit: string;
+  generatedFrom: string[];          // which artifacts were available
+  youAreHere: string;               // last ledger phase, or 'not started'
+  phasesDone: string[];
+  gateStatus: string | null;
+  score: number | null;
+  obligations: Obligation[];
+  needsYou: ReviewItem[];           // S5 — the review queue: human-judgment findings, each with an anchor
+  nextSuggested: string;
+}
+/**
+ * Best-effort JSON file reader.
+ *
+ * @param p Path of the file to read.
+ * @returns The parsed content, or `null` when the file is missing, unreadable or not valid JSON.
+ */
+function readJSON(p: string): any {
+  if (!fs.existsSync(p)) {
+    return null;
+  }
+  try {
+    const text = fs.readFileSync(p, 'utf-8');
+    return JSON.parse(text);
+  } catch {
+    // Unreadable or malformed files are treated the same as absent ones.
+    return null;
+  }
+}
+/**
+ * Collect the `step` value of every entry in a JSON-lines ledger, in file order.
+ *
+ * Blank lines, lines that are not valid JSON, and entries without a truthy `step` are skipped.
+ *
+ * @param p Path of the ledger file.
+ * @returns The steps as strings; empty when the file does not exist.
+ */
+function readLedgerPhases(p: string): string[] {
+  const steps: string[] = [];
+  if (!fs.existsSync(p)) return steps;
+  const rows = fs.readFileSync(p, 'utf-8').split('\n');
+  rows.forEach((row) => {
+    if (row.trim() === '') return;
+    try {
+      const entry = JSON.parse(row);
+      if (entry.step) steps.push(String(entry.step));
+    } catch {
+      /* skip */
+    }
+  });
+  return steps;
+}
+/**
+ * Decide whether a finding belongs in the human review queue rather than being a pure
+ * machine-repair target. The check is a case-insensitive search for a fixed set of markers.
+ *
+ * @param f The finding text.
+ * @returns `true` when any marker occurs in the text.
+ */
+function isHumanFinding(f: string): boolean {
+  const humanMarkers = /@manual|MANUAL-|DEPTH-DEFERRED|UNSOURCEABLE|CAPABILITY-SUGGESTION|judgment|oracle|review/i;
+  const needsHuman = humanMarkers.test(f);
+  return needsHuman;
+}
+/**
+ * S5 — derive a jump target from a finding's text.
+ *
+ * Looks for the first `VP-…` identifier and the first `path.ext:line` reference whose extension
+ * is `md`, `feature`, `ts` or `yaml`.
+ *
+ * @param text The finding text.
+ * @returns An anchor whose fields are `undefined` where nothing was found.
+ */
+function extractAnchor(text: string): Anchor {
+  const vpMatch = /\bVP-[A-Z0-9-]+\b/.exec(text);
+  const fileMatch = /([\w./-]+\.(?:md|feature|ts|yaml)):(\d+)/.exec(text);
+  let vpId: string | undefined;
+  let file: string | undefined;
+  let line: number | undefined;
+  if (vpMatch) {
+    vpId = vpMatch[0];
+  }
+  if (fileMatch) {
+    file = fileMatch[1];
+    line = Number(fileMatch[2]);
+  }
+  return { vpId, file, line };
+}
+/** Default satisfaction threshold: an axis at/above this = satisfied (below = needs-work). */
+const SAT = 0.8;
+/**
+ * Build the journey report for a unit purely from what is on disk: the audit report
+ * (`.sungen/reports/<slug>-audit.json`) and the ledger (`.sungen/ledger/<slug>.jsonl`).
+ * Waivers and sign-off are NOT applied here.
+ *
+ * Without an audit report the result holds a single pending `OB-create` obligation. Otherwise it
+ * holds, in order: OB-spec, OB-coverage, OB-depth, OB-trace, OB-automation, OB-signoff.
+ *
+ * An axis counts as measured only when it is a real number. JSON cannot hold `undefined`, and
+ * `JSON.stringify` writes NaN as `null`, so a missing measurement arrives as `null`; it is
+ * reported as `pending` instead of a misleading "0% needs-work".
+ *
+ * @param projectRoot Directory that contains `.sungen/`.
+ * @param unit Unit name; mapped to the artifact slug via `reportSlug`.
+ * @returns The freshly computed report.
+ */
+function computeFresh(projectRoot: string, unit: string): JourneyReport {
+  const slug = reportSlug(unit);
+  const audit = readJSON(path.join(projectRoot, '.sungen', 'reports', `${slug}-audit.json`));
+  const phases = readLedgerPhases(path.join(projectRoot, '.sungen', 'ledger', `${slug}.jsonl`));
+  const from: string[] = [];
+  if (audit) from.push('audit');
+  if (phases.length) from.push('ledger');
+  const youAreHere = phases.length ? phases[phases.length - 1] : 'not started';
+  const ran = (p: string) => phases.some((s) => s.startsWith(p));
+  if (!audit) {
+    // Nothing measured yet — the journey hasn't really begun.
+    return {
+      unit, generatedFrom: from, youAreHere, phasesDone: phases, gateStatus: null, score: null,
+      obligations: [{ id: 'OB-create', title: 'Generate test cases', status: 'pending', detail: 'No audit report yet — run /sungen:create-test.' }],
+      needsYou: [],
+      nextSuggested: 'Run `/sungen:create-test ' + unit + '` to begin.',
+    };
+  }
+  const ax = (audit.calibration && audit.calibration.axes) || {};
+  const depthThreshold = (audit.depth && audit.depth.threshold) || SAT;
+  const ob = (id: string, title: string, val: unknown, thr: number, detail: string): Obligation => {
+    // Anything that is not a real number (undefined, null, NaN, wrong type) is "not measured yet".
+    if (typeof val !== 'number' || Number.isNaN(val)) return { id, title, status: 'pending', detail };
+    return {
+      id, title,
+      status: val >= thr ? 'satisfied' : 'needs-work',
+      detail: `${Math.round(val * 100)}% (need ${Math.round(thr * 100)}%) — ${detail}`,
+    };
+  };
+  // Needs-you: human-judgment findings (the review-queue seed), with their anchors (titles) intact.
+  // Guard the shape: the report is unvalidated JSON, so ignore a non-array list or non-string entries.
+  const findings: unknown[] = Array.isArray(audit.findings) ? audit.findings : [];
+  const needsYou: ReviewItem[] = [];
+  for (const f of findings) {
+    if (typeof f === 'string' && isHumanFinding(f)) needsYou.push({ text: f, anchor: extractAnchor(f) });
+  }
+  const obligations: Obligation[] = [
+    ob('OB-spec', 'Spec readiness', ax.specFR, 1, 'FR/sections sufficient to design from'),
+    ob('OB-coverage', 'Viewpoint coverage', ax.coverage, SAT, 'critical themes covered'),
+    ob('OB-depth', 'Assertion depth', ax.businessDepth, depthThreshold, 'business-critical scenarios assert data'),
+    ob('OB-trace', 'Traceability', ax.taxonomy ?? ax.traceability, SAT, 'scenarios trace to viewpoints'),
+  ];
+  // Automation obligation: automatable @manual still pending a driver = needs-work.
+  const autoPending = audit.automatableManual && audit.automatableManual.automatable > 0;
+  obligations.push({
+    id: 'OB-automation', title: 'Automation coverage',
+    status: autoPending ? 'needs-work' : 'satisfied',
+    detail: autoPending ? `${audit.automatableManual.automatable} @manual scenario(s) a driver could automate` : 'no capability-pending automatable cases',
+  });
+  // Human sign-off: always pending until S5 explicit signoff; carries the review-queue count.
+  const manualCount = (audit.manualOracle && audit.manualOracle.total) || 0;
+  obligations.push({
+    id: 'OB-signoff', title: 'Human sign-off', status: 'pending',
+    detail: `${needsYou.length} item(s) queued for your review · ${manualCount} @manual scenario(s)`,
+  });
+  // Next suggested = first non-satisfied obligation; if all satisfied but run-test not done → run.
+  const firstGap = obligations.find((o) => o.status !== 'satisfied' && o.id !== 'OB-signoff');
+  let nextSuggested: string;
+  if (firstGap) nextSuggested = `Repair "${firstGap.title}" (${firstGap.detail}).`;
+  else if (!ran('run')) nextSuggested = `Quality satisfied — run \`/sungen:run-test ${unit}\`.`;
+  else nextSuggested = `All obligations satisfied — review the ${needsYou.length} queued item(s), then sign off & deliver.`;
+  return {
+    unit, generatedFrom: from, youAreHere, phasesDone: phases,
+    gateStatus: audit.gateStatus ?? null, score: (audit.score && audit.score.overall) ?? null,
+    obligations, needsYou, nextSuggested,
+  };
+}
+// ---------------- S2: writable lifecycle — persisted waivers + reconcile ----------------
+/** A recorded waiver: the reason, when it was made, and the audit hash it was made against. */
+interface Waiver { reason: string; at: string; auditHashAtWaive: string; }
+/** A recorded sign-off: optionally who, when, and the audit hash it was made against. */
+interface Signoff { by?: string; at: string; auditHashAtSignoff: string; }   // S5
+/** Persisted per-unit state (`<slug>.state.json`); `waivers` is keyed by obligation id. */
+interface JourneyState { unit: string; auditHash: string; waivers: Record<string, Waiver>; signoff?: Signoff; }
+/**
+ * Path of the persisted journey state file for a unit.
+ *
+ * @param projectRoot Directory that contains `.sungen/`.
+ * @param slug Artifact slug of the unit.
+ * @returns `<projectRoot>/.sungen/journey/<slug>.state.json`.
+ */
+function statePath(projectRoot: string, slug: string): string {
+  const journeyDir = path.join(projectRoot, '.sungen', 'journey');
+  const fileName = `${slug}.state.json`;
+  return path.join(journeyDir, fileName);
+}
+/**
+ * Evidence cursor: SHA-256 (hex) of the audit report's bytes. A waiver or sign-off recorded
+ * against one hash stops applying once the report's content, and therefore this hash, changes.
+ *
+ * @param projectRoot Directory that contains `.sungen/`.
+ * @param slug Artifact slug of the unit.
+ * @returns The hex digest, or the empty string when no audit report exists.
+ */
+function auditHashOf(projectRoot: string, slug: string): string {
+  const reportFile = path.join(projectRoot, '.sungen', 'reports', `${slug}-audit.json`);
+  if (!fs.existsSync(reportFile)) {
+    return '';
+  }
+  const bytes = fs.readFileSync(reportFile);
+  const hasher = crypto.createHash('sha256');
+  hasher.update(bytes);
+  return hasher.digest('hex');
+}
+/**
+ * Load the persisted journey state, tolerating a damaged or hand-edited file.
+ *
+ * Callers index `state.waivers[...]` directly, so the result always carries a `waivers` object:
+ * a state file that parses to something other than a plain object is treated as absent, and a
+ * missing or malformed `waivers` field is replaced by an empty record instead of crashing later.
+ *
+ * @param p Path of the state file.
+ * @returns The state, or `null` when the file is missing, unreadable or not an object.
+ */
+function loadState(p: string): JourneyState | null {
+  const raw = readJSON(p);
+  if (raw === null || typeof raw !== 'object' || Array.isArray(raw)) return null;
+  const w = raw.waivers;
+  const waivers = w !== null && typeof w === 'object' && !Array.isArray(w) ? w : {};
+  return { ...raw, waivers };
+}
+/**
+ * Persist the journey state as pretty-printed JSON, creating the parent directory if needed.
+ *
+ * @param p Destination path of the state file.
+ * @param s State to write.
+ */
+function saveState(p: string, s: JourneyState): void {
+  const parent = path.dirname(p);
+  fs.mkdirSync(parent, { recursive: true });
+  const payload = JSON.stringify(s, null, 2);
+  fs.writeFileSync(p, payload, 'utf-8');
+}
+/**
+ * Recompute the "next" hint AFTER waivers are applied (a waived obligation is not a gap).
+ *
+ * Order of precedence: the first obligation other than `OB-signoff` that is neither satisfied nor
+ * waived → repair it; otherwise, if no ledger phase starts with `run` → run the tests; otherwise
+ * → review the queue and sign off.
+ *
+ * @param r Report whose obligations already reflect waivers.
+ * @param unit Unit name used in the suggested command.
+ * @returns The suggestion text.
+ */
+function computeNext(r: JourneyReport, unit: string): string {
+  for (const o of r.obligations) {
+    if (o.id === 'OB-signoff') continue;
+    if (o.status === 'satisfied' || o.status === 'waived') continue;
+    return `Repair "${o.title}" (${o.detail}).`;
+  }
+  const hasRun = r.phasesDone.some((phase) => phase.startsWith('run'));
+  if (!hasRun) {
+    return `Quality satisfied — run \`/sungen:run-test ${unit}\`.`;
+  }
+  return `All obligations satisfied/waived — review the ${r.needsYou.length} queued item(s), then sign off & deliver.`;
+}
+/**
+ * The public entry: compute fresh, then RECONCILE with the persisted state —
+ *  - auto-close is automatic (fresh recompute reflects the current artifacts);
+ *  - an active waiver (evidence unchanged) sets status='waived' (carries the reason);
+ *  - a STALE waiver (audit changed since it was waived) is re-surfaced for re-decision (anti-amnesia).
+ * Then persist the current evidence cursor.
+ *
+ * Note that this writes the state file on every call, even when nothing was waived or signed off.
+ *
+ * @param projectRoot Directory that contains `.sungen/`.
+ * @param unit Unit name; mapped to the artifact slug via `reportSlug`.
+ * @returns The reconciled report (obligation statuses, review queue and next hint updated).
+ */
+export function runJourney(projectRoot: string, unit: string): JourneyReport {
+  const slug = reportSlug(unit);
+  const report = computeFresh(projectRoot, unit);
+  const sp = statePath(projectRoot, slug);
+  // No state file yet → start from an empty state.
+  const state = loadState(sp) || { unit, auditHash: '', waivers: {} };
+  const curHash = auditHashOf(projectRoot, slug);
+  for (const ob of report.obligations) {
+    const w = state.waivers[ob.id];
+    if (!w) continue;
+    if (w.auditHashAtWaive === curHash) {
+      // Evidence unchanged since the waiver → it still applies.
+      ob.status = 'waived';
+      ob.waivedReason = w.reason;
+      ob.detail = `waived — ${w.reason}`;
+    } else {
+      // Evidence changed → keep the freshly computed status and put the stale waiver at the front of the queue.
+      report.needsYou.unshift({ text: `⚠️ Waiver on "${ob.title}" is STALE (evidence changed since ${w.at}) — re-decide. Was: ${w.reason}`, anchor: { vpId: ob.id } });
+    }
+  }
+  // S5 — sign-off reconcile: a recorded sign-off satisfies OB-signoff while its evidence holds;
+  // if the audit changed since, the sign-off is STALE → re-surface for re-review (anti-amnesia).
+  const signoffOb = report.obligations.find((o) => o.id === 'OB-signoff');
+  if (signoffOb && state.signoff) {
+    if (state.signoff.auditHashAtSignoff === curHash) {
+      signoffOb.status = 'satisfied';
+      signoffOb.detail = `signed off${state.signoff.by ? ' by ' + state.signoff.by : ''} at ${state.signoff.at}`;
+    } else {
+      report.needsYou.unshift({ text: `⚠️ Sign-off is STALE (suite changed since ${state.signoff.at}) — re-review and sign off again.`, anchor: { vpId: 'OB-signoff' } });
+    }
+  }
+  // The hint must be recomputed only now, once waived obligations no longer count as gaps.
+  report.nextSuggested = computeNext(report, unit);
+  // Waivers and sign-off are written back unchanged; only the evidence cursor is refreshed.
+  saveState(sp, { unit, auditHash: curHash, waivers: state.waivers, signoff: state.signoff });
+  return report;
+}
+/**
+ * Waive an obligation — REQUIRES a reason (anti-amnesia: a waiver leaves a recorded "why").
+ * The waiver is stored together with the current audit hash so a later reconcile can tell
+ * whether the evidence changed since.
+ *
+ * @param projectRoot Directory that contains `.sungen/`.
+ * @param unit Unit name.
+ * @param obId Id of the obligation to waive; must be one the fresh journey currently lists.
+ * @param reason Why the waiver is acceptable; stored trimmed.
+ * @returns The journey report recomputed after the waiver was saved.
+ * @throws Error when the reason is empty/blank or the obligation id is unknown.
+ */
+export function waive(projectRoot: string, unit: string, obId: string, reason: string): JourneyReport {
+  const why = (reason || '').trim();
+  if (why === '') {
+    throw new Error('A reason is required to waive (anti-amnesia: a waiver must record WHY). Use --reason "...".');
+  }
+  const slug = reportSlug(unit);
+  const knownIds = computeFresh(projectRoot, unit).obligations.map((o) => o.id);
+  if (knownIds.indexOf(obId) < 0) {
+    throw new Error(`Unknown obligation "${obId}". Valid: ${knownIds.join(', ')}`);
+  }
+  const file = statePath(projectRoot, slug);
+  const state = loadState(file) || { unit, auditHash: '', waivers: {} };
+  const at = new Date().toISOString();
+  state.waivers[obId] = { reason: why, at, auditHashAtWaive: auditHashOf(projectRoot, slug) };
+  saveState(file, state);
+  return runJourney(projectRoot, unit);
+}
+/**
+ * Sign off — the single human gate (S5). Refused while any obligation other than `OB-signoff`
+ * is neither satisfied nor waived (you cannot sign off an unclean suite). Records who/when plus
+ * the current audit hash, so a later reconcile can flag the sign-off as stale.
+ *
+ * @param projectRoot Directory that contains `.sungen/`.
+ * @param unit Unit name.
+ * @param by Optional name of the person signing off.
+ * @returns The journey report recomputed after the sign-off was saved.
+ * @throws Error listing the ids of the obligations that are still open.
+ */
+export function signoff(projectRoot: string, unit: string, by?: string): JourneyReport {
+  const current = runJourney(projectRoot, unit);
+  const openIds: string[] = [];
+  for (const o of current.obligations) {
+    if (o.id === 'OB-signoff') continue;
+    if (o.status === 'satisfied' || o.status === 'waived') continue;
+    openIds.push(o.id);
+  }
+  if (openIds.length > 0) {
+    throw new Error(`Cannot sign off — still open: ${openIds.join(', ')}. Fix, or waive each with a reason, first.`);
+  }
+  const slug = reportSlug(unit);
+  const file = statePath(projectRoot, slug);
+  const state = loadState(file) || { unit, auditHash: '', waivers: {} };
+  const at = new Date().toISOString();
+  state.signoff = { by, at, auditHashAtSignoff: auditHashOf(projectRoot, slug) };
+  saveState(file, state);
+  return runJourney(projectRoot, unit);
+}
+/** Board icon for each obligation status. */
+const ICON: Record<ObStatus, string> = { satisfied: '✅', 'needs-work': '⚠️ ', pending: '⏳', waived: '🚫' };
+/**
+ * Render the journey report as a Markdown board.
+ *
+ * Sections, in order: title, "you are here" (plus the gate line when a gate status exists), the
+ * obligation list, the review queue (at most the first 20 items, each tagged with its anchor
+ * when one exists) and the next suggestion. The text ends with a trailing newline.
+ *
+ * @param r The report to render.
+ * @returns The board as a single string.
+ */
+export function renderJourneyBoard(r: JourneyReport): string {
+  const phaseTrail = r.phasesDone.length ? `  (phases: ${r.phasesDone.join(' → ')})` : '';
+  const out: string[] = [`# Journey — ${r.unit}`, '', `📍 **You are here:** ${r.youAreHere}` + phaseTrail];
+  if (r.gateStatus) {
+    out.push(`   gate: **${r.gateStatus}** · score: ${r.score ?? '—'}/10`);
+  }
+  out.push('', '## What must still be true');
+  r.obligations.forEach((o) => {
+    out.push(`- ${ICON[o.status]} **${o.title}** — ${o.detail}`);
+  });
+  out.push('', `## 🔎 Review queue — needs your judgment (${r.needsYou.length})`);
+  if (!r.needsYou.length) {
+    out.push('- (nothing queued)');
+  } else {
+    for (const item of r.needsYou.slice(0, 20)) {
+      const parts: string[] = [];
+      if (item.anchor.vpId) parts.push(item.anchor.vpId);
+      if (item.anchor.file) parts.push(`${item.anchor.file}${item.anchor.line ? ':' + item.anchor.line : ''}`);
+      const tag = parts.join(' · ');
+      out.push(`- ${item.text}${tag ? `  〔${tag}〕` : ''}`);
+    }
+  }
+  out.push('', `## → Next`, `${r.nextSuggested}`, '');
+  return out.join('\n');
+}
+// ---------------- S3: inter-phase gate — obligations as HALT predicates (#398) ----------------
+//
+// A phase boundary is a deterministic gate: before the next phase may run, this phase's required
+// obligations must each be `satisfied` OR `waived` (S2 — an explicit, reasoned human acceptance).
+// A required obligation still `needs-work`/`pending` is a BLOCKER → HALT (no silent bad output
+// crosses the boundary, §9). Reuses runJourney → obligations already reflect waivers/reconcile.
+/** The phase boundaries a gate can be evaluated at. */
+export type GatePhase = 'create' | 'run' | 'deliver';
+/** Outcome of evaluating one phase boundary for one unit. */
+export interface GateVerdict {
+  unit: string;
+  phase: GatePhase;
+  status: 'pass' | 'halt'; // 'halt' whenever at least one blocker exists
+  required: string[]; // ids of the obligations this phase requires
+  blockers: { id: string; title: string; detail: string }[]; // required obligations that are neither satisfied nor waived
+  waivedCredit: { id: string; title: string }[]; // required obligations accepted via an explicit waiver
+}
+/** Obligation ids each phase boundary requires. */
+const PHASE_REQUIRED: Record<GatePhase, string[]> = {
+  // post-create (design quality): spec + coverage + depth + traceability must hold.
+  create: ['OB-spec', 'OB-coverage', 'OB-depth', 'OB-trace'],
+  // post-run: the design gates + automation coverage.
+  run: ['OB-spec', 'OB-coverage', 'OB-depth', 'OB-trace', 'OB-automation'],
+  // pre-delivery: everything automated PLUS the single human sign-off (S5).
+  deliver: ['OB-spec', 'OB-coverage', 'OB-depth', 'OB-trace', 'OB-automation', 'OB-signoff'],
+};
+/**
+ * Evaluate the inter-phase gate for a unit.
+ *
+ * Every obligation the phase requires must be `satisfied` or `waived`. The gate fails CLOSED: a
+ * required obligation that the journey does not list at all (for example when no audit report
+ * exists yet, so nothing was measured) is a blocker too. Previously such obligations were simply
+ * not looked at and the gate reported PASS for a unit that had never been audited.
+ *
+ * @param projectRoot Directory that contains `.sungen/`.
+ * @param unit Unit name.
+ * @param phase The phase boundary to evaluate.
+ * @returns The verdict; `status` is `'halt'` when at least one blocker exists.
+ */
+export function runGate(projectRoot: string, unit: string, phase: GatePhase): GateVerdict {
+  const r = runJourney(projectRoot, unit);
+  const required = PHASE_REQUIRED[phase];
+  const byId = new Map(r.obligations.map((o) => [o.id, o] as const));
+  const blockers: GateVerdict['blockers'] = [];
+  const waivedCredit: GateVerdict['waivedCredit'] = [];
+  for (const id of required) {
+    const o = byId.get(id);
+    if (!o) {
+      // Not measured ≠ satisfied: an obligation we cannot see must block the boundary.
+      blockers.push({ id, title: id, detail: 'not evaluated — absent from the journey report (is the audit report missing?)' });
+    } else if (o.status === 'waived') {
+      waivedCredit.push({ id: o.id, title: o.title });
+    } else if (o.status !== 'satisfied') {
+      blockers.push({ id: o.id, title: o.title, detail: o.detail });
+    }
+  }
+  return { unit, phase, status: blockers.length ? 'halt' : 'pass', required, blockers, waivedCredit };
+}
+/**
+ * Render a gate verdict as console text: a banner with the unit, phase and PASS/HALT, followed
+ * by either the list of blocking obligations with a remediation hint, or a one-line confirmation
+ * that mentions how many obligations were accepted via waiver.
+ *
+ * @param v The verdict to render.
+ * @returns The text, starting and ending with a blank line.
+ */
+export function renderGate(v: GateVerdict): string {
+  const outcome = v.status === 'pass' ? '✅ PASS' : '⛔ HALT';
+  const out: string[] = ['', `━━━ Gate: ${v.unit} @ phase "${v.phase}" → ${outcome} ━━━`];
+  if (v.blockers.length === 0) {
+    const credit = v.waivedCredit.length ? ` (${v.waivedCredit.length} accepted via waiver)` : '';
+    out.push('  All required obligations satisfied' + credit + '.');
+  } else {
+    out.push('  Blocking obligations (must be satisfied or explicitly waived):');
+    v.blockers.forEach((b) => {
+      out.push(`   • ${b.id} ${b.title} — ${b.detail}`);
+    });
+    out.push('  → Self-correct (repair / run-test), or `sungen journey --screen ' + v.unit + ' --waive <OB> --reason "..."` if accepted.');
+  }
+  out.push('');
+  return out.join('\n');
+}

package/src/orchestrator/templates/ai-instructions/claude-cmd-create-test.md CHANGED Viewed

@@ -77,6 +77,7 @@ If the unit is **api-first** (`qa/api/<name>/` or `qa/api/flows/<name>/`), the d
    - one **viewpoint theme** per shard — a `VP-` prefix from the viewpoint overview (`VP-SEC`, `VP-ERROR-EMPTY-STATE`, `VP-CAROUSEL`, …) — preferred when the viewpoint overview is rich (test-2/home had 47 items across many themes); **or**
    - one **`spec.md` section** per shard (the Mapping Contract walk, Table 1) — preferred when generating from spec.
    Each shard owns a disjoint `VP-` prefix ⇒ ids never collide. One shard → skip to 5c (no fan-out gain).
+   - **Budget-adaptive shard size (S4).** Size the fan-out to your context budget: `N = clamp(ceil(viewpoint_items / items_per_shard), 1, min(16, cores-2))`, where `items_per_shard` is **larger on a ~1M budget** (fewer, bigger shards; more held inline) and **smaller on a ~200k "Claude Standards" budget** (more, tighter shards + aggressive offload). The orchestrator keeps **only the compact summary each generator returns** (pointers to its fragment files) — never the raw fragments in-context. Each generator sees **only its slice** — its theme/section + the **one** matching `sungen-viewpoint` group + the relevant `spec.md` section(s); never load the other groups or the whole spec (lazy = context-cheap). If the budget is too tight even for one shard, **fall back to the sequential path (5d)** — same output, just slower; never fail for lack of budget.
    **5b. Parallel fan-out (Claude Code).** Spawn one **`sungen-generator`** sub-agent **per shard** (Task tool, `subagent_type: sungen-generator`) — issue all the Task calls **in a single message** so they run concurrently. Pass each: its shard (theme/section) + viewpoint slice, the **`sungen-discovery` report** (Step 3), only the `spec.md` section(s) it maps to, which one `sungen-viewpoint` group file holds its patterns, the unit (screen/flow) + name + tier, and its fragment paths `.sungen/fragments/<name>/<shard>.{feature,test-data.yaml}`. Each writes a **headerless** fragment + a test-data fragment and returns a compact summary. Small fragments also keep every generator under the output-token cap (the reason the single-pass path writes incrementally).
@@ -89,6 +90,11 @@ If the unit is **api-first** (`qa/api/<name>/` or `qa/api/flows/<name>/`), the d
    **5d. Sequential fallback.** Use the single-context incremental path (Step 2: tier-by-tier `Write`/`Edit` batches) when: only **one** shard applies, **Copilot / no sub-agents**, or a constrained setup. Same output, just no speedup. **For flows**: `[Screen:Element]` namespace refs, test-data namespaced by phase, `@flow` tag.
+5.4. **Depth self-check (deterministic — run BEFORE the audit).** Run `sungen depth-lint --screen <name>` (Bash). It reuses the audit's businessDepth classifier and splits every shallow business-critical scenario into two actionable buckets — act on them now so the audit/repair loop doesn't burn rounds on depth:
+   - **DEEPEN IN PLACE** — add a real value assertion to each flagged scenario (`User see all [X] contain {{v}}`, `User remember [X] as {{v}}` + `… with {{v}}`). The printed `template` is a **hint** keyed off the theme — apply judgment to the scenario's actual claim; do NOT paste a value assertion that doesn't fit (e.g. a carousel-visibility scenario should assert the product SET, not a price). If a flagged scenario is genuinely visibility/behavior (not data-correctness), that's an over-count — leave it and note it; never fake an assertion.
+   - **CROSS-SCREEN** — route to a flow (`/sungen:add-flow`) or tag `@manual:Mx` + reason; do NOT fake an on-screen data assertion. This removes it from the screen's depth denominator honestly.
+   Re-run `sungen depth-lint --screen <name>` until `deepen` is empty (or only honest over-counts remain), THEN proceed to the quality gate (5.5). This lifts first-pass `businessDepth` mechanically instead of via 2–3 organic repair rounds.
 5.5. **Quality gate & repair (harness — always run, do NOT skip).** Follow the `sungen-harness-audit` skill:
    - Run `sungen audit --screen <name>` (Bash) and read `gateStatus` + `findings` (deterministic, structural).
    - **Independent semantic review.** **Claude Code:** spawn the **`sungen-reviewer`** sub-agent (Task tool, `subagent_type: sungen-reviewer`) — it judges what the gate can't (does each scenario's steps PROVE its title/viewpoint, observable Thens, business-critical assertion depth) and returns `VERDICT` + `ISSUES` with concrete fixes. **Merge its NEEDS-REPAIR issues with the audit findings.** (Copilot / no sub-agents: run the same review inline using the `sungen-reviewer` criteria.)
@@ -100,6 +106,7 @@ If the unit is **api-first** (`qa/api/<name>/` or `qa/api/flows/<name>/`), the d
      - **BALANCE** → stop expanding secondary viewpoints; add business-core scenarios first.
      - **TRACE** → align `VP-` ids with the viewpoint-overview.
    - Stop when the gate PASSes and findings clear, **or** the budget is exhausted → report residual gaps honestly (never fake a pass).
+   - **Phase gate (boundary — do NOT skip).** Run `sungen gate --screen <name> --phase create` (Bash, exit 2 = HALT). It is the deterministic create-boundary: every required obligation (spec · coverage · depth · trace) must be **satisfied or explicitly waived**. On **HALT**, you have not cleared the phase — keep repairing the blocking obligation(s) within budget; if a blocker is a genuinely-accepted gap (e.g. cross-screen depth owned by a flow), record it with `sungen journey --screen <name> --waive <OB> --reason "..."` (reason mandatory). **Do not converge (step 6) past a HALT** without a fix or a reasoned waiver — no bad output crosses the boundary.
 5.6. **Record (reuse + observability).** Build the manifest and report usage:
    - `sungen manifest --screen <name>` — fingerprints for next-run change detection. On a **re-run**, start the whole command by running `sungen manifest --screen <name> --diff` and only regenerate scenarios whose spec section changed (keep/regenerate/retire).