npm - @sun-asterisk/sungen - Versions diffs - 3.2.0-beta.143 → 3.2.0-beta.144 - Mend

@sun-asterisk/sungen 3.2.0-beta.143 → 3.2.0-beta.144

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38) hide show

package/dist/cli/commands/audit.d.ts.map +1 -1
package/dist/cli/commands/audit.js +5 -3
package/dist/cli/commands/audit.js.map +1 -1
package/dist/generators/test-generator/code-generator.d.ts.map +1 -1
package/dist/generators/test-generator/code-generator.js +3 -2
package/dist/generators/test-generator/code-generator.js.map +1 -1
package/dist/harness/audit.d.ts +15 -1
package/dist/harness/audit.d.ts.map +1 -1
package/dist/harness/audit.js +60 -7
package/dist/harness/audit.js.map +1 -1
package/dist/harness/capability-plan.d.ts +2 -0
package/dist/harness/capability-plan.d.ts.map +1 -1
package/dist/harness/capability-plan.js +4 -1
package/dist/harness/capability-plan.js.map +1 -1
package/dist/harness/flow-check.d.ts.map +1 -1
package/dist/harness/flow-check.js +13 -4
package/dist/harness/flow-check.js.map +1 -1
package/dist/harness/parse.d.ts +2 -0
package/dist/harness/parse.d.ts.map +1 -1
package/dist/harness/parse.js +10 -2
package/dist/harness/parse.js.map +1 -1
package/dist/harness/quality-gates.d.ts +6 -0
package/dist/harness/quality-gates.d.ts.map +1 -1
package/dist/harness/quality-gates.js +15 -1
package/dist/harness/quality-gates.js.map +1 -1
package/dist/harness/sensors.d.ts +27 -0
package/dist/harness/sensors.d.ts.map +1 -1
package/dist/harness/sensors.js +91 -21
package/dist/harness/sensors.js.map +1 -1
package/package.json +2 -2
package/src/cli/commands/audit.ts +5 -3
package/src/generators/test-generator/code-generator.ts +3 -2
package/src/harness/audit.ts +72 -10
package/src/harness/capability-plan.ts +5 -2
package/src/harness/flow-check.ts +13 -4
package/src/harness/parse.ts +12 -2
package/src/harness/quality-gates.ts +14 -1
package/src/harness/sensors.ts +110 -22

package/src/harness/audit.ts CHANGED Viewed

@@ -12,8 +12,8 @@ import { loadScenarios, parseViewpointOverview, ScenarioInfo, ViewpointEntry } f
 import { featureBasename } from './unit-paths';
 import {
   loadCatalog, viewpointGate, assertionDepth, dataThemesFor, depthThresholdFor, coverageBalance, duplicateClusters, traceability, claimProof, taxonomyLint,
-  automatableManual, flowCoveredThemes,
-  GateResult, DepthResult, BalanceResult, DuplicateResult, TraceResult, ClaimProofResult, TaxonomyResult, Catalog, AutomatableManualResult,
+  automatableManual, flowCoveredThemes, flowRegressionDepth, oracleStrength,
+  GateResult, DepthResult, BalanceResult, DuplicateResult, TraceResult, ClaimProofResult, TaxonomyResult, Catalog, AutomatableManualResult, FlowDepthResult, OracleStrengthResult,
 } from './sensors';
 import { loadFlowScenarios } from './flow-check';
 import { manualReasonMismatches, MANUAL_REASONS, buildPlan } from './capability-plan';
@@ -22,7 +22,7 @@ import { readIntent, projectRootFromScreenDir, IntentProfile } from './intent';
 import { getProvenance, Provenance } from './provenance';
 import { specCoverage, SpecCoverageResult, parseSpecClauses } from './spec-coverage';
 import { downstreamScope, manualOracle, readText, DownstreamResult, ManualOracleResult,
-  negativeSideEffect, sourceBacked, crossArtifactOwnership } from './quality-gates';
+  negativeSideEffect, sourceBacked, crossArtifactOwnership, isolationRisk } from './quality-gates';
 import { viewpointLedger, parseViewpointItems, LedgerResult } from './viewpoint-ledger';
 import { capabilityRegistry } from '../capabilities/registry';
 import { discoverAndRegisterCapabilities } from '../capabilities/discover';
@@ -42,6 +42,8 @@ export interface AuditReport {
   downstream: DownstreamResult; // downstream screens referenced but under-covered
   manualOracle: ManualOracleResult; // @manual scenarios lacking setup/action/oracle
   automatableManual: AutomatableManualResult; // @manual that is actually automatable (deferred, not judgment) — TQ-2
+  flowDepth: FlowDepthResult;   // H3 — stateful-flow regression depth (count / teardown / multi-source)
+  oracle: OracleStrengthResult; // H4 — facet-oracle strength (weak name-substring vs title/detail/API/DB)
   ledger: LedgerResult;         // atomic viewpoint-item coverage (per-bullet status)
   calibration: {                // #8 — multi-axis score so a high overall can't hide a weak axis
     axes: Record<string, number>;
@@ -91,6 +93,20 @@ export function scoringCapabilityFor(catalogScreenName: string, defaultCap: stri
   return defaultCap;
 }
+/**
+ * H7 — senior-grade band. The top decile (≥9) is reserved for suites that ALSO clear the senior
+ * axes: a stateful flow with FULL regression depth (count + teardown + multi-source), no weak facet
+ * oracle, and no parallel-cart isolation risk. Otherwise the score is held just below 9 (8.9), so
+ * "≥9" means senior-grade — not merely "themes covered". Neutral for screens/api (no signals → 10).
+ *
+ * This is a cap only: the result is `Math.min(rawOverall, cap)`, so a low raw score is never raised.
+ *
+ * @param rawOverall the un-banded overall score to be capped.
+ * @param s senior-axis signals. The flow axis passes when `flowStateful` is false or `flowRatio` is
+ *   at least 1; the oracle axis passes when `oracleWeak` is exactly 0; the isolation axis passes
+ *   when `isolationRisk` is false.
+ * @returns `rawOverall` capped at 10 when all three axes pass, otherwise capped at 8.9.
+ */
+export function seniorBandedOverall(
+  rawOverall: number,
+  s: { flowStateful: boolean; flowRatio: number; oracleWeak: number; isolationRisk: boolean },
+): number {
+  const seniorGrade = (!s.flowStateful || s.flowRatio >= 1) && s.oracleWeak === 0 && !s.isolationRisk; // all three axes must pass
+  return Math.min(rawOverall, seniorGrade ? 10 : 8.9); // 8.9 keeps a non-senior suite just under the ≥9 band
+}
 export function runAudit(screenDir: string, screenName: string): AuditReport {
   // The feature filename is the unit's LAST segment — an api flow (`flows/<flow>`) lives at
   // `<dir>/features/<flow>.feature`, not `features/flows/<flow>.feature` (which found 0 scenarios).
@@ -165,16 +181,43 @@ export function runAudit(screenDir: string, screenName: string): AuditReport {
   const ownership = crossArtifactOwnership(screenDir, scenarios);
   const unsourced = sourceBacked(scenarios, parseSpecClauses(specPath).frs.map((f) => f.id), parseViewpointItems(viewpointPath).map((i) => i.text), viewpoints.map((v) => v.id), featureText);
+  // H3 — stateful-flow regression depth. For a UI flow whose scenarios mutate a cart/checkout
+  // collection, the regression dimensions (count/quantity proof · teardown · multi-source) cap the
+  // businessDepth headroom: it can reach 1.0 only when all three are exercised, so a present-but-
+  // shallow flow can't claim a perfect score (floor 0.5 — assertion depth still dominates).
+  const isUiFlow = /^flows\//.test(catalogScreenName);
+  const flowDepth = isUiFlow ? flowRegressionDepth(scenarios) : { stateful: false, countProof: false, teardown: false, multiSource: false, ratio: 1, missing: [] } as FlowDepthResult;
+  const FLOW_DEPTH_FLOOR = 0.5;
+  // H4 — oracle strength: a weak facet oracle (name-substring "proves" category/brand membership)
+  // caps businessDepth the same way (floor 0.5). A suite with no facet claim, or a strong oracle, is
+  // neutral (ratio 1 → no cap), so existing snapshots are unaffected.
+  const oracle = oracleStrength(scenarios);
+  const ORACLE_FLOOR = 0.5;
   // Sub-scores
   const coverage = gate.coverageRatio;
-  const businessDepth = depth.bcDepthRatio;
-  const balanceScore = balance.coreCount + balance.secondaryCount > 0
-    ? Math.min(1, balance.coreCount / Math.max(1, balance.secondaryCount))
-    : 1;
+  const businessDepth = Math.min(
+    depth.bcDepthRatio,
+    flowDepth.stateful ? FLOW_DEPTH_FLOOR + (1 - FLOW_DEPTH_FLOOR) * flowDepth.ratio : 1,
+    oracle.weak.length ? ORACLE_FLOOR + (1 - ORACLE_FLOOR) * oracle.ratio : 1,
+  );
+  // When the taxonomy drifted (most scenarios unclassified), the balance axis is unreliable — cap it
+  // at 0.5 instead of awarding a vacuous 1.0 so a stale taxonomy fails loudly, not silently (H1).
+  const balanceScore = balance.unclassifiedRatio > 0.4
+    ? 0.5
+    : balance.coreCount + balance.secondaryCount > 0
+      ? Math.min(1, balance.coreCount / Math.max(1, balance.secondaryCount))
+      : 1;
   const traceScore = 0.5 * trace.withVpCodeRatio + 0.5 * trace.mappedRatio;
   // Business-weighted overall (coverage + depth dominate)
-  const overall = (0.4 * coverage + 0.3 * businessDepth + 0.15 * balanceScore + 0.15 * traceScore) * 10;
+  const rawOverall = (0.4 * coverage + 0.3 * businessDepth + 0.15 * balanceScore + 0.15 * traceScore) * 10;
+  // H7 — senior-grade band: the top decile (≥9) is reserved for suites that also clear the senior
+  // axes — a stateful flow with FULL regression depth (count + teardown + multi-source), no weak
+  // facet oracle, and no parallel-cart isolation risk. Otherwise the score is held just below 9, so
+  // "≥9" means senior-grade, not merely "themes covered". Neutral for screens/api (no signals).
+  const isoRisk = isolationRisk(featureText, flowDepth.stateful);
+  const overall = seniorBandedOverall(rawOverall, { flowStateful: flowDepth.stateful, flowRatio: flowDepth.ratio, oracleWeak: oracle.weak.length, isolationRisk: isoRisk });
   const findings: string[] = [];
   for (const c of flowCredits) {
@@ -195,6 +238,23 @@ export function runAudit(screenDir: string, screenName: string): AuditReport {
       `add data assertions (\`... with {{value}}\`, \`see all ... contain {{v}}\`) or, if cross-screen, defer to a flow with @manual + reason.`,
     );
   }
+  // H5 — state isolation: a @parallel stateful flow that mutates the cart needs per-scenario
+  // isolation, else count/quantity asserts go flaky. Warn (advisory) when no mitigation is present
+  // (@cleanup:cart / @isolate / a "Given … empty cart" background / fresh context).
+  if (isoRisk) {
+    findings.push('ISOLATION-RISK: this @parallel flow mutates the cart but has no per-scenario isolation → cart/count/quantity asserts can go flaky when scenarios share state. Add `@cleanup:cart` (or `@isolate`, or a `Given User has an empty cart` background) so each scenario starts clean.');
+  }
+  if (flowDepth.stateful && flowDepth.missing.length) {
+    const how: Record<string, string> = {
+      'count-proof': 'assert the cart ROW COUNT / item QUANTITY (e.g. `... table with {{two_rows}}`, `Quantity column with {{qty}}`), not just the row presence',
+      'teardown': 'add a REMOVE/clear scenario that returns the cart to its empty state (the inverse operation)',
+      'multi-source': 'add to the cart from EVERY source on the page (the main list AND the recommended/related rail), not just one',
+    };
+    findings.push(`FLOW-DEPTH: this stateful flow exercises ${3 - flowDepth.missing.length}/3 regression dimensions — missing [${flowDepth.missing.join(', ')}] → ${flowDepth.missing.map((m) => how[m]).join('; ')}. (businessDepth is capped until covered.)`);
+  }
+  for (const w of oracle.weak) {
+    findings.push(`ORACLE-WEAK: "${w.name}" — ${w.hint}`);
+  }
   for (const u of claim.unproven) {
     const tag = u.severity === 'fail' ? 'CLAIM-UNPROVEN' : 'CLAIM-WEAK';
     findings.push(`${tag}: "${u.name}" — title claims [${u.claim}] but steps lack ${u.need}. ${u.hint}`);
@@ -202,7 +262,9 @@ export function runAudit(screenDir: string, screenName: string): AuditReport {
   for (const m of taxonomy.mislabeled) {
     findings.push(`VP-MISLABEL: "${m.name}" is coded VP-${m.current} but reads as ${m.suggested} (signal: "${m.signal}") → re-tag VP-${m.suggested}-NNN so the coverage matrix isn't skewed.`);
   }
-  if (balance.imbalanced) {
+  if (balance.unclassifiedRatio > 0.4) {
+    findings.push(`TAXONOMY-UNCLASSIFIED: ${balance.note} → align the VP-<CATEGORY> codes with the catalog (or extend the bucket keywords) so coverage-balance is meaningful.`);
+  } else if (balance.imbalanced) {
     findings.push(`BALANCE: ${balance.note} Stop expanding secondary viewpoints until business-core gaps are filled.`);
   }
   if (trace.mappedRatio < 0.5) {
@@ -323,7 +385,7 @@ export function runAudit(screenDir: string, screenName: string): AuditReport {
     screen: screenName,
     scenarioCount: scenarios.length,
     gate, depth, claim, taxonomy, balance, duplicates, trace, spec,
-    taxonomyMismatch, downstream, manualOracle: manualOracleResult, automatableManual: autoManual, ledger, calibration,
+    taxonomyMismatch, downstream, manualOracle: manualOracleResult, automatableManual: autoManual, flowDepth, oracle, ledger, calibration,
     score: {
       overall: Math.round(overall * 10) / 10,
       coverage: Math.round(coverage * 100) / 100,

package/src/harness/capability-plan.ts CHANGED Viewed

@@ -56,7 +56,7 @@ export function classifyReason(text: string): string {
   return '';
 }
-interface ParsedScenario { name: string; tags: string[]; manual: boolean; reason: string }
+interface ParsedScenario { name: string; tags: string[]; manual: boolean; reason: string; deferredToFlow: boolean; ownedBy?: string }
 /** Parse scenarios with their tags + the reason comment line above (for @manual). */
 export function parseScenarios(featurePath: string): ParsedScenario[] {
@@ -84,7 +84,10 @@ export function parseScenarios(featurePath: string): ParsedScenario[] {
       else if (l === '') continue;
       else break; // a real step → stop
     }
-    out.push({ name: m[1].trim(), tags, manual: tags.some((t) => /^@manual\b/i.test(t)), reason });
+    const deferredToFlow = tags.some((t) => /^@deferred:flow$/i.test(t));
+    const ownedBy = (tags.find((t) => /^@owned-by:/i.test(t)) || '').slice('@owned-by:'.length) || undefined;
+    // @deferred:flow is accounted for like @manual on the screen (owned by a flow, not automated here) (H6).
+    out.push({ name: m[1].trim(), tags, manual: tags.some((t) => /^@manual\b/i.test(t)) || deferredToFlow, reason, deferredToFlow, ownedBy });
   }
   return out;
 }

package/src/harness/flow-check.ts CHANGED Viewed

@@ -74,14 +74,23 @@ export function buildFlowCheck(cwd: string, onlyFlow?: string): FlowCheckReport
   const deferrals: Deferral[] = [];
   for (const sc of screens) {
     for (const s of parseScenarios(featurePath(cwd, 'screens', sc))) {
-      if (!s.manual || !/deferred to a flow/i.test(s.reason)) continue;
-      const targets = targetsFromHint(s.reason);
-      const matches = flowScenarios.filter((fs2) => targets.some((t) => fs2.haystack.includes(t)));
+      // A deferral is the first-class `@deferred:flow` tag (H6) OR the legacy `@manual` + a
+      // "deferred to a flow" comment (back-compat). Either marks a cross-screen case owned by a flow.
+      const isDeferral = s.deferredToFlow || (s.manual && /deferred to a flow/i.test(s.reason));
+      if (!isDeferral) continue;
+      // Targets come from the comment hint; a tag-only @deferred:flow (no comment) falls back to the
+      // scenario TITLE so the covering flow scenario can still be located.
+      const targets = targetsFromHint([s.reason, s.name].join(' '));
+      // `@owned-by:<flow>` names the owner explicitly → only that flow's scenarios can cover it
+      // (a false @owned-by is then surfaced as missing). Else any flow may cover it (legacy).
+      const pool = s.ownedBy ? flowScenarios.filter((fs2) => fs2.flow === s.ownedBy) : flowScenarios;
+      const matches = pool.filter((fs2) => targets.some((t) => fs2.haystack.includes(t)));
       let verdict: Deferral['verdict'] = 'missing';
       let via: string | undefined;
       if (matches.some((m) => m.deep)) { verdict = 'covered'; via = matches.find((m) => m.deep)!.flow; }
       else if (matches.length) { verdict = 'shallow'; via = matches[0].flow; }
-      deferrals.push({ screen: sc, scenario: s.name, hint: s.reason, targets, verdict, via });
+      const hint = s.ownedBy ? `${s.reason || 'deferred to a flow'} (owned-by: ${s.ownedBy})` : s.reason;
+      deferrals.push({ screen: sc, scenario: s.name, hint, targets, verdict, via });
     }
   }

package/src/harness/parse.ts CHANGED Viewed

@@ -34,6 +34,8 @@ export interface ScenarioInfo {
   queryRefs?: string[];       // named queries referenced by this scenario (inline `query [name]` + @query: tags)
   apiRefs?: string[];         // named API endpoints referenced by this scenario (@api: tags)
   requiresCaps?: string[];    // @requires:<cap> — automation-ready but needs an opt-in driver (TQ-11)
+  deferredToFlow?: boolean;   // @deferred:flow — owned by a flow, not automated on this screen (H6)
+  ownedByFlow?: string;       // @owned-by:<flow> — the flow that owns this deferred scenario (H6)
 }
 /** Format-tolerant: is this token an ID (project's scheme), not a prose word?
@@ -101,7 +103,10 @@ const PRIORITY_TAGS: Record<string, Priority> = { '@high': 'high', '@normal': 'n
 function classifyScenario(sc: ParsedScenario): ScenarioInfo {
   const tags = sc.tags || [];
-  const manual = tags.includes('@manual');
+  const deferredToFlow = tags.includes('@deferred:flow');
+  const ownedByFlow = (tags.find((t: string) => /^@owned-by:/i.test(t)) || '').slice('@owned-by:'.length) || undefined;
+  // @deferred:flow is owned by a flow → not automated on this screen, so it is accounted for like @manual (H6).
+  const manual = tags.includes('@manual') || deferredToFlow;
   const casesTag = tags.find((t) => t.startsWith('@cases:'));
   const casesDataset = casesTag ? casesTag.slice('@cases:'.length).trim() : undefined;
   // Named-query references: @query:<name>[(overrides)] tags + inline `query [name]` step refs.
@@ -118,7 +123,10 @@ function classifyScenario(sc: ParsedScenario): ScenarioInfo {
   let priority: Priority = 'unknown';
   for (const t of tags) if (PRIORITY_TAGS[t]) priority = PRIORITY_TAGS[t];
-  const codeMatch = sc.name.match(/\bVP-([A-Z]+)-\d+/i);
+  // Category is everything between `VP-` and the final `-<sequence>` — INCLUDING hyphens, so
+  // compound categories (VP-LIST-DISPLAY-01, VP-ADD-TO-CART-03, VP-PRODUCT-DISCOVERY-02) parse,
+  // not just single-word ones. A single-word category (VP-CART-001) still works. (H1)
+  const codeMatch = sc.name.match(/\bVP-([A-Z]+(?:-[A-Z]+)*)-\d+/i);
   const vpCode = codeMatch ? codeMatch[0].toUpperCase() : undefined;
   const category = codeMatch ? codeMatch[1].toUpperCase() : undefined;
   // Project-scheme ID: the leading token of the title (VP0-001 / MS-HP-001 / VP-LIST-001).
@@ -173,6 +181,8 @@ function classifyScenario(sc: ParsedScenario): ScenarioInfo {
     queryRefs: queryRefs.size ? [...queryRefs] : undefined,
     apiRefs: apiRefs.size ? [...apiRefs] : undefined,
     requiresCaps: requiresCaps.length ? requiresCaps : undefined,
+    deferredToFlow: deferredToFlow || undefined,
+    ownedByFlow,
   };
 }

package/src/harness/quality-gates.ts CHANGED Viewed

@@ -20,7 +20,10 @@ function downstreamRoutes(specText: string): string[] {
   const routes = new Set<string>();
   for (const line of specText.split('\n')) {
     if (!/success|navigat|to \(|→/i.test(line)) continue;
-    for (const m of line.matchAll(/`?(\/[a-z][a-z0-9/_-]+)`?/gi)) {
+    // A real route's leading `/` sits at a path boundary (start, whitespace, backtick, quote, paren),
+    // NOT after a letter/digit. The lookbehind rejects prose slashes like "text/icon" or
+    // "category/brand" that aren't routes at all (H2 — they produced /icon, /button, /brand).
+    for (const m of line.matchAll(/(?<![A-Za-z0-9])(\/[a-z][a-z0-9/_-]+)`?/gi)) {
       const r = m[1];
       if (r !== ownRoute && r.split('/').length > ownRoute.split('/').length - 0) routes.add(r);
     }
@@ -29,6 +32,16 @@ function downstreamRoutes(specText: string): string[] {
   return [...routes].filter((r) => r !== ownRoute && (!ownRoute || r.startsWith(ownRoute + '/') || r.split('/').length >= 3));
 }
+/**
+ * H5 — a @parallel stateful (cart-mutating) flow with NO per-scenario isolation is flaky: scenarios
+ * share state, so cart count/quantity asserts race. Mitigations: @cleanup:cart, @isolate, a fresh
+ * browser context, or a "Given … empty cart" background. Returns true when the risk is unmitigated.
+ *
+ * Detection is a case-insensitive regex search over the WHOLE feature text, not per scenario: one
+ * `@parallel` anywhere marks the feature as parallel, and one mitigation phrase anywhere (tag,
+ * step, title or comment — e.g. the words "empty cart" or "new context") clears the risk.
+ *
+ * @param featureText raw text of the feature file to scan.
+ * @param stateful whether the flow was classified as stateful by the caller; when false the
+ *   function returns false without inspecting the text.
+ * @returns true only when the flow is stateful, the text contains `@parallel`, and none of the
+ *   mitigation patterns is present.
+ */
+export function isolationRisk(featureText: string, stateful: boolean): boolean {
+  if (!stateful || !/@parallel\b/i.test(featureText)) return false; // no shared-state race without both conditions
+  return !/@cleanup:cart\b|@isolate\b|empty cart|fresh (?:browser )?context|new context/i.test(featureText);
+}
 export function downstreamScope(specText: string, scenarios: ScenarioInfo[]): DownstreamResult {
   const routes = downstreamRoutes(specText);
   const underCovered: { route: string; slug: string }[] = [];

package/src/harness/sensors.ts CHANGED Viewed

@@ -11,17 +11,36 @@ import * as path from 'path';
 import { parse as parseYaml } from 'yaml';
 import { ScenarioInfo, ViewpointEntry, idPrefix } from './parse';
-// Business-critical category codes (project VP-<CAT> prefixes). Configurable later.
-const BUSINESS_CRITICAL_CATS = ['LIST', 'CART', 'PRODUCT', 'FILTER', 'CHECKOUT', 'ORDER'];
-// Buckets for coverage-balance.
-const BUCKETS: Record<string, string[]> = {
-  'business-core': BUSINESS_CRITICAL_CATS,
-  'presentation': ['UI'],
-  'validation-security': ['VAL', 'SEC', 'SUB'],
-  'behavior': ['LOGIC'],
-  'navigation': ['NAV'],
-};
+// Business-critical category keywords (matched by CONTAINMENT against the VP category, so a
+// compound category like LIST-DISPLAY / ADD-TO-CART / PRODUCT-DISCOVERY classifies correctly).
+const BUSINESS_CRITICAL_CATS = [
+  // UI commerce cores
+  'LIST', 'CART', 'PRODUCT', 'FILTER', 'CHECKOUT', 'ORDER', 'DETAIL', 'DISCOVERY', 'CATEGORY', 'BRAND', 'DUPLICATE', 'CONSISTENCY',
+  // API / DB capability cores — for an api/db suite the operation IS the business core
+  'API', 'ENDPOINT', 'CRUD', 'QUERY', 'CONTRACT', 'RESOURCE',
+];
+// Bucket keyword sets for coverage-balance, in PRECEDENCE order (first match wins). Matched by
+// substring containment so compound categories land in the right bucket (H1): e.g. LIST-DISPLAY
+// → business-core (LIST) not presentation (DISPLAY); CART-TRANSITION → business-core (CART).
+const BUCKET_ORDER: Array<[string, string[]]> = [
+  ['business-core', BUSINESS_CRITICAL_CATS],
+  ['behavior', ['LOGIC', 'TRANSITION', 'WORKFLOW']],
+  ['validation-security', ['VAL', 'SEC', 'SUB', 'AUTH', 'LOGIN']],
+  ['navigation', ['NAV']],
+  ['presentation', ['UI', 'LAYOUT', 'RESPONSIVE', 'DISPLAY', 'SEO', 'ACCESSIBILITY', 'USABILITY', 'VISUAL']],
+];
+const BUCKETS: Record<string, string[]> = Object.fromEntries(BUCKET_ORDER); // bucket name → keyword list, derived from BUCKET_ORDER
+/**
+ * Classify a VP category into a balance bucket by keyword containment + precedence (H1).
+ *
+ * The category is upper-cased, then BUCKET_ORDER is scanned in its declared order; the first
+ * bucket owning a keyword that is a substring of the category wins.
+ *
+ * @param category the VP category (e.g. `LIST-DISPLAY`); may be undefined or empty.
+ * @returns the matching bucket name, or `'other'` when the category is empty/undefined or no
+ *   keyword is contained in it.
+ *
+ * NOTE(review): the match is a raw substring test, so short keywords ('UI', 'API', 'VAL', 'SUB',
+ * 'NAV') also match inside longer words that merely contain those letters — confirm this is
+ * acceptable for the project's category vocabulary.
+ */
+export function bucketForCategory(category: string | undefined): string {
+  const cat = (category || '').toUpperCase();
+  if (!cat) return 'other';
+  for (const [bucket, kws] of BUCKET_ORDER) {
+    if (kws.some((k) => cat.includes(k))) return bucket;
+  }
+  return 'other';
+}
 export interface ThemeDepth {
   requires: string;           // 'data-assertion' → scenarios on this theme must assert DATA
@@ -242,6 +261,72 @@ export function flowCoveredThemes(
   return out;
 }
+// ---------- Sensor: Flow regression-depth (H3) ----------
+export interface FlowDepthResult {
+  stateful: boolean;       // the suite mutates a cart/checkout collection (add/remove/quantity)
+  countProof: boolean;     // asserts a row count / item quantity, not just presence
+  teardown: boolean;       // removes an item and verifies the empty/zero state
+  multiSource: boolean;    // adds from >1 distinct source (e.g. main list AND recommended)
+  ratio: number;           // covered dimensions / 3 (1 when not stateful → neutral)
+  missing: string[];       // uncovered dimension names: 'count-proof' | 'teardown' | 'multi-source' (empty when not stateful)
+}
+/**
+ * Grades a STATEFUL flow's regression depth beyond "theme covered": a cart/checkout flow that only
+ * proves an item is present is shallower than one that proves the quantity/count, tears the state
+ * back down (remove → empty), and exercises every add-to-cart source. The ratio caps the
+ * businessDepth headroom (audit.ts) so a thin stateful flow can't reach a perfect score. (H3)
+ *
+ * Every signal is a case-insensitive keyword regex tested against each scenario's `haystack`, and
+ * a signal counts as present when ANY scenario matches. Signals built from two patterns (stateful,
+ * teardown, multi-source) are therefore satisfied even when the two patterns match in different
+ * scenarios — the grade describes the suite as a whole, not a single scenario.
+ *
+ * @param scenarios the parsed scenarios of the flow; only `haystack` is read.
+ * @returns the neutral result (`stateful: false`, `ratio: 1`, empty `missing`) when no
+ *   cart/basket/checkout mutation is detected; otherwise the three dimension flags, the share of
+ *   dimensions covered (`ratio`), and the names of the uncovered ones (`missing`).
+ */
+export function flowRegressionDepth(scenarios: ScenarioInfo[]): FlowDepthResult {
+  const hay = scenarios.map((s) => s.haystack);
+  const any = (re: RegExp) => hay.some((h) => re.test(h));
+  const addsToCart = any(/\b(add to cart|add to basket|added (?:to )?(?:the )?cart|adds? .* cart)\b/i);
+  const stateful = (any(/\b(cart|basket|checkout)\b/i) && (addsToCart || any(/\b(remove|delete|quantity|cart line|cart row)\b/i)));
+  if (!stateful) return { stateful: false, countProof: false, teardown: false, multiSource: false, ratio: 1, missing: [] };
+  // 1. Count/quantity proof — a row count or item quantity, not just presence of a row.
+  const countProof = any(/\b(quantity|qty|two (?:rows|lines|cart)|row count|count column|number of items|one[_ ]row|two[_ ]rows|qty[_ ])/i);
+  // 2. Teardown — removes the item and verifies the empty/zero state (the inverse operation).
+  const teardown = any(/\b(remove|delete|clear)\b/i) && any(/\b(empty|no items|zero|removed|0 items)\b/i);
+  // 3. Multi-source — the cart is fed from >1 source (the main list AND a recommended/related rail).
+  const multiSource = any(/\b(recommended|related|you may also|suggest)\b/i) && addsToCart;
+  const dims: Array<[string, boolean]> = [['count-proof', countProof], ['teardown', teardown], ['multi-source', multiSource]];
+  const missing = dims.filter(([, v]) => !v).map(([k]) => k);
+  return { stateful: true, countProof, teardown, multiSource, ratio: (dims.length - missing.length) / dims.length, missing };
+}
+// ---------- Sensor: Oracle strength (H4) ----------
+export interface OracleStrengthResult {
+  weak: { name: string; hint: string }[];   // non-@manual scenarios proving facet membership by a name-substring (name truncated to 80 chars)
+  facetClaims: number;                       // scenarios whose steps or title touch a category/brand facet (denominator)
+  ratio: number;                             // 1 - weak/facetClaims (1 when none) — caps businessDepth
+}
+// "see all [<item name/title>] ... contain(s) {{<facet>.term}}" — asserting every item's NAME carries
+// a category/brand term does NOT prove the item BELONGS to that facet (a "Dress" item need not contain
+// "Dress" in its name). The strong oracle is the results-page title/header, a detail-page facet field,
+// an API/DB query, or an explicit @manual:M2 deferral.
+const WEAK_FACET_ORACLE = /\bsee all\b\s*\[[^\]]*\b(name|title|label)\b[^\]]*\][^{[]*\bcontains?\b[^{]*\{\{[^}]*\b(categ|brand|facet|filter|term)/i;
+const FACET_REF = /\{\{[^}]*\b(categ|brand|facet|filter)\b[^}]*\}\}|\b(category|brand)\b/i;
+/**
+ * Grades facet-oracle strength (H4): finds automated scenarios that "prove" category/brand
+ * membership only by asserting that every item NAME contains the facet term.
+ *
+ * A weak oracle is, by definition, a facet claim, so every weak scenario is counted in the
+ * `facetClaims` denominator even when FACET_REF alone would not recognise it (WEAK_FACET_ORACLE
+ * accepts looser spellings such as `{{search.term}}` or `{{categories.x}}`). This guarantees
+ * `weak.length <= facetClaims`, so `ratio` always stays within [0, 1]: it can neither go negative
+ * (which would push the caller's businessDepth cap below its floor) nor stay at 1 while weak
+ * oracles are being reported.
+ *
+ * @param scenarios parsed scenarios; `manual`, `stepsText` and `name` are read.
+ * @returns the weak scenarios (name truncated to 80 chars, with a remediation hint), the number
+ *   of scenarios making a facet claim, and `ratio = 1 - weak/facetClaims` (1 when there is no
+ *   facet claim at all).
+ */
+export function oracleStrength(scenarios: ScenarioInfo[]): OracleStrengthResult {
+  const weak: { name: string; hint: string }[] = [];
+  let facetClaims = 0;
+  for (const s of scenarios) {
+    // a @manual facet check is a deliberate deferral, not a weak automated oracle
+    const isWeak = !s.manual && WEAK_FACET_ORACLE.test(s.stepsText);
+    if (isWeak) {
+      weak.push({
+        name: s.name.slice(0, 80),
+        hint: 'asserting every item NAME contains a category/brand term does not prove facet membership — assert the results-page TITLE/header carries the facet, a detail-page facet field, or an API/DB oracle; or defer the exhaustive check to @manual:M2.',
+      });
+    }
+    // Count the scenario once in the denominator: either it references a facet, or it is a weak
+    // facet oracle (which implies a facet claim even if FACET_REF's stricter pattern misses it).
+    if (isWeak || FACET_REF.test(s.stepsText) || FACET_REF.test(s.name)) facetClaims++;
+  }
+  return { weak, facetClaims, ratio: facetClaims ? 1 - weak.length / facetClaims : 1 };
+}
 /** Collect data-correctness themes (depth.requires) for a page-type + universal. */
 export function dataThemesFor(catalog: Catalog, pageType: string | null): CatalogTheme[] {
   const themes: CatalogTheme[] = [];
@@ -258,6 +343,7 @@ export interface BalanceResult {
   coreCount: number;
   secondaryCount: number;
   imbalanced: boolean;
+  unclassifiedRatio: number;   // share of scenarios that fell into `other` (taxonomy drift signal, H1)
   note: string;
 }
@@ -270,23 +356,21 @@ export function coverageBalance(scenarios: ScenarioInfo[]): BalanceResult {
   for (const s of scenarios) {
     const cat = s.category || 'NONE';
     byCategory[cat] = (byCategory[cat] || 0) + 1;
-    const bucket = Object.entries(BUCKETS).find(([, cats]) => cats.includes(cat))?.[0] || 'other';
-    byBucket[bucket]++;
+    byBucket[bucketForCategory(s.category)]++;
   }
   const core = byBucket['business-core'];
   const secondary = byBucket['presentation'] + byBucket['validation-security'];
   const imbalanced = secondary > core * 1.5 && core > 0;
-  return {
-    byBucket,
-    byCategory,
-    coreCount: core,
-    secondaryCount: secondary,
-    imbalanced,
-    note: imbalanced
+  const unclassifiedRatio = scenarios.length ? byBucket['other'] / scenarios.length : 0;
+  // A high `other` share means the VP taxonomy drifted from the catalog — the balance axis is then
+  // unreliable, so we surface it (audit.ts caps the balance contribution on this signal).
+  const note = unclassifiedRatio > 0.4
+    ? `Taxonomy drift: ${byBucket['other']}/${scenarios.length} scenarios have an unrecognised VP category (bucket=other) — balance is unreliable until the viewpoint codes match the catalog.`
+    : imbalanced
       ? `Secondary viewpoints (presentation+validation/security = ${secondary}) outweigh business-core (${core}) by >1.5x.`
-      : 'Balanced.',
-  };
+      : 'Balanced.';
+  return { byBucket, byCategory, coreCount: core, secondaryCount: secondary, imbalanced, unclassifiedRatio, note };
 }
 // ---------- Sensor 4: Duplicate clusters ----------
@@ -300,6 +384,10 @@ export interface DuplicateResult {
 export function duplicateClusters(scenarios: ScenarioInfo[]): DuplicateResult {
   const map = new Map<string, ScenarioInfo[]>();
   for (const s of scenarios) {
+    // @manual scenarios compile to a degenerate skeleton (no executable steps), so they cluster
+    // with each other even though each is a distinct judgment/capability-manual viewpoint. Excluding
+    // them keeps the exact-dup signal about genuinely-copied AUTOMATED scenarios (H2).
+    if (s.manual) continue;
     const arr = map.get(s.stepSkeleton) || [];
     arr.push(s);
     map.set(s.stepSkeleton, arr);