@sun-asterisk/sungen 3.2.0-beta.143 → 3.2.0-beta.144

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/dist/cli/commands/audit.d.ts.map +1 -1
  2. package/dist/cli/commands/audit.js +5 -3
  3. package/dist/cli/commands/audit.js.map +1 -1
  4. package/dist/generators/test-generator/code-generator.d.ts.map +1 -1
  5. package/dist/generators/test-generator/code-generator.js +3 -2
  6. package/dist/generators/test-generator/code-generator.js.map +1 -1
  7. package/dist/harness/audit.d.ts +15 -1
  8. package/dist/harness/audit.d.ts.map +1 -1
  9. package/dist/harness/audit.js +60 -7
  10. package/dist/harness/audit.js.map +1 -1
  11. package/dist/harness/capability-plan.d.ts +2 -0
  12. package/dist/harness/capability-plan.d.ts.map +1 -1
  13. package/dist/harness/capability-plan.js +4 -1
  14. package/dist/harness/capability-plan.js.map +1 -1
  15. package/dist/harness/flow-check.d.ts.map +1 -1
  16. package/dist/harness/flow-check.js +13 -4
  17. package/dist/harness/flow-check.js.map +1 -1
  18. package/dist/harness/parse.d.ts +2 -0
  19. package/dist/harness/parse.d.ts.map +1 -1
  20. package/dist/harness/parse.js +10 -2
  21. package/dist/harness/parse.js.map +1 -1
  22. package/dist/harness/quality-gates.d.ts +6 -0
  23. package/dist/harness/quality-gates.d.ts.map +1 -1
  24. package/dist/harness/quality-gates.js +15 -1
  25. package/dist/harness/quality-gates.js.map +1 -1
  26. package/dist/harness/sensors.d.ts +27 -0
  27. package/dist/harness/sensors.d.ts.map +1 -1
  28. package/dist/harness/sensors.js +91 -21
  29. package/dist/harness/sensors.js.map +1 -1
  30. package/package.json +2 -2
  31. package/src/cli/commands/audit.ts +5 -3
  32. package/src/generators/test-generator/code-generator.ts +3 -2
  33. package/src/harness/audit.ts +72 -10
  34. package/src/harness/capability-plan.ts +5 -2
  35. package/src/harness/flow-check.ts +13 -4
  36. package/src/harness/parse.ts +12 -2
  37. package/src/harness/quality-gates.ts +14 -1
  38. package/src/harness/sensors.ts +110 -22
@@ -12,8 +12,8 @@ import { loadScenarios, parseViewpointOverview, ScenarioInfo, ViewpointEntry } f
12
12
  import { featureBasename } from './unit-paths';
13
13
  import {
14
14
  loadCatalog, viewpointGate, assertionDepth, dataThemesFor, depthThresholdFor, coverageBalance, duplicateClusters, traceability, claimProof, taxonomyLint,
15
- automatableManual, flowCoveredThemes,
16
- GateResult, DepthResult, BalanceResult, DuplicateResult, TraceResult, ClaimProofResult, TaxonomyResult, Catalog, AutomatableManualResult,
15
+ automatableManual, flowCoveredThemes, flowRegressionDepth, oracleStrength,
16
+ GateResult, DepthResult, BalanceResult, DuplicateResult, TraceResult, ClaimProofResult, TaxonomyResult, Catalog, AutomatableManualResult, FlowDepthResult, OracleStrengthResult,
17
17
  } from './sensors';
18
18
  import { loadFlowScenarios } from './flow-check';
19
19
  import { manualReasonMismatches, MANUAL_REASONS, buildPlan } from './capability-plan';
@@ -22,7 +22,7 @@ import { readIntent, projectRootFromScreenDir, IntentProfile } from './intent';
22
22
  import { getProvenance, Provenance } from './provenance';
23
23
  import { specCoverage, SpecCoverageResult, parseSpecClauses } from './spec-coverage';
24
24
  import { downstreamScope, manualOracle, readText, DownstreamResult, ManualOracleResult,
25
- negativeSideEffect, sourceBacked, crossArtifactOwnership } from './quality-gates';
25
+ negativeSideEffect, sourceBacked, crossArtifactOwnership, isolationRisk } from './quality-gates';
26
26
  import { viewpointLedger, parseViewpointItems, LedgerResult } from './viewpoint-ledger';
27
27
  import { capabilityRegistry } from '../capabilities/registry';
28
28
  import { discoverAndRegisterCapabilities } from '../capabilities/discover';
@@ -42,6 +42,8 @@ export interface AuditReport {
42
42
  downstream: DownstreamResult; // downstream screens referenced but under-covered
43
43
  manualOracle: ManualOracleResult; // @manual scenarios lacking setup/action/oracle
44
44
  automatableManual: AutomatableManualResult; // @manual that is actually automatable (deferred, not judgment) — TQ-2
45
+ flowDepth: FlowDepthResult; // H3 — stateful-flow regression depth (count / teardown / multi-source)
46
+ oracle: OracleStrengthResult; // H4 — facet-oracle strength (weak name-substring vs title/detail/API/DB)
45
47
  ledger: LedgerResult; // atomic viewpoint-item coverage (per-bullet status)
46
48
  calibration: { // #8 — multi-axis score so a high overall can't hide a weak axis
47
49
  axes: Record<string, number>;
@@ -91,6 +93,20 @@ export function scoringCapabilityFor(catalogScreenName: string, defaultCap: stri
91
93
  return defaultCap;
92
94
  }
93
95
 
96
+ /**
97
+ * H7 — senior-grade band. The top decile (≥9) is reserved for suites that ALSO clear the senior
98
+ * axes: a stateful flow with FULL regression depth (count + teardown + multi-source), no weak facet
99
+ * oracle, and no parallel-cart isolation risk. Otherwise the score is held just below 9 (8.9), so
100
+ * "≥9" means senior-grade — not merely "themes covered". Neutral for screens/api (no signals → 10).
101
+ */
102
+ export function seniorBandedOverall(
103
+ rawOverall: number,
104
+ s: { flowStateful: boolean; flowRatio: number; oracleWeak: number; isolationRisk: boolean },
105
+ ): number {
106
+ const seniorGrade = (!s.flowStateful || s.flowRatio >= 1) && s.oracleWeak === 0 && !s.isolationRisk;
107
+ return Math.min(rawOverall, seniorGrade ? 10 : 8.9);
108
+ }
109
+
94
110
  export function runAudit(screenDir: string, screenName: string): AuditReport {
95
111
  // The feature filename is the unit's LAST segment — an api flow (`flows/<flow>`) lives at
96
112
  // `<dir>/features/<flow>.feature`, not `features/flows/<flow>.feature` (which found 0 scenarios).
@@ -165,16 +181,43 @@ export function runAudit(screenDir: string, screenName: string): AuditReport {
165
181
  const ownership = crossArtifactOwnership(screenDir, scenarios);
166
182
  const unsourced = sourceBacked(scenarios, parseSpecClauses(specPath).frs.map((f) => f.id), parseViewpointItems(viewpointPath).map((i) => i.text), viewpoints.map((v) => v.id), featureText);
167
183
 
184
+ // H3 — stateful-flow regression depth. For a UI flow whose scenarios mutate a cart/checkout
185
+ // collection, the regression dimensions (count/quantity proof · teardown · multi-source) cap the
186
+ // businessDepth headroom: it can reach 1.0 only when all three are exercised, so a present-but-
187
+ // shallow flow can't claim a perfect score (floor 0.5 — assertion depth still dominates).
188
+ const isUiFlow = /^flows\//.test(catalogScreenName);
189
+ const flowDepth = isUiFlow ? flowRegressionDepth(scenarios) : { stateful: false, countProof: false, teardown: false, multiSource: false, ratio: 1, missing: [] } as FlowDepthResult;
190
+ const FLOW_DEPTH_FLOOR = 0.5;
191
+ // H4 — oracle strength: a weak facet oracle (name-substring "proves" category/brand membership)
192
+ // caps businessDepth the same way (floor 0.5). A suite with no facet claim, or a strong oracle, is
193
+ // neutral (ratio 1 → no cap), so existing snapshots are unaffected.
194
+ const oracle = oracleStrength(scenarios);
195
+ const ORACLE_FLOOR = 0.5;
196
+
168
197
  // Sub-scores
169
198
  const coverage = gate.coverageRatio;
170
- const businessDepth = depth.bcDepthRatio;
171
- const balanceScore = balance.coreCount + balance.secondaryCount > 0
172
- ? Math.min(1, balance.coreCount / Math.max(1, balance.secondaryCount))
173
- : 1;
199
+ const businessDepth = Math.min(
200
+ depth.bcDepthRatio,
201
+ flowDepth.stateful ? FLOW_DEPTH_FLOOR + (1 - FLOW_DEPTH_FLOOR) * flowDepth.ratio : 1,
202
+ oracle.weak.length ? ORACLE_FLOOR + (1 - ORACLE_FLOOR) * oracle.ratio : 1,
203
+ );
204
+ // When the taxonomy drifted (most scenarios unclassified), the balance axis is unreliable — cap it
205
+ // at 0.5 instead of awarding a vacuous 1.0 so a stale taxonomy fails loudly, not silently (H1).
206
+ const balanceScore = balance.unclassifiedRatio > 0.4
207
+ ? 0.5
208
+ : balance.coreCount + balance.secondaryCount > 0
209
+ ? Math.min(1, balance.coreCount / Math.max(1, balance.secondaryCount))
210
+ : 1;
174
211
  const traceScore = 0.5 * trace.withVpCodeRatio + 0.5 * trace.mappedRatio;
175
212
 
176
213
  // Business-weighted overall (coverage + depth dominate)
177
- const overall = (0.4 * coverage + 0.3 * businessDepth + 0.15 * balanceScore + 0.15 * traceScore) * 10;
214
+ const rawOverall = (0.4 * coverage + 0.3 * businessDepth + 0.15 * balanceScore + 0.15 * traceScore) * 10;
215
+ // H7 — senior-grade band: the top decile (≥9) is reserved for suites that also clear the senior
216
+ // axes — a stateful flow with FULL regression depth (count + teardown + multi-source), no weak
217
+ // facet oracle, and no parallel-cart isolation risk. Otherwise the score is held just below 9, so
218
+ // "≥9" means senior-grade, not merely "themes covered". Neutral for screens/api (no signals).
219
+ const isoRisk = isolationRisk(featureText, flowDepth.stateful);
220
+ const overall = seniorBandedOverall(rawOverall, { flowStateful: flowDepth.stateful, flowRatio: flowDepth.ratio, oracleWeak: oracle.weak.length, isolationRisk: isoRisk });
178
221
 
179
222
  const findings: string[] = [];
180
223
  for (const c of flowCredits) {
@@ -195,6 +238,23 @@ export function runAudit(screenDir: string, screenName: string): AuditReport {
195
238
  `add data assertions (\`... with {{value}}\`, \`see all ... contain {{v}}\`) or, if cross-screen, defer to a flow with @manual + reason.`,
196
239
  );
197
240
  }
241
+ // H5 — state isolation: a @parallel stateful flow that mutates the cart needs per-scenario
242
+ // isolation, else count/quantity asserts go flaky. Warn (advisory) when no mitigation is present
243
+ // (@cleanup:cart / @isolate / a "Given … empty cart" background / fresh context).
244
+ if (isoRisk) {
245
+ findings.push('ISOLATION-RISK: this @parallel flow mutates the cart but has no per-scenario isolation → cart/count/quantity asserts can go flaky when scenarios share state. Add `@cleanup:cart` (or `@isolate`, or a `Given User has an empty cart` background) so each scenario starts clean.');
246
+ }
247
+ if (flowDepth.stateful && flowDepth.missing.length) {
248
+ const how: Record<string, string> = {
249
+ 'count-proof': 'assert the cart ROW COUNT / item QUANTITY (e.g. `... table with {{two_rows}}`, `Quantity column with {{qty}}`), not just the row presence',
250
+ 'teardown': 'add a REMOVE/clear scenario that returns the cart to its empty state (the inverse operation)',
251
+ 'multi-source': 'add to the cart from EVERY source on the page (the main list AND the recommended/related rail), not just one',
252
+ };
253
+ findings.push(`FLOW-DEPTH: this stateful flow exercises ${3 - flowDepth.missing.length}/3 regression dimensions — missing [${flowDepth.missing.join(', ')}] → ${flowDepth.missing.map((m) => how[m]).join('; ')}. (businessDepth is capped until covered.)`);
254
+ }
255
+ for (const w of oracle.weak) {
256
+ findings.push(`ORACLE-WEAK: "${w.name}" — ${w.hint}`);
257
+ }
198
258
  for (const u of claim.unproven) {
199
259
  const tag = u.severity === 'fail' ? 'CLAIM-UNPROVEN' : 'CLAIM-WEAK';
200
260
  findings.push(`${tag}: "${u.name}" — title claims [${u.claim}] but steps lack ${u.need}. ${u.hint}`);
@@ -202,7 +262,9 @@ export function runAudit(screenDir: string, screenName: string): AuditReport {
202
262
  for (const m of taxonomy.mislabeled) {
203
263
  findings.push(`VP-MISLABEL: "${m.name}" is coded VP-${m.current} but reads as ${m.suggested} (signal: "${m.signal}") → re-tag VP-${m.suggested}-NNN so the coverage matrix isn't skewed.`);
204
264
  }
205
- if (balance.imbalanced) {
265
+ if (balance.unclassifiedRatio > 0.4) {
266
+ findings.push(`TAXONOMY-UNCLASSIFIED: ${balance.note} → align the VP-<CATEGORY> codes with the catalog (or extend the bucket keywords) so coverage-balance is meaningful.`);
267
+ } else if (balance.imbalanced) {
206
268
  findings.push(`BALANCE: ${balance.note} Stop expanding secondary viewpoints until business-core gaps are filled.`);
207
269
  }
208
270
  if (trace.mappedRatio < 0.5) {
@@ -323,7 +385,7 @@ export function runAudit(screenDir: string, screenName: string): AuditReport {
323
385
  screen: screenName,
324
386
  scenarioCount: scenarios.length,
325
387
  gate, depth, claim, taxonomy, balance, duplicates, trace, spec,
326
- taxonomyMismatch, downstream, manualOracle: manualOracleResult, automatableManual: autoManual, ledger, calibration,
388
+ taxonomyMismatch, downstream, manualOracle: manualOracleResult, automatableManual: autoManual, flowDepth, oracle, ledger, calibration,
327
389
  score: {
328
390
  overall: Math.round(overall * 10) / 10,
329
391
  coverage: Math.round(coverage * 100) / 100,
@@ -56,7 +56,7 @@ export function classifyReason(text: string): string {
56
56
  return '';
57
57
  }
58
58
 
59
- interface ParsedScenario { name: string; tags: string[]; manual: boolean; reason: string }
59
+ interface ParsedScenario { name: string; tags: string[]; manual: boolean; reason: string; deferredToFlow: boolean; ownedBy?: string }
60
60
 
61
61
  /** Parse scenarios with their tags + the reason comment line above (for @manual). */
62
62
  export function parseScenarios(featurePath: string): ParsedScenario[] {
@@ -84,7 +84,10 @@ export function parseScenarios(featurePath: string): ParsedScenario[] {
84
84
  else if (l === '') continue;
85
85
  else break; // a real step → stop
86
86
  }
87
- out.push({ name: m[1].trim(), tags, manual: tags.some((t) => /^@manual\b/i.test(t)), reason });
87
+ const deferredToFlow = tags.some((t) => /^@deferred:flow$/i.test(t));
88
+ const ownedBy = (tags.find((t) => /^@owned-by:/i.test(t)) || '').slice('@owned-by:'.length) || undefined;
89
+ // @deferred:flow accounts like @manual on the screen (owned by a flow, not automated here) (H6).
90
+ out.push({ name: m[1].trim(), tags, manual: tags.some((t) => /^@manual\b/i.test(t)) || deferredToFlow, reason, deferredToFlow, ownedBy });
88
91
  }
89
92
  return out;
90
93
  }
@@ -74,14 +74,23 @@ export function buildFlowCheck(cwd: string, onlyFlow?: string): FlowCheckReport
74
74
  const deferrals: Deferral[] = [];
75
75
  for (const sc of screens) {
76
76
  for (const s of parseScenarios(featurePath(cwd, 'screens', sc))) {
77
- if (!s.manual || !/deferred to a flow/i.test(s.reason)) continue;
78
- const targets = targetsFromHint(s.reason);
79
- const matches = flowScenarios.filter((fs2) => targets.some((t) => fs2.haystack.includes(t)));
77
+ // A deferral is the first-class `@deferred:flow` tag (H6) OR the legacy `@manual` + a
78
+ // "deferred to a flow" comment (back-compat). Either marks a cross-screen case owned by a flow.
79
+ const isDeferral = s.deferredToFlow || (s.manual && /deferred to a flow/i.test(s.reason));
80
+ if (!isDeferral) continue;
81
+ // Targets come from the comment hint; a tag-only @deferred:flow (no comment) falls back to the
82
+ // scenario TITLE so the covering flow scenario can still be located.
83
+ const targets = targetsFromHint([s.reason, s.name].join(' '));
84
+ // `@owned-by:<flow>` names the owner explicitly → only that flow's scenarios can cover it
85
+ // (a false @owned-by is then surfaced as missing). Else any flow may cover it (legacy).
86
+ const pool = s.ownedBy ? flowScenarios.filter((fs2) => fs2.flow === s.ownedBy) : flowScenarios;
87
+ const matches = pool.filter((fs2) => targets.some((t) => fs2.haystack.includes(t)));
80
88
  let verdict: Deferral['verdict'] = 'missing';
81
89
  let via: string | undefined;
82
90
  if (matches.some((m) => m.deep)) { verdict = 'covered'; via = matches.find((m) => m.deep)!.flow; }
83
91
  else if (matches.length) { verdict = 'shallow'; via = matches[0].flow; }
84
- deferrals.push({ screen: sc, scenario: s.name, hint: s.reason, targets, verdict, via });
92
+ const hint = s.ownedBy ? `${s.reason || 'deferred to a flow'} (owned-by: ${s.ownedBy})` : s.reason;
93
+ deferrals.push({ screen: sc, scenario: s.name, hint, targets, verdict, via });
85
94
  }
86
95
  }
87
96
 
@@ -34,6 +34,8 @@ export interface ScenarioInfo {
34
34
  queryRefs?: string[]; // named queries referenced by this scenario (inline `query [name]` + @query: tags)
35
35
  apiRefs?: string[]; // named API endpoints referenced by this scenario (@api: tags)
36
36
  requiresCaps?: string[]; // @requires:<cap> — automation-ready but needs an opt-in driver (TQ-11)
37
+ deferredToFlow?: boolean; // @deferred:flow — owned by a flow, not automated on this screen (H6)
38
+ ownedByFlow?: string; // @owned-by:<flow> — the flow that owns this deferred scenario (H6)
37
39
  }
38
40
 
39
41
  /** Format-tolerant: is this token an ID (project's scheme), not a prose word?
@@ -101,7 +103,10 @@ const PRIORITY_TAGS: Record<string, Priority> = { '@high': 'high', '@normal': 'n
101
103
 
102
104
  function classifyScenario(sc: ParsedScenario): ScenarioInfo {
103
105
  const tags = sc.tags || [];
104
- const manual = tags.includes('@manual');
106
+ const deferredToFlow = tags.includes('@deferred:flow');
107
+ const ownedByFlow = (tags.find((t: string) => /^@owned-by:/i.test(t)) || '').slice('@owned-by:'.length) || undefined;
108
+ // @deferred:flow is owned by a flow → not automated on this screen, so it accounts like @manual (H6).
109
+ const manual = tags.includes('@manual') || deferredToFlow;
105
110
  const casesTag = tags.find((t) => t.startsWith('@cases:'));
106
111
  const casesDataset = casesTag ? casesTag.slice('@cases:'.length).trim() : undefined;
107
112
  // Named-query references: @query:<name>[(overrides)] tags + inline `query [name]` step refs.
@@ -118,7 +123,10 @@ function classifyScenario(sc: ParsedScenario): ScenarioInfo {
118
123
  let priority: Priority = 'unknown';
119
124
  for (const t of tags) if (PRIORITY_TAGS[t]) priority = PRIORITY_TAGS[t];
120
125
 
121
- const codeMatch = sc.name.match(/\bVP-([A-Z]+)-\d+/i);
126
+ // Category is everything between `VP-` and the final `-<sequence>` — INCLUDING hyphens, so
127
+ // compound categories (VP-LIST-DISPLAY-01, VP-ADD-TO-CART-03, VP-PRODUCT-DISCOVERY-02) parse,
128
+ // not just single-word ones. A single-word category (VP-CART-001) still works. (H1)
129
+ const codeMatch = sc.name.match(/\bVP-([A-Z]+(?:-[A-Z]+)*)-\d+/i);
122
130
  const vpCode = codeMatch ? codeMatch[0].toUpperCase() : undefined;
123
131
  const category = codeMatch ? codeMatch[1].toUpperCase() : undefined;
124
132
  // Project-scheme ID: the leading token of the title (VP0-001 / MS-HP-001 / VP-LIST-001).
@@ -173,6 +181,8 @@ function classifyScenario(sc: ParsedScenario): ScenarioInfo {
173
181
  queryRefs: queryRefs.size ? [...queryRefs] : undefined,
174
182
  apiRefs: apiRefs.size ? [...apiRefs] : undefined,
175
183
  requiresCaps: requiresCaps.length ? requiresCaps : undefined,
184
+ deferredToFlow: deferredToFlow || undefined,
185
+ ownedByFlow,
176
186
  };
177
187
  }
178
188
 
@@ -20,7 +20,10 @@ function downstreamRoutes(specText: string): string[] {
20
20
  const routes = new Set<string>();
21
21
  for (const line of specText.split('\n')) {
22
22
  if (!/success|navigat|to \(|→/i.test(line)) continue;
23
- for (const m of line.matchAll(/`?(\/[a-z][a-z0-9/_-]+)`?/gi)) {
23
+ // A real route's leading `/` sits at a path boundary (start, whitespace, backtick, quote, paren),
24
+ // NOT after a letter/digit. The lookbehind rejects prose slashes like "text/icon" or
25
+ // "category/brand" that aren't routes at all (H2 — they produced /icon, /button, /brand).
26
+ for (const m of line.matchAll(/(?<![A-Za-z0-9])(\/[a-z][a-z0-9/_-]+)`?/gi)) {
24
27
  const r = m[1];
25
28
  if (r !== ownRoute && r.split('/').length > ownRoute.split('/').length - 0) routes.add(r);
26
29
  }
@@ -29,6 +32,16 @@ function downstreamRoutes(specText: string): string[] {
29
32
  return [...routes].filter((r) => r !== ownRoute && (!ownRoute || r.startsWith(ownRoute + '/') || r.split('/').length >= 3));
30
33
  }
31
34
 
35
+ /**
36
+ * H5 — a @parallel stateful (cart-mutating) flow with NO per-scenario isolation is flaky: scenarios
37
+ * share state, so cart count/quantity asserts race. Mitigations: @cleanup:cart, @isolate, a fresh
38
+ * browser context, or a "Given … empty cart" background. Returns true when the risk is unmitigated.
39
+ */
40
+ export function isolationRisk(featureText: string, stateful: boolean): boolean {
41
+ if (!stateful || !/@parallel\b/i.test(featureText)) return false;
42
+ return !/@cleanup:cart\b|@isolate\b|empty cart|fresh (?:browser )?context|new context/i.test(featureText);
43
+ }
44
+
32
45
  export function downstreamScope(specText: string, scenarios: ScenarioInfo[]): DownstreamResult {
33
46
  const routes = downstreamRoutes(specText);
34
47
  const underCovered: { route: string; slug: string }[] = [];
@@ -11,17 +11,36 @@ import * as path from 'path';
11
11
  import { parse as parseYaml } from 'yaml';
12
12
  import { ScenarioInfo, ViewpointEntry, idPrefix } from './parse';
13
13
 
14
- // Business-critical category codes (project VP-<CAT> prefixes). Configurable later.
15
- const BUSINESS_CRITICAL_CATS = ['LIST', 'CART', 'PRODUCT', 'FILTER', 'CHECKOUT', 'ORDER'];
16
-
17
- // Buckets for coverage-balance.
18
- const BUCKETS: Record<string, string[]> = {
19
- 'business-core': BUSINESS_CRITICAL_CATS,
20
- 'presentation': ['UI'],
21
- 'validation-security': ['VAL', 'SEC', 'SUB'],
22
- 'behavior': ['LOGIC'],
23
- 'navigation': ['NAV'],
24
- };
14
+ // Business-critical category keywords (matched by CONTAINMENT against the VP category, so a
15
+ // compound category like LIST-DISPLAY / ADD-TO-CART / PRODUCT-DISCOVERY classifies correctly).
16
+ const BUSINESS_CRITICAL_CATS = [
17
+ // UI commerce cores
18
+ 'LIST', 'CART', 'PRODUCT', 'FILTER', 'CHECKOUT', 'ORDER', 'DETAIL', 'DISCOVERY', 'CATEGORY', 'BRAND', 'DUPLICATE', 'CONSISTENCY',
19
+ // API / DB capability cores — for an api/db suite the operation IS the business core
20
+ 'API', 'ENDPOINT', 'CRUD', 'QUERY', 'CONTRACT', 'RESOURCE',
21
+ ];
22
+
23
+ // Bucket keyword sets for coverage-balance, in PRECEDENCE order (first match wins). Matched by
24
+ // substring containment so compound categories land in the right bucket (H1): e.g. LIST-DISPLAY
25
+ // → business-core (LIST) not presentation (DISPLAY); CART-TRANSITION → business-core (CART).
26
+ const BUCKET_ORDER: Array<[string, string[]]> = [
27
+ ['business-core', BUSINESS_CRITICAL_CATS],
28
+ ['behavior', ['LOGIC', 'TRANSITION', 'WORKFLOW']],
29
+ ['validation-security', ['VAL', 'SEC', 'SUB', 'AUTH', 'LOGIN']],
30
+ ['navigation', ['NAV']],
31
+ ['presentation', ['UI', 'LAYOUT', 'RESPONSIVE', 'DISPLAY', 'SEO', 'ACCESSIBILITY', 'USABILITY', 'VISUAL']],
32
+ ];
33
+ const BUCKETS: Record<string, string[]> = Object.fromEntries(BUCKET_ORDER);
34
+
35
+ /** Classify a VP category into a balance bucket by keyword containment + precedence (H1). */
36
+ export function bucketForCategory(category: string | undefined): string {
37
+ const cat = (category || '').toUpperCase();
38
+ if (!cat) return 'other';
39
+ for (const [bucket, kws] of BUCKET_ORDER) {
40
+ if (kws.some((k) => cat.includes(k))) return bucket;
41
+ }
42
+ return 'other';
43
+ }
25
44
 
26
45
  export interface ThemeDepth {
27
46
  requires: string; // 'data-assertion' → scenarios on this theme must assert DATA
@@ -242,6 +261,72 @@ export function flowCoveredThemes(
242
261
  return out;
243
262
  }
244
263
 
264
+ // ---------- Sensor: Flow regression-depth (H3) ----------
265
+
266
+ export interface FlowDepthResult {
267
+ stateful: boolean; // the suite mutates a cart/checkout collection (add/remove/quantity)
268
+ countProof: boolean; // asserts a row count / item quantity, not just presence
269
+ teardown: boolean; // removes an item and verifies the empty/zero state
270
+ multiSource: boolean; // adds from >1 distinct source (e.g. main list AND recommended)
271
+ ratio: number; // covered dimensions / 3 (1 when not stateful → neutral)
272
+ missing: string[];
273
+ }
274
+
275
+ /**
276
+ * Grades a STATEFUL flow's regression depth beyond "theme covered": a cart/checkout flow that only
277
+ * proves an item is present is shallower than one that proves the quantity/count, tears the state
278
+ * back down (remove → empty), and exercises every add-to-cart source. The ratio caps the
279
+ * businessDepth headroom (audit.ts) so a thin stateful flow can't reach a perfect score. (H3)
280
+ */
281
+ export function flowRegressionDepth(scenarios: ScenarioInfo[]): FlowDepthResult {
282
+ const hay = scenarios.map((s) => s.haystack);
283
+ const any = (re: RegExp) => hay.some((h) => re.test(h));
284
+ const addsToCart = any(/\b(add to cart|add to basket|added (?:to )?(?:the )?cart|adds? .* cart)\b/i);
285
+ const stateful = (any(/\b(cart|basket|checkout)\b/i) && (addsToCart || any(/\b(remove|delete|quantity|cart line|cart row)\b/i)));
286
+ if (!stateful) return { stateful: false, countProof: false, teardown: false, multiSource: false, ratio: 1, missing: [] };
287
+
288
+ // 1. Count/quantity proof — a row count or item quantity, not just presence of a row.
289
+ const countProof = any(/\b(quantity|qty|two (?:rows|lines|cart)|row count|count column|number of items|one[_ ]row|two[_ ]rows|qty[_ ])/i);
290
+ // 2. Teardown — removes the item and verifies the empty/zero state (the inverse operation).
291
+ const teardown = any(/\b(remove|delete|clear)\b/i) && any(/\b(empty|no items|zero|removed|0 items)\b/i);
292
+ // 3. Multi-source — the cart is fed from >1 source (the main list AND a recommended/related rail).
293
+ const multiSource = any(/\b(recommended|related|you may also|suggest)\b/i) && addsToCart;
294
+
295
+ const dims: Array<[string, boolean]> = [['count-proof', countProof], ['teardown', teardown], ['multi-source', multiSource]];
296
+ const missing = dims.filter(([, v]) => !v).map(([k]) => k);
297
+ return { stateful: true, countProof, teardown, multiSource, ratio: (dims.length - missing.length) / dims.length, missing };
298
+ }
299
+
300
+ // ---------- Sensor: Oracle strength (H4) ----------
301
+
302
+ export interface OracleStrengthResult {
303
+ weak: { name: string; hint: string }[]; // scenarios proving facet membership by a name-substring
304
+ facetClaims: number; // scenarios that touch a category/brand facet (denominator)
305
+ ratio: number; // 1 - weak/facetClaims (1 when none) — caps businessDepth
306
+ }
307
+
308
+ // "see all [<item name/title>] ... contain(s) {{<facet>.term}}" — asserting every item's NAME carries
309
+ // a category/brand term does NOT prove the item BELONGS to that facet (a "Dress" item need not contain
310
+ // "Dress" in its name). The strong oracle is the results-page title/header, a detail-page facet field,
311
+ // an API/DB query, or an explicit @manual:M2 deferral.
312
+ const WEAK_FACET_ORACLE = /\bsee all\b\s*\[[^\]]*\b(name|title|label)\b[^\]]*\][^{[]*\bcontains?\b[^{]*\{\{[^}]*\b(categ|brand|facet|filter|term)/i;
313
+ const FACET_REF = /\{\{[^}]*\b(categ|brand|facet|filter)\b[^}]*\}\}|\b(category|brand)\b/i;
314
+
315
+ export function oracleStrength(scenarios: ScenarioInfo[]): OracleStrengthResult {
316
+ const weak: { name: string; hint: string }[] = [];
317
+ for (const s of scenarios) {
318
+ if (s.manual) continue; // a @manual facet check is a deliberate deferral, not a weak automated oracle
319
+ if (WEAK_FACET_ORACLE.test(s.stepsText)) {
320
+ weak.push({
321
+ name: s.name.slice(0, 80),
322
+ hint: 'asserting every item NAME contains a category/brand term does not prove facet membership — assert the results-page TITLE/header carries the facet, a detail-page facet field, or an API/DB oracle; or defer the exhaustive check to @manual:M2.',
323
+ });
324
+ }
325
+ }
326
+ const facetClaims = scenarios.filter((s) => FACET_REF.test(s.stepsText) || FACET_REF.test(s.name)).length;
327
+ return { weak, facetClaims, ratio: facetClaims ? 1 - weak.length / Math.max(1, facetClaims) : 1 };
328
+ }
329
+
245
330
  /** Collect data-correctness themes (depth.requires) for a page-type + universal. */
246
331
  export function dataThemesFor(catalog: Catalog, pageType: string | null): CatalogTheme[] {
247
332
  const themes: CatalogTheme[] = [];
@@ -258,6 +343,7 @@ export interface BalanceResult {
258
343
  coreCount: number;
259
344
  secondaryCount: number;
260
345
  imbalanced: boolean;
346
+ unclassifiedRatio: number; // share of scenarios that fell into `other` (taxonomy drift signal, H1)
261
347
  note: string;
262
348
  }
263
349
 
@@ -270,23 +356,21 @@ export function coverageBalance(scenarios: ScenarioInfo[]): BalanceResult {
270
356
  for (const s of scenarios) {
271
357
  const cat = s.category || 'NONE';
272
358
  byCategory[cat] = (byCategory[cat] || 0) + 1;
273
- const bucket = Object.entries(BUCKETS).find(([, cats]) => cats.includes(cat))?.[0] || 'other';
274
- byBucket[bucket]++;
359
+ byBucket[bucketForCategory(s.category)]++;
275
360
  }
276
361
 
277
362
  const core = byBucket['business-core'];
278
363
  const secondary = byBucket['presentation'] + byBucket['validation-security'];
279
364
  const imbalanced = secondary > core * 1.5 && core > 0;
280
- return {
281
- byBucket,
282
- byCategory,
283
- coreCount: core,
284
- secondaryCount: secondary,
285
- imbalanced,
286
- note: imbalanced
365
+ const unclassifiedRatio = scenarios.length ? byBucket['other'] / scenarios.length : 0;
366
+ // A high `other` share means the VP taxonomy drifted from the catalog — the balance axis is then
367
+ // unreliable, so we surface it (audit.ts caps the balance contribution on this signal).
368
+ const note = unclassifiedRatio > 0.4
369
+ ? `Taxonomy drift: ${byBucket['other']}/${scenarios.length} scenarios have an unrecognised VP category (bucket=other) — balance is unreliable until the viewpoint codes match the catalog.`
370
+ : imbalanced
287
371
  ? `Secondary viewpoints (presentation+validation/security = ${secondary}) outweigh business-core (${core}) by >1.5x.`
288
- : 'Balanced.',
289
- };
372
+ : 'Balanced.';
373
+ return { byBucket, byCategory, coreCount: core, secondaryCount: secondary, imbalanced, unclassifiedRatio, note };
290
374
  }
291
375
 
292
376
  // ---------- Sensor 4: Duplicate clusters ----------
@@ -300,6 +384,10 @@ export interface DuplicateResult {
300
384
  export function duplicateClusters(scenarios: ScenarioInfo[]): DuplicateResult {
301
385
  const map = new Map<string, ScenarioInfo[]>();
302
386
  for (const s of scenarios) {
387
+ // @manual scenarios compile to a degenerate skeleton (no executable steps), so they cluster
388
+ // with each other even though each is a distinct judgment/capability-manual viewpoint. Excluding
389
+ // them keeps the exact-dup signal about genuinely-copied AUTOMATED scenarios (H2).
390
+ if (s.manual) continue;
303
391
  const arr = map.get(s.stepSkeleton) || [];
304
392
  arr.push(s);
305
393
  map.set(s.stepSkeleton, arr);