@sun-asterisk/sungen 3.2.0-beta.143 → 3.2.0-beta.144
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/commands/audit.d.ts.map +1 -1
- package/dist/cli/commands/audit.js +5 -3
- package/dist/cli/commands/audit.js.map +1 -1
- package/dist/generators/test-generator/code-generator.d.ts.map +1 -1
- package/dist/generators/test-generator/code-generator.js +3 -2
- package/dist/generators/test-generator/code-generator.js.map +1 -1
- package/dist/harness/audit.d.ts +15 -1
- package/dist/harness/audit.d.ts.map +1 -1
- package/dist/harness/audit.js +60 -7
- package/dist/harness/audit.js.map +1 -1
- package/dist/harness/capability-plan.d.ts +2 -0
- package/dist/harness/capability-plan.d.ts.map +1 -1
- package/dist/harness/capability-plan.js +4 -1
- package/dist/harness/capability-plan.js.map +1 -1
- package/dist/harness/flow-check.d.ts.map +1 -1
- package/dist/harness/flow-check.js +13 -4
- package/dist/harness/flow-check.js.map +1 -1
- package/dist/harness/parse.d.ts +2 -0
- package/dist/harness/parse.d.ts.map +1 -1
- package/dist/harness/parse.js +10 -2
- package/dist/harness/parse.js.map +1 -1
- package/dist/harness/quality-gates.d.ts +6 -0
- package/dist/harness/quality-gates.d.ts.map +1 -1
- package/dist/harness/quality-gates.js +15 -1
- package/dist/harness/quality-gates.js.map +1 -1
- package/dist/harness/sensors.d.ts +27 -0
- package/dist/harness/sensors.d.ts.map +1 -1
- package/dist/harness/sensors.js +91 -21
- package/dist/harness/sensors.js.map +1 -1
- package/package.json +2 -2
- package/src/cli/commands/audit.ts +5 -3
- package/src/generators/test-generator/code-generator.ts +3 -2
- package/src/harness/audit.ts +72 -10
- package/src/harness/capability-plan.ts +5 -2
- package/src/harness/flow-check.ts +13 -4
- package/src/harness/parse.ts +12 -2
- package/src/harness/quality-gates.ts +14 -1
- package/src/harness/sensors.ts +110 -22
package/src/harness/audit.ts
CHANGED
|
@@ -12,8 +12,8 @@ import { loadScenarios, parseViewpointOverview, ScenarioInfo, ViewpointEntry } f
|
|
|
12
12
|
import { featureBasename } from './unit-paths';
|
|
13
13
|
import {
|
|
14
14
|
loadCatalog, viewpointGate, assertionDepth, dataThemesFor, depthThresholdFor, coverageBalance, duplicateClusters, traceability, claimProof, taxonomyLint,
|
|
15
|
-
automatableManual, flowCoveredThemes,
|
|
16
|
-
GateResult, DepthResult, BalanceResult, DuplicateResult, TraceResult, ClaimProofResult, TaxonomyResult, Catalog, AutomatableManualResult,
|
|
15
|
+
automatableManual, flowCoveredThemes, flowRegressionDepth, oracleStrength,
|
|
16
|
+
GateResult, DepthResult, BalanceResult, DuplicateResult, TraceResult, ClaimProofResult, TaxonomyResult, Catalog, AutomatableManualResult, FlowDepthResult, OracleStrengthResult,
|
|
17
17
|
} from './sensors';
|
|
18
18
|
import { loadFlowScenarios } from './flow-check';
|
|
19
19
|
import { manualReasonMismatches, MANUAL_REASONS, buildPlan } from './capability-plan';
|
|
@@ -22,7 +22,7 @@ import { readIntent, projectRootFromScreenDir, IntentProfile } from './intent';
|
|
|
22
22
|
import { getProvenance, Provenance } from './provenance';
|
|
23
23
|
import { specCoverage, SpecCoverageResult, parseSpecClauses } from './spec-coverage';
|
|
24
24
|
import { downstreamScope, manualOracle, readText, DownstreamResult, ManualOracleResult,
|
|
25
|
-
negativeSideEffect, sourceBacked, crossArtifactOwnership } from './quality-gates';
|
|
25
|
+
negativeSideEffect, sourceBacked, crossArtifactOwnership, isolationRisk } from './quality-gates';
|
|
26
26
|
import { viewpointLedger, parseViewpointItems, LedgerResult } from './viewpoint-ledger';
|
|
27
27
|
import { capabilityRegistry } from '../capabilities/registry';
|
|
28
28
|
import { discoverAndRegisterCapabilities } from '../capabilities/discover';
|
|
@@ -42,6 +42,8 @@ export interface AuditReport {
|
|
|
42
42
|
downstream: DownstreamResult; // downstream screens referenced but under-covered
|
|
43
43
|
manualOracle: ManualOracleResult; // @manual scenarios lacking setup/action/oracle
|
|
44
44
|
automatableManual: AutomatableManualResult; // @manual that is actually automatable (deferred, not judgment) — TQ-2
|
|
45
|
+
flowDepth: FlowDepthResult; // H3 — stateful-flow regression depth (count / teardown / multi-source)
|
|
46
|
+
oracle: OracleStrengthResult; // H4 — facet-oracle strength (weak name-substring vs title/detail/API/DB)
|
|
45
47
|
ledger: LedgerResult; // atomic viewpoint-item coverage (per-bullet status)
|
|
46
48
|
calibration: { // #8 — multi-axis score so a high overall can't hide a weak axis
|
|
47
49
|
axes: Record<string, number>;
|
|
@@ -91,6 +93,20 @@ export function scoringCapabilityFor(catalogScreenName: string, defaultCap: stri
|
|
|
91
93
|
return defaultCap;
|
|
92
94
|
}
|
|
93
95
|
|
|
96
|
+
/**
 * H7 — senior-grade band.
 *
 * Scores of 9 and above are reserved for suites that clear every senior axis:
 *   - a stateful flow exercises FULL regression depth (count + teardown + multi-source),
 *   - no facet oracle is weak,
 *   - there is no parallel-cart isolation risk.
 * A suite that misses any axis is held at 8.9 at most, so "≥9" means senior-grade rather than
 * merely "themes covered". When no signal is raised (not stateful, zero weak oracles, no isolation
 * risk) the ceiling is 10, so screens/api suites are unaffected.
 *
 * @param rawOverall The unbanded overall score.
 * @param s Senior-axis signals: whether the flow is stateful, its regression-depth ratio, the
 *          number of weak oracles, and whether an isolation risk was detected.
 * @returns `rawOverall` limited to 10 when senior-grade, otherwise limited to 8.9.
 */
export function seniorBandedOverall(
  rawOverall: number,
  s: { flowStateful: boolean; flowRatio: number; oracleWeak: number; isolationRisk: boolean },
): number {
  // Only a stateful flow can fall short on regression depth; anything below a full ratio counts.
  const shallowFlow = s.flowStateful && !(s.flowRatio >= 1);
  const hasWeakOracle = s.oracleWeak !== 0;
  const missesSeniorAxis = shallowFlow || hasWeakOracle || s.isolationRisk;

  const ceiling = missesSeniorAxis ? 8.9 : 10;
  return Math.min(rawOverall, ceiling);
}
|
|
109
|
+
|
|
94
110
|
export function runAudit(screenDir: string, screenName: string): AuditReport {
|
|
95
111
|
// The feature filename is the unit's LAST segment — an api flow (`flows/<flow>`) lives at
|
|
96
112
|
// `<dir>/features/<flow>.feature`, not `features/flows/<flow>.feature` (which found 0 scenarios).
|
|
@@ -165,16 +181,43 @@ export function runAudit(screenDir: string, screenName: string): AuditReport {
|
|
|
165
181
|
const ownership = crossArtifactOwnership(screenDir, scenarios);
|
|
166
182
|
const unsourced = sourceBacked(scenarios, parseSpecClauses(specPath).frs.map((f) => f.id), parseViewpointItems(viewpointPath).map((i) => i.text), viewpoints.map((v) => v.id), featureText);
|
|
167
183
|
|
|
184
|
+
// H3 — stateful-flow regression depth. For a UI flow whose scenarios mutate a cart/checkout
|
|
185
|
+
// collection, the regression dimensions (count/quantity proof · teardown · multi-source) cap the
|
|
186
|
+
// businessDepth headroom: it can reach 1.0 only when all three are exercised, so a present-but-
|
|
187
|
+
// shallow flow can't claim a perfect score (floor 0.5 — assertion depth still dominates).
|
|
188
|
+
const isUiFlow = /^flows\//.test(catalogScreenName);
|
|
189
|
+
const flowDepth = isUiFlow ? flowRegressionDepth(scenarios) : { stateful: false, countProof: false, teardown: false, multiSource: false, ratio: 1, missing: [] } as FlowDepthResult;
|
|
190
|
+
const FLOW_DEPTH_FLOOR = 0.5;
|
|
191
|
+
// H4 — oracle strength: a weak facet oracle (name-substring "proves" category/brand membership)
|
|
192
|
+
// caps businessDepth the same way (floor 0.5). A suite with no facet claim, or a strong oracle, is
|
|
193
|
+
// neutral (ratio 1 → no cap), so existing snapshots are unaffected.
|
|
194
|
+
const oracle = oracleStrength(scenarios);
|
|
195
|
+
const ORACLE_FLOOR = 0.5;
|
|
196
|
+
|
|
168
197
|
// Sub-scores
|
|
169
198
|
const coverage = gate.coverageRatio;
|
|
170
|
-
const businessDepth =
|
|
171
|
-
|
|
172
|
-
?
|
|
173
|
-
: 1
|
|
199
|
+
const businessDepth = Math.min(
|
|
200
|
+
depth.bcDepthRatio,
|
|
201
|
+
flowDepth.stateful ? FLOW_DEPTH_FLOOR + (1 - FLOW_DEPTH_FLOOR) * flowDepth.ratio : 1,
|
|
202
|
+
oracle.weak.length ? ORACLE_FLOOR + (1 - ORACLE_FLOOR) * oracle.ratio : 1,
|
|
203
|
+
);
|
|
204
|
+
// When the taxonomy drifted (most scenarios unclassified), the balance axis is unreliable — cap it
|
|
205
|
+
// at 0.5 instead of awarding a vacuous 1.0 so a stale taxonomy fails loudly, not silently (H1).
|
|
206
|
+
const balanceScore = balance.unclassifiedRatio > 0.4
|
|
207
|
+
? 0.5
|
|
208
|
+
: balance.coreCount + balance.secondaryCount > 0
|
|
209
|
+
? Math.min(1, balance.coreCount / Math.max(1, balance.secondaryCount))
|
|
210
|
+
: 1;
|
|
174
211
|
const traceScore = 0.5 * trace.withVpCodeRatio + 0.5 * trace.mappedRatio;
|
|
175
212
|
|
|
176
213
|
// Business-weighted overall (coverage + depth dominate)
|
|
177
|
-
const
|
|
214
|
+
const rawOverall = (0.4 * coverage + 0.3 * businessDepth + 0.15 * balanceScore + 0.15 * traceScore) * 10;
|
|
215
|
+
// H7 — senior-grade band: the top decile (≥9) is reserved for suites that also clear the senior
|
|
216
|
+
// axes — a stateful flow with FULL regression depth (count + teardown + multi-source), no weak
|
|
217
|
+
// facet oracle, and no parallel-cart isolation risk. Otherwise the score is held just below 9, so
|
|
218
|
+
// "≥9" means senior-grade, not merely "themes covered". Neutral for screens/api (no signals).
|
|
219
|
+
const isoRisk = isolationRisk(featureText, flowDepth.stateful);
|
|
220
|
+
const overall = seniorBandedOverall(rawOverall, { flowStateful: flowDepth.stateful, flowRatio: flowDepth.ratio, oracleWeak: oracle.weak.length, isolationRisk: isoRisk });
|
|
178
221
|
|
|
179
222
|
const findings: string[] = [];
|
|
180
223
|
for (const c of flowCredits) {
|
|
@@ -195,6 +238,23 @@ export function runAudit(screenDir: string, screenName: string): AuditReport {
|
|
|
195
238
|
`add data assertions (\`... with {{value}}\`, \`see all ... contain {{v}}\`) or, if cross-screen, defer to a flow with @manual + reason.`,
|
|
196
239
|
);
|
|
197
240
|
}
|
|
241
|
+
// H5 — state isolation: a @parallel stateful flow that mutates the cart needs per-scenario
|
|
242
|
+
// isolation, else count/quantity asserts go flaky. Warn (advisory) when no mitigation is present
|
|
243
|
+
// (@cleanup:cart / @isolate / a "Given … empty cart" background / fresh context).
|
|
244
|
+
if (isoRisk) {
|
|
245
|
+
findings.push('ISOLATION-RISK: this @parallel flow mutates the cart but has no per-scenario isolation → cart/count/quantity asserts can go flaky when scenarios share state. Add `@cleanup:cart` (or `@isolate`, or a `Given User has an empty cart` background) so each scenario starts clean.');
|
|
246
|
+
}
|
|
247
|
+
if (flowDepth.stateful && flowDepth.missing.length) {
|
|
248
|
+
const how: Record<string, string> = {
|
|
249
|
+
'count-proof': 'assert the cart ROW COUNT / item QUANTITY (e.g. `... table with {{two_rows}}`, `Quantity column with {{qty}}`), not just the row presence',
|
|
250
|
+
'teardown': 'add a REMOVE/clear scenario that returns the cart to its empty state (the inverse operation)',
|
|
251
|
+
'multi-source': 'add to the cart from EVERY source on the page (the main list AND the recommended/related rail), not just one',
|
|
252
|
+
};
|
|
253
|
+
findings.push(`FLOW-DEPTH: this stateful flow exercises ${3 - flowDepth.missing.length}/3 regression dimensions — missing [${flowDepth.missing.join(', ')}] → ${flowDepth.missing.map((m) => how[m]).join('; ')}. (businessDepth is capped until covered.)`);
|
|
254
|
+
}
|
|
255
|
+
for (const w of oracle.weak) {
|
|
256
|
+
findings.push(`ORACLE-WEAK: "${w.name}" — ${w.hint}`);
|
|
257
|
+
}
|
|
198
258
|
for (const u of claim.unproven) {
|
|
199
259
|
const tag = u.severity === 'fail' ? 'CLAIM-UNPROVEN' : 'CLAIM-WEAK';
|
|
200
260
|
findings.push(`${tag}: "${u.name}" — title claims [${u.claim}] but steps lack ${u.need}. ${u.hint}`);
|
|
@@ -202,7 +262,9 @@ export function runAudit(screenDir: string, screenName: string): AuditReport {
|
|
|
202
262
|
for (const m of taxonomy.mislabeled) {
|
|
203
263
|
findings.push(`VP-MISLABEL: "${m.name}" is coded VP-${m.current} but reads as ${m.suggested} (signal: "${m.signal}") → re-tag VP-${m.suggested}-NNN so the coverage matrix isn't skewed.`);
|
|
204
264
|
}
|
|
205
|
-
if (balance.
|
|
265
|
+
if (balance.unclassifiedRatio > 0.4) {
|
|
266
|
+
findings.push(`TAXONOMY-UNCLASSIFIED: ${balance.note} → align the VP-<CATEGORY> codes with the catalog (or extend the bucket keywords) so coverage-balance is meaningful.`);
|
|
267
|
+
} else if (balance.imbalanced) {
|
|
206
268
|
findings.push(`BALANCE: ${balance.note} Stop expanding secondary viewpoints until business-core gaps are filled.`);
|
|
207
269
|
}
|
|
208
270
|
if (trace.mappedRatio < 0.5) {
|
|
@@ -323,7 +385,7 @@ export function runAudit(screenDir: string, screenName: string): AuditReport {
|
|
|
323
385
|
screen: screenName,
|
|
324
386
|
scenarioCount: scenarios.length,
|
|
325
387
|
gate, depth, claim, taxonomy, balance, duplicates, trace, spec,
|
|
326
|
-
taxonomyMismatch, downstream, manualOracle: manualOracleResult, automatableManual: autoManual, ledger, calibration,
|
|
388
|
+
taxonomyMismatch, downstream, manualOracle: manualOracleResult, automatableManual: autoManual, flowDepth, oracle, ledger, calibration,
|
|
327
389
|
score: {
|
|
328
390
|
overall: Math.round(overall * 10) / 10,
|
|
329
391
|
coverage: Math.round(coverage * 100) / 100,
|
|
@@ -56,7 +56,7 @@ export function classifyReason(text: string): string {
|
|
|
56
56
|
return '';
|
|
57
57
|
}
|
|
58
58
|
|
|
59
|
-
interface ParsedScenario { name: string; tags: string[]; manual: boolean; reason: string }
|
|
59
|
+
/** A scenario read from a feature file, with its tags and manual/deferral bookkeeping. */
interface ParsedScenario {
  /** Scenario title. */
  name: string;
  /** Tags attached to the scenario. */
  tags: string[];
  /** True when the scenario is not automated here (`@manual`, or deferred to a flow). */
  manual: boolean;
  /** Reason comment found above the scenario (used for `@manual`). */
  reason: string;
  /** True when the scenario carries the `@deferred:flow` tag. */
  deferredToFlow: boolean;
  /** Flow named by an `@owned-by:<flow>` tag, when present. */
  ownedBy?: string;
}
|
|
60
60
|
|
|
61
61
|
/** Parse scenarios with their tags + the reason comment line above (for @manual). */
|
|
62
62
|
export function parseScenarios(featurePath: string): ParsedScenario[] {
|
|
@@ -84,7 +84,10 @@ export function parseScenarios(featurePath: string): ParsedScenario[] {
|
|
|
84
84
|
else if (l === '') continue;
|
|
85
85
|
else break; // a real step → stop
|
|
86
86
|
}
|
|
87
|
-
|
|
87
|
+
const deferredToFlow = tags.some((t) => /^@deferred:flow$/i.test(t));
|
|
88
|
+
const ownedBy = (tags.find((t) => /^@owned-by:/i.test(t)) || '').slice('@owned-by:'.length) || undefined;
|
|
89
|
+
// @deferred:flow accounts like @manual on the screen (owned by a flow, not automated here) (H6).
|
|
90
|
+
out.push({ name: m[1].trim(), tags, manual: tags.some((t) => /^@manual\b/i.test(t)) || deferredToFlow, reason, deferredToFlow, ownedBy });
|
|
88
91
|
}
|
|
89
92
|
return out;
|
|
90
93
|
}
|
|
@@ -74,14 +74,23 @@ export function buildFlowCheck(cwd: string, onlyFlow?: string): FlowCheckReport
|
|
|
74
74
|
const deferrals: Deferral[] = [];
|
|
75
75
|
for (const sc of screens) {
|
|
76
76
|
for (const s of parseScenarios(featurePath(cwd, 'screens', sc))) {
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
const
|
|
77
|
+
// A deferral is the first-class `@deferred:flow` tag (H6) OR the legacy `@manual` + a
|
|
78
|
+
// "deferred to a flow" comment (back-compat). Either marks a cross-screen case owned by a flow.
|
|
79
|
+
const isDeferral = s.deferredToFlow || (s.manual && /deferred to a flow/i.test(s.reason));
|
|
80
|
+
if (!isDeferral) continue;
|
|
81
|
+
// Targets come from the comment hint; a tag-only @deferred:flow (no comment) falls back to the
|
|
82
|
+
// scenario TITLE so the covering flow scenario can still be located.
|
|
83
|
+
const targets = targetsFromHint([s.reason, s.name].join(' '));
|
|
84
|
+
// `@owned-by:<flow>` names the owner explicitly → only that flow's scenarios can cover it
|
|
85
|
+
// (a false @owned-by is then surfaced as missing). Else any flow may cover it (legacy).
|
|
86
|
+
const pool = s.ownedBy ? flowScenarios.filter((fs2) => fs2.flow === s.ownedBy) : flowScenarios;
|
|
87
|
+
const matches = pool.filter((fs2) => targets.some((t) => fs2.haystack.includes(t)));
|
|
80
88
|
let verdict: Deferral['verdict'] = 'missing';
|
|
81
89
|
let via: string | undefined;
|
|
82
90
|
if (matches.some((m) => m.deep)) { verdict = 'covered'; via = matches.find((m) => m.deep)!.flow; }
|
|
83
91
|
else if (matches.length) { verdict = 'shallow'; via = matches[0].flow; }
|
|
84
|
-
|
|
92
|
+
const hint = s.ownedBy ? `${s.reason || 'deferred to a flow'} (owned-by: ${s.ownedBy})` : s.reason;
|
|
93
|
+
deferrals.push({ screen: sc, scenario: s.name, hint, targets, verdict, via });
|
|
85
94
|
}
|
|
86
95
|
}
|
|
87
96
|
|
package/src/harness/parse.ts
CHANGED
|
@@ -34,6 +34,8 @@ export interface ScenarioInfo {
|
|
|
34
34
|
queryRefs?: string[]; // named queries referenced by this scenario (inline `query [name]` + @query: tags)
|
|
35
35
|
apiRefs?: string[]; // named API endpoints referenced by this scenario (@api: tags)
|
|
36
36
|
requiresCaps?: string[]; // @requires:<cap> — automation-ready but needs an opt-in driver (TQ-11)
|
|
37
|
+
deferredToFlow?: boolean; // @deferred:flow — owned by a flow, not automated on this screen (H6)
|
|
38
|
+
ownedByFlow?: string; // @owned-by:<flow> — the flow that owns this deferred scenario (H6)
|
|
37
39
|
}
|
|
38
40
|
|
|
39
41
|
/** Format-tolerant: is this token an ID (project's scheme), not a prose word?
|
|
@@ -101,7 +103,10 @@ const PRIORITY_TAGS: Record<string, Priority> = { '@high': 'high', '@normal': 'n
|
|
|
101
103
|
|
|
102
104
|
function classifyScenario(sc: ParsedScenario): ScenarioInfo {
|
|
103
105
|
const tags = sc.tags || [];
|
|
104
|
-
const
|
|
106
|
+
const deferredToFlow = tags.includes('@deferred:flow');
|
|
107
|
+
const ownedByFlow = (tags.find((t: string) => /^@owned-by:/i.test(t)) || '').slice('@owned-by:'.length) || undefined;
|
|
108
|
+
// @deferred:flow is owned by a flow → not automated on this screen, so it accounts like @manual (H6).
|
|
109
|
+
const manual = tags.includes('@manual') || deferredToFlow;
|
|
105
110
|
const casesTag = tags.find((t) => t.startsWith('@cases:'));
|
|
106
111
|
const casesDataset = casesTag ? casesTag.slice('@cases:'.length).trim() : undefined;
|
|
107
112
|
// Named-query references: @query:<name>[(overrides)] tags + inline `query [name]` step refs.
|
|
@@ -118,7 +123,10 @@ function classifyScenario(sc: ParsedScenario): ScenarioInfo {
|
|
|
118
123
|
let priority: Priority = 'unknown';
|
|
119
124
|
for (const t of tags) if (PRIORITY_TAGS[t]) priority = PRIORITY_TAGS[t];
|
|
120
125
|
|
|
121
|
-
|
|
126
|
+
// Category is everything between `VP-` and the final `-<sequence>` — INCLUDING hyphens, so
|
|
127
|
+
// compound categories (VP-LIST-DISPLAY-01, VP-ADD-TO-CART-03, VP-PRODUCT-DISCOVERY-02) parse,
|
|
128
|
+
// not just single-word ones. A single-word category (VP-CART-001) still works. (H1)
|
|
129
|
+
const codeMatch = sc.name.match(/\bVP-([A-Z]+(?:-[A-Z]+)*)-\d+/i);
|
|
122
130
|
const vpCode = codeMatch ? codeMatch[0].toUpperCase() : undefined;
|
|
123
131
|
const category = codeMatch ? codeMatch[1].toUpperCase() : undefined;
|
|
124
132
|
// Project-scheme ID: the leading token of the title (VP0-001 / MS-HP-001 / VP-LIST-001).
|
|
@@ -173,6 +181,8 @@ function classifyScenario(sc: ParsedScenario): ScenarioInfo {
|
|
|
173
181
|
queryRefs: queryRefs.size ? [...queryRefs] : undefined,
|
|
174
182
|
apiRefs: apiRefs.size ? [...apiRefs] : undefined,
|
|
175
183
|
requiresCaps: requiresCaps.length ? requiresCaps : undefined,
|
|
184
|
+
deferredToFlow: deferredToFlow || undefined,
|
|
185
|
+
ownedByFlow,
|
|
176
186
|
};
|
|
177
187
|
}
|
|
178
188
|
|
|
@@ -20,7 +20,10 @@ function downstreamRoutes(specText: string): string[] {
|
|
|
20
20
|
const routes = new Set<string>();
|
|
21
21
|
for (const line of specText.split('\n')) {
|
|
22
22
|
if (!/success|navigat|to \(|→/i.test(line)) continue;
|
|
23
|
-
|
|
23
|
+
// A real route's leading `/` sits at a path boundary (start, whitespace, backtick, quote, paren),
|
|
24
|
+
// NOT after a letter/digit. The lookbehind rejects prose slashes like "text/icon" or
|
|
25
|
+
// "category/brand" that aren't routes at all (H2 — they produced /icon, /button, /brand).
|
|
26
|
+
for (const m of line.matchAll(/(?<![A-Za-z0-9])(\/[a-z][a-z0-9/_-]+)`?/gi)) {
|
|
24
27
|
const r = m[1];
|
|
25
28
|
if (r !== ownRoute && r.split('/').length > ownRoute.split('/').length - 0) routes.add(r);
|
|
26
29
|
}
|
|
@@ -29,6 +32,16 @@ function downstreamRoutes(specText: string): string[] {
|
|
|
29
32
|
return [...routes].filter((r) => r !== ownRoute && (!ownRoute || r.startsWith(ownRoute + '/') || r.split('/').length >= 3));
|
|
30
33
|
}
|
|
31
34
|
|
|
35
|
+
/**
 * H5 — reports an unmitigated state-isolation risk.
 *
 * A `@parallel` stateful (cart-mutating) flow without per-scenario isolation is flaky: scenarios
 * share state, so cart count/quantity asserts race. The risk counts as mitigated when the feature
 * text mentions any of: `@cleanup:cart`, `@isolate`, "empty cart", a fresh (browser) context, or a
 * new context. All checks are case-insensitive and run against the whole feature text.
 *
 * @param featureText Raw text of the feature file.
 * @param stateful Whether the suite was classified as stateful.
 * @returns `true` only when the flow is stateful, tagged `@parallel`, and no mitigation is found.
 */
export function isolationRisk(featureText: string, stateful: boolean): boolean {
  const parallelAndStateful = stateful && /@parallel\b/i.test(featureText);
  if (!parallelAndStateful) return false;

  const mitigated = /@cleanup:cart\b|@isolate\b|empty cart|fresh (?:browser )?context|new context/i.test(featureText);
  return !mitigated;
}
|
|
44
|
+
|
|
32
45
|
export function downstreamScope(specText: string, scenarios: ScenarioInfo[]): DownstreamResult {
|
|
33
46
|
const routes = downstreamRoutes(specText);
|
|
34
47
|
const underCovered: { route: string; slug: string }[] = [];
|
package/src/harness/sensors.ts
CHANGED
|
@@ -11,17 +11,36 @@ import * as path from 'path';
|
|
|
11
11
|
import { parse as parseYaml } from 'yaml';
|
|
12
12
|
import { ScenarioInfo, ViewpointEntry, idPrefix } from './parse';
|
|
13
13
|
|
|
14
|
-
// Business-critical category
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
//
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
'
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
14
|
+
// Keywords that mark a VP category as business-critical. They are matched by CONTAINMENT against
// the category, so a compound category such as LIST-DISPLAY / ADD-TO-CART / PRODUCT-DISCOVERY is
// classified by the business keyword it contains.
const BUSINESS_CRITICAL_CATS = [
  // UI commerce cores
  'LIST',
  'CART',
  'PRODUCT',
  'FILTER',
  'CHECKOUT',
  'ORDER',
  'DETAIL',
  'DISCOVERY',
  'CATEGORY',
  'BRAND',
  'DUPLICATE',
  'CONSISTENCY',
  // API / DB capability cores — for an api/db suite the operation IS the business core
  'API',
  'ENDPOINT',
  'CRUD',
  'QUERY',
  'CONTRACT',
  'RESOURCE',
];

// Coverage-balance buckets with their keyword sets, listed in PRECEDENCE order: the first bucket
// with a matching keyword wins (H1). Matching is substring containment, so LIST-DISPLAY lands in
// business-core (LIST) rather than presentation (DISPLAY), and CART-TRANSITION lands in
// business-core (CART) rather than behavior (TRANSITION).
const BUCKET_ORDER: Array<[string, string[]]> = [
  ['business-core', BUSINESS_CRITICAL_CATS],
  ['behavior', ['LOGIC', 'TRANSITION', 'WORKFLOW']],
  ['validation-security', ['VAL', 'SEC', 'SUB', 'AUTH', 'LOGIN']],
  ['navigation', ['NAV']],
  ['presentation', ['UI', 'LAYOUT', 'RESPONSIVE', 'DISPLAY', 'SEO', 'ACCESSIBILITY', 'USABILITY', 'VISUAL']],
];

// The same bucket → keywords mapping as a lookup table.
const BUCKETS: Record<string, string[]> = Object.fromEntries(BUCKET_ORDER);

/**
 * Classify a VP category into a balance bucket by keyword containment + precedence (H1).
 *
 * The category is upper-cased, then the buckets are scanned in `BUCKET_ORDER`; the first bucket
 * owning a keyword that occurs anywhere inside the category is returned.
 *
 * NOTE(review): containment also matches a keyword embedded in a longer word (e.g. `BORDER`
 * contains `ORDER`), so short keywords can over-match — confirm that is acceptable.
 *
 * @param category VP category (any casing); may be undefined or empty.
 * @returns The bucket name, or `'other'` when the category is missing or matches no keyword.
 */
export function bucketForCategory(category: string | undefined): string {
  if (!category) return 'other';

  const upper = category.toUpperCase();
  const hit = BUCKET_ORDER.find(([, keywords]) => keywords.some((kw) => upper.includes(kw)));
  return hit ? hit[0] : 'other';
}
|
|
25
44
|
|
|
26
45
|
export interface ThemeDepth {
|
|
27
46
|
requires: string; // 'data-assertion' → scenarios on this theme must assert DATA
|
|
@@ -242,6 +261,72 @@ export function flowCoveredThemes(
|
|
|
242
261
|
return out;
|
|
243
262
|
}
|
|
244
263
|
|
|
264
|
+
// ---------- Sensor: Flow regression-depth (H3) ----------
|
|
265
|
+
|
|
266
|
+
/** Outcome of the flow regression-depth sensor (H3). */
export interface FlowDepthResult {
  /** The suite mutates a cart/checkout collection (add/remove/quantity). */
  stateful: boolean;
  /** The suite asserts a row count / item quantity, not just presence. */
  countProof: boolean;
  /** The suite removes an item and mentions the empty/zero state. */
  teardown: boolean;
  /** The suite adds to the cart and mentions a second source (e.g. recommended/related). */
  multiSource: boolean;
  /** Covered dimensions / 3 (1 when not stateful → neutral). */
  ratio: number;
  /** Names of the dimensions that are not covered. */
  missing: string[];
}

/**
 * Grades a STATEFUL flow's regression depth beyond "theme covered" (H3).
 *
 * A cart/checkout flow that only proves an item is present is shallower than one that proves the
 * quantity/count, tears the state back down (remove → empty), and exercises every add-to-cart
 * source. The resulting ratio caps the businessDepth headroom in audit.ts, so a thin stateful flow
 * cannot reach a perfect score.
 *
 * Every signal is a case-insensitive keyword match evaluated across the WHOLE suite: a dimension
 * counts as covered when any scenario haystack matches, and the two halves of a compound check
 * (e.g. "remove" and "empty") may come from different scenarios.
 *
 * @param scenarios Scenarios of the suite; only their `haystack` text is read.
 * @returns The per-dimension verdicts, or a neutral result (`ratio: 1`, nothing missing) when the
 *          suite is not stateful.
 */
export function flowRegressionDepth(scenarios: ScenarioInfo[]): FlowDepthResult {
  const haystacks = scenarios.map((scenario) => scenario.haystack);

  // True when at least one scenario haystack matches the pattern.
  function suiteMatches(pattern: RegExp): boolean {
    for (const text of haystacks) {
      if (pattern.test(text)) return true;
    }
    return false;
  }

  const addsToCart = suiteMatches(/\b(add to cart|add to basket|added (?:to )?(?:the )?cart|adds? .* cart)\b/i);
  const touchesCart = suiteMatches(/\b(cart|basket|checkout)\b/i);
  const mutatesLines = suiteMatches(/\b(remove|delete|quantity|cart line|cart row)\b/i);

  // Stateful = the suite talks about a cart-like collection AND changes it in some way.
  if (!touchesCart || !(addsToCart || mutatesLines)) {
    return { stateful: false, countProof: false, teardown: false, multiSource: false, ratio: 1, missing: [] };
  }

  // 1. Count/quantity proof — a row count or item quantity, not just presence of a row.
  const countProof = suiteMatches(/\b(quantity|qty|two (?:rows|lines|cart)|row count|count column|number of items|one[_ ]row|two[_ ]rows|qty[_ ])/i);

  // 2. Teardown — a removal plus a mention of the empty/zero state (the inverse operation).
  const removes = suiteMatches(/\b(remove|delete|clear)\b/i);
  const seesEmpty = suiteMatches(/\b(empty|no items|zero|removed|0 items)\b/i);
  const teardown = removes && seesEmpty;

  // 3. Multi-source — the cart is fed from >1 source (the main list AND a recommended/related rail).
  const mentionsSecondSource = suiteMatches(/\b(recommended|related|you may also|suggest)\b/i);
  const multiSource = mentionsSecondSource && addsToCart;

  const dimensions: Array<[string, boolean]> = [
    ['count-proof', countProof],
    ['teardown', teardown],
    ['multi-source', multiSource],
  ];
  const missing: string[] = [];
  for (const [label, covered] of dimensions) {
    if (!covered) missing.push(label);
  }
  const coveredCount = dimensions.length - missing.length;

  return { stateful: true, countProof, teardown, multiSource, ratio: coveredCount / dimensions.length, missing };
}
|
|
299
|
+
|
|
300
|
+
// ---------- Sensor: Oracle strength (H4) ----------
|
|
301
|
+
|
|
302
|
+
/** Outcome of the oracle-strength sensor (H4). */
export interface OracleStrengthResult {
  /** Automated scenarios that "prove" facet membership by a name-substring. */
  weak: { name: string; hint: string }[];
  /** Scenarios making a facet claim: a category/brand reference OR a weak facet oracle (denominator). */
  facetClaims: number;
  /** 1 - weak/facetClaims, always within [0, 1] (1 when there are no facet claims) — caps businessDepth. */
  ratio: number;
}

// "see all [<item name/title>] ... contain(s) {{<facet>.term}}" — asserting every item's NAME carries
// a category/brand term does NOT prove the item BELONGS to that facet (a "Dress" item need not contain
// "Dress" in its name). The strong oracle is the results-page title/header, a detail-page facet field,
// an API/DB query, or an explicit @manual:M2 deferral.
const WEAK_FACET_ORACLE = /\bsee all\b\s*\[[^\]]*\b(name|title|label)\b[^\]]*\][^{[]*\bcontains?\b[^{]*\{\{[^}]*\b(categ|brand|facet|filter|term)/i;
// A scenario references a facet: a `{{…}}` placeholder naming one, or the bare word category/brand.
const FACET_REF = /\{\{[^}]*\b(categ|brand|facet|filter)\b[^}]*\}\}|\b(category|brand)\b/i;

/**
 * Measures how many facet (category/brand) claims rest on a weak name-substring oracle (H4).
 *
 * A non-manual scenario whose steps match `WEAK_FACET_ORACLE` is reported as weak; a `@manual`
 * facet check is a deliberate deferral and is never reported. The denominator counts every
 * scenario that references a facet in its steps or title, PLUS every weak scenario.
 *
 * The two patterns do not accept the same placeholders: `{{search.term}}`, `{{filter_term}}` and
 * `{{categories.x}}` match the weak pattern but not `FACET_REF`. Counting the denominator from
 * `FACET_REF` alone therefore let `weak` exceed `facetClaims`, producing a negative ratio (which
 * drags the audit's businessDepth below its floor), or reported ratio 1 while weak oracles
 * existed. A weak oracle is by definition a facet claim, so it is always counted, and the ratio is
 * clamped to [0, 1].
 *
 * @param scenarios Scenarios of the suite; reads `name`, `stepsText` and `manual`.
 * @returns The weak scenarios (title truncated to 80 chars, with a remediation hint), the number
 *          of facet claims, and the strong-oracle ratio.
 */
export function oracleStrength(scenarios: ScenarioInfo[]): OracleStrengthResult {
  const weak: { name: string; hint: string }[] = [];
  let facetClaims = 0;

  for (const s of scenarios) {
    // A @manual facet check is a deliberate deferral, not a weak automated oracle.
    const isWeak = !s.manual && WEAK_FACET_ORACLE.test(s.stepsText);
    if (isWeak) {
      weak.push({
        name: s.name.slice(0, 80),
        hint: 'asserting every item NAME contains a category/brand term does not prove facet membership — assert the results-page TITLE/header carries the facet, a detail-page facet field, or an API/DB oracle; or defer the exhaustive check to @manual:M2.',
      });
    }
    // Count weak scenarios even when FACET_REF misses them, so weak.length <= facetClaims.
    if (isWeak || FACET_REF.test(s.stepsText) || FACET_REF.test(s.name)) facetClaims++;
  }

  // The clamp is defensive; the counting above already keeps the ratio within [0, 1].
  const ratio = facetClaims ? Math.max(0, 1 - weak.length / facetClaims) : 1;
  return { weak, facetClaims, ratio };
}
|
|
329
|
+
|
|
245
330
|
/** Collect data-correctness themes (depth.requires) for a page-type + universal. */
|
|
246
331
|
export function dataThemesFor(catalog: Catalog, pageType: string | null): CatalogTheme[] {
|
|
247
332
|
const themes: CatalogTheme[] = [];
|
|
@@ -258,6 +343,7 @@ export interface BalanceResult {
|
|
|
258
343
|
coreCount: number;
|
|
259
344
|
secondaryCount: number;
|
|
260
345
|
imbalanced: boolean;
|
|
346
|
+
unclassifiedRatio: number; // share of scenarios that fell into `other` (taxonomy drift signal, H1)
|
|
261
347
|
note: string;
|
|
262
348
|
}
|
|
263
349
|
|
|
@@ -270,23 +356,21 @@ export function coverageBalance(scenarios: ScenarioInfo[]): BalanceResult {
|
|
|
270
356
|
for (const s of scenarios) {
|
|
271
357
|
const cat = s.category || 'NONE';
|
|
272
358
|
byCategory[cat] = (byCategory[cat] || 0) + 1;
|
|
273
|
-
|
|
274
|
-
byBucket[bucket]++;
|
|
359
|
+
byBucket[bucketForCategory(s.category)]++;
|
|
275
360
|
}
|
|
276
361
|
|
|
277
362
|
const core = byBucket['business-core'];
|
|
278
363
|
const secondary = byBucket['presentation'] + byBucket['validation-security'];
|
|
279
364
|
const imbalanced = secondary > core * 1.5 && core > 0;
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
imbalanced
|
|
286
|
-
note: imbalanced
|
|
365
|
+
const unclassifiedRatio = scenarios.length ? byBucket['other'] / scenarios.length : 0;
|
|
366
|
+
// A high `other` share means the VP taxonomy drifted from the catalog — the balance axis is then
|
|
367
|
+
// unreliable, so we surface it (audit.ts caps the balance contribution on this signal).
|
|
368
|
+
const note = unclassifiedRatio > 0.4
|
|
369
|
+
? `Taxonomy drift: ${byBucket['other']}/${scenarios.length} scenarios have an unrecognised VP category (bucket=other) — balance is unreliable until the viewpoint codes match the catalog.`
|
|
370
|
+
: imbalanced
|
|
287
371
|
? `Secondary viewpoints (presentation+validation/security = ${secondary}) outweigh business-core (${core}) by >1.5x.`
|
|
288
|
-
: 'Balanced.'
|
|
289
|
-
};
|
|
372
|
+
: 'Balanced.';
|
|
373
|
+
return { byBucket, byCategory, coreCount: core, secondaryCount: secondary, imbalanced, unclassifiedRatio, note };
|
|
290
374
|
}
|
|
291
375
|
|
|
292
376
|
// ---------- Sensor 4: Duplicate clusters ----------
|
|
@@ -300,6 +384,10 @@ export interface DuplicateResult {
|
|
|
300
384
|
export function duplicateClusters(scenarios: ScenarioInfo[]): DuplicateResult {
|
|
301
385
|
const map = new Map<string, ScenarioInfo[]>();
|
|
302
386
|
for (const s of scenarios) {
|
|
387
|
+
// @manual scenarios compile to a degenerate skeleton (no executable steps), so they cluster
|
|
388
|
+
// with each other even though each is a distinct judgment/capability-manual viewpoint. Excluding
|
|
389
|
+
// them keeps the exact-dup signal about genuinely-copied AUTOMATED scenarios (H2).
|
|
390
|
+
if (s.manual) continue;
|
|
303
391
|
const arr = map.get(s.stepSkeleton) || [];
|
|
304
392
|
arr.push(s);
|
|
305
393
|
map.set(s.stepSkeleton, arr);
|