@sun-asterisk/sungen 3.0.0-beta.83 → 3.0.0-beta.92

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/dist/cli/commands/audit.d.ts.map +1 -1
  2. package/dist/cli/commands/audit.js +0 -14
  3. package/dist/cli/commands/audit.js.map +1 -1
  4. package/dist/cli/index.js +0 -2
  5. package/dist/cli/index.js.map +1 -1
  6. package/dist/harness/audit.d.ts +0 -14
  7. package/dist/harness/audit.d.ts.map +1 -1
  8. package/dist/harness/audit.js +3 -56
  9. package/dist/harness/audit.js.map +1 -1
  10. package/dist/harness/parse.d.ts +0 -6
  11. package/dist/harness/parse.d.ts.map +1 -1
  12. package/dist/harness/parse.js +3 -18
  13. package/dist/harness/parse.js.map +1 -1
  14. package/dist/harness/sensors.d.ts.map +1 -1
  15. package/dist/harness/sensors.js +5 -53
  16. package/dist/harness/sensors.js.map +1 -1
  17. package/dist/orchestrator/templates/ai-instructions/claude-skill-tc-generation.md +1 -24
  18. package/dist/orchestrator/templates/ai-instructions/github-skill-sungen-tc-generation.md +7 -43
  19. package/package.json +2 -2
  20. package/src/cli/commands/audit.ts +0 -12
  21. package/src/cli/index.ts +0 -2
  22. package/src/harness/audit.ts +4 -68
  23. package/src/harness/parse.ts +3 -19
  24. package/src/harness/sensors.ts +6 -52
  25. package/src/orchestrator/templates/ai-instructions/claude-skill-tc-generation.md +1 -24
  26. package/src/orchestrator/templates/ai-instructions/github-skill-sungen-tc-generation.md +7 -43
  27. package/dist/cli/commands/eval.d.ts +0 -3
  28. package/dist/cli/commands/eval.d.ts.map +0 -1
  29. package/dist/cli/commands/eval.js +0 -37
  30. package/dist/cli/commands/eval.js.map +0 -1
  31. package/dist/harness/eval/skill-lint.d.ts +0 -16
  32. package/dist/harness/eval/skill-lint.d.ts.map +0 -1
  33. package/dist/harness/eval/skill-lint.js +0 -129
  34. package/dist/harness/eval/skill-lint.js.map +0 -1
  35. package/dist/harness/quality-gates.d.ts +0 -29
  36. package/dist/harness/quality-gates.d.ts.map +0 -1
  37. package/dist/harness/quality-gates.js +0 -183
  38. package/dist/harness/quality-gates.js.map +0 -1
  39. package/dist/harness/viewpoint-ledger.d.ts +0 -23
  40. package/dist/harness/viewpoint-ledger.d.ts.map +0 -1
  41. package/dist/harness/viewpoint-ledger.js +0 -118
  42. package/dist/harness/viewpoint-ledger.js.map +0 -1
  43. package/src/cli/commands/eval.ts +0 -28
  44. package/src/harness/eval/skill-lint.ts +0 -87
  45. package/src/harness/quality-gates.ts +0 -152
  46. package/src/harness/viewpoint-ledger.ts +0 -80
@@ -105,17 +105,6 @@ Auto-detected by `create-test` before invoking this skill:
105
105
  2. Each row / bullet / item = 1 viewpoint → add to `Viewpoint items` in Coverage Map.
106
106
  3. Do NOT pre-classify into buckets before scanning — classify only when
107
107
  writing the scenario.
108
- 4. **If it declares viewpoint IDs** (e.g. `VP0`, `VP1`…`VP12`, `MS-HP-001`), capture each
109
- item WITH its ID and **reuse that ID as the scenario code** — do not invent a generic
110
- `VP-<CAT>` scheme (the harness Taxonomy-match gate FAILs on mismatch).
111
- - `qa/context.md` — project-wide context set by the QA lead. Read ONCE before building the Coverage Map; apply to every screen. Extraction rules:
112
- - **Roles** → for each role in the table: add to the `@auth:X` tag pool; generate a VP-SEC blocked-access scenario for every role boundary relevant to this screen.
113
- - **Testing strategy → Focus areas** → if `security` listed: VP-SEC is mandatory Tier 1 for every free-text input regardless of spec risk level; if `ui` not listed: all VP-UI scenarios move to Tier 2 minimum.
114
- - **Testing strategy → Mandatory coverage** → each line is a hard override applied to this screen regardless of spec risk; document in `Context constraints` of the Coverage Map.
115
- - **Testing strategy → Deprioritize/skip** → record in `Context constraints`; suppress those VP categories from Tier 1 generation.
116
- - **Global business rules** → add each to the `Business rules` section tagged `[G]` (e.g. `[G1 – soft-delete only]`); treat as `HIGH` risk unless stated otherwise.
117
- - **Error patterns** → use as fallback only when `spec.md` does not give exact error text; never override spec-specified messages.
118
- - If `qa/context.md` is absent: proceed without it — no impact on the generation flow.
119
108
 
120
109
  **Single screen focus**: one URL = one screen. Modals on same page = part of this screen.
121
110
  This means: do not test other screens' UI layout or navigation. It does NOT mean skip documenting business outcomes that your screen's actions cause on other surfaces. Those cross-surface outcomes must appear in the Coverage Map and be covered by at least `@manual` scenarios.
@@ -140,11 +129,6 @@ Read `spec.md` fully, then extract into a Coverage Map **before writing any scen
140
129
  **Risk tags:** HIGH = complex business rules, cascading fields, multi-step state changes, auth/integration. LOW = display-only, static labels, read-only fields.
141
130
 
142
131
  ```
143
- Context constraints: [populated from qa/context.md before writing any scenario]
144
- roles: [list roles, e.g. admin / manager / staff]
145
- strategy: [active overrides, e.g. "VP-SEC mandatory T1", "VP-UI → T2 only"]
146
- global rules: [G1 – ...] → also appear in Business rules below tagged [G]
147
- → leave empty if qa/context.md is absent or has no entries applicable to this screen
148
132
  User journeys: [J1 – ...], [J2 – ...]
149
133
  Validation rules: [V1 – field → "exact error text"], [V2 – ...]
150
134
  Business rules: [B1 HIGH – ...], [B2 LOW – ...]
@@ -237,7 +221,7 @@ Security: [S1 – admin only]
237
221
  | **auth** | valid-login · invalid-credential · access-control |
238
222
 
239
223
  **Required assertion shapes (use these, not bare visibility):**
240
- - Card info: assert at **card level** (image+name+price together), e.g. `User see all [Product Card] contain {{...}}` — not `see [Section]` (section-level passes even if one card lacks price).
224
+ - Card info: assert at **card level** (image+name+price together), e.g. `User see all [Product Card] contain {{...}}` — not `see [Section]`.
241
225
  - Cross-screen consistency (detail/cart): **capture then compare** —
242
226
  ```gherkin
243
227
  When User remember [Product Name] text as {{selected_product_name}}
@@ -255,33 +239,13 @@ Security: [S1 – admin only]
255
239
  - **If the spec lacks the concrete value** a deep assertion needs (exact message, price, count): still write the deep shape with a `{{var}}` placeholder and leave a `# SPEC-GAP: <field> value not in spec` comment — do **not** downgrade to `see [X] section`. A visible gap is better than a silent shallow pass.
256
240
  - **Blind-Spot Memory:** before finishing, run `sungen blindspot list --prompt` (Bash) and make sure the suite satisfies each recorded pattern (e.g. "for any Add/Create action: check success + resulting data state + duplicate/double-submit"). These are gaps QA hit before — don't repeat them.
257
241
 
258
- **First-pass anti-patterns (these are exactly what the gate/reviewer reject — avoid them):**
259
- - Title↔steps mismatch: e.g. a "no-result state" scenario that clicks a query which **returns** products. Steps must create the condition the title claims.
260
- - Tautology `Then`: `click [Next Slide]` → `see [Carousel] section` (always visible, proves nothing). Assert the change (new slide title differs).
261
- - Business-critical scenario ending at `see [Added] modal` / `see [Cart] page` / `see [Category Products] page` with no data assertion.
242
+ **First-pass anti-patterns (exactly what the gate/reviewer reject — avoid them):**
243
+ - Title↔steps mismatch (e.g. a "no-result" scenario that clicks a query which returns products).
244
+ - Tautology `Then`: `click [Next Slide]` → `see [Carousel] section` (proves nothing).
245
+ - Business-critical scenario ending at `see [Added] modal` / `see [Cart] page` with no data assertion.
262
246
  - Brand filter covered only as navigation (must assert products belong to the brand).
263
247
 
264
- **Balance:** cover all the above (deep) BEFORE expanding subscription / UI-presence / extra validation edge cases. Do not over-invest in subscription while cart/detail/filter correctness are shallow.
265
-
266
- #### Harness gates — satisfy on the FIRST pass (don't make the repair loop fix them)
267
-
268
- `sungen audit` enforces these. Generate compliant output up front:
269
-
270
- 1. **Taxonomy-match** (`VP-TAXONOMY-MISMATCH`, gate-FAIL) — when `test-viewpoint.md` declares its own viewpoint IDs (e.g. `VP0`, `VP1`, … `VP12`, `MS-HP-001`, `MS-EH-001`), **reuse those IDs verbatim as the scenario codes**. Do NOT invent a generic `VP-UI / VP-LOGIC / VP-VAL` scheme — that breaks the coverage matrix. Only fall back to `VP-<CATEGORY>-<NNN>` when the viewpoint file declares no IDs.
271
- 2. **Spec-coverage triggers** (`TRIGGER-UNCOVERED`, gate-FAIL) — the Validation-Rules table lists a **trigger** per constraint (e.g. `blur, submit`). Generate one scenario **per (constraint × trigger)** — a `format` rule validating *on blur AND on submit* needs BOTH a blur scenario (`press Tab`) and a submit scenario (`click [Submit]` / `press Enter`). Never collapse the trigger × input matrix to one representative case.
272
- 3. **Claim-Proof** (`CLAIM-UNPROVEN`) — a title claiming `all`/`only`/`every`/`single`/`correct`/`same`/`changes`/`hidden`/`cleared`/`restored`/`independent`/`sanitized`/`announces` MUST have the matching assertion (`see all …`, count, `remember`+compare, `is hidden`, return-and-assert-empty, etc.). If the title promises it, the steps must prove it.
273
- 4. **Downstream-scope** (`DOWNSTREAM-SCOPE-MISSING`) — when the spec's Navigation Flow / success target is **another screen** (e.g. a confirmation/sent page), don't stop at a terminal `see [X] page`. Either cover that screen's content/guards (if its viewpoint items are in scope — they often have their own `MS-*` IDs), or scaffold it (`sungen add --screen <name>`) and note the handoff. Do not silently drop the downstream surface.
274
- 5. **Manual-oracle** (`MANUAL-STEPS-INSUFFICIENT`) — every `@manual` scenario needs **setup · action · observable expected · oracle/tool**, not a one-line note. Use this comment shape:
275
- ```gherkin
276
- @high @manual
277
- Scenario: VP-… <claim>
278
- # MANUAL: <why it can't be automated — needs network capture / inbox / screen-reader / multi-tab>
279
- # Tester verifies:
280
- # 1. <setup> e.g. seed a registered email; throttle the network
281
- # 2. <action> e.g. click [Submit] with the request in flight
282
- # 3. <observable> e.g. only ONE POST is dispatched
283
- # 4. Oracle: <tool> e.g. DevTools Network panel / mail-catcher / NVDA
284
- ```
248
+ **Balance:** cover all the above (deep) BEFORE expanding subscription / UI-presence / extra validation edge cases.
285
249
 
286
250
  #### Tier 1 guard — minimum before writing scenarios
287
251
 
@@ -399,7 +363,7 @@ Add cleanup tags per the `sungen-gherkin-syntax` Cleanup table. Key rules:
399
363
  **Files:** `qa/screens/<screen>/features/<screen>.feature` + `qa/screens/<screen>/test-data/<screen>.yaml`
400
364
 
401
365
  Use step patterns and element types from `sungen-gherkin-syntax`.
402
- **Naming**: reuse the **project's `test-viewpoint.md` IDs** when it declares them (e.g. `VP0`, `MS-HP-001`); otherwise `VP-<CATEGORY>-<NNN>`. Scenario name must use the **same element type** as the steps.
366
+ **Naming**: `VP-<CATEGORY>-<NNN>`. Scenario name must use the **same element type** as the steps.
403
367
 
404
368
  **Test data** — grouped by section, loaded at runtime:
405
369
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sun-asterisk/sungen",
3
- "version": "3.0.0-beta.83",
3
+ "version": "3.0.0-beta.92",
4
4
  "description": "Deterministic E2E Test Compiler - Gherkin + Selectors → Playwright tests",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -12,7 +12,7 @@
12
12
  "copy-templates": "mkdir -p dist/generators/test-generator/adapters/playwright/templates/steps && mkdir -p dist/generators/test-generator/templates && mkdir -p dist/orchestrator/templates && mkdir -p dist/dashboard/templates && cp -r src/generators/test-generator/adapters/playwright/templates/*.hbs dist/generators/test-generator/adapters/playwright/templates/ 2>/dev/null || true && cp -r src/generators/test-generator/adapters/playwright/templates/steps dist/generators/test-generator/adapters/playwright/templates/ && cp src/generators/test-generator/templates/*.hbs dist/generators/test-generator/templates/ 2>/dev/null || true && cp -r src/orchestrator/templates/* dist/orchestrator/templates/ && cp src/dashboard/templates/index.html dist/dashboard/templates/index.html && mkdir -p dist/harness/catalog && cp src/harness/catalog/*.yaml dist/harness/catalog/",
13
13
  "build:dashboard": "cd dashboard && npm install --silent && npm run build && cd .. && cp dashboard/dist/index.html src/dashboard/templates/index.html",
14
14
  "dev": "tsx src/cli/index.ts",
15
- "test": "tsx tests/golden/run.ts && tsx tests/audit/run.ts && tsx tests/ingest/run.ts && tsx tests/eval/run.ts",
15
+ "test": "tsx tests/golden/run.ts && tsx tests/audit/run.ts && tsx tests/ingest/run.ts",
16
16
  "test:update": "tsx tests/golden/run.ts --update && tsx tests/audit/run.ts --update && tsx tests/ingest/run.ts --update",
17
17
  "prepublishOnly": "npm run build:dashboard && npm run build"
18
18
  },
@@ -65,18 +65,6 @@ function render(r: AuditReport): void {
65
65
  if (!r.spec.triggerGaps.length && !r.spec.uncoveredMust.length) L(' ✓ every MUST FR + per-constraint trigger covered');
66
66
  L('');
67
67
  }
68
- if (r.ledger.hasViewpoint && r.ledger.total > 0) {
69
- L(` ⑧ Viewpoint atomic coverage — ${r.ledger.covered}/${r.ledger.total} items (${(r.ledger.ratio * 100).toFixed(0)}%)`);
70
- for (const m of r.ledger.missing.slice(0, 8)) L(` ○ missing: ${m.id ? m.id + ' — ' : ''}${m.text.slice(0, 70)}`);
71
- if (r.ledger.missing.length > 8) L(` … +${r.ledger.missing.length - 8} more`);
72
- L('');
73
- }
74
- if (r.calibration) {
75
- const ax = Object.entries(r.calibration.axes).map(([k, v]) => `${k}=${(v * 100).toFixed(0)}%`).join(' · ');
76
- L(` ⑨ Calibration — ${ax}`);
77
- L(` weakest: ${r.calibration.weakest.axis} ${(r.calibration.weakest.value * 100).toFixed(0)}%${r.calibration.inflated ? ' ⚠ SCORE-INFLATED-BY-BREADTH' : ''}`);
78
- L('');
79
- }
80
68
  L(' ── Findings (Repair targets) ──');
81
69
  if (r.findings.length === 0) L(' ✓ none — output passes the harness');
82
70
  for (const f of r.findings) L(` • ${f}`);
package/src/cli/index.ts CHANGED
@@ -16,7 +16,6 @@ import { registerAddFlowCommand } from './commands/add-flow';
16
16
  import { registerDashboardCommand } from './commands/dashboard';
17
17
  import { registerAuditCommand } from './commands/audit';
18
18
  import { registerIngestCommand } from './commands/ingest';
19
- import { registerEvalCommand } from './commands/eval';
20
19
  import { registerManifestCommand } from './commands/manifest';
21
20
  import { registerLedgerCommand } from './commands/ledger';
22
21
  import { registerFeedbackCommand } from './commands/feedback';
@@ -63,7 +62,6 @@ async function main() {
63
62
  registerCapabilityCommand(program);
64
63
  registerFlowCheckCommand(program);
65
64
  registerIngestCommand(program);
66
- registerEvalCommand(program);
67
65
 
68
66
  await program.parseAsync(process.argv);
69
67
  }
@@ -15,10 +15,7 @@ import {
15
15
  } from './sensors';
16
16
  import { readIntent, projectRootFromScreenDir, IntentProfile } from './intent';
17
17
  import { getProvenance, Provenance } from './provenance';
18
- import { specCoverage, SpecCoverageResult, parseSpecClauses } from './spec-coverage';
19
- import { downstreamScope, manualOracle, readText, DownstreamResult, ManualOracleResult,
20
- negativeSideEffect, sourceBacked, crossArtifactOwnership } from './quality-gates';
21
- import { viewpointLedger, parseViewpointItems, LedgerResult } from './viewpoint-ledger';
18
+ import { specCoverage, SpecCoverageResult } from './spec-coverage';
22
19
 
23
20
  export interface AuditReport {
24
21
  screen: string;
@@ -30,15 +27,6 @@ export interface AuditReport {
30
27
  balance: BalanceResult;
31
28
  duplicates: DuplicateResult;
32
29
  trace: TraceResult;
33
- taxonomyMismatch: boolean; // scenarios use IDs not in the project's test-viewpoint.md
34
- downstream: DownstreamResult; // downstream screens referenced but under-covered
35
- manualOracle: ManualOracleResult; // @manual scenarios lacking setup/action/oracle
36
- ledger: LedgerResult; // atomic viewpoint-item coverage (per-bullet status)
37
- calibration: { // #8 — multi-axis score so a high overall can't hide a weak axis
38
- axes: Record<string, number>;
39
- weakest: { axis: string; value: number };
40
- inflated: boolean;
41
- };
42
30
  score: {
43
31
  overall: number; // 0..10, business-weighted
44
32
  coverage: number; // 0..1
@@ -75,15 +63,6 @@ export function runAudit(screenDir: string, screenName: string): AuditReport {
75
63
  const balance = coverageBalance(scenarios);
76
64
  const duplicates = duplicateClusters(scenarios);
77
65
  const trace = traceability(scenarios, viewpoints);
78
- // #1 taxonomy-match: when the project defines a viewpoint taxonomy, scenarios must use it.
79
- const taxonomyMismatch = viewpoints.length > 0 && trace.withVpCode > 0 && trace.mappedRatio < 0.6;
80
- // #2 downstream-scope + #4 manual-oracle
81
- const downstream = downstreamScope(readText(specPath), scenarios);
82
- const manualOracleResult = manualOracle(featureText);
83
- const ledger = viewpointLedger(viewpointPath, scenarios, featureText);
84
- const negSideEffect = negativeSideEffect(scenarios);
85
- const ownership = crossArtifactOwnership(screenDir, scenarios);
86
- const unsourced = sourceBacked(scenarios, parseSpecClauses(specPath).frs.map((f) => f.id), parseViewpointItems(viewpointPath).map((i) => i.text), viewpoints.map((v) => v.id), featureText);
87
66
 
88
67
  // Sub-scores
89
68
  const coverage = gate.coverageRatio;
@@ -134,59 +113,16 @@ export function runAudit(screenDir: string, screenName: string): AuditReport {
134
113
  for (const u of spec.uncoveredMust) {
135
114
  findings.push(`SPEC-UNCOVERED: ${u.id} (MUST) has no covering scenario — "${u.text}" → add a scenario or tag one @spec:${u.id}.`);
136
115
  }
137
- if (taxonomyMismatch) {
138
- findings.push(`VP-TAXONOMY-MISMATCH: only ${(trace.mappedRatio * 100).toFixed(0)}% of scenarios use the viewpoint IDs declared in test-viewpoint.md — scenarios invented a generic VP-<CAT> scheme. Re-tag to the project's viewpoint IDs so the coverage matrix is accurate.`);
139
- }
140
- for (const d of downstream.underCovered) {
141
- findings.push(`DOWNSTREAM-SCOPE-MISSING: "${d.route}" is a navigation target but is covered only by a page-nav assertion — cover its content/guards, or scaffold it (\`sungen add --screen ${d.slug}\`).`);
142
- }
143
- for (const m of manualOracleResult.insufficient.slice(0, 8)) {
144
- findings.push(`MANUAL-STEPS-INSUFFICIENT: "${m}" — a @manual scenario needs setup · action · observable expected · oracle/tool (not just a one-line note).`);
145
- }
146
- if (ledger.hasViewpoint && ledger.missing.length) {
147
- const sample = ledger.missing.slice(0, 6).map((m) => m.id || `"${m.text}"`).join(', ');
148
- findings.push(`VIEWPOINT-ITEM-MISSING: ${ledger.missing.length}/${ledger.total} atomic viewpoint items have no covering scenario (${(ledger.ratio * 100).toFixed(0)}% covered) — e.g. ${sample}. Cover each item or mark it deferred/spec-gap.`);
149
- }
150
- for (const n of negSideEffect.slice(0, 6)) {
151
- findings.push(`NEGATIVE-SIDE-EFFECT-UNPROVEN: "${n}" — the title claims something must NOT happen but the steps don't prove the absence (assert a count / negative state, or make it @manual with an oracle).`);
152
- }
153
- for (const d of ownership.duplicates.slice(0, 6)) {
154
- findings.push(`DUPLICATE-FLOW-OWNERSHIP: "${d.scenario}" has the same shape as a scenario in flow "${d.flow}" — keep one owner (screen-local vs flow); the other should only reference/set up.`);
155
- }
156
- for (const u of unsourced.slice(0, 6)) {
157
- findings.push(`UNSOURCEABLE-SCENARIO: "${u}" doesn't trace to any FR / viewpoint item — link it to a source, or tag it @exploration (not part of the official suite).`);
158
- }
159
-
160
- // #8 — multi-axis calibration: a high overall must not hide a weak axis.
161
- const manualCompleteness = manualOracleResult.manualTotal
162
- ? 1 - manualOracleResult.insufficient.length / manualOracleResult.manualTotal : 1;
163
- const axes: Record<string, number> = {
164
- coverage: Math.round(coverage * 100) / 100,
165
- businessDepth: Math.round(businessDepth * 100) / 100,
166
- claimProof: Math.round(claim.ratio * 100) / 100,
167
- specFR: spec.frTotal ? Math.round((spec.frCovered / spec.frTotal) * 100) / 100 : 1,
168
- atomicLedger: Math.round(ledger.ratio * 100) / 100,
169
- manualOracle: Math.round(manualCompleteness * 100) / 100,
170
- taxonomy: taxonomyMismatch ? 0 : Math.round(trace.mappedRatio * 100) / 100,
171
- };
172
- const weakestEntry = Object.entries(axes).sort((a, b) => a[1] - b[1])[0];
173
- const weakest = { axis: weakestEntry[0], value: weakestEntry[1] };
174
- const inflated = overall >= 8 && weakest.value < 0.6;
175
- if (inflated) {
176
- findings.push(`SCORE-INFLATED-BY-BREADTH: overall ${Math.round(overall * 10) / 10}/10 but the weakest axis "${weakest.axis}" is ${(weakest.value * 100).toFixed(0)}% — breadth is hiding a weak dimension. Raise "${weakest.axis}" before trusting the headline.`);
177
- }
178
- const calibration = { axes, weakest, inflated };
179
116
 
180
- // Gate spans coverage (viewpoint themes), depth, claim-proof, spec-clause coverage,
181
- // AND taxonomy-match (scenarios must use the project's viewpoint IDs when defined).
117
+ // Gate spans coverage (viewpoint themes), depth (data-correctness), claim-proof,
118
+ // AND spec-clause coverage (every MUST clause + every mandated validation trigger).
182
119
  const gateStatus: 'PASS' | 'FAIL' =
183
- gate.gaps.length === 0 && depth.verdict !== 'fail' && claim.verdict !== 'fail' && spec.verdict !== 'fail' && !taxonomyMismatch ? 'PASS' : 'FAIL';
120
+ gate.gaps.length === 0 && depth.verdict !== 'fail' && claim.verdict !== 'fail' && spec.verdict !== 'fail' ? 'PASS' : 'FAIL';
184
121
 
185
122
  return {
186
123
  screen: screenName,
187
124
  scenarioCount: scenarios.length,
188
125
  gate, depth, claim, taxonomy, balance, duplicates, trace, spec,
189
- taxonomyMismatch, downstream, manualOracle: manualOracleResult, ledger, calibration,
190
126
  score: {
191
127
  overall: Math.round(overall * 10) / 10,
192
128
  coverage: Math.round(coverage * 100) / 100,
@@ -29,18 +29,6 @@ export interface ScenarioInfo {
29
29
  stepSkeleton: string; // normalized steps for duplicate clustering
30
30
  haystack: string; // lowercase name + steps text (for keyword coverage)
31
31
  stepsText: string; // lowercase steps ONLY (name excluded) — for claim-proof
32
- vpId?: string; // raw leading ID token of the title (project's scheme: VP0-001, MS-HP-001, VP-LIST-001)
33
- }
34
-
35
- /** Format-tolerant: is this token an ID (project's scheme), not a prose word?
36
- * Accepts VP0, VP0-001, MS-HP-001, TV-01, VP-LIST-001 — requires a digit + uppercase start. */
37
- export function isIdLike(s: string): boolean {
38
- return /^[A-Z][A-Za-z0-9.-]*$/.test(s) && /\d/.test(s) && s.length >= 3;
39
- }
40
-
41
- /** The ID minus its trailing -NNN sequence number (VP0-001 → VP0, MS-HP-001 → MS-HP). */
42
- export function idPrefix(id: string): string {
43
- return id.replace(/[-.]\d{1,4}$/, '');
44
32
  }
45
33
 
46
34
  // ---------- test-viewpoint.md ----------
@@ -62,7 +50,7 @@ export function parseViewpointOverview(filePath: string): ViewpointEntry[] {
62
50
  const cells = line.split('|').map((c) => c.trim()).filter((_, i, a) => i > 0 && i < a.length - 1);
63
51
  if (cells.length >= 3) {
64
52
  const id = cells[0];
65
- if (isIdLike(id) && !/^-+$/.test(cells[1])) {
53
+ if (/^VP[-A-Z0-9]/i.test(id) && !/^vp$/i.test(id) && !/^-+$/.test(cells[1])) {
66
54
  const pr = /high/i.test(cells[1]) ? 'High' : /medium/i.test(cells[1]) ? 'Medium' : /low/i.test(cells[1]) ? 'Low' : 'Unknown';
67
55
  entries.set(id.toUpperCase(), { id: id.toUpperCase(), priority: pr as any, reason: cells[2] });
68
56
  }
@@ -78,8 +66,8 @@ export function parseViewpointOverview(filePath: string): ViewpointEntry[] {
78
66
  if (g) { group = (g[1][0].toUpperCase() + g[1].slice(1).toLowerCase()) as any; continue; }
79
67
  if (/^##\s/.test(line)) { group = undefined; }
80
68
  if (group) {
81
- const m = line.match(/^[-*+]\s+([A-Za-z][A-Za-z0-9.-]*)/);
82
- if (m && isIdLike(m[1])) {
69
+ const m = line.match(/^-\s+(VP[-A-Z0-9]+)/i);
70
+ if (m) {
83
71
  const id = m[1].toUpperCase();
84
72
  const existing = entries.get(id);
85
73
  if (existing) existing.group = group;
@@ -104,9 +92,6 @@ function classifyScenario(sc: ParsedScenario): ScenarioInfo {
104
92
  const codeMatch = sc.name.match(/\bVP-([A-Z]+)-\d+/i);
105
93
  const vpCode = codeMatch ? codeMatch[0].toUpperCase() : undefined;
106
94
  const category = codeMatch ? codeMatch[1].toUpperCase() : undefined;
107
- // Project-scheme ID: the leading token of the title (VP0-001 / MS-HP-001 / VP-LIST-001).
108
- const leadMatch = sc.name.match(/^\s*([A-Za-z][A-Za-z0-9.-]*)/);
109
- const vpId = leadMatch && isIdLike(leadMatch[1]) ? leadMatch[1].toUpperCase() : undefined;
110
95
 
111
96
  // Then-phase detection (And/But inherit previous primary keyword)
112
97
  let last = 'Given';
@@ -151,7 +136,6 @@ function classifyScenario(sc: ParsedScenario): ScenarioInfo {
151
136
  stepSkeleton: skeletonParts.join(' | '),
152
137
  haystack: textParts.join(' ').toLowerCase(),
153
138
  stepsText: stepTextParts.join(' ').toLowerCase(),
154
- vpId,
155
139
  };
156
140
  }
157
141
 
@@ -9,7 +9,7 @@
9
9
  import * as fs from 'fs';
10
10
  import * as path from 'path';
11
11
  import { parse as parseYaml } from 'yaml';
12
- import { ScenarioInfo, ViewpointEntry, idPrefix } from './parse';
12
+ import { ScenarioInfo, ViewpointEntry } from './parse';
13
13
 
14
14
  // Business-critical category codes (project VP-<CAT> prefixes). Configurable later.
15
15
  const BUSINESS_CRITICAL_CATS = ['LIST', 'CART', 'PRODUCT', 'FILTER', 'CHECKOUT', 'ORDER'];
@@ -263,23 +263,17 @@ export interface TraceResult {
263
263
 
264
264
  export function traceability(scenarios: ScenarioInfo[], viewpoints: ViewpointEntry[]): TraceResult {
265
265
  const overviewIds = new Set(viewpoints.map((v) => v.id.toUpperCase()));
266
- // A scenario carries an ID if it has a project-scheme leading ID (vpId) or a VP-CAT code.
267
- const withCode = scenarios.filter((s) => s.vpId || s.vpCode);
268
- // Maps to overview if the scenario's ID, its sequence-stripped prefix, or its VP-CAT code
269
- // matches a declared viewpoint ID (format-tolerant: VP0-001↔VP0, MS-HP-001↔MS-HP-001).
270
- const mapped = withCode.filter((s) => {
271
- const id = (s.vpId || s.vpCode || '').toUpperCase();
272
- if (overviewIds.has(id) || overviewIds.has(idPrefix(id))) return true;
273
- return [...overviewIds].some((oid) => id.startsWith(oid) || oid.startsWith(idPrefix(id)) || (!!s.category && oid.includes(s.category)));
274
- });
266
+ const withCode = scenarios.filter((s) => s.vpCode);
267
+ // A scenario maps to overview if its full VP code OR its category-derived id exists in overview.
268
+ const mapped = withCode.filter((s) => overviewIds.has(s.vpCode!) || [...overviewIds].some((id) => id.includes(s.category || '###')));
275
269
  return {
276
270
  total: scenarios.length,
277
271
  withVpCode: withCode.length,
278
272
  mappedToOverview: mapped.length,
279
273
  withVpCodeRatio: scenarios.length ? withCode.length / scenarios.length : 0,
280
274
  mappedRatio: scenarios.length ? mapped.length / scenarios.length : 0,
281
- note: withCode.length && mapped.length < withCode.length * 0.5
282
- ? 'Scenario IDs do not match the viewpoint-overview ids (weak traceability — re-tag to the project viewpoint IDs).'
275
+ note: mapped.length < withCode.length * 0.5
276
+ ? 'Scenarios use ad-hoc VP-<CAT>-NNN codes not linked to viewpoint-overview ids (weak traceability — see review Gate 4).'
283
277
  : 'Traceable.',
284
278
  };
285
279
  }
@@ -381,46 +375,6 @@ const CLAIM_RULES: ClaimRule[] = [
381
375
  hint: 'assert the absence/hidden state that the title claims, not just an unrelated visible element.',
382
376
  severity: 'fail',
383
377
  },
384
- {
385
- claim: 'cleared/emptied',
386
- title: /\b(cleared|clears|emptied|empties|reset to empty|wiped)\b/,
387
- proof: /\bis empty\b|with \{\{empty|with ['"]?['"]?\s*$|\bempty\b/,
388
- need: 'an empty/cleared assertion after the action (e.g. `field with {{empty_value}}` / `is empty`)',
389
- hint: 'prove the value is actually gone — return to the screen and assert the field is empty, not just that the action ran.',
390
- severity: 'fail',
391
- },
392
- {
393
- claim: 'restored/preserved',
394
- title: /\b(restored|preserved|persists?|retained|remembered|kept)\b/,
395
- proof: /\bremember\b|with \{\{|field with/,
396
- need: 'the value re-asserted after the transition (capture or `field with {{v}}` after returning)',
397
- hint: 'prove the value survives — assert the field still holds the typed value after the reload/return, not just that it was typed.',
398
- severity: 'warn',
399
- },
400
- {
401
- claim: 'independent/separate',
402
- title: /\b(independent|separate|isolat(ed|es)|per[- ]tab|two tabs|each tab)\b/,
403
- proof: /\bcontext\b|tab a|tab b|second (tab|context)/,
404
- need: 'a multi-context proof (tab A vs tab B)',
405
- hint: 'independence across tabs/contexts is rarely DSL-expressible — mark @manual with a clear setup/action/oracle.',
406
- severity: 'warn',
407
- },
408
- {
409
- claim: 'sanitized/inert',
410
- title: /\b(sanitized|sanitised|escaped|inert|not executed|not rendered|stripped)\b/,
411
- proof: /field with \{\{|payload|inert|toContainText|is hidden/,
412
- need: 'the payload echoed as inert text (`field with {{payload}}`) + no execution',
413
- hint: 'prove the payload round-trips as literal text and triggers nothing — assert the field value and the absence of any effect.',
414
- severity: 'warn',
415
- },
416
- {
417
- claim: 'announces/aria',
418
- title: /\b(announce[sd]?|aria|screen[- ]reader|programmatically associated)\b/,
419
- proof: /aria|role|@manual|describedby|is focused/,
420
- need: 'an aria/role assertion (or @manual with a screen-reader oracle)',
421
- hint: 'ARIA announcement is usually not DSL-expressible — assert aria attributes if possible, else @manual with an NVDA/VoiceOver oracle.',
422
- severity: 'warn',
423
- },
424
378
  ];
425
379
 
426
380
  // ---------- Viewpoint taxonomy-lint (harness-roadmap §0.5 Q3) ----------
@@ -105,9 +105,6 @@ Auto-detected by `create-test` before invoking this skill:
105
105
  2. Each row / bullet / item = 1 viewpoint → add to `Viewpoint items` in Coverage Map.
106
106
  3. Do NOT pre-classify into buckets before scanning — classify only when
107
107
  writing the scenario.
108
- 4. **If it declares viewpoint IDs** (e.g. `VP0`, `VP1`…`VP12`, `MS-HP-001`), capture each
109
- item WITH its ID and **reuse that ID as the scenario code** — do not invent a generic
110
- `VP-<CAT>` scheme (the harness Taxonomy-match gate FAILs on mismatch).
111
108
  - `qa/context.md` — project-wide context set by the QA lead. Read ONCE before building the Coverage Map; apply to every screen. Extraction rules:
112
109
  - **Roles** → for each role in the table: add to the `@auth:X` tag pool; generate a VP-SEC blocked-access scenario for every role boundary relevant to this screen.
113
110
  - **Testing strategy → Focus areas** → if `security` listed: VP-SEC is mandatory Tier 1 for every free-text input regardless of spec risk level; if `ui` not listed: all VP-UI scenarios move to Tier 2 minimum.
@@ -263,26 +260,6 @@ Security: [S1 – admin only]
263
260
 
264
261
  **Balance:** cover all the above (deep) BEFORE expanding subscription / UI-presence / extra validation edge cases. Do not over-invest in subscription while cart/detail/filter correctness are shallow.
265
262
 
266
- #### Harness gates — satisfy on the FIRST pass (don't make the repair loop fix them)
267
-
268
- `sungen audit` enforces these. Generate compliant output up front:
269
-
270
- 1. **Taxonomy-match** (`VP-TAXONOMY-MISMATCH`, gate-FAIL) — when `test-viewpoint.md` declares its own viewpoint IDs (e.g. `VP0`, `VP1`, … `VP12`, `MS-HP-001`, `MS-EH-001`), **reuse those IDs verbatim as the scenario codes**. Do NOT invent a generic `VP-UI / VP-LOGIC / VP-VAL` scheme — that breaks the coverage matrix. Only fall back to `VP-<CATEGORY>-<NNN>` when the viewpoint file declares no IDs.
271
- 2. **Spec-coverage triggers** (`TRIGGER-UNCOVERED`, gate-FAIL) — the Validation-Rules table lists a **trigger** per constraint (e.g. `blur, submit`). Generate one scenario **per (constraint × trigger)** — a `format` rule validating *on blur AND on submit* needs BOTH a blur scenario (`press Tab`) and a submit scenario (`click [Submit]` / `press Enter`). Never collapse the trigger × input matrix to one representative case.
272
- 3. **Claim-Proof** (`CLAIM-UNPROVEN`) — a title claiming `all`/`only`/`every`/`single`/`correct`/`same`/`changes`/`hidden`/`cleared`/`restored`/`independent`/`sanitized`/`announces` MUST have the matching assertion (`see all …`, count, `remember`+compare, `is hidden`, return-and-assert-empty, etc.). If the title promises it, the steps must prove it.
273
- 4. **Downstream-scope** (`DOWNSTREAM-SCOPE-MISSING`) — when the spec's Navigation Flow / success target is **another screen** (e.g. a confirmation/sent page), don't stop at a terminal `see [X] page`. Either cover that screen's content/guards (if its viewpoint items are in scope — they often have their own `MS-*` IDs), or scaffold it (`sungen add --screen <name>`) and note the handoff. Do not silently drop the downstream surface.
274
- 5. **Manual-oracle** (`MANUAL-STEPS-INSUFFICIENT`) — every `@manual` scenario needs **setup · action · observable expected · oracle/tool**, not a one-line note. Use this comment shape:
275
- ```gherkin
276
- @high @manual
277
- Scenario: VP-… <claim>
278
- # MANUAL: <why it can't be automated — needs network capture / inbox / screen-reader / multi-tab>
279
- # Tester verifies:
280
- # 1. <setup> e.g. seed a registered email; throttle the network
281
- # 2. <action> e.g. click [Submit] with the request in flight
282
- # 3. <observable> e.g. only ONE POST is dispatched
283
- # 4. Oracle: <tool> e.g. DevTools Network panel / mail-catcher / NVDA
284
- ```
285
-
286
263
  #### Tier 1 guard — minimum before writing scenarios
287
264
 
288
265
  | Spec section | Minimum requirement | Tag |
@@ -399,7 +376,7 @@ Add cleanup tags per the `sungen-gherkin-syntax` Cleanup table. Key rules:
399
376
  **Files:** `qa/screens/<screen>/features/<screen>.feature` + `qa/screens/<screen>/test-data/<screen>.yaml`
400
377
 
401
378
  Use step patterns and element types from `sungen-gherkin-syntax`.
402
- **Naming**: reuse the **project's `test-viewpoint.md` IDs** when it declares them (e.g. `VP0`, `MS-HP-001`); otherwise `VP-<CATEGORY>-<NNN>`. Scenario name must use the **same element type** as the steps.
379
+ **Naming**: `VP-<CATEGORY>-<NNN>`. Scenario name must use the **same element type** as the steps.
403
380
 
404
381
  **Test data** — grouped by section, loaded at runtime:
405
382
 
@@ -105,17 +105,6 @@ Auto-detected by `create-test` before invoking this skill:
105
105
  2. Each row / bullet / item = 1 viewpoint → add to `Viewpoint items` in Coverage Map.
106
106
  3. Do NOT pre-classify into buckets before scanning — classify only when
107
107
  writing the scenario.
108
- 4. **If it declares viewpoint IDs** (e.g. `VP0`, `VP1`…`VP12`, `MS-HP-001`), capture each
109
- item WITH its ID and **reuse that ID as the scenario code** — do not invent a generic
110
- `VP-<CAT>` scheme (the harness Taxonomy-match gate FAILs on mismatch).
111
- - `qa/context.md` — project-wide context set by the QA lead. Read ONCE before building the Coverage Map; apply to every screen. Extraction rules:
112
- - **Roles** → for each role in the table: add to the `@auth:X` tag pool; generate a VP-SEC blocked-access scenario for every role boundary relevant to this screen.
113
- - **Testing strategy → Focus areas** → if `security` listed: VP-SEC is mandatory Tier 1 for every free-text input regardless of spec risk level; if `ui` not listed: all VP-UI scenarios move to Tier 2 minimum.
114
- - **Testing strategy → Mandatory coverage** → each line is a hard override applied to this screen regardless of spec risk; document in `Context constraints` of the Coverage Map.
115
- - **Testing strategy → Deprioritize/skip** → record in `Context constraints`; suppress those VP categories from Tier 1 generation.
116
- - **Global business rules** → add each to the `Business rules` section tagged `[G]` (e.g. `[G1 – soft-delete only]`); treat as `HIGH` risk unless stated otherwise.
117
- - **Error patterns** → use as fallback only when `spec.md` does not give exact error text; never override spec-specified messages.
118
- - If `qa/context.md` is absent: proceed without it — no impact on the generation flow.
119
108
 
120
109
  **Single screen focus**: one URL = one screen. Modals on same page = part of this screen.
121
110
  This means: do not test other screens' UI layout or navigation. It does NOT mean skip documenting business outcomes that your screen's actions cause on other surfaces. Those cross-surface outcomes must appear in the Coverage Map and be covered by at least `@manual` scenarios.
@@ -140,11 +129,6 @@ Read `spec.md` fully, then extract into a Coverage Map **before writing any scen
140
129
  **Risk tags:** HIGH = complex business rules, cascading fields, multi-step state changes, auth/integration. LOW = display-only, static labels, read-only fields.
141
130
 
142
131
  ```
143
- Context constraints: [populated from qa/context.md before writing any scenario]
144
- roles: [list roles, e.g. admin / manager / staff]
145
- strategy: [active overrides, e.g. "VP-SEC mandatory T1", "VP-UI → T2 only"]
146
- global rules: [G1 – ...] → also appear in Business rules below tagged [G]
147
- → leave empty if qa/context.md is absent or has no entries applicable to this screen
148
132
  User journeys: [J1 – ...], [J2 – ...]
149
133
  Validation rules: [V1 – field → "exact error text"], [V2 – ...]
150
134
  Business rules: [B1 HIGH – ...], [B2 LOW – ...]
@@ -237,7 +221,7 @@ Security: [S1 – admin only]
237
221
  | **auth** | valid-login · invalid-credential · access-control |
238
222
 
239
223
  **Required assertion shapes (use these, not bare visibility):**
240
- - Card info: assert at **card level** (image+name+price together), e.g. `User see all [Product Card] contain {{...}}` — not `see [Section]` (section-level passes even if one card lacks price).
224
+ - Card info: assert at **card level** (image+name+price together), e.g. `User see all [Product Card] contain {{...}}` — not `see [Section]`.
241
225
  - Cross-screen consistency (detail/cart): **capture then compare** —
242
226
  ```gherkin
243
227
  When User remember [Product Name] text as {{selected_product_name}}
@@ -255,33 +239,13 @@ Security: [S1 – admin only]
255
239
  - **If the spec lacks the concrete value** a deep assertion needs (exact message, price, count): still write the deep shape with a `{{var}}` placeholder and leave a `# SPEC-GAP: <field> value not in spec` comment — do **not** downgrade to `see [X] section`. A visible gap is better than a silent shallow pass.
256
240
  - **Blind-Spot Memory:** before finishing, run `sungen blindspot list --prompt` (Bash) and make sure the suite satisfies each recorded pattern (e.g. "for any Add/Create action: check success + resulting data state + duplicate/double-submit"). These are gaps QA hit before — don't repeat them.
257
241
 
258
- **First-pass anti-patterns (these are exactly what the gate/reviewer reject — avoid them):**
259
- - Title↔steps mismatch: e.g. a "no-result state" scenario that clicks a query which **returns** products. Steps must create the condition the title claims.
260
- - Tautology `Then`: `click [Next Slide]` → `see [Carousel] section` (always visible, proves nothing). Assert the change (new slide title differs).
261
- - Business-critical scenario ending at `see [Added] modal` / `see [Cart] page` / `see [Category Products] page` with no data assertion.
242
+ **First-pass anti-patterns (exactly what the gate/reviewer reject — avoid them):**
243
+ - Title↔steps mismatch (e.g. a "no-result" scenario that clicks a query which returns products).
244
+ - Tautology `Then`: `click [Next Slide]` → `see [Carousel] section` (proves nothing).
245
+ - Business-critical scenario ending at `see [Added] modal` / `see [Cart] page` with no data assertion.
262
246
  - Brand filter covered only as navigation (must assert products belong to the brand).
263
247
 
264
- **Balance:** cover all the above (deep) BEFORE expanding subscription / UI-presence / extra validation edge cases. Do not over-invest in subscription while cart/detail/filter correctness are shallow.
265
-
266
- #### Harness gates — satisfy on the FIRST pass (don't make the repair loop fix them)
267
-
268
- `sungen audit` enforces these. Generate compliant output up front:
269
-
270
- 1. **Taxonomy-match** (`VP-TAXONOMY-MISMATCH`, gate-FAIL) — when `test-viewpoint.md` declares its own viewpoint IDs (e.g. `VP0`, `VP1`, … `VP12`, `MS-HP-001`, `MS-EH-001`), **reuse those IDs verbatim as the scenario codes**. Do NOT invent a generic `VP-UI / VP-LOGIC / VP-VAL` scheme — that breaks the coverage matrix. Only fall back to `VP-<CATEGORY>-<NNN>` when the viewpoint file declares no IDs.
271
- 2. **Spec-coverage triggers** (`TRIGGER-UNCOVERED`, gate-FAIL) — the Validation-Rules table lists a **trigger** per constraint (e.g. `blur, submit`). Generate one scenario **per (constraint × trigger)** — a `format` rule validating *on blur AND on submit* needs BOTH a blur scenario (`press Tab`) and a submit scenario (`click [Submit]` / `press Enter`). Never collapse the trigger × input matrix to one representative case.
272
- 3. **Claim-Proof** (`CLAIM-UNPROVEN`) — a title claiming `all`/`only`/`every`/`single`/`correct`/`same`/`changes`/`hidden`/`cleared`/`restored`/`independent`/`sanitized`/`announces` MUST have the matching assertion (`see all …`, count, `remember`+compare, `is hidden`, return-and-assert-empty, etc.). If the title promises it, the steps must prove it.
273
- 4. **Downstream-scope** (`DOWNSTREAM-SCOPE-MISSING`) — when the spec's Navigation Flow / success target is **another screen** (e.g. a confirmation/sent page), don't stop at a terminal `see [X] page`. Either cover that screen's content/guards (if its viewpoint items are in scope — they often have their own `MS-*` IDs), or scaffold it (`sungen add --screen <name>`) and note the handoff. Do not silently drop the downstream surface.
274
- 5. **Manual-oracle** (`MANUAL-STEPS-INSUFFICIENT`) — every `@manual` scenario needs **setup · action · observable expected · oracle/tool**, not a one-line note. Use this comment shape:
275
- ```gherkin
276
- @high @manual
277
- Scenario: VP-… <claim>
278
- # MANUAL: <why it can't be automated — needs network capture / inbox / screen-reader / multi-tab>
279
- # Tester verifies:
280
- # 1. <setup> e.g. seed a registered email; throttle the network
281
- # 2. <action> e.g. click [Submit] with the request in flight
282
- # 3. <observable> e.g. only ONE POST is dispatched
283
- # 4. Oracle: <tool> e.g. DevTools Network panel / mail-catcher / NVDA
284
- ```
248
+ **Balance:** cover all the above (deep) BEFORE expanding subscription / UI-presence / extra validation edge cases.
285
249
 
286
250
  #### Tier 1 guard — minimum before writing scenarios
287
251
 
@@ -399,7 +363,7 @@ Add cleanup tags per the `sungen-gherkin-syntax` Cleanup table. Key rules:
399
363
  **Files:** `qa/screens/<screen>/features/<screen>.feature` + `qa/screens/<screen>/test-data/<screen>.yaml`
400
364
 
401
365
  Use step patterns and element types from `sungen-gherkin-syntax`.
402
- **Naming**: reuse the **project's `test-viewpoint.md` IDs** when it declares them (e.g. `VP0`, `MS-HP-001`); otherwise `VP-<CATEGORY>-<NNN>`. Scenario name must use the **same element type** as the steps.
366
+ **Naming**: `VP-<CATEGORY>-<NNN>`. Scenario name must use the **same element type** as the steps.
403
367
 
404
368
  **Test data** — grouped by section, loaded at runtime:
405
369
 
@@ -1,3 +0,0 @@
1
- import { Command } from 'commander';
2
- export declare function registerEvalCommand(program: Command): void;
3
- //# sourceMappingURL=eval.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"eval.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/eval.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAGpC,wBAAgB,mBAAmB,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI,CAwB1D"}
@@ -1,37 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.registerEvalCommand = registerEvalCommand;
4
- const skill_lint_1 = require("../../harness/eval/skill-lint");
5
- function registerEvalCommand(program) {
6
- program
7
- .command('eval')
8
- .description('Eval harness: quality checks on Sungen\'s own skills/instructions (dev/CI)')
9
- .option('--skills', 'Static skill-lint: frontmatter, line budget, claude↔github sync, registration')
10
- .option('--dir <path>', 'Templates dir to lint (default: bundled ai-instructions)')
11
- .option('--json', 'Output the raw findings JSON')
12
- .action((options) => {
13
- try {
14
- if (!options.skills)
15
- throw new Error('Provide --skills (the only eval mode today)');
16
- const dir = options.dir || (0, skill_lint_1.defaultSkillDir)();
17
- const r = (0, skill_lint_1.lintSkills)(dir);
18
- if (options.json) {
19
- console.log(JSON.stringify(r, null, 2));
20
- process.exit(r.errors > 0 ? 2 : 0);
21
- }
22
- console.log('');
23
- console.log(`━━━ Skill-lint: ${r.checked} skill template(s) ━━━`);
24
- if (!r.findings.length)
25
- console.log(' ✓ all skills pass (frontmatter · line-budget · variant-sync · registration)');
26
- for (const f of r.findings)
27
- console.log(` ${f.level === 'error' ? '✗' : '⚠'} [${f.rule}] ${f.file} — ${f.detail}`);
28
- console.log('');
29
- process.exit(r.errors > 0 ? 2 : 0);
30
- }
31
- catch (error) {
32
- console.error('Error:', error instanceof Error ? error.message : error);
33
- process.exit(1);
34
- }
35
- });
36
- }
37
- //# sourceMappingURL=eval.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"eval.js","sourceRoot":"","sources":["../../../src/cli/commands/eval.ts"],"names":[],"mappings":";;AAGA,kDAwBC;AA1BD,8DAA4E;AAE5E,SAAgB,mBAAmB,CAAC,OAAgB;IAClD,OAAO;SACJ,OAAO,CAAC,MAAM,CAAC;SACf,WAAW,CAAC,4EAA4E,CAAC;SACzF,MAAM,CAAC,UAAU,EAAE,+EAA+E,CAAC;SACnG,MAAM,CAAC,cAAc,EAAE,0DAA0D,CAAC;SAClF,MAAM,CAAC,QAAQ,EAAE,8BAA8B,CAAC;SAChD,MAAM,CAAC,CAAC,OAAO,EAAE,EAAE;QAClB,IAAI,CAAC;YACH,IAAI,CAAC,OAAO,CAAC,MAAM;gBAAE,MAAM,IAAI,KAAK,CAAC,6CAA6C,CAAC,CAAC;YACpF,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,IAAI,IAAA,4BAAe,GAAE,CAAC;YAC7C,MAAM,CAAC,GAAG,IAAA,uBAAU,EAAC,GAAG,CAAC,CAAC;YAC1B,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;gBAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;gBAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAAC,CAAC;YAClG,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAChB,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC,OAAO,wBAAwB,CAAC,CAAC;YAClE,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM;gBAAE,OAAO,CAAC,GAAG,CAAC,+EAA+E,CAAC,CAAC;YACrH,KAAK,MAAM,CAAC,IAAI,CAAC,CAAC,QAAQ;gBAAE,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,KAAK,KAAK,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,KAAK,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,IAAI,MAAM,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC;YACpH,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAChB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACrC,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,QAAQ,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;YACxE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;IACH,CAAC,CAAC,CAAC;AACP,CAAC"}
@@ -1,16 +0,0 @@
1
- export interface SkillLintFinding {
2
- level: 'error' | 'warn';
3
- file: string;
4
- rule: string;
5
- detail: string;
6
- }
7
- export interface SkillLintResult {
8
- checked: number;
9
- findings: SkillLintFinding[];
10
- errors: number;
11
- }
12
- /** Lint the AI-instruction templates in `dir` (default: the sungen source templates). */
13
- export declare function lintSkills(dir: string): SkillLintResult;
14
- /** Default templates dir, resolved relative to this module (works from src via tsx and dist). */
15
- export declare function defaultSkillDir(): string;
16
- //# sourceMappingURL=skill-lint.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"skill-lint.d.ts","sourceRoot":"","sources":["../../../src/harness/eval/skill-lint.ts"],"names":[],"mappings":"AAiBA,MAAM,WAAW,gBAAgB;IAAG,KAAK,EAAE,OAAO,GAAG,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE;AACzG,MAAM,WAAW,eAAe;IAAG,OAAO,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,gBAAgB,EAAE,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE;AAWlG,yFAAyF;AACzF,wBAAgB,UAAU,CAAC,GAAG,EAAE,MAAM,GAAG,eAAe,CAkDvD;AAED,iGAAiG;AACjG,wBAAgB,eAAe,IAAI,MAAM,CAGxC"}