@sun-asterisk/sungen 3.0.0 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. package/dist/cli/commands/audit.d.ts.map +1 -1
  2. package/dist/cli/commands/audit.js +24 -0
  3. package/dist/cli/commands/audit.js.map +1 -1
  4. package/dist/cli/commands/delivery.d.ts.map +1 -1
  5. package/dist/cli/commands/delivery.js +30 -14
  6. package/dist/cli/commands/delivery.js.map +1 -1
  7. package/dist/cli/commands/eval.d.ts +3 -0
  8. package/dist/cli/commands/eval.d.ts.map +1 -0
  9. package/dist/cli/commands/eval.js +37 -0
  10. package/dist/cli/commands/eval.js.map +1 -0
  11. package/dist/cli/commands/ingest.d.ts +3 -0
  12. package/dist/cli/commands/ingest.d.ts.map +1 -0
  13. package/dist/cli/commands/ingest.js +179 -0
  14. package/dist/cli/commands/ingest.js.map +1 -0
  15. package/dist/cli/index.js +4 -0
  16. package/dist/cli/index.js.map +1 -1
  17. package/dist/dashboard/templates/index.html +108 -194
  18. package/dist/harness/audit.d.ts +16 -0
  19. package/dist/harness/audit.d.ts.map +1 -1
  20. package/dist/harness/audit.js +68 -4
  21. package/dist/harness/audit.js.map +1 -1
  22. package/dist/harness/capability-plan.d.ts +6 -0
  23. package/dist/harness/capability-plan.d.ts.map +1 -1
  24. package/dist/harness/capability-plan.js +13 -0
  25. package/dist/harness/capability-plan.js.map +1 -1
  26. package/dist/harness/eval/skill-lint.d.ts +16 -0
  27. package/dist/harness/eval/skill-lint.d.ts.map +1 -0
  28. package/dist/harness/eval/skill-lint.js +129 -0
  29. package/dist/harness/eval/skill-lint.js.map +1 -0
  30. package/dist/harness/parse.d.ts +6 -0
  31. package/dist/harness/parse.d.ts.map +1 -1
  32. package/dist/harness/parse.js +18 -3
  33. package/dist/harness/parse.js.map +1 -1
  34. package/dist/harness/quality-gates.d.ts +29 -0
  35. package/dist/harness/quality-gates.d.ts.map +1 -0
  36. package/dist/harness/quality-gates.js +183 -0
  37. package/dist/harness/quality-gates.js.map +1 -0
  38. package/dist/harness/sensors.d.ts.map +1 -1
  39. package/dist/harness/sensors.js +85 -6
  40. package/dist/harness/sensors.js.map +1 -1
  41. package/dist/harness/spec-coverage.d.ts +37 -0
  42. package/dist/harness/spec-coverage.d.ts.map +1 -0
  43. package/dist/harness/spec-coverage.js +159 -0
  44. package/dist/harness/spec-coverage.js.map +1 -0
  45. package/dist/harness/viewpoint-ledger.d.ts +23 -0
  46. package/dist/harness/viewpoint-ledger.d.ts.map +1 -0
  47. package/dist/harness/viewpoint-ledger.js +118 -0
  48. package/dist/harness/viewpoint-ledger.js.map +1 -0
  49. package/dist/ingest/baseline-audit.d.ts +38 -0
  50. package/dist/ingest/baseline-audit.d.ts.map +1 -0
  51. package/dist/ingest/baseline-audit.js +85 -0
  52. package/dist/ingest/baseline-audit.js.map +1 -0
  53. package/dist/ingest/gsheet-fetch.d.ts +9 -0
  54. package/dist/ingest/gsheet-fetch.d.ts.map +1 -0
  55. package/dist/ingest/gsheet-fetch.js +180 -0
  56. package/dist/ingest/gsheet-fetch.js.map +1 -0
  57. package/dist/ingest/index.d.ts +6 -0
  58. package/dist/ingest/index.d.ts.map +1 -0
  59. package/dist/ingest/index.js +22 -0
  60. package/dist/ingest/index.js.map +1 -0
  61. package/dist/ingest/legacy-parser.d.ts +39 -0
  62. package/dist/ingest/legacy-parser.d.ts.map +1 -0
  63. package/dist/ingest/legacy-parser.js +218 -0
  64. package/dist/ingest/legacy-parser.js.map +1 -0
  65. package/dist/ingest/reconcile.d.ts +30 -0
  66. package/dist/ingest/reconcile.d.ts.map +1 -0
  67. package/dist/ingest/reconcile.js +65 -0
  68. package/dist/ingest/reconcile.js.map +1 -0
  69. package/dist/ingest/to-gherkin.d.ts +33 -0
  70. package/dist/ingest/to-gherkin.d.ts.map +1 -0
  71. package/dist/ingest/to-gherkin.js +93 -0
  72. package/dist/ingest/to-gherkin.js.map +1 -0
  73. package/dist/orchestrator/ai-rules-updater.d.ts.map +1 -1
  74. package/dist/orchestrator/ai-rules-updater.js +2 -0
  75. package/dist/orchestrator/ai-rules-updater.js.map +1 -1
  76. package/dist/orchestrator/templates/ai-instructions/claude-agent-reviewer.md +1 -0
  77. package/dist/orchestrator/templates/ai-instructions/claude-skill-delivery.md +10 -0
  78. package/dist/orchestrator/templates/ai-instructions/claude-skill-ingest-legacy.md +79 -0
  79. package/dist/orchestrator/templates/ai-instructions/claude-skill-tc-generation.md +25 -1
  80. package/dist/orchestrator/templates/ai-instructions/github-skill-sungen-delivery.md +10 -0
  81. package/dist/orchestrator/templates/ai-instructions/github-skill-sungen-ingest-legacy.md +79 -0
  82. package/dist/orchestrator/templates/ai-instructions/github-skill-sungen-tc-generation.md +44 -7
  83. package/package.json +3 -3
  84. package/src/cli/commands/audit.ts +19 -0
  85. package/src/cli/commands/delivery.ts +31 -15
  86. package/src/cli/commands/eval.ts +28 -0
  87. package/src/cli/commands/ingest.ts +141 -0
  88. package/src/cli/index.ts +4 -0
  89. package/src/dashboard/templates/index.html +108 -194
  90. package/src/harness/audit.ts +81 -4
  91. package/src/harness/capability-plan.ts +11 -0
  92. package/src/harness/eval/skill-lint.ts +87 -0
  93. package/src/harness/parse.ts +19 -3
  94. package/src/harness/quality-gates.ts +152 -0
  95. package/src/harness/sensors.ts +84 -7
  96. package/src/harness/spec-coverage.ts +139 -0
  97. package/src/harness/viewpoint-ledger.ts +80 -0
  98. package/src/ingest/baseline-audit.ts +100 -0
  99. package/src/ingest/gsheet-fetch.ts +152 -0
  100. package/src/ingest/index.ts +5 -0
  101. package/src/ingest/legacy-parser.ts +184 -0
  102. package/src/ingest/reconcile.ts +80 -0
  103. package/src/ingest/to-gherkin.ts +108 -0
  104. package/src/orchestrator/ai-rules-updater.ts +2 -0
  105. package/src/orchestrator/templates/ai-instructions/claude-agent-reviewer.md +1 -0
  106. package/src/orchestrator/templates/ai-instructions/claude-skill-delivery.md +10 -0
  107. package/src/orchestrator/templates/ai-instructions/claude-skill-ingest-legacy.md +79 -0
  108. package/src/orchestrator/templates/ai-instructions/claude-skill-tc-generation.md +25 -1
  109. package/src/orchestrator/templates/ai-instructions/github-skill-sungen-delivery.md +10 -0
  110. package/src/orchestrator/templates/ai-instructions/github-skill-sungen-ingest-legacy.md +79 -0
  111. package/src/orchestrator/templates/ai-instructions/github-skill-sungen-tc-generation.md +44 -7
@@ -7,6 +7,7 @@
7
7
  * docs/orchestration-spec.md §5 and reports/sungen_home_gherkin_viewpoint_coverage_review.md.
8
8
  */
9
9
  import * as path from 'path';
10
+ import * as fs from 'fs';
10
11
  import { loadScenarios, parseViewpointOverview, ScenarioInfo, ViewpointEntry } from './parse';
11
12
  import {
12
13
  loadCatalog, viewpointGate, assertionDepth, dataThemesFor, coverageBalance, duplicateClusters, traceability, claimProof, taxonomyLint,
@@ -14,6 +15,10 @@ import {
14
15
  } from './sensors';
15
16
  import { readIntent, projectRootFromScreenDir, IntentProfile } from './intent';
16
17
  import { getProvenance, Provenance } from './provenance';
18
+ import { specCoverage, SpecCoverageResult, parseSpecClauses } from './spec-coverage';
19
+ import { downstreamScope, manualOracle, readText, DownstreamResult, ManualOracleResult,
20
+ negativeSideEffect, sourceBacked, crossArtifactOwnership } from './quality-gates';
21
+ import { viewpointLedger, parseViewpointItems, LedgerResult } from './viewpoint-ledger';
17
22
 
18
23
  export interface AuditReport {
19
24
  screen: string;
@@ -25,6 +30,15 @@ export interface AuditReport {
25
30
  balance: BalanceResult;
26
31
  duplicates: DuplicateResult;
27
32
  trace: TraceResult;
33
+ taxonomyMismatch: boolean; // scenarios use IDs not in the project's test-viewpoint.md
34
+ downstream: DownstreamResult; // downstream screens referenced but under-covered
35
+ manualOracle: ManualOracleResult; // @manual scenarios lacking setup/action/oracle
36
+ ledger: LedgerResult; // atomic viewpoint-item coverage (per-bullet status)
37
+ calibration: { // #8 — multi-axis score so a high overall can't hide a weak axis
38
+ axes: Record<string, number>;
39
+ weakest: { axis: string; value: number };
40
+ inflated: boolean;
41
+ };
28
42
  score: {
29
43
  overall: number; // 0..10, business-weighted
30
44
  coverage: number; // 0..1
@@ -37,15 +51,20 @@ export interface AuditReport {
37
51
  findings: string[]; // human-actionable, what the Repair loop would target
38
52
  intent: IntentProfile; // P3 — the intent profile that drove the thresholds
39
53
  provenance: Provenance; // D1 — sungen version + catalog hash (diagnose cross-user score gaps)
54
+ spec: SpecCoverageResult; // G2 — spec-clause coverage (FR + validation-trigger matrix)
40
55
  }
41
56
 
42
57
  export function runAudit(screenDir: string, screenName: string): AuditReport {
43
58
  const featurePath = path.join(screenDir, 'features', `${screenName}.feature`);
44
59
  const viewpointPath = path.join(screenDir, 'requirements', 'test-viewpoint.md');
45
60
 
61
+ const specPath = path.join(screenDir, 'requirements', 'spec.md');
62
+ const featureText = fs.existsSync(featurePath) ? fs.readFileSync(featurePath, 'utf-8') : '';
63
+
46
64
  const scenarios: ScenarioInfo[] = loadScenarios(featurePath);
47
65
  const viewpoints: ViewpointEntry[] = parseViewpointOverview(viewpointPath);
48
66
  const catalog = loadCatalog();
67
+ const spec = specCoverage(specPath, scenarios, featureText);
49
68
 
50
69
  const gate = viewpointGate(scenarios, viewpoints, catalog);
51
70
  // P3 — intent profile from qa/context.md drives the depth threshold (focus).
@@ -56,6 +75,15 @@ export function runAudit(screenDir: string, screenName: string): AuditReport {
56
75
  const balance = coverageBalance(scenarios);
57
76
  const duplicates = duplicateClusters(scenarios);
58
77
  const trace = traceability(scenarios, viewpoints);
78
+ // #1 taxonomy-match: when the project defines a viewpoint taxonomy, scenarios must use it.
79
+ const taxonomyMismatch = viewpoints.length > 0 && trace.withVpCode > 0 && trace.mappedRatio < 0.6;
80
+ // #2 downstream-scope + #4 manual-oracle
81
+ const downstream = downstreamScope(readText(specPath), scenarios);
82
+ const manualOracleResult = manualOracle(featureText);
83
+ const ledger = viewpointLedger(viewpointPath, scenarios, featureText);
84
+ const negSideEffect = negativeSideEffect(scenarios);
85
+ const ownership = crossArtifactOwnership(screenDir, scenarios);
86
+ const unsourced = sourceBacked(scenarios, parseSpecClauses(specPath).frs.map((f) => f.id), parseViewpointItems(viewpointPath).map((i) => i.text), viewpoints.map((v) => v.id), featureText);
59
87
 
60
88
  // Sub-scores
61
89
  const coverage = gate.coverageRatio;
@@ -100,16 +128,65 @@ export function runAudit(screenDir: string, screenName: string): AuditReport {
100
128
  if (gate.universalGaps.length) {
101
129
  findings.push(`UNIVERSAL: missing theme(s): ${gate.universalGaps.join(', ')} (low priority reminder).`);
102
130
  }
131
+ for (const g of spec.triggerGaps) {
132
+ findings.push(`TRIGGER-UNCOVERED: spec validates "${g.constraint}"${g.code ? ` (${g.code})` : ''} on [${g.required.join(', ')}] but scenarios only exercise it on [${g.found.join(', ') || 'none'}] → add a ${g.missing.join(', ')}-trigger scenario for this constraint (don't collapse the trigger × input matrix).`);
133
+ }
134
+ for (const u of spec.uncoveredMust) {
135
+ findings.push(`SPEC-UNCOVERED: ${u.id} (MUST) has no covering scenario — "${u.text}" → add a scenario or tag one @spec:${u.id}.`);
136
+ }
137
+ if (taxonomyMismatch) {
138
+ findings.push(`VP-TAXONOMY-MISMATCH: only ${(trace.mappedRatio * 100).toFixed(0)}% of scenarios use the viewpoint IDs declared in test-viewpoint.md — scenarios invented a generic VP-<CAT> scheme. Re-tag to the project's viewpoint IDs so the coverage matrix is accurate.`);
139
+ }
140
+ for (const d of downstream.underCovered) {
141
+ findings.push(`DOWNSTREAM-SCOPE-MISSING: "${d.route}" is a navigation target but is covered only by a page-nav assertion — cover its content/guards, or scaffold it (\`sungen add --screen ${d.slug}\`).`);
142
+ }
143
+ for (const m of manualOracleResult.insufficient.slice(0, 8)) {
144
+ findings.push(`MANUAL-STEPS-INSUFFICIENT: "${m}" — a @manual scenario needs setup · action · observable expected · oracle/tool (not just a one-line note).`);
145
+ }
146
+ if (ledger.hasViewpoint && ledger.missing.length) {
147
+ const sample = ledger.missing.slice(0, 6).map((m) => m.id || `"${m.text}"`).join(', ');
148
+ findings.push(`VIEWPOINT-ITEM-MISSING: ${ledger.missing.length}/${ledger.total} atomic viewpoint items have no covering scenario (${(ledger.ratio * 100).toFixed(0)}% covered) — e.g. ${sample}. Cover each item or mark it deferred/spec-gap.`);
149
+ }
150
+ for (const n of negSideEffect.slice(0, 6)) {
151
+ findings.push(`NEGATIVE-SIDE-EFFECT-UNPROVEN: "${n}" — the title claims something must NOT happen but the steps don't prove the absence (assert a count / negative state, or make it @manual with an oracle).`);
152
+ }
153
+ for (const d of ownership.duplicates.slice(0, 6)) {
154
+ findings.push(`DUPLICATE-FLOW-OWNERSHIP: "${d.scenario}" has the same shape as a scenario in flow "${d.flow}" — keep one owner (screen-local vs flow); the other should only reference/set up.`);
155
+ }
156
+ for (const u of unsourced.slice(0, 6)) {
157
+ findings.push(`UNSOURCEABLE-SCENARIO: "${u}" doesn't trace to any FR / viewpoint item — link it to a source, or tag it @exploration (not part of the official suite).`);
158
+ }
159
+
160
+ // #8 — multi-axis calibration: a high overall must not hide a weak axis.
161
+ const manualCompleteness = manualOracleResult.manualTotal
162
+ ? 1 - manualOracleResult.insufficient.length / manualOracleResult.manualTotal : 1;
163
+ const axes: Record<string, number> = {
164
+ coverage: Math.round(coverage * 100) / 100,
165
+ businessDepth: Math.round(businessDepth * 100) / 100,
166
+ claimProof: Math.round(claim.ratio * 100) / 100,
167
+ specFR: spec.frTotal ? Math.round((spec.frCovered / spec.frTotal) * 100) / 100 : 1,
168
+ atomicLedger: Math.round(ledger.ratio * 100) / 100,
169
+ manualOracle: Math.round(manualCompleteness * 100) / 100,
170
+ taxonomy: taxonomyMismatch ? 0 : Math.round(trace.mappedRatio * 100) / 100,
171
+ };
172
+ const weakestEntry = Object.entries(axes).sort((a, b) => a[1] - b[1])[0];
173
+ const weakest = { axis: weakestEntry[0], value: weakestEntry[1] };
174
+ const inflated = overall >= 8 && weakest.value < 0.6;
175
+ if (inflated) {
176
+ findings.push(`SCORE-INFLATED-BY-BREADTH: overall ${Math.round(overall * 10) / 10}/10 but the weakest axis "${weakest.axis}" is ${(weakest.value * 100).toFixed(0)}% — breadth is hiding a weak dimension. Raise "${weakest.axis}" before trusting the headline.`);
177
+ }
178
+ const calibration = { axes, weakest, inflated };
103
179
 
104
- // Gate now spans coverage (viewpoint themes) AND depth (data-correctness).
105
- // A depth 'fail' (below the intent threshold) fails the gate; 'warn' does not.
180
+ // Gate spans coverage (viewpoint themes), depth, claim-proof, spec-clause coverage,
181
+ // AND taxonomy-match (scenarios must use the project's viewpoint IDs when defined).
106
182
  const gateStatus: 'PASS' | 'FAIL' =
107
- gate.gaps.length === 0 && depth.verdict !== 'fail' && claim.verdict !== 'fail' ? 'PASS' : 'FAIL';
183
+ gate.gaps.length === 0 && depth.verdict !== 'fail' && claim.verdict !== 'fail' && spec.verdict !== 'fail' && !taxonomyMismatch ? 'PASS' : 'FAIL';
108
184
 
109
185
  return {
110
186
  screen: screenName,
111
187
  scenarioCount: scenarios.length,
112
- gate, depth, claim, taxonomy, balance, duplicates, trace,
188
+ gate, depth, claim, taxonomy, balance, duplicates, trace, spec,
189
+ taxonomyMismatch, downstream, manualOracle: manualOracleResult, ledger, calibration,
113
190
  score: {
114
191
  overall: Math.round(overall * 10) / 10,
115
192
  coverage: Math.round(coverage * 100) / 100,
@@ -43,6 +43,17 @@ const INFER: { code: string; re: RegExp }[] = [
43
43
  { code: 'M9', re: /\b(judgment|human|subjective|manual review)\b/i },
44
44
  ];
45
45
 
46
/**
 * Map free text (for example a legacy testcase's precondition + steps + expected)
 * to a manual-reason code by running it through the shared INFER patterns.
 *
 * The first INFER entry whose pattern matches wins; an empty string means no pattern
 * matched. Sharing INFER with the scenario planner keeps legacy-ingest and the Gherkin
 * planner in agreement.
 *
 * @param text Free text to classify; a falsy value is treated as the empty string.
 * @returns The matching reason code, or '' when nothing matches (→ UI-automatable).
 */
export function classifyReason(text: string): string {
  const haystack = (text || '').toLowerCase();
  const hit = INFER.find(({ re }) => re.test(haystack));
  return hit ? hit.code : '';
}
56
+
46
57
  interface ParsedScenario { name: string; tags: string[]; manual: boolean; reason: string }
47
58
 
48
59
  /** Parse scenarios with their tags + the reason comment line above (for @manual). */
@@ -0,0 +1,87 @@
1
+ /**
2
+ * Static skill-lint (Eval Harness L1) — deterministic quality checks on Sungen's OWN
3
+ * AI-instruction templates, so a broken / unregistered / oversized skill fails before it
4
+ * ships. Learned (generically) from the "static validations" tier of an agent-kit evals
5
+ * layer. No project data — this lints the sungen package's own templates.
6
+ *
7
+ * Design note: the checks are MAPPING-DRIVEN. `AI_RULES_FILE_MAPPING` is the source of
8
+ * truth for what each template installs as, so the lint uses the install target (does it
9
+ * end in `/SKILL.md`?) to tell a top-level skill from a sub-content fragment — instead of
10
+ * guessing from filenames. We deliberately do NOT enforce claude↔github body parity: the
11
+ * two variants are hand-tuned per platform and intentionally diverge in wording and even
12
+ * structure, so byte/heading equality would be pure false positives.
13
+ */
14
+ import * as fs from 'fs';
15
+ import * as path from 'path';
16
+ import { AI_RULES_FILE_MAPPING } from '../../orchestrator/ai-rules-updater';
17
+
18
/** One lint finding raised against a single template file. */
export interface SkillLintFinding {
  /** 'error' findings are counted in `SkillLintResult.errors`; 'warn' findings are advisory. */
  level: 'error' | 'warn';
  /** Template file name (or a descriptive label) the finding refers to. */
  file: string;
  /** Short machine-readable rule identifier, e.g. 'unregistered' or 'line-budget'. */
  rule: string;
  /** Human-readable explanation of what is wrong. */
  detail: string;
}
19
/** Aggregate outcome of a skill-lint run. */
export interface SkillLintResult {
  /** Number of skill template files that were inspected. */
  checked: number;
  /** Every finding, in the order the checks produced them. */
  findings: SkillLintFinding[];
  /** Count of findings whose level is 'error'. */
  errors: number;
}
20
+
21
+ const LINE_BUDGET = 700; // a skill much larger than this is a context-cost smell (warn)
22
+ const SKILL_RE = /^(claude|github)-skill-/;
23
+
24
/**
 * Split a template into its leading `---` frontmatter block and the remaining body.
 *
 * Accepts both LF and CRLF line endings and ignores a leading UTF-8 byte-order mark,
 * so a template saved by a Windows editor is not misreported as having no frontmatter.
 *
 * @param text Full template text.
 * @returns `fm` is the text between the two `---` fences (fences excluded), or null
 *          when the text does not start with a frontmatter block; `body` is everything
 *          after the closing fence, or the untouched input when there is no frontmatter.
 */
function stripFrontmatter(text: string): { fm: string | null; body: string } {
  // A BOM in front of the opening fence would otherwise defeat the `^---` anchor.
  const src = text.charCodeAt(0) === 0xfeff ? text.slice(1) : text;
  const m = src.match(/^---\r?\n([\s\S]*?)\r?\n---(?:\r?\n)?/);
  if (!m) return { fm: null, body: text };
  return { fm: m[1], body: src.slice(m[0].length) };
}
29
+
30
/**
 * Lint the AI-instruction templates found in `dir`.
 *
 * Runs four checks, in this order, and appends their findings in that order:
 *   1. registration integrity between the template files and AI_RULES_FILE_MAPPING,
 *   2. frontmatter (`name` + `description`) on top-level skills,
 *   3. a line budget per skill file (advisory),
 *   4. presence of both a Claude and a GitHub variant per top-level skill (advisory).
 *
 * @param dir Directory holding the `*.md` templates; a missing directory yields no files.
 * @returns The number of skill files checked, all findings, and the error count.
 */
export function lintSkills(dir: string): SkillLintResult {
  const findings: SkillLintFinding[] = [];
  const report = (level: 'error' | 'warn', file: string, rule: string, detail: string): void => {
    findings.push({ level, file, rule, detail });
  };

  const markdownFiles = fs.existsSync(dir) ? fs.readdirSync(dir).filter((f) => f.endsWith('.md')) : [];
  const skillFiles = markdownFiles.filter((f) => SKILL_RE.test(f));

  // Template file -> install target. The mapping is the source of truth for whether a
  // template is a top-level skill (its target ends in /SKILL.md) or a sub-content fragment.
  const target = new Map<string, string>();
  for (const [tpl, dst] of AI_RULES_FILE_MAPPING) target.set(tpl, dst);
  const isTopLevelSkill = (f: string): boolean => (target.get(f) || '').endsWith('/SKILL.md');
  const topLevelSkills = skillFiles.filter(isTopLevelSkill);

  // 1) Registration integrity, both directions: an unmapped skill file never installs,
  //    and a mapping entry without a file ships a broken/empty skill.
  for (const f of skillFiles) {
    if (target.has(f)) continue;
    report('error', f, 'unregistered', 'skill template not in AI_RULES_FILE_MAPPING (it would never be installed)');
  }
  for (const [tpl] of AI_RULES_FILE_MAPPING) {
    if (fs.existsSync(path.join(dir, tpl))) continue;
    report('error', tpl, 'mapped-missing', 'AI_RULES_FILE_MAPPING points to a template that does not exist');
  }

  // 2) Frontmatter — top-level skills only. Fragments (mode-*.md, group-*.md) are loaded
  //    by their parent router and legitimately carry none.
  for (const f of topLevelSkills) {
    const { fm } = stripFrontmatter(fs.readFileSync(path.join(dir, f), 'utf8'));
    if (!fm) {
      report('error', f, 'frontmatter', 'top-level skill (SKILL.md) is missing --- frontmatter --- (Claude/Copilot will not load it)');
      continue;
    }
    if (!/\bname\s*:/.test(fm)) report('error', f, 'frontmatter-name', 'no `name:` in frontmatter');
    if (!/\bdescription\s*:/.test(fm)) report('error', f, 'frontmatter-description', 'no `description:` in frontmatter');
  }

  // 3) Line budget — an oversized skill is a context-cost smell (advisory).
  for (const f of skillFiles) {
    const lines = fs.readFileSync(path.join(dir, f), 'utf8').split('\n').length;
    if (lines > LINE_BUDGET) {
      report('warn', f, 'line-budget', `${lines} lines > ${LINE_BUDGET} (context-cost smell)`);
    }
  }

  // 4) Variant presence (not body equality): each top-level skill should exist for both
  //    platforms. Catches a Claude skill added without its Copilot counterpart. Advisory.
  const skillName = (dst: string): string | null => {
    const m = dst.match(/\/(sungen-[^/]+)\/SKILL\.md$/);
    return m ? m[1] : null;
  };
  const claudeSkills = new Set<string>();
  const githubSkills = new Set<string>();
  for (const f of topLevelSkills) {
    const name = skillName(target.get(f)!);
    if (!name) continue;
    if (f.startsWith('claude-')) claudeSkills.add(name);
    else githubSkills.add(name);
  }
  for (const n of claudeSkills) {
    if (githubSkills.has(n)) continue;
    report('warn', `claude .../${n}/SKILL.md`, 'variant-missing', `Claude skill "${n}" has no GitHub (Copilot) variant`);
  }
  for (const n of githubSkills) {
    if (claudeSkills.has(n)) continue;
    report('warn', `github .../${n}/SKILL.md`, 'variant-missing', `GitHub skill "${n}" has no Claude variant`);
  }

  const errors = findings.filter((f) => f.level === 'error').length;
  return { checked: skillFiles.length, findings, errors };
}
82
+
83
/**
 * Default templates directory, resolved relative to this module so the same code works
 * whether it runs from `src` (via tsx) or from the compiled `dist` tree.
 *
 * @returns Absolute path of `<base>/orchestrator/templates/ai-instructions`, where
 *          `<base>` is two levels above this module's directory.
 */
export function defaultSkillDir(): string {
  // src/harness/eval → src/orchestrator/... | dist/harness/eval → dist/orchestrator/...
  const segments = ['..', '..', 'orchestrator', 'templates', 'ai-instructions'];
  return path.resolve(__dirname, ...segments);
}
@@ -29,6 +29,18 @@ export interface ScenarioInfo {
29
29
  stepSkeleton: string; // normalized steps for duplicate clustering
30
30
  haystack: string; // lowercase name + steps text (for keyword coverage)
31
31
  stepsText: string; // lowercase steps ONLY (name excluded) — for claim-proof
32
+ vpId?: string; // raw leading ID token of the title (project's scheme: VP0-001, MS-HP-001, VP-LIST-001)
33
+ }
34
+
35
/**
 * Format-tolerant check: does this token look like an ID in the project's own scheme
 * rather than a prose word?
 *
 * A token qualifies when it starts with an uppercase letter, consists only of letters,
 * digits, dots and hyphens, contains at least one digit, and is at least 3 characters
 * long. Examples that pass: VP0, VP0-001, MS-HP-001, TV-01, VP-LIST-001.
 *
 * @param s Candidate token.
 * @returns true when the token has the shape of an ID.
 */
export function isIdLike(s: string): boolean {
  if (!/^[A-Z][A-Za-z0-9.-]*$/.test(s)) return false;
  if (!/\d/.test(s)) return false;
  return s.length >= 3;
}
40
+
41
/**
 * Strip the trailing sequence number from an ID (VP0-001 → VP0, MS-HP-001 → MS-HP).
 *
 * The sequence is a `-` or `.` followed by 1–4 digits at the very end of the string;
 * an ID without such a suffix is returned unchanged.
 *
 * @param id The full ID.
 * @returns The ID without its trailing sequence number.
 */
export function idPrefix(id: string): string {
  const sequenceAt = id.search(/[-.]\d{1,4}$/);
  return sequenceAt === -1 ? id : id.slice(0, sequenceAt);
}
33
45
 
34
46
  // ---------- test-viewpoint.md ----------
@@ -50,7 +62,7 @@ export function parseViewpointOverview(filePath: string): ViewpointEntry[] {
50
62
  const cells = line.split('|').map((c) => c.trim()).filter((_, i, a) => i > 0 && i < a.length - 1);
51
63
  if (cells.length >= 3) {
52
64
  const id = cells[0];
53
- if (/^VP[-A-Z0-9]/i.test(id) && !/^vp$/i.test(id) && !/^-+$/.test(cells[1])) {
65
+ if (isIdLike(id) && !/^-+$/.test(cells[1])) {
54
66
  const pr = /high/i.test(cells[1]) ? 'High' : /medium/i.test(cells[1]) ? 'Medium' : /low/i.test(cells[1]) ? 'Low' : 'Unknown';
55
67
  entries.set(id.toUpperCase(), { id: id.toUpperCase(), priority: pr as any, reason: cells[2] });
56
68
  }
@@ -66,8 +78,8 @@ export function parseViewpointOverview(filePath: string): ViewpointEntry[] {
66
78
  if (g) { group = (g[1][0].toUpperCase() + g[1].slice(1).toLowerCase()) as any; continue; }
67
79
  if (/^##\s/.test(line)) { group = undefined; }
68
80
  if (group) {
69
- const m = line.match(/^-\s+(VP[-A-Z0-9]+)/i);
70
- if (m) {
81
+ const m = line.match(/^[-*+]\s+([A-Za-z][A-Za-z0-9.-]*)/);
82
+ if (m && isIdLike(m[1])) {
71
83
  const id = m[1].toUpperCase();
72
84
  const existing = entries.get(id);
73
85
  if (existing) existing.group = group;
@@ -92,6 +104,9 @@ function classifyScenario(sc: ParsedScenario): ScenarioInfo {
92
104
  const codeMatch = sc.name.match(/\bVP-([A-Z]+)-\d+/i);
93
105
  const vpCode = codeMatch ? codeMatch[0].toUpperCase() : undefined;
94
106
  const category = codeMatch ? codeMatch[1].toUpperCase() : undefined;
107
+ // Project-scheme ID: the leading token of the title (VP0-001 / MS-HP-001 / VP-LIST-001).
108
+ const leadMatch = sc.name.match(/^\s*([A-Za-z][A-Za-z0-9.-]*)/);
109
+ const vpId = leadMatch && isIdLike(leadMatch[1]) ? leadMatch[1].toUpperCase() : undefined;
95
110
 
96
111
  // Then-phase detection (And/But inherit previous primary keyword)
97
112
  let last = 'Given';
@@ -136,6 +151,7 @@ function classifyScenario(sc: ParsedScenario): ScenarioInfo {
136
151
  stepSkeleton: skeletonParts.join(' | '),
137
152
  haystack: textParts.join(' ').toLowerCase(),
138
153
  stepsText: stepTextParts.join(' ').toLowerCase(),
154
+ vpId,
139
155
  };
140
156
  }
141
157
 
@@ -0,0 +1,152 @@
1
+ /**
2
+ * Quality gates (batch): downstream-scope + manual-oracle + negative-side-effect +
3
+ * cross-artifact ownership + source-backed strictness.
4
+ * Generic — read the project's own spec.md / feature text / sibling flows; no project data.
5
+ */
6
+ import * as fs from 'fs';
7
+ import * as path from 'path';
8
+ import { ScenarioInfo, loadScenarios, idPrefix } from './parse';
9
+
10
+ // ---------- #2 Downstream-scope ----------
11
+
12
/** Outcome of the downstream-scope gate. */
export interface DownstreamResult {
  /** Success/navigation targets named in the spec, excluding the screen's own route. */
  downstreamRoutes: string[];
  /** Downstream routes that scenarios mention but never start on (bare page-nav only). */
  underCovered: { route: string; slug: string }[];
}
16
+
17
/**
 * Collect the routes a spec hands off to (Navigation Flow / success lines), other than
 * the screen's own route.
 *
 * Only lines mentioning success, navigation, "to (" or an arrow are scanned. A route is
 * kept when it differs from the own route and has more path segments than it.
 *
 * @param specText Raw text of the screen's spec.
 * @returns Distinct downstream routes, in order of first appearance.
 */
function downstreamRoutes(specText: string): string[] {
  const ownMatch = specText.match(/\*\*Route\*\*\s*:\s*`?(\/[^\s`]+)/);
  const ownRoute = (ownMatch && ownMatch[1]) || '';
  const ownDepth = ownRoute.split('/').length;

  const found = new Set<string>();
  const handoffLines = specText.split('\n').filter((line) => /success|navigat|to \(|→/i.test(line));
  for (const line of handoffLines) {
    for (const hit of line.matchAll(/`?(\/[a-z][a-z0-9/_-]+)`?/gi)) {
      const candidate = hit[1];
      if (candidate === ownRoute) continue;
      if (candidate.split('/').length > ownDepth) found.add(candidate);
    }
  }

  // keep only routes that extend beyond the own route (a distinct downstream surface)
  const isDistinctSurface = (r: string): boolean => {
    if (r === ownRoute) return false;
    if (!ownRoute) return true;
    return r.startsWith(ownRoute + '/') || r.split('/').length >= 3;
  };
  return [...found].filter(isDistinctSurface);
}
31
+
32
/**
 * Downstream-scope gate: find downstream routes that scenarios reference but never
 * actually operate on.
 *
 * A route nobody references is treated as outside this screen's scope and ignored.
 * A referenced route counts as substantively covered only when some scenario starts on
 * it (`is on [<downstream>]`); a terminal `see [<downstream>] page` assertion merely
 * proves the transition happened.
 *
 * @param specText  Raw text of the screen's spec.
 * @param scenarios Parsed scenarios of the screen under audit.
 * @returns All downstream routes plus the subset that is under-covered.
 */
export function downstreamScope(specText: string, scenarios: ScenarioInfo[]): DownstreamResult {
  const routes = downstreamRoutes(specText);
  const underCovered: { route: string; slug: string }[] = [];

  routes.forEach((route) => {
    const segments = route.split('/').filter(Boolean);
    const slug = (segments.pop() || route).toLowerCase();
    const routeLower = route.toLowerCase();

    const referencing = scenarios.filter(
      ({ haystack }) => haystack.includes(slug) || haystack.includes(routeLower),
    );
    if (referencing.length === 0) return; // not referenced at all — out of this screen's scope entirely

    const startsOnDownstream = new RegExp(`\\bis on \\[[^\\]]*${slug}`, 'i');
    const operatedOn = referencing.some(({ haystack }) => startsOnDownstream.test(haystack));
    if (!operatedOn) underCovered.push({ route, slug });
  });

  return { downstreamRoutes: routes, underCovered };
}
48
+
49
+ // ---------- #4 Manual-oracle ----------
50
+
51
/** Outcome of the manual-oracle gate. */
export interface ManualOracleResult {
  /** Number of scenario blocks tagged @manual. */
  manualTotal: number;
  /** Titles (truncated) of @manual scenarios lacking setup/action/oracle detail. */
  insufficient: string[];
}
55
+
56
/**
 * Split feature text on blank lines and keep only the chunks that contain a
 * `Scenario:` header.
 *
 * @param featureText Raw text of a .feature file.
 * @returns The blank-line-delimited chunks that hold a scenario, in file order.
 */
function blocks(featureText: string): string[] {
  const scenarioChunks: string[] = [];
  for (const chunk of featureText.split(/\n\s*\n/)) {
    if (/\bScenario:/.test(chunk)) scenarioChunks.push(chunk);
  }
  return scenarioChunks;
}
59
+
60
/**
 * Manual-oracle gate: flag @manual scenarios that are little more than a one-line note.
 *
 * A @manual block is considered sufficient when it has an oracle/steps marker, a
 * numbered step, or a substantive comment block (at least 3 comment lines).
 *
 * @param featureText Raw text of the .feature file.
 * @returns The number of @manual blocks and the (80-char truncated) titles of the
 *          insufficient ones.
 */
export function manualOracle(featureText: string): ManualOracleResult {
  const oracleMarker = /tester verifies|oracle\s*:|requires|verify that|expected\s*:|steps?\s*:/i;
  const numberedStep = /^\s*#?\s*\d+\.\s/m;

  const manualBlocks = blocks(featureText).filter((b) => /@manual\b/.test(b));

  const insufficient = manualBlocks
    .filter((b) => {
      if (oracleMarker.test(b) || numberedStep.test(b)) return false;
      const commentCount = b.split('\n').filter((l) => /^\s*#/.test(l)).length;
      return commentCount < 3;
    })
    .map((b) => {
      const title = (b.match(/Scenario:\s*(.+)/) || [])[1] || '(unnamed)';
      return title.trim().slice(0, 80);
    });

  return { manualTotal: manualBlocks.length, insufficient };
}
77
+
78
+ // ---------- #4 Negative side-effect ----------
79
+
80
/** Title phrases that claim something must NOT happen (an absence / exactly-once claim). */
const NEG_TITLE = /\b(does not|doesn't|no second|not dispatch|not sent|without submitting|no leak|single request|exactly one|count is 1|only one request|no duplicate|not create)\b/i;

/**
 * Flag scenarios whose title asserts an ABSENCE but whose steps do not prove it.
 *
 * @manual scenarios are skipped — they are a legitimate deferral, checked separately by
 * the manual-oracle gate. For the rest, the steps must contain a count or negative-state
 * assertion; a plain happy-path outcome is not proof.
 *
 * @param scenarios Parsed scenarios of the screen under audit.
 * @returns Titles (truncated to 80 chars) of scenarios with an unproven negative claim.
 */
export function negativeSideEffect(scenarios: ScenarioInfo[]): string[] {
  const absenceProof = /\bcount\b|tohavecount|table with|is hidden|are hidden|not complete|message is hidden/;
  return scenarios
    .filter((s) => !s.manual && NEG_TITLE.test(s.name))
    .filter((s) => !absenceProof.test(s.stepsText))
    .map((s) => s.name.slice(0, 80));
}
93
+
94
+ // ---------- #7 Source-backed strictness ----------
95
+
96
/**
 * Source-backed strictness: list scenarios that trace to no source at all.
 *
 * A scenario is considered sourced when any of these holds:
 *   - its ID (project-scheme `vpId`, else `vpCode`) matches a declared viewpoint ID —
 *     language-agnostic, the primary signal;
 *   - its feature block (comments included) mentions one of the FR ids;
 *   - it shares at least two keywords with some viewpoint item.
 *
 * When the project supplies no FR ids, viewpoint items or viewpoint IDs there is no
 * contract to check against, and nothing is reported.
 *
 * @param scenarios      Parsed scenarios of the screen under audit.
 * @param frIds          FR identifiers from the spec.
 * @param viewpointItems Text of the viewpoint items.
 * @param viewpointIds   Declared viewpoint IDs.
 * @param featureText    Raw .feature text, used to recover per-scenario blocks.
 * @returns Titles (truncated to 80 chars) of the unsourced scenarios.
 */
export function sourceBacked(scenarios: ScenarioInfo[], frIds: string[], viewpointItems: string[], viewpointIds: string[], featureText: string): string[] {
  const hasContract = frIds.length > 0 || viewpointItems.length > 0 || viewpointIds.length > 0;
  if (!hasContract) return []; // no contract

  const keywordsOf = (text: string): Set<string> => new Set(text.match(/[a-z][a-z-]{4,}/g) || []);

  const declaredIds = viewpointIds.map((v) => v.toUpperCase());
  const frNeedles = frIds.map((fid) => fid.toLowerCase());
  const itemKeywords = viewpointItems.map((item) => keywordsOf(item.toLowerCase()));

  // per-scenario blocks (INCLUDING comments) so an FR cited in a comment counts as a source
  const blockByTitle = new Map<string, string>();
  featureText.split(/\n\s*\n/).forEach((chunk) => {
    const header = chunk.match(/Scenario:\s*(.+)/);
    if (header) blockByTitle.set(header[1].trim().toLowerCase(), chunk.toLowerCase());
  });

  const isSourced = (s: ScenarioInfo): boolean => {
    const id = (s.vpId || s.vpCode || '').toUpperCase();
    if (id && declaredIds.some((v) => id === v || id.startsWith(v) || v.startsWith(idPrefix(id)))) return true;

    const block = blockByTitle.get(s.name.trim().toLowerCase()) || s.haystack;
    if (frNeedles.some((needle) => block.includes(needle))) return true;

    const scenarioKeywords = keywordsOf(s.haystack);
    return itemKeywords.some((keywords) => {
      let shared = 0;
      for (const word of keywords) {
        if (scenarioKeywords.has(word)) shared++;
      }
      return shared >= 2;
    });
  };

  return scenarios.filter((s) => !isSourced(s)).map((s) => s.name.slice(0, 80));
}
120
+
121
+ // ---------- #6 Cross-artifact ownership ----------
122
+
123
/** Outcome of the cross-artifact ownership gate. */
export interface OwnershipResult {
  /** Scenarios whose step shape also appears in a flow, with that flow's directory name. */
  duplicates: { scenario: string; flow: string }[];
}
124
+
125
/**
 * Cross-artifact ownership: report scenarios whose step-skeleton also appears in a
 * sibling flow feature, i.e. the same behaviour is owned twice.
 *
 * Layout: `screenDir` is `<root>/qa/screens/<name>` and flows live at
 * `<root>/qa/flows/<flow>/features/*.feature`. Skeletons of 20 characters or fewer are
 * ignored as too generic to signal real duplication. When several flows share a
 * skeleton, the flow read last is the one reported.
 *
 * The audited directory is never compared against itself: if `screenDir` happens to be
 * a flow directory, its own feature files would otherwise match every one of its
 * scenarios. Entries that are not directories are skipped instead of raising ENOTDIR.
 *
 * @param screenDir Directory of the unit under audit.
 * @param scenarios Parsed scenarios of that unit.
 * @returns The duplicated scenarios (title truncated to 70 chars) with the owning flow.
 */
export function crossArtifactOwnership(screenDir: string, scenarios: ScenarioInfo[]): OwnershipResult {
  const duplicates: { scenario: string; flow: string }[] = [];
  const isDirectory = (p: string): boolean => fs.existsSync(p) && fs.statSync(p).isDirectory();

  // screenDir = <root>/qa/screens/<name>; flows live at <root>/qa/flows/*/features/*.feature
  const flowsRoot = path.resolve(screenDir, '..', '..', 'flows');
  if (!isDirectory(flowsRoot)) return { duplicates };

  const ownDir = path.resolve(screenDir);
  const bySkeleton = new Map<string, string>();
  for (const flow of fs.readdirSync(flowsRoot)) {
    // A unit cannot duplicate-own its own scenarios.
    if (path.resolve(flowsRoot, flow) === ownDir) continue;
    const featuresDir = path.join(flowsRoot, flow, 'features');
    if (!isDirectory(featuresDir)) continue;
    for (const file of fs.readdirSync(featuresDir).filter((x) => x.endsWith('.feature'))) {
      for (const flowScenario of loadScenarios(path.join(featuresDir, file))) {
        const skeleton = flowScenario.stepSkeleton;
        if (skeleton && skeleton.length > 20) bySkeleton.set(skeleton, flow);
      }
    }
  }
  if (!bySkeleton.size) return { duplicates };

  for (const s of scenarios) {
    const flow = s.stepSkeleton && s.stepSkeleton.length > 20 ? bySkeleton.get(s.stepSkeleton) : undefined;
    if (flow) duplicates.push({ scenario: s.name.slice(0, 70), flow });
  }
  return { duplicates };
}
148
+
149
/**
 * Convenience reader: return a file's UTF-8 contents, or '' when the path does not exist.
 *
 * @param p Path of the file to read.
 * @returns The file contents, or the empty string for a missing path.
 */
export function readText(p: string): string {
  if (!fs.existsSync(p)) return '';
  return fs.readFileSync(p, 'utf-8');
}
@@ -9,7 +9,7 @@
9
9
  import * as fs from 'fs';
10
10
  import * as path from 'path';
11
11
  import { parse as parseYaml } from 'yaml';
12
- import { ScenarioInfo, ViewpointEntry } from './parse';
12
+ import { ScenarioInfo, ViewpointEntry, idPrefix } from './parse';
13
13
 
14
14
  // Business-critical category codes (project VP-<CAT> prefixes). Configurable later.
15
15
  const BUSINESS_CRITICAL_CATS = ['LIST', 'CART', 'PRODUCT', 'FILTER', 'CHECKOUT', 'ORDER'];
@@ -263,17 +263,23 @@ export interface TraceResult {
263
263
 
264
264
  export function traceability(scenarios: ScenarioInfo[], viewpoints: ViewpointEntry[]): TraceResult {
265
265
  const overviewIds = new Set(viewpoints.map((v) => v.id.toUpperCase()));
266
- const withCode = scenarios.filter((s) => s.vpCode);
267
- // A scenario maps to overview if its full VP code OR its category-derived id exists in overview.
268
- const mapped = withCode.filter((s) => overviewIds.has(s.vpCode!) || [...overviewIds].some((id) => id.includes(s.category || '###')));
266
+ // A scenario carries an ID if it has a project-scheme leading ID (vpId) or a VP-CAT code.
267
+ const withCode = scenarios.filter((s) => s.vpId || s.vpCode);
268
+ // Maps to overview if the scenario's ID, its sequence-stripped prefix, or its VP-CAT code
269
+ // matches a declared viewpoint ID (format-tolerant: VP0-001↔VP0, MS-HP-001↔MS-HP-001).
270
+ const mapped = withCode.filter((s) => {
271
+ const id = (s.vpId || s.vpCode || '').toUpperCase();
272
+ if (overviewIds.has(id) || overviewIds.has(idPrefix(id))) return true;
273
+ return [...overviewIds].some((oid) => id.startsWith(oid) || oid.startsWith(idPrefix(id)) || (!!s.category && oid.includes(s.category)));
274
+ });
269
275
  return {
270
276
  total: scenarios.length,
271
277
  withVpCode: withCode.length,
272
278
  mappedToOverview: mapped.length,
273
279
  withVpCodeRatio: scenarios.length ? withCode.length / scenarios.length : 0,
274
280
  mappedRatio: scenarios.length ? mapped.length / scenarios.length : 0,
275
- note: mapped.length < withCode.length * 0.5
276
- ? 'Scenarios use ad-hoc VP-<CAT>-NNN codes not linked to viewpoint-overview ids (weak traceability — see review Gate 4).'
281
+ note: withCode.length && mapped.length < withCode.length * 0.5
282
+ ? 'Scenario IDs do not match the viewpoint-overview ids (weak traceability — re-tag to the project viewpoint IDs).'
277
283
  : 'Traceable.',
278
284
  };
279
285
  }
@@ -367,14 +373,85 @@ const CLAIM_RULES: ClaimRule[] = [
367
373
  hint: 'capture the before-state and assert the after-state differs, or assert the visible/hidden transition.',
368
374
  severity: 'warn',
369
375
  },
376
+ {
377
+ // GENERAL — mutation-absence. A title asserts that a STATE-CHANGING action does NOT
378
+ // happen / does not repeat (submit, send, create, charge, order, pay, email, request,
379
+ // OTP, register, book, a re-/double-/again repeat…) paired with a negation in EITHER
380
+ // language. A mutation's absence is NOT observable from a positive `see [X] page` —
381
+ // that page looks identical whether or not the mutation fired — so it MUST prove a
382
+ // count/contrast (record count unchanged) or defer to @manual. This is the general
383
+ // category behind "browser back does not re-submit", "does not re-charge the card",
384
+ // "double-click does not create two orders" — not a per-feature keyword.
385
+ claim: 'no-side-effect/no-duplicate',
386
+ title: /(?=.*\b(submit|sen[dt]|resend|resubmit|re-?fire|re-?issue|re-?post|repost|create|charge|order|payment|\bpay\b|email|request|\botp\b|insert|register|book|duplicate|double[- ]?submit|again|twice)\b)(?=.*(\bno\b|\bnot\b|n['’]t\b|\bnever\b|\bwithout\b|\bcannot\b|prevent|block|avoid|reject|disabl|\bdeny\b|denies|\bkhông\b|\bchưa\b))/i,
387
+ proof: /\bcount\b|row with \{\{|table with|tohavecount|is hidden|are hidden|not complete|no longer/,
388
+ need: 'a record/request-count proof (count stays at one, e.g. `User see [Table] row with {{count}}`) or @manual with a request-count oracle',
389
+ hint: 'a "does-not-happen / does-not-repeat" claim about a state-changing action is NOT proven by a terminal `see [...] page` — that page is identical whether or not the action (re-)fired. Prove the side-effect count is unchanged, or mark @manual with a setup→action→assert-no-duplicate oracle.',
390
+ severity: 'fail',
391
+ },
370
392
  {
371
393
  claim: 'hidden/rejected/not-complete',
372
- title: /\b(hidden|closed|dismiss(es|ed)?|does not|doesn't|not complete|rejected|inert)\b/,
394
+ title: /\b(hidden|closed|dismiss(es|ed)?|not complete|rejected|inert)\b/,
373
395
  proof: /\bis hidden\b|\bare hidden\b|message is hidden|not complete|\bhidden\b/,
374
396
  need: 'a negative / hidden assertion (`… is hidden`)',
375
397
  hint: 'assert the absence/hidden state that the title claims, not just an unrelated visible element.',
376
398
  severity: 'fail',
377
399
  },
400
+ {
401
+ claim: 'cleared/emptied',
402
+ title: /\b(cleared|clears|emptied|empties|reset to empty|wiped)\b/,
403
+ proof: /\bis empty\b|with \{\{empty|with ['"]?['"]?\s*$|\bempty\b/,
404
+ need: 'an empty/cleared assertion after the action (e.g. `field with {{empty_value}}` / `is empty`)',
405
+ hint: 'prove the value is actually gone — return to the screen and assert the field is empty, not just that the action ran.',
406
+ severity: 'fail',
407
+ },
408
+ {
409
+ claim: 'restored/preserved',
410
+ title: /\b(restored|preserved|persists?|retained|remembered|kept)\b/,
411
+ proof: /\bremember\b|with \{\{|field with/,
412
+ need: 'the value re-asserted after the transition (capture or `field with {{v}}` after returning)',
413
+ hint: 'prove the value survives — assert the field still holds the typed value after the reload/return, not just that it was typed.',
414
+ severity: 'warn',
415
+ },
416
+ {
417
+ claim: 'independent/separate',
418
+ title: /\b(independent|separate|isolat(ed|es)|per[- ]tab|two tabs|each tab)\b/,
419
+ proof: /\bcontext\b|tab a|tab b|second (tab|context)/,
420
+ need: 'a multi-context proof (tab A vs tab B)',
421
+ hint: 'independence across tabs/contexts is rarely DSL-expressible — mark @manual with a clear setup/action/oracle.',
422
+ severity: 'warn',
423
+ },
424
+ {
425
+ claim: 'sanitized/inert',
426
+ title: /\b(sanitized|sanitised|escaped|inert|not executed|not rendered|stripped)\b/,
427
+ proof: /field with \{\{|payload|inert|toContainText|is hidden/,
428
+ need: 'the payload echoed as inert text (`field with {{payload}}`) + no execution',
429
+ hint: 'prove the payload round-trips as literal text and triggers nothing — assert the field value and the absence of any effect.',
430
+ severity: 'warn',
431
+ },
432
+ {
433
+ claim: 'announces/aria',
434
+ title: /\b(announce[sd]?|aria|screen[- ]reader|programmatically associated)\b/,
435
+ proof: /aria|role|@manual|describedby|is focused/,
436
+ need: 'an aria/role assertion (or @manual with a screen-reader oracle)',
437
+ hint: 'ARIA announcement is usually not DSL-expressible — assert aria attributes if possible, else @manual with an NVDA/VoiceOver oracle.',
438
+ severity: 'warn',
439
+ },
440
+ {
441
+ // GENERAL CATCH-ALL (last) — any negative/absence title not handled by a specific
442
+ // rule above. Language-aware negation, NO verb list: if the title says "no / not /
443
+ // never / without / không / prevents …" the steps must carry a NEGATIVE/contrast
444
+ // assertion (hidden, empty, error, count, no-longer, a remembered before/after) — not
445
+ // only a positive presence. WARN, because a positive proxy is sometimes a valid
446
+ // negative proof (e.g. "stayed on the login page"); the semantic reviewer is the
447
+ // authoritative recall layer for the residue this can't judge structurally.
448
+ claim: 'negative-claim/absence',
449
+ title: /(\bno\b|\bnot\b|n['’]t\b|\bnever\b|\bwithout\b|\bcannot\b|prevent|block|avoid|reject|disabl|\bdeny\b|denies|\bkhông\b|\bchưa\b)/i,
450
+ proof: /is hidden|are hidden|is empty|no longer|not complete|disabl|invalid|rejected|\berror\b|\bcount\b|row with \{\{|table with|\bremember\b|\bexactly\b|tohavecount/i,
451
+ need: 'a proof of the ABSENCE — a contrast/empty/hidden/error/count assertion, or @manual with an oracle',
452
+ hint: 'a negative claim ("no / not / không …") is not proven by a positive `see [X]` that looks the same whether or not the claim holds. Assert the contrast (state hidden/empty, error shown, count unchanged), or mark @manual.',
453
+ severity: 'warn',
454
+ },
378
455
  ];
379
456
 
380
457
  // ---------- Viewpoint taxonomy-lint (harness-roadmap §0.5 Q3) ----------