@sun-asterisk/sungen 3.0.0-beta.81 → 3.0.0-beta.82

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ import { Command } from 'commander';
2
+ export declare function registerEvalCommand(program: Command): void;
3
+ //# sourceMappingURL=eval.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"eval.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/eval.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAGpC,wBAAgB,mBAAmB,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI,CAwB1D"}
@@ -0,0 +1,37 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.registerEvalCommand = registerEvalCommand;
4
+ const skill_lint_1 = require("../../harness/eval/skill-lint");
5
/**
 * Register the `eval` sub-command on `program`.
 *
 * `eval --skills` runs the static skill lint over `--dir` (or the bundled
 * templates when `--dir` is not given) and prints either a human-readable
 * report or, with `--json`, the raw result object.
 *
 * Exit status: 0 when the lint reports no error-level finding, 2 when it
 * reports at least one, 1 when the command itself fails (for example when
 * `--skills` is missing or the lint throws).
 */
function registerEvalCommand(program) {
    program
        .command('eval')
        .description('Eval harness: quality checks on Sungen\'s own skills/instructions (dev/CI)')
        // The lint checks that both platform variants EXIST; it does not compare their bodies.
        .option('--skills', 'Static skill-lint: frontmatter, line budget, claude↔github variant presence, registration')
        .option('--dir <path>', 'Templates dir to lint (default: bundled ai-instructions)')
        .option('--json', 'Output the raw findings JSON')
        .action((options) => {
        try {
            if (!options.skills)
                throw new Error('Provide --skills (the only eval mode today)');
            const dir = options.dir || (0, skill_lint_1.defaultSkillDir)();
            const r = (0, skill_lint_1.lintSkills)(dir);
            // Set exitCode and return instead of calling process.exit(): exiting
            // immediately can cut off output that is still buffered when stdout is a
            // pipe, which is exactly how `--json` is consumed.
            process.exitCode = r.errors > 0 ? 2 : 0;
            if (options.json) {
                console.log(JSON.stringify(r, null, 2));
                return;
            }
            console.log('');
            console.log(`━━━ Skill-lint: ${r.checked} skill template(s) ━━━`);
            if (!r.findings.length)
                console.log(' ✓ all skills pass (frontmatter · line-budget · variant-presence · registration)');
            for (const f of r.findings)
                console.log(` ${f.level === 'error' ? '✗' : '⚠'} [${f.rule}] ${f.file} — ${f.detail}`);
            console.log('');
        }
        catch (error) {
            console.error('Error:', error instanceof Error ? error.message : error);
            process.exitCode = 1;
        }
    });
}
37
+ //# sourceMappingURL=eval.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"eval.js","sourceRoot":"","sources":["../../../src/cli/commands/eval.ts"],"names":[],"mappings":";;AAGA,kDAwBC;AA1BD,8DAA4E;AAE5E,SAAgB,mBAAmB,CAAC,OAAgB;IAClD,OAAO;SACJ,OAAO,CAAC,MAAM,CAAC;SACf,WAAW,CAAC,4EAA4E,CAAC;SACzF,MAAM,CAAC,UAAU,EAAE,+EAA+E,CAAC;SACnG,MAAM,CAAC,cAAc,EAAE,0DAA0D,CAAC;SAClF,MAAM,CAAC,QAAQ,EAAE,8BAA8B,CAAC;SAChD,MAAM,CAAC,CAAC,OAAO,EAAE,EAAE;QAClB,IAAI,CAAC;YACH,IAAI,CAAC,OAAO,CAAC,MAAM;gBAAE,MAAM,IAAI,KAAK,CAAC,6CAA6C,CAAC,CAAC;YACpF,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,IAAI,IAAA,4BAAe,GAAE,CAAC;YAC7C,MAAM,CAAC,GAAG,IAAA,uBAAU,EAAC,GAAG,CAAC,CAAC;YAC1B,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;gBAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;gBAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAAC,CAAC;YAClG,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAChB,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC,OAAO,wBAAwB,CAAC,CAAC;YAClE,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM;gBAAE,OAAO,CAAC,GAAG,CAAC,+EAA+E,CAAC,CAAC;YACrH,KAAK,MAAM,CAAC,IAAI,CAAC,CAAC,QAAQ;gBAAE,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,KAAK,KAAK,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,KAAK,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,IAAI,MAAM,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC;YACpH,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAChB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACrC,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,QAAQ,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;YACxE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;IACH,CAAC,CAAC,CAAC;AACP,CAAC"}
package/dist/cli/index.js CHANGED
@@ -17,6 +17,7 @@ const add_flow_1 = require("./commands/add-flow");
17
17
  const dashboard_1 = require("./commands/dashboard");
18
18
  const audit_1 = require("./commands/audit");
19
19
  const ingest_1 = require("./commands/ingest");
20
+ const eval_1 = require("./commands/eval");
20
21
  const manifest_1 = require("./commands/manifest");
21
22
  const ledger_1 = require("./commands/ledger");
22
23
  const feedback_1 = require("./commands/feedback");
@@ -58,6 +59,7 @@ async function main() {
58
59
  (0, capability_1.registerCapabilityCommand)(program);
59
60
  (0, flow_check_1.registerFlowCheckCommand)(program);
60
61
  (0, ingest_1.registerIngestCommand)(program);
62
+ (0, eval_1.registerEvalCommand)(program);
61
63
  await program.parseAsync(process.argv);
62
64
  }
63
65
  main().catch((error) => {
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/cli/index.ts"],"names":[],"mappings":";;AACA;;;GAGG;;AAEH,yCAAoC;AACpC,0CAAsD;AACtD,wCAAoD;AACpD,kDAA8D;AAC9D,kDAA8D;AAC9D,8CAA0D;AAC1D,kDAA8D;AAC9D,4CAAwD;AACxD,kDAA6D;AAC7D,oDAAgE;AAChE,4CAAwD;AACxD,8CAA0D;AAC1D,kDAA8D;AAC9D,8CAA0D;AAC1D,kDAA8D;AAC9D,0DAAqE;AACrE,4CAAwD;AACxD,oDAAgE;AAChE,oDAAgE;AAChE,sDAAkE;AAClE,sDAAiE;AAEjE,wFAAwF;AACxF,MAAM,EAAE,OAAO,EAAE,GAAG,OAAO,CAAC,oBAAoB,CAAwB,CAAC;AAEzE,KAAK,UAAU,IAAI;IACjB,MAAM,OAAO,GAAG,IAAI,mBAAO,EAAE,CAAC;IAE9B,OAAO;SACJ,IAAI,CAAC,QAAQ,CAAC;SACd,WAAW,CAAC,oEAAoE,CAAC;SACjF,OAAO,CAAC,OAAO,CAAC,CAAC;IAEpB,iBAAiB;IACjB,OAAO;SACJ,MAAM,CAAC,eAAe,EAAE,wBAAwB,CAAC,CAAC;IAErD,wBAAwB;IACxB,IAAA,0BAAmB,EAAC,OAAO,CAAC,CAAC;IAC7B,IAAA,wBAAkB,EAAC,OAAO,CAAC,CAAC;IAC5B,IAAA,kCAAuB,EAAC,OAAO,CAAC,CAAC;IACjC,IAAA,kCAAuB,EAAC,OAAO,CAAC,CAAC;IACjC,IAAA,8BAAqB,EAAC,OAAO,CAAC,CAAC;IAC/B,IAAA,kCAAuB,EAAC,OAAO,CAAC,CAAC;IACjC,IAAA,4BAAoB,EAAC,OAAO,CAAC,CAAC;IAC9B,IAAA,iCAAsB,EAAC,OAAO,CAAC,CAAC;IAChC,IAAA,oCAAwB,EAAC,OAAO,CAAC,CAAC;IAClC,IAAA,4BAAoB,EAAC,OAAO,CAAC,CAAC;IAC9B,IAAA,kCAAuB,EAAC,OAAO,CAAC,CAAC;IACjC,IAAA,8BAAqB,EAAC,OAAO,CAAC,CAAC;IAC/B,IAAA,kCAAuB,EAAC,OAAO,CAAC,CAAC;IACjC,IAAA,yCAA0B,EAAC,OAAO,CAAC,CAAC;IACpC,IAAA,4BAAoB,EAAC,OAAO,CAAC,CAAC;IAC9B,IAAA,oCAAwB,EAAC,OAAO,CAAC,CAAC;IAClC,IAAA,oCAAwB,EAAC,OAAO,CAAC,CAAC;IAClC,IAAA,sCAAyB,EAAC,OAAO,CAAC,CAAC;IACnC,IAAA,qCAAwB,EAAC,OAAO,CAAC,CAAC;IAClC,IAAA,8BAAqB,EAAC,OAAO,CAAC,CAAC;IAE/B,MAAM,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;AACzC,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;IACrB,OAAO,CAAC,KAAK,CAAC,cAAc,EAAE,KAAK,CAAC,CAAC;IACrC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/cli/index.ts"],"names":[],"mappings":";;AACA;;;GAGG;;AAEH,yCAAoC;AACpC,0CAAsD;AACtD,wCAAoD;AACpD,kDAA8D;AAC9D,kDAA8D;AAC9D,8CAA0D;AAC1D,kDAA8D;AAC9D,4CAAwD;AACxD,kDAA6D;AAC7D,oDAAgE;AAChE,4CAAwD;AACxD,8CAA0D;AAC1D,0CAAsD;AACtD,kDAA8D;AAC9D,8CAA0D;AAC1D,kDAA8D;AAC9D,0DAAqE;AACrE,4CAAwD;AACxD,oDAAgE;AAChE,oDAAgE;AAChE,sDAAkE;AAClE,sDAAiE;AAEjE,wFAAwF;AACxF,MAAM,EAAE,OAAO,EAAE,GAAG,OAAO,CAAC,oBAAoB,CAAwB,CAAC;AAEzE,KAAK,UAAU,IAAI;IACjB,MAAM,OAAO,GAAG,IAAI,mBAAO,EAAE,CAAC;IAE9B,OAAO;SACJ,IAAI,CAAC,QAAQ,CAAC;SACd,WAAW,CAAC,oEAAoE,CAAC;SACjF,OAAO,CAAC,OAAO,CAAC,CAAC;IAEpB,iBAAiB;IACjB,OAAO;SACJ,MAAM,CAAC,eAAe,EAAE,wBAAwB,CAAC,CAAC;IAErD,wBAAwB;IACxB,IAAA,0BAAmB,EAAC,OAAO,CAAC,CAAC;IAC7B,IAAA,wBAAkB,EAAC,OAAO,CAAC,CAAC;IAC5B,IAAA,kCAAuB,EAAC,OAAO,CAAC,CAAC;IACjC,IAAA,kCAAuB,EAAC,OAAO,CAAC,CAAC;IACjC,IAAA,8BAAqB,EAAC,OAAO,CAAC,CAAC;IAC/B,IAAA,kCAAuB,EAAC,OAAO,CAAC,CAAC;IACjC,IAAA,4BAAoB,EAAC,OAAO,CAAC,CAAC;IAC9B,IAAA,iCAAsB,EAAC,OAAO,CAAC,CAAC;IAChC,IAAA,oCAAwB,EAAC,OAAO,CAAC,CAAC;IAClC,IAAA,4BAAoB,EAAC,OAAO,CAAC,CAAC;IAC9B,IAAA,kCAAuB,EAAC,OAAO,CAAC,CAAC;IACjC,IAAA,8BAAqB,EAAC,OAAO,CAAC,CAAC;IAC/B,IAAA,kCAAuB,EAAC,OAAO,CAAC,CAAC;IACjC,IAAA,yCAA0B,EAAC,OAAO,CAAC,CAAC;IACpC,IAAA,4BAAoB,EAAC,OAAO,CAAC,CAAC;IAC9B,IAAA,oCAAwB,EAAC,OAAO,CAAC,CAAC;IAClC,IAAA,oCAAwB,EAAC,OAAO,CAAC,CAAC;IAClC,IAAA,sCAAyB,EAAC,OAAO,CAAC,CAAC;IACnC,IAAA,qCAAwB,EAAC,OAAO,CAAC,CAAC;IAClC,IAAA,8BAAqB,EAAC,OAAO,CAAC,CAAC;IAC/B,IAAA,0BAAmB,EAAC,OAAO,CAAC,CAAC;IAE7B,MAAM,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;AACzC,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;IACrB,OAAO,CAAC,KAAK,CAAC,cAAc,EAAE,KAAK,CAAC,CAAC;IACrC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
@@ -0,0 +1,16 @@
1
+ export interface SkillLintFinding {
2
+ level: 'error' | 'warn';
3
+ file: string;
4
+ rule: string;
5
+ detail: string;
6
+ }
7
+ export interface SkillLintResult {
8
+ checked: number;
9
+ findings: SkillLintFinding[];
10
+ errors: number;
11
+ }
12
+ /** Lint the AI-instruction templates in `dir`. The argument is required; pass `defaultSkillDir()` to lint the bundled templates. */
13
+ export declare function lintSkills(dir: string): SkillLintResult;
14
+ /** Default templates dir, resolved relative to this module (works from src via tsx and dist). */
15
+ export declare function defaultSkillDir(): string;
16
+ //# sourceMappingURL=skill-lint.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"skill-lint.d.ts","sourceRoot":"","sources":["../../../src/harness/eval/skill-lint.ts"],"names":[],"mappings":"AAiBA,MAAM,WAAW,gBAAgB;IAAG,KAAK,EAAE,OAAO,GAAG,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE;AACzG,MAAM,WAAW,eAAe;IAAG,OAAO,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,gBAAgB,EAAE,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE;AAWlG,yFAAyF;AACzF,wBAAgB,UAAU,CAAC,GAAG,EAAE,MAAM,GAAG,eAAe,CAkDvD;AAED,iGAAiG;AACjG,wBAAgB,eAAe,IAAI,MAAM,CAGxC"}
@@ -0,0 +1,129 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.lintSkills = lintSkills;
37
+ exports.defaultSkillDir = defaultSkillDir;
38
+ /**
39
+ * Static skill-lint (Eval Harness L1) — deterministic quality checks on Sungen's OWN
40
+ * AI-instruction templates, so a broken / unregistered / oversized skill fails before it
41
+ * ships. Learned (generically) from the "static validations" tier of an agent-kit evals
42
+ * layer. No project data — this lints the sungen package's own templates.
43
+ *
44
+ * Design note: the checks are MAPPING-DRIVEN. `AI_RULES_FILE_MAPPING` is the source of
45
+ * truth for what each template installs as, so the lint uses the install target (does it
46
+ * end in `/SKILL.md`?) to tell a top-level skill from a sub-content fragment — instead of
47
+ * guessing from filenames. We deliberately do NOT enforce claude↔github body parity: the
48
+ * two variants are hand-tuned per platform and intentionally diverge in wording and even
49
+ * structure, so byte/heading equality would be pure false positives.
50
+ */
51
+ const fs = __importStar(require("fs"));
52
+ const path = __importStar(require("path"));
53
+ const ai_rules_updater_1 = require("../../orchestrator/ai-rules-updater");
54
+ const LINE_BUDGET = 700; // a skill much larger than this is a context-cost smell (warn)
55
+ const SKILL_RE = /^(claude|github)-skill-/;
56
/**
 * Split a template into its frontmatter and the remaining body.
 *
 * The frontmatter has to open the file: a `---` line, the metadata, and a
 * closing `---` line. A leading byte-order mark, CRLF line endings and trailing
 * blanks on the fence lines are accepted, so a template saved by a Windows
 * editor is not misreported as having no frontmatter.
 *
 * Returns `{ fm, body }`: `fm` is the raw text between the fences (`null` when
 * the file has no frontmatter block) and `body` is everything after the closing
 * fence (the whole input when there is no block).
 */
function stripFrontmatter(text) {
    // The closing fence must be a whole line, so a metadata line that merely
    // starts with `---` does not end the block early.
    const m = text.match(/^\uFEFF?---[ \t]*\r?\n([\s\S]*?)\r?\n---[ \t]*(?:\r?\n|$)/);
    if (!m)
        return { fm: null, body: text };
    return { fm: m[1], body: text.slice(m[0].length) };
}
62
/** Number of lines in `text`; a final newline ends the last line, it does not start another. */
function countLines(text) {
    if (text === '')
        return 0;
    const segments = text.split('\n').length;
    return text.endsWith('\n') ? segments - 1 : segments;
}
/**
 * Lint the AI-instruction templates found in `dir`.
 *
 * Only `*.md` files whose name starts with `claude-skill-` or `github-skill-`
 * are examined. Checks run in this order:
 *  1. registration (error): every such template is listed in
 *     `AI_RULES_FILE_MAPPING`, and every mapped template exists in `dir`;
 *  2. frontmatter (error): templates whose install target ends in `/SKILL.md`
 *     have a frontmatter block with top-level `name:` and `description:` keys;
 *  3. line budget (warn): templates longer than `LINE_BUDGET` lines;
 *  4. variant presence (warn): each `sungen-*` skill exists for both the
 *     Claude and the GitHub target. Bodies are not compared.
 *
 * `dir` is required. A directory that does not exist is treated as empty, so
 * every mapped template is then reported as `mapped-missing`.
 *
 * Returns `{ checked, findings, errors }`: the number of skill templates
 * examined, all findings, and how many of them have level `error`.
 */
function lintSkills(dir) {
    const findings = [];
    // readdirSync promises no particular order; sort so the report is stable.
    const skillFiles = fs.existsSync(dir)
        ? fs.readdirSync(dir).filter((f) => f.endsWith('.md') && SKILL_RE.test(f)).sort()
        : [];
    // Several checks need the same template text; read each file only once.
    const textCache = new Map();
    const readTemplate = (f) => {
        let text = textCache.get(f);
        if (text === undefined) {
            text = fs.readFileSync(path.join(dir, f), 'utf8');
            textCache.set(f, text);
        }
        return text;
    };
    // mapping: template file -> install target (source of truth for "is this a top-level skill")
    const target = new Map(ai_rules_updater_1.AI_RULES_FILE_MAPPING.map(([tpl, dst]) => [tpl, dst]));
    const isTopLevelSkill = (f) => (target.get(f) || '').endsWith('/SKILL.md');
    // 1) registration integrity (bidirectional): a skill file missing from the
    //    mapping never installs; a mapping to a missing file ships a broken skill.
    for (const f of skillFiles) {
        if (!target.has(f))
            findings.push({ level: 'error', file: f, rule: 'unregistered', detail: 'skill template not in AI_RULES_FILE_MAPPING (it would never be installed)' });
    }
    // Iterate the map keys so a template listed twice is reported once.
    for (const tpl of target.keys()) {
        if (!fs.existsSync(path.join(dir, tpl)))
            findings.push({ level: 'error', file: tpl, rule: 'mapped-missing', detail: 'AI_RULES_FILE_MAPPING points to a template that does not exist' });
    }
    // 2) frontmatter (name + description), top-level skills only. Sub-content
    //    fragments are loaded by their parent router and carry no frontmatter.
    for (const f of skillFiles) {
        if (!isTopLevelSkill(f))
            continue;
        const { fm } = stripFrontmatter(readTemplate(f));
        if (!fm) {
            findings.push({ level: 'error', file: f, rule: 'frontmatter', detail: 'top-level skill (SKILL.md) is missing --- frontmatter --- (Claude/Copilot will not load it)' });
            continue;
        }
        // Anchor to the start of a line: `display-name:` or a "name:" inside the
        // description text must not satisfy the check.
        if (!/^name\s*:/m.test(fm))
            findings.push({ level: 'error', file: f, rule: 'frontmatter-name', detail: 'no `name:` in frontmatter' });
        if (!/^description\s*:/m.test(fm))
            findings.push({ level: 'error', file: f, rule: 'frontmatter-description', detail: 'no `description:` in frontmatter' });
    }
    // 3) line budget: context-cost smell (advisory).
    for (const f of skillFiles) {
        const lines = countLines(readTemplate(f));
        if (lines > LINE_BUDGET)
            findings.push({ level: 'warn', file: f, rule: 'line-budget', detail: `${lines} lines > ${LINE_BUDGET} (context-cost smell)` });
    }
    // 4) variant PRESENCE (not body equality): catches "added a Claude skill but
    //    forgot the Copilot variant". Advisory.
    const skillName = (dst) => { const m = dst.match(/\/(sungen-[^/]+)\/SKILL\.md$/); return m ? m[1] : null; };
    const claudeSkills = new Set(), githubSkills = new Set();
    for (const f of skillFiles) {
        const dst = target.get(f);
        if (dst === undefined || !dst.endsWith('/SKILL.md'))
            continue;
        const name = skillName(dst);
        if (!name)
            continue;
        (f.startsWith('claude-') ? claudeSkills : githubSkills).add(name);
    }
    for (const n of claudeSkills)
        if (!githubSkills.has(n))
            findings.push({ level: 'warn', file: `claude .../${n}/SKILL.md`, rule: 'variant-missing', detail: `Claude skill "${n}" has no GitHub (Copilot) variant` });
    for (const n of githubSkills)
        if (!claudeSkills.has(n))
            findings.push({ level: 'warn', file: `github .../${n}/SKILL.md`, rule: 'variant-missing', detail: `GitHub skill "${n}" has no Claude variant` });
    return { checked: skillFiles.length, findings, errors: findings.filter((f) => f.level === 'error').length };
}
124
/**
 * Location of the bundled AI-instruction templates.
 *
 * Derived from this module's own directory: two levels up, then into
 * `orchestrator/templates/ai-instructions`.
 */
function defaultSkillDir() {
    const relativeSegments = ['..', '..', 'orchestrator', 'templates', 'ai-instructions'];
    return path.resolve(__dirname, ...relativeSegments);
}
129
+ //# sourceMappingURL=skill-lint.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"skill-lint.js","sourceRoot":"","sources":["../../../src/harness/eval/skill-lint.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AA8BA,gCAkDC;AAGD,0CAGC;AAtFD;;;;;;;;;;;;GAYG;AACH,uCAAyB;AACzB,2CAA6B;AAC7B,0EAA4E;AAK5E,MAAM,WAAW,GAAG,GAAG,CAAC,CAAC,+DAA+D;AACxF,MAAM,QAAQ,GAAG,yBAAyB,CAAC;AAE3C,SAAS,gBAAgB,CAAC,IAAY;IACpC,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,0BAA0B,CAAC,CAAC;IACjD,IAAI,CAAC,CAAC;QAAE,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IACxC,OAAO,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AACrD,CAAC;AAED,yFAAyF;AACzF,SAAgB,UAAU,CAAC,GAAW;IACpC,MAAM,QAAQ,GAAuB,EAAE,CAAC;IACxC,MAAM,KAAK,GAAG,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAC7F,MAAM,UAAU,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;IAEzD,6FAA6F;IAC7F,MAAM,MAAM,GAAG,IAAI,GAAG,CAAiB,wCAAqB,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,GAAG,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC;IAC9F,MAAM,eAAe,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;IAEnF,uEAAuE;IACvE,uFAAuF;IACvF,iCAAiC;IACjC,KAAK,MAAM,CAAC,IAAI,UAAU,EAAE,CAAC;QAC3B,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;YAAE,QAAQ,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,EAAE,IAAI,EAAE,cAAc,EAAE,MAAM,EAAE,2EAA2E,EAAE,CAAC,CAAC;IAC5K,CAAC;IACD,KAAK,MAAM,CAAC,GAAG,CAAC,IAAI,wCAAqB,EAAE,CAAC;QAC1C,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;YAAE,QAAQ,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,gBAAgB,EAAE,MAAM,EAAE,gEAAgE,EAAE,CAAC,CAAC;IAC1L,CAAC;IAED,sFAAsF;IACtF,qFAAqF;IACrF,4CAA4C;IAC5C,KAAK,MAAM,CAAC,IAAI,UAAU,EAAE,CAAC;QAC3B,IAAI,CAAC,eAAe,CAAC,CAAC,CAAC;YAAE,SAAS;QAClC,MAAM,IAAI,GAAG,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,G
AAG,EAAE,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC;QACxD,MAAM,EAAE,EAAE,EAAE,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAC;QACtC,IAAI,CAAC,EAAE,EAAE,CAAC;YAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,EAAE,IAAI,EAAE,aAAa,EAAE,MAAM,EAAE,6FAA6F,EAAE,CAAC,CAAC;YAAC,SAAS;QAAC,CAAC;QAC9L,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,EAAE,CAAC;YAAE,QAAQ,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,EAAE,IAAI,EAAE,kBAAkB,EAAE,MAAM,EAAE,2BAA2B,EAAE,CAAC,CAAC;QACtI,IAAI,CAAC,mBAAmB,CAAC,IAAI,CAAC,EAAE,CAAC;YAAE,QAAQ,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,EAAE,IAAI,EAAE,yBAAyB,EAAE,MAAM,EAAE,kCAAkC,EAAE,CAAC,CAAC;IAC7J,CAAC;IAED,kDAAkD;IAClD,KAAK,MAAM,CAAC,IAAI,UAAU,EAAE,CAAC;QAC3B,MAAM,KAAK,GAAG,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC;QAC5E,IAAI,KAAK,GAAG,WAAW;YAAE,QAAQ,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,EAAE,IAAI,EAAE,aAAa,EAAE,MAAM,EAAE,GAAG,KAAK,YAAY,WAAW,uBAAuB,EAAE,CAAC,CAAC;IAC1J,CAAC;IAED,uFAAuF;IACvF,yFAAyF;IACzF,MAAM,SAAS,GAAG,CAAC,GAAW,EAAE,EAAE,GAAG,MAAM,CAAC,GAAG,GAAG,CAAC,KAAK,CAAC,8BAA8B,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACpH,MAAM,YAAY,GAAG,IAAI,GAAG,EAAU,EAAE,YAAY,GAAG,IAAI,GAAG,EAAU,CAAC;IACzE,KAAK,MAAM,CAAC,IAAI,UAAU,EAAE,CAAC;QAC3B,IAAI,CAAC,eAAe,CAAC,CAAC,CAAC;YAAE,SAAS;QAClC,MAAM,IAAI,GAAG,SAAS,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAE,CAAC,CAAC;QAAC,IAAI,CAAC,IAAI;YAAE,SAAS;QAC5D,CAAC,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IACpE,CAAC;IACD,KAAK,MAAM,CAAC,IAAI,YAAY;QAAE,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC;YAAE,QAAQ,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,cAAc,CAAC,WAAW,EAAE,IAAI,EAAE,iBAAiB,EAAE,MAAM,EAAE,iBAAiB,CAAC,mCAAmC,EAAE,CAAC,CAAC;IACnN,KAAK,MAAM,CAAC,IAAI,YAAY;QAAE,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC;YAAE,QAAQ,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,cAAc,CAAC,WAAW,EAAE,IAAI,EAAE,iBAAiB,EAAE,MAAM,EAAE,i
BAAiB,CAAC,yBAAyB,EAAE,CAAC,CAAC;IAEzM,OAAO,EAAE,OAAO,EAAE,UAAU,CAAC,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,KAAK,OAAO,CAAC,CAAC,MAAM,EAAE,CAAC;AAC9G,CAAC;AAED,iGAAiG;AACjG,SAAgB,eAAe;IAC7B,wFAAwF;IACxF,OAAO,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,cAAc,EAAE,WAAW,EAAE,iBAAiB,CAAC,CAAC;AAC7F,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sun-asterisk/sungen",
3
- "version": "3.0.0-beta.81",
3
+ "version": "3.0.0-beta.82",
4
4
  "description": "Deterministic E2E Test Compiler - Gherkin + Selectors → Playwright tests",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -12,7 +12,7 @@
12
12
  "copy-templates": "mkdir -p dist/generators/test-generator/adapters/playwright/templates/steps && mkdir -p dist/generators/test-generator/templates && mkdir -p dist/orchestrator/templates && mkdir -p dist/dashboard/templates && cp -r src/generators/test-generator/adapters/playwright/templates/*.hbs dist/generators/test-generator/adapters/playwright/templates/ 2>/dev/null || true && cp -r src/generators/test-generator/adapters/playwright/templates/steps dist/generators/test-generator/adapters/playwright/templates/ && cp src/generators/test-generator/templates/*.hbs dist/generators/test-generator/templates/ 2>/dev/null || true && cp -r src/orchestrator/templates/* dist/orchestrator/templates/ && cp src/dashboard/templates/index.html dist/dashboard/templates/index.html && mkdir -p dist/harness/catalog && cp src/harness/catalog/*.yaml dist/harness/catalog/",
13
13
  "build:dashboard": "cd dashboard && npm install --silent && npm run build && cd .. && cp dashboard/dist/index.html src/dashboard/templates/index.html",
14
14
  "dev": "tsx src/cli/index.ts",
15
- "test": "tsx tests/golden/run.ts && tsx tests/audit/run.ts && tsx tests/ingest/run.ts",
15
+ "test": "tsx tests/golden/run.ts && tsx tests/audit/run.ts && tsx tests/ingest/run.ts && tsx tests/eval/run.ts",
16
16
  "test:update": "tsx tests/golden/run.ts --update && tsx tests/audit/run.ts --update && tsx tests/ingest/run.ts --update",
17
17
  "prepublishOnly": "npm run build:dashboard && npm run build"
18
18
  },
@@ -0,0 +1,28 @@
1
+ import { Command } from 'commander';
2
+ import { lintSkills, defaultSkillDir } from '../../harness/eval/skill-lint';
3
+
4
/**
 * Register the `eval` sub-command on `program`.
 *
 * `eval --skills` runs the static skill lint over `--dir` (or the bundled
 * templates when `--dir` is not given) and prints either a human-readable
 * report or, with `--json`, the raw result object.
 *
 * Exit status: 0 when the lint reports no error-level finding, 2 when it
 * reports at least one, 1 when the command itself fails (for example when
 * `--skills` is missing or the lint throws).
 *
 * @param program Commander program the sub-command is attached to.
 */
export function registerEvalCommand(program: Command): void {
  program
    .command('eval')
    .description('Eval harness: quality checks on Sungen\'s own skills/instructions (dev/CI)')
    // The lint checks that both platform variants EXIST; it does not compare their bodies.
    .option('--skills', 'Static skill-lint: frontmatter, line budget, claude↔github variant presence, registration')
    .option('--dir <path>', 'Templates dir to lint (default: bundled ai-instructions)')
    .option('--json', 'Output the raw findings JSON')
    .action((options: { skills?: boolean; dir?: string; json?: boolean }) => {
      try {
        if (!options.skills) throw new Error('Provide --skills (the only eval mode today)');
        const dir = options.dir || defaultSkillDir();
        const r = lintSkills(dir);

        // Set exitCode and return instead of calling process.exit(): exiting
        // immediately can cut off output that is still buffered when stdout is a
        // pipe, which is exactly how `--json` is consumed.
        process.exitCode = r.errors > 0 ? 2 : 0;

        if (options.json) {
          console.log(JSON.stringify(r, null, 2));
          return;
        }

        console.log('');
        console.log(`━━━ Skill-lint: ${r.checked} skill template(s) ━━━`);
        if (!r.findings.length) console.log(' ✓ all skills pass (frontmatter · line-budget · variant-presence · registration)');
        for (const f of r.findings) console.log(` ${f.level === 'error' ? '✗' : '⚠'} [${f.rule}] ${f.file} — ${f.detail}`);
        console.log('');
      } catch (error) {
        console.error('Error:', error instanceof Error ? error.message : error);
        process.exitCode = 1;
      }
    });
}
package/src/cli/index.ts CHANGED
@@ -16,6 +16,7 @@ import { registerAddFlowCommand } from './commands/add-flow';
16
16
  import { registerDashboardCommand } from './commands/dashboard';
17
17
  import { registerAuditCommand } from './commands/audit';
18
18
  import { registerIngestCommand } from './commands/ingest';
19
+ import { registerEvalCommand } from './commands/eval';
19
20
  import { registerManifestCommand } from './commands/manifest';
20
21
  import { registerLedgerCommand } from './commands/ledger';
21
22
  import { registerFeedbackCommand } from './commands/feedback';
@@ -62,6 +63,7 @@ async function main() {
62
63
  registerCapabilityCommand(program);
63
64
  registerFlowCheckCommand(program);
64
65
  registerIngestCommand(program);
66
+ registerEvalCommand(program);
65
67
 
66
68
  await program.parseAsync(process.argv);
67
69
  }
@@ -0,0 +1,87 @@
1
+ /**
2
+ * Static skill-lint (Eval Harness L1) — deterministic quality checks on Sungen's OWN
3
+ * AI-instruction templates, so a broken / unregistered / oversized skill fails before it
4
+ * ships. Learned (generically) from the "static validations" tier of an agent-kit evals
5
+ * layer. No project data — this lints the sungen package's own templates.
6
+ *
7
+ * Design note: the checks are MAPPING-DRIVEN. `AI_RULES_FILE_MAPPING` is the source of
8
+ * truth for what each template installs as, so the lint uses the install target (does it
9
+ * end in `/SKILL.md`?) to tell a top-level skill from a sub-content fragment — instead of
10
+ * guessing from filenames. We deliberately do NOT enforce claude↔github body parity: the
11
+ * two variants are hand-tuned per platform and intentionally diverge in wording and even
12
+ * structure, so byte/heading equality would be pure false positives.
13
+ */
14
+ import * as fs from 'fs';
15
+ import * as path from 'path';
16
+ import { AI_RULES_FILE_MAPPING } from '../../orchestrator/ai-rules-updater';
17
+
18
/** One problem reported by the skill lint. */
export interface SkillLintFinding {
  /** Severity; only `error` findings are counted in `SkillLintResult.errors`. */
  level: 'error' | 'warn';
  /** Template file name, or a descriptive label, that the finding refers to. */
  file: string;
  /** Identifier of the check that produced the finding, e.g. `unregistered`. */
  rule: string;
  /** Human-readable explanation of the problem. */
  detail: string;
}

/** Outcome of one lint run. */
export interface SkillLintResult {
  /** Number of skill templates that were examined. */
  checked: number;
  /** Every finding, errors and warnings alike. */
  findings: SkillLintFinding[];
  /** Number of findings whose level is `error`. */
  errors: number;
}

/** Templates longer than this many lines get a `line-budget` warning (context-cost smell). */
const LINE_BUDGET = 700;

/** File-name prefix that marks a template as a Claude or GitHub skill template. */
const SKILL_RE = /^(claude|github)-skill-/;
23
+
24
/**
 * Split a template into its frontmatter and the remaining body.
 *
 * The frontmatter has to open the file: a `---` line, the metadata, and a
 * closing `---` line. A leading byte-order mark, CRLF line endings and trailing
 * blanks on the fence lines are accepted, so a template saved by a Windows
 * editor is not misreported as having no frontmatter.
 *
 * @param text Full text of the template.
 * @returns `fm` is the raw text between the fences, or `null` when the file has
 *   no frontmatter block; `body` is everything after the closing fence, or the
 *   whole input when there is no block.
 */
function stripFrontmatter(text: string): { fm: string | null; body: string } {
  // The closing fence must be a whole line, so a metadata line that merely
  // starts with `---` does not end the block early.
  const m = text.match(/^\uFEFF?---[ \t]*\r?\n([\s\S]*?)\r?\n---[ \t]*(?:\r?\n|$)/);
  if (!m) return { fm: null, body: text };
  return { fm: m[1], body: text.slice(m[0].length) };
}
29
+
30
/** Number of lines in `text`; a final newline ends the last line, it does not start another. */
function countLines(text: string): number {
  if (text === '') return 0;
  const segments = text.split('\n').length;
  return text.endsWith('\n') ? segments - 1 : segments;
}

/**
 * Lint the AI-instruction templates found in `dir`.
 *
 * Only `*.md` files whose name starts with `claude-skill-` or `github-skill-`
 * are examined. Checks run in this order:
 *  1. registration (error): every such template is listed in
 *     `AI_RULES_FILE_MAPPING`, and every mapped template exists in `dir`;
 *  2. frontmatter (error): templates whose install target ends in `/SKILL.md`
 *     have a frontmatter block with top-level `name:` and `description:` keys;
 *  3. line budget (warn): templates longer than `LINE_BUDGET` lines;
 *  4. variant presence (warn): each `sungen-*` skill exists for both the
 *     Claude and the GitHub target. Bodies are not compared.
 *
 * @param dir Directory holding the templates. Required. A directory that does
 *   not exist is treated as empty, so every mapped template is then reported as
 *   `mapped-missing`.
 * @returns The number of skill templates examined, all findings, and how many
 *   of the findings have level `error`.
 */
export function lintSkills(dir: string): SkillLintResult {
  const findings: SkillLintFinding[] = [];

  // readdirSync promises no particular order; sort so the report is stable.
  const skillFiles = fs.existsSync(dir)
    ? fs.readdirSync(dir).filter((f) => f.endsWith('.md') && SKILL_RE.test(f)).sort()
    : [];

  // Several checks need the same template text; read each file only once.
  const textCache = new Map<string, string>();
  const readTemplate = (f: string): string => {
    let text = textCache.get(f);
    if (text === undefined) {
      text = fs.readFileSync(path.join(dir, f), 'utf8');
      textCache.set(f, text);
    }
    return text;
  };

  // mapping: template file -> install target (source of truth for "is this a top-level skill")
  const target = new Map<string, string>(AI_RULES_FILE_MAPPING.map(([tpl, dst]) => [tpl, dst]));
  const isTopLevelSkill = (f: string): boolean => (target.get(f) ?? '').endsWith('/SKILL.md');

  // 1) registration integrity (bidirectional): a skill file missing from the
  //    mapping never installs; a mapping to a missing file ships a broken skill.
  for (const f of skillFiles) {
    if (!target.has(f)) {
      findings.push({ level: 'error', file: f, rule: 'unregistered', detail: 'skill template not in AI_RULES_FILE_MAPPING (it would never be installed)' });
    }
  }
  // Iterate the map keys so a template listed twice is reported once.
  for (const tpl of target.keys()) {
    if (!fs.existsSync(path.join(dir, tpl))) {
      findings.push({ level: 'error', file: tpl, rule: 'mapped-missing', detail: 'AI_RULES_FILE_MAPPING points to a template that does not exist' });
    }
  }

  // 2) frontmatter (name + description), top-level skills only. Sub-content
  //    fragments are loaded by their parent router and carry no frontmatter.
  for (const f of skillFiles) {
    if (!isTopLevelSkill(f)) continue;
    const { fm } = stripFrontmatter(readTemplate(f));
    if (!fm) {
      findings.push({ level: 'error', file: f, rule: 'frontmatter', detail: 'top-level skill (SKILL.md) is missing --- frontmatter --- (Claude/Copilot will not load it)' });
      continue;
    }
    // Anchor to the start of a line: `display-name:` or a "name:" inside the
    // description text must not satisfy the check.
    if (!/^name\s*:/m.test(fm)) findings.push({ level: 'error', file: f, rule: 'frontmatter-name', detail: 'no `name:` in frontmatter' });
    if (!/^description\s*:/m.test(fm)) findings.push({ level: 'error', file: f, rule: 'frontmatter-description', detail: 'no `description:` in frontmatter' });
  }

  // 3) line budget: context-cost smell (advisory).
  for (const f of skillFiles) {
    const lines = countLines(readTemplate(f));
    if (lines > LINE_BUDGET) findings.push({ level: 'warn', file: f, rule: 'line-budget', detail: `${lines} lines > ${LINE_BUDGET} (context-cost smell)` });
  }

  // 4) variant PRESENCE (not body equality): catches "added a Claude skill but
  //    forgot the Copilot variant". Advisory.
  const skillName = (dst: string): string | null => {
    const m = dst.match(/\/(sungen-[^/]+)\/SKILL\.md$/);
    return m ? m[1] : null;
  };
  const claudeSkills = new Set<string>();
  const githubSkills = new Set<string>();
  for (const f of skillFiles) {
    const dst = target.get(f);
    if (dst === undefined || !dst.endsWith('/SKILL.md')) continue;
    const name = skillName(dst);
    if (!name) continue;
    (f.startsWith('claude-') ? claudeSkills : githubSkills).add(name);
  }
  for (const n of claudeSkills) {
    if (!githubSkills.has(n)) findings.push({ level: 'warn', file: `claude .../${n}/SKILL.md`, rule: 'variant-missing', detail: `Claude skill "${n}" has no GitHub (Copilot) variant` });
  }
  for (const n of githubSkills) {
    if (!claudeSkills.has(n)) findings.push({ level: 'warn', file: `github .../${n}/SKILL.md`, rule: 'variant-missing', detail: `GitHub skill "${n}" has no Claude variant` });
  }

  return { checked: skillFiles.length, findings, errors: findings.filter((f) => f.level === 'error').length };
}
82
+
83
/**
 * Location of the bundled AI-instruction templates.
 *
 * Derived from this module's own directory: two levels up, then into
 * `orchestrator/templates/ai-instructions`.
 */
export function defaultSkillDir(): string {
  const relativeSegments = ['..', '..', 'orchestrator', 'templates', 'ai-instructions'];
  return path.resolve(__dirname, ...relativeSegments);
}