snapeval 1.8.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/snapeval.ts CHANGED
@@ -3,7 +3,6 @@ import { Command } from 'commander';
3
3
  import { resolveConfig } from '../src/config.js';
4
4
  import { resolveInference } from '../src/adapters/inference/resolve.js';
5
5
  import { resolveHarness } from '../src/adapters/harness/resolve.js';
6
- import { initCommand } from '../src/commands/init.js';
7
6
  import { evalCommand } from '../src/commands/eval.js';
8
7
  import { reviewCommand } from '../src/commands/review.js';
9
8
  import { TerminalReporter } from '../src/adapters/report/terminal.js';
@@ -17,28 +16,6 @@ program
17
16
  .description('Harness-agnostic eval runner for agentskills.io skills')
18
17
  .version('2.0.0');
19
18
 
20
- // --- init ---
21
- program
22
- .command('init')
23
- .description('Generate evals.json from SKILL.md (prompts + expected outputs, no assertions)')
24
- .option('--harness <harness>', 'Harness to use')
25
- .option('--inference <inference>', 'Inference adapter to use')
26
- .option('--verbose', 'Verbose output')
27
- .argument('[skill-dir]', 'Path to skill directory', process.cwd())
28
- .action(async (skillDir: string, opts: Record<string, string | boolean>) => {
29
- try {
30
- const skillPath = path.resolve(skillDir);
31
- const config = resolveConfig(
32
- { harness: opts.harness as string, inference: opts.inference as string },
33
- process.cwd(), skillPath
34
- );
35
- const inference = resolveInference(config.inference);
36
- await initCommand(skillPath, inference);
37
- console.log(`Generated evals at ${path.join(skillPath, 'evals', 'evals.json')}`);
38
- process.exit(0);
39
- } catch (err) { handleError(err); }
40
- });
41
-
42
19
  // --- eval ---
43
20
  program
44
21
  .command('eval')
@@ -3,7 +3,6 @@ import { Command } from 'commander';
3
3
  import { resolveConfig } from '../src/config.js';
4
4
  import { resolveInference } from '../src/adapters/inference/resolve.js';
5
5
  import { resolveHarness } from '../src/adapters/harness/resolve.js';
6
- import { initCommand } from '../src/commands/init.js';
7
6
  import { evalCommand } from '../src/commands/eval.js';
8
7
  import { reviewCommand } from '../src/commands/review.js';
9
8
  import { TerminalReporter } from '../src/adapters/report/terminal.js';
@@ -14,27 +13,6 @@ program
14
13
  .name('snapeval')
15
14
  .description('Harness-agnostic eval runner for agentskills.io skills')
16
15
  .version('2.0.0');
17
- // --- init ---
18
- program
19
- .command('init')
20
- .description('Generate evals.json from SKILL.md (prompts + expected outputs, no assertions)')
21
- .option('--harness <harness>', 'Harness to use')
22
- .option('--inference <inference>', 'Inference adapter to use')
23
- .option('--verbose', 'Verbose output')
24
- .argument('[skill-dir]', 'Path to skill directory', process.cwd())
25
- .action(async (skillDir, opts) => {
26
- try {
27
- const skillPath = path.resolve(skillDir);
28
- const config = resolveConfig({ harness: opts.harness, inference: opts.inference }, process.cwd(), skillPath);
29
- const inference = resolveInference(config.inference);
30
- await initCommand(skillPath, inference);
31
- console.log(`Generated evals at ${path.join(skillPath, 'evals', 'evals.json')}`);
32
- process.exit(0);
33
- }
34
- catch (err) {
35
- handleError(err);
36
- }
37
- });
38
16
  // --- eval ---
39
17
  program
40
18
  .command('eval')
@@ -1 +1 @@
1
- {"version":3,"file":"snapeval.js","sourceRoot":"","sources":["../../bin/snapeval.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACjD,OAAO,EAAE,gBAAgB,EAAE,MAAM,sCAAsC,CAAC;AACxE,OAAO,EAAE,cAAc,EAAE,MAAM,oCAAoC,CAAC;AACpE,OAAO,EAAE,WAAW,EAAE,MAAM,yBAAyB,CAAC;AACtD,OAAO,EAAE,WAAW,EAAE,MAAM,yBAAyB,CAAC;AACtD,OAAO,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAC1D,OAAO,EAAE,gBAAgB,EAAE,MAAM,oCAAoC,CAAC;AACtE,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACjD,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAElC,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;AAE9B,OAAO;KACJ,IAAI,CAAC,UAAU,CAAC;KAChB,WAAW,CAAC,wDAAwD,CAAC;KACrE,OAAO,CAAC,OAAO,CAAC,CAAC;AAEpB,eAAe;AACf,OAAO;KACJ,OAAO,CAAC,MAAM,CAAC;KACf,WAAW,CAAC,+EAA+E,CAAC;KAC5F,MAAM,CAAC,qBAAqB,EAAE,gBAAgB,CAAC;KAC/C,MAAM,CAAC,yBAAyB,EAAE,0BAA0B,CAAC;KAC7D,MAAM,CAAC,WAAW,EAAE,gBAAgB,CAAC;KACrC,QAAQ,CAAC,aAAa,EAAE,yBAAyB,EAAE,OAAO,CAAC,GAAG,EAAE,CAAC;KACjE,MAAM,CAAC,KAAK,EAAE,QAAgB,EAAE,IAAsC,EAAE,EAAE;IACzE,IAAI,CAAC;QACH,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QACzC,MAAM,MAAM,GAAG,aAAa,CAC1B,EAAE,OAAO,EAAE,IAAI,CAAC,OAAiB,EAAE,SAAS,EAAE,IAAI,CAAC,SAAmB,EAAE,EACxE,OAAO,CAAC,GAAG,EAAE,EAAE,SAAS,CACzB,CAAC;QACF,MAAM,SAAS,GAAG,gBAAgB,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QACrD,MAAM,WAAW,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC;QACxC,OAAO,CAAC,GAAG,CAAC,sBAAsB,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,OAAO,EAAE,YAAY,CAAC,EAAE,CAAC,CAAC;QACjF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IAAC,CAAC;AACrC,CAAC,CAAC,CAAC;AAEL,eAAe;AACf,OAAO;KACJ,OAAO,CAAC,MAAM,CAAC;KACf,WAAW,CAAC,qEAAqE,CAAC;KAClF,MAAM,CAAC,qBAAqB,EAAE,gBAAgB,CAAC;KAC/C,MAAM,CAAC,yBAAyB,EAAE,0BAA0B,CAAC;KAC7D,MAAM,CAAC,oBAAoB,EAAE,qBAAqB,CAAC;KACnD,MAAM,CAAC,YAAY,EAAE,4CAA4C,EAAE,GAAG,CAAC;KACvE,MAAM,CAAC,oBAAoB,EAAE,uDAAuD,CAAC;KACrF,MAAM,CAAC,WAAW,EAAE,gBAAgB,CAAC;KACrC,QAAQ,CAAC,aAAa,EAAE,yBAAyB,EAAE,OAAO,CAAC,GAAG,EAAE,CAAC;KACjE,MAAM,CAAC,KAAK,EAAE,QAAgB,EAAE,IAAsC,EAAE,EAAE;IACzE,IAAI,CAAC;QACH,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QACzC,MAAM,MAAM,GAAG,aAAa,CAC1B;YACE,OAAO,EAAE,IAAI,CAAC,OAAiB;YAC/B,SAAS,EAAE,IAAI,CAAC,SAAmB;YACnC,SAAS,EAAE,IAAI,CAAC,SAAmB;YACnC,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAc,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS;SAChE,EACD,OAAO,CAAC,GAAG,EAAE,EAAE,SAAS,CACzB,CAAC;QACF,MAAM,OAAO,GAAG,cAAc,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAC/C,MAAM,SAAS,GAAG,gBAAgB,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QAErD,MAAM,OAAO,GAAG,MAAM,WAAW,CAAC,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE;YAC/D,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,QAAQ,EAAE,IAAI,CAAC,QAA8B;SAC9C,CAAC,CAAC;QAEH,MAAM,QAAQ,GAAG,IAAI,gBAAgB,EAAE,CAAC;QACxC,MAAM,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAC/B,OAAO,CAAC,GAAG,CAAC,cAAc,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC;QAClD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IAAC,CAAC;AACrC,CAAC,CAAC,CAAC;AAEL,iBAAiB;AACjB,OAAO;KACJ,OAAO,CAAC,QAAQ,CAAC;KACjB,WAAW,CAAC,mDAAmD,CAAC;KAChE,MAAM,CAAC,qBAAqB,EAAE,gBAAgB,CAAC;KAC/C,MAAM,CAAC,yBAAyB,EAAE,0BAA0B,CAAC;KAC7D,MAAM,CAAC,oBAAoB,EAAE,qBAAqB,CAAC;KACnD,MAAM,CAAC,YAAY,EAAE,4CAA4C,EAAE,GAAG,CAAC;KACvE,MAAM,CAAC,oBAAoB,EAAE,uDAAuD,CAAC;KACrF,MAAM,CAAC,WAAW,EAAE,qBAAqB,CAAC;KAC1C,MAAM,CAAC,WAAW,EAAE,gBAAgB,CAAC;KACrC,QAAQ,CAAC,aAAa,EAAE,yBAAyB,EAAE,OAAO,CAAC,GAAG,EAAE,CAAC;KACjE,MAAM,CAAC,KAAK,EAAE,QAAgB,EAAE,IAAsC,EAAE,EAAE;IACzE,IAAI,CAAC;QACH,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QACzC,MAAM,MAAM,GAAG,aAAa,CAC1B;YACE,OAAO,EAAE,IAAI,CAAC,OAAiB;YAC/B,SAAS,EAAE,IAAI,CAAC,SAAmB;YACnC,SAAS,EAAE,IAAI,CAAC,SAAmB;YACnC,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAc,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS;SAChE,EACD,OAAO,CAAC,GAAG,EAAE,EAAE,SAAS,CACzB,CAAC;QACF,MAAM,OAAO,GAAG,cAAc,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAC/C,MAAM,SAAS,GAAG,gBAAgB,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QAErD,MAAM,aAAa,CAAC,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE;YACjD,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,QAAQ,EAAE,IAAI,CAAC,QAA8B;YAC7C,MAAM,EAAE,IAAI,CAAC,IAAI,KAAK,KAAK;SAC5B,CAAC,CAAC;QACH,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IAAC,CAAC;AACrC,CAAC,CAAC,CAAC;AAEL,SAAS,WAAW,CAAC,GAAY;IAC/B,IAAI,GAAG,YAAY,aAAa,EAAE,CAAC;QACjC,OAAO,CAAC,KAAK,CAAC,UAAU,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC;QACvC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,IAAI,CAAC,CAAC,CAAC;IAClC,CAAC;IACD,IAAI,GAAG,YAAY,KAAK,EAAE,CAAC;QACzB,OAAO,CAAC,KAAK,CAAC,UAAU,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC;QACvC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IACD,OAAO,CAAC,KAAK,CAAC,4BAA4B,CAAC,CAAC;IAC5C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC"}
1
+ {"version":3,"file":"snapeval.js","sourceRoot":"","sources":["../../bin/snapeval.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACjD,OAAO,EAAE,gBAAgB,EAAE,MAAM,sCAAsC,CAAC;AACxE,OAAO,EAAE,cAAc,EAAE,MAAM,oCAAoC,CAAC;AACpE,OAAO,EAAE,WAAW,EAAE,MAAM,yBAAyB,CAAC;AACtD,OAAO,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAC1D,OAAO,EAAE,gBAAgB,EAAE,MAAM,oCAAoC,CAAC;AACtE,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACjD,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAElC,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;AAE9B,OAAO;KACJ,IAAI,CAAC,UAAU,CAAC;KAChB,WAAW,CAAC,wDAAwD,CAAC;KACrE,OAAO,CAAC,OAAO,CAAC,CAAC;AAEpB,eAAe;AACf,OAAO;KACJ,OAAO,CAAC,MAAM,CAAC;KACf,WAAW,CAAC,qEAAqE,CAAC;KAClF,MAAM,CAAC,qBAAqB,EAAE,gBAAgB,CAAC;KAC/C,MAAM,CAAC,yBAAyB,EAAE,0BAA0B,CAAC;KAC7D,MAAM,CAAC,oBAAoB,EAAE,qBAAqB,CAAC;KACnD,MAAM,CAAC,YAAY,EAAE,4CAA4C,EAAE,GAAG,CAAC;KACvE,MAAM,CAAC,oBAAoB,EAAE,uDAAuD,CAAC;KACrF,MAAM,CAAC,WAAW,EAAE,gBAAgB,CAAC;KACrC,QAAQ,CAAC,aAAa,EAAE,yBAAyB,EAAE,OAAO,CAAC,GAAG,EAAE,CAAC;KACjE,MAAM,CAAC,KAAK,EAAE,QAAgB,EAAE,IAAsC,EAAE,EAAE;IACzE,IAAI,CAAC;QACH,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QACzC,MAAM,MAAM,GAAG,aAAa,CAC1B;YACE,OAAO,EAAE,IAAI,CAAC,OAAiB;YAC/B,SAAS,EAAE,IAAI,CAAC,SAAmB;YACnC,SAAS,EAAE,IAAI,CAAC,SAAmB;YACnC,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAc,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS;SAChE,EACD,OAAO,CAAC,GAAG,EAAE,EAAE,SAAS,CACzB,CAAC;QACF,MAAM,OAAO,GAAG,cAAc,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAC/C,MAAM,SAAS,GAAG,gBAAgB,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QAErD,MAAM,OAAO,GAAG,MAAM,WAAW,CAAC,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE;YAC/D,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,QAAQ,EAAE,IAAI,CAAC,QAA8B;SAC9C,CAAC,CAAC;QAEH,MAAM,QAAQ,GAAG,IAAI,gBAAgB,EAAE,CAAC;QACxC,MAAM,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAC/B,OAAO,CAAC,GAAG,CAAC,cAAc,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC;QAClD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IAAC,CAAC;AACrC,CAAC,CAAC,CAAC;AAEL,iBAAiB;AACjB,OAAO;KACJ,OAAO,CAAC,QAAQ,CAAC;KACjB,WAAW,CAAC,mDAAmD,CAAC;KAChE,MAAM,CAAC,qBAAqB,EAAE,gBAAgB,CAAC;KAC/C,MAAM,CAAC,yBAAyB,EAAE,0BAA0B,CAAC;KAC7D,MAAM,CAAC,oBAAoB,EAAE,qBAAqB,CAAC;KACnD,MAAM,CAAC,YAAY,EAAE,4CAA4C,EAAE,GAAG,CAAC;KACvE,MAAM,CAAC,oBAAoB,EAAE,uDAAuD,CAAC;KACrF,MAAM,CAAC,WAAW,EAAE,qBAAqB,CAAC;KAC1C,MAAM,CAAC,WAAW,EAAE,gBAAgB,CAAC;KACrC,QAAQ,CAAC,aAAa,EAAE,yBAAyB,EAAE,OAAO,CAAC,GAAG,EAAE,CAAC;KACjE,MAAM,CAAC,KAAK,EAAE,QAAgB,EAAE,IAAsC,EAAE,EAAE;IACzE,IAAI,CAAC;QACH,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QACzC,MAAM,MAAM,GAAG,aAAa,CAC1B;YACE,OAAO,EAAE,IAAI,CAAC,OAAiB;YAC/B,SAAS,EAAE,IAAI,CAAC,SAAmB;YACnC,SAAS,EAAE,IAAI,CAAC,SAAmB;YACnC,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAc,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS;SAChE,EACD,OAAO,CAAC,GAAG,EAAE,EAAE,SAAS,CACzB,CAAC;QACF,MAAM,OAAO,GAAG,cAAc,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAC/C,MAAM,SAAS,GAAG,gBAAgB,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QAErD,MAAM,aAAa,CAAC,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE;YACjD,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,QAAQ,EAAE,IAAI,CAAC,QAA8B;YAC7C,MAAM,EAAE,IAAI,CAAC,IAAI,KAAK,KAAK;SAC5B,CAAC,CAAC;QACH,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IAAC,CAAC;AACrC,CAAC,CAAC,CAAC;AAEL,SAAS,WAAW,CAAC,GAAY;IAC/B,IAAI,GAAG,YAAY,aAAa,EAAE,CAAC;QACjC,OAAO,CAAC,KAAK,CAAC,UAAU,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC;QACvC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,IAAI,CAAC,CAAC,CAAC;IAClC,CAAC;IACD,IAAI,GAAG,YAAY,KAAK,EAAE,CAAC;QACzB,OAAO,CAAC,KAAK,CAAC,UAAU,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC;QACvC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IACD,OAAO,CAAC,KAAK,CAAC,4BAA4B,CAAC,CAAC;IAC5C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC"}
@@ -8,7 +8,7 @@ import { SnapevalError } from '../errors.js';
8
8
  export async function evalCommand(skillPath, harness, inference, options) {
9
9
  const evalsPath = path.join(skillPath, 'evals', 'evals.json');
10
10
  if (!fs.existsSync(evalsPath)) {
11
- throw new SnapevalError(`No evals.json found at ${evalsPath}. Run \`snapeval init\` first.`);
11
+ throw new SnapevalError(`No evals.json found at ${evalsPath}. Create evals/evals.json with test scenarios first.`);
12
12
  }
13
13
  const evalsFile = JSON.parse(fs.readFileSync(evalsPath, 'utf-8'));
14
14
  const ws = new WorkspaceManager(skillPath, options.workspace);
@@ -1 +1 @@
1
- {"version":3,"file":"eval.js","sourceRoot":"","sources":["../../../src/commands/eval.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAQlC,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAC1D,OAAO,EAAE,OAAO,EAAE,MAAM,qBAAqB,CAAC;AAC9C,OAAO,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AACtD,OAAO,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAC;AAC3D,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAE7C,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,SAAiB,EACjB,OAAgB,EAChB,SAA2B,EAC3B,OAAiE;IAEjE,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,OAAO,EAAE,YAAY,CAAC,CAAC;IAC9D,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QAC9B,MAAM,IAAI,aAAa,CAAC,0BAA0B,SAAS,gCAAgC,CAAC,CAAC;IAC/F,CAAC;IAED,MAAM,SAAS,GAAc,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC,CAAC;IAC7E,MAAM,EAAE,GAAG,IAAI,gBAAgB,CAAC,SAAS,EAAE,OAAO,CAAC,SAAS,CAAC,CAAC;IAC9D,MAAM,YAAY,GAAG,EAAE,CAAC,eAAe,EAAE,CAAC;IAC1C,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,IAAI,CAAC,CAAC;IAC/B,MAAM,eAAe,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,eAAe,CAAC;IACzE,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,OAAO,EAAE,SAAS,CAAC,CAAC;IAE5D,MAAM,QAAQ,GAAoB,EAAE,CAAC;IAErC,KAAK,MAAM,QAAQ,IAAI,SAAS,CAAC,KAAK,EAAE,CAAC;QACvC,MAAM,IAAI,GAAG,gBAAgB,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;QACzE,MAAM,OAAO,GAAG,EAAE,CAAC,aAAa,CAAC,YAAY,EAAE,IAAI,EAAE,eAAe,CAAC,CAAC;QAEtE,IAAI,OAAO,GAA+C,IAAI,CAAC;QAC/D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC;YAC9B,OAAO,GAAG,MAAM,OAAO,CAAC,QAAQ,EAAE,SAAS,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;QACnF,CAAC;QAED,IAAI,CAAC,OAAO;YAAE,SAAS;QAEvB,MAAM,UAAU,GAAG,QAAQ,CAAC,UAAU,IAAI,EAAE,CAAC;QAC7C,MAAM,gBAAgB,GAAG,MAAM,eAAe,CAC5C,UAAU,EACV,OAAO,CAAC,SAAS,CAAC,MAAM,EACxB,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,YAAY,CAAC,EAChC,SAAS,EACT,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,SAAS,CACnD,CAAC;QACF,MAAM,mBAAmB,GAAG,MAAM,eAAe,CAC/C,UAAU,EACV,OAAO,CAAC,YAAY,CAAC,MAAM,EAC3B,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,eAAe,CAAC,EACnC,SAAS,EACT,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,SAAS,CACnD,CAAC;QAEF,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,QAAQ,CAAC,EAAE;YACnB,IAAI;YACJ,MAAM,EAAE,QAAQ,CAAC,MAAM;YACvB,SAAS,EAAE;gBACT,MAAM,EAAE,OAAO,CAAC,SAAS,CAAC,MAAM;gBAChC,OAAO,EAAE,gBAAgB,IAAI,SAAS;aACvC;YACD,YAAY,EAAE;gBACZ,MAAM,EAAE,OAAO,CAAC,YAAY,CAAC,MAAM;gBACnC,OAAO,EAAE,mBAAmB,IAAI,SAAS;aAC1C;SACF,CAAC,CAAC;IACL,CAAC;IAED,MAAM,SAAS,GAAG,gBAAgB,CAAC,QAAQ,CAAC,CAAC;IAE7C,EAAE,CAAC,aAAa,CACd,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,gBAAgB,CAAC,EACzC,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC,CACnC,CAAC;IAEF,OAAO;QACL,SAAS,EAAE,SAAS,CAAC,UAAU;QAC/B,QAAQ;QACR,SAAS;QACT,YAAY;KACb,CAAC;AACJ,CAAC"}
1
+ {"version":3,"file":"eval.js","sourceRoot":"","sources":["../../../src/commands/eval.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAQlC,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAC1D,OAAO,EAAE,OAAO,EAAE,MAAM,qBAAqB,CAAC;AAC9C,OAAO,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AACtD,OAAO,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAC;AAC3D,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAE7C,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,SAAiB,EACjB,OAAgB,EAChB,SAA2B,EAC3B,OAAiE;IAEjE,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,OAAO,EAAE,YAAY,CAAC,CAAC;IAC9D,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QAC9B,MAAM,IAAI,aAAa,CAAC,0BAA0B,SAAS,sDAAsD,CAAC,CAAC;IACrH,CAAC;IAED,MAAM,SAAS,GAAc,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC,CAAC;IAC7E,MAAM,EAAE,GAAG,IAAI,gBAAgB,CAAC,SAAS,EAAE,OAAO,CAAC,SAAS,CAAC,CAAC;IAC9D,MAAM,YAAY,GAAG,EAAE,CAAC,eAAe,EAAE,CAAC;IAC1C,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,IAAI,CAAC,CAAC;IAC/B,MAAM,eAAe,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,eAAe,CAAC;IACzE,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,OAAO,EAAE,SAAS,CAAC,CAAC;IAE5D,MAAM,QAAQ,GAAoB,EAAE,CAAC;IAErC,KAAK,MAAM,QAAQ,IAAI,SAAS,CAAC,KAAK,EAAE,CAAC;QACvC,MAAM,IAAI,GAAG,gBAAgB,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;QACzE,MAAM,OAAO,GAAG,EAAE,CAAC,aAAa,CAAC,YAAY,EAAE,IAAI,EAAE,eAAe,CAAC,CAAC;QAEtE,IAAI,OAAO,GAA+C,IAAI,CAAC;QAC/D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC;YAC9B,OAAO,GAAG,MAAM,OAAO,CAAC,QAAQ,EAAE,SAAS,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;QACnF,CAAC;QAED,IAAI,CAAC,OAAO;YAAE,SAAS;QAEvB,MAAM,UAAU,GAAG,QAAQ,CAAC,UAAU,IAAI,EAAE,CAAC;QAC7C,MAAM,gBAAgB,GAAG,MAAM,eAAe,CAC5C,UAAU,EACV,OAAO,CAAC,SAAS,CAAC,MAAM,EACxB,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,YAAY,CAAC,EAChC,SAAS,EACT,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,SAAS,CACnD,CAAC;QACF,MAAM,mBAAmB,GAAG,MAAM,eAAe,CAC/C,UAAU,EACV,OAAO,CAAC,YAAY,CAAC,MAAM,EAC3B,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,eAAe,CAAC,EACnC,SAAS,EACT,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,SAAS,CACnD,CAAC;QAEF,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,QAAQ,CAAC,EAAE;YACnB,IAAI;YACJ,MAAM,EAAE,QAAQ,CAAC,MAAM;YACvB,SAAS,EAAE;gBACT,MAAM,EAAE,OAAO,CAAC,SAAS,CAAC,MAAM;gBAChC,OAAO,EAAE,gBAAgB,IAAI,SAAS;aACvC;YACD,YAAY,EAAE;gBACZ,MAAM,EAAE,OAAO,CAAC,YAAY,CAAC,MAAM;gBACnC,OAAO,EAAE,mBAAmB,IAAI,SAAS;aAC1C;SACF,CAAC,CAAC;IACL,CAAC;IAED,MAAM,SAAS,GAAG,gBAAgB,CAAC,QAAQ,CAAC,CAAC;IAE7C,EAAE,CAAC,aAAa,CACd,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,gBAAgB,CAAC,EACzC,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC,CACnC,CAAC;IAEF,OAAO;QACL,SAAS,EAAE,SAAS,CAAC,UAAU;QAC/B,QAAQ;QACR,SAAS;QACT,YAAY;KACb,CAAC;AACJ,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "snapeval",
3
- "version": "1.8.0",
3
+ "version": "2.0.0",
4
4
  "description": "Harness-agnostic eval runner for agentskills.io skills",
5
5
  "type": "module",
6
6
  "bin": {
package/plugin.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "snapeval",
3
- "version": "1.8.0",
3
+ "version": "2.0.0",
4
4
  "description": "Semantic snapshot testing for AI skills. Zero assertions. AI-driven. Free inference.",
5
5
  "author": "Matan Tsach",
6
6
  "license": "MIT",
@@ -21,7 +21,7 @@ export async function evalCommand(
21
21
  ): Promise<EvalResults> {
22
22
  const evalsPath = path.join(skillPath, 'evals', 'evals.json');
23
23
  if (!fs.existsSync(evalsPath)) {
24
- throw new SnapevalError(`No evals.json found at ${evalsPath}. Run \`snapeval init\` first.`);
24
+ throw new SnapevalError(`No evals.json found at ${evalsPath}. Create evals/evals.json with test scenarios first.`);
25
25
  }
26
26
 
27
27
  const evalsFile: EvalsFile = JSON.parse(fs.readFileSync(evalsPath, 'utf-8'));
@@ -1,2 +0,0 @@
1
- import type { InferenceAdapter } from '../types.js';
2
- export declare function initCommand(skillPath: string, inference: InferenceAdapter): Promise<void>;
@@ -1,27 +0,0 @@
1
- import * as fs from 'node:fs';
2
- import * as path from 'node:path';
3
- import { generateEvals } from '../engine/generator.js';
4
- import { SnapevalError } from '../errors.js';
5
- export async function initCommand(skillPath, inference) {
6
- // Locate the skill definition file (SKILL.md or skill.md)
7
- const candidates = ['SKILL.md', 'skill.md'];
8
- let skillFilePath = null;
9
- for (const name of candidates) {
10
- const candidate = path.join(skillPath, name);
11
- if (fs.existsSync(candidate)) {
12
- skillFilePath = candidate;
13
- break;
14
- }
15
- }
16
- if (!skillFilePath) {
17
- throw new SnapevalError(`No SKILL.md found at ${skillPath}. Create a SKILL.md file to describe your skill.`);
18
- }
19
- const skillContent = fs.readFileSync(skillFilePath, 'utf-8');
20
- const skillName = path.basename(skillPath);
21
- const evalsFile = await generateEvals(skillContent, skillName, inference);
22
- const evalsDir = path.join(skillPath, 'evals');
23
- fs.mkdirSync(evalsDir, { recursive: true });
24
- const evalsPath = path.join(evalsDir, 'evals.json');
25
- fs.writeFileSync(evalsPath, JSON.stringify(evalsFile, null, 2), 'utf-8');
26
- }
27
- //# sourceMappingURL=init.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"init.js","sourceRoot":"","sources":["../../../src/commands/init.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAElC,OAAO,EAAE,aAAa,EAAE,MAAM,wBAAwB,CAAC;AACvD,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAE7C,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,SAAiB,EACjB,SAA2B;IAE3B,0DAA0D;IAC1D,MAAM,UAAU,GAAG,CAAC,UAAU,EAAE,UAAU,CAAC,CAAC;IAC5C,IAAI,aAAa,GAAkB,IAAI,CAAC;IACxC,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC9B,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;QAC7C,IAAI,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;YAC7B,aAAa,GAAG,SAAS,CAAC;YAC1B,MAAM;QACR,CAAC;IACH,CAAC;IAED,IAAI,CAAC,aAAa,EAAE,CAAC;QACnB,MAAM,IAAI,aAAa,CACrB,wBAAwB,SAAS,kDAAkD,CACpF,CAAC;IACJ,CAAC;IAED,MAAM,YAAY,GAAG,EAAE,CAAC,YAAY,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC;IAC7D,MAAM,SAAS,GAAG,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;IAE3C,MAAM,SAAS,GAAG,MAAM,aAAa,CAAC,YAAY,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC;IAE1E,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;IAC/C,EAAE,CAAC,SAAS,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAE5C,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAC;IACpD,EAAE,CAAC,aAAa,CAAC,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;AAC3E,CAAC"}
@@ -1,3 +0,0 @@
1
- import type { InferenceAdapter, EvalsFile } from '../types.js';
2
- export declare function buildGeneratorPrompt(skillContent: string): string;
3
- export declare function generateEvals(skillContent: string, skillName: string, inference: InferenceAdapter): Promise<EvalsFile>;
@@ -1,51 +0,0 @@
1
- export function buildGeneratorPrompt(skillContent) {
2
- return `You are a test case generator for AI skills. Read the following skill definition and generate 5-8 realistic test scenarios.
3
-
4
- SKILL DEFINITION:
5
- ---
6
- ${skillContent}
7
- ---
8
-
9
- Generate test scenarios as JSON with this exact format:
10
- {
11
- "skill_name": "<name from skill>",
12
- "evals": [
13
- {
14
- "id": 1,
15
- "slug": "<2-4-word-kebab-case-label>",
16
- "prompt": "<realistic user prompt that would trigger this skill>",
17
- "expected_output": "<human-readable description of expected behavior>"
18
- }
19
- ]
20
- }
21
-
22
- Requirements:
23
- - Include happy path scenarios (normal use cases)
24
- - Include edge cases (empty input, malformed input, boundary conditions)
25
- - Include at least one negative test (input the skill should handle gracefully)
26
- - Prompts should be realistic — the way a real user would type them
27
- - slug must be 2-4 words in kebab-case (e.g. "happy-path", "empty-input-edge-case")
28
- - Return ONLY the JSON, no markdown wrapping`;
29
- }
30
- function extractJSON(text) {
31
- const match = text.match(/```(?:json)?\s*([\s\S]*?)```/);
32
- if (match)
33
- return match[1].trim();
34
- return text.trim();
35
- }
36
- export async function generateEvals(skillContent, skillName, inference) {
37
- const prompt = buildGeneratorPrompt(skillContent);
38
- const response = await inference.chat([{ role: 'user', content: prompt }], { temperature: 0.7, responseFormat: 'json' });
39
- const parsed = JSON.parse(extractJSON(response));
40
- return {
41
- skill_name: parsed.skill_name || skillName,
42
- evals: parsed.evals.map((e, i) => ({
43
- id: e.id || i + 1,
44
- slug: e.slug,
45
- prompt: e.prompt,
46
- expected_output: e.expected_output || '',
47
- files: e.files || [],
48
- })),
49
- };
50
- }
51
- //# sourceMappingURL=generator.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"generator.js","sourceRoot":"","sources":["../../../src/engine/generator.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,oBAAoB,CAAC,YAAoB;IACvD,OAAO;;;;EAIP,YAAY;;;;;;;;;;;;;;;;;;;;;;6CAsB+B,CAAC;AAC9C,CAAC;AAED,SAAS,WAAW,CAAC,IAAY;IAC/B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,8BAA8B,CAAC,CAAC;IACzD,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IAClC,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;AACrB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,YAAoB,EACpB,SAAiB,EACjB,SAA2B;IAE3B,MAAM,MAAM,GAAG,oBAAoB,CAAC,YAAY,CAAC,CAAC;IAClD,MAAM,QAAQ,GAAG,MAAM,SAAS,CAAC,IAAI,CACnC,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,EACnC,EAAE,WAAW,EAAE,GAAG,EAAE,cAAc,EAAE,MAAM,EAAE,CAC7C,CAAC;IACF,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC,CAAC;IACjD,OAAO;QACL,UAAU,EAAE,MAAM,CAAC,UAAU,IAAI,SAAS;QAC1C,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAM,EAAE,CAAS,EAAE,EAAE,CAAC,CAAC;YAC9C,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC;YACjB,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,MAAM,EAAE,CAAC,CAAC,MAAM;YAChB,eAAe,EAAE,CAAC,CAAC,eAAe,IAAI,EAAE;YACxC,KAAK,EAAE,CAAC,CAAC,KAAK,IAAI,EAAE;SACrB,CAAC,CAAC;KACJ,CAAC;AACJ,CAAC"}
@@ -1,38 +0,0 @@
1
- import * as fs from 'node:fs';
2
- import * as path from 'node:path';
3
- import type { InferenceAdapter } from '../types.js';
4
- import { generateEvals } from '../engine/generator.js';
5
- import { SnapevalError } from '../errors.js';
6
-
7
- export async function initCommand(
8
- skillPath: string,
9
- inference: InferenceAdapter
10
- ): Promise<void> {
11
- // Locate the skill definition file (SKILL.md or skill.md)
12
- const candidates = ['SKILL.md', 'skill.md'];
13
- let skillFilePath: string | null = null;
14
- for (const name of candidates) {
15
- const candidate = path.join(skillPath, name);
16
- if (fs.existsSync(candidate)) {
17
- skillFilePath = candidate;
18
- break;
19
- }
20
- }
21
-
22
- if (!skillFilePath) {
23
- throw new SnapevalError(
24
- `No SKILL.md found at ${skillPath}. Create a SKILL.md file to describe your skill.`
25
- );
26
- }
27
-
28
- const skillContent = fs.readFileSync(skillFilePath, 'utf-8');
29
- const skillName = path.basename(skillPath);
30
-
31
- const evalsFile = await generateEvals(skillContent, skillName, inference);
32
-
33
- const evalsDir = path.join(skillPath, 'evals');
34
- fs.mkdirSync(evalsDir, { recursive: true });
35
-
36
- const evalsPath = path.join(evalsDir, 'evals.json');
37
- fs.writeFileSync(evalsPath, JSON.stringify(evalsFile, null, 2), 'utf-8');
38
- }
@@ -1,60 +0,0 @@
1
- import type { InferenceAdapter, EvalsFile } from '../types.js';
2
-
3
- export function buildGeneratorPrompt(skillContent: string): string {
4
- return `You are a test case generator for AI skills. Read the following skill definition and generate 5-8 realistic test scenarios.
5
-
6
- SKILL DEFINITION:
7
- ---
8
- ${skillContent}
9
- ---
10
-
11
- Generate test scenarios as JSON with this exact format:
12
- {
13
- "skill_name": "<name from skill>",
14
- "evals": [
15
- {
16
- "id": 1,
17
- "slug": "<2-4-word-kebab-case-label>",
18
- "prompt": "<realistic user prompt that would trigger this skill>",
19
- "expected_output": "<human-readable description of expected behavior>"
20
- }
21
- ]
22
- }
23
-
24
- Requirements:
25
- - Include happy path scenarios (normal use cases)
26
- - Include edge cases (empty input, malformed input, boundary conditions)
27
- - Include at least one negative test (input the skill should handle gracefully)
28
- - Prompts should be realistic — the way a real user would type them
29
- - slug must be 2-4 words in kebab-case (e.g. "happy-path", "empty-input-edge-case")
30
- - Return ONLY the JSON, no markdown wrapping`;
31
- }
32
-
33
- function extractJSON(text: string): string {
34
- const match = text.match(/```(?:json)?\s*([\s\S]*?)```/);
35
- if (match) return match[1].trim();
36
- return text.trim();
37
- }
38
-
39
- export async function generateEvals(
40
- skillContent: string,
41
- skillName: string,
42
- inference: InferenceAdapter
43
- ): Promise<EvalsFile> {
44
- const prompt = buildGeneratorPrompt(skillContent);
45
- const response = await inference.chat(
46
- [{ role: 'user', content: prompt }],
47
- { temperature: 0.7, responseFormat: 'json' }
48
- );
49
- const parsed = JSON.parse(extractJSON(response));
50
- return {
51
- skill_name: parsed.skill_name || skillName,
52
- evals: parsed.evals.map((e: any, i: number) => ({
53
- id: e.id || i + 1,
54
- slug: e.slug,
55
- prompt: e.prompt,
56
- expected_output: e.expected_output || '',
57
- files: e.files || [],
58
- })),
59
- };
60
- }