snapeval 1.8.0 → 2.0.0
This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
- package/bin/snapeval.ts +0 -23
- package/dist/bin/snapeval.js +0 -22
- package/dist/bin/snapeval.js.map +1 -1
- package/dist/src/commands/eval.js +1 -1
- package/dist/src/commands/eval.js.map +1 -1
- package/package.json +1 -1
- package/plugin.json +1 -1
- package/src/commands/eval.ts +1 -1
- package/dist/src/commands/init.d.ts +0 -2
- package/dist/src/commands/init.js +0 -27
- package/dist/src/commands/init.js.map +0 -1
- package/dist/src/engine/generator.d.ts +0 -3
- package/dist/src/engine/generator.js +0 -51
- package/dist/src/engine/generator.js.map +0 -1
- package/src/commands/init.ts +0 -38
- package/src/engine/generator.ts +0 -60
package/bin/snapeval.ts
CHANGED
|
@@ -3,7 +3,6 @@ import { Command } from 'commander';
|
|
|
3
3
|
import { resolveConfig } from '../src/config.js';
|
|
4
4
|
import { resolveInference } from '../src/adapters/inference/resolve.js';
|
|
5
5
|
import { resolveHarness } from '../src/adapters/harness/resolve.js';
|
|
6
|
-
import { initCommand } from '../src/commands/init.js';
|
|
7
6
|
import { evalCommand } from '../src/commands/eval.js';
|
|
8
7
|
import { reviewCommand } from '../src/commands/review.js';
|
|
9
8
|
import { TerminalReporter } from '../src/adapters/report/terminal.js';
|
|
@@ -17,28 +16,6 @@ program
|
|
|
17
16
|
.description('Harness-agnostic eval runner for agentskills.io skills')
|
|
18
17
|
.version('2.0.0');
|
|
19
18
|
|
|
20
|
-
// --- init ---
|
|
21
|
-
program
|
|
22
|
-
.command('init')
|
|
23
|
-
.description('Generate evals.json from SKILL.md (prompts + expected outputs, no assertions)')
|
|
24
|
-
.option('--harness <harness>', 'Harness to use')
|
|
25
|
-
.option('--inference <inference>', 'Inference adapter to use')
|
|
26
|
-
.option('--verbose', 'Verbose output')
|
|
27
|
-
.argument('[skill-dir]', 'Path to skill directory', process.cwd())
|
|
28
|
-
.action(async (skillDir: string, opts: Record<string, string | boolean>) => {
|
|
29
|
-
try {
|
|
30
|
-
const skillPath = path.resolve(skillDir);
|
|
31
|
-
const config = resolveConfig(
|
|
32
|
-
{ harness: opts.harness as string, inference: opts.inference as string },
|
|
33
|
-
process.cwd(), skillPath
|
|
34
|
-
);
|
|
35
|
-
const inference = resolveInference(config.inference);
|
|
36
|
-
await initCommand(skillPath, inference);
|
|
37
|
-
console.log(`Generated evals at ${path.join(skillPath, 'evals', 'evals.json')}`);
|
|
38
|
-
process.exit(0);
|
|
39
|
-
} catch (err) { handleError(err); }
|
|
40
|
-
});
|
|
41
|
-
|
|
42
19
|
// --- eval ---
|
|
43
20
|
program
|
|
44
21
|
.command('eval')
|
package/dist/bin/snapeval.js
CHANGED
|
@@ -3,7 +3,6 @@ import { Command } from 'commander';
|
|
|
3
3
|
import { resolveConfig } from '../src/config.js';
|
|
4
4
|
import { resolveInference } from '../src/adapters/inference/resolve.js';
|
|
5
5
|
import { resolveHarness } from '../src/adapters/harness/resolve.js';
|
|
6
|
-
import { initCommand } from '../src/commands/init.js';
|
|
7
6
|
import { evalCommand } from '../src/commands/eval.js';
|
|
8
7
|
import { reviewCommand } from '../src/commands/review.js';
|
|
9
8
|
import { TerminalReporter } from '../src/adapters/report/terminal.js';
|
|
@@ -14,27 +13,6 @@ program
|
|
|
14
13
|
.name('snapeval')
|
|
15
14
|
.description('Harness-agnostic eval runner for agentskills.io skills')
|
|
16
15
|
.version('2.0.0');
|
|
17
|
-
// --- init ---
|
|
18
|
-
program
|
|
19
|
-
.command('init')
|
|
20
|
-
.description('Generate evals.json from SKILL.md (prompts + expected outputs, no assertions)')
|
|
21
|
-
.option('--harness <harness>', 'Harness to use')
|
|
22
|
-
.option('--inference <inference>', 'Inference adapter to use')
|
|
23
|
-
.option('--verbose', 'Verbose output')
|
|
24
|
-
.argument('[skill-dir]', 'Path to skill directory', process.cwd())
|
|
25
|
-
.action(async (skillDir, opts) => {
|
|
26
|
-
try {
|
|
27
|
-
const skillPath = path.resolve(skillDir);
|
|
28
|
-
const config = resolveConfig({ harness: opts.harness, inference: opts.inference }, process.cwd(), skillPath);
|
|
29
|
-
const inference = resolveInference(config.inference);
|
|
30
|
-
await initCommand(skillPath, inference);
|
|
31
|
-
console.log(`Generated evals at ${path.join(skillPath, 'evals', 'evals.json')}`);
|
|
32
|
-
process.exit(0);
|
|
33
|
-
}
|
|
34
|
-
catch (err) {
|
|
35
|
-
handleError(err);
|
|
36
|
-
}
|
|
37
|
-
});
|
|
38
16
|
// --- eval ---
|
|
39
17
|
program
|
|
40
18
|
.command('eval')
|
package/dist/bin/snapeval.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"snapeval.js","sourceRoot":"","sources":["../../bin/snapeval.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACjD,OAAO,EAAE,gBAAgB,EAAE,MAAM,sCAAsC,CAAC;AACxE,OAAO,EAAE,cAAc,EAAE,MAAM,oCAAoC,CAAC;AACpE,OAAO,EAAE,WAAW,EAAE,MAAM,yBAAyB,CAAC;AACtD,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"snapeval.js","sourceRoot":"","sources":["../../bin/snapeval.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACjD,OAAO,EAAE,gBAAgB,EAAE,MAAM,sCAAsC,CAAC;AACxE,OAAO,EAAE,cAAc,EAAE,MAAM,oCAAoC,CAAC;AACpE,OAAO,EAAE,WAAW,EAAE,MAAM,yBAAyB,CAAC;AACtD,OAAO,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAC1D,OAAO,EAAE,gBAAgB,EAAE,MAAM,oCAAoC,CAAC;AACtE,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACjD,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAElC,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;AAE9B,OAAO;KACJ,IAAI,CAAC,UAAU,CAAC;KAChB,WAAW,CAAC,wDAAwD,CAAC;KACrE,OAAO,CAAC,OAAO,CAAC,CAAC;AAEpB,eAAe;AACf,OAAO;KACJ,OAAO,CAAC,MAAM,CAAC;KACf,WAAW,CAAC,qEAAqE,CAAC;KAClF,MAAM,CAAC,qBAAqB,EAAE,gBAAgB,CAAC;KAC/C,MAAM,CAAC,yBAAyB,EAAE,0BAA0B,CAAC;KAC7D,MAAM,CAAC,oBAAoB,EAAE,qBAAqB,CAAC;KACnD,MAAM,CAAC,YAAY,EAAE,4CAA4C,EAAE,GAAG,CAAC;KACvE,MAAM,CAAC,oBAAoB,EAAE,uDAAuD,CAAC;KACrF,MAAM,CAAC,WAAW,EAAE,gBAAgB,CAAC;KACrC,QAAQ,CAAC,aAAa,EAAE,yBAAyB,EAAE,OAAO,CAAC,GAAG,EAAE,CAAC;KACjE,MAAM,CAAC,KAAK,EAAE,QAAgB,EAAE,IAAsC,EAAE,EAAE;IACzE,IAAI,CAAC;QACH,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QACzC,MAAM,MAAM,GAAG,aAAa,CAC1B;YACE,OAAO,EAAE,IAAI,CAAC,OAAiB;YAC/B,SAAS,EAAE,IAAI,CAAC,SAAmB;YACnC,SAAS,EAAE,IAAI,CAAC,SAAmB;YACnC,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAc,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS;SAChE,EACD,OAAO,CAAC,GAAG,EAAE,EAAE,SAAS,CACzB,CAAC;QACF,MAAM,OAAO,GAAG,cAAc,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAC/C,MAAM,SAAS,GAAG,gBAAgB,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QAErD,MAAM,OAAO,GAAG,MAAM,WAAW,CAAC,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE;YAC/D,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,QAAQ,EAAE,IAAI,CAAC,QAA8B;SAC9C,CAAC,CAAC;QAEH,MAAM,QAAQ,GAAG,IAAI,gBAAgB,EAAE,CAAC;QACxC,MAAM,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAC/B,OAAO,CAAC,GAAG,CAAC,cAAc,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC;QAClD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IAAC,CAAC;AACrC,CAAC,CAAC,CAAC;AAEL,i
BAAiB;AACjB,OAAO;KACJ,OAAO,CAAC,QAAQ,CAAC;KACjB,WAAW,CAAC,mDAAmD,CAAC;KAChE,MAAM,CAAC,qBAAqB,EAAE,gBAAgB,CAAC;KAC/C,MAAM,CAAC,yBAAyB,EAAE,0BAA0B,CAAC;KAC7D,MAAM,CAAC,oBAAoB,EAAE,qBAAqB,CAAC;KACnD,MAAM,CAAC,YAAY,EAAE,4CAA4C,EAAE,GAAG,CAAC;KACvE,MAAM,CAAC,oBAAoB,EAAE,uDAAuD,CAAC;KACrF,MAAM,CAAC,WAAW,EAAE,qBAAqB,CAAC;KAC1C,MAAM,CAAC,WAAW,EAAE,gBAAgB,CAAC;KACrC,QAAQ,CAAC,aAAa,EAAE,yBAAyB,EAAE,OAAO,CAAC,GAAG,EAAE,CAAC;KACjE,MAAM,CAAC,KAAK,EAAE,QAAgB,EAAE,IAAsC,EAAE,EAAE;IACzE,IAAI,CAAC;QACH,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QACzC,MAAM,MAAM,GAAG,aAAa,CAC1B;YACE,OAAO,EAAE,IAAI,CAAC,OAAiB;YAC/B,SAAS,EAAE,IAAI,CAAC,SAAmB;YACnC,SAAS,EAAE,IAAI,CAAC,SAAmB;YACnC,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAc,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS;SAChE,EACD,OAAO,CAAC,GAAG,EAAE,EAAE,SAAS,CACzB,CAAC;QACF,MAAM,OAAO,GAAG,cAAc,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAC/C,MAAM,SAAS,GAAG,gBAAgB,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QAErD,MAAM,aAAa,CAAC,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE;YACjD,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,QAAQ,EAAE,IAAI,CAAC,QAA8B;YAC7C,MAAM,EAAE,IAAI,CAAC,IAAI,KAAK,KAAK;SAC5B,CAAC,CAAC;QACH,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IAAC,CAAC;AACrC,CAAC,CAAC,CAAC;AAEL,SAAS,WAAW,CAAC,GAAY;IAC/B,IAAI,GAAG,YAAY,aAAa,EAAE,CAAC;QACjC,OAAO,CAAC,KAAK,CAAC,UAAU,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC;QACvC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,IAAI,CAAC,CAAC,CAAC;IAClC,CAAC;IACD,IAAI,GAAG,YAAY,KAAK,EAAE,CAAC;QACzB,OAAO,CAAC,KAAK,CAAC,UAAU,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC;QACvC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IACD,OAAO,CAAC,KAAK,CAAC,4BAA4B,CAAC,CAAC;IAC5C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC"}
|
|
package/dist/src/commands/eval.js
CHANGED
|
@@ -8,7 +8,7 @@ import { SnapevalError } from '../errors.js';
|
|
|
8
8
|
export async function evalCommand(skillPath, harness, inference, options) {
|
|
9
9
|
const evalsPath = path.join(skillPath, 'evals', 'evals.json');
|
|
10
10
|
if (!fs.existsSync(evalsPath)) {
|
|
11
|
-
throw new SnapevalError(`No evals.json found at ${evalsPath}.
|
|
11
|
+
throw new SnapevalError(`No evals.json found at ${evalsPath}. Create evals/evals.json with test scenarios first.`);
|
|
12
12
|
}
|
|
13
13
|
const evalsFile = JSON.parse(fs.readFileSync(evalsPath, 'utf-8'));
|
|
14
14
|
const ws = new WorkspaceManager(skillPath, options.workspace);
|
|
package/dist/src/commands/eval.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"eval.js","sourceRoot":"","sources":["../../../src/commands/eval.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAQlC,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAC1D,OAAO,EAAE,OAAO,EAAE,MAAM,qBAAqB,CAAC;AAC9C,OAAO,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AACtD,OAAO,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAC;AAC3D,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAE7C,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,SAAiB,EACjB,OAAgB,EAChB,SAA2B,EAC3B,OAAiE;IAEjE,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,OAAO,EAAE,YAAY,CAAC,CAAC;IAC9D,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QAC9B,MAAM,IAAI,aAAa,CAAC,0BAA0B,SAAS,
|
|
1
|
+
{"version":3,"file":"eval.js","sourceRoot":"","sources":["../../../src/commands/eval.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAQlC,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAC1D,OAAO,EAAE,OAAO,EAAE,MAAM,qBAAqB,CAAC;AAC9C,OAAO,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AACtD,OAAO,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAC;AAC3D,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAE7C,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,SAAiB,EACjB,OAAgB,EAChB,SAA2B,EAC3B,OAAiE;IAEjE,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,OAAO,EAAE,YAAY,CAAC,CAAC;IAC9D,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QAC9B,MAAM,IAAI,aAAa,CAAC,0BAA0B,SAAS,sDAAsD,CAAC,CAAC;IACrH,CAAC;IAED,MAAM,SAAS,GAAc,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC,CAAC;IAC7E,MAAM,EAAE,GAAG,IAAI,gBAAgB,CAAC,SAAS,EAAE,OAAO,CAAC,SAAS,CAAC,CAAC;IAC9D,MAAM,YAAY,GAAG,EAAE,CAAC,eAAe,EAAE,CAAC;IAC1C,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,IAAI,CAAC,CAAC;IAC/B,MAAM,eAAe,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,eAAe,CAAC;IACzE,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,OAAO,EAAE,SAAS,CAAC,CAAC;IAE5D,MAAM,QAAQ,GAAoB,EAAE,CAAC;IAErC,KAAK,MAAM,QAAQ,IAAI,SAAS,CAAC,KAAK,EAAE,CAAC;QACvC,MAAM,IAAI,GAAG,gBAAgB,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;QACzE,MAAM,OAAO,GAAG,EAAE,CAAC,aAAa,CAAC,YAAY,EAAE,IAAI,EAAE,eAAe,CAAC,CAAC;QAEtE,IAAI,OAAO,GAA+C,IAAI,CAAC;QAC/D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC;YAC9B,OAAO,GAAG,MAAM,OAAO,CAAC,QAAQ,EAAE,SAAS,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;QACnF,CAAC;QAED,IAAI,CAAC,OAAO;YAAE,SAAS;QAEvB,MAAM,UAAU,GAAG,QAAQ,CAAC,UAAU,IAAI,EAAE,CAAC;QAC7C,MAAM,gBAAgB,GAAG,MAAM,eAAe,CAC5C,UAAU,EACV,OAAO,CAAC,SAAS,CAAC,MAAM,EACxB,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,YAAY,CAAC,EAChC,SAAS,EACT,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,SAAS,CACnD,CAAC;QACF,MAAM,mBAAmB,GAAG,MAAM,eAAe,CAC/C,UAAU,EACV,OAAO,CAAC,YAAY,CAAC,MAAM,EAC3B,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,eAAe,CAAC,EACnC,SAAS,EACT,EAAE,
CAAC,UAAU,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,SAAS,CACnD,CAAC;QAEF,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,QAAQ,CAAC,EAAE;YACnB,IAAI;YACJ,MAAM,EAAE,QAAQ,CAAC,MAAM;YACvB,SAAS,EAAE;gBACT,MAAM,EAAE,OAAO,CAAC,SAAS,CAAC,MAAM;gBAChC,OAAO,EAAE,gBAAgB,IAAI,SAAS;aACvC;YACD,YAAY,EAAE;gBACZ,MAAM,EAAE,OAAO,CAAC,YAAY,CAAC,MAAM;gBACnC,OAAO,EAAE,mBAAmB,IAAI,SAAS;aAC1C;SACF,CAAC,CAAC;IACL,CAAC;IAED,MAAM,SAAS,GAAG,gBAAgB,CAAC,QAAQ,CAAC,CAAC;IAE7C,EAAE,CAAC,aAAa,CACd,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,gBAAgB,CAAC,EACzC,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC,CACnC,CAAC;IAEF,OAAO;QACL,SAAS,EAAE,SAAS,CAAC,UAAU;QAC/B,QAAQ;QACR,SAAS;QACT,YAAY;KACb,CAAC;AACJ,CAAC"}
|
package/package.json
CHANGED
package/plugin.json
CHANGED
package/src/commands/eval.ts
CHANGED
|
@@ -21,7 +21,7 @@ export async function evalCommand(
|
|
|
21
21
|
): Promise<EvalResults> {
|
|
22
22
|
const evalsPath = path.join(skillPath, 'evals', 'evals.json');
|
|
23
23
|
if (!fs.existsSync(evalsPath)) {
|
|
24
|
-
throw new SnapevalError(`No evals.json found at ${evalsPath}.
|
|
24
|
+
throw new SnapevalError(`No evals.json found at ${evalsPath}. Create evals/evals.json with test scenarios first.`);
|
|
25
25
|
}
|
|
26
26
|
|
|
27
27
|
const evalsFile: EvalsFile = JSON.parse(fs.readFileSync(evalsPath, 'utf-8'));
|
|
package/dist/src/commands/init.js
DELETED
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
import * as fs from 'node:fs';
|
|
2
|
-
import * as path from 'node:path';
|
|
3
|
-
import { generateEvals } from '../engine/generator.js';
|
|
4
|
-
import { SnapevalError } from '../errors.js';
|
|
5
|
-
export async function initCommand(skillPath, inference) {
|
|
6
|
-
// Locate the skill definition file (SKILL.md or skill.md)
|
|
7
|
-
const candidates = ['SKILL.md', 'skill.md'];
|
|
8
|
-
let skillFilePath = null;
|
|
9
|
-
for (const name of candidates) {
|
|
10
|
-
const candidate = path.join(skillPath, name);
|
|
11
|
-
if (fs.existsSync(candidate)) {
|
|
12
|
-
skillFilePath = candidate;
|
|
13
|
-
break;
|
|
14
|
-
}
|
|
15
|
-
}
|
|
16
|
-
if (!skillFilePath) {
|
|
17
|
-
throw new SnapevalError(`No SKILL.md found at ${skillPath}. Create a SKILL.md file to describe your skill.`);
|
|
18
|
-
}
|
|
19
|
-
const skillContent = fs.readFileSync(skillFilePath, 'utf-8');
|
|
20
|
-
const skillName = path.basename(skillPath);
|
|
21
|
-
const evalsFile = await generateEvals(skillContent, skillName, inference);
|
|
22
|
-
const evalsDir = path.join(skillPath, 'evals');
|
|
23
|
-
fs.mkdirSync(evalsDir, { recursive: true });
|
|
24
|
-
const evalsPath = path.join(evalsDir, 'evals.json');
|
|
25
|
-
fs.writeFileSync(evalsPath, JSON.stringify(evalsFile, null, 2), 'utf-8');
|
|
26
|
-
}
|
|
27
|
-
//# sourceMappingURL=init.js.map
|
|
package/dist/src/commands/init.js.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"init.js","sourceRoot":"","sources":["../../../src/commands/init.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAElC,OAAO,EAAE,aAAa,EAAE,MAAM,wBAAwB,CAAC;AACvD,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAE7C,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,SAAiB,EACjB,SAA2B;IAE3B,0DAA0D;IAC1D,MAAM,UAAU,GAAG,CAAC,UAAU,EAAE,UAAU,CAAC,CAAC;IAC5C,IAAI,aAAa,GAAkB,IAAI,CAAC;IACxC,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC9B,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;QAC7C,IAAI,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;YAC7B,aAAa,GAAG,SAAS,CAAC;YAC1B,MAAM;QACR,CAAC;IACH,CAAC;IAED,IAAI,CAAC,aAAa,EAAE,CAAC;QACnB,MAAM,IAAI,aAAa,CACrB,wBAAwB,SAAS,kDAAkD,CACpF,CAAC;IACJ,CAAC;IAED,MAAM,YAAY,GAAG,EAAE,CAAC,YAAY,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC;IAC7D,MAAM,SAAS,GAAG,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;IAE3C,MAAM,SAAS,GAAG,MAAM,aAAa,CAAC,YAAY,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC;IAE1E,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;IAC/C,EAAE,CAAC,SAAS,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAE5C,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAC;IACpD,EAAE,CAAC,aAAa,CAAC,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;AAC3E,CAAC"}
|
|
package/dist/src/engine/generator.js
DELETED
|
@@ -1,51 +0,0 @@
|
|
|
1
|
-
export function buildGeneratorPrompt(skillContent) {
|
|
2
|
-
return `You are a test case generator for AI skills. Read the following skill definition and generate 5-8 realistic test scenarios.
|
|
3
|
-
|
|
4
|
-
SKILL DEFINITION:
|
|
5
|
-
---
|
|
6
|
-
${skillContent}
|
|
7
|
-
---
|
|
8
|
-
|
|
9
|
-
Generate test scenarios as JSON with this exact format:
|
|
10
|
-
{
|
|
11
|
-
"skill_name": "<name from skill>",
|
|
12
|
-
"evals": [
|
|
13
|
-
{
|
|
14
|
-
"id": 1,
|
|
15
|
-
"slug": "<2-4-word-kebab-case-label>",
|
|
16
|
-
"prompt": "<realistic user prompt that would trigger this skill>",
|
|
17
|
-
"expected_output": "<human-readable description of expected behavior>"
|
|
18
|
-
}
|
|
19
|
-
]
|
|
20
|
-
}
|
|
21
|
-
|
|
22
|
-
Requirements:
|
|
23
|
-
- Include happy path scenarios (normal use cases)
|
|
24
|
-
- Include edge cases (empty input, malformed input, boundary conditions)
|
|
25
|
-
- Include at least one negative test (input the skill should handle gracefully)
|
|
26
|
-
- Prompts should be realistic — the way a real user would type them
|
|
27
|
-
- slug must be 2-4 words in kebab-case (e.g. "happy-path", "empty-input-edge-case")
|
|
28
|
-
- Return ONLY the JSON, no markdown wrapping`;
|
|
29
|
-
}
|
|
30
|
-
function extractJSON(text) {
|
|
31
|
-
const match = text.match(/```(?:json)?\s*([\s\S]*?)```/);
|
|
32
|
-
if (match)
|
|
33
|
-
return match[1].trim();
|
|
34
|
-
return text.trim();
|
|
35
|
-
}
|
|
36
|
-
export async function generateEvals(skillContent, skillName, inference) {
|
|
37
|
-
const prompt = buildGeneratorPrompt(skillContent);
|
|
38
|
-
const response = await inference.chat([{ role: 'user', content: prompt }], { temperature: 0.7, responseFormat: 'json' });
|
|
39
|
-
const parsed = JSON.parse(extractJSON(response));
|
|
40
|
-
return {
|
|
41
|
-
skill_name: parsed.skill_name || skillName,
|
|
42
|
-
evals: parsed.evals.map((e, i) => ({
|
|
43
|
-
id: e.id || i + 1,
|
|
44
|
-
slug: e.slug,
|
|
45
|
-
prompt: e.prompt,
|
|
46
|
-
expected_output: e.expected_output || '',
|
|
47
|
-
files: e.files || [],
|
|
48
|
-
})),
|
|
49
|
-
};
|
|
50
|
-
}
|
|
51
|
-
//# sourceMappingURL=generator.js.map
|
|
package/dist/src/engine/generator.js.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"generator.js","sourceRoot":"","sources":["../../../src/engine/generator.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,oBAAoB,CAAC,YAAoB;IACvD,OAAO;;;;EAIP,YAAY;;;;;;;;;;;;;;;;;;;;;;6CAsB+B,CAAC;AAC9C,CAAC;AAED,SAAS,WAAW,CAAC,IAAY;IAC/B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,8BAA8B,CAAC,CAAC;IACzD,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IAClC,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;AACrB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,YAAoB,EACpB,SAAiB,EACjB,SAA2B;IAE3B,MAAM,MAAM,GAAG,oBAAoB,CAAC,YAAY,CAAC,CAAC;IAClD,MAAM,QAAQ,GAAG,MAAM,SAAS,CAAC,IAAI,CACnC,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,EACnC,EAAE,WAAW,EAAE,GAAG,EAAE,cAAc,EAAE,MAAM,EAAE,CAC7C,CAAC;IACF,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC,CAAC;IACjD,OAAO;QACL,UAAU,EAAE,MAAM,CAAC,UAAU,IAAI,SAAS;QAC1C,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAM,EAAE,CAAS,EAAE,EAAE,CAAC,CAAC;YAC9C,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC;YACjB,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,MAAM,EAAE,CAAC,CAAC,MAAM;YAChB,eAAe,EAAE,CAAC,CAAC,eAAe,IAAI,EAAE;YACxC,KAAK,EAAE,CAAC,CAAC,KAAK,IAAI,EAAE;SACrB,CAAC,CAAC;KACJ,CAAC;AACJ,CAAC"}
|
package/src/commands/init.ts
DELETED
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
import * as fs from 'node:fs';
|
|
2
|
-
import * as path from 'node:path';
|
|
3
|
-
import type { InferenceAdapter } from '../types.js';
|
|
4
|
-
import { generateEvals } from '../engine/generator.js';
|
|
5
|
-
import { SnapevalError } from '../errors.js';
|
|
6
|
-
|
|
7
|
-
export async function initCommand(
|
|
8
|
-
skillPath: string,
|
|
9
|
-
inference: InferenceAdapter
|
|
10
|
-
): Promise<void> {
|
|
11
|
-
// Locate the skill definition file (SKILL.md or skill.md)
|
|
12
|
-
const candidates = ['SKILL.md', 'skill.md'];
|
|
13
|
-
let skillFilePath: string | null = null;
|
|
14
|
-
for (const name of candidates) {
|
|
15
|
-
const candidate = path.join(skillPath, name);
|
|
16
|
-
if (fs.existsSync(candidate)) {
|
|
17
|
-
skillFilePath = candidate;
|
|
18
|
-
break;
|
|
19
|
-
}
|
|
20
|
-
}
|
|
21
|
-
|
|
22
|
-
if (!skillFilePath) {
|
|
23
|
-
throw new SnapevalError(
|
|
24
|
-
`No SKILL.md found at ${skillPath}. Create a SKILL.md file to describe your skill.`
|
|
25
|
-
);
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
const skillContent = fs.readFileSync(skillFilePath, 'utf-8');
|
|
29
|
-
const skillName = path.basename(skillPath);
|
|
30
|
-
|
|
31
|
-
const evalsFile = await generateEvals(skillContent, skillName, inference);
|
|
32
|
-
|
|
33
|
-
const evalsDir = path.join(skillPath, 'evals');
|
|
34
|
-
fs.mkdirSync(evalsDir, { recursive: true });
|
|
35
|
-
|
|
36
|
-
const evalsPath = path.join(evalsDir, 'evals.json');
|
|
37
|
-
fs.writeFileSync(evalsPath, JSON.stringify(evalsFile, null, 2), 'utf-8');
|
|
38
|
-
}
|
package/src/engine/generator.ts
DELETED
|
@@ -1,60 +0,0 @@
|
|
|
1
|
-
import type { InferenceAdapter, EvalsFile } from '../types.js';
|
|
2
|
-
|
|
3
|
-
export function buildGeneratorPrompt(skillContent: string): string {
|
|
4
|
-
return `You are a test case generator for AI skills. Read the following skill definition and generate 5-8 realistic test scenarios.
|
|
5
|
-
|
|
6
|
-
SKILL DEFINITION:
|
|
7
|
-
---
|
|
8
|
-
${skillContent}
|
|
9
|
-
---
|
|
10
|
-
|
|
11
|
-
Generate test scenarios as JSON with this exact format:
|
|
12
|
-
{
|
|
13
|
-
"skill_name": "<name from skill>",
|
|
14
|
-
"evals": [
|
|
15
|
-
{
|
|
16
|
-
"id": 1,
|
|
17
|
-
"slug": "<2-4-word-kebab-case-label>",
|
|
18
|
-
"prompt": "<realistic user prompt that would trigger this skill>",
|
|
19
|
-
"expected_output": "<human-readable description of expected behavior>"
|
|
20
|
-
}
|
|
21
|
-
]
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
Requirements:
|
|
25
|
-
- Include happy path scenarios (normal use cases)
|
|
26
|
-
- Include edge cases (empty input, malformed input, boundary conditions)
|
|
27
|
-
- Include at least one negative test (input the skill should handle gracefully)
|
|
28
|
-
- Prompts should be realistic — the way a real user would type them
|
|
29
|
-
- slug must be 2-4 words in kebab-case (e.g. "happy-path", "empty-input-edge-case")
|
|
30
|
-
- Return ONLY the JSON, no markdown wrapping`;
|
|
31
|
-
}
|
|
32
|
-
|
|
33
|
-
function extractJSON(text: string): string {
|
|
34
|
-
const match = text.match(/```(?:json)?\s*([\s\S]*?)```/);
|
|
35
|
-
if (match) return match[1].trim();
|
|
36
|
-
return text.trim();
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
export async function generateEvals(
|
|
40
|
-
skillContent: string,
|
|
41
|
-
skillName: string,
|
|
42
|
-
inference: InferenceAdapter
|
|
43
|
-
): Promise<EvalsFile> {
|
|
44
|
-
const prompt = buildGeneratorPrompt(skillContent);
|
|
45
|
-
const response = await inference.chat(
|
|
46
|
-
[{ role: 'user', content: prompt }],
|
|
47
|
-
{ temperature: 0.7, responseFormat: 'json' }
|
|
48
|
-
);
|
|
49
|
-
const parsed = JSON.parse(extractJSON(response));
|
|
50
|
-
return {
|
|
51
|
-
skill_name: parsed.skill_name || skillName,
|
|
52
|
-
evals: parsed.evals.map((e: any, i: number) => ({
|
|
53
|
-
id: e.id || i + 1,
|
|
54
|
-
slug: e.slug,
|
|
55
|
-
prompt: e.prompt,
|
|
56
|
-
expected_output: e.expected_output || '',
|
|
57
|
-
files: e.files || [],
|
|
58
|
-
})),
|
|
59
|
-
};
|
|
60
|
-
}
|