@sx4im/skillcheck 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/LICENSE +21 -0
  2. package/METHODOLOGY.md +91 -0
  3. package/README.md +159 -0
  4. package/dist/bin/skillcheck.d.ts +2 -0
  5. package/dist/bin/skillcheck.js +8 -0
  6. package/dist/bin/skillcheck.js.map +1 -0
  7. package/dist/src/adapters/nvidia-nim.d.ts +30 -0
  8. package/dist/src/adapters/nvidia-nim.js +165 -0
  9. package/dist/src/adapters/nvidia-nim.js.map +1 -0
  10. package/dist/src/cache.d.ts +5 -0
  11. package/dist/src/cache.js +27 -0
  12. package/dist/src/cache.js.map +1 -0
  13. package/dist/src/cli.d.ts +1 -0
  14. package/dist/src/cli.js +146 -0
  15. package/dist/src/cli.js.map +1 -0
  16. package/dist/src/corpus.d.ts +43 -0
  17. package/dist/src/corpus.js +233 -0
  18. package/dist/src/corpus.js.map +1 -0
  19. package/dist/src/deterministic.d.ts +7 -0
  20. package/dist/src/deterministic.js +25 -0
  21. package/dist/src/deterministic.js.map +1 -0
  22. package/dist/src/env.d.ts +12 -0
  23. package/dist/src/env.js +39 -0
  24. package/dist/src/env.js.map +1 -0
  25. package/dist/src/eval.d.ts +13 -0
  26. package/dist/src/eval.js +155 -0
  27. package/dist/src/eval.js.map +1 -0
  28. package/dist/src/generate.d.ts +9 -0
  29. package/dist/src/generate.js +94 -0
  30. package/dist/src/generate.js.map +1 -0
  31. package/dist/src/grade.d.ts +5 -0
  32. package/dist/src/grade.js +112 -0
  33. package/dist/src/grade.js.map +1 -0
  34. package/dist/src/hash.d.ts +2 -0
  35. package/dist/src/hash.js +8 -0
  36. package/dist/src/hash.js.map +1 -0
  37. package/dist/src/m0/hardcoded.d.ts +7 -0
  38. package/dist/src/m0/hardcoded.js +51 -0
  39. package/dist/src/m0/hardcoded.js.map +1 -0
  40. package/dist/src/m0/run.d.ts +38 -0
  41. package/dist/src/m0/run.js +102 -0
  42. package/dist/src/m0/run.js.map +1 -0
  43. package/dist/src/normalize.d.ts +2 -0
  44. package/dist/src/normalize.js +109 -0
  45. package/dist/src/normalize.js.map +1 -0
  46. package/dist/src/rot.d.ts +62 -0
  47. package/dist/src/rot.js +156 -0
  48. package/dist/src/rot.js.map +1 -0
  49. package/dist/src/run.d.ts +5 -0
  50. package/dist/src/run.js +47 -0
  51. package/dist/src/run.js.map +1 -0
  52. package/dist/src/score.d.ts +14 -0
  53. package/dist/src/score.js +59 -0
  54. package/dist/src/score.js.map +1 -0
  55. package/dist/src/types.d.ts +41 -0
  56. package/dist/src/types.js +2 -0
  57. package/dist/src/types.js.map +1 -0
  58. package/dist/src/verify.d.ts +5 -0
  59. package/dist/src/verify.js +71 -0
  60. package/dist/src/verify.js.map +1 -0
  61. package/package.json +64 -0
@@ -0,0 +1,146 @@
1
+ import { NvidiaNimClient } from './adapters/nvidia-nim.js';
2
+ import { runCorpus } from './corpus.js';
3
+ import { evalSkill } from './eval.js';
4
+ import { runM0Gate } from './m0/run.js';
5
+ import { runRot } from './rot.js';
6
+ import { verifyResult } from './verify.js';
7
+ function printHelp() { // Writes the CLI usage summary (all subcommands and their flags) to stdout with a single console.log call.
8
+ console.log(`skillcheck
9
+
10
+ Usage:
11
+ skillcheck m0
12
+ skillcheck eval <path> [--tasks N] [--trials K] [--output file.json] [--task-suite file.json]
13
+ [--runner model] [--grader model] [--generator model]
14
+ skillcheck verify <result.json> [--sample n]
15
+ skillcheck corpus run --corpus corpus.json [--results dir] [--tasks N] [--trials K]
16
+ [--concurrency N] [--runner model] [--limit N]
17
+ skillcheck rot [--results dir] [--output file.json] [--model model] [--corpus corpus.yaml]
18
+ [--tasks N] [--trials K]
19
+
20
+ M0 is the hardcoded spike. eval is the M1 forced-injection evaluator.`);
21
+ }
22
/**
 * Looks up the value that follows a named flag in the argument list.
 *
 * @param argv Full argument vector to search.
 * @param name Flag to look for, e.g. `--output`.
 * @returns The argument immediately after the flag, or `undefined` when the
 *   flag is absent.
 * @throws Error when the flag is present but is followed by nothing, by an
 *   empty string, or by another `--` flag.
 */
function readOption(argv, name) {
  const position = argv.indexOf(name);
  if (position < 0) {
    return undefined;
  }
  const candidate = argv[position + 1];
  if (candidate && !candidate.startsWith('--')) {
    return candidate;
  }
  throw new Error(`Missing value for ${name}`);
}
33
/**
 * Reads a flag whose value must be a positive integer.
 *
 * @param argv Full argument vector to search.
 * @param name Flag to look for, e.g. `--tasks`.
 * @param fallback Value returned when the flag is not present.
 * @returns The parsed integer, or `fallback` when the flag is absent.
 * @throws Error when the value does not convert (via `Number`) to an integer
 *   greater than zero, or when `readOption` rejects the flag.
 */
function readNumberOption(argv, name, fallback) {
  const raw = readOption(argv, name);
  if (raw !== undefined) {
    const count = Number(raw);
    if (Number.isInteger(count) && count > 0) {
      return count;
    }
    throw new Error(`${name} must be a positive integer`);
  }
  return fallback;
}
44
/**
 * Reads an optional positive-integer flag.
 *
 * Same validation as `readNumberOption`; the only difference is that an
 * absent flag yields `undefined` rather than a caller-supplied default.
 *
 * @param argv Full argument vector to search.
 * @param name Flag to look for, e.g. `--limit`.
 * @returns The parsed integer, or `undefined` when the flag is absent.
 * @throws Error when the value is not a positive integer.
 */
function readOptionalNumberOption(argv, name) {
  return readNumberOption(argv, name, undefined);
}
55
/**
 * Builds the options object for the `eval` command from the argument vector.
 *
 * `argv[3]` is the required skill path. `--mode` defaults to `forced`, which
 * is the only accepted value. `--tasks` defaults to 10 and `--trials` to 3.
 *
 * @param argv Full argument vector (the command itself sits at index 2).
 * @returns Options passed to `evalSkill`.
 * @throws Error when the path is missing, the mode is unsupported, or any
 *   flag value is invalid.
 */
function parseEvalOptions(argv) {
  const inputPath = argv[3];
  const hasPath = Boolean(inputPath) && !inputPath.startsWith('--');
  if (!hasPath) {
    throw new Error('Usage: skillcheck eval <path> [--tasks N] [--trials K] [--output file.json]');
  }

  const mode = readOption(argv, '--mode') ?? 'forced';
  if (mode !== 'forced') {
    throw new Error('Only --mode forced is supported in v1');
  }

  // Flags are read in the same order as the resulting properties so that the
  // first invalid flag reported stays the same.
  const output = readOption(argv, '--output');
  const tasks = readNumberOption(argv, '--tasks', 10);
  const trials = readNumberOption(argv, '--trials', 3);
  const runner = readOption(argv, '--runner');
  const grader = readOption(argv, '--grader');
  const generator = readOption(argv, '--generator');
  const taskSuite = readOption(argv, '--task-suite');

  return { inputPath, output, tasks, trials, mode, runner, grader, generator, taskSuite };
}
76
/**
 * Builds the options object for the `rot` command from the argument vector.
 *
 * Defaults: `--results` is `results`, `--output` is
 * `results/rot/report.json`, `--tasks` is 10 and `--trials` is 3.
 * `--model` and `--corpus` stay `undefined` when not given.
 *
 * @param argv Full argument vector.
 * @returns Options passed to `runRot`.
 * @throws Error when any flag value is missing or invalid.
 */
function parseRotOptions(argv) {
  const resultsDir = readOption(argv, '--results') ?? 'results';
  const output = readOption(argv, '--output') ?? 'results/rot/report.json';
  const model = readOption(argv, '--model');
  const corpus = readOption(argv, '--corpus');
  const tasks = readNumberOption(argv, '--tasks', 10);
  const trials = readNumberOption(argv, '--trials', 3);
  return { resultsDir, output, model, corpus, tasks, trials };
}
86
/**
 * Builds the options object for `corpus run` from the argument vector.
 *
 * `--corpus` is required. Defaults: `--results` is `results/corpus`,
 * `--tasks` is 10, `--trials` is 3 and `--concurrency` is 2. `--runner` and
 * `--limit` stay `undefined` when not given.
 *
 * @param argv Full argument vector.
 * @returns Options passed to `runCorpus`.
 * @throws Error when `--corpus` is absent or any flag value is invalid.
 */
function parseCorpusRunOptions(argv) {
  const corpus = readOption(argv, '--corpus');
  if (!corpus) {
    throw new Error('Usage: skillcheck corpus run --corpus corpus.json [--results dir]');
  }

  const outputDir = readOption(argv, '--results') ?? 'results/corpus';
  const tasks = readNumberOption(argv, '--tasks', 10);
  const trials = readNumberOption(argv, '--trials', 3);
  const concurrency = readNumberOption(argv, '--concurrency', 2);
  const runner = readOption(argv, '--runner');
  const limit = readOptionalNumberOption(argv, '--limit');

  return { corpus, outputDir, tasks, trials, concurrency, runner, limit };
}
101
/**
 * CLI entry point: dispatches on `argv[2]` and prints each command's result
 * as pretty-printed JSON on stdout.
 *
 * With no command, `--help` or `-h`, the usage text is printed instead.
 * The `m0` command additionally sets `process.exitCode` to 0 or 1 depending
 * on `report.passed`.
 *
 * @param argv Full argument vector (command at index 2, first operand at 3).
 * @throws Error for an unknown command, a missing operand, or invalid flags;
 *   errors from the invoked command propagate unchanged.
 */
export async function main(argv) {
  const command = argv[2];
  const printJson = (payload) => {
    console.log(JSON.stringify(payload, null, 2));
  };

  if (!command || command === '--help' || command === '-h') {
    printHelp();
    return;
  }

  switch (command) {
    case 'm0': {
      const report = await runM0Gate((config) => new NvidiaNimClient(config));
      printJson(report);
      process.exitCode = report.passed ? 0 : 1;
      return;
    }
    case 'eval': {
      printJson(await evalSkill(parseEvalOptions(argv)));
      return;
    }
    case 'verify': {
      const resultPath = argv[3];
      if (!resultPath || resultPath.startsWith('--')) {
        throw new Error('Usage: skillcheck verify <result.json> [--sample n]');
      }
      const sample = readNumberOption(argv, '--sample', 3);
      printJson(await verifyResult({ resultPath, sample }));
      return;
    }
    case 'corpus': {
      // `run` is the only corpus subcommand.
      if (argv[3] !== 'run') {
        throw new Error('Usage: skillcheck corpus run --corpus corpus.json [--results dir]');
      }
      printJson(await runCorpus(parseCorpusRunOptions(argv)));
      return;
    }
    case 'rot': {
      printJson(await runRot(parseRotOptions(argv)));
      return;
    }
    default:
      throw new Error(`Unknown command: ${command}`);
  }
}
146
+ //# sourceMappingURL=cli.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cli.js","sourceRoot":"","sources":["../../packages/cli/src/cli.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,0BAA0B,CAAC;AAC3D,OAAO,EAAE,SAAS,EAAyB,MAAM,aAAa,CAAC;AAC/D,OAAO,EAAE,SAAS,EAAoB,MAAM,WAAW,CAAC;AACxD,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACxC,OAAO,EAAE,MAAM,EAAmB,MAAM,UAAU,CAAC;AACnD,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAE3C,SAAS,SAAS;IAChB,OAAO,CAAC,GAAG,CAAC;;;;;;;;;;;;sEAYwD,CAAC,CAAC;AACxE,CAAC;AAED,SAAS,UAAU,CAAC,IAAc,EAAE,IAAY;IAC9C,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;IACjC,IAAI,KAAK,KAAK,CAAC,CAAC,EAAE,CAAC;QACjB,OAAO,SAAS,CAAC;IACnB,CAAC;IACD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;IAC9B,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;QACrC,MAAM,IAAI,KAAK,CAAC,qBAAqB,IAAI,EAAE,CAAC,CAAC;IAC/C,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,gBAAgB,CAAC,IAAc,EAAE,IAAY,EAAE,QAAgB;IACtE,MAAM,KAAK,GAAG,UAAU,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;IACrC,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;QACxB,OAAO,QAAQ,CAAC;IAClB,CAAC;IACD,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC;IAC7B,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,MAAM,IAAI,CAAC,EAAE,CAAC;QAC7C,MAAM,IAAI,KAAK,CAAC,GAAG,IAAI,6BAA6B,CAAC,CAAC;IACxD,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,wBAAwB,CAAC,IAAc,EAAE,IAAY;IAC5D,MAAM,KAAK,GAAG,UAAU,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;IACrC,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;QACxB,OAAO,SAAS,CAAC;IACnB,CAAC;IACD,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC;IAC7B,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,MAAM,IAAI,CAAC,EAAE,CAAC;QAC7C,MAAM,IAAI,KAAK,CAAC,GAAG,IAAI,6BAA6B,CAAC,CAAC;IACxD,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,gBAAgB,CAAC,IAAc;IACtC,MAAM,SAAS,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;IAC1B,IAAI,CAAC,SAAS,IAAI,SAAS,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;QAC7C,MAAM,IAAI,KAAK,CAAC,6EAA6E,CAAC,CAAC;IACjG,CAAC;IAED,MAAM,IAAI,GAAG,UAAU,CAAC,IAAI,EAAE,QAAQ,CAAC,IAAI,QAAQ,CAAC;IACpD,IAAI,IAAI,KAAK,QAAQ,EAAE,CAAC;QACtB,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;IAC3D,CAAC;IAED,OAAO;QACL,SAAS;QACT,MAAM,EAAE,UAAU,CAAC,IAAI,EAAE,UAAU,CAA
C;QACpC,KAAK,EAAE,gBAAgB,CAAC,IAAI,EAAE,SAAS,EAAE,EAAE,CAAC;QAC5C,MAAM,EAAE,gBAAgB,CAAC,IAAI,EAAE,UAAU,EAAE,CAAC,CAAC;QAC7C,IAAI;QACJ,MAAM,EAAE,UAAU,CAAC,IAAI,EAAE,UAAU,CAAC;QACpC,MAAM,EAAE,UAAU,CAAC,IAAI,EAAE,UAAU,CAAC;QACpC,SAAS,EAAE,UAAU,CAAC,IAAI,EAAE,aAAa,CAAC;QAC1C,SAAS,EAAE,UAAU,CAAC,IAAI,EAAE,cAAc,CAAC;KAC5C,CAAC;AACJ,CAAC;AAED,SAAS,eAAe,CAAC,IAAc;IACrC,OAAO;QACL,UAAU,EAAE,UAAU,CAAC,IAAI,EAAE,WAAW,CAAC,IAAI,SAAS;QACtD,MAAM,EAAE,UAAU,CAAC,IAAI,EAAE,UAAU,CAAC,IAAI,yBAAyB;QACjE,KAAK,EAAE,UAAU,CAAC,IAAI,EAAE,SAAS,CAAC;QAClC,MAAM,EAAE,UAAU,CAAC,IAAI,EAAE,UAAU,CAAC;QACpC,KAAK,EAAE,gBAAgB,CAAC,IAAI,EAAE,SAAS,EAAE,EAAE,CAAC;QAC5C,MAAM,EAAE,gBAAgB,CAAC,IAAI,EAAE,UAAU,EAAE,CAAC,CAAC;KAC9C,CAAC;AACJ,CAAC;AAED,SAAS,qBAAqB,CAAC,IAAc;IAC3C,MAAM,MAAM,GAAG,UAAU,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC;IAC5C,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,IAAI,KAAK,CAAC,mEAAmE,CAAC,CAAC;IACvF,CAAC;IAED,OAAO;QACL,MAAM;QACN,SAAS,EAAE,UAAU,CAAC,IAAI,EAAE,WAAW,CAAC,IAAI,gBAAgB;QAC5D,KAAK,EAAE,gBAAgB,CAAC,IAAI,EAAE,SAAS,EAAE,EAAE,CAAC;QAC5C,MAAM,EAAE,gBAAgB,CAAC,IAAI,EAAE,UAAU,EAAE,CAAC,CAAC;QAC7C,WAAW,EAAE,gBAAgB,CAAC,IAAI,EAAE,eAAe,EAAE,CAAC,CAAC;QACvD,MAAM,EAAE,UAAU,CAAC,IAAI,EAAE,UAAU,CAAC;QACpC,KAAK,EAAE,wBAAwB,CAAC,IAAI,EAAE,SAAS,CAAC;KACjD,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,IAAI,CAAC,IAAc;IACvC,MAAM,OAAO,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;IAExB,IAAI,CAAC,OAAO,IAAI,OAAO,KAAK,QAAQ,IAAI,OAAO,KAAK,IAAI,EAAE,CAAC;QACzD,SAAS,EAAE,CAAC;QACZ,OAAO;IACT,CAAC;IAED,IAAI,OAAO,KAAK,IAAI,EAAE,CAAC;QACrB,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,IAAI,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC;QACxE,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;QAC7C,OAAO,CAAC,QAAQ,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACzC,OAAO;IACT,CAAC;IAED,IAAI,OAAO,KAAK,MAAM,EAAE,CAAC;QACvB,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC;QACvD,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;QAC7C,OAAO;IACT,CAAC;IAED,IAAI,OAAO,KAAK,QAAQ
,EAAE,CAAC;QACzB,MAAM,UAAU,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QAC3B,IAAI,CAAC,UAAU,IAAI,UAAU,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;YAC/C,MAAM,IAAI,KAAK,CAAC,qDAAqD,CAAC,CAAC;QACzE,CAAC;QACD,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC;YAChC,UAAU;YACV,MAAM,EAAE,gBAAgB,CAAC,IAAI,EAAE,UAAU,EAAE,CAAC,CAAC;SAC9C,CAAC,CAAC;QACH,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;QAC7C,OAAO;IACT,CAAC;IAED,IAAI,OAAO,KAAK,QAAQ,EAAE,CAAC;QACzB,MAAM,UAAU,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QAC3B,IAAI,UAAU,KAAK,KAAK,EAAE,CAAC;YACzB,MAAM,IAAI,KAAK,CAAC,mEAAmE,CAAC,CAAC;QACvF,CAAC;QACD,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,qBAAqB,CAAC,IAAI,CAAC,CAAC,CAAC;QAC5D,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;QAC7C,OAAO;IACT,CAAC;IAED,IAAI,OAAO,KAAK,KAAK,EAAE,CAAC;QACtB,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC;QACnD,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;QAC7C,OAAO;IACT,CAAC;IAED,MAAM,IAAI,KAAK,CAAC,oBAAoB,OAAO,EAAE,CAAC,CAAC;AACjD,CAAC"}
@@ -0,0 +1,43 @@
1
+ export interface CorpusSkill { // One skill entry listed in a corpus manifest.
2
+ id: string; // Required identifier; runCorpus slugifies it into the result file name.
3
+ path: string; // Required path of the skill, joined onto the prepared source root to form the eval input path.
4
+ source?: string; // Optional source name; when set it is used as-is instead of one derived from the repo or manifest name.
5
+ repo?: string; // Optional per-skill repository; falls back to the manifest-level repo.
6
+ commit?: string; // Optional per-skill commit; falls back to the manifest-level commit.
7
+ }
8
+ export interface CorpusManifest { // Parsed corpus manifest, as returned by parseCorpusManifest.
9
+ name: string; // Required manifest name; also the source name for skills that have neither source nor repo.
10
+ repo?: string; // Default repository for skills that do not set their own.
11
+ commit?: string; // Default commit for skills that do not set their own.
12
+ skills: CorpusSkill[]; // Skills in manifest order.
13
+ }
14
+ export interface CorpusRunOptions { // Input options for runCorpus.
15
+ corpus: string; // Path of the manifest file, read as UTF-8 text.
16
+ outputDir: string; // Directory that receives one result JSON per skill plus summary.json.
17
+ tasks: number; // Forwarded to evalSkill as `tasks`.
18
+ trials: number; // Forwarded to evalSkill as `trials`.
19
+ concurrency: number; // Upper bound on the number of worker loops evaluating skills at once.
20
+ runner?: string; // Forwarded to evalSkill as `runner` when set.
21
+ limit?: number; // When set, only the first `limit` skills of the manifest are processed.
22
+ }
23
+ export interface CorpusRunEntry { // Per-skill record stored in the corpus run report.
24
+ id: string; // Skill id copied from the manifest.
25
+ source: string; // Resolved source name for the skill.
26
+ repo?: string; // Effective repository (skill-level or manifest-level); omitted when there is none.
27
+ commit?: string; // Effective commit (skill-level or manifest-level); omitted when there is none.
28
+ path: string; // Skill path exactly as given in the manifest.
29
+ input_path: string; // Source root joined with `path`; the path handed to evalSkill.
30
+ output_path: string; // Result JSON path inside the output directory.
31
+ }
32
+ export interface CorpusRunReport { // Summary returned by runCorpus and written to summary.json in the output directory.
33
+ corpus: string; // Manifest path the run was started with.
34
+ output_dir: string; // Output directory the run was started with.
35
+ tasks: number; // `tasks` option used for the run.
36
+ trials: number; // `trials` option used for the run.
37
+ concurrency: number; // `concurrency` option used for the run.
38
+ runner?: string; // `runner` option; omitted when it was not set.
39
+ skills: CorpusRunEntry[]; // One entry per processed skill, in manifest order.
40
+ }
41
+ export declare function slugify(value: string): string; // Lower-cases the value, replaces each run of characters outside a-z0-9 with "-", and drops one leading and one trailing "-".
42
+ export declare function parseCorpusManifest(text: string): CorpusManifest; // Parses manifest text: JSON when it starts with "{", otherwise a small line-based YAML subset; throws when name or a skill's id/path is missing.
43
+ export declare function runCorpus(options: CorpusRunOptions): Promise<CorpusRunReport>; // Evaluates the manifest's skills (skipping ones with an existing valid result JSON) and writes summary.json.
@@ -0,0 +1,233 @@
1
+ import { execFile as execFileCallback } from 'node:child_process';
2
+ import { mkdir, readFile, stat, writeFile } from 'node:fs/promises';
3
+ import path from 'node:path';
4
+ import { promisify } from 'node:util';
5
+ import { evalSkill } from './eval.js';
6
+ const execFile = promisify(execFileCallback);
7
/**
 * Turns an arbitrary string into a lowercase, dash-separated slug.
 *
 * Every run of characters outside `a-z0-9` (after lower-casing) collapses to
 * a single `-`, and a dash left at the very start or end is removed.
 *
 * @param value Text to slugify.
 * @returns The slug; may be empty when the input has no alphanumerics.
 */
export function slugify(value) {
  const lowered = value.toLowerCase();
  const dashed = lowered.replace(/[^a-z0-9]+/g, '-');
  return dashed.replace(/^-|-$/g, '');
}
13
/**
 * Trims a scalar value and removes one pair of surrounding quotes.
 *
 * Quotes are removed only when the trimmed value starts and ends with the
 * same quote character (`"` or `'`). A lone or mismatched quote is part of
 * the value and is kept, so values such as `skills/o'` are not truncated.
 *
 * @param value Raw scalar text.
 * @returns The trimmed value without its surrounding quote pair.
 */
function unquote(value) {
  const trimmed = value.trim();
  if (trimmed.length >= 2) {
    const first = trimmed[0];
    const last = trimmed[trimmed.length - 1];
    if ((first === '"' || first === "'") && first === last) {
      return trimmed.slice(1, -1);
    }
  }
  return trimmed;
}
16
/**
 * Validates that a parsed JSON value has the shape of a corpus manifest.
 *
 * The value must be an object with a non-empty string `name` and a `skills`
 * array, and every skill must be an object with non-empty string `id` and
 * `path`. Non-object skills and non-string fields are rejected here with the
 * descriptive error instead of failing later with a TypeError.
 *
 * @param value Result of `JSON.parse` on the manifest text.
 * @returns The same value, once validated.
 * @throws Error when the manifest or any skill entry is malformed.
 */
function assertCorpusManifest(value) {
  const isRecord = (candidate) => typeof candidate === 'object' && candidate !== null;
  const isNonEmptyString = (candidate) => typeof candidate === 'string' && candidate.length > 0;

  if (!isRecord(value) || !isNonEmptyString(value.name) || !Array.isArray(value.skills)) {
    throw new Error('Corpus manifest JSON must include name and skills');
  }
  for (const [index, skill] of value.skills.entries()) {
    if (!isRecord(skill) || !isNonEmptyString(skill.id) || !isNonEmptyString(skill.path)) {
      // Skills are numbered from 1 in the message.
      throw new Error(`Corpus skill ${index + 1} must include id and path`);
    }
  }
  return value;
}
28
/**
 * Parses corpus manifest text into a manifest object.
 *
 * Text whose first non-blank character is `{` is parsed as JSON and
 * validated with `assertCorpusManifest`. Anything else is read with a small
 * line-based parser for this YAML-like layout:
 *
 *   name: my-corpus
 *   repo: https://github.com/owner/repo
 *   skills:
 *     - id: first
 *       path: skills/first/SKILL.md
 *
 * Only the top-level keys `name`, `repo`, `commit` and the skill keys `id`,
 * `path`, `source`, `repo`, `commit` are read; other lines are ignored. A
 * skill entry must begin with `- id:`. Trailing ` # comment` text is removed.
 *
 * @param text Manifest file contents.
 * @returns The parsed manifest.
 * @throws Error when the name is missing or a skill lacks `id` or `path`;
 *   JSON syntax errors from `JSON.parse` propagate.
 */
export function parseCorpusManifest(text) {
  const trimmed = text.trim();
  if (trimmed.startsWith('{')) {
    return assertCorpusManifest(JSON.parse(trimmed));
  }

  const topLevelKeys = ['name', 'repo', 'commit'];
  const skillKeys = ['path', 'id', 'source', 'repo', 'commit'];
  const manifest = { name: '', skills: [] };
  let currentSkill;

  // Split on LF and CRLF. A trailing "\r" would stop every pattern below from
  // matching, because "." and "$" do not match before a carriage return.
  for (const rawLine of text.split(/\r?\n/)) {
    const withoutComment = rawLine.replace(/\s+#.*$/, '');
    if (!withoutComment.trim()) {
      continue;
    }

    // Unindented "key: value" lines are manifest-level fields.
    const topLevel = /^([A-Za-z_]+):\s*(.*)$/.exec(withoutComment);
    if (topLevel && !rawLine.startsWith(' ')) {
      const [, key, value = ''] = topLevel;
      if (topLevelKeys.includes(key)) {
        manifest[key] = unquote(value);
      }
      continue;
    }

    // "- id: ..." opens a new skill entry.
    const skillStart = /^\s*-\s*id:\s*(.+)$/.exec(withoutComment);
    if (skillStart) {
      currentSkill = { id: unquote(skillStart[1]) };
      manifest.skills.push(currentSkill);
      continue;
    }

    // Indented "key: value" lines belong to the most recent skill entry.
    const skillField = /^\s+([A-Za-z_]+):\s*(.+)$/.exec(withoutComment);
    if (skillField && currentSkill) {
      const [, key, value] = skillField;
      if (skillKeys.includes(key)) {
        currentSkill[key] = unquote(value);
      }
    }
  }

  if (!manifest.name || manifest.skills.some((skill) => !skill.id || !skill.path)) {
    throw new Error('Corpus manifest YAML must include name and skills with id/path');
  }
  return manifest;
}
77
/**
 * Reports whether something exists at the given path.
 *
 * @param filePath Path passed to `stat`.
 * @returns `true` when `stat` succeeds, `false` when it fails with `ENOENT`.
 * @throws Any other `stat` error is rethrown unchanged.
 */
async function pathExists(filePath) {
  let missing = false;
  await stat(filePath).catch((failure) => {
    if (failure.code !== 'ENOENT') {
      throw failure;
    }
    missing = true;
  });
  return !missing;
}
89
/**
 * Reports whether the file at the given path exists and contains parseable
 * JSON.
 *
 * @param filePath File to read as UTF-8.
 * @returns `true` when the contents parse as JSON; `false` when the file is
 *   missing (`ENOENT`) or parsing raises a `SyntaxError`.
 * @throws Any other read error is rethrown unchanged.
 */
async function hasValidJsonFile(filePath) {
  try {
    const contents = await readFile(filePath, 'utf8');
    JSON.parse(contents);
  }
  catch (error) {
    const ignorable = error.code === 'ENOENT' || error instanceof SyntaxError;
    if (!ignorable) {
      throw error;
    }
    return false;
  }
  return true;
}
104
/**
 * Derives a slug from a repository URL.
 *
 * A leading `https://github.com/` and a trailing `.git` are removed before
 * the remainder is passed through `slugify`.
 *
 * @param repo Repository URL.
 * @returns Slug of the shortened repository reference.
 */
function repoSlug(repo) {
  const withoutHost = repo.replace(/^https:\/\/github\.com\//, '');
  const withoutSuffix = withoutHost.replace(/\.git$/, '');
  return slugify(withoutSuffix);
}
107
/**
 * Picks the sparse-checkout entry for a skill path.
 *
 * @param skillPath POSIX-style path of the skill inside the repository.
 * @returns The parent directory of the path, or the path itself when it has
 *   no directory component (its POSIX dirname is `.`).
 */
function sparseDir(skillPath) {
  const parent = path.posix.dirname(skillPath);
  if (parent === '.') {
    return skillPath;
  }
  return parent;
}
111
/**
 * Resolves the source name for a skill.
 *
 * Precedence: the skill's own `source`; otherwise the effective repository
 * (skill-level, then manifest-level) with a leading `https://github.com/`
 * and a trailing `.git` removed; otherwise the manifest name.
 *
 * @param manifest Corpus manifest the skill belongs to.
 * @param skill Skill entry.
 * @returns The resolved source name.
 */
function sourceName(manifest, skill) {
  if (skill.source) {
    return skill.source;
  }
  const repo = skill.repo ?? manifest.repo;
  if (!repo) {
    return manifest.name;
  }
  const withoutHost = repo.replace(/^https:\/\/github\.com\//, '');
  return withoutHost.replace(/\.git$/, '');
}
118
/**
 * Builds a label describing where a skill came from.
 *
 * With an effective repository the label is
 * `<repo without .git>/blob/<commit>/<path>`, where the commit falls back to
 * the manifest commit and then to `HEAD`. Without one it is
 * `<source name>:<path>`.
 *
 * @param manifest Corpus manifest the skill belongs to.
 * @param skill Skill entry.
 * @returns The label string.
 */
function sourceLabel(manifest, skill) {
  const repo = skill.repo ?? manifest.repo;
  const commit = skill.commit ?? manifest.commit ?? 'HEAD';
  if (repo) {
    const base = repo.replace(/\.git$/, '');
    return `${base}/blob/${commit}/${skill.path}`;
  }
  return `${sourceName(manifest, skill)}:${skill.path}`;
}
126
/**
 * Makes a sparse git checkout of a repository available under
 * `.cache/sources` and returns its directory.
 *
 * The directory name combines the slugified source name, the repository slug
 * and the first 12 characters of the commit (or `head`). The repository is
 * cloned without blobs or a working tree when no `.git` entry exists there
 * yet. The sparse-checkout set is then replaced with the directories of the
 * requested paths, and the commit (or the current HEAD when none is given)
 * is checked out.
 *
 * `repo` and `commit` come from the corpus manifest, so they are kept from
 * being read as git options: the clone URL follows a `--` separator and a
 * commit starting with `-` is rejected.
 *
 * @param source Source name used in the cache directory name.
 * @param repo Repository URL to clone.
 * @param commit Commit to fetch and check out; may be undefined.
 * @param paths Skill paths whose directories must be present in the checkout.
 * @returns The checkout directory, relative to the current working directory.
 * @throws Error when `commit` starts with `-`; failures of the git commands
 *   propagate from `execFile`.
 */
async function prepareGitSource(source, repo, commit, paths) {
  if (commit && commit.startsWith('-')) {
    throw new Error(`Invalid commit for ${source}: ${commit}`);
  }
  const checkoutDir = path.join('.cache', 'sources', `${slugify(source)}-${repoSlug(repo)}-${(commit ?? 'head').slice(0, 12)}`);
  // De-duplicated and sorted so the sparse-checkout arguments are stable.
  const sparseDirs = [...new Set(paths.map(sparseDir))].sort();
  if (!(await pathExists(path.join(checkoutDir, '.git')))) {
    await mkdir(path.dirname(checkoutDir), { recursive: true });
    // "--" ends option parsing, so a URL that begins with "-" stays an operand.
    await execFile('git', ['clone', '--filter=blob:none', '--no-checkout', '--', repo, checkoutDir]);
  }
  await execFile('git', ['-C', checkoutDir, 'sparse-checkout', 'init', '--cone']);
  await execFile('git', ['-C', checkoutDir, 'sparse-checkout', 'set', ...sparseDirs]);
  if (commit) {
    await execFile('git', ['-C', checkoutDir, 'fetch', '--depth', '1', 'origin', commit]);
    await execFile('git', ['-C', checkoutDir, 'checkout', '--quiet', commit]);
  }
  else {
    await execFile('git', ['-C', checkoutDir, 'checkout', '--quiet']);
  }
  return checkoutDir;
}
144
/**
 * Resolves a source root directory for every distinct source among the
 * given skills.
 *
 * Skills are grouped by `<repo>#<commit or HEAD>` when they have an effective
 * repository, and otherwise under a single `local:<manifest directory>` key.
 * Repository groups are prepared one after another with `prepareGitSource`;
 * the local group resolves to the directory containing the manifest file.
 *
 * @param corpusPath Path of the manifest file.
 * @param manifest Parsed manifest supplying default repo and commit.
 * @param skills Skills to prepare sources for.
 * @returns Map from group key to source root directory.
 */
async function prepareSources(corpusPath, manifest, skills) {
  const localRoot = path.dirname(path.resolve(corpusPath));
  const grouped = new Map();
  for (const skill of skills) {
    const repo = skill.repo ?? manifest.repo;
    const commit = skill.commit ?? manifest.commit;
    const key = repo ? `${repo}#${commit ?? 'HEAD'}` : `local:${localRoot}`;
    let group = grouped.get(key);
    if (!group) {
      // The first skill of a group decides the group's source name.
      group = { source: sourceName(manifest, skill), repo, commit, paths: [] };
      grouped.set(key, group);
    }
    group.paths.push(skill.path);
  }

  const roots = new Map();
  for (const [key, group] of grouped) {
    const root = group.repo
      ? await prepareGitSource(group.source, group.repo, group.commit, group.paths)
      : localRoot;
    roots.set(key, root);
  }
  return roots;
}
163
/**
 * Writes a value to a file as two-space-indented JSON followed by a newline,
 * creating the parent directory first when needed.
 *
 * @param filePath Destination file.
 * @param value Value to serialise with `JSON.stringify`.
 */
async function writeJson(filePath, value) {
  const parentDir = path.dirname(filePath);
  await mkdir(parentDir, { recursive: true });
  const serialized = JSON.stringify(value, null, 2);
  await writeFile(filePath, `${serialized}\n`);
}
167
/**
 * Evaluates the skills of a corpus manifest and writes a run summary.
 *
 * The manifest is read and parsed, optionally truncated to the first
 * `options.limit` skills, and its sources are prepared. Up to
 * `options.concurrency` worker loops then take skills in manifest order. A
 * skill whose result file already contains valid JSON is skipped; every
 * other skill is evaluated with `evalSkill` in `forced` mode. Progress lines
 * go to stderr. The report is written to `summary.json` in the output
 * directory and returned.
 *
 * @param options Run options (manifest path, output directory, task and
 *   trial counts, concurrency, optional runner and limit).
 * @returns The run report, with one entry per processed skill in manifest
 *   order.
 * @throws Error when a skill has no prepared source root or an entry is
 *   missing after all workers finish; parse, git and evaluation errors
 *   propagate.
 */
export async function runCorpus(options) {
  const manifest = parseCorpusManifest(await readFile(options.corpus, 'utf8'));
  const skills = manifest.skills.slice(0, options.limit ?? manifest.skills.length);
  const roots = await prepareSources(options.corpus, manifest, skills);
  // Dense array of `undefined`: `new Array(n)` would create holes, which
  // `Array.prototype.some` skips, so the completeness check below would never
  // see a missing entry.
  const entries = Array.from({ length: skills.length });

  async function runOne(skill, index) {
    const repo = skill.repo ?? manifest.repo;
    const commit = skill.commit ?? manifest.commit;
    const source = sourceName(manifest, skill);
    // Must match the key format used when the sources were prepared.
    const key = repo ? `${repo}#${commit ?? 'HEAD'}` : `local:${path.dirname(path.resolve(options.corpus))}`;
    const root = roots.get(key);
    if (!root) {
      throw new Error(`No prepared source root for ${source}`);
    }
    const outputPath = path.join(options.outputDir, `${slugify(source)}-${slugify(skill.id)}.json`);
    const inputPath = path.join(root, skill.path);
    const entry = {
      id: skill.id,
      source,
      ...(repo ? { repo } : {}),
      ...(commit ? { commit } : {}),
      path: skill.path,
      input_path: inputPath,
      output_path: outputPath
    };
    // An existing valid result makes the run resumable: the skill is skipped.
    if (await hasValidJsonFile(outputPath)) {
      console.error(`[corpus] skip ${source}/${skill.id}; existing result`);
      entries[index] = entry;
      return;
    }
    console.error(`[corpus] eval ${source}/${skill.id}`);
    await evalSkill({
      inputPath,
      output: outputPath,
      tasks: options.tasks,
      trials: options.trials,
      mode: 'forced',
      runner: options.runner,
      sourceLabel: sourceLabel(manifest, skill)
    });
    entries[index] = entry;
  }

  // Workers share one cursor; each claims the next index before awaiting.
  let nextIndex = 0;
  const workerCount = Math.min(options.concurrency, skills.length);
  await Promise.all(Array.from({ length: workerCount }, async () => {
    while (nextIndex < skills.length) {
      const index = nextIndex;
      nextIndex += 1;
      await runOne(skills[index], index);
    }
  }));

  if (entries.some((entry) => !entry)) {
    throw new Error('Corpus run completed without producing all result entries');
  }
  const report = {
    corpus: options.corpus,
    output_dir: options.outputDir,
    tasks: options.tasks,
    trials: options.trials,
    concurrency: options.concurrency,
    ...(options.runner ? { runner: options.runner } : {}),
    skills: entries
  };
  await writeJson(path.join(options.outputDir, 'summary.json'), report);
  return report;
}
233
+ //# sourceMappingURL=corpus.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"corpus.js","sourceRoot":"","sources":["../../packages/cli/src/corpus.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,IAAI,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AAClE,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,IAAI,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AACpE,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AACtC,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAEtC,MAAM,QAAQ,GAAG,SAAS,CAAC,gBAAgB,CAAC,CAAC;AA+C7C,MAAM,UAAU,OAAO,CAAC,KAAa;IACnC,OAAO,KAAK;SACT,WAAW,EAAE;SACb,OAAO,CAAC,aAAa,EAAE,GAAG,CAAC;SAC3B,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;AAC3B,CAAC;AAED,SAAS,OAAO,CAAC,KAAa;IAC5B,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,cAAc,EAAE,EAAE,CAAC,CAAC;AAClD,CAAC;AAED,SAAS,oBAAoB,CAAC,KAAc;IAC1C,MAAM,QAAQ,GAAG,KAAgC,CAAC;IAClD,IAAI,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QACtD,MAAM,IAAI,KAAK,CAAC,mDAAmD,CAAC,CAAC;IACvE,CAAC;IACD,KAAK,MAAM,CAAC,KAAK,EAAE,KAAK,CAAC,IAAI,QAAQ,CAAC,MAAM,CAAC,OAAO,EAAE,EAAE,CAAC;QACvD,IAAI,CAAC,KAAK,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;YAC7B,MAAM,IAAI,KAAK,CAAC,gBAAgB,KAAK,GAAG,CAAC,2BAA2B,CAAC,CAAC;QACxE,CAAC;IACH,CAAC;IACD,OAAO,QAA0B,CAAC;AACpC,CAAC;AAED,MAAM,UAAU,mBAAmB,CAAC,IAAY;IAC9C,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAC5B,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;QAC5B,OAAO,oBAAoB,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAY,CAAC,CAAC;IAC9D,CAAC;IAED,MAAM,QAAQ,GAAmB,EAAE,IAAI,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC;IAC1D,IAAI,YAA8C,CAAC;IAEnD,KAAK,MAAM,OAAO,IAAI,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;QACvC,MAAM,cAAc,GAAG,OAAO,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;QACtD,IAAI,CAAC,cAAc,CAAC,IAAI,EAAE,EAAE,CAAC;YAC3B,SAAS;QACX,CAAC;QAED,MAAM,QAAQ,GAAG,wBAAwB,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QAC/D,IAAI,QAAQ,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YACzC,MAAM,CAAC,EAAE,GAAG,EAAE,KAAK,GAAG,EAAE,CAAC,GAAG,QAAQ,CAAC;YACrC,IAAI,GAAG,KAAK,MAAM;gBAAE,QAAQ,CAAC,IAAI,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC;YACnD,IAAI,GAAG,KAAK,MAAM;gBAAE,QAAQ,CAAC,IAAI,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC;YACnD,IAAI,GAAG,KAAK,QAA
Q;gBAAE,QAAQ,CAAC,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC;YACvD,SAAS;QACX,CAAC;QAED,MAAM,UAAU,GAAG,qBAAqB,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QAC9D,IAAI,UAAU,EAAE,CAAC;YACf,YAAY,GAAG,EAAE,EAAE,EAAE,OAAO,CAAC,UAAU,CAAC,CAAC,CAAE,CAAC,EAAE,CAAC;YAC/C,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,YAA2B,CAAC,CAAC;YAClD,SAAS;QACX,CAAC;QAED,MAAM,UAAU,GAAG,2BAA2B,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QACpE,IAAI,UAAU,IAAI,YAAY,EAAE,CAAC;YAC/B,MAAM,CAAC,EAAE,GAAG,EAAE,KAAK,CAAC,GAAG,UAAU,CAAC;YAClC,IAAI,GAAG,KAAK,MAAM;gBAAE,YAAY,CAAC,IAAI,GAAG,OAAO,CAAC,KAAM,CAAC,CAAC;YACxD,IAAI,GAAG,KAAK,IAAI;gBAAE,YAAY,CAAC,EAAE,GAAG,OAAO,CAAC,KAAM,CAAC,CAAC;YACpD,IAAI,GAAG,KAAK,QAAQ;gBAAE,YAAY,CAAC,MAAM,GAAG,OAAO,CAAC,KAAM,CAAC,CAAC;YAC5D,IAAI,GAAG,KAAK,MAAM;gBAAE,YAAY,CAAC,IAAI,GAAG,OAAO,CAAC,KAAM,CAAC,CAAC;YACxD,IAAI,GAAG,KAAK,QAAQ;gBAAE,YAAY,CAAC,MAAM,GAAG,OAAO,CAAC,KAAM,CAAC,CAAC;QAC9D,CAAC;IACH,CAAC;IAED,IAAI,CAAC,QAAQ,CAAC,IAAI,IAAI,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;QAChF,MAAM,IAAI,KAAK,CAAC,gEAAgE,CAAC,CAAC;IACpF,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,KAAK,UAAU,UAAU,CAAC,QAAgB;IACxC,IAAI,CAAC;QACH,MAAM,IAAI,CAAC,QAAQ,CAAC,CAAC;QACrB,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAK,KAA+B,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YACvD,OAAO,KAAK,CAAC;QACf,CAAC;QACD,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC;AAED,KAAK,UAAU,gBAAgB,CAAC,QAAgB;IAC9C,IAAI,CAAC;QACH,IAAI,CAAC,KAAK,CAAC,MAAM,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC,CAAC;QAC7C,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAK,KAA+B,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YACvD,OAAO,KAAK,CAAC;QACf,CAAC;QACD,IAAI,KAAK,YAAY,WAAW,EAAE,CAAC;YACjC,OAAO,KAAK,CAAC;QACf,CAAC;QACD,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC;AAED,SAAS,QAAQ,CAAC,IAAY;IAC5B,OAAO,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,0BAA0B,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC,CAAC;AACrF,CAAC;AAED,SAAS,SAAS,CAAC,SAAiB;IAClC,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;IAC1C,OAAO,GAAG,KAAK,GAAG,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,
CAAC,GAAG,CAAC;AACvC,CAAC;AAED,SAAS,UAAU,CAAC,QAAwB,EAAE,KAAkB;IAC9D,IAAI,KAAK,CAAC,MAAM,EAAE,CAAC;QACjB,OAAO,KAAK,CAAC,MAAM,CAAC;IACtB,CAAC;IACD,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,IAAI,QAAQ,CAAC,IAAI,CAAC;IACzC,OAAO,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,0BAA0B,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC;AACnG,CAAC;AAED,SAAS,WAAW,CAAC,QAAwB,EAAE,KAAkB;IAC/D,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,IAAI,QAAQ,CAAC,IAAI,CAAC;IACzC,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,IAAI,QAAQ,CAAC,MAAM,IAAI,MAAM,CAAC;IACzD,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,OAAO,GAAG,UAAU,CAAC,QAAQ,EAAE,KAAK,CAAC,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC;IACxD,CAAC;IACD,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,SAAS,MAAM,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC;AACtE,CAAC;AAED,KAAK,UAAU,gBAAgB,CAAC,MAAc,EAAE,IAAY,EAAE,MAA0B,EAAE,KAAe;IACvG,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,SAAS,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,IAAI,MAAM,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC;IAC9H,MAAM,UAAU,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IAE7D,IAAI,CAAC,CAAC,MAAM,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;QACxD,MAAM,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAC5D,MAAM,QAAQ,CAAC,KAAK,EAAE,CAAC,OAAO,EAAE,oBAAoB,EAAE,eAAe,EAAE,IAAI,EAAE,WAAW,CAAC,CAAC,CAAC;IAC7F,CAAC;IAED,MAAM,QAAQ,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,WAAW,EAAE,iBAAiB,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC,CAAC;IAChF,MAAM,QAAQ,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,WAAW,EAAE,iBAAiB,EAAE,KAAK,EAAE,GAAG,UAAU,CAAC,CAAC,CAAC;IACpF,IAAI,MAAM,EAAE,CAAC;QACX,MAAM,QAAQ,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,WAAW,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,EAAE,QAAQ,EAAE,MAAM,CAAC,CAAC,CAAC;QACtF,MAAM,QAAQ,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,WAAW,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC,CAAC;IAC5E,CAAC;SAAM,CAAC;QACN,MAAM,QAAQ,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,WAAW,EAAE,UAAU,EAAE,SAAS,CAAC,CAAC,CAAC;IACpE,CAAC;IACD,OAAO,WAAW,CAAC;AACrB,CAAC;AAED,KAAK,UA
AU,cAAc,CAC3B,UAAkB,EAClB,QAAwB,EACxB,MAAqB;IAErB,MAAM,KAAK,GAAG,IAAI,GAAG,EAAkB,CAAC;IACxC,MAAM,OAAO,GAAG,IAAI,GAAG,EAA+E,CAAC;IAEvG,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,IAAI,QAAQ,CAAC,IAAI,CAAC;QACzC,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,IAAI,QAAQ,CAAC,MAAM,CAAC;QAC/C,MAAM,MAAM,GAAG,UAAU,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;QAC3C,MAAM,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,GAAG,IAAI,IAAI,MAAM,IAAI,MAAM,EAAE,CAAC,CAAC,CAAC,SAAS,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,EAAE,CAAC;QACrG,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC;QACxE,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC/B,OAAO,CAAC,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;IAC5B,CAAC;IAED,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,OAAO,EAAE,CAAC;QACnC,KAAK,CAAC,GAAG,CACP,GAAG,EACH,KAAK,CAAC,IAAI;YACR,CAAC,CAAC,MAAM,gBAAgB,CAAC,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,KAAK,CAAC;YAC7E,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,CAC3C,CAAC;IACJ,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED,KAAK,UAAU,SAAS,CAAC,QAAgB,EAAE,KAAc;IACvD,MAAM,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACzD,MAAM,SAAS,CAAC,QAAQ,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC;AACnE,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,OAAyB;IACvD,MAAM,QAAQ,GAAG,mBAAmB,CAAC,MAAM,QAAQ,CAAC,OAAO,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IAC7E,MAAM,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,KAAK,IAAI,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;IACjF,MAAM,KAAK,GAAG,MAAM,cAAc,CAAC,OAAO,CAAC,MAAM,EAAE,QAAQ,EAAE,MAAM,CAAC,CAAC;IACrE,MAAM,OAAO,GAAqB,IAAI,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;IAE3D,KAAK,UAAU,MAAM,CAAC,KAAkB,EAAE,KAAa;QACrD,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,IAAI,QAAQ,CAAC,IAAI,CAAC;QACzC,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,IAAI,QAAQ,CAAC,MAAM,CAAC;QAC/C,MAAM,MAAM,GAAG,UAAU,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;QAC3C,MAAM,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,GAAG,IAAI,IAAI,
MAAM,IAAI,MAAM,EAAE,CAAC,CAAC,CAAC,SAAS,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC;QACzG,MAAM,IAAI,GAAG,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAC5B,IAAI,CAAC,IAAI,EAAE,CAAC;YACV,MAAM,IAAI,KAAK,CAAC,+BAA+B,MAAM,EAAE,CAAC,CAAC;QAC3D,CAAC;QAED,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,IAAI,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,OAAO,CAAC,CAAC;QAChG,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;QAC9C,MAAM,KAAK,GAAmB;YAC5B,EAAE,EAAE,KAAK,CAAC,EAAE;YACZ,MAAM;YACN,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACzB,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YAC7B,IAAI,EAAE,KAAK,CAAC,IAAI;YAChB,UAAU,EAAE,SAAS;YACrB,WAAW,EAAE,UAAU;SACxB,CAAC;QAEF,IAAI,MAAM,gBAAgB,CAAC,UAAU,CAAC,EAAE,CAAC;YACvC,OAAO,CAAC,KAAK,CAAC,iBAAiB,MAAM,IAAI,KAAK,CAAC,EAAE,mBAAmB,CAAC,CAAC;YACtE,OAAO,CAAC,KAAK,CAAC,GAAG,KAAK,CAAC;YACvB,OAAO;QACT,CAAC;QAED,OAAO,CAAC,KAAK,CAAC,iBAAiB,MAAM,IAAI,KAAK,CAAC,EAAE,EAAE,CAAC,CAAC;QACrD,MAAM,SAAS,CAAC;YACd,SAAS;YACT,MAAM,EAAE,UAAU;YAClB,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,MAAM,EAAE,OAAO,CAAC,MAAM;YACtB,IAAI,EAAE,QAAQ;YACd,MAAM,EAAE,OAAO,CAAC,MAAM;YACtB,WAAW,EAAE,WAAW,CAAC,QAAQ,EAAE,KAAK,CAAC;SAC1C,CAAC,CAAC;QAEH,OAAO,CAAC,KAAK,CAAC,GAAG,KAAK,CAAC;IACzB,CAAC;IAED,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,WAAW,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;IACjE,MAAM,OAAO,CAAC,GAAG,CACf,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,WAAW,EAAE,EAAE,KAAK,IAAI,EAAE;QAC7C,OAAO,SAAS,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC;YACjC,MAAM,KAAK,GAAG,SAAS,CAAC;YACxB,SAAS,IAAI,CAAC,CAAC;YACf,MAAM,MAAM,CAAC,MAAM,CAAC,KAAK,CAAE,EAAE,KAAK,CAAC,CAAC;QACtC,CAAC;IACH,CAAC,CAAC,CACH,CAAC;IAEF,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC;QACpC,MAAM,IAAI,KAAK,CAAC,2DAA2D,CAAC,CAAC;IAC/E,CAAC;IAED,MAAM,MAAM,GAAoB;QAC9B,MAAM,EAAE,OAAO,CAAC,MAAM;QACtB,UAAU,EAAE,OAAO,CAAC,SAAS;QAC7B,KAAK,EAAE,OAAO,CAAC,KAAK;QACpB,MAAM,EAAE,OAAO,CAAC,MAAM;QAC
tB,WAAW,EAAE,OAAO,CAAC,WAAW;QAChC,GAAG,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QACrD,MAAM,EAAE,OAA2B;KACpC,CAAC;IACF,MAAM,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,cAAc,CAAC,EAAE,MAAM,CAAC,CAAC;IACtE,OAAO,MAAM,CAAC;AAChB,CAAC"}
@@ -0,0 +1,7 @@
1
+ import type { GeneratedTask } from './types.js';
2
+ export interface DeterministicGrade { // Result of grading one output against a deterministic criterion.
3
+ score: number; // 1 when the criterion is satisfied, 0 otherwise (the only values gradeDeterministically produces).
4
+ reason: string; // Human-readable explanation naming the regex or substring that was (or was not) found.
5
+ pass: boolean; // True exactly when the criterion matched; score is derived from this flag.
6
+ }
7
+ export declare function gradeDeterministically(task: GeneratedTask, output: string): DeterministicGrade; // Grades `output` against the task's `regex:` or `includes:` criterion; throws for non-deterministic tasks and unsupported criterion prefixes.
@@ -0,0 +1,25 @@
1
+ export function gradeDeterministically(task, output) {
2
+ if (task.criterionType !== 'deterministic') {
3
+ throw new Error(`Task ${task.id} is not deterministic`);
4
+ }
5
+ if (task.criterion.startsWith('regex:')) {
6
+ const pattern = task.criterion.slice('regex:'.length);
7
+ const pass = new RegExp(pattern, 's').test(output);
8
+ return {
9
+ score: pass ? 1 : 0,
10
+ reason: pass ? `matched regex ${pattern}` : `did not match regex ${pattern}`,
11
+ pass
12
+ };
13
+ }
14
+ if (task.criterion.startsWith('includes:')) {
15
+ const expected = task.criterion.slice('includes:'.length);
16
+ const pass = output.includes(expected);
17
+ return {
18
+ score: pass ? 1 : 0,
19
+ reason: pass ? `included ${expected}` : `did not include ${expected}`,
20
+ pass
21
+ };
22
+ }
23
+ throw new Error(`Unsupported deterministic criterion for ${task.id}: ${task.criterion}`);
24
+ }
25
+ //# sourceMappingURL=deterministic.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"deterministic.js","sourceRoot":"","sources":["../../packages/cli/src/deterministic.ts"],"names":[],"mappings":"AAQA,MAAM,UAAU,sBAAsB,CAAC,IAAmB,EAAE,MAAc;IACxE,IAAI,IAAI,CAAC,aAAa,KAAK,eAAe,EAAE,CAAC;QAC3C,MAAM,IAAI,KAAK,CAAC,QAAQ,IAAI,CAAC,EAAE,uBAAuB,CAAC,CAAC;IAC1D,CAAC;IAED,IAAI,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;QACxC,MAAM,OAAO,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QACtD,MAAM,IAAI,GAAG,IAAI,MAAM,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACnD,OAAO;YACL,KAAK,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACnB,MAAM,EAAE,IAAI,CAAC,CAAC,CAAC,iBAAiB,OAAO,EAAE,CAAC,CAAC,CAAC,uBAAuB,OAAO,EAAE;YAC5E,IAAI;SACL,CAAC;IACJ,CAAC;IAED,IAAI,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,WAAW,CAAC,EAAE,CAAC;QAC3C,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;QAC1D,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;QACvC,OAAO;YACL,KAAK,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACnB,MAAM,EAAE,IAAI,CAAC,CAAC,CAAC,YAAY,QAAQ,EAAE,CAAC,CAAC,CAAC,mBAAmB,QAAQ,EAAE;YACrE,IAAI;SACL,CAAC;IACJ,CAAC;IAED,MAAM,IAAI,KAAK,CAAC,2CAA2C,IAAI,CAAC,EAAE,KAAK,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC;AAC3F,CAAC"}
@@ -0,0 +1,12 @@
1
+ export interface NvidiaConfig { // Settings assembled by loadNvidiaConfig from NVIDIA_* environment variables.
2
+ apiKey: string; // From NVIDIA_API_KEY; required.
3
+ baseUrl: string; // From NVIDIA_BASE_URL; defaults to 'https://integrate.api.nvidia.com/v1' when unset or blank.
4
+ timeoutMs: number; // From NVIDIA_TIMEOUT_MS; defaults to 120000; must be a positive finite number.
5
+ requestDelayMs: number; // From NVIDIA_REQUEST_DELAY_MS; defaults to 5000; must be finite and non-negative.
6
+ maxAttempts: number; // From NVIDIA_MAX_ATTEMPTS; defaults to 8; must be a positive integer.
7
+ maxRetryDelayMs: number; // From NVIDIA_MAX_RETRY_DELAY_MS; defaults to 60000; must be a positive finite number.
8
+ generatorModel: string; // From NVIDIA_GENERATOR_MODEL; required.
9
+ graderModel: string; // From NVIDIA_GRADER_MODEL; required.
10
+ runnerModel: string; // From NVIDIA_RUNNER_MODEL; required.
11
+ }
12
+ export declare function loadNvidiaConfig(): NvidiaConfig; // Builds the config from process.env; throws Error when a required variable is missing or a numeric setting is out of range.
@@ -0,0 +1,39 @@
1
+ import dotenv from 'dotenv';
2
+ dotenv.config(); // Runs once at module import; per dotenv's documentation this loads entries from a .env file into process.env, which the functions below then read.
3
/**
 * Looks up a mandatory environment variable and returns it with surrounding
 * whitespace removed.
 *
 * @param name Name of the environment variable to read.
 * @returns The trimmed, non-empty value.
 * @throws {Error} If the variable is unset or blank after trimming.
 */
function requireEnv(name) {
    const raw = process.env[name];
    const trimmed = raw?.trim();
    if (trimmed) {
        return trimmed;
    }
    // Unset and whitespace-only values are both treated as missing.
    throw new Error(`Missing required environment variable: ${name}`);
}
10
/**
 * Assembles the NVIDIA client configuration from environment variables.
 *
 * Numeric tuning knobs are optional and fall back to defaults when unset or
 * blank; the API key and the three model names are mandatory.
 *
 * @returns The validated configuration object.
 * @throws {Error} If a numeric setting is out of range or a required variable is missing.
 */
export function loadNvidiaConfig() {
    const env = process.env;
    // Unset or blank values fall back to the default before numeric conversion.
    const numberFromEnv = (name, fallback) => Number(env[name]?.trim() || fallback);

    const timeoutMs = numberFromEnv('NVIDIA_TIMEOUT_MS', 120000);
    if (!(Number.isFinite(timeoutMs) && timeoutMs > 0)) {
        throw new Error('NVIDIA_TIMEOUT_MS must be a positive number when set');
    }

    const requestDelayMs = numberFromEnv('NVIDIA_REQUEST_DELAY_MS', 5000);
    if (!(Number.isFinite(requestDelayMs) && requestDelayMs >= 0)) {
        throw new Error('NVIDIA_REQUEST_DELAY_MS must be a non-negative number when set');
    }

    const maxAttempts = numberFromEnv('NVIDIA_MAX_ATTEMPTS', 8);
    if (!(Number.isInteger(maxAttempts) && maxAttempts > 0)) {
        throw new Error('NVIDIA_MAX_ATTEMPTS must be a positive integer when set');
    }

    const maxRetryDelayMs = numberFromEnv('NVIDIA_MAX_RETRY_DELAY_MS', 60000);
    if (!(Number.isFinite(maxRetryDelayMs) && maxRetryDelayMs > 0)) {
        throw new Error('NVIDIA_MAX_RETRY_DELAY_MS must be a positive number when set');
    }

    // Required values are resolved in the same order as the returned keys, so
    // the first missing variable reported stays the same.
    const apiKey = requireEnv('NVIDIA_API_KEY');
    const baseUrl = env.NVIDIA_BASE_URL?.trim() || 'https://integrate.api.nvidia.com/v1';
    const generatorModel = requireEnv('NVIDIA_GENERATOR_MODEL');
    const graderModel = requireEnv('NVIDIA_GRADER_MODEL');
    const runnerModel = requireEnv('NVIDIA_RUNNER_MODEL');

    return {
        apiKey,
        baseUrl,
        timeoutMs,
        requestDelayMs,
        maxAttempts,
        maxRetryDelayMs,
        generatorModel,
        graderModel,
        runnerModel
    };
}
39
+ //# sourceMappingURL=env.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"env.js","sourceRoot":"","sources":["../../packages/cli/src/env.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,MAAM,CAAC,MAAM,EAAE,CAAC;AAchB,SAAS,UAAU,CAAC,IAAY;IAC9B,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,CAAC;IACxC,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CAAC,0CAA0C,IAAI,EAAE,CAAC,CAAC;IACpE,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,UAAU,gBAAgB;IAC9B,MAAM,SAAS,GAAG,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,iBAAiB,EAAE,IAAI,EAAE,IAAI,MAAM,CAAC,CAAC;IAC1E,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,SAAS,IAAI,CAAC,EAAE,CAAC;QAClD,MAAM,IAAI,KAAK,CAAC,sDAAsD,CAAC,CAAC;IAC1E,CAAC;IAED,MAAM,cAAc,GAAG,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,uBAAuB,EAAE,IAAI,EAAE,IAAI,IAAI,CAAC,CAAC;IACnF,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,cAAc,CAAC,IAAI,cAAc,GAAG,CAAC,EAAE,CAAC;QAC3D,MAAM,IAAI,KAAK,CAAC,gEAAgE,CAAC,CAAC;IACpF,CAAC;IAED,MAAM,WAAW,GAAG,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,mBAAmB,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC;IACzE,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,WAAW,CAAC,IAAI,WAAW,IAAI,CAAC,EAAE,CAAC;QACvD,MAAM,IAAI,KAAK,CAAC,yDAAyD,CAAC,CAAC;IAC7E,CAAC;IAED,MAAM,eAAe,GAAG,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,yBAAyB,EAAE,IAAI,EAAE,IAAI,KAAK,CAAC,CAAC;IACvF,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,eAAe,CAAC,IAAI,eAAe,IAAI,CAAC,EAAE,CAAC;QAC9D,MAAM,IAAI,KAAK,CAAC,8DAA8D,CAAC,CAAC;IAClF,CAAC;IAED,OAAO;QACL,MAAM,EAAE,UAAU,CAAC,gBAAgB,CAAC;QACpC,OAAO,EAAE,OAAO,CAAC,GAAG,CAAC,eAAe,EAAE,IAAI,EAAE,IAAI,qCAAqC;QACrF,SAAS;QACT,cAAc;QACd,WAAW;QACX,eAAe;QACf,cAAc,EAAE,UAAU,CAAC,wBAAwB,CAAC;QACpD,WAAW,EAAE,UAAU,CAAC,qBAAqB,CAAC;QAC9C,WAAW,EAAE,UAAU,CAAC,qBAAqB,CAAC;KAC/C,CAAC;AACJ,CAAC"}
@@ -0,0 +1,13 @@
1
+ export interface EvalOptions { // Options accepted by evalSkill.
2
+ inputPath: string; // NOTE(review): presumably the path of the skill to evaluate — confirm against eval.js.
3
+ output?: string; // Optional. NOTE(review): presumably where results are written — confirm against eval.js.
4
+ tasks: number; // NOTE(review): presumably the number of tasks to use — confirm against eval.js.
5
+ trials: number; // NOTE(review): presumably the number of trials per task — confirm against eval.js.
6
+ mode: 'forced'; // Only the literal 'forced' is accepted by this type.
7
+ runner?: string; // Optional. NOTE(review): presumably a runner model override — confirm against eval.js.
8
+ grader?: string; // Optional. NOTE(review): presumably a grader model override — confirm against eval.js.
9
+ generator?: string; // Optional. NOTE(review): presumably a generator model override — confirm against eval.js.
10
+ taskSuite?: string; // Optional. NOTE(review): meaning not visible in this declaration — confirm against eval.js.
11
+ sourceLabel?: string; // Optional. NOTE(review): meaning not visible in this declaration — confirm against eval.js.
12
+ }
13
+ export declare function evalSkill(options: EvalOptions): Promise<unknown>; // Async entry point; the resolved value is typed `unknown`, so callers must narrow it before use.