snapeval 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +194 -0
- package/bin/snapeval.ts +226 -0
- package/dist/bin/snapeval.d.ts +2 -0
- package/dist/bin/snapeval.js +191 -0
- package/dist/bin/snapeval.js.map +1 -0
- package/dist/src/adapters/inference/copilot.d.ts +9 -0
- package/dist/src/adapters/inference/copilot.js +25 -0
- package/dist/src/adapters/inference/copilot.js.map +1 -0
- package/dist/src/adapters/inference/github-models.d.ts +9 -0
- package/dist/src/adapters/inference/github-models.js +62 -0
- package/dist/src/adapters/inference/github-models.js.map +1 -0
- package/dist/src/adapters/inference/resolve.d.ts +2 -0
- package/dist/src/adapters/inference/resolve.js +49 -0
- package/dist/src/adapters/inference/resolve.js.map +1 -0
- package/dist/src/adapters/report/json.d.ts +7 -0
- package/dist/src/adapters/report/json.js +39 -0
- package/dist/src/adapters/report/json.js.map +1 -0
- package/dist/src/adapters/report/terminal.d.ts +5 -0
- package/dist/src/adapters/report/terminal.js +42 -0
- package/dist/src/adapters/report/terminal.js.map +1 -0
- package/dist/src/adapters/skill/copilot-cli.d.ts +6 -0
- package/dist/src/adapters/skill/copilot-cli.js +51 -0
- package/dist/src/adapters/skill/copilot-cli.js.map +1 -0
- package/dist/src/commands/approve.d.ts +5 -0
- package/dist/src/commands/approve.js +40 -0
- package/dist/src/commands/approve.js.map +1 -0
- package/dist/src/commands/capture.d.ts +4 -0
- package/dist/src/commands/capture.js +18 -0
- package/dist/src/commands/capture.js.map +1 -0
- package/dist/src/commands/check.d.ts +6 -0
- package/dist/src/commands/check.js +68 -0
- package/dist/src/commands/check.js.map +1 -0
- package/dist/src/commands/init.d.ts +2 -0
- package/dist/src/commands/init.js +27 -0
- package/dist/src/commands/init.js.map +1 -0
- package/dist/src/commands/report.d.ts +4 -0
- package/dist/src/commands/report.js +26 -0
- package/dist/src/commands/report.js.map +1 -0
- package/dist/src/config.d.ts +3 -0
- package/dist/src/config.js +30 -0
- package/dist/src/config.js.map +1 -0
- package/dist/src/engine/budget.d.ts +10 -0
- package/dist/src/engine/budget.js +25 -0
- package/dist/src/engine/budget.js.map +1 -0
- package/dist/src/engine/comparison/embedding.d.ts +6 -0
- package/dist/src/engine/comparison/embedding.js +19 -0
- package/dist/src/engine/comparison/embedding.js.map +1 -0
- package/dist/src/engine/comparison/judge.d.ts +8 -0
- package/dist/src/engine/comparison/judge.js +64 -0
- package/dist/src/engine/comparison/judge.js.map +1 -0
- package/dist/src/engine/comparison/pipeline.d.ts +6 -0
- package/dist/src/engine/comparison/pipeline.js +31 -0
- package/dist/src/engine/comparison/pipeline.js.map +1 -0
- package/dist/src/engine/comparison/schema.d.ts +2 -0
- package/dist/src/engine/comparison/schema.js +28 -0
- package/dist/src/engine/comparison/schema.js.map +1 -0
- package/dist/src/engine/comparison/variance.d.ts +3 -0
- package/dist/src/engine/comparison/variance.js +26 -0
- package/dist/src/engine/comparison/variance.js.map +1 -0
- package/dist/src/engine/generator.d.ts +3 -0
- package/dist/src/engine/generator.js +52 -0
- package/dist/src/engine/generator.js.map +1 -0
- package/dist/src/engine/snapshot.d.ts +11 -0
- package/dist/src/engine/snapshot.js +46 -0
- package/dist/src/engine/snapshot.js.map +1 -0
- package/dist/src/errors.d.ts +16 -0
- package/dist/src/errors.js +33 -0
- package/dist/src/errors.js.map +1 -0
- package/dist/src/types.d.ts +125 -0
- package/dist/src/types.js +2 -0
- package/dist/src/types.js.map +1 -0
- package/package.json +53 -0
- package/plugin.json +9 -0
- package/scripts/snapeval-cli.sh +7 -0
- package/skills/snapeval/SKILL.md +51 -0
- package/src/adapters/inference/copilot.ts +30 -0
- package/src/adapters/inference/github-models.ts +74 -0
- package/src/adapters/inference/resolve.ts +70 -0
- package/src/adapters/report/json.ts +64 -0
- package/src/adapters/report/terminal.ts +59 -0
- package/src/adapters/skill/copilot-cli.ts +60 -0
- package/src/commands/approve.ts +58 -0
- package/src/commands/capture.ts +25 -0
- package/src/commands/check.ts +86 -0
- package/src/commands/init.ts +38 -0
- package/src/commands/report.ts +36 -0
- package/src/config.ts +37 -0
- package/src/engine/budget.ts +27 -0
- package/src/engine/comparison/embedding.ts +26 -0
- package/src/engine/comparison/judge.ts +78 -0
- package/src/engine/comparison/pipeline.ts +43 -0
- package/src/engine/comparison/schema.ts +22 -0
- package/src/engine/comparison/variance.ts +31 -0
- package/src/engine/generator.ts +61 -0
- package/src/engine/snapshot.ts +48 -0
- package/src/errors.ts +34 -0
- package/src/types.ts +153 -0
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { execFileSync } from 'node:child_process';
|
|
2
|
+
import { AdapterNotAvailableError } from '../../errors.js';
|
|
3
|
+
/**
 * Inference adapter that answers chat requests through the GitHub Copilot CLI
 * (`gh copilot`). The CLI cannot produce embeddings, so `embed()` delegates to
 * an optional fallback adapter supplied at construction time.
 */
export class CopilotInference {
  /** Adapter used for `embed()`; `undefined` when none was supplied. */
  fallback;
  /** Adapter identifier. */
  name = 'copilot';

  /**
   * @param {{ embed(text: string): Promise<number[]> }} [fallback] - Adapter
   *   that handles `embed()` calls on behalf of this one.
   */
  constructor(fallback) {
    this.fallback = fallback;
  }

  /**
   * Sends the conversation to the Copilot CLI as one prompt.
   *
   * @param {Array<{ content: string }>} messages - Messages whose `content`
   *   values are joined with newlines; no other message field is forwarded.
   * @param {object} [_options] - Accepted for interface compatibility; unused.
   * @returns {Promise<string>} The CLI's stdout with surrounding whitespace removed.
   * @throws {Error} When `gh` cannot be spawned, exits with a non-zero status,
   *   or writes more output than `maxBuffer` allows.
   */
  async chat(messages, _options) {
    // Concatenate messages into a single prompt string.
    const prompt = messages.map((m) => m.content).join('\n');
    // Flags are placed after `--` so `gh` passes them through instead of
    // consuming them itself, and `--silent` limits stdout to the model's reply.
    // This is the same invocation the Copilot CLI skill adapter uses.
    const result = execFileSync('gh', ['copilot', '--', '-p', prompt, '--silent'], {
      encoding: 'utf-8',
      // The default limit is 1 MiB; a longer reply would make the call throw.
      maxBuffer: 10 * 1024 * 1024,
    });
    return result.trim();
  }

  /**
   * Produces an embedding by delegating to the fallback adapter.
   *
   * @param {string} text - Text to embed.
   * @returns {Promise<number[]>} Whatever the fallback's `embed()` resolves to.
   * @throws {AdapterNotAvailableError} When no fallback adapter was provided.
   */
  async embed(text) {
    if (this.fallback) {
      return this.fallback.embed(text);
    }
    throw new AdapterNotAvailableError('copilot-embed', 'Copilot CLI does not support embeddings. Provide a fallback InferenceAdapter (e.g. GitHubModelsInference).');
  }

  /**
   * @param {number} _tokens - Ignored.
   * @returns {number} Always 0.
   */
  estimateCost(_tokens) {
    return 0;
  }
}
|
|
25
|
+
//# sourceMappingURL=copilot.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"copilot.js","sourceRoot":"","sources":["../../../../src/adapters/inference/copilot.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAElD,OAAO,EAAE,wBAAwB,EAAE,MAAM,iBAAiB,CAAC;AAE3D,MAAM,OAAO,gBAAgB;IAGE;IAFpB,IAAI,GAAG,SAAS,CAAC;IAE1B,YAA6B,QAA2B;QAA3B,aAAQ,GAAR,QAAQ,CAAmB;IAAG,CAAC;IAE5D,KAAK,CAAC,IAAI,CAAC,QAAmB,EAAE,QAAsB;QACpD,mDAAmD;QACnD,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACzD,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,EAAE,CAAC,SAAS,EAAE,IAAI,EAAE,MAAM,CAAC,EAAE,EAAE,QAAQ,EAAE,OAAO,EAAE,CAAC,CAAC;QACpF,OAAO,MAAM,CAAC,IAAI,EAAE,CAAC;IACvB,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,IAAY;QACtB,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YAClB,OAAO,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QACnC,CAAC;QACD,MAAM,IAAI,wBAAwB,CAChC,eAAe,EACf,4GAA4G,CAC7G,CAAC;IACJ,CAAC;IAED,YAAY,CAAC,OAAe;QAC1B,OAAO,CAAC,CAAC;IACX,CAAC;CACF"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { InferenceAdapter, Message, ChatOptions } from '../../types.js';
|
|
2
|
+
/**
 * Inference adapter that talks to the GitHub Models HTTP API for both chat
 * completions and embeddings.
 */
export declare class GitHubModelsInference implements InferenceAdapter {
    /** Adapter identifier. */
    readonly name = "github-models";
    private readonly token;
    /**
     * @param token - Bearer token for the API; optional.
     */
    constructor(token?: string);
    /**
     * Requests a chat completion.
     * @param messages - Conversation to send.
     * @param options - Optional request tuning.
     * @returns The reply text.
     */
    chat(messages: Message[], options?: ChatOptions): Promise<string>;
    /**
     * Requests an embedding for a piece of text.
     * @param text - Text to embed.
     * @returns The embedding vector.
     */
    embed(text: string): Promise<number[]>;
    /**
     * @param _tokens - Token count; unused by name convention.
     * @returns The estimated cost.
     */
    estimateCost(_tokens: number): number;
}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import { RateLimitError } from '../../errors.js';
|
|
2
|
+
/** Base URL of the GitHub Models inference API. */
const API_BASE = 'https://models.github.ai/inference';
/** Model used for chat completions. */
const CHAT_MODEL = 'openai/gpt-4o-mini';
/** Model used for embeddings. */
const EMBEDDING_MODEL = 'openai/text-embedding-3-small';

/**
 * Inference adapter that calls the GitHub Models HTTP API for chat completions
 * and embeddings, authenticating with a bearer token.
 */
export class GitHubModelsInference {
  /** Bearer token sent with every request. */
  token;
  /** Adapter identifier. */
  name = 'github-models';

  /**
   * @param {string} [token] - API token. Defaults to the `GITHUB_TOKEN`
   *   environment variable, or an empty string when that is unset.
   */
  constructor(token = process.env.GITHUB_TOKEN ?? '') {
    this.token = token;
  }

  /**
   * Requests a chat completion.
   *
   * @param {Array<object>} messages - Messages forwarded unchanged as the
   *   request's `messages` field.
   * @param {{ temperature?: number, maxTokens?: number, responseFormat?: string }} [options]
   *   Optional tuning; only fields that are set are added to the request.
   *   `responseFormat: 'json'` asks the API for a JSON object response.
   * @returns {Promise<string>} The `content` of the first choice's message.
   * @throws {RateLimitError} When the API answers with HTTP 429.
   * @throws {Error} On any other non-2xx status, or when the response carries
   *   no choice to read from.
   */
  async chat(messages, options) {
    const body = {
      model: CHAT_MODEL,
      messages,
    };
    if (options?.temperature !== undefined) {
      body.temperature = options.temperature;
    }
    if (options?.maxTokens !== undefined) {
      body.max_tokens = options.maxTokens;
    }
    if (options?.responseFormat === 'json') {
      body.response_format = { type: 'json_object' };
    }
    const data = await this.#postJson('/chat/completions', body);
    // Fail with a descriptive error instead of a TypeError on `choices[0]`
    // when the API returns a successful but empty payload.
    const message = data?.choices?.[0]?.message;
    if (!message) {
      throw new Error('GitHub Models API error: chat response contained no choices');
    }
    return message.content;
  }

  /**
   * Requests an embedding for a piece of text.
   *
   * @param {string} text - Text sent as the request's `input` field.
   * @returns {Promise<number[]>} The first embedding in the response.
   * @throws {RateLimitError} When the API answers with HTTP 429.
   * @throws {Error} On any other non-2xx status, or when the response carries
   *   no embedding.
   */
  async embed(text) {
    const data = await this.#postJson('/embeddings', { model: EMBEDDING_MODEL, input: text });
    const embedding = data?.data?.[0]?.embedding;
    if (!Array.isArray(embedding)) {
      throw new Error('GitHub Models API error: embeddings response contained no embedding');
    }
    return embedding;
  }

  /**
   * @param {number} _tokens - Ignored.
   * @returns {number} Always 0.
   */
  estimateCost(_tokens) {
    return 0;
  }

  /**
   * POSTs a JSON payload to an API endpoint and returns the parsed response
   * body. Shared by `chat()` and `embed()` so both handle statuses identically.
   *
   * @param {string} endpoint - Path appended to the API base URL.
   * @param {object} payload - Value serialized as the request body.
   * @returns {Promise<any>} The parsed JSON response.
   */
  async #postJson(endpoint, payload) {
    const response = await fetch(`${API_BASE}${endpoint}`, {
      method: 'POST',
      headers: {
        Authorization: `Bearer ${this.token}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(payload),
    });
    // 429 is checked first so callers can tell rate limiting from other failures.
    if (response.status === 429) {
      throw new RateLimitError(this.name);
    }
    if (!response.ok) {
      throw new Error(`GitHub Models API error: ${response.status} ${response.statusText}`);
    }
    return response.json();
  }
}
|
|
62
|
+
//# sourceMappingURL=github-models.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"github-models.js","sourceRoot":"","sources":["../../../../src/adapters/inference/github-models.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAEjD,MAAM,QAAQ,GAAG,oCAAoC,CAAC;AACtD,MAAM,UAAU,GAAG,oBAAoB,CAAC;AACxC,MAAM,eAAe,GAAG,+BAA+B,CAAC;AAExD,MAAM,OAAO,qBAAqB;IAGH;IAFpB,IAAI,GAAG,eAAe,CAAC;IAEhC,YAA6B,QAAgB,OAAO,CAAC,GAAG,CAAC,YAAY,IAAI,EAAE;QAA9C,UAAK,GAAL,KAAK,CAAyC;IAAG,CAAC;IAE/E,KAAK,CAAC,IAAI,CAAC,QAAmB,EAAE,OAAqB;QACnD,MAAM,IAAI,GAA4B;YACpC,KAAK,EAAE,UAAU;YACjB,QAAQ;SACT,CAAC;QACF,IAAI,OAAO,EAAE,WAAW,KAAK,SAAS;YAAE,IAAI,CAAC,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC;QAC/E,IAAI,OAAO,EAAE,SAAS,KAAK,SAAS;YAAE,IAAI,CAAC,UAAU,GAAG,OAAO,CAAC,SAAS,CAAC;QAC1E,IAAI,OAAO,EAAE,cAAc,KAAK,MAAM,EAAE,CAAC;YACvC,IAAI,CAAC,eAAe,GAAG,EAAE,IAAI,EAAE,aAAa,EAAE,CAAC;QACjD,CAAC;QAED,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,QAAQ,mBAAmB,EAAE;YAC3D,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,aAAa,EAAE,UAAU,IAAI,CAAC,KAAK,EAAE;gBACrC,cAAc,EAAE,kBAAkB;aACnC;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;SAC3B,CAAC,CAAC;QAEH,IAAI,QAAQ,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;YAC5B,MAAM,IAAI,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACtC,CAAC;QAED,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,4BAA4B,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC;QACxF,CAAC;QAED,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAElC,CAAC;QACF,OAAO,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC;IACzC,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,IAAY;QACtB,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,QAAQ,aAAa,EAAE;YACrD,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,aAAa,EAAE,UAAU,IAAI,CAAC,KAAK,EAAE;gBACrC,cAAc,EAAE,kBAAkB;aACnC;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,KAAK,EAAE,eAAe,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC;SAC9D,CAAC,CAAC;QAEH,IAAI,QAAQ,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;YAC5B,MAAM,IAAI,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACtC,CAAC;QAED,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,4BAA4B,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC;QACxF,CAAC;QAED,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CA
ElC,CAAC;QACF,OAAO,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;IAChC,CAAC;IAED,YAAY,CAAC,OAAe;QAC1B,OAAO,CAAC,CAAC;IACX,CAAC;CACF"}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import { execFileSync } from 'node:child_process';
|
|
2
|
+
import { AdapterNotAvailableError } from '../../errors.js';
|
|
3
|
+
import { GitHubModelsInference } from './github-models.js';
|
|
4
|
+
import { CopilotInference } from './copilot.js';
|
|
5
|
+
/**
 * Probes for the GitHub Copilot CLI by running `gh copilot --version`.
 *
 * @returns {boolean} `true` when the command completes without throwing,
 *   `false` for any failure.
 */
function isCopilotAvailable() {
  let available = false;
  try {
    // Output is piped (not inherited) so the probe stays silent.
    execFileSync('gh', ['copilot', '--version'], { encoding: 'utf-8', stdio: 'pipe' });
    available = true;
  } catch {
    // Any failure of the probe is treated as "not available".
  }
  return available;
}
|
|
14
|
+
/**
 * Reports whether the `GITHUB_TOKEN` environment variable holds a non-empty value.
 *
 * @returns {boolean} `true` when the variable is set and not empty.
 */
function isGitHubTokenAvailable() {
  const { GITHUB_TOKEN: token } = process.env;
  return !!token;
}
|
|
17
|
+
/**
 * Builds the inference adapter matching a preference string.
 *
 * - `'auto'`: Copilot for chat with GitHub Models as embedding fallback when
 *   both are available; otherwise whichever one is available.
 * - `'copilot'`: Copilot, with GitHub Models as fallback only if a token is set.
 * - `'github-models'`: GitHub Models; requires a token.
 *
 * @param {string} preference - One of `auto`, `copilot`, `github-models`.
 * @returns {CopilotInference | GitHubModelsInference} The selected adapter.
 * @throws {AdapterNotAvailableError} When the requested adapter (or, for
 *   `auto`, every adapter) is unavailable, or the preference is unknown.
 */
export function resolveInference(preference) {
  switch (preference) {
    case 'auto': {
      const hasCopilot = isCopilotAvailable();
      const hasToken = isGitHubTokenAvailable();
      if (hasCopilot) {
        // Copilot handles chat; GitHub Models supplies embeddings when a token exists.
        return hasToken
          ? new CopilotInference(new GitHubModelsInference())
          : new CopilotInference();
      }
      if (hasToken) {
        return new GitHubModelsInference();
      }
      throw new AdapterNotAvailableError('inference', 'No inference adapter available. Install GitHub Copilot CLI (`gh extension install github/gh-copilot`) or set GITHUB_TOKEN.');
    }
    case 'copilot': {
      if (!isCopilotAvailable()) {
        throw new AdapterNotAvailableError('copilot', 'GitHub Copilot CLI is not available. Install with: gh extension install github/gh-copilot');
      }
      let embeddingFallback;
      if (isGitHubTokenAvailable()) {
        embeddingFallback = new GitHubModelsInference();
      }
      return new CopilotInference(embeddingFallback);
    }
    case 'github-models': {
      if (isGitHubTokenAvailable()) {
        return new GitHubModelsInference();
      }
      throw new AdapterNotAvailableError('github-models', 'GITHUB_TOKEN environment variable is not set.');
    }
    default:
      throw new AdapterNotAvailableError(preference, `Unknown inference adapter "${preference}". Valid options: auto, copilot, github-models.`);
  }
}
|
|
49
|
+
//# sourceMappingURL=resolve.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"resolve.js","sourceRoot":"","sources":["../../../../src/adapters/inference/resolve.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAElD,OAAO,EAAE,wBAAwB,EAAE,MAAM,iBAAiB,CAAC;AAC3D,OAAO,EAAE,qBAAqB,EAAE,MAAM,oBAAoB,CAAC;AAC3D,OAAO,EAAE,gBAAgB,EAAE,MAAM,cAAc,CAAC;AAEhD,SAAS,kBAAkB;IACzB,IAAI,CAAC;QACH,YAAY,CAAC,IAAI,EAAE,CAAC,SAAS,EAAE,WAAW,CAAC,EAAE,EAAE,QAAQ,EAAE,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC;QACnF,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED,SAAS,sBAAsB;IAC7B,OAAO,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;AAC3C,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,UAAkB;IACjD,IAAI,UAAU,KAAK,MAAM,EAAE,CAAC;QAC1B,MAAM,gBAAgB,GAAG,kBAAkB,EAAE,CAAC;QAC9C,MAAM,cAAc,GAAG,sBAAsB,EAAE,CAAC;QAEhD,IAAI,gBAAgB,IAAI,cAAc,EAAE,CAAC;YACvC,uDAAuD;YACvD,MAAM,YAAY,GAAG,IAAI,qBAAqB,EAAE,CAAC;YACjD,OAAO,IAAI,gBAAgB,CAAC,YAAY,CAAC,CAAC;QAC5C,CAAC;QAED,IAAI,gBAAgB,EAAE,CAAC;YACrB,OAAO,IAAI,gBAAgB,EAAE,CAAC;QAChC,CAAC;QAED,IAAI,cAAc,EAAE,CAAC;YACnB,OAAO,IAAI,qBAAqB,EAAE,CAAC;QACrC,CAAC;QAED,MAAM,IAAI,wBAAwB,CAChC,WAAW,EACX,4HAA4H,CAC7H,CAAC;IACJ,CAAC;IAED,IAAI,UAAU,KAAK,SAAS,EAAE,CAAC;QAC7B,IAAI,CAAC,kBAAkB,EAAE,EAAE,CAAC;YAC1B,MAAM,IAAI,wBAAwB,CAChC,SAAS,EACT,2FAA2F,CAC5F,CAAC;QACJ,CAAC;QACD,MAAM,QAAQ,GAAG,sBAAsB,EAAE,CAAC,CAAC,CAAC,IAAI,qBAAqB,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC;QACpF,OAAO,IAAI,gBAAgB,CAAC,QAAQ,CAAC,CAAC;IACxC,CAAC;IAED,IAAI,UAAU,KAAK,eAAe,EAAE,CAAC;QACnC,IAAI,CAAC,sBAAsB,EAAE,EAAE,CAAC;YAC9B,MAAM,IAAI,wBAAwB,CAChC,eAAe,EACf,+CAA+C,CAChD,CAAC;QACJ,CAAC;QACD,OAAO,IAAI,qBAAqB,EAAE,CAAC;IACrC,CAAC;IAED,MAAM,IAAI,wBAAwB,CAChC,UAAU,EACV,8BAA8B,UAAU,iDAAiD,CAC1F,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { ReportAdapter, EvalResults } from '../../types.js';
|
|
2
|
+
/**
 * Report adapter that persists evaluation results as JSON files inside an
 * output directory.
 */
export declare class JSONReporter implements ReportAdapter {
    /** Adapter identifier. */
    readonly name = "json";
    private readonly outputDir;
    /**
     * @param outputDir - Directory that receives the report files.
     */
    constructor(outputDir: string);
    /**
     * Writes the report for one evaluation run.
     * @param results - Results of the run.
     */
    report(results: EvalResults): Promise<void>;
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import * as fs from 'node:fs';
|
|
2
|
+
import * as path from 'node:path';
|
|
3
|
+
/**
 * Flattens the assertion results of every graded scenario and summarizes them.
 *
 * @param {{ scenarios: Iterable<{ grading?: { assertion_results: Iterable<{ passed: unknown }> } }> }} results
 *   Evaluation results; scenarios without `grading` contribute nothing.
 * @returns {{ assertion_results: object[], summary: { passed: number, failed: number, total: number, pass_rate: number } }}
 *   All assertions in scenario order plus pass/fail counts. `pass_rate` is
 *   `passed / total`, or 0 when there are no assertions.
 */
function buildGradingFile(results) {
  const assertion_results = [];
  for (const { grading } of results.scenarios) {
    if (!grading) {
      continue;
    }
    for (const outcome of grading.assertion_results) {
      assertion_results.push(outcome);
    }
  }

  const total = assertion_results.length;
  let passed = 0;
  for (const outcome of assertion_results) {
    if (outcome.passed) {
      passed += 1;
    }
  }
  // Every assertion is either truthy-passed or not, so the rest are failures.
  const failed = total - passed;
  const pass_rate = total === 0 ? 0 : passed / total;

  return {
    assertion_results,
    summary: { passed, failed, total, pass_rate },
  };
}
|
|
17
|
+
/**
 * Report adapter that writes evaluation results to three pretty-printed JSON
 * files (`grading.json`, `timing.json`, `benchmark.json`) in an output directory.
 */
export class JSONReporter {
  /** Directory that receives the report files. */
  outputDir;
  /** Adapter identifier. */
  name = 'json';

  /**
   * @param {string} outputDir - Directory to write into; created on demand.
   */
  constructor(outputDir) {
    this.outputDir = outputDir;
  }

  /**
   * Writes the report files, creating the output directory (and any missing
   * parents) first.
   *
   * @param {object} results - Evaluation results; reads `timing.total_tokens`,
   *   `timing.duration_ms` and `summary`, and hands the whole object to
   *   `buildGradingFile`.
   * @returns {Promise<void>}
   */
  async report(results) {
    fs.mkdirSync(this.outputDir, { recursive: true });

    // Serializes one payload with 2-space indentation into the output directory.
    const emit = (fileName, payload) => {
      const target = path.join(this.outputDir, fileName);
      fs.writeFileSync(target, JSON.stringify(payload, null, 2), 'utf-8');
    };

    emit('grading.json', buildGradingFile(results));

    const { timing } = results;
    emit('timing.json', {
      total_tokens: timing.total_tokens,
      duration_ms: timing.duration_ms,
    });

    emit('benchmark.json', { run_summary: results.summary });
  }
}
|
|
39
|
+
//# sourceMappingURL=json.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"json.js","sourceRoot":"","sources":["../../../../src/adapters/report/json.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AASlC,SAAS,gBAAgB,CAAC,OAAoB;IAC5C,MAAM,aAAa,GAAsB,EAAE,CAAC;IAE5C,KAAK,MAAM,QAAQ,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC;QACzC,IAAI,QAAQ,CAAC,OAAO,EAAE,CAAC;YACrB,aAAa,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC,OAAO,CAAC,iBAAiB,CAAC,CAAC;QAC5D,CAAC;IACH,CAAC;IAED,MAAM,MAAM,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;IAC5D,MAAM,MAAM,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;IAC7D,MAAM,KAAK,GAAG,aAAa,CAAC,MAAM,CAAC;IACnC,MAAM,SAAS,GAAG,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IAEjD,MAAM,OAAO,GAAmB,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC;IACrE,OAAO,EAAE,iBAAiB,EAAE,aAAa,EAAE,OAAO,EAAE,CAAC;AACvD,CAAC;AAED,MAAM,OAAO,YAAY;IAGM;IAFpB,IAAI,GAAG,MAAM,CAAC;IAEvB,YAA6B,SAAiB;QAAjB,cAAS,GAAT,SAAS,CAAQ;IAAG,CAAC;IAElD,KAAK,CAAC,MAAM,CAAC,OAAoB;QAC/B,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAElD,eAAe;QACf,MAAM,WAAW,GAAG,gBAAgB,CAAC,OAAO,CAAC,CAAC;QAC9C,EAAE,CAAC,aAAa,CACd,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,cAAc,CAAC,EACzC,IAAI,CAAC,SAAS,CAAC,WAAW,EAAE,IAAI,EAAE,CAAC,CAAC,EACpC,OAAO,CACR,CAAC;QAEF,cAAc;QACd,MAAM,UAAU,GAAG;YACjB,YAAY,EAAE,OAAO,CAAC,MAAM,CAAC,YAAY;YACzC,WAAW,EAAE,OAAO,CAAC,MAAM,CAAC,WAAW;SACxC,CAAC;QACF,EAAE,CAAC,aAAa,CACd,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,aAAa,CAAC,EACxC,IAAI,CAAC,SAAS,CAAC,UAAU,EAAE,IAAI,EAAE,CAAC,CAAC,EACnC,OAAO,CACR,CAAC;QAEF,iBAAiB;QACjB,MAAM,aAAa,GAAG,EAAE,WAAW,EAAE,OAAO,CAAC,OAAO,EAAE,CAAC;QACvD,EAAE,CAAC,aAAa,CACd,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,gBAAgB,CAAC,EAC3C,IAAI,CAAC,SAAS,CAAC,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC,EACtC,OAAO,CACR,CAAC;IACJ,CAAC;CACF"}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import chalk from 'chalk';
|
|
2
|
+
/**
 * Maps a comparison verdict to a colored status glyph.
 *
 * @param {string} verdict - Verdict string from a scenario comparison.
 * @returns {string} Green check for `'pass'`, red cross for `'regressed'`,
 *   yellow question mark for anything else.
 */
function verdictIcon(verdict) {
  if (verdict === 'pass') {
    return chalk.green('✓');
  }
  if (verdict === 'regressed') {
    return chalk.red('✗');
  }
  return chalk.yellow('?');
}
|
|
12
|
+
/**
 * Renders one scenario result as a single summary line for the terminal.
 *
 * @param {object} scenario - Scenario result; reads `scenarioId`,
 *   `comparison.verdict`, `comparison.tier`, `timing.total_tokens`,
 *   `timing.duration_ms` and `newOutput.metadata.adapter`.
 * @returns {string} Line with verdict icon, scenario id, tier, token count,
 *   duration in seconds (two decimals) and, in parentheses, the adapter name.
 */
function formatScenario(scenario) {
  const { comparison, timing, newOutput } = scenario;
  const statusIcon = verdictIcon(comparison.verdict);
  const tierLabel = `tier${comparison.tier}`;
  const tokenCount = timing.total_tokens;
  const seconds = (timing.duration_ms / 1000).toFixed(2);
  // The trailing parenthesized value is the adapter that produced the output.
  const adapterName = newOutput.metadata.adapter;
  return ` ${statusIcon} Scenario ${scenario.scenarioId} [${tierLabel}] — ${tokenCount} tokens, ${seconds}s (${adapterName})`;
}
|
|
20
|
+
/**
 * Report adapter that prints a human-readable run summary to the console.
 */
export class TerminalReporter {
  /** Adapter identifier. */
  name = 'terminal';

  /**
   * Prints a header, one line per scenario, and summary lines covering
   * pass/regression counts, tokens, duration, cost and the tier breakdown.
   *
   * @param {object} results - Evaluation results; reads `skillName`,
   *   `scenarios`, `summary` and `timing`.
   * @returns {Promise<void>}
   */
  async report(results) {
    const { skillName, scenarios, summary, timing } = results;
    const printRule = () => console.log(chalk.dim('─'.repeat(50)));

    console.log(chalk.bold(`\nSnapeval — ${skillName}`));
    printRule();
    for (const entry of scenarios) {
      console.log(formatScenario(entry));
    }
    printRule();

    const passedStr = chalk.green(`${summary.passed} passed`);
    const regressedLabel = `${summary.regressed} regressed`;
    // Regressions are highlighted in red only when there is at least one.
    let regressedStr;
    if (summary.regressed > 0) {
      regressedStr = chalk.red(regressedLabel);
    } else {
      regressedStr = chalk.dim(regressedLabel);
    }
    const totalStr = `${summary.total_scenarios} total`;
    const passRate = (summary.pass_rate * 100).toFixed(0);
    console.log(`${passedStr}, ${regressedStr}, ${totalStr} (${passRate}%)`);

    const seconds = (timing.duration_ms / 1000).toFixed(2);
    const cost = summary.total_cost_usd.toFixed(4);
    console.log(chalk.dim(`Tokens: ${timing.total_tokens} | Duration: ${seconds}s | Cost: $${cost}`));

    const tiers = summary.tier_breakdown;
    console.log(chalk.dim(`Tier breakdown — schema: ${tiers.tier1_schema}, embedding: ${tiers.tier2_embedding}, llm: ${tiers.tier3_llm_judge}`));
  }
}
|
|
42
|
+
//# sourceMappingURL=terminal.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"terminal.js","sourceRoot":"","sources":["../../../../src/adapters/report/terminal.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAG1B,SAAS,WAAW,CAAC,OAA0B;IAC7C,QAAQ,OAAO,EAAE,CAAC;QAChB,KAAK,MAAM;YACT,OAAO,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC1B,KAAK,WAAW;YACd,OAAO,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACxB;YACE,OAAO,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;IAC7B,CAAC;AACH,CAAC;AAED,SAAS,cAAc,CAAC,QAAwB;IAC9C,MAAM,IAAI,GAAG,WAAW,CAAC,QAAQ,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;IACtD,MAAM,IAAI,GAAG,OAAO,QAAQ,CAAC,UAAU,CAAC,IAAI,EAAE,CAAC;IAC/C,MAAM,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC,YAAY,CAAC;IAC5C,MAAM,SAAS,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,WAAW,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;IAClE,MAAM,IAAI,GAAG,QAAQ,CAAC,SAAS,CAAC,QAAQ,CAAC,OAAO,CAAC;IACjD,OAAO,KAAK,IAAI,aAAa,QAAQ,CAAC,UAAU,KAAK,IAAI,OAAO,MAAM,YAAY,SAAS,MAAM,IAAI,GAAG,CAAC;AAC3G,CAAC;AAED,MAAM,OAAO,gBAAgB;IAClB,IAAI,GAAG,UAAU,CAAC;IAE3B,KAAK,CAAC,MAAM,CAAC,OAAoB;QAC/B,MAAM,EAAE,SAAS,EAAE,SAAS,EAAE,OAAO,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC;QAE1D,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,gBAAgB,SAAS,EAAE,CAAC,CAAC,CAAC;QACrD,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QAEvC,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;YACjC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC,CAAC;QACxC,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QAEvC,MAAM,SAAS,GAAG,KAAK,CAAC,KAAK,CAAC,GAAG,OAAO,CAAC,MAAM,SAAS,CAAC,CAAC;QAC1D,MAAM,cAAc,GAAG,OAAO,CAAC,SAAS,CAAC;QACzC,MAAM,YAAY,GAAG,cAAc,GAAG,CAAC;YACrC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,cAAc,YAAY,CAAC;YAC1C,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,cAAc,YAAY,CAAC,CAAC;QAC7C,MAAM,QAAQ,GAAG,GAAG,OAAO,CAAC,eAAe,QAAQ,CAAC;QACpD,MAAM,QAAQ,GAAG,CAAC,OAAO,CAAC,SAAS,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QAEtD,OAAO,CAAC,GAAG,CAAC,GAAG,SAAS,KAAK,YAAY,KAAK,QAAQ,KAAK,QAAQ,IAAI,CAAC,CAAC;QACzE,OAAO,CAAC,GAAG,CACT,KAAK,CAAC,GAAG,CACP,WAAW,MAAM,CAAC,YAAY,gBAAgB,CAAC,MAAM,CAAC,WAAW,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,
CAAC,CAAC,cAAc,OAAO,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CACtI,CACF,CAAC;QACF,OAAO,CAAC,GAAG,CACT,KAAK,CAAC,GAAG,CACP,4BAA4B,OAAO,CAAC,cAAc,CAAC,YAAY,gBAAgB,OAAO,CAAC,cAAc,CAAC,eAAe,UAAU,OAAO,CAAC,cAAc,CAAC,eAAe,EAAE,CACxK,CACF,CAAC;IACJ,CAAC;CACF"}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import type { SkillAdapter, SkillOutput } from '../../types.js';
|
|
2
|
+
/**
 * Skill adapter that runs prompts through the GitHub Copilot CLI.
 */
export declare class CopilotCLIAdapter implements SkillAdapter {
    /** Adapter identifier. */
    readonly name = "copilot-cli";
    /**
     * Runs a prompt for the skill located at `skillPath`.
     * @param skillPath - Directory of the skill under evaluation.
     * @param prompt - Prompt to send.
     * @param _files - Optional file list; unused by name convention.
     * @returns The skill's output.
     */
    invoke(skillPath: string, prompt: string, _files?: string[]): Promise<SkillOutput>;
    /**
     * @returns Whether the adapter can be used in the current environment.
     */
    isAvailable(): Promise<boolean>;
}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import { execFile, execFileSync } from 'node:child_process';
|
|
2
|
+
import { readFile } from 'node:fs/promises';
|
|
3
|
+
import * as path from 'node:path';
|
|
4
|
+
/**
 * Skill adapter that runs prompts through the GitHub Copilot CLI (`gh copilot`).
 */
export class CopilotCLIAdapter {
  /** Adapter identifier; also recorded in each output's metadata. */
  name = 'copilot-cli';

  /**
   * Sends a prompt to the Copilot CLI, prefixed with the skill's `SKILL.md`
   * when that file can be read.
   *
   * @param {string} skillPath - Directory expected to contain `SKILL.md`.
   * @param {string} prompt - Prompt to run.
   * @param {string[]} [_files] - Accepted for interface compatibility; unused.
   * @returns {Promise<{ raw: string, metadata: { tokens: number, durationMs: number, model: string, adapter: string } }>}
   *   Trimmed stdout plus timing metadata; `tokens` is always reported as 0.
   *   Rejects with the `execFile` error when the CLI call fails.
   */
  async invoke(skillPath, prompt, _files) {
    const startedAt = Date.now();

    // SKILL.md is optional context: any read failure leaves it empty.
    let skillContext = '';
    try {
      skillContext = await readFile(path.join(skillPath, 'SKILL.md'), { encoding: 'utf-8' });
    } catch {
      // ignore missing SKILL.md
    }

    let finalPrompt = prompt;
    if (skillContext) {
      finalPrompt = `${skillContext}\n\n${prompt}`;
    }

    return new Promise((resolve, reject) => {
      // Flags follow `--` so gh doesn't consume them; `--silent` limits the
      // output to the model's response only.
      const cliArgs = ['copilot', '--', '-p', finalPrompt, '--silent'];
      const cliOptions = { encoding: 'utf-8', maxBuffer: 10 * 1024 * 1024 };
      execFile('gh', cliArgs, cliOptions, (error, stdout, _stderr) => {
        if (error) {
          reject(error);
          return;
        }
        const durationMs = Date.now() - startedAt;
        const metadata = {
          tokens: 0,
          durationMs,
          model: 'copilot',
          adapter: this.name,
        };
        resolve({ raw: stdout.trim(), metadata });
      });
    });
  }

  /**
   * Checks for the Copilot CLI by running `gh copilot --help`.
   *
   * @returns {Promise<boolean>} `true` when the command completes without
   *   throwing, `false` otherwise.
   */
  async isAvailable() {
    let available = false;
    try {
      // Use gh copilot --help as a lightweight availability check.
      execFileSync('gh', ['copilot', '--help'], { encoding: 'utf-8', stdio: 'pipe' });
      available = true;
    } catch {
      // Any failure of the probe means the adapter is unavailable.
    }
    return available;
  }
}
|
|
51
|
+
//# sourceMappingURL=copilot-cli.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"copilot-cli.js","sourceRoot":"","sources":["../../../../src/adapters/skill/copilot-cli.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAC5D,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AAC5C,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAGlC,MAAM,OAAO,iBAAiB;IACnB,IAAI,GAAG,aAAa,CAAC;IAE9B,KAAK,CAAC,MAAM,CAAC,SAAiB,EAAE,MAAc,EAAE,MAAiB;QAC/D,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE3B,wDAAwD;QACxD,IAAI,OAAO,GAAG,EAAE,CAAC;QACjB,IAAI,CAAC;YACH,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,UAAU,CAAC,CAAC;YACnD,OAAO,GAAG,MAAM,QAAQ,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,OAAO,EAAE,CAAC,CAAC;QAC7D,CAAC;QAAC,MAAM,CAAC;YACP,0BAA0B;QAC5B,CAAC;QAED,MAAM,WAAW,GAAG,OAAO,CAAC,CAAC,CAAC,GAAG,OAAO,OAAO,MAAM,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC;QAEjE,OAAO,IAAI,OAAO,CAAc,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;YAClD,uEAAuE;YACvE,6DAA6D;YAC7D,QAAQ,CACN,IAAI,EACJ,CAAC,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,WAAW,EAAE,UAAU,CAAC,EAChD,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE,EAAE,GAAG,IAAI,GAAG,IAAI,EAAE,EAClD,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,EAAE;gBACzB,IAAI,KAAK,EAAE,CAAC;oBACV,MAAM,CAAC,KAAK,CAAC,CAAC;oBACd,OAAO;gBACT,CAAC;gBACD,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO,CAAC;gBACxC,MAAM,GAAG,GAAG,MAAM,CAAC,IAAI,EAAE,CAAC;gBAC1B,OAAO,CAAC;oBACN,GAAG;oBACH,QAAQ,EAAE;wBACR,MAAM,EAAE,CAAC;wBACT,UAAU;wBACV,KAAK,EAAE,SAAS;wBAChB,OAAO,EAAE,IAAI,CAAC,IAAI;qBACnB;iBACF,CAAC,CAAC;YACL,CAAC,CACF,CAAC;QACJ,CAAC,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,WAAW;QACf,IAAI,CAAC;YACH,4DAA4D;YAC5D,YAAY,CAAC,IAAI,EAAE,CAAC,SAAS,EAAE,QAAQ,CAAC,EAAE,EAAE,QAAQ,EAAE,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC;YAChF,OAAO,IAAI,CAAC;QACd,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;CACF"}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import type { SkillAdapter, EvalResults } from '../types.js';
|
|
2
|
+
/**
 * Re-runs eval cases through the skill adapter and approves their outputs.
 * @param skillPath - Directory of the skill under evaluation.
 * @param skillAdapter - Adapter used to invoke the skill.
 * @param options - Optional `scenarioIds` restricting which cases are approved.
 */
export declare function approveCommand(
    skillPath: string,
    skillAdapter: SkillAdapter,
    options?: {
        scenarioIds?: number[];
    }
): Promise<void>;
/**
 * Approves outputs taken from an existing set of evaluation results.
 * @param skillPath - Directory of the skill under evaluation.
 * @param results - Results whose scenario outputs are approved.
 * @param scenarioIds - Optional scenario ids restricting what is approved.
 */
export declare function approveFromResults(
    skillPath: string,
    results: EvalResults,
    scenarioIds?: number[]
): void;
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import * as fs from 'node:fs';
|
|
2
|
+
import * as path from 'node:path';
|
|
3
|
+
import { SnapshotManager } from '../engine/snapshot.js';
|
|
4
|
+
import { SnapevalError } from '../errors.js';
|
|
5
|
+
/**
 * Re-invokes the skill for the selected eval cases and approves each fresh
 * output through the snapshot manager.
 *
 * @param {string} skillPath - Skill directory containing `evals/evals.json`.
 * @param {{ invoke(skillPath: string, prompt: string, files?: string[]): Promise<object> }} skillAdapter
 *   Adapter used to run each eval case.
 * @param {{ scenarioIds?: number[] }} [options] - When `scenarioIds` is a
 *   non-empty array only cases with those ids are approved; otherwise all are.
 * @returns {Promise<void>}
 * @throws {SnapevalError} When `evals/evals.json` does not exist.
 */
export async function approveCommand(skillPath, skillAdapter, options = {}) {
  const evalsDir = path.join(skillPath, 'evals');
  const evalsPath = path.join(evalsDir, 'evals.json');
  if (!fs.existsSync(evalsPath)) {
    throw new SnapevalError(`No evals.json found at ${evalsPath}. Run \`snapeval init\` first.`);
  }

  const evalsFile = JSON.parse(fs.readFileSync(evalsPath, 'utf-8'));
  const manager = new SnapshotManager(evalsDir);

  // An absent or empty id list means "approve everything".
  let targetCases = evalsFile.evals;
  if (options.scenarioIds && options.scenarioIds.length > 0) {
    targetCases = evalsFile.evals.filter(({ id }) => options.scenarioIds.includes(id));
  }

  // Cases run one at a time, in file order.
  for (const { id, prompt, files } of targetCases) {
    const freshOutput = await skillAdapter.invoke(skillPath, prompt, files);
    manager.approve(id, prompt, freshOutput);
  }
}
|
|
21
|
+
/**
 * Approves the new outputs of regressed scenarios from an existing result set,
 * without re-invoking the skill.
 *
 * @param {string} skillPath - Skill directory containing `evals/evals.json`.
 * @param {{ scenarios: Array<object> }} results - Evaluation results; only
 *   scenarios whose `comparison.verdict` is `'regressed'` are considered.
 * @param {number[]} [scenarioIds] - When non-empty, further restricts approval
 *   to these scenario ids.
 * @returns {void}
 * @throws {SnapevalError} When `evals/evals.json` does not exist.
 */
export function approveFromResults(skillPath, results, scenarioIds) {
  const evalsDir = path.join(skillPath, 'evals');
  const evalsPath = path.join(evalsDir, 'evals.json');
  if (!fs.existsSync(evalsPath)) {
    throw new SnapevalError(`No evals.json found at ${evalsPath}.`);
  }

  const evalsFile = JSON.parse(fs.readFileSync(evalsPath, 'utf-8'));
  const manager = new SnapshotManager(evalsDir);

  const restrictToIds = Boolean(scenarioIds && scenarioIds.length > 0);
  // Selection is completed before anything is approved.
  const toApprove = results.scenarios.filter((candidate) => {
    if (candidate.comparison.verdict !== 'regressed') {
      return false;
    }
    return !restrictToIds || scenarioIds.includes(candidate.scenarioId);
  });

  for (const { scenarioId, newOutput } of toApprove) {
    const evalCase = evalsFile.evals.find((entry) => entry.id === scenarioId);
    // Results that no longer match an eval case are skipped silently.
    if (evalCase) {
      manager.approve(scenarioId, evalCase.prompt, newOutput);
    }
  }
}
|
|
40
|
+
//# sourceMappingURL=approve.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"approve.js","sourceRoot":"","sources":["../../../src/commands/approve.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAElC,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AACxD,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAE7C,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,SAAiB,EACjB,YAA0B,EAC1B,UAAsC,EAAE;IAExC,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,OAAO,EAAE,YAAY,CAAC,CAAC;IAC9D,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QAC9B,MAAM,IAAI,aAAa,CAAC,0BAA0B,SAAS,gCAAgC,CAAC,CAAC;IAC/F,CAAC;IAED,MAAM,SAAS,GAAc,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC,CAAC;IAC7E,MAAM,OAAO,GAAG,IAAI,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC,CAAC;IAEnE,uCAAuC;IACvC,MAAM,WAAW,GAAG,OAAO,CAAC,WAAW,IAAI,OAAO,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC;QACvE,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,WAAY,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACpE,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC;IAEpB,KAAK,MAAM,QAAQ,IAAI,WAAW,EAAE,CAAC;QACnC,MAAM,SAAS,GAAG,MAAM,YAAY,CAAC,MAAM,CAAC,SAAS,EAAE,QAAQ,CAAC,MAAM,EAAE,QAAQ,CAAC,KAAK,CAAC,CAAC;QACxF,OAAO,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE,EAAE,QAAQ,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;IAC3D,CAAC;AACH,CAAC;AAED,MAAM,UAAU,kBAAkB,CAChC,SAAiB,EACjB,OAAoB,EACpB,WAAsB;IAEtB,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,OAAO,EAAE,YAAY,CAAC,CAAC;IAC9D,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QAC9B,MAAM,IAAI,aAAa,CAAC,0BAA0B,SAAS,GAAG,CAAC,CAAC;IAClE,CAAC;IAED,MAAM,SAAS,GAAc,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC,CAAC;IAC7E,MAAM,OAAO,GAAG,IAAI,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC,CAAC;IAEnE,wCAAwC;IACxC,MAAM,gBAAgB,GAAG,OAAO,CAAC,SAAS,CAAC,MAAM,CAC/C,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,OAAO,KAAK,WAAW,CAC5C,CAAC;IAEF,MAAM,SAAS,GAAG,WAAW,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC;QACrD,CAAC,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC;QACpE,CAAC,CAAC,gBAAgB,CAAC;IAErB,KAAK,MAAM,QAAQ,IAAI,
SAAS,EAAE,CAAC;QACjC,MAAM,QAAQ,GAAG,SAAS,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,QAAQ,CAAC,UAAU,CAAC,CAAC;QAC3E,IAAI,CAAC,QAAQ;YAAE,SAAS;QACxB,OAAO,CAAC,OAAO,CAAC,QAAQ,CAAC,UAAU,EAAE,QAAQ,CAAC,MAAM,EAAE,QAAQ,CAAC,SAAS,CAAC,CAAC;IAC5E,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import * as fs from 'node:fs';
|
|
2
|
+
import * as path from 'node:path';
|
|
3
|
+
import { SnapshotManager } from '../engine/snapshot.js';
|
|
4
|
+
import { SnapevalError } from '../errors.js';
|
|
5
|
+
/**
 * Captures a snapshot for every eval case listed in `<skillPath>/evals/evals.json`.
 *
 * Each case is run once through the skill adapter, one case after another, and
 * the resulting output is handed to the snapshot manager rooted at
 * `<skillPath>/evals`.
 *
 * @param {string} skillPath - Directory of the skill under evaluation.
 * @param {object} skillAdapter - Adapter exposing `invoke(skillPath, prompt, files)`.
 * @param {object} [options] - Optional settings.
 * @param {number} [options.runs] - Value passed through to `saveSnapshot`; falls back to 1 when nullish.
 * @returns {Promise<void>}
 * @throws {SnapevalError} When `evals.json` does not exist.
 */
export async function captureCommand(skillPath, skillAdapter, options = {}) {
    const definitionFile = path.join(skillPath, 'evals', 'evals.json');
    const definitionExists = fs.existsSync(definitionFile);
    if (definitionExists === false) {
        throw new SnapevalError(`No evals.json found at ${definitionFile}. Run \`snapeval init\` first.`);
    }

    const rawDefinition = fs.readFileSync(definitionFile, 'utf-8');
    const definition = JSON.parse(rawDefinition);
    const snapshots = new SnapshotManager(path.join(skillPath, 'evals'));
    const runCount = options.runs ?? 1;

    // Sequential on purpose: each invocation is awaited before the next starts.
    for (const scenario of definition.evals) {
        const captured = await skillAdapter.invoke(skillPath, scenario.prompt, scenario.files);
        snapshots.saveSnapshot(scenario.id, scenario.prompt, captured, runCount);
    }
}
|
|
18
|
+
//# sourceMappingURL=capture.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"capture.js","sourceRoot":"","sources":["../../../src/commands/capture.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAElC,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AACxD,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAE7C,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,SAAiB,EACjB,YAA0B,EAC1B,UAA6B,EAAE;IAE/B,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,OAAO,EAAE,YAAY,CAAC,CAAC;IAC9D,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QAC9B,MAAM,IAAI,aAAa,CAAC,0BAA0B,SAAS,gCAAgC,CAAC,CAAC;IAC/F,CAAC;IAED,MAAM,SAAS,GAAc,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC,CAAC;IAC7E,MAAM,OAAO,GAAG,IAAI,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC,CAAC;IACnE,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,IAAI,CAAC,CAAC;IAE/B,KAAK,MAAM,QAAQ,IAAI,SAAS,CAAC,KAAK,EAAE,CAAC;QACvC,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,MAAM,CAAC,SAAS,EAAE,QAAQ,CAAC,MAAM,EAAE,QAAQ,CAAC,KAAK,CAAC,CAAC;QACrF,OAAO,CAAC,YAAY,CAAC,QAAQ,CAAC,EAAE,EAAE,QAAQ,CAAC,MAAM,EAAE,MAAM,EAAE,IAAI,CAAC,CAAC;IACnE,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import type { SkillAdapter, InferenceAdapter, EvalResults } from '../types.js';
|
|
2
|
+
/**
 * Re-runs the skill's eval cases and compares each new output with its stored
 * baseline snapshot.
 *
 * @param skillPath - Directory of the skill; `evals/evals.json` is read from it.
 * @param skillAdapter - Adapter used to invoke the skill for each eval case.
 * @param inference - Inference adapter handed to the comparison pipeline and
 *   used to estimate cost.
 * @param options - `threshold` and `skipEmbedding` are forwarded to the
 *   comparison pipeline; `budget` is passed to the budget engine.
 * @returns The per-scenario results together with an aggregate summary.
 */
export declare function checkCommand(skillPath: string, skillAdapter: SkillAdapter, inference: InferenceAdapter, options: {
|
|
3
|
+
threshold: number;
|
|
4
|
+
budget: string;
|
|
5
|
+
skipEmbedding?: boolean;
|
|
6
|
+
}): Promise<EvalResults>;
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import * as fs from 'node:fs';
|
|
2
|
+
import * as path from 'node:path';
|
|
3
|
+
import { SnapshotManager } from '../engine/snapshot.js';
|
|
4
|
+
import { comparePipeline } from '../engine/comparison/pipeline.js';
|
|
5
|
+
import { NoBaselineError, SnapevalError } from '../errors.js';
|
|
6
|
+
import { BudgetEngine } from '../engine/budget.js';
|
|
7
|
+
/**
 * Re-runs every eval case that has a stored baseline and compares the fresh
 * output against that baseline.
 *
 * Cases without a baseline snapshot are skipped and do not appear in the
 * results. Cases are processed one at a time, in the order listed in
 * `evals.json`.
 *
 * @param {string} skillPath - Directory of the skill under evaluation.
 * @param {object} skillAdapter - Adapter exposing `invoke(skillPath, prompt, files)`.
 * @param {object} inference - Inference adapter; passed to the comparison
 *   pipeline and asked for `estimateCost(tokens)`.
 * @param {object} options - Run settings.
 * @param {number} options.threshold - Forwarded to the comparison pipeline.
 * @param {string} options.budget - Passed to the `BudgetEngine` constructor.
 * @param {boolean} [options.skipEmbedding] - Forwarded to the comparison pipeline.
 * @returns {Promise<object>} `{ skillName, scenarios, summary, timing }`.
 * @throws {SnapevalError} When `evals.json` does not exist.
 * @throws {NoBaselineError} When the snapshot manager reports no snapshots at all.
 */
export async function checkCommand(skillPath, skillAdapter, inference, options) {
    const definitionFile = path.join(skillPath, 'evals', 'evals.json');
    if (fs.existsSync(definitionFile) === false) {
        throw new SnapevalError(`No evals.json found at ${definitionFile}`);
    }
    const definition = JSON.parse(fs.readFileSync(definitionFile, 'utf-8'));
    const snapshots = new SnapshotManager(path.join(skillPath, 'evals'));
    const spend = new BudgetEngine(options.budget);
    const startedAt = Date.now();

    const knownSnapshotIds = snapshots.listSnapshotIds();
    if (knownSnapshotIds.length === 0) {
        throw new NoBaselineError(skillPath);
    }

    const results = [];
    const tierCounts = { tier1_schema: 0, tier2_embedding: 0, tier3_llm_judge: 0 };

    for (const scenario of definition.evals) {
        const baseline = snapshots.loadSnapshot(scenario.id);
        if (!baseline) {
            continue;
        }

        const fresh = await skillAdapter.invoke(skillPath, scenario.prompt, scenario.files);
        const comparison = await comparePipeline(
            baseline.output.raw,
            fresh.raw,
            inference,
            { threshold: options.threshold, skipEmbedding: options.skipEmbedding },
        );
        comparison.scenarioId = scenario.id;

        // Any tier other than 1 or 2 is counted as the LLM-judge tier.
        switch (comparison.tier) {
            case 1:
                tierCounts.tier1_schema += 1;
                break;
            case 2:
                tierCounts.tier2_embedding += 1;
                break;
            default:
                tierCounts.tier3_llm_judge += 1;
        }

        spend.addCost(inference.estimateCost(fresh.metadata.tokens));
        results.push({
            scenarioId: scenario.id,
            prompt: scenario.prompt,
            comparison,
            timing: {
                total_tokens: fresh.metadata.tokens,
                duration_ms: fresh.metadata.durationMs,
            },
            newOutput: fresh,
        });
    }

    const countVerdict = (verdict) => results.filter((entry) => entry.comparison.verdict === verdict).length;
    const passed = countVerdict('pass');
    const regressed = countVerdict('regressed');

    let tokenTotal = 0;
    for (const entry of results) {
        tokenTotal += entry.timing.total_tokens;
    }

    const summary = {
        total_scenarios: results.length,
        passed,
        regressed,
        // With nothing compared, the pass rate is reported as 1.0.
        pass_rate: results.length === 0 ? 1.0 : passed / results.length,
        total_tokens: tokenTotal,
        total_cost_usd: spend.totalCost,
        total_duration_ms: Date.now() - startedAt,
        tier_breakdown: tierCounts,
    };

    return {
        skillName: definition.skill_name,
        scenarios: results,
        summary,
        timing: {
            total_tokens: summary.total_tokens,
            duration_ms: summary.total_duration_ms,
        },
    };
}
|
|
68
|
+
//# sourceMappingURL=check.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"check.js","sourceRoot":"","sources":["../../../src/commands/check.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AASlC,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AACxD,OAAO,EAAE,eAAe,EAAE,MAAM,kCAAkC,CAAC;AACnE,OAAO,EAAE,eAAe,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAC9D,OAAO,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AAEnD,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,SAAiB,EACjB,YAA0B,EAC1B,SAA2B,EAC3B,OAAuE;IAEvE,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,OAAO,EAAE,YAAY,CAAC,CAAC;IAC9D,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QAC9B,MAAM,IAAI,aAAa,CAAC,0BAA0B,SAAS,EAAE,CAAC,CAAC;IACjE,CAAC;IACD,MAAM,SAAS,GAAc,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC,CAAC;IAC7E,MAAM,OAAO,GAAG,IAAI,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC,CAAC;IACnE,MAAM,MAAM,GAAG,IAAI,YAAY,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IAChD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE7B,IAAI,OAAO,CAAC,eAAe,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC3C,MAAM,IAAI,eAAe,CAAC,SAAS,CAAC,CAAC;IACvC,CAAC;IAED,MAAM,SAAS,GAAqB,EAAE,CAAC;IACvC,MAAM,aAAa,GAAG,EAAE,YAAY,EAAE,CAAC,EAAE,eAAe,EAAE,CAAC,EAAE,eAAe,EAAE,CAAC,EAAE,CAAC;IAElF,KAAK,MAAM,QAAQ,IAAI,SAAS,CAAC,KAAK,EAAE,CAAC;QACvC,MAAM,QAAQ,GAAG,OAAO,CAAC,YAAY,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;QACnD,IAAI,CAAC,QAAQ;YAAE,SAAS;QACxB,MAAM,SAAS,GAAG,MAAM,YAAY,CAAC,MAAM,CAAC,SAAS,EAAE,QAAQ,CAAC,MAAM,EAAE,QAAQ,CAAC,KAAK,CAAC,CAAC;QACxF,MAAM,UAAU,GAAG,MAAM,eAAe,CACtC,QAAQ,CAAC,MAAM,CAAC,GAAG,EACnB,SAAS,CAAC,GAAG,EACb,SAAS,EACT,EAAE,SAAS,EAAE,OAAO,CAAC,SAAS,EAAE,aAAa,EAAE,OAAO,CAAC,aAAa,EAAE,CACvE,CAAC;QACF,UAAU,CAAC,UAAU,GAAG,QAAQ,CAAC,EAAE,CAAC;QACpC,IAAI,UAAU,CAAC,IAAI,KAAK,CAAC;YAAE,aAAa,CAAC,YAAY,EAAE,CAAC;aACnD,IAAI,UAAU,CAAC,IAAI,KAAK,CAAC;YAAE,aAAa,CAAC,eAAe,EAAE,CAAC;;YAC3D,aAAa,CAAC,eAAe,EAAE,CAAC;QACrC,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,YAAY,CAAC,SAAS,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC;QAClE,SAAS,CAAC,IAAI,CAAC;YACb,UAAU,EAAE,QAAQ,CAAC,EAAE;YACvB,MAAM,EAAE,QAAQ,CAAC,MAAM;YACvB,UAAU;YACV,MAAM,EAAE;gBACN,YAAY
,EAAE,SAAS,CAAC,QAAQ,CAAC,MAAM;gBACvC,WAAW,EAAE,SAAS,CAAC,QAAQ,CAAC,UAAU;aAC3C;YACD,SAAS;SACV,CAAC,CAAC;IACL,CAAC;IAED,MAAM,MAAM,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,OAAO,KAAK,MAAM,CAAC,CAAC,MAAM,CAAC;IAC/E,MAAM,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,OAAO,KAAK,WAAW,CAAC,CAAC,MAAM,CAAC;IACvF,MAAM,OAAO,GAAqB;QAChC,eAAe,EAAE,SAAS,CAAC,MAAM;QACjC,MAAM;QACN,SAAS;QACT,SAAS,EAAE,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG;QACjE,YAAY,EAAE,SAAS,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,MAAM,CAAC,YAAY,EAAE,CAAC,CAAC;QAC1E,cAAc,EAAE,MAAM,CAAC,SAAS;QAChC,iBAAiB,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;QACzC,cAAc,EAAE,aAAa;KAC9B,CAAC;IACF,OAAO;QACL,SAAS,EAAE,SAAS,CAAC,UAAU;QAC/B,SAAS;QACT,OAAO;QACP,MAAM,EAAE;YACN,YAAY,EAAE,OAAO,CAAC,YAAY;YAClC,WAAW,EAAE,OAAO,CAAC,iBAAiB;SACvC;KACF,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import * as fs from 'node:fs';
|
|
2
|
+
import * as path from 'node:path';
|
|
3
|
+
import { generateEvals } from '../engine/generator.js';
|
|
4
|
+
import { SnapevalError } from '../errors.js';
|
|
5
|
+
/**
 * Generates `evals/evals.json` for a skill from its SKILL.md description.
 *
 * The skill definition is looked up as `SKILL.md` first, then `skill.md`.
 * Its content and the skill's directory name are passed to `generateEvals`,
 * and the result is written, pretty-printed, to `<skillPath>/evals/evals.json`
 * (the `evals` directory is created if needed).
 *
 * @param {string} skillPath - Directory of the skill; may be relative.
 * @param {object} inference - Inference adapter forwarded to `generateEvals`.
 * @returns {Promise<void>}
 * @throws {SnapevalError} When neither `SKILL.md` nor `skill.md` exists in `skillPath`.
 */
export async function initCommand(skillPath, inference) {
    // Locate the skill definition file (SKILL.md or skill.md)
    const skillFilePath = ['SKILL.md', 'skill.md']
        .map((name) => path.join(skillPath, name))
        .find((candidate) => fs.existsSync(candidate));
    if (!skillFilePath) {
        throw new SnapevalError(`No SKILL.md found at ${skillPath}. Create a SKILL.md file to describe your skill.`);
    }

    const skillContent = fs.readFileSync(skillFilePath, 'utf-8');
    // Resolve before taking the basename: for relative inputs such as "." or
    // "skills/foo/.." a bare basename() would yield "." or ".." instead of the
    // actual directory name.
    const skillName = path.basename(path.resolve(skillPath));
    const evalsFile = await generateEvals(skillContent, skillName, inference);

    const evalsDir = path.join(skillPath, 'evals');
    fs.mkdirSync(evalsDir, { recursive: true });
    const evalsPath = path.join(evalsDir, 'evals.json');
    fs.writeFileSync(evalsPath, JSON.stringify(evalsFile, null, 2), 'utf-8');
}
|
|
27
|
+
//# sourceMappingURL=init.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"init.js","sourceRoot":"","sources":["../../../src/commands/init.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAElC,OAAO,EAAE,aAAa,EAAE,MAAM,wBAAwB,CAAC;AACvD,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAE7C,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,SAAiB,EACjB,SAA2B;IAE3B,0DAA0D;IAC1D,MAAM,UAAU,GAAG,CAAC,UAAU,EAAE,UAAU,CAAC,CAAC;IAC5C,IAAI,aAAa,GAAkB,IAAI,CAAC;IACxC,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC9B,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;QAC7C,IAAI,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;YAC7B,aAAa,GAAG,SAAS,CAAC;YAC1B,MAAM;QACR,CAAC;IACH,CAAC;IAED,IAAI,CAAC,aAAa,EAAE,CAAC;QACnB,MAAM,IAAI,aAAa,CACrB,wBAAwB,SAAS,kDAAkD,CACpF,CAAC;IACJ,CAAC;IAED,MAAM,YAAY,GAAG,EAAE,CAAC,YAAY,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC;IAC7D,MAAM,SAAS,GAAG,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;IAE3C,MAAM,SAAS,GAAG,MAAM,aAAa,CAAC,YAAY,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC;IAE1E,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;IAC/C,EAAE,CAAC,SAAS,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAE5C,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAC;IACpD,EAAE,CAAC,aAAa,CAAC,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;AAC3E,CAAC"}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import * as fs from 'node:fs';
|
|
2
|
+
import * as path from 'node:path';
|
|
3
|
+
import { JSONReporter } from '../adapters/report/json.js';
|
|
4
|
+
import { TerminalReporter } from '../adapters/report/terminal.js';
|
|
5
|
+
/**
 * Writes the results of a check run into a new `iteration-N` directory and,
 * unless disabled, prints them to the terminal.
 *
 * `N` is one more than the highest existing `iteration-<number>` entry under
 * `<skillPath>/evals/results`, or 1 when there is none.
 *
 * @param {string} skillPath - Directory of the skill under evaluation.
 * @param {object} results - Results object handed to both reporters.
 * @param {object} [options] - Optional settings.
 * @param {boolean} [options.verbose] - Terminal output is skipped only when this is exactly `false`.
 * @returns {Promise<void>}
 */
export async function reportCommand(skillPath, results, options = {}) {
    const resultsRoot = path.join(skillPath, 'evals', 'results');
    fs.mkdirSync(resultsRoot, { recursive: true });

    // Find the highest iteration number already present on disk.
    const iterationPattern = /^iteration-(\d+)$/;
    let highestIteration = 0;
    for (const entry of fs.readdirSync(resultsRoot)) {
        const match = iterationPattern.exec(entry);
        if (match !== null) {
            highestIteration = Math.max(highestIteration, parseInt(match[1], 10));
        }
    }
    const iterationDir = path.join(resultsBaseName(resultsRoot), `iteration-${highestIteration + 1}`);

    // JSON report is always written.
    await new JSONReporter(iterationDir).report(results);

    // Terminal report is opt-out.
    if (options.verbose === false) {
        return;
    }
    await new TerminalReporter().report(results);

    // Identity helper kept local so the join above reads as "root + iteration".
    function resultsBaseName(dir) {
        return dir;
    }
}
|
|
26
|
+
//# sourceMappingURL=report.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"report.js","sourceRoot":"","sources":["../../../src/commands/report.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAElC,OAAO,EAAE,YAAY,EAAE,MAAM,4BAA4B,CAAC;AAC1D,OAAO,EAAE,gBAAgB,EAAE,MAAM,gCAAgC,CAAC;AAElE,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,SAAiB,EACjB,OAAoB,EACpB,UAAiC,EAAE;IAEnC,kCAAkC;IAClC,MAAM,cAAc,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,OAAO,EAAE,SAAS,CAAC,CAAC;IAChE,EAAE,CAAC,SAAS,CAAC,cAAc,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAElD,MAAM,kBAAkB,GAAG,EAAE,CAAC,WAAW,CAAC,cAAc,CAAC;SACtD,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;SACxC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;SACrD,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAEzB,MAAM,aAAa,GAAG,kBAAkB,CAAC,MAAM,GAAG,CAAC;QACjD,CAAC,CAAC,kBAAkB,CAAC,kBAAkB,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,CAAC;QACvD,CAAC,CAAC,CAAC,CAAC;IAEN,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,aAAa,aAAa,EAAE,CAAC,CAAC;IAE7E,oBAAoB;IACpB,MAAM,YAAY,GAAG,IAAI,YAAY,CAAC,YAAY,CAAC,CAAC;IACpD,MAAM,YAAY,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAEnC,wBAAwB;IACxB,IAAI,OAAO,CAAC,OAAO,KAAK,KAAK,EAAE,CAAC;QAC9B,MAAM,gBAAgB,GAAG,IAAI,gBAAgB,EAAE,CAAC;QAChD,MAAM,gBAAgB,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IACzC,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import * as fs from 'node:fs';
|
|
2
|
+
import * as path from 'node:path';
|
|
3
|
+
/**
 * Baseline configuration values. `resolveConfig` spreads this object first, so
 * any key supplied by a config file or a CLI flag replaces the value given here.
 */
export const DEFAULT_CONFIG = {
|
|
4
|
+
adapter: 'copilot-cli',
|
|
5
|
+
inference: 'auto',
|
|
6
|
+
threshold: 0.85,
|
|
7
|
+
runs: 3,
|
|
8
|
+
budget: 'unlimited',
|
|
9
|
+
};
|
|
10
|
+
/**
 * Reads `snapeval.config.json` from a directory, if one is present.
 *
 * @param {string} dirPath - Directory expected to contain the config file.
 * @returns {*} The parsed JSON value, or `null` when the file does not exist.
 * @throws {SyntaxError} When the file exists but is not valid JSON. The message
 *   names the offending file and the original parse error is attached as `cause`.
 */
function loadConfigFile(dirPath) {
    const configPath = path.join(dirPath, 'snapeval.config.json');
    if (!fs.existsSync(configPath)) {
        return null;
    }
    const raw = fs.readFileSync(configPath, 'utf-8');
    try {
        return JSON.parse(raw);
    } catch (err) {
        // A bare JSON.parse error does not say which of the possible config
        // files is broken; keep the SyntaxError type but add the path.
        throw new SyntaxError(`Invalid JSON in ${configPath}: ${err.message}`, { cause: err });
    }
}
|
|
17
|
+
/**
 * Builds the effective configuration by layering four sources.
 *
 * Later layers win on a per-key basis, in this order:
 * built-in defaults, project-root config file, skill-directory config file,
 * CLI flags (ignoring flags whose value is `undefined`).
 *
 * @param {object} cliFlags - Flags parsed from the command line.
 * @param {string} projectRoot - Directory searched for the project-level config file.
 * @param {string} [skillDir] - Directory searched for the skill-level config file; skipped when falsy.
 * @returns {object} The merged configuration.
 */
export function resolveConfig(cliFlags, projectRoot, skillDir) {
    // Skill-level file is read before the project-level one.
    let skillLayer = null;
    if (skillDir) {
        skillLayer = loadConfigFile(skillDir);
    }
    const projectLayer = loadConfigFile(projectRoot);

    const layers = [
        DEFAULT_CONFIG,
        projectLayer ?? {},
        skillLayer ?? {},
        stripUndefined(cliFlags),
    ];
    return layers.reduce((merged, layer) => ({ ...merged, ...layer }), {});
}
|
|
27
|
+
/**
 * Returns a shallow copy of `obj` without the keys whose value is `undefined`.
 *
 * Other falsy values (`null`, `0`, `''`, `false`) are kept. A `null` or
 * `undefined` argument is treated as an empty object instead of throwing.
 *
 * @param {object | null | undefined} obj - Object to filter.
 * @returns {object} New object containing only the defined entries.
 */
function stripUndefined(obj) {
    const entries = Object.entries(obj ?? {});
    return Object.fromEntries(entries.filter(([, value]) => value !== undefined));
}
|
|
30
|
+
//# sourceMappingURL=config.js.map
|