snapeval 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +194 -0
  3. package/bin/snapeval.ts +226 -0
  4. package/dist/bin/snapeval.d.ts +2 -0
  5. package/dist/bin/snapeval.js +191 -0
  6. package/dist/bin/snapeval.js.map +1 -0
  7. package/dist/src/adapters/inference/copilot.d.ts +9 -0
  8. package/dist/src/adapters/inference/copilot.js +25 -0
  9. package/dist/src/adapters/inference/copilot.js.map +1 -0
  10. package/dist/src/adapters/inference/github-models.d.ts +9 -0
  11. package/dist/src/adapters/inference/github-models.js +62 -0
  12. package/dist/src/adapters/inference/github-models.js.map +1 -0
  13. package/dist/src/adapters/inference/resolve.d.ts +2 -0
  14. package/dist/src/adapters/inference/resolve.js +49 -0
  15. package/dist/src/adapters/inference/resolve.js.map +1 -0
  16. package/dist/src/adapters/report/json.d.ts +7 -0
  17. package/dist/src/adapters/report/json.js +39 -0
  18. package/dist/src/adapters/report/json.js.map +1 -0
  19. package/dist/src/adapters/report/terminal.d.ts +5 -0
  20. package/dist/src/adapters/report/terminal.js +42 -0
  21. package/dist/src/adapters/report/terminal.js.map +1 -0
  22. package/dist/src/adapters/skill/copilot-cli.d.ts +6 -0
  23. package/dist/src/adapters/skill/copilot-cli.js +51 -0
  24. package/dist/src/adapters/skill/copilot-cli.js.map +1 -0
  25. package/dist/src/commands/approve.d.ts +5 -0
  26. package/dist/src/commands/approve.js +40 -0
  27. package/dist/src/commands/approve.js.map +1 -0
  28. package/dist/src/commands/capture.d.ts +4 -0
  29. package/dist/src/commands/capture.js +18 -0
  30. package/dist/src/commands/capture.js.map +1 -0
  31. package/dist/src/commands/check.d.ts +6 -0
  32. package/dist/src/commands/check.js +68 -0
  33. package/dist/src/commands/check.js.map +1 -0
  34. package/dist/src/commands/init.d.ts +2 -0
  35. package/dist/src/commands/init.js +27 -0
  36. package/dist/src/commands/init.js.map +1 -0
  37. package/dist/src/commands/report.d.ts +4 -0
  38. package/dist/src/commands/report.js +26 -0
  39. package/dist/src/commands/report.js.map +1 -0
  40. package/dist/src/config.d.ts +3 -0
  41. package/dist/src/config.js +30 -0
  42. package/dist/src/config.js.map +1 -0
  43. package/dist/src/engine/budget.d.ts +10 -0
  44. package/dist/src/engine/budget.js +25 -0
  45. package/dist/src/engine/budget.js.map +1 -0
  46. package/dist/src/engine/comparison/embedding.d.ts +6 -0
  47. package/dist/src/engine/comparison/embedding.js +19 -0
  48. package/dist/src/engine/comparison/embedding.js.map +1 -0
  49. package/dist/src/engine/comparison/judge.d.ts +8 -0
  50. package/dist/src/engine/comparison/judge.js +64 -0
  51. package/dist/src/engine/comparison/judge.js.map +1 -0
  52. package/dist/src/engine/comparison/pipeline.d.ts +6 -0
  53. package/dist/src/engine/comparison/pipeline.js +31 -0
  54. package/dist/src/engine/comparison/pipeline.js.map +1 -0
  55. package/dist/src/engine/comparison/schema.d.ts +2 -0
  56. package/dist/src/engine/comparison/schema.js +28 -0
  57. package/dist/src/engine/comparison/schema.js.map +1 -0
  58. package/dist/src/engine/comparison/variance.d.ts +3 -0
  59. package/dist/src/engine/comparison/variance.js +26 -0
  60. package/dist/src/engine/comparison/variance.js.map +1 -0
  61. package/dist/src/engine/generator.d.ts +3 -0
  62. package/dist/src/engine/generator.js +52 -0
  63. package/dist/src/engine/generator.js.map +1 -0
  64. package/dist/src/engine/snapshot.d.ts +11 -0
  65. package/dist/src/engine/snapshot.js +46 -0
  66. package/dist/src/engine/snapshot.js.map +1 -0
  67. package/dist/src/errors.d.ts +16 -0
  68. package/dist/src/errors.js +33 -0
  69. package/dist/src/errors.js.map +1 -0
  70. package/dist/src/types.d.ts +125 -0
  71. package/dist/src/types.js +2 -0
  72. package/dist/src/types.js.map +1 -0
  73. package/package.json +53 -0
  74. package/plugin.json +9 -0
  75. package/scripts/snapeval-cli.sh +7 -0
  76. package/skills/snapeval/SKILL.md +51 -0
  77. package/src/adapters/inference/copilot.ts +30 -0
  78. package/src/adapters/inference/github-models.ts +74 -0
  79. package/src/adapters/inference/resolve.ts +70 -0
  80. package/src/adapters/report/json.ts +64 -0
  81. package/src/adapters/report/terminal.ts +59 -0
  82. package/src/adapters/skill/copilot-cli.ts +60 -0
  83. package/src/commands/approve.ts +58 -0
  84. package/src/commands/capture.ts +25 -0
  85. package/src/commands/check.ts +86 -0
  86. package/src/commands/init.ts +38 -0
  87. package/src/commands/report.ts +36 -0
  88. package/src/config.ts +37 -0
  89. package/src/engine/budget.ts +27 -0
  90. package/src/engine/comparison/embedding.ts +26 -0
  91. package/src/engine/comparison/judge.ts +78 -0
  92. package/src/engine/comparison/pipeline.ts +43 -0
  93. package/src/engine/comparison/schema.ts +22 -0
  94. package/src/engine/comparison/variance.ts +31 -0
  95. package/src/engine/generator.ts +61 -0
  96. package/src/engine/snapshot.ts +48 -0
  97. package/src/errors.ts +34 -0
  98. package/src/types.ts +153 -0
@@ -0,0 +1 @@
1
+ {"version":3,"file":"config.js","sourceRoot":"","sources":["../../src/config.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAGlC,MAAM,CAAC,MAAM,cAAc,GAAmB;IAC5C,OAAO,EAAE,aAAa;IACtB,SAAS,EAAE,MAAM;IACjB,SAAS,EAAE,IAAI;IACf,IAAI,EAAE,CAAC;IACP,MAAM,EAAE,WAAW;CACpB,CAAC;AAEF,SAAS,cAAc,CAAC,OAAe;IACrC,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,sBAAsB,CAAC,CAAC;IAC9D,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC;QAAE,OAAO,IAAI,CAAC;IAC5C,MAAM,GAAG,GAAG,EAAE,CAAC,YAAY,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC;IACjD,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;AACzB,CAAC;AAED,MAAM,UAAU,aAAa,CAC3B,QAAiC,EACjC,WAAmB,EACnB,QAAiB;IAEjB,MAAM,cAAc,GAAG,QAAQ,CAAC,CAAC,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAClE,MAAM,aAAa,GAAG,cAAc,CAAC,WAAW,CAAC,CAAC;IAClD,OAAO;QACL,GAAG,cAAc;QACjB,GAAG,CAAC,aAAa,IAAI,EAAE,CAAC;QACxB,GAAG,CAAC,cAAc,IAAI,EAAE,CAAC;QACzB,GAAG,cAAc,CAAC,QAAQ,CAAC;KAC5B,CAAC;AACJ,CAAC;AAED,SAAS,cAAc,CAAC,GAA4B;IAClD,OAAO,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,SAAS,CAAC,CAAC,CAAC;AACpF,CAAC"}
@@ -0,0 +1,10 @@
1
+ export declare class BudgetEngine {
2
+ private spent;
3
+ private cap;
4
+ constructor(budget: string);
5
+ get totalCost(): number;
6
+ addCost(amount: number): void;
7
+ isExceeded(): boolean;
8
+ estimateScenarioCost(tokens: number, isFreeModel: boolean): number;
9
+ get remaining(): number | null;
10
+ }
@@ -0,0 +1,25 @@
1
/**
 * Tracks cumulative spend against an optional spending cap.
 *
 * The cap is supplied as a string: the literal `'unlimited'` disables the cap,
 * any other value is parsed with `parseFloat`.
 */
export class BudgetEngine {
    /** Running total of every amount passed to `addCost`. */
    spent = 0;
    /** Numeric cap, or `null` when the budget is unlimited. */
    cap;
    /**
     * @param budget `'unlimited'`, or a string that `parseFloat` can read as a number.
     * @throws {RangeError} If `budget` is neither `'unlimited'` nor numeric.
     */
    constructor(budget) {
        if (budget === 'unlimited') {
            this.cap = null;
        }
        else {
            const parsedCap = parseFloat(budget);
            // A NaN cap makes `spent > cap` permanently false, which would
            // silently turn a mistyped budget into an unlimited one.
            if (Number.isNaN(parsedCap)) {
                throw new RangeError(`Invalid budget "${budget}": expected "unlimited" or a number.`);
            }
            this.cap = parsedCap;
        }
    }
    /** Total cost recorded so far. */
    get totalCost() { return this.spent; }
    /** Adds `amount` to the running total. */
    addCost(amount) { this.spent += amount; }
    /**
     * @returns `true` only when a cap is set and the total is strictly above it;
     * spending exactly the cap does not count as exceeded.
     */
    isExceeded() {
        if (this.cap === null)
            return false;
        return this.spent > this.cap;
    }
    /**
     * Estimates the cost of a scenario from its token count.
     * @returns 0 for free models, otherwise `tokens` priced at 0.15 per million.
     */
    estimateScenarioCost(tokens, isFreeModel) {
        if (isFreeModel)
            return 0;
        const pricePerMillionTokens = 0.15;
        return (tokens / 1_000_000) * pricePerMillionTokens;
    }
    /**
     * @returns `null` when unlimited, otherwise the unspent part of the cap,
     * clamped so it never goes below 0.
     */
    get remaining() {
        if (this.cap === null)
            return null;
        return Math.max(0, this.cap - this.spent);
    }
}
25
+ //# sourceMappingURL=budget.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"budget.js","sourceRoot":"","sources":["../../../src/engine/budget.ts"],"names":[],"mappings":"AAAA,MAAM,OAAO,YAAY;IACf,KAAK,GAAG,CAAC,CAAC;IACV,GAAG,CAAgB;IAE3B,YAAY,MAAc;QACxB,IAAI,CAAC,GAAG,GAAG,MAAM,KAAK,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC;IAChE,CAAC;IAED,IAAI,SAAS,KAAa,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;IAE9C,OAAO,CAAC,MAAc,IAAU,IAAI,CAAC,KAAK,IAAI,MAAM,CAAC,CAAC,CAAC;IAEvD,UAAU;QACR,IAAI,IAAI,CAAC,GAAG,KAAK,IAAI;YAAE,OAAO,KAAK,CAAC;QACpC,OAAO,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC;IAC/B,CAAC;IAED,oBAAoB,CAAC,MAAc,EAAE,WAAoB;QACvD,IAAI,WAAW;YAAE,OAAO,CAAC,CAAC;QAC1B,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC,GAAG,IAAI,CAAC;IACrC,CAAC;IAED,IAAI,SAAS;QACX,IAAI,IAAI,CAAC,GAAG,KAAK,IAAI;YAAE,OAAO,IAAI,CAAC;QACnC,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC;IAC5C,CAAC;CACF"}
@@ -0,0 +1,6 @@
1
+ import type { InferenceAdapter } from '../../types.js';
2
+ export declare function cosineSimilarity(a: number[], b: number[]): number;
3
+ export declare function embeddingCheck(baseline: string, current: string, inference: InferenceAdapter, threshold?: number): Promise<{
4
+ similarity: number;
5
+ pass: boolean;
6
+ }>;
@@ -0,0 +1,19 @@
1
/**
 * Cosine similarity between two numeric vectors of equal length.
 *
 * @param a First vector.
 * @param b Second vector.
 * @returns The dot product divided by the product of the magnitudes, or 0 when
 * either magnitude is 0 (which includes two empty vectors).
 * @throws {RangeError} If the vectors differ in length. Previously a longer `a`
 * produced NaN and a longer `b` had its extra dimensions silently ignored.
 */
export function cosineSimilarity(a, b) {
    if (a.length !== b.length) {
        throw new RangeError(`cosineSimilarity: vector length mismatch (${a.length} vs ${b.length})`);
    }
    let dot = 0;
    let sumSquaresA = 0;
    let sumSquaresB = 0;
    for (let i = 0; i < a.length; i++) {
        dot += a[i] * b[i];
        sumSquaresA += a[i] * a[i];
        sumSquaresB += b[i] * b[i];
    }
    const denom = Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB);
    // Guard the division: a zero vector has no direction, so report 0.
    return denom === 0 ? 0 : dot / denom;
}
11
/**
 * Embeds both texts and compares the resulting vectors.
 *
 * @param baseline Text of the stored output.
 * @param current Text of the new output.
 * @param inference Adapter whose `embed` method supplies the vectors.
 * @param threshold Minimum similarity that counts as a pass (default 0.85).
 * @returns The similarity and whether it is at or above `threshold`.
 */
export async function embeddingCheck(baseline, current, inference, threshold = 0.85) {
    // Start both requests before awaiting either so they run concurrently.
    const baselinePending = inference.embed(baseline);
    const currentPending = inference.embed(current);
    const vectors = await Promise.all([baselinePending, currentPending]);
    const similarity = cosineSimilarity(vectors[0], vectors[1]);
    const pass = similarity >= threshold;
    return { similarity, pass };
}
19
+ //# sourceMappingURL=embedding.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embedding.js","sourceRoot":"","sources":["../../../../src/engine/comparison/embedding.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,gBAAgB,CAAC,CAAW,EAAE,CAAW;IACvD,IAAI,GAAG,GAAG,CAAC,EAAE,IAAI,GAAG,CAAC,EAAE,IAAI,GAAG,CAAC,CAAC;IAChC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAClC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACnB,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACpB,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IACtB,CAAC;IACD,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAChD,OAAO,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,GAAG,KAAK,CAAC;AACvC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,QAAgB,EAChB,OAAe,EACf,SAA2B,EAC3B,YAAoB,IAAI;IAExB,MAAM,CAAC,WAAW,EAAE,UAAU,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;QAClD,SAAS,CAAC,KAAK,CAAC,QAAQ,CAAC;QACzB,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC;KACzB,CAAC,CAAC;IACH,MAAM,UAAU,GAAG,gBAAgB,CAAC,WAAW,EAAE,UAAU,CAAC,CAAC;IAC7D,OAAO,EAAE,UAAU,EAAE,IAAI,EAAE,UAAU,IAAI,SAAS,EAAE,CAAC;AACvD,CAAC"}
@@ -0,0 +1,8 @@
1
+ import type { InferenceAdapter, ComparisonVerdict } from '../../types.js';
2
+ export declare function buildJudgePrompt(outputA: string, outputB: string): string;
3
+ interface JudgeResult {
4
+ verdict: ComparisonVerdict;
5
+ details: string;
6
+ }
7
+ export declare function llmJudge(baseline: string, current: string, inference: InferenceAdapter): Promise<JudgeResult>;
8
+ export {};
@@ -0,0 +1,64 @@
1
/**
 * Builds the prompt that asks a model whether two outputs are semantically
 * consistent or different.
 *
 * @param outputA Text placed under the "OUTPUT A" heading.
 * @param outputB Text placed under the "OUTPUT B" heading.
 * @returns The full prompt; it asks for a JSON object with a `verdict` field.
 */
export function buildJudgePrompt(outputA, outputB) {
    const promptLines = [
        'You are an AI output comparison judge. Compare these two outputs and determine if they are semantically consistent (same meaning, same key information) or different (changed behavior, missing information, or contradictory content).',
        '',
        'OUTPUT A:',
        '---',
        // Interpolated through a template so stringification matches a template literal.
        `${outputA}`,
        '---',
        '',
        'OUTPUT B:',
        '---',
        `${outputB}`,
        '---',
        '',
        'Respond with JSON only: {"verdict": "consistent"} or {"verdict": "different"}',
    ];
    return promptLines.join('\n');
}
16
/**
 * Extracts the judge verdict from a model reply.
 *
 * Accepts bare JSON or JSON wrapped in a Markdown code fence; models sometimes
 * add the fence even when told to answer with JSON only.
 *
 * @param response Raw text returned by the model.
 * @returns `'consistent'` or `'different'`, or `null` when the reply is not
 * valid JSON or carries any other verdict value.
 */
function parseJudgeResponse(response) {
    const fenced = /```(?:json)?\s*([\s\S]*?)```/i.exec(response);
    const payload = fenced ? fenced[1] : response;
    try {
        const parsed = JSON.parse(payload);
        // `parsed` can legitimately be null or a primitive, hence the optional chain.
        const verdict = parsed?.verdict;
        return verdict === 'consistent' || verdict === 'different' ? verdict : null;
    }
    catch {
        // Unparseable reply: the caller decides whether to retry.
        return null;
    }
}
27
/**
 * Asks the judge twice, once per argument order, and parses both replies.
 *
 * @param baseline Stored output.
 * @param current New output.
 * @param inference Adapter whose `chat` method runs the judge prompt.
 * @returns `forward` (baseline shown first) and `reverse` (current shown first)
 * verdicts, each `null` when the corresponding reply could not be parsed.
 */
async function runJudgePair(baseline, current, inference) {
    // Each call gets its own options object, exactly as if written out twice.
    const askJudge = (first, second) => inference.chat([{ role: 'user', content: buildJudgePrompt(first, second) }], { temperature: 0, responseFormat: 'json' });
    const forwardPending = askJudge(baseline, current);
    const reversePending = askJudge(current, baseline);
    const replies = await Promise.all([forwardPending, reversePending]);
    const forward = parseJudgeResponse(replies[0]);
    const reverse = parseJudgeResponse(replies[1]);
    return { forward, reverse };
}
40
/** How many times the judge pair is asked before unparseable replies are given up on. */
const MAX_JUDGE_ATTEMPTS = 2;
/**
 * Compares two outputs with an LLM judge, asking in both orders.
 *
 * A pair of replies is retried only when at least one of them cannot be parsed.
 * Once both replies parse, the result is final: agreement maps to `pass`
 * (consistent) or `regressed` (different), disagreement maps to `inconclusive`.
 *
 * @param baseline Stored output.
 * @param current New output.
 * @param inference Adapter used to run the judge prompts.
 * @returns The verdict and a human-readable explanation.
 */
export async function llmJudge(baseline, current, inference) {
    for (let attempt = 1; attempt <= MAX_JUDGE_ATTEMPTS; attempt++) {
        const { forward, reverse } = await runJudgePair(baseline, current, inference);
        if (forward === null || reverse === null) {
            // Unparseable reply: try again if attempts remain.
            continue;
        }
        if (forward === reverse) {
            return {
                verdict: forward === 'consistent' ? 'pass' : 'regressed',
                details: `LLM Judge: both orderings agree — ${forward}`,
            };
        }
        return {
            verdict: 'inconclusive',
            details: `LLM Judge: orderings disagree (forward=${forward}, reverse=${reverse})`,
        };
    }
    // Reached only when every attempt produced an unparseable reply.
    return {
        verdict: 'inconclusive',
        details: 'LLM judge returned unparseable response after retry',
    };
}
64
+ //# sourceMappingURL=judge.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"judge.js","sourceRoot":"","sources":["../../../../src/engine/comparison/judge.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,gBAAgB,CAAC,OAAe,EAAE,OAAe;IAC/D,OAAO;;;;EAIP,OAAO;;;;;EAKP,OAAO;;;8EAGqE,CAAC;AAC/E,CAAC;AAOD,SAAS,kBAAkB,CAAC,QAAgB;IAC1C,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;QACpC,IAAI,MAAM,CAAC,OAAO,KAAK,YAAY,IAAI,MAAM,CAAC,OAAO,KAAK,WAAW;YAAE,OAAO,MAAM,CAAC,OAAO,CAAC;QAC7F,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,KAAK,UAAU,YAAY,CACzB,QAAgB,EAChB,OAAe,EACf,SAA2B;IAE3B,MAAM,CAAC,WAAW,EAAE,WAAW,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;QACnD,SAAS,CAAC,IAAI,CAAC,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,gBAAgB,CAAC,QAAQ,EAAE,OAAO,CAAC,EAAE,CAAC,EAAE;YAC/E,WAAW,EAAE,CAAC;YACd,cAAc,EAAE,MAAM;SACvB,CAAC;QACF,SAAS,CAAC,IAAI,CAAC,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,gBAAgB,CAAC,OAAO,EAAE,QAAQ,CAAC,EAAE,CAAC,EAAE;YAC/E,WAAW,EAAE,CAAC;YACd,cAAc,EAAE,MAAM;SACvB,CAAC;KACH,CAAC,CAAC;IACH,OAAO,EAAE,OAAO,EAAE,kBAAkB,CAAC,WAAW,CAAC,EAAE,OAAO,EAAE,kBAAkB,CAAC,WAAW,CAAC,EAAE,CAAC;AAChG,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,QAAQ,CAC5B,QAAgB,EAChB,OAAe,EACf,SAA2B;IAE3B,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,GAAG,CAAC,EAAE,OAAO,EAAE,EAAE,CAAC;QAC7C,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,GAAG,MAAM,YAAY,CAAC,QAAQ,EAAE,OAAO,EAAE,SAAS,CAAC,CAAC;QAC9E,IAAI,OAAO,KAAK,IAAI,IAAI,OAAO,KAAK,IAAI,EAAE,CAAC;YACzC,IAAI,OAAO,KAAK,CAAC;gBAAE,SAAS;YAC5B,OAAO;gBACL,OAAO,EAAE,cAAc;gBACvB,OAAO,EAAE,qDAAqD;aAC/D,CAAC;QACJ,CAAC;QACD,IAAI,OAAO,KAAK,OAAO,EAAE,CAAC;YACxB,OAAO;gBACL,OAAO,EAAE,OAAO,KAAK,YAAY,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,WAAW;gBACxD,OAAO,EAAE,qCAAqC,OAAO,EAAE;aACxD,CAAC;QACJ,CAAC;QACD,OAAO;YACL,OAAO,EAAE,cAAc;YACvB,OAAO,EAAE,0CAA0C,OAAO,aAAa,OAAO,GAAG;SAClF,CAAC;IACJ,CAAC;IACD,OAAO,EAAE,OAAO,EAAE,cAAc,EAAE,OAAO,EAAE,6BAA6B,EAAE,CAAC;AAC7E,CAAC"}
@@ -0,0 +1,6 @@
1
+ import type { InferenceAdapter, ComparisonResult } from '../../types.js';
2
+ export interface PipelineOptions {
3
+ threshold: number;
4
+ skipEmbedding?: boolean;
5
+ }
6
+ export declare function comparePipeline(baseline: string, current: string, inference: InferenceAdapter, options: PipelineOptions): Promise<ComparisonResult>;
@@ -0,0 +1,31 @@
1
+ import { schemaCheck } from './schema.js';
2
+ import { embeddingCheck } from './embedding.js';
3
+ import { llmJudge } from './judge.js';
4
/**
 * Runs the tiered comparison and stops at the first tier that passes.
 *
 * Tier 1 compares the structural schema of both texts, tier 2 compares
 * embeddings against `options.threshold`, tier 3 asks the LLM judge.
 *
 * NOTE(review): tier 1 returns `pass` on structural equality alone, so two
 * texts with the same layout but different wording never reach tiers 2 or 3.
 * NOTE(review): `scenarioId` is always 0 here — presumably the caller fills in
 * the real id; verify against the call site.
 *
 * @param baseline Stored output.
 * @param current New output.
 * @param inference Adapter used for embeddings and the judge.
 * @param options `threshold` for tier 2 and an optional `skipEmbedding` flag.
 * @returns The comparison result, tagged with the tier that decided it.
 */
export async function comparePipeline(baseline, current, inference, options) {
    // Tier 1: Schema check (FREE)
    const structureMatches = schemaCheck(baseline, current);
    if (structureMatches) {
        return { scenarioId: 0, verdict: 'pass', tier: 1, details: 'Schema match' };
    }
    // Tier 2: Embedding similarity (CHEAP) — skip if unavailable
    const embeddingEnabled = !options.skipEmbedding;
    if (embeddingEnabled) {
        try {
            const { pass, similarity } = await embeddingCheck(baseline, current, inference, options.threshold);
            if (pass) {
                const details = `Embedding similarity: ${similarity.toFixed(4)}`;
                return { scenarioId: 0, verdict: 'pass', tier: 2, similarity, details };
            }
        }
        catch {
            // Embedding not available — fall through to Tier 3
        }
    }
    // Tier 3: LLM Judge (EXPENSIVE)
    const { verdict, details } = await llmJudge(baseline, current, inference);
    return { scenarioId: 0, verdict, tier: 3, details };
}
31
+ //# sourceMappingURL=pipeline.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"pipeline.js","sourceRoot":"","sources":["../../../../src/engine/comparison/pipeline.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAC1C,OAAO,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAChD,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAOtC,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,QAAgB,EAChB,OAAe,EACf,SAA2B,EAC3B,OAAwB;IAExB,8BAA8B;IAC9B,IAAI,WAAW,CAAC,QAAQ,EAAE,OAAO,CAAC,EAAE,CAAC;QACnC,OAAO,EAAE,UAAU,EAAE,CAAC,EAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,EAAE,OAAO,EAAE,cAAc,EAAE,CAAC;IAC9E,CAAC;IAED,6DAA6D;IAC7D,IAAI,CAAC,OAAO,CAAC,aAAa,EAAE,CAAC;QAC3B,IAAI,CAAC;YACH,MAAM,SAAS,GAAG,MAAM,cAAc,CAAC,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE,OAAO,CAAC,SAAS,CAAC,CAAC;YACxF,IAAI,SAAS,CAAC,IAAI,EAAE,CAAC;gBACnB,OAAO;oBACL,UAAU,EAAE,CAAC;oBACb,OAAO,EAAE,MAAM;oBACf,IAAI,EAAE,CAAC;oBACP,UAAU,EAAE,SAAS,CAAC,UAAU;oBAChC,OAAO,EAAE,yBAAyB,SAAS,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE;iBACpE,CAAC;YACJ,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,mDAAmD;QACrD,CAAC;IACH,CAAC;IAED,gCAAgC;IAChC,MAAM,WAAW,GAAG,MAAM,QAAQ,CAAC,QAAQ,EAAE,OAAO,EAAE,SAAS,CAAC,CAAC;IACjE,OAAO,EAAE,UAAU,EAAE,CAAC,EAAE,OAAO,EAAE,WAAW,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,EAAE,OAAO,EAAE,WAAW,CAAC,OAAO,EAAE,CAAC;AAChG,CAAC"}
@@ -0,0 +1,2 @@
1
+ export declare function extractSchema(text: string): string;
2
+ export declare function schemaCheck(baseline: string, current: string): boolean;
@@ -0,0 +1,28 @@
1
/**
 * Reduces Markdown-like text to a structural skeleton.
 *
 * Each line is trimmed and replaced by a placeholder: headings keep their
 * `#` prefix, bullet and numbered items are normalised, code fences become
 * a bare fence, blank lines stay blank and everything else becomes
 * `[content]`. Consecutive `[content]` lines are merged into one.
 *
 * @param text Text to analyse; any falsy value yields `''`.
 * @returns The skeleton with surrounding whitespace trimmed.
 */
export function extractSchema(text) {
    if (!text) {
        return '';
    }
    const toPlaceholder = (line) => {
        const stripped = line.trim();
        if (stripped === '') {
            return '';
        }
        if (/^#{1,6}\s/.test(stripped)) {
            return stripped.replace(/^(#{1,6}\s).*/, '$1[heading]');
        }
        if (/^[-*+]\s/.test(stripped)) {
            return '- [item]';
        }
        if (/^\d+\.\s/.test(stripped)) {
            return '1. [item]';
        }
        if (/^```/.test(stripped)) {
            return '```';
        }
        return '[content]';
    };
    const skeletonLines = [];
    for (const line of text.split('\n')) {
        skeletonLines.push(toPlaceholder(line));
    }
    const joined = skeletonLines.join('\n');
    // Collapse runs of adjacent content lines so paragraph length is ignored.
    const mergedRuns = joined.replace(/(\[content\]\n)+\[content\]/g, '[content]');
    const normalised = mergedRuns.replace(/(\[content\]\n)+/g, '[content]\n');
    return normalised.trim();
}
25
/**
 * Reports whether two texts share the same structural skeleton.
 *
 * @param baseline Stored output.
 * @param current New output.
 * @returns `true` when `extractSchema` yields identical strings for both.
 */
export function schemaCheck(baseline, current) {
    const baselineShape = extractSchema(baseline);
    const currentShape = extractSchema(current);
    return baselineShape === currentShape;
}
28
+ //# sourceMappingURL=schema.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"schema.js","sourceRoot":"","sources":["../../../../src/engine/comparison/schema.ts"],"names":[],"mappings":"AAAA,MAAM,UAAU,aAAa,CAAC,IAAY;IACxC,IAAI,CAAC,IAAI;QAAE,OAAO,EAAE,CAAC;IACrB,OAAO,IAAI;SACR,KAAK,CAAC,IAAI,CAAC;SACX,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACZ,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;QAC5B,IAAI,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC;YAAE,OAAO,OAAO,CAAC,OAAO,CAAC,eAAe,EAAE,aAAa,CAAC,CAAC;QACtF,IAAI,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC;YAAE,OAAO,UAAU,CAAC;QAChD,IAAI,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC;YAAE,OAAO,WAAW,CAAC;QACjD,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC;YAAE,OAAO,KAAK,CAAC;QACvC,IAAI,OAAO,KAAK,EAAE;YAAE,OAAO,EAAE,CAAC;QAC9B,OAAO,WAAW,CAAC;IACrB,CAAC,CAAC;SACD,IAAI,CAAC,IAAI,CAAC;SACV,OAAO,CAAC,8BAA8B,EAAE,WAAW,CAAC;SACpD,OAAO,CAAC,mBAAmB,EAAE,aAAa,CAAC;SAC3C,IAAI,EAAE,CAAC;AACZ,CAAC;AAED,MAAM,UAAU,WAAW,CAAC,QAAgB,EAAE,OAAe;IAC3D,OAAO,aAAa,CAAC,QAAQ,CAAC,KAAK,aAAa,CAAC,OAAO,CAAC,CAAC;AAC5D,CAAC"}
@@ -0,0 +1,3 @@
1
+ import type { VarianceEnvelope, VarianceEnvelopeRun } from '../../types.js';
2
+ export declare function computeEnvelope(scenarioId: number, runs: VarianceEnvelopeRun[]): VarianceEnvelope;
3
+ export declare function isWithinEnvelope(embedding: number[], envelope: VarianceEnvelope, threshold: number): boolean;
@@ -0,0 +1,26 @@
1
+ import { cosineSimilarity } from './embedding.js';
2
/**
 * Builds a variance envelope from several runs of one scenario.
 *
 * The centroid is the per-dimension mean of the run embeddings; the radius is
 * the largest cosine distance (1 - similarity) from any run to that centroid.
 *
 * @param scenarioId Id stored on the envelope as `scenario_id`.
 * @param runs Runs to summarise. The dimension count is taken from the first
 * run — assumes every run has the same embedding length; TODO confirm.
 * @returns The envelope: `scenario_id`, the runs, the centroid and the radius.
 * @throws {RangeError} If `runs` is empty.
 */
export function computeEnvelope(scenarioId, runs) {
    if (runs.length === 0) {
        // Without this guard the failure surfaced as an opaque TypeError on runs[0].
        throw new RangeError('computeEnvelope requires at least one run');
    }
    const dims = runs[0].embedding.length;
    const centroid = new Array(dims).fill(0);
    for (const run of runs) {
        for (let i = 0; i < dims; i++) {
            centroid[i] += run.embedding[i];
        }
    }
    for (let i = 0; i < dims; i++) {
        centroid[i] /= runs.length;
    }
    let maxDistance = 0;
    for (const run of runs) {
        const distance = 1 - cosineSimilarity(run.embedding, centroid);
        if (distance > maxDistance) {
            maxDistance = distance;
        }
    }
    return { scenario_id: scenarioId, runs, centroid, radius: maxDistance };
}
22
/**
 * Checks an embedding against a variance envelope.
 *
 * @param embedding Vector to test.
 * @param envelope Envelope providing `centroid` and `radius`.
 * @param threshold Base similarity threshold, lowered by the envelope radius.
 * @returns `true` when similarity to the centroid is at least `threshold - radius`.
 */
export function isWithinEnvelope(embedding, envelope, threshold) {
    const { centroid, radius } = envelope;
    const similarity = cosineSimilarity(embedding, centroid);
    const relaxedThreshold = threshold - radius;
    return similarity >= relaxedThreshold;
}
26
+ //# sourceMappingURL=variance.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"variance.js","sourceRoot":"","sources":["../../../../src/engine/comparison/variance.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAElD,MAAM,UAAU,eAAe,CAAC,UAAkB,EAAE,IAA2B;IAC7E,MAAM,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC;IACtC,MAAM,QAAQ,GAAG,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACzC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC;YAC9B,QAAQ,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;IACD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC;QAC9B,QAAQ,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC;IAC7B,CAAC;IACD,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,GAAG,GAAG,gBAAgB,CAAC,GAAG,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QACtD,MAAM,QAAQ,GAAG,CAAC,GAAG,GAAG,CAAC;QACzB,IAAI,QAAQ,GAAG,WAAW;YAAE,WAAW,GAAG,QAAQ,CAAC;IACrD,CAAC;IACD,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,IAAI,EAAE,QAAQ,EAAE,MAAM,EAAE,WAAW,EAAE,CAAC;AAC1E,CAAC;AAED,MAAM,UAAU,gBAAgB,CAC9B,SAAmB,EACnB,QAA0B,EAC1B,SAAiB;IAEjB,MAAM,GAAG,GAAG,gBAAgB,CAAC,SAAS,EAAE,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAC3D,OAAO,GAAG,IAAI,SAAS,GAAG,QAAQ,CAAC,MAAM,CAAC;AAC5C,CAAC"}
@@ -0,0 +1,3 @@
1
+ import type { InferenceAdapter, EvalsFile } from '../types.js';
2
+ export declare function buildGeneratorPrompt(skillContent: string): string;
3
+ export declare function generateEvals(skillContent: string, skillName: string, inference: InferenceAdapter): Promise<EvalsFile>;
@@ -0,0 +1,52 @@
1
// Builds the prompt that asks a model to generate 5-8 test scenarios for a skill.
// `skillContent` is inserted verbatim between the two `---` delimiter lines, and
// the prompt asks for JSON with a `skill_name` string and an `evals` array.
// NOTE(review): this diff rendering strips leading whitespace, so any indentation
// inside the template literal (e.g. in the JSON example) is not visible here —
// check the published file before editing the literal.
+ export function buildGeneratorPrompt(skillContent) {
2
+ return `You are a test case generator for AI skills. Read the following skill definition and generate 5-8 realistic test scenarios.
3
+
4
+ SKILL DEFINITION:
5
+ ---
6
+ ${skillContent}
7
+ ---
8
+
9
+ Generate test scenarios as JSON with this exact format:
10
+ {
11
+ "skill_name": "<name from skill>",
12
+ "evals": [
13
+ {
14
+ "id": 1,
15
+ "prompt": "<realistic user prompt that would trigger this skill>",
16
+ "expected_output": "<human-readable description of expected behavior>",
17
+ "assertions": ["<verifiable statement about the output>"]
18
+ }
19
+ ]
20
+ }
21
+
22
+ Requirements:
23
+ - Include happy path scenarios (normal use cases)
24
+ - Include edge cases (empty input, malformed input, boundary conditions)
25
+ - Include at least one negative test (input the skill should handle gracefully)
26
+ - Prompts should be realistic — the way a real user would type them
27
+ - Each assertion should be specific and verifiable
28
+ - Return ONLY the JSON, no markdown wrapping`;
29
+ }
30
/**
 * Pulls the JSON payload out of a model reply.
 *
 * Handles, in order: a Markdown code fence (tag matched case-insensitively),
 * a reply that already starts with `{` or `[`, and JSON surrounded by prose,
 * where the span from the first `{` to the last `}` is returned.
 *
 * @param text Raw model reply.
 * @returns The best candidate JSON text, trimmed. It is not validated here;
 * when no candidate is found the trimmed input is returned unchanged.
 */
function extractJSON(text) {
    const fenced = /```(?:json)?\s*([\s\S]*?)```/i.exec(text);
    if (fenced) {
        return fenced[1].trim();
    }
    const trimmed = text.trim();
    if (trimmed.startsWith('{') || trimmed.startsWith('[')) {
        return trimmed;
    }
    // Prose around the payload ("Here you go: {...} Thanks"): keep the outermost object.
    const start = trimmed.indexOf('{');
    const end = trimmed.lastIndexOf('}');
    return start !== -1 && end > start ? trimmed.slice(start, end + 1) : trimmed;
}
36
/**
 * Asks the model to generate evaluation scenarios for a skill and normalises
 * the reply into an evals file.
 *
 * @param skillContent Skill definition text sent to the model.
 * @param skillName Fallback used when the reply has no `skill_name`.
 * @param inference Adapter whose `chat` method runs the generator prompt.
 * @returns The evals file; missing ids fall back to the 1-based position and
 * missing `expected_output`, `files` and `assertions` get empty defaults.
 * @throws {SyntaxError} If the reply is not valid JSON.
 * @throws {TypeError} If the parsed reply has no `evals` array.
 */
export async function generateEvals(skillContent, skillName, inference) {
    const prompt = buildGeneratorPrompt(skillContent);
    const response = await inference.chat([{ role: 'user', content: prompt }], { temperature: 0.7, responseFormat: 'json' });
    const jsonText = extractJSON(response);
    let parsed;
    try {
        parsed = JSON.parse(jsonText);
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        // Same exception type as before, with context about where it came from.
        throw new SyntaxError(`Eval generator returned invalid JSON: ${reason}`);
    }
    // Validate the shape here so a bad reply does not fail later inside `.map`.
    if (parsed === null || typeof parsed !== 'object' || !Array.isArray(parsed.evals)) {
        throw new TypeError('Eval generator response is missing an "evals" array');
    }
    return {
        skill_name: parsed.skill_name || skillName,
        // NOTE(review): this stamp says v1.0.0 while the package listing is 1.0.1 — confirm whether it is stale.
        generated_by: 'snapeval v1.0.0',
        evals: parsed.evals.map((e, i) => ({
            id: e.id || i + 1,
            prompt: e.prompt,
            expected_output: e.expected_output || '',
            files: e.files || [],
            assertions: e.assertions || [],
        })),
    };
}
52
+ //# sourceMappingURL=generator.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"generator.js","sourceRoot":"","sources":["../../../src/engine/generator.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,oBAAoB,CAAC,YAAoB;IACvD,OAAO;;;;EAIP,YAAY;;;;;;;;;;;;;;;;;;;;;;6CAsB+B,CAAC;AAC9C,CAAC;AAED,SAAS,WAAW,CAAC,IAAY;IAC/B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,8BAA8B,CAAC,CAAC;IACzD,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IAClC,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;AACrB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,YAAoB,EACpB,SAAiB,EACjB,SAA2B;IAE3B,MAAM,MAAM,GAAG,oBAAoB,CAAC,YAAY,CAAC,CAAC;IAClD,MAAM,QAAQ,GAAG,MAAM,SAAS,CAAC,IAAI,CACnC,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,EACnC,EAAE,WAAW,EAAE,GAAG,EAAE,cAAc,EAAE,MAAM,EAAE,CAC7C,CAAC;IACF,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC,CAAC;IACjD,OAAO;QACL,UAAU,EAAE,MAAM,CAAC,UAAU,IAAI,SAAS;QAC1C,YAAY,EAAE,iBAAiB;QAC/B,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAM,EAAE,CAAS,EAAE,EAAE,CAAC,CAAC;YAC9C,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC;YACjB,MAAM,EAAE,CAAC,CAAC,MAAM;YAChB,eAAe,EAAE,CAAC,CAAC,eAAe,IAAI,EAAE;YACxC,KAAK,EAAE,CAAC,CAAC,KAAK,IAAI,EAAE;YACpB,UAAU,EAAE,CAAC,CAAC,UAAU,IAAI,EAAE;SAC/B,CAAC,CAAC;KACJ,CAAC;AACJ,CAAC"}
@@ -0,0 +1,11 @@
1
+ import type { SkillOutput, Snapshot } from '../types.js';
2
+ export declare class SnapshotManager {
3
+ private evalsDir;
4
+ private snapshotsDir;
5
+ constructor(evalsDir: string);
6
+ private snapshotPath;
7
+ saveSnapshot(scenarioId: number, prompt: string, output: SkillOutput, runs?: number): void;
8
+ loadSnapshot(scenarioId: number): Snapshot | null;
9
+ approve(scenarioId: number, prompt: string, newOutput: SkillOutput): void;
10
+ listSnapshotIds(): number[];
11
+ }
@@ -0,0 +1,46 @@
1
+ import * as fs from 'node:fs';
2
+ import * as path from 'node:path';
3
+ import * as crypto from 'node:crypto';
4
/** First 8 hex characters of the SHA-256 digest of `text`. */
function shortSha256(text) {
    return crypto.createHash('sha256').update(text).digest('hex').slice(0, 8);
}
/**
 * Reads and writes scenario snapshots under `<evalsDir>/snapshots`.
 *
 * Each scenario is stored as `scenario-<id>.snap.json`; approvals are also
 * appended to `.audit-log.jsonl` in the same directory.
 */
export class SnapshotManager {
    evalsDir;
    snapshotsDir;
    /** @param evalsDir Directory that contains (or will contain) `snapshots/`. */
    constructor(evalsDir) {
        this.evalsDir = evalsDir;
        this.snapshotsDir = path.join(evalsDir, 'snapshots');
    }
    /** Absolute or relative path of the snapshot file for `scenarioId`. */
    snapshotPath(scenarioId) {
        return path.join(this.snapshotsDir, `scenario-${scenarioId}.snap.json`);
    }
    /**
     * Writes (or overwrites) the snapshot for a scenario, creating the
     * snapshots directory when needed.
     *
     * @param runs Number of runs recorded on the snapshot (default 1).
     */
    saveSnapshot(scenarioId, prompt, output, runs = 1) {
        const snapshot = {
            scenario_id: scenarioId,
            prompt,
            output,
            captured_at: new Date().toISOString(),
            runs,
            approved_by: null,
        };
        fs.mkdirSync(this.snapshotsDir, { recursive: true });
        fs.writeFileSync(this.snapshotPath(scenarioId), JSON.stringify(snapshot, null, 2));
    }
    /** @returns The parsed snapshot, or `null` when no file exists for the id. */
    loadSnapshot(scenarioId) {
        const filePath = this.snapshotPath(scenarioId);
        if (!fs.existsSync(filePath))
            return null;
        return JSON.parse(fs.readFileSync(filePath, 'utf-8'));
    }
    /**
     * Replaces the stored snapshot with `newOutput` and appends an audit entry
     * holding short hashes of the previous and new raw output.
     *
     * NOTE(review): the saved snapshot still has `approved_by: null` after an
     * approval — confirm whether that field is meant to be filled in here.
     */
    approve(scenarioId, prompt, newOutput) {
        const previous = this.loadSnapshot(scenarioId);
        // 'none' marks a first-time approval with no earlier snapshot.
        const previousHash = previous ? shortSha256(previous.output.raw) : 'none';
        const newHash = shortSha256(newOutput.raw);
        this.saveSnapshot(scenarioId, prompt, newOutput);
        const auditEntry = {
            scenario_id: scenarioId,
            approved_at: new Date().toISOString(),
            previous_hash: previousHash,
            new_hash: newHash,
        };
        const auditPath = path.join(this.snapshotsDir, '.audit-log.jsonl');
        fs.appendFileSync(auditPath, JSON.stringify(auditEntry) + '\n');
    }
    /** @returns Ids of all stored snapshots in ascending order; `[]` when the directory is missing. */
    listSnapshotIds() {
        if (!fs.existsSync(this.snapshotsDir))
            return [];
        const ids = [];
        for (const fileName of fs.readdirSync(this.snapshotsDir)) {
            // One anchored match both filters the file and captures its id.
            const match = /^scenario-(\d+)\.snap\.json$/.exec(fileName);
            if (match) {
                ids.push(Number.parseInt(match[1], 10));
            }
        }
        return ids.sort((a, b) => a - b);
    }
}
46
+ //# sourceMappingURL=snapshot.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"snapshot.js","sourceRoot":"","sources":["../../../src/engine/snapshot.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAClC,OAAO,KAAK,MAAM,MAAM,aAAa,CAAC;AAGtC,MAAM,OAAO,eAAe;IAEN;IADZ,YAAY,CAAS;IAC7B,YAAoB,QAAgB;QAAhB,aAAQ,GAAR,QAAQ,CAAQ;QAClC,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,WAAW,CAAC,CAAC;IACvD,CAAC;IAEO,YAAY,CAAC,UAAkB;QACrC,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,YAAY,UAAU,YAAY,CAAC,CAAC;IAC1E,CAAC;IAED,YAAY,CAAC,UAAkB,EAAE,MAAc,EAAE,MAAmB,EAAE,OAAe,CAAC;QACpF,MAAM,QAAQ,GAAa;YACzB,WAAW,EAAE,UAAU,EAAE,MAAM,EAAE,MAAM;YACvC,WAAW,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI;SAC/D,CAAC;QACF,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,YAAY,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACrD,EAAE,CAAC,aAAa,CAAC,IAAI,CAAC,YAAY,CAAC,UAAU,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IACrF,CAAC;IAED,YAAY,CAAC,UAAkB;QAC7B,MAAM,CAAC,GAAG,IAAI,CAAC,YAAY,CAAC,UAAU,CAAC,CAAC;QACxC,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,CAAC,CAAC;YAAE,OAAO,IAAI,CAAC;QACnC,OAAO,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC;IACjD,CAAC;IAED,OAAO,CAAC,UAAkB,EAAE,MAAc,EAAE,SAAsB;QAChE,MAAM,GAAG,GAAG,IAAI,CAAC,YAAY,CAAC,UAAU,CAAC,CAAC;QAC1C,MAAM,YAAY,GAAG,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;QACjH,MAAM,OAAO,GAAG,MAAM,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QAC5F,IAAI,CAAC,YAAY,CAAC,UAAU,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC;QACjD,MAAM,UAAU,GAAG,EAAE,WAAW,EAAE,UAAU,EAAE,WAAW,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,EAAE,aAAa,EAAE,YAAY,EAAE,QAAQ,EAAE,OAAO,EAAE,CAAC;QACtI,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,kBAAkB,CAAC,CAAC;QACnE,EAAE,CAAC,cAAc,CAAC,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,GAAG,IAAI,CAAC,CAAC;IAClE,CAAC;IAED,eAAe;QACb,IAAI,CA
AC,EAAE,CAAC,UAAU,CAAC,IAAI,CAAC,YAAY,CAAC;YAAE,OAAO,EAAE,CAAC;QACjD,OAAO,EAAE,CAAC,WAAW,CAAC,IAAI,CAAC,YAAY,CAAC;aACrC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,4BAA4B,CAAC,CAAC;aACpD,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,gBAAgB,CAAE,CAAC,CAAC,CAAC,CAAC,CAAC;aACnD,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC3B,CAAC;CACF"}
@@ -0,0 +1,16 @@
1
+ export declare class SnapevalError extends Error {
2
+ exitCode: number;
3
+ constructor(message: string, exitCode?: number);
4
+ }
5
+ export declare class AdapterNotAvailableError extends SnapevalError {
6
+ constructor(adapterName: string, installHint: string);
7
+ }
8
+ export declare class RateLimitError extends SnapevalError {
9
+ constructor(adapterName: string);
10
+ }
11
+ export declare class TimeoutError extends SnapevalError {
12
+ constructor(scenarioId: number, timeoutMs: number);
13
+ }
14
+ export declare class NoBaselineError extends SnapevalError {
15
+ constructor(skillPath: string);
16
+ }
@@ -0,0 +1,33 @@
1
/**
 * Base class for snapeval failures; carries the process exit code to use.
 */
export class SnapevalError extends Error {
    exitCode;
    /**
     * @param message Human-readable description.
     * @param exitCode Exit code associated with the failure (default 2).
     */
    constructor(message, exitCode = 2) {
        super(message);
        this.name = 'SnapevalError';
        this.exitCode = exitCode;
    }
}
/** Raised when a required adapter cannot be used; the message includes an install hint. */
export class AdapterNotAvailableError extends SnapevalError {
    constructor(adapterName, installHint) {
        const message = `${adapterName} is not available. ${installHint}`;
        super(message);
        this.name = 'AdapterNotAvailableError';
    }
}
/** Raised when an adapter reports that its rate limit was exceeded. */
export class RateLimitError extends SnapevalError {
    constructor(adapterName) {
        const message = `${adapterName} rate limit exceeded. Try again later or use a different adapter.`;
        super(message);
        this.name = 'RateLimitError';
    }
}
/** Raised when a scenario does not finish within its time limit. */
export class TimeoutError extends SnapevalError {
    constructor(scenarioId, timeoutMs) {
        const message = `Scenario ${scenarioId} timed out after ${timeoutMs}ms.`;
        super(message);
        this.name = 'TimeoutError';
    }
}
/** Raised when no baseline snapshots exist for a skill. */
export class NoBaselineError extends SnapevalError {
    constructor(skillPath) {
        const message = `No baselines found at ${skillPath}/evals/snapshots/. Run \`snapeval capture\` first.`;
        // Relies on the base-class default exit code of 2.
        super(message);
        this.name = 'NoBaselineError';
    }
}
33
+ //# sourceMappingURL=errors.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"errors.js","sourceRoot":"","sources":["../../src/errors.ts"],"names":[],"mappings":"AAAA,MAAM,OAAO,aAAc,SAAQ,KAAK;IACF;IAApC,YAAY,OAAe,EAAS,WAAmB,CAAC;QACtD,KAAK,CAAC,OAAO,CAAC,CAAC;QADmB,aAAQ,GAAR,QAAQ,CAAY;QAEtD,IAAI,CAAC,IAAI,GAAG,eAAe,CAAC;IAC9B,CAAC;CACF;AAED,MAAM,OAAO,wBAAyB,SAAQ,aAAa;IACzD,YAAY,WAAmB,EAAE,WAAmB;QAClD,KAAK,CAAC,GAAG,WAAW,sBAAsB,WAAW,EAAE,CAAC,CAAC;QACzD,IAAI,CAAC,IAAI,GAAG,0BAA0B,CAAC;IACzC,CAAC;CACF;AAED,MAAM,OAAO,cAAe,SAAQ,aAAa;IAC/C,YAAY,WAAmB;QAC7B,KAAK,CAAC,GAAG,WAAW,mEAAmE,CAAC,CAAC;QACzF,IAAI,CAAC,IAAI,GAAG,gBAAgB,CAAC;IAC/B,CAAC;CACF;AAED,MAAM,OAAO,YAAa,SAAQ,aAAa;IAC7C,YAAY,UAAkB,EAAE,SAAiB;QAC/C,KAAK,CAAC,YAAY,UAAU,oBAAoB,SAAS,KAAK,CAAC,CAAC;QAChE,IAAI,CAAC,IAAI,GAAG,cAAc,CAAC;IAC7B,CAAC;CACF;AAED,MAAM,OAAO,eAAgB,SAAQ,aAAa;IAChD,YAAY,SAAiB;QAC3B,KAAK,CAAC,yBAAyB,SAAS,oDAAoD,EAAE,CAAC,CAAC,CAAC;QACjG,IAAI,CAAC,IAAI,GAAG,iBAAiB,CAAC;IAChC,CAAC;CACF"}
@@ -0,0 +1,125 @@
1
+ export interface SkillOutput {
2
+ raw: string;
3
+ metadata: {
4
+ tokens: number;
5
+ durationMs: number;
6
+ model: string;
7
+ adapter: string;
8
+ };
9
+ }
10
+ export interface SkillAdapter {
11
+ name: string;
12
+ invoke(skillPath: string, prompt: string, files?: string[]): Promise<SkillOutput>;
13
+ isAvailable(): Promise<boolean>;
14
+ }
15
+ export interface Message {
16
+ role: 'system' | 'user' | 'assistant';
17
+ content: string;
18
+ }
19
+ export interface ChatOptions {
20
+ temperature?: number;
21
+ maxTokens?: number;
22
+ responseFormat?: 'text' | 'json';
23
+ }
24
+ export interface InferenceAdapter {
25
+ name: string;
26
+ chat(messages: Message[], options?: ChatOptions): Promise<string>;
27
+ embed(text: string): Promise<number[]>;
28
+ estimateCost(tokens: number): number;
29
+ }
30
+ export interface EvalResults {
31
+ skillName: string;
32
+ scenarios: ScenarioResult[];
33
+ summary: BenchmarkSummary;
34
+ timing: TimingData;
35
+ }
36
+ export interface ReportAdapter {
37
+ name: string;
38
+ report(results: EvalResults): Promise<void>;
39
+ }
40
+ export interface EvalCase {
41
+ id: number;
42
+ prompt: string;
43
+ expected_output: string;
44
+ files?: string[];
45
+ assertions?: string[];
46
+ }
47
+ export interface EvalsFile {
48
+ skill_name: string;
49
+ generated_by: string;
50
+ evals: EvalCase[];
51
+ }
52
+ export interface Snapshot {
53
+ scenario_id: number;
54
+ prompt: string;
55
+ output: SkillOutput;
56
+ captured_at: string;
57
+ runs: number;
58
+ approved_by: string | null;
59
+ }
60
+ export interface VarianceEnvelopeRun {
61
+ raw: string;
62
+ embedding: number[];
63
+ }
64
+ export interface VarianceEnvelope {
65
+ scenario_id: number;
66
+ runs: VarianceEnvelopeRun[];
67
+ centroid: number[];
68
+ radius: number;
69
+ }
70
+ export type ComparisonVerdict = 'pass' | 'regressed' | 'inconclusive' | 'error';
71
+ export interface ComparisonResult {
72
+ scenarioId: number;
73
+ verdict: ComparisonVerdict;
74
+ tier: 1 | 2 | 3;
75
+ similarity?: number;
76
+ details: string;
77
+ }
78
+ export interface AssertionResult {
79
+ text: string;
80
+ passed: boolean;
81
+ evidence: string;
82
+ }
83
+ export interface GradingSummary {
84
+ passed: number;
85
+ failed: number;
86
+ total: number;
87
+ pass_rate: number;
88
+ }
89
+ export interface GradingFile {
90
+ assertion_results: AssertionResult[];
91
+ summary: GradingSummary;
92
+ }
93
+ export interface TimingData {
94
+ total_tokens: number;
95
+ duration_ms: number;
96
+ }
97
+ export interface BenchmarkSummary {
98
+ total_scenarios: number;
99
+ passed: number;
100
+ regressed: number;
101
+ pass_rate: number;
102
+ total_tokens: number;
103
+ total_cost_usd: number;
104
+ total_duration_ms: number;
105
+ tier_breakdown: {
106
+ tier1_schema: number;
107
+ tier2_embedding: number;
108
+ tier3_llm_judge: number;
109
+ };
110
+ }
111
+ export interface ScenarioResult {
112
+ scenarioId: number;
113
+ prompt: string;
114
+ comparison: ComparisonResult;
115
+ grading?: GradingFile;
116
+ timing: TimingData;
117
+ newOutput: SkillOutput;
118
+ }
119
+ export interface SnapevalConfig {
120
+ adapter: string;
121
+ inference: string;
122
+ threshold: number;
123
+ runs: number;
124
+ budget: string;
125
+ }
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/types.ts"],"names":[],"mappings":""}
package/package.json ADDED
@@ -0,0 +1,53 @@
1
+ {
2
+ "name": "snapeval",
3
+ "version": "1.0.1",
4
+ "description": "Semantic snapshot testing for AI skills. Zero assertions. AI-driven. Free inference.",
5
+ "type": "module",
6
+ "bin": {
7
+ "snapeval": "./bin/snapeval.ts"
8
+ },
9
+ "scripts": {
10
+ "test": "vitest run",
11
+ "test:watch": "vitest",
12
+ "build": "tsc",
13
+ "dev": "tsx bin/snapeval.ts",
14
+ "prepublishOnly": "npm run build && npm test"
15
+ },
16
+ "keywords": [
17
+ "ai-skills",
18
+ "snapshot-testing",
19
+ "copilot-cli",
20
+ "agentskills",
21
+ "evaluation",
22
+ "semantic-testing"
23
+ ],
24
+ "author": "Matan Tsach",
25
+ "license": "MIT",
26
+ "repository": {
27
+ "type": "git",
28
+ "url": "https://github.com/matantsach/snapeval.git"
29
+ },
30
+ "homepage": "https://github.com/matantsach/snapeval",
31
+ "bugs": {
32
+ "url": "https://github.com/matantsach/snapeval/issues"
33
+ },
34
+ "files": [
35
+ "dist/src/",
36
+ "dist/bin/",
37
+ "bin/",
38
+ "src/",
39
+ "plugin.json",
40
+ "skills/snapeval/SKILL.md",
41
+ "scripts/"
42
+ ],
43
+ "dependencies": {
44
+ "chalk": "^5.4.1",
45
+ "commander": "^13.1.0"
46
+ },
47
+ "devDependencies": {
48
+ "@types/node": "^22.13.10",
49
+ "tsx": "^4.19.3",
50
+ "typescript": "^5.8.2",
51
+ "vitest": "^3.0.8"
52
+ }
53
+ }