@evalgate/sdk 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +638 -0
- package/README.md +398 -0
- package/dist/assertions.d.ts +189 -0
- package/dist/assertions.js +662 -0
- package/dist/batch.d.ts +68 -0
- package/dist/batch.js +179 -0
- package/dist/cache.d.ts +65 -0
- package/dist/cache.js +131 -0
- package/dist/cli/api.d.ts +108 -0
- package/dist/cli/api.js +132 -0
- package/dist/cli/baseline.d.ts +10 -0
- package/dist/cli/baseline.js +172 -0
- package/dist/cli/check.d.ts +73 -0
- package/dist/cli/check.js +355 -0
- package/dist/cli/ci-context.d.ts +6 -0
- package/dist/cli/ci-context.js +112 -0
- package/dist/cli/ci.d.ts +45 -0
- package/dist/cli/ci.js +192 -0
- package/dist/cli/config.d.ts +30 -0
- package/dist/cli/config.js +230 -0
- package/dist/cli/constants.d.ts +15 -0
- package/dist/cli/constants.js +18 -0
- package/dist/cli/diff.d.ts +173 -0
- package/dist/cli/diff.js +685 -0
- package/dist/cli/discover.d.ts +84 -0
- package/dist/cli/discover.js +419 -0
- package/dist/cli/doctor.d.ts +88 -0
- package/dist/cli/doctor.js +675 -0
- package/dist/cli/env.d.ts +21 -0
- package/dist/cli/env.js +42 -0
- package/dist/cli/explain.d.ts +58 -0
- package/dist/cli/explain.js +561 -0
- package/dist/cli/formatters/github.d.ts +8 -0
- package/dist/cli/formatters/github.js +135 -0
- package/dist/cli/formatters/human.d.ts +6 -0
- package/dist/cli/formatters/human.js +110 -0
- package/dist/cli/formatters/json.d.ts +6 -0
- package/dist/cli/formatters/json.js +10 -0
- package/dist/cli/formatters/pr-comment.d.ts +12 -0
- package/dist/cli/formatters/pr-comment.js +103 -0
- package/dist/cli/formatters/types.d.ts +103 -0
- package/dist/cli/formatters/types.js +8 -0
- package/dist/cli/gate.d.ts +21 -0
- package/dist/cli/gate.js +179 -0
- package/dist/cli/impact-analysis.d.ts +63 -0
- package/dist/cli/impact-analysis.js +252 -0
- package/dist/cli/index.d.ts +9 -0
- package/dist/cli/index.js +332 -0
- package/dist/cli/init.d.ts +16 -0
- package/dist/cli/init.js +292 -0
- package/dist/cli/manifest.d.ts +103 -0
- package/dist/cli/manifest.js +282 -0
- package/dist/cli/migrate.d.ts +41 -0
- package/dist/cli/migrate.js +349 -0
- package/dist/cli/policy-packs.d.ts +23 -0
- package/dist/cli/policy-packs.js +89 -0
- package/dist/cli/print-config.d.ts +29 -0
- package/dist/cli/print-config.js +270 -0
- package/dist/cli/profiles.d.ts +28 -0
- package/dist/cli/profiles.js +30 -0
- package/dist/cli/reason-codes.d.ts +17 -0
- package/dist/cli/reason-codes.js +19 -0
- package/dist/cli/regression-gate.d.ts +15 -0
- package/dist/cli/regression-gate.js +341 -0
- package/dist/cli/render/snippet.d.ts +5 -0
- package/dist/cli/render/snippet.js +15 -0
- package/dist/cli/render/sort.d.ts +10 -0
- package/dist/cli/render/sort.js +24 -0
- package/dist/cli/report/build-check-report.d.ts +19 -0
- package/dist/cli/report/build-check-report.js +132 -0
- package/dist/cli/run.d.ts +101 -0
- package/dist/cli/run.js +395 -0
- package/dist/cli/share.d.ts +17 -0
- package/dist/cli/share.js +91 -0
- package/dist/cli/upgrade.d.ts +15 -0
- package/dist/cli/upgrade.js +492 -0
- package/dist/cli/workspace.d.ts +31 -0
- package/dist/cli/workspace.js +68 -0
- package/dist/client.d.ts +368 -0
- package/dist/client.js +893 -0
- package/dist/client.request.test.d.ts +1 -0
- package/dist/client.request.test.js +232 -0
- package/dist/context.d.ts +134 -0
- package/dist/context.js +215 -0
- package/dist/errors.d.ts +82 -0
- package/dist/errors.js +298 -0
- package/dist/export.d.ts +195 -0
- package/dist/export.js +344 -0
- package/dist/index.d.ts +44 -0
- package/dist/index.js +153 -0
- package/dist/integrations/anthropic.d.ts +91 -0
- package/dist/integrations/anthropic.js +163 -0
- package/dist/integrations/openai-eval.d.ts +57 -0
- package/dist/integrations/openai-eval.js +232 -0
- package/dist/integrations/openai.d.ts +92 -0
- package/dist/integrations/openai.js +160 -0
- package/dist/local.d.ts +39 -0
- package/dist/local.js +148 -0
- package/dist/logger.d.ts +128 -0
- package/dist/logger.js +227 -0
- package/dist/matchers/index.d.ts +1 -0
- package/dist/matchers/index.js +6 -0
- package/dist/matchers/to-pass-gate.d.ts +29 -0
- package/dist/matchers/to-pass-gate.js +35 -0
- package/dist/pagination.d.ts +74 -0
- package/dist/pagination.js +139 -0
- package/dist/regression.d.ts +100 -0
- package/dist/regression.js +44 -0
- package/dist/runtime/adapters/config-to-dsl.d.ts +33 -0
- package/dist/runtime/adapters/config-to-dsl.js +400 -0
- package/dist/runtime/adapters/testsuite-to-dsl.d.ts +63 -0
- package/dist/runtime/adapters/testsuite-to-dsl.js +276 -0
- package/dist/runtime/context.d.ts +26 -0
- package/dist/runtime/context.js +74 -0
- package/dist/runtime/eval.d.ts +46 -0
- package/dist/runtime/eval.js +244 -0
- package/dist/runtime/execution-mode.d.ts +80 -0
- package/dist/runtime/execution-mode.js +357 -0
- package/dist/runtime/executor.d.ts +16 -0
- package/dist/runtime/executor.js +152 -0
- package/dist/runtime/registry.d.ts +78 -0
- package/dist/runtime/registry.js +403 -0
- package/dist/runtime/run-report.d.ts +200 -0
- package/dist/runtime/run-report.js +222 -0
- package/dist/runtime/types.d.ts +356 -0
- package/dist/runtime/types.js +76 -0
- package/dist/snapshot.d.ts +176 -0
- package/dist/snapshot.js +322 -0
- package/dist/streaming.d.ts +173 -0
- package/dist/streaming.js +268 -0
- package/dist/testing.d.ts +273 -0
- package/dist/testing.js +317 -0
- package/dist/types.d.ts +754 -0
- package/dist/types.js +54 -0
- package/dist/utils/input-hash.d.ts +8 -0
- package/dist/utils/input-hash.js +41 -0
- package/dist/version.d.ts +7 -0
- package/dist/version.js +10 -0
- package/dist/workflows.d.ts +389 -0
- package/dist/workflows.js +671 -0
- package/package.json +117 -0
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TICKET 5 — Behavioral Diff CLI (EVAL-401)
|
|
3
|
+
*
|
|
4
|
+
* Goal: "Git diff for AI behavior" from two RunReports
|
|
5
|
+
*
|
|
6
|
+
* Command:
|
|
7
|
+
* evalgate diff --base main (default uses git to find baseline run)
|
|
8
|
+
* evalgate diff --a <runReportPath> --b <runReportPath>
|
|
9
|
+
* evalgate diff main..feature (nice-to-have alias)
|
|
10
|
+
*/
|
|
11
|
+
import type { RunResult } from "./run";
|
|
12
|
+
/**
|
|
13
|
+
* Diff schema version, declared as the literal `1`. Presumably the value carried in `DiffResult.schemaVersion` - confirm against the implementation.
|
|
14
|
+
*/
|
|
15
|
+
export declare const DIFF_SCHEMA_VERSION = 1;
|
|
16
|
+
/**
|
|
17
|
+
* Supported RunReport schema versions: a readonly one-element tuple containing only `1`. Presumably the list `validateSchemaVersion` checks against - confirm.
|
|
18
|
+
*/
|
|
19
|
+
export declare const SUPPORTED_SCHEMA_VERSIONS: readonly [1];
|
|
20
|
+
/**
|
|
21
|
+
* Rounding helpers for floating point normalization. Both take a number plus an optional `precision` and return a number; the default precision and the rounding rule are not visible in this declaration file.
|
|
22
|
+
*/
|
|
23
|
+
export declare function round(value: number, precision?: number): number;
|
|
24
|
+
export declare function roundPct(value: number, precision?: number): number;
|
|
25
|
+
/**
|
|
26
|
+
* Validate RunReport schema version. Takes a `RunResult` and returns `void`; presumably throws when the version is not in `SUPPORTED_SCHEMA_VERSIONS` - confirm against the implementation.
|
|
27
|
+
*/
|
|
28
|
+
export declare function validateSchemaVersion(report: RunResult): void;
|
|
29
|
+
/**
|
|
30
|
+
* Diff result classification: the closed set of eight string labels that `SpecDiff.classification` can carry.
|
|
31
|
+
*/
|
|
32
|
+
export type DiffClassification = "new_failure" | "fixed_failure" | "score_drop" | "score_improve" | "execution_error" | "skipped_change" | "added" | "removed";
|
|
33
|
+
/**
|
|
34
|
+
* Individual spec diff: one spec's identity, its result in the base run and/or the head run, its change classification, and the computed deltas.
|
|
35
|
+
*/
|
|
36
|
+
export interface SpecDiff {
|
|
37
|
+
/** Spec identifier; presumably the key used to pair base and head results - confirm against compareReports */
|
|
38
|
+
specId: string;
|
|
39
|
+
/** Spec name */
|
|
40
|
+
name: string;
|
|
41
|
+
/** File path of the spec; whether it is absolute or relative is not visible here */
|
|
42
|
+
filePath: string;
|
|
43
|
+
/** Classification of change (one of the `DiffClassification` labels) */
|
|
44
|
+
classification: DiffClassification;
|
|
45
|
+
/** Base run result (if exists). Optional - presumably absent when the spec has no result in the base run; confirm. */
|
|
46
|
+
base?: {
|
|
47
|
+
status: "passed" | "failed" | "skipped";
|
|
48
|
+
score?: number;
|
|
49
|
+
duration: number; // unit is not stated in this declaration file - TODO confirm
|
|
50
|
+
error?: string;
|
|
51
|
+
};
|
|
52
|
+
/** Head run result (if exists). Same shape as `base`; presumably absent when the spec has no result in the head run - confirm. */
|
|
53
|
+
head?: {
|
|
54
|
+
status: "passed" | "failed" | "skipped";
|
|
55
|
+
score?: number;
|
|
56
|
+
duration: number; // unit is not stated in this declaration file - TODO confirm
|
|
57
|
+
error?: string;
|
|
58
|
+
};
|
|
59
|
+
/** Calculated deltas; every field is optional. NOTE(review): sign convention (head minus base?) is not visible here - confirm. */
|
|
60
|
+
deltas: {
|
|
61
|
+
scoreDelta?: number;
|
|
62
|
+
durationDelta?: number;
|
|
63
|
+
statusChange?: string; // typed as plain string; the format is not visible in this declaration file
|
|
64
|
+
};
|
|
65
|
+
}
|
|
66
|
+
/**
|
|
67
|
+
* Diff summary statistics: aggregate totals, deltas and counts for one base/head comparison. Returned by `calculateDiffSummary` and carried on `DiffResult.summary`.
|
|
68
|
+
*/
|
|
69
|
+
export interface DiffSummary {
|
|
70
|
+
/** Total specs in base */
|
|
71
|
+
baseTotal: number;
|
|
72
|
+
/** Total specs in head */
|
|
73
|
+
headTotal: number;
|
|
74
|
+
/** Pass rate delta. NOTE(review): direction (head minus base?) and scale (fraction vs. percent) are not visible here - confirm. */
|
|
75
|
+
passRateDelta: number;
|
|
76
|
+
/** Score delta (average). NOTE(review): sign convention is not visible here - confirm. */
|
|
77
|
+
scoreDelta: number;
|
|
78
|
+
/** Number of regressions; which `DiffClassification` labels count as a regression is not visible here */
|
|
79
|
+
regressions: number;
|
|
80
|
+
/** Number of improvements; which `DiffClassification` labels count as an improvement is not visible here */
|
|
81
|
+
improvements: number;
|
|
82
|
+
/** Number of added specs */
|
|
83
|
+
added: number;
|
|
84
|
+
/** Number of removed specs */
|
|
85
|
+
removed: number;
|
|
86
|
+
}
|
|
87
|
+
/**
|
|
88
|
+
* Complete diff result: both full run reports, the summary, the per-spec diffs, and metadata. Returned by `compareReports` and, wrapped in a Promise, by `runDiff`.
|
|
89
|
+
*/
|
|
90
|
+
export interface DiffResult {
|
|
91
|
+
/** Schema version; typed as `number`. Presumably set to `DIFF_SCHEMA_VERSION` - confirm. */
|
|
92
|
+
schemaVersion: number;
|
|
93
|
+
/** Base run report */
|
|
94
|
+
base: RunResult;
|
|
95
|
+
/** Head run report */
|
|
96
|
+
head: RunResult;
|
|
97
|
+
/** Diff summary */
|
|
98
|
+
summary: DiffSummary;
|
|
99
|
+
/** Individual spec diffs. NOTE(review): the field name suggests only changed specs are listed - confirm. */
|
|
100
|
+
changedSpecs: SpecDiff[];
|
|
101
|
+
/** Diff metadata: generation time plus the base and head sources (formats are not specified in this declaration file) */
|
|
102
|
+
metadata: {
|
|
103
|
+
generatedAt: number; // presumably an epoch timestamp; unit not stated here - TODO confirm
|
|
104
|
+
baseSource: string;
|
|
105
|
+
headSource: string;
|
|
106
|
+
};
|
|
107
|
+
}
|
|
108
|
+
/**
|
|
109
|
+
* Diff options accepted by `runDiff` and `runDiffCLI`. Every field is optional; the defaults are not visible in this declaration file.
|
|
110
|
+
*/
|
|
111
|
+
export interface DiffOptions {
|
|
112
|
+
/** Base report path or branch */
|
|
113
|
+
base?: string;
|
|
114
|
+
/** Head report path */
|
|
115
|
+
head?: string;
|
|
116
|
+
/** Output format: either "human" or "json" */
|
|
117
|
+
format?: "human" | "json";
|
|
118
|
+
}
|
|
119
|
+
/**
|
|
120
|
+
* Run diff comparison. Takes `DiffOptions` and resolves to a `DiffResult`; how `base` and `head` are resolved to run reports is not visible in this declaration file.
|
|
121
|
+
*/
|
|
122
|
+
export declare function runDiff(options: DiffOptions): Promise<DiffResult>;
|
|
123
|
+
/**
|
|
124
|
+
* Compare two run reports (base first, head second) and return the resulting `DiffResult`. Synchronous, per the non-Promise return type.
|
|
125
|
+
*/
|
|
126
|
+
export declare function compareReports(base: RunResult, head: RunResult): DiffResult;
|
|
127
|
+
/**
|
|
128
|
+
* Classify the type of change between an optional base result and an optional head result (each an element of `RunResult["results"]`), returning a `DiffClassification`.
|
|
129
|
+
*/
|
|
130
|
+
declare function classifyDiff(base?: RunResult["results"][0], head?: RunResult["results"][0]): DiffClassification;
|
|
131
|
+
/**
|
|
132
|
+
* Calculate deltas between base and head. Both inputs are optional elements of `RunResult["results"]`; returns the `SpecDiff["deltas"]` shape, whose fields are all optional.
|
|
133
|
+
*/
|
|
134
|
+
declare function calculateDeltas(base?: RunResult["results"][0], head?: RunResult["results"][0]): SpecDiff["deltas"];
|
|
135
|
+
/**
|
|
136
|
+
* Calculate diff summary statistics from the base report, the head report, and the per-spec diffs; returns a `DiffSummary`.
|
|
137
|
+
*/
|
|
138
|
+
export declare function calculateDiffSummary(base: RunResult, head: RunResult, changedSpecs: SpecDiff[]): DiffSummary;
|
|
139
|
+
/**
|
|
140
|
+
* Print human-readable diff results for the given `DiffResult`. Returns `void`; the output destination is not visible in this declaration file (presumably stdout - confirm).
|
|
141
|
+
*/
|
|
142
|
+
export declare function printHumanResults(result: DiffResult): void;
|
|
143
|
+
/**
|
|
144
|
+
* Print JSON results for the given `DiffResult`. Returns `void`; the output destination is not visible in this declaration file (presumably stdout - confirm).
|
|
145
|
+
*/
|
|
146
|
+
export declare function printJsonResults(result: DiffResult): void;
|
|
147
|
+
/**
|
|
148
|
+
* Write GitHub Step Summary for the given `DiffResult`. Asynchronous (resolves to `void`); where the summary is written and what happens outside GitHub Actions is not visible in this declaration file - confirm against the implementation.
|
|
149
|
+
*/
|
|
150
|
+
export declare function writeGitHubStepSummary(result: DiffResult): Promise<void>;
|
|
151
|
+
/**
|
|
152
|
+
* CLI entry point. Takes `DiffOptions` and resolves to `void`; output and exit-code behavior are not visible in this declaration file.
|
|
153
|
+
*/
|
|
154
|
+
export declare function runDiffCLI(options: DiffOptions): Promise<void>;
|
|
155
|
+
export { classifyDiff, calculateDeltas };
|
|
156
|
+
export declare const diffCore: { // read-only object exposing four of this module's diff functions, three of them under alternate names
|
|
157
|
+
/**
|
|
158
|
+
* Compare two run reports and return diff result. Typed as `typeof compareReports`.
|
|
159
|
+
*/
|
|
160
|
+
readonly diffRunReports: typeof compareReports;
|
|
161
|
+
/**
|
|
162
|
+
* Classify the type of change between two specs. Typed as `typeof classifyDiff`.
|
|
163
|
+
*/
|
|
164
|
+
readonly classifyChange: typeof classifyDiff;
|
|
165
|
+
/**
|
|
166
|
+
* Calculate summary statistics for a diff. Typed as `typeof calculateDiffSummary`.
|
|
167
|
+
*/
|
|
168
|
+
readonly summarizeDiff: typeof calculateDiffSummary;
|
|
169
|
+
/**
|
|
170
|
+
* Calculate deltas between two spec results. Typed as the module-level `calculateDeltas` function.
|
|
171
|
+
*/
|
|
172
|
+
readonly calculateDeltas: typeof calculateDeltas;
|
|
173
|
+
};
|