@pauly4010/evalai-sdk 1.5.8 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -5,6 +5,34 @@ All notable changes to the @pauly4010/evalai-sdk package will be documented in t
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [1.6.0] - 2026-02-24
9
+
10
+ ### ✨ Added
11
+
12
+ #### CLI — Regression Gate & Baseline Management
13
+
14
+ - **`evalai baseline init`** — Create a starter `evals/baseline.json` with sample values and provenance metadata
15
+ - **`evalai baseline update`** — Run confidence tests, golden eval, and latency benchmark, then update baseline with real scores
16
+ - **`evalai gate`** — Run the local regression gate with proper exit code taxonomy (0=pass, 1=regression, 2=infra_error, 3=confidence_failed, 4=confidence_missing)
17
+ - **`evalai gate --format json`** — Output `evals/regression-report.json` as machine-readable JSON to stdout
18
+ - **`evalai gate --format github`** — Output GitHub Step Summary markdown with delta table
19
+
20
+ #### SDK Exports — Regression Gate Constants & Types
21
+
22
+ - **`GATE_EXIT`** — Exit code constants (`PASS`, `REGRESSION`, `INFRA_ERROR`, `CONFIDENCE_FAILED`, `CONFIDENCE_MISSING`)
23
+ - **`GATE_CATEGORY`** — Report category constants (`pass`, `regression`, `infra_error`)
24
+ - **`REPORT_SCHEMA_VERSION`** — Current schema version for `regression-report.json`
25
+ - **`ARTIFACTS`** — Well-known artifact paths (`BASELINE`, `REGRESSION_REPORT`, `CONFIDENCE_SUMMARY`, `LATENCY_BENCHMARK`)
26
+ - **Types**: `RegressionReport`, `RegressionDelta`, `Baseline`, `BaselineTolerance`, `GateExitCode`, `GateCategory`
27
+ - **Subpath export**: `@pauly4010/evalai-sdk/regression` for tree-shakeable imports
28
+
29
+ ### 🔧 Changed
30
+
31
+ - CLI help text updated to include `baseline` and `gate` commands
32
+ - The SDK is now the public contract for the regression gate — the project scripts are an implementation detail
33
+
34
+ ---
35
+
8
36
  ## [1.5.8] - 2026-02-22
9
37
 
10
38
  ### 🐛 Fixed
@@ -0,0 +1,10 @@
1
+ /**
2
+ * evalai baseline — Baseline management commands
3
+ *
4
+ * Subcommands:
5
+ * evalai baseline init — Create a starter evals/baseline.json
6
+ * evalai baseline update — Run tests + update baseline with real scores
7
+ */
8
+ export declare function runBaselineInit(cwd: string): number;
9
+ export declare function runBaselineUpdate(cwd: string): number;
10
+ export declare function runBaseline(argv: string[]): number;
@@ -0,0 +1,172 @@
1
+ "use strict";
2
+ /**
3
+ * evalai baseline — Baseline management commands
4
+ *
5
+ * Subcommands:
6
+ * evalai baseline init — Create a starter evals/baseline.json
7
+ * evalai baseline update — Run tests + update baseline with real scores
8
+ */
9
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ var desc = Object.getOwnPropertyDescriptor(m, k);
12
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
13
+ desc = { enumerable: true, get: function() { return m[k]; } };
14
+ }
15
+ Object.defineProperty(o, k2, desc);
16
+ }) : (function(o, m, k, k2) {
17
+ if (k2 === undefined) k2 = k;
18
+ o[k2] = m[k];
19
+ }));
20
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
21
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
22
+ }) : function(o, v) {
23
+ o["default"] = v;
24
+ });
25
+ var __importStar = (this && this.__importStar) || (function () {
26
+ var ownKeys = function(o) {
27
+ ownKeys = Object.getOwnPropertyNames || function (o) {
28
+ var ar = [];
29
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
30
+ return ar;
31
+ };
32
+ return ownKeys(o);
33
+ };
34
+ return function (mod) {
35
+ if (mod && mod.__esModule) return mod;
36
+ var result = {};
37
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
38
+ __setModuleDefault(result, mod);
39
+ return result;
40
+ };
41
+ })();
42
+ Object.defineProperty(exports, "__esModule", { value: true });
43
+ exports.runBaselineInit = runBaselineInit;
44
+ exports.runBaselineUpdate = runBaselineUpdate;
45
+ exports.runBaseline = runBaseline;
46
+ const node_child_process_1 = require("node:child_process");
47
+ const fs = __importStar(require("node:fs"));
48
+ const path = __importStar(require("node:path"));
49
+ const BASELINE_REL = "evals/baseline.json";
50
/**
 * Infer which package manager a project uses from the lockfile found in `cwd`.
 *
 * @param cwd Directory to inspect for lockfiles.
 * @returns "pnpm" or "yarn" when the matching lockfile exists, otherwise "npm".
 */
function detectPackageManager(cwd) {
    // Checked in order: a pnpm lockfile takes precedence over a yarn lockfile.
    const lockfiles = [
        ["pnpm-lock.yaml", "pnpm"],
        ["yarn.lock", "yarn"],
    ];
    for (const [lockfile, manager] of lockfiles) {
        if (fs.existsSync(path.join(cwd, lockfile))) {
            return manager;
        }
    }
    return "npm";
}
58
/**
 * Run a package.json script through the detected package manager.
 *
 * The child inherits this process's stdio. On Windows the command is run
 * through a shell.
 *
 * @param cwd Project directory; used for lockfile detection and as the child's working directory.
 * @param scriptName Name of the package.json script to run.
 * @returns The child's exit status, or 1 when the child could not be started
 *          or ended without an exit status (for example, killed by a signal).
 */
function runScript(cwd, scriptName) {
    const pm = detectPackageManager(cwd);
    const result = (0, node_child_process_1.spawnSync)(pm, ["run", scriptName], {
        cwd,
        stdio: "inherit",
        shell: process.platform === "win32",
    });
    // spawnSync reports launch failures (e.g. the package manager is not
    // installed) via `error` rather than throwing; surface them instead of
    // exiting 1 with no explanation.
    if (result.error) {
        console.error(`❌ Failed to run '${pm} run ${scriptName}': ${result.error.message}`);
        return 1;
    }
    if (result.status === null) {
        console.error(`❌ '${pm} run ${scriptName}' ended without an exit status (signal: ${result.signal ?? "unknown"})`);
        return 1;
    }
    return result.status;
}
69
/**
 * Create a starter baseline file (BASELINE_REL) under `cwd`, filled with
 * sample values and provenance metadata.
 *
 * @param cwd Project root in which the evals/ directory and baseline are created.
 * @returns 0 when the file was written, 1 when a baseline already exists.
 */
function runBaselineInit(cwd) {
    const target = path.join(cwd, BASELINE_REL);
    // Refuse to overwrite an existing baseline.
    if (fs.existsSync(target)) {
        console.log(`⚠ ${BASELINE_REL} already exists. Delete it first or use 'evalai baseline update'.`);
        return 1;
    }
    // Make sure the evals/ directory is there before writing into it.
    const evalsDir = path.join(cwd, "evals");
    if (!fs.existsSync(evalsDir)) {
        fs.mkdirSync(evalsDir, { recursive: true });
    }
    const author = process.env.USER || process.env.USERNAME || "unknown";
    const timestamp = new Date().toISOString();
    // Sample sections; property order is kept because it determines the
    // layout of the serialized file.
    const tolerance = {
        scoreDrop: 5,
        passRateDrop: 5,
        maxLatencyIncreaseMs: 200,
        maxCostIncreaseUsd: 0.05,
    };
    const goldenEval = {
        score: 100,
        passRate: 100,
        totalCases: 3,
        passedCases: 3,
    };
    const qualityScore = {
        overall: 90,
        grade: "A",
        accuracy: 85,
        safety: 100,
        latency: 90,
        cost: 90,
        consistency: 90,
    };
    const confidenceTests = {
        unitPassed: true,
        unitTotal: 0,
        dbPassed: true,
        dbTotal: 0,
    };
    const baseline = {
        schemaVersion: 1,
        description: "Regression gate baseline — created by evalai baseline init",
        generatedAt: timestamp,
        generatedBy: author,
        commitSha: "0000000",
        updatedAt: timestamp,
        updatedBy: author,
        tolerance,
        goldenEval,
        qualityScore,
        confidenceTests,
        productMetrics: {},
    };
    fs.writeFileSync(target, JSON.stringify(baseline, null, 2) + "\n");
    const messages = [
        `✅ Created ${BASELINE_REL} with sample values\n`,
        "Next steps:",
        ` 1. Commit ${BASELINE_REL} to your repo`,
        " 2. Run 'evalai baseline update' to populate with real scores",
        " 3. Run 'evalai gate' to verify the regression gate\n",
    ];
    for (const message of messages) {
        console.log(message);
    }
    return 0;
}
127
+ // ── baseline update ──
128
/**
 * Run the project's `eval:baseline-update` package.json script.
 *
 * Verifies that `cwd` contains a parseable package.json declaring the script
 * before delegating to runScript.
 *
 * @param cwd Project root containing package.json.
 * @returns 1 when package.json is missing, unparseable, or lacks the script;
 *          otherwise whatever runScript returns.
 */
function runBaselineUpdate(cwd) {
    const pkgPath = path.join(cwd, "package.json");
    if (!fs.existsSync(pkgPath)) {
        console.error("❌ No package.json found. Run this from your project root.");
        return 1;
    }
    let pkg;
    try {
        pkg = JSON.parse(fs.readFileSync(pkgPath, "utf-8"));
    }
    catch {
        console.error("❌ Failed to parse package.json");
        return 1;
    }
    // `pkg?.` guards against valid JSON that is not an object (e.g. `null`),
    // which would otherwise throw instead of reporting the missing script.
    if (!pkg?.scripts?.["eval:baseline-update"]) {
        console.error("❌ Missing 'eval:baseline-update' script in package.json.");
        console.error(' Add it: "eval:baseline-update": "npx tsx scripts/regression-gate.ts --update-baseline"');
        return 1;
    }
    console.log("📊 Running baseline update...\n");
    return runScript(cwd, "eval:baseline-update");
}
151
+ // ── baseline router ──
152
/**
 * Entry point for `evalai baseline`: dispatch on the first argument.
 *
 * @param argv Arguments following the `baseline` subcommand.
 * @returns The result of the `init` or `update` handler; for anything else the
 *          usage text is printed and the result is 0 for `--help`/`-h`, else 1.
 */
function runBaseline(argv) {
    const [sub] = argv;
    const cwd = process.cwd();
    switch (sub) {
        case "init":
            return runBaselineInit(cwd);
        case "update":
            return runBaselineUpdate(cwd);
        default:
            break;
    }
    // Unknown or missing subcommand: show usage.
    const usage = [
        "evalai baseline — Manage regression gate baselines",
        "",
        "Usage:",
        `evalai baseline init Create starter ${BASELINE_REL}`,
        "evalai baseline update Run tests and update baseline with real scores",
        "",
        "Examples:",
        "evalai baseline init",
        "evalai baseline update",
        "",
    ].join("\n");
    console.log(usage);
    const askedForHelp = sub === "--help" || sub === "-h";
    return askedForHelp ? 0 : 1;
}
package/dist/cli/index.js CHANGED
@@ -8,9 +8,11 @@
8
8
  * evalai check — CI/CD evaluation gate (see evalai check --help)
9
9
  */
10
10
  Object.defineProperty(exports, "__esModule", { value: true });
11
+ const baseline_1 = require("./baseline");
11
12
  const check_1 = require("./check");
12
13
  const doctor_1 = require("./doctor");
13
14
  const init_1 = require("./init");
15
+ const regression_gate_1 = require("./regression-gate");
14
16
  const share_1 = require("./share");
15
17
  const argv = process.argv.slice(2);
16
18
  const subcommand = argv[0];
@@ -19,6 +21,14 @@ if (subcommand === "init") {
19
21
  const ok = (0, init_1.runInit)(cwd);
20
22
  process.exit(ok ? 0 : 1);
21
23
  }
24
+ else if (subcommand === "baseline") {
25
+ const code = (0, baseline_1.runBaseline)(argv.slice(1));
26
+ process.exit(code);
27
+ }
28
+ else if (subcommand === "gate") {
29
+ const code = (0, regression_gate_1.runGate)(argv.slice(1));
30
+ process.exit(code);
31
+ }
22
32
  else if (subcommand === "doctor") {
23
33
  (0, doctor_1.runDoctor)(argv.slice(1))
24
34
  .then((code) => process.exit(code))
@@ -57,10 +67,16 @@ else {
57
67
  console.log(`EvalAI CLI
58
68
 
59
69
  Usage:
60
- evalai init Create evalai.config.json
61
- evalai doctor [options] Verify CI/CD setup (same endpoint as check)
62
- evalai check [options] CI/CD evaluation gate
63
- evalai share [options] Create share link for a run
70
+ evalai init Create evalai.config.json
71
+ evalai baseline init Create starter evals/baseline.json
72
+ evalai baseline update Run tests and update baseline with real scores
73
+ evalai gate [options] Run regression gate (local test-based)
74
+ evalai doctor [options] Verify CI/CD setup (same endpoint as check)
75
+ evalai check [options] CI/CD evaluation gate (API-based)
76
+ evalai share [options] Create share link for a run
77
+
78
+ Options for gate:
79
+ --format <fmt> Output format: human (default), json, github
64
80
 
65
81
  Options for check:
66
82
  --evaluationId <id> Evaluation to gate on (or from config)
@@ -0,0 +1,11 @@
1
+ /**
2
+ * evalai gate — Run the regression gate
3
+ *
4
+ * Delegates to the project's eval:regression-gate npm script.
5
+ * Supports --format json (prints the regression-report.json contents) and --format github (prints a Step Summary markdown table).
6
+ */
7
+ export interface GateArgs {
8
+ format: "human" | "json" | "github";
9
+ }
10
+ export declare function parseGateArgs(argv: string[]): GateArgs;
11
+ export declare function runGate(argv: string[]): number;
@@ -0,0 +1,150 @@
1
+ "use strict";
2
+ /**
3
+ * evalai gate — Run the regression gate
4
+ *
5
+ * Delegates to the project's eval:regression-gate npm script.
6
+ * Supports --format json (prints the regression-report.json contents) and --format github (prints a Step Summary markdown table).
7
+ */
8
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
9
+ if (k2 === undefined) k2 = k;
10
+ var desc = Object.getOwnPropertyDescriptor(m, k);
11
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
12
+ desc = { enumerable: true, get: function() { return m[k]; } };
13
+ }
14
+ Object.defineProperty(o, k2, desc);
15
+ }) : (function(o, m, k, k2) {
16
+ if (k2 === undefined) k2 = k;
17
+ o[k2] = m[k];
18
+ }));
19
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
20
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
21
+ }) : function(o, v) {
22
+ o["default"] = v;
23
+ });
24
+ var __importStar = (this && this.__importStar) || (function () {
25
+ var ownKeys = function(o) {
26
+ ownKeys = Object.getOwnPropertyNames || function (o) {
27
+ var ar = [];
28
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
29
+ return ar;
30
+ };
31
+ return ownKeys(o);
32
+ };
33
+ return function (mod) {
34
+ if (mod && mod.__esModule) return mod;
35
+ var result = {};
36
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
37
+ __setModuleDefault(result, mod);
38
+ return result;
39
+ };
40
+ })();
41
+ Object.defineProperty(exports, "__esModule", { value: true });
42
+ exports.parseGateArgs = parseGateArgs;
43
+ exports.runGate = runGate;
44
+ const node_child_process_1 = require("node:child_process");
45
+ const fs = __importStar(require("node:fs"));
46
+ const path = __importStar(require("node:path"));
47
+ const REPORT_REL = "evals/regression-report.json";
48
/**
 * Pick the package manager for the project in `cwd` based on its lockfile.
 *
 * @param cwd Directory to inspect.
 * @returns "pnpm" if pnpm-lock.yaml exists, else "yarn" if yarn.lock exists, else "npm".
 */
function detectPackageManager(cwd) {
    const hasLockfile = (name) => fs.existsSync(path.join(cwd, name));
    if (hasLockfile("pnpm-lock.yaml")) {
        return "pnpm";
    }
    return hasLockfile("yarn.lock") ? "yarn" : "npm";
}
56
/**
 * Parse the arguments of `evalai gate`.
 *
 * Recognizes `--format <fmt>` and `--format=<fmt>` where <fmt> is one of
 * "human", "json" or "github". Unrecognized values and unknown arguments are
 * ignored; when the option is repeated, the last recognized value wins.
 *
 * @param argv Arguments following the `gate` subcommand.
 * @returns An object whose `format` defaults to "human".
 */
function parseGateArgs(argv) {
    const args = { format: "human" };
    const inlinePrefix = "--format=";
    const applyFormat = (value) => {
        if (value === "json" || value === "github" || value === "human") {
            args.format = value;
        }
    };
    for (let i = 0; i < argv.length; i++) {
        const token = argv[i];
        if (token === "--format" && argv[i + 1]) {
            applyFormat(argv[i + 1]);
            // Consume the value so it is not re-read as an option.
            i++;
        }
        else if (typeof token === "string" && token.startsWith(inlinePrefix)) {
            // Same option in its `--format=value` spelling.
            applyFormat(token.slice(inlinePrefix.length));
        }
    }
    return args;
}
69
/** Build the GitHub Step Summary markdown for a parsed regression report. */
function renderGateSummaryMarkdown(report) {
    const icon = report.passed ? "✅" : "❌";
    const lines = [
        `## ${icon} Regression Gate: ${report.category}`,
        "",
        "| Metric | Baseline | Current | Delta | Status |",
        "|--------|----------|---------|-------|--------|",
    ];
    for (const d of report.deltas ?? []) {
        const statusIcon = d.status === "pass" ? "✅" : "❌";
        lines.push(`| ${d.metric} | ${d.baseline} | ${d.current} | ${d.delta} | ${statusIcon} |`);
    }
    if (report.failures?.length > 0) {
        lines.push("", "### Failures", "");
        for (const f of report.failures) {
            lines.push(`- ${f}`);
        }
    }
    lines.push("", `Schema version: ${report.schemaVersion ?? "unknown"}`);
    return lines.join("\n");
}
/**
 * Run the project's `eval:regression-gate` package.json script from the
 * current working directory and, depending on `--format`, emit the report.
 *
 * - human (default): the script's own output is shown.
 * - json: the script's output is discarded and the contents of REPORT_REL are
 *   written to stdout.
 * - github: the script's output is shown, then a markdown summary built from
 *   REPORT_REL is appended to $GITHUB_STEP_SUMMARY (when set) and printed.
 *
 * @param argv Arguments following the `gate` subcommand.
 * @returns The script's exit status, or 2 (the documented infra_error code)
 *          when the gate itself could not be run: package.json missing or
 *          unparseable, script not declared, or the child process ended
 *          without an exit status.
 */
function runGate(argv) {
    // The documented taxonomy reserves 1 for "regression"; failures of the
    // gate machinery itself must not be reported as a regression.
    const INFRA_ERROR = 2;
    const cwd = process.cwd();
    const args = parseGateArgs(argv);
    const pkgPath = path.join(cwd, "package.json");
    if (!fs.existsSync(pkgPath)) {
        console.error("❌ No package.json found. Run this from your project root.");
        return INFRA_ERROR;
    }
    let pkg;
    try {
        pkg = JSON.parse(fs.readFileSync(pkgPath, "utf-8"));
    }
    catch {
        console.error("❌ Failed to parse package.json");
        return INFRA_ERROR;
    }
    // `pkg?.` guards against valid JSON that is not an object (e.g. `null`).
    if (!pkg?.scripts?.["eval:regression-gate"]) {
        console.error("❌ Missing 'eval:regression-gate' script in package.json.");
        console.error(' Add it: "eval:regression-gate": "npx tsx scripts/regression-gate.ts"');
        return INFRA_ERROR;
    }
    const pm = detectPackageManager(cwd);
    // In json mode the script's human output is discarded. "ignore" is used
    // rather than "pipe" so that a chatty script cannot exceed spawnSync's
    // capture buffer limit and be terminated.
    const stdio = args.format === "json" ? "ignore" : "inherit";
    const result = (0, node_child_process_1.spawnSync)(pm, ["run", "eval:regression-gate"], {
        cwd,
        stdio: stdio,
        shell: process.platform === "win32",
    });
    if (result.error) {
        console.error(`❌ Failed to run '${pm} run eval:regression-gate': ${result.error.message}`);
    }
    const exitCode = result.status ?? INFRA_ERROR;
    const reportPath = path.join(cwd, REPORT_REL);
    if (args.format === "json") {
        if (fs.existsSync(reportPath)) {
            process.stdout.write(fs.readFileSync(reportPath, "utf-8"));
        }
        else {
            console.error(JSON.stringify({ error: "regression-report.json not found", exitCode }));
        }
    }
    else if (args.format === "github") {
        if (fs.existsSync(reportPath)) {
            let md;
            try {
                md = renderGateSummaryMarkdown(JSON.parse(fs.readFileSync(reportPath, "utf-8")));
            }
            catch (err) {
                // The script's own output has already been shown; just explain
                // why no summary follows.
                const reason = err instanceof Error ? err.message : String(err);
                console.error(`⚠ Could not build a summary from ${REPORT_REL}: ${reason}`);
            }
            if (md !== undefined) {
                const summaryPath = process.env.GITHUB_STEP_SUMMARY;
                if (summaryPath) {
                    try {
                        fs.appendFileSync(summaryPath, `${md}\n`);
                    }
                    catch (err) {
                        // An unwritable summary file should not suppress the
                        // markdown printed below.
                        const reason = err instanceof Error ? err.message : String(err);
                        console.error(`⚠ Could not write to GITHUB_STEP_SUMMARY: ${reason}`);
                    }
                }
                console.log(md);
            }
        }
    }
    return exitCode;
}
@@ -1 +1 @@
1
- export {};
1
+ export {};
@@ -1,157 +1,157 @@
1
- "use strict";
2
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
- if (k2 === undefined) k2 = k;
4
- var desc = Object.getOwnPropertyDescriptor(m, k);
5
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
- desc = { enumerable: true, get: function() { return m[k]; } };
7
- }
8
- Object.defineProperty(o, k2, desc);
9
- }) : (function(o, m, k, k2) {
10
- if (k2 === undefined) k2 = k;
11
- o[k2] = m[k];
12
- }));
13
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
- Object.defineProperty(o, "default", { enumerable: true, value: v });
15
- }) : function(o, v) {
16
- o["default"] = v;
17
- });
18
- var __importStar = (this && this.__importStar) || (function () {
19
- var ownKeys = function(o) {
20
- ownKeys = Object.getOwnPropertyNames || function (o) {
21
- var ar = [];
22
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
- return ar;
24
- };
25
- return ownKeys(o);
26
- };
27
- return function (mod) {
28
- if (mod && mod.__esModule) return mod;
29
- var result = {};
30
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
- __setModuleDefault(result, mod);
32
- return result;
33
- };
34
- })();
35
- Object.defineProperty(exports, "__esModule", { value: true });
36
- const vitest_1 = require("vitest");
37
- const client_1 = require("./client");
38
- const errorsModule = __importStar(require("./errors"));
39
- vitest_1.vi.mock("./cache", () => {
40
- const cacheTracker = { invalidatedPatterns: [] };
41
- const shouldCache = vitest_1.vi.fn().mockReturnValue(true);
42
- const getTTL = vitest_1.vi.fn().mockReturnValue(1000);
43
- const makeKey = (method, url, params) => `${method}:${url}:${JSON.stringify(params ?? null)}`;
44
- return {
45
- __esModule: true,
46
- shouldCache,
47
- getTTL,
48
- cacheTracker,
49
- RequestCache: class RequestCache {
50
- constructor() {
51
- this.store = new Map();
52
- }
53
- get(method, url, params) {
54
- const key = makeKey(method, url, params);
55
- return this.store.get(key) ?? null;
56
- }
57
- set(method, url, data, _ttl, params) {
58
- const key = makeKey(method, url, params);
59
- this.store.set(key, data);
60
- }
61
- invalidatePattern(pattern) {
62
- cacheTracker.invalidatedPatterns.push(pattern);
63
- }
64
- invalidate(_method, _url, _params) {
65
- // no-op for tests
66
- }
67
- clear() {
68
- this.store.clear();
69
- }
70
- },
71
- };
72
- });
73
- const cache_1 = require("./cache");
74
- (0, vitest_1.describe)("AIEvalClient.request", () => {
75
- (0, vitest_1.beforeEach)(() => {
76
- process.env.EVALAI_API_KEY = "test";
77
- cache_1.shouldCache.mockReset().mockReturnValue(true);
78
- cache_1.getTTL.mockReset().mockReturnValue(1000);
79
- cache_1.cacheTracker.invalidatedPatterns.length = 0;
80
- });
81
- (0, vitest_1.it)("caches GET responses and reuses data without re-fetching", async () => {
82
- const client = new client_1.AIEvalClient({ apiKey: "test", baseUrl: "http://localhost", timeout: 1000 });
83
- const payload = { items: [1, 2, 3] };
84
- const fetchMock = vitest_1.vi.fn().mockResolvedValue({
85
- ok: true,
86
- status: 200,
87
- json: async () => payload,
88
- });
89
- globalThis.fetch = fetchMock;
90
- const first = await client.request("/api/traces", { method: "GET" });
91
- const second = await client.request("/api/traces", { method: "GET" });
92
- (0, vitest_1.expect)(first).toEqual(payload);
93
- (0, vitest_1.expect)(second).toEqual(payload);
94
- (0, vitest_1.expect)(fetchMock).toHaveBeenCalledTimes(1);
95
- });
96
- (0, vitest_1.it)("propagates non-ok responses as SDK errors", async () => {
97
- const client = new client_1.AIEvalClient({ apiKey: "test", baseUrl: "http://localhost" });
98
- const fetchMock = vitest_1.vi.fn().mockResolvedValue({
99
- ok: false,
100
- status: 429,
101
- json: async () => ({ error: { code: "RATE_LIMIT_EXCEEDED" } }),
102
- });
103
- globalThis.fetch = fetchMock;
104
- const createErrorSpy = vitest_1.vi
105
- .spyOn(errorsModule, "createErrorFromResponse")
106
- .mockReturnValue(new errorsModule.EvalAIError("rate limited", "RATE_LIMIT_EXCEEDED", 429));
107
- await (0, vitest_1.expect)(client.request("/api/fail", { method: "GET" })).rejects.toHaveProperty("code", "RATE_LIMIT_EXCEEDED");
108
- createErrorSpy.mockRestore();
109
- });
110
- (0, vitest_1.it)("retries on retryable SDK errors and eventually succeeds", async () => {
111
- const client = new client_1.AIEvalClient({ apiKey: "test", baseUrl: "http://localhost", timeout: 1000 });
112
- vitest_1.vi.spyOn(client, "calculateBackoff").mockReturnValue(0);
113
- const failureResponse = {
114
- ok: false,
115
- status: 429,
116
- json: async () => ({ error: { code: "RATE_LIMIT_EXCEEDED" } }),
117
- };
118
- const successResponse = {
119
- ok: true,
120
- status: 200,
121
- json: async () => ({ ok: true }),
122
- };
123
- const createErrorSpy = vitest_1.vi
124
- .spyOn(errorsModule, "createErrorFromResponse")
125
- .mockReturnValue(new errorsModule.EvalAIError("rate limited", "RATE_LIMIT_EXCEEDED", 429));
126
- const fetchMock = vitest_1.vi
127
- .fn()
128
- .mockResolvedValueOnce(failureResponse)
129
- .mockResolvedValueOnce(successResponse);
130
- globalThis.fetch = fetchMock;
131
- const result = await client.request("/api/retry", { method: "GET" });
132
- (0, vitest_1.expect)(result).toEqual({ ok: true });
133
- (0, vitest_1.expect)(fetchMock).toHaveBeenCalledTimes(2);
134
- createErrorSpy.mockRestore();
135
- });
136
- (0, vitest_1.it)("throws a TIMEOUT SDK error when fetch aborts", async () => {
137
- const client = new client_1.AIEvalClient({ apiKey: "test", baseUrl: "http://localhost", timeout: 1000 });
138
- const abortError = Object.assign(new Error("aborted"), { name: "AbortError" });
139
- const fetchMock = vitest_1.vi.fn().mockRejectedValue(abortError);
140
- globalThis.fetch = fetchMock;
141
- await (0, vitest_1.expect)(client.request("/api/timeout", { method: "GET" })).rejects.toMatchObject({
142
- code: "TIMEOUT",
143
- });
144
- });
145
- (0, vitest_1.it)("invalidates related cache entries for mutation requests", async () => {
146
- const client = new client_1.AIEvalClient({ apiKey: "test", baseUrl: "http://localhost", timeout: 1000 });
147
- cache_1.shouldCache.mockReturnValue(false);
148
- const fetchMock = vitest_1.vi.fn().mockResolvedValue({
149
- ok: true,
150
- status: 201,
151
- json: async () => ({ result: "ok" }),
152
- });
153
- globalThis.fetch = fetchMock;
154
- await client.request("/api/evaluations", { method: "POST", body: JSON.stringify({}) });
155
- (0, vitest_1.expect)(cache_1.cacheTracker.invalidatedPatterns).toContain("evaluations");
156
- });
157
- });
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ const vitest_1 = require("vitest");
37
+ const client_1 = require("./client");
38
+ const errorsModule = __importStar(require("./errors"));
39
+ vitest_1.vi.mock("./cache", () => {
40
+ const cacheTracker = { invalidatedPatterns: [] };
41
+ const shouldCache = vitest_1.vi.fn().mockReturnValue(true);
42
+ const getTTL = vitest_1.vi.fn().mockReturnValue(1000);
43
+ const makeKey = (method, url, params) => `${method}:${url}:${JSON.stringify(params ?? null)}`;
44
+ return {
45
+ __esModule: true,
46
+ shouldCache,
47
+ getTTL,
48
+ cacheTracker,
49
+ RequestCache: class RequestCache {
50
+ constructor() {
51
+ this.store = new Map();
52
+ }
53
+ get(method, url, params) {
54
+ const key = makeKey(method, url, params);
55
+ return this.store.get(key) ?? null;
56
+ }
57
+ set(method, url, data, _ttl, params) {
58
+ const key = makeKey(method, url, params);
59
+ this.store.set(key, data);
60
+ }
61
+ invalidatePattern(pattern) {
62
+ cacheTracker.invalidatedPatterns.push(pattern);
63
+ }
64
+ invalidate(_method, _url, _params) {
65
+ // no-op for tests
66
+ }
67
+ clear() {
68
+ this.store.clear();
69
+ }
70
+ },
71
+ };
72
+ });
73
+ const cache_1 = require("./cache");
74
+ (0, vitest_1.describe)("AIEvalClient.request", () => {
75
+ (0, vitest_1.beforeEach)(() => {
76
+ process.env.EVALAI_API_KEY = "test";
77
+ cache_1.shouldCache.mockReset().mockReturnValue(true);
78
+ cache_1.getTTL.mockReset().mockReturnValue(1000);
79
+ cache_1.cacheTracker.invalidatedPatterns.length = 0;
80
+ });
81
+ (0, vitest_1.it)("caches GET responses and reuses data without re-fetching", async () => {
82
+ const client = new client_1.AIEvalClient({ apiKey: "test", baseUrl: "http://localhost", timeout: 1000 });
83
+ const payload = { items: [1, 2, 3] };
84
+ const fetchMock = vitest_1.vi.fn().mockResolvedValue({
85
+ ok: true,
86
+ status: 200,
87
+ json: async () => payload,
88
+ });
89
+ globalThis.fetch = fetchMock;
90
+ const first = await client.request("/api/traces", { method: "GET" });
91
+ const second = await client.request("/api/traces", { method: "GET" });
92
+ (0, vitest_1.expect)(first).toEqual(payload);
93
+ (0, vitest_1.expect)(second).toEqual(payload);
94
+ (0, vitest_1.expect)(fetchMock).toHaveBeenCalledTimes(1);
95
+ });
96
+ (0, vitest_1.it)("propagates non-ok responses as SDK errors", async () => {
97
+ const client = new client_1.AIEvalClient({ apiKey: "test", baseUrl: "http://localhost" });
98
+ const fetchMock = vitest_1.vi.fn().mockResolvedValue({
99
+ ok: false,
100
+ status: 429,
101
+ json: async () => ({ error: { code: "RATE_LIMIT_EXCEEDED" } }),
102
+ });
103
+ globalThis.fetch = fetchMock;
104
+ const createErrorSpy = vitest_1.vi
105
+ .spyOn(errorsModule, "createErrorFromResponse")
106
+ .mockReturnValue(new errorsModule.EvalAIError("rate limited", "RATE_LIMIT_EXCEEDED", 429));
107
+ await (0, vitest_1.expect)(client.request("/api/fail", { method: "GET" })).rejects.toHaveProperty("code", "RATE_LIMIT_EXCEEDED");
108
+ createErrorSpy.mockRestore();
109
+ });
110
+ (0, vitest_1.it)("retries on retryable SDK errors and eventually succeeds", async () => {
111
+ const client = new client_1.AIEvalClient({ apiKey: "test", baseUrl: "http://localhost", timeout: 1000 });
112
+ vitest_1.vi.spyOn(client, "calculateBackoff").mockReturnValue(0);
113
+ const failureResponse = {
114
+ ok: false,
115
+ status: 429,
116
+ json: async () => ({ error: { code: "RATE_LIMIT_EXCEEDED" } }),
117
+ };
118
+ const successResponse = {
119
+ ok: true,
120
+ status: 200,
121
+ json: async () => ({ ok: true }),
122
+ };
123
+ const createErrorSpy = vitest_1.vi
124
+ .spyOn(errorsModule, "createErrorFromResponse")
125
+ .mockReturnValue(new errorsModule.EvalAIError("rate limited", "RATE_LIMIT_EXCEEDED", 429));
126
+ const fetchMock = vitest_1.vi
127
+ .fn()
128
+ .mockResolvedValueOnce(failureResponse)
129
+ .mockResolvedValueOnce(successResponse);
130
+ globalThis.fetch = fetchMock;
131
+ const result = await client.request("/api/retry", { method: "GET" });
132
+ (0, vitest_1.expect)(result).toEqual({ ok: true });
133
+ (0, vitest_1.expect)(fetchMock).toHaveBeenCalledTimes(2);
134
+ createErrorSpy.mockRestore();
135
+ });
136
+ (0, vitest_1.it)("throws a TIMEOUT SDK error when fetch aborts", async () => {
137
+ const client = new client_1.AIEvalClient({ apiKey: "test", baseUrl: "http://localhost", timeout: 1000 });
138
+ const abortError = Object.assign(new Error("aborted"), { name: "AbortError" });
139
+ const fetchMock = vitest_1.vi.fn().mockRejectedValue(abortError);
140
+ globalThis.fetch = fetchMock;
141
+ await (0, vitest_1.expect)(client.request("/api/timeout", { method: "GET" })).rejects.toMatchObject({
142
+ code: "TIMEOUT",
143
+ });
144
+ });
145
+ (0, vitest_1.it)("invalidates related cache entries for mutation requests", async () => {
146
+ const client = new client_1.AIEvalClient({ apiKey: "test", baseUrl: "http://localhost", timeout: 1000 });
147
+ cache_1.shouldCache.mockReturnValue(false);
148
+ const fetchMock = vitest_1.vi.fn().mockResolvedValue({
149
+ ok: true,
150
+ status: 201,
151
+ json: async () => ({ result: "ok" }),
152
+ });
153
+ globalThis.fetch = fetchMock;
154
+ await client.request("/api/evaluations", { method: "POST", body: JSON.stringify({}) });
155
+ (0, vitest_1.expect)(cache_1.cacheTracker.invalidatedPatterns).toContain("evaluations");
156
+ });
157
+ });
package/dist/index.d.ts CHANGED
@@ -33,5 +33,6 @@ export { batchProcess, batchRead, RateLimiter, streamEvaluation } from "./stream
33
33
  export type { Annotation, AnnotationItem, AnnotationTask, APIKey, APIKeyUsage, APIKeyWithSecret, BatchOptions, ClientConfig as AIEvalConfig, CreateAnnotationItemParams, CreateAnnotationParams, CreateAnnotationTaskParams, CreateAPIKeyParams, CreateLLMJudgeConfigParams, CreateWebhookParams, Evaluation as EvaluationData, ExportOptions, GenericMetadata as AnnotationData, GetLLMJudgeAlignmentParams, GetUsageParams, ImportOptions, ListAnnotationItemsParams, ListAnnotationsParams, ListAnnotationTasksParams, ListAPIKeysParams, ListLLMJudgeConfigsParams, ListLLMJudgeResultsParams, ListWebhookDeliveriesParams, ListWebhooksParams, LLMJudgeAlignment, LLMJudgeConfig, LLMJudgeResult as LLMJudgeData, Organization, RetryConfig, SnapshotData, Span as SpanData, StreamOptions, TestCase, TestResult, Trace as TraceData, TracedResponse, UpdateAPIKeyParams, UpdateWebhookParams, UsageStats, UsageSummary, Webhook, WebhookDelivery, } from "./types";
34
34
  export { EvaluationTemplates, type EvaluationTemplateType, type FeatureUsage, type OrganizationLimits, } from "./types";
35
35
  export { type AgentHandoff, type AgentSpanContext, type CostCategory, type CostRecord, createWorkflowTracer, type DecisionAlternative, type DecisionType, type HandoffType, type LLMProvider, type RecordCostParams, type RecordDecisionParams, traceAutoGen, traceCrewAI, traceLangChainAgent, traceWorkflowStep, type WorkflowContext, type WorkflowDefinition, type WorkflowEdge, type WorkflowNode, type WorkflowStatus, WorkflowTracer, type WorkflowTracerOptions, } from "./workflows";
36
+ export { ARTIFACTS, type Baseline, type BaselineTolerance, GATE_CATEGORY, GATE_EXIT, type GateCategory, type GateExitCode, type RegressionDelta, type RegressionReport, REPORT_SCHEMA_VERSION, } from "./regression";
36
37
  import { AIEvalClient } from "./client";
37
38
  export default AIEvalClient;
package/dist/index.js CHANGED
@@ -9,7 +9,7 @@
9
9
  */
10
10
  Object.defineProperty(exports, "__esModule", { value: true });
11
11
  exports.extendExpectWithToPassGate = exports.Logger = exports.openAIChatEval = exports.traceOpenAI = exports.traceAnthropic = exports.runCheck = exports.parseArgs = exports.EXIT = exports.RequestCache = exports.CacheTTL = exports.RequestBatcher = exports.importData = exports.exportData = exports.compareSnapshots = exports.saveSnapshot = exports.compareWithSnapshot = exports.snapshot = exports.TestSuite = exports.createTestSuite = exports.ContextManager = exports.withContext = exports.getContext = exports.createContext = exports.withinRange = exports.similarTo = exports.respondedWithinTime = exports.notContainsPII = exports.matchesSchema = exports.matchesPattern = exports.isValidURL = exports.isValidEmail = exports.hasValidCodeSyntax = exports.hasSentiment = exports.hasReadabilityScore = exports.hasNoToxicity = exports.hasNoHallucinations = exports.hasLength = exports.hasFactualAccuracy = exports.followsInstructions = exports.expect = exports.containsLanguage = exports.containsKeywords = exports.containsJSON = exports.containsAllRequiredFields = exports.NetworkError = exports.ValidationError = exports.AuthenticationError = exports.RateLimitError = exports.EvalAIError = exports.AIEvalClient = void 0;
12
- exports.WorkflowTracer = exports.traceWorkflowStep = exports.traceLangChainAgent = exports.traceCrewAI = exports.traceAutoGen = exports.createWorkflowTracer = exports.EvaluationTemplates = exports.streamEvaluation = exports.RateLimiter = exports.batchRead = exports.batchProcess = exports.PaginatedIterator = exports.encodeCursor = exports.decodeCursor = exports.createPaginatedIterator = exports.autoPaginate = void 0;
12
+ exports.REPORT_SCHEMA_VERSION = exports.GATE_EXIT = exports.GATE_CATEGORY = exports.ARTIFACTS = exports.WorkflowTracer = exports.traceWorkflowStep = exports.traceLangChainAgent = exports.traceCrewAI = exports.traceAutoGen = exports.createWorkflowTracer = exports.EvaluationTemplates = exports.streamEvaluation = exports.RateLimiter = exports.batchRead = exports.batchProcess = exports.PaginatedIterator = exports.encodeCursor = exports.decodeCursor = exports.createPaginatedIterator = exports.autoPaginate = void 0;
13
13
  // Main SDK exports
14
14
  var client_1 = require("./client");
15
15
  Object.defineProperty(exports, "AIEvalClient", { enumerable: true, get: function () { return client_1.AIEvalClient; } });
@@ -117,6 +117,12 @@ Object.defineProperty(exports, "traceCrewAI", { enumerable: true, get: function
117
117
  Object.defineProperty(exports, "traceLangChainAgent", { enumerable: true, get: function () { return workflows_1.traceLangChainAgent; } });
118
118
  Object.defineProperty(exports, "traceWorkflowStep", { enumerable: true, get: function () { return workflows_1.traceWorkflowStep; } });
119
119
  Object.defineProperty(exports, "WorkflowTracer", { enumerable: true, get: function () { return workflows_1.WorkflowTracer; } });
120
+ // Regression gate constants & types (v1.6.0)
121
+ var regression_1 = require("./regression");
122
+ Object.defineProperty(exports, "ARTIFACTS", { enumerable: true, get: function () { return regression_1.ARTIFACTS; } });
123
+ Object.defineProperty(exports, "GATE_CATEGORY", { enumerable: true, get: function () { return regression_1.GATE_CATEGORY; } });
124
+ Object.defineProperty(exports, "GATE_EXIT", { enumerable: true, get: function () { return regression_1.GATE_EXIT; } });
125
+ Object.defineProperty(exports, "REPORT_SCHEMA_VERSION", { enumerable: true, get: function () { return regression_1.REPORT_SCHEMA_VERSION; } });
120
126
  // Default export for convenience
121
127
  const client_2 = require("./client");
122
128
  exports.default = client_2.AIEvalClient;
@@ -0,0 +1,100 @@
1
+ /**
2
+ * Regression gate constants and types.
3
+ *
4
+ * These mirror the contracts defined in scripts/regression-gate.ts
5
+ * and evals/schemas/regression-report.schema.json so that SDK consumers
6
+ * can programmatically inspect gate results without parsing strings.
7
+ *
8
+ * @packageDocumentation
9
+ */
10
+ /** Exit codes emitted by `evalai gate` / `scripts/regression-gate.ts`. */
11
+ export declare const GATE_EXIT: {
12
+ /** Gate passed — no regressions detected */
13
+ readonly PASS: 0;
14
+ /** One or more regression thresholds exceeded */
15
+ readonly REGRESSION: 1;
16
+ /** Infrastructure error (baseline missing, summary missing, etc.) */
17
+ readonly INFRA_ERROR: 2;
18
+ /** Confidence tests failed (test suite red) */
19
+ readonly CONFIDENCE_FAILED: 3;
20
+ /** Confidence summary file missing (test infra crashed) */
21
+ readonly CONFIDENCE_MISSING: 4;
22
+ };
23
+ export type GateExitCode = (typeof GATE_EXIT)[keyof typeof GATE_EXIT];
24
+ /** Categories written to regression-report.json `category` field. */
25
+ export declare const GATE_CATEGORY: {
26
+ readonly PASS: "pass";
27
+ readonly REGRESSION: "regression";
28
+ readonly INFRA_ERROR: "infra_error";
29
+ };
30
+ export type GateCategory = (typeof GATE_CATEGORY)[keyof typeof GATE_CATEGORY];
31
+ /** Current schema version for regression-report.json. */
32
+ export declare const REPORT_SCHEMA_VERSION = 1;
33
+ export interface RegressionDelta {
34
+ metric: string;
35
+ baseline: number | string;
36
+ current: number | string;
37
+ delta: string;
38
+ status: "pass" | "fail";
39
+ }
40
+ export interface RegressionReport {
41
+ schemaVersion: number;
42
+ timestamp: string;
43
+ exitCode: GateExitCode;
44
+ category: GateCategory;
45
+ passed: boolean;
46
+ failures: string[];
47
+ deltas: RegressionDelta[];
48
+ }
49
+ export interface BaselineTolerance {
50
+ scoreDrop: number;
51
+ passRateDrop: number;
52
+ maxLatencyIncreaseMs: number;
53
+ maxCostIncreaseUsd: number;
54
+ }
55
+ export interface Baseline {
56
+ schemaVersion: number;
57
+ description: string;
58
+ generatedAt: string;
59
+ generatedBy: string;
60
+ commitSha: string;
61
+ updatedAt: string;
62
+ updatedBy: string;
63
+ tolerance: BaselineTolerance;
64
+ goldenEval: {
65
+ score: number;
66
+ passRate: number;
67
+ totalCases: number;
68
+ passedCases: number;
69
+ };
70
+ qualityScore: {
71
+ overall: number;
72
+ grade: string;
73
+ accuracy: number;
74
+ safety: number;
75
+ latency: number;
76
+ cost: number;
77
+ consistency: number;
78
+ };
79
+ confidenceTests: {
80
+ unitPassed: boolean;
81
+ unitTotal: number;
82
+ dbPassed: boolean;
83
+ dbTotal: number;
84
+ };
85
+ productMetrics: {
86
+ p95ApiLatencyMs?: number;
87
+ goldenCostUsd?: number;
88
+ };
89
+ qualityMetrics?: {
90
+ unitLaneDurationMs?: number;
91
+ dbLaneDurationMs?: number;
92
+ };
93
+ }
94
+ /** Well-known artifact paths relative to project root. */
95
+ export declare const ARTIFACTS: {
96
+ readonly BASELINE: "evals/baseline.json";
97
+ readonly REGRESSION_REPORT: "evals/regression-report.json";
98
+ readonly CONFIDENCE_SUMMARY: "evals/confidence-summary.json";
99
+ readonly LATENCY_BENCHMARK: "evals/latency-benchmark.json";
100
+ };
@@ -0,0 +1,44 @@
1
+ "use strict";
2
+ /**
3
+ * Regression gate constants and types.
4
+ *
5
+ * These mirror the contracts defined in scripts/regression-gate.ts
6
+ * and evals/schemas/regression-report.schema.json so that SDK consumers
7
+ * can programmatically inspect gate results without parsing strings.
8
+ *
9
+ * @packageDocumentation
10
+ */
11
+ Object.defineProperty(exports, "__esModule", { value: true });
12
+ exports.ARTIFACTS = exports.REPORT_SCHEMA_VERSION = exports.GATE_CATEGORY = exports.GATE_EXIT = void 0;
13
+ // ── Exit codes ──
14
+ /** Exit codes emitted by `evalai gate` / `scripts/regression-gate.ts`. */
15
+ exports.GATE_EXIT = {
16
+ /** Gate passed — no regressions detected */
17
+ PASS: 0,
18
+ /** One or more regression thresholds exceeded */
19
+ REGRESSION: 1,
20
+ /** Infrastructure error (baseline missing, summary missing, etc.) */
21
+ INFRA_ERROR: 2,
22
+ /** Confidence tests failed (test suite red) */
23
+ CONFIDENCE_FAILED: 3,
24
+ /** Confidence summary file missing (test infra crashed) */
25
+ CONFIDENCE_MISSING: 4,
26
+ };
27
+ // ── Report categories ──
28
+ /** Categories written to regression-report.json `category` field. */
29
+ exports.GATE_CATEGORY = {
30
+ PASS: "pass",
31
+ REGRESSION: "regression",
32
+ INFRA_ERROR: "infra_error",
33
+ };
34
+ // ── Schema version ──
35
+ /** Current schema version for regression-report.json. */
36
+ exports.REPORT_SCHEMA_VERSION = 1;
37
+ // ── Artifact paths ──
38
+ /** Well-known artifact paths relative to project root. */
39
+ exports.ARTIFACTS = {
40
+ BASELINE: "evals/baseline.json",
41
+ REGRESSION_REPORT: "evals/regression-report.json",
42
+ CONFIDENCE_SUMMARY: "evals/confidence-summary.json",
43
+ LATENCY_BENCHMARK: "evals/latency-benchmark.json",
44
+ };
package/dist/version.d.ts CHANGED
@@ -3,5 +3,5 @@
3
3
  * X-EvalAI-SDK-Version: SDK package version
4
4
  * X-EvalAI-Spec-Version: OpenAPI spec version (docs/openapi.json info.version)
5
5
  */
6
- export declare const SDK_VERSION = "1.5.0";
6
+ export declare const SDK_VERSION = "1.6.0";
7
7
  export declare const SPEC_VERSION = "1.0.0";
package/dist/version.js CHANGED
@@ -6,5 +6,5 @@ exports.SPEC_VERSION = exports.SDK_VERSION = void 0;
6
6
  * X-EvalAI-SDK-Version: SDK package version
7
7
  * X-EvalAI-Spec-Version: OpenAPI spec version (docs/openapi.json info.version)
8
8
  */
9
- exports.SDK_VERSION = "1.5.0";
9
+ exports.SDK_VERSION = "1.6.0";
10
10
  exports.SPEC_VERSION = "1.0.0";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pauly4010/evalai-sdk",
3
- "version": "1.5.8",
3
+ "version": "1.6.0",
4
4
  "description": "AI Evaluation Platform SDK - Complete API Coverage with Performance Optimizations",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.js",
@@ -102,6 +102,11 @@
102
102
  "import": "./dist/matchers/index.js",
103
103
  "require": "./dist/matchers/index.js",
104
104
  "types": "./dist/matchers/index.d.ts"
105
+ },
106
+ "./regression": {
107
+ "import": "./dist/regression.js",
108
+ "require": "./dist/regression.js",
109
+ "types": "./dist/regression.d.ts"
105
110
  }
106
111
  }
107
112
  }