@evalgate/sdk 2.2.2 → 2.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/CHANGELOG.md +32 -0
  2. package/README.md +40 -1
  3. package/dist/assertions.d.ts +194 -10
  4. package/dist/assertions.js +525 -73
  5. package/dist/batch.js +4 -4
  6. package/dist/cache.d.ts +5 -1
  7. package/dist/cache.js +5 -1
  8. package/dist/cli/baseline.d.ts +14 -0
  9. package/dist/cli/baseline.js +43 -3
  10. package/dist/cli/check.d.ts +5 -2
  11. package/dist/cli/check.js +20 -12
  12. package/dist/cli/compare.d.ts +80 -0
  13. package/dist/cli/compare.js +266 -0
  14. package/dist/cli/index.js +244 -101
  15. package/dist/cli/regression-gate.js +23 -0
  16. package/dist/cli/run.js +22 -0
  17. package/dist/cli/start.d.ts +26 -0
  18. package/dist/cli/start.js +130 -0
  19. package/dist/cli/templates.d.ts +24 -0
  20. package/dist/cli/templates.js +314 -0
  21. package/dist/cli/traces.d.ts +109 -0
  22. package/dist/cli/traces.js +152 -0
  23. package/dist/cli/upgrade.js +5 -0
  24. package/dist/cli/validate.d.ts +37 -0
  25. package/dist/cli/validate.js +252 -0
  26. package/dist/cli/watch.d.ts +19 -0
  27. package/dist/cli/watch.js +175 -0
  28. package/dist/client.js +6 -13
  29. package/dist/constants.d.ts +2 -0
  30. package/dist/constants.js +5 -0
  31. package/dist/errors.js +7 -0
  32. package/dist/export.js +2 -2
  33. package/dist/index.d.ts +10 -9
  34. package/dist/index.js +24 -7
  35. package/dist/integrations/anthropic.js +6 -6
  36. package/dist/integrations/openai.js +84 -61
  37. package/dist/logger.d.ts +3 -1
  38. package/dist/logger.js +2 -1
  39. package/dist/otel.d.ts +130 -0
  40. package/dist/otel.js +309 -0
  41. package/dist/pagination.d.ts +13 -2
  42. package/dist/pagination.js +28 -2
  43. package/dist/runtime/adapters/testsuite-to-dsl.js +1 -6
  44. package/dist/runtime/eval.d.ts +14 -4
  45. package/dist/runtime/eval.js +127 -2
  46. package/dist/runtime/executor.d.ts +3 -2
  47. package/dist/runtime/executor.js +3 -2
  48. package/dist/runtime/registry.d.ts +8 -3
  49. package/dist/runtime/registry.js +15 -4
  50. package/dist/runtime/run-report.d.ts +1 -1
  51. package/dist/runtime/run-report.js +7 -4
  52. package/dist/runtime/types.d.ts +38 -0
  53. package/dist/snapshot.d.ts +12 -0
  54. package/dist/snapshot.js +24 -1
  55. package/dist/testing.d.ts +8 -0
  56. package/dist/testing.js +45 -10
  57. package/dist/version.d.ts +2 -2
  58. package/dist/version.js +2 -2
  59. package/dist/workflows.d.ts +2 -0
  60. package/dist/workflows.js +184 -102
  61. package/package.json +8 -1
@@ -0,0 +1,252 @@
1
+ "use strict";
2
+ /**
3
+ * evalgate validate — static validation of spec files without execution
4
+ *
5
+ * The equivalent of `tsc --noEmit` for eval specs. Catches:
6
+ * - Missing or malformed defineEval calls
7
+ * - Executor functions that don't return EvalResult shape
8
+ * - Invalid spec names (characters, length)
9
+ * - Empty spec files
10
+ * - Missing required fields in config-form defineEval
11
+ *
12
+ * Usage:
13
+ * evalgate validate
14
+ * evalgate validate --format json
15
+ */
16
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
17
+ if (k2 === undefined) k2 = k;
18
+ var desc = Object.getOwnPropertyDescriptor(m, k);
19
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
20
+ desc = { enumerable: true, get: function() { return m[k]; } };
21
+ }
22
+ Object.defineProperty(o, k2, desc);
23
+ }) : (function(o, m, k, k2) {
24
+ if (k2 === undefined) k2 = k;
25
+ o[k2] = m[k];
26
+ }));
27
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
28
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
29
+ }) : function(o, v) {
30
+ o["default"] = v;
31
+ });
32
+ var __importStar = (this && this.__importStar) || (function () {
33
+ var ownKeys = function(o) {
34
+ ownKeys = Object.getOwnPropertyNames || function (o) {
35
+ var ar = [];
36
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
37
+ return ar;
38
+ };
39
+ return ownKeys(o);
40
+ };
41
+ return function (mod) {
42
+ if (mod && mod.__esModule) return mod;
43
+ var result = {};
44
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
45
+ __setModuleDefault(result, mod);
46
+ return result;
47
+ };
48
+ })();
49
+ Object.defineProperty(exports, "__esModule", { value: true });
50
+ exports.runValidate = runValidate;
51
+ const fs = __importStar(require("node:fs"));
52
+ const path = __importStar(require("node:path"));
53
+ const execution_mode_1 = require("../runtime/execution-mode");
/**
 * Allowed spec-name characters: letters, digits, whitespace, hyphens and
 * underscores. Intended to mirror the runtime's validateSpecName (that
 * function is not visible from this file — keep the two in sync).
 */
const VALID_NAME_RE = /^[a-zA-Z0-9\s\-_]+$/;
const MAX_NAME_LENGTH = 100;
/**
 * Static patterns we look for in spec files.
 *
 * Patterns consumed by String.prototype.match / matchAll carry the `g` flag.
 * Patterns consumed only by RegExp.prototype.test deliberately do NOT: a
 * global regex remembers `lastIndex` between calls, so a shared module-level
 * instance would start searching the next file mid-way and miss matches.
 */
// Matches plain `defineEval(` and any `defineEval.` member access
// (.skip, .only, .fromDataset, ...), so it alone counts every call form.
const DEFINE_EVAL_RE = /defineEval\s*[.(]/g;
const DEFINE_EVAL_NAME_RE = /defineEval\s*\(\s*["'`]([^"'`]*)["'`]/g;
const DEFINE_EVAL_CONFIG_RE = /defineEval\s*\(\s*\{/g;
const EXECUTOR_RETURN_RE = /return\s*\{[^}]*pass\s*:/;
const CREATE_RESULT_RE = /createResult\s*\(/;
/**
 * 1-based line number of the character at `index` within `content`.
 */
function lineNumberAt(content, index) {
    return content.substring(0, index).split("\n").length;
}
/**
 * Statically inspect one spec file and report problems without executing it.
 *
 * All checks are text heuristics (regular expressions and substring
 * searches), so occurrences inside comments or strings are matched too.
 *
 * @param {string} filePath Path of the spec file to read.
 * @returns {Array<{severity: "error"|"warn", file: string, line?: number, code: string, message: string}>}
 *   Issues found, in detection order; `file` is relative to process.cwd().
 *   Never throws for an unreadable file — that is reported as FILE_UNREADABLE.
 */
function analyzeFile(filePath) {
    const issues = [];
    const relPath = path.relative(process.cwd(), filePath);
    let content;
    try {
        content = fs.readFileSync(filePath, "utf8");
    }
    catch {
        issues.push({
            severity: "error",
            file: relPath,
            code: "FILE_UNREADABLE",
            message: `Cannot read file: ${relPath}`,
        });
        return issues;
    }
    if (content.trim().length === 0) {
        issues.push({
            severity: "error",
            file: relPath,
            code: "EMPTY_FILE",
            message: "Spec file is empty",
        });
        return issues;
    }
    const lines = content.split("\n");
    // Count defineEval calls (all variants, each counted once)
    const totalCalls = content.match(DEFINE_EVAL_RE)?.length ?? 0;
    if (totalCalls === 0) {
        issues.push({
            severity: "warn",
            file: relPath,
            code: "NO_DEFINE_EVAL",
            message: "No defineEval() calls found. File may not define any specs.",
        });
    }
    // Validate spec names given as a string literal first argument
    for (const match of content.matchAll(DEFINE_EVAL_NAME_RE)) {
        const name = match[1];
        const lineNum = lineNumberAt(content, match.index ?? 0);
        if (!name || name.trim() === "") {
            issues.push({
                severity: "error",
                file: relPath,
                line: lineNum,
                code: "EMPTY_NAME",
                message: "Spec name is empty",
            });
            continue;
        }
        if (name.length > MAX_NAME_LENGTH) {
            issues.push({
                severity: "error",
                file: relPath,
                line: lineNum,
                code: "NAME_TOO_LONG",
                message: `Spec name "${name.slice(0, 30)}..." exceeds ${MAX_NAME_LENGTH} characters`,
            });
        }
        if (!VALID_NAME_RE.test(name)) {
            issues.push({
                severity: "error",
                file: relPath,
                line: lineNum,
                code: "INVALID_NAME",
                message: `Spec name "${name}" contains invalid characters (only letters, numbers, spaces, hyphens, underscores allowed)`,
            });
        }
    }
    // Check config-form defineEval calls have required fields
    for (const match of content.matchAll(DEFINE_EVAL_CONFIG_RE)) {
        const lineNum = lineNumberAt(content, match.index ?? 0);
        // Simple heuristic: look for 'name:' and 'executor:' in the 20 lines
        // starting at the call itself
        const contextLines = lines.slice(lineNum - 1, lineNum + 19).join("\n");
        if (!contextLines.includes("name:") && !contextLines.includes("name :")) {
            issues.push({
                severity: "error",
                file: relPath,
                line: lineNum,
                code: "MISSING_NAME",
                message: "Config-form defineEval() missing required 'name' field",
            });
        }
        if (!contextLines.includes("executor:") &&
            !contextLines.includes("executor :")) {
            issues.push({
                severity: "error",
                file: relPath,
                line: lineNum,
                code: "MISSING_EXECUTOR",
                message: "Config-form defineEval() missing required 'executor' field",
            });
        }
    }
    // Check that executors return EvalResult shape. Both patterns are
    // non-global, so these tests are independent of previously analyzed files.
    const hasCreateResult = CREATE_RESULT_RE.test(content);
    const hasReturnPass = EXECUTOR_RETURN_RE.test(content);
    if (totalCalls > 0 && !hasCreateResult && !hasReturnPass) {
        issues.push({
            severity: "warn",
            file: relPath,
            code: "NO_RESULT_SHAPE",
            message: "No createResult() or return { pass: ... } found. Executors may not return the required EvalResult shape.",
        });
    }
    return issues;
}
/**
 * Entry point for `evalgate validate`: statically checks every spec file
 * reported by getExecutionMode() for the current working directory and
 * prints a report.
 *
 * Output is JSON when `--format json` is present in `args`; any other value
 * (or no flag) selects the human-readable report. The function only prints
 * and returns — it does not set an exit code itself.
 *
 * @param {string[]} [args] CLI arguments following the `validate` command.
 * @returns {Promise<{filesScanned: number, filesWithIssues: number, issues: object[], passed: boolean}>}
 *   `passed` is true when no issue has severity "error".
 */
async function runValidate(args = []) {
    const flagPosition = args.indexOf("--format");
    const format = flagPosition === -1 ? "human" : args[flagPosition + 1];
    const asJson = format === "json";
    const projectRoot = process.cwd();
    const { specFiles } = await (0, execution_mode_1.getExecutionMode)(projectRoot);
    // Nothing to scan: report an empty, passing result.
    if (specFiles.length === 0) {
        const emptyResult = {
            filesScanned: 0,
            filesWithIssues: 0,
            issues: [],
            passed: true,
        };
        if (asJson) {
            console.log(JSON.stringify(emptyResult, null, 2));
        }
        else {
            console.log("\n✨ No spec files found. Nothing to validate.");
            console.log("💡 Create files with defineEval() calls to get started.");
        }
        return emptyResult;
    }
    // Gather issues from every file, tallying severities as we go.
    const allIssues = [];
    const flaggedFiles = new Set();
    let errorCount = 0;
    let warningCount = 0;
    for (const specFile of specFiles) {
        for (const issue of analyzeFile(specFile)) {
            allIssues.push(issue);
            flaggedFiles.add(issue.file);
            if (issue.severity === "error") {
                errorCount += 1;
            }
            else if (issue.severity === "warn") {
                warningCount += 1;
            }
        }
    }
    const passed = errorCount === 0;
    const result = {
        filesScanned: specFiles.length,
        filesWithIssues: flaggedFiles.size,
        issues: allIssues,
        passed,
    };
    if (asJson) {
        console.log(JSON.stringify(result, null, 2));
        return result;
    }
    const plural = (count) => (count === 1 ? "" : "s");
    console.log(`\n🔍 Validated ${specFiles.length} spec file${plural(specFiles.length)}`);
    if (allIssues.length === 0) {
        console.log("✅ All spec files are valid.\n");
        return result;
    }
    for (const { severity, file, line, code, message } of allIssues) {
        const location = line ? `:${line}` : "";
        const icon = severity === "error" ? "❌" : "⚠️";
        console.log(` ${icon} ${file}${location} [${code}] ${message}`);
    }
    console.log(`\n${errorCount} error${plural(errorCount)}, ${warningCount} warning${plural(warningCount)}`);
    console.log(passed
        ? "✅ Validation passed (warnings only).\n"
        : "❌ Validation failed.\n");
    return result;
}
@@ -0,0 +1,19 @@
1
+ /**
2
+ * Watch mode for evalgate run
3
+ *
4
+ * Re-executes evaluation specs when source files change.
5
+ * Uses Node.js fs.watch with debouncing to avoid rapid re-runs.
6
+ */
7
+ import type { RunOptions } from "./run";
/**
 * Options for watch mode: every RunOptions field plus watch-specific settings.
 */
export interface WatchOptions extends RunOptions {
    /** Quiet period, in milliseconds, to wait after a change before re-running (default: 300) */
    debounceMs?: number;
    /** Extra directories to watch in addition to the default ones */
    extraWatchDirs?: string[];
    /** Whether to clear the terminal between runs */
    clearScreen?: boolean;
}
/**
 * Start watch mode: run the evaluations once, then run them again whenever
 * watched files change.
 */
export declare function runWatch(options: WatchOptions, projectRoot?: string): Promise<void>;
@@ -0,0 +1,175 @@
1
+ "use strict";
2
+ /**
3
+ * Watch mode for evalgate run
4
+ *
5
+ * Re-executes evaluation specs when source files change.
6
+ * Uses Node.js fs.watch with debouncing to avoid rapid re-runs.
7
+ */
8
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
9
+ if (k2 === undefined) k2 = k;
10
+ var desc = Object.getOwnPropertyDescriptor(m, k);
11
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
12
+ desc = { enumerable: true, get: function() { return m[k]; } };
13
+ }
14
+ Object.defineProperty(o, k2, desc);
15
+ }) : (function(o, m, k, k2) {
16
+ if (k2 === undefined) k2 = k;
17
+ o[k2] = m[k];
18
+ }));
19
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
20
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
21
+ }) : function(o, v) {
22
+ o["default"] = v;
23
+ });
24
+ var __importStar = (this && this.__importStar) || (function () {
25
+ var ownKeys = function(o) {
26
+ ownKeys = Object.getOwnPropertyNames || function (o) {
27
+ var ar = [];
28
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
29
+ return ar;
30
+ };
31
+ return ownKeys(o);
32
+ };
33
+ return function (mod) {
34
+ if (mod && mod.__esModule) return mod;
35
+ var result = {};
36
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
37
+ __setModuleDefault(result, mod);
38
+ return result;
39
+ };
40
+ })();
41
+ Object.defineProperty(exports, "__esModule", { value: true });
42
+ exports.runWatch = runWatch;
43
+ const fs = __importStar(require("node:fs"));
44
+ const path = __importStar(require("node:path"));
45
+ const run_1 = require("./run");
/**
 * File extensions (lower-case, leading dot) whose changes trigger a re-run.
 */
const WATCHED_EXTENSIONS = new Set([".ts", ".tsx", ".js", ".jsx", ".json", ".jsonl", ".csv"]);
/**
 * Decide whether a path reported by fs.watch should trigger a re-run.
 *
 * With `recursive: true` the reported name may be a nested relative path, so
 * every path segment is checked for a leading dot — not just the first
 * character of the whole string.
 */
function shouldTriggerRerun(filename) {
    if (!filename)
        return false;
    // Skip anything under node_modules
    if (filename.includes("node_modules"))
        return false;
    // Skip hidden files and anything inside a hidden directory
    if (filename.split(/[\\/]/).some((segment) => segment.startsWith(".")))
        return false;
    // Only watch relevant file types
    return WATCHED_EXTENSIONS.has(path.extname(filename).toLowerCase());
}
/**
 * Start watch mode — runs evaluations and re-runs on file changes.
 *
 * Watches `eval/`, `evals/` and `src/` under `projectRoot` when they exist,
 * plus any existing `options.extraWatchDirs`. Changes are debounced; a change
 * that arrives while a run is still in progress queues exactly one follow-up
 * run instead of being dropped.
 *
 * @param options Run options plus `debounceMs` (default 300),
 *   `extraWatchDirs` and `clearScreen` (default true).
 * @param projectRoot Directory that relative watch paths are resolved
 *   against (default: process.cwd()).
 * @returns A promise that never resolves. The process exits with code 1 when
 *   there is nothing to watch or no watcher could be started, and with code 0
 *   on SIGINT/SIGTERM.
 */
async function runWatch(options, projectRoot = process.cwd()) {
    const debounceMs = options.debounceMs ?? 300;
    const clearScreen = options.clearScreen ?? true;
    // Directories to watch
    const watchDirs = new Set();
    // Default directories: eval/, evals/ (specs) and src/ (code that may affect evals)
    for (const name of ["eval", "evals", "src"]) {
        const candidate = path.join(projectRoot, name);
        if (fs.existsSync(candidate))
            watchDirs.add(candidate);
    }
    // Add extra watch dirs
    for (const dir of options.extraWatchDirs ?? []) {
        const resolved = path.isAbsolute(dir) ? dir : path.join(projectRoot, dir);
        if (fs.existsSync(resolved))
            watchDirs.add(resolved);
    }
    if (watchDirs.size === 0) {
        console.error("❌ No directories to watch. Create eval/, evals/, or src/ directory.");
        process.exit(1);
    }
    console.log("👁️ Watch mode enabled");
    console.log(` Watching: ${[...watchDirs].map((d) => path.relative(projectRoot, d) || ".").join(", ")}`);
    console.log(` Debounce: ${debounceMs}ms`);
    console.log(" Press Ctrl+C to stop\n");
    // Initial run
    await executeRun(options, projectRoot, clearScreen, false);
    // Set up watchers with debouncing
    let debounceTimer = null;
    let isRunning = false;
    let rerunQueued = false;
    const runNow = async () => {
        if (isRunning) {
            // Remember the change so it is picked up once the current run ends
            rerunQueued = true;
            return;
        }
        isRunning = true;
        try {
            do {
                rerunQueued = false;
                await executeRun(options, projectRoot, clearScreen, true);
            } while (rerunQueued);
        }
        finally {
            isRunning = false;
        }
    };
    const triggerRun = () => {
        if (debounceTimer)
            clearTimeout(debounceTimer);
        debounceTimer = setTimeout(() => {
            debounceTimer = null;
            runNow().catch((err) => {
                console.error("❌ Watch re-run failed:", err instanceof Error ? err.message : String(err));
            });
        }, debounceMs);
    };
    const watchers = [];
    for (const dir of watchDirs) {
        try {
            const watcher = fs.watch(dir, { recursive: true }, (eventType, filename) => {
                if (!shouldTriggerRerun(filename))
                    return;
                console.log(`\n🔄 Change detected: ${filename} (${eventType})`);
                triggerRun();
            });
            // Without a listener an emitted 'error' would be thrown as an
            // unhandled 'error' event and take the whole process down
            watcher.on("error", (err) => {
                console.warn(`⚠️ Watcher error for ${path.relative(projectRoot, dir)}: ${err instanceof Error ? err.message : String(err)}`);
            });
            watchers.push(watcher);
        }
        catch (err) {
            console.warn(`⚠️ Could not watch ${path.relative(projectRoot, dir)}: ${err instanceof Error ? err.message : String(err)}`);
        }
    }
    if (watchers.length === 0) {
        // Nothing would ever trigger a re-run; fail loudly instead of idling
        console.error("❌ Could not start any file watchers.");
        process.exit(1);
    }
    // Handle graceful shutdown
    const cleanup = () => {
        console.log("\n\n👋 Watch mode stopped.");
        for (const watcher of watchers) {
            watcher.close();
        }
        if (debounceTimer)
            clearTimeout(debounceTimer);
        process.exit(0);
    };
    process.on("SIGINT", cleanup);
    process.on("SIGTERM", cleanup);
    // Park the caller until the process is interrupted
    await new Promise(() => {
        // Never resolves — watch runs until interrupted
    });
}
/**
 * Perform one evaluation run for watch mode and print its results.
 *
 * Failures are reported on the console and turned into a `null` return, so
 * this function neither rethrows nor calls process.exit.
 *
 * @param options Run options; only specIds, impactedOnly, baseBranch, format
 *   and writeResults are forwarded to runEvaluations.
 * @param projectRoot Project directory passed through to runEvaluations.
 * @param clearScreen Whether the terminal may be cleared before a re-run.
 * @param isRerun False for the initial run, true for change-triggered runs.
 * @returns The value resolved by runEvaluations, or null when the run failed.
 */
async function executeRun(options, projectRoot, clearScreen, isRerun) {
    const shouldClear = clearScreen && isRerun;
    if (shouldClear) {
        // ANSI: erase the display, then move the cursor to the top-left
        process.stdout.write("\x1B[2J\x1B[0f");
    }
    const startedAt = new Date().toLocaleTimeString();
    const verb = isRerun ? "🔄 Re-running" : "▶️ Running";
    console.log(`${verb} evaluations... (${startedAt})`);
    try {
        const { specIds, impactedOnly, baseBranch, format, writeResults } = options;
        const outcome = await (0, run_1.runEvaluations)({ specIds, impactedOnly, baseBranch, format, writeResults }, projectRoot);
        const printResults = format === "json" ? run_1.printJsonResults : run_1.printHumanResults;
        printResults(outcome);
        // Print watch-specific summary
        const { summary, results } = outcome;
        const statusIcon = summary.failed > 0 ? "❌" : "✅";
        console.log(`\n${statusIcon} ${summary.passed}/${results.length} passed | Waiting for changes...`);
        return outcome;
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        console.error("❌ Run failed:", reason);
        console.log("\n⏳ Waiting for changes...");
        return null;
    }
}
package/dist/client.js CHANGED
@@ -3,6 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.AIEvalClient = void 0;
4
4
  const batch_1 = require("./batch");
5
5
  const cache_1 = require("./cache");
6
+ const constants_1 = require("./constants");
6
7
  const context_1 = require("./context");
7
8
  const errors_1 = require("./errors");
8
9
  const logger_1 = require("./logger");
@@ -72,7 +73,7 @@ class AIEvalClient {
72
73
  this.baseUrl =
73
74
  config.baseUrl ||
74
75
  getEnvVar("EVALGATE_BASE_URL", "EVALAI_BASE_URL") ||
75
- (isBrowser ? "" : "http://localhost:3000");
76
+ (isBrowser ? "" : constants_1.DEFAULT_BASE_URL);
76
77
  this.timeout = config.timeout || 30000;
77
78
  // Tier 4.17: Debug mode with request logging
78
79
  const logLevel = config.logLevel || (config.debug ? "debug" : "info");
@@ -100,7 +101,7 @@ class AIEvalClient {
100
101
  const MAX_CONCURRENCY = 5;
101
102
  this.batcher = new batch_1.RequestBatcher(async (requests) => {
102
103
  const results = [];
103
- const executing = [];
104
+ const executing = new Set();
104
105
  for (const req of requests) {
105
106
  const task = (async () => {
106
107
  try {
@@ -121,18 +122,10 @@ class AIEvalClient {
121
122
  });
122
123
  }
123
124
  })();
124
- executing.push(task);
125
- if (executing.length >= MAX_CONCURRENCY) {
125
+ const tracked = task.finally(() => executing.delete(tracked));
126
+ executing.add(tracked);
127
+ if (executing.size >= MAX_CONCURRENCY) {
126
128
  await Promise.race(executing);
127
- // Remove settled promises
128
- for (let i = executing.length - 1; i >= 0; i--) {
129
- const settled = await Promise.race([
130
- executing[i].then(() => true),
131
- Promise.resolve(false),
132
- ]);
133
- if (settled)
134
- executing.splice(i, 1);
135
- }
136
129
  }
137
130
  }
138
131
  await Promise.allSettled(executing);
@@ -0,0 +1,2 @@
1
+ /** Default API base URL for the EvalGate platform. */
2
+ export declare const DEFAULT_BASE_URL = "https://api.evalgate.com";
@@ -0,0 +1,5 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.DEFAULT_BASE_URL = void 0;
4
+ /** Default API base URL for the EvalGate platform. */
5
+ exports.DEFAULT_BASE_URL = "https://api.evalgate.com";
package/dist/errors.js CHANGED
@@ -271,6 +271,10 @@ class RateLimitError extends EvalGateError {
271
271
  constructor(message, retryAfter) {
272
272
  super(message, "RATE_LIMIT_EXCEEDED", 429, { retryAfter });
273
273
  this.name = "RateLimitError";
274
+ if (retryAfter !== undefined) {
275
+ this.retryAfter = retryAfter;
276
+ }
277
+ Object.setPrototypeOf(this, RateLimitError.prototype);
274
278
  }
275
279
  }
276
280
  exports.RateLimitError = RateLimitError;
@@ -278,6 +282,7 @@ class AuthenticationError extends EvalGateError {
278
282
  constructor(message = "Authentication failed") {
279
283
  super(message, "AUTHENTICATION_ERROR", 401);
280
284
  this.name = "AuthenticationError";
285
+ Object.setPrototypeOf(this, AuthenticationError.prototype);
281
286
  }
282
287
  }
283
288
  exports.AuthenticationError = AuthenticationError;
@@ -285,6 +290,7 @@ class ValidationError extends EvalGateError {
285
290
  constructor(message = "Validation failed", details) {
286
291
  super(message, "VALIDATION_ERROR", 400, details);
287
292
  this.name = "ValidationError";
293
+ Object.setPrototypeOf(this, ValidationError.prototype);
288
294
  }
289
295
  }
290
296
  exports.ValidationError = ValidationError;
@@ -293,6 +299,7 @@ class NetworkError extends EvalGateError {
293
299
  super(message, "NETWORK_ERROR", 0);
294
300
  this.name = "NetworkError";
295
301
  this.retryable = true;
302
+ Object.setPrototypeOf(this, NetworkError.prototype);
296
303
  }
297
304
  }
298
305
  exports.NetworkError = NetworkError;
package/dist/export.js CHANGED
@@ -155,7 +155,7 @@ async function importData(client, data, options = {}) {
155
155
  return result;
156
156
  }
157
157
  // Import traces
158
- if (data.traces) {
158
+ if (data.traces && client?.traces) {
159
159
  const traceResults = { imported: 0, skipped: 0, failed: 0 };
160
160
  for (const trace of data.traces) {
161
161
  try {
@@ -191,7 +191,7 @@ async function importData(client, data, options = {}) {
191
191
  result.summary.total += data.traces.length;
192
192
  }
193
193
  // Import evaluations
194
- if (data.evaluations) {
194
+ if (data.evaluations && client?.evaluations) {
195
195
  const evalResults = { imported: 0, skipped: 0, failed: 0 };
196
196
  for (const evaluation of data.evaluations) {
197
197
  try {
package/dist/index.d.ts CHANGED
@@ -7,34 +7,35 @@
7
7
  * @packageDocumentation
8
8
  */
9
9
  export { AIEvalClient } from "./client";
10
- import { AuthenticationError, EvalGateError, NetworkError, RateLimitError, SDKError } from "./errors";
11
- export { EvalGateError, RateLimitError, AuthenticationError, SDKError as ValidationError, // Using SDKError as ValidationError for backward compatibility
12
- NetworkError, };
13
- export { type AssertionLLMConfig, configureAssertions, containsAllRequiredFields, containsJSON, containsKeywords, containsLanguage, containsLanguageAsync, expect, followsInstructions, getAssertionConfig, hasFactualAccuracy, hasFactualAccuracyAsync, hasLength, hasNoHallucinations, hasNoHallucinationsAsync, hasNoToxicity, hasNoToxicityAsync, hasPII, hasReadabilityScore, hasSentiment, hasSentimentAsync, hasValidCodeSyntax, hasValidCodeSyntaxAsync, isValidEmail, isValidURL, matchesPattern, matchesSchema, notContainsPII, respondedWithinTime, similarTo, withinRange, } from "./assertions";
10
+ import { AuthenticationError, EvalGateError, NetworkError, RateLimitError, ValidationError } from "./errors";
11
+ export { EvalGateError, RateLimitError, AuthenticationError, ValidationError, NetworkError, };
12
+ export { type AssertionLLMConfig, configureAssertions, containsAllRequiredFields, containsJSON, containsKeywords, containsLanguage, containsLanguageAsync, expect, followsInstructions, getAssertionConfig, hasConsistency, hasConsistencyAsync, hasFactualAccuracy, hasFactualAccuracyAsync, hasLength, hasNoHallucinations, hasNoHallucinationsAsync, hasNoToxicity, hasNoToxicityAsync, hasPII, hasReadabilityScore, hasSentiment, hasSentimentAsync, hasSentimentWithScore, hasValidCodeSyntax, hasValidCodeSyntaxAsync, isValidEmail, isValidURL, matchesPattern, matchesSchema, notContainsPII, respondedWithinDuration, respondedWithinTime, respondedWithinTimeSince, type SentimentAsyncResult, similarTo, toSemanticallyContain, withinRange, } from "./assertions";
13
+ export { EvalGateError as SDKError } from "./errors";
14
14
  import { createContext, EvalContext, getCurrentContext, withContext } from "./context";
15
15
  export { createContext, getCurrentContext as getContext, withContext, EvalContext as ContextManager, };
16
16
  export { cloneContext, mergeContexts, validateContext, } from "./runtime/context";
17
- export { createContext as createEvalContext, createResult, defineEval, defineSuite, evalai, } from "./runtime/eval";
17
+ export { createContext as createEvalContext, createResult, defineEval, defineSuite, evalai, getFilteredSpecs, } from "./runtime/eval";
18
18
  export { createLocalExecutor, defaultLocalExecutor, } from "./runtime/executor";
19
19
  export { createEvalRuntime, disposeActiveRuntime, getActiveRuntime, setActiveRuntime, } from "./runtime/registry";
20
20
  export type { CloudExecutor, DefineEvalFunction, EvalContext, EvalExecutor, EvalExecutorInterface, EvalOptions, EvalResult, EvalRuntime, EvalSpec, ExecutorCapabilities, LocalExecutor, SpecConfig, SpecOptions, WorkerExecutor, } from "./runtime/types";
21
21
  export { EvalRuntimeError, RuntimeError, SpecExecutionError, SpecRegistrationError, } from "./runtime/types";
22
22
  export { createTestSuite, type TestCaseResult, TestSuite, TestSuiteCase, TestSuiteCaseResult, TestSuiteConfig, TestSuiteResult, } from "./testing";
23
- import { compareWithSnapshot, snapshot } from "./snapshot";
24
- export { snapshot, compareWithSnapshot, snapshot as saveSnapshot, compareWithSnapshot as compareSnapshots, };
23
+ import { compareSnapshots, compareWithSnapshot, snapshot } from "./snapshot";
24
+ export { snapshot, compareWithSnapshot, compareSnapshots, snapshot as saveSnapshot, };
25
25
  import type { ExportFormat } from "./export";
26
26
  import { exportData, importData } from "./export";
27
27
  export { exportData, importData };
28
28
  export type { ExportFormat, ExportFormat as ExportType };
29
29
  export { RequestBatcher } from "./batch";
30
- export { CacheTTL, RequestCache } from "./cache";
30
+ export { CacheTTL } from "./cache";
31
31
  export { type CheckArgs, EXIT, parseArgs, runCheck } from "./cli/check";
32
32
  export { traceAnthropic } from "./integrations/anthropic";
33
33
  export { traceOpenAI } from "./integrations/openai";
34
34
  export { type OpenAIChatEvalCase, type OpenAIChatEvalOptions, type OpenAIChatEvalResult, openAIChatEval, } from "./integrations/openai-eval";
35
35
  export { Logger } from "./logger";
36
36
  export { extendExpectWithToPassGate } from "./matchers";
37
- export { autoPaginate, createPaginatedIterator, decodeCursor, encodeCursor, PaginatedIterator, type PaginatedResponse, type PaginationParams, } from "./pagination";
37
+ export { createOTelExporter, type OTelAttribute, type OTelEvent, OTelExporter, type OTelExporterOptions, type OTelExportPayload, type OTelSpan, } from "./otel";
38
+ export { autoPaginate, autoPaginateGenerator, createPaginatedIterator, decodeCursor, encodeCursor, PaginatedIterator, type PaginatedResponse, type PaginationParams, } from "./pagination";
38
39
  export { ARTIFACTS, type Baseline, type BaselineTolerance, GATE_CATEGORY, GATE_EXIT, type GateCategory, type GateExitCode, REPORT_SCHEMA_VERSION, type RegressionDelta, type RegressionReport, } from "./regression";
39
40
  export { batchProcess, batchRead, RateLimiter, streamEvaluation, } from "./streaming";
40
41
  export type { Annotation, AnnotationItem, AnnotationTask, APIKey, APIKeyUsage, APIKeyWithSecret, BatchOptions, ClientConfig as AIEvalConfig, CreateAnnotationItemParams, CreateAnnotationParams, CreateAnnotationTaskParams, CreateAPIKeyParams, CreateLLMJudgeConfigParams, CreateWebhookParams, Evaluation as EvaluationData, EvaluationRun, EvaluationRunDetail, ExportOptions, GenericMetadata as AnnotationData, GetLLMJudgeAlignmentParams, GetUsageParams, ImportOptions, ListAnnotationItemsParams, ListAnnotationsParams, ListAnnotationTasksParams, ListAPIKeysParams, ListLLMJudgeConfigsParams, ListLLMJudgeResultsParams, ListWebhookDeliveriesParams, ListWebhooksParams, LLMJudgeAlignment, LLMJudgeConfig, LLMJudgeEvaluateResult, LLMJudgeResult as LLMJudgeData, Organization, RetryConfig, SnapshotData, Span as SpanData, StreamOptions, TestCase, TestResult, Trace as TraceData, TraceDetail, TracedResponse, UpdateAPIKeyParams, UpdateWebhookParams, UsageStats, UsageSummary, Webhook, WebhookDelivery, } from "./types";