@evalgate/sdk 2.2.3 → 2.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/CHANGELOG.md +5 -0
  2. package/README.md +38 -1
  3. package/dist/assertions.d.ts +185 -5
  4. package/dist/assertions.js +496 -61
  5. package/dist/batch.js +4 -4
  6. package/dist/cache.d.ts +4 -0
  7. package/dist/cache.js +4 -0
  8. package/dist/cli/baseline.d.ts +14 -0
  9. package/dist/cli/baseline.js +43 -3
  10. package/dist/cli/check.d.ts +5 -2
  11. package/dist/cli/check.js +20 -12
  12. package/dist/cli/compare.d.ts +80 -0
  13. package/dist/cli/compare.js +266 -0
  14. package/dist/cli/index.js +244 -101
  15. package/dist/cli/regression-gate.js +23 -0
  16. package/dist/cli/run.js +22 -0
  17. package/dist/cli/start.d.ts +26 -0
  18. package/dist/cli/start.js +130 -0
  19. package/dist/cli/templates.d.ts +24 -0
  20. package/dist/cli/templates.js +314 -0
  21. package/dist/cli/traces.d.ts +109 -0
  22. package/dist/cli/traces.js +152 -0
  23. package/dist/cli/validate.d.ts +37 -0
  24. package/dist/cli/validate.js +252 -0
  25. package/dist/cli/watch.d.ts +19 -0
  26. package/dist/cli/watch.js +175 -0
  27. package/dist/client.js +6 -13
  28. package/dist/constants.d.ts +2 -0
  29. package/dist/constants.js +5 -0
  30. package/dist/index.d.ts +7 -6
  31. package/dist/index.js +22 -6
  32. package/dist/integrations/openai.js +83 -60
  33. package/dist/logger.d.ts +3 -1
  34. package/dist/logger.js +2 -1
  35. package/dist/otel.d.ts +130 -0
  36. package/dist/otel.js +309 -0
  37. package/dist/runtime/eval.d.ts +14 -4
  38. package/dist/runtime/eval.js +127 -2
  39. package/dist/runtime/registry.d.ts +4 -2
  40. package/dist/runtime/registry.js +11 -3
  41. package/dist/runtime/run-report.d.ts +1 -1
  42. package/dist/runtime/run-report.js +7 -4
  43. package/dist/runtime/types.d.ts +38 -0
  44. package/dist/testing.d.ts +8 -0
  45. package/dist/testing.js +45 -10
  46. package/dist/version.d.ts +1 -1
  47. package/dist/version.js +1 -1
  48. package/dist/workflows.d.ts +2 -0
  49. package/dist/workflows.js +184 -102
  50. package/package.json +8 -1
@@ -0,0 +1,252 @@
1
+ "use strict";
2
+ /**
3
+ * evalgate validate — static validation of spec files without execution
4
+ *
5
+ * The equivalent of `tsc --noEmit` for eval specs. Catches:
6
+ * - Missing or malformed defineEval calls
7
+ * - Executor functions that don't return EvalResult shape
8
+ * - Invalid spec names (characters, length)
9
+ * - Empty spec files
10
+ * - Missing required fields in config-form defineEval
11
+ *
12
+ * Usage:
13
+ * evalgate validate
14
+ * evalgate validate --format json
15
+ */
16
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
17
+ if (k2 === undefined) k2 = k;
18
+ var desc = Object.getOwnPropertyDescriptor(m, k);
19
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
20
+ desc = { enumerable: true, get: function() { return m[k]; } };
21
+ }
22
+ Object.defineProperty(o, k2, desc);
23
+ }) : (function(o, m, k, k2) {
24
+ if (k2 === undefined) k2 = k;
25
+ o[k2] = m[k];
26
+ }));
27
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
28
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
29
+ }) : function(o, v) {
30
+ o["default"] = v;
31
+ });
32
+ var __importStar = (this && this.__importStar) || (function () {
33
+ var ownKeys = function(o) {
34
+ ownKeys = Object.getOwnPropertyNames || function (o) {
35
+ var ar = [];
36
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
37
+ return ar;
38
+ };
39
+ return ownKeys(o);
40
+ };
41
+ return function (mod) {
42
+ if (mod && mod.__esModule) return mod;
43
+ var result = {};
44
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
45
+ __setModuleDefault(result, mod);
46
+ return result;
47
+ };
48
+ })();
49
+ Object.defineProperty(exports, "__esModule", { value: true });
50
+ exports.runValidate = runValidate;
51
+ const fs = __importStar(require("node:fs"));
52
+ const path = __importStar(require("node:path"));
53
+ const execution_mode_1 = require("../runtime/execution-mode");
54
/**
 * Name validation regex — must match the runtime's validateSpecName
 */
const VALID_NAME_RE = /^[a-zA-Z0-9\s\-_]+$/;
const MAX_NAME_LENGTH = 100;
/**
 * Static patterns we look for in spec files.
 *
 * Only the patterns consumed through String.prototype.match / matchAll carry
 * the `g` flag. The two patterns used with RegExp.prototype.test are
 * deliberately NOT global: a global regex keeps `lastIndex` between calls, so
 * re-using it across files makes `.test()` start searching in the middle of
 * the next file and report spurious misses.
 */
// Matches every call form: defineEval(...), defineEval.skip(...),
// defineEval.only(...), defineEval.fromDataset(...). No separate per-variant
// patterns are needed (they would only count the same call twice).
const DEFINE_EVAL_RE = /defineEval\s*[.(]/g;
// Group 1 is the opening quote, group 2 the name. The back-reference forces
// the closing quote to be the same character, so a name such as "it's x" is
// captured whole instead of being cut at the apostrophe.
const DEFINE_EVAL_NAME_RE = /defineEval\s*\(\s*(["'`])((?:(?!\1)[\s\S])*)\1/g;
const DEFINE_EVAL_CONFIG_RE = /defineEval\s*\(\s*\{/g;
const EXECUTOR_RETURN_RE = /return\s*\{[^}]*pass\s*:/;
const CREATE_RESULT_RE = /createResult\s*\(/;
/**
 * Convert a character offset into a 1-based line number.
 *
 * @param {string} content - Full file text.
 * @param {number} index - Character offset into `content`.
 * @returns {number} 1-based line number containing `index`.
 */
function lineNumberAt(content, index) {
    return content.substring(0, index).split("\n").length;
}
/**
 * Statically analyze one spec file (the file is read, never executed).
 *
 * Reported issue codes:
 * - FILE_UNREADABLE / EMPTY_FILE (error, analysis stops)
 * - NO_DEFINE_EVAL (warn)
 * - EMPTY_NAME / NAME_TOO_LONG / INVALID_NAME (error, string-name form)
 * - MISSING_NAME / MISSING_EXECUTOR (error, config-object form)
 * - NO_RESULT_SHAPE (warn)
 *
 * @param {string} filePath - Path of the spec file to inspect.
 * @returns {Array<{severity: string, file: string, line?: number, code: string, message: string}>}
 *   Issues found; `file` is relative to process.cwd(). Empty when the file is clean.
 */
function analyzeFile(filePath) {
    const issues = [];
    const relPath = path.relative(process.cwd(), filePath);
    // Keeps the key order (severity, file, line, code, message) stable for JSON output
    // and omits `line` entirely for file-level issues.
    const report = (severity, code, message, line) => {
        const issue = { severity, file: relPath };
        if (line !== undefined) {
            issue.line = line;
        }
        issue.code = code;
        issue.message = message;
        issues.push(issue);
    };
    let content;
    try {
        content = fs.readFileSync(filePath, "utf8");
    }
    catch {
        report("error", "FILE_UNREADABLE", `Cannot read file: ${relPath}`);
        return issues;
    }
    if (content.trim().length === 0) {
        report("error", "EMPTY_FILE", "Spec file is empty");
        return issues;
    }
    const lines = content.split("\n");
    // Check for defineEval calls (any variant)
    const totalCalls = content.match(DEFINE_EVAL_RE)?.length ?? 0;
    if (totalCalls === 0) {
        report("warn", "NO_DEFINE_EVAL", "No defineEval() calls found. File may not define any specs.");
    }
    // Validate spec names
    for (const match of content.matchAll(DEFINE_EVAL_NAME_RE)) {
        const name = match[2];
        const lineNum = lineNumberAt(content, match.index ?? 0);
        if (!name || name.trim() === "") {
            report("error", "EMPTY_NAME", "Spec name is empty", lineNum);
            continue;
        }
        if (name.length > MAX_NAME_LENGTH) {
            report("error", "NAME_TOO_LONG", `Spec name "${name.slice(0, 30)}..." exceeds ${MAX_NAME_LENGTH} characters`, lineNum);
        }
        if (!VALID_NAME_RE.test(name)) {
            report("error", "INVALID_NAME", `Spec name "${name}" contains invalid characters (only letters, numbers, spaces, hyphens, underscores allowed)`, lineNum);
        }
    }
    // Check config-form defineEval calls have required fields
    for (const match of content.matchAll(DEFINE_EVAL_CONFIG_RE)) {
        const lineNum = lineNumberAt(content, match.index ?? 0);
        // Simple heuristic: look for 'name:' and 'executor:' in the next ~20 lines
        const contextLines = lines.slice(lineNum - 1, lineNum + 19).join("\n");
        if (!contextLines.includes("name:") && !contextLines.includes("name :")) {
            report("error", "MISSING_NAME", "Config-form defineEval() missing required 'name' field", lineNum);
        }
        if (!contextLines.includes("executor:") &&
            !contextLines.includes("executor :")) {
            report("error", "MISSING_EXECUTOR", "Config-form defineEval() missing required 'executor' field", lineNum);
        }
    }
    // Check that executors return EvalResult shape
    const hasCreateResult = CREATE_RESULT_RE.test(content);
    const hasReturnPass = EXECUTOR_RETURN_RE.test(content);
    if (totalCalls > 0 && !hasCreateResult && !hasReturnPass) {
        report("warn", "NO_RESULT_SHAPE", "No createResult() or return { pass: ... } found. Executors may not return the required EvalResult shape.");
    }
    return issues;
}
188
/**
 * Statically validate every spec file reported by getExecutionMode() for the
 * current working directory and print a report.
 *
 * @param {string[]} [args] - CLI arguments; `--format json` switches the
 *   report to JSON, anything else produces the human-readable report.
 * @returns {Promise<{filesScanned: number, filesWithIssues: number, issues: object[], passed: boolean}>}
 *   Summary of the run; `passed` is false only when at least one issue has
 *   severity "error".
 */
async function runValidate(args = []) {
    const flagPos = args.indexOf("--format");
    const format = flagPos === -1 ? "human" : args[flagPos + 1];
    const wantsJson = format === "json";
    const { specFiles } = await (0, execution_mode_1.getExecutionMode)(process.cwd());
    // Nothing discovered: report an empty, passing result.
    if (specFiles.length === 0) {
        const emptyResult = {
            filesScanned: 0,
            filesWithIssues: 0,
            issues: [],
            passed: true,
        };
        if (wantsJson) {
            console.log(JSON.stringify(emptyResult, null, 2));
            return emptyResult;
        }
        console.log("\n✨ No spec files found. Nothing to validate.");
        console.log("💡 Create files with defineEval() calls to get started.");
        return emptyResult;
    }
    // Collect issues across all files, remembering which files were affected.
    const collected = [];
    const flaggedFiles = new Set();
    for (const specFile of specFiles) {
        for (const found of analyzeFile(specFile)) {
            collected.push(found);
            flaggedFiles.add(found.file);
        }
    }
    const errorCount = collected.filter((entry) => entry.severity === "error").length;
    const warningCount = collected.filter((entry) => entry.severity === "warn").length;
    const passed = errorCount === 0;
    const summary = {
        filesScanned: specFiles.length,
        filesWithIssues: flaggedFiles.size,
        issues: collected,
        passed,
    };
    if (wantsJson) {
        console.log(JSON.stringify(summary, null, 2));
        return summary;
    }
    console.log(`\n🔍 Validated ${specFiles.length} spec file${specFiles.length === 1 ? "" : "s"}`);
    if (collected.length === 0) {
        console.log("✅ All spec files are valid.\n");
        return summary;
    }
    for (const entry of collected) {
        const icon = entry.severity === "error" ? "❌" : "⚠️";
        const loc = entry.line ? `:${entry.line}` : "";
        console.log(` ${icon} ${entry.file}${loc} [${entry.code}] ${entry.message}`);
    }
    console.log(`\n${errorCount} error${errorCount === 1 ? "" : "s"}, ${warningCount} warning${warningCount === 1 ? "" : "s"}`);
    console.log(passed ? "✅ Validation passed (warnings only).\n" : "❌ Validation failed.\n");
    return summary;
}
@@ -0,0 +1,19 @@
1
+ /**
2
+ * Watch mode for evalgate run
3
+ *
4
+ * Re-executes evaluation specs when source files change.
5
+ * Uses Node.js fs.watch with debouncing to avoid rapid re-runs.
6
+ */
7
+ import type { RunOptions } from "./run";
8
+ export interface WatchOptions extends RunOptions {
9
+ /** Debounce interval in milliseconds between a detected change and the re-run (default: 300) */
10
+ debounceMs?: number;
11
+ /** Additional directories to watch besides the existing eval/, evals/ and src/ directories; relative paths are resolved against the project root and non-existent directories are ignored */
12
+ extraWatchDirs?: string[];
13
+ /** Clear the terminal before each re-run; the initial run never clears (default: true) */
14
+ clearScreen?: boolean;
15
+ }
16
+ /**
17
+ * Start watch mode — runs evaluations once, then re-runs them on file changes.
+ * The returned promise never resolves: the process exits with code 0 on
+ * SIGINT/SIGTERM, or with code 1 when no directory to watch exists.
18
+ */
19
+ export declare function runWatch(options: WatchOptions, projectRoot?: string): Promise<void>;
@@ -0,0 +1,175 @@
1
+ "use strict";
2
+ /**
3
+ * Watch mode for evalgate run
4
+ *
5
+ * Re-executes evaluation specs when source files change.
6
+ * Uses Node.js fs.watch with debouncing to avoid rapid re-runs.
7
+ */
8
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
9
+ if (k2 === undefined) k2 = k;
10
+ var desc = Object.getOwnPropertyDescriptor(m, k);
11
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
12
+ desc = { enumerable: true, get: function() { return m[k]; } };
13
+ }
14
+ Object.defineProperty(o, k2, desc);
15
+ }) : (function(o, m, k, k2) {
16
+ if (k2 === undefined) k2 = k;
17
+ o[k2] = m[k];
18
+ }));
19
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
20
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
21
+ }) : function(o, v) {
22
+ o["default"] = v;
23
+ });
24
+ var __importStar = (this && this.__importStar) || (function () {
25
+ var ownKeys = function(o) {
26
+ ownKeys = Object.getOwnPropertyNames || function (o) {
27
+ var ar = [];
28
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
29
+ return ar;
30
+ };
31
+ return ownKeys(o);
32
+ };
33
+ return function (mod) {
34
+ if (mod && mod.__esModule) return mod;
35
+ var result = {};
36
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
37
+ __setModuleDefault(result, mod);
38
+ return result;
39
+ };
40
+ })();
41
+ Object.defineProperty(exports, "__esModule", { value: true });
42
+ exports.runWatch = runWatch;
43
+ const fs = __importStar(require("node:fs"));
44
+ const path = __importStar(require("node:path"));
45
+ const run_1 = require("./run");
46
/** File extensions whose changes trigger a re-run. */
const WATCHED_EXTENSIONS = new Set([".ts", ".tsx", ".js", ".jsx", ".json", ".jsonl", ".csv"]);
/**
 * Decide whether a path reported by fs.watch should trigger a re-run.
 *
 * With `recursive: true` the reported name is a path relative to the watched
 * directory, so every path segment is checked for a leading dot rather than
 * only the first character of the whole string.
 *
 * @param {string} filename - Path as reported by the fs.watch listener.
 * @returns {boolean} True when the file is a non-hidden, non-node_modules file with a watched extension.
 */
function isRelevantChange(filename) {
    if (filename.includes("node_modules")) {
        return false;
    }
    if (filename.split(/[\\/]/).some((segment) => segment.startsWith("."))) {
        return false;
    }
    return WATCHED_EXTENSIONS.has(path.extname(filename).toLowerCase());
}
/**
 * Start watch mode — runs evaluations and re-runs on file changes.
 *
 * Watches eval/, evals/ and src/ under `projectRoot` (those that exist) plus
 * any existing `options.extraWatchDirs`. Changes are debounced; a change that
 * arrives while a run is still executing is remembered and causes one more
 * run as soon as the current one finishes, so no edit is silently ignored.
 *
 * @param {object} options - Run options plus `debounceMs` (default 300),
 *   `extraWatchDirs` and `clearScreen` (default true).
 * @param {string} [projectRoot] - Project root; defaults to process.cwd().
 * @returns {Promise<void>} Never resolves. The process exits with code 0 on
 *   SIGINT/SIGTERM and with code 1 when there is nothing that can be watched.
 */
async function runWatch(options, projectRoot = process.cwd()) {
    const debounceMs = options.debounceMs ?? 300;
    const clearScreen = options.clearScreen ?? true;
    // Directories to watch: the conventional ones that exist...
    const watchDirs = new Set();
    for (const name of ["eval", "evals", "src"]) {
        const dir = path.join(projectRoot, name);
        if (fs.existsSync(dir)) {
            watchDirs.add(dir);
        }
    }
    // ...plus any caller-supplied extras that exist.
    for (const dir of options.extraWatchDirs ?? []) {
        const resolved = path.isAbsolute(dir) ? dir : path.join(projectRoot, dir);
        if (fs.existsSync(resolved)) {
            watchDirs.add(resolved);
        }
    }
    if (watchDirs.size === 0) {
        console.error("❌ No directories to watch. Create eval/, evals/, or src/ directory.");
        process.exit(1);
    }
    console.log("👁️ Watch mode enabled");
    console.log(` Watching: ${[...watchDirs].map((d) => path.relative(projectRoot, d) || ".").join(", ")}`);
    console.log(` Debounce: ${debounceMs}ms`);
    console.log(" Press Ctrl+C to stop\n");
    // Initial run
    await executeRun(options, projectRoot, clearScreen, false);
    // Set up watchers with debouncing
    let debounceTimer = null;
    let isRunning = false;
    let rerunRequested = false;
    const runPending = async () => {
        if (isRunning) {
            // A run is in flight: remember the change instead of dropping it.
            rerunRequested = true;
            return;
        }
        isRunning = true;
        try {
            do {
                rerunRequested = false;
                await executeRun(options, projectRoot, clearScreen, true);
            } while (rerunRequested);
        }
        finally {
            isRunning = false;
        }
    };
    const triggerRun = () => {
        if (debounceTimer) {
            clearTimeout(debounceTimer);
        }
        debounceTimer = setTimeout(() => {
            debounceTimer = null;
            // The timer callback's promise is not awaited by anyone, so handle rejection here.
            runPending().catch((error) => {
                console.error("❌ Run failed:", error instanceof Error ? error.message : String(error));
            });
        }, debounceMs);
    };
    const watchers = [];
    for (const dir of watchDirs) {
        try {
            const watcher = fs.watch(dir, { recursive: true }, (eventType, filename) => {
                if (!filename) {
                    return;
                }
                if (isRelevantChange(filename)) {
                    console.log(`\n🔄 Change detected: ${filename} (${eventType})`);
                    triggerRun();
                }
            });
            watchers.push(watcher);
        }
        catch (err) {
            console.warn(`⚠️ Could not watch ${path.relative(projectRoot, dir)}: ${err instanceof Error ? err.message : String(err)}`);
        }
    }
    if (watchers.length === 0) {
        // Every fs.watch call failed (e.g. recursive watching unsupported on this
        // platform/Node version); idling forever would look like a hang.
        console.error("❌ Could not start any file watchers; watch mode cannot detect changes.");
        process.exit(1);
    }
    // Handle graceful shutdown
    const cleanup = () => {
        console.log("\n\n👋 Watch mode stopped.");
        for (const watcher of watchers) {
            watcher.close();
        }
        if (debounceTimer) {
            clearTimeout(debounceTimer);
        }
        process.exit(0);
    };
    process.on("SIGINT", cleanup);
    process.on("SIGTERM", cleanup);
    // Keep process alive
    await new Promise(() => {
        // Never resolves — watch runs until interrupted
    });
}
141
/**
 * Run the evaluations once, print the results plus a watch-mode status line,
 * and never terminate the process.
 *
 * @param {object} options - Watch/run options; specIds, impactedOnly,
 *   baseBranch, format and writeResults are forwarded to runEvaluations.
 * @param {string} projectRoot - Project root passed to runEvaluations.
 * @param {boolean} clearScreen - Whether the terminal may be cleared.
 * @param {boolean} isRerun - True for change-triggered runs; only those clear the screen.
 * @returns {Promise<object|null>} The run result, or null when the run threw.
 */
async function executeRun(options, projectRoot, clearScreen, isRerun) {
    const shouldClear = clearScreen && isRerun;
    if (shouldClear) {
        // ANSI: erase display, then move the cursor to the top-left corner
        process.stdout.write("\x1B[2J\x1B[0f");
    }
    const startedAt = new Date().toLocaleTimeString();
    const banner = isRerun ? "🔄 Re-running" : "▶️ Running";
    console.log(`${banner} evaluations... (${startedAt})`);
    try {
        const { specIds, impactedOnly, baseBranch, format, writeResults } = options;
        const outcome = await (0, run_1.runEvaluations)({ specIds, impactedOnly, baseBranch, format, writeResults }, projectRoot);
        const printResults = options.format === "json" ? run_1.printJsonResults : run_1.printHumanResults;
        printResults(outcome);
        // Print watch-specific summary
        const verdict = outcome.summary.failed > 0 ? "❌" : "✅";
        console.log(`\n${verdict} ${outcome.summary.passed}/${outcome.results.length} passed | Waiting for changes...`);
        return outcome;
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        console.error("❌ Run failed:", reason);
        console.log("\n⏳ Waiting for changes...");
        return null;
    }
}
package/dist/client.js CHANGED
@@ -3,6 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.AIEvalClient = void 0;
4
4
  const batch_1 = require("./batch");
5
5
  const cache_1 = require("./cache");
6
+ const constants_1 = require("./constants");
6
7
  const context_1 = require("./context");
7
8
  const errors_1 = require("./errors");
8
9
  const logger_1 = require("./logger");
@@ -72,7 +73,7 @@ class AIEvalClient {
72
73
  this.baseUrl =
73
74
  config.baseUrl ||
74
75
  getEnvVar("EVALGATE_BASE_URL", "EVALAI_BASE_URL") ||
75
- (isBrowser ? "" : "https://api.evalgate.com");
76
+ (isBrowser ? "" : constants_1.DEFAULT_BASE_URL);
76
77
  this.timeout = config.timeout || 30000;
77
78
  // Tier 4.17: Debug mode with request logging
78
79
  const logLevel = config.logLevel || (config.debug ? "debug" : "info");
@@ -100,7 +101,7 @@ class AIEvalClient {
100
101
  const MAX_CONCURRENCY = 5;
101
102
  this.batcher = new batch_1.RequestBatcher(async (requests) => {
102
103
  const results = [];
103
- const executing = [];
104
+ const executing = new Set();
104
105
  for (const req of requests) {
105
106
  const task = (async () => {
106
107
  try {
@@ -121,18 +122,10 @@ class AIEvalClient {
121
122
  });
122
123
  }
123
124
  })();
124
- executing.push(task);
125
- if (executing.length >= MAX_CONCURRENCY) {
125
+ const tracked = task.finally(() => executing.delete(tracked));
126
+ executing.add(tracked);
127
+ if (executing.size >= MAX_CONCURRENCY) {
126
128
  await Promise.race(executing);
127
- // Remove settled promises
128
- for (let i = executing.length - 1; i >= 0; i--) {
129
- const settled = await Promise.race([
130
- executing[i].then(() => true),
131
- Promise.resolve(false),
132
- ]);
133
- if (settled)
134
- executing.splice(i, 1);
135
- }
136
129
  }
137
130
  }
138
131
  await Promise.allSettled(executing);
@@ -0,0 +1,2 @@
1
+ /** Default API base URL for the EvalGate platform; AIEvalClient uses it outside the browser when neither config.baseUrl nor the EVALGATE_BASE_URL / EVALAI_BASE_URL lookup yields a value. */
2
+ export declare const DEFAULT_BASE_URL = "https://api.evalgate.com";
@@ -0,0 +1,5 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.DEFAULT_BASE_URL = void 0;
4
+ /** Default API base URL for the EvalGate platform; AIEvalClient uses it outside the browser when neither config.baseUrl nor the EVALGATE_BASE_URL / EVALAI_BASE_URL lookup yields a value. */
5
+ exports.DEFAULT_BASE_URL = "https://api.evalgate.com";
package/dist/index.d.ts CHANGED
@@ -7,14 +7,14 @@
7
7
  * @packageDocumentation
8
8
  */
9
9
  export { AIEvalClient } from "./client";
10
- import { AuthenticationError, EvalGateError, NetworkError, RateLimitError, SDKError } from "./errors";
11
- export { EvalGateError, RateLimitError, AuthenticationError, SDKError as ValidationError, // Using SDKError as ValidationError for backward compatibility
12
- NetworkError, };
13
- export { type AssertionLLMConfig, configureAssertions, containsAllRequiredFields, containsJSON, containsKeywords, containsLanguage, containsLanguageAsync, expect, followsInstructions, getAssertionConfig, hasFactualAccuracy, hasFactualAccuracyAsync, hasLength, hasNoHallucinations, hasNoHallucinationsAsync, hasNoToxicity, hasNoToxicityAsync, hasPII, hasReadabilityScore, hasSentiment, hasSentimentAsync, hasValidCodeSyntax, hasValidCodeSyntaxAsync, isValidEmail, isValidURL, matchesPattern, matchesSchema, notContainsPII, respondedWithinTime, similarTo, withinRange, } from "./assertions";
10
+ import { AuthenticationError, EvalGateError, NetworkError, RateLimitError, ValidationError } from "./errors";
11
+ export { EvalGateError, RateLimitError, AuthenticationError, ValidationError, NetworkError, };
12
+ export { type AssertionLLMConfig, configureAssertions, containsAllRequiredFields, containsJSON, containsKeywords, containsLanguage, containsLanguageAsync, expect, followsInstructions, getAssertionConfig, hasConsistency, hasConsistencyAsync, hasFactualAccuracy, hasFactualAccuracyAsync, hasLength, hasNoHallucinations, hasNoHallucinationsAsync, hasNoToxicity, hasNoToxicityAsync, hasPII, hasReadabilityScore, hasSentiment, hasSentimentAsync, hasSentimentWithScore, hasValidCodeSyntax, hasValidCodeSyntaxAsync, isValidEmail, isValidURL, matchesPattern, matchesSchema, notContainsPII, respondedWithinDuration, respondedWithinTime, respondedWithinTimeSince, type SentimentAsyncResult, similarTo, toSemanticallyContain, withinRange, } from "./assertions";
13
+ export { EvalGateError as SDKError } from "./errors";
14
14
  import { createContext, EvalContext, getCurrentContext, withContext } from "./context";
15
15
  export { createContext, getCurrentContext as getContext, withContext, EvalContext as ContextManager, };
16
16
  export { cloneContext, mergeContexts, validateContext, } from "./runtime/context";
17
- export { createContext as createEvalContext, createResult, defineEval, defineSuite, evalai, } from "./runtime/eval";
17
+ export { createContext as createEvalContext, createResult, defineEval, defineSuite, evalai, getFilteredSpecs, } from "./runtime/eval";
18
18
  export { createLocalExecutor, defaultLocalExecutor, } from "./runtime/executor";
19
19
  export { createEvalRuntime, disposeActiveRuntime, getActiveRuntime, setActiveRuntime, } from "./runtime/registry";
20
20
  export type { CloudExecutor, DefineEvalFunction, EvalContext, EvalExecutor, EvalExecutorInterface, EvalOptions, EvalResult, EvalRuntime, EvalSpec, ExecutorCapabilities, LocalExecutor, SpecConfig, SpecOptions, WorkerExecutor, } from "./runtime/types";
@@ -27,13 +27,14 @@ import { exportData, importData } from "./export";
27
27
  export { exportData, importData };
28
28
  export type { ExportFormat, ExportFormat as ExportType };
29
29
  export { RequestBatcher } from "./batch";
30
- export { CacheTTL, RequestCache } from "./cache";
30
+ export { CacheTTL } from "./cache";
31
31
  export { type CheckArgs, EXIT, parseArgs, runCheck } from "./cli/check";
32
32
  export { traceAnthropic } from "./integrations/anthropic";
33
33
  export { traceOpenAI } from "./integrations/openai";
34
34
  export { type OpenAIChatEvalCase, type OpenAIChatEvalOptions, type OpenAIChatEvalResult, openAIChatEval, } from "./integrations/openai-eval";
35
35
  export { Logger } from "./logger";
36
36
  export { extendExpectWithToPassGate } from "./matchers";
37
+ export { createOTelExporter, type OTelAttribute, type OTelEvent, OTelExporter, type OTelExporterOptions, type OTelExportPayload, type OTelSpan, } from "./otel";
37
38
  export { autoPaginate, autoPaginateGenerator, createPaginatedIterator, decodeCursor, encodeCursor, PaginatedIterator, type PaginatedResponse, type PaginationParams, } from "./pagination";
38
39
  export { ARTIFACTS, type Baseline, type BaselineTolerance, GATE_CATEGORY, GATE_EXIT, type GateCategory, type GateExitCode, REPORT_SCHEMA_VERSION, type RegressionDelta, type RegressionReport, } from "./regression";
39
40
  export { batchProcess, batchRead, RateLimiter, streamEvaluation, } from "./streaming";
package/dist/index.js CHANGED
@@ -8,8 +8,9 @@
8
8
  * @packageDocumentation
9
9
  */
10
10
  Object.defineProperty(exports, "__esModule", { value: true });
11
- exports.defaultLocalExecutor = exports.createLocalExecutor = exports.evalai = exports.defineSuite = exports.defineEval = exports.createResult = exports.createEvalContext = exports.validateContext = exports.mergeContexts = exports.cloneContext = exports.ContextManager = exports.withContext = exports.getContext = exports.createContext = exports.withinRange = exports.similarTo = exports.respondedWithinTime = exports.notContainsPII = exports.matchesSchema = exports.matchesPattern = exports.isValidURL = exports.isValidEmail = exports.hasValidCodeSyntaxAsync = exports.hasValidCodeSyntax = exports.hasSentimentAsync = exports.hasSentiment = exports.hasReadabilityScore = exports.hasPII = exports.hasNoToxicityAsync = exports.hasNoToxicity = exports.hasNoHallucinationsAsync = exports.hasNoHallucinations = exports.hasLength = exports.hasFactualAccuracyAsync = exports.hasFactualAccuracy = exports.getAssertionConfig = exports.followsInstructions = exports.expect = exports.containsLanguageAsync = exports.containsLanguage = exports.containsKeywords = exports.containsJSON = exports.containsAllRequiredFields = exports.configureAssertions = exports.NetworkError = exports.ValidationError = exports.AuthenticationError = exports.RateLimitError = exports.EvalGateError = exports.AIEvalClient = void 0;
12
- exports.WorkflowTracer = exports.traceWorkflowStep = exports.traceLangChainAgent = exports.traceCrewAI = exports.traceAutoGen = exports.createWorkflowTracer = exports.EvaluationTemplates = exports.streamEvaluation = exports.RateLimiter = exports.batchRead = exports.batchProcess = exports.REPORT_SCHEMA_VERSION = exports.GATE_EXIT = exports.GATE_CATEGORY = exports.ARTIFACTS = exports.PaginatedIterator = exports.encodeCursor = exports.decodeCursor = exports.createPaginatedIterator = exports.autoPaginateGenerator = exports.autoPaginate = exports.extendExpectWithToPassGate = exports.Logger = exports.openAIChatEval = exports.traceOpenAI = exports.traceAnthropic = exports.runCheck = exports.parseArgs = exports.EXIT = exports.RequestCache = exports.CacheTTL = exports.RequestBatcher = exports.importData = exports.exportData = exports.saveSnapshot = exports.compareSnapshots = exports.compareWithSnapshot = exports.snapshot = exports.TestSuite = exports.createTestSuite = exports.SpecRegistrationError = exports.SpecExecutionError = exports.RuntimeError = exports.EvalRuntimeError = exports.setActiveRuntime = exports.getActiveRuntime = exports.disposeActiveRuntime = exports.createEvalRuntime = void 0;
11
+ exports.validateContext = exports.mergeContexts = exports.cloneContext = exports.ContextManager = exports.withContext = exports.getContext = exports.createContext = exports.SDKError = exports.withinRange = exports.toSemanticallyContain = exports.similarTo = exports.respondedWithinTimeSince = exports.respondedWithinTime = exports.respondedWithinDuration = exports.notContainsPII = exports.matchesSchema = exports.matchesPattern = exports.isValidURL = exports.isValidEmail = exports.hasValidCodeSyntaxAsync = exports.hasValidCodeSyntax = exports.hasSentimentWithScore = exports.hasSentimentAsync = exports.hasSentiment = exports.hasReadabilityScore = exports.hasPII = exports.hasNoToxicityAsync = exports.hasNoToxicity = exports.hasNoHallucinationsAsync = exports.hasNoHallucinations = exports.hasLength = exports.hasFactualAccuracyAsync = exports.hasFactualAccuracy = exports.hasConsistencyAsync = exports.hasConsistency = exports.getAssertionConfig = exports.followsInstructions = exports.expect = exports.containsLanguageAsync = exports.containsLanguage = exports.containsKeywords = exports.containsJSON = exports.containsAllRequiredFields = exports.configureAssertions = exports.NetworkError = exports.ValidationError = exports.AuthenticationError = exports.RateLimitError = exports.EvalGateError = exports.AIEvalClient = void 0;
12
+ exports.streamEvaluation = exports.RateLimiter = exports.batchRead = exports.batchProcess = exports.REPORT_SCHEMA_VERSION = exports.GATE_EXIT = exports.GATE_CATEGORY = exports.ARTIFACTS = exports.PaginatedIterator = exports.encodeCursor = exports.decodeCursor = exports.createPaginatedIterator = exports.autoPaginateGenerator = exports.autoPaginate = exports.OTelExporter = exports.createOTelExporter = exports.extendExpectWithToPassGate = exports.Logger = exports.openAIChatEval = exports.traceOpenAI = exports.traceAnthropic = exports.runCheck = exports.parseArgs = exports.EXIT = exports.CacheTTL = exports.RequestBatcher = exports.importData = exports.exportData = exports.saveSnapshot = exports.compareSnapshots = exports.compareWithSnapshot = exports.snapshot = exports.TestSuite = exports.createTestSuite = exports.SpecRegistrationError = exports.SpecExecutionError = exports.RuntimeError = exports.EvalRuntimeError = exports.setActiveRuntime = exports.getActiveRuntime = exports.disposeActiveRuntime = exports.createEvalRuntime = exports.defaultLocalExecutor = exports.createLocalExecutor = exports.getFilteredSpecs = exports.evalai = exports.defineSuite = exports.defineEval = exports.createResult = exports.createEvalContext = void 0;
13
+ exports.WorkflowTracer = exports.traceWorkflowStep = exports.traceLangChainAgent = exports.traceCrewAI = exports.traceAutoGen = exports.createWorkflowTracer = exports.EvaluationTemplates = void 0;
13
14
  // Main SDK exports
14
15
  var client_1 = require("./client");
15
16
  Object.defineProperty(exports, "AIEvalClient", { enumerable: true, get: function () { return client_1.AIEvalClient; } });
@@ -19,7 +20,7 @@ Object.defineProperty(exports, "AuthenticationError", { enumerable: true, get: f
19
20
  Object.defineProperty(exports, "EvalGateError", { enumerable: true, get: function () { return errors_1.EvalGateError; } });
20
21
  Object.defineProperty(exports, "NetworkError", { enumerable: true, get: function () { return errors_1.NetworkError; } });
21
22
  Object.defineProperty(exports, "RateLimitError", { enumerable: true, get: function () { return errors_1.RateLimitError; } });
22
- Object.defineProperty(exports, "ValidationError", { enumerable: true, get: function () { return errors_1.SDKError; } });
23
+ Object.defineProperty(exports, "ValidationError", { enumerable: true, get: function () { return errors_1.ValidationError; } });
23
24
  // Enhanced assertions (Tier 1.3)
24
25
  var assertions_1 = require("./assertions");
25
26
  // LLM config
@@ -33,6 +34,8 @@ Object.defineProperty(exports, "containsLanguageAsync", { enumerable: true, get:
33
34
  Object.defineProperty(exports, "expect", { enumerable: true, get: function () { return assertions_1.expect; } });
34
35
  Object.defineProperty(exports, "followsInstructions", { enumerable: true, get: function () { return assertions_1.followsInstructions; } });
35
36
  Object.defineProperty(exports, "getAssertionConfig", { enumerable: true, get: function () { return assertions_1.getAssertionConfig; } });
37
+ Object.defineProperty(exports, "hasConsistency", { enumerable: true, get: function () { return assertions_1.hasConsistency; } });
38
+ Object.defineProperty(exports, "hasConsistencyAsync", { enumerable: true, get: function () { return assertions_1.hasConsistencyAsync; } });
36
39
  Object.defineProperty(exports, "hasFactualAccuracy", { enumerable: true, get: function () { return assertions_1.hasFactualAccuracy; } });
37
40
  Object.defineProperty(exports, "hasFactualAccuracyAsync", { enumerable: true, get: function () { return assertions_1.hasFactualAccuracyAsync; } });
38
41
  Object.defineProperty(exports, "hasLength", { enumerable: true, get: function () { return assertions_1.hasLength; } });
@@ -44,6 +47,7 @@ Object.defineProperty(exports, "hasPII", { enumerable: true, get: function () {
44
47
  Object.defineProperty(exports, "hasReadabilityScore", { enumerable: true, get: function () { return assertions_1.hasReadabilityScore; } });
45
48
  Object.defineProperty(exports, "hasSentiment", { enumerable: true, get: function () { return assertions_1.hasSentiment; } });
46
49
  Object.defineProperty(exports, "hasSentimentAsync", { enumerable: true, get: function () { return assertions_1.hasSentimentAsync; } });
50
+ Object.defineProperty(exports, "hasSentimentWithScore", { enumerable: true, get: function () { return assertions_1.hasSentimentWithScore; } });
47
51
  Object.defineProperty(exports, "hasValidCodeSyntax", { enumerable: true, get: function () { return assertions_1.hasValidCodeSyntax; } });
48
52
  Object.defineProperty(exports, "hasValidCodeSyntaxAsync", { enumerable: true, get: function () { return assertions_1.hasValidCodeSyntaxAsync; } });
49
53
  Object.defineProperty(exports, "isValidEmail", { enumerable: true, get: function () { return assertions_1.isValidEmail; } });
@@ -51,9 +55,15 @@ Object.defineProperty(exports, "isValidURL", { enumerable: true, get: function (
51
55
  Object.defineProperty(exports, "matchesPattern", { enumerable: true, get: function () { return assertions_1.matchesPattern; } });
52
56
  Object.defineProperty(exports, "matchesSchema", { enumerable: true, get: function () { return assertions_1.matchesSchema; } });
53
57
  Object.defineProperty(exports, "notContainsPII", { enumerable: true, get: function () { return assertions_1.notContainsPII; } });
58
+ Object.defineProperty(exports, "respondedWithinDuration", { enumerable: true, get: function () { return assertions_1.respondedWithinDuration; } });
54
59
  Object.defineProperty(exports, "respondedWithinTime", { enumerable: true, get: function () { return assertions_1.respondedWithinTime; } });
60
+ Object.defineProperty(exports, "respondedWithinTimeSince", { enumerable: true, get: function () { return assertions_1.respondedWithinTimeSince; } });
55
61
  Object.defineProperty(exports, "similarTo", { enumerable: true, get: function () { return assertions_1.similarTo; } });
62
+ Object.defineProperty(exports, "toSemanticallyContain", { enumerable: true, get: function () { return assertions_1.toSemanticallyContain; } });
56
63
  Object.defineProperty(exports, "withinRange", { enumerable: true, get: function () { return assertions_1.withinRange; } });
64
+ // Legacy backward compat — SDKError is the old name for EvalGateError
65
+ var errors_2 = require("./errors");
66
+ Object.defineProperty(exports, "SDKError", { enumerable: true, get: function () { return errors_2.EvalGateError; } });
57
67
  // Context propagation (Tier 2.9)
58
68
  const context_1 = require("./context");
59
69
  Object.defineProperty(exports, "createContext", { enumerable: true, get: function () { return context_1.createContext; } });
@@ -71,6 +81,7 @@ Object.defineProperty(exports, "createResult", { enumerable: true, get: function
71
81
  Object.defineProperty(exports, "defineEval", { enumerable: true, get: function () { return eval_1.defineEval; } });
72
82
  Object.defineProperty(exports, "defineSuite", { enumerable: true, get: function () { return eval_1.defineSuite; } });
73
83
  Object.defineProperty(exports, "evalai", { enumerable: true, get: function () { return eval_1.evalai; } });
84
+ Object.defineProperty(exports, "getFilteredSpecs", { enumerable: true, get: function () { return eval_1.getFilteredSpecs; } });
74
85
  var executor_1 = require("./runtime/executor");
75
86
  Object.defineProperty(exports, "createLocalExecutor", { enumerable: true, get: function () { return executor_1.createLocalExecutor; } });
76
87
  Object.defineProperty(exports, "defaultLocalExecutor", { enumerable: true, get: function () { return executor_1.defaultLocalExecutor; } });
@@ -104,11 +115,12 @@ Object.defineProperty(exports, "importData", { enumerable: true, get: function (
104
115
  var batch_1 = require("./batch");
105
116
  Object.defineProperty(exports, "RequestBatcher", { enumerable: true, get: function () { return batch_1.RequestBatcher; } });
106
117
  // Performance optimization utilities (v1.3.0)
107
- // Note: RequestCache and CacheTTL are for advanced users only
108
- // Most users don't need these - caching is automatic
118
+ // Note: CacheTTL is for advanced users only
119
+ // Most users don't need this - caching is automatic
120
+ // RequestCache is intentionally NOT exported — it's an internal HTTP cache.
121
+ // Use CacheTTL to configure cache durations via client options.
109
122
  var cache_1 = require("./cache");
110
123
  Object.defineProperty(exports, "CacheTTL", { enumerable: true, get: function () { return cache_1.CacheTTL; } });
111
- Object.defineProperty(exports, "RequestCache", { enumerable: true, get: function () { return cache_1.RequestCache; } });
112
124
  // CLI (programmatic use)
113
125
  var check_1 = require("./cli/check");
114
126
  Object.defineProperty(exports, "EXIT", { enumerable: true, get: function () { return check_1.EXIT; } });
@@ -128,6 +140,10 @@ Object.defineProperty(exports, "Logger", { enumerable: true, get: function () {
128
140
  // Vitest matcher: expect(await openAIChatEval(...)).toPassGate()
129
141
  var matchers_1 = require("./matchers");
130
142
  Object.defineProperty(exports, "extendExpectWithToPassGate", { enumerable: true, get: function () { return matchers_1.extendExpectWithToPassGate; } });
143
+ // OpenTelemetry export
144
+ var otel_1 = require("./otel");
145
+ Object.defineProperty(exports, "createOTelExporter", { enumerable: true, get: function () { return otel_1.createOTelExporter; } });
146
+ Object.defineProperty(exports, "OTelExporter", { enumerable: true, get: function () { return otel_1.OTelExporter; } });
131
147
  var pagination_1 = require("./pagination");
132
148
  Object.defineProperty(exports, "autoPaginate", { enumerable: true, get: function () { return pagination_1.autoPaginate; } });
133
149
  Object.defineProperty(exports, "autoPaginateGenerator", { enumerable: true, get: function () { return pagination_1.autoPaginateGenerator; } });