@evalgate/sdk 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. package/CHANGELOG.md +638 -0
  2. package/README.md +398 -0
  3. package/dist/assertions.d.ts +189 -0
  4. package/dist/assertions.js +662 -0
  5. package/dist/batch.d.ts +68 -0
  6. package/dist/batch.js +179 -0
  7. package/dist/cache.d.ts +65 -0
  8. package/dist/cache.js +131 -0
  9. package/dist/cli/api.d.ts +108 -0
  10. package/dist/cli/api.js +132 -0
  11. package/dist/cli/baseline.d.ts +10 -0
  12. package/dist/cli/baseline.js +172 -0
  13. package/dist/cli/check.d.ts +73 -0
  14. package/dist/cli/check.js +355 -0
  15. package/dist/cli/ci-context.d.ts +6 -0
  16. package/dist/cli/ci-context.js +112 -0
  17. package/dist/cli/ci.d.ts +45 -0
  18. package/dist/cli/ci.js +192 -0
  19. package/dist/cli/config.d.ts +30 -0
  20. package/dist/cli/config.js +230 -0
  21. package/dist/cli/constants.d.ts +15 -0
  22. package/dist/cli/constants.js +18 -0
  23. package/dist/cli/diff.d.ts +173 -0
  24. package/dist/cli/diff.js +685 -0
  25. package/dist/cli/discover.d.ts +84 -0
  26. package/dist/cli/discover.js +419 -0
  27. package/dist/cli/doctor.d.ts +88 -0
  28. package/dist/cli/doctor.js +675 -0
  29. package/dist/cli/env.d.ts +21 -0
  30. package/dist/cli/env.js +42 -0
  31. package/dist/cli/explain.d.ts +58 -0
  32. package/dist/cli/explain.js +561 -0
  33. package/dist/cli/formatters/github.d.ts +8 -0
  34. package/dist/cli/formatters/github.js +135 -0
  35. package/dist/cli/formatters/human.d.ts +6 -0
  36. package/dist/cli/formatters/human.js +110 -0
  37. package/dist/cli/formatters/json.d.ts +6 -0
  38. package/dist/cli/formatters/json.js +10 -0
  39. package/dist/cli/formatters/pr-comment.d.ts +12 -0
  40. package/dist/cli/formatters/pr-comment.js +103 -0
  41. package/dist/cli/formatters/types.d.ts +103 -0
  42. package/dist/cli/formatters/types.js +8 -0
  43. package/dist/cli/gate.d.ts +21 -0
  44. package/dist/cli/gate.js +179 -0
  45. package/dist/cli/impact-analysis.d.ts +63 -0
  46. package/dist/cli/impact-analysis.js +252 -0
  47. package/dist/cli/index.d.ts +9 -0
  48. package/dist/cli/index.js +332 -0
  49. package/dist/cli/init.d.ts +16 -0
  50. package/dist/cli/init.js +292 -0
  51. package/dist/cli/manifest.d.ts +103 -0
  52. package/dist/cli/manifest.js +282 -0
  53. package/dist/cli/migrate.d.ts +41 -0
  54. package/dist/cli/migrate.js +349 -0
  55. package/dist/cli/policy-packs.d.ts +23 -0
  56. package/dist/cli/policy-packs.js +89 -0
  57. package/dist/cli/print-config.d.ts +29 -0
  58. package/dist/cli/print-config.js +270 -0
  59. package/dist/cli/profiles.d.ts +28 -0
  60. package/dist/cli/profiles.js +30 -0
  61. package/dist/cli/reason-codes.d.ts +17 -0
  62. package/dist/cli/reason-codes.js +19 -0
  63. package/dist/cli/regression-gate.d.ts +15 -0
  64. package/dist/cli/regression-gate.js +341 -0
  65. package/dist/cli/render/snippet.d.ts +5 -0
  66. package/dist/cli/render/snippet.js +15 -0
  67. package/dist/cli/render/sort.d.ts +10 -0
  68. package/dist/cli/render/sort.js +24 -0
  69. package/dist/cli/report/build-check-report.d.ts +19 -0
  70. package/dist/cli/report/build-check-report.js +132 -0
  71. package/dist/cli/run.d.ts +101 -0
  72. package/dist/cli/run.js +395 -0
  73. package/dist/cli/share.d.ts +17 -0
  74. package/dist/cli/share.js +91 -0
  75. package/dist/cli/upgrade.d.ts +15 -0
  76. package/dist/cli/upgrade.js +492 -0
  77. package/dist/cli/workspace.d.ts +31 -0
  78. package/dist/cli/workspace.js +68 -0
  79. package/dist/client.d.ts +368 -0
  80. package/dist/client.js +893 -0
  81. package/dist/client.request.test.d.ts +1 -0
  82. package/dist/client.request.test.js +232 -0
  83. package/dist/context.d.ts +134 -0
  84. package/dist/context.js +215 -0
  85. package/dist/errors.d.ts +82 -0
  86. package/dist/errors.js +298 -0
  87. package/dist/export.d.ts +195 -0
  88. package/dist/export.js +344 -0
  89. package/dist/index.d.ts +44 -0
  90. package/dist/index.js +153 -0
  91. package/dist/integrations/anthropic.d.ts +91 -0
  92. package/dist/integrations/anthropic.js +163 -0
  93. package/dist/integrations/openai-eval.d.ts +57 -0
  94. package/dist/integrations/openai-eval.js +232 -0
  95. package/dist/integrations/openai.d.ts +92 -0
  96. package/dist/integrations/openai.js +160 -0
  97. package/dist/local.d.ts +39 -0
  98. package/dist/local.js +148 -0
  99. package/dist/logger.d.ts +128 -0
  100. package/dist/logger.js +227 -0
  101. package/dist/matchers/index.d.ts +1 -0
  102. package/dist/matchers/index.js +6 -0
  103. package/dist/matchers/to-pass-gate.d.ts +29 -0
  104. package/dist/matchers/to-pass-gate.js +35 -0
  105. package/dist/pagination.d.ts +74 -0
  106. package/dist/pagination.js +139 -0
  107. package/dist/regression.d.ts +100 -0
  108. package/dist/regression.js +44 -0
  109. package/dist/runtime/adapters/config-to-dsl.d.ts +33 -0
  110. package/dist/runtime/adapters/config-to-dsl.js +400 -0
  111. package/dist/runtime/adapters/testsuite-to-dsl.d.ts +63 -0
  112. package/dist/runtime/adapters/testsuite-to-dsl.js +276 -0
  113. package/dist/runtime/context.d.ts +26 -0
  114. package/dist/runtime/context.js +74 -0
  115. package/dist/runtime/eval.d.ts +46 -0
  116. package/dist/runtime/eval.js +244 -0
  117. package/dist/runtime/execution-mode.d.ts +80 -0
  118. package/dist/runtime/execution-mode.js +357 -0
  119. package/dist/runtime/executor.d.ts +16 -0
  120. package/dist/runtime/executor.js +152 -0
  121. package/dist/runtime/registry.d.ts +78 -0
  122. package/dist/runtime/registry.js +403 -0
  123. package/dist/runtime/run-report.d.ts +200 -0
  124. package/dist/runtime/run-report.js +222 -0
  125. package/dist/runtime/types.d.ts +356 -0
  126. package/dist/runtime/types.js +76 -0
  127. package/dist/snapshot.d.ts +176 -0
  128. package/dist/snapshot.js +322 -0
  129. package/dist/streaming.d.ts +173 -0
  130. package/dist/streaming.js +268 -0
  131. package/dist/testing.d.ts +273 -0
  132. package/dist/testing.js +317 -0
  133. package/dist/types.d.ts +754 -0
  134. package/dist/types.js +54 -0
  135. package/dist/utils/input-hash.d.ts +8 -0
  136. package/dist/utils/input-hash.js +41 -0
  137. package/dist/version.d.ts +7 -0
  138. package/dist/version.js +10 -0
  139. package/dist/workflows.d.ts +389 -0
  140. package/dist/workflows.js +671 -0
  141. package/package.json +117 -0
package/dist/export.js ADDED
@@ -0,0 +1,344 @@
1
+ "use strict";
2
+ /**
3
+ * Data Export/Import System
4
+ * Tier 4.18: Platform migration and backup utilities
5
+ *
6
+ * @example
7
+ * ```typescript
8
+ * import { exportData, importData } from '@evalgate/sdk';
9
+ *
10
+ * // Export all data
11
+ * const data = await exportData(client, {
12
+ * format: 'json',
13
+ * includeTraces: true,
14
+ * includeEvaluations: true
15
+ * });
16
+ *
17
+ * // Save to file
18
+ * fs.writeFileSync('backup.json', JSON.stringify(data, null, 2));
19
+ *
20
+ * // Import from another platform
21
+ * await importFromLangSmith(client, langsmithData, { organizationId: 123 });
22
+ * ```
23
+ */
24
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { // NOTE(review): these three helpers look like the TypeScript compiler's import-interop emit - confirm before hand-editing. This one exposes m[k] on o under the name k2.
25
+ if (k2 === undefined) k2 = k; // k2 defaults to the source key
26
+ var desc = Object.getOwnPropertyDescriptor(m, k);
27
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { // use a fresh getter when there is no descriptor, when an accessor lives on a non-__esModule object, or when a data property is writable/configurable
28
+ desc = { enumerable: true, get: function() { return m[k]; } }; // getter reads m[k] on every access
29
+ }
30
+ Object.defineProperty(o, k2, desc);
31
+ }) : (function(o, m, k, k2) { // branch used when Object.create is falsy: plain value copy
32
+ if (k2 === undefined) k2 = k;
33
+ o[k2] = m[k];
34
+ }));
35
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { // stores v as o["default"]
36
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
37
+ }) : function(o, v) {
38
+ o["default"] = v;
39
+ });
40
+ var __importStar = (this && this.__importStar) || (function () { // builds the namespace-style wrapper applied to require("node:fs") further down in this file
41
+ var ownKeys = function(o) {
42
+ ownKeys = Object.getOwnPropertyNames || function (o) { // replaces itself on first call; falls back to a for-in + hasOwnProperty scan
43
+ var ar = [];
44
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
45
+ return ar;
46
+ };
47
+ return ownKeys(o);
48
+ };
49
+ return function (mod) {
50
+ if (mod && mod.__esModule) return mod; // objects flagged __esModule are returned untouched
51
+ var result = {};
52
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); // bind every own key except "default"
53
+ __setModuleDefault(result, mod); // the original value itself becomes result.default
54
+ return result;
55
+ };
56
+ })();
57
+ Object.defineProperty(exports, "__esModule", { value: true }); // flags this module's exports object with __esModule
58
+ exports.exportData = exportData; // the six functions below are attached to this module's exports object
59
+ exports.importData = importData;
60
+ exports.exportToFile = exportToFile;
61
+ exports.importFromFile = importFromFile;
62
+ exports.importFromLangSmith = importFromLangSmith;
63
+ exports.convertToCSV = convertToCSV;
/**
 * Collect platform data into a single export object.
 *
 * The returned object always carries a `metadata` entry (export timestamp,
 * the fixed schema version "1.0.0", and the requested `format` /
 * `organizationId`). `traces`, `evaluations`, `testCases` and `runs` are
 * attached only when the matching `include*` flag is set.
 *
 * Test cases and runs are fetched per evaluation id, so they are collected
 * only when `includeEvaluations` is set as well. Evaluations are visited one
 * at a time, in list order.
 *
 * Only these option fields are read here: `format`, `organizationId`,
 * `limit`, `includeTraces`, `includeEvaluations`, `includeTestCases`,
 * `includeRuns`.
 *
 * @param client  Object exposing `traces.list`, `evaluations.list`,
 *                `evaluations.listTestCases` and `evaluations.listRuns`.
 * @param options Export options (see the field list above).
 * @returns The assembled export object.
 *
 * @example
 * ```typescript
 * const data = await exportData(client, {
 *   format: 'json',
 *   includeTraces: true,
 *   includeEvaluations: true
 * });
 *
 * fs.writeFileSync('backup.json', JSON.stringify(data, null, 2));
 * ```
 */
async function exportData(client, options) {
    const payload = {
        metadata: {
            exportedAt: new Date().toISOString(),
            version: "1.0.0",
            format: options.format,
            organizationId: options.organizationId,
        },
    };
    // Both list endpoints take the same query shape; build a fresh object per call.
    const listQuery = () => ({
        organizationId: options.organizationId,
        limit: options.limit,
    });
    // Sequentially fetch a per-evaluation collection and flatten it.
    const gatherPerEvaluation = async (evaluationList, fetchFor) => {
        const gathered = [];
        for (const { id } of evaluationList) {
            gathered.push(...(await fetchFor(id)));
        }
        return gathered;
    };
    if (options.includeTraces) {
        payload.traces = await client.traces.list(listQuery());
    }
    // Everything below hangs off the evaluation list.
    if (!options.includeEvaluations) {
        return payload;
    }
    const evaluationList = await client.evaluations.list(listQuery());
    payload.evaluations = evaluationList;
    if (options.includeTestCases) {
        payload.testCases = await gatherPerEvaluation(evaluationList, (id) => client.evaluations.listTestCases(id));
    }
    if (options.includeRuns) {
        payload.runs = await gatherPerEvaluation(evaluationList, (id) => client.evaluations.listRuns(id));
    }
    return payload;
}
/**
 * Create every item of one collection through `createItem`, tallying the
 * outcome both in the returned per-collection counts and in `result.summary`.
 *
 * A failure counts as "skipped" only when `options.skipDuplicates` is set and
 * the thrown value is an `Error` whose message contains "already exists".
 * Every other failure is counted as "failed" and appended to `result.errors`.
 *
 * @param items        Items to import, processed one at a time in order.
 * @param result       The overall import result; mutated in place.
 * @param options      Import options (only `skipDuplicates` is read here).
 * @param describeItem Maps an item to the label stored in `result.errors`.
 * @param createItem   Performs the create call for one item.
 * @returns `{ imported, skipped, failed }` for this collection.
 */
async function importCollection(items, result, options, describeItem, createItem) {
    const counts = { imported: 0, skipped: 0, failed: 0 };
    for (const item of items) {
        try {
            await createItem(item);
            counts.imported++;
            result.summary.imported++;
        }
        catch (error) {
            const isDuplicate = options.skipDuplicates &&
                error instanceof Error &&
                error.message.includes("already exists");
            if (isDuplicate) {
                counts.skipped++;
                result.summary.skipped++;
                continue;
            }
            counts.failed++;
            result.summary.failed++;
            result.errors.push({
                item: describeItem(item),
                error: error instanceof Error ? error.message : String(error),
            });
        }
    }
    return counts;
}
/**
 * Import previously exported data into the platform.
 *
 * Traces are created first, then evaluations. A failing item never aborts
 * the import: it is recorded in the result and processing continues.
 *
 * With `options.dryRun` nothing is created and only `summary.total` is
 * filled in. NOTE: the dry-run total also counts `data.testCases` and
 * `data.runs`, although the real import below only processes `data.traces`
 * and `data.evaluations`.
 *
 * Importing evaluations requires `options.createdBy`; without it every
 * evaluation is recorded as failed with an explanatory error.
 *
 * @param client  Object exposing `traces.create` and `evaluations.create`.
 * @param data    Export object (`traces`, `evaluations`, `testCases`, `runs`).
 * @param options `organizationId`, `createdBy`, `skipDuplicates`, `dryRun`.
 * @returns `{ summary, details, errors }` describing what happened.
 *
 * @example
 * ```typescript
 * const data = JSON.parse(fs.readFileSync('backup.json', 'utf-8'));
 * const result = await importData(client, data, {
 *   organizationId: 123,
 *   createdBy: 1,
 *   skipDuplicates: true
 * });
 *
 * console.log(`Imported ${result.summary.imported} items`);
 * ```
 */
async function importData(client, data, options) {
    const result = {
        summary: { total: 0, imported: 0, skipped: 0, failed: 0 },
        details: {},
        errors: [],
    };
    if (options.dryRun) {
        // Count only; no client call is made in this branch.
        for (const key of ["traces", "evaluations", "testCases", "runs"]) {
            if (data[key]) {
                result.summary.total += data[key].length;
            }
        }
        return result;
    }
    if (data.traces) {
        result.details.traces = await importCollection(data.traces, result, options, (trace) => `trace:${trace.traceId}`, (trace) => client.traces.create({
            name: trace.name,
            traceId: trace.traceId,
            organizationId: options.organizationId || trace.organizationId,
            status: trace.status,
            // `??` rather than `||`: a 0 ms duration is real data and must survive the import.
            durationMs: trace.durationMs ?? undefined,
            metadata: trace.metadata || undefined,
        }));
        result.summary.total += data.traces.length;
    }
    if (data.evaluations) {
        result.details.evaluations = await importCollection(data.evaluations, result, options, (evaluation) => `evaluation:${evaluation.name}`, async (evaluation) => {
            // Checked per item so that each evaluation is reported as failed.
            if (!options.createdBy) {
                throw new Error("createdBy is required for importing evaluations");
            }
            await client.evaluations.create({
                name: evaluation.name,
                description: evaluation.description || undefined,
                type: evaluation.type,
                organizationId: options.organizationId || evaluation.organizationId,
                createdBy: options.createdBy,
                status: evaluation.status,
            });
        });
        result.summary.total += data.evaluations.length;
    }
    return result;
}
/**
 * Run an export and write it to `filePath` as pretty-printed JSON
 * (2-space indentation).
 *
 * The export format is always forced to "json", overriding any `format`
 * present in `options`. The file is written synchronously once the export
 * has completed.
 *
 * @param client   Client handed through to `exportData`.
 * @param filePath Destination path.
 * @param options  Export options; `format` is ignored.
 *
 * @example
 * ```typescript
 * await exportToFile(client, './backup.json', {
 *   includeTraces: true,
 *   includeEvaluations: true
 * });
 * ```
 */
async function exportToFile(client, filePath, options) {
    const jsonOptions = Object.assign({}, options, { format: "json" });
    const exported = await exportData(client, jsonOptions);
    // node:fs is loaded lazily, only after the export has succeeded.
    const nodeFs = await Promise.resolve().then(() => __importStar(require("node:fs")));
    const serialized = JSON.stringify(exported, null, 2);
    nodeFs.writeFileSync(filePath, serialized);
}
/**
 * Read a JSON export from disk and feed it to `importData`.
 *
 * The file is read synchronously as UTF-8 and parsed with `JSON.parse`;
 * read and parse errors propagate to the caller unchanged. The parsed value
 * is passed on without any validation.
 *
 * @param client   Client handed through to `importData`.
 * @param filePath Path of the JSON file to import.
 * @param options  Import options handed through to `importData`.
 * @returns Whatever `importData` returns for the parsed file.
 *
 * @example
 * ```typescript
 * const result = await importFromFile(client, './backup.json', {
 *   organizationId: 123,
 *   createdBy: 1
 * });
 * ```
 */
async function importFromFile(client, filePath, options) {
    const nodeFs = await Promise.resolve().then(() => __importStar(require("node:fs")));
    const parsed = JSON.parse(nodeFs.readFileSync(filePath, "utf-8"));
    return importData(client, parsed, options);
}
/**
 * Map one LangSmith-style run object onto this SDK's trace shape.
 *
 * - `run.id` is used for both `id` and `traceId`; when it is missing, `id`
 *   becomes 0 and `traceId` is generated from the current time plus a
 *   random number.
 * - `run.execution_time` is multiplied by 1000 and rounded to give
 *   `durationMs` (presumably seconds to milliseconds - TODO confirm the unit
 *   against the data source). A value of exactly 0 is kept as 0; a missing
 *   value yields null.
 * - `status` is "error" when `run.error` is truthy, otherwise "success".
 *
 * @param run            A single entry of `langsmithData.runs`.
 * @param organizationId Organization id stamped on the trace.
 * @returns The trace object handed to `importData`.
 */
function langSmithRunToTrace(run, organizationId) {
    // Truthy values behave as before; the extra check keeps an explicit 0
    // from being mistaken for "no duration".
    const hasDuration = Boolean(run.execution_time) || run.execution_time === 0;
    return {
        id: run.id || 0,
        name: run.name || "Imported Trace",
        traceId: run.id || `langsmith-${Date.now()}-${Math.random()}`,
        organizationId,
        status: run.error ? "error" : "success",
        durationMs: hasDuration ? Math.round(run.execution_time * 1000) : null,
        metadata: {
            source: "langsmith",
            original_id: run.id,
            inputs: run.inputs,
            outputs: run.outputs,
        },
        createdAt: run.start_time || new Date().toISOString(),
    };
}
/**
 * Import runs from a LangSmith-format object as traces.
 *
 * Each entry of `langsmithData.runs` is converted to a trace and the result
 * is passed to `importData` together with `options`. When `runs` is missing
 * or is not an array, an export object with an empty trace list is imported.
 *
 * @param client        Client handed through to `importData`.
 * @param langsmithData Object with an optional `runs` array.
 * @param options       Import options; `organizationId` is stamped on every
 *                      trace and on the generated metadata.
 * @returns Whatever `importData` returns.
 *
 * @example
 * ```typescript
 * const langsmithData = {
 *   runs: [
 *     { name: 'test-1', inputs: { ... }, outputs: { ... } }
 *   ]
 * };
 *
 * await importFromLangSmith(client, langsmithData, {
 *   organizationId: 123
 * });
 * ```
 */
async function importFromLangSmith(client, langsmithData, options) {
    const transformedData = {
        metadata: {
            exportedAt: new Date().toISOString(),
            version: "1.0.0",
            format: "json",
            organizationId: options.organizationId,
        },
        traces: [],
    };
    const lsData = langsmithData;
    if (lsData.runs && Array.isArray(lsData.runs)) {
        transformedData.traces = lsData.runs.map((run) => langSmithRunToTrace(run, options.organizationId));
    }
    return importData(client, transformedData, options);
}
/**
 * Convert the traces or evaluations of an export object to CSV text.
 *
 * - `type === "traces"` selects `data.traces`; any other value selects
 *   `data.evaluations`.
 * - The column list is taken from the keys of the FIRST item only; keys that
 *   appear only on later items are not emitted.
 * - null / undefined become an empty cell, objects are JSON-stringified,
 *   everything else goes through `String()`.
 * - Headers and cells are quoted RFC 4180-style: a field containing a comma,
 *   a double quote, CR or LF is wrapped in double quotes and embedded double
 *   quotes are doubled. Fields without those characters are emitted as-is.
 * - Rows are joined with "\n"; there is no trailing newline.
 *
 * @param data Export object holding `traces` and/or `evaluations`.
 * @param type "traces" or "evaluations".
 * @returns CSV text, or "" when the selected collection is missing or empty.
 *
 * @example
 * ```typescript
 * const data = await exportData(client, { format: 'json', includeTraces: true });
 * const csv = convertToCSV(data, 'traces');
 * fs.writeFileSync('traces.csv', csv);
 * ```
 */
function convertToCSV(data, type) {
    const items = type === "traces" ? data.traces : data.evaluations;
    if (!items || items.length === 0)
        return "";
    // Characters that force a field to be quoted.
    const needsQuoting = /[",\r\n]/;
    const escapeField = (text) => needsQuoting.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
    const toCell = (value) => {
        if (value === null || value === undefined)
            return "";
        // JSON output always contains quotes, so object cells always end up quoted.
        const text = typeof value === "object" ? JSON.stringify(value) : String(value);
        return escapeField(text);
    };
    // Get headers from first item
    const headers = Object.keys(items[0]);
    const rows = items.map((item) => headers.map((h) => toCell(item[h])).join(","));
    return [headers.map(escapeField).join(","), ...rows].join("\n");
}
@@ -0,0 +1,44 @@
1
+ /**
2
+ * EvalGate SDK
3
+ *
4
+ * Official TypeScript/JavaScript SDK for EvalGate.
5
+ * Build confidence in your AI systems with comprehensive evaluation tools.
6
+ *
7
+ * @packageDocumentation
8
+ */
9
+ export { AIEvalClient } from "./client";
10
+ import { AuthenticationError, EvalGateError, NetworkError, RateLimitError, SDKError } from "./errors";
11
+ export { EvalGateError, RateLimitError, AuthenticationError, SDKError as ValidationError, // Using SDKError as ValidationError for backward compatibility
12
+ NetworkError, };
13
+ export { containsAllRequiredFields, containsJSON, containsKeywords, containsLanguage, expect, followsInstructions, hasFactualAccuracy, hasLength, hasNoHallucinations, hasNoToxicity, hasReadabilityScore, hasSentiment, hasValidCodeSyntax, isValidEmail, isValidURL, matchesPattern, matchesSchema, notContainsPII, respondedWithinTime, similarTo, withinRange, } from "./assertions";
14
+ import { createContext, EvalContext, getCurrentContext, withContext } from "./context";
15
+ export { createContext, getCurrentContext as getContext, withContext, EvalContext as ContextManager, };
16
+ export { cloneContext, mergeContexts, validateContext, } from "./runtime/context";
17
+ export { createContext as createEvalContext, createResult, defineEval, defineSuite, evalai, } from "./runtime/eval";
18
+ export { createLocalExecutor, defaultLocalExecutor, } from "./runtime/executor";
19
+ export { createEvalRuntime, disposeActiveRuntime, getActiveRuntime, setActiveRuntime, } from "./runtime/registry";
20
+ export type { CloudExecutor, DefineEvalFunction, EvalContext, EvalExecutor, EvalExecutorInterface, EvalOptions, EvalResult, EvalRuntime, EvalSpec, ExecutorCapabilities, LocalExecutor, SpecConfig, SpecOptions, WorkerExecutor, } from "./runtime/types";
21
+ export { EvalRuntimeError, RuntimeError, SpecExecutionError, SpecRegistrationError, } from "./runtime/types";
22
+ export { createTestSuite, type TestCaseResult, TestSuite, TestSuiteCase, TestSuiteCaseResult, TestSuiteConfig, TestSuiteResult, } from "./testing";
23
+ import { compareWithSnapshot, snapshot } from "./snapshot";
24
+ export { snapshot, compareWithSnapshot, snapshot as saveSnapshot, compareWithSnapshot as compareSnapshots, };
25
+ import type { ExportFormat } from "./export";
26
+ import { exportData, importData } from "./export";
27
+ export { exportData, importData };
28
+ export type { ExportFormat, ExportFormat as ExportType };
29
+ export { RequestBatcher } from "./batch";
30
+ export { CacheTTL, RequestCache } from "./cache";
31
+ export { type CheckArgs, EXIT, parseArgs, runCheck } from "./cli/check";
32
+ export { traceAnthropic } from "./integrations/anthropic";
33
+ export { traceOpenAI } from "./integrations/openai";
34
+ export { type OpenAIChatEvalCase, type OpenAIChatEvalOptions, type OpenAIChatEvalResult, openAIChatEval, } from "./integrations/openai-eval";
35
+ export { Logger } from "./logger";
36
+ export { extendExpectWithToPassGate } from "./matchers";
37
+ export { autoPaginate, createPaginatedIterator, decodeCursor, encodeCursor, PaginatedIterator, type PaginatedResponse, type PaginationParams, } from "./pagination";
38
+ export { ARTIFACTS, type Baseline, type BaselineTolerance, GATE_CATEGORY, GATE_EXIT, type GateCategory, type GateExitCode, REPORT_SCHEMA_VERSION, type RegressionDelta, type RegressionReport, } from "./regression";
39
+ export { batchProcess, batchRead, RateLimiter, streamEvaluation, } from "./streaming";
40
+ export type { Annotation, AnnotationItem, AnnotationTask, APIKey, APIKeyUsage, APIKeyWithSecret, BatchOptions, ClientConfig as AIEvalConfig, CreateAnnotationItemParams, CreateAnnotationParams, CreateAnnotationTaskParams, CreateAPIKeyParams, CreateLLMJudgeConfigParams, CreateWebhookParams, Evaluation as EvaluationData, EvaluationRun, EvaluationRunDetail, ExportOptions, GenericMetadata as AnnotationData, GetLLMJudgeAlignmentParams, GetUsageParams, ImportOptions, ListAnnotationItemsParams, ListAnnotationsParams, ListAnnotationTasksParams, ListAPIKeysParams, ListLLMJudgeConfigsParams, ListLLMJudgeResultsParams, ListWebhookDeliveriesParams, ListWebhooksParams, LLMJudgeAlignment, LLMJudgeConfig, LLMJudgeEvaluateResult, LLMJudgeResult as LLMJudgeData, Organization, RetryConfig, SnapshotData, Span as SpanData, StreamOptions, TestCase, TestResult, Trace as TraceData, TraceDetail, TracedResponse, UpdateAPIKeyParams, UpdateWebhookParams, UsageStats, UsageSummary, Webhook, WebhookDelivery, } from "./types";
41
+ export { EvaluationTemplates, type EvaluationTemplateType, type FeatureUsage, type OrganizationLimits, } from "./types";
42
+ export { type AgentHandoff, type AgentSpanContext, type CostCategory, type CostRecord, createWorkflowTracer, type DecisionAlternative, type DecisionType, type HandoffType, type LLMProvider, type RecordCostParams, type RecordDecisionParams, traceAutoGen, traceCrewAI, traceLangChainAgent, traceWorkflowStep, type WorkflowContext, type WorkflowDefinition, type WorkflowEdge, type WorkflowNode, type WorkflowStatus, WorkflowTracer, type WorkflowTracerOptions, } from "./workflows";
43
+ import { AIEvalClient } from "./client";
44
+ export default AIEvalClient;
package/dist/index.js ADDED
@@ -0,0 +1,153 @@
1
+ "use strict";
2
+ /**
3
+ * EvalGate SDK
4
+ *
5
+ * Official TypeScript/JavaScript SDK for EvalGate.
6
+ * Build confidence in your AI systems with comprehensive evaluation tools.
7
+ *
8
+ * @packageDocumentation
9
+ */
10
+ Object.defineProperty(exports, "__esModule", { value: true });
11
+ exports.createTestSuite = exports.SpecRegistrationError = exports.SpecExecutionError = exports.RuntimeError = exports.EvalRuntimeError = exports.setActiveRuntime = exports.getActiveRuntime = exports.disposeActiveRuntime = exports.createEvalRuntime = exports.defaultLocalExecutor = exports.createLocalExecutor = exports.evalai = exports.defineSuite = exports.defineEval = exports.createResult = exports.createEvalContext = exports.validateContext = exports.mergeContexts = exports.cloneContext = exports.ContextManager = exports.withContext = exports.getContext = exports.createContext = exports.withinRange = exports.similarTo = exports.respondedWithinTime = exports.notContainsPII = exports.matchesSchema = exports.matchesPattern = exports.isValidURL = exports.isValidEmail = exports.hasValidCodeSyntax = exports.hasSentiment = exports.hasReadabilityScore = exports.hasNoToxicity = exports.hasNoHallucinations = exports.hasLength = exports.hasFactualAccuracy = exports.followsInstructions = exports.expect = exports.containsLanguage = exports.containsKeywords = exports.containsJSON = exports.containsAllRequiredFields = exports.NetworkError = exports.ValidationError = exports.AuthenticationError = exports.RateLimitError = exports.EvalGateError = exports.AIEvalClient = void 0;
12
+ exports.WorkflowTracer = exports.traceWorkflowStep = exports.traceLangChainAgent = exports.traceCrewAI = exports.traceAutoGen = exports.createWorkflowTracer = exports.EvaluationTemplates = exports.streamEvaluation = exports.RateLimiter = exports.batchRead = exports.batchProcess = exports.REPORT_SCHEMA_VERSION = exports.GATE_EXIT = exports.GATE_CATEGORY = exports.ARTIFACTS = exports.PaginatedIterator = exports.encodeCursor = exports.decodeCursor = exports.createPaginatedIterator = exports.autoPaginate = exports.extendExpectWithToPassGate = exports.Logger = exports.openAIChatEval = exports.traceOpenAI = exports.traceAnthropic = exports.runCheck = exports.parseArgs = exports.EXIT = exports.RequestCache = exports.CacheTTL = exports.RequestBatcher = exports.importData = exports.exportData = exports.compareSnapshots = exports.saveSnapshot = exports.compareWithSnapshot = exports.snapshot = exports.TestSuite = void 0;
13
+ // Main SDK exports
14
+ var client_1 = require("./client");
15
+ Object.defineProperty(exports, "AIEvalClient", { enumerable: true, get: function () { return client_1.AIEvalClient; } });
16
+ // Enhanced error handling (Tier 1.5)
17
+ const errors_1 = require("./errors");
18
+ Object.defineProperty(exports, "AuthenticationError", { enumerable: true, get: function () { return errors_1.AuthenticationError; } });
19
+ Object.defineProperty(exports, "EvalGateError", { enumerable: true, get: function () { return errors_1.EvalGateError; } });
20
+ Object.defineProperty(exports, "NetworkError", { enumerable: true, get: function () { return errors_1.NetworkError; } });
21
+ Object.defineProperty(exports, "RateLimitError", { enumerable: true, get: function () { return errors_1.RateLimitError; } });
22
+ Object.defineProperty(exports, "ValidationError", { enumerable: true, get: function () { return errors_1.SDKError; } });
23
+ // Enhanced assertions (Tier 1.3)
24
+ var assertions_1 = require("./assertions");
25
+ Object.defineProperty(exports, "containsAllRequiredFields", { enumerable: true, get: function () { return assertions_1.containsAllRequiredFields; } });
26
+ Object.defineProperty(exports, "containsJSON", { enumerable: true, get: function () { return assertions_1.containsJSON; } });
27
+ Object.defineProperty(exports, "containsKeywords", { enumerable: true, get: function () { return assertions_1.containsKeywords; } });
28
+ Object.defineProperty(exports, "containsLanguage", { enumerable: true, get: function () { return assertions_1.containsLanguage; } });
29
+ Object.defineProperty(exports, "expect", { enumerable: true, get: function () { return assertions_1.expect; } });
30
+ Object.defineProperty(exports, "followsInstructions", { enumerable: true, get: function () { return assertions_1.followsInstructions; } });
31
+ Object.defineProperty(exports, "hasFactualAccuracy", { enumerable: true, get: function () { return assertions_1.hasFactualAccuracy; } });
32
+ Object.defineProperty(exports, "hasLength", { enumerable: true, get: function () { return assertions_1.hasLength; } });
33
+ Object.defineProperty(exports, "hasNoHallucinations", { enumerable: true, get: function () { return assertions_1.hasNoHallucinations; } });
34
+ Object.defineProperty(exports, "hasNoToxicity", { enumerable: true, get: function () { return assertions_1.hasNoToxicity; } });
35
+ Object.defineProperty(exports, "hasReadabilityScore", { enumerable: true, get: function () { return assertions_1.hasReadabilityScore; } });
36
+ Object.defineProperty(exports, "hasSentiment", { enumerable: true, get: function () { return assertions_1.hasSentiment; } });
37
+ Object.defineProperty(exports, "hasValidCodeSyntax", { enumerable: true, get: function () { return assertions_1.hasValidCodeSyntax; } });
38
+ Object.defineProperty(exports, "isValidEmail", { enumerable: true, get: function () { return assertions_1.isValidEmail; } });
39
+ Object.defineProperty(exports, "isValidURL", { enumerable: true, get: function () { return assertions_1.isValidURL; } });
40
+ Object.defineProperty(exports, "matchesPattern", { enumerable: true, get: function () { return assertions_1.matchesPattern; } });
41
+ Object.defineProperty(exports, "matchesSchema", { enumerable: true, get: function () { return assertions_1.matchesSchema; } });
42
+ Object.defineProperty(exports, "notContainsPII", { enumerable: true, get: function () { return assertions_1.notContainsPII; } });
43
+ Object.defineProperty(exports, "respondedWithinTime", { enumerable: true, get: function () { return assertions_1.respondedWithinTime; } });
44
+ Object.defineProperty(exports, "similarTo", { enumerable: true, get: function () { return assertions_1.similarTo; } });
45
+ Object.defineProperty(exports, "withinRange", { enumerable: true, get: function () { return assertions_1.withinRange; } });
46
+ // Context propagation (Tier 2.9)
47
+ const context_1 = require("./context");
48
+ Object.defineProperty(exports, "createContext", { enumerable: true, get: function () { return context_1.createContext; } });
49
+ Object.defineProperty(exports, "ContextManager", { enumerable: true, get: function () { return context_1.EvalContext; } });
50
+ Object.defineProperty(exports, "getContext", { enumerable: true, get: function () { return context_1.getCurrentContext; } });
51
+ Object.defineProperty(exports, "withContext", { enumerable: true, get: function () { return context_1.withContext; } });
52
+ var context_2 = require("./runtime/context");
53
+ Object.defineProperty(exports, "cloneContext", { enumerable: true, get: function () { return context_2.cloneContext; } });
54
+ Object.defineProperty(exports, "mergeContexts", { enumerable: true, get: function () { return context_2.mergeContexts; } });
55
+ Object.defineProperty(exports, "validateContext", { enumerable: true, get: function () { return context_2.validateContext; } });
56
+ // LAYER 1: Runtime Foundation - NEW PROGRAMMING MODEL
57
+ var eval_1 = require("./runtime/eval");
58
+ Object.defineProperty(exports, "createEvalContext", { enumerable: true, get: function () { return eval_1.createContext; } });
59
+ Object.defineProperty(exports, "createResult", { enumerable: true, get: function () { return eval_1.createResult; } });
60
+ Object.defineProperty(exports, "defineEval", { enumerable: true, get: function () { return eval_1.defineEval; } });
61
+ Object.defineProperty(exports, "defineSuite", { enumerable: true, get: function () { return eval_1.defineSuite; } });
62
+ Object.defineProperty(exports, "evalai", { enumerable: true, get: function () { return eval_1.evalai; } });
63
+ var executor_1 = require("./runtime/executor");
64
+ Object.defineProperty(exports, "createLocalExecutor", { enumerable: true, get: function () { return executor_1.createLocalExecutor; } });
65
+ Object.defineProperty(exports, "defaultLocalExecutor", { enumerable: true, get: function () { return executor_1.defaultLocalExecutor; } });
66
+ var registry_1 = require("./runtime/registry");
67
+ Object.defineProperty(exports, "createEvalRuntime", { enumerable: true, get: function () { return registry_1.createEvalRuntime; } });
68
+ Object.defineProperty(exports, "disposeActiveRuntime", { enumerable: true, get: function () { return registry_1.disposeActiveRuntime; } });
69
+ Object.defineProperty(exports, "getActiveRuntime", { enumerable: true, get: function () { return registry_1.getActiveRuntime; } });
70
+ Object.defineProperty(exports, "setActiveRuntime", { enumerable: true, get: function () { return registry_1.setActiveRuntime; } });
71
+ // Runtime errors
72
+ var types_1 = require("./runtime/types");
73
+ Object.defineProperty(exports, "EvalRuntimeError", { enumerable: true, get: function () { return types_1.EvalRuntimeError; } });
74
+ Object.defineProperty(exports, "RuntimeError", { enumerable: true, get: function () { return types_1.RuntimeError; } });
75
+ Object.defineProperty(exports, "SpecExecutionError", { enumerable: true, get: function () { return types_1.SpecExecutionError; } });
76
+ Object.defineProperty(exports, "SpecRegistrationError", { enumerable: true, get: function () { return types_1.SpecRegistrationError; } });
77
+ // Test suite builder (Tier 2.7) - BACKWARD COMPATIBILITY LAYER
78
+ var testing_1 = require("./testing");
79
+ Object.defineProperty(exports, "createTestSuite", { enumerable: true, get: function () { return testing_1.createTestSuite; } });
80
+ Object.defineProperty(exports, "TestSuite", { enumerable: true, get: function () { return testing_1.TestSuite; } });
81
+ // Snapshot testing (Tier 2.8)
82
+ const snapshot_1 = require("./snapshot");
83
+ Object.defineProperty(exports, "compareWithSnapshot", { enumerable: true, get: function () { return snapshot_1.compareWithSnapshot; } });
84
+ Object.defineProperty(exports, "compareSnapshots", { enumerable: true, get: function () { return snapshot_1.compareWithSnapshot; } });
85
+ Object.defineProperty(exports, "snapshot", { enumerable: true, get: function () { return snapshot_1.snapshot; } });
86
+ Object.defineProperty(exports, "saveSnapshot", { enumerable: true, get: function () { return snapshot_1.snapshot; } });
87
+ // Export/Import utilities (Tier 4.18)
88
+ const export_1 = require("./export");
89
+ Object.defineProperty(exports, "exportData", { enumerable: true, get: function () { return export_1.exportData; } });
90
+ Object.defineProperty(exports, "importData", { enumerable: true, get: function () { return export_1.importData; } });
91
+ // Note: RequestBatcher is for advanced users only
92
+ // Most users don't need this - batching is automatic
93
+ var batch_1 = require("./batch");
94
+ Object.defineProperty(exports, "RequestBatcher", { enumerable: true, get: function () { return batch_1.RequestBatcher; } });
95
+ // Performance optimization utilities (v1.3.0)
96
+ // Note: RequestCache and CacheTTL are for advanced users only
97
+ // Most users don't need these - caching is automatic
98
+ var cache_1 = require("./cache");
99
+ Object.defineProperty(exports, "CacheTTL", { enumerable: true, get: function () { return cache_1.CacheTTL; } });
100
+ Object.defineProperty(exports, "RequestCache", { enumerable: true, get: function () { return cache_1.RequestCache; } });
101
+ // CLI (programmatic use)
102
+ var check_1 = require("./cli/check");
103
+ Object.defineProperty(exports, "EXIT", { enumerable: true, get: function () { return check_1.EXIT; } });
104
+ Object.defineProperty(exports, "parseArgs", { enumerable: true, get: function () { return check_1.parseArgs; } });
105
+ Object.defineProperty(exports, "runCheck", { enumerable: true, get: function () { return check_1.runCheck; } });
106
+ var anthropic_1 = require("./integrations/anthropic");
107
+ Object.defineProperty(exports, "traceAnthropic", { enumerable: true, get: function () { return anthropic_1.traceAnthropic; } });
108
+ // Framework integrations (Tier 1.2)
109
+ var openai_1 = require("./integrations/openai");
110
+ Object.defineProperty(exports, "traceOpenAI", { enumerable: true, get: function () { return openai_1.traceOpenAI; } });
111
+ // OpenAI regression eval (local-first, no account required)
112
+ var openai_eval_1 = require("./integrations/openai-eval");
113
+ Object.defineProperty(exports, "openAIChatEval", { enumerable: true, get: function () { return openai_eval_1.openAIChatEval; } });
114
+ // Debug logger (Tier 4.17)
115
+ var logger_1 = require("./logger");
116
+ Object.defineProperty(exports, "Logger", { enumerable: true, get: function () { return logger_1.Logger; } });
117
+ // Vitest matcher: expect(await openAIChatEval(...)).toPassGate()
118
+ var matchers_1 = require("./matchers");
119
+ Object.defineProperty(exports, "extendExpectWithToPassGate", { enumerable: true, get: function () { return matchers_1.extendExpectWithToPassGate; } });
120
+ var pagination_1 = require("./pagination");
121
+ Object.defineProperty(exports, "autoPaginate", { enumerable: true, get: function () { return pagination_1.autoPaginate; } });
122
+ Object.defineProperty(exports, "createPaginatedIterator", { enumerable: true, get: function () { return pagination_1.createPaginatedIterator; } });
123
+ Object.defineProperty(exports, "decodeCursor", { enumerable: true, get: function () { return pagination_1.decodeCursor; } });
124
+ Object.defineProperty(exports, "encodeCursor", { enumerable: true, get: function () { return pagination_1.encodeCursor; } });
125
+ Object.defineProperty(exports, "PaginatedIterator", { enumerable: true, get: function () { return pagination_1.PaginatedIterator; } });
126
+ // Regression gate constants & types (v1.6.0)
127
+ var regression_1 = require("./regression");
128
+ Object.defineProperty(exports, "ARTIFACTS", { enumerable: true, get: function () { return regression_1.ARTIFACTS; } });
129
+ Object.defineProperty(exports, "GATE_CATEGORY", { enumerable: true, get: function () { return regression_1.GATE_CATEGORY; } });
130
+ Object.defineProperty(exports, "GATE_EXIT", { enumerable: true, get: function () { return regression_1.GATE_EXIT; } });
131
+ Object.defineProperty(exports, "REPORT_SCHEMA_VERSION", { enumerable: true, get: function () { return regression_1.REPORT_SCHEMA_VERSION; } });
132
+ // Streaming and batch processing (Tier 3.3)
133
+ // Use functions from ./streaming module instead of these deprecated exports
134
+ var streaming_1 = require("./streaming");
135
+ Object.defineProperty(exports, "batchProcess", { enumerable: true, get: function () { return streaming_1.batchProcess; } });
136
+ Object.defineProperty(exports, "batchRead", { enumerable: true, get: function () { return streaming_1.batchRead; } });
137
+ Object.defineProperty(exports, "RateLimiter", { enumerable: true, get: function () { return streaming_1.RateLimiter; } });
138
+ Object.defineProperty(exports, "streamEvaluation", { enumerable: true, get: function () { return streaming_1.streamEvaluation; } });
139
+ // New exports for v1.1.0
140
+ var types_2 = require("./types");
141
+ Object.defineProperty(exports, "EvaluationTemplates", { enumerable: true, get: function () { return types_2.EvaluationTemplates; } });
142
+ // Workflow tracing (Orchestration Layer)
143
+ var workflows_1 = require("./workflows");
144
+ Object.defineProperty(exports, "createWorkflowTracer", { enumerable: true, get: function () { return workflows_1.createWorkflowTracer; } });
145
+ Object.defineProperty(exports, "traceAutoGen", { enumerable: true, get: function () { return workflows_1.traceAutoGen; } });
146
+ Object.defineProperty(exports, "traceCrewAI", { enumerable: true, get: function () { return workflows_1.traceCrewAI; } });
147
+ // Framework integrations
148
+ Object.defineProperty(exports, "traceLangChainAgent", { enumerable: true, get: function () { return workflows_1.traceLangChainAgent; } });
149
+ Object.defineProperty(exports, "traceWorkflowStep", { enumerable: true, get: function () { return workflows_1.traceWorkflowStep; } });
150
+ Object.defineProperty(exports, "WorkflowTracer", { enumerable: true, get: function () { return workflows_1.WorkflowTracer; } });
151
+ // Default export for convenience
152
+ const client_2 = require("./client");
153
+ exports.default = client_2.AIEvalClient;
package/dist/integrations/anthropic.d.ts ADDED
@@ -0,0 +1,91 @@
1
+ /**
2
+ * Anthropic Integration
3
+ * Tier 1.2: Framework Auto-Instrumentation - Anthropic wrapper
4
+ *
5
+ * @example
6
+ * ```typescript
7
+ * import { traceAnthropic } from '@evalgate/sdk';
8
+ * import Anthropic from '@anthropic-ai/sdk';
9
+ *
10
+ * const anthropic = new Anthropic({ apiKey: '...' });
11
+ * const tracedAnthropic = traceAnthropic(anthropic, client);
12
+ *
13
+ * // All calls are automatically traced
14
+ * const message = await tracedAnthropic.messages.create({
15
+ * model: 'claude-3-5-sonnet-20241022',
16
+ * max_tokens: 1024,
17
+ * messages: [{ role: 'user', content: 'Hello!' }]
18
+ * });
19
+ * ```
20
+ */
21
+ import type { AIEvalClient } from "../client";
22
+ interface AnthropicMessageParams {
23
+ model: string;
24
+ messages: unknown[];
25
+ temperature?: number;
26
+ max_tokens?: number;
27
+ [key: string]: unknown;
28
+ }
29
+ interface AnthropicMessage {
30
+ content: unknown;
31
+ usage?: unknown;
32
+ stop_reason?: unknown;
33
+ [key: string]: unknown;
34
+ }
35
+ interface AnthropicClient {
36
+ messages: {
37
+ create: (params: AnthropicMessageParams, requestOptions?: Record<string, unknown>) => Promise<AnthropicMessage>;
38
+ };
39
+ }
40
+ export interface AnthropicTraceOptions {
41
+ /** Whether to capture input (default: true) */
42
+ captureInput?: boolean;
43
+ /** Whether to capture output (default: true) */
44
+ captureOutput?: boolean;
45
+ /** Whether to capture metadata (default: true) */
46
+ captureMetadata?: boolean;
47
+ /** Organization ID for traces */
48
+ organizationId?: number;
49
+ /** Custom trace name prefix */
50
+ tracePrefix?: string;
51
+ }
52
+ /**
53
+ * Wrap Anthropic client with automatic tracing
54
+ *
55
+ * @example
56
+ * ```typescript
57
+ * import Anthropic from '@anthropic-ai/sdk';
58
+ * import { traceAnthropic } from '@evalgate/sdk';
59
+ *
60
+ * const anthropic = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY });
61
+ * const tracedAnthropic = traceAnthropic(anthropic, evalClient);
62
+ *
63
+ * // Automatically traced
64
+ * const message = await tracedAnthropic.messages.create({
65
+ * model: 'claude-3-5-sonnet-20241022',
66
+ * max_tokens: 1024,
67
+ * messages: [{ role: 'user', content: 'Hello, Claude!' }]
68
+ * });
69
+ * ```
70
+ */
71
+ export declare function traceAnthropic(anthropic: AnthropicClient, evalClient: AIEvalClient, options?: AnthropicTraceOptions): AnthropicClient;
72
+ /**
73
+ * Manual trace wrapper for Anthropic calls
74
+ *
75
+ * @example
76
+ * ```typescript
77
+ * const message = await traceAnthropicCall(
78
+ * evalClient,
79
+ * 'claude-completion',
80
+ * async () => {
81
+ * return await anthropic.messages.create({
82
+ * model: 'claude-3-5-sonnet-20241022',
83
+ * max_tokens: 1024,
84
+ * messages: [{ role: 'user', content: 'Hello!' }]
85
+ * });
86
+ * }
87
+ * );
88
+ * ```
89
+ */
90
+ export declare function traceAnthropicCall<T>(evalClient: AIEvalClient, name: string, fn: () => Promise<T>, options?: AnthropicTraceOptions): Promise<T>;
91
+ export {};