@evalgate/sdk 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. package/CHANGELOG.md +638 -0
  2. package/README.md +398 -0
  3. package/dist/assertions.d.ts +189 -0
  4. package/dist/assertions.js +662 -0
  5. package/dist/batch.d.ts +68 -0
  6. package/dist/batch.js +179 -0
  7. package/dist/cache.d.ts +65 -0
  8. package/dist/cache.js +131 -0
  9. package/dist/cli/api.d.ts +108 -0
  10. package/dist/cli/api.js +132 -0
  11. package/dist/cli/baseline.d.ts +10 -0
  12. package/dist/cli/baseline.js +172 -0
  13. package/dist/cli/check.d.ts +73 -0
  14. package/dist/cli/check.js +355 -0
  15. package/dist/cli/ci-context.d.ts +6 -0
  16. package/dist/cli/ci-context.js +112 -0
  17. package/dist/cli/ci.d.ts +45 -0
  18. package/dist/cli/ci.js +192 -0
  19. package/dist/cli/config.d.ts +30 -0
  20. package/dist/cli/config.js +230 -0
  21. package/dist/cli/constants.d.ts +15 -0
  22. package/dist/cli/constants.js +18 -0
  23. package/dist/cli/diff.d.ts +173 -0
  24. package/dist/cli/diff.js +685 -0
  25. package/dist/cli/discover.d.ts +84 -0
  26. package/dist/cli/discover.js +419 -0
  27. package/dist/cli/doctor.d.ts +88 -0
  28. package/dist/cli/doctor.js +675 -0
  29. package/dist/cli/env.d.ts +21 -0
  30. package/dist/cli/env.js +42 -0
  31. package/dist/cli/explain.d.ts +58 -0
  32. package/dist/cli/explain.js +561 -0
  33. package/dist/cli/formatters/github.d.ts +8 -0
  34. package/dist/cli/formatters/github.js +135 -0
  35. package/dist/cli/formatters/human.d.ts +6 -0
  36. package/dist/cli/formatters/human.js +110 -0
  37. package/dist/cli/formatters/json.d.ts +6 -0
  38. package/dist/cli/formatters/json.js +10 -0
  39. package/dist/cli/formatters/pr-comment.d.ts +12 -0
  40. package/dist/cli/formatters/pr-comment.js +103 -0
  41. package/dist/cli/formatters/types.d.ts +103 -0
  42. package/dist/cli/formatters/types.js +8 -0
  43. package/dist/cli/gate.d.ts +21 -0
  44. package/dist/cli/gate.js +179 -0
  45. package/dist/cli/impact-analysis.d.ts +63 -0
  46. package/dist/cli/impact-analysis.js +252 -0
  47. package/dist/cli/index.d.ts +9 -0
  48. package/dist/cli/index.js +332 -0
  49. package/dist/cli/init.d.ts +16 -0
  50. package/dist/cli/init.js +292 -0
  51. package/dist/cli/manifest.d.ts +103 -0
  52. package/dist/cli/manifest.js +282 -0
  53. package/dist/cli/migrate.d.ts +41 -0
  54. package/dist/cli/migrate.js +349 -0
  55. package/dist/cli/policy-packs.d.ts +23 -0
  56. package/dist/cli/policy-packs.js +89 -0
  57. package/dist/cli/print-config.d.ts +29 -0
  58. package/dist/cli/print-config.js +270 -0
  59. package/dist/cli/profiles.d.ts +28 -0
  60. package/dist/cli/profiles.js +30 -0
  61. package/dist/cli/reason-codes.d.ts +17 -0
  62. package/dist/cli/reason-codes.js +19 -0
  63. package/dist/cli/regression-gate.d.ts +15 -0
  64. package/dist/cli/regression-gate.js +341 -0
  65. package/dist/cli/render/snippet.d.ts +5 -0
  66. package/dist/cli/render/snippet.js +15 -0
  67. package/dist/cli/render/sort.d.ts +10 -0
  68. package/dist/cli/render/sort.js +24 -0
  69. package/dist/cli/report/build-check-report.d.ts +19 -0
  70. package/dist/cli/report/build-check-report.js +132 -0
  71. package/dist/cli/run.d.ts +101 -0
  72. package/dist/cli/run.js +395 -0
  73. package/dist/cli/share.d.ts +17 -0
  74. package/dist/cli/share.js +91 -0
  75. package/dist/cli/upgrade.d.ts +15 -0
  76. package/dist/cli/upgrade.js +492 -0
  77. package/dist/cli/workspace.d.ts +31 -0
  78. package/dist/cli/workspace.js +68 -0
  79. package/dist/client.d.ts +368 -0
  80. package/dist/client.js +893 -0
  81. package/dist/client.request.test.d.ts +1 -0
  82. package/dist/client.request.test.js +232 -0
  83. package/dist/context.d.ts +134 -0
  84. package/dist/context.js +215 -0
  85. package/dist/errors.d.ts +82 -0
  86. package/dist/errors.js +298 -0
  87. package/dist/export.d.ts +195 -0
  88. package/dist/export.js +344 -0
  89. package/dist/index.d.ts +44 -0
  90. package/dist/index.js +153 -0
  91. package/dist/integrations/anthropic.d.ts +91 -0
  92. package/dist/integrations/anthropic.js +163 -0
  93. package/dist/integrations/openai-eval.d.ts +57 -0
  94. package/dist/integrations/openai-eval.js +232 -0
  95. package/dist/integrations/openai.d.ts +92 -0
  96. package/dist/integrations/openai.js +160 -0
  97. package/dist/local.d.ts +39 -0
  98. package/dist/local.js +148 -0
  99. package/dist/logger.d.ts +128 -0
  100. package/dist/logger.js +227 -0
  101. package/dist/matchers/index.d.ts +1 -0
  102. package/dist/matchers/index.js +6 -0
  103. package/dist/matchers/to-pass-gate.d.ts +29 -0
  104. package/dist/matchers/to-pass-gate.js +35 -0
  105. package/dist/pagination.d.ts +74 -0
  106. package/dist/pagination.js +139 -0
  107. package/dist/regression.d.ts +100 -0
  108. package/dist/regression.js +44 -0
  109. package/dist/runtime/adapters/config-to-dsl.d.ts +33 -0
  110. package/dist/runtime/adapters/config-to-dsl.js +400 -0
  111. package/dist/runtime/adapters/testsuite-to-dsl.d.ts +63 -0
  112. package/dist/runtime/adapters/testsuite-to-dsl.js +276 -0
  113. package/dist/runtime/context.d.ts +26 -0
  114. package/dist/runtime/context.js +74 -0
  115. package/dist/runtime/eval.d.ts +46 -0
  116. package/dist/runtime/eval.js +244 -0
  117. package/dist/runtime/execution-mode.d.ts +80 -0
  118. package/dist/runtime/execution-mode.js +357 -0
  119. package/dist/runtime/executor.d.ts +16 -0
  120. package/dist/runtime/executor.js +152 -0
  121. package/dist/runtime/registry.d.ts +78 -0
  122. package/dist/runtime/registry.js +403 -0
  123. package/dist/runtime/run-report.d.ts +200 -0
  124. package/dist/runtime/run-report.js +222 -0
  125. package/dist/runtime/types.d.ts +356 -0
  126. package/dist/runtime/types.js +76 -0
  127. package/dist/snapshot.d.ts +176 -0
  128. package/dist/snapshot.js +322 -0
  129. package/dist/streaming.d.ts +173 -0
  130. package/dist/streaming.js +268 -0
  131. package/dist/testing.d.ts +273 -0
  132. package/dist/testing.js +317 -0
  133. package/dist/types.d.ts +754 -0
  134. package/dist/types.js +54 -0
  135. package/dist/utils/input-hash.d.ts +8 -0
  136. package/dist/utils/input-hash.js +41 -0
  137. package/dist/version.d.ts +7 -0
  138. package/dist/version.js +10 -0
  139. package/dist/workflows.d.ts +389 -0
  140. package/dist/workflows.js +671 -0
  141. package/package.json +117 -0
@@ -0,0 +1,317 @@
1
+ "use strict";
2
+ /**
3
+ * Test Suite Builder
4
+ * Tier 2.7: Declarative test definitions
5
+ *
6
+ * @example
7
+ * ```typescript
8
+ * import { createTestSuite, expect } from '@evalgate/sdk';
9
+ *
10
+ * const suite = createTestSuite('chatbot-responses', {
11
+ * cases: [
12
+ * {
13
+ * input: 'Hello',
14
+ * assertions: [
15
+ * (output) => expect(output).toContain('greeting'),
16
+ * (output) => expect(output).toHaveSentiment('positive')
17
+ * ]
18
+ * }
19
+ * ]
20
+ * });
21
+ *
22
+ * const results = await suite.run();
23
+ * ```
24
+ */
25
+ Object.defineProperty(exports, "__esModule", { value: true });
26
+ exports.TestSuite = void 0;
27
+ exports.createTestSuite = createTestSuite;
28
+ exports.containsKeywords = containsKeywords;
29
+ exports.matchesPattern = matchesPattern;
30
+ exports.hasSentiment = hasSentiment;
31
+ exports.hasLength = hasLength;
32
+ const assertions_1 = require("./assertions");
33
/**
 * Test Suite for declarative evaluation testing.
 *
 * Wraps a suite name and a configuration object. The configuration fields read
 * by this class are `cases` (array of test cases) and the optional `executor`,
 * `timeout`, `parallel`, `stopOnFailure` and `retries`.
 */
class TestSuite {
    /**
     * @param name - Suite name; echoed back in run results and metadata.
     * @param config - Suite configuration. Stored by reference, so `addCase`
     *   mutates the caller's `cases` array.
     */
    constructor(name, config) {
        this.name = name;
        this.config = config;
    }
    /**
     * Run all test cases
     *
     * Each case produces its output from `config.executor(input)` (raced
     * against `config.timeout`, default 30000 ms) or, when no executor is
     * configured, from the case's own `expected` value. The case's assertions
     * are then applied; when a case has `expected` but no `assertions`, a
     * default `expect(actual).toEqual(expected)` check is used.
     *
     * Cases run concurrently when `config.parallel` is set, otherwise in order
     * (stopping at the first failure when `config.stopOnFailure` is set).
     * Failing cases are afterwards re-run sequentially up to `config.retries`
     * times; a result is only replaced when a retry passes.
     *
     * Errors thrown by the executor or by an assertion never reject the
     * returned promise; they are reported on the case result's `error` field.
     *
     * @returns Summary with `name`, `total`, `passed`, `failed`, `durationMs`,
     *   `results`, and `retriedCases` (only present when a retry succeeded).
     *
     * @example
     * ```typescript
     * const results = await suite.run();
     * console.log(`${results.passed}/${results.total} tests passed`);
     * ```
     */
    async run() {
        const startTime = Date.now();
        const results = [];
        // Runs the executor against a timeout. The timer is always cleared once
        // the race settles; otherwise every case would leave a pending timer
        // behind that keeps the Node.js event loop alive after run() resolves.
        const executeWithTimeout = async (input) => {
            const timeout = this.config.timeout || 30000;
            let timer;
            const timeoutPromise = new Promise((_, reject) => {
                timer = setTimeout(() => reject(new Error(`Test timeout after ${timeout}ms`)), timeout);
            });
            try {
                return await Promise.race([
                    this.config.executor(input),
                    timeoutPromise,
                ]);
            }
            finally {
                clearTimeout(timer);
            }
        };
        const runTestCase = async (testCase, index) => {
            const caseStartTime = Date.now();
            const id = testCase.id || `case-${index}`;
            // Presence check rather than truthiness: "", 0 and false are
            // legitimate expected values and must not be treated as missing.
            const hasExpected = testCase.expected != null;
            try {
                // Execute to get output
                let actual;
                if (this.config.executor) {
                    actual = await executeWithTimeout(testCase.input);
                }
                else if (hasExpected) {
                    actual = testCase.expected; // Use expected as actual if no executor
                }
                else {
                    throw new Error("No executor provided and no expected output");
                }
                // Run assertions
                const assertions = [];
                let allPassed = true;
                // Run custom assertions
                if (testCase.assertions) {
                    for (const assertion of testCase.assertions) {
                        const result = assertion(actual);
                        assertions.push(result);
                        if (!result.passed) {
                            allPassed = false;
                        }
                    }
                }
                // Default equality check if expected provided
                if (hasExpected && !testCase.assertions) {
                    const result = (0, assertions_1.expect)(actual).toEqual(testCase.expected);
                    assertions.push(result);
                    if (!result.passed) {
                        allPassed = false;
                    }
                }
                const durationMs = Date.now() - caseStartTime;
                return {
                    id,
                    input: testCase.input,
                    expected: testCase.expected,
                    actual,
                    passed: allPassed,
                    assertions,
                    durationMs,
                };
            }
            catch (error) {
                // Executor failures, timeouts and throwing assertions all end
                // up here and are reported as a failed case, not a rejection.
                const durationMs = Date.now() - caseStartTime;
                return {
                    id,
                    input: testCase.input,
                    expected: testCase.expected,
                    actual: "",
                    passed: false,
                    assertions: [],
                    durationMs,
                    error: error instanceof Error ? error.message : String(error),
                };
            }
        };
        // Run tests
        if (this.config.parallel) {
            results.push(...(await Promise.all(this.config.cases.map((tc, i) => runTestCase(tc, i)))));
        }
        else {
            for (let i = 0; i < this.config.cases.length; i++) {
                const result = await runTestCase(this.config.cases[i], i);
                results.push(result);
                if (this.config.stopOnFailure && !result.passed) {
                    break;
                }
            }
        }
        const retriedCases = [];
        const retries = this.config.retries ?? 0;
        if (retries > 0 && results.length > 0) {
            // results[i] always corresponds to config.cases[i]: both execution
            // modes fill the array in case order starting at index 0.
            const failingIndices = results
                .map((r, i) => (r.passed ? -1 : i))
                .filter((i) => i >= 0);
            for (let attempt = 0; attempt < retries && failingIndices.length > 0; attempt++) {
                const toRetry = [...failingIndices];
                failingIndices.length = 0;
                for (const i of toRetry) {
                    const tc = this.config.cases[i];
                    const retryResult = await runTestCase(tc, i);
                    if (retryResult.passed) {
                        results[i] = retryResult;
                        retriedCases.push(retryResult.id);
                    }
                    else {
                        // Keep the original failing result; try again next attempt.
                        failingIndices.push(i);
                    }
                }
            }
        }
        const durationMs = Date.now() - startTime;
        const passed = results.filter((r) => r.passed).length;
        const failed = results.filter((r) => !r.passed).length;
        return {
            name: this.name,
            total: results.length,
            passed,
            failed,
            durationMs,
            results,
            ...(retriedCases.length > 0 && { retriedCases }),
        };
    }
    /**
     * Add a test case to the suite
     *
     * @param testCase - Case appended to `config.cases` (mutates the array in place).
     */
    addCase(testCase) {
        this.config.cases.push(testCase);
    }
    /**
     * Get suite configuration
     *
     * @returns A shallow copy of the configuration; nested values such as the
     *   `cases` array are shared with the suite.
     */
    getConfig() {
        return { ...this.config };
    }
    /**
     * Get test definitions for introspection
     * COMPAT-201: Public TestSuite introspection (minimal getters)
     *
     * @returns One plain descriptor per case: `id` (falling back to
     *   `case-<index>`), `input`, `expected`, `metadata`, `hasAssertions`
     *   and `assertionCount`.
     */
    getTests() {
        return this.config.cases.map((testCase, index) => ({
            id: testCase.id || `case-${index}`,
            input: testCase.input,
            expected: testCase.expected,
            metadata: testCase.metadata,
            hasAssertions: !!testCase.assertions && testCase.assertions.length > 0,
            assertionCount: testCase.assertions?.length || 0,
        }));
    }
    /**
     * Get suite metadata for introspection
     * COMPAT-201: Public TestSuite introspection (minimal getters)
     *
     * @returns `suiteName`, an always-empty `tags` array, and the configured
     *   `timeout`, `parallel`, `stopOnFailure` and `retries` under `defaults`
     *   (unset options are reported as `undefined`).
     */
    getMetadata() {
        return {
            suiteName: this.name,
            tags: [], // TestSuite doesn't have tags, but include for future compatibility
            defaults: {
                timeout: this.config.timeout,
                parallel: this.config.parallel,
                stopOnFailure: this.config.stopOnFailure,
                retries: this.config.retries,
            },
        };
    }
    /**
     * Convert to portable suite representation
     * COMPAT-201: Public TestSuite introspection (minimal getters)
     *
     * @returns Object combining `name`, `getConfig()`, `getTests()` and `getMetadata()`.
     */
    toJSON() {
        return {
            name: this.name,
            config: this.getConfig(),
            tests: this.getTests(),
            metadata: this.getMetadata(),
        };
    }
}
220
+ exports.TestSuite = TestSuite;
221
/**
 * Factory for {@link TestSuite}: builds a suite from a name and a
 * configuration object and returns it without running anything.
 *
 * @param name - Suite name, forwarded to the `TestSuite` constructor.
 * @param config - Suite configuration, forwarded to the `TestSuite` constructor.
 * @returns The newly constructed `TestSuite` instance.
 *
 * @example
 * ```typescript
 * const suite = createTestSuite('my-tests', {
 *   cases: [
 *     {
 *       input: 'Hello',
 *       assertions: [
 *         (output) => expect(output).toContain('hi'),
 *         (output) => expect(output).toHaveSentiment('positive')
 *       ]
 *     }
 *   ],
 *   executor: async (input) => {
 *     // Your LLM call here
 *     return callLLM(input);
 *   }
 * });
 * ```
 */
function createTestSuite(name, config) {
    const suite = new TestSuite(name, config);
    return suite;
}
246
/**
 * Build an assertion function that checks an output for a list of keywords.
 *
 * The returned function forwards its argument to
 * `expect(output).toContainKeywords(keywords)` and returns that result.
 *
 * @param keywords - Keywords handed to `toContainKeywords`.
 * @returns An assertion function `(output) => result`, suitable as an entry
 *   of a test case's `assertions` array.
 *
 * @example
 * ```typescript
 * const suite = createTestSuite('tests', {
 *   cases: [
 *     {
 *       input: 'refund policy',
 *       assertions: [containsKeywords(['refund', 'return', 'policy'])]
 *     }
 *   ]
 * });
 * ```
 */
function containsKeywords(keywords) {
    const assertKeywords = (output) => {
        const expectation = (0, assertions_1.expect)(output);
        return expectation.toContainKeywords(keywords);
    };
    return assertKeywords;
}
264
/**
 * Build an assertion function that checks an output against a pattern.
 *
 * The returned function forwards its argument to
 * `expect(output).toMatchPattern(pattern)` and returns that result.
 *
 * @param pattern - Pattern handed to `toMatchPattern` (a RegExp in the example below).
 * @returns An assertion function `(output) => result`, suitable as an entry
 *   of a test case's `assertions` array.
 *
 * @example
 * ```typescript
 * const suite = createTestSuite('tests', {
 *   cases: [
 *     {
 *       input: 'What time is it?',
 *       assertions: [matchesPattern(/\d{1,2}:\d{2}/)]
 *     }
 *   ]
 * });
 * ```
 */
function matchesPattern(pattern) {
    const assertPattern = (output) => {
        const expectation = (0, assertions_1.expect)(output);
        return expectation.toMatchPattern(pattern);
    };
    return assertPattern;
}
282
/**
 * Build an assertion function that checks the sentiment of an output.
 *
 * The returned function forwards its argument to
 * `expect(output).toHaveSentiment(sentiment)` and returns that result.
 *
 * @param sentiment - Sentiment label handed to `toHaveSentiment`
 *   (e.g. `'positive'`, as in the example below).
 * @returns An assertion function `(output) => result`, suitable as an entry
 *   of a test case's `assertions` array.
 *
 * @example
 * ```typescript
 * const suite = createTestSuite('tests', {
 *   cases: [
 *     {
 *       input: 'Thank you!',
 *       assertions: [hasSentiment('positive')]
 *     }
 *   ]
 * });
 * ```
 */
function hasSentiment(sentiment) {
    const assertSentiment = (output) => {
        const expectation = (0, assertions_1.expect)(output);
        return expectation.toHaveSentiment(sentiment);
    };
    return assertSentiment;
}
300
/**
 * Build an assertion function that checks the length of an output.
 *
 * The returned function forwards its argument to
 * `expect(output).toHaveLength(range)` and returns that result.
 *
 * @param range - Length constraint handed to `toHaveLength`
 *   (an object such as `{ min: 50, max: 200 }` in the example below).
 * @returns An assertion function `(output) => result`, suitable as an entry
 *   of a test case's `assertions` array.
 *
 * @example
 * ```typescript
 * const suite = createTestSuite('tests', {
 *   cases: [
 *     {
 *       input: 'Summarize this',
 *       assertions: [hasLength({ min: 50, max: 200 })]
 *     }
 *   ]
 * });
 * ```
 */
function hasLength(range) {
    const assertLength = (output) => {
        const expectation = (0, assertions_1.expect)(output);
        return expectation.toHaveLength(range);
    };
    return assertLength;
}