@memberjunction/testing-engine 0.0.1 → 2.119.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +403 -29
- package/dist/drivers/AgentEvalDriver.d.ts +197 -0
- package/dist/drivers/AgentEvalDriver.d.ts.map +1 -0
- package/dist/drivers/AgentEvalDriver.js +370 -0
- package/dist/drivers/AgentEvalDriver.js.map +1 -0
- package/dist/drivers/BaseTestDriver.d.ts +145 -0
- package/dist/drivers/BaseTestDriver.d.ts.map +1 -0
- package/dist/drivers/BaseTestDriver.js +266 -0
- package/dist/drivers/BaseTestDriver.js.map +1 -0
- package/dist/drivers/index.d.ts +6 -0
- package/dist/drivers/index.d.ts.map +1 -0
- package/dist/drivers/index.js +22 -0
- package/dist/drivers/index.js.map +1 -0
- package/dist/engine/TestEngine.d.ts +148 -0
- package/dist/engine/TestEngine.d.ts.map +1 -0
- package/dist/engine/TestEngine.js +490 -0
- package/dist/engine/TestEngine.js.map +1 -0
- package/dist/index.d.ts +20 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +42 -0
- package/dist/index.js.map +1 -0
- package/dist/oracles/ExactMatchOracle.d.ts +98 -0
- package/dist/oracles/ExactMatchOracle.d.ts.map +1 -0
- package/dist/oracles/ExactMatchOracle.js +355 -0
- package/dist/oracles/ExactMatchOracle.js.map +1 -0
- package/dist/oracles/IOracle.d.ts +47 -0
- package/dist/oracles/IOracle.d.ts.map +1 -0
- package/dist/oracles/IOracle.js +7 -0
- package/dist/oracles/IOracle.js.map +1 -0
- package/dist/oracles/LLMJudgeOracle.d.ts +65 -0
- package/dist/oracles/LLMJudgeOracle.d.ts.map +1 -0
- package/dist/oracles/LLMJudgeOracle.js +214 -0
- package/dist/oracles/LLMJudgeOracle.js.map +1 -0
- package/dist/oracles/SQLValidatorOracle.d.ts +78 -0
- package/dist/oracles/SQLValidatorOracle.d.ts.map +1 -0
- package/dist/oracles/SQLValidatorOracle.js +215 -0
- package/dist/oracles/SQLValidatorOracle.js.map +1 -0
- package/dist/oracles/SchemaValidatorOracle.d.ts +61 -0
- package/dist/oracles/SchemaValidatorOracle.d.ts.map +1 -0
- package/dist/oracles/SchemaValidatorOracle.js +193 -0
- package/dist/oracles/SchemaValidatorOracle.js.map +1 -0
- package/dist/oracles/TraceValidatorOracle.d.ts +41 -0
- package/dist/oracles/TraceValidatorOracle.d.ts.map +1 -0
- package/dist/oracles/TraceValidatorOracle.js +159 -0
- package/dist/oracles/TraceValidatorOracle.js.map +1 -0
- package/dist/oracles/index.d.ts +10 -0
- package/dist/oracles/index.d.ts.map +1 -0
- package/dist/oracles/index.js +26 -0
- package/dist/oracles/index.js.map +1 -0
- package/dist/types.d.ts +428 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +6 -0
- package/dist/types.js.map +1 -0
- package/dist/utils/cost-calculator.d.ts +92 -0
- package/dist/utils/cost-calculator.d.ts.map +1 -0
- package/dist/utils/cost-calculator.js +137 -0
- package/dist/utils/cost-calculator.js.map +1 -0
- package/dist/utils/result-formatter.d.ts +98 -0
- package/dist/utils/result-formatter.d.ts.map +1 -0
- package/dist/utils/result-formatter.js +252 -0
- package/dist/utils/result-formatter.js.map +1 -0
- package/dist/utils/scoring.d.ts +64 -0
- package/dist/utils/scoring.d.ts.map +1 -0
- package/dist/utils/scoring.js +140 -0
- package/dist/utils/scoring.js.map +1 -0
- package/package.json +36 -7
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Exact match oracle implementation
|
|
3
|
+
* @module @memberjunction/testing-engine
|
|
4
|
+
*/
|
|
5
|
+
import { IOracle } from './IOracle';
|
|
6
|
+
import { OracleInput, OracleConfig, OracleResult } from '../types';
|
|
7
|
+
/**
 * Exact Match Oracle.
 *
 * Performs deterministic comparison between expected and actual output.
 * The comparison strategy is selected with `config.mode`:
 * - 'exact' (default): the JSON serializations of both values must be
 *   identical, so object key order matters.
 * - 'contains': every key found in expected must also exist in actual,
 *   recursively. Only key presence is checked; leaf values are not compared.
 * - 'regex': every pattern taken from expected must match the JSON
 *   serialization of actual.
 * - 'deep': recursive structural equality.
 * - 'partial': deep equality restricted to the paths listed in `config.fields`.
 *
 * Configuration:
 * - mode: Comparison mode ('exact' | 'contains' | 'regex' | 'deep' | 'partial')
 * - caseSensitive: Whether regex patterns are case-sensitive (default: true).
 *   Only 'regex' mode uses this option.
 * - ignoreWhitespace: Listed as a whitespace-normalization switch, but no
 *   comparison mode applies it.
 * - fields: For 'partial' mode, which fields to compare (array of paths)
 *
 * @example
 * ```typescript
 * const oracle = new ExactMatchOracle();
 *
 * // Exact match
 * const result1 = await oracle.evaluate({
 *   expectedOutput: { status: 'success', count: 5 },
 *   actualOutput: { status: 'success', count: 5 }
 * }, { mode: 'exact' });
 *
 * // Contains mode (actual must contain all expected fields)
 * const result2 = await oracle.evaluate({
 *   expectedOutput: { status: 'success' },
 *   actualOutput: { status: 'success', count: 5, extra: 'data' }
 * }, { mode: 'contains' });
 *
 * // Regex match: patterns come from a string, an array of strings, or an
 * // object's `responsePatterns` array, and are tested against
 * // JSON.stringify(actualOutput).
 * const result3 = await oracle.evaluate({
 *   expectedOutput: { responsePatterns: ['sales.*region'] },
 *   actualOutput: { response: 'Sales by region report' }
 * }, { mode: 'regex', caseSensitive: false });
 * ```
 */
export declare class ExactMatchOracle implements IOracle {
    readonly type = "exact-match";
    /**
     * Evaluate exact match between expected and actual output.
     *
     * @param input - Oracle input with expected and actual output
     * @param config - Oracle configuration
     * @returns Oracle result with match details
     */
    evaluate(input: OracleInput, config: OracleConfig): Promise<OracleResult>;
    /**
     * Exact JSON string match.
     * @private
     */
    private exactMatch;
    /**
     * Check if actual contains all expected fields (key presence only).
     * @private
     */
    private containsMatch;
    /**
     * Find missing fields in actual compared to expected.
     * @private
     */
    private findMissingFields;
    /**
     * Regex pattern matching against the JSON serialization of actual.
     * @private
     */
    private regexMatch;
    /**
     * Extract regex patterns from expected output.
     * @private
     */
    private extractPatterns;
    /**
     * Deep equality check.
     * @private
     */
    private deepEqual;
    /**
     * Recursive deep equality check.
     * @private
     */
    private deepEquality;
    /**
     * Partial match on specific fields.
     * @private
     */
    private partialMatch;
    /**
     * Get field value by path (e.g., 'user.name' or 'items[0].id').
     * @private
     */
    private getFieldValue;
}
|
|
98
|
+
//# sourceMappingURL=ExactMatchOracle.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ExactMatchOracle.d.ts","sourceRoot":"","sources":["../../src/oracles/ExactMatchOracle.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AAEnE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkCG;AACH,qBAAa,gBAAiB,YAAW,OAAO;IAC5C,QAAQ,CAAC,IAAI,iBAAiB;IAE9B;;;;;;OAMG;IACG,QAAQ,CAAC,KAAK,EAAE,WAAW,EAAE,MAAM,EAAE,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC;IA+E/E;;;OAGG;IACH,OAAO,CAAC,UAAU;IA0BlB;;;OAGG;IACH,OAAO,CAAC,aAAa;IAsBrB;;;OAGG;IACH,OAAO,CAAC,iBAAiB;IAkCzB;;;OAGG;IACH,OAAO,CAAC,UAAU;IAoClB;;;OAGG;IACH,OAAO,CAAC,eAAe;IAiBvB;;;OAGG;IACH,OAAO,CAAC,SAAS;IAajB;;;OAGG;IACH,OAAO,CAAC,YAAY;IAwCpB;;;OAGG;IACH,OAAO,CAAC,YAAY;IAwCpB;;;OAGG;IACH,OAAO,CAAC,aAAa;CAgCxB"}
|
|
@@ -0,0 +1,355 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* @fileoverview Exact match oracle implementation
|
|
4
|
+
* @module @memberjunction/testing-engine
|
|
5
|
+
*/
|
|
6
|
+
// Set the `__esModule` flag on the CommonJS exports object.
Object.defineProperty(exports, "__esModule", { value: true });
|
|
7
|
+
// Placeholder: the export is initialised to undefined here and assigned the
// real class once the class definition has been evaluated.
exports.ExactMatchOracle = void 0;
|
|
8
|
+
/**
 * Exact Match Oracle.
 *
 * Performs deterministic comparison between expected and actual output.
 * The comparison strategy is selected with `config.mode`:
 * - 'exact' (default): the JSON serializations of both values must be
 *   identical, so object key order matters.
 * - 'contains': every key found in expected must also exist in actual,
 *   recursively. Only key presence is checked; leaf values are not compared.
 * - 'regex': every pattern taken from expected must match the JSON
 *   serialization of actual.
 * - 'deep': recursive structural equality (key order does not matter).
 * - 'partial': deep equality restricted to the paths listed in `config.fields`.
 *
 * Configuration:
 * - mode: Comparison mode ('exact' | 'contains' | 'regex' | 'deep' | 'partial')
 * - caseSensitive: Whether regex patterns are case-sensitive (default: true).
 *   Only 'regex' mode uses this option.
 * - ignoreWhitespace: Listed as a whitespace-normalization switch, but no
 *   comparison mode applies it.
 * - fields: For 'partial' mode, which fields to compare (array of paths)
 *
 * @example
 * ```typescript
 * const oracle = new ExactMatchOracle();
 *
 * // Exact match
 * const result1 = await oracle.evaluate({
 *   expectedOutput: { status: 'success', count: 5 },
 *   actualOutput: { status: 'success', count: 5 }
 * }, { mode: 'exact' });
 *
 * // Contains mode (actual must contain all expected fields)
 * const result2 = await oracle.evaluate({
 *   expectedOutput: { status: 'success' },
 *   actualOutput: { status: 'success', count: 5, extra: 'data' }
 * }, { mode: 'contains' });
 *
 * // Regex match: patterns are tested against JSON.stringify(actualOutput).
 * const result3 = await oracle.evaluate({
 *   expectedOutput: { responsePatterns: ['sales.*region'] },
 *   actualOutput: { response: 'Sales by region report' }
 * }, { mode: 'regex', caseSensitive: false });
 * ```
 */
class ExactMatchOracle {
    constructor() {
        this.type = 'exact-match';
    }

    /**
     * Evaluate exact match between expected and actual output.
     *
     * Never rejects: any exception raised while comparing is converted into a
     * failed result whose message starts with "Exact match error:".
     *
     * @param input - Oracle input with expected and actual output
     * @param config - Oracle configuration (a missing config means defaults)
     * @returns Oracle result with match details
     */
    async evaluate(input, config) {
        try {
            // A missing config object falls back to the defaults instead of throwing.
            const options = config || {};
            const mode = options.mode || 'exact';
            const caseSensitive = options.caseSensitive !== false; // Default true
            const expected = input.expectedOutput;
            const actual = input.actualOutput;
            // Truthiness checks: falsy values (0, '', false) count as "not provided".
            if (!expected) {
                return {
                    oracleType: this.type,
                    passed: false,
                    score: 0,
                    message: 'No expected output provided'
                };
            }
            if (!actual) {
                return {
                    oracleType: this.type,
                    passed: false,
                    score: 0,
                    message: 'No actual output provided'
                };
            }
            // Perform comparison based on mode
            let result;
            switch (mode) {
                case 'exact':
                    result = this.exactMatch(expected, actual);
                    break;
                case 'contains':
                    result = this.containsMatch(expected, actual);
                    break;
                case 'regex':
                    result = this.regexMatch(expected, actual, caseSensitive);
                    break;
                case 'deep':
                    result = this.deepEqual(expected, actual);
                    break;
                case 'partial':
                    result = this.partialMatch(expected, actual, options.fields);
                    break;
                default:
                    return {
                        oracleType: this.type,
                        passed: false,
                        score: 0,
                        message: `Unknown comparison mode: ${mode}`
                    };
            }
            return {
                oracleType: this.type,
                ...result
            };
        }
        catch (error) {
            // Anything can be thrown in JavaScript, so do not assume an Error instance.
            const reason = error instanceof Error ? error.message : String(error);
            return {
                oracleType: this.type,
                passed: false,
                score: 0,
                message: `Exact match error: ${reason}`
            };
        }
    }

    /**
     * Exact JSON string match.
     *
     * Compares the JSON.stringify output of both values, so key order is
     * significant. On mismatch the first 200 characters of each serialization
     * are reported in `details`.
     * @private
     */
    exactMatch(expected, actual) {
        const expectedStr = JSON.stringify(expected);
        const actualStr = JSON.stringify(actual);
        if (expectedStr === actualStr) {
            return {
                passed: true,
                score: 1.0,
                message: 'Output exactly matches expected'
            };
        }
        return {
            passed: false,
            score: 0,
            message: 'Output does not match expected',
            details: {
                expected: expectedStr.substring(0, 200),
                actual: actualStr.substring(0, 200)
            }
        };
    }

    /**
     * Check if actual contains all expected fields.
     *
     * Only the presence of keys is verified; see findMissingFields.
     * @private
     */
    containsMatch(expected, actual) {
        const missingFields = this.findMissingFields(expected, actual, '');
        if (missingFields.length === 0) {
            return {
                passed: true,
                score: 1.0,
                message: 'Output contains all expected fields'
            };
        }
        return {
            passed: false,
            score: 0,
            message: `Missing ${missingFields.length} expected field(s)`,
            details: { missingFields }
        };
    }

    /**
     * Find missing fields in actual compared to expected.
     *
     * Walks expected recursively and returns the dotted paths of keys that
     * are absent from actual. When expected is an object but actual is not,
     * the current path (or 'root' at the top level) is reported. Non-object
     * expected values are never compared, so they contribute no entries.
     * @private
     */
    findMissingFields(expected, actual, path) {
        const missing = [];
        if (typeof expected === 'object' && expected !== null) {
            if (typeof actual !== 'object' || actual === null) {
                return [path || 'root'];
            }
            for (const key in expected) {
                const fieldPath = path ? `${path}.${key}` : key;
                if (!(key in actual)) {
                    missing.push(fieldPath);
                }
                else {
                    missing.push(...this.findMissingFields(expected[key], actual[key], fieldPath));
                }
            }
        }
        return missing;
    }

    /**
     * Regex pattern matching.
     *
     * Each extracted pattern is tested against the JSON serialization of
     * actual. The score is the fraction of patterns that matched. When no
     * pattern can be extracted the check fails rather than passing vacuously,
     * mirroring partialMatch's handling of an empty field list.
     * @private
     */
    regexMatch(expected, actual, caseSensitive) {
        const patterns = this.extractPatterns(expected);
        if (patterns.length === 0) {
            return {
                passed: false,
                score: 0,
                message: 'No regex patterns found in expected output'
            };
        }
        const actualStr = JSON.stringify(actual);
        const flags = caseSensitive ? '' : 'i';
        // A fresh RegExp per pattern; an invalid pattern throws and is
        // reported by evaluate() as an error result.
        const failed = patterns.filter(pattern => !new RegExp(pattern, flags).test(actualStr));
        if (failed.length === 0) {
            return {
                passed: true,
                score: 1.0,
                message: `All ${patterns.length} pattern(s) matched`
            };
        }
        return {
            passed: false,
            score: 1 - (failed.length / patterns.length),
            message: `${failed.length} of ${patterns.length} pattern(s) failed`,
            details: { failedPatterns: failed }
        };
    }

    /**
     * Extract regex patterns from expected output.
     *
     * Accepted shapes: a single string, an array (non-string entries are
     * dropped), or an object carrying a string `pattern` and/or an array
     * `responsePatterns`.
     * @private
     */
    extractPatterns(expected) {
        const isString = (value) => typeof value === 'string';
        if (isString(expected)) {
            return [expected];
        }
        if (Array.isArray(expected)) {
            return expected.filter(isString);
        }
        const patterns = [];
        if (typeof expected === 'object' && expected !== null) {
            if (isString(expected.pattern)) {
                patterns.push(expected.pattern);
            }
            if (Array.isArray(expected.responsePatterns)) {
                patterns.push(...expected.responsePatterns.filter(isString));
            }
        }
        return patterns;
    }

    /**
     * Deep equality check.
     * @private
     */
    deepEqual(expected, actual) {
        const isEqual = this.deepEquality(expected, actual);
        return {
            passed: isEqual,
            score: isEqual ? 1.0 : 0,
            message: isEqual ? 'Output deeply equals expected' : 'Output does not deeply equal expected'
        };
    }

    /**
     * Recursive deep equality check.
     *
     * Primitives are compared with ===. Arrays must have the same length and
     * pairwise-equal items. Dates are compared by timestamp. Other objects
     * must have the same set of own enumerable keys with equal values. An
     * array never equals a non-array, and a Date never equals a non-Date.
     * @private
     */
    deepEquality(a, b) {
        if (a === b) {
            return true;
        }
        // Not identical, so a null/undefined on either side means inequality.
        if (a === null || b === null || a === undefined || b === undefined) {
            return false;
        }
        // Distinct values of different types, or distinct primitives/functions.
        if (typeof a !== typeof b || typeof a !== 'object') {
            return false;
        }
        const aIsArray = Array.isArray(a);
        if (aIsArray !== Array.isArray(b)) {
            // Without this guard [] and {} would compare equal by key set.
            return false;
        }
        if (aIsArray) {
            return a.length === b.length &&
                a.every((item, index) => this.deepEquality(item, b[index]));
        }
        const aIsDate = a instanceof Date;
        if (aIsDate !== (b instanceof Date)) {
            return false;
        }
        if (aIsDate) {
            // Dates have no enumerable keys; compare the instants they represent.
            return a.getTime() === b.getTime();
        }
        const aKeys = Object.keys(a);
        const bKeys = new Set(Object.keys(b));
        if (aKeys.length !== bKeys.size) {
            return false;
        }
        return aKeys.every(key => bKeys.has(key) && this.deepEquality(a[key], b[key]));
    }

    /**
     * Partial match on specific fields.
     *
     * Resolves every path in `fields` on both values and compares the results
     * with deepEquality. A path that resolves to undefined on both sides
     * counts as a match. The score is the fraction of fields that matched.
     * @private
     */
    partialMatch(expected, actual, fields) {
        if (!fields || fields.length === 0) {
            return {
                passed: false,
                score: 0,
                message: 'No fields specified for partial match'
            };
        }
        const failed = [];
        for (const field of fields) {
            const expectedValue = this.getFieldValue(expected, field);
            const actualValue = this.getFieldValue(actual, field);
            if (!this.deepEquality(expectedValue, actualValue)) {
                failed.push(field);
            }
        }
        if (failed.length === 0) {
            return {
                passed: true,
                score: 1.0,
                message: `All ${fields.length} field(s) matched`
            };
        }
        return {
            passed: false,
            score: 1 - (failed.length / fields.length),
            message: `${failed.length} of ${fields.length} field(s) failed`,
            details: { failedFields: failed }
        };
    }

    /**
     * Get field value by path (e.g., 'user.name' or 'items[0].id').
     *
     * The path is split on '.'; a segment of the form `key[n]` reads index n
     * of the array stored under `key`. Returns undefined as soon as a segment
     * cannot be resolved.
     * @private
     */
    getFieldValue(obj, path) {
        if (!obj || typeof obj !== 'object') {
            return undefined;
        }
        let current = obj;
        for (const part of path.split('.')) {
            if (!current || typeof current !== 'object') {
                return undefined;
            }
            // Handle array indices
            const arrayMatch = part.match(/^(.+)\[(\d+)\]$/);
            if (arrayMatch) {
                const index = Number.parseInt(arrayMatch[2], 10);
                current = current[arrayMatch[1]];
                if (!Array.isArray(current)) {
                    return undefined;
                }
                current = current[index];
            }
            else {
                current = current[part];
            }
        }
        return current;
    }
}
|
|
354
|
+
// Publish the class on the CommonJS exports object.
exports.ExactMatchOracle = ExactMatchOracle;
|
|
355
|
+
//# sourceMappingURL=ExactMatchOracle.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ExactMatchOracle.js","sourceRoot":"","sources":["../../src/oracles/ExactMatchOracle.ts"],"names":[],"mappings":";AAAA;;;GAGG;;;AAKH;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkCG;AACH,MAAa,gBAAgB;IAA7B;QACa,SAAI,GAAG,aAAa,CAAC;IAgYlC,CAAC;IA9XG;;;;;;OAMG;IACH,KAAK,CAAC,QAAQ,CAAC,KAAkB,EAAE,MAAoB;QACnD,IAAI,CAAC;YACD,MAAM,IAAI,GAAI,MAAM,CAAC,IAAe,IAAI,OAAO,CAAC;YAChD,MAAM,aAAa,GAAG,MAAM,CAAC,aAAa,KAAK,KAAK,CAAC,CAAC,eAAe;YACrE,MAAM,gBAAgB,GAAG,MAAM,CAAC,gBAAgB,KAAK,IAAI,CAAC,CAAC,gBAAgB;YAE3E,MAAM,QAAQ,GAAG,KAAK,CAAC,cAAc,CAAC;YACtC,MAAM,MAAM,GAAG,KAAK,CAAC,YAAY,CAAC;YAElC,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACZ,OAAO;oBACH,UAAU,EAAE,IAAI,CAAC,IAAI;oBACrB,MAAM,EAAE,KAAK;oBACb,KAAK,EAAE,CAAC;oBACR,OAAO,EAAE,6BAA6B;iBACzC,CAAC;YACN,CAAC;YAED,IAAI,CAAC,MAAM,EAAE,CAAC;gBACV,OAAO;oBACH,UAAU,EAAE,IAAI,CAAC,IAAI;oBACrB,MAAM,EAAE,KAAK;oBACb,KAAK,EAAE,CAAC;oBACR,OAAO,EAAE,2BAA2B;iBACvC,CAAC;YACN,CAAC;YAED,mCAAmC;YACnC,IAAI,MAA8E,CAAC;YAEnF,QAAQ,IAAI,EAAE,CAAC;gBACX,KAAK,OAAO;oBACR,MAAM,GAAG,IAAI,CAAC,UAAU,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;oBAC3C,MAAM;gBAEV,KAAK,UAAU;oBACX,MAAM,GAAG,IAAI,CAAC,aAAa,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;oBAC9C,MAAM;gBAEV,KAAK,OAAO;oBACR,MAAM,GAAG,IAAI,CAAC,UAAU,CAAC,QAAQ,EAAE,MAAM,EAAE,aAAa,CAAC,CAAC;oBAC1D,MAAM;gBAEV,KAAK,MAAM;oBACP,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;oBAC1C,MAAM;gBAEV,KAAK,SAAS;oBACV,MAAM,GAAG,IAAI,CAAC,YAAY,CACtB,QAAQ,EACR,MAAM,EACN,MAAM,CAAC,MAAkB,CAC5B,CAAC;oBACF,MAAM;gBAEV;oBACI,OAAO;wBACH,UAAU,EAAE,IAAI,CAAC,IAAI;wBACrB,MAAM,EAAE,KAAK;wBACb,KAAK,EAAE,CAAC;wBACR,OAAO,EAAE,4BAA4B,IAAI,EAAE;qBAC9C,CAAC;YACV,CAAC;YAED,OAAO;gBACH,UAAU,EAAE,IAAI,CAAC,IAAI;gBACrB,GAAG,MAAM;aACZ,CAAC;QAEN,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACb,OAAO;gBACH,UAAU,EAAE,IAAI,CAAC,IAAI;gBACrB,MAAM,EAAE,KAAK;gBACb,KAAK,EAAE,CAAC;gBACR,OAAO,EAAE,sBAAuB,KAAe,CAAC,OAAO,EAAE;aAC5D,CAAC;QACN,CAAC;IACL,CAAC;IAED;;;OAGG;IACK,UAAU,CACd,QAAiB,EACjB,MAAe;QAEf,MAAM,WAAW,GAAG,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;QAC7C,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC
,CAAC;QAEzC,IAAI,WAAW,KAAK,SAAS,EAAE,CAAC;YAC5B,OAAO;gBACH,MAAM,EAAE,IAAI;gBACZ,KAAK,EAAE,GAAG;gBACV,OAAO,EAAE,iCAAiC;aAC7C,CAAC;QACN,CAAC;aAAM,CAAC;YACJ,OAAO;gBACH,MAAM,EAAE,KAAK;gBACb,KAAK,EAAE,CAAC;gBACR,OAAO,EAAE,gCAAgC;gBACzC,OAAO,EAAE;oBACL,QAAQ,EAAE,WAAW,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC;oBACvC,MAAM,EAAE,SAAS,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC;iBACtC;aACJ,CAAC;QACN,CAAC;IACL,CAAC;IAED;;;OAGG;IACK,aAAa,CACjB,QAAiB,EACjB,MAAe;QAEf,MAAM,aAAa,GAAG,IAAI,CAAC,iBAAiB,CAAC,QAAQ,EAAE,MAAM,EAAE,EAAE,CAAC,CAAC;QAEnE,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC7B,OAAO;gBACH,MAAM,EAAE,IAAI;gBACZ,KAAK,EAAE,GAAG;gBACV,OAAO,EAAE,qCAAqC;aACjD,CAAC;QACN,CAAC;aAAM,CAAC;YACJ,OAAO;gBACH,MAAM,EAAE,KAAK;gBACb,KAAK,EAAE,CAAC;gBACR,OAAO,EAAE,WAAW,aAAa,CAAC,MAAM,oBAAoB;gBAC5D,OAAO,EAAE,EAAE,aAAa,EAAE;aAC7B,CAAC;QACN,CAAC;IACL,CAAC;IAED;;;OAGG;IACK,iBAAiB,CACrB,QAAiB,EACjB,MAAe,EACf,IAAY;QAEZ,MAAM,OAAO,GAAa,EAAE,CAAC;QAE7B,IAAI,OAAO,QAAQ,KAAK,QAAQ,IAAI,QAAQ,KAAK,IAAI,EAAE,CAAC;YACpD,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,KAAK,IAAI,EAAE,CAAC;gBAChD,OAAO,CAAC,IAAI,IAAI,MAAM,CAAC,CAAC;YAC5B,CAAC;YAED,MAAM,WAAW,GAAG,QAAmC,CAAC;YACxD,MAAM,SAAS,GAAG,MAAiC,CAAC;YAEpD,KAAK,MAAM,GAAG,IAAI,WAAW,EAAE,CAAC;gBAC5B,MAAM,SAAS,GAAG,IAAI,CAAC,CAAC,CAAC,GAAG,IAAI,IAAI,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;gBAEhD,IAAI,CAAC,CAAC,GAAG,IAAI,SAAS,CAAC,EAAE,CAAC;oBACtB,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;gBAC5B,CAAC;qBAAM,CAAC;oBACJ,MAAM,MAAM,GAAG,IAAI,CAAC,iBAAiB,CACjC,WAAW,CAAC,GAAG,CAAC,EAChB,SAAS,CAAC,GAAG,CAAC,EACd,SAAS,CACZ,CAAC;oBACF,OAAO,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,CAAC;gBAC5B,CAAC;YACL,CAAC;QACL,CAAC;QAED,OAAO,OAAO,CAAC;IACnB,CAAC;IAED;;;OAGG;IACK,UAAU,CACd,QAAiB,EACjB,MAAe,EACf,aAAsB;QAEtB,qCAAqC;QACrC,MAAM,QAAQ,GAAG,IAAI,CAAC,eAAe,CAAC,QAAQ,CAAC,CAAC;QAChD,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;QAEzC,MAAM,MAAM,GAAa,EAAE,CAAC;QAE5B,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC7B,MAAM,KAAK,GAAG,aAAa,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;YACvC,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,OAAO,EAAE,KAAK,CAAC,CAA
C;YAEzC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;gBACzB,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACzB,CAAC;QACL,CAAC;QAED,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACtB,OAAO;gBACH,MAAM,EAAE,IAAI;gBACZ,KAAK,EAAE,GAAG;gBACV,OAAO,EAAE,OAAO,QAAQ,CAAC,MAAM,qBAAqB;aACvD,CAAC;QACN,CAAC;aAAM,CAAC;YACJ,OAAO;gBACH,MAAM,EAAE,KAAK;gBACb,KAAK,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC;gBAC5C,OAAO,EAAE,GAAG,MAAM,CAAC,MAAM,OAAO,QAAQ,CAAC,MAAM,oBAAoB;gBACnE,OAAO,EAAE,EAAE,cAAc,EAAE,MAAM,EAAE;aACtC,CAAC;QACN,CAAC;IACL,CAAC;IAED;;;OAGG;IACK,eAAe,CAAC,QAAiB;QACrC,MAAM,QAAQ,GAAa,EAAE,CAAC;QAE9B,IAAI,OAAO,QAAQ,KAAK,QAAQ,EAAE,CAAC;YAC/B,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC5B,CAAC;aAAM,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE,CAAC;YACjC,QAAQ,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC;QAClE,CAAC;aAAM,IAAI,OAAO,QAAQ,KAAK,QAAQ,IAAI,QAAQ,KAAK,IAAI,EAAE,CAAC;YAC3D,MAAM,GAAG,GAAG,QAAmC,CAAC;YAChD,IAAI,GAAG,CAAC,gBAAgB,IAAI,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC,EAAE,CAAC;gBAC9D,QAAQ,CAAC,IAAI,CAAC,GAAG,GAAG,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC;YAC9E,CAAC;QACL,CAAC;QAED,OAAO,QAAQ,CAAC;IACpB,CAAC;IAED;;;OAGG;IACK,SAAS,CACb,QAAiB,EACjB,MAAe;QAEf,MAAM,OAAO,GAAG,IAAI,CAAC,YAAY,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;QAEpD,OAAO;YACH,MAAM,EAAE,OAAO;YACf,KAAK,EAAE,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YACxB,OAAO,EAAE,OAAO,CAAC,CAAC,CAAC,+BAA+B,CAAC,CAAC,CAAC,uCAAuC;SAC/F,CAAC;IACN,CAAC;IAED;;;OAGG;IACK,YAAY,CAAC,CAAU,EAAE,CAAU;QACvC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;YACV,OAAO,IAAI,CAAC;QAChB,CAAC;QAED,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,KAAK,SAAS,IAAI,CAAC,KAAK,SAAS,EAAE,CAAC;YACjE,OAAO,CAAC,KAAK,CAAC,CAAC;QACnB,CAAC;QAED,IAAI,OAAO,CAAC,KAAK,OAAO,CAAC,EAAE,CAAC;YACxB,OAAO,KAAK,CAAC;QACjB,CAAC;QAED,IAAI,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC;YACvC,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,MAAM,EAAE,CAAC;gBACxB,OAAO,KAAK,CAAC;YACjB,CAAC;YAED,OAAO,CAA
C,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QACvE,CAAC;QAED,IAAI,OAAO,CAAC,KAAK,QAAQ,IAAI,OAAO,CAAC,KAAK,QAAQ,EAAE,CAAC;YACjD,MAAM,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,CAAW,CAAC,CAAC;YACvC,MAAM,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,CAAW,CAAC,CAAC;YAEvC,IAAI,KAAK,CAAC,MAAM,KAAK,KAAK,CAAC,MAAM,EAAE,CAAC;gBAChC,OAAO,KAAK,CAAC;YACjB,CAAC;YAED,MAAM,IAAI,GAAG,CAA4B,CAAC;YAC1C,MAAM,IAAI,GAAG,CAA4B,CAAC;YAE1C,OAAO,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CACrB,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,CACjE,CAAC;QACN,CAAC;QAED,OAAO,KAAK,CAAC;IACjB,CAAC;IAED;;;OAGG;IACK,YAAY,CAChB,QAAiB,EACjB,MAAe,EACf,MAAiB;QAEjB,IAAI,CAAC,MAAM,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACjC,OAAO;gBACH,MAAM,EAAE,KAAK;gBACb,KAAK,EAAE,CAAC;gBACR,OAAO,EAAE,uCAAuC;aACnD,CAAC;QACN,CAAC;QAED,MAAM,MAAM,GAAa,EAAE,CAAC;QAE5B,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YACzB,MAAM,aAAa,GAAG,IAAI,CAAC,aAAa,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;YAC1D,MAAM,WAAW,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC;YAEtD,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC,aAAa,EAAE,WAAW,CAAC,EAAE,CAAC;gBACjD,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACvB,CAAC;QACL,CAAC;QAED,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACtB,OAAO;gBACH,MAAM,EAAE,IAAI;gBACZ,KAAK,EAAE,GAAG;gBACV,OAAO,EAAE,OAAO,MAAM,CAAC,MAAM,mBAAmB;aACnD,CAAC;QACN,CAAC;aAAM,CAAC;YACJ,OAAO;gBACH,MAAM,EAAE,KAAK;gBACb,KAAK,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;gBAC1C,OAAO,EAAE,GAAG,MAAM,CAAC,MAAM,OAAO,MAAM,CAAC,MAAM,kBAAkB;gBAC/D,OAAO,EAAE,EAAE,YAAY,EAAE,MAAM,EAAE;aACpC,CAAC;QACN,CAAC;IACL,CAAC;IAED;;;OAGG;IACK,aAAa,CAAC,GAAY,EAAE,IAAY;QAC5C,IAAI,CAAC,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ,EAAE,CAAC;YAClC,OAAO,SAAS,CAAC;QACrB,CAAC;QAED,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC9B,IAAI,OAAO,GAAY,GAAG,CAAC;QAE3B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACvB,IAAI,CAAC,OAAO,IAAI,OAAO,OAAO,KAAK,QAAQ,EAAE,CAAC;gBAC1C,OAAO,SAAS,CAAC;YACrB,CAAC;YAED,uBAAuB;YACvB,MAAM,UAAU,GAA
G,IAAI,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAC;YACjD,IAAI,UAAU,EAAE,CAAC;gBACb,MAAM,GAAG,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;gBAC1B,MAAM,KAAK,GAAG,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;gBACtC,OAAO,GAAI,OAAmC,CAAC,GAAG,CAAC,CAAC;gBAEpD,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC;oBAC1B,OAAO,SAAS,CAAC;gBACrB,CAAC;gBAED,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC;YAC7B,CAAC;iBAAM,CAAC;gBACJ,OAAO,GAAI,OAAmC,CAAC,IAAI,CAAC,CAAC;YACzD,CAAC;QACL,CAAC;QAED,OAAO,OAAO,CAAC;IACnB,CAAC;CACJ;AAjYD,4CAiYC"}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Oracle interface for test evaluation
|
|
3
|
+
* @module @memberjunction/testing-engine
|
|
4
|
+
*/
|
|
5
|
+
import { OracleInput, OracleConfig, OracleResult } from '../types';
|
|
6
|
+
/**
|
|
7
|
+
* Interface for oracle implementations.
|
|
8
|
+
*
|
|
9
|
+
* An oracle is a source of truth that determines whether test output is correct.
|
|
10
|
+
* Oracles can be deterministic (schema validation, exact match) or heuristic (LLM judge).
|
|
11
|
+
*
|
|
12
|
+
* Each oracle type evaluates a specific aspect of test output:
|
|
13
|
+
* - SchemaValidator: Structural correctness
|
|
14
|
+
* - TraceValidator: Execution safety (no errors)
|
|
15
|
+
* - LLMJudge: Semantic quality
|
|
16
|
+
* - ExactMatch: Deterministic output comparison
|
|
17
|
+
* - SQLValidator: Database state verification
|
|
18
|
+
*
|
|
19
|
+
* @interface IOracle
|
|
20
|
+
* @example
|
|
21
|
+
* ```typescript
|
|
22
|
+
* export class SchemaValidatorOracle implements IOracle {
|
|
23
|
+
* readonly type = 'schema-validate';
|
|
24
|
+
*
|
|
25
|
+
* async evaluate(input: OracleInput, config: OracleConfig): Promise<OracleResult> {
|
|
26
|
+
* // Validation logic
|
|
27
|
+
* return { oracleType: this.type, passed: true, score: 1.0, message: 'Valid' };
|
|
28
|
+
* }
|
|
29
|
+
* }
|
|
30
|
+
* ```
|
|
31
|
+
*/
|
|
32
|
+
export interface IOracle {
    /**
     * Identifier that distinguishes this oracle type from the others.
     * Test configuration refers to this value to choose which oracles run.
     */
    readonly type: string;

    /**
     * Judge the output of a test.
     *
     * @param input - What to evaluate (test, expected/actual outputs, target entity, user)
     * @param config - Settings specific to the oracle being invoked
     * @returns A promise that resolves to the oracle's result
     */
    evaluate(input: OracleInput, config: OracleConfig): Promise<OracleResult>;
}
|
|
47
|
+
//# sourceMappingURL=IOracle.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"IOracle.d.ts","sourceRoot":"","sources":["../../src/oracles/IOracle.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AAEnE;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AACH,MAAM,WAAW,OAAO;IACpB;;;OAGG;IACH,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IAEtB;;;;;;OAMG;IACH,QAAQ,CAAC,KAAK,EAAE,WAAW,EAAE,MAAM,EAAE,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC,CAAC;CAC7E"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"IOracle.js","sourceRoot":"","sources":["../../src/oracles/IOracle.ts"],"names":[],"mappings":";AAAA;;;GAGG"}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview LLM Judge oracle implementation
|
|
3
|
+
* @module @memberjunction/testing-engine
|
|
4
|
+
*/
|
|
5
|
+
import { IOracle } from './IOracle';
|
|
6
|
+
import { OracleInput, OracleConfig, OracleResult } from '../types';
|
|
7
|
+
/**
|
|
8
|
+
* LLM Judge Oracle.
|
|
9
|
+
*
|
|
10
|
+
* Uses an LLM to evaluate output quality based on custom criteria.
|
|
11
|
+
* Provides semantic evaluation beyond deterministic checks.
|
|
12
|
+
*
|
|
13
|
+
* Configuration:
|
|
14
|
+
* - criteria: Array of validation criteria (required)
|
|
15
|
+
* - model: Model to use for judging (default: from prompt or default model)
|
|
16
|
+
* - temperature: Temperature for LLM (default: 0.1 for consistency)
|
|
17
|
+
* - promptTemplate: Custom prompt template (optional, uses default if not provided)
|
|
18
|
+
* - strictMode: Require all criteria to pass (default: false, uses weighted scoring)
|
|
19
|
+
*
|
|
20
|
+
* @example
|
|
21
|
+
* ```typescript
|
|
22
|
+
* const oracle = new LLMJudgeOracle();
|
|
23
|
+
* const result = await oracle.evaluate({
|
|
24
|
+
* actualOutput: { response: 'Sales by region report created successfully' },
|
|
25
|
+
* expectedOutput: {
|
|
26
|
+
* judgeValidationCriteria: [
|
|
27
|
+
* 'Response accurately answers the user\'s question',
|
|
28
|
+
* 'Response includes actionable information',
|
|
29
|
+
* 'Response is professional and clear'
|
|
30
|
+
* ]
|
|
31
|
+
* },
|
|
32
|
+
* contextUser
|
|
33
|
+
* }, {
|
|
34
|
+
* model: 'claude-sonnet-4',
|
|
35
|
+
* temperature: 0.1
|
|
36
|
+
* });
|
|
37
|
+
* ```
|
|
38
|
+
*/
|
|
39
|
+
export declare class LLMJudgeOracle implements IOracle {
    /** Oracle type identifier. */
    readonly type: "llm-judge";

    /**
     * Prompt template used for the LLM judge by default.
     * @private
     */
    private readonly defaultPromptTemplate;

    /**
     * Evaluate output using LLM judge.
     *
     * @param input - Oracle input carrying the expected criteria and the actual output
     * @param config - Oracle configuration
     * @returns Oracle result holding the LLM judgment
     */
    evaluate(input: OracleInput, config: OracleConfig): Promise<OracleResult>;

    /**
     * Build the prompt for the LLM judge.
     * @private
     */
    private buildPrompt;

    /**
     * Parse the LLM judgment response.
     * @private
     */
    private parseJudgment;
}
|
|
65
|
+
//# sourceMappingURL=LLMJudgeOracle.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"LLMJudgeOracle.d.ts","sourceRoot":"","sources":["../../src/oracles/LLMJudgeOracle.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AAKnE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AACH,qBAAa,cAAe,YAAW,OAAO;IAC1C,QAAQ,CAAC,IAAI,eAAe;IAE5B;;;OAGG;IACH,OAAO,CAAC,QAAQ,CAAC,qBAAqB,CA4BvC;IAEC;;;;;;OAMG;IACG,QAAQ,CAAC,KAAK,EAAE,WAAW,EAAE,MAAM,EAAE,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC;IA+F/E;;;OAGG;IACH,OAAO,CAAC,WAAW;IAqBnB;;;OAGG;IACH,OAAO,CAAC,aAAa;CA6BxB"}
|