@memberjunction/testing-engine 0.0.1 → 2.119.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +403 -29
- package/dist/drivers/AgentEvalDriver.d.ts +197 -0
- package/dist/drivers/AgentEvalDriver.d.ts.map +1 -0
- package/dist/drivers/AgentEvalDriver.js +370 -0
- package/dist/drivers/AgentEvalDriver.js.map +1 -0
- package/dist/drivers/BaseTestDriver.d.ts +145 -0
- package/dist/drivers/BaseTestDriver.d.ts.map +1 -0
- package/dist/drivers/BaseTestDriver.js +266 -0
- package/dist/drivers/BaseTestDriver.js.map +1 -0
- package/dist/drivers/index.d.ts +6 -0
- package/dist/drivers/index.d.ts.map +1 -0
- package/dist/drivers/index.js +22 -0
- package/dist/drivers/index.js.map +1 -0
- package/dist/engine/TestEngine.d.ts +148 -0
- package/dist/engine/TestEngine.d.ts.map +1 -0
- package/dist/engine/TestEngine.js +490 -0
- package/dist/engine/TestEngine.js.map +1 -0
- package/dist/index.d.ts +20 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +42 -0
- package/dist/index.js.map +1 -0
- package/dist/oracles/ExactMatchOracle.d.ts +98 -0
- package/dist/oracles/ExactMatchOracle.d.ts.map +1 -0
- package/dist/oracles/ExactMatchOracle.js +355 -0
- package/dist/oracles/ExactMatchOracle.js.map +1 -0
- package/dist/oracles/IOracle.d.ts +47 -0
- package/dist/oracles/IOracle.d.ts.map +1 -0
- package/dist/oracles/IOracle.js +7 -0
- package/dist/oracles/IOracle.js.map +1 -0
- package/dist/oracles/LLMJudgeOracle.d.ts +65 -0
- package/dist/oracles/LLMJudgeOracle.d.ts.map +1 -0
- package/dist/oracles/LLMJudgeOracle.js +214 -0
- package/dist/oracles/LLMJudgeOracle.js.map +1 -0
- package/dist/oracles/SQLValidatorOracle.d.ts +78 -0
- package/dist/oracles/SQLValidatorOracle.d.ts.map +1 -0
- package/dist/oracles/SQLValidatorOracle.js +215 -0
- package/dist/oracles/SQLValidatorOracle.js.map +1 -0
- package/dist/oracles/SchemaValidatorOracle.d.ts +61 -0
- package/dist/oracles/SchemaValidatorOracle.d.ts.map +1 -0
- package/dist/oracles/SchemaValidatorOracle.js +193 -0
- package/dist/oracles/SchemaValidatorOracle.js.map +1 -0
- package/dist/oracles/TraceValidatorOracle.d.ts +41 -0
- package/dist/oracles/TraceValidatorOracle.d.ts.map +1 -0
- package/dist/oracles/TraceValidatorOracle.js +159 -0
- package/dist/oracles/TraceValidatorOracle.js.map +1 -0
- package/dist/oracles/index.d.ts +10 -0
- package/dist/oracles/index.d.ts.map +1 -0
- package/dist/oracles/index.js +26 -0
- package/dist/oracles/index.js.map +1 -0
- package/dist/types.d.ts +428 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +6 -0
- package/dist/types.js.map +1 -0
- package/dist/utils/cost-calculator.d.ts +92 -0
- package/dist/utils/cost-calculator.d.ts.map +1 -0
- package/dist/utils/cost-calculator.js +137 -0
- package/dist/utils/cost-calculator.js.map +1 -0
- package/dist/utils/result-formatter.d.ts +98 -0
- package/dist/utils/result-formatter.d.ts.map +1 -0
- package/dist/utils/result-formatter.js +252 -0
- package/dist/utils/result-formatter.js.map +1 -0
- package/dist/utils/scoring.d.ts +64 -0
- package/dist/utils/scoring.d.ts.map +1 -0
- package/dist/utils/scoring.js +140 -0
- package/dist/utils/scoring.js.map +1 -0
- package/package.json +36 -7
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* @fileoverview Schema validation oracle implementation
|
|
4
|
+
* @module @memberjunction/testing-engine
|
|
5
|
+
*/
|
|
6
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
7
|
+
exports.SchemaValidatorOracle = void 0;
|
|
8
|
+
/**
 * Schema Validator Oracle.
 *
 * Validates that actual output conforms to an expected JSON schema.
 * Validation is performed by a small built-in checker that understands a
 * subset of JSON Schema keywords: `type` (a single type name or a list of
 * names), `required`, `properties`, `items`, `enum`, `pattern`, `minimum`
 * and `maximum`. Any other keyword in the schema is ignored.
 *
 * Configuration:
 * - strict: Whether to fail on properties not declared in `properties` (default: false)
 *
 * @example
 * ```typescript
 * const oracle = new SchemaValidatorOracle();
 * const result = await oracle.evaluate({
 *     actualOutput: { name: 'John', age: 30 },
 *     expectedOutput: {
 *         responseSchema: {
 *             type: 'object',
 *             required: ['name', 'age'],
 *             properties: {
 *                 name: { type: 'string' },
 *                 age: { type: 'number' }
 *             }
 *         }
 *     }
 * }, {});
 * ```
 */
class SchemaValidatorOracle {
    constructor() {
        this.type = 'schema-validate';
    }
    /**
     * Evaluate actual output against JSON schema.
     *
     * Never rejects: any exception raised while validating (for example an
     * invalid `pattern` regular expression) is reported as a failed result.
     *
     * @param input - Oracle input; reads `expectedOutput.responseSchema` and `actualOutput`
     * @param config - Oracle configuration; only `strict` is read. May be omitted.
     * @returns Oracle result with pass/fail, a score of 1 or 0, and, on
     *          validation failure, `details.validationErrors`
     */
    async evaluate(input, config) {
        try {
            // Extract schema from expected outcomes
            const schema = input.expectedOutput?.responseSchema;
            if (!schema) {
                return {
                    oracleType: this.type,
                    passed: false,
                    score: 0,
                    message: 'No responseSchema provided in ExpectedOutcomes'
                };
            }
            // A missing config is treated the same as an empty one (non-strict).
            const validationErrors = this.validateAgainstSchema(input.actualOutput, schema, Boolean(config?.strict));
            if (validationErrors.length === 0) {
                return {
                    oracleType: this.type,
                    passed: true,
                    score: 1.0,
                    message: 'Output matches expected schema'
                };
            }
            return {
                oracleType: this.type,
                passed: false,
                score: 0,
                message: `Schema validation failed: ${validationErrors.join(', ')}`,
                details: { validationErrors }
            };
        }
        catch (error) {
            // Anything can be thrown in JavaScript; do not assume an Error instance.
            const reason = error instanceof Error ? error.message : String(error);
            return {
                oracleType: this.type,
                passed: false,
                score: 0,
                message: `Schema validation error: ${reason}`
            };
        }
    }
    /**
     * Validate data against JSON schema.
     *
     * @param data - Value to validate
     * @param schema - Schema to validate against
     * @param strict - When true, undeclared object properties are reported
     * @returns List of human-readable error strings; empty when the data is valid
     * @private
     */
    validateAgainstSchema(data, schema, strict = false) {
        const errors = [];
        // Simple JSON Schema validation implementation
        // For production, consider using a library like ajv
        this.validateValue(data, schema, 'root', errors, strict);
        return errors;
    }
    /**
     * Validate a single value against schema, appending any problems to `errors`.
     * Recurses into object properties and array items.
     *
     * @param value - Value to validate
     * @param schema - Schema for this value
     * @param path - Location of the value, used as the prefix of error messages
     * @param errors - Accumulator that receives error strings
     * @param strict - When true, undeclared object properties are reported
     * @private
     */
    validateValue(value, schema, path, errors, strict) {
        // Check type
        const expectedType = schema.type;
        const actualType = this.getType(value);
        if (expectedType && !this.isTypeAllowed(actualType, expectedType)) {
            const expectedLabel = Array.isArray(expectedType) ? expectedType.join(' | ') : expectedType;
            errors.push(`${path}: Expected type ${expectedLabel}, got ${actualType}`);
            // A value of the wrong type cannot be meaningfully checked further.
            return;
        }
        // Check required properties for objects (getType never reports 'object' for null or arrays)
        if (actualType === 'object') {
            const obj = value;
            const required = schema.required || [];
            const properties = schema.properties || {};
            // Check required fields. `in` is used so that values exposed through
            // prototype accessors still count as present.
            for (const requiredProp of required) {
                if (!(requiredProp in obj)) {
                    errors.push(`${path}: Missing required property '${requiredProp}'`);
                }
            }
            // Validate properties
            for (const [key, val] of Object.entries(obj)) {
                // Only the schema's own entries count; otherwise keys such as
                // 'constructor' would resolve to inherited Object.prototype members.
                const propertySchema = Object.prototype.hasOwnProperty.call(properties, key)
                    ? properties[key]
                    : undefined;
                if (propertySchema) {
                    this.validateValue(val, propertySchema, `${path}.${key}`, errors, strict);
                }
                else if (strict) {
                    errors.push(`${path}: Unexpected property '${key}'`);
                }
            }
        }
        // Check array items
        if (actualType === 'array' && schema.items) {
            const itemSchema = schema.items;
            value.forEach((item, i) => {
                this.validateValue(item, itemSchema, `${path}[${i}]`, errors, strict);
            });
        }
        // Check enum values (strict equality, so only primitives can match)
        if (schema.enum) {
            const enumValues = schema.enum;
            if (!enumValues.includes(value)) {
                errors.push(`${path}: Value must be one of [${enumValues.join(', ')}], got '${value}'`);
            }
        }
        // Check string patterns
        if (actualType === 'string' && schema.pattern) {
            const pattern = new RegExp(schema.pattern);
            if (!pattern.test(value)) {
                errors.push(`${path}: String does not match pattern ${schema.pattern}`);
            }
        }
        // Check numeric constraints. Whole numbers are reported as 'integer' by
        // getType, so both numeric type names must be bounds-checked.
        if (actualType === 'number' || actualType === 'integer') {
            const num = value;
            if (schema.minimum != null && num < schema.minimum) {
                errors.push(`${path}: Value ${num} is less than minimum ${schema.minimum}`);
            }
            if (schema.maximum != null && num > schema.maximum) {
                errors.push(`${path}: Value ${num} is greater than maximum ${schema.maximum}`);
            }
        }
    }
    /**
     * Decide whether a detected type satisfies the schema's `type` keyword.
     *
     * @param actualType - Type name produced by getType
     * @param expectedType - A type name, or an array of acceptable type names
     * @returns true when the value's type is acceptable; an 'integer' value
     *          also satisfies 'number'
     * @private
     */
    isTypeAllowed(actualType, expectedType) {
        const allowed = Array.isArray(expectedType) ? expectedType : [expectedType];
        return allowed.some((name) => name === actualType || (name === 'number' && actualType === 'integer'));
    }
    /**
     * Get JSON Schema type for a value.
     *
     * @param value - Any value
     * @returns 'null', 'array', 'boolean', 'integer', 'number', 'string',
     *          'object', or 'unknown' for anything else (e.g. undefined, functions)
     * @private
     */
    getType(value) {
        if (value === null) {
            return 'null';
        }
        if (Array.isArray(value)) {
            return 'array';
        }
        const jsType = typeof value;
        if (jsType === 'number') {
            return Number.isInteger(value) ? 'integer' : 'number';
        }
        if (jsType === 'boolean' || jsType === 'string' || jsType === 'object') {
            return jsType;
        }
        return 'unknown';
    }
}
|
|
192
|
+
exports.SchemaValidatorOracle = SchemaValidatorOracle;
|
|
193
|
+
//# sourceMappingURL=SchemaValidatorOracle.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"SchemaValidatorOracle.js","sourceRoot":"","sources":["../../src/oracles/SchemaValidatorOracle.ts"],"names":[],"mappings":";AAAA;;;GAGG;;;AAKH;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AACH,MAAa,qBAAqB;IAAlC;QACa,SAAI,GAAG,iBAAiB,CAAC;IAgMtC,CAAC;IA9LG;;;;;;OAMG;IACH,KAAK,CAAC,QAAQ,CAAC,KAAkB,EAAE,MAAoB;QACnD,IAAI,CAAC;YACD,wCAAwC;YACxC,MAAM,MAAM,GAAI,KAAK,CAAC,cAAsB,EAAE,cAAc,CAAC;YAC7D,IAAI,CAAC,MAAM,EAAE,CAAC;gBACV,OAAO;oBACH,UAAU,EAAE,IAAI,CAAC,IAAI;oBACrB,MAAM,EAAE,KAAK;oBACb,KAAK,EAAE,CAAC;oBACR,OAAO,EAAE,gDAAgD;iBAC5D,CAAC;YACN,CAAC;YAED,wCAAwC;YACxC,MAAM,gBAAgB,GAAG,IAAI,CAAC,qBAAqB,CAC/C,KAAK,CAAC,YAAY,EAClB,MAAM,EACN,MAAM,CAAC,MAAiB,CAC3B,CAAC;YAEF,IAAI,gBAAgB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAChC,OAAO;oBACH,UAAU,EAAE,IAAI,CAAC,IAAI;oBACrB,MAAM,EAAE,IAAI;oBACZ,KAAK,EAAE,GAAG;oBACV,OAAO,EAAE,gCAAgC;iBAC5C,CAAC;YACN,CAAC;iBAAM,CAAC;gBACJ,OAAO;oBACH,UAAU,EAAE,IAAI,CAAC,IAAI;oBACrB,MAAM,EAAE,KAAK;oBACb,KAAK,EAAE,CAAC;oBACR,OAAO,EAAE,6BAA6B,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE;oBACnE,OAAO,EAAE,EAAE,gBAAgB,EAAE;iBAChC,CAAC;YACN,CAAC;QAEL,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACb,OAAO;gBACH,UAAU,EAAE,IAAI,CAAC,IAAI;gBACrB,MAAM,EAAE,KAAK;gBACb,KAAK,EAAE,CAAC;gBACR,OAAO,EAAE,4BAA6B,KAAe,CAAC,OAAO,EAAE;aAClE,CAAC;QACN,CAAC;IACL,CAAC;IAED;;;OAGG;IACK,qBAAqB,CACzB,IAAa,EACb,MAA+B,EAC/B,SAAkB,KAAK;QAEvB,MAAM,MAAM,GAAa,EAAE,CAAC;QAE5B,+CAA+C;QAC/C,oDAAoD;QACpD,IAAI,CAAC,aAAa,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;QAEzD,OAAO,MAAM,CAAC;IAClB,CAAC;IAED;;;OAGG;IACK,aAAa,CACjB,KAAc,EACd,MAA+B,EAC/B,IAAY,EACZ,MAAgB,EAChB,MAAe;QAEf,aAAa;QACb,MAAM,YAAY,GAAG,MAAM,CAAC,IAAc,CAAC;QAC3C,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;QAEvC,IAAI,YAAY,IAAI,UAAU,KAAK,YAAY,EAAE,CAAC;YAC9C,MAAM,CAAC,IAAI,CAAC,GAAG,IAAI,mBAAmB,YAAY,SAAS,UAAU,EAAE,CAAC,CAAC;YACzE,OAAO;QACX,CAAC;QAED,wCAAwC;QACxC,IAAI,UAAU,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;YAC5C,MAAM,GAAG,GAAG,KAAgC,CAAC;YAC7C,MAAM,QAAQ,GAAG,MAAM,CAAC,QAAoB,IAAI,EAAE,CAAC;YACnD,MAAM,UAAU,GAAG,MAAM,CAAC,UAAqD,
IAAI,EAAE,CAAC;YAEtF,wBAAwB;YACxB,KAAK,MAAM,YAAY,IAAI,QAAQ,EAAE,CAAC;gBAClC,IAAI,CAAC,CAAC,YAAY,IAAI,GAAG,CAAC,EAAE,CAAC;oBACzB,MAAM,CAAC,IAAI,CAAC,GAAG,IAAI,gCAAgC,YAAY,GAAG,CAAC,CAAC;gBACxE,CAAC;YACL,CAAC;YAED,sBAAsB;YACtB,KAAK,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC3C,IAAI,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;oBAClB,IAAI,CAAC,aAAa,CAAC,GAAG,EAAE,UAAU,CAAC,GAAG,CAAC,EAAE,GAAG,IAAI,IAAI,GAAG,EAAE,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;gBAC/E,CAAC;qBAAM,IAAI,MAAM,EAAE,CAAC;oBAChB,MAAM,CAAC,IAAI,CAAC,GAAG,IAAI,0BAA0B,GAAG,GAAG,CAAC,CAAC;gBACzD,CAAC;YACL,CAAC;QACL,CAAC;QAED,oBAAoB;QACpB,IAAI,UAAU,KAAK,OAAO,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;YACzC,MAAM,GAAG,GAAG,KAAkB,CAAC;YAC/B,MAAM,UAAU,GAAG,MAAM,CAAC,KAAgC,CAAC;YAE3D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAClC,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,UAAU,EAAE,GAAG,IAAI,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;YAC5E,CAAC;QACL,CAAC;QAED,oBAAoB;QACpB,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;YACd,MAAM,UAAU,GAAG,MAAM,CAAC,IAAiB,CAAC;YAC5C,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC9B,MAAM,CAAC,IAAI,CACP,GAAG,IAAI,2BAA2B,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,KAAK,GAAG,CAC7E,CAAC;YACN,CAAC;QACL,CAAC;QAED,wBAAwB;QACxB,IAAI,UAAU,KAAK,QAAQ,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;YAC5C,MAAM,OAAO,GAAG,IAAI,MAAM,CAAC,MAAM,CAAC,OAAiB,CAAC,CAAC;YACrD,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,KAAe,CAAC,EAAE,CAAC;gBACjC,MAAM,CAAC,IAAI,CAAC,GAAG,IAAI,mCAAmC,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC;YAC5E,CAAC;QACL,CAAC;QAED,4BAA4B;QAC5B,IAAI,UAAU,KAAK,QAAQ,EAAE,CAAC;YAC1B,MAAM,GAAG,GAAG,KAAe,CAAC;YAE5B,IAAI,MAAM,CAAC,OAAO,IAAI,IAAI,IAAI,GAAG,GAAI,MAAM,CAAC,OAAkB,EAAE,CAAC;gBAC7D,MAAM,CAAC,IAAI,CAAC,GAAG,IAAI,WAAW,GAAG,yBAAyB,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC;YAChF,CAAC;YAED,IAAI,MAAM,CAAC,OAAO,IAAI,IAAI,IAAI,GAAG,GAAI,MAAM,CAAC,OAAkB,EAAE,CAAC;gBAC7D,MAAM,CAAC,IAAI,CAAC,GAAG,IAAI,WAAW,GAAG,4BAA4B,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC;YACnF,CAAC;QACL,CAAC;IACL,CAAC;IAED;;;OAGG;IACK,OAAO,CAAC,KAAc;QAC1B,IAAI,KAAK,
KAAK,IAAI,EAAE,CAAC;YACjB,OAAO,MAAM,CAAC;QAClB,CAAC;QAED,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO,OAAO,CAAC;QACnB,CAAC;QAED,MAAM,MAAM,GAAG,OAAO,KAAK,CAAC;QAE5B,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;YACvB,OAAO,SAAS,CAAC;QACrB,CAAC;QAED,IAAI,MAAM,KAAK,QAAQ,EAAE,CAAC;YACtB,OAAO,MAAM,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,QAAQ,CAAC;QAC1D,CAAC;QAED,IAAI,MAAM,KAAK,QAAQ,EAAE,CAAC;YACtB,OAAO,QAAQ,CAAC;QACpB,CAAC;QAED,IAAI,MAAM,KAAK,QAAQ,EAAE,CAAC;YACtB,OAAO,QAAQ,CAAC;QACpB,CAAC;QAED,OAAO,SAAS,CAAC;IACrB,CAAC;CACJ;AAjMD,sDAiMC"}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Trace validation oracle implementation
|
|
3
|
+
* @module @memberjunction/testing-engine
|
|
4
|
+
*/
|
|
5
|
+
import { IOracle } from './IOracle';
|
|
6
|
+
import { OracleInput, OracleConfig, OracleResult } from '../types';
|
|
7
|
+
/**
|
|
8
|
+
* Trace Validator Oracle.
|
|
9
|
+
*
|
|
10
|
+
* Validates that agent execution trace has no errors.
|
|
11
|
+
* Checks AgentRun status and all AgentRunStep records for error conditions.
|
|
12
|
+
*
|
|
13
|
+
* Configuration:
|
|
14
|
+
* - allowWarnings: Whether to allow warning-level issues (default: true)
|
|
15
|
+
* - requiredSteps: Minimum number of steps expected (optional)
|
|
16
|
+
* - maxSteps: Maximum number of steps allowed (optional)
|
|
17
|
+
*
|
|
18
|
+
* @example
|
|
19
|
+
* ```typescript
|
|
20
|
+
* const oracle = new TraceValidatorOracle();
|
|
21
|
+
* const result = await oracle.evaluate({
|
|
22
|
+
* targetEntity: agentRun,
|
|
23
|
+
* contextUser
|
|
24
|
+
* }, {
|
|
25
|
+
* allowWarnings: true,
|
|
26
|
+
* requiredSteps: 2
|
|
27
|
+
* });
|
|
28
|
+
* ```
|
|
29
|
+
*/
|
|
30
|
+
export declare class TraceValidatorOracle implements IOracle {
|
|
31
|
+
/** Oracle type identifier; declared as the literal "trace-no-errors". */
readonly type = "trace-no-errors";
|
|
32
|
+
/**
|
|
33
|
+
* Evaluate agent run trace for errors.
|
|
34
|
+
*
|
|
35
|
+
* @param input - Oracle input with agent run entity
|
|
36
|
+
* @param config - Oracle configuration
|
|
37
|
+
* @returns Oracle result with pass/fail and trace analysis
|
|
38
|
+
*/
|
|
39
|
+
evaluate(input: OracleInput, config: OracleConfig): Promise<OracleResult>;
|
|
40
|
+
}
|
|
41
|
+
//# sourceMappingURL=TraceValidatorOracle.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"TraceValidatorOracle.d.ts","sourceRoot":"","sources":["../../src/oracles/TraceValidatorOracle.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAIH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AAEnE;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,qBAAa,oBAAqB,YAAW,OAAO;IAChD,QAAQ,CAAC,IAAI,qBAAqB;IAElC;;;;;;OAMG;IACG,QAAQ,CAAC,KAAK,EAAE,WAAW,EAAE,MAAM,EAAE,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC;CA+HlF"}
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* @fileoverview Trace validation oracle implementation
|
|
4
|
+
* @module @memberjunction/testing-engine
|
|
5
|
+
*/
|
|
6
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
7
|
+
exports.TraceValidatorOracle = void 0;
|
|
8
|
+
const core_1 = require("@memberjunction/core");
|
|
9
|
+
/**
 * Trace Validator Oracle.
 *
 * Validates that agent execution trace has no errors.
 * Checks the AgentRun status and the status of every AgentRunStep record
 * loaded for that run.
 *
 * Configuration:
 * - allowWarnings: Whether to allow warning-level issues (default: true).
 *   NOTE: no warnings are currently collected, so this setting has no
 *   observable effect yet.
 * - requiredSteps: Minimum number of steps expected (optional)
 * - maxSteps: Maximum number of steps allowed (optional; 0 means no steps allowed)
 *
 * @example
 * ```typescript
 * const oracle = new TraceValidatorOracle();
 * const result = await oracle.evaluate({
 *     targetEntity: agentRun,
 *     contextUser
 * }, {
 *     allowWarnings: true,
 *     requiredSteps: 2
 * });
 * ```
 */
class TraceValidatorOracle {
    constructor() {
        this.type = 'trace-no-errors';
    }
    /**
     * Evaluate agent run trace for errors.
     *
     * Never rejects: any exception raised while loading or inspecting the
     * trace is reported as a failed result.
     *
     * @param input - Oracle input; reads `targetEntity` (the agent run) and `contextUser`
     * @param config - Oracle configuration (`allowWarnings`, `requiredSteps`, `maxSteps`). May be omitted.
     * @returns Oracle result with pass/fail and trace analysis in `details`
     */
    async evaluate(input, config) {
        try {
            // Get agent run entity
            const agentRun = input.targetEntity;
            if (!agentRun) {
                return {
                    oracleType: this.type,
                    passed: false,
                    score: 0,
                    message: 'No agent run entity provided'
                };
            }
            // Check agent run status
            if (agentRun.Status === 'Failed' || agentRun.Status === 'Cancelled') {
                return {
                    oracleType: this.type,
                    passed: false,
                    score: 0,
                    message: `Agent run failed with status: ${agentRun.Status}`,
                    details: {
                        agentRunId: agentRun.ID,
                        status: agentRun.Status
                    }
                };
            }
            // A missing config is treated the same as an empty one.
            const settings = config ?? {};
            // Load agent run steps. The ID is embedded in a quoted filter literal,
            // so embedded single quotes are doubled to keep the literal intact.
            const quotedRunId = String(agentRun.ID).replace(/'/g, "''");
            const rv = new core_1.RunView();
            const stepsResult = await rv.RunView({
                EntityName: 'MJ: AI Agent Run Steps',
                ExtraFilter: `AgentRunID='${quotedRunId}'`,
                OrderBy: 'Sequence ASC',
                ResultType: 'entity_object'
            }, input.contextUser);
            if (!stepsResult.Success) {
                return {
                    oracleType: this.type,
                    passed: false,
                    score: 0,
                    message: `Failed to load agent run steps: ${stepsResult.ErrorMessage}`
                };
            }
            const steps = stepsResult.Results || [];
            // Check step count constraints
            const requiredSteps = settings.requiredSteps;
            const maxSteps = settings.maxSteps;
            if (requiredSteps && steps.length < requiredSteps) {
                return {
                    oracleType: this.type,
                    passed: false,
                    score: 0,
                    message: `Expected at least ${requiredSteps} steps, got ${steps.length}`,
                    details: { stepCount: steps.length, requiredSteps }
                };
            }
            // An explicit 0 is a real limit ("no steps allowed"), not "unset".
            const hasMaxSteps = maxSteps === 0 || Boolean(maxSteps);
            if (hasMaxSteps && steps.length > maxSteps) {
                return {
                    oracleType: this.type,
                    passed: false,
                    score: 0,
                    message: `Expected at most ${maxSteps} steps, got ${steps.length}`,
                    details: { stepCount: steps.length, maxSteps }
                };
            }
            // Check each step for errors
            const allowWarnings = settings.allowWarnings !== false; // Default to true
            const errors = steps
                .filter((step) => step.Status === 'Failed')
                .map((step) => `Step ${step.StepNumber}: ${step.StepName} - ${step.Status}`);
            // Nothing populates this list yet: the step entity exposes no output
            // field to inspect, so warnings would have to come from another source.
            const warnings = [];
            // Determine result
            if (errors.length > 0) {
                return {
                    oracleType: this.type,
                    passed: false,
                    score: 0,
                    message: `Trace contains ${errors.length} error(s)`,
                    details: { errors, warnings, stepCount: steps.length }
                };
            }
            if (!allowWarnings && warnings.length > 0) {
                return {
                    oracleType: this.type,
                    passed: false,
                    score: 0.5,
                    message: `Trace contains ${warnings.length} warning(s)`,
                    details: { warnings, stepCount: steps.length }
                };
            }
            // Success
            return {
                oracleType: this.type,
                passed: true,
                score: 1.0,
                message: `Trace clean with ${steps.length} step(s)`,
                details: {
                    stepCount: steps.length,
                    warnings: warnings.length > 0 ? warnings : undefined
                }
            };
        }
        catch (error) {
            // Anything can be thrown in JavaScript; do not assume an Error instance.
            const reason = error instanceof Error ? error.message : String(error);
            return {
                oracleType: this.type,
                passed: false,
                score: 0,
                message: `Trace validation error: ${reason}`
            };
        }
    }
}
|
|
158
|
+
exports.TraceValidatorOracle = TraceValidatorOracle;
|
|
159
|
+
//# sourceMappingURL=TraceValidatorOracle.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"TraceValidatorOracle.js","sourceRoot":"","sources":["../../src/oracles/TraceValidatorOracle.ts"],"names":[],"mappings":";AAAA;;;GAGG;;;AAEH,+CAA+C;AAK/C;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,MAAa,oBAAoB;IAAjC;QACa,SAAI,GAAG,iBAAiB,CAAC;IAwItC,CAAC;IAtIG;;;;;;OAMG;IACH,KAAK,CAAC,QAAQ,CAAC,KAAkB,EAAE,MAAoB;QACnD,IAAI,CAAC;YACD,uBAAuB;YACvB,MAAM,QAAQ,GAAG,KAAK,CAAC,YAAgC,CAAC;YACxD,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACZ,OAAO;oBACH,UAAU,EAAE,IAAI,CAAC,IAAI;oBACrB,MAAM,EAAE,KAAK;oBACb,KAAK,EAAE,CAAC;oBACR,OAAO,EAAE,8BAA8B;iBAC1C,CAAC;YACN,CAAC;YAED,yBAAyB;YACzB,IAAI,QAAQ,CAAC,MAAM,KAAK,QAAQ,IAAI,QAAQ,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;gBAClE,OAAO;oBACH,UAAU,EAAE,IAAI,CAAC,IAAI;oBACrB,MAAM,EAAE,KAAK;oBACb,KAAK,EAAE,CAAC;oBACR,OAAO,EAAE,iCAAiC,QAAQ,CAAC,MAAM,EAAE;oBAC3D,OAAO,EAAE;wBACL,UAAU,EAAE,QAAQ,CAAC,EAAE;wBACvB,MAAM,EAAE,QAAQ,CAAC,MAAM;qBAC1B;iBACJ,CAAC;YACN,CAAC;YAED,uBAAuB;YACvB,MAAM,EAAE,GAAG,IAAI,cAAO,EAAE,CAAC;YACzB,MAAM,WAAW,GAAG,MAAM,EAAE,CAAC,OAAO,CAAuB;gBACvD,UAAU,EAAE,wBAAwB;gBACpC,WAAW,EAAE,eAAe,QAAQ,CAAC,EAAE,GAAG;gBAC1C,OAAO,EAAE,cAAc;gBACvB,UAAU,EAAE,eAAe;aAC9B,EAAE,KAAK,CAAC,WAAW,CAAC,CAAC;YAEtB,IAAI,CAAC,WAAW,CAAC,OAAO,EAAE,CAAC;gBACvB,OAAO;oBACH,UAAU,EAAE,IAAI,CAAC,IAAI;oBACrB,MAAM,EAAE,KAAK;oBACb,KAAK,EAAE,CAAC;oBACR,OAAO,EAAE,mCAAmC,WAAW,CAAC,YAAY,EAAE;iBACzE,CAAC;YACN,CAAC;YAED,MAAM,KAAK,GAAG,WAAW,CAAC,OAAO,IAAI,EAAE,CAAC;YAExC,+BAA+B;YAC/B,MAAM,aAAa,GAAG,MAAM,CAAC,aAAuB,CAAC;YACrD,MAAM,QAAQ,GAAG,MAAM,CAAC,QAAkB,CAAC;YAE3C,IAAI,aAAa,IAAI,KAAK,CAAC,MAAM,GAAG,aAAa,EAAE,CAAC;gBAChD,OAAO;oBACH,UAAU,EAAE,IAAI,CAAC,IAAI;oBACrB,MAAM,EAAE,KAAK;oBACb,KAAK,EAAE,CAAC;oBACR,OAAO,EAAE,qBAAqB,aAAa,eAAe,KAAK,CAAC,MAAM,EAAE;oBACxE,OAAO,EAAE,EAAE,SAAS,EAAE,KAAK,CAAC,MAAM,EAAE,aAAa,EAAE;iBACtD,CAAC;YACN,CAAC;YAED,IAAI,QAAQ,IAAI,KAAK,CAAC,MAAM,GAAG,QAAQ,EAAE,CAAC;gBACtC,OAAO;oBACH,UAAU,EAAE,IAAI,CAAC,IAAI;oBACrB,MAAM,EAAE,KAAK;oBACb,KAAK,EAAE,CAAC;oBACR,OAAO,EAAE,oBAAoB,QAAQ,eAAe,KAAK,CAAC,MAAM,EAAE;oBAClE,OAAO,EAAE,EAAE,SAAS,EAAE,KAAK,CAAC,MAAM,EAAE,QAAQ,EAAE;iBA
CjD,CAAC;YACN,CAAC;YAED,6BAA6B;YAC7B,MAAM,aAAa,GAAG,MAAM,CAAC,aAAa,KAAK,KAAK,CAAC,CAAC,kBAAkB;YACxE,MAAM,MAAM,GAAa,EAAE,CAAC;YAC5B,MAAM,QAAQ,GAAa,EAAE,CAAC;YAE9B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;gBACvB,IAAI,IAAI,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;oBAC3B,MAAM,CAAC,IAAI,CAAC,QAAQ,IAAI,CAAC,UAAU,KAAK,IAAI,CAAC,QAAQ,MAAM,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;gBAC9E,CAAC;gBAED,2DAA2D;gBAC3D,uEAAuE;YAC3E,CAAC;YAED,mBAAmB;YACnB,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACpB,OAAO;oBACH,UAAU,EAAE,IAAI,CAAC,IAAI;oBACrB,MAAM,EAAE,KAAK;oBACb,KAAK,EAAE,CAAC;oBACR,OAAO,EAAE,kBAAkB,MAAM,CAAC,MAAM,WAAW;oBACnD,OAAO,EAAE,EAAE,MAAM,EAAE,QAAQ,EAAE,SAAS,EAAE,KAAK,CAAC,MAAM,EAAE;iBACzD,CAAC;YACN,CAAC;YAED,IAAI,CAAC,aAAa,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACxC,OAAO;oBACH,UAAU,EAAE,IAAI,CAAC,IAAI;oBACrB,MAAM,EAAE,KAAK;oBACb,KAAK,EAAE,GAAG;oBACV,OAAO,EAAE,kBAAkB,QAAQ,CAAC,MAAM,aAAa;oBACvD,OAAO,EAAE,EAAE,QAAQ,EAAE,SAAS,EAAE,KAAK,CAAC,MAAM,EAAE;iBACjD,CAAC;YACN,CAAC;YAED,UAAU;YACV,OAAO;gBACH,UAAU,EAAE,IAAI,CAAC,IAAI;gBACrB,MAAM,EAAE,IAAI;gBACZ,KAAK,EAAE,GAAG;gBACV,OAAO,EAAE,oBAAoB,KAAK,CAAC,MAAM,UAAU;gBACnD,OAAO,EAAE;oBACL,SAAS,EAAE,KAAK,CAAC,MAAM;oBACvB,QAAQ,EAAE,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS;iBACvD;aACJ,CAAC;QAEN,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACb,OAAO;gBACH,UAAU,EAAE,IAAI,CAAC,IAAI;gBACrB,MAAM,EAAE,KAAK;gBACb,KAAK,EAAE,CAAC;gBACR,OAAO,EAAE,2BAA4B,KAAe,CAAC,OAAO,EAAE;aACjE,CAAC;QACN,CAAC;IACL,CAAC;CACJ;AAzID,oDAyIC"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Oracle implementations for test evaluation
|
|
3
|
+
*/
|
|
4
|
+
export * from './IOracle';
|
|
5
|
+
export * from './SchemaValidatorOracle';
|
|
6
|
+
export * from './TraceValidatorOracle';
|
|
7
|
+
export * from './LLMJudgeOracle';
|
|
8
|
+
export * from './ExactMatchOracle';
|
|
9
|
+
export * from './SQLValidatorOracle';
|
|
10
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/oracles/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,cAAc,WAAW,CAAC;AAC1B,cAAc,yBAAyB,CAAC;AACxC,cAAc,wBAAwB,CAAC;AACvC,cAAc,kBAAkB,CAAC;AACjC,cAAc,oBAAoB,CAAC;AACnC,cAAc,sBAAsB,CAAC"}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Oracle implementations for test evaluation
|
|
4
|
+
*/
|
|
5
|
+
// Re-export helper: exposes m[k] on o under the name k2 (k2 defaults to k).
// An already-defined this.__createBinding is reused when present.
// When Object.create exists, the property is defined from m's own descriptor
// for k; that descriptor is replaced by an enumerable getter returning m[k]
// when it is missing, when it is an accessor and m is not flagged __esModule,
// or when it is a writable or configurable data property.
// Without Object.create, the value is copied by plain assignment.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
6
|
+
if (k2 === undefined) k2 = k;
|
|
7
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
8
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
9
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
10
|
+
}
|
|
11
|
+
Object.defineProperty(o, k2, desc);
|
|
12
|
+
}) : (function(o, m, k, k2) {
|
|
13
|
+
if (k2 === undefined) k2 = k;
|
|
14
|
+
o[k2] = m[k];
|
|
15
|
+
}));
|
|
16
|
+
// Re-exports every enumerable property of module m onto exports via
// __createBinding, skipping "default" and any name exports already owns
// (so earlier exports are not overwritten).
// An already-defined this.__exportStar is reused when present.
var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
17
|
+
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
18
|
+
};
|
|
19
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
20
|
+
__exportStar(require("./IOracle"), exports);
|
|
21
|
+
__exportStar(require("./SchemaValidatorOracle"), exports);
|
|
22
|
+
__exportStar(require("./TraceValidatorOracle"), exports);
|
|
23
|
+
__exportStar(require("./LLMJudgeOracle"), exports);
|
|
24
|
+
__exportStar(require("./ExactMatchOracle"), exports);
|
|
25
|
+
__exportStar(require("./SQLValidatorOracle"), exports);
|
|
26
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/oracles/index.ts"],"names":[],"mappings":";AAAA;;GAEG;;;;;;;;;;;;;;;;AAEH,4CAA0B;AAC1B,0DAAwC;AACxC,yDAAuC;AACvC,mDAAiC;AACjC,qDAAmC;AACnC,uDAAqC"}
|