@memberjunction/testing-engine 0.0.1 → 2.119.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/README.md +403 -29
  2. package/dist/drivers/AgentEvalDriver.d.ts +197 -0
  3. package/dist/drivers/AgentEvalDriver.d.ts.map +1 -0
  4. package/dist/drivers/AgentEvalDriver.js +370 -0
  5. package/dist/drivers/AgentEvalDriver.js.map +1 -0
  6. package/dist/drivers/BaseTestDriver.d.ts +145 -0
  7. package/dist/drivers/BaseTestDriver.d.ts.map +1 -0
  8. package/dist/drivers/BaseTestDriver.js +266 -0
  9. package/dist/drivers/BaseTestDriver.js.map +1 -0
  10. package/dist/drivers/index.d.ts +6 -0
  11. package/dist/drivers/index.d.ts.map +1 -0
  12. package/dist/drivers/index.js +22 -0
  13. package/dist/drivers/index.js.map +1 -0
  14. package/dist/engine/TestEngine.d.ts +148 -0
  15. package/dist/engine/TestEngine.d.ts.map +1 -0
  16. package/dist/engine/TestEngine.js +490 -0
  17. package/dist/engine/TestEngine.js.map +1 -0
  18. package/dist/index.d.ts +20 -0
  19. package/dist/index.d.ts.map +1 -0
  20. package/dist/index.js +42 -0
  21. package/dist/index.js.map +1 -0
  22. package/dist/oracles/ExactMatchOracle.d.ts +98 -0
  23. package/dist/oracles/ExactMatchOracle.d.ts.map +1 -0
  24. package/dist/oracles/ExactMatchOracle.js +355 -0
  25. package/dist/oracles/ExactMatchOracle.js.map +1 -0
  26. package/dist/oracles/IOracle.d.ts +47 -0
  27. package/dist/oracles/IOracle.d.ts.map +1 -0
  28. package/dist/oracles/IOracle.js +7 -0
  29. package/dist/oracles/IOracle.js.map +1 -0
  30. package/dist/oracles/LLMJudgeOracle.d.ts +65 -0
  31. package/dist/oracles/LLMJudgeOracle.d.ts.map +1 -0
  32. package/dist/oracles/LLMJudgeOracle.js +214 -0
  33. package/dist/oracles/LLMJudgeOracle.js.map +1 -0
  34. package/dist/oracles/SQLValidatorOracle.d.ts +78 -0
  35. package/dist/oracles/SQLValidatorOracle.d.ts.map +1 -0
  36. package/dist/oracles/SQLValidatorOracle.js +215 -0
  37. package/dist/oracles/SQLValidatorOracle.js.map +1 -0
  38. package/dist/oracles/SchemaValidatorOracle.d.ts +61 -0
  39. package/dist/oracles/SchemaValidatorOracle.d.ts.map +1 -0
  40. package/dist/oracles/SchemaValidatorOracle.js +193 -0
  41. package/dist/oracles/SchemaValidatorOracle.js.map +1 -0
  42. package/dist/oracles/TraceValidatorOracle.d.ts +41 -0
  43. package/dist/oracles/TraceValidatorOracle.d.ts.map +1 -0
  44. package/dist/oracles/TraceValidatorOracle.js +159 -0
  45. package/dist/oracles/TraceValidatorOracle.js.map +1 -0
  46. package/dist/oracles/index.d.ts +10 -0
  47. package/dist/oracles/index.d.ts.map +1 -0
  48. package/dist/oracles/index.js +26 -0
  49. package/dist/oracles/index.js.map +1 -0
  50. package/dist/types.d.ts +428 -0
  51. package/dist/types.d.ts.map +1 -0
  52. package/dist/types.js +6 -0
  53. package/dist/types.js.map +1 -0
  54. package/dist/utils/cost-calculator.d.ts +92 -0
  55. package/dist/utils/cost-calculator.d.ts.map +1 -0
  56. package/dist/utils/cost-calculator.js +137 -0
  57. package/dist/utils/cost-calculator.js.map +1 -0
  58. package/dist/utils/result-formatter.d.ts +98 -0
  59. package/dist/utils/result-formatter.d.ts.map +1 -0
  60. package/dist/utils/result-formatter.js +252 -0
  61. package/dist/utils/result-formatter.js.map +1 -0
  62. package/dist/utils/scoring.d.ts +64 -0
  63. package/dist/utils/scoring.d.ts.map +1 -0
  64. package/dist/utils/scoring.js +140 -0
  65. package/dist/utils/scoring.js.map +1 -0
  66. package/package.json +36 -7
@@ -0,0 +1,193 @@
1
+ "use strict";
2
+ /**
3
+ * @fileoverview Schema validation oracle implementation
4
+ * @module @memberjunction/testing-engine
5
+ */
6
+ Object.defineProperty(exports, "__esModule", { value: true });
7
+ exports.SchemaValidatorOracle = void 0;
8
/**
 * Schema Validator Oracle.
 *
 * Validates that actual output conforms to an expected JSON schema.
 * Validation is performed by a small built-in checker that understands a
 * subset of JSON Schema keywords: `type` (a single name or an array of
 * names), `required`, `properties`, `items`, `enum`, `pattern`, `minimum`
 * and `maximum`. Any other keyword is ignored.
 *
 * The schema is read from `input.expectedOutput.responseSchema`.
 *
 * Configuration:
 * - strict: Whether to fail on properties that are not declared in
 *   `properties` (default: false)
 *
 * @example
 * ```typescript
 * const oracle = new SchemaValidatorOracle();
 * const result = await oracle.evaluate({
 *     actualOutput: { name: 'John', age: 30 },
 *     expectedOutput: {
 *         responseSchema: {
 *             type: 'object',
 *             required: ['name', 'age'],
 *             properties: {
 *                 name: { type: 'string' },
 *                 age: { type: 'number' }
 *             }
 *         }
 *     }
 * }, {});
 * ```
 */
class SchemaValidatorOracle {
    constructor() {
        this.type = 'schema-validate';
    }
    /**
     * Evaluate actual output against JSON schema.
     *
     * Never throws: any failure while validating is reported as a failed
     * result whose message starts with "Schema validation error:".
     *
     * @param input - Oracle input with expected schema and actual output
     * @param config - Oracle configuration (optional; only `strict` is read)
     * @returns Oracle result with pass/fail and validation details
     */
    async evaluate(input, config) {
        try {
            // Extract schema from expected outcomes
            const schema = input.expectedOutput?.responseSchema;
            if (!schema) {
                return {
                    oracleType: this.type,
                    passed: false,
                    score: 0,
                    message: 'No responseSchema provided in ExpectedOutcomes'
                };
            }
            // `config` may be omitted by callers; a missing value means non-strict.
            const validationErrors = this.validateAgainstSchema(input.actualOutput, schema, config?.strict);
            if (validationErrors.length === 0) {
                return {
                    oracleType: this.type,
                    passed: true,
                    score: 1.0,
                    message: 'Output matches expected schema'
                };
            }
            return {
                oracleType: this.type,
                passed: false,
                score: 0,
                message: `Schema validation failed: ${validationErrors.join(', ')}`,
                details: { validationErrors }
            };
        }
        catch (error) {
            // A thrown value is not guaranteed to be an Error (it can even be
            // null), so never dereference it unguarded inside the handler.
            const reason = error?.message ?? String(error);
            return {
                oracleType: this.type,
                passed: false,
                score: 0,
                message: `Schema validation error: ${reason}`
            };
        }
    }
    /**
     * Validate data against JSON schema.
     *
     * @param data - Value to validate
     * @param schema - Schema object describing the expected structure
     * @param strict - When truthy, undeclared object properties are errors
     * @returns List of human-readable error strings (empty when valid)
     * @private
     */
    validateAgainstSchema(data, schema, strict = false) {
        const errors = [];
        // Simple JSON Schema validation implementation
        // For production, consider using a library like ajv
        this.validateValue(data, schema, 'root', errors, strict);
        return errors;
    }
    /**
     * Tell whether a detected type name satisfies a schema `type` keyword.
     *
     * @param actualType - Type name produced by `getType`
     * @param expectedType - Schema `type`: one name or an array of names
     * @returns true when the value's type is acceptable
     * @private
     */
    matchesType(actualType, expectedType) {
        const accepted = Array.isArray(expectedType) ? expectedType : [expectedType];
        // `getType` reports whole numbers as 'integer'; every integer is
        // also a valid 'number', so accept it for that name too.
        return accepted.some((name) => name === actualType || (name === 'number' && actualType === 'integer'));
    }
    /**
     * Validate a single value against schema, recursing into object
     * properties and array items. Errors are appended to `errors`.
     *
     * @param value - Value under test
     * @param schema - Schema for this value
     * @param path - Location label used as the prefix of error messages
     * @param errors - Accumulator that receives error strings
     * @param strict - When truthy, undeclared object properties are errors
     * @private
     */
    validateValue(value, schema, path, errors, strict) {
        const hasOwn = (target, key) => Object.prototype.hasOwnProperty.call(target, key);
        // Check type
        const expectedType = schema.type;
        const actualType = this.getType(value);
        if (expectedType && !this.matchesType(actualType, expectedType)) {
            const expectedLabel = Array.isArray(expectedType) ? expectedType.join('|') : expectedType;
            errors.push(`${path}: Expected type ${expectedLabel}, got ${actualType}`);
            // Further keyword checks are meaningless on a value of the wrong type.
            return;
        }
        // Check required and declared properties for objects
        if (actualType === 'object') {
            const required = schema.required || [];
            const properties = schema.properties || {};
            for (const requiredProp of required) {
                // Own-property test: inherited names such as 'toString' must
                // not satisfy a `required` entry.
                if (!hasOwn(value, requiredProp)) {
                    errors.push(`${path}: Missing required property '${requiredProp}'`);
                }
            }
            for (const [key, val] of Object.entries(value)) {
                // Only schemas declared directly on `properties` count, so a
                // data key like 'constructor' is not matched by the prototype.
                const propertySchema = hasOwn(properties, key) ? properties[key] : undefined;
                if (propertySchema) {
                    this.validateValue(val, propertySchema, `${path}.${key}`, errors, strict);
                }
                else if (strict) {
                    errors.push(`${path}: Unexpected property '${key}'`);
                }
            }
        }
        // Check array items
        if (actualType === 'array' && schema.items) {
            const itemSchema = schema.items;
            value.forEach((item, index) => {
                this.validateValue(item, itemSchema, `${path}[${index}]`, errors, strict);
            });
        }
        // Check enum values
        if (schema.enum) {
            const enumValues = schema.enum;
            if (!enumValues.includes(value)) {
                errors.push(`${path}: Value must be one of [${enumValues.join(', ')}], got '${value}'`);
            }
        }
        // Check string patterns
        if (actualType === 'string' && schema.pattern) {
            const pattern = new RegExp(schema.pattern);
            if (!pattern.test(value)) {
                errors.push(`${path}: String does not match pattern ${schema.pattern}`);
            }
        }
        // Check numeric constraints. Test the JavaScript type rather than the
        // schema type name so whole numbers ('integer') are range-checked too.
        if (typeof value === 'number') {
            if (schema.minimum != null && value < schema.minimum) {
                errors.push(`${path}: Value ${value} is less than minimum ${schema.minimum}`);
            }
            if (schema.maximum != null && value > schema.maximum) {
                errors.push(`${path}: Value ${value} is greater than maximum ${schema.maximum}`);
            }
        }
    }
    /**
     * Get JSON Schema type for a value.
     *
     * @param value - Any value
     * @returns 'null', 'array', 'boolean', 'integer', 'number', 'string',
     *          'object', or 'unknown' for anything else (e.g. undefined)
     * @private
     */
    getType(value) {
        if (value === null) {
            return 'null';
        }
        if (Array.isArray(value)) {
            return 'array';
        }
        switch (typeof value) {
            case 'boolean':
                return 'boolean';
            case 'number':
                return Number.isInteger(value) ? 'integer' : 'number';
            case 'string':
                return 'string';
            case 'object':
                return 'object';
            default:
                return 'unknown';
        }
    }
}
192
+ exports.SchemaValidatorOracle = SchemaValidatorOracle;
193
+ //# sourceMappingURL=SchemaValidatorOracle.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"SchemaValidatorOracle.js","sourceRoot":"","sources":["../../src/oracles/SchemaValidatorOracle.ts"],"names":[],"mappings":";AAAA;;;GAGG;;;AAKH;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AACH,MAAa,qBAAqB;IAAlC;QACa,SAAI,GAAG,iBAAiB,CAAC;IAgMtC,CAAC;IA9LG;;;;;;OAMG;IACH,KAAK,CAAC,QAAQ,CAAC,KAAkB,EAAE,MAAoB;QACnD,IAAI,CAAC;YACD,wCAAwC;YACxC,MAAM,MAAM,GAAI,KAAK,CAAC,cAAsB,EAAE,cAAc,CAAC;YAC7D,IAAI,CAAC,MAAM,EAAE,CAAC;gBACV,OAAO;oBACH,UAAU,EAAE,IAAI,CAAC,IAAI;oBACrB,MAAM,EAAE,KAAK;oBACb,KAAK,EAAE,CAAC;oBACR,OAAO,EAAE,gDAAgD;iBAC5D,CAAC;YACN,CAAC;YAED,wCAAwC;YACxC,MAAM,gBAAgB,GAAG,IAAI,CAAC,qBAAqB,CAC/C,KAAK,CAAC,YAAY,EAClB,MAAM,EACN,MAAM,CAAC,MAAiB,CAC3B,CAAC;YAEF,IAAI,gBAAgB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAChC,OAAO;oBACH,UAAU,EAAE,IAAI,CAAC,IAAI;oBACrB,MAAM,EAAE,IAAI;oBACZ,KAAK,EAAE,GAAG;oBACV,OAAO,EAAE,gCAAgC;iBAC5C,CAAC;YACN,CAAC;iBAAM,CAAC;gBACJ,OAAO;oBACH,UAAU,EAAE,IAAI,CAAC,IAAI;oBACrB,MAAM,EAAE,KAAK;oBACb,KAAK,EAAE,CAAC;oBACR,OAAO,EAAE,6BAA6B,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE;oBACnE,OAAO,EAAE,EAAE,gBAAgB,EAAE;iBAChC,CAAC;YACN,CAAC;QAEL,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACb,OAAO;gBACH,UAAU,EAAE,IAAI,CAAC,IAAI;gBACrB,MAAM,EAAE,KAAK;gBACb,KAAK,EAAE,CAAC;gBACR,OAAO,EAAE,4BAA6B,KAAe,CAAC,OAAO,EAAE;aAClE,CAAC;QACN,CAAC;IACL,CAAC;IAED;;;OAGG;IACK,qBAAqB,CACzB,IAAa,EACb,MAA+B,EAC/B,SAAkB,KAAK;QAEvB,MAAM,MAAM,GAAa,EAAE,CAAC;QAE5B,+CAA+C;QAC/C,oDAAoD;QACpD,IAAI,CAAC,aAAa,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;QAEzD,OAAO,MAAM,CAAC;IAClB,CAAC;IAED;;;OAGG;IACK,aAAa,CACjB,KAAc,EACd,MAA+B,EAC/B,IAAY,EACZ,MAAgB,EAChB,MAAe;QAEf,aAAa;QACb,MAAM,YAAY,GAAG,MAAM,CAAC,IAAc,CAAC;QAC3C,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;QAEvC,IAAI,YAAY,IAAI,UAAU,KAAK,YAAY,EAAE,CAAC;YAC9C,MAAM,CAAC,IAAI,CAAC,GAAG,IAAI,mBAAmB,YAAY,SAAS,UAAU,EAAE,CAAC,CAAC;YACzE,OAAO;QACX,CAAC;QAED,wCAAwC;QACxC,IAAI,UAAU,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;YAC5C,MAAM,GAAG,GAAG,KAAgC,CAAC;YAC7C,MAAM,QAAQ,GAAG,MAAM,CAAC,QAAoB,IAAI,EAAE,CAAC;YACnD,MAAM,UAAU,GAAG,MAAM,CAAC,UAAq
D,IAAI,EAAE,CAAC;YAEtF,wBAAwB;YACxB,KAAK,MAAM,YAAY,IAAI,QAAQ,EAAE,CAAC;gBAClC,IAAI,CAAC,CAAC,YAAY,IAAI,GAAG,CAAC,EAAE,CAAC;oBACzB,MAAM,CAAC,IAAI,CAAC,GAAG,IAAI,gCAAgC,YAAY,GAAG,CAAC,CAAC;gBACxE,CAAC;YACL,CAAC;YAED,sBAAsB;YACtB,KAAK,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC3C,IAAI,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;oBAClB,IAAI,CAAC,aAAa,CAAC,GAAG,EAAE,UAAU,CAAC,GAAG,CAAC,EAAE,GAAG,IAAI,IAAI,GAAG,EAAE,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;gBAC/E,CAAC;qBAAM,IAAI,MAAM,EAAE,CAAC;oBAChB,MAAM,CAAC,IAAI,CAAC,GAAG,IAAI,0BAA0B,GAAG,GAAG,CAAC,CAAC;gBACzD,CAAC;YACL,CAAC;QACL,CAAC;QAED,oBAAoB;QACpB,IAAI,UAAU,KAAK,OAAO,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;YACzC,MAAM,GAAG,GAAG,KAAkB,CAAC;YAC/B,MAAM,UAAU,GAAG,MAAM,CAAC,KAAgC,CAAC;YAE3D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAClC,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,UAAU,EAAE,GAAG,IAAI,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;YAC5E,CAAC;QACL,CAAC;QAED,oBAAoB;QACpB,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;YACd,MAAM,UAAU,GAAG,MAAM,CAAC,IAAiB,CAAC;YAC5C,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC9B,MAAM,CAAC,IAAI,CACP,GAAG,IAAI,2BAA2B,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,KAAK,GAAG,CAC7E,CAAC;YACN,CAAC;QACL,CAAC;QAED,wBAAwB;QACxB,IAAI,UAAU,KAAK,QAAQ,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;YAC5C,MAAM,OAAO,GAAG,IAAI,MAAM,CAAC,MAAM,CAAC,OAAiB,CAAC,CAAC;YACrD,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,KAAe,CAAC,EAAE,CAAC;gBACjC,MAAM,CAAC,IAAI,CAAC,GAAG,IAAI,mCAAmC,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC;YAC5E,CAAC;QACL,CAAC;QAED,4BAA4B;QAC5B,IAAI,UAAU,KAAK,QAAQ,EAAE,CAAC;YAC1B,MAAM,GAAG,GAAG,KAAe,CAAC;YAE5B,IAAI,MAAM,CAAC,OAAO,IAAI,IAAI,IAAI,GAAG,GAAI,MAAM,CAAC,OAAkB,EAAE,CAAC;gBAC7D,MAAM,CAAC,IAAI,CAAC,GAAG,IAAI,WAAW,GAAG,yBAAyB,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC;YAChF,CAAC;YAED,IAAI,MAAM,CAAC,OAAO,IAAI,IAAI,IAAI,GAAG,GAAI,MAAM,CAAC,OAAkB,EAAE,CAAC;gBAC7D,MAAM,CAAC,IAAI,CAAC,GAAG,IAAI,WAAW,GAAG,4BAA4B,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC;YACnF,CAAC;QACL,CAAC;IACL,CAAC;IAED;;;OAGG;IACK,OAAO,CAAC,KAAc;QAC1B,IAAI,KAA
K,KAAK,IAAI,EAAE,CAAC;YACjB,OAAO,MAAM,CAAC;QAClB,CAAC;QAED,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO,OAAO,CAAC;QACnB,CAAC;QAED,MAAM,MAAM,GAAG,OAAO,KAAK,CAAC;QAE5B,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;YACvB,OAAO,SAAS,CAAC;QACrB,CAAC;QAED,IAAI,MAAM,KAAK,QAAQ,EAAE,CAAC;YACtB,OAAO,MAAM,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,QAAQ,CAAC;QAC1D,CAAC;QAED,IAAI,MAAM,KAAK,QAAQ,EAAE,CAAC;YACtB,OAAO,QAAQ,CAAC;QACpB,CAAC;QAED,IAAI,MAAM,KAAK,QAAQ,EAAE,CAAC;YACtB,OAAO,QAAQ,CAAC;QACpB,CAAC;QAED,OAAO,SAAS,CAAC;IACrB,CAAC;CACJ;AAjMD,sDAiMC"}
@@ -0,0 +1,41 @@
1
+ /**
2
+ * @fileoverview Trace validation oracle implementation
3
+ * @module @memberjunction/testing-engine
4
+ */
5
+ import { IOracle } from './IOracle';
6
+ import { OracleInput, OracleConfig, OracleResult } from '../types';
7
+ /**
8
+ * Trace Validator Oracle.
9
+ *
10
+ * Validates that agent execution trace has no errors.
11
+ * Checks AgentRun status and all AgentRunStep records for error conditions.
12
+ *
13
+ * Configuration:
14
+ * - allowWarnings: Whether to allow warning-level issues (default: true)
15
+ * - requiredSteps: Minimum number of steps expected (optional)
16
+ * - maxSteps: Maximum number of steps allowed (optional)
17
+ *
18
+ * @example
19
+ * ```typescript
20
+ * const oracle = new TraceValidatorOracle();
21
+ * const result = await oracle.evaluate({
22
+ * targetEntity: agentRun,
23
+ * contextUser
24
+ * }, {
25
+ * allowWarnings: true,
26
+ * requiredSteps: 2
27
+ * });
28
+ * ```
29
+ */
30
+ export declare class TraceValidatorOracle implements IOracle {
31
+ readonly type = "trace-no-errors"; // identifier of this oracle kind
32
+ /**
33
+ * Evaluate an agent run's execution trace for errors.
34
+ *
35
+ * @param input - Oracle input; per the class example it carries the agent run as `targetEntity` plus a `contextUser`
36
+ * @param config - Oracle configuration (`allowWarnings`, `requiredSteps`, `maxSteps` as listed on the class)
37
+ * @returns Promise resolving to the oracle result with pass/fail and trace analysis
38
+ */
39
+ evaluate(input: OracleInput, config: OracleConfig): Promise<OracleResult>;
40
+ }
41
+ //# sourceMappingURL=TraceValidatorOracle.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"TraceValidatorOracle.d.ts","sourceRoot":"","sources":["../../src/oracles/TraceValidatorOracle.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAIH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AAEnE;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,qBAAa,oBAAqB,YAAW,OAAO;IAChD,QAAQ,CAAC,IAAI,qBAAqB;IAElC;;;;;;OAMG;IACG,QAAQ,CAAC,KAAK,EAAE,WAAW,EAAE,MAAM,EAAE,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC;CA+HlF"}
@@ -0,0 +1,159 @@
1
+ "use strict";
2
+ /**
3
+ * @fileoverview Trace validation oracle implementation
4
+ * @module @memberjunction/testing-engine
5
+ */
6
+ Object.defineProperty(exports, "__esModule", { value: true });
7
+ exports.TraceValidatorOracle = void 0;
8
+ const core_1 = require("@memberjunction/core");
9
/**
 * Trace Validator Oracle.
 *
 * Validates that agent execution trace has no errors.
 * Checks the agent run's status and the status of every step record loaded
 * for that run.
 *
 * Configuration:
 * - allowWarnings: Whether to allow warning-level issues (default: true).
 *   No warning source is inspected at present, so the warnings list is
 *   always empty and this flag currently has no effect.
 * - requiredSteps: Minimum number of steps expected (optional)
 * - maxSteps: Maximum number of steps allowed (optional; 0 means the run
 *   must have no steps)
 *
 * @example
 * ```typescript
 * const oracle = new TraceValidatorOracle();
 * const result = await oracle.evaluate({
 *     targetEntity: agentRun,
 *     contextUser
 * }, {
 *     allowWarnings: true,
 *     requiredSteps: 2
 * });
 * ```
 */
class TraceValidatorOracle {
    constructor() {
        this.type = 'trace-no-errors';
    }
    /**
     * Evaluate agent run trace for errors.
     *
     * Never throws: any failure while evaluating is reported as a failed
     * result whose message starts with "Trace validation error:".
     *
     * @param input - Oracle input with agent run entity (`targetEntity`)
     *                and the user context passed to the step query
     * @param config - Oracle configuration (optional)
     * @returns Oracle result with pass/fail and trace analysis
     */
    async evaluate(input, config) {
        try {
            // Callers may omit the configuration entirely; treat that as "all defaults".
            const options = config ?? {};
            // Get agent run entity
            const agentRun = input.targetEntity;
            if (!agentRun) {
                return {
                    oracleType: this.type,
                    passed: false,
                    score: 0,
                    message: 'No agent run entity provided'
                };
            }
            // Check agent run status
            if (agentRun.Status === 'Failed' || agentRun.Status === 'Cancelled') {
                return {
                    oracleType: this.type,
                    passed: false,
                    score: 0,
                    message: `Agent run failed with status: ${agentRun.Status}`,
                    details: {
                        agentRunId: agentRun.ID,
                        status: agentRun.Status
                    }
                };
            }
            // Load agent run steps. The ID is embedded in a quoted filter
            // literal, so double any single quote to keep it inside the literal.
            const quotedRunId = String(agentRun.ID).replace(/'/g, "''");
            const rv = new core_1.RunView();
            // NOTE(review): ordering uses 'Sequence' while the error messages
            // below read StepNumber - confirm the step entity has a Sequence column.
            const stepsResult = await rv.RunView({
                EntityName: 'MJ: AI Agent Run Steps',
                ExtraFilter: `AgentRunID='${quotedRunId}'`,
                OrderBy: 'Sequence ASC',
                ResultType: 'entity_object'
            }, input.contextUser);
            if (!stepsResult.Success) {
                return {
                    oracleType: this.type,
                    passed: false,
                    score: 0,
                    message: `Failed to load agent run steps: ${stepsResult.ErrorMessage}`
                };
            }
            const steps = stepsResult.Results || [];
            // Check step count constraints. Compare against null/undefined
            // rather than truthiness so that a limit of 0 is still enforced.
            const { requiredSteps, maxSteps } = options;
            if (requiredSteps != null && steps.length < requiredSteps) {
                return {
                    oracleType: this.type,
                    passed: false,
                    score: 0,
                    message: `Expected at least ${requiredSteps} steps, got ${steps.length}`,
                    details: { stepCount: steps.length, requiredSteps }
                };
            }
            if (maxSteps != null && steps.length > maxSteps) {
                return {
                    oracleType: this.type,
                    passed: false,
                    score: 0,
                    message: `Expected at most ${maxSteps} steps, got ${steps.length}`,
                    details: { stepCount: steps.length, maxSteps }
                };
            }
            // Check each step for errors
            const allowWarnings = options.allowWarnings !== false; // Default to true
            const errors = steps
                .filter((step) => step.Status === 'Failed')
                .map((step) => `Step ${step.StepNumber}: ${step.StepName} - ${step.Status}`);
            // Nothing populates this list yet; it is kept so the result shape
            // stays stable once a warning source exists.
            const warnings = [];
            // Determine result
            if (errors.length > 0) {
                return {
                    oracleType: this.type,
                    passed: false,
                    score: 0,
                    message: `Trace contains ${errors.length} error(s)`,
                    details: { errors, warnings, stepCount: steps.length }
                };
            }
            if (!allowWarnings && warnings.length > 0) {
                return {
                    oracleType: this.type,
                    passed: false,
                    score: 0.5,
                    message: `Trace contains ${warnings.length} warning(s)`,
                    details: { warnings, stepCount: steps.length }
                };
            }
            // Success
            return {
                oracleType: this.type,
                passed: true,
                score: 1.0,
                message: `Trace clean with ${steps.length} step(s)`,
                details: {
                    stepCount: steps.length,
                    warnings: warnings.length > 0 ? warnings : undefined
                }
            };
        }
        catch (error) {
            // A thrown value is not guaranteed to be an Error (it can even be
            // null), so never dereference it unguarded inside the handler.
            const reason = error?.message ?? String(error);
            return {
                oracleType: this.type,
                passed: false,
                score: 0,
                message: `Trace validation error: ${reason}`
            };
        }
    }
}
158
+ exports.TraceValidatorOracle = TraceValidatorOracle;
159
+ //# sourceMappingURL=TraceValidatorOracle.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"TraceValidatorOracle.js","sourceRoot":"","sources":["../../src/oracles/TraceValidatorOracle.ts"],"names":[],"mappings":";AAAA;;;GAGG;;;AAEH,+CAA+C;AAK/C;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,MAAa,oBAAoB;IAAjC;QACa,SAAI,GAAG,iBAAiB,CAAC;IAwItC,CAAC;IAtIG;;;;;;OAMG;IACH,KAAK,CAAC,QAAQ,CAAC,KAAkB,EAAE,MAAoB;QACnD,IAAI,CAAC;YACD,uBAAuB;YACvB,MAAM,QAAQ,GAAG,KAAK,CAAC,YAAgC,CAAC;YACxD,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACZ,OAAO;oBACH,UAAU,EAAE,IAAI,CAAC,IAAI;oBACrB,MAAM,EAAE,KAAK;oBACb,KAAK,EAAE,CAAC;oBACR,OAAO,EAAE,8BAA8B;iBAC1C,CAAC;YACN,CAAC;YAED,yBAAyB;YACzB,IAAI,QAAQ,CAAC,MAAM,KAAK,QAAQ,IAAI,QAAQ,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;gBAClE,OAAO;oBACH,UAAU,EAAE,IAAI,CAAC,IAAI;oBACrB,MAAM,EAAE,KAAK;oBACb,KAAK,EAAE,CAAC;oBACR,OAAO,EAAE,iCAAiC,QAAQ,CAAC,MAAM,EAAE;oBAC3D,OAAO,EAAE;wBACL,UAAU,EAAE,QAAQ,CAAC,EAAE;wBACvB,MAAM,EAAE,QAAQ,CAAC,MAAM;qBAC1B;iBACJ,CAAC;YACN,CAAC;YAED,uBAAuB;YACvB,MAAM,EAAE,GAAG,IAAI,cAAO,EAAE,CAAC;YACzB,MAAM,WAAW,GAAG,MAAM,EAAE,CAAC,OAAO,CAAuB;gBACvD,UAAU,EAAE,wBAAwB;gBACpC,WAAW,EAAE,eAAe,QAAQ,CAAC,EAAE,GAAG;gBAC1C,OAAO,EAAE,cAAc;gBACvB,UAAU,EAAE,eAAe;aAC9B,EAAE,KAAK,CAAC,WAAW,CAAC,CAAC;YAEtB,IAAI,CAAC,WAAW,CAAC,OAAO,EAAE,CAAC;gBACvB,OAAO;oBACH,UAAU,EAAE,IAAI,CAAC,IAAI;oBACrB,MAAM,EAAE,KAAK;oBACb,KAAK,EAAE,CAAC;oBACR,OAAO,EAAE,mCAAmC,WAAW,CAAC,YAAY,EAAE;iBACzE,CAAC;YACN,CAAC;YAED,MAAM,KAAK,GAAG,WAAW,CAAC,OAAO,IAAI,EAAE,CAAC;YAExC,+BAA+B;YAC/B,MAAM,aAAa,GAAG,MAAM,CAAC,aAAuB,CAAC;YACrD,MAAM,QAAQ,GAAG,MAAM,CAAC,QAAkB,CAAC;YAE3C,IAAI,aAAa,IAAI,KAAK,CAAC,MAAM,GAAG,aAAa,EAAE,CAAC;gBAChD,OAAO;oBACH,UAAU,EAAE,IAAI,CAAC,IAAI;oBACrB,MAAM,EAAE,KAAK;oBACb,KAAK,EAAE,CAAC;oBACR,OAAO,EAAE,qBAAqB,aAAa,eAAe,KAAK,CAAC,MAAM,EAAE;oBACxE,OAAO,EAAE,EAAE,SAAS,EAAE,KAAK,CAAC,MAAM,EAAE,aAAa,EAAE;iBACtD,CAAC;YACN,CAAC;YAED,IAAI,QAAQ,IAAI,KAAK,CAAC,MAAM,GAAG,QAAQ,EAAE,CAAC;gBACtC,OAAO;oBACH,UAAU,EAAE,IAAI,CAAC,IAAI;oBACrB,MAAM,EAAE,KAAK;oBACb,KAAK,EAAE,CAAC;oBACR,OAAO,EAAE,oBAAoB,QAAQ,eAAe,KAAK,CAAC,MAAM,EAAE;oBAClE,OAAO,EAAE,EAAE,SAAS,EAAE,KAAK,CAAC,MAAM,EAAE,QAAQ,EAAE;i
BACjD,CAAC;YACN,CAAC;YAED,6BAA6B;YAC7B,MAAM,aAAa,GAAG,MAAM,CAAC,aAAa,KAAK,KAAK,CAAC,CAAC,kBAAkB;YACxE,MAAM,MAAM,GAAa,EAAE,CAAC;YAC5B,MAAM,QAAQ,GAAa,EAAE,CAAC;YAE9B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;gBACvB,IAAI,IAAI,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;oBAC3B,MAAM,CAAC,IAAI,CAAC,QAAQ,IAAI,CAAC,UAAU,KAAK,IAAI,CAAC,QAAQ,MAAM,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;gBAC9E,CAAC;gBAED,2DAA2D;gBAC3D,uEAAuE;YAC3E,CAAC;YAED,mBAAmB;YACnB,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACpB,OAAO;oBACH,UAAU,EAAE,IAAI,CAAC,IAAI;oBACrB,MAAM,EAAE,KAAK;oBACb,KAAK,EAAE,CAAC;oBACR,OAAO,EAAE,kBAAkB,MAAM,CAAC,MAAM,WAAW;oBACnD,OAAO,EAAE,EAAE,MAAM,EAAE,QAAQ,EAAE,SAAS,EAAE,KAAK,CAAC,MAAM,EAAE;iBACzD,CAAC;YACN,CAAC;YAED,IAAI,CAAC,aAAa,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACxC,OAAO;oBACH,UAAU,EAAE,IAAI,CAAC,IAAI;oBACrB,MAAM,EAAE,KAAK;oBACb,KAAK,EAAE,GAAG;oBACV,OAAO,EAAE,kBAAkB,QAAQ,CAAC,MAAM,aAAa;oBACvD,OAAO,EAAE,EAAE,QAAQ,EAAE,SAAS,EAAE,KAAK,CAAC,MAAM,EAAE;iBACjD,CAAC;YACN,CAAC;YAED,UAAU;YACV,OAAO;gBACH,UAAU,EAAE,IAAI,CAAC,IAAI;gBACrB,MAAM,EAAE,IAAI;gBACZ,KAAK,EAAE,GAAG;gBACV,OAAO,EAAE,oBAAoB,KAAK,CAAC,MAAM,UAAU;gBACnD,OAAO,EAAE;oBACL,SAAS,EAAE,KAAK,CAAC,MAAM;oBACvB,QAAQ,EAAE,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS;iBACvD;aACJ,CAAC;QAEN,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACb,OAAO;gBACH,UAAU,EAAE,IAAI,CAAC,IAAI;gBACrB,MAAM,EAAE,KAAK;gBACb,KAAK,EAAE,CAAC;gBACR,OAAO,EAAE,2BAA4B,KAAe,CAAC,OAAO,EAAE;aACjE,CAAC;QACN,CAAC;IACL,CAAC;CACJ;AAzID,oDAyIC"}
@@ -0,0 +1,10 @@
1
+ /**
2
+ * Oracle implementations for test evaluation
3
+ */
4
+ export * from './IOracle';
5
+ export * from './SchemaValidatorOracle';
6
+ export * from './TraceValidatorOracle';
7
+ export * from './LLMJudgeOracle';
8
+ export * from './ExactMatchOracle';
9
+ export * from './SQLValidatorOracle';
10
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/oracles/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,cAAc,WAAW,CAAC;AAC1B,cAAc,yBAAyB,CAAC;AACxC,cAAc,wBAAwB,CAAC;AACvC,cAAc,kBAAkB,CAAC;AACjC,cAAc,oBAAoB,CAAC;AACnC,cAAc,sBAAsB,CAAC"}
@@ -0,0 +1,26 @@
1
+ "use strict";
2
+ /**
3
+ * Oracle implementations for test evaluation
4
+ */
5
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { // reuse a helper already present on `this`; otherwise define one that binds m[k] onto o
6
+ if (k2 === undefined) k2 = k; // the target key defaults to the source key
7
+ var desc = Object.getOwnPropertyDescriptor(m, k);
8
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { // no own descriptor, an accessor on a module not flagged __esModule, or a writable/configurable data property
9
+ desc = { enumerable: true, get: function() { return m[k]; } }; // substitute an enumerable getter that reads m[k] on every access
10
+ }
11
+ Object.defineProperty(o, k2, desc);
12
+ }) : (function(o, m, k, k2) { // variant used when Object.create is falsy: copy the current value by plain assignment
13
+ if (k2 === undefined) k2 = k;
14
+ o[k2] = m[k];
15
+ }));
16
+ var __exportStar = (this && this.__exportStar) || function(m, exports) { // binds the members of m onto exports
17
+ for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p); // skips "default" and any name exports already owns
18
+ };
19
+ Object.defineProperty(exports, "__esModule", { value: true });
20
+ __exportStar(require("./IOracle"), exports);
21
+ __exportStar(require("./SchemaValidatorOracle"), exports);
22
+ __exportStar(require("./TraceValidatorOracle"), exports);
23
+ __exportStar(require("./LLMJudgeOracle"), exports);
24
+ __exportStar(require("./ExactMatchOracle"), exports);
25
+ __exportStar(require("./SQLValidatorOracle"), exports);
26
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/oracles/index.ts"],"names":[],"mappings":";AAAA;;GAEG;;;;;;;;;;;;;;;;AAEH,4CAA0B;AAC1B,0DAAwC;AACxC,yDAAuC;AACvC,mDAAiC;AACjC,qDAAmC;AACnC,uDAAqC"}