@memberjunction/testing-engine 0.0.1 → 2.119.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/README.md +403 -29
  2. package/dist/drivers/AgentEvalDriver.d.ts +197 -0
  3. package/dist/drivers/AgentEvalDriver.d.ts.map +1 -0
  4. package/dist/drivers/AgentEvalDriver.js +370 -0
  5. package/dist/drivers/AgentEvalDriver.js.map +1 -0
  6. package/dist/drivers/BaseTestDriver.d.ts +145 -0
  7. package/dist/drivers/BaseTestDriver.d.ts.map +1 -0
  8. package/dist/drivers/BaseTestDriver.js +266 -0
  9. package/dist/drivers/BaseTestDriver.js.map +1 -0
  10. package/dist/drivers/index.d.ts +6 -0
  11. package/dist/drivers/index.d.ts.map +1 -0
  12. package/dist/drivers/index.js +22 -0
  13. package/dist/drivers/index.js.map +1 -0
  14. package/dist/engine/TestEngine.d.ts +148 -0
  15. package/dist/engine/TestEngine.d.ts.map +1 -0
  16. package/dist/engine/TestEngine.js +490 -0
  17. package/dist/engine/TestEngine.js.map +1 -0
  18. package/dist/index.d.ts +20 -0
  19. package/dist/index.d.ts.map +1 -0
  20. package/dist/index.js +42 -0
  21. package/dist/index.js.map +1 -0
  22. package/dist/oracles/ExactMatchOracle.d.ts +98 -0
  23. package/dist/oracles/ExactMatchOracle.d.ts.map +1 -0
  24. package/dist/oracles/ExactMatchOracle.js +355 -0
  25. package/dist/oracles/ExactMatchOracle.js.map +1 -0
  26. package/dist/oracles/IOracle.d.ts +47 -0
  27. package/dist/oracles/IOracle.d.ts.map +1 -0
  28. package/dist/oracles/IOracle.js +7 -0
  29. package/dist/oracles/IOracle.js.map +1 -0
  30. package/dist/oracles/LLMJudgeOracle.d.ts +65 -0
  31. package/dist/oracles/LLMJudgeOracle.d.ts.map +1 -0
  32. package/dist/oracles/LLMJudgeOracle.js +214 -0
  33. package/dist/oracles/LLMJudgeOracle.js.map +1 -0
  34. package/dist/oracles/SQLValidatorOracle.d.ts +78 -0
  35. package/dist/oracles/SQLValidatorOracle.d.ts.map +1 -0
  36. package/dist/oracles/SQLValidatorOracle.js +215 -0
  37. package/dist/oracles/SQLValidatorOracle.js.map +1 -0
  38. package/dist/oracles/SchemaValidatorOracle.d.ts +61 -0
  39. package/dist/oracles/SchemaValidatorOracle.d.ts.map +1 -0
  40. package/dist/oracles/SchemaValidatorOracle.js +193 -0
  41. package/dist/oracles/SchemaValidatorOracle.js.map +1 -0
  42. package/dist/oracles/TraceValidatorOracle.d.ts +41 -0
  43. package/dist/oracles/TraceValidatorOracle.d.ts.map +1 -0
  44. package/dist/oracles/TraceValidatorOracle.js +159 -0
  45. package/dist/oracles/TraceValidatorOracle.js.map +1 -0
  46. package/dist/oracles/index.d.ts +10 -0
  47. package/dist/oracles/index.d.ts.map +1 -0
  48. package/dist/oracles/index.js +26 -0
  49. package/dist/oracles/index.js.map +1 -0
  50. package/dist/types.d.ts +428 -0
  51. package/dist/types.d.ts.map +1 -0
  52. package/dist/types.js +6 -0
  53. package/dist/types.js.map +1 -0
  54. package/dist/utils/cost-calculator.d.ts +92 -0
  55. package/dist/utils/cost-calculator.d.ts.map +1 -0
  56. package/dist/utils/cost-calculator.js +137 -0
  57. package/dist/utils/cost-calculator.js.map +1 -0
  58. package/dist/utils/result-formatter.d.ts +98 -0
  59. package/dist/utils/result-formatter.d.ts.map +1 -0
  60. package/dist/utils/result-formatter.js +252 -0
  61. package/dist/utils/result-formatter.js.map +1 -0
  62. package/dist/utils/scoring.d.ts +64 -0
  63. package/dist/utils/scoring.d.ts.map +1 -0
  64. package/dist/utils/scoring.js +140 -0
  65. package/dist/utils/scoring.js.map +1 -0
  66. package/package.json +36 -7
@@ -0,0 +1,370 @@
1
+ "use strict";
2
+ /**
3
+ * @fileoverview Test driver for AI Agent evaluation
4
+ * @module @memberjunction/testing-engine
5
+ */
6
+ var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
7
+ var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;
8
+ if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc);
9
+ else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;
10
+ return c > 3 && r && Object.defineProperty(target, key, r), r;
11
+ };
12
+ Object.defineProperty(exports, "__esModule", { value: true });
13
+ exports.AgentEvalDriver = void 0;
14
+ const global_1 = require("@memberjunction/global");
15
+ const ai_agents_1 = require("@memberjunction/ai-agents");
16
+ const BaseTestDriver_1 = require("./BaseTestDriver");
17
+ /**
18
+ * Test driver for AI Agent evaluation.
19
+ *
20
+ * Executes an AI agent with test input, runs configured oracles,
21
+ * and creates bidirectional link between TestRun and AgentRun.
22
+ *
23
+ * @example
24
+ * ```typescript
25
+ * // Configuration JSON in Test entity
26
+ * {
27
+ * "agentId": "agent-123",
28
+ * "oracles": [
29
+ * { "type": "trace-no-errors", "weight": 0.2 },
30
+ * { "type": "llm-judge", "weight": 0.5, "config": { "criteria": [...] } },
31
+ * { "type": "schema-validate", "weight": 0.3, "config": { "schema": {...} } }
32
+ * ],
33
+ * "scoringWeights": { "trace-no-errors": 0.2, "llm-judge": 0.5, "schema-validate": 0.3 }
34
+ * }
35
+ *
36
+ * // InputDefinition JSON in Test entity
37
+ * {
38
+ * "userMessage": "Create a report showing sales by region",
39
+ * "conversationContext": null,
40
+ * "executionParams": { "temperatureOverride": 0.3 }
41
+ * }
42
+ *
43
+ * // ExpectedOutcomes JSON in Test entity
44
+ * {
45
+ * "responsePatterns": ["sales.*region", "chart|graph"],
46
+ * "expectedEntities": ["Report", "Dashboard"],
47
+ * "responseSchema": { "type": "object", "properties": {...} },
48
+ * "judgeValidationCriteria": [
49
+ * "Response accurately answers the user's question",
50
+ * "Report includes proper data visualization",
51
+ * "Response is professional and clear"
52
+ * ]
53
+ * }
54
+ * ```
55
+ */
56
+ let AgentEvalDriver = class AgentEvalDriver extends BaseTestDriver_1.BaseTestDriver {
57
+ /**
58
+ * Execute agent evaluation test.
59
+ *
60
+ * Steps:
61
+ * 1. Parse configuration and input
62
+ * 2. Load and execute agent via AgentRunner
63
+ * 3. Create bidirectional link (TestRun ↔ AgentRun)
64
+ * 4. Run oracles to evaluate results
65
+ * 5. Calculate score and determine status
66
+ * 6. Return structured results
67
+ *
68
+ * @param context - Execution context
69
+ * @returns Execution result
70
+ */
71
+ async Execute(context) {
72
+ this.log('Starting agent evaluation', context.options.verbose);
73
+ try {
74
+ // Parse configuration
75
+ const config = this.parseConfig(context.test);
76
+ const input = this.parseInputDefinition(context.test);
77
+ const expected = this.parseExpectedOutcomes(context.test);
78
+ // Load agent
79
+ const agent = await this.loadAgent(config.agentId, context.contextUser);
80
+ // Execute agent
81
+ this.log(`Executing agent: ${agent.Name}`, context.options.verbose);
82
+ const agentResult = await this.executeAgent(agent, input, context.contextUser, context.test, config.maxExecutionTime, context.testRun);
83
+ const agentRun = agentResult.agentRun;
84
+ // Create bidirectional link
85
+ await this.linkTestRunToAgentRun(context.testRun, agentRun);
86
+ // Extract actual output
87
+ const actualOutput = this.extractAgentOutput(agentRun);
88
+ // Run oracles
89
+ this.log('Running oracles for evaluation', context.options.verbose);
90
+ const oracleResults = await this.runOracles(config, input, expected, actualOutput, agentRun, context);
91
+ // Calculate score and status
92
+ // When oracles are disabled, consider test passed if agent succeeded
93
+ const score = this.calculateScore(oracleResults, config.scoringWeights);
94
+ const status = oracleResults.length === 0 && agentRun.Status === 'Completed'
95
+ ? 'Passed'
96
+ : this.determineStatus(oracleResults);
97
+ // Count checks
98
+ const passedChecks = oracleResults.filter(r => r.passed).length;
99
+ const totalChecks = oracleResults.length;
100
+ // Calculate cost
101
+ const totalCost = this.calculateTotalCost(agentRun);
102
+ // Calculate duration in MS
103
+ const durationMs = this.calculateDurationMs(agentRun);
104
+ // Build result
105
+ const result = {
106
+ targetType: 'AI Agent',
107
+ targetLogId: agentRun.ID,
108
+ status,
109
+ score,
110
+ oracleResults,
111
+ passedChecks,
112
+ failedChecks: totalChecks - passedChecks,
113
+ totalChecks,
114
+ inputData: input,
115
+ expectedOutput: expected,
116
+ actualOutput,
117
+ totalCost,
118
+ durationMs
119
+ };
120
+ this.log(`Agent evaluation completed: ${status} (Score: ${score})`, context.options.verbose);
121
+ return result;
122
+ }
123
+ catch (error) {
124
+ this.logError('Agent evaluation failed', error);
125
+ throw error;
126
+ }
127
+ }
128
+ /**
129
+ * Validate agent evaluation test configuration.
130
+ *
131
+ * Checks:
132
+ * - Base validation (InputDefinition, ExpectedOutcomes, Configuration)
133
+ * - Agent ID is valid
134
+ * - At least one oracle is configured
135
+ * - Oracle types are registered
136
+ * - Scoring weights are valid
137
+ *
138
+ * @param test - Test entity to validate
139
+ * @returns Validation result
140
+ */
141
+ async Validate(test) {
142
+ // Run base validation
143
+ const baseResult = await super.Validate(test);
144
+ if (!baseResult.valid) {
145
+ return baseResult;
146
+ }
147
+ const errors = [...baseResult.errors];
148
+ const warnings = [...baseResult.warnings];
149
+ try {
150
+ // Parse and validate configuration
151
+ const config = this.parseConfig(test);
152
+ // Validate agent ID
153
+ if (!config.agentId) {
154
+ errors.push({
155
+ category: 'configuration',
156
+ message: 'agentId is required in Configuration',
157
+ field: 'Configuration.agentId',
158
+ suggestion: 'Specify the ID of the agent to test'
159
+ });
160
+ }
161
+ // Note: We cannot validate agent existence without contextUser
162
+ // That validation will happen at execution time
163
+ // Validate oracles configuration exists
164
+ if (!config.oracles || config.oracles.length === 0) {
165
+ errors.push({
166
+ category: 'configuration',
167
+ message: 'At least one oracle is required',
168
+ field: 'Configuration.oracles',
169
+ suggestion: 'Add oracle configurations (e.g., trace-no-errors, llm-judge)'
170
+ });
171
+ }
172
+ // Note: Oracle type validation requires registry from execution context
173
+ // That validation will happen at execution time
174
+ // Validate scoring weights
175
+ if (config.scoringWeights) {
176
+ const totalWeight = Object.values(config.scoringWeights).reduce((sum, w) => sum + w, 0);
177
+ if (Math.abs(totalWeight - 1.0) > 0.01) {
178
+ warnings.push({
179
+ category: 'best-practice',
180
+ message: 'Scoring weights should sum to 1.0',
181
+ recommendation: `Current sum: ${totalWeight.toFixed(2)}`
182
+ });
183
+ }
184
+ }
185
+ // Validate input definition
186
+ const input = this.parseInputDefinition(test);
187
+ if (!input.userMessage || input.userMessage.trim() === '') {
188
+ errors.push({
189
+ category: 'input',
190
+ message: 'userMessage is required in InputDefinition',
191
+ field: 'InputDefinition.userMessage',
192
+ suggestion: 'Provide the user message to send to the agent'
193
+ });
194
+ }
195
+ }
196
+ catch (error) {
197
+ errors.push({
198
+ category: 'configuration',
199
+ message: `Configuration validation failed: ${error.message}`,
200
+ field: 'Configuration',
201
+ suggestion: 'Fix configuration JSON structure'
202
+ });
203
+ }
204
+ return {
205
+ valid: errors.length === 0,
206
+ errors,
207
+ warnings
208
+ };
209
+ }
210
+ /**
211
+ * Load agent entity.
212
+ * @private
213
+ */
214
+ async loadAgent(agentId, contextUser) {
215
+ const agent = await this._metadata.GetEntityObject('AI Agents', contextUser);
216
+ await agent.Load(agentId);
217
+ return agent;
218
+ }
219
+ /**
220
+ * Execute agent and return result.
221
+ * @private
222
+ */
223
+ async executeAgent(agent, input, contextUser, test, maxExecutionTime, testRun) {
224
+ const runner = new ai_agents_1.AgentRunner();
225
+ // Build conversation messages
226
+ const conversationMessages = [];
227
+ // Add prior messages if provided
228
+ if (input.conversationContext?.priorMessages) {
229
+ for (const msg of input.conversationContext.priorMessages) {
230
+ conversationMessages.push({
231
+ role: msg.role,
232
+ content: msg.content
233
+ });
234
+ }
235
+ }
236
+ // Add current user message
237
+ conversationMessages.push({
238
+ role: 'user',
239
+ content: input.userMessage
240
+ });
241
+ // Build execution parameters
242
+ const params = {
243
+ agent: agent, // Will be AIAgentEntityExtended at runtime
244
+ conversationMessages,
245
+ contextUser,
246
+ override: input.executionParams?.modelOverride ? {
247
+ modelId: input.executionParams.modelOverride
248
+ } : undefined
249
+ };
250
+ // Generate conversation name with sequence number (if in suite) or [Test] prefix (standalone)
251
+ // Sequence will be non-null when test is part of a suite run, showing test execution order
252
+ const conversationName = testRun.Sequence != null
253
+ ? `[${testRun.Sequence}] ${test.Name}`
254
+ : `[Test] ${test.Name}`;
255
+ // Execute agent with timeout if specified
256
+ if (maxExecutionTime) {
257
+ const timeoutPromise = new Promise((_, reject) => setTimeout(() => reject(new Error('Agent execution timeout')), maxExecutionTime));
258
+ const runResult = await Promise.race([
259
+ runner.RunAgentInConversation(params, {
260
+ userMessage: input.userMessage,
261
+ createArtifacts: true,
262
+ conversationName: conversationName,
263
+ testRunId: testRun.ID
264
+ }),
265
+ timeoutPromise
266
+ ]);
267
+ return { agentRun: runResult.agentResult.agentRun };
268
+ }
269
+ else {
270
+ const runResult = await runner.RunAgentInConversation(params, {
271
+ userMessage: input.userMessage,
272
+ createArtifacts: true,
273
+ conversationName: conversationName,
274
+ testRunId: testRun.ID
275
+ });
276
+ return { agentRun: runResult.agentResult.agentRun };
277
+ }
278
+ }
279
+ /**
280
+ * Create bidirectional link between TestRun and AgentRun.
281
+ * @private
282
+ */
283
+ async linkTestRunToAgentRun(testRun, agentRun) {
284
+ // Update AgentRun with hard FK to TestRun
285
+ agentRun.TestRunID = testRun.ID;
286
+ const saved = await agentRun.Save();
287
+ if (!saved) {
288
+ this.logError('Failed to link AgentRun to TestRun', new Error(agentRun.LatestResult?.Message));
289
+ }
290
+ }
291
+ /**
292
+ * Extract agent output from agent run.
293
+ * @private
294
+ */
295
+ extractAgentOutput(agentRun) {
296
+ return {
297
+ status: agentRun.Status,
298
+ success: agentRun.Success,
299
+ errorMessage: agentRun.ErrorMessage,
300
+ conversationId: agentRun.ConversationID
301
+ };
302
+ }
303
+ /**
304
+ * Run configured oracles.
305
+ * @private
306
+ */
307
+ async runOracles(config, input, expected, actualOutput, agentRun, context) {
308
+ const oracleResults = [];
309
+ // TODO: Temporarily skip oracle execution while oracles are being finalized
310
+ // Remove this flag once oracles are ready (SQL schema fixes, LLM Judge prompt creation, etc.)
311
+ const skipOracles = true;
312
+ if (skipOracles) {
313
+ this.log('⚠️ Oracle execution temporarily disabled', context.options.verbose);
314
+ return oracleResults;
315
+ }
316
+ for (const oracleConfig of config.oracles) {
317
+ const oracle = context.oracleRegistry.get(oracleConfig.type);
318
+ if (!oracle) {
319
+ this.logError(`Oracle not found: ${oracleConfig.type}`);
320
+ continue;
321
+ }
322
+ try {
323
+ const oracleInput = {
324
+ test: context.test,
325
+ expectedOutput: expected,
326
+ actualOutput,
327
+ targetEntity: agentRun,
328
+ contextUser: context.contextUser
329
+ };
330
+ const result = await oracle.evaluate(oracleInput, oracleConfig.config || {});
331
+ oracleResults.push(result);
332
+ this.log(`Oracle ${oracleConfig.type}: ${result.passed ? 'PASSED' : 'FAILED'} (Score: ${result.score})`, context.options.verbose);
333
+ }
334
+ catch (error) {
335
+ this.logError(`Oracle ${oracleConfig.type} failed`, error);
336
+ oracleResults.push({
337
+ oracleType: oracleConfig.type,
338
+ passed: false,
339
+ score: 0,
340
+ message: `Oracle execution failed: ${error.message}`
341
+ });
342
+ }
343
+ }
344
+ return oracleResults;
345
+ }
346
+ /**
347
+ * Calculate total cost from agent run.
348
+ * @private
349
+ */
350
+ calculateTotalCost(agentRun) {
351
+ return agentRun.TotalCost || 0;
352
+ }
353
+ /**
354
+ * Calculate duration in milliseconds from agent run.
355
+ * @private
356
+ */
357
+ calculateDurationMs(agentRun) {
358
+ if (!agentRun.StartedAt || !agentRun.CompletedAt) {
359
+ return 0;
360
+ }
361
+ const start = new Date(agentRun.StartedAt).getTime();
362
+ const end = new Date(agentRun.CompletedAt).getTime();
363
+ return end - start;
364
+ }
365
+ };
366
+ exports.AgentEvalDriver = AgentEvalDriver;
367
+ exports.AgentEvalDriver = AgentEvalDriver = __decorate([
368
+ (0, global_1.RegisterClass)(BaseTestDriver_1.BaseTestDriver, 'AgentEvalDriver')
369
+ ], AgentEvalDriver);
370
+ //# sourceMappingURL=AgentEvalDriver.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"AgentEvalDriver.js","sourceRoot":"","sources":["../../src/drivers/AgentEvalDriver.ts"],"names":[],"mappings":";AAAA;;;GAGG;;;;;;;;;AAGH,mDAAuD;AAEvD,yDAAwD;AAExD,qDAAkD;AA4GlD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAsCG;AAEI,IAAM,eAAe,GAArB,MAAM,eAAgB,SAAQ,+BAAc;IAC/C;;;;;;;;;;;;;OAaG;IACI,KAAK,CAAC,OAAO,CAAC,OAA+B;QAChD,IAAI,CAAC,GAAG,CAAC,2BAA2B,EAAE,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QAE/D,IAAI,CAAC;YACD,sBAAsB;YACtB,MAAM,MAAM,GAAG,IAAI,CAAC,WAAW,CAAkB,OAAO,CAAC,IAAI,CAAC,CAAC;YAC/D,MAAM,KAAK,GAAG,IAAI,CAAC,oBAAoB,CAAiB,OAAO,CAAC,IAAI,CAAC,CAAC;YACtE,MAAM,QAAQ,GAAG,IAAI,CAAC,qBAAqB,CAA4B,OAAO,CAAC,IAAI,CAAC,CAAC;YAErF,aAAa;YACb,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,OAAO,EAAE,OAAO,CAAC,WAAW,CAAC,CAAC;YAExE,gBAAgB;YAChB,IAAI,CAAC,GAAG,CAAC,oBAAoB,KAAK,CAAC,IAAI,EAAE,EAAE,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;YACpE,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,YAAY,CACvC,KAAK,EACL,KAAK,EACL,OAAO,CAAC,WAAW,EACnB,OAAO,CAAC,IAAI,EACZ,MAAM,CAAC,gBAAgB,EACvB,OAAO,CAAC,OAAO,CAClB,CAAC;YAEF,MAAM,QAAQ,GAAG,WAAW,CAAC,QAAQ,CAAC;YAEtC,4BAA4B;YAC5B,MAAM,IAAI,CAAC,qBAAqB,CAAC,OAAO,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;YAE5D,wBAAwB;YACxB,MAAM,YAAY,GAAG,IAAI,CAAC,kBAAkB,CAAC,QAAQ,CAAC,CAAC;YAEvD,cAAc;YACd,IAAI,CAAC,GAAG,CAAC,gCAAgC,EAAE,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;YACpE,MAAM,aAAa,GAAG,MAAM,IAAI,CAAC,UAAU,CACvC,MAAM,EACN,KAAK,EACL,QAAQ,EACR,YAAY,EACZ,QAAQ,EACR,OAAO,CACV,CAAC;YAEF,6BAA6B;YAC7B,qEAAqE;YACrE,MAAM,KAAK,GAAG,IAAI,CAAC,cAAc,CAAC,aAAa,EAAE,MAAM,CAAC,cAAc,CAAC,CAAC;YACxE,MAAM,MAAM,GAAG,aAAa,CAAC,MAAM,KAAK,CAAC,IAAI,QAAQ,CAAC,MAAM,KAAK,WAAW;gBACxE,CAAC,CAAC,QAAQ;gBACV,CAAC,CAAC,IAAI,CAAC,eAAe,CAAC,aAAa,CAAC,CAAC;YAE1C,eAAe;YACf,MAAM,YAAY,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;YAChE,MAAM,WAAW,GAAG,aAAa,CAAC,MAAM,CAAC;YAEzC,iBAAiB;YACjB,MAAM,SAAS,GAAG,IAAI,CAAC,kBAAkB,CAAC,QAAQ,CAAC,CAAC;YAEpD,2BAA2B;YAC3B,MAAM,UAAU,GAAG,IAAI,CAAC,mBAAmB,CAAC,QAAQ,CAAC,CAAC;YAEtD,eAAe;YACf,MAAM,MAAM,GAA0B;gBAClC,UAAU,EAAE,UAAU;gBACtB,WAAW
,EAAE,QAAQ,CAAC,EAAE;gBACxB,MAAM;gBACN,KAAK;gBACL,aAAa;gBACb,YAAY;gBACZ,YAAY,EAAE,WAAW,GAAG,YAAY;gBACxC,WAAW;gBACX,SAAS,EAAE,KAAK;gBAChB,cAAc,EAAE,QAAQ;gBACxB,YAAY;gBACZ,SAAS;gBACT,UAAU;aACb,CAAC;YAEF,IAAI,CAAC,GAAG,CACJ,+BAA+B,MAAM,YAAY,KAAK,GAAG,EACzD,OAAO,CAAC,OAAO,CAAC,OAAO,CAC1B,CAAC;YACF,OAAO,MAAM,CAAC;QAElB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACb,IAAI,CAAC,QAAQ,CAAC,yBAAyB,EAAE,KAAc,CAAC,CAAC;YACzD,MAAM,KAAK,CAAC;QAChB,CAAC;IACL,CAAC;IAED;;;;;;;;;;;;OAYG;IACa,KAAK,CAAC,QAAQ,CAAC,IAAgB;QAC3C,sBAAsB;QACtB,MAAM,UAAU,GAAG,MAAM,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;QAC9C,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC;YACpB,OAAO,UAAU,CAAC;QACtB,CAAC;QAED,MAAM,MAAM,GAAG,CAAC,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC;QACtC,MAAM,QAAQ,GAAG,CAAC,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC;QAE1C,IAAI,CAAC;YACD,mCAAmC;YACnC,MAAM,MAAM,GAAG,IAAI,CAAC,WAAW,CAAkB,IAAI,CAAC,CAAC;YAEvD,oBAAoB;YACpB,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;gBAClB,MAAM,CAAC,IAAI,CAAC;oBACR,QAAQ,EAAE,eAAe;oBACzB,OAAO,EAAE,sCAAsC;oBAC/C,KAAK,EAAE,uBAAuB;oBAC9B,UAAU,EAAE,qCAAqC;iBACpD,CAAC,CAAC;YACP,CAAC;YACD,+DAA+D;YAC/D,gDAAgD;YAEhD,wCAAwC;YACxC,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACjD,MAAM,CAAC,IAAI,CAAC;oBACR,QAAQ,EAAE,eAAe;oBACzB,OAAO,EAAE,iCAAiC;oBAC1C,KAAK,EAAE,uBAAuB;oBAC9B,UAAU,EAAE,8DAA8D;iBAC7E,CAAC,CAAC;YACP,CAAC;YACD,wEAAwE;YACxE,gDAAgD;YAEhD,2BAA2B;YAC3B,IAAI,MAAM,CAAC,cAAc,EAAE,CAAC;gBACxB,MAAM,WAAW,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,MAAM,CAC3D,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,EACnB,CAAC,CACJ,CAAC;gBACF,IAAI,IAAI,CAAC,GAAG,CAAC,WAAW,GAAG,GAAG,CAAC,GAAG,IAAI,EAAE,CAAC;oBACrC,QAAQ,CAAC,IAAI,CAAC;wBACV,QAAQ,EAAE,eAAe;wBACzB,OAAO,EAAE,mCAAmC;wBAC5C,cAAc,EAAE,gBAAgB,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE;qBAC3D,CAAC,CAAC;gBACP,CAAC;YACL,CAAC;YAED,4BAA4B;YAC5B,MAAM,KAAK,GAAG,IAAI,CAAC,oBAAoB,CAAiB,IAAI,CAAC,CAAC;YAC9D,IAAI,CAAC,KAAK,CAAC,WAAW,IAAI,KAAK,CAAC,WAAW,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;gBACxD,MAAM,CAAC,IAAI,CAAC;oBACR,QAAQ,EAAE,OAAO;oBACjB,OAAO,EAAE,4CAA4C;oBACrD
,KAAK,EAAE,6BAA6B;oBACpC,UAAU,EAAE,+CAA+C;iBAC9D,CAAC,CAAC;YACP,CAAC;QAEL,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACb,MAAM,CAAC,IAAI,CAAC;gBACR,QAAQ,EAAE,eAAe;gBACzB,OAAO,EAAE,oCAAqC,KAAe,CAAC,OAAO,EAAE;gBACvE,KAAK,EAAE,eAAe;gBACtB,UAAU,EAAE,kCAAkC;aACjD,CAAC,CAAC;QACP,CAAC;QAED,OAAO;YACH,KAAK,EAAE,MAAM,CAAC,MAAM,KAAK,CAAC;YAC1B,MAAM;YACN,QAAQ;SACX,CAAC;IACN,CAAC;IAED;;;OAGG;IACK,KAAK,CAAC,SAAS,CAAC,OAAe,EAAE,WAAqB;QAC1D,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,eAAe,CAAgB,WAAW,EAAE,WAAW,CAAC,CAAC;QAC5F,MAAM,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAC1B,OAAO,KAAK,CAAC;IACjB,CAAC;IAED;;;OAGG;IACK,KAAK,CAAC,YAAY,CACtB,KAAoB,EACpB,KAAqB,EACrB,WAAqB,EACrB,IAAgB,EAChB,gBAAoC,EACpC,OAAsB;QAEtB,MAAM,MAAM,GAAG,IAAI,uBAAW,EAAE,CAAC;QAEjC,8BAA8B;QAC9B,MAAM,oBAAoB,GAAkB,EAAE,CAAC;QAE/C,iCAAiC;QACjC,IAAI,KAAK,CAAC,mBAAmB,EAAE,aAAa,EAAE,CAAC;YAC3C,KAAK,MAAM,GAAG,IAAI,KAAK,CAAC,mBAAmB,CAAC,aAAa,EAAE,CAAC;gBACxD,oBAAoB,CAAC,IAAI,CAAC;oBACtB,IAAI,EAAE,GAAG,CAAC,IAAI;oBACd,OAAO,EAAE,GAAG,CAAC,OAAO;iBACR,CAAC,CAAC;YACtB,CAAC;QACL,CAAC;QAED,2BAA2B;QAC3B,oBAAoB,CAAC,IAAI,CAAC;YACtB,IAAI,EAAE,MAAM;YACZ,OAAO,EAAE,KAAK,CAAC,WAAW;SACd,CAAC,CAAC;QAElB,6BAA6B;QAC7B,MAAM,MAAM,GAAG;YACX,KAAK,EAAE,KAAY,EAAE,2CAA2C;YAChE,oBAAoB;YACpB,WAAW;YACX,QAAQ,EAAE,KAAK,CAAC,eAAe,EAAE,aAAa,CAAC,CAAC,CAAC;gBAC7C,OAAO,EAAE,KAAK,CAAC,eAAe,CAAC,aAAa;aAC/C,CAAC,CAAC,CAAC,SAAS;SAChB,CAAC;QAEF,8FAA8F;QAC9F,2FAA2F;QAC3F,MAAM,gBAAgB,GAAG,OAAO,CAAC,QAAQ,IAAI,IAAI;YAC7C,CAAC,CAAC,IAAI,OAAO,CAAC,QAAQ,KAAK,IAAI,CAAC,IAAI,EAAE;YACtC,CAAC,CAAC,UAAU,IAAI,CAAC,IAAI,EAAE,CAAC;QAE5B,0CAA0C;QAC1C,IAAI,gBAAgB,EAAE,CAAC;YACnB,MAAM,cAAc,GAAG,IAAI,OAAO,CAAQ,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,CACpD,UAAU,CAAC,GAAG,EAAE,CAAC,MAAM,CAAC,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC,EAAE,gBAAgB,CAAC,CACnF,CAAC;YAEF,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC;gBACjC,MAAM,CAAC,sBAAsB,CAAC,MAAM,EAAE;oBAClC,WAAW,EAAE,KAAK,CAAC,WAAW;oBAC9B,eAAe,EAAE,IAAI;oBACrB,gBAAgB,EAAE,gBAAgB;oBAClC,SAAS,EAAE,OAAO,CAAC,EAAE;iBACxB,CAAC;gBACF,cAAc;aACjB,CAAC,CAAC;YAEH,OAAO,EAAE,QAAQ,EAAE,SAAS,
CAAC,WAAW,CAAC,QAAQ,EAAE,CAAC;QACxD,CAAC;aAAM,CAAC;YACJ,MAAM,SAAS,GAAG,MAAM,MAAM,CAAC,sBAAsB,CAAC,MAAM,EAAE;gBAC1D,WAAW,EAAE,KAAK,CAAC,WAAW;gBAC9B,eAAe,EAAE,IAAI;gBACrB,gBAAgB,EAAE,gBAAgB;gBAClC,SAAS,EAAE,OAAO,CAAC,EAAE;aACxB,CAAC,CAAC;YAEH,OAAO,EAAE,QAAQ,EAAE,SAAS,CAAC,WAAW,CAAC,QAAQ,EAAE,CAAC;QACxD,CAAC;IACL,CAAC;IAED;;;OAGG;IACK,KAAK,CAAC,qBAAqB,CAC/B,OAAsB,EACtB,QAA0B;QAE1B,0CAA0C;QAC1C,QAAQ,CAAC,SAAS,GAAG,OAAO,CAAC,EAAE,CAAC;QAChC,MAAM,KAAK,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QAEpC,IAAI,CAAC,KAAK,EAAE,CAAC;YACT,IAAI,CAAC,QAAQ,CAAC,oCAAoC,EAAE,IAAI,KAAK,CAAC,QAAQ,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC,CAAC;QACnG,CAAC;IACL,CAAC;IAED;;;OAGG;IACK,kBAAkB,CAAC,QAA0B;QACjD,OAAO;YACH,MAAM,EAAE,QAAQ,CAAC,MAAM;YACvB,OAAO,EAAE,QAAQ,CAAC,OAAO;YACzB,YAAY,EAAE,QAAQ,CAAC,YAAY;YACnC,cAAc,EAAE,QAAQ,CAAC,cAAc;SAC1C,CAAC;IACN,CAAC;IAED;;;OAGG;IACK,KAAK,CAAC,UAAU,CACpB,MAAuB,EACvB,KAAqB,EACrB,QAAmC,EACnC,YAAqC,EACrC,QAA0B,EAC1B,OAA+B;QAE/B,MAAM,aAAa,GAAmB,EAAE,CAAC;QAEzC,4EAA4E;QAC5E,8FAA8F;QAC9F,MAAM,WAAW,GAAG,IAAI,CAAC;QAEzB,IAAI,WAAW,EAAE,CAAC;YACd,IAAI,CAAC,GAAG,CAAC,2CAA2C,EAAE,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;YAC/E,OAAO,aAAa,CAAC;QACzB,CAAC;QAED,KAAK,MAAM,YAAY,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;YACxC,MAAM,MAAM,GAAG,OAAO,CAAC,cAAc,CAAC,GAAG,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;YAC7D,IAAI,CAAC,MAAM,EAAE,CAAC;gBACV,IAAI,CAAC,QAAQ,CAAC,qBAAqB,YAAY,CAAC,IAAI,EAAE,CAAC,CAAC;gBACxD,SAAS;YACb,CAAC;YAED,IAAI,CAAC;gBACD,MAAM,WAAW,GAAgB;oBAC7B,IAAI,EAAE,OAAO,CAAC,IAAI;oBAClB,cAAc,EAAE,QAAQ;oBACxB,YAAY;oBACZ,YAAY,EAAE,QAAQ;oBACtB,WAAW,EAAE,OAAO,CAAC,WAAW;iBACnC,CAAC;gBAEF,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE,YAAY,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC;gBAC7E,aAAa,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;gBAE3B,IAAI,CAAC,GAAG,CACJ,UAAU,YAAY,CAAC,IAAI,KAAK,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,YAAY,MAAM,CAAC,KAAK,GAAG,EAC9F,OAAO,CAAC,OAAO,CAAC,OAAO,CAC1B,CAAC;YAEN,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACb,IAAI,CAAC,QAAQ,CAAC,UAAU,YAAY,CAAC,IAAI,SAAS,EAAE,KAAc,CAAC,CAAC;gBACpE,aAAa,CAAC,IAAI,CAAC;oBACf,UA
AU,EAAE,YAAY,CAAC,IAAI;oBAC7B,MAAM,EAAE,KAAK;oBACb,KAAK,EAAE,CAAC;oBACR,OAAO,EAAE,4BAA6B,KAAe,CAAC,OAAO,EAAE;iBAClE,CAAC,CAAC;YACP,CAAC;QACL,CAAC;QAED,OAAO,aAAa,CAAC;IACzB,CAAC;IAED;;;OAGG;IACK,kBAAkB,CAAC,QAA0B;QACjD,OAAO,QAAQ,CAAC,SAAS,IAAI,CAAC,CAAC;IACnC,CAAC;IAED;;;OAGG;IACK,mBAAmB,CAAC,QAA0B;QAClD,IAAI,CAAC,QAAQ,CAAC,SAAS,IAAI,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;YAC/C,OAAO,CAAC,CAAC;QACb,CAAC;QACD,MAAM,KAAK,GAAG,IAAI,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,OAAO,EAAE,CAAC;QACrD,MAAM,GAAG,GAAG,IAAI,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC,OAAO,EAAE,CAAC;QACrD,OAAO,GAAG,GAAG,KAAK,CAAC;IACvB,CAAC;CACJ,CAAA;AA3YY,0CAAe;0BAAf,eAAe;IAD3B,IAAA,sBAAa,EAAC,+BAAc,EAAE,iBAAiB,CAAC;GACpC,eAAe,CA2Y3B"}
@@ -0,0 +1,145 @@
1
+ /**
2
+ * @fileoverview Base class for all test driver implementations
3
+ * @module @memberjunction/testing-engine
4
+ */
5
+ import { Metadata } from '@memberjunction/core';
6
+ import { TestEntity } from '@memberjunction/core-entities';
7
+ import { DriverExecutionContext, DriverExecutionResult, OracleResult, ScoringWeights, ValidationResult } from '../types';
/**
 * Common abstract parent of every test driver.
 *
 * A TestType row in the database names a DriverClass, and that class derives
 * from this one. A concrete driver:
 * - reads its own settings out of the test's Configuration JSON,
 * - performs the test-specific execution,
 * - evaluates the outcome with oracles,
 * - turns oracle results into a score and a pass/fail status,
 * - hands back a structured result.
 *
 * Shared plumbing supplied here: configuration parsing, score calculation,
 * status determination, logging and error handling.
 *
 * Modelled on BaseScheduledJob and BaseAgent.
 *
 * @abstract
 * @example
 * ```typescript
 * @RegisterClass(BaseTestDriver, 'AgentEvalDriver')
 * export class AgentEvalDriver extends BaseTestDriver {
 *   async Execute(context: DriverExecutionContext): Promise<DriverExecutionResult> {
 *     const config = this.parseConfig<AgentEvalConfig>(context.test);
 *     // Execute test logic
 *     return result;
 *   }
 * }
 * ```
 */
export declare abstract class BaseTestDriver {
    protected _metadata: Metadata;
    /**
     * Run the test; the single entry point every driver must implement.
     *
     * Expected flow inside an implementation:
     * 1. Read Configuration, InputDefinition and ExpectedOutcomes from the test entity
     * 2. Carry out the test-specific work (e.g., run agent, execute workflow)
     * 3. Evaluate the outcome with oracles
     * 4. Compute the score and decide the status
     * 5. Return a structured DriverExecutionResult
     *
     * Left to the base engine rather than the driver:
     * - creating/updating the TestRun entity
     * - logging to the database
     * - error handling
     * - timing and cost tracking
     *
     * @param context - Execution context including test, run, user, options
     * @returns Promise resolving to the execution result
     */
    abstract Execute(context: DriverExecutionContext): Promise<DriverExecutionResult>;
    /**
     * Check that a test's configuration is valid for this test type.
     *
     * Invoked when a test is created or updated. Subclasses override this to
     * layer type-specific checks on top.
     *
     * @param test - The test being validated
     * @returns Validation result carrying errors and warnings
     */
    Validate(test: TestEntity): Promise<ValidationResult>;
    /**
     * Combine oracle results into one overall score.
     *
     * A weighted average is used when weights are supplied, a plain average
     * otherwise.
     *
     * @param oracleResults - Results from oracle evaluations
     * @param weights - Optional scoring weights keyed by oracle type
     * @returns Overall score from 0.0 to 1.0
     * @protected
     */
    protected calculateScore(oracleResults: OracleResult[], weights?: ScoringWeights): number;
    /**
     * Derive the overall test status from oracle results.
     *
     * The test passes only when every oracle passed.
     *
     * @param oracleResults - Results from oracle evaluations
     * @returns 'Passed' if all oracles passed, 'Failed' otherwise
     * @protected
     */
    protected determineStatus(oracleResults: OracleResult[]): 'Passed' | 'Failed';
    /**
     * Parse and validate the test's Configuration JSON.
     *
     * Convenience for drivers that want their configuration back as a typed
     * object.
     *
     * @template T - The configuration type
     * @param test - The test containing the configuration
     * @returns Parsed configuration
     * @throws Error if configuration is missing or invalid
     * @protected
     */
    protected parseConfig<T>(test: TestEntity): T;
    /**
     * Parse and validate the test's InputDefinition JSON.
     *
     * @template T - The input definition type
     * @param test - The test containing the input definition
     * @returns Parsed input definition
     * @throws Error if input definition is missing or invalid
     * @protected
     */
    protected parseInputDefinition<T>(test: TestEntity): T;
    /**
     * Parse and validate the test's ExpectedOutcomes JSON.
     *
     * @template T - The expected outcomes type
     * @param test - The test containing the expected outcomes
     * @returns Parsed expected outcomes
     * @throws Error if expected outcomes is missing or invalid
     * @protected
     */
    protected parseExpectedOutcomes<T>(test: TestEntity): T;
    /**
     * Emit a progress message.
     *
     * @param message - Log message
     * @param verboseOnly - Whether to only log in verbose mode (default: false)
     * @protected
     */
    protected log(message: string, verboseOnly?: boolean): void;
    /**
     * Emit an error message.
     *
     * @param message - Error message
     * @param error - Optional error object
     * @protected
     */
    protected logError(message: string, error?: Error): void;
}
145
+ //# sourceMappingURL=BaseTestDriver.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"BaseTestDriver.d.ts","sourceRoot":"","sources":["../../src/drivers/BaseTestDriver.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAEH,QAAQ,EAIX,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EACH,UAAU,EAEb,MAAM,+BAA+B,CAAC;AACvC,OAAO,EACH,sBAAsB,EACtB,qBAAqB,EACrB,YAAY,EACZ,cAAc,EACd,gBAAgB,EAGnB,MAAM,UAAU,CAAC;AAElB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgCG;AACH,8BAAsB,cAAc;IAChC,SAAS,CAAC,SAAS,EAAE,QAAQ,CAAkB;IAE/C;;;;;;;;;;;;;;;;;;OAkBG;IACH,QAAQ,CAAC,OAAO,CAAC,OAAO,EAAE,sBAAsB,GAAG,OAAO,CAAC,qBAAqB,CAAC;IAEjF;;;;;;;;OAQG;IACU,QAAQ,CAAC,IAAI,EAAE,UAAU,GAAG,OAAO,CAAC,gBAAgB,CAAC;IAyElE;;;;;;;;;;OAUG;IACH,SAAS,CAAC,cAAc,CACpB,aAAa,EAAE,YAAY,EAAE,EAC7B,OAAO,CAAC,EAAE,cAAc,GACzB,MAAM;IAwBT;;;;;;;;OAQG;IACH,SAAS,CAAC,eAAe,CAAC,aAAa,EAAE,YAAY,EAAE,GAAG,QAAQ,GAAG,QAAQ;IAQ7E;;;;;;;;;;;OAWG;IACH,SAAS,CAAC,WAAW,CAAC,CAAC,EAAE,IAAI,EAAE,UAAU,GAAG,CAAC;IAa7C;;;;;;;;OAQG;IACH,SAAS,CAAC,oBAAoB,CAAC,CAAC,EAAE,IAAI,EAAE,UAAU,GAAG,CAAC;IAatD;;;;;;;;OAQG;IACH,SAAS,CAAC,qBAAqB,CAAC,CAAC,EAAE,IAAI,EAAE,UAAU,GAAG,CAAC;IAavD;;;;;;OAMG;IACH,SAAS,CAAC,GAAG,CAAC,OAAO,EAAE,MAAM,EAAE,WAAW,GAAE,OAAe,GAAG,IAAI;IAQlE;;;;;;OAMG;IACH,SAAS,CAAC,QAAQ,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,KAAK,GAAG,IAAI;CAG3D"}