@memberjunction/testing-engine 0.0.1 → 2.119.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +403 -29
- package/dist/drivers/AgentEvalDriver.d.ts +197 -0
- package/dist/drivers/AgentEvalDriver.d.ts.map +1 -0
- package/dist/drivers/AgentEvalDriver.js +370 -0
- package/dist/drivers/AgentEvalDriver.js.map +1 -0
- package/dist/drivers/BaseTestDriver.d.ts +145 -0
- package/dist/drivers/BaseTestDriver.d.ts.map +1 -0
- package/dist/drivers/BaseTestDriver.js +266 -0
- package/dist/drivers/BaseTestDriver.js.map +1 -0
- package/dist/drivers/index.d.ts +6 -0
- package/dist/drivers/index.d.ts.map +1 -0
- package/dist/drivers/index.js +22 -0
- package/dist/drivers/index.js.map +1 -0
- package/dist/engine/TestEngine.d.ts +148 -0
- package/dist/engine/TestEngine.d.ts.map +1 -0
- package/dist/engine/TestEngine.js +490 -0
- package/dist/engine/TestEngine.js.map +1 -0
- package/dist/index.d.ts +20 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +42 -0
- package/dist/index.js.map +1 -0
- package/dist/oracles/ExactMatchOracle.d.ts +98 -0
- package/dist/oracles/ExactMatchOracle.d.ts.map +1 -0
- package/dist/oracles/ExactMatchOracle.js +355 -0
- package/dist/oracles/ExactMatchOracle.js.map +1 -0
- package/dist/oracles/IOracle.d.ts +47 -0
- package/dist/oracles/IOracle.d.ts.map +1 -0
- package/dist/oracles/IOracle.js +7 -0
- package/dist/oracles/IOracle.js.map +1 -0
- package/dist/oracles/LLMJudgeOracle.d.ts +65 -0
- package/dist/oracles/LLMJudgeOracle.d.ts.map +1 -0
- package/dist/oracles/LLMJudgeOracle.js +214 -0
- package/dist/oracles/LLMJudgeOracle.js.map +1 -0
- package/dist/oracles/SQLValidatorOracle.d.ts +78 -0
- package/dist/oracles/SQLValidatorOracle.d.ts.map +1 -0
- package/dist/oracles/SQLValidatorOracle.js +215 -0
- package/dist/oracles/SQLValidatorOracle.js.map +1 -0
- package/dist/oracles/SchemaValidatorOracle.d.ts +61 -0
- package/dist/oracles/SchemaValidatorOracle.d.ts.map +1 -0
- package/dist/oracles/SchemaValidatorOracle.js +193 -0
- package/dist/oracles/SchemaValidatorOracle.js.map +1 -0
- package/dist/oracles/TraceValidatorOracle.d.ts +41 -0
- package/dist/oracles/TraceValidatorOracle.d.ts.map +1 -0
- package/dist/oracles/TraceValidatorOracle.js +159 -0
- package/dist/oracles/TraceValidatorOracle.js.map +1 -0
- package/dist/oracles/index.d.ts +10 -0
- package/dist/oracles/index.d.ts.map +1 -0
- package/dist/oracles/index.js +26 -0
- package/dist/oracles/index.js.map +1 -0
- package/dist/types.d.ts +428 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +6 -0
- package/dist/types.js.map +1 -0
- package/dist/utils/cost-calculator.d.ts +92 -0
- package/dist/utils/cost-calculator.d.ts.map +1 -0
- package/dist/utils/cost-calculator.js +137 -0
- package/dist/utils/cost-calculator.js.map +1 -0
- package/dist/utils/result-formatter.d.ts +98 -0
- package/dist/utils/result-formatter.d.ts.map +1 -0
- package/dist/utils/result-formatter.js +252 -0
- package/dist/utils/result-formatter.js.map +1 -0
- package/dist/utils/scoring.d.ts +64 -0
- package/dist/utils/scoring.d.ts.map +1 -0
- package/dist/utils/scoring.js +140 -0
- package/dist/utils/scoring.js.map +1 -0
- package/package.json +36 -7
|
@@ -0,0 +1,370 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* @fileoverview Test driver for AI Agent evaluation
|
|
4
|
+
* @module @memberjunction/testing-engine
|
|
5
|
+
*/
|
|
6
|
+
var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
|
|
7
|
+
var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;
|
|
8
|
+
if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc);
|
|
9
|
+
else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;
|
|
10
|
+
return c > 3 && r && Object.defineProperty(target, key, r), r;
|
|
11
|
+
};
|
|
12
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
13
|
+
exports.AgentEvalDriver = void 0;
|
|
14
|
+
const global_1 = require("@memberjunction/global");
|
|
15
|
+
const ai_agents_1 = require("@memberjunction/ai-agents");
|
|
16
|
+
const BaseTestDriver_1 = require("./BaseTestDriver");
|
|
17
|
+
/**
|
|
18
|
+
* Test driver for AI Agent evaluation.
|
|
19
|
+
*
|
|
20
|
+
* Executes an AI agent with test input, runs configured oracles,
|
|
21
|
+
* and creates bidirectional link between TestRun and AgentRun.
|
|
22
|
+
*
|
|
23
|
+
* @example
|
|
24
|
+
* ```typescript
|
|
25
|
+
* // Configuration JSON in Test entity
|
|
26
|
+
* {
|
|
27
|
+
* "agentId": "agent-123",
|
|
28
|
+
* "oracles": [
|
|
29
|
+
* { "type": "trace-no-errors", "weight": 0.2 },
|
|
30
|
+
* { "type": "llm-judge", "weight": 0.5, "config": { "criteria": [...] } },
|
|
31
|
+
* { "type": "schema-validate", "weight": 0.3, "config": { "schema": {...} } }
|
|
32
|
+
* ],
|
|
33
|
+
* "scoringWeights": { "trace-no-errors": 0.2, "llm-judge": 0.5, "schema-validate": 0.3 }
|
|
34
|
+
* }
|
|
35
|
+
*
|
|
36
|
+
* // InputDefinition JSON in Test entity
|
|
37
|
+
* {
|
|
38
|
+
* "userMessage": "Create a report showing sales by region",
|
|
39
|
+
* "conversationContext": null,
|
|
40
|
+
* "executionParams": { "temperatureOverride": 0.3 }
|
|
41
|
+
* }
|
|
42
|
+
*
|
|
43
|
+
* // ExpectedOutcomes JSON in Test entity
|
|
44
|
+
* {
|
|
45
|
+
* "responsePatterns": ["sales.*region", "chart|graph"],
|
|
46
|
+
* "expectedEntities": ["Report", "Dashboard"],
|
|
47
|
+
* "responseSchema": { "type": "object", "properties": {...} },
|
|
48
|
+
* "judgeValidationCriteria": [
|
|
49
|
+
* "Response accurately answers the user's question",
|
|
50
|
+
* "Report includes proper data visualization",
|
|
51
|
+
* "Response is professional and clear"
|
|
52
|
+
* ]
|
|
53
|
+
* }
|
|
54
|
+
* ```
|
|
55
|
+
*/
|
|
56
|
+
let AgentEvalDriver = class AgentEvalDriver extends BaseTestDriver_1.BaseTestDriver {
|
|
57
|
+
/**
|
|
58
|
+
* Execute agent evaluation test.
|
|
59
|
+
*
|
|
60
|
+
* Steps:
|
|
61
|
+
* 1. Parse configuration and input
|
|
62
|
+
* 2. Load and execute agent via AgentRunner
|
|
63
|
+
* 3. Create bidirectional link (TestRun ↔ AgentRun)
|
|
64
|
+
* 4. Run oracles to evaluate results
|
|
65
|
+
* 5. Calculate score and determine status
|
|
66
|
+
* 6. Return structured results
|
|
67
|
+
*
|
|
68
|
+
* @param context - Execution context
|
|
69
|
+
* @returns Execution result
|
|
70
|
+
*/
|
|
71
|
+
async Execute(context) {
|
|
72
|
+
this.log('Starting agent evaluation', context.options.verbose);
|
|
73
|
+
try {
|
|
74
|
+
// Parse configuration
|
|
75
|
+
const config = this.parseConfig(context.test);
|
|
76
|
+
const input = this.parseInputDefinition(context.test);
|
|
77
|
+
const expected = this.parseExpectedOutcomes(context.test);
|
|
78
|
+
// Load agent
|
|
79
|
+
const agent = await this.loadAgent(config.agentId, context.contextUser);
|
|
80
|
+
// Execute agent
|
|
81
|
+
this.log(`Executing agent: ${agent.Name}`, context.options.verbose);
|
|
82
|
+
const agentResult = await this.executeAgent(agent, input, context.contextUser, context.test, config.maxExecutionTime, context.testRun);
|
|
83
|
+
const agentRun = agentResult.agentRun;
|
|
84
|
+
// Create bidirectional link
|
|
85
|
+
await this.linkTestRunToAgentRun(context.testRun, agentRun);
|
|
86
|
+
// Extract actual output
|
|
87
|
+
const actualOutput = this.extractAgentOutput(agentRun);
|
|
88
|
+
// Run oracles
|
|
89
|
+
this.log('Running oracles for evaluation', context.options.verbose);
|
|
90
|
+
const oracleResults = await this.runOracles(config, input, expected, actualOutput, agentRun, context);
|
|
91
|
+
// Calculate score and status
|
|
92
|
+
// When oracles are disabled, consider test passed if agent succeeded
|
|
93
|
+
const score = this.calculateScore(oracleResults, config.scoringWeights);
|
|
94
|
+
const status = oracleResults.length === 0 && agentRun.Status === 'Completed'
|
|
95
|
+
? 'Passed'
|
|
96
|
+
: this.determineStatus(oracleResults);
|
|
97
|
+
// Count checks
|
|
98
|
+
const passedChecks = oracleResults.filter(r => r.passed).length;
|
|
99
|
+
const totalChecks = oracleResults.length;
|
|
100
|
+
// Calculate cost
|
|
101
|
+
const totalCost = this.calculateTotalCost(agentRun);
|
|
102
|
+
// Calculate duration in MS
|
|
103
|
+
const durationMs = this.calculateDurationMs(agentRun);
|
|
104
|
+
// Build result
|
|
105
|
+
const result = {
|
|
106
|
+
targetType: 'AI Agent',
|
|
107
|
+
targetLogId: agentRun.ID,
|
|
108
|
+
status,
|
|
109
|
+
score,
|
|
110
|
+
oracleResults,
|
|
111
|
+
passedChecks,
|
|
112
|
+
failedChecks: totalChecks - passedChecks,
|
|
113
|
+
totalChecks,
|
|
114
|
+
inputData: input,
|
|
115
|
+
expectedOutput: expected,
|
|
116
|
+
actualOutput,
|
|
117
|
+
totalCost,
|
|
118
|
+
durationMs
|
|
119
|
+
};
|
|
120
|
+
this.log(`Agent evaluation completed: ${status} (Score: ${score})`, context.options.verbose);
|
|
121
|
+
return result;
|
|
122
|
+
}
|
|
123
|
+
catch (error) {
|
|
124
|
+
this.logError('Agent evaluation failed', error);
|
|
125
|
+
throw error;
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
/**
|
|
129
|
+
* Validate agent evaluation test configuration.
|
|
130
|
+
*
|
|
131
|
+
* Checks:
|
|
132
|
+
* - Base validation (InputDefinition, ExpectedOutcomes, Configuration)
|
|
133
|
+
* - Agent ID is valid
|
|
134
|
+
* - At least one oracle is configured
|
|
135
|
+
* - Oracle types are registered
|
|
136
|
+
* - Scoring weights are valid
|
|
137
|
+
*
|
|
138
|
+
* @param test - Test entity to validate
|
|
139
|
+
* @returns Validation result
|
|
140
|
+
*/
|
|
141
|
+
async Validate(test) {
|
|
142
|
+
// Run base validation
|
|
143
|
+
const baseResult = await super.Validate(test);
|
|
144
|
+
if (!baseResult.valid) {
|
|
145
|
+
return baseResult;
|
|
146
|
+
}
|
|
147
|
+
const errors = [...baseResult.errors];
|
|
148
|
+
const warnings = [...baseResult.warnings];
|
|
149
|
+
try {
|
|
150
|
+
// Parse and validate configuration
|
|
151
|
+
const config = this.parseConfig(test);
|
|
152
|
+
// Validate agent ID
|
|
153
|
+
if (!config.agentId) {
|
|
154
|
+
errors.push({
|
|
155
|
+
category: 'configuration',
|
|
156
|
+
message: 'agentId is required in Configuration',
|
|
157
|
+
field: 'Configuration.agentId',
|
|
158
|
+
suggestion: 'Specify the ID of the agent to test'
|
|
159
|
+
});
|
|
160
|
+
}
|
|
161
|
+
// Note: We cannot validate agent existence without contextUser
|
|
162
|
+
// That validation will happen at execution time
|
|
163
|
+
// Validate oracles configuration exists
|
|
164
|
+
if (!config.oracles || config.oracles.length === 0) {
|
|
165
|
+
errors.push({
|
|
166
|
+
category: 'configuration',
|
|
167
|
+
message: 'At least one oracle is required',
|
|
168
|
+
field: 'Configuration.oracles',
|
|
169
|
+
suggestion: 'Add oracle configurations (e.g., trace-no-errors, llm-judge)'
|
|
170
|
+
});
|
|
171
|
+
}
|
|
172
|
+
// Note: Oracle type validation requires registry from execution context
|
|
173
|
+
// That validation will happen at execution time
|
|
174
|
+
// Validate scoring weights
|
|
175
|
+
if (config.scoringWeights) {
|
|
176
|
+
const totalWeight = Object.values(config.scoringWeights).reduce((sum, w) => sum + w, 0);
|
|
177
|
+
if (Math.abs(totalWeight - 1.0) > 0.01) {
|
|
178
|
+
warnings.push({
|
|
179
|
+
category: 'best-practice',
|
|
180
|
+
message: 'Scoring weights should sum to 1.0',
|
|
181
|
+
recommendation: `Current sum: ${totalWeight.toFixed(2)}`
|
|
182
|
+
});
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
// Validate input definition
|
|
186
|
+
const input = this.parseInputDefinition(test);
|
|
187
|
+
if (!input.userMessage || input.userMessage.trim() === '') {
|
|
188
|
+
errors.push({
|
|
189
|
+
category: 'input',
|
|
190
|
+
message: 'userMessage is required in InputDefinition',
|
|
191
|
+
field: 'InputDefinition.userMessage',
|
|
192
|
+
suggestion: 'Provide the user message to send to the agent'
|
|
193
|
+
});
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
catch (error) {
|
|
197
|
+
errors.push({
|
|
198
|
+
category: 'configuration',
|
|
199
|
+
message: `Configuration validation failed: ${error.message}`,
|
|
200
|
+
field: 'Configuration',
|
|
201
|
+
suggestion: 'Fix configuration JSON structure'
|
|
202
|
+
});
|
|
203
|
+
}
|
|
204
|
+
return {
|
|
205
|
+
valid: errors.length === 0,
|
|
206
|
+
errors,
|
|
207
|
+
warnings
|
|
208
|
+
};
|
|
209
|
+
}
|
|
210
|
+
/**
|
|
211
|
+
* Load agent entity.
|
|
212
|
+
* @private
|
|
213
|
+
*/
|
|
214
|
+
async loadAgent(agentId, contextUser) {
|
|
215
|
+
const agent = await this._metadata.GetEntityObject('AI Agents', contextUser);
|
|
216
|
+
await agent.Load(agentId);
|
|
217
|
+
return agent;
|
|
218
|
+
}
|
|
219
|
+
/**
|
|
220
|
+
* Execute agent and return result.
|
|
221
|
+
* @private
|
|
222
|
+
*/
|
|
223
|
+
async executeAgent(agent, input, contextUser, test, maxExecutionTime, testRun) {
|
|
224
|
+
const runner = new ai_agents_1.AgentRunner();
|
|
225
|
+
// Build conversation messages
|
|
226
|
+
const conversationMessages = [];
|
|
227
|
+
// Add prior messages if provided
|
|
228
|
+
if (input.conversationContext?.priorMessages) {
|
|
229
|
+
for (const msg of input.conversationContext.priorMessages) {
|
|
230
|
+
conversationMessages.push({
|
|
231
|
+
role: msg.role,
|
|
232
|
+
content: msg.content
|
|
233
|
+
});
|
|
234
|
+
}
|
|
235
|
+
}
|
|
236
|
+
// Add current user message
|
|
237
|
+
conversationMessages.push({
|
|
238
|
+
role: 'user',
|
|
239
|
+
content: input.userMessage
|
|
240
|
+
});
|
|
241
|
+
// Build execution parameters
|
|
242
|
+
const params = {
|
|
243
|
+
agent: agent, // Will be AIAgentEntityExtended at runtime
|
|
244
|
+
conversationMessages,
|
|
245
|
+
contextUser,
|
|
246
|
+
override: input.executionParams?.modelOverride ? {
|
|
247
|
+
modelId: input.executionParams.modelOverride
|
|
248
|
+
} : undefined
|
|
249
|
+
};
|
|
250
|
+
// Generate conversation name with sequence number (if in suite) or [Test] prefix (standalone)
|
|
251
|
+
// Sequence will be non-null when test is part of a suite run, showing test execution order
|
|
252
|
+
const conversationName = testRun.Sequence != null
|
|
253
|
+
? `[${testRun.Sequence}] ${test.Name}`
|
|
254
|
+
: `[Test] ${test.Name}`;
|
|
255
|
+
// Execute agent with timeout if specified
|
|
256
|
+
if (maxExecutionTime) {
|
|
257
|
+
const timeoutPromise = new Promise((_, reject) => setTimeout(() => reject(new Error('Agent execution timeout')), maxExecutionTime));
|
|
258
|
+
const runResult = await Promise.race([
|
|
259
|
+
runner.RunAgentInConversation(params, {
|
|
260
|
+
userMessage: input.userMessage,
|
|
261
|
+
createArtifacts: true,
|
|
262
|
+
conversationName: conversationName,
|
|
263
|
+
testRunId: testRun.ID
|
|
264
|
+
}),
|
|
265
|
+
timeoutPromise
|
|
266
|
+
]);
|
|
267
|
+
return { agentRun: runResult.agentResult.agentRun };
|
|
268
|
+
}
|
|
269
|
+
else {
|
|
270
|
+
const runResult = await runner.RunAgentInConversation(params, {
|
|
271
|
+
userMessage: input.userMessage,
|
|
272
|
+
createArtifacts: true,
|
|
273
|
+
conversationName: conversationName,
|
|
274
|
+
testRunId: testRun.ID
|
|
275
|
+
});
|
|
276
|
+
return { agentRun: runResult.agentResult.agentRun };
|
|
277
|
+
}
|
|
278
|
+
}
|
|
279
|
+
/**
|
|
280
|
+
* Create bidirectional link between TestRun and AgentRun.
|
|
281
|
+
* @private
|
|
282
|
+
*/
|
|
283
|
+
async linkTestRunToAgentRun(testRun, agentRun) {
|
|
284
|
+
// Update AgentRun with hard FK to TestRun
|
|
285
|
+
agentRun.TestRunID = testRun.ID;
|
|
286
|
+
const saved = await agentRun.Save();
|
|
287
|
+
if (!saved) {
|
|
288
|
+
this.logError('Failed to link AgentRun to TestRun', new Error(agentRun.LatestResult?.Message));
|
|
289
|
+
}
|
|
290
|
+
}
|
|
291
|
+
/**
|
|
292
|
+
* Extract agent output from agent run.
|
|
293
|
+
* @private
|
|
294
|
+
*/
|
|
295
|
+
extractAgentOutput(agentRun) {
|
|
296
|
+
return {
|
|
297
|
+
status: agentRun.Status,
|
|
298
|
+
success: agentRun.Success,
|
|
299
|
+
errorMessage: agentRun.ErrorMessage,
|
|
300
|
+
conversationId: agentRun.ConversationID
|
|
301
|
+
};
|
|
302
|
+
}
|
|
303
|
+
/**
|
|
304
|
+
* Run configured oracles.
|
|
305
|
+
* @private
|
|
306
|
+
*/
|
|
307
|
+
async runOracles(config, input, expected, actualOutput, agentRun, context) {
|
|
308
|
+
const oracleResults = [];
|
|
309
|
+
// TODO: Temporarily skip oracle execution while oracles are being finalized
|
|
310
|
+
// Remove this flag once oracles are ready (SQL schema fixes, LLM Judge prompt creation, etc.)
|
|
311
|
+
const skipOracles = true;
|
|
312
|
+
if (skipOracles) {
|
|
313
|
+
this.log('⚠️ Oracle execution temporarily disabled', context.options.verbose);
|
|
314
|
+
return oracleResults;
|
|
315
|
+
}
|
|
316
|
+
for (const oracleConfig of config.oracles) {
|
|
317
|
+
const oracle = context.oracleRegistry.get(oracleConfig.type);
|
|
318
|
+
if (!oracle) {
|
|
319
|
+
this.logError(`Oracle not found: ${oracleConfig.type}`);
|
|
320
|
+
continue;
|
|
321
|
+
}
|
|
322
|
+
try {
|
|
323
|
+
const oracleInput = {
|
|
324
|
+
test: context.test,
|
|
325
|
+
expectedOutput: expected,
|
|
326
|
+
actualOutput,
|
|
327
|
+
targetEntity: agentRun,
|
|
328
|
+
contextUser: context.contextUser
|
|
329
|
+
};
|
|
330
|
+
const result = await oracle.evaluate(oracleInput, oracleConfig.config || {});
|
|
331
|
+
oracleResults.push(result);
|
|
332
|
+
this.log(`Oracle ${oracleConfig.type}: ${result.passed ? 'PASSED' : 'FAILED'} (Score: ${result.score})`, context.options.verbose);
|
|
333
|
+
}
|
|
334
|
+
catch (error) {
|
|
335
|
+
this.logError(`Oracle ${oracleConfig.type} failed`, error);
|
|
336
|
+
oracleResults.push({
|
|
337
|
+
oracleType: oracleConfig.type,
|
|
338
|
+
passed: false,
|
|
339
|
+
score: 0,
|
|
340
|
+
message: `Oracle execution failed: ${error.message}`
|
|
341
|
+
});
|
|
342
|
+
}
|
|
343
|
+
}
|
|
344
|
+
return oracleResults;
|
|
345
|
+
}
|
|
346
|
+
/**
|
|
347
|
+
* Calculate total cost from agent run.
|
|
348
|
+
* @private
|
|
349
|
+
*/
|
|
350
|
+
calculateTotalCost(agentRun) {
|
|
351
|
+
return agentRun.TotalCost || 0;
|
|
352
|
+
}
|
|
353
|
+
/**
|
|
354
|
+
* Calculate duration in milliseconds from agent run.
|
|
355
|
+
* @private
|
|
356
|
+
*/
|
|
357
|
+
calculateDurationMs(agentRun) {
|
|
358
|
+
if (!agentRun.StartedAt || !agentRun.CompletedAt) {
|
|
359
|
+
return 0;
|
|
360
|
+
}
|
|
361
|
+
const start = new Date(agentRun.StartedAt).getTime();
|
|
362
|
+
const end = new Date(agentRun.CompletedAt).getTime();
|
|
363
|
+
return end - start;
|
|
364
|
+
}
|
|
365
|
+
};
|
|
366
|
+
exports.AgentEvalDriver = AgentEvalDriver;
|
|
367
|
+
exports.AgentEvalDriver = AgentEvalDriver = __decorate([
|
|
368
|
+
(0, global_1.RegisterClass)(BaseTestDriver_1.BaseTestDriver, 'AgentEvalDriver')
|
|
369
|
+
], AgentEvalDriver);
|
|
370
|
+
//# sourceMappingURL=AgentEvalDriver.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"AgentEvalDriver.js","sourceRoot":"","sources":["../../src/drivers/AgentEvalDriver.ts"],"names":[],"mappings":";AAAA;;;GAGG;;;;;;;;;AAGH,mDAAuD;AAEvD,yDAAwD;AAExD,qDAAkD;AA4GlD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAsCG;AAEI,IAAM,eAAe,GAArB,MAAM,eAAgB,SAAQ,+BAAc;IAC/C;;;;;;;;;;;;;OAaG;IACI,KAAK,CAAC,OAAO,CAAC,OAA+B;QAChD,IAAI,CAAC,GAAG,CAAC,2BAA2B,EAAE,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QAE/D,IAAI,CAAC;YACD,sBAAsB;YACtB,MAAM,MAAM,GAAG,IAAI,CAAC,WAAW,CAAkB,OAAO,CAAC,IAAI,CAAC,CAAC;YAC/D,MAAM,KAAK,GAAG,IAAI,CAAC,oBAAoB,CAAiB,OAAO,CAAC,IAAI,CAAC,CAAC;YACtE,MAAM,QAAQ,GAAG,IAAI,CAAC,qBAAqB,CAA4B,OAAO,CAAC,IAAI,CAAC,CAAC;YAErF,aAAa;YACb,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,OAAO,EAAE,OAAO,CAAC,WAAW,CAAC,CAAC;YAExE,gBAAgB;YAChB,IAAI,CAAC,GAAG,CAAC,oBAAoB,KAAK,CAAC,IAAI,EAAE,EAAE,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;YACpE,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,YAAY,CACvC,KAAK,EACL,KAAK,EACL,OAAO,CAAC,WAAW,EACnB,OAAO,CAAC,IAAI,EACZ,MAAM,CAAC,gBAAgB,EACvB,OAAO,CAAC,OAAO,CAClB,CAAC;YAEF,MAAM,QAAQ,GAAG,WAAW,CAAC,QAAQ,CAAC;YAEtC,4BAA4B;YAC5B,MAAM,IAAI,CAAC,qBAAqB,CAAC,OAAO,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;YAE5D,wBAAwB;YACxB,MAAM,YAAY,GAAG,IAAI,CAAC,kBAAkB,CAAC,QAAQ,CAAC,CAAC;YAEvD,cAAc;YACd,IAAI,CAAC,GAAG,CAAC,gCAAgC,EAAE,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;YACpE,MAAM,aAAa,GAAG,MAAM,IAAI,CAAC,UAAU,CACvC,MAAM,EACN,KAAK,EACL,QAAQ,EACR,YAAY,EACZ,QAAQ,EACR,OAAO,CACV,CAAC;YAEF,6BAA6B;YAC7B,qEAAqE;YACrE,MAAM,KAAK,GAAG,IAAI,CAAC,cAAc,CAAC,aAAa,EAAE,MAAM,CAAC,cAAc,CAAC,CAAC;YACxE,MAAM,MAAM,GAAG,aAAa,CAAC,MAAM,KAAK,CAAC,IAAI,QAAQ,CAAC,MAAM,KAAK,WAAW;gBACxE,CAAC,CAAC,QAAQ;gBACV,CAAC,CAAC,IAAI,CAAC,eAAe,CAAC,aAAa,CAAC,CAAC;YAE1C,eAAe;YACf,MAAM,YAAY,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;YAChE,MAAM,WAAW,GAAG,aAAa,CAAC,MAAM,CAAC;YAEzC,iBAAiB;YACjB,MAAM,SAAS,GAAG,IAAI,CAAC,kBAAkB,CAAC,QAAQ,CAAC,CAAC;YAEpD,2BAA2B;YAC3B,MAAM,UAAU,GAAG,IAAI,CAAC,mBAAmB,CAAC,QAAQ,CAAC,CAAC;YAEtD,eAAe;YACf,MAAM,MAAM,GAA0B;gBAClC,UAAU,EAAE,UAAU;gBACtB,WAAW,E
AAE,QAAQ,CAAC,EAAE;gBACxB,MAAM;gBACN,KAAK;gBACL,aAAa;gBACb,YAAY;gBACZ,YAAY,EAAE,WAAW,GAAG,YAAY;gBACxC,WAAW;gBACX,SAAS,EAAE,KAAK;gBAChB,cAAc,EAAE,QAAQ;gBACxB,YAAY;gBACZ,SAAS;gBACT,UAAU;aACb,CAAC;YAEF,IAAI,CAAC,GAAG,CACJ,+BAA+B,MAAM,YAAY,KAAK,GAAG,EACzD,OAAO,CAAC,OAAO,CAAC,OAAO,CAC1B,CAAC;YACF,OAAO,MAAM,CAAC;QAElB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACb,IAAI,CAAC,QAAQ,CAAC,yBAAyB,EAAE,KAAc,CAAC,CAAC;YACzD,MAAM,KAAK,CAAC;QAChB,CAAC;IACL,CAAC;IAED;;;;;;;;;;;;OAYG;IACa,KAAK,CAAC,QAAQ,CAAC,IAAgB;QAC3C,sBAAsB;QACtB,MAAM,UAAU,GAAG,MAAM,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;QAC9C,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC;YACpB,OAAO,UAAU,CAAC;QACtB,CAAC;QAED,MAAM,MAAM,GAAG,CAAC,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC;QACtC,MAAM,QAAQ,GAAG,CAAC,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC;QAE1C,IAAI,CAAC;YACD,mCAAmC;YACnC,MAAM,MAAM,GAAG,IAAI,CAAC,WAAW,CAAkB,IAAI,CAAC,CAAC;YAEvD,oBAAoB;YACpB,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;gBAClB,MAAM,CAAC,IAAI,CAAC;oBACR,QAAQ,EAAE,eAAe;oBACzB,OAAO,EAAE,sCAAsC;oBAC/C,KAAK,EAAE,uBAAuB;oBAC9B,UAAU,EAAE,qCAAqC;iBACpD,CAAC,CAAC;YACP,CAAC;YACD,+DAA+D;YAC/D,gDAAgD;YAEhD,wCAAwC;YACxC,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACjD,MAAM,CAAC,IAAI,CAAC;oBACR,QAAQ,EAAE,eAAe;oBACzB,OAAO,EAAE,iCAAiC;oBAC1C,KAAK,EAAE,uBAAuB;oBAC9B,UAAU,EAAE,8DAA8D;iBAC7E,CAAC,CAAC;YACP,CAAC;YACD,wEAAwE;YACxE,gDAAgD;YAEhD,2BAA2B;YAC3B,IAAI,MAAM,CAAC,cAAc,EAAE,CAAC;gBACxB,MAAM,WAAW,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,MAAM,CAC3D,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,EACnB,CAAC,CACJ,CAAC;gBACF,IAAI,IAAI,CAAC,GAAG,CAAC,WAAW,GAAG,GAAG,CAAC,GAAG,IAAI,EAAE,CAAC;oBACrC,QAAQ,CAAC,IAAI,CAAC;wBACV,QAAQ,EAAE,eAAe;wBACzB,OAAO,EAAE,mCAAmC;wBAC5C,cAAc,EAAE,gBAAgB,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE;qBAC3D,CAAC,CAAC;gBACP,CAAC;YACL,CAAC;YAED,4BAA4B;YAC5B,MAAM,KAAK,GAAG,IAAI,CAAC,oBAAoB,CAAiB,IAAI,CAAC,CAAC;YAC9D,IAAI,CAAC,KAAK,CAAC,WAAW,IAAI,KAAK,CAAC,WAAW,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;gBACxD,MAAM,CAAC,IAAI,CAAC;oBACR,QAAQ,EAAE,OAAO;oBACjB,OAAO,EAAE,4CAA4C;oBACrD,K
AAK,EAAE,6BAA6B;oBACpC,UAAU,EAAE,+CAA+C;iBAC9D,CAAC,CAAC;YACP,CAAC;QAEL,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACb,MAAM,CAAC,IAAI,CAAC;gBACR,QAAQ,EAAE,eAAe;gBACzB,OAAO,EAAE,oCAAqC,KAAe,CAAC,OAAO,EAAE;gBACvE,KAAK,EAAE,eAAe;gBACtB,UAAU,EAAE,kCAAkC;aACjD,CAAC,CAAC;QACP,CAAC;QAED,OAAO;YACH,KAAK,EAAE,MAAM,CAAC,MAAM,KAAK,CAAC;YAC1B,MAAM;YACN,QAAQ;SACX,CAAC;IACN,CAAC;IAED;;;OAGG;IACK,KAAK,CAAC,SAAS,CAAC,OAAe,EAAE,WAAqB;QAC1D,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,eAAe,CAAgB,WAAW,EAAE,WAAW,CAAC,CAAC;QAC5F,MAAM,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAC1B,OAAO,KAAK,CAAC;IACjB,CAAC;IAED;;;OAGG;IACK,KAAK,CAAC,YAAY,CACtB,KAAoB,EACpB,KAAqB,EACrB,WAAqB,EACrB,IAAgB,EAChB,gBAAoC,EACpC,OAAsB;QAEtB,MAAM,MAAM,GAAG,IAAI,uBAAW,EAAE,CAAC;QAEjC,8BAA8B;QAC9B,MAAM,oBAAoB,GAAkB,EAAE,CAAC;QAE/C,iCAAiC;QACjC,IAAI,KAAK,CAAC,mBAAmB,EAAE,aAAa,EAAE,CAAC;YAC3C,KAAK,MAAM,GAAG,IAAI,KAAK,CAAC,mBAAmB,CAAC,aAAa,EAAE,CAAC;gBACxD,oBAAoB,CAAC,IAAI,CAAC;oBACtB,IAAI,EAAE,GAAG,CAAC,IAAI;oBACd,OAAO,EAAE,GAAG,CAAC,OAAO;iBACR,CAAC,CAAC;YACtB,CAAC;QACL,CAAC;QAED,2BAA2B;QAC3B,oBAAoB,CAAC,IAAI,CAAC;YACtB,IAAI,EAAE,MAAM;YACZ,OAAO,EAAE,KAAK,CAAC,WAAW;SACd,CAAC,CAAC;QAElB,6BAA6B;QAC7B,MAAM,MAAM,GAAG;YACX,KAAK,EAAE,KAAY,EAAE,2CAA2C;YAChE,oBAAoB;YACpB,WAAW;YACX,QAAQ,EAAE,KAAK,CAAC,eAAe,EAAE,aAAa,CAAC,CAAC,CAAC;gBAC7C,OAAO,EAAE,KAAK,CAAC,eAAe,CAAC,aAAa;aAC/C,CAAC,CAAC,CAAC,SAAS;SAChB,CAAC;QAEF,8FAA8F;QAC9F,2FAA2F;QAC3F,MAAM,gBAAgB,GAAG,OAAO,CAAC,QAAQ,IAAI,IAAI;YAC7C,CAAC,CAAC,IAAI,OAAO,CAAC,QAAQ,KAAK,IAAI,CAAC,IAAI,EAAE;YACtC,CAAC,CAAC,UAAU,IAAI,CAAC,IAAI,EAAE,CAAC;QAE5B,0CAA0C;QAC1C,IAAI,gBAAgB,EAAE,CAAC;YACnB,MAAM,cAAc,GAAG,IAAI,OAAO,CAAQ,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,CACpD,UAAU,CAAC,GAAG,EAAE,CAAC,MAAM,CAAC,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC,EAAE,gBAAgB,CAAC,CACnF,CAAC;YAEF,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC;gBACjC,MAAM,CAAC,sBAAsB,CAAC,MAAM,EAAE;oBAClC,WAAW,EAAE,KAAK,CAAC,WAAW;oBAC9B,eAAe,EAAE,IAAI;oBACrB,gBAAgB,EAAE,gBAAgB;oBAClC,SAAS,EAAE,OAAO,CAAC,EAAE;iBACxB,CAAC;gBACF,cAAc;aACjB,CAAC,CAAC;YAEH,OAAO,EAAE,QAAQ,EAAE,SAAS,CA
AC,WAAW,CAAC,QAAQ,EAAE,CAAC;QACxD,CAAC;aAAM,CAAC;YACJ,MAAM,SAAS,GAAG,MAAM,MAAM,CAAC,sBAAsB,CAAC,MAAM,EAAE;gBAC1D,WAAW,EAAE,KAAK,CAAC,WAAW;gBAC9B,eAAe,EAAE,IAAI;gBACrB,gBAAgB,EAAE,gBAAgB;gBAClC,SAAS,EAAE,OAAO,CAAC,EAAE;aACxB,CAAC,CAAC;YAEH,OAAO,EAAE,QAAQ,EAAE,SAAS,CAAC,WAAW,CAAC,QAAQ,EAAE,CAAC;QACxD,CAAC;IACL,CAAC;IAED;;;OAGG;IACK,KAAK,CAAC,qBAAqB,CAC/B,OAAsB,EACtB,QAA0B;QAE1B,0CAA0C;QAC1C,QAAQ,CAAC,SAAS,GAAG,OAAO,CAAC,EAAE,CAAC;QAChC,MAAM,KAAK,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QAEpC,IAAI,CAAC,KAAK,EAAE,CAAC;YACT,IAAI,CAAC,QAAQ,CAAC,oCAAoC,EAAE,IAAI,KAAK,CAAC,QAAQ,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC,CAAC;QACnG,CAAC;IACL,CAAC;IAED;;;OAGG;IACK,kBAAkB,CAAC,QAA0B;QACjD,OAAO;YACH,MAAM,EAAE,QAAQ,CAAC,MAAM;YACvB,OAAO,EAAE,QAAQ,CAAC,OAAO;YACzB,YAAY,EAAE,QAAQ,CAAC,YAAY;YACnC,cAAc,EAAE,QAAQ,CAAC,cAAc;SAC1C,CAAC;IACN,CAAC;IAED;;;OAGG;IACK,KAAK,CAAC,UAAU,CACpB,MAAuB,EACvB,KAAqB,EACrB,QAAmC,EACnC,YAAqC,EACrC,QAA0B,EAC1B,OAA+B;QAE/B,MAAM,aAAa,GAAmB,EAAE,CAAC;QAEzC,4EAA4E;QAC5E,8FAA8F;QAC9F,MAAM,WAAW,GAAG,IAAI,CAAC;QAEzB,IAAI,WAAW,EAAE,CAAC;YACd,IAAI,CAAC,GAAG,CAAC,2CAA2C,EAAE,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;YAC/E,OAAO,aAAa,CAAC;QACzB,CAAC;QAED,KAAK,MAAM,YAAY,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;YACxC,MAAM,MAAM,GAAG,OAAO,CAAC,cAAc,CAAC,GAAG,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;YAC7D,IAAI,CAAC,MAAM,EAAE,CAAC;gBACV,IAAI,CAAC,QAAQ,CAAC,qBAAqB,YAAY,CAAC,IAAI,EAAE,CAAC,CAAC;gBACxD,SAAS;YACb,CAAC;YAED,IAAI,CAAC;gBACD,MAAM,WAAW,GAAgB;oBAC7B,IAAI,EAAE,OAAO,CAAC,IAAI;oBAClB,cAAc,EAAE,QAAQ;oBACxB,YAAY;oBACZ,YAAY,EAAE,QAAQ;oBACtB,WAAW,EAAE,OAAO,CAAC,WAAW;iBACnC,CAAC;gBAEF,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE,YAAY,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC;gBAC7E,aAAa,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;gBAE3B,IAAI,CAAC,GAAG,CACJ,UAAU,YAAY,CAAC,IAAI,KAAK,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,YAAY,MAAM,CAAC,KAAK,GAAG,EAC9F,OAAO,CAAC,OAAO,CAAC,OAAO,CAC1B,CAAC;YAEN,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACb,IAAI,CAAC,QAAQ,CAAC,UAAU,YAAY,CAAC,IAAI,SAAS,EAAE,KAAc,CAAC,CAAC;gBACpE,aAAa,CAAC,IAAI,CAAC;oBACf,UAAU
,EAAE,YAAY,CAAC,IAAI;oBAC7B,MAAM,EAAE,KAAK;oBACb,KAAK,EAAE,CAAC;oBACR,OAAO,EAAE,4BAA6B,KAAe,CAAC,OAAO,EAAE;iBAClE,CAAC,CAAC;YACP,CAAC;QACL,CAAC;QAED,OAAO,aAAa,CAAC;IACzB,CAAC;IAED;;;OAGG;IACK,kBAAkB,CAAC,QAA0B;QACjD,OAAO,QAAQ,CAAC,SAAS,IAAI,CAAC,CAAC;IACnC,CAAC;IAED;;;OAGG;IACK,mBAAmB,CAAC,QAA0B;QAClD,IAAI,CAAC,QAAQ,CAAC,SAAS,IAAI,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;YAC/C,OAAO,CAAC,CAAC;QACb,CAAC;QACD,MAAM,KAAK,GAAG,IAAI,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,OAAO,EAAE,CAAC;QACrD,MAAM,GAAG,GAAG,IAAI,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC,OAAO,EAAE,CAAC;QACrD,OAAO,GAAG,GAAG,KAAK,CAAC;IACvB,CAAC;CACJ,CAAA;AA3YY,0CAAe;0BAAf,eAAe;IAD3B,IAAA,sBAAa,EAAC,+BAAc,EAAE,iBAAiB,CAAC;GACpC,eAAe,CA2Y3B"}
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Base class for all test driver implementations
|
|
3
|
+
* @module @memberjunction/testing-engine
|
|
4
|
+
*/
|
|
5
|
+
import { Metadata } from '@memberjunction/core';
|
|
6
|
+
import { TestEntity } from '@memberjunction/core-entities';
|
|
7
|
+
import { DriverExecutionContext, DriverExecutionResult, OracleResult, ScoringWeights, ValidationResult } from '../types';
|
|
8
|
+
/**
|
|
9
|
+
* Abstract base class for test driver implementations.
|
|
10
|
+
*
|
|
11
|
+
* Each TestType in the database has a corresponding DriverClass that extends this base.
|
|
12
|
+
* The driver is responsible for:
|
|
13
|
+
* - Parsing test-specific configuration from Configuration JSON
|
|
14
|
+
* - Executing the test with appropriate logic
|
|
15
|
+
* - Running oracles to evaluate results
|
|
16
|
+
* - Calculating scores and determining pass/fail status
|
|
17
|
+
* - Returning structured results
|
|
18
|
+
*
|
|
19
|
+
* BaseTestDriver handles common functionality:
|
|
20
|
+
* - Configuration parsing
|
|
21
|
+
* - Score calculation
|
|
22
|
+
* - Status determination
|
|
23
|
+
* - Logging
|
|
24
|
+
* - Error handling
|
|
25
|
+
*
|
|
26
|
+
* Follows pattern from BaseScheduledJob and BaseAgent.
|
|
27
|
+
*
|
|
28
|
+
* @abstract
|
|
29
|
+
* @example
|
|
30
|
+
* ```typescript
|
|
31
|
+
* @RegisterClass(BaseTestDriver, 'AgentEvalDriver')
|
|
32
|
+
* export class AgentEvalDriver extends BaseTestDriver {
|
|
33
|
+
* async Execute(context: DriverExecutionContext): Promise<DriverExecutionResult> {
|
|
34
|
+
* const config = this.parseConfig<AgentEvalConfig>(context.test);
|
|
35
|
+
* // Execute test logic
|
|
36
|
+
* return result;
|
|
37
|
+
* }
|
|
38
|
+
* }
|
|
39
|
+
* ```
|
|
40
|
+
*/
|
|
41
|
+
export declare abstract class BaseTestDriver {
    /** Metadata instance available to subclasses. */
    protected _metadata: Metadata;
    /**
     * Run the test. This is the entry point every concrete driver implements.
     *
     * A driver is expected to:
     * 1. Read Configuration, InputDefinition and ExpectedOutcomes from the test entity
     * 2. Carry out the type-specific work (e.g., run agent, execute workflow)
     * 3. Evaluate the outcome with oracles
     * 4. Compute a score and decide the status
     * 5. Hand back a structured DriverExecutionResult
     *
     * Left to the base engine:
     * - Creating/updating the TestRun entity
     * - Logging to the database
     * - Error handling
     * - Timing and cost tracking
     *
     * @param context - Execution context including test, run, user, options
     * @returns Promise resolving to the execution result
     */
    abstract Execute(context: DriverExecutionContext): Promise<DriverExecutionResult>;
    /**
     * Check that a test's configuration is valid for this test type.
     *
     * Invoked when a test is created or updated. Subclasses override this
     * to add their own type-specific checks.
     *
     * @param test - The test being validated
     * @returns Validation result carrying errors and warnings
     */
    Validate(test: TestEntity): Promise<ValidationResult>;
    /**
     * Combine oracle results into one overall score.
     *
     * With weights: weighted average. Without weights: simple average.
     *
     * @param oracleResults - Results from oracle evaluations
     * @param weights - Optional scoring weights keyed by oracle type
     * @returns Overall score from 0.0 to 1.0
     * @protected
     */
    protected calculateScore(oracleResults: OracleResult[], weights?: ScoringWeights): number;
    /**
     * Decide the overall test status from oracle results.
     *
     * The test passes only when ALL oracles pass.
     *
     * @param oracleResults - Results from oracle evaluations
     * @returns 'Passed' if all oracles passed, 'Failed' otherwise
     * @protected
     */
    protected determineStatus(oracleResults: OracleResult[]): 'Passed' | 'Failed';
    /**
     * Parse and validate the test's Configuration JSON.
     *
     * Convenience for drivers that want their configuration typed as `T`.
     *
     * @template T - The configuration type
     * @param test - The test containing the configuration
     * @returns Parsed configuration
     * @throws Error if configuration is missing or invalid
     * @protected
     */
    protected parseConfig<T>(test: TestEntity): T;
    /**
     * Parse and validate the test's InputDefinition JSON.
     *
     * @template T - The input definition type
     * @param test - The test containing the input definition
     * @returns Parsed input definition
     * @throws Error if input definition is missing or invalid
     * @protected
     */
    protected parseInputDefinition<T>(test: TestEntity): T;
    /**
     * Parse and validate the test's ExpectedOutcomes JSON.
     *
     * @template T - The expected outcomes type
     * @param test - The test containing the expected outcomes
     * @returns Parsed expected outcomes
     * @throws Error if expected outcomes is missing or invalid
     * @protected
     */
    protected parseExpectedOutcomes<T>(test: TestEntity): T;
    /**
     * Write a progress message to the log.
     *
     * @param message - Log message
     * @param verboseOnly - Whether to only log in verbose mode (default: false)
     * @protected
     */
    protected log(message: string, verboseOnly?: boolean): void;
    /**
     * Write an error to the log.
     *
     * @param message - Error message
     * @param error - Optional error object
     * @protected
     */
    protected logError(message: string, error?: Error): void;
}
|
|
145
|
+
//# sourceMappingURL=BaseTestDriver.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"BaseTestDriver.d.ts","sourceRoot":"","sources":["../../src/drivers/BaseTestDriver.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAEH,QAAQ,EAIX,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EACH,UAAU,EAEb,MAAM,+BAA+B,CAAC;AACvC,OAAO,EACH,sBAAsB,EACtB,qBAAqB,EACrB,YAAY,EACZ,cAAc,EACd,gBAAgB,EAGnB,MAAM,UAAU,CAAC;AAElB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgCG;AACH,8BAAsB,cAAc;IAChC,SAAS,CAAC,SAAS,EAAE,QAAQ,CAAkB;IAE/C;;;;;;;;;;;;;;;;;;OAkBG;IACH,QAAQ,CAAC,OAAO,CAAC,OAAO,EAAE,sBAAsB,GAAG,OAAO,CAAC,qBAAqB,CAAC;IAEjF;;;;;;;;OAQG;IACU,QAAQ,CAAC,IAAI,EAAE,UAAU,GAAG,OAAO,CAAC,gBAAgB,CAAC;IAyElE;;;;;;;;;;OAUG;IACH,SAAS,CAAC,cAAc,CACpB,aAAa,EAAE,YAAY,EAAE,EAC7B,OAAO,CAAC,EAAE,cAAc,GACzB,MAAM;IAwBT;;;;;;;;OAQG;IACH,SAAS,CAAC,eAAe,CAAC,aAAa,EAAE,YAAY,EAAE,GAAG,QAAQ,GAAG,QAAQ;IAQ7E;;;;;;;;;;;OAWG;IACH,SAAS,CAAC,WAAW,CAAC,CAAC,EAAE,IAAI,EAAE,UAAU,GAAG,CAAC;IAa7C;;;;;;;;OAQG;IACH,SAAS,CAAC,oBAAoB,CAAC,CAAC,EAAE,IAAI,EAAE,UAAU,GAAG,CAAC;IAatD;;;;;;;;OAQG;IACH,SAAS,CAAC,qBAAqB,CAAC,CAAC,EAAE,IAAI,EAAE,UAAU,GAAG,CAAC;IAavD;;;;;;OAMG;IACH,SAAS,CAAC,GAAG,CAAC,OAAO,EAAE,MAAM,EAAE,WAAW,GAAE,OAAe,GAAG,IAAI;IAQlE;;;;;;OAMG;IACH,SAAS,CAAC,QAAQ,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,KAAK,GAAG,IAAI;CAG3D"}
|