@memberjunction/testing-engine 4.0.0 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +147 -376
- package/package.json +10 -10
package/README.md
CHANGED
|
@@ -1,419 +1,190 @@
|
|
|
1
|
-
#
|
|
1
|
+
# @memberjunction/testing-engine
|
|
2
2
|
|
|
3
|
-
Core execution engine for the MemberJunction Testing Framework. Provides the
|
|
3
|
+
Core test execution and evaluation engine for the MemberJunction Testing Framework. Provides the driver/oracle architecture for running tests, evaluating results with multiple oracle strategies, and recording outcomes.
|
|
4
4
|
|
|
5
5
|
## Architecture
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
TestEngine
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
7
|
+
```mermaid
|
|
8
|
+
graph TD
|
|
9
|
+
subgraph "@memberjunction/testing-engine"
|
|
10
|
+
A[TestEngine] --> B[Test Drivers]
|
|
11
|
+
A --> C[Oracles]
|
|
12
|
+
A --> D[Utilities]
|
|
13
|
+
|
|
14
|
+
B --> E[BaseTestDriver]
|
|
15
|
+
E --> F[AgentEvalDriver]
|
|
16
|
+
|
|
17
|
+
C --> G[ExactMatchOracle]
|
|
18
|
+
C --> H[LLMJudgeOracle]
|
|
19
|
+
C --> I[SchemaValidatorOracle]
|
|
20
|
+
C --> J[SQLValidatorOracle]
|
|
21
|
+
C --> K[TraceValidatorOracle]
|
|
22
|
+
|
|
23
|
+
D --> L[Variable Resolver]
|
|
24
|
+
D --> M[Scoring Calculator]
|
|
25
|
+
D --> N[Cost Calculator]
|
|
26
|
+
D --> O[Result Formatter]
|
|
27
|
+
D --> P[Execution Context]
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
subgraph "Base Layer"
|
|
31
|
+
Q["TestEngineBase<br/>(from engine-base)"]
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
A -->|extends| Q
|
|
35
|
+
|
|
36
|
+
style A fill:#2d6a9f,stroke:#1a4971,color:#fff
|
|
37
|
+
style E fill:#2d8659,stroke:#1a5c3a,color:#fff
|
|
38
|
+
style F fill:#b8762f,stroke:#8a5722,color:#fff
|
|
39
|
+
style G fill:#7c5295,stroke:#563a6b,color:#fff
|
|
40
|
+
style H fill:#7c5295,stroke:#563a6b,color:#fff
|
|
41
|
+
style I fill:#7c5295,stroke:#563a6b,color:#fff
|
|
42
|
+
style J fill:#7c5295,stroke:#563a6b,color:#fff
|
|
43
|
+
style K fill:#7c5295,stroke:#563a6b,color:#fff
|
|
44
|
+
style Q fill:#2d6a9f,stroke:#1a4971,color:#fff
|
|
19
45
|
```
|
|
20
46
|
|
|
21
|
-
##
|
|
22
|
-
|
|
23
|
-
### 1. Test Engine (`TestEngine`)
|
|
24
|
-
Main orchestration layer responsible for:
|
|
25
|
-
- Loading test definitions from database
|
|
26
|
-
- Instantiating appropriate test drivers via ClassFactory
|
|
27
|
-
- Managing test execution lifecycle
|
|
28
|
-
- Aggregating and storing results
|
|
29
|
-
- Tracking costs and performance metrics
|
|
30
|
-
|
|
31
|
-
### 2. Base Test Driver (`BaseTestDriver`)
|
|
32
|
-
Abstract base class that all test drivers extend:
|
|
33
|
-
- Defines execution contract
|
|
34
|
-
- Provides common utilities (scoring, validation, result formatting)
|
|
35
|
-
- Handles error management
|
|
36
|
-
- Manages test context
|
|
37
|
-
|
|
38
|
-
### 3. Oracle System
|
|
39
|
-
Pluggable evaluation components:
|
|
40
|
-
- **SchemaValidator** - Validates output structure
|
|
41
|
-
- **TraceValidator** - Checks execution traces for errors
|
|
42
|
-
- **LLMJudge** - Uses LLM to evaluate quality
|
|
43
|
-
- **ExactMatcher** - Deterministic output comparison
|
|
44
|
-
- **SQLValidator** - Database state verification
|
|
45
|
-
|
|
46
|
-
### 4. Result Management
|
|
47
|
-
Structured result capture and storage:
|
|
48
|
-
- TestRun entity management
|
|
49
|
-
- Metric collection and aggregation
|
|
50
|
-
- Artifact storage coordination
|
|
51
|
-
- Cost tracking
|
|
52
|
-
|
|
53
|
-
## Class Structure
|
|
54
|
-
|
|
55
|
-
### TestEngine
|
|
47
|
+
## Overview
|
|
56
48
|
|
|
57
|
-
|
|
58
|
-
export class TestEngine {
|
|
59
|
-
/**
|
|
60
|
-
* Run a single test by ID
|
|
61
|
-
*/
|
|
62
|
-
async runTest(
|
|
63
|
-
testId: string,
|
|
64
|
-
contextUser: UserInfo,
|
|
65
|
-
options?: TestRunOptions
|
|
66
|
-
): Promise<TestRunResult>
|
|
67
|
-
|
|
68
|
-
/**
|
|
69
|
-
* Run an entire test suite
|
|
70
|
-
*/
|
|
71
|
-
async runSuite(
|
|
72
|
-
suiteId: string,
|
|
73
|
-
contextUser: UserInfo,
|
|
74
|
-
options?: SuiteRunOptions
|
|
75
|
-
): Promise<SuiteRunResult>
|
|
76
|
-
|
|
77
|
-
/**
|
|
78
|
-
* Validate test definition without executing
|
|
79
|
-
*/
|
|
80
|
-
async validateTest(testId: string): Promise<ValidationResult>
|
|
81
|
-
}
|
|
82
|
-
```
|
|
49
|
+
This is the **server-side execution engine** for MemberJunction tests. It implements a plugin-based architecture where:
|
|
83
50
|
|
|
84
|
-
|
|
51
|
+
- **Drivers** handle test execution (running agents, calling APIs, etc.)
|
|
52
|
+
- **Oracles** evaluate test outputs against expected outcomes
|
|
53
|
+
- **Utilities** handle variable resolution, scoring, cost tracking, and result formatting
|
|
85
54
|
|
|
86
|
-
|
|
87
|
-
export abstract class BaseTestDriver {
|
|
88
|
-
/**
|
|
89
|
-
* Execute the test and return results
|
|
90
|
-
* Must be implemented by concrete drivers
|
|
91
|
-
*/
|
|
92
|
-
abstract execute(
|
|
93
|
-
test: TestEntity,
|
|
94
|
-
contextUser: UserInfo,
|
|
95
|
-
options: ExecutionOptions
|
|
96
|
-
): Promise<DriverExecutionResult>
|
|
97
|
-
|
|
98
|
-
/**
|
|
99
|
-
* Validate test configuration
|
|
100
|
-
* Can be overridden by concrete drivers
|
|
101
|
-
*/
|
|
102
|
-
async validate(test: TestEntity): Promise<ValidationResult>
|
|
103
|
-
|
|
104
|
-
/**
|
|
105
|
-
* Calculate overall score from oracle results
|
|
106
|
-
*/
|
|
107
|
-
protected calculateScore(
|
|
108
|
-
oracleResults: OracleResult[],
|
|
109
|
-
weights: ScoringWeights
|
|
110
|
-
): number
|
|
111
|
-
|
|
112
|
-
/**
|
|
113
|
-
* Store execution results to database
|
|
114
|
-
*/
|
|
115
|
-
protected async storeResults(
|
|
116
|
-
testRun: TestRunEntity,
|
|
117
|
-
results: DriverExecutionResult
|
|
118
|
-
): Promise<void>
|
|
119
|
-
}
|
|
120
|
-
```
|
|
55
|
+
**Key capabilities:**
|
|
121
56
|
|
|
122
|
-
|
|
57
|
+
- Execute individual tests or full test suites with parallel/sequential modes
|
|
58
|
+
- Multiple oracle strategies for evaluating test results
|
|
59
|
+
- Variable resolution system with priority cascading (run > suite > test > type)
|
|
60
|
+
- Automatic cost and duration tracking
|
|
61
|
+
- Progress callbacks for real-time execution monitoring
|
|
62
|
+
- Result persistence to `MJ: Test Runs` and `MJ: Test Suite Runs` entities
|
|
123
63
|
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
*/
|
|
129
|
-
readonly type: string;
|
|
130
|
-
|
|
131
|
-
/**
|
|
132
|
-
* Execute the oracle evaluation
|
|
133
|
-
*/
|
|
134
|
-
evaluate(
|
|
135
|
-
input: OracleInput,
|
|
136
|
-
config: OracleConfig
|
|
137
|
-
): Promise<OracleResult>
|
|
138
|
-
}
|
|
139
|
-
|
|
140
|
-
export interface OracleResult {
|
|
141
|
-
oracleType: string;
|
|
142
|
-
passed: boolean;
|
|
143
|
-
score?: number;
|
|
144
|
-
message: string;
|
|
145
|
-
details?: any;
|
|
146
|
-
}
|
|
64
|
+
## Installation
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
npm install @memberjunction/testing-engine
|
|
147
68
|
```
|
|
148
69
|
|
|
149
|
-
##
|
|
70
|
+
## Core Concepts
|
|
71
|
+
|
|
72
|
+
### Driver/Oracle Pattern
|
|
73
|
+
|
|
74
|
+
```mermaid
|
|
75
|
+
graph LR
|
|
76
|
+
A[Test Definition] --> B[Driver]
|
|
77
|
+
B -->|Executes| C[Target System]
|
|
78
|
+
C -->|Output| D[Oracle 1]
|
|
79
|
+
C -->|Output| E[Oracle 2]
|
|
80
|
+
C -->|Output| F[Oracle N]
|
|
81
|
+
D --> G[Combined Score]
|
|
82
|
+
E --> G
|
|
83
|
+
F --> G
|
|
84
|
+
|
|
85
|
+
style A fill:#2d6a9f,stroke:#1a4971,color:#fff
|
|
86
|
+
style B fill:#2d8659,stroke:#1a5c3a,color:#fff
|
|
87
|
+
style D fill:#7c5295,stroke:#563a6b,color:#fff
|
|
88
|
+
style E fill:#7c5295,stroke:#563a6b,color:#fff
|
|
89
|
+
style F fill:#7c5295,stroke:#563a6b,color:#fff
|
|
90
|
+
style G fill:#b8762f,stroke:#8a5722,color:#fff
|
|
91
|
+
```
|
|
150
92
|
|
|
151
|
-
The
|
|
93
|
+
1. The **driver** executes the test target (e.g., runs an AI agent)
|
|
94
|
+
2. Multiple **oracles** independently evaluate the output
|
|
95
|
+
3. Scores are combined using configurable weights
|
|
152
96
|
|
|
153
|
-
|
|
154
|
-
export class AgentEvalDriver extends BaseTestDriver {
|
|
155
|
-
async execute(
|
|
156
|
-
test: TestEntity,
|
|
157
|
-
contextUser: UserInfo,
|
|
158
|
-
options: ExecutionOptions
|
|
159
|
-
): Promise<DriverExecutionResult> {
|
|
160
|
-
// 1. Parse test configuration
|
|
161
|
-
const config = this.parseConfig(test);
|
|
162
|
-
|
|
163
|
-
// 2. Execute agent with test inputs
|
|
164
|
-
const agentRun = await this.executeAgent(config.inputs, contextUser);
|
|
165
|
-
|
|
166
|
-
// 3. Run oracles to evaluate output
|
|
167
|
-
const oracleResults = await this.runOracles(
|
|
168
|
-
agentRun,
|
|
169
|
-
config.oracles,
|
|
170
|
-
config.expectedOutcomes
|
|
171
|
-
);
|
|
172
|
-
|
|
173
|
-
// 4. Calculate score based on weights
|
|
174
|
-
const score = this.calculateScore(oracleResults, config.weights);
|
|
175
|
-
|
|
176
|
-
// 5. Return structured results
|
|
177
|
-
return {
|
|
178
|
-
targetType: 'Agent Run',
|
|
179
|
-
targetLogId: agentRun.ID,
|
|
180
|
-
status: this.determineStatus(oracleResults),
|
|
181
|
-
score,
|
|
182
|
-
oracleResults,
|
|
183
|
-
cost: this.calculateCost(agentRun),
|
|
184
|
-
duration: this.calculateDuration(agentRun)
|
|
185
|
-
};
|
|
186
|
-
}
|
|
187
|
-
|
|
188
|
-
private async runOracles(
|
|
189
|
-
agentRun: AIAgentRunEntity,
|
|
190
|
-
oracleConfigs: OracleConfig[],
|
|
191
|
-
expectedOutcomes: any
|
|
192
|
-
): Promise<OracleResult[]> {
|
|
193
|
-
const results: OracleResult[] = [];
|
|
194
|
-
|
|
195
|
-
for (const config of oracleConfigs) {
|
|
196
|
-
const oracle = this.getOracle(config.type);
|
|
197
|
-
const result = await oracle.evaluate({
|
|
198
|
-
agentRun,
|
|
199
|
-
expectedOutcomes
|
|
200
|
-
}, config);
|
|
201
|
-
results.push(result);
|
|
202
|
-
}
|
|
203
|
-
|
|
204
|
-
return results;
|
|
205
|
-
}
|
|
206
|
-
}
|
|
207
|
-
```
|
|
97
|
+
### Test Drivers
|
|
208
98
|
|
|
209
|
-
|
|
99
|
+
| Driver | Description |
|
|
100
|
+
|--------|-------------|
|
|
101
|
+
| `BaseTestDriver` | Abstract base class for all test drivers |
|
|
102
|
+
| `AgentEvalDriver` | Executes AI agents and captures their outputs |
|
|
210
103
|
|
|
211
|
-
###
|
|
212
|
-
```typescript
|
|
213
|
-
export class SchemaValidatorOracle implements IOracle {
|
|
214
|
-
readonly type = 'schema-validate';
|
|
215
|
-
|
|
216
|
-
async evaluate(input: OracleInput, config: OracleConfig): Promise<OracleResult> {
|
|
217
|
-
const { agentRun } = input;
|
|
218
|
-
const schema = this.loadSchema(config.schema);
|
|
219
|
-
|
|
220
|
-
try {
|
|
221
|
-
schema.parse(agentRun.ResultPayload);
|
|
222
|
-
return {
|
|
223
|
-
oracleType: this.type,
|
|
224
|
-
passed: true,
|
|
225
|
-
score: 1.0,
|
|
226
|
-
message: 'Output matches schema'
|
|
227
|
-
};
|
|
228
|
-
} catch (error) {
|
|
229
|
-
return {
|
|
230
|
-
oracleType: this.type,
|
|
231
|
-
passed: false,
|
|
232
|
-
score: 0.0,
|
|
233
|
-
message: `Schema validation failed: ${error.message}`,
|
|
234
|
-
details: error.errors
|
|
235
|
-
};
|
|
236
|
-
}
|
|
237
|
-
}
|
|
238
|
-
}
|
|
239
|
-
```
|
|
104
|
+
### Oracles
|
|
240
105
|
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
// Load agent run steps
|
|
250
|
-
const rv = new RunView();
|
|
251
|
-
const stepsResult = await rv.RunView<AIAgentRunStepEntity>({
|
|
252
|
-
EntityName: 'MJ: AI Agent Run Steps',
|
|
253
|
-
ExtraFilter: `AgentRunID='${agentRun.ID}'`,
|
|
254
|
-
ResultType: 'entity_object'
|
|
255
|
-
});
|
|
256
|
-
|
|
257
|
-
if (!stepsResult.Success) {
|
|
258
|
-
throw new Error(`Failed to load agent run steps: ${stepsResult.ErrorMessage}`);
|
|
259
|
-
}
|
|
260
|
-
|
|
261
|
-
const steps = stepsResult.Results || [];
|
|
262
|
-
const errorSteps = steps.filter(s => s.Status === 'Error' || s.Status === 'Failed');
|
|
263
|
-
|
|
264
|
-
if (errorSteps.length === 0) {
|
|
265
|
-
return {
|
|
266
|
-
oracleType: this.type,
|
|
267
|
-
passed: true,
|
|
268
|
-
score: 1.0,
|
|
269
|
-
message: `All ${steps.length} steps completed without errors`
|
|
270
|
-
};
|
|
271
|
-
} else {
|
|
272
|
-
return {
|
|
273
|
-
oracleType: this.type,
|
|
274
|
-
passed: false,
|
|
275
|
-
score: 0.0,
|
|
276
|
-
message: `${errorSteps.length} of ${steps.length} steps had errors`,
|
|
277
|
-
details: errorSteps.map(s => ({
|
|
278
|
-
stepId: s.ID,
|
|
279
|
-
sequence: s.Sequence,
|
|
280
|
-
error: s.Notes
|
|
281
|
-
}))
|
|
282
|
-
};
|
|
283
|
-
}
|
|
284
|
-
}
|
|
285
|
-
}
|
|
286
|
-
```
|
|
106
|
+
| Oracle | Description |
|
|
107
|
+
|--------|-------------|
|
|
108
|
+
| `ExactMatchOracle` | Compares output against an expected string |
|
|
109
|
+
| `LLMJudgeOracle` | Uses an LLM to evaluate output quality with rubrics |
|
|
110
|
+
| `SchemaValidatorOracle` | Validates output against a JSON schema |
|
|
111
|
+
| `SQLValidatorOracle` | Validates output by running SQL queries |
|
|
112
|
+
| `TraceValidatorOracle` | Validates execution trace/steps of an agent run |
|
|
287
113
|
|
|
288
|
-
###
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
const { agentRun, expectedOutcomes } = input;
|
|
295
|
-
|
|
296
|
-
// Load rubric
|
|
297
|
-
const md = new Metadata();
|
|
298
|
-
const rubric = await md.GetEntityObject<TestRubricEntity>('Test Rubrics');
|
|
299
|
-
await rubric.Load(config.rubricId);
|
|
300
|
-
|
|
301
|
-
// Build evaluation prompt
|
|
302
|
-
const prompt = this.buildPrompt(rubric, agentRun, expectedOutcomes);
|
|
303
|
-
|
|
304
|
-
// Execute LLM evaluation
|
|
305
|
-
const judgmentResult = await this.executeJudgment(prompt);
|
|
306
|
-
|
|
307
|
-
// Parse structured response
|
|
308
|
-
const scores = this.parseScores(judgmentResult, rubric.Criteria);
|
|
309
|
-
|
|
310
|
-
// Calculate weighted score
|
|
311
|
-
const overallScore = this.calculateWeightedScore(scores, rubric.Criteria);
|
|
312
|
-
|
|
313
|
-
return {
|
|
314
|
-
oracleType: this.type,
|
|
315
|
-
passed: overallScore >= (config.threshold || 0.7),
|
|
316
|
-
score: overallScore,
|
|
317
|
-
message: `LLM judge score: ${(overallScore * 100).toFixed(1)}%`,
|
|
318
|
-
details: {
|
|
319
|
-
rubricId: rubric.ID,
|
|
320
|
-
rubricVersion: rubric.Version,
|
|
321
|
-
dimensionScores: scores,
|
|
322
|
-
judgmentNotes: judgmentResult.notes
|
|
323
|
-
}
|
|
324
|
-
};
|
|
325
|
-
}
|
|
326
|
-
}
|
|
114
|
+
### Variable Resolution
|
|
115
|
+
|
|
116
|
+
Variables cascade through four levels with the highest priority winning:
|
|
117
|
+
|
|
118
|
+
```
|
|
119
|
+
Run Variables > Suite Variables > Test Variables > Type Variables
|
|
327
120
|
```
|
|
328
121
|
|
|
329
|
-
|
|
122
|
+
The resolver tracks the source of each resolved value for auditing.
|
|
330
123
|
|
|
331
|
-
|
|
332
|
-
import { TestEngine } from '@memberjunction/testing-engine';
|
|
333
|
-
import { getSystemUser } from '@memberjunction/core';
|
|
124
|
+
## Usage
|
|
334
125
|
|
|
335
|
-
|
|
336
|
-
const engine = new TestEngine();
|
|
126
|
+
### Running a Single Test
|
|
337
127
|
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
const result = await engine.runTest('test-id-123', contextUser, {
|
|
341
|
-
dryRun: false,
|
|
342
|
-
environment: 'staging'
|
|
343
|
-
});
|
|
128
|
+
```typescript
|
|
129
|
+
import { TestEngine } from '@memberjunction/testing-engine';
|
|
344
130
|
|
|
345
|
-
|
|
346
|
-
|
|
131
|
+
const engine = TestEngine.Instance;
|
|
132
|
+
await engine.Config(false, contextUser);
|
|
347
133
|
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
134
|
+
const result = await engine.RunTest(testId, contextUser, {
|
|
135
|
+
verbose: true,
|
|
136
|
+
variables: { AIConfiguration: 'gpt-4o' },
|
|
137
|
+
progressCallback: (p) => console.log(`${p.percentage}%: ${p.message}`)
|
|
352
138
|
});
|
|
353
139
|
|
|
354
|
-
console.log(`
|
|
140
|
+
console.log(`Status: ${result.status}, Score: ${result.score}`);
|
|
355
141
|
```
|
|
356
142
|
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
### Adding New Test Types
|
|
143
|
+
### Running a Test Suite
|
|
360
144
|
|
|
361
|
-
1. Create driver class extending `BaseTestDriver`:
|
|
362
145
|
```typescript
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
}
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
2. Register with ClassFactory:
|
|
371
|
-
```typescript
|
|
372
|
-
MJGlobal.Instance.ClassFactory.Register(
|
|
373
|
-
BaseTestDriver,
|
|
374
|
-
'WorkflowScenarioDriver',
|
|
375
|
-
WorkflowScenarioDriver
|
|
376
|
-
);
|
|
377
|
-
```
|
|
146
|
+
const suiteResult = await engine.RunSuite(suiteId, contextUser, {
|
|
147
|
+
parallel: true,
|
|
148
|
+
maxParallel: 5,
|
|
149
|
+
failFast: false,
|
|
150
|
+
variables: { Temperature: 0.3 }
|
|
151
|
+
});
|
|
378
152
|
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
INSERT INTO TestType (Name, DriverClass, Status)
|
|
382
|
-
VALUES ('Workflow Scenario', 'WorkflowScenarioDriver', 'Active');
|
|
153
|
+
console.log(`Passed: ${suiteResult.passedTests}/${suiteResult.totalTests}`);
|
|
154
|
+
console.log(`Average Score: ${suiteResult.averageScore}`);
|
|
383
155
|
```
|
|
384
156
|
|
|
385
|
-
|
|
157
|
+
## Utilities
|
|
386
158
|
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
159
|
+
| Utility | Description |
|
|
160
|
+
|---------|-------------|
|
|
161
|
+
| `variable-resolver` | Resolves variables through the priority cascade |
|
|
162
|
+
| `scoring` | Combines oracle scores using weighted averages |
|
|
163
|
+
| `cost-calculator` | Tracks and sums execution costs |
|
|
164
|
+
| `result-formatter` | Formats test results for storage and display |
|
|
165
|
+
| `execution-context` | Captures environment details (OS, Node.js version, CI/CD info) |
|
|
391
166
|
|
|
392
|
-
|
|
393
|
-
// Your evaluation logic
|
|
394
|
-
}
|
|
395
|
-
}
|
|
396
|
-
```
|
|
167
|
+
## Testing
|
|
397
168
|
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
169
|
+
```bash
|
|
170
|
+
npm test
|
|
171
|
+
npm run test:coverage
|
|
401
172
|
```
|
|
402
173
|
|
|
403
|
-
##
|
|
174
|
+
## Dependencies
|
|
404
175
|
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
176
|
+
| Package | Purpose |
|
|
177
|
+
|---------|---------|
|
|
178
|
+
| `@memberjunction/testing-engine-base` | Base engine, metadata caching, shared types |
|
|
179
|
+
| `@memberjunction/ai` | AI model access for LLM oracles |
|
|
180
|
+
| `@memberjunction/ai-agents` | Agent execution for AgentEvalDriver |
|
|
181
|
+
| `@memberjunction/ai-prompts` | Prompt execution |
|
|
182
|
+
| `@memberjunction/core` | Metadata, RunView, UserInfo |
|
|
183
|
+
| `@memberjunction/core-entities` | Test entity types |
|
|
184
|
+
| `@memberjunction/global` | Class factory |
|
|
185
|
+
| `zod` | Schema validation |
|
|
186
|
+
| `rxjs` | Observable patterns |
|
|
410
187
|
|
|
411
|
-
##
|
|
188
|
+
## License
|
|
412
189
|
|
|
413
|
-
|
|
414
|
-
- [ ] Test retry logic with exponential backoff
|
|
415
|
-
- [ ] Result caching for expensive evaluations
|
|
416
|
-
- [ ] Composite oracles (AND/OR logic)
|
|
417
|
-
- [ ] Dataset-driven test execution (run same test with multiple inputs)
|
|
418
|
-
- [ ] Comparative evaluation (A/B testing different agent versions)
|
|
419
|
-
- [ ] Replay mode (re-evaluate existing agent runs)
|
|
190
|
+
ISC
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@memberjunction/testing-engine",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "4.0.0",
|
|
4
|
+
"version": "4.1.0",
|
|
5
5
|
"description": "MemberJunction Testing Framework Engine - Core test execution and evaluation engine supporting multiple test types",
|
|
6
6
|
"main": "dist/index.js",
|
|
7
7
|
"types": "dist/index.d.ts",
|
|
@@ -26,15 +26,15 @@
|
|
|
26
26
|
"ts-jest": "^29.4.6"
|
|
27
27
|
},
|
|
28
28
|
"dependencies": {
|
|
29
|
-
"@memberjunction/ai": "4.0.0",
|
|
30
|
-
"@memberjunction/ai-agents": "4.0.0",
|
|
31
|
-
"@memberjunction/ai-core-plus": "4.0.0",
|
|
32
|
-
"@memberjunction/ai-prompts": "4.0.0",
|
|
33
|
-
"@memberjunction/aiengine": "4.0.0",
|
|
34
|
-
"@memberjunction/core": "4.0.0",
|
|
35
|
-
"@memberjunction/core-entities": "4.0.0",
|
|
36
|
-
"@memberjunction/global": "4.0.0",
|
|
37
|
-
"@memberjunction/testing-engine-base": "4.0.0",
|
|
29
|
+
"@memberjunction/ai": "4.1.0",
|
|
30
|
+
"@memberjunction/ai-agents": "4.1.0",
|
|
31
|
+
"@memberjunction/ai-core-plus": "4.1.0",
|
|
32
|
+
"@memberjunction/ai-prompts": "4.1.0",
|
|
33
|
+
"@memberjunction/aiengine": "4.1.0",
|
|
34
|
+
"@memberjunction/core": "4.1.0",
|
|
35
|
+
"@memberjunction/core-entities": "4.1.0",
|
|
36
|
+
"@memberjunction/global": "4.1.0",
|
|
37
|
+
"@memberjunction/testing-engine-base": "4.1.0",
|
|
38
38
|
"debug": "^4.4.3",
|
|
39
39
|
"rxjs": "^7.8.2",
|
|
40
40
|
"zod": "~3.24.4"
|