@prompd/test 0.5.0-beta.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/EvaluatorEngine.d.ts +32 -0
- package/dist/EvaluatorEngine.d.ts.map +1 -0
- package/dist/EvaluatorEngine.js +97 -0
- package/dist/TestDiscovery.d.ts +28 -0
- package/dist/TestDiscovery.d.ts.map +1 -0
- package/dist/TestDiscovery.js +137 -0
- package/dist/TestParser.d.ts +25 -0
- package/dist/TestParser.d.ts.map +1 -0
- package/dist/TestParser.js +187 -0
- package/dist/TestRunner.d.ts +57 -0
- package/dist/TestRunner.d.ts.map +1 -0
- package/dist/TestRunner.js +463 -0
- package/dist/cli-types.d.ts +62 -0
- package/dist/cli-types.d.ts.map +1 -0
- package/dist/cli-types.js +6 -0
- package/dist/evaluators/NlpEvaluator.d.ts +26 -0
- package/dist/evaluators/NlpEvaluator.d.ts.map +1 -0
- package/dist/evaluators/NlpEvaluator.js +145 -0
- package/dist/evaluators/PrmdEvaluator.d.ts +42 -0
- package/dist/evaluators/PrmdEvaluator.d.ts.map +1 -0
- package/dist/evaluators/PrmdEvaluator.js +265 -0
- package/dist/evaluators/ScriptEvaluator.d.ts +19 -0
- package/dist/evaluators/ScriptEvaluator.d.ts.map +1 -0
- package/dist/evaluators/ScriptEvaluator.js +161 -0
- package/dist/evaluators/types.d.ts +19 -0
- package/dist/evaluators/types.d.ts.map +1 -0
- package/dist/evaluators/types.js +5 -0
- package/dist/index.d.ts +25 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +33 -0
- package/dist/reporters/ConsoleReporter.d.ts +17 -0
- package/dist/reporters/ConsoleReporter.d.ts.map +1 -0
- package/dist/reporters/ConsoleReporter.js +85 -0
- package/dist/reporters/JsonReporter.d.ts +11 -0
- package/dist/reporters/JsonReporter.d.ts.map +1 -0
- package/dist/reporters/JsonReporter.js +18 -0
- package/dist/reporters/JunitReporter.d.ts +15 -0
- package/dist/reporters/JunitReporter.d.ts.map +1 -0
- package/dist/reporters/JunitReporter.js +89 -0
- package/dist/reporters/types.d.ts +8 -0
- package/dist/reporters/types.d.ts.map +1 -0
- package/dist/reporters/types.js +5 -0
- package/dist/types.d.ts +115 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +5 -0
- package/package.json +34 -0
- package/src/EvaluatorEngine.ts +130 -0
- package/src/TestDiscovery.ts +133 -0
- package/src/TestParser.ts +235 -0
- package/src/TestRunner.ts +516 -0
- package/src/cli-types.ts +92 -0
- package/src/evaluators/NlpEvaluator.ts +184 -0
- package/src/evaluators/PrmdEvaluator.ts +284 -0
- package/src/evaluators/ScriptEvaluator.ts +149 -0
- package/src/evaluators/types.ts +24 -0
- package/src/index.ts +76 -0
- package/src/reporters/ConsoleReporter.ts +100 -0
- package/src/reporters/JsonReporter.ts +21 -0
- package/src/reporters/JunitReporter.ts +113 -0
- package/src/reporters/types.ts +9 -0
- package/src/types.ts +133 -0
- package/tsconfig.json +20 -0
|
@@ -0,0 +1,516 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TestRunner - orchestrates the full test lifecycle:
|
|
3
|
+
* discovery -> compile -> execute -> evaluate -> report
|
|
4
|
+
*
|
|
5
|
+
* Consumes @prompd/cli for compilation and execution.
|
|
6
|
+
* This is the primary public API for @prompd/test.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import * as path from 'path';
|
|
10
|
+
import * as fs from 'fs';
|
|
11
|
+
import { TestDiscovery } from './TestDiscovery';
|
|
12
|
+
import { EvaluatorEngine } from './EvaluatorEngine';
|
|
13
|
+
import { ConsoleReporter } from './reporters/ConsoleReporter';
|
|
14
|
+
import { JsonReporter } from './reporters/JsonReporter';
|
|
15
|
+
import { JunitReporter } from './reporters/JunitReporter';
|
|
16
|
+
import type { Reporter } from './reporters/types';
|
|
17
|
+
import type { EvaluatorContext } from './evaluators/types';
|
|
18
|
+
import type { CompilerModule } from './cli-types';
|
|
19
|
+
import type { TestHarness } from '@prompd/cli';
|
|
20
|
+
import type {
|
|
21
|
+
TestSuite,
|
|
22
|
+
TestCase,
|
|
23
|
+
TestResult,
|
|
24
|
+
TestRunResult,
|
|
25
|
+
TestSuiteResult,
|
|
26
|
+
TestRunSummary,
|
|
27
|
+
TestRunOptions,
|
|
28
|
+
TestProgressCallback,
|
|
29
|
+
EvaluatorType,
|
|
30
|
+
} from './types';
|
|
31
|
+
|
|
32
|
+
export class TestRunner implements TestHarness {
|
|
33
|
+
private discovery: TestDiscovery;
|
|
34
|
+
private cliModule: CompilerModule | null = null;
|
|
35
|
+
private configLoaded = false;
|
|
36
|
+
|
|
37
|
+
/**
|
|
38
|
+
* @param cli - Optional pre-loaded @prompd/cli module. If provided, skips dynamic import.
|
|
39
|
+
* This is the recommended approach when running inside Electron where the CLI
|
|
40
|
+
* is already loaded by the main process.
|
|
41
|
+
*/
|
|
42
|
+
constructor(cli?: CompilerModule) {
|
|
43
|
+
this.discovery = new TestDiscovery();
|
|
44
|
+
if (cli) {
|
|
45
|
+
this.cliModule = cli;
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
/**
|
|
50
|
+
* Ensure CLI config is loaded (API keys, provider settings).
|
|
51
|
+
* Called once before any execution.
|
|
52
|
+
*/
|
|
53
|
+
private async ensureConfig(): Promise<void> {
|
|
54
|
+
if (this.configLoaded) return;
|
|
55
|
+
const cli = await this.getCli();
|
|
56
|
+
try {
|
|
57
|
+
const configManager = new cli.ConfigManager();
|
|
58
|
+
console.log('[TestRunner] Loading config...');
|
|
59
|
+
// loadConfig() is async — must await it
|
|
60
|
+
if (configManager.loadConfig) {
|
|
61
|
+
await configManager.loadConfig();
|
|
62
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
63
|
+
const cfg = (configManager as any).config;
|
|
64
|
+
console.log('[TestRunner] Config loaded:', cfg ? Object.keys(cfg) : 'null');
|
|
65
|
+
console.log('[TestRunner] API keys:', cfg?.apiKeys ? Object.keys(cfg.apiKeys).filter((k: string) => cfg.apiKeys[k]) : 'none');
|
|
66
|
+
console.log('[TestRunner] Default provider:', cfg?.defaultProvider);
|
|
67
|
+
} else if (configManager.load) {
|
|
68
|
+
await configManager.load();
|
|
69
|
+
console.log('[TestRunner] Config loaded via load()');
|
|
70
|
+
}
|
|
71
|
+
this.configLoaded = true;
|
|
72
|
+
} catch (err) {
|
|
73
|
+
console.error('[TestRunner] Config load failed:', err);
|
|
74
|
+
// Config may not exist — that's OK for --no-llm runs
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
/**
|
|
79
|
+
* Run tests for a target path (file or directory).
|
|
80
|
+
* Returns structured results and an exit code (0 = all pass, 1 = failures).
|
|
81
|
+
*/
|
|
82
|
+
async run(
|
|
83
|
+
targetPath: string,
|
|
84
|
+
options: TestRunOptions = {},
|
|
85
|
+
onProgress?: TestProgressCallback
|
|
86
|
+
): Promise<TestRunResult> {
|
|
87
|
+
const startTime = Date.now();
|
|
88
|
+
|
|
89
|
+
// 1. Discovery
|
|
90
|
+
const { suites, errors: discoveryErrors } = await this.discovery.discover(targetPath);
|
|
91
|
+
|
|
92
|
+
if (discoveryErrors.length > 0 && suites.length === 0) {
|
|
93
|
+
return this.buildErrorResult(discoveryErrors.map(e => e.message), startTime);
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
// 2. Run each suite
|
|
97
|
+
const suiteResults: TestSuiteResult[] = [];
|
|
98
|
+
|
|
99
|
+
for (const suite of suites) {
|
|
100
|
+
onProgress?.({ type: 'suite_start', suite: suite.name, testCount: suite.tests.length });
|
|
101
|
+
|
|
102
|
+
const results = await this.runSuite(suite, options, onProgress);
|
|
103
|
+
suiteResults.push({
|
|
104
|
+
suite: suite.name,
|
|
105
|
+
testFilePath: suite.testFilePath,
|
|
106
|
+
results,
|
|
107
|
+
});
|
|
108
|
+
|
|
109
|
+
onProgress?.({ type: 'suite_complete', suite: suite.name, results });
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
// 3. Build summary
|
|
113
|
+
const summary = this.buildSummary(suiteResults, startTime);
|
|
114
|
+
|
|
115
|
+
return { suites: suiteResults, summary };
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
/**
|
|
119
|
+
* Run tests and return formatted output string.
|
|
120
|
+
*/
|
|
121
|
+
async runAndReport(
|
|
122
|
+
targetPath: string,
|
|
123
|
+
options: TestRunOptions = {},
|
|
124
|
+
onProgress?: TestProgressCallback
|
|
125
|
+
): Promise<{ output: string; exitCode: number }> {
|
|
126
|
+
const result = await this.run(targetPath, options, onProgress);
|
|
127
|
+
const reporter = this.getReporter(options);
|
|
128
|
+
const output = reporter.report(result);
|
|
129
|
+
const exitCode = (result.summary.failed > 0 || result.summary.errors > 0) ? 1 : 0;
|
|
130
|
+
return { output, exitCode };
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
private async runSuite(
|
|
134
|
+
suite: TestSuite,
|
|
135
|
+
options: TestRunOptions,
|
|
136
|
+
onProgress?: TestProgressCallback
|
|
137
|
+
): Promise<TestResult[]> {
|
|
138
|
+
const results: TestResult[] = [];
|
|
139
|
+
const allowedEvaluators = this.resolveAllowedEvaluators(options);
|
|
140
|
+
|
|
141
|
+
for (const testCase of suite.tests) {
|
|
142
|
+
onProgress?.({ type: 'test_start', suite: suite.name, testName: testCase.name });
|
|
143
|
+
|
|
144
|
+
const result = await this.runTestCase(suite, testCase, allowedEvaluators, options, onProgress);
|
|
145
|
+
results.push(result);
|
|
146
|
+
|
|
147
|
+
onProgress?.({ type: 'test_complete', suite: suite.name, testName: testCase.name, result });
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
return results;
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
private async runTestCase(
|
|
154
|
+
suite: TestSuite,
|
|
155
|
+
testCase: TestCase,
|
|
156
|
+
allowedEvaluators: EvaluatorType[],
|
|
157
|
+
options: TestRunOptions,
|
|
158
|
+
onProgress?: TestProgressCallback
|
|
159
|
+
): Promise<TestResult> {
|
|
160
|
+
const start = Date.now();
|
|
161
|
+
|
|
162
|
+
// Step 1: Compile the target .prmd with test params
|
|
163
|
+
let compiledOutput: string;
|
|
164
|
+
let promptMetadata: Record<string, unknown> = {};
|
|
165
|
+
try {
|
|
166
|
+
const compileResult = await this.compileTarget(suite.target, testCase.params, options);
|
|
167
|
+
compiledOutput = compileResult.compiled;
|
|
168
|
+
promptMetadata = compileResult.metadata;
|
|
169
|
+
} catch (err) {
|
|
170
|
+
const errorMessage = err instanceof Error ? err.message : String(err);
|
|
171
|
+
|
|
172
|
+
// If expect_error is set, compilation failure is a PASS
|
|
173
|
+
if (testCase.expect_error) {
|
|
174
|
+
return {
|
|
175
|
+
suite: suite.name,
|
|
176
|
+
testName: testCase.name,
|
|
177
|
+
status: 'pass',
|
|
178
|
+
duration: Date.now() - start,
|
|
179
|
+
assertions: [],
|
|
180
|
+
error: `Expected error occurred: ${errorMessage}`,
|
|
181
|
+
};
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
return {
|
|
185
|
+
suite: suite.name,
|
|
186
|
+
testName: testCase.name,
|
|
187
|
+
status: 'error',
|
|
188
|
+
duration: Date.now() - start,
|
|
189
|
+
assertions: [],
|
|
190
|
+
error: `Compilation failed: ${errorMessage}`,
|
|
191
|
+
};
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
// If expect_error was set but compilation succeeded, that's a failure
|
|
195
|
+
if (testCase.expect_error) {
|
|
196
|
+
return {
|
|
197
|
+
suite: suite.name,
|
|
198
|
+
testName: testCase.name,
|
|
199
|
+
status: 'fail',
|
|
200
|
+
duration: Date.now() - start,
|
|
201
|
+
assertions: [],
|
|
202
|
+
compiledInput: compiledOutput,
|
|
203
|
+
error: 'Expected compilation to fail, but it succeeded',
|
|
204
|
+
};
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
// Step 2: Execute against LLM (unless --no-llm)
|
|
208
|
+
let llmOutput = '';
|
|
209
|
+
let provider = 'none';
|
|
210
|
+
let model = 'none';
|
|
211
|
+
let execDuration = 0;
|
|
212
|
+
let usage: { promptTokens?: number; completionTokens?: number; totalTokens?: number } | undefined;
|
|
213
|
+
|
|
214
|
+
if (!options.noLlm) {
|
|
215
|
+
try {
|
|
216
|
+
const execResult = await this.executePrompt(compiledOutput, promptMetadata, options);
|
|
217
|
+
llmOutput = execResult.response;
|
|
218
|
+
provider = execResult.provider;
|
|
219
|
+
model = execResult.model;
|
|
220
|
+
execDuration = execResult.duration;
|
|
221
|
+
usage = execResult.usage;
|
|
222
|
+
} catch (err) {
|
|
223
|
+
return {
|
|
224
|
+
suite: suite.name,
|
|
225
|
+
testName: testCase.name,
|
|
226
|
+
status: 'error',
|
|
227
|
+
duration: Date.now() - start,
|
|
228
|
+
assertions: [],
|
|
229
|
+
compiledInput: compiledOutput,
|
|
230
|
+
error: `Execution failed: ${err instanceof Error ? err.message : String(err)}`,
|
|
231
|
+
};
|
|
232
|
+
}
|
|
233
|
+
} else {
|
|
234
|
+
// In --no-llm mode, use the compiled output as the "output" for NLP checks
|
|
235
|
+
// This enables structural assertions against the compiled prompt itself
|
|
236
|
+
llmOutput = compiledOutput;
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
const execution = !options.noLlm ? { provider, model, duration: execDuration, usage } : undefined;
|
|
240
|
+
|
|
241
|
+
// Step 3: Run evaluations
|
|
242
|
+
if (testCase.assert.length === 0) {
|
|
243
|
+
return {
|
|
244
|
+
suite: suite.name,
|
|
245
|
+
testName: testCase.name,
|
|
246
|
+
status: 'pass',
|
|
247
|
+
duration: Date.now() - start,
|
|
248
|
+
assertions: [],
|
|
249
|
+
output: llmOutput,
|
|
250
|
+
compiledInput: compiledOutput,
|
|
251
|
+
execution,
|
|
252
|
+
};
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
const engine = new EvaluatorEngine({
|
|
256
|
+
testFileDir: path.dirname(suite.testFilePath),
|
|
257
|
+
evaluatorPrompt: suite.evaluatorPrompt,
|
|
258
|
+
workspaceRoot: options.workspaceRoot,
|
|
259
|
+
registryUrl: options.registryUrl,
|
|
260
|
+
allowedEvaluators,
|
|
261
|
+
failFast: options.runAll ? false : (options.failFast !== false),
|
|
262
|
+
cliModule: this.cliModule || undefined,
|
|
263
|
+
provider: options.provider,
|
|
264
|
+
model: options.model,
|
|
265
|
+
});
|
|
266
|
+
|
|
267
|
+
const context: EvaluatorContext = {
|
|
268
|
+
prompt: compiledOutput,
|
|
269
|
+
response: llmOutput,
|
|
270
|
+
params: testCase.params,
|
|
271
|
+
metadata: { provider, model, duration: execDuration },
|
|
272
|
+
};
|
|
273
|
+
|
|
274
|
+
const assertions = await engine.evaluate(
|
|
275
|
+
testCase.assert,
|
|
276
|
+
context,
|
|
277
|
+
(assertion) => {
|
|
278
|
+
onProgress?.({
|
|
279
|
+
type: 'assertion_complete',
|
|
280
|
+
suite: suite.name,
|
|
281
|
+
testName: testCase.name,
|
|
282
|
+
assertion,
|
|
283
|
+
});
|
|
284
|
+
}
|
|
285
|
+
);
|
|
286
|
+
|
|
287
|
+
// Determine overall test status from assertions
|
|
288
|
+
const hasFailure = assertions.some(a => a.status === 'fail');
|
|
289
|
+
const hasError = assertions.some(a => a.status === 'error');
|
|
290
|
+
const status = hasError ? 'error' : hasFailure ? 'fail' : 'pass';
|
|
291
|
+
|
|
292
|
+
return {
|
|
293
|
+
suite: suite.name,
|
|
294
|
+
testName: testCase.name,
|
|
295
|
+
status,
|
|
296
|
+
duration: Date.now() - start,
|
|
297
|
+
assertions,
|
|
298
|
+
output: llmOutput,
|
|
299
|
+
compiledInput: compiledOutput,
|
|
300
|
+
execution,
|
|
301
|
+
};
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
/**
|
|
305
|
+
* Compile a .prmd file and return both the compiled text and metadata
|
|
306
|
+
* (provider, model, temperature, max_tokens from frontmatter).
|
|
307
|
+
*/
|
|
308
|
+
private async compileTarget(
|
|
309
|
+
targetPath: string,
|
|
310
|
+
params: Record<string, unknown>,
|
|
311
|
+
options: TestRunOptions
|
|
312
|
+
): Promise<{ compiled: string; metadata: Record<string, unknown> }> {
|
|
313
|
+
const cli = await this.getCli();
|
|
314
|
+
const compiler = new cli.PrompdCompiler();
|
|
315
|
+
|
|
316
|
+
if (!fs.existsSync(targetPath)) {
|
|
317
|
+
throw new Error(`Target prompt file not found: ${targetPath}`);
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
// Use compileWithContext to get both output and frontmatter metadata
|
|
321
|
+
const context = await compiler.compileWithContext(targetPath, {
|
|
322
|
+
outputFormat: 'markdown',
|
|
323
|
+
parameters: params,
|
|
324
|
+
filePath: targetPath,
|
|
325
|
+
workspaceRoot: options.workspaceRoot,
|
|
326
|
+
registryUrl: options.registryUrl,
|
|
327
|
+
fileSystem: new cli.NodeFileSystem(),
|
|
328
|
+
});
|
|
329
|
+
|
|
330
|
+
// compileWithContext may return { compiledResult, metadata } or a string
|
|
331
|
+
let compiled: string;
|
|
332
|
+
let metadata: Record<string, unknown> = {};
|
|
333
|
+
|
|
334
|
+
if (typeof context === 'string') {
|
|
335
|
+
compiled = context;
|
|
336
|
+
} else if (context && typeof context === 'object') {
|
|
337
|
+
compiled = (context as { compiledResult?: string }).compiledResult || '';
|
|
338
|
+
metadata = (context as { metadata?: Record<string, unknown> }).metadata || {};
|
|
339
|
+
} else {
|
|
340
|
+
throw new Error('Compilation produced no output');
|
|
341
|
+
}
|
|
342
|
+
|
|
343
|
+
if (!compiled) {
|
|
344
|
+
throw new Error('Compilation produced no output');
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
console.log(`[TestRunner] Compiled ${targetPath}`);
|
|
348
|
+
console.log(`[TestRunner] params: ${JSON.stringify(params)}`);
|
|
349
|
+
console.log(`[TestRunner] metadata: ${JSON.stringify(metadata)}`);
|
|
350
|
+
console.log(`[TestRunner] output (${compiled.length} chars): ${compiled.substring(0, 200)}`);
|
|
351
|
+
|
|
352
|
+
return { compiled, metadata };
|
|
353
|
+
}
|
|
354
|
+
|
|
355
|
+
/**
|
|
356
|
+
* Execute compiled prompt text against an LLM using the executor's callLLM directly.
|
|
357
|
+
* This avoids re-compilation through executeRawText which loses metadata.
|
|
358
|
+
*/
|
|
359
|
+
private async executePrompt(
|
|
360
|
+
compiled: string,
|
|
361
|
+
metadata: Record<string, unknown>,
|
|
362
|
+
runOptions: TestRunOptions
|
|
363
|
+
): Promise<{ response: string; provider: string; model: string; duration: number; usage?: { promptTokens?: number; completionTokens?: number; totalTokens?: number } }> {
|
|
364
|
+
await this.ensureConfig();
|
|
365
|
+
const cli = await this.getCli();
|
|
366
|
+
const executor = new cli.PrompdExecutor();
|
|
367
|
+
const start = Date.now();
|
|
368
|
+
|
|
369
|
+
// Resolve provider/model from frontmatter metadata + config defaults
|
|
370
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
371
|
+
const configManager = (cli as any).ConfigManager?.getInstance
|
|
372
|
+
? (cli as any).ConfigManager.getInstance()
|
|
373
|
+
: null;
|
|
374
|
+
const config = configManager?.config || {};
|
|
375
|
+
|
|
376
|
+
// Priority: .prmd frontmatter > test run options (UI selector) > config defaults
|
|
377
|
+
const provider = String(metadata.provider || runOptions.provider || config.defaultProvider || 'openai');
|
|
378
|
+
const rawModel = metadata.model || runOptions.model || config.default_model || config.defaultModel || '';
|
|
379
|
+
// Fall back to a sensible default model if none specified
|
|
380
|
+
const model = String(rawModel) || this.getDefaultModel(provider);
|
|
381
|
+
const temperature = Number(metadata.temperature ?? 0.7);
|
|
382
|
+
const maxTokens = Number(metadata.max_tokens ?? 4096);
|
|
383
|
+
|
|
384
|
+
// Get API key from config
|
|
385
|
+
const apiKey = configManager?.getApiKey?.(provider, config) || '';
|
|
386
|
+
|
|
387
|
+
console.log(`[TestRunner] Executing: provider=${provider}, model=${model || '(default)'}, tokens=${compiled.length}`);
|
|
388
|
+
|
|
389
|
+
if (!apiKey && provider !== 'ollama') {
|
|
390
|
+
throw new Error(`No API key configured for provider "${provider}". Check ~/.prompd/config.yaml`);
|
|
391
|
+
}
|
|
392
|
+
|
|
393
|
+
try {
|
|
394
|
+
const result = await executor.callLLM(provider, model, compiled, apiKey, temperature, maxTokens);
|
|
395
|
+
|
|
396
|
+
if (!result.success) {
|
|
397
|
+
throw new Error(result.error || 'LLM execution failed');
|
|
398
|
+
}
|
|
399
|
+
|
|
400
|
+
return {
|
|
401
|
+
response: result.response || result.content || '',
|
|
402
|
+
provider,
|
|
403
|
+
model,
|
|
404
|
+
duration: Date.now() - start,
|
|
405
|
+
usage: result.usage,
|
|
406
|
+
};
|
|
407
|
+
} catch (err) {
|
|
408
|
+
const errMsg = err instanceof Error ? err.message : String(err);
|
|
409
|
+
console.error(`[TestRunner] callLLM failed: ${errMsg}`);
|
|
410
|
+
throw new Error(errMsg);
|
|
411
|
+
}
|
|
412
|
+
}
|
|
413
|
+
|
|
414
|
+
private resolveAllowedEvaluators(options: TestRunOptions): EvaluatorType[] {
|
|
415
|
+
if (options.noLlm) {
|
|
416
|
+
// In --no-llm mode, skip prmd evaluators (they require LLM calls)
|
|
417
|
+
const base = options.evaluators || ['nlp', 'script'];
|
|
418
|
+
return base.filter(e => e !== 'prmd');
|
|
419
|
+
}
|
|
420
|
+
|
|
421
|
+
return options.evaluators || ['nlp', 'script', 'prmd'];
|
|
422
|
+
}
|
|
423
|
+
|
|
424
|
+
private getReporter(options: TestRunOptions): Reporter {
|
|
425
|
+
switch (options.reporter) {
|
|
426
|
+
case 'json':
|
|
427
|
+
return new JsonReporter(options.verbose);
|
|
428
|
+
case 'junit':
|
|
429
|
+
return new JunitReporter();
|
|
430
|
+
case 'console':
|
|
431
|
+
default:
|
|
432
|
+
return new ConsoleReporter(options.verbose);
|
|
433
|
+
}
|
|
434
|
+
}
|
|
435
|
+
|
|
436
|
+
private buildSummary(suiteResults: TestSuiteResult[], startTime: number): TestRunSummary {
|
|
437
|
+
let total = 0;
|
|
438
|
+
let passed = 0;
|
|
439
|
+
let failed = 0;
|
|
440
|
+
let errors = 0;
|
|
441
|
+
let skipped = 0;
|
|
442
|
+
let totalTokens = 0;
|
|
443
|
+
const providerSet = new Set<string>();
|
|
444
|
+
const modelSet = new Set<string>();
|
|
445
|
+
|
|
446
|
+
for (const suite of suiteResults) {
|
|
447
|
+
for (const result of suite.results) {
|
|
448
|
+
total++;
|
|
449
|
+
switch (result.status) {
|
|
450
|
+
case 'pass': passed++; break;
|
|
451
|
+
case 'fail': failed++; break;
|
|
452
|
+
case 'error': errors++; break;
|
|
453
|
+
case 'skip': skipped++; break;
|
|
454
|
+
}
|
|
455
|
+
if (result.execution) {
|
|
456
|
+
if (result.execution.provider && result.execution.provider !== 'none') {
|
|
457
|
+
providerSet.add(result.execution.provider);
|
|
458
|
+
}
|
|
459
|
+
if (result.execution.model && result.execution.model !== 'none') {
|
|
460
|
+
modelSet.add(result.execution.model);
|
|
461
|
+
}
|
|
462
|
+
if (result.execution.usage?.totalTokens) {
|
|
463
|
+
totalTokens += result.execution.usage.totalTokens;
|
|
464
|
+
}
|
|
465
|
+
}
|
|
466
|
+
}
|
|
467
|
+
}
|
|
468
|
+
|
|
469
|
+
return {
|
|
470
|
+
total,
|
|
471
|
+
passed,
|
|
472
|
+
failed,
|
|
473
|
+
errors,
|
|
474
|
+
skipped,
|
|
475
|
+
duration: Date.now() - startTime,
|
|
476
|
+
totalTokens: totalTokens || undefined,
|
|
477
|
+
providers: providerSet.size > 0 ? Array.from(providerSet) : undefined,
|
|
478
|
+
models: modelSet.size > 0 ? Array.from(modelSet) : undefined,
|
|
479
|
+
};
|
|
480
|
+
}
|
|
481
|
+
|
|
482
|
+
private buildErrorResult(errorMessages: string[], startTime: number): TestRunResult {
|
|
483
|
+
return {
|
|
484
|
+
suites: [],
|
|
485
|
+
summary: {
|
|
486
|
+
total: 0,
|
|
487
|
+
passed: 0,
|
|
488
|
+
failed: 0,
|
|
489
|
+
errors: errorMessages.length,
|
|
490
|
+
skipped: 0,
|
|
491
|
+
duration: Date.now() - startTime,
|
|
492
|
+
},
|
|
493
|
+
};
|
|
494
|
+
}
|
|
495
|
+
|
|
496
|
+
private getDefaultModel(provider: string): string {
|
|
497
|
+
const defaults: Record<string, string> = {
|
|
498
|
+
openai: 'gpt-4o',
|
|
499
|
+
anthropic: 'claude-sonnet-4-20250514',
|
|
500
|
+
groq: 'llama-3.1-70b-versatile',
|
|
501
|
+
google: 'gemini-2.0-flash',
|
|
502
|
+
mistral: 'mistral-large-latest',
|
|
503
|
+
deepseek: 'deepseek-chat',
|
|
504
|
+
};
|
|
505
|
+
return defaults[provider.toLowerCase()] || 'gpt-4o';
|
|
506
|
+
}
|
|
507
|
+
|
|
508
|
+
private async getCli(): Promise<CompilerModule> {
|
|
509
|
+
if (!this.cliModule) {
|
|
510
|
+
throw new Error(
|
|
511
|
+
'@prompd/cli module not provided. Pass it to the TestRunner constructor: new TestRunner(cliModule)'
|
|
512
|
+
);
|
|
513
|
+
}
|
|
514
|
+
return this.cliModule;
|
|
515
|
+
}
|
|
516
|
+
}
|
package/src/cli-types.ts
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared type interfaces for dynamically imported @prompd/cli module.
|
|
3
|
+
* These mirror the CLI's public API without requiring a compile-time dependency.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/**
 * Constructor surface of the @prompd/cli module as consumed by @prompd/test.
 * Each property is a class constructor exposed by the CLI package; instances
 * are described by the companion interfaces below.
 */
export interface CompilerModule {
  /** Compiles .prmd source files into prompt text. */
  PrompdCompiler: new (config?: Record<string, unknown>) => Compiler;
  /** Executes compiled prompts against LLM providers. */
  PrompdExecutor: new () => Executor;
  /** Loads provider settings and API keys (e.g. from the user's config file). */
  ConfigManager: new () => ConfigManagerInstance;
  /** In-memory file system, optionally seeded with path → content pairs. */
  MemoryFileSystem: new (files?: Record<string, string>) => MemoryFileSystemInstance;
  /** Disk-backed file system used for compilation with real file access. */
  NodeFileSystem: new () => NodeFileSystemInstance;
}
|
|
13
|
+
|
|
14
|
+
/** Structural type for a PrompdCompiler instance. */
export interface Compiler {
  /**
   * Compile a .prmd file to prompt text.
   * May resolve to the compiled string directly, or to a result object
   * carrying the output, an error message, and frontmatter metadata —
   * callers must handle both shapes.
   */
  compile(
    sourcePath: string,
    options: Record<string, unknown>
  ): Promise<string | {
    output?: string;
    error?: string;
    metadata?: Record<string, unknown>;
  }>;

  /**
   * Compile with full context: like compile(), but the object form also
   * exposes frontmatter metadata plus any errors/warnings collected during
   * compilation. Preferred when provider/model metadata is needed.
   */
  compileWithContext(
    sourcePath: string,
    options: Record<string, unknown>
  ): Promise<string | {
    compiledResult?: string;
    metadata?: Record<string, unknown>;
    errors?: unknown[];
    warnings?: unknown[];
  }>;
}
|
|
34
|
+
|
|
35
|
+
/** Structural type for a PrompdExecutor instance. */
export interface Executor {
  /** Compile and execute a .prmd file end-to-end. */
  execute(
    filePath: string,
    options: Record<string, unknown>
  ): Promise<ExecutorResult>;

  /** Execute already-compiled prompt text (note: may re-compile internally). */
  executeRawText(
    compiledText: string,
    options: Record<string, unknown>
  ): Promise<ExecutorResult>;

  /**
   * Low-level LLM call with explicit provider/model/key — bypasses any
   * re-compilation. The response text may arrive in either `response` or
   * `content` depending on provider; check `success` before reading either.
   */
  callLLM(
    provider: string,
    model: string,
    content: string,
    apiKey: string,
    temperature?: number,
    maxTokens?: number
  ): Promise<{
    success: boolean;
    response?: string;
    content?: string;
    error?: string;
    usage?: {
      promptTokens?: number;
      completionTokens?: number;
      totalTokens?: number;
    };
  }>;
}
|
|
65
|
+
|
|
66
|
+
/** Result of an Executor.execute()/executeRawText() call. */
export interface ExecutorResult {
  /** LLM response text, when the call succeeded. */
  response?: string;
  /** Error description, when the call failed. */
  error?: string;
  /** Token accounting as reported by the provider, when available. */
  usage?: {
    promptTokens?: number;
    completionTokens?: number;
    totalTokens?: number;
  };
  /** Provider/model actually used for the call, when reported. */
  metadata?: {
    provider?: string;
    model?: string;
  };
}
|
|
79
|
+
|
|
80
|
+
/** Opaque marker type — the instance's shape is owned by @prompd/cli. */
export interface MemoryFileSystemInstance {
  // In-memory file system for compilation without disk access
}
|
|
83
|
+
|
|
84
|
+
/** Opaque marker type — the instance's shape is owned by @prompd/cli. */
export interface NodeFileSystemInstance {
  // Disk-backed file system for compilation with file access
}
|
|
87
|
+
|
|
88
|
+
export interface ConfigManagerInstance {
|
|
89
|
+
loadConfig?(): void;
|
|
90
|
+
load?(): void;
|
|
91
|
+
getConfig?(): Record<string, unknown> | null;
|
|
92
|
+
}
|