@prompd/test 0.5.0-beta.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/dist/EvaluatorEngine.d.ts +32 -0
  2. package/dist/EvaluatorEngine.d.ts.map +1 -0
  3. package/dist/EvaluatorEngine.js +97 -0
  4. package/dist/TestDiscovery.d.ts +28 -0
  5. package/dist/TestDiscovery.d.ts.map +1 -0
  6. package/dist/TestDiscovery.js +137 -0
  7. package/dist/TestParser.d.ts +25 -0
  8. package/dist/TestParser.d.ts.map +1 -0
  9. package/dist/TestParser.js +187 -0
  10. package/dist/TestRunner.d.ts +57 -0
  11. package/dist/TestRunner.d.ts.map +1 -0
  12. package/dist/TestRunner.js +463 -0
  13. package/dist/cli-types.d.ts +62 -0
  14. package/dist/cli-types.d.ts.map +1 -0
  15. package/dist/cli-types.js +6 -0
  16. package/dist/evaluators/NlpEvaluator.d.ts +30 -0
  17. package/dist/evaluators/NlpEvaluator.d.ts.map +1 -0
  18. package/dist/evaluators/NlpEvaluator.js +183 -0
  19. package/dist/evaluators/PrmdEvaluator.d.ts +42 -0
  20. package/dist/evaluators/PrmdEvaluator.d.ts.map +1 -0
  21. package/dist/evaluators/PrmdEvaluator.js +265 -0
  22. package/dist/evaluators/ScriptEvaluator.d.ts +19 -0
  23. package/dist/evaluators/ScriptEvaluator.d.ts.map +1 -0
  24. package/dist/evaluators/ScriptEvaluator.js +163 -0
  25. package/dist/evaluators/types.d.ts +19 -0
  26. package/dist/evaluators/types.d.ts.map +1 -0
  27. package/dist/evaluators/types.js +5 -0
  28. package/dist/index.d.ts +25 -0
  29. package/dist/index.d.ts.map +1 -0
  30. package/dist/index.js +33 -0
  31. package/dist/reporters/ConsoleReporter.d.ts +17 -0
  32. package/dist/reporters/ConsoleReporter.d.ts.map +1 -0
  33. package/dist/reporters/ConsoleReporter.js +85 -0
  34. package/dist/reporters/JsonReporter.d.ts +11 -0
  35. package/dist/reporters/JsonReporter.d.ts.map +1 -0
  36. package/dist/reporters/JsonReporter.js +18 -0
  37. package/dist/reporters/JunitReporter.d.ts +15 -0
  38. package/dist/reporters/JunitReporter.d.ts.map +1 -0
  39. package/dist/reporters/JunitReporter.js +89 -0
  40. package/dist/reporters/types.d.ts +8 -0
  41. package/dist/reporters/types.d.ts.map +1 -0
  42. package/dist/reporters/types.js +5 -0
  43. package/dist/types.d.ts +119 -0
  44. package/dist/types.d.ts.map +1 -0
  45. package/dist/types.js +5 -0
  46. package/package.json +34 -0
  47. package/src/EvaluatorEngine.ts +130 -0
  48. package/src/TestDiscovery.ts +133 -0
  49. package/src/TestParser.ts +235 -0
  50. package/src/TestRunner.ts +516 -0
  51. package/src/cli-types.ts +92 -0
  52. package/src/evaluators/NlpEvaluator.ts +240 -0
  53. package/src/evaluators/PrmdEvaluator.ts +284 -0
  54. package/src/evaluators/ScriptEvaluator.ts +152 -0
  55. package/src/evaluators/types.ts +24 -0
  56. package/src/index.ts +76 -0
  57. package/src/reporters/ConsoleReporter.ts +100 -0
  58. package/src/reporters/JsonReporter.ts +21 -0
  59. package/src/reporters/JunitReporter.ts +113 -0
  60. package/src/reporters/types.ts +9 -0
  61. package/src/types.ts +140 -0
  62. package/tsconfig.json +20 -0
@@ -0,0 +1,130 @@
1
+ /**
2
+ * Routes assertions to the correct evaluator and manages execution order.
3
+ *
4
+ * Execution order: nlp -> script -> prmd (cheap to expensive).
5
+ * Fail-fast by default — stops on first failure unless runAll is set.
6
+ */
7
+
8
+ import type { AssertionDef, AssertionResult, EvaluatorType } from './types';
9
+ import type { Evaluator, EvaluatorContext } from './evaluators/types';
10
+ import type { CompilerModule } from './cli-types';
11
+ import { NlpEvaluator } from './evaluators/NlpEvaluator';
12
+ import { ScriptEvaluator } from './evaluators/ScriptEvaluator';
13
+ import { PrmdEvaluator, type PrmdEvaluatorOptions } from './evaluators/PrmdEvaluator';
14
+
15
/**
 * Sort key for each evaluator type. Assertions whose evaluator has a smaller
 * value are evaluated earlier, giving the order nlp, script, prmd.
 */
const EVALUATOR_PRIORITY: Record<EvaluatorType, number> = {
  nlp: 0,
  script: 1,
  prmd: 2,
};
21
+
22
/** Construction options for {@link EvaluatorEngine}. */
export interface EvaluatorEngineOptions {
  /** Directory of the test file; handed to both the script and prmd evaluators. */
  testFileDir: string;
  /** Forwarded unchanged to the prmd evaluator. */
  evaluatorPrompt?: string;
  /** Forwarded unchanged to the prmd evaluator. */
  workspaceRoot?: string;
  /** Forwarded unchanged to the prmd evaluator. */
  registryUrl?: string;
  /**
   * Evaluator types that are allowed to run. Assertions of any other type are
   * reported with status 'skip'. When omitted, all of nlp, script and prmd run.
   */
  allowedEvaluators?: EvaluatorType[];
  /** Stop after the first non-passing assertion. Enabled unless explicitly `false`. */
  failFast?: boolean;
  /** Forwarded unchanged to the prmd evaluator. */
  cliModule?: CompilerModule;
  /** Forwarded unchanged to the prmd evaluator. */
  provider?: string;
  /** Forwarded unchanged to the prmd evaluator. */
  model?: string;
}
33
+
34
/**
 * Dispatches assertions to the evaluator registered for their type.
 *
 * Assertions are processed in the order given by EVALUATOR_PRIORITY
 * (nlp, then script, then prmd). With fail-fast enabled (the default), the
 * first evaluated assertion whose status is not 'pass' stops the run and all
 * assertions after it are reported as skipped.
 */
export class EvaluatorEngine {
  private evaluators: Map<EvaluatorType, Evaluator>;
  private allowedEvaluators: Set<EvaluatorType>;
  private failFast: boolean;

  /**
   * @param options - Engine configuration; see {@link EvaluatorEngineOptions}.
   */
  constructor(options: EvaluatorEngineOptions) {
    // Only an explicit `false` disables fail-fast; undefined means enabled.
    this.failFast = options.failFast !== false;
    this.allowedEvaluators = new Set(options.allowedEvaluators || ['nlp', 'script', 'prmd']);

    const prmdOptions: PrmdEvaluatorOptions = {
      testFileDir: options.testFileDir,
      evaluatorPrompt: options.evaluatorPrompt,
      workspaceRoot: options.workspaceRoot,
      registryUrl: options.registryUrl,
      cliModule: options.cliModule,
      provider: options.provider,
      model: options.model,
    };

    this.evaluators = new Map<EvaluatorType, Evaluator>([
      ['nlp', new NlpEvaluator()],
      ['script', new ScriptEvaluator(options.testFileDir)],
      ['prmd', new PrmdEvaluator(prmdOptions)],
    ]);
  }

  /**
   * Evaluate all assertions in cost-priority order.
   *
   * @param assertions - Assertions to evaluate; the input array is not mutated.
   * @param context - Context object passed through to every evaluator.
   * @param onResult - Optional callback invoked once per produced result
   *   (including skipped and error results), in the order they are produced.
   * @returns One result per assertion, in evaluation order.
   */
  async evaluate(
    assertions: AssertionDef[],
    context: EvaluatorContext,
    onResult?: (result: AssertionResult) => void
  ): Promise<AssertionResult[]> {
    const results: AssertionResult[] = [];

    // Store a result and notify the listener in one place.
    const record = (result: AssertionResult): void => {
      results.push(result);
      onResult?.(result);
    };

    // Sort a copy by evaluator priority (nlp first, prmd last).
    const sorted = [...assertions].sort(
      (a, b) => EVALUATOR_PRIORITY[a.evaluator] - EVALUATOR_PRIORITY[b.evaluator]
    );

    // Index of the assertion that follows the one currently being processed.
    // Tracked explicitly instead of calling sorted.indexOf(assertion): indexOf
    // returns the first match, which is wrong when the same assertion object
    // appears more than once in the list.
    let nextIndex = 0;

    for (const assertion of sorted) {
      nextIndex += 1;

      // Evaluator types excluded by the filter are reported, not run.
      // These skips do not trigger fail-fast.
      if (!this.allowedEvaluators.has(assertion.evaluator)) {
        record({
          evaluator: assertion.evaluator,
          check: assertion.check,
          status: 'skip',
          reason: `Evaluator type "${assertion.evaluator}" skipped by filter`,
          duration: 0,
        });
        continue;
      }

      const evaluator = this.evaluators.get(assertion.evaluator);
      if (!evaluator) {
        record({
          evaluator: assertion.evaluator,
          check: assertion.check,
          status: 'error',
          reason: `No evaluator registered for type "${assertion.evaluator}"`,
          duration: 0,
        });
        continue;
      }

      const result = await evaluator.evaluate(assertion, context);
      record(result);

      // Fail-fast: any status other than 'pass' ends the run.
      if (this.failFast && result.status !== 'pass') {
        // Report every assertion that has not been reached yet as skipped.
        for (const rem of sorted.slice(nextIndex)) {
          record({
            evaluator: rem.evaluator,
            check: rem.check,
            status: 'skip',
            reason: 'Skipped due to prior failure (fail-fast)',
            duration: 0,
          });
        }
        break;
      }
    }

    return results;
  }
}
@@ -0,0 +1,133 @@
1
+ /**
2
+ * Discovers .test.prmd files and pairs them with their source .prmd files.
3
+ */
4
+
5
+ import * as path from 'path';
6
+ import * as fs from 'fs';
7
+ import { glob } from 'glob';
8
+ import { TestParser } from './TestParser';
9
+ import type { TestSuite } from './types';
10
+
11
/** Outcome of a discovery run: the suites that parsed plus any problems found. */
export interface DiscoveryResult {
  /** Suites that were parsed and whose target prompt file exists. */
  suites: TestSuite[];
  /** One entry per path that could not be turned into a suite. */
  errors: DiscoveryError[];
}
15
+
16
/** A single discovery problem, tied to the path it was found at. */
export interface DiscoveryError {
  /** Path the problem relates to. */
  filePath: string;
  /** Human-readable description of the problem. */
  message: string;
}
20
+
21
/**
 * Finds `.test.prmd` files and parses each one into a test suite.
 * Problems are collected as DiscoveryError entries rather than thrown.
 */
export class TestDiscovery {
  private parser: TestParser;

  constructor() {
    this.parser = new TestParser();
  }

  /**
   * Discover test suites starting from a file or directory.
   *
   * - A directory is searched recursively for `.test.prmd` files.
   * - A `.test.prmd` file is parsed directly.
   * - Any other `.prmd` file is mapped to its colocated `.test.prmd` sidecar.
   *
   * @param targetPath - File or directory path; resolved to an absolute path first.
   * @returns Parsed suites together with any errors encountered.
   */
  async discover(targetPath: string): Promise<DiscoveryResult> {
    const absolutePath = path.resolve(targetPath);

    if (!fs.existsSync(absolutePath)) {
      return {
        suites: [],
        errors: [{ filePath: absolutePath, message: 'Path does not exist' }],
      };
    }

    if (fs.statSync(absolutePath).isDirectory()) {
      return this.discoverDirectory(absolutePath);
    }

    // The more specific suffix must be tested before the plain `.prmd` one.
    if (absolutePath.endsWith('.test.prmd')) {
      return this.discoverTestFile(absolutePath);
    }

    if (absolutePath.endsWith('.prmd')) {
      return this.discoverFromSource(absolutePath);
    }

    return {
      suites: [],
      errors: [
        {
          filePath: absolutePath,
          message: 'Target must be a .prmd file, .test.prmd file, or directory',
        },
      ],
    };
  }

  /** Recursively collect every `.test.prmd` file under `dirPath` and parse each one. */
  private async discoverDirectory(dirPath: string): Promise<DiscoveryResult> {
    const matches = await glob('**/*.test.prmd', {
      cwd: dirPath,
      absolute: true,
      nodir: true,
      windowsPathsNoEscape: true,
    });

    const combined: DiscoveryResult = { suites: [], errors: [] };

    for (const match of matches) {
      // Use forward slashes regardless of platform before parsing.
      const forwardSlashed = match.replace(/\\/g, '/');
      const { suites, errors } = await this.discoverTestFile(forwardSlashed);
      combined.suites.push(...suites);
      combined.errors.push(...errors);
    }

    return combined;
  }

  /**
   * Read and parse a single test file, then check that its target exists.
   * Read and parse failures are converted into a DiscoveryError.
   */
  private async discoverTestFile(testFilePath: string): Promise<DiscoveryResult> {
    try {
      const raw = fs.readFileSync(testFilePath, 'utf-8');
      const suite = this.parser.parse(raw, testFilePath);

      // A suite is only usable when the prompt file it targets exists.
      if (fs.existsSync(suite.target)) {
        return { suites: [suite], errors: [] };
      }

      return {
        suites: [],
        errors: [
          {
            filePath: testFilePath,
            message: `Target prompt file not found: ${suite.target}`,
          },
        ],
      };
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      return { suites: [], errors: [{ filePath: testFilePath, message }] };
    }
  }

  /** Map `name.prmd` to the sibling `name.test.prmd` and parse it if present. */
  private async discoverFromSource(sourcePath: string): Promise<DiscoveryResult> {
    const stem = path.basename(sourcePath, '.prmd');
    const sidecarPath = path.join(path.dirname(sourcePath), `${stem}.test.prmd`);

    if (fs.existsSync(sidecarPath)) {
      return this.discoverTestFile(sidecarPath);
    }

    return {
      suites: [],
      errors: [
        {
          filePath: sourcePath,
          message: `No colocated test file found: ${sidecarPath}`,
        },
      ],
    };
  }
}
@@ -0,0 +1,235 @@
1
+ /**
2
+ * Parses .test.prmd files into TestSuite structures.
3
+ *
4
+ * A .test.prmd file has YAML frontmatter (test definitions) and
5
+ * an optional content block (evaluator prompt for prmd evaluators).
6
+ */
7
+
8
+ import * as path from 'path';
9
+ import * as YAML from 'yaml';
10
+ import type { TestSuite, TestCase, AssertionDef, EvaluatorType, NlpCheck } from './types';
11
+
12
/** Evaluator names accepted in an assertion's `evaluator` field. */
const VALID_EVALUATOR_TYPES: EvaluatorType[] = ['nlp', 'script', 'prmd'];

/** Check names accepted in the `check` field of an nlp assertion. */
const VALID_NLP_CHECKS: NlpCheck[] = [
  'contains',
  'not_contains',
  'matches',
  'max_tokens',
  'min_tokens',
  'starts_with',
  'ends_with',
];
17
+
18
/** Shape the YAML frontmatter is cast to before it is validated. */
interface ParsedFrontmatter {
  /** Suite name; the parser falls back to the test file's base name. */
  name?: string;
  /** Copied onto the resulting suite unchanged. */
  description?: string;
  /** Path of the prompt under test, resolved relative to the test file. */
  target?: string;
  /** Raw test definitions; the parser requires a non-empty array. */
  tests?: RawTestCase[];
}
24
+
25
/** One unvalidated entry of the frontmatter `tests` array. */
interface RawTestCase {
  /** Test name; the parser falls back to `test_<position>`. */
  name?: string;
  /** Parameters for the test; the parser substitutes `{}` when absent or not an object. */
  params?: Record<string, unknown>;
  /** Raw assertion definitions for this test. */
  assert?: RawAssertionDef[];
  /** When truthy, the parser ignores `assert` and emits an empty assertion list. */
  expect_error?: boolean;
}
31
+
32
/** One unvalidated entry of a test's `assert` array. */
interface RawAssertionDef {
  /** Evaluator name; the parser requires 'nlp', 'script' or 'prmd'. */
  evaluator?: string;
  /** Check name; required for nlp assertions. */
  check?: string;
  /** Value for the check; required for nlp assertions. */
  value?: unknown;
  /** Script path; required for script assertions. */
  run?: string;
  /** Optional prompt for prmd assertions. */
  prompt?: string;
  /** Optional provider for prmd assertions. */
  provider?: string;
  /** Optional model for prmd assertions. */
  model?: string;
}
41
+
42
/**
 * Converts the raw text of a `.test.prmd` file into a validated TestSuite.
 *
 * Validation problems are reported as TestParseError, which carries the path
 * of the offending file.
 */
export class TestParser {
  /**
   * Parse a .test.prmd file's raw content into a TestSuite.
   *
   * @param content - Full text of the test file.
   * @param testFilePath - Path of the test file. Used to resolve the target,
   *   to derive a default suite name, and to annotate errors.
   * @returns The parsed suite. `evaluatorPrompt` is the trimmed text after the
   *   frontmatter, or undefined when that text is empty.
   * @throws TestParseError when the frontmatter is missing, is not valid YAML,
   *   is not a YAML mapping, or contains invalid test definitions.
   */
  parse(content: string, testFilePath: string): TestSuite {
    // Drop a leading UTF-8 BOM and normalize CRLF. Either one would otherwise
    // stop the frontmatter pattern from matching the opening "---" line.
    const normalized = content.replace(/^\uFEFF/, '').replace(/\r\n/g, '\n');
    const { frontmatter, body } = this.splitFrontmatter(normalized);

    if (!frontmatter) {
      throw new TestParseError('Missing YAML frontmatter in .test.prmd file', testFilePath);
    }

    let parsed: ParsedFrontmatter;
    try {
      parsed = YAML.parse(frontmatter) as ParsedFrontmatter;
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      throw new TestParseError(`Invalid YAML frontmatter: ${message}`, testFilePath);
    }

    // A YAML list is also typeof 'object', so arrays are rejected explicitly.
    if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
      throw new TestParseError('Frontmatter must be a YAML object', testFilePath);
    }

    const name = parsed.name || path.basename(testFilePath, '.test.prmd');
    const target = this.resolveTarget(parsed.target, testFilePath);
    const tests = this.parseTests(parsed.tests, testFilePath);
    const evaluatorPrompt = body.trim() || undefined;

    return {
      name,
      description: parsed.description,
      target,
      testFilePath,
      tests,
      evaluatorPrompt,
    };
  }

  /**
   * Split content into the text between the leading `---` delimiters and the
   * remainder. `frontmatter` is null when the content does not start with a
   * delimited block; in that case `body` is the whole content.
   */
  private splitFrontmatter(content: string): { frontmatter: string | null; body: string } {
    const match = content.match(/^---\n([\s\S]*?)\n---\n?([\s\S]*)$/);
    if (!match) {
      return { frontmatter: null, body: content };
    }
    return {
      frontmatter: match[1],
      body: match[2],
    };
  }

  /**
   * Resolve the prompt file under test to an absolute path.
   *
   * An explicit `target` is resolved relative to the test file's directory.
   * Without one, `name.test.prmd` maps to `name.prmd` in the same directory.
   *
   * @throws TestParseError when `target` is present but is not a string.
   */
  private resolveTarget(target: string | undefined, testFilePath: string): string {
    const dir = path.dirname(testFilePath);

    if (target) {
      // The value comes from YAML, so at runtime it may not be a string even
      // though the static type says so. path.resolve would throw a TypeError.
      if (typeof target !== 'string') {
        throw new TestParseError('"target" must be a string path', testFilePath);
      }
      return path.resolve(dir, target);
    }

    // Auto-discover: summarize.test.prmd -> summarize.prmd
    const base = path.basename(testFilePath);
    const sourceBase = base.replace(/\.test\.prmd$/, '.prmd');
    return path.resolve(dir, sourceBase);
  }

  /**
   * Validate the `tests` array and convert each entry into a TestCase.
   *
   * @throws TestParseError when `tests` is missing, not an array, empty, or
   *   contains an entry that is not a YAML mapping.
   */
  private parseTests(rawTests: RawTestCase[] | undefined, filePath: string): TestCase[] {
    if (!rawTests || !Array.isArray(rawTests)) {
      throw new TestParseError('Frontmatter must contain a "tests" array', filePath);
    }

    if (rawTests.length === 0) {
      throw new TestParseError('"tests" array must not be empty', filePath);
    }

    return rawTests.map((raw, index) => {
      // An empty list item parses to null; report it instead of crashing on
      // the property access below.
      if (!raw || typeof raw !== 'object' || Array.isArray(raw)) {
        throw new TestParseError(`Test ${index + 1} must be a YAML mapping`, filePath);
      }

      const name = raw.name || `test_${index + 1}`;
      const params = raw.params && typeof raw.params === 'object' ? raw.params : {};

      // Tests that expect an error carry no assertions.
      if (raw.expect_error) {
        return {
          name,
          params,
          assert: [],
          expect_error: true,
        };
      }

      const assertions = this.parseAssertions(raw.assert, name, filePath);
      return { name, params, assert: assertions };
    });
  }

  /**
   * Validate a test's `assert` list and convert each entry into an AssertionDef.
   * A missing or non-array `assert` yields an empty list.
   *
   * @throws TestParseError when an entry is not a YAML mapping, names an
   *   unknown evaluator, or fails that evaluator's validation.
   */
  private parseAssertions(
    rawAssertions: RawAssertionDef[] | undefined,
    testName: string,
    filePath: string
  ): AssertionDef[] {
    if (!rawAssertions || !Array.isArray(rawAssertions)) {
      return [];
    }

    return rawAssertions.map((raw, index) => {
      // Guard against null or scalar list items before reading their fields.
      if (!raw || typeof raw !== 'object') {
        throw new TestParseError(
          `Test "${testName}", assertion ${index + 1}: must be a YAML mapping with an "evaluator"`,
          filePath
        );
      }

      if (!raw.evaluator || !VALID_EVALUATOR_TYPES.includes(raw.evaluator as EvaluatorType)) {
        throw new TestParseError(
          `Test "${testName}", assertion ${index + 1}: invalid evaluator "${raw.evaluator}". ` +
            `Must be one of: ${VALID_EVALUATOR_TYPES.join(', ')}`,
          filePath
        );
      }

      const evaluator = raw.evaluator as EvaluatorType;

      if (evaluator === 'nlp') {
        return this.validateNlpAssertion(raw, testName, index, filePath);
      }

      if (evaluator === 'script') {
        return this.validateScriptAssertion(raw, testName, index, filePath);
      }

      return this.validatePrmdAssertion(raw, testName, index, filePath);
    });
  }

  /**
   * Build an nlp assertion.
   *
   * @throws TestParseError when `check` is not a known nlp check, or `value`
   *   is undefined or null.
   */
  private validateNlpAssertion(
    raw: RawAssertionDef,
    testName: string,
    index: number,
    filePath: string
  ): AssertionDef {
    if (!raw.check || !VALID_NLP_CHECKS.includes(raw.check as NlpCheck)) {
      throw new TestParseError(
        `Test "${testName}", assertion ${index + 1}: NLP evaluator requires a valid "check". ` +
          `Must be one of: ${VALID_NLP_CHECKS.join(', ')}`,
        filePath
      );
    }

    // Falsy values such as 0 or '' are allowed; only undefined/null are rejected.
    if (raw.value === undefined || raw.value === null) {
      throw new TestParseError(
        `Test "${testName}", assertion ${index + 1}: NLP evaluator requires a "value"`,
        filePath
      );
    }

    return {
      evaluator: 'nlp',
      check: raw.check as NlpCheck,
      value: raw.value as string | string[] | number,
    };
  }

  /**
   * Build a script assertion.
   *
   * @throws TestParseError when `run` is missing, empty, or not a string.
   */
  private validateScriptAssertion(
    raw: RawAssertionDef,
    testName: string,
    index: number,
    filePath: string
  ): AssertionDef {
    if (!raw.run || typeof raw.run !== 'string') {
      throw new TestParseError(
        `Test "${testName}", assertion ${index + 1}: script evaluator requires a "run" path`,
        filePath
      );
    }

    return {
      evaluator: 'script',
      run: raw.run,
    };
  }

  /**
   * Build a prmd assertion. Nothing is required, so this never throws; the
   * unused parameters keep the signature parallel to the other validators.
   */
  private validatePrmdAssertion(
    raw: RawAssertionDef,
    _testName: string,
    _index: number,
    _filePath: string
  ): AssertionDef {
    // prompt: is optional — if omitted, uses the content block of the .test.prmd
    return {
      evaluator: 'prmd',
      prompt: raw.prompt || undefined,
      provider: raw.provider || undefined,
      model: raw.model || undefined,
    };
  }
}
226
+
227
/**
 * Raised when a `.test.prmd` file cannot be parsed or fails validation.
 * The message has the file path appended in parentheses.
 */
export class TestParseError extends Error {
  /** Path of the test file that failed to parse. */
  public readonly filePath: string;

  /**
   * @param message - Description of the problem, without the path.
   * @param filePath - Path of the test file; appended to the message and stored.
   */
  constructor(message: string, filePath: string) {
    super(`${message} (${filePath})`);

    this.name = 'TestParseError';
    this.filePath = filePath;
  }
}