@prompd/test 0.5.0-beta.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/dist/EvaluatorEngine.d.ts +32 -0
  2. package/dist/EvaluatorEngine.d.ts.map +1 -0
  3. package/dist/EvaluatorEngine.js +97 -0
  4. package/dist/TestDiscovery.d.ts +28 -0
  5. package/dist/TestDiscovery.d.ts.map +1 -0
  6. package/dist/TestDiscovery.js +137 -0
  7. package/dist/TestParser.d.ts +25 -0
  8. package/dist/TestParser.d.ts.map +1 -0
  9. package/dist/TestParser.js +187 -0
  10. package/dist/TestRunner.d.ts +57 -0
  11. package/dist/TestRunner.d.ts.map +1 -0
  12. package/dist/TestRunner.js +463 -0
  13. package/dist/cli-types.d.ts +62 -0
  14. package/dist/cli-types.d.ts.map +1 -0
  15. package/dist/cli-types.js +6 -0
  16. package/dist/evaluators/NlpEvaluator.d.ts +26 -0
  17. package/dist/evaluators/NlpEvaluator.d.ts.map +1 -0
  18. package/dist/evaluators/NlpEvaluator.js +145 -0
  19. package/dist/evaluators/PrmdEvaluator.d.ts +42 -0
  20. package/dist/evaluators/PrmdEvaluator.d.ts.map +1 -0
  21. package/dist/evaluators/PrmdEvaluator.js +265 -0
  22. package/dist/evaluators/ScriptEvaluator.d.ts +19 -0
  23. package/dist/evaluators/ScriptEvaluator.d.ts.map +1 -0
  24. package/dist/evaluators/ScriptEvaluator.js +161 -0
  25. package/dist/evaluators/types.d.ts +19 -0
  26. package/dist/evaluators/types.d.ts.map +1 -0
  27. package/dist/evaluators/types.js +5 -0
  28. package/dist/index.d.ts +25 -0
  29. package/dist/index.d.ts.map +1 -0
  30. package/dist/index.js +33 -0
  31. package/dist/reporters/ConsoleReporter.d.ts +17 -0
  32. package/dist/reporters/ConsoleReporter.d.ts.map +1 -0
  33. package/dist/reporters/ConsoleReporter.js +85 -0
  34. package/dist/reporters/JsonReporter.d.ts +11 -0
  35. package/dist/reporters/JsonReporter.d.ts.map +1 -0
  36. package/dist/reporters/JsonReporter.js +18 -0
  37. package/dist/reporters/JunitReporter.d.ts +15 -0
  38. package/dist/reporters/JunitReporter.d.ts.map +1 -0
  39. package/dist/reporters/JunitReporter.js +89 -0
  40. package/dist/reporters/types.d.ts +8 -0
  41. package/dist/reporters/types.d.ts.map +1 -0
  42. package/dist/reporters/types.js +5 -0
  43. package/dist/types.d.ts +115 -0
  44. package/dist/types.d.ts.map +1 -0
  45. package/dist/types.js +5 -0
  46. package/package.json +34 -0
  47. package/src/EvaluatorEngine.ts +130 -0
  48. package/src/TestDiscovery.ts +133 -0
  49. package/src/TestParser.ts +235 -0
  50. package/src/TestRunner.ts +516 -0
  51. package/src/cli-types.ts +92 -0
  52. package/src/evaluators/NlpEvaluator.ts +184 -0
  53. package/src/evaluators/PrmdEvaluator.ts +284 -0
  54. package/src/evaluators/ScriptEvaluator.ts +149 -0
  55. package/src/evaluators/types.ts +24 -0
  56. package/src/index.ts +76 -0
  57. package/src/reporters/ConsoleReporter.ts +100 -0
  58. package/src/reporters/JsonReporter.ts +21 -0
  59. package/src/reporters/JunitReporter.ts +113 -0
  60. package/src/reporters/types.ts +9 -0
  61. package/src/types.ts +133 -0
  62. package/tsconfig.json +20 -0
package/src/index.ts ADDED
@@ -0,0 +1,76 @@
1
+ /**
2
+ * @prompd/test - Prompt testing and evaluation framework
3
+ *
4
+ * Provides test discovery, assertion evaluation, and reporting for .prmd files.
5
+ * Consumes @prompd/cli for compilation and execution.
6
+ */
7
+
8
+ // Core classes
9
export { TestRunner } from './TestRunner';
export { TestParser, TestParseError } from './TestParser';
export { TestDiscovery } from './TestDiscovery';
export { EvaluatorEngine } from './EvaluatorEngine';

// Evaluators
export { NlpEvaluator } from './evaluators/NlpEvaluator';
export { ScriptEvaluator } from './evaluators/ScriptEvaluator';
export { PrmdEvaluator } from './evaluators/PrmdEvaluator';

// Reporters
export { ConsoleReporter } from './reporters/ConsoleReporter';
export { JsonReporter } from './reporters/JsonReporter';
export { JunitReporter } from './reporters/JunitReporter';

// Types
export type {
  TestSuite,
  TestCase,
  AssertionDef,
  TestResult,
  TestRunResult,
  TestSuiteResult,
  TestRunSummary,
  TestRunOptions,
  TestProgressEvent,
  TestProgressCallback,
  TestStatus,
  AssertionStatus,
  AssertionResult,
  EvaluatorType,
  NlpCheck,
  // Type of `TestResult.execution`; re-exported so consumers of the package
  // entry point can name it without a deep import into './types'.
  TestExecutionMetadata,
} from './types';

export type {
  Evaluator,
  EvaluatorContext,
} from './evaluators/types';

export type {
  Reporter,
} from './reporters/types';

export type {
  DiscoveryResult,
  DiscoveryError,
} from './TestDiscovery';

export type {
  EvaluatorEngineOptions,
} from './EvaluatorEngine';

// Re-export TestHarness interface from @prompd/cli for convenience
export type {
  TestHarness,
  TestHarnessResult,
  TestHarnessOptions,
  TestHarnessProgressEvent,
  TestHarnessProgressCallback,
} from '@prompd/cli';

export type {
  PrmdEvaluatorOptions,
} from './evaluators/PrmdEvaluator';

export type {
  CompilerModule,
} from './cli-types';
package/src/reporters/ConsoleReporter.ts ADDED
@@ -0,0 +1,100 @@
1
+ /**
2
+ * Console Reporter - terminal output with pass/fail formatting.
3
+ *
4
+ * Does NOT use emojis (breaks things per project rules).
5
+ * Uses simple text markers: [PASS], [FAIL], [ERR ], [SKIP].
6
+ */
7
+
8
+ import type { Reporter } from './types';
9
+ import type { TestRunResult, TestResult, AssertionResult } from '../types';
10
+
11
/**
 * Renders a test run as plain text for a terminal.
 *
 * Each suite is listed with its tests; a test line shows the status marker,
 * the test name, its duration and (when execution metadata is present) the
 * provider/model and total token count. Assertion details are printed for
 * failing and errored tests, or for every test when `verbose` is set.
 */
export class ConsoleReporter implements Reporter {
  private verbose: boolean;

  /**
   * @param verbose - When true, assertion details (including the reason of
   *   passing assertions) are printed for every test.
   */
  constructor(verbose = false) {
    this.verbose = verbose;
  }

  /**
   * Build the full text report.
   *
   * @param result - Suites and summary of a completed run.
   * @returns The report as a single newline-joined string.
   */
  report(result: TestRunResult): string {
    const lines: string[] = [];

    lines.push('');
    lines.push('=== Prompd Test Results ===');
    lines.push('');

    for (const suite of result.suites) {
      lines.push(` ${suite.suite}`);

      for (const test of suite.results) {
        const marker = this.statusMarker(test.status);
        const duration = this.formatDuration(test.duration);
        const meta = test.execution
          ? ` [${test.execution.provider}/${test.execution.model}${test.execution.usage?.totalTokens ? ` ${test.execution.usage.totalTokens}tok` : ''}]`
          : '';
        lines.push(` ${marker} ${test.testName} (${duration})${meta}`);

        if (test.status === 'error' && test.error) {
          lines.push(` Error: ${test.error}`);
        }

        // Passing/skipped tests stay terse unless verbose output was requested.
        if (this.verbose || test.status === 'fail' || test.status === 'error') {
          for (const assertion of test.assertions) {
            this.appendAssertionDetail(lines, assertion);
          }
        }
      }

      lines.push('');
    }

    // Summary
    const s = result.summary;
    lines.push('---');
    lines.push(
      `Tests: ${s.passed} passed, ${s.failed} failed, ${s.errors} errors, ${s.skipped} skipped, ${s.total} total`
    );
    lines.push(`Time: ${this.formatDuration(s.duration)}`);
    if (s.totalTokens) {
      lines.push(`Tokens: ${s.totalTokens.toLocaleString()}`);
    }
    if (s.models && s.models.length > 0) {
      lines.push(`Models: ${s.models.join(', ')}`);
    }

    if (s.failed > 0 || s.errors > 0) {
      lines.push('Result: FAIL');
    } else {
      lines.push('Result: PASS');
    }

    lines.push('');
    return lines.join('\n');
  }

  /**
   * Append one assertion line (marker, evaluator, optional check, duration)
   * and, for non-passing assertions or in verbose mode, its reason.
   */
  private appendAssertionDetail(lines: string[], assertion: AssertionResult): void {
    const marker = this.statusMarker(assertion.status);
    const check = assertion.check ? ` (${assertion.check})` : '';
    const duration = this.formatDuration(assertion.duration);
    lines.push(` ${marker} ${assertion.evaluator}${check} [${duration}]`);

    if (assertion.reason && (assertion.status !== 'pass' || this.verbose)) {
      lines.push(` ${assertion.reason}`);
    }
  }

  /** Map a test/assertion status to its fixed-width, emoji-free text marker. */
  private statusMarker(status: string): string {
    switch (status) {
      case 'pass': return '[PASS]';
      case 'fail': return '[FAIL]';
      case 'error': return '[ERR ]';
      case 'skip': return '[SKIP]';
      default: return '[????]';
    }
  }

  /**
   * Format a millisecond duration as `Nms`, `N.Ns` or `N.Nm`.
   *
   * Sub-second values are rounded to whole milliseconds so fractional timer
   * readings do not print long decimal tails. The seconds branch stops at
   * 59 950 ms, the first value `toFixed(1)` would otherwise render as the
   * misleading "60.0s"; from there on the value is shown in minutes.
   */
  private formatDuration(ms: number): string {
    const wholeMs = Math.round(ms);
    if (wholeMs < 1000) return `${wholeMs}ms`;
    if (ms < 59950) return `${(ms / 1000).toFixed(1)}s`;
    return `${(ms / 60000).toFixed(1)}m`;
  }
}
package/src/reporters/JsonReporter.ts ADDED
@@ -0,0 +1,21 @@
1
+ /**
2
+ * JSON Reporter - structured output for programmatic consumption and CI.
3
+ */
4
+
5
+ import type { Reporter } from './types';
6
+ import type { TestRunResult } from '../types';
7
+
8
/**
 * Serializes a test run result to a JSON string.
 */
export class JsonReporter implements Reporter {
  private pretty: boolean;

  /**
   * @param pretty - When true (the default) the JSON is indented by two
   *   spaces; when false it is emitted compactly.
   */
  constructor(pretty = true) {
    this.pretty = pretty;
  }

  /** Return `result` as JSON, indented or compact per the constructor flag. */
  report(result: TestRunResult): string {
    // An `undefined` indent makes JSON.stringify produce its compact form.
    const indent = this.pretty ? 2 : undefined;
    return JSON.stringify(result, null, indent);
  }
}
package/src/reporters/JunitReporter.ts ADDED
@@ -0,0 +1,113 @@
1
+ /**
2
+ * JUnit XML Reporter - generates JUnit-compatible XML for CI systems.
3
+ *
4
+ * Output format follows the JUnit XML schema used by Jenkins, GitHub Actions,
5
+ * Azure DevOps, and most CI platforms.
6
+ */
7
+
8
+ import type { Reporter } from './types';
9
+ import type { TestRunResult, TestResult } from '../types';
10
+
11
/**
 * Renders a test run as JUnit-style XML: one `<testsuites>` root, one
 * `<testsuite>` per suite and one `<testcase>` per test result.
 *
 * Every piece of dynamic text goes through `escapeXml`, which both escapes
 * markup characters and removes characters that XML 1.0 does not allow, so
 * arbitrary test output cannot make the document unparseable.
 */
export class JunitReporter implements Reporter {
  /**
   * Build the XML document for a completed run.
   *
   * @param result - Suites and summary counts. Durations are divided by 1000
   *   and written with three decimals into the `time` attributes.
   * @returns The XML document as a newline-joined string.
   */
  report(result: TestRunResult): string {
    const lines: string[] = [];

    lines.push('<?xml version="1.0" encoding="UTF-8"?>');
    lines.push(
      `<testsuites tests="${result.summary.total}" ` +
        `failures="${result.summary.failed}" ` +
        `errors="${result.summary.errors}" ` +
        `skipped="${result.summary.skipped}" ` +
        `time="${(result.summary.duration / 1000).toFixed(3)}">`
    );

    for (const suite of result.suites) {
      const suiteTests = suite.results.length;
      const suiteFailures = suite.results.filter(r => r.status === 'fail').length;
      const suiteErrors = suite.results.filter(r => r.status === 'error').length;
      const suiteSkipped = suite.results.filter(r => r.status === 'skip').length;
      const suiteDuration = suite.results.reduce((sum, r) => sum + r.duration, 0);

      lines.push(
        ` <testsuite name="${this.escapeXml(suite.suite)}" ` +
          `tests="${suiteTests}" ` +
          `failures="${suiteFailures}" ` +
          `errors="${suiteErrors}" ` +
          `skipped="${suiteSkipped}" ` +
          `time="${(suiteDuration / 1000).toFixed(3)}" ` +
          `file="${this.escapeXml(suite.testFilePath)}">`
      );

      for (const test of suite.results) {
        this.appendTestCase(lines, suite.suite, test);
      }

      lines.push(' </testsuite>');
    }

    lines.push('</testsuites>');
    return lines.join('\n');
  }

  /**
   * Append the `<testcase>` element for one test, including its
   * `<failure>`, `<error>`, `<skipped/>` and `<system-out>` children.
   */
  private appendTestCase(lines: string[], suiteName: string, test: TestResult): void {
    const time = (test.duration / 1000).toFixed(3);

    lines.push(
      ` <testcase name="${this.escapeXml(test.testName)}" ` +
        `classname="${this.escapeXml(suiteName)}" ` +
        `time="${time}">`
    );

    if (test.status === 'fail') {
      const failedAssertions = test.assertions.filter(a => a.status === 'fail');
      // Fall back to the test-level error (or a generic text) so the
      // message attribute is never empty when no assertion has status 'fail'.
      const message =
        failedAssertions
          .map(a => `${a.evaluator}${a.check ? `(${a.check})` : ''}: ${a.reason || 'failed'}`)
          .join('; ') ||
        test.error ||
        'Test failed';

      lines.push(` <failure message="${this.escapeXml(message)}">`);
      lines.push(this.escapeXml(this.buildFailureDetail(test)));
      lines.push(' </failure>');
    }

    if (test.status === 'error') {
      const errorMessage = test.error || 'Unknown error';
      lines.push(` <error message="${this.escapeXml(errorMessage)}">`);
      lines.push(this.escapeXml(errorMessage));
      lines.push(' </error>');
    }

    if (test.status === 'skip') {
      lines.push(' <skipped/>');
    }

    // Include output as system-out if available
    if (test.output) {
      lines.push(' <system-out>');
      // The 10000-unit cut may split a surrogate pair; escapeXml drops the
      // resulting lone surrogate.
      lines.push(this.escapeXml(test.output.substring(0, 10000)));
      lines.push(' </system-out>');
    }

    lines.push(' </testcase>');
  }

  /**
   * List every assertion of a test, one per line, each prefixed with a
   * marker reflecting its own status.
   */
  private buildFailureDetail(test: TestResult): string {
    const details: string[] = [];

    for (const assertion of test.assertions) {
      const prefix = this.assertionMarker(assertion.status);
      const check = assertion.check ? ` (${assertion.check})` : '';
      details.push(`${prefix} ${assertion.evaluator}${check}: ${assertion.reason || ''}`);
    }

    return details.join('\n');
  }

  /**
   * Marker for an assertion status. Errored and skipped assertions get
   * their own marker instead of being reported as failures.
   */
  private assertionMarker(status: string): string {
    switch (status) {
      case 'pass': return '[PASS]';
      case 'error': return '[ERR ]';
      case 'skip': return '[SKIP]';
      default: return '[FAIL]';
    }
  }

  /**
   * Make a string safe for XML text and attribute values.
   *
   * First removes every code point outside the XML 1.0 `Char` production
   * (most C0 control characters, U+FFFE/U+FFFF and lone surrogates), then
   * escapes `&`, `<`, `>`, `"` and `'`.
   */
  private escapeXml(str: string): string {
    return str
      .replace(/[^\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD\u{10000}-\u{10FFFF}]/gu, '')
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&apos;');
  }
}
package/src/reporters/types.ts ADDED
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Reporter interface for @prompd/test
3
+ */
4
+
5
+ import type { TestRunResult } from '../types';
6
+
7
/**
 * Contract shared by all reporters: turn a completed test run into the
 * string to be printed or written out.
 */
export interface Reporter {
  /**
   * @param result - The suites and summary of a completed run.
   * @returns The rendered report.
   */
  report(result: TestRunResult): string;
}
package/src/types.ts ADDED
@@ -0,0 +1,133 @@
1
+ /**
2
+ * Core type definitions for @prompd/test
3
+ */
4
+
5
+ // --- Evaluator taxonomy ---
6
+
7
/** Identifier of the evaluator an assertion is handled by. */
export type EvaluatorType = 'nlp' | 'script' | 'prmd';

/** Check names accepted by the `check` field of an assertion definition. */
export type NlpCheck =
  | 'contains'
  | 'not_contains'
  | 'matches'
  | 'max_tokens'
  | 'min_tokens'
  | 'starts_with'
  | 'ends_with';
17
+
18
+ // --- Test definition types (parsed from .test.prmd frontmatter) ---
19
+
20
/**
 * One assertion entry of a test case. Which optional fields are meaningful
 * depends on `evaluator`; the groups below follow the evaluator kinds.
 */
export interface AssertionDef {
  evaluator: EvaluatorType;

  // NLP fields
  check?: NlpCheck;
  value?: string | string[] | number;

  // Script fields
  run?: string;

  // Prmd fields — prompt: registry ref, local file, or omit to use content block
  prompt?: string;
  provider?: string;
  model?: string;
}

/** A named test: the parameters to run with and the assertions to apply. */
export interface TestCase {
  name: string;
  params: Record<string, unknown>;
  assert: AssertionDef[];
  // NOTE(review): presumably marks a test whose run is expected to error —
  // confirm against the runner.
  expect_error?: boolean;
}

/** A parsed test file: suite metadata plus its test cases. */
export interface TestSuite {
  name: string;
  description?: string;
  // NOTE(review): presumably the prompt file under test — confirm against the parser.
  target: string;
  testFilePath: string;
  tests: TestCase[];
  evaluatorPrompt?: string;
}
48
+
49
+ // --- Test result types ---
50
+
51
/** Outcome of a whole test. */
export type TestStatus = 'pass' | 'fail' | 'error' | 'skip';

/** Outcome of a single assertion. */
export type AssertionStatus = 'pass' | 'fail' | 'error' | 'skip';

/** Result of evaluating one assertion. */
export interface AssertionResult {
  evaluator: EvaluatorType;
  check?: string;
  status: AssertionStatus;
  reason?: string;
  /** Elapsed time; the reporters format this value as milliseconds. */
  duration: number;
}

/** Provider, model, timing and optional token usage of one execution. */
export interface TestExecutionMetadata {
  provider: string;
  model: string;
  duration: number;
  usage?: {
    promptTokens?: number;
    completionTokens?: number;
    totalTokens?: number;
  };
}

/** Result of one test case, including its assertion results. */
export interface TestResult {
  suite: string;
  testName: string;
  status: TestStatus;
  /** Elapsed time; the reporters format this value as milliseconds. */
  duration: number;
  assertions: AssertionResult[];
  output?: string;
  compiledInput?: string;
  error?: string;
  execution?: TestExecutionMetadata;
}

/** Aggregate counts and totals for a run. */
export interface TestRunSummary {
  total: number;
  passed: number;
  failed: number;
  errors: number;
  skipped: number;
  duration: number;
  totalTokens?: number;
  providers?: string[];
  models?: string[];
}

/** Everything a reporter receives: per-suite results plus the summary. */
export interface TestRunResult {
  suites: TestSuiteResult[];
  summary: TestRunSummary;
}

/** Results of all tests belonging to one suite / test file. */
export interface TestSuiteResult {
  suite: string;
  testFilePath: string;
  results: TestResult[];
}
107
+
108
+ // --- Options ---
109
+
110
/** Options accepted when starting a test run; every field is optional. */
export interface TestRunOptions {
  evaluators?: EvaluatorType[];
  noLlm?: boolean;
  reporter?: 'console' | 'json' | 'junit';
  failFast?: boolean;
  runAll?: boolean;
  verbose?: boolean;
  workspaceRoot?: string;
  registryUrl?: string;

  // Default provider/model for test execution (overridden by .prmd frontmatter)
  provider?: string;
  model?: string;
}
123
+
124
+ // --- Progress callback ---
125
+
126
/**
 * Progress notifications, discriminated by `type`. Each variant carries the
 * suite name plus the data relevant to that stage.
 */
export type TestProgressEvent =
  | { type: 'suite_start'; suite: string; testCount: number }
  | { type: 'test_start'; suite: string; testName: string }
  | { type: 'test_complete'; suite: string; testName: string; result: TestResult }
  | { type: 'suite_complete'; suite: string; results: TestResult[] }
  | { type: 'assertion_complete'; suite: string; testName: string; assertion: AssertionResult };

/** Listener signature for progress events. */
export type TestProgressCallback = (event: TestProgressEvent) => void;
package/tsconfig.json ADDED
@@ -0,0 +1,20 @@
1
+ {
2
+ "compilerOptions": {
3
+ "target": "ES2020",
4
+ "module": "commonjs",
5
+ "lib": ["ES2020"],
6
+ "declaration": true,
7
+ "declarationMap": true,
8
+ "outDir": "./dist",
9
+ "rootDir": "./src",
10
+ "strict": true,
11
+ "esModuleInterop": true,
12
+ "skipLibCheck": true,
13
+ "forceConsistentCasingInFileNames": true,
14
+ "resolveJsonModule": true,
15
+ "moduleResolution": "node",
16
+ "types": ["node"]
17
+ },
18
+ "include": ["src/**/*"],
19
+ "exclude": ["node_modules", "dist"]
20
+ }