@artemiskit/core 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. package/CHANGELOG.md +48 -0
  2. package/dist/adapters/factory.d.ts +23 -0
  3. package/dist/adapters/factory.d.ts.map +1 -0
  4. package/dist/adapters/index.d.ts +7 -0
  5. package/dist/adapters/index.d.ts.map +1 -0
  6. package/dist/adapters/registry.d.ts +56 -0
  7. package/dist/adapters/registry.d.ts.map +1 -0
  8. package/dist/adapters/types.d.ts +151 -0
  9. package/dist/adapters/types.d.ts.map +1 -0
  10. package/dist/artifacts/index.d.ts +6 -0
  11. package/dist/artifacts/index.d.ts.map +1 -0
  12. package/dist/artifacts/manifest.d.ts +19 -0
  13. package/dist/artifacts/manifest.d.ts.map +1 -0
  14. package/dist/artifacts/types.d.ts +368 -0
  15. package/dist/artifacts/types.d.ts.map +1 -0
  16. package/dist/evaluators/contains.d.ts +10 -0
  17. package/dist/evaluators/contains.d.ts.map +1 -0
  18. package/dist/evaluators/exact.d.ts +10 -0
  19. package/dist/evaluators/exact.d.ts.map +1 -0
  20. package/dist/evaluators/fuzzy.d.ts +10 -0
  21. package/dist/evaluators/fuzzy.d.ts.map +1 -0
  22. package/dist/evaluators/index.d.ts +24 -0
  23. package/dist/evaluators/index.d.ts.map +1 -0
  24. package/dist/evaluators/json-schema.d.ts +11 -0
  25. package/dist/evaluators/json-schema.d.ts.map +1 -0
  26. package/dist/evaluators/llm-grader.d.ts +11 -0
  27. package/dist/evaluators/llm-grader.d.ts.map +1 -0
  28. package/dist/evaluators/regex.d.ts +10 -0
  29. package/dist/evaluators/regex.d.ts.map +1 -0
  30. package/dist/evaluators/types.d.ts +29 -0
  31. package/dist/evaluators/types.d.ts.map +1 -0
  32. package/dist/index.d.ts +14 -0
  33. package/dist/index.d.ts.map +1 -0
  34. package/dist/index.js +26021 -0
  35. package/dist/provenance/environment.d.ts +12 -0
  36. package/dist/provenance/environment.d.ts.map +1 -0
  37. package/dist/provenance/git.d.ts +9 -0
  38. package/dist/provenance/git.d.ts.map +1 -0
  39. package/dist/provenance/index.d.ts +6 -0
  40. package/dist/provenance/index.d.ts.map +1 -0
  41. package/dist/redaction/index.d.ts +3 -0
  42. package/dist/redaction/index.d.ts.map +1 -0
  43. package/dist/redaction/redactor.d.ts +79 -0
  44. package/dist/redaction/redactor.d.ts.map +1 -0
  45. package/dist/redaction/types.d.ts +120 -0
  46. package/dist/redaction/types.d.ts.map +1 -0
  47. package/dist/runner/executor.d.ts +11 -0
  48. package/dist/runner/executor.d.ts.map +1 -0
  49. package/dist/runner/index.d.ts +7 -0
  50. package/dist/runner/index.d.ts.map +1 -0
  51. package/dist/runner/runner.d.ts +13 -0
  52. package/dist/runner/runner.d.ts.map +1 -0
  53. package/dist/runner/types.d.ts +57 -0
  54. package/dist/runner/types.d.ts.map +1 -0
  55. package/dist/scenario/index.d.ts +7 -0
  56. package/dist/scenario/index.d.ts.map +1 -0
  57. package/dist/scenario/parser.d.ts +17 -0
  58. package/dist/scenario/parser.d.ts.map +1 -0
  59. package/dist/scenario/schema.d.ts +945 -0
  60. package/dist/scenario/schema.d.ts.map +1 -0
  61. package/dist/scenario/variables.d.ts +19 -0
  62. package/dist/scenario/variables.d.ts.map +1 -0
  63. package/dist/storage/factory.d.ts +13 -0
  64. package/dist/storage/factory.d.ts.map +1 -0
  65. package/dist/storage/index.d.ts +8 -0
  66. package/dist/storage/index.d.ts.map +1 -0
  67. package/dist/storage/local.d.ts +20 -0
  68. package/dist/storage/local.d.ts.map +1 -0
  69. package/dist/storage/supabase.d.ts +21 -0
  70. package/dist/storage/supabase.d.ts.map +1 -0
  71. package/dist/storage/types.d.ts +86 -0
  72. package/dist/storage/types.d.ts.map +1 -0
  73. package/dist/utils/errors.d.ts +25 -0
  74. package/dist/utils/errors.d.ts.map +1 -0
  75. package/dist/utils/index.d.ts +6 -0
  76. package/dist/utils/index.d.ts.map +1 -0
  77. package/dist/utils/logger.d.ts +21 -0
  78. package/dist/utils/logger.d.ts.map +1 -0
  79. package/package.json +56 -0
  80. package/src/adapters/factory.ts +75 -0
  81. package/src/adapters/index.ts +7 -0
  82. package/src/adapters/registry.ts +143 -0
  83. package/src/adapters/types.ts +184 -0
  84. package/src/artifacts/index.ts +6 -0
  85. package/src/artifacts/manifest.test.ts +206 -0
  86. package/src/artifacts/manifest.ts +136 -0
  87. package/src/artifacts/types.ts +426 -0
  88. package/src/evaluators/contains.test.ts +58 -0
  89. package/src/evaluators/contains.ts +41 -0
  90. package/src/evaluators/exact.test.ts +48 -0
  91. package/src/evaluators/exact.ts +33 -0
  92. package/src/evaluators/fuzzy.test.ts +50 -0
  93. package/src/evaluators/fuzzy.ts +39 -0
  94. package/src/evaluators/index.ts +53 -0
  95. package/src/evaluators/json-schema.ts +98 -0
  96. package/src/evaluators/llm-grader.ts +100 -0
  97. package/src/evaluators/regex.test.ts +73 -0
  98. package/src/evaluators/regex.ts +43 -0
  99. package/src/evaluators/types.ts +37 -0
  100. package/src/index.ts +31 -0
  101. package/src/provenance/environment.ts +18 -0
  102. package/src/provenance/git.ts +48 -0
  103. package/src/provenance/index.ts +6 -0
  104. package/src/redaction/index.ts +23 -0
  105. package/src/redaction/redactor.test.ts +258 -0
  106. package/src/redaction/redactor.ts +246 -0
  107. package/src/redaction/types.ts +135 -0
  108. package/src/runner/executor.ts +251 -0
  109. package/src/runner/index.ts +7 -0
  110. package/src/runner/runner.ts +153 -0
  111. package/src/runner/types.ts +60 -0
  112. package/src/scenario/index.ts +7 -0
  113. package/src/scenario/parser.test.ts +99 -0
  114. package/src/scenario/parser.ts +108 -0
  115. package/src/scenario/schema.ts +176 -0
  116. package/src/scenario/variables.test.ts +150 -0
  117. package/src/scenario/variables.ts +60 -0
  118. package/src/storage/factory.ts +52 -0
  119. package/src/storage/index.ts +8 -0
  120. package/src/storage/local.test.ts +165 -0
  121. package/src/storage/local.ts +194 -0
  122. package/src/storage/supabase.ts +151 -0
  123. package/src/storage/types.ts +98 -0
  124. package/src/utils/errors.ts +76 -0
  125. package/src/utils/index.ts +6 -0
  126. package/src/utils/logger.ts +59 -0
  127. package/tsconfig.json +13 -0
@@ -0,0 +1,251 @@
1
+ /**
2
+ * Test case executor
3
+ */
4
+
5
+ import type { CaseRedactionInfo, CaseResult } from '../artifacts/types';
6
+ import { getEvaluator } from '../evaluators';
7
+ import { type RedactionConfig, Redactor } from '../redaction';
8
+ import type { TestCase } from '../scenario/schema';
9
+ import { mergeVariables, substituteVariables } from '../scenario/variables';
10
+ import type { ExecutorContext } from './types';
11
+
12
/**
 * Resolve the redaction settings that apply to one test case.
 *
 * Precedence is CLI > case > scenario. A CLI or case config only takes part
 * when its `enabled` flag is explicitly set; the scenario config only takes
 * part when its `enabled` flag is truthy. Fields missing from the winning
 * config fall through to the lower-priority configs and then to the defaults
 * (redact prompts and responses, leave metadata alone, replace with
 * `[REDACTED]`).
 *
 * @param scenarioConfig - Scenario-level redaction settings (lowest priority).
 * @param caseConfig - Case-level redaction settings.
 * @param cliConfig - CLI-level redaction settings (highest priority).
 * @returns A config with every field except `patterns` filled in.
 */
function mergeRedactionConfig(
  scenarioConfig?: RedactionConfig,
  caseConfig?: RedactionConfig,
  cliConfig?: RedactionConfig
): RedactionConfig {
  // Builds the merged config from the winning layer plus its fallbacks.
  const layer = (
    enabled: boolean,
    top: RedactionConfig,
    mid?: RedactionConfig,
    low?: RedactionConfig
  ): RedactionConfig => ({
    enabled,
    patterns: top.patterns ?? mid?.patterns ?? low?.patterns,
    redactPrompts: top.redactPrompts ?? mid?.redactPrompts ?? low?.redactPrompts ?? true,
    redactResponses: top.redactResponses ?? mid?.redactResponses ?? low?.redactResponses ?? true,
    redactMetadata: top.redactMetadata ?? mid?.redactMetadata ?? low?.redactMetadata ?? false,
    replacement: top.replacement ?? mid?.replacement ?? low?.replacement ?? '[REDACTED]',
  });

  // CLI wins whenever it states `enabled` explicitly (true or false).
  if (cliConfig?.enabled !== undefined) {
    return layer(cliConfig.enabled, cliConfig, caseConfig, scenarioConfig);
  }

  // Otherwise the case wins over the scenario, under the same rule.
  if (caseConfig?.enabled !== undefined) {
    return layer(caseConfig.enabled, caseConfig, scenarioConfig);
  }

  // The scenario is only consulted when it actually turns redaction on.
  if (scenarioConfig?.enabled) {
    return layer(scenarioConfig.enabled, scenarioConfig);
  }

  // Nothing opted in: redaction is off.
  return {
    enabled: false,
    redactPrompts: true,
    redactResponses: true,
    redactMetadata: false,
    replacement: '[REDACTED]',
  };
}
81
+
82
/**
 * Execute a single test case, retrying failed attempts.
 *
 * Each attempt is delegated to `executeCaseAttempt`. When an attempt throws,
 * the next one is delayed with exponential backoff (1s, 2s, 4s, ...). If every
 * attempt throws, a failing `CaseResult` is returned instead of rethrowing, so
 * one broken case does not abort the whole run.
 *
 * @param testCase - The case to execute.
 * @param context - Client, scenario and per-case execution settings.
 * @returns The result of the first successful attempt, or a failing result
 *   describing the last error.
 */
export async function executeCase(
  testCase: TestCase,
  context: ExecutorContext
): Promise<CaseResult> {
  const { timeout, retries = 0 } = context;
  // A negative or NaN retry count must still allow the initial attempt.
  const maxRetries = retries > 0 ? retries : 0;
  const caseStartTime = Date.now();

  let lastError: Error | null = null;
  let attempts = 0;

  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    attempts++;
    try {
      return await executeCaseAttempt(testCase, context, timeout);
    } catch (error) {
      // Anything can be thrown; normalise so `.message` is always meaningful.
      lastError = error instanceof Error ? error : new Error(String(error));
      if (attempt < maxRetries) {
        // Wait before retry with exponential backoff
        await sleep(2 ** attempt * 1000);
      }
    }
  }

  // All attempts failed: report the failure as a result rather than throwing.
  const latencyMs = Date.now() - caseStartTime;
  return {
    id: testCase.id,
    name: testCase.name,
    ok: false,
    score: 0,
    matcherType: testCase.expected.type,
    reason: `Failed after ${attempts} attempts: ${lastError?.message}`,
    latencyMs,
    tokens: { prompt: 0, completion: 0, total: 0 },
    prompt: testCase.prompt,
    response: '',
    expected: testCase.expected,
    tags: testCase.tags,
    error: lastError?.message,
  };
}
125
+
126
/**
 * Run one attempt of a test case: build the prompt, call the model, evaluate
 * the response and apply redaction to what gets recorded.
 *
 * @param testCase - The case to execute.
 * @param context - Supplies the model client, the scenario and the CLI-level
 *   redaction config.
 * @param timeout - Optional limit in milliseconds for the model call; a falsy
 *   value (including 0) disables the limit.
 * @returns The evaluated (and possibly redacted) case result.
 * @throws Whatever the client or evaluator throws, or a `Timeout after …ms`
 *   error when the model call exceeds `timeout`.
 */
async function executeCaseAttempt(
  testCase: TestCase,
  context: ExecutorContext,
  timeout?: number
): Promise<CaseResult> {
  const { client, scenario, redaction: cliRedaction } = context;

  // Merge scenario-level and case-level variables (case overrides scenario)
  const variables = mergeVariables(scenario.variables, testCase.variables);

  // Apply variable substitution to prompt
  let prompt = substituteVariables(testCase.prompt, variables);

  // A system prompt is only prepended when the case prompt is a plain string.
  if (scenario.setup?.systemPrompt && typeof prompt === 'string') {
    const systemPrompt = substituteVariables(scenario.setup.systemPrompt, variables);
    prompt = [
      { role: 'system' as const, content: systemPrompt },
      { role: 'user' as const, content: prompt },
    ];
  }

  // Generate response with optional timeout
  const generatePromise = client.generate({
    prompt,
    model: testCase.model || scenario.model,
    temperature: scenario.temperature,
    maxTokens: scenario.maxTokens,
    seed: scenario.seed,
  });

  const result = timeout ? await withTimeout(generatePromise, timeout) : await generatePromise;

  // Evaluate the unredacted response
  const evaluator = getEvaluator(testCase.expected.type);
  const evalResult = await evaluator.evaluate(result.text, testCase.expected, {
    client,
    testCase,
  });

  // Determine effective redaction config (CLI > case > scenario)
  const effectiveRedaction = mergeRedactionConfig(
    scenario.redaction,
    testCase.redaction,
    cliRedaction
  );

  // NOTE(review): the recorded prompt is the raw case prompt, not the
  // variable-substituted / system-prompt-wrapped one sent to the model —
  // confirm this is intended.
  let finalPrompt: string | object = testCase.prompt;
  let finalResponse = result.text;
  let redactionInfo: CaseRedactionInfo | undefined;

  if (effectiveRedaction.enabled) {
    const redacted = redactCaseOutput(finalPrompt, finalResponse, effectiveRedaction);
    finalPrompt = redacted.prompt;
    finalResponse = redacted.response;
    redactionInfo = redacted.info;
  }

  return {
    id: testCase.id,
    name: testCase.name,
    ok: evalResult.passed,
    score: evalResult.score,
    matcherType: testCase.expected.type,
    reason: evalResult.reason,
    latencyMs: result.latencyMs,
    tokens: result.tokens,
    prompt: finalPrompt,
    response: finalResponse,
    expected: testCase.expected,
    tags: testCase.tags,
    redaction: redactionInfo,
  };
}

/**
 * Redact the recorded prompt and response according to `config` and report
 * what was changed.
 */
function redactCaseOutput(
  prompt: string | object,
  response: string,
  config: RedactionConfig
): { prompt: string | object; response: string; info: CaseRedactionInfo } {
  const redactor = new Redactor(config);

  let redactedPrompt: string | object = prompt;
  let redactedResponse = response;
  let promptRedacted = false;
  let responseRedacted = false;
  let redactionCount = 0;

  if (config.redactPrompts) {
    if (typeof prompt === 'string') {
      const outcome = redactor.redactPrompt(prompt);
      redactedPrompt = outcome.text;
      promptRedacted = outcome.wasRedacted;
      redactionCount += outcome.redactionCount;
    } else if (Array.isArray(prompt)) {
      // Chat-style prompt: redact every message that has string content.
      redactedPrompt = prompt.map((msg) => {
        // `typeof null === 'object'`, so null must be excluded before `in`.
        const hasTextContent =
          typeof msg === 'object' &&
          msg !== null &&
          'content' in msg &&
          typeof msg.content === 'string';
        if (!hasTextContent) {
          return msg;
        }
        const outcome = redactor.redactPrompt(msg.content);
        if (outcome.wasRedacted) {
          promptRedacted = true;
          redactionCount += outcome.redactionCount;
        }
        return { ...msg, content: outcome.text };
      });
    }
  }

  if (config.redactResponses) {
    const outcome = redactor.redactResponse(response);
    redactedResponse = outcome.text;
    responseRedacted = outcome.wasRedacted;
    redactionCount += outcome.redactionCount;
  }

  return {
    prompt: redactedPrompt,
    response: redactedResponse,
    info: {
      redacted: promptRedacted || responseRedacted,
      promptRedacted,
      responseRedacted,
      redactionCount,
    },
  };
}

/**
 * Settle with `work`, or reject with a timeout error if `work` takes longer
 * than `ms` milliseconds. The timer is always cancelled once the race is
 * decided so it cannot keep the process alive.
 */
async function withTimeout<T>(work: Promise<T>, ms: number): Promise<T> {
  const timer = createTimeout(ms);
  try {
    return await Promise.race([work, timer.promise]);
  } finally {
    timer.cancel();
  }
}

/**
 * Create a promise that rejects with `Timeout after {ms}ms` once `ms`
 * milliseconds have passed, together with a `cancel` function that disarms it.
 */
function createTimeout(ms: number): { promise: Promise<never>; cancel: () => void } {
  let handle: ReturnType<typeof setTimeout> | undefined;
  const promise = new Promise<never>((_, reject) => {
    handle = setTimeout(() => reject(new Error(`Timeout after ${ms}ms`)), ms);
  });
  return {
    promise,
    cancel: () => {
      if (handle !== undefined) {
        clearTimeout(handle);
      }
    },
  };
}
248
+
249
/**
 * Pause for the given number of milliseconds.
 *
 * @param ms - Delay before the returned promise resolves.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Runner module exports
3
+ */
4
+
5
+ export * from './types';
6
+ export { executeCase } from './executor';
7
+ export { runScenario, runScenarios } from './runner';
@@ -0,0 +1,153 @@
1
+ /**
2
+ * Scenario runner - main entry point for running test scenarios
3
+ */
4
+
5
+ import { createRunManifest } from '../artifacts/manifest';
6
+ import type { CaseResult, ManifestRedactionInfo } from '../artifacts/types';
7
+ import { Redactor } from '../redaction';
8
+ import { executeCase } from './executor';
9
+ import type { RunOptions, RunResult } from './types';
10
+
11
/**
 * Run every (optionally tag-filtered) case of a scenario and build the run
 * manifest.
 *
 * Cases are executed in batches of `concurrency`; each batch finishes before
 * the next one starts, and results keep the order of the scenario's cases.
 *
 * @param options - Scenario, client and execution settings.
 * @returns The manifest, the per-case results and whether no case failed.
 * @throws Error when no case is left after tag filtering.
 */
export async function runScenario(options: RunOptions): Promise<RunResult> {
  const {
    scenario,
    client,
    project = process.env.ARTEMIS_PROJECT || 'default',
    resolvedConfig,
    tags,
    concurrency = 1,
    timeout,
    retries,
    redaction,
    onCaseComplete,
    onProgress,
  } = options;

  // Filter cases by tags if specified (a case matches when it has any of them)
  let cases = scenario.cases;
  if (tags && tags.length > 0) {
    cases = cases.filter((c) => tags.some((tag) => c.tags.includes(tag)));
    onProgress?.(`Filtered to ${cases.length} cases by tags: ${tags.join(', ')}`);
  }

  if (cases.length === 0) {
    throw new Error('No test cases to run after filtering');
  }

  onProgress?.(`Running ${cases.length} test cases...`);

  // A batch size below 1 (0, negative, NaN) would make chunking loop forever,
  // so anything that is not at least 1 falls back to sequential execution.
  const batchSize = concurrency >= 1 ? Math.floor(concurrency) : 1;

  const startTime = new Date();
  const results: CaseResult[] = [];
  let completed = 0;

  // With a batch size of 1 this is plain sequential execution.
  for (const batch of chunkArray(cases, batchSize)) {
    const batchResults = await Promise.all(
      batch.map(async (testCase) => {
        const result = await executeCase(testCase, {
          client,
          scenario,
          timeout: testCase.timeout || timeout,
          retries: testCase.retries ?? retries,
          redaction,
        });
        // The index reported to the callback is the completion order.
        completed++;
        onCaseComplete?.(result, completed - 1, cases.length);
        return result;
      })
    );
    results.push(...batchResults);
  }

  const endTime = new Date();

  // Calculate redaction metadata if redaction is enabled at run level.
  // NOTE(review): this picks `redaction ?? scenario.redaction`, while the
  // executor merges CLI > case > scenario per case; the manifest summary can
  // therefore disagree with what was applied to individual cases — confirm.
  let redactionInfo: ManifestRedactionInfo | undefined;
  const effectiveRedaction = redaction ?? scenario.redaction;

  if (effectiveRedaction?.enabled) {
    const redactor = new Redactor(effectiveRedaction);
    const promptsRedacted = results.filter((r) => r.redaction?.promptRedacted).length;
    const responsesRedacted = results.filter((r) => r.redaction?.responseRedacted).length;
    const totalRedactions = results.reduce((sum, r) => sum + (r.redaction?.redactionCount ?? 0), 0);

    redactionInfo = {
      enabled: true,
      patternsUsed: redactor.patternNames,
      replacement: redactor.replacement,
      summary: {
        promptsRedacted,
        responsesRedacted,
        totalRedactions,
      },
    };
  }

  // Create manifest
  const manifest = createRunManifest({
    project,
    config: {
      scenario: scenario.name,
      provider: client.provider,
      model: resolvedConfig?.model || scenario.model,
      temperature: resolvedConfig?.temperature ?? scenario.temperature,
      seed: scenario.seed,
    },
    resolvedConfig,
    cases: results,
    startTime,
    endTime,
    redaction: redactionInfo,
  });

  // The run succeeds only when the manifest reports no failed case.
  const success = manifest.metrics.failed_cases === 0;

  return {
    manifest,
    cases: results,
    success,
  };
}
132
+
133
/**
 * Run several scenarios one after another.
 *
 * @param optionsList - One `RunOptions` entry per scenario, run in order.
 * @returns The run results, in the same order as `optionsList`.
 */
export async function runScenarios(optionsList: RunOptions[]): Promise<RunResult[]> {
  const collected: RunResult[] = [];

  // Deliberately sequential: each scenario finishes before the next starts.
  for (const runOptions of optionsList) {
    collected.push(await runScenario(runOptions));
  }

  return collected;
}
146
+
147
/**
 * Split an array into consecutive chunks of at most `size` elements.
 *
 * @param array - Items to split; not modified.
 * @param size - Maximum chunk length; fractional values are rounded down.
 * @returns The chunks in original order (empty for an empty array).
 * @throws RangeError when `size` is below 1 or NaN, which would otherwise
 *   make the loop never advance.
 */
function chunkArray<T>(array: T[], size: number): T[][] {
  const step = Math.floor(size);
  // `!(step >= 1)` also rejects NaN, which every comparison treats as false.
  if (!(step >= 1)) {
    throw new RangeError(`Chunk size must be at least 1, received ${size}`);
  }

  const chunks: T[][] = [];
  for (let i = 0; i < array.length; i += step) {
    chunks.push(array.slice(i, i + step));
  }
  return chunks;
}
@@ -0,0 +1,60 @@
1
+ /**
2
+ * Runner types and interfaces
3
+ */
4
+
5
+ import type { ModelClient } from '../adapters/types';
6
+ import type { CaseResult, ResolvedConfig, RunManifest } from '../artifacts/types';
7
+ import type { RedactionConfig } from '../redaction/types';
8
+ import type { Scenario } from '../scenario/schema';
9
+
10
+ /**
11
+ * Options accepted by `runScenario`; everything except `scenario` and `client` is optional.
12
+ */
13
+ export interface RunOptions {
14
+ /** The scenario whose cases are run */
15
+ scenario: Scenario;
16
+ /** Model client used to generate responses; its `provider` is recorded in the manifest */
17
+ client: ModelClient;
18
+ /** Project name for the manifest; the runner falls back to `ARTEMIS_PROJECT`, then 'default' */
19
+ project?: string;
20
+ /** Resolved configuration with source tracking; its model and temperature take precedence in the manifest */
21
+ resolvedConfig?: ResolvedConfig;
22
+ /** Only run cases that carry at least one of these tags */
23
+ tags?: string[];
24
+ /** Number of cases executed in parallel per batch (the runner defaults to 1) */
25
+ concurrency?: number;
26
+ /** Timeout per case in milliseconds; a truthy case-level timeout takes precedence */
27
+ timeout?: number;
28
+ /** Number of retries per case; a case-level value takes precedence */
29
+ retries?: number;
30
+ /** Redaction configuration, treated by the executor as the CLI-level (highest-priority) config */
31
+ redaction?: RedactionConfig;
32
+ /** Called after each case with its result, a zero-based completion index and the total case count */
33
+ onCaseComplete?: (result: CaseResult, index: number, total: number) => void;
34
+ /** Called with human-readable progress messages */
35
+ onProgress?: (message: string) => void;
36
+ }
37
+
38
+ /**
39
+ * Result of a scenario run, as returned by `runScenario`
40
+ */
41
+ export interface RunResult {
42
+ /** The manifest generated for this run */
43
+ manifest: RunManifest;
44
+ /** Individual case results, in the order of the executed cases */
45
+ cases: CaseResult[];
46
+ /** True when the manifest metrics report zero failed cases */
47
+ success: boolean;
48
+ }
49
+
50
+ /**
51
+ * Context passed to the case executor (`executeCase`) for a single case
52
+ */
53
+ export interface ExecutorContext {
54
+ client: ModelClient; // generates the response and is also handed to the evaluator
55
+ scenario: Scenario; // supplies variables, system prompt, model settings and scenario-level redaction
56
+ timeout?: number; // limit for the model call in milliseconds; falsy means no limit
57
+ retries?: number; // extra attempts after a failed one; the executor defaults this to 0
58
+ /** Redaction configuration for this execution; merged as the CLI-level (highest-priority) config */
59
+ redaction?: RedactionConfig;
60
+ }
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Scenario module exports
3
+ */
4
+
5
+ export * from './schema';
6
+ export * from './parser';
7
+ export * from './variables';
@@ -0,0 +1,99 @@
1
+ /**
2
+ * Tests for scenario parser
3
+ */
4
+
5
+ import { describe, expect, test } from 'bun:test';
6
+ import { parseScenarioString, validateScenario } from './parser';
7
+
8
+ describe('parseScenarioString', () => { // covers YAML text -> validated Scenario, including failure paths
9
+ test('parses valid YAML scenario', () => { // happy path: one case with an `exact` matcher
10
+ const yaml = `
11
+ name: Test Scenario
12
+ description: A test scenario
13
+ version: "1.0"
14
+ provider: openai
15
+ model: gpt-4
16
+ cases:
17
+ - id: test1
18
+ prompt: "Hello"
19
+ expected:
20
+ type: exact
21
+ value: "Hi"
22
+ `;
23
+ const scenario = parseScenarioString(yaml);
24
+ expect(scenario.name).toBe('Test Scenario');
25
+ expect(scenario.cases.length).toBe(1);
26
+ expect(scenario.cases[0].id).toBe('test1');
27
+ });
28
+
29
+ test('throws on invalid YAML', () => { // NOTE(review): the input is well-formed YAML; the throw presumably comes from schema validation of `cases` — confirm
30
+ const yaml = `
31
+ name: Test
32
+ cases: not an array
33
+ `;
34
+ expect(() => parseScenarioString(yaml)).toThrow();
35
+ });
36
+
37
+ test('throws on missing required fields', () => { // neither `name` nor `cases` is present
38
+ const yaml = `
39
+ description: Missing name and cases
40
+ `;
41
+ expect(() => parseScenarioString(yaml)).toThrow();
42
+ });
43
+
44
+ test('parses different expected types', () => { // one case each for the regex, fuzzy and contains matchers
45
+ const yaml = `
46
+ name: Multi-matcher Test
47
+ cases:
48
+ - id: regex-test
49
+ prompt: "Test"
50
+ expected:
51
+ type: regex
52
+ pattern: "^\\\\w+$"
53
+ - id: fuzzy-test
54
+ prompt: "Test"
55
+ expected:
56
+ type: fuzzy
57
+ value: "test"
58
+ threshold: 0.8
59
+ - id: contains-test
60
+ prompt: "Test"
61
+ expected:
62
+ type: contains
63
+ values:
64
+ - foo
65
+ - bar
66
+ mode: all
67
+ `;
68
+ const scenario = parseScenarioString(yaml);
69
+ expect(scenario.cases.length).toBe(3);
70
+ expect(scenario.cases[0].expected.type).toBe('regex'); // cases keep their YAML order
71
+ expect(scenario.cases[1].expected.type).toBe('fuzzy');
72
+ expect(scenario.cases[2].expected.type).toBe('contains');
73
+ });
74
+ });
75
+
76
+ describe('validateScenario', () => { // covers validation of an already-parsed scenario object
77
+ test('validates correct scenario object', () => { // minimal valid shape: a name plus one case
78
+ const scenario = {
79
+ name: 'Test',
80
+ cases: [
81
+ {
82
+ id: 'test1',
83
+ prompt: 'Hello',
84
+ expected: { type: 'exact', value: 'Hi' },
85
+ },
86
+ ],
87
+ };
88
+ const validated = validateScenario(scenario);
89
+ expect(validated.name).toBe('Test');
90
+ });
91
+
92
+ test('throws on invalid scenario', () => { // NOTE(review): presumably the schema requires at least one case — confirm against ScenarioSchema
93
+ const scenario = {
94
+ name: 'Test',
95
+ cases: [],
96
+ };
97
+ expect(() => validateScenario(scenario)).toThrow();
98
+ });
99
+ });
@@ -0,0 +1,108 @@
1
+ /**
2
+ * Scenario parser for YAML files
3
+ */
4
+
5
+ import { readFile } from 'node:fs/promises';
6
+ import { parse as parseYaml } from 'yaml';
7
+ import { ArtemisError } from '../utils/errors';
8
+ import { type Scenario, ScenarioSchema } from './schema';
9
+
10
/**
 * Recursively expand environment-variable references in a parsed config value.
 *
 * Supports `${VAR}` and `${VAR:-default}`. The default is everything after the
 * first `:-`, so it may itself contain `:-`. A variable that is unset or empty
 * is replaced by the default, or by an empty string when no default is given.
 *
 * @param obj - Any parsed value; strings are expanded, arrays and objects are
 *   walked, everything else is returned unchanged.
 * @returns A new structure with all string values expanded.
 */
function expandEnvVars(obj: unknown): unknown {
  if (typeof obj === 'string') {
    return obj.replace(/\$\{([^}]+)\}/g, (_match: string, expr: string) => {
      // Split on the first ':-' only so the default keeps any later ':-'.
      const separatorAt = expr.indexOf(':-');
      const varName = separatorAt === -1 ? expr : expr.slice(0, separatorAt);
      const defaultValue = separatorAt === -1 ? '' : expr.slice(separatorAt + 2);
      // `||` on purpose: an empty variable falls back to the default too.
      return process.env[varName] || defaultValue;
    });
  }

  if (Array.isArray(obj)) {
    return obj.map((item) => expandEnvVars(item));
  }

  if (obj && typeof obj === 'object') {
    // fromEntries defines own properties, so a `__proto__` key stays data
    // instead of replacing the result's prototype.
    return Object.fromEntries(
      Object.entries(obj).map(([key, value]) => [key, expandEnvVars(value)])
    );
  }

  return obj;
}
36
+
37
/**
 * Load a scenario from a YAML file on disk.
 *
 * @param filePath - Path of the YAML file; also used as the source label in
 *   error messages.
 * @returns The validated scenario.
 * @throws ArtemisError — parse and validation errors are passed through
 *   unchanged; any other failure is wrapped as `SCENARIO_READ_ERROR`.
 */
export async function parseScenarioFile(filePath: string): Promise<Scenario> {
  try {
    const yamlText = await readFile(filePath, 'utf-8');
    return parseScenarioString(yamlText, filePath);
  } catch (failure) {
    if (!(failure instanceof ArtemisError)) {
      throw new ArtemisError(`Failed to read scenario file: ${filePath}`, 'SCENARIO_READ_ERROR', {
        cause: failure as Error,
      });
    }
    // Already a domain error (parse/validation): let it through untouched.
    throw failure;
  }
}
53
+
54
/**
 * Turn YAML text into a validated scenario.
 *
 * The text is parsed, environment-variable references are expanded, and the
 * result is checked against `ScenarioSchema`.
 *
 * @param content - Raw YAML text.
 * @param source - Optional label (e.g. a file path) included in error messages.
 * @returns The validated scenario.
 * @throws ArtemisError with code `SCENARIO_VALIDATION_ERROR` when the schema
 *   rejects the document, or `SCENARIO_PARSE_ERROR` for any other failure.
 */
export function parseScenarioString(content: string, source?: string): Scenario {
  try {
    // Environment variables are expanded before validation.
    const document = expandEnvVars(parseYaml(content));
    const outcome = ScenarioSchema.safeParse(document);

    if (outcome.success) {
      return outcome.data;
    }

    // One line per schema issue: "<dotted.path>: <message>".
    const issueLines: string[] = [];
    for (const issue of outcome.error.issues) {
      issueLines.push(` - ${issue.path.join('.')}: ${issue.message}`);
    }
    const where = source ? ` in ${source}` : '';

    throw new ArtemisError(
      `Invalid scenario${where}:\n${issueLines.join('\n')}`,
      'SCENARIO_VALIDATION_ERROR',
      { zodError: outcome.error }
    );
  } catch (failure) {
    if (!(failure instanceof ArtemisError)) {
      const origin = source ? ` from ${source}` : '';
      throw new ArtemisError(`Failed to parse scenario YAML${origin}`, 'SCENARIO_PARSE_ERROR', {
        cause: failure as Error,
      });
    }
    // The validation error thrown above passes through unchanged.
    throw failure;
  }
}
90
+
91
/**
 * Check an already-parsed value against `ScenarioSchema`.
 *
 * @param scenario - Candidate scenario object of unknown shape.
 * @returns The validated scenario.
 * @throws ArtemisError with code `SCENARIO_VALIDATION_ERROR`, listing every
 *   schema issue, when the value is not a valid scenario.
 */
export function validateScenario(scenario: unknown): Scenario {
  const outcome = ScenarioSchema.safeParse(scenario);

  if (outcome.success) {
    return outcome.data;
  }

  // One line per schema issue: "<dotted.path>: <message>".
  const issueLines: string[] = [];
  for (const issue of outcome.error.issues) {
    issueLines.push(` - ${issue.path.join('.')}: ${issue.message}`);
  }

  throw new ArtemisError(
    `Invalid scenario:\n${issueLines.join('\n')}`,
    'SCENARIO_VALIDATION_ERROR',
    { zodError: outcome.error }
  );
}