@artemiskit/core 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. package/CHANGELOG.md +48 -0
  2. package/dist/adapters/factory.d.ts +23 -0
  3. package/dist/adapters/factory.d.ts.map +1 -0
  4. package/dist/adapters/index.d.ts +7 -0
  5. package/dist/adapters/index.d.ts.map +1 -0
  6. package/dist/adapters/registry.d.ts +56 -0
  7. package/dist/adapters/registry.d.ts.map +1 -0
  8. package/dist/adapters/types.d.ts +151 -0
  9. package/dist/adapters/types.d.ts.map +1 -0
  10. package/dist/artifacts/index.d.ts +6 -0
  11. package/dist/artifacts/index.d.ts.map +1 -0
  12. package/dist/artifacts/manifest.d.ts +19 -0
  13. package/dist/artifacts/manifest.d.ts.map +1 -0
  14. package/dist/artifacts/types.d.ts +368 -0
  15. package/dist/artifacts/types.d.ts.map +1 -0
  16. package/dist/evaluators/contains.d.ts +10 -0
  17. package/dist/evaluators/contains.d.ts.map +1 -0
  18. package/dist/evaluators/exact.d.ts +10 -0
  19. package/dist/evaluators/exact.d.ts.map +1 -0
  20. package/dist/evaluators/fuzzy.d.ts +10 -0
  21. package/dist/evaluators/fuzzy.d.ts.map +1 -0
  22. package/dist/evaluators/index.d.ts +24 -0
  23. package/dist/evaluators/index.d.ts.map +1 -0
  24. package/dist/evaluators/json-schema.d.ts +11 -0
  25. package/dist/evaluators/json-schema.d.ts.map +1 -0
  26. package/dist/evaluators/llm-grader.d.ts +11 -0
  27. package/dist/evaluators/llm-grader.d.ts.map +1 -0
  28. package/dist/evaluators/regex.d.ts +10 -0
  29. package/dist/evaluators/regex.d.ts.map +1 -0
  30. package/dist/evaluators/types.d.ts +29 -0
  31. package/dist/evaluators/types.d.ts.map +1 -0
  32. package/dist/index.d.ts +14 -0
  33. package/dist/index.d.ts.map +1 -0
  34. package/dist/index.js +26021 -0
  35. package/dist/provenance/environment.d.ts +12 -0
  36. package/dist/provenance/environment.d.ts.map +1 -0
  37. package/dist/provenance/git.d.ts +9 -0
  38. package/dist/provenance/git.d.ts.map +1 -0
  39. package/dist/provenance/index.d.ts +6 -0
  40. package/dist/provenance/index.d.ts.map +1 -0
  41. package/dist/redaction/index.d.ts +3 -0
  42. package/dist/redaction/index.d.ts.map +1 -0
  43. package/dist/redaction/redactor.d.ts +79 -0
  44. package/dist/redaction/redactor.d.ts.map +1 -0
  45. package/dist/redaction/types.d.ts +120 -0
  46. package/dist/redaction/types.d.ts.map +1 -0
  47. package/dist/runner/executor.d.ts +11 -0
  48. package/dist/runner/executor.d.ts.map +1 -0
  49. package/dist/runner/index.d.ts +7 -0
  50. package/dist/runner/index.d.ts.map +1 -0
  51. package/dist/runner/runner.d.ts +13 -0
  52. package/dist/runner/runner.d.ts.map +1 -0
  53. package/dist/runner/types.d.ts +57 -0
  54. package/dist/runner/types.d.ts.map +1 -0
  55. package/dist/scenario/index.d.ts +7 -0
  56. package/dist/scenario/index.d.ts.map +1 -0
  57. package/dist/scenario/parser.d.ts +17 -0
  58. package/dist/scenario/parser.d.ts.map +1 -0
  59. package/dist/scenario/schema.d.ts +945 -0
  60. package/dist/scenario/schema.d.ts.map +1 -0
  61. package/dist/scenario/variables.d.ts +19 -0
  62. package/dist/scenario/variables.d.ts.map +1 -0
  63. package/dist/storage/factory.d.ts +13 -0
  64. package/dist/storage/factory.d.ts.map +1 -0
  65. package/dist/storage/index.d.ts +8 -0
  66. package/dist/storage/index.d.ts.map +1 -0
  67. package/dist/storage/local.d.ts +20 -0
  68. package/dist/storage/local.d.ts.map +1 -0
  69. package/dist/storage/supabase.d.ts +21 -0
  70. package/dist/storage/supabase.d.ts.map +1 -0
  71. package/dist/storage/types.d.ts +86 -0
  72. package/dist/storage/types.d.ts.map +1 -0
  73. package/dist/utils/errors.d.ts +25 -0
  74. package/dist/utils/errors.d.ts.map +1 -0
  75. package/dist/utils/index.d.ts +6 -0
  76. package/dist/utils/index.d.ts.map +1 -0
  77. package/dist/utils/logger.d.ts +21 -0
  78. package/dist/utils/logger.d.ts.map +1 -0
  79. package/package.json +56 -0
  80. package/src/adapters/factory.ts +75 -0
  81. package/src/adapters/index.ts +7 -0
  82. package/src/adapters/registry.ts +143 -0
  83. package/src/adapters/types.ts +184 -0
  84. package/src/artifacts/index.ts +6 -0
  85. package/src/artifacts/manifest.test.ts +206 -0
  86. package/src/artifacts/manifest.ts +136 -0
  87. package/src/artifacts/types.ts +426 -0
  88. package/src/evaluators/contains.test.ts +58 -0
  89. package/src/evaluators/contains.ts +41 -0
  90. package/src/evaluators/exact.test.ts +48 -0
  91. package/src/evaluators/exact.ts +33 -0
  92. package/src/evaluators/fuzzy.test.ts +50 -0
  93. package/src/evaluators/fuzzy.ts +39 -0
  94. package/src/evaluators/index.ts +53 -0
  95. package/src/evaluators/json-schema.ts +98 -0
  96. package/src/evaluators/llm-grader.ts +100 -0
  97. package/src/evaluators/regex.test.ts +73 -0
  98. package/src/evaluators/regex.ts +43 -0
  99. package/src/evaluators/types.ts +37 -0
  100. package/src/index.ts +31 -0
  101. package/src/provenance/environment.ts +18 -0
  102. package/src/provenance/git.ts +48 -0
  103. package/src/provenance/index.ts +6 -0
  104. package/src/redaction/index.ts +23 -0
  105. package/src/redaction/redactor.test.ts +258 -0
  106. package/src/redaction/redactor.ts +246 -0
  107. package/src/redaction/types.ts +135 -0
  108. package/src/runner/executor.ts +251 -0
  109. package/src/runner/index.ts +7 -0
  110. package/src/runner/runner.ts +153 -0
  111. package/src/runner/types.ts +60 -0
  112. package/src/scenario/index.ts +7 -0
  113. package/src/scenario/parser.test.ts +99 -0
  114. package/src/scenario/parser.ts +108 -0
  115. package/src/scenario/schema.ts +176 -0
  116. package/src/scenario/variables.test.ts +150 -0
  117. package/src/scenario/variables.ts +60 -0
  118. package/src/storage/factory.ts +52 -0
  119. package/src/storage/index.ts +8 -0
  120. package/src/storage/local.test.ts +165 -0
  121. package/src/storage/local.ts +194 -0
  122. package/src/storage/supabase.ts +151 -0
  123. package/src/storage/types.ts +98 -0
  124. package/src/utils/errors.ts +76 -0
  125. package/src/utils/index.ts +6 -0
  126. package/src/utils/logger.ts +59 -0
  127. package/tsconfig.json +13 -0
@@ -0,0 +1,176 @@
1
+ /**
2
+ * Scenario schema definitions using Zod
3
+ */
4
+
5
+ import { z } from 'zod';
6
+ import { RedactionConfigSchema as BaseRedactionConfigSchema } from '../redaction/types';
7
+
8
/**
 * Identifiers of every provider a scenario is allowed to name.
 * Kept as a readonly tuple so the enum schema below infers a literal union.
 */
const PROVIDER_IDS = [
  'openai',
  'azure-openai',
  'vercel-ai',
  'anthropic',
  'google',
  'mistral',
  'cohere',
  'huggingface',
  'ollama',
  'custom',
] as const;

/**
 * Provider schema - accepts exactly one of the supported provider identifiers
 */
export const ProviderSchema = z.enum(PROVIDER_IDS);
23
+
24
/**
 * Field definitions for provider-level overrides.
 * Every field is optional, so a config only needs to list what it overrides.
 */
const providerConfigShape = {
  // Settings shared by all providers
  apiKey: z.string().optional(),
  baseUrl: z.string().optional(),
  defaultModel: z.string().optional(),
  timeout: z.number().optional(),
  maxRetries: z.number().optional(),

  // Only meaningful for OpenAI
  organization: z.string().optional(),

  // Only meaningful for Azure OpenAI
  resourceName: z.string().optional(),
  deploymentName: z.string().optional(),
  apiVersion: z.string().optional(),

  // Only meaningful for Vercel AI
  underlyingProvider: z.enum(['openai', 'azure', 'anthropic', 'google', 'mistral']).optional(),
};

/**
 * Provider config schema - optional overrides for provider settings.
 * Values are documented as supporting ${ENV_VAR} and ${ENV_VAR:-default}
 * syntax; that expansion is not performed by this schema.
 * The whole object may be omitted.
 */
export const ProviderConfigSchema = z.object(providerConfigShape).optional();
50
+
51
/** Expectation: response must equal `value` (case-sensitive unless disabled). */
const ExactExpectedSchema = z.object({
  type: z.literal('exact'),
  value: z.string(),
  caseSensitive: z.boolean().optional().default(true),
});

/** Expectation: response is checked against a regular expression. */
const RegexExpectedSchema = z.object({
  type: z.literal('regex'),
  pattern: z.string(),
  flags: z.string().optional(),
});

/** Expectation: response must be similar to `value`; threshold lies in [0, 1]. */
const FuzzyExpectedSchema = z.object({
  type: z.literal('fuzzy'),
  value: z.string(),
  threshold: z.number().min(0).max(1).default(0.8),
});

/** Expectation: response is graded by an LLM against a rubric. */
const LlmGraderExpectedSchema = z.object({
  type: z.literal('llm_grader'),
  rubric: z.string(),
  model: z.string().optional(),
  provider: ProviderSchema.optional(),
  threshold: z.number().min(0).max(1).default(0.7),
});

/** Expectation: response is checked for the listed values ('all' or 'any'). */
const ContainsExpectedSchema = z.object({
  type: z.literal('contains'),
  values: z.array(z.string()),
  mode: z.enum(['all', 'any']).default('all'),
});

/** Expectation: response is checked against a JSON schema document. */
const JsonSchemaExpectedSchema = z.object({
  type: z.literal('json_schema'),
  schema: z.record(z.unknown()),
});

/** Expectation: response is handed to a named custom evaluator. */
const CustomExpectedSchema = z.object({
  type: z.literal('custom'),
  evaluator: z.string(),
  config: z.record(z.unknown()).optional(),
});

/**
 * Expected result types - how to evaluate responses.
 * The `type` field selects which of the variants above applies.
 */
export const ExpectedSchema = z.discriminatedUnion('type', [
  ExactExpectedSchema,
  RegexExpectedSchema,
  FuzzyExpectedSchema,
  LlmGraderExpectedSchema,
  ContainsExpectedSchema,
  JsonSchemaExpectedSchema,
  CustomExpectedSchema,
]);
98
+
99
/** Roles a chat message may carry. */
const ChatRoleSchema = z.enum(['system', 'user', 'assistant']);

/**
 * Chat message schema - one turn of a conversation: who spoke and what was said
 */
export const ChatMessageSchema = z.object({
  role: ChatRoleSchema,
  content: z.string(),
});
106
+
107
/** A single variable value: string, number or boolean. */
const VariableValueSchema = z.union([z.string(), z.number(), z.boolean()]);

/**
 * Variables schema - string-keyed map of values used for template substitution
 */
export const VariablesSchema = z.record(z.string(), VariableValueSchema);
111
+
112
/**
 * Redaction settings as they appear inside a scenario or a test case.
 * Wraps the redaction module's config schema so the field may be omitted.
 * Module-private: only the inferred type is exported from this file.
 */
const RedactionConfigSchema = BaseRedactionConfigSchema.optional();
117
+
118
/** A prompt is either plain text or an ordered list of chat messages. */
const PromptSchema = z.union([z.string(), z.array(ChatMessageSchema)]);

/**
 * Test case schema - a single prompt together with the expectation used to
 * evaluate the response. Only `id`, `prompt` and `expected` are required;
 * `tags`, `metadata` and `retries` fall back to `[]`, `{}` and `0`.
 */
export const TestCaseSchema = z.object({
  id: z.string(),
  name: z.string().optional(),
  description: z.string().optional(),
  prompt: PromptSchema,
  expected: ExpectedSchema,
  tags: z.array(z.string()).optional().default([]),
  metadata: z.record(z.unknown()).optional().default({}),
  timeout: z.number().optional(),
  retries: z.number().optional().default(0),
  provider: ProviderSchema.optional(),
  model: z.string().optional(),
  variables: VariablesSchema.optional(),
  /** Case-level redaction config (overrides scenario-level) */
  redaction: RedactionConfigSchema,
});
137
+
138
/** Optional preparation settings for a scenario. */
const ScenarioSetupSchema = z
  .object({
    systemPrompt: z.string().optional(),
    functions: z.array(z.unknown()).optional(),
  })
  .optional();

/** Optional clean-up settings for a scenario. */
const ScenarioTeardownSchema = z
  .object({
    cleanup: z.boolean().optional(),
  })
  .optional();

/**
 * Scenario schema - a named collection of test cases plus the settings they
 * share. At least one case is required; `version` defaults to '1.0' and
 * `tags` to an empty list. `temperature`, when given, must lie in [0, 2].
 */
export const ScenarioSchema = z.object({
  name: z.string(),
  description: z.string().optional(),
  version: z.string().optional().default('1.0'),
  provider: ProviderSchema.optional(),
  model: z.string().optional(),
  providerConfig: ProviderConfigSchema,
  seed: z.number().optional(),
  temperature: z.number().min(0).max(2).optional(),
  maxTokens: z.number().optional(),
  tags: z.array(z.string()).optional().default([]),
  variables: VariablesSchema.optional(),
  /** Scenario-level redaction configuration */
  redaction: RedactionConfigSchema,
  setup: ScenarioSetupSchema,
  cases: z.array(TestCaseSchema).min(1),
  teardown: ScenarioTeardownSchema,
});
168
+
169
/** Parsed form of any expectation variant. */
export type Expected = z.infer<typeof ExpectedSchema>;

/** Parsed form of a single test case. */
export type TestCase = z.infer<typeof TestCaseSchema>;

/** Parsed form of a whole scenario. */
export type Scenario = z.infer<typeof ScenarioSchema>;

/** Union of the supported provider identifiers. */
export type Provider = z.infer<typeof ProviderSchema>;

/** Provider override settings; `undefined` when omitted. */
export type ProviderConfig = z.infer<typeof ProviderConfigSchema>;

/** One chat message (role plus content). */
export type ChatMessageType = z.infer<typeof ChatMessageSchema>;

/** Map of template variable names to their values. */
export type Variables = z.infer<typeof VariablesSchema>;

/** Redaction settings at scenario or case level; `undefined` when omitted. */
export type ScenarioRedactionConfig = z.infer<typeof RedactionConfigSchema>;
@@ -0,0 +1,150 @@
1
+ /**
2
+ * Tests for variable substitution
3
+ */
4
+
5
+ import { describe, expect, test } from 'bun:test';
6
+ import { mergeVariables, substituteString, substituteVariables } from './variables';
7
+
8
// Behaviour of substituteString on a single template string.
describe('substituteString', () => {
  test('substitutes single variable', () => {
    expect(substituteString('Hello {{name}}!', { name: 'World' })).toBe('Hello World!');
  });

  test('substitutes multiple variables', () => {
    const vars = { greeting: 'Hello', name: 'World' };
    expect(substituteString('{{greeting}} {{name}}!', vars)).toBe('Hello World!');
  });

  test('substitutes same variable multiple times', () => {
    expect(substituteString('{{x}} + {{x}} = 2{{x}}', { x: '1' })).toBe('1 + 1 = 21');
  });

  test('leaves unmatched variables as-is', () => {
    const output = substituteString('Hello {{name}} and {{unknown}}!', { name: 'World' });
    expect(output).toBe('Hello World and {{unknown}}!');
  });

  test('handles number values', () => {
    expect(substituteString('Count: {{count}}', { count: 42 })).toBe('Count: 42');
  });

  test('handles boolean values', () => {
    expect(substituteString('Enabled: {{enabled}}', { enabled: true })).toBe('Enabled: true');
  });

  test('handles empty variables object', () => {
    expect(substituteString('Hello {{name}}!', {})).toBe('Hello {{name}}!');
  });

  test('handles string without variables', () => {
    expect(substituteString('Hello World!', { name: 'Test' })).toBe('Hello World!');
  });
});
52
+
53
// Behaviour of substituteVariables on nested structures.
describe('substituteVariables', () => {
  test('substitutes in simple object', () => {
    const input = { message: 'Hello {{name}}!' };
    expect(substituteVariables(input, { name: 'World' })).toEqual({ message: 'Hello World!' });
  });

  test('substitutes in nested object', () => {
    const input = { outer: { inner: 'Value is {{value}}' } };
    const expected = { outer: { inner: 'Value is 42' } };
    expect(substituteVariables(input, { value: '42' })).toEqual(expected);
  });

  test('substitutes in arrays', () => {
    const input = ['Hello {{name}}', 'Goodbye {{name}}'];
    expect(substituteVariables(input, { name: 'World' })).toEqual([
      'Hello World',
      'Goodbye World',
    ]);
  });

  test('substitutes in array of objects', () => {
    const messages = [
      { role: 'user', content: 'My name is {{name}}' },
      { role: 'assistant', content: 'Hello {{name}}!' },
    ];
    expect(substituteVariables(messages, { name: 'Alice' })).toEqual([
      { role: 'user', content: 'My name is Alice' },
      { role: 'assistant', content: 'Hello Alice!' },
    ]);
  });

  test('preserves non-string values', () => {
    const input = {
      name: '{{product}}',
      count: 42,
      enabled: true,
      items: [1, 2, 3],
    };
    expect(substituteVariables(input, { product: 'Widget' })).toEqual({
      name: 'Widget',
      count: 42,
      enabled: true,
      items: [1, 2, 3],
    });
  });

  test('handles empty variables', () => {
    const input = { message: 'Hello {{name}}!' };
    expect(substituteVariables(input, {})).toEqual({ message: 'Hello {{name}}!' });
  });

  test('returns primitive values unchanged', () => {
    const vars = { x: '1' };
    expect(substituteVariables(42, vars)).toBe(42);
    expect(substituteVariables(true, vars)).toBe(true);
    expect(substituteVariables(null, vars)).toBe(null);
  });
});
124
+
125
// Behaviour of mergeVariables when combining scenario and case variables.
describe('mergeVariables', () => {
  test('merges scenario and case variables', () => {
    expect(mergeVariables({ a: '1', b: '2' }, { c: '3' })).toEqual({ a: '1', b: '2', c: '3' });
  });

  test('case variables override scenario variables', () => {
    const scenarioLevel = { name: 'Scenario', value: 'original' };
    const caseLevel = { name: 'Case' };
    expect(mergeVariables(scenarioLevel, caseLevel)).toEqual({ name: 'Case', value: 'original' });
  });

  test('handles undefined scenario variables', () => {
    expect(mergeVariables(undefined, { name: 'Case' })).toEqual({ name: 'Case' });
  });

  test('handles undefined case variables', () => {
    expect(mergeVariables({ name: 'Scenario' }, undefined)).toEqual({ name: 'Scenario' });
  });

  test('handles both undefined', () => {
    expect(mergeVariables(undefined, undefined)).toEqual({});
  });
});
@@ -0,0 +1,60 @@
1
+ /**
2
+ * Variable substitution for scenario templates
3
+ * Supports {{variable}} syntax in strings
4
+ */
5
+
6
+ import type { Variables } from './schema';
7
+
8
/**
 * Substitute variables in a string using {{variable}} syntax.
 *
 * Every `{{name}}` placeholder (where `name` matches `\w+`) is replaced with
 * `String(variables[name])` when `name` is an own key of `variables`.
 * Placeholders without a matching own key are left exactly as written.
 *
 * @param str - Template text that may contain `{{variable}}` placeholders
 * @param variables - Values available for substitution
 * @returns The text with every resolvable placeholder replaced
 */
export function substituteString(str: string, variables: Variables): string {
  return str.replace(/\{\{(\w+)\}\}/g, (match, varName) => {
    // Own-property check instead of `in`: `in` also sees inherited keys, so
    // `{{constructor}}` or `{{toString}}` would be replaced with the source
    // text of an Object.prototype member rather than being left alone.
    if (Object.prototype.hasOwnProperty.call(variables, varName)) {
      return String(variables[varName]);
    }
    // Leave unmatched variables as-is
    return match;
  });
}
20
+
21
/**
 * Recursively substitute variables in a value.
 *
 * - Strings are passed through `substituteString`.
 * - Arrays and plain objects are copied, with each element / property value
 *   processed recursively (property keys are never substituted).
 * - Everything else (numbers, booleans, null, undefined, and non-plain
 *   objects such as Date, RegExp, Map or class instances) is returned as-is.
 *
 * When `variables` is missing or has no keys, `obj` itself is returned
 * without copying.
 *
 * @param obj - Value to process
 * @param variables - Values available for substitution
 * @returns A value of the same shape with placeholders in strings replaced
 */
export function substituteVariables<T>(obj: T, variables: Variables): T {
  if (!variables || Object.keys(variables).length === 0) {
    return obj;
  }

  if (typeof obj === 'string') {
    return substituteString(obj, variables) as T;
  }

  if (Array.isArray(obj)) {
    return obj.map((item) => substituteVariables(item, variables)) as T;
  }

  if (obj && typeof obj === 'object') {
    // Only rebuild plain objects. Rebuilding anything else from its
    // enumerable entries would silently turn e.g. a Date into `{}`.
    const proto = Object.getPrototypeOf(obj);
    if (proto !== Object.prototype && proto !== null) {
      return obj;
    }

    const result: Record<string, unknown> = {};
    for (const [key, value] of Object.entries(obj)) {
      result[key] = substituteVariables(value, variables);
    }
    return result as T;
  }

  return obj;
}
48
+
49
/**
 * Combine scenario-level and case-level variables into one map.
 * When both define the same key, the case-level value wins.
 * Either argument may be undefined; the result is always a fresh object.
 */
export function mergeVariables(
  scenarioVars: Variables | undefined,
  caseVars: Variables | undefined
): Variables {
  const base = scenarioVars ?? {};
  const overrides = caseVars ?? {};
  // Later spread wins, so case-level entries replace scenario-level ones.
  return { ...base, ...overrides };
}
@@ -0,0 +1,52 @@
1
+ /**
2
+ * Storage factory for creating storage adapters
3
+ */
4
+
5
+ import { ArtemisError } from '../utils/errors';
6
+ import { LocalStorageAdapter } from './local';
7
+ import { SupabaseStorageAdapter } from './supabase';
8
+ import type { StorageAdapter, StorageConfig } from './types';
9
+
10
/**
 * Build the storage adapter described by `config`.
 *
 * - 'local' yields a LocalStorageAdapter for `config.basePath`.
 * - 'supabase' yields a SupabaseStorageAdapter and requires both `url` and
 *   `anonKey`; `bucket` is forwarded as given.
 *
 * Throws an ArtemisError with code 'CONFIG_ERROR' when the Supabase settings
 * are incomplete or the storage type is not recognised.
 */
export function createStorageAdapter(config: StorageConfig): StorageAdapter {
  if (config.type === 'local') {
    return new LocalStorageAdapter(config.basePath);
  }

  if (config.type === 'supabase') {
    const hasCredentials = Boolean(config.url) && Boolean(config.anonKey);
    if (!hasCredentials) {
      throw new ArtemisError(
        'Supabase storage requires url and anonKey configuration',
        'CONFIG_ERROR'
      );
    }
    return new SupabaseStorageAdapter({
      url: config.url,
      anonKey: config.anonKey,
      bucket: config.bucket,
    });
  }

  throw new ArtemisError(`Unknown storage type: ${config.type}`, 'CONFIG_ERROR');
}
35
+
36
/**
 * Pick a storage adapter based on environment variables.
 *
 * Supabase is used only when both SUPABASE_URL and SUPABASE_ANON_KEY are set
 * to non-empty values (SUPABASE_BUCKET is passed along as-is). Otherwise a
 * local adapter is returned, rooted at ARTEMIS_STORAGE_PATH or, when that is
 * unset or empty, './artemis-runs'.
 */
export function createStorageFromEnv(): StorageAdapter {
  const { SUPABASE_URL: url, SUPABASE_ANON_KEY: anonKey } = process.env;

  if (!url || !anonKey) {
    return new LocalStorageAdapter(process.env.ARTEMIS_STORAGE_PATH || './artemis-runs');
  }

  return new SupabaseStorageAdapter({
    url,
    anonKey,
    bucket: process.env.SUPABASE_BUCKET,
  });
}
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Storage module exports
3
+ */
4
+
5
+ export * from './types';
6
+ export { createStorageAdapter, createStorageFromEnv } from './factory';
7
+ export { SupabaseStorageAdapter, type SupabaseStorageConfig } from './supabase';
8
+ export { LocalStorageAdapter } from './local';
@@ -0,0 +1,165 @@
1
+ /**
2
+ * Tests for LocalStorageAdapter
3
+ */
4
+
5
+ import { afterAll, beforeAll, describe, expect, test } from 'bun:test';
6
+ import { mkdir, rm } from 'node:fs/promises';
7
+ import type { RunManifest } from '../artifacts/types';
8
+ import { LocalStorageAdapter } from './local';
9
+
10
+ const TEST_DIR = './test-artemis-runs';
11
+
12
// Exercises LocalStorageAdapter against a throwaway directory (TEST_DIR),
// which is created before the suite and removed afterwards.
describe('LocalStorageAdapter', () => {
  let storage: LocalStorageAdapter;

  // Baseline manifest; individual tests override run_id / metrics as needed.
  const mockManifest: RunManifest = {
    version: '1.0',
    run_id: 'test-run-123',
    project: 'test-project',
    start_time: '2024-01-01T00:00:00.000Z',
    end_time: '2024-01-01T00:01:00.000Z',
    duration_ms: 60000,
    config: {
      scenario: 'test-scenario',
      provider: 'openai',
      model: 'gpt-4',
    },
    metrics: {
      success_rate: 0.8,
      total_cases: 10,
      passed_cases: 8,
      failed_cases: 2,
      median_latency_ms: 150,
      p95_latency_ms: 300,
      total_tokens: 1000,
      total_prompt_tokens: 600,
      total_completion_tokens: 400,
    },
    cases: [],
    environment: {
      node_version: '20.0.0',
      os: 'darwin',
      arch: 'arm64',
    },
    provenance: {
      run_by: 'test-user',
    },
  };

  beforeAll(async () => {
    storage = new LocalStorageAdapter(TEST_DIR);
    await mkdir(TEST_DIR, { recursive: true });
  });

  afterAll(async () => {
    await rm(TEST_DIR, { recursive: true, force: true });
  });

  test('saves manifest to filesystem', async () => {
    const filePath = await storage.save(mockManifest);
    expect(filePath).toContain('test-run-123.json');
  });

  test('loads manifest from filesystem', async () => {
    // Save first
    await storage.save(mockManifest);

    // Load back
    const loaded = await storage.load('test-run-123');
    expect(loaded.run_id).toBe('test-run-123');
    expect(loaded.project).toBe('test-project');
    expect(loaded.config.scenario).toBe('test-scenario');
  });

  test('loadRun returns RunManifest', async () => {
    await storage.save(mockManifest);
    const loaded = await storage.loadRun('test-run-123');
    expect(loaded.metrics.success_rate).toBe(0.8);
  });

  test('throws error for non-existent run', async () => {
    await expect(storage.load('non-existent-run')).rejects.toThrow('Run not found');
  });

  test('lists runs', async () => {
    // Save a manifest
    await storage.save(mockManifest);

    // List runs
    const runs = await storage.list();
    expect(runs.length).toBeGreaterThanOrEqual(1);

    const testRun = runs.find((r) => r.runId === 'test-run-123');
    expect(testRun).toBeDefined();
    expect(testRun?.scenario).toBe('test-scenario');
    expect(testRun?.successRate).toBe(0.8);
  });

  test('lists runs with project filter', async () => {
    await storage.save(mockManifest);

    // The previous assertion (`... || true`) could never fail. Check instead
    // that the saved run, which belongs to 'test-project', passes the filter...
    const matching = await storage.list({ project: 'test-project' });
    expect(matching.some((r) => r.runId === 'test-run-123')).toBe(true);

    // ...and is excluded when filtering on a different project.
    const others = await storage.list({ project: 'some-other-project' });
    expect(others.some((r) => r.runId === 'test-run-123')).toBe(false);
  });

  test('lists runs with scenario filter', async () => {
    await storage.save(mockManifest);

    const runs = await storage.list({ scenario: 'test-scenario' });
    expect(runs.length).toBeGreaterThanOrEqual(1);
  });

  test('lists runs with limit', async () => {
    await storage.save(mockManifest);
    await storage.save({ ...mockManifest, run_id: 'test-run-456' });

    const runs = await storage.list({ limit: 1 });
    expect(runs.length).toBe(1);
  });

  test('deletes run', async () => {
    const manifest = { ...mockManifest, run_id: 'to-delete-123' };
    await storage.save(manifest);

    // Verify it exists
    const loaded = await storage.load('to-delete-123');
    expect(loaded.run_id).toBe('to-delete-123');

    // Delete
    await storage.delete('to-delete-123');

    // Verify it's gone
    await expect(storage.load('to-delete-123')).rejects.toThrow('Run not found');
  });

  test('compares two runs', async () => {
    const baseline = { ...mockManifest, run_id: 'baseline-run' };
    const current = {
      ...mockManifest,
      run_id: 'current-run',
      metrics: {
        ...mockManifest.metrics,
        success_rate: 0.9,
        median_latency_ms: 120,
        total_tokens: 1100,
      },
    };

    await storage.save(baseline);
    await storage.save(current);

    const comparison = await storage.compare('baseline-run', 'current-run');

    expect(comparison.baseline.run_id).toBe('baseline-run');
    expect(comparison.current.run_id).toBe('current-run');
    // Deltas are current minus baseline: 0.9 - 0.8, 120 - 150, 1100 - 1000.
    expect(comparison.delta.successRate).toBeCloseTo(0.1, 2);
    expect(comparison.delta.latency).toBe(-30);
    expect(comparison.delta.tokens).toBe(100);
  });

  test('handles empty storage gracefully', async () => {
    const emptyStorage = new LocalStorageAdapter('./empty-test-dir');
    const runs = await emptyStorage.list();
    expect(runs).toEqual([]);
  });
});