@artemiskit/core 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +48 -0
- package/dist/adapters/factory.d.ts +23 -0
- package/dist/adapters/factory.d.ts.map +1 -0
- package/dist/adapters/index.d.ts +7 -0
- package/dist/adapters/index.d.ts.map +1 -0
- package/dist/adapters/registry.d.ts +56 -0
- package/dist/adapters/registry.d.ts.map +1 -0
- package/dist/adapters/types.d.ts +151 -0
- package/dist/adapters/types.d.ts.map +1 -0
- package/dist/artifacts/index.d.ts +6 -0
- package/dist/artifacts/index.d.ts.map +1 -0
- package/dist/artifacts/manifest.d.ts +19 -0
- package/dist/artifacts/manifest.d.ts.map +1 -0
- package/dist/artifacts/types.d.ts +368 -0
- package/dist/artifacts/types.d.ts.map +1 -0
- package/dist/evaluators/contains.d.ts +10 -0
- package/dist/evaluators/contains.d.ts.map +1 -0
- package/dist/evaluators/exact.d.ts +10 -0
- package/dist/evaluators/exact.d.ts.map +1 -0
- package/dist/evaluators/fuzzy.d.ts +10 -0
- package/dist/evaluators/fuzzy.d.ts.map +1 -0
- package/dist/evaluators/index.d.ts +24 -0
- package/dist/evaluators/index.d.ts.map +1 -0
- package/dist/evaluators/json-schema.d.ts +11 -0
- package/dist/evaluators/json-schema.d.ts.map +1 -0
- package/dist/evaluators/llm-grader.d.ts +11 -0
- package/dist/evaluators/llm-grader.d.ts.map +1 -0
- package/dist/evaluators/regex.d.ts +10 -0
- package/dist/evaluators/regex.d.ts.map +1 -0
- package/dist/evaluators/types.d.ts +29 -0
- package/dist/evaluators/types.d.ts.map +1 -0
- package/dist/index.d.ts +14 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +26021 -0
- package/dist/provenance/environment.d.ts +12 -0
- package/dist/provenance/environment.d.ts.map +1 -0
- package/dist/provenance/git.d.ts +9 -0
- package/dist/provenance/git.d.ts.map +1 -0
- package/dist/provenance/index.d.ts +6 -0
- package/dist/provenance/index.d.ts.map +1 -0
- package/dist/redaction/index.d.ts +3 -0
- package/dist/redaction/index.d.ts.map +1 -0
- package/dist/redaction/redactor.d.ts +79 -0
- package/dist/redaction/redactor.d.ts.map +1 -0
- package/dist/redaction/types.d.ts +120 -0
- package/dist/redaction/types.d.ts.map +1 -0
- package/dist/runner/executor.d.ts +11 -0
- package/dist/runner/executor.d.ts.map +1 -0
- package/dist/runner/index.d.ts +7 -0
- package/dist/runner/index.d.ts.map +1 -0
- package/dist/runner/runner.d.ts +13 -0
- package/dist/runner/runner.d.ts.map +1 -0
- package/dist/runner/types.d.ts +57 -0
- package/dist/runner/types.d.ts.map +1 -0
- package/dist/scenario/index.d.ts +7 -0
- package/dist/scenario/index.d.ts.map +1 -0
- package/dist/scenario/parser.d.ts +17 -0
- package/dist/scenario/parser.d.ts.map +1 -0
- package/dist/scenario/schema.d.ts +945 -0
- package/dist/scenario/schema.d.ts.map +1 -0
- package/dist/scenario/variables.d.ts +19 -0
- package/dist/scenario/variables.d.ts.map +1 -0
- package/dist/storage/factory.d.ts +13 -0
- package/dist/storage/factory.d.ts.map +1 -0
- package/dist/storage/index.d.ts +8 -0
- package/dist/storage/index.d.ts.map +1 -0
- package/dist/storage/local.d.ts +20 -0
- package/dist/storage/local.d.ts.map +1 -0
- package/dist/storage/supabase.d.ts +21 -0
- package/dist/storage/supabase.d.ts.map +1 -0
- package/dist/storage/types.d.ts +86 -0
- package/dist/storage/types.d.ts.map +1 -0
- package/dist/utils/errors.d.ts +25 -0
- package/dist/utils/errors.d.ts.map +1 -0
- package/dist/utils/index.d.ts +6 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/logger.d.ts +21 -0
- package/dist/utils/logger.d.ts.map +1 -0
- package/package.json +56 -0
- package/src/adapters/factory.ts +75 -0
- package/src/adapters/index.ts +7 -0
- package/src/adapters/registry.ts +143 -0
- package/src/adapters/types.ts +184 -0
- package/src/artifacts/index.ts +6 -0
- package/src/artifacts/manifest.test.ts +206 -0
- package/src/artifacts/manifest.ts +136 -0
- package/src/artifacts/types.ts +426 -0
- package/src/evaluators/contains.test.ts +58 -0
- package/src/evaluators/contains.ts +41 -0
- package/src/evaluators/exact.test.ts +48 -0
- package/src/evaluators/exact.ts +33 -0
- package/src/evaluators/fuzzy.test.ts +50 -0
- package/src/evaluators/fuzzy.ts +39 -0
- package/src/evaluators/index.ts +53 -0
- package/src/evaluators/json-schema.ts +98 -0
- package/src/evaluators/llm-grader.ts +100 -0
- package/src/evaluators/regex.test.ts +73 -0
- package/src/evaluators/regex.ts +43 -0
- package/src/evaluators/types.ts +37 -0
- package/src/index.ts +31 -0
- package/src/provenance/environment.ts +18 -0
- package/src/provenance/git.ts +48 -0
- package/src/provenance/index.ts +6 -0
- package/src/redaction/index.ts +23 -0
- package/src/redaction/redactor.test.ts +258 -0
- package/src/redaction/redactor.ts +246 -0
- package/src/redaction/types.ts +135 -0
- package/src/runner/executor.ts +251 -0
- package/src/runner/index.ts +7 -0
- package/src/runner/runner.ts +153 -0
- package/src/runner/types.ts +60 -0
- package/src/scenario/index.ts +7 -0
- package/src/scenario/parser.test.ts +99 -0
- package/src/scenario/parser.ts +108 -0
- package/src/scenario/schema.ts +176 -0
- package/src/scenario/variables.test.ts +150 -0
- package/src/scenario/variables.ts +60 -0
- package/src/storage/factory.ts +52 -0
- package/src/storage/index.ts +8 -0
- package/src/storage/local.test.ts +165 -0
- package/src/storage/local.ts +194 -0
- package/src/storage/supabase.ts +151 -0
- package/src/storage/types.ts +98 -0
- package/src/utils/errors.ts +76 -0
- package/src/utils/index.ts +6 -0
- package/src/utils/logger.ts +59 -0
- package/tsconfig.json +13 -0
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
/**
 * Core types and interfaces for model adapters
 */

/**
 * Chat message format compatible with OpenAI/Anthropic
 */
export interface ChatMessage {
  // Speaker role; 'function' / 'tool' are used to return call results to the model.
  role: 'system' | 'user' | 'assistant' | 'function' | 'tool';
  content: string;
  // Optional author name (e.g. the function name when role is 'function').
  name?: string;
  // Legacy OpenAI function-calling payload; `arguments` is the provider's
  // raw string (JSON-encoded by convention — confirm per provider).
  function_call?: {
    name: string;
    arguments: string;
  };
  // Newer tool-calling payload (see ToolCall below).
  tool_calls?: ToolCall[];
}

/**
 * A single tool invocation requested by the model.
 */
export interface ToolCall {
  // Provider-assigned id used to correlate the tool result message.
  id: string;
  type: 'function';
  function: {
    name: string;
    // Raw argument string as returned by the provider.
    arguments: string;
  };
}
/**
 * Options for generating a completion
 */
export interface GenerateOptions {
  // Either a raw prompt string or a structured chat transcript.
  prompt: string | ChatMessage[];
  // Model identifier; presumably falls back to the adapter's defaultModel
  // when omitted — TODO confirm against adapter implementations.
  model?: string;
  maxTokens?: number;
  temperature?: number;
  topP?: number;
  // Determinism seed; provider support varies, so treat as best-effort.
  seed?: number;
  // Sequences that stop generation when emitted.
  stop?: string[];
  // Legacy function-calling definitions.
  functions?: FunctionDefinition[];
  // Tool definitions for tool-calling providers.
  tools?: ToolDefinition[];
  // Request plain text or JSON-mode output where supported.
  responseFormat?: { type: 'text' | 'json_object' };
  // Free-form caller metadata, passed through untouched.
  metadata?: Record<string, unknown>;
}

/**
 * Function/Tool definitions for function calling
 */
export interface FunctionDefinition {
  name: string;
  description?: string;
  // Argument schema (JSON-Schema-shaped by provider convention).
  parameters: Record<string, unknown>;
}

/**
 * A function exposed to the model through the tool-calling interface.
 */
export interface ToolDefinition {
  type: 'function';
  function: FunctionDefinition;
}
/**
 * Token usage information
 */
export interface TokenUsage {
  prompt: number;
  completion: number;
  // Expected to equal prompt + completion as reported by the provider.
  total: number;
}

/**
 * Result from a generation request
 */
export interface GenerateResult {
  // Provider-assigned response id.
  id: string;
  // Model that actually served the request.
  model: string;
  // Generated completion text.
  text: string;
  tokens: TokenUsage;
  // Wall-clock latency of the request, in milliseconds.
  latencyMs: number;
  // Why generation ended; mirrors OpenAI-style finish reasons.
  finishReason?: 'stop' | 'length' | 'function_call' | 'tool_calls' | 'content_filter';
  // Present when the model answered with a legacy function call.
  functionCall?: {
    name: string;
    arguments: string;
  };
  // Present when the model answered with tool calls.
  toolCalls?: ToolCall[];
  // Untouched provider response, kept as a debugging escape hatch.
  raw?: unknown;
}
/**
 * Model capabilities
 */
export interface ModelCapabilities {
  streaming: boolean;
  functionCalling: boolean;
  toolUse: boolean;
  // Maximum context window, in tokens.
  maxContext: number;
  vision?: boolean;
  jsonMode?: boolean;
}

/**
 * ModelClient interface - All adapters must implement this
 */
export interface ModelClient {
  // Provider identifier (see ProviderType).
  readonly provider: string;

  // Perform a single, non-streaming completion request.
  generate(options: GenerateOptions): Promise<GenerateResult>;

  // Optional streaming API.
  // NOTE(review): chunks are exposed both via the onChunk callback and the
  // returned AsyncIterable — the intended contract between the two is not
  // specified here; confirm adapters deliver them consistently.
  stream?(options: GenerateOptions, onChunk: (chunk: string) => void): AsyncIterable<string>;

  // Optional embedding support.
  embed?(text: string): Promise<number[]>;

  // Report what this model/provider combination supports.
  capabilities(): Promise<ModelCapabilities>;

  // Optional cleanup hook for adapters holding connections or resources.
  close?(): Promise<void>;
}
/**
 * Provider types - all supported providers
 */
export type ProviderType =
  | 'openai'
  | 'azure-openai'
  | 'vercel-ai'
  | 'anthropic'
  | 'google'
  | 'mistral'
  | 'cohere'
  | 'huggingface'
  | 'ollama'
  | 'custom';

/**
 * Base adapter configuration
 *
 * Connection settings shared by every provider adapter; the
 * provider-specific configs below extend this with extra fields.
 */
export interface BaseAdapterConfig {
  provider: ProviderType;
  apiKey?: string;
  // Overrides the provider's default endpoint (proxies, self-hosted, ollama).
  baseUrl?: string;
  defaultModel?: string;
  // Request timeout — presumably milliseconds; confirm against adapter impls.
  timeout?: number;
  maxRetries?: number;
}

/**
 * OpenAI-specific configuration
 */
export interface OpenAIAdapterConfig extends BaseAdapterConfig {
  provider: 'openai';
  // OpenAI organization id sent with requests.
  organization?: string;
}

/**
 * Azure OpenAI-specific configuration
 */
export interface AzureOpenAIAdapterConfig extends BaseAdapterConfig {
  provider: 'azure-openai';
  // Azure endpoint is addressed by resource + deployment + API version;
  // all three are required.
  resourceName: string;
  deploymentName: string;
  apiVersion: string;
}

/**
 * Vercel AI SDK configuration
 */
export interface VercelAIAdapterConfig extends BaseAdapterConfig {
  provider: 'vercel-ai';
  // Which concrete backend the Vercel AI SDK wraps.
  underlyingProvider: 'openai' | 'azure' | 'anthropic' | 'google' | 'mistral';
  // Presumably forwarded to the underlying provider's setup — verify in the
  // adapter factory.
  providerConfig?: Record<string, unknown>;
}

/**
 * Anthropic-specific configuration (Post-MVP)
 */
export interface AnthropicAdapterConfig extends BaseAdapterConfig {
  provider: 'anthropic';
}

/**
 * Union type for all adapter configs
 *
 * NOTE(review): including BaseAdapterConfig as a member means any
 * ProviderType value is accepted, so this union does not strictly
 * discriminate on `provider` for the specialized configs above.
 */
export type AdapterConfig =
  | OpenAIAdapterConfig
  | AzureOpenAIAdapterConfig
  | VercelAIAdapterConfig
  | AnthropicAdapterConfig
  | BaseAdapterConfig;
@@ -0,0 +1,206 @@
|
|
|
1
|
+
/**
 * Tests for manifest generation
 */

import { describe, expect, test } from 'bun:test';
import { createRunManifest } from './manifest';
import type { CaseResult } from './types';

describe('createRunManifest', () => {
  // One passing and one failing fixture case. The metric assertions below
  // depend on these exact token counts (10+5, 8+4) and latencies (100, 200).
  const mockCases: CaseResult[] = [
    {
      id: 'case-1',
      name: 'Test Case 1',
      ok: true,
      latencyMs: 100,
      prompt: 'Hello',
      response: 'Hi there!',
      tokens: { prompt: 10, completion: 5 },
      evaluations: [
        {
          type: 'contains',
          passed: true,
          score: 1,
          reason: 'Contains expected value',
        },
      ],
    },
    {
      id: 'case-2',
      name: 'Test Case 2',
      ok: false,
      latencyMs: 200,
      prompt: 'Goodbye',
      response: 'See you!',
      tokens: { prompt: 8, completion: 4 },
      evaluations: [
        {
          type: 'contains',
          passed: false,
          score: 0,
          reason: 'Missing expected value',
        },
      ],
    },
  ];

  test('creates manifest with correct structure', () => {
    const startTime = new Date('2024-01-01T00:00:00Z');
    const endTime = new Date('2024-01-01T00:01:00Z');

    const manifest = createRunManifest({
      project: 'test-project',
      config: {
        scenario: 'test-scenario',
        provider: 'openai',
        model: 'gpt-4',
      },
      cases: mockCases,
      startTime,
      endTime,
    });

    expect(manifest.version).toBe('1.0');
    expect(manifest.project).toBe('test-project');
    expect(manifest.run_id).toBeTruthy();
    // run_id is produced by nanoid(12), hence exactly 12 characters.
    expect(manifest.run_id.length).toBe(12);
    expect(manifest.config.scenario).toBe('test-scenario');
    expect(manifest.config.provider).toBe('openai');
    expect(manifest.config.model).toBe('gpt-4');
  });

  test('calculates metrics correctly', () => {
    const manifest = createRunManifest({
      project: 'test-project',
      config: {
        scenario: 'test-scenario',
        provider: 'openai',
        model: 'gpt-4',
      },
      cases: mockCases,
      startTime: new Date(),
      endTime: new Date(),
    });

    expect(manifest.metrics.total_cases).toBe(2);
    expect(manifest.metrics.passed_cases).toBe(1);
    expect(manifest.metrics.failed_cases).toBe(1);
    expect(manifest.metrics.success_rate).toBe(0.5);
    expect(manifest.metrics.total_tokens).toBe(27); // (10+5) + (8+4)
    expect(manifest.metrics.total_prompt_tokens).toBe(18); // 10 + 8
    expect(manifest.metrics.total_completion_tokens).toBe(9); // 5 + 4
  });

  test('calculates duration correctly', () => {
    const startTime = new Date('2024-01-01T00:00:00Z');
    const endTime = new Date('2024-01-01T00:01:00Z');

    const manifest = createRunManifest({
      project: 'test-project',
      config: {
        scenario: 'test-scenario',
        provider: 'openai',
        model: 'gpt-4',
      },
      cases: mockCases,
      startTime,
      endTime,
    });

    expect(manifest.duration_ms).toBe(60000); // 1 minute
    // Timestamps are serialized with Date#toISOString (millisecond precision).
    expect(manifest.start_time).toBe('2024-01-01T00:00:00.000Z');
    expect(manifest.end_time).toBe('2024-01-01T00:01:00.000Z');
  });

  test('handles empty cases array', () => {
    const manifest = createRunManifest({
      project: 'test-project',
      config: {
        scenario: 'test-scenario',
        provider: 'openai',
        model: 'gpt-4',
      },
      cases: [],
      startTime: new Date(),
      endTime: new Date(),
    });

    // Zero cases must not divide by zero: all metrics degrade to 0.
    expect(manifest.metrics.total_cases).toBe(0);
    expect(manifest.metrics.passed_cases).toBe(0);
    expect(manifest.metrics.failed_cases).toBe(0);
    expect(manifest.metrics.success_rate).toBe(0);
    expect(manifest.metrics.median_latency_ms).toBe(0);
    expect(manifest.metrics.p95_latency_ms).toBe(0);
  });

  test('includes resolved_config when provided', () => {
    const manifest = createRunManifest({
      project: 'test-project',
      config: {
        scenario: 'test-scenario',
        provider: 'openai',
        model: 'gpt-4',
      },
      resolvedConfig: {
        provider: 'openai',
        model: 'gpt-4',
        // `source` records where each value came from (CLI flag vs config file).
        source: {
          provider: 'cli',
          model: 'config',
        },
      },
      cases: mockCases,
      startTime: new Date(),
      endTime: new Date(),
    });

    expect(manifest.resolved_config).toBeDefined();
    expect(manifest.resolved_config?.provider).toBe('openai');
    expect(manifest.resolved_config?.source.provider).toBe('cli');
    expect(manifest.resolved_config?.source.model).toBe('config');
  });

  test('includes provenance information', () => {
    const manifest = createRunManifest({
      project: 'test-project',
      config: {
        scenario: 'test-scenario',
        provider: 'openai',
        model: 'gpt-4',
      },
      cases: mockCases,
      startTime: new Date(),
      endTime: new Date(),
      runBy: 'test-user',
      runReason: 'unit-test',
    });

    expect(manifest.provenance.run_by).toBe('test-user');
    expect(manifest.provenance.run_reason).toBe('unit-test');
  });

  test('calculates latency percentiles correctly', () => {
    // Five ascending latencies: median index = floor(5/2) = 2 (300),
    // p95 index = floor(5 * 0.95) = 4 (500) — nearest-rank style.
    const casesWithLatencies: CaseResult[] = [
      { ...mockCases[0], latencyMs: 100 },
      { ...mockCases[0], id: 'case-2', latencyMs: 200 },
      { ...mockCases[0], id: 'case-3', latencyMs: 300 },
      { ...mockCases[0], id: 'case-4', latencyMs: 400 },
      { ...mockCases[0], id: 'case-5', latencyMs: 500 },
    ];

    const manifest = createRunManifest({
      project: 'test-project',
      config: {
        scenario: 'test-scenario',
        provider: 'openai',
        model: 'gpt-4',
      },
      cases: casesWithLatencies,
      startTime: new Date(),
      endTime: new Date(),
    });

    expect(manifest.metrics.median_latency_ms).toBe(300);
    expect(manifest.metrics.p95_latency_ms).toBe(500);
  });
});
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Run manifest generation utilities
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { nanoid } from 'nanoid';
|
|
6
|
+
import { getEnvironmentInfo } from '../provenance/environment';
|
|
7
|
+
import { getGitInfo } from '../provenance/git';
|
|
8
|
+
import type {
|
|
9
|
+
CaseResult,
|
|
10
|
+
ManifestRedactionInfo,
|
|
11
|
+
ResolvedConfig,
|
|
12
|
+
RunConfig,
|
|
13
|
+
RunManifest,
|
|
14
|
+
RunMetrics,
|
|
15
|
+
} from './types';
|
|
16
|
+
|
|
17
|
+
/**
|
|
18
|
+
* Create a new run manifest
|
|
19
|
+
*/
|
|
20
|
+
export function createRunManifest(options: {
|
|
21
|
+
project: string;
|
|
22
|
+
config: RunConfig;
|
|
23
|
+
resolvedConfig?: ResolvedConfig;
|
|
24
|
+
cases: CaseResult[];
|
|
25
|
+
startTime: Date;
|
|
26
|
+
endTime: Date;
|
|
27
|
+
runBy?: string;
|
|
28
|
+
runReason?: string;
|
|
29
|
+
redaction?: ManifestRedactionInfo;
|
|
30
|
+
}): RunManifest {
|
|
31
|
+
const {
|
|
32
|
+
project,
|
|
33
|
+
config,
|
|
34
|
+
resolvedConfig,
|
|
35
|
+
cases,
|
|
36
|
+
startTime,
|
|
37
|
+
endTime,
|
|
38
|
+
runBy,
|
|
39
|
+
runReason,
|
|
40
|
+
redaction,
|
|
41
|
+
} = options;
|
|
42
|
+
|
|
43
|
+
const metrics = calculateMetrics(cases);
|
|
44
|
+
const git = getGitInfo();
|
|
45
|
+
const environment = getEnvironmentInfo();
|
|
46
|
+
|
|
47
|
+
return {
|
|
48
|
+
version: '1.0',
|
|
49
|
+
run_id: nanoid(12),
|
|
50
|
+
project,
|
|
51
|
+
start_time: startTime.toISOString(),
|
|
52
|
+
end_time: endTime.toISOString(),
|
|
53
|
+
duration_ms: endTime.getTime() - startTime.getTime(),
|
|
54
|
+
config,
|
|
55
|
+
resolved_config: resolvedConfig,
|
|
56
|
+
metrics,
|
|
57
|
+
git,
|
|
58
|
+
provenance: {
|
|
59
|
+
run_by: runBy || process.env.USER || 'unknown',
|
|
60
|
+
run_reason: runReason,
|
|
61
|
+
ci: detectCIEnvironment(),
|
|
62
|
+
},
|
|
63
|
+
cases,
|
|
64
|
+
environment,
|
|
65
|
+
redaction,
|
|
66
|
+
};
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
/**
|
|
70
|
+
* Calculate metrics from case results
|
|
71
|
+
*/
|
|
72
|
+
function calculateMetrics(cases: CaseResult[]): RunMetrics {
|
|
73
|
+
const passedCases = cases.filter((c) => c.ok);
|
|
74
|
+
const latencies = cases.map((c) => c.latencyMs).sort((a, b) => a - b);
|
|
75
|
+
|
|
76
|
+
const medianLatency = latencies.length > 0 ? latencies[Math.floor(latencies.length / 2)] : 0;
|
|
77
|
+
|
|
78
|
+
const p95Index = Math.floor(latencies.length * 0.95);
|
|
79
|
+
const p95Latency = latencies.length > 0 ? latencies[p95Index] : 0;
|
|
80
|
+
|
|
81
|
+
const totalPromptTokens = cases.reduce((sum, c) => sum + c.tokens.prompt, 0);
|
|
82
|
+
const totalCompletionTokens = cases.reduce((sum, c) => sum + c.tokens.completion, 0);
|
|
83
|
+
|
|
84
|
+
return {
|
|
85
|
+
success_rate: cases.length > 0 ? passedCases.length / cases.length : 0,
|
|
86
|
+
total_cases: cases.length,
|
|
87
|
+
passed_cases: passedCases.length,
|
|
88
|
+
failed_cases: cases.length - passedCases.length,
|
|
89
|
+
median_latency_ms: medianLatency,
|
|
90
|
+
p95_latency_ms: p95Latency,
|
|
91
|
+
total_tokens: totalPromptTokens + totalCompletionTokens,
|
|
92
|
+
total_prompt_tokens: totalPromptTokens,
|
|
93
|
+
total_completion_tokens: totalCompletionTokens,
|
|
94
|
+
};
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
/**
|
|
98
|
+
* Detect CI environment
|
|
99
|
+
*/
|
|
100
|
+
function detectCIEnvironment():
|
|
101
|
+
| { provider: string; build_id: string; build_url?: string }
|
|
102
|
+
| undefined {
|
|
103
|
+
if (process.env.GITHUB_ACTIONS) {
|
|
104
|
+
return {
|
|
105
|
+
provider: 'github-actions',
|
|
106
|
+
build_id: process.env.GITHUB_RUN_ID || '',
|
|
107
|
+
build_url: `${process.env.GITHUB_SERVER_URL}/${process.env.GITHUB_REPOSITORY}/actions/runs/${process.env.GITHUB_RUN_ID}`,
|
|
108
|
+
};
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
if (process.env.GITLAB_CI) {
|
|
112
|
+
return {
|
|
113
|
+
provider: 'gitlab-ci',
|
|
114
|
+
build_id: process.env.CI_JOB_ID || '',
|
|
115
|
+
build_url: process.env.CI_JOB_URL,
|
|
116
|
+
};
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
if (process.env.CIRCLECI) {
|
|
120
|
+
return {
|
|
121
|
+
provider: 'circleci',
|
|
122
|
+
build_id: process.env.CIRCLE_BUILD_NUM || '',
|
|
123
|
+
build_url: process.env.CIRCLE_BUILD_URL,
|
|
124
|
+
};
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
if (process.env.JENKINS_URL) {
|
|
128
|
+
return {
|
|
129
|
+
provider: 'jenkins',
|
|
130
|
+
build_id: process.env.BUILD_ID || '',
|
|
131
|
+
build_url: process.env.BUILD_URL,
|
|
132
|
+
};
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
return undefined;
|
|
136
|
+
}
|