@artemiskit/core 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +48 -0
- package/dist/adapters/factory.d.ts +23 -0
- package/dist/adapters/factory.d.ts.map +1 -0
- package/dist/adapters/index.d.ts +7 -0
- package/dist/adapters/index.d.ts.map +1 -0
- package/dist/adapters/registry.d.ts +56 -0
- package/dist/adapters/registry.d.ts.map +1 -0
- package/dist/adapters/types.d.ts +151 -0
- package/dist/adapters/types.d.ts.map +1 -0
- package/dist/artifacts/index.d.ts +6 -0
- package/dist/artifacts/index.d.ts.map +1 -0
- package/dist/artifacts/manifest.d.ts +19 -0
- package/dist/artifacts/manifest.d.ts.map +1 -0
- package/dist/artifacts/types.d.ts +368 -0
- package/dist/artifacts/types.d.ts.map +1 -0
- package/dist/evaluators/contains.d.ts +10 -0
- package/dist/evaluators/contains.d.ts.map +1 -0
- package/dist/evaluators/exact.d.ts +10 -0
- package/dist/evaluators/exact.d.ts.map +1 -0
- package/dist/evaluators/fuzzy.d.ts +10 -0
- package/dist/evaluators/fuzzy.d.ts.map +1 -0
- package/dist/evaluators/index.d.ts +24 -0
- package/dist/evaluators/index.d.ts.map +1 -0
- package/dist/evaluators/json-schema.d.ts +11 -0
- package/dist/evaluators/json-schema.d.ts.map +1 -0
- package/dist/evaluators/llm-grader.d.ts +11 -0
- package/dist/evaluators/llm-grader.d.ts.map +1 -0
- package/dist/evaluators/regex.d.ts +10 -0
- package/dist/evaluators/regex.d.ts.map +1 -0
- package/dist/evaluators/types.d.ts +29 -0
- package/dist/evaluators/types.d.ts.map +1 -0
- package/dist/index.d.ts +14 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +26021 -0
- package/dist/provenance/environment.d.ts +12 -0
- package/dist/provenance/environment.d.ts.map +1 -0
- package/dist/provenance/git.d.ts +9 -0
- package/dist/provenance/git.d.ts.map +1 -0
- package/dist/provenance/index.d.ts +6 -0
- package/dist/provenance/index.d.ts.map +1 -0
- package/dist/redaction/index.d.ts +3 -0
- package/dist/redaction/index.d.ts.map +1 -0
- package/dist/redaction/redactor.d.ts +79 -0
- package/dist/redaction/redactor.d.ts.map +1 -0
- package/dist/redaction/types.d.ts +120 -0
- package/dist/redaction/types.d.ts.map +1 -0
- package/dist/runner/executor.d.ts +11 -0
- package/dist/runner/executor.d.ts.map +1 -0
- package/dist/runner/index.d.ts +7 -0
- package/dist/runner/index.d.ts.map +1 -0
- package/dist/runner/runner.d.ts +13 -0
- package/dist/runner/runner.d.ts.map +1 -0
- package/dist/runner/types.d.ts +57 -0
- package/dist/runner/types.d.ts.map +1 -0
- package/dist/scenario/index.d.ts +7 -0
- package/dist/scenario/index.d.ts.map +1 -0
- package/dist/scenario/parser.d.ts +17 -0
- package/dist/scenario/parser.d.ts.map +1 -0
- package/dist/scenario/schema.d.ts +945 -0
- package/dist/scenario/schema.d.ts.map +1 -0
- package/dist/scenario/variables.d.ts +19 -0
- package/dist/scenario/variables.d.ts.map +1 -0
- package/dist/storage/factory.d.ts +13 -0
- package/dist/storage/factory.d.ts.map +1 -0
- package/dist/storage/index.d.ts +8 -0
- package/dist/storage/index.d.ts.map +1 -0
- package/dist/storage/local.d.ts +20 -0
- package/dist/storage/local.d.ts.map +1 -0
- package/dist/storage/supabase.d.ts +21 -0
- package/dist/storage/supabase.d.ts.map +1 -0
- package/dist/storage/types.d.ts +86 -0
- package/dist/storage/types.d.ts.map +1 -0
- package/dist/utils/errors.d.ts +25 -0
- package/dist/utils/errors.d.ts.map +1 -0
- package/dist/utils/index.d.ts +6 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/logger.d.ts +21 -0
- package/dist/utils/logger.d.ts.map +1 -0
- package/package.json +56 -0
- package/src/adapters/factory.ts +75 -0
- package/src/adapters/index.ts +7 -0
- package/src/adapters/registry.ts +143 -0
- package/src/adapters/types.ts +184 -0
- package/src/artifacts/index.ts +6 -0
- package/src/artifacts/manifest.test.ts +206 -0
- package/src/artifacts/manifest.ts +136 -0
- package/src/artifacts/types.ts +426 -0
- package/src/evaluators/contains.test.ts +58 -0
- package/src/evaluators/contains.ts +41 -0
- package/src/evaluators/exact.test.ts +48 -0
- package/src/evaluators/exact.ts +33 -0
- package/src/evaluators/fuzzy.test.ts +50 -0
- package/src/evaluators/fuzzy.ts +39 -0
- package/src/evaluators/index.ts +53 -0
- package/src/evaluators/json-schema.ts +98 -0
- package/src/evaluators/llm-grader.ts +100 -0
- package/src/evaluators/regex.test.ts +73 -0
- package/src/evaluators/regex.ts +43 -0
- package/src/evaluators/types.ts +37 -0
- package/src/index.ts +31 -0
- package/src/provenance/environment.ts +18 -0
- package/src/provenance/git.ts +48 -0
- package/src/provenance/index.ts +6 -0
- package/src/redaction/index.ts +23 -0
- package/src/redaction/redactor.test.ts +258 -0
- package/src/redaction/redactor.ts +246 -0
- package/src/redaction/types.ts +135 -0
- package/src/runner/executor.ts +251 -0
- package/src/runner/index.ts +7 -0
- package/src/runner/runner.ts +153 -0
- package/src/runner/types.ts +60 -0
- package/src/scenario/index.ts +7 -0
- package/src/scenario/parser.test.ts +99 -0
- package/src/scenario/parser.ts +108 -0
- package/src/scenario/schema.ts +176 -0
- package/src/scenario/variables.test.ts +150 -0
- package/src/scenario/variables.ts +60 -0
- package/src/storage/factory.ts +52 -0
- package/src/storage/index.ts +8 -0
- package/src/storage/local.test.ts +165 -0
- package/src/storage/local.ts +194 -0
- package/src/storage/supabase.ts +151 -0
- package/src/storage/types.ts +98 -0
- package/src/utils/errors.ts +76 -0
- package/src/utils/index.ts +6 -0
- package/src/utils/logger.ts +59 -0
- package/tsconfig.json +13 -0
|
@@ -0,0 +1,368 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Artifact types - run manifests and related structures
|
|
3
|
+
*/
|
|
4
|
+
/**
 * Redaction details for a single case result.
 *
 * Attached to individual case results (see the optional `redaction` field on
 * case result types) to record what was redacted in that case.
 */
export interface CaseRedactionInfo {
  /** Whether this case had redaction applied */
  redacted: boolean;
  /** Whether the prompt was redacted */
  promptRedacted: boolean;
  /** Whether the response was redacted */
  responseRedacted: boolean;
  /** Number of redactions in this case */
  redactionCount: number;
}
|
|
17
|
+
/**
 * Redaction metadata for a manifest.
 *
 * Records the redaction settings used for a whole run together with
 * aggregate counts.
 */
export interface ManifestRedactionInfo {
  /** Whether redaction was enabled */
  enabled: boolean;
  /** Pattern names used (names only, not the actual regexes, for security) */
  patternsUsed: string[];
  /** Replacement string used */
  replacement: string;
  /** Summary of redactions */
  summary: {
    /** Count of redacted prompts */
    promptsRedacted: number;
    /** Count of redacted responses */
    responsesRedacted: number;
    /** Total count of redactions */
    totalRedactions: number;
  };
}
|
|
34
|
+
/**
 * Individual test case result.
 */
export interface CaseResult {
  /** Case identifier */
  id: string;
  /** Optional case name */
  name?: string;
  /** Pass/fail flag for the case (NOTE(review): pass criteria are not visible in this declaration) */
  ok: boolean;
  /** Numeric score (NOTE(review): range is not specified in this declaration) */
  score: number;
  /** Name of the matcher type that produced this result */
  matcherType: string;
  /** Optional explanation for the outcome */
  reason?: string;
  /** Latency in milliseconds */
  latencyMs: number;
  /** Token counts for the case */
  tokens: {
    prompt: number;
    completion: number;
    total: number;
  };
  /** Prompt, either a plain string or a structured object */
  prompt: string | object;
  /** Model response text */
  response: string;
  /** Expected-value specification (shape not constrained beyond `object` here) */
  expected: object;
  /** Tags attached to the case */
  tags: string[];
  /** Optional error message */
  error?: string;
  /** Redaction information for this case */
  redaction?: CaseRedactionInfo;
}
|
|
58
|
+
/**
 * Run metrics: aggregate counts, latency figures and token totals for a run.
 */
export interface RunMetrics {
  /** Success rate (NOTE(review): presumably passed_cases / total_cases as a 0-1 fraction — confirm) */
  success_rate: number;
  /** Total number of cases */
  total_cases: number;
  /** Number of passed cases */
  passed_cases: number;
  /** Number of failed cases */
  failed_cases: number;
  /** Median latency in milliseconds */
  median_latency_ms: number;
  /** 95th percentile latency in milliseconds */
  p95_latency_ms: number;
  /** Total tokens */
  total_tokens: number;
  /** Total prompt tokens */
  total_prompt_tokens: number;
  /** Total completion tokens */
  total_completion_tokens: number;
}
|
|
72
|
+
/**
 * Git provenance information.
 */
export interface GitInfo {
  /** Commit identifier (NOTE(review): full vs. abbreviated hash is not specified here) */
  commit: string;
  /** Branch name */
  branch: string;
  /** Dirty flag (NOTE(review): presumably true when the working tree had uncommitted changes — confirm) */
  dirty: boolean;
  /** Optional remote */
  remote?: string;
}
|
|
81
|
+
/**
 * Run provenance information.
 */
export interface ProvenanceInfo {
  /** Who ran the run */
  run_by: string;
  /** Optional reason for the run */
  run_reason?: string;
  /** Optional CI details */
  ci?: {
    /** CI provider name */
    provider: string;
    /** CI build identifier */
    build_id: string;
    /** Optional URL of the CI build */
    build_url?: string;
  };
}
|
|
93
|
+
/**
|
|
94
|
+
* Configuration source - where a value came from
|
|
95
|
+
*/
|
|
96
|
+
export type ConfigSource =
  | 'cli'
  | 'scenario'
  | 'config'
  | 'env'
  | 'default';
|
|
97
|
+
/**
 * Resolved configuration with source tracking.
 * Captures exactly what was sent to the provider for reproducibility.
 *
 * Every setting except `provider` is optional. `source` mirrors the setting
 * names and records, per setting, which ConfigSource supplied the value.
 */
export interface ResolvedConfig {
  /** Provider used */
  provider: string;
  /** Model identifier passed to the API */
  model?: string;
  /** OpenAI organization ID */
  organization?: string;
  /** Base URL for API (custom endpoints) */
  base_url?: string;
  /** Azure resource name */
  resource_name?: string;
  /** Azure deployment name */
  deployment_name?: string;
  /** Azure API version */
  api_version?: string;
  /** Underlying provider for Vercel AI SDK */
  underlying_provider?: string;
  /** Request timeout in ms */
  timeout?: number;
  /** Max retries */
  max_retries?: number;
  /** Temperature setting */
  temperature?: number;
  /** Max tokens */
  max_tokens?: number;
  /** Source tracking - where each value came from (keys mirror the settings above) */
  source: {
    provider?: ConfigSource;
    model?: ConfigSource;
    organization?: ConfigSource;
    base_url?: ConfigSource;
    resource_name?: ConfigSource;
    deployment_name?: ConfigSource;
    api_version?: ConfigSource;
    underlying_provider?: ConfigSource;
    timeout?: ConfigSource;
    max_retries?: ConfigSource;
    temperature?: ConfigSource;
    max_tokens?: ConfigSource;
  };
}
|
|
142
|
+
/**
 * Run configuration (user-facing display).
 *
 * NOTE(review): `maxTokens` is camelCase here, while the equivalent field on
 * ResolvedConfig is `max_tokens`; keep the difference in mind when mapping
 * between the two.
 */
export interface RunConfig {
  /** Scenario the run was executed for */
  scenario: string;
  /** Provider name */
  provider: string;
  /** Optional model identifier */
  model?: string;
  /** Optional temperature setting */
  temperature?: number;
  /** Optional max tokens setting */
  maxTokens?: number;
  /** Optional seed */
  seed?: number;
}
|
|
153
|
+
/**
 * Complete run manifest.
 *
 * Unlike RedTeamManifest (`type: 'redteam'`) and StressManifest
 * (`type: 'stress'`), this interface declares no `type` field.
 */
export interface RunManifest {
  /** Manifest version string */
  version: string;
  /** Run identifier */
  run_id: string;
  /** Project name */
  project: string;
  /** Start time as a string (NOTE(review): format is not specified in this declaration) */
  start_time: string;
  /** End time as a string (NOTE(review): format is not specified in this declaration) */
  end_time: string;
  /** Duration in milliseconds */
  duration_ms: number;
  /** User-facing run configuration */
  config: RunConfig;
  /** Resolved configuration with full provider details and source tracking */
  resolved_config?: ResolvedConfig;
  /** Aggregate run metrics */
  metrics: RunMetrics;
  /** Git provenance */
  git: GitInfo;
  /** Run provenance */
  provenance: ProvenanceInfo;
  /** Per-case results */
  cases: CaseResult[];
  /** Runtime environment details */
  environment: {
    node_version: string;
    platform: string;
    arch: string;
  };
  /** Redaction information for this run */
  redaction?: ManifestRedactionInfo;
}
|
|
178
|
+
/**
|
|
179
|
+
* Red team test result status
|
|
180
|
+
*/
|
|
181
|
+
export type RedTeamStatus =
  | 'safe'
  | 'unsafe'
  | 'blocked'
  | 'error';
|
|
182
|
+
/**
|
|
183
|
+
* Red team severity levels
|
|
184
|
+
*/
|
|
185
|
+
export type RedTeamSeverity =
  | 'none'
  | 'low'
  | 'medium'
  | 'high'
  | 'critical';
|
|
186
|
+
/**
 * Individual red team test result.
 */
export interface RedTeamCaseResult {
  /** Original test case ID */
  caseId: string;
  /** Applied mutation (a single string; NOTE(review): confirm how multiple mutations are represented) */
  mutation: string;
  /** Mutated prompt sent to the model */
  prompt: string;
  /** Model response */
  response: string;
  /** Result status */
  status: RedTeamStatus;
  /** Severity if unsafe (the field is required; `'none'` is an available value) */
  severity: RedTeamSeverity;
  /** Reasons for the status */
  reasons: string[];
  /** Latency in milliseconds */
  latencyMs?: number;
  /** Redaction information for this case */
  redaction?: CaseRedactionInfo;
}
|
|
209
|
+
/**
 * Red team metrics.
 */
export interface RedTeamMetrics {
  /** Total number of tests run */
  total_tests: number;
  /** Tests where model responded safely */
  safe_responses: number;
  /** Tests blocked by provider content filter */
  blocked_responses: number;
  /** Tests with potentially unsafe responses */
  unsafe_responses: number;
  /** Tests that errored */
  error_responses: number;
  /** Total defended (safe + blocked) */
  defended: number;
  /** Defense rate (defended / testable results) */
  defense_rate: number;
  /** Breakdown by severity (has no `none` bucket, although RedTeamSeverity includes `'none'`) */
  by_severity: {
    low: number;
    medium: number;
    high: number;
    critical: number;
  };
}
|
|
235
|
+
/**
 * Red team configuration.
 */
export interface RedTeamConfig {
  /** Scenario the red team run was executed for */
  scenario: string;
  /** Provider name */
  provider: string;
  /** Optional model identifier */
  model?: string;
  /** Names of the mutations used */
  mutations: string[];
  /** Count per case (NOTE(review): presumably mutated prompts generated per original case — confirm) */
  count_per_case: number;
}
|
|
245
|
+
/**
 * Complete red team manifest.
 *
 * Carries the literal discriminant `type: 'redteam'`.
 */
export interface RedTeamManifest {
  /** Manifest version string */
  version: string;
  /** Literal tag identifying this manifest as a red team manifest */
  type: 'redteam';
  /** Run identifier */
  run_id: string;
  /** Project name */
  project: string;
  /** Start time as a string (NOTE(review): format is not specified in this declaration) */
  start_time: string;
  /** End time as a string (NOTE(review): format is not specified in this declaration) */
  end_time: string;
  /** Duration in milliseconds */
  duration_ms: number;
  /** Red team configuration */
  config: RedTeamConfig;
  /** Resolved configuration with full provider details and source tracking */
  resolved_config?: ResolvedConfig;
  /** Aggregate red team metrics */
  metrics: RedTeamMetrics;
  /** Git provenance */
  git: GitInfo;
  /** Run provenance */
  provenance: ProvenanceInfo;
  /** Per-test results */
  results: RedTeamCaseResult[];
  /** Runtime environment details */
  environment: {
    node_version: string;
    platform: string;
    arch: string;
  };
  /** Redaction information for this run */
  redaction?: ManifestRedactionInfo;
}
|
|
271
|
+
/**
 * Individual stress test request result.
 */
export interface StressRequestResult {
  /** Whether the request succeeded */
  success: boolean;
  /** Latency in milliseconds */
  latencyMs: number;
  /** Error message if failed */
  error?: string;
  /** Timestamp of the request (numeric; NOTE(review): epoch and unit are not specified in this declaration) */
  timestamp: number;
}
|
|
284
|
+
/**
 * Stress test metrics.
 */
export interface StressMetrics {
  /** Total requests made */
  total_requests: number;
  /** Successful requests */
  successful_requests: number;
  /** Failed requests */
  failed_requests: number;
  /** Success rate (0-1) */
  success_rate: number;
  /** Requests per second */
  requests_per_second: number;
  /** Minimum latency in ms */
  min_latency_ms: number;
  /** Maximum latency in ms */
  max_latency_ms: number;
  /** Average latency in ms */
  avg_latency_ms: number;
  /** 50th percentile latency in ms */
  p50_latency_ms: number;
  /** 90th percentile latency in ms */
  p90_latency_ms: number;
  /** 95th percentile latency in ms */
  p95_latency_ms: number;
  /** 99th percentile latency in ms */
  p99_latency_ms: number;
}
|
|
313
|
+
/**
 * Stress test configuration.
 */
export interface StressConfig {
  /** Scenario the stress test was executed for */
  scenario: string;
  /** Provider name */
  provider: string;
  /** Optional model identifier */
  model?: string;
  /** Concurrency level */
  concurrency: number;
  /** Test duration in seconds */
  duration_seconds: number;
  /** Ramp-up period in seconds */
  ramp_up_seconds: number;
  /** Optional cap on the number of requests */
  max_requests?: number;
}
|
|
325
|
+
/**
 * Complete stress test manifest.
 *
 * Carries the literal discriminant `type: 'stress'`.
 */
export interface StressManifest {
  /** Manifest version string */
  version: string;
  /** Literal tag identifying this manifest as a stress test manifest */
  type: 'stress';
  /** Run identifier */
  run_id: string;
  /** Project name */
  project: string;
  /** Start time as a string (NOTE(review): format is not specified in this declaration) */
  start_time: string;
  /** End time as a string (NOTE(review): format is not specified in this declaration) */
  end_time: string;
  /** Duration in milliseconds */
  duration_ms: number;
  /** Stress test configuration */
  config: StressConfig;
  /** Resolved configuration with full provider details and source tracking */
  resolved_config?: ResolvedConfig;
  /** Aggregate stress test metrics */
  metrics: StressMetrics;
  /** Git provenance */
  git: GitInfo;
  /** Run provenance */
  provenance: ProvenanceInfo;
  /** Sample of request results (not all, to keep size manageable) */
  sample_results: StressRequestResult[];
  /** Runtime environment details */
  environment: {
    node_version: string;
    platform: string;
    arch: string;
  };
  /** Redaction information for this run */
  redaction?: ManifestRedactionInfo;
}
|
|
352
|
+
/**
|
|
353
|
+
* Any manifest type
|
|
354
|
+
*/
|
|
355
|
+
export type AnyManifest =
  | RunManifest
  | RedTeamManifest
  | StressManifest;
|
|
356
|
+
/**
|
|
357
|
+
* Type guard for RunManifest
|
|
358
|
+
*/
|
|
359
|
+
// Type predicate: a true result narrows `manifest` to RunManifest for the type checker.
// Declaration only; the runtime check lives in the implementation, which is not visible here.
export declare function isRunManifest(manifest: AnyManifest): manifest is RunManifest;
|
|
360
|
+
/**
|
|
361
|
+
* Type guard for RedTeamManifest
|
|
362
|
+
*/
|
|
363
|
+
// Type predicate: a true result narrows `manifest` to RedTeamManifest for the type checker.
// Declaration only; NOTE(review): presumably checks `type === 'redteam'` — confirm in the implementation.
export declare function isRedTeamManifest(manifest: AnyManifest): manifest is RedTeamManifest;
|
|
364
|
+
/**
|
|
365
|
+
* Type guard for StressManifest
|
|
366
|
+
*/
|
|
367
|
+
// Type predicate: a true result narrows `manifest` to StressManifest for the type checker.
// Declaration only; NOTE(review): presumably checks `type === 'stress'` — confirm in the implementation.
export declare function isStressManifest(manifest: AnyManifest): manifest is StressManifest;
|
|
368
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/artifacts/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAMH;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,8CAA8C;IAC9C,QAAQ,EAAE,OAAO,CAAC;IAClB,kCAAkC;IAClC,cAAc,EAAE,OAAO,CAAC;IACxB,oCAAoC;IACpC,gBAAgB,EAAE,OAAO,CAAC;IAC1B,wCAAwC;IACxC,cAAc,EAAE,MAAM,CAAC;CACxB;AAED;;GAEG;AACH,MAAM,WAAW,qBAAqB;IACpC,oCAAoC;IACpC,OAAO,EAAE,OAAO,CAAC;IACjB,yDAAyD;IACzD,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,8BAA8B;IAC9B,WAAW,EAAE,MAAM,CAAC;IACpB,4BAA4B;IAC5B,OAAO,EAAE;QACP,eAAe,EAAE,MAAM,CAAC;QACxB,iBAAiB,EAAE,MAAM,CAAC;QAC1B,eAAe,EAAE,MAAM,CAAC;KACzB,CAAC;CACH;AAMD;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,EAAE,EAAE,OAAO,CAAC;IACZ,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE;QACN,MAAM,EAAE,MAAM,CAAC;QACf,UAAU,EAAE,MAAM,CAAC;QACnB,KAAK,EAAE,MAAM,CAAC;KACf,CAAC;IACF,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC;IACxB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,0CAA0C;IAC1C,SAAS,CAAC,EAAE,iBAAiB,CAAC;CAC/B;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,YAAY,EAAE,MAAM,CAAC;IACrB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,cAAc,EAAE,MAAM,CAAC;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,mBAAmB,EAAE,MAAM,CAAC;IAC5B,uBAAuB,EAAE,MAAM,CAAC;CACjC;AAED;;GAEG;AACH,MAAM,WAAW,OAAO;IACtB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,OAAO,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,EAAE,CAAC,EAAE;QACH,QAAQ,EAAE,MAAM,CAAC;QACjB,QAAQ,EAAE,MAAM,CAAC;QACjB,SAAS,CAAC,EAAE,MAAM,CAAC;KACpB,CAAC;CACH;AAED;;GAEG;AACH,MAAM,MAAM,YAAY,GAAG,KAAK,GAAG,UAAU,GAAG,QAAQ,GAAG,KAAK,GAAG,SAAS,CAAC;AAE7E;;;GAGG;AACH,MAAM,WAAW,cAAc;IAC7B,oBAAoB;IACpB,QAAQ,EAAE,MAAM,CAAC;IACjB,yCAAyC;IACzC,KAAK,CAAC,EAAE,MAAM,CAAC;IAGf,6BAA6B;IAC7B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,0
CAA0C;IAC1C,QAAQ,CAAC,EAAE,MAAM,CAAC;IAGlB,0BAA0B;IAC1B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,4BAA4B;IAC5B,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,wBAAwB;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;IAGrB,4CAA4C;IAC5C,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAG7B,4BAA4B;IAC5B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,kBAAkB;IAClB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,0BAA0B;IAC1B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,iBAAiB;IACjB,UAAU,CAAC,EAAE,MAAM,CAAC;IAEpB,mDAAmD;IACnD,MAAM,EAAE;QACN,QAAQ,CAAC,EAAE,YAAY,CAAC;QACxB,KAAK,CAAC,EAAE,YAAY,CAAC;QACrB,YAAY,CAAC,EAAE,YAAY,CAAC;QAC5B,QAAQ,CAAC,EAAE,YAAY,CAAC;QACxB,aAAa,CAAC,EAAE,YAAY,CAAC;QAC7B,eAAe,CAAC,EAAE,YAAY,CAAC;QAC/B,WAAW,CAAC,EAAE,YAAY,CAAC;QAC3B,mBAAmB,CAAC,EAAE,YAAY,CAAC;QACnC,OAAO,CAAC,EAAE,YAAY,CAAC;QACvB,WAAW,CAAC,EAAE,YAAY,CAAC;QAC3B,WAAW,CAAC,EAAE,YAAY,CAAC;QAC3B,UAAU,CAAC,EAAE,YAAY,CAAC;KAC3B,CAAC;CACH;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,SAAS,CAAC;IAClB,4EAA4E;IAC5E,eAAe,CAAC,EAAE,cAAc,CAAC;IACjC,OAAO,EAAE,UAAU,CAAC;IACpB,GAAG,EAAE,OAAO,CAAC;IACb,UAAU,EAAE,cAAc,CAAC;IAC3B,KAAK,EAAE,UAAU,EAAE,CAAC;IACpB,WAAW,EAAE;QACX,YAAY,EAAE,MAAM,CAAC;QACrB,QAAQ,EAAE,MAAM,CAAC;QACjB,IAAI,EAAE,MAAM,CAAC;KACd,CAAC;IACF,yCAAyC;IACzC,SAAS,CAAC,EAAE,qBAAqB,CAAC;CACnC;AAMD;;GAEG;AACH,MAAM,MAAM,aAAa,GAAG,MAAM,GAAG,QAAQ,GAAG,SAAS,GAAG,OAAO,CAAC;AAEpE;;GAEG;AACH,MAAM,MAAM,eAAe,GAAG,MAAM,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,UAAU,CAAC;AAE9E;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,4BAA4B;IAC5B,MAAM,EAAE,MAAM,CAAC;IACf,wBAAwB;IACxB,QAAQ,EAAE,MAAM,CAAC;IACjB,uCAAuC;IACvC,MAAM,EAAE,MAAM,CAAC;IACf,qBAAqB;IACrB,QAAQ,EAAE,MAAM,CAAC;IACjB,oBAAoB;IACpB,MAAM,EAAE,aAAa,CAAC;IACtB,yBAAyB;IACzB,QAAQ,EAAE,eAAe,CAAC;IAC1B,6BAA6B;IAC7B,OAAO,EAAE,MAAM
,EAAE,CAAC;IAClB,8BAA8B;IAC9B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,0CAA0C;IAC1C,SAAS,CAAC,EAAE,iBAAiB,CAAC;CAC/B;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,gCAAgC;IAChC,WAAW,EAAE,MAAM,CAAC;IACpB,yCAAyC;IACzC,cAAc,EAAE,MAAM,CAAC;IACvB,+CAA+C;IAC/C,iBAAiB,EAAE,MAAM,CAAC;IAC1B,8CAA8C;IAC9C,gBAAgB,EAAE,MAAM,CAAC;IACzB,yBAAyB;IACzB,eAAe,EAAE,MAAM,CAAC;IACxB,sCAAsC;IACtC,QAAQ,EAAE,MAAM,CAAC;IACjB,iDAAiD;IACjD,YAAY,EAAE,MAAM,CAAC;IACrB,4BAA4B;IAC5B,WAAW,EAAE;QACX,GAAG,EAAE,MAAM,CAAC;QACZ,MAAM,EAAE,MAAM,CAAC;QACf,IAAI,EAAE,MAAM,CAAC;QACb,QAAQ,EAAE,MAAM,CAAC;KAClB,CAAC;CACH;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,cAAc,EAAE,MAAM,CAAC;CACxB;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,SAAS,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,aAAa,CAAC;IACtB,4EAA4E;IAC5E,eAAe,CAAC,EAAE,cAAc,CAAC;IACjC,OAAO,EAAE,cAAc,CAAC;IACxB,GAAG,EAAE,OAAO,CAAC;IACb,UAAU,EAAE,cAAc,CAAC;IAC3B,OAAO,EAAE,iBAAiB,EAAE,CAAC;IAC7B,WAAW,EAAE;QACX,YAAY,EAAE,MAAM,CAAC;QACrB,QAAQ,EAAE,MAAM,CAAC;QACjB,IAAI,EAAE,MAAM,CAAC;KACd,CAAC;IACF,yCAAyC;IACzC,SAAS,CAAC,EAAE,qBAAqB,CAAC;CACnC;AAMD;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,oCAAoC;IACpC,OAAO,EAAE,OAAO,CAAC;IACjB,8BAA8B;IAC9B,SAAS,EAAE,MAAM,CAAC;IAClB,8BAA8B;IAC9B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,+BAA+B;IAC/B,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,0BAA0B;IAC1B,cAAc,EAAE,MAAM,CAAC;IACvB,0BAA0B;IAC1B,mBAAmB,EAAE,MAAM,CAAC;IAC5B,sBAAsB;IACtB,eAAe,EAAE,MAAM,CAAC;IACxB,yBAAyB;IACzB,YAAY,EAAE,MAAM,CAAC;IACrB,0BAA0B;IAC1B,mBAAmB,EAAE,MAAM,CAAC;IAC5B,4BAA4B;IAC5B,cAAc,EAAE,MAAM,CAAC;IACvB,4BAA4B;IAC5B,cAAc,EAAE,MAAM,CAAC;IACvB,4BAA4B;IAC5B,cAAc,EAAE,MAAM,CAAC;IACvB,8BAA8B;IAC9B,cAAc,EAAE,MAAM,CAAC;IACvB,8BAA8B;IAC9B,cAAc,EAAE,MAAM,CAAC;IACvB,8BAA8B;IAC9B,cAAc,EAAE,MAAM,CAAC;IACvB,8BAA8B;IAC9B,cAAc,EAAE,MAAM,CAAC;CACxB;AAED;;GAEG
;AACH,MAAM,WAAW,YAAY;IAC3B,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,MAAM,CAAC;IACpB,gBAAgB,EAAE,MAAM,CAAC;IACzB,eAAe,EAAE,MAAM,CAAC;IACxB,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,QAAQ,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,YAAY,CAAC;IACrB,4EAA4E;IAC5E,eAAe,CAAC,EAAE,cAAc,CAAC;IACjC,OAAO,EAAE,aAAa,CAAC;IACvB,GAAG,EAAE,OAAO,CAAC;IACb,UAAU,EAAE,cAAc,CAAC;IAC3B,mEAAmE;IACnE,cAAc,EAAE,mBAAmB,EAAE,CAAC;IACtC,WAAW,EAAE;QACX,YAAY,EAAE,MAAM,CAAC;QACrB,QAAQ,EAAE,MAAM,CAAC;QACjB,IAAI,EAAE,MAAM,CAAC;KACd,CAAC;IACF,yCAAyC;IACzC,SAAS,CAAC,EAAE,qBAAqB,CAAC;CACnC;AAMD;;GAEG;AACH,MAAM,MAAM,WAAW,GAAG,WAAW,GAAG,eAAe,GAAG,cAAc,CAAC;AAEzE;;GAEG;AACH,wBAAgB,aAAa,CAAC,QAAQ,EAAE,WAAW,GAAG,QAAQ,IAAI,WAAW,CAE5E;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,WAAW,GAAG,QAAQ,IAAI,eAAe,CAEpF;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,QAAQ,EAAE,WAAW,GAAG,QAAQ,IAAI,cAAc,CAElF"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Contains evaluator - checks if response contains specific values
|
|
3
|
+
*/
|
|
4
|
+
import type { Expected } from '../scenario/schema';
|
|
5
|
+
import type { Evaluator, EvaluatorResult } from './types';
|
|
6
|
+
/**
 * Contains evaluator - checks if response contains specific values.
 *
 * Declaration only; the matching logic lives in the implementation.
 */
export declare class ContainsEvaluator implements Evaluator {
  /** Evaluator type tag, fixed to the literal "contains" */
  readonly type = "contains";
  /**
   * Evaluates a response against the expectation.
   *
   * @param response - Response text to evaluate
   * @param expected - Expectation taken from the scenario
   * @returns A promise resolving to the evaluation result
   */
  evaluate(response: string, expected: Expected): Promise<EvaluatorResult>;
}
|
|
10
|
+
//# sourceMappingURL=contains.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"contains.d.ts","sourceRoot":"","sources":["../../src/evaluators/contains.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AACnD,OAAO,KAAK,EAAE,SAAS,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAE1D,qBAAa,iBAAkB,YAAW,SAAS;IACjD,QAAQ,CAAC,IAAI,cAAc;IAErB,QAAQ,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAAC,eAAe,CAAC;CA8B/E"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Exact match evaluator
|
|
3
|
+
*/
|
|
4
|
+
import type { Expected } from '../scenario/schema';
|
|
5
|
+
import type { Evaluator, EvaluatorResult } from './types';
|
|
6
|
+
/**
 * Exact match evaluator.
 *
 * Declaration only; the matching logic lives in the implementation.
 */
export declare class ExactEvaluator implements Evaluator {
  /** Evaluator type tag, fixed to the literal "exact" */
  readonly type = "exact";
  /**
   * Evaluates a response against the expectation.
   *
   * @param response - Response text to evaluate
   * @param expected - Expectation taken from the scenario
   * @returns A promise resolving to the evaluation result
   */
  evaluate(response: string, expected: Expected): Promise<EvaluatorResult>;
}
|
|
10
|
+
//# sourceMappingURL=exact.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"exact.d.ts","sourceRoot":"","sources":["../../src/evaluators/exact.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AACnD,OAAO,KAAK,EAAE,SAAS,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAE1D,qBAAa,cAAe,YAAW,SAAS;IAC9C,QAAQ,CAAC,IAAI,WAAW;IAElB,QAAQ,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAAC,eAAe,CAAC;CAsB/E"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Fuzzy match evaluator using Levenshtein distance
|
|
3
|
+
*/
|
|
4
|
+
import type { Expected } from '../scenario/schema';
|
|
5
|
+
import type { Evaluator, EvaluatorResult } from './types';
|
|
6
|
+
/**
 * Fuzzy match evaluator using Levenshtein distance.
 *
 * Declaration only; the matching logic lives in the implementation.
 */
export declare class FuzzyEvaluator implements Evaluator {
  /** Evaluator type tag, fixed to the literal "fuzzy" */
  readonly type = "fuzzy";
  /**
   * Evaluates a response against the expectation.
   *
   * @param response - Response text to evaluate
   * @param expected - Expectation taken from the scenario
   * @returns A promise resolving to the evaluation result
   */
  evaluate(response: string, expected: Expected): Promise<EvaluatorResult>;
}
|
|
10
|
+
//# sourceMappingURL=fuzzy.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fuzzy.d.ts","sourceRoot":"","sources":["../../src/evaluators/fuzzy.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AACnD,OAAO,KAAK,EAAE,SAAS,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAE1D,qBAAa,cAAe,YAAW,SAAS;IAC9C,QAAQ,CAAC,IAAI,WAAW;IAElB,QAAQ,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAAC,eAAe,CAAC;CA2B/E"}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Evaluators module - exports all evaluator types and utilities
|
|
3
|
+
*/
|
|
4
|
+
import type { Evaluator } from './types';
|
|
5
|
+
/**
|
|
6
|
+
* Get an evaluator by type
|
|
7
|
+
*/
|
|
8
|
+
// Returns the Evaluator for the given type name. Declaration only;
// NOTE(review): behavior for an unknown `type` (throw vs. fallback) is not visible here.
export declare function getEvaluator(type: string): Evaluator;
|
|
9
|
+
/**
|
|
10
|
+
* Register a custom evaluator
|
|
11
|
+
*/
|
|
12
|
+
// Registers `evaluator` under the name `type`. Declaration only;
// NOTE(review): whether an existing registration under the same name is overwritten is not visible here.
export declare function registerEvaluator(type: string, evaluator: Evaluator): void;
|
|
13
|
+
/**
|
|
14
|
+
* List available evaluator types
|
|
15
|
+
*/
|
|
16
|
+
// Returns the available evaluator type names as strings. Declaration only;
// NOTE(review): ordering of the returned names is not specified here.
export declare function listEvaluators(): string[];
|
|
17
|
+
export * from './types';
|
|
18
|
+
export { ExactEvaluator } from './exact';
|
|
19
|
+
export { RegexEvaluator } from './regex';
|
|
20
|
+
export { FuzzyEvaluator } from './fuzzy';
|
|
21
|
+
export { ContainsEvaluator } from './contains';
|
|
22
|
+
export { JsonSchemaEvaluator } from './json-schema';
|
|
23
|
+
export { LLMGraderEvaluator } from './llm-grader';
|
|
24
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/evaluators/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAQH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAUzC;;GAEG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS,CAOpD;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,SAAS,GAAG,IAAI,CAE1E;AAED;;GAEG;AACH,wBAAgB,cAAc,IAAI,MAAM,EAAE,CAEzC;AAED,cAAc,SAAS,CAAC;AACxB,OAAO,EAAE,cAAc,EAAE,MAAM,SAAS,CAAC;AACzC,OAAO,EAAE,cAAc,EAAE,MAAM,SAAS,CAAC;AACzC,OAAO,EAAE,cAAc,EAAE,MAAM,SAAS,CAAC;AACzC,OAAO,EAAE,iBAAiB,EAAE,MAAM,YAAY,CAAC;AAC/C,OAAO,EAAE,mBAAmB,EAAE,MAAM,eAAe,CAAC;AACpD,OAAO,EAAE,kBAAkB,EAAE,MAAM,cAAc,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* JSON Schema evaluator - validates response against a JSON schema
|
|
3
|
+
*/
|
|
4
|
+
import type { Expected } from '../scenario/schema';
|
|
5
|
+
import type { Evaluator, EvaluatorResult } from './types';
|
|
6
|
+
/**
 * JSON Schema evaluator - validates response against a JSON schema.
 *
 * Declaration only; the validation logic lives in the implementation.
 */
export declare class JsonSchemaEvaluator implements Evaluator {
  /** Evaluator type tag, fixed to the literal "json_schema" */
  readonly type = "json_schema";
  /**
   * Evaluates a response against the expectation.
   *
   * @param response - Response text to evaluate
   * @param expected - Expectation taken from the scenario
   * @returns A promise resolving to the evaluation result
   */
  evaluate(response: string, expected: Expected): Promise<EvaluatorResult>;
  /** Private helper (NOTE(review): by its name, converts a JSON schema to a Zod schema — confirm in the implementation) */
  private jsonSchemaToZod;
}
|
|
11
|
+
//# sourceMappingURL=json-schema.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"json-schema.d.ts","sourceRoot":"","sources":["../../src/evaluators/json-schema.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AACnD,OAAO,KAAK,EAAE,SAAS,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAE1D,qBAAa,mBAAoB,YAAW,SAAS;IACnD,QAAQ,CAAC,IAAI,iBAAiB;IAExB,QAAQ,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAAC,eAAe,CAAC;IAmD9E,OAAO,CAAC,eAAe;CAmCxB"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLM-based grader evaluator
|
|
3
|
+
*/
|
|
4
|
+
import type { Expected } from '../scenario/schema';
|
|
5
|
+
import type { Evaluator, EvaluatorContext, EvaluatorResult } from './types';
|
|
6
|
+
/**
 * LLM-based grader evaluator.
 *
 * Declaration only; the grading logic lives in the implementation.
 */
export declare class LLMGraderEvaluator implements Evaluator {
  /** Evaluator type tag, fixed to the literal "llm_grader" */
  readonly type = "llm_grader";
  /**
   * Evaluates a response against the expectation.
   *
   * @param response - Response text to evaluate
   * @param expected - Expectation taken from the scenario
   * @param context - Optional evaluator context (NOTE(review): presumably supplies the model client used for grading — confirm)
   * @returns A promise resolving to the evaluation result
   */
  evaluate(response: string, expected: Expected, context?: EvaluatorContext): Promise<EvaluatorResult>;
  /** Private helper (NOTE(review): by its name, parses the grader model's reply — confirm in the implementation) */
  private parseGraderResponse;
}
|
|
11
|
+
//# sourceMappingURL=llm-grader.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"llm-grader.d.ts","sourceRoot":"","sources":["../../src/evaluators/llm-grader.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AACnD,OAAO,KAAK,EAAE,SAAS,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAmB5E,qBAAa,kBAAmB,YAAW,SAAS;IAClD,QAAQ,CAAC,IAAI,gBAAgB;IAEvB,QAAQ,CACZ,QAAQ,EAAE,MAAM,EAChB,QAAQ,EAAE,QAAQ,EAClB,OAAO,CAAC,EAAE,gBAAgB,GACzB,OAAO,CAAC,eAAe,CAAC;IA8C3B,OAAO,CAAC,mBAAmB;CAsB5B"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Regex pattern evaluator
|
|
3
|
+
*/
|
|
4
|
+
import type { Expected } from '../scenario/schema';
|
|
5
|
+
import type { Evaluator, EvaluatorResult } from './types';
|
|
6
|
+
/**
 * Regex pattern evaluator.
 *
 * Declaration only; the matching logic lives in the implementation.
 */
export declare class RegexEvaluator implements Evaluator {
  /** Evaluator type tag, fixed to the literal "regex" */
  readonly type = "regex";
  /**
   * Evaluates a response against the expectation.
   *
   * @param response - Response text to evaluate
   * @param expected - Expectation taken from the scenario
   * @returns A promise resolving to the evaluation result
   */
  evaluate(response: string, expected: Expected): Promise<EvaluatorResult>;
}
|
|
10
|
+
//# sourceMappingURL=regex.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"regex.d.ts","sourceRoot":"","sources":["../../src/evaluators/regex.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AACnD,OAAO,KAAK,EAAE,SAAS,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAE1D,qBAAa,cAAe,YAAW,SAAS;IAC9C,QAAQ,CAAC,IAAI,WAAW;IAElB,QAAQ,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAAC,eAAe,CAAC;CAgC/E"}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Evaluator types and interfaces
|
|
3
|
+
*/
|
|
4
|
+
import type { ModelClient } from '../adapters/types';
|
|
5
|
+
import type { Expected, TestCase } from '../scenario/schema';
|
|
6
|
+
/**
 * Context provided to evaluators.
 *
 * Both fields are optional, so evaluators must handle their absence.
 */
export interface EvaluatorContext {
  /** Optional model client */
  client?: ModelClient;
  /** Optional test case being evaluated */
  testCase?: TestCase;
}
|
|
13
|
+
/**
 * Result from an evaluation.
 */
export interface EvaluatorResult {
  /** Whether the evaluation passed */
  passed: boolean;
  /** Numeric score (NOTE(review): range is not specified in this declaration) */
  score: number;
  /** Optional explanation for the outcome */
  reason?: string;
  /** Optional evaluator-specific details, keyed by string */
  details?: Record<string, unknown>;
}
|
|
22
|
+
/**
 * Evaluator interface - implement to create custom evaluators.
 */
export interface Evaluator {
  /** Evaluator type name */
  readonly type: string;
  /**
   * Evaluates a response against the expectation.
   *
   * @param response - Response text to evaluate
   * @param expected - Expectation taken from the scenario
   * @param context - Optional evaluator context
   * @returns A promise resolving to the evaluation result
   */
  evaluate(response: string, expected: Expected, context?: EvaluatorContext): Promise<EvaluatorResult>;
}
|
|
29
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/evaluators/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,KAAK,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAE7D;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,QAAQ,CAAC,EAAE,QAAQ,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,MAAM,EAAE,OAAO,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACnC;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IAEtB,QAAQ,CACN,QAAQ,EAAE,MAAM,EAChB,QAAQ,EAAE,QAAQ,EAClB,OAAO,CAAC,EAAE,gBAAgB,GACzB,OAAO,CAAC,eAAe,CAAC,CAAC;CAC7B"}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @artemiskit/core
|
|
3
|
+
* Core library for Artemis Agent Reliability Toolkit
|
|
4
|
+
*/
|
|
5
|
+
export * from './adapters';
|
|
6
|
+
export * from './scenario';
|
|
7
|
+
export * from './evaluators';
|
|
8
|
+
export * from './runner';
|
|
9
|
+
export * from './storage';
|
|
10
|
+
export * from './artifacts';
|
|
11
|
+
export * from './provenance';
|
|
12
|
+
export * from './utils';
|
|
13
|
+
export * from './redaction';
|
|
14
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,cAAc,YAAY,CAAC;AAG3B,cAAc,YAAY,CAAC;AAG3B,cAAc,cAAc,CAAC;AAG7B,cAAc,UAAU,CAAC;AAGzB,cAAc,WAAW,CAAC;AAG1B,cAAc,aAAa,CAAC;AAG5B,cAAc,cAAc,CAAC;AAG7B,cAAc,SAAS,CAAC;AAGxB,cAAc,aAAa,CAAC"}
|