@artemiskit/core 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +71 -0
- package/dist/artifacts/manifest.d.ts.map +1 -1
- package/dist/artifacts/types.d.ts +20 -0
- package/dist/artifacts/types.d.ts.map +1 -1
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +688 -408
- package/dist/storage/local.d.ts.map +1 -1
- package/dist/storage/types.d.ts +4 -0
- package/dist/storage/types.d.ts.map +1 -1
- package/dist/validator/index.d.ts +6 -0
- package/dist/validator/index.d.ts.map +1 -0
- package/dist/validator/types.d.ts +58 -0
- package/dist/validator/types.d.ts.map +1 -0
- package/dist/validator/validator.d.ts +55 -0
- package/dist/validator/validator.d.ts.map +1 -0
- package/package.json +1 -1
- package/src/artifacts/manifest.ts +24 -2
- package/src/artifacts/types.ts +21 -0
- package/src/evaluators/similarity.test.ts +4 -3
- package/src/index.ts +3 -0
- package/src/storage/local.ts +24 -2
- package/src/storage/types.ts +4 -0
- package/src/validator/index.ts +6 -0
- package/src/validator/types.ts +62 -0
- package/src/validator/validator.ts +345 -0
|
@@ -0,0 +1,345 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Scenario Validator
|
|
3
|
+
*
|
|
4
|
+
* Validates scenario files for:
|
|
5
|
+
* 1. YAML syntax errors
|
|
6
|
+
* 2. Schema violations (required fields, types)
|
|
7
|
+
* 3. Semantic errors (duplicate IDs, undefined variables)
|
|
8
|
+
* 4. Warnings (deprecated patterns)
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { readFileSync } from 'node:fs';
|
|
12
|
+
import yaml from 'yaml';
|
|
13
|
+
import type { ZodError } from 'zod';
|
|
14
|
+
import { ScenarioSchema } from '../scenario/schema';
|
|
15
|
+
import type { ValidationIssue, ValidationResult, ValidatorOptions } from './types';
|
|
16
|
+
|
|
17
|
+
/**
 * Validates a scenario YAML file in four stages and reports the findings as
 * `ValidationIssue` lists:
 *
 * 1. YAML syntax (the file must parse),
 * 2. schema conformance (`ScenarioSchema.safeParse`),
 * 3. semantic rules (duplicate case IDs, undefined `{{variable}}` references),
 * 4. non-blocking warnings (deprecated fields, hints).
 *
 * Line numbers attached to schema/semantic issues are located heuristically by
 * scanning the raw text, so they are approximate for unusual layouts (flow
 * style, anchors, multi-document files).
 */
export class ScenarioValidator {
  private readonly _options: ValidatorOptions;

  /**
   * @param options Validator options; stored as-is and exposed via `options`.
   */
  constructor(options: ValidatorOptions = {}) {
    this._options = options;
  }

  /** The options this validator was constructed with. */
  get options(): ValidatorOptions {
    return this._options;
  }

  /**
   * Validate a scenario file.
   *
   * Read and parse failures are reported as issues rather than thrown: a file
   * that cannot be read, is not valid YAML, or is not a YAML object yields
   * `valid: false` with a single error and no warnings.
   *
   * @param filePath Path of the scenario file to read (UTF-8).
   * @returns The file path, the collected errors and warnings, and `valid`,
   *          which is true only when no errors were recorded.
   */
  validate(filePath: string): ValidationResult {
    const errors: ValidationIssue[] = [];
    const warnings: ValidationIssue[] = [];

    // Read file content
    let content: string;
    try {
      content = readFileSync(filePath, 'utf-8');
    } catch (err) {
      // Anything can be thrown, so narrow before reading `.message`.
      const reason = err instanceof Error ? err.message : String(err);
      errors.push({
        line: 1,
        message: `Failed to read file: ${reason}`,
        rule: 'file-read',
        severity: 'error',
      });
      return { file: filePath, valid: false, errors, warnings };
    }

    // Level 1: YAML syntax validation
    let parsed: unknown;
    try {
      parsed = yaml.parse(content, {
        prettyErrors: true,
        strict: true,
      });
    } catch (err) {
      if (err instanceof yaml.YAMLError) {
        const linePos = err.linePos?.[0];
        errors.push({
          line: linePos?.line ?? 1,
          column: linePos?.col,
          message: `Invalid YAML syntax: ${err.message}`,
          rule: 'yaml-syntax',
          severity: 'error',
        });
      } else {
        const reason = err instanceof Error ? err.message : String(err);
        errors.push({
          line: 1,
          message: `YAML parse error: ${reason}`,
          rule: 'yaml-syntax',
          severity: 'error',
        });
      }
      return { file: filePath, valid: false, errors, warnings };
    }

    // Scalars and empty documents cannot be scenarios.
    if (parsed === null || typeof parsed !== 'object') {
      errors.push({
        line: 1,
        message: 'Scenario must be a YAML object',
        rule: 'schema-type',
        severity: 'error',
      });
      return { file: filePath, valid: false, errors, warnings };
    }

    // Level 2: schema validation. Level 3 (semantics) needs the typed data,
    // so it only runs when the schema check succeeded.
    const schemaResult = ScenarioSchema.safeParse(parsed);
    if (schemaResult.success) {
      errors.push(...this.validateSemantics(schemaResult.data, content));
    } else {
      errors.push(...this.formatZodErrors(schemaResult.error, content));
    }

    // Level 4: warnings are computed from the raw parsed value, so they are
    // reported even when the schema check failed.
    warnings.push(...this.detectWarnings(parsed, content));

    return {
      file: filePath,
      valid: errors.length === 0,
      errors,
      warnings,
    };
  }

  /**
   * Convert Zod issues into `ValidationIssue`s with a friendlier message and an
   * approximate source line. The rule name is `schema-<zod issue code>`.
   */
  private formatZodErrors(error: ZodError, content: string): ValidationIssue[] {
    const issues: ValidationIssue[] = [];
    const lines = this.toLines(content);

    for (const issue of error.issues) {
      const path = issue.path.join('.');
      // Path used to locate the line; normally the issue's own path.
      let target: (string | number)[] = issue.path;

      let message: string;
      switch (issue.code) {
        case 'invalid_type':
          message = `'${path}' expected ${issue.expected}, received ${issue.received}`;
          break;
        case 'invalid_enum_value':
          message = `'${path}' must be one of: ${(issue as { options: string[] }).options.join(', ')}`;
          break;
        case 'too_small':
          if ((issue as { type: string }).type === 'array') {
            message = `'${path}' must have at least ${(issue as { minimum: number }).minimum} item(s)`;
          } else {
            message = `'${path}' is too small`;
          }
          break;
        case 'unrecognized_keys': {
          const keys = (issue as { keys: string[] }).keys;
          message = `Unrecognized field(s): ${keys.join(', ')}`;
          // The issue path names the parent object; point at the first
          // offending key instead (falls back to the parent if not found).
          if (keys.length > 0) {
            target = [...issue.path, keys[0]];
          }
          break;
        }
        default:
          message = issue.message;
      }

      issues.push({
        line: this.findLineForPath(lines, target),
        message,
        rule: `schema-${issue.code}`,
        severity: 'error',
      });
    }

    return issues;
  }

  /**
   * Find the approximate 1-indexed line for a data path such as
   * `['cases', 2, 'expected', 'type']`.
   *
   * The path is walked segment by segment: string segments are looked up as
   * mapping keys and numeric segments as the N-th `- ` item, each search being
   * confined to the text block of the previously resolved segment. When a
   * segment cannot be located (missing key, flow-style value, ...) the line of
   * the deepest segment that was found is returned; if nothing was found the
   * result is 1.
   */
  private findLineForPath(lines: string[], path: (string | number)[]): number {
    let from = 0; // first line to search (inclusive)
    let limit = lines.length; // end of the enclosing block (exclusive)
    let resolved = -1; // index of the deepest line located so far

    for (const segment of path) {
      const hit =
        typeof segment === 'number'
          ? this.findSequenceItem(lines, from, limit, segment)
          : this.findMappingKey(lines, from, limit, segment);
      if (hit === -1) break;

      resolved = hit;
      limit = this.findBlockEnd(lines, hit, limit);
      // The first key of a sequence item lives on the `- ` line itself, so keep
      // that line in range; children of a plain key start on the next line.
      from = typeof segment === 'number' ? hit : hit + 1;
    }

    return resolved === -1 ? 1 : resolved + 1;
  }

  /**
   * Index of the line in `[from, limit)` that defines mapping key `key`, or -1.
   * Among several matches the least-indented one wins, since direct children of
   * a block are less indented than same-named keys nested deeper inside it.
   */
  private findMappingKey(lines: string[], from: number, limit: number, key: string): number {
    const pattern = this.keyPattern(key);
    let best = -1;
    let bestColumn = Number.POSITIVE_INFINITY;

    for (let i = from; i < limit; i++) {
      const match = pattern.exec(lines[i]);
      if (match && match[1].length < bestColumn) {
        best = i;
        bestColumn = match[1].length;
      }
    }

    return best;
  }

  /**
   * Index of the line in `[from, limit)` holding the `index`-th (0-based) item
   * of the first block sequence found there, or -1. Items are recognised by the
   * indentation of the first `- ` line; more deeply nested lists are skipped.
   */
  private findSequenceItem(lines: string[], from: number, limit: number, index: number): number {
    const marker = /^(\s*)-(?:\s|$)/;
    let itemIndent = -1;
    let position = -1;

    for (let i = from; i < limit; i++) {
      const match = marker.exec(lines[i]);
      if (!match) continue;

      const indent = match[1].length;
      if (itemIndent === -1) itemIndent = indent;
      if (indent !== itemIndent) continue;

      position++;
      if (position === index) return i;
    }

    return -1;
  }

  /**
   * Exclusive end index of the indentation block that starts at line `start`:
   * the first later content line indented no deeper than `start`, capped at
   * `limit`. A `- ` line at the same indentation as a plain key line is treated
   * as that key's (compact) sequence and does not end the block.
   */
  private findBlockEnd(lines: string[], start: number, limit: number): number {
    const marker = /^\s*-(?:\s|$)/;
    const base = this.indentOf(lines[start]);
    const startsItem = marker.test(lines[start]);

    for (let i = start + 1; i < limit; i++) {
      const indent = this.indentOf(lines[i]);
      if (indent === -1) continue; // blank or comment-only line
      if (indent < base) return i;
      if (indent === base && (startsItem || !marker.test(lines[i]))) return i;
    }

    return limit;
  }

  /** Leading-whitespace width of a line, or -1 for blank/comment-only lines. */
  private indentOf(line: string): number {
    const trimmed = line.trimStart();
    if (trimmed === '' || trimmed.startsWith('#')) return -1;
    return line.length - trimmed.length;
  }

  /**
   * Regex matching a line that defines `key` as a whole mapping key (optionally
   * quoted, optionally preceded by `- ` markers). Capture group 1 is the prefix
   * before the key, whose length is the key's column.
   */
  private keyPattern(key: string): RegExp {
    const k = ScenarioValidator.escapeRegExp(key);
    return new RegExp(`^(\\s*(?:-\\s+)*)(?:"${k}"|'${k}'|${k})\\s*:(?:\\s|$)`);
  }

  /** Escape regex metacharacters so `value` is matched literally. */
  private static escapeRegExp(value: string): string {
    return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  }

  /** Split file content into lines, tolerating both LF and CRLF endings. */
  private toLines(content: string): string[] {
    return content.split(/\r?\n/);
  }

  /**
   * Apply semantic rules that the schema cannot express:
   * - every case ID must be unique (`duplicate-case-id`),
   * - every `{{name}}` placeholder in a prompt must be defined in the
   *   scenario-level or case-level variables (`undefined-variable`).
   *
   * Duplicate-ID errors are emitted first, then variable errors, each in case
   * order. Issues are reported on the line of the offending case's `id`.
   */
  private validateSemantics(
    scenario: {
      cases: Array<{ id: string; prompt: string | unknown; variables?: Record<string, unknown> }>;
      variables?: Record<string, unknown>;
    },
    content: string
  ): ValidationIssue[] {
    const errors: ValidationIssue[] = [];
    const lines = this.toLines(content);

    // Check for duplicate case IDs. `occurrences[i]` records which occurrence
    // (1-based) of its ID case `i` is, so issues point at the right definition.
    const seenCount = new Map<string, number>();
    const occurrences: number[] = [];
    for (const testCase of scenario.cases) {
      const count = (seenCount.get(testCase.id) ?? 0) + 1;
      seenCount.set(testCase.id, count);
      occurrences.push(count);

      if (count > 1) {
        errors.push({
          line: this.findLineForCaseId(lines, testCase.id, count),
          message: `Duplicate case ID: '${testCase.id}'`,
          rule: 'duplicate-case-id',
          severity: 'error',
        });
      }
    }

    // Check variable references
    const globalVars = scenario.variables || {};
    scenario.cases.forEach((testCase, index) => {
      const allVars = { ...globalVars, ...(testCase.variables || {}) };

      // Non-string prompts are serialised so placeholders nested inside them
      // are still seen; JSON.stringify can yield undefined at runtime.
      const prompt =
        typeof testCase.prompt === 'string'
          ? testCase.prompt
          : JSON.stringify(testCase.prompt) ?? '';

      // Report each missing variable once per case, even if used repeatedly.
      for (const ref of new Set(this.extractVariableRefs(prompt))) {
        // Own-property check: `in` would accept inherited names such as
        // `toString` or `constructor` as if they were defined variables.
        if (Object.prototype.hasOwnProperty.call(allVars, ref)) continue;

        errors.push({
          line: this.findLineForCaseId(lines, testCase.id, occurrences[index]),
          message: `Undefined variable '{{${ref}}}' in case '${testCase.id}'`,
          rule: 'undefined-variable',
          severity: 'error',
          suggestion: `Define '${ref}' in scenario.variables or case.variables`,
        });
      });
    });

    return errors;
  }

  /**
   * Find the 1-indexed line of the `occurrence`-th `id: <caseId>` entry.
   *
   * Lines are first matched strictly (the whole value must equal `caseId`,
   * bare or quoted, so `a` does not match `id: abc`). If there are fewer strict
   * matches than `occurrence`, the last strict match is used; if there are
   * none, a loose substring search is tried (covers flow-style entries), and
   * finally line 1 is returned.
   *
   * @param occurrence 1-based occurrence to locate; defaults to the first.
   */
  private findLineForCaseId(lines: string[], caseId: string, occurrence = 1): number {
    const id = ScenarioValidator.escapeRegExp(caseId);
    const exact = new RegExp(
      `^\\s*(?:-\\s+)*["']?id["']?\\s*:\\s*(?:"${id}"|'${id}'|${id})\\s*(?:#.*)?$`
    );

    let seen = 0;
    let lastMatch = -1;
    for (let i = 0; i < lines.length; i++) {
      if (!exact.test(lines[i])) continue;
      lastMatch = i;
      seen++;
      if (seen === occurrence) return i + 1;
    }
    if (lastMatch !== -1) return lastMatch + 1;

    for (let i = 0; i < lines.length; i++) {
      if (
        lines[i].includes(`id: ${caseId}`) ||
        lines[i].includes(`id: "${caseId}"`) ||
        lines[i].includes(`id: '${caseId}'`)
      ) {
        return i + 1;
      }
    }
    return 1;
  }

  /**
   * Extract variable references from a string ({{varName}} format).
   * Names consist of word characters only; references are returned in order of
   * appearance, including repeats.
   */
  private extractVariableRefs(text: string): string[] {
    return Array.from(text.matchAll(/\{\{(\w+)\}\}/g), (match) => match[1]);
  }

  /**
   * Detect warnings (non-blocking issues) on the raw parsed document:
   * a `criteria` key at any depth, more than 20 cases, and a missing/empty
   * top-level `description`.
   */
  private detectWarnings(parsed: unknown, content: string): ValidationIssue[] {
    const warnings: ValidationIssue[] = [];
    if (!parsed || typeof parsed !== 'object') return warnings;

    const lines = this.toLines(content);
    const obj = parsed as Record<string, unknown>;

    // Check for deprecated 'criteria' field (should be 'rubric' for llm_grader)
    if (this.hasDeepKey(obj, 'criteria')) {
      warnings.push({
        line: this.findLineForKey(lines, 'criteria'),
        message: "'criteria' is deprecated, use 'rubric' instead (llm_grader)",
        rule: 'deprecated-field',
        severity: 'warning',
        suggestion: "Replace 'criteria' with 'rubric'",
      });
    }

    // Check for very large number of cases without parallel recommendation
    const cases = obj.cases;
    if (Array.isArray(cases) && cases.length > 20) {
      warnings.push({
        line: 1,
        message: `Scenario has ${cases.length} cases. Consider using --parallel for faster execution.`,
        rule: 'performance-hint',
        severity: 'warning',
      });
    }

    // Check for missing description
    if (!obj.description) {
      warnings.push({
        line: 1,
        message:
          "Scenario is missing 'description' field. Adding a description improves documentation.",
        rule: 'missing-description',
        severity: 'warning',
      });
    }

    return warnings;
  }

  /**
   * Check whether `obj`, or any object/array nested inside it, has `key` as an
   * own property.
   */
  private hasDeepKey(obj: unknown, key: string): boolean {
    if (obj === null || typeof obj !== 'object') return false;
    if (Object.prototype.hasOwnProperty.call(obj, key)) return true;
    return Object.values(obj as Record<string, unknown>).some((value) =>
      this.hasDeepKey(value, key)
    );
  }

  /**
   * Find the 1-indexed line of the first definition of `key`. Prefers a line
   * where `key` is the whole mapping key; otherwise falls back to the first
   * line containing `key:` anywhere, and to line 1 when there is none.
   */
  private findLineForKey(lines: string[], key: string): number {
    const pattern = this.keyPattern(key);
    const exact = lines.findIndex((line) => pattern.test(line));
    if (exact !== -1) return exact + 1;

    const loose = lines.findIndex((line) => line.includes(`${key}:`));
    return loose === -1 ? 1 : loose + 1;
  }
}
|