@agentv/core 2.5.8 → 2.7.1-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-LGQ5OPJD.js → chunk-6W5E3VR6.js} +383 -54
- package/dist/chunk-6W5E3VR6.js.map +1 -0
- package/dist/chunk-HFSYZHGF.js +82 -0
- package/dist/chunk-HFSYZHGF.js.map +1 -0
- package/dist/chunk-HMXZ2AX4.js +112 -0
- package/dist/chunk-HMXZ2AX4.js.map +1 -0
- package/dist/esm-5Q4BZALM.js +968 -0
- package/dist/esm-5Q4BZALM.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +337 -70
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +294 -69
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +9221 -4040
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +1717 -234
- package/dist/index.d.ts +1717 -234
- package/dist/index.js +6563 -3147
- package/dist/index.js.map +1 -1
- package/dist/otlp-json-file-exporter-77FDBRSY.js +7 -0
- package/dist/otlp-json-file-exporter-77FDBRSY.js.map +1 -0
- package/dist/simple-trace-file-exporter-S76DMABU.js +7 -0
- package/dist/simple-trace-file-exporter-S76DMABU.js.map +1 -0
- package/package.json +18 -5
- package/dist/chunk-LGQ5OPJD.js.map +0 -1
|
@@ -4,8 +4,9 @@ import {
|
|
|
4
4
|
PROVIDER_ALIASES,
|
|
5
5
|
buildSearchRoots,
|
|
6
6
|
findGitRoot,
|
|
7
|
+
isEvaluatorKind,
|
|
7
8
|
resolveFileReference
|
|
8
|
-
} from "../../chunk-LGQ5OPJD.js";
|
|
9
|
+
} from "../../chunk-6W5E3VR6.js";
|
|
9
10
|
|
|
10
11
|
// src/evaluation/validation/file-type.ts
|
|
11
12
|
import { readFile } from "node:fs/promises";
|
|
@@ -73,6 +74,9 @@ function getExpectedSchema(fileType) {
|
|
|
73
74
|
import { readFile as readFile2 } from "node:fs/promises";
|
|
74
75
|
import path2 from "node:path";
|
|
75
76
|
import { parse as parse2 } from "yaml";
|
|
77
|
+
var ASSERTION_TYPES_WITH_VALUE = /* @__PURE__ */ new Set(["contains", "equals", "regex"]);
|
|
78
|
+
var VALID_TEST_FILE_EXTENSIONS = /* @__PURE__ */ new Set([".yaml", ".yml", ".jsonl"]);
|
|
79
|
+
var NAME_PATTERN = /^[a-z0-9-]+$/;
|
|
76
80
|
function isObject(value) {
|
|
77
81
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
78
82
|
}
|
|
@@ -109,13 +113,41 @@ async function validateEvalFile(filePath) {
|
|
|
109
113
|
errors
|
|
110
114
|
};
|
|
111
115
|
}
|
|
112
|
-
|
|
113
|
-
|
|
116
|
+
validateMetadata(parsed, absolutePath, errors);
|
|
117
|
+
let cases = parsed.tests;
|
|
118
|
+
if (cases === void 0 && "eval_cases" in parsed) {
|
|
119
|
+
cases = parsed.eval_cases;
|
|
114
120
|
errors.push({
|
|
115
|
-
severity: "
|
|
121
|
+
severity: "warning",
|
|
122
|
+
filePath: absolutePath,
|
|
123
|
+
location: "eval_cases",
|
|
124
|
+
message: "'eval_cases' is deprecated. Use 'tests' instead."
|
|
125
|
+
});
|
|
126
|
+
}
|
|
127
|
+
if (cases === void 0 && "evalcases" in parsed) {
|
|
128
|
+
cases = parsed.evalcases;
|
|
129
|
+
errors.push({
|
|
130
|
+
severity: "warning",
|
|
116
131
|
filePath: absolutePath,
|
|
117
132
|
location: "evalcases",
|
|
118
|
-
message: "
|
|
133
|
+
message: "'evalcases' is deprecated. Use 'tests' instead."
|
|
134
|
+
});
|
|
135
|
+
}
|
|
136
|
+
if (typeof cases === "string") {
|
|
137
|
+
validateTestsStringPath(cases, absolutePath, errors);
|
|
138
|
+
return {
|
|
139
|
+
valid: errors.filter((e) => e.severity === "error").length === 0,
|
|
140
|
+
filePath: absolutePath,
|
|
141
|
+
fileType: "eval",
|
|
142
|
+
errors
|
|
143
|
+
};
|
|
144
|
+
}
|
|
145
|
+
if (!Array.isArray(cases)) {
|
|
146
|
+
errors.push({
|
|
147
|
+
severity: "error",
|
|
148
|
+
filePath: absolutePath,
|
|
149
|
+
location: "tests",
|
|
150
|
+
message: "Missing or invalid 'tests' field (must be an array or a file path string)"
|
|
119
151
|
});
|
|
120
152
|
return {
|
|
121
153
|
valid: errors.length === 0,
|
|
@@ -124,9 +156,9 @@ async function validateEvalFile(filePath) {
|
|
|
124
156
|
errors
|
|
125
157
|
};
|
|
126
158
|
}
|
|
127
|
-
for (let i = 0; i <
|
|
128
|
-
const evalCase =
|
|
129
|
-
const location = `
|
|
159
|
+
for (let i = 0; i < cases.length; i++) {
|
|
160
|
+
const evalCase = cases[i];
|
|
161
|
+
const location = `tests[${i}]`;
|
|
130
162
|
if (!isObject(evalCase)) {
|
|
131
163
|
errors.push({
|
|
132
164
|
severity: "error",
|
|
@@ -145,23 +177,29 @@ async function validateEvalFile(filePath) {
|
|
|
145
177
|
message: "Missing or invalid 'id' field (must be a non-empty string)"
|
|
146
178
|
});
|
|
147
179
|
}
|
|
148
|
-
|
|
149
|
-
if (
|
|
180
|
+
let criteria = evalCase.criteria;
|
|
181
|
+
if (criteria === void 0 && "expected_outcome" in evalCase) {
|
|
182
|
+
criteria = evalCase.expected_outcome;
|
|
150
183
|
errors.push({
|
|
151
|
-
severity: "
|
|
184
|
+
severity: "warning",
|
|
152
185
|
filePath: absolutePath,
|
|
153
186
|
location: `${location}.expected_outcome`,
|
|
154
|
-
message: "
|
|
187
|
+
message: "'expected_outcome' is deprecated. Use 'criteria' instead."
|
|
155
188
|
});
|
|
156
189
|
}
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
}
|
|
164
|
-
|
|
190
|
+
if (criteria !== void 0 && (typeof criteria !== "string" || criteria.trim().length === 0)) {
|
|
191
|
+
errors.push({
|
|
192
|
+
severity: "error",
|
|
193
|
+
filePath: absolutePath,
|
|
194
|
+
location: `${location}.criteria`,
|
|
195
|
+
message: "Invalid 'criteria' field (must be a non-empty string if provided)"
|
|
196
|
+
});
|
|
197
|
+
}
|
|
198
|
+
const inputField = evalCase.input;
|
|
199
|
+
if (inputField !== void 0) {
|
|
200
|
+
if (typeof inputField === "string") {
|
|
201
|
+
} else if (Array.isArray(inputField)) {
|
|
202
|
+
validateMessages(inputField, `${location}.input`, absolutePath, errors);
|
|
165
203
|
} else {
|
|
166
204
|
errors.push({
|
|
167
205
|
severity: "error",
|
|
@@ -174,33 +212,23 @@ async function validateEvalFile(filePath) {
|
|
|
174
212
|
errors.push({
|
|
175
213
|
severity: "error",
|
|
176
214
|
filePath: absolutePath,
|
|
177
|
-
location: `${location}.
|
|
178
|
-
message: "Missing '
|
|
215
|
+
location: `${location}.input`,
|
|
216
|
+
message: "Missing 'input' field (must be a string or array of messages)"
|
|
179
217
|
});
|
|
180
218
|
}
|
|
181
|
-
const
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
filePath: absolutePath,
|
|
187
|
-
location: `${location}.expected_messages`,
|
|
188
|
-
message: "Invalid 'expected_messages' field (must be an array if provided)"
|
|
189
|
-
});
|
|
190
|
-
} else if (Array.isArray(expectedMessages)) {
|
|
191
|
-
validateMessages(expectedMessages, `${location}.expected_messages`, absolutePath, errors);
|
|
192
|
-
} else if (expectedOutputAlias !== void 0) {
|
|
193
|
-
if (typeof expectedOutputAlias === "string") {
|
|
194
|
-
} else if (Array.isArray(expectedOutputAlias)) {
|
|
195
|
-
if (expectedOutputAlias.length > 0 && isObject(expectedOutputAlias[0]) && "role" in expectedOutputAlias[0]) {
|
|
219
|
+
const expectedOutputField = evalCase.expected_output;
|
|
220
|
+
if (expectedOutputField !== void 0) {
|
|
221
|
+
if (typeof expectedOutputField === "string") {
|
|
222
|
+
} else if (Array.isArray(expectedOutputField)) {
|
|
223
|
+
if (expectedOutputField.length > 0 && isObject(expectedOutputField[0]) && "role" in expectedOutputField[0]) {
|
|
196
224
|
validateMessages(
|
|
197
|
-
|
|
225
|
+
expectedOutputField,
|
|
198
226
|
`${location}.expected_output`,
|
|
199
227
|
absolutePath,
|
|
200
228
|
errors
|
|
201
229
|
);
|
|
202
230
|
}
|
|
203
|
-
} else if (isObject(
|
|
231
|
+
} else if (isObject(expectedOutputField)) {
|
|
204
232
|
} else {
|
|
205
233
|
errors.push({
|
|
206
234
|
severity: "error",
|
|
@@ -210,9 +238,13 @@ async function validateEvalFile(filePath) {
|
|
|
210
238
|
});
|
|
211
239
|
}
|
|
212
240
|
}
|
|
241
|
+
const assertField = evalCase.assert;
|
|
242
|
+
if (assertField !== void 0) {
|
|
243
|
+
validateAssertArray(assertField, location, absolutePath, errors);
|
|
244
|
+
}
|
|
213
245
|
}
|
|
214
246
|
return {
|
|
215
|
-
valid: errors.length === 0,
|
|
247
|
+
valid: errors.filter((e) => e.severity === "error").length === 0,
|
|
216
248
|
filePath: absolutePath,
|
|
217
249
|
fileType: "eval",
|
|
218
250
|
errors
|
|
@@ -292,6 +324,133 @@ function validateMessages(messages, location, filePath, errors) {
|
|
|
292
324
|
}
|
|
293
325
|
}
|
|
294
326
|
}
|
|
327
|
+
/**
 * Checks the optional top-level `name` / `description` metadata of a parsed
 * eval file and appends warnings to `errors`.
 *
 * Nothing is reported when `name` is absent. When `name` is present:
 *  - a string `name` that does not match NAME_PATTERN yields a warning;
 *  - a missing (or undefined) `description` yields a warning.
 *
 * @param {object} parsed - Parsed eval file content.
 * @param {string} filePath - Path recorded on every pushed entry.
 * @param {Array<object>} errors - Accumulator that receives the warnings.
 * @returns {void}
 */
function validateMetadata(parsed, filePath, errors) {
  const { name } = parsed;
  if (name === void 0) {
    return;
  }
  // Only string names are pattern-checked; other types are not reported here.
  if (typeof name === "string" && !NAME_PATTERN.test(name)) {
    errors.push({
      severity: "warning",
      filePath,
      location: "name",
      message: `Invalid 'name' format '${name}'. Must match pattern /^[a-z0-9-]+$/ (lowercase alphanumeric with hyphens).`
    });
  }
  const hasDescription = "description" in parsed && parsed.description !== void 0;
  if (!hasDescription) {
    errors.push({
      severity: "warning",
      filePath,
      location: "name",
      message: "When 'name' is present, 'description' should also be provided."
    });
  }
}
|
|
350
|
+
/**
 * Validates a `tests` value given as a file path string: warns when the
 * path's extension is not one of VALID_TEST_FILE_EXTENSIONS.
 *
 * @param {string} testsPath - The `tests` value taken from the eval file.
 * @param {string} filePath - Path of the eval file, recorded on the warning.
 * @param {Array<object>} errors - Accumulator that receives the warning.
 * @returns {void}
 */
function validateTestsStringPath(testsPath, filePath, errors) {
  const extension = path2.extname(testsPath);
  if (VALID_TEST_FILE_EXTENSIONS.has(extension)) {
    return;
  }
  const supported = Array.from(VALID_TEST_FILE_EXTENSIONS).join(", ");
  errors.push({
    severity: "warning",
    filePath,
    location: "tests",
    message: `Unsupported file extension '${extension}' for tests path '${testsPath}'. Supported extensions: ${supported}`
  });
}
|
|
361
|
+
/**
 * Validates the `assert` field of one test case, appending a warning to
 * `errors` for every problem found. All findings are warnings, never errors.
 *
 * Checks performed, in order, for each entry (the first failing check ends
 * validation of that entry):
 *  1. the entry is an object;
 *  2. `type` is a string;
 *  3. `type` is accepted by isEvaluatorKind;
 *  4. for types in ASSERTION_TYPES_WITH_VALUE, `value` is a string, and for
 *     `regex` it must also compile as a regular expression;
 *  5. `required`, when present, is delegated to validateRequiredField.
 *
 * @param {unknown} assertField - Raw `assert` value from the test case.
 * @param {string} parentLocation - Location prefix of the owning test case.
 * @param {string} filePath - Path recorded on every pushed entry.
 * @param {Array<object>} errors - Accumulator that receives the warnings.
 * @returns {void}
 */
function validateAssertArray(assertField, parentLocation, filePath, errors) {
  const warn = (location, message) => {
    errors.push({ severity: "warning", filePath, location, message });
  };

  if (!Array.isArray(assertField)) {
    warn(`${parentLocation}.assert`, "'assert' must be an array of assertion objects.");
    return;
  }

  for (const [index, entry] of assertField.entries()) {
    const entryLocation = `${parentLocation}.assert[${index}]`;

    if (!isObject(entry)) {
      warn(entryLocation, "Assertion item must be an object with a type field.");
      continue;
    }

    const kind = entry.type;
    if (typeof kind !== "string") {
      warn(`${entryLocation}.type`, "Assertion item is missing a 'type' field.");
      continue;
    }
    if (!isEvaluatorKind(kind)) {
      warn(`${entryLocation}.type`, `Unknown assertion type '${kind}'.`);
      continue;
    }

    if (ASSERTION_TYPES_WITH_VALUE.has(kind)) {
      const expected = entry.value;
      if (typeof expected !== "string") {
        warn(`${entryLocation}.value`, `Assertion type '${kind}' requires a 'value' field (string).`);
        continue;
      }
      if (kind === "regex") {
        // Compile only to find out whether the pattern is syntactically valid.
        try {
          new RegExp(expected);
        } catch {
          warn(
            `${entryLocation}.value`,
            `Invalid regex pattern '${expected}': not a valid regular expression.`
          );
        }
      }
    }

    const requiredSetting = entry.required;
    if (requiredSetting !== void 0) {
      validateRequiredField(requiredSetting, entryLocation, filePath, errors);
    }
  }
}
|
|
432
|
+
/**
 * Validates the `required` setting of a single assertion item.
 *
 * Accepted values are any boolean, or a number in the range (0, 1]. Anything
 * else appends one warning to `errors`; the function never throws.
 *
 * @param {unknown} required - Raw `required` value (callers pass it only when defined).
 * @param {string} parentLocation - Location of the owning assertion item;
 *   `.required` is appended for the reported location.
 * @param {string} filePath - Path recorded on the pushed entry.
 * @param {Array<object>} errors - Accumulator that receives the warning.
 * @returns {void}
 */
function validateRequiredField(required, parentLocation, filePath, errors) {
  if (typeof required === "boolean") {
    return;
  }
  const location = `${parentLocation}.required`;
  if (typeof required === "number") {
    // Expressed as a positive range test so that NaN (for which every
    // comparison is false) is rejected instead of slipping through.
    const inRange = required > 0 && required <= 1;
    if (!inRange) {
      errors.push({
        severity: "warning",
        filePath,
        location,
        message: `Invalid 'required' value ${required}. When a number, it must be between 0 (exclusive) and 1 (inclusive).`
      });
    }
    return;
  }
  errors.push({
    severity: "warning",
    filePath,
    location,
    message: `Invalid 'required' value. Must be a boolean or a number between 0 (exclusive) and 1 (inclusive).`
  });
}
|
|
295
454
|
function validateContentForRoleMarkers(content, location, filePath, errors) {
|
|
296
455
|
const markers = ["@[System]:", "@[User]:", "@[Assistant]:", "@[Tool]:"];
|
|
297
456
|
for (const marker of markers) {
|
|
@@ -371,6 +530,7 @@ var GEMINI_SETTINGS = /* @__PURE__ */ new Set([
|
|
|
371
530
|
]);
|
|
372
531
|
var CODEX_SETTINGS = /* @__PURE__ */ new Set([
|
|
373
532
|
...COMMON_SETTINGS,
|
|
533
|
+
"model",
|
|
374
534
|
"executable",
|
|
375
535
|
"command",
|
|
376
536
|
"binary",
|
|
@@ -386,41 +546,45 @@ var CODEX_SETTINGS = /* @__PURE__ */ new Set([
|
|
|
386
546
|
"log_format",
|
|
387
547
|
"logFormat",
|
|
388
548
|
"log_output_format",
|
|
389
|
-
"logOutputFormat"
|
|
549
|
+
"logOutputFormat",
|
|
550
|
+
"system_prompt",
|
|
551
|
+
"systemPrompt",
|
|
552
|
+
"workspace_template",
|
|
553
|
+
"workspaceTemplate"
|
|
390
554
|
]);
|
|
391
|
-
var
|
|
555
|
+
var COPILOT_SDK_SETTINGS = /* @__PURE__ */ new Set([
|
|
392
556
|
...COMMON_SETTINGS,
|
|
393
|
-
"
|
|
394
|
-
"
|
|
395
|
-
"
|
|
396
|
-
"
|
|
397
|
-
"
|
|
557
|
+
"cli_url",
|
|
558
|
+
"cliUrl",
|
|
559
|
+
"cli_path",
|
|
560
|
+
"cliPath",
|
|
561
|
+
"github_token",
|
|
562
|
+
"githubToken",
|
|
398
563
|
"model",
|
|
399
564
|
"cwd",
|
|
400
565
|
"timeout_seconds",
|
|
401
566
|
"timeoutSeconds",
|
|
402
567
|
"log_dir",
|
|
403
568
|
"logDir",
|
|
404
|
-
"log_directory",
|
|
405
|
-
"logDirectory",
|
|
406
569
|
"log_format",
|
|
407
570
|
"logFormat",
|
|
408
|
-
"log_output_format",
|
|
409
|
-
"logOutputFormat",
|
|
410
571
|
"system_prompt",
|
|
411
|
-
"systemPrompt"
|
|
572
|
+
"systemPrompt",
|
|
573
|
+
"workspace_template",
|
|
574
|
+
"workspaceTemplate"
|
|
412
575
|
]);
|
|
413
576
|
var VSCODE_SETTINGS = /* @__PURE__ */ new Set([
|
|
414
577
|
...COMMON_SETTINGS,
|
|
578
|
+
"executable",
|
|
415
579
|
"workspace_template",
|
|
416
580
|
"workspaceTemplate",
|
|
417
|
-
"vscode_cmd",
|
|
418
|
-
"command",
|
|
419
581
|
"wait",
|
|
420
582
|
"dry_run",
|
|
421
583
|
"dryRun",
|
|
422
584
|
"subagent_root",
|
|
423
|
-
"subagentRoot"
|
|
585
|
+
"subagentRoot",
|
|
586
|
+
"timeout_seconds",
|
|
587
|
+
"timeoutSeconds"
|
|
424
588
|
]);
|
|
425
589
|
var MOCK_SETTINGS = /* @__PURE__ */ new Set([
|
|
426
590
|
...COMMON_SETTINGS,
|
|
@@ -431,6 +595,29 @@ var MOCK_SETTINGS = /* @__PURE__ */ new Set([
|
|
|
431
595
|
"trace"
|
|
432
596
|
// For testing tool_trajectory evaluator
|
|
433
597
|
]);
|
|
598
|
+
// Setting keys recognised for Claude targets: everything in COMMON_SETTINGS
// plus the keys below. Options are listed in both their snake_case and
// camelCase spellings.
var CLAUDE_SETTINGS = /* @__PURE__ */ new Set([
  ...COMMON_SETTINGS,
  "model",
  "cwd",
  "timeout_seconds", "timeoutSeconds",
  "log_dir", "logDir",
  "log_directory", "logDirectory",
  "log_format", "logFormat",
  "log_output_format", "logOutputFormat",
  "system_prompt", "systemPrompt",
  "workspace_template", "workspaceTemplate",
  "max_turns", "maxTurns",
  "max_budget_usd", "maxBudgetUsd"
]);
|
|
434
621
|
function getKnownSettings(provider) {
|
|
435
622
|
const normalizedProvider = provider.toLowerCase();
|
|
436
623
|
switch (normalizedProvider) {
|
|
@@ -446,8 +633,15 @@ function getKnownSettings(provider) {
|
|
|
446
633
|
case "codex":
|
|
447
634
|
case "codex-cli":
|
|
448
635
|
return CODEX_SETTINGS;
|
|
636
|
+
case "copilot":
|
|
637
|
+
case "copilot-sdk":
|
|
638
|
+
case "copilot_sdk":
|
|
449
639
|
case "copilot-cli":
|
|
450
|
-
return
|
|
640
|
+
return COPILOT_SDK_SETTINGS;
|
|
641
|
+
case "claude":
|
|
642
|
+
case "claude-code":
|
|
643
|
+
case "claude-sdk":
|
|
644
|
+
return CLAUDE_SETTINGS;
|
|
451
645
|
case "vscode":
|
|
452
646
|
case "vscode-insiders":
|
|
453
647
|
return VSCODE_SETTINGS;
|
|
@@ -745,7 +939,32 @@ async function validateConfigFile(filePath) {
|
|
|
745
939
|
});
|
|
746
940
|
}
|
|
747
941
|
}
|
|
748
|
-
const
|
|
942
|
+
const evalPatterns = config.eval_patterns;
|
|
943
|
+
if (evalPatterns !== void 0) {
|
|
944
|
+
if (!Array.isArray(evalPatterns)) {
|
|
945
|
+
errors.push({
|
|
946
|
+
severity: "error",
|
|
947
|
+
filePath,
|
|
948
|
+
location: "eval_patterns",
|
|
949
|
+
message: "Field 'eval_patterns' must be an array"
|
|
950
|
+
});
|
|
951
|
+
} else if (!evalPatterns.every((p) => typeof p === "string")) {
|
|
952
|
+
errors.push({
|
|
953
|
+
severity: "error",
|
|
954
|
+
filePath,
|
|
955
|
+
location: "eval_patterns",
|
|
956
|
+
message: "All entries in 'eval_patterns' must be strings"
|
|
957
|
+
});
|
|
958
|
+
} else if (evalPatterns.length === 0) {
|
|
959
|
+
errors.push({
|
|
960
|
+
severity: "warning",
|
|
961
|
+
filePath,
|
|
962
|
+
location: "eval_patterns",
|
|
963
|
+
message: "Field 'eval_patterns' is empty. Consider removing it or adding patterns."
|
|
964
|
+
});
|
|
965
|
+
}
|
|
966
|
+
}
|
|
967
|
+
const allowedFields = /* @__PURE__ */ new Set(["$schema", "guideline_patterns", "eval_patterns"]);
|
|
749
968
|
const unexpectedFields = Object.keys(config).filter((key) => !allowedFields.has(key));
|
|
750
969
|
if (unexpectedFields.length > 0) {
|
|
751
970
|
errors.push({
|
|
@@ -800,30 +1019,36 @@ async function validateFileReferences(evalFilePath) {
|
|
|
800
1019
|
if (!isObject3(parsed)) {
|
|
801
1020
|
return errors;
|
|
802
1021
|
}
|
|
803
|
-
|
|
804
|
-
if (
|
|
1022
|
+
let cases = parsed.tests;
|
|
1023
|
+
if (cases === void 0 && "eval_cases" in parsed) {
|
|
1024
|
+
cases = parsed.eval_cases;
|
|
1025
|
+
}
|
|
1026
|
+
if (cases === void 0 && "evalcases" in parsed) {
|
|
1027
|
+
cases = parsed.evalcases;
|
|
1028
|
+
}
|
|
1029
|
+
if (!Array.isArray(cases)) {
|
|
805
1030
|
return errors;
|
|
806
1031
|
}
|
|
807
|
-
for (let i = 0; i <
|
|
808
|
-
const evalCase =
|
|
1032
|
+
for (let i = 0; i < cases.length; i++) {
|
|
1033
|
+
const evalCase = cases[i];
|
|
809
1034
|
if (!isObject3(evalCase)) {
|
|
810
1035
|
continue;
|
|
811
1036
|
}
|
|
812
|
-
const
|
|
813
|
-
if (Array.isArray(
|
|
1037
|
+
const inputField = evalCase.input;
|
|
1038
|
+
if (Array.isArray(inputField)) {
|
|
814
1039
|
await validateMessagesFileRefs(
|
|
815
|
-
|
|
816
|
-
`
|
|
1040
|
+
inputField,
|
|
1041
|
+
`tests[${i}].input`,
|
|
817
1042
|
searchRoots,
|
|
818
1043
|
absolutePath,
|
|
819
1044
|
errors
|
|
820
1045
|
);
|
|
821
1046
|
}
|
|
822
|
-
const
|
|
823
|
-
if (Array.isArray(
|
|
1047
|
+
const expectedOutputField = evalCase.expected_output;
|
|
1048
|
+
if (Array.isArray(expectedOutputField)) {
|
|
824
1049
|
await validateMessagesFileRefs(
|
|
825
|
-
|
|
826
|
-
`
|
|
1050
|
+
expectedOutputField,
|
|
1051
|
+
`tests[${i}].expected_output`,
|
|
827
1052
|
searchRoots,
|
|
828
1053
|
absolutePath,
|
|
829
1054
|
errors
|