@agentv/core 2.5.8 → 2.7.1-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-LGQ5OPJD.js → chunk-6W5E3VR6.js} +383 -54
- package/dist/chunk-6W5E3VR6.js.map +1 -0
- package/dist/chunk-HFSYZHGF.js +82 -0
- package/dist/chunk-HFSYZHGF.js.map +1 -0
- package/dist/chunk-HMXZ2AX4.js +112 -0
- package/dist/chunk-HMXZ2AX4.js.map +1 -0
- package/dist/esm-5Q4BZALM.js +968 -0
- package/dist/esm-5Q4BZALM.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +337 -70
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +294 -69
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +9221 -4040
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +1717 -234
- package/dist/index.d.ts +1717 -234
- package/dist/index.js +6563 -3147
- package/dist/index.js.map +1 -1
- package/dist/otlp-json-file-exporter-77FDBRSY.js +7 -0
- package/dist/otlp-json-file-exporter-77FDBRSY.js.map +1 -0
- package/dist/simple-trace-file-exporter-S76DMABU.js +7 -0
- package/dist/simple-trace-file-exporter-S76DMABU.js.map +1 -0
- package/package.json +18 -5
- package/dist/chunk-LGQ5OPJD.js.map +0 -1
|
@@ -106,6 +106,37 @@ function getExpectedSchema(fileType) {
|
|
|
106
106
|
var import_promises2 = require("fs/promises");
|
|
107
107
|
var import_node_path2 = __toESM(require("path"), 1);
|
|
108
108
|
var import_yaml2 = require("yaml");
|
|
109
|
+
|
|
110
|
+
// src/evaluation/types.ts
|
|
111
|
+
var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
|
|
112
|
+
var TEST_MESSAGE_ROLE_SET = new Set(TEST_MESSAGE_ROLE_VALUES);
|
|
113
|
+
var EVALUATOR_KIND_VALUES = [
|
|
114
|
+
"code_judge",
|
|
115
|
+
"llm_judge",
|
|
116
|
+
"rubric",
|
|
117
|
+
"composite",
|
|
118
|
+
"tool_trajectory",
|
|
119
|
+
"field_accuracy",
|
|
120
|
+
"latency",
|
|
121
|
+
"cost",
|
|
122
|
+
"token_usage",
|
|
123
|
+
"execution_metrics",
|
|
124
|
+
"agent_judge",
|
|
125
|
+
"contains",
|
|
126
|
+
"regex",
|
|
127
|
+
"is_json",
|
|
128
|
+
"equals",
|
|
129
|
+
"rubrics"
|
|
130
|
+
];
|
|
131
|
+
var EVALUATOR_KIND_SET = new Set(EVALUATOR_KIND_VALUES);
|
|
132
|
+
function isEvaluatorKind(value) {
|
|
133
|
+
return typeof value === "string" && EVALUATOR_KIND_SET.has(value);
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
// src/evaluation/validation/eval-validator.ts
|
|
137
|
+
var ASSERTION_TYPES_WITH_VALUE = /* @__PURE__ */ new Set(["contains", "equals", "regex"]);
|
|
138
|
+
var VALID_TEST_FILE_EXTENSIONS = /* @__PURE__ */ new Set([".yaml", ".yml", ".jsonl"]);
|
|
139
|
+
var NAME_PATTERN = /^[a-z0-9-]+$/;
|
|
109
140
|
function isObject(value) {
|
|
110
141
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
111
142
|
}
|
|
@@ -142,13 +173,41 @@ async function validateEvalFile(filePath) {
|
|
|
142
173
|
errors
|
|
143
174
|
};
|
|
144
175
|
}
|
|
145
|
-
|
|
146
|
-
|
|
176
|
+
validateMetadata(parsed, absolutePath, errors);
|
|
177
|
+
let cases = parsed.tests;
|
|
178
|
+
if (cases === void 0 && "eval_cases" in parsed) {
|
|
179
|
+
cases = parsed.eval_cases;
|
|
147
180
|
errors.push({
|
|
148
|
-
severity: "
|
|
181
|
+
severity: "warning",
|
|
182
|
+
filePath: absolutePath,
|
|
183
|
+
location: "eval_cases",
|
|
184
|
+
message: "'eval_cases' is deprecated. Use 'tests' instead."
|
|
185
|
+
});
|
|
186
|
+
}
|
|
187
|
+
if (cases === void 0 && "evalcases" in parsed) {
|
|
188
|
+
cases = parsed.evalcases;
|
|
189
|
+
errors.push({
|
|
190
|
+
severity: "warning",
|
|
149
191
|
filePath: absolutePath,
|
|
150
192
|
location: "evalcases",
|
|
151
|
-
message: "
|
|
193
|
+
message: "'evalcases' is deprecated. Use 'tests' instead."
|
|
194
|
+
});
|
|
195
|
+
}
|
|
196
|
+
if (typeof cases === "string") {
|
|
197
|
+
validateTestsStringPath(cases, absolutePath, errors);
|
|
198
|
+
return {
|
|
199
|
+
valid: errors.filter((e) => e.severity === "error").length === 0,
|
|
200
|
+
filePath: absolutePath,
|
|
201
|
+
fileType: "eval",
|
|
202
|
+
errors
|
|
203
|
+
};
|
|
204
|
+
}
|
|
205
|
+
if (!Array.isArray(cases)) {
|
|
206
|
+
errors.push({
|
|
207
|
+
severity: "error",
|
|
208
|
+
filePath: absolutePath,
|
|
209
|
+
location: "tests",
|
|
210
|
+
message: "Missing or invalid 'tests' field (must be an array or a file path string)"
|
|
152
211
|
});
|
|
153
212
|
return {
|
|
154
213
|
valid: errors.length === 0,
|
|
@@ -157,9 +216,9 @@ async function validateEvalFile(filePath) {
|
|
|
157
216
|
errors
|
|
158
217
|
};
|
|
159
218
|
}
|
|
160
|
-
for (let i = 0; i <
|
|
161
|
-
const evalCase =
|
|
162
|
-
const location = `
|
|
219
|
+
for (let i = 0; i < cases.length; i++) {
|
|
220
|
+
const evalCase = cases[i];
|
|
221
|
+
const location = `tests[${i}]`;
|
|
163
222
|
if (!isObject(evalCase)) {
|
|
164
223
|
errors.push({
|
|
165
224
|
severity: "error",
|
|
@@ -178,23 +237,29 @@ async function validateEvalFile(filePath) {
|
|
|
178
237
|
message: "Missing or invalid 'id' field (must be a non-empty string)"
|
|
179
238
|
});
|
|
180
239
|
}
|
|
181
|
-
|
|
182
|
-
if (
|
|
240
|
+
let criteria = evalCase.criteria;
|
|
241
|
+
if (criteria === void 0 && "expected_outcome" in evalCase) {
|
|
242
|
+
criteria = evalCase.expected_outcome;
|
|
183
243
|
errors.push({
|
|
184
|
-
severity: "
|
|
244
|
+
severity: "warning",
|
|
185
245
|
filePath: absolutePath,
|
|
186
246
|
location: `${location}.expected_outcome`,
|
|
187
|
-
message: "
|
|
247
|
+
message: "'expected_outcome' is deprecated. Use 'criteria' instead."
|
|
248
|
+
});
|
|
249
|
+
}
|
|
250
|
+
if (criteria !== void 0 && (typeof criteria !== "string" || criteria.trim().length === 0)) {
|
|
251
|
+
errors.push({
|
|
252
|
+
severity: "error",
|
|
253
|
+
filePath: absolutePath,
|
|
254
|
+
location: `${location}.criteria`,
|
|
255
|
+
message: "Invalid 'criteria' field (must be a non-empty string if provided)"
|
|
188
256
|
});
|
|
189
257
|
}
|
|
190
|
-
const
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
if (typeof inputAlias === "string") {
|
|
196
|
-
} else if (Array.isArray(inputAlias)) {
|
|
197
|
-
validateMessages(inputAlias, `${location}.input`, absolutePath, errors);
|
|
258
|
+
const inputField = evalCase.input;
|
|
259
|
+
if (inputField !== void 0) {
|
|
260
|
+
if (typeof inputField === "string") {
|
|
261
|
+
} else if (Array.isArray(inputField)) {
|
|
262
|
+
validateMessages(inputField, `${location}.input`, absolutePath, errors);
|
|
198
263
|
} else {
|
|
199
264
|
errors.push({
|
|
200
265
|
severity: "error",
|
|
@@ -207,33 +272,23 @@ async function validateEvalFile(filePath) {
|
|
|
207
272
|
errors.push({
|
|
208
273
|
severity: "error",
|
|
209
274
|
filePath: absolutePath,
|
|
210
|
-
location: `${location}.
|
|
211
|
-
message: "Missing '
|
|
275
|
+
location: `${location}.input`,
|
|
276
|
+
message: "Missing 'input' field (must be a string or array of messages)"
|
|
212
277
|
});
|
|
213
278
|
}
|
|
214
|
-
const
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
filePath: absolutePath,
|
|
220
|
-
location: `${location}.expected_messages`,
|
|
221
|
-
message: "Invalid 'expected_messages' field (must be an array if provided)"
|
|
222
|
-
});
|
|
223
|
-
} else if (Array.isArray(expectedMessages)) {
|
|
224
|
-
validateMessages(expectedMessages, `${location}.expected_messages`, absolutePath, errors);
|
|
225
|
-
} else if (expectedOutputAlias !== void 0) {
|
|
226
|
-
if (typeof expectedOutputAlias === "string") {
|
|
227
|
-
} else if (Array.isArray(expectedOutputAlias)) {
|
|
228
|
-
if (expectedOutputAlias.length > 0 && isObject(expectedOutputAlias[0]) && "role" in expectedOutputAlias[0]) {
|
|
279
|
+
const expectedOutputField = evalCase.expected_output;
|
|
280
|
+
if (expectedOutputField !== void 0) {
|
|
281
|
+
if (typeof expectedOutputField === "string") {
|
|
282
|
+
} else if (Array.isArray(expectedOutputField)) {
|
|
283
|
+
if (expectedOutputField.length > 0 && isObject(expectedOutputField[0]) && "role" in expectedOutputField[0]) {
|
|
229
284
|
validateMessages(
|
|
230
|
-
|
|
285
|
+
expectedOutputField,
|
|
231
286
|
`${location}.expected_output`,
|
|
232
287
|
absolutePath,
|
|
233
288
|
errors
|
|
234
289
|
);
|
|
235
290
|
}
|
|
236
|
-
} else if (isObject(
|
|
291
|
+
} else if (isObject(expectedOutputField)) {
|
|
237
292
|
} else {
|
|
238
293
|
errors.push({
|
|
239
294
|
severity: "error",
|
|
@@ -243,9 +298,13 @@ async function validateEvalFile(filePath) {
|
|
|
243
298
|
});
|
|
244
299
|
}
|
|
245
300
|
}
|
|
301
|
+
const assertField = evalCase.assert;
|
|
302
|
+
if (assertField !== void 0) {
|
|
303
|
+
validateAssertArray(assertField, location, absolutePath, errors);
|
|
304
|
+
}
|
|
246
305
|
}
|
|
247
306
|
return {
|
|
248
|
-
valid: errors.length === 0,
|
|
307
|
+
valid: errors.filter((e) => e.severity === "error").length === 0,
|
|
249
308
|
filePath: absolutePath,
|
|
250
309
|
fileType: "eval",
|
|
251
310
|
errors
|
|
@@ -325,6 +384,133 @@ function validateMessages(messages, location, filePath, errors) {
|
|
|
325
384
|
}
|
|
326
385
|
}
|
|
327
386
|
}
|
|
387
|
+
function validateMetadata(parsed, filePath, errors) {
|
|
388
|
+
const name = parsed.name;
|
|
389
|
+
if (name !== void 0) {
|
|
390
|
+
if (typeof name === "string") {
|
|
391
|
+
if (!NAME_PATTERN.test(name)) {
|
|
392
|
+
errors.push({
|
|
393
|
+
severity: "warning",
|
|
394
|
+
filePath,
|
|
395
|
+
location: "name",
|
|
396
|
+
message: `Invalid 'name' format '${name}'. Must match pattern /^[a-z0-9-]+$/ (lowercase alphanumeric with hyphens).`
|
|
397
|
+
});
|
|
398
|
+
}
|
|
399
|
+
}
|
|
400
|
+
if (!("description" in parsed) || parsed.description === void 0) {
|
|
401
|
+
errors.push({
|
|
402
|
+
severity: "warning",
|
|
403
|
+
filePath,
|
|
404
|
+
location: "name",
|
|
405
|
+
message: "When 'name' is present, 'description' should also be provided."
|
|
406
|
+
});
|
|
407
|
+
}
|
|
408
|
+
}
|
|
409
|
+
}
|
|
410
|
+
function validateTestsStringPath(testsPath, filePath, errors) {
|
|
411
|
+
const ext = import_node_path2.default.extname(testsPath);
|
|
412
|
+
if (!VALID_TEST_FILE_EXTENSIONS.has(ext)) {
|
|
413
|
+
errors.push({
|
|
414
|
+
severity: "warning",
|
|
415
|
+
filePath,
|
|
416
|
+
location: "tests",
|
|
417
|
+
message: `Unsupported file extension '${ext}' for tests path '${testsPath}'. Supported extensions: ${[...VALID_TEST_FILE_EXTENSIONS].join(", ")}`
|
|
418
|
+
});
|
|
419
|
+
}
|
|
420
|
+
}
|
|
421
|
+
function validateAssertArray(assertField, parentLocation, filePath, errors) {
|
|
422
|
+
if (!Array.isArray(assertField)) {
|
|
423
|
+
errors.push({
|
|
424
|
+
severity: "warning",
|
|
425
|
+
filePath,
|
|
426
|
+
location: `${parentLocation}.assert`,
|
|
427
|
+
message: "'assert' must be an array of assertion objects."
|
|
428
|
+
});
|
|
429
|
+
return;
|
|
430
|
+
}
|
|
431
|
+
for (let i = 0; i < assertField.length; i++) {
|
|
432
|
+
const item = assertField[i];
|
|
433
|
+
const location = `${parentLocation}.assert[${i}]`;
|
|
434
|
+
if (!isObject(item)) {
|
|
435
|
+
errors.push({
|
|
436
|
+
severity: "warning",
|
|
437
|
+
filePath,
|
|
438
|
+
location,
|
|
439
|
+
message: "Assertion item must be an object with a type field."
|
|
440
|
+
});
|
|
441
|
+
continue;
|
|
442
|
+
}
|
|
443
|
+
const typeValue = item.type;
|
|
444
|
+
if (typeValue === void 0 || typeof typeValue !== "string") {
|
|
445
|
+
errors.push({
|
|
446
|
+
severity: "warning",
|
|
447
|
+
filePath,
|
|
448
|
+
location: `${location}.type`,
|
|
449
|
+
message: "Assertion item is missing a 'type' field."
|
|
450
|
+
});
|
|
451
|
+
continue;
|
|
452
|
+
}
|
|
453
|
+
if (!isEvaluatorKind(typeValue)) {
|
|
454
|
+
errors.push({
|
|
455
|
+
severity: "warning",
|
|
456
|
+
filePath,
|
|
457
|
+
location: `${location}.type`,
|
|
458
|
+
message: `Unknown assertion type '${typeValue}'.`
|
|
459
|
+
});
|
|
460
|
+
continue;
|
|
461
|
+
}
|
|
462
|
+
if (ASSERTION_TYPES_WITH_VALUE.has(typeValue)) {
|
|
463
|
+
const value = item.value;
|
|
464
|
+
if (value === void 0 || typeof value !== "string") {
|
|
465
|
+
errors.push({
|
|
466
|
+
severity: "warning",
|
|
467
|
+
filePath,
|
|
468
|
+
location: `${location}.value`,
|
|
469
|
+
message: `Assertion type '${typeValue}' requires a 'value' field (string).`
|
|
470
|
+
});
|
|
471
|
+
continue;
|
|
472
|
+
}
|
|
473
|
+
if (typeValue === "regex") {
|
|
474
|
+
try {
|
|
475
|
+
new RegExp(value);
|
|
476
|
+
} catch {
|
|
477
|
+
errors.push({
|
|
478
|
+
severity: "warning",
|
|
479
|
+
filePath,
|
|
480
|
+
location: `${location}.value`,
|
|
481
|
+
message: `Invalid regex pattern '${value}': not a valid regular expression.`
|
|
482
|
+
});
|
|
483
|
+
}
|
|
484
|
+
}
|
|
485
|
+
}
|
|
486
|
+
const required = item.required;
|
|
487
|
+
if (required !== void 0) {
|
|
488
|
+
validateRequiredField(required, location, filePath, errors);
|
|
489
|
+
}
|
|
490
|
+
}
|
|
491
|
+
}
|
|
492
|
+
function validateRequiredField(required, parentLocation, filePath, errors) {
|
|
493
|
+
if (typeof required === "boolean") {
|
|
494
|
+
return;
|
|
495
|
+
}
|
|
496
|
+
if (typeof required === "number") {
|
|
497
|
+
if (required <= 0 || required > 1) {
|
|
498
|
+
errors.push({
|
|
499
|
+
severity: "warning",
|
|
500
|
+
filePath,
|
|
501
|
+
location: `${parentLocation}.required`,
|
|
502
|
+
message: `Invalid 'required' value ${required}. When a number, it must be between 0 (exclusive) and 1 (inclusive).`
|
|
503
|
+
});
|
|
504
|
+
}
|
|
505
|
+
return;
|
|
506
|
+
}
|
|
507
|
+
errors.push({
|
|
508
|
+
severity: "warning",
|
|
509
|
+
filePath,
|
|
510
|
+
location: `${parentLocation}.required`,
|
|
511
|
+
message: `Invalid 'required' value. Must be a boolean or a number between 0 (exclusive) and 1 (inclusive).`
|
|
512
|
+
});
|
|
513
|
+
}
|
|
328
514
|
function validateContentForRoleMarkers(content, location, filePath, errors) {
|
|
329
515
|
const markers = ["@[System]:", "@[User]:", "@[Assistant]:", "@[Tool]:"];
|
|
330
516
|
for (const marker of markers) {
|
|
@@ -378,6 +564,9 @@ var CliTargetInputSchema = import_zod.z.object({
|
|
|
378
564
|
attachmentsFormat: import_zod.z.string().optional(),
|
|
379
565
|
// Working directory - optional
|
|
380
566
|
cwd: import_zod.z.string().optional(),
|
|
567
|
+
// Workspace template directory - optional (mutually exclusive with cwd)
|
|
568
|
+
workspace_template: import_zod.z.string().optional(),
|
|
569
|
+
workspaceTemplate: import_zod.z.string().optional(),
|
|
381
570
|
// Timeout in seconds - optional
|
|
382
571
|
timeout_seconds: import_zod.z.number().positive().optional(),
|
|
383
572
|
timeoutSeconds: import_zod.z.number().positive().optional(),
|
|
@@ -419,6 +608,7 @@ var CliTargetConfigSchema = import_zod.z.object({
|
|
|
419
608
|
commandTemplate: import_zod.z.string().min(1),
|
|
420
609
|
filesFormat: import_zod.z.string().optional(),
|
|
421
610
|
cwd: import_zod.z.string().optional(),
|
|
611
|
+
workspaceTemplate: import_zod.z.string().optional(),
|
|
422
612
|
timeoutMs: import_zod.z.number().positive().optional(),
|
|
423
613
|
healthcheck: CliHealthcheckSchema.optional(),
|
|
424
614
|
verbose: import_zod.z.boolean().optional(),
|
|
@@ -436,7 +626,9 @@ var BASE_TARGET_SCHEMA = import_zod.z.object({
|
|
|
436
626
|
name: import_zod.z.string().min(1, "target name is required"),
|
|
437
627
|
provider: import_zod.z.string().min(1, "provider is required"),
|
|
438
628
|
judge_target: import_zod.z.string().optional(),
|
|
439
|
-
workers: import_zod.z.number().int().min(1).optional()
|
|
629
|
+
workers: import_zod.z.number().int().min(1).optional(),
|
|
630
|
+
workspace_template: import_zod.z.string().optional(),
|
|
631
|
+
workspaceTemplate: import_zod.z.string().optional()
|
|
440
632
|
}).passthrough();
|
|
441
633
|
|
|
442
634
|
// src/evaluation/providers/types.ts
|
|
@@ -445,10 +637,11 @@ var KNOWN_PROVIDERS = [
|
|
|
445
637
|
"anthropic",
|
|
446
638
|
"gemini",
|
|
447
639
|
"codex",
|
|
640
|
+
"copilot",
|
|
448
641
|
"copilot-cli",
|
|
449
642
|
"pi-coding-agent",
|
|
450
643
|
"pi-agent-sdk",
|
|
451
|
-
"claude
|
|
644
|
+
"claude",
|
|
452
645
|
"cli",
|
|
453
646
|
"mock",
|
|
454
647
|
"vscode",
|
|
@@ -463,8 +656,16 @@ var PROVIDER_ALIASES = [
|
|
|
463
656
|
// alias for "gemini"
|
|
464
657
|
"codex-cli",
|
|
465
658
|
// alias for "codex"
|
|
659
|
+
"copilot-sdk",
|
|
660
|
+
// alias for "copilot"
|
|
661
|
+
"copilot_sdk",
|
|
662
|
+
// alias for "copilot" (underscore variant)
|
|
466
663
|
"pi",
|
|
467
664
|
// alias for "pi-coding-agent"
|
|
665
|
+
"claude-code",
|
|
666
|
+
// alias for "claude" (legacy)
|
|
667
|
+
"claude-sdk",
|
|
668
|
+
// alias for "claude"
|
|
468
669
|
"openai",
|
|
469
670
|
// legacy/future support
|
|
470
671
|
"bedrock",
|
|
@@ -535,6 +736,7 @@ var GEMINI_SETTINGS = /* @__PURE__ */ new Set([
|
|
|
535
736
|
]);
|
|
536
737
|
var CODEX_SETTINGS = /* @__PURE__ */ new Set([
|
|
537
738
|
...COMMON_SETTINGS,
|
|
739
|
+
"model",
|
|
538
740
|
"executable",
|
|
539
741
|
"command",
|
|
540
742
|
"binary",
|
|
@@ -550,41 +752,45 @@ var CODEX_SETTINGS = /* @__PURE__ */ new Set([
|
|
|
550
752
|
"log_format",
|
|
551
753
|
"logFormat",
|
|
552
754
|
"log_output_format",
|
|
553
|
-
"logOutputFormat"
|
|
755
|
+
"logOutputFormat",
|
|
756
|
+
"system_prompt",
|
|
757
|
+
"systemPrompt",
|
|
758
|
+
"workspace_template",
|
|
759
|
+
"workspaceTemplate"
|
|
554
760
|
]);
|
|
555
|
-
var
|
|
761
|
+
var COPILOT_SDK_SETTINGS = /* @__PURE__ */ new Set([
|
|
556
762
|
...COMMON_SETTINGS,
|
|
557
|
-
"
|
|
558
|
-
"
|
|
559
|
-
"
|
|
560
|
-
"
|
|
561
|
-
"
|
|
763
|
+
"cli_url",
|
|
764
|
+
"cliUrl",
|
|
765
|
+
"cli_path",
|
|
766
|
+
"cliPath",
|
|
767
|
+
"github_token",
|
|
768
|
+
"githubToken",
|
|
562
769
|
"model",
|
|
563
770
|
"cwd",
|
|
564
771
|
"timeout_seconds",
|
|
565
772
|
"timeoutSeconds",
|
|
566
773
|
"log_dir",
|
|
567
774
|
"logDir",
|
|
568
|
-
"log_directory",
|
|
569
|
-
"logDirectory",
|
|
570
775
|
"log_format",
|
|
571
776
|
"logFormat",
|
|
572
|
-
"log_output_format",
|
|
573
|
-
"logOutputFormat",
|
|
574
777
|
"system_prompt",
|
|
575
|
-
"systemPrompt"
|
|
778
|
+
"systemPrompt",
|
|
779
|
+
"workspace_template",
|
|
780
|
+
"workspaceTemplate"
|
|
576
781
|
]);
|
|
577
782
|
var VSCODE_SETTINGS = /* @__PURE__ */ new Set([
|
|
578
783
|
...COMMON_SETTINGS,
|
|
784
|
+
"executable",
|
|
579
785
|
"workspace_template",
|
|
580
786
|
"workspaceTemplate",
|
|
581
|
-
"vscode_cmd",
|
|
582
|
-
"command",
|
|
583
787
|
"wait",
|
|
584
788
|
"dry_run",
|
|
585
789
|
"dryRun",
|
|
586
790
|
"subagent_root",
|
|
587
|
-
"subagentRoot"
|
|
791
|
+
"subagentRoot",
|
|
792
|
+
"timeout_seconds",
|
|
793
|
+
"timeoutSeconds"
|
|
588
794
|
]);
|
|
589
795
|
var MOCK_SETTINGS = /* @__PURE__ */ new Set([
|
|
590
796
|
...COMMON_SETTINGS,
|
|
@@ -595,6 +801,29 @@ var MOCK_SETTINGS = /* @__PURE__ */ new Set([
|
|
|
595
801
|
"trace"
|
|
596
802
|
// For testing tool_trajectory evaluator
|
|
597
803
|
]);
|
|
804
|
+
var CLAUDE_SETTINGS = /* @__PURE__ */ new Set([
|
|
805
|
+
...COMMON_SETTINGS,
|
|
806
|
+
"model",
|
|
807
|
+
"cwd",
|
|
808
|
+
"timeout_seconds",
|
|
809
|
+
"timeoutSeconds",
|
|
810
|
+
"log_dir",
|
|
811
|
+
"logDir",
|
|
812
|
+
"log_directory",
|
|
813
|
+
"logDirectory",
|
|
814
|
+
"log_format",
|
|
815
|
+
"logFormat",
|
|
816
|
+
"log_output_format",
|
|
817
|
+
"logOutputFormat",
|
|
818
|
+
"system_prompt",
|
|
819
|
+
"systemPrompt",
|
|
820
|
+
"workspace_template",
|
|
821
|
+
"workspaceTemplate",
|
|
822
|
+
"max_turns",
|
|
823
|
+
"maxTurns",
|
|
824
|
+
"max_budget_usd",
|
|
825
|
+
"maxBudgetUsd"
|
|
826
|
+
]);
|
|
598
827
|
function getKnownSettings(provider) {
|
|
599
828
|
const normalizedProvider = provider.toLowerCase();
|
|
600
829
|
switch (normalizedProvider) {
|
|
@@ -610,8 +839,15 @@ function getKnownSettings(provider) {
|
|
|
610
839
|
case "codex":
|
|
611
840
|
case "codex-cli":
|
|
612
841
|
return CODEX_SETTINGS;
|
|
842
|
+
case "copilot":
|
|
843
|
+
case "copilot-sdk":
|
|
844
|
+
case "copilot_sdk":
|
|
613
845
|
case "copilot-cli":
|
|
614
|
-
return
|
|
846
|
+
return COPILOT_SDK_SETTINGS;
|
|
847
|
+
case "claude":
|
|
848
|
+
case "claude-code":
|
|
849
|
+
case "claude-sdk":
|
|
850
|
+
return CLAUDE_SETTINGS;
|
|
615
851
|
case "vscode":
|
|
616
852
|
case "vscode-insiders":
|
|
617
853
|
return VSCODE_SETTINGS;
|
|
@@ -909,7 +1145,32 @@ async function validateConfigFile(filePath) {
|
|
|
909
1145
|
});
|
|
910
1146
|
}
|
|
911
1147
|
}
|
|
912
|
-
const
|
|
1148
|
+
const evalPatterns = config.eval_patterns;
|
|
1149
|
+
if (evalPatterns !== void 0) {
|
|
1150
|
+
if (!Array.isArray(evalPatterns)) {
|
|
1151
|
+
errors.push({
|
|
1152
|
+
severity: "error",
|
|
1153
|
+
filePath,
|
|
1154
|
+
location: "eval_patterns",
|
|
1155
|
+
message: "Field 'eval_patterns' must be an array"
|
|
1156
|
+
});
|
|
1157
|
+
} else if (!evalPatterns.every((p) => typeof p === "string")) {
|
|
1158
|
+
errors.push({
|
|
1159
|
+
severity: "error",
|
|
1160
|
+
filePath,
|
|
1161
|
+
location: "eval_patterns",
|
|
1162
|
+
message: "All entries in 'eval_patterns' must be strings"
|
|
1163
|
+
});
|
|
1164
|
+
} else if (evalPatterns.length === 0) {
|
|
1165
|
+
errors.push({
|
|
1166
|
+
severity: "warning",
|
|
1167
|
+
filePath,
|
|
1168
|
+
location: "eval_patterns",
|
|
1169
|
+
message: "Field 'eval_patterns' is empty. Consider removing it or adding patterns."
|
|
1170
|
+
});
|
|
1171
|
+
}
|
|
1172
|
+
}
|
|
1173
|
+
const allowedFields = /* @__PURE__ */ new Set(["$schema", "guideline_patterns", "eval_patterns"]);
|
|
913
1174
|
const unexpectedFields = Object.keys(config).filter((key) => !allowedFields.has(key));
|
|
914
1175
|
if (unexpectedFields.length > 0) {
|
|
915
1176
|
errors.push({
|
|
@@ -1046,30 +1307,36 @@ async function validateFileReferences(evalFilePath) {
|
|
|
1046
1307
|
if (!isObject3(parsed)) {
|
|
1047
1308
|
return errors;
|
|
1048
1309
|
}
|
|
1049
|
-
|
|
1050
|
-
if (
|
|
1310
|
+
let cases = parsed.tests;
|
|
1311
|
+
if (cases === void 0 && "eval_cases" in parsed) {
|
|
1312
|
+
cases = parsed.eval_cases;
|
|
1313
|
+
}
|
|
1314
|
+
if (cases === void 0 && "evalcases" in parsed) {
|
|
1315
|
+
cases = parsed.evalcases;
|
|
1316
|
+
}
|
|
1317
|
+
if (!Array.isArray(cases)) {
|
|
1051
1318
|
return errors;
|
|
1052
1319
|
}
|
|
1053
|
-
for (let i = 0; i <
|
|
1054
|
-
const evalCase =
|
|
1320
|
+
for (let i = 0; i < cases.length; i++) {
|
|
1321
|
+
const evalCase = cases[i];
|
|
1055
1322
|
if (!isObject3(evalCase)) {
|
|
1056
1323
|
continue;
|
|
1057
1324
|
}
|
|
1058
|
-
const
|
|
1059
|
-
if (Array.isArray(
|
|
1325
|
+
const inputField = evalCase.input;
|
|
1326
|
+
if (Array.isArray(inputField)) {
|
|
1060
1327
|
await validateMessagesFileRefs(
|
|
1061
|
-
|
|
1062
|
-
`
|
|
1328
|
+
inputField,
|
|
1329
|
+
`tests[${i}].input`,
|
|
1063
1330
|
searchRoots,
|
|
1064
1331
|
absolutePath,
|
|
1065
1332
|
errors
|
|
1066
1333
|
);
|
|
1067
1334
|
}
|
|
1068
|
-
const
|
|
1069
|
-
if (Array.isArray(
|
|
1335
|
+
const expectedOutputField = evalCase.expected_output;
|
|
1336
|
+
if (Array.isArray(expectedOutputField)) {
|
|
1070
1337
|
await validateMessagesFileRefs(
|
|
1071
|
-
|
|
1072
|
-
`
|
|
1338
|
+
expectedOutputField,
|
|
1339
|
+
`tests[${i}].expected_output`,
|
|
1073
1340
|
searchRoots,
|
|
1074
1341
|
absolutePath,
|
|
1075
1342
|
errors
|