@agentv/core 2.5.8 → 2.7.1-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,8 +4,9 @@ import {
4
4
  PROVIDER_ALIASES,
5
5
  buildSearchRoots,
6
6
  findGitRoot,
7
+ isEvaluatorKind,
7
8
  resolveFileReference
8
- } from "../../chunk-LGQ5OPJD.js";
9
+ } from "../../chunk-6W5E3VR6.js";
9
10
 
10
11
  // src/evaluation/validation/file-type.ts
11
12
  import { readFile } from "node:fs/promises";
@@ -73,6 +74,9 @@ function getExpectedSchema(fileType) {
73
74
  import { readFile as readFile2 } from "node:fs/promises";
74
75
  import path2 from "node:path";
75
76
  import { parse as parse2 } from "yaml";
77
+ var ASSERTION_TYPES_WITH_VALUE = /* @__PURE__ */ new Set(["contains", "equals", "regex"]);
78
+ var VALID_TEST_FILE_EXTENSIONS = /* @__PURE__ */ new Set([".yaml", ".yml", ".jsonl"]);
79
+ var NAME_PATTERN = /^[a-z0-9-]+$/;
76
80
  function isObject(value) {
77
81
  return typeof value === "object" && value !== null && !Array.isArray(value);
78
82
  }
@@ -109,13 +113,41 @@ async function validateEvalFile(filePath) {
109
113
  errors
110
114
  };
111
115
  }
112
- const evalcases = parsed.evalcases;
113
- if (!Array.isArray(evalcases)) {
116
+ validateMetadata(parsed, absolutePath, errors);
117
+ let cases = parsed.tests;
118
+ if (cases === void 0 && "eval_cases" in parsed) {
119
+ cases = parsed.eval_cases;
114
120
  errors.push({
115
- severity: "error",
121
+ severity: "warning",
122
+ filePath: absolutePath,
123
+ location: "eval_cases",
124
+ message: "'eval_cases' is deprecated. Use 'tests' instead."
125
+ });
126
+ }
127
+ if (cases === void 0 && "evalcases" in parsed) {
128
+ cases = parsed.evalcases;
129
+ errors.push({
130
+ severity: "warning",
116
131
  filePath: absolutePath,
117
132
  location: "evalcases",
118
- message: "Missing or invalid 'evalcases' field (must be an array)"
133
+ message: "'evalcases' is deprecated. Use 'tests' instead."
134
+ });
135
+ }
136
+ if (typeof cases === "string") {
137
+ validateTestsStringPath(cases, absolutePath, errors);
138
+ return {
139
+ valid: errors.filter((e) => e.severity === "error").length === 0,
140
+ filePath: absolutePath,
141
+ fileType: "eval",
142
+ errors
143
+ };
144
+ }
145
+ if (!Array.isArray(cases)) {
146
+ errors.push({
147
+ severity: "error",
148
+ filePath: absolutePath,
149
+ location: "tests",
150
+ message: "Missing or invalid 'tests' field (must be an array or a file path string)"
119
151
  });
120
152
  return {
121
153
  valid: errors.length === 0,
@@ -124,9 +156,9 @@ async function validateEvalFile(filePath) {
124
156
  errors
125
157
  };
126
158
  }
127
- for (let i = 0; i < evalcases.length; i++) {
128
- const evalCase = evalcases[i];
129
- const location = `evalcases[${i}]`;
159
+ for (let i = 0; i < cases.length; i++) {
160
+ const evalCase = cases[i];
161
+ const location = `tests[${i}]`;
130
162
  if (!isObject(evalCase)) {
131
163
  errors.push({
132
164
  severity: "error",
@@ -145,23 +177,29 @@ async function validateEvalFile(filePath) {
145
177
  message: "Missing or invalid 'id' field (must be a non-empty string)"
146
178
  });
147
179
  }
148
- const expectedOutcome = evalCase.expected_outcome ?? evalCase.outcome;
149
- if (expectedOutcome !== void 0 && (typeof expectedOutcome !== "string" || expectedOutcome.trim().length === 0)) {
180
+ let criteria = evalCase.criteria;
181
+ if (criteria === void 0 && "expected_outcome" in evalCase) {
182
+ criteria = evalCase.expected_outcome;
150
183
  errors.push({
151
- severity: "error",
184
+ severity: "warning",
152
185
  filePath: absolutePath,
153
186
  location: `${location}.expected_outcome`,
154
- message: "Invalid 'expected_outcome' or 'outcome' field (must be a non-empty string if provided)"
187
+ message: "'expected_outcome' is deprecated. Use 'criteria' instead."
155
188
  });
156
189
  }
157
- const inputMessages = evalCase.input_messages;
158
- const inputAlias = evalCase.input;
159
- if (Array.isArray(inputMessages)) {
160
- validateMessages(inputMessages, `${location}.input_messages`, absolutePath, errors);
161
- } else if (inputAlias !== void 0) {
162
- if (typeof inputAlias === "string") {
163
- } else if (Array.isArray(inputAlias)) {
164
- validateMessages(inputAlias, `${location}.input`, absolutePath, errors);
190
+ if (criteria !== void 0 && (typeof criteria !== "string" || criteria.trim().length === 0)) {
191
+ errors.push({
192
+ severity: "error",
193
+ filePath: absolutePath,
194
+ location: `${location}.criteria`,
195
+ message: "Invalid 'criteria' field (must be a non-empty string if provided)"
196
+ });
197
+ }
198
+ const inputField = evalCase.input;
199
+ if (inputField !== void 0) {
200
+ if (typeof inputField === "string") {
201
+ } else if (Array.isArray(inputField)) {
202
+ validateMessages(inputField, `${location}.input`, absolutePath, errors);
165
203
  } else {
166
204
  errors.push({
167
205
  severity: "error",
@@ -174,33 +212,23 @@ async function validateEvalFile(filePath) {
174
212
  errors.push({
175
213
  severity: "error",
176
214
  filePath: absolutePath,
177
- location: `${location}.input_messages`,
178
- message: "Missing 'input_messages' or 'input' field (must provide one)"
215
+ location: `${location}.input`,
216
+ message: "Missing 'input' field (must be a string or array of messages)"
179
217
  });
180
218
  }
181
- const expectedMessages = evalCase.expected_messages;
182
- const expectedOutputAlias = evalCase.expected_output;
183
- if (expectedMessages !== void 0 && !Array.isArray(expectedMessages)) {
184
- errors.push({
185
- severity: "error",
186
- filePath: absolutePath,
187
- location: `${location}.expected_messages`,
188
- message: "Invalid 'expected_messages' field (must be an array if provided)"
189
- });
190
- } else if (Array.isArray(expectedMessages)) {
191
- validateMessages(expectedMessages, `${location}.expected_messages`, absolutePath, errors);
192
- } else if (expectedOutputAlias !== void 0) {
193
- if (typeof expectedOutputAlias === "string") {
194
- } else if (Array.isArray(expectedOutputAlias)) {
195
- if (expectedOutputAlias.length > 0 && isObject(expectedOutputAlias[0]) && "role" in expectedOutputAlias[0]) {
219
+ const expectedOutputField = evalCase.expected_output;
220
+ if (expectedOutputField !== void 0) {
221
+ if (typeof expectedOutputField === "string") {
222
+ } else if (Array.isArray(expectedOutputField)) {
223
+ if (expectedOutputField.length > 0 && isObject(expectedOutputField[0]) && "role" in expectedOutputField[0]) {
196
224
  validateMessages(
197
- expectedOutputAlias,
225
+ expectedOutputField,
198
226
  `${location}.expected_output`,
199
227
  absolutePath,
200
228
  errors
201
229
  );
202
230
  }
203
- } else if (isObject(expectedOutputAlias)) {
231
+ } else if (isObject(expectedOutputField)) {
204
232
  } else {
205
233
  errors.push({
206
234
  severity: "error",
@@ -210,9 +238,13 @@ async function validateEvalFile(filePath) {
210
238
  });
211
239
  }
212
240
  }
241
+ const assertField = evalCase.assert;
242
+ if (assertField !== void 0) {
243
+ validateAssertArray(assertField, location, absolutePath, errors);
244
+ }
213
245
  }
214
246
  return {
215
- valid: errors.length === 0,
247
+ valid: errors.filter((e) => e.severity === "error").length === 0,
216
248
  filePath: absolutePath,
217
249
  fileType: "eval",
218
250
  errors
@@ -292,6 +324,133 @@ function validateMessages(messages, location, filePath, errors) {
292
324
  }
293
325
  }
294
326
  }
327
/**
 * Checks the optional top-level `name` / `description` metadata of a parsed eval file.
 *
 * Every finding is pushed onto `errors` with severity "warning"; nothing is returned
 * and nothing is thrown. When `name` is absent no checks are performed.
 *
 * @param {object} parsed - Parsed eval file content (top-level mapping).
 * @param {string} filePath - Path reported in each pushed diagnostic.
 * @param {Array<object>} errors - Diagnostic list that is appended to in place.
 */
function validateMetadata(parsed, filePath, errors) {
  const name = parsed.name;
  if (name === void 0) {
    return;
  }
  if (typeof name !== "string") {
    // A non-string name (e.g. a YAML number or list) previously slipped through
    // without any diagnostic because only strings were pattern-checked.
    errors.push({
      severity: "warning",
      filePath,
      location: "name",
      message: "Invalid 'name' field (must be a string)."
    });
  } else if (!NAME_PATTERN.test(name)) {
    errors.push({
      severity: "warning",
      filePath,
      location: "name",
      message: `Invalid 'name' format '${name}'. Must match pattern /^[a-z0-9-]+$/ (lowercase alphanumeric with hyphens).`
    });
  }
  // A missing key reads as undefined, so one comparison covers both
  // "key absent" and "key present but undefined".
  if (parsed.description === void 0) {
    errors.push({
      severity: "warning",
      filePath,
      location: "name",
      message: "When 'name' is present, 'description' should also be provided."
    });
  }
}
350
/**
 * Checks the extension of a `tests` value that was given as a file path string.
 *
 * Pushes a single "warning" diagnostic onto `errors` when the extension of
 * `testsPath` is not a member of VALID_TEST_FILE_EXTENSIONS; otherwise does nothing.
 *
 * @param {string} testsPath - The `tests` string taken from the eval file.
 * @param {string} filePath - Path reported in the pushed diagnostic.
 * @param {Array<object>} errors - Diagnostic list that is appended to in place.
 */
function validateTestsStringPath(testsPath, filePath, errors) {
  const extension = path2.extname(testsPath);
  if (VALID_TEST_FILE_EXTENSIONS.has(extension)) {
    return;
  }
  const supported = Array.from(VALID_TEST_FILE_EXTENSIONS).join(", ");
  errors.push({
    severity: "warning",
    filePath,
    location: "tests",
    message: `Unsupported file extension '${extension}' for tests path '${testsPath}'. Supported extensions: ${supported}`
  });
}
361
/**
 * Validates the `assert` field of a single test case.
 *
 * All findings are pushed onto `errors` with severity "warning". For each item the
 * function checks, in order: that the item is an object, that `type` is a string
 * accepted by isEvaluatorKind, that types listed in ASSERTION_TYPES_WITH_VALUE carry
 * a string `value` (which must also compile as a RegExp for type "regex"), and that
 * `required`, when present, passes validateRequiredField.
 *
 * `required` is checked for every object item, even when `type` or `value` already
 * produced a warning, so that one validation pass reports every problem on the item.
 *
 * @param {*} assertField - Raw `assert` value from the test case.
 * @param {string} parentLocation - Location prefix of the owning test case.
 * @param {string} filePath - Path reported in each pushed diagnostic.
 * @param {Array<object>} errors - Diagnostic list that is appended to in place.
 */
function validateAssertArray(assertField, parentLocation, filePath, errors) {
  const warn = (location, message) => {
    errors.push({ severity: "warning", filePath, location, message });
  };
  if (!Array.isArray(assertField)) {
    warn(`${parentLocation}.assert`, "'assert' must be an array of assertion objects.");
    return;
  }
  // entries() also visits holes (as undefined), matching an index-based loop.
  for (const [index, item] of assertField.entries()) {
    const location = `${parentLocation}.assert[${index}]`;
    if (!isObject(item)) {
      warn(location, "Assertion item must be an object with a type field.");
      continue;
    }
    const typeValue = item.type;
    if (typeValue === void 0) {
      warn(`${location}.type`, "Assertion item is missing a 'type' field.");
    } else if (typeof typeValue !== "string") {
      // Present but wrong type: say so instead of claiming the field is missing.
      warn(`${location}.type`, "Assertion 'type' field must be a string.");
    } else if (!isEvaluatorKind(typeValue)) {
      warn(`${location}.type`, `Unknown assertion type '${typeValue}'.`);
    } else if (ASSERTION_TYPES_WITH_VALUE.has(typeValue)) {
      const value = item.value;
      if (typeof value !== "string") {
        warn(`${location}.value`, `Assertion type '${typeValue}' requires a 'value' field (string).`);
      } else if (typeValue === "regex") {
        try {
          // Compiled only to detect syntax errors; the instance is discarded.
          new RegExp(value);
        } catch {
          warn(`${location}.value`, `Invalid regex pattern '${value}': not a valid regular expression.`);
        }
      }
    }
    const required = item.required;
    if (required !== void 0) {
      validateRequiredField(required, location, filePath, errors);
    }
  }
}
432
/**
 * Validates the `required` field of an assertion item.
 *
 * Accepted values are any boolean, or a number in the half-open range (0, 1].
 * Anything else pushes one "warning" diagnostic onto `errors`; nothing is
 * returned and nothing is thrown.
 *
 * @param {*} required - Raw `required` value (callers pass it only when defined).
 * @param {string} parentLocation - Location prefix of the owning assertion item.
 * @param {string} filePath - Path reported in the pushed diagnostic.
 * @param {Array<object>} errors - Diagnostic list that is appended to in place.
 */
function validateRequiredField(required, parentLocation, filePath, errors) {
  if (typeof required === "boolean") {
    return;
  }
  if (typeof required === "number") {
    // Expressed as a negated in-range test: NaN fails every comparison, so
    // "required <= 0 || required > 1" would let NaN through as valid.
    const inRange = required > 0 && required <= 1;
    if (!inRange) {
      errors.push({
        severity: "warning",
        filePath,
        location: `${parentLocation}.required`,
        message: `Invalid 'required' value ${required}. When a number, it must be between 0 (exclusive) and 1 (inclusive).`
      });
    }
    return;
  }
  errors.push({
    severity: "warning",
    filePath,
    location: `${parentLocation}.required`,
    message: `Invalid 'required' value. Must be a boolean or a number between 0 (exclusive) and 1 (inclusive).`
  });
}
295
454
  function validateContentForRoleMarkers(content, location, filePath, errors) {
296
455
  const markers = ["@[System]:", "@[User]:", "@[Assistant]:", "@[Tool]:"];
297
456
  for (const marker of markers) {
@@ -371,6 +530,7 @@ var GEMINI_SETTINGS = /* @__PURE__ */ new Set([
371
530
  ]);
372
531
  var CODEX_SETTINGS = /* @__PURE__ */ new Set([
373
532
  ...COMMON_SETTINGS,
533
+ "model",
374
534
  "executable",
375
535
  "command",
376
536
  "binary",
@@ -386,41 +546,45 @@ var CODEX_SETTINGS = /* @__PURE__ */ new Set([
386
546
  "log_format",
387
547
  "logFormat",
388
548
  "log_output_format",
389
- "logOutputFormat"
549
+ "logOutputFormat",
550
+ "system_prompt",
551
+ "systemPrompt",
552
+ "workspace_template",
553
+ "workspaceTemplate"
390
554
  ]);
391
- var COPILOT_SETTINGS = /* @__PURE__ */ new Set([
555
+ var COPILOT_SDK_SETTINGS = /* @__PURE__ */ new Set([
392
556
  ...COMMON_SETTINGS,
393
- "executable",
394
- "command",
395
- "binary",
396
- "args",
397
- "arguments",
557
+ "cli_url",
558
+ "cliUrl",
559
+ "cli_path",
560
+ "cliPath",
561
+ "github_token",
562
+ "githubToken",
398
563
  "model",
399
564
  "cwd",
400
565
  "timeout_seconds",
401
566
  "timeoutSeconds",
402
567
  "log_dir",
403
568
  "logDir",
404
- "log_directory",
405
- "logDirectory",
406
569
  "log_format",
407
570
  "logFormat",
408
- "log_output_format",
409
- "logOutputFormat",
410
571
  "system_prompt",
411
- "systemPrompt"
572
+ "systemPrompt",
573
+ "workspace_template",
574
+ "workspaceTemplate"
412
575
  ]);
413
576
  var VSCODE_SETTINGS = /* @__PURE__ */ new Set([
414
577
  ...COMMON_SETTINGS,
578
+ "executable",
415
579
  "workspace_template",
416
580
  "workspaceTemplate",
417
- "vscode_cmd",
418
- "command",
419
581
  "wait",
420
582
  "dry_run",
421
583
  "dryRun",
422
584
  "subagent_root",
423
- "subagentRoot"
585
+ "subagentRoot",
586
+ "timeout_seconds",
587
+ "timeoutSeconds"
424
588
  ]);
425
589
  var MOCK_SETTINGS = /* @__PURE__ */ new Set([
426
590
  ...COMMON_SETTINGS,
@@ -431,6 +595,29 @@ var MOCK_SETTINGS = /* @__PURE__ */ new Set([
431
595
  "trace"
432
596
  // For testing tool_trajectory evaluator
433
597
  ]);
598
// Setting keys in addition to COMMON_SETTINGS that getKnownSettings returns for
// the "claude", "claude-code" and "claude-sdk" providers. Apart from "model" and
// "cwd", each option is listed as a snake_case / camelCase pair.
var CLAUDE_SETTINGS = /* @__PURE__ */ new Set([
  ...COMMON_SETTINGS,
  "model",
  "cwd",
  "timeout_seconds", "timeoutSeconds",
  "log_dir", "logDir",
  "log_directory", "logDirectory",
  "log_format", "logFormat",
  "log_output_format", "logOutputFormat",
  "system_prompt", "systemPrompt",
  "workspace_template", "workspaceTemplate",
  "max_turns", "maxTurns",
  "max_budget_usd", "maxBudgetUsd"
]);
434
621
  function getKnownSettings(provider) {
435
622
  const normalizedProvider = provider.toLowerCase();
436
623
  switch (normalizedProvider) {
@@ -446,8 +633,15 @@ function getKnownSettings(provider) {
446
633
  case "codex":
447
634
  case "codex-cli":
448
635
  return CODEX_SETTINGS;
636
+ case "copilot":
637
+ case "copilot-sdk":
638
+ case "copilot_sdk":
449
639
  case "copilot-cli":
450
- return COPILOT_SETTINGS;
640
+ return COPILOT_SDK_SETTINGS;
641
+ case "claude":
642
+ case "claude-code":
643
+ case "claude-sdk":
644
+ return CLAUDE_SETTINGS;
451
645
  case "vscode":
452
646
  case "vscode-insiders":
453
647
  return VSCODE_SETTINGS;
@@ -745,7 +939,32 @@ async function validateConfigFile(filePath) {
745
939
  });
746
940
  }
747
941
  }
748
- const allowedFields = /* @__PURE__ */ new Set(["$schema", "guideline_patterns"]);
942
+ const evalPatterns = config.eval_patterns;
943
+ if (evalPatterns !== void 0) {
944
+ if (!Array.isArray(evalPatterns)) {
945
+ errors.push({
946
+ severity: "error",
947
+ filePath,
948
+ location: "eval_patterns",
949
+ message: "Field 'eval_patterns' must be an array"
950
+ });
951
+ } else if (!evalPatterns.every((p) => typeof p === "string")) {
952
+ errors.push({
953
+ severity: "error",
954
+ filePath,
955
+ location: "eval_patterns",
956
+ message: "All entries in 'eval_patterns' must be strings"
957
+ });
958
+ } else if (evalPatterns.length === 0) {
959
+ errors.push({
960
+ severity: "warning",
961
+ filePath,
962
+ location: "eval_patterns",
963
+ message: "Field 'eval_patterns' is empty. Consider removing it or adding patterns."
964
+ });
965
+ }
966
+ }
967
+ const allowedFields = /* @__PURE__ */ new Set(["$schema", "guideline_patterns", "eval_patterns"]);
749
968
  const unexpectedFields = Object.keys(config).filter((key) => !allowedFields.has(key));
750
969
  if (unexpectedFields.length > 0) {
751
970
  errors.push({
@@ -800,30 +1019,36 @@ async function validateFileReferences(evalFilePath) {
800
1019
  if (!isObject3(parsed)) {
801
1020
  return errors;
802
1021
  }
803
- const evalcases = parsed.evalcases;
804
- if (!Array.isArray(evalcases)) {
1022
+ let cases = parsed.tests;
1023
+ if (cases === void 0 && "eval_cases" in parsed) {
1024
+ cases = parsed.eval_cases;
1025
+ }
1026
+ if (cases === void 0 && "evalcases" in parsed) {
1027
+ cases = parsed.evalcases;
1028
+ }
1029
+ if (!Array.isArray(cases)) {
805
1030
  return errors;
806
1031
  }
807
- for (let i = 0; i < evalcases.length; i++) {
808
- const evalCase = evalcases[i];
1032
+ for (let i = 0; i < cases.length; i++) {
1033
+ const evalCase = cases[i];
809
1034
  if (!isObject3(evalCase)) {
810
1035
  continue;
811
1036
  }
812
- const inputMessages = evalCase.input_messages;
813
- if (Array.isArray(inputMessages)) {
1037
+ const inputField = evalCase.input;
1038
+ if (Array.isArray(inputField)) {
814
1039
  await validateMessagesFileRefs(
815
- inputMessages,
816
- `evalcases[${i}].input_messages`,
1040
+ inputField,
1041
+ `tests[${i}].input`,
817
1042
  searchRoots,
818
1043
  absolutePath,
819
1044
  errors
820
1045
  );
821
1046
  }
822
- const expectedMessages = evalCase.expected_messages;
823
- if (Array.isArray(expectedMessages)) {
1047
+ const expectedOutputField = evalCase.expected_output;
1048
+ if (Array.isArray(expectedOutputField)) {
824
1049
  await validateMessagesFileRefs(
825
- expectedMessages,
826
- `evalcases[${i}].expected_messages`,
1050
+ expectedOutputField,
1051
+ `tests[${i}].expected_output`,
827
1052
  searchRoots,
828
1053
  absolutePath,
829
1054
  errors