@agentv/core 2.6.0 → 2.7.1-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -106,6 +106,37 @@ function getExpectedSchema(fileType) {
106
106
  var import_promises2 = require("fs/promises");
107
107
  var import_node_path2 = __toESM(require("path"), 1);
108
108
  var import_yaml2 = require("yaml");
109
+
110
+ // src/evaluation/types.ts
111
/** Literal message role names, plus a Set built from the same values. */
var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
var TEST_MESSAGE_ROLE_SET = new Set(TEST_MESSAGE_ROLE_VALUES);

/** Every evaluator kind string accepted by isEvaluatorKind. */
var EVALUATOR_KIND_VALUES = [
  "code_judge", "llm_judge", "rubric", "composite",
  "tool_trajectory", "field_accuracy",
  "latency", "cost", "token_usage", "execution_metrics",
  "agent_judge",
  "contains", "regex", "is_json", "equals",
  "rubrics"
];
var EVALUATOR_KIND_SET = new Set(EVALUATOR_KIND_VALUES);

/**
 * Reports whether `value` is one of the strings in EVALUATOR_KIND_VALUES.
 *
 * @param value - Any value; non-strings are rejected without a lookup.
 * @returns true only for a string contained in EVALUATOR_KIND_SET.
 */
function isEvaluatorKind(value) {
  if (typeof value !== "string") {
    return false;
  }
  return EVALUATOR_KIND_SET.has(value);
}
135
+
136
+ // src/evaluation/validation/eval-validator.ts
137
// Assertion types for which validateAssertArray checks a string `value` field.
var ASSERTION_TYPES_WITH_VALUE = /* @__PURE__ */ new Set([
  "contains",
  "equals",
  "regex"
]);
// File extensions validateTestsStringPath accepts for a string-valued `tests` field.
var VALID_TEST_FILE_EXTENSIONS = /* @__PURE__ */ new Set([
  ".yaml",
  ".yml",
  ".jsonl"
]);
// Format checked against the top-level `name` field: lowercase letters, digits and hyphens only.
var NAME_PATTERN = /^[a-z0-9-]+$/;
109
140
  function isObject(value) {
110
141
  return typeof value === "object" && value !== null && !Array.isArray(value);
111
142
  }
@@ -142,13 +173,41 @@ async function validateEvalFile(filePath) {
142
173
  errors
143
174
  };
144
175
  }
145
- const evalcases = parsed.evalcases;
146
- if (!Array.isArray(evalcases)) {
176
+ validateMetadata(parsed, absolutePath, errors);
177
+ let cases = parsed.tests;
178
+ if (cases === void 0 && "eval_cases" in parsed) {
179
+ cases = parsed.eval_cases;
147
180
  errors.push({
148
- severity: "error",
181
+ severity: "warning",
182
+ filePath: absolutePath,
183
+ location: "eval_cases",
184
+ message: "'eval_cases' is deprecated. Use 'tests' instead."
185
+ });
186
+ }
187
+ if (cases === void 0 && "evalcases" in parsed) {
188
+ cases = parsed.evalcases;
189
+ errors.push({
190
+ severity: "warning",
149
191
  filePath: absolutePath,
150
192
  location: "evalcases",
151
- message: "Missing or invalid 'evalcases' field (must be an array)"
193
+ message: "'evalcases' is deprecated. Use 'tests' instead."
194
+ });
195
+ }
196
+ if (typeof cases === "string") {
197
+ validateTestsStringPath(cases, absolutePath, errors);
198
+ return {
199
+ valid: errors.filter((e) => e.severity === "error").length === 0,
200
+ filePath: absolutePath,
201
+ fileType: "eval",
202
+ errors
203
+ };
204
+ }
205
+ if (!Array.isArray(cases)) {
206
+ errors.push({
207
+ severity: "error",
208
+ filePath: absolutePath,
209
+ location: "tests",
210
+ message: "Missing or invalid 'tests' field (must be an array or a file path string)"
152
211
  });
153
212
  return {
154
213
  valid: errors.length === 0,
@@ -157,9 +216,9 @@ async function validateEvalFile(filePath) {
157
216
  errors
158
217
  };
159
218
  }
160
- for (let i = 0; i < evalcases.length; i++) {
161
- const evalCase = evalcases[i];
162
- const location = `evalcases[${i}]`;
219
+ for (let i = 0; i < cases.length; i++) {
220
+ const evalCase = cases[i];
221
+ const location = `tests[${i}]`;
163
222
  if (!isObject(evalCase)) {
164
223
  errors.push({
165
224
  severity: "error",
@@ -178,23 +237,29 @@ async function validateEvalFile(filePath) {
178
237
  message: "Missing or invalid 'id' field (must be a non-empty string)"
179
238
  });
180
239
  }
181
- const expectedOutcome = evalCase.expected_outcome ?? evalCase.outcome;
182
- if (expectedOutcome !== void 0 && (typeof expectedOutcome !== "string" || expectedOutcome.trim().length === 0)) {
240
+ let criteria = evalCase.criteria;
241
+ if (criteria === void 0 && "expected_outcome" in evalCase) {
242
+ criteria = evalCase.expected_outcome;
183
243
  errors.push({
184
- severity: "error",
244
+ severity: "warning",
185
245
  filePath: absolutePath,
186
246
  location: `${location}.expected_outcome`,
187
- message: "Invalid 'expected_outcome' or 'outcome' field (must be a non-empty string if provided)"
247
+ message: "'expected_outcome' is deprecated. Use 'criteria' instead."
248
+ });
249
+ }
250
+ if (criteria !== void 0 && (typeof criteria !== "string" || criteria.trim().length === 0)) {
251
+ errors.push({
252
+ severity: "error",
253
+ filePath: absolutePath,
254
+ location: `${location}.criteria`,
255
+ message: "Invalid 'criteria' field (must be a non-empty string if provided)"
188
256
  });
189
257
  }
190
- const inputMessages = evalCase.input_messages;
191
- const inputAlias = evalCase.input;
192
- if (Array.isArray(inputMessages)) {
193
- validateMessages(inputMessages, `${location}.input_messages`, absolutePath, errors);
194
- } else if (inputAlias !== void 0) {
195
- if (typeof inputAlias === "string") {
196
- } else if (Array.isArray(inputAlias)) {
197
- validateMessages(inputAlias, `${location}.input`, absolutePath, errors);
258
+ const inputField = evalCase.input;
259
+ if (inputField !== void 0) {
260
+ if (typeof inputField === "string") {
261
+ } else if (Array.isArray(inputField)) {
262
+ validateMessages(inputField, `${location}.input`, absolutePath, errors);
198
263
  } else {
199
264
  errors.push({
200
265
  severity: "error",
@@ -207,33 +272,23 @@ async function validateEvalFile(filePath) {
207
272
  errors.push({
208
273
  severity: "error",
209
274
  filePath: absolutePath,
210
- location: `${location}.input_messages`,
211
- message: "Missing 'input_messages' or 'input' field (must provide one)"
275
+ location: `${location}.input`,
276
+ message: "Missing 'input' field (must be a string or array of messages)"
212
277
  });
213
278
  }
214
- const expectedMessages = evalCase.expected_messages;
215
- const expectedOutputAlias = evalCase.expected_output;
216
- if (expectedMessages !== void 0 && !Array.isArray(expectedMessages)) {
217
- errors.push({
218
- severity: "error",
219
- filePath: absolutePath,
220
- location: `${location}.expected_messages`,
221
- message: "Invalid 'expected_messages' field (must be an array if provided)"
222
- });
223
- } else if (Array.isArray(expectedMessages)) {
224
- validateMessages(expectedMessages, `${location}.expected_messages`, absolutePath, errors);
225
- } else if (expectedOutputAlias !== void 0) {
226
- if (typeof expectedOutputAlias === "string") {
227
- } else if (Array.isArray(expectedOutputAlias)) {
228
- if (expectedOutputAlias.length > 0 && isObject(expectedOutputAlias[0]) && "role" in expectedOutputAlias[0]) {
279
+ const expectedOutputField = evalCase.expected_output;
280
+ if (expectedOutputField !== void 0) {
281
+ if (typeof expectedOutputField === "string") {
282
+ } else if (Array.isArray(expectedOutputField)) {
283
+ if (expectedOutputField.length > 0 && isObject(expectedOutputField[0]) && "role" in expectedOutputField[0]) {
229
284
  validateMessages(
230
- expectedOutputAlias,
285
+ expectedOutputField,
231
286
  `${location}.expected_output`,
232
287
  absolutePath,
233
288
  errors
234
289
  );
235
290
  }
236
- } else if (isObject(expectedOutputAlias)) {
291
+ } else if (isObject(expectedOutputField)) {
237
292
  } else {
238
293
  errors.push({
239
294
  severity: "error",
@@ -243,9 +298,13 @@ async function validateEvalFile(filePath) {
243
298
  });
244
299
  }
245
300
  }
301
+ const assertField = evalCase.assert;
302
+ if (assertField !== void 0) {
303
+ validateAssertArray(assertField, location, absolutePath, errors);
304
+ }
246
305
  }
247
306
  return {
248
- valid: errors.length === 0,
307
+ valid: errors.filter((e) => e.severity === "error").length === 0,
249
308
  filePath: absolutePath,
250
309
  fileType: "eval",
251
310
  errors
@@ -325,6 +384,133 @@ function validateMessages(messages, location, filePath, errors) {
325
384
  }
326
385
  }
327
386
  }
387
/**
 * Checks the optional top-level `name` / `description` metadata of a parsed eval file.
 *
 * Only warnings are appended; nothing is returned and nothing is thrown.
 * When `name` is absent (undefined) no checks run at all.
 *
 * @param parsed - The parsed eval document (an object).
 * @param filePath - Path recorded on every diagnostic pushed here.
 * @param errors - Diagnostic list that is mutated in place.
 */
function validateMetadata(parsed, filePath, errors) {
  const name = parsed.name;
  if (name === void 0) {
    return;
  }
  if (typeof name !== "string") {
    // A present but non-string name (e.g. a YAML number or null) previously
    // skipped the format check and produced no diagnostic at all.
    errors.push({
      severity: "warning",
      filePath,
      location: "name",
      message: "Invalid 'name' field (must be a string)."
    });
  } else if (!NAME_PATTERN.test(name)) {
    errors.push({
      severity: "warning",
      filePath,
      location: "name",
      message: `Invalid 'name' format '${name}'. Must match pattern /^[a-z0-9-]+$/ (lowercase alphanumeric with hyphens).`
    });
  }
  // A missing key and an explicit undefined both read back as undefined.
  if (parsed.description === void 0) {
    errors.push({
      severity: "warning",
      filePath,
      location: "name",
      message: "When 'name' is present, 'description' should also be provided."
    });
  }
}
410
/**
 * Checks the extension of a string-valued `tests` field against
 * VALID_TEST_FILE_EXTENSIONS and appends a warning when it is not listed.
 *
 * @param testsPath - The string found in the `tests` field.
 * @param filePath - Path of the eval file, recorded on the diagnostic.
 * @param errors - Diagnostic list that is mutated in place.
 */
function validateTestsStringPath(testsPath, filePath, errors) {
  const extension = import_node_path2.default.extname(testsPath);
  if (VALID_TEST_FILE_EXTENSIONS.has(extension)) {
    return;
  }
  const supported = Array.from(VALID_TEST_FILE_EXTENSIONS).join(", ");
  errors.push({
    severity: "warning",
    filePath,
    location: "tests",
    message: `Unsupported file extension '${extension}' for tests path '${testsPath}'. Supported extensions: ${supported}`
  });
}
421
/**
 * Validates the `assert` field of one test case, appending warnings to `errors`.
 *
 * For each item, checks stop at the first failure among: item is an object,
 * `type` is a string, `type` is a known evaluator kind, and (for the types in
 * ASSERTION_TYPES_WITH_VALUE) `value` is a string. An uncompilable `regex`
 * value is reported but does not stop the `required` check that follows.
 *
 * @param assertField - Raw value of the test case's `assert` field.
 * @param parentLocation - Location prefix of the owning test case.
 * @param filePath - Path recorded on every diagnostic pushed here.
 * @param errors - Diagnostic list that is mutated in place.
 */
function validateAssertArray(assertField, parentLocation, filePath, errors) {
  // All diagnostics from this function are warnings with the same shape.
  const warn = (location, message) => {
    errors.push({ severity: "warning", filePath, location, message });
  };
  if (!Array.isArray(assertField)) {
    warn(`${parentLocation}.assert`, "'assert' must be an array of assertion objects.");
    return;
  }
  for (const [index, entry] of assertField.entries()) {
    const itemLocation = `${parentLocation}.assert[${index}]`;
    if (!isObject(entry)) {
      warn(itemLocation, "Assertion item must be an object with a type field.");
      continue;
    }
    const kind = entry.type;
    if (typeof kind !== "string") {
      warn(`${itemLocation}.type`, "Assertion item is missing a 'type' field.");
      continue;
    }
    if (!isEvaluatorKind(kind)) {
      warn(`${itemLocation}.type`, `Unknown assertion type '${kind}'.`);
      continue;
    }
    if (ASSERTION_TYPES_WITH_VALUE.has(kind)) {
      const expected = entry.value;
      if (typeof expected !== "string") {
        warn(`${itemLocation}.value`, `Assertion type '${kind}' requires a 'value' field (string).`);
        continue;
      }
      if (kind === "regex") {
        let compiles = true;
        try {
          new RegExp(expected);
        } catch {
          compiles = false;
        }
        if (!compiles) {
          warn(`${itemLocation}.value`, `Invalid regex pattern '${expected}': not a valid regular expression.`);
        }
      }
    }
    const requiredSetting = entry.required;
    if (requiredSetting !== void 0) {
      validateRequiredField(requiredSetting, itemLocation, filePath, errors);
    }
  }
}
492
+ function validateRequiredField(required, parentLocation, filePath, errors) {
493
+ if (typeof required === "boolean") {
494
+ return;
495
+ }
496
+ if (typeof required === "number") {
497
+ if (required <= 0 || required > 1) {
498
+ errors.push({
499
+ severity: "warning",
500
+ filePath,
501
+ location: `${parentLocation}.required`,
502
+ message: `Invalid 'required' value ${required}. When a number, it must be between 0 (exclusive) and 1 (inclusive).`
503
+ });
504
+ }
505
+ return;
506
+ }
507
+ errors.push({
508
+ severity: "warning",
509
+ filePath,
510
+ location: `${parentLocation}.required`,
511
+ message: `Invalid 'required' value. Must be a boolean or a number between 0 (exclusive) and 1 (inclusive).`
512
+ });
513
+ }
328
514
  function validateContentForRoleMarkers(content, location, filePath, errors) {
329
515
  const markers = ["@[System]:", "@[User]:", "@[Assistant]:", "@[Tool]:"];
330
516
  for (const marker of markers) {
@@ -378,6 +564,9 @@ var CliTargetInputSchema = import_zod.z.object({
378
564
  attachmentsFormat: import_zod.z.string().optional(),
379
565
  // Working directory - optional
380
566
  cwd: import_zod.z.string().optional(),
567
+ // Workspace template directory - optional (mutually exclusive with cwd)
568
+ workspace_template: import_zod.z.string().optional(),
569
+ workspaceTemplate: import_zod.z.string().optional(),
381
570
  // Timeout in seconds - optional
382
571
  timeout_seconds: import_zod.z.number().positive().optional(),
383
572
  timeoutSeconds: import_zod.z.number().positive().optional(),
@@ -419,6 +608,7 @@ var CliTargetConfigSchema = import_zod.z.object({
419
608
  commandTemplate: import_zod.z.string().min(1),
420
609
  filesFormat: import_zod.z.string().optional(),
421
610
  cwd: import_zod.z.string().optional(),
611
+ workspaceTemplate: import_zod.z.string().optional(),
422
612
  timeoutMs: import_zod.z.number().positive().optional(),
423
613
  healthcheck: CliHealthcheckSchema.optional(),
424
614
  verbose: import_zod.z.boolean().optional(),
@@ -436,7 +626,9 @@ var BASE_TARGET_SCHEMA = import_zod.z.object({
436
626
  name: import_zod.z.string().min(1, "target name is required"),
437
627
  provider: import_zod.z.string().min(1, "provider is required"),
438
628
  judge_target: import_zod.z.string().optional(),
439
- workers: import_zod.z.number().int().min(1).optional()
629
+ workers: import_zod.z.number().int().min(1).optional(),
630
+ workspace_template: import_zod.z.string().optional(),
631
+ workspaceTemplate: import_zod.z.string().optional()
440
632
  }).passthrough();
441
633
 
442
634
  // src/evaluation/providers/types.ts
@@ -445,10 +637,11 @@ var KNOWN_PROVIDERS = [
445
637
  "anthropic",
446
638
  "gemini",
447
639
  "codex",
640
+ "copilot",
448
641
  "copilot-cli",
449
642
  "pi-coding-agent",
450
643
  "pi-agent-sdk",
451
- "claude-code",
644
+ "claude",
452
645
  "cli",
453
646
  "mock",
454
647
  "vscode",
@@ -463,8 +656,16 @@ var PROVIDER_ALIASES = [
463
656
  // alias for "gemini"
464
657
  "codex-cli",
465
658
  // alias for "codex"
659
+ "copilot-sdk",
660
+ // alias for "copilot"
661
+ "copilot_sdk",
662
+ // alias for "copilot" (underscore variant)
466
663
  "pi",
467
664
  // alias for "pi-coding-agent"
665
+ "claude-code",
666
+ // alias for "claude" (legacy)
667
+ "claude-sdk",
668
+ // alias for "claude"
468
669
  "openai",
469
670
  // legacy/future support
470
671
  "bedrock",
@@ -535,6 +736,7 @@ var GEMINI_SETTINGS = /* @__PURE__ */ new Set([
535
736
  ]);
536
737
  var CODEX_SETTINGS = /* @__PURE__ */ new Set([
537
738
  ...COMMON_SETTINGS,
739
+ "model",
538
740
  "executable",
539
741
  "command",
540
742
  "binary",
@@ -550,41 +752,45 @@ var CODEX_SETTINGS = /* @__PURE__ */ new Set([
550
752
  "log_format",
551
753
  "logFormat",
552
754
  "log_output_format",
553
- "logOutputFormat"
755
+ "logOutputFormat",
756
+ "system_prompt",
757
+ "systemPrompt",
758
+ "workspace_template",
759
+ "workspaceTemplate"
554
760
  ]);
555
- var COPILOT_SETTINGS = /* @__PURE__ */ new Set([
761
+ var COPILOT_SDK_SETTINGS = /* @__PURE__ */ new Set([
556
762
  ...COMMON_SETTINGS,
557
- "executable",
558
- "command",
559
- "binary",
560
- "args",
561
- "arguments",
763
+ "cli_url",
764
+ "cliUrl",
765
+ "cli_path",
766
+ "cliPath",
767
+ "github_token",
768
+ "githubToken",
562
769
  "model",
563
770
  "cwd",
564
771
  "timeout_seconds",
565
772
  "timeoutSeconds",
566
773
  "log_dir",
567
774
  "logDir",
568
- "log_directory",
569
- "logDirectory",
570
775
  "log_format",
571
776
  "logFormat",
572
- "log_output_format",
573
- "logOutputFormat",
574
777
  "system_prompt",
575
- "systemPrompt"
778
+ "systemPrompt",
779
+ "workspace_template",
780
+ "workspaceTemplate"
576
781
  ]);
577
782
  var VSCODE_SETTINGS = /* @__PURE__ */ new Set([
578
783
  ...COMMON_SETTINGS,
784
+ "executable",
579
785
  "workspace_template",
580
786
  "workspaceTemplate",
581
- "vscode_cmd",
582
- "command",
583
787
  "wait",
584
788
  "dry_run",
585
789
  "dryRun",
586
790
  "subagent_root",
587
- "subagentRoot"
791
+ "subagentRoot",
792
+ "timeout_seconds",
793
+ "timeoutSeconds"
588
794
  ]);
589
795
  var MOCK_SETTINGS = /* @__PURE__ */ new Set([
590
796
  ...COMMON_SETTINGS,
@@ -595,6 +801,29 @@ var MOCK_SETTINGS = /* @__PURE__ */ new Set([
595
801
  "trace"
596
802
  // For testing tool_trajectory evaluator
597
803
  ]);
804
// Setting keys returned by getKnownSettings for the "claude", "claude-code"
// and "claude-sdk" providers. Each key is listed in snake_case and camelCase
// except "model" and "cwd".
var CLAUDE_SETTINGS = /* @__PURE__ */ new Set([
  ...COMMON_SETTINGS,
  "model",
  "cwd",
  "timeout_seconds", "timeoutSeconds",
  "log_dir", "logDir",
  "log_directory", "logDirectory",
  "log_format", "logFormat",
  "log_output_format", "logOutputFormat",
  "system_prompt", "systemPrompt",
  "workspace_template", "workspaceTemplate",
  "max_turns", "maxTurns",
  "max_budget_usd", "maxBudgetUsd"
]);
598
827
  function getKnownSettings(provider) {
599
828
  const normalizedProvider = provider.toLowerCase();
600
829
  switch (normalizedProvider) {
@@ -610,8 +839,15 @@ function getKnownSettings(provider) {
610
839
  case "codex":
611
840
  case "codex-cli":
612
841
  return CODEX_SETTINGS;
842
+ case "copilot":
843
+ case "copilot-sdk":
844
+ case "copilot_sdk":
613
845
  case "copilot-cli":
614
- return COPILOT_SETTINGS;
846
+ return COPILOT_SDK_SETTINGS;
847
+ case "claude":
848
+ case "claude-code":
849
+ case "claude-sdk":
850
+ return CLAUDE_SETTINGS;
615
851
  case "vscode":
616
852
  case "vscode-insiders":
617
853
  return VSCODE_SETTINGS;
@@ -909,7 +1145,32 @@ async function validateConfigFile(filePath) {
909
1145
  });
910
1146
  }
911
1147
  }
912
- const allowedFields = /* @__PURE__ */ new Set(["$schema", "guideline_patterns"]);
1148
+ const evalPatterns = config.eval_patterns;
1149
+ if (evalPatterns !== void 0) {
1150
+ if (!Array.isArray(evalPatterns)) {
1151
+ errors.push({
1152
+ severity: "error",
1153
+ filePath,
1154
+ location: "eval_patterns",
1155
+ message: "Field 'eval_patterns' must be an array"
1156
+ });
1157
+ } else if (!evalPatterns.every((p) => typeof p === "string")) {
1158
+ errors.push({
1159
+ severity: "error",
1160
+ filePath,
1161
+ location: "eval_patterns",
1162
+ message: "All entries in 'eval_patterns' must be strings"
1163
+ });
1164
+ } else if (evalPatterns.length === 0) {
1165
+ errors.push({
1166
+ severity: "warning",
1167
+ filePath,
1168
+ location: "eval_patterns",
1169
+ message: "Field 'eval_patterns' is empty. Consider removing it or adding patterns."
1170
+ });
1171
+ }
1172
+ }
1173
+ const allowedFields = /* @__PURE__ */ new Set(["$schema", "guideline_patterns", "eval_patterns"]);
913
1174
  const unexpectedFields = Object.keys(config).filter((key) => !allowedFields.has(key));
914
1175
  if (unexpectedFields.length > 0) {
915
1176
  errors.push({
@@ -1046,30 +1307,36 @@ async function validateFileReferences(evalFilePath) {
1046
1307
  if (!isObject3(parsed)) {
1047
1308
  return errors;
1048
1309
  }
1049
- const evalcases = parsed.evalcases;
1050
- if (!Array.isArray(evalcases)) {
1310
+ let cases = parsed.tests;
1311
+ if (cases === void 0 && "eval_cases" in parsed) {
1312
+ cases = parsed.eval_cases;
1313
+ }
1314
+ if (cases === void 0 && "evalcases" in parsed) {
1315
+ cases = parsed.evalcases;
1316
+ }
1317
+ if (!Array.isArray(cases)) {
1051
1318
  return errors;
1052
1319
  }
1053
- for (let i = 0; i < evalcases.length; i++) {
1054
- const evalCase = evalcases[i];
1320
+ for (let i = 0; i < cases.length; i++) {
1321
+ const evalCase = cases[i];
1055
1322
  if (!isObject3(evalCase)) {
1056
1323
  continue;
1057
1324
  }
1058
- const inputMessages = evalCase.input_messages;
1059
- if (Array.isArray(inputMessages)) {
1325
+ const inputField = evalCase.input;
1326
+ if (Array.isArray(inputField)) {
1060
1327
  await validateMessagesFileRefs(
1061
- inputMessages,
1062
- `evalcases[${i}].input_messages`,
1328
+ inputField,
1329
+ `tests[${i}].input`,
1063
1330
  searchRoots,
1064
1331
  absolutePath,
1065
1332
  errors
1066
1333
  );
1067
1334
  }
1068
- const expectedMessages = evalCase.expected_messages;
1069
- if (Array.isArray(expectedMessages)) {
1335
+ const expectedOutputField = evalCase.expected_output;
1336
+ if (Array.isArray(expectedOutputField)) {
1070
1337
  await validateMessagesFileRefs(
1071
- expectedMessages,
1072
- `evalcases[${i}].expected_messages`,
1338
+ expectedOutputField,
1339
+ `tests[${i}].expected_output`,
1073
1340
  searchRoots,
1074
1341
  absolutePath,
1075
1342
  errors