@agentv/core 2.6.0 → 2.7.1-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,76 @@
1
+ // src/evaluation/types.ts
2
+ var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
3
+ var TEST_MESSAGE_ROLES = TEST_MESSAGE_ROLE_VALUES;
4
+ var TEST_MESSAGE_ROLE_SET = new Set(TEST_MESSAGE_ROLE_VALUES);
5
+ function isTestMessageRole(value) {
6
+ return typeof value === "string" && TEST_MESSAGE_ROLE_SET.has(value);
7
+ }
8
+ function isJsonObject(value) {
9
+ if (typeof value !== "object" || value === null || Array.isArray(value)) {
10
+ return false;
11
+ }
12
+ return Object.values(value).every(isJsonValue);
13
+ }
14
+ function isJsonValue(value) {
15
+ if (value === null || typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
16
+ return true;
17
+ }
18
+ if (Array.isArray(value)) {
19
+ return value.every(isJsonValue);
20
+ }
21
+ if (typeof value === "object") {
22
+ return isJsonObject(value);
23
+ }
24
+ return false;
25
+ }
26
+ function isTestMessage(value) {
27
+ if (typeof value !== "object" || value === null) {
28
+ return false;
29
+ }
30
+ const candidate = value;
31
+ if (!isTestMessageRole(candidate.role)) {
32
+ return false;
33
+ }
34
+ if (typeof candidate.content === "string") {
35
+ return true;
36
+ }
37
+ if (Array.isArray(candidate.content) && candidate.content.every(isJsonObject)) {
38
+ return true;
39
+ }
40
+ if (Array.isArray(candidate.tool_calls) && candidate.tool_calls.length > 0) {
41
+ return true;
42
+ }
43
+ if (isJsonObject(candidate.content)) {
44
+ return true;
45
+ }
46
+ return false;
47
+ }
48
+ var EVALUATOR_KIND_VALUES = [
49
+ "code_judge",
50
+ "llm_judge",
51
+ "rubric",
52
+ "composite",
53
+ "tool_trajectory",
54
+ "field_accuracy",
55
+ "latency",
56
+ "cost",
57
+ "token_usage",
58
+ "execution_metrics",
59
+ "agent_judge",
60
+ "contains",
61
+ "regex",
62
+ "is_json",
63
+ "equals",
64
+ "rubrics"
65
+ ];
66
+ var EVALUATOR_KIND_SET = new Set(EVALUATOR_KIND_VALUES);
67
+ function isEvaluatorKind(value) {
68
+ return typeof value === "string" && EVALUATOR_KIND_SET.has(value);
69
+ }
70
+ function getHitCount(result) {
71
+ return result.hits.length;
72
+ }
73
+
1
74
  // src/evaluation/file-utils.ts
2
75
  import { constants } from "node:fs";
3
76
  import { access, readFile } from "node:fs/promises";
@@ -147,6 +220,9 @@ var CliTargetInputSchema = z.object({
147
220
  attachmentsFormat: z.string().optional(),
148
221
  // Working directory - optional
149
222
  cwd: z.string().optional(),
223
+ // Workspace template directory - optional (mutually exclusive with cwd)
224
+ workspace_template: z.string().optional(),
225
+ workspaceTemplate: z.string().optional(),
150
226
  // Timeout in seconds - optional
151
227
  timeout_seconds: z.number().positive().optional(),
152
228
  timeoutSeconds: z.number().positive().optional(),
@@ -188,6 +264,7 @@ var CliTargetConfigSchema = z.object({
188
264
  commandTemplate: z.string().min(1),
189
265
  filesFormat: z.string().optional(),
190
266
  cwd: z.string().optional(),
267
+ workspaceTemplate: z.string().optional(),
191
268
  timeoutMs: z.number().positive().optional(),
192
269
  healthcheck: CliHealthcheckSchema.optional(),
193
270
  verbose: z.boolean().optional(),
@@ -247,6 +324,19 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
247
324
  );
248
325
  const filesFormatSource = input.files_format ?? input.filesFormat ?? input.attachments_format ?? input.attachmentsFormat;
249
326
  const filesFormat = resolveOptionalLiteralString(filesFormatSource);
327
+ const workspaceTemplateSource = input.workspace_template ?? input.workspaceTemplate;
328
+ let workspaceTemplate = resolveOptionalString(
329
+ workspaceTemplateSource,
330
+ env,
331
+ `${targetName} workspace template`,
332
+ {
333
+ allowLiteral: true,
334
+ optionalEnv: true
335
+ }
336
+ );
337
+ if (workspaceTemplate && evalFilePath && !path2.isAbsolute(workspaceTemplate)) {
338
+ workspaceTemplate = path2.resolve(path2.dirname(path2.resolve(evalFilePath)), workspaceTemplate);
339
+ }
250
340
  let cwd = resolveOptionalString(input.cwd, env, `${targetName} working directory`, {
251
341
  allowLiteral: true,
252
342
  optionalEnv: true
@@ -254,7 +344,12 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
254
344
  if (cwd && evalFilePath && !path2.isAbsolute(cwd)) {
255
345
  cwd = path2.resolve(path2.dirname(path2.resolve(evalFilePath)), cwd);
256
346
  }
257
- if (!cwd && evalFilePath) {
347
+ if (cwd && workspaceTemplate) {
348
+ throw new Error(
349
+ `${targetName}: 'cwd' and 'workspace_template' are mutually exclusive. Use 'cwd' to run in an existing directory, or 'workspace_template' to copy a template to a temp location.`
350
+ );
351
+ }
352
+ if (!cwd && !workspaceTemplate && evalFilePath) {
258
353
  cwd = path2.dirname(path2.resolve(evalFilePath));
259
354
  }
260
355
  const timeoutSeconds = input.timeout_seconds ?? input.timeoutSeconds;
@@ -268,6 +363,7 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
268
363
  commandTemplate,
269
364
  filesFormat,
270
365
  cwd,
366
+ workspaceTemplate,
271
367
  timeoutMs,
272
368
  healthcheck,
273
369
  verbose,
@@ -286,7 +382,9 @@ var BASE_TARGET_SCHEMA = z.object({
286
382
  name: z.string().min(1, "target name is required"),
287
383
  provider: z.string().min(1, "provider is required"),
288
384
  judge_target: z.string().optional(),
289
- workers: z.number().int().min(1).optional()
385
+ workers: z.number().int().min(1).optional(),
386
+ workspace_template: z.string().optional(),
387
+ workspaceTemplate: z.string().optional()
290
388
  }).passthrough();
291
389
  var DEFAULT_AZURE_API_VERSION = "2024-12-01-preview";
292
390
  function normalizeAzureApiVersion(value) {
@@ -377,7 +475,18 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
377
475
  judgeTarget: parsed.judge_target,
378
476
  workers: parsed.workers,
379
477
  providerBatching,
380
- config: resolveCodexConfig(parsed, env)
478
+ config: resolveCodexConfig(parsed, env, evalFilePath)
479
+ };
480
+ case "copilot":
481
+ case "copilot-sdk":
482
+ case "copilot_sdk":
483
+ return {
484
+ kind: "copilot",
485
+ name: parsed.name,
486
+ judgeTarget: parsed.judge_target,
487
+ workers: parsed.workers,
488
+ providerBatching,
489
+ config: resolveCopilotSdkConfig(parsed, env, evalFilePath)
381
490
  };
382
491
  case "copilot-cli":
383
492
  return {
@@ -386,7 +495,7 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
386
495
  judgeTarget: parsed.judge_target,
387
496
  workers: parsed.workers,
388
497
  providerBatching,
389
- config: resolveCopilotConfig(parsed, env)
498
+ config: resolveCopilotCliConfig(parsed, env, evalFilePath)
390
499
  };
391
500
  case "pi":
392
501
  case "pi-coding-agent":
@@ -396,7 +505,7 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
396
505
  judgeTarget: parsed.judge_target,
397
506
  workers: parsed.workers,
398
507
  providerBatching,
399
- config: resolvePiCodingAgentConfig(parsed, env)
508
+ config: resolvePiCodingAgentConfig(parsed, env, evalFilePath)
400
509
  };
401
510
  case "pi-agent-sdk":
402
511
  return {
@@ -407,14 +516,16 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
407
516
  providerBatching,
408
517
  config: resolvePiAgentSdkConfig(parsed, env)
409
518
  };
519
+ case "claude":
410
520
  case "claude-code":
521
+ case "claude-sdk":
411
522
  return {
412
- kind: "claude-code",
523
+ kind: "claude",
413
524
  name: parsed.name,
414
525
  judgeTarget: parsed.judge_target,
415
526
  workers: parsed.workers,
416
527
  providerBatching,
417
- config: resolveClaudeCodeConfig(parsed, env)
528
+ config: resolveClaudeConfig(parsed, env, evalFilePath)
418
529
  };
419
530
  case "mock":
420
531
  return {
@@ -433,7 +544,7 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
433
544
  judgeTarget: parsed.judge_target,
434
545
  workers: parsed.workers,
435
546
  providerBatching,
436
- config: resolveVSCodeConfig(parsed, env, provider === "vscode-insiders")
547
+ config: resolveVSCodeConfig(parsed, env, provider === "vscode-insiders", evalFilePath)
437
548
  };
438
549
  case "cli":
439
550
  return {
@@ -445,7 +556,14 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
445
556
  config: resolveCliConfig(parsed, env, evalFilePath)
446
557
  };
447
558
  default:
448
- throw new Error(`Unsupported provider '${parsed.provider}' in target '${parsed.name}'`);
559
+ return {
560
+ kind: "cli",
561
+ name: parsed.name,
562
+ judgeTarget: parsed.judge_target,
563
+ workers: parsed.workers,
564
+ providerBatching,
565
+ config: resolveDiscoveredProviderConfig(parsed, provider, env, evalFilePath)
566
+ };
449
567
  }
450
568
  }
451
569
  function resolveAzureConfig(target, env) {
@@ -517,14 +635,20 @@ function resolveGeminiConfig(target, env) {
517
635
  retry
518
636
  };
519
637
  }
520
- function resolveCodexConfig(target, env) {
638
+ function resolveCodexConfig(target, env, evalFilePath) {
639
+ const modelSource = target.model;
521
640
  const executableSource = target.executable ?? target.command ?? target.binary;
522
641
  const argsSource = target.args ?? target.arguments;
523
642
  const cwdSource = target.cwd;
643
+ const workspaceTemplateSource = target.workspace_template ?? target.workspaceTemplate;
524
644
  const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
525
645
  const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
526
646
  const logFormatSource = target.log_format ?? target.logFormat ?? target.log_output_format ?? target.logOutputFormat ?? env.AGENTV_CODEX_LOG_FORMAT;
527
647
  const systemPromptSource = target.system_prompt ?? target.systemPrompt;
648
+ const model = resolveOptionalString(modelSource, env, `${target.name} codex model`, {
649
+ allowLiteral: true,
650
+ optionalEnv: true
651
+ });
528
652
  const executable = resolveOptionalString(executableSource, env, `${target.name} codex executable`, {
529
653
  allowLiteral: true,
530
654
  optionalEnv: true
@@ -534,6 +658,23 @@ function resolveCodexConfig(target, env) {
534
658
  allowLiteral: true,
535
659
  optionalEnv: true
536
660
  });
661
+ let workspaceTemplate = resolveOptionalString(
662
+ workspaceTemplateSource,
663
+ env,
664
+ `${target.name} codex workspace template`,
665
+ {
666
+ allowLiteral: true,
667
+ optionalEnv: true
668
+ }
669
+ );
670
+ if (workspaceTemplate && evalFilePath && !path2.isAbsolute(workspaceTemplate)) {
671
+ workspaceTemplate = path2.resolve(path2.dirname(path2.resolve(evalFilePath)), workspaceTemplate);
672
+ }
673
+ if (cwd && workspaceTemplate) {
674
+ throw new Error(
675
+ `${target.name}: 'cwd' and 'workspace_template' are mutually exclusive. Use 'cwd' to run in an existing directory, or 'workspace_template' to copy a template to a temp location.`
676
+ );
677
+ }
537
678
  const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} codex timeout`);
538
679
  const logDir = resolveOptionalString(logDirSource, env, `${target.name} codex log directory`, {
539
680
  allowLiteral: true,
@@ -542,9 +683,11 @@ function resolveCodexConfig(target, env) {
542
683
  const logFormat = normalizeCodexLogFormat(logFormatSource);
543
684
  const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
544
685
  return {
686
+ model,
545
687
  executable,
546
688
  args,
547
689
  cwd,
690
+ workspaceTemplate,
548
691
  timeoutMs,
549
692
  logDir,
550
693
  logFormat,
@@ -564,36 +707,147 @@ function normalizeCodexLogFormat(value) {
564
707
  }
565
708
  throw new Error("codex log format must be 'summary' or 'json'");
566
709
  }
567
- function resolveCopilotConfig(target, env) {
568
- const executableSource = target.executable ?? target.command ?? target.binary;
710
+ function resolveCopilotSdkConfig(target, env, evalFilePath) {
711
+ const cliUrlSource = target.cli_url ?? target.cliUrl;
712
+ const cliPathSource = target.cli_path ?? target.cliPath;
713
+ const githubTokenSource = target.github_token ?? target.githubToken;
569
714
  const modelSource = target.model;
570
- const argsSource = target.args ?? target.arguments;
571
715
  const cwdSource = target.cwd;
716
+ const workspaceTemplateSource = target.workspace_template ?? target.workspaceTemplate;
572
717
  const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
573
718
  const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
574
- const logFormatSource = target.log_format ?? target.logFormat ?? target.log_output_format ?? target.logOutputFormat;
719
+ const logFormatSource = target.log_format ?? target.logFormat;
575
720
  const systemPromptSource = target.system_prompt ?? target.systemPrompt;
576
- const executable = resolveOptionalString(executableSource, env, `${target.name} copilot executable`, {
721
+ const cliUrl = resolveOptionalString(cliUrlSource, env, `${target.name} copilot-sdk cli URL`, {
577
722
  allowLiteral: true,
578
723
  optionalEnv: true
579
- }) ?? "copilot";
580
- const model = resolveOptionalString(modelSource, env, `${target.name} copilot model`, {
724
+ });
725
+ const cliPath = resolveOptionalString(cliPathSource, env, `${target.name} copilot-sdk cli path`, {
726
+ allowLiteral: true,
727
+ optionalEnv: true
728
+ });
729
+ const githubToken = resolveOptionalString(
730
+ githubTokenSource,
731
+ env,
732
+ `${target.name} copilot-sdk github token`,
733
+ {
734
+ allowLiteral: false,
735
+ optionalEnv: true
736
+ }
737
+ );
738
+ const model = resolveOptionalString(modelSource, env, `${target.name} copilot-sdk model`, {
739
+ allowLiteral: true,
740
+ optionalEnv: true
741
+ });
742
+ const cwd = resolveOptionalString(cwdSource, env, `${target.name} copilot-sdk cwd`, {
581
743
  allowLiteral: true,
582
744
  optionalEnv: true
583
745
  });
584
- const args = resolveOptionalStringArray(argsSource, env, `${target.name} copilot args`);
585
- const cwd = resolveOptionalString(cwdSource, env, `${target.name} copilot cwd`, {
746
+ let workspaceTemplate = resolveOptionalString(
747
+ workspaceTemplateSource,
748
+ env,
749
+ `${target.name} copilot-sdk workspace template`,
750
+ {
751
+ allowLiteral: true,
752
+ optionalEnv: true
753
+ }
754
+ );
755
+ if (workspaceTemplate && evalFilePath && !path2.isAbsolute(workspaceTemplate)) {
756
+ workspaceTemplate = path2.resolve(path2.dirname(path2.resolve(evalFilePath)), workspaceTemplate);
757
+ }
758
+ if (cwd && workspaceTemplate) {
759
+ throw new Error(
760
+ `${target.name}: 'cwd' and 'workspace_template' are mutually exclusive. Use 'cwd' to run in an existing directory, or 'workspace_template' to copy a template to a temp location.`
761
+ );
762
+ }
763
+ const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} copilot-sdk timeout`);
764
+ const logDir = resolveOptionalString(
765
+ logDirSource,
766
+ env,
767
+ `${target.name} copilot-sdk log directory`,
768
+ {
769
+ allowLiteral: true,
770
+ optionalEnv: true
771
+ }
772
+ );
773
+ const logFormat = normalizeCopilotLogFormat(logFormatSource);
774
+ const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
775
+ return {
776
+ cliUrl,
777
+ cliPath,
778
+ githubToken,
779
+ model,
780
+ cwd,
781
+ workspaceTemplate,
782
+ timeoutMs,
783
+ logDir,
784
+ logFormat,
785
+ systemPrompt
786
+ };
787
+ }
788
+ function resolveCopilotCliConfig(target, env, evalFilePath) {
789
+ const executableSource = target.executable ?? target.command ?? target.binary;
790
+ const modelSource = target.model;
791
+ const argsSource = target.args ?? target.arguments;
792
+ const cwdSource = target.cwd;
793
+ const workspaceTemplateSource = target.workspace_template ?? target.workspaceTemplate;
794
+ const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
795
+ const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
796
+ const logFormatSource = target.log_format ?? target.logFormat;
797
+ const systemPromptSource = target.system_prompt ?? target.systemPrompt;
798
+ const executable = resolveOptionalString(executableSource, env, `${target.name} copilot-cli executable`, {
799
+ allowLiteral: true,
800
+ optionalEnv: true
801
+ }) ?? "copilot";
802
+ const model = resolveOptionalString(modelSource, env, `${target.name} copilot-cli model`, {
586
803
  allowLiteral: true,
587
804
  optionalEnv: true
588
805
  });
589
- const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} copilot timeout`);
590
- const logDir = resolveOptionalString(logDirSource, env, `${target.name} copilot log directory`, {
806
+ const args = resolveOptionalStringArray(argsSource, env, `${target.name} copilot-cli args`);
807
+ const cwd = resolveOptionalString(cwdSource, env, `${target.name} copilot-cli cwd`, {
591
808
  allowLiteral: true,
592
809
  optionalEnv: true
593
810
  });
811
+ let workspaceTemplate = resolveOptionalString(
812
+ workspaceTemplateSource,
813
+ env,
814
+ `${target.name} copilot-cli workspace template`,
815
+ {
816
+ allowLiteral: true,
817
+ optionalEnv: true
818
+ }
819
+ );
820
+ if (workspaceTemplate && evalFilePath && !path2.isAbsolute(workspaceTemplate)) {
821
+ workspaceTemplate = path2.resolve(path2.dirname(path2.resolve(evalFilePath)), workspaceTemplate);
822
+ }
823
+ if (cwd && workspaceTemplate) {
824
+ throw new Error(
825
+ `${target.name}: 'cwd' and 'workspace_template' are mutually exclusive. Use 'cwd' to run in an existing directory, or 'workspace_template' to copy a template to a temp location.`
826
+ );
827
+ }
828
+ const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} copilot-cli timeout`);
829
+ const logDir = resolveOptionalString(
830
+ logDirSource,
831
+ env,
832
+ `${target.name} copilot-cli log directory`,
833
+ {
834
+ allowLiteral: true,
835
+ optionalEnv: true
836
+ }
837
+ );
594
838
  const logFormat = normalizeCopilotLogFormat(logFormatSource);
595
839
  const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
596
- return { executable, model, args, cwd, timeoutMs, logDir, logFormat, systemPrompt };
840
+ return {
841
+ executable,
842
+ model,
843
+ args,
844
+ cwd,
845
+ workspaceTemplate,
846
+ timeoutMs,
847
+ logDir,
848
+ logFormat,
849
+ systemPrompt
850
+ };
597
851
  }
598
852
  function normalizeCopilotLogFormat(value) {
599
853
  if (value === void 0 || value === null) return void 0;
@@ -602,7 +856,7 @@ function normalizeCopilotLogFormat(value) {
602
856
  if (normalized === "json" || normalized === "summary") return normalized;
603
857
  throw new Error("copilot log format must be 'summary' or 'json'");
604
858
  }
605
- function resolvePiCodingAgentConfig(target, env) {
859
+ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
606
860
  const executableSource = target.executable ?? target.command ?? target.binary;
607
861
  const providerSource = target.pi_provider ?? target.piProvider ?? target.llm_provider;
608
862
  const modelSource = target.model ?? target.pi_model ?? target.piModel;
@@ -611,6 +865,7 @@ function resolvePiCodingAgentConfig(target, env) {
611
865
  const thinkingSource = target.thinking ?? target.pi_thinking ?? target.piThinking;
612
866
  const argsSource = target.args ?? target.arguments;
613
867
  const cwdSource = target.cwd;
868
+ const workspaceTemplateSource = target.workspace_template ?? target.workspaceTemplate;
614
869
  const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
615
870
  const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
616
871
  const logFormatSource = target.log_format ?? target.logFormat;
@@ -644,6 +899,23 @@ function resolvePiCodingAgentConfig(target, env) {
644
899
  allowLiteral: true,
645
900
  optionalEnv: true
646
901
  });
902
+ let workspaceTemplate = resolveOptionalString(
903
+ workspaceTemplateSource,
904
+ env,
905
+ `${target.name} pi workspace template`,
906
+ {
907
+ allowLiteral: true,
908
+ optionalEnv: true
909
+ }
910
+ );
911
+ if (workspaceTemplate && evalFilePath && !path2.isAbsolute(workspaceTemplate)) {
912
+ workspaceTemplate = path2.resolve(path2.dirname(path2.resolve(evalFilePath)), workspaceTemplate);
913
+ }
914
+ if (cwd && workspaceTemplate) {
915
+ throw new Error(
916
+ `${target.name}: 'cwd' and 'workspace_template' are mutually exclusive. Use 'cwd' to run in an existing directory, or 'workspace_template' to copy a template to a temp location.`
917
+ );
918
+ }
647
919
  const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} pi timeout`);
648
920
  const logDir = resolveOptionalString(logDirSource, env, `${target.name} pi log directory`, {
649
921
  allowLiteral: true,
@@ -660,6 +932,7 @@ function resolvePiCodingAgentConfig(target, env) {
660
932
  thinking,
661
933
  args,
662
934
  cwd,
935
+ workspaceTemplate,
663
936
  timeoutMs,
664
937
  logDir,
665
938
  logFormat,
@@ -699,96 +972,114 @@ function resolvePiAgentSdkConfig(target, env) {
699
972
  systemPrompt
700
973
  };
701
974
  }
702
- function resolveClaudeCodeConfig(target, env) {
703
- const executableSource = target.executable ?? target.command ?? target.binary;
975
+ function resolveClaudeConfig(target, env, evalFilePath) {
704
976
  const modelSource = target.model;
705
- const argsSource = target.args ?? target.arguments;
706
977
  const cwdSource = target.cwd;
978
+ const workspaceTemplateSource = target.workspace_template ?? target.workspaceTemplate;
707
979
  const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
708
980
  const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
709
- const logFormatSource = target.log_format ?? target.logFormat ?? target.log_output_format ?? target.logOutputFormat ?? env.AGENTV_CLAUDE_CODE_LOG_FORMAT;
981
+ const logFormatSource = target.log_format ?? target.logFormat ?? target.log_output_format ?? target.logOutputFormat ?? env.AGENTV_CLAUDE_LOG_FORMAT;
710
982
  const systemPromptSource = target.system_prompt ?? target.systemPrompt;
711
- const executable = resolveOptionalString(executableSource, env, `${target.name} claude-code executable`, {
712
- allowLiteral: true,
713
- optionalEnv: true
714
- }) ?? "claude";
715
- const model = resolveOptionalString(modelSource, env, `${target.name} claude-code model`, {
983
+ const model = resolveOptionalString(modelSource, env, `${target.name} claude model`, {
716
984
  allowLiteral: true,
717
985
  optionalEnv: true
718
986
  });
719
- const args = resolveOptionalStringArray(argsSource, env, `${target.name} claude-code args`);
720
- const cwd = resolveOptionalString(cwdSource, env, `${target.name} claude-code cwd`, {
987
+ const cwd = resolveOptionalString(cwdSource, env, `${target.name} claude cwd`, {
721
988
  allowLiteral: true,
722
989
  optionalEnv: true
723
990
  });
724
- const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} claude-code timeout`);
725
- const logDir = resolveOptionalString(
726
- logDirSource,
991
+ let workspaceTemplate = resolveOptionalString(
992
+ workspaceTemplateSource,
727
993
  env,
728
- `${target.name} claude-code log directory`,
994
+ `${target.name} claude workspace template`,
729
995
  {
730
996
  allowLiteral: true,
731
997
  optionalEnv: true
732
998
  }
733
999
  );
734
- const logFormat = normalizeClaudeCodeLogFormat(logFormatSource);
1000
+ if (workspaceTemplate && evalFilePath && !path2.isAbsolute(workspaceTemplate)) {
1001
+ workspaceTemplate = path2.resolve(path2.dirname(path2.resolve(evalFilePath)), workspaceTemplate);
1002
+ }
1003
+ if (cwd && workspaceTemplate) {
1004
+ throw new Error(
1005
+ `${target.name}: 'cwd' and 'workspace_template' are mutually exclusive. Use 'cwd' to run in an existing directory, or 'workspace_template' to copy a template to a temp location.`
1006
+ );
1007
+ }
1008
+ const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} claude timeout`);
1009
+ const logDir = resolveOptionalString(logDirSource, env, `${target.name} claude log directory`, {
1010
+ allowLiteral: true,
1011
+ optionalEnv: true
1012
+ });
1013
+ const logFormat = normalizeClaudeLogFormat(logFormatSource);
735
1014
  const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
1015
+ const maxTurns = typeof target.max_turns === "number" ? target.max_turns : typeof target.maxTurns === "number" ? target.maxTurns : void 0;
1016
+ const maxBudgetUsd = typeof target.max_budget_usd === "number" ? target.max_budget_usd : typeof target.maxBudgetUsd === "number" ? target.maxBudgetUsd : void 0;
736
1017
  return {
737
- executable,
738
1018
  model,
739
1019
  systemPrompt,
740
- args,
741
1020
  cwd,
1021
+ workspaceTemplate,
742
1022
  timeoutMs,
1023
+ maxTurns,
1024
+ maxBudgetUsd,
743
1025
  logDir,
744
1026
  logFormat
745
1027
  };
746
1028
  }
747
- function normalizeClaudeCodeLogFormat(value) {
1029
+ function normalizeClaudeLogFormat(value) {
748
1030
  if (value === void 0 || value === null) {
749
1031
  return void 0;
750
1032
  }
751
1033
  if (typeof value !== "string") {
752
- throw new Error("claude-code log format must be 'summary' or 'json'");
1034
+ throw new Error("claude log format must be 'summary' or 'json'");
753
1035
  }
754
1036
  const normalized = value.trim().toLowerCase();
755
1037
  if (normalized === "json" || normalized === "summary") {
756
1038
  return normalized;
757
1039
  }
758
- throw new Error("claude-code log format must be 'summary' or 'json'");
1040
+ throw new Error("claude log format must be 'summary' or 'json'");
759
1041
  }
760
1042
  function resolveMockConfig(target) {
761
1043
  const response = typeof target.response === "string" ? target.response : void 0;
762
1044
  return { response };
763
1045
  }
764
- function resolveVSCodeConfig(target, env, insiders) {
1046
+ function resolveVSCodeConfig(target, env, insiders, evalFilePath) {
765
1047
  const workspaceTemplateEnvVar = resolveOptionalLiteralString(
766
1048
  target.workspace_template ?? target.workspaceTemplate
767
1049
  );
768
- const workspaceTemplate = workspaceTemplateEnvVar ? resolveOptionalString(
1050
+ let workspaceTemplate = workspaceTemplateEnvVar ? resolveOptionalString(
769
1051
  workspaceTemplateEnvVar,
770
1052
  env,
771
1053
  `${target.name} workspace template path`,
772
1054
  {
773
- allowLiteral: false,
1055
+ allowLiteral: true,
774
1056
  optionalEnv: true
775
1057
  }
776
1058
  ) : void 0;
777
- const commandSource = target.vscode_cmd ?? target.command;
1059
+ if (workspaceTemplate && evalFilePath && !path2.isAbsolute(workspaceTemplate)) {
1060
+ workspaceTemplate = path2.resolve(path2.dirname(path2.resolve(evalFilePath)), workspaceTemplate);
1061
+ }
1062
+ const executableSource = target.executable;
778
1063
  const waitSource = target.wait;
779
1064
  const dryRunSource = target.dry_run ?? target.dryRun;
780
1065
  const subagentRootSource = target.subagent_root ?? target.subagentRoot;
1066
+ const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
781
1067
  const defaultCommand = insiders ? "code-insiders" : "code";
782
- const command = resolveOptionalLiteralString(commandSource) ?? defaultCommand;
1068
+ const executable = resolveOptionalString(executableSource, env, `${target.name} vscode executable`, {
1069
+ allowLiteral: true,
1070
+ optionalEnv: true
1071
+ }) ?? defaultCommand;
1072
+ const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} vscode timeout`);
783
1073
  return {
784
- command,
1074
+ executable,
785
1075
  waitForResponse: resolveOptionalBoolean(waitSource) ?? true,
786
1076
  dryRun: resolveOptionalBoolean(dryRunSource) ?? false,
787
1077
  subagentRoot: resolveOptionalString(subagentRootSource, env, `${target.name} subagent root`, {
788
1078
  allowLiteral: true,
789
1079
  optionalEnv: true
790
1080
  }),
791
- workspaceTemplate
1081
+ workspaceTemplate,
1082
+ timeoutMs
792
1083
  };
793
1084
  }
794
1085
  var cliErrorMap = (issue, ctx) => {
@@ -821,6 +1112,27 @@ function resolveCliConfig(target, env, evalFilePath) {
821
1112
  }
822
1113
  return normalized;
823
1114
  }
1115
+ function resolveDiscoveredProviderConfig(target, providerKind, env, evalFilePath) {
1116
+ const commandTemplateSource = target.command_template ?? target.commandTemplate;
1117
+ const commandTemplate = commandTemplateSource ? resolveString(commandTemplateSource, env, `${target.name} command template`, true) : `bun run .agentv/providers/${providerKind}.ts {PROMPT}`;
1118
+ const timeoutSeconds = target.timeout_seconds ?? target.timeoutSeconds;
1119
+ const timeoutMs = resolveTimeoutMs(timeoutSeconds, `${target.name} timeout`);
1120
+ let cwd = resolveOptionalString(target.cwd, env, `${target.name} working directory`, {
1121
+ allowLiteral: true,
1122
+ optionalEnv: true
1123
+ });
1124
+ if (cwd && evalFilePath && !path2.isAbsolute(cwd)) {
1125
+ cwd = path2.resolve(path2.dirname(path2.resolve(evalFilePath)), cwd);
1126
+ }
1127
+ if (!cwd && evalFilePath) {
1128
+ cwd = path2.dirname(path2.resolve(evalFilePath));
1129
+ }
1130
+ return {
1131
+ commandTemplate,
1132
+ cwd,
1133
+ timeoutMs
1134
+ };
1135
+ }
824
1136
  function resolveTimeoutMs(source, description) {
825
1137
  const seconds = resolveOptionalNumber(source, `${description} (seconds)`);
826
1138
  if (seconds === void 0) {
@@ -998,9 +1310,10 @@ function resolveOptionalNumberArray(source, description) {
998
1310
  // src/evaluation/providers/types.ts
999
1311
  var AGENT_PROVIDER_KINDS = [
1000
1312
  "codex",
1313
+ "copilot",
1001
1314
  "copilot-cli",
1002
1315
  "pi-coding-agent",
1003
- "claude-code",
1316
+ "claude",
1004
1317
  "vscode",
1005
1318
  "vscode-insiders"
1006
1319
  ];
@@ -1009,10 +1322,11 @@ var KNOWN_PROVIDERS = [
1009
1322
  "anthropic",
1010
1323
  "gemini",
1011
1324
  "codex",
1325
+ "copilot",
1012
1326
  "copilot-cli",
1013
1327
  "pi-coding-agent",
1014
1328
  "pi-agent-sdk",
1015
- "claude-code",
1329
+ "claude",
1016
1330
  "cli",
1017
1331
  "mock",
1018
1332
  "vscode",
@@ -1027,8 +1341,16 @@ var PROVIDER_ALIASES = [
1027
1341
  // alias for "gemini"
1028
1342
  "codex-cli",
1029
1343
  // alias for "codex"
1344
+ "copilot-sdk",
1345
+ // alias for "copilot"
1346
+ "copilot_sdk",
1347
+ // alias for "copilot" (underscore variant)
1030
1348
  "pi",
1031
1349
  // alias for "pi-coding-agent"
1350
+ "claude-code",
1351
+ // alias for "claude" (legacy)
1352
+ "claude-sdk",
1353
+ // alias for "claude"
1032
1354
  "openai",
1033
1355
  // legacy/future support
1034
1356
  "bedrock",
@@ -1056,6 +1378,13 @@ function isAgentProvider(provider) {
1056
1378
  }
1057
1379
 
1058
1380
  export {
1381
+ TEST_MESSAGE_ROLES,
1382
+ isTestMessageRole,
1383
+ isJsonObject,
1384
+ isJsonValue,
1385
+ isTestMessage,
1386
+ isEvaluatorKind,
1387
+ getHitCount,
1059
1388
  fileExists,
1060
1389
  normalizeLineEndings,
1061
1390
  readTextFile,
@@ -1071,4 +1400,4 @@ export {
1071
1400
  extractLastAssistantContent,
1072
1401
  isAgentProvider
1073
1402
  };
1074
- //# sourceMappingURL=chunk-SSPAANFZ.js.map
1403
+ //# sourceMappingURL=chunk-6W5E3VR6.js.map