@agentv/core 2.6.0 → 2.7.1-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-SSPAANFZ.js → chunk-6W5E3VR6.js} +383 -54
- package/dist/chunk-6W5E3VR6.js.map +1 -0
- package/dist/chunk-HFSYZHGF.js +82 -0
- package/dist/chunk-HFSYZHGF.js.map +1 -0
- package/dist/chunk-HMXZ2AX4.js +112 -0
- package/dist/chunk-HMXZ2AX4.js.map +1 -0
- package/dist/esm-5Q4BZALM.js +968 -0
- package/dist/esm-5Q4BZALM.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +337 -70
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +294 -69
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +9221 -4037
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +1717 -234
- package/dist/index.d.ts +1717 -234
- package/dist/index.js +6559 -3140
- package/dist/index.js.map +1 -1
- package/dist/otlp-json-file-exporter-77FDBRSY.js +7 -0
- package/dist/otlp-json-file-exporter-77FDBRSY.js.map +1 -0
- package/dist/simple-trace-file-exporter-S76DMABU.js +7 -0
- package/dist/simple-trace-file-exporter-S76DMABU.js.map +1 -0
- package/package.json +16 -3
- package/dist/chunk-SSPAANFZ.js.map +0 -1
|
@@ -1,3 +1,76 @@
|
|
|
1
|
+
// src/evaluation/types.ts
|
|
2
|
+
var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
|
|
3
|
+
var TEST_MESSAGE_ROLES = TEST_MESSAGE_ROLE_VALUES;
|
|
4
|
+
var TEST_MESSAGE_ROLE_SET = new Set(TEST_MESSAGE_ROLE_VALUES);
|
|
5
|
+
function isTestMessageRole(value) {
|
|
6
|
+
return typeof value === "string" && TEST_MESSAGE_ROLE_SET.has(value);
|
|
7
|
+
}
|
|
8
|
+
function isJsonObject(value) {
|
|
9
|
+
if (typeof value !== "object" || value === null || Array.isArray(value)) {
|
|
10
|
+
return false;
|
|
11
|
+
}
|
|
12
|
+
return Object.values(value).every(isJsonValue);
|
|
13
|
+
}
|
|
14
|
+
function isJsonValue(value) {
|
|
15
|
+
if (value === null || typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
|
|
16
|
+
return true;
|
|
17
|
+
}
|
|
18
|
+
if (Array.isArray(value)) {
|
|
19
|
+
return value.every(isJsonValue);
|
|
20
|
+
}
|
|
21
|
+
if (typeof value === "object") {
|
|
22
|
+
return isJsonObject(value);
|
|
23
|
+
}
|
|
24
|
+
return false;
|
|
25
|
+
}
|
|
26
|
+
function isTestMessage(value) {
|
|
27
|
+
if (typeof value !== "object" || value === null) {
|
|
28
|
+
return false;
|
|
29
|
+
}
|
|
30
|
+
const candidate = value;
|
|
31
|
+
if (!isTestMessageRole(candidate.role)) {
|
|
32
|
+
return false;
|
|
33
|
+
}
|
|
34
|
+
if (typeof candidate.content === "string") {
|
|
35
|
+
return true;
|
|
36
|
+
}
|
|
37
|
+
if (Array.isArray(candidate.content) && candidate.content.every(isJsonObject)) {
|
|
38
|
+
return true;
|
|
39
|
+
}
|
|
40
|
+
if (Array.isArray(candidate.tool_calls) && candidate.tool_calls.length > 0) {
|
|
41
|
+
return true;
|
|
42
|
+
}
|
|
43
|
+
if (isJsonObject(candidate.content)) {
|
|
44
|
+
return true;
|
|
45
|
+
}
|
|
46
|
+
return false;
|
|
47
|
+
}
|
|
48
|
+
var EVALUATOR_KIND_VALUES = [
|
|
49
|
+
"code_judge",
|
|
50
|
+
"llm_judge",
|
|
51
|
+
"rubric",
|
|
52
|
+
"composite",
|
|
53
|
+
"tool_trajectory",
|
|
54
|
+
"field_accuracy",
|
|
55
|
+
"latency",
|
|
56
|
+
"cost",
|
|
57
|
+
"token_usage",
|
|
58
|
+
"execution_metrics",
|
|
59
|
+
"agent_judge",
|
|
60
|
+
"contains",
|
|
61
|
+
"regex",
|
|
62
|
+
"is_json",
|
|
63
|
+
"equals",
|
|
64
|
+
"rubrics"
|
|
65
|
+
];
|
|
66
|
+
var EVALUATOR_KIND_SET = new Set(EVALUATOR_KIND_VALUES);
|
|
67
|
+
function isEvaluatorKind(value) {
|
|
68
|
+
return typeof value === "string" && EVALUATOR_KIND_SET.has(value);
|
|
69
|
+
}
|
|
70
|
+
function getHitCount(result) {
|
|
71
|
+
return result.hits.length;
|
|
72
|
+
}
|
|
73
|
+
|
|
1
74
|
// src/evaluation/file-utils.ts
|
|
2
75
|
import { constants } from "node:fs";
|
|
3
76
|
import { access, readFile } from "node:fs/promises";
|
|
@@ -147,6 +220,9 @@ var CliTargetInputSchema = z.object({
|
|
|
147
220
|
attachmentsFormat: z.string().optional(),
|
|
148
221
|
// Working directory - optional
|
|
149
222
|
cwd: z.string().optional(),
|
|
223
|
+
// Workspace template directory - optional (mutually exclusive with cwd)
|
|
224
|
+
workspace_template: z.string().optional(),
|
|
225
|
+
workspaceTemplate: z.string().optional(),
|
|
150
226
|
// Timeout in seconds - optional
|
|
151
227
|
timeout_seconds: z.number().positive().optional(),
|
|
152
228
|
timeoutSeconds: z.number().positive().optional(),
|
|
@@ -188,6 +264,7 @@ var CliTargetConfigSchema = z.object({
|
|
|
188
264
|
commandTemplate: z.string().min(1),
|
|
189
265
|
filesFormat: z.string().optional(),
|
|
190
266
|
cwd: z.string().optional(),
|
|
267
|
+
workspaceTemplate: z.string().optional(),
|
|
191
268
|
timeoutMs: z.number().positive().optional(),
|
|
192
269
|
healthcheck: CliHealthcheckSchema.optional(),
|
|
193
270
|
verbose: z.boolean().optional(),
|
|
@@ -247,6 +324,19 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
|
|
|
247
324
|
);
|
|
248
325
|
const filesFormatSource = input.files_format ?? input.filesFormat ?? input.attachments_format ?? input.attachmentsFormat;
|
|
249
326
|
const filesFormat = resolveOptionalLiteralString(filesFormatSource);
|
|
327
|
+
const workspaceTemplateSource = input.workspace_template ?? input.workspaceTemplate;
|
|
328
|
+
let workspaceTemplate = resolveOptionalString(
|
|
329
|
+
workspaceTemplateSource,
|
|
330
|
+
env,
|
|
331
|
+
`${targetName} workspace template`,
|
|
332
|
+
{
|
|
333
|
+
allowLiteral: true,
|
|
334
|
+
optionalEnv: true
|
|
335
|
+
}
|
|
336
|
+
);
|
|
337
|
+
if (workspaceTemplate && evalFilePath && !path2.isAbsolute(workspaceTemplate)) {
|
|
338
|
+
workspaceTemplate = path2.resolve(path2.dirname(path2.resolve(evalFilePath)), workspaceTemplate);
|
|
339
|
+
}
|
|
250
340
|
let cwd = resolveOptionalString(input.cwd, env, `${targetName} working directory`, {
|
|
251
341
|
allowLiteral: true,
|
|
252
342
|
optionalEnv: true
|
|
@@ -254,7 +344,12 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
|
|
|
254
344
|
if (cwd && evalFilePath && !path2.isAbsolute(cwd)) {
|
|
255
345
|
cwd = path2.resolve(path2.dirname(path2.resolve(evalFilePath)), cwd);
|
|
256
346
|
}
|
|
257
|
-
if (
|
|
347
|
+
if (cwd && workspaceTemplate) {
|
|
348
|
+
throw new Error(
|
|
349
|
+
`${targetName}: 'cwd' and 'workspace_template' are mutually exclusive. Use 'cwd' to run in an existing directory, or 'workspace_template' to copy a template to a temp location.`
|
|
350
|
+
);
|
|
351
|
+
}
|
|
352
|
+
if (!cwd && !workspaceTemplate && evalFilePath) {
|
|
258
353
|
cwd = path2.dirname(path2.resolve(evalFilePath));
|
|
259
354
|
}
|
|
260
355
|
const timeoutSeconds = input.timeout_seconds ?? input.timeoutSeconds;
|
|
@@ -268,6 +363,7 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
|
|
|
268
363
|
commandTemplate,
|
|
269
364
|
filesFormat,
|
|
270
365
|
cwd,
|
|
366
|
+
workspaceTemplate,
|
|
271
367
|
timeoutMs,
|
|
272
368
|
healthcheck,
|
|
273
369
|
verbose,
|
|
@@ -286,7 +382,9 @@ var BASE_TARGET_SCHEMA = z.object({
|
|
|
286
382
|
name: z.string().min(1, "target name is required"),
|
|
287
383
|
provider: z.string().min(1, "provider is required"),
|
|
288
384
|
judge_target: z.string().optional(),
|
|
289
|
-
workers: z.number().int().min(1).optional()
|
|
385
|
+
workers: z.number().int().min(1).optional(),
|
|
386
|
+
workspace_template: z.string().optional(),
|
|
387
|
+
workspaceTemplate: z.string().optional()
|
|
290
388
|
}).passthrough();
|
|
291
389
|
var DEFAULT_AZURE_API_VERSION = "2024-12-01-preview";
|
|
292
390
|
function normalizeAzureApiVersion(value) {
|
|
@@ -377,7 +475,18 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
|
|
|
377
475
|
judgeTarget: parsed.judge_target,
|
|
378
476
|
workers: parsed.workers,
|
|
379
477
|
providerBatching,
|
|
380
|
-
config: resolveCodexConfig(parsed, env)
|
|
478
|
+
config: resolveCodexConfig(parsed, env, evalFilePath)
|
|
479
|
+
};
|
|
480
|
+
case "copilot":
|
|
481
|
+
case "copilot-sdk":
|
|
482
|
+
case "copilot_sdk":
|
|
483
|
+
return {
|
|
484
|
+
kind: "copilot",
|
|
485
|
+
name: parsed.name,
|
|
486
|
+
judgeTarget: parsed.judge_target,
|
|
487
|
+
workers: parsed.workers,
|
|
488
|
+
providerBatching,
|
|
489
|
+
config: resolveCopilotSdkConfig(parsed, env, evalFilePath)
|
|
381
490
|
};
|
|
382
491
|
case "copilot-cli":
|
|
383
492
|
return {
|
|
@@ -386,7 +495,7 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
|
|
|
386
495
|
judgeTarget: parsed.judge_target,
|
|
387
496
|
workers: parsed.workers,
|
|
388
497
|
providerBatching,
|
|
389
|
-
config:
|
|
498
|
+
config: resolveCopilotCliConfig(parsed, env, evalFilePath)
|
|
390
499
|
};
|
|
391
500
|
case "pi":
|
|
392
501
|
case "pi-coding-agent":
|
|
@@ -396,7 +505,7 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
|
|
|
396
505
|
judgeTarget: parsed.judge_target,
|
|
397
506
|
workers: parsed.workers,
|
|
398
507
|
providerBatching,
|
|
399
|
-
config: resolvePiCodingAgentConfig(parsed, env)
|
|
508
|
+
config: resolvePiCodingAgentConfig(parsed, env, evalFilePath)
|
|
400
509
|
};
|
|
401
510
|
case "pi-agent-sdk":
|
|
402
511
|
return {
|
|
@@ -407,14 +516,16 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
|
|
|
407
516
|
providerBatching,
|
|
408
517
|
config: resolvePiAgentSdkConfig(parsed, env)
|
|
409
518
|
};
|
|
519
|
+
case "claude":
|
|
410
520
|
case "claude-code":
|
|
521
|
+
case "claude-sdk":
|
|
411
522
|
return {
|
|
412
|
-
kind: "claude
|
|
523
|
+
kind: "claude",
|
|
413
524
|
name: parsed.name,
|
|
414
525
|
judgeTarget: parsed.judge_target,
|
|
415
526
|
workers: parsed.workers,
|
|
416
527
|
providerBatching,
|
|
417
|
-
config:
|
|
528
|
+
config: resolveClaudeConfig(parsed, env, evalFilePath)
|
|
418
529
|
};
|
|
419
530
|
case "mock":
|
|
420
531
|
return {
|
|
@@ -433,7 +544,7 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
|
|
|
433
544
|
judgeTarget: parsed.judge_target,
|
|
434
545
|
workers: parsed.workers,
|
|
435
546
|
providerBatching,
|
|
436
|
-
config: resolveVSCodeConfig(parsed, env, provider === "vscode-insiders")
|
|
547
|
+
config: resolveVSCodeConfig(parsed, env, provider === "vscode-insiders", evalFilePath)
|
|
437
548
|
};
|
|
438
549
|
case "cli":
|
|
439
550
|
return {
|
|
@@ -445,7 +556,14 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
|
|
|
445
556
|
config: resolveCliConfig(parsed, env, evalFilePath)
|
|
446
557
|
};
|
|
447
558
|
default:
|
|
448
|
-
|
|
559
|
+
return {
|
|
560
|
+
kind: "cli",
|
|
561
|
+
name: parsed.name,
|
|
562
|
+
judgeTarget: parsed.judge_target,
|
|
563
|
+
workers: parsed.workers,
|
|
564
|
+
providerBatching,
|
|
565
|
+
config: resolveDiscoveredProviderConfig(parsed, provider, env, evalFilePath)
|
|
566
|
+
};
|
|
449
567
|
}
|
|
450
568
|
}
|
|
451
569
|
function resolveAzureConfig(target, env) {
|
|
@@ -517,14 +635,20 @@ function resolveGeminiConfig(target, env) {
|
|
|
517
635
|
retry
|
|
518
636
|
};
|
|
519
637
|
}
|
|
520
|
-
function resolveCodexConfig(target, env) {
|
|
638
|
+
function resolveCodexConfig(target, env, evalFilePath) {
|
|
639
|
+
const modelSource = target.model;
|
|
521
640
|
const executableSource = target.executable ?? target.command ?? target.binary;
|
|
522
641
|
const argsSource = target.args ?? target.arguments;
|
|
523
642
|
const cwdSource = target.cwd;
|
|
643
|
+
const workspaceTemplateSource = target.workspace_template ?? target.workspaceTemplate;
|
|
524
644
|
const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
|
|
525
645
|
const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
|
|
526
646
|
const logFormatSource = target.log_format ?? target.logFormat ?? target.log_output_format ?? target.logOutputFormat ?? env.AGENTV_CODEX_LOG_FORMAT;
|
|
527
647
|
const systemPromptSource = target.system_prompt ?? target.systemPrompt;
|
|
648
|
+
const model = resolveOptionalString(modelSource, env, `${target.name} codex model`, {
|
|
649
|
+
allowLiteral: true,
|
|
650
|
+
optionalEnv: true
|
|
651
|
+
});
|
|
528
652
|
const executable = resolveOptionalString(executableSource, env, `${target.name} codex executable`, {
|
|
529
653
|
allowLiteral: true,
|
|
530
654
|
optionalEnv: true
|
|
@@ -534,6 +658,23 @@ function resolveCodexConfig(target, env) {
|
|
|
534
658
|
allowLiteral: true,
|
|
535
659
|
optionalEnv: true
|
|
536
660
|
});
|
|
661
|
+
let workspaceTemplate = resolveOptionalString(
|
|
662
|
+
workspaceTemplateSource,
|
|
663
|
+
env,
|
|
664
|
+
`${target.name} codex workspace template`,
|
|
665
|
+
{
|
|
666
|
+
allowLiteral: true,
|
|
667
|
+
optionalEnv: true
|
|
668
|
+
}
|
|
669
|
+
);
|
|
670
|
+
if (workspaceTemplate && evalFilePath && !path2.isAbsolute(workspaceTemplate)) {
|
|
671
|
+
workspaceTemplate = path2.resolve(path2.dirname(path2.resolve(evalFilePath)), workspaceTemplate);
|
|
672
|
+
}
|
|
673
|
+
if (cwd && workspaceTemplate) {
|
|
674
|
+
throw new Error(
|
|
675
|
+
`${target.name}: 'cwd' and 'workspace_template' are mutually exclusive. Use 'cwd' to run in an existing directory, or 'workspace_template' to copy a template to a temp location.`
|
|
676
|
+
);
|
|
677
|
+
}
|
|
537
678
|
const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} codex timeout`);
|
|
538
679
|
const logDir = resolveOptionalString(logDirSource, env, `${target.name} codex log directory`, {
|
|
539
680
|
allowLiteral: true,
|
|
@@ -542,9 +683,11 @@ function resolveCodexConfig(target, env) {
|
|
|
542
683
|
const logFormat = normalizeCodexLogFormat(logFormatSource);
|
|
543
684
|
const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
|
|
544
685
|
return {
|
|
686
|
+
model,
|
|
545
687
|
executable,
|
|
546
688
|
args,
|
|
547
689
|
cwd,
|
|
690
|
+
workspaceTemplate,
|
|
548
691
|
timeoutMs,
|
|
549
692
|
logDir,
|
|
550
693
|
logFormat,
|
|
@@ -564,36 +707,147 @@ function normalizeCodexLogFormat(value) {
|
|
|
564
707
|
}
|
|
565
708
|
throw new Error("codex log format must be 'summary' or 'json'");
|
|
566
709
|
}
|
|
567
|
-
function
|
|
568
|
-
const
|
|
710
|
+
function resolveCopilotSdkConfig(target, env, evalFilePath) {
|
|
711
|
+
const cliUrlSource = target.cli_url ?? target.cliUrl;
|
|
712
|
+
const cliPathSource = target.cli_path ?? target.cliPath;
|
|
713
|
+
const githubTokenSource = target.github_token ?? target.githubToken;
|
|
569
714
|
const modelSource = target.model;
|
|
570
|
-
const argsSource = target.args ?? target.arguments;
|
|
571
715
|
const cwdSource = target.cwd;
|
|
716
|
+
const workspaceTemplateSource = target.workspace_template ?? target.workspaceTemplate;
|
|
572
717
|
const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
|
|
573
718
|
const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
|
|
574
|
-
const logFormatSource = target.log_format ?? target.logFormat
|
|
719
|
+
const logFormatSource = target.log_format ?? target.logFormat;
|
|
575
720
|
const systemPromptSource = target.system_prompt ?? target.systemPrompt;
|
|
576
|
-
const
|
|
721
|
+
const cliUrl = resolveOptionalString(cliUrlSource, env, `${target.name} copilot-sdk cli URL`, {
|
|
577
722
|
allowLiteral: true,
|
|
578
723
|
optionalEnv: true
|
|
579
|
-
})
|
|
580
|
-
const
|
|
724
|
+
});
|
|
725
|
+
const cliPath = resolveOptionalString(cliPathSource, env, `${target.name} copilot-sdk cli path`, {
|
|
726
|
+
allowLiteral: true,
|
|
727
|
+
optionalEnv: true
|
|
728
|
+
});
|
|
729
|
+
const githubToken = resolveOptionalString(
|
|
730
|
+
githubTokenSource,
|
|
731
|
+
env,
|
|
732
|
+
`${target.name} copilot-sdk github token`,
|
|
733
|
+
{
|
|
734
|
+
allowLiteral: false,
|
|
735
|
+
optionalEnv: true
|
|
736
|
+
}
|
|
737
|
+
);
|
|
738
|
+
const model = resolveOptionalString(modelSource, env, `${target.name} copilot-sdk model`, {
|
|
739
|
+
allowLiteral: true,
|
|
740
|
+
optionalEnv: true
|
|
741
|
+
});
|
|
742
|
+
const cwd = resolveOptionalString(cwdSource, env, `${target.name} copilot-sdk cwd`, {
|
|
581
743
|
allowLiteral: true,
|
|
582
744
|
optionalEnv: true
|
|
583
745
|
});
|
|
584
|
-
|
|
585
|
-
|
|
746
|
+
let workspaceTemplate = resolveOptionalString(
|
|
747
|
+
workspaceTemplateSource,
|
|
748
|
+
env,
|
|
749
|
+
`${target.name} copilot-sdk workspace template`,
|
|
750
|
+
{
|
|
751
|
+
allowLiteral: true,
|
|
752
|
+
optionalEnv: true
|
|
753
|
+
}
|
|
754
|
+
);
|
|
755
|
+
if (workspaceTemplate && evalFilePath && !path2.isAbsolute(workspaceTemplate)) {
|
|
756
|
+
workspaceTemplate = path2.resolve(path2.dirname(path2.resolve(evalFilePath)), workspaceTemplate);
|
|
757
|
+
}
|
|
758
|
+
if (cwd && workspaceTemplate) {
|
|
759
|
+
throw new Error(
|
|
760
|
+
`${target.name}: 'cwd' and 'workspace_template' are mutually exclusive. Use 'cwd' to run in an existing directory, or 'workspace_template' to copy a template to a temp location.`
|
|
761
|
+
);
|
|
762
|
+
}
|
|
763
|
+
const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} copilot-sdk timeout`);
|
|
764
|
+
const logDir = resolveOptionalString(
|
|
765
|
+
logDirSource,
|
|
766
|
+
env,
|
|
767
|
+
`${target.name} copilot-sdk log directory`,
|
|
768
|
+
{
|
|
769
|
+
allowLiteral: true,
|
|
770
|
+
optionalEnv: true
|
|
771
|
+
}
|
|
772
|
+
);
|
|
773
|
+
const logFormat = normalizeCopilotLogFormat(logFormatSource);
|
|
774
|
+
const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
|
|
775
|
+
return {
|
|
776
|
+
cliUrl,
|
|
777
|
+
cliPath,
|
|
778
|
+
githubToken,
|
|
779
|
+
model,
|
|
780
|
+
cwd,
|
|
781
|
+
workspaceTemplate,
|
|
782
|
+
timeoutMs,
|
|
783
|
+
logDir,
|
|
784
|
+
logFormat,
|
|
785
|
+
systemPrompt
|
|
786
|
+
};
|
|
787
|
+
}
|
|
788
|
+
function resolveCopilotCliConfig(target, env, evalFilePath) {
|
|
789
|
+
const executableSource = target.executable ?? target.command ?? target.binary;
|
|
790
|
+
const modelSource = target.model;
|
|
791
|
+
const argsSource = target.args ?? target.arguments;
|
|
792
|
+
const cwdSource = target.cwd;
|
|
793
|
+
const workspaceTemplateSource = target.workspace_template ?? target.workspaceTemplate;
|
|
794
|
+
const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
|
|
795
|
+
const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
|
|
796
|
+
const logFormatSource = target.log_format ?? target.logFormat;
|
|
797
|
+
const systemPromptSource = target.system_prompt ?? target.systemPrompt;
|
|
798
|
+
const executable = resolveOptionalString(executableSource, env, `${target.name} copilot-cli executable`, {
|
|
799
|
+
allowLiteral: true,
|
|
800
|
+
optionalEnv: true
|
|
801
|
+
}) ?? "copilot";
|
|
802
|
+
const model = resolveOptionalString(modelSource, env, `${target.name} copilot-cli model`, {
|
|
586
803
|
allowLiteral: true,
|
|
587
804
|
optionalEnv: true
|
|
588
805
|
});
|
|
589
|
-
const
|
|
590
|
-
const
|
|
806
|
+
const args = resolveOptionalStringArray(argsSource, env, `${target.name} copilot-cli args`);
|
|
807
|
+
const cwd = resolveOptionalString(cwdSource, env, `${target.name} copilot-cli cwd`, {
|
|
591
808
|
allowLiteral: true,
|
|
592
809
|
optionalEnv: true
|
|
593
810
|
});
|
|
811
|
+
let workspaceTemplate = resolveOptionalString(
|
|
812
|
+
workspaceTemplateSource,
|
|
813
|
+
env,
|
|
814
|
+
`${target.name} copilot-cli workspace template`,
|
|
815
|
+
{
|
|
816
|
+
allowLiteral: true,
|
|
817
|
+
optionalEnv: true
|
|
818
|
+
}
|
|
819
|
+
);
|
|
820
|
+
if (workspaceTemplate && evalFilePath && !path2.isAbsolute(workspaceTemplate)) {
|
|
821
|
+
workspaceTemplate = path2.resolve(path2.dirname(path2.resolve(evalFilePath)), workspaceTemplate);
|
|
822
|
+
}
|
|
823
|
+
if (cwd && workspaceTemplate) {
|
|
824
|
+
throw new Error(
|
|
825
|
+
`${target.name}: 'cwd' and 'workspace_template' are mutually exclusive. Use 'cwd' to run in an existing directory, or 'workspace_template' to copy a template to a temp location.`
|
|
826
|
+
);
|
|
827
|
+
}
|
|
828
|
+
const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} copilot-cli timeout`);
|
|
829
|
+
const logDir = resolveOptionalString(
|
|
830
|
+
logDirSource,
|
|
831
|
+
env,
|
|
832
|
+
`${target.name} copilot-cli log directory`,
|
|
833
|
+
{
|
|
834
|
+
allowLiteral: true,
|
|
835
|
+
optionalEnv: true
|
|
836
|
+
}
|
|
837
|
+
);
|
|
594
838
|
const logFormat = normalizeCopilotLogFormat(logFormatSource);
|
|
595
839
|
const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
|
|
596
|
-
return {
|
|
840
|
+
return {
|
|
841
|
+
executable,
|
|
842
|
+
model,
|
|
843
|
+
args,
|
|
844
|
+
cwd,
|
|
845
|
+
workspaceTemplate,
|
|
846
|
+
timeoutMs,
|
|
847
|
+
logDir,
|
|
848
|
+
logFormat,
|
|
849
|
+
systemPrompt
|
|
850
|
+
};
|
|
597
851
|
}
|
|
598
852
|
function normalizeCopilotLogFormat(value) {
|
|
599
853
|
if (value === void 0 || value === null) return void 0;
|
|
@@ -602,7 +856,7 @@ function normalizeCopilotLogFormat(value) {
|
|
|
602
856
|
if (normalized === "json" || normalized === "summary") return normalized;
|
|
603
857
|
throw new Error("copilot log format must be 'summary' or 'json'");
|
|
604
858
|
}
|
|
605
|
-
function resolvePiCodingAgentConfig(target, env) {
|
|
859
|
+
function resolvePiCodingAgentConfig(target, env, evalFilePath) {
|
|
606
860
|
const executableSource = target.executable ?? target.command ?? target.binary;
|
|
607
861
|
const providerSource = target.pi_provider ?? target.piProvider ?? target.llm_provider;
|
|
608
862
|
const modelSource = target.model ?? target.pi_model ?? target.piModel;
|
|
@@ -611,6 +865,7 @@ function resolvePiCodingAgentConfig(target, env) {
|
|
|
611
865
|
const thinkingSource = target.thinking ?? target.pi_thinking ?? target.piThinking;
|
|
612
866
|
const argsSource = target.args ?? target.arguments;
|
|
613
867
|
const cwdSource = target.cwd;
|
|
868
|
+
const workspaceTemplateSource = target.workspace_template ?? target.workspaceTemplate;
|
|
614
869
|
const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
|
|
615
870
|
const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
|
|
616
871
|
const logFormatSource = target.log_format ?? target.logFormat;
|
|
@@ -644,6 +899,23 @@ function resolvePiCodingAgentConfig(target, env) {
|
|
|
644
899
|
allowLiteral: true,
|
|
645
900
|
optionalEnv: true
|
|
646
901
|
});
|
|
902
|
+
let workspaceTemplate = resolveOptionalString(
|
|
903
|
+
workspaceTemplateSource,
|
|
904
|
+
env,
|
|
905
|
+
`${target.name} pi workspace template`,
|
|
906
|
+
{
|
|
907
|
+
allowLiteral: true,
|
|
908
|
+
optionalEnv: true
|
|
909
|
+
}
|
|
910
|
+
);
|
|
911
|
+
if (workspaceTemplate && evalFilePath && !path2.isAbsolute(workspaceTemplate)) {
|
|
912
|
+
workspaceTemplate = path2.resolve(path2.dirname(path2.resolve(evalFilePath)), workspaceTemplate);
|
|
913
|
+
}
|
|
914
|
+
if (cwd && workspaceTemplate) {
|
|
915
|
+
throw new Error(
|
|
916
|
+
`${target.name}: 'cwd' and 'workspace_template' are mutually exclusive. Use 'cwd' to run in an existing directory, or 'workspace_template' to copy a template to a temp location.`
|
|
917
|
+
);
|
|
918
|
+
}
|
|
647
919
|
const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} pi timeout`);
|
|
648
920
|
const logDir = resolveOptionalString(logDirSource, env, `${target.name} pi log directory`, {
|
|
649
921
|
allowLiteral: true,
|
|
@@ -660,6 +932,7 @@ function resolvePiCodingAgentConfig(target, env) {
|
|
|
660
932
|
thinking,
|
|
661
933
|
args,
|
|
662
934
|
cwd,
|
|
935
|
+
workspaceTemplate,
|
|
663
936
|
timeoutMs,
|
|
664
937
|
logDir,
|
|
665
938
|
logFormat,
|
|
@@ -699,96 +972,114 @@ function resolvePiAgentSdkConfig(target, env) {
|
|
|
699
972
|
systemPrompt
|
|
700
973
|
};
|
|
701
974
|
}
|
|
702
|
-
function
|
|
703
|
-
const executableSource = target.executable ?? target.command ?? target.binary;
|
|
975
|
+
function resolveClaudeConfig(target, env, evalFilePath) {
|
|
704
976
|
const modelSource = target.model;
|
|
705
|
-
const argsSource = target.args ?? target.arguments;
|
|
706
977
|
const cwdSource = target.cwd;
|
|
978
|
+
const workspaceTemplateSource = target.workspace_template ?? target.workspaceTemplate;
|
|
707
979
|
const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
|
|
708
980
|
const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
|
|
709
|
-
const logFormatSource = target.log_format ?? target.logFormat ?? target.log_output_format ?? target.logOutputFormat ?? env.
|
|
981
|
+
const logFormatSource = target.log_format ?? target.logFormat ?? target.log_output_format ?? target.logOutputFormat ?? env.AGENTV_CLAUDE_LOG_FORMAT;
|
|
710
982
|
const systemPromptSource = target.system_prompt ?? target.systemPrompt;
|
|
711
|
-
const
|
|
712
|
-
allowLiteral: true,
|
|
713
|
-
optionalEnv: true
|
|
714
|
-
}) ?? "claude";
|
|
715
|
-
const model = resolveOptionalString(modelSource, env, `${target.name} claude-code model`, {
|
|
983
|
+
const model = resolveOptionalString(modelSource, env, `${target.name} claude model`, {
|
|
716
984
|
allowLiteral: true,
|
|
717
985
|
optionalEnv: true
|
|
718
986
|
});
|
|
719
|
-
const
|
|
720
|
-
const cwd = resolveOptionalString(cwdSource, env, `${target.name} claude-code cwd`, {
|
|
987
|
+
const cwd = resolveOptionalString(cwdSource, env, `${target.name} claude cwd`, {
|
|
721
988
|
allowLiteral: true,
|
|
722
989
|
optionalEnv: true
|
|
723
990
|
});
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
logDirSource,
|
|
991
|
+
let workspaceTemplate = resolveOptionalString(
|
|
992
|
+
workspaceTemplateSource,
|
|
727
993
|
env,
|
|
728
|
-
`${target.name} claude
|
|
994
|
+
`${target.name} claude workspace template`,
|
|
729
995
|
{
|
|
730
996
|
allowLiteral: true,
|
|
731
997
|
optionalEnv: true
|
|
732
998
|
}
|
|
733
999
|
);
|
|
734
|
-
|
|
1000
|
+
if (workspaceTemplate && evalFilePath && !path2.isAbsolute(workspaceTemplate)) {
|
|
1001
|
+
workspaceTemplate = path2.resolve(path2.dirname(path2.resolve(evalFilePath)), workspaceTemplate);
|
|
1002
|
+
}
|
|
1003
|
+
if (cwd && workspaceTemplate) {
|
|
1004
|
+
throw new Error(
|
|
1005
|
+
`${target.name}: 'cwd' and 'workspace_template' are mutually exclusive. Use 'cwd' to run in an existing directory, or 'workspace_template' to copy a template to a temp location.`
|
|
1006
|
+
);
|
|
1007
|
+
}
|
|
1008
|
+
const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} claude timeout`);
|
|
1009
|
+
const logDir = resolveOptionalString(logDirSource, env, `${target.name} claude log directory`, {
|
|
1010
|
+
allowLiteral: true,
|
|
1011
|
+
optionalEnv: true
|
|
1012
|
+
});
|
|
1013
|
+
const logFormat = normalizeClaudeLogFormat(logFormatSource);
|
|
735
1014
|
const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
|
|
1015
|
+
const maxTurns = typeof target.max_turns === "number" ? target.max_turns : typeof target.maxTurns === "number" ? target.maxTurns : void 0;
|
|
1016
|
+
const maxBudgetUsd = typeof target.max_budget_usd === "number" ? target.max_budget_usd : typeof target.maxBudgetUsd === "number" ? target.maxBudgetUsd : void 0;
|
|
736
1017
|
return {
|
|
737
|
-
executable,
|
|
738
1018
|
model,
|
|
739
1019
|
systemPrompt,
|
|
740
|
-
args,
|
|
741
1020
|
cwd,
|
|
1021
|
+
workspaceTemplate,
|
|
742
1022
|
timeoutMs,
|
|
1023
|
+
maxTurns,
|
|
1024
|
+
maxBudgetUsd,
|
|
743
1025
|
logDir,
|
|
744
1026
|
logFormat
|
|
745
1027
|
};
|
|
746
1028
|
}
|
|
747
|
-
function
|
|
1029
|
+
function normalizeClaudeLogFormat(value) {
|
|
748
1030
|
if (value === void 0 || value === null) {
|
|
749
1031
|
return void 0;
|
|
750
1032
|
}
|
|
751
1033
|
if (typeof value !== "string") {
|
|
752
|
-
throw new Error("claude
|
|
1034
|
+
throw new Error("claude log format must be 'summary' or 'json'");
|
|
753
1035
|
}
|
|
754
1036
|
const normalized = value.trim().toLowerCase();
|
|
755
1037
|
if (normalized === "json" || normalized === "summary") {
|
|
756
1038
|
return normalized;
|
|
757
1039
|
}
|
|
758
|
-
throw new Error("claude
|
|
1040
|
+
throw new Error("claude log format must be 'summary' or 'json'");
|
|
759
1041
|
}
|
|
760
1042
|
function resolveMockConfig(target) {
|
|
761
1043
|
const response = typeof target.response === "string" ? target.response : void 0;
|
|
762
1044
|
return { response };
|
|
763
1045
|
}
|
|
764
|
-
function resolveVSCodeConfig(target, env, insiders) {
|
|
1046
|
+
function resolveVSCodeConfig(target, env, insiders, evalFilePath) {
|
|
765
1047
|
const workspaceTemplateEnvVar = resolveOptionalLiteralString(
|
|
766
1048
|
target.workspace_template ?? target.workspaceTemplate
|
|
767
1049
|
);
|
|
768
|
-
|
|
1050
|
+
let workspaceTemplate = workspaceTemplateEnvVar ? resolveOptionalString(
|
|
769
1051
|
workspaceTemplateEnvVar,
|
|
770
1052
|
env,
|
|
771
1053
|
`${target.name} workspace template path`,
|
|
772
1054
|
{
|
|
773
|
-
allowLiteral:
|
|
1055
|
+
allowLiteral: true,
|
|
774
1056
|
optionalEnv: true
|
|
775
1057
|
}
|
|
776
1058
|
) : void 0;
|
|
777
|
-
|
|
1059
|
+
if (workspaceTemplate && evalFilePath && !path2.isAbsolute(workspaceTemplate)) {
|
|
1060
|
+
workspaceTemplate = path2.resolve(path2.dirname(path2.resolve(evalFilePath)), workspaceTemplate);
|
|
1061
|
+
}
|
|
1062
|
+
const executableSource = target.executable;
|
|
778
1063
|
const waitSource = target.wait;
|
|
779
1064
|
const dryRunSource = target.dry_run ?? target.dryRun;
|
|
780
1065
|
const subagentRootSource = target.subagent_root ?? target.subagentRoot;
|
|
1066
|
+
const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
|
|
781
1067
|
const defaultCommand = insiders ? "code-insiders" : "code";
|
|
782
|
-
const
|
|
1068
|
+
const executable = resolveOptionalString(executableSource, env, `${target.name} vscode executable`, {
|
|
1069
|
+
allowLiteral: true,
|
|
1070
|
+
optionalEnv: true
|
|
1071
|
+
}) ?? defaultCommand;
|
|
1072
|
+
const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} vscode timeout`);
|
|
783
1073
|
return {
|
|
784
|
-
|
|
1074
|
+
executable,
|
|
785
1075
|
waitForResponse: resolveOptionalBoolean(waitSource) ?? true,
|
|
786
1076
|
dryRun: resolveOptionalBoolean(dryRunSource) ?? false,
|
|
787
1077
|
subagentRoot: resolveOptionalString(subagentRootSource, env, `${target.name} subagent root`, {
|
|
788
1078
|
allowLiteral: true,
|
|
789
1079
|
optionalEnv: true
|
|
790
1080
|
}),
|
|
791
|
-
workspaceTemplate
|
|
1081
|
+
workspaceTemplate,
|
|
1082
|
+
timeoutMs
|
|
792
1083
|
};
|
|
793
1084
|
}
|
|
794
1085
|
var cliErrorMap = (issue, ctx) => {
|
|
@@ -821,6 +1112,27 @@ function resolveCliConfig(target, env, evalFilePath) {
|
|
|
821
1112
|
}
|
|
822
1113
|
return normalized;
|
|
823
1114
|
}
|
|
1115
|
+
function resolveDiscoveredProviderConfig(target, providerKind, env, evalFilePath) {
|
|
1116
|
+
const commandTemplateSource = target.command_template ?? target.commandTemplate;
|
|
1117
|
+
const commandTemplate = commandTemplateSource ? resolveString(commandTemplateSource, env, `${target.name} command template`, true) : `bun run .agentv/providers/${providerKind}.ts {PROMPT}`;
|
|
1118
|
+
const timeoutSeconds = target.timeout_seconds ?? target.timeoutSeconds;
|
|
1119
|
+
const timeoutMs = resolveTimeoutMs(timeoutSeconds, `${target.name} timeout`);
|
|
1120
|
+
let cwd = resolveOptionalString(target.cwd, env, `${target.name} working directory`, {
|
|
1121
|
+
allowLiteral: true,
|
|
1122
|
+
optionalEnv: true
|
|
1123
|
+
});
|
|
1124
|
+
if (cwd && evalFilePath && !path2.isAbsolute(cwd)) {
|
|
1125
|
+
cwd = path2.resolve(path2.dirname(path2.resolve(evalFilePath)), cwd);
|
|
1126
|
+
}
|
|
1127
|
+
if (!cwd && evalFilePath) {
|
|
1128
|
+
cwd = path2.dirname(path2.resolve(evalFilePath));
|
|
1129
|
+
}
|
|
1130
|
+
return {
|
|
1131
|
+
commandTemplate,
|
|
1132
|
+
cwd,
|
|
1133
|
+
timeoutMs
|
|
1134
|
+
};
|
|
1135
|
+
}
|
|
824
1136
|
function resolveTimeoutMs(source, description) {
|
|
825
1137
|
const seconds = resolveOptionalNumber(source, `${description} (seconds)`);
|
|
826
1138
|
if (seconds === void 0) {
|
|
@@ -998,9 +1310,10 @@ function resolveOptionalNumberArray(source, description) {
|
|
|
998
1310
|
// src/evaluation/providers/types.ts
|
|
999
1311
|
var AGENT_PROVIDER_KINDS = [
|
|
1000
1312
|
"codex",
|
|
1313
|
+
"copilot",
|
|
1001
1314
|
"copilot-cli",
|
|
1002
1315
|
"pi-coding-agent",
|
|
1003
|
-
"claude
|
|
1316
|
+
"claude",
|
|
1004
1317
|
"vscode",
|
|
1005
1318
|
"vscode-insiders"
|
|
1006
1319
|
];
|
|
@@ -1009,10 +1322,11 @@ var KNOWN_PROVIDERS = [
|
|
|
1009
1322
|
"anthropic",
|
|
1010
1323
|
"gemini",
|
|
1011
1324
|
"codex",
|
|
1325
|
+
"copilot",
|
|
1012
1326
|
"copilot-cli",
|
|
1013
1327
|
"pi-coding-agent",
|
|
1014
1328
|
"pi-agent-sdk",
|
|
1015
|
-
"claude
|
|
1329
|
+
"claude",
|
|
1016
1330
|
"cli",
|
|
1017
1331
|
"mock",
|
|
1018
1332
|
"vscode",
|
|
@@ -1027,8 +1341,16 @@ var PROVIDER_ALIASES = [
|
|
|
1027
1341
|
// alias for "gemini"
|
|
1028
1342
|
"codex-cli",
|
|
1029
1343
|
// alias for "codex"
|
|
1344
|
+
"copilot-sdk",
|
|
1345
|
+
// alias for "copilot"
|
|
1346
|
+
"copilot_sdk",
|
|
1347
|
+
// alias for "copilot" (underscore variant)
|
|
1030
1348
|
"pi",
|
|
1031
1349
|
// alias for "pi-coding-agent"
|
|
1350
|
+
"claude-code",
|
|
1351
|
+
// alias for "claude" (legacy)
|
|
1352
|
+
"claude-sdk",
|
|
1353
|
+
// alias for "claude"
|
|
1032
1354
|
"openai",
|
|
1033
1355
|
// legacy/future support
|
|
1034
1356
|
"bedrock",
|
|
@@ -1056,6 +1378,13 @@ function isAgentProvider(provider) {
|
|
|
1056
1378
|
}
|
|
1057
1379
|
|
|
1058
1380
|
export {
|
|
1381
|
+
TEST_MESSAGE_ROLES,
|
|
1382
|
+
isTestMessageRole,
|
|
1383
|
+
isJsonObject,
|
|
1384
|
+
isJsonValue,
|
|
1385
|
+
isTestMessage,
|
|
1386
|
+
isEvaluatorKind,
|
|
1387
|
+
getHitCount,
|
|
1059
1388
|
fileExists,
|
|
1060
1389
|
normalizeLineEndings,
|
|
1061
1390
|
readTextFile,
|
|
@@ -1071,4 +1400,4 @@ export {
|
|
|
1071
1400
|
extractLastAssistantContent,
|
|
1072
1401
|
isAgentProvider
|
|
1073
1402
|
};
|
|
1074
|
-
//# sourceMappingURL=chunk-
|
|
1403
|
+
//# sourceMappingURL=chunk-6W5E3VR6.js.map
|