@agentv/core 3.14.6 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-HP5PFOVK.js → chunk-PXYYRDHH.js} +142 -148
- package/dist/chunk-PXYYRDHH.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +9 -2
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +3 -2
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +569 -257
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +135 -93
- package/dist/index.d.ts +135 -93
- package/dist/index.js +459 -141
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-HP5PFOVK.js.map +0 -1
|
@@ -1,3 +1,26 @@
|
|
|
1
|
+
// src/evaluation/content.ts
|
|
2
|
+
var CONTENT_TYPES = /* @__PURE__ */ new Set(["text", "image", "file"]);
|
|
3
|
+
function isContent(value) {
|
|
4
|
+
if (!value || typeof value !== "object") return false;
|
|
5
|
+
const v = value;
|
|
6
|
+
return typeof v.type === "string" && CONTENT_TYPES.has(v.type);
|
|
7
|
+
}
|
|
8
|
+
function isContentArray(value) {
|
|
9
|
+
return Array.isArray(value) && value.length > 0 && value.every(isContent);
|
|
10
|
+
}
|
|
11
|
+
function getTextContent(content) {
|
|
12
|
+
if (content == null) return "";
|
|
13
|
+
if (typeof content === "string") return content;
|
|
14
|
+
if (!Array.isArray(content)) return "";
|
|
15
|
+
const parts = [];
|
|
16
|
+
for (const block of content) {
|
|
17
|
+
if (block.type === "text") {
|
|
18
|
+
parts.push(block.text);
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
return parts.join("\n");
|
|
22
|
+
}
|
|
23
|
+
|
|
1
24
|
// src/evaluation/types.ts
|
|
2
25
|
var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
|
|
3
26
|
var TEST_MESSAGE_ROLES = TEST_MESSAGE_ROLE_VALUES;
|
|
@@ -360,6 +383,12 @@ var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set([
|
|
|
360
383
|
"FILES",
|
|
361
384
|
"OUTPUT_FILE"
|
|
362
385
|
]);
|
|
386
|
+
var COMMON_TARGET_SETTINGS = [
|
|
387
|
+
"provider_batching",
|
|
388
|
+
"providerBatching",
|
|
389
|
+
"subagent_mode_allowed",
|
|
390
|
+
"subagentModeAllowed"
|
|
391
|
+
];
|
|
363
392
|
var BASE_TARGET_SCHEMA = z.object({
|
|
364
393
|
name: z.string().min(1, "target name is required"),
|
|
365
394
|
provider: z.string().min(1, "provider is required"),
|
|
@@ -368,7 +397,8 @@ var BASE_TARGET_SCHEMA = z.object({
|
|
|
368
397
|
// backward compat
|
|
369
398
|
workers: z.number().int().min(1).optional(),
|
|
370
399
|
workspace_template: z.string().optional(),
|
|
371
|
-
workspaceTemplate: z.string().optional()
|
|
400
|
+
workspaceTemplate: z.string().optional(),
|
|
401
|
+
subagent_mode_allowed: z.boolean().optional()
|
|
372
402
|
}).passthrough();
|
|
373
403
|
var DEFAULT_AZURE_API_VERSION = "2024-12-01-preview";
|
|
374
404
|
var DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1";
|
|
@@ -431,42 +461,40 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
|
|
|
431
461
|
const providerBatching = resolveOptionalBoolean(
|
|
432
462
|
parsed.provider_batching ?? parsed.providerBatching
|
|
433
463
|
);
|
|
464
|
+
const subagentModeAllowed = resolveOptionalBoolean(
|
|
465
|
+
parsed.subagent_mode_allowed ?? parsed.subagentModeAllowed
|
|
466
|
+
);
|
|
467
|
+
const base = {
|
|
468
|
+
name: parsed.name,
|
|
469
|
+
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
470
|
+
workers: parsed.workers,
|
|
471
|
+
providerBatching,
|
|
472
|
+
subagentModeAllowed
|
|
473
|
+
};
|
|
434
474
|
switch (provider) {
|
|
435
475
|
case "openai":
|
|
436
476
|
return {
|
|
437
477
|
kind: "openai",
|
|
438
|
-
name: parsed.name,
|
|
439
|
-
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
440
|
-
workers: parsed.workers,
|
|
441
|
-
providerBatching,
|
|
478
|
+
...base,
|
|
442
479
|
config: resolveOpenAIConfig(parsed, env)
|
|
443
480
|
};
|
|
444
481
|
case "openrouter":
|
|
445
482
|
return {
|
|
446
483
|
kind: "openrouter",
|
|
447
|
-
name: parsed.name,
|
|
448
|
-
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
449
|
-
workers: parsed.workers,
|
|
450
|
-
providerBatching,
|
|
484
|
+
...base,
|
|
451
485
|
config: resolveOpenRouterConfig(parsed, env)
|
|
452
486
|
};
|
|
453
487
|
case "azure":
|
|
454
488
|
case "azure-openai":
|
|
455
489
|
return {
|
|
456
490
|
kind: "azure",
|
|
457
|
-
name: parsed.name,
|
|
458
|
-
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
459
|
-
workers: parsed.workers,
|
|
460
|
-
providerBatching,
|
|
491
|
+
...base,
|
|
461
492
|
config: resolveAzureConfig(parsed, env)
|
|
462
493
|
};
|
|
463
494
|
case "anthropic":
|
|
464
495
|
return {
|
|
465
496
|
kind: "anthropic",
|
|
466
|
-
name: parsed.name,
|
|
467
|
-
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
468
|
-
workers: parsed.workers,
|
|
469
|
-
providerBatching,
|
|
497
|
+
...base,
|
|
470
498
|
config: resolveAnthropicConfig(parsed, env)
|
|
471
499
|
};
|
|
472
500
|
case "gemini":
|
|
@@ -474,68 +502,47 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
|
|
|
474
502
|
case "google-gemini":
|
|
475
503
|
return {
|
|
476
504
|
kind: "gemini",
|
|
477
|
-
name: parsed.name,
|
|
478
|
-
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
479
|
-
workers: parsed.workers,
|
|
480
|
-
providerBatching,
|
|
505
|
+
...base,
|
|
481
506
|
config: resolveGeminiConfig(parsed, env)
|
|
482
507
|
};
|
|
483
508
|
case "codex":
|
|
484
509
|
case "codex-cli":
|
|
485
510
|
return {
|
|
486
511
|
kind: "codex",
|
|
487
|
-
name: parsed.name,
|
|
488
|
-
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
489
|
-
workers: parsed.workers,
|
|
490
|
-
providerBatching,
|
|
512
|
+
...base,
|
|
491
513
|
config: resolveCodexConfig(parsed, env, evalFilePath)
|
|
492
514
|
};
|
|
493
515
|
case "copilot-sdk":
|
|
494
516
|
case "copilot_sdk":
|
|
495
517
|
return {
|
|
496
518
|
kind: "copilot-sdk",
|
|
497
|
-
name: parsed.name,
|
|
498
|
-
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
499
|
-
workers: parsed.workers,
|
|
500
|
-
providerBatching,
|
|
519
|
+
...base,
|
|
501
520
|
config: resolveCopilotSdkConfig(parsed, env, evalFilePath)
|
|
502
521
|
};
|
|
503
522
|
case "copilot":
|
|
504
523
|
case "copilot-cli":
|
|
505
524
|
return {
|
|
506
525
|
kind: "copilot-cli",
|
|
507
|
-
name: parsed.name,
|
|
508
|
-
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
509
|
-
workers: parsed.workers,
|
|
510
|
-
providerBatching,
|
|
526
|
+
...base,
|
|
511
527
|
config: resolveCopilotCliConfig(parsed, env, evalFilePath)
|
|
512
528
|
};
|
|
513
529
|
case "copilot-log":
|
|
514
530
|
return {
|
|
515
531
|
kind: "copilot-log",
|
|
516
|
-
name: parsed.name,
|
|
517
|
-
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
518
|
-
workers: parsed.workers,
|
|
519
|
-
providerBatching,
|
|
532
|
+
...base,
|
|
520
533
|
config: resolveCopilotLogConfig(parsed, env)
|
|
521
534
|
};
|
|
522
535
|
case "pi":
|
|
523
536
|
case "pi-coding-agent":
|
|
524
537
|
return {
|
|
525
538
|
kind: "pi-coding-agent",
|
|
526
|
-
name: parsed.name,
|
|
527
|
-
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
528
|
-
workers: parsed.workers,
|
|
529
|
-
providerBatching,
|
|
539
|
+
...base,
|
|
530
540
|
config: resolvePiCodingAgentConfig(parsed, env, evalFilePath)
|
|
531
541
|
};
|
|
532
542
|
case "pi-cli":
|
|
533
543
|
return {
|
|
534
544
|
kind: "pi-cli",
|
|
535
|
-
name: parsed.name,
|
|
536
|
-
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
537
|
-
workers: parsed.workers,
|
|
538
|
-
providerBatching,
|
|
545
|
+
...base,
|
|
539
546
|
config: resolvePiCliConfig(parsed, env, evalFilePath)
|
|
540
547
|
};
|
|
541
548
|
case "claude":
|
|
@@ -543,38 +550,26 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
|
|
|
543
550
|
case "claude-cli":
|
|
544
551
|
return {
|
|
545
552
|
kind: "claude-cli",
|
|
546
|
-
name: parsed.name,
|
|
547
|
-
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
548
|
-
workers: parsed.workers,
|
|
549
|
-
providerBatching,
|
|
553
|
+
...base,
|
|
550
554
|
config: resolveClaudeConfig(parsed, env, evalFilePath)
|
|
551
555
|
};
|
|
552
556
|
case "claude-sdk":
|
|
553
557
|
return {
|
|
554
558
|
kind: "claude-sdk",
|
|
555
|
-
name: parsed.name,
|
|
556
|
-
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
557
|
-
workers: parsed.workers,
|
|
558
|
-
providerBatching,
|
|
559
|
+
...base,
|
|
559
560
|
config: resolveClaudeConfig(parsed, env, evalFilePath)
|
|
560
561
|
};
|
|
561
562
|
case "mock":
|
|
562
563
|
return {
|
|
563
564
|
kind: "mock",
|
|
564
|
-
name: parsed.name,
|
|
565
|
-
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
566
|
-
workers: parsed.workers,
|
|
567
|
-
providerBatching,
|
|
565
|
+
...base,
|
|
568
566
|
config: resolveMockConfig(parsed)
|
|
569
567
|
};
|
|
570
568
|
case "vscode":
|
|
571
569
|
case "vscode-insiders":
|
|
572
570
|
return {
|
|
573
571
|
kind: provider,
|
|
574
|
-
name: parsed.name,
|
|
575
|
-
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
576
|
-
workers: parsed.workers,
|
|
577
|
-
providerBatching,
|
|
572
|
+
...base,
|
|
578
573
|
config: resolveVSCodeConfig(parsed, env, provider === "vscode-insiders", evalFilePath)
|
|
579
574
|
};
|
|
580
575
|
case "agentv": {
|
|
@@ -587,29 +582,21 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
|
|
|
587
582
|
const temperature = typeof parsed.temperature === "number" ? parsed.temperature : 0;
|
|
588
583
|
return {
|
|
589
584
|
kind: "agentv",
|
|
590
|
-
name: parsed.name,
|
|
591
|
-
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
585
|
+
...base,
|
|
592
586
|
workers: typeof parsed.workers === "number" ? parsed.workers : void 0,
|
|
593
|
-
providerBatching,
|
|
594
587
|
config: { model, temperature }
|
|
595
588
|
};
|
|
596
589
|
}
|
|
597
590
|
case "cli":
|
|
598
591
|
return {
|
|
599
592
|
kind: "cli",
|
|
600
|
-
name: parsed.name,
|
|
601
|
-
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
602
|
-
workers: parsed.workers,
|
|
603
|
-
providerBatching,
|
|
593
|
+
...base,
|
|
604
594
|
config: resolveCliConfig(parsed, env, evalFilePath)
|
|
605
595
|
};
|
|
606
596
|
default:
|
|
607
597
|
return {
|
|
608
598
|
kind: "cli",
|
|
609
|
-
name: parsed.name,
|
|
610
|
-
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
611
|
-
workers: parsed.workers,
|
|
612
|
-
providerBatching,
|
|
599
|
+
...base,
|
|
613
600
|
config: resolveDiscoveredProviderConfig(parsed, provider, env, evalFilePath)
|
|
614
601
|
};
|
|
615
602
|
}
|
|
@@ -1482,6 +1469,84 @@ function resolveOptionalNumberArray(source, description) {
|
|
|
1482
1469
|
return resolved.length > 0 ? resolved : void 0;
|
|
1483
1470
|
}
|
|
1484
1471
|
|
|
1472
|
+
// src/evaluation/providers/types.ts
|
|
1473
|
+
var AGENT_PROVIDER_KINDS = [
|
|
1474
|
+
"codex",
|
|
1475
|
+
"copilot-sdk",
|
|
1476
|
+
"copilot-cli",
|
|
1477
|
+
"pi-coding-agent",
|
|
1478
|
+
"pi-cli",
|
|
1479
|
+
"claude",
|
|
1480
|
+
"claude-cli",
|
|
1481
|
+
"claude-sdk",
|
|
1482
|
+
"vscode",
|
|
1483
|
+
"vscode-insiders"
|
|
1484
|
+
];
|
|
1485
|
+
var KNOWN_PROVIDERS = [
|
|
1486
|
+
"openai",
|
|
1487
|
+
"openrouter",
|
|
1488
|
+
"azure",
|
|
1489
|
+
"anthropic",
|
|
1490
|
+
"gemini",
|
|
1491
|
+
"codex",
|
|
1492
|
+
"copilot-sdk",
|
|
1493
|
+
"copilot-cli",
|
|
1494
|
+
"copilot-log",
|
|
1495
|
+
"pi-coding-agent",
|
|
1496
|
+
"pi-cli",
|
|
1497
|
+
"claude",
|
|
1498
|
+
"claude-cli",
|
|
1499
|
+
"claude-sdk",
|
|
1500
|
+
"cli",
|
|
1501
|
+
"mock",
|
|
1502
|
+
"vscode",
|
|
1503
|
+
"vscode-insiders",
|
|
1504
|
+
"agentv"
|
|
1505
|
+
];
|
|
1506
|
+
var PROVIDER_ALIASES = [
|
|
1507
|
+
"azure-openai",
|
|
1508
|
+
// alias for "azure"
|
|
1509
|
+
"google",
|
|
1510
|
+
// alias for "gemini"
|
|
1511
|
+
"google-gemini",
|
|
1512
|
+
// alias for "gemini"
|
|
1513
|
+
"codex-cli",
|
|
1514
|
+
// alias for "codex"
|
|
1515
|
+
"copilot",
|
|
1516
|
+
// alias for "copilot-cli" (default copilot experience)
|
|
1517
|
+
"copilot_sdk",
|
|
1518
|
+
// alias for "copilot-sdk" (underscore variant)
|
|
1519
|
+
"pi",
|
|
1520
|
+
// alias for "pi-coding-agent"
|
|
1521
|
+
"claude-code",
|
|
1522
|
+
// alias for "claude" (legacy)
|
|
1523
|
+
"bedrock",
|
|
1524
|
+
// legacy/future support
|
|
1525
|
+
"vertex"
|
|
1526
|
+
// legacy/future support
|
|
1527
|
+
];
|
|
1528
|
+
function extractLastAssistantContent(messages) {
|
|
1529
|
+
if (!messages || messages.length === 0) {
|
|
1530
|
+
return "";
|
|
1531
|
+
}
|
|
1532
|
+
for (let i = messages.length - 1; i >= 0; i--) {
|
|
1533
|
+
const msg = messages[i];
|
|
1534
|
+
if (msg.role === "assistant" && msg.content !== void 0) {
|
|
1535
|
+
if (typeof msg.content === "string") {
|
|
1536
|
+
return msg.content;
|
|
1537
|
+
}
|
|
1538
|
+
if (isContentArray(msg.content)) {
|
|
1539
|
+
return getTextContent(msg.content);
|
|
1540
|
+
}
|
|
1541
|
+
return JSON.stringify(msg.content);
|
|
1542
|
+
}
|
|
1543
|
+
}
|
|
1544
|
+
return "";
|
|
1545
|
+
}
|
|
1546
|
+
function isAgentProvider(provider) {
|
|
1547
|
+
return provider ? AGENT_PROVIDER_KINDS.includes(provider.kind) : false;
|
|
1548
|
+
}
|
|
1549
|
+
|
|
1485
1550
|
// src/evaluation/interpolation.ts
|
|
1486
1551
|
var ENV_VAR_PATTERN = /\$\{\{\s*([A-Za-z_][A-Za-z0-9_]*)\s*\}\}/g;
|
|
1487
1552
|
function interpolateEnv(value, env) {
|
|
@@ -1619,82 +1684,10 @@ async function expandFileReferences(tests, evalFileDir) {
|
|
|
1619
1684
|
return expanded;
|
|
1620
1685
|
}
|
|
1621
1686
|
|
|
1622
|
-
// src/evaluation/providers/types.ts
|
|
1623
|
-
var AGENT_PROVIDER_KINDS = [
|
|
1624
|
-
"codex",
|
|
1625
|
-
"copilot-sdk",
|
|
1626
|
-
"copilot-cli",
|
|
1627
|
-
"pi-coding-agent",
|
|
1628
|
-
"pi-cli",
|
|
1629
|
-
"claude",
|
|
1630
|
-
"claude-cli",
|
|
1631
|
-
"claude-sdk",
|
|
1632
|
-
"vscode",
|
|
1633
|
-
"vscode-insiders"
|
|
1634
|
-
];
|
|
1635
|
-
var KNOWN_PROVIDERS = [
|
|
1636
|
-
"openai",
|
|
1637
|
-
"openrouter",
|
|
1638
|
-
"azure",
|
|
1639
|
-
"anthropic",
|
|
1640
|
-
"gemini",
|
|
1641
|
-
"codex",
|
|
1642
|
-
"copilot-sdk",
|
|
1643
|
-
"copilot-cli",
|
|
1644
|
-
"copilot-log",
|
|
1645
|
-
"pi-coding-agent",
|
|
1646
|
-
"pi-cli",
|
|
1647
|
-
"claude",
|
|
1648
|
-
"claude-cli",
|
|
1649
|
-
"claude-sdk",
|
|
1650
|
-
"cli",
|
|
1651
|
-
"mock",
|
|
1652
|
-
"vscode",
|
|
1653
|
-
"vscode-insiders",
|
|
1654
|
-
"agentv"
|
|
1655
|
-
];
|
|
1656
|
-
var PROVIDER_ALIASES = [
|
|
1657
|
-
"azure-openai",
|
|
1658
|
-
// alias for "azure"
|
|
1659
|
-
"google",
|
|
1660
|
-
// alias for "gemini"
|
|
1661
|
-
"google-gemini",
|
|
1662
|
-
// alias for "gemini"
|
|
1663
|
-
"codex-cli",
|
|
1664
|
-
// alias for "codex"
|
|
1665
|
-
"copilot",
|
|
1666
|
-
// alias for "copilot-cli" (default copilot experience)
|
|
1667
|
-
"copilot_sdk",
|
|
1668
|
-
// alias for "copilot-sdk" (underscore variant)
|
|
1669
|
-
"pi",
|
|
1670
|
-
// alias for "pi-coding-agent"
|
|
1671
|
-
"claude-code",
|
|
1672
|
-
// alias for "claude" (legacy)
|
|
1673
|
-
"bedrock",
|
|
1674
|
-
// legacy/future support
|
|
1675
|
-
"vertex"
|
|
1676
|
-
// legacy/future support
|
|
1677
|
-
];
|
|
1678
|
-
function extractLastAssistantContent(messages) {
|
|
1679
|
-
if (!messages || messages.length === 0) {
|
|
1680
|
-
return "";
|
|
1681
|
-
}
|
|
1682
|
-
for (let i = messages.length - 1; i >= 0; i--) {
|
|
1683
|
-
const msg = messages[i];
|
|
1684
|
-
if (msg.role === "assistant" && msg.content !== void 0) {
|
|
1685
|
-
if (typeof msg.content === "string") {
|
|
1686
|
-
return msg.content;
|
|
1687
|
-
}
|
|
1688
|
-
return JSON.stringify(msg.content);
|
|
1689
|
-
}
|
|
1690
|
-
}
|
|
1691
|
-
return "";
|
|
1692
|
-
}
|
|
1693
|
-
function isAgentProvider(provider) {
|
|
1694
|
-
return provider ? AGENT_PROVIDER_KINDS.includes(provider.kind) : false;
|
|
1695
|
-
}
|
|
1696
|
-
|
|
1697
1687
|
export {
|
|
1688
|
+
isContent,
|
|
1689
|
+
isContentArray,
|
|
1690
|
+
getTextContent,
|
|
1698
1691
|
TEST_MESSAGE_ROLES,
|
|
1699
1692
|
isTestMessageRole,
|
|
1700
1693
|
isJsonObject,
|
|
@@ -1713,10 +1706,11 @@ export {
|
|
|
1713
1706
|
buildSearchRoots,
|
|
1714
1707
|
resolveFileReference,
|
|
1715
1708
|
CLI_PLACEHOLDERS,
|
|
1709
|
+
COMMON_TARGET_SETTINGS,
|
|
1716
1710
|
resolveTargetDefinition,
|
|
1717
1711
|
KNOWN_PROVIDERS,
|
|
1718
1712
|
PROVIDER_ALIASES,
|
|
1719
1713
|
extractLastAssistantContent,
|
|
1720
1714
|
isAgentProvider
|
|
1721
1715
|
};
|
|
1722
|
-
//# sourceMappingURL=chunk-HP5PFOVK.js.map
|
|
1716
|
+
//# sourceMappingURL=chunk-PXYYRDHH.js.map
|