agentv 3.14.6 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +59 -533
- package/dist/{chunk-CQRWNXVG.js → chunk-2W5JKKXC.js} +537 -727
- package/dist/chunk-2W5JKKXC.js.map +1 -0
- package/dist/{chunk-Y25VL7PX.js → chunk-4Z326WWF.js} +40 -17
- package/dist/chunk-4Z326WWF.js.map +1 -0
- package/dist/{chunk-ELQEFMGO.js → chunk-XEAW7OQT.js} +594 -296
- package/dist/chunk-XEAW7OQT.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-5EEXTTC3.js → dist-2JUUJ6PT.js} +18 -2
- package/dist/index.js +3 -3
- package/dist/{interactive-5ESM5DWV.js → interactive-7ZYS6IOC.js} +4 -11
- package/dist/interactive-7ZYS6IOC.js.map +1 -0
- package/dist/studio/assets/index-CDGReinH.js +71 -0
- package/dist/studio/assets/index-DofvSOmX.js +11 -0
- package/dist/studio/assets/index-izxfmBKC.css +1 -0
- package/dist/studio/index.html +13 -0
- package/package.json +1 -1
- package/dist/chunk-CQRWNXVG.js.map +0 -1
- package/dist/chunk-ELQEFMGO.js.map +0 -1
- package/dist/chunk-Y25VL7PX.js.map +0 -1
- package/dist/interactive-5ESM5DWV.js.map +0 -1
- /package/dist/{dist-5EEXTTC3.js.map → dist-2JUUJ6PT.js.map} +0 -0
|
@@ -301,7 +301,7 @@ var require_dist = __commonJS({
|
|
|
301
301
|
}
|
|
302
302
|
});
|
|
303
303
|
|
|
304
|
-
// ../../packages/core/dist/chunk-
|
|
304
|
+
// ../../packages/core/dist/chunk-PXYYRDHH.js
|
|
305
305
|
import { constants } from "node:fs";
|
|
306
306
|
import { access, readFile } from "node:fs/promises";
|
|
307
307
|
import path from "node:path";
|
|
@@ -419,11 +419,32 @@ __export(external_exports2, {
|
|
|
419
419
|
void: () => voidType
|
|
420
420
|
});
|
|
421
421
|
|
|
422
|
-
// ../../packages/core/dist/chunk-
|
|
422
|
+
// ../../packages/core/dist/chunk-PXYYRDHH.js
|
|
423
423
|
import { readFile as readFile2 } from "node:fs/promises";
|
|
424
424
|
import path3 from "node:path";
|
|
425
425
|
import fg from "fast-glob";
|
|
426
426
|
import { parse as parseYaml } from "yaml";
|
|
427
|
+
var CONTENT_TYPES = /* @__PURE__ */ new Set(["text", "image", "file"]);
|
|
428
|
+
function isContent(value) {
|
|
429
|
+
if (!value || typeof value !== "object") return false;
|
|
430
|
+
const v = value;
|
|
431
|
+
return typeof v.type === "string" && CONTENT_TYPES.has(v.type);
|
|
432
|
+
}
|
|
433
|
+
function isContentArray(value) {
|
|
434
|
+
return Array.isArray(value) && value.length > 0 && value.every(isContent);
|
|
435
|
+
}
|
|
436
|
+
function getTextContent(content) {
|
|
437
|
+
if (content == null) return "";
|
|
438
|
+
if (typeof content === "string") return content;
|
|
439
|
+
if (!Array.isArray(content)) return "";
|
|
440
|
+
const parts = [];
|
|
441
|
+
for (const block of content) {
|
|
442
|
+
if (block.type === "text") {
|
|
443
|
+
parts.push(block.text);
|
|
444
|
+
}
|
|
445
|
+
}
|
|
446
|
+
return parts.join("\n");
|
|
447
|
+
}
|
|
427
448
|
var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
|
|
428
449
|
var TEST_MESSAGE_ROLES = TEST_MESSAGE_ROLE_VALUES;
|
|
429
450
|
var TEST_MESSAGE_ROLE_SET = new Set(TEST_MESSAGE_ROLE_VALUES);
|
|
@@ -776,6 +797,12 @@ var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set([
|
|
|
776
797
|
"FILES",
|
|
777
798
|
"OUTPUT_FILE"
|
|
778
799
|
]);
|
|
800
|
+
var COMMON_TARGET_SETTINGS = [
|
|
801
|
+
"provider_batching",
|
|
802
|
+
"providerBatching",
|
|
803
|
+
"subagent_mode_allowed",
|
|
804
|
+
"subagentModeAllowed"
|
|
805
|
+
];
|
|
779
806
|
var BASE_TARGET_SCHEMA = external_exports2.object({
|
|
780
807
|
name: external_exports2.string().min(1, "target name is required"),
|
|
781
808
|
provider: external_exports2.string().min(1, "provider is required"),
|
|
@@ -784,7 +811,8 @@ var BASE_TARGET_SCHEMA = external_exports2.object({
|
|
|
784
811
|
// backward compat
|
|
785
812
|
workers: external_exports2.number().int().min(1).optional(),
|
|
786
813
|
workspace_template: external_exports2.string().optional(),
|
|
787
|
-
workspaceTemplate: external_exports2.string().optional()
|
|
814
|
+
workspaceTemplate: external_exports2.string().optional(),
|
|
815
|
+
subagent_mode_allowed: external_exports2.boolean().optional()
|
|
788
816
|
}).passthrough();
|
|
789
817
|
var DEFAULT_AZURE_API_VERSION = "2024-12-01-preview";
|
|
790
818
|
var DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1";
|
|
@@ -847,42 +875,40 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
|
|
|
847
875
|
const providerBatching = resolveOptionalBoolean(
|
|
848
876
|
parsed.provider_batching ?? parsed.providerBatching
|
|
849
877
|
);
|
|
878
|
+
const subagentModeAllowed = resolveOptionalBoolean(
|
|
879
|
+
parsed.subagent_mode_allowed ?? parsed.subagentModeAllowed
|
|
880
|
+
);
|
|
881
|
+
const base = {
|
|
882
|
+
name: parsed.name,
|
|
883
|
+
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
884
|
+
workers: parsed.workers,
|
|
885
|
+
providerBatching,
|
|
886
|
+
subagentModeAllowed
|
|
887
|
+
};
|
|
850
888
|
switch (provider) {
|
|
851
889
|
case "openai":
|
|
852
890
|
return {
|
|
853
891
|
kind: "openai",
|
|
854
|
-
|
|
855
|
-
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
856
|
-
workers: parsed.workers,
|
|
857
|
-
providerBatching,
|
|
892
|
+
...base,
|
|
858
893
|
config: resolveOpenAIConfig(parsed, env)
|
|
859
894
|
};
|
|
860
895
|
case "openrouter":
|
|
861
896
|
return {
|
|
862
897
|
kind: "openrouter",
|
|
863
|
-
|
|
864
|
-
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
865
|
-
workers: parsed.workers,
|
|
866
|
-
providerBatching,
|
|
898
|
+
...base,
|
|
867
899
|
config: resolveOpenRouterConfig(parsed, env)
|
|
868
900
|
};
|
|
869
901
|
case "azure":
|
|
870
902
|
case "azure-openai":
|
|
871
903
|
return {
|
|
872
904
|
kind: "azure",
|
|
873
|
-
|
|
874
|
-
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
875
|
-
workers: parsed.workers,
|
|
876
|
-
providerBatching,
|
|
905
|
+
...base,
|
|
877
906
|
config: resolveAzureConfig(parsed, env)
|
|
878
907
|
};
|
|
879
908
|
case "anthropic":
|
|
880
909
|
return {
|
|
881
910
|
kind: "anthropic",
|
|
882
|
-
|
|
883
|
-
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
884
|
-
workers: parsed.workers,
|
|
885
|
-
providerBatching,
|
|
911
|
+
...base,
|
|
886
912
|
config: resolveAnthropicConfig(parsed, env)
|
|
887
913
|
};
|
|
888
914
|
case "gemini":
|
|
@@ -890,68 +916,47 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
|
|
|
890
916
|
case "google-gemini":
|
|
891
917
|
return {
|
|
892
918
|
kind: "gemini",
|
|
893
|
-
|
|
894
|
-
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
895
|
-
workers: parsed.workers,
|
|
896
|
-
providerBatching,
|
|
919
|
+
...base,
|
|
897
920
|
config: resolveGeminiConfig(parsed, env)
|
|
898
921
|
};
|
|
899
922
|
case "codex":
|
|
900
923
|
case "codex-cli":
|
|
901
924
|
return {
|
|
902
925
|
kind: "codex",
|
|
903
|
-
|
|
904
|
-
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
905
|
-
workers: parsed.workers,
|
|
906
|
-
providerBatching,
|
|
926
|
+
...base,
|
|
907
927
|
config: resolveCodexConfig(parsed, env, evalFilePath)
|
|
908
928
|
};
|
|
909
929
|
case "copilot-sdk":
|
|
910
930
|
case "copilot_sdk":
|
|
911
931
|
return {
|
|
912
932
|
kind: "copilot-sdk",
|
|
913
|
-
|
|
914
|
-
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
915
|
-
workers: parsed.workers,
|
|
916
|
-
providerBatching,
|
|
933
|
+
...base,
|
|
917
934
|
config: resolveCopilotSdkConfig(parsed, env, evalFilePath)
|
|
918
935
|
};
|
|
919
936
|
case "copilot":
|
|
920
937
|
case "copilot-cli":
|
|
921
938
|
return {
|
|
922
939
|
kind: "copilot-cli",
|
|
923
|
-
|
|
924
|
-
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
925
|
-
workers: parsed.workers,
|
|
926
|
-
providerBatching,
|
|
940
|
+
...base,
|
|
927
941
|
config: resolveCopilotCliConfig(parsed, env, evalFilePath)
|
|
928
942
|
};
|
|
929
943
|
case "copilot-log":
|
|
930
944
|
return {
|
|
931
945
|
kind: "copilot-log",
|
|
932
|
-
|
|
933
|
-
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
934
|
-
workers: parsed.workers,
|
|
935
|
-
providerBatching,
|
|
946
|
+
...base,
|
|
936
947
|
config: resolveCopilotLogConfig(parsed, env)
|
|
937
948
|
};
|
|
938
949
|
case "pi":
|
|
939
950
|
case "pi-coding-agent":
|
|
940
951
|
return {
|
|
941
952
|
kind: "pi-coding-agent",
|
|
942
|
-
|
|
943
|
-
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
944
|
-
workers: parsed.workers,
|
|
945
|
-
providerBatching,
|
|
953
|
+
...base,
|
|
946
954
|
config: resolvePiCodingAgentConfig(parsed, env, evalFilePath)
|
|
947
955
|
};
|
|
948
956
|
case "pi-cli":
|
|
949
957
|
return {
|
|
950
958
|
kind: "pi-cli",
|
|
951
|
-
|
|
952
|
-
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
953
|
-
workers: parsed.workers,
|
|
954
|
-
providerBatching,
|
|
959
|
+
...base,
|
|
955
960
|
config: resolvePiCliConfig(parsed, env, evalFilePath)
|
|
956
961
|
};
|
|
957
962
|
case "claude":
|
|
@@ -959,38 +964,26 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
|
|
|
959
964
|
case "claude-cli":
|
|
960
965
|
return {
|
|
961
966
|
kind: "claude-cli",
|
|
962
|
-
|
|
963
|
-
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
964
|
-
workers: parsed.workers,
|
|
965
|
-
providerBatching,
|
|
967
|
+
...base,
|
|
966
968
|
config: resolveClaudeConfig(parsed, env, evalFilePath)
|
|
967
969
|
};
|
|
968
970
|
case "claude-sdk":
|
|
969
971
|
return {
|
|
970
972
|
kind: "claude-sdk",
|
|
971
|
-
|
|
972
|
-
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
973
|
-
workers: parsed.workers,
|
|
974
|
-
providerBatching,
|
|
973
|
+
...base,
|
|
975
974
|
config: resolveClaudeConfig(parsed, env, evalFilePath)
|
|
976
975
|
};
|
|
977
976
|
case "mock":
|
|
978
977
|
return {
|
|
979
978
|
kind: "mock",
|
|
980
|
-
|
|
981
|
-
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
982
|
-
workers: parsed.workers,
|
|
983
|
-
providerBatching,
|
|
979
|
+
...base,
|
|
984
980
|
config: resolveMockConfig(parsed)
|
|
985
981
|
};
|
|
986
982
|
case "vscode":
|
|
987
983
|
case "vscode-insiders":
|
|
988
984
|
return {
|
|
989
985
|
kind: provider,
|
|
990
|
-
|
|
991
|
-
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
992
|
-
workers: parsed.workers,
|
|
993
|
-
providerBatching,
|
|
986
|
+
...base,
|
|
994
987
|
config: resolveVSCodeConfig(parsed, env, provider === "vscode-insiders", evalFilePath)
|
|
995
988
|
};
|
|
996
989
|
case "agentv": {
|
|
@@ -1003,29 +996,21 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
|
|
|
1003
996
|
const temperature = typeof parsed.temperature === "number" ? parsed.temperature : 0;
|
|
1004
997
|
return {
|
|
1005
998
|
kind: "agentv",
|
|
1006
|
-
|
|
1007
|
-
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
999
|
+
...base,
|
|
1008
1000
|
workers: typeof parsed.workers === "number" ? parsed.workers : void 0,
|
|
1009
|
-
providerBatching,
|
|
1010
1001
|
config: { model, temperature }
|
|
1011
1002
|
};
|
|
1012
1003
|
}
|
|
1013
1004
|
case "cli":
|
|
1014
1005
|
return {
|
|
1015
1006
|
kind: "cli",
|
|
1016
|
-
|
|
1017
|
-
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
1018
|
-
workers: parsed.workers,
|
|
1019
|
-
providerBatching,
|
|
1007
|
+
...base,
|
|
1020
1008
|
config: resolveCliConfig(parsed, env, evalFilePath)
|
|
1021
1009
|
};
|
|
1022
1010
|
default:
|
|
1023
1011
|
return {
|
|
1024
1012
|
kind: "cli",
|
|
1025
|
-
|
|
1026
|
-
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
1027
|
-
workers: parsed.workers,
|
|
1028
|
-
providerBatching,
|
|
1013
|
+
...base,
|
|
1029
1014
|
config: resolveDiscoveredProviderConfig(parsed, provider, env, evalFilePath)
|
|
1030
1015
|
};
|
|
1031
1016
|
}
|
|
@@ -1653,8 +1638,8 @@ function resolveCliConfig(target, env, evalFilePath) {
|
|
|
1653
1638
|
const parseResult = CliTargetInputSchema.safeParse(target, { errorMap: cliErrorMap });
|
|
1654
1639
|
if (!parseResult.success) {
|
|
1655
1640
|
const firstError = parseResult.error.errors[0];
|
|
1656
|
-
const
|
|
1657
|
-
const prefix =
|
|
1641
|
+
const path48 = firstError?.path.join(".") || "";
|
|
1642
|
+
const prefix = path48 ? `${target.name} ${path48}: ` : `${target.name}: `;
|
|
1658
1643
|
throw new Error(`${prefix}${firstError?.message}`);
|
|
1659
1644
|
}
|
|
1660
1645
|
const normalized = normalizeCliTargetInput(parseResult.data, env, evalFilePath);
|
|
@@ -1897,6 +1882,82 @@ function resolveOptionalNumberArray(source, description) {
|
|
|
1897
1882
|
}
|
|
1898
1883
|
return resolved.length > 0 ? resolved : void 0;
|
|
1899
1884
|
}
|
|
1885
|
+
var AGENT_PROVIDER_KINDS = [
|
|
1886
|
+
"codex",
|
|
1887
|
+
"copilot-sdk",
|
|
1888
|
+
"copilot-cli",
|
|
1889
|
+
"pi-coding-agent",
|
|
1890
|
+
"pi-cli",
|
|
1891
|
+
"claude",
|
|
1892
|
+
"claude-cli",
|
|
1893
|
+
"claude-sdk",
|
|
1894
|
+
"vscode",
|
|
1895
|
+
"vscode-insiders"
|
|
1896
|
+
];
|
|
1897
|
+
var KNOWN_PROVIDERS = [
|
|
1898
|
+
"openai",
|
|
1899
|
+
"openrouter",
|
|
1900
|
+
"azure",
|
|
1901
|
+
"anthropic",
|
|
1902
|
+
"gemini",
|
|
1903
|
+
"codex",
|
|
1904
|
+
"copilot-sdk",
|
|
1905
|
+
"copilot-cli",
|
|
1906
|
+
"copilot-log",
|
|
1907
|
+
"pi-coding-agent",
|
|
1908
|
+
"pi-cli",
|
|
1909
|
+
"claude",
|
|
1910
|
+
"claude-cli",
|
|
1911
|
+
"claude-sdk",
|
|
1912
|
+
"cli",
|
|
1913
|
+
"mock",
|
|
1914
|
+
"vscode",
|
|
1915
|
+
"vscode-insiders",
|
|
1916
|
+
"agentv"
|
|
1917
|
+
];
|
|
1918
|
+
var PROVIDER_ALIASES = [
|
|
1919
|
+
"azure-openai",
|
|
1920
|
+
// alias for "azure"
|
|
1921
|
+
"google",
|
|
1922
|
+
// alias for "gemini"
|
|
1923
|
+
"google-gemini",
|
|
1924
|
+
// alias for "gemini"
|
|
1925
|
+
"codex-cli",
|
|
1926
|
+
// alias for "codex"
|
|
1927
|
+
"copilot",
|
|
1928
|
+
// alias for "copilot-cli" (default copilot experience)
|
|
1929
|
+
"copilot_sdk",
|
|
1930
|
+
// alias for "copilot-sdk" (underscore variant)
|
|
1931
|
+
"pi",
|
|
1932
|
+
// alias for "pi-coding-agent"
|
|
1933
|
+
"claude-code",
|
|
1934
|
+
// alias for "claude" (legacy)
|
|
1935
|
+
"bedrock",
|
|
1936
|
+
// legacy/future support
|
|
1937
|
+
"vertex"
|
|
1938
|
+
// legacy/future support
|
|
1939
|
+
];
|
|
1940
|
+
function extractLastAssistantContent(messages) {
|
|
1941
|
+
if (!messages || messages.length === 0) {
|
|
1942
|
+
return "";
|
|
1943
|
+
}
|
|
1944
|
+
for (let i = messages.length - 1; i >= 0; i--) {
|
|
1945
|
+
const msg = messages[i];
|
|
1946
|
+
if (msg.role === "assistant" && msg.content !== void 0) {
|
|
1947
|
+
if (typeof msg.content === "string") {
|
|
1948
|
+
return msg.content;
|
|
1949
|
+
}
|
|
1950
|
+
if (isContentArray(msg.content)) {
|
|
1951
|
+
return getTextContent(msg.content);
|
|
1952
|
+
}
|
|
1953
|
+
return JSON.stringify(msg.content);
|
|
1954
|
+
}
|
|
1955
|
+
}
|
|
1956
|
+
return "";
|
|
1957
|
+
}
|
|
1958
|
+
function isAgentProvider(provider) {
|
|
1959
|
+
return provider ? AGENT_PROVIDER_KINDS.includes(provider.kind) : false;
|
|
1960
|
+
}
|
|
1900
1961
|
var ENV_VAR_PATTERN = /\$\{\{\s*([A-Za-z_][A-Za-z0-9_]*)\s*\}\}/g;
|
|
1901
1962
|
function interpolateEnv(value, env) {
|
|
1902
1963
|
if (typeof value === "string") {
|
|
@@ -2026,79 +2087,6 @@ async function expandFileReferences(tests, evalFileDir) {
|
|
|
2026
2087
|
}
|
|
2027
2088
|
return expanded;
|
|
2028
2089
|
}
|
|
2029
|
-
var AGENT_PROVIDER_KINDS = [
|
|
2030
|
-
"codex",
|
|
2031
|
-
"copilot-sdk",
|
|
2032
|
-
"copilot-cli",
|
|
2033
|
-
"pi-coding-agent",
|
|
2034
|
-
"pi-cli",
|
|
2035
|
-
"claude",
|
|
2036
|
-
"claude-cli",
|
|
2037
|
-
"claude-sdk",
|
|
2038
|
-
"vscode",
|
|
2039
|
-
"vscode-insiders"
|
|
2040
|
-
];
|
|
2041
|
-
var KNOWN_PROVIDERS = [
|
|
2042
|
-
"openai",
|
|
2043
|
-
"openrouter",
|
|
2044
|
-
"azure",
|
|
2045
|
-
"anthropic",
|
|
2046
|
-
"gemini",
|
|
2047
|
-
"codex",
|
|
2048
|
-
"copilot-sdk",
|
|
2049
|
-
"copilot-cli",
|
|
2050
|
-
"copilot-log",
|
|
2051
|
-
"pi-coding-agent",
|
|
2052
|
-
"pi-cli",
|
|
2053
|
-
"claude",
|
|
2054
|
-
"claude-cli",
|
|
2055
|
-
"claude-sdk",
|
|
2056
|
-
"cli",
|
|
2057
|
-
"mock",
|
|
2058
|
-
"vscode",
|
|
2059
|
-
"vscode-insiders",
|
|
2060
|
-
"agentv"
|
|
2061
|
-
];
|
|
2062
|
-
var PROVIDER_ALIASES = [
|
|
2063
|
-
"azure-openai",
|
|
2064
|
-
// alias for "azure"
|
|
2065
|
-
"google",
|
|
2066
|
-
// alias for "gemini"
|
|
2067
|
-
"google-gemini",
|
|
2068
|
-
// alias for "gemini"
|
|
2069
|
-
"codex-cli",
|
|
2070
|
-
// alias for "codex"
|
|
2071
|
-
"copilot",
|
|
2072
|
-
// alias for "copilot-cli" (default copilot experience)
|
|
2073
|
-
"copilot_sdk",
|
|
2074
|
-
// alias for "copilot-sdk" (underscore variant)
|
|
2075
|
-
"pi",
|
|
2076
|
-
// alias for "pi-coding-agent"
|
|
2077
|
-
"claude-code",
|
|
2078
|
-
// alias for "claude" (legacy)
|
|
2079
|
-
"bedrock",
|
|
2080
|
-
// legacy/future support
|
|
2081
|
-
"vertex"
|
|
2082
|
-
// legacy/future support
|
|
2083
|
-
];
|
|
2084
|
-
function extractLastAssistantContent(messages) {
|
|
2085
|
-
if (!messages || messages.length === 0) {
|
|
2086
|
-
return "";
|
|
2087
|
-
}
|
|
2088
|
-
for (let i = messages.length - 1; i >= 0; i--) {
|
|
2089
|
-
const msg = messages[i];
|
|
2090
|
-
if (msg.role === "assistant" && msg.content !== void 0) {
|
|
2091
|
-
if (typeof msg.content === "string") {
|
|
2092
|
-
return msg.content;
|
|
2093
|
-
}
|
|
2094
|
-
return JSON.stringify(msg.content);
|
|
2095
|
-
}
|
|
2096
|
-
}
|
|
2097
|
-
return "";
|
|
2098
|
-
}
|
|
2099
|
-
function isAgentProvider(provider) {
|
|
2100
|
-
return provider ? AGENT_PROVIDER_KINDS.includes(provider.kind) : false;
|
|
2101
|
-
}
|
|
2102
2090
|
|
|
2103
2091
|
// ../../packages/core/dist/index.js
|
|
2104
2092
|
import { readFile as readFile6 } from "node:fs/promises";
|
|
@@ -6734,7 +6722,7 @@ function createOpenRouter(options = {}) {
|
|
|
6734
6722
|
);
|
|
6735
6723
|
const createChatModel = (modelId, settings = {}) => new OpenRouterChatLanguageModel(modelId, settings, {
|
|
6736
6724
|
provider: "openrouter.chat",
|
|
6737
|
-
url: ({ path:
|
|
6725
|
+
url: ({ path: path48 }) => `${baseURL}${path48}`,
|
|
6738
6726
|
headers: getHeaders,
|
|
6739
6727
|
compatibility,
|
|
6740
6728
|
fetch: options.fetch,
|
|
@@ -6742,7 +6730,7 @@ function createOpenRouter(options = {}) {
|
|
|
6742
6730
|
});
|
|
6743
6731
|
const createCompletionModel = (modelId, settings = {}) => new OpenRouterCompletionLanguageModel(modelId, settings, {
|
|
6744
6732
|
provider: "openrouter.completion",
|
|
6745
|
-
url: ({ path:
|
|
6733
|
+
url: ({ path: path48 }) => `${baseURL}${path48}`,
|
|
6746
6734
|
headers: getHeaders,
|
|
6747
6735
|
compatibility,
|
|
6748
6736
|
fetch: options.fetch,
|
|
@@ -6750,14 +6738,14 @@ function createOpenRouter(options = {}) {
|
|
|
6750
6738
|
});
|
|
6751
6739
|
const createEmbeddingModel = (modelId, settings = {}) => new OpenRouterEmbeddingModel(modelId, settings, {
|
|
6752
6740
|
provider: "openrouter.embedding",
|
|
6753
|
-
url: ({ path:
|
|
6741
|
+
url: ({ path: path48 }) => `${baseURL}${path48}`,
|
|
6754
6742
|
headers: getHeaders,
|
|
6755
6743
|
fetch: options.fetch,
|
|
6756
6744
|
extraBody: options.extraBody
|
|
6757
6745
|
});
|
|
6758
6746
|
const createImageModel = (modelId, settings = {}) => new OpenRouterImageModel(modelId, settings, {
|
|
6759
6747
|
provider: "openrouter.image",
|
|
6760
|
-
url: ({ path:
|
|
6748
|
+
url: ({ path: path48 }) => `${baseURL}${path48}`,
|
|
6761
6749
|
headers: getHeaders,
|
|
6762
6750
|
fetch: options.fetch,
|
|
6763
6751
|
extraBody: options.extraBody
|
|
@@ -14350,6 +14338,7 @@ import { existsSync as existsSync4 } from "node:fs";
|
|
|
14350
14338
|
import path45 from "node:path";
|
|
14351
14339
|
import { mkdir as mkdir15, readFile as readFile13, writeFile as writeFile8 } from "node:fs/promises";
|
|
14352
14340
|
import path46 from "node:path";
|
|
14341
|
+
import path47 from "node:path";
|
|
14353
14342
|
function computeTraceSummary(messages) {
|
|
14354
14343
|
const toolCallCounts = {};
|
|
14355
14344
|
const toolDurations = {};
|
|
@@ -14979,15 +14968,23 @@ var TEMPLATE_VARIABLES = {
|
|
|
14979
14968
|
INPUT: "input",
|
|
14980
14969
|
OUTPUT: "output",
|
|
14981
14970
|
FILE_CHANGES: "file_changes",
|
|
14971
|
+
/** @deprecated Use INPUT instead — resolves to the same text value. */
|
|
14982
14972
|
INPUT_TEXT: "input_text",
|
|
14973
|
+
/** @deprecated Use OUTPUT instead — resolves to the same text value. */
|
|
14983
14974
|
OUTPUT_TEXT: "output_text",
|
|
14975
|
+
/** @deprecated Use EXPECTED_OUTPUT instead — resolves to the same text value. */
|
|
14984
14976
|
EXPECTED_OUTPUT_TEXT: "expected_output_text"
|
|
14985
14977
|
};
|
|
14986
14978
|
var VALID_TEMPLATE_VARIABLES = new Set(Object.values(TEMPLATE_VARIABLES));
|
|
14987
14979
|
var REQUIRED_TEMPLATE_VARIABLES = /* @__PURE__ */ new Set([
|
|
14988
|
-
TEMPLATE_VARIABLES.
|
|
14980
|
+
TEMPLATE_VARIABLES.OUTPUT,
|
|
14989
14981
|
TEMPLATE_VARIABLES.EXPECTED_OUTPUT
|
|
14990
14982
|
]);
|
|
14983
|
+
var DEPRECATED_TEMPLATE_VARIABLES = /* @__PURE__ */ new Map([
|
|
14984
|
+
[TEMPLATE_VARIABLES.INPUT_TEXT, TEMPLATE_VARIABLES.INPUT],
|
|
14985
|
+
[TEMPLATE_VARIABLES.OUTPUT_TEXT, TEMPLATE_VARIABLES.OUTPUT],
|
|
14986
|
+
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT, TEMPLATE_VARIABLES.EXPECTED_OUTPUT]
|
|
14987
|
+
]);
|
|
14991
14988
|
var ANSI_YELLOW22 = "\x1B[33m";
|
|
14992
14989
|
var ANSI_RESET3 = "\x1B[0m";
|
|
14993
14990
|
async function validateCustomPromptContent(promptPath) {
|
|
@@ -15007,16 +15004,29 @@ function validateTemplateVariables(content, source) {
|
|
|
15007
15004
|
}
|
|
15008
15005
|
match = variablePattern.exec(content);
|
|
15009
15006
|
}
|
|
15010
|
-
const hasCandidateAnswer = foundVariables.has(TEMPLATE_VARIABLES.OUTPUT_TEXT);
|
|
15011
|
-
const hasExpectedOutput = foundVariables.has(TEMPLATE_VARIABLES.EXPECTED_OUTPUT);
|
|
15007
|
+
const hasCandidateAnswer = foundVariables.has(TEMPLATE_VARIABLES.OUTPUT) || foundVariables.has(TEMPLATE_VARIABLES.OUTPUT_TEXT);
|
|
15008
|
+
const hasExpectedOutput = foundVariables.has(TEMPLATE_VARIABLES.EXPECTED_OUTPUT) || foundVariables.has(TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT);
|
|
15012
15009
|
const hasRequiredFields = hasCandidateAnswer || hasExpectedOutput;
|
|
15013
15010
|
if (!hasRequiredFields) {
|
|
15014
15011
|
throw new Error(
|
|
15015
15012
|
`Missing required fields. Must include at least one of:
|
|
15016
|
-
- {{ ${TEMPLATE_VARIABLES.
|
|
15013
|
+
- {{ ${TEMPLATE_VARIABLES.OUTPUT} }}
|
|
15017
15014
|
- {{ ${TEMPLATE_VARIABLES.EXPECTED_OUTPUT} }}`
|
|
15018
15015
|
);
|
|
15019
15016
|
}
|
|
15017
|
+
const deprecatedUsed = [];
|
|
15018
|
+
for (const [deprecated, replacement] of DEPRECATED_TEMPLATE_VARIABLES) {
|
|
15019
|
+
if (foundVariables.has(deprecated)) {
|
|
15020
|
+
deprecatedUsed.push(`{{ ${deprecated} }} \u2192 {{ ${replacement} }}`);
|
|
15021
|
+
}
|
|
15022
|
+
}
|
|
15023
|
+
if (deprecatedUsed.length > 0) {
|
|
15024
|
+
console.warn(
|
|
15025
|
+
`${ANSI_YELLOW22}Warning: Template at ${source} uses deprecated variable names:
|
|
15026
|
+
${deprecatedUsed.join("\n ")}
|
|
15027
|
+
These still work but will be removed in a future version.${ANSI_RESET3}`
|
|
15028
|
+
);
|
|
15029
|
+
}
|
|
15020
15030
|
if (invalidVariables.length > 0) {
|
|
15021
15031
|
const warningMessage = `${ANSI_YELLOW22}Warning: Custom evaluator template at ${source}
|
|
15022
15032
|
Contains invalid variables: ${invalidVariables.map((v) => `{{ ${v} }}`).join(", ")}
|
|
@@ -16418,6 +16428,19 @@ function hasVisibleContent(segments) {
|
|
|
16418
16428
|
function asString2(value) {
|
|
16419
16429
|
return typeof value === "string" ? value : void 0;
|
|
16420
16430
|
}
|
|
16431
|
+
var IMAGE_MEDIA_TYPES = {
|
|
16432
|
+
".png": "image/png",
|
|
16433
|
+
".jpg": "image/jpeg",
|
|
16434
|
+
".jpeg": "image/jpeg",
|
|
16435
|
+
".gif": "image/gif",
|
|
16436
|
+
".webp": "image/webp",
|
|
16437
|
+
".svg": "image/svg+xml",
|
|
16438
|
+
".bmp": "image/bmp"
|
|
16439
|
+
};
|
|
16440
|
+
function detectImageMediaType(filePath) {
|
|
16441
|
+
const ext = path5.extname(filePath).toLowerCase();
|
|
16442
|
+
return IMAGE_MEDIA_TYPES[ext];
|
|
16443
|
+
}
|
|
16421
16444
|
var ANSI_YELLOW4 = "\x1B[33m";
|
|
16422
16445
|
var ANSI_RESET5 = "\x1B[0m";
|
|
16423
16446
|
async function processMessages(options) {
|
|
@@ -16483,6 +16506,47 @@ async function processMessages(options) {
|
|
|
16483
16506
|
}
|
|
16484
16507
|
continue;
|
|
16485
16508
|
}
|
|
16509
|
+
if (segmentType === "image") {
|
|
16510
|
+
const rawValue = asString3(rawSegment.value);
|
|
16511
|
+
if (!rawValue) {
|
|
16512
|
+
continue;
|
|
16513
|
+
}
|
|
16514
|
+
const { displayPath, resolvedPath, attempted } = await resolveFileReference22(
|
|
16515
|
+
rawValue,
|
|
16516
|
+
searchRoots
|
|
16517
|
+
);
|
|
16518
|
+
if (!resolvedPath) {
|
|
16519
|
+
const attempts = attempted.length ? [" Tried:", ...attempted.map((candidate) => ` ${candidate}`)] : void 0;
|
|
16520
|
+
const context2 = messageType === "input" ? "" : " in expected_output";
|
|
16521
|
+
logWarning3(`Image file not found${context2}: ${displayPath}`, attempts);
|
|
16522
|
+
continue;
|
|
16523
|
+
}
|
|
16524
|
+
const mediaType = detectImageMediaType(resolvedPath);
|
|
16525
|
+
if (!mediaType) {
|
|
16526
|
+
logWarning3(
|
|
16527
|
+
`Unsupported image extension for ${displayPath}. Supported: ${Object.keys(IMAGE_MEDIA_TYPES).join(", ")}`
|
|
16528
|
+
);
|
|
16529
|
+
continue;
|
|
16530
|
+
}
|
|
16531
|
+
try {
|
|
16532
|
+
const imageBuffer = await readFile4(resolvedPath);
|
|
16533
|
+
const base64 = imageBuffer.toString("base64");
|
|
16534
|
+
processedContent.push({
|
|
16535
|
+
type: "image",
|
|
16536
|
+
media_type: mediaType,
|
|
16537
|
+
source: `data:${mediaType};base64,${base64}`
|
|
16538
|
+
});
|
|
16539
|
+
if (verbose) {
|
|
16540
|
+
const label = messageType === "input" ? "[Image]" : "[Expected Output Image]";
|
|
16541
|
+
console.log(` ${label} Found: ${displayPath}`);
|
|
16542
|
+
console.log(` Resolved to: ${resolvedPath} (${mediaType})`);
|
|
16543
|
+
}
|
|
16544
|
+
} catch (error) {
|
|
16545
|
+
const context2 = messageType === "input" ? "" : " expected output";
|
|
16546
|
+
logWarning3(`Could not read${context2} image ${resolvedPath}: ${error.message}`);
|
|
16547
|
+
}
|
|
16548
|
+
continue;
|
|
16549
|
+
}
|
|
16486
16550
|
const clonedSegment = cloneJsonObject(rawSegment);
|
|
16487
16551
|
processedContent.push(clonedSegment);
|
|
16488
16552
|
const inlineValue = clonedSegment.value;
|
|
@@ -16560,6 +16624,46 @@ async function processExpectedMessages(options) {
|
|
|
16560
16624
|
}
|
|
16561
16625
|
continue;
|
|
16562
16626
|
}
|
|
16627
|
+
if (segmentType === "image") {
|
|
16628
|
+
const rawValue = asString3(rawSegment.value);
|
|
16629
|
+
if (!rawValue) {
|
|
16630
|
+
continue;
|
|
16631
|
+
}
|
|
16632
|
+
const { displayPath, resolvedPath, attempted } = await resolveFileReference22(
|
|
16633
|
+
rawValue,
|
|
16634
|
+
searchRoots
|
|
16635
|
+
);
|
|
16636
|
+
if (!resolvedPath) {
|
|
16637
|
+
const attempts = attempted.length ? [" Tried:", ...attempted.map((candidate) => ` ${candidate}`)] : void 0;
|
|
16638
|
+
logWarning3(`Image file not found in expected_output: ${displayPath}`, attempts);
|
|
16639
|
+
continue;
|
|
16640
|
+
}
|
|
16641
|
+
const mediaType = detectImageMediaType(resolvedPath);
|
|
16642
|
+
if (!mediaType) {
|
|
16643
|
+
logWarning3(
|
|
16644
|
+
`Unsupported image extension for ${displayPath}. Supported: ${Object.keys(IMAGE_MEDIA_TYPES).join(", ")}`
|
|
16645
|
+
);
|
|
16646
|
+
continue;
|
|
16647
|
+
}
|
|
16648
|
+
try {
|
|
16649
|
+
const imageBuffer = await readFile4(resolvedPath);
|
|
16650
|
+
const base64 = imageBuffer.toString("base64");
|
|
16651
|
+
processedContent.push({
|
|
16652
|
+
type: "image",
|
|
16653
|
+
media_type: mediaType,
|
|
16654
|
+
source: `data:${mediaType};base64,${base64}`
|
|
16655
|
+
});
|
|
16656
|
+
if (verbose) {
|
|
16657
|
+
console.log(` [Expected Output Image] Found: ${displayPath}`);
|
|
16658
|
+
console.log(` Resolved to: ${resolvedPath} (${mediaType})`);
|
|
16659
|
+
}
|
|
16660
|
+
} catch (error) {
|
|
16661
|
+
logWarning3(
|
|
16662
|
+
`Could not read expected output image ${resolvedPath}: ${error.message}`
|
|
16663
|
+
);
|
|
16664
|
+
}
|
|
16665
|
+
continue;
|
|
16666
|
+
}
|
|
16563
16667
|
processedContent.push(cloneJsonObject(rawSegment));
|
|
16564
16668
|
}
|
|
16565
16669
|
segment.content = processedContent;
|
|
@@ -16802,7 +16906,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
16802
16906
|
const userFilePaths = collectResolvedInputFilePaths(inputMessages);
|
|
16803
16907
|
const testCase = {
|
|
16804
16908
|
id,
|
|
16805
|
-
|
|
16909
|
+
dataset: evalSetName,
|
|
16806
16910
|
conversation_id: conversationId,
|
|
16807
16911
|
question,
|
|
16808
16912
|
input: inputMessages,
|
|
@@ -17066,7 +17170,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
17066
17170
|
}
|
|
17067
17171
|
const suite = interpolated;
|
|
17068
17172
|
const evalSetNameFromSuite = asString5(suite.name)?.trim();
|
|
17069
|
-
const fallbackEvalSet = path7.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
|
|
17173
|
+
const fallbackEvalSet = path7.basename(absoluteTestPath).replace(/\.eval\.ya?ml$/i, "").replace(/\.ya?ml$/i, "") || "eval";
|
|
17070
17174
|
const evalSetName = evalSetNameFromSuite && evalSetNameFromSuite.length > 0 ? evalSetNameFromSuite : fallbackEvalSet;
|
|
17071
17175
|
const rawTestcases = resolveTests(suite);
|
|
17072
17176
|
const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm-grader";
|
|
@@ -17187,7 +17291,8 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
17187
17291
|
const caseTargets = extractTargetsFromTestCase(evalcase);
|
|
17188
17292
|
const testCase = {
|
|
17189
17293
|
id,
|
|
17190
|
-
|
|
17294
|
+
dataset: evalSetName,
|
|
17295
|
+
category: options?.category,
|
|
17191
17296
|
conversation_id: conversationId,
|
|
17192
17297
|
question,
|
|
17193
17298
|
input: inputMessages,
|
|
@@ -18090,6 +18195,47 @@ async function withRetry(fn, retryConfig, signal) {
|
|
|
18090
18195
|
}
|
|
18091
18196
|
throw lastError;
|
|
18092
18197
|
}
|
|
18198
|
+
function toContentArray(content) {
|
|
18199
|
+
if (!Array.isArray(content)) return void 0;
|
|
18200
|
+
let hasNonText = false;
|
|
18201
|
+
const blocks = [];
|
|
18202
|
+
for (const part of content) {
|
|
18203
|
+
if (!part || typeof part !== "object") continue;
|
|
18204
|
+
const p = part;
|
|
18205
|
+
if (p.type === "text" && typeof p.text === "string") {
|
|
18206
|
+
blocks.push({ type: "text", text: p.text });
|
|
18207
|
+
} else if (p.type === "image" && typeof p.source === "object" && p.source !== null) {
|
|
18208
|
+
const src = p.source;
|
|
18209
|
+
const mediaType = typeof p.media_type === "string" ? p.media_type : typeof src.media_type === "string" ? src.media_type : "application/octet-stream";
|
|
18210
|
+
const data = typeof src.data === "string" && src.data !== "" ? `data:${mediaType};base64,${src.data}` : typeof p.url === "string" && p.url !== "" ? p.url : "";
|
|
18211
|
+
if (!data) continue;
|
|
18212
|
+
blocks.push({ type: "image", media_type: mediaType, source: data });
|
|
18213
|
+
hasNonText = true;
|
|
18214
|
+
} else if (p.type === "tool_use") {
|
|
18215
|
+
} else if (p.type === "tool_result") {
|
|
18216
|
+
}
|
|
18217
|
+
}
|
|
18218
|
+
return hasNonText && blocks.length > 0 ? blocks : void 0;
|
|
18219
|
+
}
|
|
18220
|
+
function extractTextContent2(content) {
|
|
18221
|
+
if (typeof content === "string") {
|
|
18222
|
+
return content;
|
|
18223
|
+
}
|
|
18224
|
+
if (!Array.isArray(content)) {
|
|
18225
|
+
return void 0;
|
|
18226
|
+
}
|
|
18227
|
+
const textParts = [];
|
|
18228
|
+
for (const part of content) {
|
|
18229
|
+
if (!part || typeof part !== "object") {
|
|
18230
|
+
continue;
|
|
18231
|
+
}
|
|
18232
|
+
const p = part;
|
|
18233
|
+
if (p.type === "text" && typeof p.text === "string") {
|
|
18234
|
+
textParts.push(p.text);
|
|
18235
|
+
}
|
|
18236
|
+
}
|
|
18237
|
+
return textParts.length > 0 ? textParts.join("\n") : void 0;
|
|
18238
|
+
}
|
|
18093
18239
|
var GLOBAL_LOGS_KEY = Symbol.for("agentv.claudeLogs");
|
|
18094
18240
|
var GLOBAL_SUBSCRIBERS_KEY = Symbol.for("agentv.claudeLogSubscribers");
|
|
18095
18241
|
function getClaudeLogStore() {
|
|
@@ -18249,11 +18395,12 @@ var ClaudeCliProvider = class {
|
|
|
18249
18395
|
if (betaMessage && typeof betaMessage === "object") {
|
|
18250
18396
|
const msg = betaMessage;
|
|
18251
18397
|
const content = msg.content;
|
|
18398
|
+
const structuredContent = toContentArray(content);
|
|
18252
18399
|
const textContent = extractTextContent2(content);
|
|
18253
18400
|
const toolCalls = extractToolCalls(content);
|
|
18254
18401
|
const outputMsg = {
|
|
18255
18402
|
role: "assistant",
|
|
18256
|
-
content: textContent,
|
|
18403
|
+
content: structuredContent ?? textContent,
|
|
18257
18404
|
toolCalls: toolCalls.length > 0 ? toolCalls : void 0
|
|
18258
18405
|
};
|
|
18259
18406
|
output.push(outputMsg);
|
|
@@ -18592,25 +18739,6 @@ function summarizeEvent(event) {
|
|
|
18592
18739
|
return void 0;
|
|
18593
18740
|
}
|
|
18594
18741
|
}
|
|
18595
|
-
function extractTextContent2(content) {
|
|
18596
|
-
if (typeof content === "string") {
|
|
18597
|
-
return content;
|
|
18598
|
-
}
|
|
18599
|
-
if (!Array.isArray(content)) {
|
|
18600
|
-
return void 0;
|
|
18601
|
-
}
|
|
18602
|
-
const textParts = [];
|
|
18603
|
-
for (const part of content) {
|
|
18604
|
-
if (!part || typeof part !== "object") {
|
|
18605
|
-
continue;
|
|
18606
|
-
}
|
|
18607
|
-
const p = part;
|
|
18608
|
-
if (p.type === "text" && typeof p.text === "string") {
|
|
18609
|
-
textParts.push(p.text);
|
|
18610
|
-
}
|
|
18611
|
-
}
|
|
18612
|
-
return textParts.length > 0 ? textParts.join("\n") : void 0;
|
|
18613
|
-
}
|
|
18614
18742
|
function extractToolCalls(content) {
|
|
18615
18743
|
if (!Array.isArray(content)) {
|
|
18616
18744
|
return [];
|
|
@@ -18777,11 +18905,12 @@ var ClaudeSdkProvider = class {
|
|
|
18777
18905
|
if (betaMessage && typeof betaMessage === "object") {
|
|
18778
18906
|
const msg = betaMessage;
|
|
18779
18907
|
const content = msg.content;
|
|
18780
|
-
const
|
|
18908
|
+
const structuredContent = toContentArray(content);
|
|
18909
|
+
const textContent = extractTextContent2(content);
|
|
18781
18910
|
const toolCalls = extractToolCalls2(content);
|
|
18782
18911
|
const outputMsg = {
|
|
18783
18912
|
role: "assistant",
|
|
18784
|
-
content: textContent,
|
|
18913
|
+
content: structuredContent ?? textContent,
|
|
18785
18914
|
toolCalls: toolCalls.length > 0 ? toolCalls : void 0
|
|
18786
18915
|
};
|
|
18787
18916
|
output.push(outputMsg);
|
|
@@ -18899,25 +19028,6 @@ var ClaudeSdkProvider = class {
|
|
|
18899
19028
|
}
|
|
18900
19029
|
}
|
|
18901
19030
|
};
|
|
18902
|
-
function extractTextContent22(content) {
|
|
18903
|
-
if (typeof content === "string") {
|
|
18904
|
-
return content;
|
|
18905
|
-
}
|
|
18906
|
-
if (!Array.isArray(content)) {
|
|
18907
|
-
return void 0;
|
|
18908
|
-
}
|
|
18909
|
-
const textParts = [];
|
|
18910
|
-
for (const part of content) {
|
|
18911
|
-
if (!part || typeof part !== "object") {
|
|
18912
|
-
continue;
|
|
18913
|
-
}
|
|
18914
|
-
const p = part;
|
|
18915
|
-
if (p.type === "text" && typeof p.text === "string") {
|
|
18916
|
-
textParts.push(p.text);
|
|
18917
|
-
}
|
|
18918
|
-
}
|
|
18919
|
-
return textParts.length > 0 ? textParts.join("\n") : void 0;
|
|
18920
|
-
}
|
|
18921
19031
|
function extractToolCalls2(content) {
|
|
18922
19032
|
if (!Array.isArray(content)) {
|
|
18923
19033
|
return [];
|
|
@@ -19133,7 +19243,7 @@ function convertMessages(messages) {
|
|
|
19133
19243
|
return messages.map((msg) => ({
|
|
19134
19244
|
role: msg.role,
|
|
19135
19245
|
name: msg.name,
|
|
19136
|
-
content: msg.content,
|
|
19246
|
+
content: isContentArray(msg.content) ? msg.content : typeof msg.content === "string" ? msg.content : void 0,
|
|
19137
19247
|
toolCalls: msg.tool_calls?.map((tc) => ({
|
|
19138
19248
|
tool: tc.tool,
|
|
19139
19249
|
input: tc.input,
|
|
@@ -21319,6 +21429,35 @@ function extractPiTextContent(content) {
|
|
|
21319
21429
|
}
|
|
21320
21430
|
return textParts.length > 0 ? textParts.join("\n") : void 0;
|
|
21321
21431
|
}
|
|
21432
|
+
function toPiContentArray(content) {
|
|
21433
|
+
if (!Array.isArray(content)) return void 0;
|
|
21434
|
+
let hasNonText = false;
|
|
21435
|
+
const blocks = [];
|
|
21436
|
+
for (const part of content) {
|
|
21437
|
+
if (!part || typeof part !== "object") continue;
|
|
21438
|
+
const p = part;
|
|
21439
|
+
if (p.type === "text" && typeof p.text === "string") {
|
|
21440
|
+
blocks.push({ type: "text", text: p.text });
|
|
21441
|
+
} else if (p.type === "image") {
|
|
21442
|
+
const mediaType = typeof p.media_type === "string" ? p.media_type : "application/octet-stream";
|
|
21443
|
+
let source = "";
|
|
21444
|
+
if (typeof p.source === "object" && p.source !== null) {
|
|
21445
|
+
const src = p.source;
|
|
21446
|
+
const srcMediaType = typeof src.media_type === "string" ? src.media_type : mediaType;
|
|
21447
|
+
source = typeof src.data === "string" ? `data:${srcMediaType};base64,${src.data}` : "";
|
|
21448
|
+
}
|
|
21449
|
+
if (!source && typeof p.url === "string") {
|
|
21450
|
+
source = p.url;
|
|
21451
|
+
}
|
|
21452
|
+
if (source) {
|
|
21453
|
+
blocks.push({ type: "image", media_type: mediaType, source });
|
|
21454
|
+
hasNonText = true;
|
|
21455
|
+
}
|
|
21456
|
+
} else if (p.type === "tool_use" || p.type === "tool_result") {
|
|
21457
|
+
}
|
|
21458
|
+
}
|
|
21459
|
+
return hasNonText && blocks.length > 0 ? blocks : void 0;
|
|
21460
|
+
}
|
|
21322
21461
|
function toFiniteNumber(value) {
|
|
21323
21462
|
if (typeof value === "number" && Number.isFinite(value)) return value;
|
|
21324
21463
|
return void 0;
|
|
@@ -22478,7 +22617,8 @@ function convertAgentMessage(message, toolTrackers, completedToolResults) {
|
|
|
22478
22617
|
}
|
|
22479
22618
|
const msg = message;
|
|
22480
22619
|
const role = typeof msg.role === "string" ? msg.role : "unknown";
|
|
22481
|
-
const
|
|
22620
|
+
const structuredContent = toPiContentArray(msg.content);
|
|
22621
|
+
const content = structuredContent ?? extractPiTextContent(msg.content);
|
|
22482
22622
|
const toolCalls = extractToolCalls4(msg.content, toolTrackers, completedToolResults);
|
|
22483
22623
|
const startTimeVal = typeof msg.timestamp === "number" ? new Date(msg.timestamp).toISOString() : typeof msg.timestamp === "string" ? msg.timestamp : void 0;
|
|
22484
22624
|
let msgTokenUsage;
|
|
@@ -24233,13 +24373,13 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
|
24233
24373
|
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
24234
24374
|
const { mkdir: mkdir16, readFile: readFile14, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
|
|
24235
24375
|
const { tmpdir: tmpdir3 } = await import("node:os");
|
|
24236
|
-
const
|
|
24376
|
+
const path48 = await import("node:path");
|
|
24237
24377
|
const { randomUUID: randomUUID10 } = await import("node:crypto");
|
|
24238
|
-
const dir =
|
|
24378
|
+
const dir = path48.join(tmpdir3(), `agentv-exec-${randomUUID10()}`);
|
|
24239
24379
|
await mkdir16(dir, { recursive: true });
|
|
24240
|
-
const stdinPath =
|
|
24241
|
-
const stdoutPath =
|
|
24242
|
-
const stderrPath =
|
|
24380
|
+
const stdinPath = path48.join(dir, "stdin.txt");
|
|
24381
|
+
const stdoutPath = path48.join(dir, "stdout.txt");
|
|
24382
|
+
const stderrPath = path48.join(dir, "stderr.txt");
|
|
24243
24383
|
await writeFile9(stdinPath, stdinPayload, "utf8");
|
|
24244
24384
|
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
24245
24385
|
const { spawn: spawn5 } = await import("node:child_process");
|
|
@@ -24547,6 +24687,56 @@ function toCamelCaseDeep(obj) {
|
|
|
24547
24687
|
return obj;
|
|
24548
24688
|
}
|
|
24549
24689
|
var FILE_BACKED_OUTPUT_THRESHOLD = 5e4;
|
|
24690
|
+
var DATA_URI_RE = /^data:([^;]+);base64,(.+)$/s;
|
|
24691
|
+
async function materializeContentForGrader(messages, getWorkDir) {
|
|
24692
|
+
if (!messages || messages.length === 0) return messages ?? null;
|
|
24693
|
+
let hasAnyImage = false;
|
|
24694
|
+
for (const msg of messages) {
|
|
24695
|
+
if (isContentArray(msg.content)) {
|
|
24696
|
+
for (const block of msg.content) {
|
|
24697
|
+
if (block.type === "image") {
|
|
24698
|
+
hasAnyImage = true;
|
|
24699
|
+
break;
|
|
24700
|
+
}
|
|
24701
|
+
}
|
|
24702
|
+
}
|
|
24703
|
+
if (hasAnyImage) break;
|
|
24704
|
+
}
|
|
24705
|
+
if (!hasAnyImage) return messages;
|
|
24706
|
+
let counter = 0;
|
|
24707
|
+
const result = [];
|
|
24708
|
+
for (const msg of messages) {
|
|
24709
|
+
if (!isContentArray(msg.content)) {
|
|
24710
|
+
result.push(msg);
|
|
24711
|
+
continue;
|
|
24712
|
+
}
|
|
24713
|
+
if (!msg.content.some((b) => b.type === "image")) {
|
|
24714
|
+
result.push(msg);
|
|
24715
|
+
continue;
|
|
24716
|
+
}
|
|
24717
|
+
const blocks = [];
|
|
24718
|
+
for (const block of msg.content) {
|
|
24719
|
+
if (block.type !== "image") {
|
|
24720
|
+
blocks.push({ ...block });
|
|
24721
|
+
continue;
|
|
24722
|
+
}
|
|
24723
|
+
const img = block;
|
|
24724
|
+
const match = DATA_URI_RE.exec(img.source);
|
|
24725
|
+
if (match) {
|
|
24726
|
+
const [, mediaType, base64Data] = match;
|
|
24727
|
+
const ext = mediaType.split("/")[1] === "jpeg" ? "jpg" : mediaType.split("/")[1] ?? "bin";
|
|
24728
|
+
const dir = await getWorkDir();
|
|
24729
|
+
const filePath = join(dir, `img-${counter++}.${ext}`);
|
|
24730
|
+
await writeFile6(filePath, Buffer.from(base64Data, "base64"));
|
|
24731
|
+
blocks.push({ type: "image", media_type: img.media_type, path: filePath });
|
|
24732
|
+
} else {
|
|
24733
|
+
blocks.push({ type: "image", media_type: img.media_type, path: img.source });
|
|
24734
|
+
}
|
|
24735
|
+
}
|
|
24736
|
+
result.push({ ...msg, content: blocks });
|
|
24737
|
+
}
|
|
24738
|
+
return result;
|
|
24739
|
+
}
|
|
24550
24740
|
var CodeEvaluator = class {
|
|
24551
24741
|
kind = "code-grader";
|
|
24552
24742
|
command;
|
|
@@ -24562,7 +24752,18 @@ var CodeEvaluator = class {
|
|
|
24562
24752
|
this.target = options.target;
|
|
24563
24753
|
}
|
|
24564
24754
|
async evaluate(context2) {
|
|
24565
|
-
let
|
|
24755
|
+
let imageTmpDir;
|
|
24756
|
+
const getImageDir = async () => {
|
|
24757
|
+
if (!imageTmpDir) {
|
|
24758
|
+
imageTmpDir = await mkdtemp2(join(tmpdir2(), "agentv-img-"));
|
|
24759
|
+
}
|
|
24760
|
+
return imageTmpDir;
|
|
24761
|
+
};
|
|
24762
|
+
const materializedOutput = await materializeContentForGrader(
|
|
24763
|
+
context2.output,
|
|
24764
|
+
getImageDir
|
|
24765
|
+
);
|
|
24766
|
+
let outputForPayload = materializedOutput;
|
|
24566
24767
|
let outputPath;
|
|
24567
24768
|
if (outputForPayload) {
|
|
24568
24769
|
const serialized = JSON.stringify(outputForPayload);
|
|
@@ -24575,12 +24776,17 @@ var CodeEvaluator = class {
|
|
|
24575
24776
|
}
|
|
24576
24777
|
const payload = {
|
|
24577
24778
|
criteria: context2.evalCase.criteria,
|
|
24578
|
-
expectedOutput:
|
|
24579
|
-
|
|
24779
|
+
expectedOutput: await materializeContentForGrader(
|
|
24780
|
+
context2.evalCase.expected_output,
|
|
24781
|
+
getImageDir
|
|
24782
|
+
),
|
|
24580
24783
|
output: outputForPayload,
|
|
24581
24784
|
outputPath,
|
|
24582
24785
|
inputFiles: context2.evalCase.file_paths,
|
|
24583
|
-
input:
|
|
24786
|
+
input: await materializeContentForGrader(
|
|
24787
|
+
context2.evalCase.input,
|
|
24788
|
+
getImageDir
|
|
24789
|
+
),
|
|
24584
24790
|
trace: context2.trace ?? null,
|
|
24585
24791
|
tokenUsage: context2.tokenUsage ?? null,
|
|
24586
24792
|
costUsd: context2.costUsd ?? null,
|
|
@@ -24589,9 +24795,7 @@ var CodeEvaluator = class {
|
|
|
24589
24795
|
endTime: context2.endTime ?? null,
|
|
24590
24796
|
fileChanges: context2.fileChanges ?? null,
|
|
24591
24797
|
workspacePath: context2.workspacePath ?? null,
|
|
24592
|
-
config: this.config ?? null
|
|
24593
|
-
inputText: context2.evalCase.question,
|
|
24594
|
-
expectedOutputText: context2.evalCase.reference_answer ?? ""
|
|
24798
|
+
config: this.config ?? null
|
|
24595
24799
|
};
|
|
24596
24800
|
const inputPayload = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
|
|
24597
24801
|
let proxyEnv;
|
|
@@ -24681,6 +24885,10 @@ var CodeEvaluator = class {
|
|
|
24681
24885
|
await rm3(dirname(outputPath), { recursive: true, force: true }).catch(() => {
|
|
24682
24886
|
});
|
|
24683
24887
|
}
|
|
24888
|
+
if (imageTmpDir) {
|
|
24889
|
+
await rm3(imageTmpDir, { recursive: true, force: true }).catch(() => {
|
|
24890
|
+
});
|
|
24891
|
+
}
|
|
24684
24892
|
}
|
|
24685
24893
|
}
|
|
24686
24894
|
};
|
|
@@ -24749,13 +24957,13 @@ Be concise and focused in your evaluation. Provide succinct, specific feedback r
|
|
|
24749
24957
|
{{${TEMPLATE_VARIABLES.CRITERIA}}}
|
|
24750
24958
|
|
|
24751
24959
|
[[ ## question ## ]]
|
|
24752
|
-
{{${TEMPLATE_VARIABLES.
|
|
24960
|
+
{{${TEMPLATE_VARIABLES.INPUT}}}
|
|
24753
24961
|
|
|
24754
24962
|
[[ ## reference_answer ## ]]
|
|
24755
|
-
{{${TEMPLATE_VARIABLES.
|
|
24963
|
+
{{${TEMPLATE_VARIABLES.EXPECTED_OUTPUT}}}
|
|
24756
24964
|
|
|
24757
24965
|
[[ ## answer ## ]]
|
|
24758
|
-
{{${TEMPLATE_VARIABLES.
|
|
24966
|
+
{{${TEMPLATE_VARIABLES.OUTPUT}}}`;
|
|
24759
24967
|
var freeformEvaluationSchema = external_exports2.object({
|
|
24760
24968
|
score: external_exports2.number().min(0).max(1).describe("Score between 0.0 and 1.0"),
|
|
24761
24969
|
assertions: external_exports2.array(
|
|
@@ -24827,21 +25035,19 @@ var LlmGraderEvaluator = class {
|
|
|
24827
25035
|
async evaluateFreeform(context2, graderProvider) {
|
|
24828
25036
|
const formattedQuestion = context2.promptInputs.question && context2.promptInputs.question.trim().length > 0 ? context2.promptInputs.question : context2.evalCase.question;
|
|
24829
25037
|
const variables = {
|
|
24830
|
-
[TEMPLATE_VARIABLES.INPUT]:
|
|
24831
|
-
[TEMPLATE_VARIABLES.
|
|
24832
|
-
|
|
24833
|
-
null,
|
|
24834
|
-
2
|
|
24835
|
-
),
|
|
24836
|
-
[TEMPLATE_VARIABLES.OUTPUT]: JSON.stringify(context2.output ?? [], null, 2),
|
|
25038
|
+
[TEMPLATE_VARIABLES.INPUT]: formattedQuestion.trim(),
|
|
25039
|
+
[TEMPLATE_VARIABLES.OUTPUT]: context2.candidate.trim(),
|
|
25040
|
+
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: (context2.evalCase.reference_answer ?? "").trim(),
|
|
24837
25041
|
[TEMPLATE_VARIABLES.CRITERIA]: context2.evalCase.criteria.trim(),
|
|
24838
25042
|
[TEMPLATE_VARIABLES.FILE_CHANGES]: context2.fileChanges ?? "",
|
|
25043
|
+
// Deprecated aliases — same values as the primary variables above
|
|
24839
25044
|
[TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
|
|
24840
25045
|
[TEMPLATE_VARIABLES.OUTPUT_TEXT]: context2.candidate.trim(),
|
|
24841
25046
|
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (context2.evalCase.reference_answer ?? "").trim()
|
|
24842
25047
|
};
|
|
24843
25048
|
const systemPrompt = buildOutputSchema();
|
|
24844
25049
|
const evaluatorTemplate = context2.evaluatorTemplateOverride ?? this.evaluatorTemplate ?? DEFAULT_EVALUATOR_TEMPLATE;
|
|
25050
|
+
warnDeprecatedTemplateVars(evaluatorTemplate);
|
|
24845
25051
|
let userPrompt = substituteVariables(evaluatorTemplate, variables);
|
|
24846
25052
|
if (context2.fileChanges && !context2.evaluatorTemplateOverride && !this.evaluatorTemplate) {
|
|
24847
25053
|
userPrompt += `
|
|
@@ -24853,13 +25059,15 @@ ${context2.fileChanges}`;
|
|
|
24853
25059
|
userPrompt,
|
|
24854
25060
|
systemPrompt
|
|
24855
25061
|
};
|
|
25062
|
+
const images = context2.output ? extractImageBlocks(context2.output) : [];
|
|
24856
25063
|
try {
|
|
24857
25064
|
const { data, tokenUsage } = await this.runWithRetry({
|
|
24858
25065
|
context: context2,
|
|
24859
25066
|
graderProvider,
|
|
24860
25067
|
systemPrompt,
|
|
24861
25068
|
userPrompt,
|
|
24862
|
-
schema: freeformEvaluationSchema
|
|
25069
|
+
schema: freeformEvaluationSchema,
|
|
25070
|
+
images
|
|
24863
25071
|
});
|
|
24864
25072
|
const score = clampScore(data.score);
|
|
24865
25073
|
const assertions = Array.isArray(data.assertions) ? data.assertions.slice(0, 8) : [];
|
|
@@ -24903,13 +25111,15 @@ ${context2.fileChanges}`;
|
|
|
24903
25111
|
userPrompt: prompt,
|
|
24904
25112
|
systemPrompt
|
|
24905
25113
|
};
|
|
25114
|
+
const images = context2.output ? extractImageBlocks(context2.output) : [];
|
|
24906
25115
|
try {
|
|
24907
25116
|
const { data, tokenUsage } = await this.runWithRetry({
|
|
24908
25117
|
context: context2,
|
|
24909
25118
|
graderProvider,
|
|
24910
25119
|
systemPrompt,
|
|
24911
25120
|
userPrompt: prompt,
|
|
24912
|
-
schema: rubricEvaluationSchema
|
|
25121
|
+
schema: rubricEvaluationSchema,
|
|
25122
|
+
images
|
|
24913
25123
|
});
|
|
24914
25124
|
const { score, verdict, assertions } = calculateRubricScore(data, rubrics);
|
|
24915
25125
|
return {
|
|
@@ -24946,13 +25156,15 @@ ${context2.fileChanges}`;
|
|
|
24946
25156
|
userPrompt: prompt,
|
|
24947
25157
|
systemPrompt
|
|
24948
25158
|
};
|
|
25159
|
+
const images = context2.output ? extractImageBlocks(context2.output) : [];
|
|
24949
25160
|
try {
|
|
24950
25161
|
const { data, tokenUsage } = await this.runWithRetry({
|
|
24951
25162
|
context: context2,
|
|
24952
25163
|
graderProvider,
|
|
24953
25164
|
systemPrompt,
|
|
24954
25165
|
userPrompt: prompt,
|
|
24955
|
-
schema: scoreRangeEvaluationSchema
|
|
25166
|
+
schema: scoreRangeEvaluationSchema,
|
|
25167
|
+
images
|
|
24956
25168
|
});
|
|
24957
25169
|
const { score, verdict, assertions, details } = calculateScoreRangeResult(data, rubrics);
|
|
24958
25170
|
return {
|
|
@@ -25159,12 +25371,17 @@ ${context2.fileChanges}`;
|
|
|
25159
25371
|
const formattedQuestion = context2.promptInputs.question && context2.promptInputs.question.trim().length > 0 ? context2.promptInputs.question : context2.evalCase.question;
|
|
25160
25372
|
const variables = {
|
|
25161
25373
|
[TEMPLATE_VARIABLES.CRITERIA]: context2.evalCase.criteria.trim(),
|
|
25374
|
+
[TEMPLATE_VARIABLES.INPUT]: formattedQuestion.trim(),
|
|
25375
|
+
[TEMPLATE_VARIABLES.OUTPUT]: context2.candidate.trim(),
|
|
25376
|
+
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: (context2.evalCase.reference_answer ?? "").trim(),
|
|
25377
|
+
[TEMPLATE_VARIABLES.FILE_CHANGES]: context2.fileChanges ?? "",
|
|
25378
|
+
// Deprecated aliases
|
|
25162
25379
|
[TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
|
|
25163
25380
|
[TEMPLATE_VARIABLES.OUTPUT_TEXT]: context2.candidate.trim(),
|
|
25164
|
-
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (context2.evalCase.reference_answer ?? "").trim()
|
|
25165
|
-
[TEMPLATE_VARIABLES.FILE_CHANGES]: context2.fileChanges ?? ""
|
|
25381
|
+
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (context2.evalCase.reference_answer ?? "").trim()
|
|
25166
25382
|
};
|
|
25167
25383
|
if (this.evaluatorTemplate) {
|
|
25384
|
+
warnDeprecatedTemplateVars(this.evaluatorTemplate);
|
|
25168
25385
|
return substituteVariables(this.evaluatorTemplate, variables);
|
|
25169
25386
|
}
|
|
25170
25387
|
const config = context2.evaluator;
|
|
@@ -25215,11 +25432,16 @@ ${context2.fileChanges}`;
|
|
|
25215
25432
|
if (this.evaluatorTemplate) {
|
|
25216
25433
|
const variables = {
|
|
25217
25434
|
[TEMPLATE_VARIABLES.CRITERIA]: context2.evalCase.criteria.trim(),
|
|
25435
|
+
[TEMPLATE_VARIABLES.INPUT]: formattedQuestion.trim(),
|
|
25436
|
+
[TEMPLATE_VARIABLES.OUTPUT]: context2.candidate.trim(),
|
|
25437
|
+
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: (context2.evalCase.reference_answer ?? "").trim(),
|
|
25438
|
+
[TEMPLATE_VARIABLES.FILE_CHANGES]: context2.fileChanges ?? "",
|
|
25439
|
+
// Deprecated aliases
|
|
25218
25440
|
[TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
|
|
25219
25441
|
[TEMPLATE_VARIABLES.OUTPUT_TEXT]: context2.candidate.trim(),
|
|
25220
|
-
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (context2.evalCase.reference_answer ?? "").trim()
|
|
25221
|
-
[TEMPLATE_VARIABLES.FILE_CHANGES]: context2.fileChanges ?? ""
|
|
25442
|
+
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (context2.evalCase.reference_answer ?? "").trim()
|
|
25222
25443
|
};
|
|
25444
|
+
warnDeprecatedTemplateVars(this.evaluatorTemplate);
|
|
25223
25445
|
const customPrompt = substituteVariables(this.evaluatorTemplate, variables);
|
|
25224
25446
|
const outputSchema2 = rubrics && rubrics.length > 0 ? buildRubricOutputSchema() : buildOutputSchema();
|
|
25225
25447
|
return `${customPrompt}
|
|
@@ -25390,18 +25612,35 @@ ${outputSchema2}`;
|
|
|
25390
25612
|
// LLM mode retry logic
|
|
25391
25613
|
// ---------------------------------------------------------------------------
|
|
25392
25614
|
async runWithRetry(options) {
|
|
25393
|
-
const { context: context2, graderProvider, systemPrompt, userPrompt, schema } = options;
|
|
25615
|
+
const { context: context2, graderProvider, systemPrompt, userPrompt, schema, images } = options;
|
|
25394
25616
|
let lastError;
|
|
25395
25617
|
for (let attempt = 1; attempt <= 3; attempt++) {
|
|
25396
25618
|
try {
|
|
25397
25619
|
const model = graderProvider.asLanguageModel?.();
|
|
25398
25620
|
if (model) {
|
|
25399
|
-
const
|
|
25621
|
+
const modelOptions = {
|
|
25622
|
+
...this.maxOutputTokens ? { maxTokens: this.maxOutputTokens } : {},
|
|
25623
|
+
...typeof this.temperature === "number" ? { temperature: this.temperature } : {}
|
|
25624
|
+
};
|
|
25625
|
+
const hasImages = images && images.length > 0;
|
|
25626
|
+
const result = hasImages ? await generateText({
|
|
25627
|
+
model,
|
|
25628
|
+
system: systemPrompt,
|
|
25629
|
+
messages: [
|
|
25630
|
+
{
|
|
25631
|
+
role: "user",
|
|
25632
|
+
content: [
|
|
25633
|
+
{ type: "text", text: userPrompt },
|
|
25634
|
+
...toAiSdkImageParts(images)
|
|
25635
|
+
]
|
|
25636
|
+
}
|
|
25637
|
+
],
|
|
25638
|
+
...modelOptions
|
|
25639
|
+
}) : await generateText({
|
|
25400
25640
|
model,
|
|
25401
25641
|
system: systemPrompt,
|
|
25402
25642
|
prompt: userPrompt,
|
|
25403
|
-
...
|
|
25404
|
-
...typeof this.temperature === "number" ? { temperature: this.temperature } : {}
|
|
25643
|
+
...modelOptions
|
|
25405
25644
|
});
|
|
25406
25645
|
const data2 = schema.parse(parseJsonFromText(result.text));
|
|
25407
25646
|
const rawUsage = result.usage;
|
|
@@ -25461,6 +25700,26 @@ function substituteVariables(template, variables) {
|
|
|
25461
25700
|
return variables[varName] ?? match;
|
|
25462
25701
|
});
|
|
25463
25702
|
}
|
|
25703
|
+
var ANSI_YELLOW7 = "\x1B[33m";
|
|
25704
|
+
var ANSI_RESET8 = "\x1B[0m";
|
|
25705
|
+
var warnedTemplateStrings = /* @__PURE__ */ new Set();
|
|
25706
|
+
function warnDeprecatedTemplateVars(template) {
|
|
25707
|
+
if (warnedTemplateStrings.has(template)) return;
|
|
25708
|
+
const used = [];
|
|
25709
|
+
for (const [deprecated, replacement] of DEPRECATED_TEMPLATE_VARIABLES) {
|
|
25710
|
+
if (new RegExp(`\\{\\{\\s*${deprecated}\\s*\\}\\}`).test(template)) {
|
|
25711
|
+
used.push(`{{ ${deprecated} }} \u2192 {{ ${replacement} }}`);
|
|
25712
|
+
}
|
|
25713
|
+
}
|
|
25714
|
+
if (used.length > 0) {
|
|
25715
|
+
warnedTemplateStrings.add(template);
|
|
25716
|
+
console.warn(
|
|
25717
|
+
`${ANSI_YELLOW7}\u26A0 Deprecated template variables detected (they still work but will be removed in a future version):
|
|
25718
|
+
${used.join("\n ")}
|
|
25719
|
+
Update your custom evaluator template to use the new names.${ANSI_RESET8}`
|
|
25720
|
+
);
|
|
25721
|
+
}
|
|
25722
|
+
}
|
|
25464
25723
|
function calculateRubricScore(result, rubrics) {
|
|
25465
25724
|
const rubricMap = new Map(rubrics.map((rubric) => [rubric.id, rubric]));
|
|
25466
25725
|
const assertions = [];
|
|
@@ -25555,6 +25814,26 @@ function calculateScoreRangeResult(result, rubrics) {
|
|
|
25555
25814
|
}
|
|
25556
25815
|
};
|
|
25557
25816
|
}
|
|
25817
|
+
function extractImageBlocks(messages) {
|
|
25818
|
+
const images = [];
|
|
25819
|
+
for (const msg of messages) {
|
|
25820
|
+
if (msg.role !== "assistant") continue;
|
|
25821
|
+
if (!isContentArray(msg.content)) continue;
|
|
25822
|
+
for (const block of msg.content) {
|
|
25823
|
+
if (block.type === "image") {
|
|
25824
|
+
images.push(block);
|
|
25825
|
+
}
|
|
25826
|
+
}
|
|
25827
|
+
}
|
|
25828
|
+
return images;
|
|
25829
|
+
}
|
|
25830
|
+
function toAiSdkImageParts(images) {
|
|
25831
|
+
return images.map((img) => ({
|
|
25832
|
+
type: "image",
|
|
25833
|
+
image: img.source,
|
|
25834
|
+
mediaType: img.media_type || void 0
|
|
25835
|
+
}));
|
|
25836
|
+
}
|
|
25558
25837
|
function resolveSandboxed(basePath, relativePath) {
|
|
25559
25838
|
const resolved = path35.resolve(basePath, relativePath);
|
|
25560
25839
|
if (!resolved.startsWith(basePath + path35.sep) && resolved !== basePath) {
|
|
@@ -26288,115 +26567,115 @@ var FieldAccuracyEvaluator = class {
|
|
|
26288
26567
|
* Evaluate a single field against the expected value.
|
|
26289
26568
|
*/
|
|
26290
26569
|
evaluateField(fieldConfig, candidateData, expectedData) {
|
|
26291
|
-
const { path:
|
|
26292
|
-
const candidateValue = resolvePath(candidateData,
|
|
26293
|
-
const expectedValue = resolvePath(expectedData,
|
|
26570
|
+
const { path: path48, match, required = true, weight = 1 } = fieldConfig;
|
|
26571
|
+
const candidateValue = resolvePath(candidateData, path48);
|
|
26572
|
+
const expectedValue = resolvePath(expectedData, path48);
|
|
26294
26573
|
if (expectedValue === void 0) {
|
|
26295
26574
|
return {
|
|
26296
|
-
path:
|
|
26575
|
+
path: path48,
|
|
26297
26576
|
score: 1,
|
|
26298
26577
|
// No expected value means no comparison needed
|
|
26299
26578
|
weight,
|
|
26300
26579
|
hit: true,
|
|
26301
|
-
message: `${
|
|
26580
|
+
message: `${path48}: no expected value`
|
|
26302
26581
|
};
|
|
26303
26582
|
}
|
|
26304
26583
|
if (candidateValue === void 0) {
|
|
26305
26584
|
if (required) {
|
|
26306
26585
|
return {
|
|
26307
|
-
path:
|
|
26586
|
+
path: path48,
|
|
26308
26587
|
score: 0,
|
|
26309
26588
|
weight,
|
|
26310
26589
|
hit: false,
|
|
26311
|
-
message: `${
|
|
26590
|
+
message: `${path48} (required, missing)`
|
|
26312
26591
|
};
|
|
26313
26592
|
}
|
|
26314
26593
|
return {
|
|
26315
|
-
path:
|
|
26594
|
+
path: path48,
|
|
26316
26595
|
score: 1,
|
|
26317
26596
|
// Don't penalize missing optional fields
|
|
26318
26597
|
weight: 0,
|
|
26319
26598
|
// Zero weight means it won't affect the score
|
|
26320
26599
|
hit: true,
|
|
26321
|
-
message: `${
|
|
26600
|
+
message: `${path48}: optional field missing`
|
|
26322
26601
|
};
|
|
26323
26602
|
}
|
|
26324
26603
|
switch (match) {
|
|
26325
26604
|
case "exact":
|
|
26326
|
-
return this.compareExact(
|
|
26605
|
+
return this.compareExact(path48, candidateValue, expectedValue, weight);
|
|
26327
26606
|
case "numeric_tolerance":
|
|
26328
26607
|
return this.compareNumericTolerance(
|
|
26329
|
-
|
|
26608
|
+
path48,
|
|
26330
26609
|
candidateValue,
|
|
26331
26610
|
expectedValue,
|
|
26332
26611
|
fieldConfig,
|
|
26333
26612
|
weight
|
|
26334
26613
|
);
|
|
26335
26614
|
case "date":
|
|
26336
|
-
return this.compareDate(
|
|
26615
|
+
return this.compareDate(path48, candidateValue, expectedValue, fieldConfig, weight);
|
|
26337
26616
|
default:
|
|
26338
26617
|
return {
|
|
26339
|
-
path:
|
|
26618
|
+
path: path48,
|
|
26340
26619
|
score: 0,
|
|
26341
26620
|
weight,
|
|
26342
26621
|
hit: false,
|
|
26343
|
-
message: `${
|
|
26622
|
+
message: `${path48}: unknown match type "${match}"`
|
|
26344
26623
|
};
|
|
26345
26624
|
}
|
|
26346
26625
|
}
|
|
26347
26626
|
/**
|
|
26348
26627
|
* Exact equality comparison.
|
|
26349
26628
|
*/
|
|
26350
|
-
compareExact(
|
|
26629
|
+
compareExact(path48, candidateValue, expectedValue, weight) {
|
|
26351
26630
|
if (deepEqual(candidateValue, expectedValue)) {
|
|
26352
26631
|
return {
|
|
26353
|
-
path:
|
|
26632
|
+
path: path48,
|
|
26354
26633
|
score: 1,
|
|
26355
26634
|
weight,
|
|
26356
26635
|
hit: true,
|
|
26357
|
-
message:
|
|
26636
|
+
message: path48
|
|
26358
26637
|
};
|
|
26359
26638
|
}
|
|
26360
26639
|
if (typeof candidateValue !== typeof expectedValue) {
|
|
26361
26640
|
return {
|
|
26362
|
-
path:
|
|
26641
|
+
path: path48,
|
|
26363
26642
|
score: 0,
|
|
26364
26643
|
weight,
|
|
26365
26644
|
hit: false,
|
|
26366
|
-
message: `${
|
|
26645
|
+
message: `${path48} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
|
|
26367
26646
|
};
|
|
26368
26647
|
}
|
|
26369
26648
|
return {
|
|
26370
|
-
path:
|
|
26649
|
+
path: path48,
|
|
26371
26650
|
score: 0,
|
|
26372
26651
|
weight,
|
|
26373
26652
|
hit: false,
|
|
26374
|
-
message: `${
|
|
26653
|
+
message: `${path48} (value mismatch)`
|
|
26375
26654
|
};
|
|
26376
26655
|
}
|
|
26377
26656
|
/**
|
|
26378
26657
|
* Numeric comparison with absolute or relative tolerance.
|
|
26379
26658
|
*/
|
|
26380
|
-
compareNumericTolerance(
|
|
26659
|
+
compareNumericTolerance(path48, candidateValue, expectedValue, fieldConfig, weight) {
|
|
26381
26660
|
const { tolerance = 0, relative = false } = fieldConfig;
|
|
26382
26661
|
const candidateNum = toNumber(candidateValue);
|
|
26383
26662
|
const expectedNum = toNumber(expectedValue);
|
|
26384
26663
|
if (candidateNum === null || expectedNum === null) {
|
|
26385
26664
|
return {
|
|
26386
|
-
path:
|
|
26665
|
+
path: path48,
|
|
26387
26666
|
score: 0,
|
|
26388
26667
|
weight,
|
|
26389
26668
|
hit: false,
|
|
26390
|
-
message: `${
|
|
26669
|
+
message: `${path48} (non-numeric value)`
|
|
26391
26670
|
};
|
|
26392
26671
|
}
|
|
26393
26672
|
if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
|
|
26394
26673
|
return {
|
|
26395
|
-
path:
|
|
26674
|
+
path: path48,
|
|
26396
26675
|
score: 0,
|
|
26397
26676
|
weight,
|
|
26398
26677
|
hit: false,
|
|
26399
|
-
message: `${
|
|
26678
|
+
message: `${path48} (invalid numeric value)`
|
|
26400
26679
|
};
|
|
26401
26680
|
}
|
|
26402
26681
|
const diff = Math.abs(candidateNum - expectedNum);
|
|
@@ -26409,61 +26688,61 @@ var FieldAccuracyEvaluator = class {
|
|
|
26409
26688
|
}
|
|
26410
26689
|
if (withinTolerance) {
|
|
26411
26690
|
return {
|
|
26412
|
-
path:
|
|
26691
|
+
path: path48,
|
|
26413
26692
|
score: 1,
|
|
26414
26693
|
weight,
|
|
26415
26694
|
hit: true,
|
|
26416
|
-
message: `${
|
|
26695
|
+
message: `${path48} (within tolerance: diff=${diff.toFixed(2)})`
|
|
26417
26696
|
};
|
|
26418
26697
|
}
|
|
26419
26698
|
return {
|
|
26420
|
-
path:
|
|
26699
|
+
path: path48,
|
|
26421
26700
|
score: 0,
|
|
26422
26701
|
weight,
|
|
26423
26702
|
hit: false,
|
|
26424
|
-
message: `${
|
|
26703
|
+
message: `${path48} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
|
|
26425
26704
|
};
|
|
26426
26705
|
}
|
|
26427
26706
|
/**
|
|
26428
26707
|
* Date comparison with format normalization.
|
|
26429
26708
|
*/
|
|
26430
|
-
compareDate(
|
|
26709
|
+
compareDate(path48, candidateValue, expectedValue, fieldConfig, weight) {
|
|
26431
26710
|
const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
|
|
26432
26711
|
const candidateDate = parseDate(String(candidateValue), formats);
|
|
26433
26712
|
const expectedDate = parseDate(String(expectedValue), formats);
|
|
26434
26713
|
if (candidateDate === null) {
|
|
26435
26714
|
return {
|
|
26436
|
-
path:
|
|
26715
|
+
path: path48,
|
|
26437
26716
|
score: 0,
|
|
26438
26717
|
weight,
|
|
26439
26718
|
hit: false,
|
|
26440
|
-
message: `${
|
|
26719
|
+
message: `${path48} (unparseable candidate date)`
|
|
26441
26720
|
};
|
|
26442
26721
|
}
|
|
26443
26722
|
if (expectedDate === null) {
|
|
26444
26723
|
return {
|
|
26445
|
-
path:
|
|
26724
|
+
path: path48,
|
|
26446
26725
|
score: 0,
|
|
26447
26726
|
weight,
|
|
26448
26727
|
hit: false,
|
|
26449
|
-
message: `${
|
|
26728
|
+
message: `${path48} (unparseable expected date)`
|
|
26450
26729
|
};
|
|
26451
26730
|
}
|
|
26452
26731
|
if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
|
|
26453
26732
|
return {
|
|
26454
|
-
path:
|
|
26733
|
+
path: path48,
|
|
26455
26734
|
score: 1,
|
|
26456
26735
|
weight,
|
|
26457
26736
|
hit: true,
|
|
26458
|
-
message:
|
|
26737
|
+
message: path48
|
|
26459
26738
|
};
|
|
26460
26739
|
}
|
|
26461
26740
|
return {
|
|
26462
|
-
path:
|
|
26741
|
+
path: path48,
|
|
26463
26742
|
score: 0,
|
|
26464
26743
|
weight,
|
|
26465
26744
|
hit: false,
|
|
26466
|
-
message: `${
|
|
26745
|
+
message: `${path48} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
|
|
26467
26746
|
};
|
|
26468
26747
|
}
|
|
26469
26748
|
/**
|
|
@@ -26496,11 +26775,11 @@ var FieldAccuracyEvaluator = class {
|
|
|
26496
26775
|
};
|
|
26497
26776
|
}
|
|
26498
26777
|
};
|
|
26499
|
-
function resolvePath(obj,
|
|
26500
|
-
if (!
|
|
26778
|
+
function resolvePath(obj, path48) {
|
|
26779
|
+
if (!path48 || !obj) {
|
|
26501
26780
|
return void 0;
|
|
26502
26781
|
}
|
|
26503
|
-
const parts =
|
|
26782
|
+
const parts = path48.split(/\.|\[|\]/).filter((p) => p.length > 0);
|
|
26504
26783
|
let current = obj;
|
|
26505
26784
|
for (const part of parts) {
|
|
26506
26785
|
if (current === null || current === void 0) {
|
|
@@ -26786,11 +27065,12 @@ function assembleLlmGraderPrompt(input) {
|
|
|
26786
27065
|
function assembleFreeform(evalCase, candidate, promptInputs, fileChanges, evaluatorTemplateOverride) {
|
|
26787
27066
|
const formattedQuestion = promptInputs.question && promptInputs.question.trim().length > 0 ? promptInputs.question : evalCase.question;
|
|
26788
27067
|
const variables = {
|
|
26789
|
-
[TEMPLATE_VARIABLES.INPUT]:
|
|
26790
|
-
[TEMPLATE_VARIABLES.
|
|
26791
|
-
[TEMPLATE_VARIABLES.
|
|
27068
|
+
[TEMPLATE_VARIABLES.INPUT]: formattedQuestion.trim(),
|
|
27069
|
+
[TEMPLATE_VARIABLES.OUTPUT]: candidate.trim(),
|
|
27070
|
+
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: (evalCase.reference_answer ?? "").trim(),
|
|
26792
27071
|
[TEMPLATE_VARIABLES.CRITERIA]: evalCase.criteria.trim(),
|
|
26793
27072
|
[TEMPLATE_VARIABLES.FILE_CHANGES]: fileChanges ?? "",
|
|
27073
|
+
// Deprecated aliases
|
|
26794
27074
|
[TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
|
|
26795
27075
|
[TEMPLATE_VARIABLES.OUTPUT_TEXT]: candidate.trim(),
|
|
26796
27076
|
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (evalCase.reference_answer ?? "").trim()
|
|
@@ -26973,8 +27253,8 @@ var TokenUsageEvaluator = class {
|
|
|
26973
27253
|
};
|
|
26974
27254
|
}
|
|
26975
27255
|
};
|
|
26976
|
-
function getNestedValue(obj,
|
|
26977
|
-
const parts =
|
|
27256
|
+
function getNestedValue(obj, path48) {
|
|
27257
|
+
const parts = path48.split(".");
|
|
26978
27258
|
let current = obj;
|
|
26979
27259
|
for (const part of parts) {
|
|
26980
27260
|
if (current === null || current === void 0 || typeof current !== "object") {
|
|
@@ -27824,16 +28104,13 @@ async function executePromptTemplate(script, context2, config, timeoutMs) {
|
|
|
27824
28104
|
const payload = {
|
|
27825
28105
|
criteria: context2.evalCase.criteria,
|
|
27826
28106
|
expectedOutput: context2.evalCase.expected_output,
|
|
27827
|
-
outputText: context2.candidate,
|
|
27828
28107
|
output: context2.output ?? null,
|
|
27829
28108
|
inputFiles: context2.evalCase.file_paths,
|
|
27830
28109
|
input: context2.evalCase.input,
|
|
27831
28110
|
trace: context2.trace ?? null,
|
|
27832
28111
|
fileChanges: context2.fileChanges ?? null,
|
|
27833
28112
|
workspacePath: context2.workspacePath ?? null,
|
|
27834
|
-
config: config ?? context2.config ?? null
|
|
27835
|
-
inputText: context2.evalCase.question,
|
|
27836
|
-
expectedOutputText: context2.evalCase.reference_answer ?? ""
|
|
28113
|
+
config: config ?? context2.config ?? null
|
|
27837
28114
|
};
|
|
27838
28115
|
const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
|
|
27839
28116
|
const scriptPath = script[script.length - 1];
|
|
@@ -29469,7 +29746,8 @@ async function runEvaluation(options) {
|
|
|
29469
29746
|
const budgetResult = {
|
|
29470
29747
|
timestamp: (now2 ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
29471
29748
|
testId: evalCase.id,
|
|
29472
|
-
|
|
29749
|
+
dataset: evalCase.dataset,
|
|
29750
|
+
category: evalCase.category,
|
|
29473
29751
|
score: 0,
|
|
29474
29752
|
assertions: [],
|
|
29475
29753
|
output: [],
|
|
@@ -29505,7 +29783,8 @@ async function runEvaluation(options) {
|
|
|
29505
29783
|
const haltResult = {
|
|
29506
29784
|
timestamp: (now2 ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
29507
29785
|
testId: evalCase.id,
|
|
29508
|
-
|
|
29786
|
+
dataset: evalCase.dataset,
|
|
29787
|
+
category: evalCase.category,
|
|
29509
29788
|
score: 0,
|
|
29510
29789
|
assertions: [],
|
|
29511
29790
|
output: [],
|
|
@@ -30504,7 +30783,8 @@ async function evaluateCandidate(options) {
|
|
|
30504
30783
|
return {
|
|
30505
30784
|
timestamp: completedAt.toISOString(),
|
|
30506
30785
|
testId: evalCase.id,
|
|
30507
|
-
|
|
30786
|
+
dataset: evalCase.dataset,
|
|
30787
|
+
category: evalCase.category,
|
|
30508
30788
|
conversationId: evalCase.conversation_id,
|
|
30509
30789
|
score: score.score,
|
|
30510
30790
|
assertions: score.assertions,
|
|
@@ -30854,7 +31134,8 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
|
|
|
30854
31134
|
return {
|
|
30855
31135
|
timestamp: timestamp.toISOString(),
|
|
30856
31136
|
testId: evalCase.id,
|
|
30857
|
-
|
|
31137
|
+
dataset: evalCase.dataset,
|
|
31138
|
+
category: evalCase.category,
|
|
30858
31139
|
conversationId: evalCase.conversation_id,
|
|
30859
31140
|
score: 0,
|
|
30860
31141
|
assertions: [{ text: `Error: ${message}`, passed: false }],
|
|
@@ -31405,6 +31686,15 @@ function trimBaselineResult(result) {
|
|
|
31405
31686
|
}
|
|
31406
31687
|
return trimmed;
|
|
31407
31688
|
}
|
|
31689
|
+
var DEFAULT_CATEGORY = "Uncategorized";
|
|
31690
|
+
function deriveCategory(relativePath) {
|
|
31691
|
+
const parts = relativePath.split(path47.sep);
|
|
31692
|
+
if (parts.length <= 1) {
|
|
31693
|
+
return DEFAULT_CATEGORY;
|
|
31694
|
+
}
|
|
31695
|
+
const dirs = parts.slice(0, -1).filter((d) => d !== "evals");
|
|
31696
|
+
return dirs.length > 0 ? dirs.join("/") : DEFAULT_CATEGORY;
|
|
31697
|
+
}
|
|
31408
31698
|
var OTEL_BACKEND_PRESETS = {
|
|
31409
31699
|
langfuse: {
|
|
31410
31700
|
name: "langfuse",
|
|
@@ -31527,7 +31817,7 @@ var OtelTraceExporter = class {
|
|
|
31527
31817
|
rootSpan.setAttribute("gen_ai.system", "agentv");
|
|
31528
31818
|
rootSpan.setAttribute("agentv.test_id", result.testId);
|
|
31529
31819
|
rootSpan.setAttribute("agentv.target", result.target);
|
|
31530
|
-
if (result.
|
|
31820
|
+
if (result.dataset) rootSpan.setAttribute("agentv.dataset", result.dataset);
|
|
31531
31821
|
rootSpan.setAttribute("agentv.score", result.score);
|
|
31532
31822
|
if (captureContent && result.output.length > 0) {
|
|
31533
31823
|
const lastMsg = result.output[result.output.length - 1];
|
|
@@ -31736,7 +32026,7 @@ var OtelStreamingObserver = class {
|
|
|
31736
32026
|
this.rootSpan.setAttribute("gen_ai.system", "agentv");
|
|
31737
32027
|
this.rootSpan.setAttribute("agentv.test_id", testId);
|
|
31738
32028
|
this.rootSpan.setAttribute("agentv.target", target);
|
|
31739
|
-
if (evalSet) this.rootSpan.setAttribute("agentv.
|
|
32029
|
+
if (evalSet) this.rootSpan.setAttribute("agentv.dataset", evalSet);
|
|
31740
32030
|
this.rootCtx = this.api.trace.setSpan(this.api.context.active(), this.rootSpan);
|
|
31741
32031
|
}
|
|
31742
32032
|
/** Create and immediately export a tool span */
|
|
@@ -31907,6 +32197,9 @@ function createAgentKernel() {
|
|
|
31907
32197
|
}
|
|
31908
32198
|
|
|
31909
32199
|
export {
|
|
32200
|
+
isContent,
|
|
32201
|
+
isContentArray,
|
|
32202
|
+
getTextContent,
|
|
31910
32203
|
TEST_MESSAGE_ROLES,
|
|
31911
32204
|
isTestMessageRole,
|
|
31912
32205
|
isJsonObject,
|
|
@@ -31922,11 +32215,13 @@ export {
|
|
|
31922
32215
|
buildSearchRoots,
|
|
31923
32216
|
resolveFileReference,
|
|
31924
32217
|
CLI_PLACEHOLDERS,
|
|
32218
|
+
COMMON_TARGET_SETTINGS,
|
|
31925
32219
|
resolveTargetDefinition,
|
|
31926
|
-
interpolateEnv,
|
|
31927
|
-
loadCasesFromFile,
|
|
31928
32220
|
KNOWN_PROVIDERS,
|
|
31929
32221
|
PROVIDER_ALIASES,
|
|
32222
|
+
extractLastAssistantContent,
|
|
32223
|
+
interpolateEnv,
|
|
32224
|
+
loadCasesFromFile,
|
|
31930
32225
|
computeTraceSummary,
|
|
31931
32226
|
DEFAULT_EXPLORATION_TOOLS,
|
|
31932
32227
|
explorationRatio,
|
|
@@ -32002,6 +32297,7 @@ export {
|
|
|
32002
32297
|
substituteVariables,
|
|
32003
32298
|
calculateRubricScore,
|
|
32004
32299
|
buildScoreRangeOutputSchema,
|
|
32300
|
+
extractImageBlocks,
|
|
32005
32301
|
CompositeEvaluator,
|
|
32006
32302
|
CostEvaluator,
|
|
32007
32303
|
ExecutionMetricsEvaluator,
|
|
@@ -32051,9 +32347,11 @@ export {
|
|
|
32051
32347
|
shouldEnableCache,
|
|
32052
32348
|
shouldSkipCacheForTemperature,
|
|
32053
32349
|
trimBaselineResult,
|
|
32350
|
+
DEFAULT_CATEGORY,
|
|
32351
|
+
deriveCategory,
|
|
32054
32352
|
OTEL_BACKEND_PRESETS,
|
|
32055
32353
|
OtelTraceExporter,
|
|
32056
32354
|
OtelStreamingObserver,
|
|
32057
32355
|
createAgentKernel
|
|
32058
32356
|
};
|
|
32059
|
-
//# sourceMappingURL=chunk-
|
|
32357
|
+
//# sourceMappingURL=chunk-XEAW7OQT.js.map
|