@agentv/core 3.14.6 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-HP5PFOVK.js → chunk-PXYYRDHH.js} +142 -148
- package/dist/chunk-PXYYRDHH.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +9 -2
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +3 -2
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +569 -257
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +135 -93
- package/dist/index.d.ts +135 -93
- package/dist/index.js +459 -141
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-HP5PFOVK.js.map +0 -1
package/dist/index.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import {
|
|
2
|
+
COMMON_TARGET_SETTINGS,
|
|
2
3
|
TEST_MESSAGE_ROLES,
|
|
3
4
|
buildDirectoryChain,
|
|
4
5
|
buildSearchRoots,
|
|
@@ -6,8 +7,11 @@ import {
|
|
|
6
7
|
extractLastAssistantContent,
|
|
7
8
|
fileExists,
|
|
8
9
|
findGitRoot,
|
|
10
|
+
getTextContent,
|
|
9
11
|
interpolateEnv,
|
|
10
12
|
isAgentProvider,
|
|
13
|
+
isContent,
|
|
14
|
+
isContentArray,
|
|
11
15
|
isEvaluatorKind,
|
|
12
16
|
isJsonObject,
|
|
13
17
|
isJsonValue,
|
|
@@ -19,7 +23,7 @@ import {
|
|
|
19
23
|
readTextFile,
|
|
20
24
|
resolveFileReference,
|
|
21
25
|
resolveTargetDefinition
|
|
22
|
-
} from "./chunk-HP5PFOVK.js";
|
|
26
|
+
} from "./chunk-PXYYRDHH.js";
|
|
23
27
|
import {
|
|
24
28
|
AgentvProvider
|
|
25
29
|
} from "./chunk-W5YDZWT4.js";
|
|
@@ -690,15 +694,23 @@ var TEMPLATE_VARIABLES = {
|
|
|
690
694
|
INPUT: "input",
|
|
691
695
|
OUTPUT: "output",
|
|
692
696
|
FILE_CHANGES: "file_changes",
|
|
697
|
+
/** @deprecated Use INPUT instead — resolves to the same text value. */
|
|
693
698
|
INPUT_TEXT: "input_text",
|
|
699
|
+
/** @deprecated Use OUTPUT instead — resolves to the same text value. */
|
|
694
700
|
OUTPUT_TEXT: "output_text",
|
|
701
|
+
/** @deprecated Use EXPECTED_OUTPUT instead — resolves to the same text value. */
|
|
695
702
|
EXPECTED_OUTPUT_TEXT: "expected_output_text"
|
|
696
703
|
};
|
|
697
704
|
var VALID_TEMPLATE_VARIABLES = new Set(Object.values(TEMPLATE_VARIABLES));
|
|
698
705
|
var REQUIRED_TEMPLATE_VARIABLES = /* @__PURE__ */ new Set([
|
|
699
|
-
TEMPLATE_VARIABLES.OUTPUT_TEXT,
|
|
706
|
+
TEMPLATE_VARIABLES.OUTPUT,
|
|
700
707
|
TEMPLATE_VARIABLES.EXPECTED_OUTPUT
|
|
701
708
|
]);
|
|
709
|
+
var DEPRECATED_TEMPLATE_VARIABLES = /* @__PURE__ */ new Map([
|
|
710
|
+
[TEMPLATE_VARIABLES.INPUT_TEXT, TEMPLATE_VARIABLES.INPUT],
|
|
711
|
+
[TEMPLATE_VARIABLES.OUTPUT_TEXT, TEMPLATE_VARIABLES.OUTPUT],
|
|
712
|
+
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT, TEMPLATE_VARIABLES.EXPECTED_OUTPUT]
|
|
713
|
+
]);
|
|
702
714
|
|
|
703
715
|
// src/evaluation/validation/prompt-validator.ts
|
|
704
716
|
var ANSI_YELLOW2 = "\x1B[33m";
|
|
@@ -720,16 +732,29 @@ function validateTemplateVariables(content, source) {
|
|
|
720
732
|
}
|
|
721
733
|
match = variablePattern.exec(content);
|
|
722
734
|
}
|
|
723
|
-
const hasCandidateAnswer = foundVariables.has(TEMPLATE_VARIABLES.OUTPUT_TEXT);
|
|
724
|
-
const hasExpectedOutput = foundVariables.has(TEMPLATE_VARIABLES.EXPECTED_OUTPUT);
|
|
735
|
+
const hasCandidateAnswer = foundVariables.has(TEMPLATE_VARIABLES.OUTPUT) || foundVariables.has(TEMPLATE_VARIABLES.OUTPUT_TEXT);
|
|
736
|
+
const hasExpectedOutput = foundVariables.has(TEMPLATE_VARIABLES.EXPECTED_OUTPUT) || foundVariables.has(TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT);
|
|
725
737
|
const hasRequiredFields = hasCandidateAnswer || hasExpectedOutput;
|
|
726
738
|
if (!hasRequiredFields) {
|
|
727
739
|
throw new Error(
|
|
728
740
|
`Missing required fields. Must include at least one of:
|
|
729
|
-
- {{ ${TEMPLATE_VARIABLES.
|
|
741
|
+
- {{ ${TEMPLATE_VARIABLES.OUTPUT} }}
|
|
730
742
|
- {{ ${TEMPLATE_VARIABLES.EXPECTED_OUTPUT} }}`
|
|
731
743
|
);
|
|
732
744
|
}
|
|
745
|
+
const deprecatedUsed = [];
|
|
746
|
+
for (const [deprecated, replacement] of DEPRECATED_TEMPLATE_VARIABLES) {
|
|
747
|
+
if (foundVariables.has(deprecated)) {
|
|
748
|
+
deprecatedUsed.push(`{{ ${deprecated} }} \u2192 {{ ${replacement} }}`);
|
|
749
|
+
}
|
|
750
|
+
}
|
|
751
|
+
if (deprecatedUsed.length > 0) {
|
|
752
|
+
console.warn(
|
|
753
|
+
`${ANSI_YELLOW2}Warning: Template at ${source} uses deprecated variable names:
|
|
754
|
+
${deprecatedUsed.join("\n ")}
|
|
755
|
+
These still work but will be removed in a future version.${ANSI_RESET3}`
|
|
756
|
+
);
|
|
757
|
+
}
|
|
733
758
|
if (invalidVariables.length > 0) {
|
|
734
759
|
const warningMessage = `${ANSI_YELLOW2}Warning: Custom evaluator template at ${source}
|
|
735
760
|
Contains invalid variables: ${invalidVariables.map((v) => `{{ ${v} }}`).join(", ")}
|
|
@@ -2147,6 +2172,19 @@ function asString2(value) {
|
|
|
2147
2172
|
}
|
|
2148
2173
|
|
|
2149
2174
|
// src/evaluation/loaders/message-processor.ts
|
|
2175
|
+
var IMAGE_MEDIA_TYPES = {
|
|
2176
|
+
".png": "image/png",
|
|
2177
|
+
".jpg": "image/jpeg",
|
|
2178
|
+
".jpeg": "image/jpeg",
|
|
2179
|
+
".gif": "image/gif",
|
|
2180
|
+
".webp": "image/webp",
|
|
2181
|
+
".svg": "image/svg+xml",
|
|
2182
|
+
".bmp": "image/bmp"
|
|
2183
|
+
};
|
|
2184
|
+
function detectImageMediaType(filePath) {
|
|
2185
|
+
const ext = path5.extname(filePath).toLowerCase();
|
|
2186
|
+
return IMAGE_MEDIA_TYPES[ext];
|
|
2187
|
+
}
|
|
2150
2188
|
var ANSI_YELLOW4 = "\x1B[33m";
|
|
2151
2189
|
var ANSI_RESET5 = "\x1B[0m";
|
|
2152
2190
|
async function processMessages(options) {
|
|
@@ -2212,6 +2250,47 @@ async function processMessages(options) {
|
|
|
2212
2250
|
}
|
|
2213
2251
|
continue;
|
|
2214
2252
|
}
|
|
2253
|
+
if (segmentType === "image") {
|
|
2254
|
+
const rawValue = asString3(rawSegment.value);
|
|
2255
|
+
if (!rawValue) {
|
|
2256
|
+
continue;
|
|
2257
|
+
}
|
|
2258
|
+
const { displayPath, resolvedPath, attempted } = await resolveFileReference2(
|
|
2259
|
+
rawValue,
|
|
2260
|
+
searchRoots
|
|
2261
|
+
);
|
|
2262
|
+
if (!resolvedPath) {
|
|
2263
|
+
const attempts = attempted.length ? [" Tried:", ...attempted.map((candidate) => ` ${candidate}`)] : void 0;
|
|
2264
|
+
const context = messageType === "input" ? "" : " in expected_output";
|
|
2265
|
+
logWarning3(`Image file not found${context}: ${displayPath}`, attempts);
|
|
2266
|
+
continue;
|
|
2267
|
+
}
|
|
2268
|
+
const mediaType = detectImageMediaType(resolvedPath);
|
|
2269
|
+
if (!mediaType) {
|
|
2270
|
+
logWarning3(
|
|
2271
|
+
`Unsupported image extension for ${displayPath}. Supported: ${Object.keys(IMAGE_MEDIA_TYPES).join(", ")}`
|
|
2272
|
+
);
|
|
2273
|
+
continue;
|
|
2274
|
+
}
|
|
2275
|
+
try {
|
|
2276
|
+
const imageBuffer = await readFile4(resolvedPath);
|
|
2277
|
+
const base64 = imageBuffer.toString("base64");
|
|
2278
|
+
processedContent.push({
|
|
2279
|
+
type: "image",
|
|
2280
|
+
media_type: mediaType,
|
|
2281
|
+
source: `data:${mediaType};base64,${base64}`
|
|
2282
|
+
});
|
|
2283
|
+
if (verbose) {
|
|
2284
|
+
const label = messageType === "input" ? "[Image]" : "[Expected Output Image]";
|
|
2285
|
+
console.log(` ${label} Found: ${displayPath}`);
|
|
2286
|
+
console.log(` Resolved to: ${resolvedPath} (${mediaType})`);
|
|
2287
|
+
}
|
|
2288
|
+
} catch (error) {
|
|
2289
|
+
const context = messageType === "input" ? "" : " expected output";
|
|
2290
|
+
logWarning3(`Could not read${context} image ${resolvedPath}: ${error.message}`);
|
|
2291
|
+
}
|
|
2292
|
+
continue;
|
|
2293
|
+
}
|
|
2215
2294
|
const clonedSegment = cloneJsonObject(rawSegment);
|
|
2216
2295
|
processedContent.push(clonedSegment);
|
|
2217
2296
|
const inlineValue = clonedSegment.value;
|
|
@@ -2289,6 +2368,46 @@ async function processExpectedMessages(options) {
|
|
|
2289
2368
|
}
|
|
2290
2369
|
continue;
|
|
2291
2370
|
}
|
|
2371
|
+
if (segmentType === "image") {
|
|
2372
|
+
const rawValue = asString3(rawSegment.value);
|
|
2373
|
+
if (!rawValue) {
|
|
2374
|
+
continue;
|
|
2375
|
+
}
|
|
2376
|
+
const { displayPath, resolvedPath, attempted } = await resolveFileReference2(
|
|
2377
|
+
rawValue,
|
|
2378
|
+
searchRoots
|
|
2379
|
+
);
|
|
2380
|
+
if (!resolvedPath) {
|
|
2381
|
+
const attempts = attempted.length ? [" Tried:", ...attempted.map((candidate) => ` ${candidate}`)] : void 0;
|
|
2382
|
+
logWarning3(`Image file not found in expected_output: ${displayPath}`, attempts);
|
|
2383
|
+
continue;
|
|
2384
|
+
}
|
|
2385
|
+
const mediaType = detectImageMediaType(resolvedPath);
|
|
2386
|
+
if (!mediaType) {
|
|
2387
|
+
logWarning3(
|
|
2388
|
+
`Unsupported image extension for ${displayPath}. Supported: ${Object.keys(IMAGE_MEDIA_TYPES).join(", ")}`
|
|
2389
|
+
);
|
|
2390
|
+
continue;
|
|
2391
|
+
}
|
|
2392
|
+
try {
|
|
2393
|
+
const imageBuffer = await readFile4(resolvedPath);
|
|
2394
|
+
const base64 = imageBuffer.toString("base64");
|
|
2395
|
+
processedContent.push({
|
|
2396
|
+
type: "image",
|
|
2397
|
+
media_type: mediaType,
|
|
2398
|
+
source: `data:${mediaType};base64,${base64}`
|
|
2399
|
+
});
|
|
2400
|
+
if (verbose) {
|
|
2401
|
+
console.log(` [Expected Output Image] Found: ${displayPath}`);
|
|
2402
|
+
console.log(` Resolved to: ${resolvedPath} (${mediaType})`);
|
|
2403
|
+
}
|
|
2404
|
+
} catch (error) {
|
|
2405
|
+
logWarning3(
|
|
2406
|
+
`Could not read expected output image ${resolvedPath}: ${error.message}`
|
|
2407
|
+
);
|
|
2408
|
+
}
|
|
2409
|
+
continue;
|
|
2410
|
+
}
|
|
2292
2411
|
processedContent.push(cloneJsonObject(rawSegment));
|
|
2293
2412
|
}
|
|
2294
2413
|
segment.content = processedContent;
|
|
@@ -2535,7 +2654,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
2535
2654
|
const userFilePaths = collectResolvedInputFilePaths(inputMessages);
|
|
2536
2655
|
const testCase = {
|
|
2537
2656
|
id,
|
|
2538
|
-
|
|
2657
|
+
dataset: evalSetName,
|
|
2539
2658
|
conversation_id: conversationId,
|
|
2540
2659
|
question,
|
|
2541
2660
|
input: inputMessages,
|
|
@@ -2806,7 +2925,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
2806
2925
|
}
|
|
2807
2926
|
const suite = interpolated;
|
|
2808
2927
|
const evalSetNameFromSuite = asString5(suite.name)?.trim();
|
|
2809
|
-
const fallbackEvalSet = path7.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
|
|
2928
|
+
const fallbackEvalSet = path7.basename(absoluteTestPath).replace(/\.eval\.ya?ml$/i, "").replace(/\.ya?ml$/i, "") || "eval";
|
|
2810
2929
|
const evalSetName = evalSetNameFromSuite && evalSetNameFromSuite.length > 0 ? evalSetNameFromSuite : fallbackEvalSet;
|
|
2811
2930
|
const rawTestcases = resolveTests(suite);
|
|
2812
2931
|
const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm-grader";
|
|
@@ -2927,7 +3046,8 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
2927
3046
|
const caseTargets = extractTargetsFromTestCase(evalcase);
|
|
2928
3047
|
const testCase = {
|
|
2929
3048
|
id,
|
|
2930
|
-
|
|
3049
|
+
dataset: evalSetName,
|
|
3050
|
+
category: options?.category,
|
|
2931
3051
|
conversation_id: conversationId,
|
|
2932
3052
|
question,
|
|
2933
3053
|
input: inputMessages,
|
|
@@ -3851,6 +3971,49 @@ import { createWriteStream } from "node:fs";
|
|
|
3851
3971
|
import { mkdir } from "node:fs/promises";
|
|
3852
3972
|
import path10 from "node:path";
|
|
3853
3973
|
|
|
3974
|
+
// src/evaluation/providers/claude-content.ts
|
|
3975
|
+
function toContentArray(content) {
|
|
3976
|
+
if (!Array.isArray(content)) return void 0;
|
|
3977
|
+
let hasNonText = false;
|
|
3978
|
+
const blocks = [];
|
|
3979
|
+
for (const part of content) {
|
|
3980
|
+
if (!part || typeof part !== "object") continue;
|
|
3981
|
+
const p = part;
|
|
3982
|
+
if (p.type === "text" && typeof p.text === "string") {
|
|
3983
|
+
blocks.push({ type: "text", text: p.text });
|
|
3984
|
+
} else if (p.type === "image" && typeof p.source === "object" && p.source !== null) {
|
|
3985
|
+
const src = p.source;
|
|
3986
|
+
const mediaType = typeof p.media_type === "string" ? p.media_type : typeof src.media_type === "string" ? src.media_type : "application/octet-stream";
|
|
3987
|
+
const data = typeof src.data === "string" && src.data !== "" ? `data:${mediaType};base64,${src.data}` : typeof p.url === "string" && p.url !== "" ? p.url : "";
|
|
3988
|
+
if (!data) continue;
|
|
3989
|
+
blocks.push({ type: "image", media_type: mediaType, source: data });
|
|
3990
|
+
hasNonText = true;
|
|
3991
|
+
} else if (p.type === "tool_use") {
|
|
3992
|
+
} else if (p.type === "tool_result") {
|
|
3993
|
+
}
|
|
3994
|
+
}
|
|
3995
|
+
return hasNonText && blocks.length > 0 ? blocks : void 0;
|
|
3996
|
+
}
|
|
3997
|
+
function extractTextContent(content) {
|
|
3998
|
+
if (typeof content === "string") {
|
|
3999
|
+
return content;
|
|
4000
|
+
}
|
|
4001
|
+
if (!Array.isArray(content)) {
|
|
4002
|
+
return void 0;
|
|
4003
|
+
}
|
|
4004
|
+
const textParts = [];
|
|
4005
|
+
for (const part of content) {
|
|
4006
|
+
if (!part || typeof part !== "object") {
|
|
4007
|
+
continue;
|
|
4008
|
+
}
|
|
4009
|
+
const p = part;
|
|
4010
|
+
if (p.type === "text" && typeof p.text === "string") {
|
|
4011
|
+
textParts.push(p.text);
|
|
4012
|
+
}
|
|
4013
|
+
}
|
|
4014
|
+
return textParts.length > 0 ? textParts.join("\n") : void 0;
|
|
4015
|
+
}
|
|
4016
|
+
|
|
3854
4017
|
// src/evaluation/providers/claude-log-tracker.ts
|
|
3855
4018
|
var GLOBAL_LOGS_KEY = Symbol.for("agentv.claudeLogs");
|
|
3856
4019
|
var GLOBAL_SUBSCRIBERS_KEY = Symbol.for("agentv.claudeLogSubscribers");
|
|
@@ -4016,11 +4179,12 @@ var ClaudeCliProvider = class {
|
|
|
4016
4179
|
if (betaMessage && typeof betaMessage === "object") {
|
|
4017
4180
|
const msg = betaMessage;
|
|
4018
4181
|
const content = msg.content;
|
|
4182
|
+
const structuredContent = toContentArray(content);
|
|
4019
4183
|
const textContent = extractTextContent(content);
|
|
4020
4184
|
const toolCalls = extractToolCalls(content);
|
|
4021
4185
|
const outputMsg = {
|
|
4022
4186
|
role: "assistant",
|
|
4023
|
-
content: textContent,
|
|
4187
|
+
content: structuredContent ?? textContent,
|
|
4024
4188
|
toolCalls: toolCalls.length > 0 ? toolCalls : void 0
|
|
4025
4189
|
};
|
|
4026
4190
|
output.push(outputMsg);
|
|
@@ -4359,25 +4523,6 @@ function summarizeEvent(event) {
|
|
|
4359
4523
|
return void 0;
|
|
4360
4524
|
}
|
|
4361
4525
|
}
|
|
4362
|
-
function extractTextContent(content) {
|
|
4363
|
-
if (typeof content === "string") {
|
|
4364
|
-
return content;
|
|
4365
|
-
}
|
|
4366
|
-
if (!Array.isArray(content)) {
|
|
4367
|
-
return void 0;
|
|
4368
|
-
}
|
|
4369
|
-
const textParts = [];
|
|
4370
|
-
for (const part of content) {
|
|
4371
|
-
if (!part || typeof part !== "object") {
|
|
4372
|
-
continue;
|
|
4373
|
-
}
|
|
4374
|
-
const p = part;
|
|
4375
|
-
if (p.type === "text" && typeof p.text === "string") {
|
|
4376
|
-
textParts.push(p.text);
|
|
4377
|
-
}
|
|
4378
|
-
}
|
|
4379
|
-
return textParts.length > 0 ? textParts.join("\n") : void 0;
|
|
4380
|
-
}
|
|
4381
4526
|
function extractToolCalls(content) {
|
|
4382
4527
|
if (!Array.isArray(content)) {
|
|
4383
4528
|
return [];
|
|
@@ -4550,11 +4695,12 @@ var ClaudeSdkProvider = class {
|
|
|
4550
4695
|
if (betaMessage && typeof betaMessage === "object") {
|
|
4551
4696
|
const msg = betaMessage;
|
|
4552
4697
|
const content = msg.content;
|
|
4553
|
-
const textContent = extractTextContent2(content);
|
|
4698
|
+
const structuredContent = toContentArray(content);
|
|
4699
|
+
const textContent = extractTextContent(content);
|
|
4554
4700
|
const toolCalls = extractToolCalls2(content);
|
|
4555
4701
|
const outputMsg = {
|
|
4556
4702
|
role: "assistant",
|
|
4557
|
-
content: textContent,
|
|
4703
|
+
content: structuredContent ?? textContent,
|
|
4558
4704
|
toolCalls: toolCalls.length > 0 ? toolCalls : void 0
|
|
4559
4705
|
};
|
|
4560
4706
|
output.push(outputMsg);
|
|
@@ -4672,25 +4818,6 @@ var ClaudeSdkProvider = class {
|
|
|
4672
4818
|
}
|
|
4673
4819
|
}
|
|
4674
4820
|
};
|
|
4675
|
-
function extractTextContent2(content) {
|
|
4676
|
-
if (typeof content === "string") {
|
|
4677
|
-
return content;
|
|
4678
|
-
}
|
|
4679
|
-
if (!Array.isArray(content)) {
|
|
4680
|
-
return void 0;
|
|
4681
|
-
}
|
|
4682
|
-
const textParts = [];
|
|
4683
|
-
for (const part of content) {
|
|
4684
|
-
if (!part || typeof part !== "object") {
|
|
4685
|
-
continue;
|
|
4686
|
-
}
|
|
4687
|
-
const p = part;
|
|
4688
|
-
if (p.type === "text" && typeof p.text === "string") {
|
|
4689
|
-
textParts.push(p.text);
|
|
4690
|
-
}
|
|
4691
|
-
}
|
|
4692
|
-
return textParts.length > 0 ? textParts.join("\n") : void 0;
|
|
4693
|
-
}
|
|
4694
4821
|
function extractToolCalls2(content) {
|
|
4695
4822
|
if (!Array.isArray(content)) {
|
|
4696
4823
|
return [];
|
|
@@ -4914,7 +5041,7 @@ function convertMessages(messages) {
|
|
|
4914
5041
|
return messages.map((msg) => ({
|
|
4915
5042
|
role: msg.role,
|
|
4916
5043
|
name: msg.name,
|
|
4917
|
-
content: msg.content,
|
|
5044
|
+
content: isContentArray(msg.content) ? msg.content : typeof msg.content === "string" ? msg.content : void 0,
|
|
4918
5045
|
toolCalls: msg.tool_calls?.map((tc) => ({
|
|
4919
5046
|
tool: tc.tool,
|
|
4920
5047
|
input: tc.input,
|
|
@@ -7167,6 +7294,35 @@ function extractPiTextContent(content) {
|
|
|
7167
7294
|
}
|
|
7168
7295
|
return textParts.length > 0 ? textParts.join("\n") : void 0;
|
|
7169
7296
|
}
|
|
7297
|
+
function toPiContentArray(content) {
|
|
7298
|
+
if (!Array.isArray(content)) return void 0;
|
|
7299
|
+
let hasNonText = false;
|
|
7300
|
+
const blocks = [];
|
|
7301
|
+
for (const part of content) {
|
|
7302
|
+
if (!part || typeof part !== "object") continue;
|
|
7303
|
+
const p = part;
|
|
7304
|
+
if (p.type === "text" && typeof p.text === "string") {
|
|
7305
|
+
blocks.push({ type: "text", text: p.text });
|
|
7306
|
+
} else if (p.type === "image") {
|
|
7307
|
+
const mediaType = typeof p.media_type === "string" ? p.media_type : "application/octet-stream";
|
|
7308
|
+
let source = "";
|
|
7309
|
+
if (typeof p.source === "object" && p.source !== null) {
|
|
7310
|
+
const src = p.source;
|
|
7311
|
+
const srcMediaType = typeof src.media_type === "string" ? src.media_type : mediaType;
|
|
7312
|
+
source = typeof src.data === "string" ? `data:${srcMediaType};base64,${src.data}` : "";
|
|
7313
|
+
}
|
|
7314
|
+
if (!source && typeof p.url === "string") {
|
|
7315
|
+
source = p.url;
|
|
7316
|
+
}
|
|
7317
|
+
if (source) {
|
|
7318
|
+
blocks.push({ type: "image", media_type: mediaType, source });
|
|
7319
|
+
hasNonText = true;
|
|
7320
|
+
}
|
|
7321
|
+
} else if (p.type === "tool_use" || p.type === "tool_result") {
|
|
7322
|
+
}
|
|
7323
|
+
}
|
|
7324
|
+
return hasNonText && blocks.length > 0 ? blocks : void 0;
|
|
7325
|
+
}
|
|
7170
7326
|
function toFiniteNumber(value) {
|
|
7171
7327
|
if (typeof value === "number" && Number.isFinite(value)) return value;
|
|
7172
7328
|
return void 0;
|
|
@@ -8337,7 +8493,8 @@ function convertAgentMessage(message, toolTrackers, completedToolResults) {
|
|
|
8337
8493
|
}
|
|
8338
8494
|
const msg = message;
|
|
8339
8495
|
const role = typeof msg.role === "string" ? msg.role : "unknown";
|
|
8340
|
-
const content = extractPiTextContent(msg.content);
|
|
8496
|
+
const structuredContent = toPiContentArray(msg.content);
|
|
8497
|
+
const content = structuredContent ?? extractPiTextContent(msg.content);
|
|
8341
8498
|
const toolCalls = extractToolCalls4(msg.content, toolTrackers, completedToolResults);
|
|
8342
8499
|
const startTimeVal = typeof msg.timestamp === "number" ? new Date(msg.timestamp).toISOString() : typeof msg.timestamp === "string" ? msg.timestamp : void 0;
|
|
8343
8500
|
let msgTokenUsage;
|
|
@@ -10187,13 +10344,13 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
|
10187
10344
|
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
10188
10345
|
const { mkdir: mkdir16, readFile: readFile14, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
|
|
10189
10346
|
const { tmpdir: tmpdir3 } = await import("node:os");
|
|
10190
|
-
const
|
|
10347
|
+
const path48 = await import("node:path");
|
|
10191
10348
|
const { randomUUID: randomUUID10 } = await import("node:crypto");
|
|
10192
|
-
const dir =
|
|
10349
|
+
const dir = path48.join(tmpdir3(), `agentv-exec-${randomUUID10()}`);
|
|
10193
10350
|
await mkdir16(dir, { recursive: true });
|
|
10194
|
-
const stdinPath =
|
|
10195
|
-
const stdoutPath =
|
|
10196
|
-
const stderrPath =
|
|
10351
|
+
const stdinPath = path48.join(dir, "stdin.txt");
|
|
10352
|
+
const stdoutPath = path48.join(dir, "stdout.txt");
|
|
10353
|
+
const stderrPath = path48.join(dir, "stderr.txt");
|
|
10197
10354
|
await writeFile9(stdinPath, stdinPayload, "utf8");
|
|
10198
10355
|
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
10199
10356
|
const { spawn: spawn5 } = await import("node:child_process");
|
|
@@ -10509,6 +10666,56 @@ function toCamelCaseDeep(obj) {
|
|
|
10509
10666
|
|
|
10510
10667
|
// src/evaluation/evaluators/code-evaluator.ts
|
|
10511
10668
|
var FILE_BACKED_OUTPUT_THRESHOLD = 5e4;
|
|
10669
|
+
var DATA_URI_RE = /^data:([^;]+);base64,(.+)$/s;
|
|
10670
|
+
async function materializeContentForGrader(messages, getWorkDir) {
|
|
10671
|
+
if (!messages || messages.length === 0) return messages ?? null;
|
|
10672
|
+
let hasAnyImage = false;
|
|
10673
|
+
for (const msg of messages) {
|
|
10674
|
+
if (isContentArray(msg.content)) {
|
|
10675
|
+
for (const block of msg.content) {
|
|
10676
|
+
if (block.type === "image") {
|
|
10677
|
+
hasAnyImage = true;
|
|
10678
|
+
break;
|
|
10679
|
+
}
|
|
10680
|
+
}
|
|
10681
|
+
}
|
|
10682
|
+
if (hasAnyImage) break;
|
|
10683
|
+
}
|
|
10684
|
+
if (!hasAnyImage) return messages;
|
|
10685
|
+
let counter = 0;
|
|
10686
|
+
const result = [];
|
|
10687
|
+
for (const msg of messages) {
|
|
10688
|
+
if (!isContentArray(msg.content)) {
|
|
10689
|
+
result.push(msg);
|
|
10690
|
+
continue;
|
|
10691
|
+
}
|
|
10692
|
+
if (!msg.content.some((b) => b.type === "image")) {
|
|
10693
|
+
result.push(msg);
|
|
10694
|
+
continue;
|
|
10695
|
+
}
|
|
10696
|
+
const blocks = [];
|
|
10697
|
+
for (const block of msg.content) {
|
|
10698
|
+
if (block.type !== "image") {
|
|
10699
|
+
blocks.push({ ...block });
|
|
10700
|
+
continue;
|
|
10701
|
+
}
|
|
10702
|
+
const img = block;
|
|
10703
|
+
const match = DATA_URI_RE.exec(img.source);
|
|
10704
|
+
if (match) {
|
|
10705
|
+
const [, mediaType, base64Data] = match;
|
|
10706
|
+
const ext = mediaType.split("/")[1] === "jpeg" ? "jpg" : mediaType.split("/")[1] ?? "bin";
|
|
10707
|
+
const dir = await getWorkDir();
|
|
10708
|
+
const filePath = join(dir, `img-${counter++}.${ext}`);
|
|
10709
|
+
await writeFile6(filePath, Buffer.from(base64Data, "base64"));
|
|
10710
|
+
blocks.push({ type: "image", media_type: img.media_type, path: filePath });
|
|
10711
|
+
} else {
|
|
10712
|
+
blocks.push({ type: "image", media_type: img.media_type, path: img.source });
|
|
10713
|
+
}
|
|
10714
|
+
}
|
|
10715
|
+
result.push({ ...msg, content: blocks });
|
|
10716
|
+
}
|
|
10717
|
+
return result;
|
|
10718
|
+
}
|
|
10512
10719
|
var CodeEvaluator = class {
|
|
10513
10720
|
kind = "code-grader";
|
|
10514
10721
|
command;
|
|
@@ -10524,7 +10731,18 @@ var CodeEvaluator = class {
|
|
|
10524
10731
|
this.target = options.target;
|
|
10525
10732
|
}
|
|
10526
10733
|
async evaluate(context) {
|
|
10527
|
-
let
|
|
10734
|
+
let imageTmpDir;
|
|
10735
|
+
const getImageDir = async () => {
|
|
10736
|
+
if (!imageTmpDir) {
|
|
10737
|
+
imageTmpDir = await mkdtemp2(join(tmpdir2(), "agentv-img-"));
|
|
10738
|
+
}
|
|
10739
|
+
return imageTmpDir;
|
|
10740
|
+
};
|
|
10741
|
+
const materializedOutput = await materializeContentForGrader(
|
|
10742
|
+
context.output,
|
|
10743
|
+
getImageDir
|
|
10744
|
+
);
|
|
10745
|
+
let outputForPayload = materializedOutput;
|
|
10528
10746
|
let outputPath;
|
|
10529
10747
|
if (outputForPayload) {
|
|
10530
10748
|
const serialized = JSON.stringify(outputForPayload);
|
|
@@ -10537,12 +10755,17 @@ var CodeEvaluator = class {
|
|
|
10537
10755
|
}
|
|
10538
10756
|
const payload = {
|
|
10539
10757
|
criteria: context.evalCase.criteria,
|
|
10540
|
-
expectedOutput:
|
|
10541
|
-
|
|
10758
|
+
expectedOutput: await materializeContentForGrader(
|
|
10759
|
+
context.evalCase.expected_output,
|
|
10760
|
+
getImageDir
|
|
10761
|
+
),
|
|
10542
10762
|
output: outputForPayload,
|
|
10543
10763
|
outputPath,
|
|
10544
10764
|
inputFiles: context.evalCase.file_paths,
|
|
10545
|
-
input:
|
|
10765
|
+
input: await materializeContentForGrader(
|
|
10766
|
+
context.evalCase.input,
|
|
10767
|
+
getImageDir
|
|
10768
|
+
),
|
|
10546
10769
|
trace: context.trace ?? null,
|
|
10547
10770
|
tokenUsage: context.tokenUsage ?? null,
|
|
10548
10771
|
costUsd: context.costUsd ?? null,
|
|
@@ -10551,9 +10774,7 @@ var CodeEvaluator = class {
|
|
|
10551
10774
|
endTime: context.endTime ?? null,
|
|
10552
10775
|
fileChanges: context.fileChanges ?? null,
|
|
10553
10776
|
workspacePath: context.workspacePath ?? null,
|
|
10554
|
-
config: this.config ?? null
|
|
10555
|
-
inputText: context.evalCase.question,
|
|
10556
|
-
expectedOutputText: context.evalCase.reference_answer ?? ""
|
|
10777
|
+
config: this.config ?? null
|
|
10557
10778
|
};
|
|
10558
10779
|
const inputPayload = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
|
|
10559
10780
|
let proxyEnv;
|
|
@@ -10643,6 +10864,10 @@ var CodeEvaluator = class {
|
|
|
10643
10864
|
await rm3(dirname(outputPath), { recursive: true, force: true }).catch(() => {
|
|
10644
10865
|
});
|
|
10645
10866
|
}
|
|
10867
|
+
if (imageTmpDir) {
|
|
10868
|
+
await rm3(imageTmpDir, { recursive: true, force: true }).catch(() => {
|
|
10869
|
+
});
|
|
10870
|
+
}
|
|
10646
10871
|
}
|
|
10647
10872
|
}
|
|
10648
10873
|
};
|
|
@@ -10720,13 +10945,13 @@ Be concise and focused in your evaluation. Provide succinct, specific feedback r
|
|
|
10720
10945
|
{{${TEMPLATE_VARIABLES.CRITERIA}}}
|
|
10721
10946
|
|
|
10722
10947
|
[[ ## question ## ]]
|
|
10723
|
-
{{${TEMPLATE_VARIABLES.
|
|
10948
|
+
{{${TEMPLATE_VARIABLES.INPUT}}}
|
|
10724
10949
|
|
|
10725
10950
|
[[ ## reference_answer ## ]]
|
|
10726
|
-
{{${TEMPLATE_VARIABLES.
|
|
10951
|
+
{{${TEMPLATE_VARIABLES.EXPECTED_OUTPUT}}}
|
|
10727
10952
|
|
|
10728
10953
|
[[ ## answer ## ]]
|
|
10729
|
-
{{${TEMPLATE_VARIABLES.
|
|
10954
|
+
{{${TEMPLATE_VARIABLES.OUTPUT}}}`;
|
|
10730
10955
|
var freeformEvaluationSchema = z3.object({
|
|
10731
10956
|
score: z3.number().min(0).max(1).describe("Score between 0.0 and 1.0"),
|
|
10732
10957
|
assertions: z3.array(
|
|
@@ -10798,21 +11023,19 @@ var LlmGraderEvaluator = class {
|
|
|
10798
11023
|
async evaluateFreeform(context, graderProvider) {
|
|
10799
11024
|
const formattedQuestion = context.promptInputs.question && context.promptInputs.question.trim().length > 0 ? context.promptInputs.question : context.evalCase.question;
|
|
10800
11025
|
const variables = {
|
|
10801
|
-
[TEMPLATE_VARIABLES.INPUT]:
|
|
10802
|
-
[TEMPLATE_VARIABLES.
|
|
10803
|
-
|
|
10804
|
-
null,
|
|
10805
|
-
2
|
|
10806
|
-
),
|
|
10807
|
-
[TEMPLATE_VARIABLES.OUTPUT]: JSON.stringify(context.output ?? [], null, 2),
|
|
11026
|
+
[TEMPLATE_VARIABLES.INPUT]: formattedQuestion.trim(),
|
|
11027
|
+
[TEMPLATE_VARIABLES.OUTPUT]: context.candidate.trim(),
|
|
11028
|
+
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: (context.evalCase.reference_answer ?? "").trim(),
|
|
10808
11029
|
[TEMPLATE_VARIABLES.CRITERIA]: context.evalCase.criteria.trim(),
|
|
10809
11030
|
[TEMPLATE_VARIABLES.FILE_CHANGES]: context.fileChanges ?? "",
|
|
11031
|
+
// Deprecated aliases — same values as the primary variables above
|
|
10810
11032
|
[TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
|
|
10811
11033
|
[TEMPLATE_VARIABLES.OUTPUT_TEXT]: context.candidate.trim(),
|
|
10812
11034
|
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (context.evalCase.reference_answer ?? "").trim()
|
|
10813
11035
|
};
|
|
10814
11036
|
const systemPrompt = buildOutputSchema();
|
|
10815
11037
|
const evaluatorTemplate = context.evaluatorTemplateOverride ?? this.evaluatorTemplate ?? DEFAULT_EVALUATOR_TEMPLATE;
|
|
11038
|
+
warnDeprecatedTemplateVars(evaluatorTemplate);
|
|
10816
11039
|
let userPrompt = substituteVariables(evaluatorTemplate, variables);
|
|
10817
11040
|
if (context.fileChanges && !context.evaluatorTemplateOverride && !this.evaluatorTemplate) {
|
|
10818
11041
|
userPrompt += `
|
|
@@ -10824,13 +11047,15 @@ ${context.fileChanges}`;
|
|
|
10824
11047
|
userPrompt,
|
|
10825
11048
|
systemPrompt
|
|
10826
11049
|
};
|
|
11050
|
+
const images = context.output ? extractImageBlocks(context.output) : [];
|
|
10827
11051
|
try {
|
|
10828
11052
|
const { data, tokenUsage } = await this.runWithRetry({
|
|
10829
11053
|
context,
|
|
10830
11054
|
graderProvider,
|
|
10831
11055
|
systemPrompt,
|
|
10832
11056
|
userPrompt,
|
|
10833
|
-
schema: freeformEvaluationSchema
|
|
11057
|
+
schema: freeformEvaluationSchema,
|
|
11058
|
+
images
|
|
10834
11059
|
});
|
|
10835
11060
|
const score = clampScore(data.score);
|
|
10836
11061
|
const assertions = Array.isArray(data.assertions) ? data.assertions.slice(0, 8) : [];
|
|
@@ -10874,13 +11099,15 @@ ${context.fileChanges}`;
|
|
|
10874
11099
|
userPrompt: prompt,
|
|
10875
11100
|
systemPrompt
|
|
10876
11101
|
};
|
|
11102
|
+
const images = context.output ? extractImageBlocks(context.output) : [];
|
|
10877
11103
|
try {
|
|
10878
11104
|
const { data, tokenUsage } = await this.runWithRetry({
|
|
10879
11105
|
context,
|
|
10880
11106
|
graderProvider,
|
|
10881
11107
|
systemPrompt,
|
|
10882
11108
|
userPrompt: prompt,
|
|
10883
|
-
schema: rubricEvaluationSchema
|
|
11109
|
+
schema: rubricEvaluationSchema,
|
|
11110
|
+
images
|
|
10884
11111
|
});
|
|
10885
11112
|
const { score, verdict, assertions } = calculateRubricScore(data, rubrics);
|
|
10886
11113
|
return {
|
|
@@ -10917,13 +11144,15 @@ ${context.fileChanges}`;
|
|
|
10917
11144
|
userPrompt: prompt,
|
|
10918
11145
|
systemPrompt
|
|
10919
11146
|
};
|
|
11147
|
+
const images = context.output ? extractImageBlocks(context.output) : [];
|
|
10920
11148
|
try {
|
|
10921
11149
|
const { data, tokenUsage } = await this.runWithRetry({
|
|
10922
11150
|
context,
|
|
10923
11151
|
graderProvider,
|
|
10924
11152
|
systemPrompt,
|
|
10925
11153
|
userPrompt: prompt,
|
|
10926
|
-
schema: scoreRangeEvaluationSchema
|
|
11154
|
+
schema: scoreRangeEvaluationSchema,
|
|
11155
|
+
images
|
|
10927
11156
|
});
|
|
10928
11157
|
const { score, verdict, assertions, details } = calculateScoreRangeResult(data, rubrics);
|
|
10929
11158
|
return {
|
|
@@ -11130,12 +11359,17 @@ ${context.fileChanges}`;
|
|
|
11130
11359
|
const formattedQuestion = context.promptInputs.question && context.promptInputs.question.trim().length > 0 ? context.promptInputs.question : context.evalCase.question;
|
|
11131
11360
|
const variables = {
|
|
11132
11361
|
[TEMPLATE_VARIABLES.CRITERIA]: context.evalCase.criteria.trim(),
|
|
11362
|
+
[TEMPLATE_VARIABLES.INPUT]: formattedQuestion.trim(),
|
|
11363
|
+
[TEMPLATE_VARIABLES.OUTPUT]: context.candidate.trim(),
|
|
11364
|
+
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: (context.evalCase.reference_answer ?? "").trim(),
|
|
11365
|
+
[TEMPLATE_VARIABLES.FILE_CHANGES]: context.fileChanges ?? "",
|
|
11366
|
+
// Deprecated aliases
|
|
11133
11367
|
[TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
|
|
11134
11368
|
[TEMPLATE_VARIABLES.OUTPUT_TEXT]: context.candidate.trim(),
|
|
11135
|
-
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (context.evalCase.reference_answer ?? "").trim()
|
|
11136
|
-
[TEMPLATE_VARIABLES.FILE_CHANGES]: context.fileChanges ?? ""
|
|
11369
|
+
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (context.evalCase.reference_answer ?? "").trim()
|
|
11137
11370
|
};
|
|
11138
11371
|
if (this.evaluatorTemplate) {
|
|
11372
|
+
warnDeprecatedTemplateVars(this.evaluatorTemplate);
|
|
11139
11373
|
return substituteVariables(this.evaluatorTemplate, variables);
|
|
11140
11374
|
}
|
|
11141
11375
|
const config = context.evaluator;
|
|
@@ -11186,11 +11420,16 @@ ${context.fileChanges}`;
|
|
|
11186
11420
|
if (this.evaluatorTemplate) {
|
|
11187
11421
|
const variables = {
|
|
11188
11422
|
[TEMPLATE_VARIABLES.CRITERIA]: context.evalCase.criteria.trim(),
|
|
11423
|
+
[TEMPLATE_VARIABLES.INPUT]: formattedQuestion.trim(),
|
|
11424
|
+
[TEMPLATE_VARIABLES.OUTPUT]: context.candidate.trim(),
|
|
11425
|
+
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: (context.evalCase.reference_answer ?? "").trim(),
|
|
11426
|
+
[TEMPLATE_VARIABLES.FILE_CHANGES]: context.fileChanges ?? "",
|
|
11427
|
+
// Deprecated aliases
|
|
11189
11428
|
[TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
|
|
11190
11429
|
[TEMPLATE_VARIABLES.OUTPUT_TEXT]: context.candidate.trim(),
|
|
11191
|
-
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (context.evalCase.reference_answer ?? "").trim()
|
|
11192
|
-
[TEMPLATE_VARIABLES.FILE_CHANGES]: context.fileChanges ?? ""
|
|
11430
|
+
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (context.evalCase.reference_answer ?? "").trim()
|
|
11193
11431
|
};
|
|
11432
|
+
warnDeprecatedTemplateVars(this.evaluatorTemplate);
|
|
11194
11433
|
const customPrompt = substituteVariables(this.evaluatorTemplate, variables);
|
|
11195
11434
|
const outputSchema = rubrics && rubrics.length > 0 ? buildRubricOutputSchema() : buildOutputSchema();
|
|
11196
11435
|
return `${customPrompt}
|
|
@@ -11361,18 +11600,35 @@ ${outputSchema}`;
|
|
|
11361
11600
|
// LLM mode retry logic
|
|
11362
11601
|
// ---------------------------------------------------------------------------
|
|
11363
11602
|
async runWithRetry(options) {
|
|
11364
|
-
const { context, graderProvider, systemPrompt, userPrompt, schema } = options;
|
|
11603
|
+
const { context, graderProvider, systemPrompt, userPrompt, schema, images } = options;
|
|
11365
11604
|
let lastError;
|
|
11366
11605
|
for (let attempt = 1; attempt <= 3; attempt++) {
|
|
11367
11606
|
try {
|
|
11368
11607
|
const model = graderProvider.asLanguageModel?.();
|
|
11369
11608
|
if (model) {
|
|
11370
|
-
const
|
|
11609
|
+
const modelOptions = {
|
|
11610
|
+
...this.maxOutputTokens ? { maxTokens: this.maxOutputTokens } : {},
|
|
11611
|
+
...typeof this.temperature === "number" ? { temperature: this.temperature } : {}
|
|
11612
|
+
};
|
|
11613
|
+
const hasImages = images && images.length > 0;
|
|
11614
|
+
const result = hasImages ? await generateText2({
|
|
11615
|
+
model,
|
|
11616
|
+
system: systemPrompt,
|
|
11617
|
+
messages: [
|
|
11618
|
+
{
|
|
11619
|
+
role: "user",
|
|
11620
|
+
content: [
|
|
11621
|
+
{ type: "text", text: userPrompt },
|
|
11622
|
+
...toAiSdkImageParts(images)
|
|
11623
|
+
]
|
|
11624
|
+
}
|
|
11625
|
+
],
|
|
11626
|
+
...modelOptions
|
|
11627
|
+
}) : await generateText2({
|
|
11371
11628
|
model,
|
|
11372
11629
|
system: systemPrompt,
|
|
11373
11630
|
prompt: userPrompt,
|
|
11374
|
-
...
|
|
11375
|
-
...typeof this.temperature === "number" ? { temperature: this.temperature } : {}
|
|
11631
|
+
...modelOptions
|
|
11376
11632
|
});
|
|
11377
11633
|
const data2 = schema.parse(parseJsonFromText(result.text));
|
|
11378
11634
|
const rawUsage = result.usage;
|
|
@@ -11432,6 +11688,26 @@ function substituteVariables(template, variables) {
|
|
|
11432
11688
|
return variables[varName] ?? match;
|
|
11433
11689
|
});
|
|
11434
11690
|
}
|
|
11691
|
+
var ANSI_YELLOW7 = "\x1B[33m";
var ANSI_RESET8 = "\x1B[0m";
// Every template string that has already been inspected, whether or not it
// produced a warning. Caching clean templates too means a given template is
// scanned at most once per process instead of on every grader invocation.
var warnedTemplateStrings = /* @__PURE__ */ new Set();
/**
 * Print a one-time console warning when a custom evaluator template still
 * references deprecated `{{ variable }}` names.
 *
 * Each distinct template string is inspected only once; later calls with the
 * same string return immediately.
 *
 * @param {string} template - Raw evaluator template text to inspect.
 * @returns {void}
 */
function warnDeprecatedTemplateVars(template) {
  if (warnedTemplateStrings.has(template)) return;
  // Record the template before scanning so clean templates are not re-scanned.
  warnedTemplateStrings.add(template);
  const used = [];
  for (const [deprecated, replacement] of DEPRECATED_TEMPLATE_VARIABLES) {
    // Matches `{{name}}` with optional whitespace inside the braces.
    if (new RegExp(`\\{\\{\\s*${deprecated}\\s*\\}\\}`).test(template)) {
      used.push(`{{ ${deprecated} }} \u2192 {{ ${replacement} }}`);
    }
  }
  if (used.length === 0) return;
  const message = [
    `${ANSI_YELLOW7}\u26A0 Deprecated template variables detected (they still work but will be removed in a future version):`,
    used.join("\n "),
    `Update your custom evaluator template to use the new names.${ANSI_RESET8}`
  ].join("\n");
  console.warn(message);
}
|
|
11435
11711
|
function calculateRubricScore(result, rubrics) {
|
|
11436
11712
|
const rubricMap = new Map(rubrics.map((rubric) => [rubric.id, rubric]));
|
|
11437
11713
|
const assertions = [];
|
|
@@ -11526,6 +11802,26 @@ function calculateScoreRangeResult(result, rubrics) {
|
|
|
11526
11802
|
}
|
|
11527
11803
|
};
|
|
11528
11804
|
}
|
|
11805
|
+
/**
 * Collect every image content block emitted by assistant messages.
 *
 * Messages with a role other than "assistant", and messages whose content
 * does not pass `isContentArray`, are skipped.
 *
 * @param {Iterable<object>} messages - Conversation messages to scan.
 * @returns {object[]} Image blocks, in the order they appear.
 */
function extractImageBlocks(messages) {
  const imageBlocks = [];
  for (const message of messages) {
    const hasBlockContent = message.role === "assistant" && isContentArray(message.content);
    if (!hasBlockContent) continue;
    for (const part of message.content) {
      if (part.type === "image") imageBlocks.push(part);
    }
  }
  return imageBlocks;
}
|
|
11818
|
+
/**
 * Convert image content blocks into the image-part shape passed to
 * `generateText` as multimodal user content.
 *
 * @param {object[]} images - Blocks carrying `source` and optional `media_type`.
 * @returns {object[]} One `{ type: "image", image, mediaType }` part per block;
 *   `mediaType` is `undefined` when the block's `media_type` is falsy.
 */
function toAiSdkImageParts(images) {
  const toImagePart = ({ source, media_type: declaredType }) => ({
    type: "image",
    image: source,
    mediaType: declaredType ? declaredType : void 0
  });
  return images.map((block) => toImagePart(block));
}
|
|
11529
11825
|
function resolveSandboxed(basePath, relativePath) {
|
|
11530
11826
|
const resolved = path35.resolve(basePath, relativePath);
|
|
11531
11827
|
if (!resolved.startsWith(basePath + path35.sep) && resolved !== basePath) {
|
|
@@ -12267,115 +12563,115 @@ var FieldAccuracyEvaluator = class {
|
|
|
12267
12563
|
* Evaluate a single field against the expected value.
|
|
12268
12564
|
*/
|
|
12269
12565
|
evaluateField(fieldConfig, candidateData, expectedData) {
|
|
12270
|
-
const { path:
|
|
12271
|
-
const candidateValue = resolvePath(candidateData,
|
|
12272
|
-
const expectedValue = resolvePath(expectedData,
|
|
12566
|
+
const { path: path48, match, required = true, weight = 1 } = fieldConfig;
|
|
12567
|
+
const candidateValue = resolvePath(candidateData, path48);
|
|
12568
|
+
const expectedValue = resolvePath(expectedData, path48);
|
|
12273
12569
|
if (expectedValue === void 0) {
|
|
12274
12570
|
return {
|
|
12275
|
-
path:
|
|
12571
|
+
path: path48,
|
|
12276
12572
|
score: 1,
|
|
12277
12573
|
// No expected value means no comparison needed
|
|
12278
12574
|
weight,
|
|
12279
12575
|
hit: true,
|
|
12280
|
-
message: `${
|
|
12576
|
+
message: `${path48}: no expected value`
|
|
12281
12577
|
};
|
|
12282
12578
|
}
|
|
12283
12579
|
if (candidateValue === void 0) {
|
|
12284
12580
|
if (required) {
|
|
12285
12581
|
return {
|
|
12286
|
-
path:
|
|
12582
|
+
path: path48,
|
|
12287
12583
|
score: 0,
|
|
12288
12584
|
weight,
|
|
12289
12585
|
hit: false,
|
|
12290
|
-
message: `${
|
|
12586
|
+
message: `${path48} (required, missing)`
|
|
12291
12587
|
};
|
|
12292
12588
|
}
|
|
12293
12589
|
return {
|
|
12294
|
-
path:
|
|
12590
|
+
path: path48,
|
|
12295
12591
|
score: 1,
|
|
12296
12592
|
// Don't penalize missing optional fields
|
|
12297
12593
|
weight: 0,
|
|
12298
12594
|
// Zero weight means it won't affect the score
|
|
12299
12595
|
hit: true,
|
|
12300
|
-
message: `${
|
|
12596
|
+
message: `${path48}: optional field missing`
|
|
12301
12597
|
};
|
|
12302
12598
|
}
|
|
12303
12599
|
switch (match) {
|
|
12304
12600
|
case "exact":
|
|
12305
|
-
return this.compareExact(
|
|
12601
|
+
return this.compareExact(path48, candidateValue, expectedValue, weight);
|
|
12306
12602
|
case "numeric_tolerance":
|
|
12307
12603
|
return this.compareNumericTolerance(
|
|
12308
|
-
|
|
12604
|
+
path48,
|
|
12309
12605
|
candidateValue,
|
|
12310
12606
|
expectedValue,
|
|
12311
12607
|
fieldConfig,
|
|
12312
12608
|
weight
|
|
12313
12609
|
);
|
|
12314
12610
|
case "date":
|
|
12315
|
-
return this.compareDate(
|
|
12611
|
+
return this.compareDate(path48, candidateValue, expectedValue, fieldConfig, weight);
|
|
12316
12612
|
default:
|
|
12317
12613
|
return {
|
|
12318
|
-
path:
|
|
12614
|
+
path: path48,
|
|
12319
12615
|
score: 0,
|
|
12320
12616
|
weight,
|
|
12321
12617
|
hit: false,
|
|
12322
|
-
message: `${
|
|
12618
|
+
message: `${path48}: unknown match type "${match}"`
|
|
12323
12619
|
};
|
|
12324
12620
|
}
|
|
12325
12621
|
}
|
|
12326
12622
|
/**
|
|
12327
12623
|
* Exact equality comparison.
|
|
12328
12624
|
*/
|
|
12329
|
-
compareExact(
|
|
12625
|
+
compareExact(path48, candidateValue, expectedValue, weight) {
|
|
12330
12626
|
if (deepEqual(candidateValue, expectedValue)) {
|
|
12331
12627
|
return {
|
|
12332
|
-
path:
|
|
12628
|
+
path: path48,
|
|
12333
12629
|
score: 1,
|
|
12334
12630
|
weight,
|
|
12335
12631
|
hit: true,
|
|
12336
|
-
message:
|
|
12632
|
+
message: path48
|
|
12337
12633
|
};
|
|
12338
12634
|
}
|
|
12339
12635
|
if (typeof candidateValue !== typeof expectedValue) {
|
|
12340
12636
|
return {
|
|
12341
|
-
path:
|
|
12637
|
+
path: path48,
|
|
12342
12638
|
score: 0,
|
|
12343
12639
|
weight,
|
|
12344
12640
|
hit: false,
|
|
12345
|
-
message: `${
|
|
12641
|
+
message: `${path48} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
|
|
12346
12642
|
};
|
|
12347
12643
|
}
|
|
12348
12644
|
return {
|
|
12349
|
-
path:
|
|
12645
|
+
path: path48,
|
|
12350
12646
|
score: 0,
|
|
12351
12647
|
weight,
|
|
12352
12648
|
hit: false,
|
|
12353
|
-
message: `${
|
|
12649
|
+
message: `${path48} (value mismatch)`
|
|
12354
12650
|
};
|
|
12355
12651
|
}
|
|
12356
12652
|
/**
|
|
12357
12653
|
* Numeric comparison with absolute or relative tolerance.
|
|
12358
12654
|
*/
|
|
12359
|
-
compareNumericTolerance(
|
|
12655
|
+
compareNumericTolerance(path48, candidateValue, expectedValue, fieldConfig, weight) {
|
|
12360
12656
|
const { tolerance = 0, relative = false } = fieldConfig;
|
|
12361
12657
|
const candidateNum = toNumber(candidateValue);
|
|
12362
12658
|
const expectedNum = toNumber(expectedValue);
|
|
12363
12659
|
if (candidateNum === null || expectedNum === null) {
|
|
12364
12660
|
return {
|
|
12365
|
-
path:
|
|
12661
|
+
path: path48,
|
|
12366
12662
|
score: 0,
|
|
12367
12663
|
weight,
|
|
12368
12664
|
hit: false,
|
|
12369
|
-
message: `${
|
|
12665
|
+
message: `${path48} (non-numeric value)`
|
|
12370
12666
|
};
|
|
12371
12667
|
}
|
|
12372
12668
|
if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
|
|
12373
12669
|
return {
|
|
12374
|
-
path:
|
|
12670
|
+
path: path48,
|
|
12375
12671
|
score: 0,
|
|
12376
12672
|
weight,
|
|
12377
12673
|
hit: false,
|
|
12378
|
-
message: `${
|
|
12674
|
+
message: `${path48} (invalid numeric value)`
|
|
12379
12675
|
};
|
|
12380
12676
|
}
|
|
12381
12677
|
const diff = Math.abs(candidateNum - expectedNum);
|
|
@@ -12388,61 +12684,61 @@ var FieldAccuracyEvaluator = class {
|
|
|
12388
12684
|
}
|
|
12389
12685
|
if (withinTolerance) {
|
|
12390
12686
|
return {
|
|
12391
|
-
path:
|
|
12687
|
+
path: path48,
|
|
12392
12688
|
score: 1,
|
|
12393
12689
|
weight,
|
|
12394
12690
|
hit: true,
|
|
12395
|
-
message: `${
|
|
12691
|
+
message: `${path48} (within tolerance: diff=${diff.toFixed(2)})`
|
|
12396
12692
|
};
|
|
12397
12693
|
}
|
|
12398
12694
|
return {
|
|
12399
|
-
path:
|
|
12695
|
+
path: path48,
|
|
12400
12696
|
score: 0,
|
|
12401
12697
|
weight,
|
|
12402
12698
|
hit: false,
|
|
12403
|
-
message: `${
|
|
12699
|
+
message: `${path48} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
|
|
12404
12700
|
};
|
|
12405
12701
|
}
|
|
12406
12702
|
/**
|
|
12407
12703
|
* Date comparison with format normalization.
|
|
12408
12704
|
*/
|
|
12409
|
-
compareDate(
|
|
12705
|
+
compareDate(path48, candidateValue, expectedValue, fieldConfig, weight) {
|
|
12410
12706
|
const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
|
|
12411
12707
|
const candidateDate = parseDate(String(candidateValue), formats);
|
|
12412
12708
|
const expectedDate = parseDate(String(expectedValue), formats);
|
|
12413
12709
|
if (candidateDate === null) {
|
|
12414
12710
|
return {
|
|
12415
|
-
path:
|
|
12711
|
+
path: path48,
|
|
12416
12712
|
score: 0,
|
|
12417
12713
|
weight,
|
|
12418
12714
|
hit: false,
|
|
12419
|
-
message: `${
|
|
12715
|
+
message: `${path48} (unparseable candidate date)`
|
|
12420
12716
|
};
|
|
12421
12717
|
}
|
|
12422
12718
|
if (expectedDate === null) {
|
|
12423
12719
|
return {
|
|
12424
|
-
path:
|
|
12720
|
+
path: path48,
|
|
12425
12721
|
score: 0,
|
|
12426
12722
|
weight,
|
|
12427
12723
|
hit: false,
|
|
12428
|
-
message: `${
|
|
12724
|
+
message: `${path48} (unparseable expected date)`
|
|
12429
12725
|
};
|
|
12430
12726
|
}
|
|
12431
12727
|
if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
|
|
12432
12728
|
return {
|
|
12433
|
-
path:
|
|
12729
|
+
path: path48,
|
|
12434
12730
|
score: 1,
|
|
12435
12731
|
weight,
|
|
12436
12732
|
hit: true,
|
|
12437
|
-
message:
|
|
12733
|
+
message: path48
|
|
12438
12734
|
};
|
|
12439
12735
|
}
|
|
12440
12736
|
return {
|
|
12441
|
-
path:
|
|
12737
|
+
path: path48,
|
|
12442
12738
|
score: 0,
|
|
12443
12739
|
weight,
|
|
12444
12740
|
hit: false,
|
|
12445
|
-
message: `${
|
|
12741
|
+
message: `${path48} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
|
|
12446
12742
|
};
|
|
12447
12743
|
}
|
|
12448
12744
|
/**
|
|
@@ -12475,11 +12771,11 @@ var FieldAccuracyEvaluator = class {
|
|
|
12475
12771
|
};
|
|
12476
12772
|
}
|
|
12477
12773
|
};
|
|
12478
|
-
function resolvePath(obj,
|
|
12479
|
-
if (!
|
|
12774
|
+
function resolvePath(obj, path48) {
|
|
12775
|
+
if (!path48 || !obj) {
|
|
12480
12776
|
return void 0;
|
|
12481
12777
|
}
|
|
12482
|
-
const parts =
|
|
12778
|
+
const parts = path48.split(/\.|\[|\]/).filter((p) => p.length > 0);
|
|
12483
12779
|
let current = obj;
|
|
12484
12780
|
for (const part of parts) {
|
|
12485
12781
|
if (current === null || current === void 0) {
|
|
@@ -12771,11 +13067,12 @@ function assembleLlmGraderPrompt(input) {
|
|
|
12771
13067
|
function assembleFreeform(evalCase, candidate, promptInputs, fileChanges, evaluatorTemplateOverride) {
|
|
12772
13068
|
const formattedQuestion = promptInputs.question && promptInputs.question.trim().length > 0 ? promptInputs.question : evalCase.question;
|
|
12773
13069
|
const variables = {
|
|
12774
|
-
[TEMPLATE_VARIABLES.INPUT]:
|
|
12775
|
-
[TEMPLATE_VARIABLES.
|
|
12776
|
-
[TEMPLATE_VARIABLES.
|
|
13070
|
+
[TEMPLATE_VARIABLES.INPUT]: formattedQuestion.trim(),
|
|
13071
|
+
[TEMPLATE_VARIABLES.OUTPUT]: candidate.trim(),
|
|
13072
|
+
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: (evalCase.reference_answer ?? "").trim(),
|
|
12777
13073
|
[TEMPLATE_VARIABLES.CRITERIA]: evalCase.criteria.trim(),
|
|
12778
13074
|
[TEMPLATE_VARIABLES.FILE_CHANGES]: fileChanges ?? "",
|
|
13075
|
+
// Deprecated aliases
|
|
12779
13076
|
[TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
|
|
12780
13077
|
[TEMPLATE_VARIABLES.OUTPUT_TEXT]: candidate.trim(),
|
|
12781
13078
|
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (evalCase.reference_answer ?? "").trim()
|
|
@@ -12962,8 +13259,8 @@ var TokenUsageEvaluator = class {
|
|
|
12962
13259
|
};
|
|
12963
13260
|
|
|
12964
13261
|
// src/evaluation/evaluators/tool-trajectory.ts
|
|
12965
|
-
function getNestedValue(obj,
|
|
12966
|
-
const parts =
|
|
13262
|
+
function getNestedValue(obj, path48) {
|
|
13263
|
+
const parts = path48.split(".");
|
|
12967
13264
|
let current = obj;
|
|
12968
13265
|
for (const part of parts) {
|
|
12969
13266
|
if (current === null || current === void 0 || typeof current !== "object") {
|
|
@@ -13832,16 +14129,13 @@ async function executePromptTemplate(script, context, config, timeoutMs) {
|
|
|
13832
14129
|
const payload = {
|
|
13833
14130
|
criteria: context.evalCase.criteria,
|
|
13834
14131
|
expectedOutput: context.evalCase.expected_output,
|
|
13835
|
-
outputText: context.candidate,
|
|
13836
14132
|
output: context.output ?? null,
|
|
13837
14133
|
inputFiles: context.evalCase.file_paths,
|
|
13838
14134
|
input: context.evalCase.input,
|
|
13839
14135
|
trace: context.trace ?? null,
|
|
13840
14136
|
fileChanges: context.fileChanges ?? null,
|
|
13841
14137
|
workspacePath: context.workspacePath ?? null,
|
|
13842
|
-
config: config ?? context.config ?? null
|
|
13843
|
-
inputText: context.evalCase.question,
|
|
13844
|
-
expectedOutputText: context.evalCase.reference_answer ?? ""
|
|
14138
|
+
config: config ?? context.config ?? null
|
|
13845
14139
|
};
|
|
13846
14140
|
const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
|
|
13847
14141
|
const scriptPath = script[script.length - 1];
|
|
@@ -15521,7 +15815,8 @@ async function runEvaluation(options) {
|
|
|
15521
15815
|
const budgetResult = {
|
|
15522
15816
|
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
15523
15817
|
testId: evalCase.id,
|
|
15524
|
-
|
|
15818
|
+
dataset: evalCase.dataset,
|
|
15819
|
+
category: evalCase.category,
|
|
15525
15820
|
score: 0,
|
|
15526
15821
|
assertions: [],
|
|
15527
15822
|
output: [],
|
|
@@ -15557,7 +15852,8 @@ async function runEvaluation(options) {
|
|
|
15557
15852
|
const haltResult = {
|
|
15558
15853
|
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
15559
15854
|
testId: evalCase.id,
|
|
15560
|
-
|
|
15855
|
+
dataset: evalCase.dataset,
|
|
15856
|
+
category: evalCase.category,
|
|
15561
15857
|
score: 0,
|
|
15562
15858
|
assertions: [],
|
|
15563
15859
|
output: [],
|
|
@@ -16556,7 +16852,8 @@ async function evaluateCandidate(options) {
|
|
|
16556
16852
|
return {
|
|
16557
16853
|
timestamp: completedAt.toISOString(),
|
|
16558
16854
|
testId: evalCase.id,
|
|
16559
|
-
|
|
16855
|
+
dataset: evalCase.dataset,
|
|
16856
|
+
category: evalCase.category,
|
|
16560
16857
|
conversationId: evalCase.conversation_id,
|
|
16561
16858
|
score: score.score,
|
|
16562
16859
|
assertions: score.assertions,
|
|
@@ -16906,7 +17203,8 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
|
|
|
16906
17203
|
return {
|
|
16907
17204
|
timestamp: timestamp.toISOString(),
|
|
16908
17205
|
testId: evalCase.id,
|
|
16909
|
-
|
|
17206
|
+
dataset: evalCase.dataset,
|
|
17207
|
+
category: evalCase.category,
|
|
16910
17208
|
conversationId: evalCase.conversation_id,
|
|
16911
17209
|
score: 0,
|
|
16912
17210
|
assertions: [{ text: `Error: ${message}`, passed: false }],
|
|
@@ -17479,6 +17777,18 @@ function trimBaselineResult(result) {
|
|
|
17479
17777
|
return trimmed;
|
|
17480
17778
|
}
|
|
17481
17779
|
|
|
17780
|
+
// src/evaluation/category.ts
|
|
17781
|
+
import path47 from "node:path";
|
|
17782
|
+
var DEFAULT_CATEGORY = "Uncategorized";
/**
 * Derive a display category from an eval file's relative path.
 *
 * The category is the file's directory chain with every "evals" segment
 * removed, joined with "/". A file with no directory, or whose directories
 * are all named "evals", falls back to `DEFAULT_CATEGORY`.
 *
 * Both "/" and "\" are treated as separators regardless of host platform.
 * Splitting only on the platform separator made forward-slash paths on
 * Windows collapse to `DEFAULT_CATEGORY`, and gave different categories for
 * the same path on different platforms.
 *
 * @param {string} relativePath - Eval file path relative to the search root.
 * @returns {string} "/"-joined category, or `DEFAULT_CATEGORY`.
 */
function deriveCategory(relativePath) {
  const segments = relativePath.split(/[\\/]/);
  if (segments.length <= 1) {
    return DEFAULT_CATEGORY;
  }
  // Drop the trailing file name, then ignore conventional "evals" folders.
  const directories = segments.slice(0, -1).filter((dir) => dir !== "evals");
  return directories.length > 0 ? directories.join("/") : DEFAULT_CATEGORY;
}
|
|
17791
|
+
|
|
17482
17792
|
// src/observability/otel-exporter.ts
|
|
17483
17793
|
var OTEL_BACKEND_PRESETS = {
|
|
17484
17794
|
langfuse: {
|
|
@@ -17602,7 +17912,7 @@ var OtelTraceExporter = class {
|
|
|
17602
17912
|
rootSpan.setAttribute("gen_ai.system", "agentv");
|
|
17603
17913
|
rootSpan.setAttribute("agentv.test_id", result.testId);
|
|
17604
17914
|
rootSpan.setAttribute("agentv.target", result.target);
|
|
17605
|
-
if (result.
|
|
17915
|
+
if (result.dataset) rootSpan.setAttribute("agentv.dataset", result.dataset);
|
|
17606
17916
|
rootSpan.setAttribute("agentv.score", result.score);
|
|
17607
17917
|
if (captureContent && result.output.length > 0) {
|
|
17608
17918
|
const lastMsg = result.output[result.output.length - 1];
|
|
@@ -17811,7 +18121,7 @@ var OtelStreamingObserver = class {
|
|
|
17811
18121
|
this.rootSpan.setAttribute("gen_ai.system", "agentv");
|
|
17812
18122
|
this.rootSpan.setAttribute("agentv.test_id", testId);
|
|
17813
18123
|
this.rootSpan.setAttribute("agentv.target", target);
|
|
17814
|
-
if (evalSet) this.rootSpan.setAttribute("agentv.
|
|
18124
|
+
if (evalSet) this.rootSpan.setAttribute("agentv.dataset", evalSet);
|
|
17815
18125
|
this.rootCtx = this.api.trace.setSpan(this.api.context.active(), this.rootSpan);
|
|
17816
18126
|
}
|
|
17817
18127
|
/** Create and immediately export a tool span */
|
|
@@ -17983,9 +18293,11 @@ function createAgentKernel() {
|
|
|
17983
18293
|
return { status: "stub" };
|
|
17984
18294
|
}
|
|
17985
18295
|
export {
|
|
18296
|
+
COMMON_TARGET_SETTINGS,
|
|
17986
18297
|
CodeEvaluator,
|
|
17987
18298
|
CompositeEvaluator,
|
|
17988
18299
|
CostEvaluator,
|
|
18300
|
+
DEFAULT_CATEGORY,
|
|
17989
18301
|
DEFAULT_EVALUATOR_TEMPLATE,
|
|
17990
18302
|
DEFAULT_EVAL_PATTERNS,
|
|
17991
18303
|
DEFAULT_EXPLORATION_TOOLS,
|
|
@@ -18039,6 +18351,7 @@ export {
|
|
|
18039
18351
|
createTempWorkspace,
|
|
18040
18352
|
deepEqual,
|
|
18041
18353
|
defineConfig,
|
|
18354
|
+
deriveCategory,
|
|
18042
18355
|
detectFormat,
|
|
18043
18356
|
discoverAssertions,
|
|
18044
18357
|
discoverCopilotSessions,
|
|
@@ -18052,7 +18365,9 @@ export {
|
|
|
18052
18365
|
explorationRatio,
|
|
18053
18366
|
extractCacheConfig,
|
|
18054
18367
|
extractFailOnError,
|
|
18368
|
+
extractImageBlocks,
|
|
18055
18369
|
extractJsonBlob,
|
|
18370
|
+
extractLastAssistantContent,
|
|
18056
18371
|
extractTargetFromSuite,
|
|
18057
18372
|
extractTargetsFromSuite,
|
|
18058
18373
|
extractTargetsFromTestCase,
|
|
@@ -18066,12 +18381,15 @@ export {
|
|
|
18066
18381
|
getAgentvHome,
|
|
18067
18382
|
getOutputFilenames,
|
|
18068
18383
|
getSubagentsRoot,
|
|
18384
|
+
getTextContent,
|
|
18069
18385
|
getTraceStateRoot,
|
|
18070
18386
|
getWorkspacePath,
|
|
18071
18387
|
getWorkspacePoolRoot,
|
|
18072
18388
|
getWorkspacesRoot,
|
|
18073
18389
|
initializeBaseline,
|
|
18074
18390
|
isAgentSkillsFormat,
|
|
18391
|
+
isContent,
|
|
18392
|
+
isContentArray,
|
|
18075
18393
|
isEvaluatorKind,
|
|
18076
18394
|
isJsonObject,
|
|
18077
18395
|
isJsonValue,
|