@agentv/core 3.14.5 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-HP5PFOVK.js → chunk-PXYYRDHH.js} +142 -148
- package/dist/chunk-PXYYRDHH.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +9 -2
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +3 -2
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +567 -256
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +135 -93
- package/dist/index.d.ts +135 -93
- package/dist/index.js +457 -140
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-HP5PFOVK.js.map +0 -1
package/dist/index.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import {
|
|
2
|
+
COMMON_TARGET_SETTINGS,
|
|
2
3
|
TEST_MESSAGE_ROLES,
|
|
3
4
|
buildDirectoryChain,
|
|
4
5
|
buildSearchRoots,
|
|
@@ -6,8 +7,11 @@ import {
|
|
|
6
7
|
extractLastAssistantContent,
|
|
7
8
|
fileExists,
|
|
8
9
|
findGitRoot,
|
|
10
|
+
getTextContent,
|
|
9
11
|
interpolateEnv,
|
|
10
12
|
isAgentProvider,
|
|
13
|
+
isContent,
|
|
14
|
+
isContentArray,
|
|
11
15
|
isEvaluatorKind,
|
|
12
16
|
isJsonObject,
|
|
13
17
|
isJsonValue,
|
|
@@ -19,7 +23,7 @@ import {
|
|
|
19
23
|
readTextFile,
|
|
20
24
|
resolveFileReference,
|
|
21
25
|
resolveTargetDefinition
|
|
22
|
-
} from "./chunk-HP5PFOVK.js";
|
|
26
|
+
} from "./chunk-PXYYRDHH.js";
|
|
23
27
|
import {
|
|
24
28
|
AgentvProvider
|
|
25
29
|
} from "./chunk-W5YDZWT4.js";
|
|
@@ -690,15 +694,23 @@ var TEMPLATE_VARIABLES = {
|
|
|
690
694
|
INPUT: "input",
|
|
691
695
|
OUTPUT: "output",
|
|
692
696
|
FILE_CHANGES: "file_changes",
|
|
697
|
+
/** @deprecated Use INPUT instead — resolves to the same text value. */
|
|
693
698
|
INPUT_TEXT: "input_text",
|
|
699
|
+
/** @deprecated Use OUTPUT instead — resolves to the same text value. */
|
|
694
700
|
OUTPUT_TEXT: "output_text",
|
|
701
|
+
/** @deprecated Use EXPECTED_OUTPUT instead — resolves to the same text value. */
|
|
695
702
|
EXPECTED_OUTPUT_TEXT: "expected_output_text"
|
|
696
703
|
};
|
|
697
704
|
var VALID_TEMPLATE_VARIABLES = new Set(Object.values(TEMPLATE_VARIABLES));
|
|
698
705
|
var REQUIRED_TEMPLATE_VARIABLES = /* @__PURE__ */ new Set([
|
|
699
|
-
TEMPLATE_VARIABLES.
|
|
706
|
+
TEMPLATE_VARIABLES.OUTPUT,
|
|
700
707
|
TEMPLATE_VARIABLES.EXPECTED_OUTPUT
|
|
701
708
|
]);
|
|
709
|
+
var DEPRECATED_TEMPLATE_VARIABLES = /* @__PURE__ */ new Map([
|
|
710
|
+
[TEMPLATE_VARIABLES.INPUT_TEXT, TEMPLATE_VARIABLES.INPUT],
|
|
711
|
+
[TEMPLATE_VARIABLES.OUTPUT_TEXT, TEMPLATE_VARIABLES.OUTPUT],
|
|
712
|
+
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT, TEMPLATE_VARIABLES.EXPECTED_OUTPUT]
|
|
713
|
+
]);
|
|
702
714
|
|
|
703
715
|
// src/evaluation/validation/prompt-validator.ts
|
|
704
716
|
var ANSI_YELLOW2 = "\x1B[33m";
|
|
@@ -720,16 +732,29 @@ function validateTemplateVariables(content, source) {
|
|
|
720
732
|
}
|
|
721
733
|
match = variablePattern.exec(content);
|
|
722
734
|
}
|
|
723
|
-
const hasCandidateAnswer = foundVariables.has(TEMPLATE_VARIABLES.OUTPUT_TEXT);
|
|
735
|
+
const hasCandidateAnswer = foundVariables.has(TEMPLATE_VARIABLES.OUTPUT) || foundVariables.has(TEMPLATE_VARIABLES.OUTPUT_TEXT);
|
|
724
736
|
const hasExpectedOutput = foundVariables.has(TEMPLATE_VARIABLES.EXPECTED_OUTPUT);
|
|
725
737
|
const hasRequiredFields = hasCandidateAnswer || hasExpectedOutput;
|
|
726
738
|
if (!hasRequiredFields) {
|
|
727
739
|
throw new Error(
|
|
728
740
|
`Missing required fields. Must include at least one of:
|
|
729
|
-
- {{ ${TEMPLATE_VARIABLES.
|
|
741
|
+
- {{ ${TEMPLATE_VARIABLES.OUTPUT} }}
|
|
730
742
|
- {{ ${TEMPLATE_VARIABLES.EXPECTED_OUTPUT} }}`
|
|
731
743
|
);
|
|
732
744
|
}
|
|
745
|
+
const deprecatedUsed = [];
|
|
746
|
+
for (const [deprecated, replacement] of DEPRECATED_TEMPLATE_VARIABLES) {
|
|
747
|
+
if (foundVariables.has(deprecated)) {
|
|
748
|
+
deprecatedUsed.push(`{{ ${deprecated} }} \u2192 {{ ${replacement} }}`);
|
|
749
|
+
}
|
|
750
|
+
}
|
|
751
|
+
if (deprecatedUsed.length > 0) {
|
|
752
|
+
console.warn(
|
|
753
|
+
`${ANSI_YELLOW2}Warning: Template at ${source} uses deprecated variable names:
|
|
754
|
+
${deprecatedUsed.join("\n ")}
|
|
755
|
+
These still work but will be removed in a future version.${ANSI_RESET3}`
|
|
756
|
+
);
|
|
757
|
+
}
|
|
733
758
|
if (invalidVariables.length > 0) {
|
|
734
759
|
const warningMessage = `${ANSI_YELLOW2}Warning: Custom evaluator template at ${source}
|
|
735
760
|
Contains invalid variables: ${invalidVariables.map((v) => `{{ ${v} }}`).join(", ")}
|
|
@@ -2147,6 +2172,19 @@ function asString2(value) {
|
|
|
2147
2172
|
}
|
|
2148
2173
|
|
|
2149
2174
|
// src/evaluation/loaders/message-processor.ts
|
|
2175
|
+
var IMAGE_MEDIA_TYPES = {
|
|
2176
|
+
".png": "image/png",
|
|
2177
|
+
".jpg": "image/jpeg",
|
|
2178
|
+
".jpeg": "image/jpeg",
|
|
2179
|
+
".gif": "image/gif",
|
|
2180
|
+
".webp": "image/webp",
|
|
2181
|
+
".svg": "image/svg+xml",
|
|
2182
|
+
".bmp": "image/bmp"
|
|
2183
|
+
};
|
|
2184
|
+
function detectImageMediaType(filePath) {
|
|
2185
|
+
const ext = path5.extname(filePath).toLowerCase();
|
|
2186
|
+
return IMAGE_MEDIA_TYPES[ext];
|
|
2187
|
+
}
|
|
2150
2188
|
var ANSI_YELLOW4 = "\x1B[33m";
|
|
2151
2189
|
var ANSI_RESET5 = "\x1B[0m";
|
|
2152
2190
|
async function processMessages(options) {
|
|
@@ -2212,6 +2250,47 @@ async function processMessages(options) {
|
|
|
2212
2250
|
}
|
|
2213
2251
|
continue;
|
|
2214
2252
|
}
|
|
2253
|
+
if (segmentType === "image") {
|
|
2254
|
+
const rawValue = asString3(rawSegment.value);
|
|
2255
|
+
if (!rawValue) {
|
|
2256
|
+
continue;
|
|
2257
|
+
}
|
|
2258
|
+
const { displayPath, resolvedPath, attempted } = await resolveFileReference2(
|
|
2259
|
+
rawValue,
|
|
2260
|
+
searchRoots
|
|
2261
|
+
);
|
|
2262
|
+
if (!resolvedPath) {
|
|
2263
|
+
const attempts = attempted.length ? [" Tried:", ...attempted.map((candidate) => ` ${candidate}`)] : void 0;
|
|
2264
|
+
const context = messageType === "input" ? "" : " in expected_output";
|
|
2265
|
+
logWarning3(`Image file not found${context}: ${displayPath}`, attempts);
|
|
2266
|
+
continue;
|
|
2267
|
+
}
|
|
2268
|
+
const mediaType = detectImageMediaType(resolvedPath);
|
|
2269
|
+
if (!mediaType) {
|
|
2270
|
+
logWarning3(
|
|
2271
|
+
`Unsupported image extension for ${displayPath}. Supported: ${Object.keys(IMAGE_MEDIA_TYPES).join(", ")}`
|
|
2272
|
+
);
|
|
2273
|
+
continue;
|
|
2274
|
+
}
|
|
2275
|
+
try {
|
|
2276
|
+
const imageBuffer = await readFile4(resolvedPath);
|
|
2277
|
+
const base64 = imageBuffer.toString("base64");
|
|
2278
|
+
processedContent.push({
|
|
2279
|
+
type: "image",
|
|
2280
|
+
media_type: mediaType,
|
|
2281
|
+
source: `data:${mediaType};base64,${base64}`
|
|
2282
|
+
});
|
|
2283
|
+
if (verbose) {
|
|
2284
|
+
const label = messageType === "input" ? "[Image]" : "[Expected Output Image]";
|
|
2285
|
+
console.log(` ${label} Found: ${displayPath}`);
|
|
2286
|
+
console.log(` Resolved to: ${resolvedPath} (${mediaType})`);
|
|
2287
|
+
}
|
|
2288
|
+
} catch (error) {
|
|
2289
|
+
const context = messageType === "input" ? "" : " expected output";
|
|
2290
|
+
logWarning3(`Could not read${context} image ${resolvedPath}: ${error.message}`);
|
|
2291
|
+
}
|
|
2292
|
+
continue;
|
|
2293
|
+
}
|
|
2215
2294
|
const clonedSegment = cloneJsonObject(rawSegment);
|
|
2216
2295
|
processedContent.push(clonedSegment);
|
|
2217
2296
|
const inlineValue = clonedSegment.value;
|
|
@@ -2289,6 +2368,46 @@ async function processExpectedMessages(options) {
|
|
|
2289
2368
|
}
|
|
2290
2369
|
continue;
|
|
2291
2370
|
}
|
|
2371
|
+
if (segmentType === "image") {
|
|
2372
|
+
const rawValue = asString3(rawSegment.value);
|
|
2373
|
+
if (!rawValue) {
|
|
2374
|
+
continue;
|
|
2375
|
+
}
|
|
2376
|
+
const { displayPath, resolvedPath, attempted } = await resolveFileReference2(
|
|
2377
|
+
rawValue,
|
|
2378
|
+
searchRoots
|
|
2379
|
+
);
|
|
2380
|
+
if (!resolvedPath) {
|
|
2381
|
+
const attempts = attempted.length ? [" Tried:", ...attempted.map((candidate) => ` ${candidate}`)] : void 0;
|
|
2382
|
+
logWarning3(`Image file not found in expected_output: ${displayPath}`, attempts);
|
|
2383
|
+
continue;
|
|
2384
|
+
}
|
|
2385
|
+
const mediaType = detectImageMediaType(resolvedPath);
|
|
2386
|
+
if (!mediaType) {
|
|
2387
|
+
logWarning3(
|
|
2388
|
+
`Unsupported image extension for ${displayPath}. Supported: ${Object.keys(IMAGE_MEDIA_TYPES).join(", ")}`
|
|
2389
|
+
);
|
|
2390
|
+
continue;
|
|
2391
|
+
}
|
|
2392
|
+
try {
|
|
2393
|
+
const imageBuffer = await readFile4(resolvedPath);
|
|
2394
|
+
const base64 = imageBuffer.toString("base64");
|
|
2395
|
+
processedContent.push({
|
|
2396
|
+
type: "image",
|
|
2397
|
+
media_type: mediaType,
|
|
2398
|
+
source: `data:${mediaType};base64,${base64}`
|
|
2399
|
+
});
|
|
2400
|
+
if (verbose) {
|
|
2401
|
+
console.log(` [Expected Output Image] Found: ${displayPath}`);
|
|
2402
|
+
console.log(` Resolved to: ${resolvedPath} (${mediaType})`);
|
|
2403
|
+
}
|
|
2404
|
+
} catch (error) {
|
|
2405
|
+
logWarning3(
|
|
2406
|
+
`Could not read expected output image ${resolvedPath}: ${error.message}`
|
|
2407
|
+
);
|
|
2408
|
+
}
|
|
2409
|
+
continue;
|
|
2410
|
+
}
|
|
2292
2411
|
processedContent.push(cloneJsonObject(rawSegment));
|
|
2293
2412
|
}
|
|
2294
2413
|
segment.content = processedContent;
|
|
@@ -2535,7 +2654,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
2535
2654
|
const userFilePaths = collectResolvedInputFilePaths(inputMessages);
|
|
2536
2655
|
const testCase = {
|
|
2537
2656
|
id,
|
|
2538
|
-
|
|
2657
|
+
dataset: evalSetName,
|
|
2539
2658
|
conversation_id: conversationId,
|
|
2540
2659
|
question,
|
|
2541
2660
|
input: inputMessages,
|
|
@@ -2806,7 +2925,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
2806
2925
|
}
|
|
2807
2926
|
const suite = interpolated;
|
|
2808
2927
|
const evalSetNameFromSuite = asString5(suite.name)?.trim();
|
|
2809
|
-
const fallbackEvalSet = path7.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
|
|
2928
|
+
const fallbackEvalSet = path7.basename(absoluteTestPath).replace(/\.eval\.ya?ml$/i, "").replace(/\.ya?ml$/i, "") || "eval";
|
|
2810
2929
|
const evalSetName = evalSetNameFromSuite && evalSetNameFromSuite.length > 0 ? evalSetNameFromSuite : fallbackEvalSet;
|
|
2811
2930
|
const rawTestcases = resolveTests(suite);
|
|
2812
2931
|
const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm-grader";
|
|
@@ -2927,7 +3046,8 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
2927
3046
|
const caseTargets = extractTargetsFromTestCase(evalcase);
|
|
2928
3047
|
const testCase = {
|
|
2929
3048
|
id,
|
|
2930
|
-
|
|
3049
|
+
dataset: evalSetName,
|
|
3050
|
+
category: options?.category,
|
|
2931
3051
|
conversation_id: conversationId,
|
|
2932
3052
|
question,
|
|
2933
3053
|
input: inputMessages,
|
|
@@ -3851,6 +3971,48 @@ import { createWriteStream } from "node:fs";
|
|
|
3851
3971
|
import { mkdir } from "node:fs/promises";
|
|
3852
3972
|
import path10 from "node:path";
|
|
3853
3973
|
|
|
3974
|
+
// src/evaluation/providers/claude-content.ts
|
|
3975
|
+
function toContentArray(content) {
|
|
3976
|
+
if (!Array.isArray(content)) return void 0;
|
|
3977
|
+
let hasNonText = false;
|
|
3978
|
+
const blocks = [];
|
|
3979
|
+
for (const part of content) {
|
|
3980
|
+
if (!part || typeof part !== "object") continue;
|
|
3981
|
+
const p = part;
|
|
3982
|
+
if (p.type === "text" && typeof p.text === "string") {
|
|
3983
|
+
blocks.push({ type: "text", text: p.text });
|
|
3984
|
+
} else if (p.type === "image" && typeof p.source === "object" && p.source !== null) {
|
|
3985
|
+
const src = p.source;
|
|
3986
|
+
const mediaType = typeof p.media_type === "string" ? p.media_type : typeof src.media_type === "string" ? src.media_type : "application/octet-stream";
|
|
3987
|
+
const data = typeof src.data === "string" ? `data:${mediaType};base64,${src.data}` : typeof p.url === "string" ? p.url : "";
|
|
3988
|
+
blocks.push({ type: "image", media_type: mediaType, source: data });
|
|
3989
|
+
hasNonText = true;
|
|
3990
|
+
} else if (p.type === "tool_use") {
|
|
3991
|
+
} else if (p.type === "tool_result") {
|
|
3992
|
+
}
|
|
3993
|
+
}
|
|
3994
|
+
return hasNonText && blocks.length > 0 ? blocks : void 0;
|
|
3995
|
+
}
|
|
3996
|
+
function extractTextContent(content) {
|
|
3997
|
+
if (typeof content === "string") {
|
|
3998
|
+
return content;
|
|
3999
|
+
}
|
|
4000
|
+
if (!Array.isArray(content)) {
|
|
4001
|
+
return void 0;
|
|
4002
|
+
}
|
|
4003
|
+
const textParts = [];
|
|
4004
|
+
for (const part of content) {
|
|
4005
|
+
if (!part || typeof part !== "object") {
|
|
4006
|
+
continue;
|
|
4007
|
+
}
|
|
4008
|
+
const p = part;
|
|
4009
|
+
if (p.type === "text" && typeof p.text === "string") {
|
|
4010
|
+
textParts.push(p.text);
|
|
4011
|
+
}
|
|
4012
|
+
}
|
|
4013
|
+
return textParts.length > 0 ? textParts.join("\n") : void 0;
|
|
4014
|
+
}
|
|
4015
|
+
|
|
3854
4016
|
// src/evaluation/providers/claude-log-tracker.ts
|
|
3855
4017
|
var GLOBAL_LOGS_KEY = Symbol.for("agentv.claudeLogs");
|
|
3856
4018
|
var GLOBAL_SUBSCRIBERS_KEY = Symbol.for("agentv.claudeLogSubscribers");
|
|
@@ -4016,11 +4178,12 @@ var ClaudeCliProvider = class {
|
|
|
4016
4178
|
if (betaMessage && typeof betaMessage === "object") {
|
|
4017
4179
|
const msg = betaMessage;
|
|
4018
4180
|
const content = msg.content;
|
|
4181
|
+
const structuredContent = toContentArray(content);
|
|
4019
4182
|
const textContent = extractTextContent(content);
|
|
4020
4183
|
const toolCalls = extractToolCalls(content);
|
|
4021
4184
|
const outputMsg = {
|
|
4022
4185
|
role: "assistant",
|
|
4023
|
-
content: textContent,
|
|
4186
|
+
content: structuredContent ?? textContent,
|
|
4024
4187
|
toolCalls: toolCalls.length > 0 ? toolCalls : void 0
|
|
4025
4188
|
};
|
|
4026
4189
|
output.push(outputMsg);
|
|
@@ -4359,25 +4522,6 @@ function summarizeEvent(event) {
|
|
|
4359
4522
|
return void 0;
|
|
4360
4523
|
}
|
|
4361
4524
|
}
|
|
4362
|
-
function extractTextContent(content) {
|
|
4363
|
-
if (typeof content === "string") {
|
|
4364
|
-
return content;
|
|
4365
|
-
}
|
|
4366
|
-
if (!Array.isArray(content)) {
|
|
4367
|
-
return void 0;
|
|
4368
|
-
}
|
|
4369
|
-
const textParts = [];
|
|
4370
|
-
for (const part of content) {
|
|
4371
|
-
if (!part || typeof part !== "object") {
|
|
4372
|
-
continue;
|
|
4373
|
-
}
|
|
4374
|
-
const p = part;
|
|
4375
|
-
if (p.type === "text" && typeof p.text === "string") {
|
|
4376
|
-
textParts.push(p.text);
|
|
4377
|
-
}
|
|
4378
|
-
}
|
|
4379
|
-
return textParts.length > 0 ? textParts.join("\n") : void 0;
|
|
4380
|
-
}
|
|
4381
4525
|
function extractToolCalls(content) {
|
|
4382
4526
|
if (!Array.isArray(content)) {
|
|
4383
4527
|
return [];
|
|
@@ -4550,11 +4694,12 @@ var ClaudeSdkProvider = class {
|
|
|
4550
4694
|
if (betaMessage && typeof betaMessage === "object") {
|
|
4551
4695
|
const msg = betaMessage;
|
|
4552
4696
|
const content = msg.content;
|
|
4553
|
-
const textContent = extractTextContent2(content);
|
|
4697
|
+
const structuredContent = toContentArray(content);
|
|
4698
|
+
const textContent = extractTextContent(content);
|
|
4554
4699
|
const toolCalls = extractToolCalls2(content);
|
|
4555
4700
|
const outputMsg = {
|
|
4556
4701
|
role: "assistant",
|
|
4557
|
-
content: textContent,
|
|
4702
|
+
content: structuredContent ?? textContent,
|
|
4558
4703
|
toolCalls: toolCalls.length > 0 ? toolCalls : void 0
|
|
4559
4704
|
};
|
|
4560
4705
|
output.push(outputMsg);
|
|
@@ -4672,25 +4817,6 @@ var ClaudeSdkProvider = class {
|
|
|
4672
4817
|
}
|
|
4673
4818
|
}
|
|
4674
4819
|
};
|
|
4675
|
-
function extractTextContent2(content) {
|
|
4676
|
-
if (typeof content === "string") {
|
|
4677
|
-
return content;
|
|
4678
|
-
}
|
|
4679
|
-
if (!Array.isArray(content)) {
|
|
4680
|
-
return void 0;
|
|
4681
|
-
}
|
|
4682
|
-
const textParts = [];
|
|
4683
|
-
for (const part of content) {
|
|
4684
|
-
if (!part || typeof part !== "object") {
|
|
4685
|
-
continue;
|
|
4686
|
-
}
|
|
4687
|
-
const p = part;
|
|
4688
|
-
if (p.type === "text" && typeof p.text === "string") {
|
|
4689
|
-
textParts.push(p.text);
|
|
4690
|
-
}
|
|
4691
|
-
}
|
|
4692
|
-
return textParts.length > 0 ? textParts.join("\n") : void 0;
|
|
4693
|
-
}
|
|
4694
4820
|
function extractToolCalls2(content) {
|
|
4695
4821
|
if (!Array.isArray(content)) {
|
|
4696
4822
|
return [];
|
|
@@ -4914,7 +5040,7 @@ function convertMessages(messages) {
|
|
|
4914
5040
|
return messages.map((msg) => ({
|
|
4915
5041
|
role: msg.role,
|
|
4916
5042
|
name: msg.name,
|
|
4917
|
-
content: msg.content,
|
|
5043
|
+
content: isContentArray(msg.content) ? msg.content : typeof msg.content === "string" ? msg.content : void 0,
|
|
4918
5044
|
toolCalls: msg.tool_calls?.map((tc) => ({
|
|
4919
5045
|
tool: tc.tool,
|
|
4920
5046
|
input: tc.input,
|
|
@@ -7167,6 +7293,35 @@ function extractPiTextContent(content) {
|
|
|
7167
7293
|
}
|
|
7168
7294
|
return textParts.length > 0 ? textParts.join("\n") : void 0;
|
|
7169
7295
|
}
|
|
7296
|
+
function toPiContentArray(content) {
|
|
7297
|
+
if (!Array.isArray(content)) return void 0;
|
|
7298
|
+
let hasNonText = false;
|
|
7299
|
+
const blocks = [];
|
|
7300
|
+
for (const part of content) {
|
|
7301
|
+
if (!part || typeof part !== "object") continue;
|
|
7302
|
+
const p = part;
|
|
7303
|
+
if (p.type === "text" && typeof p.text === "string") {
|
|
7304
|
+
blocks.push({ type: "text", text: p.text });
|
|
7305
|
+
} else if (p.type === "image") {
|
|
7306
|
+
const mediaType = typeof p.media_type === "string" ? p.media_type : "application/octet-stream";
|
|
7307
|
+
let source = "";
|
|
7308
|
+
if (typeof p.source === "object" && p.source !== null) {
|
|
7309
|
+
const src = p.source;
|
|
7310
|
+
const srcMediaType = typeof src.media_type === "string" ? src.media_type : mediaType;
|
|
7311
|
+
source = typeof src.data === "string" ? `data:${srcMediaType};base64,${src.data}` : "";
|
|
7312
|
+
}
|
|
7313
|
+
if (!source && typeof p.url === "string") {
|
|
7314
|
+
source = p.url;
|
|
7315
|
+
}
|
|
7316
|
+
if (source) {
|
|
7317
|
+
blocks.push({ type: "image", media_type: mediaType, source });
|
|
7318
|
+
hasNonText = true;
|
|
7319
|
+
}
|
|
7320
|
+
} else if (p.type === "tool_use" || p.type === "tool_result") {
|
|
7321
|
+
}
|
|
7322
|
+
}
|
|
7323
|
+
return hasNonText && blocks.length > 0 ? blocks : void 0;
|
|
7324
|
+
}
|
|
7170
7325
|
function toFiniteNumber(value) {
|
|
7171
7326
|
if (typeof value === "number" && Number.isFinite(value)) return value;
|
|
7172
7327
|
return void 0;
|
|
@@ -8337,7 +8492,8 @@ function convertAgentMessage(message, toolTrackers, completedToolResults) {
|
|
|
8337
8492
|
}
|
|
8338
8493
|
const msg = message;
|
|
8339
8494
|
const role = typeof msg.role === "string" ? msg.role : "unknown";
|
|
8340
|
-
const content = extractPiTextContent(msg.content);
|
|
8495
|
+
const structuredContent = toPiContentArray(msg.content);
|
|
8496
|
+
const content = structuredContent ?? extractPiTextContent(msg.content);
|
|
8341
8497
|
const toolCalls = extractToolCalls4(msg.content, toolTrackers, completedToolResults);
|
|
8342
8498
|
const startTimeVal = typeof msg.timestamp === "number" ? new Date(msg.timestamp).toISOString() : typeof msg.timestamp === "string" ? msg.timestamp : void 0;
|
|
8343
8499
|
let msgTokenUsage;
|
|
@@ -10187,13 +10343,13 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
|
10187
10343
|
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
10188
10344
|
const { mkdir: mkdir16, readFile: readFile14, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
|
|
10189
10345
|
const { tmpdir: tmpdir3 } = await import("node:os");
|
|
10190
|
-
const
|
|
10346
|
+
const path48 = await import("node:path");
|
|
10191
10347
|
const { randomUUID: randomUUID10 } = await import("node:crypto");
|
|
10192
|
-
const dir =
|
|
10348
|
+
const dir = path48.join(tmpdir3(), `agentv-exec-${randomUUID10()}`);
|
|
10193
10349
|
await mkdir16(dir, { recursive: true });
|
|
10194
|
-
const stdinPath =
|
|
10195
|
-
const stdoutPath =
|
|
10196
|
-
const stderrPath =
|
|
10350
|
+
const stdinPath = path48.join(dir, "stdin.txt");
|
|
10351
|
+
const stdoutPath = path48.join(dir, "stdout.txt");
|
|
10352
|
+
const stderrPath = path48.join(dir, "stderr.txt");
|
|
10197
10353
|
await writeFile9(stdinPath, stdinPayload, "utf8");
|
|
10198
10354
|
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
10199
10355
|
const { spawn: spawn5 } = await import("node:child_process");
|
|
@@ -10509,6 +10665,56 @@ function toCamelCaseDeep(obj) {
|
|
|
10509
10665
|
|
|
10510
10666
|
// src/evaluation/evaluators/code-evaluator.ts
|
|
10511
10667
|
var FILE_BACKED_OUTPUT_THRESHOLD = 5e4;
|
|
10668
|
+
var DATA_URI_RE = /^data:([^;]+);base64,(.+)$/s;
|
|
10669
|
+
async function materializeContentForGrader(messages, getWorkDir) {
|
|
10670
|
+
if (!messages || messages.length === 0) return messages ?? null;
|
|
10671
|
+
let hasAnyImage = false;
|
|
10672
|
+
for (const msg of messages) {
|
|
10673
|
+
if (isContentArray(msg.content)) {
|
|
10674
|
+
for (const block of msg.content) {
|
|
10675
|
+
if (block.type === "image") {
|
|
10676
|
+
hasAnyImage = true;
|
|
10677
|
+
break;
|
|
10678
|
+
}
|
|
10679
|
+
}
|
|
10680
|
+
}
|
|
10681
|
+
if (hasAnyImage) break;
|
|
10682
|
+
}
|
|
10683
|
+
if (!hasAnyImage) return messages;
|
|
10684
|
+
let counter = 0;
|
|
10685
|
+
const result = [];
|
|
10686
|
+
for (const msg of messages) {
|
|
10687
|
+
if (!isContentArray(msg.content)) {
|
|
10688
|
+
result.push(msg);
|
|
10689
|
+
continue;
|
|
10690
|
+
}
|
|
10691
|
+
if (!msg.content.some((b) => b.type === "image")) {
|
|
10692
|
+
result.push(msg);
|
|
10693
|
+
continue;
|
|
10694
|
+
}
|
|
10695
|
+
const blocks = [];
|
|
10696
|
+
for (const block of msg.content) {
|
|
10697
|
+
if (block.type !== "image") {
|
|
10698
|
+
blocks.push({ ...block });
|
|
10699
|
+
continue;
|
|
10700
|
+
}
|
|
10701
|
+
const img = block;
|
|
10702
|
+
const match = DATA_URI_RE.exec(img.source);
|
|
10703
|
+
if (match) {
|
|
10704
|
+
const [, mediaType, base64Data] = match;
|
|
10705
|
+
const ext = mediaType.split("/")[1] === "jpeg" ? "jpg" : mediaType.split("/")[1] ?? "bin";
|
|
10706
|
+
const dir = await getWorkDir();
|
|
10707
|
+
const filePath = join(dir, `img-${counter++}.${ext}`);
|
|
10708
|
+
await writeFile6(filePath, Buffer.from(base64Data, "base64"));
|
|
10709
|
+
blocks.push({ type: "image", media_type: img.media_type, path: filePath });
|
|
10710
|
+
} else {
|
|
10711
|
+
blocks.push({ type: "image", media_type: img.media_type, path: img.source });
|
|
10712
|
+
}
|
|
10713
|
+
}
|
|
10714
|
+
result.push({ ...msg, content: blocks });
|
|
10715
|
+
}
|
|
10716
|
+
return result;
|
|
10717
|
+
}
|
|
10512
10718
|
var CodeEvaluator = class {
|
|
10513
10719
|
kind = "code-grader";
|
|
10514
10720
|
command;
|
|
@@ -10524,7 +10730,18 @@ var CodeEvaluator = class {
|
|
|
10524
10730
|
this.target = options.target;
|
|
10525
10731
|
}
|
|
10526
10732
|
async evaluate(context) {
|
|
10527
|
-
let
|
|
10733
|
+
let imageTmpDir;
|
|
10734
|
+
const getImageDir = async () => {
|
|
10735
|
+
if (!imageTmpDir) {
|
|
10736
|
+
imageTmpDir = await mkdtemp2(join(tmpdir2(), "agentv-img-"));
|
|
10737
|
+
}
|
|
10738
|
+
return imageTmpDir;
|
|
10739
|
+
};
|
|
10740
|
+
const materializedOutput = await materializeContentForGrader(
|
|
10741
|
+
context.output,
|
|
10742
|
+
getImageDir
|
|
10743
|
+
);
|
|
10744
|
+
let outputForPayload = materializedOutput;
|
|
10528
10745
|
let outputPath;
|
|
10529
10746
|
if (outputForPayload) {
|
|
10530
10747
|
const serialized = JSON.stringify(outputForPayload);
|
|
@@ -10537,12 +10754,17 @@ var CodeEvaluator = class {
|
|
|
10537
10754
|
}
|
|
10538
10755
|
const payload = {
|
|
10539
10756
|
criteria: context.evalCase.criteria,
|
|
10540
|
-
expectedOutput:
|
|
10541
|
-
|
|
10757
|
+
expectedOutput: await materializeContentForGrader(
|
|
10758
|
+
context.evalCase.expected_output,
|
|
10759
|
+
getImageDir
|
|
10760
|
+
),
|
|
10542
10761
|
output: outputForPayload,
|
|
10543
10762
|
outputPath,
|
|
10544
10763
|
inputFiles: context.evalCase.file_paths,
|
|
10545
|
-
input:
|
|
10764
|
+
input: await materializeContentForGrader(
|
|
10765
|
+
context.evalCase.input,
|
|
10766
|
+
getImageDir
|
|
10767
|
+
),
|
|
10546
10768
|
trace: context.trace ?? null,
|
|
10547
10769
|
tokenUsage: context.tokenUsage ?? null,
|
|
10548
10770
|
costUsd: context.costUsd ?? null,
|
|
@@ -10551,9 +10773,7 @@ var CodeEvaluator = class {
|
|
|
10551
10773
|
endTime: context.endTime ?? null,
|
|
10552
10774
|
fileChanges: context.fileChanges ?? null,
|
|
10553
10775
|
workspacePath: context.workspacePath ?? null,
|
|
10554
|
-
config: this.config ?? null
|
|
10555
|
-
inputText: context.evalCase.question,
|
|
10556
|
-
expectedOutputText: context.evalCase.reference_answer ?? ""
|
|
10776
|
+
config: this.config ?? null
|
|
10557
10777
|
};
|
|
10558
10778
|
const inputPayload = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
|
|
10559
10779
|
let proxyEnv;
|
|
@@ -10643,6 +10863,10 @@ var CodeEvaluator = class {
|
|
|
10643
10863
|
await rm3(dirname(outputPath), { recursive: true, force: true }).catch(() => {
|
|
10644
10864
|
});
|
|
10645
10865
|
}
|
|
10866
|
+
if (imageTmpDir) {
|
|
10867
|
+
await rm3(imageTmpDir, { recursive: true, force: true }).catch(() => {
|
|
10868
|
+
});
|
|
10869
|
+
}
|
|
10646
10870
|
}
|
|
10647
10871
|
}
|
|
10648
10872
|
};
|
|
@@ -10720,13 +10944,13 @@ Be concise and focused in your evaluation. Provide succinct, specific feedback r
|
|
|
10720
10944
|
{{${TEMPLATE_VARIABLES.CRITERIA}}}
|
|
10721
10945
|
|
|
10722
10946
|
[[ ## question ## ]]
|
|
10723
|
-
{{${TEMPLATE_VARIABLES.
|
|
10947
|
+
{{${TEMPLATE_VARIABLES.INPUT}}}
|
|
10724
10948
|
|
|
10725
10949
|
[[ ## reference_answer ## ]]
|
|
10726
|
-
{{${TEMPLATE_VARIABLES.
|
|
10950
|
+
{{${TEMPLATE_VARIABLES.EXPECTED_OUTPUT}}}
|
|
10727
10951
|
|
|
10728
10952
|
[[ ## answer ## ]]
|
|
10729
|
-
{{${TEMPLATE_VARIABLES.
|
|
10953
|
+
{{${TEMPLATE_VARIABLES.OUTPUT}}}`;
|
|
10730
10954
|
var freeformEvaluationSchema = z3.object({
|
|
10731
10955
|
score: z3.number().min(0).max(1).describe("Score between 0.0 and 1.0"),
|
|
10732
10956
|
assertions: z3.array(
|
|
@@ -10798,21 +11022,19 @@ var LlmGraderEvaluator = class {
|
|
|
10798
11022
|
async evaluateFreeform(context, graderProvider) {
|
|
10799
11023
|
const formattedQuestion = context.promptInputs.question && context.promptInputs.question.trim().length > 0 ? context.promptInputs.question : context.evalCase.question;
|
|
10800
11024
|
const variables = {
|
|
10801
|
-
[TEMPLATE_VARIABLES.INPUT]:
|
|
10802
|
-
[TEMPLATE_VARIABLES.
|
|
10803
|
-
|
|
10804
|
-
null,
|
|
10805
|
-
2
|
|
10806
|
-
),
|
|
10807
|
-
[TEMPLATE_VARIABLES.OUTPUT]: JSON.stringify(context.output ?? [], null, 2),
|
|
11025
|
+
[TEMPLATE_VARIABLES.INPUT]: formattedQuestion.trim(),
|
|
11026
|
+
[TEMPLATE_VARIABLES.OUTPUT]: context.candidate.trim(),
|
|
11027
|
+
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: (context.evalCase.reference_answer ?? "").trim(),
|
|
10808
11028
|
[TEMPLATE_VARIABLES.CRITERIA]: context.evalCase.criteria.trim(),
|
|
10809
11029
|
[TEMPLATE_VARIABLES.FILE_CHANGES]: context.fileChanges ?? "",
|
|
11030
|
+
// Deprecated aliases — same values as the primary variables above
|
|
10810
11031
|
[TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
|
|
10811
11032
|
[TEMPLATE_VARIABLES.OUTPUT_TEXT]: context.candidate.trim(),
|
|
10812
11033
|
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (context.evalCase.reference_answer ?? "").trim()
|
|
10813
11034
|
};
|
|
10814
11035
|
const systemPrompt = buildOutputSchema();
|
|
10815
11036
|
const evaluatorTemplate = context.evaluatorTemplateOverride ?? this.evaluatorTemplate ?? DEFAULT_EVALUATOR_TEMPLATE;
|
|
11037
|
+
warnDeprecatedTemplateVars(evaluatorTemplate);
|
|
10816
11038
|
let userPrompt = substituteVariables(evaluatorTemplate, variables);
|
|
10817
11039
|
if (context.fileChanges && !context.evaluatorTemplateOverride && !this.evaluatorTemplate) {
|
|
10818
11040
|
userPrompt += `
|
|
@@ -10824,13 +11046,15 @@ ${context.fileChanges}`;
|
|
|
10824
11046
|
userPrompt,
|
|
10825
11047
|
systemPrompt
|
|
10826
11048
|
};
|
|
11049
|
+
const images = context.output ? extractImageBlocks(context.output) : [];
|
|
10827
11050
|
try {
|
|
10828
11051
|
const { data, tokenUsage } = await this.runWithRetry({
|
|
10829
11052
|
context,
|
|
10830
11053
|
graderProvider,
|
|
10831
11054
|
systemPrompt,
|
|
10832
11055
|
userPrompt,
|
|
10833
|
-
schema: freeformEvaluationSchema
|
|
11056
|
+
schema: freeformEvaluationSchema,
|
|
11057
|
+
images
|
|
10834
11058
|
});
|
|
10835
11059
|
const score = clampScore(data.score);
|
|
10836
11060
|
const assertions = Array.isArray(data.assertions) ? data.assertions.slice(0, 8) : [];
|
|
@@ -10874,13 +11098,15 @@ ${context.fileChanges}`;
|
|
|
10874
11098
|
userPrompt: prompt,
|
|
10875
11099
|
systemPrompt
|
|
10876
11100
|
};
|
|
11101
|
+
const images = context.output ? extractImageBlocks(context.output) : [];
|
|
10877
11102
|
try {
|
|
10878
11103
|
const { data, tokenUsage } = await this.runWithRetry({
|
|
10879
11104
|
context,
|
|
10880
11105
|
graderProvider,
|
|
10881
11106
|
systemPrompt,
|
|
10882
11107
|
userPrompt: prompt,
|
|
10883
|
-
schema: rubricEvaluationSchema
|
|
11108
|
+
schema: rubricEvaluationSchema,
|
|
11109
|
+
images
|
|
10884
11110
|
});
|
|
10885
11111
|
const { score, verdict, assertions } = calculateRubricScore(data, rubrics);
|
|
10886
11112
|
return {
|
|
@@ -10917,13 +11143,15 @@ ${context.fileChanges}`;
|
|
|
10917
11143
|
userPrompt: prompt,
|
|
10918
11144
|
systemPrompt
|
|
10919
11145
|
};
|
|
11146
|
+
const images = context.output ? extractImageBlocks(context.output) : [];
|
|
10920
11147
|
try {
|
|
10921
11148
|
const { data, tokenUsage } = await this.runWithRetry({
|
|
10922
11149
|
context,
|
|
10923
11150
|
graderProvider,
|
|
10924
11151
|
systemPrompt,
|
|
10925
11152
|
userPrompt: prompt,
|
|
10926
|
-
schema: scoreRangeEvaluationSchema
|
|
11153
|
+
schema: scoreRangeEvaluationSchema,
|
|
11154
|
+
images
|
|
10927
11155
|
});
|
|
10928
11156
|
const { score, verdict, assertions, details } = calculateScoreRangeResult(data, rubrics);
|
|
10929
11157
|
return {
|
|
@@ -11130,12 +11358,17 @@ ${context.fileChanges}`;
|
|
|
11130
11358
|
const formattedQuestion = context.promptInputs.question && context.promptInputs.question.trim().length > 0 ? context.promptInputs.question : context.evalCase.question;
|
|
11131
11359
|
const variables = {
|
|
11132
11360
|
[TEMPLATE_VARIABLES.CRITERIA]: context.evalCase.criteria.trim(),
|
|
11361
|
+
[TEMPLATE_VARIABLES.INPUT]: formattedQuestion.trim(),
|
|
11362
|
+
[TEMPLATE_VARIABLES.OUTPUT]: context.candidate.trim(),
|
|
11363
|
+
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: (context.evalCase.reference_answer ?? "").trim(),
|
|
11364
|
+
[TEMPLATE_VARIABLES.FILE_CHANGES]: context.fileChanges ?? "",
|
|
11365
|
+
// Deprecated aliases
|
|
11133
11366
|
[TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
|
|
11134
11367
|
[TEMPLATE_VARIABLES.OUTPUT_TEXT]: context.candidate.trim(),
|
|
11135
|
-
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (context.evalCase.reference_answer ?? "").trim()
|
|
11136
|
-
[TEMPLATE_VARIABLES.FILE_CHANGES]: context.fileChanges ?? ""
|
|
11368
|
+
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (context.evalCase.reference_answer ?? "").trim()
|
|
11137
11369
|
};
|
|
11138
11370
|
if (this.evaluatorTemplate) {
|
|
11371
|
+
warnDeprecatedTemplateVars(this.evaluatorTemplate);
|
|
11139
11372
|
return substituteVariables(this.evaluatorTemplate, variables);
|
|
11140
11373
|
}
|
|
11141
11374
|
const config = context.evaluator;
|
|
@@ -11186,11 +11419,16 @@ ${context.fileChanges}`;
|
|
|
11186
11419
|
if (this.evaluatorTemplate) {
|
|
11187
11420
|
const variables = {
|
|
11188
11421
|
[TEMPLATE_VARIABLES.CRITERIA]: context.evalCase.criteria.trim(),
|
|
11422
|
+
[TEMPLATE_VARIABLES.INPUT]: formattedQuestion.trim(),
|
|
11423
|
+
[TEMPLATE_VARIABLES.OUTPUT]: context.candidate.trim(),
|
|
11424
|
+
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: (context.evalCase.reference_answer ?? "").trim(),
|
|
11425
|
+
[TEMPLATE_VARIABLES.FILE_CHANGES]: context.fileChanges ?? "",
|
|
11426
|
+
// Deprecated aliases
|
|
11189
11427
|
[TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
|
|
11190
11428
|
[TEMPLATE_VARIABLES.OUTPUT_TEXT]: context.candidate.trim(),
|
|
11191
|
-
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (context.evalCase.reference_answer ?? "").trim()
|
|
11192
|
-
[TEMPLATE_VARIABLES.FILE_CHANGES]: context.fileChanges ?? ""
|
|
11429
|
+
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (context.evalCase.reference_answer ?? "").trim()
|
|
11193
11430
|
};
|
|
11431
|
+
warnDeprecatedTemplateVars(this.evaluatorTemplate);
|
|
11194
11432
|
const customPrompt = substituteVariables(this.evaluatorTemplate, variables);
|
|
11195
11433
|
const outputSchema = rubrics && rubrics.length > 0 ? buildRubricOutputSchema() : buildOutputSchema();
|
|
11196
11434
|
return `${customPrompt}
|
|
@@ -11361,18 +11599,35 @@ ${outputSchema}`;
|
|
|
11361
11599
|
// LLM mode retry logic
|
|
11362
11600
|
// ---------------------------------------------------------------------------
|
|
11363
11601
|
async runWithRetry(options) {
|
|
11364
|
-
const { context, graderProvider, systemPrompt, userPrompt, schema } = options;
|
|
11602
|
+
const { context, graderProvider, systemPrompt, userPrompt, schema, images } = options;
|
|
11365
11603
|
let lastError;
|
|
11366
11604
|
for (let attempt = 1; attempt <= 3; attempt++) {
|
|
11367
11605
|
try {
|
|
11368
11606
|
const model = graderProvider.asLanguageModel?.();
|
|
11369
11607
|
if (model) {
|
|
11370
|
-
const
|
|
11608
|
+
const modelOptions = {
|
|
11609
|
+
...this.maxOutputTokens ? { maxTokens: this.maxOutputTokens } : {},
|
|
11610
|
+
...typeof this.temperature === "number" ? { temperature: this.temperature } : {}
|
|
11611
|
+
};
|
|
11612
|
+
const hasImages = images && images.length > 0;
|
|
11613
|
+
const result = hasImages ? await generateText2({
|
|
11614
|
+
model,
|
|
11615
|
+
system: systemPrompt,
|
|
11616
|
+
messages: [
|
|
11617
|
+
{
|
|
11618
|
+
role: "user",
|
|
11619
|
+
content: [
|
|
11620
|
+
{ type: "text", text: userPrompt },
|
|
11621
|
+
...toAiSdkImageParts(images)
|
|
11622
|
+
]
|
|
11623
|
+
}
|
|
11624
|
+
],
|
|
11625
|
+
...modelOptions
|
|
11626
|
+
}) : await generateText2({
|
|
11371
11627
|
model,
|
|
11372
11628
|
system: systemPrompt,
|
|
11373
11629
|
prompt: userPrompt,
|
|
11374
|
-
...
|
|
11375
|
-
...typeof this.temperature === "number" ? { temperature: this.temperature } : {}
|
|
11630
|
+
...modelOptions
|
|
11376
11631
|
});
|
|
11377
11632
|
const data2 = schema.parse(parseJsonFromText(result.text));
|
|
11378
11633
|
const rawUsage = result.usage;
|
|
@@ -11432,6 +11687,26 @@ function substituteVariables(template, variables) {
|
|
|
11432
11687
|
return variables[varName] ?? match;
|
|
11433
11688
|
});
|
|
11434
11689
|
}
|
|
11690
|
+
var ANSI_YELLOW7 = "\x1B[33m";
|
|
11691
|
+
var ANSI_RESET8 = "\x1B[0m";
|
|
11692
|
+
var warnedTemplateStrings = /* @__PURE__ */ new Set();
|
|
11693
|
+
function warnDeprecatedTemplateVars(template) {
|
|
11694
|
+
if (warnedTemplateStrings.has(template)) return;
|
|
11695
|
+
const used = [];
|
|
11696
|
+
for (const [deprecated, replacement] of DEPRECATED_TEMPLATE_VARIABLES) {
|
|
11697
|
+
if (new RegExp(`\\{\\{\\s*${deprecated}\\s*\\}\\}`).test(template)) {
|
|
11698
|
+
used.push(`{{ ${deprecated} }} \u2192 {{ ${replacement} }}`);
|
|
11699
|
+
}
|
|
11700
|
+
}
|
|
11701
|
+
if (used.length > 0) {
|
|
11702
|
+
warnedTemplateStrings.add(template);
|
|
11703
|
+
console.warn(
|
|
11704
|
+
`${ANSI_YELLOW7}\u26A0 Deprecated template variables detected (they still work but will be removed in a future version):
|
|
11705
|
+
${used.join("\n ")}
|
|
11706
|
+
Update your custom evaluator template to use the new names.${ANSI_RESET8}`
|
|
11707
|
+
);
|
|
11708
|
+
}
|
|
11709
|
+
}
|
|
11435
11710
|
function calculateRubricScore(result, rubrics) {
|
|
11436
11711
|
const rubricMap = new Map(rubrics.map((rubric) => [rubric.id, rubric]));
|
|
11437
11712
|
const assertions = [];
|
|
@@ -11526,6 +11801,26 @@ function calculateScoreRangeResult(result, rubrics) {
|
|
|
11526
11801
|
}
|
|
11527
11802
|
};
|
|
11528
11803
|
}
|
|
11804
|
+
function extractImageBlocks(messages) {
|
|
11805
|
+
const images = [];
|
|
11806
|
+
for (const msg of messages) {
|
|
11807
|
+
if (msg.role !== "assistant") continue;
|
|
11808
|
+
if (!isContentArray(msg.content)) continue;
|
|
11809
|
+
for (const block of msg.content) {
|
|
11810
|
+
if (block.type === "image") {
|
|
11811
|
+
images.push(block);
|
|
11812
|
+
}
|
|
11813
|
+
}
|
|
11814
|
+
}
|
|
11815
|
+
return images;
|
|
11816
|
+
}
|
|
11817
|
+
function toAiSdkImageParts(images) {
|
|
11818
|
+
return images.map((img) => ({
|
|
11819
|
+
type: "image",
|
|
11820
|
+
image: img.source,
|
|
11821
|
+
mediaType: img.media_type || void 0
|
|
11822
|
+
}));
|
|
11823
|
+
}
|
|
11529
11824
|
function resolveSandboxed(basePath, relativePath) {
|
|
11530
11825
|
const resolved = path35.resolve(basePath, relativePath);
|
|
11531
11826
|
if (!resolved.startsWith(basePath + path35.sep) && resolved !== basePath) {
|
|
@@ -12267,115 +12562,115 @@ var FieldAccuracyEvaluator = class {
|
|
|
12267
12562
|
* Evaluate a single field against the expected value.
|
|
12268
12563
|
*/
|
|
12269
12564
|
evaluateField(fieldConfig, candidateData, expectedData) {
|
|
12270
|
-
const { path:
|
|
12271
|
-
const candidateValue = resolvePath(candidateData,
|
|
12272
|
-
const expectedValue = resolvePath(expectedData,
|
|
12565
|
+
const { path: path48, match, required = true, weight = 1 } = fieldConfig;
|
|
12566
|
+
const candidateValue = resolvePath(candidateData, path48);
|
|
12567
|
+
const expectedValue = resolvePath(expectedData, path48);
|
|
12273
12568
|
if (expectedValue === void 0) {
|
|
12274
12569
|
return {
|
|
12275
|
-
path:
|
|
12570
|
+
path: path48,
|
|
12276
12571
|
score: 1,
|
|
12277
12572
|
// No expected value means no comparison needed
|
|
12278
12573
|
weight,
|
|
12279
12574
|
hit: true,
|
|
12280
|
-
message: `${
|
|
12575
|
+
message: `${path48}: no expected value`
|
|
12281
12576
|
};
|
|
12282
12577
|
}
|
|
12283
12578
|
if (candidateValue === void 0) {
|
|
12284
12579
|
if (required) {
|
|
12285
12580
|
return {
|
|
12286
|
-
path:
|
|
12581
|
+
path: path48,
|
|
12287
12582
|
score: 0,
|
|
12288
12583
|
weight,
|
|
12289
12584
|
hit: false,
|
|
12290
|
-
message: `${
|
|
12585
|
+
message: `${path48} (required, missing)`
|
|
12291
12586
|
};
|
|
12292
12587
|
}
|
|
12293
12588
|
return {
|
|
12294
|
-
path:
|
|
12589
|
+
path: path48,
|
|
12295
12590
|
score: 1,
|
|
12296
12591
|
// Don't penalize missing optional fields
|
|
12297
12592
|
weight: 0,
|
|
12298
12593
|
// Zero weight means it won't affect the score
|
|
12299
12594
|
hit: true,
|
|
12300
|
-
message: `${
|
|
12595
|
+
message: `${path48}: optional field missing`
|
|
12301
12596
|
};
|
|
12302
12597
|
}
|
|
12303
12598
|
switch (match) {
|
|
12304
12599
|
case "exact":
|
|
12305
|
-
return this.compareExact(
|
|
12600
|
+
return this.compareExact(path48, candidateValue, expectedValue, weight);
|
|
12306
12601
|
case "numeric_tolerance":
|
|
12307
12602
|
return this.compareNumericTolerance(
|
|
12308
|
-
|
|
12603
|
+
path48,
|
|
12309
12604
|
candidateValue,
|
|
12310
12605
|
expectedValue,
|
|
12311
12606
|
fieldConfig,
|
|
12312
12607
|
weight
|
|
12313
12608
|
);
|
|
12314
12609
|
case "date":
|
|
12315
|
-
return this.compareDate(
|
|
12610
|
+
return this.compareDate(path48, candidateValue, expectedValue, fieldConfig, weight);
|
|
12316
12611
|
default:
|
|
12317
12612
|
return {
|
|
12318
|
-
path:
|
|
12613
|
+
path: path48,
|
|
12319
12614
|
score: 0,
|
|
12320
12615
|
weight,
|
|
12321
12616
|
hit: false,
|
|
12322
|
-
message: `${
|
|
12617
|
+
message: `${path48}: unknown match type "${match}"`
|
|
12323
12618
|
};
|
|
12324
12619
|
}
|
|
12325
12620
|
}
|
|
12326
12621
|
/**
|
|
12327
12622
|
* Exact equality comparison.
|
|
12328
12623
|
*/
|
|
12329
|
-
compareExact(
|
|
12624
|
+
compareExact(path48, candidateValue, expectedValue, weight) {
|
|
12330
12625
|
if (deepEqual(candidateValue, expectedValue)) {
|
|
12331
12626
|
return {
|
|
12332
|
-
path:
|
|
12627
|
+
path: path48,
|
|
12333
12628
|
score: 1,
|
|
12334
12629
|
weight,
|
|
12335
12630
|
hit: true,
|
|
12336
|
-
message:
|
|
12631
|
+
message: path48
|
|
12337
12632
|
};
|
|
12338
12633
|
}
|
|
12339
12634
|
if (typeof candidateValue !== typeof expectedValue) {
|
|
12340
12635
|
return {
|
|
12341
|
-
path:
|
|
12636
|
+
path: path48,
|
|
12342
12637
|
score: 0,
|
|
12343
12638
|
weight,
|
|
12344
12639
|
hit: false,
|
|
12345
|
-
message: `${
|
|
12640
|
+
message: `${path48} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
|
|
12346
12641
|
};
|
|
12347
12642
|
}
|
|
12348
12643
|
return {
|
|
12349
|
-
path:
|
|
12644
|
+
path: path48,
|
|
12350
12645
|
score: 0,
|
|
12351
12646
|
weight,
|
|
12352
12647
|
hit: false,
|
|
12353
|
-
message: `${
|
|
12648
|
+
message: `${path48} (value mismatch)`
|
|
12354
12649
|
};
|
|
12355
12650
|
}
|
|
12356
12651
|
/**
|
|
12357
12652
|
* Numeric comparison with absolute or relative tolerance.
|
|
12358
12653
|
*/
|
|
12359
|
-
compareNumericTolerance(
|
|
12654
|
+
compareNumericTolerance(path48, candidateValue, expectedValue, fieldConfig, weight) {
|
|
12360
12655
|
const { tolerance = 0, relative = false } = fieldConfig;
|
|
12361
12656
|
const candidateNum = toNumber(candidateValue);
|
|
12362
12657
|
const expectedNum = toNumber(expectedValue);
|
|
12363
12658
|
if (candidateNum === null || expectedNum === null) {
|
|
12364
12659
|
return {
|
|
12365
|
-
path:
|
|
12660
|
+
path: path48,
|
|
12366
12661
|
score: 0,
|
|
12367
12662
|
weight,
|
|
12368
12663
|
hit: false,
|
|
12369
|
-
message: `${
|
|
12664
|
+
message: `${path48} (non-numeric value)`
|
|
12370
12665
|
};
|
|
12371
12666
|
}
|
|
12372
12667
|
if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
|
|
12373
12668
|
return {
|
|
12374
|
-
path:
|
|
12669
|
+
path: path48,
|
|
12375
12670
|
score: 0,
|
|
12376
12671
|
weight,
|
|
12377
12672
|
hit: false,
|
|
12378
|
-
message: `${
|
|
12673
|
+
message: `${path48} (invalid numeric value)`
|
|
12379
12674
|
};
|
|
12380
12675
|
}
|
|
12381
12676
|
const diff = Math.abs(candidateNum - expectedNum);
|
|
@@ -12388,61 +12683,61 @@ var FieldAccuracyEvaluator = class {
|
|
|
12388
12683
|
}
|
|
12389
12684
|
if (withinTolerance) {
|
|
12390
12685
|
return {
|
|
12391
|
-
path:
|
|
12686
|
+
path: path48,
|
|
12392
12687
|
score: 1,
|
|
12393
12688
|
weight,
|
|
12394
12689
|
hit: true,
|
|
12395
|
-
message: `${
|
|
12690
|
+
message: `${path48} (within tolerance: diff=${diff.toFixed(2)})`
|
|
12396
12691
|
};
|
|
12397
12692
|
}
|
|
12398
12693
|
return {
|
|
12399
|
-
path:
|
|
12694
|
+
path: path48,
|
|
12400
12695
|
score: 0,
|
|
12401
12696
|
weight,
|
|
12402
12697
|
hit: false,
|
|
12403
|
-
message: `${
|
|
12698
|
+
message: `${path48} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
|
|
12404
12699
|
};
|
|
12405
12700
|
}
|
|
12406
12701
|
/**
|
|
12407
12702
|
* Date comparison with format normalization.
|
|
12408
12703
|
*/
|
|
12409
|
-
compareDate(
|
|
12704
|
+
compareDate(path48, candidateValue, expectedValue, fieldConfig, weight) {
|
|
12410
12705
|
const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
|
|
12411
12706
|
const candidateDate = parseDate(String(candidateValue), formats);
|
|
12412
12707
|
const expectedDate = parseDate(String(expectedValue), formats);
|
|
12413
12708
|
if (candidateDate === null) {
|
|
12414
12709
|
return {
|
|
12415
|
-
path:
|
|
12710
|
+
path: path48,
|
|
12416
12711
|
score: 0,
|
|
12417
12712
|
weight,
|
|
12418
12713
|
hit: false,
|
|
12419
|
-
message: `${
|
|
12714
|
+
message: `${path48} (unparseable candidate date)`
|
|
12420
12715
|
};
|
|
12421
12716
|
}
|
|
12422
12717
|
if (expectedDate === null) {
|
|
12423
12718
|
return {
|
|
12424
|
-
path:
|
|
12719
|
+
path: path48,
|
|
12425
12720
|
score: 0,
|
|
12426
12721
|
weight,
|
|
12427
12722
|
hit: false,
|
|
12428
|
-
message: `${
|
|
12723
|
+
message: `${path48} (unparseable expected date)`
|
|
12429
12724
|
};
|
|
12430
12725
|
}
|
|
12431
12726
|
if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
|
|
12432
12727
|
return {
|
|
12433
|
-
path:
|
|
12728
|
+
path: path48,
|
|
12434
12729
|
score: 1,
|
|
12435
12730
|
weight,
|
|
12436
12731
|
hit: true,
|
|
12437
|
-
message:
|
|
12732
|
+
message: path48
|
|
12438
12733
|
};
|
|
12439
12734
|
}
|
|
12440
12735
|
return {
|
|
12441
|
-
path:
|
|
12736
|
+
path: path48,
|
|
12442
12737
|
score: 0,
|
|
12443
12738
|
weight,
|
|
12444
12739
|
hit: false,
|
|
12445
|
-
message: `${
|
|
12740
|
+
message: `${path48} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
|
|
12446
12741
|
};
|
|
12447
12742
|
}
|
|
12448
12743
|
/**
|
|
@@ -12475,11 +12770,11 @@ var FieldAccuracyEvaluator = class {
|
|
|
12475
12770
|
};
|
|
12476
12771
|
}
|
|
12477
12772
|
};
|
|
12478
|
-
function resolvePath(obj,
|
|
12479
|
-
if (!
|
|
12773
|
+
function resolvePath(obj, path48) {
|
|
12774
|
+
if (!path48 || !obj) {
|
|
12480
12775
|
return void 0;
|
|
12481
12776
|
}
|
|
12482
|
-
const parts =
|
|
12777
|
+
const parts = path48.split(/\.|\[|\]/).filter((p) => p.length > 0);
|
|
12483
12778
|
let current = obj;
|
|
12484
12779
|
for (const part of parts) {
|
|
12485
12780
|
if (current === null || current === void 0) {
|
|
@@ -12771,11 +13066,12 @@ function assembleLlmGraderPrompt(input) {
|
|
|
12771
13066
|
function assembleFreeform(evalCase, candidate, promptInputs, fileChanges, evaluatorTemplateOverride) {
|
|
12772
13067
|
const formattedQuestion = promptInputs.question && promptInputs.question.trim().length > 0 ? promptInputs.question : evalCase.question;
|
|
12773
13068
|
const variables = {
|
|
12774
|
-
[TEMPLATE_VARIABLES.INPUT]:
|
|
12775
|
-
[TEMPLATE_VARIABLES.
|
|
12776
|
-
[TEMPLATE_VARIABLES.
|
|
13069
|
+
[TEMPLATE_VARIABLES.INPUT]: formattedQuestion.trim(),
|
|
13070
|
+
[TEMPLATE_VARIABLES.OUTPUT]: candidate.trim(),
|
|
13071
|
+
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: (evalCase.reference_answer ?? "").trim(),
|
|
12777
13072
|
[TEMPLATE_VARIABLES.CRITERIA]: evalCase.criteria.trim(),
|
|
12778
13073
|
[TEMPLATE_VARIABLES.FILE_CHANGES]: fileChanges ?? "",
|
|
13074
|
+
// Deprecated aliases
|
|
12779
13075
|
[TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
|
|
12780
13076
|
[TEMPLATE_VARIABLES.OUTPUT_TEXT]: candidate.trim(),
|
|
12781
13077
|
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (evalCase.reference_answer ?? "").trim()
|
|
@@ -12962,8 +13258,8 @@ var TokenUsageEvaluator = class {
|
|
|
12962
13258
|
};
|
|
12963
13259
|
|
|
12964
13260
|
// src/evaluation/evaluators/tool-trajectory.ts
|
|
12965
|
-
function getNestedValue(obj,
|
|
12966
|
-
const parts =
|
|
13261
|
+
function getNestedValue(obj, path48) {
|
|
13262
|
+
const parts = path48.split(".");
|
|
12967
13263
|
let current = obj;
|
|
12968
13264
|
for (const part of parts) {
|
|
12969
13265
|
if (current === null || current === void 0 || typeof current !== "object") {
|
|
@@ -13832,16 +14128,13 @@ async function executePromptTemplate(script, context, config, timeoutMs) {
|
|
|
13832
14128
|
const payload = {
|
|
13833
14129
|
criteria: context.evalCase.criteria,
|
|
13834
14130
|
expectedOutput: context.evalCase.expected_output,
|
|
13835
|
-
outputText: context.candidate,
|
|
13836
14131
|
output: context.output ?? null,
|
|
13837
14132
|
inputFiles: context.evalCase.file_paths,
|
|
13838
14133
|
input: context.evalCase.input,
|
|
13839
14134
|
trace: context.trace ?? null,
|
|
13840
14135
|
fileChanges: context.fileChanges ?? null,
|
|
13841
14136
|
workspacePath: context.workspacePath ?? null,
|
|
13842
|
-
config: config ?? context.config ?? null
|
|
13843
|
-
inputText: context.evalCase.question,
|
|
13844
|
-
expectedOutputText: context.evalCase.reference_answer ?? ""
|
|
14137
|
+
config: config ?? context.config ?? null
|
|
13845
14138
|
};
|
|
13846
14139
|
const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
|
|
13847
14140
|
const scriptPath = script[script.length - 1];
|
|
@@ -15521,7 +15814,8 @@ async function runEvaluation(options) {
|
|
|
15521
15814
|
const budgetResult = {
|
|
15522
15815
|
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
15523
15816
|
testId: evalCase.id,
|
|
15524
|
-
|
|
15817
|
+
dataset: evalCase.dataset,
|
|
15818
|
+
category: evalCase.category,
|
|
15525
15819
|
score: 0,
|
|
15526
15820
|
assertions: [],
|
|
15527
15821
|
output: [],
|
|
@@ -15557,7 +15851,8 @@ async function runEvaluation(options) {
|
|
|
15557
15851
|
const haltResult = {
|
|
15558
15852
|
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
15559
15853
|
testId: evalCase.id,
|
|
15560
|
-
|
|
15854
|
+
dataset: evalCase.dataset,
|
|
15855
|
+
category: evalCase.category,
|
|
15561
15856
|
score: 0,
|
|
15562
15857
|
assertions: [],
|
|
15563
15858
|
output: [],
|
|
@@ -16556,7 +16851,8 @@ async function evaluateCandidate(options) {
|
|
|
16556
16851
|
return {
|
|
16557
16852
|
timestamp: completedAt.toISOString(),
|
|
16558
16853
|
testId: evalCase.id,
|
|
16559
|
-
|
|
16854
|
+
dataset: evalCase.dataset,
|
|
16855
|
+
category: evalCase.category,
|
|
16560
16856
|
conversationId: evalCase.conversation_id,
|
|
16561
16857
|
score: score.score,
|
|
16562
16858
|
assertions: score.assertions,
|
|
@@ -16906,7 +17202,8 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
|
|
|
16906
17202
|
return {
|
|
16907
17203
|
timestamp: timestamp.toISOString(),
|
|
16908
17204
|
testId: evalCase.id,
|
|
16909
|
-
|
|
17205
|
+
dataset: evalCase.dataset,
|
|
17206
|
+
category: evalCase.category,
|
|
16910
17207
|
conversationId: evalCase.conversation_id,
|
|
16911
17208
|
score: 0,
|
|
16912
17209
|
assertions: [{ text: `Error: ${message}`, passed: false }],
|
|
@@ -17479,6 +17776,18 @@ function trimBaselineResult(result) {
|
|
|
17479
17776
|
return trimmed;
|
|
17480
17777
|
}
|
|
17481
17778
|
|
|
17779
|
+
// src/evaluation/category.ts
|
|
17780
|
+
import path47 from "node:path";
|
|
17781
|
+
var DEFAULT_CATEGORY = "Uncategorized";
|
|
17782
|
+
function deriveCategory(relativePath) {
|
|
17783
|
+
const parts = relativePath.split(path47.sep);
|
|
17784
|
+
if (parts.length <= 1) {
|
|
17785
|
+
return DEFAULT_CATEGORY;
|
|
17786
|
+
}
|
|
17787
|
+
const dirs = parts.slice(0, -1).filter((d) => d !== "evals");
|
|
17788
|
+
return dirs.length > 0 ? dirs.join("/") : DEFAULT_CATEGORY;
|
|
17789
|
+
}
|
|
17790
|
+
|
|
17482
17791
|
// src/observability/otel-exporter.ts
|
|
17483
17792
|
var OTEL_BACKEND_PRESETS = {
|
|
17484
17793
|
langfuse: {
|
|
@@ -17602,7 +17911,7 @@ var OtelTraceExporter = class {
|
|
|
17602
17911
|
rootSpan.setAttribute("gen_ai.system", "agentv");
|
|
17603
17912
|
rootSpan.setAttribute("agentv.test_id", result.testId);
|
|
17604
17913
|
rootSpan.setAttribute("agentv.target", result.target);
|
|
17605
|
-
if (result.
|
|
17914
|
+
if (result.dataset) rootSpan.setAttribute("agentv.dataset", result.dataset);
|
|
17606
17915
|
rootSpan.setAttribute("agentv.score", result.score);
|
|
17607
17916
|
if (captureContent && result.output.length > 0) {
|
|
17608
17917
|
const lastMsg = result.output[result.output.length - 1];
|
|
@@ -17811,7 +18120,7 @@ var OtelStreamingObserver = class {
|
|
|
17811
18120
|
this.rootSpan.setAttribute("gen_ai.system", "agentv");
|
|
17812
18121
|
this.rootSpan.setAttribute("agentv.test_id", testId);
|
|
17813
18122
|
this.rootSpan.setAttribute("agentv.target", target);
|
|
17814
|
-
if (evalSet) this.rootSpan.setAttribute("agentv.
|
|
18123
|
+
if (evalSet) this.rootSpan.setAttribute("agentv.dataset", evalSet);
|
|
17815
18124
|
this.rootCtx = this.api.trace.setSpan(this.api.context.active(), this.rootSpan);
|
|
17816
18125
|
}
|
|
17817
18126
|
/** Create and immediately export a tool span */
|
|
@@ -17983,9 +18292,11 @@ function createAgentKernel() {
|
|
|
17983
18292
|
return { status: "stub" };
|
|
17984
18293
|
}
|
|
17985
18294
|
export {
|
|
18295
|
+
COMMON_TARGET_SETTINGS,
|
|
17986
18296
|
CodeEvaluator,
|
|
17987
18297
|
CompositeEvaluator,
|
|
17988
18298
|
CostEvaluator,
|
|
18299
|
+
DEFAULT_CATEGORY,
|
|
17989
18300
|
DEFAULT_EVALUATOR_TEMPLATE,
|
|
17990
18301
|
DEFAULT_EVAL_PATTERNS,
|
|
17991
18302
|
DEFAULT_EXPLORATION_TOOLS,
|
|
@@ -18039,6 +18350,7 @@ export {
|
|
|
18039
18350
|
createTempWorkspace,
|
|
18040
18351
|
deepEqual,
|
|
18041
18352
|
defineConfig,
|
|
18353
|
+
deriveCategory,
|
|
18042
18354
|
detectFormat,
|
|
18043
18355
|
discoverAssertions,
|
|
18044
18356
|
discoverCopilotSessions,
|
|
@@ -18052,7 +18364,9 @@ export {
|
|
|
18052
18364
|
explorationRatio,
|
|
18053
18365
|
extractCacheConfig,
|
|
18054
18366
|
extractFailOnError,
|
|
18367
|
+
extractImageBlocks,
|
|
18055
18368
|
extractJsonBlob,
|
|
18369
|
+
extractLastAssistantContent,
|
|
18056
18370
|
extractTargetFromSuite,
|
|
18057
18371
|
extractTargetsFromSuite,
|
|
18058
18372
|
extractTargetsFromTestCase,
|
|
@@ -18066,12 +18380,15 @@ export {
|
|
|
18066
18380
|
getAgentvHome,
|
|
18067
18381
|
getOutputFilenames,
|
|
18068
18382
|
getSubagentsRoot,
|
|
18383
|
+
getTextContent,
|
|
18069
18384
|
getTraceStateRoot,
|
|
18070
18385
|
getWorkspacePath,
|
|
18071
18386
|
getWorkspacePoolRoot,
|
|
18072
18387
|
getWorkspacesRoot,
|
|
18073
18388
|
initializeBaseline,
|
|
18074
18389
|
isAgentSkillsFormat,
|
|
18390
|
+
isContent,
|
|
18391
|
+
isContentArray,
|
|
18075
18392
|
isEvaluatorKind,
|
|
18076
18393
|
isJsonObject,
|
|
18077
18394
|
isJsonValue,
|