agentv 3.10.3 → 3.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-7LC3VNOC.js → chunk-ETMDLQ72.js} +1141 -60
- package/dist/chunk-ETMDLQ72.js.map +1 -0
- package/dist/{chunk-JUQCB3ZW.js → chunk-EZGWZVVK.js} +322 -149
- package/dist/chunk-EZGWZVVK.js.map +1 -0
- package/dist/{chunk-U556GRI3.js → chunk-JEW3FEO7.js} +68 -32
- package/dist/chunk-JEW3FEO7.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-2X7A3TTC.js → dist-QERRYDSC.js} +2 -2
- package/dist/index.js +3 -3
- package/dist/{interactive-CSA4KIND.js → interactive-AD4PRYDN.js} +3 -3
- package/dist/templates/.agentv/config.yaml +4 -13
- package/dist/templates/.agentv/targets.yaml +0 -16
- package/package.json +3 -1
- package/dist/chunk-7LC3VNOC.js.map +0 -1
- package/dist/chunk-JUQCB3ZW.js.map +0 -1
- package/dist/chunk-U556GRI3.js.map +0 -1
- package/dist/templates/.agentv/.env.example +0 -23
- /package/dist/{dist-2X7A3TTC.js.map → dist-QERRYDSC.js.map} +0 -0
- /package/dist/{interactive-CSA4KIND.js.map → interactive-AD4PRYDN.js.map} +0 -0
|
@@ -301,7 +301,7 @@ var require_dist = __commonJS({
|
|
|
301
301
|
}
|
|
302
302
|
});
|
|
303
303
|
|
|
304
|
-
// ../../packages/core/dist/chunk-
|
|
304
|
+
// ../../packages/core/dist/chunk-AVTN5AB7.js
|
|
305
305
|
import { constants } from "node:fs";
|
|
306
306
|
import { access, readFile } from "node:fs/promises";
|
|
307
307
|
import path from "node:path";
|
|
@@ -419,7 +419,7 @@ __export(external_exports2, {
|
|
|
419
419
|
void: () => voidType
|
|
420
420
|
});
|
|
421
421
|
|
|
422
|
-
// ../../packages/core/dist/chunk-
|
|
422
|
+
// ../../packages/core/dist/chunk-AVTN5AB7.js
|
|
423
423
|
import { readFile as readFile2 } from "node:fs/promises";
|
|
424
424
|
import path3 from "node:path";
|
|
425
425
|
import fg from "fast-glob";
|
|
@@ -1363,7 +1363,7 @@ function normalizeCopilotLogFormat(value) {
|
|
|
1363
1363
|
}
|
|
1364
1364
|
function resolvePiCodingAgentConfig(target, env, evalFilePath) {
|
|
1365
1365
|
const executableSource = target.executable ?? target.command ?? target.binary;
|
|
1366
|
-
const
|
|
1366
|
+
const subproviderSource = target.subprovider;
|
|
1367
1367
|
const modelSource = target.model ?? target.pi_model ?? target.piModel;
|
|
1368
1368
|
const apiKeySource = target.api_key ?? target.apiKey;
|
|
1369
1369
|
const toolsSource = target.tools ?? target.pi_tools ?? target.piTools;
|
|
@@ -1379,10 +1379,15 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
|
|
|
1379
1379
|
allowLiteral: true,
|
|
1380
1380
|
optionalEnv: true
|
|
1381
1381
|
}) ?? "pi";
|
|
1382
|
-
const
|
|
1383
|
-
|
|
1384
|
-
|
|
1385
|
-
|
|
1382
|
+
const subprovider = resolveOptionalString(
|
|
1383
|
+
subproviderSource,
|
|
1384
|
+
env,
|
|
1385
|
+
`${target.name} pi subprovider`,
|
|
1386
|
+
{
|
|
1387
|
+
allowLiteral: true,
|
|
1388
|
+
optionalEnv: true
|
|
1389
|
+
}
|
|
1390
|
+
);
|
|
1386
1391
|
const model = resolveOptionalString(modelSource, env, `${target.name} pi model`, {
|
|
1387
1392
|
allowLiteral: true,
|
|
1388
1393
|
optionalEnv: true
|
|
@@ -1430,7 +1435,7 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
|
|
|
1430
1435
|
const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
|
|
1431
1436
|
return {
|
|
1432
1437
|
executable,
|
|
1433
|
-
|
|
1438
|
+
subprovider,
|
|
1434
1439
|
model,
|
|
1435
1440
|
apiKey,
|
|
1436
1441
|
tools,
|
|
@@ -1445,15 +1450,15 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
|
|
|
1445
1450
|
};
|
|
1446
1451
|
}
|
|
1447
1452
|
function resolvePiAgentSdkConfig(target, env) {
|
|
1448
|
-
const
|
|
1453
|
+
const subproviderSource = target.subprovider;
|
|
1449
1454
|
const modelSource = target.model ?? target.pi_model ?? target.piModel;
|
|
1450
1455
|
const apiKeySource = target.api_key ?? target.apiKey;
|
|
1451
1456
|
const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
|
|
1452
1457
|
const systemPromptSource = target.system_prompt ?? target.systemPrompt;
|
|
1453
|
-
const
|
|
1454
|
-
|
|
1458
|
+
const subprovider = resolveOptionalString(
|
|
1459
|
+
subproviderSource,
|
|
1455
1460
|
env,
|
|
1456
|
-
`${target.name} pi-agent-sdk
|
|
1461
|
+
`${target.name} pi-agent-sdk subprovider`,
|
|
1457
1462
|
{
|
|
1458
1463
|
allowLiteral: true,
|
|
1459
1464
|
optionalEnv: true
|
|
@@ -1470,7 +1475,7 @@ function resolvePiAgentSdkConfig(target, env) {
|
|
|
1470
1475
|
const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} pi-agent-sdk timeout`);
|
|
1471
1476
|
const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
|
|
1472
1477
|
return {
|
|
1473
|
-
|
|
1478
|
+
subprovider,
|
|
1474
1479
|
model,
|
|
1475
1480
|
apiKey,
|
|
1476
1481
|
timeoutMs,
|
|
@@ -2039,7 +2044,7 @@ import path8 from "node:path";
|
|
|
2039
2044
|
import { parse as parse3 } from "yaml";
|
|
2040
2045
|
import { createOpenAI } from "@ai-sdk/openai";
|
|
2041
2046
|
|
|
2042
|
-
// ../../node_modules/.bun/@openrouter+ai-sdk-provider@2.3.
|
|
2047
|
+
// ../../node_modules/.bun/@openrouter+ai-sdk-provider@2.3.3+3ab978b6804fd9e7/node_modules/@openrouter/ai-sdk-provider/dist/index.mjs
|
|
2043
2048
|
var __defProp = Object.defineProperty;
|
|
2044
2049
|
var __defProps = Object.defineProperties;
|
|
2045
2050
|
var __getOwnPropDescs = Object.getOwnPropertyDescriptors;
|
|
@@ -4202,11 +4207,13 @@ function isDefinedOrNotNull(value) {
|
|
|
4202
4207
|
var ReasoningFormat = /* @__PURE__ */ ((ReasoningFormat2) => {
|
|
4203
4208
|
ReasoningFormat2["Unknown"] = "unknown";
|
|
4204
4209
|
ReasoningFormat2["OpenAIResponsesV1"] = "openai-responses-v1";
|
|
4210
|
+
ReasoningFormat2["AzureOpenAIResponsesV1"] = "azure-openai-responses-v1";
|
|
4205
4211
|
ReasoningFormat2["XAIResponsesV1"] = "xai-responses-v1";
|
|
4206
4212
|
ReasoningFormat2["AnthropicClaudeV1"] = "anthropic-claude-v1";
|
|
4207
4213
|
ReasoningFormat2["GoogleGeminiV1"] = "google-gemini-v1";
|
|
4208
4214
|
return ReasoningFormat2;
|
|
4209
4215
|
})(ReasoningFormat || {});
|
|
4216
|
+
var DEFAULT_REASONING_FORMAT = "anthropic-claude-v1";
|
|
4210
4217
|
var CommonReasoningDetailSchema = external_exports.object({
|
|
4211
4218
|
id: external_exports.string().nullish(),
|
|
4212
4219
|
format: external_exports.enum(ReasoningFormat).nullish(),
|
|
@@ -4360,7 +4367,11 @@ var OpenRouterProviderMetadataSchema = external_exports.object({
|
|
|
4360
4367
|
}).catchall(external_exports.any());
|
|
4361
4368
|
var OpenRouterProviderOptionsSchema = external_exports.object({
|
|
4362
4369
|
openrouter: external_exports.object({
|
|
4363
|
-
|
|
4370
|
+
// Use ReasoningDetailArraySchema (with unknown fallback) instead of
|
|
4371
|
+
// z.array(ReasoningDetailUnionSchema) so that a single malformed entry
|
|
4372
|
+
// (e.g., a future format not yet in the enum) is individually dropped
|
|
4373
|
+
// rather than causing the entire array to fail parsing.
|
|
4374
|
+
reasoning_details: ReasoningDetailArraySchema.optional(),
|
|
4364
4375
|
annotations: external_exports.array(FileAnnotationSchema).optional()
|
|
4365
4376
|
}).optional()
|
|
4366
4377
|
}).optional();
|
|
@@ -4758,8 +4769,24 @@ function convertToOpenRouterChatMessages(prompt) {
|
|
|
4758
4769
|
const candidateReasoningDetails = messageReasoningDetails && Array.isArray(messageReasoningDetails) && messageReasoningDetails.length > 0 ? messageReasoningDetails : findFirstReasoningDetails(content);
|
|
4759
4770
|
let finalReasoningDetails;
|
|
4760
4771
|
if (candidateReasoningDetails && candidateReasoningDetails.length > 0) {
|
|
4772
|
+
const validDetails = candidateReasoningDetails.filter((detail) => {
|
|
4773
|
+
var _a173;
|
|
4774
|
+
if (detail.type !== "reasoning.text") {
|
|
4775
|
+
return true;
|
|
4776
|
+
}
|
|
4777
|
+
const format = (_a173 = detail.format) != null ? _a173 : DEFAULT_REASONING_FORMAT;
|
|
4778
|
+
if (format !== "anthropic-claude-v1") {
|
|
4779
|
+
return true;
|
|
4780
|
+
}
|
|
4781
|
+
return !!detail.signature;
|
|
4782
|
+
});
|
|
4783
|
+
if (validDetails.length < candidateReasoningDetails.length) {
|
|
4784
|
+
console.warn(
|
|
4785
|
+
"[openrouter] Some reasoning_details entries were removed because they were missing signatures. See https://github.com/OpenRouterTeam/ai-sdk-provider/issues/423 for more details."
|
|
4786
|
+
);
|
|
4787
|
+
}
|
|
4761
4788
|
const uniqueDetails = [];
|
|
4762
|
-
for (const detail of
|
|
4789
|
+
for (const detail of validDetails) {
|
|
4763
4790
|
if (reasoningDetailsTracker.upsert(detail)) {
|
|
4764
4791
|
uniqueDetails.push(detail);
|
|
4765
4792
|
}
|
|
@@ -4808,20 +4835,135 @@ function getToolResultContent(input) {
|
|
|
4808
4835
|
return input.output.value;
|
|
4809
4836
|
case "json":
|
|
4810
4837
|
case "error-json":
|
|
4811
|
-
case "content":
|
|
4812
4838
|
return JSON.stringify(input.output.value);
|
|
4839
|
+
case "content":
|
|
4840
|
+
return mapToolResultContentParts(input.output.value);
|
|
4813
4841
|
case "execution-denied":
|
|
4814
4842
|
return (_a163 = input.output.reason) != null ? _a163 : "Tool execution denied";
|
|
4815
4843
|
}
|
|
4816
4844
|
}
|
|
4845
|
+
function mapToolResultContentParts(parts) {
|
|
4846
|
+
return parts.map((part) => {
|
|
4847
|
+
var _a163, _b162, _c;
|
|
4848
|
+
switch (part.type) {
|
|
4849
|
+
case "text":
|
|
4850
|
+
return { type: "text", text: part.text };
|
|
4851
|
+
case "image-data":
|
|
4852
|
+
return {
|
|
4853
|
+
type: "image_url",
|
|
4854
|
+
image_url: {
|
|
4855
|
+
url: buildFileDataUrl({
|
|
4856
|
+
data: part.data,
|
|
4857
|
+
mediaType: part.mediaType,
|
|
4858
|
+
defaultMediaType: "image/jpeg"
|
|
4859
|
+
})
|
|
4860
|
+
}
|
|
4861
|
+
};
|
|
4862
|
+
case "image-url":
|
|
4863
|
+
return {
|
|
4864
|
+
type: "image_url",
|
|
4865
|
+
image_url: { url: part.url }
|
|
4866
|
+
};
|
|
4867
|
+
case "file-data": {
|
|
4868
|
+
const dataUrl = buildFileDataUrl({
|
|
4869
|
+
data: part.data,
|
|
4870
|
+
mediaType: part.mediaType,
|
|
4871
|
+
defaultMediaType: "application/octet-stream"
|
|
4872
|
+
});
|
|
4873
|
+
if ((_a163 = part.mediaType) == null ? void 0 : _a163.startsWith("image/")) {
|
|
4874
|
+
return {
|
|
4875
|
+
type: "image_url",
|
|
4876
|
+
image_url: { url: dataUrl }
|
|
4877
|
+
};
|
|
4878
|
+
}
|
|
4879
|
+
if ((_b162 = part.mediaType) == null ? void 0 : _b162.startsWith("audio/")) {
|
|
4880
|
+
const rawFormat = part.mediaType.replace("audio/", "");
|
|
4881
|
+
const format = MIME_TO_FORMAT[rawFormat];
|
|
4882
|
+
if (format !== void 0) {
|
|
4883
|
+
return {
|
|
4884
|
+
type: "input_audio",
|
|
4885
|
+
input_audio: {
|
|
4886
|
+
data: getBase64FromDataUrl(dataUrl),
|
|
4887
|
+
format
|
|
4888
|
+
}
|
|
4889
|
+
};
|
|
4890
|
+
}
|
|
4891
|
+
}
|
|
4892
|
+
return {
|
|
4893
|
+
type: "file",
|
|
4894
|
+
file: {
|
|
4895
|
+
filename: (_c = part.filename) != null ? _c : "",
|
|
4896
|
+
file_data: dataUrl
|
|
4897
|
+
}
|
|
4898
|
+
};
|
|
4899
|
+
}
|
|
4900
|
+
case "file-url": {
|
|
4901
|
+
if (looksLikeImageUrl(part.url)) {
|
|
4902
|
+
return {
|
|
4903
|
+
type: "image_url",
|
|
4904
|
+
image_url: { url: part.url }
|
|
4905
|
+
};
|
|
4906
|
+
}
|
|
4907
|
+
return {
|
|
4908
|
+
type: "file",
|
|
4909
|
+
file: {
|
|
4910
|
+
filename: filenameFromUrl(part.url),
|
|
4911
|
+
file_data: part.url
|
|
4912
|
+
}
|
|
4913
|
+
};
|
|
4914
|
+
}
|
|
4915
|
+
case "file-id":
|
|
4916
|
+
case "image-file-id":
|
|
4917
|
+
case "custom":
|
|
4918
|
+
return { type: "text", text: JSON.stringify(part) };
|
|
4919
|
+
default: {
|
|
4920
|
+
const _exhaustiveCheck = part;
|
|
4921
|
+
return { type: "text", text: JSON.stringify(_exhaustiveCheck) };
|
|
4922
|
+
}
|
|
4923
|
+
}
|
|
4924
|
+
});
|
|
4925
|
+
}
|
|
4926
|
+
var IMAGE_EXTENSIONS = /* @__PURE__ */ new Set([
|
|
4927
|
+
"jpg",
|
|
4928
|
+
"jpeg",
|
|
4929
|
+
"png",
|
|
4930
|
+
"gif",
|
|
4931
|
+
"webp",
|
|
4932
|
+
"svg",
|
|
4933
|
+
"bmp",
|
|
4934
|
+
"ico",
|
|
4935
|
+
"tif",
|
|
4936
|
+
"tiff",
|
|
4937
|
+
"avif"
|
|
4938
|
+
]);
|
|
4939
|
+
function looksLikeImageUrl(url) {
|
|
4940
|
+
var _a163;
|
|
4941
|
+
try {
|
|
4942
|
+
const pathname = new URL(url).pathname;
|
|
4943
|
+
const ext = (_a163 = pathname.split(".").pop()) == null ? void 0 : _a163.toLowerCase();
|
|
4944
|
+
return ext !== void 0 && IMAGE_EXTENSIONS.has(ext);
|
|
4945
|
+
} catch (e) {
|
|
4946
|
+
return false;
|
|
4947
|
+
}
|
|
4948
|
+
}
|
|
4949
|
+
function filenameFromUrl(url) {
|
|
4950
|
+
try {
|
|
4951
|
+
const pathname = new URL(url).pathname;
|
|
4952
|
+
const last = pathname.split("/").pop();
|
|
4953
|
+
return (last == null ? void 0 : last.includes(".")) ? last : "";
|
|
4954
|
+
} catch (e) {
|
|
4955
|
+
return "";
|
|
4956
|
+
}
|
|
4957
|
+
}
|
|
4817
4958
|
function findFirstReasoningDetails(content) {
|
|
4818
|
-
var _a163, _b162, _c;
|
|
4959
|
+
var _a163, _b162, _c, _d;
|
|
4819
4960
|
for (const part of content) {
|
|
4820
4961
|
if (part.type === "tool-call") {
|
|
4821
|
-
const
|
|
4822
|
-
|
|
4823
|
-
|
|
4824
|
-
|
|
4962
|
+
const parsed = OpenRouterProviderOptionsSchema.safeParse(
|
|
4963
|
+
part.providerOptions
|
|
4964
|
+
);
|
|
4965
|
+
if (parsed.success && ((_b162 = (_a163 = parsed.data) == null ? void 0 : _a163.openrouter) == null ? void 0 : _b162.reasoning_details) && parsed.data.openrouter.reasoning_details.length > 0) {
|
|
4966
|
+
return parsed.data.openrouter.reasoning_details;
|
|
4825
4967
|
}
|
|
4826
4968
|
}
|
|
4827
4969
|
}
|
|
@@ -4830,7 +4972,7 @@ function findFirstReasoningDetails(content) {
|
|
|
4830
4972
|
const parsed = OpenRouterProviderOptionsSchema.safeParse(
|
|
4831
4973
|
part.providerOptions
|
|
4832
4974
|
);
|
|
4833
|
-
if (parsed.success && ((
|
|
4975
|
+
if (parsed.success && ((_d = (_c = parsed.data) == null ? void 0 : _c.openrouter) == null ? void 0 : _d.reasoning_details) && parsed.data.openrouter.reasoning_details.length > 0) {
|
|
4834
4976
|
return parsed.data.openrouter.reasoning_details;
|
|
4835
4977
|
}
|
|
4836
4978
|
}
|
|
@@ -6490,7 +6632,7 @@ function withUserAgentSuffix22(headers, ...userAgentSuffixParts) {
|
|
|
6490
6632
|
"user-agent": userAgent
|
|
6491
6633
|
});
|
|
6492
6634
|
}
|
|
6493
|
-
var VERSION2 = false ? "0.0.0-test" : "2.3.
|
|
6635
|
+
var VERSION2 = false ? "0.0.0-test" : "2.3.3";
|
|
6494
6636
|
function createOpenRouter(options = {}) {
|
|
6495
6637
|
var _a163, _b162, _c;
|
|
6496
6638
|
const baseURL = (_b162 = withoutTrailingSlash2((_a163 = options.baseURL) != null ? _a163 : options.baseUrl)) != null ? _b162 : "https://openrouter.ai/api/v1";
|
|
@@ -14227,6 +14369,62 @@ function mergeExecutionMetrics(computed, metrics) {
|
|
|
14227
14369
|
endTime: metrics.endTime ?? computed.endTime
|
|
14228
14370
|
};
|
|
14229
14371
|
}
|
|
14372
|
+
function flattenInputMessages(messages) {
|
|
14373
|
+
return messages.flatMap((message) => extractContentSegments(message.content));
|
|
14374
|
+
}
|
|
14375
|
+
function collectResolvedInputFilePaths(messages) {
|
|
14376
|
+
const filePaths = [];
|
|
14377
|
+
for (const message of messages) {
|
|
14378
|
+
if (!Array.isArray(message.content)) {
|
|
14379
|
+
continue;
|
|
14380
|
+
}
|
|
14381
|
+
for (const segment of message.content) {
|
|
14382
|
+
if (isJsonObject(segment) && segment.type === "file" && typeof segment.resolvedPath === "string") {
|
|
14383
|
+
filePaths.push(segment.resolvedPath);
|
|
14384
|
+
}
|
|
14385
|
+
}
|
|
14386
|
+
}
|
|
14387
|
+
return filePaths;
|
|
14388
|
+
}
|
|
14389
|
+
function extractContentSegments(content) {
|
|
14390
|
+
if (typeof content === "string") {
|
|
14391
|
+
return content.trim().length > 0 ? [{ type: "text", value: content }] : [];
|
|
14392
|
+
}
|
|
14393
|
+
if (isJsonObject(content)) {
|
|
14394
|
+
const rendered = JSON.stringify(content, null, 2);
|
|
14395
|
+
return rendered.trim().length > 0 ? [{ type: "text", value: rendered }] : [];
|
|
14396
|
+
}
|
|
14397
|
+
if (!Array.isArray(content)) {
|
|
14398
|
+
return [];
|
|
14399
|
+
}
|
|
14400
|
+
const segments = [];
|
|
14401
|
+
for (const segment of content) {
|
|
14402
|
+
if (!isJsonObject(segment)) {
|
|
14403
|
+
continue;
|
|
14404
|
+
}
|
|
14405
|
+
segments.push(cloneJsonObject(segment));
|
|
14406
|
+
}
|
|
14407
|
+
return segments;
|
|
14408
|
+
}
|
|
14409
|
+
function cloneJsonObject(source) {
|
|
14410
|
+
const entries = Object.entries(source).map(([key, value]) => [key, cloneJsonValue(value)]);
|
|
14411
|
+
return Object.fromEntries(entries);
|
|
14412
|
+
}
|
|
14413
|
+
function cloneJsonValue(value) {
|
|
14414
|
+
if (value === null) {
|
|
14415
|
+
return null;
|
|
14416
|
+
}
|
|
14417
|
+
if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
|
|
14418
|
+
return value;
|
|
14419
|
+
}
|
|
14420
|
+
if (Array.isArray(value)) {
|
|
14421
|
+
return value.map((item) => cloneJsonValue(item));
|
|
14422
|
+
}
|
|
14423
|
+
if (typeof value === "object") {
|
|
14424
|
+
return cloneJsonObject(value);
|
|
14425
|
+
}
|
|
14426
|
+
return value;
|
|
14427
|
+
}
|
|
14230
14428
|
var ANSI_RED = "\x1B[31m";
|
|
14231
14429
|
var ANSI_RESET2 = "\x1B[0m";
|
|
14232
14430
|
function logError(msg) {
|
|
@@ -14292,7 +14490,6 @@ function parseAgentSkillsEvals(parsed, source = "evals.json", baseDir) {
|
|
|
14292
14490
|
id: String(id),
|
|
14293
14491
|
question: prompt,
|
|
14294
14492
|
input: [{ role: "user", content: prompt }],
|
|
14295
|
-
input_segments: [{ type: "text", value: prompt }],
|
|
14296
14493
|
expected_output: evalCase.expected_output ? [{ role: "assistant", content: evalCase.expected_output }] : [],
|
|
14297
14494
|
reference_answer: evalCase.expected_output,
|
|
14298
14495
|
file_paths: filePaths,
|
|
@@ -14414,7 +14611,7 @@ async function loadConfig(evalFilePath, repoRoot) {
|
|
|
14414
14611
|
}
|
|
14415
14612
|
try {
|
|
14416
14613
|
const rawConfig = await readFile22(configPath, "utf8");
|
|
14417
|
-
const parsed = parse(rawConfig);
|
|
14614
|
+
const parsed = interpolateEnv(parse(rawConfig), process.env);
|
|
14418
14615
|
if (!isJsonObject(parsed)) {
|
|
14419
14616
|
logWarning(`Invalid .agentv/config.yaml format at ${configPath}`);
|
|
14420
14617
|
continue;
|
|
@@ -14632,6 +14829,27 @@ function parseExecutionDefaults(raw, configPath) {
|
|
|
14632
14829
|
} else if (otelFile !== void 0) {
|
|
14633
14830
|
logWarning(`Invalid execution.otel_file in ${configPath}, expected non-empty string`);
|
|
14634
14831
|
}
|
|
14832
|
+
if (typeof obj.export_otel === "boolean") {
|
|
14833
|
+
result.export_otel = obj.export_otel;
|
|
14834
|
+
} else if (obj.export_otel !== void 0) {
|
|
14835
|
+
logWarning(`Invalid execution.export_otel in ${configPath}, expected boolean`);
|
|
14836
|
+
}
|
|
14837
|
+
const otelBackend = obj.otel_backend;
|
|
14838
|
+
if (typeof otelBackend === "string" && otelBackend.trim().length > 0) {
|
|
14839
|
+
result.otel_backend = otelBackend.trim();
|
|
14840
|
+
} else if (otelBackend !== void 0) {
|
|
14841
|
+
logWarning(`Invalid execution.otel_backend in ${configPath}, expected non-empty string`);
|
|
14842
|
+
}
|
|
14843
|
+
if (typeof obj.otel_capture_content === "boolean") {
|
|
14844
|
+
result.otel_capture_content = obj.otel_capture_content;
|
|
14845
|
+
} else if (obj.otel_capture_content !== void 0) {
|
|
14846
|
+
logWarning(`Invalid execution.otel_capture_content in ${configPath}, expected boolean`);
|
|
14847
|
+
}
|
|
14848
|
+
if (typeof obj.otel_group_turns === "boolean") {
|
|
14849
|
+
result.otel_group_turns = obj.otel_group_turns;
|
|
14850
|
+
} else if (obj.otel_group_turns !== void 0) {
|
|
14851
|
+
logWarning(`Invalid execution.otel_group_turns in ${configPath}, expected boolean`);
|
|
14852
|
+
}
|
|
14635
14853
|
if (typeof obj.pool_workspaces === "boolean") {
|
|
14636
14854
|
result.pool_workspaces = obj.pool_workspaces;
|
|
14637
14855
|
} else if (obj.pool_workspaces !== void 0) {
|
|
@@ -16076,27 +16294,28 @@ var ANSI_YELLOW4 = "\x1B[33m";
|
|
|
16076
16294
|
var ANSI_RESET5 = "\x1B[0m";
|
|
16077
16295
|
async function processMessages(options) {
|
|
16078
16296
|
const { messages, searchRoots, repoRootPath, textParts, messageType, verbose } = options;
|
|
16079
|
-
const
|
|
16297
|
+
const processedMessages = [];
|
|
16080
16298
|
for (const message of messages) {
|
|
16081
16299
|
const content = message.content;
|
|
16082
16300
|
if (typeof content === "string") {
|
|
16083
|
-
segments.push({ type: "text", value: content });
|
|
16084
16301
|
if (textParts) {
|
|
16085
16302
|
textParts.push(content);
|
|
16086
16303
|
}
|
|
16304
|
+
processedMessages.push({ ...message, content });
|
|
16087
16305
|
continue;
|
|
16088
16306
|
}
|
|
16089
16307
|
if (isJsonObject(content)) {
|
|
16090
16308
|
const rendered = JSON.stringify(content, null, 2);
|
|
16091
|
-
segments.push({ type: "text", value: rendered });
|
|
16092
16309
|
if (textParts) {
|
|
16093
16310
|
textParts.push(rendered);
|
|
16094
16311
|
}
|
|
16312
|
+
processedMessages.push({ ...message, content: cloneJsonObject(content) });
|
|
16095
16313
|
continue;
|
|
16096
16314
|
}
|
|
16097
16315
|
if (!Array.isArray(content)) {
|
|
16098
16316
|
continue;
|
|
16099
16317
|
}
|
|
16318
|
+
const processedContent = [];
|
|
16100
16319
|
for (const rawSegment of content) {
|
|
16101
16320
|
if (!isJsonObject(rawSegment)) {
|
|
16102
16321
|
continue;
|
|
@@ -16119,8 +16338,8 @@ async function processMessages(options) {
|
|
|
16119
16338
|
}
|
|
16120
16339
|
try {
|
|
16121
16340
|
const fileContent = (await readFile4(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
16122
|
-
|
|
16123
|
-
|
|
16341
|
+
processedContent.push({
|
|
16342
|
+
...cloneJsonObject(rawSegment),
|
|
16124
16343
|
path: displayPath,
|
|
16125
16344
|
text: fileContent,
|
|
16126
16345
|
resolvedPath: path5.resolve(resolvedPath)
|
|
@@ -16137,37 +16356,19 @@ async function processMessages(options) {
|
|
|
16137
16356
|
continue;
|
|
16138
16357
|
}
|
|
16139
16358
|
const clonedSegment = cloneJsonObject(rawSegment);
|
|
16140
|
-
|
|
16359
|
+
processedContent.push(clonedSegment);
|
|
16141
16360
|
const inlineValue = clonedSegment.value;
|
|
16142
16361
|
if (typeof inlineValue === "string" && textParts) {
|
|
16143
16362
|
textParts.push(inlineValue);
|
|
16144
16363
|
}
|
|
16145
16364
|
}
|
|
16365
|
+
processedMessages.push({ ...message, content: processedContent });
|
|
16146
16366
|
}
|
|
16147
|
-
return
|
|
16367
|
+
return processedMessages;
|
|
16148
16368
|
}
|
|
16149
16369
|
function asString3(value) {
|
|
16150
16370
|
return typeof value === "string" ? value : void 0;
|
|
16151
16371
|
}
|
|
16152
|
-
function cloneJsonObject(source) {
|
|
16153
|
-
const entries = Object.entries(source).map(([key, value]) => [key, cloneJsonValue(value)]);
|
|
16154
|
-
return Object.fromEntries(entries);
|
|
16155
|
-
}
|
|
16156
|
-
function cloneJsonValue(value) {
|
|
16157
|
-
if (value === null) {
|
|
16158
|
-
return null;
|
|
16159
|
-
}
|
|
16160
|
-
if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
|
|
16161
|
-
return value;
|
|
16162
|
-
}
|
|
16163
|
-
if (Array.isArray(value)) {
|
|
16164
|
-
return value.map((item) => cloneJsonValue(item));
|
|
16165
|
-
}
|
|
16166
|
-
if (typeof value === "object") {
|
|
16167
|
-
return cloneJsonObject(value);
|
|
16168
|
-
}
|
|
16169
|
-
return value;
|
|
16170
|
-
}
|
|
16171
16372
|
function logWarning3(message, details) {
|
|
16172
16373
|
if (details && details.length > 0) {
|
|
16173
16374
|
const detailBlock = details.join("\n");
|
|
@@ -16412,10 +16613,10 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
16412
16613
|
);
|
|
16413
16614
|
}
|
|
16414
16615
|
}
|
|
16415
|
-
const
|
|
16616
|
+
const rawInputMessages = resolveInputMessages(evalcase);
|
|
16416
16617
|
const expectedMessages = resolveExpectedMessages(evalcase) ?? [];
|
|
16417
16618
|
const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || evalcase.assert !== void 0;
|
|
16418
|
-
if (!id || !hasEvaluationSpec || !
|
|
16619
|
+
if (!id || !hasEvaluationSpec || !rawInputMessages || rawInputMessages.length === 0) {
|
|
16419
16620
|
logError2(
|
|
16420
16621
|
`Skipping incomplete test at line ${lineNumber}: ${id ?? "unknown"}. Missing required fields: id, input, and at least one of criteria/expected_output/assert`
|
|
16421
16622
|
);
|
|
@@ -16423,8 +16624,8 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
16423
16624
|
}
|
|
16424
16625
|
const hasExpectedMessages = expectedMessages.length > 0;
|
|
16425
16626
|
const inputTextParts = [];
|
|
16426
|
-
const
|
|
16427
|
-
messages:
|
|
16627
|
+
const inputMessages = await processMessages({
|
|
16628
|
+
messages: rawInputMessages,
|
|
16428
16629
|
searchRoots,
|
|
16429
16630
|
repoRootPath,
|
|
16430
16631
|
textParts: inputTextParts,
|
|
@@ -16470,19 +16671,13 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
16470
16671
|
}
|
|
16471
16672
|
}
|
|
16472
16673
|
warnUnconsumedCriteria(outcome, evaluators, id ?? "unknown");
|
|
16473
|
-
const userFilePaths =
|
|
16474
|
-
for (const segment of inputSegments) {
|
|
16475
|
-
if (segment.type === "file" && typeof segment.resolvedPath === "string") {
|
|
16476
|
-
userFilePaths.push(segment.resolvedPath);
|
|
16477
|
-
}
|
|
16478
|
-
}
|
|
16674
|
+
const userFilePaths = collectResolvedInputFilePaths(inputMessages);
|
|
16479
16675
|
const testCase = {
|
|
16480
16676
|
id,
|
|
16481
16677
|
eval_set: evalSetName,
|
|
16482
16678
|
conversation_id: conversationId,
|
|
16483
16679
|
question,
|
|
16484
16680
|
input: inputMessages,
|
|
16485
|
-
input_segments: inputSegments,
|
|
16486
16681
|
expected_output: outputSegments,
|
|
16487
16682
|
reference_answer: referenceAnswer,
|
|
16488
16683
|
file_paths: userFilePaths,
|
|
@@ -16543,50 +16738,9 @@ function parseMetadata(suite) {
|
|
|
16543
16738
|
});
|
|
16544
16739
|
}
|
|
16545
16740
|
async function buildPromptInputs(testCase, mode = "lm") {
|
|
16546
|
-
const segmentsByMessage =
|
|
16547
|
-
|
|
16548
|
-
|
|
16549
|
-
if (segment.type === "file" && typeof segment.path === "string" && typeof segment.text === "string") {
|
|
16550
|
-
fileContentsByPath.set(segment.path, segment.text);
|
|
16551
|
-
}
|
|
16552
|
-
}
|
|
16553
|
-
for (const message of testCase.input) {
|
|
16554
|
-
const messageSegments = [];
|
|
16555
|
-
if (typeof message.content === "string") {
|
|
16556
|
-
if (message.content.trim().length > 0) {
|
|
16557
|
-
messageSegments.push({ type: "text", value: message.content });
|
|
16558
|
-
}
|
|
16559
|
-
} else if (Array.isArray(message.content)) {
|
|
16560
|
-
for (const segment of message.content) {
|
|
16561
|
-
if (typeof segment === "string") {
|
|
16562
|
-
if (segment.trim().length > 0) {
|
|
16563
|
-
messageSegments.push({ type: "text", value: segment });
|
|
16564
|
-
}
|
|
16565
|
-
} else if (isJsonObject(segment)) {
|
|
16566
|
-
const type = asString5(segment.type);
|
|
16567
|
-
if (type === "file") {
|
|
16568
|
-
const value = asString5(segment.value);
|
|
16569
|
-
if (!value) continue;
|
|
16570
|
-
const fileText = fileContentsByPath.get(value);
|
|
16571
|
-
if (fileText !== void 0) {
|
|
16572
|
-
messageSegments.push({ type: "file", text: fileText, path: value });
|
|
16573
|
-
}
|
|
16574
|
-
} else if (type === "text") {
|
|
16575
|
-
const textValue = asString5(segment.value);
|
|
16576
|
-
if (textValue && textValue.trim().length > 0) {
|
|
16577
|
-
messageSegments.push({ type: "text", value: textValue });
|
|
16578
|
-
}
|
|
16579
|
-
}
|
|
16580
|
-
}
|
|
16581
|
-
}
|
|
16582
|
-
} else if (isJsonObject(message.content)) {
|
|
16583
|
-
const rendered = JSON.stringify(message.content, null, 2);
|
|
16584
|
-
if (rendered.trim().length > 0) {
|
|
16585
|
-
messageSegments.push({ type: "text", value: rendered });
|
|
16586
|
-
}
|
|
16587
|
-
}
|
|
16588
|
-
segmentsByMessage.push(messageSegments);
|
|
16589
|
-
}
|
|
16741
|
+
const segmentsByMessage = testCase.input.map(
|
|
16742
|
+
(message) => extractContentSegments(message.content)
|
|
16743
|
+
);
|
|
16590
16744
|
const useRoleMarkers = needsRoleMarkers(testCase.input, segmentsByMessage);
|
|
16591
16745
|
let question;
|
|
16592
16746
|
if (useRoleMarkers) {
|
|
@@ -16614,7 +16768,7 @@ ${messageContent}`);
|
|
|
16614
16768
|
question = messageParts.join("\n\n");
|
|
16615
16769
|
} else {
|
|
16616
16770
|
const questionParts = [];
|
|
16617
|
-
for (const segment of testCase.
|
|
16771
|
+
for (const segment of flattenInputMessages(testCase.input)) {
|
|
16618
16772
|
const formattedContent = formatSegment(segment, mode);
|
|
16619
16773
|
if (formattedContent) {
|
|
16620
16774
|
questionParts.push(formattedContent);
|
|
@@ -16701,9 +16855,6 @@ function buildChatPromptFromSegments(options) {
|
|
|
16701
16855
|
}
|
|
16702
16856
|
return chatPrompt.length > 0 ? chatPrompt : void 0;
|
|
16703
16857
|
}
|
|
16704
|
-
function asString5(value) {
|
|
16705
|
-
return typeof value === "string" ? value : void 0;
|
|
16706
|
-
}
|
|
16707
16858
|
var ANSI_YELLOW6 = "\x1B[33m";
|
|
16708
16859
|
var ANSI_RED3 = "\x1B[31m";
|
|
16709
16860
|
var ANSI_RESET7 = "\x1B[0m";
|
|
@@ -16784,7 +16935,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
16784
16935
|
throw new Error(`Invalid test file format: ${evalFilePath}`);
|
|
16785
16936
|
}
|
|
16786
16937
|
const suite = interpolated;
|
|
16787
|
-
const evalSetNameFromSuite =
|
|
16938
|
+
const evalSetNameFromSuite = asString5(suite.name)?.trim();
|
|
16788
16939
|
const fallbackEvalSet = path7.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
|
|
16789
16940
|
const evalSetName = evalSetNameFromSuite && evalSetNameFromSuite.length > 0 ? evalSetNameFromSuite : fallbackEvalSet;
|
|
16790
16941
|
const rawTestcases = resolveTests(suite);
|
|
@@ -16803,7 +16954,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
16803
16954
|
const suiteInputMessages = expandInputShorthand(suite.input);
|
|
16804
16955
|
const suiteInputFiles = suite.input_files;
|
|
16805
16956
|
const rawGlobalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
|
|
16806
|
-
const _globalTarget =
|
|
16957
|
+
const _globalTarget = asString5(rawGlobalExecution?.target) ?? asString5(suite.target);
|
|
16807
16958
|
const suiteAssertions = suite.assertions ?? suite.assert;
|
|
16808
16959
|
if (suite.assert !== void 0 && suite.assertions === void 0) {
|
|
16809
16960
|
logWarning5("'assert' is deprecated at the suite level. Use 'assertions' instead.");
|
|
@@ -16816,17 +16967,17 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
16816
16967
|
continue;
|
|
16817
16968
|
}
|
|
16818
16969
|
const evalcase = rawEvalcase;
|
|
16819
|
-
const id =
|
|
16970
|
+
const id = asString5(evalcase.id);
|
|
16820
16971
|
if (filterPattern && (!id || !micromatch2.isMatch(id, filterPattern))) {
|
|
16821
16972
|
continue;
|
|
16822
16973
|
}
|
|
16823
|
-
const conversationId =
|
|
16824
|
-
let outcome =
|
|
16974
|
+
const conversationId = asString5(evalcase.conversation_id);
|
|
16975
|
+
let outcome = asString5(evalcase.criteria);
|
|
16825
16976
|
if (!outcome && evalcase.expected_outcome !== void 0) {
|
|
16826
|
-
outcome =
|
|
16977
|
+
outcome = asString5(evalcase.expected_outcome);
|
|
16827
16978
|
if (outcome) {
|
|
16828
16979
|
logWarning5(
|
|
16829
|
-
`Test '${
|
|
16980
|
+
`Test '${asString5(evalcase.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
|
|
16830
16981
|
);
|
|
16831
16982
|
}
|
|
16832
16983
|
}
|
|
@@ -16843,10 +16994,9 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
16843
16994
|
continue;
|
|
16844
16995
|
}
|
|
16845
16996
|
const effectiveSuiteInputMessages = suiteInputMessages && !skipDefaults ? suiteInputMessages : void 0;
|
|
16846
|
-
const inputMessages = effectiveSuiteInputMessages ? [...effectiveSuiteInputMessages, ...testInputMessages] : testInputMessages;
|
|
16847
16997
|
const hasExpectedMessages = expectedMessages.length > 0;
|
|
16848
16998
|
const inputTextParts = [];
|
|
16849
|
-
const
|
|
16999
|
+
const suiteResolvedInputMessages = effectiveSuiteInputMessages ? await processMessages({
|
|
16850
17000
|
messages: effectiveSuiteInputMessages,
|
|
16851
17001
|
searchRoots,
|
|
16852
17002
|
repoRootPath,
|
|
@@ -16854,7 +17004,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
16854
17004
|
messageType: "input",
|
|
16855
17005
|
verbose
|
|
16856
17006
|
}) : [];
|
|
16857
|
-
const
|
|
17007
|
+
const testResolvedInputMessages = await processMessages({
|
|
16858
17008
|
messages: testInputMessages,
|
|
16859
17009
|
searchRoots,
|
|
16860
17010
|
repoRootPath,
|
|
@@ -16862,7 +17012,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
16862
17012
|
messageType: "input",
|
|
16863
17013
|
verbose
|
|
16864
17014
|
});
|
|
16865
|
-
const
|
|
17015
|
+
const inputMessages = [...suiteResolvedInputMessages, ...testResolvedInputMessages];
|
|
16866
17016
|
const outputSegments = hasExpectedMessages ? await processExpectedMessages({
|
|
16867
17017
|
messages: expectedMessages,
|
|
16868
17018
|
searchRoots,
|
|
@@ -16900,12 +17050,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
16900
17050
|
}
|
|
16901
17051
|
}
|
|
16902
17052
|
warnUnconsumedCriteria(outcome, evaluators, id ?? "unknown");
|
|
16903
|
-
const userFilePaths =
|
|
16904
|
-
for (const segment of inputSegments) {
|
|
16905
|
-
if (segment.type === "file" && typeof segment.resolvedPath === "string") {
|
|
16906
|
-
userFilePaths.push(segment.resolvedPath);
|
|
16907
|
-
}
|
|
16908
|
-
}
|
|
17053
|
+
const userFilePaths = collectResolvedInputFilePaths(inputMessages);
|
|
16909
17054
|
const caseWorkspace = await resolveWorkspaceConfig(evalcase.workspace, evalFileDir);
|
|
16910
17055
|
const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
|
|
16911
17056
|
const metadata = isJsonObject(evalcase.metadata) ? evalcase.metadata : void 0;
|
|
@@ -16916,7 +17061,6 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
16916
17061
|
conversation_id: conversationId,
|
|
16917
17062
|
question,
|
|
16918
17063
|
input: inputMessages,
|
|
16919
|
-
input_segments: inputSegments,
|
|
16920
17064
|
expected_output: outputSegments,
|
|
16921
17065
|
reference_answer: referenceAnswer,
|
|
16922
17066
|
file_paths: userFilePaths,
|
|
@@ -17125,7 +17269,7 @@ function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
|
|
|
17125
17269
|
path: caseLevel.path ?? suiteLevel.path
|
|
17126
17270
|
};
|
|
17127
17271
|
}
|
|
17128
|
-
function
|
|
17272
|
+
function asString5(value) {
|
|
17129
17273
|
return typeof value === "string" ? value : void 0;
|
|
17130
17274
|
}
|
|
17131
17275
|
function logWarning5(message, details) {
|
|
@@ -20792,7 +20936,7 @@ var PiAgentSdkProvider = class {
|
|
|
20792
20936
|
const { Agent, getModel, getEnvApiKey } = await loadPiModules();
|
|
20793
20937
|
const startTimeIso = (/* @__PURE__ */ new Date()).toISOString();
|
|
20794
20938
|
const startMs = Date.now();
|
|
20795
|
-
const providerName = this.config.
|
|
20939
|
+
const providerName = this.config.subprovider ?? "anthropic";
|
|
20796
20940
|
const modelId = this.config.model ?? "claude-sonnet-4-20250514";
|
|
20797
20941
|
const model = getModel(providerName, modelId);
|
|
20798
20942
|
const systemPrompt = this.config.systemPrompt ?? "Answer directly and concisely.";
|
|
@@ -20904,7 +21048,7 @@ var PiAgentSdkProvider = class {
|
|
|
20904
21048
|
messages: agentMessages,
|
|
20905
21049
|
systemPrompt,
|
|
20906
21050
|
model: this.config.model,
|
|
20907
|
-
|
|
21051
|
+
subprovider: this.config.subprovider
|
|
20908
21052
|
},
|
|
20909
21053
|
output,
|
|
20910
21054
|
tokenUsage,
|
|
@@ -21128,8 +21272,8 @@ var PiCodingAgentProvider = class {
|
|
|
21128
21272
|
}
|
|
21129
21273
|
buildPiArgs(prompt, inputFiles, _captureFileChanges) {
|
|
21130
21274
|
const args = [];
|
|
21131
|
-
if (this.config.
|
|
21132
|
-
args.push("--provider", this.config.
|
|
21275
|
+
if (this.config.subprovider) {
|
|
21276
|
+
args.push("--provider", this.config.subprovider);
|
|
21133
21277
|
}
|
|
21134
21278
|
if (this.config.model) {
|
|
21135
21279
|
args.push("--model", this.config.model);
|
|
@@ -21187,7 +21331,7 @@ ${prompt}` : prompt;
|
|
|
21187
21331
|
buildEnv() {
|
|
21188
21332
|
const env = { ...process.env };
|
|
21189
21333
|
if (this.config.apiKey) {
|
|
21190
|
-
const provider = this.config.
|
|
21334
|
+
const provider = this.config.subprovider?.toLowerCase() ?? "google";
|
|
21191
21335
|
switch (provider) {
|
|
21192
21336
|
case "google":
|
|
21193
21337
|
case "gemini":
|
|
@@ -23921,7 +24065,8 @@ var freeformEvaluationSchema = external_exports2.object({
|
|
|
23921
24065
|
passed: external_exports2.boolean().describe("Whether this aspect was satisfied"),
|
|
23922
24066
|
evidence: external_exports2.string().describe("Concise evidence (1-2 sentences)").optional()
|
|
23923
24067
|
})
|
|
23924
|
-
).describe("Per-aspect evaluation results \u2014 one entry per aspect checked").optional()
|
|
24068
|
+
).describe("Per-aspect evaluation results \u2014 one entry per aspect checked").optional(),
|
|
24069
|
+
details: external_exports2.record(external_exports2.unknown()).describe("Optional structured metadata for domain-specific metrics").optional()
|
|
23925
24070
|
});
|
|
23926
24071
|
var rubricCheckResultSchema = external_exports2.object({
|
|
23927
24072
|
id: external_exports2.string().describe("The ID of the rubric item being checked"),
|
|
@@ -23983,7 +24128,7 @@ var LlmGraderEvaluator = class {
|
|
|
23983
24128
|
async evaluateFreeform(context2, graderProvider) {
|
|
23984
24129
|
const formattedQuestion = context2.promptInputs.question && context2.promptInputs.question.trim().length > 0 ? context2.promptInputs.question : context2.evalCase.question;
|
|
23985
24130
|
const variables = {
|
|
23986
|
-
[TEMPLATE_VARIABLES.INPUT]: JSON.stringify(context2.evalCase.
|
|
24131
|
+
[TEMPLATE_VARIABLES.INPUT]: JSON.stringify(context2.evalCase.input, null, 2),
|
|
23987
24132
|
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: JSON.stringify(
|
|
23988
24133
|
context2.evalCase.expected_output,
|
|
23989
24134
|
null,
|
|
@@ -24026,6 +24171,7 @@ ${context2.fileChanges}`;
|
|
|
24026
24171
|
expectedAspectCount: Math.max(assertions.length, 1),
|
|
24027
24172
|
evaluatorRawRequest,
|
|
24028
24173
|
graderTarget: graderProvider.targetName,
|
|
24174
|
+
details: data.details,
|
|
24029
24175
|
tokenUsage
|
|
24030
24176
|
};
|
|
24031
24177
|
} catch (e) {
|
|
@@ -24445,7 +24591,7 @@ ${outputSchema2}`;
|
|
|
24445
24591
|
expectedAspectCount: Math.max(assertions.length, 1),
|
|
24446
24592
|
evaluatorRawRequest,
|
|
24447
24593
|
graderTarget,
|
|
24448
|
-
details
|
|
24594
|
+
details: data.details && Object.keys(data.details).length > 0 ? { ...details, ...data.details } : details
|
|
24449
24595
|
};
|
|
24450
24596
|
} catch {
|
|
24451
24597
|
return {
|
|
@@ -24592,7 +24738,8 @@ function buildOutputSchema() {
|
|
|
24592
24738
|
' "passed": <boolean>,',
|
|
24593
24739
|
' "evidence": "<concise evidence, 1-2 sentences, optional>"',
|
|
24594
24740
|
" }",
|
|
24595
|
-
" ]",
|
|
24741
|
+
" ],",
|
|
24742
|
+
' "details": {<optional object with domain-specific structured metrics>}',
|
|
24596
24743
|
"}"
|
|
24597
24744
|
].join("\n");
|
|
24598
24745
|
}
|
|
@@ -25942,7 +26089,7 @@ function assembleLlmGraderPrompt(input) {
|
|
|
25942
26089
|
function assembleFreeform(evalCase, candidate, promptInputs, fileChanges, evaluatorTemplateOverride) {
|
|
25943
26090
|
const formattedQuestion = promptInputs.question && promptInputs.question.trim().length > 0 ? promptInputs.question : evalCase.question;
|
|
25944
26091
|
const variables = {
|
|
25945
|
-
[TEMPLATE_VARIABLES.INPUT]: JSON.stringify(evalCase.
|
|
26092
|
+
[TEMPLATE_VARIABLES.INPUT]: JSON.stringify(evalCase.input, null, 2),
|
|
25946
26093
|
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: JSON.stringify(evalCase.expected_output, null, 2),
|
|
25947
26094
|
[TEMPLATE_VARIABLES.OUTPUT]: JSON.stringify([], null, 2),
|
|
25948
26095
|
[TEMPLATE_VARIABLES.CRITERIA]: evalCase.criteria.trim(),
|
|
@@ -28156,6 +28303,18 @@ var QUALITY_PASS_THRESHOLD = 0.8;
|
|
|
28156
28303
|
function classifyQualityStatus(score) {
|
|
28157
28304
|
return score >= QUALITY_PASS_THRESHOLD ? "ok" : "quality_failure";
|
|
28158
28305
|
}
|
|
28306
|
+
function buildSkippedEvaluatorError(scores) {
|
|
28307
|
+
const skippedScores = scores?.filter((score) => score.verdict === "skip") ?? [];
|
|
28308
|
+
if (skippedScores.length === 0) {
|
|
28309
|
+
return void 0;
|
|
28310
|
+
}
|
|
28311
|
+
const messages = skippedScores.map((score) => {
|
|
28312
|
+
const label = score.name || score.type;
|
|
28313
|
+
const assertionMessage = score.assertions.find((assertion) => !assertion.passed)?.text ?? "Evaluator skipped";
|
|
28314
|
+
return `${label}: ${assertionMessage}`;
|
|
28315
|
+
});
|
|
28316
|
+
return messages.length === 1 ? messages[0] : `Evaluators skipped: ${messages.join(" | ")}`;
|
|
28317
|
+
}
|
|
28159
28318
|
function usesFileReferencePrompt(provider) {
|
|
28160
28319
|
return isAgentProvider(provider) || provider.kind === "cli";
|
|
28161
28320
|
}
|
|
@@ -29420,7 +29579,8 @@ async function runEvalCase(options) {
|
|
|
29420
29579
|
durationMs: totalDurationMs,
|
|
29421
29580
|
...evalRunTokenUsage ? { tokenUsage: evalRunTokenUsage } : {}
|
|
29422
29581
|
};
|
|
29423
|
-
const
|
|
29582
|
+
const skippedEvaluatorError = buildSkippedEvaluatorError(result.scores);
|
|
29583
|
+
const executionStatus = providerError || skippedEvaluatorError ? "execution_error" : classifyQualityStatus(result.score);
|
|
29424
29584
|
const finalResult = providerError ? {
|
|
29425
29585
|
...result,
|
|
29426
29586
|
evalRun,
|
|
@@ -29432,7 +29592,26 @@ async function runEvalCase(options) {
|
|
|
29432
29592
|
beforeAllOutput,
|
|
29433
29593
|
beforeEachOutput,
|
|
29434
29594
|
afterEachOutput
|
|
29435
|
-
} :
|
|
29595
|
+
} : skippedEvaluatorError ? {
|
|
29596
|
+
...result,
|
|
29597
|
+
score: 0,
|
|
29598
|
+
evalRun,
|
|
29599
|
+
error: skippedEvaluatorError,
|
|
29600
|
+
executionStatus,
|
|
29601
|
+
failureStage: "evaluator",
|
|
29602
|
+
failureReasonCode: "evaluator_error",
|
|
29603
|
+
executionError: { message: skippedEvaluatorError, stage: "evaluator" },
|
|
29604
|
+
beforeAllOutput,
|
|
29605
|
+
beforeEachOutput,
|
|
29606
|
+
afterEachOutput
|
|
29607
|
+
} : {
|
|
29608
|
+
...result,
|
|
29609
|
+
evalRun,
|
|
29610
|
+
executionStatus,
|
|
29611
|
+
beforeAllOutput,
|
|
29612
|
+
beforeEachOutput,
|
|
29613
|
+
afterEachOutput
|
|
29614
|
+
};
|
|
29436
29615
|
const isFailure = !!finalResult.error || finalResult.score < 0.5;
|
|
29437
29616
|
if (workspacePath && !isSharedWorkspace) {
|
|
29438
29617
|
if (forceCleanup) {
|
|
@@ -30169,11 +30348,6 @@ async function evaluate(config) {
|
|
|
30169
30348
|
evalCases = (config.tests ?? []).map((test) => {
|
|
30170
30349
|
const input = typeof test.input === "string" ? [{ role: "user", content: test.input }] : test.input;
|
|
30171
30350
|
const question = typeof test.input === "string" ? test.input : test.input.find((m) => m.role === "user")?.content ?? "";
|
|
30172
|
-
const inputSegments = input.map((m) => ({
|
|
30173
|
-
type: "text",
|
|
30174
|
-
value: typeof m.content === "string" ? m.content : JSON.stringify(m.content),
|
|
30175
|
-
messageIndex: 0
|
|
30176
|
-
}));
|
|
30177
30351
|
const expectedOutputValue = test.expectedOutput ?? test.expected_output;
|
|
30178
30352
|
const expectedOutput = expectedOutputValue ? [
|
|
30179
30353
|
{ role: "assistant", content: expectedOutputValue }
|
|
@@ -30202,7 +30376,6 @@ async function evaluate(config) {
|
|
|
30202
30376
|
criteria: test.criteria ?? "",
|
|
30203
30377
|
question: String(question),
|
|
30204
30378
|
input,
|
|
30205
|
-
input_segments: inputSegments,
|
|
30206
30379
|
expected_output: expectedOutput,
|
|
30207
30380
|
reference_answer: expectedOutputValue,
|
|
30208
30381
|
file_paths: [],
|
|
@@ -31103,4 +31276,4 @@ export {
|
|
|
31103
31276
|
OtelStreamingObserver,
|
|
31104
31277
|
createAgentKernel
|
|
31105
31278
|
};
|
|
31106
|
-
//# sourceMappingURL=chunk-
|
|
31279
|
+
//# sourceMappingURL=chunk-EZGWZVVK.js.map
|