@agentv/core 3.5.0 → 3.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-EFR4JHPL.js → chunk-2IZOTQ25.js} +1 -1
- package/dist/chunk-2IZOTQ25.js.map +1 -0
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +1 -1
- package/dist/index.cjs +67 -55
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +2 -1
- package/dist/index.d.ts +2 -1
- package/dist/index.js +68 -56
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-EFR4JHPL.js.map +0 -1
package/dist/index.cjs
CHANGED
|
@@ -2445,14 +2445,8 @@ var import_promises5 = require("fs/promises");
|
|
|
2445
2445
|
|
|
2446
2446
|
// src/evaluation/template-variables.ts
|
|
2447
2447
|
var TEMPLATE_VARIABLES = {
|
|
2448
|
-
/** @deprecated Use OUTPUT_TEXT instead */
|
|
2449
|
-
ANSWER: "answer",
|
|
2450
2448
|
EXPECTED_OUTPUT: "expected_output",
|
|
2451
|
-
/** @deprecated Use INPUT_TEXT instead */
|
|
2452
|
-
QUESTION: "question",
|
|
2453
2449
|
CRITERIA: "criteria",
|
|
2454
|
-
/** @deprecated Use EXPECTED_OUTPUT_TEXT instead */
|
|
2455
|
-
REFERENCE_ANSWER: "reference_answer",
|
|
2456
2450
|
INPUT: "input",
|
|
2457
2451
|
OUTPUT: "output",
|
|
2458
2452
|
FILE_CHANGES: "file_changes",
|
|
@@ -2462,9 +2456,8 @@ var TEMPLATE_VARIABLES = {
|
|
|
2462
2456
|
};
|
|
2463
2457
|
var VALID_TEMPLATE_VARIABLES = new Set(Object.values(TEMPLATE_VARIABLES));
|
|
2464
2458
|
var REQUIRED_TEMPLATE_VARIABLES = /* @__PURE__ */ new Set([
|
|
2465
|
-
TEMPLATE_VARIABLES.
|
|
2466
|
-
TEMPLATE_VARIABLES.EXPECTED_OUTPUT
|
|
2467
|
-
TEMPLATE_VARIABLES.OUTPUT_TEXT
|
|
2459
|
+
TEMPLATE_VARIABLES.OUTPUT_TEXT,
|
|
2460
|
+
TEMPLATE_VARIABLES.EXPECTED_OUTPUT
|
|
2468
2461
|
]);
|
|
2469
2462
|
|
|
2470
2463
|
// src/evaluation/validation/prompt-validator.ts
|
|
@@ -2487,13 +2480,13 @@ function validateTemplateVariables(content, source) {
|
|
|
2487
2480
|
}
|
|
2488
2481
|
match = variablePattern.exec(content);
|
|
2489
2482
|
}
|
|
2490
|
-
const hasCandidateAnswer = foundVariables.has(TEMPLATE_VARIABLES.
|
|
2483
|
+
const hasCandidateAnswer = foundVariables.has(TEMPLATE_VARIABLES.OUTPUT_TEXT);
|
|
2491
2484
|
const hasExpectedOutput = foundVariables.has(TEMPLATE_VARIABLES.EXPECTED_OUTPUT);
|
|
2492
2485
|
const hasRequiredFields = hasCandidateAnswer || hasExpectedOutput;
|
|
2493
2486
|
if (!hasRequiredFields) {
|
|
2494
2487
|
throw new Error(
|
|
2495
2488
|
`Missing required fields. Must include at least one of:
|
|
2496
|
-
- {{ ${TEMPLATE_VARIABLES.
|
|
2489
|
+
- {{ ${TEMPLATE_VARIABLES.OUTPUT_TEXT} }}
|
|
2497
2490
|
- {{ ${TEMPLATE_VARIABLES.EXPECTED_OUTPUT} }}`
|
|
2498
2491
|
);
|
|
2499
2492
|
}
|
|
@@ -5795,6 +5788,8 @@ async function invokeModel(options) {
|
|
|
5795
5788
|
const { model, request, defaults, retryConfig, providerOptions } = options;
|
|
5796
5789
|
const chatPrompt = buildChatPrompt(request);
|
|
5797
5790
|
const { temperature, maxOutputTokens } = resolveModelSettings(request, defaults);
|
|
5791
|
+
const startTime = (/* @__PURE__ */ new Date()).toISOString();
|
|
5792
|
+
const startMs = Date.now();
|
|
5798
5793
|
const result = await withRetry(
|
|
5799
5794
|
() => (0, import_ai.generateText)({
|
|
5800
5795
|
model,
|
|
@@ -5808,9 +5803,11 @@ async function invokeModel(options) {
|
|
|
5808
5803
|
retryConfig,
|
|
5809
5804
|
request.signal
|
|
5810
5805
|
);
|
|
5811
|
-
|
|
5806
|
+
const endTime = (/* @__PURE__ */ new Date()).toISOString();
|
|
5807
|
+
const durationMs = Date.now() - startMs;
|
|
5808
|
+
return mapResponse(result, { durationMs, startTime, endTime });
|
|
5812
5809
|
}
|
|
5813
|
-
function mapResponse(result) {
|
|
5810
|
+
function mapResponse(result, timing) {
|
|
5814
5811
|
const content = result.text ?? "";
|
|
5815
5812
|
const rawUsage = result.totalUsage ?? result.usage;
|
|
5816
5813
|
const reasoning = rawUsage?.outputTokenDetails?.reasoningTokens ?? void 0;
|
|
@@ -5825,7 +5822,10 @@ function mapResponse(result) {
|
|
|
5825
5822
|
raw: result,
|
|
5826
5823
|
usage: toJsonObject(rawUsage),
|
|
5827
5824
|
output: [{ role: "assistant", content }],
|
|
5828
|
-
tokenUsage
|
|
5825
|
+
tokenUsage,
|
|
5826
|
+
durationMs: timing?.durationMs,
|
|
5827
|
+
startTime: timing?.startTime,
|
|
5828
|
+
endTime: timing?.endTime
|
|
5829
5829
|
};
|
|
5830
5830
|
}
|
|
5831
5831
|
function toJsonObject(value) {
|
|
@@ -6703,10 +6703,12 @@ var ClaudeSdkProvider = class {
|
|
|
6703
6703
|
if (usage) {
|
|
6704
6704
|
const inputTokens = (usage.input_tokens ?? 0) + (usage.cache_read_input_tokens ?? 0) + (usage.cache_creation_input_tokens ?? 0);
|
|
6705
6705
|
const outputTokens = usage.output_tokens ?? 0;
|
|
6706
|
+
const reasoningTokens = usage.reasoning_tokens ?? void 0;
|
|
6706
6707
|
tokenUsage = {
|
|
6707
6708
|
input: inputTokens,
|
|
6708
6709
|
output: outputTokens,
|
|
6709
|
-
cached: usage.cache_read_input_tokens ?? void 0
|
|
6710
|
+
cached: usage.cache_read_input_tokens ?? void 0,
|
|
6711
|
+
reasoning: reasoningTokens
|
|
6710
6712
|
};
|
|
6711
6713
|
request.streamCallbacks?.onLlmCallEnd?.(this.config.model ?? "claude", tokenUsage);
|
|
6712
6714
|
}
|
|
@@ -7720,7 +7722,8 @@ ${basePrompt}` : basePrompt;
|
|
|
7720
7722
|
onUsage({
|
|
7721
7723
|
input: usage.input_tokens ?? 0,
|
|
7722
7724
|
output: usage.output_tokens ?? 0,
|
|
7723
|
-
cached: usage.cached_input_tokens ?? void 0
|
|
7725
|
+
cached: usage.cached_input_tokens ?? void 0,
|
|
7726
|
+
reasoning: usage.reasoning_tokens ?? void 0
|
|
7724
7727
|
});
|
|
7725
7728
|
}
|
|
7726
7729
|
}
|
|
@@ -9735,10 +9738,12 @@ function extractTokenUsage(events) {
|
|
|
9735
9738
|
output: output ?? 0
|
|
9736
9739
|
};
|
|
9737
9740
|
const cached = toFiniteNumber(u.cache_read_input_tokens ?? u.cached ?? u.cachedTokens);
|
|
9738
|
-
|
|
9739
|
-
|
|
9740
|
-
|
|
9741
|
-
|
|
9741
|
+
const reasoning = toFiniteNumber(u.reasoning_tokens ?? u.reasoningTokens ?? u.reasoning);
|
|
9742
|
+
return {
|
|
9743
|
+
...result,
|
|
9744
|
+
...cached !== void 0 ? { cached } : {},
|
|
9745
|
+
...reasoning !== void 0 ? { reasoning } : {}
|
|
9746
|
+
};
|
|
9742
9747
|
}
|
|
9743
9748
|
}
|
|
9744
9749
|
const messages = record.messages;
|
|
@@ -13265,11 +13270,9 @@ var CodeEvaluator = class {
|
|
|
13265
13270
|
}
|
|
13266
13271
|
}
|
|
13267
13272
|
const payload = {
|
|
13268
|
-
question: context2.evalCase.question,
|
|
13269
13273
|
criteria: context2.evalCase.criteria,
|
|
13270
13274
|
expectedOutput: context2.evalCase.expected_output,
|
|
13271
|
-
|
|
13272
|
-
answer: context2.candidate,
|
|
13275
|
+
outputText: context2.candidate,
|
|
13273
13276
|
output: outputForPayload,
|
|
13274
13277
|
outputPath,
|
|
13275
13278
|
guidelineFiles: context2.evalCase.guideline_paths,
|
|
@@ -13286,9 +13289,7 @@ var CodeEvaluator = class {
|
|
|
13286
13289
|
fileChanges: context2.fileChanges ?? null,
|
|
13287
13290
|
workspacePath: context2.workspacePath ?? null,
|
|
13288
13291
|
config: this.config ?? null,
|
|
13289
|
-
// Text convenience accessors (new names, always strings)
|
|
13290
13292
|
inputText: context2.evalCase.question,
|
|
13291
|
-
outputText: context2.candidate,
|
|
13292
13293
|
expectedOutputText: context2.evalCase.reference_answer ?? ""
|
|
13293
13294
|
};
|
|
13294
13295
|
const inputPayload = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
|
|
@@ -13488,13 +13489,13 @@ Be concise and focused in your evaluation. Provide succinct, specific feedback r
|
|
|
13488
13489
|
{{${TEMPLATE_VARIABLES.CRITERIA}}}
|
|
13489
13490
|
|
|
13490
13491
|
[[ ## question ## ]]
|
|
13491
|
-
{{${TEMPLATE_VARIABLES.
|
|
13492
|
+
{{${TEMPLATE_VARIABLES.INPUT_TEXT}}}
|
|
13492
13493
|
|
|
13493
13494
|
[[ ## reference_answer ## ]]
|
|
13494
|
-
{{${TEMPLATE_VARIABLES.
|
|
13495
|
+
{{${TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT}}}
|
|
13495
13496
|
|
|
13496
13497
|
[[ ## answer ## ]]
|
|
13497
|
-
{{${TEMPLATE_VARIABLES.
|
|
13498
|
+
{{${TEMPLATE_VARIABLES.OUTPUT_TEXT}}}`;
|
|
13498
13499
|
var freeformEvaluationSchema = import_zod4.z.object({
|
|
13499
13500
|
score: import_zod4.z.number().min(0).max(1).describe("Score between 0.0 and 1.0"),
|
|
13500
13501
|
assertions: import_zod4.z.array(
|
|
@@ -13572,12 +13573,8 @@ var LlmGraderEvaluator = class {
|
|
|
13572
13573
|
2
|
|
13573
13574
|
),
|
|
13574
13575
|
[TEMPLATE_VARIABLES.OUTPUT]: JSON.stringify(context2.output ?? [], null, 2),
|
|
13575
|
-
[TEMPLATE_VARIABLES.ANSWER]: context2.candidate.trim(),
|
|
13576
|
-
[TEMPLATE_VARIABLES.REFERENCE_ANSWER]: (context2.evalCase.reference_answer ?? "").trim(),
|
|
13577
13576
|
[TEMPLATE_VARIABLES.CRITERIA]: context2.evalCase.criteria.trim(),
|
|
13578
|
-
[TEMPLATE_VARIABLES.QUESTION]: formattedQuestion.trim(),
|
|
13579
13577
|
[TEMPLATE_VARIABLES.FILE_CHANGES]: context2.fileChanges ?? "",
|
|
13580
|
-
// Text convenience accessors (new names, always strings)
|
|
13581
13578
|
[TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
|
|
13582
13579
|
[TEMPLATE_VARIABLES.OUTPUT_TEXT]: context2.candidate.trim(),
|
|
13583
13580
|
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (context2.evalCase.reference_answer ?? "").trim()
|
|
@@ -13882,10 +13879,10 @@ ${context2.fileChanges}`;
|
|
|
13882
13879
|
buildAgentUserPrompt(context2) {
|
|
13883
13880
|
const formattedQuestion = context2.promptInputs.question && context2.promptInputs.question.trim().length > 0 ? context2.promptInputs.question : context2.evalCase.question;
|
|
13884
13881
|
const variables = {
|
|
13885
|
-
[TEMPLATE_VARIABLES.ANSWER]: context2.candidate.trim(),
|
|
13886
|
-
[TEMPLATE_VARIABLES.REFERENCE_ANSWER]: (context2.evalCase.reference_answer ?? "").trim(),
|
|
13887
13882
|
[TEMPLATE_VARIABLES.CRITERIA]: context2.evalCase.criteria.trim(),
|
|
13888
|
-
[TEMPLATE_VARIABLES.
|
|
13883
|
+
[TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
|
|
13884
|
+
[TEMPLATE_VARIABLES.OUTPUT_TEXT]: context2.candidate.trim(),
|
|
13885
|
+
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (context2.evalCase.reference_answer ?? "").trim(),
|
|
13889
13886
|
[TEMPLATE_VARIABLES.FILE_CHANGES]: context2.fileChanges ?? ""
|
|
13890
13887
|
};
|
|
13891
13888
|
if (this.evaluatorTemplate) {
|
|
@@ -13938,10 +13935,10 @@ ${context2.fileChanges}`;
|
|
|
13938
13935
|
const rubrics = config?.type === "llm-grader" || config?.type === "llm-judge" ? config.rubrics : void 0;
|
|
13939
13936
|
if (this.evaluatorTemplate) {
|
|
13940
13937
|
const variables = {
|
|
13941
|
-
[TEMPLATE_VARIABLES.ANSWER]: context2.candidate.trim(),
|
|
13942
|
-
[TEMPLATE_VARIABLES.REFERENCE_ANSWER]: (context2.evalCase.reference_answer ?? "").trim(),
|
|
13943
13938
|
[TEMPLATE_VARIABLES.CRITERIA]: context2.evalCase.criteria.trim(),
|
|
13944
|
-
[TEMPLATE_VARIABLES.
|
|
13939
|
+
[TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
|
|
13940
|
+
[TEMPLATE_VARIABLES.OUTPUT_TEXT]: context2.candidate.trim(),
|
|
13941
|
+
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (context2.evalCase.reference_answer ?? "").trim(),
|
|
13945
13942
|
[TEMPLATE_VARIABLES.FILE_CHANGES]: context2.fileChanges ?? ""
|
|
13946
13943
|
};
|
|
13947
13944
|
const customPrompt = substituteVariables(this.evaluatorTemplate, variables);
|
|
@@ -15369,7 +15366,10 @@ var COPILOT_MATCHER = {
|
|
|
15369
15366
|
skillTools: ["Skill", "skill"],
|
|
15370
15367
|
skillInputField: "skill",
|
|
15371
15368
|
readTools: ["Read File", "readFile", "Read", "readTextFile"],
|
|
15372
|
-
readInputField: "file_path"
|
|
15369
|
+
readInputField: "file_path",
|
|
15370
|
+
skillToolPrefixes: ["Using skill: "],
|
|
15371
|
+
readToolPrefixes: ["Viewing "],
|
|
15372
|
+
readInputFields: ["file_path", "path"]
|
|
15373
15373
|
};
|
|
15374
15374
|
var PROVIDER_TOOL_SEMANTICS = {
|
|
15375
15375
|
claude: CLAUDE_MATCHER,
|
|
@@ -15411,12 +15411,22 @@ var SkillTriggerEvaluator = class {
|
|
|
15411
15411
|
triggered = true;
|
|
15412
15412
|
evidence = `Skill tool invoked with ${matcher.skillInputField}="${skillArg}"`;
|
|
15413
15413
|
}
|
|
15414
|
+
} else if (matcher.skillToolPrefixes?.some(
|
|
15415
|
+
(prefix) => firstTool.tool.startsWith(prefix) && firstTool.tool.includes(skillName)
|
|
15416
|
+
)) {
|
|
15417
|
+
triggered = true;
|
|
15418
|
+
evidence = `Skill tool invoked via tool name "${firstTool.tool}"`;
|
|
15414
15419
|
} else if (matcher.readTools.includes(firstTool.tool)) {
|
|
15415
|
-
const filePath =
|
|
15420
|
+
const filePath = this.readPathFromInput(input, matcher);
|
|
15416
15421
|
if (filePath.includes(skillName)) {
|
|
15417
15422
|
triggered = true;
|
|
15418
15423
|
evidence = `Read tool loaded skill file: ${filePath}`;
|
|
15419
15424
|
}
|
|
15425
|
+
} else if (matcher.readToolPrefixes?.some(
|
|
15426
|
+
(prefix) => firstTool.tool.startsWith(prefix) && firstTool.tool.includes(skillName)
|
|
15427
|
+
)) {
|
|
15428
|
+
triggered = true;
|
|
15429
|
+
evidence = `Read tool loaded skill file via tool name "${firstTool.tool}"`;
|
|
15420
15430
|
}
|
|
15421
15431
|
}
|
|
15422
15432
|
const pass = triggered === shouldTrigger;
|
|
@@ -15445,6 +15455,16 @@ var SkillTriggerEvaluator = class {
|
|
|
15445
15455
|
expectedAspectCount: 1
|
|
15446
15456
|
};
|
|
15447
15457
|
}
|
|
15458
|
+
readPathFromInput(input, matcher) {
|
|
15459
|
+
const fields = matcher.readInputFields ?? [matcher.readInputField];
|
|
15460
|
+
for (const field of fields) {
|
|
15461
|
+
const value = input[field];
|
|
15462
|
+
if (value !== void 0 && value !== null) {
|
|
15463
|
+
return String(value);
|
|
15464
|
+
}
|
|
15465
|
+
}
|
|
15466
|
+
return "";
|
|
15467
|
+
}
|
|
15448
15468
|
};
|
|
15449
15469
|
|
|
15450
15470
|
// src/evaluation/evaluators/llm-grader-prompt.ts
|
|
@@ -15479,12 +15499,8 @@ function assembleFreeform(evalCase, candidate, promptInputs, fileChanges, evalua
|
|
|
15479
15499
|
[TEMPLATE_VARIABLES.INPUT]: JSON.stringify(evalCase.input_segments, null, 2),
|
|
15480
15500
|
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: JSON.stringify(evalCase.expected_output, null, 2),
|
|
15481
15501
|
[TEMPLATE_VARIABLES.OUTPUT]: JSON.stringify([], null, 2),
|
|
15482
|
-
[TEMPLATE_VARIABLES.ANSWER]: candidate.trim(),
|
|
15483
|
-
[TEMPLATE_VARIABLES.REFERENCE_ANSWER]: (evalCase.reference_answer ?? "").trim(),
|
|
15484
15502
|
[TEMPLATE_VARIABLES.CRITERIA]: evalCase.criteria.trim(),
|
|
15485
|
-
[TEMPLATE_VARIABLES.QUESTION]: formattedQuestion.trim(),
|
|
15486
15503
|
[TEMPLATE_VARIABLES.FILE_CHANGES]: fileChanges ?? "",
|
|
15487
|
-
// Text convenience accessors (new names, always strings)
|
|
15488
15504
|
[TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
|
|
15489
15505
|
[TEMPLATE_VARIABLES.OUTPUT_TEXT]: candidate.trim(),
|
|
15490
15506
|
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (evalCase.reference_answer ?? "").trim()
|
|
@@ -16541,11 +16557,9 @@ async function resolveCustomPrompt(promptConfig, context2, timeoutMs) {
|
|
|
16541
16557
|
}
|
|
16542
16558
|
async function executePromptTemplate(script, context2, config, timeoutMs) {
|
|
16543
16559
|
const payload = {
|
|
16544
|
-
question: context2.evalCase.question,
|
|
16545
16560
|
criteria: context2.evalCase.criteria,
|
|
16546
16561
|
expectedOutput: context2.evalCase.expected_output,
|
|
16547
|
-
|
|
16548
|
-
answer: context2.candidate,
|
|
16562
|
+
outputText: context2.candidate,
|
|
16549
16563
|
output: context2.output ?? null,
|
|
16550
16564
|
guidelineFiles: context2.evalCase.guideline_paths,
|
|
16551
16565
|
inputFiles: context2.evalCase.file_paths.filter(
|
|
@@ -16556,9 +16570,7 @@ async function executePromptTemplate(script, context2, config, timeoutMs) {
|
|
|
16556
16570
|
fileChanges: context2.fileChanges ?? null,
|
|
16557
16571
|
workspacePath: context2.workspacePath ?? null,
|
|
16558
16572
|
config: config ?? context2.config ?? null,
|
|
16559
|
-
// Text convenience accessors (new names, always strings)
|
|
16560
16573
|
inputText: context2.evalCase.question,
|
|
16561
|
-
outputText: context2.candidate,
|
|
16562
16574
|
expectedOutputText: context2.evalCase.reference_answer ?? ""
|
|
16563
16575
|
};
|
|
16564
16576
|
const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
|
|
@@ -18222,7 +18234,7 @@ async function runEvaluation(options) {
|
|
|
18222
18234
|
dataset: evalCase.dataset,
|
|
18223
18235
|
score: 0,
|
|
18224
18236
|
assertions: [],
|
|
18225
|
-
|
|
18237
|
+
outputText: "",
|
|
18226
18238
|
target: target.name,
|
|
18227
18239
|
error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
|
|
18228
18240
|
budgetExceeded: true,
|
|
@@ -18258,7 +18270,7 @@ async function runEvaluation(options) {
|
|
|
18258
18270
|
dataset: evalCase.dataset,
|
|
18259
18271
|
score: 0,
|
|
18260
18272
|
assertions: [],
|
|
18261
|
-
|
|
18273
|
+
outputText: "",
|
|
18262
18274
|
target: target.name,
|
|
18263
18275
|
error: errorMsg,
|
|
18264
18276
|
executionStatus: "execution_error",
|
|
@@ -19225,7 +19237,7 @@ async function evaluateCandidate(options) {
|
|
|
19225
19237
|
conversationId: evalCase.conversation_id,
|
|
19226
19238
|
score: score.score,
|
|
19227
19239
|
assertions: score.assertions,
|
|
19228
|
-
|
|
19240
|
+
outputText: candidate,
|
|
19229
19241
|
target: target.name,
|
|
19230
19242
|
tokenUsage,
|
|
19231
19243
|
costUsd,
|
|
@@ -19581,7 +19593,7 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
|
|
|
19581
19593
|
conversationId: evalCase.conversation_id,
|
|
19582
19594
|
score: 0,
|
|
19583
19595
|
assertions: [{ text: `Error: ${message}`, passed: false }],
|
|
19584
|
-
|
|
19596
|
+
outputText: `Error occurred: ${message}`,
|
|
19585
19597
|
target: targetName,
|
|
19586
19598
|
requests,
|
|
19587
19599
|
input,
|
|
@@ -20119,7 +20131,7 @@ function shouldSkipCacheForTemperature(targetConfig) {
|
|
|
20119
20131
|
|
|
20120
20132
|
// src/evaluation/baseline.ts
|
|
20121
20133
|
var STRIPPED_TOP_LEVEL_FIELDS = /* @__PURE__ */ new Set([
|
|
20122
|
-
"
|
|
20134
|
+
"outputText",
|
|
20123
20135
|
"requests",
|
|
20124
20136
|
"trace",
|
|
20125
20137
|
"workspacePath",
|
|
@@ -20293,7 +20305,7 @@ var OtelTraceExporter = class {
|
|
|
20293
20305
|
rootSpan.setAttribute("agentv.target", result.target);
|
|
20294
20306
|
if (result.dataset) rootSpan.setAttribute("agentv.dataset", result.dataset);
|
|
20295
20307
|
rootSpan.setAttribute("agentv.score", result.score);
|
|
20296
|
-
if (captureContent) rootSpan.setAttribute("agentv.
|
|
20308
|
+
if (captureContent) rootSpan.setAttribute("agentv.output_text", result.outputText);
|
|
20297
20309
|
if (result.durationMs != null)
|
|
20298
20310
|
rootSpan.setAttribute("agentv.trace.duration_ms", result.durationMs);
|
|
20299
20311
|
if (result.costUsd != null) rootSpan.setAttribute("agentv.trace.cost_usd", result.costUsd);
|