agentv 3.5.0 → 3.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -3
- package/dist/{chunk-5GG6DDP5.js → chunk-IP5BO54H.js} +12 -13
- package/dist/chunk-IP5BO54H.js.map +1 -0
- package/dist/{chunk-D6G4N2H2.js → chunk-K4RXLQWV.js} +70 -58
- package/dist/chunk-K4RXLQWV.js.map +1 -0
- package/dist/{chunk-RLL4QGNL.js → chunk-UU5N43YS.js} +5 -5
- package/dist/chunk-UU5N43YS.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-MZFXE6B5.js → dist-VWEFBDZ5.js} +2 -2
- package/dist/index.js +3 -3
- package/dist/{interactive-J7SUWZH2.js → interactive-5S4ILY2Y.js} +3 -3
- package/dist/templates/.agentv/.env.example +9 -11
- package/dist/templates/.agentv/config.yaml +0 -5
- package/dist/templates/.agentv/targets.yaml +16 -0
- package/package.json +1 -1
- package/dist/chunk-5GG6DDP5.js.map +0 -1
- package/dist/chunk-D6G4N2H2.js.map +0 -1
- package/dist/chunk-RLL4QGNL.js.map +0 -1
- /package/dist/{dist-MZFXE6B5.js.map → dist-VWEFBDZ5.js.map} +0 -0
- /package/dist/{interactive-J7SUWZH2.js.map → interactive-5S4ILY2Y.js.map} +0 -0
|
@@ -301,7 +301,7 @@ var require_dist = __commonJS({
|
|
|
301
301
|
}
|
|
302
302
|
});
|
|
303
303
|
|
|
304
|
-
// ../../packages/core/dist/chunk-
|
|
304
|
+
// ../../packages/core/dist/chunk-2IZOTQ25.js
|
|
305
305
|
import { constants } from "node:fs";
|
|
306
306
|
import { access, readFile } from "node:fs/promises";
|
|
307
307
|
import path from "node:path";
|
|
@@ -419,7 +419,7 @@ __export(external_exports2, {
|
|
|
419
419
|
void: () => voidType
|
|
420
420
|
});
|
|
421
421
|
|
|
422
|
-
// ../../packages/core/dist/chunk-
|
|
422
|
+
// ../../packages/core/dist/chunk-2IZOTQ25.js
|
|
423
423
|
var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
|
|
424
424
|
var TEST_MESSAGE_ROLES = TEST_MESSAGE_ROLE_VALUES;
|
|
425
425
|
var TEST_MESSAGE_ROLE_SET = new Set(TEST_MESSAGE_ROLE_VALUES);
|
|
@@ -14655,14 +14655,8 @@ function logWarning(message) {
|
|
|
14655
14655
|
console.warn(`${ANSI_YELLOW2}Warning: ${message}${ANSI_RESET3}`);
|
|
14656
14656
|
}
|
|
14657
14657
|
var TEMPLATE_VARIABLES = {
|
|
14658
|
-
/** @deprecated Use OUTPUT_TEXT instead */
|
|
14659
|
-
ANSWER: "answer",
|
|
14660
14658
|
EXPECTED_OUTPUT: "expected_output",
|
|
14661
|
-
/** @deprecated Use INPUT_TEXT instead */
|
|
14662
|
-
QUESTION: "question",
|
|
14663
14659
|
CRITERIA: "criteria",
|
|
14664
|
-
/** @deprecated Use EXPECTED_OUTPUT_TEXT instead */
|
|
14665
|
-
REFERENCE_ANSWER: "reference_answer",
|
|
14666
14660
|
INPUT: "input",
|
|
14667
14661
|
OUTPUT: "output",
|
|
14668
14662
|
FILE_CHANGES: "file_changes",
|
|
@@ -14672,9 +14666,8 @@ var TEMPLATE_VARIABLES = {
|
|
|
14672
14666
|
};
|
|
14673
14667
|
var VALID_TEMPLATE_VARIABLES = new Set(Object.values(TEMPLATE_VARIABLES));
|
|
14674
14668
|
var REQUIRED_TEMPLATE_VARIABLES = /* @__PURE__ */ new Set([
|
|
14675
|
-
TEMPLATE_VARIABLES.
|
|
14676
|
-
TEMPLATE_VARIABLES.EXPECTED_OUTPUT
|
|
14677
|
-
TEMPLATE_VARIABLES.OUTPUT_TEXT
|
|
14669
|
+
TEMPLATE_VARIABLES.OUTPUT_TEXT,
|
|
14670
|
+
TEMPLATE_VARIABLES.EXPECTED_OUTPUT
|
|
14678
14671
|
]);
|
|
14679
14672
|
var ANSI_YELLOW3 = "\x1B[33m";
|
|
14680
14673
|
var ANSI_RESET4 = "\x1B[0m";
|
|
@@ -14695,13 +14688,13 @@ function validateTemplateVariables(content, source) {
|
|
|
14695
14688
|
}
|
|
14696
14689
|
match = variablePattern.exec(content);
|
|
14697
14690
|
}
|
|
14698
|
-
const hasCandidateAnswer = foundVariables.has(TEMPLATE_VARIABLES.
|
|
14691
|
+
const hasCandidateAnswer = foundVariables.has(TEMPLATE_VARIABLES.OUTPUT_TEXT);
|
|
14699
14692
|
const hasExpectedOutput = foundVariables.has(TEMPLATE_VARIABLES.EXPECTED_OUTPUT);
|
|
14700
14693
|
const hasRequiredFields = hasCandidateAnswer || hasExpectedOutput;
|
|
14701
14694
|
if (!hasRequiredFields) {
|
|
14702
14695
|
throw new Error(
|
|
14703
14696
|
`Missing required fields. Must include at least one of:
|
|
14704
|
-
- {{ ${TEMPLATE_VARIABLES.
|
|
14697
|
+
- {{ ${TEMPLATE_VARIABLES.OUTPUT_TEXT} }}
|
|
14705
14698
|
- {{ ${TEMPLATE_VARIABLES.EXPECTED_OUTPUT} }}`
|
|
14706
14699
|
);
|
|
14707
14700
|
}
|
|
@@ -17843,6 +17836,8 @@ async function invokeModel(options) {
|
|
|
17843
17836
|
const { model, request, defaults, retryConfig, providerOptions } = options;
|
|
17844
17837
|
const chatPrompt = buildChatPrompt(request);
|
|
17845
17838
|
const { temperature, maxOutputTokens } = resolveModelSettings(request, defaults);
|
|
17839
|
+
const startTime = (/* @__PURE__ */ new Date()).toISOString();
|
|
17840
|
+
const startMs = Date.now();
|
|
17846
17841
|
const result = await withRetry(
|
|
17847
17842
|
() => generateText({
|
|
17848
17843
|
model,
|
|
@@ -17856,9 +17851,11 @@ async function invokeModel(options) {
|
|
|
17856
17851
|
retryConfig,
|
|
17857
17852
|
request.signal
|
|
17858
17853
|
);
|
|
17859
|
-
|
|
17854
|
+
const endTime = (/* @__PURE__ */ new Date()).toISOString();
|
|
17855
|
+
const durationMs = Date.now() - startMs;
|
|
17856
|
+
return mapResponse(result, { durationMs, startTime, endTime });
|
|
17860
17857
|
}
|
|
17861
|
-
function mapResponse(result) {
|
|
17858
|
+
function mapResponse(result, timing) {
|
|
17862
17859
|
const content = result.text ?? "";
|
|
17863
17860
|
const rawUsage = result.totalUsage ?? result.usage;
|
|
17864
17861
|
const reasoning = rawUsage?.outputTokenDetails?.reasoningTokens ?? void 0;
|
|
@@ -17873,7 +17870,10 @@ function mapResponse(result) {
|
|
|
17873
17870
|
raw: result,
|
|
17874
17871
|
usage: toJsonObject(rawUsage),
|
|
17875
17872
|
output: [{ role: "assistant", content }],
|
|
17876
|
-
tokenUsage
|
|
17873
|
+
tokenUsage,
|
|
17874
|
+
durationMs: timing?.durationMs,
|
|
17875
|
+
startTime: timing?.startTime,
|
|
17876
|
+
endTime: timing?.endTime
|
|
17877
17877
|
};
|
|
17878
17878
|
}
|
|
17879
17879
|
function toJsonObject(value) {
|
|
@@ -18731,10 +18731,12 @@ var ClaudeSdkProvider = class {
|
|
|
18731
18731
|
if (usage) {
|
|
18732
18732
|
const inputTokens = (usage.input_tokens ?? 0) + (usage.cache_read_input_tokens ?? 0) + (usage.cache_creation_input_tokens ?? 0);
|
|
18733
18733
|
const outputTokens = usage.output_tokens ?? 0;
|
|
18734
|
+
const reasoningTokens = usage.reasoning_tokens ?? void 0;
|
|
18734
18735
|
tokenUsage = {
|
|
18735
18736
|
input: inputTokens,
|
|
18736
18737
|
output: outputTokens,
|
|
18737
|
-
cached: usage.cache_read_input_tokens ?? void 0
|
|
18738
|
+
cached: usage.cache_read_input_tokens ?? void 0,
|
|
18739
|
+
reasoning: reasoningTokens
|
|
18738
18740
|
};
|
|
18739
18741
|
request.streamCallbacks?.onLlmCallEnd?.(this.config.model ?? "claude", tokenUsage);
|
|
18740
18742
|
}
|
|
@@ -19730,7 +19732,8 @@ ${basePrompt}` : basePrompt;
|
|
|
19730
19732
|
onUsage({
|
|
19731
19733
|
input: usage.input_tokens ?? 0,
|
|
19732
19734
|
output: usage.output_tokens ?? 0,
|
|
19733
|
-
cached: usage.cached_input_tokens ?? void 0
|
|
19735
|
+
cached: usage.cached_input_tokens ?? void 0,
|
|
19736
|
+
reasoning: usage.reasoning_tokens ?? void 0
|
|
19734
19737
|
});
|
|
19735
19738
|
}
|
|
19736
19739
|
}
|
|
@@ -21698,10 +21701,12 @@ function extractTokenUsage(events) {
|
|
|
21698
21701
|
output: output ?? 0
|
|
21699
21702
|
};
|
|
21700
21703
|
const cached = toFiniteNumber(u.cache_read_input_tokens ?? u.cached ?? u.cachedTokens);
|
|
21701
|
-
|
|
21702
|
-
|
|
21703
|
-
|
|
21704
|
-
|
|
21704
|
+
const reasoning = toFiniteNumber(u.reasoning_tokens ?? u.reasoningTokens ?? u.reasoning);
|
|
21705
|
+
return {
|
|
21706
|
+
...result,
|
|
21707
|
+
...cached !== void 0 ? { cached } : {},
|
|
21708
|
+
...reasoning !== void 0 ? { reasoning } : {}
|
|
21709
|
+
};
|
|
21705
21710
|
}
|
|
21706
21711
|
}
|
|
21707
21712
|
const messages = record.messages;
|
|
@@ -23927,11 +23932,9 @@ var CodeEvaluator = class {
|
|
|
23927
23932
|
}
|
|
23928
23933
|
}
|
|
23929
23934
|
const payload = {
|
|
23930
|
-
question: context2.evalCase.question,
|
|
23931
23935
|
criteria: context2.evalCase.criteria,
|
|
23932
23936
|
expectedOutput: context2.evalCase.expected_output,
|
|
23933
|
-
|
|
23934
|
-
answer: context2.candidate,
|
|
23937
|
+
outputText: context2.candidate,
|
|
23935
23938
|
output: outputForPayload,
|
|
23936
23939
|
outputPath,
|
|
23937
23940
|
guidelineFiles: context2.evalCase.guideline_paths,
|
|
@@ -23948,9 +23951,7 @@ var CodeEvaluator = class {
|
|
|
23948
23951
|
fileChanges: context2.fileChanges ?? null,
|
|
23949
23952
|
workspacePath: context2.workspacePath ?? null,
|
|
23950
23953
|
config: this.config ?? null,
|
|
23951
|
-
// Text convenience accessors (new names, always strings)
|
|
23952
23954
|
inputText: context2.evalCase.question,
|
|
23953
|
-
outputText: context2.candidate,
|
|
23954
23955
|
expectedOutputText: context2.evalCase.reference_answer ?? ""
|
|
23955
23956
|
};
|
|
23956
23957
|
const inputPayload = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
|
|
@@ -24109,13 +24110,13 @@ Be concise and focused in your evaluation. Provide succinct, specific feedback r
|
|
|
24109
24110
|
{{${TEMPLATE_VARIABLES.CRITERIA}}}
|
|
24110
24111
|
|
|
24111
24112
|
[[ ## question ## ]]
|
|
24112
|
-
{{${TEMPLATE_VARIABLES.
|
|
24113
|
+
{{${TEMPLATE_VARIABLES.INPUT_TEXT}}}
|
|
24113
24114
|
|
|
24114
24115
|
[[ ## reference_answer ## ]]
|
|
24115
|
-
{{${TEMPLATE_VARIABLES.
|
|
24116
|
+
{{${TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT}}}
|
|
24116
24117
|
|
|
24117
24118
|
[[ ## answer ## ]]
|
|
24118
|
-
{{${TEMPLATE_VARIABLES.
|
|
24119
|
+
{{${TEMPLATE_VARIABLES.OUTPUT_TEXT}}}`;
|
|
24119
24120
|
var freeformEvaluationSchema = external_exports2.object({
|
|
24120
24121
|
score: external_exports2.number().min(0).max(1).describe("Score between 0.0 and 1.0"),
|
|
24121
24122
|
assertions: external_exports2.array(
|
|
@@ -24193,12 +24194,8 @@ var LlmGraderEvaluator = class {
|
|
|
24193
24194
|
2
|
|
24194
24195
|
),
|
|
24195
24196
|
[TEMPLATE_VARIABLES.OUTPUT]: JSON.stringify(context2.output ?? [], null, 2),
|
|
24196
|
-
[TEMPLATE_VARIABLES.ANSWER]: context2.candidate.trim(),
|
|
24197
|
-
[TEMPLATE_VARIABLES.REFERENCE_ANSWER]: (context2.evalCase.reference_answer ?? "").trim(),
|
|
24198
24197
|
[TEMPLATE_VARIABLES.CRITERIA]: context2.evalCase.criteria.trim(),
|
|
24199
|
-
[TEMPLATE_VARIABLES.QUESTION]: formattedQuestion.trim(),
|
|
24200
24198
|
[TEMPLATE_VARIABLES.FILE_CHANGES]: context2.fileChanges ?? "",
|
|
24201
|
-
// Text convenience accessors (new names, always strings)
|
|
24202
24199
|
[TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
|
|
24203
24200
|
[TEMPLATE_VARIABLES.OUTPUT_TEXT]: context2.candidate.trim(),
|
|
24204
24201
|
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (context2.evalCase.reference_answer ?? "").trim()
|
|
@@ -24503,10 +24500,10 @@ ${context2.fileChanges}`;
|
|
|
24503
24500
|
buildAgentUserPrompt(context2) {
|
|
24504
24501
|
const formattedQuestion = context2.promptInputs.question && context2.promptInputs.question.trim().length > 0 ? context2.promptInputs.question : context2.evalCase.question;
|
|
24505
24502
|
const variables = {
|
|
24506
|
-
[TEMPLATE_VARIABLES.ANSWER]: context2.candidate.trim(),
|
|
24507
|
-
[TEMPLATE_VARIABLES.REFERENCE_ANSWER]: (context2.evalCase.reference_answer ?? "").trim(),
|
|
24508
24503
|
[TEMPLATE_VARIABLES.CRITERIA]: context2.evalCase.criteria.trim(),
|
|
24509
|
-
[TEMPLATE_VARIABLES.
|
|
24504
|
+
[TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
|
|
24505
|
+
[TEMPLATE_VARIABLES.OUTPUT_TEXT]: context2.candidate.trim(),
|
|
24506
|
+
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (context2.evalCase.reference_answer ?? "").trim(),
|
|
24510
24507
|
[TEMPLATE_VARIABLES.FILE_CHANGES]: context2.fileChanges ?? ""
|
|
24511
24508
|
};
|
|
24512
24509
|
if (this.evaluatorTemplate) {
|
|
@@ -24559,10 +24556,10 @@ ${context2.fileChanges}`;
|
|
|
24559
24556
|
const rubrics = config?.type === "llm-grader" || config?.type === "llm-judge" ? config.rubrics : void 0;
|
|
24560
24557
|
if (this.evaluatorTemplate) {
|
|
24561
24558
|
const variables = {
|
|
24562
|
-
[TEMPLATE_VARIABLES.ANSWER]: context2.candidate.trim(),
|
|
24563
|
-
[TEMPLATE_VARIABLES.REFERENCE_ANSWER]: (context2.evalCase.reference_answer ?? "").trim(),
|
|
24564
24559
|
[TEMPLATE_VARIABLES.CRITERIA]: context2.evalCase.criteria.trim(),
|
|
24565
|
-
[TEMPLATE_VARIABLES.
|
|
24560
|
+
[TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
|
|
24561
|
+
[TEMPLATE_VARIABLES.OUTPUT_TEXT]: context2.candidate.trim(),
|
|
24562
|
+
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (context2.evalCase.reference_answer ?? "").trim(),
|
|
24566
24563
|
[TEMPLATE_VARIABLES.FILE_CHANGES]: context2.fileChanges ?? ""
|
|
24567
24564
|
};
|
|
24568
24565
|
const customPrompt = substituteVariables(this.evaluatorTemplate, variables);
|
|
@@ -25978,7 +25975,10 @@ var COPILOT_MATCHER = {
|
|
|
25978
25975
|
skillTools: ["Skill", "skill"],
|
|
25979
25976
|
skillInputField: "skill",
|
|
25980
25977
|
readTools: ["Read File", "readFile", "Read", "readTextFile"],
|
|
25981
|
-
readInputField: "file_path"
|
|
25978
|
+
readInputField: "file_path",
|
|
25979
|
+
skillToolPrefixes: ["Using skill: "],
|
|
25980
|
+
readToolPrefixes: ["Viewing "],
|
|
25981
|
+
readInputFields: ["file_path", "path"]
|
|
25982
25982
|
};
|
|
25983
25983
|
var PROVIDER_TOOL_SEMANTICS = {
|
|
25984
25984
|
claude: CLAUDE_MATCHER,
|
|
@@ -26020,12 +26020,22 @@ var SkillTriggerEvaluator = class {
|
|
|
26020
26020
|
triggered = true;
|
|
26021
26021
|
evidence = `Skill tool invoked with ${matcher.skillInputField}="${skillArg}"`;
|
|
26022
26022
|
}
|
|
26023
|
+
} else if (matcher.skillToolPrefixes?.some(
|
|
26024
|
+
(prefix) => firstTool.tool.startsWith(prefix) && firstTool.tool.includes(skillName)
|
|
26025
|
+
)) {
|
|
26026
|
+
triggered = true;
|
|
26027
|
+
evidence = `Skill tool invoked via tool name "${firstTool.tool}"`;
|
|
26023
26028
|
} else if (matcher.readTools.includes(firstTool.tool)) {
|
|
26024
|
-
const filePath =
|
|
26029
|
+
const filePath = this.readPathFromInput(input, matcher);
|
|
26025
26030
|
if (filePath.includes(skillName)) {
|
|
26026
26031
|
triggered = true;
|
|
26027
26032
|
evidence = `Read tool loaded skill file: ${filePath}`;
|
|
26028
26033
|
}
|
|
26034
|
+
} else if (matcher.readToolPrefixes?.some(
|
|
26035
|
+
(prefix) => firstTool.tool.startsWith(prefix) && firstTool.tool.includes(skillName)
|
|
26036
|
+
)) {
|
|
26037
|
+
triggered = true;
|
|
26038
|
+
evidence = `Read tool loaded skill file via tool name "${firstTool.tool}"`;
|
|
26029
26039
|
}
|
|
26030
26040
|
}
|
|
26031
26041
|
const pass = triggered === shouldTrigger;
|
|
@@ -26054,6 +26064,16 @@ var SkillTriggerEvaluator = class {
|
|
|
26054
26064
|
expectedAspectCount: 1
|
|
26055
26065
|
};
|
|
26056
26066
|
}
|
|
26067
|
+
readPathFromInput(input, matcher) {
|
|
26068
|
+
const fields = matcher.readInputFields ?? [matcher.readInputField];
|
|
26069
|
+
for (const field of fields) {
|
|
26070
|
+
const value = input[field];
|
|
26071
|
+
if (value !== void 0 && value !== null) {
|
|
26072
|
+
return String(value);
|
|
26073
|
+
}
|
|
26074
|
+
}
|
|
26075
|
+
return "";
|
|
26076
|
+
}
|
|
26057
26077
|
};
|
|
26058
26078
|
function assembleLlmGraderPrompt(input) {
|
|
26059
26079
|
const {
|
|
@@ -26086,12 +26106,8 @@ function assembleFreeform(evalCase, candidate, promptInputs, fileChanges, evalua
|
|
|
26086
26106
|
[TEMPLATE_VARIABLES.INPUT]: JSON.stringify(evalCase.input_segments, null, 2),
|
|
26087
26107
|
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: JSON.stringify(evalCase.expected_output, null, 2),
|
|
26088
26108
|
[TEMPLATE_VARIABLES.OUTPUT]: JSON.stringify([], null, 2),
|
|
26089
|
-
[TEMPLATE_VARIABLES.ANSWER]: candidate.trim(),
|
|
26090
|
-
[TEMPLATE_VARIABLES.REFERENCE_ANSWER]: (evalCase.reference_answer ?? "").trim(),
|
|
26091
26109
|
[TEMPLATE_VARIABLES.CRITERIA]: evalCase.criteria.trim(),
|
|
26092
|
-
[TEMPLATE_VARIABLES.QUESTION]: formattedQuestion.trim(),
|
|
26093
26110
|
[TEMPLATE_VARIABLES.FILE_CHANGES]: fileChanges ?? "",
|
|
26094
|
-
// Text convenience accessors (new names, always strings)
|
|
26095
26111
|
[TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
|
|
26096
26112
|
[TEMPLATE_VARIABLES.OUTPUT_TEXT]: candidate.trim(),
|
|
26097
26113
|
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (evalCase.reference_answer ?? "").trim()
|
|
@@ -27125,11 +27141,9 @@ async function resolveCustomPrompt(promptConfig, context2, timeoutMs) {
|
|
|
27125
27141
|
}
|
|
27126
27142
|
async function executePromptTemplate(script, context2, config, timeoutMs) {
|
|
27127
27143
|
const payload = {
|
|
27128
|
-
question: context2.evalCase.question,
|
|
27129
27144
|
criteria: context2.evalCase.criteria,
|
|
27130
27145
|
expectedOutput: context2.evalCase.expected_output,
|
|
27131
|
-
|
|
27132
|
-
answer: context2.candidate,
|
|
27146
|
+
outputText: context2.candidate,
|
|
27133
27147
|
output: context2.output ?? null,
|
|
27134
27148
|
guidelineFiles: context2.evalCase.guideline_paths,
|
|
27135
27149
|
inputFiles: context2.evalCase.file_paths.filter(
|
|
@@ -27140,9 +27154,7 @@ async function executePromptTemplate(script, context2, config, timeoutMs) {
|
|
|
27140
27154
|
fileChanges: context2.fileChanges ?? null,
|
|
27141
27155
|
workspacePath: context2.workspacePath ?? null,
|
|
27142
27156
|
config: config ?? context2.config ?? null,
|
|
27143
|
-
// Text convenience accessors (new names, always strings)
|
|
27144
27157
|
inputText: context2.evalCase.question,
|
|
27145
|
-
outputText: context2.candidate,
|
|
27146
27158
|
expectedOutputText: context2.evalCase.reference_answer ?? ""
|
|
27147
27159
|
};
|
|
27148
27160
|
const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
|
|
@@ -28762,7 +28774,7 @@ async function runEvaluation(options) {
|
|
|
28762
28774
|
dataset: evalCase.dataset,
|
|
28763
28775
|
score: 0,
|
|
28764
28776
|
assertions: [],
|
|
28765
|
-
|
|
28777
|
+
outputText: "",
|
|
28766
28778
|
target: target.name,
|
|
28767
28779
|
error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
|
|
28768
28780
|
budgetExceeded: true,
|
|
@@ -28798,7 +28810,7 @@ async function runEvaluation(options) {
|
|
|
28798
28810
|
dataset: evalCase.dataset,
|
|
28799
28811
|
score: 0,
|
|
28800
28812
|
assertions: [],
|
|
28801
|
-
|
|
28813
|
+
outputText: "",
|
|
28802
28814
|
target: target.name,
|
|
28803
28815
|
error: errorMsg,
|
|
28804
28816
|
executionStatus: "execution_error",
|
|
@@ -29765,7 +29777,7 @@ async function evaluateCandidate(options) {
|
|
|
29765
29777
|
conversationId: evalCase.conversation_id,
|
|
29766
29778
|
score: score.score,
|
|
29767
29779
|
assertions: score.assertions,
|
|
29768
|
-
|
|
29780
|
+
outputText: candidate,
|
|
29769
29781
|
target: target.name,
|
|
29770
29782
|
tokenUsage,
|
|
29771
29783
|
costUsd,
|
|
@@ -30121,7 +30133,7 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
|
|
|
30121
30133
|
conversationId: evalCase.conversation_id,
|
|
30122
30134
|
score: 0,
|
|
30123
30135
|
assertions: [{ text: `Error: ${message}`, passed: false }],
|
|
30124
|
-
|
|
30136
|
+
outputText: `Error occurred: ${message}`,
|
|
30125
30137
|
target: targetName,
|
|
30126
30138
|
requests,
|
|
30127
30139
|
input,
|
|
@@ -30638,7 +30650,7 @@ function shouldSkipCacheForTemperature(targetConfig) {
|
|
|
30638
30650
|
return false;
|
|
30639
30651
|
}
|
|
30640
30652
|
var STRIPPED_TOP_LEVEL_FIELDS = /* @__PURE__ */ new Set([
|
|
30641
|
-
"
|
|
30653
|
+
"outputText",
|
|
30642
30654
|
"requests",
|
|
30643
30655
|
"trace",
|
|
30644
30656
|
"workspacePath",
|
|
@@ -30810,7 +30822,7 @@ var OtelTraceExporter = class {
|
|
|
30810
30822
|
rootSpan.setAttribute("agentv.target", result.target);
|
|
30811
30823
|
if (result.dataset) rootSpan.setAttribute("agentv.dataset", result.dataset);
|
|
30812
30824
|
rootSpan.setAttribute("agentv.score", result.score);
|
|
30813
|
-
if (captureContent) rootSpan.setAttribute("agentv.
|
|
30825
|
+
if (captureContent) rootSpan.setAttribute("agentv.output_text", result.outputText);
|
|
30814
30826
|
if (result.durationMs != null)
|
|
30815
30827
|
rootSpan.setAttribute("agentv.trace.duration_ms", result.durationMs);
|
|
30816
30828
|
if (result.costUsd != null) rootSpan.setAttribute("agentv.trace.cost_usd", result.costUsd);
|
|
@@ -31237,4 +31249,4 @@ export {
|
|
|
31237
31249
|
OtelStreamingObserver,
|
|
31238
31250
|
createAgentKernel
|
|
31239
31251
|
};
|
|
31240
|
-
//# sourceMappingURL=chunk-
|
|
31252
|
+
//# sourceMappingURL=chunk-K4RXLQWV.js.map
|