agentv 4.35.1 → 4.37.0-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{artifact-writer-G57MG52C.js → artifact-writer-GFNKYREE.js} +4 -4
- package/dist/{chunk-INOKS5LF.js → chunk-M7AMFWBZ.js} +275 -58
- package/dist/chunk-M7AMFWBZ.js.map +1 -0
- package/dist/{chunk-KJGYL3M3.js → chunk-N6E5XFOM.js} +213 -85
- package/dist/chunk-N6E5XFOM.js.map +1 -0
- package/dist/{chunk-KNF3AGCI.js → chunk-OYI35QFW.js} +314 -49
- package/dist/chunk-OYI35QFW.js.map +1 -0
- package/dist/{chunk-CRMGUVRZ.js → chunk-P4LSNFZR.js} +85 -19
- package/dist/chunk-P4LSNFZR.js.map +1 -0
- package/dist/{chunk-6QEIZ33V.js → chunk-RL4S2FBZ.js} +2700 -456
- package/dist/chunk-RL4S2FBZ.js.map +1 -0
- package/dist/cli.js +5 -5
- package/dist/dashboard/assets/index-9tV-u4HJ.css +1 -0
- package/dist/dashboard/assets/{index-Bdk-9a_8.js → index-BDRYJsGF.js} +1 -1
- package/dist/dashboard/assets/index-DuESU7zZ.js +118 -0
- package/dist/dashboard/index.html +2 -2
- package/dist/{dist-M4B77IW4.js → dist-OY3JSP6Z.js} +125 -3
- package/dist/index.js +5 -5
- package/dist/{interactive-VYQ5SYMR.js → interactive-CQELHITQ.js} +5 -5
- package/dist/skills/agentv-eval-writer/SKILL.md +6 -0
- package/dist/{ts-eval-loader-EQJX3OLT-THE7D3GR.js → ts-eval-loader-RBTB2HG2-H5TRXZLO.js} +2 -2
- package/package.json +1 -1
- package/dist/chunk-6QEIZ33V.js.map +0 -1
- package/dist/chunk-CRMGUVRZ.js.map +0 -1
- package/dist/chunk-INOKS5LF.js.map +0 -1
- package/dist/chunk-KJGYL3M3.js.map +0 -1
- package/dist/chunk-KNF3AGCI.js.map +0 -1
- package/dist/dashboard/assets/index-BPMAZqjE.css +0 -1
- package/dist/dashboard/assets/index-BWO0UcxG.js +0 -118
- /package/dist/{artifact-writer-G57MG52C.js.map → artifact-writer-GFNKYREE.js.map} +0 -0
- /package/dist/{dist-M4B77IW4.js.map → dist-OY3JSP6Z.js.map} +0 -0
- /package/dist/{interactive-VYQ5SYMR.js.map → interactive-CQELHITQ.js.map} +0 -0
- /package/dist/{ts-eval-loader-EQJX3OLT-THE7D3GR.js.map → ts-eval-loader-RBTB2HG2-H5TRXZLO.js.map} +0 -0
|
@@ -1,41 +1,20 @@
|
|
|
1
1
|
import { createRequire } from 'node:module'; const require = createRequire(import.meta.url);
|
|
2
2
|
import {
|
|
3
|
-
|
|
4
|
-
} from "./chunk-
|
|
3
|
+
traceToTranscriptJsonLines
|
|
4
|
+
} from "./chunk-OYI35QFW.js";
|
|
5
5
|
import {
|
|
6
6
|
DEFAULT_THRESHOLD,
|
|
7
|
-
|
|
8
|
-
|
|
7
|
+
buildTraceEnvelopeFromEvaluationResult,
|
|
8
|
+
buildTraceFromMessages,
|
|
9
|
+
extractLastAssistantContent,
|
|
10
|
+
parseYamlValue,
|
|
11
|
+
toTraceEnvelopeWire
|
|
12
|
+
} from "./chunk-RL4S2FBZ.js";
|
|
9
13
|
|
|
10
14
|
// src/commands/eval/artifact-writer.ts
|
|
11
15
|
import { mkdir as mkdir2, readFile as readFile2, writeFile as writeFile2 } from "node:fs/promises";
|
|
12
16
|
import path3 from "node:path";
|
|
13
17
|
|
|
14
|
-
// src/utils/case-conversion.ts
|
|
15
|
-
function toSnakeCase(str) {
|
|
16
|
-
if (/^[A-Z]/.test(str)) {
|
|
17
|
-
return str;
|
|
18
|
-
}
|
|
19
|
-
return str.replace(/[A-Z]/g, (letter) => `_${letter.toLowerCase()}`);
|
|
20
|
-
}
|
|
21
|
-
function toSnakeCaseDeep(obj) {
|
|
22
|
-
if (obj === null || obj === void 0) {
|
|
23
|
-
return obj;
|
|
24
|
-
}
|
|
25
|
-
if (Array.isArray(obj)) {
|
|
26
|
-
return obj.map((item) => toSnakeCaseDeep(item));
|
|
27
|
-
}
|
|
28
|
-
if (typeof obj === "object") {
|
|
29
|
-
const result = {};
|
|
30
|
-
for (const [key, value] of Object.entries(obj)) {
|
|
31
|
-
const snakeKey = toSnakeCase(key);
|
|
32
|
-
result[snakeKey] = toSnakeCaseDeep(value);
|
|
33
|
-
}
|
|
34
|
-
return result;
|
|
35
|
-
}
|
|
36
|
-
return obj;
|
|
37
|
-
}
|
|
38
|
-
|
|
39
18
|
// src/commands/eval/result-layout.ts
|
|
40
19
|
import { existsSync, statSync } from "node:fs";
|
|
41
20
|
import path from "node:path";
|
|
@@ -114,6 +93,33 @@ import { createHash } from "node:crypto";
|
|
|
114
93
|
import { mkdir, readFile, readdir, stat, writeFile } from "node:fs/promises";
|
|
115
94
|
import path2 from "node:path";
|
|
116
95
|
import { stringify as stringifyYaml } from "yaml";
|
|
96
|
+
|
|
97
|
+
// src/utils/case-conversion.ts
|
|
98
|
+
function toSnakeCase(str) {
|
|
99
|
+
if (/^[A-Z]/.test(str)) {
|
|
100
|
+
return str;
|
|
101
|
+
}
|
|
102
|
+
return str.replace(/[A-Z]/g, (letter) => `_${letter.toLowerCase()}`);
|
|
103
|
+
}
|
|
104
|
+
function toSnakeCaseDeep(obj) {
|
|
105
|
+
if (obj === null || obj === void 0) {
|
|
106
|
+
return obj;
|
|
107
|
+
}
|
|
108
|
+
if (Array.isArray(obj)) {
|
|
109
|
+
return obj.map((item) => toSnakeCaseDeep(item));
|
|
110
|
+
}
|
|
111
|
+
if (typeof obj === "object") {
|
|
112
|
+
const result = {};
|
|
113
|
+
for (const [key, value] of Object.entries(obj)) {
|
|
114
|
+
const snakeKey = toSnakeCase(key);
|
|
115
|
+
result[snakeKey] = toSnakeCaseDeep(value);
|
|
116
|
+
}
|
|
117
|
+
return result;
|
|
118
|
+
}
|
|
119
|
+
return obj;
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
// src/commands/eval/task-bundle.ts
|
|
117
123
|
var TASK_DIRNAME = "task";
|
|
118
124
|
var TASK_EVAL_FILENAME = "EVAL.yaml";
|
|
119
125
|
var TASK_TARGETS_FILENAME = "targets.yaml";
|
|
@@ -528,18 +534,8 @@ function isExecutionError(result) {
|
|
|
528
534
|
return result.executionStatus === "execution_error";
|
|
529
535
|
}
|
|
530
536
|
function countToolCalls(result) {
|
|
531
|
-
const toolCalls = {};
|
|
532
|
-
|
|
533
|
-
const trace = result.trace;
|
|
534
|
-
if (trace?.steps) {
|
|
535
|
-
for (const step of trace.steps) {
|
|
536
|
-
if (step.toolName || step.type === "tool") {
|
|
537
|
-
const name = step.toolName ?? "unknown";
|
|
538
|
-
toolCalls[name] = (toolCalls[name] ?? 0) + 1;
|
|
539
|
-
total += 1;
|
|
540
|
-
}
|
|
541
|
-
}
|
|
542
|
-
}
|
|
537
|
+
const toolCalls = { ...result.trace?.toolCalls ?? {} };
|
|
538
|
+
const total = Object.values(toolCalls).reduce((sum, count) => sum + count, 0);
|
|
543
539
|
return { toolCalls, total };
|
|
544
540
|
}
|
|
545
541
|
function parseWorkspaceChanges(fileChanges) {
|
|
@@ -588,6 +584,66 @@ function buildEvaluators(scores) {
|
|
|
588
584
|
details: s.details
|
|
589
585
|
}));
|
|
590
586
|
}
|
|
587
|
+
function toIndexAssertion(assertion) {
|
|
588
|
+
return {
|
|
589
|
+
text: assertion.text,
|
|
590
|
+
passed: assertion.passed,
|
|
591
|
+
evidence: assertion.evidence
|
|
592
|
+
};
|
|
593
|
+
}
|
|
594
|
+
function toIndexScore(score) {
|
|
595
|
+
return {
|
|
596
|
+
name: score.name,
|
|
597
|
+
type: score.type,
|
|
598
|
+
score: score.score,
|
|
599
|
+
weight: score.weight,
|
|
600
|
+
verdict: score.verdict,
|
|
601
|
+
assertions: score.assertions.map(toIndexAssertion),
|
|
602
|
+
raw_request: score.rawRequest,
|
|
603
|
+
input: score.input,
|
|
604
|
+
target: score.target,
|
|
605
|
+
scores: score.scores?.map(toIndexScore),
|
|
606
|
+
details: score.details,
|
|
607
|
+
token_usage: score.tokenUsage,
|
|
608
|
+
duration_ms: score.durationMs,
|
|
609
|
+
started_at: score.startedAt,
|
|
610
|
+
ended_at: score.endedAt
|
|
611
|
+
};
|
|
612
|
+
}
|
|
613
|
+
function toIndexScores(scores) {
|
|
614
|
+
return scores?.map(toIndexScore);
|
|
615
|
+
}
|
|
616
|
+
function dropUndefined(value) {
|
|
617
|
+
return Object.fromEntries(Object.entries(value).filter(([, entry]) => entry !== void 0));
|
|
618
|
+
}
|
|
619
|
+
function toIndexRerunSource(value) {
|
|
620
|
+
if (!isRecord2(value)) {
|
|
621
|
+
return void 0;
|
|
622
|
+
}
|
|
623
|
+
return dropUndefined({
|
|
624
|
+
mode: value.mode,
|
|
625
|
+
source_run_dir: value.sourceRunDir,
|
|
626
|
+
source_index_path: value.sourceIndexPath,
|
|
627
|
+
source_artifact_dir: value.sourceArtifactDir,
|
|
628
|
+
source_task_dir: value.sourceTaskDir,
|
|
629
|
+
source_test_id: value.sourceTestId,
|
|
630
|
+
source_target: value.sourceTarget,
|
|
631
|
+
source_timestamp: value.sourceTimestamp
|
|
632
|
+
});
|
|
633
|
+
}
|
|
634
|
+
function toIndexMetadata(metadata) {
|
|
635
|
+
if (!metadata) {
|
|
636
|
+
return void 0;
|
|
637
|
+
}
|
|
638
|
+
const rerunSource = toIndexRerunSource(metadata.rerunSource);
|
|
639
|
+
if (!rerunSource) {
|
|
640
|
+
return { ...metadata };
|
|
641
|
+
}
|
|
642
|
+
return {
|
|
643
|
+
...Object.fromEntries(Object.entries(metadata).filter(([key]) => key !== "rerunSource")),
|
|
644
|
+
rerun_source: rerunSource
|
|
645
|
+
};
|
|
646
|
+
}
|
|
591
647
|
function buildGradingArtifact(result) {
|
|
592
648
|
const assertions = buildAssertions(result);
|
|
593
649
|
const passed = assertions.filter((e) => e.passed).length;
|
|
@@ -611,7 +667,7 @@ function buildGradingArtifact(result) {
|
|
|
611
667
|
graders: buildEvaluators(result.scores),
|
|
612
668
|
workspace_changes: parseWorkspaceChanges(result.fileChanges),
|
|
613
669
|
conversation: result.conversationId ? {
|
|
614
|
-
turns: result.trace
|
|
670
|
+
turns: result.trace?.messages.filter((message) => message.role === "assistant").length ?? 0,
|
|
615
671
|
conversation_id: result.conversationId
|
|
616
672
|
} : void 0
|
|
617
673
|
};
|
|
@@ -823,6 +879,34 @@ function buildTaskBundleIndexFields(outputDir, taskBundle) {
|
|
|
823
879
|
...taskBundle.gradersPath ? { graders_path: toRelativeArtifactPath(outputDir, taskBundle.gradersPath) } : {}
|
|
824
880
|
};
|
|
825
881
|
}
|
|
882
|
+
function findResultSourceTest(result, testByTestId) {
|
|
883
|
+
return testByTestId.get(result.testId ?? "unknown");
|
|
884
|
+
}
|
|
885
|
+
function resolveEnvelopeEvalPath(result, testByTestId, fallbackEvalFile) {
|
|
886
|
+
const source = findResultSourceTest(result, testByTestId)?.source;
|
|
887
|
+
return source?.evalFileRepoPath ?? source?.evalFilePath ?? fallbackEvalFile;
|
|
888
|
+
}
|
|
889
|
+
async function writeTraceEnvelopeSidecar(params) {
|
|
890
|
+
const hasTranscript = params.result.output.length > 0 || params.result.trace.messages.length > 0;
|
|
891
|
+
const envelope = buildTraceEnvelopeFromEvaluationResult(params.result, {
|
|
892
|
+
evalPath: params.evalPath,
|
|
893
|
+
runId: path3.basename(params.outputDir),
|
|
894
|
+
experiment: params.experiment,
|
|
895
|
+
source: { path: RESULT_INDEX_FILENAME },
|
|
896
|
+
artifacts: {
|
|
897
|
+
envelope_path: "outputs/trace-envelope.json",
|
|
898
|
+
answer_path: params.result.output.length > 0 ? "outputs/answer.md" : void 0,
|
|
899
|
+
response_path: params.result.output.length > 0 ? "outputs/response.md" : void 0,
|
|
900
|
+
transcript_path: hasTranscript ? "outputs/transcript.jsonl" : void 0
|
|
901
|
+
}
|
|
902
|
+
});
|
|
903
|
+
await writeFile2(
|
|
904
|
+
path3.join(params.outputsDir, "trace-envelope.json"),
|
|
905
|
+
`${JSON.stringify(toTraceEnvelopeWire(envelope), null, 2)}
|
|
906
|
+
`,
|
|
907
|
+
"utf8"
|
|
908
|
+
);
|
|
909
|
+
}
|
|
826
910
|
function buildIndexArtifactEntry(result, options) {
|
|
827
911
|
return {
|
|
828
912
|
timestamp: result.timestamp,
|
|
@@ -832,7 +916,12 @@ function buildIndexArtifactEntry(result, options) {
|
|
|
832
916
|
conversation_id: result.conversationId,
|
|
833
917
|
score: result.score,
|
|
834
918
|
target: result.target ?? "unknown",
|
|
835
|
-
|
|
919
|
+
token_usage: result.tokenUsage,
|
|
920
|
+
cost_usd: result.costUsd,
|
|
921
|
+
duration_ms: result.durationMs,
|
|
922
|
+
start_time: result.startTime,
|
|
923
|
+
end_time: result.endTime,
|
|
924
|
+
scores: toIndexScores(result.scores),
|
|
836
925
|
execution_status: result.executionStatus,
|
|
837
926
|
error: result.error,
|
|
838
927
|
failure_stage: result.failureStage,
|
|
@@ -842,15 +931,19 @@ function buildIndexArtifactEntry(result, options) {
|
|
|
842
931
|
grading_path: toRelativeArtifactPath(options.outputDir, options.gradingPath),
|
|
843
932
|
timing_path: toRelativeArtifactPath(options.outputDir, options.timingPath),
|
|
844
933
|
output_path: options.outputPath ? toRelativeArtifactPath(options.outputDir, options.outputPath) : void 0,
|
|
934
|
+
answer_path: options.answerPath ? toRelativeArtifactPath(options.outputDir, options.answerPath) : void 0,
|
|
935
|
+
transcript_path: options.transcriptPath ? toRelativeArtifactPath(options.outputDir, options.transcriptPath) : void 0,
|
|
845
936
|
input_path: options.inputPath ? toRelativeArtifactPath(options.outputDir, options.inputPath) : void 0,
|
|
937
|
+
response_path: options.responsePath ? toRelativeArtifactPath(options.outputDir, options.responsePath) : void 0,
|
|
846
938
|
...buildTaskBundleIndexFields(options.outputDir, options.taskBundle),
|
|
847
|
-
metadata: result.metadata
|
|
939
|
+
metadata: toIndexMetadata(result.metadata)
|
|
848
940
|
};
|
|
849
941
|
}
|
|
850
942
|
function buildResultIndexArtifact(result, taskBundle) {
|
|
851
943
|
const artifactSubdir = buildArtifactSubdir(result);
|
|
852
944
|
const input = extractInput(result);
|
|
853
|
-
const
|
|
945
|
+
const hasAnswer = result.output.length > 0;
|
|
946
|
+
const hasTranscript = result.trace.messages.length > 0 || result.trace.events.length > 0;
|
|
854
947
|
return {
|
|
855
948
|
timestamp: result.timestamp,
|
|
856
949
|
test_id: result.testId ?? "unknown",
|
|
@@ -859,7 +952,12 @@ function buildResultIndexArtifact(result, taskBundle) {
|
|
|
859
952
|
conversation_id: result.conversationId,
|
|
860
953
|
score: result.score,
|
|
861
954
|
target: result.target ?? "unknown",
|
|
862
|
-
|
|
955
|
+
token_usage: result.tokenUsage,
|
|
956
|
+
cost_usd: result.costUsd,
|
|
957
|
+
duration_ms: result.durationMs,
|
|
958
|
+
start_time: result.startTime,
|
|
959
|
+
end_time: result.endTime,
|
|
960
|
+
scores: toIndexScores(result.scores),
|
|
863
961
|
execution_status: result.executionStatus,
|
|
864
962
|
error: result.error,
|
|
865
963
|
failure_stage: result.failureStage,
|
|
@@ -869,8 +967,10 @@ function buildResultIndexArtifact(result, taskBundle) {
|
|
|
869
967
|
grading_path: path3.posix.join(artifactSubdir, "grading.json"),
|
|
870
968
|
timing_path: path3.posix.join(artifactSubdir, "timing.json"),
|
|
871
969
|
input_path: input ? path3.posix.join(artifactSubdir, "input.md") : void 0,
|
|
872
|
-
output_path:
|
|
873
|
-
|
|
970
|
+
output_path: hasAnswer ? path3.posix.join(artifactSubdir, "outputs", "answer.md") : void 0,
|
|
971
|
+
answer_path: hasAnswer ? path3.posix.join(artifactSubdir, "outputs", "answer.md") : void 0,
|
|
972
|
+
transcript_path: hasTranscript ? path3.posix.join(artifactSubdir, "outputs", "transcript.jsonl") : void 0,
|
|
973
|
+
response_path: hasAnswer ? path3.posix.join(artifactSubdir, "outputs", "response.md") : void 0,
|
|
874
974
|
...taskBundle ? {
|
|
875
975
|
task_dir: path3.posix.join(artifactSubdir, "task"),
|
|
876
976
|
eval_path: path3.posix.join(artifactSubdir, "task", "EVAL.yaml"),
|
|
@@ -878,14 +978,23 @@ function buildResultIndexArtifact(result, taskBundle) {
|
|
|
878
978
|
...taskBundle.filesPath ? { files_path: path3.posix.join(artifactSubdir, "task", "files") } : {},
|
|
879
979
|
...taskBundle.gradersPath ? { graders_path: path3.posix.join(artifactSubdir, "task", "graders") } : {}
|
|
880
980
|
} : {},
|
|
881
|
-
metadata: result.metadata
|
|
981
|
+
metadata: toIndexMetadata(result.metadata)
|
|
882
982
|
};
|
|
883
983
|
}
|
|
884
984
|
async function writeJsonlFile(filePath, records) {
|
|
885
|
-
const content = records.length === 0 ? "" : `${records.map((record) => JSON.stringify(
|
|
985
|
+
const content = records.length === 0 ? "" : `${records.map((record) => JSON.stringify(record)).join("\n")}
|
|
886
986
|
`;
|
|
887
987
|
await writeFile2(filePath, content, "utf8");
|
|
888
988
|
}
|
|
989
|
+
async function writeTranscriptJsonl(filePath, result) {
|
|
990
|
+
const lines = traceToTranscriptJsonLines(result.trace, {
|
|
991
|
+
testId: result.testId,
|
|
992
|
+
target: result.target
|
|
993
|
+
});
|
|
994
|
+
const content = lines.length > 0 ? `${lines.map((line) => JSON.stringify(line)).join("\n")}
|
|
995
|
+
` : "";
|
|
996
|
+
await writeFile2(filePath, content, "utf8");
|
|
997
|
+
}
|
|
889
998
|
function isRecord2(value) {
|
|
890
999
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
891
1000
|
}
|
|
@@ -977,11 +1086,28 @@ function isOutputMessage(value) {
|
|
|
977
1086
|
function isExecutionStatus(value) {
|
|
978
1087
|
return typeof value === "string" && EXECUTION_STATUSES.has(value);
|
|
979
1088
|
}
|
|
1089
|
+
function isTraceRecord(value) {
|
|
1090
|
+
return !!value && typeof value === "object" && !Array.isArray(value) && Array.isArray(value.messages) && Array.isArray(value.events);
|
|
1091
|
+
}
|
|
980
1092
|
function normalizeParsedResult(value) {
|
|
981
1093
|
if (!value || typeof value !== "object" || Array.isArray(value)) {
|
|
982
1094
|
return void 0;
|
|
983
1095
|
}
|
|
984
1096
|
const result = value;
|
|
1097
|
+
const legacyOutputMessages = Array.isArray(result.output) ? result.output.filter(isOutputMessage) : void 0;
|
|
1098
|
+
const output = typeof result.output === "string" ? result.output : extractLastAssistantContent(legacyOutputMessages);
|
|
1099
|
+
const legacySummary = result.trace && typeof result.trace === "object" && !Array.isArray(result.trace) ? result.trace : void 0;
|
|
1100
|
+
const trace = isTraceRecord(result.trace) ? result.trace : buildTraceFromMessages({
|
|
1101
|
+
input: Array.isArray(result.input) ? result.input : [],
|
|
1102
|
+
output: legacyOutputMessages,
|
|
1103
|
+
summary: legacySummary,
|
|
1104
|
+
finalOutput: output,
|
|
1105
|
+
tokenUsage: result.tokenUsage,
|
|
1106
|
+
costUsd: typeof result.costUsd === "number" ? result.costUsd : void 0,
|
|
1107
|
+
durationMs: typeof result.durationMs === "number" ? result.durationMs : void 0,
|
|
1108
|
+
target: typeof result.target === "string" ? result.target : void 0,
|
|
1109
|
+
testId: typeof result.testId === "string" ? result.testId : void 0
|
|
1110
|
+
});
|
|
985
1111
|
return {
|
|
986
1112
|
...result,
|
|
987
1113
|
timestamp: typeof result.timestamp === "string" ? result.timestamp : (/* @__PURE__ */ new Date(0)).toISOString(),
|
|
@@ -989,7 +1115,8 @@ function normalizeParsedResult(value) {
|
|
|
989
1115
|
score: typeof result.score === "number" ? result.score : 0,
|
|
990
1116
|
assertions: Array.isArray(result.assertions) ? result.assertions.filter(isAssertionEntry) : [],
|
|
991
1117
|
target: typeof result.target === "string" ? result.target : "unknown",
|
|
992
|
-
output
|
|
1118
|
+
output,
|
|
1119
|
+
trace,
|
|
993
1120
|
executionStatus: isExecutionStatus(result.executionStatus) ? result.executionStatus : "ok"
|
|
994
1121
|
};
|
|
995
1122
|
}
|
|
@@ -1021,23 +1148,10 @@ async function writeArtifacts(jsonlPath, outputDir, options) {
|
|
|
1021
1148
|
function buildTranscriptMessageLines(results) {
|
|
1022
1149
|
const lines = [];
|
|
1023
1150
|
for (const result of results) {
|
|
1024
|
-
const transcriptLines =
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
provider: result.target,
|
|
1029
|
-
sessionId: result.conversationId ?? result.testId,
|
|
1030
|
-
startedAt: result.timestamp
|
|
1031
|
-
},
|
|
1032
|
-
tokenUsage: result.tokenUsage,
|
|
1033
|
-
durationMs: result.durationMs,
|
|
1034
|
-
costUsd: result.costUsd
|
|
1035
|
-
},
|
|
1036
|
-
{
|
|
1037
|
-
testId: result.testId,
|
|
1038
|
-
target: result.target
|
|
1039
|
-
}
|
|
1040
|
-
);
|
|
1151
|
+
const transcriptLines = traceToTranscriptJsonLines(result.trace, {
|
|
1152
|
+
testId: result.testId,
|
|
1153
|
+
target: result.target
|
|
1154
|
+
});
|
|
1041
1155
|
lines.push(...transcriptLines.map((line) => JSON.stringify(line)));
|
|
1042
1156
|
}
|
|
1043
1157
|
return lines.length > 0 ? `${lines.join("\n")}
|
|
@@ -1115,15 +1229,22 @@ async function writePerTestArtifacts(results, outputDir, options) {
|
|
|
1115
1229
|
if (input) {
|
|
1116
1230
|
await writeFile2(path3.join(testDir, "input.md"), input, "utf8");
|
|
1117
1231
|
}
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
|
|
1121
|
-
await writeFile2(
|
|
1122
|
-
|
|
1123
|
-
formatOutputMarkdown(result.output),
|
|
1124
|
-
"utf8"
|
|
1125
|
-
);
|
|
1232
|
+
const outputsDir = path3.join(testDir, "outputs");
|
|
1233
|
+
await mkdir2(outputsDir, { recursive: true });
|
|
1234
|
+
if (result.output.length > 0) {
|
|
1235
|
+
await writeFile2(path3.join(outputsDir, "answer.md"), result.output, "utf8");
|
|
1236
|
+
await writeFile2(path3.join(outputsDir, "response.md"), result.output, "utf8");
|
|
1126
1237
|
}
|
|
1238
|
+
if (result.output.length > 0 || result.trace.messages.length > 0) {
|
|
1239
|
+
await writeTranscriptJsonl(path3.join(outputsDir, "transcript.jsonl"), result);
|
|
1240
|
+
}
|
|
1241
|
+
await writeTraceEnvelopeSidecar({
|
|
1242
|
+
result,
|
|
1243
|
+
outputDir,
|
|
1244
|
+
outputsDir,
|
|
1245
|
+
evalPath: resolveEnvelopeEvalPath(result, testByTestId),
|
|
1246
|
+
experiment: options?.experiment
|
|
1247
|
+
});
|
|
1127
1248
|
const taskBundle = await materializeTaskBundleForResult({
|
|
1128
1249
|
result,
|
|
1129
1250
|
testDir,
|
|
@@ -1164,15 +1285,22 @@ async function writeArtifactsFromResults(results, outputDir, options) {
|
|
|
1164
1285
|
if (input) {
|
|
1165
1286
|
await writeFile2(path3.join(testDir, "input.md"), input, "utf8");
|
|
1166
1287
|
}
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
await writeFile2(
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
);
|
|
1288
|
+
const outputsDir = path3.join(testDir, "outputs");
|
|
1289
|
+
await mkdir2(outputsDir, { recursive: true });
|
|
1290
|
+
if (result.output.length > 0) {
|
|
1291
|
+
await writeFile2(path3.join(outputsDir, "answer.md"), result.output, "utf8");
|
|
1292
|
+
await writeFile2(path3.join(outputsDir, "response.md"), result.output, "utf8");
|
|
1293
|
+
}
|
|
1294
|
+
if (result.output.length > 0 || result.trace.messages.length > 0) {
|
|
1295
|
+
await writeTranscriptJsonl(path3.join(outputsDir, "transcript.jsonl"), result);
|
|
1175
1296
|
}
|
|
1297
|
+
await writeTraceEnvelopeSidecar({
|
|
1298
|
+
result,
|
|
1299
|
+
outputDir,
|
|
1300
|
+
outputsDir,
|
|
1301
|
+
evalPath: resolveEnvelopeEvalPath(result, testByTestId, options?.evalFile),
|
|
1302
|
+
experiment: options?.experiment
|
|
1303
|
+
});
|
|
1176
1304
|
const taskBundle = await materializeTaskBundleForResult({
|
|
1177
1305
|
result,
|
|
1178
1306
|
testDir,
|
|
@@ -1231,4 +1359,4 @@ export {
|
|
|
1231
1359
|
writePerTestArtifacts,
|
|
1232
1360
|
writeArtifactsFromResults
|
|
1233
1361
|
};
|
|
1234
|
-
//# sourceMappingURL=chunk-
|
|
1362
|
+
//# sourceMappingURL=chunk-N6E5XFOM.js.map
|