agentv 4.38.1 → 4.39.0-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{artifact-writer-MK5X5MSO.js → artifact-writer-VPRAQSQM.js} +14 -11
- package/dist/{chunk-OIN3MVOD.js → chunk-4NAWRNBL.js} +67 -68
- package/dist/chunk-4NAWRNBL.js.map +1 -0
- package/dist/{chunk-6M5S4IJW.js → chunk-5JWECTVJ.js} +586 -183
- package/dist/chunk-5JWECTVJ.js.map +1 -0
- package/dist/chunk-5VQPWWUI.js +494 -0
- package/dist/chunk-5VQPWWUI.js.map +1 -0
- package/dist/{chunk-QOBQ5XYF.js → chunk-76FOHROU.js} +16 -4
- package/dist/chunk-76FOHROU.js.map +1 -0
- package/dist/{chunk-VBHHZQS6.js → chunk-DR2ZHSBE.js} +1827 -332
- package/dist/chunk-DR2ZHSBE.js.map +1 -0
- package/dist/{chunk-NLTIK3LV.js → chunk-RLMXZDDC.js} +499 -347
- package/dist/chunk-RLMXZDDC.js.map +1 -0
- package/dist/cli.js +6 -6
- package/dist/dashboard/assets/index-BnYCCJ7O.css +1 -0
- package/dist/dashboard/assets/index-DaueD7GO.js +118 -0
- package/dist/dashboard/assets/{index-SIl6NbIJ.js → index-_jpKSzIf.js} +1 -1
- package/dist/dashboard/index.html +2 -2
- package/dist/{dist-HVLBDG5F.js → dist-QAMAJMAH.js} +54 -16
- package/dist/index.js +6 -6
- package/dist/{interactive-45LPG2YJ.js → interactive-V2GW7A25.js} +6 -6
- package/dist/{otlp-json-file-exporter-RJFPCKVK-T6N4OGWG.js → otlp-json-file-exporter-RY63S3IG-PZBQPVYY.js} +2 -2
- package/dist/skills/agentv-eval-writer/SKILL.md +15 -10
- package/dist/skills/agentv-eval-writer/references/custom-evaluators.md +23 -15
- package/dist/{ts-eval-loader-TJT6BGFF-DI7XNSO4.js → ts-eval-loader-MQFJ5AEM-6V7TAKWK.js} +2 -2
- package/package.json +1 -1
- package/dist/chunk-6M5S4IJW.js.map +0 -1
- package/dist/chunk-DKUAETXE.js +0 -1362
- package/dist/chunk-DKUAETXE.js.map +0 -1
- package/dist/chunk-NLTIK3LV.js.map +0 -1
- package/dist/chunk-OIN3MVOD.js.map +0 -1
- package/dist/chunk-QOBQ5XYF.js.map +0 -1
- package/dist/chunk-VBHHZQS6.js.map +0 -1
- package/dist/dashboard/assets/index-BpnllKET.css +0 -1
- package/dist/dashboard/assets/index-Cm9SUopp.js +0 -118
- /package/dist/{artifact-writer-MK5X5MSO.js.map → artifact-writer-VPRAQSQM.js.map} +0 -0
- /package/dist/{dist-HVLBDG5F.js.map → dist-QAMAJMAH.js.map} +0 -0
- /package/dist/{interactive-45LPG2YJ.js.map → interactive-V2GW7A25.js.map} +0 -0
- /package/dist/{otlp-json-file-exporter-RJFPCKVK-T6N4OGWG.js.map → otlp-json-file-exporter-RY63S3IG-PZBQPVYY.js.map} +0 -0
- /package/dist/{ts-eval-loader-TJT6BGFF-DI7XNSO4.js.map → ts-eval-loader-MQFJ5AEM-6V7TAKWK.js.map} +0 -0
|
@@ -1,28 +1,31 @@
|
|
|
1
1
|
import { createRequire } from 'node:module'; const require = createRequire(import.meta.url);
|
|
2
2
|
import {
|
|
3
|
+
buildIndexArtifactEntry,
|
|
4
|
+
buildResultIndexArtifact,
|
|
5
|
+
writeArtifactsFromResults,
|
|
6
|
+
writePerTestArtifacts
|
|
7
|
+
} from "./chunk-5VQPWWUI.js";
|
|
8
|
+
import "./chunk-RLMXZDDC.js";
|
|
9
|
+
import "./chunk-76FOHROU.js";
|
|
10
|
+
import "./chunk-BPGJ4HBU.js";
|
|
11
|
+
import {
|
|
12
|
+
RESULT_INDEX_FILENAME,
|
|
3
13
|
aggregateRunDir,
|
|
4
14
|
buildAggregateGradingArtifact,
|
|
5
15
|
buildBenchmarkArtifact,
|
|
6
16
|
buildGradingArtifact,
|
|
7
|
-
buildIndexArtifactEntry,
|
|
8
|
-
buildResultIndexArtifact,
|
|
9
17
|
buildTestTargetKey,
|
|
10
18
|
buildTimingArtifact,
|
|
11
19
|
deduplicateByTestIdTarget,
|
|
12
20
|
parseJsonlResults,
|
|
13
21
|
writeArtifacts,
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
writePerTestArtifacts
|
|
17
|
-
} from "./chunk-DKUAETXE.js";
|
|
18
|
-
import "./chunk-NLTIK3LV.js";
|
|
19
|
-
import "./chunk-QOBQ5XYF.js";
|
|
20
|
-
import "./chunk-BPGJ4HBU.js";
|
|
21
|
-
import "./chunk-VBHHZQS6.js";
|
|
22
|
+
writeInitialBenchmarkArtifact
|
|
23
|
+
} from "./chunk-DR2ZHSBE.js";
|
|
22
24
|
import "./chunk-NPVGBFF6.js";
|
|
23
25
|
import "./chunk-M7BUKBAF.js";
|
|
24
26
|
import "./chunk-5H446C7X.js";
|
|
25
27
|
export {
|
|
28
|
+
RESULT_INDEX_FILENAME,
|
|
26
29
|
aggregateRunDir,
|
|
27
30
|
buildAggregateGradingArtifact,
|
|
28
31
|
buildBenchmarkArtifact,
|
|
@@ -38,4 +41,4 @@ export {
|
|
|
38
41
|
writeInitialBenchmarkArtifact,
|
|
39
42
|
writePerTestArtifacts
|
|
40
43
|
};
|
|
41
|
-
//# sourceMappingURL=artifact-writer-
|
|
44
|
+
//# sourceMappingURL=artifact-writer-VPRAQSQM.js.map
|
|
@@ -1,9 +1,12 @@
|
|
|
1
1
|
import { createRequire } from 'node:module'; const require = createRequire(import.meta.url);
|
|
2
2
|
import {
|
|
3
3
|
Mutex,
|
|
4
|
+
RESULT_INDEX_FILENAME,
|
|
4
5
|
TARGET_FILE_CANDIDATES,
|
|
6
|
+
buildDefaultRunDir,
|
|
5
7
|
c,
|
|
6
8
|
clearRemoteRunTags,
|
|
9
|
+
createRunDirName,
|
|
7
10
|
deleteRunTags,
|
|
8
11
|
detectFileType,
|
|
9
12
|
discoverEvalFiles,
|
|
@@ -36,6 +39,7 @@ import {
|
|
|
36
39
|
resolveEvalPaths,
|
|
37
40
|
resolveResultSourcePath,
|
|
38
41
|
resolveRunCacheFile,
|
|
42
|
+
resolveRunManifestPath,
|
|
39
43
|
runEvalCommand,
|
|
40
44
|
selectTarget,
|
|
41
45
|
setRemoteRunTags,
|
|
@@ -48,19 +52,11 @@ import {
|
|
|
48
52
|
validateTargetsFile,
|
|
49
53
|
validateWorkspacePaths,
|
|
50
54
|
writeRunTags
|
|
51
|
-
} from "./chunk-
|
|
55
|
+
} from "./chunk-5JWECTVJ.js";
|
|
52
56
|
import {
|
|
53
|
-
RESULT_INDEX_FILENAME,
|
|
54
|
-
aggregateRunDir,
|
|
55
|
-
buildBenchmarkArtifact,
|
|
56
|
-
buildDefaultRunDir,
|
|
57
|
-
buildTestTargetKey,
|
|
58
|
-
buildTimingArtifact,
|
|
59
|
-
createRunDirName,
|
|
60
|
-
resolveRunManifestPath,
|
|
61
57
|
toSnakeCaseDeep as toSnakeCaseDeep2,
|
|
62
58
|
writeArtifactsFromResults
|
|
63
|
-
} from "./chunk-
|
|
59
|
+
} from "./chunk-5VQPWWUI.js";
|
|
64
60
|
import {
|
|
65
61
|
DEFAULT_CATEGORY,
|
|
66
62
|
deriveCategory,
|
|
@@ -69,17 +65,19 @@ import {
|
|
|
69
65
|
getOutputFilenames,
|
|
70
66
|
parseClaudeSession,
|
|
71
67
|
parseCodexSession,
|
|
72
|
-
readTranscriptFile,
|
|
73
68
|
runBeforeSessionHook,
|
|
74
69
|
scanRepoDeps,
|
|
75
70
|
syncProjects,
|
|
76
|
-
toTranscriptJsonLines,
|
|
77
71
|
transpileEvalYamlFile,
|
|
78
72
|
trimBaselineResult
|
|
79
|
-
} from "./chunk-
|
|
73
|
+
} from "./chunk-RLMXZDDC.js";
|
|
80
74
|
import {
|
|
81
75
|
DEFAULT_THRESHOLD,
|
|
82
76
|
addProject,
|
|
77
|
+
aggregateRunDir,
|
|
78
|
+
buildBenchmarkArtifact,
|
|
79
|
+
buildTestTargetKey,
|
|
80
|
+
buildTimingArtifact,
|
|
83
81
|
buildTraceFromMessages,
|
|
84
82
|
createBuiltinRegistry,
|
|
85
83
|
discoverCopilotSessions,
|
|
@@ -93,10 +91,12 @@ import {
|
|
|
93
91
|
loadProjectRegistry,
|
|
94
92
|
loadTestSuite,
|
|
95
93
|
normalizeLineEndings,
|
|
94
|
+
normalizeResultRow,
|
|
96
95
|
parseAgentSkillsEvals,
|
|
97
96
|
parseCopilotEvents,
|
|
98
97
|
parseYamlValue,
|
|
99
98
|
readTargetDefinitions,
|
|
99
|
+
readTranscriptFile,
|
|
100
100
|
removeProject,
|
|
101
101
|
runContainsAllAssertion,
|
|
102
102
|
runContainsAnyAssertion,
|
|
@@ -111,8 +111,9 @@ import {
|
|
|
111
111
|
runStartsWithAssertion,
|
|
112
112
|
toCamelCaseDeep,
|
|
113
113
|
toSnakeCaseDeep,
|
|
114
|
+
toTranscriptJsonLines,
|
|
114
115
|
touchProject
|
|
115
|
-
} from "./chunk-
|
|
116
|
+
} from "./chunk-DR2ZHSBE.js";
|
|
116
117
|
import {
|
|
117
118
|
__commonJS,
|
|
118
119
|
__require,
|
|
@@ -3955,53 +3956,27 @@ var ASSERTION_TEMPLATES = {
|
|
|
3955
3956
|
default: `#!/usr/bin/env bun
|
|
3956
3957
|
import { defineAssertion } from '@agentv/eval';
|
|
3957
3958
|
|
|
3958
|
-
/** Extract text from the last message with the given role. */
|
|
3959
|
-
function getMessageText(messages: Array<{ role: string; content?: unknown }>, role = 'assistant'): string {
|
|
3960
|
-
for (let i = messages.length - 1; i >= 0; i--) {
|
|
3961
|
-
const msg = messages[i];
|
|
3962
|
-
if (msg.role !== role) continue;
|
|
3963
|
-
if (typeof msg.content === 'string') return msg.content;
|
|
3964
|
-
if (Array.isArray(msg.content)) {
|
|
3965
|
-
return msg.content.filter((b: any) => b.type === 'text').map((b: any) => b.text).join('\\n');
|
|
3966
|
-
}
|
|
3967
|
-
}
|
|
3968
|
-
return '';
|
|
3969
|
-
}
|
|
3970
|
-
|
|
3971
3959
|
export default defineAssertion(({ output }) => {
|
|
3972
3960
|
// TODO: Implement your assertion logic
|
|
3973
|
-
const text =
|
|
3961
|
+
const text = output ?? '';
|
|
3974
3962
|
const pass = text.length > 0;
|
|
3975
3963
|
return {
|
|
3976
3964
|
pass,
|
|
3977
|
-
|
|
3965
|
+
assertions: [{ text: pass ? 'Output has content' : 'Output is empty', passed: pass }],
|
|
3978
3966
|
};
|
|
3979
3967
|
});
|
|
3980
3968
|
`,
|
|
3981
3969
|
score: `#!/usr/bin/env bun
|
|
3982
3970
|
import { defineAssertion } from '@agentv/eval';
|
|
3983
3971
|
|
|
3984
|
-
/** Extract text from the last message with the given role. */
|
|
3985
|
-
function getMessageText(messages: Array<{ role: string; content?: unknown }>, role = 'assistant'): string {
|
|
3986
|
-
for (let i = messages.length - 1; i >= 0; i--) {
|
|
3987
|
-
const msg = messages[i];
|
|
3988
|
-
if (msg.role !== role) continue;
|
|
3989
|
-
if (typeof msg.content === 'string') return msg.content;
|
|
3990
|
-
if (Array.isArray(msg.content)) {
|
|
3991
|
-
return msg.content.filter((b: any) => b.type === 'text').map((b: any) => b.text).join('\\n');
|
|
3992
|
-
}
|
|
3993
|
-
}
|
|
3994
|
-
return '';
|
|
3995
|
-
}
|
|
3996
|
-
|
|
3997
3972
|
export default defineAssertion(({ output }) => {
|
|
3998
3973
|
// TODO: Implement your scoring logic (0.0 to 1.0)
|
|
3999
|
-
const text =
|
|
3974
|
+
const text = output ?? '';
|
|
4000
3975
|
const score = text.length > 0 ? 1.0 : 0.0;
|
|
4001
3976
|
return {
|
|
4002
3977
|
pass: score >= 0.5,
|
|
4003
3978
|
score,
|
|
4004
|
-
|
|
3979
|
+
assertions: [{ text: 'Output has content', passed: score === 1.0 }],
|
|
4005
3980
|
};
|
|
4006
3981
|
});
|
|
4007
3982
|
`
|
|
@@ -4337,7 +4312,6 @@ var evalAssertCommand = command({
|
|
|
4337
4312
|
question: resolvedInput,
|
|
4338
4313
|
criteria: "",
|
|
4339
4314
|
expected_output: [],
|
|
4340
|
-
reference_answer: "",
|
|
4341
4315
|
input_files: [],
|
|
4342
4316
|
trace,
|
|
4343
4317
|
token_usage: null,
|
|
@@ -4348,11 +4322,7 @@ var evalAssertCommand = command({
|
|
|
4348
4322
|
file_changes: null,
|
|
4349
4323
|
workspace_path: null,
|
|
4350
4324
|
config: null,
|
|
4351
|
-
metadata: {}
|
|
4352
|
-
// Text convenience accessors (new names)
|
|
4353
|
-
input_text: resolvedInput,
|
|
4354
|
-
output_text: resolvedOutput,
|
|
4355
|
-
expected_output_text: ""
|
|
4325
|
+
metadata: {}
|
|
4356
4326
|
},
|
|
4357
4327
|
null,
|
|
4358
4328
|
2
|
|
@@ -4440,6 +4410,33 @@ var evalRunCommand = command({
|
|
|
4440
4410
|
long: "experiment",
|
|
4441
4411
|
description: "Experiment label for canonical run output (default: default)"
|
|
4442
4412
|
}),
|
|
4413
|
+
resultsRepo: option({
|
|
4414
|
+
type: optional(string),
|
|
4415
|
+
long: "results-repo",
|
|
4416
|
+
description: "Results Git repo override: current/. for the source repo, a local path, Git URL, or owner/repo"
|
|
4417
|
+
}),
|
|
4418
|
+
resultsBranch: option({
|
|
4419
|
+
type: optional(string),
|
|
4420
|
+
long: "results-branch",
|
|
4421
|
+
description: "Results storage branch (default: agentv/results/v1)"
|
|
4422
|
+
}),
|
|
4423
|
+
resultsRemote: option({
|
|
4424
|
+
type: optional(string),
|
|
4425
|
+
long: "results-remote",
|
|
4426
|
+
description: "Git remote name for results push/fetch (default: origin)"
|
|
4427
|
+
}),
|
|
4428
|
+
resultsPush: flag({
|
|
4429
|
+
long: "results-push",
|
|
4430
|
+
description: "Push the results branch after publishing the completed local run"
|
|
4431
|
+
}),
|
|
4432
|
+
noResultsPush: flag({
|
|
4433
|
+
long: "no-results-push",
|
|
4434
|
+
description: "Publish to the local results branch without pushing to the remote"
|
|
4435
|
+
}),
|
|
4436
|
+
resultsRequirePush: flag({
|
|
4437
|
+
long: "results-require-push",
|
|
4438
|
+
description: "Fail the eval command if the completed results branch cannot be pushed"
|
|
4439
|
+
}),
|
|
4443
4440
|
dryRun: flag({
|
|
4444
4441
|
long: "dry-run",
|
|
4445
4442
|
description: "Use mock provider responses instead of real LLM calls"
|
|
@@ -4515,7 +4512,7 @@ var evalRunCommand = command({
|
|
|
4515
4512
|
otelBackend: option({
|
|
4516
4513
|
type: optional(string),
|
|
4517
4514
|
long: "otel-backend",
|
|
4518
|
-
description: "Use
|
|
4515
|
+
description: "Use an OTel backend resolver (langfuse, braintrust, confident, or local)"
|
|
4519
4516
|
}),
|
|
4520
4517
|
otelCaptureContent: flag({
|
|
4521
4518
|
long: "otel-capture-content",
|
|
@@ -4600,7 +4597,7 @@ var evalRunCommand = command({
|
|
|
4600
4597
|
},
|
|
4601
4598
|
handler: async (args) => {
|
|
4602
4599
|
if (args.evalPaths.length === 0 && process.stdin.isTTY) {
|
|
4603
|
-
const { launchInteractiveWizard } = await import("./interactive-
|
|
4600
|
+
const { launchInteractiveWizard } = await import("./interactive-V2GW7A25.js");
|
|
4604
4601
|
await launchInteractiveWizard();
|
|
4605
4602
|
return;
|
|
4606
4603
|
}
|
|
@@ -4609,6 +4606,10 @@ var evalRunCommand = command({
|
|
|
4609
4606
|
console.error("Error: --budget-usd must be a positive number.");
|
|
4610
4607
|
process.exit(2);
|
|
4611
4608
|
}
|
|
4609
|
+
if (args.resultsPush && args.noResultsPush) {
|
|
4610
|
+
console.error("Error: --results-push and --no-results-push cannot be used together.");
|
|
4611
|
+
process.exit(2);
|
|
4612
|
+
}
|
|
4612
4613
|
const rawOptions = {
|
|
4613
4614
|
target: args.target,
|
|
4614
4615
|
targets: args.targets,
|
|
@@ -4618,6 +4619,12 @@ var evalRunCommand = command({
|
|
|
4618
4619
|
output: args.output,
|
|
4619
4620
|
outputFormat: args.outputFormat,
|
|
4620
4621
|
experiment: args.experiment,
|
|
4622
|
+
resultsRepo: args.resultsRepo,
|
|
4623
|
+
resultsBranch: args.resultsBranch,
|
|
4624
|
+
resultsRemote: args.resultsRemote,
|
|
4625
|
+
resultsPush: args.resultsPush,
|
|
4626
|
+
noResultsPush: args.noResultsPush,
|
|
4627
|
+
resultsRequirePush: args.resultsRequirePush,
|
|
4621
4628
|
dryRun: args.dryRun,
|
|
4622
4629
|
dryRunDelay: args.dryRunDelay,
|
|
4623
4630
|
dryRunDelayMin: args.dryRunDelayMin,
|
|
@@ -6353,13 +6360,15 @@ function parseFilterableRecords(filePath) {
|
|
|
6353
6360
|
}
|
|
6354
6361
|
const lines = content.split("\n").filter((line) => line.trim());
|
|
6355
6362
|
const records = [];
|
|
6356
|
-
for (
|
|
6363
|
+
for (let i = 0; i < lines.length; i++) {
|
|
6364
|
+
const line = lines[i];
|
|
6357
6365
|
let raw;
|
|
6358
6366
|
try {
|
|
6359
6367
|
raw = JSON.parse(line);
|
|
6360
6368
|
} catch {
|
|
6361
6369
|
continue;
|
|
6362
6370
|
}
|
|
6371
|
+
raw = normalizeResultRow(raw, { lineNumber: i + 1, sourceLabel: filePath });
|
|
6363
6372
|
let experiment = typeof raw.experiment === "string" ? raw.experiment : void 0;
|
|
6364
6373
|
if (!experiment) {
|
|
6365
6374
|
const parts = filePath.split(path14.sep);
|
|
@@ -6955,7 +6964,7 @@ function searchJsonlFile(filePath, regex2, targetFilter, experimentFilter) {
|
|
|
6955
6964
|
const target = typeof record.target === "string" ? record.target : void 0;
|
|
6956
6965
|
const experiment = typeof record.experiment === "string" ? record.experiment : void 0;
|
|
6957
6966
|
const score = typeof record.score === "number" ? record.score : void 0;
|
|
6958
|
-
const testId = typeof record.test_id === "string" ? record.test_id : typeof record.source === "object" && record.source !== null ? record.source.session_id : void 0;
|
|
6967
|
+
const testId = typeof record.test_id === "string" ? record.test_id : typeof record.testId === "string" ? record.testId : typeof record.source === "object" && record.source !== null ? record.source.session_id : void 0;
|
|
6959
6968
|
if (targetFilter && target !== targetFilter) continue;
|
|
6960
6969
|
if (experimentFilter && experiment !== experimentFilter) continue;
|
|
6961
6970
|
const match = regex2.exec(line);
|
|
@@ -7748,12 +7757,6 @@ function computeStats(values) {
|
|
|
7748
7757
|
import { mkdir as mkdir7, readFile as readFile4, readdir as readdir2, writeFile as writeFile8 } from "node:fs/promises";
|
|
7749
7758
|
import { join as join2 } from "node:path";
|
|
7750
7759
|
var DEFAULT_CONCURRENCY = 10;
|
|
7751
|
-
function extractInputText(input) {
|
|
7752
|
-
if (!input || input.length === 0) return "";
|
|
7753
|
-
if (input.length === 1) return input[0].content;
|
|
7754
|
-
return input.map((m) => `@[${m.role}]:
|
|
7755
|
-
${m.content}`).join("\n\n");
|
|
7756
|
-
}
|
|
7757
7760
|
async function runCodeGraders(tasks, concurrency) {
|
|
7758
7761
|
let totalGraders = 0;
|
|
7759
7762
|
let totalPassed = 0;
|
|
@@ -7782,7 +7785,6 @@ async function runCodeGraders(tasks, concurrency) {
|
|
|
7782
7785
|
const executeCodeGrader = async (graderConfig, task) => {
|
|
7783
7786
|
const { testId, resultsDir, responseText, inputData } = task;
|
|
7784
7787
|
const graderName = graderConfig.name;
|
|
7785
|
-
const inputText = extractInputText(inputData.input);
|
|
7786
7788
|
const messages = [{ role: "assistant", content: responseText }];
|
|
7787
7789
|
const trace = buildTraceFromMessages({
|
|
7788
7790
|
input: inputData.input,
|
|
@@ -7807,10 +7809,7 @@ async function runCodeGraders(tasks, concurrency) {
|
|
|
7807
7809
|
file_changes: null,
|
|
7808
7810
|
workspace_path: null,
|
|
7809
7811
|
config: graderConfig.config ?? null,
|
|
7810
|
-
metadata: inputData.metadata ?? {}
|
|
7811
|
-
input_text: inputText,
|
|
7812
|
-
output_text: responseText,
|
|
7813
|
-
expected_output_text: ""
|
|
7812
|
+
metadata: inputData.metadata ?? {}
|
|
7814
7813
|
});
|
|
7815
7814
|
try {
|
|
7816
7815
|
const stdout = await executeScript(
|
|
@@ -8215,7 +8214,7 @@ import { existsSync as existsSync7, readFileSync as readFileSync6, unlinkSync }
|
|
|
8215
8214
|
import { mkdir as mkdir9, readFile as readFile6, readdir as readdir3, writeFile as writeFile10 } from "node:fs/promises";
|
|
8216
8215
|
import { tmpdir } from "node:os";
|
|
8217
8216
|
import { dirname as dirname2, join as join4, relative as relative2, resolve as resolve2 } from "node:path";
|
|
8218
|
-
function
|
|
8217
|
+
function extractInputText(input) {
|
|
8219
8218
|
if (!input || input.length === 0) return "";
|
|
8220
8219
|
if (input.length === 1) return input[0].content;
|
|
8221
8220
|
return input.map((m) => `@[${m.role}]:
|
|
@@ -8399,7 +8398,7 @@ var evalRunCommand2 = command({
|
|
|
8399
8398
|
const timeoutMs = invoke.timeout_ms ?? 12e4;
|
|
8400
8399
|
const promptFile = join4(tmpdir(), `agentv-prompt-${testId}-${Date.now()}.txt`);
|
|
8401
8400
|
const outputFile = join4(tmpdir(), `agentv-output-${testId}-${Date.now()}.txt`);
|
|
8402
|
-
const inputText =
|
|
8401
|
+
const inputText = extractInputText(inputData.input);
|
|
8403
8402
|
await writeFile10(promptFile, inputText, "utf8");
|
|
8404
8403
|
let rendered = template;
|
|
8405
8404
|
rendered = rendered.replace("{PROMPT_FILE}", promptFile);
|
|
@@ -15632,4 +15631,4 @@ export {
|
|
|
15632
15631
|
preprocessArgv,
|
|
15633
15632
|
runCli
|
|
15634
15633
|
};
|
|
15635
|
-
//# sourceMappingURL=chunk-
|
|
15634
|
+
//# sourceMappingURL=chunk-4NAWRNBL.js.map
|