agentv 3.5.0 → 3.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -3
- package/dist/{chunk-RLL4QGNL.js → chunk-7YS6YNJZ.js} +5 -5
- package/dist/chunk-7YS6YNJZ.js.map +1 -0
- package/dist/{chunk-5GG6DDP5.js → chunk-TR6H437M.js} +14 -16
- package/dist/chunk-TR6H437M.js.map +1 -0
- package/dist/{chunk-D6G4N2H2.js → chunk-XGG64VIY.js} +80 -69
- package/dist/chunk-XGG64VIY.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-MZFXE6B5.js → dist-VP6AXX6B.js} +2 -2
- package/dist/index.js +3 -3
- package/dist/{interactive-J7SUWZH2.js → interactive-F6XECJ33.js} +3 -3
- package/dist/templates/.agentv/.env.example +9 -11
- package/dist/templates/.agentv/config.yaml +0 -5
- package/dist/templates/.agentv/targets.yaml +16 -0
- package/package.json +1 -1
- package/dist/chunk-5GG6DDP5.js.map +0 -1
- package/dist/chunk-D6G4N2H2.js.map +0 -1
- package/dist/chunk-RLL4QGNL.js.map +0 -1
- package/dist/{dist-MZFXE6B5.js.map → dist-VP6AXX6B.js.map} +0 -0
- package/dist/{interactive-J7SUWZH2.js.map → interactive-F6XECJ33.js.map} +0 -0
|
@@ -16,7 +16,7 @@ import {
|
|
|
16
16
|
validateEvalFile,
|
|
17
17
|
validateFileReferences,
|
|
18
18
|
validateTargetsFile
|
|
19
|
-
} from "./chunk-RLL4QGNL.js";
|
|
19
|
+
} from "./chunk-7YS6YNJZ.js";
|
|
20
20
|
import {
|
|
21
21
|
createBuiltinRegistry,
|
|
22
22
|
createProvider,
|
|
@@ -34,7 +34,7 @@ import {
|
|
|
34
34
|
toSnakeCaseDeep as toSnakeCaseDeep2,
|
|
35
35
|
transpileEvalYamlFile,
|
|
36
36
|
trimBaselineResult
|
|
37
|
-
} from "./chunk-D6G4N2H2.js";
|
|
37
|
+
} from "./chunk-XGG64VIY.js";
|
|
38
38
|
import {
|
|
39
39
|
__commonJS,
|
|
40
40
|
__esm,
|
|
@@ -3493,9 +3493,9 @@ var ASSERTION_TEMPLATES = {
|
|
|
3493
3493
|
default: `#!/usr/bin/env bun
|
|
3494
3494
|
import { defineAssertion } from '@agentv/eval';
|
|
3495
3495
|
|
|
3496
|
-
export default defineAssertion(({ answer }) => {
|
|
3496
|
+
export default defineAssertion(({ outputText }) => {
|
|
3497
3497
|
// TODO: Implement your assertion logic
|
|
3498
|
-
const pass =
|
|
3498
|
+
const pass = outputText.length > 0;
|
|
3499
3499
|
return {
|
|
3500
3500
|
pass,
|
|
3501
3501
|
reasoning: pass ? 'Output has content' : 'Output is empty',
|
|
@@ -3505,9 +3505,9 @@ export default defineAssertion(({ answer }) => {
|
|
|
3505
3505
|
score: `#!/usr/bin/env bun
|
|
3506
3506
|
import { defineAssertion } from '@agentv/eval';
|
|
3507
3507
|
|
|
3508
|
-
export default defineAssertion(({ answer }) => {
|
|
3508
|
+
export default defineAssertion(({ outputText }) => {
|
|
3509
3509
|
// TODO: Implement your scoring logic (0.0 to 1.0)
|
|
3510
|
-
const score =
|
|
3510
|
+
const score = outputText.length > 0 ? 1.0 : 0.0;
|
|
3511
3511
|
return {
|
|
3512
3512
|
pass: score >= 0.5,
|
|
3513
3513
|
score,
|
|
@@ -3967,7 +3967,6 @@ var evalAssertCommand = command({
|
|
|
3967
3967
|
}
|
|
3968
3968
|
const payload = JSON.stringify(
|
|
3969
3969
|
{
|
|
3970
|
-
answer: resolvedOutput,
|
|
3971
3970
|
output: [{ role: "assistant", content: resolvedOutput }],
|
|
3972
3971
|
input: [{ role: "user", content: resolvedInput }],
|
|
3973
3972
|
question: resolvedInput,
|
|
@@ -4185,7 +4184,7 @@ var evalRunCommand = command({
|
|
|
4185
4184
|
},
|
|
4186
4185
|
handler: async (args) => {
|
|
4187
4186
|
if (args.evalPaths.length === 0 && process.stdin.isTTY) {
|
|
4188
|
-
const { launchInteractiveWizard } = await import("./interactive-J7SUWZH2.js");
|
|
4187
|
+
const { launchInteractiveWizard } = await import("./interactive-F6XECJ33.js");
|
|
4189
4188
|
await launchInteractiveWizard();
|
|
4190
4189
|
return;
|
|
4191
4190
|
}
|
|
@@ -4707,10 +4706,10 @@ function exportResults(sourceFile, content, outputDir) {
|
|
|
4707
4706
|
const outputsDir = path8.join(outputDir, "outputs");
|
|
4708
4707
|
mkdirSync2(outputsDir, { recursive: true });
|
|
4709
4708
|
for (const result of patched) {
|
|
4710
|
-
const
|
|
4711
|
-
if (
|
|
4709
|
+
const outputText = result.outputText;
|
|
4710
|
+
if (outputText) {
|
|
4712
4711
|
const id = safeTestId(result);
|
|
4713
|
-
writeFileSync3(path8.join(outputsDir, `${id}.txt`),
|
|
4712
|
+
writeFileSync3(path8.join(outputsDir, `${id}.txt`), outputText);
|
|
4714
4713
|
}
|
|
4715
4714
|
}
|
|
4716
4715
|
}
|
|
@@ -5022,7 +5021,7 @@ function toTraceSummary(raw) {
|
|
|
5022
5021
|
return toCamelCaseDeep(raw.trace);
|
|
5023
5022
|
}
|
|
5024
5023
|
function extractCandidate(raw) {
|
|
5025
|
-
if (raw.
|
|
5024
|
+
if (raw.output_text !== void 0) return raw.output_text;
|
|
5026
5025
|
if (raw.output !== void 0)
|
|
5027
5026
|
return typeof raw.output === "string" ? raw.output : JSON.stringify(raw.output);
|
|
5028
5027
|
return "";
|
|
@@ -5221,9 +5220,8 @@ var traceScoreCommand = command({
|
|
|
5221
5220
|
function renderFlatTrace(result) {
|
|
5222
5221
|
const trace = result.trace;
|
|
5223
5222
|
const parts = [];
|
|
5224
|
-
if (trace?.
|
|
5225
|
-
const toolParts = trace.
|
|
5226
|
-
const count = trace.tool_calls_by_name?.[name] ?? 0;
|
|
5223
|
+
if (trace?.tool_calls && Object.keys(trace.tool_calls).length > 0) {
|
|
5224
|
+
const toolParts = Object.entries(trace.tool_calls).map(([name, count]) => {
|
|
5227
5225
|
return count > 1 ? `${name} \xD7${count}` : name;
|
|
5228
5226
|
});
|
|
5229
5227
|
parts.push(`Tools: ${toolParts.join(", ")}`);
|
|
@@ -6278,4 +6276,4 @@ export {
|
|
|
6278
6276
|
preprocessArgv,
|
|
6279
6277
|
runCli
|
|
6280
6278
|
};
|
|
6281
|
-
//# sourceMappingURL=chunk-5GG6DDP5.js.map
|
|
6279
|
+
//# sourceMappingURL=chunk-TR6H437M.js.map
|