@agentv/core 4.2.0 → 4.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +55 -56
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +50 -51
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -10342,13 +10342,13 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
|
10342
10342
|
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
10343
10343
|
const { mkdir: mkdir16, readFile: readFile14, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
|
|
10344
10344
|
const { tmpdir: tmpdir3 } = await import("node:os");
|
|
10345
|
-
const
|
|
10345
|
+
const path47 = await import("node:path");
|
|
10346
10346
|
const { randomUUID: randomUUID10 } = await import("node:crypto");
|
|
10347
|
-
const dir =
|
|
10347
|
+
const dir = path47.join(tmpdir3(), `agentv-exec-${randomUUID10()}`);
|
|
10348
10348
|
await mkdir16(dir, { recursive: true });
|
|
10349
|
-
const stdinPath =
|
|
10350
|
-
const stdoutPath =
|
|
10351
|
-
const stderrPath =
|
|
10349
|
+
const stdinPath = path47.join(dir, "stdin.txt");
|
|
10350
|
+
const stdoutPath = path47.join(dir, "stdout.txt");
|
|
10351
|
+
const stderrPath = path47.join(dir, "stderr.txt");
|
|
10352
10352
|
await writeFile9(stdinPath, stdinPayload, "utf8");
|
|
10353
10353
|
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
10354
10354
|
const { spawn: spawn5 } = await import("node:child_process");
|
|
@@ -12561,115 +12561,115 @@ var FieldAccuracyEvaluator = class {
|
|
|
12561
12561
|
* Evaluate a single field against the expected value.
|
|
12562
12562
|
*/
|
|
12563
12563
|
evaluateField(fieldConfig, candidateData, expectedData) {
|
|
12564
|
-
const { path:
|
|
12565
|
-
const candidateValue = resolvePath(candidateData,
|
|
12566
|
-
const expectedValue = resolvePath(expectedData,
|
|
12564
|
+
const { path: path47, match, required = true, weight = 1 } = fieldConfig;
|
|
12565
|
+
const candidateValue = resolvePath(candidateData, path47);
|
|
12566
|
+
const expectedValue = resolvePath(expectedData, path47);
|
|
12567
12567
|
if (expectedValue === void 0) {
|
|
12568
12568
|
return {
|
|
12569
|
-
path:
|
|
12569
|
+
path: path47,
|
|
12570
12570
|
score: 1,
|
|
12571
12571
|
// No expected value means no comparison needed
|
|
12572
12572
|
weight,
|
|
12573
12573
|
hit: true,
|
|
12574
|
-
message: `${
|
|
12574
|
+
message: `${path47}: no expected value`
|
|
12575
12575
|
};
|
|
12576
12576
|
}
|
|
12577
12577
|
if (candidateValue === void 0) {
|
|
12578
12578
|
if (required) {
|
|
12579
12579
|
return {
|
|
12580
|
-
path:
|
|
12580
|
+
path: path47,
|
|
12581
12581
|
score: 0,
|
|
12582
12582
|
weight,
|
|
12583
12583
|
hit: false,
|
|
12584
|
-
message: `${
|
|
12584
|
+
message: `${path47} (required, missing)`
|
|
12585
12585
|
};
|
|
12586
12586
|
}
|
|
12587
12587
|
return {
|
|
12588
|
-
path:
|
|
12588
|
+
path: path47,
|
|
12589
12589
|
score: 1,
|
|
12590
12590
|
// Don't penalize missing optional fields
|
|
12591
12591
|
weight: 0,
|
|
12592
12592
|
// Zero weight means it won't affect the score
|
|
12593
12593
|
hit: true,
|
|
12594
|
-
message: `${
|
|
12594
|
+
message: `${path47}: optional field missing`
|
|
12595
12595
|
};
|
|
12596
12596
|
}
|
|
12597
12597
|
switch (match) {
|
|
12598
12598
|
case "exact":
|
|
12599
|
-
return this.compareExact(
|
|
12599
|
+
return this.compareExact(path47, candidateValue, expectedValue, weight);
|
|
12600
12600
|
case "numeric_tolerance":
|
|
12601
12601
|
return this.compareNumericTolerance(
|
|
12602
|
-
|
|
12602
|
+
path47,
|
|
12603
12603
|
candidateValue,
|
|
12604
12604
|
expectedValue,
|
|
12605
12605
|
fieldConfig,
|
|
12606
12606
|
weight
|
|
12607
12607
|
);
|
|
12608
12608
|
case "date":
|
|
12609
|
-
return this.compareDate(
|
|
12609
|
+
return this.compareDate(path47, candidateValue, expectedValue, fieldConfig, weight);
|
|
12610
12610
|
default:
|
|
12611
12611
|
return {
|
|
12612
|
-
path:
|
|
12612
|
+
path: path47,
|
|
12613
12613
|
score: 0,
|
|
12614
12614
|
weight,
|
|
12615
12615
|
hit: false,
|
|
12616
|
-
message: `${
|
|
12616
|
+
message: `${path47}: unknown match type "${match}"`
|
|
12617
12617
|
};
|
|
12618
12618
|
}
|
|
12619
12619
|
}
|
|
12620
12620
|
/**
|
|
12621
12621
|
* Exact equality comparison.
|
|
12622
12622
|
*/
|
|
12623
|
-
compareExact(
|
|
12623
|
+
compareExact(path47, candidateValue, expectedValue, weight) {
|
|
12624
12624
|
if (deepEqual(candidateValue, expectedValue)) {
|
|
12625
12625
|
return {
|
|
12626
|
-
path:
|
|
12626
|
+
path: path47,
|
|
12627
12627
|
score: 1,
|
|
12628
12628
|
weight,
|
|
12629
12629
|
hit: true,
|
|
12630
|
-
message:
|
|
12630
|
+
message: path47
|
|
12631
12631
|
};
|
|
12632
12632
|
}
|
|
12633
12633
|
if (typeof candidateValue !== typeof expectedValue) {
|
|
12634
12634
|
return {
|
|
12635
|
-
path:
|
|
12635
|
+
path: path47,
|
|
12636
12636
|
score: 0,
|
|
12637
12637
|
weight,
|
|
12638
12638
|
hit: false,
|
|
12639
|
-
message: `${
|
|
12639
|
+
message: `${path47} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
|
|
12640
12640
|
};
|
|
12641
12641
|
}
|
|
12642
12642
|
return {
|
|
12643
|
-
path:
|
|
12643
|
+
path: path47,
|
|
12644
12644
|
score: 0,
|
|
12645
12645
|
weight,
|
|
12646
12646
|
hit: false,
|
|
12647
|
-
message: `${
|
|
12647
|
+
message: `${path47} (value mismatch)`
|
|
12648
12648
|
};
|
|
12649
12649
|
}
|
|
12650
12650
|
/**
|
|
12651
12651
|
* Numeric comparison with absolute or relative tolerance.
|
|
12652
12652
|
*/
|
|
12653
|
-
compareNumericTolerance(
|
|
12653
|
+
compareNumericTolerance(path47, candidateValue, expectedValue, fieldConfig, weight) {
|
|
12654
12654
|
const { tolerance = 0, relative = false } = fieldConfig;
|
|
12655
12655
|
const candidateNum = toNumber(candidateValue);
|
|
12656
12656
|
const expectedNum = toNumber(expectedValue);
|
|
12657
12657
|
if (candidateNum === null || expectedNum === null) {
|
|
12658
12658
|
return {
|
|
12659
|
-
path:
|
|
12659
|
+
path: path47,
|
|
12660
12660
|
score: 0,
|
|
12661
12661
|
weight,
|
|
12662
12662
|
hit: false,
|
|
12663
|
-
message: `${
|
|
12663
|
+
message: `${path47} (non-numeric value)`
|
|
12664
12664
|
};
|
|
12665
12665
|
}
|
|
12666
12666
|
if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
|
|
12667
12667
|
return {
|
|
12668
|
-
path:
|
|
12668
|
+
path: path47,
|
|
12669
12669
|
score: 0,
|
|
12670
12670
|
weight,
|
|
12671
12671
|
hit: false,
|
|
12672
|
-
message: `${
|
|
12672
|
+
message: `${path47} (invalid numeric value)`
|
|
12673
12673
|
};
|
|
12674
12674
|
}
|
|
12675
12675
|
const diff = Math.abs(candidateNum - expectedNum);
|
|
@@ -12682,61 +12682,61 @@ var FieldAccuracyEvaluator = class {
|
|
|
12682
12682
|
}
|
|
12683
12683
|
if (withinTolerance) {
|
|
12684
12684
|
return {
|
|
12685
|
-
path:
|
|
12685
|
+
path: path47,
|
|
12686
12686
|
score: 1,
|
|
12687
12687
|
weight,
|
|
12688
12688
|
hit: true,
|
|
12689
|
-
message: `${
|
|
12689
|
+
message: `${path47} (within tolerance: diff=${diff.toFixed(2)})`
|
|
12690
12690
|
};
|
|
12691
12691
|
}
|
|
12692
12692
|
return {
|
|
12693
|
-
path:
|
|
12693
|
+
path: path47,
|
|
12694
12694
|
score: 0,
|
|
12695
12695
|
weight,
|
|
12696
12696
|
hit: false,
|
|
12697
|
-
message: `${
|
|
12697
|
+
message: `${path47} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
|
|
12698
12698
|
};
|
|
12699
12699
|
}
|
|
12700
12700
|
/**
|
|
12701
12701
|
* Date comparison with format normalization.
|
|
12702
12702
|
*/
|
|
12703
|
-
compareDate(
|
|
12703
|
+
compareDate(path47, candidateValue, expectedValue, fieldConfig, weight) {
|
|
12704
12704
|
const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
|
|
12705
12705
|
const candidateDate = parseDate(String(candidateValue), formats);
|
|
12706
12706
|
const expectedDate = parseDate(String(expectedValue), formats);
|
|
12707
12707
|
if (candidateDate === null) {
|
|
12708
12708
|
return {
|
|
12709
|
-
path:
|
|
12709
|
+
path: path47,
|
|
12710
12710
|
score: 0,
|
|
12711
12711
|
weight,
|
|
12712
12712
|
hit: false,
|
|
12713
|
-
message: `${
|
|
12713
|
+
message: `${path47} (unparseable candidate date)`
|
|
12714
12714
|
};
|
|
12715
12715
|
}
|
|
12716
12716
|
if (expectedDate === null) {
|
|
12717
12717
|
return {
|
|
12718
|
-
path:
|
|
12718
|
+
path: path47,
|
|
12719
12719
|
score: 0,
|
|
12720
12720
|
weight,
|
|
12721
12721
|
hit: false,
|
|
12722
|
-
message: `${
|
|
12722
|
+
message: `${path47} (unparseable expected date)`
|
|
12723
12723
|
};
|
|
12724
12724
|
}
|
|
12725
12725
|
if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
|
|
12726
12726
|
return {
|
|
12727
|
-
path:
|
|
12727
|
+
path: path47,
|
|
12728
12728
|
score: 1,
|
|
12729
12729
|
weight,
|
|
12730
12730
|
hit: true,
|
|
12731
|
-
message:
|
|
12731
|
+
message: path47
|
|
12732
12732
|
};
|
|
12733
12733
|
}
|
|
12734
12734
|
return {
|
|
12735
|
-
path:
|
|
12735
|
+
path: path47,
|
|
12736
12736
|
score: 0,
|
|
12737
12737
|
weight,
|
|
12738
12738
|
hit: false,
|
|
12739
|
-
message: `${
|
|
12739
|
+
message: `${path47} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
|
|
12740
12740
|
};
|
|
12741
12741
|
}
|
|
12742
12742
|
/**
|
|
@@ -12769,11 +12769,11 @@ var FieldAccuracyEvaluator = class {
|
|
|
12769
12769
|
};
|
|
12770
12770
|
}
|
|
12771
12771
|
};
|
|
12772
|
-
function resolvePath(obj,
|
|
12773
|
-
if (!
|
|
12772
|
+
function resolvePath(obj, path47) {
|
|
12773
|
+
if (!path47 || !obj) {
|
|
12774
12774
|
return void 0;
|
|
12775
12775
|
}
|
|
12776
|
-
const parts =
|
|
12776
|
+
const parts = path47.split(/\.|\[|\]/).filter((p) => p.length > 0);
|
|
12777
12777
|
let current = obj;
|
|
12778
12778
|
for (const part of parts) {
|
|
12779
12779
|
if (current === null || current === void 0) {
|
|
@@ -13257,8 +13257,8 @@ var TokenUsageEvaluator = class {
|
|
|
13257
13257
|
};
|
|
13258
13258
|
|
|
13259
13259
|
// src/evaluation/evaluators/tool-trajectory.ts
|
|
13260
|
-
function getNestedValue(obj,
|
|
13261
|
-
const parts =
|
|
13260
|
+
function getNestedValue(obj, path47) {
|
|
13261
|
+
const parts = path47.split(".");
|
|
13262
13262
|
let current = obj;
|
|
13263
13263
|
for (const part of parts) {
|
|
13264
13264
|
if (current === null || current === void 0 || typeof current !== "object") {
|
|
@@ -17767,10 +17767,9 @@ function trimBaselineResult(result) {
|
|
|
17767
17767
|
}
|
|
17768
17768
|
|
|
17769
17769
|
// src/evaluation/category.ts
|
|
17770
|
-
import path47 from "node:path";
|
|
17771
17770
|
var DEFAULT_CATEGORY = "Uncategorized";
|
|
17772
17771
|
function deriveCategory(relativePath) {
|
|
17773
|
-
const parts = relativePath.split(
|
|
17772
|
+
const parts = relativePath.split(/[/\\]/);
|
|
17774
17773
|
if (parts.length <= 1) {
|
|
17775
17774
|
return DEFAULT_CATEGORY;
|
|
17776
17775
|
}
|