@agentv/core 4.3.0 → 4.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +55 -56
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +50 -51
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -1315,12 +1315,12 @@ function serializeAttributeValue(value) {
|
|
|
1315
1315
|
if (Array.isArray(value)) return { arrayValue: { values: value.map(serializeAttributeValue) } };
|
|
1316
1316
|
return { stringValue: String(value) };
|
|
1317
1317
|
}
|
|
1318
|
-
var import_promises35,
|
|
1318
|
+
var import_promises35, import_node_path51, OtlpJsonFileExporter;
|
|
1319
1319
|
var init_otlp_json_file_exporter = __esm({
|
|
1320
1320
|
"src/observability/otlp-json-file-exporter.ts"() {
|
|
1321
1321
|
"use strict";
|
|
1322
1322
|
import_promises35 = require("fs/promises");
|
|
1323
|
-
|
|
1323
|
+
import_node_path51 = require("path");
|
|
1324
1324
|
OtlpJsonFileExporter = class {
|
|
1325
1325
|
// biome-ignore lint/suspicious/noExplicitAny: serialized span data
|
|
1326
1326
|
spans = [];
|
|
@@ -1359,7 +1359,7 @@ var init_otlp_json_file_exporter = __esm({
|
|
|
1359
1359
|
}
|
|
1360
1360
|
async flush() {
|
|
1361
1361
|
if (this.spans.length === 0) return;
|
|
1362
|
-
await (0, import_promises35.mkdir)((0,
|
|
1362
|
+
await (0, import_promises35.mkdir)((0, import_node_path51.dirname)(this.filePath), { recursive: true });
|
|
1363
1363
|
const otlpJson = {
|
|
1364
1364
|
resourceSpans: [
|
|
1365
1365
|
{
|
|
@@ -11466,8 +11466,8 @@ function resolveCliConfig(target, env, evalFilePath) {
|
|
|
11466
11466
|
const parseResult = CliTargetInputSchema.safeParse(target, { errorMap: cliErrorMap });
|
|
11467
11467
|
if (!parseResult.success) {
|
|
11468
11468
|
const firstError = parseResult.error.errors[0];
|
|
11469
|
-
const
|
|
11470
|
-
const prefix =
|
|
11469
|
+
const path50 = firstError?.path.join(".") || "";
|
|
11470
|
+
const prefix = path50 ? `${target.name} ${path50}: ` : `${target.name}: `;
|
|
11471
11471
|
throw new Error(`${prefix}${firstError?.message}`);
|
|
11472
11472
|
}
|
|
11473
11473
|
const normalized = normalizeCliTargetInput(parseResult.data, env, evalFilePath);
|
|
@@ -13501,13 +13501,13 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
|
13501
13501
|
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
13502
13502
|
const { mkdir: mkdir17, readFile: readFile16, rm: rm6, writeFile: writeFile9 } = await import("fs/promises");
|
|
13503
13503
|
const { tmpdir: tmpdir3 } = await import("os");
|
|
13504
|
-
const
|
|
13504
|
+
const path50 = await import("path");
|
|
13505
13505
|
const { randomUUID: randomUUID10 } = await import("crypto");
|
|
13506
|
-
const dir =
|
|
13506
|
+
const dir = path50.join(tmpdir3(), `agentv-exec-${randomUUID10()}`);
|
|
13507
13507
|
await mkdir17(dir, { recursive: true });
|
|
13508
|
-
const stdinPath =
|
|
13509
|
-
const stdoutPath =
|
|
13510
|
-
const stderrPath =
|
|
13508
|
+
const stdinPath = path50.join(dir, "stdin.txt");
|
|
13509
|
+
const stdoutPath = path50.join(dir, "stdout.txt");
|
|
13510
|
+
const stderrPath = path50.join(dir, "stderr.txt");
|
|
13511
13511
|
await writeFile9(stdinPath, stdinPayload, "utf8");
|
|
13512
13512
|
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
13513
13513
|
const { spawn: spawn5 } = await import("child_process");
|
|
@@ -15720,115 +15720,115 @@ var FieldAccuracyEvaluator = class {
|
|
|
15720
15720
|
* Evaluate a single field against the expected value.
|
|
15721
15721
|
*/
|
|
15722
15722
|
evaluateField(fieldConfig, candidateData, expectedData) {
|
|
15723
|
-
const { path:
|
|
15724
|
-
const candidateValue = resolvePath(candidateData,
|
|
15725
|
-
const expectedValue = resolvePath(expectedData,
|
|
15723
|
+
const { path: path50, match, required = true, weight = 1 } = fieldConfig;
|
|
15724
|
+
const candidateValue = resolvePath(candidateData, path50);
|
|
15725
|
+
const expectedValue = resolvePath(expectedData, path50);
|
|
15726
15726
|
if (expectedValue === void 0) {
|
|
15727
15727
|
return {
|
|
15728
|
-
path:
|
|
15728
|
+
path: path50,
|
|
15729
15729
|
score: 1,
|
|
15730
15730
|
// No expected value means no comparison needed
|
|
15731
15731
|
weight,
|
|
15732
15732
|
hit: true,
|
|
15733
|
-
message: `${
|
|
15733
|
+
message: `${path50}: no expected value`
|
|
15734
15734
|
};
|
|
15735
15735
|
}
|
|
15736
15736
|
if (candidateValue === void 0) {
|
|
15737
15737
|
if (required) {
|
|
15738
15738
|
return {
|
|
15739
|
-
path:
|
|
15739
|
+
path: path50,
|
|
15740
15740
|
score: 0,
|
|
15741
15741
|
weight,
|
|
15742
15742
|
hit: false,
|
|
15743
|
-
message: `${
|
|
15743
|
+
message: `${path50} (required, missing)`
|
|
15744
15744
|
};
|
|
15745
15745
|
}
|
|
15746
15746
|
return {
|
|
15747
|
-
path:
|
|
15747
|
+
path: path50,
|
|
15748
15748
|
score: 1,
|
|
15749
15749
|
// Don't penalize missing optional fields
|
|
15750
15750
|
weight: 0,
|
|
15751
15751
|
// Zero weight means it won't affect the score
|
|
15752
15752
|
hit: true,
|
|
15753
|
-
message: `${
|
|
15753
|
+
message: `${path50}: optional field missing`
|
|
15754
15754
|
};
|
|
15755
15755
|
}
|
|
15756
15756
|
switch (match) {
|
|
15757
15757
|
case "exact":
|
|
15758
|
-
return this.compareExact(
|
|
15758
|
+
return this.compareExact(path50, candidateValue, expectedValue, weight);
|
|
15759
15759
|
case "numeric_tolerance":
|
|
15760
15760
|
return this.compareNumericTolerance(
|
|
15761
|
-
|
|
15761
|
+
path50,
|
|
15762
15762
|
candidateValue,
|
|
15763
15763
|
expectedValue,
|
|
15764
15764
|
fieldConfig,
|
|
15765
15765
|
weight
|
|
15766
15766
|
);
|
|
15767
15767
|
case "date":
|
|
15768
|
-
return this.compareDate(
|
|
15768
|
+
return this.compareDate(path50, candidateValue, expectedValue, fieldConfig, weight);
|
|
15769
15769
|
default:
|
|
15770
15770
|
return {
|
|
15771
|
-
path:
|
|
15771
|
+
path: path50,
|
|
15772
15772
|
score: 0,
|
|
15773
15773
|
weight,
|
|
15774
15774
|
hit: false,
|
|
15775
|
-
message: `${
|
|
15775
|
+
message: `${path50}: unknown match type "${match}"`
|
|
15776
15776
|
};
|
|
15777
15777
|
}
|
|
15778
15778
|
}
|
|
15779
15779
|
/**
|
|
15780
15780
|
* Exact equality comparison.
|
|
15781
15781
|
*/
|
|
15782
|
-
compareExact(
|
|
15782
|
+
compareExact(path50, candidateValue, expectedValue, weight) {
|
|
15783
15783
|
if (deepEqual(candidateValue, expectedValue)) {
|
|
15784
15784
|
return {
|
|
15785
|
-
path:
|
|
15785
|
+
path: path50,
|
|
15786
15786
|
score: 1,
|
|
15787
15787
|
weight,
|
|
15788
15788
|
hit: true,
|
|
15789
|
-
message:
|
|
15789
|
+
message: path50
|
|
15790
15790
|
};
|
|
15791
15791
|
}
|
|
15792
15792
|
if (typeof candidateValue !== typeof expectedValue) {
|
|
15793
15793
|
return {
|
|
15794
|
-
path:
|
|
15794
|
+
path: path50,
|
|
15795
15795
|
score: 0,
|
|
15796
15796
|
weight,
|
|
15797
15797
|
hit: false,
|
|
15798
|
-
message: `${
|
|
15798
|
+
message: `${path50} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
|
|
15799
15799
|
};
|
|
15800
15800
|
}
|
|
15801
15801
|
return {
|
|
15802
|
-
path:
|
|
15802
|
+
path: path50,
|
|
15803
15803
|
score: 0,
|
|
15804
15804
|
weight,
|
|
15805
15805
|
hit: false,
|
|
15806
|
-
message: `${
|
|
15806
|
+
message: `${path50} (value mismatch)`
|
|
15807
15807
|
};
|
|
15808
15808
|
}
|
|
15809
15809
|
/**
|
|
15810
15810
|
* Numeric comparison with absolute or relative tolerance.
|
|
15811
15811
|
*/
|
|
15812
|
-
compareNumericTolerance(
|
|
15812
|
+
compareNumericTolerance(path50, candidateValue, expectedValue, fieldConfig, weight) {
|
|
15813
15813
|
const { tolerance = 0, relative = false } = fieldConfig;
|
|
15814
15814
|
const candidateNum = toNumber(candidateValue);
|
|
15815
15815
|
const expectedNum = toNumber(expectedValue);
|
|
15816
15816
|
if (candidateNum === null || expectedNum === null) {
|
|
15817
15817
|
return {
|
|
15818
|
-
path:
|
|
15818
|
+
path: path50,
|
|
15819
15819
|
score: 0,
|
|
15820
15820
|
weight,
|
|
15821
15821
|
hit: false,
|
|
15822
|
-
message: `${
|
|
15822
|
+
message: `${path50} (non-numeric value)`
|
|
15823
15823
|
};
|
|
15824
15824
|
}
|
|
15825
15825
|
if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
|
|
15826
15826
|
return {
|
|
15827
|
-
path:
|
|
15827
|
+
path: path50,
|
|
15828
15828
|
score: 0,
|
|
15829
15829
|
weight,
|
|
15830
15830
|
hit: false,
|
|
15831
|
-
message: `${
|
|
15831
|
+
message: `${path50} (invalid numeric value)`
|
|
15832
15832
|
};
|
|
15833
15833
|
}
|
|
15834
15834
|
const diff = Math.abs(candidateNum - expectedNum);
|
|
@@ -15841,61 +15841,61 @@ var FieldAccuracyEvaluator = class {
|
|
|
15841
15841
|
}
|
|
15842
15842
|
if (withinTolerance) {
|
|
15843
15843
|
return {
|
|
15844
|
-
path:
|
|
15844
|
+
path: path50,
|
|
15845
15845
|
score: 1,
|
|
15846
15846
|
weight,
|
|
15847
15847
|
hit: true,
|
|
15848
|
-
message: `${
|
|
15848
|
+
message: `${path50} (within tolerance: diff=${diff.toFixed(2)})`
|
|
15849
15849
|
};
|
|
15850
15850
|
}
|
|
15851
15851
|
return {
|
|
15852
|
-
path:
|
|
15852
|
+
path: path50,
|
|
15853
15853
|
score: 0,
|
|
15854
15854
|
weight,
|
|
15855
15855
|
hit: false,
|
|
15856
|
-
message: `${
|
|
15856
|
+
message: `${path50} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
|
|
15857
15857
|
};
|
|
15858
15858
|
}
|
|
15859
15859
|
/**
|
|
15860
15860
|
* Date comparison with format normalization.
|
|
15861
15861
|
*/
|
|
15862
|
-
compareDate(
|
|
15862
|
+
compareDate(path50, candidateValue, expectedValue, fieldConfig, weight) {
|
|
15863
15863
|
const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
|
|
15864
15864
|
const candidateDate = parseDate(String(candidateValue), formats);
|
|
15865
15865
|
const expectedDate = parseDate(String(expectedValue), formats);
|
|
15866
15866
|
if (candidateDate === null) {
|
|
15867
15867
|
return {
|
|
15868
|
-
path:
|
|
15868
|
+
path: path50,
|
|
15869
15869
|
score: 0,
|
|
15870
15870
|
weight,
|
|
15871
15871
|
hit: false,
|
|
15872
|
-
message: `${
|
|
15872
|
+
message: `${path50} (unparseable candidate date)`
|
|
15873
15873
|
};
|
|
15874
15874
|
}
|
|
15875
15875
|
if (expectedDate === null) {
|
|
15876
15876
|
return {
|
|
15877
|
-
path:
|
|
15877
|
+
path: path50,
|
|
15878
15878
|
score: 0,
|
|
15879
15879
|
weight,
|
|
15880
15880
|
hit: false,
|
|
15881
|
-
message: `${
|
|
15881
|
+
message: `${path50} (unparseable expected date)`
|
|
15882
15882
|
};
|
|
15883
15883
|
}
|
|
15884
15884
|
if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
|
|
15885
15885
|
return {
|
|
15886
|
-
path:
|
|
15886
|
+
path: path50,
|
|
15887
15887
|
score: 1,
|
|
15888
15888
|
weight,
|
|
15889
15889
|
hit: true,
|
|
15890
|
-
message:
|
|
15890
|
+
message: path50
|
|
15891
15891
|
};
|
|
15892
15892
|
}
|
|
15893
15893
|
return {
|
|
15894
|
-
path:
|
|
15894
|
+
path: path50,
|
|
15895
15895
|
score: 0,
|
|
15896
15896
|
weight,
|
|
15897
15897
|
hit: false,
|
|
15898
|
-
message: `${
|
|
15898
|
+
message: `${path50} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
|
|
15899
15899
|
};
|
|
15900
15900
|
}
|
|
15901
15901
|
/**
|
|
@@ -15928,11 +15928,11 @@ var FieldAccuracyEvaluator = class {
|
|
|
15928
15928
|
};
|
|
15929
15929
|
}
|
|
15930
15930
|
};
|
|
15931
|
-
function resolvePath(obj,
|
|
15932
|
-
if (!
|
|
15931
|
+
function resolvePath(obj, path50) {
|
|
15932
|
+
if (!path50 || !obj) {
|
|
15933
15933
|
return void 0;
|
|
15934
15934
|
}
|
|
15935
|
-
const parts =
|
|
15935
|
+
const parts = path50.split(/\.|\[|\]/).filter((p) => p.length > 0);
|
|
15936
15936
|
let current = obj;
|
|
15937
15937
|
for (const part of parts) {
|
|
15938
15938
|
if (current === null || current === void 0) {
|
|
@@ -16416,8 +16416,8 @@ var TokenUsageEvaluator = class {
|
|
|
16416
16416
|
};
|
|
16417
16417
|
|
|
16418
16418
|
// src/evaluation/evaluators/tool-trajectory.ts
|
|
16419
|
-
function getNestedValue(obj,
|
|
16420
|
-
const parts =
|
|
16419
|
+
function getNestedValue(obj, path50) {
|
|
16420
|
+
const parts = path50.split(".");
|
|
16421
16421
|
let current = obj;
|
|
16422
16422
|
for (const part of parts) {
|
|
16423
16423
|
if (current === null || current === void 0 || typeof current !== "object") {
|
|
@@ -20926,10 +20926,9 @@ function trimBaselineResult(result) {
|
|
|
20926
20926
|
}
|
|
20927
20927
|
|
|
20928
20928
|
// src/evaluation/category.ts
|
|
20929
|
-
var import_node_path51 = __toESM(require("path"), 1);
|
|
20930
20929
|
var DEFAULT_CATEGORY = "Uncategorized";
|
|
20931
20930
|
function deriveCategory(relativePath) {
|
|
20932
|
-
const parts = relativePath.split(
|
|
20931
|
+
const parts = relativePath.split(/[/\\]/);
|
|
20933
20932
|
if (parts.length <= 1) {
|
|
20934
20933
|
return DEFAULT_CATEGORY;
|
|
20935
20934
|
}
|