@agentv/core 1.2.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-V3JCB3HI.js → chunk-4A6L2F6L.js} +11 -5
- package/dist/chunk-4A6L2F6L.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +12 -0
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +13 -1
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +176 -8
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +167 -5
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-V3JCB3HI.js.map +0 -1
package/dist/index.d.cts
CHANGED
|
@@ -105,7 +105,7 @@ type TestMessageRole = (typeof TEST_MESSAGE_ROLE_VALUES)[number];
|
|
|
105
105
|
/**
|
|
106
106
|
* Text or structured payload attached to a message.
|
|
107
107
|
*/
|
|
108
|
-
type TestMessageContent = string | readonly JsonObject[];
|
|
108
|
+
type TestMessageContent = string | JsonObject | readonly JsonObject[];
|
|
109
109
|
/**
|
|
110
110
|
* System-authored instruction message.
|
|
111
111
|
*/
|
package/dist/index.d.ts
CHANGED
|
@@ -105,7 +105,7 @@ type TestMessageRole = (typeof TEST_MESSAGE_ROLE_VALUES)[number];
|
|
|
105
105
|
/**
|
|
106
106
|
* Text or structured payload attached to a message.
|
|
107
107
|
*/
|
|
108
|
-
type TestMessageContent = string | readonly JsonObject[];
|
|
108
|
+
type TestMessageContent = string | JsonObject | readonly JsonObject[];
|
|
109
109
|
/**
|
|
110
110
|
* System-authored instruction message.
|
|
111
111
|
*/
|
package/dist/index.js
CHANGED
|
@@ -9,7 +9,7 @@ import {
|
|
|
9
9
|
readTextFile,
|
|
10
10
|
resolveFileReference,
|
|
11
11
|
resolveTargetDefinition
|
|
12
|
-
} from "./chunk-V3JCB3HI.js";
|
|
12
|
+
} from "./chunk-4A6L2F6L.js";
|
|
13
13
|
|
|
14
14
|
// src/evaluation/types.ts
|
|
15
15
|
var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
|
|
@@ -774,6 +774,17 @@ async function processMessages(options) {
|
|
|
774
774
|
}
|
|
775
775
|
continue;
|
|
776
776
|
}
|
|
777
|
+
if (isJsonObject(content)) {
|
|
778
|
+
const rendered = JSON.stringify(content, null, 2);
|
|
779
|
+
segments.push({ type: "text", value: rendered });
|
|
780
|
+
if (textParts) {
|
|
781
|
+
textParts.push(rendered);
|
|
782
|
+
}
|
|
783
|
+
continue;
|
|
784
|
+
}
|
|
785
|
+
if (!Array.isArray(content)) {
|
|
786
|
+
continue;
|
|
787
|
+
}
|
|
777
788
|
for (const rawSegment of content) {
|
|
778
789
|
if (!isJsonObject(rawSegment)) {
|
|
779
790
|
continue;
|
|
@@ -1000,6 +1011,11 @@ async function buildPromptInputs(testCase, mode = "lm") {
|
|
|
1000
1011
|
}
|
|
1001
1012
|
}
|
|
1002
1013
|
}
|
|
1014
|
+
} else if (isJsonObject(message.content)) {
|
|
1015
|
+
const rendered = JSON.stringify(message.content, null, 2);
|
|
1016
|
+
if (rendered.trim().length > 0) {
|
|
1017
|
+
messageSegments.push({ type: "text", value: rendered });
|
|
1018
|
+
}
|
|
1003
1019
|
}
|
|
1004
1020
|
segmentsByMessage.push(messageSegments);
|
|
1005
1021
|
}
|
|
@@ -1733,7 +1749,7 @@ var CliProvider = class {
|
|
|
1733
1749
|
id;
|
|
1734
1750
|
kind = "cli";
|
|
1735
1751
|
targetName;
|
|
1736
|
-
supportsBatch = false;
|
|
1752
|
+
supportsBatch = true;
|
|
1737
1753
|
config;
|
|
1738
1754
|
runCommand;
|
|
1739
1755
|
verbose;
|
|
@@ -1753,6 +1769,11 @@ var CliProvider = class {
|
|
|
1753
1769
|
const outputFilePath = generateOutputFilePath(request.evalCaseId);
|
|
1754
1770
|
const templateValues = buildTemplateValues(request, this.config, outputFilePath);
|
|
1755
1771
|
const renderedCommand = renderTemplate(this.config.commandTemplate, templateValues);
|
|
1772
|
+
if (this.verbose) {
|
|
1773
|
+
console.log(
|
|
1774
|
+
`[cli-provider:${this.targetName}] cwd=${this.config.cwd ?? ""} command=${renderedCommand}`
|
|
1775
|
+
);
|
|
1776
|
+
}
|
|
1756
1777
|
const result = await this.runCommand(renderedCommand, {
|
|
1757
1778
|
cwd: this.config.cwd,
|
|
1758
1779
|
env: process.env,
|
|
@@ -1787,6 +1808,114 @@ var CliProvider = class {
|
|
|
1787
1808
|
}
|
|
1788
1809
|
};
|
|
1789
1810
|
}
|
|
1811
|
+
async invokeBatch(requests) {
|
|
1812
|
+
if (requests.length === 0) {
|
|
1813
|
+
return [];
|
|
1814
|
+
}
|
|
1815
|
+
for (const request of requests) {
|
|
1816
|
+
if (request.signal?.aborted) {
|
|
1817
|
+
throw new Error("CLI provider batch request was aborted before execution");
|
|
1818
|
+
}
|
|
1819
|
+
}
|
|
1820
|
+
const controller = new AbortController();
|
|
1821
|
+
for (const request of requests) {
|
|
1822
|
+
request.signal?.addEventListener("abort", () => controller.abort(), { once: true });
|
|
1823
|
+
}
|
|
1824
|
+
await this.ensureHealthy(controller.signal);
|
|
1825
|
+
const outputFilePath = generateOutputFilePath("batch", ".jsonl");
|
|
1826
|
+
const batchInputFiles = [];
|
|
1827
|
+
for (const request of requests) {
|
|
1828
|
+
if (request.inputFiles && request.inputFiles.length > 0) {
|
|
1829
|
+
batchInputFiles.push(...request.inputFiles);
|
|
1830
|
+
}
|
|
1831
|
+
}
|
|
1832
|
+
const templateValues = buildTemplateValues(
|
|
1833
|
+
{
|
|
1834
|
+
question: "",
|
|
1835
|
+
guidelines: "",
|
|
1836
|
+
inputFiles: batchInputFiles,
|
|
1837
|
+
evalCaseId: "batch",
|
|
1838
|
+
attempt: 0
|
|
1839
|
+
},
|
|
1840
|
+
this.config,
|
|
1841
|
+
outputFilePath
|
|
1842
|
+
);
|
|
1843
|
+
const renderedCommand = renderTemplate(this.config.commandTemplate, templateValues);
|
|
1844
|
+
if (this.verbose) {
|
|
1845
|
+
console.log(
|
|
1846
|
+
`[cli-provider:${this.targetName}] (batch size=${requests.length}) cwd=${this.config.cwd ?? ""} command=${renderedCommand}`
|
|
1847
|
+
);
|
|
1848
|
+
}
|
|
1849
|
+
const result = await this.runCommand(renderedCommand, {
|
|
1850
|
+
cwd: this.config.cwd,
|
|
1851
|
+
env: process.env,
|
|
1852
|
+
timeoutMs: this.config.timeoutMs,
|
|
1853
|
+
signal: controller.signal
|
|
1854
|
+
});
|
|
1855
|
+
if (result.failed || (result.exitCode ?? 0) !== 0) {
|
|
1856
|
+
if (controller.signal.aborted) {
|
|
1857
|
+
throw new Error("CLI provider request was aborted");
|
|
1858
|
+
}
|
|
1859
|
+
if (result.timedOut) {
|
|
1860
|
+
throw new Error(
|
|
1861
|
+
`CLI provider timed out${formatTimeoutSuffix(this.config.timeoutMs ?? void 0)}`
|
|
1862
|
+
);
|
|
1863
|
+
}
|
|
1864
|
+
const codeText = result.exitCode !== null ? result.exitCode : "unknown";
|
|
1865
|
+
const detail = result.stderr.trim() || result.stdout.trim();
|
|
1866
|
+
const message = detail ? `${detail} (exit code ${codeText})` : `CLI exited with code ${codeText}`;
|
|
1867
|
+
throw new Error(message);
|
|
1868
|
+
}
|
|
1869
|
+
const responseContent = await this.readAndCleanupOutputFile(outputFilePath);
|
|
1870
|
+
const recordsById = this.parseJsonlBatchOutput(responseContent);
|
|
1871
|
+
const requestedIds = requests.map((request) => request.evalCaseId).filter((id) => typeof id === "string" && id.trim().length > 0);
|
|
1872
|
+
const missingIds = requestedIds.filter((id) => !recordsById.has(id));
|
|
1873
|
+
if (missingIds.length > 0) {
|
|
1874
|
+
throw new Error(`CLI batch output missing ids: ${missingIds.join(", ")}`);
|
|
1875
|
+
}
|
|
1876
|
+
const responses = requests.map((request) => {
|
|
1877
|
+
const evalCaseId = request.evalCaseId;
|
|
1878
|
+
if (!evalCaseId) {
|
|
1879
|
+
return {
|
|
1880
|
+
text: "",
|
|
1881
|
+
raw: {
|
|
1882
|
+
command: renderedCommand,
|
|
1883
|
+
stderr: result.stderr,
|
|
1884
|
+
exitCode: result.exitCode ?? 0,
|
|
1885
|
+
cwd: this.config.cwd,
|
|
1886
|
+
outputFile: outputFilePath
|
|
1887
|
+
}
|
|
1888
|
+
};
|
|
1889
|
+
}
|
|
1890
|
+
const parsed = recordsById.get(evalCaseId);
|
|
1891
|
+
if (!parsed) {
|
|
1892
|
+
return {
|
|
1893
|
+
text: "",
|
|
1894
|
+
raw: {
|
|
1895
|
+
command: renderedCommand,
|
|
1896
|
+
stderr: result.stderr,
|
|
1897
|
+
exitCode: result.exitCode ?? 0,
|
|
1898
|
+
cwd: this.config.cwd,
|
|
1899
|
+
outputFile: outputFilePath
|
|
1900
|
+
}
|
|
1901
|
+
};
|
|
1902
|
+
}
|
|
1903
|
+
return {
|
|
1904
|
+
text: parsed.text,
|
|
1905
|
+
trace: parsed.trace,
|
|
1906
|
+
traceRef: parsed.traceRef,
|
|
1907
|
+
raw: {
|
|
1908
|
+
command: renderedCommand,
|
|
1909
|
+
stderr: result.stderr,
|
|
1910
|
+
exitCode: result.exitCode ?? 0,
|
|
1911
|
+
cwd: this.config.cwd,
|
|
1912
|
+
outputFile: outputFilePath,
|
|
1913
|
+
recordId: evalCaseId
|
|
1914
|
+
}
|
|
1915
|
+
};
|
|
1916
|
+
});
|
|
1917
|
+
return responses;
|
|
1918
|
+
}
|
|
1790
1919
|
/**
|
|
1791
1920
|
* Parse output content from CLI.
|
|
1792
1921
|
* If the content is valid JSON with a 'text' field, extract text and optional trace.
|
|
@@ -1812,6 +1941,38 @@ var CliProvider = class {
|
|
|
1812
1941
|
const validEvents = trace.filter(isTraceEvent);
|
|
1813
1942
|
return validEvents.length > 0 ? validEvents : void 0;
|
|
1814
1943
|
}
|
|
1944
|
+
parseJsonlBatchOutput(content) {
|
|
1945
|
+
const records = /* @__PURE__ */ new Map();
|
|
1946
|
+
const lines = content.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.length > 0);
|
|
1947
|
+
for (const line of lines) {
|
|
1948
|
+
let parsed;
|
|
1949
|
+
try {
|
|
1950
|
+
parsed = JSON.parse(line);
|
|
1951
|
+
} catch (error) {
|
|
1952
|
+
const reason = error instanceof Error ? error.message : String(error);
|
|
1953
|
+
throw new Error(`CLI batch output contains invalid JSONL line: ${reason}`);
|
|
1954
|
+
}
|
|
1955
|
+
if (typeof parsed !== "object" || parsed === null) {
|
|
1956
|
+
throw new Error("CLI batch output JSONL line must be an object");
|
|
1957
|
+
}
|
|
1958
|
+
const obj = parsed;
|
|
1959
|
+
const id = typeof obj.id === "string" ? obj.id : void 0;
|
|
1960
|
+
if (!id || id.trim().length === 0) {
|
|
1961
|
+
throw new Error("CLI batch output JSONL line missing required string field: id");
|
|
1962
|
+
}
|
|
1963
|
+
if (records.has(id)) {
|
|
1964
|
+
throw new Error(`CLI batch output contains duplicate id: ${id}`);
|
|
1965
|
+
}
|
|
1966
|
+
const text = typeof obj.text === "string" ? obj.text : obj.text === void 0 ? "" : JSON.stringify(obj.text);
|
|
1967
|
+
const traceRef = typeof obj.traceRef === "string" ? obj.traceRef : typeof obj.trace_ref === "string" ? obj.trace_ref : void 0;
|
|
1968
|
+
records.set(id, {
|
|
1969
|
+
text,
|
|
1970
|
+
trace: this.parseTrace(obj.trace),
|
|
1971
|
+
traceRef
|
|
1972
|
+
});
|
|
1973
|
+
}
|
|
1974
|
+
return records;
|
|
1975
|
+
}
|
|
1815
1976
|
async readAndCleanupOutputFile(filePath) {
|
|
1816
1977
|
try {
|
|
1817
1978
|
const content = await readTextFile(filePath);
|
|
@@ -1873,7 +2034,7 @@ var CliProvider = class {
|
|
|
1873
2034
|
);
|
|
1874
2035
|
if (this.verbose) {
|
|
1875
2036
|
console.log(
|
|
1876
|
-
`[cli-provider:${this.targetName}] (healthcheck)
|
|
2037
|
+
`[cli-provider:${this.targetName}] (healthcheck) cwd=${healthcheck.cwd ?? this.config.cwd ?? ""} command=${renderedCommand}`
|
|
1877
2038
|
);
|
|
1878
2039
|
}
|
|
1879
2040
|
const result = await this.runCommand(renderedCommand, {
|
|
@@ -1941,11 +2102,11 @@ function shellEscape(value) {
|
|
|
1941
2102
|
}
|
|
1942
2103
|
return `'${value.replace(/'/g, `'"'"'`)}'`;
|
|
1943
2104
|
}
|
|
1944
|
-
function generateOutputFilePath(evalCaseId) {
|
|
2105
|
+
function generateOutputFilePath(evalCaseId, extension = ".json") {
|
|
1945
2106
|
const safeEvalId = evalCaseId || "unknown";
|
|
1946
2107
|
const timestamp = Date.now();
|
|
1947
2108
|
const random = Math.random().toString(36).substring(2, 9);
|
|
1948
|
-
return path7.join(os.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}.json`);
|
|
2109
|
+
return path7.join(os.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
|
|
1949
2110
|
}
|
|
1950
2111
|
function formatTimeoutSuffix(timeoutMs) {
|
|
1951
2112
|
if (!timeoutMs || timeoutMs <= 0) {
|
|
@@ -3489,6 +3650,7 @@ var CodeEvaluator = class {
|
|
|
3489
3650
|
{
|
|
3490
3651
|
question: context.evalCase.question,
|
|
3491
3652
|
expected_outcome: context.evalCase.expected_outcome,
|
|
3653
|
+
expected_messages: context.evalCase.expected_messages,
|
|
3492
3654
|
reference_answer: context.evalCase.reference_answer,
|
|
3493
3655
|
candidate_answer: context.candidate,
|
|
3494
3656
|
guideline_files: context.evalCase.guideline_paths,
|