@agentv/core 1.2.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -105,7 +105,7 @@ type TestMessageRole = (typeof TEST_MESSAGE_ROLE_VALUES)[number];
105
105
  /**
106
106
  * Text or structured payload attached to a message.
107
107
  */
108
- type TestMessageContent = string | readonly JsonObject[];
108
+ type TestMessageContent = string | JsonObject | readonly JsonObject[];
109
109
  /**
110
110
  * System-authored instruction message.
111
111
  */
package/dist/index.d.ts CHANGED
@@ -105,7 +105,7 @@ type TestMessageRole = (typeof TEST_MESSAGE_ROLE_VALUES)[number];
105
105
  /**
106
106
  * Text or structured payload attached to a message.
107
107
  */
108
- type TestMessageContent = string | readonly JsonObject[];
108
+ type TestMessageContent = string | JsonObject | readonly JsonObject[];
109
109
  /**
110
110
  * System-authored instruction message.
111
111
  */
package/dist/index.js CHANGED
@@ -9,7 +9,7 @@ import {
9
9
  readTextFile,
10
10
  resolveFileReference,
11
11
  resolveTargetDefinition
12
- } from "./chunk-V3JCB3HI.js";
12
+ } from "./chunk-4A6L2F6L.js";
13
13
 
14
14
  // src/evaluation/types.ts
15
15
  var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
@@ -774,6 +774,17 @@ async function processMessages(options) {
774
774
  }
775
775
  continue;
776
776
  }
777
+ if (isJsonObject(content)) {
778
+ const rendered = JSON.stringify(content, null, 2);
779
+ segments.push({ type: "text", value: rendered });
780
+ if (textParts) {
781
+ textParts.push(rendered);
782
+ }
783
+ continue;
784
+ }
785
+ if (!Array.isArray(content)) {
786
+ continue;
787
+ }
777
788
  for (const rawSegment of content) {
778
789
  if (!isJsonObject(rawSegment)) {
779
790
  continue;
@@ -1000,6 +1011,11 @@ async function buildPromptInputs(testCase, mode = "lm") {
1000
1011
  }
1001
1012
  }
1002
1013
  }
1014
+ } else if (isJsonObject(message.content)) {
1015
+ const rendered = JSON.stringify(message.content, null, 2);
1016
+ if (rendered.trim().length > 0) {
1017
+ messageSegments.push({ type: "text", value: rendered });
1018
+ }
1003
1019
  }
1004
1020
  segmentsByMessage.push(messageSegments);
1005
1021
  }
@@ -1733,7 +1749,7 @@ var CliProvider = class {
1733
1749
  id;
1734
1750
  kind = "cli";
1735
1751
  targetName;
1736
- supportsBatch = false;
1752
+ supportsBatch = true;
1737
1753
  config;
1738
1754
  runCommand;
1739
1755
  verbose;
@@ -1753,6 +1769,11 @@ var CliProvider = class {
1753
1769
  const outputFilePath = generateOutputFilePath(request.evalCaseId);
1754
1770
  const templateValues = buildTemplateValues(request, this.config, outputFilePath);
1755
1771
  const renderedCommand = renderTemplate(this.config.commandTemplate, templateValues);
1772
+ if (this.verbose) {
1773
+ console.log(
1774
+ `[cli-provider:${this.targetName}] cwd=${this.config.cwd ?? ""} command=${renderedCommand}`
1775
+ );
1776
+ }
1756
1777
  const result = await this.runCommand(renderedCommand, {
1757
1778
  cwd: this.config.cwd,
1758
1779
  env: process.env,
@@ -1787,6 +1808,114 @@ var CliProvider = class {
1787
1808
  }
1788
1809
  };
1789
1810
  }
1811
+ async invokeBatch(requests) {
1812
+ if (requests.length === 0) {
1813
+ return [];
1814
+ }
1815
+ for (const request of requests) {
1816
+ if (request.signal?.aborted) {
1817
+ throw new Error("CLI provider batch request was aborted before execution");
1818
+ }
1819
+ }
1820
+ const controller = new AbortController();
1821
+ for (const request of requests) {
1822
+ request.signal?.addEventListener("abort", () => controller.abort(), { once: true });
1823
+ }
1824
+ await this.ensureHealthy(controller.signal);
1825
+ const outputFilePath = generateOutputFilePath("batch", ".jsonl");
1826
+ const batchInputFiles = [];
1827
+ for (const request of requests) {
1828
+ if (request.inputFiles && request.inputFiles.length > 0) {
1829
+ batchInputFiles.push(...request.inputFiles);
1830
+ }
1831
+ }
1832
+ const templateValues = buildTemplateValues(
1833
+ {
1834
+ question: "",
1835
+ guidelines: "",
1836
+ inputFiles: batchInputFiles,
1837
+ evalCaseId: "batch",
1838
+ attempt: 0
1839
+ },
1840
+ this.config,
1841
+ outputFilePath
1842
+ );
1843
+ const renderedCommand = renderTemplate(this.config.commandTemplate, templateValues);
1844
+ if (this.verbose) {
1845
+ console.log(
1846
+ `[cli-provider:${this.targetName}] (batch size=${requests.length}) cwd=${this.config.cwd ?? ""} command=${renderedCommand}`
1847
+ );
1848
+ }
1849
+ const result = await this.runCommand(renderedCommand, {
1850
+ cwd: this.config.cwd,
1851
+ env: process.env,
1852
+ timeoutMs: this.config.timeoutMs,
1853
+ signal: controller.signal
1854
+ });
1855
+ if (result.failed || (result.exitCode ?? 0) !== 0) {
1856
+ if (controller.signal.aborted) {
1857
+ throw new Error("CLI provider request was aborted");
1858
+ }
1859
+ if (result.timedOut) {
1860
+ throw new Error(
1861
+ `CLI provider timed out${formatTimeoutSuffix(this.config.timeoutMs ?? void 0)}`
1862
+ );
1863
+ }
1864
+ const codeText = result.exitCode !== null ? result.exitCode : "unknown";
1865
+ const detail = result.stderr.trim() || result.stdout.trim();
1866
+ const message = detail ? `${detail} (exit code ${codeText})` : `CLI exited with code ${codeText}`;
1867
+ throw new Error(message);
1868
+ }
1869
+ const responseContent = await this.readAndCleanupOutputFile(outputFilePath);
1870
+ const recordsById = this.parseJsonlBatchOutput(responseContent);
1871
+ const requestedIds = requests.map((request) => request.evalCaseId).filter((id) => typeof id === "string" && id.trim().length > 0);
1872
+ const missingIds = requestedIds.filter((id) => !recordsById.has(id));
1873
+ if (missingIds.length > 0) {
1874
+ throw new Error(`CLI batch output missing ids: ${missingIds.join(", ")}`);
1875
+ }
1876
+ const responses = requests.map((request) => {
1877
+ const evalCaseId = request.evalCaseId;
1878
+ if (!evalCaseId) {
1879
+ return {
1880
+ text: "",
1881
+ raw: {
1882
+ command: renderedCommand,
1883
+ stderr: result.stderr,
1884
+ exitCode: result.exitCode ?? 0,
1885
+ cwd: this.config.cwd,
1886
+ outputFile: outputFilePath
1887
+ }
1888
+ };
1889
+ }
1890
+ const parsed = recordsById.get(evalCaseId);
1891
+ if (!parsed) {
1892
+ return {
1893
+ text: "",
1894
+ raw: {
1895
+ command: renderedCommand,
1896
+ stderr: result.stderr,
1897
+ exitCode: result.exitCode ?? 0,
1898
+ cwd: this.config.cwd,
1899
+ outputFile: outputFilePath
1900
+ }
1901
+ };
1902
+ }
1903
+ return {
1904
+ text: parsed.text,
1905
+ trace: parsed.trace,
1906
+ traceRef: parsed.traceRef,
1907
+ raw: {
1908
+ command: renderedCommand,
1909
+ stderr: result.stderr,
1910
+ exitCode: result.exitCode ?? 0,
1911
+ cwd: this.config.cwd,
1912
+ outputFile: outputFilePath,
1913
+ recordId: evalCaseId
1914
+ }
1915
+ };
1916
+ });
1917
+ return responses;
1918
+ }
1790
1919
  /**
1791
1920
  * Parse output content from CLI.
1792
1921
  * If the content is valid JSON with a 'text' field, extract text and optional trace.
@@ -1812,6 +1941,38 @@ var CliProvider = class {
1812
1941
  const validEvents = trace.filter(isTraceEvent);
1813
1942
  return validEvents.length > 0 ? validEvents : void 0;
1814
1943
  }
1944
+ parseJsonlBatchOutput(content) {
1945
+ const records = /* @__PURE__ */ new Map();
1946
+ const lines = content.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.length > 0);
1947
+ for (const line of lines) {
1948
+ let parsed;
1949
+ try {
1950
+ parsed = JSON.parse(line);
1951
+ } catch (error) {
1952
+ const reason = error instanceof Error ? error.message : String(error);
1953
+ throw new Error(`CLI batch output contains invalid JSONL line: ${reason}`);
1954
+ }
1955
+ if (typeof parsed !== "object" || parsed === null) {
1956
+ throw new Error("CLI batch output JSONL line must be an object");
1957
+ }
1958
+ const obj = parsed;
1959
+ const id = typeof obj.id === "string" ? obj.id : void 0;
1960
+ if (!id || id.trim().length === 0) {
1961
+ throw new Error("CLI batch output JSONL line missing required string field: id");
1962
+ }
1963
+ if (records.has(id)) {
1964
+ throw new Error(`CLI batch output contains duplicate id: ${id}`);
1965
+ }
1966
+ const text = typeof obj.text === "string" ? obj.text : obj.text === void 0 ? "" : JSON.stringify(obj.text);
1967
+ const traceRef = typeof obj.traceRef === "string" ? obj.traceRef : typeof obj.trace_ref === "string" ? obj.trace_ref : void 0;
1968
+ records.set(id, {
1969
+ text,
1970
+ trace: this.parseTrace(obj.trace),
1971
+ traceRef
1972
+ });
1973
+ }
1974
+ return records;
1975
+ }
1815
1976
  async readAndCleanupOutputFile(filePath) {
1816
1977
  try {
1817
1978
  const content = await readTextFile(filePath);
@@ -1873,7 +2034,7 @@ var CliProvider = class {
1873
2034
  );
1874
2035
  if (this.verbose) {
1875
2036
  console.log(
1876
- `[cli-provider:${this.targetName}] (healthcheck) CLI_EVALS_DIR=${process.env.CLI_EVALS_DIR ?? ""} cwd=${healthcheck.cwd ?? this.config.cwd ?? ""} command=${renderedCommand}`
2037
+ `[cli-provider:${this.targetName}] (healthcheck) cwd=${healthcheck.cwd ?? this.config.cwd ?? ""} command=${renderedCommand}`
1877
2038
  );
1878
2039
  }
1879
2040
  const result = await this.runCommand(renderedCommand, {
@@ -1941,11 +2102,11 @@ function shellEscape(value) {
1941
2102
  }
1942
2103
  return `'${value.replace(/'/g, `'"'"'`)}'`;
1943
2104
  }
1944
- function generateOutputFilePath(evalCaseId) {
2105
+ function generateOutputFilePath(evalCaseId, extension = ".json") {
1945
2106
  const safeEvalId = evalCaseId || "unknown";
1946
2107
  const timestamp = Date.now();
1947
2108
  const random = Math.random().toString(36).substring(2, 9);
1948
- return path7.join(os.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}.json`);
2109
+ return path7.join(os.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
1949
2110
  }
1950
2111
  function formatTimeoutSuffix(timeoutMs) {
1951
2112
  if (!timeoutMs || timeoutMs <= 0) {
@@ -3489,6 +3650,7 @@ var CodeEvaluator = class {
3489
3650
  {
3490
3651
  question: context.evalCase.question,
3491
3652
  expected_outcome: context.evalCase.expected_outcome,
3653
+ expected_messages: context.evalCase.expected_messages,
3492
3654
  reference_answer: context.evalCase.reference_answer,
3493
3655
  candidate_answer: context.candidate,
3494
3656
  guideline_files: context.evalCase.guideline_paths,