agentv 3.7.0 → 3.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/{chunk-7YS6YNJZ.js → chunk-GC5P5HHZ.js} +127 -46
- package/dist/chunk-GC5P5HHZ.js.map +1 -0
- package/dist/{chunk-TR6H437M.js → chunk-Q2YWV4QM.js} +21 -21
- package/dist/chunk-Q2YWV4QM.js.map +1 -0
- package/dist/{chunk-XGG64VIY.js → chunk-TXDPYXHY.js} +636 -892
- package/dist/chunk-TXDPYXHY.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-VP6AXX6B.js → dist-PIOSPBKX.js} +2 -4
- package/dist/index.js +3 -3
- package/dist/{interactive-F6XECJ33.js → interactive-3VTDK5NX.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-7YS6YNJZ.js.map +0 -1
- package/dist/chunk-TR6H437M.js.map +0 -1
- package/dist/chunk-XGG64VIY.js.map +0 -1
- /package/dist/{dist-VP6AXX6B.js.map → dist-PIOSPBKX.js.map} +0 -0
- /package/dist/{interactive-F6XECJ33.js.map → interactive-3VTDK5NX.js.map} +0 -0
|
@@ -301,7 +301,7 @@ var require_dist = __commonJS({
|
|
|
301
301
|
}
|
|
302
302
|
});
|
|
303
303
|
|
|
304
|
-
// ../../packages/core/dist/chunk-
|
|
304
|
+
// ../../packages/core/dist/chunk-PC5TLJF6.js
|
|
305
305
|
import { constants } from "node:fs";
|
|
306
306
|
import { access, readFile } from "node:fs/promises";
|
|
307
307
|
import path from "node:path";
|
|
@@ -419,7 +419,11 @@ __export(external_exports2, {
|
|
|
419
419
|
void: () => voidType
|
|
420
420
|
});
|
|
421
421
|
|
|
422
|
-
// ../../packages/core/dist/chunk-
|
|
422
|
+
// ../../packages/core/dist/chunk-PC5TLJF6.js
|
|
423
|
+
import { readFile as readFile2 } from "node:fs/promises";
|
|
424
|
+
import path3 from "node:path";
|
|
425
|
+
import fg from "fast-glob";
|
|
426
|
+
import { parse as parseYaml } from "yaml";
|
|
423
427
|
var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
|
|
424
428
|
var TEST_MESSAGE_ROLES = TEST_MESSAGE_ROLE_VALUES;
|
|
425
429
|
var TEST_MESSAGE_ROLE_SET = new Set(TEST_MESSAGE_ROLE_VALUES);
|
|
@@ -769,7 +773,6 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
|
|
|
769
773
|
var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set([
|
|
770
774
|
"PROMPT",
|
|
771
775
|
"PROMPT_FILE",
|
|
772
|
-
"GUIDELINES",
|
|
773
776
|
"EVAL_ID",
|
|
774
777
|
"ATTEMPT",
|
|
775
778
|
"FILES",
|
|
@@ -1595,8 +1598,8 @@ function resolveCliConfig(target, env, evalFilePath) {
|
|
|
1595
1598
|
const parseResult = CliTargetInputSchema.safeParse(target, { errorMap: cliErrorMap });
|
|
1596
1599
|
if (!parseResult.success) {
|
|
1597
1600
|
const firstError = parseResult.error.errors[0];
|
|
1598
|
-
const
|
|
1599
|
-
const prefix =
|
|
1601
|
+
const path44 = firstError?.path.join(".") || "";
|
|
1602
|
+
const prefix = path44 ? `${target.name} ${path44}: ` : `${target.name}: `;
|
|
1600
1603
|
throw new Error(`${prefix}${firstError?.message}`);
|
|
1601
1604
|
}
|
|
1602
1605
|
const normalized = normalizeCliTargetInput(parseResult.data, env, evalFilePath);
|
|
@@ -1802,6 +1805,135 @@ function resolveOptionalNumberArray(source, description) {
|
|
|
1802
1805
|
}
|
|
1803
1806
|
return resolved.length > 0 ? resolved : void 0;
|
|
1804
1807
|
}
|
|
1808
|
+
var ENV_VAR_PATTERN = /\$\{\{\s*([A-Za-z_][A-Za-z0-9_]*)\s*\}\}/g;
|
|
1809
|
+
function interpolateEnv(value, env) {
|
|
1810
|
+
if (typeof value === "string") {
|
|
1811
|
+
return value.replace(ENV_VAR_PATTERN, (_, varName) => env[varName] ?? "");
|
|
1812
|
+
}
|
|
1813
|
+
if (Array.isArray(value)) {
|
|
1814
|
+
return value.map((item) => interpolateEnv(item, env));
|
|
1815
|
+
}
|
|
1816
|
+
if (value !== null && typeof value === "object") {
|
|
1817
|
+
const result = {};
|
|
1818
|
+
for (const [key, val] of Object.entries(value)) {
|
|
1819
|
+
result[key] = interpolateEnv(val, env);
|
|
1820
|
+
}
|
|
1821
|
+
return result;
|
|
1822
|
+
}
|
|
1823
|
+
return value;
|
|
1824
|
+
}
|
|
1825
|
+
var ANSI_YELLOW = "\x1B[33m";
|
|
1826
|
+
var ANSI_RESET = "\x1B[0m";
|
|
1827
|
+
var FILE_PROTOCOL = "file://";
|
|
1828
|
+
function isFileReference(value) {
|
|
1829
|
+
return typeof value === "string" && value.startsWith(FILE_PROTOCOL);
|
|
1830
|
+
}
|
|
1831
|
+
function extractFilePath(ref) {
|
|
1832
|
+
return ref.slice(FILE_PROTOCOL.length);
|
|
1833
|
+
}
|
|
1834
|
+
function isGlobPattern(filePath) {
|
|
1835
|
+
return filePath.includes("*") || filePath.includes("?") || filePath.includes("{");
|
|
1836
|
+
}
|
|
1837
|
+
function parseYamlCases(content, filePath) {
|
|
1838
|
+
const raw = parseYaml(content);
|
|
1839
|
+
const parsed = interpolateEnv(raw, process.env);
|
|
1840
|
+
if (!Array.isArray(parsed)) {
|
|
1841
|
+
throw new Error(
|
|
1842
|
+
`External test file must contain a YAML array, got ${typeof parsed}: ${filePath}`
|
|
1843
|
+
);
|
|
1844
|
+
}
|
|
1845
|
+
const results = [];
|
|
1846
|
+
for (const item of parsed) {
|
|
1847
|
+
if (!isJsonObject(item)) {
|
|
1848
|
+
throw new Error(`External test file contains non-object entry: ${filePath}`);
|
|
1849
|
+
}
|
|
1850
|
+
results.push(item);
|
|
1851
|
+
}
|
|
1852
|
+
return results;
|
|
1853
|
+
}
|
|
1854
|
+
function parseJsonlCases(content, filePath) {
|
|
1855
|
+
const lines = content.split("\n");
|
|
1856
|
+
const results = [];
|
|
1857
|
+
for (let i = 0; i < lines.length; i++) {
|
|
1858
|
+
const line = lines[i].trim();
|
|
1859
|
+
if (line === "") continue;
|
|
1860
|
+
try {
|
|
1861
|
+
const raw = JSON.parse(line);
|
|
1862
|
+
const parsed = interpolateEnv(raw, process.env);
|
|
1863
|
+
if (!isJsonObject(parsed)) {
|
|
1864
|
+
throw new Error("Expected JSON object");
|
|
1865
|
+
}
|
|
1866
|
+
results.push(parsed);
|
|
1867
|
+
} catch (error) {
|
|
1868
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
1869
|
+
throw new Error(`Malformed JSONL at line ${i + 1}: ${message}
|
|
1870
|
+
File: ${filePath}`);
|
|
1871
|
+
}
|
|
1872
|
+
}
|
|
1873
|
+
return results;
|
|
1874
|
+
}
|
|
1875
|
+
async function loadCasesFromFile(filePath) {
|
|
1876
|
+
const ext = path3.extname(filePath).toLowerCase();
|
|
1877
|
+
let content;
|
|
1878
|
+
try {
|
|
1879
|
+
content = await readFile2(filePath, "utf8");
|
|
1880
|
+
} catch (error) {
|
|
1881
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
1882
|
+
throw new Error(`Cannot read external test file: ${filePath}
|
|
1883
|
+
${message}`);
|
|
1884
|
+
}
|
|
1885
|
+
if (content.trim() === "") {
|
|
1886
|
+
console.warn(
|
|
1887
|
+
`${ANSI_YELLOW}Warning: External test file is empty, skipping: ${filePath}${ANSI_RESET}`
|
|
1888
|
+
);
|
|
1889
|
+
return [];
|
|
1890
|
+
}
|
|
1891
|
+
if (ext === ".yaml" || ext === ".yml") {
|
|
1892
|
+
return parseYamlCases(content, filePath);
|
|
1893
|
+
}
|
|
1894
|
+
if (ext === ".jsonl") {
|
|
1895
|
+
return parseJsonlCases(content, filePath);
|
|
1896
|
+
}
|
|
1897
|
+
throw new Error(
|
|
1898
|
+
`Unsupported external test file format '${ext}': ${filePath}. Supported: .yaml, .yml, .jsonl`
|
|
1899
|
+
);
|
|
1900
|
+
}
|
|
1901
|
+
async function resolveFileReference2(ref, evalFileDir) {
|
|
1902
|
+
const rawPath = extractFilePath(ref);
|
|
1903
|
+
const absolutePattern = path3.resolve(evalFileDir, rawPath);
|
|
1904
|
+
if (isGlobPattern(rawPath)) {
|
|
1905
|
+
const matches = await fg(absolutePattern.replaceAll("\\", "/"), {
|
|
1906
|
+
onlyFiles: true,
|
|
1907
|
+
absolute: true
|
|
1908
|
+
});
|
|
1909
|
+
if (matches.length === 0) {
|
|
1910
|
+
console.warn(
|
|
1911
|
+
`${ANSI_YELLOW}Warning: Glob pattern matched no files: ${ref} (resolved to ${absolutePattern})${ANSI_RESET}`
|
|
1912
|
+
);
|
|
1913
|
+
return [];
|
|
1914
|
+
}
|
|
1915
|
+
matches.sort();
|
|
1916
|
+
const allCases = [];
|
|
1917
|
+
for (const match of matches) {
|
|
1918
|
+
const cases = await loadCasesFromFile(match);
|
|
1919
|
+
allCases.push(...cases);
|
|
1920
|
+
}
|
|
1921
|
+
return allCases;
|
|
1922
|
+
}
|
|
1923
|
+
return loadCasesFromFile(absolutePattern);
|
|
1924
|
+
}
|
|
1925
|
+
async function expandFileReferences(tests, evalFileDir) {
|
|
1926
|
+
const expanded = [];
|
|
1927
|
+
for (const entry of tests) {
|
|
1928
|
+
if (isFileReference(entry)) {
|
|
1929
|
+
const cases = await resolveFileReference2(entry, evalFileDir);
|
|
1930
|
+
expanded.push(...cases);
|
|
1931
|
+
} else {
|
|
1932
|
+
expanded.push(entry);
|
|
1933
|
+
}
|
|
1934
|
+
}
|
|
1935
|
+
return expanded;
|
|
1936
|
+
}
|
|
1805
1937
|
var AGENT_PROVIDER_KINDS = [
|
|
1806
1938
|
"codex",
|
|
1807
1939
|
"copilot-sdk",
|
|
@@ -1876,36 +2008,29 @@ function isAgentProvider(provider) {
|
|
|
1876
2008
|
}
|
|
1877
2009
|
|
|
1878
2010
|
// ../../packages/core/dist/index.js
|
|
1879
|
-
import { readFile as
|
|
1880
|
-
import
|
|
1881
|
-
import
|
|
2011
|
+
import { readFile as readFile6 } from "node:fs/promises";
|
|
2012
|
+
import path7 from "node:path";
|
|
2013
|
+
import micromatch2 from "micromatch";
|
|
1882
2014
|
import { parse as parse2 } from "yaml";
|
|
1883
|
-
import { readFile as readFile2 } from "node:fs/promises";
|
|
1884
|
-
import path3 from "node:path";
|
|
1885
|
-
import { readFile as readFile22 } from "node:fs/promises";
|
|
1886
|
-
import path22 from "node:path";
|
|
1887
|
-
import fg from "fast-glob";
|
|
1888
|
-
import { parse as parseYaml } from "yaml";
|
|
1889
2015
|
import { readFile as readFile3 } from "node:fs/promises";
|
|
1890
2016
|
import path4 from "node:path";
|
|
1891
|
-
import
|
|
2017
|
+
import { readFile as readFile22 } from "node:fs/promises";
|
|
2018
|
+
import path32 from "node:path";
|
|
1892
2019
|
import { parse } from "yaml";
|
|
1893
2020
|
import { constants as constants2 } from "node:fs";
|
|
1894
2021
|
import { access as access2 } from "node:fs/promises";
|
|
1895
|
-
import
|
|
2022
|
+
import path22 from "node:path";
|
|
1896
2023
|
import { fileURLToPath } from "node:url";
|
|
1897
|
-
import
|
|
1898
|
-
import { readFile as
|
|
1899
|
-
import { readFile as readFile6 } from "node:fs/promises";
|
|
1900
|
-
import path7 from "node:path";
|
|
1901
|
-
import micromatch2 from "micromatch";
|
|
1902
|
-
import { parse as parseYaml2 } from "yaml";
|
|
2024
|
+
import path42 from "node:path";
|
|
2025
|
+
import { readFile as readFile32 } from "node:fs/promises";
|
|
1903
2026
|
import { readFile as readFile5 } from "node:fs/promises";
|
|
1904
2027
|
import path6 from "node:path";
|
|
1905
|
-
import
|
|
1906
|
-
import
|
|
2028
|
+
import micromatch from "micromatch";
|
|
2029
|
+
import { parse as parseYaml2 } from "yaml";
|
|
2030
|
+
import { readFile as readFile4 } from "node:fs/promises";
|
|
2031
|
+
import path5 from "node:path";
|
|
1907
2032
|
import { readFileSync } from "node:fs";
|
|
1908
|
-
import
|
|
2033
|
+
import path8 from "node:path";
|
|
1909
2034
|
import { parse as parse3 } from "yaml";
|
|
1910
2035
|
import { createOpenAI } from "@ai-sdk/openai";
|
|
1911
2036
|
|
|
@@ -6379,7 +6504,7 @@ function createOpenRouter(options = {}) {
|
|
|
6379
6504
|
);
|
|
6380
6505
|
const createChatModel = (modelId, settings = {}) => new OpenRouterChatLanguageModel(modelId, settings, {
|
|
6381
6506
|
provider: "openrouter.chat",
|
|
6382
|
-
url: ({ path:
|
|
6507
|
+
url: ({ path: path44 }) => `${baseURL}${path44}`,
|
|
6383
6508
|
headers: getHeaders,
|
|
6384
6509
|
compatibility,
|
|
6385
6510
|
fetch: options.fetch,
|
|
@@ -6387,7 +6512,7 @@ function createOpenRouter(options = {}) {
|
|
|
6387
6512
|
});
|
|
6388
6513
|
const createCompletionModel = (modelId, settings = {}) => new OpenRouterCompletionLanguageModel(modelId, settings, {
|
|
6389
6514
|
provider: "openrouter.completion",
|
|
6390
|
-
url: ({ path:
|
|
6515
|
+
url: ({ path: path44 }) => `${baseURL}${path44}`,
|
|
6391
6516
|
headers: getHeaders,
|
|
6392
6517
|
compatibility,
|
|
6393
6518
|
fetch: options.fetch,
|
|
@@ -6395,14 +6520,14 @@ function createOpenRouter(options = {}) {
|
|
|
6395
6520
|
});
|
|
6396
6521
|
const createEmbeddingModel = (modelId, settings = {}) => new OpenRouterEmbeddingModel(modelId, settings, {
|
|
6397
6522
|
provider: "openrouter.embedding",
|
|
6398
|
-
url: ({ path:
|
|
6523
|
+
url: ({ path: path44 }) => `${baseURL}${path44}`,
|
|
6399
6524
|
headers: getHeaders,
|
|
6400
6525
|
fetch: options.fetch,
|
|
6401
6526
|
extraBody: options.extraBody
|
|
6402
6527
|
});
|
|
6403
6528
|
const createImageModel = (modelId, settings = {}) => new OpenRouterImageModel(modelId, settings, {
|
|
6404
6529
|
provider: "openrouter.image",
|
|
6405
|
-
url: ({ path:
|
|
6530
|
+
url: ({ path: path44 }) => `${baseURL}${path44}`,
|
|
6406
6531
|
headers: getHeaders,
|
|
6407
6532
|
fetch: options.fetch,
|
|
6408
6533
|
extraBody: options.extraBody
|
|
@@ -12398,24 +12523,24 @@ import { spawn } from "node:child_process";
|
|
|
12398
12523
|
import { randomUUID } from "node:crypto";
|
|
12399
12524
|
import { createWriteStream } from "node:fs";
|
|
12400
12525
|
import { mkdir } from "node:fs/promises";
|
|
12401
|
-
import
|
|
12402
|
-
import
|
|
12526
|
+
import path10 from "node:path";
|
|
12527
|
+
import path9 from "node:path";
|
|
12403
12528
|
import { randomUUID as randomUUID2 } from "node:crypto";
|
|
12404
12529
|
import { createWriteStream as createWriteStream2 } from "node:fs";
|
|
12405
12530
|
import { mkdir as mkdir2 } from "node:fs/promises";
|
|
12406
|
-
import
|
|
12531
|
+
import path11 from "node:path";
|
|
12407
12532
|
import { exec as execWithCallback } from "node:child_process";
|
|
12408
12533
|
import fs from "node:fs/promises";
|
|
12409
12534
|
import os from "node:os";
|
|
12410
|
-
import
|
|
12535
|
+
import path12 from "node:path";
|
|
12411
12536
|
import { promisify } from "node:util";
|
|
12412
12537
|
import { randomUUID as randomUUID3 } from "node:crypto";
|
|
12413
12538
|
import { createWriteStream as createWriteStream3 } from "node:fs";
|
|
12414
12539
|
import { mkdir as mkdir3 } from "node:fs/promises";
|
|
12415
|
-
import
|
|
12540
|
+
import path13 from "node:path";
|
|
12416
12541
|
import { randomUUID as randomUUID5 } from "node:crypto";
|
|
12417
12542
|
import { mkdir as mkdir4 } from "node:fs/promises";
|
|
12418
|
-
import
|
|
12543
|
+
import path15 from "node:path";
|
|
12419
12544
|
import { Readable, Writable } from "node:stream";
|
|
12420
12545
|
import { spawn as spawn2 } from "node:child_process";
|
|
12421
12546
|
|
|
@@ -13900,48 +14025,48 @@ var RequestError = class _RequestError extends Error {
|
|
|
13900
14025
|
import { randomUUID as randomUUID4 } from "node:crypto";
|
|
13901
14026
|
import { createWriteStream as createWriteStream4, existsSync, readdirSync } from "node:fs";
|
|
13902
14027
|
import { arch, platform } from "node:os";
|
|
13903
|
-
import
|
|
14028
|
+
import path14 from "node:path";
|
|
13904
14029
|
import { fileURLToPath as fileURLToPath2 } from "node:url";
|
|
13905
14030
|
import { randomUUID as randomUUID6 } from "node:crypto";
|
|
13906
14031
|
import { mkdir as mkdir5 } from "node:fs/promises";
|
|
13907
|
-
import
|
|
14032
|
+
import path16 from "node:path";
|
|
13908
14033
|
import { spawn as spawn3 } from "node:child_process";
|
|
13909
14034
|
import { randomUUID as randomUUID7 } from "node:crypto";
|
|
13910
14035
|
import { createWriteStream as createWriteStream5 } from "node:fs";
|
|
13911
14036
|
import { mkdir as mkdir6, mkdtemp, rm, writeFile } from "node:fs/promises";
|
|
13912
14037
|
import { tmpdir } from "node:os";
|
|
13913
|
-
import
|
|
14038
|
+
import path17 from "node:path";
|
|
13914
14039
|
import { exec as exec2 } from "node:child_process";
|
|
13915
14040
|
import { constants as constants3, access as access3, stat as stat4 } from "node:fs/promises";
|
|
13916
|
-
import
|
|
14041
|
+
import path29 from "node:path";
|
|
13917
14042
|
import { promisify as promisify3 } from "node:util";
|
|
13918
14043
|
import { stat as stat3, writeFile as writeFile4 } from "node:fs/promises";
|
|
13919
|
-
import
|
|
14044
|
+
import path27 from "node:path";
|
|
13920
14045
|
import { constants as constants22 } from "node:fs";
|
|
13921
14046
|
import { access as access22, mkdir as mkdir7, readdir, rm as rm2, stat } from "node:fs/promises";
|
|
14047
|
+
import path18 from "node:path";
|
|
14048
|
+
import path19 from "node:path";
|
|
13922
14049
|
import path20 from "node:path";
|
|
14050
|
+
import { readFile as readFile7 } from "node:fs/promises";
|
|
13923
14051
|
import path21 from "node:path";
|
|
13924
|
-
import path222 from "node:path";
|
|
13925
|
-
import { readFile as readFile9 } from "node:fs/promises";
|
|
13926
|
-
import path23 from "node:path";
|
|
13927
14052
|
import { exec, spawn as spawn4 } from "node:child_process";
|
|
13928
14053
|
import { mkdir as mkdir8, writeFile as writeFile2 } from "node:fs/promises";
|
|
13929
|
-
import
|
|
14054
|
+
import path24 from "node:path";
|
|
13930
14055
|
import { promisify as promisify2 } from "node:util";
|
|
13931
|
-
import
|
|
14056
|
+
import path23 from "node:path";
|
|
13932
14057
|
import os2 from "node:os";
|
|
13933
|
-
import
|
|
13934
|
-
import { copyFile, mkdir as mkdir9, readFile as
|
|
13935
|
-
import
|
|
13936
|
-
import
|
|
14058
|
+
import path222 from "node:path";
|
|
14059
|
+
import { copyFile, mkdir as mkdir9, readFile as readFile8, readdir as readdir2, stat as stat2, writeFile as writeFile3 } from "node:fs/promises";
|
|
14060
|
+
import path26 from "node:path";
|
|
14061
|
+
import path25 from "node:path";
|
|
13937
14062
|
import JSON5 from "json5";
|
|
13938
14063
|
import { writeFile as writeFile5 } from "node:fs/promises";
|
|
13939
|
-
import
|
|
14064
|
+
import path28 from "node:path";
|
|
13940
14065
|
import { constants as constants4 } from "node:fs";
|
|
13941
|
-
import { access as access4, readFile as
|
|
13942
|
-
import
|
|
14066
|
+
import { access as access4, readFile as readFile9 } from "node:fs/promises";
|
|
14067
|
+
import path30 from "node:path";
|
|
13943
14068
|
import { parse as parse4 } from "yaml";
|
|
13944
|
-
import
|
|
14069
|
+
import path31 from "node:path";
|
|
13945
14070
|
import fg2 from "fast-glob";
|
|
13946
14071
|
import { mkdtemp as mkdtemp2, rm as rm3, writeFile as writeFile6 } from "node:fs/promises";
|
|
13947
14072
|
import { tmpdir as tmpdir2 } from "node:os";
|
|
@@ -13949,38 +14074,38 @@ import { dirname, join } from "node:path";
|
|
|
13949
14074
|
import { randomBytes } from "node:crypto";
|
|
13950
14075
|
import { createServer } from "node:http";
|
|
13951
14076
|
import fs2 from "node:fs/promises";
|
|
13952
|
-
import
|
|
14077
|
+
import path322 from "node:path";
|
|
13953
14078
|
import { createHash as createHash2, randomUUID as randomUUID8 } from "node:crypto";
|
|
13954
14079
|
import { copyFile as copyFile2, mkdir as mkdir13, readdir as readdir6, stat as stat7 } from "node:fs/promises";
|
|
13955
|
-
import
|
|
13956
|
-
import
|
|
14080
|
+
import path41 from "node:path";
|
|
14081
|
+
import micromatch3 from "micromatch";
|
|
14082
|
+
import path33 from "node:path";
|
|
14083
|
+
import path34 from "node:path";
|
|
14084
|
+
import fg22 from "fast-glob";
|
|
13957
14085
|
import path35 from "node:path";
|
|
13958
|
-
import path36 from "node:path";
|
|
13959
14086
|
import fg3 from "fast-glob";
|
|
13960
|
-
import path37 from "node:path";
|
|
13961
|
-
import fg4 from "fast-glob";
|
|
13962
14087
|
import { exec as execCallback } from "node:child_process";
|
|
13963
14088
|
import { readdirSync as readdirSync2, statSync } from "node:fs";
|
|
13964
|
-
import
|
|
14089
|
+
import path36 from "node:path";
|
|
13965
14090
|
import { promisify as promisify4 } from "node:util";
|
|
13966
14091
|
import { cp, mkdir as mkdir11, readdir as readdir3, rm as rm4, stat as stat5 } from "node:fs/promises";
|
|
13967
|
-
import
|
|
14092
|
+
import path37 from "node:path";
|
|
13968
14093
|
import { execFile } from "node:child_process";
|
|
13969
14094
|
import { createHash } from "node:crypto";
|
|
13970
14095
|
import { existsSync as existsSync2 } from "node:fs";
|
|
13971
|
-
import { cp as cp2, mkdir as mkdir12, readFile as
|
|
13972
|
-
import
|
|
14096
|
+
import { cp as cp2, mkdir as mkdir12, readFile as readFile10, readdir as readdir4, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
|
|
14097
|
+
import path38 from "node:path";
|
|
13973
14098
|
import { promisify as promisify5 } from "node:util";
|
|
13974
14099
|
import { execFile as execFile2 } from "node:child_process";
|
|
13975
14100
|
import { existsSync as existsSync3 } from "node:fs";
|
|
13976
|
-
import
|
|
14101
|
+
import path39 from "node:path";
|
|
13977
14102
|
import { promisify as promisify6 } from "node:util";
|
|
13978
14103
|
import { readdir as readdir5, stat as stat6 } from "node:fs/promises";
|
|
13979
|
-
import
|
|
14104
|
+
import path40 from "node:path";
|
|
13980
14105
|
import { existsSync as existsSync4 } from "node:fs";
|
|
13981
|
-
import
|
|
13982
|
-
import { mkdir as mkdir14, readFile as
|
|
13983
|
-
import
|
|
14106
|
+
import path422 from "node:path";
|
|
14107
|
+
import { mkdir as mkdir14, readFile as readFile11, writeFile as writeFile8 } from "node:fs/promises";
|
|
14108
|
+
import path43 from "node:path";
|
|
13984
14109
|
function computeTraceSummary(messages) {
|
|
13985
14110
|
const toolCallCounts = {};
|
|
13986
14111
|
const toolDurations = {};
|
|
@@ -14097,27 +14222,10 @@ function mergeExecutionMetrics(computed, metrics) {
|
|
|
14097
14222
|
endTime: metrics.endTime ?? computed.endTime
|
|
14098
14223
|
};
|
|
14099
14224
|
}
|
|
14100
|
-
var ENV_VAR_PATTERN = /\$\{\{\s*([A-Za-z_][A-Za-z0-9_]*)\s*\}\}/g;
|
|
14101
|
-
function interpolateEnv(value, env) {
|
|
14102
|
-
if (typeof value === "string") {
|
|
14103
|
-
return value.replace(ENV_VAR_PATTERN, (_, varName) => env[varName] ?? "");
|
|
14104
|
-
}
|
|
14105
|
-
if (Array.isArray(value)) {
|
|
14106
|
-
return value.map((item) => interpolateEnv(item, env));
|
|
14107
|
-
}
|
|
14108
|
-
if (value !== null && typeof value === "object") {
|
|
14109
|
-
const result = {};
|
|
14110
|
-
for (const [key, val] of Object.entries(value)) {
|
|
14111
|
-
result[key] = interpolateEnv(val, env);
|
|
14112
|
-
}
|
|
14113
|
-
return result;
|
|
14114
|
-
}
|
|
14115
|
-
return value;
|
|
14116
|
-
}
|
|
14117
14225
|
var ANSI_RED = "\x1B[31m";
|
|
14118
|
-
var
|
|
14226
|
+
var ANSI_RESET2 = "\x1B[0m";
|
|
14119
14227
|
function logError(msg) {
|
|
14120
|
-
console.error(`${ANSI_RED}Error: ${msg}${
|
|
14228
|
+
console.error(`${ANSI_RED}Error: ${msg}${ANSI_RESET2}`);
|
|
14121
14229
|
}
|
|
14122
14230
|
function isAgentSkillsFormat(parsed) {
|
|
14123
14231
|
if (typeof parsed !== "object" || parsed === null) return false;
|
|
@@ -14125,184 +14233,71 @@ function isAgentSkillsFormat(parsed) {
|
|
|
14125
14233
|
return Array.isArray(obj.evals);
|
|
14126
14234
|
}
|
|
14127
14235
|
async function loadTestsFromAgentSkills(filePath) {
|
|
14128
|
-
const raw = await
|
|
14236
|
+
const raw = await readFile3(filePath, "utf8");
|
|
14129
14237
|
let parsed;
|
|
14130
14238
|
try {
|
|
14131
14239
|
parsed = JSON.parse(raw);
|
|
14132
|
-
} catch {
|
|
14133
|
-
throw new Error(`Invalid Agent Skills evals.json: failed to parse JSON in '${filePath}'`);
|
|
14134
|
-
}
|
|
14135
|
-
return parseAgentSkillsEvals(parsed, filePath, path3.dirname(path3.resolve(filePath)));
|
|
14136
|
-
}
|
|
14137
|
-
function parseAgentSkillsEvals(parsed, source = "evals.json", baseDir) {
|
|
14138
|
-
if (!isAgentSkillsFormat(parsed)) {
|
|
14139
|
-
throw new Error(`Invalid Agent Skills evals.json: missing 'evals' array in '${source}'`);
|
|
14140
|
-
}
|
|
14141
|
-
const { evals, skill_name } = parsed;
|
|
14142
|
-
if (evals.length === 0) {
|
|
14143
|
-
throw new Error(`Invalid Agent Skills evals.json: 'evals' array is empty in '${source}'`);
|
|
14144
|
-
}
|
|
14145
|
-
const tests = [];
|
|
14146
|
-
for (const evalCase of evals) {
|
|
14147
|
-
const id = evalCase.id;
|
|
14148
|
-
if (typeof evalCase.prompt !== "string" || evalCase.prompt.trim() === "") {
|
|
14149
|
-
const caseRef = id !== void 0 ? `id=${id}` : "unknown";
|
|
14150
|
-
logError(`Skipping eval case ${caseRef} in '${source}': missing or empty 'prompt'`);
|
|
14151
|
-
continue;
|
|
14152
|
-
}
|
|
14153
|
-
let assertions;
|
|
14154
|
-
if (evalCase.assertions && evalCase.assertions.length > 0) {
|
|
14155
|
-
assertions = evalCase.assertions.map(
|
|
14156
|
-
(text2, i) => ({
|
|
14157
|
-
name: `assertion-${i + 1}`,
|
|
14158
|
-
type: "llm-grader",
|
|
14159
|
-
prompt: text2
|
|
14160
|
-
})
|
|
14161
|
-
);
|
|
14162
|
-
}
|
|
14163
|
-
const metadata = {};
|
|
14164
|
-
if (skill_name) {
|
|
14165
|
-
metadata.skill_name = skill_name;
|
|
14166
|
-
}
|
|
14167
|
-
const filePaths = [];
|
|
14168
|
-
if (evalCase.files && evalCase.files.length > 0) {
|
|
14169
|
-
metadata.agent_skills_files = evalCase.files;
|
|
14170
|
-
if (baseDir) {
|
|
14171
|
-
metadata.agent_skills_base_dir = baseDir;
|
|
14172
|
-
for (const file of evalCase.files) {
|
|
14173
|
-
filePaths.push(path3.resolve(baseDir, file));
|
|
14174
|
-
}
|
|
14175
|
-
}
|
|
14176
|
-
}
|
|
14177
|
-
const prompt = evalCase.prompt;
|
|
14178
|
-
const test = {
|
|
14179
|
-
id: String(id),
|
|
14180
|
-
question: prompt,
|
|
14181
|
-
input: [{ role: "user", content: prompt }],
|
|
14182
|
-
input_segments: [{ type: "text", value: prompt }],
|
|
14183
|
-
expected_output: evalCase.expected_output ? [{ role: "assistant", content: evalCase.expected_output }] : [],
|
|
14184
|
-
reference_answer: evalCase.expected_output,
|
|
14185
|
-
guideline_paths: [],
|
|
14186
|
-
file_paths: filePaths,
|
|
14187
|
-
criteria: evalCase.expected_output ?? "",
|
|
14188
|
-
assertions,
|
|
14189
|
-
...Object.keys(metadata).length > 0 ? { metadata } : {}
|
|
14190
|
-
};
|
|
14191
|
-
tests.push(test);
|
|
14192
|
-
}
|
|
14193
|
-
return tests;
|
|
14194
|
-
}
|
|
14195
|
-
var ANSI_YELLOW = "\x1B[33m";
|
|
14196
|
-
var ANSI_RESET2 = "\x1B[0m";
|
|
14197
|
-
var FILE_PROTOCOL = "file://";
|
|
14198
|
-
function isFileReference(value) {
|
|
14199
|
-
return typeof value === "string" && value.startsWith(FILE_PROTOCOL);
|
|
14200
|
-
}
|
|
14201
|
-
function extractFilePath(ref) {
|
|
14202
|
-
return ref.slice(FILE_PROTOCOL.length);
|
|
14203
|
-
}
|
|
14204
|
-
function isGlobPattern(filePath) {
|
|
14205
|
-
return filePath.includes("*") || filePath.includes("?") || filePath.includes("{");
|
|
14206
|
-
}
|
|
14207
|
-
function parseYamlCases(content, filePath) {
|
|
14208
|
-
const raw = parseYaml(content);
|
|
14209
|
-
const parsed = interpolateEnv(raw, process.env);
|
|
14210
|
-
if (!Array.isArray(parsed)) {
|
|
14211
|
-
throw new Error(
|
|
14212
|
-
`External test file must contain a YAML array, got ${typeof parsed}: ${filePath}`
|
|
14213
|
-
);
|
|
14214
|
-
}
|
|
14215
|
-
const results = [];
|
|
14216
|
-
for (const item of parsed) {
|
|
14217
|
-
if (!isJsonObject(item)) {
|
|
14218
|
-
throw new Error(`External test file contains non-object entry: ${filePath}`);
|
|
14219
|
-
}
|
|
14220
|
-
results.push(item);
|
|
14221
|
-
}
|
|
14222
|
-
return results;
|
|
14223
|
-
}
|
|
14224
|
-
function parseJsonlCases(content, filePath) {
|
|
14225
|
-
const lines = content.split("\n");
|
|
14226
|
-
const results = [];
|
|
14227
|
-
for (let i = 0; i < lines.length; i++) {
|
|
14228
|
-
const line = lines[i].trim();
|
|
14229
|
-
if (line === "") continue;
|
|
14230
|
-
try {
|
|
14231
|
-
const raw = JSON.parse(line);
|
|
14232
|
-
const parsed = interpolateEnv(raw, process.env);
|
|
14233
|
-
if (!isJsonObject(parsed)) {
|
|
14234
|
-
throw new Error("Expected JSON object");
|
|
14235
|
-
}
|
|
14236
|
-
results.push(parsed);
|
|
14237
|
-
} catch (error) {
|
|
14238
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
14239
|
-
throw new Error(`Malformed JSONL at line ${i + 1}: ${message}
|
|
14240
|
-
File: ${filePath}`);
|
|
14241
|
-
}
|
|
14242
|
-
}
|
|
14243
|
-
return results;
|
|
14244
|
-
}
|
|
14245
|
-
async function loadCasesFromFile(filePath) {
|
|
14246
|
-
const ext = path22.extname(filePath).toLowerCase();
|
|
14247
|
-
let content;
|
|
14248
|
-
try {
|
|
14249
|
-
content = await readFile22(filePath, "utf8");
|
|
14250
|
-
} catch (error) {
|
|
14251
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
14252
|
-
throw new Error(`Cannot read external test file: ${filePath}
|
|
14253
|
-
${message}`);
|
|
14254
|
-
}
|
|
14255
|
-
if (content.trim() === "") {
|
|
14256
|
-
console.warn(
|
|
14257
|
-
`${ANSI_YELLOW}Warning: External test file is empty, skipping: ${filePath}${ANSI_RESET2}`
|
|
14258
|
-
);
|
|
14259
|
-
return [];
|
|
14240
|
+
} catch {
|
|
14241
|
+
throw new Error(`Invalid Agent Skills evals.json: failed to parse JSON in '${filePath}'`);
|
|
14260
14242
|
}
|
|
14261
|
-
|
|
14262
|
-
|
|
14243
|
+
return parseAgentSkillsEvals(parsed, filePath, path4.dirname(path4.resolve(filePath)));
|
|
14244
|
+
}
|
|
14245
|
+
function parseAgentSkillsEvals(parsed, source = "evals.json", baseDir) {
|
|
14246
|
+
if (!isAgentSkillsFormat(parsed)) {
|
|
14247
|
+
throw new Error(`Invalid Agent Skills evals.json: missing 'evals' array in '${source}'`);
|
|
14263
14248
|
}
|
|
14264
|
-
|
|
14265
|
-
|
|
14249
|
+
const { evals, skill_name } = parsed;
|
|
14250
|
+
if (evals.length === 0) {
|
|
14251
|
+
throw new Error(`Invalid Agent Skills evals.json: 'evals' array is empty in '${source}'`);
|
|
14266
14252
|
}
|
|
14267
|
-
|
|
14268
|
-
|
|
14269
|
-
|
|
14270
|
-
|
|
14271
|
-
|
|
14272
|
-
|
|
14273
|
-
|
|
14274
|
-
|
|
14275
|
-
|
|
14276
|
-
|
|
14277
|
-
|
|
14278
|
-
|
|
14279
|
-
|
|
14280
|
-
|
|
14281
|
-
|
|
14253
|
+
const tests = [];
|
|
14254
|
+
for (const evalCase of evals) {
|
|
14255
|
+
const id = evalCase.id;
|
|
14256
|
+
if (typeof evalCase.prompt !== "string" || evalCase.prompt.trim() === "") {
|
|
14257
|
+
const caseRef = id !== void 0 ? `id=${id}` : "unknown";
|
|
14258
|
+
logError(`Skipping eval case ${caseRef} in '${source}': missing or empty 'prompt'`);
|
|
14259
|
+
continue;
|
|
14260
|
+
}
|
|
14261
|
+
let assertions;
|
|
14262
|
+
if (evalCase.assertions && evalCase.assertions.length > 0) {
|
|
14263
|
+
assertions = evalCase.assertions.map(
|
|
14264
|
+
(text2, i) => ({
|
|
14265
|
+
name: `assertion-${i + 1}`,
|
|
14266
|
+
type: "llm-grader",
|
|
14267
|
+
prompt: text2
|
|
14268
|
+
})
|
|
14282
14269
|
);
|
|
14283
|
-
return [];
|
|
14284
14270
|
}
|
|
14285
|
-
|
|
14286
|
-
|
|
14287
|
-
|
|
14288
|
-
const cases = await loadCasesFromFile(match);
|
|
14289
|
-
allCases.push(...cases);
|
|
14271
|
+
const metadata = {};
|
|
14272
|
+
if (skill_name) {
|
|
14273
|
+
metadata.skill_name = skill_name;
|
|
14290
14274
|
}
|
|
14291
|
-
|
|
14292
|
-
|
|
14293
|
-
|
|
14294
|
-
|
|
14295
|
-
|
|
14296
|
-
|
|
14297
|
-
|
|
14298
|
-
|
|
14299
|
-
|
|
14300
|
-
expanded.push(...cases);
|
|
14301
|
-
} else {
|
|
14302
|
-
expanded.push(entry);
|
|
14275
|
+
const filePaths = [];
|
|
14276
|
+
if (evalCase.files && evalCase.files.length > 0) {
|
|
14277
|
+
metadata.agent_skills_files = evalCase.files;
|
|
14278
|
+
if (baseDir) {
|
|
14279
|
+
metadata.agent_skills_base_dir = baseDir;
|
|
14280
|
+
for (const file of evalCase.files) {
|
|
14281
|
+
filePaths.push(path4.resolve(baseDir, file));
|
|
14282
|
+
}
|
|
14283
|
+
}
|
|
14303
14284
|
}
|
|
14285
|
+
const prompt = evalCase.prompt;
|
|
14286
|
+
const test = {
|
|
14287
|
+
id: String(id),
|
|
14288
|
+
question: prompt,
|
|
14289
|
+
input: [{ role: "user", content: prompt }],
|
|
14290
|
+
input_segments: [{ type: "text", value: prompt }],
|
|
14291
|
+
expected_output: evalCase.expected_output ? [{ role: "assistant", content: evalCase.expected_output }] : [],
|
|
14292
|
+
reference_answer: evalCase.expected_output,
|
|
14293
|
+
file_paths: filePaths,
|
|
14294
|
+
criteria: evalCase.expected_output ?? "",
|
|
14295
|
+
assertions,
|
|
14296
|
+
...Object.keys(metadata).length > 0 ? { metadata } : {}
|
|
14297
|
+
};
|
|
14298
|
+
tests.push(test);
|
|
14304
14299
|
}
|
|
14305
|
-
return
|
|
14300
|
+
return tests;
|
|
14306
14301
|
}
|
|
14307
14302
|
async function fileExists2(absolutePath) {
|
|
14308
14303
|
try {
|
|
@@ -14320,15 +14315,15 @@ function resolveToAbsolutePath(candidate) {
|
|
|
14320
14315
|
if (candidate.startsWith("file:")) {
|
|
14321
14316
|
return fileURLToPath(candidate);
|
|
14322
14317
|
}
|
|
14323
|
-
return
|
|
14318
|
+
return path22.resolve(candidate);
|
|
14324
14319
|
}
|
|
14325
14320
|
throw new TypeError("Unsupported repoRoot value. Expected string or URL.");
|
|
14326
14321
|
}
|
|
14327
14322
|
function buildDirectoryChain2(filePath, repoRoot) {
|
|
14328
14323
|
const directories = [];
|
|
14329
14324
|
const seen = /* @__PURE__ */ new Set();
|
|
14330
|
-
const boundary =
|
|
14331
|
-
let current =
|
|
14325
|
+
const boundary = path22.resolve(repoRoot);
|
|
14326
|
+
let current = path22.resolve(path22.dirname(filePath));
|
|
14332
14327
|
while (current !== void 0) {
|
|
14333
14328
|
if (!seen.has(current)) {
|
|
14334
14329
|
directories.push(current);
|
|
@@ -14337,7 +14332,7 @@ function buildDirectoryChain2(filePath, repoRoot) {
|
|
|
14337
14332
|
if (current === boundary) {
|
|
14338
14333
|
break;
|
|
14339
14334
|
}
|
|
14340
|
-
const parent =
|
|
14335
|
+
const parent = path22.dirname(current);
|
|
14341
14336
|
if (parent === current) {
|
|
14342
14337
|
break;
|
|
14343
14338
|
}
|
|
@@ -14351,16 +14346,16 @@ function buildDirectoryChain2(filePath, repoRoot) {
|
|
|
14351
14346
|
function buildSearchRoots2(evalPath, repoRoot) {
|
|
14352
14347
|
const uniqueRoots = [];
|
|
14353
14348
|
const addRoot = (root) => {
|
|
14354
|
-
const normalized =
|
|
14349
|
+
const normalized = path22.resolve(root);
|
|
14355
14350
|
if (!uniqueRoots.includes(normalized)) {
|
|
14356
14351
|
uniqueRoots.push(normalized);
|
|
14357
14352
|
}
|
|
14358
14353
|
};
|
|
14359
|
-
let currentDir =
|
|
14354
|
+
let currentDir = path22.dirname(evalPath);
|
|
14360
14355
|
let reachedBoundary = false;
|
|
14361
14356
|
while (!reachedBoundary) {
|
|
14362
14357
|
addRoot(currentDir);
|
|
14363
|
-
const parentDir =
|
|
14358
|
+
const parentDir = path22.dirname(currentDir);
|
|
14364
14359
|
if (currentDir === repoRoot || parentDir === currentDir) {
|
|
14365
14360
|
reachedBoundary = true;
|
|
14366
14361
|
} else {
|
|
@@ -14375,19 +14370,19 @@ function trimLeadingSeparators2(value) {
|
|
|
14375
14370
|
const trimmed = value.replace(/^[/\\]+/, "");
|
|
14376
14371
|
return trimmed.length > 0 ? trimmed : value;
|
|
14377
14372
|
}
|
|
14378
|
-
async function
|
|
14373
|
+
async function resolveFileReference22(rawValue, searchRoots) {
|
|
14379
14374
|
const displayPath = trimLeadingSeparators2(rawValue);
|
|
14380
14375
|
const potentialPaths = [];
|
|
14381
|
-
if (
|
|
14382
|
-
potentialPaths.push(
|
|
14376
|
+
if (path22.isAbsolute(rawValue)) {
|
|
14377
|
+
potentialPaths.push(path22.normalize(rawValue));
|
|
14383
14378
|
}
|
|
14384
14379
|
for (const base of searchRoots) {
|
|
14385
|
-
potentialPaths.push(
|
|
14380
|
+
potentialPaths.push(path22.resolve(base, displayPath));
|
|
14386
14381
|
}
|
|
14387
14382
|
const attempted = [];
|
|
14388
14383
|
const seen = /* @__PURE__ */ new Set();
|
|
14389
14384
|
for (const candidate of potentialPaths) {
|
|
14390
|
-
const absoluteCandidate =
|
|
14385
|
+
const absoluteCandidate = path22.resolve(candidate);
|
|
14391
14386
|
if (seen.has(absoluteCandidate)) {
|
|
14392
14387
|
continue;
|
|
14393
14388
|
}
|
|
@@ -14400,7 +14395,7 @@ async function resolveFileReference3(rawValue, searchRoots) {
|
|
|
14400
14395
|
return { displayPath, attempted };
|
|
14401
14396
|
}
|
|
14402
14397
|
var ANSI_YELLOW2 = "\x1B[33m";
|
|
14403
|
-
var
|
|
14398
|
+
var ANSI_RESET22 = "\x1B[0m";
|
|
14404
14399
|
var DEFAULT_EVAL_PATTERNS = [
|
|
14405
14400
|
"**/evals/**/*.eval.yaml",
|
|
14406
14401
|
"**/evals/**/eval.yaml"
|
|
@@ -14408,12 +14403,12 @@ var DEFAULT_EVAL_PATTERNS = [
|
|
|
14408
14403
|
async function loadConfig(evalFilePath, repoRoot) {
|
|
14409
14404
|
const directories = buildDirectoryChain2(evalFilePath, repoRoot);
|
|
14410
14405
|
for (const directory of directories) {
|
|
14411
|
-
const configPath =
|
|
14406
|
+
const configPath = path32.join(directory, ".agentv", "config.yaml");
|
|
14412
14407
|
if (!await fileExists2(configPath)) {
|
|
14413
14408
|
continue;
|
|
14414
14409
|
}
|
|
14415
14410
|
try {
|
|
14416
|
-
const rawConfig = await
|
|
14411
|
+
const rawConfig = await readFile22(configPath, "utf8");
|
|
14417
14412
|
const parsed = parse(rawConfig);
|
|
14418
14413
|
if (!isJsonObject(parsed)) {
|
|
14419
14414
|
logWarning(`Invalid .agentv/config.yaml format at ${configPath}`);
|
|
@@ -14425,15 +14420,6 @@ async function loadConfig(evalFilePath, repoRoot) {
|
|
|
14425
14420
|
logWarning(`Invalid required_version in ${configPath}, expected string`);
|
|
14426
14421
|
continue;
|
|
14427
14422
|
}
|
|
14428
|
-
const guidelinePatterns = config.guideline_patterns;
|
|
14429
|
-
if (guidelinePatterns !== void 0 && !Array.isArray(guidelinePatterns)) {
|
|
14430
|
-
logWarning(`Invalid guideline_patterns in ${configPath}, expected array`);
|
|
14431
|
-
continue;
|
|
14432
|
-
}
|
|
14433
|
-
if (Array.isArray(guidelinePatterns) && !guidelinePatterns.every((p) => typeof p === "string")) {
|
|
14434
|
-
logWarning(`Invalid guideline_patterns in ${configPath}, all entries must be strings`);
|
|
14435
|
-
continue;
|
|
14436
|
-
}
|
|
14437
14423
|
const evalPatterns = config.eval_patterns;
|
|
14438
14424
|
if (evalPatterns !== void 0 && !Array.isArray(evalPatterns)) {
|
|
14439
14425
|
logWarning(`Invalid eval_patterns in ${configPath}, expected array`);
|
|
@@ -14449,7 +14435,6 @@ async function loadConfig(evalFilePath, repoRoot) {
|
|
|
14449
14435
|
);
|
|
14450
14436
|
return {
|
|
14451
14437
|
required_version: requiredVersion,
|
|
14452
|
-
guideline_patterns: guidelinePatterns,
|
|
14453
14438
|
eval_patterns: evalPatterns,
|
|
14454
14439
|
execution: executionDefaults
|
|
14455
14440
|
};
|
|
@@ -14461,11 +14446,6 @@ async function loadConfig(evalFilePath, repoRoot) {
|
|
|
14461
14446
|
}
|
|
14462
14447
|
return null;
|
|
14463
14448
|
}
|
|
14464
|
-
function isGuidelineFile(filePath, patterns) {
|
|
14465
|
-
const normalized = filePath.split("\\").join("/");
|
|
14466
|
-
const patternsToUse = patterns ?? [];
|
|
14467
|
-
return micromatch.isMatch(normalized, patternsToUse);
|
|
14468
|
-
}
|
|
14469
14449
|
function extractTargetFromSuite(suite) {
|
|
14470
14450
|
const execution = suite.execution;
|
|
14471
14451
|
if (execution && typeof execution === "object" && !Array.isArray(execution)) {
|
|
@@ -14650,7 +14630,7 @@ function parseExecutionDefaults(raw, configPath) {
|
|
|
14650
14630
|
return Object.keys(result).length > 0 ? result : void 0;
|
|
14651
14631
|
}
|
|
14652
14632
|
function logWarning(message) {
|
|
14653
|
-
console.warn(`${ANSI_YELLOW2}Warning: ${message}${
|
|
14633
|
+
console.warn(`${ANSI_YELLOW2}Warning: ${message}${ANSI_RESET22}`);
|
|
14654
14634
|
}
|
|
14655
14635
|
var TEMPLATE_VARIABLES = {
|
|
14656
14636
|
EXPECTED_OUTPUT: "expected_output",
|
|
@@ -14667,10 +14647,10 @@ var REQUIRED_TEMPLATE_VARIABLES = /* @__PURE__ */ new Set([
|
|
|
14667
14647
|
TEMPLATE_VARIABLES.OUTPUT_TEXT,
|
|
14668
14648
|
TEMPLATE_VARIABLES.EXPECTED_OUTPUT
|
|
14669
14649
|
]);
|
|
14670
|
-
var
|
|
14671
|
-
var
|
|
14650
|
+
var ANSI_YELLOW22 = "\x1B[33m";
|
|
14651
|
+
var ANSI_RESET3 = "\x1B[0m";
|
|
14672
14652
|
async function validateCustomPromptContent(promptPath) {
|
|
14673
|
-
const content = await
|
|
14653
|
+
const content = await readFile32(promptPath, "utf8");
|
|
14674
14654
|
validateTemplateVariables(content, promptPath);
|
|
14675
14655
|
}
|
|
14676
14656
|
function validateTemplateVariables(content, source) {
|
|
@@ -14697,14 +14677,14 @@ function validateTemplateVariables(content, source) {
|
|
|
14697
14677
|
);
|
|
14698
14678
|
}
|
|
14699
14679
|
if (invalidVariables.length > 0) {
|
|
14700
|
-
const warningMessage = `${
|
|
14680
|
+
const warningMessage = `${ANSI_YELLOW22}Warning: Custom evaluator template at ${source}
|
|
14701
14681
|
Contains invalid variables: ${invalidVariables.map((v) => `{{ ${v} }}`).join(", ")}
|
|
14702
|
-
Valid variables: ${Array.from(VALID_TEMPLATE_VARIABLES).map((v) => `{{ ${v} }}`).join(", ")}${
|
|
14682
|
+
Valid variables: ${Array.from(VALID_TEMPLATE_VARIABLES).map((v) => `{{ ${v} }}`).join(", ")}${ANSI_RESET3}`;
|
|
14703
14683
|
console.warn(warningMessage);
|
|
14704
14684
|
}
|
|
14705
14685
|
}
|
|
14706
|
-
var
|
|
14707
|
-
var
|
|
14686
|
+
var ANSI_YELLOW3 = "\x1B[33m";
|
|
14687
|
+
var ANSI_RESET4 = "\x1B[0m";
|
|
14708
14688
|
function normalizeEvaluatorType(type) {
|
|
14709
14689
|
return type.replace(/_/g, "-");
|
|
14710
14690
|
}
|
|
@@ -14806,7 +14786,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
14806
14786
|
let command;
|
|
14807
14787
|
if (rawEvaluator.script !== void 0 && rawEvaluator.command === void 0) {
|
|
14808
14788
|
console.warn(
|
|
14809
|
-
`${
|
|
14789
|
+
`${ANSI_YELLOW3}Warning: 'script' is deprecated in evaluator '${name21}' in '${evalId}'. Use 'command' instead.${ANSI_RESET4}`
|
|
14810
14790
|
);
|
|
14811
14791
|
}
|
|
14812
14792
|
const rawCommand = rawEvaluator.command ?? rawEvaluator.script;
|
|
@@ -14832,9 +14812,9 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
14832
14812
|
const cwd = asString(rawEvaluator.cwd);
|
|
14833
14813
|
let resolvedCwd;
|
|
14834
14814
|
if (cwd) {
|
|
14835
|
-
const resolved = await
|
|
14815
|
+
const resolved = await resolveFileReference22(cwd, searchRoots);
|
|
14836
14816
|
if (resolved.resolvedPath) {
|
|
14837
|
-
resolvedCwd =
|
|
14817
|
+
resolvedCwd = path42.resolve(resolved.resolvedPath);
|
|
14838
14818
|
} else {
|
|
14839
14819
|
logWarning2(
|
|
14840
14820
|
`Code-grader evaluator '${name21}' in '${evalId}': cwd not found (${resolved.displayPath})`,
|
|
@@ -14990,9 +14970,9 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
14990
14970
|
const aggregatorPrompt = asString(rawAggregator.prompt);
|
|
14991
14971
|
let promptPath2;
|
|
14992
14972
|
if (aggregatorPrompt) {
|
|
14993
|
-
const resolved = await
|
|
14973
|
+
const resolved = await resolveFileReference22(aggregatorPrompt, searchRoots);
|
|
14994
14974
|
if (resolved.resolvedPath) {
|
|
14995
|
-
promptPath2 =
|
|
14975
|
+
promptPath2 = path42.resolve(resolved.resolvedPath);
|
|
14996
14976
|
}
|
|
14997
14977
|
}
|
|
14998
14978
|
aggregator = {
|
|
@@ -15549,7 +15529,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15549
15529
|
if (isJsonObject2(rawPrompt)) {
|
|
15550
15530
|
if (rawPrompt.script !== void 0 && rawPrompt.command === void 0) {
|
|
15551
15531
|
console.warn(
|
|
15552
|
-
`${
|
|
15532
|
+
`${ANSI_YELLOW3}Warning: 'prompt.script' is deprecated in evaluator '${name21}' in '${evalId}'. Use 'prompt.command' instead.${ANSI_RESET4}`
|
|
15553
15533
|
);
|
|
15554
15534
|
}
|
|
15555
15535
|
const commandArray = asStringArray(
|
|
@@ -15560,9 +15540,9 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15560
15540
|
throw new Error(`Evaluator '${name21}' in '${evalId}': prompt object requires command array`);
|
|
15561
15541
|
}
|
|
15562
15542
|
const commandPath = commandArray[commandArray.length - 1];
|
|
15563
|
-
const resolved = await
|
|
15543
|
+
const resolved = await resolveFileReference22(commandPath, searchRoots);
|
|
15564
15544
|
if (resolved.resolvedPath) {
|
|
15565
|
-
resolvedPromptScript = [...commandArray.slice(0, -1),
|
|
15545
|
+
resolvedPromptScript = [...commandArray.slice(0, -1), path42.resolve(resolved.resolvedPath)];
|
|
15566
15546
|
} else {
|
|
15567
15547
|
throw new Error(
|
|
15568
15548
|
`Evaluator '${name21}' in '${evalId}': prompt command file not found: ${resolved.displayPath}`
|
|
@@ -15573,9 +15553,9 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15573
15553
|
}
|
|
15574
15554
|
} else if (typeof rawPrompt === "string") {
|
|
15575
15555
|
prompt = rawPrompt;
|
|
15576
|
-
const resolved = await
|
|
15556
|
+
const resolved = await resolveFileReference22(prompt, searchRoots);
|
|
15577
15557
|
if (resolved.resolvedPath) {
|
|
15578
|
-
promptPath =
|
|
15558
|
+
promptPath = path42.resolve(resolved.resolvedPath);
|
|
15579
15559
|
try {
|
|
15580
15560
|
await validateCustomPromptContent(promptPath);
|
|
15581
15561
|
} catch (error) {
|
|
@@ -15775,10 +15755,10 @@ function warnUnconsumedCriteria(_criteria, _evaluators, _testId) {
|
|
|
15775
15755
|
function logWarning2(message, details) {
|
|
15776
15756
|
if (details && details.length > 0) {
|
|
15777
15757
|
const detailBlock = details.join("\n");
|
|
15778
|
-
console.warn(`${
|
|
15779
|
-
${detailBlock}${
|
|
15758
|
+
console.warn(`${ANSI_YELLOW3}Warning: ${message}
|
|
15759
|
+
${detailBlock}${ANSI_RESET4}`);
|
|
15780
15760
|
} else {
|
|
15781
|
-
console.warn(`${
|
|
15761
|
+
console.warn(`${ANSI_YELLOW3}Warning: ${message}${ANSI_RESET4}`);
|
|
15782
15762
|
}
|
|
15783
15763
|
}
|
|
15784
15764
|
function parseRequired(value) {
|
|
@@ -16044,10 +16024,6 @@ function formatSegment(segment, mode = "lm") {
|
|
|
16044
16024
|
if (type === "text") {
|
|
16045
16025
|
return asString2(segment.value);
|
|
16046
16026
|
}
|
|
16047
|
-
if (type === "guideline_ref") {
|
|
16048
|
-
const refPath = asString2(segment.path);
|
|
16049
|
-
return refPath ? `<Attached: ${refPath}>` : void 0;
|
|
16050
|
-
}
|
|
16051
16027
|
if (type === "file") {
|
|
16052
16028
|
const filePath = asString2(segment.path);
|
|
16053
16029
|
if (!filePath) {
|
|
@@ -16070,9 +16046,6 @@ function hasVisibleContent(segments) {
|
|
|
16070
16046
|
const value = asString2(segment.value);
|
|
16071
16047
|
return value !== void 0 && value.trim().length > 0;
|
|
16072
16048
|
}
|
|
16073
|
-
if (type === "guideline_ref") {
|
|
16074
|
-
return false;
|
|
16075
|
-
}
|
|
16076
16049
|
if (type === "file") {
|
|
16077
16050
|
const text2 = asString2(segment.text);
|
|
16078
16051
|
return text2 !== void 0 && text2.trim().length > 0;
|
|
@@ -16083,20 +16056,10 @@ function hasVisibleContent(segments) {
|
|
|
16083
16056
|
function asString2(value) {
|
|
16084
16057
|
return typeof value === "string" ? value : void 0;
|
|
16085
16058
|
}
|
|
16086
|
-
var
|
|
16087
|
-
var
|
|
16059
|
+
var ANSI_YELLOW4 = "\x1B[33m";
|
|
16060
|
+
var ANSI_RESET5 = "\x1B[0m";
|
|
16088
16061
|
async function processMessages(options) {
|
|
16089
|
-
const {
|
|
16090
|
-
messages,
|
|
16091
|
-
searchRoots,
|
|
16092
|
-
repoRootPath,
|
|
16093
|
-
guidelinePatterns,
|
|
16094
|
-
guidelinePaths,
|
|
16095
|
-
treatFileSegmentsAsGuidelines,
|
|
16096
|
-
textParts,
|
|
16097
|
-
messageType,
|
|
16098
|
-
verbose
|
|
16099
|
-
} = options;
|
|
16062
|
+
const { messages, searchRoots, repoRootPath, textParts, messageType, verbose } = options;
|
|
16100
16063
|
const segments = [];
|
|
16101
16064
|
for (const message of messages) {
|
|
16102
16065
|
const content = message.content;
|
|
@@ -16128,7 +16091,7 @@ async function processMessages(options) {
|
|
|
16128
16091
|
if (!rawValue) {
|
|
16129
16092
|
continue;
|
|
16130
16093
|
}
|
|
16131
|
-
const { displayPath, resolvedPath, attempted } = await
|
|
16094
|
+
const { displayPath, resolvedPath, attempted } = await resolveFileReference22(
|
|
16132
16095
|
rawValue,
|
|
16133
16096
|
searchRoots
|
|
16134
16097
|
);
|
|
@@ -16139,27 +16102,12 @@ async function processMessages(options) {
|
|
|
16139
16102
|
continue;
|
|
16140
16103
|
}
|
|
16141
16104
|
try {
|
|
16142
|
-
const fileContent = (await
|
|
16143
|
-
const classifyAsGuideline = shouldTreatAsGuideline({
|
|
16144
|
-
messageType,
|
|
16145
|
-
resolvedPath,
|
|
16146
|
-
repoRootPath,
|
|
16147
|
-
guidelinePatterns,
|
|
16148
|
-
treatFileSegmentsAsGuidelines
|
|
16149
|
-
});
|
|
16150
|
-
if (classifyAsGuideline && guidelinePaths) {
|
|
16151
|
-
guidelinePaths.push(path6.resolve(resolvedPath));
|
|
16152
|
-
if (verbose) {
|
|
16153
|
-
console.log(` [Guideline] Found: ${displayPath}`);
|
|
16154
|
-
console.log(` Resolved to: ${resolvedPath}`);
|
|
16155
|
-
}
|
|
16156
|
-
continue;
|
|
16157
|
-
}
|
|
16105
|
+
const fileContent = (await readFile4(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
16158
16106
|
segments.push({
|
|
16159
16107
|
type: "file",
|
|
16160
16108
|
path: displayPath,
|
|
16161
16109
|
text: fileContent,
|
|
16162
|
-
resolvedPath:
|
|
16110
|
+
resolvedPath: path5.resolve(resolvedPath)
|
|
16163
16111
|
});
|
|
16164
16112
|
if (verbose) {
|
|
16165
16113
|
const label = messageType === "input" ? "[File]" : "[Expected Output File]";
|
|
@@ -16182,26 +16130,6 @@ async function processMessages(options) {
|
|
|
16182
16130
|
}
|
|
16183
16131
|
return segments;
|
|
16184
16132
|
}
|
|
16185
|
-
function shouldTreatAsGuideline(options) {
|
|
16186
|
-
const {
|
|
16187
|
-
messageType,
|
|
16188
|
-
resolvedPath,
|
|
16189
|
-
repoRootPath,
|
|
16190
|
-
guidelinePatterns,
|
|
16191
|
-
treatFileSegmentsAsGuidelines
|
|
16192
|
-
} = options;
|
|
16193
|
-
if (messageType !== "input") {
|
|
16194
|
-
return false;
|
|
16195
|
-
}
|
|
16196
|
-
if (treatFileSegmentsAsGuidelines) {
|
|
16197
|
-
return true;
|
|
16198
|
-
}
|
|
16199
|
-
if (!guidelinePatterns || guidelinePatterns.length === 0) {
|
|
16200
|
-
return false;
|
|
16201
|
-
}
|
|
16202
|
-
const relativeToRepo = path6.relative(repoRootPath, resolvedPath);
|
|
16203
|
-
return isGuidelineFile(relativeToRepo, guidelinePatterns);
|
|
16204
|
-
}
|
|
16205
16133
|
function asString3(value) {
|
|
16206
16134
|
return typeof value === "string" ? value : void 0;
|
|
16207
16135
|
}
|
|
@@ -16227,10 +16155,10 @@ function cloneJsonValue(value) {
|
|
|
16227
16155
|
function logWarning3(message, details) {
|
|
16228
16156
|
if (details && details.length > 0) {
|
|
16229
16157
|
const detailBlock = details.join("\n");
|
|
16230
|
-
console.warn(`${
|
|
16231
|
-
${detailBlock}${
|
|
16158
|
+
console.warn(`${ANSI_YELLOW4}Warning: ${message}
|
|
16159
|
+
${detailBlock}${ANSI_RESET5}`);
|
|
16232
16160
|
} else {
|
|
16233
|
-
console.warn(`${
|
|
16161
|
+
console.warn(`${ANSI_YELLOW4}Warning: ${message}${ANSI_RESET5}`);
|
|
16234
16162
|
}
|
|
16235
16163
|
}
|
|
16236
16164
|
async function processExpectedMessages(options) {
|
|
@@ -16259,7 +16187,7 @@ async function processExpectedMessages(options) {
|
|
|
16259
16187
|
if (!rawValue) {
|
|
16260
16188
|
continue;
|
|
16261
16189
|
}
|
|
16262
|
-
const { displayPath, resolvedPath, attempted } = await
|
|
16190
|
+
const { displayPath, resolvedPath, attempted } = await resolveFileReference22(
|
|
16263
16191
|
rawValue,
|
|
16264
16192
|
searchRoots
|
|
16265
16193
|
);
|
|
@@ -16269,12 +16197,12 @@ async function processExpectedMessages(options) {
|
|
|
16269
16197
|
continue;
|
|
16270
16198
|
}
|
|
16271
16199
|
try {
|
|
16272
|
-
const fileContent = (await
|
|
16200
|
+
const fileContent = (await readFile4(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
16273
16201
|
processedContent.push({
|
|
16274
16202
|
type: "file",
|
|
16275
16203
|
path: displayPath,
|
|
16276
16204
|
text: fileContent,
|
|
16277
|
-
resolvedPath:
|
|
16205
|
+
resolvedPath: path5.resolve(resolvedPath)
|
|
16278
16206
|
});
|
|
16279
16207
|
if (verbose) {
|
|
16280
16208
|
console.log(` [Expected Output File] Found: ${displayPath}`);
|
|
@@ -16367,11 +16295,11 @@ function resolveInputMessages(raw, suiteInputFiles) {
|
|
|
16367
16295
|
function resolveExpectedMessages(raw) {
|
|
16368
16296
|
return expandExpectedOutputShorthand(raw.expected_output);
|
|
16369
16297
|
}
|
|
16370
|
-
var
|
|
16298
|
+
var ANSI_YELLOW5 = "\x1B[33m";
|
|
16371
16299
|
var ANSI_RED2 = "\x1B[31m";
|
|
16372
|
-
var
|
|
16300
|
+
var ANSI_RESET6 = "\x1B[0m";
|
|
16373
16301
|
function detectFormat(filePath) {
|
|
16374
|
-
const ext =
|
|
16302
|
+
const ext = path6.extname(filePath).toLowerCase();
|
|
16375
16303
|
if (ext === ".jsonl") return "jsonl";
|
|
16376
16304
|
if (ext === ".yaml" || ext === ".yml") return "yaml";
|
|
16377
16305
|
if (ext === ".json") return "agent-skills-json";
|
|
@@ -16380,9 +16308,9 @@ function detectFormat(filePath) {
|
|
|
16380
16308
|
);
|
|
16381
16309
|
}
|
|
16382
16310
|
async function loadSidecarMetadata(jsonlPath, verbose) {
|
|
16383
|
-
const dir =
|
|
16384
|
-
const base =
|
|
16385
|
-
const sidecarPath =
|
|
16311
|
+
const dir = path6.dirname(jsonlPath);
|
|
16312
|
+
const base = path6.basename(jsonlPath, ".jsonl");
|
|
16313
|
+
const sidecarPath = path6.join(dir, `${base}.yaml`);
|
|
16386
16314
|
if (!await fileExists2(sidecarPath)) {
|
|
16387
16315
|
if (verbose) {
|
|
16388
16316
|
logWarning4(`Sidecar metadata file not found: ${sidecarPath} (using defaults)`);
|
|
@@ -16390,7 +16318,7 @@ async function loadSidecarMetadata(jsonlPath, verbose) {
|
|
|
16390
16318
|
return {};
|
|
16391
16319
|
}
|
|
16392
16320
|
try {
|
|
16393
|
-
const content = await
|
|
16321
|
+
const content = await readFile5(sidecarPath, "utf8");
|
|
16394
16322
|
const parsed = interpolateEnv(parseYaml2(content), process.env);
|
|
16395
16323
|
if (!isJsonObject(parsed)) {
|
|
16396
16324
|
logWarning4(`Invalid sidecar metadata format in ${sidecarPath}`);
|
|
@@ -16398,7 +16326,7 @@ async function loadSidecarMetadata(jsonlPath, verbose) {
|
|
|
16398
16326
|
}
|
|
16399
16327
|
return {
|
|
16400
16328
|
description: asString4(parsed.description),
|
|
16401
|
-
|
|
16329
|
+
name: asString4(parsed.name),
|
|
16402
16330
|
execution: isJsonObject(parsed.execution) ? parsed.execution : void 0,
|
|
16403
16331
|
evaluator: parsed.evaluator
|
|
16404
16332
|
};
|
|
@@ -16431,23 +16359,21 @@ function parseJsonlContent(content, filePath) {
|
|
|
16431
16359
|
async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
16432
16360
|
const verbose = options?.verbose ?? false;
|
|
16433
16361
|
const filterPattern = options?.filter;
|
|
16434
|
-
const absoluteTestPath =
|
|
16362
|
+
const absoluteTestPath = path6.resolve(evalFilePath);
|
|
16435
16363
|
const repoRootPath = resolveToAbsolutePath(repoRoot);
|
|
16436
16364
|
const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
|
|
16437
|
-
const config = await loadConfig(absoluteTestPath, repoRootPath);
|
|
16438
|
-
const guidelinePatterns = config?.guideline_patterns;
|
|
16439
16365
|
const sidecar = await loadSidecarMetadata(absoluteTestPath, verbose);
|
|
16440
|
-
const rawFile = await
|
|
16366
|
+
const rawFile = await readFile5(absoluteTestPath, "utf8");
|
|
16441
16367
|
const rawCases = parseJsonlContent(rawFile, evalFilePath);
|
|
16442
|
-
const
|
|
16443
|
-
const
|
|
16368
|
+
const fallbackEvalSet = path6.basename(absoluteTestPath, ".jsonl") || "eval";
|
|
16369
|
+
const evalSetName = sidecar.name && sidecar.name.trim().length > 0 ? sidecar.name : fallbackEvalSet;
|
|
16444
16370
|
const globalEvaluator = coerceEvaluator(sidecar.evaluator, "sidecar") ?? "llm-grader";
|
|
16445
16371
|
const globalExecution = sidecar.execution;
|
|
16446
16372
|
if (verbose) {
|
|
16447
16373
|
console.log(`
|
|
16448
16374
|
[JSONL Dataset: ${evalFilePath}]`);
|
|
16449
16375
|
console.log(` Cases: ${rawCases.length}`);
|
|
16450
|
-
console.log(`
|
|
16376
|
+
console.log(` Eval set: ${evalSetName}`);
|
|
16451
16377
|
if (sidecar.description) {
|
|
16452
16378
|
console.log(` Description: ${sidecar.description}`);
|
|
16453
16379
|
}
|
|
@@ -16457,7 +16383,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
16457
16383
|
const evalcase = rawCases[lineIndex];
|
|
16458
16384
|
const lineNumber = lineIndex + 1;
|
|
16459
16385
|
const id = asString4(evalcase.id);
|
|
16460
|
-
if (filterPattern && (!id || !
|
|
16386
|
+
if (filterPattern && (!id || !micromatch.isMatch(id, filterPattern))) {
|
|
16461
16387
|
continue;
|
|
16462
16388
|
}
|
|
16463
16389
|
const conversationId = asString4(evalcase.conversation_id);
|
|
@@ -16480,14 +16406,11 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
16480
16406
|
continue;
|
|
16481
16407
|
}
|
|
16482
16408
|
const hasExpectedMessages = expectedMessages.length > 0;
|
|
16483
|
-
const guidelinePaths = [];
|
|
16484
16409
|
const inputTextParts = [];
|
|
16485
16410
|
const inputSegments = await processMessages({
|
|
16486
16411
|
messages: inputMessages,
|
|
16487
16412
|
searchRoots,
|
|
16488
16413
|
repoRootPath,
|
|
16489
|
-
guidelinePatterns,
|
|
16490
|
-
guidelinePaths,
|
|
16491
16414
|
textParts: inputTextParts,
|
|
16492
16415
|
messageType: "input",
|
|
16493
16416
|
verbose
|
|
@@ -16537,40 +16460,20 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
16537
16460
|
userFilePaths.push(segment.resolvedPath);
|
|
16538
16461
|
}
|
|
16539
16462
|
}
|
|
16540
|
-
const allFilePaths = [
|
|
16541
|
-
...guidelinePaths.map((guidelinePath) => path7.resolve(guidelinePath)),
|
|
16542
|
-
...userFilePaths
|
|
16543
|
-
];
|
|
16544
16463
|
const testCase = {
|
|
16545
16464
|
id,
|
|
16546
|
-
|
|
16465
|
+
eval_set: evalSetName,
|
|
16547
16466
|
conversation_id: conversationId,
|
|
16548
16467
|
question,
|
|
16549
16468
|
input: inputMessages,
|
|
16550
16469
|
input_segments: inputSegments,
|
|
16551
16470
|
expected_output: outputSegments,
|
|
16552
16471
|
reference_answer: referenceAnswer,
|
|
16553
|
-
|
|
16554
|
-
guideline_patterns: guidelinePatterns,
|
|
16555
|
-
file_paths: allFilePaths,
|
|
16472
|
+
file_paths: userFilePaths,
|
|
16556
16473
|
criteria: outcome ?? "",
|
|
16557
16474
|
evaluator: evalCaseEvaluatorKind,
|
|
16558
16475
|
assertions: evaluators
|
|
16559
16476
|
};
|
|
16560
|
-
if (verbose) {
|
|
16561
|
-
console.log(`
|
|
16562
|
-
[Test: ${id}]`);
|
|
16563
|
-
if (testCase.guideline_paths.length > 0) {
|
|
16564
|
-
console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
|
|
16565
|
-
for (const guidelinePath of testCase.guideline_paths) {
|
|
16566
|
-
console.log(` - ${guidelinePath}`);
|
|
16567
|
-
}
|
|
16568
|
-
} else if (!guidelinePatterns || guidelinePatterns.length === 0) {
|
|
16569
|
-
console.log(" No guidelines found (guideline_patterns not configured)");
|
|
16570
|
-
} else {
|
|
16571
|
-
console.log(" No guidelines found");
|
|
16572
|
-
}
|
|
16573
|
-
}
|
|
16574
16477
|
results.push(testCase);
|
|
16575
16478
|
}
|
|
16576
16479
|
return results;
|
|
@@ -16581,19 +16484,19 @@ function asString4(value) {
|
|
|
16581
16484
|
function logWarning4(message, details) {
|
|
16582
16485
|
if (details && details.length > 0) {
|
|
16583
16486
|
const detailBlock = details.join("\n");
|
|
16584
|
-
console.warn(`${
|
|
16585
|
-
${detailBlock}${
|
|
16487
|
+
console.warn(`${ANSI_YELLOW5}Warning: ${message}
|
|
16488
|
+
${detailBlock}${ANSI_RESET6}`);
|
|
16586
16489
|
} else {
|
|
16587
|
-
console.warn(`${
|
|
16490
|
+
console.warn(`${ANSI_YELLOW5}Warning: ${message}${ANSI_RESET6}`);
|
|
16588
16491
|
}
|
|
16589
16492
|
}
|
|
16590
16493
|
function logError2(message, details) {
|
|
16591
16494
|
if (details && details.length > 0) {
|
|
16592
16495
|
const detailBlock = details.join("\n");
|
|
16593
16496
|
console.error(`${ANSI_RED2}Error: ${message}
|
|
16594
|
-
${detailBlock}${
|
|
16497
|
+
${detailBlock}${ANSI_RESET6}`);
|
|
16595
16498
|
} else {
|
|
16596
|
-
console.error(`${ANSI_RED2}Error: ${message}${
|
|
16499
|
+
console.error(`${ANSI_RED2}Error: ${message}${ANSI_RESET6}`);
|
|
16597
16500
|
}
|
|
16598
16501
|
}
|
|
16599
16502
|
var MetadataSchema = external_exports2.object({
|
|
@@ -16623,28 +16526,7 @@ function parseMetadata(suite) {
|
|
|
16623
16526
|
requires: suite.requires
|
|
16624
16527
|
});
|
|
16625
16528
|
}
|
|
16626
|
-
var ANSI_YELLOW7 = "\x1B[33m";
|
|
16627
|
-
var ANSI_RESET8 = "\x1B[0m";
|
|
16628
16529
|
async function buildPromptInputs(testCase, mode = "lm") {
|
|
16629
|
-
const guidelineParts = [];
|
|
16630
|
-
for (const rawPath of testCase.guideline_paths) {
|
|
16631
|
-
const absolutePath = path8.resolve(rawPath);
|
|
16632
|
-
if (!await fileExists2(absolutePath)) {
|
|
16633
|
-
logWarning5(`Could not read guideline file ${absolutePath}: file does not exist`);
|
|
16634
|
-
continue;
|
|
16635
|
-
}
|
|
16636
|
-
try {
|
|
16637
|
-
const content = (await readFile7(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
|
|
16638
|
-
guidelineParts.push({
|
|
16639
|
-
content,
|
|
16640
|
-
isFile: true,
|
|
16641
|
-
displayPath: path8.basename(absolutePath)
|
|
16642
|
-
});
|
|
16643
|
-
} catch (error) {
|
|
16644
|
-
logWarning5(`Could not read guideline file ${absolutePath}: ${error.message}`);
|
|
16645
|
-
}
|
|
16646
|
-
}
|
|
16647
|
-
const guidelines = formatFileContents(guidelineParts);
|
|
16648
16530
|
const segmentsByMessage = [];
|
|
16649
16531
|
const fileContentsByPath = /* @__PURE__ */ new Map();
|
|
16650
16532
|
for (const segment of testCase.input_segments) {
|
|
@@ -16669,10 +16551,6 @@ async function buildPromptInputs(testCase, mode = "lm") {
|
|
|
16669
16551
|
if (type === "file") {
|
|
16670
16552
|
const value = asString5(segment.value);
|
|
16671
16553
|
if (!value) continue;
|
|
16672
|
-
if (testCase.guideline_patterns && isGuidelineFile(value, testCase.guideline_patterns)) {
|
|
16673
|
-
messageSegments.push({ type: "guideline_ref", path: value });
|
|
16674
|
-
continue;
|
|
16675
|
-
}
|
|
16676
16554
|
const fileText = fileContentsByPath.get(value);
|
|
16677
16555
|
if (fileText !== void 0) {
|
|
16678
16556
|
messageSegments.push({ type: "file", text: fileText, path: value });
|
|
@@ -16721,10 +16599,6 @@ ${messageContent}`);
|
|
|
16721
16599
|
} else {
|
|
16722
16600
|
const questionParts = [];
|
|
16723
16601
|
for (const segment of testCase.input_segments) {
|
|
16724
|
-
if (segment.type === "file" && typeof segment.path === "string" && testCase.guideline_patterns && isGuidelineFile(segment.path, testCase.guideline_patterns)) {
|
|
16725
|
-
questionParts.push(`<Attached: ${segment.path}>`);
|
|
16726
|
-
continue;
|
|
16727
|
-
}
|
|
16728
16602
|
const formattedContent = formatSegment(segment, mode);
|
|
16729
16603
|
if (formattedContent) {
|
|
16730
16604
|
questionParts.push(formattedContent);
|
|
@@ -16735,11 +16609,9 @@ ${messageContent}`);
|
|
|
16735
16609
|
const chatPrompt = useRoleMarkers ? buildChatPromptFromSegments({
|
|
16736
16610
|
messages: testCase.input,
|
|
16737
16611
|
segmentsByMessage,
|
|
16738
|
-
guidelinePatterns: testCase.guideline_patterns,
|
|
16739
|
-
guidelineContent: guidelines,
|
|
16740
16612
|
mode
|
|
16741
16613
|
}) : void 0;
|
|
16742
|
-
return { question,
|
|
16614
|
+
return { question, chatPrompt };
|
|
16743
16615
|
}
|
|
16744
16616
|
function needsRoleMarkers(messages, processedSegmentsByMessage) {
|
|
16745
16617
|
if (messages.some((msg) => msg.role === "assistant" || msg.role === "tool")) {
|
|
@@ -16754,14 +16626,7 @@ function needsRoleMarkers(messages, processedSegmentsByMessage) {
|
|
|
16754
16626
|
return messagesWithContent > 1;
|
|
16755
16627
|
}
|
|
16756
16628
|
function buildChatPromptFromSegments(options) {
|
|
16757
|
-
const {
|
|
16758
|
-
messages,
|
|
16759
|
-
segmentsByMessage,
|
|
16760
|
-
guidelinePatterns,
|
|
16761
|
-
guidelineContent,
|
|
16762
|
-
systemPrompt,
|
|
16763
|
-
mode = "lm"
|
|
16764
|
-
} = options;
|
|
16629
|
+
const { messages, segmentsByMessage, systemPrompt, mode = "lm" } = options;
|
|
16765
16630
|
if (messages.length === 0) {
|
|
16766
16631
|
return void 0;
|
|
16767
16632
|
}
|
|
@@ -16769,11 +16634,6 @@ function buildChatPromptFromSegments(options) {
|
|
|
16769
16634
|
if (systemPrompt && systemPrompt.trim().length > 0) {
|
|
16770
16635
|
systemSegments.push(systemPrompt.trim());
|
|
16771
16636
|
}
|
|
16772
|
-
if (guidelineContent && guidelineContent.trim().length > 0) {
|
|
16773
|
-
systemSegments.push(`[[ ## Guidelines ## ]]
|
|
16774
|
-
|
|
16775
|
-
${guidelineContent.trim()}`);
|
|
16776
|
-
}
|
|
16777
16637
|
let startIndex = 0;
|
|
16778
16638
|
while (startIndex < messages.length && messages[startIndex].role === "system") {
|
|
16779
16639
|
const segments = segmentsByMessage[startIndex];
|
|
@@ -16809,15 +16669,8 @@ ${guidelineContent.trim()}`);
|
|
|
16809
16669
|
contentParts.push("@[Tool]:");
|
|
16810
16670
|
}
|
|
16811
16671
|
for (const segment of segments) {
|
|
16812
|
-
if (segment.type === "guideline_ref") {
|
|
16813
|
-
continue;
|
|
16814
|
-
}
|
|
16815
16672
|
const formatted = formatSegment(segment, mode);
|
|
16816
16673
|
if (formatted) {
|
|
16817
|
-
const isGuidelineRef = segment.type === "file" && typeof segment.path === "string" && guidelinePatterns && isGuidelineFile(segment.path, guidelinePatterns);
|
|
16818
|
-
if (isGuidelineRef) {
|
|
16819
|
-
continue;
|
|
16820
|
-
}
|
|
16821
16674
|
contentParts.push(formatted);
|
|
16822
16675
|
}
|
|
16823
16676
|
}
|
|
@@ -16835,28 +16688,25 @@ ${guidelineContent.trim()}`);
|
|
|
16835
16688
|
function asString5(value) {
|
|
16836
16689
|
return typeof value === "string" ? value : void 0;
|
|
16837
16690
|
}
|
|
16838
|
-
|
|
16839
|
-
console.warn(`${ANSI_YELLOW7}Warning: ${message}${ANSI_RESET8}`);
|
|
16840
|
-
}
|
|
16841
|
-
var ANSI_YELLOW8 = "\x1B[33m";
|
|
16691
|
+
var ANSI_YELLOW6 = "\x1B[33m";
|
|
16842
16692
|
var ANSI_RED3 = "\x1B[31m";
|
|
16843
|
-
var
|
|
16693
|
+
var ANSI_RESET7 = "\x1B[0m";
|
|
16844
16694
|
function resolveTests(suite) {
|
|
16845
16695
|
if (suite.tests !== void 0) return suite.tests;
|
|
16846
16696
|
if (suite.eval_cases !== void 0) {
|
|
16847
|
-
|
|
16697
|
+
logWarning5("'eval_cases' is deprecated. Use 'tests' instead.");
|
|
16848
16698
|
return suite.eval_cases;
|
|
16849
16699
|
}
|
|
16850
16700
|
if (suite.evalcases !== void 0) {
|
|
16851
|
-
|
|
16701
|
+
logWarning5("'evalcases' is deprecated. Use 'tests' instead.");
|
|
16852
16702
|
return suite.evalcases;
|
|
16853
16703
|
}
|
|
16854
16704
|
return void 0;
|
|
16855
16705
|
}
|
|
16856
16706
|
async function readTestSuiteMetadata(testFilePath) {
|
|
16857
16707
|
try {
|
|
16858
|
-
const absolutePath =
|
|
16859
|
-
const content = await
|
|
16708
|
+
const absolutePath = path7.resolve(testFilePath);
|
|
16709
|
+
const content = await readFile6(absolutePath, "utf8");
|
|
16860
16710
|
const parsed = interpolateEnv(parse2(content), process.env);
|
|
16861
16711
|
if (!isJsonObject(parsed)) {
|
|
16862
16712
|
return {};
|
|
@@ -16907,26 +16757,25 @@ var loadEvalCases = loadTests;
|
|
|
16907
16757
|
async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
16908
16758
|
const verbose = options?.verbose ?? false;
|
|
16909
16759
|
const filterPattern = options?.filter;
|
|
16910
|
-
const absoluteTestPath =
|
|
16760
|
+
const absoluteTestPath = path7.resolve(evalFilePath);
|
|
16911
16761
|
const repoRootPath = resolveToAbsolutePath(repoRoot);
|
|
16912
16762
|
const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
|
|
16913
16763
|
const config = await loadConfig(absoluteTestPath, repoRootPath);
|
|
16914
|
-
const
|
|
16915
|
-
const rawFile = await readFile8(absoluteTestPath, "utf8");
|
|
16764
|
+
const rawFile = await readFile6(absoluteTestPath, "utf8");
|
|
16916
16765
|
const interpolated = interpolateEnv(parse2(rawFile), process.env);
|
|
16917
16766
|
if (!isJsonObject(interpolated)) {
|
|
16918
16767
|
throw new Error(`Invalid test file format: ${evalFilePath}`);
|
|
16919
16768
|
}
|
|
16920
16769
|
const suite = interpolated;
|
|
16921
|
-
const
|
|
16922
|
-
const
|
|
16923
|
-
const
|
|
16770
|
+
const evalSetNameFromSuite = asString6(suite.name)?.trim();
|
|
16771
|
+
const fallbackEvalSet = path7.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
|
|
16772
|
+
const evalSetName = evalSetNameFromSuite && evalSetNameFromSuite.length > 0 ? evalSetNameFromSuite : fallbackEvalSet;
|
|
16924
16773
|
const rawTestcases = resolveTests(suite);
|
|
16925
16774
|
const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm-grader";
|
|
16926
|
-
const evalFileDir =
|
|
16775
|
+
const evalFileDir = path7.dirname(absoluteTestPath);
|
|
16927
16776
|
let expandedTestcases;
|
|
16928
16777
|
if (typeof rawTestcases === "string") {
|
|
16929
|
-
const externalPath =
|
|
16778
|
+
const externalPath = path7.resolve(evalFileDir, rawTestcases);
|
|
16930
16779
|
expandedTestcases = await loadCasesFromFile(externalPath);
|
|
16931
16780
|
} else if (Array.isArray(rawTestcases)) {
|
|
16932
16781
|
expandedTestcases = await expandFileReferences(rawTestcases, evalFileDir);
|
|
@@ -16940,18 +16789,18 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
16940
16789
|
const _globalTarget = asString6(rawGlobalExecution?.target) ?? asString6(suite.target);
|
|
16941
16790
|
const suiteAssertions = suite.assertions ?? suite.assert;
|
|
16942
16791
|
if (suite.assert !== void 0 && suite.assertions === void 0) {
|
|
16943
|
-
|
|
16792
|
+
logWarning5("'assert' is deprecated at the suite level. Use 'assertions' instead.");
|
|
16944
16793
|
}
|
|
16945
16794
|
const globalExecution = suiteAssertions !== void 0 ? { ...rawGlobalExecution ?? {}, assertions: suiteAssertions } : rawGlobalExecution;
|
|
16946
16795
|
const results = [];
|
|
16947
16796
|
for (const rawEvalcase of expandedTestcases) {
|
|
16948
16797
|
if (!isJsonObject(rawEvalcase)) {
|
|
16949
|
-
|
|
16798
|
+
logWarning5("Skipping invalid test entry (expected object)");
|
|
16950
16799
|
continue;
|
|
16951
16800
|
}
|
|
16952
16801
|
const evalcase = rawEvalcase;
|
|
16953
16802
|
const id = asString6(evalcase.id);
|
|
16954
|
-
if (filterPattern && (!id || !
|
|
16803
|
+
if (filterPattern && (!id || !micromatch2.isMatch(id, filterPattern))) {
|
|
16955
16804
|
continue;
|
|
16956
16805
|
}
|
|
16957
16806
|
const conversationId = asString6(evalcase.conversation_id);
|
|
@@ -16959,7 +16808,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
16959
16808
|
if (!outcome && evalcase.expected_outcome !== void 0) {
|
|
16960
16809
|
outcome = asString6(evalcase.expected_outcome);
|
|
16961
16810
|
if (outcome) {
|
|
16962
|
-
|
|
16811
|
+
logWarning5(
|
|
16963
16812
|
`Test '${asString6(evalcase.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
|
|
16964
16813
|
);
|
|
16965
16814
|
}
|
|
@@ -16979,15 +16828,11 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
16979
16828
|
const effectiveSuiteInputMessages = suiteInputMessages && !skipDefaults ? suiteInputMessages : void 0;
|
|
16980
16829
|
const inputMessages = effectiveSuiteInputMessages ? [...effectiveSuiteInputMessages, ...testInputMessages] : testInputMessages;
|
|
16981
16830
|
const hasExpectedMessages = expectedMessages.length > 0;
|
|
16982
|
-
const guidelinePaths = [];
|
|
16983
16831
|
const inputTextParts = [];
|
|
16984
16832
|
const suiteInputSegments = effectiveSuiteInputMessages ? await processMessages({
|
|
16985
16833
|
messages: effectiveSuiteInputMessages,
|
|
16986
16834
|
searchRoots,
|
|
16987
16835
|
repoRootPath,
|
|
16988
|
-
guidelinePatterns,
|
|
16989
|
-
guidelinePaths,
|
|
16990
|
-
treatFileSegmentsAsGuidelines: true,
|
|
16991
16836
|
textParts: inputTextParts,
|
|
16992
16837
|
messageType: "input",
|
|
16993
16838
|
verbose
|
|
@@ -16996,8 +16841,6 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
16996
16841
|
messages: testInputMessages,
|
|
16997
16842
|
searchRoots,
|
|
16998
16843
|
repoRootPath,
|
|
16999
|
-
guidelinePatterns,
|
|
17000
|
-
guidelinePaths,
|
|
17001
16844
|
textParts: inputTextParts,
|
|
17002
16845
|
messageType: "input",
|
|
17003
16846
|
verbose
|
|
@@ -17046,26 +16889,20 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
17046
16889
|
userFilePaths.push(segment.resolvedPath);
|
|
17047
16890
|
}
|
|
17048
16891
|
}
|
|
17049
|
-
const allFilePaths = [
|
|
17050
|
-
...guidelinePaths.map((guidelinePath) => path9.resolve(guidelinePath)),
|
|
17051
|
-
...userFilePaths
|
|
17052
|
-
];
|
|
17053
16892
|
const caseWorkspace = await resolveWorkspaceConfig(evalcase.workspace, evalFileDir);
|
|
17054
16893
|
const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
|
|
17055
16894
|
const metadata = isJsonObject(evalcase.metadata) ? evalcase.metadata : void 0;
|
|
17056
16895
|
const caseTargets = extractTargetsFromTestCase(evalcase);
|
|
17057
16896
|
const testCase = {
|
|
17058
16897
|
id,
|
|
17059
|
-
|
|
16898
|
+
eval_set: evalSetName,
|
|
17060
16899
|
conversation_id: conversationId,
|
|
17061
16900
|
question,
|
|
17062
16901
|
input: inputMessages,
|
|
17063
16902
|
input_segments: inputSegments,
|
|
17064
16903
|
expected_output: outputSegments,
|
|
17065
16904
|
reference_answer: referenceAnswer,
|
|
17066
|
-
|
|
17067
|
-
guideline_patterns: guidelinePatterns,
|
|
17068
|
-
file_paths: allFilePaths,
|
|
16905
|
+
file_paths: userFilePaths,
|
|
17069
16906
|
criteria: outcome ?? "",
|
|
17070
16907
|
evaluator: evalCaseEvaluatorKind,
|
|
17071
16908
|
assertions: evaluators,
|
|
@@ -17073,20 +16910,6 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
17073
16910
|
metadata,
|
|
17074
16911
|
targets: caseTargets
|
|
17075
16912
|
};
|
|
17076
|
-
if (verbose) {
|
|
17077
|
-
console.log(`
|
|
17078
|
-
[Test: ${id}]`);
|
|
17079
|
-
if (testCase.guideline_paths.length > 0) {
|
|
17080
|
-
console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
|
|
17081
|
-
for (const guidelinePath of testCase.guideline_paths) {
|
|
17082
|
-
console.log(` - ${guidelinePath}`);
|
|
17083
|
-
}
|
|
17084
|
-
} else if (!guidelinePatterns || guidelinePatterns.length === 0) {
|
|
17085
|
-
console.log(" No guidelines found (guideline_patterns not configured)");
|
|
17086
|
-
} else {
|
|
17087
|
-
console.log(" No guidelines found");
|
|
17088
|
-
}
|
|
17089
|
-
}
|
|
17090
16913
|
results.push(testCase);
|
|
17091
16914
|
}
|
|
17092
16915
|
return { tests: results, parsed: suite };
|
|
@@ -17105,7 +16928,7 @@ function parseWorkspaceScriptConfig(raw, evalFileDir) {
|
|
|
17105
16928
|
if (!isJsonObject(raw)) return void 0;
|
|
17106
16929
|
const obj = raw;
|
|
17107
16930
|
if (obj.script !== void 0 && obj.command === void 0) {
|
|
17108
|
-
|
|
16931
|
+
logWarning5("'script' is deprecated. Use 'command' instead.");
|
|
17109
16932
|
}
|
|
17110
16933
|
const commandSource = obj.command ?? obj.script;
|
|
17111
16934
|
if (!Array.isArray(commandSource) || commandSource.length === 0) return void 0;
|
|
@@ -17113,8 +16936,8 @@ function parseWorkspaceScriptConfig(raw, evalFileDir) {
|
|
|
17113
16936
|
if (commandArr.length === 0) return void 0;
|
|
17114
16937
|
const timeoutMs = typeof obj.timeout_ms === "number" ? obj.timeout_ms : void 0;
|
|
17115
16938
|
let cwd = typeof obj.cwd === "string" ? obj.cwd : void 0;
|
|
17116
|
-
if (cwd && !
|
|
17117
|
-
cwd =
|
|
16939
|
+
if (cwd && !path7.isAbsolute(cwd)) {
|
|
16940
|
+
cwd = path7.resolve(evalFileDir, cwd);
|
|
17118
16941
|
}
|
|
17119
16942
|
const config = { command: commandArr };
|
|
17120
16943
|
if (timeoutMs !== void 0) {
|
|
@@ -17204,10 +17027,10 @@ function parseWorkspaceHooksConfig(raw, evalFileDir) {
|
|
|
17204
17027
|
}
|
|
17205
17028
|
async function resolveWorkspaceConfig(raw, evalFileDir) {
|
|
17206
17029
|
if (typeof raw === "string") {
|
|
17207
|
-
const workspaceFilePath =
|
|
17030
|
+
const workspaceFilePath = path7.resolve(evalFileDir, raw);
|
|
17208
17031
|
let content;
|
|
17209
17032
|
try {
|
|
17210
|
-
content = await
|
|
17033
|
+
content = await readFile6(workspaceFilePath, "utf8");
|
|
17211
17034
|
} catch {
|
|
17212
17035
|
throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
|
|
17213
17036
|
}
|
|
@@ -17217,7 +17040,7 @@ async function resolveWorkspaceConfig(raw, evalFileDir) {
|
|
|
17217
17040
|
`Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
|
|
17218
17041
|
);
|
|
17219
17042
|
}
|
|
17220
|
-
const workspaceFileDir =
|
|
17043
|
+
const workspaceFileDir = path7.dirname(workspaceFilePath);
|
|
17221
17044
|
return parseWorkspaceConfig(parsed, workspaceFileDir);
|
|
17222
17045
|
}
|
|
17223
17046
|
return parseWorkspaceConfig(raw, evalFileDir);
|
|
@@ -17237,8 +17060,8 @@ function parseWorkspaceConfig(raw, evalFileDir) {
|
|
|
17237
17060
|
throw new Error("workspace.static has been removed. Use workspace.mode='static'.");
|
|
17238
17061
|
}
|
|
17239
17062
|
let template = typeof obj.template === "string" ? obj.template : void 0;
|
|
17240
|
-
if (template && !
|
|
17241
|
-
template =
|
|
17063
|
+
if (template && !path7.isAbsolute(template)) {
|
|
17064
|
+
template = path7.resolve(evalFileDir, template);
|
|
17242
17065
|
}
|
|
17243
17066
|
const isolation = obj.isolation === "shared" || obj.isolation === "per_test" ? obj.isolation : void 0;
|
|
17244
17067
|
const repos = Array.isArray(obj.repos) ? obj.repos.map(parseRepoConfig).filter(Boolean) : void 0;
|
|
@@ -17288,22 +17111,22 @@ function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
|
|
|
17288
17111
|
function asString6(value) {
|
|
17289
17112
|
return typeof value === "string" ? value : void 0;
|
|
17290
17113
|
}
|
|
17291
|
-
function
|
|
17114
|
+
function logWarning5(message, details) {
|
|
17292
17115
|
if (details && details.length > 0) {
|
|
17293
17116
|
const detailBlock = details.join("\n");
|
|
17294
|
-
console.warn(`${
|
|
17295
|
-
${detailBlock}${
|
|
17117
|
+
console.warn(`${ANSI_YELLOW6}Warning: ${message}
|
|
17118
|
+
${detailBlock}${ANSI_RESET7}`);
|
|
17296
17119
|
} else {
|
|
17297
|
-
console.warn(`${
|
|
17120
|
+
console.warn(`${ANSI_YELLOW6}Warning: ${message}${ANSI_RESET7}`);
|
|
17298
17121
|
}
|
|
17299
17122
|
}
|
|
17300
17123
|
function logError3(message, details) {
|
|
17301
17124
|
if (details && details.length > 0) {
|
|
17302
17125
|
const detailBlock = details.join("\n");
|
|
17303
17126
|
console.error(`${ANSI_RED3}Error: ${message}
|
|
17304
|
-
${detailBlock}${
|
|
17127
|
+
${detailBlock}${ANSI_RESET7}`);
|
|
17305
17128
|
} else {
|
|
17306
|
-
console.error(`${ANSI_RED3}Error: ${message}${
|
|
17129
|
+
console.error(`${ANSI_RED3}Error: ${message}${ANSI_RESET7}`);
|
|
17307
17130
|
}
|
|
17308
17131
|
}
|
|
17309
17132
|
function codeGraderInstruction(graderName, description) {
|
|
@@ -17549,7 +17372,7 @@ function transpileEvalYaml(suite, source = "EVAL.yaml") {
|
|
|
17549
17372
|
function transpileEvalYamlFile(evalYamlPath) {
|
|
17550
17373
|
const content = readFileSync(evalYamlPath, "utf8");
|
|
17551
17374
|
const parsed = parse3(content);
|
|
17552
|
-
return transpileEvalYaml(parsed,
|
|
17375
|
+
return transpileEvalYaml(parsed, path8.basename(evalYamlPath));
|
|
17553
17376
|
}
|
|
17554
17377
|
function getOutputFilenames(result) {
|
|
17555
17378
|
const names = /* @__PURE__ */ new Map();
|
|
@@ -17776,10 +17599,10 @@ function buildChatPrompt(request) {
|
|
|
17776
17599
|
if (hasSystemMessage) {
|
|
17777
17600
|
return provided;
|
|
17778
17601
|
}
|
|
17779
|
-
const systemContent2 = resolveSystemContent(request
|
|
17602
|
+
const systemContent2 = resolveSystemContent(request);
|
|
17780
17603
|
return [{ role: "system", content: systemContent2 }, ...provided];
|
|
17781
17604
|
}
|
|
17782
|
-
const systemContent = resolveSystemContent(request
|
|
17605
|
+
const systemContent = resolveSystemContent(request);
|
|
17783
17606
|
const userContent = request.question.trim();
|
|
17784
17607
|
const prompt = [
|
|
17785
17608
|
{ role: "system", content: systemContent },
|
|
@@ -17787,18 +17610,13 @@ function buildChatPrompt(request) {
|
|
|
17787
17610
|
];
|
|
17788
17611
|
return prompt;
|
|
17789
17612
|
}
|
|
17790
|
-
function resolveSystemContent(request
|
|
17613
|
+
function resolveSystemContent(request) {
|
|
17791
17614
|
const systemSegments = [];
|
|
17792
17615
|
if (request.systemPrompt && request.systemPrompt.trim().length > 0) {
|
|
17793
17616
|
systemSegments.push(request.systemPrompt.trim());
|
|
17794
17617
|
} else {
|
|
17795
17618
|
systemSegments.push(DEFAULT_SYSTEM_PROMPT);
|
|
17796
17619
|
}
|
|
17797
|
-
if (includeGuidelines && request.guidelines && request.guidelines.trim().length > 0) {
|
|
17798
|
-
systemSegments.push(`[[ ## Guidelines ## ]]
|
|
17799
|
-
|
|
17800
|
-
${request.guidelines.trim()}`);
|
|
17801
|
-
}
|
|
17802
17620
|
return systemSegments.join("\n\n");
|
|
17803
17621
|
}
|
|
17804
17622
|
function toModelMessages(chatPrompt) {
|
|
@@ -18027,16 +17845,10 @@ function subscribeToClaudeLogEntries(listener) {
|
|
|
18027
17845
|
store.delete(listener);
|
|
18028
17846
|
};
|
|
18029
17847
|
}
|
|
18030
|
-
function buildPromptDocument(request, inputFiles
|
|
17848
|
+
function buildPromptDocument(request, inputFiles) {
|
|
18031
17849
|
const parts = [];
|
|
18032
|
-
const guidelineFiles = collectGuidelineFiles(
|
|
18033
|
-
inputFiles,
|
|
18034
|
-
options?.guidelinePatterns ?? request.guideline_patterns,
|
|
18035
|
-
options?.guidelineOverrides
|
|
18036
|
-
);
|
|
18037
17850
|
const inputFilesList = collectInputFiles(inputFiles);
|
|
18038
|
-
const
|
|
18039
|
-
const prereadBlock = buildMandatoryPrereadBlock(guidelineFiles, nonGuidelineInputFiles);
|
|
17851
|
+
const prereadBlock = buildMandatoryPrereadBlock(inputFilesList);
|
|
18040
17852
|
if (prereadBlock.length > 0) {
|
|
18041
17853
|
parts.push("\n", prereadBlock);
|
|
18042
17854
|
}
|
|
@@ -18049,62 +17861,36 @@ function normalizeInputFiles(inputFiles) {
|
|
|
18049
17861
|
}
|
|
18050
17862
|
const deduped = /* @__PURE__ */ new Map();
|
|
18051
17863
|
for (const inputFile of inputFiles) {
|
|
18052
|
-
const absolutePath =
|
|
17864
|
+
const absolutePath = path9.resolve(inputFile);
|
|
18053
17865
|
if (!deduped.has(absolutePath)) {
|
|
18054
17866
|
deduped.set(absolutePath, absolutePath);
|
|
18055
17867
|
}
|
|
18056
17868
|
}
|
|
18057
17869
|
return Array.from(deduped.values());
|
|
18058
17870
|
}
|
|
18059
|
-
function collectGuidelineFiles(inputFiles, guidelinePatterns, overrides) {
|
|
18060
|
-
if (!inputFiles || inputFiles.length === 0) {
|
|
18061
|
-
return [];
|
|
18062
|
-
}
|
|
18063
|
-
const unique = /* @__PURE__ */ new Map();
|
|
18064
|
-
for (const inputFile of inputFiles) {
|
|
18065
|
-
const absolutePath = path11.resolve(inputFile);
|
|
18066
|
-
if (overrides?.has(absolutePath)) {
|
|
18067
|
-
if (!unique.has(absolutePath)) {
|
|
18068
|
-
unique.set(absolutePath, absolutePath);
|
|
18069
|
-
}
|
|
18070
|
-
continue;
|
|
18071
|
-
}
|
|
18072
|
-
const normalized = absolutePath.split(path11.sep).join("/");
|
|
18073
|
-
if (isGuidelineFile(normalized, guidelinePatterns)) {
|
|
18074
|
-
if (!unique.has(absolutePath)) {
|
|
18075
|
-
unique.set(absolutePath, absolutePath);
|
|
18076
|
-
}
|
|
18077
|
-
}
|
|
18078
|
-
}
|
|
18079
|
-
return Array.from(unique.values());
|
|
18080
|
-
}
|
|
18081
17871
|
function collectInputFiles(inputFiles) {
|
|
18082
17872
|
if (!inputFiles || inputFiles.length === 0) {
|
|
18083
17873
|
return [];
|
|
18084
17874
|
}
|
|
18085
17875
|
const unique = /* @__PURE__ */ new Map();
|
|
18086
17876
|
for (const inputFile of inputFiles) {
|
|
18087
|
-
const absolutePath =
|
|
17877
|
+
const absolutePath = path9.resolve(inputFile);
|
|
18088
17878
|
if (!unique.has(absolutePath)) {
|
|
18089
17879
|
unique.set(absolutePath, absolutePath);
|
|
18090
17880
|
}
|
|
18091
17881
|
}
|
|
18092
17882
|
return Array.from(unique.values());
|
|
18093
17883
|
}
|
|
18094
|
-
function buildMandatoryPrereadBlock(
|
|
18095
|
-
if (
|
|
17884
|
+
function buildMandatoryPrereadBlock(inputFiles) {
|
|
17885
|
+
if (inputFiles.length === 0) {
|
|
18096
17886
|
return "";
|
|
18097
17887
|
}
|
|
18098
17888
|
const buildList = (files) => files.map((absolutePath) => {
|
|
18099
|
-
const fileName =
|
|
17889
|
+
const fileName = path9.basename(absolutePath);
|
|
18100
17890
|
const fileUri = pathToFileUri(absolutePath);
|
|
18101
17891
|
return `* [${fileName}](${fileUri})`;
|
|
18102
17892
|
});
|
|
18103
17893
|
const sections = [];
|
|
18104
|
-
if (guidelineFiles.length > 0) {
|
|
18105
|
-
sections.push(`Read all guideline files:
|
|
18106
|
-
${buildList(guidelineFiles).join("\n")}.`);
|
|
18107
|
-
}
|
|
18108
17894
|
if (inputFiles.length > 0) {
|
|
18109
17895
|
sections.push(`Read all input files:
|
|
18110
17896
|
${buildList(inputFiles).join("\n")}.`);
|
|
@@ -18116,7 +17902,7 @@ ${buildList(inputFiles).join("\n")}.`);
|
|
|
18116
17902
|
return sections.join("\n");
|
|
18117
17903
|
}
|
|
18118
17904
|
function pathToFileUri(filePath) {
|
|
18119
|
-
const absolutePath =
|
|
17905
|
+
const absolutePath = path9.isAbsolute(filePath) ? filePath : path9.resolve(filePath);
|
|
18120
17906
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
18121
17907
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
18122
17908
|
return `file:///${normalizedPath}`;
|
|
@@ -18261,10 +18047,10 @@ var ClaudeCliProvider = class {
|
|
|
18261
18047
|
}
|
|
18262
18048
|
resolveCwd(cwdOverride) {
|
|
18263
18049
|
if (cwdOverride) {
|
|
18264
|
-
return
|
|
18050
|
+
return path10.resolve(cwdOverride);
|
|
18265
18051
|
}
|
|
18266
18052
|
if (this.config.cwd) {
|
|
18267
|
-
return
|
|
18053
|
+
return path10.resolve(this.config.cwd);
|
|
18268
18054
|
}
|
|
18269
18055
|
return void 0;
|
|
18270
18056
|
}
|
|
@@ -18274,9 +18060,9 @@ var ClaudeCliProvider = class {
|
|
|
18274
18060
|
return void 0;
|
|
18275
18061
|
}
|
|
18276
18062
|
if (this.config.logDir) {
|
|
18277
|
-
return
|
|
18063
|
+
return path10.resolve(this.config.logDir);
|
|
18278
18064
|
}
|
|
18279
|
-
return
|
|
18065
|
+
return path10.join(process.cwd(), ".agentv", "logs", "claude-cli");
|
|
18280
18066
|
}
|
|
18281
18067
|
async createStreamLogger(request) {
|
|
18282
18068
|
const logDir = this.resolveLogDirectory();
|
|
@@ -18290,7 +18076,7 @@ var ClaudeCliProvider = class {
|
|
|
18290
18076
|
console.warn(`Skipping Claude CLI stream logging (could not create ${logDir}): ${message}`);
|
|
18291
18077
|
return void 0;
|
|
18292
18078
|
}
|
|
18293
|
-
const filePath =
|
|
18079
|
+
const filePath = path10.join(logDir, buildLogFilename(request, this.targetName));
|
|
18294
18080
|
try {
|
|
18295
18081
|
const logger = await ClaudeCliStreamLogger.create({
|
|
18296
18082
|
filePath,
|
|
@@ -18765,10 +18551,10 @@ var ClaudeSdkProvider = class {
|
|
|
18765
18551
|
}
|
|
18766
18552
|
resolveCwd(cwdOverride) {
|
|
18767
18553
|
if (cwdOverride) {
|
|
18768
|
-
return
|
|
18554
|
+
return path11.resolve(cwdOverride);
|
|
18769
18555
|
}
|
|
18770
18556
|
if (this.config.cwd) {
|
|
18771
|
-
return
|
|
18557
|
+
return path11.resolve(this.config.cwd);
|
|
18772
18558
|
}
|
|
18773
18559
|
return void 0;
|
|
18774
18560
|
}
|
|
@@ -18778,9 +18564,9 @@ var ClaudeSdkProvider = class {
|
|
|
18778
18564
|
return void 0;
|
|
18779
18565
|
}
|
|
18780
18566
|
if (this.config.logDir) {
|
|
18781
|
-
return
|
|
18567
|
+
return path11.resolve(this.config.logDir);
|
|
18782
18568
|
}
|
|
18783
|
-
return
|
|
18569
|
+
return path11.join(process.cwd(), ".agentv", "logs", "claude");
|
|
18784
18570
|
}
|
|
18785
18571
|
async createStreamLogger(request) {
|
|
18786
18572
|
const logDir = this.resolveLogDirectory();
|
|
@@ -18794,7 +18580,7 @@ var ClaudeSdkProvider = class {
|
|
|
18794
18580
|
console.warn(`Skipping Claude stream logging (could not create ${logDir}): ${message}`);
|
|
18795
18581
|
return void 0;
|
|
18796
18582
|
}
|
|
18797
|
-
const filePath =
|
|
18583
|
+
const filePath = path11.join(logDir, buildLogFilename2(request, this.targetName));
|
|
18798
18584
|
try {
|
|
18799
18585
|
const logger = await ClaudeStreamLogger.create({
|
|
18800
18586
|
filePath,
|
|
@@ -19202,7 +18988,6 @@ var CliProvider = class {
|
|
|
19202
18988
|
const { values: templateValues, promptFilePath } = await buildTemplateValues(
|
|
19203
18989
|
{
|
|
19204
18990
|
question: "",
|
|
19205
|
-
guidelines: "",
|
|
19206
18991
|
inputFiles: batchInputFiles,
|
|
19207
18992
|
evalCaseId: "batch",
|
|
19208
18993
|
attempt: 0
|
|
@@ -19435,7 +19220,6 @@ var CliProvider = class {
|
|
|
19435
19220
|
const { values: templateValues, promptFilePath } = await buildTemplateValues(
|
|
19436
19221
|
{
|
|
19437
19222
|
question: "",
|
|
19438
|
-
guidelines: "",
|
|
19439
19223
|
inputFiles: [],
|
|
19440
19224
|
evalCaseId: "healthcheck",
|
|
19441
19225
|
attempt: 0
|
|
@@ -19476,7 +19260,6 @@ async function buildTemplateValues(request, config, outputFilePath) {
|
|
|
19476
19260
|
values: {
|
|
19477
19261
|
PROMPT: shellEscape(request.question ?? ""),
|
|
19478
19262
|
PROMPT_FILE: shellEscape(promptFilePath),
|
|
19479
|
-
GUIDELINES: shellEscape(request.guidelines ?? ""),
|
|
19480
19263
|
EVAL_ID: shellEscape(request.evalCaseId ?? ""),
|
|
19481
19264
|
ATTEMPT: shellEscape(String(request.attempt ?? 0)),
|
|
19482
19265
|
FILES: formatFileList(inputFiles, config.filesFormat),
|
|
@@ -19498,7 +19281,7 @@ function normalizeInputFiles2(inputFiles) {
|
|
|
19498
19281
|
}
|
|
19499
19282
|
const unique = /* @__PURE__ */ new Map();
|
|
19500
19283
|
for (const inputFile of inputFiles) {
|
|
19501
|
-
const absolutePath =
|
|
19284
|
+
const absolutePath = path12.resolve(inputFile);
|
|
19502
19285
|
if (!unique.has(absolutePath)) {
|
|
19503
19286
|
unique.set(absolutePath, absolutePath);
|
|
19504
19287
|
}
|
|
@@ -19512,7 +19295,7 @@ function formatFileList(files, template) {
|
|
|
19512
19295
|
const formatter = template ?? "{path}";
|
|
19513
19296
|
return files.map((filePath) => {
|
|
19514
19297
|
const escapedPath = shellEscape(filePath);
|
|
19515
|
-
const escapedName = shellEscape(
|
|
19298
|
+
const escapedName = shellEscape(path12.basename(filePath));
|
|
19516
19299
|
return formatter.replaceAll("{path}", escapedPath).replaceAll("{basename}", escapedName);
|
|
19517
19300
|
}).join(" ");
|
|
19518
19301
|
}
|
|
@@ -19536,7 +19319,7 @@ function generateOutputFilePath(evalCaseId, extension = ".json") {
|
|
|
19536
19319
|
const safeEvalId = evalCaseId || "unknown";
|
|
19537
19320
|
const timestamp = Date.now();
|
|
19538
19321
|
const random = Math.random().toString(36).substring(2, 9);
|
|
19539
|
-
return
|
|
19322
|
+
return path12.join(os.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
|
|
19540
19323
|
}
|
|
19541
19324
|
function formatTimeoutSuffix2(timeoutMs) {
|
|
19542
19325
|
if (!timeoutMs || timeoutMs <= 0) {
|
|
@@ -19775,10 +19558,10 @@ ${basePrompt}` : basePrompt;
|
|
|
19775
19558
|
}
|
|
19776
19559
|
resolveCwd(cwdOverride) {
|
|
19777
19560
|
if (cwdOverride) {
|
|
19778
|
-
return
|
|
19561
|
+
return path13.resolve(cwdOverride);
|
|
19779
19562
|
}
|
|
19780
19563
|
if (this.config.cwd) {
|
|
19781
|
-
return
|
|
19564
|
+
return path13.resolve(this.config.cwd);
|
|
19782
19565
|
}
|
|
19783
19566
|
return void 0;
|
|
19784
19567
|
}
|
|
@@ -19788,9 +19571,9 @@ ${basePrompt}` : basePrompt;
|
|
|
19788
19571
|
return void 0;
|
|
19789
19572
|
}
|
|
19790
19573
|
if (this.config.logDir) {
|
|
19791
|
-
return
|
|
19574
|
+
return path13.resolve(this.config.logDir);
|
|
19792
19575
|
}
|
|
19793
|
-
return
|
|
19576
|
+
return path13.join(process.cwd(), ".agentv", "logs", "codex");
|
|
19794
19577
|
}
|
|
19795
19578
|
async createStreamLogger(request) {
|
|
19796
19579
|
const logDir = this.resolveLogDirectory();
|
|
@@ -19804,7 +19587,7 @@ ${basePrompt}` : basePrompt;
|
|
|
19804
19587
|
console.warn(`Skipping Codex SDK stream logging (could not create ${logDir}): ${message}`);
|
|
19805
19588
|
return void 0;
|
|
19806
19589
|
}
|
|
19807
|
-
const filePath =
|
|
19590
|
+
const filePath = path13.join(logDir, buildLogFilename3(request, this.targetName));
|
|
19808
19591
|
try {
|
|
19809
19592
|
const logger = await CodexSdkStreamLogger.create({
|
|
19810
19593
|
filePath,
|
|
@@ -20017,7 +19800,7 @@ function resolvePlatformCliPath() {
|
|
|
20017
19800
|
try {
|
|
20018
19801
|
const resolved = import.meta.resolve(`${packageName}/package.json`);
|
|
20019
19802
|
const packageJsonPath = resolved.startsWith("file:") ? fileURLToPath2(resolved) : resolved;
|
|
20020
|
-
const binaryPath =
|
|
19803
|
+
const binaryPath = path14.join(path14.dirname(packageJsonPath), binaryName);
|
|
20021
19804
|
if (existsSync(binaryPath)) {
|
|
20022
19805
|
return binaryPath;
|
|
20023
19806
|
}
|
|
@@ -20025,7 +19808,7 @@ function resolvePlatformCliPath() {
|
|
|
20025
19808
|
}
|
|
20026
19809
|
let searchDir = process.cwd();
|
|
20027
19810
|
for (let i = 0; i < 10; i++) {
|
|
20028
|
-
const standardPath =
|
|
19811
|
+
const standardPath = path14.join(
|
|
20029
19812
|
searchDir,
|
|
20030
19813
|
"node_modules",
|
|
20031
19814
|
...packageName.split("/"),
|
|
@@ -20034,13 +19817,13 @@ function resolvePlatformCliPath() {
|
|
|
20034
19817
|
if (existsSync(standardPath)) {
|
|
20035
19818
|
return standardPath;
|
|
20036
19819
|
}
|
|
20037
|
-
const bunDir =
|
|
19820
|
+
const bunDir = path14.join(searchDir, "node_modules", ".bun");
|
|
20038
19821
|
const prefix = `@github+copilot-${osPart}-${archPart}@`;
|
|
20039
19822
|
try {
|
|
20040
19823
|
const entries = readdirSync(bunDir);
|
|
20041
19824
|
for (const entry of entries) {
|
|
20042
19825
|
if (entry.startsWith(prefix)) {
|
|
20043
|
-
const candidate =
|
|
19826
|
+
const candidate = path14.join(
|
|
20044
19827
|
bunDir,
|
|
20045
19828
|
entry,
|
|
20046
19829
|
"node_modules",
|
|
@@ -20055,7 +19838,7 @@ function resolvePlatformCliPath() {
|
|
|
20055
19838
|
}
|
|
20056
19839
|
} catch {
|
|
20057
19840
|
}
|
|
20058
|
-
const parent =
|
|
19841
|
+
const parent = path14.dirname(searchDir);
|
|
20059
19842
|
if (parent === searchDir) break;
|
|
20060
19843
|
searchDir = parent;
|
|
20061
19844
|
}
|
|
@@ -20391,10 +20174,10 @@ var CopilotCliProvider = class {
|
|
|
20391
20174
|
}
|
|
20392
20175
|
resolveCwd(cwdOverride) {
|
|
20393
20176
|
if (cwdOverride) {
|
|
20394
|
-
return
|
|
20177
|
+
return path15.resolve(cwdOverride);
|
|
20395
20178
|
}
|
|
20396
20179
|
if (this.config.cwd) {
|
|
20397
|
-
return
|
|
20180
|
+
return path15.resolve(this.config.cwd);
|
|
20398
20181
|
}
|
|
20399
20182
|
return void 0;
|
|
20400
20183
|
}
|
|
@@ -20413,9 +20196,9 @@ var CopilotCliProvider = class {
|
|
|
20413
20196
|
return void 0;
|
|
20414
20197
|
}
|
|
20415
20198
|
if (this.config.logDir) {
|
|
20416
|
-
return
|
|
20199
|
+
return path15.resolve(this.config.logDir);
|
|
20417
20200
|
}
|
|
20418
|
-
return
|
|
20201
|
+
return path15.join(process.cwd(), ".agentv", "logs", "copilot-cli");
|
|
20419
20202
|
}
|
|
20420
20203
|
async createStreamLogger(request) {
|
|
20421
20204
|
const logDir = this.resolveLogDirectory();
|
|
@@ -20429,7 +20212,7 @@ var CopilotCliProvider = class {
|
|
|
20429
20212
|
console.warn(`Skipping Copilot CLI stream logging (could not create ${logDir}): ${message}`);
|
|
20430
20213
|
return void 0;
|
|
20431
20214
|
}
|
|
20432
|
-
const filePath =
|
|
20215
|
+
const filePath = path15.join(logDir, buildLogFilename4(request, this.targetName, "copilot-cli"));
|
|
20433
20216
|
try {
|
|
20434
20217
|
const logger = await CopilotStreamLogger.create(
|
|
20435
20218
|
{
|
|
@@ -20794,10 +20577,10 @@ var CopilotSdkProvider = class {
|
|
|
20794
20577
|
}
|
|
20795
20578
|
resolveCwd(cwdOverride) {
|
|
20796
20579
|
if (cwdOverride) {
|
|
20797
|
-
return
|
|
20580
|
+
return path16.resolve(cwdOverride);
|
|
20798
20581
|
}
|
|
20799
20582
|
if (this.config.cwd) {
|
|
20800
|
-
return
|
|
20583
|
+
return path16.resolve(this.config.cwd);
|
|
20801
20584
|
}
|
|
20802
20585
|
return void 0;
|
|
20803
20586
|
}
|
|
@@ -20806,9 +20589,9 @@ var CopilotSdkProvider = class {
|
|
|
20806
20589
|
return void 0;
|
|
20807
20590
|
}
|
|
20808
20591
|
if (this.config.logDir) {
|
|
20809
|
-
return
|
|
20592
|
+
return path16.resolve(this.config.logDir);
|
|
20810
20593
|
}
|
|
20811
|
-
return
|
|
20594
|
+
return path16.join(process.cwd(), ".agentv", "logs", "copilot-sdk");
|
|
20812
20595
|
}
|
|
20813
20596
|
async createStreamLogger(request) {
|
|
20814
20597
|
const logDir = this.resolveLogDirectory();
|
|
@@ -20822,7 +20605,7 @@ var CopilotSdkProvider = class {
|
|
|
20822
20605
|
console.warn(`Skipping Copilot SDK stream logging (could not create ${logDir}): ${message}`);
|
|
20823
20606
|
return void 0;
|
|
20824
20607
|
}
|
|
20825
|
-
const filePath =
|
|
20608
|
+
const filePath = path16.join(logDir, buildLogFilename4(request, this.targetName, "copilot-sdk"));
|
|
20826
20609
|
try {
|
|
20827
20610
|
const logger = await CopilotStreamLogger.create(
|
|
20828
20611
|
{
|
|
@@ -20897,8 +20680,7 @@ var MockProvider = class {
|
|
|
20897
20680
|
return {
|
|
20898
20681
|
output: [{ role: "assistant", content: this.cannedResponse }],
|
|
20899
20682
|
raw: {
|
|
20900
|
-
question: request.question
|
|
20901
|
-
guidelines: request.guidelines
|
|
20683
|
+
question: request.question
|
|
20902
20684
|
}
|
|
20903
20685
|
};
|
|
20904
20686
|
}
|
|
@@ -21241,7 +21023,7 @@ var PiCodingAgentProvider = class {
|
|
|
21241
21023
|
const workspaceRoot = await this.createWorkspace();
|
|
21242
21024
|
const logger = await this.createStreamLogger(request).catch(() => void 0);
|
|
21243
21025
|
try {
|
|
21244
|
-
const promptFile =
|
|
21026
|
+
const promptFile = path17.join(workspaceRoot, PROMPT_FILENAME);
|
|
21245
21027
|
await writeFile(promptFile, request.question, "utf8");
|
|
21246
21028
|
const args = this.buildPiArgs(request.question, inputFiles, request.captureFileChanges);
|
|
21247
21029
|
const cwd = this.resolveCwd(workspaceRoot, request.cwd);
|
|
@@ -21303,12 +21085,12 @@ var PiCodingAgentProvider = class {
|
|
|
21303
21085
|
}
|
|
21304
21086
|
resolveCwd(workspaceRoot, cwdOverride) {
|
|
21305
21087
|
if (cwdOverride) {
|
|
21306
|
-
return
|
|
21088
|
+
return path17.resolve(cwdOverride);
|
|
21307
21089
|
}
|
|
21308
21090
|
if (!this.config.cwd) {
|
|
21309
21091
|
return workspaceRoot;
|
|
21310
21092
|
}
|
|
21311
|
-
return
|
|
21093
|
+
return path17.resolve(this.config.cwd);
|
|
21312
21094
|
}
|
|
21313
21095
|
buildPiArgs(prompt, inputFiles, _captureFileChanges) {
|
|
21314
21096
|
const args = [];
|
|
@@ -21397,7 +21179,7 @@ ${prompt}` : prompt;
|
|
|
21397
21179
|
return env;
|
|
21398
21180
|
}
|
|
21399
21181
|
async createWorkspace() {
|
|
21400
|
-
return await mkdtemp(
|
|
21182
|
+
return await mkdtemp(path17.join(tmpdir(), WORKSPACE_PREFIX));
|
|
21401
21183
|
}
|
|
21402
21184
|
async cleanupWorkspace(workspaceRoot) {
|
|
21403
21185
|
try {
|
|
@@ -21407,9 +21189,9 @@ ${prompt}` : prompt;
|
|
|
21407
21189
|
}
|
|
21408
21190
|
resolveLogDirectory() {
|
|
21409
21191
|
if (this.config.logDir) {
|
|
21410
|
-
return
|
|
21192
|
+
return path17.resolve(this.config.logDir);
|
|
21411
21193
|
}
|
|
21412
|
-
return
|
|
21194
|
+
return path17.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
|
|
21413
21195
|
}
|
|
21414
21196
|
async createStreamLogger(request) {
|
|
21415
21197
|
const logDir = this.resolveLogDirectory();
|
|
@@ -21423,7 +21205,7 @@ ${prompt}` : prompt;
|
|
|
21423
21205
|
console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
|
|
21424
21206
|
return void 0;
|
|
21425
21207
|
}
|
|
21426
|
-
const filePath =
|
|
21208
|
+
const filePath = path17.join(logDir, buildLogFilename5(request, this.targetName));
|
|
21427
21209
|
try {
|
|
21428
21210
|
const logger = await PiStreamLogger.create({
|
|
21429
21211
|
filePath,
|
|
@@ -21933,7 +21715,7 @@ async function readDirEntries(target) {
|
|
|
21933
21715
|
const entries = await readdir(target, { withFileTypes: true });
|
|
21934
21716
|
return entries.map((entry) => ({
|
|
21935
21717
|
name: entry.name,
|
|
21936
|
-
absolutePath:
|
|
21718
|
+
absolutePath: path18.join(target, entry.name),
|
|
21937
21719
|
isDirectory: entry.isDirectory()
|
|
21938
21720
|
}));
|
|
21939
21721
|
}
|
|
@@ -21947,7 +21729,7 @@ async function removeIfExists(target) {
|
|
|
21947
21729
|
}
|
|
21948
21730
|
}
|
|
21949
21731
|
function pathToFileUri2(filePath) {
|
|
21950
|
-
const absolutePath =
|
|
21732
|
+
const absolutePath = path19.isAbsolute(filePath) ? filePath : path19.resolve(filePath);
|
|
21951
21733
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
21952
21734
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
21953
21735
|
return `file:///${normalizedPath}`;
|
|
@@ -22039,8 +21821,8 @@ function createBatchRequestPrompt(userQuery, responseFileTmp, responseFileFinal,
|
|
|
22039
21821
|
});
|
|
22040
21822
|
}
|
|
22041
21823
|
function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateContent) {
|
|
22042
|
-
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${
|
|
22043
|
-
const responseList = responseFiles.map((file) => `"${
|
|
21824
|
+
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${path20.basename(file)}`).join("\n");
|
|
21825
|
+
const responseList = responseFiles.map((file) => `"${path20.basename(file)}"`).join(", ");
|
|
22044
21826
|
return renderTemplate2(templateContent, {
|
|
22045
21827
|
requestFiles: requestLines,
|
|
22046
21828
|
responseList
|
|
@@ -22079,7 +21861,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
22079
21861
|
const maxAttempts = 10;
|
|
22080
21862
|
while (attempts < maxAttempts) {
|
|
22081
21863
|
try {
|
|
22082
|
-
const content = await
|
|
21864
|
+
const content = await readFile7(responseFileFinal, { encoding: "utf8" });
|
|
22083
21865
|
if (!silent) {
|
|
22084
21866
|
process.stdout.write(`${content}
|
|
22085
21867
|
`);
|
|
@@ -22100,7 +21882,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
22100
21882
|
}
|
|
22101
21883
|
async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, silent = false, timeoutMs = DEFAULT_TIMEOUT_MS) {
|
|
22102
21884
|
if (!silent) {
|
|
22103
|
-
const fileList = responseFilesFinal.map((file) =>
|
|
21885
|
+
const fileList = responseFilesFinal.map((file) => path21.basename(file)).join(", ");
|
|
22104
21886
|
console.error(`waiting for ${responseFilesFinal.length} batch response(s): ${fileList}`);
|
|
22105
21887
|
}
|
|
22106
21888
|
const deadline = Date.now() + timeoutMs;
|
|
@@ -22109,7 +21891,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
22109
21891
|
while (pending.size > 0) {
|
|
22110
21892
|
if (Date.now() >= deadline) {
|
|
22111
21893
|
if (!silent) {
|
|
22112
|
-
const remaining = [...pending].map((f) =>
|
|
21894
|
+
const remaining = [...pending].map((f) => path21.basename(f)).join(", ");
|
|
22113
21895
|
console.error(
|
|
22114
21896
|
`error: timed out after ${Math.round(timeoutMs / 1e3)}s waiting for batch responses. Still pending: ${remaining}`
|
|
22115
21897
|
);
|
|
@@ -22136,7 +21918,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
22136
21918
|
const maxAttempts = 10;
|
|
22137
21919
|
while (attempts < maxAttempts) {
|
|
22138
21920
|
try {
|
|
22139
|
-
const content = await
|
|
21921
|
+
const content = await readFile7(file, { encoding: "utf8" });
|
|
22140
21922
|
if (!silent) {
|
|
22141
21923
|
process.stdout.write(`${content}
|
|
22142
21924
|
`);
|
|
@@ -22166,25 +21948,25 @@ function getAgentvHome() {
|
|
|
22166
21948
|
}
|
|
22167
21949
|
return envHome;
|
|
22168
21950
|
}
|
|
22169
|
-
return
|
|
21951
|
+
return path222.join(os2.homedir(), ".agentv");
|
|
22170
21952
|
}
|
|
22171
21953
|
function getWorkspacesRoot() {
|
|
22172
|
-
return
|
|
21954
|
+
return path222.join(getAgentvHome(), "workspaces");
|
|
22173
21955
|
}
|
|
22174
21956
|
function getSubagentsRoot() {
|
|
22175
|
-
return
|
|
21957
|
+
return path222.join(getAgentvHome(), "subagents");
|
|
22176
21958
|
}
|
|
22177
21959
|
function getTraceStateRoot() {
|
|
22178
|
-
return
|
|
21960
|
+
return path222.join(getAgentvHome(), "trace-state");
|
|
22179
21961
|
}
|
|
22180
21962
|
function getWorkspacePoolRoot() {
|
|
22181
|
-
return
|
|
21963
|
+
return path222.join(getAgentvHome(), "workspace-pool");
|
|
22182
21964
|
}
|
|
22183
21965
|
var DEFAULT_LOCK_NAME = "subagent.lock";
|
|
22184
21966
|
var DEFAULT_ALIVE_FILENAME = ".alive";
|
|
22185
21967
|
function getDefaultSubagentRoot(vscodeCmd = "code") {
|
|
22186
21968
|
const folder = vscodeCmd === "code-insiders" ? "vscode-insiders-agents" : "vscode-agents";
|
|
22187
|
-
return
|
|
21969
|
+
return path23.join(getSubagentsRoot(), folder);
|
|
22188
21970
|
}
|
|
22189
21971
|
var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
|
|
22190
21972
|
var execAsync2 = promisify2(exec);
|
|
@@ -22249,11 +22031,11 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
22249
22031
|
await raceSpawnError(child);
|
|
22250
22032
|
return true;
|
|
22251
22033
|
}
|
|
22252
|
-
const aliveFile =
|
|
22034
|
+
const aliveFile = path24.join(subagentDir, DEFAULT_ALIVE_FILENAME);
|
|
22253
22035
|
await removeIfExists(aliveFile);
|
|
22254
|
-
const githubAgentsDir =
|
|
22036
|
+
const githubAgentsDir = path24.join(subagentDir, ".github", "agents");
|
|
22255
22037
|
await mkdir8(githubAgentsDir, { recursive: true });
|
|
22256
|
-
const wakeupDst =
|
|
22038
|
+
const wakeupDst = path24.join(githubAgentsDir, "wakeup.md");
|
|
22257
22039
|
await writeFile2(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
|
|
22258
22040
|
const workspaceChild = spawnVsCode(vscodeCmd, [workspacePath], {
|
|
22259
22041
|
label: "open-workspace"
|
|
@@ -22266,7 +22048,7 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
22266
22048
|
"chat",
|
|
22267
22049
|
"-m",
|
|
22268
22050
|
wakeupChatId,
|
|
22269
|
-
`create a file named .alive in the ${
|
|
22051
|
+
`create a file named .alive in the ${path24.basename(subagentDir)} folder`
|
|
22270
22052
|
];
|
|
22271
22053
|
const wakeupChild = spawnVsCode(vscodeCmd, chatArgs, { label: "send-wakeup-chat" });
|
|
22272
22054
|
await raceSpawnError(wakeupChild);
|
|
@@ -22281,10 +22063,10 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
22281
22063
|
return true;
|
|
22282
22064
|
}
|
|
22283
22065
|
async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, requestInstructions, timestamp, vscodeCmd) {
|
|
22284
|
-
const workspacePath =
|
|
22285
|
-
const messagesDir =
|
|
22066
|
+
const workspacePath = path24.join(subagentDir, `${path24.basename(subagentDir)}.code-workspace`);
|
|
22067
|
+
const messagesDir = path24.join(subagentDir, "messages");
|
|
22286
22068
|
await mkdir8(messagesDir, { recursive: true });
|
|
22287
|
-
const reqFile =
|
|
22069
|
+
const reqFile = path24.join(messagesDir, `${timestamp}_req.md`);
|
|
22288
22070
|
await writeFile2(reqFile, requestInstructions, { encoding: "utf8" });
|
|
22289
22071
|
const reqUri = pathToFileUri2(reqFile);
|
|
22290
22072
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
@@ -22292,16 +22074,16 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
|
|
|
22292
22074
|
chatArgs.push("-a", attachment);
|
|
22293
22075
|
}
|
|
22294
22076
|
chatArgs.push("-a", reqFile);
|
|
22295
|
-
chatArgs.push(`Follow instructions in [${
|
|
22077
|
+
chatArgs.push(`Follow instructions in [${path24.basename(reqFile)}](${reqUri})`);
|
|
22296
22078
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
22297
22079
|
workspacePath,
|
|
22298
|
-
|
|
22080
|
+
path24.basename(subagentDir),
|
|
22299
22081
|
subagentDir,
|
|
22300
22082
|
vscodeCmd
|
|
22301
22083
|
);
|
|
22302
22084
|
if (!workspaceReady) {
|
|
22303
22085
|
throw new Error(
|
|
22304
|
-
`VS Code workspace '${
|
|
22086
|
+
`VS Code workspace '${path24.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
22305
22087
|
);
|
|
22306
22088
|
}
|
|
22307
22089
|
await sleep2(500);
|
|
@@ -22309,8 +22091,8 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
|
|
|
22309
22091
|
await raceSpawnError(child);
|
|
22310
22092
|
}
|
|
22311
22093
|
async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, chatInstruction, vscodeCmd) {
|
|
22312
|
-
const workspacePath =
|
|
22313
|
-
const messagesDir =
|
|
22094
|
+
const workspacePath = path24.join(subagentDir, `${path24.basename(subagentDir)}.code-workspace`);
|
|
22095
|
+
const messagesDir = path24.join(subagentDir, "messages");
|
|
22314
22096
|
await mkdir8(messagesDir, { recursive: true });
|
|
22315
22097
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
22316
22098
|
for (const attachment of attachmentPaths) {
|
|
@@ -22319,13 +22101,13 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
22319
22101
|
chatArgs.push(chatInstruction);
|
|
22320
22102
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
22321
22103
|
workspacePath,
|
|
22322
|
-
|
|
22104
|
+
path24.basename(subagentDir),
|
|
22323
22105
|
subagentDir,
|
|
22324
22106
|
vscodeCmd
|
|
22325
22107
|
);
|
|
22326
22108
|
if (!workspaceReady) {
|
|
22327
22109
|
throw new Error(
|
|
22328
|
-
`VS Code workspace '${
|
|
22110
|
+
`VS Code workspace '${path24.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
22329
22111
|
);
|
|
22330
22112
|
}
|
|
22331
22113
|
await sleep2(500);
|
|
@@ -22347,10 +22129,10 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
22347
22129
|
}
|
|
22348
22130
|
const transformedFolders = workspace.folders.map((folder) => {
|
|
22349
22131
|
const folderPath = folder.path;
|
|
22350
|
-
if (
|
|
22132
|
+
if (path25.isAbsolute(folderPath)) {
|
|
22351
22133
|
return folder;
|
|
22352
22134
|
}
|
|
22353
|
-
const absolutePath =
|
|
22135
|
+
const absolutePath = path25.resolve(templateDir, folderPath);
|
|
22354
22136
|
return {
|
|
22355
22137
|
...folder,
|
|
22356
22138
|
path: absolutePath
|
|
@@ -22372,19 +22154,19 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
22372
22154
|
if (locationMap && typeof locationMap === "object") {
|
|
22373
22155
|
const transformedMap = {};
|
|
22374
22156
|
for (const [locationPath, value] of Object.entries(locationMap)) {
|
|
22375
|
-
const isAbsolute =
|
|
22157
|
+
const isAbsolute = path25.isAbsolute(locationPath);
|
|
22376
22158
|
if (isAbsolute) {
|
|
22377
22159
|
transformedMap[locationPath] = value;
|
|
22378
22160
|
} else {
|
|
22379
22161
|
const firstGlobIndex = locationPath.search(/[*]/);
|
|
22380
22162
|
if (firstGlobIndex === -1) {
|
|
22381
|
-
const resolvedPath =
|
|
22163
|
+
const resolvedPath = path25.resolve(templateDir, locationPath).replace(/\\/g, "/");
|
|
22382
22164
|
transformedMap[resolvedPath] = value;
|
|
22383
22165
|
} else {
|
|
22384
22166
|
const basePathEnd = locationPath.lastIndexOf("/", firstGlobIndex);
|
|
22385
22167
|
const basePath = basePathEnd !== -1 ? locationPath.substring(0, basePathEnd) : ".";
|
|
22386
22168
|
const patternPath = locationPath.substring(basePathEnd !== -1 ? basePathEnd : 0);
|
|
22387
|
-
const resolvedPath = (
|
|
22169
|
+
const resolvedPath = (path25.resolve(templateDir, basePath) + patternPath).replace(
|
|
22388
22170
|
/\\/g,
|
|
22389
22171
|
"/"
|
|
22390
22172
|
);
|
|
@@ -22423,7 +22205,7 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
22423
22205
|
number: Number.parseInt(entry.name.split("-")[1] ?? "", 10)
|
|
22424
22206
|
})).filter((entry) => Number.isInteger(entry.number)).sort((a, b) => a.number - b.number);
|
|
22425
22207
|
for (const subagent of subagents) {
|
|
22426
|
-
const lockFile =
|
|
22208
|
+
const lockFile = path26.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
|
|
22427
22209
|
if (!await pathExists(lockFile)) {
|
|
22428
22210
|
return subagent.absolutePath;
|
|
22429
22211
|
}
|
|
@@ -22433,7 +22215,7 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
22433
22215
|
async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
22434
22216
|
let workspaceContent;
|
|
22435
22217
|
if (workspaceTemplate) {
|
|
22436
|
-
const workspaceSrc =
|
|
22218
|
+
const workspaceSrc = path26.resolve(workspaceTemplate);
|
|
22437
22219
|
if (!await pathExists(workspaceSrc)) {
|
|
22438
22220
|
throw new Error(`workspace template not found: ${workspaceSrc}`);
|
|
22439
22221
|
}
|
|
@@ -22441,18 +22223,18 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
|
22441
22223
|
if (!stats.isFile()) {
|
|
22442
22224
|
throw new Error(`workspace template must be a file, not a directory: ${workspaceSrc}`);
|
|
22443
22225
|
}
|
|
22444
|
-
const templateText = await
|
|
22226
|
+
const templateText = await readFile8(workspaceSrc, "utf8");
|
|
22445
22227
|
workspaceContent = JSON.parse(templateText);
|
|
22446
22228
|
} else {
|
|
22447
22229
|
workspaceContent = DEFAULT_WORKSPACE_TEMPLATE;
|
|
22448
22230
|
}
|
|
22449
|
-
const workspaceName = `${
|
|
22450
|
-
const workspaceDst =
|
|
22451
|
-
const templateDir = workspaceTemplate ?
|
|
22231
|
+
const workspaceName = `${path26.basename(subagentDir)}.code-workspace`;
|
|
22232
|
+
const workspaceDst = path26.join(subagentDir, workspaceName);
|
|
22233
|
+
const templateDir = workspaceTemplate ? path26.dirname(path26.resolve(workspaceTemplate)) : subagentDir;
|
|
22452
22234
|
const workspaceJson = JSON.stringify(workspaceContent, null, 2);
|
|
22453
22235
|
let transformedContent = transformWorkspacePaths(workspaceJson, templateDir);
|
|
22454
22236
|
if (cwd) {
|
|
22455
|
-
const absCwd =
|
|
22237
|
+
const absCwd = path26.resolve(cwd);
|
|
22456
22238
|
const parsed = JSON.parse(transformedContent);
|
|
22457
22239
|
const alreadyPresent = parsed.folders.some((f) => f.path === absCwd);
|
|
22458
22240
|
if (!alreadyPresent) {
|
|
@@ -22461,35 +22243,35 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
|
22461
22243
|
}
|
|
22462
22244
|
}
|
|
22463
22245
|
await writeFile3(workspaceDst, transformedContent, "utf8");
|
|
22464
|
-
const messagesDir =
|
|
22246
|
+
const messagesDir = path26.join(subagentDir, "messages");
|
|
22465
22247
|
await mkdir9(messagesDir, { recursive: true });
|
|
22466
22248
|
return { workspace: workspaceDst, messagesDir };
|
|
22467
22249
|
}
|
|
22468
22250
|
async function createSubagentLock(subagentDir) {
|
|
22469
|
-
const messagesDir =
|
|
22251
|
+
const messagesDir = path26.join(subagentDir, "messages");
|
|
22470
22252
|
if (await pathExists(messagesDir)) {
|
|
22471
22253
|
const files = await readdir2(messagesDir);
|
|
22472
22254
|
await Promise.all(
|
|
22473
22255
|
files.map(async (file) => {
|
|
22474
|
-
const target =
|
|
22256
|
+
const target = path26.join(messagesDir, file);
|
|
22475
22257
|
await removeIfExists(target);
|
|
22476
22258
|
})
|
|
22477
22259
|
);
|
|
22478
22260
|
}
|
|
22479
|
-
const githubAgentsDir =
|
|
22261
|
+
const githubAgentsDir = path26.join(subagentDir, ".github", "agents");
|
|
22480
22262
|
if (await pathExists(githubAgentsDir)) {
|
|
22481
22263
|
const agentFiles = await readdir2(githubAgentsDir);
|
|
22482
22264
|
const preservedFiles = /* @__PURE__ */ new Set(["wakeup.md", "subagent.md"]);
|
|
22483
22265
|
await Promise.all(
|
|
22484
|
-
agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(
|
|
22266
|
+
agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(path26.join(githubAgentsDir, file)))
|
|
22485
22267
|
);
|
|
22486
22268
|
}
|
|
22487
|
-
const lockFile =
|
|
22269
|
+
const lockFile = path26.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
22488
22270
|
await writeFile3(lockFile, "", { encoding: "utf8" });
|
|
22489
22271
|
return lockFile;
|
|
22490
22272
|
}
|
|
22491
22273
|
async function removeSubagentLock(subagentDir) {
|
|
22492
|
-
const lockFile =
|
|
22274
|
+
const lockFile = path26.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
22493
22275
|
await removeIfExists(lockFile);
|
|
22494
22276
|
}
|
|
22495
22277
|
async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspaceTemplate, dryRun, cwd) {
|
|
@@ -22509,9 +22291,9 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
|
|
|
22509
22291
|
return 1;
|
|
22510
22292
|
}
|
|
22511
22293
|
if (promptFile) {
|
|
22512
|
-
const githubAgentsDir =
|
|
22294
|
+
const githubAgentsDir = path26.join(subagentDir, ".github", "agents");
|
|
22513
22295
|
await mkdir9(githubAgentsDir, { recursive: true });
|
|
22514
|
-
const agentFile =
|
|
22296
|
+
const agentFile = path26.join(githubAgentsDir, `${chatId}.md`);
|
|
22515
22297
|
try {
|
|
22516
22298
|
await copyFile(promptFile, agentFile);
|
|
22517
22299
|
} catch (error) {
|
|
@@ -22528,7 +22310,7 @@ async function resolvePromptFile(promptFile) {
|
|
|
22528
22310
|
if (!promptFile) {
|
|
22529
22311
|
return void 0;
|
|
22530
22312
|
}
|
|
22531
|
-
const resolvedPrompt =
|
|
22313
|
+
const resolvedPrompt = path27.resolve(promptFile);
|
|
22532
22314
|
if (!await pathExists(resolvedPrompt)) {
|
|
22533
22315
|
throw new Error(`Prompt file not found: ${resolvedPrompt}`);
|
|
22534
22316
|
}
|
|
@@ -22544,7 +22326,7 @@ async function resolveAttachments(extraAttachments) {
|
|
|
22544
22326
|
}
|
|
22545
22327
|
const resolved = [];
|
|
22546
22328
|
for (const attachment of extraAttachments) {
|
|
22547
|
-
const resolvedPath =
|
|
22329
|
+
const resolvedPath = path27.resolve(attachment);
|
|
22548
22330
|
if (!await pathExists(resolvedPath)) {
|
|
22549
22331
|
throw new Error(`Attachment not found: ${resolvedPath}`);
|
|
22550
22332
|
}
|
|
@@ -22586,7 +22368,7 @@ async function dispatchAgentSession(options) {
|
|
|
22586
22368
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
22587
22369
|
};
|
|
22588
22370
|
}
|
|
22589
|
-
const subagentName =
|
|
22371
|
+
const subagentName = path27.basename(subagentDir);
|
|
22590
22372
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
22591
22373
|
const preparationResult = await prepareSubagentDirectory(
|
|
22592
22374
|
subagentDir,
|
|
@@ -22614,9 +22396,9 @@ async function dispatchAgentSession(options) {
|
|
|
22614
22396
|
};
|
|
22615
22397
|
}
|
|
22616
22398
|
const timestamp = generateTimestamp();
|
|
22617
|
-
const messagesDir =
|
|
22618
|
-
const responseFileTmp =
|
|
22619
|
-
const responseFileFinal =
|
|
22399
|
+
const messagesDir = path27.join(subagentDir, "messages");
|
|
22400
|
+
const responseFileTmp = path27.join(messagesDir, `${timestamp}_res.tmp.md`);
|
|
22401
|
+
const responseFileFinal = path27.join(messagesDir, `${timestamp}_res.md`);
|
|
22620
22402
|
const requestInstructions = createRequestPrompt(
|
|
22621
22403
|
userQuery,
|
|
22622
22404
|
responseFileTmp,
|
|
@@ -22721,7 +22503,7 @@ async function dispatchBatchAgent(options) {
|
|
|
22721
22503
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
22722
22504
|
};
|
|
22723
22505
|
}
|
|
22724
|
-
subagentName =
|
|
22506
|
+
subagentName = path27.basename(subagentDir);
|
|
22725
22507
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
22726
22508
|
const preparationResult = await prepareSubagentDirectory(
|
|
22727
22509
|
subagentDir,
|
|
@@ -22752,17 +22534,17 @@ async function dispatchBatchAgent(options) {
|
|
|
22752
22534
|
};
|
|
22753
22535
|
}
|
|
22754
22536
|
const timestamp = generateTimestamp();
|
|
22755
|
-
const messagesDir =
|
|
22537
|
+
const messagesDir = path27.join(subagentDir, "messages");
|
|
22756
22538
|
requestFiles = userQueries.map(
|
|
22757
|
-
(_, index) =>
|
|
22539
|
+
(_, index) => path27.join(messagesDir, `${timestamp}_${index}_req.md`)
|
|
22758
22540
|
);
|
|
22759
22541
|
const responseTmpFiles = userQueries.map(
|
|
22760
|
-
(_, index) =>
|
|
22542
|
+
(_, index) => path27.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
|
|
22761
22543
|
);
|
|
22762
22544
|
responseFilesFinal = userQueries.map(
|
|
22763
|
-
(_, index) =>
|
|
22545
|
+
(_, index) => path27.join(messagesDir, `${timestamp}_${index}_res.md`)
|
|
22764
22546
|
);
|
|
22765
|
-
const orchestratorFile =
|
|
22547
|
+
const orchestratorFile = path27.join(messagesDir, `${timestamp}_orchestrator.md`);
|
|
22766
22548
|
if (!dryRun) {
|
|
22767
22549
|
await Promise.all(
|
|
22768
22550
|
userQueries.map((query, index) => {
|
|
@@ -22875,7 +22657,7 @@ async function provisionSubagents(options) {
|
|
|
22875
22657
|
if (!Number.isInteger(subagents) || subagents < 1) {
|
|
22876
22658
|
throw new Error("subagents must be a positive integer");
|
|
22877
22659
|
}
|
|
22878
|
-
const targetPath =
|
|
22660
|
+
const targetPath = path28.resolve(targetRoot);
|
|
22879
22661
|
if (!dryRun) {
|
|
22880
22662
|
await ensureDir(targetPath);
|
|
22881
22663
|
}
|
|
@@ -22895,7 +22677,7 @@ async function provisionSubagents(options) {
|
|
|
22895
22677
|
continue;
|
|
22896
22678
|
}
|
|
22897
22679
|
highestNumber = Math.max(highestNumber, parsed);
|
|
22898
|
-
const lockFile =
|
|
22680
|
+
const lockFile = path28.join(entry.absolutePath, lockName);
|
|
22899
22681
|
const locked = await pathExists(lockFile);
|
|
22900
22682
|
if (locked) {
|
|
22901
22683
|
lockedSubagents.add(entry.absolutePath);
|
|
@@ -22912,10 +22694,10 @@ async function provisionSubagents(options) {
|
|
|
22912
22694
|
break;
|
|
22913
22695
|
}
|
|
22914
22696
|
const subagentDir = subagent.absolutePath;
|
|
22915
|
-
const githubAgentsDir =
|
|
22916
|
-
const lockFile =
|
|
22917
|
-
const workspaceDst =
|
|
22918
|
-
const wakeupDst =
|
|
22697
|
+
const githubAgentsDir = path28.join(subagentDir, ".github", "agents");
|
|
22698
|
+
const lockFile = path28.join(subagentDir, lockName);
|
|
22699
|
+
const workspaceDst = path28.join(subagentDir, `${path28.basename(subagentDir)}.code-workspace`);
|
|
22700
|
+
const wakeupDst = path28.join(githubAgentsDir, "wakeup.md");
|
|
22919
22701
|
const isLocked = await pathExists(lockFile);
|
|
22920
22702
|
if (isLocked && !force) {
|
|
22921
22703
|
continue;
|
|
@@ -22953,10 +22735,10 @@ async function provisionSubagents(options) {
|
|
|
22953
22735
|
let nextIndex = highestNumber;
|
|
22954
22736
|
while (subagentsProvisioned < subagents) {
|
|
22955
22737
|
nextIndex += 1;
|
|
22956
|
-
const subagentDir =
|
|
22957
|
-
const githubAgentsDir =
|
|
22958
|
-
const workspaceDst =
|
|
22959
|
-
const wakeupDst =
|
|
22738
|
+
const subagentDir = path28.join(targetPath, `subagent-${nextIndex}`);
|
|
22739
|
+
const githubAgentsDir = path28.join(subagentDir, ".github", "agents");
|
|
22740
|
+
const workspaceDst = path28.join(subagentDir, `${path28.basename(subagentDir)}.code-workspace`);
|
|
22741
|
+
const wakeupDst = path28.join(githubAgentsDir, "wakeup.md");
|
|
22960
22742
|
if (!dryRun) {
|
|
22961
22743
|
await ensureDir(subagentDir);
|
|
22962
22744
|
await ensureDir(githubAgentsDir);
|
|
@@ -23019,7 +22801,7 @@ var VSCodeProvider = class {
|
|
|
23019
22801
|
}
|
|
23020
22802
|
await this.ensureEnvironmentReady();
|
|
23021
22803
|
const inputFiles = normalizeAttachments(request.inputFiles);
|
|
23022
|
-
const promptContent = buildPromptDocument2(request, inputFiles
|
|
22804
|
+
const promptContent = buildPromptDocument2(request, inputFiles);
|
|
23023
22805
|
const workspaceTemplate = request.workspaceFile ?? await resolveWorkspaceTemplateFile(this.config.workspaceTemplate);
|
|
23024
22806
|
const startTime = Date.now();
|
|
23025
22807
|
const session = await dispatchAgentSession({
|
|
@@ -23073,7 +22855,7 @@ var VSCodeProvider = class {
|
|
|
23073
22855
|
normalizedRequests.map(({ inputFiles }) => inputFiles)
|
|
23074
22856
|
);
|
|
23075
22857
|
const userQueries = normalizedRequests.map(
|
|
23076
|
-
({ request, inputFiles }) => buildPromptDocument2(request, inputFiles
|
|
22858
|
+
({ request, inputFiles }) => buildPromptDocument2(request, inputFiles)
|
|
23077
22859
|
);
|
|
23078
22860
|
const batchWorkspaceTemplate = await resolveWorkspaceTemplateFile(
|
|
23079
22861
|
this.config.workspaceTemplate
|
|
@@ -23142,7 +22924,7 @@ var VSCodeProvider = class {
|
|
|
23142
22924
|
async function locateVSCodeExecutable(candidate) {
|
|
23143
22925
|
const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
|
|
23144
22926
|
if (includesPathSeparator) {
|
|
23145
|
-
const resolved =
|
|
22927
|
+
const resolved = path29.isAbsolute(candidate) ? candidate : path29.resolve(candidate);
|
|
23146
22928
|
try {
|
|
23147
22929
|
await access3(resolved, constants3.F_OK);
|
|
23148
22930
|
return resolved;
|
|
@@ -23171,41 +22953,35 @@ async function resolveWorkspaceTemplateFile(template) {
|
|
|
23171
22953
|
return void 0;
|
|
23172
22954
|
}
|
|
23173
22955
|
try {
|
|
23174
|
-
const stats = await stat4(
|
|
22956
|
+
const stats = await stat4(path29.resolve(template));
|
|
23175
22957
|
return stats.isFile() ? template : void 0;
|
|
23176
22958
|
} catch {
|
|
23177
22959
|
return template;
|
|
23178
22960
|
}
|
|
23179
22961
|
}
|
|
23180
|
-
function buildPromptDocument2(request, attachments
|
|
22962
|
+
function buildPromptDocument2(request, attachments) {
|
|
23181
22963
|
const parts = [];
|
|
23182
22964
|
if (request.systemPrompt && request.systemPrompt.trim().length > 0) {
|
|
23183
22965
|
parts.push(request.systemPrompt.trim());
|
|
23184
22966
|
}
|
|
23185
|
-
const guidelineFiles = collectGuidelineFiles2(attachments, guidelinePatterns);
|
|
23186
22967
|
const attachmentFiles = collectAttachmentFiles(attachments);
|
|
23187
|
-
const
|
|
23188
|
-
const prereadBlock = buildMandatoryPrereadBlock2(guidelineFiles, nonGuidelineAttachments);
|
|
22968
|
+
const prereadBlock = buildMandatoryPrereadBlock2(attachmentFiles);
|
|
23189
22969
|
if (prereadBlock.length > 0) {
|
|
23190
22970
|
parts.push("\n", prereadBlock);
|
|
23191
22971
|
}
|
|
23192
22972
|
parts.push("\n[[ ## user_query ## ]]\n", request.question.trim());
|
|
23193
22973
|
return parts.join("\n").trim();
|
|
23194
22974
|
}
|
|
23195
|
-
function buildMandatoryPrereadBlock2(
|
|
23196
|
-
if (
|
|
22975
|
+
function buildMandatoryPrereadBlock2(attachmentFiles) {
|
|
22976
|
+
if (attachmentFiles.length === 0) {
|
|
23197
22977
|
return "";
|
|
23198
22978
|
}
|
|
23199
22979
|
const buildList = (files) => files.map((absolutePath) => {
|
|
23200
|
-
const fileName =
|
|
22980
|
+
const fileName = path29.basename(absolutePath);
|
|
23201
22981
|
const fileUri = pathToFileUri3(absolutePath);
|
|
23202
22982
|
return `* [${fileName}](${fileUri})`;
|
|
23203
22983
|
});
|
|
23204
22984
|
const sections = [];
|
|
23205
|
-
if (guidelineFiles.length > 0) {
|
|
23206
|
-
sections.push(`Read all guideline files:
|
|
23207
|
-
${buildList(guidelineFiles).join("\n")}.`);
|
|
23208
|
-
}
|
|
23209
22985
|
if (attachmentFiles.length > 0) {
|
|
23210
22986
|
sections.push(`Read all attachment files:
|
|
23211
22987
|
${buildList(attachmentFiles).join("\n")}.`);
|
|
@@ -23216,29 +22992,13 @@ ${buildList(attachmentFiles).join("\n")}.`);
|
|
|
23216
22992
|
);
|
|
23217
22993
|
return sections.join("\n");
|
|
23218
22994
|
}
|
|
23219
|
-
function collectGuidelineFiles2(attachments, guidelinePatterns) {
|
|
23220
|
-
if (!attachments || attachments.length === 0) {
|
|
23221
|
-
return [];
|
|
23222
|
-
}
|
|
23223
|
-
const unique = /* @__PURE__ */ new Map();
|
|
23224
|
-
for (const attachment of attachments) {
|
|
23225
|
-
const absolutePath = path31.resolve(attachment);
|
|
23226
|
-
const normalized = absolutePath.split(path31.sep).join("/");
|
|
23227
|
-
if (isGuidelineFile(normalized, guidelinePatterns)) {
|
|
23228
|
-
if (!unique.has(absolutePath)) {
|
|
23229
|
-
unique.set(absolutePath, absolutePath);
|
|
23230
|
-
}
|
|
23231
|
-
}
|
|
23232
|
-
}
|
|
23233
|
-
return Array.from(unique.values());
|
|
23234
|
-
}
|
|
23235
22995
|
function collectAttachmentFiles(attachments) {
|
|
23236
22996
|
if (!attachments || attachments.length === 0) {
|
|
23237
22997
|
return [];
|
|
23238
22998
|
}
|
|
23239
22999
|
const unique = /* @__PURE__ */ new Map();
|
|
23240
23000
|
for (const attachment of attachments) {
|
|
23241
|
-
const absolutePath =
|
|
23001
|
+
const absolutePath = path29.resolve(attachment);
|
|
23242
23002
|
if (!unique.has(absolutePath)) {
|
|
23243
23003
|
unique.set(absolutePath, absolutePath);
|
|
23244
23004
|
}
|
|
@@ -23246,7 +23006,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
23246
23006
|
return Array.from(unique.values());
|
|
23247
23007
|
}
|
|
23248
23008
|
function pathToFileUri3(filePath) {
|
|
23249
|
-
const absolutePath =
|
|
23009
|
+
const absolutePath = path29.isAbsolute(filePath) ? filePath : path29.resolve(filePath);
|
|
23250
23010
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
23251
23011
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
23252
23012
|
return `file:///${normalizedPath}`;
|
|
@@ -23259,7 +23019,7 @@ function normalizeAttachments(attachments) {
|
|
|
23259
23019
|
}
|
|
23260
23020
|
const deduped = /* @__PURE__ */ new Set();
|
|
23261
23021
|
for (const attachment of attachments) {
|
|
23262
|
-
deduped.add(
|
|
23022
|
+
deduped.add(path29.resolve(attachment));
|
|
23263
23023
|
}
|
|
23264
23024
|
return Array.from(deduped);
|
|
23265
23025
|
}
|
|
@@ -23268,7 +23028,7 @@ function mergeAttachments(all) {
|
|
|
23268
23028
|
for (const list of all) {
|
|
23269
23029
|
if (!list) continue;
|
|
23270
23030
|
for (const inputFile of list) {
|
|
23271
|
-
deduped.add(
|
|
23031
|
+
deduped.add(path29.resolve(inputFile));
|
|
23272
23032
|
}
|
|
23273
23033
|
}
|
|
23274
23034
|
return deduped.size > 0 ? Array.from(deduped) : void 0;
|
|
@@ -23348,11 +23108,11 @@ async function fileExists3(filePath) {
|
|
|
23348
23108
|
}
|
|
23349
23109
|
}
|
|
23350
23110
|
async function readTargetDefinitions(filePath) {
|
|
23351
|
-
const absolutePath =
|
|
23111
|
+
const absolutePath = path30.resolve(filePath);
|
|
23352
23112
|
if (!await fileExists3(absolutePath)) {
|
|
23353
23113
|
throw new Error(`targets.yaml not found at ${absolutePath}`);
|
|
23354
23114
|
}
|
|
23355
|
-
const raw = await
|
|
23115
|
+
const raw = await readFile9(absolutePath, "utf8");
|
|
23356
23116
|
const parsed = parse4(raw);
|
|
23357
23117
|
if (!isRecord(parsed)) {
|
|
23358
23118
|
throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
|
|
@@ -23369,11 +23129,11 @@ function listTargetNames(definitions) {
|
|
|
23369
23129
|
async function discoverProviders(registry, baseDir) {
|
|
23370
23130
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
23371
23131
|
const candidateDirs = [];
|
|
23372
|
-
let dir =
|
|
23373
|
-
const root =
|
|
23132
|
+
let dir = path31.resolve(baseDir);
|
|
23133
|
+
const root = path31.parse(dir).root;
|
|
23374
23134
|
while (dir !== root) {
|
|
23375
|
-
candidateDirs.push(
|
|
23376
|
-
dir =
|
|
23135
|
+
candidateDirs.push(path31.join(dir, ".agentv", "providers"));
|
|
23136
|
+
dir = path31.dirname(dir);
|
|
23377
23137
|
}
|
|
23378
23138
|
let files = [];
|
|
23379
23139
|
for (const providersDir of candidateDirs) {
|
|
@@ -23389,7 +23149,7 @@ async function discoverProviders(registry, baseDir) {
|
|
|
23389
23149
|
}
|
|
23390
23150
|
const discoveredKinds = [];
|
|
23391
23151
|
for (const filePath of files) {
|
|
23392
|
-
const basename =
|
|
23152
|
+
const basename = path31.basename(filePath);
|
|
23393
23153
|
const kindName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
23394
23154
|
if (registry.has(kindName)) {
|
|
23395
23155
|
continue;
|
|
@@ -23587,15 +23347,15 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
|
23587
23347
|
});
|
|
23588
23348
|
}
|
|
23589
23349
|
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
23590
|
-
const { mkdir: mkdir15, readFile:
|
|
23350
|
+
const { mkdir: mkdir15, readFile: readFile12, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
|
|
23591
23351
|
const { tmpdir: tmpdir3 } = await import("node:os");
|
|
23592
|
-
const
|
|
23352
|
+
const path44 = await import("node:path");
|
|
23593
23353
|
const { randomUUID: randomUUID9 } = await import("node:crypto");
|
|
23594
|
-
const dir =
|
|
23354
|
+
const dir = path44.join(tmpdir3(), `agentv-exec-${randomUUID9()}`);
|
|
23595
23355
|
await mkdir15(dir, { recursive: true });
|
|
23596
|
-
const stdinPath =
|
|
23597
|
-
const stdoutPath =
|
|
23598
|
-
const stderrPath =
|
|
23356
|
+
const stdinPath = path44.join(dir, "stdin.txt");
|
|
23357
|
+
const stdoutPath = path44.join(dir, "stdout.txt");
|
|
23358
|
+
const stderrPath = path44.join(dir, "stderr.txt");
|
|
23599
23359
|
await writeFile9(stdinPath, stdinPayload, "utf8");
|
|
23600
23360
|
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
23601
23361
|
const { spawn: spawn5 } = await import("node:child_process");
|
|
@@ -23625,8 +23385,8 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
|
23625
23385
|
resolve2(code ?? 0);
|
|
23626
23386
|
});
|
|
23627
23387
|
});
|
|
23628
|
-
const stdout = (await
|
|
23629
|
-
const stderr = (await
|
|
23388
|
+
const stdout = (await readFile12(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
|
|
23389
|
+
const stderr = (await readFile12(stderrPath, "utf8")).replace(/\r\n/g, "\n");
|
|
23630
23390
|
return { stdout, stderr, exitCode };
|
|
23631
23391
|
} finally {
|
|
23632
23392
|
await rm6(dir, { recursive: true, force: true });
|
|
@@ -23935,10 +23695,7 @@ var CodeEvaluator = class {
|
|
|
23935
23695
|
outputText: context2.candidate,
|
|
23936
23696
|
output: outputForPayload,
|
|
23937
23697
|
outputPath,
|
|
23938
|
-
|
|
23939
|
-
inputFiles: context2.evalCase.file_paths.filter(
|
|
23940
|
-
(path46) => !context2.evalCase.guideline_paths.includes(path46)
|
|
23941
|
-
),
|
|
23698
|
+
inputFiles: context2.evalCase.file_paths,
|
|
23942
23699
|
input: context2.evalCase.input,
|
|
23943
23700
|
trace: context2.trace ?? null,
|
|
23944
23701
|
tokenUsage: context2.tokenUsage ?? null,
|
|
@@ -24892,8 +24649,8 @@ function calculateScoreRangeResult(result, rubrics) {
|
|
|
24892
24649
|
};
|
|
24893
24650
|
}
|
|
24894
24651
|
function resolveSandboxed(basePath, relativePath) {
|
|
24895
|
-
const resolved =
|
|
24896
|
-
if (!resolved.startsWith(basePath +
|
|
24652
|
+
const resolved = path322.resolve(basePath, relativePath);
|
|
24653
|
+
if (!resolved.startsWith(basePath + path322.sep) && resolved !== basePath) {
|
|
24897
24654
|
throw new Error(`Path '${relativePath}' is outside the workspace`);
|
|
24898
24655
|
}
|
|
24899
24656
|
return resolved;
|
|
@@ -24983,11 +24740,11 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
24983
24740
|
for (const entry of entries) {
|
|
24984
24741
|
if (matches.length >= MAX_SEARCH_MATCHES) return;
|
|
24985
24742
|
if (SEARCH_SKIP_DIRS.has(entry.name)) continue;
|
|
24986
|
-
const fullPath =
|
|
24743
|
+
const fullPath = path322.join(dirPath, entry.name);
|
|
24987
24744
|
if (entry.isDirectory()) {
|
|
24988
24745
|
await searchDirectory(fullPath, workspacePath, regex, matches);
|
|
24989
24746
|
} else if (entry.isFile()) {
|
|
24990
|
-
const ext =
|
|
24747
|
+
const ext = path322.extname(entry.name).toLowerCase();
|
|
24991
24748
|
if (BINARY_EXTENSIONS.has(ext)) continue;
|
|
24992
24749
|
try {
|
|
24993
24750
|
const stat8 = await fs2.stat(fullPath);
|
|
@@ -24999,7 +24756,7 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
24999
24756
|
regex.lastIndex = 0;
|
|
25000
24757
|
if (regex.test(lines[i])) {
|
|
25001
24758
|
matches.push({
|
|
25002
|
-
file:
|
|
24759
|
+
file: path322.relative(workspacePath, fullPath),
|
|
25003
24760
|
line: i + 1,
|
|
25004
24761
|
text: lines[i].substring(0, 200)
|
|
25005
24762
|
});
|
|
@@ -25626,115 +25383,115 @@ var FieldAccuracyEvaluator = class {
|
|
|
25626
25383
|
* Evaluate a single field against the expected value.
|
|
25627
25384
|
*/
|
|
25628
25385
|
evaluateField(fieldConfig, candidateData, expectedData) {
|
|
25629
|
-
const { path:
|
|
25630
|
-
const candidateValue = resolvePath(candidateData,
|
|
25631
|
-
const expectedValue = resolvePath(expectedData,
|
|
25386
|
+
const { path: path44, match, required = true, weight = 1 } = fieldConfig;
|
|
25387
|
+
const candidateValue = resolvePath(candidateData, path44);
|
|
25388
|
+
const expectedValue = resolvePath(expectedData, path44);
|
|
25632
25389
|
if (expectedValue === void 0) {
|
|
25633
25390
|
return {
|
|
25634
|
-
path:
|
|
25391
|
+
path: path44,
|
|
25635
25392
|
score: 1,
|
|
25636
25393
|
// No expected value means no comparison needed
|
|
25637
25394
|
weight,
|
|
25638
25395
|
hit: true,
|
|
25639
|
-
message: `${
|
|
25396
|
+
message: `${path44}: no expected value`
|
|
25640
25397
|
};
|
|
25641
25398
|
}
|
|
25642
25399
|
if (candidateValue === void 0) {
|
|
25643
25400
|
if (required) {
|
|
25644
25401
|
return {
|
|
25645
|
-
path:
|
|
25402
|
+
path: path44,
|
|
25646
25403
|
score: 0,
|
|
25647
25404
|
weight,
|
|
25648
25405
|
hit: false,
|
|
25649
|
-
message: `${
|
|
25406
|
+
message: `${path44} (required, missing)`
|
|
25650
25407
|
};
|
|
25651
25408
|
}
|
|
25652
25409
|
return {
|
|
25653
|
-
path:
|
|
25410
|
+
path: path44,
|
|
25654
25411
|
score: 1,
|
|
25655
25412
|
// Don't penalize missing optional fields
|
|
25656
25413
|
weight: 0,
|
|
25657
25414
|
// Zero weight means it won't affect the score
|
|
25658
25415
|
hit: true,
|
|
25659
|
-
message: `${
|
|
25416
|
+
message: `${path44}: optional field missing`
|
|
25660
25417
|
};
|
|
25661
25418
|
}
|
|
25662
25419
|
switch (match) {
|
|
25663
25420
|
case "exact":
|
|
25664
|
-
return this.compareExact(
|
|
25421
|
+
return this.compareExact(path44, candidateValue, expectedValue, weight);
|
|
25665
25422
|
case "numeric_tolerance":
|
|
25666
25423
|
return this.compareNumericTolerance(
|
|
25667
|
-
|
|
25424
|
+
path44,
|
|
25668
25425
|
candidateValue,
|
|
25669
25426
|
expectedValue,
|
|
25670
25427
|
fieldConfig,
|
|
25671
25428
|
weight
|
|
25672
25429
|
);
|
|
25673
25430
|
case "date":
|
|
25674
|
-
return this.compareDate(
|
|
25431
|
+
return this.compareDate(path44, candidateValue, expectedValue, fieldConfig, weight);
|
|
25675
25432
|
default:
|
|
25676
25433
|
return {
|
|
25677
|
-
path:
|
|
25434
|
+
path: path44,
|
|
25678
25435
|
score: 0,
|
|
25679
25436
|
weight,
|
|
25680
25437
|
hit: false,
|
|
25681
|
-
message: `${
|
|
25438
|
+
message: `${path44}: unknown match type "${match}"`
|
|
25682
25439
|
};
|
|
25683
25440
|
}
|
|
25684
25441
|
}
|
|
25685
25442
|
/**
|
|
25686
25443
|
* Exact equality comparison.
|
|
25687
25444
|
*/
|
|
25688
|
-
compareExact(
|
|
25445
|
+
compareExact(path44, candidateValue, expectedValue, weight) {
|
|
25689
25446
|
if (deepEqual(candidateValue, expectedValue)) {
|
|
25690
25447
|
return {
|
|
25691
|
-
path:
|
|
25448
|
+
path: path44,
|
|
25692
25449
|
score: 1,
|
|
25693
25450
|
weight,
|
|
25694
25451
|
hit: true,
|
|
25695
|
-
message:
|
|
25452
|
+
message: path44
|
|
25696
25453
|
};
|
|
25697
25454
|
}
|
|
25698
25455
|
if (typeof candidateValue !== typeof expectedValue) {
|
|
25699
25456
|
return {
|
|
25700
|
-
path:
|
|
25457
|
+
path: path44,
|
|
25701
25458
|
score: 0,
|
|
25702
25459
|
weight,
|
|
25703
25460
|
hit: false,
|
|
25704
|
-
message: `${
|
|
25461
|
+
message: `${path44} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
|
|
25705
25462
|
};
|
|
25706
25463
|
}
|
|
25707
25464
|
return {
|
|
25708
|
-
path:
|
|
25465
|
+
path: path44,
|
|
25709
25466
|
score: 0,
|
|
25710
25467
|
weight,
|
|
25711
25468
|
hit: false,
|
|
25712
|
-
message: `${
|
|
25469
|
+
message: `${path44} (value mismatch)`
|
|
25713
25470
|
};
|
|
25714
25471
|
}
|
|
25715
25472
|
/**
|
|
25716
25473
|
* Numeric comparison with absolute or relative tolerance.
|
|
25717
25474
|
*/
|
|
25718
|
-
compareNumericTolerance(
|
|
25475
|
+
compareNumericTolerance(path44, candidateValue, expectedValue, fieldConfig, weight) {
|
|
25719
25476
|
const { tolerance = 0, relative = false } = fieldConfig;
|
|
25720
25477
|
const candidateNum = toNumber(candidateValue);
|
|
25721
25478
|
const expectedNum = toNumber(expectedValue);
|
|
25722
25479
|
if (candidateNum === null || expectedNum === null) {
|
|
25723
25480
|
return {
|
|
25724
|
-
path:
|
|
25481
|
+
path: path44,
|
|
25725
25482
|
score: 0,
|
|
25726
25483
|
weight,
|
|
25727
25484
|
hit: false,
|
|
25728
|
-
message: `${
|
|
25485
|
+
message: `${path44} (non-numeric value)`
|
|
25729
25486
|
};
|
|
25730
25487
|
}
|
|
25731
25488
|
if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
|
|
25732
25489
|
return {
|
|
25733
|
-
path:
|
|
25490
|
+
path: path44,
|
|
25734
25491
|
score: 0,
|
|
25735
25492
|
weight,
|
|
25736
25493
|
hit: false,
|
|
25737
|
-
message: `${
|
|
25494
|
+
message: `${path44} (invalid numeric value)`
|
|
25738
25495
|
};
|
|
25739
25496
|
}
|
|
25740
25497
|
const diff = Math.abs(candidateNum - expectedNum);
|
|
@@ -25747,61 +25504,61 @@ var FieldAccuracyEvaluator = class {
|
|
|
25747
25504
|
}
|
|
25748
25505
|
if (withinTolerance) {
|
|
25749
25506
|
return {
|
|
25750
|
-
path:
|
|
25507
|
+
path: path44,
|
|
25751
25508
|
score: 1,
|
|
25752
25509
|
weight,
|
|
25753
25510
|
hit: true,
|
|
25754
|
-
message: `${
|
|
25511
|
+
message: `${path44} (within tolerance: diff=${diff.toFixed(2)})`
|
|
25755
25512
|
};
|
|
25756
25513
|
}
|
|
25757
25514
|
return {
|
|
25758
|
-
path:
|
|
25515
|
+
path: path44,
|
|
25759
25516
|
score: 0,
|
|
25760
25517
|
weight,
|
|
25761
25518
|
hit: false,
|
|
25762
|
-
message: `${
|
|
25519
|
+
message: `${path44} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
|
|
25763
25520
|
};
|
|
25764
25521
|
}
|
|
25765
25522
|
/**
|
|
25766
25523
|
* Date comparison with format normalization.
|
|
25767
25524
|
*/
|
|
25768
|
-
compareDate(
|
|
25525
|
+
compareDate(path44, candidateValue, expectedValue, fieldConfig, weight) {
|
|
25769
25526
|
const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
|
|
25770
25527
|
const candidateDate = parseDate(String(candidateValue), formats);
|
|
25771
25528
|
const expectedDate = parseDate(String(expectedValue), formats);
|
|
25772
25529
|
if (candidateDate === null) {
|
|
25773
25530
|
return {
|
|
25774
|
-
path:
|
|
25531
|
+
path: path44,
|
|
25775
25532
|
score: 0,
|
|
25776
25533
|
weight,
|
|
25777
25534
|
hit: false,
|
|
25778
|
-
message: `${
|
|
25535
|
+
message: `${path44} (unparseable candidate date)`
|
|
25779
25536
|
};
|
|
25780
25537
|
}
|
|
25781
25538
|
if (expectedDate === null) {
|
|
25782
25539
|
return {
|
|
25783
|
-
path:
|
|
25540
|
+
path: path44,
|
|
25784
25541
|
score: 0,
|
|
25785
25542
|
weight,
|
|
25786
25543
|
hit: false,
|
|
25787
|
-
message: `${
|
|
25544
|
+
message: `${path44} (unparseable expected date)`
|
|
25788
25545
|
};
|
|
25789
25546
|
}
|
|
25790
25547
|
if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
|
|
25791
25548
|
return {
|
|
25792
|
-
path:
|
|
25549
|
+
path: path44,
|
|
25793
25550
|
score: 1,
|
|
25794
25551
|
weight,
|
|
25795
25552
|
hit: true,
|
|
25796
|
-
message:
|
|
25553
|
+
message: path44
|
|
25797
25554
|
};
|
|
25798
25555
|
}
|
|
25799
25556
|
return {
|
|
25800
|
-
path:
|
|
25557
|
+
path: path44,
|
|
25801
25558
|
score: 0,
|
|
25802
25559
|
weight,
|
|
25803
25560
|
hit: false,
|
|
25804
|
-
message: `${
|
|
25561
|
+
message: `${path44} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
|
|
25805
25562
|
};
|
|
25806
25563
|
}
|
|
25807
25564
|
/**
|
|
@@ -25834,11 +25591,11 @@ var FieldAccuracyEvaluator = class {
|
|
|
25834
25591
|
};
|
|
25835
25592
|
}
|
|
25836
25593
|
};
|
|
25837
|
-
function resolvePath(obj,
|
|
25838
|
-
if (!
|
|
25594
|
+
function resolvePath(obj, path44) {
|
|
25595
|
+
if (!path44 || !obj) {
|
|
25839
25596
|
return void 0;
|
|
25840
25597
|
}
|
|
25841
|
-
const parts =
|
|
25598
|
+
const parts = path44.split(/\.|\[|\]/).filter((p) => p.length > 0);
|
|
25842
25599
|
let current = obj;
|
|
25843
25600
|
for (const part of parts) {
|
|
25844
25601
|
if (current === null || current === void 0) {
|
|
@@ -26288,8 +26045,8 @@ var TokenUsageEvaluator = class {
|
|
|
26288
26045
|
};
|
|
26289
26046
|
}
|
|
26290
26047
|
};
|
|
26291
|
-
function getNestedValue(obj,
|
|
26292
|
-
const parts =
|
|
26048
|
+
function getNestedValue(obj, path44) {
|
|
26049
|
+
const parts = path44.split(".");
|
|
26293
26050
|
let current = obj;
|
|
26294
26051
|
for (const part of parts) {
|
|
26295
26052
|
if (current === null || current === void 0 || typeof current !== "object") {
|
|
@@ -27141,10 +26898,7 @@ async function executePromptTemplate(script, context2, config, timeoutMs) {
|
|
|
27141
26898
|
expectedOutput: context2.evalCase.expected_output,
|
|
27142
26899
|
outputText: context2.candidate,
|
|
27143
26900
|
output: context2.output ?? null,
|
|
27144
|
-
|
|
27145
|
-
inputFiles: context2.evalCase.file_paths.filter(
|
|
27146
|
-
(p) => !context2.evalCase.guideline_paths.includes(p)
|
|
27147
|
-
),
|
|
26901
|
+
inputFiles: context2.evalCase.file_paths,
|
|
27148
26902
|
input: context2.evalCase.input,
|
|
27149
26903
|
trace: context2.trace ?? null,
|
|
27150
26904
|
fileChanges: context2.fileChanges ?? null,
|
|
@@ -27155,7 +26909,7 @@ async function executePromptTemplate(script, context2, config, timeoutMs) {
|
|
|
27155
26909
|
};
|
|
27156
26910
|
const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
|
|
27157
26911
|
const scriptPath = script[script.length - 1];
|
|
27158
|
-
const cwd =
|
|
26912
|
+
const cwd = path33.dirname(scriptPath);
|
|
27159
26913
|
try {
|
|
27160
26914
|
const stdout = await executeScript(script, inputJson, timeoutMs, cwd);
|
|
27161
26915
|
const prompt = stdout.trim();
|
|
@@ -27426,16 +27180,16 @@ function createBuiltinRegistry() {
|
|
|
27426
27180
|
async function discoverAssertions(registry, baseDir) {
|
|
27427
27181
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
27428
27182
|
const candidateDirs = [];
|
|
27429
|
-
let dir =
|
|
27430
|
-
const root =
|
|
27183
|
+
let dir = path34.resolve(baseDir);
|
|
27184
|
+
const root = path34.parse(dir).root;
|
|
27431
27185
|
while (dir !== root) {
|
|
27432
|
-
candidateDirs.push(
|
|
27433
|
-
dir =
|
|
27186
|
+
candidateDirs.push(path34.join(dir, ".agentv", "assertions"));
|
|
27187
|
+
dir = path34.dirname(dir);
|
|
27434
27188
|
}
|
|
27435
27189
|
let files = [];
|
|
27436
27190
|
for (const assertionsDir of candidateDirs) {
|
|
27437
27191
|
try {
|
|
27438
|
-
const found = await
|
|
27192
|
+
const found = await fg22(patterns, {
|
|
27439
27193
|
cwd: assertionsDir,
|
|
27440
27194
|
absolute: true,
|
|
27441
27195
|
onlyFiles: true
|
|
@@ -27446,7 +27200,7 @@ async function discoverAssertions(registry, baseDir) {
|
|
|
27446
27200
|
}
|
|
27447
27201
|
const discoveredTypes = [];
|
|
27448
27202
|
for (const filePath of files) {
|
|
27449
|
-
const basename =
|
|
27203
|
+
const basename = path34.basename(filePath);
|
|
27450
27204
|
const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
27451
27205
|
if (registry.has(typeName)) {
|
|
27452
27206
|
continue;
|
|
@@ -27465,17 +27219,17 @@ async function discoverAssertions(registry, baseDir) {
|
|
|
27465
27219
|
async function discoverGraders(registry, baseDir) {
|
|
27466
27220
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
27467
27221
|
const candidateDirs = [];
|
|
27468
|
-
let dir =
|
|
27469
|
-
const root =
|
|
27222
|
+
let dir = path35.resolve(baseDir);
|
|
27223
|
+
const root = path35.parse(dir).root;
|
|
27470
27224
|
while (dir !== root) {
|
|
27471
|
-
candidateDirs.push(
|
|
27472
|
-
candidateDirs.push(
|
|
27473
|
-
dir =
|
|
27225
|
+
candidateDirs.push(path35.join(dir, ".agentv", "graders"));
|
|
27226
|
+
candidateDirs.push(path35.join(dir, ".agentv", "judges"));
|
|
27227
|
+
dir = path35.dirname(dir);
|
|
27474
27228
|
}
|
|
27475
27229
|
let files = [];
|
|
27476
27230
|
for (const gradersDir of candidateDirs) {
|
|
27477
27231
|
try {
|
|
27478
|
-
const found = await
|
|
27232
|
+
const found = await fg3(patterns, {
|
|
27479
27233
|
cwd: gradersDir,
|
|
27480
27234
|
absolute: true,
|
|
27481
27235
|
onlyFiles: true
|
|
@@ -27486,7 +27240,7 @@ async function discoverGraders(registry, baseDir) {
|
|
|
27486
27240
|
}
|
|
27487
27241
|
const discoveredTypes = [];
|
|
27488
27242
|
for (const filePath of files) {
|
|
27489
|
-
const basename =
|
|
27243
|
+
const basename = path35.basename(filePath);
|
|
27490
27244
|
const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
27491
27245
|
if (registry.has(typeName)) {
|
|
27492
27246
|
continue;
|
|
@@ -27672,10 +27426,10 @@ async function stageNestedRepoChanges(workspacePath) {
|
|
|
27672
27426
|
}
|
|
27673
27427
|
for (const entry of entries) {
|
|
27674
27428
|
if (entry === ".git" || entry === "node_modules") continue;
|
|
27675
|
-
const childPath =
|
|
27429
|
+
const childPath = path36.join(workspacePath, entry);
|
|
27676
27430
|
try {
|
|
27677
27431
|
if (!statSync(childPath).isDirectory()) continue;
|
|
27678
|
-
if (!statSync(
|
|
27432
|
+
if (!statSync(path36.join(childPath, ".git")).isDirectory()) continue;
|
|
27679
27433
|
} catch {
|
|
27680
27434
|
continue;
|
|
27681
27435
|
}
|
|
@@ -27712,14 +27466,14 @@ async function isDirectory(filePath) {
|
|
|
27712
27466
|
}
|
|
27713
27467
|
function getWorkspacePath(evalRunId, caseId, workspaceRoot) {
|
|
27714
27468
|
const root = workspaceRoot ?? getWorkspacesRoot();
|
|
27715
|
-
return
|
|
27469
|
+
return path37.join(root, evalRunId, caseId);
|
|
27716
27470
|
}
|
|
27717
27471
|
async function copyDirectoryRecursive(src, dest) {
|
|
27718
27472
|
await mkdir11(dest, { recursive: true });
|
|
27719
27473
|
const entries = await readdir3(src, { withFileTypes: true });
|
|
27720
27474
|
for (const entry of entries) {
|
|
27721
|
-
const srcPath =
|
|
27722
|
-
const destPath =
|
|
27475
|
+
const srcPath = path37.join(src, entry.name);
|
|
27476
|
+
const destPath = path37.join(dest, entry.name);
|
|
27723
27477
|
if (entry.name === ".git") {
|
|
27724
27478
|
continue;
|
|
27725
27479
|
}
|
|
@@ -27731,7 +27485,7 @@ async function copyDirectoryRecursive(src, dest) {
|
|
|
27731
27485
|
}
|
|
27732
27486
|
}
|
|
27733
27487
|
async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoot) {
|
|
27734
|
-
const resolvedTemplatePath =
|
|
27488
|
+
const resolvedTemplatePath = path37.resolve(templatePath);
|
|
27735
27489
|
if (!await fileExists(resolvedTemplatePath)) {
|
|
27736
27490
|
throw new TemplateNotFoundError(resolvedTemplatePath);
|
|
27737
27491
|
}
|
|
@@ -27780,7 +27534,7 @@ async function cleanupWorkspace(workspacePath) {
|
|
|
27780
27534
|
}
|
|
27781
27535
|
async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
|
|
27782
27536
|
const root = workspaceRoot ?? getWorkspacesRoot();
|
|
27783
|
-
const evalDir =
|
|
27537
|
+
const evalDir = path37.join(root, evalRunId);
|
|
27784
27538
|
if (await fileExists(evalDir)) {
|
|
27785
27539
|
await rm4(evalDir, { recursive: true, force: true });
|
|
27786
27540
|
}
|
|
@@ -27837,8 +27591,8 @@ async function copyDirectoryRecursive2(src, dest, skipDirs) {
|
|
|
27837
27591
|
await mkdir12(dest, { recursive: true });
|
|
27838
27592
|
const entries = await readdir4(src, { withFileTypes: true });
|
|
27839
27593
|
for (const entry of entries) {
|
|
27840
|
-
const srcPath =
|
|
27841
|
-
const destPath =
|
|
27594
|
+
const srcPath = path38.join(src, entry.name);
|
|
27595
|
+
const destPath = path38.join(dest, entry.name);
|
|
27842
27596
|
if (entry.name === ".git") {
|
|
27843
27597
|
continue;
|
|
27844
27598
|
}
|
|
@@ -27871,7 +27625,7 @@ var WorkspacePoolManager = class {
|
|
|
27871
27625
|
async acquireWorkspace(options) {
|
|
27872
27626
|
const { templatePath, repos, maxSlots, repoManager, poolReset } = options;
|
|
27873
27627
|
const fingerprint = computeWorkspaceFingerprint(repos);
|
|
27874
|
-
const poolDir =
|
|
27628
|
+
const poolDir = path38.join(this.poolRoot, fingerprint);
|
|
27875
27629
|
await mkdir12(poolDir, { recursive: true });
|
|
27876
27630
|
const drifted = await this.checkDrift(poolDir, fingerprint);
|
|
27877
27631
|
if (drifted) {
|
|
@@ -27881,7 +27635,7 @@ var WorkspacePoolManager = class {
|
|
|
27881
27635
|
await this.removeAllSlots(poolDir);
|
|
27882
27636
|
}
|
|
27883
27637
|
for (let i = 0; i < maxSlots; i++) {
|
|
27884
|
-
const slotPath =
|
|
27638
|
+
const slotPath = path38.join(poolDir, `slot-${i}`);
|
|
27885
27639
|
const lockPath = `${slotPath}.lock`;
|
|
27886
27640
|
const locked = await this.tryLock(lockPath);
|
|
27887
27641
|
if (!locked) {
|
|
@@ -27943,7 +27697,7 @@ var WorkspacePoolManager = class {
|
|
|
27943
27697
|
throw err;
|
|
27944
27698
|
}
|
|
27945
27699
|
try {
|
|
27946
|
-
const pidStr = await
|
|
27700
|
+
const pidStr = await readFile10(lockPath, "utf-8");
|
|
27947
27701
|
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
27948
27702
|
if (!Number.isNaN(pid)) {
|
|
27949
27703
|
try {
|
|
@@ -27968,9 +27722,9 @@ var WorkspacePoolManager = class {
|
|
|
27968
27722
|
* Returns false (no drift) if metadata.json doesn't exist (first use).
|
|
27969
27723
|
*/
|
|
27970
27724
|
async checkDrift(poolDir, fingerprint) {
|
|
27971
|
-
const metadataPath =
|
|
27725
|
+
const metadataPath = path38.join(poolDir, "metadata.json");
|
|
27972
27726
|
try {
|
|
27973
|
-
const raw = await
|
|
27727
|
+
const raw = await readFile10(metadataPath, "utf-8");
|
|
27974
27728
|
const metadata = JSON.parse(raw);
|
|
27975
27729
|
return metadata.fingerprint !== fingerprint;
|
|
27976
27730
|
} catch {
|
|
@@ -27985,17 +27739,17 @@ var WorkspacePoolManager = class {
|
|
|
27985
27739
|
repos,
|
|
27986
27740
|
createdAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
27987
27741
|
};
|
|
27988
|
-
await writeFile7(
|
|
27742
|
+
await writeFile7(path38.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
|
|
27989
27743
|
}
|
|
27990
27744
|
/** Remove all slot directories and their lock files from a pool directory. */
|
|
27991
27745
|
async removeAllSlots(poolDir) {
|
|
27992
27746
|
const entries = await readdir4(poolDir);
|
|
27993
27747
|
for (const entry of entries) {
|
|
27994
27748
|
if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
|
|
27995
|
-
const lockPath =
|
|
27749
|
+
const lockPath = path38.join(poolDir, `${entry}.lock`);
|
|
27996
27750
|
if (existsSync2(lockPath)) {
|
|
27997
27751
|
try {
|
|
27998
|
-
const pidStr = await
|
|
27752
|
+
const pidStr = await readFile10(lockPath, "utf-8");
|
|
27999
27753
|
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
28000
27754
|
if (!Number.isNaN(pid)) {
|
|
28001
27755
|
try {
|
|
@@ -28008,12 +27762,12 @@ var WorkspacePoolManager = class {
|
|
|
28008
27762
|
} catch {
|
|
28009
27763
|
}
|
|
28010
27764
|
}
|
|
28011
|
-
await rm5(
|
|
27765
|
+
await rm5(path38.join(poolDir, entry), { recursive: true, force: true });
|
|
28012
27766
|
await rm5(lockPath, { force: true }).catch(() => {
|
|
28013
27767
|
});
|
|
28014
27768
|
}
|
|
28015
27769
|
}
|
|
28016
|
-
await rm5(
|
|
27770
|
+
await rm5(path38.join(poolDir, "metadata.json"), { force: true }).catch(() => {
|
|
28017
27771
|
});
|
|
28018
27772
|
}
|
|
28019
27773
|
/**
|
|
@@ -28023,7 +27777,7 @@ var WorkspacePoolManager = class {
|
|
|
28023
27777
|
*/
|
|
28024
27778
|
async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
|
|
28025
27779
|
for (const repo of repos) {
|
|
28026
|
-
const repoDir =
|
|
27780
|
+
const repoDir = path38.join(slotPath, repo.path);
|
|
28027
27781
|
if (!existsSync2(repoDir)) {
|
|
28028
27782
|
continue;
|
|
28029
27783
|
}
|
|
@@ -28144,7 +27898,7 @@ ${lines.join("\n")}`;
|
|
|
28144
27898
|
* Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
|
|
28145
27899
|
*/
|
|
28146
27900
|
async materialize(repo, workspacePath) {
|
|
28147
|
-
const targetDir =
|
|
27901
|
+
const targetDir = path39.join(workspacePath, repo.path);
|
|
28148
27902
|
const sourceUrl = getSourceUrl(repo.source);
|
|
28149
27903
|
const startedAt = Date.now();
|
|
28150
27904
|
if (this.verbose) {
|
|
@@ -28235,7 +27989,7 @@ ${lines.join("\n")}`;
|
|
|
28235
27989
|
async reset(repos, workspacePath, reset) {
|
|
28236
27990
|
const cleanFlag = reset === "strict" ? "-fdx" : "-fd";
|
|
28237
27991
|
for (const repo of repos) {
|
|
28238
|
-
const targetDir =
|
|
27992
|
+
const targetDir = path39.join(workspacePath, repo.path);
|
|
28239
27993
|
await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
|
|
28240
27994
|
await this.runGit(["clean", cleanFlag], { cwd: targetDir });
|
|
28241
27995
|
}
|
|
@@ -28245,11 +27999,11 @@ async function resolveWorkspaceTemplate(templatePath) {
|
|
|
28245
27999
|
if (!templatePath) {
|
|
28246
28000
|
return void 0;
|
|
28247
28001
|
}
|
|
28248
|
-
const resolved =
|
|
28002
|
+
const resolved = path40.resolve(templatePath);
|
|
28249
28003
|
const stats = await stat6(resolved);
|
|
28250
28004
|
if (stats.isFile()) {
|
|
28251
28005
|
return {
|
|
28252
|
-
dir:
|
|
28006
|
+
dir: path40.dirname(resolved),
|
|
28253
28007
|
workspaceFile: resolved
|
|
28254
28008
|
};
|
|
28255
28009
|
}
|
|
@@ -28261,14 +28015,14 @@ async function resolveWorkspaceTemplate(templatePath) {
|
|
|
28261
28015
|
if (workspaceFiles.length === 1) {
|
|
28262
28016
|
return {
|
|
28263
28017
|
dir: resolved,
|
|
28264
|
-
workspaceFile:
|
|
28018
|
+
workspaceFile: path40.join(resolved, workspaceFiles[0])
|
|
28265
28019
|
};
|
|
28266
28020
|
}
|
|
28267
28021
|
if (workspaceFiles.length > 1) {
|
|
28268
28022
|
const conventionFile = workspaceFiles.find((f) => f === "template.code-workspace");
|
|
28269
28023
|
return {
|
|
28270
28024
|
dir: resolved,
|
|
28271
|
-
workspaceFile: conventionFile ?
|
|
28025
|
+
workspaceFile: conventionFile ? path40.join(resolved, conventionFile) : void 0
|
|
28272
28026
|
};
|
|
28273
28027
|
}
|
|
28274
28028
|
return { dir: resolved };
|
|
@@ -28468,7 +28222,7 @@ async function runEvaluation(options) {
|
|
|
28468
28222
|
];
|
|
28469
28223
|
const evaluatorRegistry = buildEvaluatorRegistry(evaluators, resolveGraderProvider);
|
|
28470
28224
|
const typeRegistry = createBuiltinRegistry();
|
|
28471
|
-
const discoveryBaseDir = evalFilePath ?
|
|
28225
|
+
const discoveryBaseDir = evalFilePath ? path41.dirname(path41.resolve(evalFilePath)) : process.cwd();
|
|
28472
28226
|
const evalDir = discoveryBaseDir;
|
|
28473
28227
|
await discoverAssertions(typeRegistry, discoveryBaseDir);
|
|
28474
28228
|
await discoverGraders(typeRegistry, discoveryBaseDir);
|
|
@@ -28657,7 +28411,7 @@ async function runEvaluation(options) {
|
|
|
28657
28411
|
}
|
|
28658
28412
|
try {
|
|
28659
28413
|
if (suiteWorkspaceFile && sharedWorkspacePath) {
|
|
28660
|
-
const copiedWorkspaceFile =
|
|
28414
|
+
const copiedWorkspaceFile = path41.join(sharedWorkspacePath, path41.basename(suiteWorkspaceFile));
|
|
28661
28415
|
try {
|
|
28662
28416
|
await stat7(copiedWorkspaceFile);
|
|
28663
28417
|
suiteWorkspaceFile = copiedWorkspaceFile;
|
|
@@ -28767,10 +28521,10 @@ async function runEvaluation(options) {
|
|
|
28767
28521
|
const budgetResult = {
|
|
28768
28522
|
timestamp: (now2 ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
28769
28523
|
testId: evalCase.id,
|
|
28770
|
-
|
|
28524
|
+
eval_set: evalCase.eval_set,
|
|
28771
28525
|
score: 0,
|
|
28772
28526
|
assertions: [],
|
|
28773
|
-
|
|
28527
|
+
output: [],
|
|
28774
28528
|
target: target.name,
|
|
28775
28529
|
error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
|
|
28776
28530
|
budgetExceeded: true,
|
|
@@ -28803,10 +28557,10 @@ async function runEvaluation(options) {
|
|
|
28803
28557
|
const haltResult = {
|
|
28804
28558
|
timestamp: (now2 ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
28805
28559
|
testId: evalCase.id,
|
|
28806
|
-
|
|
28560
|
+
eval_set: evalCase.eval_set,
|
|
28807
28561
|
score: 0,
|
|
28808
28562
|
assertions: [],
|
|
28809
|
-
|
|
28563
|
+
output: [],
|
|
28810
28564
|
target: target.name,
|
|
28811
28565
|
error: errorMsg,
|
|
28812
28566
|
executionStatus: "execution_error",
|
|
@@ -29035,8 +28789,6 @@ async function runBatchEvaluation(options) {
|
|
|
29035
28789
|
const promptInputs = promptInputsList[index];
|
|
29036
28790
|
return {
|
|
29037
28791
|
question: promptInputs.question,
|
|
29038
|
-
guidelines: promptInputs.guidelines,
|
|
29039
|
-
guideline_patterns: evalCase.guideline_patterns,
|
|
29040
28792
|
inputFiles: evalCase.file_paths,
|
|
29041
28793
|
evalCaseId: evalCase.id,
|
|
29042
28794
|
metadata: {
|
|
@@ -29234,7 +28986,7 @@ async function runEvalCase(options) {
|
|
|
29234
28986
|
);
|
|
29235
28987
|
}
|
|
29236
28988
|
if (caseWorkspaceFile && workspacePath) {
|
|
29237
|
-
const copiedFile =
|
|
28989
|
+
const copiedFile = path41.join(workspacePath, path41.basename(caseWorkspaceFile));
|
|
29238
28990
|
try {
|
|
29239
28991
|
await stat7(copiedFile);
|
|
29240
28992
|
caseWorkspaceFile = copiedFile;
|
|
@@ -29294,10 +29046,10 @@ async function runEvalCase(options) {
|
|
|
29294
29046
|
const files = evalCase.metadata.agent_skills_files;
|
|
29295
29047
|
if (baseDir && files.length > 0) {
|
|
29296
29048
|
for (const relPath of files) {
|
|
29297
|
-
const srcPath =
|
|
29298
|
-
const destPath =
|
|
29049
|
+
const srcPath = path41.resolve(baseDir, relPath);
|
|
29050
|
+
const destPath = path41.resolve(workspacePath, relPath);
|
|
29299
29051
|
try {
|
|
29300
|
-
await mkdir13(
|
|
29052
|
+
await mkdir13(path41.dirname(destPath), { recursive: true });
|
|
29301
29053
|
await copyFile2(srcPath, destPath);
|
|
29302
29054
|
} catch (error) {
|
|
29303
29055
|
const message = error instanceof Error ? error.message : String(error);
|
|
@@ -29744,8 +29496,7 @@ async function evaluateCandidate(options) {
|
|
|
29744
29496
|
let lmRequest;
|
|
29745
29497
|
if (isAgentProvider(provider)) {
|
|
29746
29498
|
agentRequest = {
|
|
29747
|
-
question: promptInputs.question
|
|
29748
|
-
guideline_paths: evalCase.guideline_paths
|
|
29499
|
+
question: promptInputs.question
|
|
29749
29500
|
};
|
|
29750
29501
|
} else {
|
|
29751
29502
|
if (promptInputs.chatPrompt) {
|
|
@@ -29754,8 +29505,7 @@ async function evaluateCandidate(options) {
|
|
|
29754
29505
|
};
|
|
29755
29506
|
} else {
|
|
29756
29507
|
lmRequest = {
|
|
29757
|
-
question: promptInputs.question
|
|
29758
|
-
guidelines: promptInputs.guidelines
|
|
29508
|
+
question: promptInputs.question
|
|
29759
29509
|
};
|
|
29760
29510
|
}
|
|
29761
29511
|
}
|
|
@@ -29769,11 +29519,10 @@ async function evaluateCandidate(options) {
|
|
|
29769
29519
|
return {
|
|
29770
29520
|
timestamp: completedAt.toISOString(),
|
|
29771
29521
|
testId: evalCase.id,
|
|
29772
|
-
|
|
29522
|
+
eval_set: evalCase.eval_set,
|
|
29773
29523
|
conversationId: evalCase.conversation_id,
|
|
29774
29524
|
score: score.score,
|
|
29775
29525
|
assertions: score.assertions,
|
|
29776
|
-
outputText: candidate,
|
|
29777
29526
|
target: target.name,
|
|
29778
29527
|
tokenUsage,
|
|
29779
29528
|
costUsd,
|
|
@@ -29784,7 +29533,7 @@ async function evaluateCandidate(options) {
|
|
|
29784
29533
|
input,
|
|
29785
29534
|
scores,
|
|
29786
29535
|
trace: trace2,
|
|
29787
|
-
output,
|
|
29536
|
+
output: output ?? [{ role: "assistant", content: candidate }],
|
|
29788
29537
|
fileChanges,
|
|
29789
29538
|
executionStatus: classifyQualityStatus(score.score)
|
|
29790
29539
|
};
|
|
@@ -29918,7 +29667,7 @@ async function runEvaluatorList(options) {
|
|
|
29918
29667
|
fileChanges,
|
|
29919
29668
|
workspacePath
|
|
29920
29669
|
};
|
|
29921
|
-
const evalFileDir = evalCase.
|
|
29670
|
+
const evalFileDir = evalCase.file_paths[0] ? path41.dirname(evalCase.file_paths[0]) : process.cwd();
|
|
29922
29671
|
const dispatchContext = {
|
|
29923
29672
|
graderProvider,
|
|
29924
29673
|
targetResolver,
|
|
@@ -29949,7 +29698,7 @@ async function runEvaluatorList(options) {
|
|
|
29949
29698
|
weight,
|
|
29950
29699
|
verdict: score2.verdict,
|
|
29951
29700
|
assertions: score2.assertions,
|
|
29952
|
-
|
|
29701
|
+
input: score2.evaluatorRawRequest,
|
|
29953
29702
|
details: score2.details,
|
|
29954
29703
|
scores: mapChildResults(score2.scores),
|
|
29955
29704
|
tokenUsage: score2.tokenUsage,
|
|
@@ -30032,7 +29781,7 @@ function filterEvalCases(evalCases, filter2) {
|
|
|
30032
29781
|
if (!filter2) {
|
|
30033
29782
|
return evalCases;
|
|
30034
29783
|
}
|
|
30035
|
-
return evalCases.filter((evalCase) =>
|
|
29784
|
+
return evalCases.filter((evalCase) => micromatch3.isMatch(evalCase.id, filter2));
|
|
30036
29785
|
}
|
|
30037
29786
|
function buildEvaluatorRegistry(overrides, resolveGraderProvider) {
|
|
30038
29787
|
const llmGrader = overrides?.["llm-grader"] ?? overrides?.["llm-judge"] ?? new LlmGraderEvaluator({
|
|
@@ -30069,8 +29818,6 @@ async function invokeProvider(provider, options) {
|
|
|
30069
29818
|
const braintrustSpanIds = streamCallbacks?.getActiveSpanIds?.() ?? void 0;
|
|
30070
29819
|
return await provider.invoke({
|
|
30071
29820
|
question: promptInputs.question,
|
|
30072
|
-
guidelines: promptInputs.guidelines,
|
|
30073
|
-
guideline_patterns: evalCase.guideline_patterns,
|
|
30074
29821
|
chatPrompt: promptInputs.chatPrompt,
|
|
30075
29822
|
inputFiles: evalCase.file_paths,
|
|
30076
29823
|
evalCaseId: evalCase.id,
|
|
@@ -30098,21 +29845,17 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
|
|
|
30098
29845
|
if (isAgentProvider(provider)) {
|
|
30099
29846
|
agentRequest = {
|
|
30100
29847
|
question: promptInputs.question,
|
|
30101
|
-
guideline_paths: evalCase.guideline_paths,
|
|
30102
29848
|
error: message
|
|
30103
29849
|
};
|
|
30104
29850
|
} else {
|
|
30105
29851
|
if (promptInputs.chatPrompt) {
|
|
30106
29852
|
lmRequest = {
|
|
30107
29853
|
chat_prompt: promptInputs.chatPrompt,
|
|
30108
|
-
guideline_paths: evalCase.guideline_paths,
|
|
30109
29854
|
error: message
|
|
30110
29855
|
};
|
|
30111
29856
|
} else {
|
|
30112
29857
|
lmRequest = {
|
|
30113
29858
|
question: promptInputs.question,
|
|
30114
|
-
guidelines: promptInputs.guidelines,
|
|
30115
|
-
guideline_paths: evalCase.guideline_paths,
|
|
30116
29859
|
error: message
|
|
30117
29860
|
};
|
|
30118
29861
|
}
|
|
@@ -30125,11 +29868,11 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
|
|
|
30125
29868
|
return {
|
|
30126
29869
|
timestamp: timestamp.toISOString(),
|
|
30127
29870
|
testId: evalCase.id,
|
|
30128
|
-
|
|
29871
|
+
eval_set: evalCase.eval_set,
|
|
30129
29872
|
conversationId: evalCase.conversation_id,
|
|
30130
29873
|
score: 0,
|
|
30131
29874
|
assertions: [{ text: `Error: ${message}`, passed: false }],
|
|
30132
|
-
|
|
29875
|
+
output: [{ role: "assistant", content: `Error occurred: ${message}` }],
|
|
30133
29876
|
target: targetName,
|
|
30134
29877
|
requests,
|
|
30135
29878
|
input,
|
|
@@ -30158,7 +29901,6 @@ function createCacheKey(provider, target, evalCase, promptInputs) {
|
|
|
30158
29901
|
hash.update(target.name);
|
|
30159
29902
|
hash.update(evalCase.id);
|
|
30160
29903
|
hash.update(promptInputs.question);
|
|
30161
|
-
hash.update(promptInputs.guidelines);
|
|
30162
29904
|
hash.update(promptInputs.systemMessage ?? "");
|
|
30163
29905
|
if (promptInputs.chatPrompt) {
|
|
30164
29906
|
hash.update(JSON.stringify(promptInputs.chatPrompt));
|
|
@@ -30173,7 +29915,7 @@ function buildResultInput(promptInputs) {
|
|
|
30173
29915
|
content: message.content
|
|
30174
29916
|
}));
|
|
30175
29917
|
}
|
|
30176
|
-
return promptInputs.question;
|
|
29918
|
+
return [{ role: "user", content: promptInputs.question }];
|
|
30177
29919
|
}
|
|
30178
29920
|
function aggregateEvaluatorTokenUsage(scores) {
|
|
30179
29921
|
if (!scores || scores.length === 0) return void 0;
|
|
@@ -30239,7 +29981,7 @@ function mapChildResults(children) {
|
|
|
30239
29981
|
weight: child.weight,
|
|
30240
29982
|
verdict: child.verdict,
|
|
30241
29983
|
assertions: child.assertions,
|
|
30242
|
-
|
|
29984
|
+
input: child.evaluatorRawRequest,
|
|
30243
29985
|
scores: mapChildResults(child.scores),
|
|
30244
29986
|
details: child.details,
|
|
30245
29987
|
tokenUsage: child.tokenUsage
|
|
@@ -30287,7 +30029,7 @@ async function evaluate(config) {
|
|
|
30287
30029
|
}
|
|
30288
30030
|
const gitRoot = await findGitRoot(process.cwd());
|
|
30289
30031
|
const repoRoot = gitRoot ?? process.cwd();
|
|
30290
|
-
const testFilePath = config.specFile ?
|
|
30032
|
+
const testFilePath = config.specFile ? path422.resolve(config.specFile) : path422.join(process.cwd(), "__programmatic__.yaml");
|
|
30291
30033
|
await loadEnvHierarchy(repoRoot, testFilePath);
|
|
30292
30034
|
let resolvedTarget;
|
|
30293
30035
|
let taskProvider;
|
|
@@ -30353,8 +30095,6 @@ async function evaluate(config) {
|
|
|
30353
30095
|
input_segments: inputSegments,
|
|
30354
30096
|
expected_output: expectedOutput,
|
|
30355
30097
|
reference_answer: expectedOutputValue,
|
|
30356
|
-
guideline_paths: [],
|
|
30357
|
-
guideline_patterns: [],
|
|
30358
30098
|
file_paths: [],
|
|
30359
30099
|
assertions: assertConfigs.length > 0 ? assertConfigs : void 0,
|
|
30360
30100
|
metadata: test.metadata
|
|
@@ -30416,10 +30156,10 @@ function computeSummary(results, durationMs) {
|
|
|
30416
30156
|
var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
|
|
30417
30157
|
async function discoverDefaultTarget(repoRoot) {
|
|
30418
30158
|
const cwd = process.cwd();
|
|
30419
|
-
const chain = buildDirectoryChain(
|
|
30159
|
+
const chain = buildDirectoryChain(path422.join(cwd, "_placeholder"), repoRoot);
|
|
30420
30160
|
for (const dir of chain) {
|
|
30421
30161
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
30422
|
-
const targetsPath =
|
|
30162
|
+
const targetsPath = path422.join(dir, candidate);
|
|
30423
30163
|
if (!existsSync4(targetsPath)) continue;
|
|
30424
30164
|
try {
|
|
30425
30165
|
const definitions = await readTargetDefinitions(targetsPath);
|
|
@@ -30436,7 +30176,7 @@ async function loadEnvHierarchy(repoRoot, startPath) {
|
|
|
30436
30176
|
const chain = buildDirectoryChain(startPath, repoRoot);
|
|
30437
30177
|
const envFiles = [];
|
|
30438
30178
|
for (const dir of chain) {
|
|
30439
|
-
const envPath =
|
|
30179
|
+
const envPath = path422.join(dir, ".env");
|
|
30440
30180
|
if (existsSync4(envPath)) envFiles.push(envPath);
|
|
30441
30181
|
}
|
|
30442
30182
|
for (let i = 0; i < envFiles.length; i++) {
|
|
@@ -30617,7 +30357,7 @@ var ResponseCache = class {
|
|
|
30617
30357
|
async get(key) {
|
|
30618
30358
|
const filePath = this.keyToPath(key);
|
|
30619
30359
|
try {
|
|
30620
|
-
const data = await
|
|
30360
|
+
const data = await readFile11(filePath, "utf8");
|
|
30621
30361
|
return JSON.parse(data);
|
|
30622
30362
|
} catch {
|
|
30623
30363
|
return void 0;
|
|
@@ -30625,13 +30365,13 @@ var ResponseCache = class {
|
|
|
30625
30365
|
}
|
|
30626
30366
|
async set(key, value) {
|
|
30627
30367
|
const filePath = this.keyToPath(key);
|
|
30628
|
-
const dir =
|
|
30368
|
+
const dir = path43.dirname(filePath);
|
|
30629
30369
|
await mkdir14(dir, { recursive: true });
|
|
30630
30370
|
await writeFile8(filePath, JSON.stringify(value, null, 2), "utf8");
|
|
30631
30371
|
}
|
|
30632
30372
|
keyToPath(key) {
|
|
30633
30373
|
const prefix = key.slice(0, 2);
|
|
30634
|
-
return
|
|
30374
|
+
return path43.join(this.cachePath, prefix, `${key}.json`);
|
|
30635
30375
|
}
|
|
30636
30376
|
};
|
|
30637
30377
|
function shouldEnableCache(params) {
|
|
@@ -30646,7 +30386,6 @@ function shouldSkipCacheForTemperature(targetConfig) {
|
|
|
30646
30386
|
return false;
|
|
30647
30387
|
}
|
|
30648
30388
|
var STRIPPED_TOP_LEVEL_FIELDS = /* @__PURE__ */ new Set([
|
|
30649
|
-
"outputText",
|
|
30650
30389
|
"requests",
|
|
30651
30390
|
"trace",
|
|
30652
30391
|
"workspacePath",
|
|
@@ -30663,7 +30402,7 @@ var STRIPPED_TOP_LEVEL_FIELDS = /* @__PURE__ */ new Set([
|
|
|
30663
30402
|
"startTime",
|
|
30664
30403
|
"endTime"
|
|
30665
30404
|
]);
|
|
30666
|
-
var STRIPPED_EVALUATOR_FIELDS = /* @__PURE__ */ new Set(["rawRequest", "
|
|
30405
|
+
var STRIPPED_EVALUATOR_FIELDS = /* @__PURE__ */ new Set(["rawRequest", "input"]);
|
|
30667
30406
|
function trimEvaluatorResult(result) {
|
|
30668
30407
|
const trimmed = {};
|
|
30669
30408
|
for (const [key, value] of Object.entries(result)) {
|
|
@@ -30816,9 +30555,13 @@ var OtelTraceExporter = class {
|
|
|
30816
30555
|
rootSpan.setAttribute("gen_ai.system", "agentv");
|
|
30817
30556
|
rootSpan.setAttribute("agentv.test_id", result.testId);
|
|
30818
30557
|
rootSpan.setAttribute("agentv.target", result.target);
|
|
30819
|
-
if (result.
|
|
30558
|
+
if (result.eval_set) rootSpan.setAttribute("agentv.eval_set", result.eval_set);
|
|
30820
30559
|
rootSpan.setAttribute("agentv.score", result.score);
|
|
30821
|
-
if (captureContent
|
|
30560
|
+
if (captureContent && result.output.length > 0) {
|
|
30561
|
+
const lastMsg = result.output[result.output.length - 1];
|
|
30562
|
+
const text2 = typeof lastMsg.content === "string" ? lastMsg.content : JSON.stringify(lastMsg.content);
|
|
30563
|
+
rootSpan.setAttribute("agentv.output_text", text2);
|
|
30564
|
+
}
|
|
30822
30565
|
if (result.durationMs != null)
|
|
30823
30566
|
rootSpan.setAttribute("agentv.trace.duration_ms", result.durationMs);
|
|
30824
30567
|
if (result.costUsd != null) rootSpan.setAttribute("agentv.trace.cost_usd", result.costUsd);
|
|
@@ -30997,14 +30740,14 @@ var OtelStreamingObserver = class {
|
|
|
30997
30740
|
// biome-ignore lint/suspicious/noExplicitAny: OTel context loaded dynamically
|
|
30998
30741
|
rootCtx = null;
|
|
30999
30742
|
/** Create root eval span immediately (visible in backend right away) */
|
|
31000
|
-
startEvalCase(testId, target,
|
|
30743
|
+
startEvalCase(testId, target, evalSet) {
|
|
31001
30744
|
const ctx = this.parentCtx ?? this.api.context.active();
|
|
31002
30745
|
this.rootSpan = this.tracer.startSpan("agentv.eval", void 0, ctx);
|
|
31003
30746
|
this.rootSpan.setAttribute("gen_ai.operation.name", "evaluate");
|
|
31004
30747
|
this.rootSpan.setAttribute("gen_ai.system", "agentv");
|
|
31005
30748
|
this.rootSpan.setAttribute("agentv.test_id", testId);
|
|
31006
30749
|
this.rootSpan.setAttribute("agentv.target", target);
|
|
31007
|
-
if (
|
|
30750
|
+
if (evalSet) this.rootSpan.setAttribute("agentv.eval_set", evalSet);
|
|
31008
30751
|
this.rootCtx = this.api.trace.setSpan(this.api.context.active(), this.rootSpan);
|
|
31009
30752
|
}
|
|
31010
30753
|
/** Create and immediately export a tool span */
|
|
@@ -31119,6 +30862,8 @@ export {
|
|
|
31119
30862
|
resolveFileReference,
|
|
31120
30863
|
CLI_PLACEHOLDERS,
|
|
31121
30864
|
resolveTargetDefinition,
|
|
30865
|
+
interpolateEnv,
|
|
30866
|
+
loadCasesFromFile,
|
|
31122
30867
|
KNOWN_PROVIDERS,
|
|
31123
30868
|
PROVIDER_ALIASES,
|
|
31124
30869
|
computeTraceSummary,
|
|
@@ -31131,7 +30876,6 @@ export {
|
|
|
31131
30876
|
parseAgentSkillsEvals,
|
|
31132
30877
|
DEFAULT_EVAL_PATTERNS,
|
|
31133
30878
|
loadConfig,
|
|
31134
|
-
isGuidelineFile,
|
|
31135
30879
|
extractTargetFromSuite,
|
|
31136
30880
|
extractTargetsFromSuite,
|
|
31137
30881
|
extractTargetsFromTestCase,
|
|
@@ -31248,4 +30992,4 @@ export {
|
|
|
31248
30992
|
OtelStreamingObserver,
|
|
31249
30993
|
createAgentKernel
|
|
31250
30994
|
};
|
|
31251
|
-
//# sourceMappingURL=chunk-
|
|
30995
|
+
//# sourceMappingURL=chunk-TXDPYXHY.js.map
|