@agentv/core 2.16.0 → 2.17.1-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-E6AJPAXM.js → chunk-PSYFRPNT.js} +1 -1
- package/dist/chunk-PSYFRPNT.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +5 -5
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +6 -6
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +122 -62
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +10 -10
- package/dist/index.d.ts +10 -10
- package/dist/index.js +123 -63
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-E6AJPAXM.js.map +0 -1
package/dist/index.cjs
CHANGED
|
@@ -1777,6 +1777,25 @@ var import_node_path8 = __toESM(require("path"), 1);
|
|
|
1777
1777
|
var import_micromatch3 = __toESM(require("micromatch"), 1);
|
|
1778
1778
|
var import_yaml4 = require("yaml");
|
|
1779
1779
|
|
|
1780
|
+
// src/evaluation/interpolation.ts
|
|
1781
|
+
var ENV_VAR_PATTERN = /\$\{\{\s*([A-Za-z_][A-Za-z0-9_]*)\s*\}\}/g;
|
|
1782
|
+
function interpolateEnv(value, env) {
|
|
1783
|
+
if (typeof value === "string") {
|
|
1784
|
+
return value.replace(ENV_VAR_PATTERN, (_, varName) => env[varName] ?? "");
|
|
1785
|
+
}
|
|
1786
|
+
if (Array.isArray(value)) {
|
|
1787
|
+
return value.map((item) => interpolateEnv(item, env));
|
|
1788
|
+
}
|
|
1789
|
+
if (value !== null && typeof value === "object") {
|
|
1790
|
+
const result = {};
|
|
1791
|
+
for (const [key, val] of Object.entries(value)) {
|
|
1792
|
+
result[key] = interpolateEnv(val, env);
|
|
1793
|
+
}
|
|
1794
|
+
return result;
|
|
1795
|
+
}
|
|
1796
|
+
return value;
|
|
1797
|
+
}
|
|
1798
|
+
|
|
1780
1799
|
// src/evaluation/loaders/case-file-loader.ts
|
|
1781
1800
|
var import_promises = require("fs/promises");
|
|
1782
1801
|
var import_node_path = __toESM(require("path"), 1);
|
|
@@ -1795,7 +1814,8 @@ function isGlobPattern(filePath) {
|
|
|
1795
1814
|
return filePath.includes("*") || filePath.includes("?") || filePath.includes("{");
|
|
1796
1815
|
}
|
|
1797
1816
|
function parseYamlCases(content, filePath) {
|
|
1798
|
-
const
|
|
1817
|
+
const raw = (0, import_yaml.parse)(content);
|
|
1818
|
+
const parsed = interpolateEnv(raw, process.env);
|
|
1799
1819
|
if (!Array.isArray(parsed)) {
|
|
1800
1820
|
throw new Error(
|
|
1801
1821
|
`External test file must contain a YAML array, got ${typeof parsed}: ${filePath}`
|
|
@@ -1817,7 +1837,8 @@ function parseJsonlCases(content, filePath) {
|
|
|
1817
1837
|
const line = lines[i].trim();
|
|
1818
1838
|
if (line === "") continue;
|
|
1819
1839
|
try {
|
|
1820
|
-
const
|
|
1840
|
+
const raw = JSON.parse(line);
|
|
1841
|
+
const parsed = interpolateEnv(raw, process.env);
|
|
1821
1842
|
if (!isJsonObject(parsed)) {
|
|
1822
1843
|
throw new Error("Expected JSON object");
|
|
1823
1844
|
}
|
|
@@ -3966,7 +3987,7 @@ async function loadSidecarMetadata(jsonlPath, verbose) {
|
|
|
3966
3987
|
}
|
|
3967
3988
|
try {
|
|
3968
3989
|
const content = await (0, import_promises6.readFile)(sidecarPath, "utf8");
|
|
3969
|
-
const parsed = (0, import_yaml3.parse)(content);
|
|
3990
|
+
const parsed = interpolateEnv((0, import_yaml3.parse)(content), process.env);
|
|
3970
3991
|
if (!isJsonObject(parsed)) {
|
|
3971
3992
|
logWarning4(`Invalid sidecar metadata format in ${sidecarPath}`);
|
|
3972
3993
|
return {};
|
|
@@ -3989,7 +4010,8 @@ function parseJsonlContent(content, filePath) {
|
|
|
3989
4010
|
const line = lines[i].trim();
|
|
3990
4011
|
if (line === "") continue;
|
|
3991
4012
|
try {
|
|
3992
|
-
const
|
|
4013
|
+
const raw = JSON.parse(line);
|
|
4014
|
+
const parsed = interpolateEnv(raw, process.env);
|
|
3993
4015
|
if (!isJsonObject(parsed)) {
|
|
3994
4016
|
throw new Error("Expected JSON object");
|
|
3995
4017
|
}
|
|
@@ -4046,9 +4068,10 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
4046
4068
|
}
|
|
4047
4069
|
const inputMessages = resolveInputMessages(evalcase);
|
|
4048
4070
|
const expectedMessages = resolveExpectedMessages(evalcase) ?? [];
|
|
4049
|
-
|
|
4071
|
+
const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || evalcase.assert !== void 0;
|
|
4072
|
+
if (!id || !hasEvaluationSpec || !inputMessages || inputMessages.length === 0) {
|
|
4050
4073
|
logError(
|
|
4051
|
-
`Skipping incomplete test at line ${lineNumber}: ${id ?? "unknown"}. Missing required fields: id,
|
|
4074
|
+
`Skipping incomplete test at line ${lineNumber}: ${id ?? "unknown"}. Missing required fields: id, input, and at least one of criteria/expected_output/assert`
|
|
4052
4075
|
);
|
|
4053
4076
|
continue;
|
|
4054
4077
|
}
|
|
@@ -4126,7 +4149,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
4126
4149
|
guideline_paths: guidelinePaths.map((guidelinePath) => import_node_path6.default.resolve(guidelinePath)),
|
|
4127
4150
|
guideline_patterns: guidelinePatterns,
|
|
4128
4151
|
file_paths: allFilePaths,
|
|
4129
|
-
criteria: outcome,
|
|
4152
|
+
criteria: outcome ?? "",
|
|
4130
4153
|
evaluator: evalCaseEvaluatorKind,
|
|
4131
4154
|
evaluators
|
|
4132
4155
|
};
|
|
@@ -4439,7 +4462,7 @@ async function readTestSuiteMetadata(testFilePath) {
|
|
|
4439
4462
|
try {
|
|
4440
4463
|
const absolutePath = import_node_path8.default.resolve(testFilePath);
|
|
4441
4464
|
const content = await (0, import_promises8.readFile)(absolutePath, "utf8");
|
|
4442
|
-
const parsed = (0, import_yaml4.parse)(content);
|
|
4465
|
+
const parsed = interpolateEnv((0, import_yaml4.parse)(content), process.env);
|
|
4443
4466
|
if (!isJsonObject(parsed)) {
|
|
4444
4467
|
return {};
|
|
4445
4468
|
}
|
|
@@ -4489,11 +4512,11 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
4489
4512
|
const config = await loadConfig(absoluteTestPath, repoRootPath);
|
|
4490
4513
|
const guidelinePatterns = config?.guideline_patterns;
|
|
4491
4514
|
const rawFile = await (0, import_promises8.readFile)(absoluteTestPath, "utf8");
|
|
4492
|
-
const
|
|
4493
|
-
if (!isJsonObject(
|
|
4515
|
+
const interpolated = interpolateEnv((0, import_yaml4.parse)(rawFile), process.env);
|
|
4516
|
+
if (!isJsonObject(interpolated)) {
|
|
4494
4517
|
throw new Error(`Invalid test file format: ${evalFilePath}`);
|
|
4495
4518
|
}
|
|
4496
|
-
const suite =
|
|
4519
|
+
const suite = interpolated;
|
|
4497
4520
|
const datasetNameFromSuite = asString6(suite.dataset)?.trim();
|
|
4498
4521
|
const fallbackDataset = import_node_path8.default.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
|
|
4499
4522
|
const datasetName = datasetNameFromSuite && datasetNameFromSuite.length > 0 ? datasetNameFromSuite : fallbackDataset;
|
|
@@ -4537,9 +4560,10 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
4537
4560
|
}
|
|
4538
4561
|
const testInputMessages = resolveInputMessages(evalcase);
|
|
4539
4562
|
const expectedMessages = resolveExpectedMessages(evalcase) ?? [];
|
|
4540
|
-
|
|
4563
|
+
const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || evalcase.assert !== void 0;
|
|
4564
|
+
if (!id || !hasEvaluationSpec || !testInputMessages || testInputMessages.length === 0) {
|
|
4541
4565
|
logError2(
|
|
4542
|
-
`Skipping incomplete test: ${id ?? "unknown"}. Missing required fields: id,
|
|
4566
|
+
`Skipping incomplete test: ${id ?? "unknown"}. Missing required fields: id, input, and at least one of criteria/expected_output/assert`
|
|
4543
4567
|
);
|
|
4544
4568
|
continue;
|
|
4545
4569
|
}
|
|
@@ -4635,7 +4659,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
4635
4659
|
guideline_paths: guidelinePaths.map((guidelinePath) => import_node_path8.default.resolve(guidelinePath)),
|
|
4636
4660
|
guideline_patterns: guidelinePatterns,
|
|
4637
4661
|
file_paths: allFilePaths,
|
|
4638
|
-
criteria: outcome,
|
|
4662
|
+
criteria: outcome ?? "",
|
|
4639
4663
|
evaluator: evalCaseEvaluatorKind,
|
|
4640
4664
|
evaluators,
|
|
4641
4665
|
workspace: mergedWorkspace,
|
|
@@ -4745,30 +4769,24 @@ function parseWorkspaceHookConfig(raw, evalFileDir) {
|
|
|
4745
4769
|
const script = parseWorkspaceScriptConfig(raw, evalFileDir);
|
|
4746
4770
|
const obj = raw;
|
|
4747
4771
|
const reset = obj.reset === "none" || obj.reset === "fast" || obj.reset === "strict" ? obj.reset : void 0;
|
|
4748
|
-
|
|
4749
|
-
if (!script && !reset && !clean) return void 0;
|
|
4772
|
+
if (!script && !reset) return void 0;
|
|
4750
4773
|
return {
|
|
4751
4774
|
...script ?? {},
|
|
4752
|
-
...reset !== void 0 && { reset }
|
|
4753
|
-
...clean !== void 0 && { clean }
|
|
4775
|
+
...reset !== void 0 && { reset }
|
|
4754
4776
|
};
|
|
4755
4777
|
}
|
|
4756
4778
|
function parseWorkspaceHooksConfig(raw, evalFileDir) {
|
|
4757
4779
|
if (!isJsonObject(raw)) return void 0;
|
|
4758
4780
|
const obj = raw;
|
|
4759
|
-
const
|
|
4760
|
-
const
|
|
4761
|
-
const
|
|
4762
|
-
const
|
|
4763
|
-
const onReuse = parseWorkspaceHookConfig(obj.on_reuse, evalFileDir);
|
|
4764
|
-
const onFinish = parseWorkspaceHookConfig(obj.on_finish, evalFileDir);
|
|
4781
|
+
const beforeAll = parseWorkspaceHookConfig(obj.before_all, evalFileDir);
|
|
4782
|
+
const beforeEach = parseWorkspaceHookConfig(obj.before_each, evalFileDir);
|
|
4783
|
+
const afterEach = parseWorkspaceHookConfig(obj.after_each, evalFileDir);
|
|
4784
|
+
const afterAll = parseWorkspaceHookConfig(obj.after_all, evalFileDir);
|
|
4765
4785
|
const hooks = {
|
|
4766
|
-
...
|
|
4767
|
-
...
|
|
4768
|
-
...
|
|
4769
|
-
...
|
|
4770
|
-
...onReuse !== void 0 && { on_reuse: onReuse },
|
|
4771
|
-
...onFinish !== void 0 && { on_finish: onFinish }
|
|
4786
|
+
...beforeAll !== void 0 && { before_all: beforeAll },
|
|
4787
|
+
...beforeEach !== void 0 && { before_each: beforeEach },
|
|
4788
|
+
...afterEach !== void 0 && { after_each: afterEach },
|
|
4789
|
+
...afterAll !== void 0 && { after_all: afterAll }
|
|
4772
4790
|
};
|
|
4773
4791
|
return Object.keys(hooks).length > 0 ? hooks : void 0;
|
|
4774
4792
|
}
|
|
@@ -4781,7 +4799,7 @@ async function resolveWorkspaceConfig(raw, evalFileDir) {
|
|
|
4781
4799
|
} catch {
|
|
4782
4800
|
throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
|
|
4783
4801
|
}
|
|
4784
|
-
const parsed = (0, import_yaml4.parse)(content);
|
|
4802
|
+
const parsed = interpolateEnv((0, import_yaml4.parse)(content), process.env);
|
|
4785
4803
|
if (!isJsonObject(parsed)) {
|
|
4786
4804
|
throw new Error(
|
|
4787
4805
|
`Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
|
|
@@ -4829,18 +4847,10 @@ function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
|
|
|
4829
4847
|
};
|
|
4830
4848
|
};
|
|
4831
4849
|
const mergedHooks = {
|
|
4832
|
-
|
|
4833
|
-
|
|
4834
|
-
|
|
4835
|
-
)
|
|
4836
|
-
before_each_test: mergeHook(
|
|
4837
|
-
suiteLevel.hooks?.before_each_test,
|
|
4838
|
-
caseLevel.hooks?.before_each_test
|
|
4839
|
-
),
|
|
4840
|
-
after_each_test: mergeHook(suiteLevel.hooks?.after_each_test, caseLevel.hooks?.after_each_test),
|
|
4841
|
-
after_all_tests: mergeHook(suiteLevel.hooks?.after_all_tests, caseLevel.hooks?.after_all_tests),
|
|
4842
|
-
on_reuse: mergeHook(suiteLevel.hooks?.on_reuse, caseLevel.hooks?.on_reuse),
|
|
4843
|
-
on_finish: mergeHook(suiteLevel.hooks?.on_finish, caseLevel.hooks?.on_finish)
|
|
4850
|
+
before_all: mergeHook(suiteLevel.hooks?.before_all, caseLevel.hooks?.before_all),
|
|
4851
|
+
before_each: mergeHook(suiteLevel.hooks?.before_each, caseLevel.hooks?.before_each),
|
|
4852
|
+
after_each: mergeHook(suiteLevel.hooks?.after_each, caseLevel.hooks?.after_each),
|
|
4853
|
+
after_all: mergeHook(suiteLevel.hooks?.after_all, caseLevel.hooks?.after_all)
|
|
4844
4854
|
};
|
|
4845
4855
|
const hasHooks = Object.values(mergedHooks).some((hook) => hook !== void 0);
|
|
4846
4856
|
return {
|
|
@@ -7086,6 +7096,7 @@ var CopilotCliProvider = class {
|
|
|
7086
7096
|
const agentProcess = (0, import_node_child_process2.spawn)(executable, args, {
|
|
7087
7097
|
stdio: ["pipe", "pipe", "inherit"]
|
|
7088
7098
|
});
|
|
7099
|
+
await waitForProcessSpawn(agentProcess, executable, this.targetName);
|
|
7089
7100
|
const toolCallsInProgress = /* @__PURE__ */ new Map();
|
|
7090
7101
|
const completedToolCalls = [];
|
|
7091
7102
|
let finalContent = "";
|
|
@@ -7365,6 +7376,47 @@ var CopilotCliProvider = class {
|
|
|
7365
7376
|
}
|
|
7366
7377
|
}
|
|
7367
7378
|
};
|
|
7379
|
+
async function waitForProcessSpawn(proc, executable, targetName) {
|
|
7380
|
+
if (proc.pid) {
|
|
7381
|
+
return;
|
|
7382
|
+
}
|
|
7383
|
+
await new Promise((resolve, reject) => {
|
|
7384
|
+
const onSpawn = () => {
|
|
7385
|
+
cleanup();
|
|
7386
|
+
resolve();
|
|
7387
|
+
};
|
|
7388
|
+
const onError = (error) => {
|
|
7389
|
+
cleanup();
|
|
7390
|
+
reject(new Error(formatCopilotSpawnError(error, executable, targetName)));
|
|
7391
|
+
};
|
|
7392
|
+
const cleanup = () => {
|
|
7393
|
+
proc.off("spawn", onSpawn);
|
|
7394
|
+
proc.off("error", onError);
|
|
7395
|
+
};
|
|
7396
|
+
proc.once("spawn", onSpawn);
|
|
7397
|
+
proc.once("error", onError);
|
|
7398
|
+
});
|
|
7399
|
+
}
|
|
7400
|
+
function formatCopilotSpawnError(error, executable, targetName) {
|
|
7401
|
+
const code = error.code;
|
|
7402
|
+
const base = `Failed to start Copilot CLI executable '${executable}' for target '${targetName}'. ${error.message}`;
|
|
7403
|
+
if (process.platform !== "win32") {
|
|
7404
|
+
return base;
|
|
7405
|
+
}
|
|
7406
|
+
if (code !== "ENOENT" && code !== "EINVAL") {
|
|
7407
|
+
return base;
|
|
7408
|
+
}
|
|
7409
|
+
return `${base}
|
|
7410
|
+
|
|
7411
|
+
On Windows, shell commands like 'copilot -h' can work via .ps1/.bat shims, but AgentV launches a subprocess that needs a directly spawnable executable path.
|
|
7412
|
+
|
|
7413
|
+
Fix options:
|
|
7414
|
+
1) Install native Copilot binary package:
|
|
7415
|
+
npm install -g @github/copilot-win32-x64
|
|
7416
|
+
2) Set explicit executable for Copilot targets:
|
|
7417
|
+
- In .env: COPILOT_EXE=C:\\Users\\<you>\\AppData\\Roaming\\npm\\node_modules\\@github\\copilot-win32-x64\\copilot.exe
|
|
7418
|
+
- In .agentv/targets.yaml: executable: \${{ COPILOT_EXE }}`;
|
|
7419
|
+
}
|
|
7368
7420
|
function summarizeAcpEvent(eventType, data) {
|
|
7369
7421
|
if (!data || typeof data !== "object") {
|
|
7370
7422
|
return eventType;
|
|
@@ -16424,9 +16476,8 @@ async function runEvaluation(options) {
|
|
|
16424
16476
|
const hasSharedWorkspace = !!(useStaticWorkspace || workspaceTemplate || suiteWorkspace?.hooks || suiteWorkspace?.repos?.length && !isPerTestIsolation);
|
|
16425
16477
|
const poolEnabled = configuredMode === "pooled" ? true : configuredMode === "ephemeral" || useStaticWorkspace ? false : suiteWorkspace?.pool ?? poolWorkspaces ?? true;
|
|
16426
16478
|
const usePool = poolEnabled !== false && !!suiteWorkspace?.repos?.length && !isPerTestIsolation && !useStaticWorkspace;
|
|
16427
|
-
const
|
|
16428
|
-
const
|
|
16429
|
-
const resolvedRetainOnFailure = (finishCleanPolicy === "always" || finishCleanPolicy === "on_failure" ? "cleanup" : finishCleanPolicy === "on_success" || finishCleanPolicy === "never" ? "keep" : void 0) ?? retainOnFailure ?? (cleanupWorkspaces ? "cleanup" : "keep");
|
|
16479
|
+
const resolvedRetainOnSuccess = retainOnSuccess ?? (keepWorkspaces ? "keep" : "cleanup");
|
|
16480
|
+
const resolvedRetainOnFailure = retainOnFailure ?? (cleanupWorkspaces ? "cleanup" : "keep");
|
|
16430
16481
|
const requestedWorkers = options.maxConcurrency ?? target.workers ?? 1;
|
|
16431
16482
|
const workers = hasSharedWorkspace && !usePool ? 1 : requestedWorkers;
|
|
16432
16483
|
setupLog(
|
|
@@ -16461,7 +16512,7 @@ async function runEvaluation(options) {
|
|
|
16461
16512
|
repos: suiteWorkspace.repos,
|
|
16462
16513
|
maxSlots: poolMaxSlots,
|
|
16463
16514
|
repoManager: poolRepoManager,
|
|
16464
|
-
poolReset: (workspaceClean === "full" ? "strict" : workspaceClean === "standard" ? "fast" : null) ??
|
|
16515
|
+
poolReset: (workspaceClean === "full" ? "strict" : workspaceClean === "standard" ? "fast" : null) ?? "fast"
|
|
16465
16516
|
});
|
|
16466
16517
|
poolSlots.push(slot);
|
|
16467
16518
|
setupLog(`pool slot ${i} acquired at: ${slot.path} (existing=${slot.isExisting})`);
|
|
@@ -16512,7 +16563,7 @@ async function runEvaluation(options) {
|
|
|
16512
16563
|
throw new Error(`Failed to materialize repos: ${message}`);
|
|
16513
16564
|
}
|
|
16514
16565
|
}
|
|
16515
|
-
const suiteBeforeAllHook = suiteWorkspace?.hooks?.
|
|
16566
|
+
const suiteBeforeAllHook = suiteWorkspace?.hooks?.before_all;
|
|
16516
16567
|
if (sharedWorkspacePath && hasHookCommand(suiteBeforeAllHook)) {
|
|
16517
16568
|
const beforeAllHook = suiteBeforeAllHook;
|
|
16518
16569
|
const beforeAllCommand = (beforeAllHook.command ?? beforeAllHook.script ?? []).join(" ");
|
|
@@ -16527,7 +16578,7 @@ async function runEvaluation(options) {
|
|
|
16527
16578
|
};
|
|
16528
16579
|
try {
|
|
16529
16580
|
beforeAllOutput = await executeWorkspaceScript(
|
|
16530
|
-
toScriptConfig(beforeAllHook, "
|
|
16581
|
+
toScriptConfig(beforeAllHook, "before_all", "suite workspace"),
|
|
16531
16582
|
scriptContext
|
|
16532
16583
|
);
|
|
16533
16584
|
setupLog("shared before_all completed");
|
|
@@ -16552,7 +16603,7 @@ async function runEvaluation(options) {
|
|
|
16552
16603
|
};
|
|
16553
16604
|
try {
|
|
16554
16605
|
const output = await executeWorkspaceScript(
|
|
16555
|
-
toScriptConfig(beforeAllHook, "
|
|
16606
|
+
toScriptConfig(beforeAllHook, "before_all", "suite workspace"),
|
|
16556
16607
|
scriptContext
|
|
16557
16608
|
);
|
|
16558
16609
|
if (!beforeAllOutput) beforeAllOutput = output;
|
|
@@ -16781,7 +16832,7 @@ async function runEvaluation(options) {
|
|
|
16781
16832
|
}
|
|
16782
16833
|
}
|
|
16783
16834
|
const afterAllWorkspaces = poolSlots.length > 1 ? poolSlots.map((s) => s.path) : sharedWorkspacePath ? [sharedWorkspacePath] : [];
|
|
16784
|
-
const suiteAfterAllHook = suiteWorkspace?.hooks?.
|
|
16835
|
+
const suiteAfterAllHook = suiteWorkspace?.hooks?.after_all;
|
|
16785
16836
|
if (afterAllWorkspaces.length > 0 && hasHookCommand(suiteAfterAllHook)) {
|
|
16786
16837
|
const afterAllHook = suiteAfterAllHook;
|
|
16787
16838
|
for (const wsPath of afterAllWorkspaces) {
|
|
@@ -16793,7 +16844,7 @@ async function runEvaluation(options) {
|
|
|
16793
16844
|
};
|
|
16794
16845
|
try {
|
|
16795
16846
|
const afterAllOutput = await executeWorkspaceScript(
|
|
16796
|
-
toScriptConfig(afterAllHook, "
|
|
16847
|
+
toScriptConfig(afterAllHook, "after_all", "suite workspace"),
|
|
16797
16848
|
scriptContext,
|
|
16798
16849
|
"warn"
|
|
16799
16850
|
);
|
|
@@ -17090,7 +17141,7 @@ async function runEvalCase(options) {
|
|
|
17090
17141
|
);
|
|
17091
17142
|
}
|
|
17092
17143
|
}
|
|
17093
|
-
const caseBeforeAllHook = evalCase.workspace?.hooks?.
|
|
17144
|
+
const caseBeforeAllHook = evalCase.workspace?.hooks?.before_all;
|
|
17094
17145
|
if (workspacePath && hasHookCommand(caseBeforeAllHook)) {
|
|
17095
17146
|
const beforeAllHook = caseBeforeAllHook;
|
|
17096
17147
|
const beforeAllCommand = (beforeAllHook.command ?? beforeAllHook.script ?? []).join(" ");
|
|
@@ -17109,7 +17160,7 @@ async function runEvalCase(options) {
|
|
|
17109
17160
|
};
|
|
17110
17161
|
try {
|
|
17111
17162
|
beforeAllOutput = await executeWorkspaceScript(
|
|
17112
|
-
toScriptConfig(beforeAllHook, "
|
|
17163
|
+
toScriptConfig(beforeAllHook, "before_all", `test '${evalCase.id}'`),
|
|
17113
17164
|
scriptContext
|
|
17114
17165
|
);
|
|
17115
17166
|
if (setupDebug) {
|
|
@@ -17134,7 +17185,7 @@ async function runEvalCase(options) {
|
|
|
17134
17185
|
}
|
|
17135
17186
|
}
|
|
17136
17187
|
}
|
|
17137
|
-
const caseBeforeEachHook = evalCase.workspace?.hooks?.
|
|
17188
|
+
const caseBeforeEachHook = evalCase.workspace?.hooks?.before_each;
|
|
17138
17189
|
if (workspacePath && hasHookCommand(caseBeforeEachHook)) {
|
|
17139
17190
|
const beforeEachHook = caseBeforeEachHook;
|
|
17140
17191
|
const scriptContext = {
|
|
@@ -17147,7 +17198,7 @@ async function runEvalCase(options) {
|
|
|
17147
17198
|
};
|
|
17148
17199
|
try {
|
|
17149
17200
|
beforeEachOutput = await executeWorkspaceScript(
|
|
17150
|
-
toScriptConfig(beforeEachHook, "
|
|
17201
|
+
toScriptConfig(beforeEachHook, "before_each", `test '${evalCase.id}'`),
|
|
17151
17202
|
scriptContext
|
|
17152
17203
|
);
|
|
17153
17204
|
} catch (error) {
|
|
@@ -17264,17 +17315,17 @@ async function runEvalCase(options) {
|
|
|
17264
17315
|
}
|
|
17265
17316
|
}
|
|
17266
17317
|
const providerError = extractProviderError(providerResponse);
|
|
17267
|
-
if (repoManager && workspacePath && evalCase.workspace?.hooks?.
|
|
17318
|
+
if (repoManager && workspacePath && evalCase.workspace?.hooks?.after_each?.reset && evalCase.workspace.hooks.after_each.reset !== "none" && evalCase.workspace.repos) {
|
|
17268
17319
|
try {
|
|
17269
17320
|
await repoManager.reset(
|
|
17270
17321
|
evalCase.workspace.repos,
|
|
17271
17322
|
workspacePath,
|
|
17272
|
-
evalCase.workspace.hooks.
|
|
17323
|
+
evalCase.workspace.hooks.after_each.reset
|
|
17273
17324
|
);
|
|
17274
17325
|
} catch {
|
|
17275
17326
|
}
|
|
17276
17327
|
}
|
|
17277
|
-
const caseAfterEachHook = evalCase.workspace?.hooks?.
|
|
17328
|
+
const caseAfterEachHook = evalCase.workspace?.hooks?.after_each;
|
|
17278
17329
|
if (workspacePath && hasHookCommand(caseAfterEachHook)) {
|
|
17279
17330
|
const afterEachHook = caseAfterEachHook;
|
|
17280
17331
|
const scriptContext = {
|
|
@@ -17287,7 +17338,7 @@ async function runEvalCase(options) {
|
|
|
17287
17338
|
};
|
|
17288
17339
|
try {
|
|
17289
17340
|
afterEachOutput = await executeWorkspaceScript(
|
|
17290
|
-
toScriptConfig(afterEachHook, "
|
|
17341
|
+
toScriptConfig(afterEachHook, "after_each", `test '${evalCase.id}'`),
|
|
17291
17342
|
scriptContext,
|
|
17292
17343
|
"warn"
|
|
17293
17344
|
);
|
|
@@ -17687,9 +17738,11 @@ async function runEvaluatorList(options) {
|
|
|
17687
17738
|
registry: typeRegistry
|
|
17688
17739
|
};
|
|
17689
17740
|
for (const evaluatorConfig of evaluators ?? []) {
|
|
17741
|
+
const startedAt = /* @__PURE__ */ new Date();
|
|
17690
17742
|
try {
|
|
17691
17743
|
const evaluatorInstance = await typeRegistry.create(evaluatorConfig, dispatchContext);
|
|
17692
17744
|
const score2 = await evaluatorInstance.evaluate(evalContext);
|
|
17745
|
+
const endedAt = /* @__PURE__ */ new Date();
|
|
17693
17746
|
const weight = evaluatorConfig.weight ?? 1;
|
|
17694
17747
|
scored.push({
|
|
17695
17748
|
score: score2,
|
|
@@ -17710,9 +17763,13 @@ async function runEvaluatorList(options) {
|
|
|
17710
17763
|
evaluatorProviderRequest: score2.evaluatorRawRequest,
|
|
17711
17764
|
details: score2.details,
|
|
17712
17765
|
scores: mapChildResults(score2.scores),
|
|
17713
|
-
tokenUsage: score2.tokenUsage
|
|
17766
|
+
tokenUsage: score2.tokenUsage,
|
|
17767
|
+
durationMs: endedAt.getTime() - startedAt.getTime(),
|
|
17768
|
+
startedAt: startedAt.toISOString(),
|
|
17769
|
+
endedAt: endedAt.toISOString()
|
|
17714
17770
|
});
|
|
17715
17771
|
} catch (error) {
|
|
17772
|
+
const endedAt = /* @__PURE__ */ new Date();
|
|
17716
17773
|
const message = error instanceof Error ? error.message : String(error);
|
|
17717
17774
|
const fallbackScore = {
|
|
17718
17775
|
score: 0,
|
|
@@ -17738,7 +17795,10 @@ async function runEvaluatorList(options) {
|
|
|
17738
17795
|
verdict: "fail",
|
|
17739
17796
|
hits: [],
|
|
17740
17797
|
misses: [`Evaluator '${evaluatorConfig.name ?? "unknown"}' failed: ${message}`],
|
|
17741
|
-
reasoning: message
|
|
17798
|
+
reasoning: message,
|
|
17799
|
+
durationMs: endedAt.getTime() - startedAt.getTime(),
|
|
17800
|
+
startedAt: startedAt.toISOString(),
|
|
17801
|
+
endedAt: endedAt.toISOString()
|
|
17742
17802
|
});
|
|
17743
17803
|
}
|
|
17744
17804
|
if (evaluatorConfig.negate === true && scored.length > 0) {
|