@agentv/core 2.16.0 → 2.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-E6AJPAXM.js → chunk-CPPYERD2.js} +1 -1
- package/dist/chunk-CPPYERD2.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +5 -5
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +6 -6
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +72 -45
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +4 -10
- package/dist/index.d.ts +4 -10
- package/dist/index.js +73 -46
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-E6AJPAXM.js.map +0 -1
package/dist/index.d.cts
CHANGED
|
@@ -526,22 +526,16 @@ type WorkspaceHookConfig = {
|
|
|
526
526
|
readonly cwd?: string;
|
|
527
527
|
/** Optional reset policy for this hook */
|
|
528
528
|
readonly reset?: 'none' | 'fast' | 'strict';
|
|
529
|
-
/** Optional cleanup policy for this hook */
|
|
530
|
-
readonly clean?: 'always' | 'on_success' | 'on_failure' | 'never';
|
|
531
529
|
};
|
|
532
530
|
type WorkspaceHooksConfig = {
|
|
533
531
|
/** Runs once before first test in the workspace lifecycle */
|
|
534
|
-
readonly before_all_tests?: WorkspaceHookConfig;
|
|
532
|
+
readonly before_all?: WorkspaceHookConfig;
|
|
535
533
|
/** Runs before each test case */
|
|
536
|
-
readonly before_each_test?: WorkspaceHookConfig;
|
|
534
|
+
readonly before_each?: WorkspaceHookConfig;
|
|
537
535
|
/** Runs after each test case */
|
|
538
|
-
readonly after_each_test?: WorkspaceHookConfig;
|
|
536
|
+
readonly after_each?: WorkspaceHookConfig;
|
|
539
537
|
/** Runs once after final test in the workspace lifecycle */
|
|
540
|
-
readonly after_all_tests?: WorkspaceHookConfig;
|
|
541
|
-
/** Runs when reusing a pooled workspace slot */
|
|
542
|
-
readonly on_reuse?: WorkspaceHookConfig;
|
|
543
|
-
/** Runs/controls behavior when workspace lifecycle finishes */
|
|
544
|
-
readonly on_finish?: WorkspaceHookConfig;
|
|
538
|
+
readonly after_all?: WorkspaceHookConfig;
|
|
545
539
|
};
|
|
546
540
|
type WorkspaceConfig = {
|
|
547
541
|
/** Template directory or .code-workspace file. Directories are copied to temp workspace.
|
package/dist/index.d.ts
CHANGED
|
@@ -526,22 +526,16 @@ type WorkspaceHookConfig = {
|
|
|
526
526
|
readonly cwd?: string;
|
|
527
527
|
/** Optional reset policy for this hook */
|
|
528
528
|
readonly reset?: 'none' | 'fast' | 'strict';
|
|
529
|
-
/** Optional cleanup policy for this hook */
|
|
530
|
-
readonly clean?: 'always' | 'on_success' | 'on_failure' | 'never';
|
|
531
529
|
};
|
|
532
530
|
type WorkspaceHooksConfig = {
|
|
533
531
|
/** Runs once before first test in the workspace lifecycle */
|
|
534
|
-
readonly before_all_tests?: WorkspaceHookConfig;
|
|
532
|
+
readonly before_all?: WorkspaceHookConfig;
|
|
535
533
|
/** Runs before each test case */
|
|
536
|
-
readonly before_each_test?: WorkspaceHookConfig;
|
|
534
|
+
readonly before_each?: WorkspaceHookConfig;
|
|
537
535
|
/** Runs after each test case */
|
|
538
|
-
readonly after_each_test?: WorkspaceHookConfig;
|
|
536
|
+
readonly after_each?: WorkspaceHookConfig;
|
|
539
537
|
/** Runs once after final test in the workspace lifecycle */
|
|
540
|
-
readonly after_all_tests?: WorkspaceHookConfig;
|
|
541
|
-
/** Runs when reusing a pooled workspace slot */
|
|
542
|
-
readonly on_reuse?: WorkspaceHookConfig;
|
|
543
|
-
/** Runs/controls behavior when workspace lifecycle finishes */
|
|
544
|
-
readonly on_finish?: WorkspaceHookConfig;
|
|
538
|
+
readonly after_all?: WorkspaceHookConfig;
|
|
545
539
|
};
|
|
546
540
|
type WorkspaceConfig = {
|
|
547
541
|
/** Template directory or .code-workspace file. Directories are copied to temp workspace.
|
package/dist/index.js
CHANGED
|
@@ -17,7 +17,7 @@ import {
|
|
|
17
17
|
readTextFile,
|
|
18
18
|
resolveFileReference,
|
|
19
19
|
resolveTargetDefinition
|
|
20
|
-
} from "./chunk-E6AJPAXM.js";
|
|
20
|
+
} from "./chunk-CPPYERD2.js";
|
|
21
21
|
import {
|
|
22
22
|
OtlpJsonFileExporter
|
|
23
23
|
} from "./chunk-HFSYZHGF.js";
|
|
@@ -3119,30 +3119,24 @@ function parseWorkspaceHookConfig(raw, evalFileDir) {
|
|
|
3119
3119
|
const script = parseWorkspaceScriptConfig(raw, evalFileDir);
|
|
3120
3120
|
const obj = raw;
|
|
3121
3121
|
const reset = obj.reset === "none" || obj.reset === "fast" || obj.reset === "strict" ? obj.reset : void 0;
|
|
3122
|
-
|
|
3123
|
-
if (!script && !reset && !clean) return void 0;
|
|
3122
|
+
if (!script && !reset) return void 0;
|
|
3124
3123
|
return {
|
|
3125
3124
|
...script ?? {},
|
|
3126
|
-
...reset !== void 0 && { reset }
|
|
3127
|
-
...clean !== void 0 && { clean }
|
|
3125
|
+
...reset !== void 0 && { reset }
|
|
3128
3126
|
};
|
|
3129
3127
|
}
|
|
3130
3128
|
function parseWorkspaceHooksConfig(raw, evalFileDir) {
|
|
3131
3129
|
if (!isJsonObject(raw)) return void 0;
|
|
3132
3130
|
const obj = raw;
|
|
3133
|
-
const
|
|
3134
|
-
const
|
|
3135
|
-
const
|
|
3136
|
-
const
|
|
3137
|
-
const onReuse = parseWorkspaceHookConfig(obj.on_reuse, evalFileDir);
|
|
3138
|
-
const onFinish = parseWorkspaceHookConfig(obj.on_finish, evalFileDir);
|
|
3131
|
+
const beforeAll = parseWorkspaceHookConfig(obj.before_all, evalFileDir);
|
|
3132
|
+
const beforeEach = parseWorkspaceHookConfig(obj.before_each, evalFileDir);
|
|
3133
|
+
const afterEach = parseWorkspaceHookConfig(obj.after_each, evalFileDir);
|
|
3134
|
+
const afterAll = parseWorkspaceHookConfig(obj.after_all, evalFileDir);
|
|
3139
3135
|
const hooks = {
|
|
3140
|
-
...
|
|
3141
|
-
...
|
|
3142
|
-
...
|
|
3143
|
-
...
|
|
3144
|
-
...onReuse !== void 0 && { on_reuse: onReuse },
|
|
3145
|
-
...onFinish !== void 0 && { on_finish: onFinish }
|
|
3136
|
+
...beforeAll !== void 0 && { before_all: beforeAll },
|
|
3137
|
+
...beforeEach !== void 0 && { before_each: beforeEach },
|
|
3138
|
+
...afterEach !== void 0 && { after_each: afterEach },
|
|
3139
|
+
...afterAll !== void 0 && { after_all: afterAll }
|
|
3146
3140
|
};
|
|
3147
3141
|
return Object.keys(hooks).length > 0 ? hooks : void 0;
|
|
3148
3142
|
}
|
|
@@ -3203,18 +3197,10 @@ function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
|
|
|
3203
3197
|
};
|
|
3204
3198
|
};
|
|
3205
3199
|
const mergedHooks = {
|
|
3206
|
-
|
|
3207
|
-
|
|
3208
|
-
|
|
3209
|
-
)
|
|
3210
|
-
before_each_test: mergeHook(
|
|
3211
|
-
suiteLevel.hooks?.before_each_test,
|
|
3212
|
-
caseLevel.hooks?.before_each_test
|
|
3213
|
-
),
|
|
3214
|
-
after_each_test: mergeHook(suiteLevel.hooks?.after_each_test, caseLevel.hooks?.after_each_test),
|
|
3215
|
-
after_all_tests: mergeHook(suiteLevel.hooks?.after_all_tests, caseLevel.hooks?.after_all_tests),
|
|
3216
|
-
on_reuse: mergeHook(suiteLevel.hooks?.on_reuse, caseLevel.hooks?.on_reuse),
|
|
3217
|
-
on_finish: mergeHook(suiteLevel.hooks?.on_finish, caseLevel.hooks?.on_finish)
|
|
3200
|
+
before_all: mergeHook(suiteLevel.hooks?.before_all, caseLevel.hooks?.before_all),
|
|
3201
|
+
before_each: mergeHook(suiteLevel.hooks?.before_each, caseLevel.hooks?.before_each),
|
|
3202
|
+
after_each: mergeHook(suiteLevel.hooks?.after_each, caseLevel.hooks?.after_each),
|
|
3203
|
+
after_all: mergeHook(suiteLevel.hooks?.after_all, caseLevel.hooks?.after_all)
|
|
3218
3204
|
};
|
|
3219
3205
|
const hasHooks = Object.values(mergedHooks).some((hook) => hook !== void 0);
|
|
3220
3206
|
return {
|
|
@@ -5344,6 +5330,7 @@ var CopilotCliProvider = class {
|
|
|
5344
5330
|
const agentProcess = spawn(executable, args, {
|
|
5345
5331
|
stdio: ["pipe", "pipe", "inherit"]
|
|
5346
5332
|
});
|
|
5333
|
+
await waitForProcessSpawn(agentProcess, executable, this.targetName);
|
|
5347
5334
|
const toolCallsInProgress = /* @__PURE__ */ new Map();
|
|
5348
5335
|
const completedToolCalls = [];
|
|
5349
5336
|
let finalContent = "";
|
|
@@ -5623,6 +5610,47 @@ var CopilotCliProvider = class {
|
|
|
5623
5610
|
}
|
|
5624
5611
|
}
|
|
5625
5612
|
};
|
|
5613
|
+
async function waitForProcessSpawn(proc, executable, targetName) {
|
|
5614
|
+
if (proc.pid) {
|
|
5615
|
+
return;
|
|
5616
|
+
}
|
|
5617
|
+
await new Promise((resolve, reject) => {
|
|
5618
|
+
const onSpawn = () => {
|
|
5619
|
+
cleanup();
|
|
5620
|
+
resolve();
|
|
5621
|
+
};
|
|
5622
|
+
const onError = (error) => {
|
|
5623
|
+
cleanup();
|
|
5624
|
+
reject(new Error(formatCopilotSpawnError(error, executable, targetName)));
|
|
5625
|
+
};
|
|
5626
|
+
const cleanup = () => {
|
|
5627
|
+
proc.off("spawn", onSpawn);
|
|
5628
|
+
proc.off("error", onError);
|
|
5629
|
+
};
|
|
5630
|
+
proc.once("spawn", onSpawn);
|
|
5631
|
+
proc.once("error", onError);
|
|
5632
|
+
});
|
|
5633
|
+
}
|
|
5634
|
+
function formatCopilotSpawnError(error, executable, targetName) {
|
|
5635
|
+
const code = error.code;
|
|
5636
|
+
const base = `Failed to start Copilot CLI executable '${executable}' for target '${targetName}'. ${error.message}`;
|
|
5637
|
+
if (process.platform !== "win32") {
|
|
5638
|
+
return base;
|
|
5639
|
+
}
|
|
5640
|
+
if (code !== "ENOENT" && code !== "EINVAL") {
|
|
5641
|
+
return base;
|
|
5642
|
+
}
|
|
5643
|
+
return `${base}
|
|
5644
|
+
|
|
5645
|
+
On Windows, shell commands like 'copilot -h' can work via .ps1/.bat shims, but AgentV launches a subprocess that needs a directly spawnable executable path.
|
|
5646
|
+
|
|
5647
|
+
Fix options:
|
|
5648
|
+
1) Install native Copilot binary package:
|
|
5649
|
+
npm install -g @github/copilot-win32-x64
|
|
5650
|
+
2) Set explicit executable for Copilot targets:
|
|
5651
|
+
- In .env: COPILOT_EXE=C:\\Users\\<you>\\AppData\\Roaming\\npm\\node_modules\\@github\\copilot-win32-x64\\copilot.exe
|
|
5652
|
+
- In .agentv/targets.yaml: executable: \${{ COPILOT_EXE }}`;
|
|
5653
|
+
}
|
|
5626
5654
|
function summarizeAcpEvent(eventType, data) {
|
|
5627
5655
|
if (!data || typeof data !== "object") {
|
|
5628
5656
|
return eventType;
|
|
@@ -13557,9 +13585,8 @@ async function runEvaluation(options) {
|
|
|
13557
13585
|
const hasSharedWorkspace = !!(useStaticWorkspace || workspaceTemplate || suiteWorkspace?.hooks || suiteWorkspace?.repos?.length && !isPerTestIsolation);
|
|
13558
13586
|
const poolEnabled = configuredMode === "pooled" ? true : configuredMode === "ephemeral" || useStaticWorkspace ? false : suiteWorkspace?.pool ?? poolWorkspaces ?? true;
|
|
13559
13587
|
const usePool = poolEnabled !== false && !!suiteWorkspace?.repos?.length && !isPerTestIsolation && !useStaticWorkspace;
|
|
13560
|
-
const
|
|
13561
|
-
const
|
|
13562
|
-
const resolvedRetainOnFailure = (finishCleanPolicy === "always" || finishCleanPolicy === "on_failure" ? "cleanup" : finishCleanPolicy === "on_success" || finishCleanPolicy === "never" ? "keep" : void 0) ?? retainOnFailure ?? (cleanupWorkspaces ? "cleanup" : "keep");
|
|
13588
|
+
const resolvedRetainOnSuccess = retainOnSuccess ?? (keepWorkspaces ? "keep" : "cleanup");
|
|
13589
|
+
const resolvedRetainOnFailure = retainOnFailure ?? (cleanupWorkspaces ? "cleanup" : "keep");
|
|
13563
13590
|
const requestedWorkers = options.maxConcurrency ?? target.workers ?? 1;
|
|
13564
13591
|
const workers = hasSharedWorkspace && !usePool ? 1 : requestedWorkers;
|
|
13565
13592
|
setupLog(
|
|
@@ -13594,7 +13621,7 @@ async function runEvaluation(options) {
|
|
|
13594
13621
|
repos: suiteWorkspace.repos,
|
|
13595
13622
|
maxSlots: poolMaxSlots,
|
|
13596
13623
|
repoManager: poolRepoManager,
|
|
13597
|
-
poolReset: (workspaceClean === "full" ? "strict" : workspaceClean === "standard" ? "fast" : null) ??
|
|
13624
|
+
poolReset: (workspaceClean === "full" ? "strict" : workspaceClean === "standard" ? "fast" : null) ?? "fast"
|
|
13598
13625
|
});
|
|
13599
13626
|
poolSlots.push(slot);
|
|
13600
13627
|
setupLog(`pool slot ${i} acquired at: ${slot.path} (existing=${slot.isExisting})`);
|
|
@@ -13645,7 +13672,7 @@ async function runEvaluation(options) {
|
|
|
13645
13672
|
throw new Error(`Failed to materialize repos: ${message}`);
|
|
13646
13673
|
}
|
|
13647
13674
|
}
|
|
13648
|
-
const suiteBeforeAllHook = suiteWorkspace?.hooks?.
|
|
13675
|
+
const suiteBeforeAllHook = suiteWorkspace?.hooks?.before_all;
|
|
13649
13676
|
if (sharedWorkspacePath && hasHookCommand(suiteBeforeAllHook)) {
|
|
13650
13677
|
const beforeAllHook = suiteBeforeAllHook;
|
|
13651
13678
|
const beforeAllCommand = (beforeAllHook.command ?? beforeAllHook.script ?? []).join(" ");
|
|
@@ -13660,7 +13687,7 @@ async function runEvaluation(options) {
|
|
|
13660
13687
|
};
|
|
13661
13688
|
try {
|
|
13662
13689
|
beforeAllOutput = await executeWorkspaceScript(
|
|
13663
|
-
toScriptConfig(beforeAllHook, "
|
|
13690
|
+
toScriptConfig(beforeAllHook, "before_all", "suite workspace"),
|
|
13664
13691
|
scriptContext
|
|
13665
13692
|
);
|
|
13666
13693
|
setupLog("shared before_all completed");
|
|
@@ -13685,7 +13712,7 @@ async function runEvaluation(options) {
|
|
|
13685
13712
|
};
|
|
13686
13713
|
try {
|
|
13687
13714
|
const output = await executeWorkspaceScript(
|
|
13688
|
-
toScriptConfig(beforeAllHook, "
|
|
13715
|
+
toScriptConfig(beforeAllHook, "before_all", "suite workspace"),
|
|
13689
13716
|
scriptContext
|
|
13690
13717
|
);
|
|
13691
13718
|
if (!beforeAllOutput) beforeAllOutput = output;
|
|
@@ -13914,7 +13941,7 @@ async function runEvaluation(options) {
|
|
|
13914
13941
|
}
|
|
13915
13942
|
}
|
|
13916
13943
|
const afterAllWorkspaces = poolSlots.length > 1 ? poolSlots.map((s) => s.path) : sharedWorkspacePath ? [sharedWorkspacePath] : [];
|
|
13917
|
-
const suiteAfterAllHook = suiteWorkspace?.hooks?.
|
|
13944
|
+
const suiteAfterAllHook = suiteWorkspace?.hooks?.after_all;
|
|
13918
13945
|
if (afterAllWorkspaces.length > 0 && hasHookCommand(suiteAfterAllHook)) {
|
|
13919
13946
|
const afterAllHook = suiteAfterAllHook;
|
|
13920
13947
|
for (const wsPath of afterAllWorkspaces) {
|
|
@@ -13926,7 +13953,7 @@ async function runEvaluation(options) {
|
|
|
13926
13953
|
};
|
|
13927
13954
|
try {
|
|
13928
13955
|
const afterAllOutput = await executeWorkspaceScript(
|
|
13929
|
-
toScriptConfig(afterAllHook, "
|
|
13956
|
+
toScriptConfig(afterAllHook, "after_all", "suite workspace"),
|
|
13930
13957
|
scriptContext,
|
|
13931
13958
|
"warn"
|
|
13932
13959
|
);
|
|
@@ -14223,7 +14250,7 @@ async function runEvalCase(options) {
|
|
|
14223
14250
|
);
|
|
14224
14251
|
}
|
|
14225
14252
|
}
|
|
14226
|
-
const caseBeforeAllHook = evalCase.workspace?.hooks?.
|
|
14253
|
+
const caseBeforeAllHook = evalCase.workspace?.hooks?.before_all;
|
|
14227
14254
|
if (workspacePath && hasHookCommand(caseBeforeAllHook)) {
|
|
14228
14255
|
const beforeAllHook = caseBeforeAllHook;
|
|
14229
14256
|
const beforeAllCommand = (beforeAllHook.command ?? beforeAllHook.script ?? []).join(" ");
|
|
@@ -14242,7 +14269,7 @@ async function runEvalCase(options) {
|
|
|
14242
14269
|
};
|
|
14243
14270
|
try {
|
|
14244
14271
|
beforeAllOutput = await executeWorkspaceScript(
|
|
14245
|
-
toScriptConfig(beforeAllHook, "
|
|
14272
|
+
toScriptConfig(beforeAllHook, "before_all", `test '${evalCase.id}'`),
|
|
14246
14273
|
scriptContext
|
|
14247
14274
|
);
|
|
14248
14275
|
if (setupDebug) {
|
|
@@ -14267,7 +14294,7 @@ async function runEvalCase(options) {
|
|
|
14267
14294
|
}
|
|
14268
14295
|
}
|
|
14269
14296
|
}
|
|
14270
|
-
const caseBeforeEachHook = evalCase.workspace?.hooks?.
|
|
14297
|
+
const caseBeforeEachHook = evalCase.workspace?.hooks?.before_each;
|
|
14271
14298
|
if (workspacePath && hasHookCommand(caseBeforeEachHook)) {
|
|
14272
14299
|
const beforeEachHook = caseBeforeEachHook;
|
|
14273
14300
|
const scriptContext = {
|
|
@@ -14280,7 +14307,7 @@ async function runEvalCase(options) {
|
|
|
14280
14307
|
};
|
|
14281
14308
|
try {
|
|
14282
14309
|
beforeEachOutput = await executeWorkspaceScript(
|
|
14283
|
-
toScriptConfig(beforeEachHook, "
|
|
14310
|
+
toScriptConfig(beforeEachHook, "before_each", `test '${evalCase.id}'`),
|
|
14284
14311
|
scriptContext
|
|
14285
14312
|
);
|
|
14286
14313
|
} catch (error) {
|
|
@@ -14397,17 +14424,17 @@ async function runEvalCase(options) {
|
|
|
14397
14424
|
}
|
|
14398
14425
|
}
|
|
14399
14426
|
const providerError = extractProviderError(providerResponse);
|
|
14400
|
-
if (repoManager && workspacePath && evalCase.workspace?.hooks?.
|
|
14427
|
+
if (repoManager && workspacePath && evalCase.workspace?.hooks?.after_each?.reset && evalCase.workspace.hooks.after_each.reset !== "none" && evalCase.workspace.repos) {
|
|
14401
14428
|
try {
|
|
14402
14429
|
await repoManager.reset(
|
|
14403
14430
|
evalCase.workspace.repos,
|
|
14404
14431
|
workspacePath,
|
|
14405
|
-
evalCase.workspace.hooks.
|
|
14432
|
+
evalCase.workspace.hooks.after_each.reset
|
|
14406
14433
|
);
|
|
14407
14434
|
} catch {
|
|
14408
14435
|
}
|
|
14409
14436
|
}
|
|
14410
|
-
const caseAfterEachHook = evalCase.workspace?.hooks?.
|
|
14437
|
+
const caseAfterEachHook = evalCase.workspace?.hooks?.after_each;
|
|
14411
14438
|
if (workspacePath && hasHookCommand(caseAfterEachHook)) {
|
|
14412
14439
|
const afterEachHook = caseAfterEachHook;
|
|
14413
14440
|
const scriptContext = {
|
|
@@ -14420,7 +14447,7 @@ async function runEvalCase(options) {
|
|
|
14420
14447
|
};
|
|
14421
14448
|
try {
|
|
14422
14449
|
afterEachOutput = await executeWorkspaceScript(
|
|
14423
|
-
toScriptConfig(afterEachHook, "
|
|
14450
|
+
toScriptConfig(afterEachHook, "after_each", `test '${evalCase.id}'`),
|
|
14424
14451
|
scriptContext,
|
|
14425
14452
|
"warn"
|
|
14426
14453
|
);
|