@agentv/core 2.18.1 → 2.18.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-I4VQY3XJ.js → chunk-V42NUK73.js} +1 -1
- package/dist/chunk-V42NUK73.js.map +1 -0
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +1 -1
- package/dist/index.cjs +54 -21
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +4 -2
- package/dist/index.d.ts +4 -2
- package/dist/index.js +56 -23
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-I4VQY3XJ.js.map +0 -1
package/dist/index.d.cts
CHANGED
|
@@ -528,6 +528,8 @@ type WorkspaceHookConfig = {
|
|
|
528
528
|
readonly reset?: 'none' | 'fast' | 'strict';
|
|
529
529
|
};
|
|
530
530
|
type WorkspaceHooksConfig = {
|
|
531
|
+
/** Whether hooks are enabled (default: true). When false, all hooks are skipped. */
|
|
532
|
+
readonly enabled?: boolean;
|
|
531
533
|
/** Runs once before first test in the workspace lifecycle */
|
|
532
534
|
readonly before_all?: WorkspaceHookConfig;
|
|
533
535
|
/** Runs before each test case */
|
|
@@ -2599,7 +2601,7 @@ interface EvalConfig {
|
|
|
2599
2601
|
readonly workers?: number;
|
|
2600
2602
|
/** Maximum retries on failure (default: 2) */
|
|
2601
2603
|
readonly maxRetries?: number;
|
|
2602
|
-
/** Agent timeout in milliseconds
|
|
2604
|
+
/** Agent timeout in milliseconds. No timeout if not set. */
|
|
2603
2605
|
readonly agentTimeoutMs?: number;
|
|
2604
2606
|
/** Enable response caching */
|
|
2605
2607
|
readonly cache?: boolean;
|
|
@@ -2706,7 +2708,7 @@ declare const AgentVConfigSchema: z.ZodObject<{
|
|
|
2706
2708
|
workers: z.ZodOptional<z.ZodNumber>;
|
|
2707
2709
|
/** Maximum retries on failure (default: 2) */
|
|
2708
2710
|
maxRetries: z.ZodOptional<z.ZodNumber>;
|
|
2709
|
-
/** Agent timeout in milliseconds
|
|
2711
|
+
/** Agent timeout in milliseconds. No timeout if not set. */
|
|
2710
2712
|
agentTimeoutMs: z.ZodOptional<z.ZodNumber>;
|
|
2711
2713
|
/** Enable verbose logging */
|
|
2712
2714
|
verbose: z.ZodOptional<z.ZodBoolean>;
|
package/dist/index.d.ts
CHANGED
|
@@ -528,6 +528,8 @@ type WorkspaceHookConfig = {
|
|
|
528
528
|
readonly reset?: 'none' | 'fast' | 'strict';
|
|
529
529
|
};
|
|
530
530
|
type WorkspaceHooksConfig = {
|
|
531
|
+
/** Whether hooks are enabled (default: true). When false, all hooks are skipped. */
|
|
532
|
+
readonly enabled?: boolean;
|
|
531
533
|
/** Runs once before first test in the workspace lifecycle */
|
|
532
534
|
readonly before_all?: WorkspaceHookConfig;
|
|
533
535
|
/** Runs before each test case */
|
|
@@ -2599,7 +2601,7 @@ interface EvalConfig {
|
|
|
2599
2601
|
readonly workers?: number;
|
|
2600
2602
|
/** Maximum retries on failure (default: 2) */
|
|
2601
2603
|
readonly maxRetries?: number;
|
|
2602
|
-
/** Agent timeout in milliseconds
|
|
2604
|
+
/** Agent timeout in milliseconds. No timeout if not set. */
|
|
2603
2605
|
readonly agentTimeoutMs?: number;
|
|
2604
2606
|
/** Enable response caching */
|
|
2605
2607
|
readonly cache?: boolean;
|
|
@@ -2706,7 +2708,7 @@ declare const AgentVConfigSchema: z.ZodObject<{
|
|
|
2706
2708
|
workers: z.ZodOptional<z.ZodNumber>;
|
|
2707
2709
|
/** Maximum retries on failure (default: 2) */
|
|
2708
2710
|
maxRetries: z.ZodOptional<z.ZodNumber>;
|
|
2709
|
-
/** Agent timeout in milliseconds
|
|
2711
|
+
/** Agent timeout in milliseconds. No timeout if not set. */
|
|
2710
2712
|
agentTimeoutMs: z.ZodOptional<z.ZodNumber>;
|
|
2711
2713
|
/** Enable verbose logging */
|
|
2712
2714
|
verbose: z.ZodOptional<z.ZodBoolean>;
|
package/dist/index.js
CHANGED
|
@@ -17,7 +17,7 @@ import {
|
|
|
17
17
|
readTextFile,
|
|
18
18
|
resolveFileReference,
|
|
19
19
|
resolveTargetDefinition
|
|
20
|
-
} from "./chunk-I4VQY3XJ.js";
|
|
20
|
+
} from "./chunk-V42NUK73.js";
|
|
21
21
|
import {
|
|
22
22
|
OtlpJsonFileExporter
|
|
23
23
|
} from "./chunk-HFSYZHGF.js";
|
|
@@ -3152,11 +3152,13 @@ function parseWorkspaceHookConfig(raw, evalFileDir) {
|
|
|
3152
3152
|
function parseWorkspaceHooksConfig(raw, evalFileDir) {
|
|
3153
3153
|
if (!isJsonObject(raw)) return void 0;
|
|
3154
3154
|
const obj = raw;
|
|
3155
|
+
const enabled = typeof obj.enabled === "boolean" ? obj.enabled : void 0;
|
|
3155
3156
|
const beforeAll = parseWorkspaceHookConfig(obj.before_all, evalFileDir);
|
|
3156
3157
|
const beforeEach = parseWorkspaceHookConfig(obj.before_each, evalFileDir);
|
|
3157
3158
|
const afterEach = parseWorkspaceHookConfig(obj.after_each, evalFileDir);
|
|
3158
3159
|
const afterAll = parseWorkspaceHookConfig(obj.after_all, evalFileDir);
|
|
3159
3160
|
const hooks = {
|
|
3161
|
+
...enabled !== void 0 && { enabled },
|
|
3160
3162
|
...beforeAll !== void 0 && { before_all: beforeAll },
|
|
3161
3163
|
...beforeEach !== void 0 && { before_each: beforeEach },
|
|
3162
3164
|
...afterEach !== void 0 && { after_each: afterEach },
|
|
@@ -3229,13 +3231,15 @@ function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
|
|
|
3229
3231
|
...caseHook ?? {}
|
|
3230
3232
|
};
|
|
3231
3233
|
};
|
|
3234
|
+
const mergedEnabled = caseLevel.hooks?.enabled ?? suiteLevel.hooks?.enabled;
|
|
3232
3235
|
const mergedHooks = {
|
|
3236
|
+
...mergedEnabled !== void 0 && { enabled: mergedEnabled },
|
|
3233
3237
|
before_all: mergeHook(suiteLevel.hooks?.before_all, caseLevel.hooks?.before_all),
|
|
3234
3238
|
before_each: mergeHook(suiteLevel.hooks?.before_each, caseLevel.hooks?.before_each),
|
|
3235
3239
|
after_each: mergeHook(suiteLevel.hooks?.after_each, caseLevel.hooks?.after_each),
|
|
3236
3240
|
after_all: mergeHook(suiteLevel.hooks?.after_all, caseLevel.hooks?.after_all)
|
|
3237
3241
|
};
|
|
3238
|
-
const hasHooks = Object.values(mergedHooks).some((hook) => hook !== void 0);
|
|
3242
|
+
const hasHooks = mergedEnabled !== void 0 || Object.values(mergedHooks).some((hook) => hook !== void 0 && typeof hook === "object");
|
|
3239
3243
|
return {
|
|
3240
3244
|
template: caseLevel.template ?? suiteLevel.template,
|
|
3241
3245
|
isolation: caseLevel.isolation ?? suiteLevel.isolation,
|
|
@@ -6181,14 +6185,18 @@ var PiAgentSdkProvider = class {
|
|
|
6181
6185
|
}
|
|
6182
6186
|
});
|
|
6183
6187
|
try {
|
|
6184
|
-
|
|
6185
|
-
|
|
6186
|
-
|
|
6187
|
-
(
|
|
6188
|
-
|
|
6189
|
-
|
|
6190
|
-
|
|
6191
|
-
|
|
6188
|
+
if (this.config.timeoutMs) {
|
|
6189
|
+
const timeoutMs = this.config.timeoutMs;
|
|
6190
|
+
const timeoutPromise = new Promise((_, reject) => {
|
|
6191
|
+
setTimeout(
|
|
6192
|
+
() => reject(new Error(`Pi agent SDK timed out after ${timeoutMs}ms`)),
|
|
6193
|
+
timeoutMs
|
|
6194
|
+
);
|
|
6195
|
+
});
|
|
6196
|
+
await Promise.race([agent.prompt(request.question), timeoutPromise]);
|
|
6197
|
+
} else {
|
|
6198
|
+
await agent.prompt(request.question);
|
|
6199
|
+
}
|
|
6192
6200
|
await agent.waitForIdle();
|
|
6193
6201
|
const agentMessages = agent.state.messages;
|
|
6194
6202
|
for (const msg of agentMessages) {
|
|
@@ -7326,7 +7334,7 @@ import path21 from "node:path";
|
|
|
7326
7334
|
var logged = false;
|
|
7327
7335
|
function getAgentvHome() {
|
|
7328
7336
|
const envHome = process.env.AGENTV_HOME;
|
|
7329
|
-
if (envHome) {
|
|
7337
|
+
if (envHome && envHome !== "undefined") {
|
|
7330
7338
|
if (!logged) {
|
|
7331
7339
|
logged = true;
|
|
7332
7340
|
console.warn(`Using AGENTV_HOME: ${envHome}`);
|
|
@@ -12028,7 +12036,7 @@ function runEqualsAssertion(output, value) {
|
|
|
12028
12036
|
|
|
12029
12037
|
// src/evaluation/orchestrator.ts
|
|
12030
12038
|
import { createHash as createHash2, randomUUID as randomUUID7 } from "node:crypto";
|
|
12031
|
-
import { mkdir as mkdir12, stat as stat7 } from "node:fs/promises";
|
|
12039
|
+
import { mkdir as mkdir12, readdir as readdir6, stat as stat7 } from "node:fs/promises";
|
|
12032
12040
|
import path39 from "node:path";
|
|
12033
12041
|
import micromatch4 from "micromatch";
|
|
12034
12042
|
|
|
@@ -13428,6 +13436,9 @@ function toScriptConfig(hook, hookName, context) {
|
|
|
13428
13436
|
function hasHookCommand(hook) {
|
|
13429
13437
|
return !!(hook?.command && hook.command.length > 0 || hook?.script && hook.script.length > 0);
|
|
13430
13438
|
}
|
|
13439
|
+
function hooksEnabled(workspace) {
|
|
13440
|
+
return workspace?.hooks?.enabled !== false;
|
|
13441
|
+
}
|
|
13431
13442
|
function getWorkspaceTemplate(target) {
|
|
13432
13443
|
const config = target.config;
|
|
13433
13444
|
if ("workspaceTemplate" in config && typeof config.workspaceTemplate === "string") {
|
|
@@ -13646,9 +13657,28 @@ async function runEvaluation(options) {
|
|
|
13646
13657
|
const availablePoolSlots = [];
|
|
13647
13658
|
const poolSlotBaselines = /* @__PURE__ */ new Map();
|
|
13648
13659
|
const poolMaxSlots = Math.min(configPoolMaxSlots ?? 10, 50);
|
|
13660
|
+
let staticMaterialised = false;
|
|
13649
13661
|
if (useStaticWorkspace && configuredStaticPath) {
|
|
13662
|
+
const isYamlConfiguredPath = !cliWorkspacePath && !!yamlWorkspacePath;
|
|
13663
|
+
const dirExists = await stat7(configuredStaticPath).then(
|
|
13664
|
+
(s) => s.isDirectory(),
|
|
13665
|
+
() => false
|
|
13666
|
+
);
|
|
13667
|
+
const isEmpty = dirExists ? (await readdir6(configuredStaticPath)).length === 0 : false;
|
|
13668
|
+
if (isYamlConfiguredPath && (!dirExists || isEmpty)) {
|
|
13669
|
+
if (!dirExists) {
|
|
13670
|
+
await mkdir12(configuredStaticPath, { recursive: true });
|
|
13671
|
+
}
|
|
13672
|
+
if (workspaceTemplate) {
|
|
13673
|
+
await copyDirectoryRecursive(workspaceTemplate, configuredStaticPath);
|
|
13674
|
+
setupLog(`copied template into static workspace: ${configuredStaticPath}`);
|
|
13675
|
+
}
|
|
13676
|
+
staticMaterialised = true;
|
|
13677
|
+
setupLog(`materialised static workspace at: ${configuredStaticPath}`);
|
|
13678
|
+
} else {
|
|
13679
|
+
setupLog(`reusing existing static workspace: ${configuredStaticPath}`);
|
|
13680
|
+
}
|
|
13650
13681
|
sharedWorkspacePath = configuredStaticPath;
|
|
13651
|
-
setupLog(`using static workspace: ${configuredStaticPath}`);
|
|
13652
13682
|
} else if (usePool && suiteWorkspace?.repos) {
|
|
13653
13683
|
const slotsNeeded = workers;
|
|
13654
13684
|
setupLog(`acquiring ${slotsNeeded} workspace pool slot(s) (pool capacity: ${poolMaxSlots})`);
|
|
@@ -13694,7 +13724,8 @@ async function runEvaluation(options) {
|
|
|
13694
13724
|
} catch {
|
|
13695
13725
|
}
|
|
13696
13726
|
}
|
|
13697
|
-
const
|
|
13727
|
+
const needsRepoMaterialisation = !!suiteWorkspace?.repos?.length && !usePool && (!useStaticWorkspace || staticMaterialised);
|
|
13728
|
+
const repoManager = needsRepoMaterialisation ? new RepoManager(verbose) : void 0;
|
|
13698
13729
|
if (repoManager && sharedWorkspacePath && suiteWorkspace?.repos && !isPerTestIsolation) {
|
|
13699
13730
|
setupLog(
|
|
13700
13731
|
`materializing ${suiteWorkspace.repos.length} shared repo(s) into ${sharedWorkspacePath}`
|
|
@@ -13711,8 +13742,9 @@ async function runEvaluation(options) {
|
|
|
13711
13742
|
throw new Error(`Failed to materialize repos: ${message}`);
|
|
13712
13743
|
}
|
|
13713
13744
|
}
|
|
13745
|
+
const suiteHooksEnabled = hooksEnabled(suiteWorkspace);
|
|
13714
13746
|
const suiteBeforeAllHook = suiteWorkspace?.hooks?.before_all;
|
|
13715
|
-
if (sharedWorkspacePath && hasHookCommand(suiteBeforeAllHook)) {
|
|
13747
|
+
if (sharedWorkspacePath && suiteHooksEnabled && hasHookCommand(suiteBeforeAllHook)) {
|
|
13716
13748
|
const beforeAllHook = suiteBeforeAllHook;
|
|
13717
13749
|
const beforeAllCommand = (beforeAllHook.command ?? beforeAllHook.script ?? []).join(" ");
|
|
13718
13750
|
setupLog(
|
|
@@ -13739,7 +13771,7 @@ async function runEvaluation(options) {
|
|
|
13739
13771
|
throw new Error(`before_all script failed: ${message}`);
|
|
13740
13772
|
}
|
|
13741
13773
|
}
|
|
13742
|
-
if (availablePoolSlots.length > 0 && hasHookCommand(suiteBeforeAllHook)) {
|
|
13774
|
+
if (availablePoolSlots.length > 0 && suiteHooksEnabled && hasHookCommand(suiteBeforeAllHook)) {
|
|
13743
13775
|
const beforeAllHook = suiteBeforeAllHook;
|
|
13744
13776
|
for (const slot of availablePoolSlots) {
|
|
13745
13777
|
setupLog(`running before_all on pool slot ${slot.index}`);
|
|
@@ -13981,7 +14013,7 @@ async function runEvaluation(options) {
|
|
|
13981
14013
|
}
|
|
13982
14014
|
const afterAllWorkspaces = poolSlots.length > 1 ? poolSlots.map((s) => s.path) : sharedWorkspacePath ? [sharedWorkspacePath] : [];
|
|
13983
14015
|
const suiteAfterAllHook = suiteWorkspace?.hooks?.after_all;
|
|
13984
|
-
if (afterAllWorkspaces.length > 0 && hasHookCommand(suiteAfterAllHook)) {
|
|
14016
|
+
if (afterAllWorkspaces.length > 0 && suiteHooksEnabled && hasHookCommand(suiteAfterAllHook)) {
|
|
13985
14017
|
const afterAllHook = suiteAfterAllHook;
|
|
13986
14018
|
for (const wsPath of afterAllWorkspaces) {
|
|
13987
14019
|
const scriptContext = {
|
|
@@ -14229,6 +14261,7 @@ async function runEvalCase(options) {
|
|
|
14229
14261
|
let afterEachOutput;
|
|
14230
14262
|
const isSharedWorkspace = !!sharedWorkspacePath;
|
|
14231
14263
|
let caseWorkspaceFile;
|
|
14264
|
+
const caseHooksEnabled = hooksEnabled(evalCase.workspace);
|
|
14232
14265
|
if (!workspacePath) {
|
|
14233
14266
|
const rawCaseTemplate = evalCase.workspace?.template ?? getWorkspaceTemplate(target);
|
|
14234
14267
|
const resolvedCaseTemplate = await resolveWorkspaceTemplate(rawCaseTemplate);
|
|
@@ -14290,7 +14323,7 @@ async function runEvalCase(options) {
|
|
|
14290
14323
|
}
|
|
14291
14324
|
}
|
|
14292
14325
|
const caseBeforeAllHook = evalCase.workspace?.hooks?.before_all;
|
|
14293
|
-
if (workspacePath && hasHookCommand(caseBeforeAllHook)) {
|
|
14326
|
+
if (workspacePath && caseHooksEnabled && hasHookCommand(caseBeforeAllHook)) {
|
|
14294
14327
|
const beforeAllHook = caseBeforeAllHook;
|
|
14295
14328
|
const beforeAllCommand = (beforeAllHook.command ?? beforeAllHook.script ?? []).join(" ");
|
|
14296
14329
|
if (setupDebug) {
|
|
@@ -14334,7 +14367,7 @@ async function runEvalCase(options) {
|
|
|
14334
14367
|
}
|
|
14335
14368
|
}
|
|
14336
14369
|
const caseBeforeEachHook = evalCase.workspace?.hooks?.before_each;
|
|
14337
|
-
if (workspacePath && hasHookCommand(caseBeforeEachHook)) {
|
|
14370
|
+
if (workspacePath && caseHooksEnabled && hasHookCommand(caseBeforeEachHook)) {
|
|
14338
14371
|
const beforeEachHook = caseBeforeEachHook;
|
|
14339
14372
|
const scriptContext = {
|
|
14340
14373
|
workspacePath,
|
|
@@ -14463,7 +14496,7 @@ async function runEvalCase(options) {
|
|
|
14463
14496
|
}
|
|
14464
14497
|
}
|
|
14465
14498
|
const providerError = extractProviderError(providerResponse);
|
|
14466
|
-
if (repoManager && workspacePath && evalCase.workspace?.hooks?.after_each?.reset && evalCase.workspace.hooks.after_each.reset !== "none" && evalCase.workspace.repos) {
|
|
14499
|
+
if (caseHooksEnabled && repoManager && workspacePath && evalCase.workspace?.hooks?.after_each?.reset && evalCase.workspace.hooks.after_each.reset !== "none" && evalCase.workspace.repos) {
|
|
14467
14500
|
try {
|
|
14468
14501
|
await repoManager.reset(
|
|
14469
14502
|
evalCase.workspace.repos,
|
|
@@ -14474,7 +14507,7 @@ async function runEvalCase(options) {
|
|
|
14474
14507
|
}
|
|
14475
14508
|
}
|
|
14476
14509
|
const caseAfterEachHook = evalCase.workspace?.hooks?.after_each;
|
|
14477
|
-
if (workspacePath && hasHookCommand(caseAfterEachHook)) {
|
|
14510
|
+
if (workspacePath && caseHooksEnabled && hasHookCommand(caseAfterEachHook)) {
|
|
14478
14511
|
const afterEachHook = caseAfterEachHook;
|
|
14479
14512
|
const scriptContext = {
|
|
14480
14513
|
workspacePath,
|
|
@@ -15253,7 +15286,7 @@ async function evaluate(config) {
|
|
|
15253
15286
|
repoRoot,
|
|
15254
15287
|
target: resolvedTarget,
|
|
15255
15288
|
maxRetries: config.maxRetries ?? 2,
|
|
15256
|
-
agentTimeoutMs: config.agentTimeoutMs
|
|
15289
|
+
agentTimeoutMs: config.agentTimeoutMs,
|
|
15257
15290
|
verbose: config.verbose,
|
|
15258
15291
|
maxConcurrency: config.workers ?? 3,
|
|
15259
15292
|
filter: config.filter,
|
|
@@ -15356,7 +15389,7 @@ var AgentVConfigSchema = z5.object({
|
|
|
15356
15389
|
workers: z5.number().int().min(1).max(50).optional(),
|
|
15357
15390
|
/** Maximum retries on failure (default: 2) */
|
|
15358
15391
|
maxRetries: z5.number().int().min(0).optional(),
|
|
15359
|
-
/** Agent timeout in milliseconds
|
|
15392
|
+
/** Agent timeout in milliseconds. No timeout if not set. */
|
|
15360
15393
|
agentTimeoutMs: z5.number().int().min(0).optional(),
|
|
15361
15394
|
/** Enable verbose logging */
|
|
15362
15395
|
verbose: z5.boolean().optional(),
|