@agentv/core 0.2.8 → 0.2.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -900,6 +900,9 @@ function normalizeAzureApiVersion(value) {
900
900
  function resolveTargetDefinition(definition, env = process.env) {
901
901
  const parsed = BASE_TARGET_SCHEMA.parse(definition);
902
902
  const provider = parsed.provider.toLowerCase();
903
+ const providerBatching = resolveOptionalBoolean(
904
+ parsed.settings?.provider_batching ?? parsed.settings?.providerBatching
905
+ );
903
906
  switch (provider) {
904
907
  case "azure":
905
908
  case "azure-openai":
@@ -908,6 +911,7 @@ function resolveTargetDefinition(definition, env = process.env) {
908
911
  name: parsed.name,
909
912
  judgeTarget: parsed.judge_target,
910
913
  workers: parsed.workers,
914
+ providerBatching,
911
915
  config: resolveAzureConfig(parsed, env)
912
916
  };
913
917
  case "anthropic":
@@ -916,6 +920,7 @@ function resolveTargetDefinition(definition, env = process.env) {
916
920
  name: parsed.name,
917
921
  judgeTarget: parsed.judge_target,
918
922
  workers: parsed.workers,
923
+ providerBatching,
919
924
  config: resolveAnthropicConfig(parsed, env)
920
925
  };
921
926
  case "gemini":
@@ -926,6 +931,7 @@ function resolveTargetDefinition(definition, env = process.env) {
926
931
  name: parsed.name,
927
932
  judgeTarget: parsed.judge_target,
928
933
  workers: parsed.workers,
934
+ providerBatching,
929
935
  config: resolveGeminiConfig(parsed, env)
930
936
  };
931
937
  case "mock":
@@ -934,6 +940,7 @@ function resolveTargetDefinition(definition, env = process.env) {
934
940
  name: parsed.name,
935
941
  judgeTarget: parsed.judge_target,
936
942
  workers: parsed.workers,
943
+ providerBatching,
937
944
  config: resolveMockConfig(parsed)
938
945
  };
939
946
  case "vscode":
@@ -943,6 +950,7 @@ function resolveTargetDefinition(definition, env = process.env) {
943
950
  name: parsed.name,
944
951
  judgeTarget: parsed.judge_target,
945
952
  workers: parsed.workers,
953
+ providerBatching,
946
954
  config: resolveVSCodeConfig(parsed, env, provider === "vscode-insiders")
947
955
  };
948
956
  default:
@@ -1134,6 +1142,7 @@ var VSCodeProvider = class {
1134
1142
  id;
1135
1143
  kind;
1136
1144
  targetName;
1145
+ supportsBatch = true;
1137
1146
  config;
1138
1147
  constructor(targetName, config, kind) {
1139
1148
  this.id = `${kind}:${targetName}`;
@@ -1180,38 +1189,102 @@ var VSCodeProvider = class {
1180
1189
  }
1181
1190
  };
1182
1191
  }
1192
+ async invokeBatch(requests) {
1193
+ if (requests.length === 0) {
1194
+ return [];
1195
+ }
1196
+ const normalizedRequests = requests.map((req) => ({
1197
+ request: req,
1198
+ attachments: normalizeAttachments(req.attachments)
1199
+ }));
1200
+ const combinedAttachments = mergeAttachments(
1201
+ normalizedRequests.map(({ attachments }) => attachments)
1202
+ );
1203
+ const userQueries = normalizedRequests.map(
1204
+ ({ request, attachments }) => buildPromptDocument(request, attachments, request.guideline_patterns)
1205
+ );
1206
+ const session = await (0, import_subagent.dispatchBatchAgent)({
1207
+ userQueries,
1208
+ extraAttachments: combinedAttachments,
1209
+ wait: this.config.waitForResponse,
1210
+ dryRun: this.config.dryRun,
1211
+ vscodeCmd: this.config.command,
1212
+ subagentRoot: this.config.subagentRoot,
1213
+ workspaceTemplate: this.config.workspaceTemplate,
1214
+ silent: true
1215
+ });
1216
+ if (session.exitCode !== 0 || !session.responseFiles) {
1217
+ const failure = session.error ?? "VS Code subagent did not produce batch responses";
1218
+ throw new Error(failure);
1219
+ }
1220
+ if (this.config.dryRun) {
1221
+ return normalizedRequests.map(({ attachments }) => ({
1222
+ text: "",
1223
+ raw: {
1224
+ session,
1225
+ attachments,
1226
+ allAttachments: combinedAttachments
1227
+ }
1228
+ }));
1229
+ }
1230
+ if (session.responseFiles.length !== requests.length) {
1231
+ throw new Error(
1232
+ `VS Code batch returned ${session.responseFiles.length} responses for ${requests.length} requests`
1233
+ );
1234
+ }
1235
+ const responses = [];
1236
+ for (const [index, responseFile] of session.responseFiles.entries()) {
1237
+ const responseText = await (0, import_promises3.readFile)(responseFile, "utf8");
1238
+ responses.push({
1239
+ text: responseText,
1240
+ raw: {
1241
+ session,
1242
+ attachments: normalizedRequests[index]?.attachments,
1243
+ allAttachments: combinedAttachments,
1244
+ responseFile
1245
+ }
1246
+ });
1247
+ }
1248
+ return responses;
1249
+ }
1183
1250
  };
1184
1251
  function buildPromptDocument(request, attachments, guidelinePatterns) {
1185
1252
  const parts = [];
1186
1253
  const guidelineFiles = collectGuidelineFiles(attachments, guidelinePatterns);
1187
- if (guidelineFiles.length > 0) {
1188
- parts.push("\n", buildMandatoryPrereadBlock(guidelineFiles));
1254
+ const attachmentFiles = collectAttachmentFiles(attachments);
1255
+ const nonGuidelineAttachments = attachmentFiles.filter(
1256
+ (file) => !guidelineFiles.includes(file)
1257
+ );
1258
+ const prereadBlock = buildMandatoryPrereadBlock(guidelineFiles, nonGuidelineAttachments);
1259
+ if (prereadBlock.length > 0) {
1260
+ parts.push("\n", prereadBlock);
1189
1261
  }
1190
1262
  parts.push("\n[[ ## user_query ## ]]\n", request.prompt.trim());
1191
1263
  return parts.join("\n").trim();
1192
1264
  }
1193
/**
 * Renders the instruction block that tells the agent which files to read
 * before answering.
 *
 * Guideline files and attachment files are listed in separate sections, each
 * as a Markdown bullet list of `[basename](file URI)` links. The two closing
 * instruction lines are always appended when at least one file is listed.
 *
 * @param {string[]} guidelineFiles - Paths of guideline files.
 * @param {string[]} attachmentFiles - Paths of the remaining attachments.
 * @returns {string} The instruction text, or "" when both lists are empty.
 */
function buildMandatoryPrereadBlock(guidelineFiles, attachmentFiles) {
  const hasGuidelines = guidelineFiles.length > 0;
  const hasAttachments = attachmentFiles.length > 0;
  if (!hasGuidelines && !hasAttachments) {
    return "";
  }
  // One Markdown bullet per file, joined into a newline-separated list.
  const renderLinks = (files) => {
    const bullets = [];
    for (const file of files) {
      const label = import_node_path3.default.basename(file);
      const uri = pathToFileUri(file);
      bullets.push(`* [${label}](${uri})`);
    }
    return bullets.join("\n");
  };
  const lines = [];
  if (hasGuidelines) {
    lines.push(`Read all guideline files:\n${renderLinks(guidelineFiles)}.`);
  }
  if (hasAttachments) {
    lines.push(`Read all attachment files:\n${renderLinks(attachmentFiles)}.`);
  }
  lines.push("If any file is missing, fail with ERROR: missing-file <filename> and stop.");
  lines.push("Then apply system_instructions on the user query below.");
  return lines.join("\n");
}
1216
1289
  function collectGuidelineFiles(attachments, guidelinePatterns) {
1217
1290
  if (!attachments || attachments.length === 0) {
@@ -1229,6 +1302,19 @@ function collectGuidelineFiles(attachments, guidelinePatterns) {
1229
1302
  }
1230
1303
  return Array.from(unique.values());
1231
1304
  }
1305
/**
 * Resolves attachment paths to absolute paths and removes duplicates,
 * keeping the order in which each path first appears.
 *
 * @param {string[]|undefined|null} attachments - Attachment paths.
 * @returns {string[]} Unique absolute paths; empty when there are none.
 */
function collectAttachmentFiles(attachments) {
  if (!attachments || attachments.length === 0) {
    return [];
  }
  // A Set preserves insertion order, so first occurrences keep their place.
  const unique = new Set();
  for (const attachment of attachments) {
    unique.add(import_node_path3.default.resolve(attachment));
  }
  return Array.from(unique);
}
1232
1318
  function pathToFileUri(filePath) {
1233
1319
  const absolutePath = import_node_path3.default.isAbsolute(filePath) ? filePath : import_node_path3.default.resolve(filePath);
1234
1320
  const normalizedPath = absolutePath.replace(/\\/g, "/");
@@ -1247,6 +1333,16 @@ function normalizeAttachments(attachments) {
1247
1333
  }
1248
1334
  return Array.from(deduped);
1249
1335
  }
1336
/**
 * Flattens several attachment lists into one list of unique absolute paths,
 * in first-seen order.
 *
 * @param {Array<string[]|undefined|null>|undefined|null} all - Attachment
 *   lists; missing lists (and a missing outer array) are treated as empty.
 * @returns {string[]|undefined} The merged paths, or `undefined` when no
 *   attachment was found at all.
 */
function mergeAttachments(all) {
  const deduped = new Set();
  // `all ?? []` keeps a nullish argument from throwing in for...of.
  for (const list of all ?? []) {
    if (!list) continue;
    for (const attachment of list) {
      deduped.add(import_node_path3.default.resolve(attachment));
    }
  }
  return deduped.size > 0 ? Array.from(deduped) : void 0;
}
1250
1346
  async function ensureVSCodeSubagents(options) {
1251
1347
  const { kind, count, verbose = false } = options;
1252
1348
  const vscodeCmd = kind === "vscode-insiders" ? "code-insiders" : "code";
@@ -1981,6 +2077,12 @@ async function runEvaluation(options) {
1981
2077
  };
1982
2078
  const graderRegistry = buildGraderRegistry(graders, resolveJudgeProvider);
1983
2079
  const primaryProvider = getOrCreateProvider(target);
2080
+ const providerSupportsBatch = target.providerBatching === true && primaryProvider.supportsBatch === true && typeof primaryProvider.invokeBatch === "function";
2081
+ if (target.providerBatching && !providerSupportsBatch && verbose) {
2082
+ console.warn(
2083
+ `Provider batching requested for target '${target.name}', but provider does not advertise batch support. Using per-case dispatch.`
2084
+ );
2085
+ }
1984
2086
  if (onProgress && filteredEvalCases.length > 0) {
1985
2087
  for (let i = 0; i < filteredEvalCases.length; i++) {
1986
2088
  await onProgress({
@@ -1990,6 +2092,27 @@ async function runEvaluation(options) {
1990
2092
  });
1991
2093
  }
1992
2094
  }
2095
+ if (providerSupportsBatch) {
2096
+ try {
2097
+ return await runBatchEvaluation({
2098
+ evalCases: filteredEvalCases,
2099
+ provider: primaryProvider,
2100
+ target,
2101
+ graderRegistry,
2102
+ promptDumpDir,
2103
+ nowFn: now ?? (() => /* @__PURE__ */ new Date()),
2104
+ onProgress,
2105
+ onResult,
2106
+ verbose,
2107
+ resolveJudgeProvider
2108
+ });
2109
+ } catch (error) {
2110
+ if (verbose) {
2111
+ const message = error instanceof Error ? error.message : String(error);
2112
+ console.warn(`Provider batch execution failed, falling back to per-case dispatch: ${message}`);
2113
+ }
2114
+ }
2115
+ }
1993
2116
  const workers = options.maxConcurrency ?? target.workers ?? 1;
1994
2117
  const limit = pLimit(workers);
1995
2118
  let nextWorkerId = 1;
@@ -2073,6 +2196,137 @@ async function runEvaluation(options) {
2073
2196
  }
2074
2197
  return results;
2075
2198
  }
2199
/**
 * Evaluates all cases through a single provider batch call, then grades each
 * response in order.
 *
 * Steps: resolve a grader for every case, build (and optionally dump) the
 * prompt inputs, call `provider.invokeBatch` once, validate the response
 * count, then grade case by case. A grading failure for one case is recorded
 * as an error result and does not stop the remaining cases.
 *
 * @param {object} options
 * @param {Array<object>} options.evalCases - Cases to evaluate.
 * @param {object} options.provider - Provider exposing `invokeBatch`.
 * @param {object} options.target - Target definition; `name` is recorded.
 * @param {object} options.graderRegistry - Graders keyed by kind; the
 *   `heuristic` entry is the fallback for unknown kinds.
 * @param {string} [options.promptDumpDir] - When set, prompts are dumped here.
 * @param {() => Date} options.nowFn - Clock used for result timestamps.
 * @param {Function} [options.onProgress] - Receives per-case status events.
 * @param {Function} [options.onResult] - Receives each result as produced.
 * @param {Function} options.resolveJudgeProvider - Resolves the judge provider
 *   handed to the grader.
 * @returns {Promise<Array<object>>} One result per case, in case order.
 * @throws {Error} If a case has no usable grader, or the provider does not
 *   return an array with exactly one response per case.
 */
async function runBatchEvaluation(options) {
  const {
    evalCases,
    provider,
    target,
    graderRegistry,
    promptDumpDir,
    nowFn,
    onProgress,
    onResult,
    resolveJudgeProvider
  } = options;
  // Resolve graders before dispatching anything. A missing grader then fails
  // the batch before any result has been emitted, so a caller that falls
  // back to per-case dispatch cannot report the same case twice.
  const graders = evalCases.map((evalCase) => {
    const graderKind = evalCase.grader ?? "heuristic";
    const grader = graderRegistry[graderKind] ?? graderRegistry.heuristic;
    if (!grader) {
      throw new Error(`No grader registered for kind '${graderKind}'`);
    }
    return grader;
  });
  const promptInputsList = [];
  for (const evalCase of evalCases) {
    const promptInputs = await buildPromptInputs(evalCase);
    if (promptDumpDir) {
      await dumpPrompt(promptDumpDir, evalCase, promptInputs);
    }
    promptInputsList.push(promptInputs);
  }
  const batchRequests = evalCases.map((evalCase, index) => {
    const promptInputs = promptInputsList[index];
    return {
      prompt: promptInputs.request,
      guidelines: promptInputs.guidelines,
      guideline_patterns: evalCase.guideline_patterns,
      attachments: evalCase.file_paths,
      evalCaseId: evalCase.id,
      metadata: {
        systemPrompt: promptInputs.systemMessage ?? ""
      }
    };
  });
  // One start time for the whole batch, shared by the "running" and
  // "completed" events (the latter previously reported the epoch, 0).
  const batchStartedAt = Date.now();
  const batchResponse = await provider.invokeBatch?.(batchRequests);
  if (!Array.isArray(batchResponse)) {
    throw new Error("Provider batching failed: invokeBatch did not return an array");
  }
  if (batchResponse.length !== evalCases.length) {
    throw new Error(
      `Provider batching failed: expected ${evalCases.length} responses, received ${batchResponse.length}`
    );
  }
  if (onProgress) {
    for (const evalCase of evalCases) {
      await onProgress({
        workerId: 1,
        evalId: evalCase.id,
        status: "running",
        startedAt: batchStartedAt
      });
    }
  }
  const results = [];
  for (const [index, evalCase] of evalCases.entries()) {
    const promptInputs = promptInputsList[index];
    const providerResponse = batchResponse[index];
    const now = nowFn();
    let grade;
    try {
      grade = await graders[index].grade({
        evalCase,
        candidate: providerResponse.text ?? "",
        target,
        provider,
        attempt: 0,
        promptInputs,
        now,
        judgeProvider: await resolveJudgeProvider(target)
      });
    } catch (error) {
      // Record the failure for this case and keep grading the others.
      const errorResult = buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
      results.push(errorResult);
      if (onResult) {
        await onResult(errorResult);
      }
      if (onProgress) {
        await onProgress({
          workerId: 1,
          evalId: evalCase.id,
          status: "failed",
          completedAt: Date.now(),
          error: error instanceof Error ? error.message : String(error)
        });
      }
      continue;
    }
    const completedAt = nowFn();
    const rawRequest = {
      request: promptInputs.request,
      guidelines: promptInputs.guidelines,
      guideline_paths: evalCase.guideline_paths,
      system_message: promptInputs.systemMessage ?? ""
    };
    const result = {
      eval_id: evalCase.id,
      conversation_id: evalCase.conversation_id,
      score: grade.score,
      hits: grade.hits,
      misses: grade.misses,
      model_answer: providerResponse.text ?? "",
      expected_aspect_count: grade.expectedAspectCount,
      target: target.name,
      timestamp: completedAt.toISOString(),
      reasoning: grade.reasoning,
      raw_aspects: grade.rawAspects,
      raw_request: rawRequest,
      grader_raw_request: grade.graderRawRequest
    };
    results.push(result);
    if (onResult) {
      await onResult(result);
    }
    if (onProgress) {
      await onProgress({
        workerId: 1,
        evalId: evalCase.id,
        status: "completed",
        startedAt: batchStartedAt,
        completedAt: Date.now()
      });
    }
  }
  return results;
}
2076
2330
  async function runEvalCase(options) {
2077
2331
  const {
2078
2332
  evalCase,