@agentv/core 0.13.0 → 0.14.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -814,13 +814,12 @@ ${guidelineContent.trim()}`);
814
814
  const segments = segmentsByMessage[i];
815
815
  const contentParts = [];
816
816
  let role = message.role;
817
- let name;
818
817
  if (role === "system") {
819
818
  role = "assistant";
820
819
  contentParts.push("@[System]:");
821
820
  } else if (role === "tool") {
822
- role = "function";
823
- name = "tool";
821
+ role = "assistant";
822
+ contentParts.push("@[Tool]:");
824
823
  }
825
824
  for (const segment of segments) {
826
825
  if (segment.type === "guideline_ref") {
@@ -838,10 +837,10 @@ ${guidelineContent.trim()}`);
838
837
  if (contentParts.length === 0) {
839
838
  continue;
840
839
  }
840
+ const content = contentParts.join("\n");
841
841
  chatPrompt.push({
842
842
  role,
843
- content: contentParts.join("\n"),
844
- ...name ? { name } : {}
843
+ content
845
844
  });
846
845
  }
847
846
  return chatPrompt.length > 0 ? chatPrompt : void 0;
@@ -1122,68 +1121,225 @@ async function resolveFileReference2(rawValue, searchRoots) {
1122
1121
  return { displayPath, attempted };
1123
1122
  }
1124
1123
 
1125
- // src/evaluation/providers/ax.ts
1126
- var import_ax = require("@ax-llm/ax");
1124
+ // src/evaluation/providers/ai-sdk.ts
1125
+ var import_anthropic = require("@ai-sdk/anthropic");
1126
+ var import_azure = require("@ai-sdk/azure");
1127
+ var import_google = require("@ai-sdk/google");
1128
+ var import_ai = require("ai");
1127
1129
  var DEFAULT_SYSTEM_PROMPT = "You are a careful assistant. Follow all provided instructions and do not fabricate results.";
1130
+ var AzureProvider = class {
1131
+ constructor(targetName, config) {
1132
+ this.config = config;
1133
+ this.id = `azure:${targetName}`;
1134
+ this.targetName = targetName;
1135
+ this.defaults = {
1136
+ temperature: config.temperature,
1137
+ maxOutputTokens: config.maxOutputTokens
1138
+ };
1139
+ this.retryConfig = config.retry;
1140
+ const azure = (0, import_azure.createAzure)(buildAzureOptions(config));
1141
+ this.model = azure(config.deploymentName);
1142
+ }
1143
+ id;
1144
+ kind = "azure";
1145
+ targetName;
1146
+ model;
1147
+ defaults;
1148
+ retryConfig;
1149
+ async invoke(request) {
1150
+ return invokeModel({
1151
+ model: this.model,
1152
+ request,
1153
+ defaults: this.defaults,
1154
+ retryConfig: this.retryConfig
1155
+ });
1156
+ }
1157
+ };
1158
+ var AnthropicProvider = class {
1159
+ constructor(targetName, config) {
1160
+ this.config = config;
1161
+ this.id = `anthropic:${targetName}`;
1162
+ this.targetName = targetName;
1163
+ this.defaults = {
1164
+ temperature: config.temperature,
1165
+ maxOutputTokens: config.maxOutputTokens,
1166
+ thinkingBudget: config.thinkingBudget
1167
+ };
1168
+ this.retryConfig = config.retry;
1169
+ const anthropic = (0, import_anthropic.createAnthropic)({
1170
+ apiKey: config.apiKey
1171
+ });
1172
+ this.model = anthropic(config.model);
1173
+ }
1174
+ id;
1175
+ kind = "anthropic";
1176
+ targetName;
1177
+ model;
1178
+ defaults;
1179
+ retryConfig;
1180
+ async invoke(request) {
1181
+ const providerOptions = buildAnthropicProviderOptions(this.defaults);
1182
+ return invokeModel({
1183
+ model: this.model,
1184
+ request,
1185
+ defaults: this.defaults,
1186
+ retryConfig: this.retryConfig,
1187
+ providerOptions
1188
+ });
1189
+ }
1190
+ };
1191
+ var GeminiProvider = class {
1192
+ constructor(targetName, config) {
1193
+ this.config = config;
1194
+ this.id = `gemini:${targetName}`;
1195
+ this.targetName = targetName;
1196
+ this.defaults = {
1197
+ temperature: config.temperature,
1198
+ maxOutputTokens: config.maxOutputTokens
1199
+ };
1200
+ this.retryConfig = config.retry;
1201
+ const google = (0, import_google.createGoogleGenerativeAI)({
1202
+ apiKey: config.apiKey
1203
+ });
1204
+ this.model = google(config.model);
1205
+ }
1206
+ id;
1207
+ kind = "gemini";
1208
+ targetName;
1209
+ model;
1210
+ defaults;
1211
+ retryConfig;
1212
+ async invoke(request) {
1213
+ return invokeModel({
1214
+ model: this.model,
1215
+ request,
1216
+ defaults: this.defaults,
1217
+ retryConfig: this.retryConfig
1218
+ });
1219
+ }
1220
+ };
1221
+ function buildAzureOptions(config) {
1222
+ const options = {
1223
+ apiKey: config.apiKey,
1224
+ apiVersion: config.version,
1225
+ useDeploymentBasedUrls: true
1226
+ };
1227
+ const baseURL = normalizeAzureBaseUrl(config.resourceName);
1228
+ if (baseURL) {
1229
+ options.baseURL = baseURL;
1230
+ } else {
1231
+ options.resourceName = config.resourceName;
1232
+ }
1233
+ return options;
1234
+ }
1235
+ function normalizeAzureBaseUrl(resourceName) {
1236
+ const trimmed = resourceName.trim();
1237
+ if (!/^https?:\/\//i.test(trimmed)) {
1238
+ return void 0;
1239
+ }
1240
+ const withoutSlash = trimmed.replace(/\/+$/, "");
1241
+ const normalized = withoutSlash.endsWith("/openai") ? withoutSlash : `${withoutSlash}/openai`;
1242
+ return normalized;
1243
+ }
1244
+ function buildAnthropicProviderOptions(defaults) {
1245
+ if (defaults.thinkingBudget === void 0) {
1246
+ return void 0;
1247
+ }
1248
+ return {
1249
+ anthropic: {
1250
+ thinking: {
1251
+ type: "enabled",
1252
+ budgetTokens: defaults.thinkingBudget
1253
+ }
1254
+ }
1255
+ };
1256
+ }
1128
1257
  function buildChatPrompt(request) {
1129
- if (request.chatPrompt) {
1130
- const hasSystemMessage = request.chatPrompt.some((message) => message.role === "system");
1258
+ const provided = request.chatPrompt?.length ? request.chatPrompt : void 0;
1259
+ if (provided) {
1260
+ const hasSystemMessage = provided.some((message) => message.role === "system");
1131
1261
  if (hasSystemMessage) {
1132
- return request.chatPrompt;
1262
+ return provided;
1133
1263
  }
1134
- const systemContent2 = resolveSystemContent(request);
1135
- return [{ role: "system", content: systemContent2 }, ...request.chatPrompt];
1264
+ const systemContent2 = resolveSystemContent(request, false);
1265
+ return [{ role: "system", content: systemContent2 }, ...provided];
1136
1266
  }
1137
- const systemContent = resolveSystemContent(request);
1267
+ const systemContent = resolveSystemContent(request, true);
1138
1268
  const userContent = request.question.trim();
1139
1269
  const prompt = [
1140
- {
1141
- role: "system",
1142
- content: systemContent
1143
- },
1144
- {
1145
- role: "user",
1146
- content: userContent
1147
- }
1270
+ { role: "system", content: systemContent },
1271
+ { role: "user", content: userContent }
1148
1272
  ];
1149
1273
  return prompt;
1150
1274
  }
1151
- function resolveSystemContent(request) {
1275
+ function resolveSystemContent(request, includeGuidelines) {
1152
1276
  const systemSegments = [];
1153
1277
  if (request.systemPrompt && request.systemPrompt.trim().length > 0) {
1154
1278
  systemSegments.push(request.systemPrompt.trim());
1155
1279
  } else {
1156
1280
  systemSegments.push(DEFAULT_SYSTEM_PROMPT);
1157
1281
  }
1158
- if (request.guidelines && request.guidelines.trim().length > 0) {
1282
+ if (includeGuidelines && request.guidelines && request.guidelines.trim().length > 0) {
1159
1283
  systemSegments.push(`[[ ## Guidelines ## ]]
1160
1284
 
1161
1285
  ${request.guidelines.trim()}`);
1162
1286
  }
1163
1287
  return systemSegments.join("\n\n");
1164
1288
  }
1165
- function extractModelConfig(request, defaults) {
1289
+ function toModelMessages(chatPrompt) {
1290
+ return chatPrompt.map((message) => {
1291
+ if (message.role === "tool" || message.role === "function") {
1292
+ const prefix = message.name ? `@[${message.name}]: ` : "@[Tool]: ";
1293
+ return {
1294
+ role: "assistant",
1295
+ content: `${prefix}${message.content}`
1296
+ };
1297
+ }
1298
+ if (message.role === "assistant" || message.role === "system" || message.role === "user") {
1299
+ return {
1300
+ role: message.role,
1301
+ content: message.content
1302
+ };
1303
+ }
1304
+ return {
1305
+ role: "user",
1306
+ content: message.content
1307
+ };
1308
+ });
1309
+ }
1310
+ function resolveModelSettings(request, defaults) {
1166
1311
  const temperature = request.temperature ?? defaults.temperature;
1167
- const maxTokens = request.maxOutputTokens ?? defaults.maxOutputTokens;
1168
- const config = {};
1169
- if (temperature !== void 0) {
1170
- config.temperature = temperature;
1171
- }
1172
- if (maxTokens !== void 0) {
1173
- config.maxTokens = maxTokens;
1174
- }
1175
- return Object.keys(config).length > 0 ? config : void 0;
1312
+ const maxOutputTokens = request.maxOutputTokens ?? defaults.maxOutputTokens;
1313
+ return {
1314
+ temperature,
1315
+ maxOutputTokens
1316
+ };
1317
+ }
1318
+ async function invokeModel(options) {
1319
+ const { model, request, defaults, retryConfig, providerOptions } = options;
1320
+ const chatPrompt = buildChatPrompt(request);
1321
+ const { temperature, maxOutputTokens } = resolveModelSettings(request, defaults);
1322
+ const result = await withRetry(
1323
+ () => (0, import_ai.generateText)({
1324
+ model,
1325
+ messages: toModelMessages(chatPrompt),
1326
+ temperature,
1327
+ maxOutputTokens,
1328
+ maxRetries: 0,
1329
+ abortSignal: request.signal,
1330
+ ...providerOptions ? { providerOptions } : {}
1331
+ }),
1332
+ retryConfig,
1333
+ request.signal
1334
+ );
1335
+ return mapResponse(result);
1176
1336
  }
1177
- function mapResponse(response) {
1178
- const primary = response.results[0];
1179
- const text = typeof primary?.content === "string" ? primary.content : "";
1180
- const reasoning = primary?.thought ?? primary?.thoughtBlock?.data;
1181
- const usage = toJsonObject(response.modelUsage);
1337
+ function mapResponse(result) {
1182
1338
  return {
1183
- text,
1184
- reasoning,
1185
- raw: response,
1186
- usage
1339
+ text: result.text ?? "",
1340
+ reasoning: result.reasoningText ?? void 0,
1341
+ raw: result,
1342
+ usage: toJsonObject(result.totalUsage ?? result.usage)
1187
1343
  };
1188
1344
  }
1189
1345
  function toJsonObject(value) {
@@ -1196,34 +1352,59 @@ function toJsonObject(value) {
1196
1352
  return void 0;
1197
1353
  }
1198
1354
  }
1199
- function ensureChatResponse(result) {
1200
- if (typeof ReadableStream !== "undefined" && result instanceof ReadableStream) {
1201
- throw new Error("Streaming responses are not supported for this provider");
1355
+ function extractStatus(error) {
1356
+ if (!error || typeof error !== "object") {
1357
+ return void 0;
1358
+ }
1359
+ const candidate = error;
1360
+ const directStatus = candidate.status ?? candidate.statusCode;
1361
+ if (typeof directStatus === "number" && Number.isFinite(directStatus)) {
1362
+ return directStatus;
1202
1363
  }
1203
- if (!result || typeof result !== "object" || !("results" in result)) {
1204
- throw new Error("Unexpected response type from AxAI provider");
1364
+ const responseStatus = typeof candidate.response === "object" && candidate.response ? candidate.response.status : void 0;
1365
+ if (typeof responseStatus === "number" && Number.isFinite(responseStatus)) {
1366
+ return responseStatus;
1205
1367
  }
1206
- return result;
1368
+ const message = typeof candidate.message === "string" ? candidate.message : void 0;
1369
+ if (message) {
1370
+ const match = message.match(/HTTP\s+(\d{3})/i);
1371
+ if (match) {
1372
+ const parsed = Number.parseInt(match[1], 10);
1373
+ if (Number.isFinite(parsed)) {
1374
+ return parsed;
1375
+ }
1376
+ }
1377
+ }
1378
+ return void 0;
1207
1379
  }
1208
- function isRetryableError(error, retryableStatusCodes) {
1380
+ function isNetworkError(error) {
1209
1381
  if (!error || typeof error !== "object") {
1210
1382
  return false;
1211
1383
  }
1212
- if ("status" in error && typeof error.status === "number") {
1213
- return retryableStatusCodes.includes(error.status);
1384
+ const candidate = error;
1385
+ if (candidate.name === "AbortError") {
1386
+ return false;
1214
1387
  }
1215
- if ("message" in error && typeof error.message === "string") {
1216
- const match = error.message.match(/HTTP (\d{3})/);
1217
- if (match) {
1218
- const status = Number.parseInt(match[1], 10);
1219
- return retryableStatusCodes.includes(status);
1220
- }
1388
+ const code = candidate.code;
1389
+ if (typeof code === "string" && /^E(AI|CONN|HOST|NET|PIPE|TIME|REFUSED|RESET)/i.test(code)) {
1390
+ return true;
1221
1391
  }
1222
- if ("name" in error && error.name === "AxAIServiceNetworkError") {
1392
+ const message = typeof candidate.message === "string" ? candidate.message : void 0;
1393
+ if (message && /(network|fetch failed|ECONNRESET|ENOTFOUND|EAI_AGAIN|ETIMEDOUT|ECONNREFUSED)/i.test(message)) {
1223
1394
  return true;
1224
1395
  }
1225
1396
  return false;
1226
1397
  }
1398
+ function isRetryableError(error, retryableStatusCodes) {
1399
+ const status = extractStatus(error);
1400
+ if (status === 401 || status === 403) {
1401
+ return false;
1402
+ }
1403
+ if (typeof status === "number") {
1404
+ return retryableStatusCodes.includes(status);
1405
+ }
1406
+ return isNetworkError(error);
1407
+ }
1227
1408
  function calculateRetryDelay(attempt, config) {
1228
1409
  const delay = Math.min(
1229
1410
  config.maxDelayMs,
@@ -1259,146 +1440,10 @@ async function withRetry(fn, retryConfig, signal) {
1259
1440
  }
1260
1441
  const delay = calculateRetryDelay(attempt, config);
1261
1442
  await sleep(delay);
1262
- if (signal?.aborted) {
1263
- throw new Error(`Request aborted: ${signal.reason ?? "Unknown reason"}`);
1264
- }
1265
1443
  }
1266
1444
  }
1267
1445
  throw lastError;
1268
1446
  }
1269
- var AzureProvider = class {
1270
- constructor(targetName, config) {
1271
- this.config = config;
1272
- this.id = `azure:${targetName}`;
1273
- this.targetName = targetName;
1274
- this.defaults = {
1275
- temperature: config.temperature,
1276
- maxOutputTokens: config.maxOutputTokens
1277
- };
1278
- this.retryConfig = config.retry;
1279
- this.ai = import_ax.AxAI.create({
1280
- name: "azure-openai",
1281
- apiKey: config.apiKey,
1282
- resourceName: config.resourceName,
1283
- deploymentName: config.deploymentName,
1284
- version: config.version,
1285
- config: {
1286
- stream: false
1287
- }
1288
- });
1289
- }
1290
- id;
1291
- kind = "azure";
1292
- targetName;
1293
- ai;
1294
- defaults;
1295
- retryConfig;
1296
- async invoke(request) {
1297
- const chatPrompt = buildChatPrompt(request);
1298
- const modelConfig = extractModelConfig(request, this.defaults);
1299
- const response = await withRetry(
1300
- async () => await this.ai.chat(
1301
- {
1302
- chatPrompt,
1303
- model: this.config.deploymentName,
1304
- ...modelConfig ? { modelConfig } : {}
1305
- },
1306
- request.signal ? { abortSignal: request.signal } : void 0
1307
- ),
1308
- this.retryConfig,
1309
- request.signal
1310
- );
1311
- return mapResponse(ensureChatResponse(response));
1312
- }
1313
- getAxAI() {
1314
- return this.ai;
1315
- }
1316
- };
1317
- var AnthropicProvider = class {
1318
- constructor(targetName, config) {
1319
- this.config = config;
1320
- this.id = `anthropic:${targetName}`;
1321
- this.targetName = targetName;
1322
- this.defaults = {
1323
- temperature: config.temperature,
1324
- maxOutputTokens: config.maxOutputTokens,
1325
- thinkingBudget: config.thinkingBudget
1326
- };
1327
- this.retryConfig = config.retry;
1328
- this.ai = import_ax.AxAI.create({
1329
- name: "anthropic",
1330
- apiKey: config.apiKey
1331
- });
1332
- }
1333
- id;
1334
- kind = "anthropic";
1335
- targetName;
1336
- ai;
1337
- defaults;
1338
- retryConfig;
1339
- async invoke(request) {
1340
- const chatPrompt = buildChatPrompt(request);
1341
- const modelConfig = extractModelConfig(request, this.defaults);
1342
- const response = await withRetry(
1343
- async () => await this.ai.chat(
1344
- {
1345
- chatPrompt,
1346
- model: this.config.model,
1347
- ...modelConfig ? { modelConfig } : {}
1348
- },
1349
- request.signal ? { abortSignal: request.signal } : void 0
1350
- ),
1351
- this.retryConfig,
1352
- request.signal
1353
- );
1354
- return mapResponse(ensureChatResponse(response));
1355
- }
1356
- getAxAI() {
1357
- return this.ai;
1358
- }
1359
- };
1360
- var GeminiProvider = class {
1361
- constructor(targetName, config) {
1362
- this.config = config;
1363
- this.id = `gemini:${targetName}`;
1364
- this.targetName = targetName;
1365
- this.defaults = {
1366
- temperature: config.temperature,
1367
- maxOutputTokens: config.maxOutputTokens
1368
- };
1369
- this.retryConfig = config.retry;
1370
- this.ai = import_ax.AxAI.create({
1371
- name: "google-gemini",
1372
- apiKey: config.apiKey
1373
- });
1374
- }
1375
- id;
1376
- kind = "gemini";
1377
- targetName;
1378
- ai;
1379
- defaults;
1380
- retryConfig;
1381
- async invoke(request) {
1382
- const chatPrompt = buildChatPrompt(request);
1383
- const modelConfig = extractModelConfig(request, this.defaults);
1384
- const response = await withRetry(
1385
- async () => await this.ai.chat(
1386
- {
1387
- chatPrompt,
1388
- model: this.config.model,
1389
- ...modelConfig ? { modelConfig } : {}
1390
- },
1391
- request.signal ? { abortSignal: request.signal } : void 0
1392
- ),
1393
- this.retryConfig,
1394
- request.signal
1395
- );
1396
- return mapResponse(ensureChatResponse(response));
1397
- }
1398
- getAxAI() {
1399
- return this.ai;
1400
- }
1401
- };
1402
1447
 
1403
1448
  // src/evaluation/providers/cli.ts
1404
1449
  var import_node_child_process = require("child_process");
@@ -1446,12 +1491,14 @@ var CliProvider = class {
1446
1491
  supportsBatch = false;
1447
1492
  config;
1448
1493
  runCommand;
1494
+ verbose;
1449
1495
  healthcheckPromise;
1450
1496
  constructor(targetName, config, runner = defaultCommandRunner) {
1451
1497
  this.targetName = targetName;
1452
1498
  this.id = `cli:${targetName}`;
1453
1499
  this.config = config;
1454
1500
  this.runCommand = runner;
1501
+ this.verbose = config.verbose ?? false;
1455
1502
  }
1456
1503
  async invoke(request) {
1457
1504
  if (request.signal?.aborted) {
@@ -1552,6 +1599,11 @@ var CliProvider = class {
1552
1599
  generateOutputFilePath("healthcheck")
1553
1600
  )
1554
1601
  );
1602
+ if (this.verbose) {
1603
+ console.log(
1604
+ `[cli-provider:${this.targetName}] (healthcheck) CLI_EVALS_DIR=${process.env.CLI_EVALS_DIR ?? ""} cwd=${healthcheck.cwd ?? this.config.cwd ?? ""} command=${renderedCommand}`
1605
+ );
1606
+ }
1555
1607
  const result = await this.runCommand(renderedCommand, {
1556
1608
  cwd: healthcheck.cwd ?? this.config.cwd,
1557
1609
  env: process.env,