@agentv/core 0.13.0 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -2
- package/dist/{chunk-U3GEJ3K7.js → chunk-IOCVST3R.js} +1 -1
- package/dist/chunk-IOCVST3R.js.map +1 -0
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +1 -1
- package/dist/index.cjs +246 -194
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +17 -8
- package/dist/index.d.ts +17 -8
- package/dist/index.js +247 -195
- package/dist/index.js.map +1 -1
- package/package.json +5 -2
- package/dist/chunk-U3GEJ3K7.js.map +0 -1
package/dist/index.d.cts
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
import { AxChatRequest, AxAI } from '@ax-llm/ax';
|
|
2
|
-
|
|
3
1
|
/**
|
|
4
2
|
* JSON primitive values appearing in AgentV payloads.
|
|
5
3
|
*/
|
|
@@ -149,7 +147,13 @@ interface EvaluatorResult {
|
|
|
149
147
|
*/
|
|
150
148
|
declare function getHitCount(result: Pick<EvaluationResult, "hits">): number;
|
|
151
149
|
|
|
152
|
-
type
|
|
150
|
+
type ChatMessageRole = "system" | "user" | "assistant" | "tool" | "function";
|
|
151
|
+
interface ChatMessage {
|
|
152
|
+
readonly role: ChatMessageRole;
|
|
153
|
+
readonly content: string;
|
|
154
|
+
readonly name?: string;
|
|
155
|
+
}
|
|
156
|
+
type ChatPrompt = readonly ChatMessage[];
|
|
153
157
|
type ProviderKind = "azure" | "anthropic" | "gemini" | "codex" | "cli" | "mock" | "vscode" | "vscode-insiders";
|
|
154
158
|
interface ProviderRequest {
|
|
155
159
|
readonly question: string;
|
|
@@ -185,11 +189,6 @@ interface Provider {
|
|
|
185
189
|
* the orchestrator may send multiple requests in a single provider session.
|
|
186
190
|
*/
|
|
187
191
|
invokeBatch?(requests: readonly ProviderRequest[]): Promise<readonly ProviderResponse[]>;
|
|
188
|
-
/**
|
|
189
|
-
* Optional access to the underlying AxAI instance.
|
|
190
|
-
* This enables using advanced Ax features like structured output signatures.
|
|
191
|
-
*/
|
|
192
|
-
getAxAI?(): AxAI;
|
|
193
192
|
}
|
|
194
193
|
type EnvLookup = Readonly<Record<string, string | undefined>>;
|
|
195
194
|
interface TargetDefinition {
|
|
@@ -341,6 +340,9 @@ interface RetryConfig {
|
|
|
341
340
|
readonly backoffFactor?: number;
|
|
342
341
|
readonly retryableStatusCodes?: readonly number[];
|
|
343
342
|
}
|
|
343
|
+
/**
|
|
344
|
+
* Azure OpenAI settings used by the Vercel AI SDK.
|
|
345
|
+
*/
|
|
344
346
|
interface AzureResolvedConfig {
|
|
345
347
|
readonly resourceName: string;
|
|
346
348
|
readonly deploymentName: string;
|
|
@@ -350,6 +352,9 @@ interface AzureResolvedConfig {
|
|
|
350
352
|
readonly maxOutputTokens?: number;
|
|
351
353
|
readonly retry?: RetryConfig;
|
|
352
354
|
}
|
|
355
|
+
/**
|
|
356
|
+
* Anthropic Claude settings used by the Vercel AI SDK.
|
|
357
|
+
*/
|
|
353
358
|
interface AnthropicResolvedConfig {
|
|
354
359
|
readonly apiKey: string;
|
|
355
360
|
readonly model: string;
|
|
@@ -358,6 +363,9 @@ interface AnthropicResolvedConfig {
|
|
|
358
363
|
readonly thinkingBudget?: number;
|
|
359
364
|
readonly retry?: RetryConfig;
|
|
360
365
|
}
|
|
366
|
+
/**
|
|
367
|
+
* Google Gemini settings used by the Vercel AI SDK.
|
|
368
|
+
*/
|
|
361
369
|
interface GeminiResolvedConfig {
|
|
362
370
|
readonly apiKey: string;
|
|
363
371
|
readonly model: string;
|
|
@@ -402,6 +410,7 @@ interface CliResolvedConfig {
|
|
|
402
410
|
readonly cwd?: string;
|
|
403
411
|
readonly timeoutMs?: number;
|
|
404
412
|
readonly healthcheck?: CliHealthcheck;
|
|
413
|
+
readonly verbose?: boolean;
|
|
405
414
|
}
|
|
406
415
|
type ResolvedTarget = {
|
|
407
416
|
readonly kind: "azure";
|
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
import { AxChatRequest, AxAI } from '@ax-llm/ax';
|
|
2
|
-
|
|
3
1
|
/**
|
|
4
2
|
* JSON primitive values appearing in AgentV payloads.
|
|
5
3
|
*/
|
|
@@ -149,7 +147,13 @@ interface EvaluatorResult {
|
|
|
149
147
|
*/
|
|
150
148
|
declare function getHitCount(result: Pick<EvaluationResult, "hits">): number;
|
|
151
149
|
|
|
152
|
-
type
|
|
150
|
+
type ChatMessageRole = "system" | "user" | "assistant" | "tool" | "function";
|
|
151
|
+
interface ChatMessage {
|
|
152
|
+
readonly role: ChatMessageRole;
|
|
153
|
+
readonly content: string;
|
|
154
|
+
readonly name?: string;
|
|
155
|
+
}
|
|
156
|
+
type ChatPrompt = readonly ChatMessage[];
|
|
153
157
|
type ProviderKind = "azure" | "anthropic" | "gemini" | "codex" | "cli" | "mock" | "vscode" | "vscode-insiders";
|
|
154
158
|
interface ProviderRequest {
|
|
155
159
|
readonly question: string;
|
|
@@ -185,11 +189,6 @@ interface Provider {
|
|
|
185
189
|
* the orchestrator may send multiple requests in a single provider session.
|
|
186
190
|
*/
|
|
187
191
|
invokeBatch?(requests: readonly ProviderRequest[]): Promise<readonly ProviderResponse[]>;
|
|
188
|
-
/**
|
|
189
|
-
* Optional access to the underlying AxAI instance.
|
|
190
|
-
* This enables using advanced Ax features like structured output signatures.
|
|
191
|
-
*/
|
|
192
|
-
getAxAI?(): AxAI;
|
|
193
192
|
}
|
|
194
193
|
type EnvLookup = Readonly<Record<string, string | undefined>>;
|
|
195
194
|
interface TargetDefinition {
|
|
@@ -341,6 +340,9 @@ interface RetryConfig {
|
|
|
341
340
|
readonly backoffFactor?: number;
|
|
342
341
|
readonly retryableStatusCodes?: readonly number[];
|
|
343
342
|
}
|
|
343
|
+
/**
|
|
344
|
+
* Azure OpenAI settings used by the Vercel AI SDK.
|
|
345
|
+
*/
|
|
344
346
|
interface AzureResolvedConfig {
|
|
345
347
|
readonly resourceName: string;
|
|
346
348
|
readonly deploymentName: string;
|
|
@@ -350,6 +352,9 @@ interface AzureResolvedConfig {
|
|
|
350
352
|
readonly maxOutputTokens?: number;
|
|
351
353
|
readonly retry?: RetryConfig;
|
|
352
354
|
}
|
|
355
|
+
/**
|
|
356
|
+
* Anthropic Claude settings used by the Vercel AI SDK.
|
|
357
|
+
*/
|
|
353
358
|
interface AnthropicResolvedConfig {
|
|
354
359
|
readonly apiKey: string;
|
|
355
360
|
readonly model: string;
|
|
@@ -358,6 +363,9 @@ interface AnthropicResolvedConfig {
|
|
|
358
363
|
readonly thinkingBudget?: number;
|
|
359
364
|
readonly retry?: RetryConfig;
|
|
360
365
|
}
|
|
366
|
+
/**
|
|
367
|
+
* Google Gemini settings used by the Vercel AI SDK.
|
|
368
|
+
*/
|
|
361
369
|
interface GeminiResolvedConfig {
|
|
362
370
|
readonly apiKey: string;
|
|
363
371
|
readonly model: string;
|
|
@@ -402,6 +410,7 @@ interface CliResolvedConfig {
|
|
|
402
410
|
readonly cwd?: string;
|
|
403
411
|
readonly timeoutMs?: number;
|
|
404
412
|
readonly healthcheck?: CliHealthcheck;
|
|
413
|
+
readonly verbose?: boolean;
|
|
405
414
|
}
|
|
406
415
|
type ResolvedTarget = {
|
|
407
416
|
readonly kind: "azure";
|
package/dist/index.js
CHANGED
|
@@ -9,7 +9,7 @@ import {
|
|
|
9
9
|
readTextFile,
|
|
10
10
|
resolveFileReference,
|
|
11
11
|
resolveTargetDefinition
|
|
12
|
-
} from "./chunk-U3GEJ3K7.js";
|
|
12
|
+
} from "./chunk-IOCVST3R.js";
|
|
13
13
|
|
|
14
14
|
// src/evaluation/types.ts
|
|
15
15
|
var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
|
|
@@ -760,13 +760,12 @@ ${guidelineContent.trim()}`);
|
|
|
760
760
|
const segments = segmentsByMessage[i];
|
|
761
761
|
const contentParts = [];
|
|
762
762
|
let role = message.role;
|
|
763
|
-
let name;
|
|
764
763
|
if (role === "system") {
|
|
765
764
|
role = "assistant";
|
|
766
765
|
contentParts.push("@[System]:");
|
|
767
766
|
} else if (role === "tool") {
|
|
768
|
-
role = "
|
|
769
|
-
|
|
767
|
+
role = "assistant";
|
|
768
|
+
contentParts.push("@[Tool]:");
|
|
770
769
|
}
|
|
771
770
|
for (const segment of segments) {
|
|
772
771
|
if (segment.type === "guideline_ref") {
|
|
@@ -784,10 +783,10 @@ ${guidelineContent.trim()}`);
|
|
|
784
783
|
if (contentParts.length === 0) {
|
|
785
784
|
continue;
|
|
786
785
|
}
|
|
786
|
+
const content = contentParts.join("\n");
|
|
787
787
|
chatPrompt.push({
|
|
788
788
|
role,
|
|
789
|
-
content
|
|
790
|
-
...name ? { name } : {}
|
|
789
|
+
content
|
|
791
790
|
});
|
|
792
791
|
}
|
|
793
792
|
return chatPrompt.length > 0 ? chatPrompt : void 0;
|
|
@@ -957,68 +956,225 @@ ${detailBlock}${ANSI_RESET5}`);
|
|
|
957
956
|
}
|
|
958
957
|
}
|
|
959
958
|
|
|
960
|
-
// src/evaluation/providers/
|
|
961
|
-
import {
|
|
959
|
+
// src/evaluation/providers/ai-sdk.ts
|
|
960
|
+
import { createAnthropic } from "@ai-sdk/anthropic";
|
|
961
|
+
import { createAzure } from "@ai-sdk/azure";
|
|
962
|
+
import { createGoogleGenerativeAI } from "@ai-sdk/google";
|
|
963
|
+
import { generateText } from "ai";
|
|
962
964
|
var DEFAULT_SYSTEM_PROMPT = "You are a careful assistant. Follow all provided instructions and do not fabricate results.";
|
|
965
|
+
var AzureProvider = class {
|
|
966
|
+
constructor(targetName, config) {
|
|
967
|
+
this.config = config;
|
|
968
|
+
this.id = `azure:${targetName}`;
|
|
969
|
+
this.targetName = targetName;
|
|
970
|
+
this.defaults = {
|
|
971
|
+
temperature: config.temperature,
|
|
972
|
+
maxOutputTokens: config.maxOutputTokens
|
|
973
|
+
};
|
|
974
|
+
this.retryConfig = config.retry;
|
|
975
|
+
const azure = createAzure(buildAzureOptions(config));
|
|
976
|
+
this.model = azure(config.deploymentName);
|
|
977
|
+
}
|
|
978
|
+
id;
|
|
979
|
+
kind = "azure";
|
|
980
|
+
targetName;
|
|
981
|
+
model;
|
|
982
|
+
defaults;
|
|
983
|
+
retryConfig;
|
|
984
|
+
async invoke(request) {
|
|
985
|
+
return invokeModel({
|
|
986
|
+
model: this.model,
|
|
987
|
+
request,
|
|
988
|
+
defaults: this.defaults,
|
|
989
|
+
retryConfig: this.retryConfig
|
|
990
|
+
});
|
|
991
|
+
}
|
|
992
|
+
};
|
|
993
|
+
var AnthropicProvider = class {
|
|
994
|
+
constructor(targetName, config) {
|
|
995
|
+
this.config = config;
|
|
996
|
+
this.id = `anthropic:${targetName}`;
|
|
997
|
+
this.targetName = targetName;
|
|
998
|
+
this.defaults = {
|
|
999
|
+
temperature: config.temperature,
|
|
1000
|
+
maxOutputTokens: config.maxOutputTokens,
|
|
1001
|
+
thinkingBudget: config.thinkingBudget
|
|
1002
|
+
};
|
|
1003
|
+
this.retryConfig = config.retry;
|
|
1004
|
+
const anthropic = createAnthropic({
|
|
1005
|
+
apiKey: config.apiKey
|
|
1006
|
+
});
|
|
1007
|
+
this.model = anthropic(config.model);
|
|
1008
|
+
}
|
|
1009
|
+
id;
|
|
1010
|
+
kind = "anthropic";
|
|
1011
|
+
targetName;
|
|
1012
|
+
model;
|
|
1013
|
+
defaults;
|
|
1014
|
+
retryConfig;
|
|
1015
|
+
async invoke(request) {
|
|
1016
|
+
const providerOptions = buildAnthropicProviderOptions(this.defaults);
|
|
1017
|
+
return invokeModel({
|
|
1018
|
+
model: this.model,
|
|
1019
|
+
request,
|
|
1020
|
+
defaults: this.defaults,
|
|
1021
|
+
retryConfig: this.retryConfig,
|
|
1022
|
+
providerOptions
|
|
1023
|
+
});
|
|
1024
|
+
}
|
|
1025
|
+
};
|
|
1026
|
+
var GeminiProvider = class {
|
|
1027
|
+
constructor(targetName, config) {
|
|
1028
|
+
this.config = config;
|
|
1029
|
+
this.id = `gemini:${targetName}`;
|
|
1030
|
+
this.targetName = targetName;
|
|
1031
|
+
this.defaults = {
|
|
1032
|
+
temperature: config.temperature,
|
|
1033
|
+
maxOutputTokens: config.maxOutputTokens
|
|
1034
|
+
};
|
|
1035
|
+
this.retryConfig = config.retry;
|
|
1036
|
+
const google = createGoogleGenerativeAI({
|
|
1037
|
+
apiKey: config.apiKey
|
|
1038
|
+
});
|
|
1039
|
+
this.model = google(config.model);
|
|
1040
|
+
}
|
|
1041
|
+
id;
|
|
1042
|
+
kind = "gemini";
|
|
1043
|
+
targetName;
|
|
1044
|
+
model;
|
|
1045
|
+
defaults;
|
|
1046
|
+
retryConfig;
|
|
1047
|
+
async invoke(request) {
|
|
1048
|
+
return invokeModel({
|
|
1049
|
+
model: this.model,
|
|
1050
|
+
request,
|
|
1051
|
+
defaults: this.defaults,
|
|
1052
|
+
retryConfig: this.retryConfig
|
|
1053
|
+
});
|
|
1054
|
+
}
|
|
1055
|
+
};
|
|
1056
|
+
function buildAzureOptions(config) {
|
|
1057
|
+
const options = {
|
|
1058
|
+
apiKey: config.apiKey,
|
|
1059
|
+
apiVersion: config.version,
|
|
1060
|
+
useDeploymentBasedUrls: true
|
|
1061
|
+
};
|
|
1062
|
+
const baseURL = normalizeAzureBaseUrl(config.resourceName);
|
|
1063
|
+
if (baseURL) {
|
|
1064
|
+
options.baseURL = baseURL;
|
|
1065
|
+
} else {
|
|
1066
|
+
options.resourceName = config.resourceName;
|
|
1067
|
+
}
|
|
1068
|
+
return options;
|
|
1069
|
+
}
|
|
1070
|
+
function normalizeAzureBaseUrl(resourceName) {
|
|
1071
|
+
const trimmed = resourceName.trim();
|
|
1072
|
+
if (!/^https?:\/\//i.test(trimmed)) {
|
|
1073
|
+
return void 0;
|
|
1074
|
+
}
|
|
1075
|
+
const withoutSlash = trimmed.replace(/\/+$/, "");
|
|
1076
|
+
const normalized = withoutSlash.endsWith("/openai") ? withoutSlash : `${withoutSlash}/openai`;
|
|
1077
|
+
return normalized;
|
|
1078
|
+
}
|
|
1079
|
+
function buildAnthropicProviderOptions(defaults) {
|
|
1080
|
+
if (defaults.thinkingBudget === void 0) {
|
|
1081
|
+
return void 0;
|
|
1082
|
+
}
|
|
1083
|
+
return {
|
|
1084
|
+
anthropic: {
|
|
1085
|
+
thinking: {
|
|
1086
|
+
type: "enabled",
|
|
1087
|
+
budgetTokens: defaults.thinkingBudget
|
|
1088
|
+
}
|
|
1089
|
+
}
|
|
1090
|
+
};
|
|
1091
|
+
}
|
|
963
1092
|
function buildChatPrompt(request) {
|
|
964
|
-
|
|
965
|
-
|
|
1093
|
+
const provided = request.chatPrompt?.length ? request.chatPrompt : void 0;
|
|
1094
|
+
if (provided) {
|
|
1095
|
+
const hasSystemMessage = provided.some((message) => message.role === "system");
|
|
966
1096
|
if (hasSystemMessage) {
|
|
967
|
-
return
|
|
1097
|
+
return provided;
|
|
968
1098
|
}
|
|
969
|
-
const systemContent2 = resolveSystemContent(request);
|
|
970
|
-
return [{ role: "system", content: systemContent2 }, ...
|
|
1099
|
+
const systemContent2 = resolveSystemContent(request, false);
|
|
1100
|
+
return [{ role: "system", content: systemContent2 }, ...provided];
|
|
971
1101
|
}
|
|
972
|
-
const systemContent = resolveSystemContent(request);
|
|
1102
|
+
const systemContent = resolveSystemContent(request, true);
|
|
973
1103
|
const userContent = request.question.trim();
|
|
974
1104
|
const prompt = [
|
|
975
|
-
{
|
|
976
|
-
|
|
977
|
-
content: systemContent
|
|
978
|
-
},
|
|
979
|
-
{
|
|
980
|
-
role: "user",
|
|
981
|
-
content: userContent
|
|
982
|
-
}
|
|
1105
|
+
{ role: "system", content: systemContent },
|
|
1106
|
+
{ role: "user", content: userContent }
|
|
983
1107
|
];
|
|
984
1108
|
return prompt;
|
|
985
1109
|
}
|
|
986
|
-
function resolveSystemContent(request) {
|
|
1110
|
+
function resolveSystemContent(request, includeGuidelines) {
|
|
987
1111
|
const systemSegments = [];
|
|
988
1112
|
if (request.systemPrompt && request.systemPrompt.trim().length > 0) {
|
|
989
1113
|
systemSegments.push(request.systemPrompt.trim());
|
|
990
1114
|
} else {
|
|
991
1115
|
systemSegments.push(DEFAULT_SYSTEM_PROMPT);
|
|
992
1116
|
}
|
|
993
|
-
if (request.guidelines && request.guidelines.trim().length > 0) {
|
|
1117
|
+
if (includeGuidelines && request.guidelines && request.guidelines.trim().length > 0) {
|
|
994
1118
|
systemSegments.push(`[[ ## Guidelines ## ]]
|
|
995
1119
|
|
|
996
1120
|
${request.guidelines.trim()}`);
|
|
997
1121
|
}
|
|
998
1122
|
return systemSegments.join("\n\n");
|
|
999
1123
|
}
|
|
1000
|
-
function
|
|
1124
|
+
function toModelMessages(chatPrompt) {
|
|
1125
|
+
return chatPrompt.map((message) => {
|
|
1126
|
+
if (message.role === "tool" || message.role === "function") {
|
|
1127
|
+
const prefix = message.name ? `@[${message.name}]: ` : "@[Tool]: ";
|
|
1128
|
+
return {
|
|
1129
|
+
role: "assistant",
|
|
1130
|
+
content: `${prefix}${message.content}`
|
|
1131
|
+
};
|
|
1132
|
+
}
|
|
1133
|
+
if (message.role === "assistant" || message.role === "system" || message.role === "user") {
|
|
1134
|
+
return {
|
|
1135
|
+
role: message.role,
|
|
1136
|
+
content: message.content
|
|
1137
|
+
};
|
|
1138
|
+
}
|
|
1139
|
+
return {
|
|
1140
|
+
role: "user",
|
|
1141
|
+
content: message.content
|
|
1142
|
+
};
|
|
1143
|
+
});
|
|
1144
|
+
}
|
|
1145
|
+
function resolveModelSettings(request, defaults) {
|
|
1001
1146
|
const temperature = request.temperature ?? defaults.temperature;
|
|
1002
|
-
const
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
}
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
}
|
|
1010
|
-
|
|
1147
|
+
const maxOutputTokens = request.maxOutputTokens ?? defaults.maxOutputTokens;
|
|
1148
|
+
return {
|
|
1149
|
+
temperature,
|
|
1150
|
+
maxOutputTokens
|
|
1151
|
+
};
|
|
1152
|
+
}
|
|
1153
|
+
async function invokeModel(options) {
|
|
1154
|
+
const { model, request, defaults, retryConfig, providerOptions } = options;
|
|
1155
|
+
const chatPrompt = buildChatPrompt(request);
|
|
1156
|
+
const { temperature, maxOutputTokens } = resolveModelSettings(request, defaults);
|
|
1157
|
+
const result = await withRetry(
|
|
1158
|
+
() => generateText({
|
|
1159
|
+
model,
|
|
1160
|
+
messages: toModelMessages(chatPrompt),
|
|
1161
|
+
temperature,
|
|
1162
|
+
maxOutputTokens,
|
|
1163
|
+
maxRetries: 0,
|
|
1164
|
+
abortSignal: request.signal,
|
|
1165
|
+
...providerOptions ? { providerOptions } : {}
|
|
1166
|
+
}),
|
|
1167
|
+
retryConfig,
|
|
1168
|
+
request.signal
|
|
1169
|
+
);
|
|
1170
|
+
return mapResponse(result);
|
|
1011
1171
|
}
|
|
1012
|
-
function mapResponse(
|
|
1013
|
-
const primary = response.results[0];
|
|
1014
|
-
const text = typeof primary?.content === "string" ? primary.content : "";
|
|
1015
|
-
const reasoning = primary?.thought ?? primary?.thoughtBlock?.data;
|
|
1016
|
-
const usage = toJsonObject(response.modelUsage);
|
|
1172
|
+
function mapResponse(result) {
|
|
1017
1173
|
return {
|
|
1018
|
-
text,
|
|
1019
|
-
reasoning,
|
|
1020
|
-
raw:
|
|
1021
|
-
usage
|
|
1174
|
+
text: result.text ?? "",
|
|
1175
|
+
reasoning: result.reasoningText ?? void 0,
|
|
1176
|
+
raw: result,
|
|
1177
|
+
usage: toJsonObject(result.totalUsage ?? result.usage)
|
|
1022
1178
|
};
|
|
1023
1179
|
}
|
|
1024
1180
|
function toJsonObject(value) {
|
|
@@ -1031,34 +1187,59 @@ function toJsonObject(value) {
|
|
|
1031
1187
|
return void 0;
|
|
1032
1188
|
}
|
|
1033
1189
|
}
|
|
1034
|
-
function
|
|
1035
|
-
if (typeof
|
|
1036
|
-
|
|
1190
|
+
function extractStatus(error) {
|
|
1191
|
+
if (!error || typeof error !== "object") {
|
|
1192
|
+
return void 0;
|
|
1037
1193
|
}
|
|
1038
|
-
|
|
1039
|
-
|
|
1194
|
+
const candidate = error;
|
|
1195
|
+
const directStatus = candidate.status ?? candidate.statusCode;
|
|
1196
|
+
if (typeof directStatus === "number" && Number.isFinite(directStatus)) {
|
|
1197
|
+
return directStatus;
|
|
1040
1198
|
}
|
|
1041
|
-
|
|
1199
|
+
const responseStatus = typeof candidate.response === "object" && candidate.response ? candidate.response.status : void 0;
|
|
1200
|
+
if (typeof responseStatus === "number" && Number.isFinite(responseStatus)) {
|
|
1201
|
+
return responseStatus;
|
|
1202
|
+
}
|
|
1203
|
+
const message = typeof candidate.message === "string" ? candidate.message : void 0;
|
|
1204
|
+
if (message) {
|
|
1205
|
+
const match = message.match(/HTTP\s+(\d{3})/i);
|
|
1206
|
+
if (match) {
|
|
1207
|
+
const parsed = Number.parseInt(match[1], 10);
|
|
1208
|
+
if (Number.isFinite(parsed)) {
|
|
1209
|
+
return parsed;
|
|
1210
|
+
}
|
|
1211
|
+
}
|
|
1212
|
+
}
|
|
1213
|
+
return void 0;
|
|
1042
1214
|
}
|
|
1043
|
-
function
|
|
1215
|
+
function isNetworkError(error) {
|
|
1044
1216
|
if (!error || typeof error !== "object") {
|
|
1045
1217
|
return false;
|
|
1046
1218
|
}
|
|
1047
|
-
|
|
1048
|
-
|
|
1219
|
+
const candidate = error;
|
|
1220
|
+
if (candidate.name === "AbortError") {
|
|
1221
|
+
return false;
|
|
1049
1222
|
}
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
const status = Number.parseInt(match[1], 10);
|
|
1054
|
-
return retryableStatusCodes.includes(status);
|
|
1055
|
-
}
|
|
1223
|
+
const code = candidate.code;
|
|
1224
|
+
if (typeof code === "string" && /^E(AI|CONN|HOST|NET|PIPE|TIME|REFUSED|RESET)/i.test(code)) {
|
|
1225
|
+
return true;
|
|
1056
1226
|
}
|
|
1057
|
-
|
|
1227
|
+
const message = typeof candidate.message === "string" ? candidate.message : void 0;
|
|
1228
|
+
if (message && /(network|fetch failed|ECONNRESET|ENOTFOUND|EAI_AGAIN|ETIMEDOUT|ECONNREFUSED)/i.test(message)) {
|
|
1058
1229
|
return true;
|
|
1059
1230
|
}
|
|
1060
1231
|
return false;
|
|
1061
1232
|
}
|
|
1233
|
+
function isRetryableError(error, retryableStatusCodes) {
|
|
1234
|
+
const status = extractStatus(error);
|
|
1235
|
+
if (status === 401 || status === 403) {
|
|
1236
|
+
return false;
|
|
1237
|
+
}
|
|
1238
|
+
if (typeof status === "number") {
|
|
1239
|
+
return retryableStatusCodes.includes(status);
|
|
1240
|
+
}
|
|
1241
|
+
return isNetworkError(error);
|
|
1242
|
+
}
|
|
1062
1243
|
function calculateRetryDelay(attempt, config) {
|
|
1063
1244
|
const delay = Math.min(
|
|
1064
1245
|
config.maxDelayMs,
|
|
@@ -1094,146 +1275,10 @@ async function withRetry(fn, retryConfig, signal) {
|
|
|
1094
1275
|
}
|
|
1095
1276
|
const delay = calculateRetryDelay(attempt, config);
|
|
1096
1277
|
await sleep(delay);
|
|
1097
|
-
if (signal?.aborted) {
|
|
1098
|
-
throw new Error(`Request aborted: ${signal.reason ?? "Unknown reason"}`);
|
|
1099
|
-
}
|
|
1100
1278
|
}
|
|
1101
1279
|
}
|
|
1102
1280
|
throw lastError;
|
|
1103
1281
|
}
|
|
1104
|
-
var AzureProvider = class {
|
|
1105
|
-
constructor(targetName, config) {
|
|
1106
|
-
this.config = config;
|
|
1107
|
-
this.id = `azure:${targetName}`;
|
|
1108
|
-
this.targetName = targetName;
|
|
1109
|
-
this.defaults = {
|
|
1110
|
-
temperature: config.temperature,
|
|
1111
|
-
maxOutputTokens: config.maxOutputTokens
|
|
1112
|
-
};
|
|
1113
|
-
this.retryConfig = config.retry;
|
|
1114
|
-
this.ai = AxAI.create({
|
|
1115
|
-
name: "azure-openai",
|
|
1116
|
-
apiKey: config.apiKey,
|
|
1117
|
-
resourceName: config.resourceName,
|
|
1118
|
-
deploymentName: config.deploymentName,
|
|
1119
|
-
version: config.version,
|
|
1120
|
-
config: {
|
|
1121
|
-
stream: false
|
|
1122
|
-
}
|
|
1123
|
-
});
|
|
1124
|
-
}
|
|
1125
|
-
id;
|
|
1126
|
-
kind = "azure";
|
|
1127
|
-
targetName;
|
|
1128
|
-
ai;
|
|
1129
|
-
defaults;
|
|
1130
|
-
retryConfig;
|
|
1131
|
-
async invoke(request) {
|
|
1132
|
-
const chatPrompt = buildChatPrompt(request);
|
|
1133
|
-
const modelConfig = extractModelConfig(request, this.defaults);
|
|
1134
|
-
const response = await withRetry(
|
|
1135
|
-
async () => await this.ai.chat(
|
|
1136
|
-
{
|
|
1137
|
-
chatPrompt,
|
|
1138
|
-
model: this.config.deploymentName,
|
|
1139
|
-
...modelConfig ? { modelConfig } : {}
|
|
1140
|
-
},
|
|
1141
|
-
request.signal ? { abortSignal: request.signal } : void 0
|
|
1142
|
-
),
|
|
1143
|
-
this.retryConfig,
|
|
1144
|
-
request.signal
|
|
1145
|
-
);
|
|
1146
|
-
return mapResponse(ensureChatResponse(response));
|
|
1147
|
-
}
|
|
1148
|
-
getAxAI() {
|
|
1149
|
-
return this.ai;
|
|
1150
|
-
}
|
|
1151
|
-
};
|
|
1152
|
-
var AnthropicProvider = class {
|
|
1153
|
-
constructor(targetName, config) {
|
|
1154
|
-
this.config = config;
|
|
1155
|
-
this.id = `anthropic:${targetName}`;
|
|
1156
|
-
this.targetName = targetName;
|
|
1157
|
-
this.defaults = {
|
|
1158
|
-
temperature: config.temperature,
|
|
1159
|
-
maxOutputTokens: config.maxOutputTokens,
|
|
1160
|
-
thinkingBudget: config.thinkingBudget
|
|
1161
|
-
};
|
|
1162
|
-
this.retryConfig = config.retry;
|
|
1163
|
-
this.ai = AxAI.create({
|
|
1164
|
-
name: "anthropic",
|
|
1165
|
-
apiKey: config.apiKey
|
|
1166
|
-
});
|
|
1167
|
-
}
|
|
1168
|
-
id;
|
|
1169
|
-
kind = "anthropic";
|
|
1170
|
-
targetName;
|
|
1171
|
-
ai;
|
|
1172
|
-
defaults;
|
|
1173
|
-
retryConfig;
|
|
1174
|
-
async invoke(request) {
|
|
1175
|
-
const chatPrompt = buildChatPrompt(request);
|
|
1176
|
-
const modelConfig = extractModelConfig(request, this.defaults);
|
|
1177
|
-
const response = await withRetry(
|
|
1178
|
-
async () => await this.ai.chat(
|
|
1179
|
-
{
|
|
1180
|
-
chatPrompt,
|
|
1181
|
-
model: this.config.model,
|
|
1182
|
-
...modelConfig ? { modelConfig } : {}
|
|
1183
|
-
},
|
|
1184
|
-
request.signal ? { abortSignal: request.signal } : void 0
|
|
1185
|
-
),
|
|
1186
|
-
this.retryConfig,
|
|
1187
|
-
request.signal
|
|
1188
|
-
);
|
|
1189
|
-
return mapResponse(ensureChatResponse(response));
|
|
1190
|
-
}
|
|
1191
|
-
getAxAI() {
|
|
1192
|
-
return this.ai;
|
|
1193
|
-
}
|
|
1194
|
-
};
|
|
1195
|
-
var GeminiProvider = class {
|
|
1196
|
-
constructor(targetName, config) {
|
|
1197
|
-
this.config = config;
|
|
1198
|
-
this.id = `gemini:${targetName}`;
|
|
1199
|
-
this.targetName = targetName;
|
|
1200
|
-
this.defaults = {
|
|
1201
|
-
temperature: config.temperature,
|
|
1202
|
-
maxOutputTokens: config.maxOutputTokens
|
|
1203
|
-
};
|
|
1204
|
-
this.retryConfig = config.retry;
|
|
1205
|
-
this.ai = AxAI.create({
|
|
1206
|
-
name: "google-gemini",
|
|
1207
|
-
apiKey: config.apiKey
|
|
1208
|
-
});
|
|
1209
|
-
}
|
|
1210
|
-
id;
|
|
1211
|
-
kind = "gemini";
|
|
1212
|
-
targetName;
|
|
1213
|
-
ai;
|
|
1214
|
-
defaults;
|
|
1215
|
-
retryConfig;
|
|
1216
|
-
async invoke(request) {
|
|
1217
|
-
const chatPrompt = buildChatPrompt(request);
|
|
1218
|
-
const modelConfig = extractModelConfig(request, this.defaults);
|
|
1219
|
-
const response = await withRetry(
|
|
1220
|
-
async () => await this.ai.chat(
|
|
1221
|
-
{
|
|
1222
|
-
chatPrompt,
|
|
1223
|
-
model: this.config.model,
|
|
1224
|
-
...modelConfig ? { modelConfig } : {}
|
|
1225
|
-
},
|
|
1226
|
-
request.signal ? { abortSignal: request.signal } : void 0
|
|
1227
|
-
),
|
|
1228
|
-
this.retryConfig,
|
|
1229
|
-
request.signal
|
|
1230
|
-
);
|
|
1231
|
-
return mapResponse(ensureChatResponse(response));
|
|
1232
|
-
}
|
|
1233
|
-
getAxAI() {
|
|
1234
|
-
return this.ai;
|
|
1235
|
-
}
|
|
1236
|
-
};
|
|
1237
1282
|
|
|
1238
1283
|
// src/evaluation/providers/cli.ts
|
|
1239
1284
|
import { exec as execWithCallback } from "node:child_process";
|
|
@@ -1281,12 +1326,14 @@ var CliProvider = class {
|
|
|
1281
1326
|
supportsBatch = false;
|
|
1282
1327
|
config;
|
|
1283
1328
|
runCommand;
|
|
1329
|
+
verbose;
|
|
1284
1330
|
healthcheckPromise;
|
|
1285
1331
|
constructor(targetName, config, runner = defaultCommandRunner) {
|
|
1286
1332
|
this.targetName = targetName;
|
|
1287
1333
|
this.id = `cli:${targetName}`;
|
|
1288
1334
|
this.config = config;
|
|
1289
1335
|
this.runCommand = runner;
|
|
1336
|
+
this.verbose = config.verbose ?? false;
|
|
1290
1337
|
}
|
|
1291
1338
|
async invoke(request) {
|
|
1292
1339
|
if (request.signal?.aborted) {
|
|
@@ -1387,6 +1434,11 @@ var CliProvider = class {
|
|
|
1387
1434
|
generateOutputFilePath("healthcheck")
|
|
1388
1435
|
)
|
|
1389
1436
|
);
|
|
1437
|
+
if (this.verbose) {
|
|
1438
|
+
console.log(
|
|
1439
|
+
`[cli-provider:${this.targetName}] (healthcheck) CLI_EVALS_DIR=${process.env.CLI_EVALS_DIR ?? ""} cwd=${healthcheck.cwd ?? this.config.cwd ?? ""} command=${renderedCommand}`
|
|
1440
|
+
);
|
|
1441
|
+
}
|
|
1390
1442
|
const result = await this.runCommand(renderedCommand, {
|
|
1391
1443
|
cwd: healthcheck.cwd ?? this.config.cwd,
|
|
1392
1444
|
env: process.env,
|