@agentv/core 0.7.5 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-7XM7HYRS.js → chunk-SNTZFB24.js} +97 -67
- package/dist/chunk-SNTZFB24.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +32 -57
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +31 -55
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +204 -102
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +75 -2
- package/dist/index.d.ts +75 -2
- package/dist/index.js +109 -37
- package/dist/index.js.map +1 -1
- package/package.json +1 -2
- package/dist/chunk-7XM7HYRS.js.map +0 -1
package/dist/index.cjs
CHANGED
|
@@ -828,6 +828,67 @@ function ensureChatResponse(result) {
|
|
|
828
828
|
}
|
|
829
829
|
return result;
|
|
830
830
|
}
|
|
831
|
+
/**
 * Decides whether a failed provider call is worth retrying.
 *
 * Checks, in order:
 *  1. a numeric `status` property on the error,
 *  2. an "HTTP <3-digit code>" fragment inside the error message,
 *  3. an error whose `name` is "AxAIServiceNetworkError".
 *
 * The first check that applies decides the outcome; later checks are not
 * consulted once a status code has been found.
 *
 * @param {unknown} error - The value that was thrown.
 * @param {number[]} retryableStatusCodes - HTTP status codes that should be retried.
 * @returns {boolean} `true` when the error should be retried.
 */
function isRetryableError(error, retryableStatusCodes) {
  if (!error || typeof error !== "object") {
    return false;
  }
  if ("status" in error && typeof error.status === "number") {
    return retryableStatusCodes.includes(error.status);
  }
  if ("message" in error && typeof error.message === "string") {
    // The lookahead rejects longer digit runs such as "HTTP 5030", which
    // would otherwise be misread as status 503.
    const match = error.message.match(/HTTP (\d{3})(?!\d)/);
    if (match) {
      const status = Number.parseInt(match[1], 10);
      return retryableStatusCodes.includes(status);
    }
  }
  // Network-level failures carry no status code and are always retried.
  return "name" in error && error.name === "AxAIServiceNetworkError";
}
|
|
850
|
+
/**
 * Computes the backoff delay (in milliseconds) before the next retry.
 *
 * The base delay grows exponentially with `attempt`
 * (`initialDelayMs * backoffFactor ** attempt`) and is scaled by a random
 * jitter factor in the range [0.75, 1.25). The final value never exceeds
 * `config.maxDelayMs`.
 *
 * @param {number} attempt - Zero-based index of the attempt that just failed.
 * @param {{ initialDelayMs: number, maxDelayMs: number, backoffFactor: number }} config
 * @returns {number} Delay in milliseconds, at most `config.maxDelayMs`.
 */
function calculateRetryDelay(attempt, config) {
  const baseDelay = Math.min(
    config.maxDelayMs,
    config.initialDelayMs * config.backoffFactor ** attempt
  );
  const jittered = baseDelay * (0.75 + Math.random() * 0.5);
  // Clamp again after jitter: upward jitter on an already-capped delay would
  // otherwise overshoot the configured maximum by up to 25%.
  return Math.min(config.maxDelayMs, jittered);
}
|
|
857
|
+
/**
 * Returns a promise that resolves after `ms` milliseconds.
 *
 * @param {number} ms - How long to wait, in milliseconds.
 * @param {AbortSignal} [signal] - Optional signal. When it aborts, the timer
 *   is cleared and the promise rejects with `signal.reason` (or a generic
 *   Error when no reason is set).
 * @returns {Promise<void>} Resolves with no value once the time has elapsed.
 */
function sleep(ms, signal) {
  return new Promise((resolve, reject) => {
    const abortError = () => signal.reason ?? new Error("Sleep aborted");
    if (signal?.aborted) {
      reject(abortError());
      return;
    }
    const onAbort = () => {
      // Clear the timer so an aborted wait does not keep the process alive.
      clearTimeout(timer);
      reject(abortError());
    };
    const timer = setTimeout(() => {
      signal?.removeEventListener("abort", onAbort);
      resolve();
    }, ms);
    signal?.addEventListener("abort", onAbort, { once: true });
  });
}
|
|
860
|
+
/**
 * Invokes `fn` and retries it with exponential backoff when it fails with a
 * retryable error (as decided by `isRetryableError`).
 *
 * Defaults when a setting is missing from `retryConfig`: 3 retries, 1000 ms
 * initial delay, 60000 ms maximum delay, backoff factor 2, and retryable
 * status codes 500, 408, 429, 502, 503 and 504.
 *
 * @param {() => Promise<any>} fn - The operation to run.
 * @param {object} [retryConfig] - Optional overrides: `maxRetries`,
 *   `initialDelayMs`, `maxDelayMs`, `backoffFactor`, `retryableStatusCodes`.
 * @param {AbortSignal} [signal] - Optional signal; checked before every
 *   attempt and observed while waiting between attempts.
 * @returns {Promise<any>} Whatever `fn` resolves with.
 * @throws {Error} "Request aborted: ..." when `signal` is aborted; otherwise
 *   the last error thrown by `fn` (immediately for non-retryable errors).
 */
async function withRetry(fn, retryConfig, signal) {
  const requestedRetries = retryConfig?.maxRetries ?? 3;
  const config = {
    // A negative or NaN retry budget still allows the initial attempt;
    // otherwise the loop would never run and `undefined` would be thrown.
    maxRetries: requestedRetries >= 0 ? requestedRetries : 0,
    initialDelayMs: retryConfig?.initialDelayMs ?? 1e3,
    maxDelayMs: retryConfig?.maxDelayMs ?? 6e4,
    backoffFactor: retryConfig?.backoffFactor ?? 2,
    retryableStatusCodes: retryConfig?.retryableStatusCodes ?? [500, 408, 429, 502, 503, 504]
  };
  const throwIfAborted = () => {
    if (signal?.aborted) {
      throw new Error(`Request aborted: ${signal.reason ?? "Unknown reason"}`);
    }
  };
  // Waits `ms` milliseconds but settles early when the signal aborts, so an
  // abort during backoff is reported right away instead of after the delay.
  const waitForRetry = (ms) => new Promise((resolve) => {
    if (signal?.aborted) {
      resolve();
      return;
    }
    if (typeof signal?.addEventListener !== "function") {
      setTimeout(resolve, ms);
      return;
    }
    const finish = () => {
      clearTimeout(timer);
      signal.removeEventListener("abort", finish);
      resolve();
    };
    const timer = setTimeout(finish, ms);
    signal.addEventListener("abort", finish, { once: true });
  });
  let lastError;
  for (let attempt = 0; attempt <= config.maxRetries; attempt++) {
    throwIfAborted();
    try {
      return await fn();
    } catch (error) {
      lastError = error;
      if (attempt >= config.maxRetries) {
        break;
      }
      if (!isRetryableError(error, config.retryableStatusCodes)) {
        throw error;
      }
      await waitForRetry(calculateRetryDelay(attempt, config));
      throwIfAborted();
    }
  }
  throw lastError;
}
|
|
831
892
|
var AzureProvider = class {
|
|
832
893
|
constructor(targetName, config) {
|
|
833
894
|
this.config = config;
|
|
@@ -837,6 +898,7 @@ var AzureProvider = class {
|
|
|
837
898
|
temperature: config.temperature,
|
|
838
899
|
maxOutputTokens: config.maxOutputTokens
|
|
839
900
|
};
|
|
901
|
+
this.retryConfig = config.retry;
|
|
840
902
|
this.ai = import_ax.AxAI.create({
|
|
841
903
|
name: "azure-openai",
|
|
842
904
|
apiKey: config.apiKey,
|
|
@@ -853,16 +915,21 @@ var AzureProvider = class {
|
|
|
853
915
|
targetName;
|
|
854
916
|
ai;
|
|
855
917
|
defaults;
|
|
918
|
+
retryConfig;
|
|
856
919
|
async invoke(request) {
|
|
857
920
|
const chatPrompt = buildChatPrompt(request);
|
|
858
921
|
const modelConfig = extractModelConfig(request, this.defaults);
|
|
859
|
-
const response = await
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
922
|
+
const response = await withRetry(
|
|
923
|
+
async () => await this.ai.chat(
|
|
924
|
+
{
|
|
925
|
+
chatPrompt,
|
|
926
|
+
model: this.config.deploymentName,
|
|
927
|
+
...modelConfig ? { modelConfig } : {}
|
|
928
|
+
},
|
|
929
|
+
request.signal ? { abortSignal: request.signal } : void 0
|
|
930
|
+
),
|
|
931
|
+
this.retryConfig,
|
|
932
|
+
request.signal
|
|
866
933
|
);
|
|
867
934
|
return mapResponse(ensureChatResponse(response));
|
|
868
935
|
}
|
|
@@ -880,6 +947,7 @@ var AnthropicProvider = class {
|
|
|
880
947
|
maxOutputTokens: config.maxOutputTokens,
|
|
881
948
|
thinkingBudget: config.thinkingBudget
|
|
882
949
|
};
|
|
950
|
+
this.retryConfig = config.retry;
|
|
883
951
|
this.ai = import_ax.AxAI.create({
|
|
884
952
|
name: "anthropic",
|
|
885
953
|
apiKey: config.apiKey
|
|
@@ -890,16 +958,21 @@ var AnthropicProvider = class {
|
|
|
890
958
|
targetName;
|
|
891
959
|
ai;
|
|
892
960
|
defaults;
|
|
961
|
+
retryConfig;
|
|
893
962
|
async invoke(request) {
|
|
894
963
|
const chatPrompt = buildChatPrompt(request);
|
|
895
964
|
const modelConfig = extractModelConfig(request, this.defaults);
|
|
896
|
-
const response = await
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
965
|
+
const response = await withRetry(
|
|
966
|
+
async () => await this.ai.chat(
|
|
967
|
+
{
|
|
968
|
+
chatPrompt,
|
|
969
|
+
model: this.config.model,
|
|
970
|
+
...modelConfig ? { modelConfig } : {}
|
|
971
|
+
},
|
|
972
|
+
request.signal ? { abortSignal: request.signal } : void 0
|
|
973
|
+
),
|
|
974
|
+
this.retryConfig,
|
|
975
|
+
request.signal
|
|
903
976
|
);
|
|
904
977
|
return mapResponse(ensureChatResponse(response));
|
|
905
978
|
}
|
|
@@ -916,6 +989,7 @@ var GeminiProvider = class {
|
|
|
916
989
|
temperature: config.temperature,
|
|
917
990
|
maxOutputTokens: config.maxOutputTokens
|
|
918
991
|
};
|
|
992
|
+
this.retryConfig = config.retry;
|
|
919
993
|
this.ai = import_ax.AxAI.create({
|
|
920
994
|
name: "google-gemini",
|
|
921
995
|
apiKey: config.apiKey
|
|
@@ -926,16 +1000,21 @@ var GeminiProvider = class {
|
|
|
926
1000
|
targetName;
|
|
927
1001
|
ai;
|
|
928
1002
|
defaults;
|
|
1003
|
+
retryConfig;
|
|
929
1004
|
async invoke(request) {
|
|
930
1005
|
const chatPrompt = buildChatPrompt(request);
|
|
931
1006
|
const modelConfig = extractModelConfig(request, this.defaults);
|
|
932
|
-
const response = await
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
1007
|
+
const response = await withRetry(
|
|
1008
|
+
async () => await this.ai.chat(
|
|
1009
|
+
{
|
|
1010
|
+
chatPrompt,
|
|
1011
|
+
model: this.config.model,
|
|
1012
|
+
...modelConfig ? { modelConfig } : {}
|
|
1013
|
+
},
|
|
1014
|
+
request.signal ? { abortSignal: request.signal } : void 0
|
|
1015
|
+
),
|
|
1016
|
+
this.retryConfig,
|
|
1017
|
+
request.signal
|
|
939
1018
|
);
|
|
940
1019
|
return mapResponse(ensureChatResponse(response));
|
|
941
1020
|
}
|
|
@@ -1005,10 +1084,9 @@ var CliProvider = class {
|
|
|
1005
1084
|
const outputFilePath = generateOutputFilePath(request.evalCaseId);
|
|
1006
1085
|
const templateValues = buildTemplateValues(request, this.config, outputFilePath);
|
|
1007
1086
|
const renderedCommand = renderTemplate(this.config.commandTemplate, templateValues);
|
|
1008
|
-
const env = this.config.env ? { ...process.env, ...this.config.env } : process.env;
|
|
1009
1087
|
const result = await this.runCommand(renderedCommand, {
|
|
1010
1088
|
cwd: this.config.cwd,
|
|
1011
|
-
env,
|
|
1089
|
+
env: process.env,
|
|
1012
1090
|
timeoutMs: this.config.timeoutMs,
|
|
1013
1091
|
signal: request.signal
|
|
1014
1092
|
});
|
|
@@ -1097,10 +1175,9 @@ var CliProvider = class {
|
|
|
1097
1175
|
generateOutputFilePath("healthcheck")
|
|
1098
1176
|
)
|
|
1099
1177
|
);
|
|
1100
|
-
const env = this.config.env ? { ...process.env, ...this.config.env } : process.env;
|
|
1101
1178
|
const result = await this.runCommand(renderedCommand, {
|
|
1102
1179
|
cwd: healthcheck.cwd ?? this.config.cwd,
|
|
1103
|
-
env,
|
|
1180
|
+
env: process.env,
|
|
1104
1181
|
timeoutMs,
|
|
1105
1182
|
signal
|
|
1106
1183
|
});
|
|
@@ -2051,10 +2128,9 @@ var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set(["PROMPT", "GUIDELINES", "EVAL_ID
|
|
|
2051
2128
|
var BASE_TARGET_SCHEMA = import_zod.z.object({
|
|
2052
2129
|
name: import_zod.z.string().min(1, "target name is required"),
|
|
2053
2130
|
provider: import_zod.z.string().min(1, "provider is required"),
|
|
2054
|
-
settings: import_zod.z.record(import_zod.z.unknown()).optional(),
|
|
2055
2131
|
judge_target: import_zod.z.string().optional(),
|
|
2056
2132
|
workers: import_zod.z.number().int().min(1).optional()
|
|
2057
|
-
});
|
|
2133
|
+
}).passthrough();
|
|
2058
2134
|
var DEFAULT_AZURE_API_VERSION = "2024-10-01-preview";
|
|
2059
2135
|
function normalizeAzureApiVersion(value) {
|
|
2060
2136
|
if (!value) {
|
|
@@ -2067,11 +2143,43 @@ function normalizeAzureApiVersion(value) {
|
|
|
2067
2143
|
const withoutPrefix = trimmed.replace(/^api[-_]?version\s*=\s*/i, "").trim();
|
|
2068
2144
|
return withoutPrefix.length > 0 ? withoutPrefix : DEFAULT_AZURE_API_VERSION;
|
|
2069
2145
|
}
|
|
2146
|
+
/**
 * Builds the retry settings for a target from its definition.
 *
 * Each setting is read from its snake_case key first and falls back to the
 * camelCase key. Numeric settings go through `resolveOptionalNumber` and the
 * status-code list through `resolveOptionalNumberArray`, each labelled with
 * the target name for error messages.
 *
 * @param {object} target - Target definition carrying the retry keys.
 * @returns {object|undefined} An object with `maxRetries`, `initialDelayMs`,
 *   `maxDelayMs`, `backoffFactor` and `retryableStatusCodes`, or `undefined`
 *   when none of them resolved to a value.
 */
function resolveRetryConfig(target) {
  const retry = {
    maxRetries: resolveOptionalNumber(
      target.max_retries ?? target.maxRetries,
      `${target.name} max retries`
    ),
    initialDelayMs: resolveOptionalNumber(
      target.retry_initial_delay_ms ?? target.retryInitialDelayMs,
      `${target.name} retry initial delay`
    ),
    maxDelayMs: resolveOptionalNumber(
      target.retry_max_delay_ms ?? target.retryMaxDelayMs,
      `${target.name} retry max delay`
    ),
    backoffFactor: resolveOptionalNumber(
      target.retry_backoff_factor ?? target.retryBackoffFactor,
      `${target.name} retry backoff factor`
    ),
    retryableStatusCodes: resolveOptionalNumberArray(
      target.retry_status_codes ?? target.retryStatusCodes,
      `${target.name} retry status codes`
    )
  };
  // No retry setting present at all: report "not configured" so callers can
  // distinguish it from a partially filled configuration.
  const isConfigured = Object.values(retry).some((setting) => setting !== void 0);
  return isConfigured ? retry : void 0;
}
|
|
2070
2178
|
function resolveTargetDefinition(definition, env = process.env) {
|
|
2071
2179
|
const parsed = BASE_TARGET_SCHEMA.parse(definition);
|
|
2072
2180
|
const provider = parsed.provider.toLowerCase();
|
|
2073
2181
|
const providerBatching = resolveOptionalBoolean(
|
|
2074
|
-
parsed.
|
|
2182
|
+
parsed.provider_batching ?? parsed.providerBatching
|
|
2075
2183
|
);
|
|
2076
2184
|
switch (provider) {
|
|
2077
2185
|
case "azure":
|
|
@@ -2147,13 +2255,12 @@ function resolveTargetDefinition(definition, env = process.env) {
|
|
|
2147
2255
|
}
|
|
2148
2256
|
}
|
|
2149
2257
|
function resolveAzureConfig(target, env) {
|
|
2150
|
-
const
|
|
2151
|
-
const
|
|
2152
|
-
const
|
|
2153
|
-
const
|
|
2154
|
-
const
|
|
2155
|
-
const
|
|
2156
|
-
const maxTokensSource = settings.max_output_tokens ?? settings.maxTokens;
|
|
2258
|
+
const endpointSource = target.endpoint ?? target.resource ?? target.resourceName;
|
|
2259
|
+
const apiKeySource = target.api_key ?? target.apiKey;
|
|
2260
|
+
const deploymentSource = target.deployment ?? target.deploymentName ?? target.model;
|
|
2261
|
+
const versionSource = target.version ?? target.api_version;
|
|
2262
|
+
const temperatureSource = target.temperature;
|
|
2263
|
+
const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
|
|
2157
2264
|
const resourceName = resolveString(endpointSource, env, `${target.name} endpoint`);
|
|
2158
2265
|
const apiKey = resolveString(apiKeySource, env, `${target.name} api key`);
|
|
2159
2266
|
const deploymentName = resolveString(deploymentSource, env, `${target.name} deployment`);
|
|
@@ -2165,58 +2272,61 @@ function resolveAzureConfig(target, env) {
|
|
|
2165
2272
|
maxTokensSource,
|
|
2166
2273
|
`${target.name} max output tokens`
|
|
2167
2274
|
);
|
|
2275
|
+
const retry = resolveRetryConfig(target);
|
|
2168
2276
|
return {
|
|
2169
2277
|
resourceName,
|
|
2170
2278
|
deploymentName,
|
|
2171
2279
|
apiKey,
|
|
2172
2280
|
version,
|
|
2173
2281
|
temperature,
|
|
2174
|
-
maxOutputTokens
|
|
2282
|
+
maxOutputTokens,
|
|
2283
|
+
retry
|
|
2175
2284
|
};
|
|
2176
2285
|
}
|
|
2177
2286
|
function resolveAnthropicConfig(target, env) {
|
|
2178
|
-
const
|
|
2179
|
-
const
|
|
2180
|
-
const
|
|
2181
|
-
const
|
|
2182
|
-
const
|
|
2183
|
-
const thinkingBudgetSource = settings.thinking_budget ?? settings.thinkingBudget;
|
|
2287
|
+
const apiKeySource = target.api_key ?? target.apiKey;
|
|
2288
|
+
const modelSource = target.model ?? target.deployment ?? target.variant;
|
|
2289
|
+
const temperatureSource = target.temperature;
|
|
2290
|
+
const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
|
|
2291
|
+
const thinkingBudgetSource = target.thinking_budget ?? target.thinkingBudget;
|
|
2184
2292
|
const apiKey = resolveString(apiKeySource, env, `${target.name} Anthropic api key`);
|
|
2185
2293
|
const model = resolveString(modelSource, env, `${target.name} Anthropic model`);
|
|
2294
|
+
const retry = resolveRetryConfig(target);
|
|
2186
2295
|
return {
|
|
2187
2296
|
apiKey,
|
|
2188
2297
|
model,
|
|
2189
2298
|
temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
|
|
2190
2299
|
maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`),
|
|
2191
|
-
thinkingBudget: resolveOptionalNumber(thinkingBudgetSource, `${target.name} thinking budget`)
|
|
2300
|
+
thinkingBudget: resolveOptionalNumber(thinkingBudgetSource, `${target.name} thinking budget`),
|
|
2301
|
+
retry
|
|
2192
2302
|
};
|
|
2193
2303
|
}
|
|
2194
2304
|
function resolveGeminiConfig(target, env) {
|
|
2195
|
-
const
|
|
2196
|
-
const
|
|
2197
|
-
const
|
|
2198
|
-
const
|
|
2199
|
-
const maxTokensSource = settings.max_output_tokens ?? settings.maxTokens;
|
|
2305
|
+
const apiKeySource = target.api_key ?? target.apiKey;
|
|
2306
|
+
const modelSource = target.model ?? target.deployment ?? target.variant;
|
|
2307
|
+
const temperatureSource = target.temperature;
|
|
2308
|
+
const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
|
|
2200
2309
|
const apiKey = resolveString(apiKeySource, env, `${target.name} Google API key`);
|
|
2201
2310
|
const model = resolveOptionalString(modelSource, env, `${target.name} Gemini model`, {
|
|
2202
2311
|
allowLiteral: true,
|
|
2203
2312
|
optionalEnv: true
|
|
2204
2313
|
}) ?? "gemini-2.5-flash";
|
|
2314
|
+
const retry = resolveRetryConfig(target);
|
|
2205
2315
|
return {
|
|
2206
2316
|
apiKey,
|
|
2207
2317
|
model,
|
|
2208
2318
|
temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
|
|
2209
|
-
maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`)
|
|
2319
|
+
maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`),
|
|
2320
|
+
retry
|
|
2210
2321
|
};
|
|
2211
2322
|
}
|
|
2212
2323
|
function resolveCodexConfig(target, env) {
|
|
2213
|
-
const
|
|
2214
|
-
const
|
|
2215
|
-
const
|
|
2216
|
-
const
|
|
2217
|
-
const
|
|
2218
|
-
const
|
|
2219
|
-
const logFormatSource = settings.log_format ?? settings.logFormat ?? settings.log_output_format ?? settings.logOutputFormat ?? env.AGENTV_CODEX_LOG_FORMAT;
|
|
2324
|
+
const executableSource = target.executable ?? target.command ?? target.binary;
|
|
2325
|
+
const argsSource = target.args ?? target.arguments;
|
|
2326
|
+
const cwdSource = target.cwd;
|
|
2327
|
+
const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
|
|
2328
|
+
const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
|
|
2329
|
+
const logFormatSource = target.log_format ?? target.logFormat ?? target.log_output_format ?? target.logOutputFormat ?? env.AGENTV_CODEX_LOG_FORMAT;
|
|
2220
2330
|
const executable = resolveOptionalString(executableSource, env, `${target.name} codex executable`, {
|
|
2221
2331
|
allowLiteral: true,
|
|
2222
2332
|
optionalEnv: true
|
|
@@ -2255,21 +2365,19 @@ function normalizeCodexLogFormat(value) {
|
|
|
2255
2365
|
throw new Error("codex log format must be 'summary' or 'json'");
|
|
2256
2366
|
}
|
|
2257
2367
|
function resolveMockConfig(target) {
|
|
2258
|
-
const
|
|
2259
|
-
const response = typeof settings.response === "string" ? settings.response : void 0;
|
|
2368
|
+
const response = typeof target.response === "string" ? target.response : void 0;
|
|
2260
2369
|
return { response };
|
|
2261
2370
|
}
|
|
2262
2371
|
function resolveVSCodeConfig(target, env, insiders) {
|
|
2263
|
-
const
|
|
2264
|
-
const workspaceTemplateEnvVar = resolveOptionalLiteralString(settings.workspace_template ?? settings.workspaceTemplate);
|
|
2372
|
+
const workspaceTemplateEnvVar = resolveOptionalLiteralString(target.workspace_template ?? target.workspaceTemplate);
|
|
2265
2373
|
const workspaceTemplate = workspaceTemplateEnvVar ? resolveOptionalString(workspaceTemplateEnvVar, env, `${target.name} workspace template path`, {
|
|
2266
2374
|
allowLiteral: false,
|
|
2267
2375
|
optionalEnv: true
|
|
2268
2376
|
}) : void 0;
|
|
2269
|
-
const commandSource =
|
|
2270
|
-
const waitSource =
|
|
2271
|
-
const dryRunSource =
|
|
2272
|
-
const subagentRootSource =
|
|
2377
|
+
const commandSource = target.vscode_cmd ?? target.command;
|
|
2378
|
+
const waitSource = target.wait;
|
|
2379
|
+
const dryRunSource = target.dry_run ?? target.dryRun;
|
|
2380
|
+
const subagentRootSource = target.subagent_root ?? target.subagentRoot;
|
|
2273
2381
|
const defaultCommand = insiders ? "code-insiders" : "code";
|
|
2274
2382
|
const command = resolveOptionalLiteralString(commandSource) ?? defaultCommand;
|
|
2275
2383
|
return {
|
|
@@ -2284,18 +2392,16 @@ function resolveVSCodeConfig(target, env, insiders) {
|
|
|
2284
2392
|
};
|
|
2285
2393
|
}
|
|
2286
2394
|
function resolveCliConfig(target, env) {
|
|
2287
|
-
const
|
|
2288
|
-
const commandTemplateSource = settings.command_template ?? settings.commandTemplate;
|
|
2395
|
+
const commandTemplateSource = target.command_template ?? target.commandTemplate;
|
|
2289
2396
|
const filesFormat = resolveOptionalLiteralString(
|
|
2290
|
-
|
|
2397
|
+
target.files_format ?? target.filesFormat ?? target.attachments_format ?? target.attachmentsFormat
|
|
2291
2398
|
);
|
|
2292
|
-
const cwd = resolveOptionalString(
|
|
2399
|
+
const cwd = resolveOptionalString(target.cwd, env, `${target.name} working directory`, {
|
|
2293
2400
|
allowLiteral: true,
|
|
2294
2401
|
optionalEnv: true
|
|
2295
2402
|
});
|
|
2296
|
-
const
|
|
2297
|
-
const
|
|
2298
|
-
const healthcheck = resolveCliHealthcheck(settings.healthcheck, env, target.name);
|
|
2403
|
+
const timeoutMs = resolveTimeoutMs(target.timeout_seconds ?? target.timeoutSeconds, `${target.name} timeout`);
|
|
2404
|
+
const healthcheck = resolveCliHealthcheck(target.healthcheck, env, target.name);
|
|
2299
2405
|
const commandTemplate = resolveString(
|
|
2300
2406
|
commandTemplateSource,
|
|
2301
2407
|
env,
|
|
@@ -2307,29 +2413,10 @@ function resolveCliConfig(target, env) {
|
|
|
2307
2413
|
commandTemplate,
|
|
2308
2414
|
filesFormat,
|
|
2309
2415
|
cwd,
|
|
2310
|
-
env: envOverrides,
|
|
2311
2416
|
timeoutMs,
|
|
2312
2417
|
healthcheck
|
|
2313
2418
|
};
|
|
2314
2419
|
}
|
|
2315
|
-
function resolveEnvOverrides(source, env, targetName) {
|
|
2316
|
-
if (source === void 0 || source === null) {
|
|
2317
|
-
return void 0;
|
|
2318
|
-
}
|
|
2319
|
-
if (typeof source !== "object" || Array.isArray(source)) {
|
|
2320
|
-
throw new Error(`${targetName} env overrides must be an object map of strings`);
|
|
2321
|
-
}
|
|
2322
|
-
const entries = Object.entries(source);
|
|
2323
|
-
const resolved = {};
|
|
2324
|
-
for (const [key, value] of entries) {
|
|
2325
|
-
if (typeof value !== "string") {
|
|
2326
|
-
throw new Error(`${targetName} env override '${key}' must be a string`);
|
|
2327
|
-
}
|
|
2328
|
-
const resolvedValue = resolveString(value, env, `${targetName} env override '${key}'`);
|
|
2329
|
-
resolved[key] = resolvedValue;
|
|
2330
|
-
}
|
|
2331
|
-
return Object.keys(resolved).length > 0 ? resolved : void 0;
|
|
2332
|
-
}
|
|
2333
2420
|
function resolveTimeoutMs(source, description) {
|
|
2334
2421
|
const seconds = resolveOptionalNumber(source, `${description} (seconds)`);
|
|
2335
2422
|
if (seconds === void 0) {
|
|
@@ -2525,6 +2612,26 @@ function resolveOptionalStringArray(source, env, description) {
|
|
|
2525
2612
|
}
|
|
2526
2613
|
return resolved.length > 0 ? resolved : void 0;
|
|
2527
2614
|
}
|
|
2615
|
+
/**
 * Validates an optional array of finite numbers.
 *
 * @param {unknown} source - The raw value; `undefined`, `null` and an empty
 *   array all mean "not provided".
 * @param {string} description - Label used as the prefix of error messages.
 * @returns {number[]|undefined} A new array with the validated numbers, or
 *   `undefined` when nothing was provided.
 * @throws {Error} When `source` is not an array, or when an element is not a
 *   finite number (the message names the offending index).
 */
function resolveOptionalNumberArray(source, description) {
  if (source === void 0 || source === null) {
    return void 0;
  }
  if (!Array.isArray(source)) {
    throw new Error(`${description} must be an array of numbers`);
  }
  if (source.length === 0) {
    return void 0;
  }
  // entries() also visits holes in sparse arrays (as undefined), so they are
  // rejected just like any other non-number element.
  for (const [index, item] of source.entries()) {
    if (typeof item !== "number" || !Number.isFinite(item)) {
      throw new Error(`${description}[${index}] must be a number`);
    }
  }
  // Non-empty and fully validated: hand back a copy so the caller's array is
  // never shared with the resolved configuration.
  return [...source];
}
|
|
2528
2635
|
|
|
2529
2636
|
// src/evaluation/providers/vscode.ts
|
|
2530
2637
|
var import_node_path6 = __toESM(require("path"), 1);
|
|
@@ -2784,7 +2891,7 @@ var AGENT_PROVIDER_KINDS = [
|
|
|
2784
2891
|
"vscode",
|
|
2785
2892
|
"vscode-insiders"
|
|
2786
2893
|
];
|
|
2787
|
-
var TARGETS_SCHEMA_V2 = "agentv-targets-v2.
|
|
2894
|
+
var TARGETS_SCHEMA_V2 = "agentv-targets-v2.2";
|
|
2788
2895
|
function isAgentProvider(provider) {
|
|
2789
2896
|
return provider ? AGENT_PROVIDER_KINDS.includes(provider.kind) : false;
|
|
2790
2897
|
}
|
|
@@ -2827,20 +2934,13 @@ function assertTargetDefinition(value, index, filePath) {
|
|
|
2827
2934
|
}
|
|
2828
2935
|
const name = value.name;
|
|
2829
2936
|
const provider = value.provider;
|
|
2830
|
-
const settings = value.settings;
|
|
2831
|
-
const judgeTarget = value.judge_target;
|
|
2832
2937
|
if (typeof name !== "string" || name.trim().length === 0) {
|
|
2833
2938
|
throw new Error(`targets.yaml entry at index ${index} in ${filePath} is missing a valid 'name'`);
|
|
2834
2939
|
}
|
|
2835
2940
|
if (typeof provider !== "string" || provider.trim().length === 0) {
|
|
2836
2941
|
throw new Error(`targets.yaml entry '${name}' in ${filePath} is missing a valid 'provider'`);
|
|
2837
2942
|
}
|
|
2838
|
-
return
|
|
2839
|
-
name,
|
|
2840
|
-
provider,
|
|
2841
|
-
settings: isRecord(settings) ? settings : void 0,
|
|
2842
|
-
judge_target: typeof judgeTarget === "string" ? judgeTarget : void 0
|
|
2843
|
-
};
|
|
2943
|
+
return value;
|
|
2844
2944
|
}
|
|
2845
2945
|
async function fileExists3(filePath) {
|
|
2846
2946
|
try {
|
|
@@ -3481,10 +3581,11 @@ async function runEvaluation(options) {
|
|
|
3481
3581
|
await onProgress({
|
|
3482
3582
|
workerId,
|
|
3483
3583
|
evalId: evalCase.id,
|
|
3484
|
-
status: "completed",
|
|
3584
|
+
status: result.error ? "failed" : "completed",
|
|
3485
3585
|
startedAt: 0,
|
|
3486
3586
|
// Not used for completed status
|
|
3487
|
-
completedAt: Date.now()
|
|
3587
|
+
completedAt: Date.now(),
|
|
3588
|
+
error: result.error
|
|
3488
3589
|
});
|
|
3489
3590
|
}
|
|
3490
3591
|
if (onResult) {
|
|
@@ -4022,7 +4123,8 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
|
|
|
4022
4123
|
target: targetName,
|
|
4023
4124
|
timestamp: timestamp.toISOString(),
|
|
4024
4125
|
raw_aspects: [],
|
|
4025
|
-
raw_request: rawRequest
|
|
4126
|
+
raw_request: rawRequest,
|
|
4127
|
+
error: message
|
|
4026
4128
|
};
|
|
4027
4129
|
}
|
|
4028
4130
|
function createCacheKey(provider, target, evalCase, promptInputs) {
|