@agentv/core 0.7.4 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-L6RCDZ4Z.js → chunk-SNTZFB24.js} +102 -68
- package/dist/chunk-SNTZFB24.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +32 -57
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +31 -55
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +211 -107
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +81 -3
- package/dist/index.d.ts +81 -3
- package/dist/index.js +112 -41
- package/dist/index.js.map +1 -1
- package/package.json +1 -2
- package/dist/chunk-L6RCDZ4Z.js.map +0 -1
package/dist/index.cjs
CHANGED
|
@@ -52,6 +52,7 @@ __export(index_exports, {
|
|
|
52
52
|
isTestMessageRole: () => isTestMessageRole,
|
|
53
53
|
listTargetNames: () => listTargetNames,
|
|
54
54
|
loadEvalCases: () => loadEvalCases,
|
|
55
|
+
normalizeLineEndings: () => normalizeLineEndings,
|
|
55
56
|
readTargetDefinitions: () => readTargetDefinitions,
|
|
56
57
|
readTextFile: () => readTextFile,
|
|
57
58
|
resolveAndCreateProvider: () => resolveAndCreateProvider,
|
|
@@ -133,9 +134,12 @@ async function fileExists(filePath) {
|
|
|
133
134
|
return false;
|
|
134
135
|
}
|
|
135
136
|
}
|
|
137
|
+
/**
 * Converts Windows-style line endings to Unix-style ones.
 *
 * Every CRLF pair ("\r\n") becomes a single LF ("\n"). A carriage return
 * that is not immediately followed by a line feed is left untouched.
 *
 * @param {string} content - Text to normalize.
 * @returns {string} The text with each CRLF pair replaced by LF.
 */
function normalizeLineEndings(content) {
  const segments = content.split("\r\n");
  return segments.join("\n");
}
|
|
136
140
|
async function readTextFile(filePath) {
|
|
137
141
|
const content = await (0, import_promises.readFile)(filePath, "utf8");
|
|
138
|
-
return content
|
|
142
|
+
return normalizeLineEndings(content);
|
|
139
143
|
}
|
|
140
144
|
async function findGitRoot(startPath) {
|
|
141
145
|
let currentDir = import_node_path.default.dirname(import_node_path.default.resolve(startPath));
|
|
@@ -824,6 +828,67 @@ function ensureChatResponse(result) {
|
|
|
824
828
|
}
|
|
825
829
|
return result;
|
|
826
830
|
}
|
|
831
|
+
/**
 * Decides whether a thrown value represents a failure worth retrying.
 *
 * Checks, in order:
 *  1. a numeric `status` property, compared against `retryableStatusCodes`;
 *  2. an "HTTP NNN" status embedded in a string `message`, compared the same way;
 *  3. an error `name` of "AxAIServiceNetworkError", which is always retryable.
 *
 * The first check that applies decides the result; e.g. a numeric `status`
 * that is not in the list yields `false` even if the name would match.
 *
 * @param {unknown} error - The value caught from the failed call.
 * @param {number[]} retryableStatusCodes - HTTP status codes that should be retried.
 * @returns {boolean} `true` when the failure should be retried.
 */
function isRetryableError(error, retryableStatusCodes) {
  if (!error || typeof error !== "object") {
    return false;
  }
  if ("status" in error && typeof error.status === "number") {
    return retryableStatusCodes.includes(error.status);
  }
  if ("message" in error && typeof error.message === "string") {
    // Require exactly three digits: without the lookahead, "HTTP 5000 ..."
    // would be misread as status 500.
    const match = error.message.match(/HTTP (\d{3})(?!\d)/);
    if (match) {
      const status = Number.parseInt(match[1], 10);
      return retryableStatusCodes.includes(status);
    }
  }
  if ("name" in error && error.name === "AxAIServiceNetworkError") {
    return true;
  }
  return false;
}
|
|
850
|
+
/**
 * Computes how long to wait before the next retry attempt.
 *
 * The base delay grows exponentially (`initialDelayMs * backoffFactor ** attempt`)
 * and is capped at `maxDelayMs`. It is then multiplied by a random jitter
 * factor in the range [0.75, 1.25) and capped once more.
 *
 * @param {number} attempt - Zero-based index of the attempt that just failed.
 * @param {{initialDelayMs: number, maxDelayMs: number, backoffFactor: number}} config
 *   Backoff settings.
 * @returns {number} Delay in milliseconds, never greater than `config.maxDelayMs`.
 */
function calculateRetryDelay(attempt, config) {
  const cappedBase = Math.min(
    config.maxDelayMs,
    config.initialDelayMs * config.backoffFactor ** attempt
  );
  const jittered = cappedBase * (0.75 + Math.random() * 0.5);
  // Jitter can scale the capped base by up to 1.25x; clamp again so that
  // maxDelayMs remains a hard upper bound.
  return Math.min(config.maxDelayMs, jittered);
}
|
|
857
|
+
/**
 * Returns a promise that settles once a `setTimeout` timer of `ms`
 * milliseconds has fired.
 *
 * @param {number} ms - Delay passed to `setTimeout`.
 * @returns {Promise<void>} Resolves with no value when the timer fires.
 */
async function sleep(ms) {
  const timerElapsed = new Promise((done) => {
    setTimeout(done, ms);
  });
  return timerElapsed;
}
|
|
860
|
+
/**
 * Runs `fn`, retrying with exponential backoff while it fails with a
 * retryable error.
 *
 * Missing settings fall back to: 3 retries, 1000 ms initial delay, 60000 ms
 * maximum delay, backoff factor 2, and status codes 500/408/429/502/503/504.
 * Retryability is decided by `isRetryableError`; delays come from
 * `calculateRetryDelay`.
 *
 * @param {() => Promise<any>} fn - The operation to attempt.
 * @param {{maxRetries?: number, initialDelayMs?: number, maxDelayMs?: number,
 *   backoffFactor?: number, retryableStatusCodes?: number[]}} [retryConfig]
 *   Optional overrides; undefined fields use the defaults above.
 * @param {AbortSignal} [signal] - When aborted, no further attempt is started
 *   and a pending backoff wait ends immediately.
 * @returns {Promise<any>} Whatever `fn` resolves with.
 * @throws {Error} "Request aborted: ..." when `signal` is aborted; otherwise
 *   the error from `fn` (immediately if not retryable, or the last error once
 *   retries are exhausted).
 */
async function withRetry(fn, retryConfig, signal) {
  const requestedRetries = retryConfig?.maxRetries ?? 3;
  const config = {
    // A negative or NaN value would skip the loop entirely and end in
    // `throw undefined`; treat it as "no retries" so fn still runs once.
    maxRetries: requestedRetries >= 0 ? requestedRetries : 0,
    initialDelayMs: retryConfig?.initialDelayMs ?? 1e3,
    maxDelayMs: retryConfig?.maxDelayMs ?? 6e4,
    backoffFactor: retryConfig?.backoffFactor ?? 2,
    retryableStatusCodes: retryConfig?.retryableStatusCodes ?? [500, 408, 429, 502, 503, 504]
  };

  const throwIfAborted = () => {
    if (signal?.aborted) {
      throw new Error(`Request aborted: ${signal.reason ?? "Unknown reason"}`);
    }
  };

  // Waits for the backoff delay, but wakes up early if the signal aborts so
  // the caller is not held for up to maxDelayMs after cancelling.
  const waitForBackoff = (ms) =>
    new Promise((resolve) => {
      if (typeof signal?.addEventListener !== "function") {
        setTimeout(resolve, ms);
        return;
      }
      if (signal.aborted) {
        resolve();
        return;
      }
      const onAbort = () => {
        clearTimeout(timer);
        resolve();
      };
      const timer = setTimeout(() => {
        signal.removeEventListener("abort", onAbort);
        resolve();
      }, ms);
      signal.addEventListener("abort", onAbort, { once: true });
    });

  let lastError;
  for (let attempt = 0; attempt <= config.maxRetries; attempt++) {
    throwIfAborted();
    try {
      return await fn();
    } catch (error) {
      lastError = error;
      if (attempt >= config.maxRetries) {
        break;
      }
      if (!isRetryableError(error, config.retryableStatusCodes)) {
        throw error;
      }
      await waitForBackoff(calculateRetryDelay(attempt, config));
      throwIfAborted();
    }
  }
  throw lastError;
}
|
|
827
892
|
var AzureProvider = class {
|
|
828
893
|
constructor(targetName, config) {
|
|
829
894
|
this.config = config;
|
|
@@ -833,6 +898,7 @@ var AzureProvider = class {
|
|
|
833
898
|
temperature: config.temperature,
|
|
834
899
|
maxOutputTokens: config.maxOutputTokens
|
|
835
900
|
};
|
|
901
|
+
this.retryConfig = config.retry;
|
|
836
902
|
this.ai = import_ax.AxAI.create({
|
|
837
903
|
name: "azure-openai",
|
|
838
904
|
apiKey: config.apiKey,
|
|
@@ -849,16 +915,21 @@ var AzureProvider = class {
|
|
|
849
915
|
targetName;
|
|
850
916
|
ai;
|
|
851
917
|
defaults;
|
|
918
|
+
retryConfig;
|
|
852
919
|
async invoke(request) {
|
|
853
920
|
const chatPrompt = buildChatPrompt(request);
|
|
854
921
|
const modelConfig = extractModelConfig(request, this.defaults);
|
|
855
|
-
const response = await
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
922
|
+
const response = await withRetry(
|
|
923
|
+
async () => await this.ai.chat(
|
|
924
|
+
{
|
|
925
|
+
chatPrompt,
|
|
926
|
+
model: this.config.deploymentName,
|
|
927
|
+
...modelConfig ? { modelConfig } : {}
|
|
928
|
+
},
|
|
929
|
+
request.signal ? { abortSignal: request.signal } : void 0
|
|
930
|
+
),
|
|
931
|
+
this.retryConfig,
|
|
932
|
+
request.signal
|
|
862
933
|
);
|
|
863
934
|
return mapResponse(ensureChatResponse(response));
|
|
864
935
|
}
|
|
@@ -876,6 +947,7 @@ var AnthropicProvider = class {
|
|
|
876
947
|
maxOutputTokens: config.maxOutputTokens,
|
|
877
948
|
thinkingBudget: config.thinkingBudget
|
|
878
949
|
};
|
|
950
|
+
this.retryConfig = config.retry;
|
|
879
951
|
this.ai = import_ax.AxAI.create({
|
|
880
952
|
name: "anthropic",
|
|
881
953
|
apiKey: config.apiKey
|
|
@@ -886,16 +958,21 @@ var AnthropicProvider = class {
|
|
|
886
958
|
targetName;
|
|
887
959
|
ai;
|
|
888
960
|
defaults;
|
|
961
|
+
retryConfig;
|
|
889
962
|
async invoke(request) {
|
|
890
963
|
const chatPrompt = buildChatPrompt(request);
|
|
891
964
|
const modelConfig = extractModelConfig(request, this.defaults);
|
|
892
|
-
const response = await
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
965
|
+
const response = await withRetry(
|
|
966
|
+
async () => await this.ai.chat(
|
|
967
|
+
{
|
|
968
|
+
chatPrompt,
|
|
969
|
+
model: this.config.model,
|
|
970
|
+
...modelConfig ? { modelConfig } : {}
|
|
971
|
+
},
|
|
972
|
+
request.signal ? { abortSignal: request.signal } : void 0
|
|
973
|
+
),
|
|
974
|
+
this.retryConfig,
|
|
975
|
+
request.signal
|
|
899
976
|
);
|
|
900
977
|
return mapResponse(ensureChatResponse(response));
|
|
901
978
|
}
|
|
@@ -912,6 +989,7 @@ var GeminiProvider = class {
|
|
|
912
989
|
temperature: config.temperature,
|
|
913
990
|
maxOutputTokens: config.maxOutputTokens
|
|
914
991
|
};
|
|
992
|
+
this.retryConfig = config.retry;
|
|
915
993
|
this.ai = import_ax.AxAI.create({
|
|
916
994
|
name: "google-gemini",
|
|
917
995
|
apiKey: config.apiKey
|
|
@@ -922,16 +1000,21 @@ var GeminiProvider = class {
|
|
|
922
1000
|
targetName;
|
|
923
1001
|
ai;
|
|
924
1002
|
defaults;
|
|
1003
|
+
retryConfig;
|
|
925
1004
|
async invoke(request) {
|
|
926
1005
|
const chatPrompt = buildChatPrompt(request);
|
|
927
1006
|
const modelConfig = extractModelConfig(request, this.defaults);
|
|
928
|
-
const response = await
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
1007
|
+
const response = await withRetry(
|
|
1008
|
+
async () => await this.ai.chat(
|
|
1009
|
+
{
|
|
1010
|
+
chatPrompt,
|
|
1011
|
+
model: this.config.model,
|
|
1012
|
+
...modelConfig ? { modelConfig } : {}
|
|
1013
|
+
},
|
|
1014
|
+
request.signal ? { abortSignal: request.signal } : void 0
|
|
1015
|
+
),
|
|
1016
|
+
this.retryConfig,
|
|
1017
|
+
request.signal
|
|
935
1018
|
);
|
|
936
1019
|
return mapResponse(ensureChatResponse(response));
|
|
937
1020
|
}
|
|
@@ -959,7 +1042,6 @@ async function defaultCommandRunner(command, options) {
|
|
|
959
1042
|
};
|
|
960
1043
|
try {
|
|
961
1044
|
const { stdout, stderr } = await execAsync(command, execOptions);
|
|
962
|
-
console.error(`[CLI DEBUG] SUCCESS - stdout: ${stdout.length} bytes, stderr: ${stderr.length} bytes`);
|
|
963
1045
|
return {
|
|
964
1046
|
stdout,
|
|
965
1047
|
stderr,
|
|
@@ -970,8 +1052,6 @@ async function defaultCommandRunner(command, options) {
|
|
|
970
1052
|
};
|
|
971
1053
|
} catch (error) {
|
|
972
1054
|
const execError = error;
|
|
973
|
-
console.error(`[CLI DEBUG] ERROR - code: ${execError.code}, message: ${execError.message}`);
|
|
974
|
-
console.error(`[CLI DEBUG] stdout: ${execError.stdout?.length ?? 0} bytes, stderr: ${execError.stderr?.length ?? 0} bytes`);
|
|
975
1055
|
return {
|
|
976
1056
|
stdout: execError.stdout ?? "",
|
|
977
1057
|
stderr: execError.stderr ?? "",
|
|
@@ -1004,10 +1084,9 @@ var CliProvider = class {
|
|
|
1004
1084
|
const outputFilePath = generateOutputFilePath(request.evalCaseId);
|
|
1005
1085
|
const templateValues = buildTemplateValues(request, this.config, outputFilePath);
|
|
1006
1086
|
const renderedCommand = renderTemplate(this.config.commandTemplate, templateValues);
|
|
1007
|
-
const env = this.config.env ? { ...process.env, ...this.config.env } : process.env;
|
|
1008
1087
|
const result = await this.runCommand(renderedCommand, {
|
|
1009
1088
|
cwd: this.config.cwd,
|
|
1010
|
-
env,
|
|
1089
|
+
env: process.env,
|
|
1011
1090
|
timeoutMs: this.config.timeoutMs,
|
|
1012
1091
|
signal: request.signal
|
|
1013
1092
|
});
|
|
@@ -1039,7 +1118,7 @@ var CliProvider = class {
|
|
|
1039
1118
|
}
|
|
1040
1119
|
async readAndCleanupOutputFile(filePath) {
|
|
1041
1120
|
try {
|
|
1042
|
-
const content = await
|
|
1121
|
+
const content = await readTextFile(filePath);
|
|
1043
1122
|
return content;
|
|
1044
1123
|
} catch (error) {
|
|
1045
1124
|
const errorMsg = error instanceof Error ? error.message : String(error);
|
|
@@ -1096,10 +1175,9 @@ var CliProvider = class {
|
|
|
1096
1175
|
generateOutputFilePath("healthcheck")
|
|
1097
1176
|
)
|
|
1098
1177
|
);
|
|
1099
|
-
const env = this.config.env ? { ...process.env, ...this.config.env } : process.env;
|
|
1100
1178
|
const result = await this.runCommand(renderedCommand, {
|
|
1101
1179
|
cwd: healthcheck.cwd ?? this.config.cwd,
|
|
1102
|
-
env,
|
|
1180
|
+
env: process.env,
|
|
1103
1181
|
timeoutMs,
|
|
1104
1182
|
signal
|
|
1105
1183
|
});
|
|
@@ -2050,10 +2128,9 @@ var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set(["PROMPT", "GUIDELINES", "EVAL_ID
|
|
|
2050
2128
|
var BASE_TARGET_SCHEMA = import_zod.z.object({
|
|
2051
2129
|
name: import_zod.z.string().min(1, "target name is required"),
|
|
2052
2130
|
provider: import_zod.z.string().min(1, "provider is required"),
|
|
2053
|
-
settings: import_zod.z.record(import_zod.z.unknown()).optional(),
|
|
2054
2131
|
judge_target: import_zod.z.string().optional(),
|
|
2055
2132
|
workers: import_zod.z.number().int().min(1).optional()
|
|
2056
|
-
});
|
|
2133
|
+
}).passthrough();
|
|
2057
2134
|
var DEFAULT_AZURE_API_VERSION = "2024-10-01-preview";
|
|
2058
2135
|
function normalizeAzureApiVersion(value) {
|
|
2059
2136
|
if (!value) {
|
|
@@ -2066,11 +2143,43 @@ function normalizeAzureApiVersion(value) {
|
|
|
2066
2143
|
const withoutPrefix = trimmed.replace(/^api[-_]?version\s*=\s*/i, "").trim();
|
|
2067
2144
|
return withoutPrefix.length > 0 ? withoutPrefix : DEFAULT_AZURE_API_VERSION;
|
|
2068
2145
|
}
|
|
2146
|
+
/**
 * Collects the retry-related settings of a target definition.
 *
 * Each setting is read from its snake_case key first and falls back to the
 * camelCase key. Numeric settings are passed through `resolveOptionalNumber`
 * and the status-code list through `resolveOptionalNumberArray`, each with a
 * description prefixed by the target name.
 *
 * @param {object} target - Target definition; `target.name` labels the descriptions.
 * @returns {{maxRetries: (number|undefined), initialDelayMs: (number|undefined),
 *   maxDelayMs: (number|undefined), backoffFactor: (number|undefined),
 *   retryableStatusCodes: (number[]|undefined)}|undefined}
 *   The resolved settings, or `undefined` when none of them is set.
 */
function resolveRetryConfig(target) {
  const retry = {
    maxRetries: resolveOptionalNumber(
      target.max_retries ?? target.maxRetries,
      `${target.name} max retries`
    ),
    initialDelayMs: resolveOptionalNumber(
      target.retry_initial_delay_ms ?? target.retryInitialDelayMs,
      `${target.name} retry initial delay`
    ),
    maxDelayMs: resolveOptionalNumber(
      target.retry_max_delay_ms ?? target.retryMaxDelayMs,
      `${target.name} retry max delay`
    ),
    backoffFactor: resolveOptionalNumber(
      target.retry_backoff_factor ?? target.retryBackoffFactor,
      `${target.name} retry backoff factor`
    ),
    retryableStatusCodes: resolveOptionalNumberArray(
      target.retry_status_codes ?? target.retryStatusCodes,
      `${target.name} retry status codes`
    )
  };
  // Nothing configured at all: report "no retry config" rather than an
  // object full of undefined fields.
  const nothingConfigured = Object.values(retry).every((setting) => setting === void 0);
  return nothingConfigured ? void 0 : retry;
}
|
|
2069
2178
|
function resolveTargetDefinition(definition, env = process.env) {
|
|
2070
2179
|
const parsed = BASE_TARGET_SCHEMA.parse(definition);
|
|
2071
2180
|
const provider = parsed.provider.toLowerCase();
|
|
2072
2181
|
const providerBatching = resolveOptionalBoolean(
|
|
2073
|
-
parsed.
|
|
2182
|
+
parsed.provider_batching ?? parsed.providerBatching
|
|
2074
2183
|
);
|
|
2075
2184
|
switch (provider) {
|
|
2076
2185
|
case "azure":
|
|
@@ -2146,13 +2255,12 @@ function resolveTargetDefinition(definition, env = process.env) {
|
|
|
2146
2255
|
}
|
|
2147
2256
|
}
|
|
2148
2257
|
function resolveAzureConfig(target, env) {
|
|
2149
|
-
const
|
|
2150
|
-
const
|
|
2151
|
-
const
|
|
2152
|
-
const
|
|
2153
|
-
const
|
|
2154
|
-
const
|
|
2155
|
-
const maxTokensSource = settings.max_output_tokens ?? settings.maxTokens;
|
|
2258
|
+
const endpointSource = target.endpoint ?? target.resource ?? target.resourceName;
|
|
2259
|
+
const apiKeySource = target.api_key ?? target.apiKey;
|
|
2260
|
+
const deploymentSource = target.deployment ?? target.deploymentName ?? target.model;
|
|
2261
|
+
const versionSource = target.version ?? target.api_version;
|
|
2262
|
+
const temperatureSource = target.temperature;
|
|
2263
|
+
const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
|
|
2156
2264
|
const resourceName = resolveString(endpointSource, env, `${target.name} endpoint`);
|
|
2157
2265
|
const apiKey = resolveString(apiKeySource, env, `${target.name} api key`);
|
|
2158
2266
|
const deploymentName = resolveString(deploymentSource, env, `${target.name} deployment`);
|
|
@@ -2164,58 +2272,61 @@ function resolveAzureConfig(target, env) {
|
|
|
2164
2272
|
maxTokensSource,
|
|
2165
2273
|
`${target.name} max output tokens`
|
|
2166
2274
|
);
|
|
2275
|
+
const retry = resolveRetryConfig(target);
|
|
2167
2276
|
return {
|
|
2168
2277
|
resourceName,
|
|
2169
2278
|
deploymentName,
|
|
2170
2279
|
apiKey,
|
|
2171
2280
|
version,
|
|
2172
2281
|
temperature,
|
|
2173
|
-
maxOutputTokens
|
|
2282
|
+
maxOutputTokens,
|
|
2283
|
+
retry
|
|
2174
2284
|
};
|
|
2175
2285
|
}
|
|
2176
2286
|
function resolveAnthropicConfig(target, env) {
|
|
2177
|
-
const
|
|
2178
|
-
const
|
|
2179
|
-
const
|
|
2180
|
-
const
|
|
2181
|
-
const
|
|
2182
|
-
const thinkingBudgetSource = settings.thinking_budget ?? settings.thinkingBudget;
|
|
2287
|
+
const apiKeySource = target.api_key ?? target.apiKey;
|
|
2288
|
+
const modelSource = target.model ?? target.deployment ?? target.variant;
|
|
2289
|
+
const temperatureSource = target.temperature;
|
|
2290
|
+
const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
|
|
2291
|
+
const thinkingBudgetSource = target.thinking_budget ?? target.thinkingBudget;
|
|
2183
2292
|
const apiKey = resolveString(apiKeySource, env, `${target.name} Anthropic api key`);
|
|
2184
2293
|
const model = resolveString(modelSource, env, `${target.name} Anthropic model`);
|
|
2294
|
+
const retry = resolveRetryConfig(target);
|
|
2185
2295
|
return {
|
|
2186
2296
|
apiKey,
|
|
2187
2297
|
model,
|
|
2188
2298
|
temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
|
|
2189
2299
|
maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`),
|
|
2190
|
-
thinkingBudget: resolveOptionalNumber(thinkingBudgetSource, `${target.name} thinking budget`)
|
|
2300
|
+
thinkingBudget: resolveOptionalNumber(thinkingBudgetSource, `${target.name} thinking budget`),
|
|
2301
|
+
retry
|
|
2191
2302
|
};
|
|
2192
2303
|
}
|
|
2193
2304
|
function resolveGeminiConfig(target, env) {
|
|
2194
|
-
const
|
|
2195
|
-
const
|
|
2196
|
-
const
|
|
2197
|
-
const
|
|
2198
|
-
const maxTokensSource = settings.max_output_tokens ?? settings.maxTokens;
|
|
2305
|
+
const apiKeySource = target.api_key ?? target.apiKey;
|
|
2306
|
+
const modelSource = target.model ?? target.deployment ?? target.variant;
|
|
2307
|
+
const temperatureSource = target.temperature;
|
|
2308
|
+
const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
|
|
2199
2309
|
const apiKey = resolveString(apiKeySource, env, `${target.name} Google API key`);
|
|
2200
2310
|
const model = resolveOptionalString(modelSource, env, `${target.name} Gemini model`, {
|
|
2201
2311
|
allowLiteral: true,
|
|
2202
2312
|
optionalEnv: true
|
|
2203
2313
|
}) ?? "gemini-2.5-flash";
|
|
2314
|
+
const retry = resolveRetryConfig(target);
|
|
2204
2315
|
return {
|
|
2205
2316
|
apiKey,
|
|
2206
2317
|
model,
|
|
2207
2318
|
temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
|
|
2208
|
-
maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`)
|
|
2319
|
+
maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`),
|
|
2320
|
+
retry
|
|
2209
2321
|
};
|
|
2210
2322
|
}
|
|
2211
2323
|
function resolveCodexConfig(target, env) {
|
|
2212
|
-
const
|
|
2213
|
-
const
|
|
2214
|
-
const
|
|
2215
|
-
const
|
|
2216
|
-
const
|
|
2217
|
-
const
|
|
2218
|
-
const logFormatSource = settings.log_format ?? settings.logFormat ?? settings.log_output_format ?? settings.logOutputFormat ?? env.AGENTV_CODEX_LOG_FORMAT;
|
|
2324
|
+
const executableSource = target.executable ?? target.command ?? target.binary;
|
|
2325
|
+
const argsSource = target.args ?? target.arguments;
|
|
2326
|
+
const cwdSource = target.cwd;
|
|
2327
|
+
const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
|
|
2328
|
+
const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
|
|
2329
|
+
const logFormatSource = target.log_format ?? target.logFormat ?? target.log_output_format ?? target.logOutputFormat ?? env.AGENTV_CODEX_LOG_FORMAT;
|
|
2219
2330
|
const executable = resolveOptionalString(executableSource, env, `${target.name} codex executable`, {
|
|
2220
2331
|
allowLiteral: true,
|
|
2221
2332
|
optionalEnv: true
|
|
@@ -2254,21 +2365,19 @@ function normalizeCodexLogFormat(value) {
|
|
|
2254
2365
|
throw new Error("codex log format must be 'summary' or 'json'");
|
|
2255
2366
|
}
|
|
2256
2367
|
function resolveMockConfig(target) {
|
|
2257
|
-
const
|
|
2258
|
-
const response = typeof settings.response === "string" ? settings.response : void 0;
|
|
2368
|
+
const response = typeof target.response === "string" ? target.response : void 0;
|
|
2259
2369
|
return { response };
|
|
2260
2370
|
}
|
|
2261
2371
|
function resolveVSCodeConfig(target, env, insiders) {
|
|
2262
|
-
const
|
|
2263
|
-
const workspaceTemplateEnvVar = resolveOptionalLiteralString(settings.workspace_template ?? settings.workspaceTemplate);
|
|
2372
|
+
const workspaceTemplateEnvVar = resolveOptionalLiteralString(target.workspace_template ?? target.workspaceTemplate);
|
|
2264
2373
|
const workspaceTemplate = workspaceTemplateEnvVar ? resolveOptionalString(workspaceTemplateEnvVar, env, `${target.name} workspace template path`, {
|
|
2265
2374
|
allowLiteral: false,
|
|
2266
2375
|
optionalEnv: true
|
|
2267
2376
|
}) : void 0;
|
|
2268
|
-
const commandSource =
|
|
2269
|
-
const waitSource =
|
|
2270
|
-
const dryRunSource =
|
|
2271
|
-
const subagentRootSource =
|
|
2377
|
+
const commandSource = target.vscode_cmd ?? target.command;
|
|
2378
|
+
const waitSource = target.wait;
|
|
2379
|
+
const dryRunSource = target.dry_run ?? target.dryRun;
|
|
2380
|
+
const subagentRootSource = target.subagent_root ?? target.subagentRoot;
|
|
2272
2381
|
const defaultCommand = insiders ? "code-insiders" : "code";
|
|
2273
2382
|
const command = resolveOptionalLiteralString(commandSource) ?? defaultCommand;
|
|
2274
2383
|
return {
|
|
@@ -2283,18 +2392,16 @@ function resolveVSCodeConfig(target, env, insiders) {
|
|
|
2283
2392
|
};
|
|
2284
2393
|
}
|
|
2285
2394
|
function resolveCliConfig(target, env) {
|
|
2286
|
-
const
|
|
2287
|
-
const commandTemplateSource = settings.command_template ?? settings.commandTemplate;
|
|
2395
|
+
const commandTemplateSource = target.command_template ?? target.commandTemplate;
|
|
2288
2396
|
const filesFormat = resolveOptionalLiteralString(
|
|
2289
|
-
|
|
2397
|
+
target.files_format ?? target.filesFormat ?? target.attachments_format ?? target.attachmentsFormat
|
|
2290
2398
|
);
|
|
2291
|
-
const cwd = resolveOptionalString(
|
|
2399
|
+
const cwd = resolveOptionalString(target.cwd, env, `${target.name} working directory`, {
|
|
2292
2400
|
allowLiteral: true,
|
|
2293
2401
|
optionalEnv: true
|
|
2294
2402
|
});
|
|
2295
|
-
const
|
|
2296
|
-
const
|
|
2297
|
-
const healthcheck = resolveCliHealthcheck(settings.healthcheck, env, target.name);
|
|
2403
|
+
const timeoutMs = resolveTimeoutMs(target.timeout_seconds ?? target.timeoutSeconds, `${target.name} timeout`);
|
|
2404
|
+
const healthcheck = resolveCliHealthcheck(target.healthcheck, env, target.name);
|
|
2298
2405
|
const commandTemplate = resolveString(
|
|
2299
2406
|
commandTemplateSource,
|
|
2300
2407
|
env,
|
|
@@ -2306,29 +2413,10 @@ function resolveCliConfig(target, env) {
|
|
|
2306
2413
|
commandTemplate,
|
|
2307
2414
|
filesFormat,
|
|
2308
2415
|
cwd,
|
|
2309
|
-
env: envOverrides,
|
|
2310
2416
|
timeoutMs,
|
|
2311
2417
|
healthcheck
|
|
2312
2418
|
};
|
|
2313
2419
|
}
|
|
2314
|
-
function resolveEnvOverrides(source, env, targetName) {
|
|
2315
|
-
if (source === void 0 || source === null) {
|
|
2316
|
-
return void 0;
|
|
2317
|
-
}
|
|
2318
|
-
if (typeof source !== "object" || Array.isArray(source)) {
|
|
2319
|
-
throw new Error(`${targetName} env overrides must be an object map of strings`);
|
|
2320
|
-
}
|
|
2321
|
-
const entries = Object.entries(source);
|
|
2322
|
-
const resolved = {};
|
|
2323
|
-
for (const [key, value] of entries) {
|
|
2324
|
-
if (typeof value !== "string") {
|
|
2325
|
-
throw new Error(`${targetName} env override '${key}' must be a string`);
|
|
2326
|
-
}
|
|
2327
|
-
const resolvedValue = resolveString(value, env, `${targetName} env override '${key}'`);
|
|
2328
|
-
resolved[key] = resolvedValue;
|
|
2329
|
-
}
|
|
2330
|
-
return Object.keys(resolved).length > 0 ? resolved : void 0;
|
|
2331
|
-
}
|
|
2332
2420
|
function resolveTimeoutMs(source, description) {
|
|
2333
2421
|
const seconds = resolveOptionalNumber(source, `${description} (seconds)`);
|
|
2334
2422
|
if (seconds === void 0) {
|
|
@@ -2524,6 +2612,26 @@ function resolveOptionalStringArray(source, env, description) {
|
|
|
2524
2612
|
}
|
|
2525
2613
|
return resolved.length > 0 ? resolved : void 0;
|
|
2526
2614
|
}
|
|
2615
|
+
/**
 * Validates an optional list of numbers.
 *
 * @param {unknown} source - The raw value; `undefined`, `null`, or an empty
 *   array all mean "not provided".
 * @param {string} description - Label used at the start of error messages.
 * @returns {number[]|undefined} A new array holding the validated numbers, or
 *   `undefined` when nothing was provided.
 * @throws {Error} When `source` is not an array, or when any element is not a
 *   finite number (the message names the offending index).
 */
function resolveOptionalNumberArray(source, description) {
  if (source === void 0 || source === null) {
    return void 0;
  }
  if (!Array.isArray(source)) {
    throw new Error(`${description} must be an array of numbers`);
  }
  if (source.length === 0) {
    return void 0;
  }
  const numbers = [];
  // entries() also visits holes in sparse arrays (as undefined), so every
  // index is validated.
  for (const [index, value] of source.entries()) {
    const isFiniteNumber = typeof value === "number" && Number.isFinite(value);
    if (!isFiniteNumber) {
      throw new Error(`${description}[${index}] must be a number`);
    }
    numbers.push(value);
  }
  return numbers;
}
|
|
2527
2635
|
|
|
2528
2636
|
// src/evaluation/providers/vscode.ts
|
|
2529
2637
|
var import_node_path6 = __toESM(require("path"), 1);
|
|
@@ -2783,7 +2891,7 @@ var AGENT_PROVIDER_KINDS = [
|
|
|
2783
2891
|
"vscode",
|
|
2784
2892
|
"vscode-insiders"
|
|
2785
2893
|
];
|
|
2786
|
-
var TARGETS_SCHEMA_V2 = "agentv-targets-v2.
|
|
2894
|
+
var TARGETS_SCHEMA_V2 = "agentv-targets-v2.2";
|
|
2787
2895
|
function isAgentProvider(provider) {
|
|
2788
2896
|
return provider ? AGENT_PROVIDER_KINDS.includes(provider.kind) : false;
|
|
2789
2897
|
}
|
|
@@ -2826,20 +2934,13 @@ function assertTargetDefinition(value, index, filePath) {
|
|
|
2826
2934
|
}
|
|
2827
2935
|
const name = value.name;
|
|
2828
2936
|
const provider = value.provider;
|
|
2829
|
-
const settings = value.settings;
|
|
2830
|
-
const judgeTarget = value.judge_target;
|
|
2831
2937
|
if (typeof name !== "string" || name.trim().length === 0) {
|
|
2832
2938
|
throw new Error(`targets.yaml entry at index ${index} in ${filePath} is missing a valid 'name'`);
|
|
2833
2939
|
}
|
|
2834
2940
|
if (typeof provider !== "string" || provider.trim().length === 0) {
|
|
2835
2941
|
throw new Error(`targets.yaml entry '${name}' in ${filePath} is missing a valid 'provider'`);
|
|
2836
2942
|
}
|
|
2837
|
-
return
|
|
2838
|
-
name,
|
|
2839
|
-
provider,
|
|
2840
|
-
settings: isRecord(settings) ? settings : void 0,
|
|
2841
|
-
judge_target: typeof judgeTarget === "string" ? judgeTarget : void 0
|
|
2842
|
-
};
|
|
2943
|
+
return value;
|
|
2843
2944
|
}
|
|
2844
2945
|
async function fileExists3(filePath) {
|
|
2845
2946
|
try {
|
|
@@ -3480,10 +3581,11 @@ async function runEvaluation(options) {
|
|
|
3480
3581
|
await onProgress({
|
|
3481
3582
|
workerId,
|
|
3482
3583
|
evalId: evalCase.id,
|
|
3483
|
-
status: "completed",
|
|
3584
|
+
status: result.error ? "failed" : "completed",
|
|
3484
3585
|
startedAt: 0,
|
|
3485
3586
|
// Not used for completed status
|
|
3486
|
-
completedAt: Date.now()
|
|
3587
|
+
completedAt: Date.now(),
|
|
3588
|
+
error: result.error
|
|
3487
3589
|
});
|
|
3488
3590
|
}
|
|
3489
3591
|
if (onResult) {
|
|
@@ -4021,7 +4123,8 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
|
|
|
4021
4123
|
target: targetName,
|
|
4022
4124
|
timestamp: timestamp.toISOString(),
|
|
4023
4125
|
raw_aspects: [],
|
|
4024
|
-
raw_request: rawRequest
|
|
4126
|
+
raw_request: rawRequest,
|
|
4127
|
+
error: message
|
|
4025
4128
|
};
|
|
4026
4129
|
}
|
|
4027
4130
|
function createCacheKey(provider, target, evalCase, promptInputs) {
|
|
@@ -4078,6 +4181,7 @@ function createAgentKernel() {
|
|
|
4078
4181
|
isTestMessageRole,
|
|
4079
4182
|
listTargetNames,
|
|
4080
4183
|
loadEvalCases,
|
|
4184
|
+
normalizeLineEndings,
|
|
4081
4185
|
readTargetDefinitions,
|
|
4082
4186
|
readTextFile,
|
|
4083
4187
|
resolveAndCreateProvider,
|