agentv 0.7.4 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +82 -37
- package/dist/{chunk-R2OCC2OH.js → chunk-X2VVUCIB.js} +334 -208
- package/dist/chunk-X2VVUCIB.js.map +1 -0
- package/dist/cli.js +1 -1
- package/dist/index.js +1 -1
- package/dist/templates/agentv/targets.yaml +35 -43
- package/package.json +2 -3
- package/dist/chunk-R2OCC2OH.js.map +0 -1
|
@@ -590,7 +590,7 @@ import fg from "fast-glob";
|
|
|
590
590
|
import { stat as stat3 } from "node:fs/promises";
|
|
591
591
|
import path15 from "node:path";
|
|
592
592
|
|
|
593
|
-
// ../../packages/core/dist/chunk-
|
|
593
|
+
// ../../packages/core/dist/chunk-SNTZFB24.js
|
|
594
594
|
import { constants } from "node:fs";
|
|
595
595
|
import { access, readFile } from "node:fs/promises";
|
|
596
596
|
import path from "node:path";
|
|
@@ -4636,7 +4636,7 @@ var coerce = {
|
|
|
4636
4636
|
};
|
|
4637
4637
|
var NEVER = INVALID;
|
|
4638
4638
|
|
|
4639
|
-
// ../../packages/core/dist/chunk-
|
|
4639
|
+
// ../../packages/core/dist/chunk-SNTZFB24.js
|
|
4640
4640
|
async function fileExists(filePath) {
|
|
4641
4641
|
try {
|
|
4642
4642
|
await access(filePath, constants.F_OK);
|
|
@@ -4645,9 +4645,12 @@ async function fileExists(filePath) {
|
|
|
4645
4645
|
return false;
|
|
4646
4646
|
}
|
|
4647
4647
|
}
|
|
4648
|
+
function normalizeLineEndings(content) {
|
|
4649
|
+
return content.replace(/\r\n/g, "\n");
|
|
4650
|
+
}
|
|
4648
4651
|
async function readTextFile(filePath) {
|
|
4649
4652
|
const content = await readFile(filePath, "utf8");
|
|
4650
|
-
return content
|
|
4653
|
+
return normalizeLineEndings(content);
|
|
4651
4654
|
}
|
|
4652
4655
|
async function findGitRoot(startPath) {
|
|
4653
4656
|
let currentDir = path.dirname(path.resolve(startPath));
|
|
@@ -4744,10 +4747,9 @@ var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set(["PROMPT", "GUIDELINES", "EVAL_ID
|
|
|
4744
4747
|
var BASE_TARGET_SCHEMA = external_exports.object({
|
|
4745
4748
|
name: external_exports.string().min(1, "target name is required"),
|
|
4746
4749
|
provider: external_exports.string().min(1, "provider is required"),
|
|
4747
|
-
settings: external_exports.record(external_exports.unknown()).optional(),
|
|
4748
4750
|
judge_target: external_exports.string().optional(),
|
|
4749
4751
|
workers: external_exports.number().int().min(1).optional()
|
|
4750
|
-
});
|
|
4752
|
+
}).passthrough();
|
|
4751
4753
|
var DEFAULT_AZURE_API_VERSION = "2024-10-01-preview";
|
|
4752
4754
|
function normalizeAzureApiVersion(value) {
|
|
4753
4755
|
if (!value) {
|
|
@@ -4760,11 +4762,43 @@ function normalizeAzureApiVersion(value) {
|
|
|
4760
4762
|
const withoutPrefix = trimmed.replace(/^api[-_]?version\s*=\s*/i, "").trim();
|
|
4761
4763
|
return withoutPrefix.length > 0 ? withoutPrefix : DEFAULT_AZURE_API_VERSION;
|
|
4762
4764
|
}
|
|
4765
|
+
function resolveRetryConfig(target) {
|
|
4766
|
+
const maxRetries = resolveOptionalNumber(
|
|
4767
|
+
target.max_retries ?? target.maxRetries,
|
|
4768
|
+
`${target.name} max retries`
|
|
4769
|
+
);
|
|
4770
|
+
const initialDelayMs = resolveOptionalNumber(
|
|
4771
|
+
target.retry_initial_delay_ms ?? target.retryInitialDelayMs,
|
|
4772
|
+
`${target.name} retry initial delay`
|
|
4773
|
+
);
|
|
4774
|
+
const maxDelayMs = resolveOptionalNumber(
|
|
4775
|
+
target.retry_max_delay_ms ?? target.retryMaxDelayMs,
|
|
4776
|
+
`${target.name} retry max delay`
|
|
4777
|
+
);
|
|
4778
|
+
const backoffFactor = resolveOptionalNumber(
|
|
4779
|
+
target.retry_backoff_factor ?? target.retryBackoffFactor,
|
|
4780
|
+
`${target.name} retry backoff factor`
|
|
4781
|
+
);
|
|
4782
|
+
const retryableStatusCodes = resolveOptionalNumberArray(
|
|
4783
|
+
target.retry_status_codes ?? target.retryStatusCodes,
|
|
4784
|
+
`${target.name} retry status codes`
|
|
4785
|
+
);
|
|
4786
|
+
if (maxRetries === void 0 && initialDelayMs === void 0 && maxDelayMs === void 0 && backoffFactor === void 0 && retryableStatusCodes === void 0) {
|
|
4787
|
+
return void 0;
|
|
4788
|
+
}
|
|
4789
|
+
return {
|
|
4790
|
+
maxRetries,
|
|
4791
|
+
initialDelayMs,
|
|
4792
|
+
maxDelayMs,
|
|
4793
|
+
backoffFactor,
|
|
4794
|
+
retryableStatusCodes
|
|
4795
|
+
};
|
|
4796
|
+
}
|
|
4763
4797
|
function resolveTargetDefinition(definition, env = process.env) {
|
|
4764
4798
|
const parsed = BASE_TARGET_SCHEMA.parse(definition);
|
|
4765
4799
|
const provider = parsed.provider.toLowerCase();
|
|
4766
4800
|
const providerBatching = resolveOptionalBoolean(
|
|
4767
|
-
parsed.
|
|
4801
|
+
parsed.provider_batching ?? parsed.providerBatching
|
|
4768
4802
|
);
|
|
4769
4803
|
switch (provider) {
|
|
4770
4804
|
case "azure":
|
|
@@ -4840,13 +4874,12 @@ function resolveTargetDefinition(definition, env = process.env) {
|
|
|
4840
4874
|
}
|
|
4841
4875
|
}
|
|
4842
4876
|
function resolveAzureConfig(target, env) {
|
|
4843
|
-
const
|
|
4844
|
-
const
|
|
4845
|
-
const
|
|
4846
|
-
const
|
|
4847
|
-
const
|
|
4848
|
-
const
|
|
4849
|
-
const maxTokensSource = settings.max_output_tokens ?? settings.maxTokens;
|
|
4877
|
+
const endpointSource = target.endpoint ?? target.resource ?? target.resourceName;
|
|
4878
|
+
const apiKeySource = target.api_key ?? target.apiKey;
|
|
4879
|
+
const deploymentSource = target.deployment ?? target.deploymentName ?? target.model;
|
|
4880
|
+
const versionSource = target.version ?? target.api_version;
|
|
4881
|
+
const temperatureSource = target.temperature;
|
|
4882
|
+
const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
|
|
4850
4883
|
const resourceName = resolveString(endpointSource, env, `${target.name} endpoint`);
|
|
4851
4884
|
const apiKey = resolveString(apiKeySource, env, `${target.name} api key`);
|
|
4852
4885
|
const deploymentName = resolveString(deploymentSource, env, `${target.name} deployment`);
|
|
@@ -4858,58 +4891,61 @@ function resolveAzureConfig(target, env) {
|
|
|
4858
4891
|
maxTokensSource,
|
|
4859
4892
|
`${target.name} max output tokens`
|
|
4860
4893
|
);
|
|
4894
|
+
const retry = resolveRetryConfig(target);
|
|
4861
4895
|
return {
|
|
4862
4896
|
resourceName,
|
|
4863
4897
|
deploymentName,
|
|
4864
4898
|
apiKey,
|
|
4865
4899
|
version,
|
|
4866
4900
|
temperature,
|
|
4867
|
-
maxOutputTokens
|
|
4901
|
+
maxOutputTokens,
|
|
4902
|
+
retry
|
|
4868
4903
|
};
|
|
4869
4904
|
}
|
|
4870
4905
|
function resolveAnthropicConfig(target, env) {
|
|
4871
|
-
const
|
|
4872
|
-
const
|
|
4873
|
-
const
|
|
4874
|
-
const
|
|
4875
|
-
const
|
|
4876
|
-
const thinkingBudgetSource = settings.thinking_budget ?? settings.thinkingBudget;
|
|
4906
|
+
const apiKeySource = target.api_key ?? target.apiKey;
|
|
4907
|
+
const modelSource = target.model ?? target.deployment ?? target.variant;
|
|
4908
|
+
const temperatureSource = target.temperature;
|
|
4909
|
+
const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
|
|
4910
|
+
const thinkingBudgetSource = target.thinking_budget ?? target.thinkingBudget;
|
|
4877
4911
|
const apiKey = resolveString(apiKeySource, env, `${target.name} Anthropic api key`);
|
|
4878
4912
|
const model = resolveString(modelSource, env, `${target.name} Anthropic model`);
|
|
4913
|
+
const retry = resolveRetryConfig(target);
|
|
4879
4914
|
return {
|
|
4880
4915
|
apiKey,
|
|
4881
4916
|
model,
|
|
4882
4917
|
temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
|
|
4883
4918
|
maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`),
|
|
4884
|
-
thinkingBudget: resolveOptionalNumber(thinkingBudgetSource, `${target.name} thinking budget`)
|
|
4919
|
+
thinkingBudget: resolveOptionalNumber(thinkingBudgetSource, `${target.name} thinking budget`),
|
|
4920
|
+
retry
|
|
4885
4921
|
};
|
|
4886
4922
|
}
|
|
4887
4923
|
function resolveGeminiConfig(target, env) {
|
|
4888
|
-
const
|
|
4889
|
-
const
|
|
4890
|
-
const
|
|
4891
|
-
const
|
|
4892
|
-
const maxTokensSource = settings.max_output_tokens ?? settings.maxTokens;
|
|
4924
|
+
const apiKeySource = target.api_key ?? target.apiKey;
|
|
4925
|
+
const modelSource = target.model ?? target.deployment ?? target.variant;
|
|
4926
|
+
const temperatureSource = target.temperature;
|
|
4927
|
+
const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
|
|
4893
4928
|
const apiKey = resolveString(apiKeySource, env, `${target.name} Google API key`);
|
|
4894
4929
|
const model = resolveOptionalString(modelSource, env, `${target.name} Gemini model`, {
|
|
4895
4930
|
allowLiteral: true,
|
|
4896
4931
|
optionalEnv: true
|
|
4897
4932
|
}) ?? "gemini-2.5-flash";
|
|
4933
|
+
const retry = resolveRetryConfig(target);
|
|
4898
4934
|
return {
|
|
4899
4935
|
apiKey,
|
|
4900
4936
|
model,
|
|
4901
4937
|
temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
|
|
4902
|
-
maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`)
|
|
4938
|
+
maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`),
|
|
4939
|
+
retry
|
|
4903
4940
|
};
|
|
4904
4941
|
}
|
|
4905
4942
|
function resolveCodexConfig(target, env) {
|
|
4906
|
-
const
|
|
4907
|
-
const
|
|
4908
|
-
const
|
|
4909
|
-
const
|
|
4910
|
-
const
|
|
4911
|
-
const
|
|
4912
|
-
const logFormatSource = settings.log_format ?? settings.logFormat ?? settings.log_output_format ?? settings.logOutputFormat ?? env.AGENTV_CODEX_LOG_FORMAT;
|
|
4943
|
+
const executableSource = target.executable ?? target.command ?? target.binary;
|
|
4944
|
+
const argsSource = target.args ?? target.arguments;
|
|
4945
|
+
const cwdSource = target.cwd;
|
|
4946
|
+
const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
|
|
4947
|
+
const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
|
|
4948
|
+
const logFormatSource = target.log_format ?? target.logFormat ?? target.log_output_format ?? target.logOutputFormat ?? env.AGENTV_CODEX_LOG_FORMAT;
|
|
4913
4949
|
const executable = resolveOptionalString(executableSource, env, `${target.name} codex executable`, {
|
|
4914
4950
|
allowLiteral: true,
|
|
4915
4951
|
optionalEnv: true
|
|
@@ -4948,21 +4984,19 @@ function normalizeCodexLogFormat(value) {
|
|
|
4948
4984
|
throw new Error("codex log format must be 'summary' or 'json'");
|
|
4949
4985
|
}
|
|
4950
4986
|
function resolveMockConfig(target) {
|
|
4951
|
-
const
|
|
4952
|
-
const response = typeof settings.response === "string" ? settings.response : void 0;
|
|
4987
|
+
const response = typeof target.response === "string" ? target.response : void 0;
|
|
4953
4988
|
return { response };
|
|
4954
4989
|
}
|
|
4955
4990
|
function resolveVSCodeConfig(target, env, insiders) {
|
|
4956
|
-
const
|
|
4957
|
-
const workspaceTemplateEnvVar = resolveOptionalLiteralString(settings.workspace_template ?? settings.workspaceTemplate);
|
|
4991
|
+
const workspaceTemplateEnvVar = resolveOptionalLiteralString(target.workspace_template ?? target.workspaceTemplate);
|
|
4958
4992
|
const workspaceTemplate = workspaceTemplateEnvVar ? resolveOptionalString(workspaceTemplateEnvVar, env, `${target.name} workspace template path`, {
|
|
4959
4993
|
allowLiteral: false,
|
|
4960
4994
|
optionalEnv: true
|
|
4961
4995
|
}) : void 0;
|
|
4962
|
-
const commandSource =
|
|
4963
|
-
const waitSource =
|
|
4964
|
-
const dryRunSource =
|
|
4965
|
-
const subagentRootSource =
|
|
4996
|
+
const commandSource = target.vscode_cmd ?? target.command;
|
|
4997
|
+
const waitSource = target.wait;
|
|
4998
|
+
const dryRunSource = target.dry_run ?? target.dryRun;
|
|
4999
|
+
const subagentRootSource = target.subagent_root ?? target.subagentRoot;
|
|
4966
5000
|
const defaultCommand = insiders ? "code-insiders" : "code";
|
|
4967
5001
|
const command = resolveOptionalLiteralString(commandSource) ?? defaultCommand;
|
|
4968
5002
|
return {
|
|
@@ -4977,18 +5011,16 @@ function resolveVSCodeConfig(target, env, insiders) {
|
|
|
4977
5011
|
};
|
|
4978
5012
|
}
|
|
4979
5013
|
function resolveCliConfig(target, env) {
|
|
4980
|
-
const
|
|
4981
|
-
const commandTemplateSource = settings.command_template ?? settings.commandTemplate;
|
|
5014
|
+
const commandTemplateSource = target.command_template ?? target.commandTemplate;
|
|
4982
5015
|
const filesFormat = resolveOptionalLiteralString(
|
|
4983
|
-
|
|
5016
|
+
target.files_format ?? target.filesFormat ?? target.attachments_format ?? target.attachmentsFormat
|
|
4984
5017
|
);
|
|
4985
|
-
const cwd = resolveOptionalString(
|
|
5018
|
+
const cwd = resolveOptionalString(target.cwd, env, `${target.name} working directory`, {
|
|
4986
5019
|
allowLiteral: true,
|
|
4987
5020
|
optionalEnv: true
|
|
4988
5021
|
});
|
|
4989
|
-
const
|
|
4990
|
-
const
|
|
4991
|
-
const healthcheck = resolveCliHealthcheck(settings.healthcheck, env, target.name);
|
|
5022
|
+
const timeoutMs = resolveTimeoutMs(target.timeout_seconds ?? target.timeoutSeconds, `${target.name} timeout`);
|
|
5023
|
+
const healthcheck = resolveCliHealthcheck(target.healthcheck, env, target.name);
|
|
4992
5024
|
const commandTemplate = resolveString(
|
|
4993
5025
|
commandTemplateSource,
|
|
4994
5026
|
env,
|
|
@@ -5000,29 +5032,10 @@ function resolveCliConfig(target, env) {
|
|
|
5000
5032
|
commandTemplate,
|
|
5001
5033
|
filesFormat,
|
|
5002
5034
|
cwd,
|
|
5003
|
-
env: envOverrides,
|
|
5004
5035
|
timeoutMs,
|
|
5005
5036
|
healthcheck
|
|
5006
5037
|
};
|
|
5007
5038
|
}
|
|
5008
|
-
function resolveEnvOverrides(source2, env, targetName) {
|
|
5009
|
-
if (source2 === void 0 || source2 === null) {
|
|
5010
|
-
return void 0;
|
|
5011
|
-
}
|
|
5012
|
-
if (typeof source2 !== "object" || Array.isArray(source2)) {
|
|
5013
|
-
throw new Error(`${targetName} env overrides must be an object map of strings`);
|
|
5014
|
-
}
|
|
5015
|
-
const entries = Object.entries(source2);
|
|
5016
|
-
const resolved = {};
|
|
5017
|
-
for (const [key2, value] of entries) {
|
|
5018
|
-
if (typeof value !== "string") {
|
|
5019
|
-
throw new Error(`${targetName} env override '${key2}' must be a string`);
|
|
5020
|
-
}
|
|
5021
|
-
const resolvedValue = resolveString(value, env, `${targetName} env override '${key2}'`);
|
|
5022
|
-
resolved[key2] = resolvedValue;
|
|
5023
|
-
}
|
|
5024
|
-
return Object.keys(resolved).length > 0 ? resolved : void 0;
|
|
5025
|
-
}
|
|
5026
5039
|
function resolveTimeoutMs(source2, description) {
|
|
5027
5040
|
const seconds = resolveOptionalNumber(source2, `${description} (seconds)`);
|
|
5028
5041
|
if (seconds === void 0) {
|
|
@@ -5218,6 +5231,26 @@ function resolveOptionalStringArray(source2, env, description) {
|
|
|
5218
5231
|
}
|
|
5219
5232
|
return resolved.length > 0 ? resolved : void 0;
|
|
5220
5233
|
}
|
|
5234
|
+
function resolveOptionalNumberArray(source2, description) {
|
|
5235
|
+
if (source2 === void 0 || source2 === null) {
|
|
5236
|
+
return void 0;
|
|
5237
|
+
}
|
|
5238
|
+
if (!Array.isArray(source2)) {
|
|
5239
|
+
throw new Error(`${description} must be an array of numbers`);
|
|
5240
|
+
}
|
|
5241
|
+
if (source2.length === 0) {
|
|
5242
|
+
return void 0;
|
|
5243
|
+
}
|
|
5244
|
+
const resolved = [];
|
|
5245
|
+
for (let i6 = 0; i6 < source2.length; i6++) {
|
|
5246
|
+
const item = source2[i6];
|
|
5247
|
+
if (typeof item !== "number" || !Number.isFinite(item)) {
|
|
5248
|
+
throw new Error(`${description}[${i6}] must be a number`);
|
|
5249
|
+
}
|
|
5250
|
+
resolved.push(item);
|
|
5251
|
+
}
|
|
5252
|
+
return resolved.length > 0 ? resolved : void 0;
|
|
5253
|
+
}
|
|
5221
5254
|
var AGENT_PROVIDER_KINDS = [
|
|
5222
5255
|
"codex",
|
|
5223
5256
|
"vscode",
|
|
@@ -5249,7 +5282,7 @@ var PROVIDER_ALIASES = [
|
|
|
5249
5282
|
"vertex"
|
|
5250
5283
|
// legacy/future support
|
|
5251
5284
|
];
|
|
5252
|
-
var TARGETS_SCHEMA_V2 = "agentv-targets-v2.
|
|
5285
|
+
var TARGETS_SCHEMA_V2 = "agentv-targets-v2.2";
|
|
5253
5286
|
function isAgentProvider(provider) {
|
|
5254
5287
|
return provider ? AGENT_PROVIDER_KINDS.includes(provider.kind) : false;
|
|
5255
5288
|
}
|
|
@@ -12305,6 +12338,67 @@ function ensureChatResponse(result) {
|
|
|
12305
12338
|
}
|
|
12306
12339
|
return result;
|
|
12307
12340
|
}
|
|
12341
|
+
function isRetryableError(error, retryableStatusCodes) {
|
|
12342
|
+
if (!error || typeof error !== "object") {
|
|
12343
|
+
return false;
|
|
12344
|
+
}
|
|
12345
|
+
if ("status" in error && typeof error.status === "number") {
|
|
12346
|
+
return retryableStatusCodes.includes(error.status);
|
|
12347
|
+
}
|
|
12348
|
+
if ("message" in error && typeof error.message === "string") {
|
|
12349
|
+
const match = error.message.match(/HTTP (\d{3})/);
|
|
12350
|
+
if (match) {
|
|
12351
|
+
const status = Number.parseInt(match[1], 10);
|
|
12352
|
+
return retryableStatusCodes.includes(status);
|
|
12353
|
+
}
|
|
12354
|
+
}
|
|
12355
|
+
if ("name" in error && error.name === "AxAIServiceNetworkError") {
|
|
12356
|
+
return true;
|
|
12357
|
+
}
|
|
12358
|
+
return false;
|
|
12359
|
+
}
|
|
12360
|
+
function calculateRetryDelay(attempt, config) {
|
|
12361
|
+
const delay = Math.min(
|
|
12362
|
+
config.maxDelayMs,
|
|
12363
|
+
config.initialDelayMs * config.backoffFactor ** attempt
|
|
12364
|
+
);
|
|
12365
|
+
return delay * (0.75 + Math.random() * 0.5);
|
|
12366
|
+
}
|
|
12367
|
+
async function sleep2(ms) {
|
|
12368
|
+
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
12369
|
+
}
|
|
12370
|
+
async function withRetry(fn, retryConfig, signal) {
|
|
12371
|
+
const config = {
|
|
12372
|
+
maxRetries: retryConfig?.maxRetries ?? 3,
|
|
12373
|
+
initialDelayMs: retryConfig?.initialDelayMs ?? 1e3,
|
|
12374
|
+
maxDelayMs: retryConfig?.maxDelayMs ?? 6e4,
|
|
12375
|
+
backoffFactor: retryConfig?.backoffFactor ?? 2,
|
|
12376
|
+
retryableStatusCodes: retryConfig?.retryableStatusCodes ?? [500, 408, 429, 502, 503, 504]
|
|
12377
|
+
};
|
|
12378
|
+
let lastError;
|
|
12379
|
+
for (let attempt = 0; attempt <= config.maxRetries; attempt++) {
|
|
12380
|
+
if (signal?.aborted) {
|
|
12381
|
+
throw new Error(`Request aborted: ${signal.reason ?? "Unknown reason"}`);
|
|
12382
|
+
}
|
|
12383
|
+
try {
|
|
12384
|
+
return await fn();
|
|
12385
|
+
} catch (error) {
|
|
12386
|
+
lastError = error;
|
|
12387
|
+
if (attempt >= config.maxRetries) {
|
|
12388
|
+
break;
|
|
12389
|
+
}
|
|
12390
|
+
if (!isRetryableError(error, config.retryableStatusCodes)) {
|
|
12391
|
+
throw error;
|
|
12392
|
+
}
|
|
12393
|
+
const delay = calculateRetryDelay(attempt, config);
|
|
12394
|
+
await sleep2(delay);
|
|
12395
|
+
if (signal?.aborted) {
|
|
12396
|
+
throw new Error(`Request aborted: ${signal.reason ?? "Unknown reason"}`);
|
|
12397
|
+
}
|
|
12398
|
+
}
|
|
12399
|
+
}
|
|
12400
|
+
throw lastError;
|
|
12401
|
+
}
|
|
12308
12402
|
var AzureProvider = class {
|
|
12309
12403
|
constructor(targetName, config) {
|
|
12310
12404
|
this.config = config;
|
|
@@ -12314,6 +12408,7 @@ var AzureProvider = class {
|
|
|
12314
12408
|
temperature: config.temperature,
|
|
12315
12409
|
maxOutputTokens: config.maxOutputTokens
|
|
12316
12410
|
};
|
|
12411
|
+
this.retryConfig = config.retry;
|
|
12317
12412
|
this.ai = Wn.create({
|
|
12318
12413
|
name: "azure-openai",
|
|
12319
12414
|
apiKey: config.apiKey,
|
|
@@ -12330,16 +12425,21 @@ var AzureProvider = class {
|
|
|
12330
12425
|
targetName;
|
|
12331
12426
|
ai;
|
|
12332
12427
|
defaults;
|
|
12428
|
+
retryConfig;
|
|
12333
12429
|
async invoke(request) {
|
|
12334
12430
|
const chatPrompt = buildChatPrompt(request);
|
|
12335
12431
|
const modelConfig = extractModelConfig(request, this.defaults);
|
|
12336
|
-
const response = await
|
|
12337
|
-
|
|
12338
|
-
|
|
12339
|
-
|
|
12340
|
-
|
|
12341
|
-
|
|
12342
|
-
|
|
12432
|
+
const response = await withRetry(
|
|
12433
|
+
async () => await this.ai.chat(
|
|
12434
|
+
{
|
|
12435
|
+
chatPrompt,
|
|
12436
|
+
model: this.config.deploymentName,
|
|
12437
|
+
...modelConfig ? { modelConfig } : {}
|
|
12438
|
+
},
|
|
12439
|
+
request.signal ? { abortSignal: request.signal } : void 0
|
|
12440
|
+
),
|
|
12441
|
+
this.retryConfig,
|
|
12442
|
+
request.signal
|
|
12343
12443
|
);
|
|
12344
12444
|
return mapResponse(ensureChatResponse(response));
|
|
12345
12445
|
}
|
|
@@ -12357,6 +12457,7 @@ var AnthropicProvider = class {
|
|
|
12357
12457
|
maxOutputTokens: config.maxOutputTokens,
|
|
12358
12458
|
thinkingBudget: config.thinkingBudget
|
|
12359
12459
|
};
|
|
12460
|
+
this.retryConfig = config.retry;
|
|
12360
12461
|
this.ai = Wn.create({
|
|
12361
12462
|
name: "anthropic",
|
|
12362
12463
|
apiKey: config.apiKey
|
|
@@ -12367,16 +12468,21 @@ var AnthropicProvider = class {
|
|
|
12367
12468
|
targetName;
|
|
12368
12469
|
ai;
|
|
12369
12470
|
defaults;
|
|
12471
|
+
retryConfig;
|
|
12370
12472
|
async invoke(request) {
|
|
12371
12473
|
const chatPrompt = buildChatPrompt(request);
|
|
12372
12474
|
const modelConfig = extractModelConfig(request, this.defaults);
|
|
12373
|
-
const response = await
|
|
12374
|
-
|
|
12375
|
-
|
|
12376
|
-
|
|
12377
|
-
|
|
12378
|
-
|
|
12379
|
-
|
|
12475
|
+
const response = await withRetry(
|
|
12476
|
+
async () => await this.ai.chat(
|
|
12477
|
+
{
|
|
12478
|
+
chatPrompt,
|
|
12479
|
+
model: this.config.model,
|
|
12480
|
+
...modelConfig ? { modelConfig } : {}
|
|
12481
|
+
},
|
|
12482
|
+
request.signal ? { abortSignal: request.signal } : void 0
|
|
12483
|
+
),
|
|
12484
|
+
this.retryConfig,
|
|
12485
|
+
request.signal
|
|
12380
12486
|
);
|
|
12381
12487
|
return mapResponse(ensureChatResponse(response));
|
|
12382
12488
|
}
|
|
@@ -12393,6 +12499,7 @@ var GeminiProvider = class {
|
|
|
12393
12499
|
temperature: config.temperature,
|
|
12394
12500
|
maxOutputTokens: config.maxOutputTokens
|
|
12395
12501
|
};
|
|
12502
|
+
this.retryConfig = config.retry;
|
|
12396
12503
|
this.ai = Wn.create({
|
|
12397
12504
|
name: "google-gemini",
|
|
12398
12505
|
apiKey: config.apiKey
|
|
@@ -12403,16 +12510,21 @@ var GeminiProvider = class {
|
|
|
12403
12510
|
targetName;
|
|
12404
12511
|
ai;
|
|
12405
12512
|
defaults;
|
|
12513
|
+
retryConfig;
|
|
12406
12514
|
async invoke(request) {
|
|
12407
12515
|
const chatPrompt = buildChatPrompt(request);
|
|
12408
12516
|
const modelConfig = extractModelConfig(request, this.defaults);
|
|
12409
|
-
const response = await
|
|
12410
|
-
|
|
12411
|
-
|
|
12412
|
-
|
|
12413
|
-
|
|
12414
|
-
|
|
12415
|
-
|
|
12517
|
+
const response = await withRetry(
|
|
12518
|
+
async () => await this.ai.chat(
|
|
12519
|
+
{
|
|
12520
|
+
chatPrompt,
|
|
12521
|
+
model: this.config.model,
|
|
12522
|
+
...modelConfig ? { modelConfig } : {}
|
|
12523
|
+
},
|
|
12524
|
+
request.signal ? { abortSignal: request.signal } : void 0
|
|
12525
|
+
),
|
|
12526
|
+
this.retryConfig,
|
|
12527
|
+
request.signal
|
|
12416
12528
|
);
|
|
12417
12529
|
return mapResponse(ensureChatResponse(response));
|
|
12418
12530
|
}
|
|
@@ -12433,7 +12545,6 @@ async function defaultCommandRunner(command, options) {
|
|
|
12433
12545
|
};
|
|
12434
12546
|
try {
|
|
12435
12547
|
const { stdout, stderr } = await execAsync2(command, execOptions);
|
|
12436
|
-
console.error(`[CLI DEBUG] SUCCESS - stdout: ${stdout.length} bytes, stderr: ${stderr.length} bytes`);
|
|
12437
12548
|
return {
|
|
12438
12549
|
stdout,
|
|
12439
12550
|
stderr,
|
|
@@ -12444,8 +12555,6 @@ async function defaultCommandRunner(command, options) {
|
|
|
12444
12555
|
};
|
|
12445
12556
|
} catch (error) {
|
|
12446
12557
|
const execError = error;
|
|
12447
|
-
console.error(`[CLI DEBUG] ERROR - code: ${execError.code}, message: ${execError.message}`);
|
|
12448
|
-
console.error(`[CLI DEBUG] stdout: ${execError.stdout?.length ?? 0} bytes, stderr: ${execError.stderr?.length ?? 0} bytes`);
|
|
12449
12558
|
return {
|
|
12450
12559
|
stdout: execError.stdout ?? "",
|
|
12451
12560
|
stderr: execError.stderr ?? "",
|
|
@@ -12478,10 +12587,9 @@ var CliProvider = class {
|
|
|
12478
12587
|
const outputFilePath = generateOutputFilePath(request.evalCaseId);
|
|
12479
12588
|
const templateValues = buildTemplateValues(request, this.config, outputFilePath);
|
|
12480
12589
|
const renderedCommand = renderTemplate(this.config.commandTemplate, templateValues);
|
|
12481
|
-
const env = this.config.env ? { ...process.env, ...this.config.env } : process.env;
|
|
12482
12590
|
const result = await this.runCommand(renderedCommand, {
|
|
12483
12591
|
cwd: this.config.cwd,
|
|
12484
|
-
env,
|
|
12592
|
+
env: process.env,
|
|
12485
12593
|
timeoutMs: this.config.timeoutMs,
|
|
12486
12594
|
signal: request.signal
|
|
12487
12595
|
});
|
|
@@ -12513,7 +12621,7 @@ var CliProvider = class {
|
|
|
12513
12621
|
}
|
|
12514
12622
|
async readAndCleanupOutputFile(filePath) {
|
|
12515
12623
|
try {
|
|
12516
|
-
const content = await
|
|
12624
|
+
const content = await readTextFile(filePath);
|
|
12517
12625
|
return content;
|
|
12518
12626
|
} catch (error) {
|
|
12519
12627
|
const errorMsg = error instanceof Error ? error.message : String(error);
|
|
@@ -12570,10 +12678,9 @@ var CliProvider = class {
|
|
|
12570
12678
|
generateOutputFilePath("healthcheck")
|
|
12571
12679
|
)
|
|
12572
12680
|
);
|
|
12573
|
-
const env = this.config.env ? { ...process.env, ...this.config.env } : process.env;
|
|
12574
12681
|
const result = await this.runCommand(renderedCommand, {
|
|
12575
12682
|
cwd: healthcheck.cwd ?? this.config.cwd,
|
|
12576
|
-
env,
|
|
12683
|
+
env: process.env,
|
|
12577
12684
|
timeoutMs,
|
|
12578
12685
|
signal
|
|
12579
12686
|
});
|
|
@@ -13771,20 +13878,13 @@ function assertTargetDefinition(value, index, filePath) {
|
|
|
13771
13878
|
}
|
|
13772
13879
|
const name = value.name;
|
|
13773
13880
|
const provider = value.provider;
|
|
13774
|
-
const settings = value.settings;
|
|
13775
|
-
const judgeTarget = value.judge_target;
|
|
13776
13881
|
if (typeof name !== "string" || name.trim().length === 0) {
|
|
13777
13882
|
throw new Error(`targets.yaml entry at index ${index} in ${filePath} is missing a valid 'name'`);
|
|
13778
13883
|
}
|
|
13779
13884
|
if (typeof provider !== "string" || provider.trim().length === 0) {
|
|
13780
13885
|
throw new Error(`targets.yaml entry '${name}' in ${filePath} is missing a valid 'provider'`);
|
|
13781
13886
|
}
|
|
13782
|
-
return
|
|
13783
|
-
name,
|
|
13784
|
-
provider,
|
|
13785
|
-
settings: isRecord(settings) ? settings : void 0,
|
|
13786
|
-
judge_target: typeof judgeTarget === "string" ? judgeTarget : void 0
|
|
13787
|
-
};
|
|
13887
|
+
return value;
|
|
13788
13888
|
}
|
|
13789
13889
|
async function fileExists3(filePath) {
|
|
13790
13890
|
try {
|
|
@@ -14405,10 +14505,11 @@ async function runEvaluation(options) {
|
|
|
14405
14505
|
await onProgress({
|
|
14406
14506
|
workerId,
|
|
14407
14507
|
evalId: evalCase.id,
|
|
14408
|
-
status: "completed",
|
|
14508
|
+
status: result.error ? "failed" : "completed",
|
|
14409
14509
|
startedAt: 0,
|
|
14410
14510
|
// Not used for completed status
|
|
14411
|
-
completedAt: Date.now()
|
|
14511
|
+
completedAt: Date.now(),
|
|
14512
|
+
error: result.error
|
|
14412
14513
|
});
|
|
14413
14514
|
}
|
|
14414
14515
|
if (onResult) {
|
|
@@ -14946,7 +15047,8 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
|
|
|
14946
15047
|
target: targetName,
|
|
14947
15048
|
timestamp: timestamp.toISOString(),
|
|
14948
15049
|
raw_aspects: [],
|
|
14949
|
-
raw_request: rawRequest
|
|
15050
|
+
raw_request: rawRequest,
|
|
15051
|
+
error: message
|
|
14950
15052
|
};
|
|
14951
15053
|
}
|
|
14952
15054
|
function createCacheKey(provider, target, evalCase, promptInputs) {
|
|
@@ -15331,9 +15433,10 @@ var YamlWriter = class _YamlWriter {
|
|
|
15331
15433
|
// Let YAML library choose appropriate string style based on content
|
|
15332
15434
|
// (will use block literal for multiline strings with actual newlines)
|
|
15333
15435
|
});
|
|
15436
|
+
const normalizedYaml = normalizeLineEndings(yamlDoc);
|
|
15334
15437
|
const separator = this.isFirst ? "---\n" : "\n---\n";
|
|
15335
15438
|
this.isFirst = false;
|
|
15336
|
-
const content = `${separator}${
|
|
15439
|
+
const content = `${separator}${normalizedYaml}`;
|
|
15337
15440
|
if (!this.stream.write(content)) {
|
|
15338
15441
|
await new Promise((resolve, reject) => {
|
|
15339
15442
|
this.stream.once("drain", resolve);
|
|
@@ -15379,27 +15482,42 @@ function getDefaultExtension(format) {
|
|
|
15379
15482
|
}
|
|
15380
15483
|
|
|
15381
15484
|
// src/commands/eval/progress-display.ts
|
|
15382
|
-
import
|
|
15485
|
+
import { stripVTControlCharacters } from "node:util";
|
|
15486
|
+
var ESC = "\x1B[";
|
|
15487
|
+
var CLEAR_LINE = `${ESC}K`;
|
|
15488
|
+
var MOVE_CURSOR_UP = `${ESC}1A`;
|
|
15489
|
+
var SYNC_START = `${ESC}?2026h`;
|
|
15490
|
+
var SYNC_END = `${ESC}?2026l`;
|
|
15383
15491
|
var ProgressDisplay = class {
|
|
15384
15492
|
workers = /* @__PURE__ */ new Map();
|
|
15385
15493
|
maxWorkers;
|
|
15386
15494
|
totalTests = 0;
|
|
15387
15495
|
completedTests = 0;
|
|
15388
15496
|
renderTimer;
|
|
15497
|
+
renderScheduled = false;
|
|
15389
15498
|
isInteractive;
|
|
15390
15499
|
logPaths = [];
|
|
15391
15500
|
logPathSet = /* @__PURE__ */ new Set();
|
|
15392
15501
|
hasPrintedLogHeader = false;
|
|
15502
|
+
windowHeight = 0;
|
|
15503
|
+
started = false;
|
|
15504
|
+
finished = false;
|
|
15393
15505
|
constructor(maxWorkers) {
|
|
15394
15506
|
this.maxWorkers = maxWorkers;
|
|
15395
|
-
this.isInteractive = process.
|
|
15507
|
+
this.isInteractive = process.stdout.isTTY && !process.env.CI;
|
|
15396
15508
|
}
|
|
15397
15509
|
isInteractiveMode() {
|
|
15398
15510
|
return this.isInteractive;
|
|
15399
15511
|
}
|
|
15400
15512
|
start() {
|
|
15513
|
+
this.started = true;
|
|
15514
|
+
this.finished = false;
|
|
15401
15515
|
if (this.isInteractive) {
|
|
15402
|
-
|
|
15516
|
+
this.write("\n");
|
|
15517
|
+
this.renderTimer = setInterval(() => {
|
|
15518
|
+
this.scheduleRender();
|
|
15519
|
+
}, 1e3);
|
|
15520
|
+
this.renderTimer.unref?.();
|
|
15403
15521
|
}
|
|
15404
15522
|
}
|
|
15405
15523
|
setTotalTests(count) {
|
|
@@ -15449,22 +15567,42 @@ var ProgressDisplay = class {
|
|
|
15449
15567
|
});
|
|
15450
15568
|
}
|
|
15451
15569
|
scheduleRender() {
|
|
15452
|
-
if (this.
|
|
15570
|
+
if (this.renderScheduled || this.finished) {
|
|
15453
15571
|
return;
|
|
15454
15572
|
}
|
|
15455
|
-
this.
|
|
15456
|
-
|
|
15573
|
+
this.renderScheduled = true;
|
|
15574
|
+
setTimeout(() => {
|
|
15575
|
+
this.renderScheduled = false;
|
|
15457
15576
|
this.render();
|
|
15458
15577
|
}, 100);
|
|
15459
15578
|
}
|
|
15579
|
+
write(content) {
|
|
15580
|
+
process.stdout.write(content);
|
|
15581
|
+
}
|
|
15582
|
+
clearWindow() {
|
|
15583
|
+
if (this.windowHeight === 0) {
|
|
15584
|
+
return;
|
|
15585
|
+
}
|
|
15586
|
+
this.write(`\r${CLEAR_LINE}`);
|
|
15587
|
+
for (let i6 = 1; i6 < this.windowHeight; i6++) {
|
|
15588
|
+
this.write(`${MOVE_CURSOR_UP}\r${CLEAR_LINE}`);
|
|
15589
|
+
}
|
|
15590
|
+
this.windowHeight = 0;
|
|
15591
|
+
}
|
|
15592
|
+
getRenderedRowCount(rows) {
|
|
15593
|
+
const columns = process.stdout.columns || 80;
|
|
15594
|
+
let count = 0;
|
|
15595
|
+
for (const row of rows) {
|
|
15596
|
+
const text = stripVTControlCharacters(row);
|
|
15597
|
+
count += Math.max(1, Math.ceil(text.length / columns));
|
|
15598
|
+
}
|
|
15599
|
+
return count;
|
|
15600
|
+
}
|
|
15460
15601
|
render() {
|
|
15461
|
-
if (!this.isInteractive) {
|
|
15602
|
+
if (!this.isInteractive || !this.started || this.finished) {
|
|
15462
15603
|
return;
|
|
15463
15604
|
}
|
|
15464
15605
|
const lines = [];
|
|
15465
|
-
const progressBar = this.buildProgressBar(this.completedTests, this.totalTests);
|
|
15466
|
-
lines.push(`${progressBar} ${this.completedTests}/${this.totalTests} evals`);
|
|
15467
|
-
lines.push("");
|
|
15468
15606
|
const sortedWorkers = Array.from(this.workers.values()).sort((a, b) => a.workerId - b.workerId);
|
|
15469
15607
|
for (const worker of sortedWorkers) {
|
|
15470
15608
|
const line2 = this.formatWorkerLine(worker);
|
|
@@ -15477,22 +15615,26 @@ var ProgressDisplay = class {
|
|
|
15477
15615
|
lines.push(`${index + 1}. ${path19}`);
|
|
15478
15616
|
});
|
|
15479
15617
|
}
|
|
15480
|
-
|
|
15618
|
+
const rowCount = this.getRenderedRowCount(lines);
|
|
15619
|
+
this.clearWindow();
|
|
15620
|
+
if (lines.length > 0) {
|
|
15621
|
+
this.write(lines.join("\n"));
|
|
15622
|
+
}
|
|
15623
|
+
this.windowHeight = rowCount;
|
|
15481
15624
|
}
|
|
15482
15625
|
formatWorkerLine(worker) {
|
|
15483
15626
|
const workerLabel = `${worker.workerId}.`.padEnd(4);
|
|
15484
15627
|
const statusIcon = this.getStatusIcon(worker.status);
|
|
15485
|
-
const elapsed = worker.startedAt ? this.formatElapsed(Date.now() - worker.startedAt) : "";
|
|
15486
|
-
const timeLabel = elapsed ? ` (${elapsed})` : "";
|
|
15487
15628
|
const targetLabel = worker.targetLabel ? ` | ${worker.targetLabel}` : "";
|
|
15488
|
-
const
|
|
15489
|
-
const
|
|
15629
|
+
const columns = process.stdout.columns || 80;
|
|
15630
|
+
const maxLineLength = Math.max(40, columns - 4);
|
|
15631
|
+
const reservedLength = workerLabel.length + statusIcon.length + targetLabel.length + 4;
|
|
15490
15632
|
const availableLabelLength = Math.max(15, maxLineLength - reservedLength);
|
|
15491
15633
|
let testLabel = worker.evalId;
|
|
15492
15634
|
if (testLabel.length > availableLabelLength) {
|
|
15493
15635
|
testLabel = `${testLabel.substring(0, Math.max(0, availableLabelLength - 3))}...`;
|
|
15494
15636
|
}
|
|
15495
|
-
return `${workerLabel} ${statusIcon} ${testLabel}${
|
|
15637
|
+
return `${workerLabel} ${statusIcon} ${testLabel}${targetLabel}`;
|
|
15496
15638
|
}
|
|
15497
15639
|
getStatusIcon(status) {
|
|
15498
15640
|
switch (status) {
|
|
@@ -15508,39 +15650,26 @@ var ProgressDisplay = class {
|
|
|
15508
15650
|
return " ";
|
|
15509
15651
|
}
|
|
15510
15652
|
}
|
|
15511
|
-
formatElapsed(ms) {
|
|
15512
|
-
const seconds = Math.floor(ms / 1e3);
|
|
15513
|
-
if (seconds < 60) {
|
|
15514
|
-
return `${seconds}s`;
|
|
15515
|
-
}
|
|
15516
|
-
const minutes = Math.floor(seconds / 60);
|
|
15517
|
-
const remainingSeconds = seconds % 60;
|
|
15518
|
-
return `${minutes}m ${remainingSeconds}s`;
|
|
15519
|
-
}
|
|
15520
|
-
buildProgressBar(current, total) {
|
|
15521
|
-
if (total === 0) {
|
|
15522
|
-
return "[ ]";
|
|
15523
|
-
}
|
|
15524
|
-
const width = 20;
|
|
15525
|
-
const filled = Math.floor(current / total * width);
|
|
15526
|
-
const empty = width - filled;
|
|
15527
|
-
const bar = "\u2588".repeat(filled) + "\u2591".repeat(empty);
|
|
15528
|
-
const percentage = Math.floor(current / total * 100);
|
|
15529
|
-
return `[${bar}] ${percentage}%`;
|
|
15530
|
-
}
|
|
15531
15653
|
finish() {
|
|
15532
15654
|
if (this.renderTimer) {
|
|
15533
|
-
|
|
15655
|
+
clearInterval(this.renderTimer);
|
|
15534
15656
|
this.renderTimer = void 0;
|
|
15535
15657
|
}
|
|
15536
|
-
|
|
15537
|
-
|
|
15538
|
-
|
|
15658
|
+
this.finished = true;
|
|
15659
|
+
if (this.isInteractive && this.started) {
|
|
15660
|
+
this.clearWindow();
|
|
15661
|
+
const sortedWorkers = Array.from(this.workers.values()).sort(
|
|
15662
|
+
(a, b) => a.workerId - b.workerId
|
|
15663
|
+
);
|
|
15664
|
+
for (const worker of sortedWorkers) {
|
|
15665
|
+
this.write(this.formatWorkerLine(worker) + "\n");
|
|
15666
|
+
}
|
|
15667
|
+
this.write("\n");
|
|
15539
15668
|
}
|
|
15540
15669
|
}
|
|
15541
15670
|
clear() {
|
|
15542
15671
|
if (this.isInteractive) {
|
|
15543
|
-
|
|
15672
|
+
this.clearWindow();
|
|
15544
15673
|
}
|
|
15545
15674
|
}
|
|
15546
15675
|
};
|
|
@@ -15597,6 +15726,8 @@ function buildHistogram(values) {
|
|
|
15597
15726
|
function calculateEvaluationSummary(results) {
|
|
15598
15727
|
const scores = results.map((result) => result.score);
|
|
15599
15728
|
const total = results.length;
|
|
15729
|
+
const errors = results.filter((result) => result.error !== void 0).map((result) => ({ evalId: result.eval_id, error: result.error }));
|
|
15730
|
+
const errorCount = errors.length;
|
|
15600
15731
|
if (total === 0) {
|
|
15601
15732
|
return {
|
|
15602
15733
|
total: 0,
|
|
@@ -15607,7 +15738,9 @@ function calculateEvaluationSummary(results) {
|
|
|
15607
15738
|
standardDeviation: void 0,
|
|
15608
15739
|
histogram: buildHistogram([]),
|
|
15609
15740
|
topResults: [],
|
|
15610
|
-
bottomResults: []
|
|
15741
|
+
bottomResults: [],
|
|
15742
|
+
errorCount: 0,
|
|
15743
|
+
errors: []
|
|
15611
15744
|
};
|
|
15612
15745
|
}
|
|
15613
15746
|
const mean = computeMean(scores);
|
|
@@ -15628,7 +15761,9 @@ function calculateEvaluationSummary(results) {
|
|
|
15628
15761
|
standardDeviation,
|
|
15629
15762
|
histogram,
|
|
15630
15763
|
topResults,
|
|
15631
|
-
bottomResults
|
|
15764
|
+
bottomResults,
|
|
15765
|
+
errorCount,
|
|
15766
|
+
errors
|
|
15632
15767
|
};
|
|
15633
15768
|
}
|
|
15634
15769
|
function formatScore(value) {
|
|
@@ -15639,10 +15774,25 @@ function formatEvaluationSummary(summary) {
|
|
|
15639
15774
|
return "\nNo results to summarize";
|
|
15640
15775
|
}
|
|
15641
15776
|
const lines = [];
|
|
15777
|
+
if (summary.errorCount > 0) {
|
|
15778
|
+
lines.push("\n==================================================");
|
|
15779
|
+
lines.push("ERRORS");
|
|
15780
|
+
lines.push("==================================================");
|
|
15781
|
+
summary.errors.forEach((error) => {
|
|
15782
|
+
lines.push(`
|
|
15783
|
+
\u274C ${error.evalId}`);
|
|
15784
|
+
lines.push(` ${error.error}`);
|
|
15785
|
+
});
|
|
15786
|
+
lines.push("");
|
|
15787
|
+
}
|
|
15642
15788
|
lines.push("\n==================================================");
|
|
15643
15789
|
lines.push("EVALUATION SUMMARY");
|
|
15644
15790
|
lines.push("==================================================");
|
|
15645
15791
|
lines.push(`Total eval cases: ${summary.total}`);
|
|
15792
|
+
if (summary.errorCount > 0) {
|
|
15793
|
+
lines.push(`Failed: ${summary.errorCount}`);
|
|
15794
|
+
lines.push(`Passed: ${summary.total - summary.errorCount}`);
|
|
15795
|
+
}
|
|
15646
15796
|
lines.push(`Mean score: ${formatScore(summary.mean)}`);
|
|
15647
15797
|
lines.push(`Median score: ${formatScore(summary.median)}`);
|
|
15648
15798
|
lines.push(`Min score: ${formatScore(summary.min)}`);
|
|
@@ -15681,7 +15831,7 @@ import { readFile as readFile5 } from "node:fs/promises";
|
|
|
15681
15831
|
import path33 from "node:path";
|
|
15682
15832
|
import { parse as parse5 } from "yaml";
|
|
15683
15833
|
var SCHEMA_EVAL_V22 = "agentv-eval-v2";
|
|
15684
|
-
var SCHEMA_TARGETS_V2 = "agentv-targets-v2.
|
|
15834
|
+
var SCHEMA_TARGETS_V2 = "agentv-targets-v2.2";
|
|
15685
15835
|
var SCHEMA_CONFIG_V22 = "agentv-config-v2";
|
|
15686
15836
|
async function detectFileType(filePath) {
|
|
15687
15837
|
try {
|
|
@@ -15908,8 +16058,21 @@ var COMMON_SETTINGS = /* @__PURE__ */ new Set([
|
|
|
15908
16058
|
"provider_batching",
|
|
15909
16059
|
"providerBatching"
|
|
15910
16060
|
]);
|
|
16061
|
+
var RETRY_SETTINGS = /* @__PURE__ */ new Set([
|
|
16062
|
+
"max_retries",
|
|
16063
|
+
"maxRetries",
|
|
16064
|
+
"retry_initial_delay_ms",
|
|
16065
|
+
"retryInitialDelayMs",
|
|
16066
|
+
"retry_max_delay_ms",
|
|
16067
|
+
"retryMaxDelayMs",
|
|
16068
|
+
"retry_backoff_factor",
|
|
16069
|
+
"retryBackoffFactor",
|
|
16070
|
+
"retry_status_codes",
|
|
16071
|
+
"retryStatusCodes"
|
|
16072
|
+
]);
|
|
15911
16073
|
var AZURE_SETTINGS = /* @__PURE__ */ new Set([
|
|
15912
16074
|
...COMMON_SETTINGS,
|
|
16075
|
+
...RETRY_SETTINGS,
|
|
15913
16076
|
"endpoint",
|
|
15914
16077
|
"resource",
|
|
15915
16078
|
"resourceName",
|
|
@@ -15926,6 +16089,7 @@ var AZURE_SETTINGS = /* @__PURE__ */ new Set([
|
|
|
15926
16089
|
]);
|
|
15927
16090
|
var ANTHROPIC_SETTINGS = /* @__PURE__ */ new Set([
|
|
15928
16091
|
...COMMON_SETTINGS,
|
|
16092
|
+
...RETRY_SETTINGS,
|
|
15929
16093
|
"api_key",
|
|
15930
16094
|
"apiKey",
|
|
15931
16095
|
"model",
|
|
@@ -15939,6 +16103,7 @@ var ANTHROPIC_SETTINGS = /* @__PURE__ */ new Set([
|
|
|
15939
16103
|
]);
|
|
15940
16104
|
var GEMINI_SETTINGS = /* @__PURE__ */ new Set([
|
|
15941
16105
|
...COMMON_SETTINGS,
|
|
16106
|
+
...RETRY_SETTINGS,
|
|
15942
16107
|
"api_key",
|
|
15943
16108
|
"apiKey",
|
|
15944
16109
|
"model",
|
|
@@ -16026,13 +16191,14 @@ function getKnownSettings(provider) {
|
|
|
16026
16191
|
return null;
|
|
16027
16192
|
}
|
|
16028
16193
|
}
|
|
16029
|
-
function validateUnknownSettings(
|
|
16194
|
+
function validateUnknownSettings(target, provider, absolutePath, location, errors) {
|
|
16030
16195
|
const knownSettings = getKnownSettings(provider);
|
|
16031
16196
|
if (!knownSettings) {
|
|
16032
16197
|
return;
|
|
16033
16198
|
}
|
|
16034
|
-
|
|
16035
|
-
|
|
16199
|
+
const baseFields = /* @__PURE__ */ new Set(["name", "provider", "judge_target", "workers", "$schema", "targets"]);
|
|
16200
|
+
for (const key2 of Object.keys(target)) {
|
|
16201
|
+
if (!baseFields.has(key2) && !knownSettings.has(key2)) {
|
|
16036
16202
|
errors.push({
|
|
16037
16203
|
severity: "warning",
|
|
16038
16204
|
filePath: absolutePath,
|
|
@@ -16062,17 +16228,8 @@ async function validateTargetsFile(filePath) {
|
|
|
16062
16228
|
errors
|
|
16063
16229
|
};
|
|
16064
16230
|
}
|
|
16065
|
-
function validateCliSettings(
|
|
16066
|
-
|
|
16067
|
-
errors2.push({
|
|
16068
|
-
severity: "error",
|
|
16069
|
-
filePath: absolutePath2,
|
|
16070
|
-
location,
|
|
16071
|
-
message: "CLI provider requires a 'settings' object"
|
|
16072
|
-
});
|
|
16073
|
-
return;
|
|
16074
|
-
}
|
|
16075
|
-
const commandTemplate = settings["command_template"] ?? settings["commandTemplate"];
|
|
16231
|
+
function validateCliSettings(target, absolutePath2, location, errors2) {
|
|
16232
|
+
const commandTemplate = target["command_template"] ?? target["commandTemplate"];
|
|
16076
16233
|
if (typeof commandTemplate !== "string" || commandTemplate.trim().length === 0) {
|
|
16077
16234
|
errors2.push({
|
|
16078
16235
|
severity: "error",
|
|
@@ -16083,7 +16240,7 @@ async function validateTargetsFile(filePath) {
|
|
|
16083
16240
|
} else {
|
|
16084
16241
|
recordUnknownPlaceholders(commandTemplate, absolutePath2, `${location}.commandTemplate`, errors2);
|
|
16085
16242
|
}
|
|
16086
|
-
const attachmentsFormat =
|
|
16243
|
+
const attachmentsFormat = target["attachments_format"] ?? target["attachmentsFormat"];
|
|
16087
16244
|
if (attachmentsFormat !== void 0 && typeof attachmentsFormat !== "string") {
|
|
16088
16245
|
errors2.push({
|
|
16089
16246
|
severity: "error",
|
|
@@ -16092,7 +16249,7 @@ async function validateTargetsFile(filePath) {
|
|
|
16092
16249
|
message: "'attachmentsFormat' must be a string when provided"
|
|
16093
16250
|
});
|
|
16094
16251
|
}
|
|
16095
|
-
const filesFormat =
|
|
16252
|
+
const filesFormat = target["files_format"] ?? target["filesFormat"];
|
|
16096
16253
|
if (filesFormat !== void 0 && typeof filesFormat !== "string") {
|
|
16097
16254
|
errors2.push({
|
|
16098
16255
|
severity: "error",
|
|
@@ -16101,7 +16258,7 @@ async function validateTargetsFile(filePath) {
|
|
|
16101
16258
|
message: "'filesFormat' must be a string when provided"
|
|
16102
16259
|
});
|
|
16103
16260
|
}
|
|
16104
|
-
const cwd =
|
|
16261
|
+
const cwd = target["cwd"];
|
|
16105
16262
|
if (cwd !== void 0 && typeof cwd !== "string") {
|
|
16106
16263
|
errors2.push({
|
|
16107
16264
|
severity: "error",
|
|
@@ -16110,7 +16267,7 @@ async function validateTargetsFile(filePath) {
|
|
|
16110
16267
|
message: "'cwd' must be a string when provided"
|
|
16111
16268
|
});
|
|
16112
16269
|
}
|
|
16113
|
-
const timeoutSeconds =
|
|
16270
|
+
const timeoutSeconds = target["timeout_seconds"] ?? target["timeoutSeconds"];
|
|
16114
16271
|
if (timeoutSeconds !== void 0) {
|
|
16115
16272
|
const numericTimeout = Number(timeoutSeconds);
|
|
16116
16273
|
if (!Number.isFinite(numericTimeout) || numericTimeout <= 0) {
|
|
@@ -16122,29 +16279,7 @@ async function validateTargetsFile(filePath) {
|
|
|
16122
16279
|
});
|
|
16123
16280
|
}
|
|
16124
16281
|
}
|
|
16125
|
-
const
|
|
16126
|
-
if (envOverrides !== void 0) {
|
|
16127
|
-
if (!isObject2(envOverrides)) {
|
|
16128
|
-
errors2.push({
|
|
16129
|
-
severity: "error",
|
|
16130
|
-
filePath: absolutePath2,
|
|
16131
|
-
location: `${location}.env`,
|
|
16132
|
-
message: "'env' must be an object with string values"
|
|
16133
|
-
});
|
|
16134
|
-
} else {
|
|
16135
|
-
for (const [key2, value] of Object.entries(envOverrides)) {
|
|
16136
|
-
if (typeof value !== "string" || value.trim().length === 0) {
|
|
16137
|
-
errors2.push({
|
|
16138
|
-
severity: "error",
|
|
16139
|
-
filePath: absolutePath2,
|
|
16140
|
-
location: `${location}.env.${key2}`,
|
|
16141
|
-
message: `Environment override '${key2}' must be a non-empty string`
|
|
16142
|
-
});
|
|
16143
|
-
}
|
|
16144
|
-
}
|
|
16145
|
-
}
|
|
16146
|
-
}
|
|
16147
|
-
const healthcheck = settings["healthcheck"];
|
|
16282
|
+
const healthcheck = target["healthcheck"];
|
|
16148
16283
|
if (healthcheck !== void 0) {
|
|
16149
16284
|
validateCliHealthcheck(healthcheck, absolutePath2, `${location}.healthcheck`, errors2);
|
|
16150
16285
|
}
|
|
@@ -16315,20 +16450,11 @@ async function validateTargetsFile(filePath) {
|
|
|
16315
16450
|
message: `Unknown provider '${provider}'. Known providers: ${knownProviders.join(", ")}`
|
|
16316
16451
|
});
|
|
16317
16452
|
}
|
|
16318
|
-
const settings = target["settings"];
|
|
16319
|
-
if (providerValue !== "cli" && settings !== void 0 && !isObject2(settings)) {
|
|
16320
|
-
errors.push({
|
|
16321
|
-
severity: "error",
|
|
16322
|
-
filePath: absolutePath,
|
|
16323
|
-
location: `${location}.settings`,
|
|
16324
|
-
message: "Invalid 'settings' field (must be an object)"
|
|
16325
|
-
});
|
|
16326
|
-
}
|
|
16327
16453
|
if (providerValue === "cli") {
|
|
16328
|
-
validateCliSettings(
|
|
16454
|
+
validateCliSettings(target, absolutePath, location, errors);
|
|
16329
16455
|
}
|
|
16330
|
-
if (
|
|
16331
|
-
validateUnknownSettings(
|
|
16456
|
+
if (typeof provider === "string") {
|
|
16457
|
+
validateUnknownSettings(target, provider, absolutePath, location, errors);
|
|
16332
16458
|
}
|
|
16333
16459
|
const judgeTarget = target["judge_target"];
|
|
16334
16460
|
if (judgeTarget !== void 0 && typeof judgeTarget !== "string") {
|
|
@@ -17539,4 +17665,4 @@ export {
|
|
|
17539
17665
|
createProgram,
|
|
17540
17666
|
runCli
|
|
17541
17667
|
};
|
|
17542
|
-
//# sourceMappingURL=chunk-
|
|
17668
|
+
//# sourceMappingURL=chunk-X2VVUCIB.js.map
|