agentv 0.7.5 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +82 -37
- package/dist/{chunk-J3LVKRRT.js → chunk-J5HK75TC.js} +614 -256
- package/dist/chunk-J5HK75TC.js.map +1 -0
- package/dist/cli.js +1 -1
- package/dist/index.js +1 -1
- package/dist/templates/agentv/targets.yaml +35 -43
- package/package.json +2 -2
- package/dist/chunk-J3LVKRRT.js.map +0 -1
|
@@ -590,7 +590,7 @@ import fg from "fast-glob";
|
|
|
590
590
|
import { stat as stat3 } from "node:fs/promises";
|
|
591
591
|
import path15 from "node:path";
|
|
592
592
|
|
|
593
|
-
// ../../packages/core/dist/chunk-
|
|
593
|
+
// ../../packages/core/dist/chunk-YQBJAT5I.js
|
|
594
594
|
import { constants } from "node:fs";
|
|
595
595
|
import { access, readFile } from "node:fs/promises";
|
|
596
596
|
import path from "node:path";
|
|
@@ -4636,7 +4636,7 @@ var coerce = {
|
|
|
4636
4636
|
};
|
|
4637
4637
|
var NEVER = INVALID;
|
|
4638
4638
|
|
|
4639
|
-
// ../../packages/core/dist/chunk-
|
|
4639
|
+
// ../../packages/core/dist/chunk-YQBJAT5I.js
|
|
4640
4640
|
async function fileExists(filePath) {
|
|
4641
4641
|
try {
|
|
4642
4642
|
await access(filePath, constants.F_OK);
|
|
@@ -4747,10 +4747,9 @@ var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set(["PROMPT", "GUIDELINES", "EVAL_ID
|
|
|
4747
4747
|
var BASE_TARGET_SCHEMA = external_exports.object({
|
|
4748
4748
|
name: external_exports.string().min(1, "target name is required"),
|
|
4749
4749
|
provider: external_exports.string().min(1, "provider is required"),
|
|
4750
|
-
settings: external_exports.record(external_exports.unknown()).optional(),
|
|
4751
4750
|
judge_target: external_exports.string().optional(),
|
|
4752
4751
|
workers: external_exports.number().int().min(1).optional()
|
|
4753
|
-
});
|
|
4752
|
+
}).passthrough();
|
|
4754
4753
|
var DEFAULT_AZURE_API_VERSION = "2024-10-01-preview";
|
|
4755
4754
|
function normalizeAzureApiVersion(value) {
|
|
4756
4755
|
if (!value) {
|
|
@@ -4763,11 +4762,43 @@ function normalizeAzureApiVersion(value) {
|
|
|
4763
4762
|
const withoutPrefix = trimmed.replace(/^api[-_]?version\s*=\s*/i, "").trim();
|
|
4764
4763
|
return withoutPrefix.length > 0 ? withoutPrefix : DEFAULT_AZURE_API_VERSION;
|
|
4765
4764
|
}
|
|
4765
|
+
function resolveRetryConfig(target) {
|
|
4766
|
+
const maxRetries = resolveOptionalNumber(
|
|
4767
|
+
target.max_retries ?? target.maxRetries,
|
|
4768
|
+
`${target.name} max retries`
|
|
4769
|
+
);
|
|
4770
|
+
const initialDelayMs = resolveOptionalNumber(
|
|
4771
|
+
target.retry_initial_delay_ms ?? target.retryInitialDelayMs,
|
|
4772
|
+
`${target.name} retry initial delay`
|
|
4773
|
+
);
|
|
4774
|
+
const maxDelayMs = resolveOptionalNumber(
|
|
4775
|
+
target.retry_max_delay_ms ?? target.retryMaxDelayMs,
|
|
4776
|
+
`${target.name} retry max delay`
|
|
4777
|
+
);
|
|
4778
|
+
const backoffFactor = resolveOptionalNumber(
|
|
4779
|
+
target.retry_backoff_factor ?? target.retryBackoffFactor,
|
|
4780
|
+
`${target.name} retry backoff factor`
|
|
4781
|
+
);
|
|
4782
|
+
const retryableStatusCodes = resolveOptionalNumberArray(
|
|
4783
|
+
target.retry_status_codes ?? target.retryStatusCodes,
|
|
4784
|
+
`${target.name} retry status codes`
|
|
4785
|
+
);
|
|
4786
|
+
if (maxRetries === void 0 && initialDelayMs === void 0 && maxDelayMs === void 0 && backoffFactor === void 0 && retryableStatusCodes === void 0) {
|
|
4787
|
+
return void 0;
|
|
4788
|
+
}
|
|
4789
|
+
return {
|
|
4790
|
+
maxRetries,
|
|
4791
|
+
initialDelayMs,
|
|
4792
|
+
maxDelayMs,
|
|
4793
|
+
backoffFactor,
|
|
4794
|
+
retryableStatusCodes
|
|
4795
|
+
};
|
|
4796
|
+
}
|
|
4766
4797
|
function resolveTargetDefinition(definition, env = process.env) {
|
|
4767
4798
|
const parsed = BASE_TARGET_SCHEMA.parse(definition);
|
|
4768
4799
|
const provider = parsed.provider.toLowerCase();
|
|
4769
4800
|
const providerBatching = resolveOptionalBoolean(
|
|
4770
|
-
parsed.
|
|
4801
|
+
parsed.provider_batching ?? parsed.providerBatching
|
|
4771
4802
|
);
|
|
4772
4803
|
switch (provider) {
|
|
4773
4804
|
case "azure":
|
|
@@ -4843,13 +4874,12 @@ function resolveTargetDefinition(definition, env = process.env) {
|
|
|
4843
4874
|
}
|
|
4844
4875
|
}
|
|
4845
4876
|
function resolveAzureConfig(target, env) {
|
|
4846
|
-
const
|
|
4847
|
-
const
|
|
4848
|
-
const
|
|
4849
|
-
const
|
|
4850
|
-
const
|
|
4851
|
-
const
|
|
4852
|
-
const maxTokensSource = settings.max_output_tokens ?? settings.maxTokens;
|
|
4877
|
+
const endpointSource = target.endpoint ?? target.resource ?? target.resourceName;
|
|
4878
|
+
const apiKeySource = target.api_key ?? target.apiKey;
|
|
4879
|
+
const deploymentSource = target.deployment ?? target.deploymentName ?? target.model;
|
|
4880
|
+
const versionSource = target.version ?? target.api_version;
|
|
4881
|
+
const temperatureSource = target.temperature;
|
|
4882
|
+
const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
|
|
4853
4883
|
const resourceName = resolveString(endpointSource, env, `${target.name} endpoint`);
|
|
4854
4884
|
const apiKey = resolveString(apiKeySource, env, `${target.name} api key`);
|
|
4855
4885
|
const deploymentName = resolveString(deploymentSource, env, `${target.name} deployment`);
|
|
@@ -4861,58 +4891,61 @@ function resolveAzureConfig(target, env) {
|
|
|
4861
4891
|
maxTokensSource,
|
|
4862
4892
|
`${target.name} max output tokens`
|
|
4863
4893
|
);
|
|
4894
|
+
const retry = resolveRetryConfig(target);
|
|
4864
4895
|
return {
|
|
4865
4896
|
resourceName,
|
|
4866
4897
|
deploymentName,
|
|
4867
4898
|
apiKey,
|
|
4868
4899
|
version,
|
|
4869
4900
|
temperature,
|
|
4870
|
-
maxOutputTokens
|
|
4901
|
+
maxOutputTokens,
|
|
4902
|
+
retry
|
|
4871
4903
|
};
|
|
4872
4904
|
}
|
|
4873
4905
|
function resolveAnthropicConfig(target, env) {
|
|
4874
|
-
const
|
|
4875
|
-
const
|
|
4876
|
-
const
|
|
4877
|
-
const
|
|
4878
|
-
const
|
|
4879
|
-
const thinkingBudgetSource = settings.thinking_budget ?? settings.thinkingBudget;
|
|
4906
|
+
const apiKeySource = target.api_key ?? target.apiKey;
|
|
4907
|
+
const modelSource = target.model ?? target.deployment ?? target.variant;
|
|
4908
|
+
const temperatureSource = target.temperature;
|
|
4909
|
+
const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
|
|
4910
|
+
const thinkingBudgetSource = target.thinking_budget ?? target.thinkingBudget;
|
|
4880
4911
|
const apiKey = resolveString(apiKeySource, env, `${target.name} Anthropic api key`);
|
|
4881
4912
|
const model = resolveString(modelSource, env, `${target.name} Anthropic model`);
|
|
4913
|
+
const retry = resolveRetryConfig(target);
|
|
4882
4914
|
return {
|
|
4883
4915
|
apiKey,
|
|
4884
4916
|
model,
|
|
4885
4917
|
temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
|
|
4886
4918
|
maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`),
|
|
4887
|
-
thinkingBudget: resolveOptionalNumber(thinkingBudgetSource, `${target.name} thinking budget`)
|
|
4919
|
+
thinkingBudget: resolveOptionalNumber(thinkingBudgetSource, `${target.name} thinking budget`),
|
|
4920
|
+
retry
|
|
4888
4921
|
};
|
|
4889
4922
|
}
|
|
4890
4923
|
function resolveGeminiConfig(target, env) {
|
|
4891
|
-
const
|
|
4892
|
-
const
|
|
4893
|
-
const
|
|
4894
|
-
const
|
|
4895
|
-
const maxTokensSource = settings.max_output_tokens ?? settings.maxTokens;
|
|
4924
|
+
const apiKeySource = target.api_key ?? target.apiKey;
|
|
4925
|
+
const modelSource = target.model ?? target.deployment ?? target.variant;
|
|
4926
|
+
const temperatureSource = target.temperature;
|
|
4927
|
+
const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
|
|
4896
4928
|
const apiKey = resolveString(apiKeySource, env, `${target.name} Google API key`);
|
|
4897
4929
|
const model = resolveOptionalString(modelSource, env, `${target.name} Gemini model`, {
|
|
4898
4930
|
allowLiteral: true,
|
|
4899
4931
|
optionalEnv: true
|
|
4900
4932
|
}) ?? "gemini-2.5-flash";
|
|
4933
|
+
const retry = resolveRetryConfig(target);
|
|
4901
4934
|
return {
|
|
4902
4935
|
apiKey,
|
|
4903
4936
|
model,
|
|
4904
4937
|
temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
|
|
4905
|
-
maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`)
|
|
4938
|
+
maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`),
|
|
4939
|
+
retry
|
|
4906
4940
|
};
|
|
4907
4941
|
}
|
|
4908
4942
|
function resolveCodexConfig(target, env) {
|
|
4909
|
-
const
|
|
4910
|
-
const
|
|
4911
|
-
const
|
|
4912
|
-
const
|
|
4913
|
-
const
|
|
4914
|
-
const
|
|
4915
|
-
const logFormatSource = settings.log_format ?? settings.logFormat ?? settings.log_output_format ?? settings.logOutputFormat ?? env.AGENTV_CODEX_LOG_FORMAT;
|
|
4943
|
+
const executableSource = target.executable ?? target.command ?? target.binary;
|
|
4944
|
+
const argsSource = target.args ?? target.arguments;
|
|
4945
|
+
const cwdSource = target.cwd;
|
|
4946
|
+
const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
|
|
4947
|
+
const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
|
|
4948
|
+
const logFormatSource = target.log_format ?? target.logFormat ?? target.log_output_format ?? target.logOutputFormat ?? env.AGENTV_CODEX_LOG_FORMAT;
|
|
4916
4949
|
const executable = resolveOptionalString(executableSource, env, `${target.name} codex executable`, {
|
|
4917
4950
|
allowLiteral: true,
|
|
4918
4951
|
optionalEnv: true
|
|
@@ -4951,21 +4984,19 @@ function normalizeCodexLogFormat(value) {
|
|
|
4951
4984
|
throw new Error("codex log format must be 'summary' or 'json'");
|
|
4952
4985
|
}
|
|
4953
4986
|
function resolveMockConfig(target) {
|
|
4954
|
-
const
|
|
4955
|
-
const response = typeof settings.response === "string" ? settings.response : void 0;
|
|
4987
|
+
const response = typeof target.response === "string" ? target.response : void 0;
|
|
4956
4988
|
return { response };
|
|
4957
4989
|
}
|
|
4958
4990
|
function resolveVSCodeConfig(target, env, insiders) {
|
|
4959
|
-
const
|
|
4960
|
-
const workspaceTemplateEnvVar = resolveOptionalLiteralString(settings.workspace_template ?? settings.workspaceTemplate);
|
|
4991
|
+
const workspaceTemplateEnvVar = resolveOptionalLiteralString(target.workspace_template ?? target.workspaceTemplate);
|
|
4961
4992
|
const workspaceTemplate = workspaceTemplateEnvVar ? resolveOptionalString(workspaceTemplateEnvVar, env, `${target.name} workspace template path`, {
|
|
4962
4993
|
allowLiteral: false,
|
|
4963
4994
|
optionalEnv: true
|
|
4964
4995
|
}) : void 0;
|
|
4965
|
-
const commandSource =
|
|
4966
|
-
const waitSource =
|
|
4967
|
-
const dryRunSource =
|
|
4968
|
-
const subagentRootSource =
|
|
4996
|
+
const commandSource = target.vscode_cmd ?? target.command;
|
|
4997
|
+
const waitSource = target.wait;
|
|
4998
|
+
const dryRunSource = target.dry_run ?? target.dryRun;
|
|
4999
|
+
const subagentRootSource = target.subagent_root ?? target.subagentRoot;
|
|
4969
5000
|
const defaultCommand = insiders ? "code-insiders" : "code";
|
|
4970
5001
|
const command = resolveOptionalLiteralString(commandSource) ?? defaultCommand;
|
|
4971
5002
|
return {
|
|
@@ -4980,18 +5011,16 @@ function resolveVSCodeConfig(target, env, insiders) {
|
|
|
4980
5011
|
};
|
|
4981
5012
|
}
|
|
4982
5013
|
function resolveCliConfig(target, env) {
|
|
4983
|
-
const
|
|
4984
|
-
const commandTemplateSource = settings.command_template ?? settings.commandTemplate;
|
|
5014
|
+
const commandTemplateSource = target.command_template ?? target.commandTemplate;
|
|
4985
5015
|
const filesFormat = resolveOptionalLiteralString(
|
|
4986
|
-
|
|
5016
|
+
target.files_format ?? target.filesFormat ?? target.attachments_format ?? target.attachmentsFormat
|
|
4987
5017
|
);
|
|
4988
|
-
const cwd = resolveOptionalString(
|
|
5018
|
+
const cwd = resolveOptionalString(target.cwd, env, `${target.name} working directory`, {
|
|
4989
5019
|
allowLiteral: true,
|
|
4990
5020
|
optionalEnv: true
|
|
4991
5021
|
});
|
|
4992
|
-
const
|
|
4993
|
-
const
|
|
4994
|
-
const healthcheck = resolveCliHealthcheck(settings.healthcheck, env, target.name);
|
|
5022
|
+
const timeoutMs = resolveTimeoutMs(target.timeout_seconds ?? target.timeoutSeconds, `${target.name} timeout`);
|
|
5023
|
+
const healthcheck = resolveCliHealthcheck(target.healthcheck, env, target.name);
|
|
4995
5024
|
const commandTemplate = resolveString(
|
|
4996
5025
|
commandTemplateSource,
|
|
4997
5026
|
env,
|
|
@@ -5003,29 +5032,10 @@ function resolveCliConfig(target, env) {
|
|
|
5003
5032
|
commandTemplate,
|
|
5004
5033
|
filesFormat,
|
|
5005
5034
|
cwd,
|
|
5006
|
-
env: envOverrides,
|
|
5007
5035
|
timeoutMs,
|
|
5008
5036
|
healthcheck
|
|
5009
5037
|
};
|
|
5010
5038
|
}
|
|
5011
|
-
function resolveEnvOverrides(source2, env, targetName) {
|
|
5012
|
-
if (source2 === void 0 || source2 === null) {
|
|
5013
|
-
return void 0;
|
|
5014
|
-
}
|
|
5015
|
-
if (typeof source2 !== "object" || Array.isArray(source2)) {
|
|
5016
|
-
throw new Error(`${targetName} env overrides must be an object map of strings`);
|
|
5017
|
-
}
|
|
5018
|
-
const entries = Object.entries(source2);
|
|
5019
|
-
const resolved = {};
|
|
5020
|
-
for (const [key2, value] of entries) {
|
|
5021
|
-
if (typeof value !== "string") {
|
|
5022
|
-
throw new Error(`${targetName} env override '${key2}' must be a string`);
|
|
5023
|
-
}
|
|
5024
|
-
const resolvedValue = resolveString(value, env, `${targetName} env override '${key2}'`);
|
|
5025
|
-
resolved[key2] = resolvedValue;
|
|
5026
|
-
}
|
|
5027
|
-
return Object.keys(resolved).length > 0 ? resolved : void 0;
|
|
5028
|
-
}
|
|
5029
5039
|
function resolveTimeoutMs(source2, description) {
|
|
5030
5040
|
const seconds = resolveOptionalNumber(source2, `${description} (seconds)`);
|
|
5031
5041
|
if (seconds === void 0) {
|
|
@@ -5221,6 +5231,26 @@ function resolveOptionalStringArray(source2, env, description) {
|
|
|
5221
5231
|
}
|
|
5222
5232
|
return resolved.length > 0 ? resolved : void 0;
|
|
5223
5233
|
}
|
|
5234
|
+
function resolveOptionalNumberArray(source2, description) {
|
|
5235
|
+
if (source2 === void 0 || source2 === null) {
|
|
5236
|
+
return void 0;
|
|
5237
|
+
}
|
|
5238
|
+
if (!Array.isArray(source2)) {
|
|
5239
|
+
throw new Error(`${description} must be an array of numbers`);
|
|
5240
|
+
}
|
|
5241
|
+
if (source2.length === 0) {
|
|
5242
|
+
return void 0;
|
|
5243
|
+
}
|
|
5244
|
+
const resolved = [];
|
|
5245
|
+
for (let i6 = 0; i6 < source2.length; i6++) {
|
|
5246
|
+
const item = source2[i6];
|
|
5247
|
+
if (typeof item !== "number" || !Number.isFinite(item)) {
|
|
5248
|
+
throw new Error(`${description}[${i6}] must be a number`);
|
|
5249
|
+
}
|
|
5250
|
+
resolved.push(item);
|
|
5251
|
+
}
|
|
5252
|
+
return resolved.length > 0 ? resolved : void 0;
|
|
5253
|
+
}
|
|
5224
5254
|
var AGENT_PROVIDER_KINDS = [
|
|
5225
5255
|
"codex",
|
|
5226
5256
|
"vscode",
|
|
@@ -5252,7 +5282,7 @@ var PROVIDER_ALIASES = [
|
|
|
5252
5282
|
"vertex"
|
|
5253
5283
|
// legacy/future support
|
|
5254
5284
|
];
|
|
5255
|
-
var TARGETS_SCHEMA_V2 = "agentv-targets-v2.
|
|
5285
|
+
var TARGETS_SCHEMA_V2 = "agentv-targets-v2.2";
|
|
5256
5286
|
function isAgentProvider(provider) {
|
|
5257
5287
|
return provider ? AGENT_PROVIDER_KINDS.includes(provider.kind) : false;
|
|
5258
5288
|
}
|
|
@@ -11917,14 +11947,11 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
11917
11947
|
logWarning(`Skipping incomplete eval case: ${id ?? "unknown"}`);
|
|
11918
11948
|
continue;
|
|
11919
11949
|
}
|
|
11920
|
-
|
|
11921
|
-
logWarning(`Eval case '${id}' missing expected_messages array`);
|
|
11922
|
-
continue;
|
|
11923
|
-
}
|
|
11950
|
+
const hasExpectedMessages = Array.isArray(expectedMessagesValue) && expectedMessagesValue.length > 0;
|
|
11924
11951
|
const inputMessages = inputMessagesValue.filter((msg) => isTestMessage(msg));
|
|
11925
|
-
const expectedMessages = expectedMessagesValue.filter((msg) => isTestMessage(msg));
|
|
11926
|
-
if (expectedMessages.length === 0) {
|
|
11927
|
-
logWarning(`No expected message found for eval case: ${id}`);
|
|
11952
|
+
const expectedMessages = hasExpectedMessages ? expectedMessagesValue.filter((msg) => isTestMessage(msg)) : [];
|
|
11953
|
+
if (hasExpectedMessages && expectedMessages.length === 0) {
|
|
11954
|
+
logWarning(`No valid expected message found for eval case: ${id}`);
|
|
11928
11955
|
continue;
|
|
11929
11956
|
}
|
|
11930
11957
|
if (expectedMessages.length > 1) {
|
|
@@ -11942,17 +11969,17 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
11942
11969
|
messageType: "input",
|
|
11943
11970
|
verbose
|
|
11944
11971
|
});
|
|
11945
|
-
const outputSegments = await processMessages({
|
|
11972
|
+
const outputSegments = hasExpectedMessages ? await processMessages({
|
|
11946
11973
|
messages: expectedMessages,
|
|
11947
11974
|
searchRoots,
|
|
11948
11975
|
repoRootPath,
|
|
11949
11976
|
guidelinePatterns,
|
|
11950
11977
|
messageType: "output",
|
|
11951
11978
|
verbose
|
|
11952
|
-
});
|
|
11979
|
+
}) : [];
|
|
11953
11980
|
const codeSnippets = extractCodeBlocks(inputSegments);
|
|
11954
11981
|
const expectedContent = expectedMessages[0]?.content;
|
|
11955
|
-
const referenceAnswer = await resolveAssistantContent(expectedContent, searchRoots, verbose);
|
|
11982
|
+
const referenceAnswer = expectedContent ? await resolveAssistantContent(expectedContent, searchRoots, verbose) : "";
|
|
11956
11983
|
const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
|
|
11957
11984
|
const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
|
|
11958
11985
|
const evaluators = await parseEvaluators(evalcase, searchRoots, id ?? "unknown");
|
|
@@ -11971,6 +11998,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
11971
11998
|
dataset: datasetName,
|
|
11972
11999
|
conversation_id: conversationId,
|
|
11973
12000
|
question,
|
|
12001
|
+
input_messages: inputMessages,
|
|
11974
12002
|
input_segments: inputSegments,
|
|
11975
12003
|
output_segments: outputSegments,
|
|
11976
12004
|
reference_answer: referenceAnswer,
|
|
@@ -11998,6 +12026,54 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
11998
12026
|
}
|
|
11999
12027
|
return results;
|
|
12000
12028
|
}
|
|
12029
|
+
function needsRoleMarkers(messages, processedSegmentsByMessage) {
|
|
12030
|
+
if (messages.some((msg) => msg.role === "assistant" || msg.role === "tool")) {
|
|
12031
|
+
return true;
|
|
12032
|
+
}
|
|
12033
|
+
let messagesWithContent = 0;
|
|
12034
|
+
for (const segments of processedSegmentsByMessage) {
|
|
12035
|
+
if (hasVisibleContent(segments)) {
|
|
12036
|
+
messagesWithContent++;
|
|
12037
|
+
}
|
|
12038
|
+
}
|
|
12039
|
+
return messagesWithContent > 1;
|
|
12040
|
+
}
|
|
12041
|
+
function hasVisibleContent(segments) {
|
|
12042
|
+
return segments.some((segment) => {
|
|
12043
|
+
const type = asString(segment.type);
|
|
12044
|
+
if (type === "text") {
|
|
12045
|
+
const value = asString(segment.value);
|
|
12046
|
+
return value !== void 0 && value.trim().length > 0;
|
|
12047
|
+
}
|
|
12048
|
+
if (type === "guideline_ref") {
|
|
12049
|
+
return false;
|
|
12050
|
+
}
|
|
12051
|
+
if (type === "file") {
|
|
12052
|
+
const text = asString(segment.text);
|
|
12053
|
+
return text !== void 0 && text.trim().length > 0;
|
|
12054
|
+
}
|
|
12055
|
+
return false;
|
|
12056
|
+
});
|
|
12057
|
+
}
|
|
12058
|
+
function formatSegment(segment) {
|
|
12059
|
+
const type = asString(segment.type);
|
|
12060
|
+
if (type === "text") {
|
|
12061
|
+
return asString(segment.value);
|
|
12062
|
+
}
|
|
12063
|
+
if (type === "guideline_ref") {
|
|
12064
|
+
const refPath = asString(segment.path);
|
|
12065
|
+
return refPath ? `<Attached: ${refPath}>` : void 0;
|
|
12066
|
+
}
|
|
12067
|
+
if (type === "file") {
|
|
12068
|
+
const text = asString(segment.text);
|
|
12069
|
+
const filePath = asString(segment.path);
|
|
12070
|
+
if (text && filePath) {
|
|
12071
|
+
return `=== ${filePath} ===
|
|
12072
|
+
${text}`;
|
|
12073
|
+
}
|
|
12074
|
+
}
|
|
12075
|
+
return void 0;
|
|
12076
|
+
}
|
|
12001
12077
|
async function buildPromptInputs(testCase) {
|
|
12002
12078
|
const guidelineContents = [];
|
|
12003
12079
|
for (const rawPath of testCase.guideline_paths) {
|
|
@@ -12014,36 +12090,168 @@ ${content}`);
|
|
|
12014
12090
|
logWarning(`Could not read guideline file ${absolutePath}: ${error.message}`);
|
|
12015
12091
|
}
|
|
12016
12092
|
}
|
|
12017
|
-
const
|
|
12093
|
+
const guidelines = guidelineContents.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
|
|
12094
|
+
const segmentsByMessage = [];
|
|
12095
|
+
const fileContentsByPath = /* @__PURE__ */ new Map();
|
|
12018
12096
|
for (const segment of testCase.input_segments) {
|
|
12019
|
-
|
|
12020
|
-
|
|
12021
|
-
const pathValue = segment.path;
|
|
12022
|
-
const textValue = segment.text;
|
|
12023
|
-
const label = typeof pathValue === "string" ? pathValue : "file";
|
|
12024
|
-
const body = typeof textValue === "string" ? textValue : "";
|
|
12025
|
-
questionParts.push(`=== ${label} ===
|
|
12026
|
-
${body}`);
|
|
12027
|
-
continue;
|
|
12097
|
+
if (segment.type === "file" && typeof segment.path === "string" && typeof segment.text === "string") {
|
|
12098
|
+
fileContentsByPath.set(segment.path, segment.text);
|
|
12028
12099
|
}
|
|
12029
|
-
|
|
12030
|
-
|
|
12031
|
-
|
|
12032
|
-
|
|
12100
|
+
}
|
|
12101
|
+
for (const message of testCase.input_messages) {
|
|
12102
|
+
const messageSegments = [];
|
|
12103
|
+
if (typeof message.content === "string") {
|
|
12104
|
+
if (message.content.trim().length > 0) {
|
|
12105
|
+
messageSegments.push({ type: "text", value: message.content });
|
|
12106
|
+
}
|
|
12107
|
+
} else if (Array.isArray(message.content)) {
|
|
12108
|
+
for (const segment of message.content) {
|
|
12109
|
+
if (typeof segment === "string") {
|
|
12110
|
+
if (segment.trim().length > 0) {
|
|
12111
|
+
messageSegments.push({ type: "text", value: segment });
|
|
12112
|
+
}
|
|
12113
|
+
} else if (isJsonObject(segment)) {
|
|
12114
|
+
const type = asString(segment.type);
|
|
12115
|
+
if (type === "file") {
|
|
12116
|
+
const value = asString(segment.value);
|
|
12117
|
+
if (!value) continue;
|
|
12118
|
+
if (testCase.guideline_patterns && isGuidelineFile(value, testCase.guideline_patterns)) {
|
|
12119
|
+
messageSegments.push({ type: "guideline_ref", path: value });
|
|
12120
|
+
continue;
|
|
12121
|
+
}
|
|
12122
|
+
const fileText = fileContentsByPath.get(value);
|
|
12123
|
+
if (fileText !== void 0) {
|
|
12124
|
+
messageSegments.push({ type: "file", text: fileText, path: value });
|
|
12125
|
+
}
|
|
12126
|
+
} else if (type === "text") {
|
|
12127
|
+
const textValue = asString(segment.value);
|
|
12128
|
+
if (textValue && textValue.trim().length > 0) {
|
|
12129
|
+
messageSegments.push({ type: "text", value: textValue });
|
|
12130
|
+
}
|
|
12131
|
+
}
|
|
12132
|
+
}
|
|
12133
|
+
}
|
|
12134
|
+
}
|
|
12135
|
+
segmentsByMessage.push(messageSegments);
|
|
12136
|
+
}
|
|
12137
|
+
const useRoleMarkers = needsRoleMarkers(testCase.input_messages, segmentsByMessage);
|
|
12138
|
+
let question;
|
|
12139
|
+
if (useRoleMarkers) {
|
|
12140
|
+
const messageParts = [];
|
|
12141
|
+
for (let i6 = 0; i6 < testCase.input_messages.length; i6++) {
|
|
12142
|
+
const message = testCase.input_messages[i6];
|
|
12143
|
+
const segments = segmentsByMessage[i6];
|
|
12144
|
+
if (!hasVisibleContent(segments)) {
|
|
12145
|
+
continue;
|
|
12146
|
+
}
|
|
12147
|
+
const roleLabel = message.role.charAt(0).toUpperCase() + message.role.slice(1);
|
|
12148
|
+
const contentParts = [];
|
|
12149
|
+
for (const segment of segments) {
|
|
12150
|
+
const formattedContent = formatSegment(segment);
|
|
12151
|
+
if (formattedContent) {
|
|
12152
|
+
contentParts.push(formattedContent);
|
|
12153
|
+
}
|
|
12154
|
+
}
|
|
12155
|
+
if (contentParts.length > 0) {
|
|
12156
|
+
const messageContent = contentParts.join("\n");
|
|
12157
|
+
messageParts.push(`@[${roleLabel}]:
|
|
12158
|
+
${messageContent}`);
|
|
12159
|
+
}
|
|
12160
|
+
}
|
|
12161
|
+
question = messageParts.join("\n\n");
|
|
12162
|
+
} else {
|
|
12163
|
+
const questionParts = [];
|
|
12164
|
+
for (const segment of testCase.input_segments) {
|
|
12165
|
+
const formattedContent = formatSegment(segment);
|
|
12166
|
+
if (formattedContent) {
|
|
12167
|
+
questionParts.push(formattedContent);
|
|
12168
|
+
}
|
|
12169
|
+
}
|
|
12170
|
+
if (testCase.code_snippets.length > 0) {
|
|
12171
|
+
questionParts.push(testCase.code_snippets.join("\n"));
|
|
12172
|
+
}
|
|
12173
|
+
question = questionParts.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
|
|
12174
|
+
}
|
|
12175
|
+
const chatPrompt = useRoleMarkers ? buildChatPromptFromSegments({
|
|
12176
|
+
messages: testCase.input_messages,
|
|
12177
|
+
segmentsByMessage,
|
|
12178
|
+
guidelinePatterns: testCase.guideline_patterns,
|
|
12179
|
+
guidelineContent: guidelines
|
|
12180
|
+
}) : void 0;
|
|
12181
|
+
return { question, guidelines, chatPrompt };
|
|
12182
|
+
}
|
|
12183
|
+
function buildChatPromptFromSegments(options) {
|
|
12184
|
+
const { messages, segmentsByMessage, guidelinePatterns, guidelineContent, systemPrompt } = options;
|
|
12185
|
+
if (messages.length === 0) {
|
|
12186
|
+
return void 0;
|
|
12187
|
+
}
|
|
12188
|
+
const systemSegments = [];
|
|
12189
|
+
if (systemPrompt && systemPrompt.trim().length > 0) {
|
|
12190
|
+
systemSegments.push(systemPrompt.trim());
|
|
12191
|
+
}
|
|
12192
|
+
if (guidelineContent && guidelineContent.trim().length > 0) {
|
|
12193
|
+
systemSegments.push(`[[ ## Guidelines ## ]]
|
|
12194
|
+
|
|
12195
|
+
${guidelineContent.trim()}`);
|
|
12196
|
+
}
|
|
12197
|
+
let startIndex = 0;
|
|
12198
|
+
while (startIndex < messages.length && messages[startIndex].role === "system") {
|
|
12199
|
+
const segments = segmentsByMessage[startIndex];
|
|
12200
|
+
const contentParts = [];
|
|
12201
|
+
for (const segment of segments) {
|
|
12202
|
+
const formatted = formatSegment(segment);
|
|
12203
|
+
if (formatted) {
|
|
12204
|
+
contentParts.push(formatted);
|
|
12033
12205
|
}
|
|
12034
|
-
continue;
|
|
12035
12206
|
}
|
|
12036
|
-
|
|
12037
|
-
|
|
12038
|
-
questionParts.push(genericValue);
|
|
12207
|
+
if (contentParts.length > 0) {
|
|
12208
|
+
systemSegments.push(contentParts.join("\n"));
|
|
12039
12209
|
}
|
|
12210
|
+
startIndex += 1;
|
|
12040
12211
|
}
|
|
12041
|
-
|
|
12042
|
-
|
|
12212
|
+
const chatPrompt = [];
|
|
12213
|
+
if (systemSegments.length > 0) {
|
|
12214
|
+
chatPrompt.push({
|
|
12215
|
+
role: "system",
|
|
12216
|
+
content: systemSegments.join("\n\n")
|
|
12217
|
+
});
|
|
12043
12218
|
}
|
|
12044
|
-
|
|
12045
|
-
|
|
12046
|
-
|
|
12219
|
+
for (let i6 = startIndex; i6 < messages.length; i6++) {
|
|
12220
|
+
const message = messages[i6];
|
|
12221
|
+
const segments = segmentsByMessage[i6];
|
|
12222
|
+
const contentParts = [];
|
|
12223
|
+
let role = message.role;
|
|
12224
|
+
let name;
|
|
12225
|
+
if (role === "system") {
|
|
12226
|
+
role = "assistant";
|
|
12227
|
+
contentParts.push("@[System]:");
|
|
12228
|
+
} else if (role === "tool") {
|
|
12229
|
+
role = "function";
|
|
12230
|
+
name = "tool";
|
|
12231
|
+
}
|
|
12232
|
+
for (const segment of segments) {
|
|
12233
|
+
if (segment.type === "guideline_ref") {
|
|
12234
|
+
continue;
|
|
12235
|
+
}
|
|
12236
|
+
const formatted = formatSegment(segment);
|
|
12237
|
+
if (formatted) {
|
|
12238
|
+
const isGuidelineRef = segment.type === "file" && typeof segment.path === "string" && guidelinePatterns && isGuidelineFile(segment.path, guidelinePatterns);
|
|
12239
|
+
if (isGuidelineRef) {
|
|
12240
|
+
continue;
|
|
12241
|
+
}
|
|
12242
|
+
contentParts.push(formatted);
|
|
12243
|
+
}
|
|
12244
|
+
}
|
|
12245
|
+
if (contentParts.length === 0) {
|
|
12246
|
+
continue;
|
|
12247
|
+
}
|
|
12248
|
+
chatPrompt.push({
|
|
12249
|
+
role,
|
|
12250
|
+
content: contentParts.join("\n"),
|
|
12251
|
+
...name ? { name } : {}
|
|
12252
|
+
});
|
|
12253
|
+
}
|
|
12254
|
+
return chatPrompt.length > 0 ? chatPrompt : void 0;
|
|
12047
12255
|
}
|
|
12048
12256
|
async function fileExists2(absolutePath) {
|
|
12049
12257
|
try {
|
|
@@ -12237,21 +12445,14 @@ ${detailBlock}${ANSI_RESET}`);
|
|
|
12237
12445
|
var DEFAULT_SYSTEM_PROMPT = "You are a careful assistant. Follow all provided instructions and do not fabricate results.";
|
|
12238
12446
|
function buildChatPrompt(request) {
|
|
12239
12447
|
if (request.chatPrompt) {
|
|
12240
|
-
|
|
12241
|
-
|
|
12242
|
-
|
|
12243
|
-
|
|
12244
|
-
|
|
12245
|
-
|
|
12246
|
-
} else {
|
|
12247
|
-
systemSegments.push(DEFAULT_SYSTEM_PROMPT);
|
|
12248
|
-
}
|
|
12249
|
-
if (request.guidelines && request.guidelines.trim().length > 0) {
|
|
12250
|
-
systemSegments.push(`[[ ## Guidelines ## ]]
|
|
12251
|
-
|
|
12252
|
-
${request.guidelines.trim()}`);
|
|
12448
|
+
const hasSystemMessage = request.chatPrompt.some((message) => message.role === "system");
|
|
12449
|
+
if (hasSystemMessage) {
|
|
12450
|
+
return request.chatPrompt;
|
|
12451
|
+
}
|
|
12452
|
+
const systemContent2 = resolveSystemContent(request);
|
|
12453
|
+
return [{ role: "system", content: systemContent2 }, ...request.chatPrompt];
|
|
12253
12454
|
}
|
|
12254
|
-
const systemContent =
|
|
12455
|
+
const systemContent = resolveSystemContent(request);
|
|
12255
12456
|
const userContent = request.question.trim();
|
|
12256
12457
|
const prompt = [
|
|
12257
12458
|
{
|
|
@@ -12265,6 +12466,21 @@ ${request.guidelines.trim()}`);
|
|
|
12265
12466
|
];
|
|
12266
12467
|
return prompt;
|
|
12267
12468
|
}
|
|
12469
|
+
function resolveSystemContent(request) {
|
|
12470
|
+
const systemSegments = [];
|
|
12471
|
+
const metadataSystemPrompt = typeof request.metadata?.systemPrompt === "string" ? request.metadata.systemPrompt : void 0;
|
|
12472
|
+
if (metadataSystemPrompt && metadataSystemPrompt.trim().length > 0) {
|
|
12473
|
+
systemSegments.push(metadataSystemPrompt.trim());
|
|
12474
|
+
} else {
|
|
12475
|
+
systemSegments.push(DEFAULT_SYSTEM_PROMPT);
|
|
12476
|
+
}
|
|
12477
|
+
if (request.guidelines && request.guidelines.trim().length > 0) {
|
|
12478
|
+
systemSegments.push(`[[ ## Guidelines ## ]]
|
|
12479
|
+
|
|
12480
|
+
${request.guidelines.trim()}`);
|
|
12481
|
+
}
|
|
12482
|
+
return systemSegments.join("\n\n");
|
|
12483
|
+
}
|
|
12268
12484
|
function extractModelConfig(request, defaults) {
|
|
12269
12485
|
const temperature = request.temperature ?? defaults.temperature;
|
|
12270
12486
|
const maxTokens = request.maxOutputTokens ?? defaults.maxOutputTokens;
|
|
@@ -12308,6 +12524,67 @@ function ensureChatResponse(result) {
|
|
|
12308
12524
|
}
|
|
12309
12525
|
return result;
|
|
12310
12526
|
}
|
|
12527
|
+
function isRetryableError(error, retryableStatusCodes) {
|
|
12528
|
+
if (!error || typeof error !== "object") {
|
|
12529
|
+
return false;
|
|
12530
|
+
}
|
|
12531
|
+
if ("status" in error && typeof error.status === "number") {
|
|
12532
|
+
return retryableStatusCodes.includes(error.status);
|
|
12533
|
+
}
|
|
12534
|
+
if ("message" in error && typeof error.message === "string") {
|
|
12535
|
+
const match = error.message.match(/HTTP (\d{3})/);
|
|
12536
|
+
if (match) {
|
|
12537
|
+
const status = Number.parseInt(match[1], 10);
|
|
12538
|
+
return retryableStatusCodes.includes(status);
|
|
12539
|
+
}
|
|
12540
|
+
}
|
|
12541
|
+
if ("name" in error && error.name === "AxAIServiceNetworkError") {
|
|
12542
|
+
return true;
|
|
12543
|
+
}
|
|
12544
|
+
return false;
|
|
12545
|
+
}
|
|
12546
|
+
function calculateRetryDelay(attempt, config) {
|
|
12547
|
+
const delay = Math.min(
|
|
12548
|
+
config.maxDelayMs,
|
|
12549
|
+
config.initialDelayMs * config.backoffFactor ** attempt
|
|
12550
|
+
);
|
|
12551
|
+
return delay * (0.75 + Math.random() * 0.5);
|
|
12552
|
+
}
|
|
12553
|
+
async function sleep2(ms) {
|
|
12554
|
+
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
12555
|
+
}
|
|
12556
|
+
async function withRetry(fn, retryConfig, signal) {
|
|
12557
|
+
const config = {
|
|
12558
|
+
maxRetries: retryConfig?.maxRetries ?? 3,
|
|
12559
|
+
initialDelayMs: retryConfig?.initialDelayMs ?? 1e3,
|
|
12560
|
+
maxDelayMs: retryConfig?.maxDelayMs ?? 6e4,
|
|
12561
|
+
backoffFactor: retryConfig?.backoffFactor ?? 2,
|
|
12562
|
+
retryableStatusCodes: retryConfig?.retryableStatusCodes ?? [500, 408, 429, 502, 503, 504]
|
|
12563
|
+
};
|
|
12564
|
+
let lastError;
|
|
12565
|
+
for (let attempt = 0; attempt <= config.maxRetries; attempt++) {
|
|
12566
|
+
if (signal?.aborted) {
|
|
12567
|
+
throw new Error(`Request aborted: ${signal.reason ?? "Unknown reason"}`);
|
|
12568
|
+
}
|
|
12569
|
+
try {
|
|
12570
|
+
return await fn();
|
|
12571
|
+
} catch (error) {
|
|
12572
|
+
lastError = error;
|
|
12573
|
+
if (attempt >= config.maxRetries) {
|
|
12574
|
+
break;
|
|
12575
|
+
}
|
|
12576
|
+
if (!isRetryableError(error, config.retryableStatusCodes)) {
|
|
12577
|
+
throw error;
|
|
12578
|
+
}
|
|
12579
|
+
const delay = calculateRetryDelay(attempt, config);
|
|
12580
|
+
await sleep2(delay);
|
|
12581
|
+
if (signal?.aborted) {
|
|
12582
|
+
throw new Error(`Request aborted: ${signal.reason ?? "Unknown reason"}`);
|
|
12583
|
+
}
|
|
12584
|
+
}
|
|
12585
|
+
}
|
|
12586
|
+
throw lastError;
|
|
12587
|
+
}
|
|
12311
12588
|
var AzureProvider = class {
|
|
12312
12589
|
constructor(targetName, config) {
|
|
12313
12590
|
this.config = config;
|
|
@@ -12317,6 +12594,7 @@ var AzureProvider = class {
|
|
|
12317
12594
|
temperature: config.temperature,
|
|
12318
12595
|
maxOutputTokens: config.maxOutputTokens
|
|
12319
12596
|
};
|
|
12597
|
+
this.retryConfig = config.retry;
|
|
12320
12598
|
this.ai = Wn.create({
|
|
12321
12599
|
name: "azure-openai",
|
|
12322
12600
|
apiKey: config.apiKey,
|
|
@@ -12333,16 +12611,21 @@ var AzureProvider = class {
|
|
|
12333
12611
|
targetName;
|
|
12334
12612
|
ai;
|
|
12335
12613
|
defaults;
|
|
12614
|
+
retryConfig;
|
|
12336
12615
|
async invoke(request) {
|
|
12337
12616
|
const chatPrompt = buildChatPrompt(request);
|
|
12338
12617
|
const modelConfig = extractModelConfig(request, this.defaults);
|
|
12339
|
-
const response = await
|
|
12340
|
-
|
|
12341
|
-
|
|
12342
|
-
|
|
12343
|
-
|
|
12344
|
-
|
|
12345
|
-
|
|
12618
|
+
const response = await withRetry(
|
|
12619
|
+
async () => await this.ai.chat(
|
|
12620
|
+
{
|
|
12621
|
+
chatPrompt,
|
|
12622
|
+
model: this.config.deploymentName,
|
|
12623
|
+
...modelConfig ? { modelConfig } : {}
|
|
12624
|
+
},
|
|
12625
|
+
request.signal ? { abortSignal: request.signal } : void 0
|
|
12626
|
+
),
|
|
12627
|
+
this.retryConfig,
|
|
12628
|
+
request.signal
|
|
12346
12629
|
);
|
|
12347
12630
|
return mapResponse(ensureChatResponse(response));
|
|
12348
12631
|
}
|
|
@@ -12360,6 +12643,7 @@ var AnthropicProvider = class {
|
|
|
12360
12643
|
maxOutputTokens: config.maxOutputTokens,
|
|
12361
12644
|
thinkingBudget: config.thinkingBudget
|
|
12362
12645
|
};
|
|
12646
|
+
this.retryConfig = config.retry;
|
|
12363
12647
|
this.ai = Wn.create({
|
|
12364
12648
|
name: "anthropic",
|
|
12365
12649
|
apiKey: config.apiKey
|
|
@@ -12370,16 +12654,21 @@ var AnthropicProvider = class {
|
|
|
12370
12654
|
targetName;
|
|
12371
12655
|
ai;
|
|
12372
12656
|
defaults;
|
|
12657
|
+
retryConfig;
|
|
12373
12658
|
async invoke(request) {
|
|
12374
12659
|
const chatPrompt = buildChatPrompt(request);
|
|
12375
12660
|
const modelConfig = extractModelConfig(request, this.defaults);
|
|
12376
|
-
const response = await
|
|
12377
|
-
|
|
12378
|
-
|
|
12379
|
-
|
|
12380
|
-
|
|
12381
|
-
|
|
12382
|
-
|
|
12661
|
+
const response = await withRetry(
|
|
12662
|
+
async () => await this.ai.chat(
|
|
12663
|
+
{
|
|
12664
|
+
chatPrompt,
|
|
12665
|
+
model: this.config.model,
|
|
12666
|
+
...modelConfig ? { modelConfig } : {}
|
|
12667
|
+
},
|
|
12668
|
+
request.signal ? { abortSignal: request.signal } : void 0
|
|
12669
|
+
),
|
|
12670
|
+
this.retryConfig,
|
|
12671
|
+
request.signal
|
|
12383
12672
|
);
|
|
12384
12673
|
return mapResponse(ensureChatResponse(response));
|
|
12385
12674
|
}
|
|
@@ -12396,6 +12685,7 @@ var GeminiProvider = class {
|
|
|
12396
12685
|
temperature: config.temperature,
|
|
12397
12686
|
maxOutputTokens: config.maxOutputTokens
|
|
12398
12687
|
};
|
|
12688
|
+
this.retryConfig = config.retry;
|
|
12399
12689
|
this.ai = Wn.create({
|
|
12400
12690
|
name: "google-gemini",
|
|
12401
12691
|
apiKey: config.apiKey
|
|
@@ -12406,16 +12696,21 @@ var GeminiProvider = class {
|
|
|
12406
12696
|
targetName;
|
|
12407
12697
|
ai;
|
|
12408
12698
|
defaults;
|
|
12699
|
+
retryConfig;
|
|
12409
12700
|
async invoke(request) {
|
|
12410
12701
|
const chatPrompt = buildChatPrompt(request);
|
|
12411
12702
|
const modelConfig = extractModelConfig(request, this.defaults);
|
|
12412
|
-
const response = await
|
|
12413
|
-
|
|
12414
|
-
|
|
12415
|
-
|
|
12416
|
-
|
|
12417
|
-
|
|
12418
|
-
|
|
12703
|
+
const response = await withRetry(
|
|
12704
|
+
async () => await this.ai.chat(
|
|
12705
|
+
{
|
|
12706
|
+
chatPrompt,
|
|
12707
|
+
model: this.config.model,
|
|
12708
|
+
...modelConfig ? { modelConfig } : {}
|
|
12709
|
+
},
|
|
12710
|
+
request.signal ? { abortSignal: request.signal } : void 0
|
|
12711
|
+
),
|
|
12712
|
+
this.retryConfig,
|
|
12713
|
+
request.signal
|
|
12419
12714
|
);
|
|
12420
12715
|
return mapResponse(ensureChatResponse(response));
|
|
12421
12716
|
}
|
|
@@ -12478,10 +12773,9 @@ var CliProvider = class {
|
|
|
12478
12773
|
const outputFilePath = generateOutputFilePath(request.evalCaseId);
|
|
12479
12774
|
const templateValues = buildTemplateValues(request, this.config, outputFilePath);
|
|
12480
12775
|
const renderedCommand = renderTemplate(this.config.commandTemplate, templateValues);
|
|
12481
|
-
const env = this.config.env ? { ...process.env, ...this.config.env } : process.env;
|
|
12482
12776
|
const result = await this.runCommand(renderedCommand, {
|
|
12483
12777
|
cwd: this.config.cwd,
|
|
12484
|
-
env,
|
|
12778
|
+
env: process.env,
|
|
12485
12779
|
timeoutMs: this.config.timeoutMs,
|
|
12486
12780
|
signal: request.signal
|
|
12487
12781
|
});
|
|
@@ -12570,10 +12864,9 @@ var CliProvider = class {
|
|
|
12570
12864
|
generateOutputFilePath("healthcheck")
|
|
12571
12865
|
)
|
|
12572
12866
|
);
|
|
12573
|
-
const env = this.config.env ? { ...process.env, ...this.config.env } : process.env;
|
|
12574
12867
|
const result = await this.runCommand(renderedCommand, {
|
|
12575
12868
|
cwd: healthcheck.cwd ?? this.config.cwd,
|
|
12576
|
-
env,
|
|
12869
|
+
env: process.env,
|
|
12577
12870
|
timeoutMs,
|
|
12578
12871
|
signal
|
|
12579
12872
|
});
|
|
@@ -13771,20 +14064,13 @@ function assertTargetDefinition(value, index, filePath) {
|
|
|
13771
14064
|
}
|
|
13772
14065
|
const name = value.name;
|
|
13773
14066
|
const provider = value.provider;
|
|
13774
|
-
const settings = value.settings;
|
|
13775
|
-
const judgeTarget = value.judge_target;
|
|
13776
14067
|
if (typeof name !== "string" || name.trim().length === 0) {
|
|
13777
14068
|
throw new Error(`targets.yaml entry at index ${index} in ${filePath} is missing a valid 'name'`);
|
|
13778
14069
|
}
|
|
13779
14070
|
if (typeof provider !== "string" || provider.trim().length === 0) {
|
|
13780
14071
|
throw new Error(`targets.yaml entry '${name}' in ${filePath} is missing a valid 'provider'`);
|
|
13781
14072
|
}
|
|
13782
|
-
return
|
|
13783
|
-
name,
|
|
13784
|
-
provider,
|
|
13785
|
-
settings: isRecord(settings) ? settings : void 0,
|
|
13786
|
-
judge_target: typeof judgeTarget === "string" ? judgeTarget : void 0
|
|
13787
|
-
};
|
|
14073
|
+
return value;
|
|
13788
14074
|
}
|
|
13789
14075
|
async function fileExists3(filePath) {
|
|
13790
14076
|
try {
|
|
@@ -13855,19 +14141,21 @@ var LlmJudgeEvaluator = class {
|
|
|
13855
14141
|
return this.evaluateWithPrompt(context2, judgeProvider);
|
|
13856
14142
|
}
|
|
13857
14143
|
async evaluateWithPrompt(context2, judgeProvider) {
|
|
13858
|
-
|
|
13859
|
-
|
|
14144
|
+
const hasReferenceAnswer = hasNonEmptyReferenceAnswer(context2.evalCase);
|
|
14145
|
+
const formattedQuestion = context2.promptInputs.question && context2.promptInputs.question.trim().length > 0 ? context2.promptInputs.question : context2.evalCase.question;
|
|
14146
|
+
let prompt = buildQualityPrompt(context2.evalCase, context2.candidate, formattedQuestion);
|
|
14147
|
+
let systemPrompt = context2.systemPrompt ?? this.customPrompt ?? buildSystemPrompt(hasReferenceAnswer);
|
|
13860
14148
|
if (systemPrompt && hasTemplateVariables(systemPrompt)) {
|
|
13861
14149
|
const variables = {
|
|
13862
14150
|
input_messages: JSON.stringify(context2.evalCase.input_segments, null, 2),
|
|
13863
14151
|
output_messages: JSON.stringify(context2.evalCase.output_segments, null, 2),
|
|
13864
14152
|
candidate_answer: context2.candidate,
|
|
13865
|
-
reference_answer: context2.evalCase.reference_answer,
|
|
14153
|
+
reference_answer: context2.evalCase.reference_answer ?? "",
|
|
13866
14154
|
expected_outcome: context2.evalCase.expected_outcome,
|
|
13867
|
-
question:
|
|
14155
|
+
question: formattedQuestion
|
|
13868
14156
|
};
|
|
13869
14157
|
prompt = substituteVariables(systemPrompt, variables);
|
|
13870
|
-
systemPrompt =
|
|
14158
|
+
systemPrompt = buildSystemPrompt(hasReferenceAnswer);
|
|
13871
14159
|
}
|
|
13872
14160
|
const metadata = {
|
|
13873
14161
|
...systemPrompt !== void 0 ? { systemPrompt } : {},
|
|
@@ -13905,38 +14193,51 @@ var LlmJudgeEvaluator = class {
|
|
|
13905
14193
|
};
|
|
13906
14194
|
}
|
|
13907
14195
|
};
|
|
13908
|
-
|
|
13909
|
-
|
|
13910
|
-
|
|
13911
|
-
|
|
13912
|
-
|
|
13913
|
-
|
|
13914
|
-
|
|
13915
|
-
|
|
13916
|
-
|
|
13917
|
-
|
|
13918
|
-
|
|
13919
|
-
|
|
13920
|
-
|
|
13921
|
-
|
|
13922
|
-
|
|
13923
|
-
|
|
13924
|
-
|
|
14196
|
+
function buildSystemPrompt(hasReferenceAnswer) {
|
|
14197
|
+
const basePrompt = [
|
|
14198
|
+
"You are an expert evaluator. Your goal is to grade the candidate_answer based on how well it achieves the expected_outcome for the original task.",
|
|
14199
|
+
""
|
|
14200
|
+
];
|
|
14201
|
+
if (hasReferenceAnswer) {
|
|
14202
|
+
basePrompt.push(
|
|
14203
|
+
"Use the reference_answer as a gold standard for a high-quality response. The candidate_answer does not need to match it verbatim, but should capture the key points and follow the same spirit.",
|
|
14204
|
+
""
|
|
14205
|
+
);
|
|
14206
|
+
}
|
|
14207
|
+
basePrompt.push(
|
|
14208
|
+
"Be concise and focused in your evaluation. Provide succinct, specific feedback rather than verbose explanations.",
|
|
14209
|
+
"",
|
|
14210
|
+
"You must respond with a single JSON object matching this schema:",
|
|
14211
|
+
"",
|
|
14212
|
+
"{",
|
|
14213
|
+
' "score": <number between 0.0 and 1.0>,',
|
|
14214
|
+
' "hits": [<array of strings, max 4 items, brief specific achievements>],',
|
|
14215
|
+
' "misses": [<array of strings, max 4 items, brief specific failures or omissions, empty if none>],',
|
|
14216
|
+
' "reasoning": "<string, concise explanation for the score, 1-2 sentences max>"',
|
|
14217
|
+
"}"
|
|
14218
|
+
);
|
|
14219
|
+
return basePrompt.join("\n");
|
|
14220
|
+
}
|
|
14221
|
+
function buildQualityPrompt(evalCase, candidate, question) {
|
|
13925
14222
|
const parts = [
|
|
13926
14223
|
"[[ ## expected_outcome ## ]]",
|
|
13927
14224
|
evalCase.expected_outcome.trim(),
|
|
13928
14225
|
"",
|
|
13929
14226
|
"[[ ## question ## ]]",
|
|
13930
|
-
|
|
13931
|
-
""
|
|
13932
|
-
"[[ ## reference_answer ## ]]",
|
|
13933
|
-
evalCase.reference_answer.trim(),
|
|
13934
|
-
"",
|
|
13935
|
-
"[[ ## candidate_answer ## ]]",
|
|
13936
|
-
candidate.trim(),
|
|
13937
|
-
"",
|
|
13938
|
-
"Respond with a single JSON object matching the schema described in the system prompt."
|
|
14227
|
+
question.trim(),
|
|
14228
|
+
""
|
|
13939
14229
|
];
|
|
14230
|
+
if (hasNonEmptyReferenceAnswer(evalCase)) {
|
|
14231
|
+
parts.push(
|
|
14232
|
+
"[[ ## reference_answer ## ]]",
|
|
14233
|
+
evalCase.reference_answer.trim(),
|
|
14234
|
+
""
|
|
14235
|
+
);
|
|
14236
|
+
}
|
|
14237
|
+
parts.push(
|
|
14238
|
+
"[[ ## candidate_answer ## ]]",
|
|
14239
|
+
candidate.trim()
|
|
14240
|
+
);
|
|
13940
14241
|
return parts.join("\n");
|
|
13941
14242
|
}
|
|
13942
14243
|
function clampScore(value) {
|
|
@@ -14019,6 +14320,9 @@ function extractJsonBlob(text) {
|
|
|
14019
14320
|
function isNonEmptyString(value) {
|
|
14020
14321
|
return typeof value === "string" && value.trim().length > 0;
|
|
14021
14322
|
}
|
|
14323
|
+
function hasNonEmptyReferenceAnswer(evalCase) {
|
|
14324
|
+
return evalCase.reference_answer !== void 0 && evalCase.reference_answer.trim().length > 0;
|
|
14325
|
+
}
|
|
14022
14326
|
var CodeEvaluator = class {
|
|
14023
14327
|
kind = "code";
|
|
14024
14328
|
script;
|
|
@@ -14405,10 +14709,11 @@ async function runEvaluation(options) {
|
|
|
14405
14709
|
await onProgress({
|
|
14406
14710
|
workerId,
|
|
14407
14711
|
evalId: evalCase.id,
|
|
14408
|
-
status: "completed",
|
|
14712
|
+
status: result.error ? "failed" : "completed",
|
|
14409
14713
|
startedAt: 0,
|
|
14410
14714
|
// Not used for completed status
|
|
14411
|
-
completedAt: Date.now()
|
|
14715
|
+
completedAt: Date.now(),
|
|
14716
|
+
error: result.error
|
|
14412
14717
|
});
|
|
14413
14718
|
}
|
|
14414
14719
|
if (onResult) {
|
|
@@ -14665,11 +14970,27 @@ async function evaluateCandidate(options) {
|
|
|
14665
14970
|
agentTimeoutMs
|
|
14666
14971
|
});
|
|
14667
14972
|
const completedAt = nowFn();
|
|
14668
|
-
|
|
14669
|
-
|
|
14670
|
-
|
|
14671
|
-
|
|
14672
|
-
|
|
14973
|
+
let agentProviderRequest;
|
|
14974
|
+
let lmProviderRequest;
|
|
14975
|
+
if (isAgentProvider(provider)) {
|
|
14976
|
+
agentProviderRequest = {
|
|
14977
|
+
question: promptInputs.question,
|
|
14978
|
+
guideline_paths: evalCase.guideline_paths
|
|
14979
|
+
};
|
|
14980
|
+
} else {
|
|
14981
|
+
if (promptInputs.chatPrompt) {
|
|
14982
|
+
lmProviderRequest = {
|
|
14983
|
+
chat_prompt: promptInputs.chatPrompt,
|
|
14984
|
+
guideline_paths: evalCase.guideline_paths
|
|
14985
|
+
};
|
|
14986
|
+
} else {
|
|
14987
|
+
lmProviderRequest = {
|
|
14988
|
+
question: promptInputs.question,
|
|
14989
|
+
guidelines: promptInputs.guidelines,
|
|
14990
|
+
guideline_paths: evalCase.guideline_paths
|
|
14991
|
+
};
|
|
14992
|
+
}
|
|
14993
|
+
}
|
|
14673
14994
|
return {
|
|
14674
14995
|
eval_id: evalCase.id,
|
|
14675
14996
|
dataset: evalCase.dataset,
|
|
@@ -14683,7 +15004,8 @@ async function evaluateCandidate(options) {
|
|
|
14683
15004
|
timestamp: completedAt.toISOString(),
|
|
14684
15005
|
reasoning: score.reasoning,
|
|
14685
15006
|
raw_aspects: score.rawAspects,
|
|
14686
|
-
|
|
15007
|
+
agent_provider_request: agentProviderRequest,
|
|
15008
|
+
lm_provider_request: lmProviderRequest,
|
|
14687
15009
|
evaluator_raw_request: evaluatorResults ? void 0 : score.evaluatorRawRequest,
|
|
14688
15010
|
evaluator_results: evaluatorResults
|
|
14689
15011
|
};
|
|
@@ -14912,6 +15234,7 @@ async function invokeProvider(provider, options) {
|
|
|
14912
15234
|
question: promptInputs.question,
|
|
14913
15235
|
guidelines: promptInputs.guidelines,
|
|
14914
15236
|
guideline_patterns: evalCase.guideline_patterns,
|
|
15237
|
+
chatPrompt: promptInputs.chatPrompt,
|
|
14915
15238
|
inputFiles: evalCase.file_paths,
|
|
14916
15239
|
evalCaseId: evalCase.id,
|
|
14917
15240
|
attempt,
|
|
@@ -14928,12 +15251,30 @@ async function invokeProvider(provider, options) {
|
|
|
14928
15251
|
}
|
|
14929
15252
|
function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs, provider) {
|
|
14930
15253
|
const message = error instanceof Error ? error.message : String(error);
|
|
14931
|
-
|
|
14932
|
-
|
|
14933
|
-
|
|
14934
|
-
|
|
14935
|
-
|
|
14936
|
-
|
|
15254
|
+
let agentProviderRequest;
|
|
15255
|
+
let lmProviderRequest;
|
|
15256
|
+
if (isAgentProvider(provider)) {
|
|
15257
|
+
agentProviderRequest = {
|
|
15258
|
+
question: promptInputs.question,
|
|
15259
|
+
guideline_paths: evalCase.guideline_paths,
|
|
15260
|
+
error: message
|
|
15261
|
+
};
|
|
15262
|
+
} else {
|
|
15263
|
+
if (promptInputs.chatPrompt) {
|
|
15264
|
+
lmProviderRequest = {
|
|
15265
|
+
chat_prompt: promptInputs.chatPrompt,
|
|
15266
|
+
guideline_paths: evalCase.guideline_paths,
|
|
15267
|
+
error: message
|
|
15268
|
+
};
|
|
15269
|
+
} else {
|
|
15270
|
+
lmProviderRequest = {
|
|
15271
|
+
question: promptInputs.question,
|
|
15272
|
+
guidelines: promptInputs.guidelines,
|
|
15273
|
+
guideline_paths: evalCase.guideline_paths,
|
|
15274
|
+
error: message
|
|
15275
|
+
};
|
|
15276
|
+
}
|
|
15277
|
+
}
|
|
14937
15278
|
return {
|
|
14938
15279
|
eval_id: evalCase.id,
|
|
14939
15280
|
dataset: evalCase.dataset,
|
|
@@ -14946,7 +15287,9 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
|
|
|
14946
15287
|
target: targetName,
|
|
14947
15288
|
timestamp: timestamp.toISOString(),
|
|
14948
15289
|
raw_aspects: [],
|
|
14949
|
-
|
|
15290
|
+
agent_provider_request: agentProviderRequest,
|
|
15291
|
+
lm_provider_request: lmProviderRequest,
|
|
15292
|
+
error: message
|
|
14950
15293
|
};
|
|
14951
15294
|
}
|
|
14952
15295
|
function createCacheKey(provider, target, evalCase, promptInputs) {
|
|
@@ -14957,6 +15300,9 @@ function createCacheKey(provider, target, evalCase, promptInputs) {
|
|
|
14957
15300
|
hash.update(promptInputs.question);
|
|
14958
15301
|
hash.update(promptInputs.guidelines);
|
|
14959
15302
|
hash.update(promptInputs.systemMessage ?? "");
|
|
15303
|
+
if (promptInputs.chatPrompt) {
|
|
15304
|
+
hash.update(JSON.stringify(promptInputs.chatPrompt));
|
|
15305
|
+
}
|
|
14960
15306
|
return hash.digest("hex");
|
|
14961
15307
|
}
|
|
14962
15308
|
function isTimeoutLike(error) {
|
|
@@ -15384,8 +15730,6 @@ import { stripVTControlCharacters } from "node:util";
|
|
|
15384
15730
|
var ESC = "\x1B[";
|
|
15385
15731
|
var CLEAR_LINE = `${ESC}K`;
|
|
15386
15732
|
var MOVE_CURSOR_UP = `${ESC}1A`;
|
|
15387
|
-
var SYNC_START = `${ESC}?2026h`;
|
|
15388
|
-
var SYNC_END = `${ESC}?2026l`;
|
|
15389
15733
|
var ProgressDisplay = class {
|
|
15390
15734
|
workers = /* @__PURE__ */ new Map();
|
|
15391
15735
|
maxWorkers;
|
|
@@ -15624,6 +15968,8 @@ function buildHistogram(values) {
|
|
|
15624
15968
|
function calculateEvaluationSummary(results) {
|
|
15625
15969
|
const scores = results.map((result) => result.score);
|
|
15626
15970
|
const total = results.length;
|
|
15971
|
+
const errors = results.filter((result) => result.error !== void 0).map((result) => ({ evalId: result.eval_id, error: result.error }));
|
|
15972
|
+
const errorCount = errors.length;
|
|
15627
15973
|
if (total === 0) {
|
|
15628
15974
|
return {
|
|
15629
15975
|
total: 0,
|
|
@@ -15634,7 +15980,9 @@ function calculateEvaluationSummary(results) {
|
|
|
15634
15980
|
standardDeviation: void 0,
|
|
15635
15981
|
histogram: buildHistogram([]),
|
|
15636
15982
|
topResults: [],
|
|
15637
|
-
bottomResults: []
|
|
15983
|
+
bottomResults: [],
|
|
15984
|
+
errorCount: 0,
|
|
15985
|
+
errors: []
|
|
15638
15986
|
};
|
|
15639
15987
|
}
|
|
15640
15988
|
const mean = computeMean(scores);
|
|
@@ -15655,7 +16003,9 @@ function calculateEvaluationSummary(results) {
|
|
|
15655
16003
|
standardDeviation,
|
|
15656
16004
|
histogram,
|
|
15657
16005
|
topResults,
|
|
15658
|
-
bottomResults
|
|
16006
|
+
bottomResults,
|
|
16007
|
+
errorCount,
|
|
16008
|
+
errors
|
|
15659
16009
|
};
|
|
15660
16010
|
}
|
|
15661
16011
|
function formatScore(value) {
|
|
@@ -15666,10 +16016,25 @@ function formatEvaluationSummary(summary) {
|
|
|
15666
16016
|
return "\nNo results to summarize";
|
|
15667
16017
|
}
|
|
15668
16018
|
const lines = [];
|
|
16019
|
+
if (summary.errorCount > 0) {
|
|
16020
|
+
lines.push("\n==================================================");
|
|
16021
|
+
lines.push("ERRORS");
|
|
16022
|
+
lines.push("==================================================");
|
|
16023
|
+
summary.errors.forEach((error) => {
|
|
16024
|
+
lines.push(`
|
|
16025
|
+
\u274C ${error.evalId}`);
|
|
16026
|
+
lines.push(` ${error.error}`);
|
|
16027
|
+
});
|
|
16028
|
+
lines.push("");
|
|
16029
|
+
}
|
|
15669
16030
|
lines.push("\n==================================================");
|
|
15670
16031
|
lines.push("EVALUATION SUMMARY");
|
|
15671
16032
|
lines.push("==================================================");
|
|
15672
16033
|
lines.push(`Total eval cases: ${summary.total}`);
|
|
16034
|
+
if (summary.errorCount > 0) {
|
|
16035
|
+
lines.push(`Failed: ${summary.errorCount}`);
|
|
16036
|
+
lines.push(`Passed: ${summary.total - summary.errorCount}`);
|
|
16037
|
+
}
|
|
15673
16038
|
lines.push(`Mean score: ${formatScore(summary.mean)}`);
|
|
15674
16039
|
lines.push(`Median score: ${formatScore(summary.median)}`);
|
|
15675
16040
|
lines.push(`Min score: ${formatScore(summary.min)}`);
|
|
@@ -15708,7 +16073,7 @@ import { readFile as readFile5 } from "node:fs/promises";
|
|
|
15708
16073
|
import path33 from "node:path";
|
|
15709
16074
|
import { parse as parse5 } from "yaml";
|
|
15710
16075
|
var SCHEMA_EVAL_V22 = "agentv-eval-v2";
|
|
15711
|
-
var SCHEMA_TARGETS_V2 = "agentv-targets-v2.
|
|
16076
|
+
var SCHEMA_TARGETS_V2 = "agentv-targets-v2.2";
|
|
15712
16077
|
var SCHEMA_CONFIG_V22 = "agentv-config-v2";
|
|
15713
16078
|
async function detectFileType(filePath) {
|
|
15714
16079
|
try {
|
|
@@ -15840,14 +16205,14 @@ async function validateEvalFile(filePath) {
|
|
|
15840
16205
|
validateMessages(inputMessages, `${location}.input_messages`, absolutePath, errors);
|
|
15841
16206
|
}
|
|
15842
16207
|
const expectedMessages = evalCase["expected_messages"];
|
|
15843
|
-
if (!Array.isArray(expectedMessages)) {
|
|
16208
|
+
if (expectedMessages !== void 0 && !Array.isArray(expectedMessages)) {
|
|
15844
16209
|
errors.push({
|
|
15845
16210
|
severity: "error",
|
|
15846
16211
|
filePath: absolutePath,
|
|
15847
16212
|
location: `${location}.expected_messages`,
|
|
15848
|
-
message: "
|
|
16213
|
+
message: "Invalid 'expected_messages' field (must be an array if provided)"
|
|
15849
16214
|
});
|
|
15850
|
-
} else {
|
|
16215
|
+
} else if (Array.isArray(expectedMessages)) {
|
|
15851
16216
|
validateMessages(expectedMessages, `${location}.expected_messages`, absolutePath, errors);
|
|
15852
16217
|
}
|
|
15853
16218
|
}
|
|
@@ -15883,11 +16248,13 @@ function validateMessages(messages, location, filePath, errors) {
|
|
|
15883
16248
|
}
|
|
15884
16249
|
const content = message["content"];
|
|
15885
16250
|
if (typeof content === "string") {
|
|
16251
|
+
validateContentForRoleMarkers(content, `${msgLocation}.content`, filePath, errors);
|
|
15886
16252
|
} else if (Array.isArray(content)) {
|
|
15887
16253
|
for (let j2 = 0; j2 < content.length; j2++) {
|
|
15888
16254
|
const contentItem = content[j2];
|
|
15889
16255
|
const contentLocation = `${msgLocation}.content[${j2}]`;
|
|
15890
16256
|
if (typeof contentItem === "string") {
|
|
16257
|
+
validateContentForRoleMarkers(contentItem, contentLocation, filePath, errors);
|
|
15891
16258
|
} else if (isObject(contentItem)) {
|
|
15892
16259
|
const type = contentItem["type"];
|
|
15893
16260
|
if (typeof type !== "string") {
|
|
@@ -15907,6 +16274,8 @@ function validateMessages(messages, location, filePath, errors) {
|
|
|
15907
16274
|
location: `${contentLocation}.value`,
|
|
15908
16275
|
message: "Content with type 'text' must have a 'value' field"
|
|
15909
16276
|
});
|
|
16277
|
+
} else {
|
|
16278
|
+
validateContentForRoleMarkers(value, `${contentLocation}.value`, filePath, errors);
|
|
15910
16279
|
}
|
|
15911
16280
|
}
|
|
15912
16281
|
} else {
|
|
@@ -15928,6 +16297,19 @@ function validateMessages(messages, location, filePath, errors) {
|
|
|
15928
16297
|
}
|
|
15929
16298
|
}
|
|
15930
16299
|
}
|
|
16300
|
+
function validateContentForRoleMarkers(content, location, filePath, errors) {
|
|
16301
|
+
const markers = ["@[System]:", "@[User]:", "@[Assistant]:", "@[Tool]:"];
|
|
16302
|
+
for (const marker of markers) {
|
|
16303
|
+
if (content.toLowerCase().includes(marker.toLowerCase())) {
|
|
16304
|
+
errors.push({
|
|
16305
|
+
severity: "warning",
|
|
16306
|
+
filePath,
|
|
16307
|
+
location,
|
|
16308
|
+
message: `Content contains potential role marker '${marker}'. This may confuse agentic providers or cause prompt injection.`
|
|
16309
|
+
});
|
|
16310
|
+
}
|
|
16311
|
+
}
|
|
16312
|
+
}
|
|
15931
16313
|
function isObject2(value) {
|
|
15932
16314
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
15933
16315
|
}
|
|
@@ -15935,8 +16317,21 @@ var COMMON_SETTINGS = /* @__PURE__ */ new Set([
|
|
|
15935
16317
|
"provider_batching",
|
|
15936
16318
|
"providerBatching"
|
|
15937
16319
|
]);
|
|
16320
|
+
var RETRY_SETTINGS = /* @__PURE__ */ new Set([
|
|
16321
|
+
"max_retries",
|
|
16322
|
+
"maxRetries",
|
|
16323
|
+
"retry_initial_delay_ms",
|
|
16324
|
+
"retryInitialDelayMs",
|
|
16325
|
+
"retry_max_delay_ms",
|
|
16326
|
+
"retryMaxDelayMs",
|
|
16327
|
+
"retry_backoff_factor",
|
|
16328
|
+
"retryBackoffFactor",
|
|
16329
|
+
"retry_status_codes",
|
|
16330
|
+
"retryStatusCodes"
|
|
16331
|
+
]);
|
|
15938
16332
|
var AZURE_SETTINGS = /* @__PURE__ */ new Set([
|
|
15939
16333
|
...COMMON_SETTINGS,
|
|
16334
|
+
...RETRY_SETTINGS,
|
|
15940
16335
|
"endpoint",
|
|
15941
16336
|
"resource",
|
|
15942
16337
|
"resourceName",
|
|
@@ -15953,6 +16348,7 @@ var AZURE_SETTINGS = /* @__PURE__ */ new Set([
|
|
|
15953
16348
|
]);
|
|
15954
16349
|
var ANTHROPIC_SETTINGS = /* @__PURE__ */ new Set([
|
|
15955
16350
|
...COMMON_SETTINGS,
|
|
16351
|
+
...RETRY_SETTINGS,
|
|
15956
16352
|
"api_key",
|
|
15957
16353
|
"apiKey",
|
|
15958
16354
|
"model",
|
|
@@ -15966,6 +16362,7 @@ var ANTHROPIC_SETTINGS = /* @__PURE__ */ new Set([
|
|
|
15966
16362
|
]);
|
|
15967
16363
|
var GEMINI_SETTINGS = /* @__PURE__ */ new Set([
|
|
15968
16364
|
...COMMON_SETTINGS,
|
|
16365
|
+
...RETRY_SETTINGS,
|
|
15969
16366
|
"api_key",
|
|
15970
16367
|
"apiKey",
|
|
15971
16368
|
"model",
|
|
@@ -16053,13 +16450,14 @@ function getKnownSettings(provider) {
|
|
|
16053
16450
|
return null;
|
|
16054
16451
|
}
|
|
16055
16452
|
}
|
|
16056
|
-
function validateUnknownSettings(
|
|
16453
|
+
function validateUnknownSettings(target, provider, absolutePath, location, errors) {
|
|
16057
16454
|
const knownSettings = getKnownSettings(provider);
|
|
16058
16455
|
if (!knownSettings) {
|
|
16059
16456
|
return;
|
|
16060
16457
|
}
|
|
16061
|
-
|
|
16062
|
-
|
|
16458
|
+
const baseFields = /* @__PURE__ */ new Set(["name", "provider", "judge_target", "workers", "$schema", "targets"]);
|
|
16459
|
+
for (const key2 of Object.keys(target)) {
|
|
16460
|
+
if (!baseFields.has(key2) && !knownSettings.has(key2)) {
|
|
16063
16461
|
errors.push({
|
|
16064
16462
|
severity: "warning",
|
|
16065
16463
|
filePath: absolutePath,
|
|
@@ -16089,17 +16487,8 @@ async function validateTargetsFile(filePath) {
|
|
|
16089
16487
|
errors
|
|
16090
16488
|
};
|
|
16091
16489
|
}
|
|
16092
|
-
function validateCliSettings(
|
|
16093
|
-
|
|
16094
|
-
errors2.push({
|
|
16095
|
-
severity: "error",
|
|
16096
|
-
filePath: absolutePath2,
|
|
16097
|
-
location,
|
|
16098
|
-
message: "CLI provider requires a 'settings' object"
|
|
16099
|
-
});
|
|
16100
|
-
return;
|
|
16101
|
-
}
|
|
16102
|
-
const commandTemplate = settings["command_template"] ?? settings["commandTemplate"];
|
|
16490
|
+
function validateCliSettings(target, absolutePath2, location, errors2) {
|
|
16491
|
+
const commandTemplate = target["command_template"] ?? target["commandTemplate"];
|
|
16103
16492
|
if (typeof commandTemplate !== "string" || commandTemplate.trim().length === 0) {
|
|
16104
16493
|
errors2.push({
|
|
16105
16494
|
severity: "error",
|
|
@@ -16110,7 +16499,7 @@ async function validateTargetsFile(filePath) {
|
|
|
16110
16499
|
} else {
|
|
16111
16500
|
recordUnknownPlaceholders(commandTemplate, absolutePath2, `${location}.commandTemplate`, errors2);
|
|
16112
16501
|
}
|
|
16113
|
-
const attachmentsFormat =
|
|
16502
|
+
const attachmentsFormat = target["attachments_format"] ?? target["attachmentsFormat"];
|
|
16114
16503
|
if (attachmentsFormat !== void 0 && typeof attachmentsFormat !== "string") {
|
|
16115
16504
|
errors2.push({
|
|
16116
16505
|
severity: "error",
|
|
@@ -16119,7 +16508,7 @@ async function validateTargetsFile(filePath) {
|
|
|
16119
16508
|
message: "'attachmentsFormat' must be a string when provided"
|
|
16120
16509
|
});
|
|
16121
16510
|
}
|
|
16122
|
-
const filesFormat =
|
|
16511
|
+
const filesFormat = target["files_format"] ?? target["filesFormat"];
|
|
16123
16512
|
if (filesFormat !== void 0 && typeof filesFormat !== "string") {
|
|
16124
16513
|
errors2.push({
|
|
16125
16514
|
severity: "error",
|
|
@@ -16128,7 +16517,7 @@ async function validateTargetsFile(filePath) {
|
|
|
16128
16517
|
message: "'filesFormat' must be a string when provided"
|
|
16129
16518
|
});
|
|
16130
16519
|
}
|
|
16131
|
-
const cwd =
|
|
16520
|
+
const cwd = target["cwd"];
|
|
16132
16521
|
if (cwd !== void 0 && typeof cwd !== "string") {
|
|
16133
16522
|
errors2.push({
|
|
16134
16523
|
severity: "error",
|
|
@@ -16137,7 +16526,7 @@ async function validateTargetsFile(filePath) {
|
|
|
16137
16526
|
message: "'cwd' must be a string when provided"
|
|
16138
16527
|
});
|
|
16139
16528
|
}
|
|
16140
|
-
const timeoutSeconds =
|
|
16529
|
+
const timeoutSeconds = target["timeout_seconds"] ?? target["timeoutSeconds"];
|
|
16141
16530
|
if (timeoutSeconds !== void 0) {
|
|
16142
16531
|
const numericTimeout = Number(timeoutSeconds);
|
|
16143
16532
|
if (!Number.isFinite(numericTimeout) || numericTimeout <= 0) {
|
|
@@ -16149,29 +16538,7 @@ async function validateTargetsFile(filePath) {
|
|
|
16149
16538
|
});
|
|
16150
16539
|
}
|
|
16151
16540
|
}
|
|
16152
|
-
const
|
|
16153
|
-
if (envOverrides !== void 0) {
|
|
16154
|
-
if (!isObject2(envOverrides)) {
|
|
16155
|
-
errors2.push({
|
|
16156
|
-
severity: "error",
|
|
16157
|
-
filePath: absolutePath2,
|
|
16158
|
-
location: `${location}.env`,
|
|
16159
|
-
message: "'env' must be an object with string values"
|
|
16160
|
-
});
|
|
16161
|
-
} else {
|
|
16162
|
-
for (const [key2, value] of Object.entries(envOverrides)) {
|
|
16163
|
-
if (typeof value !== "string" || value.trim().length === 0) {
|
|
16164
|
-
errors2.push({
|
|
16165
|
-
severity: "error",
|
|
16166
|
-
filePath: absolutePath2,
|
|
16167
|
-
location: `${location}.env.${key2}`,
|
|
16168
|
-
message: `Environment override '${key2}' must be a non-empty string`
|
|
16169
|
-
});
|
|
16170
|
-
}
|
|
16171
|
-
}
|
|
16172
|
-
}
|
|
16173
|
-
}
|
|
16174
|
-
const healthcheck = settings["healthcheck"];
|
|
16541
|
+
const healthcheck = target["healthcheck"];
|
|
16175
16542
|
if (healthcheck !== void 0) {
|
|
16176
16543
|
validateCliHealthcheck(healthcheck, absolutePath2, `${location}.healthcheck`, errors2);
|
|
16177
16544
|
}
|
|
@@ -16342,20 +16709,11 @@ async function validateTargetsFile(filePath) {
|
|
|
16342
16709
|
message: `Unknown provider '${provider}'. Known providers: ${knownProviders.join(", ")}`
|
|
16343
16710
|
});
|
|
16344
16711
|
}
|
|
16345
|
-
const settings = target["settings"];
|
|
16346
|
-
if (providerValue !== "cli" && settings !== void 0 && !isObject2(settings)) {
|
|
16347
|
-
errors.push({
|
|
16348
|
-
severity: "error",
|
|
16349
|
-
filePath: absolutePath,
|
|
16350
|
-
location: `${location}.settings`,
|
|
16351
|
-
message: "Invalid 'settings' field (must be an object)"
|
|
16352
|
-
});
|
|
16353
|
-
}
|
|
16354
16712
|
if (providerValue === "cli") {
|
|
16355
|
-
validateCliSettings(
|
|
16713
|
+
validateCliSettings(target, absolutePath, location, errors);
|
|
16356
16714
|
}
|
|
16357
|
-
if (
|
|
16358
|
-
validateUnknownSettings(
|
|
16715
|
+
if (typeof provider === "string") {
|
|
16716
|
+
validateUnknownSettings(target, provider, absolutePath, location, errors);
|
|
16359
16717
|
}
|
|
16360
16718
|
const judgeTarget = target["judge_target"];
|
|
16361
16719
|
if (judgeTarget !== void 0 && typeof judgeTarget !== "string") {
|
|
@@ -17566,4 +17924,4 @@ export {
|
|
|
17566
17924
|
createProgram,
|
|
17567
17925
|
runCli
|
|
17568
17926
|
};
|
|
17569
|
-
//# sourceMappingURL=chunk-
|
|
17927
|
+
//# sourceMappingURL=chunk-J5HK75TC.js.map
|