agentv 4.5.1 → 4.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-5DEZ72J3.js → chunk-5GZJIXTY.js} +155 -59
- package/dist/chunk-5GZJIXTY.js.map +1 -0
- package/dist/{chunk-7DRAXDVC.js → chunk-KQQTEWZF.js} +111 -47
- package/dist/chunk-KQQTEWZF.js.map +1 -0
- package/dist/{chunk-BQC2CDLN.js → chunk-U2LSJ6Y4.js} +19 -5
- package/dist/chunk-U2LSJ6Y4.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-VWMHFUXR.js → dist-FBPCDLOY.js} +2 -2
- package/dist/index.js +3 -3
- package/dist/{interactive-OG7ZJIHG.js → interactive-6D3ULOMN.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-5DEZ72J3.js.map +0 -1
- package/dist/chunk-7DRAXDVC.js.map +0 -1
- package/dist/chunk-BQC2CDLN.js.map +0 -1
- package/dist/{dist-VWMHFUXR.js.map → dist-FBPCDLOY.js.map} +0 -0
- package/dist/{interactive-OG7ZJIHG.js.map → interactive-6D3ULOMN.js.map} +0 -0
|
@@ -301,7 +301,7 @@ var require_dist = __commonJS({
|
|
|
301
301
|
}
|
|
302
302
|
});
|
|
303
303
|
|
|
304
|
-
// ../../packages/core/dist/chunk-
|
|
304
|
+
// ../../packages/core/dist/chunk-AIQ5FO4G.js
|
|
305
305
|
import { constants } from "node:fs";
|
|
306
306
|
import { access, readFile } from "node:fs/promises";
|
|
307
307
|
import path from "node:path";
|
|
@@ -419,7 +419,7 @@ __export(external_exports2, {
|
|
|
419
419
|
void: () => voidType
|
|
420
420
|
});
|
|
421
421
|
|
|
422
|
-
// ../../packages/core/dist/chunk-
|
|
422
|
+
// ../../packages/core/dist/chunk-AIQ5FO4G.js
|
|
423
423
|
import { readFile as readFile2 } from "node:fs/promises";
|
|
424
424
|
import path3 from "node:path";
|
|
425
425
|
import fg from "fast-glob";
|
|
@@ -798,21 +798,27 @@ var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set([
|
|
|
798
798
|
"OUTPUT_FILE"
|
|
799
799
|
]);
|
|
800
800
|
var COMMON_TARGET_SETTINGS = [
|
|
801
|
+
"use_target",
|
|
801
802
|
"provider_batching",
|
|
802
803
|
"providerBatching",
|
|
803
804
|
"subagent_mode_allowed",
|
|
804
|
-
"subagentModeAllowed"
|
|
805
|
+
"subagentModeAllowed",
|
|
806
|
+
"fallback_targets",
|
|
807
|
+
"fallbackTargets"
|
|
805
808
|
];
|
|
806
809
|
var BASE_TARGET_SCHEMA = external_exports2.object({
|
|
807
810
|
name: external_exports2.string().min(1, "target name is required"),
|
|
808
|
-
provider: external_exports2.string().
|
|
811
|
+
provider: external_exports2.string().optional(),
|
|
812
|
+
use_target: external_exports2.string().optional(),
|
|
809
813
|
grader_target: external_exports2.string().optional(),
|
|
810
814
|
judge_target: external_exports2.string().optional(),
|
|
811
815
|
// backward compat
|
|
812
816
|
workers: external_exports2.number().int().min(1).optional(),
|
|
813
817
|
workspace_template: external_exports2.string().optional(),
|
|
814
818
|
workspaceTemplate: external_exports2.string().optional(),
|
|
815
|
-
subagent_mode_allowed: external_exports2.boolean().optional()
|
|
819
|
+
subagent_mode_allowed: external_exports2.boolean().optional(),
|
|
820
|
+
fallback_targets: external_exports2.array(external_exports2.string().min(1)).optional(),
|
|
821
|
+
fallbackTargets: external_exports2.array(external_exports2.string().min(1)).optional()
|
|
816
822
|
}).passthrough();
|
|
817
823
|
var DEFAULT_AZURE_API_VERSION = "2024-12-01-preview";
|
|
818
824
|
var DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1";
|
|
@@ -866,6 +872,11 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
|
|
|
866
872
|
`${parsed.name}: target-level workspace_template has been removed. Use eval-level workspace.template.`
|
|
867
873
|
);
|
|
868
874
|
}
|
|
875
|
+
if (!parsed.provider) {
|
|
876
|
+
throw new Error(
|
|
877
|
+
`${parsed.name}: 'provider' is required (targets with use_target must be resolved before calling resolveTargetDefinition)`
|
|
878
|
+
);
|
|
879
|
+
}
|
|
869
880
|
const provider = resolveString(
|
|
870
881
|
parsed.provider,
|
|
871
882
|
env,
|
|
@@ -878,12 +889,14 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
|
|
|
878
889
|
const subagentModeAllowed = resolveOptionalBoolean(
|
|
879
890
|
parsed.subagent_mode_allowed ?? parsed.subagentModeAllowed
|
|
880
891
|
);
|
|
892
|
+
const fallbackTargets = parsed.fallback_targets ?? parsed.fallbackTargets;
|
|
881
893
|
const base = {
|
|
882
894
|
name: parsed.name,
|
|
883
895
|
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
884
896
|
workers: parsed.workers,
|
|
885
897
|
providerBatching,
|
|
886
|
-
subagentModeAllowed
|
|
898
|
+
subagentModeAllowed,
|
|
899
|
+
...fallbackTargets ? { fallbackTargets } : {}
|
|
887
900
|
};
|
|
888
901
|
switch (provider) {
|
|
889
902
|
case "openai":
|
|
@@ -1057,6 +1070,14 @@ function resolveAzureConfig(target, env) {
|
|
|
1057
1070
|
retry
|
|
1058
1071
|
};
|
|
1059
1072
|
}
|
|
1073
|
+
function resolveApiFormat(target, targetName) {
|
|
1074
|
+
const raw = target.api_format ?? target.apiFormat;
|
|
1075
|
+
if (raw === void 0) return void 0;
|
|
1076
|
+
if (raw === "chat" || raw === "responses") return raw;
|
|
1077
|
+
throw new Error(
|
|
1078
|
+
`Invalid api_format '${raw}' for target '${targetName}'. Must be 'chat' or 'responses'.`
|
|
1079
|
+
);
|
|
1080
|
+
}
|
|
1060
1081
|
function resolveOpenAIConfig(target, env) {
|
|
1061
1082
|
const endpointSource = target.endpoint ?? target.base_url ?? target.baseUrl;
|
|
1062
1083
|
const apiKeySource = target.api_key ?? target.apiKey;
|
|
@@ -1076,6 +1097,7 @@ function resolveOpenAIConfig(target, env) {
|
|
|
1076
1097
|
baseURL,
|
|
1077
1098
|
apiKey,
|
|
1078
1099
|
model,
|
|
1100
|
+
apiFormat: resolveApiFormat(target, target.name),
|
|
1079
1101
|
temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
|
|
1080
1102
|
maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`),
|
|
1081
1103
|
retry
|
|
@@ -15043,6 +15065,7 @@ function validateTemplateVariables(content, source) {
|
|
|
15043
15065
|
}
|
|
15044
15066
|
var ANSI_YELLOW3 = "\x1B[33m";
|
|
15045
15067
|
var ANSI_RESET4 = "\x1B[0m";
|
|
15068
|
+
var PROMPT_FILE_PREFIX = "file://";
|
|
15046
15069
|
function normalizeEvaluatorType(type) {
|
|
15047
15070
|
return type.replace(/_/g, "-");
|
|
15048
15071
|
}
|
|
@@ -15341,12 +15364,23 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15341
15364
|
threshold: thresholdValue
|
|
15342
15365
|
};
|
|
15343
15366
|
} else {
|
|
15344
|
-
const
|
|
15367
|
+
const rawAggPrompt = asString(rawAggregator.prompt);
|
|
15368
|
+
let aggregatorPrompt;
|
|
15345
15369
|
let promptPath2;
|
|
15346
|
-
if (
|
|
15347
|
-
|
|
15348
|
-
|
|
15349
|
-
|
|
15370
|
+
if (rawAggPrompt) {
|
|
15371
|
+
if (rawAggPrompt.startsWith(PROMPT_FILE_PREFIX)) {
|
|
15372
|
+
const fileRef = rawAggPrompt.slice(PROMPT_FILE_PREFIX.length);
|
|
15373
|
+
aggregatorPrompt = fileRef;
|
|
15374
|
+
const resolved = await resolveFileReference22(fileRef, searchRoots);
|
|
15375
|
+
if (resolved.resolvedPath) {
|
|
15376
|
+
promptPath2 = path42.resolve(resolved.resolvedPath);
|
|
15377
|
+
} else {
|
|
15378
|
+
throw new Error(
|
|
15379
|
+
`Composite aggregator in '${evalId}': prompt file not found: ${resolved.displayPath}`
|
|
15380
|
+
);
|
|
15381
|
+
}
|
|
15382
|
+
} else {
|
|
15383
|
+
aggregatorPrompt = rawAggPrompt;
|
|
15350
15384
|
}
|
|
15351
15385
|
}
|
|
15352
15386
|
aggregator = {
|
|
@@ -15926,21 +15960,25 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15926
15960
|
promptScriptConfig = rawPrompt.config;
|
|
15927
15961
|
}
|
|
15928
15962
|
} else if (typeof rawPrompt === "string") {
|
|
15929
|
-
|
|
15930
|
-
|
|
15931
|
-
|
|
15932
|
-
|
|
15933
|
-
|
|
15934
|
-
|
|
15935
|
-
|
|
15936
|
-
|
|
15937
|
-
|
|
15963
|
+
if (rawPrompt.startsWith(PROMPT_FILE_PREFIX)) {
|
|
15964
|
+
const fileRef = rawPrompt.slice(PROMPT_FILE_PREFIX.length);
|
|
15965
|
+
prompt = fileRef;
|
|
15966
|
+
const resolved = await resolveFileReference22(fileRef, searchRoots);
|
|
15967
|
+
if (resolved.resolvedPath) {
|
|
15968
|
+
promptPath = path42.resolve(resolved.resolvedPath);
|
|
15969
|
+
try {
|
|
15970
|
+
await validateCustomPromptContent(promptPath);
|
|
15971
|
+
} catch (error) {
|
|
15972
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
15973
|
+
throw new Error(`Evaluator '${name21}' template (${promptPath}): ${message}`);
|
|
15974
|
+
}
|
|
15975
|
+
} else {
|
|
15976
|
+
throw new Error(
|
|
15977
|
+
`Evaluator '${name21}' in '${evalId}': prompt file not found: ${resolved.displayPath}`
|
|
15978
|
+
);
|
|
15938
15979
|
}
|
|
15939
15980
|
} else {
|
|
15940
|
-
|
|
15941
|
-
`Inline prompt used for evaluator '${name21}' in '${evalId}' (file not found: ${resolved.displayPath})`,
|
|
15942
|
-
resolved.attempted.length > 0 ? resolved.attempted.map((attempt) => ` Tried: ${attempt}`) : void 0
|
|
15943
|
-
);
|
|
15981
|
+
prompt = rawPrompt;
|
|
15944
15982
|
}
|
|
15945
15983
|
}
|
|
15946
15984
|
const _model = asString(rawEvaluator.model);
|
|
@@ -17811,7 +17849,7 @@ var OpenAIProvider = class {
|
|
|
17811
17849
|
apiKey: config.apiKey,
|
|
17812
17850
|
baseURL: config.baseURL
|
|
17813
17851
|
});
|
|
17814
|
-
this.model = openai(config.model);
|
|
17852
|
+
this.model = config.apiFormat === "responses" ? openai(config.model) : openai.chat(config.model);
|
|
17815
17853
|
}
|
|
17816
17854
|
id;
|
|
17817
17855
|
kind = "openai";
|
|
@@ -24132,8 +24170,11 @@ function assertTargetDefinition(value, index, filePath) {
|
|
|
24132
24170
|
`targets.yaml entry at index ${index} in ${filePath} is missing a valid 'name'`
|
|
24133
24171
|
);
|
|
24134
24172
|
}
|
|
24135
|
-
|
|
24136
|
-
|
|
24173
|
+
const hasUseTarget = typeof value.use_target === "string" && value.use_target.trim().length > 0;
|
|
24174
|
+
if (!hasUseTarget && (typeof provider !== "string" || provider.trim().length === 0)) {
|
|
24175
|
+
throw new Error(
|
|
24176
|
+
`targets.yaml entry '${name21}' in ${filePath} is missing a valid 'provider' (or use use_target for delegation)`
|
|
24177
|
+
);
|
|
24137
24178
|
}
|
|
24138
24179
|
return value;
|
|
24139
24180
|
}
|
|
@@ -29427,10 +29468,20 @@ async function runEvaluation(options) {
|
|
|
29427
29468
|
if (resolvedTargetsByName.has(name21)) {
|
|
29428
29469
|
return resolvedTargetsByName.get(name21);
|
|
29429
29470
|
}
|
|
29430
|
-
|
|
29471
|
+
let definition = targetDefinitions.get(name21);
|
|
29431
29472
|
if (!definition) {
|
|
29432
29473
|
return void 0;
|
|
29433
29474
|
}
|
|
29475
|
+
for (let depth = 0; depth < 5; depth++) {
|
|
29476
|
+
const useTarget = definition.use_target;
|
|
29477
|
+
if (typeof useTarget !== "string" || useTarget.trim().length === 0) break;
|
|
29478
|
+
const envMatch = useTarget.trim().match(/^\$\{\{\s*([A-Z0-9_]+)\s*\}\}$/i);
|
|
29479
|
+
const resolvedName = envMatch ? envLookup[envMatch[1]] ?? "" : useTarget.trim();
|
|
29480
|
+
if (resolvedName.length === 0) break;
|
|
29481
|
+
const next = targetDefinitions.get(resolvedName);
|
|
29482
|
+
if (!next) break;
|
|
29483
|
+
definition = next;
|
|
29484
|
+
}
|
|
29434
29485
|
const resolved = resolveTargetDefinition(definition, envLookup, evalFilePath);
|
|
29435
29486
|
resolvedTargetsByName.set(name21, resolved);
|
|
29436
29487
|
return resolved;
|
|
@@ -30435,6 +30486,7 @@ async function runEvalCase(options) {
|
|
|
30435
30486
|
let attempt = 0;
|
|
30436
30487
|
let providerResponse = cachedResponse;
|
|
30437
30488
|
let lastError;
|
|
30489
|
+
let targetUsed;
|
|
30438
30490
|
while (!providerResponse && attempt < attemptBudget) {
|
|
30439
30491
|
try {
|
|
30440
30492
|
providerResponse = await invokeProvider(provider, {
|
|
@@ -30457,25 +30509,33 @@ async function runEvalCase(options) {
|
|
|
30457
30509
|
attempt += 1;
|
|
30458
30510
|
continue;
|
|
30459
30511
|
}
|
|
30460
|
-
|
|
30461
|
-
|
|
30462
|
-
|
|
30463
|
-
|
|
30464
|
-
|
|
30465
|
-
|
|
30466
|
-
|
|
30467
|
-
|
|
30468
|
-
|
|
30469
|
-
|
|
30470
|
-
|
|
30471
|
-
|
|
30472
|
-
|
|
30473
|
-
|
|
30474
|
-
|
|
30475
|
-
|
|
30476
|
-
|
|
30512
|
+
break;
|
|
30513
|
+
}
|
|
30514
|
+
}
|
|
30515
|
+
if (!providerResponse && target.fallbackTargets?.length && targetResolver) {
|
|
30516
|
+
for (const fallbackName of target.fallbackTargets) {
|
|
30517
|
+
const fallbackProvider = targetResolver(fallbackName);
|
|
30518
|
+
if (!fallbackProvider) {
|
|
30519
|
+
continue;
|
|
30520
|
+
}
|
|
30521
|
+
try {
|
|
30522
|
+
providerResponse = await invokeProvider(fallbackProvider, {
|
|
30523
|
+
evalCase,
|
|
30524
|
+
target,
|
|
30525
|
+
promptInputs,
|
|
30526
|
+
attempt: 0,
|
|
30527
|
+
agentTimeoutMs,
|
|
30528
|
+
signal,
|
|
30529
|
+
cwd: workspacePath,
|
|
30530
|
+
workspaceFile: caseWorkspaceFile ?? suiteWorkspaceFile,
|
|
30531
|
+
captureFileChanges: !!baselineCommit,
|
|
30532
|
+
streamCallbacks: options.streamCallbacks
|
|
30533
|
+
});
|
|
30534
|
+
targetUsed = fallbackName;
|
|
30535
|
+
break;
|
|
30536
|
+
} catch (error) {
|
|
30537
|
+
lastError = error;
|
|
30477
30538
|
}
|
|
30478
|
-
return errorResult;
|
|
30479
30539
|
}
|
|
30480
30540
|
}
|
|
30481
30541
|
if (!providerResponse) {
|
|
@@ -30601,8 +30661,10 @@ async function runEvalCase(options) {
|
|
|
30601
30661
|
};
|
|
30602
30662
|
const skippedEvaluatorError = buildSkippedEvaluatorError(result.scores);
|
|
30603
30663
|
const executionStatus = providerError || skippedEvaluatorError ? "execution_error" : classifyQualityStatus(result.score, caseThreshold);
|
|
30664
|
+
const targetUsedField = targetUsed ? { targetUsed } : {};
|
|
30604
30665
|
const finalResult = providerError ? {
|
|
30605
30666
|
...result,
|
|
30667
|
+
...targetUsedField,
|
|
30606
30668
|
evalRun,
|
|
30607
30669
|
error: providerError,
|
|
30608
30670
|
executionStatus,
|
|
@@ -30614,6 +30676,7 @@ async function runEvalCase(options) {
|
|
|
30614
30676
|
afterEachOutput
|
|
30615
30677
|
} : skippedEvaluatorError ? {
|
|
30616
30678
|
...result,
|
|
30679
|
+
...targetUsedField,
|
|
30617
30680
|
score: 0,
|
|
30618
30681
|
evalRun,
|
|
30619
30682
|
error: skippedEvaluatorError,
|
|
@@ -30626,6 +30689,7 @@ async function runEvalCase(options) {
|
|
|
30626
30689
|
afterEachOutput
|
|
30627
30690
|
} : {
|
|
30628
30691
|
...result,
|
|
30692
|
+
...targetUsedField,
|
|
30629
30693
|
evalRun,
|
|
30630
30694
|
executionStatus,
|
|
30631
30695
|
beforeAllOutput,
|
|
@@ -32771,4 +32835,4 @@ export {
|
|
|
32771
32835
|
readTranscriptFile,
|
|
32772
32836
|
createAgentKernel
|
|
32773
32837
|
};
|
|
32774
|
-
//# sourceMappingURL=chunk-7DRAXDVC.js.map
|
|
32838
|
+
//# sourceMappingURL=chunk-KQQTEWZF.js.map
|