@agentv/core 4.5.2 → 4.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-M65PVDQ5.js → chunk-ZK4GG7PR.js} +87 -5
- package/dist/chunk-ZK4GG7PR.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +15 -6
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +7 -4
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +322 -93
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +28 -3
- package/dist/index.d.ts +28 -3
- package/dist/index.js +245 -98
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-M65PVDQ5.js.map +0 -1
package/dist/index.cjs
CHANGED
|
@@ -1561,6 +1561,7 @@ __export(index_exports, {
|
|
|
1561
1561
|
readTranscriptFile: () => readTranscriptFile,
|
|
1562
1562
|
removeProject: () => removeProject,
|
|
1563
1563
|
resolveAndCreateProvider: () => resolveAndCreateProvider,
|
|
1564
|
+
resolveDelegatedTargetDefinition: () => resolveDelegatedTargetDefinition,
|
|
1564
1565
|
resolveFileReference: () => resolveFileReference3,
|
|
1565
1566
|
resolveTargetDefinition: () => resolveTargetDefinition,
|
|
1566
1567
|
resolveWorkspaceTemplate: () => resolveWorkspaceTemplate,
|
|
@@ -2585,6 +2586,7 @@ function validateTemplateVariables(content, source) {
|
|
|
2585
2586
|
// src/evaluation/loaders/evaluator-parser.ts
|
|
2586
2587
|
var ANSI_YELLOW4 = "\x1B[33m";
|
|
2587
2588
|
var ANSI_RESET5 = "\x1B[0m";
|
|
2589
|
+
var PROMPT_FILE_PREFIX = "file://";
|
|
2588
2590
|
function normalizeEvaluatorType(type) {
|
|
2589
2591
|
return type.replace(/_/g, "-");
|
|
2590
2592
|
}
|
|
@@ -2883,12 +2885,23 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
2883
2885
|
threshold: thresholdValue
|
|
2884
2886
|
};
|
|
2885
2887
|
} else {
|
|
2886
|
-
const
|
|
2888
|
+
const rawAggPrompt = asString(rawAggregator.prompt);
|
|
2889
|
+
let aggregatorPrompt;
|
|
2887
2890
|
let promptPath2;
|
|
2888
|
-
if (
|
|
2889
|
-
|
|
2890
|
-
|
|
2891
|
-
|
|
2891
|
+
if (rawAggPrompt) {
|
|
2892
|
+
if (rawAggPrompt.startsWith(PROMPT_FILE_PREFIX)) {
|
|
2893
|
+
const fileRef = rawAggPrompt.slice(PROMPT_FILE_PREFIX.length);
|
|
2894
|
+
aggregatorPrompt = fileRef;
|
|
2895
|
+
const resolved = await resolveFileReference2(fileRef, searchRoots);
|
|
2896
|
+
if (resolved.resolvedPath) {
|
|
2897
|
+
promptPath2 = import_node_path5.default.resolve(resolved.resolvedPath);
|
|
2898
|
+
} else {
|
|
2899
|
+
throw new Error(
|
|
2900
|
+
`Composite aggregator in '${evalId}': prompt file not found: ${resolved.displayPath}`
|
|
2901
|
+
);
|
|
2902
|
+
}
|
|
2903
|
+
} else {
|
|
2904
|
+
aggregatorPrompt = rawAggPrompt;
|
|
2892
2905
|
}
|
|
2893
2906
|
}
|
|
2894
2907
|
aggregator = {
|
|
@@ -3468,21 +3481,25 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
3468
3481
|
promptScriptConfig = rawPrompt.config;
|
|
3469
3482
|
}
|
|
3470
3483
|
} else if (typeof rawPrompt === "string") {
|
|
3471
|
-
|
|
3472
|
-
|
|
3473
|
-
|
|
3474
|
-
|
|
3475
|
-
|
|
3476
|
-
|
|
3477
|
-
|
|
3478
|
-
|
|
3479
|
-
|
|
3484
|
+
if (rawPrompt.startsWith(PROMPT_FILE_PREFIX)) {
|
|
3485
|
+
const fileRef = rawPrompt.slice(PROMPT_FILE_PREFIX.length);
|
|
3486
|
+
prompt = fileRef;
|
|
3487
|
+
const resolved = await resolveFileReference2(fileRef, searchRoots);
|
|
3488
|
+
if (resolved.resolvedPath) {
|
|
3489
|
+
promptPath = import_node_path5.default.resolve(resolved.resolvedPath);
|
|
3490
|
+
try {
|
|
3491
|
+
await validateCustomPromptContent(promptPath);
|
|
3492
|
+
} catch (error) {
|
|
3493
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
3494
|
+
throw new Error(`Evaluator '${name}' template (${promptPath}): ${message}`);
|
|
3495
|
+
}
|
|
3496
|
+
} else {
|
|
3497
|
+
throw new Error(
|
|
3498
|
+
`Evaluator '${name}' in '${evalId}': prompt file not found: ${resolved.displayPath}`
|
|
3499
|
+
);
|
|
3480
3500
|
}
|
|
3481
3501
|
} else {
|
|
3482
|
-
|
|
3483
|
-
`Inline prompt used for evaluator '${name}' in '${evalId}' (file not found: ${resolved.displayPath})`,
|
|
3484
|
-
resolved.attempted.length > 0 ? resolved.attempted.map((attempt) => ` Tried: ${attempt}`) : void 0
|
|
3485
|
-
);
|
|
3502
|
+
prompt = rawPrompt;
|
|
3486
3503
|
}
|
|
3487
3504
|
}
|
|
3488
3505
|
const _model = asString(rawEvaluator.model);
|
|
@@ -5519,7 +5536,7 @@ var OpenAIProvider = class {
|
|
|
5519
5536
|
apiKey: config.apiKey,
|
|
5520
5537
|
baseURL: config.baseURL
|
|
5521
5538
|
});
|
|
5522
|
-
this.model = openai(config.model);
|
|
5539
|
+
this.model = config.apiFormat === "responses" ? openai(config.model) : openai.chat(config.model);
|
|
5523
5540
|
}
|
|
5524
5541
|
id;
|
|
5525
5542
|
kind = "openai";
|
|
@@ -7153,15 +7170,16 @@ var CliProvider = class {
|
|
|
7153
7170
|
outputFilePath
|
|
7154
7171
|
);
|
|
7155
7172
|
const renderedCommand = renderTemplate(this.config.command, templateValues);
|
|
7173
|
+
const effectiveCwd = requests[0]?.cwd ?? this.config.cwd;
|
|
7156
7174
|
if (this.verbose) {
|
|
7157
7175
|
console.log(
|
|
7158
|
-
`[cli-provider:${this.targetName}] (batch size=${requests.length}) cwd=${
|
|
7176
|
+
`[cli-provider:${this.targetName}] (batch size=${requests.length}) cwd=${effectiveCwd ?? ""} command=${renderedCommand}`
|
|
7159
7177
|
);
|
|
7160
7178
|
}
|
|
7161
7179
|
try {
|
|
7162
7180
|
const startTime = Date.now();
|
|
7163
7181
|
const result = await this.runCommand(renderedCommand, {
|
|
7164
|
-
cwd:
|
|
7182
|
+
cwd: effectiveCwd,
|
|
7165
7183
|
env: process.env,
|
|
7166
7184
|
timeoutMs: this.config.timeoutMs,
|
|
7167
7185
|
signal: controller.signal
|
|
@@ -7194,7 +7212,7 @@ var CliProvider = class {
|
|
|
7194
7212
|
command: renderedCommand,
|
|
7195
7213
|
stderr: result.stderr,
|
|
7196
7214
|
exitCode: result.exitCode ?? 0,
|
|
7197
|
-
cwd:
|
|
7215
|
+
cwd: effectiveCwd,
|
|
7198
7216
|
outputFile: outputFilePath
|
|
7199
7217
|
}
|
|
7200
7218
|
};
|
|
@@ -7212,7 +7230,7 @@ var CliProvider = class {
|
|
|
7212
7230
|
command: renderedCommand,
|
|
7213
7231
|
stderr: result.stderr,
|
|
7214
7232
|
exitCode: result.exitCode ?? 0,
|
|
7215
|
-
cwd:
|
|
7233
|
+
cwd: effectiveCwd,
|
|
7216
7234
|
outputFile: outputFilePath,
|
|
7217
7235
|
error: errorMessage
|
|
7218
7236
|
}
|
|
@@ -7227,7 +7245,7 @@ var CliProvider = class {
|
|
|
7227
7245
|
command: renderedCommand,
|
|
7228
7246
|
stderr: result.stderr,
|
|
7229
7247
|
exitCode: result.exitCode ?? 0,
|
|
7230
|
-
cwd:
|
|
7248
|
+
cwd: effectiveCwd,
|
|
7231
7249
|
outputFile: outputFilePath,
|
|
7232
7250
|
recordId: evalCaseId
|
|
7233
7251
|
}
|
|
@@ -9251,6 +9269,60 @@ function subscribeToPiLogEntries(listener) {
|
|
|
9251
9269
|
};
|
|
9252
9270
|
}
|
|
9253
9271
|
|
|
9272
|
+
// src/evaluation/providers/pi-provider-aliases.ts
|
|
9273
|
+
init_cjs_shims();
|
|
9274
|
+
// Canonical provider ids keyed by the lowercase subprovider name from config.
var SUBPROVIDER_ALIASES = {
  azure: "azure-openai-responses"
};
// Aliases that take precedence over SUBPROVIDER_ALIASES when a base URL is configured.
var SUBPROVIDER_ALIASES_WITH_BASE_URL = {
  // Azure v1 endpoints are OpenAI-compatible; use the standard client
  // to avoid AzureOpenAI adding api-version query params.
  azure: "openai-responses"
};
// Lowercase provider name -> name of the environment variable holding its API key.
var ENV_KEY_MAP = {
  google: "GEMINI_API_KEY",
  gemini: "GEMINI_API_KEY",
  anthropic: "ANTHROPIC_API_KEY",
  openai: "OPENAI_API_KEY",
  groq: "GROQ_API_KEY",
  xai: "XAI_API_KEY",
  openrouter: "OPENROUTER_API_KEY",
  azure: "AZURE_OPENAI_API_KEY"
};
// Lowercase provider name -> name of the environment variable holding its base URL.
var ENV_BASE_URL_MAP = {
  openai: "OPENAI_BASE_URL",
  azure: "AZURE_OPENAI_BASE_URL",
  openrouter: "OPENROUTER_BASE_URL"
};
/**
 * Reads `key` from `table` only when it is an own property.
 *
 * The tables above are plain objects, so a bare `table[key]` with a
 * user-supplied key such as "constructor" or "toString" would return a member
 * inherited from Object.prototype instead of `undefined`.
 *
 * @param {Record<string, string>} table - One of the lookup tables above.
 * @param {string} key - Lowercased provider name.
 * @returns {string | undefined} The mapped value, or undefined when absent.
 */
function lookupOwnAlias(table, key) {
  return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : void 0;
}
/**
 * Maps a configured subprovider name to the provider id passed to the SDK.
 *
 * @param {string} name - Subprovider name as written in config (any casing).
 * @param {boolean} [hasBaseUrl=false] - Whether a base URL is configured; when
 *   true, SUBPROVIDER_ALIASES_WITH_BASE_URL is consulted first.
 * @returns {string} The alias if one exists, otherwise `name` unchanged
 *   (original casing preserved).
 */
function resolveSubprovider(name, hasBaseUrl = false) {
  const lower = name.toLowerCase();
  if (hasBaseUrl) {
    const alias = lookupOwnAlias(SUBPROVIDER_ALIASES_WITH_BASE_URL, lower);
    if (alias) return alias;
  }
  return lookupOwnAlias(SUBPROVIDER_ALIASES, lower) ?? name;
}
/**
 * Maps a configured subprovider name to the value used for the CLI
 * `--provider` argument. Only "azure" (case-insensitive) is rewritten.
 *
 * @param {string} name - Subprovider name as written in config.
 * @returns {string} "azure-openai-responses" for azure, otherwise `name` unchanged.
 */
function resolveCliProvider(name) {
  const lower = name.toLowerCase();
  if (lower === "azure") return "azure-openai-responses";
  return name;
}
/**
 * Returns the name of the environment variable that carries the API key for
 * `provider`.
 *
 * @param {string} provider - Provider name (any casing).
 * @param {boolean} [hasBaseUrl=false] - When true and the provider is azure,
 *   the OpenAI variable is used instead of the Azure one.
 * @returns {string | undefined} Env var name, or undefined for unknown providers.
 */
function resolveEnvKeyName(provider, hasBaseUrl = false) {
  const lower = provider.toLowerCase();
  if (hasBaseUrl && lower === "azure") return "OPENAI_API_KEY";
  return lookupOwnAlias(ENV_KEY_MAP, lower);
}
/**
 * Returns the name of the environment variable that carries the base URL for
 * `provider`.
 *
 * @param {string} provider - Provider name (any casing).
 * @param {boolean} [hasBaseUrl=false] - When true and the provider is azure,
 *   the OpenAI variable is used instead of the Azure one.
 * @returns {string | undefined} Env var name, or undefined for unknown providers.
 */
function resolveEnvBaseUrlName(provider, hasBaseUrl = false) {
  const lower = provider.toLowerCase();
  if (hasBaseUrl && lower === "azure") return "OPENAI_BASE_URL";
  return lookupOwnAlias(ENV_BASE_URL_MAP, lower);
}
/**
 * Extracts the leading host label from an http(s) URL: the text between the
 * scheme and the first "." or "/".
 *
 * @param {string} baseUrl - A URL such as "https://my-res.openai.azure.com",
 *   or a bare resource name.
 * @returns {string} The first host label, or `baseUrl` unchanged when it does
 *   not start with an http(s) scheme.
 */
function extractAzureResourceName(baseUrl) {
  const urlMatch = baseUrl.match(/^https?:\/\/([^./]+)/);
  if (urlMatch) return urlMatch[1];
  return baseUrl;
}
|
|
9325
|
+
|
|
9254
9326
|
// src/evaluation/providers/pi-utils.ts
|
|
9255
9327
|
init_cjs_shims();
|
|
9256
9328
|
function extractPiTextContent(content) {
|
|
@@ -9410,12 +9482,12 @@ var PiCliProvider = class {
|
|
|
9410
9482
|
buildPiArgs(prompt, inputFiles) {
|
|
9411
9483
|
const args = [];
|
|
9412
9484
|
if (this.config.subprovider) {
|
|
9413
|
-
args.push("--provider", this.config.subprovider);
|
|
9485
|
+
args.push("--provider", resolveCliProvider(this.config.subprovider));
|
|
9414
9486
|
}
|
|
9415
9487
|
if (this.config.model) {
|
|
9416
9488
|
args.push("--model", this.config.model);
|
|
9417
9489
|
}
|
|
9418
|
-
if (this.config.apiKey) {
|
|
9490
|
+
if (this.config.apiKey && this.config.subprovider?.toLowerCase() !== "azure") {
|
|
9419
9491
|
args.push("--api-key", this.config.apiKey);
|
|
9420
9492
|
}
|
|
9421
9493
|
args.push("--mode", "json");
|
|
@@ -9467,35 +9539,35 @@ ${prompt}` : prompt;
|
|
|
9467
9539
|
}
|
|
9468
9540
|
buildEnv() {
|
|
9469
9541
|
const env = { ...process.env };
|
|
9470
|
-
|
|
9471
|
-
|
|
9472
|
-
|
|
9473
|
-
|
|
9474
|
-
|
|
9475
|
-
|
|
9476
|
-
|
|
9477
|
-
|
|
9478
|
-
|
|
9479
|
-
|
|
9480
|
-
|
|
9481
|
-
|
|
9482
|
-
|
|
9483
|
-
|
|
9542
|
+
const provider = this.config.subprovider?.toLowerCase() ?? "google";
|
|
9543
|
+
if (provider === "azure") {
|
|
9544
|
+
if (this.config.apiKey) {
|
|
9545
|
+
env.AZURE_OPENAI_API_KEY = this.config.apiKey;
|
|
9546
|
+
}
|
|
9547
|
+
if (this.config.baseUrl) {
|
|
9548
|
+
env.AZURE_OPENAI_RESOURCE_NAME = extractAzureResourceName(this.config.baseUrl);
|
|
9549
|
+
}
|
|
9550
|
+
} else {
|
|
9551
|
+
if (this.config.apiKey) {
|
|
9552
|
+
const envKey = resolveEnvKeyName(provider);
|
|
9553
|
+
if (envKey) {
|
|
9554
|
+
env[envKey] = this.config.apiKey;
|
|
9555
|
+
}
|
|
9484
9556
|
}
|
|
9485
9557
|
}
|
|
9486
9558
|
if (this.config.subprovider) {
|
|
9487
|
-
const
|
|
9559
|
+
const resolvedProvider = resolveCliProvider(this.config.subprovider);
|
|
9488
9560
|
const PROVIDER_OWN_PREFIXES = {
|
|
9489
9561
|
openrouter: ["OPENROUTER_"],
|
|
9490
9562
|
anthropic: ["ANTHROPIC_"],
|
|
9491
9563
|
openai: ["OPENAI_"],
|
|
9492
|
-
azure: ["AZURE_OPENAI_"],
|
|
9564
|
+
"azure-openai-responses": ["AZURE_OPENAI_"],
|
|
9493
9565
|
google: ["GEMINI_", "GOOGLE_GENERATIVE_AI_"],
|
|
9494
9566
|
gemini: ["GEMINI_", "GOOGLE_GENERATIVE_AI_"],
|
|
9495
9567
|
groq: ["GROQ_"],
|
|
9496
9568
|
xai: ["XAI_"]
|
|
9497
9569
|
};
|
|
9498
|
-
const ownPrefixes = PROVIDER_OWN_PREFIXES[
|
|
9570
|
+
const ownPrefixes = PROVIDER_OWN_PREFIXES[resolvedProvider] ?? [];
|
|
9499
9571
|
const allOtherPrefixes = Object.entries(PROVIDER_OWN_PREFIXES).filter(([key]) => key !== provider).flatMap(([, prefixes]) => prefixes);
|
|
9500
9572
|
for (const key of Object.keys(env)) {
|
|
9501
9573
|
if (allOtherPrefixes.some((prefix) => key.startsWith(prefix)) && !ownPrefixes.some((prefix) => key.startsWith(prefix))) {
|
|
@@ -9786,6 +9858,24 @@ function extractMessages(events) {
|
|
|
9786
9858
|
}
|
|
9787
9859
|
}
|
|
9788
9860
|
}
|
|
9861
|
+
if (messages) {
|
|
9862
|
+
for (let i = messages.length - 1; i >= 0; i--) {
|
|
9863
|
+
if (messages[i].role === "assistant" && !messages[i].content) {
|
|
9864
|
+
for (let j = events.length - 1; j >= 0; j--) {
|
|
9865
|
+
const evt = events[j];
|
|
9866
|
+
if (!evt || evt.type !== "message_end") continue;
|
|
9867
|
+
const msg = evt.message;
|
|
9868
|
+
if (msg?.role !== "assistant") continue;
|
|
9869
|
+
const text = extractPiTextContent(msg.content);
|
|
9870
|
+
if (text) {
|
|
9871
|
+
messages[i] = { ...messages[i], content: text };
|
|
9872
|
+
break;
|
|
9873
|
+
}
|
|
9874
|
+
}
|
|
9875
|
+
break;
|
|
9876
|
+
}
|
|
9877
|
+
}
|
|
9878
|
+
}
|
|
9789
9879
|
const eventToolCalls = extractToolCallsFromEvents(events);
|
|
9790
9880
|
if (eventToolCalls.length > 0) {
|
|
9791
9881
|
injectEventToolCalls(messages, eventToolCalls);
|
|
@@ -9970,17 +10060,43 @@ function formatTimeoutSuffix3(timeoutMs) {
|
|
|
9970
10060
|
if (!timeoutMs || timeoutMs <= 0) return "";
|
|
9971
10061
|
return ` after ${Math.ceil(timeoutMs / 1e3)}s`;
|
|
9972
10062
|
}
|
|
10063
|
+
/**
 * On Windows, tries to replace an npm-style `.cmd` launcher with a direct
 * `node <script.js>` invocation so the process can be spawned without a shell.
 *
 * @param {string} executable - Command name or path (first token of the
 *   configured executable).
 * @returns {[string, string[]]} A pair of `[command, prefixArgs]`. The input is
 *   returned unchanged with no prefix args when not on Windows, when the name
 *   already ends in `.js`/`.exe`, or when any resolution step fails.
 */
function resolveWindowsCmd(executable) {
  if (process.platform !== "win32") return [executable, []];
  const lower = executable.toLowerCase();
  if (lower.endsWith(".js") || lower.endsWith(".exe")) return [executable, []];
  let fullPath;
  try {
    // execFileSync passes `executable` as a single argument without going
    // through a shell, so spaces or shell metacharacters in the configured
    // name cannot alter the command that is run.
    fullPath = (0, import_node_child_process4.execFileSync)("where", [executable], { encoding: "utf-8" }).trim().split(/\r?\n/)[0].trim();
  } catch {
    // `where` failed (e.g. nothing matched): fall back to the name as given.
    return [executable, []];
  }
  // Compare case-insensitively: Windows paths may be reported as ".CMD".
  const cmdPath = fullPath.toLowerCase().endsWith(".cmd") ? fullPath : `${fullPath}.cmd`;
  try {
    const content = (0, import_node_fs9.readFileSync)(cmdPath, "utf-8");
    // Looks for a launcher line of the form: "%_prog%" "<...something.js>"
    const match = content.match(/"?%_prog%"?\s+"([^"]+\.js)"/);
    if (match) {
      const dp0 = import_node_path21.default.dirname(import_node_path21.default.resolve(cmdPath));
      // Expand the batch variable %dp0% to the launcher's own directory.
      const scriptPath = match[1].replace(/%dp0%[/\\]?/gi, `${dp0}${import_node_path21.default.sep}`);
      try {
        (0, import_node_fs9.accessSync)(scriptPath);
        return ["node", [scriptPath]];
      } catch {
        // Script is not accessible: best-effort, fall through to the default.
      }
    }
  } catch {
    // Launcher file could not be read: best-effort, fall through to the default.
  }
  return [executable, []];
}
|
|
9973
10090
|
async function defaultPiRunner(options) {
|
|
9974
10091
|
return await new Promise((resolve, reject) => {
|
|
9975
10092
|
const parts = options.executable.split(/\s+/);
|
|
9976
|
-
const
|
|
9977
|
-
const executableArgs = parts.slice(1);
|
|
10093
|
+
const [resolvedExe, prefixArgs] = resolveWindowsCmd(parts[0]);
|
|
10094
|
+
const executableArgs = [...prefixArgs, ...parts.slice(1)];
|
|
9978
10095
|
const allArgs = [...executableArgs, ...options.args];
|
|
9979
|
-
const child = (0, import_node_child_process4.spawn)(
|
|
10096
|
+
const child = (0, import_node_child_process4.spawn)(resolvedExe, allArgs, {
|
|
9980
10097
|
cwd: options.cwd,
|
|
9981
10098
|
env: options.env,
|
|
9982
|
-
stdio: ["pipe", "pipe", "pipe"]
|
|
9983
|
-
shell: false
|
|
10099
|
+
stdio: ["pipe", "pipe", "pipe"]
|
|
9984
10100
|
});
|
|
9985
10101
|
let stdout = "";
|
|
9986
10102
|
let stderr = "";
|
|
@@ -10127,7 +10243,9 @@ async function loadSdkModules() {
|
|
|
10127
10243
|
codingTools: piSdk.codingTools,
|
|
10128
10244
|
toolMap,
|
|
10129
10245
|
SessionManager: piSdk.SessionManager,
|
|
10130
|
-
getModel: piAi.getModel
|
|
10246
|
+
getModel: piAi.getModel,
|
|
10247
|
+
// biome-ignore lint/suspicious/noExplicitAny: registerBuiltInApiProviders exists at runtime but not in type defs
|
|
10248
|
+
registerBuiltInApiProviders: piAi.registerBuiltInApiProviders
|
|
10131
10249
|
};
|
|
10132
10250
|
}
|
|
10133
10251
|
var PiCodingAgentProvider = class {
|
|
@@ -10149,17 +10267,31 @@ var PiCodingAgentProvider = class {
|
|
|
10149
10267
|
const startTime = (/* @__PURE__ */ new Date()).toISOString();
|
|
10150
10268
|
const startMs = Date.now();
|
|
10151
10269
|
const sdk = await loadSdkModules();
|
|
10270
|
+
sdk.registerBuiltInApiProviders();
|
|
10152
10271
|
const logger = await this.createStreamLogger(request).catch(() => void 0);
|
|
10153
10272
|
try {
|
|
10154
10273
|
const cwd = this.resolveCwd(request.cwd);
|
|
10155
|
-
const
|
|
10274
|
+
const rawProvider = this.config.subprovider ?? "google";
|
|
10275
|
+
const hasBaseUrl = !!this.config.baseUrl;
|
|
10276
|
+
const providerName = resolveSubprovider(rawProvider, hasBaseUrl);
|
|
10156
10277
|
const modelId = this.config.model ?? "gemini-2.5-flash";
|
|
10157
|
-
this.setApiKeyEnv(
|
|
10158
|
-
|
|
10278
|
+
this.setApiKeyEnv(rawProvider, hasBaseUrl);
|
|
10279
|
+
this.setBaseUrlEnv(rawProvider, hasBaseUrl);
|
|
10280
|
+
let model = sdk.getModel(providerName, modelId);
|
|
10159
10281
|
if (!model) {
|
|
10160
|
-
|
|
10161
|
-
|
|
10162
|
-
|
|
10282
|
+
const envProvider = providerName.replace(/-responses$/, "");
|
|
10283
|
+
model = {
|
|
10284
|
+
id: modelId,
|
|
10285
|
+
name: modelId,
|
|
10286
|
+
api: providerName,
|
|
10287
|
+
provider: envProvider,
|
|
10288
|
+
baseUrl: this.config.baseUrl ?? "",
|
|
10289
|
+
reasoning: false,
|
|
10290
|
+
input: ["text"],
|
|
10291
|
+
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
|
10292
|
+
contextWindow: 128e3,
|
|
10293
|
+
maxTokens: 16384
|
|
10294
|
+
};
|
|
10163
10295
|
}
|
|
10164
10296
|
const tools = this.resolveTools(sdk);
|
|
10165
10297
|
const { session } = await sdk.createAgentSession({
|
|
@@ -10312,22 +10444,21 @@ ${fileList}`;
|
|
|
10312
10444
|
}
|
|
10313
10445
|
}
|
|
10314
10446
|
/** Maps config apiKey to the provider-specific env var the SDK reads. */
|
|
10315
|
-
setApiKeyEnv(providerName) {
|
|
10447
|
+
setApiKeyEnv(providerName, hasBaseUrl = false) {
|
|
10316
10448
|
if (!this.config.apiKey) return;
|
|
10317
|
-
const
|
|
10318
|
-
google: "GEMINI_API_KEY",
|
|
10319
|
-
gemini: "GEMINI_API_KEY",
|
|
10320
|
-
anthropic: "ANTHROPIC_API_KEY",
|
|
10321
|
-
openai: "OPENAI_API_KEY",
|
|
10322
|
-
groq: "GROQ_API_KEY",
|
|
10323
|
-
xai: "XAI_API_KEY",
|
|
10324
|
-
openrouter: "OPENROUTER_API_KEY"
|
|
10325
|
-
};
|
|
10326
|
-
const envKey = ENV_KEY_MAP[providerName.toLowerCase()];
|
|
10449
|
+
const envKey = resolveEnvKeyName(providerName, hasBaseUrl);
|
|
10327
10450
|
if (envKey) {
|
|
10328
10451
|
process.env[envKey] = this.config.apiKey;
|
|
10329
10452
|
}
|
|
10330
10453
|
}
|
|
10454
|
+
/** Maps config baseUrl to the provider-specific env var the SDK reads. */
|
|
10455
|
+
setBaseUrlEnv(providerName, hasBaseUrl = false) {
|
|
10456
|
+
if (!this.config.baseUrl) return;
|
|
10457
|
+
const envKey = resolveEnvBaseUrlName(providerName, hasBaseUrl);
|
|
10458
|
+
if (envKey) {
|
|
10459
|
+
process.env[envKey] = this.config.baseUrl;
|
|
10460
|
+
}
|
|
10461
|
+
}
|
|
10331
10462
|
resolveCwd(cwdOverride) {
|
|
10332
10463
|
if (cwdOverride) {
|
|
10333
10464
|
return import_node_path22.default.resolve(cwdOverride);
|
|
@@ -10752,21 +10883,28 @@ var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set([
|
|
|
10752
10883
|
"OUTPUT_FILE"
|
|
10753
10884
|
]);
|
|
10754
10885
|
var COMMON_TARGET_SETTINGS = [
|
|
10886
|
+
"use_target",
|
|
10755
10887
|
"provider_batching",
|
|
10756
10888
|
"providerBatching",
|
|
10757
10889
|
"subagent_mode_allowed",
|
|
10758
|
-
"subagentModeAllowed"
|
|
10890
|
+
"subagentModeAllowed",
|
|
10891
|
+
"fallback_targets",
|
|
10892
|
+
"fallbackTargets"
|
|
10759
10893
|
];
|
|
10894
|
+
var USE_TARGET_ENV_PATTERN = /^\$\{\{\s*([A-Z0-9_]+)\s*\}\}$/i;
|
|
10760
10895
|
var BASE_TARGET_SCHEMA = import_zod3.z.object({
|
|
10761
10896
|
name: import_zod3.z.string().min(1, "target name is required"),
|
|
10762
|
-
provider: import_zod3.z.string().
|
|
10897
|
+
provider: import_zod3.z.string().optional(),
|
|
10898
|
+
use_target: import_zod3.z.string().optional(),
|
|
10763
10899
|
grader_target: import_zod3.z.string().optional(),
|
|
10764
10900
|
judge_target: import_zod3.z.string().optional(),
|
|
10765
10901
|
// backward compat
|
|
10766
10902
|
workers: import_zod3.z.number().int().min(1).optional(),
|
|
10767
10903
|
workspace_template: import_zod3.z.string().optional(),
|
|
10768
10904
|
workspaceTemplate: import_zod3.z.string().optional(),
|
|
10769
|
-
subagent_mode_allowed: import_zod3.z.boolean().optional()
|
|
10905
|
+
subagent_mode_allowed: import_zod3.z.boolean().optional(),
|
|
10906
|
+
fallback_targets: import_zod3.z.array(import_zod3.z.string().min(1)).optional(),
|
|
10907
|
+
fallbackTargets: import_zod3.z.array(import_zod3.z.string().min(1)).optional()
|
|
10770
10908
|
}).passthrough();
|
|
10771
10909
|
var DEFAULT_AZURE_API_VERSION = "2024-12-01-preview";
|
|
10772
10910
|
var DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1";
|
|
@@ -10813,6 +10951,52 @@ function resolveRetryConfig(target) {
|
|
|
10813
10951
|
retryableStatusCodes
|
|
10814
10952
|
};
|
|
10815
10953
|
}
|
|
10954
|
+
/**
 * Follows `use_target` delegation from the named target until it reaches a
 * definition without a (non-blank string) `use_target`, and returns that one.
 *
 * A `use_target` value matching USE_TARGET_ENV_PATTERN is treated as an
 * environment-variable reference and replaced by the trimmed value of that
 * variable from `env`; any other value is used as a target name directly.
 *
 * @param {string} name - Name of the target to start from.
 * @param {Map<string, object>} definitions - Target definitions keyed by name.
 * @param {Record<string, string | undefined>} [env=process.env] - Variables
 *   used to resolve environment-variable references.
 * @returns {object | undefined} The final definition, or undefined when `name`
 *   is not present in `definitions`.
 * @throws {Error} When a referenced variable is unset or blank, when the
 *   referenced target does not exist, when the chain revisits a target name,
 *   or when ten delegation steps have been followed without returning.
 */
function resolveDelegatedTargetDefinition(name, definitions, env = process.env) {
  const start = definitions.get(name);
  if (!start) {
    return void 0;
  }
  let current = start;
  const seenNames = [current.name];
  let hops = 0;
  while (hops < 10) {
    const pointer = typeof current.use_target === "string" ? current.use_target.trim() : "";
    if (pointer === "") {
      return current;
    }
    const envVarName = pointer.match(USE_TARGET_ENV_PATTERN)?.[1];
    let targetName = pointer;
    if (envVarName) {
      targetName = env[envVarName]?.trim() ?? "";
    }
    if (targetName.length === 0) {
      const reason = envVarName
        ? `Target "${current.name}" uses use_target: \${{ ${envVarName} }}, but ${envVarName} is not set. Set ${envVarName} to the name of a concrete target (for example, "azure") before running the eval.`
        : `Target "${current.name}" has an empty use_target value. Point it at a concrete target name before running the eval.`;
      throw new Error(reason);
    }
    const candidate = definitions.get(targetName);
    if (!candidate) {
      const reason = envVarName
        ? `Target "${current.name}" uses use_target: \${{ ${envVarName} }}, which resolved to "${targetName}", but no target named "${targetName}" exists.`
        : `Target "${current.name}" uses use_target: "${targetName}", but no target named "${targetName}" exists.`;
      throw new Error(reason);
    }
    if (seenNames.includes(candidate.name)) {
      const chain = [...seenNames, candidate.name].join(" -> ");
      throw new Error(`Circular use_target reference detected: ${chain}`);
    }
    seenNames.push(candidate.name);
    current = candidate;
    hops += 1;
  }
  throw new Error(
    `Target "${name}" exceeded the maximum use_target resolution depth (10). Check for a delegation loop or overly deep alias chain.`
  );
}
|
|
10816
11000
|
function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
|
|
10817
11001
|
const parsed = BASE_TARGET_SCHEMA.parse(definition);
|
|
10818
11002
|
if (parsed.workspace_template !== void 0 || parsed.workspaceTemplate !== void 0) {
|
|
@@ -10820,6 +11004,11 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
|
|
|
10820
11004
|
`${parsed.name}: target-level workspace_template has been removed. Use eval-level workspace.template.`
|
|
10821
11005
|
);
|
|
10822
11006
|
}
|
|
11007
|
+
if (!parsed.provider) {
|
|
11008
|
+
throw new Error(
|
|
11009
|
+
`${parsed.name}: 'provider' is required (targets with use_target must be resolved before calling resolveTargetDefinition)`
|
|
11010
|
+
);
|
|
11011
|
+
}
|
|
10823
11012
|
const provider = resolveString(
|
|
10824
11013
|
parsed.provider,
|
|
10825
11014
|
env,
|
|
@@ -10832,12 +11021,14 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
|
|
|
10832
11021
|
const subagentModeAllowed = resolveOptionalBoolean(
|
|
10833
11022
|
parsed.subagent_mode_allowed ?? parsed.subagentModeAllowed
|
|
10834
11023
|
);
|
|
11024
|
+
const fallbackTargets = parsed.fallback_targets ?? parsed.fallbackTargets;
|
|
10835
11025
|
const base = {
|
|
10836
11026
|
name: parsed.name,
|
|
10837
11027
|
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
10838
11028
|
workers: parsed.workers,
|
|
10839
11029
|
providerBatching,
|
|
10840
|
-
subagentModeAllowed
|
|
11030
|
+
subagentModeAllowed,
|
|
11031
|
+
...fallbackTargets ? { fallbackTargets } : {}
|
|
10841
11032
|
};
|
|
10842
11033
|
switch (provider) {
|
|
10843
11034
|
case "openai":
|
|
@@ -11011,6 +11202,14 @@ function resolveAzureConfig(target, env) {
|
|
|
11011
11202
|
retry
|
|
11012
11203
|
};
|
|
11013
11204
|
}
|
|
11205
|
+
/**
 * Reads the API format setting from a target definition.
 *
 * `api_format` takes precedence over `apiFormat` unless it is null/undefined.
 *
 * @param {object} target - Target definition.
 * @param {string} targetName - Target name, used only in the error message.
 * @returns {"chat" | "responses" | undefined} The configured format, or
 *   undefined when neither key is set.
 * @throws {Error} When the value is anything other than "chat" or "responses".
 */
function resolveApiFormat(target, targetName) {
  const requested = target.api_format ?? target.apiFormat;
  switch (requested) {
    case void 0:
      return void 0;
    case "chat":
    case "responses":
      return requested;
    default:
      throw new Error(
        `Invalid api_format '${requested}' for target '${targetName}'. Must be 'chat' or 'responses'.`
      );
  }
}
|
|
11014
11213
|
function resolveOpenAIConfig(target, env) {
|
|
11015
11214
|
const endpointSource = target.endpoint ?? target.base_url ?? target.baseUrl;
|
|
11016
11215
|
const apiKeySource = target.api_key ?? target.apiKey;
|
|
@@ -11030,6 +11229,7 @@ function resolveOpenAIConfig(target, env) {
|
|
|
11030
11229
|
baseURL,
|
|
11031
11230
|
apiKey,
|
|
11032
11231
|
model,
|
|
11232
|
+
apiFormat: resolveApiFormat(target, target.name),
|
|
11033
11233
|
temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
|
|
11034
11234
|
maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`),
|
|
11035
11235
|
retry
|
|
@@ -11336,6 +11536,11 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
|
|
|
11336
11536
|
allowLiteral: false,
|
|
11337
11537
|
optionalEnv: true
|
|
11338
11538
|
});
|
|
11539
|
+
const baseUrlSource = target.base_url ?? target.baseUrl ?? target.endpoint;
|
|
11540
|
+
const baseUrl = resolveOptionalString(baseUrlSource, env, `${target.name} pi base url`, {
|
|
11541
|
+
allowLiteral: true,
|
|
11542
|
+
optionalEnv: true
|
|
11543
|
+
});
|
|
11339
11544
|
const tools = resolveOptionalString(toolsSource, env, `${target.name} pi tools`, {
|
|
11340
11545
|
allowLiteral: true,
|
|
11341
11546
|
optionalEnv: true
|
|
@@ -11376,6 +11581,7 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
|
|
|
11376
11581
|
subprovider,
|
|
11377
11582
|
model,
|
|
11378
11583
|
apiKey,
|
|
11584
|
+
baseUrl,
|
|
11379
11585
|
tools,
|
|
11380
11586
|
thinking,
|
|
11381
11587
|
cwd,
|
|
@@ -11417,6 +11623,11 @@ function resolvePiCliConfig(target, env, evalFilePath) {
|
|
|
11417
11623
|
allowLiteral: false,
|
|
11418
11624
|
optionalEnv: true
|
|
11419
11625
|
});
|
|
11626
|
+
const baseUrlSource = target.base_url ?? target.baseUrl ?? target.endpoint;
|
|
11627
|
+
const baseUrl = resolveOptionalString(baseUrlSource, env, `${target.name} pi-cli base url`, {
|
|
11628
|
+
allowLiteral: true,
|
|
11629
|
+
optionalEnv: true
|
|
11630
|
+
});
|
|
11420
11631
|
const tools = resolveOptionalString(toolsSource, env, `${target.name} pi-cli tools`, {
|
|
11421
11632
|
allowLiteral: true,
|
|
11422
11633
|
optionalEnv: true
|
|
@@ -11455,6 +11666,7 @@ function resolvePiCliConfig(target, env, evalFilePath) {
|
|
|
11455
11666
|
subprovider,
|
|
11456
11667
|
model,
|
|
11457
11668
|
apiKey,
|
|
11669
|
+
baseUrl,
|
|
11458
11670
|
tools,
|
|
11459
11671
|
thinking,
|
|
11460
11672
|
args,
|
|
@@ -13364,8 +13576,11 @@ function assertTargetDefinition(value, index, filePath) {
|
|
|
13364
13576
|
`targets.yaml entry at index ${index} in ${filePath} is missing a valid 'name'`
|
|
13365
13577
|
);
|
|
13366
13578
|
}
|
|
13367
|
-
|
|
13368
|
-
|
|
13579
|
+
const hasUseTarget = typeof value.use_target === "string" && value.use_target.trim().length > 0;
|
|
13580
|
+
if (!hasUseTarget && (typeof provider !== "string" || provider.trim().length === 0)) {
|
|
13581
|
+
throw new Error(
|
|
13582
|
+
`targets.yaml entry '${name}' in ${filePath} is missing a valid 'provider' (or use use_target for delegation)`
|
|
13583
|
+
);
|
|
13369
13584
|
}
|
|
13370
13585
|
return value;
|
|
13371
13586
|
}
|
|
@@ -18818,7 +19033,7 @@ async function runEvaluation(options) {
|
|
|
18818
19033
|
if (resolvedTargetsByName.has(name)) {
|
|
18819
19034
|
return resolvedTargetsByName.get(name);
|
|
18820
19035
|
}
|
|
18821
|
-
const definition =
|
|
19036
|
+
const definition = resolveDelegatedTargetDefinition(name, targetDefinitions, envLookup);
|
|
18822
19037
|
if (!definition) {
|
|
18823
19038
|
return void 0;
|
|
18824
19039
|
}
|
|
@@ -19826,6 +20041,7 @@ async function runEvalCase(options) {
|
|
|
19826
20041
|
let attempt = 0;
|
|
19827
20042
|
let providerResponse = cachedResponse;
|
|
19828
20043
|
let lastError;
|
|
20044
|
+
let targetUsed;
|
|
19829
20045
|
while (!providerResponse && attempt < attemptBudget) {
|
|
19830
20046
|
try {
|
|
19831
20047
|
providerResponse = await invokeProvider(provider, {
|
|
@@ -19848,25 +20064,33 @@ async function runEvalCase(options) {
|
|
|
19848
20064
|
attempt += 1;
|
|
19849
20065
|
continue;
|
|
19850
20066
|
}
|
|
19851
|
-
|
|
19852
|
-
|
|
19853
|
-
|
|
19854
|
-
|
|
19855
|
-
|
|
19856
|
-
|
|
19857
|
-
|
|
19858
|
-
|
|
19859
|
-
|
|
19860
|
-
|
|
19861
|
-
|
|
19862
|
-
|
|
19863
|
-
|
|
19864
|
-
|
|
19865
|
-
|
|
19866
|
-
|
|
19867
|
-
|
|
20067
|
+
break;
|
|
20068
|
+
}
|
|
20069
|
+
}
|
|
20070
|
+
if (!providerResponse && target.fallbackTargets?.length && targetResolver) {
|
|
20071
|
+
for (const fallbackName of target.fallbackTargets) {
|
|
20072
|
+
const fallbackProvider = targetResolver(fallbackName);
|
|
20073
|
+
if (!fallbackProvider) {
|
|
20074
|
+
continue;
|
|
20075
|
+
}
|
|
20076
|
+
try {
|
|
20077
|
+
providerResponse = await invokeProvider(fallbackProvider, {
|
|
20078
|
+
evalCase,
|
|
20079
|
+
target,
|
|
20080
|
+
promptInputs,
|
|
20081
|
+
attempt: 0,
|
|
20082
|
+
agentTimeoutMs,
|
|
20083
|
+
signal,
|
|
20084
|
+
cwd: workspacePath,
|
|
20085
|
+
workspaceFile: caseWorkspaceFile ?? suiteWorkspaceFile,
|
|
20086
|
+
captureFileChanges: !!baselineCommit,
|
|
20087
|
+
streamCallbacks: options.streamCallbacks
|
|
20088
|
+
});
|
|
20089
|
+
targetUsed = fallbackName;
|
|
20090
|
+
break;
|
|
20091
|
+
} catch (error) {
|
|
20092
|
+
lastError = error;
|
|
19868
20093
|
}
|
|
19869
|
-
return errorResult;
|
|
19870
20094
|
}
|
|
19871
20095
|
}
|
|
19872
20096
|
if (!providerResponse) {
|
|
@@ -19992,8 +20216,10 @@ async function runEvalCase(options) {
|
|
|
19992
20216
|
};
|
|
19993
20217
|
const skippedEvaluatorError = buildSkippedEvaluatorError(result.scores);
|
|
19994
20218
|
const executionStatus = providerError || skippedEvaluatorError ? "execution_error" : classifyQualityStatus(result.score, caseThreshold);
|
|
20219
|
+
const targetUsedField = targetUsed ? { targetUsed } : {};
|
|
19995
20220
|
const finalResult = providerError ? {
|
|
19996
20221
|
...result,
|
|
20222
|
+
...targetUsedField,
|
|
19997
20223
|
evalRun,
|
|
19998
20224
|
error: providerError,
|
|
19999
20225
|
executionStatus,
|
|
@@ -20005,6 +20231,7 @@ async function runEvalCase(options) {
|
|
|
20005
20231
|
afterEachOutput
|
|
20006
20232
|
} : skippedEvaluatorError ? {
|
|
20007
20233
|
...result,
|
|
20234
|
+
...targetUsedField,
|
|
20008
20235
|
score: 0,
|
|
20009
20236
|
evalRun,
|
|
20010
20237
|
error: skippedEvaluatorError,
|
|
@@ -20017,6 +20244,7 @@ async function runEvalCase(options) {
|
|
|
20017
20244
|
afterEachOutput
|
|
20018
20245
|
} : {
|
|
20019
20246
|
...result,
|
|
20247
|
+
...targetUsedField,
|
|
20020
20248
|
evalRun,
|
|
20021
20249
|
executionStatus,
|
|
20022
20250
|
beforeAllOutput,
|
|
@@ -20896,7 +21124,7 @@ async function discoverDefaultTarget(repoRoot) {
|
|
|
20896
21124
|
return null;
|
|
20897
21125
|
}
|
|
20898
21126
|
async function loadEnvHierarchy(repoRoot, startPath) {
|
|
20899
|
-
const { readFileSync:
|
|
21127
|
+
const { readFileSync: readFileSync4 } = await import("fs");
|
|
20900
21128
|
const chain = buildDirectoryChain2(startPath, repoRoot);
|
|
20901
21129
|
const envFiles = [];
|
|
20902
21130
|
for (const dir of chain) {
|
|
@@ -20905,7 +21133,7 @@ async function loadEnvHierarchy(repoRoot, startPath) {
|
|
|
20905
21133
|
}
|
|
20906
21134
|
for (let i = 0; i < envFiles.length; i++) {
|
|
20907
21135
|
try {
|
|
20908
|
-
const content =
|
|
21136
|
+
const content = readFileSync4(envFiles[i], "utf8");
|
|
20909
21137
|
for (const line of content.split("\n")) {
|
|
20910
21138
|
const trimmed = line.trim();
|
|
20911
21139
|
if (!trimmed || trimmed.startsWith("#")) continue;
|
|
@@ -22195,6 +22423,7 @@ function createAgentKernel() {
|
|
|
22195
22423
|
readTranscriptFile,
|
|
22196
22424
|
removeProject,
|
|
22197
22425
|
resolveAndCreateProvider,
|
|
22426
|
+
resolveDelegatedTargetDefinition,
|
|
22198
22427
|
resolveFileReference,
|
|
22199
22428
|
resolveTargetDefinition,
|
|
22200
22429
|
resolveWorkspaceTemplate,
|