@agentv/core 4.5.2 → 4.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-M65PVDQ5.js → chunk-ZK4GG7PR.js} +87 -5
- package/dist/chunk-ZK4GG7PR.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +15 -6
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +7 -4
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +322 -93
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +28 -3
- package/dist/index.d.ts +28 -3
- package/dist/index.js +245 -98
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-M65PVDQ5.js.map +0 -1
package/dist/index.js
CHANGED
|
@@ -21,9 +21,10 @@ import {
|
|
|
21
21
|
normalizeLineEndings,
|
|
22
22
|
readJsonFile,
|
|
23
23
|
readTextFile,
|
|
24
|
+
resolveDelegatedTargetDefinition,
|
|
24
25
|
resolveFileReference,
|
|
25
26
|
resolveTargetDefinition
|
|
26
|
-
} from "./chunk-
|
|
27
|
+
} from "./chunk-ZK4GG7PR.js";
|
|
27
28
|
import {
|
|
28
29
|
AgentvProvider
|
|
29
30
|
} from "./chunk-PRNXHNLF.js";
|
|
@@ -766,6 +767,7 @@ function validateTemplateVariables(content, source) {
|
|
|
766
767
|
// src/evaluation/loaders/evaluator-parser.ts
|
|
767
768
|
var ANSI_YELLOW3 = "\x1B[33m";
|
|
768
769
|
var ANSI_RESET4 = "\x1B[0m";
|
|
770
|
+
var PROMPT_FILE_PREFIX = "file://";
|
|
769
771
|
function normalizeEvaluatorType(type) {
|
|
770
772
|
return type.replace(/_/g, "-");
|
|
771
773
|
}
|
|
@@ -1064,12 +1066,23 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
1064
1066
|
threshold: thresholdValue
|
|
1065
1067
|
};
|
|
1066
1068
|
} else {
|
|
1067
|
-
const
|
|
1069
|
+
const rawAggPrompt = asString(rawAggregator.prompt);
|
|
1070
|
+
let aggregatorPrompt;
|
|
1068
1071
|
let promptPath2;
|
|
1069
|
-
if (
|
|
1070
|
-
|
|
1071
|
-
|
|
1072
|
-
|
|
1072
|
+
if (rawAggPrompt) {
|
|
1073
|
+
if (rawAggPrompt.startsWith(PROMPT_FILE_PREFIX)) {
|
|
1074
|
+
const fileRef = rawAggPrompt.slice(PROMPT_FILE_PREFIX.length);
|
|
1075
|
+
aggregatorPrompt = fileRef;
|
|
1076
|
+
const resolved = await resolveFileReference2(fileRef, searchRoots);
|
|
1077
|
+
if (resolved.resolvedPath) {
|
|
1078
|
+
promptPath2 = path4.resolve(resolved.resolvedPath);
|
|
1079
|
+
} else {
|
|
1080
|
+
throw new Error(
|
|
1081
|
+
`Composite aggregator in '${evalId}': prompt file not found: ${resolved.displayPath}`
|
|
1082
|
+
);
|
|
1083
|
+
}
|
|
1084
|
+
} else {
|
|
1085
|
+
aggregatorPrompt = rawAggPrompt;
|
|
1073
1086
|
}
|
|
1074
1087
|
}
|
|
1075
1088
|
aggregator = {
|
|
@@ -1649,21 +1662,25 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
1649
1662
|
promptScriptConfig = rawPrompt.config;
|
|
1650
1663
|
}
|
|
1651
1664
|
} else if (typeof rawPrompt === "string") {
|
|
1652
|
-
|
|
1653
|
-
|
|
1654
|
-
|
|
1655
|
-
|
|
1656
|
-
|
|
1657
|
-
|
|
1658
|
-
|
|
1659
|
-
|
|
1660
|
-
|
|
1665
|
+
if (rawPrompt.startsWith(PROMPT_FILE_PREFIX)) {
|
|
1666
|
+
const fileRef = rawPrompt.slice(PROMPT_FILE_PREFIX.length);
|
|
1667
|
+
prompt = fileRef;
|
|
1668
|
+
const resolved = await resolveFileReference2(fileRef, searchRoots);
|
|
1669
|
+
if (resolved.resolvedPath) {
|
|
1670
|
+
promptPath = path4.resolve(resolved.resolvedPath);
|
|
1671
|
+
try {
|
|
1672
|
+
await validateCustomPromptContent(promptPath);
|
|
1673
|
+
} catch (error) {
|
|
1674
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
1675
|
+
throw new Error(`Evaluator '${name}' template (${promptPath}): ${message}`);
|
|
1676
|
+
}
|
|
1677
|
+
} else {
|
|
1678
|
+
throw new Error(
|
|
1679
|
+
`Evaluator '${name}' in '${evalId}': prompt file not found: ${resolved.displayPath}`
|
|
1680
|
+
);
|
|
1661
1681
|
}
|
|
1662
1682
|
} else {
|
|
1663
|
-
|
|
1664
|
-
`Inline prompt used for evaluator '${name}' in '${evalId}' (file not found: ${resolved.displayPath})`,
|
|
1665
|
-
resolved.attempted.length > 0 ? resolved.attempted.map((attempt) => ` Tried: ${attempt}`) : void 0
|
|
1666
|
-
);
|
|
1683
|
+
prompt = rawPrompt;
|
|
1667
1684
|
}
|
|
1668
1685
|
}
|
|
1669
1686
|
const _model = asString(rawEvaluator.model);
|
|
@@ -3572,7 +3589,7 @@ var OpenAIProvider = class {
|
|
|
3572
3589
|
apiKey: config.apiKey,
|
|
3573
3590
|
baseURL: config.baseURL
|
|
3574
3591
|
});
|
|
3575
|
-
this.model = openai(config.model);
|
|
3592
|
+
this.model = config.apiFormat === "responses" ? openai(config.model) : openai.chat(config.model);
|
|
3576
3593
|
}
|
|
3577
3594
|
id;
|
|
3578
3595
|
kind = "openai";
|
|
@@ -5200,15 +5217,16 @@ var CliProvider = class {
|
|
|
5200
5217
|
outputFilePath
|
|
5201
5218
|
);
|
|
5202
5219
|
const renderedCommand = renderTemplate(this.config.command, templateValues);
|
|
5220
|
+
const effectiveCwd = requests[0]?.cwd ?? this.config.cwd;
|
|
5203
5221
|
if (this.verbose) {
|
|
5204
5222
|
console.log(
|
|
5205
|
-
`[cli-provider:${this.targetName}] (batch size=${requests.length}) cwd=${
|
|
5223
|
+
`[cli-provider:${this.targetName}] (batch size=${requests.length}) cwd=${effectiveCwd ?? ""} command=${renderedCommand}`
|
|
5206
5224
|
);
|
|
5207
5225
|
}
|
|
5208
5226
|
try {
|
|
5209
5227
|
const startTime = Date.now();
|
|
5210
5228
|
const result = await this.runCommand(renderedCommand, {
|
|
5211
|
-
cwd:
|
|
5229
|
+
cwd: effectiveCwd,
|
|
5212
5230
|
env: process.env,
|
|
5213
5231
|
timeoutMs: this.config.timeoutMs,
|
|
5214
5232
|
signal: controller.signal
|
|
@@ -5241,7 +5259,7 @@ var CliProvider = class {
|
|
|
5241
5259
|
command: renderedCommand,
|
|
5242
5260
|
stderr: result.stderr,
|
|
5243
5261
|
exitCode: result.exitCode ?? 0,
|
|
5244
|
-
cwd:
|
|
5262
|
+
cwd: effectiveCwd,
|
|
5245
5263
|
outputFile: outputFilePath
|
|
5246
5264
|
}
|
|
5247
5265
|
};
|
|
@@ -5259,7 +5277,7 @@ var CliProvider = class {
|
|
|
5259
5277
|
command: renderedCommand,
|
|
5260
5278
|
stderr: result.stderr,
|
|
5261
5279
|
exitCode: result.exitCode ?? 0,
|
|
5262
|
-
cwd:
|
|
5280
|
+
cwd: effectiveCwd,
|
|
5263
5281
|
outputFile: outputFilePath,
|
|
5264
5282
|
error: errorMessage
|
|
5265
5283
|
}
|
|
@@ -5274,7 +5292,7 @@ var CliProvider = class {
|
|
|
5274
5292
|
command: renderedCommand,
|
|
5275
5293
|
stderr: result.stderr,
|
|
5276
5294
|
exitCode: result.exitCode ?? 0,
|
|
5277
|
-
cwd:
|
|
5295
|
+
cwd: effectiveCwd,
|
|
5278
5296
|
outputFile: outputFilePath,
|
|
5279
5297
|
recordId: evalCaseId
|
|
5280
5298
|
}
|
|
@@ -7224,9 +7242,9 @@ var MockProvider = class {
|
|
|
7224
7242
|
};
|
|
7225
7243
|
|
|
7226
7244
|
// src/evaluation/providers/pi-cli.ts
|
|
7227
|
-
import { spawn as spawn3 } from "node:child_process";
|
|
7245
|
+
import { execSync, spawn as spawn3 } from "node:child_process";
|
|
7228
7246
|
import { randomUUID as randomUUID7 } from "node:crypto";
|
|
7229
|
-
import { createWriteStream as createWriteStream5 } from "node:fs";
|
|
7247
|
+
import { accessSync, createWriteStream as createWriteStream5, readFileSync as readFileSync2 } from "node:fs";
|
|
7230
7248
|
import { mkdir as mkdir6, mkdtemp, rm, writeFile } from "node:fs/promises";
|
|
7231
7249
|
import { tmpdir } from "node:os";
|
|
7232
7250
|
import path19 from "node:path";
|
|
@@ -7284,6 +7302,59 @@ function subscribeToPiLogEntries(listener) {
|
|
|
7284
7302
|
};
|
|
7285
7303
|
}
|
|
7286
7304
|
|
|
7305
|
+
// src/evaluation/providers/pi-provider-aliases.ts
|
|
7306
|
+
var SUBPROVIDER_ALIASES = {
|
|
7307
|
+
azure: "azure-openai-responses"
|
|
7308
|
+
};
|
|
7309
|
+
var SUBPROVIDER_ALIASES_WITH_BASE_URL = {
|
|
7310
|
+
// Azure v1 endpoints are OpenAI-compatible; use the standard client
|
|
7311
|
+
// to avoid AzureOpenAI adding api-version query params.
|
|
7312
|
+
azure: "openai-responses"
|
|
7313
|
+
};
|
|
7314
|
+
var ENV_KEY_MAP = {
|
|
7315
|
+
google: "GEMINI_API_KEY",
|
|
7316
|
+
gemini: "GEMINI_API_KEY",
|
|
7317
|
+
anthropic: "ANTHROPIC_API_KEY",
|
|
7318
|
+
openai: "OPENAI_API_KEY",
|
|
7319
|
+
groq: "GROQ_API_KEY",
|
|
7320
|
+
xai: "XAI_API_KEY",
|
|
7321
|
+
openrouter: "OPENROUTER_API_KEY",
|
|
7322
|
+
azure: "AZURE_OPENAI_API_KEY"
|
|
7323
|
+
};
|
|
7324
|
+
var ENV_BASE_URL_MAP = {
|
|
7325
|
+
openai: "OPENAI_BASE_URL",
|
|
7326
|
+
azure: "AZURE_OPENAI_BASE_URL",
|
|
7327
|
+
openrouter: "OPENROUTER_BASE_URL"
|
|
7328
|
+
};
|
|
7329
|
+
function resolveSubprovider(name, hasBaseUrl = false) {
|
|
7330
|
+
const lower = name.toLowerCase();
|
|
7331
|
+
if (hasBaseUrl) {
|
|
7332
|
+
const alias = SUBPROVIDER_ALIASES_WITH_BASE_URL[lower];
|
|
7333
|
+
if (alias) return alias;
|
|
7334
|
+
}
|
|
7335
|
+
return SUBPROVIDER_ALIASES[lower] ?? name;
|
|
7336
|
+
}
|
|
7337
|
+
function resolveCliProvider(name) {
|
|
7338
|
+
const lower = name.toLowerCase();
|
|
7339
|
+
if (lower === "azure") return "azure-openai-responses";
|
|
7340
|
+
return name;
|
|
7341
|
+
}
|
|
7342
|
+
function resolveEnvKeyName(provider, hasBaseUrl = false) {
|
|
7343
|
+
const lower = provider.toLowerCase();
|
|
7344
|
+
if (hasBaseUrl && lower === "azure") return "OPENAI_API_KEY";
|
|
7345
|
+
return ENV_KEY_MAP[lower];
|
|
7346
|
+
}
|
|
7347
|
+
function resolveEnvBaseUrlName(provider, hasBaseUrl = false) {
|
|
7348
|
+
const lower = provider.toLowerCase();
|
|
7349
|
+
if (hasBaseUrl && lower === "azure") return "OPENAI_BASE_URL";
|
|
7350
|
+
return ENV_BASE_URL_MAP[lower];
|
|
7351
|
+
}
|
|
7352
|
+
function extractAzureResourceName(baseUrl) {
|
|
7353
|
+
const urlMatch = baseUrl.match(/^https?:\/\/([^./]+)/);
|
|
7354
|
+
if (urlMatch) return urlMatch[1];
|
|
7355
|
+
return baseUrl;
|
|
7356
|
+
}
|
|
7357
|
+
|
|
7287
7358
|
// src/evaluation/providers/pi-utils.ts
|
|
7288
7359
|
function extractPiTextContent(content) {
|
|
7289
7360
|
if (typeof content === "string") {
|
|
@@ -7442,12 +7513,12 @@ var PiCliProvider = class {
|
|
|
7442
7513
|
buildPiArgs(prompt, inputFiles) {
|
|
7443
7514
|
const args = [];
|
|
7444
7515
|
if (this.config.subprovider) {
|
|
7445
|
-
args.push("--provider", this.config.subprovider);
|
|
7516
|
+
args.push("--provider", resolveCliProvider(this.config.subprovider));
|
|
7446
7517
|
}
|
|
7447
7518
|
if (this.config.model) {
|
|
7448
7519
|
args.push("--model", this.config.model);
|
|
7449
7520
|
}
|
|
7450
|
-
if (this.config.apiKey) {
|
|
7521
|
+
if (this.config.apiKey && this.config.subprovider?.toLowerCase() !== "azure") {
|
|
7451
7522
|
args.push("--api-key", this.config.apiKey);
|
|
7452
7523
|
}
|
|
7453
7524
|
args.push("--mode", "json");
|
|
@@ -7499,35 +7570,35 @@ ${prompt}` : prompt;
|
|
|
7499
7570
|
}
|
|
7500
7571
|
buildEnv() {
|
|
7501
7572
|
const env = { ...process.env };
|
|
7502
|
-
|
|
7503
|
-
|
|
7504
|
-
|
|
7505
|
-
|
|
7506
|
-
|
|
7507
|
-
|
|
7508
|
-
|
|
7509
|
-
|
|
7510
|
-
|
|
7511
|
-
|
|
7512
|
-
|
|
7513
|
-
|
|
7514
|
-
|
|
7515
|
-
|
|
7573
|
+
const provider = this.config.subprovider?.toLowerCase() ?? "google";
|
|
7574
|
+
if (provider === "azure") {
|
|
7575
|
+
if (this.config.apiKey) {
|
|
7576
|
+
env.AZURE_OPENAI_API_KEY = this.config.apiKey;
|
|
7577
|
+
}
|
|
7578
|
+
if (this.config.baseUrl) {
|
|
7579
|
+
env.AZURE_OPENAI_RESOURCE_NAME = extractAzureResourceName(this.config.baseUrl);
|
|
7580
|
+
}
|
|
7581
|
+
} else {
|
|
7582
|
+
if (this.config.apiKey) {
|
|
7583
|
+
const envKey = resolveEnvKeyName(provider);
|
|
7584
|
+
if (envKey) {
|
|
7585
|
+
env[envKey] = this.config.apiKey;
|
|
7586
|
+
}
|
|
7516
7587
|
}
|
|
7517
7588
|
}
|
|
7518
7589
|
if (this.config.subprovider) {
|
|
7519
|
-
const
|
|
7590
|
+
const resolvedProvider = resolveCliProvider(this.config.subprovider);
|
|
7520
7591
|
const PROVIDER_OWN_PREFIXES = {
|
|
7521
7592
|
openrouter: ["OPENROUTER_"],
|
|
7522
7593
|
anthropic: ["ANTHROPIC_"],
|
|
7523
7594
|
openai: ["OPENAI_"],
|
|
7524
|
-
azure: ["AZURE_OPENAI_"],
|
|
7595
|
+
"azure-openai-responses": ["AZURE_OPENAI_"],
|
|
7525
7596
|
google: ["GEMINI_", "GOOGLE_GENERATIVE_AI_"],
|
|
7526
7597
|
gemini: ["GEMINI_", "GOOGLE_GENERATIVE_AI_"],
|
|
7527
7598
|
groq: ["GROQ_"],
|
|
7528
7599
|
xai: ["XAI_"]
|
|
7529
7600
|
};
|
|
7530
|
-
const ownPrefixes = PROVIDER_OWN_PREFIXES[
|
|
7601
|
+
const ownPrefixes = PROVIDER_OWN_PREFIXES[resolvedProvider] ?? [];
|
|
7531
7602
|
const allOtherPrefixes = Object.entries(PROVIDER_OWN_PREFIXES).filter(([key]) => key !== provider).flatMap(([, prefixes]) => prefixes);
|
|
7532
7603
|
for (const key of Object.keys(env)) {
|
|
7533
7604
|
if (allOtherPrefixes.some((prefix) => key.startsWith(prefix)) && !ownPrefixes.some((prefix) => key.startsWith(prefix))) {
|
|
@@ -7818,6 +7889,24 @@ function extractMessages(events) {
|
|
|
7818
7889
|
}
|
|
7819
7890
|
}
|
|
7820
7891
|
}
|
|
7892
|
+
if (messages) {
|
|
7893
|
+
for (let i = messages.length - 1; i >= 0; i--) {
|
|
7894
|
+
if (messages[i].role === "assistant" && !messages[i].content) {
|
|
7895
|
+
for (let j = events.length - 1; j >= 0; j--) {
|
|
7896
|
+
const evt = events[j];
|
|
7897
|
+
if (!evt || evt.type !== "message_end") continue;
|
|
7898
|
+
const msg = evt.message;
|
|
7899
|
+
if (msg?.role !== "assistant") continue;
|
|
7900
|
+
const text = extractPiTextContent(msg.content);
|
|
7901
|
+
if (text) {
|
|
7902
|
+
messages[i] = { ...messages[i], content: text };
|
|
7903
|
+
break;
|
|
7904
|
+
}
|
|
7905
|
+
}
|
|
7906
|
+
break;
|
|
7907
|
+
}
|
|
7908
|
+
}
|
|
7909
|
+
}
|
|
7821
7910
|
const eventToolCalls = extractToolCallsFromEvents(events);
|
|
7822
7911
|
if (eventToolCalls.length > 0) {
|
|
7823
7912
|
injectEventToolCalls(messages, eventToolCalls);
|
|
@@ -8002,17 +8091,43 @@ function formatTimeoutSuffix3(timeoutMs) {
|
|
|
8002
8091
|
if (!timeoutMs || timeoutMs <= 0) return "";
|
|
8003
8092
|
return ` after ${Math.ceil(timeoutMs / 1e3)}s`;
|
|
8004
8093
|
}
|
|
8094
|
+
function resolveWindowsCmd(executable) {
|
|
8095
|
+
if (process.platform !== "win32") return [executable, []];
|
|
8096
|
+
const lower = executable.toLowerCase();
|
|
8097
|
+
if (lower.endsWith(".js") || lower.endsWith(".exe")) return [executable, []];
|
|
8098
|
+
let fullPath;
|
|
8099
|
+
try {
|
|
8100
|
+
fullPath = execSync(`where ${executable}`, { encoding: "utf-8" }).trim().split(/\r?\n/)[0].trim();
|
|
8101
|
+
} catch {
|
|
8102
|
+
return [executable, []];
|
|
8103
|
+
}
|
|
8104
|
+
const cmdPath = fullPath.endsWith(".cmd") ? fullPath : `${fullPath}.cmd`;
|
|
8105
|
+
try {
|
|
8106
|
+
const content = readFileSync2(cmdPath, "utf-8");
|
|
8107
|
+
const match = content.match(/"?%_prog%"?\s+"([^"]+\.js)"/);
|
|
8108
|
+
if (match) {
|
|
8109
|
+
const dp0 = path19.dirname(path19.resolve(cmdPath));
|
|
8110
|
+
const scriptPath = match[1].replace(/%dp0%[/\\]?/gi, `${dp0}${path19.sep}`);
|
|
8111
|
+
try {
|
|
8112
|
+
accessSync(scriptPath);
|
|
8113
|
+
return ["node", [scriptPath]];
|
|
8114
|
+
} catch {
|
|
8115
|
+
}
|
|
8116
|
+
}
|
|
8117
|
+
} catch {
|
|
8118
|
+
}
|
|
8119
|
+
return [executable, []];
|
|
8120
|
+
}
|
|
8005
8121
|
async function defaultPiRunner(options) {
|
|
8006
8122
|
return await new Promise((resolve, reject) => {
|
|
8007
8123
|
const parts = options.executable.split(/\s+/);
|
|
8008
|
-
const
|
|
8009
|
-
const executableArgs = parts.slice(1);
|
|
8124
|
+
const [resolvedExe, prefixArgs] = resolveWindowsCmd(parts[0]);
|
|
8125
|
+
const executableArgs = [...prefixArgs, ...parts.slice(1)];
|
|
8010
8126
|
const allArgs = [...executableArgs, ...options.args];
|
|
8011
|
-
const child = spawn3(
|
|
8127
|
+
const child = spawn3(resolvedExe, allArgs, {
|
|
8012
8128
|
cwd: options.cwd,
|
|
8013
8129
|
env: options.env,
|
|
8014
|
-
stdio: ["pipe", "pipe", "pipe"]
|
|
8015
|
-
shell: false
|
|
8130
|
+
stdio: ["pipe", "pipe", "pipe"]
|
|
8016
8131
|
});
|
|
8017
8132
|
let stdout = "";
|
|
8018
8133
|
let stderr = "";
|
|
@@ -8067,9 +8182,9 @@ async function defaultPiRunner(options) {
|
|
|
8067
8182
|
}
|
|
8068
8183
|
|
|
8069
8184
|
// src/evaluation/providers/pi-coding-agent.ts
|
|
8070
|
-
import { execSync } from "node:child_process";
|
|
8185
|
+
import { execSync as execSync2 } from "node:child_process";
|
|
8071
8186
|
import { randomUUID as randomUUID8 } from "node:crypto";
|
|
8072
|
-
import { accessSync, createWriteStream as createWriteStream6 } from "node:fs";
|
|
8187
|
+
import { accessSync as accessSync2, createWriteStream as createWriteStream6 } from "node:fs";
|
|
8073
8188
|
import { mkdir as mkdir7 } from "node:fs/promises";
|
|
8074
8189
|
import path20 from "node:path";
|
|
8075
8190
|
import { createInterface } from "node:readline";
|
|
@@ -8097,7 +8212,7 @@ function findAgentvRoot() {
|
|
|
8097
8212
|
for (let i = 0; i < 10; i++) {
|
|
8098
8213
|
try {
|
|
8099
8214
|
const pkg = path20.join(dir, "package.json");
|
|
8100
|
-
|
|
8215
|
+
accessSync2(pkg);
|
|
8101
8216
|
return dir;
|
|
8102
8217
|
} catch {
|
|
8103
8218
|
const parent = path20.dirname(dir);
|
|
@@ -8117,7 +8232,7 @@ async function doLoadSdkModules() {
|
|
|
8117
8232
|
if (await promptInstall()) {
|
|
8118
8233
|
const installDir = findAgentvRoot();
|
|
8119
8234
|
console.error(`Installing @mariozechner/pi-coding-agent into ${installDir}...`);
|
|
8120
|
-
|
|
8235
|
+
execSync2("bun add @mariozechner/pi-coding-agent", {
|
|
8121
8236
|
cwd: installDir,
|
|
8122
8237
|
stdio: "inherit"
|
|
8123
8238
|
});
|
|
@@ -8158,7 +8273,9 @@ async function loadSdkModules() {
|
|
|
8158
8273
|
codingTools: piSdk.codingTools,
|
|
8159
8274
|
toolMap,
|
|
8160
8275
|
SessionManager: piSdk.SessionManager,
|
|
8161
|
-
getModel: piAi.getModel
|
|
8276
|
+
getModel: piAi.getModel,
|
|
8277
|
+
// biome-ignore lint/suspicious/noExplicitAny: registerBuiltInApiProviders exists at runtime but not in type defs
|
|
8278
|
+
registerBuiltInApiProviders: piAi.registerBuiltInApiProviders
|
|
8162
8279
|
};
|
|
8163
8280
|
}
|
|
8164
8281
|
var PiCodingAgentProvider = class {
|
|
@@ -8180,17 +8297,31 @@ var PiCodingAgentProvider = class {
|
|
|
8180
8297
|
const startTime = (/* @__PURE__ */ new Date()).toISOString();
|
|
8181
8298
|
const startMs = Date.now();
|
|
8182
8299
|
const sdk = await loadSdkModules();
|
|
8300
|
+
sdk.registerBuiltInApiProviders();
|
|
8183
8301
|
const logger = await this.createStreamLogger(request).catch(() => void 0);
|
|
8184
8302
|
try {
|
|
8185
8303
|
const cwd = this.resolveCwd(request.cwd);
|
|
8186
|
-
const
|
|
8304
|
+
const rawProvider = this.config.subprovider ?? "google";
|
|
8305
|
+
const hasBaseUrl = !!this.config.baseUrl;
|
|
8306
|
+
const providerName = resolveSubprovider(rawProvider, hasBaseUrl);
|
|
8187
8307
|
const modelId = this.config.model ?? "gemini-2.5-flash";
|
|
8188
|
-
this.setApiKeyEnv(
|
|
8189
|
-
|
|
8308
|
+
this.setApiKeyEnv(rawProvider, hasBaseUrl);
|
|
8309
|
+
this.setBaseUrlEnv(rawProvider, hasBaseUrl);
|
|
8310
|
+
let model = sdk.getModel(providerName, modelId);
|
|
8190
8311
|
if (!model) {
|
|
8191
|
-
|
|
8192
|
-
|
|
8193
|
-
|
|
8312
|
+
const envProvider = providerName.replace(/-responses$/, "");
|
|
8313
|
+
model = {
|
|
8314
|
+
id: modelId,
|
|
8315
|
+
name: modelId,
|
|
8316
|
+
api: providerName,
|
|
8317
|
+
provider: envProvider,
|
|
8318
|
+
baseUrl: this.config.baseUrl ?? "",
|
|
8319
|
+
reasoning: false,
|
|
8320
|
+
input: ["text"],
|
|
8321
|
+
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
|
8322
|
+
contextWindow: 128e3,
|
|
8323
|
+
maxTokens: 16384
|
|
8324
|
+
};
|
|
8194
8325
|
}
|
|
8195
8326
|
const tools = this.resolveTools(sdk);
|
|
8196
8327
|
const { session } = await sdk.createAgentSession({
|
|
@@ -8343,22 +8474,21 @@ ${fileList}`;
|
|
|
8343
8474
|
}
|
|
8344
8475
|
}
|
|
8345
8476
|
/** Maps config apiKey to the provider-specific env var the SDK reads. */
|
|
8346
|
-
setApiKeyEnv(providerName) {
|
|
8477
|
+
setApiKeyEnv(providerName, hasBaseUrl = false) {
|
|
8347
8478
|
if (!this.config.apiKey) return;
|
|
8348
|
-
const
|
|
8349
|
-
google: "GEMINI_API_KEY",
|
|
8350
|
-
gemini: "GEMINI_API_KEY",
|
|
8351
|
-
anthropic: "ANTHROPIC_API_KEY",
|
|
8352
|
-
openai: "OPENAI_API_KEY",
|
|
8353
|
-
groq: "GROQ_API_KEY",
|
|
8354
|
-
xai: "XAI_API_KEY",
|
|
8355
|
-
openrouter: "OPENROUTER_API_KEY"
|
|
8356
|
-
};
|
|
8357
|
-
const envKey = ENV_KEY_MAP[providerName.toLowerCase()];
|
|
8479
|
+
const envKey = resolveEnvKeyName(providerName, hasBaseUrl);
|
|
8358
8480
|
if (envKey) {
|
|
8359
8481
|
process.env[envKey] = this.config.apiKey;
|
|
8360
8482
|
}
|
|
8361
8483
|
}
|
|
8484
|
+
/** Maps config baseUrl to the provider-specific env var the SDK reads. */
|
|
8485
|
+
setBaseUrlEnv(providerName, hasBaseUrl = false) {
|
|
8486
|
+
if (!this.config.baseUrl) return;
|
|
8487
|
+
const envKey = resolveEnvBaseUrlName(providerName, hasBaseUrl);
|
|
8488
|
+
if (envKey) {
|
|
8489
|
+
process.env[envKey] = this.config.baseUrl;
|
|
8490
|
+
}
|
|
8491
|
+
}
|
|
8362
8492
|
resolveCwd(cwdOverride) {
|
|
8363
8493
|
if (cwdOverride) {
|
|
8364
8494
|
return path20.resolve(cwdOverride);
|
|
@@ -10082,8 +10212,11 @@ function assertTargetDefinition(value, index, filePath) {
|
|
|
10082
10212
|
`targets.yaml entry at index ${index} in ${filePath} is missing a valid 'name'`
|
|
10083
10213
|
);
|
|
10084
10214
|
}
|
|
10085
|
-
|
|
10086
|
-
|
|
10215
|
+
const hasUseTarget = typeof value.use_target === "string" && value.use_target.trim().length > 0;
|
|
10216
|
+
if (!hasUseTarget && (typeof provider !== "string" || provider.trim().length === 0)) {
|
|
10217
|
+
throw new Error(
|
|
10218
|
+
`targets.yaml entry '${name}' in ${filePath} is missing a valid 'provider' (or use use_target for delegation)`
|
|
10219
|
+
);
|
|
10087
10220
|
}
|
|
10088
10221
|
return value;
|
|
10089
10222
|
}
|
|
@@ -15490,7 +15623,7 @@ async function runEvaluation(options) {
|
|
|
15490
15623
|
if (resolvedTargetsByName.has(name)) {
|
|
15491
15624
|
return resolvedTargetsByName.get(name);
|
|
15492
15625
|
}
|
|
15493
|
-
const definition =
|
|
15626
|
+
const definition = resolveDelegatedTargetDefinition(name, targetDefinitions, envLookup);
|
|
15494
15627
|
if (!definition) {
|
|
15495
15628
|
return void 0;
|
|
15496
15629
|
}
|
|
@@ -16498,6 +16631,7 @@ async function runEvalCase(options) {
|
|
|
16498
16631
|
let attempt = 0;
|
|
16499
16632
|
let providerResponse = cachedResponse;
|
|
16500
16633
|
let lastError;
|
|
16634
|
+
let targetUsed;
|
|
16501
16635
|
while (!providerResponse && attempt < attemptBudget) {
|
|
16502
16636
|
try {
|
|
16503
16637
|
providerResponse = await invokeProvider(provider, {
|
|
@@ -16520,25 +16654,33 @@ async function runEvalCase(options) {
|
|
|
16520
16654
|
attempt += 1;
|
|
16521
16655
|
continue;
|
|
16522
16656
|
}
|
|
16523
|
-
|
|
16524
|
-
|
|
16525
|
-
|
|
16526
|
-
|
|
16527
|
-
|
|
16528
|
-
|
|
16529
|
-
|
|
16530
|
-
|
|
16531
|
-
|
|
16532
|
-
|
|
16533
|
-
|
|
16534
|
-
|
|
16535
|
-
|
|
16536
|
-
|
|
16537
|
-
|
|
16538
|
-
|
|
16539
|
-
|
|
16657
|
+
break;
|
|
16658
|
+
}
|
|
16659
|
+
}
|
|
16660
|
+
if (!providerResponse && target.fallbackTargets?.length && targetResolver) {
|
|
16661
|
+
for (const fallbackName of target.fallbackTargets) {
|
|
16662
|
+
const fallbackProvider = targetResolver(fallbackName);
|
|
16663
|
+
if (!fallbackProvider) {
|
|
16664
|
+
continue;
|
|
16665
|
+
}
|
|
16666
|
+
try {
|
|
16667
|
+
providerResponse = await invokeProvider(fallbackProvider, {
|
|
16668
|
+
evalCase,
|
|
16669
|
+
target,
|
|
16670
|
+
promptInputs,
|
|
16671
|
+
attempt: 0,
|
|
16672
|
+
agentTimeoutMs,
|
|
16673
|
+
signal,
|
|
16674
|
+
cwd: workspacePath,
|
|
16675
|
+
workspaceFile: caseWorkspaceFile ?? suiteWorkspaceFile,
|
|
16676
|
+
captureFileChanges: !!baselineCommit,
|
|
16677
|
+
streamCallbacks: options.streamCallbacks
|
|
16678
|
+
});
|
|
16679
|
+
targetUsed = fallbackName;
|
|
16680
|
+
break;
|
|
16681
|
+
} catch (error) {
|
|
16682
|
+
lastError = error;
|
|
16540
16683
|
}
|
|
16541
|
-
return errorResult;
|
|
16542
16684
|
}
|
|
16543
16685
|
}
|
|
16544
16686
|
if (!providerResponse) {
|
|
@@ -16664,8 +16806,10 @@ async function runEvalCase(options) {
|
|
|
16664
16806
|
};
|
|
16665
16807
|
const skippedEvaluatorError = buildSkippedEvaluatorError(result.scores);
|
|
16666
16808
|
const executionStatus = providerError || skippedEvaluatorError ? "execution_error" : classifyQualityStatus(result.score, caseThreshold);
|
|
16809
|
+
const targetUsedField = targetUsed ? { targetUsed } : {};
|
|
16667
16810
|
const finalResult = providerError ? {
|
|
16668
16811
|
...result,
|
|
16812
|
+
...targetUsedField,
|
|
16669
16813
|
evalRun,
|
|
16670
16814
|
error: providerError,
|
|
16671
16815
|
executionStatus,
|
|
@@ -16677,6 +16821,7 @@ async function runEvalCase(options) {
|
|
|
16677
16821
|
afterEachOutput
|
|
16678
16822
|
} : skippedEvaluatorError ? {
|
|
16679
16823
|
...result,
|
|
16824
|
+
...targetUsedField,
|
|
16680
16825
|
score: 0,
|
|
16681
16826
|
evalRun,
|
|
16682
16827
|
error: skippedEvaluatorError,
|
|
@@ -16689,6 +16834,7 @@ async function runEvalCase(options) {
|
|
|
16689
16834
|
afterEachOutput
|
|
16690
16835
|
} : {
|
|
16691
16836
|
...result,
|
|
16837
|
+
...targetUsedField,
|
|
16692
16838
|
evalRun,
|
|
16693
16839
|
executionStatus,
|
|
16694
16840
|
beforeAllOutput,
|
|
@@ -17566,7 +17712,7 @@ async function discoverDefaultTarget(repoRoot) {
|
|
|
17566
17712
|
return null;
|
|
17567
17713
|
}
|
|
17568
17714
|
async function loadEnvHierarchy(repoRoot, startPath) {
|
|
17569
|
-
const { readFileSync:
|
|
17715
|
+
const { readFileSync: readFileSync4 } = await import("node:fs");
|
|
17570
17716
|
const chain = buildDirectoryChain(startPath, repoRoot);
|
|
17571
17717
|
const envFiles = [];
|
|
17572
17718
|
for (const dir of chain) {
|
|
@@ -17575,7 +17721,7 @@ async function loadEnvHierarchy(repoRoot, startPath) {
|
|
|
17575
17721
|
}
|
|
17576
17722
|
for (let i = 0; i < envFiles.length; i++) {
|
|
17577
17723
|
try {
|
|
17578
|
-
const content =
|
|
17724
|
+
const content = readFileSync4(envFiles[i], "utf8");
|
|
17579
17725
|
for (const line of content.split("\n")) {
|
|
17580
17726
|
const trimmed = line.trim();
|
|
17581
17727
|
if (!trimmed || trimmed.startsWith("#")) continue;
|
|
@@ -17790,7 +17936,7 @@ function shouldSkipCacheForTemperature(targetConfig) {
|
|
|
17790
17936
|
}
|
|
17791
17937
|
|
|
17792
17938
|
// src/projects.ts
|
|
17793
|
-
import { existsSync as existsSync6, mkdirSync, readFileSync as
|
|
17939
|
+
import { existsSync as existsSync6, mkdirSync, readFileSync as readFileSync3, readdirSync as readdirSync3, statSync as statSync2, writeFileSync } from "node:fs";
|
|
17794
17940
|
import path47 from "node:path";
|
|
17795
17941
|
import { parse as parseYaml3, stringify as stringifyYaml } from "yaml";
|
|
17796
17942
|
function getProjectsRegistryPath() {
|
|
@@ -17802,7 +17948,7 @@ function loadProjectRegistry() {
|
|
|
17802
17948
|
return { projects: [] };
|
|
17803
17949
|
}
|
|
17804
17950
|
try {
|
|
17805
|
-
const raw =
|
|
17951
|
+
const raw = readFileSync3(registryPath, "utf-8");
|
|
17806
17952
|
const parsed = parseYaml3(raw);
|
|
17807
17953
|
if (!parsed || !Array.isArray(parsed.projects)) {
|
|
17808
17954
|
return { projects: [] };
|
|
@@ -18839,6 +18985,7 @@ export {
|
|
|
18839
18985
|
readTranscriptFile,
|
|
18840
18986
|
removeProject,
|
|
18841
18987
|
resolveAndCreateProvider,
|
|
18988
|
+
resolveDelegatedTargetDefinition,
|
|
18842
18989
|
resolveFileReference,
|
|
18843
18990
|
resolveTargetDefinition,
|
|
18844
18991
|
resolveWorkspaceTemplate,
|