@agentv/core 4.5.2 → 4.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -21,9 +21,10 @@ import {
21
21
  normalizeLineEndings,
22
22
  readJsonFile,
23
23
  readTextFile,
24
+ resolveDelegatedTargetDefinition,
24
25
  resolveFileReference,
25
26
  resolveTargetDefinition
26
- } from "./chunk-M65PVDQ5.js";
27
+ } from "./chunk-ZK4GG7PR.js";
27
28
  import {
28
29
  AgentvProvider
29
30
  } from "./chunk-PRNXHNLF.js";
@@ -766,6 +767,7 @@ function validateTemplateVariables(content, source) {
766
767
  // src/evaluation/loaders/evaluator-parser.ts
767
768
  var ANSI_YELLOW3 = "\x1B[33m";
768
769
  var ANSI_RESET4 = "\x1B[0m";
770
+ var PROMPT_FILE_PREFIX = "file://";
769
771
  function normalizeEvaluatorType(type) {
770
772
  return type.replace(/_/g, "-");
771
773
  }
@@ -1064,12 +1066,23 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
1064
1066
  threshold: thresholdValue
1065
1067
  };
1066
1068
  } else {
1067
- const aggregatorPrompt = asString(rawAggregator.prompt);
1069
+ const rawAggPrompt = asString(rawAggregator.prompt);
1070
+ let aggregatorPrompt;
1068
1071
  let promptPath2;
1069
- if (aggregatorPrompt) {
1070
- const resolved = await resolveFileReference2(aggregatorPrompt, searchRoots);
1071
- if (resolved.resolvedPath) {
1072
- promptPath2 = path4.resolve(resolved.resolvedPath);
1072
+ if (rawAggPrompt) {
1073
+ if (rawAggPrompt.startsWith(PROMPT_FILE_PREFIX)) {
1074
+ const fileRef = rawAggPrompt.slice(PROMPT_FILE_PREFIX.length);
1075
+ aggregatorPrompt = fileRef;
1076
+ const resolved = await resolveFileReference2(fileRef, searchRoots);
1077
+ if (resolved.resolvedPath) {
1078
+ promptPath2 = path4.resolve(resolved.resolvedPath);
1079
+ } else {
1080
+ throw new Error(
1081
+ `Composite aggregator in '${evalId}': prompt file not found: ${resolved.displayPath}`
1082
+ );
1083
+ }
1084
+ } else {
1085
+ aggregatorPrompt = rawAggPrompt;
1073
1086
  }
1074
1087
  }
1075
1088
  aggregator = {
@@ -1649,21 +1662,25 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
1649
1662
  promptScriptConfig = rawPrompt.config;
1650
1663
  }
1651
1664
  } else if (typeof rawPrompt === "string") {
1652
- prompt = rawPrompt;
1653
- const resolved = await resolveFileReference2(prompt, searchRoots);
1654
- if (resolved.resolvedPath) {
1655
- promptPath = path4.resolve(resolved.resolvedPath);
1656
- try {
1657
- await validateCustomPromptContent(promptPath);
1658
- } catch (error) {
1659
- const message = error instanceof Error ? error.message : String(error);
1660
- throw new Error(`Evaluator '${name}' template (${promptPath}): ${message}`);
1665
+ if (rawPrompt.startsWith(PROMPT_FILE_PREFIX)) {
1666
+ const fileRef = rawPrompt.slice(PROMPT_FILE_PREFIX.length);
1667
+ prompt = fileRef;
1668
+ const resolved = await resolveFileReference2(fileRef, searchRoots);
1669
+ if (resolved.resolvedPath) {
1670
+ promptPath = path4.resolve(resolved.resolvedPath);
1671
+ try {
1672
+ await validateCustomPromptContent(promptPath);
1673
+ } catch (error) {
1674
+ const message = error instanceof Error ? error.message : String(error);
1675
+ throw new Error(`Evaluator '${name}' template (${promptPath}): ${message}`);
1676
+ }
1677
+ } else {
1678
+ throw new Error(
1679
+ `Evaluator '${name}' in '${evalId}': prompt file not found: ${resolved.displayPath}`
1680
+ );
1661
1681
  }
1662
1682
  } else {
1663
- logWarning2(
1664
- `Inline prompt used for evaluator '${name}' in '${evalId}' (file not found: ${resolved.displayPath})`,
1665
- resolved.attempted.length > 0 ? resolved.attempted.map((attempt) => ` Tried: ${attempt}`) : void 0
1666
- );
1683
+ prompt = rawPrompt;
1667
1684
  }
1668
1685
  }
1669
1686
  const _model = asString(rawEvaluator.model);
@@ -3572,7 +3589,7 @@ var OpenAIProvider = class {
3572
3589
  apiKey: config.apiKey,
3573
3590
  baseURL: config.baseURL
3574
3591
  });
3575
- this.model = openai(config.model);
3592
+ this.model = config.apiFormat === "responses" ? openai(config.model) : openai.chat(config.model);
3576
3593
  }
3577
3594
  id;
3578
3595
  kind = "openai";
@@ -5200,15 +5217,16 @@ var CliProvider = class {
5200
5217
  outputFilePath
5201
5218
  );
5202
5219
  const renderedCommand = renderTemplate(this.config.command, templateValues);
5220
+ const effectiveCwd = requests[0]?.cwd ?? this.config.cwd;
5203
5221
  if (this.verbose) {
5204
5222
  console.log(
5205
- `[cli-provider:${this.targetName}] (batch size=${requests.length}) cwd=${this.config.cwd ?? ""} command=${renderedCommand}`
5223
+ `[cli-provider:${this.targetName}] (batch size=${requests.length}) cwd=${effectiveCwd ?? ""} command=${renderedCommand}`
5206
5224
  );
5207
5225
  }
5208
5226
  try {
5209
5227
  const startTime = Date.now();
5210
5228
  const result = await this.runCommand(renderedCommand, {
5211
- cwd: this.config.cwd,
5229
+ cwd: effectiveCwd,
5212
5230
  env: process.env,
5213
5231
  timeoutMs: this.config.timeoutMs,
5214
5232
  signal: controller.signal
@@ -5241,7 +5259,7 @@ var CliProvider = class {
5241
5259
  command: renderedCommand,
5242
5260
  stderr: result.stderr,
5243
5261
  exitCode: result.exitCode ?? 0,
5244
- cwd: this.config.cwd,
5262
+ cwd: effectiveCwd,
5245
5263
  outputFile: outputFilePath
5246
5264
  }
5247
5265
  };
@@ -5259,7 +5277,7 @@ var CliProvider = class {
5259
5277
  command: renderedCommand,
5260
5278
  stderr: result.stderr,
5261
5279
  exitCode: result.exitCode ?? 0,
5262
- cwd: this.config.cwd,
5280
+ cwd: effectiveCwd,
5263
5281
  outputFile: outputFilePath,
5264
5282
  error: errorMessage
5265
5283
  }
@@ -5274,7 +5292,7 @@ var CliProvider = class {
5274
5292
  command: renderedCommand,
5275
5293
  stderr: result.stderr,
5276
5294
  exitCode: result.exitCode ?? 0,
5277
- cwd: this.config.cwd,
5295
+ cwd: effectiveCwd,
5278
5296
  outputFile: outputFilePath,
5279
5297
  recordId: evalCaseId
5280
5298
  }
@@ -7224,9 +7242,9 @@ var MockProvider = class {
7224
7242
  };
7225
7243
 
7226
7244
  // src/evaluation/providers/pi-cli.ts
7227
- import { spawn as spawn3 } from "node:child_process";
7245
+ import { execSync, spawn as spawn3 } from "node:child_process";
7228
7246
  import { randomUUID as randomUUID7 } from "node:crypto";
7229
- import { createWriteStream as createWriteStream5 } from "node:fs";
7247
+ import { accessSync, createWriteStream as createWriteStream5, readFileSync as readFileSync2 } from "node:fs";
7230
7248
  import { mkdir as mkdir6, mkdtemp, rm, writeFile } from "node:fs/promises";
7231
7249
  import { tmpdir } from "node:os";
7232
7250
  import path19 from "node:path";
@@ -7284,6 +7302,59 @@ function subscribeToPiLogEntries(listener) {
7284
7302
  };
7285
7303
  }
7286
7304
 
7305
+ // src/evaluation/providers/pi-provider-aliases.ts
7306
+ var SUBPROVIDER_ALIASES = {
7307
+ azure: "azure-openai-responses"
7308
+ };
7309
+ var SUBPROVIDER_ALIASES_WITH_BASE_URL = {
7310
+ // Azure v1 endpoints are OpenAI-compatible; use the standard client
7311
+ // to avoid AzureOpenAI adding api-version query params.
7312
+ azure: "openai-responses"
7313
+ };
7314
+ var ENV_KEY_MAP = {
7315
+ google: "GEMINI_API_KEY",
7316
+ gemini: "GEMINI_API_KEY",
7317
+ anthropic: "ANTHROPIC_API_KEY",
7318
+ openai: "OPENAI_API_KEY",
7319
+ groq: "GROQ_API_KEY",
7320
+ xai: "XAI_API_KEY",
7321
+ openrouter: "OPENROUTER_API_KEY",
7322
+ azure: "AZURE_OPENAI_API_KEY"
7323
+ };
7324
+ var ENV_BASE_URL_MAP = {
7325
+ openai: "OPENAI_BASE_URL",
7326
+ azure: "AZURE_OPENAI_BASE_URL",
7327
+ openrouter: "OPENROUTER_BASE_URL"
7328
+ };
7329
+ function resolveSubprovider(name, hasBaseUrl = false) {
7330
+ const lower = name.toLowerCase();
7331
+ if (hasBaseUrl) {
7332
+ const alias = SUBPROVIDER_ALIASES_WITH_BASE_URL[lower];
7333
+ if (alias) return alias;
7334
+ }
7335
+ return SUBPROVIDER_ALIASES[lower] ?? name;
7336
+ }
7337
+ function resolveCliProvider(name) {
7338
+ const lower = name.toLowerCase();
7339
+ if (lower === "azure") return "azure-openai-responses";
7340
+ return name;
7341
+ }
7342
+ function resolveEnvKeyName(provider, hasBaseUrl = false) {
7343
+ const lower = provider.toLowerCase();
7344
+ if (hasBaseUrl && lower === "azure") return "OPENAI_API_KEY";
7345
+ return ENV_KEY_MAP[lower];
7346
+ }
7347
+ function resolveEnvBaseUrlName(provider, hasBaseUrl = false) {
7348
+ const lower = provider.toLowerCase();
7349
+ if (hasBaseUrl && lower === "azure") return "OPENAI_BASE_URL";
7350
+ return ENV_BASE_URL_MAP[lower];
7351
+ }
7352
+ function extractAzureResourceName(baseUrl) {
7353
+ const urlMatch = baseUrl.match(/^https?:\/\/([^./]+)/);
7354
+ if (urlMatch) return urlMatch[1];
7355
+ return baseUrl;
7356
+ }
7357
+
7287
7358
  // src/evaluation/providers/pi-utils.ts
7288
7359
  function extractPiTextContent(content) {
7289
7360
  if (typeof content === "string") {
@@ -7442,12 +7513,12 @@ var PiCliProvider = class {
7442
7513
  buildPiArgs(prompt, inputFiles) {
7443
7514
  const args = [];
7444
7515
  if (this.config.subprovider) {
7445
- args.push("--provider", this.config.subprovider);
7516
+ args.push("--provider", resolveCliProvider(this.config.subprovider));
7446
7517
  }
7447
7518
  if (this.config.model) {
7448
7519
  args.push("--model", this.config.model);
7449
7520
  }
7450
- if (this.config.apiKey) {
7521
+ if (this.config.apiKey && this.config.subprovider?.toLowerCase() !== "azure") {
7451
7522
  args.push("--api-key", this.config.apiKey);
7452
7523
  }
7453
7524
  args.push("--mode", "json");
@@ -7499,35 +7570,35 @@ ${prompt}` : prompt;
7499
7570
  }
7500
7571
  buildEnv() {
7501
7572
  const env = { ...process.env };
7502
- if (this.config.apiKey) {
7503
- const provider = this.config.subprovider?.toLowerCase() ?? "google";
7504
- const ENV_KEY_MAP = {
7505
- google: "GEMINI_API_KEY",
7506
- gemini: "GEMINI_API_KEY",
7507
- anthropic: "ANTHROPIC_API_KEY",
7508
- openai: "OPENAI_API_KEY",
7509
- groq: "GROQ_API_KEY",
7510
- xai: "XAI_API_KEY",
7511
- openrouter: "OPENROUTER_API_KEY"
7512
- };
7513
- const envKey = ENV_KEY_MAP[provider];
7514
- if (envKey) {
7515
- env[envKey] = this.config.apiKey;
7573
+ const provider = this.config.subprovider?.toLowerCase() ?? "google";
7574
+ if (provider === "azure") {
7575
+ if (this.config.apiKey) {
7576
+ env.AZURE_OPENAI_API_KEY = this.config.apiKey;
7577
+ }
7578
+ if (this.config.baseUrl) {
7579
+ env.AZURE_OPENAI_RESOURCE_NAME = extractAzureResourceName(this.config.baseUrl);
7580
+ }
7581
+ } else {
7582
+ if (this.config.apiKey) {
7583
+ const envKey = resolveEnvKeyName(provider);
7584
+ if (envKey) {
7585
+ env[envKey] = this.config.apiKey;
7586
+ }
7516
7587
  }
7517
7588
  }
7518
7589
  if (this.config.subprovider) {
7519
- const provider = this.config.subprovider.toLowerCase();
7590
+ const resolvedProvider = resolveCliProvider(this.config.subprovider);
7520
7591
  const PROVIDER_OWN_PREFIXES = {
7521
7592
  openrouter: ["OPENROUTER_"],
7522
7593
  anthropic: ["ANTHROPIC_"],
7523
7594
  openai: ["OPENAI_"],
7524
- azure: ["AZURE_OPENAI_"],
7595
+ "azure-openai-responses": ["AZURE_OPENAI_"],
7525
7596
  google: ["GEMINI_", "GOOGLE_GENERATIVE_AI_"],
7526
7597
  gemini: ["GEMINI_", "GOOGLE_GENERATIVE_AI_"],
7527
7598
  groq: ["GROQ_"],
7528
7599
  xai: ["XAI_"]
7529
7600
  };
7530
- const ownPrefixes = PROVIDER_OWN_PREFIXES[provider] ?? [];
7601
+ const ownPrefixes = PROVIDER_OWN_PREFIXES[resolvedProvider] ?? [];
7531
7602
  const allOtherPrefixes = Object.entries(PROVIDER_OWN_PREFIXES).filter(([key]) => key !== provider).flatMap(([, prefixes]) => prefixes);
7532
7603
  for (const key of Object.keys(env)) {
7533
7604
  if (allOtherPrefixes.some((prefix) => key.startsWith(prefix)) && !ownPrefixes.some((prefix) => key.startsWith(prefix))) {
@@ -7818,6 +7889,24 @@ function extractMessages(events) {
7818
7889
  }
7819
7890
  }
7820
7891
  }
7892
+ if (messages) {
7893
+ for (let i = messages.length - 1; i >= 0; i--) {
7894
+ if (messages[i].role === "assistant" && !messages[i].content) {
7895
+ for (let j = events.length - 1; j >= 0; j--) {
7896
+ const evt = events[j];
7897
+ if (!evt || evt.type !== "message_end") continue;
7898
+ const msg = evt.message;
7899
+ if (msg?.role !== "assistant") continue;
7900
+ const text = extractPiTextContent(msg.content);
7901
+ if (text) {
7902
+ messages[i] = { ...messages[i], content: text };
7903
+ break;
7904
+ }
7905
+ }
7906
+ break;
7907
+ }
7908
+ }
7909
+ }
7821
7910
  const eventToolCalls = extractToolCallsFromEvents(events);
7822
7911
  if (eventToolCalls.length > 0) {
7823
7912
  injectEventToolCalls(messages, eventToolCalls);
@@ -8002,17 +8091,43 @@ function formatTimeoutSuffix3(timeoutMs) {
8002
8091
  if (!timeoutMs || timeoutMs <= 0) return "";
8003
8092
  return ` after ${Math.ceil(timeoutMs / 1e3)}s`;
8004
8093
  }
8094
+ function resolveWindowsCmd(executable) {
8095
+ if (process.platform !== "win32") return [executable, []];
8096
+ const lower = executable.toLowerCase();
8097
+ if (lower.endsWith(".js") || lower.endsWith(".exe")) return [executable, []];
8098
+ let fullPath;
8099
+ try {
8100
+ fullPath = execSync(`where ${executable}`, { encoding: "utf-8" }).trim().split(/\r?\n/)[0].trim();
8101
+ } catch {
8102
+ return [executable, []];
8103
+ }
8104
+ const cmdPath = fullPath.endsWith(".cmd") ? fullPath : `${fullPath}.cmd`;
8105
+ try {
8106
+ const content = readFileSync2(cmdPath, "utf-8");
8107
+ const match = content.match(/"?%_prog%"?\s+"([^"]+\.js)"/);
8108
+ if (match) {
8109
+ const dp0 = path19.dirname(path19.resolve(cmdPath));
8110
+ const scriptPath = match[1].replace(/%dp0%[/\\]?/gi, `${dp0}${path19.sep}`);
8111
+ try {
8112
+ accessSync(scriptPath);
8113
+ return ["node", [scriptPath]];
8114
+ } catch {
8115
+ }
8116
+ }
8117
+ } catch {
8118
+ }
8119
+ return [executable, []];
8120
+ }
8005
8121
  async function defaultPiRunner(options) {
8006
8122
  return await new Promise((resolve, reject) => {
8007
8123
  const parts = options.executable.split(/\s+/);
8008
- const executable = parts[0];
8009
- const executableArgs = parts.slice(1);
8124
+ const [resolvedExe, prefixArgs] = resolveWindowsCmd(parts[0]);
8125
+ const executableArgs = [...prefixArgs, ...parts.slice(1)];
8010
8126
  const allArgs = [...executableArgs, ...options.args];
8011
- const child = spawn3(executable, allArgs, {
8127
+ const child = spawn3(resolvedExe, allArgs, {
8012
8128
  cwd: options.cwd,
8013
8129
  env: options.env,
8014
- stdio: ["pipe", "pipe", "pipe"],
8015
- shell: false
8130
+ stdio: ["pipe", "pipe", "pipe"]
8016
8131
  });
8017
8132
  let stdout = "";
8018
8133
  let stderr = "";
@@ -8067,9 +8182,9 @@ async function defaultPiRunner(options) {
8067
8182
  }
8068
8183
 
8069
8184
  // src/evaluation/providers/pi-coding-agent.ts
8070
- import { execSync } from "node:child_process";
8185
+ import { execSync as execSync2 } from "node:child_process";
8071
8186
  import { randomUUID as randomUUID8 } from "node:crypto";
8072
- import { accessSync, createWriteStream as createWriteStream6 } from "node:fs";
8187
+ import { accessSync as accessSync2, createWriteStream as createWriteStream6 } from "node:fs";
8073
8188
  import { mkdir as mkdir7 } from "node:fs/promises";
8074
8189
  import path20 from "node:path";
8075
8190
  import { createInterface } from "node:readline";
@@ -8097,7 +8212,7 @@ function findAgentvRoot() {
8097
8212
  for (let i = 0; i < 10; i++) {
8098
8213
  try {
8099
8214
  const pkg = path20.join(dir, "package.json");
8100
- accessSync(pkg);
8215
+ accessSync2(pkg);
8101
8216
  return dir;
8102
8217
  } catch {
8103
8218
  const parent = path20.dirname(dir);
@@ -8117,7 +8232,7 @@ async function doLoadSdkModules() {
8117
8232
  if (await promptInstall()) {
8118
8233
  const installDir = findAgentvRoot();
8119
8234
  console.error(`Installing @mariozechner/pi-coding-agent into ${installDir}...`);
8120
- execSync("bun add @mariozechner/pi-coding-agent", {
8235
+ execSync2("bun add @mariozechner/pi-coding-agent", {
8121
8236
  cwd: installDir,
8122
8237
  stdio: "inherit"
8123
8238
  });
@@ -8158,7 +8273,9 @@ async function loadSdkModules() {
8158
8273
  codingTools: piSdk.codingTools,
8159
8274
  toolMap,
8160
8275
  SessionManager: piSdk.SessionManager,
8161
- getModel: piAi.getModel
8276
+ getModel: piAi.getModel,
8277
+ // biome-ignore lint/suspicious/noExplicitAny: registerBuiltInApiProviders exists at runtime but not in type defs
8278
+ registerBuiltInApiProviders: piAi.registerBuiltInApiProviders
8162
8279
  };
8163
8280
  }
8164
8281
  var PiCodingAgentProvider = class {
@@ -8180,17 +8297,31 @@ var PiCodingAgentProvider = class {
8180
8297
  const startTime = (/* @__PURE__ */ new Date()).toISOString();
8181
8298
  const startMs = Date.now();
8182
8299
  const sdk = await loadSdkModules();
8300
+ sdk.registerBuiltInApiProviders();
8183
8301
  const logger = await this.createStreamLogger(request).catch(() => void 0);
8184
8302
  try {
8185
8303
  const cwd = this.resolveCwd(request.cwd);
8186
- const providerName = this.config.subprovider ?? "google";
8304
+ const rawProvider = this.config.subprovider ?? "google";
8305
+ const hasBaseUrl = !!this.config.baseUrl;
8306
+ const providerName = resolveSubprovider(rawProvider, hasBaseUrl);
8187
8307
  const modelId = this.config.model ?? "gemini-2.5-flash";
8188
- this.setApiKeyEnv(providerName);
8189
- const model = sdk.getModel(providerName, modelId);
8308
+ this.setApiKeyEnv(rawProvider, hasBaseUrl);
8309
+ this.setBaseUrlEnv(rawProvider, hasBaseUrl);
8310
+ let model = sdk.getModel(providerName, modelId);
8190
8311
  if (!model) {
8191
- throw new Error(
8192
- `pi-coding-agent: getModel('${providerName}', '${modelId}') returned undefined. The model '${modelId}' is not registered for provider '${providerName}' in pi-ai. Check that subprovider and model are correct in your target config.`
8193
- );
8312
+ const envProvider = providerName.replace(/-responses$/, "");
8313
+ model = {
8314
+ id: modelId,
8315
+ name: modelId,
8316
+ api: providerName,
8317
+ provider: envProvider,
8318
+ baseUrl: this.config.baseUrl ?? "",
8319
+ reasoning: false,
8320
+ input: ["text"],
8321
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
8322
+ contextWindow: 128e3,
8323
+ maxTokens: 16384
8324
+ };
8194
8325
  }
8195
8326
  const tools = this.resolveTools(sdk);
8196
8327
  const { session } = await sdk.createAgentSession({
@@ -8343,22 +8474,21 @@ ${fileList}`;
8343
8474
  }
8344
8475
  }
8345
8476
  /** Maps config apiKey to the provider-specific env var the SDK reads. */
8346
- setApiKeyEnv(providerName) {
8477
+ setApiKeyEnv(providerName, hasBaseUrl = false) {
8347
8478
  if (!this.config.apiKey) return;
8348
- const ENV_KEY_MAP = {
8349
- google: "GEMINI_API_KEY",
8350
- gemini: "GEMINI_API_KEY",
8351
- anthropic: "ANTHROPIC_API_KEY",
8352
- openai: "OPENAI_API_KEY",
8353
- groq: "GROQ_API_KEY",
8354
- xai: "XAI_API_KEY",
8355
- openrouter: "OPENROUTER_API_KEY"
8356
- };
8357
- const envKey = ENV_KEY_MAP[providerName.toLowerCase()];
8479
+ const envKey = resolveEnvKeyName(providerName, hasBaseUrl);
8358
8480
  if (envKey) {
8359
8481
  process.env[envKey] = this.config.apiKey;
8360
8482
  }
8361
8483
  }
8484
+ /** Maps config baseUrl to the provider-specific env var the SDK reads. */
8485
+ setBaseUrlEnv(providerName, hasBaseUrl = false) {
8486
+ if (!this.config.baseUrl) return;
8487
+ const envKey = resolveEnvBaseUrlName(providerName, hasBaseUrl);
8488
+ if (envKey) {
8489
+ process.env[envKey] = this.config.baseUrl;
8490
+ }
8491
+ }
8362
8492
  resolveCwd(cwdOverride) {
8363
8493
  if (cwdOverride) {
8364
8494
  return path20.resolve(cwdOverride);
@@ -10082,8 +10212,11 @@ function assertTargetDefinition(value, index, filePath) {
10082
10212
  `targets.yaml entry at index ${index} in ${filePath} is missing a valid 'name'`
10083
10213
  );
10084
10214
  }
10085
- if (typeof provider !== "string" || provider.trim().length === 0) {
10086
- throw new Error(`targets.yaml entry '${name}' in ${filePath} is missing a valid 'provider'`);
10215
+ const hasUseTarget = typeof value.use_target === "string" && value.use_target.trim().length > 0;
10216
+ if (!hasUseTarget && (typeof provider !== "string" || provider.trim().length === 0)) {
10217
+ throw new Error(
10218
+ `targets.yaml entry '${name}' in ${filePath} is missing a valid 'provider' (or use use_target for delegation)`
10219
+ );
10087
10220
  }
10088
10221
  return value;
10089
10222
  }
@@ -15490,7 +15623,7 @@ async function runEvaluation(options) {
15490
15623
  if (resolvedTargetsByName.has(name)) {
15491
15624
  return resolvedTargetsByName.get(name);
15492
15625
  }
15493
- const definition = targetDefinitions.get(name);
15626
+ const definition = resolveDelegatedTargetDefinition(name, targetDefinitions, envLookup);
15494
15627
  if (!definition) {
15495
15628
  return void 0;
15496
15629
  }
@@ -16498,6 +16631,7 @@ async function runEvalCase(options) {
16498
16631
  let attempt = 0;
16499
16632
  let providerResponse = cachedResponse;
16500
16633
  let lastError;
16634
+ let targetUsed;
16501
16635
  while (!providerResponse && attempt < attemptBudget) {
16502
16636
  try {
16503
16637
  providerResponse = await invokeProvider(provider, {
@@ -16520,25 +16654,33 @@ async function runEvalCase(options) {
16520
16654
  attempt += 1;
16521
16655
  continue;
16522
16656
  }
16523
- const errorResult = buildErrorResult(
16524
- evalCase,
16525
- target.name,
16526
- nowFn(),
16527
- error,
16528
- promptInputs,
16529
- provider,
16530
- "agent",
16531
- "provider_error",
16532
- verbose
16533
- );
16534
- if (workspacePath) {
16535
- if (forceCleanup) {
16536
- await cleanupWorkspace(workspacePath).catch(() => {
16537
- });
16538
- }
16539
- return { ...errorResult, workspacePath };
16657
+ break;
16658
+ }
16659
+ }
16660
+ if (!providerResponse && target.fallbackTargets?.length && targetResolver) {
16661
+ for (const fallbackName of target.fallbackTargets) {
16662
+ const fallbackProvider = targetResolver(fallbackName);
16663
+ if (!fallbackProvider) {
16664
+ continue;
16665
+ }
16666
+ try {
16667
+ providerResponse = await invokeProvider(fallbackProvider, {
16668
+ evalCase,
16669
+ target,
16670
+ promptInputs,
16671
+ attempt: 0,
16672
+ agentTimeoutMs,
16673
+ signal,
16674
+ cwd: workspacePath,
16675
+ workspaceFile: caseWorkspaceFile ?? suiteWorkspaceFile,
16676
+ captureFileChanges: !!baselineCommit,
16677
+ streamCallbacks: options.streamCallbacks
16678
+ });
16679
+ targetUsed = fallbackName;
16680
+ break;
16681
+ } catch (error) {
16682
+ lastError = error;
16540
16683
  }
16541
- return errorResult;
16542
16684
  }
16543
16685
  }
16544
16686
  if (!providerResponse) {
@@ -16664,8 +16806,10 @@ async function runEvalCase(options) {
16664
16806
  };
16665
16807
  const skippedEvaluatorError = buildSkippedEvaluatorError(result.scores);
16666
16808
  const executionStatus = providerError || skippedEvaluatorError ? "execution_error" : classifyQualityStatus(result.score, caseThreshold);
16809
+ const targetUsedField = targetUsed ? { targetUsed } : {};
16667
16810
  const finalResult = providerError ? {
16668
16811
  ...result,
16812
+ ...targetUsedField,
16669
16813
  evalRun,
16670
16814
  error: providerError,
16671
16815
  executionStatus,
@@ -16677,6 +16821,7 @@ async function runEvalCase(options) {
16677
16821
  afterEachOutput
16678
16822
  } : skippedEvaluatorError ? {
16679
16823
  ...result,
16824
+ ...targetUsedField,
16680
16825
  score: 0,
16681
16826
  evalRun,
16682
16827
  error: skippedEvaluatorError,
@@ -16689,6 +16834,7 @@ async function runEvalCase(options) {
16689
16834
  afterEachOutput
16690
16835
  } : {
16691
16836
  ...result,
16837
+ ...targetUsedField,
16692
16838
  evalRun,
16693
16839
  executionStatus,
16694
16840
  beforeAllOutput,
@@ -17566,7 +17712,7 @@ async function discoverDefaultTarget(repoRoot) {
17566
17712
  return null;
17567
17713
  }
17568
17714
  async function loadEnvHierarchy(repoRoot, startPath) {
17569
- const { readFileSync: readFileSync3 } = await import("node:fs");
17715
+ const { readFileSync: readFileSync4 } = await import("node:fs");
17570
17716
  const chain = buildDirectoryChain(startPath, repoRoot);
17571
17717
  const envFiles = [];
17572
17718
  for (const dir of chain) {
@@ -17575,7 +17721,7 @@ async function loadEnvHierarchy(repoRoot, startPath) {
17575
17721
  }
17576
17722
  for (let i = 0; i < envFiles.length; i++) {
17577
17723
  try {
17578
- const content = readFileSync3(envFiles[i], "utf8");
17724
+ const content = readFileSync4(envFiles[i], "utf8");
17579
17725
  for (const line of content.split("\n")) {
17580
17726
  const trimmed = line.trim();
17581
17727
  if (!trimmed || trimmed.startsWith("#")) continue;
@@ -17790,7 +17936,7 @@ function shouldSkipCacheForTemperature(targetConfig) {
17790
17936
  }
17791
17937
 
17792
17938
  // src/projects.ts
17793
- import { existsSync as existsSync6, mkdirSync, readFileSync as readFileSync2, readdirSync as readdirSync3, statSync as statSync2, writeFileSync } from "node:fs";
17939
+ import { existsSync as existsSync6, mkdirSync, readFileSync as readFileSync3, readdirSync as readdirSync3, statSync as statSync2, writeFileSync } from "node:fs";
17794
17940
  import path47 from "node:path";
17795
17941
  import { parse as parseYaml3, stringify as stringifyYaml } from "yaml";
17796
17942
  function getProjectsRegistryPath() {
@@ -17802,7 +17948,7 @@ function loadProjectRegistry() {
17802
17948
  return { projects: [] };
17803
17949
  }
17804
17950
  try {
17805
- const raw = readFileSync2(registryPath, "utf-8");
17951
+ const raw = readFileSync3(registryPath, "utf-8");
17806
17952
  const parsed = parseYaml3(raw);
17807
17953
  if (!parsed || !Array.isArray(parsed.projects)) {
17808
17954
  return { projects: [] };
@@ -18839,6 +18985,7 @@ export {
18839
18985
  readTranscriptFile,
18840
18986
  removeProject,
18841
18987
  resolveAndCreateProvider,
18988
+ resolveDelegatedTargetDefinition,
18842
18989
  resolveFileReference,
18843
18990
  resolveTargetDefinition,
18844
18991
  resolveWorkspaceTemplate,