@agentv/core 3.10.3 → 3.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,7 +8,7 @@ import {
8
8
  isEvaluatorKind,
9
9
  loadCasesFromFile,
10
10
  resolveFileReference
11
- } from "../../chunk-VCFYWLFV.js";
11
+ } from "../../chunk-AVTN5AB7.js";
12
12
 
13
13
  // src/evaluation/validation/file-type.ts
14
14
  import { readFile } from "node:fs/promises";
package/dist/index.cjs CHANGED
@@ -1854,6 +1854,64 @@ var import_node_path8 = __toESM(require("path"), 1);
1854
1854
  var import_micromatch2 = __toESM(require("micromatch"), 1);
1855
1855
  var import_yaml4 = require("yaml");
1856
1856
 
1857
+ // src/evaluation/input-message-utils.ts
1858
+ function flattenInputMessages(messages) {
1859
+ return messages.flatMap((message) => extractContentSegments(message.content));
1860
+ }
1861
+ function collectResolvedInputFilePaths(messages) {
1862
+ const filePaths = [];
1863
+ for (const message of messages) {
1864
+ if (!Array.isArray(message.content)) {
1865
+ continue;
1866
+ }
1867
+ for (const segment of message.content) {
1868
+ if (isJsonObject(segment) && segment.type === "file" && typeof segment.resolvedPath === "string") {
1869
+ filePaths.push(segment.resolvedPath);
1870
+ }
1871
+ }
1872
+ }
1873
+ return filePaths;
1874
+ }
1875
+ function extractContentSegments(content) {
1876
+ if (typeof content === "string") {
1877
+ return content.trim().length > 0 ? [{ type: "text", value: content }] : [];
1878
+ }
1879
+ if (isJsonObject(content)) {
1880
+ const rendered = JSON.stringify(content, null, 2);
1881
+ return rendered.trim().length > 0 ? [{ type: "text", value: rendered }] : [];
1882
+ }
1883
+ if (!Array.isArray(content)) {
1884
+ return [];
1885
+ }
1886
+ const segments = [];
1887
+ for (const segment of content) {
1888
+ if (!isJsonObject(segment)) {
1889
+ continue;
1890
+ }
1891
+ segments.push(cloneJsonObject(segment));
1892
+ }
1893
+ return segments;
1894
+ }
1895
+ function cloneJsonObject(source) {
1896
+ const entries = Object.entries(source).map(([key, value]) => [key, cloneJsonValue(value)]);
1897
+ return Object.fromEntries(entries);
1898
+ }
1899
+ function cloneJsonValue(value) {
1900
+ if (value === null) {
1901
+ return null;
1902
+ }
1903
+ if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
1904
+ return value;
1905
+ }
1906
+ if (Array.isArray(value)) {
1907
+ return value.map((item) => cloneJsonValue(item));
1908
+ }
1909
+ if (typeof value === "object") {
1910
+ return cloneJsonObject(value);
1911
+ }
1912
+ return value;
1913
+ }
1914
+
1857
1915
  // src/evaluation/interpolation.ts
1858
1916
  var ENV_VAR_PATTERN = /\$\{\{\s*([A-Za-z_][A-Za-z0-9_]*)\s*\}\}/g;
1859
1917
  function interpolateEnv(value, env) {
@@ -1941,7 +1999,6 @@ function parseAgentSkillsEvals(parsed, source = "evals.json", baseDir) {
1941
1999
  id: String(id),
1942
2000
  question: prompt,
1943
2001
  input: [{ role: "user", content: prompt }],
1944
- input_segments: [{ type: "text", value: prompt }],
1945
2002
  expected_output: evalCase.expected_output ? [{ role: "assistant", content: evalCase.expected_output }] : [],
1946
2003
  reference_answer: evalCase.expected_output,
1947
2004
  file_paths: filePaths,
@@ -2194,7 +2251,7 @@ async function loadConfig(evalFilePath, repoRoot) {
2194
2251
  }
2195
2252
  try {
2196
2253
  const rawConfig = await (0, import_promises4.readFile)(configPath, "utf8");
2197
- const parsed = (0, import_yaml2.parse)(rawConfig);
2254
+ const parsed = interpolateEnv((0, import_yaml2.parse)(rawConfig), process.env);
2198
2255
  if (!isJsonObject(parsed)) {
2199
2256
  logWarning(`Invalid .agentv/config.yaml format at ${configPath}`);
2200
2257
  continue;
@@ -2412,6 +2469,27 @@ function parseExecutionDefaults(raw, configPath) {
2412
2469
  } else if (otelFile !== void 0) {
2413
2470
  logWarning(`Invalid execution.otel_file in ${configPath}, expected non-empty string`);
2414
2471
  }
2472
+ if (typeof obj.export_otel === "boolean") {
2473
+ result.export_otel = obj.export_otel;
2474
+ } else if (obj.export_otel !== void 0) {
2475
+ logWarning(`Invalid execution.export_otel in ${configPath}, expected boolean`);
2476
+ }
2477
+ const otelBackend = obj.otel_backend;
2478
+ if (typeof otelBackend === "string" && otelBackend.trim().length > 0) {
2479
+ result.otel_backend = otelBackend.trim();
2480
+ } else if (otelBackend !== void 0) {
2481
+ logWarning(`Invalid execution.otel_backend in ${configPath}, expected non-empty string`);
2482
+ }
2483
+ if (typeof obj.otel_capture_content === "boolean") {
2484
+ result.otel_capture_content = obj.otel_capture_content;
2485
+ } else if (obj.otel_capture_content !== void 0) {
2486
+ logWarning(`Invalid execution.otel_capture_content in ${configPath}, expected boolean`);
2487
+ }
2488
+ if (typeof obj.otel_group_turns === "boolean") {
2489
+ result.otel_group_turns = obj.otel_group_turns;
2490
+ } else if (obj.otel_group_turns !== void 0) {
2491
+ logWarning(`Invalid execution.otel_group_turns in ${configPath}, expected boolean`);
2492
+ }
2415
2493
  if (typeof obj.pool_workspaces === "boolean") {
2416
2494
  result.pool_workspaces = obj.pool_workspaces;
2417
2495
  } else if (obj.pool_workspaces !== void 0) {
@@ -3882,27 +3960,28 @@ var ANSI_YELLOW5 = "\x1B[33m";
3882
3960
  var ANSI_RESET6 = "\x1B[0m";
3883
3961
  async function processMessages(options) {
3884
3962
  const { messages, searchRoots, repoRootPath, textParts, messageType, verbose } = options;
3885
- const segments = [];
3963
+ const processedMessages = [];
3886
3964
  for (const message of messages) {
3887
3965
  const content = message.content;
3888
3966
  if (typeof content === "string") {
3889
- segments.push({ type: "text", value: content });
3890
3967
  if (textParts) {
3891
3968
  textParts.push(content);
3892
3969
  }
3970
+ processedMessages.push({ ...message, content });
3893
3971
  continue;
3894
3972
  }
3895
3973
  if (isJsonObject(content)) {
3896
3974
  const rendered = JSON.stringify(content, null, 2);
3897
- segments.push({ type: "text", value: rendered });
3898
3975
  if (textParts) {
3899
3976
  textParts.push(rendered);
3900
3977
  }
3978
+ processedMessages.push({ ...message, content: cloneJsonObject(content) });
3901
3979
  continue;
3902
3980
  }
3903
3981
  if (!Array.isArray(content)) {
3904
3982
  continue;
3905
3983
  }
3984
+ const processedContent = [];
3906
3985
  for (const rawSegment of content) {
3907
3986
  if (!isJsonObject(rawSegment)) {
3908
3987
  continue;
@@ -3925,8 +4004,8 @@ async function processMessages(options) {
3925
4004
  }
3926
4005
  try {
3927
4006
  const fileContent = (await (0, import_promises6.readFile)(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
3928
- segments.push({
3929
- type: "file",
4007
+ processedContent.push({
4008
+ ...cloneJsonObject(rawSegment),
3930
4009
  path: displayPath,
3931
4010
  text: fileContent,
3932
4011
  resolvedPath: import_node_path6.default.resolve(resolvedPath)
@@ -3943,37 +4022,19 @@ async function processMessages(options) {
3943
4022
  continue;
3944
4023
  }
3945
4024
  const clonedSegment = cloneJsonObject(rawSegment);
3946
- segments.push(clonedSegment);
4025
+ processedContent.push(clonedSegment);
3947
4026
  const inlineValue = clonedSegment.value;
3948
4027
  if (typeof inlineValue === "string" && textParts) {
3949
4028
  textParts.push(inlineValue);
3950
4029
  }
3951
4030
  }
4031
+ processedMessages.push({ ...message, content: processedContent });
3952
4032
  }
3953
- return segments;
4033
+ return processedMessages;
3954
4034
  }
3955
4035
  function asString3(value) {
3956
4036
  return typeof value === "string" ? value : void 0;
3957
4037
  }
3958
- function cloneJsonObject(source) {
3959
- const entries = Object.entries(source).map(([key, value]) => [key, cloneJsonValue(value)]);
3960
- return Object.fromEntries(entries);
3961
- }
3962
- function cloneJsonValue(value) {
3963
- if (value === null) {
3964
- return null;
3965
- }
3966
- if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
3967
- return value;
3968
- }
3969
- if (Array.isArray(value)) {
3970
- return value.map((item) => cloneJsonValue(item));
3971
- }
3972
- if (typeof value === "object") {
3973
- return cloneJsonObject(value);
3974
- }
3975
- return value;
3976
- }
3977
4038
  function logWarning3(message, details) {
3978
4039
  if (details && details.length > 0) {
3979
4040
  const detailBlock = details.join("\n");
@@ -4222,10 +4283,10 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
4222
4283
  );
4223
4284
  }
4224
4285
  }
4225
- const inputMessages = resolveInputMessages(evalcase);
4286
+ const rawInputMessages = resolveInputMessages(evalcase);
4226
4287
  const expectedMessages = resolveExpectedMessages(evalcase) ?? [];
4227
4288
  const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || evalcase.assert !== void 0;
4228
- if (!id || !hasEvaluationSpec || !inputMessages || inputMessages.length === 0) {
4289
+ if (!id || !hasEvaluationSpec || !rawInputMessages || rawInputMessages.length === 0) {
4229
4290
  logError2(
4230
4291
  `Skipping incomplete test at line ${lineNumber}: ${id ?? "unknown"}. Missing required fields: id, input, and at least one of criteria/expected_output/assert`
4231
4292
  );
@@ -4233,8 +4294,8 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
4233
4294
  }
4234
4295
  const hasExpectedMessages = expectedMessages.length > 0;
4235
4296
  const inputTextParts = [];
4236
- const inputSegments = await processMessages({
4237
- messages: inputMessages,
4297
+ const inputMessages = await processMessages({
4298
+ messages: rawInputMessages,
4238
4299
  searchRoots,
4239
4300
  repoRootPath,
4240
4301
  textParts: inputTextParts,
@@ -4280,19 +4341,13 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
4280
4341
  }
4281
4342
  }
4282
4343
  warnUnconsumedCriteria(outcome, evaluators, id ?? "unknown");
4283
- const userFilePaths = [];
4284
- for (const segment of inputSegments) {
4285
- if (segment.type === "file" && typeof segment.resolvedPath === "string") {
4286
- userFilePaths.push(segment.resolvedPath);
4287
- }
4288
- }
4344
+ const userFilePaths = collectResolvedInputFilePaths(inputMessages);
4289
4345
  const testCase = {
4290
4346
  id,
4291
4347
  eval_set: evalSetName,
4292
4348
  conversation_id: conversationId,
4293
4349
  question,
4294
4350
  input: inputMessages,
4295
- input_segments: inputSegments,
4296
4351
  expected_output: outputSegments,
4297
4352
  reference_answer: referenceAnswer,
4298
4353
  file_paths: userFilePaths,
@@ -4358,50 +4413,9 @@ function parseMetadata(suite) {
4358
4413
 
4359
4414
  // src/evaluation/formatting/prompt-builder.ts
4360
4415
  async function buildPromptInputs(testCase, mode = "lm") {
4361
- const segmentsByMessage = [];
4362
- const fileContentsByPath = /* @__PURE__ */ new Map();
4363
- for (const segment of testCase.input_segments) {
4364
- if (segment.type === "file" && typeof segment.path === "string" && typeof segment.text === "string") {
4365
- fileContentsByPath.set(segment.path, segment.text);
4366
- }
4367
- }
4368
- for (const message of testCase.input) {
4369
- const messageSegments = [];
4370
- if (typeof message.content === "string") {
4371
- if (message.content.trim().length > 0) {
4372
- messageSegments.push({ type: "text", value: message.content });
4373
- }
4374
- } else if (Array.isArray(message.content)) {
4375
- for (const segment of message.content) {
4376
- if (typeof segment === "string") {
4377
- if (segment.trim().length > 0) {
4378
- messageSegments.push({ type: "text", value: segment });
4379
- }
4380
- } else if (isJsonObject(segment)) {
4381
- const type = asString5(segment.type);
4382
- if (type === "file") {
4383
- const value = asString5(segment.value);
4384
- if (!value) continue;
4385
- const fileText = fileContentsByPath.get(value);
4386
- if (fileText !== void 0) {
4387
- messageSegments.push({ type: "file", text: fileText, path: value });
4388
- }
4389
- } else if (type === "text") {
4390
- const textValue = asString5(segment.value);
4391
- if (textValue && textValue.trim().length > 0) {
4392
- messageSegments.push({ type: "text", value: textValue });
4393
- }
4394
- }
4395
- }
4396
- }
4397
- } else if (isJsonObject(message.content)) {
4398
- const rendered = JSON.stringify(message.content, null, 2);
4399
- if (rendered.trim().length > 0) {
4400
- messageSegments.push({ type: "text", value: rendered });
4401
- }
4402
- }
4403
- segmentsByMessage.push(messageSegments);
4404
- }
4416
+ const segmentsByMessage = testCase.input.map(
4417
+ (message) => extractContentSegments(message.content)
4418
+ );
4405
4419
  const useRoleMarkers = needsRoleMarkers(testCase.input, segmentsByMessage);
4406
4420
  let question;
4407
4421
  if (useRoleMarkers) {
@@ -4429,7 +4443,7 @@ ${messageContent}`);
4429
4443
  question = messageParts.join("\n\n");
4430
4444
  } else {
4431
4445
  const questionParts = [];
4432
- for (const segment of testCase.input_segments) {
4446
+ for (const segment of flattenInputMessages(testCase.input)) {
4433
4447
  const formattedContent = formatSegment(segment, mode);
4434
4448
  if (formattedContent) {
4435
4449
  questionParts.push(formattedContent);
@@ -4516,9 +4530,6 @@ function buildChatPromptFromSegments(options) {
4516
4530
  }
4517
4531
  return chatPrompt.length > 0 ? chatPrompt : void 0;
4518
4532
  }
4519
- function asString5(value) {
4520
- return typeof value === "string" ? value : void 0;
4521
- }
4522
4533
 
4523
4534
  // src/evaluation/yaml-parser.ts
4524
4535
  var ANSI_YELLOW7 = "\x1B[33m";
@@ -4601,7 +4612,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
4601
4612
  throw new Error(`Invalid test file format: ${evalFilePath}`);
4602
4613
  }
4603
4614
  const suite = interpolated;
4604
- const evalSetNameFromSuite = asString6(suite.name)?.trim();
4615
+ const evalSetNameFromSuite = asString5(suite.name)?.trim();
4605
4616
  const fallbackEvalSet = import_node_path8.default.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
4606
4617
  const evalSetName = evalSetNameFromSuite && evalSetNameFromSuite.length > 0 ? evalSetNameFromSuite : fallbackEvalSet;
4607
4618
  const rawTestcases = resolveTests(suite);
@@ -4620,7 +4631,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
4620
4631
  const suiteInputMessages = expandInputShorthand(suite.input);
4621
4632
  const suiteInputFiles = suite.input_files;
4622
4633
  const rawGlobalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
4623
- const _globalTarget = asString6(rawGlobalExecution?.target) ?? asString6(suite.target);
4634
+ const _globalTarget = asString5(rawGlobalExecution?.target) ?? asString5(suite.target);
4624
4635
  const suiteAssertions = suite.assertions ?? suite.assert;
4625
4636
  if (suite.assert !== void 0 && suite.assertions === void 0) {
4626
4637
  logWarning5("'assert' is deprecated at the suite level. Use 'assertions' instead.");
@@ -4633,17 +4644,17 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
4633
4644
  continue;
4634
4645
  }
4635
4646
  const evalcase = rawEvalcase;
4636
- const id = asString6(evalcase.id);
4647
+ const id = asString5(evalcase.id);
4637
4648
  if (filterPattern && (!id || !import_micromatch2.default.isMatch(id, filterPattern))) {
4638
4649
  continue;
4639
4650
  }
4640
- const conversationId = asString6(evalcase.conversation_id);
4641
- let outcome = asString6(evalcase.criteria);
4651
+ const conversationId = asString5(evalcase.conversation_id);
4652
+ let outcome = asString5(evalcase.criteria);
4642
4653
  if (!outcome && evalcase.expected_outcome !== void 0) {
4643
- outcome = asString6(evalcase.expected_outcome);
4654
+ outcome = asString5(evalcase.expected_outcome);
4644
4655
  if (outcome) {
4645
4656
  logWarning5(
4646
- `Test '${asString6(evalcase.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
4657
+ `Test '${asString5(evalcase.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
4647
4658
  );
4648
4659
  }
4649
4660
  }
@@ -4660,10 +4671,9 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
4660
4671
  continue;
4661
4672
  }
4662
4673
  const effectiveSuiteInputMessages = suiteInputMessages && !skipDefaults ? suiteInputMessages : void 0;
4663
- const inputMessages = effectiveSuiteInputMessages ? [...effectiveSuiteInputMessages, ...testInputMessages] : testInputMessages;
4664
4674
  const hasExpectedMessages = expectedMessages.length > 0;
4665
4675
  const inputTextParts = [];
4666
- const suiteInputSegments = effectiveSuiteInputMessages ? await processMessages({
4676
+ const suiteResolvedInputMessages = effectiveSuiteInputMessages ? await processMessages({
4667
4677
  messages: effectiveSuiteInputMessages,
4668
4678
  searchRoots,
4669
4679
  repoRootPath,
@@ -4671,7 +4681,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
4671
4681
  messageType: "input",
4672
4682
  verbose
4673
4683
  }) : [];
4674
- const testInputSegments = await processMessages({
4684
+ const testResolvedInputMessages = await processMessages({
4675
4685
  messages: testInputMessages,
4676
4686
  searchRoots,
4677
4687
  repoRootPath,
@@ -4679,7 +4689,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
4679
4689
  messageType: "input",
4680
4690
  verbose
4681
4691
  });
4682
- const inputSegments = [...suiteInputSegments, ...testInputSegments];
4692
+ const inputMessages = [...suiteResolvedInputMessages, ...testResolvedInputMessages];
4683
4693
  const outputSegments = hasExpectedMessages ? await processExpectedMessages({
4684
4694
  messages: expectedMessages,
4685
4695
  searchRoots,
@@ -4717,12 +4727,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
4717
4727
  }
4718
4728
  }
4719
4729
  warnUnconsumedCriteria(outcome, evaluators, id ?? "unknown");
4720
- const userFilePaths = [];
4721
- for (const segment of inputSegments) {
4722
- if (segment.type === "file" && typeof segment.resolvedPath === "string") {
4723
- userFilePaths.push(segment.resolvedPath);
4724
- }
4725
- }
4730
+ const userFilePaths = collectResolvedInputFilePaths(inputMessages);
4726
4731
  const caseWorkspace = await resolveWorkspaceConfig(evalcase.workspace, evalFileDir);
4727
4732
  const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
4728
4733
  const metadata = isJsonObject(evalcase.metadata) ? evalcase.metadata : void 0;
@@ -4733,7 +4738,6 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
4733
4738
  conversation_id: conversationId,
4734
4739
  question,
4735
4740
  input: inputMessages,
4736
- input_segments: inputSegments,
4737
4741
  expected_output: outputSegments,
4738
4742
  reference_answer: referenceAnswer,
4739
4743
  file_paths: userFilePaths,
@@ -4942,7 +4946,7 @@ function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
4942
4946
  path: caseLevel.path ?? suiteLevel.path
4943
4947
  };
4944
4948
  }
4945
- function asString6(value) {
4949
+ function asString5(value) {
4946
4950
  return typeof value === "string" ? value : void 0;
4947
4951
  }
4948
4952
  function logWarning5(message, details) {
@@ -8813,7 +8817,7 @@ var PiAgentSdkProvider = class {
8813
8817
  const { Agent, getModel, getEnvApiKey } = await loadPiModules();
8814
8818
  const startTimeIso = (/* @__PURE__ */ new Date()).toISOString();
8815
8819
  const startMs = Date.now();
8816
- const providerName = this.config.provider ?? "anthropic";
8820
+ const providerName = this.config.subprovider ?? "anthropic";
8817
8821
  const modelId = this.config.model ?? "claude-sonnet-4-20250514";
8818
8822
  const model = getModel(providerName, modelId);
8819
8823
  const systemPrompt = this.config.systemPrompt ?? "Answer directly and concisely.";
@@ -8925,7 +8929,7 @@ var PiAgentSdkProvider = class {
8925
8929
  messages: agentMessages,
8926
8930
  systemPrompt,
8927
8931
  model: this.config.model,
8928
- provider: this.config.provider
8932
+ subprovider: this.config.subprovider
8929
8933
  },
8930
8934
  output,
8931
8935
  tokenUsage,
@@ -9161,8 +9165,8 @@ var PiCodingAgentProvider = class {
9161
9165
  }
9162
9166
  buildPiArgs(prompt, inputFiles, _captureFileChanges) {
9163
9167
  const args = [];
9164
- if (this.config.provider) {
9165
- args.push("--provider", this.config.provider);
9168
+ if (this.config.subprovider) {
9169
+ args.push("--provider", this.config.subprovider);
9166
9170
  }
9167
9171
  if (this.config.model) {
9168
9172
  args.push("--model", this.config.model);
@@ -9220,7 +9224,7 @@ ${prompt}` : prompt;
9220
9224
  buildEnv() {
9221
9225
  const env = { ...process.env };
9222
9226
  if (this.config.apiKey) {
9223
- const provider = this.config.provider?.toLowerCase() ?? "google";
9227
+ const provider = this.config.subprovider?.toLowerCase() ?? "google";
9224
9228
  switch (provider) {
9225
9229
  case "google":
9226
9230
  case "gemini":
@@ -10531,7 +10535,7 @@ function normalizeCopilotLogFormat(value) {
10531
10535
  }
10532
10536
  function resolvePiCodingAgentConfig(target, env, evalFilePath) {
10533
10537
  const executableSource = target.executable ?? target.command ?? target.binary;
10534
- const providerSource = target.pi_provider ?? target.piProvider ?? target.llm_provider;
10538
+ const subproviderSource = target.subprovider;
10535
10539
  const modelSource = target.model ?? target.pi_model ?? target.piModel;
10536
10540
  const apiKeySource = target.api_key ?? target.apiKey;
10537
10541
  const toolsSource = target.tools ?? target.pi_tools ?? target.piTools;
@@ -10547,10 +10551,15 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
10547
10551
  allowLiteral: true,
10548
10552
  optionalEnv: true
10549
10553
  }) ?? "pi";
10550
- const provider = resolveOptionalString(providerSource, env, `${target.name} pi provider`, {
10551
- allowLiteral: true,
10552
- optionalEnv: true
10553
- });
10554
+ const subprovider = resolveOptionalString(
10555
+ subproviderSource,
10556
+ env,
10557
+ `${target.name} pi subprovider`,
10558
+ {
10559
+ allowLiteral: true,
10560
+ optionalEnv: true
10561
+ }
10562
+ );
10554
10563
  const model = resolveOptionalString(modelSource, env, `${target.name} pi model`, {
10555
10564
  allowLiteral: true,
10556
10565
  optionalEnv: true
@@ -10598,7 +10607,7 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
10598
10607
  const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
10599
10608
  return {
10600
10609
  executable,
10601
- provider,
10610
+ subprovider,
10602
10611
  model,
10603
10612
  apiKey,
10604
10613
  tools,
@@ -10613,15 +10622,15 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
10613
10622
  };
10614
10623
  }
10615
10624
  function resolvePiAgentSdkConfig(target, env) {
10616
- const providerSource = target.pi_provider ?? target.piProvider ?? target.llm_provider;
10625
+ const subproviderSource = target.subprovider;
10617
10626
  const modelSource = target.model ?? target.pi_model ?? target.piModel;
10618
10627
  const apiKeySource = target.api_key ?? target.apiKey;
10619
10628
  const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
10620
10629
  const systemPromptSource = target.system_prompt ?? target.systemPrompt;
10621
- const provider = resolveOptionalString(
10622
- providerSource,
10630
+ const subprovider = resolveOptionalString(
10631
+ subproviderSource,
10623
10632
  env,
10624
- `${target.name} pi-agent-sdk provider`,
10633
+ `${target.name} pi-agent-sdk subprovider`,
10625
10634
  {
10626
10635
  allowLiteral: true,
10627
10636
  optionalEnv: true
@@ -10638,7 +10647,7 @@ function resolvePiAgentSdkConfig(target, env) {
10638
10647
  const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} pi-agent-sdk timeout`);
10639
10648
  const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
10640
10649
  return {
10641
- provider,
10650
+ subprovider,
10642
10651
  model,
10643
10652
  apiKey,
10644
10653
  timeoutMs,
@@ -13300,7 +13309,8 @@ var freeformEvaluationSchema = import_zod4.z.object({
13300
13309
  passed: import_zod4.z.boolean().describe("Whether this aspect was satisfied"),
13301
13310
  evidence: import_zod4.z.string().describe("Concise evidence (1-2 sentences)").optional()
13302
13311
  })
13303
- ).describe("Per-aspect evaluation results \u2014 one entry per aspect checked").optional()
13312
+ ).describe("Per-aspect evaluation results \u2014 one entry per aspect checked").optional(),
13313
+ details: import_zod4.z.record(import_zod4.z.unknown()).describe("Optional structured metadata for domain-specific metrics").optional()
13304
13314
  });
13305
13315
  var rubricCheckResultSchema = import_zod4.z.object({
13306
13316
  id: import_zod4.z.string().describe("The ID of the rubric item being checked"),
@@ -13362,7 +13372,7 @@ var LlmGraderEvaluator = class {
13362
13372
  async evaluateFreeform(context2, graderProvider) {
13363
13373
  const formattedQuestion = context2.promptInputs.question && context2.promptInputs.question.trim().length > 0 ? context2.promptInputs.question : context2.evalCase.question;
13364
13374
  const variables = {
13365
- [TEMPLATE_VARIABLES.INPUT]: JSON.stringify(context2.evalCase.input_segments, null, 2),
13375
+ [TEMPLATE_VARIABLES.INPUT]: JSON.stringify(context2.evalCase.input, null, 2),
13366
13376
  [TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: JSON.stringify(
13367
13377
  context2.evalCase.expected_output,
13368
13378
  null,
@@ -13405,6 +13415,7 @@ ${context2.fileChanges}`;
13405
13415
  expectedAspectCount: Math.max(assertions.length, 1),
13406
13416
  evaluatorRawRequest,
13407
13417
  graderTarget: graderProvider.targetName,
13418
+ details: data.details,
13408
13419
  tokenUsage
13409
13420
  };
13410
13421
  } catch (e) {
@@ -13824,7 +13835,7 @@ ${outputSchema}`;
13824
13835
  expectedAspectCount: Math.max(assertions.length, 1),
13825
13836
  evaluatorRawRequest,
13826
13837
  graderTarget,
13827
- details
13838
+ details: data.details && Object.keys(data.details).length > 0 ? { ...details, ...data.details } : details
13828
13839
  };
13829
13840
  } catch {
13830
13841
  return {
@@ -13971,7 +13982,8 @@ function buildOutputSchema() {
13971
13982
  ' "passed": <boolean>,',
13972
13983
  ' "evidence": "<concise evidence, 1-2 sentences, optional>"',
13973
13984
  " }",
13974
- " ]",
13985
+ " ],",
13986
+ ' "details": {<optional object with domain-specific structured metrics>}',
13975
13987
  "}"
13976
13988
  ].join("\n");
13977
13989
  }
@@ -15335,7 +15347,7 @@ function assembleLlmGraderPrompt(input) {
15335
15347
  function assembleFreeform(evalCase, candidate, promptInputs, fileChanges, evaluatorTemplateOverride) {
15336
15348
  const formattedQuestion = promptInputs.question && promptInputs.question.trim().length > 0 ? promptInputs.question : evalCase.question;
15337
15349
  const variables = {
15338
- [TEMPLATE_VARIABLES.INPUT]: JSON.stringify(evalCase.input_segments, null, 2),
15350
+ [TEMPLATE_VARIABLES.INPUT]: JSON.stringify(evalCase.input, null, 2),
15339
15351
  [TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: JSON.stringify(evalCase.expected_output, null, 2),
15340
15352
  [TEMPLATE_VARIABLES.OUTPUT]: JSON.stringify([], null, 2),
15341
15353
  [TEMPLATE_VARIABLES.CRITERIA]: evalCase.criteria.trim(),
@@ -17616,6 +17628,18 @@ var QUALITY_PASS_THRESHOLD = 0.8;
17616
17628
  function classifyQualityStatus(score) {
17617
17629
  return score >= QUALITY_PASS_THRESHOLD ? "ok" : "quality_failure";
17618
17630
  }
17631
+ function buildSkippedEvaluatorError(scores) {
17632
+ const skippedScores = scores?.filter((score) => score.verdict === "skip") ?? [];
17633
+ if (skippedScores.length === 0) {
17634
+ return void 0;
17635
+ }
17636
+ const messages = skippedScores.map((score) => {
17637
+ const label = score.name || score.type;
17638
+ const assertionMessage = score.assertions.find((assertion) => !assertion.passed)?.text ?? "Evaluator skipped";
17639
+ return `${label}: ${assertionMessage}`;
17640
+ });
17641
+ return messages.length === 1 ? messages[0] : `Evaluators skipped: ${messages.join(" | ")}`;
17642
+ }
17619
17643
  function usesFileReferencePrompt(provider) {
17620
17644
  return isAgentProvider(provider) || provider.kind === "cli";
17621
17645
  }
@@ -18880,7 +18904,8 @@ async function runEvalCase(options) {
18880
18904
  durationMs: totalDurationMs,
18881
18905
  ...evalRunTokenUsage ? { tokenUsage: evalRunTokenUsage } : {}
18882
18906
  };
18883
- const executionStatus = providerError ? "execution_error" : classifyQualityStatus(result.score);
18907
+ const skippedEvaluatorError = buildSkippedEvaluatorError(result.scores);
18908
+ const executionStatus = providerError || skippedEvaluatorError ? "execution_error" : classifyQualityStatus(result.score);
18884
18909
  const finalResult = providerError ? {
18885
18910
  ...result,
18886
18911
  evalRun,
@@ -18892,7 +18917,26 @@ async function runEvalCase(options) {
18892
18917
  beforeAllOutput,
18893
18918
  beforeEachOutput,
18894
18919
  afterEachOutput
18895
- } : { ...result, evalRun, executionStatus, beforeAllOutput, beforeEachOutput, afterEachOutput };
18920
+ } : skippedEvaluatorError ? {
18921
+ ...result,
18922
+ score: 0,
18923
+ evalRun,
18924
+ error: skippedEvaluatorError,
18925
+ executionStatus,
18926
+ failureStage: "evaluator",
18927
+ failureReasonCode: "evaluator_error",
18928
+ executionError: { message: skippedEvaluatorError, stage: "evaluator" },
18929
+ beforeAllOutput,
18930
+ beforeEachOutput,
18931
+ afterEachOutput
18932
+ } : {
18933
+ ...result,
18934
+ evalRun,
18935
+ executionStatus,
18936
+ beforeAllOutput,
18937
+ beforeEachOutput,
18938
+ afterEachOutput
18939
+ };
18896
18940
  const isFailure = !!finalResult.error || finalResult.score < 0.5;
18897
18941
  if (workspacePath && !isSharedWorkspace) {
18898
18942
  if (forceCleanup) {
@@ -19637,11 +19681,6 @@ async function evaluate(config) {
19637
19681
  evalCases = (config.tests ?? []).map((test) => {
19638
19682
  const input = typeof test.input === "string" ? [{ role: "user", content: test.input }] : test.input;
19639
19683
  const question = typeof test.input === "string" ? test.input : test.input.find((m) => m.role === "user")?.content ?? "";
19640
- const inputSegments = input.map((m) => ({
19641
- type: "text",
19642
- value: typeof m.content === "string" ? m.content : JSON.stringify(m.content),
19643
- messageIndex: 0
19644
- }));
19645
19684
  const expectedOutputValue = test.expectedOutput ?? test.expected_output;
19646
19685
  const expectedOutput = expectedOutputValue ? [
19647
19686
  { role: "assistant", content: expectedOutputValue }
@@ -19670,7 +19709,6 @@ async function evaluate(config) {
19670
19709
  criteria: test.criteria ?? "",
19671
19710
  question: String(question),
19672
19711
  input,
19673
- input_segments: inputSegments,
19674
19712
  expected_output: expectedOutput,
19675
19713
  reference_answer: expectedOutputValue,
19676
19714
  file_paths: [],