agentv 4.35.1 → 4.36.0-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. package/dist/{artifact-writer-G57MG52C.js → artifact-writer-3YRN6YTA.js} +4 -4
  2. package/dist/{chunk-CRMGUVRZ.js → chunk-4M6FAQTW.js} +85 -19
  3. package/dist/chunk-4M6FAQTW.js.map +1 -0
  4. package/dist/{chunk-INOKS5LF.js → chunk-7KZ2AF26.js} +269 -57
  5. package/dist/chunk-7KZ2AF26.js.map +1 -0
  6. package/dist/{chunk-KJGYL3M3.js → chunk-HVBAVOAH.js} +72 -50
  7. package/dist/chunk-HVBAVOAH.js.map +1 -0
  8. package/dist/{chunk-KNF3AGCI.js → chunk-P5JONEWJ.js} +231 -35
  9. package/dist/chunk-P5JONEWJ.js.map +1 -0
  10. package/dist/{chunk-6QEIZ33V.js → chunk-TUTURE2B.js} +1227 -372
  11. package/dist/chunk-TUTURE2B.js.map +1 -0
  12. package/dist/cli.js +5 -5
  13. package/dist/dashboard/assets/index-DA96FAM5.js +119 -0
  14. package/dist/dashboard/assets/{index-Bdk-9a_8.js → index-l4t97uO8.js} +1 -1
  15. package/dist/dashboard/assets/index-nmrFBoNd.css +1 -0
  16. package/dist/dashboard/index.html +2 -2
  17. package/dist/{dist-M4B77IW4.js → dist-BSFUYS54.js} +73 -3
  18. package/dist/index.js +5 -5
  19. package/dist/{interactive-VYQ5SYMR.js → interactive-IEC63EVP.js} +5 -5
  20. package/dist/skills/agentv-eval-writer/SKILL.md +6 -0
  21. package/dist/{ts-eval-loader-EQJX3OLT-THE7D3GR.js → ts-eval-loader-4DU65XGW-YM47FFG2.js} +2 -2
  22. package/package.json +1 -1
  23. package/dist/chunk-6QEIZ33V.js.map +0 -1
  24. package/dist/chunk-CRMGUVRZ.js.map +0 -1
  25. package/dist/chunk-INOKS5LF.js.map +0 -1
  26. package/dist/chunk-KJGYL3M3.js.map +0 -1
  27. package/dist/chunk-KNF3AGCI.js.map +0 -1
  28. package/dist/dashboard/assets/index-BPMAZqjE.css +0 -1
  29. package/dist/dashboard/assets/index-BWO0UcxG.js +0 -118
  30. /package/dist/{artifact-writer-G57MG52C.js.map → artifact-writer-3YRN6YTA.js.map} +0 -0
  31. /package/dist/{dist-M4B77IW4.js.map → dist-BSFUYS54.js.map} +0 -0
  32. /package/dist/{interactive-VYQ5SYMR.js.map → interactive-IEC63EVP.js.map} +0 -0
  33. /package/dist/{ts-eval-loader-EQJX3OLT-THE7D3GR.js.map → ts-eval-loader-4DU65XGW-YM47FFG2.js.map} +0 -0
@@ -493,8 +493,8 @@ function getErrorMap() {
493
493
 
494
494
  // ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/helpers/parseUtil.js
495
495
  var makeIssue = (params) => {
496
- const { data, path: path47, errorMaps, issueData } = params;
497
- const fullPath = [...path47, ...issueData.path || []];
496
+ const { data, path: path48, errorMaps, issueData } = params;
497
+ const fullPath = [...path48, ...issueData.path || []];
498
498
  const fullIssue = {
499
499
  ...issueData,
500
500
  path: fullPath
@@ -610,11 +610,11 @@ var errorUtil;
610
610
 
611
611
  // ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/types.js
612
612
  var ParseInputLazyPath = class {
613
- constructor(parent, value, path47, key) {
613
+ constructor(parent, value, path48, key) {
614
614
  this._cachedPath = [];
615
615
  this.parent = parent;
616
616
  this.data = value;
617
- this._path = path47;
617
+ this._path = path48;
618
618
  this._key = key;
619
619
  }
620
620
  get path() {
@@ -4056,7 +4056,7 @@ var coerce = {
4056
4056
  };
4057
4057
  var NEVER = INVALID;
4058
4058
 
4059
- // ../../packages/core/dist/chunk-EW5X2RGJ.js
4059
+ // ../../packages/core/dist/chunk-GPRJDMQ6.js
4060
4060
  import { parse } from "yaml";
4061
4061
  import os from "node:os";
4062
4062
  import path from "node:path";
@@ -4348,7 +4348,8 @@ var KNOWN_PROVIDERS = [
4348
4348
  "vscode",
4349
4349
  "vscode-insiders",
4350
4350
  "agentv",
4351
- "transcript"
4351
+ "transcript",
4352
+ "replay"
4352
4353
  ];
4353
4354
  var PROVIDER_ALIASES = [
4354
4355
  "azure-openai",
@@ -4890,6 +4891,12 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath, op
4890
4891
  ...base,
4891
4892
  config: resolveCliConfig(parsed, env, evalFilePath)
4892
4893
  };
4894
+ case "replay":
4895
+ return {
4896
+ kind: "replay",
4897
+ ...base,
4898
+ config: resolveReplayConfig(parsed, env, evalFilePath)
4899
+ };
4893
4900
  default:
4894
4901
  return {
4895
4902
  kind: "cli",
@@ -5549,6 +5556,35 @@ function resolveMockConfig(target) {
5549
5556
  const response = typeof target.response === "string" ? target.response : void 0;
5550
5557
  return { response };
5551
5558
  }
5559
+ function resolveReplayConfig(target, env, evalFilePath) {
5560
+ const fixtures = resolveString(target.fixtures, env, `${target.name} replay fixtures`, true);
5561
+ const fixturesPath = evalFilePath && !path3.isAbsolute(fixtures) ? path3.resolve(path3.dirname(path3.resolve(evalFilePath)), fixtures) : path3.resolve(fixtures);
5562
+ const sourceTarget = resolveString(
5563
+ target.source_target,
5564
+ env,
5565
+ `${target.name} replay source_target`,
5566
+ true
5567
+ );
5568
+ const suite = resolveOptionalString(target.suite, env, `${target.name} replay suite`, {
5569
+ allowLiteral: true,
5570
+ optionalEnv: true
5571
+ });
5572
+ const evalPath = resolveOptionalString(target.eval_path, env, `${target.name} replay eval_path`, {
5573
+ allowLiteral: true,
5574
+ optionalEnv: true
5575
+ });
5576
+ const variant = resolveOptionalString(target.variant, env, `${target.name} replay variant`, {
5577
+ allowLiteral: true,
5578
+ optionalEnv: true
5579
+ });
5580
+ return {
5581
+ fixturesPath,
5582
+ sourceTarget,
5583
+ suite,
5584
+ evalPath,
5585
+ variant
5586
+ };
5587
+ }
5552
5588
  function resolveVSCodeConfig(target, env, insiders, _evalFilePath) {
5553
5589
  const executableSource = target.executable;
5554
5590
  const waitSource = target.wait;
@@ -6087,19 +6123,20 @@ async function expandFileReferences(tests, evalFileDir) {
6087
6123
  return expanded;
6088
6124
  }
6089
6125
 
6090
- // ../../packages/core/dist/chunk-7QB53OPK.js
6091
- import path46 from "node:path";
6126
+ // ../../packages/core/dist/chunk-FQ5QRJIT.js
6127
+ import path47 from "node:path";
6092
6128
  import { pathToFileURL as pathToFileURL2 } from "node:url";
6093
6129
  import { existsSync as existsSync6 } from "node:fs";
6094
- import path45 from "node:path";
6130
+ import path46 from "node:path";
6095
6131
  import micromatch4 from "micromatch";
6096
6132
  import { mkdir, readFile as readFile3, writeFile } from "node:fs/promises";
6097
6133
  import path5 from "node:path";
6098
6134
  import { execFile as execFile3 } from "node:child_process";
6099
- import { createHash as createHash2, randomUUID as randomUUID9 } from "node:crypto";
6135
+ import { createHash as createHash3, randomUUID as randomUUID9 } from "node:crypto";
6100
6136
  import { existsSync as existsSync5 } from "node:fs";
6101
- import { copyFile as copyFile2, mkdir as mkdir15, readdir as readdir8, stat as stat9 } from "node:fs/promises";
6102
- import path44 from "node:path";
6137
+ import { copyFile as copyFile2, mkdir as mkdir16, readdir as readdir8, stat as stat9 } from "node:fs/promises";
6138
+ import path45 from "node:path";
6139
+ import { fileURLToPath as fileURLToPath5 } from "node:url";
6103
6140
  import { promisify as promisify7 } from "node:util";
6104
6141
  import micromatch3 from "micromatch";
6105
6142
  import { mkdtemp, rm, writeFile as writeFile2 } from "node:fs/promises";
@@ -6818,10 +6855,10 @@ function assignProp(target, prop, value) {
6818
6855
  configurable: true
6819
6856
  });
6820
6857
  }
6821
- function getElementAtPath(obj, path47) {
6822
- if (!path47)
6858
+ function getElementAtPath(obj, path48) {
6859
+ if (!path48)
6823
6860
  return obj;
6824
- return path47.reduce((acc, key) => acc?.[key], obj);
6861
+ return path48.reduce((acc, key) => acc?.[key], obj);
6825
6862
  }
6826
6863
  function promiseAllObject(promisesObj) {
6827
6864
  const keys = Object.keys(promisesObj);
@@ -7141,11 +7178,11 @@ function aborted(x, startIndex = 0) {
7141
7178
  }
7142
7179
  return false;
7143
7180
  }
7144
- function prefixIssues(path47, issues) {
7181
+ function prefixIssues(path48, issues) {
7145
7182
  return issues.map((iss) => {
7146
7183
  var _a;
7147
7184
  (_a = iss).path ?? (_a.path = []);
7148
- iss.path.unshift(path47);
7185
+ iss.path.unshift(path48);
7149
7186
  return iss;
7150
7187
  });
7151
7188
  }
@@ -7282,7 +7319,7 @@ function treeifyError(error40, _mapper) {
7282
7319
  return issue2.message;
7283
7320
  };
7284
7321
  const result = { errors: [] };
7285
- const processError = (error41, path47 = []) => {
7322
+ const processError = (error41, path48 = []) => {
7286
7323
  var _a, _b;
7287
7324
  for (const issue2 of error41.issues) {
7288
7325
  if (issue2.code === "invalid_union" && issue2.errors.length) {
@@ -7292,7 +7329,7 @@ function treeifyError(error40, _mapper) {
7292
7329
  } else if (issue2.code === "invalid_element") {
7293
7330
  processError({ issues: issue2.issues }, issue2.path);
7294
7331
  } else {
7295
- const fullpath = [...path47, ...issue2.path];
7332
+ const fullpath = [...path48, ...issue2.path];
7296
7333
  if (fullpath.length === 0) {
7297
7334
  result.errors.push(mapper(issue2));
7298
7335
  continue;
@@ -7322,9 +7359,9 @@ function treeifyError(error40, _mapper) {
7322
7359
  processError(error40);
7323
7360
  return result;
7324
7361
  }
7325
- function toDotPath(path47) {
7362
+ function toDotPath(path48) {
7326
7363
  const segs = [];
7327
- for (const seg of path47) {
7364
+ for (const seg of path48) {
7328
7365
  if (typeof seg === "number")
7329
7366
  segs.push(`[${seg}]`);
7330
7367
  else if (typeof seg === "symbol")
@@ -18754,7 +18791,7 @@ var RequestError = class _RequestError extends Error {
18754
18791
  }
18755
18792
  };
18756
18793
 
18757
- // ../../packages/core/dist/chunk-7QB53OPK.js
18794
+ // ../../packages/core/dist/chunk-FQ5QRJIT.js
18758
18795
  import { exec as execCallback } from "node:child_process";
18759
18796
  import { readdirSync, statSync } from "node:fs";
18760
18797
  import { readFile as readFile32, readdir as readdir2, stat as stat2 } from "node:fs/promises";
@@ -18788,74 +18825,77 @@ import { mkdir as mkdir8 } from "node:fs/promises";
18788
18825
  import path16 from "node:path";
18789
18826
  import { createInterface } from "node:readline";
18790
18827
  import { fileURLToPath as fileURLToPath3, pathToFileURL } from "node:url";
18828
+ import { createHash } from "node:crypto";
18829
+ import { mkdir as mkdir9, readFile as readFile6, writeFile as writeFile4 } from "node:fs/promises";
18830
+ import path17 from "node:path";
18791
18831
  import { exec as exec2 } from "node:child_process";
18792
18832
  import { constants as constants2, access as access2 } from "node:fs/promises";
18793
- import path27 from "node:path";
18833
+ import path28 from "node:path";
18794
18834
  import { promisify as promisify4 } from "node:util";
18795
- import { stat as stat5, writeFile as writeFile6 } from "node:fs/promises";
18796
- import path25 from "node:path";
18835
+ import { stat as stat5, writeFile as writeFile7 } from "node:fs/promises";
18836
+ import path26 from "node:path";
18797
18837
  import { constants as constants3 } from "node:fs";
18798
- import { access as access3, mkdir as mkdir9, readdir as readdir3, rm as rm3, stat as stat3 } from "node:fs/promises";
18799
- import path17 from "node:path";
18838
+ import { access as access3, mkdir as mkdir10, readdir as readdir3, rm as rm3, stat as stat3 } from "node:fs/promises";
18800
18839
  import path18 from "node:path";
18801
18840
  import path19 from "node:path";
18802
- import { readFile as readFile6 } from "node:fs/promises";
18803
18841
  import path20 from "node:path";
18842
+ import { readFile as readFile7 } from "node:fs/promises";
18843
+ import path21 from "node:path";
18804
18844
  import { exec, spawn as spawn4 } from "node:child_process";
18805
- import { mkdir as mkdir10, writeFile as writeFile4 } from "node:fs/promises";
18806
- import path222 from "node:path";
18845
+ import { mkdir as mkdir11, writeFile as writeFile5 } from "node:fs/promises";
18846
+ import path23 from "node:path";
18807
18847
  import { promisify as promisify3 } from "node:util";
18808
- import path21 from "node:path";
18809
- import { copyFile, mkdir as mkdir11, readFile as readFile7, readdir as readdir4, stat as stat4, writeFile as writeFile5 } from "node:fs/promises";
18848
+ import path222 from "node:path";
18849
+ import { copyFile, mkdir as mkdir12, readFile as readFile8, readdir as readdir4, stat as stat4, writeFile as writeFile6 } from "node:fs/promises";
18850
+ import path25 from "node:path";
18810
18851
  import path24 from "node:path";
18811
- import path23 from "node:path";
18812
18852
  import JSON5 from "json5";
18813
- import { writeFile as writeFile7 } from "node:fs/promises";
18814
- import path26 from "node:path";
18853
+ import { writeFile as writeFile8 } from "node:fs/promises";
18854
+ import path27 from "node:path";
18815
18855
  import { constants as constants32 } from "node:fs";
18816
- import { access as access32, readFile as readFile8 } from "node:fs/promises";
18817
- import path28 from "node:path";
18856
+ import { access as access32, readFile as readFile9 } from "node:fs/promises";
18818
18857
  import path29 from "node:path";
18819
- import fg2 from "fast-glob";
18820
18858
  import path30 from "node:path";
18859
+ import fg2 from "fast-glob";
18821
18860
  import path31 from "node:path";
18822
- import fg22 from "fast-glob";
18823
18861
  import path322 from "node:path";
18824
- import fg3 from "fast-glob";
18825
- import { cp, mkdir as mkdir13, readdir as readdir5, rm as rm4, stat as stat6 } from "node:fs/promises";
18862
+ import fg22 from "fast-glob";
18826
18863
  import path33 from "node:path";
18864
+ import fg3 from "fast-glob";
18865
+ import { cp, mkdir as mkdir14, readdir as readdir5, rm as rm4, stat as stat6 } from "node:fs/promises";
18866
+ import path34 from "node:path";
18827
18867
  import { execFile } from "node:child_process";
18828
- import { createHash } from "node:crypto";
18868
+ import { createHash as createHash2 } from "node:crypto";
18829
18869
  import { existsSync as existsSync3 } from "node:fs";
18830
- import { cp as cp2, mkdir as mkdir14, readFile as readFile9, readdir as readdir6, rm as rm5, unlink, writeFile as writeFile8 } from "node:fs/promises";
18831
- import path34 from "node:path";
18870
+ import { cp as cp2, mkdir as mkdir15, readFile as readFile10, readdir as readdir6, rm as rm5, unlink, writeFile as writeFile9 } from "node:fs/promises";
18871
+ import path35 from "node:path";
18832
18872
  import { promisify as promisify5 } from "node:util";
18833
18873
  import { execFile as execFile2 } from "node:child_process";
18834
18874
  import { existsSync as existsSync4 } from "node:fs";
18835
- import path35 from "node:path";
18875
+ import path36 from "node:path";
18836
18876
  import { promisify as promisify6 } from "node:util";
18837
18877
  import { readdir as readdir7, stat as stat7 } from "node:fs/promises";
18838
- import path36 from "node:path";
18839
- import { readFile as readFile16, stat as stat8 } from "node:fs/promises";
18840
- import path43 from "node:path";
18878
+ import path37 from "node:path";
18879
+ import { readFile as readFile17, stat as stat8 } from "node:fs/promises";
18880
+ import path44 from "node:path";
18841
18881
  import micromatch2 from "micromatch";
18842
18882
  import { stringify as stringifyYaml } from "yaml";
18843
- import { readFile as readFile10 } from "node:fs/promises";
18844
- import path37 from "node:path";
18845
18883
  import { readFile as readFile11 } from "node:fs/promises";
18846
- import path39 from "node:path";
18884
+ import path38 from "node:path";
18885
+ import { readFile as readFile12 } from "node:fs/promises";
18886
+ import path40 from "node:path";
18847
18887
  import { constants as constants4 } from "node:fs";
18848
18888
  import { access as access4 } from "node:fs/promises";
18849
- import path38 from "node:path";
18889
+ import path39 from "node:path";
18850
18890
  import { fileURLToPath as fileURLToPath4 } from "node:url";
18891
+ import { readFile as readFile14 } from "node:fs/promises";
18892
+ import path41 from "node:path";
18851
18893
  import { readFile as readFile13 } from "node:fs/promises";
18852
- import path40 from "node:path";
18853
- import { readFile as readFile12 } from "node:fs/promises";
18894
+ import { readFile as readFile16 } from "node:fs/promises";
18895
+ import path43 from "node:path";
18896
+ import micromatch from "micromatch";
18854
18897
  import { readFile as readFile15 } from "node:fs/promises";
18855
18898
  import path422 from "node:path";
18856
- import micromatch from "micromatch";
18857
- import { readFile as readFile14 } from "node:fs/promises";
18858
- import path41 from "node:path";
18859
18899
  var DEFAULT_CACHE_PATH = ".agentv/cache";
18860
18900
  var ResponseCache = class {
18861
18901
  cachePath;
@@ -19469,13 +19509,14 @@ var CodeGrader = class {
19469
19509
  }
19470
19510
  return imageTmpDir;
19471
19511
  };
19472
- const materializedOutput = await materializeContentForGrader(
19473
- context.output,
19512
+ const transcriptMessages = context.trace?.messages ?? context.output ?? [];
19513
+ const materializedMessages = await materializeContentForGrader(
19514
+ transcriptMessages,
19474
19515
  getImageDir
19475
19516
  );
19476
- let outputForPayload = materializedOutput;
19517
+ let outputForPayload = context.candidate;
19477
19518
  let outputPath;
19478
- if (outputForPayload) {
19519
+ if (outputForPayload !== null) {
19479
19520
  const serialized = JSON.stringify(outputForPayload);
19480
19521
  if (serialized.length > FILE_BACKED_OUTPUT_THRESHOLD) {
19481
19522
  const tmpDir = await mkdtemp(join(tmpdir(), "agentv-grader-"));
@@ -19484,6 +19525,10 @@ var CodeGrader = class {
19484
19525
  outputForPayload = null;
19485
19526
  }
19486
19527
  }
19528
+ const traceForPayload = context.trace ? {
19529
+ ...context.trace,
19530
+ messages: materializedMessages ?? context.trace.messages
19531
+ } : null;
19487
19532
  const payload = {
19488
19533
  criteria: context.evalCase.criteria,
19489
19534
  expectedOutput: await materializeContentForGrader(
@@ -19491,6 +19536,8 @@ var CodeGrader = class {
19491
19536
  getImageDir
19492
19537
  ),
19493
19538
  output: outputForPayload,
19539
+ answer: context.candidate,
19540
+ messages: materializedMessages ?? [],
19494
19541
  outputPath,
19495
19542
  inputFiles: context.evalCase.file_paths,
19496
19543
  input: await materializeContentForGrader(
@@ -19498,7 +19545,14 @@ var CodeGrader = class {
19498
19545
  getImageDir
19499
19546
  ),
19500
19547
  metadata: context.evalCase.metadata ?? null,
19501
- trace: context.trace ?? null,
19548
+ trace: traceForPayload,
19549
+ traceSummary: context.trace ? {
19550
+ eventCount: context.trace.eventCount,
19551
+ toolCalls: context.trace.toolCalls,
19552
+ errorCount: context.trace.errorCount,
19553
+ toolDurations: context.trace.toolDurations,
19554
+ llmCallCount: context.trace.llmCallCount
19555
+ } : null,
19502
19556
  tokenUsage: context.tokenUsage ?? null,
19503
19557
  costUsd: context.costUsd ?? null,
19504
19558
  durationMs: context.durationMs ?? null,
@@ -21367,10 +21421,17 @@ var NORMALIZED_TRACE_EVENT_TYPES = [
21367
21421
  "message",
21368
21422
  "model_turn",
21369
21423
  "tool_call",
21370
- "tool_result"
21424
+ "tool_result",
21425
+ "final_response",
21426
+ "error"
21371
21427
  ];
21372
21428
  var NORMALIZED_TOOL_STATUSES = ["ok", "error", "timeout", "cancelled", "unknown"];
21373
21429
  var NORMALIZED_REDACTION_LEVELS = ["none", "partial", "full"];
21430
+ var TRACE_SCHEMA_VERSION = NORMALIZED_TRAJECTORY_SCHEMA_VERSION;
21431
+ var TRACE_SOURCE_KINDS = NORMALIZED_TRACE_SOURCE_KINDS;
21432
+ var TRACE_EVENT_TYPES = NORMALIZED_TRACE_EVENT_TYPES;
21433
+ var TRACE_TOOL_STATUSES = NORMALIZED_TOOL_STATUSES;
21434
+ var TRACE_REDACTION_LEVELS = NORMALIZED_REDACTION_LEVELS;
21374
21435
  function omitUndefinedProperties(value) {
21375
21436
  return Object.fromEntries(
21376
21437
  Object.entries(value).filter(([, property]) => property !== void 0)
@@ -21474,6 +21535,7 @@ var NormalizedTraceEventWireSchema = external_exports.object({
21474
21535
  message: NormalizedTraceMessageWireSchema.optional(),
21475
21536
  model: NormalizedTraceModelWireSchema.optional(),
21476
21537
  tool: NormalizedTraceToolWireSchema.optional(),
21538
+ error: NormalizedTraceErrorWireSchema.optional(),
21477
21539
  source_ref: NormalizedTraceSourceRefWireSchema.optional(),
21478
21540
  raw_evidence: external_exports.array(NormalizedRawEvidenceWireSchema).optional(),
21479
21541
  redaction: NormalizedRedactionStateWireSchema.optional(),
@@ -21492,6 +21554,18 @@ var NormalizedTrajectoryWireSchema = external_exports.object({
21492
21554
  ended_at: external_exports.string().optional(),
21493
21555
  metadata: MetadataWireSchema.optional()
21494
21556
  });
21557
+ var TraceRedactionStateWireSchema = NormalizedRedactionStateWireSchema;
21558
+ var TraceErrorWireSchema = NormalizedTraceErrorWireSchema;
21559
+ var TraceSourceWireSchema = NormalizedTraceSourceWireSchema;
21560
+ var TraceSessionWireSchema = NormalizedTraceSessionWireSchema;
21561
+ var TraceBranchWireSchema = NormalizedTraceBranchWireSchema;
21562
+ var TraceSourceRefWireSchema = NormalizedTraceSourceRefWireSchema;
21563
+ var TraceRawEvidenceWireSchema = NormalizedRawEvidenceWireSchema;
21564
+ var TraceMessageWireSchema = NormalizedTraceMessageWireSchema;
21565
+ var TraceModelWireSchema = NormalizedTraceModelWireSchema;
21566
+ var TraceToolWireSchema = NormalizedTraceToolWireSchema;
21567
+ var TraceEventWireSchema = NormalizedTraceEventWireSchema;
21568
+ var TraceArtifactWireSchema = NormalizedTrajectoryWireSchema;
21495
21569
  function toNormalizedTrajectoryWire(trajectory) {
21496
21570
  return NormalizedTrajectoryWireSchema.parse(
21497
21571
  omitUndefinedProperties({
@@ -21525,6 +21599,12 @@ function fromNormalizedTrajectoryWire(input) {
21525
21599
  metadata: wire.metadata
21526
21600
  };
21527
21601
  }
21602
+ function toTraceArtifactWire(artifact) {
21603
+ return toNormalizedTrajectoryWire(artifact);
21604
+ }
21605
+ function fromTraceArtifactWire(input) {
21606
+ return fromNormalizedTrajectoryWire(input);
21607
+ }
21528
21608
  function toNormalizedTraceSourceWire(source) {
21529
21609
  return omitUndefinedProperties({
21530
21610
  kind: source.kind,
@@ -21599,6 +21679,7 @@ function toNormalizedTraceEventWire(event) {
21599
21679
  message: event.message ? toNormalizedTraceMessageWire(event.message) : void 0,
21600
21680
  model: event.model ? toNormalizedTraceModelWire(event.model) : void 0,
21601
21681
  tool: event.tool ? toNormalizedTraceToolWire(event.tool) : void 0,
21682
+ error: event.error ? toNormalizedTraceErrorWire(event.error) : void 0,
21602
21683
  source_ref: event.sourceRef ? toNormalizedTraceSourceRefWire(event.sourceRef) : void 0,
21603
21684
  raw_evidence: event.rawEvidence?.map(toNormalizedRawEvidenceWire),
21604
21685
  redaction: event.redaction,
@@ -21619,6 +21700,7 @@ function fromNormalizedTraceEventWire(event) {
21619
21700
  message: event.message ? fromNormalizedTraceMessageWire(event.message) : void 0,
21620
21701
  model: event.model ? fromNormalizedTraceModelWire(event.model) : void 0,
21621
21702
  tool: event.tool ? fromNormalizedTraceToolWire(event.tool) : void 0,
21703
+ error: event.error ? fromNormalizedTraceErrorWire(event.error) : void 0,
21622
21704
  sourceRef: event.source_ref ? fromNormalizedTraceSourceRefWire(event.source_ref) : void 0,
21623
21705
  rawEvidence: event.raw_evidence?.map(fromNormalizedRawEvidenceWire),
21624
21706
  redaction: event.redaction,
@@ -21687,6 +21769,24 @@ function fromNormalizedTraceToolWire(tool) {
21687
21769
  metadata: tool.metadata
21688
21770
  };
21689
21771
  }
21772
+ function toNormalizedTraceErrorWire(error40) {
21773
+ return omitUndefinedProperties({
21774
+ message: error40.message,
21775
+ name: error40.name,
21776
+ code: error40.code,
21777
+ stack: error40.stack,
21778
+ metadata: error40.metadata
21779
+ });
21780
+ }
21781
+ function fromNormalizedTraceErrorWire(error40) {
21782
+ return {
21783
+ message: error40.message,
21784
+ name: error40.name,
21785
+ code: error40.code,
21786
+ stack: error40.stack,
21787
+ metadata: error40.metadata
21788
+ };
21789
+ }
21690
21790
  function toNormalizedTraceSourceRefWire(sourceRef) {
21691
21791
  return omitUndefinedProperties({
21692
21792
  event_id: sourceRef.eventId,
@@ -21731,6 +21831,163 @@ function fromNormalizedRawEvidenceWire(evidence) {
21731
21831
  metadata: evidence.metadata
21732
21832
  };
21733
21833
  }
21834
+ function sameMessageContent(first, second) {
21835
+ if (!first || !second) return false;
21836
+ return first.role === second.role && JSON.stringify(first.content) === JSON.stringify(second.content);
21837
+ }
21838
+ function buildTraceMessages(input, output) {
21839
+ const outputMessages = output ?? [];
21840
+ if (outputMessages.length === 0) {
21841
+ return input ?? [];
21842
+ }
21843
+ const outputLooksLikeFullTranscript = outputMessages.some(
21844
+ (message) => message.role === "user" || message.role === "system"
21845
+ );
21846
+ if (outputLooksLikeFullTranscript) {
21847
+ return outputMessages;
21848
+ }
21849
+ const inputMessages = input ?? [];
21850
+ if (inputMessages.length === 1 && outputMessages.length > 0 && sameMessageContent(inputMessages[0], outputMessages[0])) {
21851
+ return outputMessages;
21852
+ }
21853
+ return [...inputMessages, ...outputMessages];
21854
+ }
21855
+ function toTraceMessage(message) {
21856
+ return {
21857
+ role: message.role,
21858
+ name: message.name,
21859
+ content: message.content,
21860
+ tokenUsage: message.tokenUsage,
21861
+ metadata: message.metadata
21862
+ };
21863
+ }
21864
+ function toTraceError(error40) {
21865
+ return typeof error40 === "string" ? { message: error40 } : error40;
21866
+ }
21867
+ function buildTraceFromMessages(options = {}) {
21868
+ const messages = buildTraceMessages(options.input, options.output);
21869
+ const computed = computeTraceSummary(messages);
21870
+ const summary = options.summary ?? computed.trace;
21871
+ const events = [];
21872
+ let ordinal = 0;
21873
+ for (const [messageIndex, message] of messages.entries()) {
21874
+ const eventId = `message-${messageIndex}`;
21875
+ events.push({
21876
+ eventId,
21877
+ ordinal: ordinal++,
21878
+ type: "message",
21879
+ timestamp: message.startTime,
21880
+ durationMs: message.durationMs,
21881
+ message: toTraceMessage(message),
21882
+ metadata: { message_index: messageIndex }
21883
+ });
21884
+ for (const [toolIndex, toolCall] of (message.toolCalls ?? []).entries()) {
21885
+ const toolEventId = `message-${messageIndex}-tool-${toolIndex}`;
21886
+ events.push({
21887
+ eventId: toolEventId,
21888
+ parentEventId: eventId,
21889
+ ordinal: ordinal++,
21890
+ type: "tool_call",
21891
+ timestamp: toolCall.startTime,
21892
+ durationMs: toolCall.durationMs,
21893
+ tool: {
21894
+ name: toolCall.tool,
21895
+ callId: toolCall.id,
21896
+ input: toolCall.input,
21897
+ output: toolCall.output,
21898
+ status: "ok"
21899
+ },
21900
+ metadata: {
21901
+ message_index: messageIndex,
21902
+ tool_index: toolIndex
21903
+ }
21904
+ });
21905
+ if (toolCall.output !== void 0) {
21906
+ events.push({
21907
+ eventId: `${toolEventId}-result`,
21908
+ parentEventId: toolEventId,
21909
+ ordinal: ordinal++,
21910
+ type: "tool_result",
21911
+ timestamp: toolCall.endTime,
21912
+ tool: {
21913
+ name: toolCall.tool,
21914
+ callId: toolCall.id,
21915
+ output: toolCall.output,
21916
+ status: "ok"
21917
+ },
21918
+ metadata: {
21919
+ message_index: messageIndex,
21920
+ tool_index: toolIndex
21921
+ }
21922
+ });
21923
+ }
21924
+ }
21925
+ }
21926
+ const finalAssistantIndex = [...messages].map((message, index) => ({ message, index })).reverse().find((entry) => entry.message.role === "assistant")?.index;
21927
+ if (finalAssistantIndex !== void 0) {
21928
+ const finalMessage = messages[finalAssistantIndex];
21929
+ events.push({
21930
+ eventId: "final-response",
21931
+ parentEventId: `message-${finalAssistantIndex}`,
21932
+ ordinal: ordinal++,
21933
+ type: "final_response",
21934
+ timestamp: finalMessage.endTime ?? finalMessage.startTime ?? options.endTime,
21935
+ message: {
21936
+ ...toTraceMessage(finalMessage),
21937
+ content: options.finalOutput ?? finalMessage.content
21938
+ },
21939
+ metadata: { message_index: finalAssistantIndex }
21940
+ });
21941
+ }
21942
+ if (options.error) {
21943
+ events.push({
21944
+ eventId: "error",
21945
+ ordinal: ordinal++,
21946
+ type: "error",
21947
+ timestamp: options.endTime,
21948
+ error: toTraceError(options.error)
21949
+ });
21950
+ }
21951
+ return {
21952
+ schemaVersion: TRACE_SCHEMA_VERSION,
21953
+ eventCount: summary.eventCount,
21954
+ toolCalls: summary.toolCalls,
21955
+ errorCount: summary.errorCount + (options.error ? 1 : 0),
21956
+ llmCallCount: summary.llmCallCount,
21957
+ ...summary.toolDurations ? { toolDurations: summary.toolDurations } : {},
21958
+ messages,
21959
+ events,
21960
+ tokenUsage: options.tokenUsage,
21961
+ costUsd: options.costUsd,
21962
+ durationMs: options.durationMs,
21963
+ startTime: options.startTime ?? computed.startTime,
21964
+ endTime: options.endTime ?? computed.endTime,
21965
+ metadata: {
21966
+ ...options.provider ? { provider: options.provider } : {},
21967
+ ...options.target ? { target: options.target } : {},
21968
+ ...options.testId ? { eval_case_id: options.testId } : {},
21969
+ ...options.conversationId ? { provider_session_id: options.conversationId } : {},
21970
+ ...options.metadata
21971
+ }
21972
+ };
21973
+ }
21974
+ function appendErrorEventToTrace(trace, error40, metadata) {
21975
+ return {
21976
+ ...trace,
21977
+ errorCount: trace.errorCount + 1,
21978
+ events: [
21979
+ ...trace.events,
21980
+ {
21981
+ eventId: `error-${trace.events.length}`,
21982
+ ordinal: trace.events.length,
21983
+ type: "error",
21984
+ timestamp: trace.endTime,
21985
+ error: toTraceError(error40),
21986
+ metadata
21987
+ }
21988
+ ]
21989
+ };
21990
+ }
21734
21991
  function computeTraceSummary(messages) {
21735
21992
  const toolCallCounts = {};
21736
21993
  const toolDurations = {};
@@ -22209,115 +22466,115 @@ var FieldAccuracyGrader = class {
22209
22466
  * Evaluate a single field against the expected value.
22210
22467
  */
22211
22468
  evaluateField(fieldConfig, candidateData, expectedData) {
22212
- const { path: path47, match, required: required2 = true, weight = 1 } = fieldConfig;
22213
- const candidateValue = resolvePath(candidateData, path47);
22214
- const expectedValue = resolvePath(expectedData, path47);
22469
+ const { path: path48, match, required: required2 = true, weight = 1 } = fieldConfig;
22470
+ const candidateValue = resolvePath(candidateData, path48);
22471
+ const expectedValue = resolvePath(expectedData, path48);
22215
22472
  if (expectedValue === void 0) {
22216
22473
  return {
22217
- path: path47,
22474
+ path: path48,
22218
22475
  score: 1,
22219
22476
  // No expected value means no comparison needed
22220
22477
  weight,
22221
22478
  hit: true,
22222
- message: `${path47}: no expected value`
22479
+ message: `${path48}: no expected value`
22223
22480
  };
22224
22481
  }
22225
22482
  if (candidateValue === void 0) {
22226
22483
  if (required2) {
22227
22484
  return {
22228
- path: path47,
22485
+ path: path48,
22229
22486
  score: 0,
22230
22487
  weight,
22231
22488
  hit: false,
22232
- message: `${path47} (required, missing)`
22489
+ message: `${path48} (required, missing)`
22233
22490
  };
22234
22491
  }
22235
22492
  return {
22236
- path: path47,
22493
+ path: path48,
22237
22494
  score: 1,
22238
22495
  // Don't penalize missing optional fields
22239
22496
  weight: 0,
22240
22497
  // Zero weight means it won't affect the score
22241
22498
  hit: true,
22242
- message: `${path47}: optional field missing`
22499
+ message: `${path48}: optional field missing`
22243
22500
  };
22244
22501
  }
22245
22502
  switch (match) {
22246
22503
  case "exact":
22247
- return this.compareExact(path47, candidateValue, expectedValue, weight);
22504
+ return this.compareExact(path48, candidateValue, expectedValue, weight);
22248
22505
  case "numeric_tolerance":
22249
22506
  return this.compareNumericTolerance(
22250
- path47,
22507
+ path48,
22251
22508
  candidateValue,
22252
22509
  expectedValue,
22253
22510
  fieldConfig,
22254
22511
  weight
22255
22512
  );
22256
22513
  case "date":
22257
- return this.compareDate(path47, candidateValue, expectedValue, fieldConfig, weight);
22514
+ return this.compareDate(path48, candidateValue, expectedValue, fieldConfig, weight);
22258
22515
  default:
22259
22516
  return {
22260
- path: path47,
22517
+ path: path48,
22261
22518
  score: 0,
22262
22519
  weight,
22263
22520
  hit: false,
22264
- message: `${path47}: unknown match type "${match}"`
22521
+ message: `${path48}: unknown match type "${match}"`
22265
22522
  };
22266
22523
  }
22267
22524
  }
22268
22525
  /**
22269
22526
  * Exact equality comparison.
22270
22527
  */
22271
- compareExact(path47, candidateValue, expectedValue, weight) {
22528
+ compareExact(path48, candidateValue, expectedValue, weight) {
22272
22529
  if (deepEqual(candidateValue, expectedValue)) {
22273
22530
  return {
22274
- path: path47,
22531
+ path: path48,
22275
22532
  score: 1,
22276
22533
  weight,
22277
22534
  hit: true,
22278
- message: path47
22535
+ message: path48
22279
22536
  };
22280
22537
  }
22281
22538
  if (typeof candidateValue !== typeof expectedValue) {
22282
22539
  return {
22283
- path: path47,
22540
+ path: path48,
22284
22541
  score: 0,
22285
22542
  weight,
22286
22543
  hit: false,
22287
- message: `${path47} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
22544
+ message: `${path48} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
22288
22545
  };
22289
22546
  }
22290
22547
  return {
22291
- path: path47,
22548
+ path: path48,
22292
22549
  score: 0,
22293
22550
  weight,
22294
22551
  hit: false,
22295
- message: `${path47} (value mismatch)`
22552
+ message: `${path48} (value mismatch)`
22296
22553
  };
22297
22554
  }
22298
22555
  /**
22299
22556
  * Numeric comparison with absolute or relative tolerance.
22300
22557
  */
22301
- compareNumericTolerance(path47, candidateValue, expectedValue, fieldConfig, weight) {
22558
+ compareNumericTolerance(path48, candidateValue, expectedValue, fieldConfig, weight) {
22302
22559
  const { tolerance = 0, relative = false } = fieldConfig;
22303
22560
  const candidateNum = toNumber(candidateValue);
22304
22561
  const expectedNum = toNumber(expectedValue);
22305
22562
  if (candidateNum === null || expectedNum === null) {
22306
22563
  return {
22307
- path: path47,
22564
+ path: path48,
22308
22565
  score: 0,
22309
22566
  weight,
22310
22567
  hit: false,
22311
- message: `${path47} (non-numeric value)`
22568
+ message: `${path48} (non-numeric value)`
22312
22569
  };
22313
22570
  }
22314
22571
  if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
22315
22572
  return {
22316
- path: path47,
22573
+ path: path48,
22317
22574
  score: 0,
22318
22575
  weight,
22319
22576
  hit: false,
22320
- message: `${path47} (invalid numeric value)`
22577
+ message: `${path48} (invalid numeric value)`
22321
22578
  };
22322
22579
  }
22323
22580
  const diff = Math.abs(candidateNum - expectedNum);
@@ -22330,61 +22587,61 @@ var FieldAccuracyGrader = class {
22330
22587
  }
22331
22588
  if (withinTolerance) {
22332
22589
  return {
22333
- path: path47,
22590
+ path: path48,
22334
22591
  score: 1,
22335
22592
  weight,
22336
22593
  hit: true,
22337
- message: `${path47} (within tolerance: diff=${diff.toFixed(2)})`
22594
+ message: `${path48} (within tolerance: diff=${diff.toFixed(2)})`
22338
22595
  };
22339
22596
  }
22340
22597
  return {
22341
- path: path47,
22598
+ path: path48,
22342
22599
  score: 0,
22343
22600
  weight,
22344
22601
  hit: false,
22345
- message: `${path47} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
22602
+ message: `${path48} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
22346
22603
  };
22347
22604
  }
22348
22605
  /**
22349
22606
  * Date comparison with format normalization.
22350
22607
  */
22351
- compareDate(path47, candidateValue, expectedValue, fieldConfig, weight) {
22608
+ compareDate(path48, candidateValue, expectedValue, fieldConfig, weight) {
22352
22609
  const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
22353
22610
  const candidateDate = parseDate(String(candidateValue), formats);
22354
22611
  const expectedDate = parseDate(String(expectedValue), formats);
22355
22612
  if (candidateDate === null) {
22356
22613
  return {
22357
- path: path47,
22614
+ path: path48,
22358
22615
  score: 0,
22359
22616
  weight,
22360
22617
  hit: false,
22361
- message: `${path47} (unparseable candidate date)`
22618
+ message: `${path48} (unparseable candidate date)`
22362
22619
  };
22363
22620
  }
22364
22621
  if (expectedDate === null) {
22365
22622
  return {
22366
- path: path47,
22623
+ path: path48,
22367
22624
  score: 0,
22368
22625
  weight,
22369
22626
  hit: false,
22370
- message: `${path47} (unparseable expected date)`
22627
+ message: `${path48} (unparseable expected date)`
22371
22628
  };
22372
22629
  }
22373
22630
  if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
22374
22631
  return {
22375
- path: path47,
22632
+ path: path48,
22376
22633
  score: 1,
22377
22634
  weight,
22378
22635
  hit: true,
22379
- message: path47
22636
+ message: path48
22380
22637
  };
22381
22638
  }
22382
22639
  return {
22383
- path: path47,
22640
+ path: path48,
22384
22641
  score: 0,
22385
22642
  weight,
22386
22643
  hit: false,
22387
- message: `${path47} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
22644
+ message: `${path48} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
22388
22645
  };
22389
22646
  }
22390
22647
  /**
@@ -22417,11 +22674,11 @@ var FieldAccuracyGrader = class {
22417
22674
  };
22418
22675
  }
22419
22676
  };
22420
- function resolvePath(obj, path47) {
22421
- if (!path47 || !obj) {
22677
+ function resolvePath(obj, path48) {
22678
+ if (!path48 || !obj) {
22422
22679
  return void 0;
22423
22680
  }
22424
- const parts = path47.split(/\.|\[|\]/).filter((p) => p.length > 0);
22681
+ const parts = path48.split(/\.|\[|\]/).filter((p) => p.length > 0);
22425
22682
  let current = obj;
22426
22683
  for (const part of parts) {
22427
22684
  if (current === null || current === void 0) {
@@ -22955,8 +23212,8 @@ var TokenUsageGrader = class {
22955
23212
  };
22956
23213
  }
22957
23214
  };
22958
- function getNestedValue(obj, path47) {
22959
- const parts = path47.split(".");
23215
+ function getNestedValue(obj, path48) {
23216
+ const parts = path48.split(".");
22960
23217
  let current = obj;
22961
23218
  for (const part of parts) {
22962
23219
  if (current === null || current === void 0 || typeof current !== "object") {
@@ -27515,7 +27772,11 @@ ${prompt}` : prompt;
27515
27772
  env.AZURE_OPENAI_API_KEY = this.config.apiKey;
27516
27773
  }
27517
27774
  if (this.config.baseUrl) {
27518
- env.AZURE_OPENAI_RESOURCE_NAME = extractAzureResourceName(this.config.baseUrl);
27775
+ if (/^https?:\/\//.test(this.config.baseUrl)) {
27776
+ env.AZURE_OPENAI_BASE_URL = this.config.baseUrl;
27777
+ } else {
27778
+ env.AZURE_OPENAI_RESOURCE_NAME = extractAzureResourceName(this.config.baseUrl);
27779
+ }
27519
27780
  }
27520
27781
  } else {
27521
27782
  if (this.config.apiKey) {
@@ -28774,6 +29035,399 @@ var ProviderRegistry = class {
28774
29035
  return factory(target);
28775
29036
  }
28776
29037
  };
29038
// Schema tag that every replay fixture record must carry in `schema_version`.
var REPLAY_FIXTURE_SCHEMA_VERSION = "agentv.replay_fixture.v1";

// Wire-format (snake_case) schemas for replay fixture records. Every object is
// `.strict()`. Key order inside each shape is kept exactly as declared.
var TokenUsageWireSchema2;
var ToolCallWireSchema;
var MessageWireSchema;
var ReplayFixtureWireSchema;
{
  const z = external_exports;
  // Factories rather than shared constants, so each field gets its own schema
  // instance just as a hand-written literal would.
  const count = () => z.number().nonnegative();
  const text = () => z.string();
  const nonEmptyText = () => z.string().min(1);
  const anything = () => z.unknown();
  const bag = () => z.record(z.unknown());

  TokenUsageWireSchema2 = z.object({
    input: count(),
    output: count(),
    cached: count().optional(),
    reasoning: count().optional()
  }).strict();

  ToolCallWireSchema = z.object({
    tool: nonEmptyText(),
    input: anything().optional(),
    output: anything().optional(),
    id: text().optional(),
    start_time: text().optional(),
    end_time: text().optional(),
    duration_ms: count().optional()
  }).strict();

  MessageWireSchema = z.object({
    role: nonEmptyText(),
    name: text().optional(),
    content: anything().optional(),
    tool_calls: z.array(ToolCallWireSchema).optional(),
    start_time: text().optional(),
    end_time: text().optional(),
    duration_ms: count().optional(),
    metadata: bag().optional(),
    token_usage: TokenUsageWireSchema2.optional()
  }).strict();

  ReplayFixtureWireSchema = z.object({
    schema_version: z.literal(REPLAY_FIXTURE_SCHEMA_VERSION),
    suite: nonEmptyText(),
    eval_path: nonEmptyText().optional(),
    test_id: nonEmptyText(),
    source_target: nonEmptyText(),
    attempt: z.number().int().min(0).optional(),
    variant: nonEmptyText().nullable().optional(),
    fixture_id: nonEmptyText().optional(),
    recorded_at: text().optional(),
    source: bag().optional(),
    redaction: bag().optional(),
    output: z.array(MessageWireSchema),
    transcript: anything().optional(),
    token_usage: TokenUsageWireSchema2.optional(),
    cost_usd: count().optional(),
    duration_ms: count().optional(),
    start_time: text().optional(),
    end_time: text().optional()
  }).strict();
}

// Tail promise of the pending append chain, keyed by absolute fixture path.
var appendQueues = /* @__PURE__ */ new Map();
29086
/**
 * Converts a validated wire record (snake_case keys) into the in-memory
 * camelCase record.
 *
 * A missing `attempt` becomes 0 and a null/missing `variant` becomes
 * undefined. Each entry of `output` is converted with `fromWireMessage`.
 */
function fromWireRecord(wire) {
  const {
    schema_version: schemaVersion,
    suite,
    eval_path: evalPath,
    test_id: testId,
    source_target: sourceTarget,
    attempt,
    variant,
    fixture_id: fixtureId,
    recorded_at: recordedAt,
    source,
    redaction,
    output,
    transcript,
    token_usage: tokenUsage,
    cost_usd: costUsd,
    duration_ms: durationMs,
    start_time: startTime,
    end_time: endTime
  } = wire;
  return {
    schemaVersion,
    suite,
    evalPath,
    testId,
    sourceTarget,
    attempt: attempt ?? 0,
    variant: variant ?? void 0,
    fixtureId,
    recordedAt,
    source,
    redaction,
    output: output.map(fromWireMessage),
    transcript,
    tokenUsage,
    costUsd,
    durationMs,
    startTime,
    endTime
  };
}
29108
/**
 * Converts one wire message (snake_case) to its camelCase form.
 * `tool_calls`, when present, are converted with `fromWireToolCall`;
 * otherwise `toolCalls` stays undefined.
 */
function fromWireMessage(wire) {
  const { role, name, content, metadata } = wire;
  const toolCalls = wire.tool_calls?.map(fromWireToolCall);
  return {
    role,
    name,
    content,
    toolCalls,
    startTime: wire.start_time,
    endTime: wire.end_time,
    durationMs: wire.duration_ms,
    metadata,
    tokenUsage: wire.token_usage
  };
}
29121
/**
 * Converts one wire tool call to camelCase: `start_time`, `end_time` and
 * `duration_ms` are renamed, the other fields are copied unchanged.
 */
function fromWireToolCall(wire) {
  const {
    tool,
    input,
    output,
    id,
    start_time: startTime,
    end_time: endTime,
    duration_ms: durationMs
  } = wire;
  return { tool, input, output, id, startTime, endTime, durationMs };
}
29132
/**
 * Converts an in-memory record to its wire (snake_case) form and validates it.
 *
 * An undefined/null `variant` is written as null. Top-level keys whose value
 * is undefined are removed before validation. The return value is whatever
 * `ReplayFixtureWireSchema.parse` produces, and `parse` throws on invalid input.
 */
function toWireRecord(record2) {
  const candidate = dropUndefined({
    schema_version: record2.schemaVersion,
    suite: record2.suite,
    eval_path: record2.evalPath,
    test_id: record2.testId,
    source_target: record2.sourceTarget,
    attempt: record2.attempt,
    variant: record2.variant ?? null,
    fixture_id: record2.fixtureId,
    recorded_at: record2.recordedAt,
    source: record2.source,
    redaction: record2.redaction,
    output: record2.output.map(toWireMessage),
    transcript: record2.transcript,
    token_usage: record2.tokenUsage,
    cost_usd: record2.costUsd,
    duration_ms: record2.durationMs,
    start_time: record2.startTime,
    end_time: record2.endTime
  });
  return ReplayFixtureWireSchema.parse(candidate);
}
29156
/**
 * Converts one in-memory message to its wire (snake_case) form.
 * `toolCalls`, when present, are converted with `toWireToolCall`.
 */
function toWireMessage(message) {
  const { role, name, content, toolCalls, startTime, endTime, durationMs, metadata, tokenUsage } = message;
  return {
    role,
    name,
    content,
    tool_calls: toolCalls?.map(toWireToolCall),
    start_time: startTime,
    end_time: endTime,
    duration_ms: durationMs,
    metadata,
    token_usage: tokenUsage
  };
}
29169
/**
 * Converts one in-memory tool call to its wire form: `startTime`, `endTime`
 * and `durationMs` become snake_case, the other fields are copied unchanged.
 */
function toWireToolCall(toolCall) {
  const { tool, input, output, id, startTime, endTime, durationMs } = toolCall;
  return {
    tool,
    input,
    output,
    id,
    start_time: startTime,
    end_time: endTime,
    duration_ms: durationMs
  };
}
29180
/**
 * Returns a shallow copy of `value` without the own enumerable keys whose
 * value is exactly undefined. Null and other falsy values are kept, and
 * nested objects are not touched.
 */
function dropUndefined(value) {
  const definedPairs = [];
  for (const pair of Object.entries(value)) {
    if (pair[1] !== void 0) {
      definedPairs.push(pair);
    }
  }
  return Object.fromEntries(definedPairs);
}
29183
/**
 * Renders a Zod validation error as a single line of the form
 * "<path>: <message>; <path>: <message>".
 *
 * The issue list is read from `issues`, falling back to `errors` for error
 * objects that only expose that name, so the formatter works with either.
 * An issue with an empty path is reported at "<record>".
 *
 * @param error40 - Error object exposing an array of `{ path, message }` issues.
 * @returns The joined, human-readable issue list.
 */
function formatZodError(error40) {
  const issues = error40.issues ?? error40.errors;
  return issues.map(({ path: issuePath, message }) => {
    const location = issuePath.length > 0 ? issuePath.join(".") : "<record>";
    return `${location}: ${message}`;
  }).join("; ");
}
29189
/**
 * Reads a replay fixture JSONL file and returns its records in file order.
 *
 * Each non-blank line must be valid JSON that passes
 * `ReplayFixtureWireSchema`; valid lines are converted with `fromWireRecord`.
 * Blank (whitespace-only) lines are skipped, and reported line numbers are
 * 1-based positions in the file.
 *
 * @param fixturesPath - Path of the JSONL file to read.
 * @returns The converted records.
 * @throws Error when the file cannot be read, a line is not valid JSON, or a
 *   line fails schema validation. The underlying error is attached as `cause`
 *   so the original stack and error code are not lost.
 */
async function readReplayFixtureRecords(fixturesPath) {
  let raw;
  try {
    raw = await readFile6(fixturesPath, "utf8");
  } catch (error40) {
    const reason = error40 instanceof Error ? error40.message : String(error40);
    throw new Error(`Replay fixture file not found or unreadable: ${fixturesPath}: ${reason}`, {
      cause: error40
    });
  }
  const records = [];
  for (const [index, rawLine] of raw.split(/\r?\n/).entries()) {
    const line = rawLine.trim();
    if (line.length === 0) {
      continue;
    }
    const lineNumber = index + 1;
    let parsed;
    try {
      parsed = JSON.parse(line);
    } catch (error40) {
      const reason = error40 instanceof Error ? error40.message : String(error40);
      throw new Error(`Invalid replay fixture JSONL at ${fixturesPath}:${lineNumber}: ${reason}`, {
        cause: error40
      });
    }
    const result = ReplayFixtureWireSchema.safeParse(parsed);
    if (!result.success) {
      throw new Error(
        `Invalid replay fixture record at ${fixturesPath}:${lineNumber}: ${formatZodError(result.error)}`,
        { cause: result.error }
      );
    }
    records.push(fromWireRecord(result.data));
  }
  return records;
}
29221
/**
 * Serializes one record as a single JSON string: the record is first
 * converted with `toWireRecord`, then passed to `JSON.stringify`.
 */
function serializeReplayFixtureRecord(record2) {
  const wireRecord = toWireRecord(record2);
  return JSON.stringify(wireRecord);
}
29224
/**
 * Appends one serialized record, followed by a newline, to the fixture file.
 *
 * Writes to the same resolved path are chained through `appendQueues`, so
 * each append starts only after the previous one for that path has settled.
 * The parent directory is created if needed. A failed write rejects this
 * call, but the stored queue tail swallows the rejection so later appends
 * still run.
 */
async function appendReplayFixtureRecord(fixturesPath, record2) {
  const absolutePath = path17.resolve(fixturesPath);

  const writeLine = async () => {
    await mkdir9(path17.dirname(absolutePath), { recursive: true });
    const line = `${serializeReplayFixtureRecord(record2)}\n`;
    await writeFile4(absolutePath, line, { encoding: "utf8", flag: "a" });
  };

  const pending = appendQueues.get(absolutePath) ?? Promise.resolve();
  const current = pending.then(writeLine);
  appendQueues.set(absolutePath, current.catch(() => {}));
  await current;
}
29242
/**
 * Returns the single record that matches `lookup`.
 *
 * @throws Error when no record matches, or when more than one does; the
 *   message includes the lookup key from `formatLookupKey`.
 */
function findReplayFixtureRecord(records, lookup) {
  const matches = records.filter((candidate) => replayRecordMatches(candidate, lookup));
  const key = formatLookupKey(lookup);
  switch (matches.length) {
    case 1:
      return matches[0];
    case 0:
      throw new Error(`Replay fixture lookup found no record for ${key}`);
    default:
      throw new Error(`Replay fixture lookup found ${matches.length} duplicate records for ${key}`);
  }
}
29253
/**
 * Decides whether `record2` is the fixture described by `lookup`.
 *
 * Rules, as implemented:
 * - the lookup must carry `suite` or `evalPath`, otherwise an Error is thrown;
 * - a lookup `suite`, when given, must equal the record's suite;
 * - a record that has an `evalPath` only matches a lookup that also has one,
 *   and the two must agree according to `sameEvalPath`;
 * - `testId`, `sourceTarget`, `attempt` (lookup default 0) and `variant`
 *   (null and undefined treated alike) must all be equal.
 */
function replayRecordMatches(record2, lookup) {
  const { suite, evalPath } = lookup;
  // The identity check can only fire when `suite` is falsy, in which case the
  // suite comparison below would be skipped anyway.
  if (!suite && !evalPath) {
    throw new Error("Replay fixture lookup requires suite or eval_path identity");
  }
  if (suite && record2.suite !== suite) {
    return false;
  }
  if (record2.evalPath) {
    if (!evalPath || !sameEvalPath(record2.evalPath, evalPath)) {
      return false;
    }
  }
  const wantedAttempt = lookup.attempt ?? 0;
  const variantsAgree = (record2.variant ?? null) === (lookup.variant ?? null);
  return (
    record2.testId === lookup.testId &&
    record2.sourceTarget === lookup.sourceTarget &&
    record2.attempt === wantedAttempt &&
    variantsAgree
  );
}
29268
/**
 * Normalizes an eval path for comparison: backslashes become forward
 * slashes, runs of slashes collapse to one, and any leading "./" segments
 * are removed.
 *
 * Slashes are collapsed before the "./" prefix is stripped. Doing it the
 * other way round turned ".//evals/a.yaml" into "/evals/a.yaml", and only one
 * prefix was removed from "././evals/a.yaml".
 *
 * @param value - Path string to normalize.
 * @returns The normalized path string.
 */
function normalizeEvalPath(value) {
  return value
    .replace(/\\/g, "/")
    .replace(/\/+/g, "/")
    .replace(/^(?:\.\/)+/, "");
}
29271
/**
 * Compares a recorded eval path with a lookup eval path.
 *
 * The paths match when their normalized forms are equal, or when the lookup
 * path is absolute and its normalized form ends with "/" plus the normalized
 * recorded path.
 */
function sameEvalPath(recordPath, lookupPath) {
  const recorded = normalizeEvalPath(recordPath);
  const wanted = normalizeEvalPath(lookupPath);
  if (recorded === wanted) {
    return true;
  }
  if (!path17.isAbsolute(lookupPath)) {
    return false;
  }
  return wanted.endsWith(`/${recorded}`);
}
29279
/**
 * Renders a lookup as a space-separated "key=value" string for error
 * messages. `suite` and `eval_path` appear only when set, `attempt` defaults
 * to 0, and a missing variant is shown as "<none>".
 */
function formatLookupKey(lookup) {
  const parts = [];
  if (lookup.suite) {
    parts.push(`suite=${lookup.suite}`);
  }
  if (lookup.evalPath) {
    parts.push(`eval_path=${lookup.evalPath}`);
  }
  parts.push(
    `test_id=${lookup.testId}`,
    `source_target=${lookup.sourceTarget}`,
    `attempt=${lookup.attempt ?? 0}`,
    `variant=${lookup.variant ?? "<none>"}`
  );
  return parts.join(" ");
}
29290
/**
 * Turns a stored fixture record into a provider response.
 *
 * Output, usage, cost and timing fields are copied directly. The record's
 * identity and provenance fields are placed under `raw.replay_fixture`, with
 * undefined entries omitted.
 */
function replayFixtureRecordToProviderResponse(record2) {
  const { output, tokenUsage, costUsd, durationMs, startTime, endTime } = record2;
  const provenance = dropUndefined({
    fixture_id: record2.fixtureId,
    suite: record2.suite,
    eval_path: record2.evalPath,
    test_id: record2.testId,
    source_target: record2.sourceTarget,
    attempt: record2.attempt,
    variant: record2.variant,
    source: record2.source,
    redaction: record2.redaction,
    transcript: record2.transcript
  });
  return {
    output,
    tokenUsage,
    costUsd,
    durationMs,
    startTime,
    endTime,
    raw: { replay_fixture: provenance }
  };
}
29314
/**
 * Builds the in-memory fixture record for one provider response.
 *
 * - `suite` is the trimmed `evalCase.suite`; an Error is thrown when it is
 *   missing or blank.
 * - `evalPath` is `evalFilePath` relative to `repoRoot`, with forward slashes.
 * - The source target is the trimmed `sourceTarget`, or `target.name` when
 *   that is missing or blank.
 * - `recordedAt` comes from `now()`, which defaults to the current time.
 * - `output` defaults to an empty array when the response has none.
 */
function buildReplayFixtureRecord({
  evalCase,
  evalFilePath,
  repoRoot,
  target,
  sourceTarget,
  attempt,
  variant,
  response,
  now = () => /* @__PURE__ */ new Date()
}) {
  const testId = evalCase.id;
  const suite = evalCase.suite?.trim();
  if (!suite) {
    throw new Error(`Cannot record replay fixture for test '${testId}': suite is missing`);
  }

  const absoluteEvalFile = path17.resolve(evalFilePath);
  const evalPath = path17.relative(repoRoot, absoluteEvalFile).replace(/\\/g, "/");
  const resolvedSourceTarget = sourceTarget?.trim() || target.name;
  const identity = { suite, evalPath, testId, sourceTarget: resolvedSourceTarget, attempt, variant };
  const fixtureId = buildFixtureId(identity);

  return {
    schemaVersion: REPLAY_FIXTURE_SCHEMA_VERSION,
    ...identity,
    fixtureId,
    recordedAt: now().toISOString(),
    source: buildSourceMetadata(target, resolvedSourceTarget),
    output: response.output ?? [],
    transcript: extractTranscript(response.raw),
    tokenUsage: response.tokenUsage,
    costUsd: response.costUsd,
    durationMs: response.durationMs,
    startTime: response.startTime,
    endTime: response.endTime
  };
}
29359
/**
 * Derives a fixture id of the form "<sourceTarget>-<testId>-<digest>".
 *
 * The digest is the first 12 hex characters of the SHA-256 of the identity
 * fields (suite, evalPath, testId, sourceTarget, attempt, variant) joined
 * with NUL characters, so equal identities always produce the same id.
 */
function buildFixtureId(input) {
  const { sourceTarget, testId } = input;
  const identityFields = [
    input.suite,
    input.evalPath,
    testId,
    sourceTarget,
    String(input.attempt),
    input.variant ?? ""
  ];
  const hasher = createHash("sha256");
  hasher.update(identityFields.join("\0"));
  const digest = hasher.digest("hex").slice(0, 12);
  return `${sourceTarget}-${testId}-${digest}`;
}
29371
/**
 * Describes where a recording came from: the target's kind (as `provider`),
 * the source target name, the resolved target name, and the model name from
 * `extractModelName`. Entries whose value is undefined are omitted.
 */
function buildSourceMetadata(target, sourceTarget) {
  const description = {
    provider: target.kind,
    target_name: sourceTarget,
    resolved_target: target.name,
    model: extractModelName(target)
  };
  return dropUndefined(description);
}
29379
/**
 * Picks a model identifier from a target's config.
 *
 * Returns `config.model` when it is a string, otherwise
 * `config.deploymentName` when that is a string, otherwise undefined.
 * A target without a config object yields undefined rather than throwing,
 * so a missing config cannot abort the caller.
 *
 * @param target - Target object, expected to carry a `config` object.
 * @returns The model or deployment name, or undefined.
 */
function extractModelName(target) {
  const config2 = target?.config;
  if (config2 === null || typeof config2 !== "object") {
    return void 0;
  }
  // `model` takes precedence over `deploymentName`.
  for (const key of ["model", "deploymentName"]) {
    const candidate = config2[key];
    if (typeof candidate === "string") {
      return candidate;
    }
  }
  return void 0;
}
29389
/**
 * Returns `raw.transcript` when `raw` is a non-null, non-array object;
 * otherwise returns undefined.
 */
function extractTranscript(raw) {
  const isRecord = typeof raw === "object" && raw !== null && !Array.isArray(raw);
  return isRecord ? raw.transcript : void 0;
}
29396
/**
 * Provider that answers requests from recorded replay fixtures instead of
 * calling a live target.
 *
 * The fixture file at `config.fixturesPath` is read on every `invoke` and
 * once per `invokeBatch`. Each request is resolved to exactly one fixture
 * record via `findReplayFixtureRecord`.
 */
var ReplayProvider = class {
  id;
  kind = "replay";
  targetName;
  supportsBatch = true;
  config;

  /**
   * @param targetName - Name of the replay target; also used in `id`.
   * @param config2 - Replay settings. This class reads `fixturesPath`,
   *   `suite`, `evalPath`, `sourceTarget` and `variant` from it.
   */
  constructor(targetName, config2) {
    this.id = `replay:${targetName}`;
    this.targetName = targetName;
    this.config = config2;
  }

  /** Loads the fixture file and returns the recorded response for one request. */
  async invoke(request) {
    const records = await readReplayFixtureRecords(this.config.fixturesPath);
    return this.responseForRequest(records, request);
  }

  /** Loads the fixture file once and returns one recorded response per request, in order. */
  async invokeBatch(requests) {
    const records = await readReplayFixtureRecords(this.config.fixturesPath);
    const responses = [];
    for (const request of requests) {
      responses.push(this.responseForRequest(records, request));
    }
    return responses;
  }

  /**
   * Finds the fixture for `request` and converts it to a provider response.
   * Values set on the provider config take precedence over the request's
   * `suite` and `evalFilePath`. The attempt defaults to 0.
   *
   * @throws Error when the request has no `evalCaseId`, or when the lookup
   *   does not resolve to exactly one record.
   */
  responseForRequest(records, request) {
    const { evalCaseId: testId } = request;
    if (!testId) {
      throw new Error("Replay provider requires evalCaseId on provider requests");
    }
    const { suite, evalPath, sourceTarget, variant } = this.config;
    const lookup = {
      suite: suite ?? request.suite,
      evalPath: evalPath ?? request.evalFilePath,
      testId,
      sourceTarget,
      attempt: request.attempt ?? 0,
      variant
    };
    return replayFixtureRecordToProviderResponse(findReplayFixtureRecord(records, lookup));
  }
};
28777
29431
  async function pathExists(target) {
28778
29432
  try {
28779
29433
  await access3(target, constants3.F_OK);
@@ -28783,13 +29437,13 @@ async function pathExists(target) {
28783
29437
  }
28784
29438
  }
28785
29439
  async function ensureDir(target) {
28786
- await mkdir9(target, { recursive: true });
29440
+ await mkdir10(target, { recursive: true });
28787
29441
  }
28788
29442
  async function readDirEntries(target) {
28789
29443
  const entries = await readdir3(target, { withFileTypes: true });
28790
29444
  return entries.map((entry) => ({
28791
29445
  name: entry.name,
28792
- absolutePath: path17.join(target, entry.name),
29446
+ absolutePath: path18.join(target, entry.name),
28793
29447
  isDirectory: entry.isDirectory()
28794
29448
  }));
28795
29449
  }
@@ -28803,7 +29457,7 @@ async function removeIfExists(target) {
28803
29457
  }
28804
29458
  }
28805
29459
  function pathToFileUri2(filePath) {
28806
- const absolutePath = path18.isAbsolute(filePath) ? filePath : path18.resolve(filePath);
29460
+ const absolutePath = path19.isAbsolute(filePath) ? filePath : path19.resolve(filePath);
28807
29461
  const normalizedPath = absolutePath.replace(/\\/g, "/");
28808
29462
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
28809
29463
  return `file:///${normalizedPath}`;
@@ -28895,8 +29549,8 @@ function createBatchRequestPrompt(userQuery, responseFileTmp, responseFileFinal,
28895
29549
  });
28896
29550
  }
28897
29551
  function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateContent) {
28898
- const requestLines = requestFiles.map((file2, index) => `${index + 1}. messages/${path19.basename(file2)}`).join("\n");
28899
- const responseList = responseFiles.map((file2) => `"${path19.basename(file2)}"`).join(", ");
29552
+ const requestLines = requestFiles.map((file2, index) => `${index + 1}. messages/${path20.basename(file2)}`).join("\n");
29553
+ const responseList = responseFiles.map((file2) => `"${path20.basename(file2)}"`).join(", ");
28900
29554
  return renderTemplate2(templateContent, {
28901
29555
  requestFiles: requestLines,
28902
29556
  responseList
@@ -28935,7 +29589,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
28935
29589
  const maxAttempts = 10;
28936
29590
  while (attempts < maxAttempts) {
28937
29591
  try {
28938
- const content = await readFile6(responseFileFinal, { encoding: "utf8" });
29592
+ const content = await readFile7(responseFileFinal, { encoding: "utf8" });
28939
29593
  if (!silent) {
28940
29594
  process.stdout.write(`${content}
28941
29595
  `);
@@ -28956,7 +29610,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
28956
29610
  }
28957
29611
  async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, silent = false, timeoutMs = DEFAULT_TIMEOUT_MS) {
28958
29612
  if (!silent) {
28959
- const fileList = responseFilesFinal.map((file2) => path20.basename(file2)).join(", ");
29613
+ const fileList = responseFilesFinal.map((file2) => path21.basename(file2)).join(", ");
28960
29614
  console.error(`waiting for ${responseFilesFinal.length} batch response(s): ${fileList}`);
28961
29615
  }
28962
29616
  const deadline = Date.now() + timeoutMs;
@@ -28965,7 +29619,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
28965
29619
  while (pending.size > 0) {
28966
29620
  if (Date.now() >= deadline) {
28967
29621
  if (!silent) {
28968
- const remaining = [...pending].map((f) => path20.basename(f)).join(", ");
29622
+ const remaining = [...pending].map((f) => path21.basename(f)).join(", ");
28969
29623
  console.error(
28970
29624
  `error: timed out after ${Math.round(timeoutMs / 1e3)}s waiting for batch responses. Still pending: ${remaining}`
28971
29625
  );
@@ -28992,7 +29646,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
28992
29646
  const maxAttempts = 10;
28993
29647
  while (attempts < maxAttempts) {
28994
29648
  try {
28995
- const content = await readFile6(file2, { encoding: "utf8" });
29649
+ const content = await readFile7(file2, { encoding: "utf8" });
28996
29650
  if (!silent) {
28997
29651
  process.stdout.write(`${content}
28998
29652
  `);
@@ -29016,7 +29670,7 @@ var DEFAULT_LOCK_NAME = "subagent.lock";
29016
29670
  var DEFAULT_ALIVE_FILENAME = ".alive";
29017
29671
  function getDefaultSubagentRoot(vscodeCmd = "code") {
29018
29672
  const folder = vscodeCmd === "code-insiders" ? "vscode-insiders-agents" : "vscode-agents";
29019
- return path21.join(getSubagentsRoot(), folder);
29673
+ return path222.join(getSubagentsRoot(), folder);
29020
29674
  }
29021
29675
  var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
29022
29676
  var execAsync3 = promisify3(exec);
@@ -29082,12 +29736,12 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
29082
29736
  await raceSpawnError(child);
29083
29737
  return true;
29084
29738
  }
29085
- const aliveFile = path222.join(subagentDir, DEFAULT_ALIVE_FILENAME);
29739
+ const aliveFile = path23.join(subagentDir, DEFAULT_ALIVE_FILENAME);
29086
29740
  await removeIfExists(aliveFile);
29087
- const githubAgentsDir = path222.join(subagentDir, ".github", "agents");
29088
- await mkdir10(githubAgentsDir, { recursive: true });
29089
- const wakeupDst = path222.join(githubAgentsDir, "wakeup.md");
29090
- await writeFile4(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
29741
+ const githubAgentsDir = path23.join(subagentDir, ".github", "agents");
29742
+ await mkdir11(githubAgentsDir, { recursive: true });
29743
+ const wakeupDst = path23.join(githubAgentsDir, "wakeup.md");
29744
+ await writeFile5(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
29091
29745
  const workspaceChild = spawnVsCode(vscodeCmd, [workspacePath], {
29092
29746
  label: "open-workspace"
29093
29747
  });
@@ -29099,7 +29753,7 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
29099
29753
  "chat",
29100
29754
  "-m",
29101
29755
  wakeupChatId,
29102
- `create a file named .alive in the ${path222.basename(subagentDir)} folder`
29756
+ `create a file named .alive in the ${path23.basename(subagentDir)} folder`
29103
29757
  ];
29104
29758
  const wakeupChild = spawnVsCode(vscodeCmd, chatArgs, { label: "send-wakeup-chat" });
29105
29759
  await raceSpawnError(wakeupChild);
@@ -29114,27 +29768,27 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
29114
29768
  return true;
29115
29769
  }
29116
29770
  async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, requestInstructions, timestamp, vscodeCmd) {
29117
- const workspacePath = path222.join(subagentDir, `${path222.basename(subagentDir)}.code-workspace`);
29118
- const messagesDir = path222.join(subagentDir, "messages");
29119
- await mkdir10(messagesDir, { recursive: true });
29120
- const reqFile = path222.join(messagesDir, `${timestamp}_req.md`);
29121
- await writeFile4(reqFile, requestInstructions, { encoding: "utf8" });
29771
+ const workspacePath = path23.join(subagentDir, `${path23.basename(subagentDir)}.code-workspace`);
29772
+ const messagesDir = path23.join(subagentDir, "messages");
29773
+ await mkdir11(messagesDir, { recursive: true });
29774
+ const reqFile = path23.join(messagesDir, `${timestamp}_req.md`);
29775
+ await writeFile5(reqFile, requestInstructions, { encoding: "utf8" });
29122
29776
  const reqUri = pathToFileUri2(reqFile);
29123
29777
  const chatArgs = ["-r", "chat", "-m", chatId];
29124
29778
  for (const attachment of attachmentPaths) {
29125
29779
  chatArgs.push("-a", attachment);
29126
29780
  }
29127
29781
  chatArgs.push("-a", reqFile);
29128
- chatArgs.push(`Follow instructions in [${path222.basename(reqFile)}](${reqUri})`);
29782
+ chatArgs.push(`Follow instructions in [${path23.basename(reqFile)}](${reqUri})`);
29129
29783
  const workspaceReady = await ensureWorkspaceFocused(
29130
29784
  workspacePath,
29131
- path222.basename(subagentDir),
29785
+ path23.basename(subagentDir),
29132
29786
  subagentDir,
29133
29787
  vscodeCmd
29134
29788
  );
29135
29789
  if (!workspaceReady) {
29136
29790
  throw new Error(
29137
- `VS Code workspace '${path222.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
29791
+ `VS Code workspace '${path23.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
29138
29792
  );
29139
29793
  }
29140
29794
  await sleep(500);
@@ -29142,9 +29796,9 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
29142
29796
  await raceSpawnError(child);
29143
29797
  }
29144
29798
  async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, chatInstruction, vscodeCmd) {
29145
- const workspacePath = path222.join(subagentDir, `${path222.basename(subagentDir)}.code-workspace`);
29146
- const messagesDir = path222.join(subagentDir, "messages");
29147
- await mkdir10(messagesDir, { recursive: true });
29799
+ const workspacePath = path23.join(subagentDir, `${path23.basename(subagentDir)}.code-workspace`);
29800
+ const messagesDir = path23.join(subagentDir, "messages");
29801
+ await mkdir11(messagesDir, { recursive: true });
29148
29802
  const chatArgs = ["-r", "chat", "-m", chatId];
29149
29803
  for (const attachment of attachmentPaths) {
29150
29804
  chatArgs.push("-a", attachment);
@@ -29152,13 +29806,13 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
29152
29806
  chatArgs.push(chatInstruction);
29153
29807
  const workspaceReady = await ensureWorkspaceFocused(
29154
29808
  workspacePath,
29155
- path222.basename(subagentDir),
29809
+ path23.basename(subagentDir),
29156
29810
  subagentDir,
29157
29811
  vscodeCmd
29158
29812
  );
29159
29813
  if (!workspaceReady) {
29160
29814
  throw new Error(
29161
- `VS Code workspace '${path222.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
29815
+ `VS Code workspace '${path23.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
29162
29816
  );
29163
29817
  }
29164
29818
  await sleep(500);
@@ -29180,10 +29834,10 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
29180
29834
  }
29181
29835
  const transformedFolders = workspace.folders.map((folder) => {
29182
29836
  const folderPath = folder.path;
29183
- if (path23.isAbsolute(folderPath)) {
29837
+ if (path24.isAbsolute(folderPath)) {
29184
29838
  return folder;
29185
29839
  }
29186
- const absolutePath = path23.resolve(templateDir, folderPath);
29840
+ const absolutePath = path24.resolve(templateDir, folderPath);
29187
29841
  return {
29188
29842
  ...folder,
29189
29843
  path: absolutePath
@@ -29205,19 +29859,19 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
29205
29859
  if (locationMap && typeof locationMap === "object") {
29206
29860
  const transformedMap = {};
29207
29861
  for (const [locationPath, value] of Object.entries(locationMap)) {
29208
- const isAbsolute = path23.isAbsolute(locationPath);
29862
+ const isAbsolute = path24.isAbsolute(locationPath);
29209
29863
  if (isAbsolute) {
29210
29864
  transformedMap[locationPath] = value;
29211
29865
  } else {
29212
29866
  const firstGlobIndex = locationPath.search(/[*]/);
29213
29867
  if (firstGlobIndex === -1) {
29214
- const resolvedPath = path23.resolve(templateDir, locationPath).replace(/\\/g, "/");
29868
+ const resolvedPath = path24.resolve(templateDir, locationPath).replace(/\\/g, "/");
29215
29869
  transformedMap[resolvedPath] = value;
29216
29870
  } else {
29217
29871
  const basePathEnd = locationPath.lastIndexOf("/", firstGlobIndex);
29218
29872
  const basePath = basePathEnd !== -1 ? locationPath.substring(0, basePathEnd) : ".";
29219
29873
  const patternPath = locationPath.substring(basePathEnd !== -1 ? basePathEnd : 0);
29220
- const resolvedPath = (path23.resolve(templateDir, basePath) + patternPath).replace(
29874
+ const resolvedPath = (path24.resolve(templateDir, basePath) + patternPath).replace(
29221
29875
  /\\/g,
29222
29876
  "/"
29223
29877
  );
@@ -29256,7 +29910,7 @@ async function findUnlockedSubagent(subagentRoot) {
29256
29910
  number: Number.parseInt(entry.name.split("-")[1] ?? "", 10)
29257
29911
  })).filter((entry) => Number.isInteger(entry.number)).sort((a, b) => a.number - b.number);
29258
29912
  for (const subagent of subagents) {
29259
- const lockFile = path24.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
29913
+ const lockFile = path25.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
29260
29914
  if (!await pathExists(lockFile)) {
29261
29915
  return subagent.absolutePath;
29262
29916
  }
@@ -29266,7 +29920,7 @@ async function findUnlockedSubagent(subagentRoot) {
29266
29920
  async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
29267
29921
  let workspaceContent;
29268
29922
  if (workspaceTemplate) {
29269
- const workspaceSrc = path24.resolve(workspaceTemplate);
29923
+ const workspaceSrc = path25.resolve(workspaceTemplate);
29270
29924
  if (!await pathExists(workspaceSrc)) {
29271
29925
  throw new Error(`workspace template not found: ${workspaceSrc}`);
29272
29926
  }
@@ -29274,18 +29928,18 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
29274
29928
  if (!stats.isFile()) {
29275
29929
  throw new Error(`workspace template must be a file, not a directory: ${workspaceSrc}`);
29276
29930
  }
29277
- const templateText = await readFile7(workspaceSrc, "utf8");
29931
+ const templateText = await readFile8(workspaceSrc, "utf8");
29278
29932
  workspaceContent = JSON.parse(templateText);
29279
29933
  } else {
29280
29934
  workspaceContent = DEFAULT_WORKSPACE_TEMPLATE;
29281
29935
  }
29282
- const workspaceName = `${path24.basename(subagentDir)}.code-workspace`;
29283
- const workspaceDst = path24.join(subagentDir, workspaceName);
29284
- const templateDir = workspaceTemplate ? path24.dirname(path24.resolve(workspaceTemplate)) : subagentDir;
29936
+ const workspaceName = `${path25.basename(subagentDir)}.code-workspace`;
29937
+ const workspaceDst = path25.join(subagentDir, workspaceName);
29938
+ const templateDir = workspaceTemplate ? path25.dirname(path25.resolve(workspaceTemplate)) : subagentDir;
29285
29939
  const workspaceJson = JSON.stringify(workspaceContent, null, 2);
29286
29940
  let transformedContent = transformWorkspacePaths(workspaceJson, templateDir);
29287
29941
  if (cwd) {
29288
- const absCwd = path24.resolve(cwd);
29942
+ const absCwd = path25.resolve(cwd);
29289
29943
  const parsed = JSON.parse(transformedContent);
29290
29944
  const alreadyPresent = parsed.folders.some((f) => f.path === absCwd);
29291
29945
  if (!alreadyPresent) {
@@ -29293,36 +29947,36 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
29293
29947
  transformedContent = JSON.stringify(parsed, null, 2);
29294
29948
  }
29295
29949
  }
29296
- await writeFile5(workspaceDst, transformedContent, "utf8");
29297
- const messagesDir = path24.join(subagentDir, "messages");
29298
- await mkdir11(messagesDir, { recursive: true });
29950
+ await writeFile6(workspaceDst, transformedContent, "utf8");
29951
+ const messagesDir = path25.join(subagentDir, "messages");
29952
+ await mkdir12(messagesDir, { recursive: true });
29299
29953
  return { workspace: workspaceDst, messagesDir };
29300
29954
  }
29301
29955
  async function createSubagentLock(subagentDir) {
29302
- const messagesDir = path24.join(subagentDir, "messages");
29956
+ const messagesDir = path25.join(subagentDir, "messages");
29303
29957
  if (await pathExists(messagesDir)) {
29304
29958
  const files = await readdir4(messagesDir);
29305
29959
  await Promise.all(
29306
29960
  files.map(async (file2) => {
29307
- const target = path24.join(messagesDir, file2);
29961
+ const target = path25.join(messagesDir, file2);
29308
29962
  await removeIfExists(target);
29309
29963
  })
29310
29964
  );
29311
29965
  }
29312
- const githubAgentsDir = path24.join(subagentDir, ".github", "agents");
29966
+ const githubAgentsDir = path25.join(subagentDir, ".github", "agents");
29313
29967
  if (await pathExists(githubAgentsDir)) {
29314
29968
  const agentFiles = await readdir4(githubAgentsDir);
29315
29969
  const preservedFiles = /* @__PURE__ */ new Set(["wakeup.md", "subagent.md"]);
29316
29970
  await Promise.all(
29317
- agentFiles.filter((file2) => file2.endsWith(".md") && !preservedFiles.has(file2)).map((file2) => removeIfExists(path24.join(githubAgentsDir, file2)))
29971
+ agentFiles.filter((file2) => file2.endsWith(".md") && !preservedFiles.has(file2)).map((file2) => removeIfExists(path25.join(githubAgentsDir, file2)))
29318
29972
  );
29319
29973
  }
29320
- const lockFile = path24.join(subagentDir, DEFAULT_LOCK_NAME);
29321
- await writeFile5(lockFile, "", { encoding: "utf8" });
29974
+ const lockFile = path25.join(subagentDir, DEFAULT_LOCK_NAME);
29975
+ await writeFile6(lockFile, "", { encoding: "utf8" });
29322
29976
  return lockFile;
29323
29977
  }
29324
29978
  async function removeSubagentLock(subagentDir) {
29325
- const lockFile = path24.join(subagentDir, DEFAULT_LOCK_NAME);
29979
+ const lockFile = path25.join(subagentDir, DEFAULT_LOCK_NAME);
29326
29980
  await removeIfExists(lockFile);
29327
29981
  }
29328
29982
  async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspaceTemplate, dryRun, cwd) {
@@ -29342,9 +29996,9 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
29342
29996
  return 1;
29343
29997
  }
29344
29998
  if (promptFile) {
29345
- const githubAgentsDir = path24.join(subagentDir, ".github", "agents");
29346
- await mkdir11(githubAgentsDir, { recursive: true });
29347
- const agentFile = path24.join(githubAgentsDir, `${chatId}.md`);
29999
+ const githubAgentsDir = path25.join(subagentDir, ".github", "agents");
30000
+ await mkdir12(githubAgentsDir, { recursive: true });
30001
+ const agentFile = path25.join(githubAgentsDir, `${chatId}.md`);
29348
30002
  try {
29349
30003
  await copyFile(promptFile, agentFile);
29350
30004
  } catch (error40) {
@@ -29361,7 +30015,7 @@ async function resolvePromptFile(promptFile) {
29361
30015
  if (!promptFile) {
29362
30016
  return void 0;
29363
30017
  }
29364
- const resolvedPrompt = path25.resolve(promptFile);
30018
+ const resolvedPrompt = path26.resolve(promptFile);
29365
30019
  if (!await pathExists(resolvedPrompt)) {
29366
30020
  throw new Error(`Prompt file not found: ${resolvedPrompt}`);
29367
30021
  }
@@ -29377,7 +30031,7 @@ async function resolveAttachments(extraAttachments) {
29377
30031
  }
29378
30032
  const resolved = [];
29379
30033
  for (const attachment of extraAttachments) {
29380
- const resolvedPath = path25.resolve(attachment);
30034
+ const resolvedPath = path26.resolve(attachment);
29381
30035
  if (!await pathExists(resolvedPath)) {
29382
30036
  throw new Error(`Attachment not found: ${resolvedPath}`);
29383
30037
  }
@@ -29419,7 +30073,7 @@ async function dispatchAgentSession(options) {
29419
30073
  error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
29420
30074
  };
29421
30075
  }
29422
- const subagentName = path25.basename(subagentDir);
30076
+ const subagentName = path26.basename(subagentDir);
29423
30077
  const chatId = Math.random().toString(16).slice(2, 10);
29424
30078
  const preparationResult = await prepareSubagentDirectory(
29425
30079
  subagentDir,
@@ -29447,9 +30101,9 @@ async function dispatchAgentSession(options) {
29447
30101
  };
29448
30102
  }
29449
30103
  const timestamp = generateTimestamp();
29450
- const messagesDir = path25.join(subagentDir, "messages");
29451
- const responseFileTmp = path25.join(messagesDir, `${timestamp}_res.tmp.md`);
29452
- const responseFileFinal = path25.join(messagesDir, `${timestamp}_res.md`);
30104
+ const messagesDir = path26.join(subagentDir, "messages");
30105
+ const responseFileTmp = path26.join(messagesDir, `${timestamp}_res.tmp.md`);
30106
+ const responseFileFinal = path26.join(messagesDir, `${timestamp}_res.md`);
29453
30107
  const requestInstructions = createRequestPrompt(
29454
30108
  userQuery,
29455
30109
  responseFileTmp,
@@ -29554,7 +30208,7 @@ async function dispatchBatchAgent(options) {
29554
30208
  error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
29555
30209
  };
29556
30210
  }
29557
- subagentName = path25.basename(subagentDir);
30211
+ subagentName = path26.basename(subagentDir);
29558
30212
  const chatId = Math.random().toString(16).slice(2, 10);
29559
30213
  const preparationResult = await prepareSubagentDirectory(
29560
30214
  subagentDir,
@@ -29585,24 +30239,24 @@ async function dispatchBatchAgent(options) {
29585
30239
  };
29586
30240
  }
29587
30241
  const timestamp = generateTimestamp();
29588
- const messagesDir = path25.join(subagentDir, "messages");
30242
+ const messagesDir = path26.join(subagentDir, "messages");
29589
30243
  requestFiles = userQueries.map(
29590
- (_, index) => path25.join(messagesDir, `${timestamp}_${index}_req.md`)
30244
+ (_, index) => path26.join(messagesDir, `${timestamp}_${index}_req.md`)
29591
30245
  );
29592
30246
  const responseTmpFiles = userQueries.map(
29593
- (_, index) => path25.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
30247
+ (_, index) => path26.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
29594
30248
  );
29595
30249
  responseFilesFinal = userQueries.map(
29596
- (_, index) => path25.join(messagesDir, `${timestamp}_${index}_res.md`)
30250
+ (_, index) => path26.join(messagesDir, `${timestamp}_${index}_res.md`)
29597
30251
  );
29598
- const orchestratorFile = path25.join(messagesDir, `${timestamp}_orchestrator.md`);
30252
+ const orchestratorFile = path26.join(messagesDir, `${timestamp}_orchestrator.md`);
29599
30253
  if (!dryRun) {
29600
30254
  await Promise.all(
29601
30255
  userQueries.map((query, index) => {
29602
30256
  const reqFile = requestFiles[index];
29603
30257
  const tmpFile = responseTmpFiles[index];
29604
30258
  const finalFile = responseFilesFinal[index];
29605
- return writeFile6(
30259
+ return writeFile7(
29606
30260
  reqFile,
29607
30261
  createBatchRequestPrompt(query, tmpFile, finalFile, batchRequestTemplateContent),
29608
30262
  { encoding: "utf8" }
@@ -29614,7 +30268,7 @@ async function dispatchBatchAgent(options) {
29614
30268
  responseFilesFinal,
29615
30269
  orchestratorTemplateContent
29616
30270
  );
29617
- await writeFile6(orchestratorFile, orchestratorContent, { encoding: "utf8" });
30271
+ await writeFile7(orchestratorFile, orchestratorContent, { encoding: "utf8" });
29618
30272
  }
29619
30273
  const chatAttachments = [orchestratorFile, ...attachments];
29620
30274
  const orchestratorUri = pathToFileUri2(orchestratorFile);
@@ -29708,7 +30362,7 @@ async function provisionSubagents(options) {
29708
30362
  if (!Number.isInteger(subagents) || subagents < 1) {
29709
30363
  throw new Error("subagents must be a positive integer");
29710
30364
  }
29711
- const targetPath = path26.resolve(targetRoot);
30365
+ const targetPath = path27.resolve(targetRoot);
29712
30366
  if (!dryRun) {
29713
30367
  await ensureDir(targetPath);
29714
30368
  }
@@ -29728,7 +30382,7 @@ async function provisionSubagents(options) {
29728
30382
  continue;
29729
30383
  }
29730
30384
  highestNumber = Math.max(highestNumber, parsed);
29731
- const lockFile = path26.join(entry.absolutePath, lockName);
30385
+ const lockFile = path27.join(entry.absolutePath, lockName);
29732
30386
  const locked = await pathExists(lockFile);
29733
30387
  if (locked) {
29734
30388
  lockedSubagents.add(entry.absolutePath);
@@ -29745,10 +30399,10 @@ async function provisionSubagents(options) {
29745
30399
  break;
29746
30400
  }
29747
30401
  const subagentDir = subagent.absolutePath;
29748
- const githubAgentsDir = path26.join(subagentDir, ".github", "agents");
29749
- const lockFile = path26.join(subagentDir, lockName);
29750
- const workspaceDst = path26.join(subagentDir, `${path26.basename(subagentDir)}.code-workspace`);
29751
- const wakeupDst = path26.join(githubAgentsDir, "wakeup.md");
30402
+ const githubAgentsDir = path27.join(subagentDir, ".github", "agents");
30403
+ const lockFile = path27.join(subagentDir, lockName);
30404
+ const workspaceDst = path27.join(subagentDir, `${path27.basename(subagentDir)}.code-workspace`);
30405
+ const wakeupDst = path27.join(githubAgentsDir, "wakeup.md");
29752
30406
  const isLocked = await pathExists(lockFile);
29753
30407
  if (isLocked && !force) {
29754
30408
  continue;
@@ -29757,8 +30411,8 @@ async function provisionSubagents(options) {
29757
30411
  if (!dryRun) {
29758
30412
  await removeIfExists(lockFile);
29759
30413
  await ensureDir(githubAgentsDir);
29760
- await writeFile7(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
29761
- await writeFile7(wakeupDst, wakeupContent, "utf8");
30414
+ await writeFile8(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
30415
+ await writeFile8(wakeupDst, wakeupContent, "utf8");
29762
30416
  }
29763
30417
  created.push(subagentDir);
29764
30418
  lockedSubagents.delete(subagentDir);
@@ -29768,8 +30422,8 @@ async function provisionSubagents(options) {
29768
30422
  if (!isLocked && force) {
29769
30423
  if (!dryRun) {
29770
30424
  await ensureDir(githubAgentsDir);
29771
- await writeFile7(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
29772
- await writeFile7(wakeupDst, wakeupContent, "utf8");
30425
+ await writeFile8(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
30426
+ await writeFile8(wakeupDst, wakeupContent, "utf8");
29773
30427
  }
29774
30428
  created.push(subagentDir);
29775
30429
  subagentsProvisioned += 1;
@@ -29777,8 +30431,8 @@ async function provisionSubagents(options) {
29777
30431
  }
29778
30432
  if (!dryRun && !await pathExists(workspaceDst)) {
29779
30433
  await ensureDir(githubAgentsDir);
29780
- await writeFile7(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
29781
- await writeFile7(wakeupDst, wakeupContent, "utf8");
30434
+ await writeFile8(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
30435
+ await writeFile8(wakeupDst, wakeupContent, "utf8");
29782
30436
  }
29783
30437
  skippedExisting.push(subagentDir);
29784
30438
  subagentsProvisioned += 1;
@@ -29786,15 +30440,15 @@ async function provisionSubagents(options) {
29786
30440
  let nextIndex = highestNumber;
29787
30441
  while (subagentsProvisioned < subagents) {
29788
30442
  nextIndex += 1;
29789
- const subagentDir = path26.join(targetPath, `subagent-${nextIndex}`);
29790
- const githubAgentsDir = path26.join(subagentDir, ".github", "agents");
29791
- const workspaceDst = path26.join(subagentDir, `${path26.basename(subagentDir)}.code-workspace`);
29792
- const wakeupDst = path26.join(githubAgentsDir, "wakeup.md");
30443
+ const subagentDir = path27.join(targetPath, `subagent-${nextIndex}`);
30444
+ const githubAgentsDir = path27.join(subagentDir, ".github", "agents");
30445
+ const workspaceDst = path27.join(subagentDir, `${path27.basename(subagentDir)}.code-workspace`);
30446
+ const wakeupDst = path27.join(githubAgentsDir, "wakeup.md");
29793
30447
  if (!dryRun) {
29794
30448
  await ensureDir(subagentDir);
29795
30449
  await ensureDir(githubAgentsDir);
29796
- await writeFile7(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
29797
- await writeFile7(wakeupDst, wakeupContent, "utf8");
30450
+ await writeFile8(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
30451
+ await writeFile8(wakeupDst, wakeupContent, "utf8");
29798
30452
  }
29799
30453
  created.push(subagentDir);
29800
30454
  subagentsProvisioned += 1;
@@ -29974,7 +30628,7 @@ var VSCodeProvider = class {
29974
30628
  async function locateVSCodeExecutable(candidate) {
29975
30629
  const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
29976
30630
  if (includesPathSeparator) {
29977
- const resolved = path27.isAbsolute(candidate) ? candidate : path27.resolve(candidate);
30631
+ const resolved = path28.isAbsolute(candidate) ? candidate : path28.resolve(candidate);
29978
30632
  try {
29979
30633
  await access2(resolved, constants2.F_OK);
29980
30634
  return resolved;
@@ -30016,7 +30670,7 @@ function buildMandatoryPrereadBlock2(attachmentFiles) {
30016
30670
  return "";
30017
30671
  }
30018
30672
  const buildList = (files) => files.map((absolutePath) => {
30019
- const fileName = path27.basename(absolutePath);
30673
+ const fileName = path28.basename(absolutePath);
30020
30674
  const fileUri = pathToFileUri3(absolutePath);
30021
30675
  return `* [${fileName}](${fileUri})`;
30022
30676
  });
@@ -30037,7 +30691,7 @@ function collectAttachmentFiles(attachments) {
30037
30691
  }
30038
30692
  const unique = /* @__PURE__ */ new Map();
30039
30693
  for (const attachment of attachments) {
30040
- const absolutePath = path27.resolve(attachment);
30694
+ const absolutePath = path28.resolve(attachment);
30041
30695
  if (!unique.has(absolutePath)) {
30042
30696
  unique.set(absolutePath, absolutePath);
30043
30697
  }
@@ -30045,7 +30699,7 @@ function collectAttachmentFiles(attachments) {
30045
30699
  return Array.from(unique.values());
30046
30700
  }
30047
30701
  function pathToFileUri3(filePath) {
30048
- const absolutePath = path27.isAbsolute(filePath) ? filePath : path27.resolve(filePath);
30702
+ const absolutePath = path28.isAbsolute(filePath) ? filePath : path28.resolve(filePath);
30049
30703
  const normalizedPath = absolutePath.replace(/\\/g, "/");
30050
30704
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
30051
30705
  return `file:///${normalizedPath}`;
@@ -30058,7 +30712,7 @@ function normalizeAttachments(attachments) {
30058
30712
  }
30059
30713
  const deduped = /* @__PURE__ */ new Set();
30060
30714
  for (const attachment of attachments) {
30061
- deduped.add(path27.resolve(attachment));
30715
+ deduped.add(path28.resolve(attachment));
30062
30716
  }
30063
30717
  return Array.from(deduped);
30064
30718
  }
@@ -30067,7 +30721,7 @@ function mergeAttachments(all) {
30067
30721
  for (const list of all) {
30068
30722
  if (!list) continue;
30069
30723
  for (const inputFile of list) {
30070
- deduped.add(path27.resolve(inputFile));
30724
+ deduped.add(path28.resolve(inputFile));
30071
30725
  }
30072
30726
  }
30073
30727
  return deduped.size > 0 ? Array.from(deduped) : void 0;
@@ -30150,11 +30804,11 @@ async function fileExists2(filePath) {
30150
30804
  }
30151
30805
  }
30152
30806
  async function readTargetDefinitions(filePath) {
30153
- const absolutePath = path28.resolve(filePath);
30807
+ const absolutePath = path29.resolve(filePath);
30154
30808
  if (!await fileExists2(absolutePath)) {
30155
30809
  throw new Error(`targets.yaml not found at ${absolutePath}`);
30156
30810
  }
30157
- const raw = await readFile8(absolutePath, "utf8");
30811
+ const raw = await readFile9(absolutePath, "utf8");
30158
30812
  const parsed = parseYamlValue(raw);
30159
30813
  if (!isRecord(parsed)) {
30160
30814
  throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
@@ -30171,11 +30825,11 @@ function listTargetNames(definitions) {
30171
30825
  async function discoverProviders(registry2, baseDir) {
30172
30826
  const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
30173
30827
  const candidateDirs = [];
30174
- let dir = path29.resolve(baseDir);
30175
- const root = path29.parse(dir).root;
30828
+ let dir = path30.resolve(baseDir);
30829
+ const root = path30.parse(dir).root;
30176
30830
  while (dir !== root) {
30177
- candidateDirs.push(path29.join(dir, ".agentv", "providers"));
30178
- dir = path29.dirname(dir);
30831
+ candidateDirs.push(path30.join(dir, ".agentv", "providers"));
30832
+ dir = path30.dirname(dir);
30179
30833
  }
30180
30834
  let files = [];
30181
30835
  for (const providersDir of candidateDirs) {
@@ -30191,7 +30845,7 @@ async function discoverProviders(registry2, baseDir) {
30191
30845
  }
30192
30846
  const discoveredKinds = [];
30193
30847
  for (const filePath of files) {
30194
- const basename = path29.basename(filePath);
30848
+ const basename = path30.basename(filePath);
30195
30849
  const kindName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
30196
30850
  if (registry2.has(kindName)) {
30197
30851
  continue;
@@ -30207,7 +30861,7 @@ async function discoverProviders(registry2, baseDir) {
30207
30861
  }
30208
30862
  function createBuiltinProviderRegistry() {
30209
30863
  const registry2 = new ProviderRegistry();
30210
- registry2.register("openai", (t) => new OpenAIProvider(t.name, t.config)).register("openrouter", (t) => new OpenRouterProvider(t.name, t.config)).register("azure", (t) => new AzureProvider(t.name, t.config)).register("anthropic", (t) => new AnthropicProvider(t.name, t.config)).register("gemini", (t) => new GeminiProvider(t.name, t.config)).register("cli", (t) => new CliProvider(t.name, t.config)).register("codex", (t) => new CodexProvider(t.name, t.config)).register("copilot-sdk", (t) => new CopilotSdkProvider(t.name, t.config)).register("copilot-cli", (t) => new CopilotCliProvider(t.name, t.config)).register("copilot-log", (t) => new CopilotLogProvider(t.name, t.config)).register("pi-coding-agent", (t) => new PiCodingAgentProvider(t.name, t.config)).register("pi-cli", (t) => new PiCliProvider(t.name, t.config)).register("claude-cli", (t) => new ClaudeCliProvider(t.name, t.config)).register("claude", (t) => new ClaudeCliProvider(t.name, t.config)).register("claude-sdk", (t) => new ClaudeSdkProvider(t.name, t.config)).register("mock", (t) => new MockProvider(t.name, t.config)).register("agentv", (t) => new AgentvProvider(t.name, t.config)).register("vscode", (t) => new VSCodeProvider(t.name, t.config, "vscode")).register(
30864
+ registry2.register("openai", (t) => new OpenAIProvider(t.name, t.config)).register("openrouter", (t) => new OpenRouterProvider(t.name, t.config)).register("azure", (t) => new AzureProvider(t.name, t.config)).register("anthropic", (t) => new AnthropicProvider(t.name, t.config)).register("gemini", (t) => new GeminiProvider(t.name, t.config)).register("cli", (t) => new CliProvider(t.name, t.config)).register("codex", (t) => new CodexProvider(t.name, t.config)).register("copilot-sdk", (t) => new CopilotSdkProvider(t.name, t.config)).register("copilot-cli", (t) => new CopilotCliProvider(t.name, t.config)).register("copilot-log", (t) => new CopilotLogProvider(t.name, t.config)).register("pi-coding-agent", (t) => new PiCodingAgentProvider(t.name, t.config)).register("pi-cli", (t) => new PiCliProvider(t.name, t.config)).register("claude-cli", (t) => new ClaudeCliProvider(t.name, t.config)).register("claude", (t) => new ClaudeCliProvider(t.name, t.config)).register("claude-sdk", (t) => new ClaudeSdkProvider(t.name, t.config)).register("mock", (t) => new MockProvider(t.name, t.config)).register("agentv", (t) => new AgentvProvider(t.name, t.config)).register("replay", (t) => new ReplayProvider(t.name, t.config)).register("vscode", (t) => new VSCodeProvider(t.name, t.config, "vscode")).register(
30211
30865
  "vscode-insiders",
30212
30866
  (t) => new VSCodeProvider(t.name, t.config, "vscode-insiders")
30213
30867
  );
@@ -30326,10 +30980,13 @@ function containsTemplateVariables(text) {
30326
30980
  return false;
30327
30981
  }
30328
30982
  async function executePromptTemplate(script, context, config2, timeoutMs) {
30983
+ const messages = context.trace?.messages ?? context.output ?? [];
30329
30984
  const payload = {
30330
30985
  criteria: context.evalCase.criteria,
30331
30986
  expectedOutput: context.evalCase.expected_output,
30332
- output: context.output ?? null,
30987
+ output: context.candidate,
30988
+ answer: context.candidate,
30989
+ messages,
30333
30990
  inputFiles: context.evalCase.file_paths,
30334
30991
  input: context.evalCase.input,
30335
30992
  metadata: context.evalCase.metadata ?? null,
@@ -30340,7 +30997,7 @@ async function executePromptTemplate(script, context, config2, timeoutMs) {
30340
30997
  };
30341
30998
  const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
30342
30999
  const scriptPath = script[script.length - 1];
30343
- const cwd = path30.dirname(scriptPath);
31000
+ const cwd = path31.dirname(scriptPath);
30344
31001
  try {
30345
31002
  const stdout = await executeScript(script, inputJson, timeoutMs, cwd);
30346
31003
  const prompt = stdout.trim();
@@ -30622,11 +31279,11 @@ function createBuiltinRegistry() {
30622
31279
  async function discoverAssertions(registry2, baseDir) {
30623
31280
  const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
30624
31281
  const candidateDirs = [];
30625
- let dir = path31.resolve(baseDir);
30626
- const root = path31.parse(dir).root;
31282
+ let dir = path322.resolve(baseDir);
31283
+ const root = path322.parse(dir).root;
30627
31284
  while (dir !== root) {
30628
- candidateDirs.push(path31.join(dir, ".agentv", "assertions"));
30629
- dir = path31.dirname(dir);
31285
+ candidateDirs.push(path322.join(dir, ".agentv", "assertions"));
31286
+ dir = path322.dirname(dir);
30630
31287
  }
30631
31288
  let files = [];
30632
31289
  for (const assertionsDir of candidateDirs) {
@@ -30642,7 +31299,7 @@ async function discoverAssertions(registry2, baseDir) {
30642
31299
  }
30643
31300
  const discoveredTypes = [];
30644
31301
  for (const filePath of files) {
30645
- const basename = path31.basename(filePath);
31302
+ const basename = path322.basename(filePath);
30646
31303
  const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
30647
31304
  if (registry2.has(typeName)) {
30648
31305
  continue;
@@ -30661,12 +31318,12 @@ async function discoverAssertions(registry2, baseDir) {
30661
31318
  async function discoverGraders(registry2, baseDir) {
30662
31319
  const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
30663
31320
  const candidateDirs = [];
30664
- let dir = path322.resolve(baseDir);
30665
- const root = path322.parse(dir).root;
31321
+ let dir = path33.resolve(baseDir);
31322
+ const root = path33.parse(dir).root;
30666
31323
  while (dir !== root) {
30667
- candidateDirs.push(path322.join(dir, ".agentv", "graders"));
30668
- candidateDirs.push(path322.join(dir, ".agentv", "judges"));
30669
- dir = path322.dirname(dir);
31324
+ candidateDirs.push(path33.join(dir, ".agentv", "graders"));
31325
+ candidateDirs.push(path33.join(dir, ".agentv", "judges"));
31326
+ dir = path33.dirname(dir);
30670
31327
  }
30671
31328
  let files = [];
30672
31329
  for (const gradersDir of candidateDirs) {
@@ -30682,7 +31339,7 @@ async function discoverGraders(registry2, baseDir) {
30682
31339
  }
30683
31340
  const discoveredTypes = [];
30684
31341
  for (const filePath of files) {
30685
- const basename = path322.basename(filePath);
31342
+ const basename = path33.basename(filePath);
30686
31343
  const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
30687
31344
  if (registry2.has(typeName)) {
30688
31345
  continue;
@@ -30865,14 +31522,14 @@ async function isDirectory(filePath) {
30865
31522
  }
30866
31523
  function getWorkspacePath(evalRunId, caseId, workspaceRoot) {
30867
31524
  const root = workspaceRoot ?? getWorkspacesRoot();
30868
- return path33.join(root, evalRunId, caseId);
31525
+ return path34.join(root, evalRunId, caseId);
30869
31526
  }
30870
31527
  async function copyDirectoryRecursive(src, dest) {
30871
- await mkdir13(dest, { recursive: true });
31528
+ await mkdir14(dest, { recursive: true });
30872
31529
  const entries = await readdir5(src, { withFileTypes: true });
30873
31530
  for (const entry of entries) {
30874
- const srcPath = path33.join(src, entry.name);
30875
- const destPath = path33.join(dest, entry.name);
31531
+ const srcPath = path34.join(src, entry.name);
31532
+ const destPath = path34.join(dest, entry.name);
30876
31533
  if (entry.name === ".git") {
30877
31534
  continue;
30878
31535
  }
@@ -30884,7 +31541,7 @@ async function copyDirectoryRecursive(src, dest) {
30884
31541
  }
30885
31542
  }
30886
31543
  async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoot) {
30887
- const resolvedTemplatePath = path33.resolve(templatePath);
31544
+ const resolvedTemplatePath = path34.resolve(templatePath);
30888
31545
  if (!await fileExists(resolvedTemplatePath)) {
30889
31546
  throw new TemplateNotFoundError(resolvedTemplatePath);
30890
31547
  }
@@ -30933,7 +31590,7 @@ async function cleanupWorkspace(workspacePath) {
30933
31590
  }
30934
31591
  async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
30935
31592
  const root = workspaceRoot ?? getWorkspacesRoot();
30936
- const evalDir = path33.join(root, evalRunId);
31593
+ const evalDir = path34.join(root, evalRunId);
30937
31594
  if (await fileExists(evalDir)) {
30938
31595
  await rm4(evalDir, { recursive: true, force: true });
30939
31596
  }
@@ -30986,14 +31643,14 @@ function computeWorkspaceFingerprint(repos) {
30986
31643
  const canonical = {
30987
31644
  repos: [...repos].sort((a, b) => (a.path ?? "").localeCompare(b.path ?? "")).map(normalizeRepoForFingerprint)
30988
31645
  };
30989
- return createHash("sha256").update(JSON.stringify(canonical)).digest("hex");
31646
+ return createHash2("sha256").update(JSON.stringify(canonical)).digest("hex");
30990
31647
  }
30991
31648
  async function copyDirectoryRecursive2(src, dest, skipDirs) {
30992
- await mkdir14(dest, { recursive: true });
31649
+ await mkdir15(dest, { recursive: true });
30993
31650
  const entries = await readdir6(src, { withFileTypes: true });
30994
31651
  for (const entry of entries) {
30995
- const srcPath = path34.join(src, entry.name);
30996
- const destPath = path34.join(dest, entry.name);
31652
+ const srcPath = path35.join(src, entry.name);
31653
+ const destPath = path35.join(dest, entry.name);
30997
31654
  if (entry.name === ".git") {
30998
31655
  continue;
30999
31656
  }
@@ -31026,8 +31683,8 @@ var WorkspacePoolManager = class {
31026
31683
  async acquireWorkspace(options) {
31027
31684
  const { templatePath, repos, maxSlots, repoManager, poolReset } = options;
31028
31685
  const fingerprint = computeWorkspaceFingerprint(repos);
31029
- const poolDir = path34.join(this.poolRoot, fingerprint);
31030
- await mkdir14(poolDir, { recursive: true });
31686
+ const poolDir = path35.join(this.poolRoot, fingerprint);
31687
+ await mkdir15(poolDir, { recursive: true });
31031
31688
  const drifted = await this.checkDrift(poolDir, fingerprint);
31032
31689
  if (drifted) {
31033
31690
  console.warn(
@@ -31036,7 +31693,7 @@ var WorkspacePoolManager = class {
31036
31693
  await this.removeAllSlots(poolDir);
31037
31694
  }
31038
31695
  for (let i = 0; i < maxSlots; i++) {
31039
- const slotPath = path34.join(poolDir, `slot-${i}`);
31696
+ const slotPath = path35.join(poolDir, `slot-${i}`);
31040
31697
  const lockPath = `${slotPath}.lock`;
31041
31698
  const locked = await this.tryLock(lockPath);
31042
31699
  if (!locked) {
@@ -31054,7 +31711,7 @@ var WorkspacePoolManager = class {
31054
31711
  poolDir
31055
31712
  };
31056
31713
  }
31057
- await mkdir14(slotPath, { recursive: true });
31714
+ await mkdir15(slotPath, { recursive: true });
31058
31715
  if (templatePath) {
31059
31716
  await copyDirectoryRecursive2(templatePath, slotPath);
31060
31717
  }
@@ -31091,14 +31748,14 @@ var WorkspacePoolManager = class {
31091
31748
  async tryLock(lockPath) {
31092
31749
  for (let attempt = 0; attempt < 3; attempt++) {
31093
31750
  try {
31094
- await writeFile8(lockPath, String(process.pid), { flag: "wx" });
31751
+ await writeFile9(lockPath, String(process.pid), { flag: "wx" });
31095
31752
  return true;
31096
31753
  } catch (err) {
31097
31754
  if (err.code !== "EEXIST") {
31098
31755
  throw err;
31099
31756
  }
31100
31757
  try {
31101
- const pidStr = await readFile9(lockPath, "utf-8");
31758
+ const pidStr = await readFile10(lockPath, "utf-8");
31102
31759
  const pid = Number.parseInt(pidStr.trim(), 10);
31103
31760
  if (!Number.isNaN(pid)) {
31104
31761
  try {
@@ -31123,9 +31780,9 @@ var WorkspacePoolManager = class {
31123
31780
  * Returns false (no drift) if metadata.json doesn't exist (first use).
31124
31781
  */
31125
31782
  async checkDrift(poolDir, fingerprint) {
31126
- const metadataPath = path34.join(poolDir, "metadata.json");
31783
+ const metadataPath = path35.join(poolDir, "metadata.json");
31127
31784
  try {
31128
- const raw = await readFile9(metadataPath, "utf-8");
31785
+ const raw = await readFile10(metadataPath, "utf-8");
31129
31786
  const metadata = JSON.parse(raw);
31130
31787
  return metadata.fingerprint !== fingerprint;
31131
31788
  } catch {
@@ -31140,17 +31797,17 @@ var WorkspacePoolManager = class {
31140
31797
  repos,
31141
31798
  createdAt: (/* @__PURE__ */ new Date()).toISOString()
31142
31799
  };
31143
- await writeFile8(path34.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
31800
+ await writeFile9(path35.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
31144
31801
  }
31145
31802
  /** Remove all slot directories and their lock files from a pool directory. */
31146
31803
  async removeAllSlots(poolDir) {
31147
31804
  const entries = await readdir6(poolDir);
31148
31805
  for (const entry of entries) {
31149
31806
  if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
31150
- const lockPath = path34.join(poolDir, `${entry}.lock`);
31807
+ const lockPath = path35.join(poolDir, `${entry}.lock`);
31151
31808
  if (existsSync3(lockPath)) {
31152
31809
  try {
31153
- const pidStr = await readFile9(lockPath, "utf-8");
31810
+ const pidStr = await readFile10(lockPath, "utf-8");
31154
31811
  const pid = Number.parseInt(pidStr.trim(), 10);
31155
31812
  if (!Number.isNaN(pid)) {
31156
31813
  try {
@@ -31163,12 +31820,12 @@ var WorkspacePoolManager = class {
31163
31820
  } catch {
31164
31821
  }
31165
31822
  }
31166
- await rm5(path34.join(poolDir, entry), { recursive: true, force: true });
31823
+ await rm5(path35.join(poolDir, entry), { recursive: true, force: true });
31167
31824
  await rm5(lockPath, { force: true }).catch(() => {
31168
31825
  });
31169
31826
  }
31170
31827
  }
31171
- await rm5(path34.join(poolDir, "metadata.json"), { force: true }).catch(() => {
31828
+ await rm5(path35.join(poolDir, "metadata.json"), { force: true }).catch(() => {
31172
31829
  });
31173
31830
  }
31174
31831
  /**
@@ -31179,7 +31836,7 @@ var WorkspacePoolManager = class {
31179
31836
  async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
31180
31837
  for (const repo of repos) {
31181
31838
  if (!repo.path || !repo.source) continue;
31182
- const repoDir = path34.join(slotPath, repo.path);
31839
+ const repoDir = path35.join(slotPath, repo.path);
31183
31840
  if (!existsSync3(repoDir)) {
31184
31841
  continue;
31185
31842
  }
@@ -31319,7 +31976,7 @@ ${lines.join("\n")}`;
31319
31976
  }
31320
31977
  return;
31321
31978
  }
31322
- const targetDir = path35.join(workspacePath, repo.path);
31979
+ const targetDir = path36.join(workspacePath, repo.path);
31323
31980
  const sourceUrl = getSourceUrl(repo.source);
31324
31981
  const startedAt = Date.now();
31325
31982
  if (this.verbose) {
@@ -31416,7 +32073,7 @@ ${lines.join("\n")}`;
31416
32073
  const cleanFlag = reset === "strict" ? "-fdx" : "-fd";
31417
32074
  for (const repo of repos) {
31418
32075
  if (!repo.path || !repo.source) continue;
31419
- const targetDir = path35.join(workspacePath, repo.path);
32076
+ const targetDir = path36.join(workspacePath, repo.path);
31420
32077
  await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
31421
32078
  await this.runGit(["clean", cleanFlag], { cwd: targetDir });
31422
32079
  }
@@ -31426,11 +32083,11 @@ async function resolveWorkspaceTemplate(templatePath) {
31426
32083
  if (!templatePath) {
31427
32084
  return void 0;
31428
32085
  }
31429
- const resolved = path36.resolve(templatePath);
32086
+ const resolved = path37.resolve(templatePath);
31430
32087
  const stats = await stat7(resolved);
31431
32088
  if (stats.isFile()) {
31432
32089
  return {
31433
- dir: path36.dirname(resolved),
32090
+ dir: path37.dirname(resolved),
31434
32091
  workspaceFile: resolved
31435
32092
  };
31436
32093
  }
@@ -31442,14 +32099,14 @@ async function resolveWorkspaceTemplate(templatePath) {
31442
32099
  if (workspaceFiles.length === 1) {
31443
32100
  return {
31444
32101
  dir: resolved,
31445
- workspaceFile: path36.join(resolved, workspaceFiles[0])
32102
+ workspaceFile: path37.join(resolved, workspaceFiles[0])
31446
32103
  };
31447
32104
  }
31448
32105
  if (workspaceFiles.length > 1) {
31449
32106
  const conventionFile = workspaceFiles.find((f) => f === "template.code-workspace");
31450
32107
  return {
31451
32108
  dir: resolved,
31452
- workspaceFile: conventionFile ? path36.join(resolved, conventionFile) : void 0
32109
+ workspaceFile: conventionFile ? path37.join(resolved, conventionFile) : void 0
31453
32110
  };
31454
32111
  }
31455
32112
  return { dir: resolved };
@@ -31568,14 +32225,14 @@ function isAgentSkillsFormat(parsed) {
31568
32225
  return Array.isArray(obj.evals);
31569
32226
  }
31570
32227
  async function loadTestsFromAgentSkills(filePath) {
31571
- const raw = await readFile10(filePath, "utf8");
32228
+ const raw = await readFile11(filePath, "utf8");
31572
32229
  let parsed;
31573
32230
  try {
31574
32231
  parsed = JSON.parse(raw);
31575
32232
  } catch {
31576
32233
  throw new Error(`Invalid Agent Skills evals.json: failed to parse JSON in '${filePath}'`);
31577
32234
  }
31578
- return parseAgentSkillsEvals(parsed, filePath, path37.dirname(path37.resolve(filePath)));
32235
+ return parseAgentSkillsEvals(parsed, filePath, path38.dirname(path38.resolve(filePath)));
31579
32236
  }
31580
32237
  function parseAgentSkillsEvals(parsed, source = "evals.json", baseDir) {
31581
32238
  if (!isAgentSkillsFormat(parsed)) {
@@ -31613,7 +32270,7 @@ function parseAgentSkillsEvals(parsed, source = "evals.json", baseDir) {
31613
32270
  if (baseDir) {
31614
32271
  metadata.agent_skills_base_dir = baseDir;
31615
32272
  for (const file2 of evalCase.files) {
31616
- filePaths.push(path37.resolve(baseDir, file2));
32273
+ filePaths.push(path38.resolve(baseDir, file2));
31617
32274
  }
31618
32275
  }
31619
32276
  }
@@ -31649,15 +32306,15 @@ function resolveToAbsolutePath(candidate) {
31649
32306
  if (candidate.startsWith("file:")) {
31650
32307
  return fileURLToPath4(candidate);
31651
32308
  }
31652
- return path38.resolve(candidate);
32309
+ return path39.resolve(candidate);
31653
32310
  }
31654
32311
  throw new TypeError("Unsupported repoRoot value. Expected string or URL.");
31655
32312
  }
31656
32313
  function buildDirectoryChain2(filePath, repoRoot) {
31657
32314
  const directories = [];
31658
32315
  const seen = /* @__PURE__ */ new Set();
31659
- const boundary = path38.resolve(repoRoot);
31660
- let current = path38.resolve(path38.dirname(filePath));
32316
+ const boundary = path39.resolve(repoRoot);
32317
+ let current = path39.resolve(path39.dirname(filePath));
31661
32318
  while (current !== void 0) {
31662
32319
  if (!seen.has(current)) {
31663
32320
  directories.push(current);
@@ -31666,7 +32323,7 @@ function buildDirectoryChain2(filePath, repoRoot) {
31666
32323
  if (current === boundary) {
31667
32324
  break;
31668
32325
  }
31669
- const parent = path38.dirname(current);
32326
+ const parent = path39.dirname(current);
31670
32327
  if (parent === current) {
31671
32328
  break;
31672
32329
  }
@@ -31680,16 +32337,16 @@ function buildDirectoryChain2(filePath, repoRoot) {
31680
32337
  function buildSearchRoots2(evalPath, repoRoot) {
31681
32338
  const uniqueRoots = [];
31682
32339
  const addRoot = (root) => {
31683
- const normalized = path38.resolve(root);
32340
+ const normalized = path39.resolve(root);
31684
32341
  if (!uniqueRoots.includes(normalized)) {
31685
32342
  uniqueRoots.push(normalized);
31686
32343
  }
31687
32344
  };
31688
- let currentDir = path38.dirname(evalPath);
32345
+ let currentDir = path39.dirname(evalPath);
31689
32346
  let reachedBoundary = false;
31690
32347
  while (!reachedBoundary) {
31691
32348
  addRoot(currentDir);
31692
- const parentDir = path38.dirname(currentDir);
32349
+ const parentDir = path39.dirname(currentDir);
31693
32350
  if (currentDir === repoRoot || parentDir === currentDir) {
31694
32351
  reachedBoundary = true;
31695
32352
  } else {
@@ -31707,16 +32364,16 @@ function trimLeadingSeparators2(value) {
31707
32364
  async function resolveFileReference3(rawValue, searchRoots) {
31708
32365
  const displayPath = trimLeadingSeparators2(rawValue);
31709
32366
  const potentialPaths = [];
31710
- if (path38.isAbsolute(rawValue)) {
31711
- potentialPaths.push(path38.normalize(rawValue));
32367
+ if (path39.isAbsolute(rawValue)) {
32368
+ potentialPaths.push(path39.normalize(rawValue));
31712
32369
  }
31713
32370
  for (const base of searchRoots) {
31714
- potentialPaths.push(path38.resolve(base, displayPath));
32371
+ potentialPaths.push(path39.resolve(base, displayPath));
31715
32372
  }
31716
32373
  const attempted = [];
31717
32374
  const seen = /* @__PURE__ */ new Set();
31718
32375
  for (const candidate of potentialPaths) {
31719
- const absoluteCandidate = path38.resolve(candidate);
32376
+ const absoluteCandidate = path39.resolve(candidate);
31720
32377
  if (seen.has(absoluteCandidate)) {
31721
32378
  continue;
31722
32379
  }
@@ -31737,9 +32394,9 @@ var DEFAULT_EVAL_PATTERNS = [
31737
32394
  ];
31738
32395
  async function loadConfig(evalFilePath, repoRoot) {
31739
32396
  const directories = buildDirectoryChain2(evalFilePath, repoRoot);
31740
- const globalConfigPath = path39.join(getAgentvConfigDir(), "config.yaml");
32397
+ const globalConfigPath = path40.join(getAgentvConfigDir(), "config.yaml");
31741
32398
  for (const directory of directories) {
31742
- const configPath = path39.join(directory, ".agentv", "config.yaml");
32399
+ const configPath = path40.join(directory, ".agentv", "config.yaml");
31743
32400
  if (!await fileExists3(configPath)) {
31744
32401
  continue;
31745
32402
  }
@@ -31752,7 +32409,7 @@ async function loadConfig(evalFilePath, repoRoot) {
31752
32409
  }
31753
32410
  async function readConfigFile(configPath) {
31754
32411
  try {
31755
- const rawConfig = await readFile11(configPath, "utf8");
32412
+ const rawConfig = await readFile12(configPath, "utf8");
31756
32413
  const parsed = interpolateEnv(parseYamlValue(rawConfig), process.env);
31757
32414
  if (!isJsonObject(parsed)) {
31758
32415
  logWarning(`Invalid config.yaml format at ${configPath}`);
@@ -32102,6 +32759,14 @@ function parseResultsConfig(raw, configPath) {
32102
32759
  logWarning(`Invalid results.repo in ${configPath}, expected non-empty string`);
32103
32760
  return void 0;
32104
32761
  }
32762
+ let branch;
32763
+ if (obj.branch !== void 0) {
32764
+ if (typeof obj.branch !== "string" || obj.branch.trim().length === 0) {
32765
+ logWarning(`Invalid results.branch in ${configPath}, expected non-empty string`);
32766
+ return void 0;
32767
+ }
32768
+ branch = obj.branch.trim();
32769
+ }
32105
32770
  let resultsPath;
32106
32771
  if (obj.path !== void 0) {
32107
32772
  if (typeof obj.path !== "string" || obj.path.trim().length === 0) {
@@ -32132,6 +32797,7 @@ function parseResultsConfig(raw, configPath) {
32132
32797
  return {
32133
32798
  mode: "github",
32134
32799
  repo,
32800
+ ...branch !== void 0 && { branch },
32135
32801
  ...resultsPath !== void 0 && { path: resultsPath },
32136
32802
  ...typeof obj.auto_push === "boolean" && { auto_push: obj.auto_push },
32137
32803
  ...branchPrefix && { branch_prefix: branchPrefix }
@@ -32168,7 +32834,7 @@ function logWarning(message) {
32168
32834
  var ANSI_YELLOW3 = "\x1B[33m";
32169
32835
  var ANSI_RESET4 = "\x1B[0m";
32170
32836
  async function validateCustomPromptContent(promptPath) {
32171
- const content = await readFile12(promptPath, "utf8");
32837
+ const content = await readFile13(promptPath, "utf8");
32172
32838
  validateTemplateVariables(content, promptPath);
32173
32839
  }
32174
32840
  function validateTemplateVariables(content, source) {
@@ -32257,8 +32923,8 @@ function isTemplateReference(value) {
32257
32923
  }
32258
32924
  async function resolveAssertionTemplateReference(include, searchRoots) {
32259
32925
  const templateCandidates = isTemplateReference(include) ? [
32260
- path40.join(".agentv", "templates", `${include}.yaml`),
32261
- path40.join(".agentv", "templates", `${include}.yml`)
32926
+ path41.join(".agentv", "templates", `${include}.yaml`),
32927
+ path41.join(".agentv", "templates", `${include}.yml`)
32262
32928
  ] : [include];
32263
32929
  const attempted = [];
32264
32930
  for (const candidate of templateCandidates) {
@@ -32298,7 +32964,7 @@ ${resolved.attempted.map((attempt) => ` Tried: ${attempt}`).join("\n")}` : "";
32298
32964
  const cycle = [...includeContext.chain, resolved.resolvedPath].join(" -> ");
32299
32965
  throw new Error(`Assertion template cycle detected in '${evalId}': ${cycle}`);
32300
32966
  }
32301
- const content = await readFile13(resolved.resolvedPath, "utf8");
32967
+ const content = await readFile14(resolved.resolvedPath, "utf8");
32302
32968
  const parsed = interpolateEnv(parseYamlValue(content), process.env);
32303
32969
  if (!isJsonObject2(parsed)) {
32304
32970
  throw new Error(
@@ -32311,10 +32977,10 @@ ${resolved.attempted.map((attempt) => ` Tried: ${attempt}`).join("\n")}` : "";
32311
32977
  `Invalid assertion template file in '${evalId}': ${resolved.resolvedPath} is missing a top-level assertions array`
32312
32978
  );
32313
32979
  }
32314
- const templateDir = path40.dirname(resolved.resolvedPath);
32980
+ const templateDir = path41.dirname(resolved.resolvedPath);
32315
32981
  const nestedSearchRoots = [
32316
32982
  templateDir,
32317
- ...searchRoots.filter((root) => path40.resolve(root) !== templateDir)
32983
+ ...searchRoots.filter((root) => path41.resolve(root) !== templateDir)
32318
32984
  ];
32319
32985
  return await expandGraderEntries(assertions, nestedSearchRoots, evalId, {
32320
32986
  depth: nextDepth,
@@ -32375,20 +33041,20 @@ async function collectAssertionTemplateReferencesFromValue(value, searchRoots, e
32375
33041
  references.push({
32376
33042
  kind: "assertion_template",
32377
33043
  displayPath: resolved.displayPath,
32378
- ...resolved.resolvedPath ? { resolvedPath: path40.resolve(resolved.resolvedPath) } : {}
33044
+ ...resolved.resolvedPath ? { resolvedPath: path41.resolve(resolved.resolvedPath) } : {}
32379
33045
  });
32380
33046
  if (resolved.resolvedPath) {
32381
33047
  if (includeContext.chain.includes(resolved.resolvedPath)) {
32382
33048
  const cycle = [...includeContext.chain, resolved.resolvedPath].join(" -> ");
32383
33049
  throw new Error(`Assertion template cycle detected in '${evalId}': ${cycle}`);
32384
33050
  }
32385
- const content = await readFile13(resolved.resolvedPath, "utf8");
33051
+ const content = await readFile14(resolved.resolvedPath, "utf8");
32386
33052
  const parsed = interpolateEnv(parseYamlValue(content), process.env);
32387
33053
  if (isJsonObject2(parsed) && Array.isArray(parsed.assertions)) {
32388
- const templateDir = path40.dirname(resolved.resolvedPath);
33054
+ const templateDir = path41.dirname(resolved.resolvedPath);
32389
33055
  const nestedSearchRoots = [
32390
33056
  templateDir,
32391
- ...searchRoots.filter((root) => path40.resolve(root) !== templateDir)
33057
+ ...searchRoots.filter((root) => path41.resolve(root) !== templateDir)
32392
33058
  ];
32393
33059
  references.push(
32394
33060
  ...await collectAssertionTemplateReferencesFromValue(
@@ -32574,7 +33240,7 @@ async function parseGraderList(candidateEvaluators, searchRoots, evalId, default
32574
33240
  if (cwd) {
32575
33241
  const resolved = await resolveFileReference3(cwd, searchRoots);
32576
33242
  if (resolved.resolvedPath) {
32577
- resolvedCwd = path40.resolve(resolved.resolvedPath);
33243
+ resolvedCwd = path41.resolve(resolved.resolvedPath);
32578
33244
  } else {
32579
33245
  logWarning2(
32580
33246
  `Code-grader evaluator '${name}' in '${evalId}': cwd not found (${resolved.displayPath})`,
@@ -32760,7 +33426,7 @@ async function parseGraderList(candidateEvaluators, searchRoots, evalId, default
32760
33426
  aggregatorPrompt = fileRef;
32761
33427
  const resolved = await resolveFileReference3(fileRef, searchRoots);
32762
33428
  if (resolved.resolvedPath) {
32763
- promptPath2 = path40.resolve(resolved.resolvedPath);
33429
+ promptPath2 = path41.resolve(resolved.resolvedPath);
32764
33430
  } else {
32765
33431
  throw new Error(
32766
33432
  `Composite aggregator in '${evalId}': prompt file not found: ${resolved.displayPath}`
@@ -33440,7 +34106,7 @@ async function parseGraderList(candidateEvaluators, searchRoots, evalId, default
33440
34106
  const commandPath = commandArray[commandArray.length - 1];
33441
34107
  const resolved = await resolveFileReference3(commandPath, searchRoots);
33442
34108
  if (resolved.resolvedPath) {
33443
- resolvedPromptScript = [...commandArray.slice(0, -1), path40.resolve(resolved.resolvedPath)];
34109
+ resolvedPromptScript = [...commandArray.slice(0, -1), path41.resolve(resolved.resolvedPath)];
33444
34110
  } else {
33445
34111
  throw new Error(
33446
34112
  `Grader '${name}' in '${evalId}': prompt command file not found: ${resolved.displayPath}`
@@ -33455,7 +34121,7 @@ async function parseGraderList(candidateEvaluators, searchRoots, evalId, default
33455
34121
  prompt = fileRef;
33456
34122
  const resolved = await resolveFileReference3(fileRef, searchRoots);
33457
34123
  if (resolved.resolvedPath) {
33458
- promptPath = path40.resolve(resolved.resolvedPath);
34124
+ promptPath = path41.resolve(resolved.resolvedPath);
33459
34125
  try {
33460
34126
  await validateCustomPromptContent(promptPath);
33461
34127
  } catch (error40) {
@@ -33613,7 +34279,7 @@ async function parsePreprocessors(rawValue, searchRoots, evaluatorName, evalId)
33613
34279
  preprocessors.push({
33614
34280
  type,
33615
34281
  command,
33616
- resolvedCommand: [...command.slice(0, -1), path40.resolve(resolved.resolvedPath)]
34282
+ resolvedCommand: [...command.slice(0, -1), path41.resolve(resolved.resolvedPath)]
33617
34283
  });
33618
34284
  }
33619
34285
  return preprocessors;
@@ -33708,10 +34374,10 @@ async function resolveOptionalCommandSource(command, searchRoots) {
33708
34374
  return void 0;
33709
34375
  }
33710
34376
  const resolved = await resolveFileReference3(candidate, searchRoots);
33711
- return resolved.resolvedPath ? path40.resolve(resolved.resolvedPath) : void 0;
34377
+ return resolved.resolvedPath ? path41.resolve(resolved.resolvedPath) : void 0;
33712
34378
  }
33713
34379
  function looksLikeFilePath(value) {
33714
- return path40.isAbsolute(value) || value.startsWith(".") || value.includes("/") || value.includes("\\") || /\.[cm]?[jt]sx?$|\.py$|\.sh$|\.bash$|\.rb$|\.go$|\.rs$/i.test(value);
34380
+ return path41.isAbsolute(value) || value.startsWith(".") || value.includes("/") || value.includes("\\") || /\.[cm]?[jt]sx?$|\.py$|\.sh$|\.bash$|\.rb$|\.go$|\.rs$/i.test(value);
33715
34381
  }
33716
34382
  function parseCommandToArgv(command) {
33717
34383
  if (process.platform === "win32") {
@@ -34099,7 +34765,7 @@ var IMAGE_MEDIA_TYPES = {
34099
34765
  ".bmp": "image/bmp"
34100
34766
  };
34101
34767
  function detectImageMediaType(filePath) {
34102
- const ext = path41.extname(filePath).toLowerCase();
34768
+ const ext = path422.extname(filePath).toLowerCase();
34103
34769
  return IMAGE_MEDIA_TYPES[ext];
34104
34770
  }
34105
34771
  var ANSI_YELLOW5 = "\x1B[33m";
@@ -34158,12 +34824,12 @@ async function processMessages(options) {
34158
34824
  continue;
34159
34825
  }
34160
34826
  try {
34161
- const fileContent = (await readFile14(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
34827
+ const fileContent = (await readFile15(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
34162
34828
  processedContent.push({
34163
34829
  ...cloneJsonObject(rawSegment),
34164
34830
  path: displayPath,
34165
34831
  text: fileContent,
34166
- resolvedPath: path41.resolve(resolvedPath)
34832
+ resolvedPath: path422.resolve(resolvedPath)
34167
34833
  });
34168
34834
  if (verbose) {
34169
34835
  const label = messageType === "input" ? "[File]" : "[Expected Output File]";
@@ -34199,7 +34865,7 @@ async function processMessages(options) {
34199
34865
  continue;
34200
34866
  }
34201
34867
  try {
34202
- const imageBuffer = await readFile14(resolvedPath);
34868
+ const imageBuffer = await readFile15(resolvedPath);
34203
34869
  const base643 = imageBuffer.toString("base64");
34204
34870
  processedContent.push({
34205
34871
  type: "image",
@@ -34282,12 +34948,12 @@ async function processExpectedMessages(options) {
34282
34948
  continue;
34283
34949
  }
34284
34950
  try {
34285
- const fileContent = (await readFile14(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
34951
+ const fileContent = (await readFile15(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
34286
34952
  processedContent.push({
34287
34953
  type: "file",
34288
34954
  path: displayPath,
34289
34955
  text: fileContent,
34290
- resolvedPath: path41.resolve(resolvedPath)
34956
+ resolvedPath: path422.resolve(resolvedPath)
34291
34957
  });
34292
34958
  if (verbose) {
34293
34959
  console.log(` [Expected Output File] Found: ${displayPath}`);
@@ -34322,7 +34988,7 @@ async function processExpectedMessages(options) {
34322
34988
  continue;
34323
34989
  }
34324
34990
  try {
34325
- const imageBuffer = await readFile14(resolvedPath);
34991
+ const imageBuffer = await readFile15(resolvedPath);
34326
34992
  const base643 = imageBuffer.toString("base64");
34327
34993
  processedContent.push({
34328
34994
  type: "image",
@@ -34433,7 +35099,7 @@ function matchesFilter(id, filter) {
34433
35099
  return typeof filter === "string" ? micromatch.isMatch(id, filter) : filter.some((pattern) => micromatch.isMatch(id, pattern));
34434
35100
  }
34435
35101
  function detectFormat(filePath) {
34436
- const ext = path422.extname(filePath).toLowerCase();
35102
+ const ext = path43.extname(filePath).toLowerCase();
34437
35103
  if (ext === ".jsonl") return "jsonl";
34438
35104
  if (ext === ".yaml" || ext === ".yml") return "yaml";
34439
35105
  if (ext === ".json") return "agent-skills-json";
@@ -34443,9 +35109,9 @@ function detectFormat(filePath) {
34443
35109
  );
34444
35110
  }
34445
35111
  async function loadSidecarMetadata(jsonlPath, verbose) {
34446
- const dir = path422.dirname(jsonlPath);
34447
- const base = path422.basename(jsonlPath, ".jsonl");
34448
- const sidecarPath = path422.join(dir, `${base}.yaml`);
35112
+ const dir = path43.dirname(jsonlPath);
35113
+ const base = path43.basename(jsonlPath, ".jsonl");
35114
+ const sidecarPath = path43.join(dir, `${base}.yaml`);
34449
35115
  if (!await fileExists3(sidecarPath)) {
34450
35116
  if (verbose) {
34451
35117
  logWarning4(`Sidecar metadata file not found: ${sidecarPath} (using defaults)`);
@@ -34453,7 +35119,7 @@ async function loadSidecarMetadata(jsonlPath, verbose) {
34453
35119
  return {};
34454
35120
  }
34455
35121
  try {
34456
- const content = await readFile15(sidecarPath, "utf8");
35122
+ const content = await readFile16(sidecarPath, "utf8");
34457
35123
  const parsed = interpolateEnv(parseYamlValue(content), process.env);
34458
35124
  if (!isJsonObject(parsed)) {
34459
35125
  logWarning4(`Invalid sidecar metadata format in ${sidecarPath}`);
@@ -34494,13 +35160,13 @@ function parseJsonlContent(content, filePath) {
34494
35160
  async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
34495
35161
  const verbose = options?.verbose ?? false;
34496
35162
  const filterPattern = options?.filter;
34497
- const absoluteTestPath = path422.resolve(evalFilePath);
35163
+ const absoluteTestPath = path43.resolve(evalFilePath);
34498
35164
  const repoRootPath = resolveToAbsolutePath(repoRoot);
34499
35165
  const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
34500
35166
  const sidecar = await loadSidecarMetadata(absoluteTestPath, verbose);
34501
- const rawFile = await readFile15(absoluteTestPath, "utf8");
35167
+ const rawFile = await readFile16(absoluteTestPath, "utf8");
34502
35168
  const rawCases = parseJsonlContent(rawFile, evalFilePath);
34503
- const fallbackSuiteName = path422.basename(absoluteTestPath, ".jsonl") || "eval";
35169
+ const fallbackSuiteName = path43.basename(absoluteTestPath, ".jsonl") || "eval";
34504
35170
  const suiteName = sidecar.name && sidecar.name.trim().length > 0 ? sidecar.name : fallbackSuiteName;
34505
35171
  const globalEvaluator = coerceEvaluator(sidecar.evaluator, "sidecar") ?? "llm-grader";
34506
35172
  const globalExecution = sidecar.execution;
@@ -34906,8 +35572,8 @@ function interpolateRawEvalCase(raw, vars) {
34906
35572
  }
34907
35573
  async function readTestSuiteMetadata(testFilePath) {
34908
35574
  try {
34909
- const absolutePath = path43.resolve(testFilePath);
34910
- const content = await readFile16(absolutePath, "utf8");
35575
+ const absolutePath = path44.resolve(testFilePath);
35576
+ const content = await readFile17(absolutePath, "utf8");
34911
35577
  const parsed = interpolateEnv(parseYamlValue(content), process.env);
34912
35578
  if (!isJsonObject(parsed)) {
34913
35579
  return {};
@@ -34931,7 +35597,7 @@ async function loadTestSuite(evalFilePath, repoRoot, options) {
34931
35597
  return { tests: await loadTestsFromAgentSkills(evalFilePath) };
34932
35598
  }
34933
35599
  if (format === "typescript") {
34934
- const { loadTsEvalSuite: loadTsEvalSuite2 } = await import("./ts-eval-loader-EQJX3OLT-THE7D3GR.js");
35600
+ const { loadTsEvalSuite: loadTsEvalSuite2 } = await import("./ts-eval-loader-4DU65XGW-YM47FFG2.js");
34935
35601
  return loadTsEvalSuite2(evalFilePath, resolveToAbsolutePath(repoRoot), options);
34936
35602
  }
34937
35603
  const { tests, parsed, suiteWorkspacePath } = await loadTestsFromYaml(
@@ -34966,7 +35632,7 @@ async function loadTests(evalFilePath, repoRoot, options) {
34966
35632
  return loadTestsFromAgentSkills(evalFilePath);
34967
35633
  }
34968
35634
  if (format === "typescript") {
34969
- const { loadTsEvalSuite: loadTsEvalSuite2 } = await import("./ts-eval-loader-EQJX3OLT-THE7D3GR.js");
35635
+ const { loadTsEvalSuite: loadTsEvalSuite2 } = await import("./ts-eval-loader-4DU65XGW-YM47FFG2.js");
34970
35636
  const suite = await loadTsEvalSuite2(evalFilePath, resolveToAbsolutePath(repoRoot), options);
34971
35637
  return suite.tests;
34972
35638
  }
@@ -34977,11 +35643,11 @@ var loadEvalCases = loadTests;
34977
35643
  async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
34978
35644
  const verbose = options?.verbose ?? false;
34979
35645
  const filterPattern = options?.filter;
34980
- const absoluteTestPath = path43.resolve(evalFilePath);
35646
+ const absoluteTestPath = path44.resolve(evalFilePath);
34981
35647
  const repoRootPath = resolveToAbsolutePath(repoRoot);
34982
35648
  const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
34983
35649
  const config2 = await loadConfig(absoluteTestPath, repoRootPath);
34984
- const rawFile = await readFile16(absoluteTestPath, "utf8");
35650
+ const rawFile = await readFile17(absoluteTestPath, "utf8");
34985
35651
  const rawParsed = parseYamlValue(rawFile);
34986
35652
  const rawCaseSnapshots = buildRawInlineTestSnapshots(rawParsed);
34987
35653
  const interpolated = interpolateEnv(rawParsed, process.env);
@@ -34990,7 +35656,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
34990
35656
  }
34991
35657
  const suite = interpolated;
34992
35658
  const suiteNameFromFile = asString5(suite.name)?.trim();
34993
- const fallbackSuiteName = path43.basename(absoluteTestPath).replace(/\.eval\.ya?ml$/i, "").replace(/\.ya?ml$/i, "") || "eval";
35659
+ const fallbackSuiteName = path44.basename(absoluteTestPath).replace(/\.eval\.ya?ml$/i, "").replace(/\.ya?ml$/i, "") || "eval";
34994
35660
  const suiteName = suiteNameFromFile && suiteNameFromFile.length > 0 ? suiteNameFromFile : fallbackSuiteName;
34995
35661
  const rawTestCases = resolveTests(suite);
34996
35662
  const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm-grader";
@@ -35000,10 +35666,10 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
35000
35666
  "<suite>",
35001
35667
  absoluteTestPath
35002
35668
  );
35003
- const evalFileDir = path43.dirname(absoluteTestPath);
35669
+ const evalFileDir = path44.dirname(absoluteTestPath);
35004
35670
  let expandedTestCases;
35005
35671
  if (typeof rawTestCases === "string") {
35006
- const externalPath = path43.resolve(evalFileDir, rawTestCases);
35672
+ const externalPath = path44.resolve(evalFileDir, rawTestCases);
35007
35673
  let isDir = false;
35008
35674
  try {
35009
35675
  const pathStat = await stat8(externalPath);
@@ -35307,7 +35973,7 @@ function collectInputSourceReferences(inputMessages) {
35307
35973
  references.push({
35308
35974
  kind: "input_file",
35309
35975
  displayPath,
35310
- ...typeof segment.resolvedPath === "string" ? { resolvedPath: path43.resolve(segment.resolvedPath) } : {}
35976
+ ...typeof segment.resolvedPath === "string" ? { resolvedPath: path44.resolve(segment.resolvedPath) } : {}
35311
35977
  });
35312
35978
  }
35313
35979
  }
@@ -35380,7 +36046,7 @@ function collectSingleGraderSourceReferences(evaluator) {
35380
36046
  references.push({
35381
36047
  kind: "code_grader_command",
35382
36048
  displayPath: evaluator.aggregator.path,
35383
- resolvedPath: path43.resolve(evaluator.aggregator.cwd ?? "", evaluator.aggregator.path),
36049
+ resolvedPath: path44.resolve(evaluator.aggregator.cwd ?? "", evaluator.aggregator.path),
35384
36050
  graderName: evaluator.name
35385
36051
  });
35386
36052
  } else if (evaluator.aggregator.type === "llm-grader" && evaluator.aggregator.promptPath) {
@@ -35413,9 +36079,9 @@ function dedupeSourceReferences(references) {
35413
36079
  return deduped;
35414
36080
  }
35415
36081
  function toPortableRelativePath(root, candidate) {
35416
- const relative = path43.relative(root, candidate);
35417
- if (relative && !relative.startsWith("..") && !path43.isAbsolute(relative)) {
35418
- return relative.split(path43.sep).join("/");
36082
+ const relative = path44.relative(root, candidate);
36083
+ if (relative && !relative.startsWith("..") && !path44.isAbsolute(relative)) {
36084
+ return relative.split(path44.sep).join("/");
35419
36085
  }
35420
36086
  return void 0;
35421
36087
  }
@@ -35469,8 +36135,8 @@ function parseWorkspaceScriptConfig(raw, evalFileDir) {
35469
36135
  if (!command) return void 0;
35470
36136
  const timeoutMs = typeof obj.timeout_ms === "number" ? obj.timeout_ms : void 0;
35471
36137
  let cwd = typeof obj.cwd === "string" ? obj.cwd : void 0;
35472
- if (cwd && !path43.isAbsolute(cwd)) {
35473
- cwd = path43.resolve(evalFileDir, cwd);
36138
+ if (cwd && !path44.isAbsolute(cwd)) {
36139
+ cwd = path44.resolve(evalFileDir, cwd);
35474
36140
  }
35475
36141
  const config2 = { command };
35476
36142
  if (timeoutMs !== void 0) {
@@ -35508,10 +36174,10 @@ function parseWorkspaceHooksConfig(raw, evalFileDir) {
35508
36174
  }
35509
36175
  async function resolveWorkspaceConfig(raw, evalFileDir) {
35510
36176
  if (typeof raw === "string") {
35511
- const workspaceFilePath = path43.resolve(evalFileDir, raw);
36177
+ const workspaceFilePath = path44.resolve(evalFileDir, raw);
35512
36178
  let content;
35513
36179
  try {
35514
- content = await readFile16(workspaceFilePath, "utf8");
36180
+ content = await readFile17(workspaceFilePath, "utf8");
35515
36181
  } catch {
35516
36182
  throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
35517
36183
  }
@@ -35521,7 +36187,7 @@ async function resolveWorkspaceConfig(raw, evalFileDir) {
35521
36187
  `Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
35522
36188
  );
35523
36189
  }
35524
- const workspaceFileDir = path43.dirname(workspaceFilePath);
36190
+ const workspaceFileDir = path44.dirname(workspaceFilePath);
35525
36191
  const resolvedWorkspace = parseWorkspaceConfig(parsed, workspaceFileDir);
35526
36192
  if (resolvedWorkspace) {
35527
36193
  return { ...resolvedWorkspace, workspaceFileDir };
@@ -35555,8 +36221,8 @@ function parseWorkspaceConfig(raw, evalFileDir) {
35555
36221
  throw new Error("workspace.static has been removed. Use workspace.mode='static'.");
35556
36222
  }
35557
36223
  let template = typeof obj.template === "string" ? obj.template : void 0;
35558
- if (template && !path43.isAbsolute(template)) {
35559
- template = path43.resolve(evalFileDir, template);
36224
+ if (template && !path44.isAbsolute(template)) {
36225
+ template = path44.resolve(evalFileDir, template);
35560
36226
  }
35561
36227
  const isolation = obj.isolation === "shared" || obj.isolation === "per_test" ? obj.isolation : void 0;
35562
36228
  const repos = Array.isArray(obj.repos) ? obj.repos.map(parseRepoConfig).filter(Boolean) : void 0;
@@ -35695,6 +36361,9 @@ ${detailBlock}${ANSI_RESET8}`);
35695
36361
  }
35696
36362
  var execFileAsync3 = promisify7(execFile3);
35697
36363
  var WORKSPACE_GIT_TIMEOUT_MS = 3e5;
36364
+ function pathFromRoot(root) {
36365
+ return root instanceof URL ? fileURLToPath5(root) : String(root);
36366
+ }
35698
36367
  function classifyQualityStatus(score, threshold = DEFAULT_THRESHOLD) {
35699
36368
  return score >= threshold ? "ok" : "quality_failure";
35700
36369
  }
@@ -35747,7 +36416,7 @@ function workspaceGitEnv() {
35747
36416
  };
35748
36417
  }
35749
36418
  async function resetWorkspaceRoot(workspacePath, resetMode, baselineRef) {
35750
- if (!existsSync5(path44.join(workspacePath, ".git"))) {
36419
+ if (!existsSync5(path45.join(workspacePath, ".git"))) {
35751
36420
  return false;
35752
36421
  }
35753
36422
  const cleanFlag = resetMode === "strict" ? "-fdx" : "-fd";
@@ -35790,18 +36459,18 @@ function validateDependencyGraph(tests) {
35790
36459
  }
35791
36460
  const visited = /* @__PURE__ */ new Set();
35792
36461
  const visiting = /* @__PURE__ */ new Set();
35793
- function visit(id, path47) {
36462
+ function visit(id, path48) {
35794
36463
  if (visiting.has(id)) {
35795
- const cycle = [...path47.slice(path47.indexOf(id)), id];
36464
+ const cycle = [...path48.slice(path48.indexOf(id)), id];
35796
36465
  throw new Error(`Circular dependency detected: ${cycle.join(" \u2192 ")}`);
35797
36466
  }
35798
36467
  if (visited.has(id)) return;
35799
36468
  visiting.add(id);
35800
- path47.push(id);
36469
+ path48.push(id);
35801
36470
  for (const dep of depMap.get(id) ?? []) {
35802
- visit(dep, path47);
36471
+ visit(dep, path48);
35803
36472
  }
35804
- path47.pop();
36473
+ path48.pop();
35805
36474
  visiting.delete(id);
35806
36475
  visited.add(id);
35807
36476
  }
@@ -35890,8 +36559,10 @@ async function runEvaluation(options) {
35890
36559
  retainOnFailure,
35891
36560
  graderTarget: cliGraderTarget,
35892
36561
  model: cliModel,
35893
- threshold: scoreThreshold
36562
+ threshold: scoreThreshold,
36563
+ replayRecording
35894
36564
  } = options;
36565
+ const repoRootPath = pathFromRoot(repoRoot);
35895
36566
  let useCache = options.useCache;
35896
36567
  if (trials && trials.count > 1 && useCache) {
35897
36568
  console.warn(
@@ -35981,7 +36652,7 @@ async function runEvaluation(options) {
35981
36652
  ];
35982
36653
  const evaluatorRegistry = buildEvaluatorRegistry(evaluators, resolveGraderProvider);
35983
36654
  const typeRegistry = createBuiltinRegistry();
35984
- const discoveryBaseDir = evalFilePath ? path44.dirname(path44.resolve(evalFilePath)) : process.cwd();
36655
+ const discoveryBaseDir = evalFilePath ? path45.dirname(path45.resolve(evalFilePath)) : process.cwd();
35985
36656
  const evalDir = discoveryBaseDir;
35986
36657
  await discoverAssertions(typeRegistry, discoveryBaseDir);
35987
36658
  await discoverGraders(typeRegistry, discoveryBaseDir);
@@ -36023,7 +36694,10 @@ async function runEvaluation(options) {
36023
36694
  agentTimeoutMs,
36024
36695
  targetResolver,
36025
36696
  availableTargets,
36026
- threshold: scoreThreshold
36697
+ threshold: scoreThreshold,
36698
+ replayRecording,
36699
+ evalFilePath,
36700
+ repoRoot: repoRootPath
36027
36701
  });
36028
36702
  } catch (error40) {
36029
36703
  if (verbose) {
@@ -36135,7 +36809,7 @@ async function runEvaluation(options) {
36135
36809
  const isEmpty = dirExists ? (await readdir8(configuredStaticPath)).length === 0 : false;
36136
36810
  if (isYamlConfiguredPath && (!dirExists || isEmpty)) {
36137
36811
  if (!dirExists) {
36138
- await mkdir15(configuredStaticPath, { recursive: true });
36812
+ await mkdir16(configuredStaticPath, { recursive: true });
36139
36813
  }
36140
36814
  if (workspaceTemplate) {
36141
36815
  await copyDirectoryRecursive(workspaceTemplate, configuredStaticPath);
@@ -36180,15 +36854,14 @@ async function runEvaluation(options) {
36180
36854
  }
36181
36855
  } else if (!isPerTestIsolation && (suiteWorkspace?.hooks || suiteWorkspace?.repos?.length)) {
36182
36856
  sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
36183
- await mkdir15(sharedWorkspacePath, { recursive: true });
36857
+ await mkdir16(sharedWorkspacePath, { recursive: true });
36184
36858
  setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
36185
36859
  }
36186
36860
  try {
36187
36861
  let toDependencyResult2 = function(r) {
36188
- const outputText = extractLastAssistantContent(r.output);
36189
36862
  return {
36190
36863
  score: r.score,
36191
- output: outputText,
36864
+ output: r.output,
36192
36865
  workspace_path: r.workspacePath,
36193
36866
  details: r.scores ? Object.fromEntries(
36194
36867
  r.scores.map((s) => [s.name, { score: s.score, verdict: s.verdict }])
@@ -36222,7 +36895,7 @@ async function runEvaluation(options) {
36222
36895
  };
36223
36896
  var toDependencyResult = toDependencyResult2, checkDependencies = checkDependencies2, extractEvaluationCostUsd = extractEvaluationCostUsd2;
36224
36897
  if (suiteWorkspaceFile && sharedWorkspacePath) {
36225
- const copiedWorkspaceFile = path44.join(sharedWorkspacePath, path44.basename(suiteWorkspaceFile));
36898
+ const copiedWorkspaceFile = path45.join(sharedWorkspacePath, path45.basename(suiteWorkspaceFile));
36226
36899
  try {
36227
36900
  await stat9(copiedWorkspaceFile);
36228
36901
  suiteWorkspaceFile = copiedWorkspaceFile;
@@ -36238,7 +36911,7 @@ async function runEvaluation(options) {
36238
36911
  if (needsPerRepoCheck) {
36239
36912
  for (const repo of suiteWorkspace.repos) {
36240
36913
  if (!repo.path || !repo.source) continue;
36241
- const targetDir = path44.join(sharedWorkspacePath, repo.path);
36914
+ const targetDir = path45.join(sharedWorkspacePath, repo.path);
36242
36915
  if (existsSync5(targetDir)) {
36243
36916
  setupLog(`reusing existing repo at: ${targetDir}`);
36244
36917
  continue;
@@ -36427,6 +37100,7 @@ async function runEvaluation(options) {
36427
37100
  const workerId = nextWorkerId++;
36428
37101
  workerIdByEvalId.set(evalCase.id, workerId);
36429
37102
  if (runBudgetTracker?.isExceeded()) {
37103
+ const errorMessage = `Run budget exceeded ($${runBudgetTracker.currentCostUsd.toFixed(4)} / $${runBudgetTracker.budgetCapUsd.toFixed(4)})`;
36430
37104
  const budgetResult = {
36431
37105
  timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
36432
37106
  testId: evalCase.id,
@@ -36434,15 +37108,24 @@ async function runEvaluation(options) {
36434
37108
  category: evalCase.category,
36435
37109
  score: 0,
36436
37110
  assertions: [],
36437
- output: [],
37111
+ output: errorMessage,
37112
+ trace: buildTraceFromMessages({
37113
+ input: evalCase.input,
37114
+ output: [{ role: "assistant", content: errorMessage }],
37115
+ finalOutput: errorMessage,
37116
+ target: target.name,
37117
+ testId: evalCase.id,
37118
+ conversationId: evalCase.conversation_id,
37119
+ error: errorMessage
37120
+ }),
36438
37121
  target: target.name,
36439
- error: `Run budget exceeded ($${runBudgetTracker.currentCostUsd.toFixed(4)} / $${runBudgetTracker.budgetCapUsd.toFixed(4)})`,
37122
+ error: errorMessage,
36440
37123
  budgetExceeded: true,
36441
37124
  executionStatus: "execution_error",
36442
37125
  failureStage: "setup",
36443
37126
  failureReasonCode: "budget_exceeded",
36444
37127
  executionError: {
36445
- message: `Run budget exceeded ($${runBudgetTracker.currentCostUsd.toFixed(4)} / $${runBudgetTracker.budgetCapUsd.toFixed(4)})`,
37128
+ message: errorMessage,
36446
37129
  stage: "setup"
36447
37130
  }
36448
37131
  };
@@ -36463,6 +37146,7 @@ async function runEvaluation(options) {
36463
37146
  return budgetResult;
36464
37147
  }
36465
37148
  if (budgetUsd !== void 0 && budgetExhausted) {
37149
+ const errorMessage = `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${budgetUsd.toFixed(4)})`;
36466
37150
  const budgetResult = {
36467
37151
  timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
36468
37152
  testId: evalCase.id,
@@ -36470,15 +37154,24 @@ async function runEvaluation(options) {
36470
37154
  category: evalCase.category,
36471
37155
  score: 0,
36472
37156
  assertions: [],
36473
- output: [],
37157
+ output: errorMessage,
37158
+ trace: buildTraceFromMessages({
37159
+ input: evalCase.input,
37160
+ output: [{ role: "assistant", content: errorMessage }],
37161
+ finalOutput: errorMessage,
37162
+ target: target.name,
37163
+ testId: evalCase.id,
37164
+ conversationId: evalCase.conversation_id,
37165
+ error: errorMessage
37166
+ }),
36474
37167
  target: target.name,
36475
- error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${budgetUsd.toFixed(4)})`,
37168
+ error: errorMessage,
36476
37169
  budgetExceeded: true,
36477
37170
  executionStatus: "execution_error",
36478
37171
  failureStage: "setup",
36479
37172
  failureReasonCode: "budget_exceeded",
36480
37173
  executionError: {
36481
- message: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${budgetUsd.toFixed(4)})`,
37174
+ message: errorMessage,
36482
37175
  stage: "setup"
36483
37176
  }
36484
37177
  };
@@ -36507,7 +37200,16 @@ async function runEvaluation(options) {
36507
37200
  category: evalCase.category,
36508
37201
  score: 0,
36509
37202
  assertions: [],
36510
- output: [],
37203
+ output: errorMsg,
37204
+ trace: buildTraceFromMessages({
37205
+ input: evalCase.input,
37206
+ output: [{ role: "assistant", content: errorMsg }],
37207
+ finalOutput: errorMsg,
37208
+ target: target.name,
37209
+ testId: evalCase.id,
37210
+ conversationId: evalCase.conversation_id,
37211
+ error: errorMsg
37212
+ }),
36511
37213
  target: target.name,
36512
37214
  error: errorMsg,
36513
37215
  executionStatus: "execution_error",
@@ -36572,6 +37274,9 @@ async function runEvaluation(options) {
36572
37274
  verbose,
36573
37275
  threshold: scoreThreshold,
36574
37276
  targetHooks: options.targetHooks,
37277
+ replayRecording,
37278
+ evalFilePath,
37279
+ repoRoot: repoRootPath,
36575
37280
  ...depResults && Object.keys(depResults).length > 0 ? { dependencyResults: depResults } : {}
36576
37281
  };
36577
37282
  let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
@@ -36653,7 +37358,16 @@ async function runEvaluation(options) {
36653
37358
  category: evalCase.category,
36654
37359
  score: 0,
36655
37360
  assertions: [],
36656
- output: [],
37361
+ output: errorMsg,
37362
+ trace: buildTraceFromMessages({
37363
+ input: evalCase.input,
37364
+ output: [{ role: "assistant", content: errorMsg }],
37365
+ finalOutput: errorMsg,
37366
+ target: target.name,
37367
+ testId: evalCase.id,
37368
+ conversationId: evalCase.conversation_id,
37369
+ error: errorMsg
37370
+ }),
36657
37371
  target: target.name,
36658
37372
  error: errorMsg,
36659
37373
  executionStatus: "execution_error",
@@ -36803,7 +37517,10 @@ async function runBatchEvaluation(options) {
36803
37517
  agentTimeoutMs,
36804
37518
  targetResolver,
36805
37519
  availableTargets,
36806
- threshold: batchThreshold
37520
+ threshold: batchThreshold,
37521
+ replayRecording,
37522
+ evalFilePath,
37523
+ repoRoot
36807
37524
  } = options;
36808
37525
  const promptInputsList = [];
36809
37526
  const formattingMode = usesFileReferencePrompt(provider) ? "agent" : "lm";
@@ -36817,7 +37534,9 @@ async function runBatchEvaluation(options) {
36817
37534
  question: promptInputs.question,
36818
37535
  systemPrompt: promptInputs.systemMessage,
36819
37536
  inputFiles: evalCase.file_paths,
36820
- evalCaseId: evalCase.id
37537
+ evalCaseId: evalCase.id,
37538
+ suite: evalCase.suite,
37539
+ evalFilePath
36821
37540
  };
36822
37541
  });
36823
37542
  const batchResponse = await provider.invokeBatch?.(batchRequests);
@@ -36845,6 +37564,16 @@ async function runBatchEvaluation(options) {
36845
37564
  const evalCase = evalCases[i];
36846
37565
  const promptInputs = promptInputsList[i];
36847
37566
  const providerResponse = batchResponse[i];
37567
+ await maybeRecordReplayFixture({
37568
+ replayRecording,
37569
+ evalCase,
37570
+ evalFilePath,
37571
+ repoRoot,
37572
+ target,
37573
+ attempt: 0,
37574
+ response: providerResponse,
37575
+ nowFn
37576
+ });
36848
37577
  const output = providerResponse.output;
36849
37578
  const hasExecutionMetrics = providerResponse.tokenUsage !== void 0 || providerResponse.costUsd !== void 0 || providerResponse.durationMs !== void 0;
36850
37579
  const computed = output ? computeTraceSummary(output) : hasExecutionMetrics ? { trace: { eventCount: 0, toolCalls: {}, errorCount: 0 } } : void 0;
@@ -36892,6 +37621,10 @@ async function runBatchEvaluation(options) {
36892
37621
  if (providerError) {
36893
37622
  result = {
36894
37623
  ...result,
37624
+ trace: appendErrorEventToTrace(result.trace, providerError, {
37625
+ failure_stage: "agent",
37626
+ failure_reason_code: "provider_error"
37627
+ }),
36895
37628
  error: providerError,
36896
37629
  executionStatus: "execution_error",
36897
37630
  failureStage: "agent",
@@ -36950,6 +37683,27 @@ async function runBatchEvaluation(options) {
36950
37683
  }
36951
37684
  return results;
36952
37685
  }
37686
+ async function maybeRecordReplayFixture(options) {
37687
+ const { replayRecording, evalCase, evalFilePath, repoRoot, target, attempt, response, nowFn } = options;
37688
+ if (!replayRecording || target.kind === "replay") {
37689
+ return;
37690
+ }
37691
+ if (!evalFilePath || !repoRoot) {
37692
+ throw new Error("Replay recording requires evalFilePath and repoRoot");
37693
+ }
37694
+ const record2 = buildReplayFixtureRecord({
37695
+ evalCase,
37696
+ evalFilePath,
37697
+ repoRoot,
37698
+ target,
37699
+ sourceTarget: replayRecording.sourceTarget,
37700
+ attempt,
37701
+ variant: replayRecording.variant,
37702
+ response,
37703
+ now: nowFn
37704
+ });
37705
+ await appendReplayFixtureRecord(replayRecording.fixturesPath, record2);
37706
+ }
36953
37707
  async function runEvalCase(options) {
36954
37708
  const {
36955
37709
  evalCase,
@@ -36978,7 +37732,10 @@ async function runEvalCase(options) {
36978
37732
  evalDir,
36979
37733
  verbose,
36980
37734
  threshold: caseThreshold,
36981
- dependencyResults
37735
+ dependencyResults,
37736
+ replayRecording,
37737
+ evalFilePath,
37738
+ repoRoot
36982
37739
  } = options;
36983
37740
  const setupDebug = process.env.AGENTV_SETUP_DEBUG === "1";
36984
37741
  const formattingMode = usesFileReferencePrompt(provider) ? "agent" : "lm";
@@ -37020,7 +37777,7 @@ async function runEvalCase(options) {
37020
37777
  );
37021
37778
  }
37022
37779
  if (caseWorkspaceFile && workspacePath) {
37023
- const copiedFile = path44.join(workspacePath, path44.basename(caseWorkspaceFile));
37780
+ const copiedFile = path45.join(workspacePath, path45.basename(caseWorkspaceFile));
37024
37781
  try {
37025
37782
  await stat9(copiedFile);
37026
37783
  caseWorkspaceFile = copiedFile;
@@ -37030,7 +37787,7 @@ async function runEvalCase(options) {
37030
37787
  }
37031
37788
  if (!workspacePath && (evalCase.workspace?.hooks || evalCase.workspace?.repos?.length) && evalRunId) {
37032
37789
  workspacePath = getWorkspacePath(evalRunId, evalCase.id);
37033
- await mkdir15(workspacePath, { recursive: true });
37790
+ await mkdir16(workspacePath, { recursive: true });
37034
37791
  }
37035
37792
  if (evalCase.workspace?.repos?.length && workspacePath) {
37036
37793
  const localPathErrors = RepoManager.validateLocalPaths(evalCase.workspace.repos);
@@ -37082,10 +37839,10 @@ async function runEvalCase(options) {
37082
37839
  const files = evalCase.metadata.agent_skills_files;
37083
37840
  if (baseDir && files.length > 0) {
37084
37841
  for (const relPath of files) {
37085
- const srcPath = path44.resolve(baseDir, relPath);
37086
- const destPath = path44.resolve(workspacePath, relPath);
37842
+ const srcPath = path45.resolve(baseDir, relPath);
37843
+ const destPath = path45.resolve(workspacePath, relPath);
37087
37844
  try {
37088
- await mkdir15(path44.dirname(destPath), { recursive: true });
37845
+ await mkdir16(path45.dirname(destPath), { recursive: true });
37089
37846
  await copyFile2(srcPath, destPath);
37090
37847
  } catch (error40) {
37091
37848
  const message = error40 instanceof Error ? error40.message : String(error40);
@@ -37277,7 +38034,8 @@ async function runEvalCase(options) {
37277
38034
  verbose,
37278
38035
  threshold: evalCase.threshold ?? caseThreshold,
37279
38036
  targetResolver,
37280
- availableTargets
38037
+ availableTargets,
38038
+ evalFilePath
37281
38039
  });
37282
38040
  if (workspacePath && !isSharedWorkspace) {
37283
38041
  const shouldRetain = conversationResult.executionStatus === "ok" ? retainOnSuccess === "keep" || keepWorkspaces : retainOnFailure === "keep" || !forceCleanup && !keepWorkspaces;
@@ -37301,6 +38059,7 @@ async function runEvalCase(options) {
37301
38059
  target,
37302
38060
  promptInputs,
37303
38061
  attempt,
38062
+ evalFilePath,
37304
38063
  agentTimeoutMs,
37305
38064
  signal,
37306
38065
  cwd: workspacePath,
@@ -37331,6 +38090,7 @@ async function runEvalCase(options) {
37331
38090
  target,
37332
38091
  promptInputs,
37333
38092
  attempt: 0,
38093
+ evalFilePath,
37334
38094
  agentTimeoutMs,
37335
38095
  signal,
37336
38096
  cwd: workspacePath,
@@ -37366,6 +38126,19 @@ async function runEvalCase(options) {
37366
38126
  }
37367
38127
  return errorResult;
37368
38128
  }
38129
+ const responseWasCached = cachedResponse !== void 0 && providerResponse === cachedResponse;
38130
+ if (!responseWasCached) {
38131
+ await maybeRecordReplayFixture({
38132
+ replayRecording,
38133
+ evalCase,
38134
+ evalFilePath,
38135
+ repoRoot,
38136
+ target: targetUsed ? { ...target, name: targetUsed } : target,
38137
+ attempt,
38138
+ response: providerResponse,
38139
+ nowFn
38140
+ });
38141
+ }
37369
38142
  if (cacheKey && cache && !cachedResponse) {
37370
38143
  await cache.set(cacheKey, providerResponse);
37371
38144
  }
@@ -37512,6 +38285,10 @@ ${providerFileChanges}` : providerFileChanges;
37512
38285
  ...result,
37513
38286
  ...targetUsedField,
37514
38287
  evalRun,
38288
+ trace: appendErrorEventToTrace(result.trace, providerError, {
38289
+ failure_stage: "agent",
38290
+ failure_reason_code: "provider_error"
38291
+ }),
37515
38292
  error: providerError,
37516
38293
  executionStatus,
37517
38294
  failureStage: "agent",
@@ -37525,6 +38302,10 @@ ${providerFileChanges}` : providerFileChanges;
37525
38302
  ...targetUsedField,
37526
38303
  score: 0,
37527
38304
  evalRun,
38305
+ trace: appendErrorEventToTrace(result.trace, skippedEvaluatorError, {
38306
+ failure_stage: "evaluator",
38307
+ failure_reason_code: "evaluator_error"
38308
+ }),
37528
38309
  error: skippedEvaluatorError,
37529
38310
  executionStatus,
37530
38311
  failureStage: "evaluator",
@@ -37689,6 +38470,23 @@ async function evaluateCandidate(options) {
37689
38470
  threshold: evalThreshold,
37690
38471
  dependencyResults
37691
38472
  } = options;
38473
+ const input = buildResultInput(promptInputs);
38474
+ const outputMessages = output ?? [{ role: "assistant", content: candidate }];
38475
+ const evaluationTrace = buildTraceFromMessages({
38476
+ input,
38477
+ output: outputMessages,
38478
+ summary: trace,
38479
+ finalOutput: candidate,
38480
+ tokenUsage,
38481
+ costUsd,
38482
+ durationMs,
38483
+ startTime,
38484
+ endTime,
38485
+ provider: provider.kind,
38486
+ target: target.name,
38487
+ testId: evalCase.id,
38488
+ conversationId: evalCase.conversation_id
38489
+ });
37692
38490
  const gradeTimestamp = nowFn();
37693
38491
  const { score, scores } = await runEvaluatorsForCase({
37694
38492
  evalCase,
@@ -37703,7 +38501,7 @@ async function evaluateCandidate(options) {
37703
38501
  graderProvider,
37704
38502
  agentTimeoutMs,
37705
38503
  output,
37706
- trace,
38504
+ trace: evaluationTrace,
37707
38505
  costUsd,
37708
38506
  durationMs,
37709
38507
  tokenUsage,
@@ -37743,7 +38541,6 @@ async function evaluateCandidate(options) {
37743
38541
  ...lmRequest ? { lm: lmRequest } : {},
37744
38542
  ...evaluatorRequest ? { evaluator: evaluatorRequest } : {}
37745
38543
  } : void 0;
37746
- const input = buildResultInput(promptInputs);
37747
38544
  return {
37748
38545
  timestamp: completedAt.toISOString(),
37749
38546
  testId: evalCase.id,
@@ -37760,9 +38557,9 @@ async function evaluateCandidate(options) {
37760
38557
  endTime,
37761
38558
  requests,
37762
38559
  input,
37763
- output: output ?? [{ role: "assistant", content: candidate }],
38560
+ output: candidate,
37764
38561
  scores,
37765
- trace,
38562
+ trace: evaluationTrace,
37766
38563
  fileChanges,
37767
38564
  executionStatus: classifyQualityStatus(score.score, evalThreshold)
37768
38565
  };
@@ -37925,7 +38722,7 @@ async function runEvaluatorList(options) {
37925
38722
  dockerConfig,
37926
38723
  dependencyResults
37927
38724
  };
37928
- const evalFileDir = evalCase.file_paths[0] ? path44.dirname(evalCase.file_paths[0]) : process.cwd();
38725
+ const evalFileDir = evalCase.file_paths[0] ? path45.dirname(evalCase.file_paths[0]) : process.cwd();
37929
38726
  const dispatchContext = {
37930
38727
  graderProvider,
37931
38728
  targetResolver,
@@ -38082,7 +38879,8 @@ async function runConversationMode(options) {
38082
38879
  verbose,
38083
38880
  threshold,
38084
38881
  targetResolver,
38085
- availableTargets
38882
+ availableTargets,
38883
+ evalFilePath
38086
38884
  } = options;
38087
38885
  const turns = evalCase.turns;
38088
38886
  const aggregation = evalCase.aggregation ?? "mean";
@@ -38120,6 +38918,8 @@ async function runConversationMode(options) {
38120
38918
  question: userContent,
38121
38919
  chatPrompt: chatPromptForProvider,
38122
38920
  evalCaseId: `${evalCase.id}/turn-${turnIndex}`,
38921
+ suite: evalCase.suite,
38922
+ evalFilePath,
38123
38923
  signal,
38124
38924
  cwd: workspacePath,
38125
38925
  workspaceFile: caseWorkspaceFile,
@@ -38257,8 +39057,19 @@ async function runConversationMode(options) {
38257
39057
  role: m.role,
38258
39058
  content: m.content
38259
39059
  }));
38260
- const flatAssertions = allResultScores.flatMap((s) => [...s.assertions]);
38261
39060
  const totalDurationMs = Date.now() - caseStartMs;
39061
+ const finalOutput = extractLastAssistantContent(outputMessages);
39062
+ const trace = buildTraceFromMessages({
39063
+ input: evalCase.input,
39064
+ output: outputMessages,
39065
+ finalOutput,
39066
+ durationMs: totalDurationMs,
39067
+ provider: provider.kind,
39068
+ target: target.name,
39069
+ testId: evalCase.id,
39070
+ conversationId: evalCase.conversation_id
39071
+ });
39072
+ const flatAssertions = allResultScores.flatMap((s) => [...s.assertions]);
38262
39073
  return {
38263
39074
  timestamp: nowFn().toISOString(),
38264
39075
  testId: evalCase.id,
@@ -38267,7 +39078,8 @@ async function runConversationMode(options) {
38267
39078
  score: finalScore,
38268
39079
  assertions: flatAssertions,
38269
39080
  target: target.name,
38270
- output: outputMessages,
39081
+ output: finalOutput,
39082
+ trace,
38271
39083
  scores: allResultScores,
38272
39084
  executionStatus: classifyQualityStatus(finalScore, threshold ?? DEFAULT_THRESHOLD),
38273
39085
  input: evalCase.input.map((m) => ({
@@ -38339,6 +39151,7 @@ async function invokeProvider(provider, options) {
38339
39151
  evalCase,
38340
39152
  promptInputs,
38341
39153
  attempt,
39154
+ evalFilePath,
38342
39155
  agentTimeoutMs,
38343
39156
  signal,
38344
39157
  cwd,
@@ -38359,6 +39172,8 @@ async function invokeProvider(provider, options) {
38359
39172
  chatPrompt: promptInputs.chatPrompt,
38360
39173
  inputFiles: evalCase.file_paths,
38361
39174
  evalCaseId: evalCase.id,
39175
+ suite: evalCase.suite,
39176
+ evalFilePath,
38362
39177
  attempt,
38363
39178
  signal: controller.signal,
38364
39179
  cwd,
@@ -38400,6 +39215,16 @@ function buildErrorResult(evalCase, targetName, timestamp, error40, promptInputs
38400
39215
  ...lmRequest ? { lm: lmRequest } : {}
38401
39216
  } : void 0;
38402
39217
  const input = buildResultInput(promptInputs);
39218
+ const output = `Error occurred: ${message}`;
39219
+ const trace = buildTraceFromMessages({
39220
+ input,
39221
+ output: [{ role: "assistant", content: output }],
39222
+ finalOutput: output,
39223
+ target: targetName,
39224
+ testId: evalCase.id,
39225
+ conversationId: evalCase.conversation_id,
39226
+ error: message
39227
+ });
38403
39228
  return {
38404
39229
  timestamp: timestamp.toISOString(),
38405
39230
  testId: evalCase.id,
@@ -38411,7 +39236,8 @@ function buildErrorResult(evalCase, targetName, timestamp, error40, promptInputs
38411
39236
  target: targetName,
38412
39237
  requests,
38413
39238
  input,
38414
- output: [{ role: "assistant", content: `Error occurred: ${message}` }],
39239
+ output,
39240
+ trace,
38415
39241
  error: message,
38416
39242
  executionStatus: "execution_error",
38417
39243
  failureStage,
@@ -38432,7 +39258,7 @@ function extractProviderError(response) {
38432
39258
  return trimmed.length > 0 ? trimmed : void 0;
38433
39259
  }
38434
39260
  function createCacheKey(provider, target, evalCase, promptInputs) {
38435
- const hash = createHash2("sha256");
39261
+ const hash = createHash3("sha256");
38436
39262
  hash.update(provider.id);
38437
39263
  hash.update(target.name);
38438
39264
  hash.update(evalCase.id);
@@ -38654,7 +39480,7 @@ async function evaluate(config2) {
38654
39480
  cliNoCache: false,
38655
39481
  yamlCache: config2.cache === void 0 ? materialized.cache : void 0
38656
39482
  });
38657
- const cache = cacheEnabled ? new ResponseCache(materialized.cachePath ? path45.resolve(materialized.cachePath) : void 0) : void 0;
39483
+ const cache = cacheEnabled ? new ResponseCache(materialized.cachePath ? path46.resolve(materialized.cachePath) : void 0) : void 0;
38658
39484
  const results = await runEvaluation({
38659
39485
  testFilePath,
38660
39486
  repoRoot,
@@ -38685,7 +39511,7 @@ async function evaluate(config2) {
38685
39511
  async function materializeEvalConfig(config2, options) {
38686
39512
  const baseDir = options?.baseDir ?? process.cwd();
38687
39513
  const repoRoot = options?.repoRoot ?? await findGitRoot(baseDir) ?? baseDir;
38688
- const testFilePath = config2.specFile ? path45.resolve(baseDir, config2.specFile) : path45.join(baseDir, "__programmatic__.yaml");
39514
+ const testFilePath = config2.specFile ? path46.resolve(baseDir, config2.specFile) : path46.join(baseDir, "__programmatic__.yaml");
38689
39515
  const effectiveFilter = options?.filter ?? config2.filter;
38690
39516
  if (config2.specFile) {
38691
39517
  const suite = await loadTestSuite(testFilePath, repoRoot, {
@@ -38762,7 +39588,7 @@ function convertAssertions(entries) {
38762
39588
  }
38763
39589
  function buildInlineEvalTests(config2, options) {
38764
39590
  const suiteWorkspace = config2.beforeAll ? { hooks: { before_all: toBeforeAllHook(config2.beforeAll) } } : void 0;
38765
- const derivedSuiteName = path45.basename(options.testFilePath).replace(/\.eval\.[cm]?ts$/i, "").replace(/\.[cm]?ts$/i, "");
39591
+ const derivedSuiteName = path46.basename(options.testFilePath).replace(/\.eval\.[cm]?ts$/i, "").replace(/\.[cm]?ts$/i, "");
38766
39592
  const suiteName = config2.metadata?.name ?? (derivedSuiteName || "eval");
38767
39593
  return (config2.tests ?? []).filter((test) => !options.filter || matchesFilter4(test.id, options.filter)).map((test) => {
38768
39594
  const isConversation = test.mode === "conversation" || test.turns && test.turns.length > 0;
@@ -38858,10 +39684,10 @@ function computeSummary(results, durationMs, threshold = DEFAULT_THRESHOLD) {
38858
39684
  var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
38859
39685
  async function discoverDefaultTarget(repoRoot) {
38860
39686
  const cwd = process.cwd();
38861
- const chain = buildDirectoryChain(path45.join(cwd, "_placeholder"), repoRoot);
39687
+ const chain = buildDirectoryChain(path46.join(cwd, "_placeholder"), repoRoot);
38862
39688
  for (const dir of chain) {
38863
39689
  for (const candidate of TARGET_FILE_CANDIDATES) {
38864
- const targetsPath = path45.join(dir, candidate);
39690
+ const targetsPath = path46.join(dir, candidate);
38865
39691
  if (!existsSync6(targetsPath)) continue;
38866
39692
  try {
38867
39693
  const definitions = await readTargetDefinitions(targetsPath);
@@ -38878,7 +39704,7 @@ async function loadEnvHierarchy(repoRoot, startPath) {
38878
39704
  const chain = buildDirectoryChain(startPath, repoRoot);
38879
39705
  const envFiles = [];
38880
39706
  for (const dir of chain) {
38881
- const envPath = path45.join(dir, ".env");
39707
+ const envPath = path46.join(dir, ".env");
38882
39708
  if (existsSync6(envPath)) envFiles.push(envPath);
38883
39709
  }
38884
39710
  for (let i = 0; i < envFiles.length; i++) {
@@ -38904,7 +39730,7 @@ async function loadEnvHierarchy(repoRoot, startPath) {
38904
39730
  }
38905
39731
  var EXPORT_NAMES = ["default", "config", "evalConfig"];
38906
39732
  async function loadTsEvalFile(filePath) {
38907
- const absolutePath = path46.resolve(filePath);
39733
+ const absolutePath = path47.resolve(filePath);
38908
39734
  const moduleUrl = pathToFileURL2(absolutePath).href;
38909
39735
  const module = await import(moduleUrl);
38910
39736
  let config2;
@@ -38926,7 +39752,7 @@ async function loadTsEvalSuite(filePath, repoRoot, options) {
38926
39752
  const { config: config2, filePath: absolutePath } = await loadTsEvalFile(filePath);
38927
39753
  const materialized = await materializeEvalConfig(config2, {
38928
39754
  repoRoot,
38929
- baseDir: path46.dirname(absolutePath),
39755
+ baseDir: path47.dirname(absolutePath),
38930
39756
  filter: options?.filter,
38931
39757
  category: options?.category
38932
39758
  });
@@ -39034,6 +39860,11 @@ export {
39034
39860
  NORMALIZED_TRACE_EVENT_TYPES,
39035
39861
  NORMALIZED_TOOL_STATUSES,
39036
39862
  NORMALIZED_REDACTION_LEVELS,
39863
+ TRACE_SCHEMA_VERSION,
39864
+ TRACE_SOURCE_KINDS,
39865
+ TRACE_EVENT_TYPES,
39866
+ TRACE_TOOL_STATUSES,
39867
+ TRACE_REDACTION_LEVELS,
39037
39868
  NormalizedRedactionStateWireSchema,
39038
39869
  NormalizedTraceErrorWireSchema,
39039
39870
  NormalizedTraceSourceWireSchema,
@@ -39046,8 +39877,24 @@ export {
39046
39877
  NormalizedTraceToolWireSchema,
39047
39878
  NormalizedTraceEventWireSchema,
39048
39879
  NormalizedTrajectoryWireSchema,
39880
+ TraceRedactionStateWireSchema,
39881
+ TraceErrorWireSchema,
39882
+ TraceSourceWireSchema,
39883
+ TraceSessionWireSchema,
39884
+ TraceBranchWireSchema,
39885
+ TraceSourceRefWireSchema,
39886
+ TraceRawEvidenceWireSchema,
39887
+ TraceMessageWireSchema,
39888
+ TraceModelWireSchema,
39889
+ TraceToolWireSchema,
39890
+ TraceEventWireSchema,
39891
+ TraceArtifactWireSchema,
39049
39892
  toNormalizedTrajectoryWire,
39050
39893
  fromNormalizedTrajectoryWire,
39894
+ toTraceArtifactWire,
39895
+ fromTraceArtifactWire,
39896
+ buildTraceFromMessages,
39897
+ appendErrorEventToTrace,
39051
39898
  computeTraceSummary,
39052
39899
  getSelectedTrajectoryEvents,
39053
39900
  computeTraceSummaryFromTrajectory,
@@ -39094,6 +39941,14 @@ export {
39094
39941
  consumePiLogEntries,
39095
39942
  subscribeToPiLogEntries,
39096
39943
  ProviderRegistry,
39944
+ REPLAY_FIXTURE_SCHEMA_VERSION,
39945
+ readReplayFixtureRecords,
39946
+ serializeReplayFixtureRecord,
39947
+ appendReplayFixtureRecord,
39948
+ findReplayFixtureRecord,
39949
+ replayFixtureRecordToProviderResponse,
39950
+ buildReplayFixtureRecord,
39951
+ ReplayProvider,
39097
39952
  ensureVSCodeSubagents,
39098
39953
  readTargetDefinitions,
39099
39954
  listTargetNames,
@@ -39150,4 +40005,4 @@ export {
39150
40005
  loadTsEvalFile,
39151
40006
  loadTsEvalSuite
39152
40007
  };
39153
- //# sourceMappingURL=chunk-6QEIZ33V.js.map
40008
+ //# sourceMappingURL=chunk-TUTURE2B.js.map