agentv 2.10.0 → 2.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -148,7 +148,7 @@ var require_dist = __commonJS({
148
148
  }
149
149
  });
150
150
 
151
- // ../../packages/core/dist/chunk-7Q4PH265.js
151
+ // ../../packages/core/dist/chunk-REN5PS7B.js
152
152
  import { constants } from "node:fs";
153
153
  import { access, readFile } from "node:fs/promises";
154
154
  import path from "node:path";
@@ -632,8 +632,8 @@ function getErrorMap() {
632
632
 
633
633
  // ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/helpers/parseUtil.js
634
634
  var makeIssue = (params) => {
635
- const { data, path: path39, errorMaps, issueData } = params;
636
- const fullPath = [...path39, ...issueData.path || []];
635
+ const { data, path: path40, errorMaps, issueData } = params;
636
+ const fullPath = [...path40, ...issueData.path || []];
637
637
  const fullIssue = {
638
638
  ...issueData,
639
639
  path: fullPath
@@ -749,11 +749,11 @@ var errorUtil;
749
749
 
750
750
  // ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/types.js
751
751
  var ParseInputLazyPath = class {
752
- constructor(parent, value, path39, key) {
752
+ constructor(parent, value, path40, key) {
753
753
  this._cachedPath = [];
754
754
  this.parent = parent;
755
755
  this.data = value;
756
- this._path = path39;
756
+ this._path = path40;
757
757
  this._key = key;
758
758
  }
759
759
  get path() {
@@ -4195,7 +4195,7 @@ var coerce = {
4195
4195
  };
4196
4196
  var NEVER = INVALID;
4197
4197
 
4198
- // ../../packages/core/dist/chunk-7Q4PH265.js
4198
+ // ../../packages/core/dist/chunk-REN5PS7B.js
4199
4199
  var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
4200
4200
  var TEST_MESSAGE_ROLES = TEST_MESSAGE_ROLE_VALUES;
4201
4201
  var TEST_MESSAGE_ROLE_SET = new Set(TEST_MESSAGE_ROLE_VALUES);
@@ -4255,6 +4255,13 @@ var EVALUATOR_KIND_VALUES = [
4255
4255
  "execution_metrics",
4256
4256
  "agent_judge",
4257
4257
  "contains",
4258
+ "contains_any",
4259
+ "contains_all",
4260
+ "icontains",
4261
+ "icontains_any",
4262
+ "icontains_all",
4263
+ "starts_with",
4264
+ "ends_with",
4258
4265
  "regex",
4259
4266
  "is_json",
4260
4267
  "equals",
@@ -4641,17 +4648,17 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
4641
4648
  providerBatching,
4642
4649
  config: resolveCodexConfig(parsed, env, evalFilePath)
4643
4650
  };
4644
- case "copilot":
4645
4651
  case "copilot-sdk":
4646
4652
  case "copilot_sdk":
4647
4653
  return {
4648
- kind: "copilot",
4654
+ kind: "copilot-sdk",
4649
4655
  name: parsed.name,
4650
4656
  judgeTarget: parsed.judge_target,
4651
4657
  workers: parsed.workers,
4652
4658
  providerBatching,
4653
4659
  config: resolveCopilotSdkConfig(parsed, env, evalFilePath)
4654
4660
  };
4661
+ case "copilot":
4655
4662
  case "copilot-cli":
4656
4663
  return {
4657
4664
  kind: "copilot-cli",
@@ -5262,8 +5269,8 @@ function resolveCliConfig(target, env, evalFilePath) {
5262
5269
  const parseResult = CliTargetInputSchema.safeParse(target, { errorMap: cliErrorMap });
5263
5270
  if (!parseResult.success) {
5264
5271
  const firstError = parseResult.error.errors[0];
5265
- const path39 = firstError?.path.join(".") || "";
5266
- const prefix = path39 ? `${target.name} ${path39}: ` : `${target.name}: `;
5272
+ const path310 = firstError?.path.join(".") || "";
5273
+ const prefix = path310 ? `${target.name} ${path310}: ` : `${target.name}: `;
5267
5274
  throw new Error(`${prefix}${firstError?.message}`);
5268
5275
  }
5269
5276
  const normalized = normalizeCliTargetInput(parseResult.data, env, evalFilePath);
@@ -5471,7 +5478,7 @@ function resolveOptionalNumberArray(source, description) {
5471
5478
  }
5472
5479
  var AGENT_PROVIDER_KINDS = [
5473
5480
  "codex",
5474
- "copilot",
5481
+ "copilot-sdk",
5475
5482
  "copilot-cli",
5476
5483
  "pi-coding-agent",
5477
5484
  "claude",
@@ -5483,7 +5490,7 @@ var KNOWN_PROVIDERS = [
5483
5490
  "anthropic",
5484
5491
  "gemini",
5485
5492
  "codex",
5486
- "copilot",
5493
+ "copilot-sdk",
5487
5494
  "copilot-cli",
5488
5495
  "pi-coding-agent",
5489
5496
  "pi-agent-sdk",
@@ -5502,10 +5509,10 @@ var PROVIDER_ALIASES = [
5502
5509
  // alias for "gemini"
5503
5510
  "codex-cli",
5504
5511
  // alias for "codex"
5505
- "copilot-sdk",
5506
- // alias for "copilot"
5512
+ "copilot",
5513
+ // alias for "copilot-cli" (default copilot experience)
5507
5514
  "copilot_sdk",
5508
- // alias for "copilot" (underscore variant)
5515
+ // alias for "copilot-sdk" (underscore variant)
5509
5516
  "pi",
5510
5517
  // alias for "pi-coding-agent"
5511
5518
  "claude-code",
@@ -6654,10 +6661,10 @@ function assignProp(target, prop, value) {
6654
6661
  configurable: true
6655
6662
  });
6656
6663
  }
6657
- function getElementAtPath(obj, path39) {
6658
- if (!path39)
6664
+ function getElementAtPath(obj, path40) {
6665
+ if (!path40)
6659
6666
  return obj;
6660
- return path39.reduce((acc, key) => acc?.[key], obj);
6667
+ return path40.reduce((acc, key) => acc?.[key], obj);
6661
6668
  }
6662
6669
  function promiseAllObject(promisesObj) {
6663
6670
  const keys = Object.keys(promisesObj);
@@ -6977,11 +6984,11 @@ function aborted(x, startIndex = 0) {
6977
6984
  }
6978
6985
  return false;
6979
6986
  }
6980
- function prefixIssues(path39, issues) {
6987
+ function prefixIssues(path40, issues) {
6981
6988
  return issues.map((iss) => {
6982
6989
  var _a17;
6983
6990
  (_a17 = iss).path ?? (_a17.path = []);
6984
- iss.path.unshift(path39);
6991
+ iss.path.unshift(path40);
6985
6992
  return iss;
6986
6993
  });
6987
6994
  }
@@ -7118,7 +7125,7 @@ function treeifyError(error40, _mapper) {
7118
7125
  return issue2.message;
7119
7126
  };
7120
7127
  const result = { errors: [] };
7121
- const processError = (error41, path39 = []) => {
7128
+ const processError = (error41, path40 = []) => {
7122
7129
  var _a17, _b8;
7123
7130
  for (const issue2 of error41.issues) {
7124
7131
  if (issue2.code === "invalid_union" && issue2.errors.length) {
@@ -7128,7 +7135,7 @@ function treeifyError(error40, _mapper) {
7128
7135
  } else if (issue2.code === "invalid_element") {
7129
7136
  processError({ issues: issue2.issues }, issue2.path);
7130
7137
  } else {
7131
- const fullpath = [...path39, ...issue2.path];
7138
+ const fullpath = [...path40, ...issue2.path];
7132
7139
  if (fullpath.length === 0) {
7133
7140
  result.errors.push(mapper(issue2));
7134
7141
  continue;
@@ -7158,9 +7165,9 @@ function treeifyError(error40, _mapper) {
7158
7165
  processError(error40);
7159
7166
  return result;
7160
7167
  }
7161
- function toDotPath(path39) {
7168
+ function toDotPath(path40) {
7162
7169
  const segs = [];
7163
- for (const seg of path39) {
7170
+ for (const seg of path40) {
7164
7171
  if (typeof seg === "number")
7165
7172
  segs.push(`[${seg}]`);
7166
7173
  else if (typeof seg === "symbol")
@@ -26713,14 +26720,14 @@ function createAzure(options = {}) {
26713
26720
  description: "Azure OpenAI resource name"
26714
26721
  });
26715
26722
  const apiVersion = (_a17 = options.apiVersion) != null ? _a17 : "v1";
26716
- const url2 = ({ path: path39, modelId }) => {
26723
+ const url2 = ({ path: path40, modelId }) => {
26717
26724
  var _a24;
26718
26725
  const baseUrlPrefix = (_a24 = options.baseURL) != null ? _a24 : `https://${getResourceName()}.openai.azure.com/openai`;
26719
26726
  let fullUrl;
26720
26727
  if (options.useDeploymentBasedUrls) {
26721
- fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${path39}`);
26728
+ fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${path40}`);
26722
26729
  } else {
26723
- fullUrl = new URL(`${baseUrlPrefix}/v1${path39}`);
26730
+ fullUrl = new URL(`${baseUrlPrefix}/v1${path40}`);
26724
26731
  }
26725
26732
  fullUrl.searchParams.set("api-version", apiVersion);
26726
26733
  return fullUrl.toString();
@@ -33952,9 +33959,9 @@ import { randomBytes } from "node:crypto";
33952
33959
  import { createServer } from "node:http";
33953
33960
  import fs2 from "node:fs/promises";
33954
33961
  import path30 from "node:path";
33955
- import { createHash, randomUUID as randomUUID7 } from "node:crypto";
33956
- import { mkdir as mkdir11 } from "node:fs/promises";
33957
- import path36 from "node:path";
33962
+ import { createHash as createHash2, randomUUID as randomUUID7 } from "node:crypto";
33963
+ import { mkdir as mkdir12 } from "node:fs/promises";
33964
+ import path37 from "node:path";
33958
33965
  import micromatch4 from "micromatch";
33959
33966
  import { readFileSync } from "node:fs";
33960
33967
  import path31 from "node:path";
@@ -33967,12 +33974,19 @@ import { promisify as promisify4 } from "node:util";
33967
33974
  import { cp, mkdir as mkdir10, readdir as readdir3, rm as rm4, stat as stat5 } from "node:fs/promises";
33968
33975
  import os3 from "node:os";
33969
33976
  import path34 from "node:path";
33970
- import { readdir as readdir4, stat as stat6 } from "node:fs/promises";
33971
- import path35 from "node:path";
33977
+ import { execFile } from "node:child_process";
33978
+ import { createHash } from "node:crypto";
33972
33979
  import { existsSync as existsSync2 } from "node:fs";
33973
- import path37 from "node:path";
33974
- import { mkdir as mkdir12, readFile as readFile11, writeFile as writeFile7 } from "node:fs/promises";
33980
+ import { mkdir as mkdir11, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
33981
+ import os4 from "node:os";
33982
+ import path35 from "node:path";
33983
+ import { promisify as promisify5 } from "node:util";
33984
+ import { readdir as readdir4, stat as stat6 } from "node:fs/promises";
33985
+ import path36 from "node:path";
33986
+ import { existsSync as existsSync3 } from "node:fs";
33975
33987
  import path38 from "node:path";
33988
+ import { mkdir as mkdir13, readFile as readFile11, writeFile as writeFile8 } from "node:fs/promises";
33989
+ import path39 from "node:path";
33976
33990
  function computeTraceSummary(messages) {
33977
33991
  const toolCallCounts = {};
33978
33992
  const toolDurations = {};
@@ -35194,18 +35208,96 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
35194
35208
  });
35195
35209
  continue;
35196
35210
  }
35211
+ if (typeValue === "contains_any" || typeValue === "contains_all") {
35212
+ const value = asStringArrayStrict(rawEvaluator.value);
35213
+ if (!value || value.length === 0) {
35214
+ logWarning2(
35215
+ `Skipping ${typeValue} evaluator '${name16}' in '${evalId}': value must be a non-empty string array`
35216
+ );
35217
+ continue;
35218
+ }
35219
+ const weight2 = validateWeight(rawEvaluator.weight, name16, evalId);
35220
+ const required22 = parseRequired(rawEvaluator.required);
35221
+ evaluators.push({
35222
+ name: name16,
35223
+ type: typeValue,
35224
+ value,
35225
+ ...weight2 !== void 0 ? { weight: weight2 } : {},
35226
+ ...required22 !== void 0 ? { required: required22 } : {},
35227
+ ...negate !== void 0 ? { negate } : {}
35228
+ });
35229
+ continue;
35230
+ }
35231
+ if (typeValue === "icontains") {
35232
+ const value = asString(rawEvaluator.value);
35233
+ if (!value) {
35234
+ logWarning2(`Skipping icontains evaluator '${name16}' in '${evalId}': missing value`);
35235
+ continue;
35236
+ }
35237
+ const weight2 = validateWeight(rawEvaluator.weight, name16, evalId);
35238
+ const required22 = parseRequired(rawEvaluator.required);
35239
+ evaluators.push({
35240
+ name: name16,
35241
+ type: "icontains",
35242
+ value,
35243
+ ...weight2 !== void 0 ? { weight: weight2 } : {},
35244
+ ...required22 !== void 0 ? { required: required22 } : {},
35245
+ ...negate !== void 0 ? { negate } : {}
35246
+ });
35247
+ continue;
35248
+ }
35249
+ if (typeValue === "icontains_any" || typeValue === "icontains_all") {
35250
+ const value = asStringArrayStrict(rawEvaluator.value);
35251
+ if (!value || value.length === 0) {
35252
+ logWarning2(
35253
+ `Skipping ${typeValue} evaluator '${name16}' in '${evalId}': value must be a non-empty string array`
35254
+ );
35255
+ continue;
35256
+ }
35257
+ const weight2 = validateWeight(rawEvaluator.weight, name16, evalId);
35258
+ const required22 = parseRequired(rawEvaluator.required);
35259
+ evaluators.push({
35260
+ name: name16,
35261
+ type: typeValue,
35262
+ value,
35263
+ ...weight2 !== void 0 ? { weight: weight2 } : {},
35264
+ ...required22 !== void 0 ? { required: required22 } : {},
35265
+ ...negate !== void 0 ? { negate } : {}
35266
+ });
35267
+ continue;
35268
+ }
35269
+ if (typeValue === "starts_with" || typeValue === "ends_with") {
35270
+ const value = asString(rawEvaluator.value);
35271
+ if (!value) {
35272
+ logWarning2(`Skipping ${typeValue} evaluator '${name16}' in '${evalId}': missing value`);
35273
+ continue;
35274
+ }
35275
+ const weight2 = validateWeight(rawEvaluator.weight, name16, evalId);
35276
+ const required22 = parseRequired(rawEvaluator.required);
35277
+ evaluators.push({
35278
+ name: name16,
35279
+ type: typeValue,
35280
+ value,
35281
+ ...weight2 !== void 0 ? { weight: weight2 } : {},
35282
+ ...required22 !== void 0 ? { required: required22 } : {},
35283
+ ...negate !== void 0 ? { negate } : {}
35284
+ });
35285
+ continue;
35286
+ }
35197
35287
  if (typeValue === "regex") {
35198
35288
  const value = asString(rawEvaluator.value);
35199
35289
  if (!value) {
35200
35290
  logWarning2(`Skipping regex evaluator '${name16}' in '${evalId}': missing value`);
35201
35291
  continue;
35202
35292
  }
35293
+ const flags = asString(rawEvaluator.flags);
35203
35294
  const weight2 = validateWeight(rawEvaluator.weight, name16, evalId);
35204
35295
  const required22 = parseRequired(rawEvaluator.required);
35205
35296
  evaluators.push({
35206
35297
  name: name16,
35207
35298
  type: "regex",
35208
35299
  value,
35300
+ ...flags !== void 0 ? { flags } : {},
35209
35301
  ...weight2 !== void 0 ? { weight: weight2 } : {},
35210
35302
  ...required22 !== void 0 ? { required: required22 } : {},
35211
35303
  ...negate !== void 0 ? { negate } : {}
@@ -35378,15 +35470,43 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
35378
35470
  }
35379
35471
  return evaluators.length > 0 ? evaluators : void 0;
35380
35472
  }
35381
- var ASSERTION_TYPES = /* @__PURE__ */ new Set(["contains", "regex", "is_json", "equals", "rubrics"]);
35473
+ var ASSERTION_TYPES = /* @__PURE__ */ new Set([
35474
+ "contains",
35475
+ "contains_any",
35476
+ "contains_all",
35477
+ "icontains",
35478
+ "icontains_any",
35479
+ "icontains_all",
35480
+ "starts_with",
35481
+ "ends_with",
35482
+ "regex",
35483
+ "is_json",
35484
+ "equals",
35485
+ "rubrics"
35486
+ ]);
35382
35487
  function generateAssertionName(typeValue, rawEvaluator) {
35383
35488
  if (!ASSERTION_TYPES.has(typeValue)) {
35384
35489
  return void 0;
35385
35490
  }
35386
35491
  const value = asString(rawEvaluator.value);
35492
+ const arrayValue = Array.isArray(rawEvaluator.value) ? rawEvaluator.value : void 0;
35387
35493
  switch (typeValue) {
35388
35494
  case "contains":
35389
35495
  return value ? `contains-${value}` : "contains";
35496
+ case "contains_any":
35497
+ return arrayValue ? `contains_any-${arrayValue.length}` : "contains_any";
35498
+ case "contains_all":
35499
+ return arrayValue ? `contains_all-${arrayValue.length}` : "contains_all";
35500
+ case "icontains":
35501
+ return value ? `icontains-${value}` : "icontains";
35502
+ case "icontains_any":
35503
+ return arrayValue ? `icontains_any-${arrayValue.length}` : "icontains_any";
35504
+ case "icontains_all":
35505
+ return arrayValue ? `icontains_all-${arrayValue.length}` : "icontains_all";
35506
+ case "starts_with":
35507
+ return value ? `starts_with-${value}` : "starts_with";
35508
+ case "ends_with":
35509
+ return value ? `ends_with-${value}` : "ends_with";
35390
35510
  case "regex":
35391
35511
  return value ? `regex-${value.length > 30 ? value.slice(0, 30) : value}` : "regex";
35392
35512
  case "is_json":
@@ -35412,6 +35532,13 @@ function coerceEvaluator(candidate, contextId) {
35412
35532
  function asString(value) {
35413
35533
  return typeof value === "string" ? value : void 0;
35414
35534
  }
35535
/**
 * Narrow an unknown value to a non-empty array whose elements are all strings.
 *
 * @param {unknown} value - Value to validate (callers pass an evaluator's raw `value` field).
 * @returns {string[] | undefined} A fresh copy of the array when it is non-empty and
 *   every element is a string; otherwise `undefined`.
 */
function asStringArrayStrict(value) {
  if (!Array.isArray(value) || value.length === 0) {
    return void 0;
  }
  // Reject mixed arrays outright rather than filtering out the non-string
  // entries: silently dropping an entry would change what the assertion
  // checks without the author ever being told.
  if (!value.every((entry) => typeof entry === "string")) {
    return void 0;
  }
  // Return a copy so later mutation of the result cannot affect the input.
  return [...value];
}
35415
35542
  function asStringArray(value, description) {
35416
35543
  if (value === void 0) {
35417
35544
  return void 0;
@@ -36702,6 +36829,69 @@ function parseWorkspaceScriptConfig(raw, evalFileDir) {
36702
36829
  }
36703
36830
  return cwd ? { ...config2, cwd } : config2;
36704
36831
  }
36832
/**
 * Parse the `source` entry of a repo configuration.
 *
 * Two shapes are recognised: `{ type: "git", url }` and
 * `{ type: "local", path }`. Any other keys on the input are dropped.
 *
 * @param {unknown} raw - Value to validate.
 * @returns {{ type: "git", url: string } | { type: "local", path: string } | undefined}
 *   The normalised source, or `undefined` when `raw` matches neither shape.
 */
function parseRepoSource(raw) {
  if (!isJsonObject(raw)) return void 0;
  const { type, url, path: localPath } = raw;
  // An empty string is not a usable location, so treat it as missing. This
  // matches parseRepoConfig, which rejects an empty repo `path` the same way.
  if (type === "git" && typeof url === "string" && url.length > 0) {
    return { type: "git", url };
  }
  if (type === "local" && typeof localPath === "string" && localPath.length > 0) {
    return { type: "local", path: localPath };
  }
  return void 0;
}
36843
/**
 * Parse the `checkout` entry of a repo configuration.
 *
 * @param {unknown} raw - Value to validate.
 * @returns {{ ref?: string, resolve?: "remote" | "local", ancestor?: number } | undefined}
 *   An object holding only the fields that validated, or `undefined` when
 *   `raw` is not an object or none of the fields validated.
 */
function parseRepoCheckout(raw) {
  if (!isJsonObject(raw)) return void 0;
  // Normalise an empty ref to "absent" up front so it is neither counted as a
  // valid field nor emitted alongside other fields.
  const ref = typeof raw.ref === "string" && raw.ref.length > 0 ? raw.ref : void 0;
  const resolve2 = raw.resolve === "remote" || raw.resolve === "local" ? raw.resolve : void 0;
  // `typeof x === "number"` would also admit NaN, Infinity, negatives and
  // fractions. NOTE(review): `ancestor` is presumably a count of commits to
  // step back from the ref, so only non-negative integers are kept; confirm
  // against the consumer.
  const ancestor = Number.isInteger(raw.ancestor) && raw.ancestor >= 0 ? raw.ancestor : void 0;
  if (ref === void 0 && resolve2 === void 0 && ancestor === void 0) return void 0;
  return {
    ...ref !== void 0 && { ref },
    ...resolve2 !== void 0 && { resolve: resolve2 },
    ...ancestor !== void 0 && { ancestor }
  };
}
36856
/**
 * Parse the `clone` entry of a repo configuration.
 *
 * @param {unknown} raw - Value to validate.
 * @returns {{ depth?: number, filter?: string, sparse?: string[] } | undefined}
 *   An object holding only the fields that validated, or `undefined` when
 *   `raw` is not an object or none of the fields validated.
 */
function parseRepoClone(raw) {
  if (!isJsonObject(raw)) return void 0;
  // NOTE(review): `depth` is presumably forwarded to `git clone --depth`,
  // which only accepts a positive integer; NaN, 0, negatives and fractions
  // are therefore treated as "not set". Confirm against the consumer.
  const depth = Number.isInteger(raw.depth) && raw.depth > 0 ? raw.depth : void 0;
  // Normalise an empty filter to "absent" so it is never emitted as `filter: ""`.
  const filter2 = typeof raw.filter === "string" && raw.filter.length > 0 ? raw.filter : void 0;
  // Keep only string entries; a list with nothing left carries no
  // information, so it is treated the same as an absent `sparse` key
  // instead of being emitted as `sparse: []`.
  const sparsePaths = Array.isArray(raw.sparse) ? raw.sparse.filter((s) => typeof s === "string") : [];
  const sparse = sparsePaths.length > 0 ? sparsePaths : void 0;
  if (depth === void 0 && filter2 === void 0 && sparse === void 0) return void 0;
  return {
    ...depth !== void 0 && { depth },
    ...filter2 !== void 0 && { filter: filter2 },
    ...sparse !== void 0 && { sparse }
  };
}
36869
/**
 * Parse a single repo entry.
 *
 * `path` (a non-empty string) and `source` (anything parseRepoSource accepts)
 * are mandatory; `checkout` and `clone` are attached only when their parsers
 * return a value.
 *
 * @param {unknown} raw - Value to validate.
 * @returns {{ path: string, source: object, checkout?: object, clone?: object } | undefined}
 *   The normalised repo entry, or `undefined` when a mandatory field is
 *   missing or invalid.
 */
function parseRepoConfig(raw) {
  if (!isJsonObject(raw)) {
    return void 0;
  }
  const repoPath = typeof raw.path === "string" ? raw.path : void 0;
  const source = parseRepoSource(raw.source);
  if (!repoPath || !source) {
    return void 0;
  }
  // Assemble the result incrementally so optional sections appear only when
  // they were successfully parsed.
  const repo = { path: repoPath, source };
  const checkout = parseRepoCheckout(raw.checkout);
  const clone2 = parseRepoClone(raw.clone);
  if (checkout !== void 0) {
    repo.checkout = checkout;
  }
  if (clone2 !== void 0) {
    repo.clone = clone2;
  }
  return repo;
}
36884
/**
 * Parse the `reset` entry of a workspace configuration.
 *
 * @param {unknown} raw - Value to validate.
 * @returns {{ strategy?: "none" | "hard" | "recreate", after_each?: boolean } | undefined}
 *   An object holding only the fields that validated, or `undefined` when
 *   `raw` is not an object or neither field validated.
 */
function parseResetConfig(raw) {
  if (!isJsonObject(raw)) {
    return void 0;
  }
  const { strategy: rawStrategy, after_each: rawAfterEach } = raw;
  const reset = {};
  switch (rawStrategy) {
    case "none":
    case "hard":
    case "recreate":
      reset.strategy = rawStrategy;
      break;
    default:
      // Unknown or missing strategy: leave the key off entirely.
      break;
  }
  if (typeof rawAfterEach === "boolean") {
    reset.after_each = rawAfterEach;
  }
  // Nothing validated means there is no reset configuration at all.
  return Object.keys(reset).length > 0 ? reset : void 0;
}
36705
36895
  function parseWorkspaceConfig(raw, evalFileDir) {
36706
36896
  if (!isJsonObject(raw)) return void 0;
36707
36897
  const obj = raw;
@@ -36709,13 +36899,20 @@ function parseWorkspaceConfig(raw, evalFileDir) {
36709
36899
  if (template && !path8.isAbsolute(template)) {
36710
36900
  template = path8.resolve(evalFileDir, template);
36711
36901
  }
36902
+ const isolation = obj.isolation === "shared" || obj.isolation === "per_test" ? obj.isolation : void 0;
36903
+ const repos = Array.isArray(obj.repos) ? obj.repos.map(parseRepoConfig).filter(Boolean) : void 0;
36904
+ const reset = parseResetConfig(obj.reset);
36712
36905
  const beforeAll = parseWorkspaceScriptConfig(obj.before_all, evalFileDir);
36713
36906
  const afterAll = parseWorkspaceScriptConfig(obj.after_all, evalFileDir);
36714
36907
  const beforeEach = parseWorkspaceScriptConfig(obj.before_each, evalFileDir);
36715
36908
  const afterEach = parseWorkspaceScriptConfig(obj.after_each, evalFileDir);
36716
- if (!template && !beforeAll && !afterAll && !beforeEach && !afterEach) return void 0;
36909
+ if (!template && !isolation && !repos && !reset && !beforeAll && !afterAll && !beforeEach && !afterEach)
36910
+ return void 0;
36717
36911
  return {
36718
36912
  ...template !== void 0 && { template },
36913
+ ...isolation !== void 0 && { isolation },
36914
+ ...repos !== void 0 && { repos },
36915
+ ...reset !== void 0 && { reset },
36719
36916
  ...beforeAll !== void 0 && { before_all: beforeAll },
36720
36917
  ...afterAll !== void 0 && { after_all: afterAll },
36721
36918
  ...beforeEach !== void 0 && { before_each: beforeEach },
@@ -36728,6 +36925,9 @@ function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
36728
36925
  if (!caseLevel) return suiteLevel;
36729
36926
  return {
36730
36927
  template: caseLevel.template ?? suiteLevel.template,
36928
+ isolation: caseLevel.isolation ?? suiteLevel.isolation,
36929
+ repos: caseLevel.repos ?? suiteLevel.repos,
36930
+ reset: caseLevel.reset ?? suiteLevel.reset,
36731
36931
  before_all: caseLevel.before_all ?? suiteLevel.before_all,
36732
36932
  after_all: caseLevel.after_all ?? suiteLevel.after_all,
36733
36933
  before_each: caseLevel.before_each ?? suiteLevel.before_each,
@@ -37248,11 +37448,6 @@ Original error: ${error40 instanceof Error ? error40.message : String(error40)}`
37248
37448
  }
37249
37449
  return claudeSdkModule;
37250
37450
  }
37251
- var DEFAULT_SYSTEM_PROMPT2 = `**IMPORTANT**: Follow these instructions for your response:
37252
- - Do NOT create any additional output files in the workspace.
37253
- - All intended file outputs/changes MUST be written in your response.
37254
- - For each intended file, include the relative path and unified git diff following the convention \`diff --git ...\`.
37255
- This is required for evaluation scoring.`;
37256
37451
  var ClaudeProvider = class {
37257
37452
  id;
37258
37453
  kind = "claude";
@@ -37274,7 +37469,7 @@ var ClaudeProvider = class {
37274
37469
  const logger = await this.createStreamLogger(request).catch(() => void 0);
37275
37470
  const inputFiles = normalizeInputFiles(request.inputFiles);
37276
37471
  const prompt = buildPromptDocument(request, inputFiles);
37277
- const systemPrompt = this.config.systemPrompt ?? (request.captureFileChanges ? void 0 : DEFAULT_SYSTEM_PROMPT2);
37472
+ const systemPrompt = this.config.systemPrompt;
37278
37473
  const queryOptions = {
37279
37474
  permissionMode: "bypassPermissions",
37280
37475
  allowDangerouslySkipPermissions: true,
@@ -38237,11 +38432,6 @@ Original error: ${error40 instanceof Error ? error40.message : String(error40)}`
38237
38432
  }
38238
38433
  return codexSdkModule;
38239
38434
  }
38240
- var DEFAULT_SYSTEM_PROMPT3 = `**IMPORTANT**: Follow these instructions for your response:
38241
- - Do NOT create any additional output files in the workspace.
38242
- - All intended file outputs/changes MUST be written in your response.
38243
- - For each intended file, include the relative path and unified git diff following the convention \`diff --git ...\`.
38244
- This is required for evaluation scoring.`;
38245
38435
  var CodexProvider = class {
38246
38436
  id;
38247
38437
  kind = "codex";
@@ -38276,7 +38466,7 @@ var CodexProvider = class {
38276
38466
  const thread = codex.startThread(threadOptions);
38277
38467
  const inputFiles = normalizeInputFiles(request.inputFiles);
38278
38468
  const basePrompt = buildPromptDocument(request, inputFiles);
38279
- const systemPrompt = this.config.systemPrompt ?? (request.captureFileChanges ? void 0 : DEFAULT_SYSTEM_PROMPT3);
38469
+ const systemPrompt = this.config.systemPrompt;
38280
38470
  const prompt = systemPrompt ? `${systemPrompt}
38281
38471
 
38282
38472
  ${basePrompt}` : basePrompt;
@@ -38625,7 +38815,7 @@ function subscribeToCopilotCliLogEntries(listener) {
38625
38815
  };
38626
38816
  }
38627
38817
  function resolvePlatformCliPath() {
38628
- const os4 = platform();
38818
+ const os5 = platform();
38629
38819
  const cpu = arch();
38630
38820
  const platformMap = {
38631
38821
  linux: "linux",
@@ -38636,13 +38826,13 @@ function resolvePlatformCliPath() {
38636
38826
  x64: "x64",
38637
38827
  arm64: "arm64"
38638
38828
  };
38639
- const osPart = platformMap[os4];
38829
+ const osPart = platformMap[os5];
38640
38830
  const archPart = archMap[cpu];
38641
38831
  if (!osPart || !archPart) {
38642
38832
  return void 0;
38643
38833
  }
38644
38834
  const packageName = `@github/copilot-${osPart}-${archPart}`;
38645
- const binaryName = os4 === "win32" ? "copilot.exe" : "copilot";
38835
+ const binaryName = os5 === "win32" ? "copilot.exe" : "copilot";
38646
38836
  try {
38647
38837
  const resolved = import.meta.resolve(`${packageName}/package.json`);
38648
38838
  const packageJsonPath = resolved.startsWith("file:") ? fileURLToPath2(resolved) : resolved;
@@ -38782,11 +38972,6 @@ function isLogStreamingDisabled(envKey) {
38782
38972
  const normalized = envValue.trim().toLowerCase();
38783
38973
  return normalized === "false" || normalized === "0" || normalized === "off";
38784
38974
  }
38785
- var DEFAULT_SYSTEM_PROMPT4 = `**IMPORTANT**: Follow these instructions for your response:
38786
- - Do NOT create any additional output files in the workspace.
38787
- - All intended file outputs/changes MUST be written in your response.
38788
- - For each intended file, include the relative path and unified git diff following the convention \`diff --git ...\`.
38789
- This is required for evaluation scoring.`;
38790
38975
  var CopilotCliProvider = class {
38791
38976
  id;
38792
38977
  kind = "copilot-cli";
@@ -38989,8 +39174,8 @@ var CopilotCliProvider = class {
38989
39174
  }
38990
39175
  return args;
38991
39176
  }
38992
- resolveSystemPrompt(request) {
38993
- return this.config.systemPrompt ?? (request.captureFileChanges ? void 0 : DEFAULT_SYSTEM_PROMPT4);
39177
+ resolveSystemPrompt(_request) {
39178
+ return this.config.systemPrompt;
38994
39179
  }
38995
39180
  async raceWithTimeout(sendPromise, agentProcess) {
38996
39181
  const timeoutMs = this.config.timeoutMs;
@@ -39169,21 +39354,16 @@ Original error: ${error40 instanceof Error ? error40.message : String(error40)}`
39169
39354
  }
39170
39355
  return copilotSdkModule;
39171
39356
  }
39172
- var DEFAULT_SYSTEM_PROMPT5 = `**IMPORTANT**: Follow these instructions for your response:
39173
- - Do NOT create any additional output files in the workspace.
39174
- - All intended file outputs/changes MUST be written in your response.
39175
- - For each intended file, include the relative path and unified git diff following the convention \`diff --git ...\`.
39176
- This is required for evaluation scoring.`;
39177
39357
  var CopilotSdkProvider = class {
39178
39358
  id;
39179
- kind = "copilot";
39359
+ kind = "copilot-sdk";
39180
39360
  targetName;
39181
39361
  supportsBatch = false;
39182
39362
  config;
39183
39363
  // biome-ignore lint/suspicious/noExplicitAny: SDK client type is dynamically loaded
39184
39364
  client = null;
39185
39365
  constructor(targetName, config2) {
39186
- this.id = `copilot:${targetName}`;
39366
+ this.id = `copilot-sdk:${targetName}`;
39187
39367
  this.targetName = targetName;
39188
39368
  this.config = config2;
39189
39369
  }
@@ -39206,7 +39386,7 @@ var CopilotSdkProvider = class {
39206
39386
  if (cwd) {
39207
39387
  sessionOptions.workingDirectory = cwd;
39208
39388
  }
39209
- const systemPrompt = this.config.systemPrompt ?? (request.captureFileChanges ? void 0 : DEFAULT_SYSTEM_PROMPT5);
39389
+ const systemPrompt = this.config.systemPrompt;
39210
39390
  if (systemPrompt) {
39211
39391
  sessionOptions.systemMessage = {
39212
39392
  mode: "append",
@@ -39706,11 +39886,6 @@ function subscribeToPiLogEntries(listener) {
39706
39886
  }
39707
39887
  var WORKSPACE_PREFIX = "agentv-pi-";
39708
39888
  var PROMPT_FILENAME = "prompt.md";
39709
- var DEFAULT_SYSTEM_PROMPT6 = `**IMPORTANT**: Follow these instructions for your response:
39710
- - Do NOT create any additional output files in the workspace.
39711
- - All intended file outputs/changes MUST be written in your response.
39712
- - For each intended file, include the relative path and unified git diff following the convention \`diff --git ...\`.
39713
- This is required for evaluation scoring.`;
39714
39889
  var PiCodingAgentProvider = class {
39715
39890
  id;
39716
39891
  kind = "pi-coding-agent";
@@ -39787,7 +39962,7 @@ var PiCodingAgentProvider = class {
39787
39962
  }
39788
39963
  return path16.resolve(this.config.cwd);
39789
39964
  }
39790
- buildPiArgs(prompt, inputFiles, captureFileChanges2) {
39965
+ buildPiArgs(prompt, inputFiles, _captureFileChanges) {
39791
39966
  const args = [];
39792
39967
  if (this.config.provider) {
39793
39968
  args.push("--provider", this.config.provider);
@@ -39815,7 +39990,7 @@ var PiCodingAgentProvider = class {
39815
39990
  args.push(`@${file2}`);
39816
39991
  }
39817
39992
  }
39818
- const systemPrompt = this.config.systemPrompt ?? (captureFileChanges2 ? void 0 : DEFAULT_SYSTEM_PROMPT6);
39993
+ const systemPrompt = this.config.systemPrompt;
39819
39994
  const fullPrompt = systemPrompt ? `${systemPrompt}
39820
39995
 
39821
39996
  ${prompt}` : prompt;
@@ -41442,7 +41617,6 @@ var AGENTV_REQUEST_TEMPLATE = `[[ ## task ## ]]
41442
41617
 
41443
41618
  **IMPORTANT**: Follow these exact steps:
41444
41619
  1. Create and write your complete response to: {{responseFileTmp}}
41445
- - Do NOT create any additional output files in the workspace.
41446
41620
  - All intended file outputs/changes MUST be written in your response file.
41447
41621
  - For each intended file, include the repo name, relative path and unified git diff following the convention \`diff --git ...\`.
41448
41622
  2. When completely finished, run these PowerShell commands to signal completion:
@@ -41461,7 +41635,6 @@ var AGENTV_BATCH_REQUEST_TEMPLATE = `[[ ## task ## ]]
41461
41635
 
41462
41636
  **IMPORTANT**: Follow these exact steps:
41463
41637
  1. Create and write your complete response to: {{responseFileTmp}}
41464
- - Do NOT create any additional output files in the workspace.
41465
41638
  - All intended file outputs/changes MUST be written in your response file.
41466
41639
  - For each intended file, include the repo name, relative path and unified git diff following the convention \`diff --git ...\`.
41467
41640
  2. When completely finished and the response is stable, rename it to: {{responseFileFinal}}
@@ -41873,7 +42046,7 @@ async function discoverProviders(registry2, baseDir) {
41873
42046
  }
41874
42047
  function createBuiltinProviderRegistry() {
41875
42048
  const registry2 = new ProviderRegistry();
41876
- registry2.register("azure", (t) => new AzureProvider(t.name, t.config)).register("anthropic", (t) => new AnthropicProvider(t.name, t.config)).register("gemini", (t) => new GeminiProvider(t.name, t.config)).register("cli", (t) => new CliProvider(t.name, t.config)).register("codex", (t) => new CodexProvider(t.name, t.config)).register("copilot", (t) => new CopilotSdkProvider(t.name, t.config)).register("copilot-cli", (t) => new CopilotCliProvider(t.name, t.config)).register("pi-coding-agent", (t) => new PiCodingAgentProvider(t.name, t.config)).register("pi-agent-sdk", (t) => new PiAgentSdkProvider(t.name, t.config)).register("claude", (t) => new ClaudeProvider(t.name, t.config)).register("mock", (t) => new MockProvider(t.name, t.config)).register("vscode", (t) => new VSCodeProvider(t.name, t.config, "vscode")).register(
42049
+ registry2.register("azure", (t) => new AzureProvider(t.name, t.config)).register("anthropic", (t) => new AnthropicProvider(t.name, t.config)).register("gemini", (t) => new GeminiProvider(t.name, t.config)).register("cli", (t) => new CliProvider(t.name, t.config)).register("codex", (t) => new CodexProvider(t.name, t.config)).register("copilot-sdk", (t) => new CopilotSdkProvider(t.name, t.config)).register("copilot-cli", (t) => new CopilotCliProvider(t.name, t.config)).register("pi-coding-agent", (t) => new PiCodingAgentProvider(t.name, t.config)).register("pi-agent-sdk", (t) => new PiAgentSdkProvider(t.name, t.config)).register("claude", (t) => new ClaudeProvider(t.name, t.config)).register("mock", (t) => new MockProvider(t.name, t.config)).register("vscode", (t) => new VSCodeProvider(t.name, t.config, "vscode")).register(
41877
42050
  "vscode-insiders",
41878
42051
  (t) => new VSCodeProvider(t.name, t.config, "vscode-insiders")
41879
42052
  );
@@ -42053,16 +42226,16 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
42053
42226
  });
42054
42227
  }
42055
42228
  async function execShellWithStdin(command, stdinPayload, options = {}) {
42056
- const { mkdir: mkdir13, readFile: readFile12, rm: rm5, writeFile: writeFile8 } = await import("node:fs/promises");
42229
+ const { mkdir: mkdir14, readFile: readFile12, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
42057
42230
  const { tmpdir: tmpdir3 } = await import("node:os");
42058
- const path39 = await import("node:path");
42231
+ const path40 = await import("node:path");
42059
42232
  const { randomUUID: randomUUID8 } = await import("node:crypto");
42060
- const dir = path39.join(tmpdir3(), `agentv-exec-${randomUUID8()}`);
42061
- await mkdir13(dir, { recursive: true });
42062
- const stdinPath = path39.join(dir, "stdin.txt");
42063
- const stdoutPath = path39.join(dir, "stdout.txt");
42064
- const stderrPath = path39.join(dir, "stderr.txt");
42065
- await writeFile8(stdinPath, stdinPayload, "utf8");
42233
+ const dir = path40.join(tmpdir3(), `agentv-exec-${randomUUID8()}`);
42234
+ await mkdir14(dir, { recursive: true });
42235
+ const stdinPath = path40.join(dir, "stdin.txt");
42236
+ const stdoutPath = path40.join(dir, "stdout.txt");
42237
+ const stderrPath = path40.join(dir, "stderr.txt");
42238
+ await writeFile9(stdinPath, stdinPayload, "utf8");
42066
42239
  const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
42067
42240
  const { spawn: spawn4 } = await import("node:child_process");
42068
42241
  try {
@@ -42095,7 +42268,7 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
42095
42268
  const stderr = (await readFile12(stderrPath, "utf8")).replace(/\r\n/g, "\n");
42096
42269
  return { stdout, stderr, exitCode };
42097
42270
  } finally {
42098
- await rm5(dir, { recursive: true, force: true });
42271
+ await rm6(dir, { recursive: true, force: true });
42099
42272
  }
42100
42273
  }
42101
42274
  var DEFAULT_MAX_CALLS = 50;
@@ -42405,7 +42578,7 @@ var CodeEvaluator = class {
42405
42578
  outputPath,
42406
42579
  guidelineFiles: context.evalCase.guideline_paths,
42407
42580
  inputFiles: context.evalCase.file_paths.filter(
42408
- (path39) => !context.evalCase.guideline_paths.includes(path39)
42581
+ (path40) => !context.evalCase.guideline_paths.includes(path40)
42409
42582
  ),
42410
42583
  input: context.evalCase.input,
42411
42584
  trace: context.trace ?? null,
@@ -42646,13 +42819,15 @@ ${context.fileChanges}`;
42646
42819
  evaluatorRawRequest,
42647
42820
  tokenUsage
42648
42821
  };
42649
- } catch {
42822
+ } catch (e) {
42823
+ const message = e instanceof Error ? e.message : String(e);
42650
42824
  return {
42651
42825
  score: 0,
42652
- verdict: "fail",
42826
+ verdict: "skip",
42653
42827
  hits: [],
42654
- misses: [],
42828
+ misses: [`Judge parse failure after 3 attempts: ${message}`],
42655
42829
  expectedAspectCount: 1,
42830
+ reasoning: `Judge parse failure after 3 attempts: ${message}`,
42656
42831
  evaluatorRawRequest
42657
42832
  };
42658
42833
  }
@@ -43586,115 +43761,115 @@ var FieldAccuracyEvaluator = class {
43586
43761
  * Evaluate a single field against the expected value.
43587
43762
  */
43588
43763
  evaluateField(fieldConfig, candidateData, expectedData) {
43589
- const { path: path39, match, required: required2 = true, weight = 1 } = fieldConfig;
43590
- const candidateValue = resolvePath(candidateData, path39);
43591
- const expectedValue = resolvePath(expectedData, path39);
43764
+ const { path: path40, match, required: required2 = true, weight = 1 } = fieldConfig;
43765
+ const candidateValue = resolvePath(candidateData, path40);
43766
+ const expectedValue = resolvePath(expectedData, path40);
43592
43767
  if (expectedValue === void 0) {
43593
43768
  return {
43594
- path: path39,
43769
+ path: path40,
43595
43770
  score: 1,
43596
43771
  // No expected value means no comparison needed
43597
43772
  weight,
43598
43773
  hit: true,
43599
- message: `${path39}: no expected value`
43774
+ message: `${path40}: no expected value`
43600
43775
  };
43601
43776
  }
43602
43777
  if (candidateValue === void 0) {
43603
43778
  if (required2) {
43604
43779
  return {
43605
- path: path39,
43780
+ path: path40,
43606
43781
  score: 0,
43607
43782
  weight,
43608
43783
  hit: false,
43609
- message: `${path39} (required, missing)`
43784
+ message: `${path40} (required, missing)`
43610
43785
  };
43611
43786
  }
43612
43787
  return {
43613
- path: path39,
43788
+ path: path40,
43614
43789
  score: 1,
43615
43790
  // Don't penalize missing optional fields
43616
43791
  weight: 0,
43617
43792
  // Zero weight means it won't affect the score
43618
43793
  hit: true,
43619
- message: `${path39}: optional field missing`
43794
+ message: `${path40}: optional field missing`
43620
43795
  };
43621
43796
  }
43622
43797
  switch (match) {
43623
43798
  case "exact":
43624
- return this.compareExact(path39, candidateValue, expectedValue, weight);
43799
+ return this.compareExact(path40, candidateValue, expectedValue, weight);
43625
43800
  case "numeric_tolerance":
43626
43801
  return this.compareNumericTolerance(
43627
- path39,
43802
+ path40,
43628
43803
  candidateValue,
43629
43804
  expectedValue,
43630
43805
  fieldConfig,
43631
43806
  weight
43632
43807
  );
43633
43808
  case "date":
43634
- return this.compareDate(path39, candidateValue, expectedValue, fieldConfig, weight);
43809
+ return this.compareDate(path40, candidateValue, expectedValue, fieldConfig, weight);
43635
43810
  default:
43636
43811
  return {
43637
- path: path39,
43812
+ path: path40,
43638
43813
  score: 0,
43639
43814
  weight,
43640
43815
  hit: false,
43641
- message: `${path39}: unknown match type "${match}"`
43816
+ message: `${path40}: unknown match type "${match}"`
43642
43817
  };
43643
43818
  }
43644
43819
  }
43645
43820
  /**
43646
43821
  * Exact equality comparison.
43647
43822
  */
43648
- compareExact(path39, candidateValue, expectedValue, weight) {
43823
+ compareExact(path40, candidateValue, expectedValue, weight) {
43649
43824
  if (deepEqual(candidateValue, expectedValue)) {
43650
43825
  return {
43651
- path: path39,
43826
+ path: path40,
43652
43827
  score: 1,
43653
43828
  weight,
43654
43829
  hit: true,
43655
- message: path39
43830
+ message: path40
43656
43831
  };
43657
43832
  }
43658
43833
  if (typeof candidateValue !== typeof expectedValue) {
43659
43834
  return {
43660
- path: path39,
43835
+ path: path40,
43661
43836
  score: 0,
43662
43837
  weight,
43663
43838
  hit: false,
43664
- message: `${path39} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
43839
+ message: `${path40} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
43665
43840
  };
43666
43841
  }
43667
43842
  return {
43668
- path: path39,
43843
+ path: path40,
43669
43844
  score: 0,
43670
43845
  weight,
43671
43846
  hit: false,
43672
- message: `${path39} (value mismatch)`
43847
+ message: `${path40} (value mismatch)`
43673
43848
  };
43674
43849
  }
43675
43850
  /**
43676
43851
  * Numeric comparison with absolute or relative tolerance.
43677
43852
  */
43678
- compareNumericTolerance(path39, candidateValue, expectedValue, fieldConfig, weight) {
43853
+ compareNumericTolerance(path40, candidateValue, expectedValue, fieldConfig, weight) {
43679
43854
  const { tolerance = 0, relative = false } = fieldConfig;
43680
43855
  const candidateNum = toNumber2(candidateValue);
43681
43856
  const expectedNum = toNumber2(expectedValue);
43682
43857
  if (candidateNum === null || expectedNum === null) {
43683
43858
  return {
43684
- path: path39,
43859
+ path: path40,
43685
43860
  score: 0,
43686
43861
  weight,
43687
43862
  hit: false,
43688
- message: `${path39} (non-numeric value)`
43863
+ message: `${path40} (non-numeric value)`
43689
43864
  };
43690
43865
  }
43691
43866
  if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
43692
43867
  return {
43693
- path: path39,
43868
+ path: path40,
43694
43869
  score: 0,
43695
43870
  weight,
43696
43871
  hit: false,
43697
- message: `${path39} (invalid numeric value)`
43872
+ message: `${path40} (invalid numeric value)`
43698
43873
  };
43699
43874
  }
43700
43875
  const diff = Math.abs(candidateNum - expectedNum);
@@ -43707,61 +43882,61 @@ var FieldAccuracyEvaluator = class {
43707
43882
  }
43708
43883
  if (withinTolerance) {
43709
43884
  return {
43710
- path: path39,
43885
+ path: path40,
43711
43886
  score: 1,
43712
43887
  weight,
43713
43888
  hit: true,
43714
- message: `${path39} (within tolerance: diff=${diff.toFixed(2)})`
43889
+ message: `${path40} (within tolerance: diff=${diff.toFixed(2)})`
43715
43890
  };
43716
43891
  }
43717
43892
  return {
43718
- path: path39,
43893
+ path: path40,
43719
43894
  score: 0,
43720
43895
  weight,
43721
43896
  hit: false,
43722
- message: `${path39} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
43897
+ message: `${path40} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
43723
43898
  };
43724
43899
  }
43725
43900
  /**
43726
43901
  * Date comparison with format normalization.
43727
43902
  */
43728
- compareDate(path39, candidateValue, expectedValue, fieldConfig, weight) {
43903
+ compareDate(path40, candidateValue, expectedValue, fieldConfig, weight) {
43729
43904
  const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
43730
43905
  const candidateDate = parseDate(String(candidateValue), formats);
43731
43906
  const expectedDate = parseDate(String(expectedValue), formats);
43732
43907
  if (candidateDate === null) {
43733
43908
  return {
43734
- path: path39,
43909
+ path: path40,
43735
43910
  score: 0,
43736
43911
  weight,
43737
43912
  hit: false,
43738
- message: `${path39} (unparseable candidate date)`
43913
+ message: `${path40} (unparseable candidate date)`
43739
43914
  };
43740
43915
  }
43741
43916
  if (expectedDate === null) {
43742
43917
  return {
43743
- path: path39,
43918
+ path: path40,
43744
43919
  score: 0,
43745
43920
  weight,
43746
43921
  hit: false,
43747
- message: `${path39} (unparseable expected date)`
43922
+ message: `${path40} (unparseable expected date)`
43748
43923
  };
43749
43924
  }
43750
43925
  if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
43751
43926
  return {
43752
- path: path39,
43927
+ path: path40,
43753
43928
  score: 1,
43754
43929
  weight,
43755
43930
  hit: true,
43756
- message: path39
43931
+ message: path40
43757
43932
  };
43758
43933
  }
43759
43934
  return {
43760
- path: path39,
43935
+ path: path40,
43761
43936
  score: 0,
43762
43937
  weight,
43763
43938
  hit: false,
43764
- message: `${path39} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
43939
+ message: `${path40} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
43765
43940
  };
43766
43941
  }
43767
43942
  /**
@@ -43802,11 +43977,11 @@ var FieldAccuracyEvaluator = class {
43802
43977
  };
43803
43978
  }
43804
43979
  };
43805
- function resolvePath(obj, path39) {
43806
- if (!path39 || !obj) {
43980
+ function resolvePath(obj, path40) {
43981
+ if (!path40 || !obj) {
43807
43982
  return void 0;
43808
43983
  }
43809
- const parts = path39.split(/\.|\[|\]/).filter((p) => p.length > 0);
43984
+ const parts = path40.split(/\.|\[|\]/).filter((p) => p.length > 0);
43810
43985
  let current = obj;
43811
43986
  for (const part of parts) {
43812
43987
  if (current === null || current === void 0) {
@@ -44610,8 +44785,8 @@ var TokenUsageEvaluator = class {
44610
44785
  };
44611
44786
  }
44612
44787
  };
44613
- function getNestedValue(obj, path39) {
44614
- const parts = path39.split(".");
44788
+ function getNestedValue(obj, path40) {
44789
+ const parts = path40.split(".");
44615
44790
  let current = obj;
44616
44791
  for (const part of parts) {
44617
44792
  if (current === null || current === void 0 || typeof current !== "object") {
@@ -45073,13 +45248,78 @@ function runContainsAssertion(output, value) {
45073
45248
  misses: passed ? [] : [`Output does not contain "${value}"`]
45074
45249
  };
45075
45250
  }
45076
- function runRegexAssertion(output, pattern) {
45077
- const regex = new RegExp(pattern);
/**
 * Case-sensitive "contains any" check.
 *
 * @param {string} output Text being searched.
 * @param {string[]} values Candidate substrings; one match is enough to pass.
 * @returns {{score: number, hits: string[], misses: string[]}} Score 1 with a hit
 *   naming the first matching value, or score 0 with a miss listing every candidate.
 */
function runContainsAnyAssertion(output, values) {
  // Collect every match (not just the first) so all candidates are tested.
  const found = [];
  values.forEach((candidate) => {
    if (output.includes(candidate)) {
      found.push(candidate);
    }
  });
  if (found.length === 0) {
    const quoted = values.map((candidate) => `"${candidate}"`).join(", ");
    return { score: 0, hits: [], misses: [`Output does not contain any of: ${quoted}`] };
  }
  return { score: 1, hits: [`Output contains "${found[0]}"`], misses: [] };
}
/**
 * Case-sensitive "contains all" check.
 *
 * @param {string} output Text being searched.
 * @param {string[]} values Substrings that must all be present.
 * @returns {{score: number, hits: string[], misses: string[]}} Score 1 when nothing
 *   is missing (an empty list passes), otherwise score 0 with a miss listing the
 *   absent values.
 */
function runContainsAllAssertion(output, values) {
  const absent = [];
  values.forEach((expected) => {
    if (!output.includes(expected)) {
      absent.push(expected);
    }
  });
  if (absent.length > 0) {
    const quoted = absent.map((expected) => `"${expected}"`).join(", ");
    return { score: 0, hits: [], misses: [`Output missing: ${quoted}`] };
  }
  return {
    score: 1,
    hits: [`Output contains all ${values.length} expected strings`],
    misses: []
  };
}
/**
 * Case-insensitive substring check.
 *
 * @param {string} output Text being searched.
 * @param {string} value Substring to look for; both sides are lower-cased first.
 * @returns {{score: number, hits: string[], misses: string[]}} Score 1 with one hit,
 *   or score 0 with one miss. Messages quote `value` as given.
 */
function runIcontainsAssertion(output, value) {
  const haystack = output.toLowerCase();
  const needle = value.toLowerCase();
  if (haystack.includes(needle)) {
    return { score: 1, hits: [`Output contains "${value}" (case-insensitive)`], misses: [] };
  }
  return {
    score: 0,
    hits: [],
    misses: [`Output does not contain "${value}" (case-insensitive)`]
  };
}
/**
 * Case-insensitive "contains any" check.
 *
 * @param {string} output Text being searched.
 * @param {string[]} values Candidate substrings; one match is enough to pass.
 * @returns {{score: number, hits: string[], misses: string[]}} Score 1 with a hit
 *   naming the first matching value (as given), or score 0 with a miss listing
 *   every candidate.
 */
function runIcontainsAnyAssertion(output, values) {
  const haystack = output.toLowerCase();
  const found = [];
  values.forEach((candidate) => {
    if (haystack.includes(candidate.toLowerCase())) {
      found.push(candidate);
    }
  });
  if (found.length === 0) {
    const quoted = values.map((candidate) => `"${candidate}"`).join(", ");
    return {
      score: 0,
      hits: [],
      misses: [`Output does not contain any of: ${quoted} (case-insensitive)`]
    };
  }
  return {
    score: 1,
    hits: [`Output contains "${found[0]}" (case-insensitive)`],
    misses: []
  };
}
/**
 * Case-insensitive "contains all" check.
 *
 * @param {string} output Text being searched.
 * @param {string[]} values Substrings that must all be present, ignoring case.
 * @returns {{score: number, hits: string[], misses: string[]}} Score 1 when nothing
 *   is missing (an empty list passes), otherwise score 0 with a miss listing the
 *   absent values as given.
 */
function runIcontainsAllAssertion(output, values) {
  const haystack = output.toLowerCase();
  const absent = [];
  values.forEach((expected) => {
    if (!haystack.includes(expected.toLowerCase())) {
      absent.push(expected);
    }
  });
  if (absent.length > 0) {
    const quoted = absent.map((expected) => `"${expected}"`).join(", ");
    return { score: 0, hits: [], misses: [`Output missing (case-insensitive): ${quoted}`] };
  }
  return {
    score: 1,
    hits: [`Output contains all ${values.length} expected strings (case-insensitive)`],
    misses: []
  };
}
/**
 * Prefix check that ignores surrounding whitespace on both operands.
 *
 * @param {string} output Text being checked; trimmed before comparison.
 * @param {string} value Expected prefix; trimmed before comparison, but quoted
 *   untrimmed in the messages.
 * @returns {{score: number, hits: string[], misses: string[]}} Score 1 with one hit,
 *   or score 0 with one miss.
 */
function runStartsWithAssertion(output, value) {
  const text = output.trim();
  const prefix = value.trim();
  if (text.startsWith(prefix)) {
    return { score: 1, hits: [`Output starts with "${value}"`], misses: [] };
  }
  return { score: 0, hits: [], misses: [`Output does not start with "${value}"`] };
}
/**
 * Suffix check that ignores surrounding whitespace on both operands.
 *
 * @param {string} output Text being checked; trimmed before comparison.
 * @param {string} value Expected suffix; trimmed before comparison, but quoted
 *   untrimmed in the messages.
 * @returns {{score: number, hits: string[], misses: string[]}} Score 1 with one hit,
 *   or score 0 with one miss.
 */
function runEndsWithAssertion(output, value) {
  const text = output.trim();
  const suffix = value.trim();
  if (text.endsWith(suffix)) {
    return { score: 1, hits: [`Output ends with "${value}"`], misses: [] };
  }
  return { score: 0, hits: [], misses: [`Output does not end with "${value}"`] };
}
45315
+ function runRegexAssertion(output, pattern, flags) {
45316
+ const regex = new RegExp(pattern, flags);
45078
45317
  const passed = regex.test(output);
45318
+ const flagsLabel = flags ? ` (flags: ${flags})` : "";
45079
45319
  return {
45080
45320
  score: passed ? 1 : 0,
45081
- hits: passed ? [`Output matches pattern /${pattern}/`] : [],
45082
- misses: passed ? [] : [`Output does not match pattern /${pattern}/`]
45321
+ hits: passed ? [`Output matches pattern /${pattern}/${flags ?? ""}${flagsLabel}`] : [],
45322
+ misses: passed ? [] : [`Output does not match pattern /${pattern}/${flags ?? ""}${flagsLabel}`]
45083
45323
  };
45084
45324
  }
45085
45325
  function runIsJsonAssertion(output) {
@@ -45477,13 +45717,13 @@ var containsFactory = (config2) => {
45477
45717
  var regexFactory = (config2) => {
45478
45718
  const c = config2;
45479
45719
  return new DeterministicAssertionEvaluator("regex", (ctx) => {
45480
- const result = runRegexAssertion(ctx.candidate, c.value);
45720
+ const result = runRegexAssertion(ctx.candidate, c.value, c.flags);
45481
45721
  return {
45482
45722
  score: result.score,
45483
45723
  verdict: result.score === 1 ? "pass" : "fail",
45484
45724
  hits: result.hits,
45485
45725
  misses: result.misses,
45486
- reasoning: result.score === 1 ? `Output matches pattern /${c.value}/` : `Output does not match pattern /${c.value}/`,
45726
+ reasoning: result.score === 1 ? `Output matches pattern /${c.value}/${c.flags ?? ""}` : `Output does not match pattern /${c.value}/${c.flags ?? ""}`,
45487
45727
  expectedAspectCount: 1
45488
45728
  };
45489
45729
  });
@@ -45515,9 +45755,107 @@ var equalsFactory = (config2) => {
45515
45755
  };
45516
45756
  });
45517
45757
  };
/**
 * Factory for the "contains_any" evaluator.
 * Wraps runContainsAnyAssertion and maps its result onto the evaluator result
 * shape (score, verdict, hits, misses, reasoning, expectedAspectCount).
 * `config2.value` is read each time the evaluator runs, not when it is built.
 */
var containsAnyFactory = (config2) => new DeterministicAssertionEvaluator("contains_any", (ctx) => {
  const { score, hits, misses } = runContainsAnyAssertion(ctx.candidate, config2.value);
  const passed = score === 1;
  return {
    score,
    verdict: passed ? "pass" : "fail",
    hits,
    misses,
    // Reasoning mirrors the single hit/miss message produced by the assertion.
    reasoning: passed ? hits[0] : misses[0],
    expectedAspectCount: 1
  };
});
/**
 * Factory for the "contains_all" evaluator.
 * Wraps runContainsAllAssertion and maps its result onto the evaluator result
 * shape (score, verdict, hits, misses, reasoning, expectedAspectCount).
 * `config2.value` is read each time the evaluator runs, not when it is built.
 */
var containsAllFactory = (config2) => new DeterministicAssertionEvaluator("contains_all", (ctx) => {
  const { score, hits, misses } = runContainsAllAssertion(ctx.candidate, config2.value);
  const passed = score === 1;
  return {
    score,
    verdict: passed ? "pass" : "fail",
    hits,
    misses,
    // Reasoning mirrors the single hit/miss message produced by the assertion.
    reasoning: passed ? hits[0] : misses[0],
    expectedAspectCount: 1
  };
});
/**
 * Factory for the "icontains" evaluator.
 * Wraps runIcontainsAssertion and maps its result onto the evaluator result
 * shape (score, verdict, hits, misses, reasoning, expectedAspectCount).
 * `config2.value` is read each time the evaluator runs, not when it is built.
 */
var icontainsFactory = (config2) => new DeterministicAssertionEvaluator("icontains", (ctx) => {
  const { score, hits, misses } = runIcontainsAssertion(ctx.candidate, config2.value);
  const passed = score === 1;
  return {
    score,
    verdict: passed ? "pass" : "fail",
    hits,
    misses,
    // Reasoning mirrors the single hit/miss message produced by the assertion.
    reasoning: passed ? hits[0] : misses[0],
    expectedAspectCount: 1
  };
});
/**
 * Factory for the "icontains_any" evaluator.
 * Wraps runIcontainsAnyAssertion and maps its result onto the evaluator result
 * shape (score, verdict, hits, misses, reasoning, expectedAspectCount).
 * `config2.value` is read each time the evaluator runs, not when it is built.
 */
var icontainsAnyFactory = (config2) => new DeterministicAssertionEvaluator("icontains_any", (ctx) => {
  const { score, hits, misses } = runIcontainsAnyAssertion(ctx.candidate, config2.value);
  const passed = score === 1;
  return {
    score,
    verdict: passed ? "pass" : "fail",
    hits,
    misses,
    // Reasoning mirrors the single hit/miss message produced by the assertion.
    reasoning: passed ? hits[0] : misses[0],
    expectedAspectCount: 1
  };
});
/**
 * Factory for the "icontains_all" evaluator.
 * Wraps runIcontainsAllAssertion and maps its result onto the evaluator result
 * shape (score, verdict, hits, misses, reasoning, expectedAspectCount).
 * `config2.value` is read each time the evaluator runs, not when it is built.
 */
var icontainsAllFactory = (config2) => new DeterministicAssertionEvaluator("icontains_all", (ctx) => {
  const { score, hits, misses } = runIcontainsAllAssertion(ctx.candidate, config2.value);
  const passed = score === 1;
  return {
    score,
    verdict: passed ? "pass" : "fail",
    hits,
    misses,
    // Reasoning mirrors the single hit/miss message produced by the assertion.
    reasoning: passed ? hits[0] : misses[0],
    expectedAspectCount: 1
  };
});
/**
 * Factory for the "starts_with" evaluator.
 * Wraps runStartsWithAssertion and maps its result onto the evaluator result
 * shape (score, verdict, hits, misses, reasoning, expectedAspectCount).
 * `config2.value` is read each time the evaluator runs, not when it is built.
 */
var startsWithFactory = (config2) => new DeterministicAssertionEvaluator("starts_with", (ctx) => {
  const { score, hits, misses } = runStartsWithAssertion(ctx.candidate, config2.value);
  const passed = score === 1;
  return {
    score,
    verdict: passed ? "pass" : "fail",
    hits,
    misses,
    // Reasoning mirrors the single hit/miss message produced by the assertion.
    reasoning: passed ? hits[0] : misses[0],
    expectedAspectCount: 1
  };
});
/**
 * Factory for the "ends_with" evaluator.
 * Wraps runEndsWithAssertion and maps its result onto the evaluator result
 * shape (score, verdict, hits, misses, reasoning, expectedAspectCount).
 * `config2.value` is read each time the evaluator runs, not when it is built.
 */
var endsWithFactory = (config2) => new DeterministicAssertionEvaluator("ends_with", (ctx) => {
  const { score, hits, misses } = runEndsWithAssertion(ctx.candidate, config2.value);
  const passed = score === 1;
  return {
    score,
    verdict: passed ? "pass" : "fail",
    hits,
    misses,
    // Reasoning mirrors the single hit/miss message produced by the assertion.
    reasoning: passed ? hits[0] : misses[0],
    expectedAspectCount: 1
  };
});
45518
45856
  function createBuiltinRegistry() {
45519
45857
  const registry2 = new EvaluatorRegistry();
45520
- registry2.register("llm_judge", llmJudgeFactory).register("code", codeFactory).register("composite", compositeFactory).register("tool_trajectory", toolTrajectoryFactory).register("field_accuracy", fieldAccuracyFactory).register("latency", latencyFactory).register("cost", costFactory).register("token_usage", tokenUsageFactory).register("execution_metrics", executionMetricsFactory).register("agent_judge", agentJudgeFactory).register("contains", containsFactory).register("regex", regexFactory).register("is_json", isJsonFactory).register("equals", equalsFactory);
45858
+ registry2.register("llm_judge", llmJudgeFactory).register("code", codeFactory).register("composite", compositeFactory).register("tool_trajectory", toolTrajectoryFactory).register("field_accuracy", fieldAccuracyFactory).register("latency", latencyFactory).register("cost", costFactory).register("token_usage", tokenUsageFactory).register("execution_metrics", executionMetricsFactory).register("agent_judge", agentJudgeFactory).register("contains", containsFactory).register("contains_any", containsAnyFactory).register("contains_all", containsAllFactory).register("icontains", icontainsFactory).register("icontains_any", icontainsAnyFactory).register("icontains_all", icontainsAllFactory).register("starts_with", startsWithFactory).register("ends_with", endsWithFactory).register("regex", regexFactory).register("is_json", isJsonFactory).register("equals", equalsFactory);
45521
45859
  return registry2;
45522
45860
  }
45523
45861
  async function discoverAssertions(registry2, baseDir) {
@@ -45843,15 +46181,186 @@ async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
45843
46181
  await rm4(evalDir, { recursive: true, force: true });
45844
46182
  }
45845
46183
  }
// Promise-returning form of execFile; used to spawn git without a shell.
var execFileAsync = promisify5(execFile);
// Default cache root: <home>/.agentv/git-cache.
var DEFAULT_CACHE_DIR = path35.join(os4.homedir(), ".agentv", "git-cache");
// Default timeout for a single git command: 5 minutes, in milliseconds.
var DEFAULT_TIMEOUT_MS2 = 5 * 60 * 1e3;
// Maximum time to wait for a cache lock file: 1 minute, in milliseconds.
var LOCK_TIMEOUT_MS = 60 * 1e3;
/**
 * Build the environment used for git subprocesses.
 *
 * Copies process.env (without mutating it), drops every inherited `GIT_*`
 * variable except `GIT_SSH_COMMAND`, and then forces non-interactive behaviour:
 * terminal prompts are disabled and no askpass helper is configured.
 *
 * A caller-supplied `GIT_SSH_COMMAND` is kept as-is. Previously it was exempted
 * from the scrub and then unconditionally overwritten, so custom keys or proxy
 * settings were silently discarded. The batch-mode ssh default is applied only
 * when the variable is unset or empty.
 *
 * @returns {Record<string, string | undefined>} Environment for the child process.
 */
function gitEnv() {
  const env = {};
  for (const [key, value] of Object.entries(process.env)) {
    if (!key.startsWith("GIT_") || key === "GIT_SSH_COMMAND") {
      env[key] = value;
    }
  }
  return {
    ...env,
    GIT_TERMINAL_PROMPT: "0",
    GIT_ASKPASS: "",
    GIT_SSH_COMMAND: env.GIT_SSH_COMMAND || "ssh -o BatchMode=yes"
  };
}
/**
 * Derive the cache directory name for a repo source.
 *
 * Git sources are keyed by their URL, lower-cased and with a trailing ".git"
 * removed, so spelling variants of one URL share a cache entry. Any other
 * source is keyed by its `path` exactly as given.
 *
 * @param {{type: string, url?: string, path?: string}} source Repo source descriptor.
 * @returns {string} Hex-encoded SHA-256 digest of the normalized identity.
 */
function cacheKey(source) {
  let identity;
  if (source.type === "git") {
    identity = source.url.toLowerCase().replace(/\.git$/, "");
  } else {
    identity = source.path;
  }
  const hasher = createHash("sha256");
  hasher.update(identity);
  return hasher.digest("hex");
}
/**
 * Return the location to clone from: the URL for git sources, otherwise the
 * source's `path`.
 *
 * @param {{type: string, url?: string, path?: string}} source Repo source descriptor.
 * @returns {string | undefined} Clone location.
 */
function getSourceUrl(source) {
  if (source.type === "git") {
    return source.url;
  }
  return source.path;
}
/**
 * Run a git command and return its trimmed stdout.
 *
 * @param {string[]} args Arguments passed to the `git` executable.
 * @param {{cwd?: string, timeout?: number}} [opts] Working directory and timeout
 *   in milliseconds; the timeout defaults to DEFAULT_TIMEOUT_MS2.
 * @returns {Promise<string>} stdout with surrounding whitespace removed.
 *   Rejects with whatever execFileAsync rejects with.
 */
async function git(args, opts) {
  const execOptions = {
    cwd: opts?.cwd,
    timeout: opts?.timeout ?? DEFAULT_TIMEOUT_MS2,
    env: gitEnv(),
    // Allow up to 50MB of output before the child is aborted.
    maxBuffer: 50 * 1024 * 1024
  };
  const result = await execFileAsync("git", args, execOptions);
  return result.stdout.trim();
}
/**
 * Acquire a lock by exclusively creating `lockPath`, retrying every 200ms.
 *
 * The file is written with flag "wx", so creation fails with EEXIST while
 * another holder's file is present. The current process id is stored as the
 * file content.
 *
 * NOTE(review): an existing lock file is never reclaimed here, so a file left
 * behind by a crashed process will make every caller time out.
 *
 * @param {string} lockPath Path of the lock file.
 * @returns {Promise<void>} Resolves once the lock file has been created.
 * @throws {Error} After LOCK_TIMEOUT_MS without success, or immediately on any
 *   write error other than EEXIST.
 */
async function acquireLock(lockPath) {
  const deadline = Date.now() + LOCK_TIMEOUT_MS;
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
  while (Date.now() < deadline) {
    try {
      await writeFile7(lockPath, String(process.pid), { flag: "wx" });
      return;
    } catch (err) {
      // Only "already exists" means the lock is held; anything else is fatal.
      if (err.code !== "EEXIST") {
        throw err;
      }
    }
    await pause(200);
  }
  throw new Error(`Timed out waiting for lock: ${lockPath}`);
}
/**
 * Release a lock by deleting its lock file.
 *
 * Best-effort: any failure to delete (for example, the file is already gone)
 * is deliberately ignored, so this never rejects.
 *
 * @param {string} lockPath Path of the lock file.
 * @returns {Promise<void>}
 */
async function releaseLock(lockPath) {
  // Starting from a resolved promise routes synchronous throws from unlink
  // through the same catch as asynchronous rejections.
  await Promise.resolve()
    .then(() => unlink(lockPath))
    .catch(() => {
    });
}
/**
 * Materializes git repositories into eval workspaces through a local cache of
 * bare mirrors (one directory per source, named by cacheKey(source)).
 */
var RepoManager = class {
  /** Root directory holding the cached mirrors. */
  cacheDir;
  /**
   * @param {string} [cacheDir] Cache root; defaults to DEFAULT_CACHE_DIR.
   */
  constructor(cacheDir) {
    this.cacheDir = cacheDir ?? DEFAULT_CACHE_DIR;
  }
  /**
   * Ensure a bare mirror cache exists for the given source.
   * Creates on first access, fetches updates on subsequent calls.
   * The work runs under a per-cache lock file that is always released.
   *
   * @param {object} source Repo source descriptor (see cacheKey / getSourceUrl).
   * @returns {Promise<string>} Path to the cache directory for this source.
   */
  async ensureCache(source) {
    const key = cacheKey(source);
    const cachePath = path35.join(this.cacheDir, key);
    const lockPath = `${cachePath}.lock`;
    await mkdir11(this.cacheDir, { recursive: true });
    await acquireLock(lockPath);
    try {
      // A HEAD file marks an existing mirror; otherwise clone it fresh.
      if (existsSync2(path35.join(cachePath, "HEAD"))) {
        await git(["fetch", "--prune"], { cwd: cachePath });
      } else {
        await git(["clone", "--mirror", "--bare", getSourceUrl(source), cachePath]);
      }
    } finally {
      await releaseLock(lockPath);
    }
    return cachePath;
  }
  /**
   * Clone a repo from cache into the workspace at the configured path.
   * Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
   *
   * @param {object} repo Repo config: `path`, `source`, optional `clone`
   *   (`depth`, `filter`, `sparse`) and `checkout` (`ref`, `resolve`, `ancestor`).
   * @param {string} workspacePath Workspace root; the repo lands in
   *   `<workspacePath>/<repo.path>`.
   * @returns {Promise<void>}
   * @throws {Error} When the ref is not found on the remote, when the requested
   *   ancestor cannot be resolved, or when any git command fails.
   */
  async materialize(repo, workspacePath) {
    const targetDir = path35.join(workspacePath, repo.path);
    const cachePath = await this.ensureCache(repo.source);
    const cloneArgs = ["clone"];
    if (repo.clone?.depth) {
      cloneArgs.push("--depth", String(repo.clone.depth));
    }
    if (repo.clone?.filter) {
      cloneArgs.push("--filter", repo.clone.filter);
    }
    cloneArgs.push("--no-checkout");
    // Shallow/filtered clones go through a file:// URL instead of the plain path.
    const cloneUrl = repo.clone?.depth || repo.clone?.filter ? `file://${cachePath}` : cachePath;
    cloneArgs.push(cloneUrl, targetDir);
    await git(cloneArgs);
    if (repo.clone?.sparse?.length) {
      await git(["sparse-checkout", "init", "--cone"], { cwd: targetDir });
      await git(["sparse-checkout", "set", ...repo.clone.sparse], { cwd: targetDir });
    }
    const ref = repo.checkout?.ref ?? "HEAD";
    const resolve2 = repo.checkout?.resolve ?? "remote";
    let resolvedSha;
    if (resolve2 === "remote" && repo.source.type === "git") {
      const url2 = getSourceUrl(repo.source);
      try {
        const lsOutput = await git(["ls-remote", url2, ref]);
        // ls-remote prints "<sha>\t<ref>" per line; take the first
        // whitespace-delimited token so tab, space and multi-line output all
        // yield the first SHA.
        const match = lsOutput.split(/\s+/)[0];
        if (!match) {
          throw new Error(`Ref '${ref}' not found on remote ${url2}`);
        }
        resolvedSha = match;
      } catch (err) {
        // Propagate "not found" errors; on any other failure fall back to
        // checking out the ref name as given.
        if (err instanceof Error && err.message.includes("not found")) throw err;
        resolvedSha = ref;
      }
    } else {
      resolvedSha = ref;
    }
    await git(["checkout", resolvedSha], { cwd: targetDir });
    const ancestor = repo.checkout?.ancestor ?? 0;
    if (ancestor > 0) {
      try {
        const ancestorSha = await git(["rev-parse", `HEAD~${ancestor}`], { cwd: targetDir });
        await git(["checkout", ancestorSha], { cwd: targetDir });
      } catch (cause) {
        if (repo.clone?.depth) {
          // A shallow clone may not contain the ancestor yet; deepen and retry once.
          await git(["fetch", "--deepen", String(ancestor)], { cwd: targetDir });
          const ancestorSha = await git(["rev-parse", `HEAD~${ancestor}`], { cwd: targetDir });
          await git(["checkout", ancestorSha], { cwd: targetDir });
        } else {
          throw new Error(
            `Cannot resolve ancestor ${ancestor} of ref '${ref}'. If using shallow clone, increase clone.depth to at least ${ancestor + 1}.`,
            { cause }
          );
        }
      }
    }
  }
  /** Materialize all repos into the workspace, one after another. */
  async materializeAll(repos, workspacePath) {
    for (const repo of repos) {
      await this.materialize(repo, workspacePath);
    }
  }
  /**
   * Reset repos in workspace to their checkout state.
   * With strategy "recreate" every repo directory is deleted and materialized
   * again; with any other strategy each repo is hard-reset to HEAD and
   * untracked files and directories are removed.
   */
  async reset(repos, workspacePath, strategy) {
    if (strategy === "recreate") {
      for (const repo of repos) {
        const targetDir = path35.join(workspacePath, repo.path);
        await rm5(targetDir, { recursive: true, force: true });
      }
      await this.materializeAll(repos, workspacePath);
      return;
    }
    for (const repo of repos) {
      const targetDir = path35.join(workspacePath, repo.path);
      await git(["reset", "--hard", "HEAD"], { cwd: targetDir });
      await git(["clean", "-fd"], { cwd: targetDir });
    }
  }
  /** Remove the entire cache directory. */
  async cleanCache() {
    await rm5(this.cacheDir, { recursive: true, force: true });
  }
};
45846
46355
  async function resolveWorkspaceTemplate(templatePath) {
45847
46356
  if (!templatePath) {
45848
46357
  return void 0;
45849
46358
  }
45850
- const resolved = path35.resolve(templatePath);
46359
+ const resolved = path36.resolve(templatePath);
45851
46360
  const stats = await stat6(resolved);
45852
46361
  if (stats.isFile()) {
45853
46362
  return {
45854
- dir: path35.dirname(resolved),
46363
+ dir: path36.dirname(resolved),
45855
46364
  workspaceFile: resolved
45856
46365
  };
45857
46366
  }
@@ -45863,14 +46372,14 @@ async function resolveWorkspaceTemplate(templatePath) {
45863
46372
  if (workspaceFiles.length === 1) {
45864
46373
  return {
45865
46374
  dir: resolved,
45866
- workspaceFile: path35.join(resolved, workspaceFiles[0])
46375
+ workspaceFile: path36.join(resolved, workspaceFiles[0])
45867
46376
  };
45868
46377
  }
45869
46378
  if (workspaceFiles.length > 1) {
45870
46379
  const conventionFile = workspaceFiles.find((f) => f === "template.code-workspace");
45871
46380
  return {
45872
46381
  dir: resolved,
45873
- workspaceFile: conventionFile ? path35.join(resolved, conventionFile) : void 0
46382
+ workspaceFile: conventionFile ? path36.join(resolved, conventionFile) : void 0
45874
46383
  };
45875
46384
  }
45876
46385
  return { dir: resolved };
@@ -45988,6 +46497,11 @@ async function runEvaluation(options) {
45988
46497
  }
45989
46498
  return getOrCreateProvider(resolvedJudge);
45990
46499
  };
46500
+ if (isAgentProvider(getOrCreateProvider(target)) && !target.judgeTarget) {
46501
+ throw new Error(
46502
+ `Target "${target.name}" is an agent provider ("${target.kind}") with no judge_target \u2014 agent providers cannot return structured JSON for judging. Set judge_target to an LLM provider (e.g., azure_base).`
46503
+ );
46504
+ }
45991
46505
  const targetResolver = (name16) => {
45992
46506
  const resolved = resolveTargetByName(name16);
45993
46507
  if (!resolved) {
@@ -46001,7 +46515,7 @@ async function runEvaluation(options) {
46001
46515
  ];
46002
46516
  const evaluatorRegistry = buildEvaluatorRegistry(evaluators, resolveJudgeProvider);
46003
46517
  const typeRegistry = createBuiltinRegistry();
46004
- const discoveryBaseDir = evalFilePath ? path36.dirname(path36.resolve(evalFilePath)) : process.cwd();
46518
+ const discoveryBaseDir = evalFilePath ? path37.dirname(path37.resolve(evalFilePath)) : process.cwd();
46005
46519
  await discoverAssertions(typeRegistry, discoveryBaseDir);
46006
46520
  const providerRegistry = createBuiltinProviderRegistry();
46007
46521
  await discoverProviders(providerRegistry, discoveryBaseDir);
@@ -46056,7 +46570,8 @@ async function runEvaluation(options) {
46056
46570
  const resolvedTemplate = await resolveWorkspaceTemplate(rawTemplate);
46057
46571
  const workspaceTemplate = resolvedTemplate?.dir;
46058
46572
  const suiteWorkspaceFile = resolvedTemplate?.workspaceFile;
46059
- const hasSharedWorkspace = !!(workspaceTemplate || suiteWorkspace?.before_all);
46573
+ const isPerTestIsolation = suiteWorkspace?.isolation === "per_test";
46574
+ const hasSharedWorkspace = !!(workspaceTemplate || suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation);
46060
46575
  const requestedWorkers = options.maxConcurrency ?? target.workers ?? 1;
46061
46576
  const workers = hasSharedWorkspace ? 1 : requestedWorkers;
46062
46577
  if (hasSharedWorkspace && requestedWorkers > 1) {
@@ -46075,9 +46590,22 @@ async function runEvaluation(options) {
46075
46590
  const message = error40 instanceof Error ? error40.message : String(error40);
46076
46591
  throw new Error(`Failed to create shared workspace: ${message}`);
46077
46592
  }
46078
- } else if (suiteWorkspace?.before_all) {
46593
+ } else if (suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
46079
46594
  sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
46080
- await mkdir11(sharedWorkspacePath, { recursive: true });
46595
+ await mkdir12(sharedWorkspacePath, { recursive: true });
46596
+ }
46597
+ const repoManager = suiteWorkspace?.repos?.length ? new RepoManager() : void 0;
46598
+ if (repoManager && sharedWorkspacePath && suiteWorkspace?.repos && !isPerTestIsolation) {
46599
+ try {
46600
+ await repoManager.materializeAll(suiteWorkspace.repos, sharedWorkspacePath);
46601
+ } catch (error40) {
46602
+ const message = error40 instanceof Error ? error40.message : String(error40);
46603
+ if (sharedWorkspacePath) {
46604
+ await cleanupWorkspace(sharedWorkspacePath).catch(() => {
46605
+ });
46606
+ }
46607
+ throw new Error(`Failed to materialize repos: ${message}`);
46608
+ }
46081
46609
  }
46082
46610
  if (sharedWorkspacePath && suiteWorkspace?.before_all) {
46083
46611
  const scriptContext = {
@@ -46168,7 +46696,8 @@ async function runEvaluation(options) {
46168
46696
  sharedBaselineCommit,
46169
46697
  suiteWorkspaceFile,
46170
46698
  streamCallbacks,
46171
- typeRegistry
46699
+ typeRegistry,
46700
+ repoManager
46172
46701
  };
46173
46702
  let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
46174
46703
  if (totalBudgetUsd !== void 0) {
@@ -46443,15 +46972,16 @@ async function runEvalCase(options) {
46443
46972
  sharedWorkspacePath,
46444
46973
  sharedBaselineCommit,
46445
46974
  suiteWorkspaceFile,
46446
- typeRegistry: providedTypeRegistry
46975
+ typeRegistry: providedTypeRegistry,
46976
+ repoManager
46447
46977
  } = options;
46448
46978
  const formattingMode = usesFileReferencePrompt(provider) ? "agent" : "lm";
46449
46979
  const promptInputs = await buildPromptInputs(evalCase, formattingMode);
46450
46980
  const typeRegistry = providedTypeRegistry ?? createBuiltinRegistry();
46451
- const cacheKey = useCache ? createCacheKey(provider, target, evalCase, promptInputs) : void 0;
46981
+ const cacheKey2 = useCache ? createCacheKey(provider, target, evalCase, promptInputs) : void 0;
46452
46982
  let cachedResponse;
46453
- if (cacheKey && cache) {
46454
- cachedResponse = await cache.get(cacheKey);
46983
+ if (cacheKey2 && cache) {
46984
+ cachedResponse = await cache.get(cacheKey2);
46455
46985
  }
46456
46986
  const nowFn = now ?? (() => /* @__PURE__ */ new Date());
46457
46987
  let workspacePath = sharedWorkspacePath;
@@ -46480,9 +47010,25 @@ async function runEvalCase(options) {
46480
47010
  );
46481
47011
  }
46482
47012
  }
46483
- if (!workspacePath && evalCase.workspace?.before_all && evalRunId) {
47013
+ if (!workspacePath && (evalCase.workspace?.before_all || evalCase.workspace?.repos?.length) && evalRunId) {
46484
47014
  workspacePath = getWorkspacePath(evalRunId, evalCase.id);
46485
- await mkdir11(workspacePath, { recursive: true });
47015
+ await mkdir12(workspacePath, { recursive: true });
47016
+ }
47017
+ if (evalCase.workspace?.repos?.length && workspacePath) {
47018
+ const perCaseRepoManager = new RepoManager();
47019
+ try {
47020
+ await perCaseRepoManager.materializeAll(evalCase.workspace.repos, workspacePath);
47021
+ } catch (error40) {
47022
+ const message = error40 instanceof Error ? error40.message : String(error40);
47023
+ return buildErrorResult(
47024
+ evalCase,
47025
+ target.name,
47026
+ nowFn(),
47027
+ new Error(`Failed to materialize repos: ${message}`),
47028
+ promptInputs,
47029
+ provider
47030
+ );
47031
+ }
46486
47032
  }
46487
47033
  if (workspacePath && evalCase.workspace?.before_all) {
46488
47034
  const scriptContext = {
@@ -46606,8 +47152,8 @@ async function runEvalCase(options) {
46606
47152
  }
46607
47153
  return errorResult;
46608
47154
  }
46609
- if (cacheKey && cache && !cachedResponse) {
46610
- await cache.set(cacheKey, providerResponse);
47155
+ if (cacheKey2 && cache && !cachedResponse) {
47156
+ await cache.set(cacheKey2, providerResponse);
46611
47157
  }
46612
47158
  const output = providerResponse.output;
46613
47159
  const hasExecutionMetrics = providerResponse.tokenUsage !== void 0 || providerResponse.costUsd !== void 0 || providerResponse.durationMs !== void 0;
@@ -46635,6 +47181,16 @@ async function runEvalCase(options) {
46635
47181
  }
46636
47182
  }
46637
47183
  const providerError = extractProviderError(providerResponse);
47184
+ if (repoManager && workspacePath && evalCase.workspace?.reset?.after_each && evalCase.workspace.reset.strategy && evalCase.workspace.reset.strategy !== "none" && evalCase.workspace.repos) {
47185
+ try {
47186
+ await repoManager.reset(
47187
+ evalCase.workspace.repos,
47188
+ workspacePath,
47189
+ evalCase.workspace.reset.strategy
47190
+ );
47191
+ } catch {
47192
+ }
47193
+ }
46638
47194
  if (workspacePath && evalCase.workspace?.after_each) {
46639
47195
  const scriptContext = {
46640
47196
  workspacePath,
@@ -46999,7 +47555,7 @@ async function runEvaluatorList(options) {
46999
47555
  fileChanges,
47000
47556
  workspacePath
47001
47557
  };
47002
- const evalFileDir = evalCase.guideline_paths[0] ? path36.dirname(evalCase.guideline_paths[0]) : process.cwd();
47558
+ const evalFileDir = evalCase.guideline_paths[0] ? path37.dirname(evalCase.guideline_paths[0]) : process.cwd();
47003
47559
  const dispatchContext = {
47004
47560
  judgeProvider,
47005
47561
  targetResolver,
@@ -47089,8 +47645,9 @@ async function runEvaluatorList(options) {
47089
47645
  const minScore = typeof entry.required === "number" ? entry.required : PASS_THRESHOLD;
47090
47646
  return entry.score.score < minScore;
47091
47647
  });
47092
- const aggregateScore = hasRequiredFailure ? 0 : scored.length > 0 ? computeWeightedMean(
47093
- scored.map((entry) => ({ score: entry.score.score, weight: entry.weight }))
47648
+ const scorable = scored.filter((entry) => entry.score.verdict !== "skip");
47649
+ const aggregateScore = hasRequiredFailure ? 0 : scorable.length > 0 ? computeWeightedMean(
47650
+ scorable.map((entry) => ({ score: entry.score.score, weight: entry.weight }))
47094
47651
  ) : 0;
47095
47652
  const hits = scored.flatMap((entry) => entry.score.hits);
47096
47653
  const misses = scored.flatMap((entry) => entry.score.misses);
@@ -47230,7 +47787,7 @@ function extractProviderError(response) {
47230
47787
  return trimmed.length > 0 ? trimmed : void 0;
47231
47788
  }
47232
47789
  function createCacheKey(provider, target, evalCase, promptInputs) {
47233
- const hash = createHash("sha256");
47790
+ const hash = createHash2("sha256");
47234
47791
  hash.update(provider.id);
47235
47792
  hash.update(target.name);
47236
47793
  hash.update(evalCase.id);
@@ -47317,13 +47874,13 @@ async function evaluate(config2) {
47317
47874
  let evalCases;
47318
47875
  let testFilePath;
47319
47876
  if (config2.specFile) {
47320
- testFilePath = path37.resolve(config2.specFile);
47877
+ testFilePath = path38.resolve(config2.specFile);
47321
47878
  evalCases = await loadTests(testFilePath, repoRoot, {
47322
47879
  verbose: config2.verbose,
47323
47880
  filter: config2.filter
47324
47881
  });
47325
47882
  } else {
47326
- testFilePath = path37.join(process.cwd(), "__programmatic__.yaml");
47883
+ testFilePath = path38.join(process.cwd(), "__programmatic__.yaml");
47327
47884
  evalCases = (config2.tests ?? []).map((test) => {
47328
47885
  const input = typeof test.input === "string" ? [{ role: "user", content: test.input }] : test.input;
47329
47886
  const question = typeof test.input === "string" ? test.input : test.input.find((m) => m.role === "user")?.content ?? "";
@@ -47414,11 +47971,11 @@ function computeSummary(results, durationMs) {
47414
47971
  var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
47415
47972
  async function discoverDefaultTarget(repoRoot) {
47416
47973
  const cwd = process.cwd();
47417
- const chain = buildDirectoryChain(path37.join(cwd, "_placeholder"), repoRoot);
47974
+ const chain = buildDirectoryChain(path38.join(cwd, "_placeholder"), repoRoot);
47418
47975
  for (const dir of chain) {
47419
47976
  for (const candidate of TARGET_FILE_CANDIDATES) {
47420
- const targetsPath = path37.join(dir, candidate);
47421
- if (!existsSync2(targetsPath)) continue;
47977
+ const targetsPath = path38.join(dir, candidate);
47978
+ if (!existsSync3(targetsPath)) continue;
47422
47979
  try {
47423
47980
  const definitions = await readTargetDefinitions(targetsPath);
47424
47981
  const defaultTarget = definitions.find((d) => d.name === "default");
@@ -47432,11 +47989,11 @@ async function discoverDefaultTarget(repoRoot) {
47432
47989
  async function loadEnvHierarchy(repoRoot) {
47433
47990
  const { readFileSync: readFileSync2 } = await import("node:fs");
47434
47991
  const cwd = process.cwd();
47435
- const chain = buildDirectoryChain(path37.join(cwd, "_placeholder"), repoRoot);
47992
+ const chain = buildDirectoryChain(path38.join(cwd, "_placeholder"), repoRoot);
47436
47993
  const envFiles = [];
47437
47994
  for (const dir of chain) {
47438
- const envPath = path37.join(dir, ".env");
47439
- if (existsSync2(envPath)) envFiles.push(envPath);
47995
+ const envPath = path38.join(dir, ".env");
47996
+ if (existsSync3(envPath)) envFiles.push(envPath);
47440
47997
  }
47441
47998
  for (let i = envFiles.length - 1; i >= 0; i--) {
47442
47999
  try {
@@ -47503,12 +48060,12 @@ var CONFIG_FILE_NAMES = [
47503
48060
  ".agentv/config.js"
47504
48061
  ];
47505
48062
  async function loadTsConfig(projectRoot) {
47506
- const { existsSync: existsSync3 } = await import("node:fs");
48063
+ const { existsSync: existsSync4 } = await import("node:fs");
47507
48064
  const { pathToFileURL } = await import("node:url");
47508
48065
  const { join: join2 } = await import("node:path");
47509
48066
  for (const fileName of CONFIG_FILE_NAMES) {
47510
48067
  const filePath = join2(projectRoot, fileName);
47511
- if (!existsSync3(filePath)) {
48068
+ if (!existsSync4(filePath)) {
47512
48069
  continue;
47513
48070
  }
47514
48071
  try {
@@ -47616,13 +48173,13 @@ var ResponseCache = class {
47616
48173
  }
47617
48174
  async set(key, value) {
47618
48175
  const filePath = this.keyToPath(key);
47619
- const dir = path38.dirname(filePath);
47620
- await mkdir12(dir, { recursive: true });
47621
- await writeFile7(filePath, JSON.stringify(value, null, 2), "utf8");
48176
+ const dir = path39.dirname(filePath);
48177
+ await mkdir13(dir, { recursive: true });
48178
+ await writeFile8(filePath, JSON.stringify(value, null, 2), "utf8");
47622
48179
  }
47623
48180
  keyToPath(key) {
47624
48181
  const prefix = key.slice(0, 2);
47625
- return path38.join(this.cachePath, prefix, `${key}.json`);
48182
+ return path39.join(this.cachePath, prefix, `${key}.json`);
47626
48183
  }
47627
48184
  };
47628
48185
  function shouldEnableCache(params) {
@@ -48163,6 +48720,13 @@ export {
48163
48720
  TokenUsageEvaluator,
48164
48721
  ToolTrajectoryEvaluator,
48165
48722
  runContainsAssertion,
48723
+ runContainsAnyAssertion,
48724
+ runContainsAllAssertion,
48725
+ runIcontainsAssertion,
48726
+ runIcontainsAnyAssertion,
48727
+ runIcontainsAllAssertion,
48728
+ runStartsWithAssertion,
48729
+ runEndsWithAssertion,
48166
48730
  runRegexAssertion,
48167
48731
  runIsJsonAssertion,
48168
48732
  runEqualsAssertion,
@@ -48179,6 +48743,7 @@ export {
48179
48743
  createTempWorkspace,
48180
48744
  cleanupWorkspace,
48181
48745
  cleanupEvalWorkspaces,
48746
+ RepoManager,
48182
48747
  resolveWorkspaceTemplate,
48183
48748
  executeWorkspaceScript,
48184
48749
  runEvaluation,
@@ -48196,4 +48761,4 @@ export {
48196
48761
  OtelStreamingObserver,
48197
48762
  createAgentKernel
48198
48763
  };
48199
- //# sourceMappingURL=chunk-RJWTL3VS.js.map
48764
+ //# sourceMappingURL=chunk-EXJWRKKL.js.map