@tangle-network/agent-eval 0.20.8 → 0.20.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,3 +1,8 @@
1
+ import {
2
+ BENCHMARK_SPLIT_SEED,
3
+ benchmarks_exports,
4
+ deterministicSplit
5
+ } from "./chunk-42I2QC2L.js";
1
6
  import {
2
7
  LlmCallError,
3
8
  LlmClient,
@@ -6,9 +11,7 @@ import {
6
11
  probeLlm,
7
12
  stripFencedJson
8
13
  } from "./chunk-JAOLXRIA.js";
9
- import {
10
- __export
11
- } from "./chunk-PZ5AY32C.js";
14
+ import "./chunk-PZ5AY32C.js";
12
15
 
13
16
  // src/client.ts
14
17
  var ProductClient = class {
@@ -649,9 +652,9 @@ function feedbackTrajectoryToOptimizerRow(trajectory) {
649
652
  function feedbackTrajectoriesToOptimizerRows(trajectories) {
650
653
  return trajectories.map(feedbackTrajectoryToOptimizerRow);
651
654
  }
652
- async function replayFeedbackTrajectory(trajectory, adapter2) {
655
+ async function replayFeedbackTrajectory(trajectory, adapter) {
653
656
  try {
654
- const result = await adapter2.replay(trajectory);
657
+ const result = await adapter.replay(trajectory);
655
658
  return {
656
659
  trajectoryId: trajectory.id,
657
660
  ...result
@@ -680,10 +683,10 @@ async function replayFeedbackTrajectory(trajectory, adapter2) {
680
683
  };
681
684
  }
682
685
  }
683
- async function replayFeedbackTrajectories(trajectories, adapter2) {
686
+ async function replayFeedbackTrajectories(trajectories, adapter) {
684
687
  const results = [];
685
688
  for (const trajectory of trajectories) {
686
- results.push(await replayFeedbackTrajectory(trajectory, adapter2));
689
+ results.push(await replayFeedbackTrajectory(trajectory, adapter));
687
690
  }
688
691
  return results;
689
692
  }
@@ -2342,7 +2345,7 @@ var DEFAULT_BUDGET = {
2342
2345
  maxWallMs: 5 * 60 * 1e3
2343
2346
  };
2344
2347
  async function runAgentControlLoop(config) {
2345
- const budget = { ...DEFAULT_BUDGET, ...config.budget };
2348
+ const budget = normalizeBudget(config.budget);
2346
2349
  const actionFailure = config.actionFailure ?? "continue";
2347
2350
  const controller = new AbortController();
2348
2351
  const upstreamAbort = () => controller.abort(config.signal?.reason);
@@ -2379,12 +2382,13 @@ async function runAgentControlLoop(config) {
2379
2382
  try {
2380
2383
  state = await config.observe({ history, abortSignal: controller.signal });
2381
2384
  } catch (err) {
2382
- runtimeErrors.push(runtimeError("observe", 0, err));
2385
+ const error = runtimeError("observe", 0, err);
2386
+ runtimeErrors.push(error);
2383
2387
  return finish(emitter, {
2384
2388
  intent: config.intent,
2385
2389
  pass: false,
2386
2390
  completed: false,
2387
- reason: runtimeErrors[0].message,
2391
+ reason: error.message,
2388
2392
  steps: history,
2389
2393
  finalState: void 0,
2390
2394
  finalEvals: [],
@@ -2400,12 +2404,13 @@ async function runAgentControlLoop(config) {
2400
2404
  evals = await config.validate({ intent: config.intent, state, history, abortSignal: controller.signal });
2401
2405
  await recordEvalSpans(emitter, evals, "initial", runtimeErrors, 0);
2402
2406
  } catch (err) {
2403
- runtimeErrors.push(runtimeError("validate", 0, err));
2407
+ const error = runtimeError("validate", 0, err);
2408
+ runtimeErrors.push(error);
2404
2409
  return finish(emitter, {
2405
2410
  intent: config.intent,
2406
2411
  pass: false,
2407
2412
  completed: false,
2408
- reason: runtimeErrors[0].message,
2413
+ reason: error.message,
2409
2414
  steps: history,
2410
2415
  finalState: state,
2411
2416
  finalEvals: [],
@@ -2575,13 +2580,14 @@ async function runAgentControlLoop(config) {
2575
2580
  let actionOutcome;
2576
2581
  try {
2577
2582
  const result = await config.act(decision.action, ctx);
2578
- const costUsd = config.getActionCostUsd?.({
2583
+ const rawCostUsd = config.getActionCostUsd?.({
2579
2584
  action: decision.action,
2580
2585
  result,
2581
2586
  state,
2582
2587
  evals,
2583
2588
  history
2584
2589
  });
2590
+ const costUsd = normalizeActionCostUsd(rawCostUsd, runtimeErrors, stepIndex);
2585
2591
  if (costUsd !== void 0 && Number.isFinite(costUsd) && costUsd > 0) {
2586
2592
  spentCostUsd += costUsd;
2587
2593
  await recordCostBudget(emitter, budget, spentCostUsd, stepHandle, runtimeErrors, stepIndex);
@@ -2874,6 +2880,34 @@ function objectiveEval(input) {
2874
2880
  function subjectiveEval(input) {
2875
2881
  return { ...input, objective: false };
2876
2882
  }
2883
+ function normalizeBudget(input) {
2884
+ const raw = { ...DEFAULT_BUDGET, ...input };
2885
+ if (!Number.isInteger(raw.maxSteps) || raw.maxSteps < 1) {
2886
+ throw new RangeError(`ControlRuntime budget.maxSteps must be an integer >= 1, got ${String(raw.maxSteps)}`);
2887
+ }
2888
+ const budget = { maxSteps: raw.maxSteps };
2889
+ if (raw.maxWallMs !== void 0) {
2890
+ if (typeof raw.maxWallMs !== "number" || !Number.isFinite(raw.maxWallMs) || raw.maxWallMs <= 0) {
2891
+ throw new RangeError(`ControlRuntime budget.maxWallMs must be a positive finite number, got ${String(raw.maxWallMs)}`);
2892
+ }
2893
+ budget.maxWallMs = raw.maxWallMs;
2894
+ }
2895
+ if (raw.maxCostUsd !== void 0) {
2896
+ if (typeof raw.maxCostUsd !== "number" || !Number.isFinite(raw.maxCostUsd) || raw.maxCostUsd < 0) {
2897
+ throw new RangeError(`ControlRuntime budget.maxCostUsd must be a nonnegative finite number, got ${String(raw.maxCostUsd)}`);
2898
+ }
2899
+ budget.maxCostUsd = raw.maxCostUsd;
2900
+ }
2901
+ return budget;
2902
+ }
2903
+ function normalizeActionCostUsd(costUsd, runtimeErrors, stepIndex) {
2904
+ if (costUsd === void 0) return void 0;
2905
+ if (!Number.isFinite(costUsd) || costUsd < 0) {
2906
+ runtimeErrors.push(runtimeError("act", stepIndex, new Error(`invalid action costUsd: ${String(costUsd)}`)));
2907
+ return void 0;
2908
+ }
2909
+ return costUsd;
2910
+ }
2877
2911
  function allCriticalPassed(evals) {
2878
2912
  return evals.every((result) => result.passed || result.severity !== "critical" && result.severity !== "error");
2879
2913
  }
@@ -3124,7 +3158,7 @@ function isRequirementMissing(requirement, now) {
3124
3158
  function isExpired(requirement, now) {
3125
3159
  if (!requirement.validUntil) return false;
3126
3160
  const deadline = Date.parse(requirement.validUntil);
3127
- if (!Number.isFinite(deadline)) return false;
3161
+ if (!Number.isFinite(deadline)) return true;
3128
3162
  return deadline <= now.getTime();
3129
3163
  }
3130
3164
  function isBlockingGap(requirement) {
@@ -3133,11 +3167,11 @@ function isBlockingGap(requirement) {
3133
3167
  function chooseRecommendedAction(blocking, nonBlocking) {
3134
3168
  const gaps = blocking.length > 0 ? blocking : nonBlocking;
3135
3169
  if (gaps.length === 0) return "run_agent";
3136
- if (blocking.some((gap) => gap.acquisitionMode === "ask_user" || gap.fallbackPolicy === "ask")) return "ask_user";
3137
- if (blocking.some((gap) => gap.acquisitionMode === "query_connector")) return "query_connectors";
3138
- if (blocking.some((gap) => gap.acquisitionMode === "inspect_repo" || gap.acquisitionMode === "run_command")) return "inspect_repo";
3139
- if (blocking.some((gap) => gap.acquisitionMode === "search_web")) return "collect_web_data";
3140
- if (blocking.some((gap) => gap.acquisitionMode === "not_available")) return "abort_or_rescope";
3170
+ if (gaps.some((gap) => gap.acquisitionMode === "ask_user" || gap.fallbackPolicy === "ask")) return "ask_user";
3171
+ if (gaps.some((gap) => gap.acquisitionMode === "query_connector")) return "query_connectors";
3172
+ if (gaps.some((gap) => gap.acquisitionMode === "inspect_repo" || gap.acquisitionMode === "run_command")) return "inspect_repo";
3173
+ if (gaps.some((gap) => gap.acquisitionMode === "search_web")) return "collect_web_data";
3174
+ if (gaps.some((gap) => gap.acquisitionMode === "not_available")) return "abort_or_rescope";
3141
3175
  if (nonBlocking.some((gap) => gap.importance === "high")) return "build_domain_wiki";
3142
3176
  return "continue_with_caveat";
3143
3177
  }
@@ -4074,12 +4108,15 @@ var DEFAULT_RUN_SCORE_WEIGHTS = {
4074
4108
  };
4075
4109
  function aggregateRunScore(score, weights = {}) {
4076
4110
  const w = { ...DEFAULT_RUN_SCORE_WEIGHTS, ...weights };
4077
- return w.success * clamp012(score.success) + w.goalProgress * clamp012(score.goalProgress) + w.repoGroundedness * clamp012(score.repoGroundedness) + w.driftPenalty * clamp012(score.driftPenalty) + w.toolUseQuality * clamp012(score.toolUseQuality) + w.patchQuality * clamp012(score.patchQuality) + w.testReality * clamp012(score.testReality) + w.finalGate * clamp012(score.finalGate) + w.reviewerBlockers * clamp012(score.reviewerBlockers) + w.costUsd * Math.max(0, score.costUsd) + w.wallSeconds * Math.max(0, score.wallSeconds / 60);
4111
+ return w.success * clamp012(score.success) + w.goalProgress * clamp012(score.goalProgress) + w.repoGroundedness * clamp012(score.repoGroundedness) + w.driftPenalty * clamp012(score.driftPenalty) + w.toolUseQuality * clamp012(score.toolUseQuality) + w.patchQuality * clamp012(score.patchQuality) + w.testReality * clamp012(score.testReality) + w.finalGate * clamp012(score.finalGate) + w.reviewerBlockers * clamp012(score.reviewerBlockers) + w.costUsd * Math.max(0, finiteOrZero(score.costUsd)) + w.wallSeconds * Math.max(0, finiteOrZero(score.wallSeconds) / 60);
4078
4112
  }
4079
4113
  function clamp012(value) {
4080
4114
  if (!Number.isFinite(value)) return 0;
4081
4115
  return Math.max(0, Math.min(1, value));
4082
4116
  }
4117
+ function finiteOrZero(value) {
4118
+ return Number.isFinite(value) ? value : 0;
4119
+ }
4083
4120
 
4084
4121
  // src/run-critic.ts
4085
4122
  var DEFAULT_DRIFT_PATTERNS = [
@@ -4286,13 +4323,15 @@ var AxGepaSteeringOptimizer = class {
4286
4323
  const compiled = await optimizer.compile(
4287
4324
  selector,
4288
4325
  train,
4289
- (({ prediction, example }) => prediction?.variantId === example?.variantId ? 1 : 0),
4326
+ ({ prediction, example }) => prediction?.variantId === example?.variantId ? 1 : 0,
4290
4327
  {
4291
4328
  validationExamples: validation,
4292
4329
  maxMetricCalls: 64
4293
4330
  }
4294
4331
  );
4295
- selector.applyOptimization(compiled.optimizedProgram);
4332
+ if (compiled.optimizedProgram !== void 0) {
4333
+ selector.applyOptimization(compiled.optimizedProgram);
4334
+ }
4296
4335
  return {
4297
4336
  ...fallback,
4298
4337
  backend: "ax-gepa",
@@ -10410,20 +10449,20 @@ function mergeLayerResults(name, perAdapter, options = {}) {
10410
10449
  let durationMs = 0;
10411
10450
  const reasonParts = [];
10412
10451
  const diagnostics = {};
10413
- for (const { adapter: adapter2, result } of perAdapter) {
10452
+ for (const { adapter, result } of perAdapter) {
10414
10453
  status = worst(status, result.status);
10415
10454
  if (typeof result.score === "number") {
10416
10455
  weightedScoreSum += result.score;
10417
10456
  weightCount += 1;
10418
10457
  }
10419
10458
  durationMs = mergeDuration === "sum" ? durationMs + result.durationMs : Math.max(durationMs, result.durationMs);
10420
- reasonParts.push(`${adapter2}: ${result.status}`);
10459
+ reasonParts.push(`${adapter}: ${result.status}`);
10421
10460
  for (const f2 of result.findings) {
10422
10461
  findings.push({
10423
10462
  ...f2,
10424
10463
  layer: name,
10425
- message: prefix ? `${prefix(adapter2)} ${f2.message}` : f2.message,
10426
- detail: { ...f2.detail ?? {}, adapter: adapter2 }
10464
+ message: prefix ? `${prefix(adapter)} ${f2.message}` : f2.message,
10465
+ detail: { ...f2.detail ?? {}, adapter }
10427
10466
  });
10428
10467
  }
10429
10468
  for (const [k, v] of Object.entries(result.diagnostics ?? {})) {
@@ -10442,8 +10481,8 @@ function mergeLayerResults(name, perAdapter, options = {}) {
10442
10481
  reason: reasonParts.join(" \xB7 "),
10443
10482
  diagnostics: Object.keys(diagnostics).length > 0 ? diagnostics : void 0,
10444
10483
  detail: {
10445
- adapters: perAdapter.map(({ adapter: adapter2, result }) => ({
10446
- adapter: adapter2,
10484
+ adapters: perAdapter.map(({ adapter, result }) => ({
10485
+ adapter,
10447
10486
  status: result.status,
10448
10487
  score: result.score ?? null
10449
10488
  })),
@@ -10469,10 +10508,10 @@ function multiToolchainLayer(config) {
10469
10508
  reason: "no adapters detected"
10470
10509
  };
10471
10510
  }
10472
- const runOne = async (adapter2) => {
10473
- const adapterName = config.adapterName(adapter2);
10511
+ const runOne = async (adapter) => {
10512
+ const adapterName = config.adapterName(adapter);
10474
10513
  try {
10475
- const r = await config.run(adapter2, ctx);
10514
+ const r = await config.run(adapter, ctx);
10476
10515
  return { adapter: adapterName, result: r };
10477
10516
  } catch (err) {
10478
10517
  return {
@@ -11908,8 +11947,8 @@ function formatPct(value) {
11908
11947
  function bySplitOrder(a, b) {
11909
11948
  return ALL_SPLITS.indexOf(a) - ALL_SPLITS.indexOf(b);
11910
11949
  }
11911
- function runAdapter(adapter2, scenario, context) {
11912
- return typeof adapter2 === "function" ? adapter2(scenario, context) : adapter2.run(scenario, context);
11950
+ function runAdapter(adapter, scenario, context) {
11951
+ return typeof adapter === "function" ? adapter(scenario, context) : adapter.run(scenario, context);
11913
11952
  }
11914
11953
  function throwIfAborted(signal) {
11915
11954
  if (!signal?.aborted) return;
@@ -12325,6 +12364,24 @@ function fmt2(x) {
12325
12364
  }
12326
12365
 
12327
12366
  // src/researcher.ts
12367
+ var CallbackResearcher = class {
12368
+ constructor(callbacks) {
12369
+ this.callbacks = callbacks;
12370
+ }
12371
+ callbacks;
12372
+ inspectFailures(runs) {
12373
+ return this.callbacks.inspectFailures(runs);
12374
+ }
12375
+ proposeChange(failures) {
12376
+ return this.callbacks.proposeChange(failures);
12377
+ }
12378
+ applyChange(changes, baseline) {
12379
+ return this.callbacks.applyChange(changes, baseline);
12380
+ }
12381
+ evaluateChange(plan) {
12382
+ return this.callbacks.evaluateChange(plan);
12383
+ }
12384
+ };
12328
12385
  var NoopResearcher = class {
12329
12386
  hint;
12330
12387
  constructor(hint = "NoopResearcher: no implementation wired") {
@@ -12777,214 +12834,6 @@ function mean7(xs) {
12777
12834
  return xs.reduce((s, x) => s + x, 0) / xs.length;
12778
12835
  }
12779
12836
 
12780
- // src/benchmarks/types.ts
12781
- function fnv1a32(input) {
12782
- let h = 2166136261;
12783
- for (let i = 0; i < input.length; i++) {
12784
- h ^= input.charCodeAt(i) & 255;
12785
- h = h + ((h << 1) + (h << 4) + (h << 7) + (h << 8) + (h << 24)) >>> 0;
12786
- }
12787
- return h >>> 0;
12788
- }
12789
- var BENCHMARK_SPLIT_SEED = "agent-eval-v1";
12790
- function deterministicSplit(itemId, seed = BENCHMARK_SPLIT_SEED) {
12791
- const h = fnv1a32(`${seed}::${itemId}`);
12792
- const pos = h / 4294967296;
12793
- if (pos < 0.6) return "search";
12794
- if (pos < 0.8) return "dev";
12795
- return "holdout";
12796
- }
12797
-
12798
- // src/benchmarks/index.ts
12799
- var benchmarks_exports = {};
12800
- __export(benchmarks_exports, {
12801
- BENCHMARK_SPLIT_SEED: () => BENCHMARK_SPLIT_SEED,
12802
- deterministicSplit: () => deterministicSplit,
12803
- routing: () => routing_exports
12804
- });
12805
-
12806
- // src/benchmarks/routing/index.ts
12807
- var routing_exports = {};
12808
- __export(routing_exports, {
12809
- ROUTING_DATASET: () => ROUTING_DATASET,
12810
- RoutingAdapter: () => RoutingAdapter,
12811
- assignSplit: () => assignSplit,
12812
- evaluate: () => evaluate,
12813
- extractRouteTokens: () => extractRouteTokens,
12814
- loadDataset: () => loadDataset
12815
- });
12816
-
12817
- // src/benchmarks/routing/dataset.ts
12818
- var ROUTING_DATASET = [
12819
- {
12820
- id: "file_001",
12821
- category: "file",
12822
- prompt: "Save the meeting notes to /tmp/notes-2025-04.md as markdown.",
12823
- route: "fs.write",
12824
- synonyms: ["filesystem.write", "write_file"],
12825
- hardNegatives: ["fs.read", "chat.reply"]
12826
- },
12827
- {
12828
- id: "file_002",
12829
- category: "file",
12830
- prompt: "Read the contents of /etc/hosts and summarize the entries.",
12831
- route: "fs.read",
12832
- synonyms: ["filesystem.read", "read_file"],
12833
- hardNegatives: ["fs.write", "search.web"]
12834
- },
12835
- {
12836
- id: "file_003",
12837
- category: "file",
12838
- prompt: "List every Python file under src/ recursively.",
12839
- route: "fs.list",
12840
- synonyms: ["filesystem.list", "list_files"],
12841
- hardNegatives: ["fs.read", "search.code"]
12842
- },
12843
- {
12844
- id: "file_004",
12845
- category: "file",
12846
- prompt: "Delete the cached build at .turbo/cache.",
12847
- route: "fs.delete",
12848
- synonyms: ["filesystem.delete", "remove_file"],
12849
- hardNegatives: ["fs.write", "fs.list"]
12850
- },
12851
- {
12852
- id: "math_001",
12853
- category: "math",
12854
- prompt: "What is the integral of 3x^2 + 2x from 0 to 5?",
12855
- route: "math.integral",
12856
- synonyms: ["calculator.integral", "math.solve"],
12857
- hardNegatives: ["math.derivative", "chat.reply"]
12858
- },
12859
- {
12860
- id: "math_002",
12861
- category: "math",
12862
- prompt: "Compute the derivative of sin(x) * cos(x).",
12863
- route: "math.derivative",
12864
- synonyms: ["calculator.derivative", "math.solve"],
12865
- hardNegatives: ["math.integral", "math.algebra"]
12866
- },
12867
- {
12868
- id: "math_003",
12869
- category: "math",
12870
- prompt: "Solve 2x + 7 = 19 for x.",
12871
- route: "math.algebra",
12872
- synonyms: ["calculator.algebra", "math.solve"],
12873
- hardNegatives: ["math.derivative", "math.integral"]
12874
- },
12875
- {
12876
- id: "math_004",
12877
- category: "math",
12878
- prompt: "What is the prime factorization of 360?",
12879
- route: "math.numbertheory",
12880
- synonyms: ["calculator.factor", "math.solve"],
12881
- hardNegatives: ["math.algebra", "search.web"]
12882
- },
12883
- {
12884
- id: "search_001",
12885
- category: "search",
12886
- prompt: "Find recent papers on agent prompt optimization with held-out promotion gates.",
12887
- route: "search.web",
12888
- synonyms: ["web.search", "search.papers"],
12889
- hardNegatives: ["search.code", "chat.reply"]
12890
- },
12891
- {
12892
- id: "search_002",
12893
- category: "search",
12894
- prompt: "Search the codebase for every call site of `runProposeReview`.",
12895
- route: "search.code",
12896
- synonyms: ["code.search", "grep"],
12897
- hardNegatives: ["search.web", "fs.read"]
12898
- },
12899
- {
12900
- id: "search_003",
12901
- category: "search",
12902
- prompt: "What is the latest release of the Tangle network on GitHub?",
12903
- route: "search.web",
12904
- synonyms: ["web.search", "github.releases"],
12905
- hardNegatives: ["search.code", "chat.reply"]
12906
- },
12907
- {
12908
- id: "search_004",
12909
- category: "search",
12910
- prompt: "Find all TODO comments in the agent-eval src tree.",
12911
- route: "search.code",
12912
- synonyms: ["code.search", "grep"],
12913
- hardNegatives: ["search.web", "fs.list"]
12914
- },
12915
- {
12916
- id: "chat_001",
12917
- category: "chat",
12918
- prompt: "Hi there, how are you doing today?",
12919
- route: "chat.reply",
12920
- synonyms: ["conversation.reply"],
12921
- hardNegatives: ["search.web", "fs.read"]
12922
- },
12923
- {
12924
- id: "chat_002",
12925
- category: "chat",
12926
- prompt: "Please explain the difference between an LLM and a foundation model.",
12927
- route: "chat.reply",
12928
- synonyms: ["conversation.reply", "qa.answer"],
12929
- hardNegatives: ["search.web", "math.algebra"]
12930
- },
12931
- {
12932
- id: "chat_003",
12933
- category: "chat",
12934
- prompt: "Tell me a short joke about distributed systems.",
12935
- route: "chat.reply",
12936
- synonyms: ["conversation.reply"],
12937
- hardNegatives: ["search.web", "fs.read"]
12938
- },
12939
- {
12940
- id: "chat_004",
12941
- category: "chat",
12942
- prompt: "Acknowledge my last message with a thumbs up.",
12943
- route: "chat.reply",
12944
- synonyms: ["conversation.reply", "react"],
12945
- hardNegatives: ["fs.write", "search.web"]
12946
- }
12947
- ];
12948
-
12949
- // src/benchmarks/routing/index.ts
12950
- var RoutingAdapter = class {
12951
- async loadDataset(split) {
12952
- return ROUTING_DATASET.map((item) => ({ id: item.id, payload: item })).filter((it) => assignSplitImpl(it.id) === split);
12953
- }
12954
- async evaluate(item, response) {
12955
- const tokens2 = extractRouteTokens(response);
12956
- const correct = new Set([item.payload.route, ...item.payload.synonyms].map((s) => s.toLowerCase()));
12957
- const hardNeg = new Set(item.payload.hardNegatives.map((s) => s.toLowerCase()));
12958
- const firstMatch = tokens2.find((t) => correct.has(t.toLowerCase())) ?? null;
12959
- const firstHardNeg = tokens2.find((t) => hardNeg.has(t.toLowerCase())) ?? null;
12960
- const score = firstMatch ? 1 : 0;
12961
- return {
12962
- score,
12963
- raw: {
12964
- firstToken: tokens2[0] ?? null,
12965
- matchedRoute: firstMatch,
12966
- hitHardNegative: Boolean(firstHardNeg),
12967
- hardNegativeRoute: firstHardNeg,
12968
- category: item.payload.category
12969
- }
12970
- };
12971
- }
12972
- assignSplit(itemId) {
12973
- return assignSplitImpl(itemId);
12974
- }
12975
- };
12976
- function assignSplitImpl(itemId) {
12977
- return deterministicSplit(`routing::${itemId}`);
12978
- }
12979
- function extractRouteTokens(response) {
12980
- const matches2 = response.match(/[a-z][a-z0-9_]*\.[a-z][a-z0-9_]*/gi);
12981
- return matches2 ?? [];
12982
- }
12983
- var adapter = new RoutingAdapter();
12984
- var loadDataset = adapter.loadDataset.bind(adapter);
12985
- var evaluate = adapter.evaluate.bind(adapter);
12986
- var assignSplit = adapter.assignSplit.bind(adapter);
12987
-
12988
12837
  // src/reference-replay-steering.ts
12989
12838
  function referenceReplayRunsToSteeringRows(runs, options = {}) {
12990
12839
  const rows = [];
@@ -14517,7 +14366,13 @@ var TRACE_ANALYST_TRUNCATION_MARKER_PREFIX = "[trace-analyst truncated:";
14517
14366
 
14518
14367
  // src/trace-analyst/store.ts
14519
14368
  function compileSearchRegex(pattern) {
14520
- return new RegExp(pattern, "m");
14369
+ let source = pattern;
14370
+ let flags = "m";
14371
+ if (source.startsWith("(?i)")) {
14372
+ source = source.slice(4);
14373
+ flags += "i";
14374
+ }
14375
+ return new RegExp(source, flags);
14521
14376
  }
14522
14377
  function truncateForBudget(value, byteCap) {
14523
14378
  const original = Buffer.byteLength(value, "utf8");
@@ -14690,19 +14545,26 @@ var OtlpFileTraceStore = class {
14690
14545
  const buf = await this.buffer();
14691
14546
  const hits = [];
14692
14547
  let total = 0;
14548
+ let capped = false;
14693
14549
  for (const s of trace.spans) {
14694
- const localHits = await this.scanSpanForMatches(buf, trace.trace_id, s, re, this.perMatchTextBudget);
14550
+ const remaining = max_matches - hits.length;
14551
+ const localHits = await this.scanSpanForMatches(buf, trace.trace_id, s, re, this.perMatchTextBudget, remaining);
14695
14552
  total += localHits.total;
14696
14553
  for (const h of localHits.records) {
14697
14554
  if (hits.length >= max_matches) break;
14698
14555
  hits.push(h);
14699
14556
  }
14557
+ if (hits.length >= max_matches) {
14558
+ capped = true;
14559
+ total = Math.max(total, hits.length + 1);
14560
+ break;
14561
+ }
14700
14562
  }
14701
14563
  return {
14702
14564
  trace_id: trace.trace_id,
14703
14565
  hits,
14704
14566
  total_matches: total,
14705
- has_more: total > hits.length
14567
+ has_more: capped || total > hits.length
14706
14568
  };
14707
14569
  }
14708
14570
  async searchSpan(opts) {
@@ -14719,14 +14581,13 @@ var OtlpFileTraceStore = class {
14719
14581
  }
14720
14582
  const re = compileSearchRegex(opts.regex_pattern);
14721
14583
  const buf = await this.buffer();
14722
- const localHits = await this.scanSpanForMatches(buf, trace.trace_id, span, re, this.perMatchTextBudget);
14723
- const truncated = localHits.records.slice(0, max_matches);
14584
+ const localHits = await this.scanSpanForMatches(buf, trace.trace_id, span, re, this.perMatchTextBudget, max_matches);
14724
14585
  return {
14725
14586
  trace_id: trace.trace_id,
14726
14587
  span_id: span.span_id,
14727
- hits: truncated,
14588
+ hits: localHits.records,
14728
14589
  total_matches: localHits.total,
14729
- has_more: localHits.total > truncated.length
14590
+ has_more: localHits.total > localHits.records.length
14730
14591
  };
14731
14592
  }
14732
14593
  // ─── Index building ────────────────────────────────────────────────
@@ -14958,15 +14819,20 @@ var OtlpFileTraceStore = class {
14958
14819
  error_span_count: errorCount
14959
14820
  };
14960
14821
  }
14961
- async scanSpanForMatches(buf, trace_id, s, re, textBudget) {
14822
+ async scanSpanForMatches(buf, trace_id, s, re, textBudget, recordCap) {
14962
14823
  const slice = buf.subarray(s.line_byte_offset, s.line_byte_offset + s.line_byte_length).toString("utf8");
14963
14824
  const records = [];
14964
14825
  const globalRe = new RegExp(re.source, re.flags.includes("g") ? re.flags : `${re.flags}g`);
14965
14826
  let total = 0;
14827
+ let hasMore = false;
14966
14828
  let m;
14967
14829
  while ((m = globalRe.exec(slice)) !== null) {
14968
14830
  total += 1;
14969
14831
  if (m.index === globalRe.lastIndex) globalRe.lastIndex += 1;
14832
+ if (records.length >= recordCap) {
14833
+ hasMore = true;
14834
+ break;
14835
+ }
14970
14836
  const before = slice.slice(Math.max(0, m.index - textBudget / 2), m.index);
14971
14837
  const after = slice.slice(
14972
14838
  m.index + m[0].length,
@@ -14984,7 +14850,7 @@ var OtlpFileTraceStore = class {
14984
14850
  match_offset: m.index
14985
14851
  });
14986
14852
  }
14987
- return { records, total };
14853
+ return { records, total, hasMore };
14988
14854
  }
14989
14855
  };
14990
14856
  var TraceFileMissingError = class extends Error {
@@ -15436,11 +15302,22 @@ async function analyzeTraces(input, options) {
15436
15302
  findings: Array.isArray(result.findings) ? result.findings.filter((s) => typeof s === "string") : [],
15437
15303
  turns,
15438
15304
  turnCount: turns.length,
15439
- usage: analyst.getUsage(),
15440
- chatLog: analyst.getChatLog(),
15305
+ usage: normalizeRoleArrays(analyst.getUsage()),
15306
+ chatLog: normalizeRoleArrays(analyst.getChatLog()),
15441
15307
  actorPromptVersion: TRACE_ANALYST_ACTOR_DESCRIPTION_VERSION
15442
15308
  };
15443
15309
  }
15310
+ function normalizeRoleArrays(value) {
15311
+ const record = value && typeof value === "object" ? value : {};
15312
+ return {
15313
+ actor: normalizeRecordArray(record.actor),
15314
+ responder: normalizeRecordArray(record.responder)
15315
+ };
15316
+ }
15317
+ function normalizeRecordArray(value) {
15318
+ if (!Array.isArray(value)) return [];
15319
+ return value.map((item) => item && typeof item === "object" ? { ...item } : { value: item });
15320
+ }
15444
15321
 
15445
15322
  // src/trace-analyst/insights.ts
15446
15323
  var DOMAIN_STOP_WORDS = /* @__PURE__ */ new Set([
@@ -15696,6 +15573,7 @@ export {
15696
15573
  BudgetBreachError,
15697
15574
  BudgetGuard,
15698
15575
  BuilderSession,
15576
+ CallbackResearcher,
15699
15577
  ConvergenceTracker,
15700
15578
  CostLedger,
15701
15579
  CostTracker,