@tangle-network/agent-eval 0.17.2 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -2252,8 +2252,6 @@ async function finish(emitter, result) {
2252
2252
  }
2253
2253
 
2254
2254
  // src/feedback-trajectory.ts
2255
- import { appendFile, mkdir, readFile } from "fs/promises";
2256
- import { join } from "path";
2257
2255
  var DEFAULT_SPLIT_POLICY = {
2258
2256
  trainPct: 70,
2259
2257
  devPct: 15,
@@ -2330,12 +2328,16 @@ var FileSystemFeedbackTrajectoryStore = class {
2330
2328
  return next;
2331
2329
  }
2332
2330
  async append(record) {
2331
+ const { appendFile, mkdir } = await import("fs/promises");
2332
+ const { join: join3 } = await import("path");
2333
2333
  await mkdir(this.dir, { recursive: true });
2334
- await appendFile(join(this.dir, "feedback-trajectories.ndjson"), JSON.stringify(record) + "\n", "utf8");
2334
+ await appendFile(join3(this.dir, "feedback-trajectories.ndjson"), JSON.stringify(record) + "\n", "utf8");
2335
2335
  }
2336
2336
  async load() {
2337
2337
  if (this.loaded) return;
2338
- const file = join(this.dir, "feedback-trajectories.ndjson");
2338
+ const { readFile } = await import("fs/promises");
2339
+ const { join: join3 } = await import("path");
2340
+ const file = join3(this.dir, "feedback-trajectories.ndjson");
2339
2341
  try {
2340
2342
  const raw = await readFile(file, "utf8");
2341
2343
  for (const line of raw.split("\n")) {
@@ -2422,6 +2424,44 @@ function feedbackTrajectoryToOptimizerRow(trajectory) {
2422
2424
  function feedbackTrajectoriesToOptimizerRows(trajectories) {
2423
2425
  return trajectories.map(feedbackTrajectoryToOptimizerRow);
2424
2426
  }
2427
+ async function replayFeedbackTrajectory(trajectory, adapter2) {
2428
+ try {
2429
+ const result = await adapter2.replay(trajectory);
2430
+ return {
2431
+ trajectoryId: trajectory.id,
2432
+ ...result
2433
+ };
2434
+ } catch (err) {
2435
+ const createdAt = (/* @__PURE__ */ new Date()).toISOString();
2436
+ const message = err instanceof Error ? err.message : String(err);
2437
+ return {
2438
+ trajectoryId: trajectory.id,
2439
+ pass: false,
2440
+ labels: [{
2441
+ source: "system",
2442
+ kind: "reject",
2443
+ value: false,
2444
+ reason: message,
2445
+ severity: "error",
2446
+ createdAt
2447
+ }],
2448
+ outcome: {
2449
+ success: false,
2450
+ score: 0,
2451
+ detail: message,
2452
+ observedAt: createdAt
2453
+ },
2454
+ metadata: { replayError: true }
2455
+ };
2456
+ }
2457
+ }
2458
/**
 * Replays every trajectory one after another (each replay is awaited before the
 * next starts) and returns the results in input order.
 *
 * @param trajectories Iterable of trajectories to replay.
 * @param adapter2 Adapter forwarded unchanged to `replayFeedbackTrajectory`.
 * @returns Array with one replay result per trajectory.
 */
async function replayFeedbackTrajectories(trajectories, adapter2) {
  const replayed = [];
  for (const item of trajectories) {
    const outcome = await replayFeedbackTrajectory(item, adapter2);
    replayed.push(outcome);
  }
  return replayed;
}
2425
2465
  function summarizePreferenceMemory(trajectories, options = {}) {
2426
2466
  const maxEntries = options.maxEntries ?? 20;
2427
2467
  const entries = [];
@@ -2585,6 +2625,69 @@ function canonicalize(value) {
2585
2625
  return out;
2586
2626
  }
2587
2627
 
2628
+ // src/action-policy.ts
2629
+ function evaluateActionPolicy(action, policy = {}, options = {}) {
2630
+ const reasons = [];
2631
+ let blocked = false;
2632
+ let requiresApproval = Boolean(action.requiresApproval);
2633
+ if (policy.allowedTypes?.length && !policy.allowedTypes.includes(action.type)) {
2634
+ blocked = true;
2635
+ reasons.push(`action type "${action.type}" is not allowed`);
2636
+ }
2637
+ if (policy.blockedTypes?.includes(action.type)) {
2638
+ blocked = true;
2639
+ reasons.push(`action type "${action.type}" is blocked`);
2640
+ }
2641
+ if (policy.alwaysRequireApprovalTypes?.includes(action.type)) {
2642
+ requiresApproval = true;
2643
+ reasons.push(`action type "${action.type}" requires approval`);
2644
+ }
2645
+ if (policy.requireApprovalForExternalSideEffects && action.externalSideEffect) {
2646
+ requiresApproval = true;
2647
+ reasons.push("external side effect requires approval");
2648
+ }
2649
+ if (policy.requireApprovalAboveCostUsd !== void 0 && (action.costUsd ?? 0) > policy.requireApprovalAboveCostUsd) {
2650
+ requiresApproval = true;
2651
+ reasons.push(`cost ${action.costUsd} exceeds approval threshold ${policy.requireApprovalAboveCostUsd}`);
2652
+ }
2653
+ if (policy.maxActionCostUsd !== void 0 && (action.costUsd ?? 0) > policy.maxActionCostUsd) {
2654
+ blocked = true;
2655
+ reasons.push(`cost ${action.costUsd} exceeds max action cost ${policy.maxActionCostUsd}`);
2656
+ }
2657
+ if (policy.remainingBudgetUsd !== void 0 && (action.costUsd ?? 0) > policy.remainingBudgetUsd) {
2658
+ blocked = true;
2659
+ reasons.push(`cost ${action.costUsd} exceeds remaining budget ${policy.remainingBudgetUsd}`);
2660
+ }
2661
+ if (policy.expectedOutcomeRequired && !action.metadata?.expectedOutcome) {
2662
+ blocked = true;
2663
+ reasons.push("expected outcome is required");
2664
+ }
2665
+ if (policy.killCriteriaRequired && !action.metadata?.killCriteria) {
2666
+ blocked = true;
2667
+ reasons.push("kill criteria are required");
2668
+ }
2669
+ if (policy.autoApproveTypes?.includes(action.type) && requiresApproval) {
2670
+ reasons.push(`action type "${action.type}" is auto-approved only when no approval policy applies`);
2671
+ }
2672
+ if (!reasons.length) reasons.push(requiresApproval ? "approval required" : "action allowed");
2673
+ const label = blocked || requiresApproval ? {
2674
+ source: "policy",
2675
+ kind: blocked ? "policy_block" : "comment",
2676
+ value: { actionType: action.type, blocked, requiresApproval },
2677
+ reason: reasons.join("; "),
2678
+ severity: blocked ? "critical" : "warning",
2679
+ createdAt: options.createdAt ?? (/* @__PURE__ */ new Date()).toISOString(),
2680
+ metadata: { action, policy }
2681
+ } : void 0;
2682
+ return {
2683
+ allowed: !blocked,
2684
+ blocked,
2685
+ requiresApproval: !blocked && requiresApproval,
2686
+ reasons,
2687
+ label
2688
+ };
2689
+ }
2690
+
2588
2691
  // src/prompt-registry.ts
2589
2692
  var PromptRegistry = class {
2590
2693
  entries = /* @__PURE__ */ new Map();
@@ -6382,7 +6485,7 @@ function assertNonNegative(n, name) {
6382
6485
 
6383
6486
  // src/muffled-gate-scanner.ts
6384
6487
  import { readFileSync as readFileSync2, existsSync as existsSync2, readdirSync, statSync } from "fs";
6385
- import { join as join2 } from "path";
6488
+ import { join } from "path";
6386
6489
  function codeOf(line) {
6387
6490
  return line.replace(/\/\/.*$/, "").replace(/^\s*\*.*$/, "");
6388
6491
  }
@@ -6486,11 +6589,11 @@ var UNIVERSAL_FINDERS = [
6486
6589
  function autoDeriveImporters(repoRoot, roots, extensions, importsContain) {
6487
6590
  const matches2 = [];
6488
6591
  const walk = (rel) => {
6489
- const abs = join2(repoRoot, rel);
6592
+ const abs = join(repoRoot, rel);
6490
6593
  if (!existsSync2(abs)) return;
6491
6594
  for (const entry of readdirSync(abs)) {
6492
- const sub = join2(rel, entry);
6493
- const subAbs = join2(repoRoot, sub);
6595
+ const sub = join(rel, entry);
6596
+ const subAbs = join(repoRoot, sub);
6494
6597
  let st;
6495
6598
  try {
6496
6599
  st = statSync(subAbs);
@@ -6519,7 +6622,7 @@ function scanForMuffledGates(opts) {
6519
6622
  const findings = [];
6520
6623
  const scanned = /* @__PURE__ */ new Set();
6521
6624
  for (const file of opts.scanFiles) {
6522
- const abs = join2(opts.repoRoot, file);
6625
+ const abs = join(opts.repoRoot, file);
6523
6626
  if (!existsSync2(abs)) continue;
6524
6627
  const text = readFileSync2(abs, "utf8");
6525
6628
  for (const find of opts.finders) findings.push(...find(file, text));
@@ -6534,7 +6637,7 @@ function scanForMuffledGates(opts) {
6534
6637
  );
6535
6638
  for (const file of importers) {
6536
6639
  if (scanned.has(file)) continue;
6537
- const abs = join2(opts.repoRoot, file);
6640
+ const abs = join(opts.repoRoot, file);
6538
6641
  if (!existsSync2(abs)) continue;
6539
6642
  const text = readFileSync2(abs, "utf8");
6540
6643
  for (const find of opts.autoDerive.universalFinders) findings.push(...find(file, text));
@@ -8522,7 +8625,7 @@ async function commitBisect(options) {
8522
8625
  }
8523
8626
  async function promptBisect(options) {
8524
8627
  const split = options.paragraphSplitter ?? ((p) => p.split(/\n\s*\n/));
8525
- const join4 = (paragraphs) => paragraphs.join("\n\n");
8628
+ const join3 = (paragraphs) => paragraphs.join("\n\n");
8526
8629
  const goodParas = split(options.good);
8527
8630
  const badParas = split(options.bad);
8528
8631
  if (goodParas.length !== badParas.length) {
@@ -8540,7 +8643,7 @@ async function promptBisect(options) {
8540
8643
  const result = await bisect({
8541
8644
  good: goodMask,
8542
8645
  bad: badMask,
8543
- runEval: (mask) => options.runEval(join4(paragraphsFor(mask))),
8646
+ runEval: (mask) => options.runEval(join3(paragraphsFor(mask))),
8544
8647
  maxIterations: options.maxIterations ?? n + 5,
8545
8648
  halfway: (g, b) => {
8546
8649
  for (let i = 0; i < g.length; i++) {
@@ -8571,12 +8674,12 @@ async function promptBisect(options) {
8571
8674
  }
8572
8675
  }
8573
8676
  const materializedPath = result.path.map((s) => ({
8574
- state: join4(paragraphsFor(s.state)),
8677
+ state: join3(paragraphsFor(s.state)),
8575
8678
  score: s.score,
8576
8679
  pass: s.pass
8577
8680
  }));
8578
8681
  return {
8579
- culprit: join4(paragraphsFor(culprit)),
8682
+ culprit: join3(paragraphsFor(culprit)),
8580
8683
  path: materializedPath,
8581
8684
  converged: result.converged,
8582
8685
  inputInconsistent: result.inputInconsistent,
@@ -9631,7 +9734,7 @@ function mergeSignals(a, b) {
9631
9734
  // src/command-runner.ts
9632
9735
  import { spawnSync } from "child_process";
9633
9736
  import { existsSync as existsSync3, readFileSync as readFileSync3, readdirSync as readdirSync2, statSync as statSync2 } from "fs";
9634
- import { join as join3 } from "path";
9737
+ import { join as join2 } from "path";
9635
9738
  var localCommandRunner = {
9636
9739
  name: "local",
9637
9740
  async run(input) {
@@ -9678,7 +9781,7 @@ var localCommandRunner = {
9678
9781
  const out = [];
9679
9782
  for (const name of entries) {
9680
9783
  try {
9681
- const st = statSync2(join3(path, name));
9784
+ const st = statSync2(join2(path, name));
9682
9785
  out.push({
9683
9786
  name,
9684
9787
  isDirectory: st.isDirectory(),
@@ -12589,6 +12692,274 @@ function samePopulation(a, b) {
12589
12692
  return b.every((id) => setA.has(id));
12590
12693
  }
12591
12694
 
12695
+ // src/multi-shot-optimization.ts
12696
/**
 * Runs a prompt-evolution search over the search scenarios and, when a gate is
 * configured, decides whether the search winner may replace the baseline.
 *
 * The baseline is the first seed variant. The gate is evaluated only when
 * `config.gate` is set and the search winner differs from the baseline; if the
 * gate declines promotion, the baseline (and its aggregate) is returned as the
 * promoted variant.
 *
 * @param config Optimization config; validated by `validateConfig` first.
 * @returns `{ evolution, searchBestVariant, searchBestAggregate, promotedVariant,
 *   promotedAggregate, gate }` where `gate` is null if no gate evaluation ran.
 */
async function runMultiShotOptimization(config) {
  validateConfig(config);
  // Every search-phase trial is scored through scoreOne with split "search".
  const scoreAdapter = {
    score: (args) => scoreOne(config, args.variant, args.scenarioId, args.rep, "search")
  };
  const mutateAdapter = {
    mutate: (args) => {
      const forwarded = { ...args };
      forwarded.topTrials = args.topTrials;
      forwarded.bottomTrials = args.bottomTrials;
      return config.mutateAdapter.mutate(forwarded);
    }
  };
  const evolution = await runPromptEvolution({
    runId: config.runId,
    target: config.target,
    seedVariants: config.seedVariants,
    scenarioIds: config.searchScenarioIds,
    reps: config.reps,
    generations: config.generations,
    populationSize: config.populationSize,
    scoreConcurrency: config.scoreConcurrency ?? 1,
    scoreAdapter,
    mutateAdapter,
    objectives: config.objectives ?? defaultMultiShotObjectives(),
    scalarWeights: config.scalarWeights,
    earlyStopOnNoImprovement: config.earlyStopOnNoImprovement,
    cache: config.cache,
    onProgress: config.onProgress
  });
  const baseline = config.seedVariants[0];
  const outcome = {
    evolution,
    searchBestVariant: evolution.bestVariant,
    searchBestAggregate: evolution.bestAggregate,
    promotedVariant: evolution.bestVariant,
    promotedAggregate: evolution.bestAggregate,
    gate: null
  };
  const gateApplies = Boolean(config.gate) && evolution.bestVariant.id !== baseline.id;
  if (!gateApplies) return outcome;
  outcome.gate = await evaluateMultiShotGate(config, baseline, evolution.bestVariant);
  if (!outcome.gate.decision.promote) {
    // Gate rejected the candidate: fall back to the baseline.
    outcome.promotedVariant = baseline;
    outcome.promotedAggregate = aggregateFor(evolution, baseline.id);
  }
  return outcome;
}
12744
+ function defaultMultiShotObjectives() {
12745
+ return [
12746
+ { name: "score", direction: "maximize", value: (a) => a.meanScore },
12747
+ { name: "cost", direction: "minimize", value: (a) => a.meanCost }
12748
+ ];
12749
+ }
12750
/**
 * Projects a multi-shot trial into a trial-trace record.
 *
 * Each `asi` item becomes an expectation (`id` falls back to `asi-<index>`,
 * `matched` falls back to false). `emitted` uses the trial's own value and
 * otherwise an excerpt derived from `trial.trace`.
 *
 * @param trial Trial with `variantId`, `scenarioId`, `rep`, `score`, and optional
 *   `asi`, `emitted`, `trace`, `metrics`.
 * @returns `{ id, score, inputName, expectations, emitted, metrics }`.
 */
function trialTraceFromMultiShotTrial(trial) {
  const id = `${trial.variantId}/${trial.scenarioId}/r${trial.rep}`;
  const asiItems = trial.asi ?? [];
  const expectations = asiItems.map((item, index) => {
    const expectationId = item.expectationId ?? `asi-${index}`;
    const matched = item.matched ?? false;
    return { id: expectationId, phrase: item.message, matched };
  });
  const emitted = trial.emitted ?? traceExcerpt(trial.trace);
  return {
    id,
    score: trial.score,
    inputName: trial.scenarioId,
    expectations,
    emitted,
    metrics: trial.metrics
  };
}
12764
/**
 * Scores baseline and candidate on the gate's search scenarios and then on its
 * holdout scenarios, converts every trial into a validated run record, and asks
 * `HeldOutGate` for a promotion decision.
 *
 * For each (scenario, rep) the baseline is scored before the candidate, and both
 * records carry the same seed. Scoring is strictly sequential.
 *
 * @param config Optimization config; `config.gate` must be set.
 * @param baseline Baseline variant.
 * @param candidate Candidate variant proposed for promotion.
 * @returns `{ decision, candidateRuns, baselineRuns }`.
 */
async function evaluateMultiShotGate(config, baseline, candidate) {
  const gateConfig = config.gate;
  const reps = gateConfig.reps ?? config.reps;
  const candidateRuns = [];
  const baselineRuns = [];
  // Scores both variants for every (scenario, rep) of one split and records the runs.
  const collect = async (scenarioIds, split) => {
    for (const scenarioId of scenarioIds) {
      for (let rep = 0; rep < reps; rep += 1) {
        const seed = seedFor(config, scenarioId, rep);
        const baseTrial = await scoreOne(config, baseline, scenarioId, rep, split);
        const candTrial = await scoreOne(config, candidate, scenarioId, rep, split);
        baselineRuns.push(toValidatedRecord(config, baseline, scenarioId, rep, split, seed, baseTrial));
        candidateRuns.push(toValidatedRecord(config, candidate, scenarioId, rep, split, seed, candTrial));
      }
    }
  };
  await collect(gateConfig.searchScenarioIds ?? config.searchScenarioIds, "search");
  await collect(gateConfig.holdoutScenarioIds, "holdout");
  const decision = new HeldOutGate(gateConfig.gate).evaluate(candidateRuns, baselineRuns);
  return { decision, candidateRuns, baselineRuns };
}
12791
/**
 * Runs and scores one (variant, scenario, rep) trial and never rejects.
 *
 * The runner is invoked first, then the scorer receives the same input plus the
 * run. Scorer values win over runner values for cost and duration; the score is
 * clamped to [0, 1]. Any error from running, scoring, or assembling the result
 * yields a failed trial (`ok: false`, score 0, `metrics.error = 1`) carrying the
 * error message and one critical ASI item attributed to `config.target`.
 *
 * @param config Optimization config providing `runner`, `scorer`, and `target`.
 * @param variant Variant under test.
 * @param scenarioId Scenario identifier.
 * @param rep Repetition index.
 * @param split Split label forwarded to the runner/scorer and stored on the trial.
 * @returns The trial record.
 */
async function scoreOne(config, variant, scenarioId, rep, split) {
  const seed = seedFor(config, scenarioId, rep);
  const input = { variant, scenarioId, rep, split, seed };
  try {
    const run = await config.runner.run(input);
    const scored = await config.scorer.score({ ...input, run });
    const asi = scored.asi ?? [];
    const ok = scored.ok ?? true;
    const score = clamp013(scored.score);
    const cost = scored.costUsd ?? run.costUsd ?? 0;
    const durationMs = scored.durationMs ?? run.durationMs ?? 0;
    const metrics = {
      ...numericMetrics(scored.metrics),
      ...asiMetrics(asi)
    };
    const emitted = scored.emitted ?? traceExcerpt(run.trace);
    return {
      variantId: variant.id,
      scenarioId,
      rep,
      ok,
      score,
      cost,
      durationMs,
      metrics,
      split,
      seed,
      trace: run.trace,
      asi,
      emitted,
      metadata: scored.metadata
    };
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    return {
      variantId: variant.id,
      scenarioId,
      rep,
      ok: false,
      score: 0,
      cost: 0,
      durationMs: 0,
      metrics: { error: 1 },
      error: message,
      split,
      seed,
      asi: [{
        severity: "critical",
        message,
        responsibleSurface: config.target
      }],
      emitted: ""
    };
  }
}
12839
/**
 * Converts a trial into a run record via `config.gate.toRunRecord` and returns
 * the result of passing that record through `validateRunRecord`.
 */
function toValidatedRecord(config, variant, scenarioId, rep, split, seed, trial) {
  const context = { variant, scenarioId, rep, split, seed, trial };
  return validateRunRecord(config.gate.toRunRecord(context));
}
12843
/**
 * Validates a `runMultiShotOptimization` config and throws an `Error` on the
 * first violation.
 *
 * Checks, in order: non-blank `runId` and `target`; non-empty `seedVariants` and
 * `searchScenarioIds`; positive-integer `reps`, `generations`, `populationSize`
 * (and `scoreConcurrency` when given); `populationSize >= seedVariants.length`;
 * unique seed ids and search scenario ids. When a gate is configured: non-empty,
 * unique holdout ids that do not overlap the search ids, a positive-integer
 * `gate.reps` when given, unique `gate.searchScenarioIds` when given, and a
 * `gate.gate.baselineKey` equal to the first seed variant's id.
 */
function validateConfig(config) {
  if (config.runId.trim() === "") throw new Error("runMultiShotOptimization: runId must not be empty");
  if (config.target.trim() === "") throw new Error("runMultiShotOptimization: target must not be empty");
  if (config.seedVariants.length === 0) {
    throw new Error("runMultiShotOptimization: seedVariants must not be empty");
  }
  if (config.searchScenarioIds.length === 0) {
    throw new Error("runMultiShotOptimization: searchScenarioIds must not be empty");
  }
  requirePositiveInteger(config.reps, "reps");
  requirePositiveInteger(config.generations, "generations");
  requirePositiveInteger(config.populationSize, "populationSize");
  if (config.scoreConcurrency !== void 0) requirePositiveInteger(config.scoreConcurrency, "scoreConcurrency");
  if (config.populationSize < config.seedVariants.length) {
    throw new Error("runMultiShotOptimization: populationSize must be >= seedVariants.length");
  }
  assertUnique(config.seedVariants.map((v) => v.id), "seedVariants.id");
  assertUnique(config.searchScenarioIds, "searchScenarioIds");

  // Everything below only applies when a promotion gate is configured.
  if (!config.gate) return;
  if (config.gate.holdoutScenarioIds.length === 0) {
    throw new Error("runMultiShotOptimization: gate.holdoutScenarioIds must not be empty");
  }
  if (config.gate.reps !== void 0) requirePositiveInteger(config.gate.reps, "gate.reps");
  assertUnique(config.gate.holdoutScenarioIds, "gate.holdoutScenarioIds");
  if (config.gate.searchScenarioIds) assertUnique(config.gate.searchScenarioIds, "gate.searchScenarioIds");
  // Holdout scenarios must not also appear in the search scenarios.
  const searchIds = new Set(config.searchScenarioIds);
  for (const id of config.gate.holdoutScenarioIds) {
    if (!searchIds.has(id)) continue;
    throw new Error(`runMultiShotOptimization: holdout scenario "${id}" also appears in searchScenarioIds`);
  }
  const baselineId = config.seedVariants[0].id;
  if (config.gate.gate.baselineKey !== baselineId) {
    throw new Error(
      `runMultiShotOptimization: gate.gate.baselineKey must match first seed variant id "${baselineId}"`
    );
  }
}
12882
/**
 * Throws unless `value` is an integer greater than zero.
 *
 * @param value Value to check.
 * @param name Field name used in the error message.
 * @throws Error when `value` is not a positive integer.
 */
function requirePositiveInteger(value, name) {
  const isPositiveInteger = Number.isInteger(value) && value > 0;
  if (isPositiveInteger) return;
  throw new Error(`runMultiShotOptimization: ${name} must be a positive integer`);
}
12887
/**
 * Throws if any value is blank (empty after trimming) or occurs more than once.
 * Values are checked in order and the first violation wins.
 *
 * @param values Iterable of strings.
 * @param name Field name used in the error message.
 * @throws Error on the first blank or duplicate value.
 */
function assertUnique(values, name) {
  const seen = /* @__PURE__ */ new Set();
  for (const value of values) {
    if (value.trim() === "") {
      throw new Error(`runMultiShotOptimization: ${name} must not contain empty values`);
    }
    const sizeBefore = seen.size;
    seen.add(value);
    // The set did not grow, so this value was already present.
    if (seen.size === sizeBefore) {
      throw new Error(`runMultiShotOptimization: duplicate ${name} "${value}"`);
    }
  }
}
12895
/**
 * Returns the most recent aggregate recorded for `variantId`.
 *
 * Generations are searched from newest to oldest, so the final generation still
 * wins whenever it contains the variant. Looking only at the final generation
 * made the caller throw after a gate rejection if the baseline had no aggregate
 * there. NOTE(review): whether the evolution loop can drop a seed variant from
 * later generations is not visible here — confirm against runPromptEvolution.
 *
 * @param evolution Evolution result exposing `generations[].aggregates[]`.
 * @param variantId Variant whose aggregate is wanted.
 * @returns The matching aggregate from the latest generation that has one.
 * @throws Error when no generation contains an aggregate for the variant.
 */
function aggregateFor(evolution, variantId) {
  const generations = evolution.generations ?? [];
  for (let i = generations.length - 1; i >= 0; i--) {
    const found = generations[i]?.aggregates?.find((a) => a.variantId === variantId);
    if (found) return found;
  }
  throw new Error(`runMultiShotOptimization: missing aggregate for variant "${variantId}"`);
}
12903
/**
 * Derives a deterministic seed for one (scenario, rep) pair.
 *
 * The pair is serialized as a JSON array before hashing so that distinct pairs
 * never share a hash input; plain concatenation made ("a1", 2) and ("a", 12)
 * both hash the string "a12".
 *
 * @param config Config whose optional `seedBase` (default 0) offsets the seed.
 * @param scenarioId Scenario identifier.
 * @param rep Repetition index.
 * @returns `(seedBase + hash) % Number.MAX_SAFE_INTEGER`.
 */
function seedFor(config, scenarioId, rep) {
  const base = config.seedBase ?? 0;
  return (base + stableHash2(JSON.stringify([scenarioId, rep]))) % Number.MAX_SAFE_INTEGER;
}
/**
 * 32-bit FNV-1a style hash over the UTF-16 code units of `input`
 * (offset basis 2166136261, prime 16777619).
 *
 * @param input String to hash.
 * @returns Unsigned 32-bit integer.
 */
function stableHash2(input) {
  let h = 2166136261;
  for (let i = 0; i < input.length; i++) {
    h ^= input.charCodeAt(i);
    // Math.imul keeps the multiplication in 32-bit integer arithmetic.
    h = Math.imul(h, 16777619);
  }
  // Reinterpret the signed 32-bit result as unsigned.
  return h >>> 0;
}
12915
/**
 * Clamps a score into [0, 1]. Anything that is not a finite number
 * (NaN, ±Infinity, non-number values) maps to 0.
 */
function clamp013(n) {
  if (!Number.isFinite(n)) return 0;
  if (n <= 0) return 0;
  if (n >= 1) return 1;
  return n;
}
12919
/**
 * Copies only the finite-number entries of `metrics` into a new object.
 * A null or undefined `metrics` yields an empty object.
 */
function numericMetrics(metrics) {
  const out = {};
  const source = metrics ?? {};
  Object.keys(source).forEach((key) => {
    const candidate = source[key];
    if (Number.isFinite(candidate)) out[key] = candidate;
  });
  return out;
}
12926
/**
 * Summarizes ASI items as counters: `asi` holds the total item count, while
 * `asi.<severity>` and `surface.<segment>` counters are tallied from at most the
 * first 1000 items.
 *
 * @param asi Array of ASI items (`severity`, optional `responsibleSurface`).
 * @returns Object mapping counter names to counts.
 */
function asiMetrics(asi) {
  const counters = { asi: asi.length };
  const bump = (key) => {
    counters[key] = (counters[key] ?? 0) + 1;
  };
  for (const item of asi.slice(0, 1e3)) {
    bump(`asi.${normalizeSeverity(item.severity)}`);
    if (item.responsibleSurface) {
      bump(`surface.${metricKeySegment(item.responsibleSurface)}`);
    }
  }
  return counters;
}
12938
/**
 * Returns `severity` unchanged when it is one of "info", "warning", "error",
 * or "critical"; every other value is treated as "error".
 */
function normalizeSeverity(severity) {
  switch (severity) {
    case "info":
    case "warning":
    case "error":
    case "critical":
      return severity;
    default:
      return "error";
  }
}
12944
/**
 * Turns free text into a metric-key segment: trims it, collapses each run of
 * characters outside `[a-zA-Z0-9._-]` into one underscore, and keeps at most
 * 80 characters. An empty result becomes "unknown".
 */
function metricKeySegment(raw) {
  const sanitized = raw.trim().replace(/[^a-zA-Z0-9._-]+/g, "_");
  const clipped = sanitized.slice(0, 80);
  return clipped === "" ? "unknown" : clipped;
}
12947
+ function traceExcerpt(trace) {
12948
+ if (!trace) return void 0;
12949
+ if (typeof trace.output === "string") return trace.output;
12950
+ if (trace.transcript) return trace.transcript;
12951
+ if (trace.turns) {
12952
+ try {
12953
+ const clipped = trace.turns.slice(0, 20);
12954
+ const suffix = trace.turns.length > clipped.length ? ` ... ${trace.turns.length - clipped.length} more turn(s)` : "";
12955
+ return `${JSON.stringify(clipped).slice(0, 2e3)}${suffix}`;
12956
+ } catch {
12957
+ return "[unserializable trace turns]";
12958
+ }
12959
+ }
12960
+ return void 0;
12961
+ }
12962
+
12592
12963
  // src/jsonl-trial-cache.ts
12593
12964
  import { appendFileSync as appendFileSync4, existsSync as existsSync6, mkdirSync as mkdirSync4, readFileSync as readFileSync5 } from "fs";
12594
12965
  import { dirname as dirname4 } from "path";
@@ -13708,6 +14079,7 @@ export {
13708
14079
  decideReferenceReplayPromotion,
13709
14080
  decideReferenceReplayRunPromotion,
13710
14081
  defaultJudges,
14082
+ defaultMultiShotObjectives,
13711
14083
  defaultReferenceReplayMatcher,
13712
14084
  deployGateLayer,
13713
14085
  distillPlaybook,
@@ -13715,6 +14087,7 @@ export {
13715
14087
  estimateCost,
13716
14088
  estimateTokens,
13717
14089
  euAiActReport,
14090
+ evaluateActionPolicy,
13718
14091
  evaluateContract,
13719
14092
  evaluateHypothesis,
13720
14093
  evaluateOracles,
@@ -13822,6 +14195,8 @@ export {
13822
14195
  renderPlaybookMarkdown,
13823
14196
  renderPreferenceMemoryMarkdown,
13824
14197
  renderSteeringText,
14198
+ replayFeedbackTrajectories,
14199
+ replayFeedbackTrajectory,
13825
14200
  replayScorerOverCorpus,
13826
14201
  replayTraceThroughJudge,
13827
14202
  requiredSampleSize,
@@ -13842,6 +14217,7 @@ export {
13842
14217
  runJudgeFleet,
13843
14218
  runKeywordCoverageJudge,
13844
14219
  runKeywordCoverageJudgeUrl,
14220
+ runMultiShotOptimization,
13845
14221
  runPromptEvolution,
13846
14222
  runProposeReview,
13847
14223
  runProposeReviewAsControlLoop,
@@ -13885,6 +14261,7 @@ export {
13885
14261
  toolSpans,
13886
14262
  toolSuccessRubric,
13887
14263
  toolWasteView,
14264
+ trialTraceFromMultiShotTrial,
13888
14265
  typoMutator,
13889
14266
  urlContains,
13890
14267
  validateRunRecord,