npm - @tangle-network/agent-eval - Versions diffs - 0.24.0 → 0.27.0 - Mend

@tangle-network/agent-eval 0.24.0 → 0.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31)

package/CHANGELOG.md +138 -0
package/README.md +72 -0
package/dist/{chunk-SY6WAAAD.js → chunk-5LBB5B3Z.js} +296 -5
package/dist/chunk-5LBB5B3Z.js.map +1 -0
package/dist/{chunk-OHEPNJQN.js → chunk-JLZQWFV3.js} +65 -1
package/dist/chunk-JLZQWFV3.js.map +1 -0
package/dist/{chunk-VRJVTXRV.js → chunk-WHZMVFUV.js} +85 -85
package/dist/chunk-WHZMVFUV.js.map +1 -0
package/dist/cli.js +1 -1
package/dist/governance/index.d.ts +1 -1
package/dist/{index-Oj9fAPPN.d.ts → index-D3iBCjdF.d.ts} +63 -2
package/dist/index.d.ts +529 -12
package/dist/index.js +1106 -17
package/dist/index.js.map +1 -1
package/dist/openapi.json +491 -1
package/dist/optimization.d.ts +2 -2
package/dist/optimization.js +1 -1
package/dist/pipelines/index.js +3 -67
package/dist/pipelines/index.js.map +1 -1
package/dist/{release-report-TDPn1cxq.d.ts → release-report-wfUySN5F.d.ts} +1 -1
package/dist/reporting.d.ts +2 -2
package/dist/{researcher-CUOiGcGv.d.ts → researcher-bGkI7vCl.d.ts} +1 -1
package/dist/rl.d.ts +3 -3
package/dist/{summary-report-BXGs_9V0.d.ts → summary-report-DZVXOCK_.d.ts} +13 -1
package/dist/wire/index.d.ts +347 -3
package/dist/wire/index.js +19 -1
package/docs/concepts.md +11 -0
package/package.json +1 -1
package/dist/chunk-OHEPNJQN.js.map +0 -1
package/dist/chunk-SY6WAAAD.js.map +0 -1
package/dist/chunk-VRJVTXRV.js.map +0 -1

package/dist/index.js CHANGED Viewed

@@ -8,9 +8,10 @@ import {
   classifyFailure,
   compareToBaseline,
   computeToolUseMetrics,
+  failureClusterView,
   iqr,
   welchsTTest
-} from "./chunk-OHEPNJQN.js";
+} from "./chunk-JLZQWFV3.js";
 import {
   exportTrainingData,
   toNdjson
@@ -95,7 +96,7 @@ import {
   summarizePreferenceMemory,
   trialTraceFromMultiShotTrial,
   withAssignedFeedbackSplit
-} from "./chunk-VRJVTXRV.js";
+} from "./chunk-WHZMVFUV.js";
 import {
   RunRecordValidationError,
   isRunRecord,
@@ -220,6 +221,304 @@ import {
 } from "./chunk-NG236HPC.js";
 import "./chunk-PZ5AY32C.js";
+// src/auto-pr.ts
/**
 * Check a pull-request proposal and, if it passes, hand it to the client.
 *
 * @param {{ proposeChange: Function }} client - Object whose `proposeChange(input)` does the work.
 * @param {object} input - Proposal; passed to `validate` and then forwarded unchanged.
 * @returns {Promise<*>} Whatever `client.proposeChange(input)` resolves to.
 * @throws Rejects with whatever `validate` throws; the client is not called in that case.
 */
async function proposeAutomatedPullRequest(client, input) {
  validate(input);
  const outcome = await client.proposeChange(input);
  return outcome;
}
/**
 * Reject a malformed pull-request proposal.
 *
 * Rules are checked in a fixed order and the first failure wins: repo
 * owner/name, branch name, branch-vs-base equality, non-empty `fileChanges`,
 * each file path (validity first, then duplicates), and finally the title.
 *
 * @param {object} input - Proposal with `repo.owner`, `repo.name`, `branchName`,
 *   optional `baseBranch` (treated as "main" when nullish), `fileChanges` and `title`.
 * @returns {void}
 * @throws {ValidationError} On the first rule that fails.
 */
function validate(input) {
  const reject = (message) => {
    throw new ValidationError(message);
  };
  const isBlank = (text) => text.trim() === "";
  const { repo, branchName, fileChanges } = input;

  if (isBlank(repo.owner) || isBlank(repo.name)) {
    reject("proposeAutomatedPullRequest: repo.owner and repo.name required");
  }
  if (isBlank(branchName) || /\s/.test(branchName)) {
    reject("proposeAutomatedPullRequest: branchName must be non-empty and contain no whitespace");
  }
  const effectiveBase = input.baseBranch ?? "main";
  if (branchName === effectiveBase) {
    reject("proposeAutomatedPullRequest: branchName must differ from baseBranch");
  }
  if (fileChanges.length === 0) {
    reject("proposeAutomatedPullRequest: fileChanges must not be empty");
  }

  const visited = new Set();
  for (const { path } of fileChanges) {
    // Any ".." substring is refused, not only a ".." path segment.
    const unsafe = isBlank(path) || path.includes("..") || path.startsWith("/");
    if (unsafe) {
      reject(`proposeAutomatedPullRequest: invalid file path "${path}" (no '..' or leading '/')`);
    }
    if (visited.has(path)) {
      reject(`proposeAutomatedPullRequest: duplicate file path "${path}"`);
    }
    visited.add(path);
  }

  if (isBlank(input.title)) {
    reject("proposeAutomatedPullRequest: title must not be empty");
  }
}
/**
 * Build a pull-request client that drives the GitHub REST API over HTTP.
 *
 * @param {object} opts
 * @param {string} opts.token - Sent as `authorization: Bearer <token>` on every request.
 * @param {Function} [opts.fetchImpl] - fetch-compatible function; defaults to the global `fetch`.
 * @param {string} [opts.apiBase] - API root; defaults to "https://api.github.com". Trailing slashes are stripped.
 * @param {() => Date} [opts.now] - Clock used for the commit author date.
 * @returns {{ proposeChange: (input: object) => Promise<object> }} Client whose
 *   `proposeChange` resolves to `{ prUrl, branchName, headSha, dryRun }`.
 */
function httpGithubClient(opts) {
  const doFetch = opts.fetchImpl ?? fetch;
  const baseUrl = (opts.apiBase ?? "https://api.github.com").replace(/\/+$/, "");
  const clock = opts.now ?? (() => new Date());

  // Issue one JSON request. Resolves to the parsed body, or to null when the
  // caller opted in with accept404 and the server answered 404. Any other
  // non-ok status becomes a ConfigError carrying up to 400 chars of the reply.
  async function call(method, path, body, accept404 = false) {
    const init = {
      method,
      headers: {
        accept: "application/vnd.github+json",
        "content-type": "application/json",
        authorization: `Bearer ${opts.token}`,
        "x-github-api-version": "2022-11-28"
      },
      body: body === undefined ? undefined : JSON.stringify(body)
    };
    const res = await doFetch(`${baseUrl}${path}`, init);
    if (accept404 && res.status === 404) return null;
    if (res.ok) return await res.json();
    const text = await res.text().catch(() => "");
    throw new ConfigError(
      `proposeAutomatedPullRequest: GitHub ${method} ${path} \u2192 ${res.status} ${text.slice(0, 400)}`
    );
  }

  // Pass a truthy API result through; turn a falsy one into a ConfigError.
  const mustExist = (value, message) => {
    if (!value) throw new ConfigError(message);
    return value;
  };

  // Reviewer and label requests are best-effort: their failures are discarded.
  const ignoreFailure = () => {
  };

  return {
    async proposeChange(input) {
      const { repo, branchName, fileChanges } = input;
      const baseBranch = input.baseBranch ?? "main";
      const repoPath = `/repos/${repo.owner}/${repo.name}`;

      if (input.dryRun) {
        // No request is made; the result points at the compare view.
        return {
          prUrl: `https://github.com/${repo.owner}/${repo.name}/compare/${baseBranch}...${branchName}`,
          branchName,
          headSha: "dry-run",
          dryRun: true
        };
      }

      // Resolve the base branch tip and its commit (for the base tree).
      const baseRef = mustExist(
        await call("GET", `${repoPath}/git/ref/heads/${baseBranch}`),
        `proposeAutomatedPullRequest: base branch "${baseBranch}" not found`
      );
      const baseSha = baseRef.object.sha;
      const baseCommit = mustExist(
        await call("GET", `${repoPath}/git/commits/${baseSha}`),
        `proposeAutomatedPullRequest: base commit ${baseSha} not found (race condition?)`
      );

      // One blob per changed file, uploaded sequentially.
      const treeEntries = [];
      for (const { path, contents } of fileChanges) {
        const blob = mustExist(
          await call("POST", `${repoPath}/git/blobs`, { content: contents, encoding: "utf-8" }),
          "proposeAutomatedPullRequest: blob creation returned null"
        );
        treeEntries.push({ path, mode: "100644", type: "blob", sha: blob.sha });
      }

      const tree = mustExist(
        await call("POST", `${repoPath}/git/trees`, {
          base_tree: baseCommit.tree.sha,
          tree: treeEntries
        }),
        "proposeAutomatedPullRequest: tree creation returned null"
      );

      // An explicit identity is sent only when both name and email are given.
      let identity;
      if (input.authorName && input.authorEmail) {
        identity = {
          name: input.authorName,
          email: input.authorEmail,
          date: clock().toISOString()
        };
      }
      const commitPayload = {
        message: renderCommitMessage(input),
        tree: tree.sha,
        parents: [baseSha]
      };
      if (identity) {
        commitPayload.author = identity;
        commitPayload.committer = identity;
      }
      const commit = mustExist(
        await call("POST", `${repoPath}/git/commits`, commitPayload),
        "proposeAutomatedPullRequest: commit creation returned null"
      );

      // Create the branch, or force-move it when it points at another commit.
      const currentRef = await call(
        "GET",
        `${repoPath}/git/ref/heads/${branchName}`,
        undefined,
        true
      );
      if (!currentRef) {
        await call("POST", `${repoPath}/git/refs`, {
          ref: `refs/heads/${branchName}`,
          sha: commit.sha
        });
      } else if (currentRef.object.sha !== commit.sha) {
        await call("PATCH", `${repoPath}/git/refs/heads/${branchName}`, {
          sha: commit.sha,
          force: true
        });
      }

      // Reuse the first open PR for this head if there is one; otherwise open one.
      const headFilter = encodeURIComponent(`${repo.owner}:${branchName}`);
      const openPrs = await call("GET", `${repoPath}/pulls?state=open&head=${headFilter}`);
      const pr = openPrs && openPrs.length > 0 ? openPrs[0] : mustExist(
        await call("POST", `${repoPath}/pulls`, {
          title: input.title,
          body: input.body,
          head: branchName,
          base: baseBranch
        }),
        "proposeAutomatedPullRequest: PR creation returned null"
      );

      if (input.reviewers && input.reviewers.length > 0) {
        await call(
          "POST",
          `${repoPath}/pulls/${pr.number}/requested_reviewers`,
          { reviewers: input.reviewers },
          true
        ).catch(ignoreFailure);
      }
      if (input.labels && input.labels.length > 0) {
        await call(
          "POST",
          `${repoPath}/issues/${pr.number}/labels`,
          { labels: input.labels },
          true
        ).catch(ignoreFailure);
      }

      return {
        prUrl: pr.html_url,
        branchName,
        headSha: commit.sha,
        dryRun: false
      };
    }
  };
}
/**
 * Build a pull-request client that shells out to `git` and the GitHub CLI.
 *
 * NOTE: outside dry-run mode, `proposeChange` rewrites the working tree at
 * `cwd`: it checks out the base branch, hard-resets it to `origin/<base>`,
 * deletes and recreates the work branch, and force-pushes it.
 *
 * @param {object} [opts]
 * @param {string} [opts.bin] - GitHub CLI executable; defaults to "gh".
 * @param {string} [opts.cwd] - Repository working directory; defaults to `process.cwd()`.
 * @param {Function} [opts.exec] - `(cmd, args, { cwd, stdin })` resolving to
 *   `{ stdout, stderr, exitCode }`; defaults to `defaultExec`.
 * @returns {{ proposeChange: (input: object) => Promise<object> }} Client whose
 *   `proposeChange` resolves to `{ prUrl, branchName, headSha, dryRun }`.
 */
function ghCliClient(opts = {}) {
  const bin = opts.bin ?? "gh";
  const cwd = opts.cwd ?? process.cwd();
  const exec = opts.exec ?? defaultExec;

  // Run a command and turn a non-zero exit code into a ConfigError whose
  // message carries stderr (or stdout when stderr is blank).
  async function run(cmd, args, stdin) {
    const r = await exec(cmd, args, { cwd, stdin });
    if (r.exitCode !== 0) {
      throw new ConfigError(
        `proposeAutomatedPullRequest: ${cmd} ${args.join(" ")} failed (${r.exitCode}): ${r.stderr.trim() || r.stdout.trim()}`
      );
    }
    return r;
  }

  return {
    async proposeChange(input) {
      const baseBranch = input.baseBranch ?? "main";
      if (input.dryRun) {
        // No command is run; the result points at the compare view.
        return {
          prUrl: `https://github.com/${input.repo.owner}/${input.repo.name}/compare/${baseBranch}...${input.branchName}`,
          branchName: input.branchName,
          headSha: "dry-run",
          dryRun: true
        };
      }

      // Start from a clean copy of the remote base branch.
      await run("git", ["fetch", "origin", baseBranch]);
      await run("git", ["checkout", baseBranch]);
      await run("git", ["reset", "--hard", `origin/${baseBranch}`]);
      // Deleting the work branch goes through `exec`, not `run`, so a failure
      // (e.g. the branch does not exist yet) is ignored.
      await exec("git", ["branch", "-D", input.branchName], { cwd });
      await run("git", ["checkout", "-b", input.branchName]);

      const { mkdir, writeFile } = await import("fs/promises");
      const { dirname, join, resolve } = await import("path");
      for (const change of input.fileChanges) {
        const abs = resolve(cwd, change.path);
        await mkdir(dirname(abs), { recursive: true });
        await writeFile(abs, change.contents, "utf8");
        await run("git", ["add", join(change.path)]);
      }

      // The requested identity used to be collected into an `env` object that
      // was never handed to any command, so authorName/authorEmail had no
      // effect. Passing it as per-invocation git config applies it to both
      // author and committer with any `exec` implementation.
      const identityArgs = [];
      if (input.authorName) identityArgs.push("-c", `user.name=${input.authorName}`);
      if (input.authorEmail) identityArgs.push("-c", `user.email=${input.authorEmail}`);

      const message = renderCommitMessage(input);
      await run("git", [...identityArgs, "commit", "-m", message]);
      const headRes = await run("git", ["rev-parse", "HEAD"]);
      const headSha = headRes.stdout.trim();
      await run("git", ["push", "-f", "origin", input.branchName]);

      // Look for an already-open PR on this branch; a failed lookup is treated
      // the same as "none found".
      const existing = await exec(
        bin,
        [
          "pr",
          "list",
          "--state",
          "open",
          "--head",
          input.branchName,
          "--json",
          "url,number",
          "--limit",
          "1"
        ],
        { cwd }
      );
      let prUrl = "";
      if (existing.exitCode === 0 && existing.stdout.trim()) {
        const parsed = JSON.parse(existing.stdout);
        if (parsed.length > 0 && parsed[0]) prUrl = parsed[0].url;
      }

      if (!prUrl) {
        const args = [
          "pr",
          "create",
          "--title",
          input.title,
          "--body",
          input.body,
          "--base",
          baseBranch
        ];
        // Reviewers and labels are only passed when a PR is created here.
        if (input.reviewers && input.reviewers.length > 0) {
          args.push("--reviewer", input.reviewers.join(","));
        }
        if (input.labels && input.labels.length > 0) {
          args.push("--label", input.labels.join(","));
        }
        const r = await run(bin, args);
        // Use the first URL in the output, or the whole trimmed output if none.
        const match = r.stdout.match(/https?:\/\/\S+/);
        prUrl = match ? match[0] : r.stdout.trim();
      }
      return { prUrl, branchName: input.branchName, headSha, dryRun: false };
    }
  };
}
/**
 * Spawn a child process and collect its output. Never rejects: a spawn
 * failure is reported as exit code 1 with the error message appended to stderr.
 *
 * @param {string} bin - Executable to run.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {{ cwd: string, stdin?: string }} opts - Working directory and optional stdin payload.
 * @returns {Promise<{ stdout: string, stderr: string, exitCode: number }>}
 *   `exitCode` is 1 when the process closes without a numeric code.
 */
async function defaultExec(bin, args, opts) {
  const { spawn } = await import("child_process");
  return new Promise((resolveExec) => {
    const child = spawn(bin, args, { cwd: opts.cwd });
    let stdout = "";
    let stderr = "";
    // Decode through the stream so a multi-byte UTF-8 character split across
    // two chunks is not corrupted by per-chunk toString().
    child.stdout.setEncoding("utf8");
    child.stderr.setEncoding("utf8");
    child.stdout.on("data", (chunk) => {
      stdout += chunk;
    });
    child.stderr.on("data", (chunk) => {
      stderr += chunk;
    });
    // A child that exits before reading its input makes the pipe emit an
    // error; without a listener that would surface as an uncaught exception.
    // The exit code below already reports the outcome.
    child.stdin.on("error", () => {
    });
    // Always close stdin: a child that reads stdin would otherwise wait
    // forever when the caller supplied no input.
    if (opts.stdin) {
      child.stdin.end(opts.stdin);
    } else {
      child.stdin.end();
    }
    // "error" and "close" may both fire; only the first resolve has any effect.
    child.on("error", (err) => {
      resolveExec({ stdout, stderr: `${stderr}${err.message}`, exitCode: 1 });
    });
    child.on("close", (code) => {
      resolveExec({ stdout, stderr, exitCode: code ?? 1 });
    });
  });
}
/**
 * Compose the commit message for a proposal: the title, a blank line, one
 * "- path: rationale" bullet per file change that has a rationale, a blank
 * line after the bullets (if any), then the trimmed body. The whole message
 * is trimmed, so an empty body leaves no trailing blank lines.
 *
 * @param {{ title: string, body: string, fileChanges: Array<{ path: string, rationale?: string }> }} input
 * @returns {string} The assembled commit message.
 */
function renderCommitMessage(input) {
  const bullets = input.fileChanges
    .filter((change) => change.rationale)
    .map((change) => `- ${change.path}: ${change.rationale}`);
  const parts = [input.title, "", ...bullets];
  if (bullets.length > 0) {
    parts.push("");
  }
  parts.push(input.body.trim());
  return parts.join("\n").trim();
}
 // src/executor.ts
 async function executeScenario(tc, scenario, config) {
   const startTime = Date.now();
@@ -1534,6 +1833,396 @@ function liveProofToReleaseTrace(config, trajectory, durationMs) {
   };
 }
+// src/production-loop.ts
/**
 * Run one production-loop cycle: observe runs and feedback, pick a failure
 * cluster, evolve the prompt against it, evaluate the result, and optionally
 * open a pull request.
 *
 * The returned record's `decision` is one of:
 * - "no_actionable_failures": no cluster passed the size/severity filters.
 * - "evolve_yielded_no_improvement": the promoted variant is the baseline.
 * - "gate_failed": release status is "fail" or the gate declined to promote.
 * - "proposed_change": a better prompt was found but `opts.ship` is not set.
 * - "pr_opened": the change was sent through `proposeAutomatedPullRequest`.
 *
 * @param {object} opts - Loop configuration (`runId`, `target`, `traceStore`,
 *   `feedbackStore`, `cluster`, `evolve`, optional `ship`, `now`,
 *   `releaseThresholds`, `cron`).
 * @returns {Promise<object>} The record produced by `finalize`.
 * @throws {ValidationError} From `validate2`, or when search and holdout scenario ids overlap.
 */
async function runProductionLoop(opts) {
  validate2(opts);
  const { cluster: clusterOpts, evolve } = opts;
  const now = opts.now ?? (() => new Date());
  const startedAt = now().toISOString();

  const observedRuns = await opts.traceStore.listRuns();
  const observedFeedback = await opts.feedbackStore.list();
  // NOTE(review): the cluster view defaults minClusterSize to 1 while the
  // actionable filter below defaults it to 5 - presumably so small clusters
  // are still reported; confirm this is intended.
  const clusterReport = await failureClusterView(opts.traceStore, {
    minClusterSize: clusterOpts.minClusterSize ?? 1
  });
  const { clusters, totalRuns } = clusterReport;

  // Every exit path reports the same observation data; only the outcome differs.
  const finish = (decision, outcome) => finalize({
    opts,
    decision,
    startedAt,
    now,
    observedRunCount: observedRuns.length,
    observedFeedbackCount: observedFeedback.length,
    clusters,
    ...outcome
  });

  const minSize = clusterOpts.minClusterSize ?? 5;
  const minSeverity = clusterOpts.minSeverityRatio ?? 0.05;
  const maxClusters = clusterOpts.maxClustersPerCycle ?? 1;
  const isActionable = (c) => c.runCount >= minSize && (totalRuns === 0 || c.runCount / totalRuns >= minSeverity);
  const actionable = clusters.filter(isActionable).slice(0, maxClusters);
  if (actionable.length === 0) {
    return finish("no_actionable_failures", {
      actedOnCluster: null,
      evolution: null,
      release: null,
      gate: null,
      promotedPrompt: evolve.baselinePrompt,
      pullRequest: null
    });
  }

  // Only the first actionable cluster is acted on in a cycle.
  const [actedOn] = actionable;
  const baselineId = evolve.baselineId ?? "baseline";
  const baseline = {
    id: baselineId,
    label: baselineId,
    generation: 0,
    payload: evolve.baselinePrompt
  };

  const holdoutIds = uniqueIds(evolve.holdoutScenarios.map((s) => s.id));
  const searchSource = evolve.searchScenarios ?? deriveSearchScenarios(evolve.holdoutScenarios);
  const searchIds = uniqueIds(searchSource.map((s) => s.id));
  const holdoutIdSet = new Set(holdoutIds);
  if (searchIds.some((id) => holdoutIdSet.has(id))) {
    throw new ValidationError(
      "runProductionLoop: searchScenarios and holdoutScenarios must be disjoint"
    );
  }

  const reps = evolve.reps ?? 3;
  const generations = evolve.generations ?? 3;
  // NOTE(review): the fallback here reads `reps ?? 4` although reps itself
  // defaults to 3 above - confirm which default is intended.
  const populationSize = evolve.populationSize ?? Math.max(2, evolve.reps ?? 4);
  const fallbackToRunRecord = ({ variant, scenarioId, rep, split, seed, trial }) => syntheticRunRecord({
    runId: `${opts.runId}-${variant.id}-${scenarioId}-${rep}-${split}`,
    variant,
    scenarioId,
    rep,
    split,
    seed,
    trial,
    target: opts.target
  });
  const evolution = await runMultiShotOptimization({
    runId: `${opts.runId}/evolve`,
    target: opts.target,
    seedVariants: [baseline],
    searchScenarioIds: searchIds,
    reps,
    generations,
    populationSize,
    scoreConcurrency: evolve.scoreConcurrency ?? 1,
    runner: evolve.runner,
    scorer: evolve.scorer,
    mutateAdapter: evolve.mutator,
    gate: {
      holdoutScenarioIds: holdoutIds,
      reps,
      gate: { ...evolve.gate, baselineKey: baseline.id },
      toRunRecord: evolve.toRunRecord ?? fallbackToRunRecord
    }
  });

  const gate = evolution.gate?.decision ?? null;
  const promotedVariant = evolution.promotedVariant;
  const promoted = promotedVariant.payload;
  const promotedChanged = promotedVariant.id !== baseline.id;

  const allTrials = evolution.evolution.generations.flatMap((g) => g.trials);
  const traceEvidence = releaseTraceEvidenceFromMultiShotTrials(allTrials);
  const asReleaseScenario = (split) => (s) => ({
    id: s.id,
    payload: s,
    split,
    tags: { persona: s.persona, label: s.label }
  });
  // NOTE(review): derived search scenarios (used when evolve.searchScenarios
  // is omitted) are not listed here as "train" scenarios - confirm.
  const releaseScenarios = [
    ...(evolve.searchScenarios ?? []).map(asReleaseScenario("train")),
    ...evolve.holdoutScenarios.map(asReleaseScenario("holdout"))
  ];
  const release = evaluateReleaseConfidence({
    target: opts.target,
    candidateId: promotedVariant.id,
    baselineId: baseline.id,
    scenarios: releaseScenarios,
    traces: traceEvidence,
    gateDecision: gate ?? undefined,
    thresholds: opts.releaseThresholds,
    runs: [...evolution.gate?.candidateRuns ?? [], ...evolution.gate?.baselineRuns ?? []]
  });

  const evaluated = {
    actedOnCluster: actedOn,
    evolution,
    release,
    gate,
    promotedPrompt: promoted,
    pullRequest: null
  };
  if (!promotedChanged) {
    return finish("evolve_yielded_no_improvement", evaluated);
  }
  if (release.status === "fail" || gate && !gate.promote) {
    return finish("gate_failed", evaluated);
  }
  const ship = opts.ship;
  if (!ship) {
    return finish("proposed_change", evaluated);
  }

  const baselineStr = toPromptString(baseline.payload);
  const promotedStr = toPromptString(promoted);
  const ctx = {
    runId: opts.runId,
    target: opts.target,
    decision: "pr_opened",
    clusters,
    actedOnCluster: actedOn,
    observedRunCount: observedRuns.length,
    observedFeedbackCount: observedFeedback.length,
    evolution,
    release,
    gate,
    baselinePromptString: baselineStr,
    promotedPromptString: promotedStr
  };
  const renderBody = ship.renderBody ?? defaultRenderBody;
  // Default file rendering: the promoted prompt followed by a newline.
  const renderFile = ship.renderPromptFile ?? ((next, _prev) => `${next}\n`);
  const currentFile = ship.readCurrentPromptFile ? await ship.readCurrentPromptFile() : null;
  const pr = await proposeAutomatedPullRequest(ship.client, {
    repo: ship.repo,
    baseBranch: ship.baseBranch ?? "main",
    branchName: `${ship.branchPrefix.replace(/\/+$/, "")}/${opts.runId}`,
    title: `${opts.target}: production-loop prompt update (${opts.runId})`,
    body: renderBody(ctx),
    reviewers: ship.reviewers,
    labels: ship.labels,
    fileChanges: [
      {
        path: ship.promptFilePath,
        contents: renderFile(promotedStr, currentFile),
        rationale: `Auto-improved against cluster "${actedOn.failureClass}" (${actedOn.runCount} prod failures)`
      }
    ],
    dryRun: ship.dryRun
  });
  return finish("pr_opened", { ...evaluated, pullRequest: pr });
}
/**
 * Assemble the record returned by a production-loop cycle.
 *
 * @param {object} args - Cycle state: `opts` (loop options), `decision`,
 *   `startedAt`, `now` (clock, called once for `finishedAt`), observation
 *   counts, `clusters`, `actedOnCluster`, `evolution`, `release`, `gate`,
 *   `promotedPrompt` and `pullRequest`.
 * @returns {object} Flat result record; `baselinePrompt` is taken from
 *   `opts.evolve.baselinePrompt` and `cron` falls back to null.
 */
function finalize(args) {
  const {
    opts,
    decision,
    startedAt,
    now,
    observedRunCount,
    observedFeedbackCount,
    clusters,
    actedOnCluster,
    evolution,
    release,
    gate,
    promotedPrompt,
    pullRequest
  } = args;
  const finishedAt = now().toISOString();
  return {
    runId: opts.runId,
    target: opts.target,
    decision,
    startedAt,
    finishedAt,
    observedRunCount,
    observedFeedbackCount,
    clusters,
    actedOnCluster,
    evolution,
    release,
    gate,
    baselinePrompt: opts.evolve.baselinePrompt,
    promotedPrompt,
    pullRequest,
    cron: opts.cron ?? null
  };
}
/**
 * Validate the options of `runProductionLoop` before any work starts.
 *
 * @param {object} opts - Loop options; reads `runId`, `target`,
 *   `evolve.holdoutScenarios`, `evolve.searchScenarios`, `evolve.gate` and,
 *   when `ship` is set, `ship.branchPrefix` and `ship.promptFilePath`.
 * @returns {void}
 * @throws {ValidationError} On the first rule that fails.
 */
function validate2(opts) {
  if (!opts.runId.trim()) throw new ValidationError("runProductionLoop: runId required");
  if (!opts.target.trim()) throw new ValidationError("runProductionLoop: target required");
  if (opts.evolve.holdoutScenarios.length === 0) {
    throw new ValidationError("runProductionLoop: evolve.holdoutScenarios must not be empty");
  }
  if (opts.evolve.searchScenarios && opts.evolve.searchScenarios.length === 0) {
    throw new ValidationError(
      "runProductionLoop: evolve.searchScenarios must be omitted or non-empty"
    );
  }
  // This spot used to hold an empty `if` on gate.baselineKey / baselineId.
  // Its only effect was an accidental TypeError when evolve.gate was missing,
  // so the requirement is now stated explicitly.
  if (!opts.evolve.gate) {
    throw new ValidationError("runProductionLoop: evolve.gate required");
  }
  if (opts.ship) {
    if (!opts.ship.branchPrefix.trim()) {
      throw new ValidationError("runProductionLoop: ship.branchPrefix required");
    }
    if (!opts.ship.promptFilePath.trim()) {
      throw new ValidationError("runProductionLoop: ship.promptFilePath required");
    }
  }
}
/**
 * Drop repeated ids, keeping the first occurrence of each in input order.
 *
 * @param {Iterable<*>} ids - Ids to de-duplicate.
 * @returns {Array<*>} New array of distinct ids.
 */
function uniqueIds(ids) {
  // A Set iterates in insertion order, so first occurrences keep their position.
  return [...new Set(ids)];
}
/**
 * Derive search scenarios from the holdout set: copies of selected holdout
 * scenarios whose id gets a "__search" suffix. With fewer than four holdout
 * scenarios only the first is used; otherwise every fourth one, starting at
 * index 0.
 *
 * @param {Array<{ id: string }>} holdout - Holdout scenarios; must not be empty.
 * @returns {Array<object>} Shallow copies with suffixed ids.
 */
function deriveSearchScenarios(holdout) {
  const selected = holdout.length < 4 ? [holdout[0]] : holdout.filter((_, index) => index % 4 === 0);
  return selected.map((scenario) => ({ ...scenario, id: `${scenario.id}__search` }));
}
/**
 * Build a placeholder run record for one trial. Model, hashes and commit are
 * fixed filler values ("production-loop@synthetic", all-zero strings) and
 * token usage is zero.
 *
 * @param {object} input - `runId`, `target`, `variant`, `scenarioId`, `split`,
 *   `seed` and `trial` (`score`, `ok`, optional `durationMs` and `cost`).
 * @returns {object} Run record whose outcome stores the score under
 *   "holdoutScore" for the holdout split and "searchScore" otherwise.
 */
function syntheticRunRecord(input) {
  const { trial, split } = input;
  const zeros = (count) => "0".repeat(count);
  const outcome = {
    [split === "holdout" ? "holdoutScore" : "searchScore"]: trial.score,
    raw: { score: trial.score, ok: trial.ok ? 1 : 0 }
  };
  return {
    runId: input.runId,
    experimentId: input.target,
    candidateId: input.variant.id,
    seed: input.seed,
    model: "production-loop@synthetic",
    promptHash: zeros(64),
    configHash: zeros(64),
    commitSha: zeros(40),
    wallMs: trial.durationMs ?? 1,
    costUsd: trial.cost ?? 0,
    tokenUsage: { input: 0, output: 0 },
    outcome,
    splitTag: split,
    scenarioId: input.scenarioId
  };
}
/**
 * Render a prompt payload as text.
 *
 * @param {*} payload - Strings pass through, null/undefined become "", and
 *   anything else is pretty-printed as JSON with 2-space indentation.
 * @returns {string} Always a string. Falls back to `String(payload)` when the
 *   value cannot be serialised (cycles, BigInt) or has no JSON form
 *   (functions, symbols).
 */
function toPromptString(payload) {
  if (typeof payload === "string") return payload;
  if (payload == null) return "";
  try {
    // JSON.stringify returns undefined, not a string, for functions and
    // symbols; callers split the result on newlines, so never pass that on.
    return JSON.stringify(payload, null, 2) ?? String(payload);
  } catch {
    return String(payload);
  }
}
/**
 * Render the default Markdown body for a production-loop pull request:
 * a summary header, then optional sections for the triggering failure
 * cluster, the held-out promotion gate and release confidence (each only when
 * present on `ctx`), and finally a fenced diff of baseline vs promoted prompt.
 *
 * @param {object} ctx - Reads `target`, `runId`, `decision`, the two observed
 *   counts, `actedOnCluster`, `gate`, `release`, `baselinePromptString` and
 *   `promotedPromptString`.
 * @returns {string} Markdown text, lines joined with "\n".
 */
function defaultRenderBody(ctx) {
  const { actedOnCluster: cluster, release, gate } = ctx;
  const fixed4 = (value) => value.toFixed(4);

  const header = [
    `## Production-loop prompt update \u2014 \`${ctx.target}\``,
    "",
    `Run id: \`${ctx.runId}\``,
    `Decision: \`${ctx.decision}\``,
    `Observed in this cycle: ${ctx.observedRunCount} prod runs, ${ctx.observedFeedbackCount} feedback trajectories.`,
    ""
  ];

  const clusterSection = [];
  if (cluster) {
    clusterSection.push(
      "### Triggering failure cluster",
      "",
      `- **class**: \`${cluster.failureClass}\``,
      `- **runs in cluster**: ${cluster.runCount}`,
      `- **distinct scenarios**: ${cluster.scenarioIds.length}`
    );
    if (cluster.toolName) clusterSection.push(`- **tool**: \`${cluster.toolName}\``);
    if (cluster.dimension) clusterSection.push(`- **judge dimension**: \`${cluster.dimension}\``);
    if (cluster.exampleError) {
      // Cap at 200 characters and flatten newlines so the bullet stays on one line.
      const excerpt = cluster.exampleError.slice(0, 200).replace(/\n/g, " ");
      clusterSection.push(`- **example error**: \`${excerpt}\``);
    }
    clusterSection.push("");
  }

  const gateSection = [];
  if (gate) {
    const evidence = gate.evidence;
    gateSection.push(
      "### Held-out promotion gate",
      "",
      `- **decision**: \`${gate.promote ? "PROMOTE" : "REJECT"}\``,
      `- **paired median delta**: ${fixed4(evidence.medianPairedDelta)}`,
      `- **paired 95% CI**: [${fixed4(evidence.pairedCI.low)}, ${fixed4(evidence.pairedCI.high)}]`,
      `- **paired p-value**: ${fixed4(evidence.pairedPValue)}`,
      `- **search/holdout means**: ${fixed4(evidence.searchScore)} / ${fixed4(evidence.holdoutScore)}`,
      `- **overfit gap**: ${fixed4(evidence.overfitGap)}`,
      ""
    );
  }

  const releaseSection = [];
  if (release) {
    releaseSection.push(
      "### Release confidence",
      "",
      `- **status**: \`${release.status}\``,
      `- **pass rate**: ${fixed4(release.metrics.passRate)}`,
      `- **mean score**: ${fixed4(release.metrics.meanScore)}`
    );
    if (release.issues.length > 0) {
      releaseSection.push("- **issues**:");
      releaseSection.push(
        ...release.issues.map((issue) => `  - \`${issue.severity}\` ${issue.axis}: ${issue.detail}`)
      );
    }
    releaseSection.push("");
  }

  const diffSection = [
    "### Prompt diff",
    "",
    "```diff",
    unifiedDiff(ctx.baselinePromptString, ctx.promotedPromptString),
    "```"
  ];

  return [...header, ...clusterSection, ...gateSection, ...releaseSection, ...diffSection].join("\n");
}
/**
 * Produce a simple positional line diff of two texts. Lines are compared by
 * index only (no alignment): where line i differs, the old line is emitted
 * as "- old" and the new one as "+ new"; a side that has run out of lines
 * contributes nothing.
 *
 * @param {string} a - Original text.
 * @param {string} b - Updated text.
 * @returns {string} Diff lines joined with "\n"; "" when the texts are identical.
 */
function unifiedDiff(a, b) {
  const before = a.split("\n");
  const after = b.split("\n");
  const rowCount = Math.max(before.length, after.length);
  return Array.from({ length: rowCount }, (_, row) => [before[row], after[row]])
    .filter(([removed, added]) => removed !== added)
    .flatMap(([removed, added]) => {
      const rendered = [];
      if (removed !== undefined) rendered.push(`- ${removed}`);
      if (added !== undefined) rendered.push(`+ ${added}`);
      return rendered;
    })
    .join("\n");
}
 // src/registry.ts
 var ScenarioRegistry = class {
   scenarios = [];
@@ -2384,36 +3073,36 @@ var FileSystemExperimentStore = class {
     return idx.listRuns(experimentId);
   }
   async ensureDir() {
-    const fs = await import("fs/promises");
-    await fs.mkdir(this.dir, { recursive: true });
+    const fs2 = await import("fs/promises");
+    await fs2.mkdir(this.dir, { recursive: true });
   }
   async append(name, record) {
     await this.ensureDir();
-    const fs = await import("fs/promises");
+    const fs2 = await import("fs/promises");
     const path = await import("path");
     const active = path.join(this.dir, `${name}.ndjson`);
     try {
-      const stat = await fs.stat(active);
+      const stat = await fs2.stat(active);
       if (stat.size >= this.maxBytes) {
         const rolled = path.join(this.dir, `${name}.${Date.now()}.ndjson`);
-        await fs.rename(active, rolled);
+        await fs2.rename(active, rolled);
       }
     } catch {
     }
-    await fs.appendFile(active, `${JSON.stringify(record)}
+    await fs2.appendFile(active, `${JSON.stringify(record)}
 `, "utf8");
   }
   async load() {
     if (this.loaded && this.index) return this.index;
-    const fs = await import("fs/promises");
+    const fs2 = await import("fs/promises");
     const path = await import("path");
     const store = new InMemoryExperimentStore();
     try {
-      const entries = await fs.readdir(this.dir);
+      const entries = await fs2.readdir(this.dir);
       const sorted = entries.filter((f) => f.endsWith(".ndjson")).sort((a, b) => a.localeCompare(b));
       for (const file of sorted) {
         const full = path.join(this.dir, file);
-        const content = await fs.readFile(full, "utf8");
+        const content = await fs2.readFile(full, "utf8");
         const base = file.split(".")[0];
         for (const line of content.split("\n")) {
           if (!line.trim()) continue;
@@ -4374,6 +5063,218 @@ function weightedKappa(a, b) {
   if (den === 0) return 1;
   return 1 - num / den;
 }
+/**
+ * Measure how closely two or more raters agree on continuous scores.
+ *
+ * Rows with fewer than two entries, with any non-finite value, or whose length
+ * differs from the first surviving row are dropped before anything is computed.
+ *
+ * @param scores One row per item, one column per rater.
+ * @param opts Optional settings: `bootstrap` (resample count, default 1000; a
+ *   value of 0 or less skips the intervals), `weights` (default "quadratic"),
+ *   `seed` (default 12648430) and `ciLevel` (default 0.95).
+ * @returns `weightedKappa`, `icc`, `pearson`, `spearman`, percentile-bootstrap
+ *   `ci` bounds for `icc` and `weightedKappa`, plus the `n` rows and `raters`
+ *   actually used. Every statistic is NaN when fewer than two rows or two
+ *   raters remain.
+ */
+function continuousAgreement(scores, opts = {}) {
+  const resamples = opts.bootstrap ?? 1e3;
+  const scheme = opts.weights ?? "quadratic";
+  const rngSeed = opts.seed ?? 12648430;
+  const level = opts.ciLevel ?? 0.95;
+  const usable = scores.filter((row) => row.length >= 2 && row.every((v) => Number.isFinite(v)));
+  const raters = usable[0]?.length ?? 0;
+  const clean = usable.filter((row) => row.length === raters);
+  const n = clean.length;
+  // Both intervals stay [NaN, NaN] unless the bootstrap below fills them in.
+  const iccInterval = [NaN, NaN];
+  const kappaInterval = [NaN, NaN];
+  if (n < 2 || raters < 2) {
+    return {
+      weightedKappa: NaN,
+      icc: NaN,
+      pearson: NaN,
+      spearman: NaN,
+      ci: { icc: iccInterval, weightedKappa: kappaInterval },
+      n,
+      raters
+    };
+  }
+  const point = {
+    weightedKappa: continuousWeightedKappa(clean, scheme),
+    icc: icc21(clean),
+    pearson: avgPairwise(clean, pearsonR),
+    spearman: avgPairwise(clean, spearmanR)
+  };
+  if (resamples > 0) {
+    // Seeded generator so repeated calls with the same seed resample identically.
+    const rng = mulberry32(rngSeed);
+    const iccDraws = [];
+    const kappaDraws = [];
+    for (let round = 0; round < resamples; round++) {
+      // Resample whole rows with replacement, keeping each item's raters together.
+      const resample = Array.from({ length: n }, () => clean[Math.floor(rng() * n)]);
+      const iccDraw = icc21(resample);
+      const kappaDraw = continuousWeightedKappa(resample, scheme);
+      if (Number.isFinite(iccDraw)) iccDraws.push(iccDraw);
+      if (Number.isFinite(kappaDraw)) kappaDraws.push(kappaDraw);
+    }
+    const [lowerQ, upperQ] = percentileBounds(level);
+    const fillInterval = (draws, interval) => {
+      if (draws.length === 0) return;
+      draws.sort((x, y) => x - y);
+      interval[0] = quantile(draws, lowerQ);
+      interval[1] = quantile(draws, upperQ);
+    };
+    fillInterval(iccDraws, iccInterval);
+    fillInterval(kappaDraws, kappaInterval);
+  }
+  return {
+    ...point,
+    ci: { icc: iccInterval, weightedKappa: kappaInterval },
+    n,
+    raters
+  };
+}
+/**
+ * Run `calibrateJudge` and add continuous agreement statistics to its result.
+ *
+ * Human and judge scores are paired by `itemId`. Golden items with no finite
+ * candidate score are left out of the agreement computation.
+ *
+ * @param golden Items carrying `itemId` and `humanScore`.
+ * @param candidate Judge outputs carrying `itemId` and `score`.
+ * @param opts Passed through unchanged to `continuousAgreement`.
+ * @returns The `calibrateJudge` result plus `weightedKappaContinuous`, `icc`,
+ *   `spearman` and `ci`.
+ */
+function calibrateJudgeContinuous(golden, candidate, opts = {}) {
+  const base = calibrateJudge(golden, candidate);
+  const pairedById = /* @__PURE__ */ new Map();
+  for (const item of golden) {
+    // NaN marks "no judge score yet" and is filtered out below.
+    pairedById.set(item.itemId, { human: item.humanScore, judge: NaN });
+  }
+  for (const verdict of candidate) {
+    const slot = pairedById.get(verdict.itemId);
+    if (slot) slot.judge = verdict.score;
+  }
+  const rows = [];
+  pairedById.forEach((pair) => {
+    if (Number.isFinite(pair.judge)) rows.push([pair.human, pair.judge]);
+  });
+  const { weightedKappa, icc, spearman, ci } = continuousAgreement(rows, opts);
+  return {
+    ...base,
+    weightedKappaContinuous: weightedKappa,
+    icc,
+    spearman,
+    ci
+  };
+}
+/**
+ * Weighted-kappa-style agreement for continuous scores, averaged over every
+ * pair of raters.
+ *
+ * For each pair the score is 1 - observed / expected, where "observed" is the
+ * mean disagreement between the two raters on the same item and "expected" is
+ * the mean disagreement over all item-by-item combinations of their scores.
+ * When expected disagreement is 0 the pair scores 1 if observed is also 0,
+ * otherwise 0.
+ *
+ * @param rows One row per item, one column per rater.
+ * @param scheme "linear" uses |x - y|; any other value uses (x - y) squared.
+ * @returns Mean pairwise score, or NaN for no rows or fewer than two raters.
+ */
+function continuousWeightedKappa(rows, scheme) {
+  if (rows.length === 0) return NaN;
+  const raterCount = rows[0].length;
+  if (raterCount < 2) return NaN;
+  const disagreement = scheme === "linear" ? (x, y) => Math.abs(x - y) : (x, y) => (x - y) ** 2;
+  // Pull each rater's scores out once instead of once per pair.
+  const columns = Array.from({ length: raterCount }, (_, rater) => rows.map((row) => row[rater]));
+  const itemCount = rows.length;
+  let kappaTotal = 0;
+  let pairCount = 0;
+  for (let first = 0; first < raterCount; first++) {
+    for (let second = first + 1; second < raterCount; second++) {
+      const left = columns[first];
+      const right = columns[second];
+      let observed = 0;
+      for (let i = 0; i < itemCount; i++) observed += disagreement(left[i], right[i]);
+      observed /= itemCount;
+      let expected = 0;
+      for (const x of left) {
+        for (const y of right) expected += disagreement(x, y);
+      }
+      expected /= itemCount * itemCount;
+      if (expected !== 0) {
+        kappaTotal += 1 - observed / expected;
+      } else {
+        kappaTotal += observed === 0 ? 1 : 0;
+      }
+      pairCount++;
+    }
+  }
+  return pairCount === 0 ? NaN : kappaTotal / pairCount;
+}
+/**
+ * Intraclass correlation from a two-way layout (rows = items, columns =
+ * raters), using the single-rater absolute-agreement form
+ * (MSR - MSE) / (MSR + (k - 1) * MSE + k * (MSC - MSE) / n).
+ *
+ * @param rows One row per item, one column per rater.
+ * @returns The coefficient; NaN for fewer than two rows or two raters. When
+ *   the denominator is exactly 0 the result is 1 if both the row and error
+ *   mean squares are 0, otherwise 0.
+ */
+function icc21(rows) {
+  const subjects = rows.length;
+  if (subjects < 2) return NaN;
+  const raters = rows[0].length;
+  if (raters < 2) return NaN;
+  const subjectMeans = rows.map((row) => row.reduce((acc, v) => acc + v, 0) / raters);
+  const raterMeans = Array.from({ length: raters }, (_, j) => {
+    let columnTotal = 0;
+    for (const row of rows) columnTotal += row[j];
+    return columnTotal / subjects;
+  });
+  let grand = 0;
+  for (const mean of subjectMeans) grand += mean;
+  grand /= subjects;
+  const squaredDeviation = (value) => (value - grand) ** 2;
+  // Between-item sum of squares.
+  let ssSubjects = 0;
+  for (const mean of subjectMeans) ssSubjects += squaredDeviation(mean);
+  ssSubjects *= raters;
+  // Between-rater sum of squares.
+  let ssRaters = 0;
+  for (const mean of raterMeans) ssRaters += squaredDeviation(mean);
+  ssRaters *= subjects;
+  let ssTotal = 0;
+  for (const row of rows) {
+    for (let j = 0; j < raters; j++) ssTotal += squaredDeviation(row[j]);
+  }
+  // Whatever the item and rater effects do not explain is residual error.
+  const ssError = ssTotal - ssSubjects - ssRaters;
+  const dfError = (subjects - 1) * (raters - 1);
+  const msSubjects = ssSubjects / (subjects - 1);
+  const msRaters = ssRaters / (raters - 1);
+  const msError = dfError > 0 ? ssError / dfError : 0;
+  const denominator = msSubjects + (raters - 1) * msError + raters * (msRaters - msError) / subjects;
+  if (denominator !== 0) return (msSubjects - msError) / denominator;
+  return msSubjects === 0 && msError === 0 ? 1 : 0;
+}
+/**
+ * Apply a two-sample statistic to every pair of rater columns and average the
+ * finite results.
+ *
+ * @param rows One row per item, one column per rater.
+ * @param fn Called as `fn(columnA, columnB)` for each pair with A before B.
+ * @returns Mean of the finite values `fn` returned, or NaN when there are
+ *   fewer than two columns or no pair produced a finite value.
+ */
+function avgPairwise(rows, fn) {
+  const raterCount = rows[0]?.length ?? 0;
+  if (raterCount < 2) return NaN;
+  // Fresh arrays per call, so `fn` never sees a column another call received.
+  const column = (idx) => rows.map((row) => row[idx]);
+  const finiteScores = [];
+  for (let left = 0; left < raterCount; left++) {
+    for (let right = left + 1; right < raterCount; right++) {
+      const score = fn(column(left), column(right));
+      if (Number.isFinite(score)) finiteScores.push(score);
+    }
+  }
+  if (finiteScores.length === 0) return NaN;
+  return finiteScores.reduce((acc, s) => acc + s, 0) / finiteScores.length;
+}
+/**
+ * Spearman rank correlation: `pearsonR` applied to the tie-averaged ranks of
+ * the two samples.
+ *
+ * @returns NaN when the samples differ in length or have fewer than two values.
+ */
+function spearmanR(a, b) {
+  const mismatched = a.length !== b.length;
+  const tooShort = a.length < 2;
+  if (mismatched || tooShort) return NaN;
+  const ranksA = rankWithTies(a);
+  const ranksB = rankWithTies(b);
+  return pearsonR(ranksA, ranksB);
+}
+/**
+ * Assign 1-based ascending ranks, giving tied values the average of the rank
+ * positions they occupy.
+ *
+ * @param xs Numeric values.
+ * @returns Array of ranks aligned with `xs` (same length, same order).
+ */
+function rankWithTies(xs) {
+  const order = xs.map((value, index) => ({ value, index }));
+  order.sort((p, q) => p.value - q.value);
+  const ranks = new Array(xs.length).fill(0);
+  let start = 0;
+  while (start < order.length) {
+    // Extend `end` across the run of values equal to the one at `start`.
+    let end = start;
+    while (end + 1 < order.length && order[end + 1].value === order[start].value) end += 1;
+    const sharedRank = (start + end) / 2 + 1;
+    for (const { index } of order.slice(start, end + 1)) ranks[index] = sharedRank;
+    start = end + 1;
+  }
+  return ranks;
+}
+/**
+ * Create a seeded pseudo-random generator with 32 bits of state.
+ *
+ * @param seed Coerced to an unsigned 32-bit integer.
+ * @returns A function that yields a number in [0, 1) on each call; the same
+ *   seed always produces the same sequence.
+ */
+function mulberry32(seed) {
+  let state = seed >>> 0;
+  return () => {
+    // 0x6d2b79f5 === 1831565813; `>>> 0` keeps the state unsigned 32-bit.
+    state = (state + 0x6d2b79f5) >>> 0;
+    const mixed = Math.imul(state ^ (state >>> 15), state | 1);
+    const scrambled = mixed ^ (mixed + Math.imul(mixed ^ (mixed >>> 7), mixed | 61));
+    // Divide by 2^32 to map the unsigned result into [0, 1).
+    return ((scrambled ^ (scrambled >>> 14)) >>> 0) / 0x100000000;
+  };
+}
+/**
+ * Convert a two-sided confidence level into lower and upper quantile fractions.
+ *
+ * @param ciLevel Confidence level as a fraction (0.95 for 95%).
+ * @returns `[tail, 1 - tail]` where `tail` is half of `1 - ciLevel`.
+ */
+function percentileBounds(ciLevel) {
+  const lower = (1 - ciLevel) / 2;
+  const upper = 1 - lower;
+  return [lower, upper];
+}
+/**
+ * Read a quantile from an already-sorted array, interpolating linearly
+ * between the two neighbouring elements.
+ *
+ * @param sorted Values in ascending order (this function does not sort).
+ * @param q Quantile as a fraction of the way from the first to the last index.
+ * @returns The interpolated value; NaN for an empty array; the only element
+ *   for a single-element array.
+ */
+function quantile(sorted, q) {
+  const count = sorted.length;
+  if (count <= 1) return count === 0 ? NaN : sorted[0];
+  const position = q * (count - 1);
+  const [below, above] = [Math.floor(position), Math.ceil(position)];
+  // An integral position lands exactly on an element, so nothing to blend.
+  if (below === above) return sorted[below];
+  const weight = position - below;
+  return sorted[below] * (1 - weight) + sorted[above] * weight;
+}
 // src/observability.ts
 async function toLangfuseEnvelope(store, runId) {
@@ -4875,7 +5776,7 @@ async function commitBisect(options) {
 }
 async function promptBisect(options) {
   const split = options.paragraphSplitter ?? ((p) => p.split(/\n\s*\n/));
-  const join3 = (paragraphs) => paragraphs.join("\n\n");
+  const join4 = (paragraphs) => paragraphs.join("\n\n");
   const goodParas = split(options.good);
   const badParas = split(options.bad);
   if (goodParas.length !== badParas.length) {
@@ -4895,7 +5796,7 @@ async function promptBisect(options) {
   const result = await bisect({
     good: goodMask,
     bad: badMask,
-    runEval: (mask) => options.runEval(join3(paragraphsFor(mask))),
+    runEval: (mask) => options.runEval(join4(paragraphsFor(mask))),
     maxIterations: options.maxIterations ?? n + 5,
     halfway: (g, b) => {
       for (let i = 0; i < g.length; i++) {
@@ -4926,12 +5827,12 @@ async function promptBisect(options) {
     }
   }
   const materializedPath = result.path.map((s) => ({
-    state: join3(paragraphsFor(s.state)),
+    state: join4(paragraphsFor(s.state)),
     score: s.score,
     pass: s.pass
   }));
   return {
-    culprit: join3(paragraphsFor(culprit)),
+    culprit: join4(paragraphsFor(culprit)),
     path: materializedPath,
     converged: result.converged,
     inputInconsistent: result.inputInconsistent,
@@ -5176,7 +6077,7 @@ async function proposeSynthesisTargets(dataset, traceStore, options = {}) {
     runCountByScenario.set(r.scenarioId, (runCountByScenario.get(r.scenarioId) ?? 0) + 1);
   }
   const runCounts = [...runCountByScenario.values()];
-  const p25 = runCounts.length > 0 ? quantile(runCounts, 0.25) : 0;
+  const p25 = runCounts.length > 0 ? quantile2(runCounts, 0.25) : 0;
   for (const s of scenarios) {
     const count = runCountByScenario.get(s.id) ?? 0;
     if (count <= p25 && count < 3) {
@@ -5230,7 +6131,7 @@ async function proposeSynthesisTargets(dataset, traceStore, options = {}) {
   }
   return targets.sort((a, b) => b.priority - a.priority).slice(0, topK);
 }
-function quantile(xs, p) {
+function quantile2(xs, p) {
   const sorted = [...xs].sort((a, b) => a - b);
   const idx = p * (sorted.length - 1);
   const lo = Math.floor(idx);
@@ -7619,6 +8520,52 @@ function createCompositeMutator(opts) {
   };
 }
+// src/discover-personas.ts
+import { promises as fs } from "fs";
+import { basename, extname, join as join3 } from "path";
+// Default persona filename shape: two digits, a dash, a name, then one of the
+// .yaml / .yml / .json / .md extensions.
+var DEFAULT_PATTERN = /^\d{2}-.+\.(yaml|yml|json|md)$/;
+/**
+ * List persona files found in a directory.
+ *
+ * A file is returned when its name matches `opts.pattern` (default
+ * `DEFAULT_PATTERN`), it is not named in `opts.exclude` either by filename or
+ * by extension-less id, and - when `opts.include` is non-empty - its filename
+ * or id contains at least one of the include strings. Sub-directories are
+ * visited only when `opts.recursive` is truthy.
+ *
+ * @param dir Directory to scan. A directory that does not exist yields [].
+ * @param opts Optional `pattern`, `exclude`, `include` and `recursive`.
+ * @returns Array of `{ path, filename, id }`, sorted by filename with
+ *   `localeCompare`.
+ * @throws Any `readdir` failure other than ENOENT.
+ */
+async function discoverPersonas(dir, opts = {}) {
+  const pattern = opts.pattern ?? DEFAULT_PATTERN;
+  const exclude = new Set(opts.exclude ?? []);
+  const include = opts.include;
+  const matchesPattern = (name) => {
+    // test() on a global or sticky regex resumes from lastIndex, so reusing one
+    // across filenames would silently skip files. Start each check at index 0.
+    if (pattern.global || pattern.sticky) pattern.lastIndex = 0;
+    return pattern.test(name);
+  };
+  async function walk(d) {
+    let entries;
+    try {
+      entries = await fs.readdir(d, { withFileTypes: true });
+    } catch (err) {
+      // A directory that does not exist simply contributes no personas.
+      if (err?.code === "ENOENT") return [];
+      throw err;
+    }
+    const out = [];
+    for (const entry of entries) {
+      const full = join3(d, entry.name);
+      if (entry.isDirectory()) {
+        if (opts.recursive) {
+          // Push one by one; spreading a very large array into push() can
+          // exceed the engine's argument limit.
+          for (const found of await walk(full)) out.push(found);
+        }
+        continue;
+      }
+      if (!matchesPattern(entry.name)) continue;
+      const id = basename(entry.name, extname(entry.name));
+      if (exclude.has(entry.name) || exclude.has(id)) continue;
+      if (include && include.length > 0) {
+        const wanted = include.some((needle) => entry.name.includes(needle) || id.includes(needle));
+        if (!wanted) continue;
+      }
+      out.push({ path: full, filename: entry.name, id });
+    }
+    return out;
+  }
+  const results = await walk(dir);
+  results.sort((a, b) => a.filename.localeCompare(b.filename));
+  return results;
+}
 // src/evolution-telemetry.ts
 import { appendFileSync as appendFileSync3, existsSync as existsSync5, mkdirSync as mkdirSync3, readFileSync as readFileSync4, writeFileSync } from "fs";
 import { dirname as dirname3 } from "path";
@@ -8008,6 +8955,90 @@ var JsonlTrialCache = class {
   }
 };
+// src/judge-retry.ts
+// Attempts made per model when the retry policy gives no `maxAttempts`.
+var DEFAULT_MAX_ATTEMPTS = 3;
+// Per-attempt timeout in milliseconds (90 000) when the policy gives no `timeoutMs`.
+var DEFAULT_TIMEOUT_MS = 9e4;
+// Default wait before the next attempt: 500 ms doubled per attempt index, capped at 16 000 ms.
+var DEFAULT_BACKOFF = (attempt) => Math.min(500 * 2 ** attempt, 16e3);
+// Error name/message fragments that mark a failure as worth retrying.
+var ABORT_PATTERNS = [
+  /AbortError/i,
+  /TimeoutError/i,
+  /fetch failed/i,
+  /ECONNRESET/i,
+  /ETIMEDOUT/i,
+  /EAI_AGAIN/i,
+  /this operation was aborted/i,
+  /stream.*ended.*unexpectedly/i,
+  /socket hang up/i
+];
+// HTTP status codes (read from `err.status`) that mark a failure as retryable.
+var RETRYABLE_HTTP_STATUS = /* @__PURE__ */ new Set([429, 502, 503, 504]);
+/**
+ * Default retry predicate: true for an Error whose message or name matches
+ * one of `ABORT_PATTERNS`, or whose numeric `status` is in
+ * `RETRYABLE_HTTP_STATUS`. Values that are not Error instances are never
+ * retryable.
+ */
+function defaultIsRetryable(err) {
+  if (!(err instanceof Error)) return false;
+  const looksTransient = ABORT_PATTERNS.some((re) => re.test(err.message) || re.test(err.name));
+  if (looksTransient) return true;
+  const { status } = err;
+  return typeof status === "number" && RETRYABLE_HTTP_STATUS.has(status);
+}
+/** Return a promise that resolves, with no value, once `setTimeout(ms)` fires. */
+function sleep(ms) {
+  return new Promise((wake) => {
+    setTimeout(wake, ms);
+  });
+}
+/**
+ * Call a judge function with a per-attempt timeout, retries and model fallback.
+ *
+ * Each model in `policy.models` (or a single `undefined` model when none are
+ * given) gets up to `maxAttempts` tries. Every try receives a fresh
+ * AbortSignal that is aborted after `timeoutMs`; honouring that signal is up
+ * to `judgeFn`. A non-retryable error ends the whole run immediately. Between
+ * retries on the same model the function waits `backoffMs(attempt)`.
+ *
+ * @param judgeFn Called as `judgeFn(model, signal)`.
+ * @param policy Optional `maxAttempts`, `timeoutMs`, `backoffMs`,
+ *   `isRetryable` and `models`; missing entries fall back to module defaults.
+ * @returns On success `{ value, succeeded: true, attempts, modelUsed,
+ *   attemptErrors }`; on failure `{ value: null, succeeded: false, attempts,
+ *   error, attemptErrors }`. This function reports judge failures in the
+ *   result rather than throwing them.
+ */
+async function withJudgeRetry(judgeFn, policy = {}) {
+  const maxAttempts = policy.maxAttempts ?? DEFAULT_MAX_ATTEMPTS;
+  const timeoutMs = policy.timeoutMs ?? DEFAULT_TIMEOUT_MS;
+  const backoff = policy.backoffMs ?? DEFAULT_BACKOFF;
+  const isRetryable = policy.isRetryable ?? defaultIsRetryable;
+  const models = policy.models?.length > 0 ? policy.models : [undefined];
+  const attemptErrors = [];
+  let attempts = 0;
+  let lastError;
+  const failure = (error) => ({
+    value: null,
+    succeeded: false,
+    attempts,
+    error,
+    attemptErrors
+  });
+  for (const model of models) {
+    for (let attempt = 0; attempt < maxAttempts; attempt++) {
+      attempts += 1;
+      const controller = new AbortController();
+      const timer = setTimeout(() => controller.abort(new Error("TimeoutError")), timeoutMs);
+      let outcome;
+      try {
+        outcome = { value: await judgeFn(model, controller.signal) };
+      } catch (err) {
+        // Normalise thrown non-Error values so `.message` is always available.
+        outcome = { thrown: err instanceof Error ? err : new Error(String(err)) };
+      } finally {
+        // Stop the timeout as soon as the attempt settles, before any backoff.
+        clearTimeout(timer);
+      }
+      if (!("thrown" in outcome)) {
+        return {
+          value: outcome.value,
+          succeeded: true,
+          attempts,
+          modelUsed: model,
+          attemptErrors
+        };
+      }
+      lastError = outcome.thrown;
+      attemptErrors.push({
+        attempt: attempts,
+        model: model ?? "(default)",
+        error: lastError.message
+      });
+      if (!isRetryable(lastError)) return failure(lastError);
+      // No wait after a model's final attempt; the next model starts at once.
+      if (attempt < maxAttempts - 1) await sleep(backoff(attempt));
+    }
+  }
+  return failure(lastError);
+}
 // src/orthogonality.ts
 function passOrthogonality(input) {
   const passes = input.passes;
@@ -8225,6 +9256,55 @@ function createSandboxPool(opts) {
     utilization
   };
 }
+// src/trial-aggregator.ts
+/** Arithmetic mean of `xs`; an empty array yields 0 rather than NaN. */
+function meanOf(xs) {
+  if (xs.length === 0) return 0;
+  let total = 0;
+  xs.forEach((x) => {
+    total += x;
+  });
+  return total / xs.length;
+}
+/**
+ * Average each metric across a list of metric records.
+ *
+ * For every key that appears in any row, the mean is taken over the rows
+ * where that key holds a value of type "number"; other rows are ignored for
+ * that key. Keys with no numeric value at all are omitted.
+ *
+ * @param rows Array of plain objects mapping metric name to value.
+ * @returns Object mapping metric name to mean; `{}` for an empty input.
+ */
+function meanMetrics(rows) {
+  if (rows.length === 0) return {};
+  const names = /* @__PURE__ */ new Set(rows.flatMap((row) => Object.keys(row)));
+  const averages = {};
+  for (const name of names) {
+    const numeric = [];
+    for (const row of rows) {
+      const value = row[name];
+      if (typeof value === "number") numeric.push(value);
+    }
+    if (numeric.length > 0) averages[name] = meanOf(numeric);
+  }
+  return averages;
+}
+/**
+ * Summarise a set of trials, treating judge failures according to `opts.mode`.
+ *
+ * Trials with a truthy `error` are never graded. Among the rest, a trial
+ * counts as a judge failure only when `judgeSucceeded === false`.
+ * - "strict-fail": if any judge failure exists, return an all-zero summary
+ *   with a `strictFailure` record (count plus the first truthy `judgeError`).
+ * - "exclude-failed": averages use only trials whose judge did not fail.
+ * - any other mode: averages use every graded trial.
+ * `okRate` is always computed over all graded trials, whatever the mode.
+ *
+ * @param trials Trial records; reads `error`, `judgeSucceeded`, `judgeError`,
+ *   `score`, `cost`, `durationMs`, `ok` and `metrics`.
+ * @param opts Object with a `mode` string.
+ * @returns Summary with means, `okRate`, trial counts and averaged `metrics`.
+ */
+function aggregateTrialsByMode(trials, opts) {
+  const graded = trials.filter((t) => !t.error);
+  const judgeOk = [];
+  const judgeFailed = [];
+  for (const trial of graded) {
+    (trial.judgeSucceeded === false ? judgeFailed : judgeOk).push(trial);
+  }
+  if (opts.mode === "strict-fail" && judgeFailed.length > 0) {
+    const firstWithMessage = judgeFailed.find((t) => t.judgeError);
+    return {
+      meanScore: 0,
+      meanCost: 0,
+      meanDurationMs: 0,
+      okRate: 0,
+      countedTrials: 0,
+      excludedFailedTrials: judgeFailed.length,
+      totalTrials: trials.length,
+      metrics: {},
+      strictFailure: {
+        failedCount: judgeFailed.length,
+        firstError: firstWithMessage?.judgeError
+      }
+    };
+  }
+  const counted = opts.mode === "exclude-failed" ? judgeOk : graded;
+  const average = (pick) => meanOf(counted.map(pick));
+  const passed = graded.filter((t) => t.ok).length;
+  return {
+    meanScore: average((t) => t.score),
+    // Missing cost and duration are averaged in as 0.
+    meanCost: average((t) => t.cost ?? 0),
+    meanDurationMs: average((t) => t.durationMs ?? 0),
+    okRate: graded.length === 0 ? 0 : passed / graded.length,
+    countedTrials: counted.length,
+    excludedFailedTrials: judgeFailed.length,
+    totalTrials: trials.length,
+    metrics: meanMetrics(counted.map((t) => t.metrics ?? {}))
+  };
+}
 export {
   AgentDriver,
   AgentEvalError,
@@ -8314,6 +9394,7 @@ export {
   adversarialJudge,
   aggregateLlm,
   aggregateRunScore,
+  aggregateTrialsByMode,
   allCriticalPassed,
   analyzeAntiSlop,
   analyzeSeries,
@@ -8336,6 +9417,7 @@ export {
   buildTrajectory,
   byteLengthRange,
   calibrateJudge,
+  calibrateJudgeContinuous,
   callLlm,
   callLlmJson,
   canaryLeakView,
@@ -8360,6 +9442,7 @@ export {
   computeToolUseMetrics,
   confidenceInterval,
   containsAll,
+  continuousAgreement,
   controlFailureClassFromVerification,
   controlRunToFeedbackTrajectory,
   controlRunToRunRecord,
@@ -8384,6 +9467,7 @@ export {
   defaultProviderRedactor,
   defaultReferenceReplayMatcher,
   deployGateLayer,
+  discoverPersonas,
   distillPlaybook,
   dominates,
   estimateCost,
@@ -8417,6 +9501,7 @@ export {
   formatDriverReport,
   formatFindings,
   gainHistogram,
+  ghCliClient,
   precision as goldenPrecision,
   gradeSemanticStatus,
   groupBy,
@@ -8424,6 +9509,7 @@ export {
   hashJson,
   hashScenarios,
   htmlContainsElement,
+  httpGithubClient,
   inMemoryReferenceReplayStore,
   inMemoryReviewStore,
   integrationAsi,
@@ -8484,6 +9570,7 @@ export {
   printDriverSummary,
   probeLlm,
   promptBisect,
+  proposeAutomatedPullRequest,
   proposeSynthesisTargets,
   providerFromBaseUrl,
   pytestTestParser,
@@ -8528,6 +9615,7 @@ export {
   runKeywordCoverageJudgeUrl,
   runLiveProof,
   runMultiShotOptimization,
+  runProductionLoop,
   runPromptEvolution,
   runProposeReview,
   runProposeReviewAsControlLoop,
@@ -8582,6 +9670,7 @@ export {
   whitespaceCollapseMutator,
   wilcoxonSignedRank,
   withAssignedFeedbackSplit,
+  withJudgeRetry,
   wranglerDeployRunner
 };
 //# sourceMappingURL=index.js.map