@tangle-network/agent-eval 0.64.0 → 0.65.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md
CHANGED
|
@@ -4,6 +4,12 @@ All notable changes to `@tangle-network/agent-eval` and its sibling `agent-eval-
|
|
|
4
4
|
|
|
5
5
|
---
|
|
6
6
|
|
|
7
|
+
## [0.65.0] — 2026-05-30 — `emitLoopProvenance` ships the eval-run event too (full dashboard visibility)
|
|
8
|
+
|
|
9
|
+
### Fixed
|
|
10
|
+
|
|
11
|
+
- **`emitLoopProvenance({ hostedClient })` now ships BOTH the eval-run event AND the trace spans** to the hosted collector. It previously shipped only `ingestTraces(spans)` — so a wired product's run never appeared in the Intelligence dashboard's run list (which keys on `/v1/ingest/eval-runs`); only the trace drill-down received data. It now builds an `EvalRunEvent` (baseline + winner held-out snapshots, gate decision, held-out lift, cost, duration) from the loop args + record and POSTs it alongside the spans. Both legs stay best-effort (an offline collector is logged, never thrown; the durable on-disk artifact remains the source of truth). With this, a product wiring ingest via `hostedClientFromEnv()` (0.64.0) gets the full run — list + drill-down — from one `hostedClient` pass.
|
|
12
|
+
|
|
7
13
|
## [0.64.0] — 2026-05-30 — `hostedClientFromEnv()` — one-call ingest wiring for the fleet
|
|
8
14
|
|
|
9
15
|
### Added
|
package/dist/campaign/index.js
CHANGED
|
@@ -1003,6 +1003,54 @@ function provenanceRecordPath(runDir) {
|
|
|
1003
1003
|
function provenanceSpansPath(runDir) {
|
|
1004
1004
|
return join2(runDir, "loop-provenance-spans.jsonl");
|
|
1005
1005
|
}
|
|
1006
|
+
/**
 * Build one snapshot entry for the hosted eval-run event from a campaign
 * result.
 *
 * Each campaign cell is reduced to a per-cell score: the mean of its judges'
 * `composite` values plus each judge's `dimensions`, keyed by judge name.
 * The snapshot-level `compositeMean` is the mean of those per-cell means.
 *
 * @param {number} index - Position of this snapshot in the run (the caller in
 *   this file passes 0 for the baseline and 1 for the winner).
 * @param {string} surfaceHash2 - Content hash of the surface that was measured.
 * @param {*} surface - The surface itself; passed through untouched.
 * @param {object} campaign - Campaign result; reads `cells`,
 *   `aggregates.totalCostUsd` and `durationMs`.
 * @returns {object} `{ index, surfaceHash, surface, cells, compositeMean,
 *   costUsd, durationMs }`.
 */
function snapshotFromHoldout(index, surfaceHash2, surface, campaign) {
  // Mean of a numeric list; an empty list scores 0 rather than NaN.
  const mean = (values) => values.length === 0 ? 0 : values.reduce((sum, v) => sum + v, 0) / values.length;
  const cells = campaign.cells.map((cell) => {
    // A cell without a judge-score map is treated as "no judge scores" so it
    // cannot throw and take the whole event down with it.
    const judgeScores = cell.judgeScores ?? {};
    const score = {
      scenarioId: cell.scenarioId,
      rep: cell.rep,
      compositeMean: mean(Object.values(judgeScores).map((j) => j.composite)),
      dimensions: Object.fromEntries(
        Object.entries(judgeScores).map(([name, s]) => [name, s.dimensions])
      )
    };
    // Only attach errorMessage when the cell actually failed, so successful
    // cells keep a minimal shape.
    if (cell.error) score.errorMessage = cell.error;
    return score;
  });
  return {
    index,
    surfaceHash: surfaceHash2,
    surface,
    cells,
    compositeMean: mean(cells.map((c) => c.compositeMean)),
    costUsd: campaign.aggregates.totalCostUsd,
    durationMs: campaign.durationMs
  };
}
|
|
1032
|
+
/**
 * Assemble the eval-run event sent to the hosted collector.
 *
 * The event carries the baseline snapshot (index 0) and a single generation:
 * the winner snapshot (index 1). Both come from `snapshotFromHoldout`, using
 * the content hashes stored on the provenance record. Status is always
 * "finished" and labels are always empty.
 *
 * @param {object} args - Loop arguments; reads `runId`, `runDir`, `timestamp`,
 *   `baselineSurface`, `baselineOnHoldout`, `winnerSurface`,
 *   `winnerOnHoldout`, `gate.decision`, `totalCostUsd`, `totalDurationMs`.
 * @param {object} record - Provenance record; reads `baselineContentHash`,
 *   `winnerContentHash`, `heldOutLift`.
 * @returns {object} The eval-run event payload.
 */
function buildEvalRunEvent(args, record) {
  const { runId, runDir, timestamp } = args;
  const baselineSnapshot = snapshotFromHoldout(
    0,
    record.baselineContentHash,
    args.baselineSurface,
    args.baselineOnHoldout
  );
  const winnerSnapshot = snapshotFromHoldout(
    1,
    record.winnerContentHash,
    args.winnerSurface,
    args.winnerOnHoldout
  );
  return {
    runId,
    runDir,
    timestamp,
    status: "finished",
    labels: {},
    baseline: baselineSnapshot,
    generations: [winnerSnapshot],
    gateDecision: args.gate.decision,
    holdoutLift: record.heldOutLift,
    totalCostUsd: args.totalCostUsd,
    totalDurationMs: args.totalDurationMs
  };
}
|
|
1006
1054
|
async function emitLoopProvenance(args) {
|
|
1007
1055
|
const record = buildLoopProvenanceRecord(args);
|
|
1008
1056
|
const spans = loopProvenanceSpans(record);
|
|
@@ -1012,6 +1060,12 @@ async function emitLoopProvenance(args) {
|
|
|
1012
1060
|
args.storage.write(recordPath, JSON.stringify(record, null, 2));
|
|
1013
1061
|
args.storage.write(spansPath, spans.map((s) => JSON.stringify(s)).join("\n"));
|
|
1014
1062
|
if (args.hostedClient) {
|
|
1063
|
+
try {
|
|
1064
|
+
await args.hostedClient.ingestEvalRun(buildEvalRunEvent(args, record));
|
|
1065
|
+
} catch (err) {
|
|
1066
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
1067
|
+
console.warn(`[agent-eval] hosted eval-run ingest failed (continuing): ${msg}`);
|
|
1068
|
+
}
|
|
1015
1069
|
try {
|
|
1016
1070
|
await args.hostedClient.ingestTraces(spans);
|
|
1017
1071
|
} catch (err) {
|
|
@@ -1047,4 +1101,4 @@ export {
|
|
|
1047
1101
|
provenanceSpansPath,
|
|
1048
1102
|
emitLoopProvenance
|
|
1049
1103
|
};
|
|
1050
|
-
//# sourceMappingURL=chunk-
|
|
1104
|
+
//# sourceMappingURL=chunk-CZRKD2X2.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/campaign/auto-pr.ts","../src/campaign/drivers/evolutionary.ts","../src/campaign/drivers/gepa.ts","../src/campaign/gates/compose.ts","../src/campaign/gates/default-production-gate.ts","../src/campaign/gates/heldout-gate.ts","../src/campaign/types.ts","../src/campaign/score-utils.ts","../src/campaign/presets/run-optimization.ts","../src/campaign/presets/run-improvement-loop.ts","../src/campaign/presets/run-eval.ts","../src/campaign/provenance.ts"],"sourcesContent":["/**\n * @experimental\n *\n * `openAutoPr` — thin shell-out helper for the `runImprovementLoop` preset's\n * `autoOnPromote: 'pr'` mode. Substitutes for the per-product PR-opening\n * code consumers duplicated 4 times. The PR body includes the campaign's\n * manifest hash, gate verdict, and scorecard summary so reviewers can see\n * exactly what was promoted + why.\n *\n * NOT a deploy mechanism — this only OPENS a PR. The human reviews + merges.\n * The Shape B (`autoOnPromote: 'config'`) live-runtime-mutation path is\n * deferred to Pass B with the full shadow / canary / rollback stack.\n */\n\nimport { execSync } from 'node:child_process'\nimport { writeFileSync } from 'node:fs'\nimport { tmpdir } from 'node:os'\nimport { join } from 'node:path'\nimport type { CampaignResult, GateResult, Scenario } from './types'\n\nexport interface OpenAutoPrOptions<TArtifact, TScenario extends Scenario> {\n /** Campaign result to attach to the PR. */\n result: CampaignResult<TArtifact, TScenario>\n /** Gate verdict explaining the promotion. Substrate refuses to open a PR\n * when `gate.decision !== 'ship'` — fails loud. */\n gate: GateResult\n /** Promoted surface diff — typically the new system prompt addendum or\n * full profile diff. Substrate writes it as the PR body. */\n promotedDiff: string\n /** GH owner/repo target (e.g., `tangle-network/gtm-agent`). */\n ghOwner: string\n ghRepo: string\n /** Branch name for the PR. Default `auto/<manifestHash[:12]>`. 
*/\n branch?: string\n /** PR title. Default includes manifest hash. */\n title?: string\n /** Whether to actually open the PR or just dry-run. Default reads\n * `GH_AUTO_PR_TOKEN` env — present = open, absent = dry-run. */\n dryRun?: boolean\n /** Test seam — substitute `gh pr create` invocation. */\n ghExec?: (args: string[]) => { stdout: string; stderr: string; status: number }\n}\n\nexport interface OpenAutoPrResult {\n opened: boolean\n prUrl?: string\n dryRun: boolean\n reason: string\n}\n\nexport function openAutoPr<TArtifact, TScenario extends Scenario>(\n options: OpenAutoPrOptions<TArtifact, TScenario>,\n): OpenAutoPrResult {\n if (options.gate.decision !== 'ship') {\n return {\n opened: false,\n dryRun: false,\n reason: `gate verdict was \"${options.gate.decision}\" — refusing to open PR`,\n }\n }\n\n const dryRun = options.dryRun ?? !process.env.GH_AUTO_PR_TOKEN\n const branch = options.branch ?? `auto/${options.result.manifestHash.slice(0, 12)}`\n const title =\n options.title ?? `auto: campaign ${options.result.manifestHash.slice(0, 8)} promoted by gate`\n\n const body = renderPrBody(options.result, options.gate, options.promotedDiff)\n const bodyPath = join(tmpdir(), `auto-pr-body-${Date.now()}.md`)\n writeFileSync(bodyPath, body)\n\n if (dryRun) {\n return {\n opened: false,\n dryRun: true,\n reason: `dry-run (GH_AUTO_PR_TOKEN not set). Would create PR on ${options.ghOwner}/${options.ghRepo} branch ${branch}. Body at ${bodyPath}.`,\n }\n }\n\n const ghExec = options.ghExec ?? 
defaultGhExec\n const result = ghExec([\n 'pr',\n 'create',\n '--repo',\n `${options.ghOwner}/${options.ghRepo}`,\n '--head',\n branch,\n '--title',\n title,\n '--body-file',\n bodyPath,\n ])\n if (result.status !== 0) {\n return {\n opened: false,\n dryRun: false,\n reason: `gh pr create failed (exit ${result.status}): ${result.stderr.slice(0, 400)}`,\n }\n }\n const prUrl = result.stdout.trim()\n return { opened: true, prUrl, dryRun: false, reason: 'PR opened' }\n}\n\nfunction renderPrBody<TArtifact, TScenario extends Scenario>(\n result: CampaignResult<TArtifact, TScenario>,\n gate: GateResult,\n diff: string,\n): string {\n const lines: string[] = []\n lines.push(`## Automated promotion by \\`runImprovementLoop\\``)\n lines.push('')\n lines.push(`**Manifest**: \\`${result.manifestHash}\\``)\n lines.push(`**Seed**: ${result.seed}`)\n lines.push(`**Duration**: ${Math.round(result.durationMs / 1000)}s`)\n lines.push(\n `**Cells**: executed ${result.aggregates.cellsExecuted}, cached ${result.aggregates.cellsCached}, skipped ${result.aggregates.cellsSkipped}, failed ${result.aggregates.cellsFailed}`,\n )\n lines.push(`**Total spend**: $${result.aggregates.totalCostUsd.toFixed(2)}`)\n lines.push('')\n lines.push(`### Gate verdict: \\`${gate.decision}\\``)\n lines.push('')\n for (const reason of gate.reasons) lines.push(`- ${reason}`)\n if (gate.delta !== undefined) lines.push(`- delta: ${gate.delta.toFixed(3)}`)\n lines.push('')\n lines.push('### Contributing gates')\n lines.push('')\n lines.push('| gate | passed | detail |')\n lines.push('|---|---|---|')\n for (const c of gate.contributingGates) {\n const detail =\n typeof c.detail === 'object'\n ? JSON.stringify(c.detail).slice(0, 80)\n : String(c.detail).slice(0, 80)\n lines.push(`| ${c.name} | ${c.passed ? 
'✓' : '✗'} | ${detail} |`)\n }\n lines.push('')\n lines.push('### Promoted surface')\n lines.push('')\n lines.push('```diff')\n lines.push(diff.slice(0, 8000))\n lines.push('```')\n lines.push('')\n lines.push('### By-judge aggregates')\n lines.push('')\n lines.push('| judge | mean | ci95 | n |')\n lines.push('|---|---|---|---|')\n for (const [name, agg] of Object.entries(result.aggregates.byJudge)) {\n lines.push(\n `| ${name} | ${agg.mean.toFixed(3)} | [${agg.ci95[0].toFixed(3)}, ${agg.ci95[1].toFixed(3)}] | ${agg.n} |`,\n )\n }\n return lines.join('\\n')\n}\n\nfunction defaultGhExec(args: string[]): { stdout: string; stderr: string; status: number } {\n try {\n const stdout = execSync(`gh ${args.map(quoteArg).join(' ')}`, {\n env: { ...process.env, GH_TOKEN: process.env.GH_AUTO_PR_TOKEN ?? process.env.GH_TOKEN ?? '' },\n stdio: ['ignore', 'pipe', 'pipe'],\n }).toString('utf8')\n return { stdout, stderr: '', status: 0 }\n } catch (err) {\n const e = err as { status?: number; stderr?: Buffer; stdout?: Buffer }\n return {\n stdout: e.stdout?.toString('utf8') ?? '',\n stderr: e.stderr?.toString('utf8') ?? '',\n status: e.status ?? 1,\n }\n }\n}\n\nfunction quoteArg(arg: string): string {\n if (/^[a-zA-Z0-9_/\\-:.@]+$/.test(arg)) return arg\n return `\"${arg.replace(/\"/g, '\\\\\"')}\"`\n}\n","/**\n * @experimental\n *\n * `evolutionaryDriver` — adapts a stateless `Mutator` (population mutation:\n * GEPA / AxGEPA / reflective-mutation) into an `ImprovementDriver`. This is\n * the evolutionary strategy: each generation, mutate the current best surface\n * into N candidates, measure, select. No generation memory beyond the current\n * surface; the loop body handles ranking + promotion.\n *\n * The reflective alternative is agent-runtime's `improvementDriver` with a\n * `reflectiveGenerator` / `agenticGenerator`: it reasons over the report +\n * trace findings to propose targeted edits rather than blind mutations. 
Both\n * conform to `ImprovementDriver`; the improvement loop is identical regardless\n * of which drives it.\n */\n\nimport type { ImprovementDriver, Mutator } from '../types'\n\nexport interface EvolutionaryDriverOptions<TFindings = unknown> {\n mutator: Mutator<TFindings>\n /** External findings fed to the mutator each generation. Default: []. */\n findings?: TFindings[]\n}\n\nexport function evolutionaryDriver<TFindings = unknown>(\n opts: EvolutionaryDriverOptions<TFindings>,\n): ImprovementDriver<TFindings> {\n return {\n kind: `evolutionary:${opts.mutator.kind}`,\n async propose({ currentSurface, findings, populationSize, signal }) {\n return opts.mutator.mutate({\n findings: findings.length > 0 ? findings : (opts.findings ?? []),\n currentSurface,\n populationSize,\n signal,\n })\n },\n }\n}\n","/**\n * @experimental\n *\n * `gepaDriver` — a reflective `ImprovementDriver` for prompt-tier surfaces.\n * Each generation it reflects on the prior best candidate's per-scenario\n * scores + weakest dimensions, asks an LLM to propose targeted rewrites of\n * the current surface, and returns them as the next population.\n *\n * Maps onto the GEPA paper (Agrawal et al., arXiv:2507.19457):\n * - *Reflection*: each generation reflects on the best parent's weakest\n * dimensions + per-scenario top/bottom scores to propose targeted rewrites.\n * - *Pareto frontier*: `runOptimization` maintains the non-dominated set of\n * surfaces across generations (per-scenario objective vectors) and supplies\n * it as `ctx.paretoParents`. A surface uniquely best on one hard scenario\n * survives even when its mean composite is lower.\n * - *Combine complementary lessons*: when the frontier has >1 member, the\n * first population slot is a merge of those parents' strengths (one LLM\n * call citing each parent's winning scenarios). 
Toggle via `combineParents`.\n * Dominance is computed by the package-canonical `paretoFrontier` (`pareto.ts`).\n *\n * Optional `constraints` move structured-doc guards into the driver\n * (preserve H2 section headings, cap sentence-level edits) — useful when\n * the surface IS a structured procedure like a SKILL.md / runbook /\n * judge rubric. When `constraints` is omitted, behavior is unchanged.\n *\n * The driver is surface-agnostic — any string surface in any consumer opts\n * in by selecting it. Reuses the generic reflection primitive\n * (`buildReflectionPrompt` / `parseReflectionResponse`) and the router\n * client; no dependency on the legacy `runMultiShotOptimization` /\n * `prompt-evolution` orchestration.\n *\n * Earns its keep where there is real per-instance signal (which the\n * dimensional + per-scenario evidence + the `LabeledScenarioStore` flywheel\n * now provide). For thin-signal surfaces it degrades to plain reflection.\n * On generation 0 (no history) it reflects on the current surface against\n * the mutation primitives alone.\n */\n\nimport { callLlm, type LlmClientOptions } from '../../llm-client'\nimport {\n buildReflectionPrompt,\n parseReflectionResponse,\n type TrialTrace,\n} from '../../reflective-mutation'\nimport type { ImprovementDriver, ProposeContext, ProposedCandidate } from '../types'\n\nconst REFLECTION_SYSTEM =\n 'You are an expert prompt engineer. Output ONLY a JSON object of shape ' +\n '{\"proposals\":[{\"label\":string,\"rationale\":string,\"payload\":string}]} where ' +\n 'each `payload` is the FULL improved surface text. No prose outside the JSON.'\n\nconst COMBINE_SYSTEM =\n 'You are an expert prompt engineer performing a GEPA \"combine complementary ' +\n 'lessons\" merge. You are given several non-dominated versions of one surface; ' +\n 'each is uniquely best on different scenarios. 
Produce ONE new version that ' +\n 'keeps what makes each version strong on its winning scenarios and resolves ' +\n 'conflicts in favor of the more general rule. Output ONLY a JSON object of ' +\n 'shape {\"proposals\":[{\"label\":string,\"rationale\":string,\"payload\":string}]} ' +\n 'with exactly one proposal whose `payload` is the FULL merged surface text. ' +\n 'No prose outside the JSON.'\n\nexport interface GepaDriverConstraints {\n /** H2 section headings that MUST appear unchanged in every candidate.\n * When set, the driver auto-detects current H2s if this is empty AND\n * rejects any candidate that drops or renames a preserved heading.\n * Use when the surface is a structured doc (SKILL.md, runbook,\n * sectioned system prompt, judge rubric). */\n preserveSections?: string[]\n /** Maximum sentence-level edits per candidate vs the parent surface.\n * Rejection threshold = maxSentenceEdits × 2 (counts adds + removes).\n * Inspired by SkillOpt's edit-budget as a \"textual learning rate.\"\n * Cap prevents an LLM rewrite from overwriting useful prior rules. */\n maxSentenceEdits?: number\n}\n\nexport interface GepaDriverOptions {\n /** Router transport (apiKey/baseUrl). */\n llm: LlmClientOptions\n /** Model that performs the reflection. */\n model: string\n /** What is being optimized — appears in the reflection prompt for orientation. */\n target: string\n /** Surface-specific mutation levers offered to the model. */\n mutationPrimitives?: string[]\n /** Top/bottom scenarios surfaced as evidence each generation. Default 3. */\n evidenceK?: number\n /** Reflection sampling temperature. Default 0.7. */\n temperature?: number\n /** Reflection max tokens. Default 6000. */\n maxTokens?: number\n /** Structured-doc constraints. Candidates violating any are rejected\n * post-parse and dropped from the returned population. 
*/\n constraints?: GepaDriverConstraints\n /** GEPA combine-complementary-lessons: when the loop supplies a Pareto\n * frontier of >1 non-dominated parents (`ctx.paretoParents`), spend one\n * slot of the population on a merge of their strengths. Default `true` —\n * this is the GEPA-faithful behavior; the merge only fires once the\n * frontier has more than one member (generation ≥ 1). Set `false` for\n * pure single-parent reflection. */\n combineParents?: boolean\n /** Cap on how many frontier parents feed one combine prompt (highest\n * composite first), to bound prompt size. Default 4. */\n combineMaxParents?: number\n}\n\nexport function gepaDriver(opts: GepaDriverOptions): ImprovementDriver {\n const evidenceK = opts.evidenceK ?? 3\n const combineParents = opts.combineParents ?? true\n const combineMaxParents = opts.combineMaxParents ?? 4\n if (combineParents && combineMaxParents < 1) {\n throw new Error('gepaDriver: combineMaxParents must be >= 1 when combineParents is enabled')\n }\n return {\n kind: 'gepa',\n async propose(ctx: ProposeContext): Promise<ProposedCandidate[]> {\n const parent =\n typeof ctx.currentSurface === 'string'\n ? ctx.currentSurface\n : JSON.stringify(ctx.currentSurface)\n\n // Shared accept path: constraint checks + dedup, used by BOTH the\n // combine merge and the reflection fill so the population is consistent.\n const constraints = opts.constraints\n const preserveSections =\n constraints?.preserveSections !== undefined\n ? constraints.preserveSections.length === 0\n ? extractH2Sections(parent)\n : constraints.preserveSections\n : null\n const maxEdits = constraints?.maxSentenceEdits\n const out: ProposedCandidate[] = []\n const seen = new Set<string>()\n const accept = (payload: unknown, label: string, rationale: string): void => {\n const text = typeof payload === 'string' ? 
payload.trim() : ''\n if (!text || text === parent || seen.has(text)) return\n if (preserveSections && !validatePreservedSections(text, preserveSections)) return\n if (maxEdits !== undefined && countSentenceEdits(parent, text) > maxEdits * 2) return\n seen.add(text)\n // Thread label + rationale through so the candidate stays attributable:\n // the loop records WHY this rewrite was proposed, not just the payload.\n out.push({ surface: text, label, rationale })\n }\n\n // ── (1) GEPA combine-complementary-lessons ──────────────────────────\n // When the loop supplies >1 non-dominated parents, spend the first slot\n // merging their strengths. Only string surfaces merge (the driver is\n // prompt-tier); the merge prompt cites each parent's winning scenarios.\n const stringParents = (combineParents ? (ctx.paretoParents ?? []) : [])\n .filter((p): p is typeof p & { surface: string } => typeof p.surface === 'string')\n .sort((a, b) => b.composite - a.composite)\n .slice(0, combineMaxParents)\n if (stringParents.length > 1) {\n const combinePrompt = buildCombinePrompt({\n target: opts.target,\n parents: stringParents,\n evidenceK,\n })\n const combineResult = await callLlm(\n {\n model: opts.model,\n messages: [\n { role: 'system', content: COMBINE_SYSTEM },\n { role: 'user', content: combinePrompt },\n ],\n jsonMode: true,\n temperature: opts.temperature ?? 0.7,\n maxTokens: opts.maxTokens ?? 
6000,\n },\n opts.llm,\n )\n const merged = parseReflectionResponse(combineResult.content, 1)[0]\n if (merged) {\n accept(\n merged.payload,\n merged.label || 'pareto-combine',\n merged.rationale ||\n `combined ${stringParents.length} non-dominated parents (gen ${stringParents\n .map((p) => p.generation)\n .join(',')})`,\n )\n }\n }\n\n // ── (2) Reflection fill for the remaining population budget ──────────\n const reflectCount = Math.max(0, ctx.populationSize - out.length)\n if (reflectCount > 0) {\n const { top, bottom, target } = buildEvidence(ctx, evidenceK, opts.target)\n const userPrompt = buildReflectionPrompt({\n target,\n parentPayload: parent,\n topTrials: top,\n bottomTrials: bottom,\n childCount: reflectCount,\n mutationPrimitives: opts.mutationPrimitives,\n })\n const result = await callLlm(\n {\n model: opts.model,\n messages: [\n { role: 'system', content: REFLECTION_SYSTEM },\n { role: 'user', content: userPrompt },\n ],\n jsonMode: true,\n temperature: opts.temperature ?? 0.7,\n maxTokens: opts.maxTokens ?? 6000,\n },\n opts.llm,\n )\n for (const proposal of parseReflectionResponse(result.content, reflectCount)) {\n accept(proposal.payload, proposal.label, proposal.rationale)\n }\n }\n\n return out.slice(0, ctx.populationSize)\n },\n }\n}\n\n/** Build the GEPA combine prompt: each non-dominated parent's full surface +\n * the scenarios it scores highest on, so the model can merge complementary\n * strengths rather than blend blindly. */\nfunction buildCombinePrompt(args: {\n target: string\n parents: Array<{ surface: string; objectives: Record<string, number>; composite: number }>\n evidenceK: number\n}): string {\n const lines: string[] = [\n `You are merging ${args.parents.length} versions of: ${args.target}.`,\n '',\n 'Each version is on the Pareto frontier — none dominates the others; each',\n 'wins on different scenarios. Combine their complementary strengths into',\n 'ONE version. 
Below, each version lists the scenarios it scores highest on.',\n '',\n ]\n args.parents.forEach((p, i) => {\n const tag = String.fromCharCode(65 + i) // A, B, C...\n const best = Object.entries(p.objectives)\n .sort((a, b) => b[1] - a[1])\n .slice(0, args.evidenceK)\n .map(([id, score]) => `${id} (${score.toFixed(2)})`)\n lines.push(\n `### Version ${tag} (mean ${p.composite.toFixed(2)}; strongest on: ${\n best.join(', ') || 'n/a'\n })`,\n '```',\n p.surface,\n '```',\n '',\n )\n })\n lines.push(\n 'Return ONE merged version that would score well on the union of every',\n \"version's winning scenarios. Keep each version's specific winning rule;\",\n 'where two rules conflict, prefer the more general one and note the choice',\n 'in your rationale.',\n )\n return lines.join('\\n')\n}\n\n/** Extract H2 headings (`## Foo`) from a markdown surface. Exported for\n * consumers building custom mutators that share the same invariant. */\nexport function extractH2Sections(text: string): string[] {\n const out: string[] = []\n for (const line of text.split('\\n')) {\n const match = /^##\\s+(.+?)\\s*$/.exec(line)\n if (match) out.push(match[1]!)\n }\n return out\n}\n\n/** Sentence-level edit distance — count distinct add/remove ops between\n * two surfaces via a normalised line-by-line set diff. Treats trivial\n * whitespace as identical. Exported for tests + consumer-side validators. 
*/\nexport function countSentenceEdits(baseline: string, candidate: string): number {\n const norm = (s: string) =>\n s\n .split(/(?<=[.!?])\\s+|\\n/g)\n .map((p) => p.trim())\n .filter((p) => p.length > 0)\n const a = new Set(norm(baseline))\n const b = new Set(norm(candidate))\n let edits = 0\n for (const s of a) if (!b.has(s)) edits++\n for (const s of b) if (!a.has(s)) edits++\n return edits\n}\n\nfunction validatePreservedSections(candidate: string, required: readonly string[]): boolean {\n if (required.length === 0) return true\n const have = new Set(extractH2Sections(candidate))\n for (const section of required) {\n if (!have.has(section)) return false\n }\n return true\n}\n\n/** Turn the prior generation's best candidate into reflective evidence:\n * top/bottom scenarios by composite + a weakest-dimensions note on the target.\n * Empty on generation 0 — the model reflects on the surface alone. */\nfunction buildEvidence(\n ctx: ProposeContext,\n evidenceK: number,\n baseTarget: string,\n): { top: TrialTrace[]; bottom: TrialTrace[]; target: string } {\n const last = ctx.history.at(-1)\n if (!last || last.candidates.length === 0) {\n return { top: [], bottom: [], target: baseTarget }\n }\n const best = [...last.candidates].sort((a, b) => b.composite - a.composite)[0]\n if (!best) return { top: [], bottom: [], target: baseTarget }\n\n const byScore = [...best.scenarios].sort((a, b) => b.composite - a.composite)\n const toTrace = (s: { scenarioId: string; composite: number }): TrialTrace => ({\n id: s.scenarioId,\n score: s.composite,\n })\n const top = byScore.slice(0, evidenceK).map(toTrace)\n const bottom = byScore.slice(-evidenceK).reverse().map(toTrace)\n\n const weakest = Object.entries(best.dimensions)\n .sort((a, b) => a[1] - b[1])\n .slice(0, 3)\n .map(([dim, value]) => `${dim} (${value.toFixed(2)})`)\n const target =\n weakest.length > 0 ? 
`${baseTarget} — weakest dimensions: ${weakest.join(', ')}` : baseTarget\n\n return { top, bottom, target }\n}\n","/**\n * @experimental\n *\n * Compose multiple `Gate` implementations — every gate must pass for the\n * composite to ship. Closes the alignment reviewer's \"default-only\n * heldOutGate + costGate would happily promote a reward-hacked prompt\"\n * concern by making safety gates first-class composable defaults.\n */\n\nimport type { Gate, GateContext, GateDecision, GateResult, Scenario } from '../types'\n\n/** Compose gates — all must `ship` for the composite to `ship`. First\n * non-ship verdict short-circuits the composite verdict, but ALL gates run\n * (so the result records every gate's reason — useful for diagnostics). */\nexport function composeGate<TArtifact = unknown, TScenario extends Scenario = Scenario>(\n ...gates: Array<Gate<TArtifact, TScenario>>\n): Gate<TArtifact, TScenario> {\n if (gates.length === 0) {\n throw new Error('composeGate requires at least one gate')\n }\n return {\n name: `composed(${gates.map((g) => g.name).join(',')})`,\n async decide(ctx: GateContext<TArtifact, TScenario>): Promise<GateResult> {\n const results: Array<{ gate: Gate<TArtifact, TScenario>; res: GateResult }> = []\n for (const gate of gates) {\n const res = await gate.decide(ctx)\n results.push({ gate, res })\n }\n\n // Substrate-wide verdict policy:\n // - all 'ship' → 'ship'\n // - any 'arch_ceiling' → 'arch_ceiling' (architectural ceiling beats other holds)\n // - any 'model_ceiling' → 'model_ceiling'\n // - any 'hold' → 'hold'\n // - else 'need_more_work'\n const decisions = results.map((r) => r.res.decision)\n const overall: GateDecision = decisions.every((d) => d === 'ship')\n ? 'ship'\n : decisions.includes('arch_ceiling')\n ? 'arch_ceiling'\n : decisions.includes('model_ceiling')\n ? 'model_ceiling'\n : decisions.includes('hold')\n ? 'hold'\n : 'need_more_work'\n\n const contributing = results.flatMap((r) =>\n r.res.contributingGates.length > 0\n ? 
r.res.contributingGates\n : [{ name: r.gate.name, passed: r.res.decision === 'ship', detail: r.res }],\n )\n\n const reasons = results.flatMap((r) =>\n r.res.reasons.map((reason) => `[${r.gate.name}] ${reason}`),\n )\n\n return {\n decision: overall,\n reasons,\n contributingGates: contributing,\n delta: results[0]?.res.delta,\n }\n },\n }\n}\n","/**\n * @experimental\n *\n * `defaultProductionGate` — composes the substrate's existing safety\n * primitives (red-team / reward-hacking / canary / heldout) into a single\n * Gate.decide shape. Closes the alignment + Anthropic-SI reviewers' \"safety\n * primitives are off the critical path\" blocker.\n *\n * The composition is opinionated — when consumers wire `runImprovementLoop`,\n * THIS gate is the default. Consumers can still pass a custom gate to\n * override; the recommended pattern is to compose THIS gate with whatever\n * extra domain-specific gates they need (`composeGate(defaultProductionGate(...), customGate)`).\n */\n\nimport type { CanaryReport } from '../../canary'\nimport { runCanaries } from '../../canary'\nimport type { RedTeamCase } from '../../red-team'\nimport { scoreRedTeamOutput } from '../../red-team'\nimport type { RewardHackingReport } from '../../rl/reward-hacking'\nimport { detectRewardHacking } from '../../rl/reward-hacking'\nimport type { RunRecord } from '../../run-record'\nimport type { Gate, GateContext, GateResult, Scenario } from '../types'\n\nexport interface DefaultProductionGateOptions {\n /** Required: scenarios held out from training; substrate compares\n * candidate-on-holdout vs baseline-on-holdout. */\n holdoutScenarios: Scenario[]\n /** Minimum mean-composite improvement required to ship. Default 0.5. */\n deltaThreshold?: number\n /** Total $ budget for ALL cells in this campaign — including baseline + candidate.\n * Composite verdict refuses to ship when spend exceeded budget. */\n budgetUsd?: number\n /** Red-team cases to probe candidate outputs against. 
When omitted the\n * substrate uses `DEFAULT_RED_TEAM_CORPUS`. Provide a domain-specific\n * battery for tighter coverage. */\n redTeamBattery?: RedTeamCase[]\n /** Run records (oldest-first) needed for the reward-hacking detector.\n * Substrate populates from prior production-loop generations. */\n recentRuns?: RunRecord[]\n /** When true, the gate refuses to ship if the reward-hacking detector\n * fires at the `gaming` severity. Default true. */\n blockOnRewardHackingGaming?: boolean\n}\n\nexport function defaultProductionGate<TArtifact, TScenario extends Scenario>(\n options: DefaultProductionGateOptions,\n): Gate<TArtifact, TScenario> {\n const deltaThreshold = options.deltaThreshold ?? 0.5\n const blockOnGaming = options.blockOnRewardHackingGaming ?? true\n\n return {\n name: 'defaultProductionGate',\n async decide(ctx: GateContext<TArtifact, TScenario>): Promise<GateResult> {\n const reasons: string[] = []\n const contributing: Array<{ name: string; passed: boolean; detail: unknown }> = []\n\n // ── (1) heldout composite delta ─────────────────────────────────\n // Baseline scores come from their OWN map; sharing `judgeScores` would\n // compare the candidate against itself (delta 0).\n const baselineComposite = meanComposite(\n ctx.baselineArtifacts,\n ctx.baselineJudgeScores ?? 
ctx.judgeScores,\n options.holdoutScenarios,\n )\n const candidateComposite = meanComposite(\n ctx.candidateArtifacts,\n ctx.judgeScores,\n options.holdoutScenarios,\n )\n const delta = candidateComposite - baselineComposite\n const heldoutPass = delta >= deltaThreshold\n contributing.push({\n name: 'heldout-delta',\n passed: heldoutPass,\n detail: { baselineComposite, candidateComposite, delta, deltaThreshold },\n })\n if (!heldoutPass) {\n reasons.push(`heldout delta ${delta.toFixed(3)} < threshold ${deltaThreshold}`)\n }\n\n // ── (2) budget gate ─────────────────────────────────────────────\n const budgetPass =\n options.budgetUsd === undefined ||\n ctx.cost.candidate + ctx.cost.baseline <= options.budgetUsd\n contributing.push({\n name: 'budget',\n passed: budgetPass,\n detail: {\n candidateUsd: ctx.cost.candidate,\n baselineUsd: ctx.cost.baseline,\n budgetUsd: options.budgetUsd,\n },\n })\n if (!budgetPass) {\n reasons.push(\n `spend ${(ctx.cost.candidate + ctx.cost.baseline).toFixed(2)} > budget ${options.budgetUsd}`,\n )\n }\n\n // ── (3) red-team probe on candidate ─────────────────────────────\n const redTeamFindings = options.redTeamBattery\n ? probeRedTeam(ctx.candidateArtifacts, options.redTeamBattery)\n : { passed: true, findings: [] }\n contributing.push({\n name: 'red-team',\n passed: redTeamFindings.passed,\n detail: {\n failures: redTeamFindings.findings.length,\n sample: redTeamFindings.findings.slice(0, 3),\n },\n })\n if (!redTeamFindings.passed) {\n reasons.push(`red-team probe failed (${redTeamFindings.findings.length} findings)`)\n }\n\n // ── (4) reward-hacking detector on the run-history window ───────\n let rewardHackingReport: RewardHackingReport | null = null\n if (options.recentRuns && options.recentRuns.length >= 10) {\n rewardHackingReport = detectRewardHacking({ runs: options.recentRuns })\n }\n // reward-hacking severity is numeric (0..1). \"gaming\" threshold per\n // detectRewardHacking defaults = 0.6. 
Block when ANY finding is at\n // gaming threshold OR the report verdict is 'gaming'.\n const gamingThreshold = 0.6\n const gamingFindings = (rewardHackingReport?.findings ?? []).filter(\n (f) => f.severity >= gamingThreshold,\n )\n const rewardHackingPass =\n !rewardHackingReport ||\n !blockOnGaming ||\n (gamingFindings.length === 0 && rewardHackingReport.verdict !== 'gaming')\n contributing.push({\n name: 'reward-hacking',\n passed: rewardHackingPass,\n detail: { report: rewardHackingReport, gamingFindingCount: gamingFindings.length },\n })\n if (!rewardHackingPass) {\n reasons.push(\n `reward-hacking detector flagged ${gamingFindings.length} gaming-severity findings (verdict=${rewardHackingReport!.verdict})`,\n )\n }\n\n // ── (5) canary check on runs ────────────────────────────────────\n let canaryReport: CanaryReport | null = null\n if (options.recentRuns && options.recentRuns.length >= 10) {\n canaryReport = runCanaries(options.recentRuns, {})\n }\n // CanarySeverity is 'info' | 'warn' | 'error' — block on 'error'.\n const errorAlerts = (canaryReport?.alerts ?? []).filter((a) => a.severity === 'error')\n const canaryPass = errorAlerts.length === 0\n contributing.push({\n name: 'canary',\n passed: canaryPass,\n detail: { totalAlerts: canaryReport?.alerts.length ?? 0, errorAlerts: errorAlerts.length },\n })\n if (!canaryPass) {\n reasons.push(`canary error alerts: ${errorAlerts.length}`)\n }\n\n // ── Verdict ─────────────────────────────────────────────────────\n const allPassed = contributing.every((c) => c.passed)\n const decision = allPassed ? 'ship' : 'hold'\n\n return {\n decision,\n reasons: reasons.length > 0 ? 
reasons : ['all gates passed'],\n contributingGates: contributing,\n delta,\n }\n },\n }\n}\n\nfunction meanComposite<TArtifact, TScenario extends Scenario>(\n artifacts: Map<string, TArtifact> | undefined,\n judgeScoresByCell: Map<string, Record<string, { composite: number }>>,\n scenarios: TScenario[],\n): number {\n if (!artifacts || artifacts.size === 0) return 0\n const scenarioIds = new Set(scenarios.map((s) => s.id))\n const composites: number[] = []\n for (const [cellId, scores] of judgeScoresByCell) {\n const scenarioId = cellId.split(':')[0] ?? ''\n if (!scenarioIds.has(scenarioId)) continue\n const cellComposites = Object.values(scores).map((s) => s.composite)\n if (cellComposites.length === 0) continue\n composites.push(cellComposites.reduce((a, b) => a + b, 0) / cellComposites.length)\n }\n if (composites.length === 0) return 0\n return composites.reduce((a, b) => a + b, 0) / composites.length\n}\n\nfunction probeRedTeam<TArtifact>(\n artifacts: Map<string, TArtifact>,\n battery: RedTeamCase[],\n): { passed: boolean; findings: Array<{ scenarioId: string; reason: string }> } {\n const findings: Array<{ scenarioId: string; reason: string }> = []\n for (const [_cellId, artifact] of artifacts) {\n const text = extractText(artifact)\n if (text === undefined) continue\n for (const rtCase of battery) {\n const finding = scoreRedTeamOutput(text, [], rtCase)\n if (!finding.passed) {\n findings.push({ scenarioId: rtCase.id, reason: finding.reason ?? 
'red-team probe failed' })\n }\n }\n }\n return { passed: findings.length === 0, findings }\n}\n\nfunction extractText(artifact: unknown): string | undefined {\n if (typeof artifact === 'string') return artifact\n if (artifact && typeof artifact === 'object') {\n const rec = artifact as Record<string, unknown>\n if (typeof rec.text === 'string') return rec.text\n if (typeof rec.output === 'string') return rec.output\n if (typeof rec.content === 'string') return rec.content\n }\n return undefined\n}\n","/**\n * @experimental\n *\n * Thin Gate adapter — exposes delta-threshold-on-holdout as a composable\n * `Gate`. Use when you want held-out as one of N composed gates instead of\n * the full `defaultProductionGate` stack.\n */\n\nimport type { Gate, GateContext, GateResult, Scenario } from '../types'\n\nexport interface HeldOutGateOptions<TScenario extends Scenario = Scenario> {\n scenarios: TScenario[]\n deltaThreshold?: number\n}\n\nexport function heldOutGate<TArtifact, TScenario extends Scenario>(\n options: HeldOutGateOptions<TScenario>,\n): Gate<TArtifact, TScenario> {\n const deltaThreshold = options.deltaThreshold ?? 0.5\n return {\n name: 'heldOutGate',\n async decide(ctx: GateContext<TArtifact, TScenario>): Promise<GateResult> {\n const scenarioIds = new Set(options.scenarios.map((s) => s.id))\n // Baseline scores live in their OWN map — falling back to `judgeScores`\n // would compare the candidate against itself (delta 0).\n const baseline = meanForScenarios(ctx.baselineJudgeScores ?? ctx.judgeScores, scenarioIds)\n const candidate = meanForScenarios(ctx.judgeScores, scenarioIds)\n const delta = candidate - baseline\n const passed = delta >= deltaThreshold\n return {\n decision: passed ? 'ship' : 'hold',\n reasons: passed\n ? 
[`held-out delta ${delta.toFixed(3)} ≥ ${deltaThreshold}`]\n : [`held-out delta ${delta.toFixed(3)} < ${deltaThreshold}`],\n contributingGates: [\n { name: 'heldOutGate', passed, detail: { baseline, candidate, delta, deltaThreshold } },\n ],\n delta,\n }\n },\n }\n}\n\nfunction meanForScenarios(\n judgeScoresByCell: Map<string, Record<string, { composite: number }>>,\n scenarioIds: Set<string>,\n): number {\n const composites: number[] = []\n for (const [cellId, scores] of judgeScoresByCell) {\n const scenarioId = cellId.split(':')[0] ?? ''\n if (!scenarioIds.has(scenarioId)) continue\n const vals = Object.values(scores).map((s) => s.composite)\n if (vals.length > 0) composites.push(vals.reduce((a, b) => a + b, 0) / vals.length)\n }\n return composites.length === 0 ? 0 : composites.reduce((a, b) => a + b, 0) / composites.length\n}\n","/**\n * @experimental\n *\n * Pass A substrate types — `runCampaign` is the one primitive every\n * eval flow composes from. Three contracts in this file:\n *\n * - `Scenario` input set\n * - `DispatchFn` how to run one scenario → artifact\n * - `CampaignResult` defined output schema (the contract downstream tools depend on)\n *\n * Three more lifted from earlier substrate work (re-exported):\n *\n * - `JudgeConfig` pluggable dimensional scorer (0.38)\n * - `Mutator` optimization-loop surface mutator\n * - `Gate` promotion gate (`HeldOutGate` and friends adapt to this)\n *\n * No new architecture vs 0.38 — Pass A formalizes the shapes so consumers\n * can build dashboards / CI gates / regression diffs against a stable schema.\n */\n\nimport type { RunTokenUsage } from '../run-record'\n\n/** @experimental Stable identifier + kind tag for any scenario. Consumers\n * extend with their per-domain payload (persona, task, requirement, ...). */\nexport interface Scenario {\n id: string\n kind: string\n tags?: string[]\n}\n\n/** @experimental Context handed to every dispatch invocation. 
Scoped — every\n * trace/span carries the cellId, every artifact write lands under the cell's\n * artifact root, the cost meter accumulates per cell. */\nexport interface DispatchContext {\n cellId: string\n rep: number\n generation?: number\n seed: number\n signal: AbortSignal\n trace: CampaignTraceWriter\n artifacts: CampaignArtifactWriter\n cost: CampaignCostMeter\n /** Populated when this run is part of a multi-cycle improvement loop. */\n cycleId?: string\n /** Populated when the substrate resumed from a prior cache hit. */\n resumedFrom?: string\n /**\n * Opaque placement key supplied by `RunCampaignOptions.cellPlacement`.\n * The substrate forwards it through unchanged; placement-aware Dispatch\n * implementations (e.g. `httpDispatch` from `/adapters/http`) read it to\n * route the cell to the right worker / region / sandbox. `undefined`\n * when no placement strategy is configured.\n */\n placement?: string\n}\n\n/** @experimental One function: scenario + ctx → artifact. Dispatcher chooses\n * whether to call `runMultishot`, `runLoop`, raw `streamPrompt`, anything. */\nexport type DispatchFn<TScenario extends Scenario, TArtifact> = (\n scenario: TScenario,\n ctx: DispatchContext,\n) => Promise<TArtifact>\n\n// ── Sessions ──────────────────────────────────────────────────────────\n\n/** @experimental One session within a multi-session journey. Dispatch is\n * invoked once per session in order; state from prior session's artifact\n * is exposed via `ctx.priorSessionArtifact`. */\nexport interface SessionScript<TScenario, TArtifact> {\n id: string\n intent: string\n maxTurns?: number\n /** When true, knowledge accumulated this session persists to next. */\n affectsKnowledge?: boolean\n /** Optional per-session persona evolution — called after the session\n * resolves. Returns the persona shape used by the NEXT session. 
*/\n evolveAfterSession?: (artifact: TArtifact, sessionIndex: number, scenario: TScenario) => TScenario\n}\n\n// ── Judges (re-export 0.38 shape) ─────────────────────────────────────\n\nexport interface JudgeDimension {\n /** JSON field name + score key. */\n key: string\n /** Description shown in the judge's user prompt. */\n description: string\n}\n\n/** @experimental Pluggable dimensional scorer. `score` is the contract:\n * given an artifact + scenario, return a `JudgeScore`. This is deliberately a\n * function, not a fixed LLM-prompt shape — real consumers judge with\n * ensembles, deterministic checks, or a single LLM call, and the substrate\n * must not constrain that. The `llmJudge()` helper builds a `score` that does\n * one LLM call for the common case. `appliesTo` lets a judge run only on\n * scenarios that match (e.g. a legal-citation judge only on legal scenarios). */\nexport interface JudgeConfig<TArtifact, TScenario extends Scenario = Scenario> {\n name: string\n dimensions: JudgeDimension[]\n /** Score one artifact. Throw on failure — a thrown judge is recorded as a\n * failed cell, never silently folded into a zero. */\n score(input: {\n artifact: TArtifact\n scenario: TScenario\n signal: AbortSignal\n }): JudgeScore | Promise<JudgeScore>\n appliesTo?: (scenario: TScenario) => boolean\n}\n\nexport interface JudgeScore {\n dimensions: Record<string, number>\n composite: number\n notes: string\n}\n\n// ── Optimization (population + generations + mutator) ─────────────────\n\n/** @experimental A tier-4 code surface — a candidate change to the agent's\n * IMPLEMENTATION, not its prompt. Produced by autoresearch (reads codebase +\n * trace findings → opens a worktree). Measured by checking out `worktreeRef`\n * and running the worker against the changed code. See the improvement-tier\n * table in `docs/design/loop-taxonomy.md`. */\nexport interface CodeSurface {\n kind: 'code'\n /** Worktree path or git ref holding the candidate code change. 
The\n * consumer's `dispatchWithSurface` checks this out before running. */\n worktreeRef: string\n /** Base ref the change is measured against. Default: the repo's main. */\n baseRef?: string\n /** Human summary of what changed — rendered into the auto-PR body. */\n summary?: string\n}\n\n/** @experimental The mutable surface a driver proposes. Tiers (see\n * `docs/design/loop-taxonomy.md`):\n * - `string` — tiers 1-2: system-prompt addendum / serialized tool\n * config. Cheap, reversible, text-diffable.\n * - `CodeSurface` — tier 4: an implementation change behind a worktree ref.\n * Tier 3 (knowledge) is owned by agent-knowledge and rides its own adapter,\n * not this type. */\nexport type MutableSurface = string | CodeSurface\n\n/** @experimental A driver proposal carrying the surface AND the WHY behind\n * it. Reflective drivers (`gepaDriver`) parse a `{label, rationale, payload}`\n * from the model; without this wrapper the loop keeps only `payload` and the\n * rationale that motivated the change is lost — the candidate becomes\n * unattributable. `propose()` may return either bare `MutableSurface`s (cheap\n * blind mutators) or these (reflective drivers); the loop normalizes both. */\nexport interface ProposedCandidate {\n surface: MutableSurface\n /** Short human label for the change (≤ 40 chars typical). */\n label: string\n /** Why this change was proposed — which failure it targets, which\n * primitive it used. Survives to `GenerationCandidate.rationale` and the\n * emitted provenance record. */\n rationale: string\n}\n\n/** @experimental Type guard: a proposal carrying its rationale vs a bare\n * surface. The loop branches on this to populate `GenerationCandidate`. 
*/\nexport function isProposedCandidate(\n value: MutableSurface | ProposedCandidate,\n): value is ProposedCandidate {\n return (\n typeof value === 'object' &&\n value !== null &&\n 'surface' in value &&\n 'label' in value &&\n 'rationale' in value\n )\n}\n\n/** @experimental A non-dominated parent on the GEPA Pareto frontier — a\n * surface that, across the per-scenario objective vectors, no other tried\n * surface beats on every scenario. A candidate worse on the mean composite\n * but uniquely best on one hard scenario is non-dominated and survives here;\n * the composite-best ranking would discard the lesson it carries. The loop\n * computes the frontier across ALL generations and hands it to the driver so\n * a reflective driver can combine complementary lessons (GEPA, Agrawal et\n * al., arXiv:2507.19457). See `pareto.ts` (`paretoFrontier`). */\nexport interface ParetoParent {\n surface: MutableSurface\n surfaceHash: string\n /** The objective vector: per-scenario composite (higher is better). The\n * axes the frontier is computed over. */\n objectives: Record<string, number>\n /** Mean composite across the objective scenarios — the scalar summary used\n * for ordering + display, NOT for dominance. */\n composite: number\n /** Generation that produced this surface (`-1` for the baseline). */\n generation: number\n label?: string\n rationale?: string\n}\n\n/** @experimental Stateless surface mutation — given findings + current\n * surface, return N candidate surfaces. Pure transform, no generation\n * awareness. Reflective-mutation, `runMultiShotOptimization`, `AxGEPA`\n * conform. Wrapped by `evolutionaryDriver` to become an `ImprovementDriver`. 
*/\nexport interface Mutator<TFindings = unknown> {\n kind: string\n mutate(args: {\n findings: TFindings[]\n currentSurface: MutableSurface\n populationSize: number\n signal: AbortSignal\n }): Promise<Array<MutableSurface | ProposedCandidate>>\n}\n\n/** @experimental Everything a driver's `propose()` may read to plan the next\n * batch of candidates. The first six fields are always present; the rest are\n * optional context the loop supplies when available, so cheap drivers\n * (`evolutionaryDriver`) can ignore them while a code-tier agentic generator\n * consumes the research report + dataset to drive a coding harness.\n * See `docs/design/self-improvement-engine.md`. */\nexport interface ProposeContext<TFindings = unknown> {\n currentSurface: MutableSurface\n history: GenerationRecord[]\n findings: TFindings[]\n /** BREADTH: how many candidate surfaces to return this generation. */\n populationSize: number\n generation: number\n signal: AbortSignal\n /** The Phase-2 research report (analyst findings + diff), produced AFTER the\n * trace analysts run. Opaque to the substrate — the driver that consumes it\n * types it. See the phase diagram in self-improvement-engine.md. */\n report?: unknown\n /** Handle to all captured data — the driver samples traces / artifacts /\n * rewards here to ground its proposals. */\n dataset?: LabeledScenarioStore\n /** DEPTH: max iterations the agentic generator may take per candidate.\n * 1 = single-shot; >1 = it may iterate on its own change before handing it\n * back to be measured. */\n maxImprovementShots?: number\n /** GEPA Pareto frontier across ALL generations so far — the non-dominated\n * surfaces by per-scenario objective vector. Empty/absent on generation 0\n * (only the baseline is scored). A reflective driver combines the\n * complementary lessons of these parents (each excels on different\n * scenarios) into a merged candidate. Drivers doing pure single-parent\n * reflection may ignore it. See {@link ParetoParent}. 
*/\n paretoParents?: ParetoParent[]\n}\n\n/** @experimental A surface-improvement strategy — the DRIVER of the\n * improvement loop. Given the current best surface, the history of what's\n * been tried + scored, and any external findings, propose the next batch of\n * candidate surfaces to measure. Optionally decide to stop early.\n *\n * The evolutionary mutator (`evolutionaryDriver`, here) and agent-runtime's\n * `improvementDriver` (with reflective / agentic generators) both conform —\n * drivers of the SAME loop, not separate loops. The loop body\n * (`runOptimization`) and the gated promotion shell (`runImprovementLoop`)\n * are driver-agnostic. */\nexport interface ImprovementDriver<TFindings = unknown> {\n kind: string\n /** Plan: propose N candidate surfaces for the next generation. A driver\n * may return bare `MutableSurface`s or `ProposedCandidate`s that carry the\n * `{label, rationale}` motivating the change — the loop threads the\n * rationale into `GenerationCandidate` and the emitted provenance. */\n propose(ctx: ProposeContext<TFindings>): Promise<Array<MutableSurface | ProposedCandidate>>\n /** Decide: stop early when the driver judges the search converged or\n * exhausted. Default (omitted) runs all `maxGenerations`. */\n decide?(args: { history: GenerationRecord[] }): { stop: boolean; reason?: string }\n}\n\nexport interface OptimizerConfig {\n driver: ImprovementDriver\n populationSize: number\n maxGenerations: number\n surfaceExtractor: (profile: unknown) => MutableSurface\n}\n\n// ── Gates ─────────────────────────────────────────────────────────────\n\n/** @experimental Five-valued verdict taxonomy (MOSS-paper alignment). */\nexport type GateDecision = 'ship' | 'hold' | 'need_more_work' | 'model_ceiling' | 'arch_ceiling'\n\nexport interface GateContext<TArtifact, TScenario extends Scenario> {\n candidateArtifacts: Map<string, TArtifact>\n baselineArtifacts?: Map<string, TArtifact>\n /** Candidate (winner) judge scores, keyed by cellId. 
*/\n judgeScores: Map<string, Record<string, JudgeScore>>\n /** Baseline judge scores, keyed by cellId. SEPARATE from `judgeScores` —\n * baseline + candidate share cellIds (same scenarios), so a single map\n * cannot represent both. A gate computing a holdout delta MUST read\n * candidate from `judgeScores` and baseline from here. */\n baselineJudgeScores?: Map<string, Record<string, JudgeScore>>\n scenarios: TScenario[]\n cost: { candidate: number; baseline: number }\n signal: AbortSignal\n}\n\nexport interface GateResult {\n decision: GateDecision\n reasons: string[]\n contributingGates: Array<{ name: string; passed: boolean; detail: unknown }>\n delta?: number\n}\n\n/** @experimental Composable promotion gate. */\nexport interface Gate<TArtifact = unknown, TScenario extends Scenario = Scenario> {\n name: string\n decide(ctx: GateContext<TArtifact, TScenario>): Promise<GateResult>\n}\n\n// ── Tracing / artifacts / cost ────────────────────────────────────────\n\n/** @experimental Scoped trace writer handed to each dispatch — every span\n * auto-tagged with the cellId so traces filter cleanly. */\nexport interface CampaignTraceWriter {\n span(name: string, attributes?: Record<string, unknown>): TraceSpan\n flush(): Promise<void>\n}\n\nexport interface TraceSpan {\n end(attributes?: Record<string, unknown>): void\n setAttribute(key: string, value: unknown): void\n}\n\n/** @experimental Scoped artifact writer — `write(path, content)` lands under\n * `<runDir>/<cellId>/<path>`. */\nexport interface CampaignArtifactWriter {\n write(path: string, content: string | Uint8Array): Promise<string>\n writeJson(path: string, value: unknown): Promise<string>\n}\n\n/** Token usage accumulated for a cell. Aliased to the canonical `RunTokenUsage`\n * (run-record.ts, same package) so a cell maps onto a `RunRecord` for the\n * backend-integrity guard with ONE source of truth — a field added to\n * `RunTokenUsage` is a compile error here, not a silent drift. 
*/\nexport type CampaignTokenUsage = RunTokenUsage\n\n/** @experimental Cell-scoped cost meter. NOTHING is captured automatically —\n * the substrate does not intercept the LLM call, so it cannot see cost or\n * tokens unless the dispatch reports them. Every LLM cost MUST be reported via\n * `observe` and every token count via `observeTokens`; a dispatch that reports\n * neither yields a `{cost:0, tokens:0}` cell, which the backend-integrity\n * guard (`assertRealBackend`) correctly reads as a stub. Also use `observe`\n * for non-LLM spend (sandbox time, tool costs). */\nexport interface CampaignCostMeter {\n observe(amountUsd: number, source: string): void\n /** Record LLM token usage for this cell; accumulates across calls. A cell\n * has `costUsd` but no token counts unless the dispatch reports them here —\n * and the backend-integrity guard (`assertRealBackend`) keys on\n * `tokenUsage`, so a cell that never reports tokens reads as a stub. Any\n * dispatch that calls an LLM MUST report its usage. */\n observeTokens(usage: CampaignTokenUsage): void\n current(): number\n /** Accumulated token usage for this cell (zeros if never observed). */\n tokens(): CampaignTokenUsage\n}\n\n// ── LabeledScenarioStore ──────────────────────────────────────────────\n\n/** @experimental Source tag — required on every store write. Used by the\n * default training-source filter (production-trace samples NOT used as\n * training scenarios unless explicitly opted in). */\nexport type LabeledScenarioSource =\n | 'production-trace'\n | 'eval-run'\n | 'manual'\n | 'red-team'\n | 'synthetic'\n\nexport type RedactionStatus = 'raw' | 'redacted-pii' | 'redacted-secrets' | 'fully-redacted'\n\n/** How much a label can be trusted to evaluate against — the gold-admission\n * gate. Strictly ordered: a record qualifies for a `minTrust` filter when its\n * trust rank is >= the requested rank.\n *\n * - `unverified` — label is a heuristic (e.g. 
raw outcome success/fail).\n * Fine as corpus; MUST NOT enter a gold set that lift\n * numbers are computed against.\n * - `verified-signal` — an external signal confirmed the outcome (PR merged,\n * tests green, user did not retry, downstream check).\n * - `human-rated` — a human explicitly rated or corrected the artifact.\n *\n * Absent on a write ⇒ treated as `unverified` (fail-closed: a writer must\n * explicitly assert trust to make a record gold-eligible — it never happens\n * by accident). */\nexport type LabelTrust = 'unverified' | 'verified-signal' | 'human-rated'\n\nconst LABEL_TRUST_RANK: Record<LabelTrust, number> = {\n unverified: 0,\n 'verified-signal': 1,\n 'human-rated': 2,\n}\n\n/** Ordinal rank for a label-trust tier; absent ⇒ `unverified` (rank 0). */\nexport function labelTrustRank(trust: LabelTrust | undefined): number {\n return LABEL_TRUST_RANK[trust ?? 'unverified']\n}\n\n/** @experimental Required-provenance write. The store rejects writes that\n * lack provenance — a default-on flywheel without provenance is the\n * data-poisoning vector flagged in the alignment review. */\nexport interface LabeledScenarioWrite<TScenario extends Scenario = Scenario, TArtifact = unknown> {\n scenario: TScenario\n artifact: TArtifact\n judgeScores: Record<string, JudgeScore>\n source: LabeledScenarioSource\n sourceVersionHash: string\n capturedAt: string\n redactionStatus: RedactionStatus\n /** Gold-admission trust tier. Absent ⇒ `unverified` (fail-closed): the\n * record is corpus, never gold. A writer must explicitly assert\n * `verified-signal` or `human-rated` to make it eligible for a gold\n * sample. See {@link LabelTrust}. */\n labelTrust?: LabelTrust\n /** Optional per-source rate-limit bucket key (e.g., the tenant id). 
*/\n rateLimitBucket?: string\n}\n\nexport interface LabeledScenarioRecord<TScenario extends Scenario = Scenario, TArtifact = unknown>\n extends LabeledScenarioWrite<TScenario, TArtifact> {\n /** Stable hash of (scenario.id, source, capturedAt, sourceVersionHash). */\n recordHash: string\n /** Substrate-assigned split — train if captured before the campaign's\n * `temporalCutoff`, test if after. Explicit override allowed via filter. */\n split: 'train' | 'test'\n}\n\nexport interface LabeledScenarioSampleArgs {\n count: number\n /** REQUIRED — substrate refuses to sample without an explicit split. */\n split: 'train' | 'test'\n /** REQUIRED — only records captured before this timestamp are returned.\n * Enforces temporal split discipline (test scenarios captured AFTER train\n * cannot enter the training pool). */\n capturedBefore: string\n filter?: {\n kind?: string\n source?: LabeledScenarioSource | LabeledScenarioSource[]\n minComposite?: number\n maxComposite?: number\n /** Gold gate: only records whose trust rank is >= this tier are\n * returned. `sample({ split: 'test', minTrust: 'verified-signal' })` is\n * the canonical \"give me the gold set\" call. Absent ⇒ no trust gate\n * (corpus-level read). */\n minTrust?: LabelTrust\n }\n}\n\nexport interface LabeledScenarioStore {\n observe(write: LabeledScenarioWrite): Promise<void>\n sample(args: LabeledScenarioSampleArgs): Promise<LabeledScenarioRecord[]>\n size(): Promise<{\n train: number\n test: number\n bySource: Record<string, number>\n /** Count by trust tier — tells the flywheel how much gold it has\n * accumulated vs. raw corpus. 
*/\n byTrust: Record<LabelTrust, number>\n }>\n}\n\n// ── The CampaignResult schema (the downstream-tools contract) ─────────\n\nexport interface CampaignCellResult<TArtifact> {\n cellId: string\n scenarioId: string\n rep: number\n generation?: number\n artifact: TArtifact\n judgeScores: Record<string, JudgeScore>\n costUsd: number\n /** LLM token usage the dispatch reported via `ctx.cost.observeTokens`.\n * `{ input: 0, output: 0 }` when the dispatch reported none — which the\n * backend-integrity guard reads as a stub. */\n tokenUsage: CampaignTokenUsage\n durationMs: number\n seed: number\n cached: boolean\n error?: string\n}\n\nexport interface JudgeAggregate {\n mean: number\n stdev: number\n ci95: [number, number]\n n: number\n}\n\nexport interface ScenarioAggregate {\n meanComposite: number\n ci95: [number, number]\n n: number\n}\n\nexport interface GenerationRecord {\n generationIndex: number\n candidates: GenerationCandidate[]\n promoted: string[]\n}\n\n/** One scored candidate surface in a generation. `dimensions` + `scenarios`\n * let a reflective `ImprovementDriver` ground its next proposal on WHICH\n * dimensions the candidate is weakest on and WHICH scenarios it best/worst\n * handled — the evidence a blind `Mutator` cannot see. */\nexport interface GenerationCandidate {\n surfaceHash: string\n composite: number\n ci95: [number, number]\n /** Mean score per judge dimension across all cells (scenarios × reps ×\n * judges that reported the dimension). */\n dimensions: Record<string, number>\n /** Per-scenario composite (mean over reps + judges). */\n scenarios: Array<{ scenarioId: string; composite: number }>\n /** Driver-supplied short label for the change. Present when the driver\n * returned a `ProposedCandidate`; absent for bare-surface mutators. */\n label?: string\n /** Driver-supplied rationale — WHY this candidate was proposed. 
The\n * \"because rationale Z\" the audit requires to survive to the result.\n * Present when the driver returned a `ProposedCandidate`. */\n rationale?: string\n}\n\nexport interface CampaignAggregates {\n byJudge: Record<string, JudgeAggregate>\n byScenario: Record<string, ScenarioAggregate>\n totalCostUsd: number\n cellsExecuted: number\n cellsSkipped: number\n cellsCached: number\n cellsFailed: number\n}\n\nexport interface CampaignResult<TArtifact = unknown, TScenario extends Scenario = Scenario> {\n /** sha256(scenarios, judges, dispatch source ref, optimizer config, seed). Stable identity for reruns. */\n manifestHash: string\n seed: number\n startedAt: string\n endedAt: string\n durationMs: number\n cells: Array<CampaignCellResult<TArtifact>>\n aggregates: CampaignAggregates\n optimization?: {\n generations: GenerationRecord[]\n winnerSurfaceHash?: string\n }\n gate?: GateResult\n prUrl?: string\n runDir: string\n artifactsByPath: Record<string, string>\n /** Substrate strips the input scenarios to id+kind for the result manifest;\n * consumers needing full payload look it up via the original input. The\n * type parameter `TScenario` is propagated for downstream consumers that\n * want narrowed types when extending `CampaignResult`. */\n scenarios: Array<Pick<TScenario, 'id' | 'kind'>>\n}\n","/**\n * @experimental\n *\n * Shared campaign-score reductions used by every optimizer preset\n * (`runOptimization`, `runSkillOpt`, `compareDrivers`). ONE definition of\n * \"composite of a campaign\" and \"per-scenario / per-dimension breakdown\" so\n * the optimizers cannot drift on how a surface's score is computed.\n */\n\nimport type { CampaignResult, Scenario } from './types'\n\n/** Mean composite across a campaign: per cell, the mean of its judges'\n * composites; then the mean across cells. Cells with no judge scores are\n * skipped. Empty ⇒ 0. 
*/\nexport function campaignMeanComposite<TArtifact, TScenario extends Scenario>(\n campaign: CampaignResult<TArtifact, TScenario>,\n): number {\n const composites: number[] = []\n for (const cell of campaign.cells) {\n const cellComposites = Object.values(cell.judgeScores).map((s) => s.composite)\n if (cellComposites.length > 0) {\n composites.push(cellComposites.reduce((a, b) => a + b, 0) / cellComposites.length)\n }\n }\n return composites.length === 0 ? 0 : composites.reduce((a, b) => a + b, 0) / composites.length\n}\n\nexport interface CampaignBreakdown {\n /** Mean score per judge dimension across all cells. */\n dimensions: Record<string, number>\n /** Per-scenario composite (mean over reps + judges). */\n scenarios: Array<{ scenarioId: string; composite: number }>\n}\n\n/** Per-candidate evidence a reflective/patch driver grounds its next proposal\n * on: mean score per judge dimension + per-scenario composite. */\nexport function campaignBreakdown<TArtifact, TScenario extends Scenario>(\n campaign: CampaignResult<TArtifact, TScenario>,\n): CampaignBreakdown {\n const dimSums: Record<string, number> = {}\n const dimCounts: Record<string, number> = {}\n const byScenario = new Map<string, number[]>()\n for (const cell of campaign.cells) {\n const judgeScores = Object.values(cell.judgeScores)\n if (judgeScores.length === 0) continue\n const cellComposite = judgeScores.reduce((a, s) => a + s.composite, 0) / judgeScores.length\n const arr = byScenario.get(cell.scenarioId) ?? []\n arr.push(cellComposite)\n byScenario.set(cell.scenarioId, arr)\n for (const score of judgeScores) {\n for (const [key, value] of Object.entries(score.dimensions)) {\n dimSums[key] = (dimSums[key] ?? 0) + value\n dimCounts[key] = (dimCounts[key] ?? 0) + 1\n }\n }\n }\n const dimensions: Record<string, number> = {}\n for (const key of Object.keys(dimSums)) {\n const count = dimCounts[key] ?? 0\n dimensions[key] = count > 0 ? (dimSums[key] ?? 
0) / count : 0\n }\n const scenarios = [...byScenario.entries()].map(([scenarioId, comps]) => ({\n scenarioId,\n composite: comps.reduce((a, b) => a + b, 0) / comps.length,\n }))\n return { dimensions, scenarios }\n}\n","/**\n * @experimental\n *\n * `runOptimization` — the improvement loop body. Runs N generations: the\n * `ImprovementDriver` proposes K candidate surfaces per generation, each\n * candidate runs a campaign (the measurement), top-scoring promote to the\n * next generation. Driver-agnostic — the same loop runs an evolutionary\n * population mutator (`evolutionaryDriver`) or agent-runtime's\n * `improvementDriver` (reflective / agentic generators); they differ only in\n * how `propose()` picks candidates.\n *\n * This is `runLoop`'s shape (plan → measure → decide) specialized to surface\n * improvement: `driver.propose` = plan, `runCampaign` = the measurement (which\n * runs the worker behind `dispatch`), the mean-composite ranking = the\n * validator, `driver.decide` = the stop check.\n *\n * The gated-promotion shell (`runImprovementLoop`) wraps this with a holdout\n * re-score + release gate + optional PR.\n */\n\nimport { createHash } from 'node:crypto'\nimport { type Objective, paretoFrontier } from '../../pareto'\nimport { type RunCampaignOptions, runCampaign } from '../run-campaign'\nimport { campaignBreakdown, campaignMeanComposite } from '../score-utils'\nimport {\n type CampaignResult,\n type GenerationRecord,\n type ImprovementDriver,\n isProposedCandidate,\n type MutableSurface,\n type ParetoParent,\n type ProposedCandidate,\n type Scenario,\n} from '../types'\n\nexport interface RunOptimizationOptions<TScenario extends Scenario, TArtifact>\n extends Omit<RunCampaignOptions<TScenario, TArtifact>, 'dispatch'> {\n /** Initial mutable surface (typically system prompt or addendum). */\n baselineSurface: MutableSurface\n /** Dispatcher that takes the CURRENT surface + scenario → artifact. 
*/\n dispatchWithSurface: (\n surface: MutableSurface,\n scenario: TScenario,\n ctx: Parameters<RunCampaignOptions<TScenario, TArtifact>['dispatch']>[1],\n ) => Promise<TArtifact>\n /** The improvement strategy. Wrap a population `Mutator` via\n * `evolutionaryDriver({ mutator })`, or pass agent-runtime's\n * `improvementDriver` (reflective / agentic generators). */\n driver: ImprovementDriver\n populationSize: number\n maxGenerations: number\n /** How many top-scoring candidates carry to the next generation. Default 2. */\n promoteTopK?: number\n /** DEPTH knob forwarded to the driver's `propose()` — max iterations the\n * agentic generator may take per candidate. */\n maxImprovementShots?: number\n /** Phase-2 research report forwarded to `propose()` (analyst findings +\n * diff). Opaque here; the driver types it. */\n report?: unknown\n}\n\nexport interface RunOptimizationResult<TArtifact, TScenario extends Scenario> {\n generations: Array<{\n record: GenerationRecord\n surfaces: Array<{\n surfaceHash: string\n surface: MutableSurface\n campaign: CampaignResult<TArtifact, TScenario>\n }>\n }>\n winnerSurface: MutableSurface\n winnerSurfaceHash: string\n /** Driver label for the promoted surface. Present when the winning\n * candidate came from a `ProposedCandidate` (a reflective driver);\n * absent when the winner is the baseline or a bare-surface mutator. */\n winnerLabel?: string\n /** Driver rationale for the promoted surface — the \"because Z\" that\n * motivated the winning change. Survives to `SelfImproveResult` and the\n * emitted provenance record. Absent when the winner is the baseline. */\n winnerRationale?: string\n baselineCampaign: CampaignResult<TArtifact, TScenario>\n /** The GEPA Pareto frontier across every scored surface (baseline + all\n * generations) by per-scenario objective vector — the non-dominated set.\n * Each generation's `propose()` received the frontier-so-far as\n * `ctx.paretoParents`; this is the final frontier. 
A surface here that is\n * NOT the winner is uniquely best on some scenario the winner loses on. */\n paretoFrontier: ParetoParent[]\n}\n\nexport async function runOptimization<TScenario extends Scenario, TArtifact>(\n opts: RunOptimizationOptions<TScenario, TArtifact>,\n): Promise<RunOptimizationResult<TArtifact, TScenario>> {\n const promoteTopK = opts.promoteTopK ?? 2\n\n // Baseline run\n const baselineCampaign = await runCampaign<TScenario, TArtifact>({\n ...opts,\n dispatch: (scenario, ctx) => opts.dispatchWithSurface(opts.baselineSurface, scenario, ctx),\n runDir: `${opts.runDir}/baseline`,\n })\n\n const generations: RunOptimizationResult<TArtifact, TScenario>['generations'] = []\n const history: GenerationRecord[] = []\n let currentSurfaces: MutableSurface[] = [opts.baselineSurface]\n let winnerSurface = opts.baselineSurface\n let winnerSurfaceHash = surfaceHash(opts.baselineSurface)\n let winnerComposite = campaignMeanComposite(baselineCampaign)\n let winnerLabel: string | undefined\n let winnerRationale: string | undefined\n\n // GEPA frontier accumulator — every scored surface as an objective vector\n // (per-scenario composite). The baseline seeds it as generation -1; each\n // candidate is added after its campaign. The non-dominated set of this list\n // is recomputed before every `propose()` and handed to the driver.\n const scored: ParetoParent[] = [\n toParetoParent(opts.baselineSurface, winnerSurfaceHash, baselineCampaign, -1),\n ]\n\n for (let gen = 0; gen < opts.maxGenerations; gen++) {\n // Decide: the driver may stop early based on accumulated history.\n if (opts.driver.decide?.({ history }).stop) break\n\n // Plan: the driver proposes N candidates from the current best surface,\n // the accumulated generation history, the Pareto frontier so far, and any\n // external findings.\n const paretoParents = computeParetoFrontier(scored)\n const proposed = await opts.driver.propose({\n currentSurface: currentSurfaces[0] ?? 
opts.baselineSurface,\n history,\n findings: [],\n populationSize: opts.populationSize,\n generation: gen,\n signal: new AbortController().signal,\n report: opts.report,\n dataset: opts.labeledStore && opts.labeledStore !== 'off' ? opts.labeledStore : undefined,\n maxImprovementShots: opts.maxImprovementShots,\n paretoParents,\n })\n\n // Normalize: a driver may return bare surfaces (blind mutators) or\n // `ProposedCandidate`s carrying {label, rationale}. Keep the rationale so\n // each candidate stays attributable through to the result + provenance.\n const candidates: ProposedCandidate[] = proposed.map((p) =>\n isProposedCandidate(p) ? p : { surface: p, label: '', rationale: '' },\n )\n\n // Run each candidate as its own campaign.\n const surfaceResults: Array<{\n surfaceHash: string\n surface: MutableSurface\n label: string\n rationale: string\n campaign: CampaignResult<TArtifact, TScenario>\n composite: number\n }> = []\n for (let i = 0; i < candidates.length; i++) {\n const { surface, label, rationale } = candidates[i]!\n const hash = surfaceHash(surface)\n const campaign = await runCampaign<TScenario, TArtifact>({\n ...opts,\n dispatch: (scenario, ctx) => opts.dispatchWithSurface(surface, scenario, ctx),\n runDir: `${opts.runDir}/gen-${gen}/candidate-${i}`,\n })\n const composite = campaignMeanComposite(campaign)\n surfaceResults.push({ surfaceHash: hash, surface, label, rationale, campaign, composite })\n // Add to the GEPA frontier accumulator — the NEXT generation's\n // `propose()` sees this candidate's per-scenario objective vector.\n scored.push(\n toParetoParent(surface, hash, campaign, gen, label || undefined, rationale || undefined),\n )\n }\n\n // Rank, promote top-K.\n surfaceResults.sort((a, b) => b.composite - a.composite)\n const promoted = surfaceResults.slice(0, promoteTopK)\n currentSurfaces = promoted.map((p) => p.surface)\n const top = surfaceResults[0]\n if (top && top.composite > winnerComposite) {\n winnerSurface = top.surface\n 
winnerSurfaceHash = top.surfaceHash\n winnerComposite = top.composite\n winnerLabel = top.label || undefined\n winnerRationale = top.rationale || undefined\n }\n\n const record: GenerationRecord = {\n generationIndex: gen,\n candidates: surfaceResults.map((s) => {\n const breakdown = campaignBreakdown(s.campaign)\n const candidate: GenerationRecord['candidates'][number] = {\n surfaceHash: s.surfaceHash,\n composite: s.composite,\n ci95: [s.composite, s.composite] as [number, number],\n dimensions: breakdown.dimensions,\n scenarios: breakdown.scenarios,\n }\n if (s.label) candidate.label = s.label\n if (s.rationale) candidate.rationale = s.rationale\n return candidate\n }),\n promoted: promoted.map((p) => p.surfaceHash),\n }\n history.push(record)\n generations.push({\n record,\n surfaces: surfaceResults.map((s) => ({\n surfaceHash: s.surfaceHash,\n surface: s.surface,\n campaign: s.campaign,\n })),\n })\n }\n\n return {\n generations,\n winnerSurface,\n winnerSurfaceHash,\n winnerLabel,\n winnerRationale,\n baselineCampaign,\n paretoFrontier: computeParetoFrontier(scored),\n }\n}\n\n/** Build a `ParetoParent` from a scored campaign — objective vector =\n * per-scenario composite, scalar = mean composite. */\nfunction toParetoParent<TArtifact, TScenario extends Scenario>(\n surface: MutableSurface,\n hash: string,\n campaign: CampaignResult<TArtifact, TScenario>,\n generation: number,\n label?: string,\n rationale?: string,\n): ParetoParent {\n const objectives: Record<string, number> = {}\n for (const { scenarioId, composite } of campaignBreakdown(campaign).scenarios) {\n objectives[scenarioId] = composite\n }\n const parent: ParetoParent = {\n surface,\n surfaceHash: hash,\n objectives,\n composite: campaignMeanComposite(campaign),\n generation,\n }\n if (label) parent.label = label\n if (rationale) parent.rationale = rationale\n return parent\n}\n\n/** The non-dominated set over the per-scenario objective vectors. 
Every\n * scenario seen across the scored set becomes a `maximize` objective; a\n * surface missing a scenario (a failed cell) is ranked worst on that axis via\n * a FINITE floor (the lowest real score seen there) — never a non-finite\n * value, because the canonical `paretoFrontier` excludes any candidate with a\n * non-finite objective, which would silently drop the whole frontier if one\n * scenario errored across every candidate. Delegates dominance to the\n * package-canonical `paretoFrontier` — ONE implementation of the relation. */\nfunction computeParetoFrontier(scored: ParetoParent[]): ParetoParent[] {\n if (scored.length <= 1) return [...scored]\n const ids = new Set<string>()\n for (const p of scored) for (const id of Object.keys(p.objectives)) ids.add(id)\n if (ids.size === 0) return [...scored]\n const floor: Record<string, number> = {}\n for (const id of ids) {\n let min = Number.POSITIVE_INFINITY\n for (const p of scored) {\n const v = p.objectives[id]\n if (typeof v === 'number' && Number.isFinite(v) && v < min) min = v\n }\n floor[id] = Number.isFinite(min) ? min : 0\n }\n const objectives: Objective<ParetoParent>[] = [...ids].map((id) => ({\n name: id,\n direction: 'maximize',\n value: (p) => {\n const v = p.objectives[id]\n return typeof v === 'number' && Number.isFinite(v) ? v : (floor[id] ?? 0)\n },\n }))\n return paretoFrontier(scored, objectives).frontier\n}\n\nexport function surfaceHash(surface: MutableSurface): string {\n // Prompt/tool surfaces (string) hash by content; code surfaces hash by the\n // worktree + base ref pair (the content lives in git, not in the string).\n const material =\n typeof surface === 'string'\n ? surface\n : JSON.stringify({\n kind: surface.kind,\n worktreeRef: surface.worktreeRef,\n baseRef: surface.baseRef ?? 
null,\n })\n return createHash('sha256').update(material).digest('hex').slice(0, 16)\n}\n","/**\n * @experimental\n *\n * `runImprovementLoop` — the gated-promotion shell around the improvement\n * loop body (`runOptimization`). Drives candidate surfaces via the\n * `ImprovementDriver`, re-scores the winner against the baseline on a\n * holdout set, runs the release gate, and optionally opens a PR.\n *\n * Role vocabulary (see docs/design/loop-taxonomy.md):\n * - DRIVER = the `ImprovementDriver` (evolutionary GEPA mutator OR\n * reflective analyst). Proposes candidate SURFACES — the\n * worker's system prompt / tool config — NOT conversation\n * turns.\n * - MEASUREMENT= `runCampaign`. Scores one surface by running the worker\n * (via `dispatch`) over scenarios and judging the output.\n * - WORKER = the agent harness in the sandbox, invoked behind the\n * topology-opaque `dispatch` seam — never referenced here.\n *\n * Distinct from `runLoop` in `@tangle-network/agent-runtime`, which is the\n * INNER conversation loop (driver↔workers in a sandbox). `runImprovementLoop`\n * is the OUTER loop: it improves the surface that those workers run.\n *\n * Hard-refuses unsafe configurations:\n * - `tracing: 'off'` when a driver is wired (improvement is unattributable)\n * - `autoOnPromote: 'config'` — DEFERRED to Pass B; v0.40 only ships\n * `'pr'` and `'none'`.\n */\n\nimport { openAutoPr } from '../auto-pr'\nimport type { CampaignResult, Gate, MutableSurface, Scenario } from '../types'\nimport type { RunOptimizationOptions, RunOptimizationResult } from './run-optimization'\nimport { runOptimization, surfaceHash } from './run-optimization'\n\nexport interface RunImprovementLoopOptions<TScenario extends Scenario, TArtifact>\n extends RunOptimizationOptions<TScenario, TArtifact> {\n /** Holdout scenarios kept OUT of the training optimization pool — used\n * ONLY to score baseline vs winner for the gate. */\n holdoutScenarios: TScenario[]\n /** Promotion gate. 
Substrate strongly recommends `defaultProductionGate`\n * for production wiring (composes red-team / reward-hacking / canary /\n * heldout). */\n gate: Gate<TArtifact, TScenario>\n /** What to do when the gate ships:\n * - `'pr'`: open a PR via `openAutoPr`\n * - `'none'`: just report — caller decides what to do with the winner\n * v0.40 does NOT support `'config'` (live-runtime self-mutation) —\n * deferred to Pass B behind safety stack. */\n autoOnPromote: 'pr' | 'none'\n /** GH owner / repo for the auto-PR. Required when autoOnPromote === 'pr'. */\n ghOwner?: string\n ghRepo?: string\n /** Optional render override — substrate writes a diff-shaped surface; pass\n * a function to format the promoted surface differently. */\n renderPromotedDiff?: (winnerSurface: MutableSurface, baselineSurface: MutableSurface) => string\n}\n\nexport interface RunImprovementLoopResult<TArtifact, TScenario extends Scenario>\n extends RunOptimizationResult<TArtifact, TScenario> {\n baselineOnHoldout: CampaignResult<TArtifact, TScenario>\n winnerOnHoldout: CampaignResult<TArtifact, TScenario>\n gateResult: Awaited<ReturnType<Gate<TArtifact, TScenario>['decide']>>\n /** Unified baseline→winner surface diff. Computed UNCONDITIONALLY (not only\n * when `autoOnPromote === 'pr'`) so the diff that the gate decided on is\n * always present on the result + in the emitted provenance record. Empty\n * string when winner == baseline (no change to diff). 
*/\n promotedDiff: string\n prResult?: ReturnType<typeof openAutoPr>\n}\n\nexport async function runImprovementLoop<TScenario extends Scenario, TArtifact>(\n opts: RunImprovementLoopOptions<TScenario, TArtifact>,\n): Promise<RunImprovementLoopResult<TArtifact, TScenario>> {\n // ── Safety pre-flight ─────────────────────────────────────────────\n // biome-ignore lint/suspicious/noExplicitAny: Pass A reserved field for Pass B Shape B\n if ((opts as any).autoOnPromote === 'config') {\n throw new Error(\n \"runImprovementLoop: autoOnPromote='config' is deferred to Pass B (requires shadow deploy + rollback + ensemble judges). Use 'pr' or 'none' in v0.40.\",\n )\n }\n // Refuse tracing=off whenever a driver is wired. An improvement loop\n // without traces is unattributable — its candidate surfaces cannot be\n // cited back to the spans that motivated them, and the dataset flywheel\n // (LabeledScenarioStore) that GEPA optimizes against goes unfed.\n if (opts.tracing === 'off' && opts.driver) {\n throw new Error(\n \"runImprovementLoop: tracing='off' is forbidden when a driver is wired. 
The improvement loop without traces is unattributable; candidate surfaces cannot be cited back to spans and the optimization dataset goes unfed.\",\n )\n }\n if (opts.autoOnPromote === 'pr' && (!opts.ghOwner || !opts.ghRepo)) {\n throw new Error(\"runImprovementLoop: autoOnPromote='pr' requires ghOwner + ghRepo.\")\n }\n\n // ── (1) optimization loop produces a winner ────────────────────────\n const optimization = await runOptimization(opts)\n\n // ── (2) baseline + winner re-scored on the holdout set ─────────────\n const { runCampaign } = await import('../run-campaign')\n\n const baselineOnHoldout = await runCampaign<TScenario, TArtifact>({\n ...opts,\n scenarios: opts.holdoutScenarios,\n dispatch: (scenario, ctx) => opts.dispatchWithSurface(opts.baselineSurface, scenario, ctx),\n runDir: `${opts.runDir}/holdout-baseline`,\n })\n\n const winnerOnHoldout = await runCampaign<TScenario, TArtifact>({\n ...opts,\n scenarios: opts.holdoutScenarios,\n dispatch: (scenario, ctx) =>\n opts.dispatchWithSurface(optimization.winnerSurface, scenario, ctx),\n runDir: `${opts.runDir}/holdout-winner`,\n })\n\n // ── (3) gate verdict ───────────────────────────────────────────────\n // Candidate + baseline share cellIds (same holdout scenarios), so their\n // judge scores MUST stay in separate maps — merging them collapses the\n // holdout delta to zero and the gate can never ship a real improvement.\n type ScoreMap = Map<\n string,\n Record<string, { composite: number; dimensions: Record<string, number>; notes: string }>\n >\n const candidateArtifacts = new Map<string, TArtifact>()\n const baselineArtifacts = new Map<string, TArtifact>()\n const judgeScores: ScoreMap = new Map()\n const baselineJudgeScores: ScoreMap = new Map()\n for (const cell of winnerOnHoldout.cells) {\n candidateArtifacts.set(cell.cellId, cell.artifact)\n judgeScores.set(cell.cellId, cell.judgeScores)\n }\n for (const cell of baselineOnHoldout.cells) {\n baselineArtifacts.set(cell.cellId, cell.artifact)\n 
baselineJudgeScores.set(cell.cellId, cell.judgeScores)\n }\n\n const gateResult = await opts.gate.decide({\n candidateArtifacts,\n baselineArtifacts,\n judgeScores,\n baselineJudgeScores,\n scenarios: opts.holdoutScenarios,\n cost: {\n candidate: winnerOnHoldout.aggregates.totalCostUsd,\n baseline: baselineOnHoldout.aggregates.totalCostUsd,\n },\n signal: new AbortController().signal,\n })\n\n // ── (4) baseline→winner diff (always) + auto-PR when gate ships ────\n // The diff is computed UNCONDITIONALLY — it's the human-auditable record of\n // what the loop actually changed, needed for the provenance artifact whether\n // or not a PR is opened. winner == baseline ⇒ empty diff (nothing changed).\n const render = opts.renderPromotedDiff ?? defaultRenderDiff\n const promotedDiff =\n optimization.winnerSurfaceHash === surfaceHash(opts.baselineSurface)\n ? ''\n : render(optimization.winnerSurface, opts.baselineSurface)\n\n let prResult: ReturnType<typeof openAutoPr> | undefined\n if (opts.autoOnPromote === 'pr' && gateResult.decision === 'ship') {\n prResult = openAutoPr({\n result: winnerOnHoldout,\n gate: gateResult,\n promotedDiff,\n ghOwner: opts.ghOwner!,\n ghRepo: opts.ghRepo!,\n })\n }\n\n return {\n ...optimization,\n baselineOnHoldout,\n winnerOnHoldout,\n gateResult,\n promotedDiff,\n prResult,\n }\n}\n\nexport function defaultRenderDiff(\n winnerSurface: MutableSurface,\n baselineSurface: MutableSurface,\n): string {\n // Code surfaces aren't text-diffable here — the diff lives in git. Render\n // the worktree/base refs + summary so the PR body points at the change.\n if (typeof winnerSurface !== 'string' || typeof baselineSurface !== 'string') {\n const fmt = (s: MutableSurface): string =>\n typeof s === 'string'\n ? '(prompt surface)'\n : `worktree=${s.worktreeRef}${s.baseRef ? ` base=${s.baseRef}` : ''}${s.summary ? 
`\\n${s.summary}` : ''}`\n return `--- baseline\\n${fmt(baselineSurface)}\\n+++ winner\\n${fmt(winnerSurface)}`\n }\n const lines: string[] = []\n lines.push('--- baseline')\n lines.push('+++ winner')\n for (const l of baselineSurface.split('\\n')) lines.push(`- ${l}`)\n for (const l of winnerSurface.split('\\n')) lines.push(`+ ${l}`)\n return lines.join('\\n')\n}\n","/**\n * @experimental\n *\n * `runEval` — the simplest preset over `runCampaign`. No optimizer, no\n * gate, no auto-PR. Just: run scenarios through dispatch, score with\n * judges, return CampaignResult.\n *\n * The 80% case for consumers who want a scorecard, not an improvement loop.\n */\n\nimport { type RunCampaignOptions, runCampaign } from '../run-campaign'\nimport type { CampaignResult, Scenario } from '../types'\n\nexport interface RunEvalOptions<TScenario extends Scenario, TArtifact>\n extends Omit<RunCampaignOptions<TScenario, TArtifact>, 'runDir'> {\n runDir: string\n}\n\nexport async function runEval<TScenario extends Scenario, TArtifact>(\n opts: RunEvalOptions<TScenario, TArtifact>,\n): Promise<CampaignResult<TArtifact, TScenario>> {\n return runCampaign(opts)\n}\n","/**\n * @experimental\n *\n * Loop provenance — the durable, queryable record of WHAT a self-improvement\n * loop did and WHY, plus the OTel spans that let an OTLP collector pivot from\n * an eval-run to the underlying candidate→cell→gate→promote chain.\n *\n * Two artifacts, one source of truth:\n *\n * 1. `LoopProvenanceRecord` — a structured JSON record capturing every\n * candidate (surfaceHash + label + rationale), its measured composite,\n * the gate decision + reasons + delta, the held-out lift, the explicit\n * baseline→candidate diff, and BACKEND PROVENANCE (the\n * `assertRealBackend` verdict + worker call count + model). This is the\n * ingestable audit artifact: the +lift recomputes from it, the \"because\n * Z\" rationale survives in it, and a stub backend is detectable from it.\n *\n * 2. 
`loopProvenanceSpans()` — the same chain emitted as OTLP-ingestable\n * `TraceSpanEvent`s, pivoted on the substrate's standard\n * `tangle.runId` / `tangle.scenarioId` / `tangle.cellId` /\n * `tangle.generation` attributes (the same pivots `/adapters/otel`\n * reads). The hosted `/v1/ingest/traces` endpoint receives the FULL loop,\n * not just the `cost.*` spans `runCampaign` already emits per cell.\n *\n * The record is built from the substrate's own loop result + the per-call\n * `RunRecord`s the worker emitted — no new measurement, no recomputation that\n * could drift from what the gate actually saw.\n */\n\nimport { createHash } from 'node:crypto'\nimport { join } from 'node:path'\nimport type { HostedClient } from '../hosted/client'\nimport type { TraceSpanEvent } from '../hosted/types'\nimport { summarizeBackendIntegrity } from '../integrity/backend-integrity'\nimport type { RunRecord } from '../run-record'\nimport type { CampaignStorage } from './storage'\nimport type { CampaignResult, GateDecision, GateResult, MutableSurface, Scenario } from './types'\n\n/** Stable sha256 (full hex) of a surface's effective text. Code surfaces hash\n * their worktree+base identity since the content lives in git. Distinct from\n * `surfaceHash` (16-char content fingerprint used as a loop identity key);\n * this is the byte-identical-verifiable content hash the provenance record +\n * `RunRecord.promptHash` carry. */\nexport function surfaceContentHash(surface: MutableSurface): string {\n const material =\n typeof surface === 'string'\n ? surface\n : JSON.stringify({\n kind: surface.kind,\n worktreeRef: surface.worktreeRef,\n baseRef: surface.baseRef ?? null,\n })\n return `sha256:${createHash('sha256').update(material).digest('hex')}`\n}\n\nexport interface LoopProvenanceCandidate {\n /** Generation index this candidate was proposed in. */\n generation: number\n /** 16-char loop-identity fingerprint (matches `GenerationCandidate.surfaceHash`). 
*/\n surfaceHash: string\n /** Full sha256 content hash — byte-identical-verifiable. */\n contentHash: string\n /** Driver label, when the driver returned a `ProposedCandidate`. */\n label?: string\n /** Driver rationale — the \"because Z\". When the driver returned a bare\n * surface (blind mutator) this is absent. */\n rationale?: string\n /** Mean composite this candidate scored on the search split. */\n composite: number\n /** Whether this candidate was promoted out of its generation. */\n promoted: boolean\n}\n\nexport interface LoopProvenanceBackend {\n /** `assertRealBackend`-grade verdict over the worker call records. */\n verdict: 'real' | 'mixed' | 'stub'\n /** Number of worker LLM calls captured (the audit's \"worker call count\"). */\n workerCallCount: number\n /** Distinct model ids observed across worker calls. */\n models: string[]\n totalInputTokens: number\n totalOutputTokens: number\n totalCostUsd: number\n}\n\n/**\n * The durable provenance record. Aligns to the hosted `EvalRunEvent` path but\n * ADDS the rationale + the explicit baseline→candidate diff (both omitted from\n * the bare hosted event) + backend provenance.\n */\nexport interface LoopProvenanceRecord {\n schema: 'tangle.loop-provenance.v1'\n runId: string\n runDir: string\n timestamp: string\n /** Baseline + winner surface content hashes — distinguishable, byte-verifiable. */\n baselineContentHash: string\n winnerContentHash: string\n /** Driver label/rationale for the promoted change. Absent ⇒ winner == baseline. */\n winnerLabel?: string\n winnerRationale?: string\n /** The explicit baseline→winner unified diff the gate decided on. */\n diff: string\n /** Every candidate across every generation, each carrying its rationale. */\n candidates: LoopProvenanceCandidate[]\n /** The gate verdict — decision + reasons + contributing gates + delta. 
*/\n gate: {\n decision: GateDecision\n reasons: string[]\n delta?: number\n contributingGates: Array<{ name: string; passed: boolean }>\n }\n /** baseline-on-holdout composite mean. */\n baselineHoldoutComposite: number\n /** winner-on-holdout composite mean. */\n winnerHoldoutComposite: number\n /** winnerHoldout - baselineHoldout — RECOMPUTABLE from this record. */\n heldOutLift: number\n /** Backend provenance: stub-vs-real verdict + worker call count + models. */\n backend: LoopProvenanceBackend\n totalCostUsd: number\n totalDurationMs: number\n}\n\nexport interface BuildLoopProvenanceArgs<TArtifact, TScenario extends Scenario> {\n runId: string\n runDir: string\n timestamp: string\n baselineSurface: MutableSurface\n winnerSurface: MutableSurface\n winnerLabel?: string\n winnerRationale?: string\n diff: string\n /** Per-generation candidate records straight off the loop result. */\n generations: Array<{\n generationIndex: number\n candidates: Array<{\n surfaceHash: string\n composite: number\n label?: string\n rationale?: string\n }>\n promoted: string[]\n /** Surfaces measured this generation, keyed positionally to candidates so\n * the content hash can be computed from the real surface text. */\n surfaces: Array<{ surfaceHash: string; surface: MutableSurface }>\n }>\n gate: GateResult\n baselineOnHoldout: CampaignResult<TArtifact, TScenario>\n winnerOnHoldout: CampaignResult<TArtifact, TScenario>\n /** Worker call records — the source for backend provenance. */\n workerRecords: ReadonlyArray<RunRecord>\n totalCostUsd: number\n totalDurationMs: number\n}\n\nfunction meanHoldoutComposite<TArtifact, TScenario extends Scenario>(\n campaign: CampaignResult<TArtifact, TScenario>,\n): number {\n const xs: number[] = []\n for (const cell of campaign.cells) {\n if (cell.error) continue\n const cs = Object.values(cell.judgeScores).map((s) => s.composite)\n if (cs.length) xs.push(cs.reduce((a, b) => a + b, 0) / cs.length)\n }\n return xs.length ? 
xs.reduce((a, b) => a + b, 0) / xs.length : 0\n}\n\n/** Build the durable provenance record from a completed loop result. */\nexport function buildLoopProvenanceRecord<TArtifact, TScenario extends Scenario>(\n args: BuildLoopProvenanceArgs<TArtifact, TScenario>,\n): LoopProvenanceRecord {\n const integrity = summarizeBackendIntegrity(args.workerRecords)\n const models = [...new Set(args.workerRecords.map((r) => r.model))].sort()\n\n const candidates: LoopProvenanceCandidate[] = []\n for (const gen of args.generations) {\n const promotedSet = new Set(gen.promoted)\n const surfaceByHash = new Map(gen.surfaces.map((s) => [s.surfaceHash, s.surface]))\n for (const c of gen.candidates) {\n const surface = surfaceByHash.get(c.surfaceHash)\n const entry: LoopProvenanceCandidate = {\n generation: gen.generationIndex,\n surfaceHash: c.surfaceHash,\n contentHash:\n surface !== undefined ? surfaceContentHash(surface) : `sha256:${c.surfaceHash}`,\n composite: c.composite,\n promoted: promotedSet.has(c.surfaceHash),\n }\n if (c.label) entry.label = c.label\n if (c.rationale) entry.rationale = c.rationale\n candidates.push(entry)\n }\n }\n\n const baselineHoldoutComposite = meanHoldoutComposite(args.baselineOnHoldout)\n const winnerHoldoutComposite = meanHoldoutComposite(args.winnerOnHoldout)\n\n const record: LoopProvenanceRecord = {\n schema: 'tangle.loop-provenance.v1',\n runId: args.runId,\n runDir: args.runDir,\n timestamp: args.timestamp,\n baselineContentHash: surfaceContentHash(args.baselineSurface),\n winnerContentHash: surfaceContentHash(args.winnerSurface),\n diff: args.diff,\n candidates,\n gate: {\n decision: args.gate.decision,\n reasons: args.gate.reasons,\n delta: args.gate.delta,\n contributingGates: args.gate.contributingGates.map((g) => ({\n name: g.name,\n passed: g.passed,\n })),\n },\n baselineHoldoutComposite,\n winnerHoldoutComposite,\n heldOutLift: winnerHoldoutComposite - baselineHoldoutComposite,\n backend: {\n verdict: integrity.verdict,\n 
workerCallCount: integrity.totalRecords,\n models,\n totalInputTokens: integrity.totalInputTokens,\n totalOutputTokens: integrity.totalOutputTokens,\n totalCostUsd: integrity.totalCostUsd,\n },\n totalCostUsd: args.totalCostUsd,\n totalDurationMs: args.totalDurationMs,\n }\n if (args.winnerLabel) record.winnerLabel = args.winnerLabel\n if (args.winnerRationale) record.winnerRationale = args.winnerRationale\n return record\n}\n\n// ── OTel span emission ──────────────────────────────────────────────────\n\nconst DECISION_OK: GateDecision[] = ['ship']\n\nfunction hashId(parts: string[]): string {\n return createHash('sha256').update(parts.join(':')).digest('hex')\n}\n\nfunction gateStatus(decision: GateDecision): { code: 'OK' | 'ERROR' | 'UNSET'; message?: string } {\n return DECISION_OK.includes(decision)\n ? { code: 'OK' }\n : { code: 'ERROR', message: `gate decision: ${decision}` }\n}\n\n/**\n * Build the loop's OTLP-ingestable spans from a provenance record. One root\n * span per loop (`tangle.runId`), one span per generation, one span per\n * candidate (carrying its surfaceHash + label), and one span for the gate\n * decision (carrying reasons + delta + lift). Candidate + gate spans pivot on\n * the same `tangle.runId` / `tangle.generation` attributes `/adapters/otel`\n * reads, so the hosted collector reconstructs the full tree.\n *\n * Times are synthesized monotonically off a single base so the span tree is\n * orderable; the substrate does not retain per-candidate wall-clock starts.\n */\nexport function loopProvenanceSpans(\n record: LoopProvenanceRecord,\n opts: { baseTimeMs?: number } = {},\n): TraceSpanEvent[] {\n const traceId = hashId(['trace', record.runId]).slice(0, 32)\n const baseNano = (opts.baseTimeMs ?? 
(Date.parse(record.timestamp) || Date.now())) * 1_000_000\n const endNano = baseNano + Math.max(1, record.totalDurationMs) * 1_000_000\n const spans: TraceSpanEvent[] = []\n\n const rootSpanId = hashId(['root', record.runId]).slice(0, 16)\n spans.push({\n traceId,\n spanId: rootSpanId,\n name: 'improvement-loop',\n startTimeUnixNano: baseNano,\n endTimeUnixNano: endNano,\n attributes: {\n 'tangle.runId': record.runId,\n 'tangle.runDir': record.runDir,\n 'tangle.baselineContentHash': record.baselineContentHash,\n 'tangle.winnerContentHash': record.winnerContentHash,\n 'tangle.heldOutLift': record.heldOutLift,\n 'tangle.gateDecision': record.gate.decision,\n 'tangle.backendVerdict': record.backend.verdict,\n 'tangle.workerCallCount': record.backend.workerCallCount,\n 'tangle.totalCostUsd': record.totalCostUsd,\n },\n status: gateStatus(record.gate.decision),\n 'tangle.runId': record.runId,\n })\n\n // Group candidates by generation for the per-generation parent span.\n const byGen = new Map<number, LoopProvenanceCandidate[]>()\n for (const c of record.candidates) {\n const arr = byGen.get(c.generation) ?? 
[]\n arr.push(c)\n byGen.set(c.generation, arr)\n }\n for (const [generation, cands] of [...byGen.entries()].sort((a, b) => a[0] - b[0])) {\n const genSpanId = hashId(['gen', record.runId, String(generation)]).slice(0, 16)\n const bestComposite = cands.reduce((m, c) => Math.max(m, c.composite), 0)\n spans.push({\n traceId,\n spanId: genSpanId,\n parentSpanId: rootSpanId,\n name: `generation-${generation}`,\n startTimeUnixNano: baseNano,\n endTimeUnixNano: endNano,\n attributes: {\n 'tangle.runId': record.runId,\n 'tangle.generation': generation,\n 'tangle.populationSize': cands.length,\n 'tangle.bestComposite': bestComposite,\n },\n 'tangle.runId': record.runId,\n 'tangle.generation': generation,\n })\n for (let i = 0; i < cands.length; i++) {\n const c = cands[i]!\n const candSpanId = hashId(['cand', record.runId, String(generation), c.surfaceHash]).slice(\n 0,\n 16,\n )\n const attributes: TraceSpanEvent['attributes'] = {\n 'tangle.runId': record.runId,\n 'tangle.generation': generation,\n 'tangle.surfaceHash': c.surfaceHash,\n 'tangle.contentHash': c.contentHash,\n 'tangle.composite': c.composite,\n 'tangle.promoted': c.promoted,\n }\n if (c.label) attributes['tangle.candidateLabel'] = c.label\n if (c.rationale) attributes['tangle.candidateRationale'] = c.rationale\n spans.push({\n traceId,\n spanId: candSpanId,\n parentSpanId: genSpanId,\n name: `candidate-${c.surfaceHash}`,\n startTimeUnixNano: baseNano,\n endTimeUnixNano: endNano,\n attributes,\n 'tangle.runId': record.runId,\n 'tangle.generation': generation,\n })\n }\n }\n\n // Gate span — child of root, carries the decision/reasons/delta the audit\n // needs and pivots back to the run.\n const gateSpanId = hashId(['gate', record.runId]).slice(0, 16)\n spans.push({\n traceId,\n spanId: gateSpanId,\n parentSpanId: rootSpanId,\n name: 'gate-decision',\n startTimeUnixNano: endNano,\n endTimeUnixNano: endNano,\n attributes: {\n 'tangle.runId': record.runId,\n 'tangle.gateDecision': record.gate.decision,\n 
'tangle.gateDelta': record.gate.delta ?? record.heldOutLift,\n 'tangle.gateReasons': JSON.stringify(record.gate.reasons),\n 'tangle.heldOutLift': record.heldOutLift,\n 'tangle.baselineHoldoutComposite': record.baselineHoldoutComposite,\n 'tangle.winnerHoldoutComposite': record.winnerHoldoutComposite,\n },\n status: gateStatus(record.gate.decision),\n 'tangle.runId': record.runId,\n })\n\n return spans\n}\n\n// ── Durable emission ─────────────────────────────────────────────────────\n\n/** Canonical durable paths under the run dir. */\nexport function provenanceRecordPath(runDir: string): string {\n return join(runDir, 'loop-provenance.json')\n}\nexport function provenanceSpansPath(runDir: string): string {\n return join(runDir, 'loop-provenance-spans.jsonl')\n}\n\nexport interface EmitLoopProvenanceResult {\n record: LoopProvenanceRecord\n spans: TraceSpanEvent[]\n /** Absolute paths the record + spans were written to, when storage persists. */\n recordPath: string\n spansPath: string\n}\n\nexport interface EmitLoopProvenanceArgs<TArtifact, TScenario extends Scenario>\n extends BuildLoopProvenanceArgs<TArtifact, TScenario> {\n /** Storage the record + spans are written through. */\n storage: CampaignStorage\n /** When set, the spans are also shipped to the hosted `/v1/ingest/traces`\n * endpoint so the collector receives the full loop, not just `cost.*`. */\n hostedClient?: HostedClient\n}\n\n/**\n * Build the provenance record + OTel spans and persist them durably under the\n * run dir (and ship spans to a hosted collector when one is wired). Returns\n * both artifacts so the caller can assert on / re-derive from them.\n *\n * Fail-loud: the durable write throws on storage failure (a swallowed write is\n * exactly the \"emitted but lost\" failure this closes). 
The hosted span ship is\n * the one best-effort leg — its failure is logged, not thrown, so an offline\n * collector never fails the loop (the durable artifact is the source of truth).\n */\nexport async function emitLoopProvenance<TArtifact, TScenario extends Scenario>(\n args: EmitLoopProvenanceArgs<TArtifact, TScenario>,\n): Promise<EmitLoopProvenanceResult> {\n const record = buildLoopProvenanceRecord(args)\n const spans = loopProvenanceSpans(record)\n\n args.storage.ensureDir(args.runDir)\n const recordPath = provenanceRecordPath(args.runDir)\n const spansPath = provenanceSpansPath(args.runDir)\n args.storage.write(recordPath, JSON.stringify(record, null, 2))\n args.storage.write(spansPath, spans.map((s) => JSON.stringify(s)).join('\\n'))\n\n if (args.hostedClient) {\n try {\n await args.hostedClient.ingestTraces(spans)\n } catch (err) {\n const msg = err instanceof Error ? err.message : String(err)\n // eslint-disable-next-line no-console -- intentional: hosted span ship is best-effort\n console.warn(`[agent-eval] provenance span ingest failed (continuing): ${msg}`)\n }\n }\n\n return { record, spans, recordPath, spansPath 
}\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;AAcA,SAAS,gBAAgB;AACzB,SAAS,qBAAqB;AAC9B,SAAS,cAAc;AACvB,SAAS,YAAY;AAiCd,SAAS,WACd,SACkB;AAClB,MAAI,QAAQ,KAAK,aAAa,QAAQ;AACpC,WAAO;AAAA,MACL,QAAQ;AAAA,MACR,QAAQ;AAAA,MACR,QAAQ,qBAAqB,QAAQ,KAAK,QAAQ;AAAA,IACpD;AAAA,EACF;AAEA,QAAM,SAAS,QAAQ,UAAU,CAAC,QAAQ,IAAI;AAC9C,QAAM,SAAS,QAAQ,UAAU,QAAQ,QAAQ,OAAO,aAAa,MAAM,GAAG,EAAE,CAAC;AACjF,QAAM,QACJ,QAAQ,SAAS,kBAAkB,QAAQ,OAAO,aAAa,MAAM,GAAG,CAAC,CAAC;AAE5E,QAAM,OAAO,aAAa,QAAQ,QAAQ,QAAQ,MAAM,QAAQ,YAAY;AAC5E,QAAM,WAAW,KAAK,OAAO,GAAG,gBAAgB,KAAK,IAAI,CAAC,KAAK;AAC/D,gBAAc,UAAU,IAAI;AAE5B,MAAI,QAAQ;AACV,WAAO;AAAA,MACL,QAAQ;AAAA,MACR,QAAQ;AAAA,MACR,QAAQ,0DAA0D,QAAQ,OAAO,IAAI,QAAQ,MAAM,WAAW,MAAM,aAAa,QAAQ;AAAA,IAC3I;AAAA,EACF;AAEA,QAAM,SAAS,QAAQ,UAAU;AACjC,QAAM,SAAS,OAAO;AAAA,IACpB;AAAA,IACA;AAAA,IACA;AAAA,IACA,GAAG,QAAQ,OAAO,IAAI,QAAQ,MAAM;AAAA,IACpC;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC;AACD,MAAI,OAAO,WAAW,GAAG;AACvB,WAAO;AAAA,MACL,QAAQ;AAAA,MACR,QAAQ;AAAA,MACR,QAAQ,6BAA6B,OAAO,MAAM,MAAM,OAAO,OAAO,MAAM,GAAG,GAAG,CAAC;AAAA,IACrF;AAAA,EACF;AACA,QAAM,QAAQ,OAAO,OAAO,KAAK;AACjC,SAAO,EAAE,QAAQ,MAAM,OAAO,QAAQ,OAAO,QAAQ,YAAY;AACnE;AAEA,SAAS,aACP,QACA,MACA,MACQ;AACR,QAAM,QAAkB,CAAC;AACzB,QAAM,KAAK,kDAAkD;AAC7D,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,mBAAmB,OAAO,YAAY,IAAI;AACrD,QAAM,KAAK,aAAa,OAAO,IAAI,EAAE;AACrC,QAAM,KAAK,iBAAiB,KAAK,MAAM,OAAO,aAAa,GAAI,CAAC,GAAG;AACnE,QAAM;AAAA,IACJ,uBAAuB,OAAO,WAAW,aAAa,YAAY,OAAO,WAAW,WAAW,aAAa,OAAO,WAAW,YAAY,YAAY,OAAO,WAAW,WAAW;AAAA,EACrL;AACA,QAAM,KAAK,qBAAqB,OAAO,WAAW,aAAa,QAAQ,CAAC,CAAC,EAAE;AAC3E,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,uBAAuB,KAAK,QAAQ,IAAI;AACnD,QAAM,KAAK,EAAE;AACb,aAAW,UAAU,KAAK,QAAS,OAAM,KAAK,KAAK,MAAM,EAAE;AAC3D,MAAI,KAAK,UAAU,OAAW,OAAM,KAAK,YAAY,KAAK,MAAM,QAAQ,CAAC,CAAC,EAAE;AAC5E,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,wBAAwB;AACnC,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,4BAA4B;AACvC,QAAM,KAAK,eAAe;AAC1B,aAAW,KAAK,KAAK,mBAAmB;AACtC,UAAM,SACJ,OAAO,EAAE,WAAW,WAChB,KAAK,UAAU,EAAE,MAAM,EAAE,MAAM,GAAG,EAAE,IACpC,OAAO,EAAE,MAAM,EAAE,MAAM,GAAG,EAAE;AAClC,
UAAM,KAAK,KAAK,EAAE,IAAI,MAAM,EAAE,SAAS,WAAM,QAAG,MAAM,MAAM,IAAI;AAAA,EAClE;AACA,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,sBAAsB;AACjC,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,SAAS;AACpB,QAAM,KAAK,KAAK,MAAM,GAAG,GAAI,CAAC;AAC9B,QAAM,KAAK,KAAK;AAChB,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,yBAAyB;AACpC,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,6BAA6B;AACxC,QAAM,KAAK,mBAAmB;AAC9B,aAAW,CAAC,MAAM,GAAG,KAAK,OAAO,QAAQ,OAAO,WAAW,OAAO,GAAG;AACnE,UAAM;AAAA,MACJ,KAAK,IAAI,MAAM,IAAI,KAAK,QAAQ,CAAC,CAAC,OAAO,IAAI,KAAK,CAAC,EAAE,QAAQ,CAAC,CAAC,KAAK,IAAI,KAAK,CAAC,EAAE,QAAQ,CAAC,CAAC,OAAO,IAAI,CAAC;AAAA,IACxG;AAAA,EACF;AACA,SAAO,MAAM,KAAK,IAAI;AACxB;AAEA,SAAS,cAAc,MAAoE;AACzF,MAAI;AACF,UAAM,SAAS,SAAS,MAAM,KAAK,IAAI,QAAQ,EAAE,KAAK,GAAG,CAAC,IAAI;AAAA,MAC5D,KAAK,EAAE,GAAG,QAAQ,KAAK,UAAU,QAAQ,IAAI,oBAAoB,QAAQ,IAAI,YAAY,GAAG;AAAA,MAC5F,OAAO,CAAC,UAAU,QAAQ,MAAM;AAAA,IAClC,CAAC,EAAE,SAAS,MAAM;AAClB,WAAO,EAAE,QAAQ,QAAQ,IAAI,QAAQ,EAAE;AAAA,EACzC,SAAS,KAAK;AACZ,UAAM,IAAI;AACV,WAAO;AAAA,MACL,QAAQ,EAAE,QAAQ,SAAS,MAAM,KAAK;AAAA,MACtC,QAAQ,EAAE,QAAQ,SAAS,MAAM,KAAK;AAAA,MACtC,QAAQ,EAAE,UAAU;AAAA,IACtB;AAAA,EACF;AACF;AAEA,SAAS,SAAS,KAAqB;AACrC,MAAI,wBAAwB,KAAK,GAAG,EAAG,QAAO;AAC9C,SAAO,IAAI,IAAI,QAAQ,MAAM,KAAK,CAAC;AACrC;;;ACrJO,SAAS,mBACd,MAC8B;AAC9B,SAAO;AAAA,IACL,MAAM,gBAAgB,KAAK,QAAQ,IAAI;AAAA,IACvC,MAAM,QAAQ,EAAE,gBAAgB,UAAU,gBAAgB,OAAO,GAAG;AAClE,aAAO,KAAK,QAAQ,OAAO;AAAA,QACzB,UAAU,SAAS,SAAS,IAAI,WAAY,KAAK,YAAY,CAAC;AAAA,QAC9D;AAAA,QACA;AAAA,QACA;AAAA,MACF,CAAC;AAAA,IACH;AAAA,EACF;AACF;;;ACQA,IAAM,oBACJ;AAIF,IAAM,iBACJ;AAqDK,SAAS,WAAW,MAA4C;AACrE,QAAM,YAAY,KAAK,aAAa;AACpC,QAAM,iBAAiB,KAAK,kBAAkB;AAC9C,QAAM,oBAAoB,KAAK,qBAAqB;AACpD,MAAI,kBAAkB,oBAAoB,GAAG;AAC3C,UAAM,IAAI,MAAM,2EAA2E;AAAA,EAC7F;AACA,SAAO;AAAA,IACL,MAAM;AAAA,IACN,MAAM,QAAQ,KAAmD;AAC/D,YAAM,SACJ,OAAO,IAAI,mBAAmB,WAC1B,IAAI,iBACJ,KAAK,UAAU,IAAI,cAAc;AAIvC,YAAM,cAAc,KAAK;AACzB,YAAM,mBACJ,aAAa,qBAAqB,SAC9B,YAAY,iBAAiB,WAAW,IACtC,kBAAkB,MAAM,IACxB,YAAY,mBACd;AACN,YAAM,WAAW,aAAa;AAC9B,YAAM,MAA2B,CAAC;AAClC,YAAM,OAAO,oBAAI,IAAY;AAC7B,YAAM,SAAS,CAAC,SAAkB,OAAe,cAA4B;AAC3E,cAAM
,OAAO,OAAO,YAAY,WAAW,QAAQ,KAAK,IAAI;AAC5D,YAAI,CAAC,QAAQ,SAAS,UAAU,KAAK,IAAI,IAAI,EAAG;AAChD,YAAI,oBAAoB,CAAC,0BAA0B,MAAM,gBAAgB,EAAG;AAC5E,YAAI,aAAa,UAAa,mBAAmB,QAAQ,IAAI,IAAI,WAAW,EAAG;AAC/E,aAAK,IAAI,IAAI;AAGb,YAAI,KAAK,EAAE,SAAS,MAAM,OAAO,UAAU,CAAC;AAAA,MAC9C;AAMA,YAAM,iBAAiB,iBAAkB,IAAI,iBAAiB,CAAC,IAAK,CAAC,GAClE,OAAO,CAAC,MAA2C,OAAO,EAAE,YAAY,QAAQ,EAChF,KAAK,CAAC,GAAG,MAAM,EAAE,YAAY,EAAE,SAAS,EACxC,MAAM,GAAG,iBAAiB;AAC7B,UAAI,cAAc,SAAS,GAAG;AAC5B,cAAM,gBAAgB,mBAAmB;AAAA,UACvC,QAAQ,KAAK;AAAA,UACb,SAAS;AAAA,UACT;AAAA,QACF,CAAC;AACD,cAAM,gBAAgB,MAAM;AAAA,UAC1B;AAAA,YACE,OAAO,KAAK;AAAA,YACZ,UAAU;AAAA,cACR,EAAE,MAAM,UAAU,SAAS,eAAe;AAAA,cAC1C,EAAE,MAAM,QAAQ,SAAS,cAAc;AAAA,YACzC;AAAA,YACA,UAAU;AAAA,YACV,aAAa,KAAK,eAAe;AAAA,YACjC,WAAW,KAAK,aAAa;AAAA,UAC/B;AAAA,UACA,KAAK;AAAA,QACP;AACA,cAAM,SAAS,wBAAwB,cAAc,SAAS,CAAC,EAAE,CAAC;AAClE,YAAI,QAAQ;AACV;AAAA,YACE,OAAO;AAAA,YACP,OAAO,SAAS;AAAA,YAChB,OAAO,aACL,YAAY,cAAc,MAAM,+BAA+B,cAC5D,IAAI,CAAC,MAAM,EAAE,UAAU,EACvB,KAAK,GAAG,CAAC;AAAA,UAChB;AAAA,QACF;AAAA,MACF;AAGA,YAAM,eAAe,KAAK,IAAI,GAAG,IAAI,iBAAiB,IAAI,MAAM;AAChE,UAAI,eAAe,GAAG;AACpB,cAAM,EAAE,KAAK,QAAQ,OAAO,IAAI,cAAc,KAAK,WAAW,KAAK,MAAM;AACzE,cAAM,aAAa,sBAAsB;AAAA,UACvC;AAAA,UACA,eAAe;AAAA,UACf,WAAW;AAAA,UACX,cAAc;AAAA,UACd,YAAY;AAAA,UACZ,oBAAoB,KAAK;AAAA,QAC3B,CAAC;AACD,cAAM,SAAS,MAAM;AAAA,UACnB;AAAA,YACE,OAAO,KAAK;AAAA,YACZ,UAAU;AAAA,cACR,EAAE,MAAM,UAAU,SAAS,kBAAkB;AAAA,cAC7C,EAAE,MAAM,QAAQ,SAAS,WAAW;AAAA,YACtC;AAAA,YACA,UAAU;AAAA,YACV,aAAa,KAAK,eAAe;AAAA,YACjC,WAAW,KAAK,aAAa;AAAA,UAC/B;AAAA,UACA,KAAK;AAAA,QACP;AACA,mBAAW,YAAY,wBAAwB,OAAO,SAAS,YAAY,GAAG;AAC5E,iBAAO,SAAS,SAAS,SAAS,OAAO,SAAS,SAAS;AAAA,QAC7D;AAAA,MACF;AAEA,aAAO,IAAI,MAAM,GAAG,IAAI,cAAc;AAAA,IACxC;AAAA,EACF;AACF;AAKA,SAAS,mBAAmB,MAIjB;AACT,QAAM,QAAkB;AAAA,IACtB,mBAAmB,KAAK,QAAQ,MAAM,iBAAiB,KAAK,MAAM;AAAA,IAClE;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACA,OAAK,QAAQ,QAAQ,CAAC,GAAG,MAAM;AAC7B,UAAM,MAAM,OAAO,aAAa,KAAK,CAAC;AACtC,UAAM,OAAO,OAAO,QAAQ,EAAE,UAAU,EACrC,KAAK,CAAC,GAAG,MAAM,EAAE,CAAC,IAA
I,EAAE,CAAC,CAAC,EAC1B,MAAM,GAAG,KAAK,SAAS,EACvB,IAAI,CAAC,CAAC,IAAI,KAAK,MAAM,GAAG,EAAE,KAAK,MAAM,QAAQ,CAAC,CAAC,GAAG;AACrD,UAAM;AAAA,MACJ,eAAe,GAAG,UAAU,EAAE,UAAU,QAAQ,CAAC,CAAC,mBAChD,KAAK,KAAK,IAAI,KAAK,KACrB;AAAA,MACA;AAAA,MACA,EAAE;AAAA,MACF;AAAA,MACA;AAAA,IACF;AAAA,EACF,CAAC;AACD,QAAM;AAAA,IACJ;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACA,SAAO,MAAM,KAAK,IAAI;AACxB;AAIO,SAAS,kBAAkB,MAAwB;AACxD,QAAM,MAAgB,CAAC;AACvB,aAAW,QAAQ,KAAK,MAAM,IAAI,GAAG;AACnC,UAAM,QAAQ,kBAAkB,KAAK,IAAI;AACzC,QAAI,MAAO,KAAI,KAAK,MAAM,CAAC,CAAE;AAAA,EAC/B;AACA,SAAO;AACT;AAKO,SAAS,mBAAmB,UAAkB,WAA2B;AAC9E,QAAM,OAAO,CAAC,MACZ,EACG,MAAM,mBAAmB,EACzB,IAAI,CAAC,MAAM,EAAE,KAAK,CAAC,EACnB,OAAO,CAAC,MAAM,EAAE,SAAS,CAAC;AAC/B,QAAM,IAAI,IAAI,IAAI,KAAK,QAAQ,CAAC;AAChC,QAAM,IAAI,IAAI,IAAI,KAAK,SAAS,CAAC;AACjC,MAAI,QAAQ;AACZ,aAAW,KAAK,EAAG,KAAI,CAAC,EAAE,IAAI,CAAC,EAAG;AAClC,aAAW,KAAK,EAAG,KAAI,CAAC,EAAE,IAAI,CAAC,EAAG;AAClC,SAAO;AACT;AAEA,SAAS,0BAA0B,WAAmB,UAAsC;AAC1F,MAAI,SAAS,WAAW,EAAG,QAAO;AAClC,QAAM,OAAO,IAAI,IAAI,kBAAkB,SAAS,CAAC;AACjD,aAAW,WAAW,UAAU;AAC9B,QAAI,CAAC,KAAK,IAAI,OAAO,EAAG,QAAO;AAAA,EACjC;AACA,SAAO;AACT;AAKA,SAAS,cACP,KACA,WACA,YAC6D;AAC7D,QAAM,OAAO,IAAI,QAAQ,GAAG,EAAE;AAC9B,MAAI,CAAC,QAAQ,KAAK,WAAW,WAAW,GAAG;AACzC,WAAO,EAAE,KAAK,CAAC,GAAG,QAAQ,CAAC,GAAG,QAAQ,WAAW;AAAA,EACnD;AACA,QAAM,OAAO,CAAC,GAAG,KAAK,UAAU,EAAE,KAAK,CAAC,GAAG,MAAM,EAAE,YAAY,EAAE,SAAS,EAAE,CAAC;AAC7E,MAAI,CAAC,KAAM,QAAO,EAAE,KAAK,CAAC,GAAG,QAAQ,CAAC,GAAG,QAAQ,WAAW;AAE5D,QAAM,UAAU,CAAC,GAAG,KAAK,SAAS,EAAE,KAAK,CAAC,GAAG,MAAM,EAAE,YAAY,EAAE,SAAS;AAC5E,QAAM,UAAU,CAAC,OAA8D;AAAA,IAC7E,IAAI,EAAE;AAAA,IACN,OAAO,EAAE;AAAA,EACX;AACA,QAAM,MAAM,QAAQ,MAAM,GAAG,SAAS,EAAE,IAAI,OAAO;AACnD,QAAM,SAAS,QAAQ,MAAM,CAAC,SAAS,EAAE,QAAQ,EAAE,IAAI,OAAO;AAE9D,QAAM,UAAU,OAAO,QAAQ,KAAK,UAAU,EAC3C,KAAK,CAAC,GAAG,MAAM,EAAE,CAAC,IAAI,EAAE,CAAC,CAAC,EAC1B,MAAM,GAAG,CAAC,EACV,IAAI,CAAC,CAAC,KAAK,KAAK,MAAM,GAAG,GAAG,KAAK,MAAM,QAAQ,CAAC,CAAC,GAAG;AACvD,QAAM,SACJ,QAAQ,SAAS,IAAI,GAAG,UAAU,+BAA0B,QAAQ,KAAK,IAAI,CAAC,KAAK;AAErF,SAAO,EAAE,KAAK,QAAQ,OA
AO;AAC/B;;;ACzTO,SAAS,eACX,OACyB;AAC5B,MAAI,MAAM,WAAW,GAAG;AACtB,UAAM,IAAI,MAAM,wCAAwC;AAAA,EAC1D;AACA,SAAO;AAAA,IACL,MAAM,YAAY,MAAM,IAAI,CAAC,MAAM,EAAE,IAAI,EAAE,KAAK,GAAG,CAAC;AAAA,IACpD,MAAM,OAAO,KAA6D;AACxE,YAAM,UAAwE,CAAC;AAC/E,iBAAW,QAAQ,OAAO;AACxB,cAAM,MAAM,MAAM,KAAK,OAAO,GAAG;AACjC,gBAAQ,KAAK,EAAE,MAAM,IAAI,CAAC;AAAA,MAC5B;AAQA,YAAM,YAAY,QAAQ,IAAI,CAAC,MAAM,EAAE,IAAI,QAAQ;AACnD,YAAM,UAAwB,UAAU,MAAM,CAAC,MAAM,MAAM,MAAM,IAC7D,SACA,UAAU,SAAS,cAAc,IAC/B,iBACA,UAAU,SAAS,eAAe,IAChC,kBACA,UAAU,SAAS,MAAM,IACvB,SACA;AAEV,YAAM,eAAe,QAAQ;AAAA,QAAQ,CAAC,MACpC,EAAE,IAAI,kBAAkB,SAAS,IAC7B,EAAE,IAAI,oBACN,CAAC,EAAE,MAAM,EAAE,KAAK,MAAM,QAAQ,EAAE,IAAI,aAAa,QAAQ,QAAQ,EAAE,IAAI,CAAC;AAAA,MAC9E;AAEA,YAAM,UAAU,QAAQ;AAAA,QAAQ,CAAC,MAC/B,EAAE,IAAI,QAAQ,IAAI,CAAC,WAAW,IAAI,EAAE,KAAK,IAAI,KAAK,MAAM,EAAE;AAAA,MAC5D;AAEA,aAAO;AAAA,QACL,UAAU;AAAA,QACV;AAAA,QACA,mBAAmB;AAAA,QACnB,OAAO,QAAQ,CAAC,GAAG,IAAI;AAAA,MACzB;AAAA,IACF;AAAA,EACF;AACF;;;ACpBO,SAAS,sBACd,SAC4B;AAC5B,QAAM,iBAAiB,QAAQ,kBAAkB;AACjD,QAAM,gBAAgB,QAAQ,8BAA8B;AAE5D,SAAO;AAAA,IACL,MAAM;AAAA,IACN,MAAM,OAAO,KAA6D;AACxE,YAAM,UAAoB,CAAC;AAC3B,YAAM,eAA0E,CAAC;AAKjF,YAAM,oBAAoB;AAAA,QACxB,IAAI;AAAA,QACJ,IAAI,uBAAuB,IAAI;AAAA,QAC/B,QAAQ;AAAA,MACV;AACA,YAAM,qBAAqB;AAAA,QACzB,IAAI;AAAA,QACJ,IAAI;AAAA,QACJ,QAAQ;AAAA,MACV;AACA,YAAM,QAAQ,qBAAqB;AACnC,YAAM,cAAc,SAAS;AAC7B,mBAAa,KAAK;AAAA,QAChB,MAAM;AAAA,QACN,QAAQ;AAAA,QACR,QAAQ,EAAE,mBAAmB,oBAAoB,OAAO,eAAe;AAAA,MACzE,CAAC;AACD,UAAI,CAAC,aAAa;AAChB,gBAAQ,KAAK,iBAAiB,MAAM,QAAQ,CAAC,CAAC,gBAAgB,cAAc,EAAE;AAAA,MAChF;AAGA,YAAM,aACJ,QAAQ,cAAc,UACtB,IAAI,KAAK,YAAY,IAAI,KAAK,YAAY,QAAQ;AACpD,mBAAa,KAAK;AAAA,QAChB,MAAM;AAAA,QACN,QAAQ;AAAA,QACR,QAAQ;AAAA,UACN,cAAc,IAAI,KAAK;AAAA,UACvB,aAAa,IAAI,KAAK;AAAA,UACtB,WAAW,QAAQ;AAAA,QACrB;AAAA,MACF,CAAC;AACD,UAAI,CAAC,YAAY;AACf,gBAAQ;AAAA,UACN,UAAU,IAAI,KAAK,YAAY,IAAI,KAAK,UAAU,QAAQ,CAAC,CAAC,aAAa,QAAQ,SAAS;AAAA,QAC5F;AAAA,MACF;AAGA,YAAM,kBAAkB,QAAQ,iBAC5B,aAAa,IAAI,oBAAoB,QAAQ,cAAc,IAC3D,EAAE,QAAQ,MAAM,UAAU,CAAC,EAAE;AACjC,mBAAa,KAAK;AAAA,QAChB,MAAM;AAAA
,QACN,QAAQ,gBAAgB;AAAA,QACxB,QAAQ;AAAA,UACN,UAAU,gBAAgB,SAAS;AAAA,UACnC,QAAQ,gBAAgB,SAAS,MAAM,GAAG,CAAC;AAAA,QAC7C;AAAA,MACF,CAAC;AACD,UAAI,CAAC,gBAAgB,QAAQ;AAC3B,gBAAQ,KAAK,0BAA0B,gBAAgB,SAAS,MAAM,YAAY;AAAA,MACpF;AAGA,UAAI,sBAAkD;AACtD,UAAI,QAAQ,cAAc,QAAQ,WAAW,UAAU,IAAI;AACzD,8BAAsB,oBAAoB,EAAE,MAAM,QAAQ,WAAW,CAAC;AAAA,MACxE;AAIA,YAAM,kBAAkB;AACxB,YAAM,kBAAkB,qBAAqB,YAAY,CAAC,GAAG;AAAA,QAC3D,CAAC,MAAM,EAAE,YAAY;AAAA,MACvB;AACA,YAAM,oBACJ,CAAC,uBACD,CAAC,iBACA,eAAe,WAAW,KAAK,oBAAoB,YAAY;AAClE,mBAAa,KAAK;AAAA,QAChB,MAAM;AAAA,QACN,QAAQ;AAAA,QACR,QAAQ,EAAE,QAAQ,qBAAqB,oBAAoB,eAAe,OAAO;AAAA,MACnF,CAAC;AACD,UAAI,CAAC,mBAAmB;AACtB,gBAAQ;AAAA,UACN,mCAAmC,eAAe,MAAM,sCAAsC,oBAAqB,OAAO;AAAA,QAC5H;AAAA,MACF;AAGA,UAAI,eAAoC;AACxC,UAAI,QAAQ,cAAc,QAAQ,WAAW,UAAU,IAAI;AACzD,uBAAe,YAAY,QAAQ,YAAY,CAAC,CAAC;AAAA,MACnD;AAEA,YAAM,eAAe,cAAc,UAAU,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,aAAa,OAAO;AACrF,YAAM,aAAa,YAAY,WAAW;AAC1C,mBAAa,KAAK;AAAA,QAChB,MAAM;AAAA,QACN,QAAQ;AAAA,QACR,QAAQ,EAAE,aAAa,cAAc,OAAO,UAAU,GAAG,aAAa,YAAY,OAAO;AAAA,MAC3F,CAAC;AACD,UAAI,CAAC,YAAY;AACf,gBAAQ,KAAK,wBAAwB,YAAY,MAAM,EAAE;AAAA,MAC3D;AAGA,YAAM,YAAY,aAAa,MAAM,CAAC,MAAM,EAAE,MAAM;AACpD,YAAM,WAAW,YAAY,SAAS;AAEtC,aAAO;AAAA,QACL;AAAA,QACA,SAAS,QAAQ,SAAS,IAAI,UAAU,CAAC,kBAAkB;AAAA,QAC3D,mBAAmB;AAAA,QACnB;AAAA,MACF;AAAA,IACF;AAAA,EACF;AACF;AAEA,SAAS,cACP,WACA,mBACA,WACQ;AACR,MAAI,CAAC,aAAa,UAAU,SAAS,EAAG,QAAO;AAC/C,QAAM,cAAc,IAAI,IAAI,UAAU,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC;AACtD,QAAM,aAAuB,CAAC;AAC9B,aAAW,CAAC,QAAQ,MAAM,KAAK,mBAAmB;AAChD,UAAM,aAAa,OAAO,MAAM,GAAG,EAAE,CAAC,KAAK;AAC3C,QAAI,CAAC,YAAY,IAAI,UAAU,EAAG;AAClC,UAAM,iBAAiB,OAAO,OAAO,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,SAAS;AACnE,QAAI,eAAe,WAAW,EAAG;AACjC,eAAW,KAAK,eAAe,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,eAAe,MAAM;AAAA,EACnF;AACA,MAAI,WAAW,WAAW,EAAG,QAAO;AACpC,SAAO,WAAW,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,WAAW;AAC5D;AAEA,SAAS,aACP,WACA,SAC8E;AAC9E,QAAM,WAA0D,CAAC;AACjE,aAAW,CAAC,SAAS,QAAQ,KAAK,WAAW;AAC3C,UAAM,OAAO,YAAY,QAAQ;AACjC,QAAI,SAAS,OAAW;AACxB,eAAW,UAAU,SAAS;AAC5B,YA
AM,UAAU,mBAAmB,MAAM,CAAC,GAAG,MAAM;AACnD,UAAI,CAAC,QAAQ,QAAQ;AACnB,iBAAS,KAAK,EAAE,YAAY,OAAO,IAAI,QAAQ,QAAQ,UAAU,wBAAwB,CAAC;AAAA,MAC5F;AAAA,IACF;AAAA,EACF;AACA,SAAO,EAAE,QAAQ,SAAS,WAAW,GAAG,SAAS;AACnD;AAEA,SAAS,YAAY,UAAuC;AAC1D,MAAI,OAAO,aAAa,SAAU,QAAO;AACzC,MAAI,YAAY,OAAO,aAAa,UAAU;AAC5C,UAAM,MAAM;AACZ,QAAI,OAAO,IAAI,SAAS,SAAU,QAAO,IAAI;AAC7C,QAAI,OAAO,IAAI,WAAW,SAAU,QAAO,IAAI;AAC/C,QAAI,OAAO,IAAI,YAAY,SAAU,QAAO,IAAI;AAAA,EAClD;AACA,SAAO;AACT;;;AC5MO,SAAS,YACd,SAC4B;AAC5B,QAAM,iBAAiB,QAAQ,kBAAkB;AACjD,SAAO;AAAA,IACL,MAAM;AAAA,IACN,MAAM,OAAO,KAA6D;AACxE,YAAM,cAAc,IAAI,IAAI,QAAQ,UAAU,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC;AAG9D,YAAM,WAAW,iBAAiB,IAAI,uBAAuB,IAAI,aAAa,WAAW;AACzF,YAAM,YAAY,iBAAiB,IAAI,aAAa,WAAW;AAC/D,YAAM,QAAQ,YAAY;AAC1B,YAAM,SAAS,SAAS;AACxB,aAAO;AAAA,QACL,UAAU,SAAS,SAAS;AAAA,QAC5B,SAAS,SACL,CAAC,kBAAkB,MAAM,QAAQ,CAAC,CAAC,WAAM,cAAc,EAAE,IACzD,CAAC,kBAAkB,MAAM,QAAQ,CAAC,CAAC,MAAM,cAAc,EAAE;AAAA,QAC7D,mBAAmB;AAAA,UACjB,EAAE,MAAM,eAAe,QAAQ,QAAQ,EAAE,UAAU,WAAW,OAAO,eAAe,EAAE;AAAA,QACxF;AAAA,QACA;AAAA,MACF;AAAA,IACF;AAAA,EACF;AACF;AAEA,SAAS,iBACP,mBACA,aACQ;AACR,QAAM,aAAuB,CAAC;AAC9B,aAAW,CAAC,QAAQ,MAAM,KAAK,mBAAmB;AAChD,UAAM,aAAa,OAAO,MAAM,GAAG,EAAE,CAAC,KAAK;AAC3C,QAAI,CAAC,YAAY,IAAI,UAAU,EAAG;AAClC,UAAM,OAAO,OAAO,OAAO,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,SAAS;AACzD,QAAI,KAAK,SAAS,EAAG,YAAW,KAAK,KAAK,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM;AAAA,EACpF;AACA,SAAO,WAAW,WAAW,IAAI,IAAI,WAAW,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,WAAW;AAC1F;;;ACwGO,SAAS,oBACd,OAC4B;AAC5B,SACE,OAAO,UAAU,YACjB,UAAU,QACV,aAAa,SACb,WAAW,SACX,eAAe;AAEnB;AAoNA,IAAM,mBAA+C;AAAA,EACnD,YAAY;AAAA,EACZ,mBAAmB;AAAA,EACnB,eAAe;AACjB;AAGO,SAAS,eAAe,OAAuC;AACpE,SAAO,iBAAiB,SAAS,YAAY;AAC/C;;;ACxXO,SAAS,sBACd,UACQ;AACR,QAAM,aAAuB,CAAC;AAC9B,aAAW,QAAQ,SAAS,OAAO;AACjC,UAAM,iBAAiB,OAAO,OAAO,KAAK,WAAW,EAAE,IAAI,CAAC,MAAM,EAAE,SAAS;AAC7E,QAAI,eAAe,SAAS,GAAG;AAC7B,iBAAW,KAAK,eAAe,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,eAAe,MAAM;AAAA,IACnF;AAAA,EACF;AACA,SAAO,WAAW,WAAW,IAAI,IAAI,WAAW,OAAO,CAAC,GAAG,MAAM,I
AAI,GAAG,CAAC,IAAI,WAAW;AAC1F;AAWO,SAAS,kBACd,UACmB;AACnB,QAAM,UAAkC,CAAC;AACzC,QAAM,YAAoC,CAAC;AAC3C,QAAM,aAAa,oBAAI,IAAsB;AAC7C,aAAW,QAAQ,SAAS,OAAO;AACjC,UAAM,cAAc,OAAO,OAAO,KAAK,WAAW;AAClD,QAAI,YAAY,WAAW,EAAG;AAC9B,UAAM,gBAAgB,YAAY,OAAO,CAAC,GAAG,MAAM,IAAI,EAAE,WAAW,CAAC,IAAI,YAAY;AACrF,UAAM,MAAM,WAAW,IAAI,KAAK,UAAU,KAAK,CAAC;AAChD,QAAI,KAAK,aAAa;AACtB,eAAW,IAAI,KAAK,YAAY,GAAG;AACnC,eAAW,SAAS,aAAa;AAC/B,iBAAW,CAAC,KAAK,KAAK,KAAK,OAAO,QAAQ,MAAM,UAAU,GAAG;AAC3D,gBAAQ,GAAG,KAAK,QAAQ,GAAG,KAAK,KAAK;AACrC,kBAAU,GAAG,KAAK,UAAU,GAAG,KAAK,KAAK;AAAA,MAC3C;AAAA,IACF;AAAA,EACF;AACA,QAAM,aAAqC,CAAC;AAC5C,aAAW,OAAO,OAAO,KAAK,OAAO,GAAG;AACtC,UAAM,QAAQ,UAAU,GAAG,KAAK;AAChC,eAAW,GAAG,IAAI,QAAQ,KAAK,QAAQ,GAAG,KAAK,KAAK,QAAQ;AAAA,EAC9D;AACA,QAAM,YAAY,CAAC,GAAG,WAAW,QAAQ,CAAC,EAAE,IAAI,CAAC,CAAC,YAAY,KAAK,OAAO;AAAA,IACxE;AAAA,IACA,WAAW,MAAM,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,MAAM;AAAA,EACtD,EAAE;AACF,SAAO,EAAE,YAAY,UAAU;AACjC;;;AC9CA,SAAS,kBAAkB;AAqE3B,eAAsB,gBACpB,MACsD;AACtD,QAAM,cAAc,KAAK,eAAe;AAGxC,QAAM,mBAAmB,MAAM,YAAkC;AAAA,IAC/D,GAAG;AAAA,IACH,UAAU,CAAC,UAAU,QAAQ,KAAK,oBAAoB,KAAK,iBAAiB,UAAU,GAAG;AAAA,IACzF,QAAQ,GAAG,KAAK,MAAM;AAAA,EACxB,CAAC;AAED,QAAM,cAA0E,CAAC;AACjF,QAAM,UAA8B,CAAC;AACrC,MAAI,kBAAoC,CAAC,KAAK,eAAe;AAC7D,MAAI,gBAAgB,KAAK;AACzB,MAAI,oBAAoB,YAAY,KAAK,eAAe;AACxD,MAAI,kBAAkB,sBAAsB,gBAAgB;AAC5D,MAAI;AACJ,MAAI;AAMJ,QAAM,SAAyB;AAAA,IAC7B,eAAe,KAAK,iBAAiB,mBAAmB,kBAAkB,EAAE;AAAA,EAC9E;AAEA,WAAS,MAAM,GAAG,MAAM,KAAK,gBAAgB,OAAO;AAElD,QAAI,KAAK,OAAO,SAAS,EAAE,QAAQ,CAAC,EAAE,KAAM;AAK5C,UAAM,gBAAgB,sBAAsB,MAAM;AAClD,UAAM,WAAW,MAAM,KAAK,OAAO,QAAQ;AAAA,MACzC,gBAAgB,gBAAgB,CAAC,KAAK,KAAK;AAAA,MAC3C;AAAA,MACA,UAAU,CAAC;AAAA,MACX,gBAAgB,KAAK;AAAA,MACrB,YAAY;AAAA,MACZ,QAAQ,IAAI,gBAAgB,EAAE;AAAA,MAC9B,QAAQ,KAAK;AAAA,MACb,SAAS,KAAK,gBAAgB,KAAK,iBAAiB,QAAQ,KAAK,eAAe;AAAA,MAChF,qBAAqB,KAAK;AAAA,MAC1B;AAAA,IACF,CAAC;AAKD,UAAM,aAAkC,SAAS;AAAA,MAAI,CAAC,MACpD,oBAAoB,CAAC,IAAI,IAAI,EAAE,SAAS,GAAG,OAAO,IAAI,WAAW,GAAG;AAAA,IACtE;AAGA,UAAM,iBAOD,CAAC;AACN,aAAS,IAAI,GAAG,IAAI,WAAW
,QAAQ,KAAK;AAC1C,YAAM,EAAE,SAAS,OAAO,UAAU,IAAI,WAAW,CAAC;AAClD,YAAM,OAAO,YAAY,OAAO;AAChC,YAAM,WAAW,MAAM,YAAkC;AAAA,QACvD,GAAG;AAAA,QACH,UAAU,CAAC,UAAU,QAAQ,KAAK,oBAAoB,SAAS,UAAU,GAAG;AAAA,QAC5E,QAAQ,GAAG,KAAK,MAAM,QAAQ,GAAG,cAAc,CAAC;AAAA,MAClD,CAAC;AACD,YAAM,YAAY,sBAAsB,QAAQ;AAChD,qBAAe,KAAK,EAAE,aAAa,MAAM,SAAS,OAAO,WAAW,UAAU,UAAU,CAAC;AAGzF,aAAO;AAAA,QACL,eAAe,SAAS,MAAM,UAAU,KAAK,SAAS,QAAW,aAAa,MAAS;AAAA,MACzF;AAAA,IACF;AAGA,mBAAe,KAAK,CAAC,GAAG,MAAM,EAAE,YAAY,EAAE,SAAS;AACvD,UAAM,WAAW,eAAe,MAAM,GAAG,WAAW;AACpD,sBAAkB,SAAS,IAAI,CAAC,MAAM,EAAE,OAAO;AAC/C,UAAM,MAAM,eAAe,CAAC;AAC5B,QAAI,OAAO,IAAI,YAAY,iBAAiB;AAC1C,sBAAgB,IAAI;AACpB,0BAAoB,IAAI;AACxB,wBAAkB,IAAI;AACtB,oBAAc,IAAI,SAAS;AAC3B,wBAAkB,IAAI,aAAa;AAAA,IACrC;AAEA,UAAM,SAA2B;AAAA,MAC/B,iBAAiB;AAAA,MACjB,YAAY,eAAe,IAAI,CAAC,MAAM;AACpC,cAAM,YAAY,kBAAkB,EAAE,QAAQ;AAC9C,cAAM,YAAoD;AAAA,UACxD,aAAa,EAAE;AAAA,UACf,WAAW,EAAE;AAAA,UACb,MAAM,CAAC,EAAE,WAAW,EAAE,SAAS;AAAA,UAC/B,YAAY,UAAU;AAAA,UACtB,WAAW,UAAU;AAAA,QACvB;AACA,YAAI,EAAE,MAAO,WAAU,QAAQ,EAAE;AACjC,YAAI,EAAE,UAAW,WAAU,YAAY,EAAE;AACzC,eAAO;AAAA,MACT,CAAC;AAAA,MACD,UAAU,SAAS,IAAI,CAAC,MAAM,EAAE,WAAW;AAAA,IAC7C;AACA,YAAQ,KAAK,MAAM;AACnB,gBAAY,KAAK;AAAA,MACf;AAAA,MACA,UAAU,eAAe,IAAI,CAAC,OAAO;AAAA,QACnC,aAAa,EAAE;AAAA,QACf,SAAS,EAAE;AAAA,QACX,UAAU,EAAE;AAAA,MACd,EAAE;AAAA,IACJ,CAAC;AAAA,EACH;AAEA,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,gBAAgB,sBAAsB,MAAM;AAAA,EAC9C;AACF;AAIA,SAAS,eACP,SACA,MACA,UACA,YACA,OACA,WACc;AACd,QAAM,aAAqC,CAAC;AAC5C,aAAW,EAAE,YAAY,UAAU,KAAK,kBAAkB,QAAQ,EAAE,WAAW;AAC7E,eAAW,UAAU,IAAI;AAAA,EAC3B;AACA,QAAM,SAAuB;AAAA,IAC3B;AAAA,IACA,aAAa;AAAA,IACb;AAAA,IACA,WAAW,sBAAsB,QAAQ;AAAA,IACzC;AAAA,EACF;AACA,MAAI,MAAO,QAAO,QAAQ;AAC1B,MAAI,UAAW,QAAO,YAAY;AAClC,SAAO;AACT;AAUA,SAAS,sBAAsB,QAAwC;AACrE,MAAI,OAAO,UAAU,EAAG,QAAO,CAAC,GAAG,MAAM;AACzC,QAAM,MAAM,oBAAI,IAAY;AAC5B,aAAW,KAAK,OAAQ,YAAW,MAAM,OAAO,KAAK,EAAE,UAAU,EAAG,KAAI,IAAI,EAAE;AAC9E,MAAI,IAAI,SAAS,EAAG,QAAO,CAAC,GAAG,MAAM;AACrC,QAAM,QAAgC,CAAC;AACvC,aAAW,MAAM,K
AAK;AACpB,QAAI,MAAM,OAAO;AACjB,eAAW,KAAK,QAAQ;AACtB,YAAM,IAAI,EAAE,WAAW,EAAE;AACzB,UAAI,OAAO,MAAM,YAAY,OAAO,SAAS,CAAC,KAAK,IAAI,IAAK,OAAM;AAAA,IACpE;AACA,UAAM,EAAE,IAAI,OAAO,SAAS,GAAG,IAAI,MAAM;AAAA,EAC3C;AACA,QAAM,aAAwC,CAAC,GAAG,GAAG,EAAE,IAAI,CAAC,QAAQ;AAAA,IAClE,MAAM;AAAA,IACN,WAAW;AAAA,IACX,OAAO,CAAC,MAAM;AACZ,YAAM,IAAI,EAAE,WAAW,EAAE;AACzB,aAAO,OAAO,MAAM,YAAY,OAAO,SAAS,CAAC,IAAI,IAAK,MAAM,EAAE,KAAK;AAAA,IACzE;AAAA,EACF,EAAE;AACF,SAAO,eAAe,QAAQ,UAAU,EAAE;AAC5C;AAEO,SAAS,YAAY,SAAiC;AAG3D,QAAM,WACJ,OAAO,YAAY,WACf,UACA,KAAK,UAAU;AAAA,IACb,MAAM,QAAQ;AAAA,IACd,aAAa,QAAQ;AAAA,IACrB,SAAS,QAAQ,WAAW;AAAA,EAC9B,CAAC;AACP,SAAO,WAAW,QAAQ,EAAE,OAAO,QAAQ,EAAE,OAAO,KAAK,EAAE,MAAM,GAAG,EAAE;AACxE;;;AClOA,eAAsB,mBACpB,MACyD;AAGzD,MAAK,KAAa,kBAAkB,UAAU;AAC5C,UAAM,IAAI;AAAA,MACR;AAAA,IACF;AAAA,EACF;AAKA,MAAI,KAAK,YAAY,SAAS,KAAK,QAAQ;AACzC,UAAM,IAAI;AAAA,MACR;AAAA,IACF;AAAA,EACF;AACA,MAAI,KAAK,kBAAkB,SAAS,CAAC,KAAK,WAAW,CAAC,KAAK,SAAS;AAClE,UAAM,IAAI,MAAM,mEAAmE;AAAA,EACrF;AAGA,QAAM,eAAe,MAAM,gBAAgB,IAAI;AAG/C,QAAM,EAAE,aAAAA,aAAY,IAAI,MAAM,OAAO,4BAAiB;AAEtD,QAAM,oBAAoB,MAAMA,aAAkC;AAAA,IAChE,GAAG;AAAA,IACH,WAAW,KAAK;AAAA,IAChB,UAAU,CAAC,UAAU,QAAQ,KAAK,oBAAoB,KAAK,iBAAiB,UAAU,GAAG;AAAA,IACzF,QAAQ,GAAG,KAAK,MAAM;AAAA,EACxB,CAAC;AAED,QAAM,kBAAkB,MAAMA,aAAkC;AAAA,IAC9D,GAAG;AAAA,IACH,WAAW,KAAK;AAAA,IAChB,UAAU,CAAC,UAAU,QACnB,KAAK,oBAAoB,aAAa,eAAe,UAAU,GAAG;AAAA,IACpE,QAAQ,GAAG,KAAK,MAAM;AAAA,EACxB,CAAC;AAUD,QAAM,qBAAqB,oBAAI,IAAuB;AACtD,QAAM,oBAAoB,oBAAI,IAAuB;AACrD,QAAM,cAAwB,oBAAI,IAAI;AACtC,QAAM,sBAAgC,oBAAI,IAAI;AAC9C,aAAW,QAAQ,gBAAgB,OAAO;AACxC,uBAAmB,IAAI,KAAK,QAAQ,KAAK,QAAQ;AACjD,gBAAY,IAAI,KAAK,QAAQ,KAAK,WAAW;AAAA,EAC/C;AACA,aAAW,QAAQ,kBAAkB,OAAO;AAC1C,sBAAkB,IAAI,KAAK,QAAQ,KAAK,QAAQ;AAChD,wBAAoB,IAAI,KAAK,QAAQ,KAAK,WAAW;AAAA,EACvD;AAEA,QAAM,aAAa,MAAM,KAAK,KAAK,OAAO;AAAA,IACxC;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,WAAW,KAAK;AAAA,IAChB,MAAM;AAAA,MACJ,WAAW,gBAAgB,WAAW;AAAA,MACtC,UAAU,kBAAkB,WAAW;AAAA,IACzC;AAAA,IACA,QAAQ,IAAI,gBAAgB,EAAE;AAAA,EAChC,CAAC;AAMD,QAAM,SAAS,KAAK,sB
AAsB;AAC1C,QAAM,eACJ,aAAa,sBAAsB,YAAY,KAAK,eAAe,IAC/D,KACA,OAAO,aAAa,eAAe,KAAK,eAAe;AAE7D,MAAI;AACJ,MAAI,KAAK,kBAAkB,QAAQ,WAAW,aAAa,QAAQ;AACjE,eAAW,WAAW;AAAA,MACpB,QAAQ;AAAA,MACR,MAAM;AAAA,MACN;AAAA,MACA,SAAS,KAAK;AAAA,MACd,QAAQ,KAAK;AAAA,IACf,CAAC;AAAA,EACH;AAEA,SAAO;AAAA,IACL,GAAG;AAAA,IACH;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;AAEO,SAAS,kBACd,eACA,iBACQ;AAGR,MAAI,OAAO,kBAAkB,YAAY,OAAO,oBAAoB,UAAU;AAC5E,UAAM,MAAM,CAAC,MACX,OAAO,MAAM,WACT,qBACA,YAAY,EAAE,WAAW,GAAG,EAAE,UAAU,SAAS,EAAE,OAAO,KAAK,EAAE,GAAG,EAAE,UAAU;AAAA,EAAK,EAAE,OAAO,KAAK,EAAE;AAC3G,WAAO;AAAA,EAAiB,IAAI,eAAe,CAAC;AAAA;AAAA,EAAiB,IAAI,aAAa,CAAC;AAAA,EACjF;AACA,QAAM,QAAkB,CAAC;AACzB,QAAM,KAAK,cAAc;AACzB,QAAM,KAAK,YAAY;AACvB,aAAW,KAAK,gBAAgB,MAAM,IAAI,EAAG,OAAM,KAAK,KAAK,CAAC,EAAE;AAChE,aAAW,KAAK,cAAc,MAAM,IAAI,EAAG,OAAM,KAAK,KAAK,CAAC,EAAE;AAC9D,SAAO,MAAM,KAAK,IAAI;AACxB;;;ACnLA,eAAsB,QACpB,MAC+C;AAC/C,SAAO,YAAY,IAAI;AACzB;;;ACOA,SAAS,cAAAC,mBAAkB;AAC3B,SAAS,QAAAC,aAAY;AAad,SAAS,mBAAmB,SAAiC;AAClE,QAAM,WACJ,OAAO,YAAY,WACf,UACA,KAAK,UAAU;AAAA,IACb,MAAM,QAAQ;AAAA,IACd,aAAa,QAAQ;AAAA,IACrB,SAAS,QAAQ,WAAW;AAAA,EAC9B,CAAC;AACP,SAAO,UAAUC,YAAW,QAAQ,EAAE,OAAO,QAAQ,EAAE,OAAO,KAAK,CAAC;AACtE;AAuGA,SAAS,qBACP,UACQ;AACR,QAAM,KAAe,CAAC;AACtB,aAAW,QAAQ,SAAS,OAAO;AACjC,QAAI,KAAK,MAAO;AAChB,UAAM,KAAK,OAAO,OAAO,KAAK,WAAW,EAAE,IAAI,CAAC,MAAM,EAAE,SAAS;AACjE,QAAI,GAAG,OAAQ,IAAG,KAAK,GAAG,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,GAAG,MAAM;AAAA,EAClE;AACA,SAAO,GAAG,SAAS,GAAG,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,GAAG,SAAS;AACjE;AAGO,SAAS,0BACd,MACsB;AACtB,QAAM,YAAY,0BAA0B,KAAK,aAAa;AAC9D,QAAM,SAAS,CAAC,GAAG,IAAI,IAAI,KAAK,cAAc,IAAI,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC,EAAE,KAAK;AAEzE,QAAM,aAAwC,CAAC;AAC/C,aAAW,OAAO,KAAK,aAAa;AAClC,UAAM,cAAc,IAAI,IAAI,IAAI,QAAQ;AACxC,UAAM,gBAAgB,IAAI,IAAI,IAAI,SAAS,IAAI,CAAC,MAAM,CAAC,EAAE,aAAa,EAAE,OAAO,CAAC,CAAC;AACjF,eAAW,KAAK,IAAI,YAAY;AAC9B,YAAM,UAAU,cAAc,IAAI,EAAE,WAAW;AAC/C,YAAM,QAAiC;AAAA,QACrC,YAAY,IAAI;AAAA,QAChB,aAAa,EAAE;AAAA,QACf,aACE,YAAY,SAAY,mBAAmB,OAAO,IAAI,UAA
U,EAAE,WAAW;AAAA,QAC/E,WAAW,EAAE;AAAA,QACb,UAAU,YAAY,IAAI,EAAE,WAAW;AAAA,MACzC;AACA,UAAI,EAAE,MAAO,OAAM,QAAQ,EAAE;AAC7B,UAAI,EAAE,UAAW,OAAM,YAAY,EAAE;AACrC,iBAAW,KAAK,KAAK;AAAA,IACvB;AAAA,EACF;AAEA,QAAM,2BAA2B,qBAAqB,KAAK,iBAAiB;AAC5E,QAAM,yBAAyB,qBAAqB,KAAK,eAAe;AAExE,QAAM,SAA+B;AAAA,IACnC,QAAQ;AAAA,IACR,OAAO,KAAK;AAAA,IACZ,QAAQ,KAAK;AAAA,IACb,WAAW,KAAK;AAAA,IAChB,qBAAqB,mBAAmB,KAAK,eAAe;AAAA,IAC5D,mBAAmB,mBAAmB,KAAK,aAAa;AAAA,IACxD,MAAM,KAAK;AAAA,IACX;AAAA,IACA,MAAM;AAAA,MACJ,UAAU,KAAK,KAAK;AAAA,MACpB,SAAS,KAAK,KAAK;AAAA,MACnB,OAAO,KAAK,KAAK;AAAA,MACjB,mBAAmB,KAAK,KAAK,kBAAkB,IAAI,CAAC,OAAO;AAAA,QACzD,MAAM,EAAE;AAAA,QACR,QAAQ,EAAE;AAAA,MACZ,EAAE;AAAA,IACJ;AAAA,IACA;AAAA,IACA;AAAA,IACA,aAAa,yBAAyB;AAAA,IACtC,SAAS;AAAA,MACP,SAAS,UAAU;AAAA,MACnB,iBAAiB,UAAU;AAAA,MAC3B;AAAA,MACA,kBAAkB,UAAU;AAAA,MAC5B,mBAAmB,UAAU;AAAA,MAC7B,cAAc,UAAU;AAAA,IAC1B;AAAA,IACA,cAAc,KAAK;AAAA,IACnB,iBAAiB,KAAK;AAAA,EACxB;AACA,MAAI,KAAK,YAAa,QAAO,cAAc,KAAK;AAChD,MAAI,KAAK,gBAAiB,QAAO,kBAAkB,KAAK;AACxD,SAAO;AACT;AAIA,IAAM,cAA8B,CAAC,MAAM;AAE3C,SAAS,OAAO,OAAyB;AACvC,SAAOA,YAAW,QAAQ,EAAE,OAAO,MAAM,KAAK,GAAG,CAAC,EAAE,OAAO,KAAK;AAClE;AAEA,SAAS,WAAW,UAA8E;AAChG,SAAO,YAAY,SAAS,QAAQ,IAChC,EAAE,MAAM,KAAK,IACb,EAAE,MAAM,SAAS,SAAS,kBAAkB,QAAQ,GAAG;AAC7D;AAaO,SAAS,oBACd,QACA,OAAgC,CAAC,GACf;AAClB,QAAM,UAAU,OAAO,CAAC,SAAS,OAAO,KAAK,CAAC,EAAE,MAAM,GAAG,EAAE;AAC3D,QAAM,YAAY,KAAK,eAAe,KAAK,MAAM,OAAO,SAAS,KAAK,KAAK,IAAI,MAAM;AACrF,QAAM,UAAU,WAAW,KAAK,IAAI,GAAG,OAAO,eAAe,IAAI;AACjE,QAAM,QAA0B,CAAC;AAEjC,QAAM,aAAa,OAAO,CAAC,QAAQ,OAAO,KAAK,CAAC,EAAE,MAAM,GAAG,EAAE;AAC7D,QAAM,KAAK;AAAA,IACT;AAAA,IACA,QAAQ;AAAA,IACR,MAAM;AAAA,IACN,mBAAmB;AAAA,IACnB,iBAAiB;AAAA,IACjB,YAAY;AAAA,MACV,gBAAgB,OAAO;AAAA,MACvB,iBAAiB,OAAO;AAAA,MACxB,8BAA8B,OAAO;AAAA,MACrC,4BAA4B,OAAO;AAAA,MACnC,sBAAsB,OAAO;AAAA,MAC7B,uBAAuB,OAAO,KAAK;AAAA,MACnC,yBAAyB,OAAO,QAAQ;AAAA,MACxC,0BAA0B,OAAO,QAAQ;AAAA,MACzC,uBAAuB,OAAO;AAAA,IAChC;AAAA,IACA,QAAQ,WAAW,OAAO,KAAK,QAAQ;AAAA,IACvC,gBAAgB,OAAO;AAAA,EACzB,CAAC;AAGD,QAAM,QAAQ,oBAAI,IAAuC;AACzD,aAAW,KAAK
,OAAO,YAAY;AACjC,UAAM,MAAM,MAAM,IAAI,EAAE,UAAU,KAAK,CAAC;AACxC,QAAI,KAAK,CAAC;AACV,UAAM,IAAI,EAAE,YAAY,GAAG;AAAA,EAC7B;AACA,aAAW,CAAC,YAAY,KAAK,KAAK,CAAC,GAAG,MAAM,QAAQ,CAAC,EAAE,KAAK,CAAC,GAAG,MAAM,EAAE,CAAC,IAAI,EAAE,CAAC,CAAC,GAAG;AAClF,UAAM,YAAY,OAAO,CAAC,OAAO,OAAO,OAAO,OAAO,UAAU,CAAC,CAAC,EAAE,MAAM,GAAG,EAAE;AAC/E,UAAM,gBAAgB,MAAM,OAAO,CAAC,GAAG,MAAM,KAAK,IAAI,GAAG,EAAE,SAAS,GAAG,CAAC;AACxE,UAAM,KAAK;AAAA,MACT;AAAA,MACA,QAAQ;AAAA,MACR,cAAc;AAAA,MACd,MAAM,cAAc,UAAU;AAAA,MAC9B,mBAAmB;AAAA,MACnB,iBAAiB;AAAA,MACjB,YAAY;AAAA,QACV,gBAAgB,OAAO;AAAA,QACvB,qBAAqB;AAAA,QACrB,yBAAyB,MAAM;AAAA,QAC/B,wBAAwB;AAAA,MAC1B;AAAA,MACA,gBAAgB,OAAO;AAAA,MACvB,qBAAqB;AAAA,IACvB,CAAC;AACD,aAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACrC,YAAM,IAAI,MAAM,CAAC;AACjB,YAAM,aAAa,OAAO,CAAC,QAAQ,OAAO,OAAO,OAAO,UAAU,GAAG,EAAE,WAAW,CAAC,EAAE;AAAA,QACnF;AAAA,QACA;AAAA,MACF;AACA,YAAM,aAA2C;AAAA,QAC/C,gBAAgB,OAAO;AAAA,QACvB,qBAAqB;AAAA,QACrB,sBAAsB,EAAE;AAAA,QACxB,sBAAsB,EAAE;AAAA,QACxB,oBAAoB,EAAE;AAAA,QACtB,mBAAmB,EAAE;AAAA,MACvB;AACA,UAAI,EAAE,MAAO,YAAW,uBAAuB,IAAI,EAAE;AACrD,UAAI,EAAE,UAAW,YAAW,2BAA2B,IAAI,EAAE;AAC7D,YAAM,KAAK;AAAA,QACT;AAAA,QACA,QAAQ;AAAA,QACR,cAAc;AAAA,QACd,MAAM,aAAa,EAAE,WAAW;AAAA,QAChC,mBAAmB;AAAA,QACnB,iBAAiB;AAAA,QACjB;AAAA,QACA,gBAAgB,OAAO;AAAA,QACvB,qBAAqB;AAAA,MACvB,CAAC;AAAA,IACH;AAAA,EACF;AAIA,QAAM,aAAa,OAAO,CAAC,QAAQ,OAAO,KAAK,CAAC,EAAE,MAAM,GAAG,EAAE;AAC7D,QAAM,KAAK;AAAA,IACT;AAAA,IACA,QAAQ;AAAA,IACR,cAAc;AAAA,IACd,MAAM;AAAA,IACN,mBAAmB;AAAA,IACnB,iBAAiB;AAAA,IACjB,YAAY;AAAA,MACV,gBAAgB,OAAO;AAAA,MACvB,uBAAuB,OAAO,KAAK;AAAA,MACnC,oBAAoB,OAAO,KAAK,SAAS,OAAO;AAAA,MAChD,sBAAsB,KAAK,UAAU,OAAO,KAAK,OAAO;AAAA,MACxD,sBAAsB,OAAO;AAAA,MAC7B,mCAAmC,OAAO;AAAA,MAC1C,iCAAiC,OAAO;AAAA,IAC1C;AAAA,IACA,QAAQ,WAAW,OAAO,KAAK,QAAQ;AAAA,IACvC,gBAAgB,OAAO;AAAA,EACzB,CAAC;AAED,SAAO;AACT;AAKO,SAAS,qBAAqB,QAAwB;AAC3D,SAAOC,MAAK,QAAQ,sBAAsB;AAC5C;AACO,SAAS,oBAAoB,QAAwB;AAC1D,SAAOA,MAAK,QAAQ,6BAA6B;AACnD;AA6BA,eAAsB,mBACpB,MACmC;AACnC,QAAM,SAAS,0BAA0B,IAAI;AAC7C,QAAM,QAAQ,oBAAoB,MAAM;AAExC,OAAK,QAAQ
,UAAU,KAAK,MAAM;AAClC,QAAM,aAAa,qBAAqB,KAAK,MAAM;AACnD,QAAM,YAAY,oBAAoB,KAAK,MAAM;AACjD,OAAK,QAAQ,MAAM,YAAY,KAAK,UAAU,QAAQ,MAAM,CAAC,CAAC;AAC9D,OAAK,QAAQ,MAAM,WAAW,MAAM,IAAI,CAAC,MAAM,KAAK,UAAU,CAAC,CAAC,EAAE,KAAK,IAAI,CAAC;AAE5E,MAAI,KAAK,cAAc;AACrB,QAAI;AACF,YAAM,KAAK,aAAa,aAAa,KAAK;AAAA,IAC5C,SAAS,KAAK;AACZ,YAAM,MAAM,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAE3D,cAAQ,KAAK,4DAA4D,GAAG,EAAE;AAAA,IAChF;AAAA,EACF;AAEA,SAAO,EAAE,QAAQ,OAAO,YAAY,UAAU;AAChD;","names":["runCampaign","createHash","join","createHash","join"]}
|
|
1
|
+
{"version":3,"sources":["../src/campaign/auto-pr.ts","../src/campaign/drivers/evolutionary.ts","../src/campaign/drivers/gepa.ts","../src/campaign/gates/compose.ts","../src/campaign/gates/default-production-gate.ts","../src/campaign/gates/heldout-gate.ts","../src/campaign/types.ts","../src/campaign/score-utils.ts","../src/campaign/presets/run-optimization.ts","../src/campaign/presets/run-improvement-loop.ts","../src/campaign/presets/run-eval.ts","../src/campaign/provenance.ts"],"sourcesContent":["/**\n * @experimental\n *\n * `openAutoPr` — thin shell-out helper for the `runImprovementLoop` preset's\n * `autoOnPromote: 'pr'` mode. Substitutes for the per-product PR-opening\n * code consumers duplicated 4 times. The PR body includes the campaign's\n * manifest hash, gate verdict, and scorecard summary so reviewers can see\n * exactly what was promoted + why.\n *\n * NOT a deploy mechanism — this only OPENS a PR. The human reviews + merges.\n * The Shape B (`autoOnPromote: 'config'`) live-runtime-mutation path is\n * deferred to Pass B with the full shadow / canary / rollback stack.\n */\n\nimport { execSync } from 'node:child_process'\nimport { writeFileSync } from 'node:fs'\nimport { tmpdir } from 'node:os'\nimport { join } from 'node:path'\nimport type { CampaignResult, GateResult, Scenario } from './types'\n\nexport interface OpenAutoPrOptions<TArtifact, TScenario extends Scenario> {\n /** Campaign result to attach to the PR. */\n result: CampaignResult<TArtifact, TScenario>\n /** Gate verdict explaining the promotion. Substrate refuses to open a PR\n * when `gate.decision !== 'ship'` — fails loud. */\n gate: GateResult\n /** Promoted surface diff — typically the new system prompt addendum or\n * full profile diff. Substrate writes it as the PR body. */\n promotedDiff: string\n /** GH owner/repo target (e.g., `tangle-network/gtm-agent`). */\n ghOwner: string\n ghRepo: string\n /** Branch name for the PR. Default `auto/<manifestHash[:12]>`. 
*/\n branch?: string\n /** PR title. Default includes manifest hash. */\n title?: string\n /** Whether to actually open the PR or just dry-run. Default reads\n * `GH_AUTO_PR_TOKEN` env — present = open, absent = dry-run. */\n dryRun?: boolean\n /** Test seam — substitute `gh pr create` invocation. */\n ghExec?: (args: string[]) => { stdout: string; stderr: string; status: number }\n}\n\nexport interface OpenAutoPrResult {\n opened: boolean\n prUrl?: string\n dryRun: boolean\n reason: string\n}\n\nexport function openAutoPr<TArtifact, TScenario extends Scenario>(\n options: OpenAutoPrOptions<TArtifact, TScenario>,\n): OpenAutoPrResult {\n if (options.gate.decision !== 'ship') {\n return {\n opened: false,\n dryRun: false,\n reason: `gate verdict was \"${options.gate.decision}\" — refusing to open PR`,\n }\n }\n\n const dryRun = options.dryRun ?? !process.env.GH_AUTO_PR_TOKEN\n const branch = options.branch ?? `auto/${options.result.manifestHash.slice(0, 12)}`\n const title =\n options.title ?? `auto: campaign ${options.result.manifestHash.slice(0, 8)} promoted by gate`\n\n const body = renderPrBody(options.result, options.gate, options.promotedDiff)\n const bodyPath = join(tmpdir(), `auto-pr-body-${Date.now()}.md`)\n writeFileSync(bodyPath, body)\n\n if (dryRun) {\n return {\n opened: false,\n dryRun: true,\n reason: `dry-run (GH_AUTO_PR_TOKEN not set). Would create PR on ${options.ghOwner}/${options.ghRepo} branch ${branch}. Body at ${bodyPath}.`,\n }\n }\n\n const ghExec = options.ghExec ?? 
defaultGhExec\n const result = ghExec([\n 'pr',\n 'create',\n '--repo',\n `${options.ghOwner}/${options.ghRepo}`,\n '--head',\n branch,\n '--title',\n title,\n '--body-file',\n bodyPath,\n ])\n if (result.status !== 0) {\n return {\n opened: false,\n dryRun: false,\n reason: `gh pr create failed (exit ${result.status}): ${result.stderr.slice(0, 400)}`,\n }\n }\n const prUrl = result.stdout.trim()\n return { opened: true, prUrl, dryRun: false, reason: 'PR opened' }\n}\n\nfunction renderPrBody<TArtifact, TScenario extends Scenario>(\n result: CampaignResult<TArtifact, TScenario>,\n gate: GateResult,\n diff: string,\n): string {\n const lines: string[] = []\n lines.push(`## Automated promotion by \\`runImprovementLoop\\``)\n lines.push('')\n lines.push(`**Manifest**: \\`${result.manifestHash}\\``)\n lines.push(`**Seed**: ${result.seed}`)\n lines.push(`**Duration**: ${Math.round(result.durationMs / 1000)}s`)\n lines.push(\n `**Cells**: executed ${result.aggregates.cellsExecuted}, cached ${result.aggregates.cellsCached}, skipped ${result.aggregates.cellsSkipped}, failed ${result.aggregates.cellsFailed}`,\n )\n lines.push(`**Total spend**: $${result.aggregates.totalCostUsd.toFixed(2)}`)\n lines.push('')\n lines.push(`### Gate verdict: \\`${gate.decision}\\``)\n lines.push('')\n for (const reason of gate.reasons) lines.push(`- ${reason}`)\n if (gate.delta !== undefined) lines.push(`- delta: ${gate.delta.toFixed(3)}`)\n lines.push('')\n lines.push('### Contributing gates')\n lines.push('')\n lines.push('| gate | passed | detail |')\n lines.push('|---|---|---|')\n for (const c of gate.contributingGates) {\n const detail =\n typeof c.detail === 'object'\n ? JSON.stringify(c.detail).slice(0, 80)\n : String(c.detail).slice(0, 80)\n lines.push(`| ${c.name} | ${c.passed ? 
'✓' : '✗'} | ${detail} |`)\n }\n lines.push('')\n lines.push('### Promoted surface')\n lines.push('')\n lines.push('```diff')\n lines.push(diff.slice(0, 8000))\n lines.push('```')\n lines.push('')\n lines.push('### By-judge aggregates')\n lines.push('')\n lines.push('| judge | mean | ci95 | n |')\n lines.push('|---|---|---|---|')\n for (const [name, agg] of Object.entries(result.aggregates.byJudge)) {\n lines.push(\n `| ${name} | ${agg.mean.toFixed(3)} | [${agg.ci95[0].toFixed(3)}, ${agg.ci95[1].toFixed(3)}] | ${agg.n} |`,\n )\n }\n return lines.join('\\n')\n}\n\nfunction defaultGhExec(args: string[]): { stdout: string; stderr: string; status: number } {\n try {\n const stdout = execSync(`gh ${args.map(quoteArg).join(' ')}`, {\n env: { ...process.env, GH_TOKEN: process.env.GH_AUTO_PR_TOKEN ?? process.env.GH_TOKEN ?? '' },\n stdio: ['ignore', 'pipe', 'pipe'],\n }).toString('utf8')\n return { stdout, stderr: '', status: 0 }\n } catch (err) {\n const e = err as { status?: number; stderr?: Buffer; stdout?: Buffer }\n return {\n stdout: e.stdout?.toString('utf8') ?? '',\n stderr: e.stderr?.toString('utf8') ?? '',\n status: e.status ?? 1,\n }\n }\n}\n\nfunction quoteArg(arg: string): string {\n if (/^[a-zA-Z0-9_/\\-:.@]+$/.test(arg)) return arg\n return `\"${arg.replace(/\"/g, '\\\\\"')}\"`\n}\n","/**\n * @experimental\n *\n * `evolutionaryDriver` — adapts a stateless `Mutator` (population mutation:\n * GEPA / AxGEPA / reflective-mutation) into an `ImprovementDriver`. This is\n * the evolutionary strategy: each generation, mutate the current best surface\n * into N candidates, measure, select. No generation memory beyond the current\n * surface; the loop body handles ranking + promotion.\n *\n * The reflective alternative is agent-runtime's `improvementDriver` with a\n * `reflectiveGenerator` / `agenticGenerator`: it reasons over the report +\n * trace findings to propose targeted edits rather than blind mutations. 
Both\n * conform to `ImprovementDriver`; the improvement loop is identical regardless\n * of which drives it.\n */\n\nimport type { ImprovementDriver, Mutator } from '../types'\n\nexport interface EvolutionaryDriverOptions<TFindings = unknown> {\n mutator: Mutator<TFindings>\n /** External findings fed to the mutator each generation. Default: []. */\n findings?: TFindings[]\n}\n\nexport function evolutionaryDriver<TFindings = unknown>(\n opts: EvolutionaryDriverOptions<TFindings>,\n): ImprovementDriver<TFindings> {\n return {\n kind: `evolutionary:${opts.mutator.kind}`,\n async propose({ currentSurface, findings, populationSize, signal }) {\n return opts.mutator.mutate({\n findings: findings.length > 0 ? findings : (opts.findings ?? []),\n currentSurface,\n populationSize,\n signal,\n })\n },\n }\n}\n","/**\n * @experimental\n *\n * `gepaDriver` — a reflective `ImprovementDriver` for prompt-tier surfaces.\n * Each generation it reflects on the prior best candidate's per-scenario\n * scores + weakest dimensions, asks an LLM to propose targeted rewrites of\n * the current surface, and returns them as the next population.\n *\n * Maps onto the GEPA paper (Agrawal et al., arXiv:2507.19457):\n * - *Reflection*: each generation reflects on the best parent's weakest\n * dimensions + per-scenario top/bottom scores to propose targeted rewrites.\n * - *Pareto frontier*: `runOptimization` maintains the non-dominated set of\n * surfaces across generations (per-scenario objective vectors) and supplies\n * it as `ctx.paretoParents`. A surface uniquely best on one hard scenario\n * survives even when its mean composite is lower.\n * - *Combine complementary lessons*: when the frontier has >1 member, the\n * first population slot is a merge of those parents' strengths (one LLM\n * call citing each parent's winning scenarios). 
Toggle via `combineParents`.\n * Dominance is computed by the package-canonical `paretoFrontier` (`pareto.ts`).\n *\n * Optional `constraints` move structured-doc guards into the driver\n * (preserve H2 section headings, cap sentence-level edits) — useful when\n * the surface IS a structured procedure like a SKILL.md / runbook /\n * judge rubric. When `constraints` is omitted, behavior is unchanged.\n *\n * The driver is surface-agnostic — any string surface in any consumer opts\n * in by selecting it. Reuses the generic reflection primitive\n * (`buildReflectionPrompt` / `parseReflectionResponse`) and the router\n * client; no dependency on the legacy `runMultiShotOptimization` /\n * `prompt-evolution` orchestration.\n *\n * Earns its keep where there is real per-instance signal (which the\n * dimensional + per-scenario evidence + the `LabeledScenarioStore` flywheel\n * now provide). For thin-signal surfaces it degrades to plain reflection.\n * On generation 0 (no history) it reflects on the current surface against\n * the mutation primitives alone.\n */\n\nimport { callLlm, type LlmClientOptions } from '../../llm-client'\nimport {\n buildReflectionPrompt,\n parseReflectionResponse,\n type TrialTrace,\n} from '../../reflective-mutation'\nimport type { ImprovementDriver, ProposeContext, ProposedCandidate } from '../types'\n\nconst REFLECTION_SYSTEM =\n 'You are an expert prompt engineer. Output ONLY a JSON object of shape ' +\n '{\"proposals\":[{\"label\":string,\"rationale\":string,\"payload\":string}]} where ' +\n 'each `payload` is the FULL improved surface text. No prose outside the JSON.'\n\nconst COMBINE_SYSTEM =\n 'You are an expert prompt engineer performing a GEPA \"combine complementary ' +\n 'lessons\" merge. You are given several non-dominated versions of one surface; ' +\n 'each is uniquely best on different scenarios. 
Produce ONE new version that ' +\n 'keeps what makes each version strong on its winning scenarios and resolves ' +\n 'conflicts in favor of the more general rule. Output ONLY a JSON object of ' +\n 'shape {\"proposals\":[{\"label\":string,\"rationale\":string,\"payload\":string}]} ' +\n 'with exactly one proposal whose `payload` is the FULL merged surface text. ' +\n 'No prose outside the JSON.'\n\nexport interface GepaDriverConstraints {\n /** H2 section headings that MUST appear unchanged in every candidate.\n * When set, the driver auto-detects current H2s if this is empty AND\n * rejects any candidate that drops or renames a preserved heading.\n * Use when the surface is a structured doc (SKILL.md, runbook,\n * sectioned system prompt, judge rubric). */\n preserveSections?: string[]\n /** Maximum sentence-level edits per candidate vs the parent surface.\n * Rejection threshold = maxSentenceEdits × 2 (counts adds + removes).\n * Inspired by SkillOpt's edit-budget as a \"textual learning rate.\"\n * Cap prevents an LLM rewrite from overwriting useful prior rules. */\n maxSentenceEdits?: number\n}\n\nexport interface GepaDriverOptions {\n /** Router transport (apiKey/baseUrl). */\n llm: LlmClientOptions\n /** Model that performs the reflection. */\n model: string\n /** What is being optimized — appears in the reflection prompt for orientation. */\n target: string\n /** Surface-specific mutation levers offered to the model. */\n mutationPrimitives?: string[]\n /** Top/bottom scenarios surfaced as evidence each generation. Default 3. */\n evidenceK?: number\n /** Reflection sampling temperature. Default 0.7. */\n temperature?: number\n /** Reflection max tokens. Default 6000. */\n maxTokens?: number\n /** Structured-doc constraints. Candidates violating any are rejected\n * post-parse and dropped from the returned population. 
*/\n constraints?: GepaDriverConstraints\n /** GEPA combine-complementary-lessons: when the loop supplies a Pareto\n * frontier of >1 non-dominated parents (`ctx.paretoParents`), spend one\n * slot of the population on a merge of their strengths. Default `true` —\n * this is the GEPA-faithful behavior; the merge only fires once the\n * frontier has more than one member (generation ≥ 1). Set `false` for\n * pure single-parent reflection. */\n combineParents?: boolean\n /** Cap on how many frontier parents feed one combine prompt (highest\n * composite first), to bound prompt size. Default 4. */\n combineMaxParents?: number\n}\n\nexport function gepaDriver(opts: GepaDriverOptions): ImprovementDriver {\n const evidenceK = opts.evidenceK ?? 3\n const combineParents = opts.combineParents ?? true\n const combineMaxParents = opts.combineMaxParents ?? 4\n if (combineParents && combineMaxParents < 1) {\n throw new Error('gepaDriver: combineMaxParents must be >= 1 when combineParents is enabled')\n }\n return {\n kind: 'gepa',\n async propose(ctx: ProposeContext): Promise<ProposedCandidate[]> {\n const parent =\n typeof ctx.currentSurface === 'string'\n ? ctx.currentSurface\n : JSON.stringify(ctx.currentSurface)\n\n // Shared accept path: constraint checks + dedup, used by BOTH the\n // combine merge and the reflection fill so the population is consistent.\n const constraints = opts.constraints\n const preserveSections =\n constraints?.preserveSections !== undefined\n ? constraints.preserveSections.length === 0\n ? extractH2Sections(parent)\n : constraints.preserveSections\n : null\n const maxEdits = constraints?.maxSentenceEdits\n const out: ProposedCandidate[] = []\n const seen = new Set<string>()\n const accept = (payload: unknown, label: string, rationale: string): void => {\n const text = typeof payload === 'string' ? 
payload.trim() : ''\n if (!text || text === parent || seen.has(text)) return\n if (preserveSections && !validatePreservedSections(text, preserveSections)) return\n if (maxEdits !== undefined && countSentenceEdits(parent, text) > maxEdits * 2) return\n seen.add(text)\n // Thread label + rationale through so the candidate stays attributable:\n // the loop records WHY this rewrite was proposed, not just the payload.\n out.push({ surface: text, label, rationale })\n }\n\n // ── (1) GEPA combine-complementary-lessons ──────────────────────────\n // When the loop supplies >1 non-dominated parents, spend the first slot\n // merging their strengths. Only string surfaces merge (the driver is\n // prompt-tier); the merge prompt cites each parent's winning scenarios.\n const stringParents = (combineParents ? (ctx.paretoParents ?? []) : [])\n .filter((p): p is typeof p & { surface: string } => typeof p.surface === 'string')\n .sort((a, b) => b.composite - a.composite)\n .slice(0, combineMaxParents)\n if (stringParents.length > 1) {\n const combinePrompt = buildCombinePrompt({\n target: opts.target,\n parents: stringParents,\n evidenceK,\n })\n const combineResult = await callLlm(\n {\n model: opts.model,\n messages: [\n { role: 'system', content: COMBINE_SYSTEM },\n { role: 'user', content: combinePrompt },\n ],\n jsonMode: true,\n temperature: opts.temperature ?? 0.7,\n maxTokens: opts.maxTokens ?? 
6000,\n },\n opts.llm,\n )\n const merged = parseReflectionResponse(combineResult.content, 1)[0]\n if (merged) {\n accept(\n merged.payload,\n merged.label || 'pareto-combine',\n merged.rationale ||\n `combined ${stringParents.length} non-dominated parents (gen ${stringParents\n .map((p) => p.generation)\n .join(',')})`,\n )\n }\n }\n\n // ── (2) Reflection fill for the remaining population budget ──────────\n const reflectCount = Math.max(0, ctx.populationSize - out.length)\n if (reflectCount > 0) {\n const { top, bottom, target } = buildEvidence(ctx, evidenceK, opts.target)\n const userPrompt = buildReflectionPrompt({\n target,\n parentPayload: parent,\n topTrials: top,\n bottomTrials: bottom,\n childCount: reflectCount,\n mutationPrimitives: opts.mutationPrimitives,\n })\n const result = await callLlm(\n {\n model: opts.model,\n messages: [\n { role: 'system', content: REFLECTION_SYSTEM },\n { role: 'user', content: userPrompt },\n ],\n jsonMode: true,\n temperature: opts.temperature ?? 0.7,\n maxTokens: opts.maxTokens ?? 6000,\n },\n opts.llm,\n )\n for (const proposal of parseReflectionResponse(result.content, reflectCount)) {\n accept(proposal.payload, proposal.label, proposal.rationale)\n }\n }\n\n return out.slice(0, ctx.populationSize)\n },\n }\n}\n\n/** Build the GEPA combine prompt: each non-dominated parent's full surface +\n * the scenarios it scores highest on, so the model can merge complementary\n * strengths rather than blend blindly. */\nfunction buildCombinePrompt(args: {\n target: string\n parents: Array<{ surface: string; objectives: Record<string, number>; composite: number }>\n evidenceK: number\n}): string {\n const lines: string[] = [\n `You are merging ${args.parents.length} versions of: ${args.target}.`,\n '',\n 'Each version is on the Pareto frontier — none dominates the others; each',\n 'wins on different scenarios. Combine their complementary strengths into',\n 'ONE version. 
Below, each version lists the scenarios it scores highest on.',\n '',\n ]\n args.parents.forEach((p, i) => {\n const tag = String.fromCharCode(65 + i) // A, B, C...\n const best = Object.entries(p.objectives)\n .sort((a, b) => b[1] - a[1])\n .slice(0, args.evidenceK)\n .map(([id, score]) => `${id} (${score.toFixed(2)})`)\n lines.push(\n `### Version ${tag} (mean ${p.composite.toFixed(2)}; strongest on: ${\n best.join(', ') || 'n/a'\n })`,\n '```',\n p.surface,\n '```',\n '',\n )\n })\n lines.push(\n 'Return ONE merged version that would score well on the union of every',\n \"version's winning scenarios. Keep each version's specific winning rule;\",\n 'where two rules conflict, prefer the more general one and note the choice',\n 'in your rationale.',\n )\n return lines.join('\\n')\n}\n\n/** Extract H2 headings (`## Foo`) from a markdown surface. Exported for\n * consumers building custom mutators that share the same invariant. */\nexport function extractH2Sections(text: string): string[] {\n const out: string[] = []\n for (const line of text.split('\\n')) {\n const match = /^##\\s+(.+?)\\s*$/.exec(line)\n if (match) out.push(match[1]!)\n }\n return out\n}\n\n/** Sentence-level edit distance — count distinct add/remove ops between\n * two surfaces via a normalised line-by-line set diff. Treats trivial\n * whitespace as identical. Exported for tests + consumer-side validators. 
*/\nexport function countSentenceEdits(baseline: string, candidate: string): number {\n const norm = (s: string) =>\n s\n .split(/(?<=[.!?])\\s+|\\n/g)\n .map((p) => p.trim())\n .filter((p) => p.length > 0)\n const a = new Set(norm(baseline))\n const b = new Set(norm(candidate))\n let edits = 0\n for (const s of a) if (!b.has(s)) edits++\n for (const s of b) if (!a.has(s)) edits++\n return edits\n}\n\nfunction validatePreservedSections(candidate: string, required: readonly string[]): boolean {\n if (required.length === 0) return true\n const have = new Set(extractH2Sections(candidate))\n for (const section of required) {\n if (!have.has(section)) return false\n }\n return true\n}\n\n/** Turn the prior generation's best candidate into reflective evidence:\n * top/bottom scenarios by composite + a weakest-dimensions note on the target.\n * Empty on generation 0 — the model reflects on the surface alone. */\nfunction buildEvidence(\n ctx: ProposeContext,\n evidenceK: number,\n baseTarget: string,\n): { top: TrialTrace[]; bottom: TrialTrace[]; target: string } {\n const last = ctx.history.at(-1)\n if (!last || last.candidates.length === 0) {\n return { top: [], bottom: [], target: baseTarget }\n }\n const best = [...last.candidates].sort((a, b) => b.composite - a.composite)[0]\n if (!best) return { top: [], bottom: [], target: baseTarget }\n\n const byScore = [...best.scenarios].sort((a, b) => b.composite - a.composite)\n const toTrace = (s: { scenarioId: string; composite: number }): TrialTrace => ({\n id: s.scenarioId,\n score: s.composite,\n })\n const top = byScore.slice(0, evidenceK).map(toTrace)\n const bottom = byScore.slice(-evidenceK).reverse().map(toTrace)\n\n const weakest = Object.entries(best.dimensions)\n .sort((a, b) => a[1] - b[1])\n .slice(0, 3)\n .map(([dim, value]) => `${dim} (${value.toFixed(2)})`)\n const target =\n weakest.length > 0 ? 
`${baseTarget} — weakest dimensions: ${weakest.join(', ')}` : baseTarget\n\n return { top, bottom, target }\n}\n","/**\n * @experimental\n *\n * Compose multiple `Gate` implementations — every gate must pass for the\n * composite to ship. Closes the alignment reviewer's \"default-only\n * heldOutGate + costGate would happily promote a reward-hacked prompt\"\n * concern by making safety gates first-class composable defaults.\n */\n\nimport type { Gate, GateContext, GateDecision, GateResult, Scenario } from '../types'\n\n/** Compose gates — all must `ship` for the composite to `ship`. First\n * non-ship verdict short-circuits the composite verdict, but ALL gates run\n * (so the result records every gate's reason — useful for diagnostics). */\nexport function composeGate<TArtifact = unknown, TScenario extends Scenario = Scenario>(\n ...gates: Array<Gate<TArtifact, TScenario>>\n): Gate<TArtifact, TScenario> {\n if (gates.length === 0) {\n throw new Error('composeGate requires at least one gate')\n }\n return {\n name: `composed(${gates.map((g) => g.name).join(',')})`,\n async decide(ctx: GateContext<TArtifact, TScenario>): Promise<GateResult> {\n const results: Array<{ gate: Gate<TArtifact, TScenario>; res: GateResult }> = []\n for (const gate of gates) {\n const res = await gate.decide(ctx)\n results.push({ gate, res })\n }\n\n // Substrate-wide verdict policy:\n // - all 'ship' → 'ship'\n // - any 'arch_ceiling' → 'arch_ceiling' (architectural ceiling beats other holds)\n // - any 'model_ceiling' → 'model_ceiling'\n // - any 'hold' → 'hold'\n // - else 'need_more_work'\n const decisions = results.map((r) => r.res.decision)\n const overall: GateDecision = decisions.every((d) => d === 'ship')\n ? 'ship'\n : decisions.includes('arch_ceiling')\n ? 'arch_ceiling'\n : decisions.includes('model_ceiling')\n ? 'model_ceiling'\n : decisions.includes('hold')\n ? 'hold'\n : 'need_more_work'\n\n const contributing = results.flatMap((r) =>\n r.res.contributingGates.length > 0\n ? 
r.res.contributingGates\n : [{ name: r.gate.name, passed: r.res.decision === 'ship', detail: r.res }],\n )\n\n const reasons = results.flatMap((r) =>\n r.res.reasons.map((reason) => `[${r.gate.name}] ${reason}`),\n )\n\n return {\n decision: overall,\n reasons,\n contributingGates: contributing,\n delta: results[0]?.res.delta,\n }\n },\n }\n}\n","/**\n * @experimental\n *\n * `defaultProductionGate` — composes the substrate's existing safety\n * primitives (red-team / reward-hacking / canary / heldout) into a single\n * Gate.decide shape. Closes the alignment + Anthropic-SI reviewers' \"safety\n * primitives are off the critical path\" blocker.\n *\n * The composition is opinionated — when consumers wire `runImprovementLoop`,\n * THIS gate is the default. Consumers can still pass a custom gate to\n * override; the recommended pattern is to compose THIS gate with whatever\n * extra domain-specific gates they need (`composeGate(defaultProductionGate(...), customGate)`).\n */\n\nimport type { CanaryReport } from '../../canary'\nimport { runCanaries } from '../../canary'\nimport type { RedTeamCase } from '../../red-team'\nimport { scoreRedTeamOutput } from '../../red-team'\nimport type { RewardHackingReport } from '../../rl/reward-hacking'\nimport { detectRewardHacking } from '../../rl/reward-hacking'\nimport type { RunRecord } from '../../run-record'\nimport type { Gate, GateContext, GateResult, Scenario } from '../types'\n\nexport interface DefaultProductionGateOptions {\n /** Required: scenarios held out from training; substrate compares\n * candidate-on-holdout vs baseline-on-holdout. */\n holdoutScenarios: Scenario[]\n /** Minimum mean-composite improvement required to ship. Default 0.5. */\n deltaThreshold?: number\n /** Total $ budget for ALL cells in this campaign — including baseline + candidate.\n * Composite verdict refuses to ship when spend exceeded budget. */\n budgetUsd?: number\n /** Red-team cases to probe candidate outputs against. 
When omitted the\n * substrate uses `DEFAULT_RED_TEAM_CORPUS`. Provide a domain-specific\n * battery for tighter coverage. */\n redTeamBattery?: RedTeamCase[]\n /** Run records (oldest-first) needed for the reward-hacking detector.\n * Substrate populates from prior production-loop generations. */\n recentRuns?: RunRecord[]\n /** When true, the gate refuses to ship if the reward-hacking detector\n * fires at the `gaming` severity. Default true. */\n blockOnRewardHackingGaming?: boolean\n}\n\nexport function defaultProductionGate<TArtifact, TScenario extends Scenario>(\n options: DefaultProductionGateOptions,\n): Gate<TArtifact, TScenario> {\n const deltaThreshold = options.deltaThreshold ?? 0.5\n const blockOnGaming = options.blockOnRewardHackingGaming ?? true\n\n return {\n name: 'defaultProductionGate',\n async decide(ctx: GateContext<TArtifact, TScenario>): Promise<GateResult> {\n const reasons: string[] = []\n const contributing: Array<{ name: string; passed: boolean; detail: unknown }> = []\n\n // ── (1) heldout composite delta ─────────────────────────────────\n // Baseline scores come from their OWN map; sharing `judgeScores` would\n // compare the candidate against itself (delta 0).\n const baselineComposite = meanComposite(\n ctx.baselineArtifacts,\n ctx.baselineJudgeScores ?? 
ctx.judgeScores,\n options.holdoutScenarios,\n )\n const candidateComposite = meanComposite(\n ctx.candidateArtifacts,\n ctx.judgeScores,\n options.holdoutScenarios,\n )\n const delta = candidateComposite - baselineComposite\n const heldoutPass = delta >= deltaThreshold\n contributing.push({\n name: 'heldout-delta',\n passed: heldoutPass,\n detail: { baselineComposite, candidateComposite, delta, deltaThreshold },\n })\n if (!heldoutPass) {\n reasons.push(`heldout delta ${delta.toFixed(3)} < threshold ${deltaThreshold}`)\n }\n\n // ── (2) budget gate ─────────────────────────────────────────────\n const budgetPass =\n options.budgetUsd === undefined ||\n ctx.cost.candidate + ctx.cost.baseline <= options.budgetUsd\n contributing.push({\n name: 'budget',\n passed: budgetPass,\n detail: {\n candidateUsd: ctx.cost.candidate,\n baselineUsd: ctx.cost.baseline,\n budgetUsd: options.budgetUsd,\n },\n })\n if (!budgetPass) {\n reasons.push(\n `spend ${(ctx.cost.candidate + ctx.cost.baseline).toFixed(2)} > budget ${options.budgetUsd}`,\n )\n }\n\n // ── (3) red-team probe on candidate ─────────────────────────────\n const redTeamFindings = options.redTeamBattery\n ? probeRedTeam(ctx.candidateArtifacts, options.redTeamBattery)\n : { passed: true, findings: [] }\n contributing.push({\n name: 'red-team',\n passed: redTeamFindings.passed,\n detail: {\n failures: redTeamFindings.findings.length,\n sample: redTeamFindings.findings.slice(0, 3),\n },\n })\n if (!redTeamFindings.passed) {\n reasons.push(`red-team probe failed (${redTeamFindings.findings.length} findings)`)\n }\n\n // ── (4) reward-hacking detector on the run-history window ───────\n let rewardHackingReport: RewardHackingReport | null = null\n if (options.recentRuns && options.recentRuns.length >= 10) {\n rewardHackingReport = detectRewardHacking({ runs: options.recentRuns })\n }\n // reward-hacking severity is numeric (0..1). \"gaming\" threshold per\n // detectRewardHacking defaults = 0.6. 
Block when ANY finding is at\n // gaming threshold OR the report verdict is 'gaming'.\n const gamingThreshold = 0.6\n const gamingFindings = (rewardHackingReport?.findings ?? []).filter(\n (f) => f.severity >= gamingThreshold,\n )\n const rewardHackingPass =\n !rewardHackingReport ||\n !blockOnGaming ||\n (gamingFindings.length === 0 && rewardHackingReport.verdict !== 'gaming')\n contributing.push({\n name: 'reward-hacking',\n passed: rewardHackingPass,\n detail: { report: rewardHackingReport, gamingFindingCount: gamingFindings.length },\n })\n if (!rewardHackingPass) {\n reasons.push(\n `reward-hacking detector flagged ${gamingFindings.length} gaming-severity findings (verdict=${rewardHackingReport!.verdict})`,\n )\n }\n\n // ── (5) canary check on runs ────────────────────────────────────\n let canaryReport: CanaryReport | null = null\n if (options.recentRuns && options.recentRuns.length >= 10) {\n canaryReport = runCanaries(options.recentRuns, {})\n }\n // CanarySeverity is 'info' | 'warn' | 'error' — block on 'error'.\n const errorAlerts = (canaryReport?.alerts ?? []).filter((a) => a.severity === 'error')\n const canaryPass = errorAlerts.length === 0\n contributing.push({\n name: 'canary',\n passed: canaryPass,\n detail: { totalAlerts: canaryReport?.alerts.length ?? 0, errorAlerts: errorAlerts.length },\n })\n if (!canaryPass) {\n reasons.push(`canary error alerts: ${errorAlerts.length}`)\n }\n\n // ── Verdict ─────────────────────────────────────────────────────\n const allPassed = contributing.every((c) => c.passed)\n const decision = allPassed ? 'ship' : 'hold'\n\n return {\n decision,\n reasons: reasons.length > 0 ? 
reasons : ['all gates passed'],\n contributingGates: contributing,\n delta,\n }\n },\n }\n}\n\nfunction meanComposite<TArtifact, TScenario extends Scenario>(\n artifacts: Map<string, TArtifact> | undefined,\n judgeScoresByCell: Map<string, Record<string, { composite: number }>>,\n scenarios: TScenario[],\n): number {\n if (!artifacts || artifacts.size === 0) return 0\n const scenarioIds = new Set(scenarios.map((s) => s.id))\n const composites: number[] = []\n for (const [cellId, scores] of judgeScoresByCell) {\n const scenarioId = cellId.split(':')[0] ?? ''\n if (!scenarioIds.has(scenarioId)) continue\n const cellComposites = Object.values(scores).map((s) => s.composite)\n if (cellComposites.length === 0) continue\n composites.push(cellComposites.reduce((a, b) => a + b, 0) / cellComposites.length)\n }\n if (composites.length === 0) return 0\n return composites.reduce((a, b) => a + b, 0) / composites.length\n}\n\nfunction probeRedTeam<TArtifact>(\n artifacts: Map<string, TArtifact>,\n battery: RedTeamCase[],\n): { passed: boolean; findings: Array<{ scenarioId: string; reason: string }> } {\n const findings: Array<{ scenarioId: string; reason: string }> = []\n for (const [_cellId, artifact] of artifacts) {\n const text = extractText(artifact)\n if (text === undefined) continue\n for (const rtCase of battery) {\n const finding = scoreRedTeamOutput(text, [], rtCase)\n if (!finding.passed) {\n findings.push({ scenarioId: rtCase.id, reason: finding.reason ?? 
'red-team probe failed' })\n }\n }\n }\n return { passed: findings.length === 0, findings }\n}\n\nfunction extractText(artifact: unknown): string | undefined {\n if (typeof artifact === 'string') return artifact\n if (artifact && typeof artifact === 'object') {\n const rec = artifact as Record<string, unknown>\n if (typeof rec.text === 'string') return rec.text\n if (typeof rec.output === 'string') return rec.output\n if (typeof rec.content === 'string') return rec.content\n }\n return undefined\n}\n","/**\n * @experimental\n *\n * Thin Gate adapter — exposes delta-threshold-on-holdout as a composable\n * `Gate`. Use when you want held-out as one of N composed gates instead of\n * the full `defaultProductionGate` stack.\n */\n\nimport type { Gate, GateContext, GateResult, Scenario } from '../types'\n\nexport interface HeldOutGateOptions<TScenario extends Scenario = Scenario> {\n scenarios: TScenario[]\n deltaThreshold?: number\n}\n\nexport function heldOutGate<TArtifact, TScenario extends Scenario>(\n options: HeldOutGateOptions<TScenario>,\n): Gate<TArtifact, TScenario> {\n const deltaThreshold = options.deltaThreshold ?? 0.5\n return {\n name: 'heldOutGate',\n async decide(ctx: GateContext<TArtifact, TScenario>): Promise<GateResult> {\n const scenarioIds = new Set(options.scenarios.map((s) => s.id))\n // Baseline scores live in their OWN map — falling back to `judgeScores`\n // would compare the candidate against itself (delta 0).\n const baseline = meanForScenarios(ctx.baselineJudgeScores ?? ctx.judgeScores, scenarioIds)\n const candidate = meanForScenarios(ctx.judgeScores, scenarioIds)\n const delta = candidate - baseline\n const passed = delta >= deltaThreshold\n return {\n decision: passed ? 'ship' : 'hold',\n reasons: passed\n ? 
[`held-out delta ${delta.toFixed(3)} ≥ ${deltaThreshold}`]\n : [`held-out delta ${delta.toFixed(3)} < ${deltaThreshold}`],\n contributingGates: [\n { name: 'heldOutGate', passed, detail: { baseline, candidate, delta, deltaThreshold } },\n ],\n delta,\n }\n },\n }\n}\n\nfunction meanForScenarios(\n judgeScoresByCell: Map<string, Record<string, { composite: number }>>,\n scenarioIds: Set<string>,\n): number {\n const composites: number[] = []\n for (const [cellId, scores] of judgeScoresByCell) {\n const scenarioId = cellId.split(':')[0] ?? ''\n if (!scenarioIds.has(scenarioId)) continue\n const vals = Object.values(scores).map((s) => s.composite)\n if (vals.length > 0) composites.push(vals.reduce((a, b) => a + b, 0) / vals.length)\n }\n return composites.length === 0 ? 0 : composites.reduce((a, b) => a + b, 0) / composites.length\n}\n","/**\n * @experimental\n *\n * Pass A substrate types — `runCampaign` is the one primitive every\n * eval flow composes from. Three contracts in this file:\n *\n * - `Scenario` input set\n * - `DispatchFn` how to run one scenario → artifact\n * - `CampaignResult` defined output schema (the contract downstream tools depend on)\n *\n * Three more lifted from earlier substrate work (re-exported):\n *\n * - `JudgeConfig` pluggable dimensional scorer (0.38)\n * - `Mutator` optimization-loop surface mutator\n * - `Gate` promotion gate (`HeldOutGate` and friends adapt to this)\n *\n * No new architecture vs 0.38 — Pass A formalizes the shapes so consumers\n * can build dashboards / CI gates / regression diffs against a stable schema.\n */\n\nimport type { RunTokenUsage } from '../run-record'\n\n/** @experimental Stable identifier + kind tag for any scenario. Consumers\n * extend with their per-domain payload (persona, task, requirement, ...). */\nexport interface Scenario {\n id: string\n kind: string\n tags?: string[]\n}\n\n/** @experimental Context handed to every dispatch invocation. 
Scoped — every\n * trace/span carries the cellId, every artifact write lands under the cell's\n * artifact root, the cost meter accumulates per cell. */\nexport interface DispatchContext {\n cellId: string\n rep: number\n generation?: number\n seed: number\n signal: AbortSignal\n trace: CampaignTraceWriter\n artifacts: CampaignArtifactWriter\n cost: CampaignCostMeter\n /** Populated when this run is part of a multi-cycle improvement loop. */\n cycleId?: string\n /** Populated when the substrate resumed from a prior cache hit. */\n resumedFrom?: string\n /**\n * Opaque placement key supplied by `RunCampaignOptions.cellPlacement`.\n * The substrate forwards it through unchanged; placement-aware Dispatch\n * implementations (e.g. `httpDispatch` from `/adapters/http`) read it to\n * route the cell to the right worker / region / sandbox. `undefined`\n * when no placement strategy is configured.\n */\n placement?: string\n}\n\n/** @experimental One function: scenario + ctx → artifact. Dispatcher chooses\n * whether to call `runMultishot`, `runLoop`, raw `streamPrompt`, anything. */\nexport type DispatchFn<TScenario extends Scenario, TArtifact> = (\n scenario: TScenario,\n ctx: DispatchContext,\n) => Promise<TArtifact>\n\n// ── Sessions ──────────────────────────────────────────────────────────\n\n/** @experimental One session within a multi-session journey. Dispatch is\n * invoked once per session in order; state from prior session's artifact\n * is exposed via `ctx.priorSessionArtifact`. */\nexport interface SessionScript<TScenario, TArtifact> {\n id: string\n intent: string\n maxTurns?: number\n /** When true, knowledge accumulated this session persists to next. */\n affectsKnowledge?: boolean\n /** Optional per-session persona evolution — called after the session\n * resolves. Returns the persona shape used by the NEXT session. 
*/\n evolveAfterSession?: (artifact: TArtifact, sessionIndex: number, scenario: TScenario) => TScenario\n}\n\n// ── Judges (re-export 0.38 shape) ─────────────────────────────────────\n\nexport interface JudgeDimension {\n /** JSON field name + score key. */\n key: string\n /** Description shown in the judge's user prompt. */\n description: string\n}\n\n/** @experimental Pluggable dimensional scorer. `score` is the contract:\n * given an artifact + scenario, return a `JudgeScore`. This is deliberately a\n * function, not a fixed LLM-prompt shape — real consumers judge with\n * ensembles, deterministic checks, or a single LLM call, and the substrate\n * must not constrain that. The `llmJudge()` helper builds a `score` that does\n * one LLM call for the common case. `appliesTo` lets a judge run only on\n * scenarios that match (e.g. a legal-citation judge only on legal scenarios). */\nexport interface JudgeConfig<TArtifact, TScenario extends Scenario = Scenario> {\n name: string\n dimensions: JudgeDimension[]\n /** Score one artifact. Throw on failure — a thrown judge is recorded as a\n * failed cell, never silently folded into a zero. */\n score(input: {\n artifact: TArtifact\n scenario: TScenario\n signal: AbortSignal\n }): JudgeScore | Promise<JudgeScore>\n appliesTo?: (scenario: TScenario) => boolean\n}\n\nexport interface JudgeScore {\n dimensions: Record<string, number>\n composite: number\n notes: string\n}\n\n// ── Optimization (population + generations + mutator) ─────────────────\n\n/** @experimental A tier-4 code surface — a candidate change to the agent's\n * IMPLEMENTATION, not its prompt. Produced by autoresearch (reads codebase +\n * trace findings → opens a worktree). Measured by checking out `worktreeRef`\n * and running the worker against the changed code. See the improvement-tier\n * table in `docs/design/loop-taxonomy.md`. */\nexport interface CodeSurface {\n kind: 'code'\n /** Worktree path or git ref holding the candidate code change. 
The\n * consumer's `dispatchWithSurface` checks this out before running. */\n worktreeRef: string\n /** Base ref the change is measured against. Default: the repo's main. */\n baseRef?: string\n /** Human summary of what changed — rendered into the auto-PR body. */\n summary?: string\n}\n\n/** @experimental The mutable surface a driver proposes. Tiers (see\n * `docs/design/loop-taxonomy.md`):\n * - `string` — tiers 1-2: system-prompt addendum / serialized tool\n * config. Cheap, reversible, text-diffable.\n * - `CodeSurface` — tier 4: an implementation change behind a worktree ref.\n * Tier 3 (knowledge) is owned by agent-knowledge and rides its own adapter,\n * not this type. */\nexport type MutableSurface = string | CodeSurface\n\n/** @experimental A driver proposal carrying the surface AND the WHY behind\n * it. Reflective drivers (`gepaDriver`) parse a `{label, rationale, payload}`\n * from the model; without this wrapper the loop keeps only `payload` and the\n * rationale that motivated the change is lost — the candidate becomes\n * unattributable. `propose()` may return either bare `MutableSurface`s (cheap\n * blind mutators) or these (reflective drivers); the loop normalizes both. */\nexport interface ProposedCandidate {\n surface: MutableSurface\n /** Short human label for the change (≤ 40 chars typical). */\n label: string\n /** Why this change was proposed — which failure it targets, which\n * primitive it used. Survives to `GenerationCandidate.rationale` and the\n * emitted provenance record. */\n rationale: string\n}\n\n/** @experimental Type guard: a proposal carrying its rationale vs a bare\n * surface. The loop branches on this to populate `GenerationCandidate`. 
*/\nexport function isProposedCandidate(\n value: MutableSurface | ProposedCandidate,\n): value is ProposedCandidate {\n return (\n typeof value === 'object' &&\n value !== null &&\n 'surface' in value &&\n 'label' in value &&\n 'rationale' in value\n )\n}\n\n/** @experimental A non-dominated parent on the GEPA Pareto frontier — a\n * surface that, across the per-scenario objective vectors, no other tried\n * surface beats on every scenario. A candidate worse on the mean composite\n * but uniquely best on one hard scenario is non-dominated and survives here;\n * the composite-best ranking would discard the lesson it carries. The loop\n * computes the frontier across ALL generations and hands it to the driver so\n * a reflective driver can combine complementary lessons (GEPA, Agrawal et\n * al., arXiv:2507.19457). See `pareto.ts` (`paretoFrontier`). */\nexport interface ParetoParent {\n surface: MutableSurface\n surfaceHash: string\n /** The objective vector: per-scenario composite (higher is better). The\n * axes the frontier is computed over. */\n objectives: Record<string, number>\n /** Mean composite across the objective scenarios — the scalar summary used\n * for ordering + display, NOT for dominance. */\n composite: number\n /** Generation that produced this surface (`-1` for the baseline). */\n generation: number\n label?: string\n rationale?: string\n}\n\n/** @experimental Stateless surface mutation — given findings + current\n * surface, return N candidate surfaces. Pure transform, no generation\n * awareness. Reflective-mutation, `runMultiShotOptimization`, `AxGEPA`\n * conform. Wrapped by `evolutionaryDriver` to become an `ImprovementDriver`. 
*/\nexport interface Mutator<TFindings = unknown> {\n kind: string\n mutate(args: {\n findings: TFindings[]\n currentSurface: MutableSurface\n populationSize: number\n signal: AbortSignal\n }): Promise<Array<MutableSurface | ProposedCandidate>>\n}\n\n/** @experimental Everything a driver's `propose()` may read to plan the next\n * batch of candidates. The first six fields are always present; the rest are\n * optional context the loop supplies when available, so cheap drivers\n * (`evolutionaryDriver`) can ignore them while a code-tier agentic generator\n * consumes the research report + dataset to drive a coding harness.\n * See `docs/design/self-improvement-engine.md`. */\nexport interface ProposeContext<TFindings = unknown> {\n currentSurface: MutableSurface\n history: GenerationRecord[]\n findings: TFindings[]\n /** BREADTH: how many candidate surfaces to return this generation. */\n populationSize: number\n generation: number\n signal: AbortSignal\n /** The Phase-2 research report (analyst findings + diff), produced AFTER the\n * trace analysts run. Opaque to the substrate — the driver that consumes it\n * types it. See the phase diagram in self-improvement-engine.md. */\n report?: unknown\n /** Handle to all captured data — the driver samples traces / artifacts /\n * rewards here to ground its proposals. */\n dataset?: LabeledScenarioStore\n /** DEPTH: max iterations the agentic generator may take per candidate.\n * 1 = single-shot; >1 = it may iterate on its own change before handing it\n * back to be measured. */\n maxImprovementShots?: number\n /** GEPA Pareto frontier across ALL generations so far — the non-dominated\n * surfaces by per-scenario objective vector. Empty/absent on generation 0\n * (only the baseline is scored). A reflective driver combines the\n * complementary lessons of these parents (each excels on different\n * scenarios) into a merged candidate. Drivers doing pure single-parent\n * reflection may ignore it. See {@link ParetoParent}. 
*/\n paretoParents?: ParetoParent[]\n}\n\n/** @experimental A surface-improvement strategy — the DRIVER of the\n * improvement loop. Given the current best surface, the history of what's\n * been tried + scored, and any external findings, propose the next batch of\n * candidate surfaces to measure. Optionally decide to stop early.\n *\n * The evolutionary mutator (`evolutionaryDriver`, here) and agent-runtime's\n * `improvementDriver` (with reflective / agentic generators) both conform —\n * drivers of the SAME loop, not separate loops. The loop body\n * (`runOptimization`) and the gated promotion shell (`runImprovementLoop`)\n * are driver-agnostic. */\nexport interface ImprovementDriver<TFindings = unknown> {\n kind: string\n /** Plan: propose N candidate surfaces for the next generation. A driver\n * may return bare `MutableSurface`s or `ProposedCandidate`s that carry the\n * `{label, rationale}` motivating the change — the loop threads the\n * rationale into `GenerationCandidate` and the emitted provenance. */\n propose(ctx: ProposeContext<TFindings>): Promise<Array<MutableSurface | ProposedCandidate>>\n /** Decide: stop early when the driver judges the search converged or\n * exhausted. Default (omitted) runs all `maxGenerations`. */\n decide?(args: { history: GenerationRecord[] }): { stop: boolean; reason?: string }\n}\n\nexport interface OptimizerConfig {\n driver: ImprovementDriver\n populationSize: number\n maxGenerations: number\n surfaceExtractor: (profile: unknown) => MutableSurface\n}\n\n// ── Gates ─────────────────────────────────────────────────────────────\n\n/** @experimental Five-valued verdict taxonomy (MOSS-paper alignment). */\nexport type GateDecision = 'ship' | 'hold' | 'need_more_work' | 'model_ceiling' | 'arch_ceiling'\n\nexport interface GateContext<TArtifact, TScenario extends Scenario> {\n candidateArtifacts: Map<string, TArtifact>\n baselineArtifacts?: Map<string, TArtifact>\n /** Candidate (winner) judge scores, keyed by cellId. 
*/\n judgeScores: Map<string, Record<string, JudgeScore>>\n /** Baseline judge scores, keyed by cellId. SEPARATE from `judgeScores` —\n * baseline + candidate share cellIds (same scenarios), so a single map\n * cannot represent both. A gate computing a holdout delta MUST read\n * candidate from `judgeScores` and baseline from here. */\n baselineJudgeScores?: Map<string, Record<string, JudgeScore>>\n scenarios: TScenario[]\n cost: { candidate: number; baseline: number }\n signal: AbortSignal\n}\n\nexport interface GateResult {\n decision: GateDecision\n reasons: string[]\n contributingGates: Array<{ name: string; passed: boolean; detail: unknown }>\n delta?: number\n}\n\n/** @experimental Composable promotion gate. */\nexport interface Gate<TArtifact = unknown, TScenario extends Scenario = Scenario> {\n name: string\n decide(ctx: GateContext<TArtifact, TScenario>): Promise<GateResult>\n}\n\n// ── Tracing / artifacts / cost ────────────────────────────────────────\n\n/** @experimental Scoped trace writer handed to each dispatch — every span\n * auto-tagged with the cellId so traces filter cleanly. */\nexport interface CampaignTraceWriter {\n span(name: string, attributes?: Record<string, unknown>): TraceSpan\n flush(): Promise<void>\n}\n\nexport interface TraceSpan {\n end(attributes?: Record<string, unknown>): void\n setAttribute(key: string, value: unknown): void\n}\n\n/** @experimental Scoped artifact writer — `write(path, content)` lands under\n * `<runDir>/<cellId>/<path>`. */\nexport interface CampaignArtifactWriter {\n write(path: string, content: string | Uint8Array): Promise<string>\n writeJson(path: string, value: unknown): Promise<string>\n}\n\n/** Token usage accumulated for a cell. Aliased to the canonical `RunTokenUsage`\n * (run-record.ts, same package) so a cell maps onto a `RunRecord` for the\n * backend-integrity guard with ONE source of truth — a field added to\n * `RunTokenUsage` is a compile error here, not a silent drift. 
*/\nexport type CampaignTokenUsage = RunTokenUsage\n\n/** @experimental Cell-scoped cost meter. NOTHING is captured automatically —\n * the substrate does not intercept the LLM call, so it cannot see cost or\n * tokens unless the dispatch reports them. Every LLM cost MUST be reported via\n * `observe` and every token count via `observeTokens`; a dispatch that reports\n * neither yields a `{cost:0, tokens:0}` cell, which the backend-integrity\n * guard (`assertRealBackend`) correctly reads as a stub. Also use `observe`\n * for non-LLM spend (sandbox time, tool costs). */\nexport interface CampaignCostMeter {\n observe(amountUsd: number, source: string): void\n /** Record LLM token usage for this cell; accumulates across calls. A cell\n * has `costUsd` but no token counts unless the dispatch reports them here —\n * and the backend-integrity guard (`assertRealBackend`) keys on\n * `tokenUsage`, so a cell that never reports tokens reads as a stub. Any\n * dispatch that calls an LLM MUST report its usage. */\n observeTokens(usage: CampaignTokenUsage): void\n current(): number\n /** Accumulated token usage for this cell (zeros if never observed). */\n tokens(): CampaignTokenUsage\n}\n\n// ── LabeledScenarioStore ──────────────────────────────────────────────\n\n/** @experimental Source tag — required on every store write. Used by the\n * default training-source filter (production-trace samples NOT used as\n * training scenarios unless explicitly opted in). */\nexport type LabeledScenarioSource =\n | 'production-trace'\n | 'eval-run'\n | 'manual'\n | 'red-team'\n | 'synthetic'\n\nexport type RedactionStatus = 'raw' | 'redacted-pii' | 'redacted-secrets' | 'fully-redacted'\n\n/** How much a label can be trusted to evaluate against — the gold-admission\n * gate. Strictly ordered: a record qualifies for a `minTrust` filter when its\n * trust rank is >= the requested rank.\n *\n * - `unverified` — label is a heuristic (e.g. 
raw outcome success/fail).\n * Fine as corpus; MUST NOT enter a gold set that lift\n * numbers are computed against.\n * - `verified-signal` — an external signal confirmed the outcome (PR merged,\n * tests green, user did not retry, downstream check).\n * - `human-rated` — a human explicitly rated or corrected the artifact.\n *\n * Absent on a write ⇒ treated as `unverified` (fail-closed: a writer must\n * explicitly assert trust to make a record gold-eligible — it never happens\n * by accident). */\nexport type LabelTrust = 'unverified' | 'verified-signal' | 'human-rated'\n\nconst LABEL_TRUST_RANK: Record<LabelTrust, number> = {\n unverified: 0,\n 'verified-signal': 1,\n 'human-rated': 2,\n}\n\n/** Ordinal rank for a label-trust tier; absent ⇒ `unverified` (rank 0). */\nexport function labelTrustRank(trust: LabelTrust | undefined): number {\n return LABEL_TRUST_RANK[trust ?? 'unverified']\n}\n\n/** @experimental Required-provenance write. The store rejects writes that\n * lack provenance — a default-on flywheel without provenance is the\n * data-poisoning vector flagged in the alignment review. */\nexport interface LabeledScenarioWrite<TScenario extends Scenario = Scenario, TArtifact = unknown> {\n scenario: TScenario\n artifact: TArtifact\n judgeScores: Record<string, JudgeScore>\n source: LabeledScenarioSource\n sourceVersionHash: string\n capturedAt: string\n redactionStatus: RedactionStatus\n /** Gold-admission trust tier. Absent ⇒ `unverified` (fail-closed): the\n * record is corpus, never gold. A writer must explicitly assert\n * `verified-signal` or `human-rated` to make it eligible for a gold\n * sample. See {@link LabelTrust}. */\n labelTrust?: LabelTrust\n /** Optional per-source rate-limit bucket key (e.g., the tenant id). 
*/\n rateLimitBucket?: string\n}\n\nexport interface LabeledScenarioRecord<TScenario extends Scenario = Scenario, TArtifact = unknown>\n extends LabeledScenarioWrite<TScenario, TArtifact> {\n /** Stable hash of (scenario.id, source, capturedAt, sourceVersionHash). */\n recordHash: string\n /** Substrate-assigned split — train if captured before the campaign's\n * `temporalCutoff`, test if after. Explicit override allowed via filter. */\n split: 'train' | 'test'\n}\n\nexport interface LabeledScenarioSampleArgs {\n count: number\n /** REQUIRED — substrate refuses to sample without an explicit split. */\n split: 'train' | 'test'\n /** REQUIRED — only records captured before this timestamp are returned.\n * Enforces temporal split discipline (test scenarios captured AFTER train\n * cannot enter the training pool). */\n capturedBefore: string\n filter?: {\n kind?: string\n source?: LabeledScenarioSource | LabeledScenarioSource[]\n minComposite?: number\n maxComposite?: number\n /** Gold gate: only records whose trust rank is >= this tier are\n * returned. `sample({ split: 'test', minTrust: 'verified-signal' })` is\n * the canonical \"give me the gold set\" call. Absent ⇒ no trust gate\n * (corpus-level read). */\n minTrust?: LabelTrust\n }\n}\n\nexport interface LabeledScenarioStore {\n observe(write: LabeledScenarioWrite): Promise<void>\n sample(args: LabeledScenarioSampleArgs): Promise<LabeledScenarioRecord[]>\n size(): Promise<{\n train: number\n test: number\n bySource: Record<string, number>\n /** Count by trust tier — tells the flywheel how much gold it has\n * accumulated vs. raw corpus. 
*/\n byTrust: Record<LabelTrust, number>\n }>\n}\n\n// ── The CampaignResult schema (the downstream-tools contract) ─────────\n\nexport interface CampaignCellResult<TArtifact> {\n cellId: string\n scenarioId: string\n rep: number\n generation?: number\n artifact: TArtifact\n judgeScores: Record<string, JudgeScore>\n costUsd: number\n /** LLM token usage the dispatch reported via `ctx.cost.observeTokens`.\n * `{ input: 0, output: 0 }` when the dispatch reported none — which the\n * backend-integrity guard reads as a stub. */\n tokenUsage: CampaignTokenUsage\n durationMs: number\n seed: number\n cached: boolean\n error?: string\n}\n\nexport interface JudgeAggregate {\n mean: number\n stdev: number\n ci95: [number, number]\n n: number\n}\n\nexport interface ScenarioAggregate {\n meanComposite: number\n ci95: [number, number]\n n: number\n}\n\nexport interface GenerationRecord {\n generationIndex: number\n candidates: GenerationCandidate[]\n promoted: string[]\n}\n\n/** One scored candidate surface in a generation. `dimensions` + `scenarios`\n * let a reflective `ImprovementDriver` ground its next proposal on WHICH\n * dimensions the candidate is weakest on and WHICH scenarios it best/worst\n * handled — the evidence a blind `Mutator` cannot see. */\nexport interface GenerationCandidate {\n surfaceHash: string\n composite: number\n ci95: [number, number]\n /** Mean score per judge dimension across all cells (scenarios × reps ×\n * judges that reported the dimension). */\n dimensions: Record<string, number>\n /** Per-scenario composite (mean over reps + judges). */\n scenarios: Array<{ scenarioId: string; composite: number }>\n /** Driver-supplied short label for the change. Present when the driver\n * returned a `ProposedCandidate`; absent for bare-surface mutators. */\n label?: string\n /** Driver-supplied rationale — WHY this candidate was proposed. 
The\n * \"because rationale Z\" the audit requires to survive to the result.\n * Present when the driver returned a `ProposedCandidate`. */\n rationale?: string\n}\n\nexport interface CampaignAggregates {\n byJudge: Record<string, JudgeAggregate>\n byScenario: Record<string, ScenarioAggregate>\n totalCostUsd: number\n cellsExecuted: number\n cellsSkipped: number\n cellsCached: number\n cellsFailed: number\n}\n\nexport interface CampaignResult<TArtifact = unknown, TScenario extends Scenario = Scenario> {\n /** sha256(scenarios, judges, dispatch source ref, optimizer config, seed). Stable identity for reruns. */\n manifestHash: string\n seed: number\n startedAt: string\n endedAt: string\n durationMs: number\n cells: Array<CampaignCellResult<TArtifact>>\n aggregates: CampaignAggregates\n optimization?: {\n generations: GenerationRecord[]\n winnerSurfaceHash?: string\n }\n gate?: GateResult\n prUrl?: string\n runDir: string\n artifactsByPath: Record<string, string>\n /** Substrate strips the input scenarios to id+kind for the result manifest;\n * consumers needing full payload look it up via the original input. The\n * type parameter `TScenario` is propagated for downstream consumers that\n * want narrowed types when extending `CampaignResult`. */\n scenarios: Array<Pick<TScenario, 'id' | 'kind'>>\n}\n","/**\n * @experimental\n *\n * Shared campaign-score reductions used by every optimizer preset\n * (`runOptimization`, `runSkillOpt`, `compareDrivers`). ONE definition of\n * \"composite of a campaign\" and \"per-scenario / per-dimension breakdown\" so\n * the optimizers cannot drift on how a surface's score is computed.\n */\n\nimport type { CampaignResult, Scenario } from './types'\n\n/** Mean composite across a campaign: per cell, the mean of its judges'\n * composites; then the mean across cells. Cells with no judge scores are\n * skipped. Empty ⇒ 0. 
*/\nexport function campaignMeanComposite<TArtifact, TScenario extends Scenario>(\n campaign: CampaignResult<TArtifact, TScenario>,\n): number {\n const composites: number[] = []\n for (const cell of campaign.cells) {\n const cellComposites = Object.values(cell.judgeScores).map((s) => s.composite)\n if (cellComposites.length > 0) {\n composites.push(cellComposites.reduce((a, b) => a + b, 0) / cellComposites.length)\n }\n }\n return composites.length === 0 ? 0 : composites.reduce((a, b) => a + b, 0) / composites.length\n}\n\nexport interface CampaignBreakdown {\n /** Mean score per judge dimension across all cells. */\n dimensions: Record<string, number>\n /** Per-scenario composite (mean over reps + judges). */\n scenarios: Array<{ scenarioId: string; composite: number }>\n}\n\n/** Per-candidate evidence a reflective/patch driver grounds its next proposal\n * on: mean score per judge dimension + per-scenario composite. */\nexport function campaignBreakdown<TArtifact, TScenario extends Scenario>(\n campaign: CampaignResult<TArtifact, TScenario>,\n): CampaignBreakdown {\n const dimSums: Record<string, number> = {}\n const dimCounts: Record<string, number> = {}\n const byScenario = new Map<string, number[]>()\n for (const cell of campaign.cells) {\n const judgeScores = Object.values(cell.judgeScores)\n if (judgeScores.length === 0) continue\n const cellComposite = judgeScores.reduce((a, s) => a + s.composite, 0) / judgeScores.length\n const arr = byScenario.get(cell.scenarioId) ?? []\n arr.push(cellComposite)\n byScenario.set(cell.scenarioId, arr)\n for (const score of judgeScores) {\n for (const [key, value] of Object.entries(score.dimensions)) {\n dimSums[key] = (dimSums[key] ?? 0) + value\n dimCounts[key] = (dimCounts[key] ?? 0) + 1\n }\n }\n }\n const dimensions: Record<string, number> = {}\n for (const key of Object.keys(dimSums)) {\n const count = dimCounts[key] ?? 0\n dimensions[key] = count > 0 ? (dimSums[key] ?? 
0) / count : 0\n }\n const scenarios = [...byScenario.entries()].map(([scenarioId, comps]) => ({\n scenarioId,\n composite: comps.reduce((a, b) => a + b, 0) / comps.length,\n }))\n return { dimensions, scenarios }\n}\n","/**\n * @experimental\n *\n * `runOptimization` — the improvement loop body. Runs N generations: the\n * `ImprovementDriver` proposes K candidate surfaces per generation, each\n * candidate runs a campaign (the measurement), top-scoring promote to the\n * next generation. Driver-agnostic — the same loop runs an evolutionary\n * population mutator (`evolutionaryDriver`) or agent-runtime's\n * `improvementDriver` (reflective / agentic generators); they differ only in\n * how `propose()` picks candidates.\n *\n * This is `runLoop`'s shape (plan → measure → decide) specialized to surface\n * improvement: `driver.propose` = plan, `runCampaign` = the measurement (which\n * runs the worker behind `dispatch`), the mean-composite ranking = the\n * validator, `driver.decide` = the stop check.\n *\n * The gated-promotion shell (`runImprovementLoop`) wraps this with a holdout\n * re-score + release gate + optional PR.\n */\n\nimport { createHash } from 'node:crypto'\nimport { type Objective, paretoFrontier } from '../../pareto'\nimport { type RunCampaignOptions, runCampaign } from '../run-campaign'\nimport { campaignBreakdown, campaignMeanComposite } from '../score-utils'\nimport {\n type CampaignResult,\n type GenerationRecord,\n type ImprovementDriver,\n isProposedCandidate,\n type MutableSurface,\n type ParetoParent,\n type ProposedCandidate,\n type Scenario,\n} from '../types'\n\nexport interface RunOptimizationOptions<TScenario extends Scenario, TArtifact>\n extends Omit<RunCampaignOptions<TScenario, TArtifact>, 'dispatch'> {\n /** Initial mutable surface (typically system prompt or addendum). */\n baselineSurface: MutableSurface\n /** Dispatcher that takes the CURRENT surface + scenario → artifact. 
*/\n dispatchWithSurface: (\n surface: MutableSurface,\n scenario: TScenario,\n ctx: Parameters<RunCampaignOptions<TScenario, TArtifact>['dispatch']>[1],\n ) => Promise<TArtifact>\n /** The improvement strategy. Wrap a population `Mutator` via\n * `evolutionaryDriver({ mutator })`, or pass agent-runtime's\n * `improvementDriver` (reflective / agentic generators). */\n driver: ImprovementDriver\n populationSize: number\n maxGenerations: number\n /** How many top-scoring candidates carry to the next generation. Default 2. */\n promoteTopK?: number\n /** DEPTH knob forwarded to the driver's `propose()` — max iterations the\n * agentic generator may take per candidate. */\n maxImprovementShots?: number\n /** Phase-2 research report forwarded to `propose()` (analyst findings +\n * diff). Opaque here; the driver types it. */\n report?: unknown\n}\n\nexport interface RunOptimizationResult<TArtifact, TScenario extends Scenario> {\n generations: Array<{\n record: GenerationRecord\n surfaces: Array<{\n surfaceHash: string\n surface: MutableSurface\n campaign: CampaignResult<TArtifact, TScenario>\n }>\n }>\n winnerSurface: MutableSurface\n winnerSurfaceHash: string\n /** Driver label for the promoted surface. Present when the winning\n * candidate came from a `ProposedCandidate` (a reflective driver);\n * absent when the winner is the baseline or a bare-surface mutator. */\n winnerLabel?: string\n /** Driver rationale for the promoted surface — the \"because Z\" that\n * motivated the winning change. Survives to `SelfImproveResult` and the\n * emitted provenance record. Absent when the winner is the baseline. */\n winnerRationale?: string\n baselineCampaign: CampaignResult<TArtifact, TScenario>\n /** The GEPA Pareto frontier across every scored surface (baseline + all\n * generations) by per-scenario objective vector — the non-dominated set.\n * Each generation's `propose()` received the frontier-so-far as\n * `ctx.paretoParents`; this is the final frontier. 
A surface here that is\n * NOT the winner is uniquely best on some scenario the winner loses on. */\n paretoFrontier: ParetoParent[]\n}\n\nexport async function runOptimization<TScenario extends Scenario, TArtifact>(\n opts: RunOptimizationOptions<TScenario, TArtifact>,\n): Promise<RunOptimizationResult<TArtifact, TScenario>> {\n const promoteTopK = opts.promoteTopK ?? 2\n\n // Baseline run\n const baselineCampaign = await runCampaign<TScenario, TArtifact>({\n ...opts,\n dispatch: (scenario, ctx) => opts.dispatchWithSurface(opts.baselineSurface, scenario, ctx),\n runDir: `${opts.runDir}/baseline`,\n })\n\n const generations: RunOptimizationResult<TArtifact, TScenario>['generations'] = []\n const history: GenerationRecord[] = []\n let currentSurfaces: MutableSurface[] = [opts.baselineSurface]\n let winnerSurface = opts.baselineSurface\n let winnerSurfaceHash = surfaceHash(opts.baselineSurface)\n let winnerComposite = campaignMeanComposite(baselineCampaign)\n let winnerLabel: string | undefined\n let winnerRationale: string | undefined\n\n // GEPA frontier accumulator — every scored surface as an objective vector\n // (per-scenario composite). The baseline seeds it as generation -1; each\n // candidate is added after its campaign. The non-dominated set of this list\n // is recomputed before every `propose()` and handed to the driver.\n const scored: ParetoParent[] = [\n toParetoParent(opts.baselineSurface, winnerSurfaceHash, baselineCampaign, -1),\n ]\n\n for (let gen = 0; gen < opts.maxGenerations; gen++) {\n // Decide: the driver may stop early based on accumulated history.\n if (opts.driver.decide?.({ history }).stop) break\n\n // Plan: the driver proposes N candidates from the current best surface,\n // the accumulated generation history, the Pareto frontier so far, and any\n // external findings.\n const paretoParents = computeParetoFrontier(scored)\n const proposed = await opts.driver.propose({\n currentSurface: currentSurfaces[0] ?? 
opts.baselineSurface,\n history,\n findings: [],\n populationSize: opts.populationSize,\n generation: gen,\n signal: new AbortController().signal,\n report: opts.report,\n dataset: opts.labeledStore && opts.labeledStore !== 'off' ? opts.labeledStore : undefined,\n maxImprovementShots: opts.maxImprovementShots,\n paretoParents,\n })\n\n // Normalize: a driver may return bare surfaces (blind mutators) or\n // `ProposedCandidate`s carrying {label, rationale}. Keep the rationale so\n // each candidate stays attributable through to the result + provenance.\n const candidates: ProposedCandidate[] = proposed.map((p) =>\n isProposedCandidate(p) ? p : { surface: p, label: '', rationale: '' },\n )\n\n // Run each candidate as its own campaign.\n const surfaceResults: Array<{\n surfaceHash: string\n surface: MutableSurface\n label: string\n rationale: string\n campaign: CampaignResult<TArtifact, TScenario>\n composite: number\n }> = []\n for (let i = 0; i < candidates.length; i++) {\n const { surface, label, rationale } = candidates[i]!\n const hash = surfaceHash(surface)\n const campaign = await runCampaign<TScenario, TArtifact>({\n ...opts,\n dispatch: (scenario, ctx) => opts.dispatchWithSurface(surface, scenario, ctx),\n runDir: `${opts.runDir}/gen-${gen}/candidate-${i}`,\n })\n const composite = campaignMeanComposite(campaign)\n surfaceResults.push({ surfaceHash: hash, surface, label, rationale, campaign, composite })\n // Add to the GEPA frontier accumulator — the NEXT generation's\n // `propose()` sees this candidate's per-scenario objective vector.\n scored.push(\n toParetoParent(surface, hash, campaign, gen, label || undefined, rationale || undefined),\n )\n }\n\n // Rank, promote top-K.\n surfaceResults.sort((a, b) => b.composite - a.composite)\n const promoted = surfaceResults.slice(0, promoteTopK)\n currentSurfaces = promoted.map((p) => p.surface)\n const top = surfaceResults[0]\n if (top && top.composite > winnerComposite) {\n winnerSurface = top.surface\n 
winnerSurfaceHash = top.surfaceHash\n winnerComposite = top.composite\n winnerLabel = top.label || undefined\n winnerRationale = top.rationale || undefined\n }\n\n const record: GenerationRecord = {\n generationIndex: gen,\n candidates: surfaceResults.map((s) => {\n const breakdown = campaignBreakdown(s.campaign)\n const candidate: GenerationRecord['candidates'][number] = {\n surfaceHash: s.surfaceHash,\n composite: s.composite,\n ci95: [s.composite, s.composite] as [number, number],\n dimensions: breakdown.dimensions,\n scenarios: breakdown.scenarios,\n }\n if (s.label) candidate.label = s.label\n if (s.rationale) candidate.rationale = s.rationale\n return candidate\n }),\n promoted: promoted.map((p) => p.surfaceHash),\n }\n history.push(record)\n generations.push({\n record,\n surfaces: surfaceResults.map((s) => ({\n surfaceHash: s.surfaceHash,\n surface: s.surface,\n campaign: s.campaign,\n })),\n })\n }\n\n return {\n generations,\n winnerSurface,\n winnerSurfaceHash,\n winnerLabel,\n winnerRationale,\n baselineCampaign,\n paretoFrontier: computeParetoFrontier(scored),\n }\n}\n\n/** Build a `ParetoParent` from a scored campaign — objective vector =\n * per-scenario composite, scalar = mean composite. */\nfunction toParetoParent<TArtifact, TScenario extends Scenario>(\n surface: MutableSurface,\n hash: string,\n campaign: CampaignResult<TArtifact, TScenario>,\n generation: number,\n label?: string,\n rationale?: string,\n): ParetoParent {\n const objectives: Record<string, number> = {}\n for (const { scenarioId, composite } of campaignBreakdown(campaign).scenarios) {\n objectives[scenarioId] = composite\n }\n const parent: ParetoParent = {\n surface,\n surfaceHash: hash,\n objectives,\n composite: campaignMeanComposite(campaign),\n generation,\n }\n if (label) parent.label = label\n if (rationale) parent.rationale = rationale\n return parent\n}\n\n/** The non-dominated set over the per-scenario objective vectors. 
Every\n * scenario seen across the scored set becomes a `maximize` objective; a\n * surface missing a scenario (a failed cell) is ranked worst on that axis via\n * a FINITE floor (the lowest real score seen there) — never a non-finite\n * value, because the canonical `paretoFrontier` excludes any candidate with a\n * non-finite objective, which would silently drop the whole frontier if one\n * scenario errored across every candidate. Delegates dominance to the\n * package-canonical `paretoFrontier` — ONE implementation of the relation. */\nfunction computeParetoFrontier(scored: ParetoParent[]): ParetoParent[] {\n if (scored.length <= 1) return [...scored]\n const ids = new Set<string>()\n for (const p of scored) for (const id of Object.keys(p.objectives)) ids.add(id)\n if (ids.size === 0) return [...scored]\n const floor: Record<string, number> = {}\n for (const id of ids) {\n let min = Number.POSITIVE_INFINITY\n for (const p of scored) {\n const v = p.objectives[id]\n if (typeof v === 'number' && Number.isFinite(v) && v < min) min = v\n }\n floor[id] = Number.isFinite(min) ? min : 0\n }\n const objectives: Objective<ParetoParent>[] = [...ids].map((id) => ({\n name: id,\n direction: 'maximize',\n value: (p) => {\n const v = p.objectives[id]\n return typeof v === 'number' && Number.isFinite(v) ? v : (floor[id] ?? 0)\n },\n }))\n return paretoFrontier(scored, objectives).frontier\n}\n\nexport function surfaceHash(surface: MutableSurface): string {\n // Prompt/tool surfaces (string) hash by content; code surfaces hash by the\n // worktree + base ref pair (the content lives in git, not in the string).\n const material =\n typeof surface === 'string'\n ? surface\n : JSON.stringify({\n kind: surface.kind,\n worktreeRef: surface.worktreeRef,\n baseRef: surface.baseRef ?? 
null,\n })\n return createHash('sha256').update(material).digest('hex').slice(0, 16)\n}\n","/**\n * @experimental\n *\n * `runImprovementLoop` — the gated-promotion shell around the improvement\n * loop body (`runOptimization`). Drives candidate surfaces via the\n * `ImprovementDriver`, re-scores the winner against the baseline on a\n * holdout set, runs the release gate, and optionally opens a PR.\n *\n * Role vocabulary (see docs/design/loop-taxonomy.md):\n * - DRIVER = the `ImprovementDriver` (evolutionary GEPA mutator OR\n * reflective analyst). Proposes candidate SURFACES — the\n * worker's system prompt / tool config — NOT conversation\n * turns.\n * - MEASUREMENT= `runCampaign`. Scores one surface by running the worker\n * (via `dispatch`) over scenarios and judging the output.\n * - WORKER = the agent harness in the sandbox, invoked behind the\n * topology-opaque `dispatch` seam — never referenced here.\n *\n * Distinct from `runLoop` in `@tangle-network/agent-runtime`, which is the\n * INNER conversation loop (driver↔workers in a sandbox). `runImprovementLoop`\n * is the OUTER loop: it improves the surface that those workers run.\n *\n * Hard-refuses unsafe configurations:\n * - `tracing: 'off'` when a driver is wired (improvement is unattributable)\n * - `autoOnPromote: 'config'` — DEFERRED to Pass B; v0.40 only ships\n * `'pr'` and `'none'`.\n */\n\nimport { openAutoPr } from '../auto-pr'\nimport type { CampaignResult, Gate, MutableSurface, Scenario } from '../types'\nimport type { RunOptimizationOptions, RunOptimizationResult } from './run-optimization'\nimport { runOptimization, surfaceHash } from './run-optimization'\n\nexport interface RunImprovementLoopOptions<TScenario extends Scenario, TArtifact>\n extends RunOptimizationOptions<TScenario, TArtifact> {\n /** Holdout scenarios kept OUT of the training optimization pool — used\n * ONLY to score baseline vs winner for the gate. */\n holdoutScenarios: TScenario[]\n /** Promotion gate. 
Substrate strongly recommends `defaultProductionGate`\n * for production wiring (composes red-team / reward-hacking / canary /\n * heldout). */\n gate: Gate<TArtifact, TScenario>\n /** What to do when the gate ships:\n * - `'pr'`: open a PR via `openAutoPr`\n * - `'none'`: just report — caller decides what to do with the winner\n * v0.40 does NOT support `'config'` (live-runtime self-mutation) —\n * deferred to Pass B behind safety stack. */\n autoOnPromote: 'pr' | 'none'\n /** GH owner / repo for the auto-PR. Required when autoOnPromote === 'pr'. */\n ghOwner?: string\n ghRepo?: string\n /** Optional render override — substrate writes a diff-shaped surface; pass\n * a function to format the promoted surface differently. */\n renderPromotedDiff?: (winnerSurface: MutableSurface, baselineSurface: MutableSurface) => string\n}\n\nexport interface RunImprovementLoopResult<TArtifact, TScenario extends Scenario>\n extends RunOptimizationResult<TArtifact, TScenario> {\n baselineOnHoldout: CampaignResult<TArtifact, TScenario>\n winnerOnHoldout: CampaignResult<TArtifact, TScenario>\n gateResult: Awaited<ReturnType<Gate<TArtifact, TScenario>['decide']>>\n /** Unified baseline→winner surface diff. Computed UNCONDITIONALLY (not only\n * when `autoOnPromote === 'pr'`) so the diff that the gate decided on is\n * always present on the result + in the emitted provenance record. Empty\n * string when winner == baseline (no change to diff). 
*/\n promotedDiff: string\n prResult?: ReturnType<typeof openAutoPr>\n}\n\nexport async function runImprovementLoop<TScenario extends Scenario, TArtifact>(\n opts: RunImprovementLoopOptions<TScenario, TArtifact>,\n): Promise<RunImprovementLoopResult<TArtifact, TScenario>> {\n // ── Safety pre-flight ─────────────────────────────────────────────\n // biome-ignore lint/suspicious/noExplicitAny: Pass A reserved field for Pass B Shape B\n if ((opts as any).autoOnPromote === 'config') {\n throw new Error(\n \"runImprovementLoop: autoOnPromote='config' is deferred to Pass B (requires shadow deploy + rollback + ensemble judges). Use 'pr' or 'none' in v0.40.\",\n )\n }\n // Refuse tracing=off whenever a driver is wired. An improvement loop\n // without traces is unattributable — its candidate surfaces cannot be\n // cited back to the spans that motivated them, and the dataset flywheel\n // (LabeledScenarioStore) that GEPA optimizes against goes unfed.\n if (opts.tracing === 'off' && opts.driver) {\n throw new Error(\n \"runImprovementLoop: tracing='off' is forbidden when a driver is wired. 
The improvement loop without traces is unattributable; candidate surfaces cannot be cited back to spans and the optimization dataset goes unfed.\",\n )\n }\n if (opts.autoOnPromote === 'pr' && (!opts.ghOwner || !opts.ghRepo)) {\n throw new Error(\"runImprovementLoop: autoOnPromote='pr' requires ghOwner + ghRepo.\")\n }\n\n // ── (1) optimization loop produces a winner ────────────────────────\n const optimization = await runOptimization(opts)\n\n // ── (2) baseline + winner re-scored on the holdout set ─────────────\n const { runCampaign } = await import('../run-campaign')\n\n const baselineOnHoldout = await runCampaign<TScenario, TArtifact>({\n ...opts,\n scenarios: opts.holdoutScenarios,\n dispatch: (scenario, ctx) => opts.dispatchWithSurface(opts.baselineSurface, scenario, ctx),\n runDir: `${opts.runDir}/holdout-baseline`,\n })\n\n const winnerOnHoldout = await runCampaign<TScenario, TArtifact>({\n ...opts,\n scenarios: opts.holdoutScenarios,\n dispatch: (scenario, ctx) =>\n opts.dispatchWithSurface(optimization.winnerSurface, scenario, ctx),\n runDir: `${opts.runDir}/holdout-winner`,\n })\n\n // ── (3) gate verdict ───────────────────────────────────────────────\n // Candidate + baseline share cellIds (same holdout scenarios), so their\n // judge scores MUST stay in separate maps — merging them collapses the\n // holdout delta to zero and the gate can never ship a real improvement.\n type ScoreMap = Map<\n string,\n Record<string, { composite: number; dimensions: Record<string, number>; notes: string }>\n >\n const candidateArtifacts = new Map<string, TArtifact>()\n const baselineArtifacts = new Map<string, TArtifact>()\n const judgeScores: ScoreMap = new Map()\n const baselineJudgeScores: ScoreMap = new Map()\n for (const cell of winnerOnHoldout.cells) {\n candidateArtifacts.set(cell.cellId, cell.artifact)\n judgeScores.set(cell.cellId, cell.judgeScores)\n }\n for (const cell of baselineOnHoldout.cells) {\n baselineArtifacts.set(cell.cellId, cell.artifact)\n 
baselineJudgeScores.set(cell.cellId, cell.judgeScores)\n }\n\n const gateResult = await opts.gate.decide({\n candidateArtifacts,\n baselineArtifacts,\n judgeScores,\n baselineJudgeScores,\n scenarios: opts.holdoutScenarios,\n cost: {\n candidate: winnerOnHoldout.aggregates.totalCostUsd,\n baseline: baselineOnHoldout.aggregates.totalCostUsd,\n },\n signal: new AbortController().signal,\n })\n\n // ── (4) baseline→winner diff (always) + auto-PR when gate ships ────\n // The diff is computed UNCONDITIONALLY — it's the human-auditable record of\n // what the loop actually changed, needed for the provenance artifact whether\n // or not a PR is opened. winner == baseline ⇒ empty diff (nothing changed).\n const render = opts.renderPromotedDiff ?? defaultRenderDiff\n const promotedDiff =\n optimization.winnerSurfaceHash === surfaceHash(opts.baselineSurface)\n ? ''\n : render(optimization.winnerSurface, opts.baselineSurface)\n\n let prResult: ReturnType<typeof openAutoPr> | undefined\n if (opts.autoOnPromote === 'pr' && gateResult.decision === 'ship') {\n prResult = openAutoPr({\n result: winnerOnHoldout,\n gate: gateResult,\n promotedDiff,\n ghOwner: opts.ghOwner!,\n ghRepo: opts.ghRepo!,\n })\n }\n\n return {\n ...optimization,\n baselineOnHoldout,\n winnerOnHoldout,\n gateResult,\n promotedDiff,\n prResult,\n }\n}\n\nexport function defaultRenderDiff(\n winnerSurface: MutableSurface,\n baselineSurface: MutableSurface,\n): string {\n // Code surfaces aren't text-diffable here — the diff lives in git. Render\n // the worktree/base refs + summary so the PR body points at the change.\n if (typeof winnerSurface !== 'string' || typeof baselineSurface !== 'string') {\n const fmt = (s: MutableSurface): string =>\n typeof s === 'string'\n ? '(prompt surface)'\n : `worktree=${s.worktreeRef}${s.baseRef ? ` base=${s.baseRef}` : ''}${s.summary ? 
`\\n${s.summary}` : ''}`\n return `--- baseline\\n${fmt(baselineSurface)}\\n+++ winner\\n${fmt(winnerSurface)}`\n }\n const lines: string[] = []\n lines.push('--- baseline')\n lines.push('+++ winner')\n for (const l of baselineSurface.split('\\n')) lines.push(`- ${l}`)\n for (const l of winnerSurface.split('\\n')) lines.push(`+ ${l}`)\n return lines.join('\\n')\n}\n","/**\n * @experimental\n *\n * `runEval` — the simplest preset over `runCampaign`. No optimizer, no\n * gate, no auto-PR. Just: run scenarios through dispatch, score with\n * judges, return CampaignResult.\n *\n * The 80% case for consumers who want a scorecard, not an improvement loop.\n */\n\nimport { type RunCampaignOptions, runCampaign } from '../run-campaign'\nimport type { CampaignResult, Scenario } from '../types'\n\nexport interface RunEvalOptions<TScenario extends Scenario, TArtifact>\n extends Omit<RunCampaignOptions<TScenario, TArtifact>, 'runDir'> {\n runDir: string\n}\n\nexport async function runEval<TScenario extends Scenario, TArtifact>(\n opts: RunEvalOptions<TScenario, TArtifact>,\n): Promise<CampaignResult<TArtifact, TScenario>> {\n return runCampaign(opts)\n}\n","/**\n * @experimental\n *\n * Loop provenance — the durable, queryable record of WHAT a self-improvement\n * loop did and WHY, plus the OTel spans that let an OTLP collector pivot from\n * an eval-run to the underlying candidate→cell→gate→promote chain.\n *\n * Two artifacts, one source of truth:\n *\n * 1. `LoopProvenanceRecord` — a structured JSON record capturing every\n * candidate (surfaceHash + label + rationale), its measured composite,\n * the gate decision + reasons + delta, the held-out lift, the explicit\n * baseline→candidate diff, and BACKEND PROVENANCE (the\n * `assertRealBackend` verdict + worker call count + model). This is the\n * ingestable audit artifact: the +lift recomputes from it, the \"because\n * Z\" rationale survives in it, and a stub backend is detectable from it.\n *\n * 2. 
`loopProvenanceSpans()` — the same chain emitted as OTLP-ingestable\n * `TraceSpanEvent`s, pivoted on the substrate's standard\n * `tangle.runId` / `tangle.scenarioId` / `tangle.cellId` /\n * `tangle.generation` attributes (the same pivots `/adapters/otel`\n * reads). The hosted `/v1/ingest/traces` endpoint receives the FULL loop,\n * not just the `cost.*` spans `runCampaign` already emits per cell.\n *\n * The record is built from the substrate's own loop result + the per-call\n * `RunRecord`s the worker emitted — no new measurement, no recomputation that\n * could drift from what the gate actually saw.\n */\n\nimport { createHash } from 'node:crypto'\nimport { join } from 'node:path'\nimport type { HostedClient } from '../hosted/client'\nimport type {\n EvalRunCellScore,\n EvalRunEvent,\n EvalRunGenerationSnapshot,\n TraceSpanEvent,\n} from '../hosted/types'\nimport { summarizeBackendIntegrity } from '../integrity/backend-integrity'\nimport type { RunRecord } from '../run-record'\nimport type { CampaignStorage } from './storage'\nimport type { CampaignResult, GateDecision, GateResult, MutableSurface, Scenario } from './types'\n\n/** Stable sha256 (full hex) of a surface's effective text. Code surfaces hash\n * their worktree+base identity since the content lives in git. Distinct from\n * `surfaceHash` (16-char content fingerprint used as a loop identity key);\n * this is the byte-identical-verifiable content hash the provenance record +\n * `RunRecord.promptHash` carry. */\nexport function surfaceContentHash(surface: MutableSurface): string {\n const material =\n typeof surface === 'string'\n ? surface\n : JSON.stringify({\n kind: surface.kind,\n worktreeRef: surface.worktreeRef,\n baseRef: surface.baseRef ?? null,\n })\n return `sha256:${createHash('sha256').update(material).digest('hex')}`\n}\n\nexport interface LoopProvenanceCandidate {\n /** Generation index this candidate was proposed in. 
*/\n generation: number\n /** 16-char loop-identity fingerprint (matches `GenerationCandidate.surfaceHash`). */\n surfaceHash: string\n /** Full sha256 content hash — byte-identical-verifiable. */\n contentHash: string\n /** Driver label, when the driver returned a `ProposedCandidate`. */\n label?: string\n /** Driver rationale — the \"because Z\". When the driver returned a bare\n * surface (blind mutator) this is absent. */\n rationale?: string\n /** Mean composite this candidate scored on the search split. */\n composite: number\n /** Whether this candidate was promoted out of its generation. */\n promoted: boolean\n}\n\nexport interface LoopProvenanceBackend {\n /** `assertRealBackend`-grade verdict over the worker call records. */\n verdict: 'real' | 'mixed' | 'stub'\n /** Number of worker LLM calls captured (the audit's \"worker call count\"). */\n workerCallCount: number\n /** Distinct model ids observed across worker calls. */\n models: string[]\n totalInputTokens: number\n totalOutputTokens: number\n totalCostUsd: number\n}\n\n/**\n * The durable provenance record. Aligns to the hosted `EvalRunEvent` path but\n * ADDS the rationale + the explicit baseline→candidate diff (both omitted from\n * the bare hosted event) + backend provenance.\n */\nexport interface LoopProvenanceRecord {\n schema: 'tangle.loop-provenance.v1'\n runId: string\n runDir: string\n timestamp: string\n /** Baseline + winner surface content hashes — distinguishable, byte-verifiable. */\n baselineContentHash: string\n winnerContentHash: string\n /** Driver label/rationale for the promoted change. Absent ⇒ winner == baseline. */\n winnerLabel?: string\n winnerRationale?: string\n /** The explicit baseline→winner unified diff the gate decided on. */\n diff: string\n /** Every candidate across every generation, each carrying its rationale. */\n candidates: LoopProvenanceCandidate[]\n /** The gate verdict — decision + reasons + contributing gates + delta. 
*/\n gate: {\n decision: GateDecision\n reasons: string[]\n delta?: number\n contributingGates: Array<{ name: string; passed: boolean }>\n }\n /** baseline-on-holdout composite mean. */\n baselineHoldoutComposite: number\n /** winner-on-holdout composite mean. */\n winnerHoldoutComposite: number\n /** winnerHoldout - baselineHoldout — RECOMPUTABLE from this record. */\n heldOutLift: number\n /** Backend provenance: stub-vs-real verdict + worker call count + models. */\n backend: LoopProvenanceBackend\n totalCostUsd: number\n totalDurationMs: number\n}\n\nexport interface BuildLoopProvenanceArgs<TArtifact, TScenario extends Scenario> {\n runId: string\n runDir: string\n timestamp: string\n baselineSurface: MutableSurface\n winnerSurface: MutableSurface\n winnerLabel?: string\n winnerRationale?: string\n diff: string\n /** Per-generation candidate records straight off the loop result. */\n generations: Array<{\n generationIndex: number\n candidates: Array<{\n surfaceHash: string\n composite: number\n label?: string\n rationale?: string\n }>\n promoted: string[]\n /** Surfaces measured this generation, keyed positionally to candidates so\n * the content hash can be computed from the real surface text. */\n surfaces: Array<{ surfaceHash: string; surface: MutableSurface }>\n }>\n gate: GateResult\n baselineOnHoldout: CampaignResult<TArtifact, TScenario>\n winnerOnHoldout: CampaignResult<TArtifact, TScenario>\n /** Worker call records — the source for backend provenance. */\n workerRecords: ReadonlyArray<RunRecord>\n totalCostUsd: number\n totalDurationMs: number\n}\n\nfunction meanHoldoutComposite<TArtifact, TScenario extends Scenario>(\n campaign: CampaignResult<TArtifact, TScenario>,\n): number {\n const xs: number[] = []\n for (const cell of campaign.cells) {\n if (cell.error) continue\n const cs = Object.values(cell.judgeScores).map((s) => s.composite)\n if (cs.length) xs.push(cs.reduce((a, b) => a + b, 0) / cs.length)\n }\n return xs.length ? 
xs.reduce((a, b) => a + b, 0) / xs.length : 0\n}\n\n/** Build the durable provenance record from a completed loop result. */\nexport function buildLoopProvenanceRecord<TArtifact, TScenario extends Scenario>(\n args: BuildLoopProvenanceArgs<TArtifact, TScenario>,\n): LoopProvenanceRecord {\n const integrity = summarizeBackendIntegrity(args.workerRecords)\n const models = [...new Set(args.workerRecords.map((r) => r.model))].sort()\n\n const candidates: LoopProvenanceCandidate[] = []\n for (const gen of args.generations) {\n const promotedSet = new Set(gen.promoted)\n const surfaceByHash = new Map(gen.surfaces.map((s) => [s.surfaceHash, s.surface]))\n for (const c of gen.candidates) {\n const surface = surfaceByHash.get(c.surfaceHash)\n const entry: LoopProvenanceCandidate = {\n generation: gen.generationIndex,\n surfaceHash: c.surfaceHash,\n contentHash:\n surface !== undefined ? surfaceContentHash(surface) : `sha256:${c.surfaceHash}`,\n composite: c.composite,\n promoted: promotedSet.has(c.surfaceHash),\n }\n if (c.label) entry.label = c.label\n if (c.rationale) entry.rationale = c.rationale\n candidates.push(entry)\n }\n }\n\n const baselineHoldoutComposite = meanHoldoutComposite(args.baselineOnHoldout)\n const winnerHoldoutComposite = meanHoldoutComposite(args.winnerOnHoldout)\n\n const record: LoopProvenanceRecord = {\n schema: 'tangle.loop-provenance.v1',\n runId: args.runId,\n runDir: args.runDir,\n timestamp: args.timestamp,\n baselineContentHash: surfaceContentHash(args.baselineSurface),\n winnerContentHash: surfaceContentHash(args.winnerSurface),\n diff: args.diff,\n candidates,\n gate: {\n decision: args.gate.decision,\n reasons: args.gate.reasons,\n delta: args.gate.delta,\n contributingGates: args.gate.contributingGates.map((g) => ({\n name: g.name,\n passed: g.passed,\n })),\n },\n baselineHoldoutComposite,\n winnerHoldoutComposite,\n heldOutLift: winnerHoldoutComposite - baselineHoldoutComposite,\n backend: {\n verdict: integrity.verdict,\n 
workerCallCount: integrity.totalRecords,\n models,\n totalInputTokens: integrity.totalInputTokens,\n totalOutputTokens: integrity.totalOutputTokens,\n totalCostUsd: integrity.totalCostUsd,\n },\n totalCostUsd: args.totalCostUsd,\n totalDurationMs: args.totalDurationMs,\n }\n if (args.winnerLabel) record.winnerLabel = args.winnerLabel\n if (args.winnerRationale) record.winnerRationale = args.winnerRationale\n return record\n}\n\n// ── OTel span emission ──────────────────────────────────────────────────\n\nconst DECISION_OK: GateDecision[] = ['ship']\n\nfunction hashId(parts: string[]): string {\n return createHash('sha256').update(parts.join(':')).digest('hex')\n}\n\nfunction gateStatus(decision: GateDecision): { code: 'OK' | 'ERROR' | 'UNSET'; message?: string } {\n return DECISION_OK.includes(decision)\n ? { code: 'OK' }\n : { code: 'ERROR', message: `gate decision: ${decision}` }\n}\n\n/**\n * Build the loop's OTLP-ingestable spans from a provenance record. One root\n * span per loop (`tangle.runId`), one span per generation, one span per\n * candidate (carrying its surfaceHash + label), and one span for the gate\n * decision (carrying reasons + delta + lift). Candidate + gate spans pivot on\n * the same `tangle.runId` / `tangle.generation` attributes `/adapters/otel`\n * reads, so the hosted collector reconstructs the full tree.\n *\n * Times are synthesized monotonically off a single base so the span tree is\n * orderable; the substrate does not retain per-candidate wall-clock starts.\n */\nexport function loopProvenanceSpans(\n record: LoopProvenanceRecord,\n opts: { baseTimeMs?: number } = {},\n): TraceSpanEvent[] {\n const traceId = hashId(['trace', record.runId]).slice(0, 32)\n const baseNano = (opts.baseTimeMs ?? 
(Date.parse(record.timestamp) || Date.now())) * 1_000_000\n const endNano = baseNano + Math.max(1, record.totalDurationMs) * 1_000_000\n const spans: TraceSpanEvent[] = []\n\n const rootSpanId = hashId(['root', record.runId]).slice(0, 16)\n spans.push({\n traceId,\n spanId: rootSpanId,\n name: 'improvement-loop',\n startTimeUnixNano: baseNano,\n endTimeUnixNano: endNano,\n attributes: {\n 'tangle.runId': record.runId,\n 'tangle.runDir': record.runDir,\n 'tangle.baselineContentHash': record.baselineContentHash,\n 'tangle.winnerContentHash': record.winnerContentHash,\n 'tangle.heldOutLift': record.heldOutLift,\n 'tangle.gateDecision': record.gate.decision,\n 'tangle.backendVerdict': record.backend.verdict,\n 'tangle.workerCallCount': record.backend.workerCallCount,\n 'tangle.totalCostUsd': record.totalCostUsd,\n },\n status: gateStatus(record.gate.decision),\n 'tangle.runId': record.runId,\n })\n\n // Group candidates by generation for the per-generation parent span.\n const byGen = new Map<number, LoopProvenanceCandidate[]>()\n for (const c of record.candidates) {\n const arr = byGen.get(c.generation) ?? 
[]\n arr.push(c)\n byGen.set(c.generation, arr)\n }\n for (const [generation, cands] of [...byGen.entries()].sort((a, b) => a[0] - b[0])) {\n const genSpanId = hashId(['gen', record.runId, String(generation)]).slice(0, 16)\n const bestComposite = cands.reduce((m, c) => Math.max(m, c.composite), 0)\n spans.push({\n traceId,\n spanId: genSpanId,\n parentSpanId: rootSpanId,\n name: `generation-${generation}`,\n startTimeUnixNano: baseNano,\n endTimeUnixNano: endNano,\n attributes: {\n 'tangle.runId': record.runId,\n 'tangle.generation': generation,\n 'tangle.populationSize': cands.length,\n 'tangle.bestComposite': bestComposite,\n },\n 'tangle.runId': record.runId,\n 'tangle.generation': generation,\n })\n for (let i = 0; i < cands.length; i++) {\n const c = cands[i]!\n const candSpanId = hashId(['cand', record.runId, String(generation), c.surfaceHash]).slice(\n 0,\n 16,\n )\n const attributes: TraceSpanEvent['attributes'] = {\n 'tangle.runId': record.runId,\n 'tangle.generation': generation,\n 'tangle.surfaceHash': c.surfaceHash,\n 'tangle.contentHash': c.contentHash,\n 'tangle.composite': c.composite,\n 'tangle.promoted': c.promoted,\n }\n if (c.label) attributes['tangle.candidateLabel'] = c.label\n if (c.rationale) attributes['tangle.candidateRationale'] = c.rationale\n spans.push({\n traceId,\n spanId: candSpanId,\n parentSpanId: genSpanId,\n name: `candidate-${c.surfaceHash}`,\n startTimeUnixNano: baseNano,\n endTimeUnixNano: endNano,\n attributes,\n 'tangle.runId': record.runId,\n 'tangle.generation': generation,\n })\n }\n }\n\n // Gate span — child of root, carries the decision/reasons/delta the audit\n // needs and pivots back to the run.\n const gateSpanId = hashId(['gate', record.runId]).slice(0, 16)\n spans.push({\n traceId,\n spanId: gateSpanId,\n parentSpanId: rootSpanId,\n name: 'gate-decision',\n startTimeUnixNano: endNano,\n endTimeUnixNano: endNano,\n attributes: {\n 'tangle.runId': record.runId,\n 'tangle.gateDecision': record.gate.decision,\n 
'tangle.gateDelta': record.gate.delta ?? record.heldOutLift,\n 'tangle.gateReasons': JSON.stringify(record.gate.reasons),\n 'tangle.heldOutLift': record.heldOutLift,\n 'tangle.baselineHoldoutComposite': record.baselineHoldoutComposite,\n 'tangle.winnerHoldoutComposite': record.winnerHoldoutComposite,\n },\n status: gateStatus(record.gate.decision),\n 'tangle.runId': record.runId,\n })\n\n return spans\n}\n\n// ── Durable emission ─────────────────────────────────────────────────────\n\n/** Canonical durable paths under the run dir. */\nexport function provenanceRecordPath(runDir: string): string {\n return join(runDir, 'loop-provenance.json')\n}\nexport function provenanceSpansPath(runDir: string): string {\n return join(runDir, 'loop-provenance-spans.jsonl')\n}\n\nexport interface EmitLoopProvenanceResult {\n record: LoopProvenanceRecord\n spans: TraceSpanEvent[]\n /** Absolute paths the record + spans were written to, when storage persists. */\n recordPath: string\n spansPath: string\n}\n\nexport interface EmitLoopProvenanceArgs<TArtifact, TScenario extends Scenario>\n extends BuildLoopProvenanceArgs<TArtifact, TScenario> {\n /** Storage the record + spans are written through. */\n storage: CampaignStorage\n /** When set, the spans are also shipped to the hosted `/v1/ingest/traces`\n * endpoint so the collector receives the full loop, not just `cost.*`. */\n hostedClient?: HostedClient\n}\n\n/** Snapshot a held-out campaign into the hosted `EvalRunGenerationSnapshot`\n * shape — per-cell composite + per-judge dimensions, aggregate mean, cost,\n * duration. The dashboard renders these as the baseline → winner comparison. 
*/\nfunction snapshotFromHoldout<TArtifact, TScenario extends Scenario>(\n index: number,\n surfaceHash: string,\n surface: MutableSurface,\n campaign: CampaignResult<TArtifact, TScenario>,\n): EvalRunGenerationSnapshot {\n const cells: EvalRunCellScore[] = campaign.cells.map((cell) => {\n const judgeScores = Object.values(cell.judgeScores)\n const composite =\n judgeScores.length === 0\n ? 0\n : judgeScores.reduce((s, j) => s + j.composite, 0) / judgeScores.length\n const score: EvalRunCellScore = {\n scenarioId: cell.scenarioId,\n rep: cell.rep,\n compositeMean: composite,\n dimensions: Object.fromEntries(\n Object.entries(cell.judgeScores).map(([name, s]) => [name, s.dimensions]),\n ),\n }\n if (cell.error) score.errorMessage = cell.error\n return score\n })\n const compositeMean =\n cells.length === 0 ? 0 : cells.reduce((s, c) => s + c.compositeMean, 0) / cells.length\n return {\n index,\n surfaceHash,\n surface,\n cells,\n compositeMean,\n costUsd: campaign.aggregates.totalCostUsd,\n durationMs: campaign.durationMs,\n }\n}\n\n/** Build the hosted `EvalRunEvent` from the loop args + record — baseline +\n * winner snapshots, gate decision, held-out lift, cost, duration. Shipped to\n * `/v1/ingest/eval-runs` so the run appears in the dashboard's run list (the\n * trace spans, shipped separately, back the per-candidate drill-down). 
*/\nfunction buildEvalRunEvent<TArtifact, TScenario extends Scenario>(\n args: EmitLoopProvenanceArgs<TArtifact, TScenario>,\n record: LoopProvenanceRecord,\n): EvalRunEvent {\n return {\n runId: args.runId,\n runDir: args.runDir,\n timestamp: args.timestamp,\n status: 'finished',\n labels: {},\n baseline: snapshotFromHoldout(\n 0,\n record.baselineContentHash,\n args.baselineSurface,\n args.baselineOnHoldout,\n ),\n generations: [\n snapshotFromHoldout(1, record.winnerContentHash, args.winnerSurface, args.winnerOnHoldout),\n ],\n gateDecision: args.gate.decision,\n holdoutLift: record.heldOutLift,\n totalCostUsd: args.totalCostUsd,\n totalDurationMs: args.totalDurationMs,\n }\n}\n\n/**\n * Build the provenance record + OTel spans and persist them durably under the\n * run dir (and ship spans to a hosted collector when one is wired). Returns\n * both artifacts so the caller can assert on / re-derive from them.\n *\n * Fail-loud: the durable write throws on storage failure (a swallowed write is\n * exactly the \"emitted but lost\" failure this closes). 
The hosted span ship is\n * the one best-effort leg — its failure is logged, not thrown, so an offline\n * collector never fails the loop (the durable artifact is the source of truth).\n */\nexport async function emitLoopProvenance<TArtifact, TScenario extends Scenario>(\n args: EmitLoopProvenanceArgs<TArtifact, TScenario>,\n): Promise<EmitLoopProvenanceResult> {\n const record = buildLoopProvenanceRecord(args)\n const spans = loopProvenanceSpans(record)\n\n args.storage.ensureDir(args.runDir)\n const recordPath = provenanceRecordPath(args.runDir)\n const spansPath = provenanceSpansPath(args.runDir)\n args.storage.write(recordPath, JSON.stringify(record, null, 2))\n args.storage.write(spansPath, spans.map((s) => JSON.stringify(s)).join('\\n'))\n\n if (args.hostedClient) {\n // Ship BOTH streams so the run is fully visible in the dashboard: the\n // eval-run event (→ run list + baseline/winner/gate/lift) AND the trace\n // spans (→ per-candidate drill-down). Best-effort: an offline collector is\n // logged, never thrown — the durable artifact above is the source of truth.\n try {\n await args.hostedClient.ingestEvalRun(buildEvalRunEvent(args, record))\n } catch (err) {\n const msg = err instanceof Error ? err.message : String(err)\n // eslint-disable-next-line no-console -- intentional: hosted ingest is best-effort\n console.warn(`[agent-eval] hosted eval-run ingest failed (continuing): ${msg}`)\n }\n try {\n await args.hostedClient.ingestTraces(spans)\n } catch (err) {\n const msg = err instanceof Error ? 
err.message : String(err)\n // eslint-disable-next-line no-console -- intentional: hosted span ship is best-effort\n console.warn(`[agent-eval] provenance span ingest failed (continuing): ${msg}`)\n }\n }\n\n return { record, spans, recordPath, spansPath }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;AAcA,SAAS,gBAAgB;AACzB,SAAS,qBAAqB;AAC9B,SAAS,cAAc;AACvB,SAAS,YAAY;AAiCd,SAAS,WACd,SACkB;AAClB,MAAI,QAAQ,KAAK,aAAa,QAAQ;AACpC,WAAO;AAAA,MACL,QAAQ;AAAA,MACR,QAAQ;AAAA,MACR,QAAQ,qBAAqB,QAAQ,KAAK,QAAQ;AAAA,IACpD;AAAA,EACF;AAEA,QAAM,SAAS,QAAQ,UAAU,CAAC,QAAQ,IAAI;AAC9C,QAAM,SAAS,QAAQ,UAAU,QAAQ,QAAQ,OAAO,aAAa,MAAM,GAAG,EAAE,CAAC;AACjF,QAAM,QACJ,QAAQ,SAAS,kBAAkB,QAAQ,OAAO,aAAa,MAAM,GAAG,CAAC,CAAC;AAE5E,QAAM,OAAO,aAAa,QAAQ,QAAQ,QAAQ,MAAM,QAAQ,YAAY;AAC5E,QAAM,WAAW,KAAK,OAAO,GAAG,gBAAgB,KAAK,IAAI,CAAC,KAAK;AAC/D,gBAAc,UAAU,IAAI;AAE5B,MAAI,QAAQ;AACV,WAAO;AAAA,MACL,QAAQ;AAAA,MACR,QAAQ;AAAA,MACR,QAAQ,0DAA0D,QAAQ,OAAO,IAAI,QAAQ,MAAM,WAAW,MAAM,aAAa,QAAQ;AAAA,IAC3I;AAAA,EACF;AAEA,QAAM,SAAS,QAAQ,UAAU;AACjC,QAAM,SAAS,OAAO;AAAA,IACpB;AAAA,IACA;AAAA,IACA;AAAA,IACA,GAAG,QAAQ,OAAO,IAAI,QAAQ,MAAM;AAAA,IACpC;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC;AACD,MAAI,OAAO,WAAW,GAAG;AACvB,WAAO;AAAA,MACL,QAAQ;AAAA,MACR,QAAQ;AAAA,MACR,QAAQ,6BAA6B,OAAO,MAAM,MAAM,OAAO,OAAO,MAAM,GAAG,GAAG,CAAC;AAAA,IACrF;AAAA,EACF;AACA,QAAM,QAAQ,OAAO,OAAO,KAAK;AACjC,SAAO,EAAE,QAAQ,MAAM,OAAO,QAAQ,OAAO,QAAQ,YAAY;AACnE;AAEA,SAAS,aACP,QACA,MACA,MACQ;AACR,QAAM,QAAkB,CAAC;AACzB,QAAM,KAAK,kDAAkD;AAC7D,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,mBAAmB,OAAO,YAAY,IAAI;AACrD,QAAM,KAAK,aAAa,OAAO,IAAI,EAAE;AACrC,QAAM,KAAK,iBAAiB,KAAK,MAAM,OAAO,aAAa,GAAI,CAAC,GAAG;AACnE,QAAM;AAAA,IACJ,uBAAuB,OAAO,WAAW,aAAa,YAAY,OAAO,WAAW,WAAW,aAAa,OAAO,WAAW,YAAY,YAAY,OAAO,WAAW,WAAW;AAAA,EACrL;AACA,QAAM,KAAK,qBAAqB,OAAO,WAAW,aAAa,QAAQ,CAAC,CAAC,EAAE;AAC3E,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,uBAAuB,KAAK,QAAQ,IAAI;AACnD,QAAM,KAAK,EAAE;AACb,aAAW,UAAU,KAAK,QAAS,OAAM,KAAK,KAAK,MAAM,EAAE;AAC3D,MAAI,KAAK,UAAU,OAAW,OAAM,KAAK,YAAY,KAAK,MAAM,QAAQ,CAAC,CAAC,EAAE;AAC5
E,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,wBAAwB;AACnC,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,4BAA4B;AACvC,QAAM,KAAK,eAAe;AAC1B,aAAW,KAAK,KAAK,mBAAmB;AACtC,UAAM,SACJ,OAAO,EAAE,WAAW,WAChB,KAAK,UAAU,EAAE,MAAM,EAAE,MAAM,GAAG,EAAE,IACpC,OAAO,EAAE,MAAM,EAAE,MAAM,GAAG,EAAE;AAClC,UAAM,KAAK,KAAK,EAAE,IAAI,MAAM,EAAE,SAAS,WAAM,QAAG,MAAM,MAAM,IAAI;AAAA,EAClE;AACA,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,sBAAsB;AACjC,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,SAAS;AACpB,QAAM,KAAK,KAAK,MAAM,GAAG,GAAI,CAAC;AAC9B,QAAM,KAAK,KAAK;AAChB,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,yBAAyB;AACpC,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,6BAA6B;AACxC,QAAM,KAAK,mBAAmB;AAC9B,aAAW,CAAC,MAAM,GAAG,KAAK,OAAO,QAAQ,OAAO,WAAW,OAAO,GAAG;AACnE,UAAM;AAAA,MACJ,KAAK,IAAI,MAAM,IAAI,KAAK,QAAQ,CAAC,CAAC,OAAO,IAAI,KAAK,CAAC,EAAE,QAAQ,CAAC,CAAC,KAAK,IAAI,KAAK,CAAC,EAAE,QAAQ,CAAC,CAAC,OAAO,IAAI,CAAC;AAAA,IACxG;AAAA,EACF;AACA,SAAO,MAAM,KAAK,IAAI;AACxB;AAEA,SAAS,cAAc,MAAoE;AACzF,MAAI;AACF,UAAM,SAAS,SAAS,MAAM,KAAK,IAAI,QAAQ,EAAE,KAAK,GAAG,CAAC,IAAI;AAAA,MAC5D,KAAK,EAAE,GAAG,QAAQ,KAAK,UAAU,QAAQ,IAAI,oBAAoB,QAAQ,IAAI,YAAY,GAAG;AAAA,MAC5F,OAAO,CAAC,UAAU,QAAQ,MAAM;AAAA,IAClC,CAAC,EAAE,SAAS,MAAM;AAClB,WAAO,EAAE,QAAQ,QAAQ,IAAI,QAAQ,EAAE;AAAA,EACzC,SAAS,KAAK;AACZ,UAAM,IAAI;AACV,WAAO;AAAA,MACL,QAAQ,EAAE,QAAQ,SAAS,MAAM,KAAK;AAAA,MACtC,QAAQ,EAAE,QAAQ,SAAS,MAAM,KAAK;AAAA,MACtC,QAAQ,EAAE,UAAU;AAAA,IACtB;AAAA,EACF;AACF;AAEA,SAAS,SAAS,KAAqB;AACrC,MAAI,wBAAwB,KAAK,GAAG,EAAG,QAAO;AAC9C,SAAO,IAAI,IAAI,QAAQ,MAAM,KAAK,CAAC;AACrC;;;ACrJO,SAAS,mBACd,MAC8B;AAC9B,SAAO;AAAA,IACL,MAAM,gBAAgB,KAAK,QAAQ,IAAI;AAAA,IACvC,MAAM,QAAQ,EAAE,gBAAgB,UAAU,gBAAgB,OAAO,GAAG;AAClE,aAAO,KAAK,QAAQ,OAAO;AAAA,QACzB,UAAU,SAAS,SAAS,IAAI,WAAY,KAAK,YAAY,CAAC;AAAA,QAC9D;AAAA,QACA;AAAA,QACA;AAAA,MACF,CAAC;AAAA,IACH;AAAA,EACF;AACF;;;ACQA,IAAM,oBACJ;AAIF,IAAM,iBACJ;AAqDK,SAAS,WAAW,MAA4C;AACrE,QAAM,YAAY,KAAK,aAAa;AACpC,QAAM,iBAAiB,KAAK,kBAAkB;AAC9C,QAAM,oBAAoB,KAAK,qBAAqB;AACpD,MAAI,kBAAkB,oBAAoB,GAAG;AAC3C,UAAM,IAAI,MAAM,2EAA2E;AAAA,EAC7F;AACA,SAAO;AAAA,IACL,MAAM;AAAA,IACN,MAAM,QAAQ,KAAmD;AAC/D,YAAM,SACJ,OAAO,IAAI,mBAAmB,WAC1B,I
AAI,iBACJ,KAAK,UAAU,IAAI,cAAc;AAIvC,YAAM,cAAc,KAAK;AACzB,YAAM,mBACJ,aAAa,qBAAqB,SAC9B,YAAY,iBAAiB,WAAW,IACtC,kBAAkB,MAAM,IACxB,YAAY,mBACd;AACN,YAAM,WAAW,aAAa;AAC9B,YAAM,MAA2B,CAAC;AAClC,YAAM,OAAO,oBAAI,IAAY;AAC7B,YAAM,SAAS,CAAC,SAAkB,OAAe,cAA4B;AAC3E,cAAM,OAAO,OAAO,YAAY,WAAW,QAAQ,KAAK,IAAI;AAC5D,YAAI,CAAC,QAAQ,SAAS,UAAU,KAAK,IAAI,IAAI,EAAG;AAChD,YAAI,oBAAoB,CAAC,0BAA0B,MAAM,gBAAgB,EAAG;AAC5E,YAAI,aAAa,UAAa,mBAAmB,QAAQ,IAAI,IAAI,WAAW,EAAG;AAC/E,aAAK,IAAI,IAAI;AAGb,YAAI,KAAK,EAAE,SAAS,MAAM,OAAO,UAAU,CAAC;AAAA,MAC9C;AAMA,YAAM,iBAAiB,iBAAkB,IAAI,iBAAiB,CAAC,IAAK,CAAC,GAClE,OAAO,CAAC,MAA2C,OAAO,EAAE,YAAY,QAAQ,EAChF,KAAK,CAAC,GAAG,MAAM,EAAE,YAAY,EAAE,SAAS,EACxC,MAAM,GAAG,iBAAiB;AAC7B,UAAI,cAAc,SAAS,GAAG;AAC5B,cAAM,gBAAgB,mBAAmB;AAAA,UACvC,QAAQ,KAAK;AAAA,UACb,SAAS;AAAA,UACT;AAAA,QACF,CAAC;AACD,cAAM,gBAAgB,MAAM;AAAA,UAC1B;AAAA,YACE,OAAO,KAAK;AAAA,YACZ,UAAU;AAAA,cACR,EAAE,MAAM,UAAU,SAAS,eAAe;AAAA,cAC1C,EAAE,MAAM,QAAQ,SAAS,cAAc;AAAA,YACzC;AAAA,YACA,UAAU;AAAA,YACV,aAAa,KAAK,eAAe;AAAA,YACjC,WAAW,KAAK,aAAa;AAAA,UAC/B;AAAA,UACA,KAAK;AAAA,QACP;AACA,cAAM,SAAS,wBAAwB,cAAc,SAAS,CAAC,EAAE,CAAC;AAClE,YAAI,QAAQ;AACV;AAAA,YACE,OAAO;AAAA,YACP,OAAO,SAAS;AAAA,YAChB,OAAO,aACL,YAAY,cAAc,MAAM,+BAA+B,cAC5D,IAAI,CAAC,MAAM,EAAE,UAAU,EACvB,KAAK,GAAG,CAAC;AAAA,UAChB;AAAA,QACF;AAAA,MACF;AAGA,YAAM,eAAe,KAAK,IAAI,GAAG,IAAI,iBAAiB,IAAI,MAAM;AAChE,UAAI,eAAe,GAAG;AACpB,cAAM,EAAE,KAAK,QAAQ,OAAO,IAAI,cAAc,KAAK,WAAW,KAAK,MAAM;AACzE,cAAM,aAAa,sBAAsB;AAAA,UACvC;AAAA,UACA,eAAe;AAAA,UACf,WAAW;AAAA,UACX,cAAc;AAAA,UACd,YAAY;AAAA,UACZ,oBAAoB,KAAK;AAAA,QAC3B,CAAC;AACD,cAAM,SAAS,MAAM;AAAA,UACnB;AAAA,YACE,OAAO,KAAK;AAAA,YACZ,UAAU;AAAA,cACR,EAAE,MAAM,UAAU,SAAS,kBAAkB;AAAA,cAC7C,EAAE,MAAM,QAAQ,SAAS,WAAW;AAAA,YACtC;AAAA,YACA,UAAU;AAAA,YACV,aAAa,KAAK,eAAe;AAAA,YACjC,WAAW,KAAK,aAAa;AAAA,UAC/B;AAAA,UACA,KAAK;AAAA,QACP;AACA,mBAAW,YAAY,wBAAwB,OAAO,SAAS,YAAY,GAAG;AAC5E,iBAAO,SAAS,SAAS,SAAS,OAAO,SAAS,SAAS;AAAA,QAC7D;AAAA,MACF;AAEA,aAAO,IAAI,MAAM,GAAG,IAAI,cAAc;AAAA,IACxC;AAAA,EACF;AACF;AAKA,SAAS,mBAAmB,MAIjB;AACT,QAAM,QAAkB;AA
AA,IACtB,mBAAmB,KAAK,QAAQ,MAAM,iBAAiB,KAAK,MAAM;AAAA,IAClE;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACA,OAAK,QAAQ,QAAQ,CAAC,GAAG,MAAM;AAC7B,UAAM,MAAM,OAAO,aAAa,KAAK,CAAC;AACtC,UAAM,OAAO,OAAO,QAAQ,EAAE,UAAU,EACrC,KAAK,CAAC,GAAG,MAAM,EAAE,CAAC,IAAI,EAAE,CAAC,CAAC,EAC1B,MAAM,GAAG,KAAK,SAAS,EACvB,IAAI,CAAC,CAAC,IAAI,KAAK,MAAM,GAAG,EAAE,KAAK,MAAM,QAAQ,CAAC,CAAC,GAAG;AACrD,UAAM;AAAA,MACJ,eAAe,GAAG,UAAU,EAAE,UAAU,QAAQ,CAAC,CAAC,mBAChD,KAAK,KAAK,IAAI,KAAK,KACrB;AAAA,MACA;AAAA,MACA,EAAE;AAAA,MACF;AAAA,MACA;AAAA,IACF;AAAA,EACF,CAAC;AACD,QAAM;AAAA,IACJ;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACA,SAAO,MAAM,KAAK,IAAI;AACxB;AAIO,SAAS,kBAAkB,MAAwB;AACxD,QAAM,MAAgB,CAAC;AACvB,aAAW,QAAQ,KAAK,MAAM,IAAI,GAAG;AACnC,UAAM,QAAQ,kBAAkB,KAAK,IAAI;AACzC,QAAI,MAAO,KAAI,KAAK,MAAM,CAAC,CAAE;AAAA,EAC/B;AACA,SAAO;AACT;AAKO,SAAS,mBAAmB,UAAkB,WAA2B;AAC9E,QAAM,OAAO,CAAC,MACZ,EACG,MAAM,mBAAmB,EACzB,IAAI,CAAC,MAAM,EAAE,KAAK,CAAC,EACnB,OAAO,CAAC,MAAM,EAAE,SAAS,CAAC;AAC/B,QAAM,IAAI,IAAI,IAAI,KAAK,QAAQ,CAAC;AAChC,QAAM,IAAI,IAAI,IAAI,KAAK,SAAS,CAAC;AACjC,MAAI,QAAQ;AACZ,aAAW,KAAK,EAAG,KAAI,CAAC,EAAE,IAAI,CAAC,EAAG;AAClC,aAAW,KAAK,EAAG,KAAI,CAAC,EAAE,IAAI,CAAC,EAAG;AAClC,SAAO;AACT;AAEA,SAAS,0BAA0B,WAAmB,UAAsC;AAC1F,MAAI,SAAS,WAAW,EAAG,QAAO;AAClC,QAAM,OAAO,IAAI,IAAI,kBAAkB,SAAS,CAAC;AACjD,aAAW,WAAW,UAAU;AAC9B,QAAI,CAAC,KAAK,IAAI,OAAO,EAAG,QAAO;AAAA,EACjC;AACA,SAAO;AACT;AAKA,SAAS,cACP,KACA,WACA,YAC6D;AAC7D,QAAM,OAAO,IAAI,QAAQ,GAAG,EAAE;AAC9B,MAAI,CAAC,QAAQ,KAAK,WAAW,WAAW,GAAG;AACzC,WAAO,EAAE,KAAK,CAAC,GAAG,QAAQ,CAAC,GAAG,QAAQ,WAAW;AAAA,EACnD;AACA,QAAM,OAAO,CAAC,GAAG,KAAK,UAAU,EAAE,KAAK,CAAC,GAAG,MAAM,EAAE,YAAY,EAAE,SAAS,EAAE,CAAC;AAC7E,MAAI,CAAC,KAAM,QAAO,EAAE,KAAK,CAAC,GAAG,QAAQ,CAAC,GAAG,QAAQ,WAAW;AAE5D,QAAM,UAAU,CAAC,GAAG,KAAK,SAAS,EAAE,KAAK,CAAC,GAAG,MAAM,EAAE,YAAY,EAAE,SAAS;AAC5E,QAAM,UAAU,CAAC,OAA8D;AAAA,IAC7E,IAAI,EAAE;AAAA,IACN,OAAO,EAAE;AAAA,EACX;AACA,QAAM,MAAM,QAAQ,MAAM,GAAG,SAAS,EAAE,IAAI,OAAO;AACnD,QAAM,SAAS,QAAQ,MAAM,CAAC,SAAS,EAAE,QAAQ,EAAE,IAAI,OAAO;AAE9D,QAAM,UAAU,OAAO,QAAQ,KAAK,UAA
U,EAC3C,KAAK,CAAC,GAAG,MAAM,EAAE,CAAC,IAAI,EAAE,CAAC,CAAC,EAC1B,MAAM,GAAG,CAAC,EACV,IAAI,CAAC,CAAC,KAAK,KAAK,MAAM,GAAG,GAAG,KAAK,MAAM,QAAQ,CAAC,CAAC,GAAG;AACvD,QAAM,SACJ,QAAQ,SAAS,IAAI,GAAG,UAAU,+BAA0B,QAAQ,KAAK,IAAI,CAAC,KAAK;AAErF,SAAO,EAAE,KAAK,QAAQ,OAAO;AAC/B;;;ACzTO,SAAS,eACX,OACyB;AAC5B,MAAI,MAAM,WAAW,GAAG;AACtB,UAAM,IAAI,MAAM,wCAAwC;AAAA,EAC1D;AACA,SAAO;AAAA,IACL,MAAM,YAAY,MAAM,IAAI,CAAC,MAAM,EAAE,IAAI,EAAE,KAAK,GAAG,CAAC;AAAA,IACpD,MAAM,OAAO,KAA6D;AACxE,YAAM,UAAwE,CAAC;AAC/E,iBAAW,QAAQ,OAAO;AACxB,cAAM,MAAM,MAAM,KAAK,OAAO,GAAG;AACjC,gBAAQ,KAAK,EAAE,MAAM,IAAI,CAAC;AAAA,MAC5B;AAQA,YAAM,YAAY,QAAQ,IAAI,CAAC,MAAM,EAAE,IAAI,QAAQ;AACnD,YAAM,UAAwB,UAAU,MAAM,CAAC,MAAM,MAAM,MAAM,IAC7D,SACA,UAAU,SAAS,cAAc,IAC/B,iBACA,UAAU,SAAS,eAAe,IAChC,kBACA,UAAU,SAAS,MAAM,IACvB,SACA;AAEV,YAAM,eAAe,QAAQ;AAAA,QAAQ,CAAC,MACpC,EAAE,IAAI,kBAAkB,SAAS,IAC7B,EAAE,IAAI,oBACN,CAAC,EAAE,MAAM,EAAE,KAAK,MAAM,QAAQ,EAAE,IAAI,aAAa,QAAQ,QAAQ,EAAE,IAAI,CAAC;AAAA,MAC9E;AAEA,YAAM,UAAU,QAAQ;AAAA,QAAQ,CAAC,MAC/B,EAAE,IAAI,QAAQ,IAAI,CAAC,WAAW,IAAI,EAAE,KAAK,IAAI,KAAK,MAAM,EAAE;AAAA,MAC5D;AAEA,aAAO;AAAA,QACL,UAAU;AAAA,QACV;AAAA,QACA,mBAAmB;AAAA,QACnB,OAAO,QAAQ,CAAC,GAAG,IAAI;AAAA,MACzB;AAAA,IACF;AAAA,EACF;AACF;;;ACpBO,SAAS,sBACd,SAC4B;AAC5B,QAAM,iBAAiB,QAAQ,kBAAkB;AACjD,QAAM,gBAAgB,QAAQ,8BAA8B;AAE5D,SAAO;AAAA,IACL,MAAM;AAAA,IACN,MAAM,OAAO,KAA6D;AACxE,YAAM,UAAoB,CAAC;AAC3B,YAAM,eAA0E,CAAC;AAKjF,YAAM,oBAAoB;AAAA,QACxB,IAAI;AAAA,QACJ,IAAI,uBAAuB,IAAI;AAAA,QAC/B,QAAQ;AAAA,MACV;AACA,YAAM,qBAAqB;AAAA,QACzB,IAAI;AAAA,QACJ,IAAI;AAAA,QACJ,QAAQ;AAAA,MACV;AACA,YAAM,QAAQ,qBAAqB;AACnC,YAAM,cAAc,SAAS;AAC7B,mBAAa,KAAK;AAAA,QAChB,MAAM;AAAA,QACN,QAAQ;AAAA,QACR,QAAQ,EAAE,mBAAmB,oBAAoB,OAAO,eAAe;AAAA,MACzE,CAAC;AACD,UAAI,CAAC,aAAa;AAChB,gBAAQ,KAAK,iBAAiB,MAAM,QAAQ,CAAC,CAAC,gBAAgB,cAAc,EAAE;AAAA,MAChF;AAGA,YAAM,aACJ,QAAQ,cAAc,UACtB,IAAI,KAAK,YAAY,IAAI,KAAK,YAAY,QAAQ;AACpD,mBAAa,KAAK;AAAA,QAChB,MAAM;AAAA,QACN,QAAQ;AAAA,QACR,QAAQ;AAAA,UACN,cAAc,IAAI,KAAK;AAAA,UACvB,aAAa,IAAI,KAAK;AAAA,UACtB,WAAW,QAAQ;AAAA,QACrB;AAAA,MACF,CAAC;A
ACD,UAAI,CAAC,YAAY;AACf,gBAAQ;AAAA,UACN,UAAU,IAAI,KAAK,YAAY,IAAI,KAAK,UAAU,QAAQ,CAAC,CAAC,aAAa,QAAQ,SAAS;AAAA,QAC5F;AAAA,MACF;AAGA,YAAM,kBAAkB,QAAQ,iBAC5B,aAAa,IAAI,oBAAoB,QAAQ,cAAc,IAC3D,EAAE,QAAQ,MAAM,UAAU,CAAC,EAAE;AACjC,mBAAa,KAAK;AAAA,QAChB,MAAM;AAAA,QACN,QAAQ,gBAAgB;AAAA,QACxB,QAAQ;AAAA,UACN,UAAU,gBAAgB,SAAS;AAAA,UACnC,QAAQ,gBAAgB,SAAS,MAAM,GAAG,CAAC;AAAA,QAC7C;AAAA,MACF,CAAC;AACD,UAAI,CAAC,gBAAgB,QAAQ;AAC3B,gBAAQ,KAAK,0BAA0B,gBAAgB,SAAS,MAAM,YAAY;AAAA,MACpF;AAGA,UAAI,sBAAkD;AACtD,UAAI,QAAQ,cAAc,QAAQ,WAAW,UAAU,IAAI;AACzD,8BAAsB,oBAAoB,EAAE,MAAM,QAAQ,WAAW,CAAC;AAAA,MACxE;AAIA,YAAM,kBAAkB;AACxB,YAAM,kBAAkB,qBAAqB,YAAY,CAAC,GAAG;AAAA,QAC3D,CAAC,MAAM,EAAE,YAAY;AAAA,MACvB;AACA,YAAM,oBACJ,CAAC,uBACD,CAAC,iBACA,eAAe,WAAW,KAAK,oBAAoB,YAAY;AAClE,mBAAa,KAAK;AAAA,QAChB,MAAM;AAAA,QACN,QAAQ;AAAA,QACR,QAAQ,EAAE,QAAQ,qBAAqB,oBAAoB,eAAe,OAAO;AAAA,MACnF,CAAC;AACD,UAAI,CAAC,mBAAmB;AACtB,gBAAQ;AAAA,UACN,mCAAmC,eAAe,MAAM,sCAAsC,oBAAqB,OAAO;AAAA,QAC5H;AAAA,MACF;AAGA,UAAI,eAAoC;AACxC,UAAI,QAAQ,cAAc,QAAQ,WAAW,UAAU,IAAI;AACzD,uBAAe,YAAY,QAAQ,YAAY,CAAC,CAAC;AAAA,MACnD;AAEA,YAAM,eAAe,cAAc,UAAU,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,aAAa,OAAO;AACrF,YAAM,aAAa,YAAY,WAAW;AAC1C,mBAAa,KAAK;AAAA,QAChB,MAAM;AAAA,QACN,QAAQ;AAAA,QACR,QAAQ,EAAE,aAAa,cAAc,OAAO,UAAU,GAAG,aAAa,YAAY,OAAO;AAAA,MAC3F,CAAC;AACD,UAAI,CAAC,YAAY;AACf,gBAAQ,KAAK,wBAAwB,YAAY,MAAM,EAAE;AAAA,MAC3D;AAGA,YAAM,YAAY,aAAa,MAAM,CAAC,MAAM,EAAE,MAAM;AACpD,YAAM,WAAW,YAAY,SAAS;AAEtC,aAAO;AAAA,QACL;AAAA,QACA,SAAS,QAAQ,SAAS,IAAI,UAAU,CAAC,kBAAkB;AAAA,QAC3D,mBAAmB;AAAA,QACnB;AAAA,MACF;AAAA,IACF;AAAA,EACF;AACF;AAEA,SAAS,cACP,WACA,mBACA,WACQ;AACR,MAAI,CAAC,aAAa,UAAU,SAAS,EAAG,QAAO;AAC/C,QAAM,cAAc,IAAI,IAAI,UAAU,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC;AACtD,QAAM,aAAuB,CAAC;AAC9B,aAAW,CAAC,QAAQ,MAAM,KAAK,mBAAmB;AAChD,UAAM,aAAa,OAAO,MAAM,GAAG,EAAE,CAAC,KAAK;AAC3C,QAAI,CAAC,YAAY,IAAI,UAAU,EAAG;AAClC,UAAM,iBAAiB,OAAO,OAAO,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,SAAS;AACnE,QAAI,eAAe,WAAW,EAAG;AACjC,eAAW,KAAK,eAAe,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,eAAe,MAAM;AAAA,EACnF;AA
CA,MAAI,WAAW,WAAW,EAAG,QAAO;AACpC,SAAO,WAAW,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,WAAW;AAC5D;AAEA,SAAS,aACP,WACA,SAC8E;AAC9E,QAAM,WAA0D,CAAC;AACjE,aAAW,CAAC,SAAS,QAAQ,KAAK,WAAW;AAC3C,UAAM,OAAO,YAAY,QAAQ;AACjC,QAAI,SAAS,OAAW;AACxB,eAAW,UAAU,SAAS;AAC5B,YAAM,UAAU,mBAAmB,MAAM,CAAC,GAAG,MAAM;AACnD,UAAI,CAAC,QAAQ,QAAQ;AACnB,iBAAS,KAAK,EAAE,YAAY,OAAO,IAAI,QAAQ,QAAQ,UAAU,wBAAwB,CAAC;AAAA,MAC5F;AAAA,IACF;AAAA,EACF;AACA,SAAO,EAAE,QAAQ,SAAS,WAAW,GAAG,SAAS;AACnD;AAEA,SAAS,YAAY,UAAuC;AAC1D,MAAI,OAAO,aAAa,SAAU,QAAO;AACzC,MAAI,YAAY,OAAO,aAAa,UAAU;AAC5C,UAAM,MAAM;AACZ,QAAI,OAAO,IAAI,SAAS,SAAU,QAAO,IAAI;AAC7C,QAAI,OAAO,IAAI,WAAW,SAAU,QAAO,IAAI;AAC/C,QAAI,OAAO,IAAI,YAAY,SAAU,QAAO,IAAI;AAAA,EAClD;AACA,SAAO;AACT;;;AC5MO,SAAS,YACd,SAC4B;AAC5B,QAAM,iBAAiB,QAAQ,kBAAkB;AACjD,SAAO;AAAA,IACL,MAAM;AAAA,IACN,MAAM,OAAO,KAA6D;AACxE,YAAM,cAAc,IAAI,IAAI,QAAQ,UAAU,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC;AAG9D,YAAM,WAAW,iBAAiB,IAAI,uBAAuB,IAAI,aAAa,WAAW;AACzF,YAAM,YAAY,iBAAiB,IAAI,aAAa,WAAW;AAC/D,YAAM,QAAQ,YAAY;AAC1B,YAAM,SAAS,SAAS;AACxB,aAAO;AAAA,QACL,UAAU,SAAS,SAAS;AAAA,QAC5B,SAAS,SACL,CAAC,kBAAkB,MAAM,QAAQ,CAAC,CAAC,WAAM,cAAc,EAAE,IACzD,CAAC,kBAAkB,MAAM,QAAQ,CAAC,CAAC,MAAM,cAAc,EAAE;AAAA,QAC7D,mBAAmB;AAAA,UACjB,EAAE,MAAM,eAAe,QAAQ,QAAQ,EAAE,UAAU,WAAW,OAAO,eAAe,EAAE;AAAA,QACxF;AAAA,QACA;AAAA,MACF;AAAA,IACF;AAAA,EACF;AACF;AAEA,SAAS,iBACP,mBACA,aACQ;AACR,QAAM,aAAuB,CAAC;AAC9B,aAAW,CAAC,QAAQ,MAAM,KAAK,mBAAmB;AAChD,UAAM,aAAa,OAAO,MAAM,GAAG,EAAE,CAAC,KAAK;AAC3C,QAAI,CAAC,YAAY,IAAI,UAAU,EAAG;AAClC,UAAM,OAAO,OAAO,OAAO,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,SAAS;AACzD,QAAI,KAAK,SAAS,EAAG,YAAW,KAAK,KAAK,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM;AAAA,EACpF;AACA,SAAO,WAAW,WAAW,IAAI,IAAI,WAAW,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,WAAW;AAC1F;;;ACwGO,SAAS,oBACd,OAC4B;AAC5B,SACE,OAAO,UAAU,YACjB,UAAU,QACV,aAAa,SACb,WAAW,SACX,eAAe;AAEnB;AAoNA,IAAM,mBAA+C;AAAA,EACnD,YAAY;AAAA,EACZ,mBAAmB;AAAA,EACnB,eAAe;AACjB;AAGO,SAAS,eAAe,OAAuC;AACpE,SAAO,iBAAiB,SAAS,YAAY;AAC/C;;;ACxXO,SAAS,sBACd,UACQ;AACR,QAAM,aAAuB,CAAC;AAC9B,aAAW,QAA
Q,SAAS,OAAO;AACjC,UAAM,iBAAiB,OAAO,OAAO,KAAK,WAAW,EAAE,IAAI,CAAC,MAAM,EAAE,SAAS;AAC7E,QAAI,eAAe,SAAS,GAAG;AAC7B,iBAAW,KAAK,eAAe,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,eAAe,MAAM;AAAA,IACnF;AAAA,EACF;AACA,SAAO,WAAW,WAAW,IAAI,IAAI,WAAW,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,WAAW;AAC1F;AAWO,SAAS,kBACd,UACmB;AACnB,QAAM,UAAkC,CAAC;AACzC,QAAM,YAAoC,CAAC;AAC3C,QAAM,aAAa,oBAAI,IAAsB;AAC7C,aAAW,QAAQ,SAAS,OAAO;AACjC,UAAM,cAAc,OAAO,OAAO,KAAK,WAAW;AAClD,QAAI,YAAY,WAAW,EAAG;AAC9B,UAAM,gBAAgB,YAAY,OAAO,CAAC,GAAG,MAAM,IAAI,EAAE,WAAW,CAAC,IAAI,YAAY;AACrF,UAAM,MAAM,WAAW,IAAI,KAAK,UAAU,KAAK,CAAC;AAChD,QAAI,KAAK,aAAa;AACtB,eAAW,IAAI,KAAK,YAAY,GAAG;AACnC,eAAW,SAAS,aAAa;AAC/B,iBAAW,CAAC,KAAK,KAAK,KAAK,OAAO,QAAQ,MAAM,UAAU,GAAG;AAC3D,gBAAQ,GAAG,KAAK,QAAQ,GAAG,KAAK,KAAK;AACrC,kBAAU,GAAG,KAAK,UAAU,GAAG,KAAK,KAAK;AAAA,MAC3C;AAAA,IACF;AAAA,EACF;AACA,QAAM,aAAqC,CAAC;AAC5C,aAAW,OAAO,OAAO,KAAK,OAAO,GAAG;AACtC,UAAM,QAAQ,UAAU,GAAG,KAAK;AAChC,eAAW,GAAG,IAAI,QAAQ,KAAK,QAAQ,GAAG,KAAK,KAAK,QAAQ;AAAA,EAC9D;AACA,QAAM,YAAY,CAAC,GAAG,WAAW,QAAQ,CAAC,EAAE,IAAI,CAAC,CAAC,YAAY,KAAK,OAAO;AAAA,IACxE;AAAA,IACA,WAAW,MAAM,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,MAAM;AAAA,EACtD,EAAE;AACF,SAAO,EAAE,YAAY,UAAU;AACjC;;;AC9CA,SAAS,kBAAkB;AAqE3B,eAAsB,gBACpB,MACsD;AACtD,QAAM,cAAc,KAAK,eAAe;AAGxC,QAAM,mBAAmB,MAAM,YAAkC;AAAA,IAC/D,GAAG;AAAA,IACH,UAAU,CAAC,UAAU,QAAQ,KAAK,oBAAoB,KAAK,iBAAiB,UAAU,GAAG;AAAA,IACzF,QAAQ,GAAG,KAAK,MAAM;AAAA,EACxB,CAAC;AAED,QAAM,cAA0E,CAAC;AACjF,QAAM,UAA8B,CAAC;AACrC,MAAI,kBAAoC,CAAC,KAAK,eAAe;AAC7D,MAAI,gBAAgB,KAAK;AACzB,MAAI,oBAAoB,YAAY,KAAK,eAAe;AACxD,MAAI,kBAAkB,sBAAsB,gBAAgB;AAC5D,MAAI;AACJ,MAAI;AAMJ,QAAM,SAAyB;AAAA,IAC7B,eAAe,KAAK,iBAAiB,mBAAmB,kBAAkB,EAAE;AAAA,EAC9E;AAEA,WAAS,MAAM,GAAG,MAAM,KAAK,gBAAgB,OAAO;AAElD,QAAI,KAAK,OAAO,SAAS,EAAE,QAAQ,CAAC,EAAE,KAAM;AAK5C,UAAM,gBAAgB,sBAAsB,MAAM;AAClD,UAAM,WAAW,MAAM,KAAK,OAAO,QAAQ;AAAA,MACzC,gBAAgB,gBAAgB,CAAC,KAAK,KAAK;AAAA,MAC3C;AAAA,MACA,UAAU,CAAC;AAAA,MACX,gBAAgB,KAAK;AAAA,MACrB,YAAY;AAAA,MACZ,QAAQ,IAAI,gBAAgB,EAAE;AAAA,MAC9B,QAAQ,KAAK;AAAA,MAC
b,SAAS,KAAK,gBAAgB,KAAK,iBAAiB,QAAQ,KAAK,eAAe;AAAA,MAChF,qBAAqB,KAAK;AAAA,MAC1B;AAAA,IACF,CAAC;AAKD,UAAM,aAAkC,SAAS;AAAA,MAAI,CAAC,MACpD,oBAAoB,CAAC,IAAI,IAAI,EAAE,SAAS,GAAG,OAAO,IAAI,WAAW,GAAG;AAAA,IACtE;AAGA,UAAM,iBAOD,CAAC;AACN,aAAS,IAAI,GAAG,IAAI,WAAW,QAAQ,KAAK;AAC1C,YAAM,EAAE,SAAS,OAAO,UAAU,IAAI,WAAW,CAAC;AAClD,YAAM,OAAO,YAAY,OAAO;AAChC,YAAM,WAAW,MAAM,YAAkC;AAAA,QACvD,GAAG;AAAA,QACH,UAAU,CAAC,UAAU,QAAQ,KAAK,oBAAoB,SAAS,UAAU,GAAG;AAAA,QAC5E,QAAQ,GAAG,KAAK,MAAM,QAAQ,GAAG,cAAc,CAAC;AAAA,MAClD,CAAC;AACD,YAAM,YAAY,sBAAsB,QAAQ;AAChD,qBAAe,KAAK,EAAE,aAAa,MAAM,SAAS,OAAO,WAAW,UAAU,UAAU,CAAC;AAGzF,aAAO;AAAA,QACL,eAAe,SAAS,MAAM,UAAU,KAAK,SAAS,QAAW,aAAa,MAAS;AAAA,MACzF;AAAA,IACF;AAGA,mBAAe,KAAK,CAAC,GAAG,MAAM,EAAE,YAAY,EAAE,SAAS;AACvD,UAAM,WAAW,eAAe,MAAM,GAAG,WAAW;AACpD,sBAAkB,SAAS,IAAI,CAAC,MAAM,EAAE,OAAO;AAC/C,UAAM,MAAM,eAAe,CAAC;AAC5B,QAAI,OAAO,IAAI,YAAY,iBAAiB;AAC1C,sBAAgB,IAAI;AACpB,0BAAoB,IAAI;AACxB,wBAAkB,IAAI;AACtB,oBAAc,IAAI,SAAS;AAC3B,wBAAkB,IAAI,aAAa;AAAA,IACrC;AAEA,UAAM,SAA2B;AAAA,MAC/B,iBAAiB;AAAA,MACjB,YAAY,eAAe,IAAI,CAAC,MAAM;AACpC,cAAM,YAAY,kBAAkB,EAAE,QAAQ;AAC9C,cAAM,YAAoD;AAAA,UACxD,aAAa,EAAE;AAAA,UACf,WAAW,EAAE;AAAA,UACb,MAAM,CAAC,EAAE,WAAW,EAAE,SAAS;AAAA,UAC/B,YAAY,UAAU;AAAA,UACtB,WAAW,UAAU;AAAA,QACvB;AACA,YAAI,EAAE,MAAO,WAAU,QAAQ,EAAE;AACjC,YAAI,EAAE,UAAW,WAAU,YAAY,EAAE;AACzC,eAAO;AAAA,MACT,CAAC;AAAA,MACD,UAAU,SAAS,IAAI,CAAC,MAAM,EAAE,WAAW;AAAA,IAC7C;AACA,YAAQ,KAAK,MAAM;AACnB,gBAAY,KAAK;AAAA,MACf;AAAA,MACA,UAAU,eAAe,IAAI,CAAC,OAAO;AAAA,QACnC,aAAa,EAAE;AAAA,QACf,SAAS,EAAE;AAAA,QACX,UAAU,EAAE;AAAA,MACd,EAAE;AAAA,IACJ,CAAC;AAAA,EACH;AAEA,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,gBAAgB,sBAAsB,MAAM;AAAA,EAC9C;AACF;AAIA,SAAS,eACP,SACA,MACA,UACA,YACA,OACA,WACc;AACd,QAAM,aAAqC,CAAC;AAC5C,aAAW,EAAE,YAAY,UAAU,KAAK,kBAAkB,QAAQ,EAAE,WAAW;AAC7E,eAAW,UAAU,IAAI;AAAA,EAC3B;AACA,QAAM,SAAuB;AAAA,IAC3B;AAAA,IACA,aAAa;AAAA,IACb;AAAA,IACA,WAAW,sBAAsB,QAAQ;AAAA,IACzC;AAAA,EACF;AACA,MAAI,MAAO,QAAO,QAAQ;AAC1B,MAAI,UAAW,QAAO,YAAY;AAClC,SAAO;AA
CT;AAUA,SAAS,sBAAsB,QAAwC;AACrE,MAAI,OAAO,UAAU,EAAG,QAAO,CAAC,GAAG,MAAM;AACzC,QAAM,MAAM,oBAAI,IAAY;AAC5B,aAAW,KAAK,OAAQ,YAAW,MAAM,OAAO,KAAK,EAAE,UAAU,EAAG,KAAI,IAAI,EAAE;AAC9E,MAAI,IAAI,SAAS,EAAG,QAAO,CAAC,GAAG,MAAM;AACrC,QAAM,QAAgC,CAAC;AACvC,aAAW,MAAM,KAAK;AACpB,QAAI,MAAM,OAAO;AACjB,eAAW,KAAK,QAAQ;AACtB,YAAM,IAAI,EAAE,WAAW,EAAE;AACzB,UAAI,OAAO,MAAM,YAAY,OAAO,SAAS,CAAC,KAAK,IAAI,IAAK,OAAM;AAAA,IACpE;AACA,UAAM,EAAE,IAAI,OAAO,SAAS,GAAG,IAAI,MAAM;AAAA,EAC3C;AACA,QAAM,aAAwC,CAAC,GAAG,GAAG,EAAE,IAAI,CAAC,QAAQ;AAAA,IAClE,MAAM;AAAA,IACN,WAAW;AAAA,IACX,OAAO,CAAC,MAAM;AACZ,YAAM,IAAI,EAAE,WAAW,EAAE;AACzB,aAAO,OAAO,MAAM,YAAY,OAAO,SAAS,CAAC,IAAI,IAAK,MAAM,EAAE,KAAK;AAAA,IACzE;AAAA,EACF,EAAE;AACF,SAAO,eAAe,QAAQ,UAAU,EAAE;AAC5C;AAEO,SAAS,YAAY,SAAiC;AAG3D,QAAM,WACJ,OAAO,YAAY,WACf,UACA,KAAK,UAAU;AAAA,IACb,MAAM,QAAQ;AAAA,IACd,aAAa,QAAQ;AAAA,IACrB,SAAS,QAAQ,WAAW;AAAA,EAC9B,CAAC;AACP,SAAO,WAAW,QAAQ,EAAE,OAAO,QAAQ,EAAE,OAAO,KAAK,EAAE,MAAM,GAAG,EAAE;AACxE;;;AClOA,eAAsB,mBACpB,MACyD;AAGzD,MAAK,KAAa,kBAAkB,UAAU;AAC5C,UAAM,IAAI;AAAA,MACR;AAAA,IACF;AAAA,EACF;AAKA,MAAI,KAAK,YAAY,SAAS,KAAK,QAAQ;AACzC,UAAM,IAAI;AAAA,MACR;AAAA,IACF;AAAA,EACF;AACA,MAAI,KAAK,kBAAkB,SAAS,CAAC,KAAK,WAAW,CAAC,KAAK,SAAS;AAClE,UAAM,IAAI,MAAM,mEAAmE;AAAA,EACrF;AAGA,QAAM,eAAe,MAAM,gBAAgB,IAAI;AAG/C,QAAM,EAAE,aAAAA,aAAY,IAAI,MAAM,OAAO,4BAAiB;AAEtD,QAAM,oBAAoB,MAAMA,aAAkC;AAAA,IAChE,GAAG;AAAA,IACH,WAAW,KAAK;AAAA,IAChB,UAAU,CAAC,UAAU,QAAQ,KAAK,oBAAoB,KAAK,iBAAiB,UAAU,GAAG;AAAA,IACzF,QAAQ,GAAG,KAAK,MAAM;AAAA,EACxB,CAAC;AAED,QAAM,kBAAkB,MAAMA,aAAkC;AAAA,IAC9D,GAAG;AAAA,IACH,WAAW,KAAK;AAAA,IAChB,UAAU,CAAC,UAAU,QACnB,KAAK,oBAAoB,aAAa,eAAe,UAAU,GAAG;AAAA,IACpE,QAAQ,GAAG,KAAK,MAAM;AAAA,EACxB,CAAC;AAUD,QAAM,qBAAqB,oBAAI,IAAuB;AACtD,QAAM,oBAAoB,oBAAI,IAAuB;AACrD,QAAM,cAAwB,oBAAI,IAAI;AACtC,QAAM,sBAAgC,oBAAI,IAAI;AAC9C,aAAW,QAAQ,gBAAgB,OAAO;AACxC,uBAAmB,IAAI,KAAK,QAAQ,KAAK,QAAQ;AACjD,gBAAY,IAAI,KAAK,QAAQ,KAAK,WAAW;AAAA,EAC/C;AACA,aAAW,QAAQ,kBAAkB,OAAO;AAC1C,sBAAkB,IAAI,KAAK,QAAQ,KAAK,QAAQ;AAChD,wBAAoB,IAAI,KAAK,QAAQ,KAAK,WAAW;AAAA
,EACvD;AAEA,QAAM,aAAa,MAAM,KAAK,KAAK,OAAO;AAAA,IACxC;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,WAAW,KAAK;AAAA,IAChB,MAAM;AAAA,MACJ,WAAW,gBAAgB,WAAW;AAAA,MACtC,UAAU,kBAAkB,WAAW;AAAA,IACzC;AAAA,IACA,QAAQ,IAAI,gBAAgB,EAAE;AAAA,EAChC,CAAC;AAMD,QAAM,SAAS,KAAK,sBAAsB;AAC1C,QAAM,eACJ,aAAa,sBAAsB,YAAY,KAAK,eAAe,IAC/D,KACA,OAAO,aAAa,eAAe,KAAK,eAAe;AAE7D,MAAI;AACJ,MAAI,KAAK,kBAAkB,QAAQ,WAAW,aAAa,QAAQ;AACjE,eAAW,WAAW;AAAA,MACpB,QAAQ;AAAA,MACR,MAAM;AAAA,MACN;AAAA,MACA,SAAS,KAAK;AAAA,MACd,QAAQ,KAAK;AAAA,IACf,CAAC;AAAA,EACH;AAEA,SAAO;AAAA,IACL,GAAG;AAAA,IACH;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;AAEO,SAAS,kBACd,eACA,iBACQ;AAGR,MAAI,OAAO,kBAAkB,YAAY,OAAO,oBAAoB,UAAU;AAC5E,UAAM,MAAM,CAAC,MACX,OAAO,MAAM,WACT,qBACA,YAAY,EAAE,WAAW,GAAG,EAAE,UAAU,SAAS,EAAE,OAAO,KAAK,EAAE,GAAG,EAAE,UAAU;AAAA,EAAK,EAAE,OAAO,KAAK,EAAE;AAC3G,WAAO;AAAA,EAAiB,IAAI,eAAe,CAAC;AAAA;AAAA,EAAiB,IAAI,aAAa,CAAC;AAAA,EACjF;AACA,QAAM,QAAkB,CAAC;AACzB,QAAM,KAAK,cAAc;AACzB,QAAM,KAAK,YAAY;AACvB,aAAW,KAAK,gBAAgB,MAAM,IAAI,EAAG,OAAM,KAAK,KAAK,CAAC,EAAE;AAChE,aAAW,KAAK,cAAc,MAAM,IAAI,EAAG,OAAM,KAAK,KAAK,CAAC,EAAE;AAC9D,SAAO,MAAM,KAAK,IAAI;AACxB;;;ACnLA,eAAsB,QACpB,MAC+C;AAC/C,SAAO,YAAY,IAAI;AACzB;;;ACOA,SAAS,cAAAC,mBAAkB;AAC3B,SAAS,QAAAC,aAAY;AAkBd,SAAS,mBAAmB,SAAiC;AAClE,QAAM,WACJ,OAAO,YAAY,WACf,UACA,KAAK,UAAU;AAAA,IACb,MAAM,QAAQ;AAAA,IACd,aAAa,QAAQ;AAAA,IACrB,SAAS,QAAQ,WAAW;AAAA,EAC9B,CAAC;AACP,SAAO,UAAUC,YAAW,QAAQ,EAAE,OAAO,QAAQ,EAAE,OAAO,KAAK,CAAC;AACtE;AAuGA,SAAS,qBACP,UACQ;AACR,QAAM,KAAe,CAAC;AACtB,aAAW,QAAQ,SAAS,OAAO;AACjC,QAAI,KAAK,MAAO;AAChB,UAAM,KAAK,OAAO,OAAO,KAAK,WAAW,EAAE,IAAI,CAAC,MAAM,EAAE,SAAS;AACjE,QAAI,GAAG,OAAQ,IAAG,KAAK,GAAG,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,GAAG,MAAM;AAAA,EAClE;AACA,SAAO,GAAG,SAAS,GAAG,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,GAAG,SAAS;AACjE;AAGO,SAAS,0BACd,MACsB;AACtB,QAAM,YAAY,0BAA0B,KAAK,aAAa;AAC9D,QAAM,SAAS,CAAC,GAAG,IAAI,IAAI,KAAK,cAAc,IAAI,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC,EAAE,KAAK;AAEzE,QAAM,aAAwC,CAAC;AAC/C,aAAW,OAAO,KAAK,aAAa;AAClC,UAAM,cAAc,IAAI,IAAI,IAAI,QAA
Q;AACxC,UAAM,gBAAgB,IAAI,IAAI,IAAI,SAAS,IAAI,CAAC,MAAM,CAAC,EAAE,aAAa,EAAE,OAAO,CAAC,CAAC;AACjF,eAAW,KAAK,IAAI,YAAY;AAC9B,YAAM,UAAU,cAAc,IAAI,EAAE,WAAW;AAC/C,YAAM,QAAiC;AAAA,QACrC,YAAY,IAAI;AAAA,QAChB,aAAa,EAAE;AAAA,QACf,aACE,YAAY,SAAY,mBAAmB,OAAO,IAAI,UAAU,EAAE,WAAW;AAAA,QAC/E,WAAW,EAAE;AAAA,QACb,UAAU,YAAY,IAAI,EAAE,WAAW;AAAA,MACzC;AACA,UAAI,EAAE,MAAO,OAAM,QAAQ,EAAE;AAC7B,UAAI,EAAE,UAAW,OAAM,YAAY,EAAE;AACrC,iBAAW,KAAK,KAAK;AAAA,IACvB;AAAA,EACF;AAEA,QAAM,2BAA2B,qBAAqB,KAAK,iBAAiB;AAC5E,QAAM,yBAAyB,qBAAqB,KAAK,eAAe;AAExE,QAAM,SAA+B;AAAA,IACnC,QAAQ;AAAA,IACR,OAAO,KAAK;AAAA,IACZ,QAAQ,KAAK;AAAA,IACb,WAAW,KAAK;AAAA,IAChB,qBAAqB,mBAAmB,KAAK,eAAe;AAAA,IAC5D,mBAAmB,mBAAmB,KAAK,aAAa;AAAA,IACxD,MAAM,KAAK;AAAA,IACX;AAAA,IACA,MAAM;AAAA,MACJ,UAAU,KAAK,KAAK;AAAA,MACpB,SAAS,KAAK,KAAK;AAAA,MACnB,OAAO,KAAK,KAAK;AAAA,MACjB,mBAAmB,KAAK,KAAK,kBAAkB,IAAI,CAAC,OAAO;AAAA,QACzD,MAAM,EAAE;AAAA,QACR,QAAQ,EAAE;AAAA,MACZ,EAAE;AAAA,IACJ;AAAA,IACA;AAAA,IACA;AAAA,IACA,aAAa,yBAAyB;AAAA,IACtC,SAAS;AAAA,MACP,SAAS,UAAU;AAAA,MACnB,iBAAiB,UAAU;AAAA,MAC3B;AAAA,MACA,kBAAkB,UAAU;AAAA,MAC5B,mBAAmB,UAAU;AAAA,MAC7B,cAAc,UAAU;AAAA,IAC1B;AAAA,IACA,cAAc,KAAK;AAAA,IACnB,iBAAiB,KAAK;AAAA,EACxB;AACA,MAAI,KAAK,YAAa,QAAO,cAAc,KAAK;AAChD,MAAI,KAAK,gBAAiB,QAAO,kBAAkB,KAAK;AACxD,SAAO;AACT;AAIA,IAAM,cAA8B,CAAC,MAAM;AAE3C,SAAS,OAAO,OAAyB;AACvC,SAAOA,YAAW,QAAQ,EAAE,OAAO,MAAM,KAAK,GAAG,CAAC,EAAE,OAAO,KAAK;AAClE;AAEA,SAAS,WAAW,UAA8E;AAChG,SAAO,YAAY,SAAS,QAAQ,IAChC,EAAE,MAAM,KAAK,IACb,EAAE,MAAM,SAAS,SAAS,kBAAkB,QAAQ,GAAG;AAC7D;AAaO,SAAS,oBACd,QACA,OAAgC,CAAC,GACf;AAClB,QAAM,UAAU,OAAO,CAAC,SAAS,OAAO,KAAK,CAAC,EAAE,MAAM,GAAG,EAAE;AAC3D,QAAM,YAAY,KAAK,eAAe,KAAK,MAAM,OAAO,SAAS,KAAK,KAAK,IAAI,MAAM;AACrF,QAAM,UAAU,WAAW,KAAK,IAAI,GAAG,OAAO,eAAe,IAAI;AACjE,QAAM,QAA0B,CAAC;AAEjC,QAAM,aAAa,OAAO,CAAC,QAAQ,OAAO,KAAK,CAAC,EAAE,MAAM,GAAG,EAAE;AAC7D,QAAM,KAAK;AAAA,IACT;AAAA,IACA,QAAQ;AAAA,IACR,MAAM;AAAA,IACN,mBAAmB;AAAA,IACnB,iBAAiB;AAAA,IACjB,YAAY;AAAA,MACV,gBAAgB,OAAO;AAAA,MACvB,iBAAiB,OAAO;AAAA,MACxB,8BAA8B,OAAO;AAAA,MACrC,4BAA4B,OAAO;A
AAA,MACnC,sBAAsB,OAAO;AAAA,MAC7B,uBAAuB,OAAO,KAAK;AAAA,MACnC,yBAAyB,OAAO,QAAQ;AAAA,MACxC,0BAA0B,OAAO,QAAQ;AAAA,MACzC,uBAAuB,OAAO;AAAA,IAChC;AAAA,IACA,QAAQ,WAAW,OAAO,KAAK,QAAQ;AAAA,IACvC,gBAAgB,OAAO;AAAA,EACzB,CAAC;AAGD,QAAM,QAAQ,oBAAI,IAAuC;AACzD,aAAW,KAAK,OAAO,YAAY;AACjC,UAAM,MAAM,MAAM,IAAI,EAAE,UAAU,KAAK,CAAC;AACxC,QAAI,KAAK,CAAC;AACV,UAAM,IAAI,EAAE,YAAY,GAAG;AAAA,EAC7B;AACA,aAAW,CAAC,YAAY,KAAK,KAAK,CAAC,GAAG,MAAM,QAAQ,CAAC,EAAE,KAAK,CAAC,GAAG,MAAM,EAAE,CAAC,IAAI,EAAE,CAAC,CAAC,GAAG;AAClF,UAAM,YAAY,OAAO,CAAC,OAAO,OAAO,OAAO,OAAO,UAAU,CAAC,CAAC,EAAE,MAAM,GAAG,EAAE;AAC/E,UAAM,gBAAgB,MAAM,OAAO,CAAC,GAAG,MAAM,KAAK,IAAI,GAAG,EAAE,SAAS,GAAG,CAAC;AACxE,UAAM,KAAK;AAAA,MACT;AAAA,MACA,QAAQ;AAAA,MACR,cAAc;AAAA,MACd,MAAM,cAAc,UAAU;AAAA,MAC9B,mBAAmB;AAAA,MACnB,iBAAiB;AAAA,MACjB,YAAY;AAAA,QACV,gBAAgB,OAAO;AAAA,QACvB,qBAAqB;AAAA,QACrB,yBAAyB,MAAM;AAAA,QAC/B,wBAAwB;AAAA,MAC1B;AAAA,MACA,gBAAgB,OAAO;AAAA,MACvB,qBAAqB;AAAA,IACvB,CAAC;AACD,aAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACrC,YAAM,IAAI,MAAM,CAAC;AACjB,YAAM,aAAa,OAAO,CAAC,QAAQ,OAAO,OAAO,OAAO,UAAU,GAAG,EAAE,WAAW,CAAC,EAAE;AAAA,QACnF;AAAA,QACA;AAAA,MACF;AACA,YAAM,aAA2C;AAAA,QAC/C,gBAAgB,OAAO;AAAA,QACvB,qBAAqB;AAAA,QACrB,sBAAsB,EAAE;AAAA,QACxB,sBAAsB,EAAE;AAAA,QACxB,oBAAoB,EAAE;AAAA,QACtB,mBAAmB,EAAE;AAAA,MACvB;AACA,UAAI,EAAE,MAAO,YAAW,uBAAuB,IAAI,EAAE;AACrD,UAAI,EAAE,UAAW,YAAW,2BAA2B,IAAI,EAAE;AAC7D,YAAM,KAAK;AAAA,QACT;AAAA,QACA,QAAQ;AAAA,QACR,cAAc;AAAA,QACd,MAAM,aAAa,EAAE,WAAW;AAAA,QAChC,mBAAmB;AAAA,QACnB,iBAAiB;AAAA,QACjB;AAAA,QACA,gBAAgB,OAAO;AAAA,QACvB,qBAAqB;AAAA,MACvB,CAAC;AAAA,IACH;AAAA,EACF;AAIA,QAAM,aAAa,OAAO,CAAC,QAAQ,OAAO,KAAK,CAAC,EAAE,MAAM,GAAG,EAAE;AAC7D,QAAM,KAAK;AAAA,IACT;AAAA,IACA,QAAQ;AAAA,IACR,cAAc;AAAA,IACd,MAAM;AAAA,IACN,mBAAmB;AAAA,IACnB,iBAAiB;AAAA,IACjB,YAAY;AAAA,MACV,gBAAgB,OAAO;AAAA,MACvB,uBAAuB,OAAO,KAAK;AAAA,MACnC,oBAAoB,OAAO,KAAK,SAAS,OAAO;AAAA,MAChD,sBAAsB,KAAK,UAAU,OAAO,KAAK,OAAO;AAAA,MACxD,sBAAsB,OAAO;AAAA,MAC7B,mCAAmC,OAAO;AAAA,MAC1C,iCAAiC,OAAO;AAAA,IAC1C;AAAA,IACA,QAAQ,WAAW,OAAO,KAAK,QAAQ;AAAA,IACvC
,gBAAgB,OAAO;AAAA,EACzB,CAAC;AAED,SAAO;AACT;AAKO,SAAS,qBAAqB,QAAwB;AAC3D,SAAOC,MAAK,QAAQ,sBAAsB;AAC5C;AACO,SAAS,oBAAoB,QAAwB;AAC1D,SAAOA,MAAK,QAAQ,6BAA6B;AACnD;AAsBA,SAAS,oBACP,OACAC,cACA,SACA,UAC2B;AAC3B,QAAM,QAA4B,SAAS,MAAM,IAAI,CAAC,SAAS;AAC7D,UAAM,cAAc,OAAO,OAAO,KAAK,WAAW;AAClD,UAAM,YACJ,YAAY,WAAW,IACnB,IACA,YAAY,OAAO,CAAC,GAAG,MAAM,IAAI,EAAE,WAAW,CAAC,IAAI,YAAY;AACrE,UAAM,QAA0B;AAAA,MAC9B,YAAY,KAAK;AAAA,MACjB,KAAK,KAAK;AAAA,MACV,eAAe;AAAA,MACf,YAAY,OAAO;AAAA,QACjB,OAAO,QAAQ,KAAK,WAAW,EAAE,IAAI,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,UAAU,CAAC;AAAA,MAC1E;AAAA,IACF;AACA,QAAI,KAAK,MAAO,OAAM,eAAe,KAAK;AAC1C,WAAO;AAAA,EACT,CAAC;AACD,QAAM,gBACJ,MAAM,WAAW,IAAI,IAAI,MAAM,OAAO,CAAC,GAAG,MAAM,IAAI,EAAE,eAAe,CAAC,IAAI,MAAM;AAClF,SAAO;AAAA,IACL;AAAA,IACA,aAAAA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,SAAS,SAAS,WAAW;AAAA,IAC7B,YAAY,SAAS;AAAA,EACvB;AACF;AAMA,SAAS,kBACP,MACA,QACc;AACd,SAAO;AAAA,IACL,OAAO,KAAK;AAAA,IACZ,QAAQ,KAAK;AAAA,IACb,WAAW,KAAK;AAAA,IAChB,QAAQ;AAAA,IACR,QAAQ,CAAC;AAAA,IACT,UAAU;AAAA,MACR;AAAA,MACA,OAAO;AAAA,MACP,KAAK;AAAA,MACL,KAAK;AAAA,IACP;AAAA,IACA,aAAa;AAAA,MACX,oBAAoB,GAAG,OAAO,mBAAmB,KAAK,eAAe,KAAK,eAAe;AAAA,IAC3F;AAAA,IACA,cAAc,KAAK,KAAK;AAAA,IACxB,aAAa,OAAO;AAAA,IACpB,cAAc,KAAK;AAAA,IACnB,iBAAiB,KAAK;AAAA,EACxB;AACF;AAYA,eAAsB,mBACpB,MACmC;AACnC,QAAM,SAAS,0BAA0B,IAAI;AAC7C,QAAM,QAAQ,oBAAoB,MAAM;AAExC,OAAK,QAAQ,UAAU,KAAK,MAAM;AAClC,QAAM,aAAa,qBAAqB,KAAK,MAAM;AACnD,QAAM,YAAY,oBAAoB,KAAK,MAAM;AACjD,OAAK,QAAQ,MAAM,YAAY,KAAK,UAAU,QAAQ,MAAM,CAAC,CAAC;AAC9D,OAAK,QAAQ,MAAM,WAAW,MAAM,IAAI,CAAC,MAAM,KAAK,UAAU,CAAC,CAAC,EAAE,KAAK,IAAI,CAAC;AAE5E,MAAI,KAAK,cAAc;AAKrB,QAAI;AACF,YAAM,KAAK,aAAa,cAAc,kBAAkB,MAAM,MAAM,CAAC;AAAA,IACvE,SAAS,KAAK;AACZ,YAAM,MAAM,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAE3D,cAAQ,KAAK,4DAA4D,GAAG,EAAE;AAAA,IAChF;AACA,QAAI;AACF,YAAM,KAAK,aAAa,aAAa,KAAK;AAAA,IAC5C,SAAS,KAAK;AACZ,YAAM,MAAM,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAE3D,cAAQ,KAAK,4DAA4D,GAAG,EAAE;AAAA,IAChF;AAAA,EACF;AAEA,SAAO,EAAE,QAAQ,OAAO,YAAY,UAAU;AAChD;","names":["runCampaign","createHash"
,"join","createHash","join","surfaceHash"]}
|
package/dist/contract/index.js
CHANGED
package/dist/openapi.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"openapi": "3.1.0",
|
|
3
3
|
"info": {
|
|
4
4
|
"title": "@tangle-network/agent-eval — wire protocol",
|
|
5
|
-
"version": "0.
|
|
5
|
+
"version": "0.65.0",
|
|
6
6
|
"description": "HTTP and stdio RPC interface to agent-eval. The TypeScript runtime is the source of truth; this spec is the contract that cross-language clients (Python, Rust, Go) generate from.\n\nWire-protocol version: 1.0.0. Bumps on breaking changes to request/response schemas.",
|
|
7
7
|
"contact": {
|
|
8
8
|
"name": "Tangle Network",
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tangle-network/agent-eval",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.65.0",
|
|
4
4
|
"description": "Substrate for self-improving agents: traces, verifiable rewards, preferences, GEPA / reflective mutation, auto-research, replay, sequential anytime-valid stats, and release gates.",
|
|
5
5
|
"homepage": "https://github.com/tangle-network/agent-eval#readme",
|
|
6
6
|
"repository": {
|