@tangle-network/agent-eval 0.43.1 → 0.44.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,190 +1,14 @@
1
- import { S as Scenario, C as CampaignResult, G as GateResult, M as Mutator, I as ImprovementDriver, a as Gate, L as LabeledScenarioStore, b as LabeledScenarioWrite, c as LabeledScenarioSampleArgs, d as LabeledScenarioRecord, D as DispatchFn, J as JudgeConfig, e as CampaignTraceWriter, f as MutableSurface, g as GenerationRecord, h as CodeSurface } from '../types-BLbRTxoc.js';
2
- export { i as CampaignAggregates, j as CampaignArtifactWriter, k as CampaignCellResult, l as CampaignCostMeter, m as DispatchContext, n as GateContext, o as GateDecision, p as GenerationCandidate, q as JudgeAggregate, r as JudgeDimension, s as JudgeScore, t as LabeledScenarioSource, O as OptimizerConfig, P as ProposeContext, R as RedactionStatus, u as ScenarioAggregate, v as SessionScript, T as TraceSpan } from '../types-BLbRTxoc.js';
3
- import { L as LlmClientOptions } from '../llm-client-BXVRUZyX.js';
4
- import { RunRecord } from '@tangle-network/agent-runtime';
5
- import { R as RedTeamCase } from '../red-team-30II1T4o.js';
1
+ export { C as CampaignStorage, D as DefaultProductionGateOptions, E as EvolutionaryDriverOptions, G as GepaDriverOptions, H as HeldOutGateOptions, O as OpenAutoPrOptions, m as OpenAutoPrResult, R as RunCampaignOptions, a as RunEvalOptions, b as RunImprovementLoopOptions, c as RunImprovementLoopResult, n as RunOptimizationOptions, o as RunOptimizationResult, d as composeGate, e as defaultProductionGate, f as evolutionaryDriver, g as fsCampaignStorage, h as gepaDriver, i as heldOutGate, j as inMemoryCampaignStorage, p as openAutoPr, r as runCampaign, k as runEval, l as runImprovementLoop, q as runOptimization, s as surfaceHash } from '../run-improvement-loop-CbilHQAb.js';
2
+ import { L as LabeledScenarioStore, q as LabeledScenarioWrite, r as LabeledScenarioSampleArgs, s as LabeledScenarioRecord, f as CodeSurface } from '../types-DToGONFA.js';
3
+ export { C as CampaignAggregates, a as CampaignArtifactWriter, b as CampaignCellResult, c as CampaignCostMeter, d as CampaignResult, e as CampaignTraceWriter, g as DispatchContext, D as DispatchFn, G as Gate, h as GateContext, i as GateDecision, j as GateResult, k as GenerationCandidate, l as GenerationRecord, I as ImprovementDriver, t as JudgeAggregate, J as JudgeConfig, m as JudgeDimension, n as JudgeScore, u as LabeledScenarioSource, M as MutableSurface, o as Mutator, O as OptimizerConfig, P as ProposeContext, R as RedactionStatus, S as Scenario, v as ScenarioAggregate, p as SessionScript, T as TraceSpan } from '../types-DToGONFA.js';
4
+ import '../llm-client-BXVRUZyX.js';
6
5
  import '../errors-mje_cKOs.js';
7
6
  import '../raw-provider-sink-C46HDghv.js';
7
+ import '@tangle-network/agent-runtime';
8
+ import '../red-team-30II1T4o.js';
8
9
  import '../dataset-BlwAtYYf.js';
9
10
  import '../store-Db2Bv8Cf.js';
10
11
 
11
- /**
12
- * @experimental
13
- *
14
- * `openAutoPr` — thin shell-out helper for the `runImprovementLoop` preset's
15
- * `autoOnPromote: 'pr'` mode. Substitutes for the per-product PR-opening
16
- * code consumers duplicated 4 times. The PR body includes the campaign's
17
- * manifest hash, gate verdict, and scorecard summary so reviewers can see
18
- * exactly what was promoted + why.
19
- *
20
- * NOT a deploy mechanism — this only OPENS a PR. The human reviews + merges.
21
- * The Shape B (`autoOnPromote: 'config'`) live-runtime-mutation path is
22
- * deferred to Pass B with the full shadow / canary / rollback stack.
23
- */
24
-
25
- interface OpenAutoPrOptions<TArtifact, TScenario extends Scenario> {
26
- /** Campaign result to attach to the PR. */
27
- result: CampaignResult<TArtifact, TScenario>;
28
- /** Gate verdict explaining the promotion. Substrate refuses to open a PR
29
- * when `gate.decision !== 'ship'` — fails loud. */
30
- gate: GateResult;
31
- /** Promoted surface diff — typically the new system prompt addendum or
32
- * full profile diff. Substrate writes it as the PR body. */
33
- promotedDiff: string;
34
- /** GH owner/repo target (e.g., `tangle-network/gtm-agent`). */
35
- ghOwner: string;
36
- ghRepo: string;
37
- /** Branch name for the PR. Default `auto/<manifestHash[:12]>`. */
38
- branch?: string;
39
- /** PR title. Default includes manifest hash. */
40
- title?: string;
41
- /** Whether to actually open the PR or just dry-run. Default reads
42
- * `GH_AUTO_PR_TOKEN` env — present = open, absent = dry-run. */
43
- dryRun?: boolean;
44
- /** Test seam — substitute `gh pr create` invocation. */
45
- ghExec?: (args: string[]) => {
46
- stdout: string;
47
- stderr: string;
48
- status: number;
49
- };
50
- }
51
- interface OpenAutoPrResult {
52
- opened: boolean;
53
- prUrl?: string;
54
- dryRun: boolean;
55
- reason: string;
56
- }
57
- declare function openAutoPr<TArtifact, TScenario extends Scenario>(options: OpenAutoPrOptions<TArtifact, TScenario>): OpenAutoPrResult;
58
-
59
- /**
60
- * @experimental
61
- *
62
- * `evolutionaryDriver` — adapts a stateless `Mutator` (population mutation:
63
- * GEPA / AxGEPA / reflective-mutation) into an `ImprovementDriver`. This is
64
- * the evolutionary strategy: each generation, mutate the current best surface
65
- * into N candidates, measure, select. No generation memory beyond the current
66
- * surface; the loop body handles ranking + promotion.
67
- *
68
- * The reflective alternative is agent-runtime's `improvementDriver` with a
69
- * `reflectiveGenerator` / `agenticGenerator`: it reasons over the report +
70
- * trace findings to propose targeted edits rather than blind mutations. Both
71
- * conform to `ImprovementDriver`; the improvement loop is identical regardless
72
- * of which drives it.
73
- */
74
-
75
- interface EvolutionaryDriverOptions<TFindings = unknown> {
76
- mutator: Mutator<TFindings>;
77
- /** External findings fed to the mutator each generation. Default: []. */
78
- findings?: TFindings[];
79
- }
80
- declare function evolutionaryDriver<TFindings = unknown>(opts: EvolutionaryDriverOptions<TFindings>): ImprovementDriver<TFindings>;
81
-
82
- /**
83
- * @experimental
84
- *
85
- * `gepaDriver` — a reflective `ImprovementDriver` for prompt-tier surfaces.
86
- * Each generation it reflects on the prior best candidate's per-scenario
87
- * scores + weakest dimensions (the `GenerationCandidate` evidence from
88
- * `runOptimization`), asks an LLM to propose targeted rewrites of the current
89
- * surface, and returns them as the next population.
90
- *
91
- * This is the substrate's best-in-class prompt optimizer: surface-agnostic, so
92
- * ANY string surface in ANY consumer opts in by selecting it — system prompts,
93
- * prompt addenda, judge/reviewer prompts, even a driver's own reflection
94
- * prompt. It reuses the generic reflection primitive (`buildReflectionPrompt` /
95
- * `parseReflectionResponse`) and the router client; it has NO dependency on the
96
- * legacy `runMultiShotOptimization` / `prompt-evolution` orchestration.
97
- *
98
- * It earns its keep where there is real per-instance signal (which the
99
- * dimensional + per-scenario evidence + the `LabeledScenarioStore` flywheel
100
- * now provide). For thin-signal surfaces it degrades to plain reflection — so
101
- * it is a SELECTABLE driver, never a forced default. On generation 0 (no
102
- * history) it reflects on the current surface against the mutation primitives
103
- * alone.
104
- */
105
-
106
- interface GepaDriverOptions {
107
- /** Router transport (apiKey/baseUrl). */
108
- llm: LlmClientOptions;
109
- /** Model that performs the reflection. */
110
- model: string;
111
- /** What is being optimized — appears in the reflection prompt for orientation. */
112
- target: string;
113
- /** Surface-specific mutation levers offered to the model. */
114
- mutationPrimitives?: string[];
115
- /** Top/bottom scenarios surfaced as evidence each generation. Default 3. */
116
- evidenceK?: number;
117
- /** Reflection sampling temperature. Default 0.7. */
118
- temperature?: number;
119
- /** Reflection max tokens. Default 6000. */
120
- maxTokens?: number;
121
- }
122
- declare function gepaDriver(opts: GepaDriverOptions): ImprovementDriver;
123
-
124
- /**
125
- * @experimental
126
- *
127
- * Compose multiple `Gate` implementations — every gate must pass for the
128
- * composite to ship. Closes the alignment reviewer's "default-only
129
- * heldOutGate + costGate would happily promote a reward-hacked prompt"
130
- * concern by making safety gates first-class composable defaults.
131
- */
132
-
133
- /** Compose gates — all must `ship` for the composite to `ship`. First
134
- * non-ship verdict short-circuits the composite verdict, but ALL gates run
135
- * (so the result records every gate's reason — useful for diagnostics). */
136
- declare function composeGate<TArtifact = unknown, TScenario extends Scenario = Scenario>(...gates: Array<Gate<TArtifact, TScenario>>): Gate<TArtifact, TScenario>;
137
-
138
- /**
139
- * @experimental
140
- *
141
- * `defaultProductionGate` — composes the substrate's existing safety
142
- * primitives (red-team / reward-hacking / canary / heldout) into a single
143
- * Gate.decide shape. Closes the alignment + Anthropic-SI reviewers' "safety
144
- * primitives are off the critical path" blocker.
145
- *
146
- * The composition is opinionated — when consumers wire `runImprovementLoop`,
147
- * THIS gate is the default. Consumers can still pass a custom gate to
148
- * override; the recommended pattern is to compose THIS gate with whatever
149
- * extra domain-specific gates they need (`composeGate(defaultProductionGate(...), customGate)`).
150
- */
151
-
152
- interface DefaultProductionGateOptions {
153
- /** Required: scenarios held out from training; substrate compares
154
- * candidate-on-holdout vs baseline-on-holdout. */
155
- holdoutScenarios: Scenario[];
156
- /** Minimum mean-composite improvement required to ship. Default 0.5. */
157
- deltaThreshold?: number;
158
- /** Total $ budget for ALL cells in this campaign — including baseline + candidate.
159
- * Composite verdict refuses to ship when spend exceeded budget. */
160
- budgetUsd?: number;
161
- /** Red-team cases to probe candidate outputs against. When omitted the
162
- * substrate uses `DEFAULT_RED_TEAM_CORPUS`. Provide a domain-specific
163
- * battery for tighter coverage. */
164
- redTeamBattery?: RedTeamCase[];
165
- /** Run records (oldest-first) needed for the reward-hacking detector.
166
- * Substrate populates from prior production-loop generations. */
167
- recentRuns?: RunRecord[];
168
- /** When true, the gate refuses to ship if the reward-hacking detector
169
- * fires at the `gaming` severity. Default true. */
170
- blockOnRewardHackingGaming?: boolean;
171
- }
172
- declare function defaultProductionGate<TArtifact, TScenario extends Scenario>(options: DefaultProductionGateOptions): Gate<TArtifact, TScenario>;
173
-
174
- /**
175
- * @experimental
176
- *
177
- * Thin Gate adapter — exposes delta-threshold-on-holdout as a composable
178
- * `Gate`. Use when you want held-out as one of N composed gates instead of
179
- * the full `defaultProductionGate` stack.
180
- */
181
-
182
- interface HeldOutGateOptions<TScenario extends Scenario = Scenario> {
183
- scenarios: TScenario[];
184
- deltaThreshold?: number;
185
- }
186
- declare function heldOutGate<TArtifact, TScenario extends Scenario>(options: HeldOutGateOptions<TScenario>): Gate<TArtifact, TScenario>;
187
-
188
12
  /**
189
13
  * @experimental
190
14
  *
@@ -245,224 +69,6 @@ declare class FsLabeledScenarioStore implements LabeledScenarioStore {
245
69
  private pathForSource;
246
70
  }
247
71
 
248
- /**
249
- * @experimental
250
- *
251
- * `CampaignStorage` — the filesystem seam `runCampaign` writes through
252
- * (run/cell dirs, the resumability cache, per-cell artifacts, trace spans).
253
- *
254
- * The default (`fsCampaignStorage`) is the Node filesystem — identical
255
- * behavior to the inline `node:fs` calls it replaces, so existing CLI
256
- * consumers are unaffected. `inMemoryCampaignStorage` keeps everything in a
257
- * `Map`, so the substrate runs in environments WITHOUT a filesystem
258
- * (Cloudflare Workers, Deno Deploy, other edge runtimes) — the campaign
259
- * still produces its `CampaignResult` (cells + aggregates) in memory;
260
- * artifacts/traces simply aren't persisted to disk.
261
- *
262
- * Paths are opaque keys to the in-memory adapter — it does not parse them,
263
- * so the same `join(...)`-built paths work unchanged across both adapters.
264
- */
265
- interface CampaignStorage {
266
- /** Ensure a directory exists (recursive). No-op for in-memory. */
267
- ensureDir(dir: string): void;
268
- /** Does this path exist (as a written file or an ensured dir)? */
269
- exists(path: string): boolean;
270
- /** Read a UTF-8 file; `undefined` when missing or unreadable. */
271
- read(path: string): string | undefined;
272
- /** Write a file (string or bytes). Parent dir is assumed ensured. */
273
- write(path: string, content: string | Uint8Array): void;
274
- }
275
- /** Node-filesystem storage — the default. Lazily requires `node:fs` so the
276
- * module imports cleanly in non-Node runtimes (where the caller passes
277
- * `inMemoryCampaignStorage` instead and never constructs this). */
278
- declare function fsCampaignStorage(): CampaignStorage;
279
- /** In-memory storage for filesystem-less runtimes. Artifacts + trace spans
280
- * live in a `Map` for the duration of the run; the `CampaignResult` is
281
- * fully populated, but nothing is persisted to disk. */
282
- declare function inMemoryCampaignStorage(): CampaignStorage;
283
-
284
- /**
285
- * @experimental
286
- *
287
- * `runCampaign` — Pass A substrate primitive. ONE function that orchestrates
288
- * scenarios → dispatch → artifacts → judges → aggregates, with full
289
- * reproducibility (seed + manifest hash), cell-level resumability, bootstrap
290
- * CIs, and the `LabeledScenarioStore` capture flywheel.
291
- *
292
- * Improvement loops (optimizer / gate / autoOnPromote) ride on top of this
293
- * primitive but live in `presets/run-improvement-loop.ts`. This file keeps
294
- * the core orchestrator minimal — Phase 1 of the Pass A track.
295
- */
296
-
297
- interface RunCampaignOptions<TScenario extends Scenario, TArtifact> {
298
- scenarios: TScenario[];
299
- dispatch: DispatchFn<TScenario, TArtifact>;
300
- judges?: JudgeConfig<TArtifact, TScenario>[];
301
- /** Required for reproducibility. Default 42. */
302
- seed?: number;
303
- /** Per-scenario replicates for CI bands. Default 1; raise to 5+ for
304
- * bootstrap-tight intervals on critical eval. */
305
- reps?: number;
306
- /** When true (default), completed cells are cached by
307
- * (manifestHash, scenarioId, rep, generation). Re-runs skip cached cells. */
308
- resumable?: boolean;
309
- /** Optional store — when present, every artifact + judge score is captured
310
- * with the configured `captureSource`. Capture is default ON; pass `'off'`
311
- * to disable. */
312
- labeledStore?: LabeledScenarioStore | 'off';
313
- captureSource?: 'production-trace' | 'eval-run' | 'manual' | 'red-team' | 'synthetic';
314
- captureSourceVersionHash?: string;
315
- /** Wall-clock cost cap across all cells. Cells beyond ceiling are skipped. */
316
- costCeiling?: number;
317
- /** Max concurrent cells. Default 2. */
318
- maxConcurrency?: number;
319
- /** Required: where artifacts + traces land. */
320
- runDir: string;
321
- /** Tracing posture. Default is the substrate's `FileSystemTraceStore` rooted
322
- * at `<runDir>/traces/`. `'off'` disables capture entirely — substrate
323
- * refuses this when the caller wires `autoOnPromote !== 'none'`. */
324
- tracing?: 'on' | 'off';
325
- /** Test seam — override the wall clock for deterministic tests. */
326
- now?: () => Date;
327
- /** Test seam — override per-cell trace writer factory. */
328
- buildTraceWriter?: (cellId: string, dir: string) => CampaignTraceWriter;
329
- /** Storage backend for run/cell dirs, the resumability cache, artifacts,
330
- * and trace spans. Default: the Node filesystem (`fsCampaignStorage`).
331
- * Pass `inMemoryCampaignStorage()` to run in a filesystem-less runtime
332
- * (Cloudflare Workers, Deno, edge) — the `CampaignResult` is still
333
- * produced; artifacts/traces just aren't persisted to disk. */
334
- storage?: CampaignStorage;
335
- }
336
- declare function runCampaign<TScenario extends Scenario, TArtifact>(opts: RunCampaignOptions<TScenario, TArtifact>): Promise<CampaignResult<TArtifact, TScenario>>;
337
-
338
- /**
339
- * @experimental
340
- *
341
- * `runEval` — the simplest preset over `runCampaign`. No optimizer, no
342
- * gate, no auto-PR. Just: run scenarios through dispatch, score with
343
- * judges, return CampaignResult.
344
- *
345
- * The 80% case for consumers who want a scorecard, not an improvement loop.
346
- */
347
-
348
- interface RunEvalOptions<TScenario extends Scenario, TArtifact> extends Omit<RunCampaignOptions<TScenario, TArtifact>, 'runDir'> {
349
- runDir: string;
350
- }
351
- declare function runEval<TScenario extends Scenario, TArtifact>(opts: RunEvalOptions<TScenario, TArtifact>): Promise<CampaignResult<TArtifact, TScenario>>;
352
-
353
- /**
354
- * @experimental
355
- *
356
- * `runOptimization` — the improvement loop body. Runs N generations: the
357
- * `ImprovementDriver` proposes K candidate surfaces per generation, each
358
- * candidate runs a campaign (the measurement), top-scoring promote to the
359
- * next generation. Driver-agnostic — the same loop runs an evolutionary
360
- * population mutator (`evolutionaryDriver`) or agent-runtime's
361
- * `improvementDriver` (reflective / agentic generators); they differ only in
362
- * how `propose()` picks candidates.
363
- *
364
- * This is `runLoop`'s shape (plan → measure → decide) specialized to surface
365
- * improvement: `driver.propose` = plan, `runCampaign` = the measurement (which
366
- * runs the worker behind `dispatch`), the mean-composite ranking = the
367
- * validator, `driver.decide` = the stop check.
368
- *
369
- * The gated-promotion shell (`runImprovementLoop`) wraps this with a holdout
370
- * re-score + release gate + optional PR.
371
- */
372
-
373
- interface RunOptimizationOptions<TScenario extends Scenario, TArtifact> extends Omit<RunCampaignOptions<TScenario, TArtifact>, 'dispatch'> {
374
- /** Initial mutable surface (typically system prompt or addendum). */
375
- baselineSurface: MutableSurface;
376
- /** Dispatcher that takes the CURRENT surface + scenario → artifact. */
377
- dispatchWithSurface: (surface: MutableSurface, scenario: TScenario, ctx: Parameters<RunCampaignOptions<TScenario, TArtifact>['dispatch']>[1]) => Promise<TArtifact>;
378
- /** The improvement strategy. Wrap a population `Mutator` via
379
- * `evolutionaryDriver({ mutator })`, or pass agent-runtime's
380
- * `improvementDriver` (reflective / agentic generators). */
381
- driver: ImprovementDriver;
382
- populationSize: number;
383
- maxGenerations: number;
384
- /** How many top-scoring candidates carry to the next generation. Default 2. */
385
- promoteTopK?: number;
386
- /** DEPTH knob forwarded to the driver's `propose()` — max iterations the
387
- * agentic generator may take per candidate. */
388
- maxImprovementShots?: number;
389
- /** Phase-2 research report forwarded to `propose()` (analyst findings +
390
- * diff). Opaque here; the driver types it. */
391
- report?: unknown;
392
- }
393
- interface RunOptimizationResult<TArtifact, TScenario extends Scenario> {
394
- generations: Array<{
395
- record: GenerationRecord;
396
- surfaces: Array<{
397
- surfaceHash: string;
398
- surface: MutableSurface;
399
- campaign: CampaignResult<TArtifact, TScenario>;
400
- }>;
401
- }>;
402
- winnerSurface: MutableSurface;
403
- winnerSurfaceHash: string;
404
- baselineCampaign: CampaignResult<TArtifact, TScenario>;
405
- }
406
- declare function runOptimization<TScenario extends Scenario, TArtifact>(opts: RunOptimizationOptions<TScenario, TArtifact>): Promise<RunOptimizationResult<TArtifact, TScenario>>;
407
- declare function surfaceHash(surface: MutableSurface): string;
408
-
409
- /**
410
- * @experimental
411
- *
412
- * `runImprovementLoop` — the gated-promotion shell around the improvement
413
- * loop body (`runOptimization`). Drives candidate surfaces via the
414
- * `ImprovementDriver`, re-scores the winner against the baseline on a
415
- * holdout set, runs the release gate, and optionally opens a PR.
416
- *
417
- * Role vocabulary (see docs/design/loop-taxonomy.md):
418
- * - DRIVER = the `ImprovementDriver` (evolutionary GEPA mutator OR
419
- * reflective analyst). Proposes candidate SURFACES — the
420
- * worker's system prompt / tool config — NOT conversation
421
- * turns.
422
- * - MEASUREMENT= `runCampaign`. Scores one surface by running the worker
423
- * (via `dispatch`) over scenarios and judging the output.
424
- * - WORKER = the agent harness in the sandbox, invoked behind the
425
- * topology-opaque `dispatch` seam — never referenced here.
426
- *
427
- * Distinct from `runLoop` in `@tangle-network/agent-runtime`, which is the
428
- * INNER conversation loop (driver↔workers in a sandbox). `runImprovementLoop`
429
- * is the OUTER loop: it improves the surface that those workers run.
430
- *
431
- * Hard-refuses unsafe configurations:
432
- * - `tracing: 'off'` when a driver is wired (improvement is unattributable)
433
- * - `autoOnPromote: 'config'` — DEFERRED to Pass B; v0.40 only ships
434
- * `'pr'` and `'none'`.
435
- */
436
-
437
- interface RunImprovementLoopOptions<TScenario extends Scenario, TArtifact> extends RunOptimizationOptions<TScenario, TArtifact> {
438
- /** Holdout scenarios kept OUT of the training optimization pool — used
439
- * ONLY to score baseline vs winner for the gate. */
440
- holdoutScenarios: TScenario[];
441
- /** Promotion gate. Substrate strongly recommends `defaultProductionGate`
442
- * for production wiring (composes red-team / reward-hacking / canary /
443
- * heldout). */
444
- gate: Gate<TArtifact, TScenario>;
445
- /** What to do when the gate ships:
446
- * - `'pr'`: open a PR via `openAutoPr`
447
- * - `'none'`: just report — caller decides what to do with the winner
448
- * v0.40 does NOT support `'config'` (live-runtime self-mutation) —
449
- * deferred to Pass B behind safety stack. */
450
- autoOnPromote: 'pr' | 'none';
451
- /** GH owner / repo for the auto-PR. Required when autoOnPromote === 'pr'. */
452
- ghOwner?: string;
453
- ghRepo?: string;
454
- /** Optional render override — substrate writes a diff-shaped surface; pass
455
- * a function to format the promoted surface differently. */
456
- renderPromotedDiff?: (winnerSurface: MutableSurface, baselineSurface: MutableSurface) => string;
457
- }
458
- interface RunImprovementLoopResult<TArtifact, TScenario extends Scenario> extends RunOptimizationResult<TArtifact, TScenario> {
459
- baselineOnHoldout: CampaignResult<TArtifact, TScenario>;
460
- winnerOnHoldout: CampaignResult<TArtifact, TScenario>;
461
- gateResult: Awaited<ReturnType<Gate<TArtifact, TScenario>['decide']>>;
462
- prResult?: ReturnType<typeof openAutoPr>;
463
- }
464
- declare function runImprovementLoop<TScenario extends Scenario, TArtifact>(opts: RunImprovementLoopOptions<TScenario, TArtifact>): Promise<RunImprovementLoopResult<TArtifact, TScenario>>;
465
-
466
72
  /**
467
73
  * @experimental
468
74
  *
@@ -518,4 +124,4 @@ declare function gitWorktreeAdapter(opts: GitWorktreeAdapterOptions): WorktreeAd
518
124
  * as a ref under the adapter's worktree dir. */
519
125
  declare function resolveWorktreePath(surface: CodeSurface, worktreeDir?: string): string;
520
126
 
521
- export { CampaignResult, type CampaignStorage, CampaignTraceWriter, CodeSurface, type DefaultProductionGateOptions, DispatchFn, type EvolutionaryDriverOptions, FsLabeledScenarioStore, type FsLabeledScenarioStoreOptions, Gate, GateResult, GenerationRecord, type GepaDriverOptions, type GitWorktreeAdapterOptions, type HeldOutGateOptions, ImprovementDriver, JudgeConfig, LabeledScenarioRecord, LabeledScenarioSampleArgs, LabeledScenarioStore, LabeledScenarioStoreError, LabeledScenarioWrite, MutableSurface, Mutator, type OpenAutoPrOptions, type OpenAutoPrResult, type RunCampaignOptions, type RunEvalOptions, type RunImprovementLoopOptions, type RunImprovementLoopResult, type RunOptimizationOptions, type RunOptimizationResult, Scenario, type Worktree, type WorktreeAdapter, WorktreeAdapterError, composeGate, defaultProductionGate, evolutionaryDriver, fsCampaignStorage, gepaDriver, gitWorktreeAdapter, heldOutGate, inMemoryCampaignStorage, openAutoPr, resolveWorktreePath, runCampaign, runEval, runImprovementLoop, runOptimization, surfaceHash };
127
+ export { CodeSurface, FsLabeledScenarioStore, type FsLabeledScenarioStoreOptions, type GitWorktreeAdapterOptions, LabeledScenarioRecord, LabeledScenarioSampleArgs, LabeledScenarioStore, LabeledScenarioStoreError, LabeledScenarioWrite, type Worktree, type WorktreeAdapter, WorktreeAdapterError, gitWorktreeAdapter, resolveWorktreePath };