@tangle-network/agent-runtime 0.43.0 → 0.45.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. package/README.md +96 -202
  2. package/dist/agent.d.ts +5 -4
  3. package/dist/agent.js +5 -7
  4. package/dist/agent.js.map +1 -1
  5. package/dist/analyst-loop.d.ts +65 -4
  6. package/dist/analyst-loop.js +6 -1
  7. package/dist/audit.d.ts +93 -0
  8. package/dist/audit.js +312 -0
  9. package/dist/audit.js.map +1 -0
  10. package/dist/chunk-4B6U4CVQ.js +15 -0
  11. package/dist/chunk-4B6U4CVQ.js.map +1 -0
  12. package/dist/chunk-FK53TXOP.js +603 -0
  13. package/dist/chunk-FK53TXOP.js.map +1 -0
  14. package/dist/{chunk-MJDGCRAT.js → chunk-IJ6FGOPO.js} +5 -5
  15. package/dist/chunk-IJ6FGOPO.js.map +1 -0
  16. package/dist/{chunk-HVYOHJHK.js → chunk-IJGS6J7X.js} +2 -2
  17. package/dist/chunk-IJGS6J7X.js.map +1 -0
  18. package/dist/chunk-KEWO4KI6.js +3599 -0
  19. package/dist/chunk-KEWO4KI6.js.map +1 -0
  20. package/dist/{chunk-NRZOXCJK.js → chunk-KSMX62JF.js} +2 -2
  21. package/dist/{chunk-C5HMTTNY.js → chunk-NYN5RTLP.js} +13 -12
  22. package/dist/chunk-NYN5RTLP.js.map +1 -0
  23. package/dist/chunk-PRX45WE2.js +264 -0
  24. package/dist/chunk-PRX45WE2.js.map +1 -0
  25. package/dist/{chunk-3HMHSN22.js → chunk-QR4UUC5P.js} +6 -6
  26. package/dist/chunk-QR4UUC5P.js.map +1 -0
  27. package/dist/chunk-WIR4HOOJ.js +27 -0
  28. package/dist/chunk-WIR4HOOJ.js.map +1 -0
  29. package/dist/{chunk-MNCB4SJ5.js → chunk-Z2QXVBA6.js} +296 -8
  30. package/dist/chunk-Z2QXVBA6.js.map +1 -0
  31. package/dist/coder-CczgMqFx.d.ts +114 -0
  32. package/dist/dynamic-BvllHV6M.d.ts +221 -0
  33. package/dist/{improvement-adapter-BC4HhuAR.d.ts → improvement-adapter-CWegd3vw.d.ts} +1 -1
  34. package/dist/improvement.d.ts +2 -3
  35. package/dist/improvement.js +0 -5
  36. package/dist/improvement.js.map +1 -1
  37. package/dist/index.d.ts +123 -10
  38. package/dist/index.js +407 -19
  39. package/dist/index.js.map +1 -1
  40. package/dist/{kb-gate-DTBum3vH.d.ts → kb-gate-D9GBocLN.d.ts} +82 -5
  41. package/dist/{loop-runner-bin-CVoCBmYk.d.ts → loop-runner-bin-CPrCoKqC.d.ts} +14 -10
  42. package/dist/loop-runner-bin.d.ts +9 -7
  43. package/dist/loop-runner-bin.js +6 -8
  44. package/dist/loops.d.ts +7 -371
  45. package/dist/loops.js +96 -19
  46. package/dist/mcp/bin.js +7 -7
  47. package/dist/mcp/bin.js.map +1 -1
  48. package/dist/mcp/index.d.ts +284 -11
  49. package/dist/mcp/index.js +341 -9
  50. package/dist/mcp/index.js.map +1 -1
  51. package/dist/{otel-export-BzvF1Ela.d.ts → otel-export-Dy2DyUCU.d.ts} +1 -1
  52. package/dist/profiles.d.ts +385 -86
  53. package/dist/profiles.js +549 -4
  54. package/dist/profiles.js.map +1 -1
  55. package/dist/run-loop--hSoIknW.d.ts +112 -0
  56. package/dist/runtime-hooks-C7JwKb9E.d.ts +70 -0
  57. package/dist/runtime.d.ts +1860 -0
  58. package/dist/runtime.js +114 -0
  59. package/dist/runtime.js.map +1 -0
  60. package/dist/substrate-CUgk7F7s.d.ts +77 -0
  61. package/dist/topology.d.ts +73 -0
  62. package/dist/topology.js +111 -0
  63. package/dist/topology.js.map +1 -0
  64. package/dist/types-1HbsFa7H.d.ts +438 -0
  65. package/dist/{types-p8dWBIXL.d.ts → types-BtRLF2U3.d.ts} +1 -1
  66. package/dist/{types-Bcp071Jg.d.ts → types-DdzkffAm.d.ts} +95 -1
  67. package/dist/workflow.d.ts +551 -0
  68. package/dist/workflow.js +1778 -0
  69. package/dist/workflow.js.map +1 -0
  70. package/package.json +53 -16
  71. package/skills/agent-runtime-adoption/SKILL.md +29 -26
  72. package/dist/chunk-3HMHSN22.js.map +0 -1
  73. package/dist/chunk-C5HMTTNY.js.map +0 -1
  74. package/dist/chunk-EKBSQYZE.js +0 -813
  75. package/dist/chunk-EKBSQYZE.js.map +0 -1
  76. package/dist/chunk-HVYOHJHK.js.map +0 -1
  77. package/dist/chunk-MJDGCRAT.js.map +0 -1
  78. package/dist/chunk-MNCB4SJ5.js.map +0 -1
  79. package/dist/chunk-PY6NMZYX.js +0 -52
  80. package/dist/chunk-PY6NMZYX.js.map +0 -1
  81. package/dist/chunk-SQSCRJ7U.js +0 -65
  82. package/dist/chunk-SQSCRJ7U.js.map +0 -1
  83. package/dist/chunk-VOX6Z3II.js +0 -90
  84. package/dist/chunk-VOX6Z3II.js.map +0 -1
  85. package/dist/chunk-XBUG326M.js +0 -261
  86. package/dist/chunk-XBUG326M.js.map +0 -1
  87. package/dist/dynamic-B_7GgCwu.d.ts +0 -108
  88. package/dist/optimize-prompt-D-urF2wW.d.ts +0 -129
  89. package/dist/{chunk-NRZOXCJK.js.map → chunk-KSMX62JF.js.map} +0 -0
@@ -1,6 +1,7 @@
1
- import { CoderOutput, CoderTask } from './profiles.js';
2
- import { L as LoopSandboxClient, c as LoopTraceEmitter } from './types-Bcp071Jg.js';
1
+ import { C as CoderOutput, a as CoderTask } from './coder-CczgMqFx.js';
2
+ import { b as LoopSandboxClient, f as LoopTraceEmitter } from './types-DdzkffAm.js';
3
3
  import { SandboxInstance } from '@tangle-network/sandbox';
4
+ import { a as UiLens, U as UiFinding } from './substrate-CUgk7F7s.js';
4
5
 
5
6
  /**
6
7
  * @experimental
@@ -102,7 +103,7 @@ declare function createFleetWorkspaceExecutor(options: FleetWorkspaceExecutorOpt
102
103
  */
103
104
 
104
105
  /** @experimental */
105
- type DelegationProfile = 'coder' | 'researcher';
106
+ type DelegationProfile = 'coder' | 'researcher' | 'ui-auditor';
106
107
  /** @experimental */
107
108
  type DelegationStatus = 'pending' | 'running' | 'completed' | 'failed' | 'cancelled';
108
109
  /**
@@ -223,7 +224,73 @@ type DelegationResultPayload = {
223
224
  } | {
224
225
  profile: 'researcher';
225
226
  output: ResearchOutputShape;
227
+ } | {
228
+ profile: 'ui-auditor';
229
+ output: UiAuditorDelegationOutput;
226
230
  };
231
+ /**
232
+ * Wire-shape of a completed UI-audit delegation. The `findings` array
233
+ * contains every finding persisted to the workspace during the run,
234
+ * already enriched with `id` and `createdAt` by the writer. `workspaceDir`
235
+ * is the absolute path to the workspace; `indexFile` is the workspace-
236
+ * relative path to the regenerated index.md.
237
+ *
238
+ * @experimental
239
+ */
240
+ interface UiAuditorDelegationOutput {
241
+ workspaceDir: string;
242
+ indexFile: string;
243
+ findings: UiFinding[];
244
+ /** Total iterations the loop ran for this delegation. */
245
+ iterations: number;
246
+ }
247
+ /** @experimental */
248
+ type UiAuditLensFilter = readonly UiLens[];
249
+ /** Optional per-route capture spec the agent surfaces over the wire. */
250
+ interface DelegateUiAuditRoute {
251
+ /** Stable route name (used in screenshot filenames + finding metadata). */
252
+ name: string;
253
+ /** Fully-qualified URL. */
254
+ url: string;
255
+ /** Viewports to capture at. Defaults to `[{ width: 1280, height: 800 }]`. */
256
+ viewports?: readonly {
257
+ width: number;
258
+ height: number;
259
+ }[];
260
+ /** Default false. Full-page captures for the broad lenses. */
261
+ fullPage?: boolean;
262
+ /** Selector to wait for before capture. */
263
+ waitFor?: string;
264
+ }
265
+ /** @experimental */
266
+ interface DelegateUiAuditConfig {
267
+ /**
268
+ * Lenses to iterate. Default: every lens except `'other'`. Order is
269
+ * preserved — the driver iterates lens-by-lens.
270
+ */
271
+ lenses?: UiAuditLensFilter;
272
+ /** Maximum total iterations across all (lens × route) pairs. Default 33 (11 lenses × 3 routes). */
273
+ maxIterations?: number;
274
+ /** Maximum concurrent iterations within a single plan() round. Default 2. */
275
+ maxConcurrency?: number;
276
+ /** Free-form product context surfaced to the judge. */
277
+ productContext?: string;
278
+ }
279
+ /** @experimental */
280
+ interface DelegateUiAuditArgs {
281
+ /** Workspace root for the audit (absolute path). */
282
+ workspaceDir: string;
283
+ /** Routes to audit. Must be non-empty. */
284
+ routes: readonly DelegateUiAuditRoute[];
285
+ /** Multi-tenant scope. */
286
+ namespace?: string;
287
+ config?: DelegateUiAuditConfig;
288
+ }
289
+ /** @experimental */
290
+ interface DelegateUiAuditResult {
291
+ taskId: string;
292
+ estimatedDurationMs?: number;
293
+ }
227
294
  /**
228
295
  * Loose shape of a research output over the wire — the substrate cannot
229
296
  * import the `ResearchOutput` type from agent-knowledge without inducing
@@ -274,7 +341,7 @@ interface DelegationHistoryEntry {
274
341
  taskId: string;
275
342
  profile: DelegationProfile;
276
343
  namespace?: string;
277
- args: DelegateCodeArgs | DelegateResearchArgs;
344
+ args: DelegateCodeArgs | DelegateResearchArgs | DelegateUiAuditArgs;
278
345
  status: DelegationStatus;
279
346
  feedback?: DelegationFeedbackSnapshot[];
280
347
  costUsd?: number;
@@ -295,6 +362,16 @@ interface DelegateRunCtx {
295
362
  type CoderDelegate = (args: DelegateCodeArgs, ctx: DelegateRunCtx) => Promise<CoderOutput>;
296
363
  /** @experimental */
297
364
  type ResearcherDelegate = (args: DelegateResearchArgs, ctx: DelegateRunCtx) => Promise<ResearchOutputShape>;
365
+ /**
366
+ * UI-auditor delegate — fully consumer-injected. agent-runtime ships no
367
+ * default factory because the inputs are workspace path + judge function
368
+ * + (optionally) a `LoopSandboxClient`, and the judge is the consumer's
369
+ * model seam. See `createInProcessUiAuditClient` + `uiAuditorProfile` in
370
+ * `@tangle-network/agent-runtime/profiles` for the canonical wiring.
371
+ *
372
+ * @experimental
373
+ */
374
+ type UiAuditorDelegate = (args: DelegateUiAuditArgs, ctx: DelegateRunCtx) => Promise<UiAuditorDelegationOutput>;
298
375
  /** @experimental Structured review verdict over a coder candidate. */
299
376
  interface CoderReview {
300
377
  /** Gate: only approved candidates are eligible to win. */
@@ -443,4 +520,4 @@ interface CreateKbGateOptions {
443
520
  */
444
521
  declare function createKbGate(options?: CreateKbGateOptions): (candidate: FactCandidate) => Promise<KbGateResult>;
445
522
 
446
- export { type DelegateRunCtx as A, type FactJudge as B, type CoderReviewer as C, type DelegateCodeArgs as D, type FactJudgeVerdict as E, type FactCandidate as F, type FeedbackRating as G, type FeedbackRefersTo as H, type FleetWorkspaceExecutorOptions as I, type ResearchOutputShape as J, type KbGateResult as K, createDefaultCoderDelegate as L, createFleetWorkspaceExecutor as M, createKbGate as N, createSiblingSandboxExecutor as O, type ResearcherDelegate as R, type SiblingSandboxExecutorOptions as S, type CoderWinnerSelection as a, type CreateKbGateOptions as b, type FleetHandle as c, type DelegationExecutor as d, type DelegateFeedbackArgs as e, type DelegationFeedbackSnapshot as f, type DelegationProfile as g, type DelegateResearchArgs as h, type DelegationStatus as i, type DelegationProgress as j, type DelegationResultPayload as k, type DelegationError as l, type DelegationStatusResult as m, type DelegationHistoryArgs as n, type DelegationHistoryEntry as o, type CoderDelegate as p, type DelegateCodeResult as q, type DelegateFeedbackResult as r, type ResearchSource as s, type DelegateResearchResult as t, type DelegationHistoryResult as u, type DelegationStatusArgs as v, type CoderReview as w, type CreateDefaultCoderDelegateOptions as x, type DelegateCodeConfig as y, type DelegateResearchConfig as z };
523
+ export { type DelegateCodeConfig as A, type DelegateResearchConfig as B, type CoderReviewer as C, type DelegateCodeArgs as D, type DelegateRunCtx as E, type FactCandidate as F, type DelegateUiAuditConfig as G, type DelegateUiAuditRoute as H, type FactJudge as I, type FactJudgeVerdict as J, type FeedbackRating as K, type FeedbackRefersTo as L, type FleetWorkspaceExecutorOptions as M, type KbGateResult as N, type ResearchOutputShape as O, type UiAuditorDelegationOutput as P, createDefaultCoderDelegate as Q, type ResearcherDelegate as R, type SiblingSandboxExecutorOptions as S, createFleetWorkspaceExecutor as T, type UiAuditorDelegate as U, createKbGate as V, createSiblingSandboxExecutor as W, type CoderWinnerSelection as a, type CreateKbGateOptions as b, type FleetHandle as c, type DelegationExecutor as d, type DelegateFeedbackArgs as e, type DelegationFeedbackSnapshot as f, type DelegationProfile as g, type DelegateResearchArgs as h, type DelegateUiAuditArgs as i, type DelegationStatus as j, type DelegationProgress as k, type DelegationResultPayload as l, type DelegationError as m, type DelegationStatusResult as n, type DelegationHistoryArgs as o, type DelegationHistoryEntry as p, type CoderDelegate as q, type DelegateCodeResult as r, type DelegateFeedbackResult as s, type ResearchSource as t, type DelegateResearchResult as u, type DelegateUiAuditResult as v, type DelegationHistoryResult as w, type DelegationStatusArgs as x, type CoderReview as y, type CreateDefaultCoderDelegateOptions as z };
@@ -1,10 +1,10 @@
1
1
  import { Scenario } from '@tangle-network/agent-eval/campaign';
2
- import { R as RunAnalystLoopOpts, a as RunAnalystLoopResult } from './types-p8dWBIXL.js';
3
- import { O as OptimizePromptOptions, a as OptimizePromptResult } from './optimize-prompt-D-urF2wW.js';
4
- import { T as TopologyPlanner, D as DynamicDecision } from './dynamic-B_7GgCwu.js';
5
- import { L as LoopSandboxClient, a as OutputAdapter, V as Validator, A as AgentRunSpec, b as LoopResult } from './types-Bcp071Jg.js';
6
- import { D as DelegateCodeArgs, C as CoderReviewer, a as CoderWinnerSelection, F as FactCandidate, b as CreateKbGateOptions } from './kb-gate-DTBum3vH.js';
7
- import { CoderOutput } from './profiles.js';
2
+ import { SelfImproveOptions, SelfImproveResult } from '@tangle-network/agent-eval/contract';
3
+ import { R as RunAnalystLoopOpts, a as RunAnalystLoopResult } from './types-BtRLF2U3.js';
4
+ import { D as DelegateCodeArgs, C as CoderReviewer, a as CoderWinnerSelection, F as FactCandidate, b as CreateKbGateOptions } from './kb-gate-D9GBocLN.js';
5
+ import { C as CoderOutput } from './coder-CczgMqFx.js';
6
+ import { b as LoopSandboxClient, O as OutputAdapter, V as Validator, A as AgentRunSpec, c as LoopResult } from './types-DdzkffAm.js';
7
+ import { T as TopologyPlanner, C as CreateDynamicDriverOptions, D as DynamicDecision } from './dynamic-BvllHV6M.js';
8
8
 
9
9
  /**
10
10
  * @experimental
@@ -19,7 +19,7 @@ import { CoderOutput } from './profiles.js';
19
19
  * review → code mode with a REQUIRED reviewer (the gate is the point)
20
20
  * research → research-in-a-loop with valid-only KB growth (createKbGate)
21
21
  * audit → analyze trace/run data → findings (runAnalystLoop, caller-wired)
22
- * self-improve → identity-gated prompt optimization (optimizePrompt, caller-wired)
22
+ * self-improve → closed-loop text/config optimization (selfImprove, held-out gated)
23
23
  * dynamic → agent-authored topology (runLoop + createDynamicDriver)
24
24
  *
25
25
  * It is intentionally a thin façade: the value is that EVERY product reuses the
@@ -91,7 +91,7 @@ declare function reviewLoopRunner(options: CoderLoopRunnerOptions & {
91
91
  /** @experimental Options for the default `dynamic` runner. */
92
92
  interface DynamicLoopRunnerOptions<Task, Output> {
93
93
  sandboxClient: LoopSandboxClient;
94
- /** The agent-authored topology planner (e.g. `createSandboxPlanner(...)`). */
94
+ /** The agent-authored topology planner (sync or async; an async planner is where an LLM call goes). */
95
95
  planner: TopologyPlanner<Task, Output>;
96
96
  task: Task;
97
97
  output: OutputAdapter<Output>;
@@ -101,6 +101,10 @@ interface DynamicLoopRunnerOptions<Task, Output> {
101
101
  agentRuns?: AgentRunSpec<Task>[];
102
102
  maxIterations?: number;
103
103
  maxFanout?: number;
104
+ /** Optional trace-analyst hook forwarded to the dynamic driver so the loop runs
105
+ * `f(trace, findings)` — see `CreateDynamicDriverOptions.analyze`. Caller-side
106
+ * seam to `runAnalystLoop`; keeps this runner analyst-free. */
107
+ analyze?: CreateDynamicDriverOptions<Task, Output>['analyze'];
104
108
  }
105
109
  /** @experimental `dynamic` mode — agent-authored topology over `runLoop`. */
106
110
  declare function dynamicLoopRunner<Task, Output>(o: DynamicLoopRunnerOptions<Task, Output>): DelegatedLoopRunner<LoopResult<Task, Output, DynamicDecision>>;
@@ -142,8 +146,8 @@ interface ResearchLoopRunnerOptions {
142
146
  * never silently dropped) so the caller audits vs retries.
143
147
  */
144
148
  declare function researchLoopRunner(o: ResearchLoopRunnerOptions): DelegatedLoopRunner<ResearchLoopResult>;
145
- /** @experimental `self-improve` mode — identity-gated prompt optimization. */
146
- declare function selfImproveLoopRunner<TScenario extends Scenario, TArtifact>(options: OptimizePromptOptions<TScenario, TArtifact>): DelegatedLoopRunner<OptimizePromptResult<TArtifact, TScenario>>;
149
+ /** @experimental `self-improve` mode — agent-eval's one-call closed loop (held-out gated). */
150
+ declare function selfImproveLoopRunner<TScenario extends Scenario, TArtifact>(options: SelfImproveOptions<TScenario, TArtifact>): DelegatedLoopRunner<SelfImproveResult<TScenario, TArtifact>>;
147
151
  /** @experimental `audit` mode — analyst loop over captured trace/run data. */
148
152
  declare function auditLoopRunner<TProposal = unknown, TEdit = unknown>(options: RunAnalystLoopOpts): DelegatedLoopRunner<RunAnalystLoopResult<TProposal, TEdit>>;
149
153
 
@@ -1,11 +1,13 @@
1
1
  #!/usr/bin/env node
2
- export { L as LoopRunnerCliArgs, f as LoopRunnerCliResult, p as parseLoopRunnerArgv, o as runLoopRunnerCli } from './loop-runner-bin-CVoCBmYk.js';
2
+ export { L as LoopRunnerCliArgs, f as LoopRunnerCliResult, p as parseLoopRunnerArgv, o as runLoopRunnerCli } from './loop-runner-bin-CPrCoKqC.js';
3
3
  import '@tangle-network/agent-eval/campaign';
4
- import './types-p8dWBIXL.js';
4
+ import '@tangle-network/agent-eval/contract';
5
+ import './types-BtRLF2U3.js';
5
6
  import '@tangle-network/agent-eval';
6
- import './optimize-prompt-D-urF2wW.js';
7
- import './dynamic-B_7GgCwu.js';
8
- import './types-Bcp071Jg.js';
7
+ import './kb-gate-D9GBocLN.js';
8
+ import './coder-CczgMqFx.js';
9
9
  import '@tangle-network/sandbox';
10
- import './kb-gate-DTBum3vH.js';
11
- import './profiles.js';
10
+ import './types-DdzkffAm.js';
11
+ import './runtime-hooks-C7JwKb9E.js';
12
+ import './substrate-CUgk7F7s.js';
13
+ import './dynamic-BvllHV6M.js';
@@ -2,15 +2,13 @@
2
2
  import {
3
3
  parseLoopRunnerArgv,
4
4
  runLoopRunnerCli
5
- } from "./chunk-C5HMTTNY.js";
6
- import "./chunk-XBUG326M.js";
7
- import "./chunk-VOX6Z3II.js";
5
+ } from "./chunk-NYN5RTLP.js";
8
6
  import "./chunk-FNMGYYSS.js";
9
- import "./chunk-MJDGCRAT.js";
10
- import "./chunk-EKBSQYZE.js";
11
- import "./chunk-3HMHSN22.js";
12
- import "./chunk-PY6NMZYX.js";
13
- import "./chunk-SQSCRJ7U.js";
7
+ import "./chunk-IJ6FGOPO.js";
8
+ import "./chunk-QR4UUC5P.js";
9
+ import "./chunk-FK53TXOP.js";
10
+ import "./chunk-KEWO4KI6.js";
11
+ import "./chunk-PRX45WE2.js";
14
12
  import "./chunk-DGUM43GV.js";
15
13
  export {
16
14
  parseLoopRunnerArgv,
package/dist/loops.d.ts CHANGED
@@ -1,373 +1,9 @@
1
- import { AgentProfile, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox';
2
1
  export { AgentProfile, CreateSandboxOptions, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox';
3
- import { P as PlannerContext, T as TopologyPlanner } from './dynamic-B_7GgCwu.js';
4
- export { C as CreateDynamicDriverOptions, D as DynamicDecision, a as TopologyMove, c as createDynamicDriver, s as summarizeHistory } from './dynamic-B_7GgCwu.js';
5
- import { D as Driver, I as Iteration, L as LoopSandboxClient, A as AgentRunSpec, a as OutputAdapter, V as Validator, E as ExecCtx, d as LoopWinner, b as LoopResult, R as RuntimeStreamEvent } from './types-Bcp071Jg.js';
6
- export { e as LoopDecisionPayload, f as LoopEndedPayload, g as LoopIterationDispatchPayload, h as LoopIterationEndedPayload, i as LoopIterationStartedPayload, j as LoopPlanDescription, k as LoopPlanPayload, l as LoopSandboxPlacement, m as LoopStartedPayload, n as LoopTokenUsage, c as LoopTraceEmitter, o as LoopTraceEvent, p as ValidationCtx } from './types-Bcp071Jg.js';
7
- import { DefaultVerdict, AgentProfile as AgentProfile$1 } from '@tangle-network/agent-eval';
2
+ export { AssertTraceDerivedFindings, BudgetPool, BudgetReadout, CheckpointCapableBox, CliSeam, CombinatorShape, Corpus, CorpusFilter, CorpusRecord, CreateScopeAnalystOptions, CriuCapableClient, DefinePersona, DefinePersonaInput, EqualKArm, EqualKOnCost, EqualKOnCostOptions, EqualKVerdict, Fanout, FanoutOptions, FanoutSynthesis, FileCorpus, FileResultBlobStore, FileSpawnJournal, FlatWidenGate, ForkCapableBox, InMemoryCorpus, InMemoryResultBlobStore, InMemorySpawnJournal, LoopDispatchOptions, LoopOptionsForDispatch, LoopShape, LoopUntil, LoopUntilSpec, LoopUntilState, Outcome, Panel, PanelJudge, PanelSpec, PanelVerdict, Persona, PersonaContext, PersonaExecutors, Pipeline, PipelineStage, RenderCorpusToInstructions, RenderCorpusToInstructionsOptions, ReservationTicket, RouterSeam, RunPersonified, RunPersonifiedOptions, SandboxCapabilities, SandboxLineage, SandboxLineageHandle, SandboxSeam, ScopeAnalyst, ScopeAnalyzeInput, ScopeWidenGate, SessionCapableBox, ShapeBudget, ShapeContext, ShapeRegistry, SteerContext, TrajectoryNode, TrajectoryReport, TrajectoryReportFn, TrajectoryReportOptions, UsageSink, Verify, VerifySpec, Widen, WidenDecision, WidenLineage, WidenSpec, acquireSandbox, assertTraceDerivedFindings, buildSteerContext, builtinShapes, cliExecutor, contentAddress, createBudgetPool, createExecutorRegistry, createRootHandle, createSandboxLineage, createScope, createScopeAnalyst, createShapeRegistry, createSupervisor, definePersona, equalKOnCost, extractLlmCallEvent, fanout, flatWidenGate, loopDispatch, loopUntil, mapSandboxEvent, materializeTreeView, panel, pipeline, probeSandboxCapabilities, registerShape, renderCorpusToInstructions, replaySpawnTree, reportLoopUsage, routerInlineExecutor, runPersonified, sandboxExecutor, settledToIteration, spendFromUsageEvents, trajectoryReport, verify, widen } from './runtime.js';
3
+ export { A as AnalyzeInput, a as CompletionAnalyst, b as CompletionEvidence, c as CompletionPolicy, d as CompletionVerdict, C as CreateDynamicDriverOptions, D as DynamicDecision, P as PlannerContext, e as TopologyMove, T as TopologyPlanner, f as completionAuthorizes, g as createDynamicDriver, h as deterministicCompletion, r as renderAnalyses, s as sentinelCompletion, i as stopSentinel } from './dynamic-BvllHV6M.js';
4
+ export { R as RunLoopOptions, c as createSandboxForSpec, d as defaultSelectWinner, r as runLoop } from './run-loop--hSoIknW.js';
5
+ export { A as Agent, d as AgentSpec, B as Budget, i as ExecutorContext, E as ExecutorRegistry, H as Handle, j as LeafExecutor, L as LeafExecutorFactory, k as LeafResult, N as NodeId, l as NodeSnapshot, m as NodeStatus, n as Restart, R as ResultBlobStore, e as RootHandle, o as RootSignal, p as Runtime, S as Scope, c as Settled, b as SpawnEvent, a as SpawnJournal, q as SpawnOpts, g as Spend, f as SupervisedResult, h as Supervisor, r as SupervisorOpts, T as TreeView, U as UsageEvent, W as WidenGate } from './types-1HbsFa7H.js';
6
+ export { A as AgentRunSpec, D as Driver, E as ExecCtx, I as Iteration, h as LoopDecisionPayload, i as LoopEndedPayload, j as LoopIterationDispatchPayload, k as LoopIterationEndedPayload, l as LoopIterationStartedPayload, a as LoopLineageOptions, m as LoopPlanDescription, n as LoopPlanPayload, c as LoopResult, b as LoopSandboxClient, g as LoopSandboxPlacement, o as LoopStartedPayload, p as LoopTeardownFailedPayload, d as LoopTokenUsage, f as LoopTraceEmitter, q as LoopTraceEvent, L as LoopWinner, O as OutputAdapter, r as ValidationCtx, V as Validator } from './types-DdzkffAm.js';
8
7
  export { DefaultVerdict } from '@tangle-network/agent-eval';
9
- import { Scenario, DispatchFn, ProfileDispatchFn } from '@tangle-network/agent-eval/campaign';
10
-
11
- /**
12
- * @experimental
13
- *
14
- * FanoutVote driver — N parallel attempts in iteration 0, pick the highest-
15
- * scoring valid output. No second iteration: the topology is "spawn N, score,
16
- * pick winner". The kernel handles heterogeneous fanout via the
17
- * `agentRuns: AgentRunSpec[]` form on `runLoop`.
18
- */
19
-
20
- type FanoutVoteDecision = 'pick-winner' | 'fail';
21
- /** @experimental */
22
- interface FanoutVoteScored<Task, Output> {
23
- task: Task;
24
- output: Output;
25
- verdict?: DefaultVerdict;
26
- iterationIndex: number;
27
- agentRunName: string;
28
- }
29
- /** @experimental */
30
- interface CreateFanoutVoteDriverOptions<Task, Output> {
31
- /** Number of parallel attempts. Must be >= 1. */
32
- n: number;
33
- /**
34
- * Pick the winner from the scored set. Default: highest `verdict.score`
35
- * among valid outputs (ties broken by smallest iteration index). When
36
- * no valid outputs exist, returns `undefined` and `decide()` resolves
37
- * to `'fail'`. The kernel still records winners structurally — this
38
- * selector only feeds `decide()`'s pass/fail signal.
39
- */
40
- selector?: (scored: FanoutVoteScored<Task, Output>[]) => FanoutVoteScored<Task, Output> | undefined;
41
- /** Stable identifier surfaced in trace events. Default `'fanout-vote'`. */
42
- name?: string;
43
- }
44
- /** @experimental */
45
- declare function createFanoutVoteDriver<Task, Output>(options: CreateFanoutVoteDriverOptions<Task, Output>): Driver<Task, Output, FanoutVoteDecision>;
46
- /**
47
- * Test helper: surface the per-iteration scored view a custom `selector`
48
- * would receive. Exposed so consumers writing a custom selector can test it
49
- * standalone without driving the full kernel.
50
- *
51
- * @experimental
52
- */
53
- declare function scoreFanoutVoteIterations<Task, Output>(iterations: ReadonlyArray<Iteration<Task, Output>>): FanoutVoteScored<Task, Output>[];
54
-
55
- /**
56
- * @experimental
57
- *
58
- * Refine driver — single task per iteration, validator-gated.
59
- *
60
- * `plan` returns `[task]` (possibly transformed via `refineTask`) until the
61
- * prior verdict is valid OR the local cap is hit, then `[]`.
62
- * `decide` returns `'stop'` once the latest verdict is valid OR the cap is
63
- * reached. The kernel's `maxIterations` is an orthogonal safety cap;
64
- * whichever is lower wins.
65
- */
66
-
67
- type RefineDecision = 'continue' | 'stop';
68
- /** @experimental */
69
- interface CreateRefineDriverOptions<Task> {
70
- /** Hard cap on iterations. Default 5. */
71
- maxIterations?: number;
72
- /**
73
- * Optional task transform applied each round based on the prior verdict.
74
- * When omitted, the same task is replayed and the agent is expected to
75
- * inspect the sandbox session state for prior attempts.
76
- */
77
- refineTask?: (task: Task, prior: DefaultVerdict) => Task;
78
- /** Stable identifier surfaced in trace events. Default `'refine'`. */
79
- name?: string;
80
- }
81
- /** @experimental */
82
- declare function createRefineDriver<Task, Output>(options?: CreateRefineDriverOptions<Task>): Driver<Task, Output, RefineDecision>;
83
- /**
84
- * Test helper: select the last-valid iteration (or the last attempt if
85
- * none passed). Mirrors the kernel's default selector ordering for refine
86
- * topologies — the most recent successful attempt wins.
87
- *
88
- * @experimental
89
- */
90
- declare function refineWinnerIndex<Task, Output>(iterations: ReadonlyArray<Iteration<Task, Output>>): number | undefined;
91
-
92
- /**
93
- * @experimental
94
- *
95
- * `createSandboxPlanner` — wire the dynamic driver's `TopologyPlanner` to a
96
- * real agent. Each round it spins a sandbox on `profile`, streams a prompt that
97
- * carries the history summary, and decodes the agent's chosen `TopologyMove`
98
- * from a JSON envelope it emits. This is the "agent authors its own loop
99
- * topology" path: the planner profile can be any harness (claude-code, codex,
100
- * opencode, pi) — its only job is to read what happened and emit the next move.
101
- *
102
- * The planner profile is deliberately distinct from the worker `agentRuns`: a
103
- * cheap fast model can steer topology while expensive workers do the labor, and
104
- * the planner never names which harness runs a branch — the kernel's
105
- * `agentRuns` round-robin decides that.
106
- *
107
- * Envelope contract the agent must emit (fenced ```json or a structured
108
- * `result`/`final` event payload):
109
- * { "kind": "refine" | "fanout" | "stop",
110
- * "tasks"?: [ <task>, ... ], // decoded via `decodeTask`
111
- * "n"?: number, // fanout shorthand: N copies of the root task
112
- * "rationale"?: string }
113
- *
114
- * A missing / unparseable / unknown-kind envelope throws `PlannerError` — the
115
- * loop never silently runs a topology the agent did not choose.
116
- */
117
-
118
- /** Raw, pre-decode envelope an agent emits to choose the next move. */
119
- interface TopologyMoveEnvelope {
120
- kind: string;
121
- tasks?: unknown[];
122
- n?: number;
123
- rationale?: string;
124
- }
125
- /** @experimental */
126
- interface CreateSandboxPlannerOptions<Task, Output> {
127
- /** Sandbox client — the planner calls `.create()` once per round. */
128
- client: LoopSandboxClient;
129
- /** The planner agent. Steers topology; does not run the work. */
130
- profile: AgentProfile;
131
- /**
132
- * Decode one raw task from the envelope's `tasks[]` into a domain `Task`.
133
- * Required because `Task` is opaque to this module — only the caller knows
134
- * its shape. Throw to reject a malformed task; the error surfaces as a
135
- * `PlannerError`.
136
- */
137
- decodeTask: (raw: unknown, ctx: PlannerContext<Task, Output>) => Task;
138
- /** Override the default prompt (history summary + envelope contract). */
139
- buildPrompt?: (ctx: PlannerContext<Task, Output>) => string;
140
- /** Override envelope extraction from the event stream. */
141
- parseEnvelope?: (events: SandboxEvent[]) => TopologyMoveEnvelope | undefined;
142
- /** Sandbox overrides for the planner sandbox (timeouts, env, etc.). */
143
- sandboxOverrides?: AgentRunSpec<Task>['sandboxOverrides'];
144
- /** Cancellation for the planner's own LLM call. */
145
- signal?: AbortSignal;
146
- }
147
- /** @experimental */
148
- declare function createSandboxPlanner<Task, Output>(opts: CreateSandboxPlannerOptions<Task, Output>): TopologyPlanner<Task, Output>;
149
-
150
- /**
151
- * @experimental
152
- *
153
- * `runLoop` — the topology-agnostic kernel built atop the sandbox SDK.
154
- *
155
- * Each iteration:
156
- * 1. `driver.plan(task, history)` → N tasks (1 = refine, N = fanout, 0 = stop)
157
- * 2. For each task (parallel, bounded by `maxConcurrency`):
158
- * a. round-robin an `AgentRunSpec` from `agentRuns`
159
- * b. `sandboxClient.create({ backend: { profile }, ...overrides })`
160
- * c. emit `loop.iteration.dispatch` with the placement
161
- * (`{ sibling, sandboxId }` or `{ fleet, fleetId, machineId, sandboxId }`)
162
- * d. iterate `box.streamPrompt(taskToPrompt(task))` and collect events
163
- * 3. `output.parse(events)` → typed `Output`
164
- * 4. `validator?.validate(output)` → `DefaultVerdict`
165
- * 5. Append `Iteration` to history; emit `loop.iteration.ended`
166
- * 6. `driver.decide(history)` → if terminal, return result + winner
167
- *
168
- * The kernel owns: iteration accounting, per-iteration timing, error
169
- * capture, abort propagation, concurrency cap, cost aggregation, and trace
170
- * emission. The kernel does NOT own: what the agent runs (sandbox SDK +
171
- * profile), how outputs are decoded (output adapter), how outputs are
172
- * scored (validator), or topology (driver).
173
- */
174
-
175
- /** @experimental */
176
- interface RunLoopOptions<Task, Output, Decision> {
177
- driver: Driver<Task, Output, Decision>;
178
- /**
179
- * Single agent spec — every iteration uses this profile. Mutually
180
- * exclusive with `agentRuns`.
181
- */
182
- agentRun?: AgentRunSpec<Task>;
183
- /**
184
- * Multiple specs for heterogeneous fanout. The kernel round-robins
185
- * through them when the driver plans N tasks. Mutually exclusive with
186
- * `agentRun`.
187
- */
188
- agentRuns?: AgentRunSpec<Task>[];
189
- output: OutputAdapter<Output>;
190
- validator?: Validator<Output>;
191
- task: Task;
192
- ctx: ExecCtx;
193
- /** Default 10. Hard cap on total iterations across all `plan()` rounds. */
194
- maxIterations?: number;
195
- /** Default 4. In-flight worker cap within a single `plan()` batch. */
196
- maxConcurrency?: number;
197
- /**
198
- * Pre-allocated id for trace correlation. Default = `loop-${random}`.
199
- * Surfaces as `runId` on every emitted `LoopTraceEvent`.
200
- */
201
- runId?: string;
202
- /**
203
- * Clock override; default `Date.now`. Deterministic tests pass a
204
- * monotonic counter to stabilize iteration timing fields.
205
- */
206
- now?: () => number;
207
- /**
208
- * Override the default winner selector (highest-valid-score, ties broken
209
- * by earliest iteration).
210
- */
211
- selectWinner?: (iterations: Iteration<Task, Output>[]) => LoopWinner<Task, Output> | undefined;
212
- }
213
- /** @experimental */
214
- declare function runLoop<Task, Output, Decision>(options: RunLoopOptions<Task, Output, Decision>): Promise<LoopResult<Task, Output, Decision>>;
215
- /**
216
- * Instantiate a sandbox for an `AgentRunSpec`: sets `backend.profile` to the
217
- * spec's profile (inferring the backend type when the spec doesn't override
218
- * it) and merges `sandboxOverrides`. Shared by the loop kernel and the
219
- * `AgentRuntime.act` sandbox bridge so both boot the sandbox identically.
220
- */
221
- declare function createSandboxForSpec<Task>(client: LoopSandboxClient, spec: AgentRunSpec<Task>, signal: AbortSignal): Promise<SandboxInstance>;
222
-
223
- /**
224
- * `loopDispatch` — turn `runLoop` into an agent-eval campaign dispatch.
225
- *
226
- * Without this adapter a consumer wiring `runLoop` into `runProfileMatrix` /
227
- * `runCampaign` has to, by hand, every time: (a) build an `ExecCtx` with a
228
- * sandbox client, (b) adapt the campaign `DispatchContext.trace` into a
229
- * `LoopTraceEmitter` (or lose all loop trace correlation), and (c) remember to
230
- * forward the loop's cost + tokens via `ctx.cost` (forgetting it yields a
231
- * `{0,0}` cell the backend-integrity guard reads as a stub). Three foot-guns,
232
- * the third silent. The fleet's products skipped (c) and fell back to a
233
- * `workerRecords[]` side-channel — the exact anti-pattern the substrate exists
234
- * to kill.
235
- *
236
- * `loopDispatch` collapses all three into one typed call:
237
- *
238
- * const dispatch = loopDispatch({
239
- * sandboxClient,
240
- * toLoopOptions: (scenario, profile) => ({ driver, agentRun, output, validator, task }),
241
- * })
242
- * await runProfileMatrix({ profiles, scenarios, dispatch, judges, commitSha })
243
- *
244
- * Usage is reported automatically; trace events are forwarded automatically;
245
- * the ctx is built automatically. The seam becomes impossible to mis-wire.
246
- *
247
- * Typed structurally against the campaign `DispatchContext` (imported type-only
248
- * from `@tangle-network/agent-eval/campaign`) — a downward dependency, never an
249
- * inversion.
250
- */
251
-
252
- /** runLoop options minus the `ctx` (loopDispatch builds the ctx). */
253
- type LoopOptionsForDispatch<Task, Output, Decision> = Omit<RunLoopOptions<Task, Output, Decision>, 'ctx'>;
254
- interface LoopDispatchOptions<Task, Output, Decision, TScenario extends Scenario, TArtifact> {
255
- /** Sandbox client used for every cell's `runLoop`. Supplied once. */
256
- sandboxClient: LoopSandboxClient;
257
- /** Build the per-cell runLoop options from the scenario (+ profile, when
258
- * used with `runProfileMatrix`). */
259
- toLoopOptions: (scenario: TScenario, profile: AgentProfile$1) => LoopOptionsForDispatch<Task, Output, Decision>;
260
- /** Map the finished loop to the artifact the judges score. Default:
261
- * `result.winner?.output`. A loop with no winner yields `undefined` (judges
262
- * skip the cell) — but the loop's token usage is STILL reported, so the
263
- * integrity guard sees real activity. */
264
- toArtifact?: (result: LoopResult<Task, Output, Decision>) => TArtifact;
265
- /** Forward `loop.*` trace events into the campaign's scoped trace so loop
266
- * spans correlate with the cell. Default true. */
267
- forwardTrace?: boolean;
268
- /** Cost-meter source label for the loop's spend. Default `'loop'`. */
269
- costSource?: string;
270
- }
271
- /**
272
- * Adapter for `runProfileMatrix` (profile is an axis). Returns a
273
- * `ProfileDispatchFn` that runs `runLoop` per (profile, scenario) cell and
274
- * reports usage automatically.
275
- */
276
- declare function loopDispatch<Task, Output, Decision, TScenario extends Scenario, TArtifact>(opts: LoopDispatchOptions<Task, Output, Decision, TScenario, TArtifact>): ProfileDispatchFn<TScenario, TArtifact>;
277
- /**
278
- * Adapter for `runCampaign` (no profile axis). `toLoopOptions` receives only
279
- * the scenario; the `profile` passed to the shared core is a stable sentinel
280
- * so a single `runLoop` config is reused across cells.
281
- */
282
- declare function loopCampaignDispatch<Task, Output, Decision, TScenario extends Scenario, TArtifact>(opts: Omit<LoopDispatchOptions<Task, Output, Decision, TScenario, TArtifact>, 'toLoopOptions'> & {
283
- toLoopOptions: (scenario: TScenario) => LoopOptionsForDispatch<Task, Output, Decision>;
284
- }): DispatchFn<TScenario, TArtifact>;
285
-
286
- /**
287
- * Bridge a finished `runLoop` into an agent-eval campaign / profile-matrix
288
- * dispatch.
289
- *
290
- * `runProfileMatrix` (and `runCampaign`) run the backend-integrity guard over
291
- * the token usage a dispatch reports through `ctx.cost`. A dispatch that wraps
292
- * `runLoop` must forward the loop's cost AND token usage, or the guard reads
293
- * the run as a stub and throws. `reportLoopUsage` is that one line:
294
- *
295
- * const dispatch: ProfileDispatchFn<S, A> = async (profile, scenario, ctx) => {
296
- * const result = await runLoop({ ...optsFor(profile, scenario), ctx: loopCtx })
297
- * reportLoopUsage(ctx, result)
298
- * return result.winner?.output as A
299
- * }
300
- *
301
- * Typed structurally against the campaign `DispatchContext.cost` so this module
302
- * stays free of an agent-eval import — it works with any cost meter exposing
303
- * `observe` + `observeTokens`.
304
- */
305
-
306
- /** The slice of an agent-eval campaign `DispatchContext.cost` this needs. */
307
- interface UsageSink {
308
- observe(amountUsd: number, source: string): void;
309
- observeTokens(usage: {
310
- input: number;
311
- output: number;
312
- }): void;
313
- }
314
- /**
315
- * Forward a `LoopResult`'s aggregated cost + token usage into a campaign cost
316
- * meter so the backend-integrity guard sees real LLM activity. `source`
317
- * defaults to `'loop'`.
318
- */
319
- declare function reportLoopUsage<Task, Output, Decision>(cost: UsageSink, result: Pick<LoopResult<Task, Output, Decision>, 'costUsd' | 'tokenUsage'>, source?: string): void;
320
-
321
- /**
322
- * Sandbox-event → runtime-event mapping.
323
- *
324
- * The sandbox SDK emits a polymorphic `SandboxEvent = { type, data, id? }`
325
- * whose `type` vocabulary is backend-determined (opencode, etc.) rather than
326
- * enumerated by the SDK. Two consumers project it:
327
- * - the loop kernel's cost ledger (`extractLlmCallEvent`) — sums usage off
328
- * every cost-bearing event, regardless of stream shape;
329
- * - the `AgentRuntime.act` streaming contract (`mapSandboxEvent`) — projects
330
- * incremental events to the `RuntimeStreamEvent` chat-UX vocabulary.
331
- *
332
- * Both live here so the empirically-observed `type` vocabulary has one home.
333
- */
334
-
335
- /**
336
- * Extract a `RuntimeStreamEvent`-shaped `llm_call` from a sandbox event when
337
- * the event carries usage/cost data. Returns `undefined` for non-cost events
338
- * so the kernel can iterate the full stream without branching.
339
- *
340
- * Canonical cost-carrying types observed in the wild:
341
- * - `llm_call` — `data: { model, tokensIn, tokensOut, costUsd, ... }`
342
- * - `message.completed` / `result` — `data: { usage: { inputTokens,
343
- * outputTokens, totalCostUsd? } }`
344
- * - `cost.usage` / `usage` — same shape under a dedicated type
345
- *
346
- * Numeric coercion is strict: `Number.isFinite` gates every accumulator write
347
- * so a sentinel `NaN` from a misbehaving backend cannot poison the ledger.
348
- */
349
- declare function extractLlmCallEvent(event: SandboxEvent, agentRunName: string): (RuntimeStreamEvent & {
350
- type: 'llm_call';
351
- }) | undefined;
352
- /**
353
- * Project one `SandboxEvent` onto the `RuntimeStreamEvent` chat-UX vocabulary,
354
- * for runtimes that bridge a sandbox `streamPrompt` into the
355
- * `AgentRuntime.act` streaming contract. Returns `undefined` for events that
356
- * have no faithful projection — the raw stream is preserved separately for the
357
- * `OutputAdapter`, so an unmapped event never loses data.
358
- *
359
- * Mapped (the task-optional incremental variants — no synthesized task
360
- * lifecycle, no guessed tool-part shapes):
361
- * - `message.part.updated` text part → `text_delta`
362
- * - `message.part.updated` reasoning/thinking part → `reasoning_delta`
363
- * - cost-bearing events → `llm_call` (shared with the ledger extractor)
364
- *
365
- * The opencode backend emits incremental text as
366
- * `{ type: 'message.part.updated', data: { part: { type, text }, delta } }`;
367
- * `delta` is the increment, `part.text` the running accumulation.
368
- */
369
- declare function mapSandboxEvent(event: SandboxEvent, opts?: {
370
- agentRunName?: string;
371
- }): RuntimeStreamEvent | undefined;
372
-
373
- export { AgentRunSpec, type CreateFanoutVoteDriverOptions, type CreateRefineDriverOptions, type CreateSandboxPlannerOptions, Driver, ExecCtx, type FanoutVoteDecision, type FanoutVoteScored, Iteration, type LoopDispatchOptions, type LoopOptionsForDispatch, LoopResult, LoopSandboxClient, LoopWinner, OutputAdapter, PlannerContext, type RefineDecision, type RunLoopOptions, type TopologyMoveEnvelope, TopologyPlanner, type UsageSink, Validator, createFanoutVoteDriver, createRefineDriver, createSandboxForSpec, createSandboxPlanner, extractLlmCallEvent, loopCampaignDispatch, loopDispatch, mapSandboxEvent, refineWinnerIndex, reportLoopUsage, runLoop, scoreFanoutVoteIterations };
8
+ import '@tangle-network/agent-eval/campaign';
9
+ import './runtime-hooks-C7JwKb9E.js';