@tangle-network/agent-runtime 0.43.0 → 0.45.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +96 -202
- package/dist/agent.d.ts +5 -4
- package/dist/agent.js +5 -7
- package/dist/agent.js.map +1 -1
- package/dist/analyst-loop.d.ts +65 -4
- package/dist/analyst-loop.js +6 -1
- package/dist/audit.d.ts +93 -0
- package/dist/audit.js +312 -0
- package/dist/audit.js.map +1 -0
- package/dist/chunk-4B6U4CVQ.js +15 -0
- package/dist/chunk-4B6U4CVQ.js.map +1 -0
- package/dist/chunk-FK53TXOP.js +603 -0
- package/dist/chunk-FK53TXOP.js.map +1 -0
- package/dist/{chunk-MJDGCRAT.js → chunk-IJ6FGOPO.js} +5 -5
- package/dist/chunk-IJ6FGOPO.js.map +1 -0
- package/dist/{chunk-HVYOHJHK.js → chunk-IJGS6J7X.js} +2 -2
- package/dist/chunk-IJGS6J7X.js.map +1 -0
- package/dist/chunk-KEWO4KI6.js +3599 -0
- package/dist/chunk-KEWO4KI6.js.map +1 -0
- package/dist/{chunk-NRZOXCJK.js → chunk-KSMX62JF.js} +2 -2
- package/dist/{chunk-C5HMTTNY.js → chunk-NYN5RTLP.js} +13 -12
- package/dist/chunk-NYN5RTLP.js.map +1 -0
- package/dist/chunk-PRX45WE2.js +264 -0
- package/dist/chunk-PRX45WE2.js.map +1 -0
- package/dist/{chunk-3HMHSN22.js → chunk-QR4UUC5P.js} +6 -6
- package/dist/chunk-QR4UUC5P.js.map +1 -0
- package/dist/chunk-WIR4HOOJ.js +27 -0
- package/dist/chunk-WIR4HOOJ.js.map +1 -0
- package/dist/{chunk-MNCB4SJ5.js → chunk-Z2QXVBA6.js} +296 -8
- package/dist/chunk-Z2QXVBA6.js.map +1 -0
- package/dist/coder-CczgMqFx.d.ts +114 -0
- package/dist/dynamic-BvllHV6M.d.ts +221 -0
- package/dist/{improvement-adapter-BC4HhuAR.d.ts → improvement-adapter-CWegd3vw.d.ts} +1 -1
- package/dist/improvement.d.ts +2 -3
- package/dist/improvement.js +0 -5
- package/dist/improvement.js.map +1 -1
- package/dist/index.d.ts +123 -10
- package/dist/index.js +407 -19
- package/dist/index.js.map +1 -1
- package/dist/{kb-gate-DTBum3vH.d.ts → kb-gate-D9GBocLN.d.ts} +82 -5
- package/dist/{loop-runner-bin-CVoCBmYk.d.ts → loop-runner-bin-CPrCoKqC.d.ts} +14 -10
- package/dist/loop-runner-bin.d.ts +9 -7
- package/dist/loop-runner-bin.js +6 -8
- package/dist/loops.d.ts +7 -371
- package/dist/loops.js +96 -19
- package/dist/mcp/bin.js +7 -7
- package/dist/mcp/bin.js.map +1 -1
- package/dist/mcp/index.d.ts +284 -11
- package/dist/mcp/index.js +341 -9
- package/dist/mcp/index.js.map +1 -1
- package/dist/{otel-export-BzvF1Ela.d.ts → otel-export-Dy2DyUCU.d.ts} +1 -1
- package/dist/profiles.d.ts +385 -86
- package/dist/profiles.js +549 -4
- package/dist/profiles.js.map +1 -1
- package/dist/run-loop--hSoIknW.d.ts +112 -0
- package/dist/runtime-hooks-C7JwKb9E.d.ts +70 -0
- package/dist/runtime.d.ts +1860 -0
- package/dist/runtime.js +114 -0
- package/dist/runtime.js.map +1 -0
- package/dist/substrate-CUgk7F7s.d.ts +77 -0
- package/dist/topology.d.ts +73 -0
- package/dist/topology.js +111 -0
- package/dist/topology.js.map +1 -0
- package/dist/types-1HbsFa7H.d.ts +438 -0
- package/dist/{types-p8dWBIXL.d.ts → types-BtRLF2U3.d.ts} +1 -1
- package/dist/{types-Bcp071Jg.d.ts → types-DdzkffAm.d.ts} +95 -1
- package/dist/workflow.d.ts +551 -0
- package/dist/workflow.js +1778 -0
- package/dist/workflow.js.map +1 -0
- package/package.json +53 -16
- package/skills/agent-runtime-adoption/SKILL.md +29 -26
- package/dist/chunk-3HMHSN22.js.map +0 -1
- package/dist/chunk-C5HMTTNY.js.map +0 -1
- package/dist/chunk-EKBSQYZE.js +0 -813
- package/dist/chunk-EKBSQYZE.js.map +0 -1
- package/dist/chunk-HVYOHJHK.js.map +0 -1
- package/dist/chunk-MJDGCRAT.js.map +0 -1
- package/dist/chunk-MNCB4SJ5.js.map +0 -1
- package/dist/chunk-PY6NMZYX.js +0 -52
- package/dist/chunk-PY6NMZYX.js.map +0 -1
- package/dist/chunk-SQSCRJ7U.js +0 -65
- package/dist/chunk-SQSCRJ7U.js.map +0 -1
- package/dist/chunk-VOX6Z3II.js +0 -90
- package/dist/chunk-VOX6Z3II.js.map +0 -1
- package/dist/chunk-XBUG326M.js +0 -261
- package/dist/chunk-XBUG326M.js.map +0 -1
- package/dist/dynamic-B_7GgCwu.d.ts +0 -108
- package/dist/optimize-prompt-D-urF2wW.d.ts +0 -129
- /package/dist/{chunk-NRZOXCJK.js.map → chunk-KSMX62JF.js.map} +0 -0
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
import { CoderOutput, CoderTask } from './
|
|
2
|
-
import {
|
|
1
|
+
import { C as CoderOutput, a as CoderTask } from './coder-CczgMqFx.js';
|
|
2
|
+
import { b as LoopSandboxClient, f as LoopTraceEmitter } from './types-DdzkffAm.js';
|
|
3
3
|
import { SandboxInstance } from '@tangle-network/sandbox';
|
|
4
|
+
import { a as UiLens, U as UiFinding } from './substrate-CUgk7F7s.js';
|
|
4
5
|
|
|
5
6
|
/**
|
|
6
7
|
* @experimental
|
|
@@ -102,7 +103,7 @@ declare function createFleetWorkspaceExecutor(options: FleetWorkspaceExecutorOpt
|
|
|
102
103
|
*/
|
|
103
104
|
|
|
104
105
|
/** @experimental */
|
|
105
|
-
type DelegationProfile = 'coder' | 'researcher';
|
|
106
|
+
type DelegationProfile = 'coder' | 'researcher' | 'ui-auditor';
|
|
106
107
|
/** @experimental */
|
|
107
108
|
type DelegationStatus = 'pending' | 'running' | 'completed' | 'failed' | 'cancelled';
|
|
108
109
|
/**
|
|
@@ -223,7 +224,73 @@ type DelegationResultPayload = {
|
|
|
223
224
|
} | {
|
|
224
225
|
profile: 'researcher';
|
|
225
226
|
output: ResearchOutputShape;
|
|
227
|
+
} | {
|
|
228
|
+
profile: 'ui-auditor';
|
|
229
|
+
output: UiAuditorDelegationOutput;
|
|
226
230
|
};
|
|
231
|
+
/**
|
|
232
|
+
* Wire-shape of a completed UI-audit delegation. The `findings` array
|
|
233
|
+
* contains every finding persisted to the workspace during the run,
|
|
234
|
+
* already enriched with `id` and `createdAt` by the writer. `workspaceDir`
|
|
235
|
+
* is the absolute path to the workspace; `indexFile` is the workspace-
|
|
236
|
+
* relative path to the regenerated index.md.
|
|
237
|
+
*
|
|
238
|
+
* @experimental
|
|
239
|
+
*/
|
|
240
|
+
interface UiAuditorDelegationOutput {
|
|
241
|
+
workspaceDir: string;
|
|
242
|
+
indexFile: string;
|
|
243
|
+
findings: UiFinding[];
|
|
244
|
+
/** Total iterations the loop ran for this delegation. */
|
|
245
|
+
iterations: number;
|
|
246
|
+
}
|
|
247
|
+
/** @experimental */
|
|
248
|
+
type UiAuditLensFilter = readonly UiLens[];
|
|
249
|
+
/** Optional per-route capture spec the agent surfaces over the wire. */
|
|
250
|
+
interface DelegateUiAuditRoute {
|
|
251
|
+
/** Stable route name (used in screenshot filenames + finding metadata). */
|
|
252
|
+
name: string;
|
|
253
|
+
/** Fully-qualified URL. */
|
|
254
|
+
url: string;
|
|
255
|
+
/** Viewports to capture at. Defaults to `[{ width: 1280, height: 800 }]`. */
|
|
256
|
+
viewports?: readonly {
|
|
257
|
+
width: number;
|
|
258
|
+
height: number;
|
|
259
|
+
}[];
|
|
260
|
+
/** Default false. Full-page captures for the broad lenses. */
|
|
261
|
+
fullPage?: boolean;
|
|
262
|
+
/** Selector to wait for before capture. */
|
|
263
|
+
waitFor?: string;
|
|
264
|
+
}
|
|
265
|
+
/** @experimental */
|
|
266
|
+
interface DelegateUiAuditConfig {
|
|
267
|
+
/**
|
|
268
|
+
* Lenses to iterate. Default: every lens except `'other'`. Order is
|
|
269
|
+
* preserved — the driver iterates lens-by-lens.
|
|
270
|
+
*/
|
|
271
|
+
lenses?: UiAuditLensFilter;
|
|
272
|
+
/** Maximum total iterations across all (lens × route) pairs. Default 33 (11 lenses × 3 routes). */
|
|
273
|
+
maxIterations?: number;
|
|
274
|
+
/** Maximum concurrent iterations within a single plan() round. Default 2. */
|
|
275
|
+
maxConcurrency?: number;
|
|
276
|
+
/** Free-form product context surfaced to the judge. */
|
|
277
|
+
productContext?: string;
|
|
278
|
+
}
|
|
279
|
+
/** @experimental */
|
|
280
|
+
interface DelegateUiAuditArgs {
|
|
281
|
+
/** Workspace root for the audit (absolute path). */
|
|
282
|
+
workspaceDir: string;
|
|
283
|
+
/** Routes to audit. Must be non-empty. */
|
|
284
|
+
routes: readonly DelegateUiAuditRoute[];
|
|
285
|
+
/** Multi-tenant scope. */
|
|
286
|
+
namespace?: string;
|
|
287
|
+
config?: DelegateUiAuditConfig;
|
|
288
|
+
}
|
|
289
|
+
/** @experimental */
|
|
290
|
+
interface DelegateUiAuditResult {
|
|
291
|
+
taskId: string;
|
|
292
|
+
estimatedDurationMs?: number;
|
|
293
|
+
}
|
|
227
294
|
/**
|
|
228
295
|
* Loose shape of a research output over the wire — the substrate cannot
|
|
229
296
|
* import the `ResearchOutput` type from agent-knowledge without inducing
|
|
@@ -274,7 +341,7 @@ interface DelegationHistoryEntry {
|
|
|
274
341
|
taskId: string;
|
|
275
342
|
profile: DelegationProfile;
|
|
276
343
|
namespace?: string;
|
|
277
|
-
args: DelegateCodeArgs | DelegateResearchArgs;
|
|
344
|
+
args: DelegateCodeArgs | DelegateResearchArgs | DelegateUiAuditArgs;
|
|
278
345
|
status: DelegationStatus;
|
|
279
346
|
feedback?: DelegationFeedbackSnapshot[];
|
|
280
347
|
costUsd?: number;
|
|
@@ -295,6 +362,16 @@ interface DelegateRunCtx {
|
|
|
295
362
|
type CoderDelegate = (args: DelegateCodeArgs, ctx: DelegateRunCtx) => Promise<CoderOutput>;
|
|
296
363
|
/** @experimental */
|
|
297
364
|
type ResearcherDelegate = (args: DelegateResearchArgs, ctx: DelegateRunCtx) => Promise<ResearchOutputShape>;
|
|
365
|
+
/**
|
|
366
|
+
* UI-auditor delegate — fully consumer-injected. agent-runtime ships no
|
|
367
|
+
* default factory because the inputs are workspace path + judge function
|
|
368
|
+
* + (optionally) a `LoopSandboxClient`, and the judge is the consumer's
|
|
369
|
+
* model seam. See `createInProcessUiAuditClient` + `uiAuditorProfile` in
|
|
370
|
+
* `@tangle-network/agent-runtime/profiles` for the canonical wiring.
|
|
371
|
+
*
|
|
372
|
+
* @experimental
|
|
373
|
+
*/
|
|
374
|
+
type UiAuditorDelegate = (args: DelegateUiAuditArgs, ctx: DelegateRunCtx) => Promise<UiAuditorDelegationOutput>;
|
|
298
375
|
/** @experimental Structured review verdict over a coder candidate. */
|
|
299
376
|
interface CoderReview {
|
|
300
377
|
/** Gate: only approved candidates are eligible to win. */
|
|
@@ -443,4 +520,4 @@ interface CreateKbGateOptions {
|
|
|
443
520
|
*/
|
|
444
521
|
declare function createKbGate(options?: CreateKbGateOptions): (candidate: FactCandidate) => Promise<KbGateResult>;
|
|
445
522
|
|
|
446
|
-
export { type
|
|
523
|
+
export { type DelegateCodeConfig as A, type DelegateResearchConfig as B, type CoderReviewer as C, type DelegateCodeArgs as D, type DelegateRunCtx as E, type FactCandidate as F, type DelegateUiAuditConfig as G, type DelegateUiAuditRoute as H, type FactJudge as I, type FactJudgeVerdict as J, type FeedbackRating as K, type FeedbackRefersTo as L, type FleetWorkspaceExecutorOptions as M, type KbGateResult as N, type ResearchOutputShape as O, type UiAuditorDelegationOutput as P, createDefaultCoderDelegate as Q, type ResearcherDelegate as R, type SiblingSandboxExecutorOptions as S, createFleetWorkspaceExecutor as T, type UiAuditorDelegate as U, createKbGate as V, createSiblingSandboxExecutor as W, type CoderWinnerSelection as a, type CreateKbGateOptions as b, type FleetHandle as c, type DelegationExecutor as d, type DelegateFeedbackArgs as e, type DelegationFeedbackSnapshot as f, type DelegationProfile as g, type DelegateResearchArgs as h, type DelegateUiAuditArgs as i, type DelegationStatus as j, type DelegationProgress as k, type DelegationResultPayload as l, type DelegationError as m, type DelegationStatusResult as n, type DelegationHistoryArgs as o, type DelegationHistoryEntry as p, type CoderDelegate as q, type DelegateCodeResult as r, type DelegateFeedbackResult as s, type ResearchSource as t, type DelegateResearchResult as u, type DelegateUiAuditResult as v, type DelegationHistoryResult as w, type DelegationStatusArgs as x, type CoderReview as y, type CreateDefaultCoderDelegateOptions as z };
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import { Scenario } from '@tangle-network/agent-eval/campaign';
|
|
2
|
-
import {
|
|
3
|
-
import {
|
|
4
|
-
import {
|
|
5
|
-
import {
|
|
6
|
-
import {
|
|
7
|
-
import {
|
|
2
|
+
import { SelfImproveOptions, SelfImproveResult } from '@tangle-network/agent-eval/contract';
|
|
3
|
+
import { R as RunAnalystLoopOpts, a as RunAnalystLoopResult } from './types-BtRLF2U3.js';
|
|
4
|
+
import { D as DelegateCodeArgs, C as CoderReviewer, a as CoderWinnerSelection, F as FactCandidate, b as CreateKbGateOptions } from './kb-gate-D9GBocLN.js';
|
|
5
|
+
import { C as CoderOutput } from './coder-CczgMqFx.js';
|
|
6
|
+
import { b as LoopSandboxClient, O as OutputAdapter, V as Validator, A as AgentRunSpec, c as LoopResult } from './types-DdzkffAm.js';
|
|
7
|
+
import { T as TopologyPlanner, C as CreateDynamicDriverOptions, D as DynamicDecision } from './dynamic-BvllHV6M.js';
|
|
8
8
|
|
|
9
9
|
/**
|
|
10
10
|
* @experimental
|
|
@@ -19,7 +19,7 @@ import { CoderOutput } from './profiles.js';
|
|
|
19
19
|
* review → code mode with a REQUIRED reviewer (the gate is the point)
|
|
20
20
|
* research → research-in-a-loop with valid-only KB growth (createKbGate)
|
|
21
21
|
* audit → analyze trace/run data → findings (runAnalystLoop, caller-wired)
|
|
22
|
-
* self-improve →
|
|
22
|
+
* self-improve → closed-loop text/config optimization (selfImprove, held-out gated)
|
|
23
23
|
* dynamic → agent-authored topology (runLoop + createDynamicDriver)
|
|
24
24
|
*
|
|
25
25
|
* It is intentionally a thin façade: the value is that EVERY product reuses the
|
|
@@ -91,7 +91,7 @@ declare function reviewLoopRunner(options: CoderLoopRunnerOptions & {
|
|
|
91
91
|
/** @experimental Options for the default `dynamic` runner. */
|
|
92
92
|
interface DynamicLoopRunnerOptions<Task, Output> {
|
|
93
93
|
sandboxClient: LoopSandboxClient;
|
|
94
|
-
/** The agent-authored topology planner (
|
|
94
|
+
/** The agent-authored topology planner (sync or async; an async planner is where an LLM call goes). */
|
|
95
95
|
planner: TopologyPlanner<Task, Output>;
|
|
96
96
|
task: Task;
|
|
97
97
|
output: OutputAdapter<Output>;
|
|
@@ -101,6 +101,10 @@ interface DynamicLoopRunnerOptions<Task, Output> {
|
|
|
101
101
|
agentRuns?: AgentRunSpec<Task>[];
|
|
102
102
|
maxIterations?: number;
|
|
103
103
|
maxFanout?: number;
|
|
104
|
+
/** Optional trace-analyst hook forwarded to the dynamic driver so the loop runs
|
|
105
|
+
* `f(trace, findings)` — see `CreateDynamicDriverOptions.analyze`. Caller-side
|
|
106
|
+
* seam to `runAnalystLoop`; keeps this runner analyst-free. */
|
|
107
|
+
analyze?: CreateDynamicDriverOptions<Task, Output>['analyze'];
|
|
104
108
|
}
|
|
105
109
|
/** @experimental `dynamic` mode — agent-authored topology over `runLoop`. */
|
|
106
110
|
declare function dynamicLoopRunner<Task, Output>(o: DynamicLoopRunnerOptions<Task, Output>): DelegatedLoopRunner<LoopResult<Task, Output, DynamicDecision>>;
|
|
@@ -142,8 +146,8 @@ interface ResearchLoopRunnerOptions {
|
|
|
142
146
|
* never silently dropped) so the caller audits vs retries.
|
|
143
147
|
*/
|
|
144
148
|
declare function researchLoopRunner(o: ResearchLoopRunnerOptions): DelegatedLoopRunner<ResearchLoopResult>;
|
|
145
|
-
/** @experimental `self-improve` mode —
|
|
146
|
-
declare function selfImproveLoopRunner<TScenario extends Scenario, TArtifact>(options:
|
|
149
|
+
/** @experimental `self-improve` mode — agent-eval's one-call closed loop (held-out gated). */
|
|
150
|
+
declare function selfImproveLoopRunner<TScenario extends Scenario, TArtifact>(options: SelfImproveOptions<TScenario, TArtifact>): DelegatedLoopRunner<SelfImproveResult<TScenario, TArtifact>>;
|
|
147
151
|
/** @experimental `audit` mode — analyst loop over captured trace/run data. */
|
|
148
152
|
declare function auditLoopRunner<TProposal = unknown, TEdit = unknown>(options: RunAnalystLoopOpts): DelegatedLoopRunner<RunAnalystLoopResult<TProposal, TEdit>>;
|
|
149
153
|
|
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
export { L as LoopRunnerCliArgs, f as LoopRunnerCliResult, p as parseLoopRunnerArgv, o as runLoopRunnerCli } from './loop-runner-bin-
|
|
2
|
+
export { L as LoopRunnerCliArgs, f as LoopRunnerCliResult, p as parseLoopRunnerArgv, o as runLoopRunnerCli } from './loop-runner-bin-CPrCoKqC.js';
|
|
3
3
|
import '@tangle-network/agent-eval/campaign';
|
|
4
|
-
import '
|
|
4
|
+
import '@tangle-network/agent-eval/contract';
|
|
5
|
+
import './types-BtRLF2U3.js';
|
|
5
6
|
import '@tangle-network/agent-eval';
|
|
6
|
-
import './
|
|
7
|
-
import './
|
|
8
|
-
import './types-Bcp071Jg.js';
|
|
7
|
+
import './kb-gate-D9GBocLN.js';
|
|
8
|
+
import './coder-CczgMqFx.js';
|
|
9
9
|
import '@tangle-network/sandbox';
|
|
10
|
-
import './
|
|
11
|
-
import './
|
|
10
|
+
import './types-DdzkffAm.js';
|
|
11
|
+
import './runtime-hooks-C7JwKb9E.js';
|
|
12
|
+
import './substrate-CUgk7F7s.js';
|
|
13
|
+
import './dynamic-BvllHV6M.js';
|
package/dist/loop-runner-bin.js
CHANGED
|
@@ -2,15 +2,13 @@
|
|
|
2
2
|
import {
|
|
3
3
|
parseLoopRunnerArgv,
|
|
4
4
|
runLoopRunnerCli
|
|
5
|
-
} from "./chunk-
|
|
6
|
-
import "./chunk-XBUG326M.js";
|
|
7
|
-
import "./chunk-VOX6Z3II.js";
|
|
5
|
+
} from "./chunk-NYN5RTLP.js";
|
|
8
6
|
import "./chunk-FNMGYYSS.js";
|
|
9
|
-
import "./chunk-
|
|
10
|
-
import "./chunk-
|
|
11
|
-
import "./chunk-
|
|
12
|
-
import "./chunk-
|
|
13
|
-
import "./chunk-
|
|
7
|
+
import "./chunk-IJ6FGOPO.js";
|
|
8
|
+
import "./chunk-QR4UUC5P.js";
|
|
9
|
+
import "./chunk-FK53TXOP.js";
|
|
10
|
+
import "./chunk-KEWO4KI6.js";
|
|
11
|
+
import "./chunk-PRX45WE2.js";
|
|
14
12
|
import "./chunk-DGUM43GV.js";
|
|
15
13
|
export {
|
|
16
14
|
parseLoopRunnerArgv,
|
package/dist/loops.d.ts
CHANGED
|
@@ -1,373 +1,9 @@
|
|
|
1
|
-
import { AgentProfile, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox';
|
|
2
1
|
export { AgentProfile, CreateSandboxOptions, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox';
|
|
3
|
-
|
|
4
|
-
export { C as CreateDynamicDriverOptions, D as DynamicDecision,
|
|
5
|
-
|
|
6
|
-
export {
|
|
7
|
-
|
|
2
|
+
export { AssertTraceDerivedFindings, BudgetPool, BudgetReadout, CheckpointCapableBox, CliSeam, CombinatorShape, Corpus, CorpusFilter, CorpusRecord, CreateScopeAnalystOptions, CriuCapableClient, DefinePersona, DefinePersonaInput, EqualKArm, EqualKOnCost, EqualKOnCostOptions, EqualKVerdict, Fanout, FanoutOptions, FanoutSynthesis, FileCorpus, FileResultBlobStore, FileSpawnJournal, FlatWidenGate, ForkCapableBox, InMemoryCorpus, InMemoryResultBlobStore, InMemorySpawnJournal, LoopDispatchOptions, LoopOptionsForDispatch, LoopShape, LoopUntil, LoopUntilSpec, LoopUntilState, Outcome, Panel, PanelJudge, PanelSpec, PanelVerdict, Persona, PersonaContext, PersonaExecutors, Pipeline, PipelineStage, RenderCorpusToInstructions, RenderCorpusToInstructionsOptions, ReservationTicket, RouterSeam, RunPersonified, RunPersonifiedOptions, SandboxCapabilities, SandboxLineage, SandboxLineageHandle, SandboxSeam, ScopeAnalyst, ScopeAnalyzeInput, ScopeWidenGate, SessionCapableBox, ShapeBudget, ShapeContext, ShapeRegistry, SteerContext, TrajectoryNode, TrajectoryReport, TrajectoryReportFn, TrajectoryReportOptions, UsageSink, Verify, VerifySpec, Widen, WidenDecision, WidenLineage, WidenSpec, acquireSandbox, assertTraceDerivedFindings, buildSteerContext, builtinShapes, cliExecutor, contentAddress, createBudgetPool, createExecutorRegistry, createRootHandle, createSandboxLineage, createScope, createScopeAnalyst, createShapeRegistry, createSupervisor, definePersona, equalKOnCost, extractLlmCallEvent, fanout, flatWidenGate, loopDispatch, loopUntil, mapSandboxEvent, materializeTreeView, panel, pipeline, probeSandboxCapabilities, registerShape, renderCorpusToInstructions, replaySpawnTree, reportLoopUsage, routerInlineExecutor, runPersonified, sandboxExecutor, settledToIteration, spendFromUsageEvents, trajectoryReport, verify, widen } from './runtime.js';
|
|
3
|
+
export { A as AnalyzeInput, a as CompletionAnalyst, b as CompletionEvidence, c as CompletionPolicy, d as CompletionVerdict, C as CreateDynamicDriverOptions, D as DynamicDecision, P as PlannerContext, e as TopologyMove, T as TopologyPlanner, f as completionAuthorizes, g as createDynamicDriver, h as deterministicCompletion, r as renderAnalyses, s as sentinelCompletion, i as stopSentinel } from './dynamic-BvllHV6M.js';
|
|
4
|
+
export { R as RunLoopOptions, c as createSandboxForSpec, d as defaultSelectWinner, r as runLoop } from './run-loop--hSoIknW.js';
|
|
5
|
+
export { A as Agent, d as AgentSpec, B as Budget, i as ExecutorContext, E as ExecutorRegistry, H as Handle, j as LeafExecutor, L as LeafExecutorFactory, k as LeafResult, N as NodeId, l as NodeSnapshot, m as NodeStatus, n as Restart, R as ResultBlobStore, e as RootHandle, o as RootSignal, p as Runtime, S as Scope, c as Settled, b as SpawnEvent, a as SpawnJournal, q as SpawnOpts, g as Spend, f as SupervisedResult, h as Supervisor, r as SupervisorOpts, T as TreeView, U as UsageEvent, W as WidenGate } from './types-1HbsFa7H.js';
|
|
6
|
+
export { A as AgentRunSpec, D as Driver, E as ExecCtx, I as Iteration, h as LoopDecisionPayload, i as LoopEndedPayload, j as LoopIterationDispatchPayload, k as LoopIterationEndedPayload, l as LoopIterationStartedPayload, a as LoopLineageOptions, m as LoopPlanDescription, n as LoopPlanPayload, c as LoopResult, b as LoopSandboxClient, g as LoopSandboxPlacement, o as LoopStartedPayload, p as LoopTeardownFailedPayload, d as LoopTokenUsage, f as LoopTraceEmitter, q as LoopTraceEvent, L as LoopWinner, O as OutputAdapter, r as ValidationCtx, V as Validator } from './types-DdzkffAm.js';
|
|
8
7
|
export { DefaultVerdict } from '@tangle-network/agent-eval';
|
|
9
|
-
import
|
|
10
|
-
|
|
11
|
-
/**
|
|
12
|
-
* @experimental
|
|
13
|
-
*
|
|
14
|
-
* FanoutVote driver — N parallel attempts in iteration 0, pick the highest-
|
|
15
|
-
* scoring valid output. No second iteration: the topology is "spawn N, score,
|
|
16
|
-
* pick winner". The kernel handles heterogeneous fanout via the
|
|
17
|
-
* `agentRuns: AgentRunSpec[]` form on `runLoop`.
|
|
18
|
-
*/
|
|
19
|
-
|
|
20
|
-
type FanoutVoteDecision = 'pick-winner' | 'fail';
|
|
21
|
-
/** @experimental */
|
|
22
|
-
interface FanoutVoteScored<Task, Output> {
|
|
23
|
-
task: Task;
|
|
24
|
-
output: Output;
|
|
25
|
-
verdict?: DefaultVerdict;
|
|
26
|
-
iterationIndex: number;
|
|
27
|
-
agentRunName: string;
|
|
28
|
-
}
|
|
29
|
-
/** @experimental */
|
|
30
|
-
interface CreateFanoutVoteDriverOptions<Task, Output> {
|
|
31
|
-
/** Number of parallel attempts. Must be >= 1. */
|
|
32
|
-
n: number;
|
|
33
|
-
/**
|
|
34
|
-
* Pick the winner from the scored set. Default: highest `verdict.score`
|
|
35
|
-
* among valid outputs (ties broken by smallest iteration index). When
|
|
36
|
-
* no valid outputs exist, returns `undefined` and `decide()` resolves
|
|
37
|
-
* to `'fail'`. The kernel still records winners structurally — this
|
|
38
|
-
* selector only feeds `decide()`'s pass/fail signal.
|
|
39
|
-
*/
|
|
40
|
-
selector?: (scored: FanoutVoteScored<Task, Output>[]) => FanoutVoteScored<Task, Output> | undefined;
|
|
41
|
-
/** Stable identifier surfaced in trace events. Default `'fanout-vote'`. */
|
|
42
|
-
name?: string;
|
|
43
|
-
}
|
|
44
|
-
/** @experimental */
|
|
45
|
-
declare function createFanoutVoteDriver<Task, Output>(options: CreateFanoutVoteDriverOptions<Task, Output>): Driver<Task, Output, FanoutVoteDecision>;
|
|
46
|
-
/**
|
|
47
|
-
* Test helper: surface the per-iteration scored view a custom `selector`
|
|
48
|
-
* would receive. Exposed so consumers writing a custom selector can test it
|
|
49
|
-
* standalone without driving the full kernel.
|
|
50
|
-
*
|
|
51
|
-
* @experimental
|
|
52
|
-
*/
|
|
53
|
-
declare function scoreFanoutVoteIterations<Task, Output>(iterations: ReadonlyArray<Iteration<Task, Output>>): FanoutVoteScored<Task, Output>[];
|
|
54
|
-
|
|
55
|
-
/**
|
|
56
|
-
* @experimental
|
|
57
|
-
*
|
|
58
|
-
* Refine driver — single task per iteration, validator-gated.
|
|
59
|
-
*
|
|
60
|
-
* `plan` returns `[task]` (possibly transformed via `refineTask`) until the
|
|
61
|
-
* prior verdict is valid OR the local cap is hit, then `[]`.
|
|
62
|
-
* `decide` returns `'stop'` once the latest verdict is valid OR the cap is
|
|
63
|
-
* reached. The kernel's `maxIterations` is an orthogonal safety cap;
|
|
64
|
-
* whichever is lower wins.
|
|
65
|
-
*/
|
|
66
|
-
|
|
67
|
-
type RefineDecision = 'continue' | 'stop';
|
|
68
|
-
/** @experimental */
|
|
69
|
-
interface CreateRefineDriverOptions<Task> {
|
|
70
|
-
/** Hard cap on iterations. Default 5. */
|
|
71
|
-
maxIterations?: number;
|
|
72
|
-
/**
|
|
73
|
-
* Optional task transform applied each round based on the prior verdict.
|
|
74
|
-
* When omitted, the same task is replayed and the agent is expected to
|
|
75
|
-
* inspect the sandbox session state for prior attempts.
|
|
76
|
-
*/
|
|
77
|
-
refineTask?: (task: Task, prior: DefaultVerdict) => Task;
|
|
78
|
-
/** Stable identifier surfaced in trace events. Default `'refine'`. */
|
|
79
|
-
name?: string;
|
|
80
|
-
}
|
|
81
|
-
/** @experimental */
|
|
82
|
-
declare function createRefineDriver<Task, Output>(options?: CreateRefineDriverOptions<Task>): Driver<Task, Output, RefineDecision>;
|
|
83
|
-
/**
|
|
84
|
-
* Test helper: select the last-valid iteration (or the last attempt if
|
|
85
|
-
* none passed). Mirrors the kernel's default selector ordering for refine
|
|
86
|
-
* topologies — the most recent successful attempt wins.
|
|
87
|
-
*
|
|
88
|
-
* @experimental
|
|
89
|
-
*/
|
|
90
|
-
declare function refineWinnerIndex<Task, Output>(iterations: ReadonlyArray<Iteration<Task, Output>>): number | undefined;
|
|
91
|
-
|
|
92
|
-
/**
|
|
93
|
-
* @experimental
|
|
94
|
-
*
|
|
95
|
-
* `createSandboxPlanner` — wire the dynamic driver's `TopologyPlanner` to a
|
|
96
|
-
* real agent. Each round it spins a sandbox on `profile`, streams a prompt that
|
|
97
|
-
* carries the history summary, and decodes the agent's chosen `TopologyMove`
|
|
98
|
-
* from a JSON envelope it emits. This is the "agent authors its own loop
|
|
99
|
-
* topology" path: the planner profile can be any harness (claude-code, codex,
|
|
100
|
-
* opencode, pi) — its only job is to read what happened and emit the next move.
|
|
101
|
-
*
|
|
102
|
-
* The planner profile is deliberately distinct from the worker `agentRuns`: a
|
|
103
|
-
* cheap fast model can steer topology while expensive workers do the labor, and
|
|
104
|
-
* the planner never names which harness runs a branch — the kernel's
|
|
105
|
-
* `agentRuns` round-robin decides that.
|
|
106
|
-
*
|
|
107
|
-
* Envelope contract the agent must emit (fenced ```json or a structured
|
|
108
|
-
* `result`/`final` event payload):
|
|
109
|
-
* { "kind": "refine" | "fanout" | "stop",
|
|
110
|
-
* "tasks"?: [ <task>, ... ], // decoded via `decodeTask`
|
|
111
|
-
* "n"?: number, // fanout shorthand: N copies of the root task
|
|
112
|
-
* "rationale"?: string }
|
|
113
|
-
*
|
|
114
|
-
* A missing / unparseable / unknown-kind envelope throws `PlannerError` — the
|
|
115
|
-
* loop never silently runs a topology the agent did not choose.
|
|
116
|
-
*/
|
|
117
|
-
|
|
118
|
-
/** Raw, pre-decode envelope an agent emits to choose the next move. */
|
|
119
|
-
interface TopologyMoveEnvelope {
|
|
120
|
-
kind: string;
|
|
121
|
-
tasks?: unknown[];
|
|
122
|
-
n?: number;
|
|
123
|
-
rationale?: string;
|
|
124
|
-
}
|
|
125
|
-
/** @experimental */
|
|
126
|
-
interface CreateSandboxPlannerOptions<Task, Output> {
|
|
127
|
-
/** Sandbox client — the planner calls `.create()` once per round. */
|
|
128
|
-
client: LoopSandboxClient;
|
|
129
|
-
/** The planner agent. Steers topology; does not run the work. */
|
|
130
|
-
profile: AgentProfile;
|
|
131
|
-
/**
|
|
132
|
-
* Decode one raw task from the envelope's `tasks[]` into a domain `Task`.
|
|
133
|
-
* Required because `Task` is opaque to this module — only the caller knows
|
|
134
|
-
* its shape. Throw to reject a malformed task; the error surfaces as a
|
|
135
|
-
* `PlannerError`.
|
|
136
|
-
*/
|
|
137
|
-
decodeTask: (raw: unknown, ctx: PlannerContext<Task, Output>) => Task;
|
|
138
|
-
/** Override the default prompt (history summary + envelope contract). */
|
|
139
|
-
buildPrompt?: (ctx: PlannerContext<Task, Output>) => string;
|
|
140
|
-
/** Override envelope extraction from the event stream. */
|
|
141
|
-
parseEnvelope?: (events: SandboxEvent[]) => TopologyMoveEnvelope | undefined;
|
|
142
|
-
/** Sandbox overrides for the planner sandbox (timeouts, env, etc.). */
|
|
143
|
-
sandboxOverrides?: AgentRunSpec<Task>['sandboxOverrides'];
|
|
144
|
-
/** Cancellation for the planner's own LLM call. */
|
|
145
|
-
signal?: AbortSignal;
|
|
146
|
-
}
|
|
147
|
-
/** @experimental */
|
|
148
|
-
declare function createSandboxPlanner<Task, Output>(opts: CreateSandboxPlannerOptions<Task, Output>): TopologyPlanner<Task, Output>;
|
|
149
|
-
|
|
150
|
-
/**
|
|
151
|
-
* @experimental
|
|
152
|
-
*
|
|
153
|
-
* `runLoop` — the topology-agnostic kernel built atop the sandbox SDK.
|
|
154
|
-
*
|
|
155
|
-
* Each iteration:
|
|
156
|
-
* 1. `driver.plan(task, history)` → N tasks (1 = refine, N = fanout, 0 = stop)
|
|
157
|
-
* 2. For each task (parallel, bounded by `maxConcurrency`):
|
|
158
|
-
* a. round-robin an `AgentRunSpec` from `agentRuns`
|
|
159
|
-
* b. `sandboxClient.create({ backend: { profile }, ...overrides })`
|
|
160
|
-
* c. emit `loop.iteration.dispatch` with the placement
|
|
161
|
-
* (`{ sibling, sandboxId }` or `{ fleet, fleetId, machineId, sandboxId }`)
|
|
162
|
-
* d. iterate `box.streamPrompt(taskToPrompt(task))` and collect events
|
|
163
|
-
* 3. `output.parse(events)` → typed `Output`
|
|
164
|
-
* 4. `validator?.validate(output)` → `DefaultVerdict`
|
|
165
|
-
* 5. Append `Iteration` to history; emit `loop.iteration.ended`
|
|
166
|
-
* 6. `driver.decide(history)` → if terminal, return result + winner
|
|
167
|
-
*
|
|
168
|
-
* The kernel owns: iteration accounting, per-iteration timing, error
|
|
169
|
-
* capture, abort propagation, concurrency cap, cost aggregation, and trace
|
|
170
|
-
* emission. The kernel does NOT own: what the agent runs (sandbox SDK +
|
|
171
|
-
* profile), how outputs are decoded (output adapter), how outputs are
|
|
172
|
-
* scored (validator), or topology (driver).
|
|
173
|
-
*/
|
|
174
|
-
|
|
175
|
-
/** @experimental */
|
|
176
|
-
interface RunLoopOptions<Task, Output, Decision> {
|
|
177
|
-
driver: Driver<Task, Output, Decision>;
|
|
178
|
-
/**
|
|
179
|
-
* Single agent spec — every iteration uses this profile. Mutually
|
|
180
|
-
* exclusive with `agentRuns`.
|
|
181
|
-
*/
|
|
182
|
-
agentRun?: AgentRunSpec<Task>;
|
|
183
|
-
/**
|
|
184
|
-
* Multiple specs for heterogeneous fanout. The kernel round-robins
|
|
185
|
-
* through them when the driver plans N tasks. Mutually exclusive with
|
|
186
|
-
* `agentRun`.
|
|
187
|
-
*/
|
|
188
|
-
agentRuns?: AgentRunSpec<Task>[];
|
|
189
|
-
output: OutputAdapter<Output>;
|
|
190
|
-
validator?: Validator<Output>;
|
|
191
|
-
task: Task;
|
|
192
|
-
ctx: ExecCtx;
|
|
193
|
-
/** Default 10. Hard cap on total iterations across all `plan()` rounds. */
|
|
194
|
-
maxIterations?: number;
|
|
195
|
-
/** Default 4. In-flight worker cap within a single `plan()` batch. */
|
|
196
|
-
maxConcurrency?: number;
|
|
197
|
-
/**
|
|
198
|
-
* Pre-allocated id for trace correlation. Default = `loop-${random}`.
|
|
199
|
-
* Surfaces as `runId` on every emitted `LoopTraceEvent`.
|
|
200
|
-
*/
|
|
201
|
-
runId?: string;
|
|
202
|
-
/**
|
|
203
|
-
* Clock override; default `Date.now`. Deterministic tests pass a
|
|
204
|
-
* monotonic counter to stabilize iteration timing fields.
|
|
205
|
-
*/
|
|
206
|
-
now?: () => number;
|
|
207
|
-
/**
|
|
208
|
-
* Override the default winner selector (highest-valid-score, ties broken
|
|
209
|
-
* by earliest iteration).
|
|
210
|
-
*/
|
|
211
|
-
selectWinner?: (iterations: Iteration<Task, Output>[]) => LoopWinner<Task, Output> | undefined;
|
|
212
|
-
}
|
|
213
|
-
/** @experimental */
|
|
214
|
-
declare function runLoop<Task, Output, Decision>(options: RunLoopOptions<Task, Output, Decision>): Promise<LoopResult<Task, Output, Decision>>;
|
|
215
|
-
/**
|
|
216
|
-
* Instantiate a sandbox for an `AgentRunSpec`: sets `backend.profile` to the
|
|
217
|
-
* spec's profile (inferring the backend type when the spec doesn't override
|
|
218
|
-
* it) and merges `sandboxOverrides`. Shared by the loop kernel and the
|
|
219
|
-
* `AgentRuntime.act` sandbox bridge so both boot the sandbox identically.
|
|
220
|
-
*/
|
|
221
|
-
declare function createSandboxForSpec<Task>(client: LoopSandboxClient, spec: AgentRunSpec<Task>, signal: AbortSignal): Promise<SandboxInstance>;
|
|
222
|
-
|
|
223
|
-
/**
|
|
224
|
-
* `loopDispatch` — turn `runLoop` into an agent-eval campaign dispatch.
|
|
225
|
-
*
|
|
226
|
-
* Without this adapter a consumer wiring `runLoop` into `runProfileMatrix` /
|
|
227
|
-
* `runCampaign` has to, by hand, every time: (a) build an `ExecCtx` with a
|
|
228
|
-
* sandbox client, (b) adapt the campaign `DispatchContext.trace` into a
|
|
229
|
-
* `LoopTraceEmitter` (or lose all loop trace correlation), and (c) remember to
|
|
230
|
-
* forward the loop's cost + tokens via `ctx.cost` (forgetting it yields a
|
|
231
|
-
* `{0,0}` cell the backend-integrity guard reads as a stub). Three foot-guns,
|
|
232
|
-
* the third silent. The fleet's products skipped (c) and fell back to a
|
|
233
|
-
* `workerRecords[]` side-channel — the exact anti-pattern the substrate exists
|
|
234
|
-
* to kill.
|
|
235
|
-
*
|
|
236
|
-
* `loopDispatch` collapses all three into one typed call:
|
|
237
|
-
*
|
|
238
|
-
* const dispatch = loopDispatch({
|
|
239
|
-
* sandboxClient,
|
|
240
|
-
* toLoopOptions: (scenario, profile) => ({ driver, agentRun, output, validator, task }),
|
|
241
|
-
* })
|
|
242
|
-
* await runProfileMatrix({ profiles, scenarios, dispatch, judges, commitSha })
|
|
243
|
-
*
|
|
244
|
-
* Usage is reported automatically; trace events are forwarded automatically;
|
|
245
|
-
* the ctx is built automatically. The seam becomes impossible to mis-wire.
|
|
246
|
-
*
|
|
247
|
-
* Typed structurally against the campaign `DispatchContext` (imported type-only
|
|
248
|
-
* from `@tangle-network/agent-eval/campaign`) — a downward dependency, never an
|
|
249
|
-
* inversion.
|
|
250
|
-
*/
|
|
251
|
-
|
|
252
|
-
/** runLoop options minus the `ctx` (loopDispatch builds the ctx). */
|
|
253
|
-
type LoopOptionsForDispatch<Task, Output, Decision> = Omit<RunLoopOptions<Task, Output, Decision>, 'ctx'>;
|
|
254
|
-
interface LoopDispatchOptions<Task, Output, Decision, TScenario extends Scenario, TArtifact> {
|
|
255
|
-
/** Sandbox client used for every cell's `runLoop`. Supplied once. */
|
|
256
|
-
sandboxClient: LoopSandboxClient;
|
|
257
|
-
/** Build the per-cell runLoop options from the scenario (+ profile, when
|
|
258
|
-
* used with `runProfileMatrix`). */
|
|
259
|
-
toLoopOptions: (scenario: TScenario, profile: AgentProfile$1) => LoopOptionsForDispatch<Task, Output, Decision>;
|
|
260
|
-
/** Map the finished loop to the artifact the judges score. Default:
|
|
261
|
-
* `result.winner?.output`. A loop with no winner yields `undefined` (judges
|
|
262
|
-
* skip the cell) — but the loop's token usage is STILL reported, so the
|
|
263
|
-
* integrity guard sees real activity. */
|
|
264
|
-
toArtifact?: (result: LoopResult<Task, Output, Decision>) => TArtifact;
|
|
265
|
-
/** Forward `loop.*` trace events into the campaign's scoped trace so loop
|
|
266
|
-
* spans correlate with the cell. Default true. */
|
|
267
|
-
forwardTrace?: boolean;
|
|
268
|
-
/** Cost-meter source label for the loop's spend. Default `'loop'`. */
|
|
269
|
-
costSource?: string;
|
|
270
|
-
}
|
|
271
|
-
/**
|
|
272
|
-
* Adapter for `runProfileMatrix` (profile is an axis). Returns a
|
|
273
|
-
* `ProfileDispatchFn` that runs `runLoop` per (profile, scenario) cell and
|
|
274
|
-
* reports usage automatically.
|
|
275
|
-
*/
|
|
276
|
-
declare function loopDispatch<Task, Output, Decision, TScenario extends Scenario, TArtifact>(opts: LoopDispatchOptions<Task, Output, Decision, TScenario, TArtifact>): ProfileDispatchFn<TScenario, TArtifact>;
|
|
277
|
-
/**
|
|
278
|
-
* Adapter for `runCampaign` (no profile axis). `toLoopOptions` receives only
|
|
279
|
-
* the scenario; the `profile` passed to the shared core is a stable sentinel
|
|
280
|
-
* so a single `runLoop` config is reused across cells.
|
|
281
|
-
*/
|
|
282
|
-
declare function loopCampaignDispatch<Task, Output, Decision, TScenario extends Scenario, TArtifact>(opts: Omit<LoopDispatchOptions<Task, Output, Decision, TScenario, TArtifact>, 'toLoopOptions'> & {
|
|
283
|
-
toLoopOptions: (scenario: TScenario) => LoopOptionsForDispatch<Task, Output, Decision>;
|
|
284
|
-
}): DispatchFn<TScenario, TArtifact>;
|
|
285
|
-
|
|
286
|
-
/**
|
|
287
|
-
* Bridge a finished `runLoop` into an agent-eval campaign / profile-matrix
|
|
288
|
-
* dispatch.
|
|
289
|
-
*
|
|
290
|
-
* `runProfileMatrix` (and `runCampaign`) run the backend-integrity guard over
|
|
291
|
-
* the token usage a dispatch reports through `ctx.cost`. A dispatch that wraps
|
|
292
|
-
* `runLoop` must forward the loop's cost AND token usage, or the guard reads
|
|
293
|
-
* the run as a stub and throws. `reportLoopUsage` does that forwarding in one call:
|
|
294
|
-
*
|
|
295
|
-
* const dispatch: ProfileDispatchFn<S, A> = async (profile, scenario, ctx) => {
|
|
296
|
-
* const result = await runLoop({ ...optsFor(profile, scenario), ctx: loopCtx })
|
|
297
|
-
* reportLoopUsage(ctx.cost, result)
|
|
298
|
-
* return result.winner?.output as A
|
|
299
|
-
* }
|
|
300
|
-
*
|
|
301
|
-
* Typed structurally against the campaign `DispatchContext.cost` so this module
|
|
302
|
-
* stays free of an agent-eval import — it works with any cost meter exposing
|
|
303
|
-
* `observe` + `observeTokens`.
|
|
304
|
-
*/
|
|
305
|
-
|
|
306
|
-
/** The slice of an agent-eval campaign `DispatchContext.cost` this needs. */
|
|
307
|
-
interface UsageSink {
|
|
308
|
-
observe(amountUsd: number, source: string): void;
|
|
309
|
-
observeTokens(usage: {
|
|
310
|
-
input: number;
|
|
311
|
-
output: number;
|
|
312
|
-
}): void;
|
|
313
|
-
}
|
|
314
|
-
/**
|
|
315
|
-
* Forward a `LoopResult`'s aggregated cost + token usage into a campaign cost
|
|
316
|
-
* meter so the backend-integrity guard sees real LLM activity. `source`
|
|
317
|
-
* defaults to `'loop'`.
|
|
318
|
-
*/
|
|
319
|
-
declare function reportLoopUsage<Task, Output, Decision>(cost: UsageSink, result: Pick<LoopResult<Task, Output, Decision>, 'costUsd' | 'tokenUsage'>, source?: string): void;
|
|
320
|
-
|
|
321
|
-
/**
|
|
322
|
-
* Sandbox-event → runtime-event mapping.
|
|
323
|
-
*
|
|
324
|
-
* The sandbox SDK emits a polymorphic `SandboxEvent = { type, data, id? }`
|
|
325
|
-
* whose `type` vocabulary is backend-determined (opencode, etc.) rather than
|
|
326
|
-
* enumerated by the SDK. Two consumers project it:
|
|
327
|
-
* - the loop kernel's cost ledger (`extractLlmCallEvent`) — sums usage off
|
|
328
|
-
* every cost-bearing event, regardless of stream shape;
|
|
329
|
-
* - the `AgentRuntime.act` streaming contract (`mapSandboxEvent`) — projects
|
|
330
|
-
* incremental events to the `RuntimeStreamEvent` chat-UX vocabulary.
|
|
331
|
-
*
|
|
332
|
-
* Both live here so the empirically-observed `type` vocabulary has one home.
|
|
333
|
-
*/
|
|
334
|
-
|
|
335
|
-
/**
|
|
336
|
-
* Extract a `RuntimeStreamEvent`-shaped `llm_call` from a sandbox event when
|
|
337
|
-
* the event carries usage/cost data. Returns `undefined` for non-cost events
|
|
338
|
-
* so the kernel can iterate the full stream without branching.
|
|
339
|
-
*
|
|
340
|
-
* Canonical cost-carrying types observed in the wild:
|
|
341
|
-
* - `llm_call` — `data: { model, tokensIn, tokensOut, costUsd, ... }`
|
|
342
|
-
* - `message.completed` / `result` — `data: { usage: { inputTokens,
|
|
343
|
-
* outputTokens, totalCostUsd? } }`
|
|
344
|
-
* - `cost.usage` / `usage` — same shape under a dedicated type
|
|
345
|
-
*
|
|
346
|
-
* Numeric coercion is strict: `Number.isFinite` gates every accumulator write
|
|
347
|
-
* so a sentinel `NaN` from a misbehaving backend cannot poison the ledger.
|
|
348
|
-
*/
|
|
349
|
-
declare function extractLlmCallEvent(event: SandboxEvent, agentRunName: string): (RuntimeStreamEvent & {
|
|
350
|
-
type: 'llm_call';
|
|
351
|
-
}) | undefined;
|
|
352
|
-
/**
|
|
353
|
-
* Project one `SandboxEvent` onto the `RuntimeStreamEvent` chat-UX vocabulary,
|
|
354
|
-
* for runtimes that bridge a sandbox `streamPrompt` into the
|
|
355
|
-
* `AgentRuntime.act` streaming contract. Returns `undefined` for events that
|
|
356
|
-
* have no faithful projection — the raw stream is preserved separately for the
|
|
357
|
-
* `OutputAdapter`, so an unmapped event never loses data.
|
|
358
|
-
*
|
|
359
|
-
* Mapped (the task-optional incremental variants — no synthesized task
|
|
360
|
-
* lifecycle, no guessed tool-part shapes):
|
|
361
|
-
* - `message.part.updated` text part → `text_delta`
|
|
362
|
-
* - `message.part.updated` reasoning/thinking part → `reasoning_delta`
|
|
363
|
-
* - cost-bearing events → `llm_call` (shared with the ledger extractor)
|
|
364
|
-
*
|
|
365
|
-
* The opencode backend emits incremental text as
|
|
366
|
-
* `{ type: 'message.part.updated', data: { part: { type, text }, delta } }`;
|
|
367
|
-
* `delta` is the increment, `part.text` the running accumulation.
|
|
368
|
-
*/
|
|
369
|
-
declare function mapSandboxEvent(event: SandboxEvent, opts?: {
|
|
370
|
-
agentRunName?: string;
|
|
371
|
-
}): RuntimeStreamEvent | undefined;
|
|
372
|
-
|
|
373
|
-
export { AgentRunSpec, type CreateFanoutVoteDriverOptions, type CreateRefineDriverOptions, type CreateSandboxPlannerOptions, Driver, ExecCtx, type FanoutVoteDecision, type FanoutVoteScored, Iteration, type LoopDispatchOptions, type LoopOptionsForDispatch, LoopResult, LoopSandboxClient, LoopWinner, OutputAdapter, PlannerContext, type RefineDecision, type RunLoopOptions, type TopologyMoveEnvelope, TopologyPlanner, type UsageSink, Validator, createFanoutVoteDriver, createRefineDriver, createSandboxForSpec, createSandboxPlanner, extractLlmCallEvent, loopCampaignDispatch, loopDispatch, mapSandboxEvent, refineWinnerIndex, reportLoopUsage, runLoop, scoreFanoutVoteIterations };
|
|
8
|
+
import '@tangle-network/agent-eval/campaign';
|
|
9
|
+
import './runtime-hooks-C7JwKb9E.js';
|