@tangle-network/agent-runtime 0.44.0 → 0.45.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +95 -203
- package/dist/agent.d.ts +5 -4
- package/dist/agent.js +5 -7
- package/dist/agent.js.map +1 -1
- package/dist/analyst-loop.d.ts +65 -4
- package/dist/analyst-loop.js +6 -1
- package/dist/audit.d.ts +93 -0
- package/dist/audit.js +312 -0
- package/dist/audit.js.map +1 -0
- package/dist/chunk-4B6U4CVQ.js +15 -0
- package/dist/chunk-4B6U4CVQ.js.map +1 -0
- package/dist/chunk-FK53TXOP.js +603 -0
- package/dist/chunk-FK53TXOP.js.map +1 -0
- package/dist/{chunk-SKUZZCHE.js → chunk-IJ6FGOPO.js} +5 -5
- package/dist/chunk-IJ6FGOPO.js.map +1 -0
- package/dist/{chunk-HVYOHJHK.js → chunk-IJGS6J7X.js} +2 -2
- package/dist/chunk-IJGS6J7X.js.map +1 -0
- package/dist/chunk-KEWO4KI6.js +3599 -0
- package/dist/chunk-KEWO4KI6.js.map +1 -0
- package/dist/{chunk-NRZOXCJK.js → chunk-KSMX62JF.js} +2 -2
- package/dist/{chunk-GFKVVRQ7.js → chunk-NYN5RTLP.js} +11 -10
- package/dist/chunk-NYN5RTLP.js.map +1 -0
- package/dist/chunk-PRX45WE2.js +264 -0
- package/dist/chunk-PRX45WE2.js.map +1 -0
- package/dist/{chunk-3HMHSN22.js → chunk-QR4UUC5P.js} +6 -6
- package/dist/chunk-QR4UUC5P.js.map +1 -0
- package/dist/chunk-WIR4HOOJ.js +27 -0
- package/dist/chunk-WIR4HOOJ.js.map +1 -0
- package/dist/{chunk-KDMRUD2P.js → chunk-Z2QXVBA6.js} +296 -8
- package/dist/chunk-Z2QXVBA6.js.map +1 -0
- package/dist/coder-CczgMqFx.d.ts +114 -0
- package/dist/dynamic-BvllHV6M.d.ts +221 -0
- package/dist/{improvement-adapter-BC4HhuAR.d.ts → improvement-adapter-CWegd3vw.d.ts} +1 -1
- package/dist/improvement.d.ts +2 -3
- package/dist/improvement.js +0 -5
- package/dist/improvement.js.map +1 -1
- package/dist/index.d.ts +123 -10
- package/dist/index.js +398 -10
- package/dist/index.js.map +1 -1
- package/dist/{kb-gate-D0ZIhFOU.d.ts → kb-gate-D9GBocLN.d.ts} +82 -5
- package/dist/{loop-runner-bin-BLMa8He3.d.ts → loop-runner-bin-CPrCoKqC.d.ts} +14 -10
- package/dist/loop-runner-bin.d.ts +9 -7
- package/dist/loop-runner-bin.js +6 -8
- package/dist/loops.d.ts +7 -393
- package/dist/loops.js +94 -25
- package/dist/mcp/bin.js +7 -7
- package/dist/mcp/bin.js.map +1 -1
- package/dist/mcp/index.d.ts +284 -11
- package/dist/mcp/index.js +341 -9
- package/dist/mcp/index.js.map +1 -1
- package/dist/{otel-export-wFDmmurL.d.ts → otel-export-Dy2DyUCU.d.ts} +1 -1
- package/dist/profiles.d.ts +385 -86
- package/dist/profiles.js +549 -4
- package/dist/profiles.js.map +1 -1
- package/dist/{run-loop-C4L1Sted.d.ts → run-loop--hSoIknW.d.ts} +35 -12
- package/dist/runtime-hooks-C7JwKb9E.d.ts +70 -0
- package/dist/runtime.d.ts +1860 -0
- package/dist/runtime.js +114 -0
- package/dist/runtime.js.map +1 -0
- package/dist/substrate-CUgk7F7s.d.ts +77 -0
- package/dist/topology.d.ts +73 -0
- package/dist/topology.js +111 -0
- package/dist/topology.js.map +1 -0
- package/dist/types-1HbsFa7H.d.ts +438 -0
- package/dist/{types-p8dWBIXL.d.ts → types-BtRLF2U3.d.ts} +1 -1
- package/dist/{types-DbJzz2uf.d.ts → types-DdzkffAm.d.ts} +95 -1
- package/dist/workflow.d.ts +3 -2
- package/dist/workflow.js +4 -5
- package/dist/workflow.js.map +1 -1
- package/package.json +26 -6
- package/skills/agent-runtime-adoption/SKILL.md +29 -26
- package/dist/chunk-3HMHSN22.js.map +0 -1
- package/dist/chunk-GFKVVRQ7.js.map +0 -1
- package/dist/chunk-HVYOHJHK.js.map +0 -1
- package/dist/chunk-KDMRUD2P.js.map +0 -1
- package/dist/chunk-PY6NMZYX.js +0 -52
- package/dist/chunk-PY6NMZYX.js.map +0 -1
- package/dist/chunk-S7JXV32P.js +0 -947
- package/dist/chunk-S7JXV32P.js.map +0 -1
- package/dist/chunk-SKUZZCHE.js.map +0 -1
- package/dist/chunk-SQSCRJ7U.js +0 -65
- package/dist/chunk-SQSCRJ7U.js.map +0 -1
- package/dist/chunk-VOX6Z3II.js +0 -90
- package/dist/chunk-VOX6Z3II.js.map +0 -1
- package/dist/chunk-XBUG326M.js +0 -261
- package/dist/chunk-XBUG326M.js.map +0 -1
- package/dist/dynamic-wUgp6UKs.d.ts +0 -108
- package/dist/optimize-prompt-D-urF2wW.d.ts +0 -129
- package/dist/{chunk-NRZOXCJK.js.map → chunk-KSMX62JF.js.map} +0 -0
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
import { CoderOutput, CoderTask } from './
|
|
2
|
-
import {
|
|
1
|
+
import { C as CoderOutput, a as CoderTask } from './coder-CczgMqFx.js';
|
|
2
|
+
import { b as LoopSandboxClient, f as LoopTraceEmitter } from './types-DdzkffAm.js';
|
|
3
3
|
import { SandboxInstance } from '@tangle-network/sandbox';
|
|
4
|
+
import { a as UiLens, U as UiFinding } from './substrate-CUgk7F7s.js';
|
|
4
5
|
|
|
5
6
|
/**
|
|
6
7
|
* @experimental
|
|
@@ -102,7 +103,7 @@ declare function createFleetWorkspaceExecutor(options: FleetWorkspaceExecutorOpt
|
|
|
102
103
|
*/
|
|
103
104
|
|
|
104
105
|
/** @experimental */
|
|
105
|
-
type DelegationProfile = 'coder' | 'researcher';
|
|
106
|
+
type DelegationProfile = 'coder' | 'researcher' | 'ui-auditor';
|
|
106
107
|
/** @experimental */
|
|
107
108
|
type DelegationStatus = 'pending' | 'running' | 'completed' | 'failed' | 'cancelled';
|
|
108
109
|
/**
|
|
@@ -223,7 +224,73 @@ type DelegationResultPayload = {
|
|
|
223
224
|
} | {
|
|
224
225
|
profile: 'researcher';
|
|
225
226
|
output: ResearchOutputShape;
|
|
227
|
+
} | {
|
|
228
|
+
profile: 'ui-auditor';
|
|
229
|
+
output: UiAuditorDelegationOutput;
|
|
226
230
|
};
|
|
231
|
+
/**
|
|
232
|
+
* Wire-shape of a completed UI-audit delegation. The `findings` array
|
|
233
|
+
* contains every finding persisted to the workspace during the run,
|
|
234
|
+
* already enriched with `id` and `createdAt` by the writer. `workspaceDir`
|
|
235
|
+
* is the absolute path to the workspace; `indexFile` is the workspace-
|
|
236
|
+
* relative path to the regenerated index.md.
|
|
237
|
+
*
|
|
238
|
+
* @experimental
|
|
239
|
+
*/
|
|
240
|
+
interface UiAuditorDelegationOutput {
|
|
241
|
+
workspaceDir: string;
|
|
242
|
+
indexFile: string;
|
|
243
|
+
findings: UiFinding[];
|
|
244
|
+
/** Total iterations the loop ran for this delegation. */
|
|
245
|
+
iterations: number;
|
|
246
|
+
}
|
|
247
|
+
/** @experimental */
|
|
248
|
+
type UiAuditLensFilter = readonly UiLens[];
|
|
249
|
+
/** Optional per-route capture spec the agent surfaces over the wire. */
|
|
250
|
+
interface DelegateUiAuditRoute {
|
|
251
|
+
/** Stable route name (used in screenshot filenames + finding metadata). */
|
|
252
|
+
name: string;
|
|
253
|
+
/** Fully-qualified URL. */
|
|
254
|
+
url: string;
|
|
255
|
+
/** Viewports to capture at. Defaults to `[{ width: 1280, height: 800 }]`. */
|
|
256
|
+
viewports?: readonly {
|
|
257
|
+
width: number;
|
|
258
|
+
height: number;
|
|
259
|
+
}[];
|
|
260
|
+
/** Default false. Full-page captures for the broad lenses. */
|
|
261
|
+
fullPage?: boolean;
|
|
262
|
+
/** Selector to wait for before capture. */
|
|
263
|
+
waitFor?: string;
|
|
264
|
+
}
|
|
265
|
+
/** @experimental */
|
|
266
|
+
interface DelegateUiAuditConfig {
|
|
267
|
+
/**
|
|
268
|
+
* Lenses to iterate. Default: every lens except `'other'`. Order is
|
|
269
|
+
* preserved — the driver iterates lens-by-lens.
|
|
270
|
+
*/
|
|
271
|
+
lenses?: UiAuditLensFilter;
|
|
272
|
+
/** Maximum total iterations across all (lens × route) pairs. Default 33 (11 lenses × 3 routes). */
|
|
273
|
+
maxIterations?: number;
|
|
274
|
+
/** Maximum concurrent iterations within a single plan() round. Default 2. */
|
|
275
|
+
maxConcurrency?: number;
|
|
276
|
+
/** Free-form product context surfaced to the judge. */
|
|
277
|
+
productContext?: string;
|
|
278
|
+
}
|
|
279
|
+
/** @experimental */
|
|
280
|
+
interface DelegateUiAuditArgs {
|
|
281
|
+
/** Workspace root for the audit (absolute path). */
|
|
282
|
+
workspaceDir: string;
|
|
283
|
+
/** Routes to audit. Must be non-empty. */
|
|
284
|
+
routes: readonly DelegateUiAuditRoute[];
|
|
285
|
+
/** Multi-tenant scope. */
|
|
286
|
+
namespace?: string;
|
|
287
|
+
config?: DelegateUiAuditConfig;
|
|
288
|
+
}
|
|
289
|
+
/** @experimental */
|
|
290
|
+
interface DelegateUiAuditResult {
|
|
291
|
+
taskId: string;
|
|
292
|
+
estimatedDurationMs?: number;
|
|
293
|
+
}
|
|
227
294
|
/**
|
|
228
295
|
* Loose shape of a research output over the wire — the substrate cannot
|
|
229
296
|
* import the `ResearchOutput` type from agent-knowledge without inducing
|
|
@@ -274,7 +341,7 @@ interface DelegationHistoryEntry {
|
|
|
274
341
|
taskId: string;
|
|
275
342
|
profile: DelegationProfile;
|
|
276
343
|
namespace?: string;
|
|
277
|
-
args: DelegateCodeArgs | DelegateResearchArgs;
|
|
344
|
+
args: DelegateCodeArgs | DelegateResearchArgs | DelegateUiAuditArgs;
|
|
278
345
|
status: DelegationStatus;
|
|
279
346
|
feedback?: DelegationFeedbackSnapshot[];
|
|
280
347
|
costUsd?: number;
|
|
@@ -295,6 +362,16 @@ interface DelegateRunCtx {
|
|
|
295
362
|
type CoderDelegate = (args: DelegateCodeArgs, ctx: DelegateRunCtx) => Promise<CoderOutput>;
|
|
296
363
|
/** @experimental */
|
|
297
364
|
type ResearcherDelegate = (args: DelegateResearchArgs, ctx: DelegateRunCtx) => Promise<ResearchOutputShape>;
|
|
365
|
+
/**
|
|
366
|
+
* UI-auditor delegate — fully consumer-injected. agent-runtime ships no
|
|
367
|
+
* default factory because the inputs are workspace path + judge function
|
|
368
|
+
* + (optionally) a `LoopSandboxClient`, and the judge is the consumer's
|
|
369
|
+
* model seam. See `createInProcessUiAuditClient` + `uiAuditorProfile` in
|
|
370
|
+
* `@tangle-network/agent-runtime/profiles` for the canonical wiring.
|
|
371
|
+
*
|
|
372
|
+
* @experimental
|
|
373
|
+
*/
|
|
374
|
+
type UiAuditorDelegate = (args: DelegateUiAuditArgs, ctx: DelegateRunCtx) => Promise<UiAuditorDelegationOutput>;
|
|
298
375
|
/** @experimental Structured review verdict over a coder candidate. */
|
|
299
376
|
interface CoderReview {
|
|
300
377
|
/** Gate: only approved candidates are eligible to win. */
|
|
@@ -443,4 +520,4 @@ interface CreateKbGateOptions {
|
|
|
443
520
|
*/
|
|
444
521
|
declare function createKbGate(options?: CreateKbGateOptions): (candidate: FactCandidate) => Promise<KbGateResult>;
|
|
445
522
|
|
|
446
|
-
export { type
|
|
523
|
+
export { type DelegateCodeConfig as A, type DelegateResearchConfig as B, type CoderReviewer as C, type DelegateCodeArgs as D, type DelegateRunCtx as E, type FactCandidate as F, type DelegateUiAuditConfig as G, type DelegateUiAuditRoute as H, type FactJudge as I, type FactJudgeVerdict as J, type FeedbackRating as K, type FeedbackRefersTo as L, type FleetWorkspaceExecutorOptions as M, type KbGateResult as N, type ResearchOutputShape as O, type UiAuditorDelegationOutput as P, createDefaultCoderDelegate as Q, type ResearcherDelegate as R, type SiblingSandboxExecutorOptions as S, createFleetWorkspaceExecutor as T, type UiAuditorDelegate as U, createKbGate as V, createSiblingSandboxExecutor as W, type CoderWinnerSelection as a, type CreateKbGateOptions as b, type FleetHandle as c, type DelegationExecutor as d, type DelegateFeedbackArgs as e, type DelegationFeedbackSnapshot as f, type DelegationProfile as g, type DelegateResearchArgs as h, type DelegateUiAuditArgs as i, type DelegationStatus as j, type DelegationProgress as k, type DelegationResultPayload as l, type DelegationError as m, type DelegationStatusResult as n, type DelegationHistoryArgs as o, type DelegationHistoryEntry as p, type CoderDelegate as q, type DelegateCodeResult as r, type DelegateFeedbackResult as s, type ResearchSource as t, type DelegateResearchResult as u, type DelegateUiAuditResult as v, type DelegationHistoryResult as w, type DelegationStatusArgs as x, type CoderReview as y, type CreateDefaultCoderDelegateOptions as z };
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import { Scenario } from '@tangle-network/agent-eval/campaign';
|
|
2
|
-
import {
|
|
3
|
-
import {
|
|
4
|
-
import {
|
|
5
|
-
import {
|
|
6
|
-
import {
|
|
7
|
-
import {
|
|
2
|
+
import { SelfImproveOptions, SelfImproveResult } from '@tangle-network/agent-eval/contract';
|
|
3
|
+
import { R as RunAnalystLoopOpts, a as RunAnalystLoopResult } from './types-BtRLF2U3.js';
|
|
4
|
+
import { D as DelegateCodeArgs, C as CoderReviewer, a as CoderWinnerSelection, F as FactCandidate, b as CreateKbGateOptions } from './kb-gate-D9GBocLN.js';
|
|
5
|
+
import { C as CoderOutput } from './coder-CczgMqFx.js';
|
|
6
|
+
import { b as LoopSandboxClient, O as OutputAdapter, V as Validator, A as AgentRunSpec, c as LoopResult } from './types-DdzkffAm.js';
|
|
7
|
+
import { T as TopologyPlanner, C as CreateDynamicDriverOptions, D as DynamicDecision } from './dynamic-BvllHV6M.js';
|
|
8
8
|
|
|
9
9
|
/**
|
|
10
10
|
* @experimental
|
|
@@ -19,7 +19,7 @@ import { CoderOutput } from './profiles.js';
|
|
|
19
19
|
* review → code mode with a REQUIRED reviewer (the gate is the point)
|
|
20
20
|
* research → research-in-a-loop with valid-only KB growth (createKbGate)
|
|
21
21
|
* audit → analyze trace/run data → findings (runAnalystLoop, caller-wired)
|
|
22
|
-
* self-improve →
|
|
22
|
+
* self-improve → closed-loop text/config optimization (selfImprove, held-out gated)
|
|
23
23
|
* dynamic → agent-authored topology (runLoop + createDynamicDriver)
|
|
24
24
|
*
|
|
25
25
|
* It is intentionally a thin façade: the value is that EVERY product reuses the
|
|
@@ -91,7 +91,7 @@ declare function reviewLoopRunner(options: CoderLoopRunnerOptions & {
|
|
|
91
91
|
/** @experimental Options for the default `dynamic` runner. */
|
|
92
92
|
interface DynamicLoopRunnerOptions<Task, Output> {
|
|
93
93
|
sandboxClient: LoopSandboxClient;
|
|
94
|
-
/** The agent-authored topology planner (
|
|
94
|
+
/** The agent-authored topology planner (sync or async; an async planner is where an LLM call goes). */
|
|
95
95
|
planner: TopologyPlanner<Task, Output>;
|
|
96
96
|
task: Task;
|
|
97
97
|
output: OutputAdapter<Output>;
|
|
@@ -101,6 +101,10 @@ interface DynamicLoopRunnerOptions<Task, Output> {
|
|
|
101
101
|
agentRuns?: AgentRunSpec<Task>[];
|
|
102
102
|
maxIterations?: number;
|
|
103
103
|
maxFanout?: number;
|
|
104
|
+
/** Optional trace-analyst hook forwarded to the dynamic driver so the loop runs
|
|
105
|
+
* `f(trace, findings)` — see `CreateDynamicDriverOptions.analyze`. Caller-side
|
|
106
|
+
* seam to `runAnalystLoop`; keeps this runner analyst-free. */
|
|
107
|
+
analyze?: CreateDynamicDriverOptions<Task, Output>['analyze'];
|
|
104
108
|
}
|
|
105
109
|
/** @experimental `dynamic` mode — agent-authored topology over `runLoop`. */
|
|
106
110
|
declare function dynamicLoopRunner<Task, Output>(o: DynamicLoopRunnerOptions<Task, Output>): DelegatedLoopRunner<LoopResult<Task, Output, DynamicDecision>>;
|
|
@@ -142,8 +146,8 @@ interface ResearchLoopRunnerOptions {
|
|
|
142
146
|
* never silently dropped) so the caller audits vs retries.
|
|
143
147
|
*/
|
|
144
148
|
declare function researchLoopRunner(o: ResearchLoopRunnerOptions): DelegatedLoopRunner<ResearchLoopResult>;
|
|
145
|
-
/** @experimental `self-improve` mode —
|
|
146
|
-
declare function selfImproveLoopRunner<TScenario extends Scenario, TArtifact>(options:
|
|
149
|
+
/** @experimental `self-improve` mode — agent-eval's one-call closed loop (held-out gated). */
|
|
150
|
+
declare function selfImproveLoopRunner<TScenario extends Scenario, TArtifact>(options: SelfImproveOptions<TScenario, TArtifact>): DelegatedLoopRunner<SelfImproveResult<TScenario, TArtifact>>;
|
|
147
151
|
/** @experimental `audit` mode — analyst loop over captured trace/run data. */
|
|
148
152
|
declare function auditLoopRunner<TProposal = unknown, TEdit = unknown>(options: RunAnalystLoopOpts): DelegatedLoopRunner<RunAnalystLoopResult<TProposal, TEdit>>;
|
|
149
153
|
|
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
export { L as LoopRunnerCliArgs, f as LoopRunnerCliResult, p as parseLoopRunnerArgv, o as runLoopRunnerCli } from './loop-runner-bin-BLMa8He3.js';
|
|
2
|
+
export { L as LoopRunnerCliArgs, f as LoopRunnerCliResult, p as parseLoopRunnerArgv, o as runLoopRunnerCli } from './loop-runner-bin-CPrCoKqC.js';
|
|
3
3
|
import '@tangle-network/agent-eval/campaign';
|
|
4
|
-
import '
|
|
4
|
+
import '@tangle-network/agent-eval/contract';
|
|
5
|
+
import './types-BtRLF2U3.js';
|
|
5
6
|
import '@tangle-network/agent-eval';
|
|
6
|
-
import './
|
|
7
|
-
import './
|
|
8
|
-
import './types-DbJzz2uf.js';
|
|
7
|
+
import './kb-gate-D9GBocLN.js';
|
|
8
|
+
import './coder-CczgMqFx.js';
|
|
9
9
|
import '@tangle-network/sandbox';
|
|
10
|
-
import './
|
|
11
|
-
import './
|
|
10
|
+
import './types-DdzkffAm.js';
|
|
11
|
+
import './runtime-hooks-C7JwKb9E.js';
|
|
12
|
+
import './substrate-CUgk7F7s.js';
|
|
13
|
+
import './dynamic-BvllHV6M.js';
|
package/dist/loop-runner-bin.js
CHANGED
|
@@ -2,15 +2,13 @@
|
|
|
2
2
|
import {
|
|
3
3
|
parseLoopRunnerArgv,
|
|
4
4
|
runLoopRunnerCli
|
|
5
|
-
} from "./chunk-GFKVVRQ7.js";
|
|
5
|
+
} from "./chunk-NYN5RTLP.js";
|
|
6
6
|
import "./chunk-FNMGYYSS.js";
|
|
7
|
-
import "./chunk-
|
|
8
|
-
import "./chunk-
|
|
9
|
-
import "./chunk-
|
|
10
|
-
import "./chunk-
|
|
11
|
-
import "./chunk-
|
|
12
|
-
import "./chunk-PY6NMZYX.js";
|
|
13
|
-
import "./chunk-SQSCRJ7U.js";
|
|
7
|
+
import "./chunk-IJ6FGOPO.js";
|
|
8
|
+
import "./chunk-QR4UUC5P.js";
|
|
9
|
+
import "./chunk-FK53TXOP.js";
|
|
10
|
+
import "./chunk-KEWO4KI6.js";
|
|
11
|
+
import "./chunk-PRX45WE2.js";
|
|
14
12
|
import "./chunk-DGUM43GV.js";
|
|
15
13
|
export {
|
|
16
14
|
parseLoopRunnerArgv,
|
package/dist/loops.d.ts
CHANGED
|
@@ -1,395 +1,9 @@
|
|
|
1
|
-
import { AgentProfile, SandboxEvent, SandboxInstance, CreateSandboxOptions } from '@tangle-network/sandbox';
|
|
2
1
|
export { AgentProfile, CreateSandboxOptions, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox';
|
|
3
|
-
|
|
4
|
-
export { C as CreateDynamicDriverOptions, D as DynamicDecision,
|
|
5
|
-
|
|
6
|
-
export {
|
|
7
|
-
|
|
2
|
+
export { AssertTraceDerivedFindings, BudgetPool, BudgetReadout, CheckpointCapableBox, CliSeam, CombinatorShape, Corpus, CorpusFilter, CorpusRecord, CreateScopeAnalystOptions, CriuCapableClient, DefinePersona, DefinePersonaInput, EqualKArm, EqualKOnCost, EqualKOnCostOptions, EqualKVerdict, Fanout, FanoutOptions, FanoutSynthesis, FileCorpus, FileResultBlobStore, FileSpawnJournal, FlatWidenGate, ForkCapableBox, InMemoryCorpus, InMemoryResultBlobStore, InMemorySpawnJournal, LoopDispatchOptions, LoopOptionsForDispatch, LoopShape, LoopUntil, LoopUntilSpec, LoopUntilState, Outcome, Panel, PanelJudge, PanelSpec, PanelVerdict, Persona, PersonaContext, PersonaExecutors, Pipeline, PipelineStage, RenderCorpusToInstructions, RenderCorpusToInstructionsOptions, ReservationTicket, RouterSeam, RunPersonified, RunPersonifiedOptions, SandboxCapabilities, SandboxLineage, SandboxLineageHandle, SandboxSeam, ScopeAnalyst, ScopeAnalyzeInput, ScopeWidenGate, SessionCapableBox, ShapeBudget, ShapeContext, ShapeRegistry, SteerContext, TrajectoryNode, TrajectoryReport, TrajectoryReportFn, TrajectoryReportOptions, UsageSink, Verify, VerifySpec, Widen, WidenDecision, WidenLineage, WidenSpec, acquireSandbox, assertTraceDerivedFindings, buildSteerContext, builtinShapes, cliExecutor, contentAddress, createBudgetPool, createExecutorRegistry, createRootHandle, createSandboxLineage, createScope, createScopeAnalyst, createShapeRegistry, createSupervisor, definePersona, equalKOnCost, extractLlmCallEvent, fanout, flatWidenGate, loopDispatch, loopUntil, mapSandboxEvent, materializeTreeView, panel, pipeline, probeSandboxCapabilities, registerShape, renderCorpusToInstructions, replaySpawnTree, reportLoopUsage, routerInlineExecutor, runPersonified, sandboxExecutor, settledToIteration, spendFromUsageEvents, trajectoryReport, verify, widen } from './runtime.js';
|
|
3
|
+
export { A as AnalyzeInput, a as CompletionAnalyst, b as CompletionEvidence, c as CompletionPolicy, d as CompletionVerdict, C as CreateDynamicDriverOptions, D as DynamicDecision, P as PlannerContext, e as TopologyMove, T as TopologyPlanner, f as completionAuthorizes, g as createDynamicDriver, h as deterministicCompletion, r as renderAnalyses, s as sentinelCompletion, i as stopSentinel } from './dynamic-BvllHV6M.js';
|
|
4
|
+
export { R as RunLoopOptions, c as createSandboxForSpec, d as defaultSelectWinner, r as runLoop } from './run-loop--hSoIknW.js';
|
|
5
|
+
export { A as Agent, d as AgentSpec, B as Budget, i as ExecutorContext, E as ExecutorRegistry, H as Handle, j as LeafExecutor, L as LeafExecutorFactory, k as LeafResult, N as NodeId, l as NodeSnapshot, m as NodeStatus, n as Restart, R as ResultBlobStore, e as RootHandle, o as RootSignal, p as Runtime, S as Scope, c as Settled, b as SpawnEvent, a as SpawnJournal, q as SpawnOpts, g as Spend, f as SupervisedResult, h as Supervisor, r as SupervisorOpts, T as TreeView, U as UsageEvent, W as WidenGate } from './types-1HbsFa7H.js';
|
|
6
|
+
export { A as AgentRunSpec, D as Driver, E as ExecCtx, I as Iteration, h as LoopDecisionPayload, i as LoopEndedPayload, j as LoopIterationDispatchPayload, k as LoopIterationEndedPayload, l as LoopIterationStartedPayload, a as LoopLineageOptions, m as LoopPlanDescription, n as LoopPlanPayload, c as LoopResult, b as LoopSandboxClient, g as LoopSandboxPlacement, o as LoopStartedPayload, p as LoopTeardownFailedPayload, d as LoopTokenUsage, f as LoopTraceEmitter, q as LoopTraceEvent, L as LoopWinner, O as OutputAdapter, r as ValidationCtx, V as Validator } from './types-DdzkffAm.js';
|
|
8
7
|
export { DefaultVerdict } from '@tangle-network/agent-eval';
|
|
9
|
-
import
|
|
10
|
-
import
|
|
11
|
-
export { c as createSandboxForSpec, r as runLoop } from './run-loop-C4L1Sted.js';
|
|
12
|
-
|
|
13
|
-
/**
|
|
14
|
-
* @experimental
|
|
15
|
-
*
|
|
16
|
-
* FanoutVote driver — N parallel attempts in iteration 0, pick the highest-
|
|
17
|
-
* scoring valid output. No second iteration: the topology is "spawn N, score,
|
|
18
|
-
* pick winner". The kernel handles heterogeneous fanout via the
|
|
19
|
-
* `agentRuns: AgentRunSpec[]` form on `runLoop`.
|
|
20
|
-
*/
|
|
21
|
-
|
|
22
|
-
type FanoutVoteDecision = 'pick-winner' | 'fail';
|
|
23
|
-
/** @experimental */
|
|
24
|
-
interface FanoutVoteScored<Task, Output> {
|
|
25
|
-
task: Task;
|
|
26
|
-
output: Output;
|
|
27
|
-
verdict?: DefaultVerdict;
|
|
28
|
-
iterationIndex: number;
|
|
29
|
-
agentRunName: string;
|
|
30
|
-
}
|
|
31
|
-
/** @experimental */
|
|
32
|
-
interface CreateFanoutVoteDriverOptions<Task, Output> {
|
|
33
|
-
/** Number of parallel attempts. Must be >= 1. */
|
|
34
|
-
n: number;
|
|
35
|
-
/**
|
|
36
|
-
* Pick the winner from the scored set. Default: highest `verdict.score`
|
|
37
|
-
* among valid outputs (ties broken by smallest iteration index). When
|
|
38
|
-
* no valid outputs exist, returns `undefined` and `decide()` resolves
|
|
39
|
-
* to `'fail'`. The kernel still records winners structurally — this
|
|
40
|
-
* selector only feeds `decide()`'s pass/fail signal.
|
|
41
|
-
*/
|
|
42
|
-
selector?: (scored: FanoutVoteScored<Task, Output>[]) => FanoutVoteScored<Task, Output> | undefined;
|
|
43
|
-
/** Stable identifier surfaced in trace events. Default `'fanout-vote'`. */
|
|
44
|
-
name?: string;
|
|
45
|
-
}
|
|
46
|
-
/** @experimental */
|
|
47
|
-
declare function createFanoutVoteDriver<Task, Output>(options: CreateFanoutVoteDriverOptions<Task, Output>): Driver<Task, Output, FanoutVoteDecision>;
|
|
48
|
-
/**
|
|
49
|
-
* Test helper: surface the per-iteration scored view a custom `selector`
|
|
50
|
-
* would receive. Exposed so consumers writing a custom selector can test it
|
|
51
|
-
* standalone without driving the full kernel.
|
|
52
|
-
*
|
|
53
|
-
* @experimental
|
|
54
|
-
*/
|
|
55
|
-
declare function scoreFanoutVoteIterations<Task, Output>(iterations: ReadonlyArray<Iteration<Task, Output>>): FanoutVoteScored<Task, Output>[];
|
|
56
|
-
|
|
57
|
-
/**
|
|
58
|
-
* @experimental
|
|
59
|
-
*
|
|
60
|
-
* Named driver policies — a registry, not a constructor zoo.
|
|
61
|
-
*
|
|
62
|
-
* A "driver variant" is just a `TopologyPlanner` chosen by name and run by the
|
|
63
|
-
* one interpreter (`createDynamicDriver`). The agentic variants are sandboxed
|
|
64
|
-
* agents (`createSandboxPlanner`) — an LLM/agent in a box that emits the next
|
|
65
|
-
* move; this registry holds the *deterministic* ones a benchmark needs as
|
|
66
|
-
* controls. Today that's `blind`: a single attempt, no steering — the baseline
|
|
67
|
-
* you measure a real driver against.
|
|
68
|
-
*
|
|
69
|
-
* Adding a variant is a line here (or a sandboxed planner registered by name),
|
|
70
|
-
* never a new `createXDriver` factory and never a spec schema.
|
|
71
|
-
*/
|
|
72
|
-
|
|
73
|
-
/** A driver policy over prompt-shaped (string) tasks. Output is consulted only
|
|
74
|
-
* through the iteration's verdict, so it stays `unknown`. */
|
|
75
|
-
type PromptPlanner = TopologyPlanner<string, unknown>;
|
|
76
|
-
/**
|
|
77
|
-
* `blind` — one attempt, then stop. The no-driver control: a single worker run
|
|
78
|
-
* with no steering, so a benchmark can isolate what a real driver adds.
|
|
79
|
-
*/
|
|
80
|
-
declare const blind: PromptPlanner;
|
|
81
|
-
/** The registry. Pick a driver by name (e.g. `DRIVER=blind`); fail loud on an
|
|
82
|
-
* unknown key. Sandboxed-agent planners can be registered here too. */
|
|
83
|
-
declare const PROMPT_PLANNERS: Record<string, PromptPlanner>;
|
|
84
|
-
/** Resolve a planner by name; fail loud on an unknown variant. */
|
|
85
|
-
declare function resolvePlanner(name: string): PromptPlanner;
|
|
86
|
-
|
|
87
|
-
/**
|
|
88
|
-
* @experimental
|
|
89
|
-
*
|
|
90
|
-
* Refine driver — single task per iteration, validator-gated.
|
|
91
|
-
*
|
|
92
|
-
* `plan` returns `[task]` (possibly transformed via `refineTask`) until the
|
|
93
|
-
* prior verdict is valid OR the local cap is hit, then `[]`.
|
|
94
|
-
* `decide` returns `'stop'` once the latest verdict is valid OR the cap is
|
|
95
|
-
* reached. The kernel's `maxIterations` is an orthogonal safety cap;
|
|
96
|
-
* whichever is lower wins.
|
|
97
|
-
*/
|
|
98
|
-
|
|
99
|
-
type RefineDecision = 'continue' | 'stop';
|
|
100
|
-
/** @experimental */
|
|
101
|
-
interface CreateRefineDriverOptions<Task> {
|
|
102
|
-
/** Hard cap on iterations. Default 5. */
|
|
103
|
-
maxIterations?: number;
|
|
104
|
-
/**
|
|
105
|
-
* Optional task transform applied each round based on the prior verdict.
|
|
106
|
-
* When omitted, the same task is replayed and the agent is expected to
|
|
107
|
-
* inspect the sandbox session state for prior attempts.
|
|
108
|
-
*/
|
|
109
|
-
refineTask?: (task: Task, prior: DefaultVerdict) => Task;
|
|
110
|
-
/** Stable identifier surfaced in trace events. Default `'refine'`. */
|
|
111
|
-
name?: string;
|
|
112
|
-
}
|
|
113
|
-
/** @experimental */
|
|
114
|
-
declare function createRefineDriver<Task, Output>(options?: CreateRefineDriverOptions<Task>): Driver<Task, Output, RefineDecision>;
|
|
115
|
-
/**
|
|
116
|
-
* Test helper: select the last-valid iteration (or the last attempt if
|
|
117
|
-
* none passed). Mirrors the kernel's default selector ordering for refine
|
|
118
|
-
* topologies — the most recent successful attempt wins.
|
|
119
|
-
*
|
|
120
|
-
* @experimental
|
|
121
|
-
*/
|
|
122
|
-
declare function refineWinnerIndex<Task, Output>(iterations: ReadonlyArray<Iteration<Task, Output>>): number | undefined;
|
|
123
|
-
|
|
124
|
-
/**
|
|
125
|
-
* @experimental
|
|
126
|
-
*
|
|
127
|
-
* `createSandboxPlanner` — wire the dynamic driver's `TopologyPlanner` to a
|
|
128
|
-
* real agent. Each round it spins a sandbox on `profile`, streams a prompt that
|
|
129
|
-
* carries the history summary, and decodes the agent's chosen `TopologyMove`
|
|
130
|
-
* from a JSON envelope it emits. This is the "agent authors its own loop
|
|
131
|
-
* topology" path: the planner profile can be any harness (claude-code, codex,
|
|
132
|
-
* opencode, pi) — its only job is to read what happened and emit the next move.
|
|
133
|
-
*
|
|
134
|
-
* The planner profile is deliberately distinct from the worker `agentRuns`: a
|
|
135
|
-
* cheap fast model can steer topology while expensive workers do the labor, and
|
|
136
|
-
* the planner never names which harness runs a branch — the kernel's
|
|
137
|
-
* `agentRuns` round-robin decides that.
|
|
138
|
-
*
|
|
139
|
-
* Three execution modes, all the same code path:
|
|
140
|
-
* - LLM call — a cheap-model `profile`; one prompt → one move.
|
|
141
|
-
* - different sandbox (default) — a fresh planner-owned box per round.
|
|
142
|
-
* - same sandbox — pass `reuseBox` to stream the move into the worker's
|
|
143
|
-
* own box (a session against its live filesystem/state),
|
|
144
|
-
* so the driver steers from what the worker actually did.
|
|
145
|
-
*
|
|
146
|
-
* Envelope contract the agent must emit (fenced ```json or a structured
|
|
147
|
-
* `result`/`final` event payload):
|
|
148
|
-
* { "kind": "refine" | "fanout" | "stop",
|
|
149
|
-
* "tasks"?: [ <task>, ... ], // decoded via `decodeTask`
|
|
150
|
-
* "n"?: number, // fanout shorthand: N copies of the root task
|
|
151
|
-
* "rationale"?: string }
|
|
152
|
-
*
|
|
153
|
-
* A missing / unparseable / unknown-kind envelope throws `PlannerError` — the
|
|
154
|
-
* loop never silently runs a topology the agent did not choose.
|
|
155
|
-
*/
|
|
156
|
-
|
|
157
|
-
/** Raw, pre-decode envelope an agent emits to choose the next move. */
|
|
158
|
-
interface TopologyMoveEnvelope {
|
|
159
|
-
kind: string;
|
|
160
|
-
tasks?: unknown[];
|
|
161
|
-
n?: number;
|
|
162
|
-
rationale?: string;
|
|
163
|
-
}
|
|
164
|
-
/** @experimental */
|
|
165
|
-
interface CreateSandboxPlannerOptions<Task, Output> {
|
|
166
|
-
/** Sandbox client — the planner calls `.create()` once per round. */
|
|
167
|
-
client: LoopSandboxClient;
|
|
168
|
-
/** The planner agent. Steers topology; does not run the work. */
|
|
169
|
-
profile: AgentProfile;
|
|
170
|
-
/**
|
|
171
|
-
* Decode one raw task from the envelope's `tasks[]` into a domain `Task`.
|
|
172
|
-
* Required because `Task` is opaque to this module — only the caller knows
|
|
173
|
-
* its shape. Throw to reject a malformed task; the error surfaces as a
|
|
174
|
-
* `PlannerError`.
|
|
175
|
-
*/
|
|
176
|
-
decodeTask: (raw: unknown, ctx: PlannerContext<Task, Output>) => Task;
|
|
177
|
-
/** Override the default prompt (history summary + envelope contract). */
|
|
178
|
-
buildPrompt?: (ctx: PlannerContext<Task, Output>) => string | Promise<string>;
|
|
179
|
-
/** Override envelope extraction from the event stream. */
|
|
180
|
-
parseEnvelope?: (events: SandboxEvent[]) => TopologyMoveEnvelope | undefined;
|
|
181
|
-
/** Sandbox overrides for the planner sandbox (timeouts, env, etc.). */
|
|
182
|
-
sandboxOverrides?: AgentRunSpec<Task>['sandboxOverrides'];
|
|
183
|
-
/** Cancellation for the planner's own LLM call. */
|
|
184
|
-
signal?: AbortSignal;
|
|
185
|
-
/**
|
|
186
|
-
* Same-sandbox mode. Return an existing box and the planner streams its move
|
|
187
|
-
* INTO that box (a session against the worker's environment) instead of
|
|
188
|
-
* spinning its own — so the driver can inspect the worker's real filesystem
|
|
189
|
-
* and state, not just the history summary. The returned box's lifecycle is
|
|
190
|
-
* the CALLER's: the planner neither creates nor deletes it. Return
|
|
191
|
-
* `undefined` to fall back to the default (a fresh, planner-owned box =
|
|
192
|
-
* different-sandbox mode). Omit entirely for the default.
|
|
193
|
-
*/
|
|
194
|
-
reuseBox?: () => SandboxInstance | undefined | Promise<SandboxInstance | undefined>;
|
|
195
|
-
}
|
|
196
|
-
/** @experimental */
|
|
197
|
-
declare function createSandboxPlanner<Task, Output>(opts: CreateSandboxPlannerOptions<Task, Output>): TopologyPlanner<Task, Output>;
|
|
198
|
-
|
|
199
|
-
/**
|
|
200
|
-
* `loopDispatch` — turn `runLoop` into an agent-eval campaign dispatch.
|
|
201
|
-
*
|
|
202
|
-
* Without this adapter a consumer wiring `runLoop` into `runProfileMatrix` /
|
|
203
|
-
* `runCampaign` has to, by hand, every time: (a) build an `ExecCtx` with a
|
|
204
|
-
* sandbox client, (b) adapt the campaign `DispatchContext.trace` into a
|
|
205
|
-
* `LoopTraceEmitter` (or lose all loop trace correlation), and (c) remember to
|
|
206
|
-
* forward the loop's cost + tokens via `ctx.cost` (forgetting it yields a
|
|
207
|
-
* `{0,0}` cell the backend-integrity guard reads as a stub). Three foot-guns,
|
|
208
|
-
* the third silent. The fleet's products skipped (c) and fell back to a
|
|
209
|
-
* `workerRecords[]` side-channel — the exact anti-pattern the substrate exists
|
|
210
|
-
* to kill.
|
|
211
|
-
*
|
|
212
|
-
* `loopDispatch` collapses all three into one typed call:
|
|
213
|
-
*
|
|
214
|
-
* const dispatch = loopDispatch({
|
|
215
|
-
* sandboxClient,
|
|
216
|
-
* toLoopOptions: (scenario, profile) => ({ driver, agentRun, output, validator, task }),
|
|
217
|
-
* })
|
|
218
|
-
* await runProfileMatrix({ profiles, scenarios, dispatch, judges, commitSha })
|
|
219
|
-
*
|
|
220
|
-
* Usage is reported automatically; trace events are forwarded automatically;
|
|
221
|
-
* the ctx is built automatically. The seam becomes impossible to mis-wire.
|
|
222
|
-
*
|
|
223
|
-
* Typed structurally against the campaign `DispatchContext` (imported type-only
|
|
224
|
-
* from `@tangle-network/agent-eval/campaign`) — a downward dependency, never an
|
|
225
|
-
* inversion.
|
|
226
|
-
*/
|
|
227
|
-
|
|
228
|
-
/** runLoop options minus the `ctx` (loopDispatch builds the ctx). */
|
|
229
|
-
type LoopOptionsForDispatch<Task, Output, Decision> = Omit<RunLoopOptions<Task, Output, Decision>, 'ctx'>;
|
|
230
|
-
interface LoopDispatchOptions<Task, Output, Decision, TScenario extends Scenario, TArtifact> {
|
|
231
|
-
/** Sandbox client used for every cell's `runLoop`. Supplied once. */
|
|
232
|
-
sandboxClient: LoopSandboxClient;
|
|
233
|
-
/** Build the per-cell runLoop options from the scenario (+ profile, when
|
|
234
|
-
* used with `runProfileMatrix`). */
|
|
235
|
-
toLoopOptions: (scenario: TScenario, profile: AgentProfile$1) => LoopOptionsForDispatch<Task, Output, Decision>;
|
|
236
|
-
/** Map the finished loop to the artifact the judges score. Default:
|
|
237
|
-
* `result.winner?.output`. A loop with no winner yields `undefined` (judges
|
|
238
|
-
* skip the cell) — but the loop's token usage is STILL reported, so the
|
|
239
|
-
* integrity guard sees real activity. */
|
|
240
|
-
toArtifact?: (result: LoopResult<Task, Output, Decision>) => TArtifact;
|
|
241
|
-
/** Forward `loop.*` trace events into the campaign's scoped trace so loop
|
|
242
|
-
* spans correlate with the cell. Default true. */
|
|
243
|
-
forwardTrace?: boolean;
|
|
244
|
-
/** Cost-meter source label for the loop's spend. Default `'loop'`. */
|
|
245
|
-
costSource?: string;
|
|
246
|
-
}
|
|
247
|
-
/**
|
|
248
|
-
* Adapter for `runProfileMatrix` (profile is an axis). Returns a
|
|
249
|
-
* `ProfileDispatchFn` that runs `runLoop` per (profile, scenario) cell and
|
|
250
|
-
* reports usage automatically.
|
|
251
|
-
*/
|
|
252
|
-
declare function loopDispatch<Task, Output, Decision, TScenario extends Scenario, TArtifact>(opts: LoopDispatchOptions<Task, Output, Decision, TScenario, TArtifact>): ProfileDispatchFn<TScenario, TArtifact>;
|
|
253
|
-
/**
|
|
254
|
-
* Adapter for `runCampaign` (no profile axis). `toLoopOptions` receives only
|
|
255
|
-
* the scenario; the `profile` passed to the shared core is a stable sentinel
|
|
256
|
-
* so a single `runLoop` config is reused across cells.
|
|
257
|
-
*/
|
|
258
|
-
declare function loopCampaignDispatch<Task, Output, Decision, TScenario extends Scenario, TArtifact>(opts: Omit<LoopDispatchOptions<Task, Output, Decision, TScenario, TArtifact>, 'toLoopOptions'> & {
|
|
259
|
-
toLoopOptions: (scenario: TScenario) => LoopOptionsForDispatch<Task, Output, Decision>;
|
|
260
|
-
}): DispatchFn<TScenario, TArtifact>;
|
|
261
|
-
|
|
262
|
-
/**
|
|
263
|
-
* Bridge a finished `runLoop` into an agent-eval campaign / profile-matrix
|
|
264
|
-
* dispatch.
|
|
265
|
-
*
|
|
266
|
-
* `runProfileMatrix` (and `runCampaign`) run the backend-integrity guard over
|
|
267
|
-
* the token usage a dispatch reports through `ctx.cost`. A dispatch that wraps
|
|
268
|
-
* `runLoop` must forward the loop's cost AND token usage, or the guard reads
|
|
269
|
-
* the run as a stub and throws. `reportLoopUsage` is that one line:
|
|
270
|
-
*
|
|
271
|
-
* const dispatch: ProfileDispatchFn<S, A> = async (profile, scenario, ctx) => {
|
|
272
|
-
* const result = await runLoop({ ...optsFor(profile, scenario), ctx: loopCtx })
|
|
273
|
-
* reportLoopUsage(ctx.cost, result)
|
|
274
|
-
* return result.winner?.output as A
|
|
275
|
-
* }
|
|
276
|
-
*
|
|
277
|
-
* Typed structurally against the campaign `DispatchContext.cost` so this module
|
|
278
|
-
* stays free of an agent-eval import — it works with any cost meter exposing
|
|
279
|
-
* `observe` + `observeTokens`.
|
|
280
|
-
*/
|
|
281
|
-
|
|
282
|
-
/** The slice of an agent-eval campaign `DispatchContext.cost` this needs. */
|
|
283
|
-
interface UsageSink {
|
|
284
|
-
observe(amountUsd: number, source: string): void;
|
|
285
|
-
observeTokens(usage: {
|
|
286
|
-
input: number;
|
|
287
|
-
output: number;
|
|
288
|
-
}): void;
|
|
289
|
-
}
|
|
290
|
-
/**
|
|
291
|
-
* Forward a `LoopResult`'s aggregated cost + token usage into a campaign cost
|
|
292
|
-
* meter so the backend-integrity guard sees real LLM activity. `source`
|
|
293
|
-
* defaults to `'loop'`.
|
|
294
|
-
*/
|
|
295
|
-
declare function reportLoopUsage<Task, Output, Decision>(cost: UsageSink, result: Pick<LoopResult<Task, Output, Decision>, 'costUsd' | 'tokenUsage'>, source?: string): void;
|
|
296
|
-
|
|
297
|
-
/**
|
|
298
|
-
* @experimental
|
|
299
|
-
*
|
|
300
|
-
* `acquireSandbox` — cold-start-resilient sandbox acquisition. Eliminates the
|
|
301
|
-
* "create timed out at the proxy" failure mode conceptually by DECOUPLING "the
|
|
302
|
-
* create HTTP call returned" from "the sandbox is ready":
|
|
303
|
-
*
|
|
304
|
-
* - Create is initiated with a known `name`.
|
|
305
|
-
* - Readiness is observed from the sandbox's own `status` (`refresh()` polls
|
|
306
|
-
* true state), NOT from whether the create call returned in time.
|
|
307
|
-
* - If the create call itself times out at a gateway (502/503/504/522/524 or
|
|
308
|
-
* a transport timeout), provisioning is still running server-side — so we
|
|
309
|
-
* find the named sandbox via `list()` and wait for it to reach `running`.
|
|
310
|
-
*
|
|
311
|
-
* Result: a scale-from-zero cold start (node boot + host-agent registration,
|
|
312
|
-
* minutes) can no longer surface as a create failure behind a ~100s proxy
|
|
313
|
-
* limit. The loop becomes indifferent to whether the host pool is warm or cold.
|
|
314
|
-
*
|
|
315
|
-
* Backward-compatible: an instance that reports no `status` (the minimal fakes
|
|
316
|
-
* the loop tests use) is treated as ready — only an explicit `pending`/
|
|
317
|
-
* `provisioning` status triggers waiting, and only a retryable THROW triggers
|
|
318
|
-
* the find-by-name path. Real errors (auth, validation, budget) fail loud.
|
|
319
|
-
*/
|
|
320
|
-
|
|
321
|
-
/** @experimental */
|
|
322
|
-
interface AcquireOptions {
|
|
323
|
-
/**
|
|
324
|
-
* Total budget for the sandbox to reach `running`, covering on-demand node
|
|
325
|
-
* cold-start. Default 600_000ms — matches the orchestrator's pending-host
|
|
326
|
-
* registration window so we never give up before the platform itself would.
|
|
327
|
-
*/
|
|
328
|
-
readyTimeoutMs?: number;
|
|
329
|
-
/** Poll interval while waiting for `running` / for the named sandbox to appear. */
|
|
330
|
-
pollIntervalMs?: number;
|
|
331
|
-
/** Cancellation (user abort). Distinct from create-call timeouts. */
|
|
332
|
-
signal?: AbortSignal;
|
|
333
|
-
/** Stamp a name so a timed-out create is recoverable by lookup. Auto-generated if absent. */
|
|
334
|
-
name?: string;
|
|
335
|
-
/** Clock override for deterministic tests. */
|
|
336
|
-
now?: () => number;
|
|
337
|
-
/** Sleep override for deterministic tests. */
|
|
338
|
-
sleep?: (ms: number) => Promise<void>;
|
|
339
|
-
}
|
|
340
|
-
/** @experimental */
|
|
341
|
-
declare function acquireSandbox(client: LoopSandboxClient, options: CreateSandboxOptions, acquire?: AcquireOptions): Promise<SandboxInstance>;
|
|
342
|
-
|
|
343
|
-
/**
|
|
344
|
-
* Sandbox-event → runtime-event mapping.
|
|
345
|
-
*
|
|
346
|
-
* The sandbox SDK emits a polymorphic `SandboxEvent = { type, data, id? }`
|
|
347
|
-
* whose `type` vocabulary is backend-determined (opencode, etc.) rather than
|
|
348
|
-
* enumerated by the SDK. Two consumers project it:
|
|
349
|
-
* - the loop kernel's cost ledger (`extractLlmCallEvent`) — sums usage off
|
|
350
|
-
* every cost-bearing event, regardless of stream shape;
|
|
351
|
-
* - the `AgentRuntime.act` streaming contract (`mapSandboxEvent`) — projects
|
|
352
|
-
* incremental events to the `RuntimeStreamEvent` chat-UX vocabulary.
|
|
353
|
-
*
|
|
354
|
-
* Both live here so the empirically-observed `type` vocabulary has one home.
|
|
355
|
-
*/
|
|
356
|
-
|
|
357
|
-
/**
|
|
358
|
-
* Extract a `RuntimeStreamEvent`-shaped `llm_call` from a sandbox event when
|
|
359
|
-
* the event carries usage/cost data. Returns `undefined` for non-cost events
|
|
360
|
-
* so the kernel can iterate the full stream without branching.
|
|
361
|
-
*
|
|
362
|
-
* Canonical cost-carrying types observed in the wild:
|
|
363
|
-
* - `llm_call` — `data: { model, tokensIn, tokensOut, costUsd, ... }`
|
|
364
|
-
* - `message.completed` / `result` — `data: { usage: { inputTokens,
|
|
365
|
-
* outputTokens, totalCostUsd? } }`
|
|
366
|
-
* - `cost.usage` / `usage` — same shape under a dedicated type
|
|
367
|
-
*
|
|
368
|
-
* Numeric coercion is strict: `Number.isFinite` gates every accumulator write
|
|
369
|
-
* so a sentinel `NaN` from a misbehaving backend cannot poison the ledger.
|
|
370
|
-
*/
|
|
371
|
-
declare function extractLlmCallEvent(event: SandboxEvent, agentRunName: string): (RuntimeStreamEvent & {
|
|
372
|
-
type: 'llm_call';
|
|
373
|
-
}) | undefined;
|
|
374
|
-
/**
|
|
375
|
-
* Project one `SandboxEvent` onto the `RuntimeStreamEvent` chat-UX vocabulary,
|
|
376
|
-
* for runtimes that bridge a sandbox `streamPrompt` into the
|
|
377
|
-
* `AgentRuntime.act` streaming contract. Returns `undefined` for events that
|
|
378
|
-
* have no faithful projection — the raw stream is preserved separately for the
|
|
379
|
-
* `OutputAdapter`, so an unmapped event never loses data.
|
|
380
|
-
*
|
|
381
|
-
* Mapped (the task-optional incremental variants — no synthesized task
|
|
382
|
-
* lifecycle, no guessed tool-part shapes):
|
|
383
|
-
* - `message.part.updated` text part → `text_delta`
|
|
384
|
-
* - `message.part.updated` reasoning/thinking part → `reasoning_delta`
|
|
385
|
-
* - cost-bearing events → `llm_call` (shared with the ledger extractor)
|
|
386
|
-
*
|
|
387
|
-
* The opencode backend emits incremental text as
|
|
388
|
-
* `{ type: 'message.part.updated', data: { part: { type, text }, delta } }`;
|
|
389
|
-
* `delta` is the increment, `part.text` the running accumulation.
|
|
390
|
-
*/
|
|
391
|
-
declare function mapSandboxEvent(event: SandboxEvent, opts?: {
|
|
392
|
-
agentRunName?: string;
|
|
393
|
-
}): RuntimeStreamEvent | undefined;
|
|
394
|
-
|
|
395
|
-
export { type AcquireOptions, AgentRunSpec, type CreateFanoutVoteDriverOptions, type CreateRefineDriverOptions, type CreateSandboxPlannerOptions, Driver, type FanoutVoteDecision, type FanoutVoteScored, Iteration, type LoopDispatchOptions, type LoopOptionsForDispatch, LoopResult, LoopSandboxClient, PROMPT_PLANNERS, PlannerContext, type PromptPlanner, type RefineDecision, RunLoopOptions, type TopologyMoveEnvelope, TopologyPlanner, type UsageSink, acquireSandbox, blind, createFanoutVoteDriver, createRefineDriver, createSandboxPlanner, extractLlmCallEvent, loopCampaignDispatch, loopDispatch, mapSandboxEvent, refineWinnerIndex, reportLoopUsage, resolvePlanner, scoreFanoutVoteIterations };
|
|
8
|
+
import '@tangle-network/agent-eval/campaign';
|
|
9
|
+
import './runtime-hooks-C7JwKb9E.js';
|