@tangle-network/agent-runtime 0.45.0 → 0.47.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/agent.d.ts +5 -5
- package/dist/agent.js +2 -2
- package/dist/agent.js.map +1 -1
- package/dist/analyst-loop.d.ts +5 -40
- package/dist/analyst-loop.js +2 -4
- package/dist/{chunk-IJ6FGOPO.js → chunk-5YDS7BLC.js} +12 -7
- package/dist/chunk-5YDS7BLC.js.map +1 -0
- package/dist/{chunk-KEWO4KI6.js → chunk-72JQCHOZ.js} +850 -131
- package/dist/chunk-72JQCHOZ.js.map +1 -0
- package/dist/{chunk-PRX45WE2.js → chunk-GSUO5QS6.js} +1 -119
- package/dist/chunk-GSUO5QS6.js.map +1 -0
- package/dist/{chunk-FK53TXOP.js → chunk-HNUXAZIJ.js} +4 -27
- package/dist/chunk-HNUXAZIJ.js.map +1 -0
- package/dist/{chunk-IJGS6J7X.js → chunk-JNPK46YH.js} +2 -2
- package/dist/{chunk-QR4UUC5P.js → chunk-KADIJAD4.js} +33 -19
- package/dist/chunk-KADIJAD4.js.map +1 -0
- package/dist/{chunk-NYN5RTLP.js → chunk-MGFEUYOH.js} +7 -7
- package/dist/chunk-MGFEUYOH.js.map +1 -0
- package/dist/{chunk-Z2QXVBA6.js → chunk-T4OQQEE3.js} +4 -4
- package/dist/chunk-T4OQQEE3.js.map +1 -0
- package/dist/{chunk-KSMX62JF.js → chunk-VR4JIC5H.js} +2 -2
- package/dist/{coder-CczgMqFx.d.ts → coder-CVZNGbyg.d.ts} +1 -1
- package/dist/{dynamic-BvllHV6M.d.ts → driver-DYU2sgHr.d.ts} +6 -6
- package/dist/{improvement-adapter-CWegd3vw.d.ts → improvement-adapter-BC4HhuAR.d.ts} +1 -1
- package/dist/improvement.d.ts +2 -2
- package/dist/index.d.ts +8 -8
- package/dist/index.js +8 -8
- package/dist/{kb-gate-D9GBocLN.d.ts → kb-gate-51BlLlVM.d.ts} +13 -7
- package/dist/{loop-runner-bin-CPrCoKqC.d.ts → loop-runner-bin-DEm4roYF.d.ts} +11 -11
- package/dist/loop-runner-bin.d.ts +6 -6
- package/dist/loop-runner-bin.js +6 -6
- package/dist/loops.d.ts +5 -5
- package/dist/loops.js +18 -10
- package/dist/mcp/bin.js +6 -6
- package/dist/mcp/bin.js.map +1 -1
- package/dist/mcp/index.d.ts +75 -74
- package/dist/mcp/index.js +203 -31
- package/dist/mcp/index.js.map +1 -1
- package/dist/{otel-export-Dy2DyUCU.d.ts → otel-export-EzfsVUhh.d.ts} +1 -1
- package/dist/profiles.d.ts +8 -8
- package/dist/profiles.js +1 -1
- package/dist/profiles.js.map +1 -1
- package/dist/{run-loop--hSoIknW.d.ts → run-loop-DvD4aGiE.d.ts} +2 -2
- package/dist/runtime.d.ts +244 -57
- package/dist/runtime.js +18 -10
- package/dist/{types-1HbsFa7H.d.ts → types-Cbx3dNK5.d.ts} +23 -23
- package/dist/{types-DdzkffAm.d.ts → types-nBMuollC.d.ts} +34 -5
- package/dist/{types-BtRLF2U3.d.ts → types-p8dWBIXL.d.ts} +1 -1
- package/dist/workflow.d.ts +3 -3
- package/dist/workflow.js +2 -2
- package/dist/workflow.js.map +1 -1
- package/package.json +1 -1
- package/skills/agent-runtime-adoption/SKILL.md +3 -3
- package/skills/generate-eval/SKILL.md +60 -0
- package/skills/loop-writer/SKILL.md +163 -0
- package/dist/chunk-FK53TXOP.js.map +0 -1
- package/dist/chunk-IJ6FGOPO.js.map +0 -1
- package/dist/chunk-KEWO4KI6.js.map +0 -1
- package/dist/chunk-NYN5RTLP.js.map +0 -1
- package/dist/chunk-PRX45WE2.js.map +0 -1
- package/dist/chunk-QR4UUC5P.js.map +0 -1
- package/dist/chunk-Z2QXVBA6.js.map +0 -1
- /package/dist/{chunk-IJGS6J7X.js.map → chunk-JNPK46YH.js.map} +0 -0
- /package/dist/{chunk-KSMX62JF.js.map → chunk-VR4JIC5H.js.map} +0 -0
package/dist/runtime.d.ts
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
import { AgentProfile as AgentProfile$1, BackendType, CreateSandboxOptions, SandboxInstance, SandboxEvent } from '@tangle-network/sandbox';
|
|
2
2
|
export { AgentProfile, CreateSandboxOptions, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox';
|
|
3
|
-
import { R as ResultBlobStore, a as SpawnJournal, N as NodeId, b as SpawnEvent, T as TreeView, c as Settled, d as AgentSpec,
|
|
4
|
-
export {
|
|
5
|
-
export { A as AnalyzeInput, a as CompletionAnalyst, b as CompletionEvidence, c as CompletionPolicy, d as CompletionVerdict, C as
|
|
6
|
-
import {
|
|
3
|
+
import { R as ResultBlobStore, a as SpawnJournal, N as NodeId, b as SpawnEvent, T as TreeView, c as Settled, E as ExecutorFactory, d as AgentSpec, e as ExecutorRegistry, B as Budget, A as Agent, f as RootHandle, g as SupervisedResult, h as Spend, S as Scope, U as UsageEvent, i as Supervisor } from './types-Cbx3dNK5.js';
|
|
4
|
+
export { j as Executor, k as ExecutorContext, l as ExecutorResult, H as Handle, m as NodeSnapshot, n as NodeStatus, o as Restart, p as RootSignal, q as Runtime, r as SpawnOpts, s as SupervisorOpts, W as WidenGate } from './types-Cbx3dNK5.js';
|
|
5
|
+
export { A as AnalyzeInput, a as CompletionAnalyst, b as CompletionEvidence, c as CompletionPolicy, d as CompletionVerdict, C as CreateDriverOptions, D as DriverDecision, P as PlannerContext, e as TopologyMove, T as TopologyPlanner, f as completionAuthorizes, g as createDriver, h as deterministicCompletion, r as renderAnalyses, s as sentinelCompletion, i as stopSentinel } from './driver-DYU2sgHr.js';
|
|
6
|
+
import { S as SandboxClient, b as LoopResult, c as LoopTokenUsage, R as RuntimeStreamEvent, A as AgentRunSpec, E as ExecCtx, I as Iteration } from './types-nBMuollC.js';
|
|
7
|
+
export { D as Driver, C as LoopDecisionPayload, F as LoopEndedPayload, G as LoopIterationDispatchPayload, H as LoopIterationEndedPayload, J as LoopIterationStartedPayload, a as LoopLineageOptions, M as LoopPlanDescription, N as LoopPlanPayload, f as LoopSandboxPlacement, P as LoopStartedPayload, Q as LoopTeardownFailedPayload, e as LoopTraceEmitter, T as LoopTraceEvent, L as LoopWinner, O as OutputAdapter, U as ValidationCtx, V as Validator } from './types-nBMuollC.js';
|
|
8
|
+
import { AgentProfile, AnalystFinding, DefaultVerdict, ChatClient } from '@tangle-network/agent-eval';
|
|
7
9
|
export { DefaultVerdict } from '@tangle-network/agent-eval';
|
|
8
10
|
import { Scenario, ProfileDispatchFn } from '@tangle-network/agent-eval/campaign';
|
|
9
|
-
import { R as RunLoopOptions } from './run-loop--hSoIknW.js';
|
|
10
|
-
export { c as createSandboxForSpec, d as defaultSelectWinner, r as runLoop } from './run-loop--hSoIknW.js';
|
|
11
|
-
import { b as LoopSandboxClient, c as LoopResult, d as LoopTokenUsage, R as RuntimeStreamEvent, A as AgentRunSpec, E as ExecCtx, I as Iteration } from './types-DdzkffAm.js';
|
|
12
|
-
export { D as Driver, h as LoopDecisionPayload, i as LoopEndedPayload, j as LoopIterationDispatchPayload, k as LoopIterationEndedPayload, l as LoopIterationStartedPayload, a as LoopLineageOptions, m as LoopPlanDescription, n as LoopPlanPayload, g as LoopSandboxPlacement, o as LoopStartedPayload, p as LoopTeardownFailedPayload, f as LoopTraceEmitter, q as LoopTraceEvent, L as LoopWinner, O as OutputAdapter, r as ValidationCtx, V as Validator } from './types-DdzkffAm.js';
|
|
11
|
+
import { R as RunLoopOptions } from './run-loop-DvD4aGiE.js';
|
|
12
|
+
export { c as createSandboxForSpec, d as defaultSelectWinner, r as runLoop } from './run-loop-DvD4aGiE.js';
|
|
13
13
|
import { R as RuntimeHooks } from './runtime-hooks-C7JwKb9E.js';
|
|
14
14
|
|
|
15
15
|
/**
|
|
@@ -113,6 +113,13 @@ declare function replaySpawnTree(journal: SpawnJournal, blobs: ResultBlobStore,
|
|
|
113
113
|
*/
|
|
114
114
|
declare function materializeTreeView(events: SpawnEvent[]): TreeView;
|
|
115
115
|
|
|
116
|
+
/**
|
|
117
|
+
* Adapt an `ExecutorFactory` into a `SandboxClient` for `runLoop`. The factory is
|
|
118
|
+
* instantiated fresh per `streamPrompt` (mirrors the per-spawn executor lifecycle):
|
|
119
|
+
* run once on the prompt, emit the terminal result event, tear down.
|
|
120
|
+
*/
|
|
121
|
+
declare function inlineSandboxClient(factory: ExecutorFactory<unknown>): SandboxClient;
|
|
122
|
+
|
|
116
123
|
/**
|
|
117
124
|
* `loopDispatch` — turn `runLoop` into an agent-eval campaign dispatch.
|
|
118
125
|
*
|
|
@@ -146,7 +153,7 @@ declare function materializeTreeView(events: SpawnEvent[]): TreeView;
|
|
|
146
153
|
type LoopOptionsForDispatch<Task, Output, Decision> = Omit<RunLoopOptions<Task, Output, Decision>, 'ctx'>;
|
|
147
154
|
interface LoopDispatchOptions<Task, Output, Decision, TScenario extends Scenario, TArtifact> {
|
|
148
155
|
/** Sandbox client used for every cell's `runLoop`. Supplied once. */
|
|
149
|
-
sandboxClient: LoopSandboxClient;
|
|
156
|
+
sandboxClient: SandboxClient;
|
|
150
157
|
/** Build the per-cell runLoop options from the scenario (+ profile, when
|
|
151
158
|
* used with `runProfileMatrix`). */
|
|
152
159
|
toLoopOptions: (scenario: TScenario, profile: AgentProfile) => LoopOptionsForDispatch<Task, Output, Decision>;
|
|
@@ -317,7 +324,7 @@ interface ShapeContext<D = unknown> {
|
|
|
317
324
|
* Wrap an `AgentSpec` into a leaf `Agent` carrying it as `executorSpec`, so the shape can
|
|
318
325
|
* `scope.spawn(spawnChild(spec), task, opts)`. `name` labels the child for traces. The
|
|
319
326
|
* returned agent's `act` is never invoked by the keystone (it is spawned, not run) — the
|
|
320
|
-
* spec drives the resolved `LeafExecutor`; `act` exists only to satisfy the `Agent` shape.
|
|
327
|
+
* spec drives the resolved `Executor`; `act` exists only to satisfy the `Agent` shape.
|
|
321
328
|
*/
|
|
322
329
|
spawnChild(name: string, spec: AgentSpec): Agent<unknown, Outcome<D>>;
|
|
323
330
|
/** Derive a child `AgentSpec` from the persona's root spec with an overridden profile —
|
|
@@ -892,6 +899,61 @@ interface EqualKOnCostOptions {
|
|
|
892
899
|
/** `equalKOnCost(arms, opts)` — the cross-arm equal-compute check on conserved cost. */
|
|
893
900
|
type EqualKOnCost = (arms: ReadonlyArray<EqualKArm>, options?: EqualKOnCostOptions) => EqualKVerdict;
|
|
894
901
|
|
|
902
|
+
/**
|
|
903
|
+
* The third-person observer — the connective tissue that closes the loop.
|
|
904
|
+
*
|
|
905
|
+
* A driver spawns a worker; the worker can't see itself. `observe` reads the
|
|
906
|
+
* worker's TRACE (what it actually did — every tool call, cost, failure) and
|
|
907
|
+
* produces two streams:
|
|
908
|
+
* - `findings` / `report` — fed back DOWN (a steer for the next attempt) and
|
|
909
|
+
* OUT (the operator-facing "what I noticed + what to change").
|
|
910
|
+
* - `learned` — durable facts written to the cross-run `Corpus` so the NEXT
|
|
911
|
+
* run starts smarter (the continuous half of "continuous self-improvement").
|
|
912
|
+
*
|
|
913
|
+
* Findings are TRACE-derived, never JUDGE-derived (`derived_from_judge:false`):
|
|
914
|
+
* the observer reads behavior, never the acceptance verdict — the selector≠judge
|
|
915
|
+
* firewall (docs/learning-flywheel.md). The observer is harness-agnostic: it
|
|
916
|
+
* reads a trace + an output, so it watches opencode, codex, hermes, or a BYO
|
|
917
|
+
* agent identically.
|
|
918
|
+
*/
|
|
919
|
+
|
|
920
|
+
interface ObserveInput {
|
|
921
|
+
/** What the worker was asked to do. */
|
|
922
|
+
task: string;
|
|
923
|
+
/** What it produced (its final answer / artifact summary). */
|
|
924
|
+
output: string;
|
|
925
|
+
/** The worker's trace — any event array (sandbox events, tool-call records). */
|
|
926
|
+
trace: ReadonlyArray<unknown>;
|
|
927
|
+
/** Terminal status only (passed/failed/unknown) — NOT a judge score; the
|
|
928
|
+
* observer never reads the verdict, it reads behavior. */
|
|
929
|
+
outcome?: 'passed' | 'failed' | 'unknown';
|
|
930
|
+
/** Provenance back to the run. */
|
|
931
|
+
runId?: string;
|
|
932
|
+
}
|
|
933
|
+
interface ObserveOptions {
|
|
934
|
+
/** The model-call seam (agent-eval `createChatClient`: router / cli-bridge / …). */
|
|
935
|
+
chat: ChatClient;
|
|
936
|
+
model?: string;
|
|
937
|
+
/** When set, learned facts are appended (idempotent) for the next run to read. */
|
|
938
|
+
corpus?: Corpus;
|
|
939
|
+
/** Tags written onto learned facts + used by the next run's corpus query. */
|
|
940
|
+
tags?: ReadonlyArray<string>;
|
|
941
|
+
signal?: AbortSignal;
|
|
942
|
+
/** Cap the trace lines fed to the observer (keeps the call cheap). Default 80. */
|
|
943
|
+
maxTraceLines?: number;
|
|
944
|
+
}
|
|
945
|
+
interface Observation {
|
|
946
|
+
findings: AnalystFinding[];
|
|
947
|
+
/** Facts persisted to the corpus (empty when no corpus was supplied). */
|
|
948
|
+
learned: CorpusRecord[];
|
|
949
|
+
/** Operator-facing markdown: what the observer noticed + what to change. */
|
|
950
|
+
report: string;
|
|
951
|
+
}
|
|
952
|
+
declare function observe(input: ObserveInput, opts: ObserveOptions): Promise<Observation>;
|
|
953
|
+
/** Operator-facing report, split by who should act. The agent block is the
|
|
954
|
+
* steer; the operator block is the advice. */
|
|
955
|
+
declare function renderReport(findings: ReadonlyArray<AnalystFinding>): string;
|
|
956
|
+
|
|
895
957
|
/**
|
|
896
958
|
* @experimental
|
|
897
959
|
*
|
|
@@ -1299,7 +1361,7 @@ interface AcquireOptions {
|
|
|
1299
1361
|
sleep?: (ms: number) => Promise<void>;
|
|
1300
1362
|
}
|
|
1301
1363
|
/** @experimental */
|
|
1302
|
-
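declare function acquireSandbox(client: LoopSandboxClient, options: CreateSandboxOptions, acquire?: AcquireOptions): Promise<SandboxInstance>;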
|
|
1364
|
+
declare function acquireSandbox(client: SandboxClient, options: CreateSandboxOptions, acquire?: AcquireOptions): Promise<SandboxInstance>;
|
|
1303
1365
|
|
|
1304
1366
|
/**
|
|
1305
1367
|
* @experimental
|
|
@@ -1340,9 +1402,9 @@ interface SandboxCapabilities {
|
|
|
1340
1402
|
*
|
|
1341
1403
|
* @experimental
|
|
1342
1404
|
*/
|
|
1343
|
-
declare function probeSandboxCapabilities(client: LoopSandboxClient): Promise<SandboxCapabilities>;
|
|
1405
|
+
declare function probeSandboxCapabilities(client: SandboxClient): Promise<SandboxCapabilities>;
|
|
1344
1406
|
/**
|
|
1345
|
-
* Narrowed view of the optional CRIU probe. The loop-side `LoopSandboxClient`
|
|
1407
|
+
* Narrowed view of the optional CRIU probe. The loop-side `SandboxClient`
|
|
1346
1408
|
* does not require `criuStatus`; this widens it optionally so the probe can be
|
|
1347
1409
|
* read without importing sandbox-backend specifics. @experimental
|
|
1348
1410
|
*/
|
|
@@ -1514,12 +1576,13 @@ interface SandboxLineage {
|
|
|
1514
1576
|
*
|
|
1515
1577
|
* @experimental
|
|
1516
1578
|
*/
|
|
1517
|
-
declare function createSandboxLineage(client: LoopSandboxClient, capabilities: SandboxCapabilities, options?: {
|
|
1579
|
+
declare function createSandboxLineage(client: SandboxClient, capabilities: SandboxCapabilities, options?: {
|
|
1518
1580
|
maxConcurrency?: number;
|
|
1581
|
+
streaming?: 'sse' | 'poll';
|
|
1519
1582
|
}): SandboxLineage;
|
|
1520
1583
|
/**
|
|
1521
1584
|
* Loop-side widening of the box's optional checkpoint method. The
|
|
1522
|
-
* `LoopSandboxClient`/`SandboxInstance` surface the kernel relies on does not
|
|
1585
|
+
* `SandboxClient`/`SandboxInstance` surface the kernel relies on does not
|
|
1523
1586
|
* require checkpointing; this reads it optionally so the lineage can probe-gate
|
|
1524
1587
|
* without importing sandbox-backend specifics. @experimental
|
|
1525
1588
|
*/
|
|
@@ -1550,20 +1613,118 @@ interface SessionCapableBox {
|
|
|
1550
1613
|
};
|
|
1551
1614
|
}
|
|
1552
1615
|
|
|
1616
|
+
/**
|
|
1617
|
+
* `openSandboxRun` — the ONE harness-agnostic seam for running an agent in a
|
|
1618
|
+
* sandbox over a persistent artifact: run it, stream it, RESUME the same session
|
|
1619
|
+
* across turns. Domain-agnostic: a coding agent, a research agent, a tax/legal
|
|
1620
|
+
* agent — all flow through this; the domain lives only in the `Deliverable<Out>`
|
|
1621
|
+
* the caller supplies, never in a per-domain copy of this function.
|
|
1622
|
+
*
|
|
1623
|
+
* It is a thin facade (NOT a new layer) over code that already exists and is
|
|
1624
|
+
* already hardened:
|
|
1625
|
+
* - `acquireSandbox` — cold-start / 502-503-504 / gateway-timeout recovery,
|
|
1626
|
+
* - `buildBackendOptions` — the harness IS `backend.type` (opencode / codex /
|
|
1627
|
+
* claude-code / kimi-code / hermes / pi); the only "which agent" knob,
|
|
1628
|
+
* - `createSandboxLineage` — `start` mints a session; `resume` continues the
|
|
1629
|
+
* SAME server-side session with a fail-loud `assertSessionLive`.
|
|
1630
|
+
*
|
|
1631
|
+
* The one genuinely-new piece is {@link Deliverable}: it widens the pure
|
|
1632
|
+
* `OutputAdapter.parse(events)` to ALSO admit a post-turn read off the box FS —
|
|
1633
|
+
* the structural gap that made the bench gates hand-roll `box.fs.read`, because a
|
|
1634
|
+
* large produced file (a git diff, a generated document) truncates in the chat
|
|
1635
|
+
* stream and a pure events-parser cannot reach the workspace. Per the SDK, a
|
|
1636
|
+
* RELATIVE `deliverable.path` resolves from the workspace root and an ABSOLUTE one
|
|
1637
|
+
* (e.g. `/tmp/solution.patch`) reads the container filesystem directly — both are
|
|
1638
|
+
* valid; pick the one the agent actually wrote to. Avoid `..` traversal segments.
|
|
1639
|
+
*
|
|
1640
|
+
* What this deliberately does NOT do (so it stays a facade, not slop): no custom
|
|
1641
|
+
* reconnect/replay (the SDK + platform own per-session buffering + `Last-Event-ID`);
|
|
1642
|
+
* no fork verb (platform CRIU is probe-gated and currently absent — fork lives in
|
|
1643
|
+
* `SandboxLineage.fork` behind the capability probe, surfaced only if it returns).
|
|
1644
|
+
* It is also distinct from `runLoop`: `runLoop` is the multi-round, driver-driven
|
|
1645
|
+
* kernel (fresh box per round, events deliverable); this is a SINGLE rollout +
|
|
1646
|
+
* artifact-or-events deliverable + resume over ONE persistent box.
|
|
1647
|
+
*/
|
|
1648
|
+
|
|
1649
|
+
/**
|
|
1650
|
+
* @experimental
|
|
1651
|
+
* How a typed deliverable `Out` is materialized from a finished turn.
|
|
1652
|
+
* - `events` — pure parse over the event array (identical to `OutputAdapter`).
|
|
1653
|
+
* - `artifact` — read a file off the box AFTER the turn drains, then map it (+ the
|
|
1654
|
+
* events). For diffs/codebases/documents that don't fit the chat
|
|
1655
|
+
* stream. `path` relative ⇒ workspace root; absolute ⇒ container FS.
|
|
1656
|
+
*/
|
|
1657
|
+
type Deliverable<Out> = {
|
|
1658
|
+
kind: 'events';
|
|
1659
|
+
fromEvents: (events: SandboxEvent[]) => Out;
|
|
1660
|
+
} | {
|
|
1661
|
+
kind: 'artifact';
|
|
1662
|
+
path: string;
|
|
1663
|
+
fromArtifact: (raw: string, events: SandboxEvent[]) => Out;
|
|
1664
|
+
};
|
|
1665
|
+
/**
|
|
1666
|
+
* @experimental
|
|
1667
|
+
* One finished turn over the artifact. A failed FS read is surfaced in `readError`
|
|
1668
|
+
* (never masked as an empty deliverable) so a caller distinguishes "agent produced
|
|
1669
|
+
* nothing" from a transport/FS fault.
|
|
1670
|
+
*/
|
|
1671
|
+
interface TurnResult<Out> {
|
|
1672
|
+
out: Out;
|
|
1673
|
+
events: SandboxEvent[];
|
|
1674
|
+
readError?: string;
|
|
1675
|
+
}
|
|
1676
|
+
/** @experimental A live run over ONE persistent artifact (box + session). Close it
|
|
1677
|
+
* when done — `close()` tears the box down. */
|
|
1678
|
+
interface SandboxRun<Out> {
|
|
1679
|
+
readonly box: SandboxInstance;
|
|
1680
|
+
readonly sessionId: string;
|
|
1681
|
+
/** First turn over the fresh box (mints the session). Throws if already started. */
|
|
1682
|
+
start(prompt: string): Promise<TurnResult<Out>>;
|
|
1683
|
+
/** Continue THE SAME session over THE SAME artifact — a resumed turn/rollout. */
|
|
1684
|
+
resume(prompt: string): Promise<TurnResult<Out>>;
|
|
1685
|
+
close(): Promise<void>;
|
|
1686
|
+
}
|
|
1687
|
+
/** @experimental */
|
|
1688
|
+
interface OpenSandboxRunOptions {
|
|
1689
|
+
/** Profile + sandbox env/overrides. `sandboxOverrides.backend.type` is the harness. */
|
|
1690
|
+
agentRun: AgentRunSpec<string>;
|
|
1691
|
+
signal: AbortSignal;
|
|
1692
|
+
/** Optional execution-scoped observers. Hook failures never fail the run. */
|
|
1693
|
+
hooks?: RuntimeHooks;
|
|
1694
|
+
/** Stable run id for trace joins. Defaults to a short runtime-minted id. */
|
|
1695
|
+
runId?: string;
|
|
1696
|
+
/** Optional benchmark/scenario id carried into emitted hook events. */
|
|
1697
|
+
scenarioId?: string;
|
|
1698
|
+
/** Test seam for deterministic hook timestamps. Defaults to `Date.now`. */
|
|
1699
|
+
now?: () => number;
|
|
1700
|
+
/** Bounds box-creation bursts inside lineage fanout. Default from lineage. */
|
|
1701
|
+
maxConcurrency?: number;
|
|
1702
|
+
/** Base backoff (ms) for retrying a transient artifact `fs.read` failure; the i-th
|
|
1703
|
+
* retry waits `readRetryDelayMs * i`. Default 1000. Set 0 to disable the wait (tests). */
|
|
1704
|
+
readRetryDelayMs?: number;
|
|
1705
|
+
}
|
|
1706
|
+
/**
|
|
1707
|
+
* @experimental
|
|
1708
|
+
* Open a sandbox run. Harness-agnostic: the harness lives in
|
|
1709
|
+
* `options.agentRun.sandboxOverrides.backend.type`, so opencode/codex/claude-code/
|
|
1710
|
+
* kimi-code all flow through this one entrypoint with identical env/auth wiring.
|
|
1711
|
+
*/
|
|
1712
|
+
declare function openSandboxRun<Out>(client: SandboxClient, options: OpenSandboxRunOptions, deliverable: Deliverable<Out>): Promise<SandboxRun<Out>>;
|
|
1713
|
+
|
|
1553
1714
|
/**
|
|
1554
1715
|
* @experimental
|
|
1555
1716
|
*
|
|
1556
1717
|
* The conserved budget reservation pool — the invariant the whole instrument
|
|
1557
1718
|
* rests on (critique M5/B3). One root `Budget` becomes a conserved pool of three
|
|
1558
|
-
* quantities (tokens, usd, iterations) plus an absolute deadline. Children
|
|
1559
|
-
* atomically at spawn and
|
|
1719
|
+
* quantities (tokens, usd, iterations) plus an absolute deadline. Children reserve
|
|
1720
|
+
* atomically at spawn and reconcile at settle:
|
|
1560
1721
|
*
|
|
1561
1722
|
* total ≡ free + reserved + committed (invariant, always)
|
|
1562
1723
|
*
|
|
1563
|
-
* `reserve` moves a child's whole ceiling from `free` → `reserved` and
|
|
1724
|
+
* `reserve` moves a child's whole ceiling from `free` → `reserved` and fails closed
|
|
1564
1725
|
* when `free` can't cover it (never read-then-spawn overcommit, so `Σk(treatment) ≡
|
|
1565
1726
|
* Σk(blind)` by construction). `reconcile` releases the reservation, commits ACTUAL
|
|
1566
|
-
* spend, and refunds the unspent remainder to `free`. Tokens and usd are
|
|
1727
|
+
* spend, and refunds the unspent remainder to `free`. Tokens and usd are separate
|
|
1567
1728
|
* channels (`LoopTokenUsage` has no `usd`); iterations are conserved alongside them.
|
|
1568
1729
|
*
|
|
1569
1730
|
* Pure and deterministic: `now()` is injected, there is no I/O, and no wall-clock or
|
|
@@ -1635,7 +1796,7 @@ declare function createBudgetPool(root: Budget, now?: () => number): BudgetPool;
|
|
|
1635
1796
|
/**
|
|
1636
1797
|
* @experimental
|
|
1637
1798
|
*
|
|
1638
|
-
* The leaf runtime — the built-in `LeafExecutor` IMPLEMENTATIONS behind the ONE
|
|
1799
|
+
* The leaf runtime — the built-in `Executor` IMPLEMENTATIONS behind the ONE
|
|
1639
1800
|
* open interface frozen in `./types`, plus the open resolver/registry that maps
|
|
1640
1801
|
* an `AgentSpec` to one of them OR accepts a bring-your-own executor verbatim.
|
|
1641
1802
|
*
|
|
@@ -1649,7 +1810,7 @@ declare function createBudgetPool(root: Budget, now?: () => number): BudgetPool;
|
|
|
1649
1810
|
* excluded from the equal-k arms by construction (streaming).
|
|
1650
1811
|
* Every metered runtime reports through the SAME normalized `UsageEvent` channel
|
|
1651
1812
|
* so the conserved budget pool meters them identically. A user's own agent is
|
|
1652
|
-
* first-class the moment it implements `LeafExecutor` — register it by name or
|
|
1813
|
+
* first-class the moment it implements `Executor` — register it by name or
|
|
1653
1814
|
* pass it as `AgentSpec.executor`.
|
|
1654
1815
|
*
|
|
1655
1816
|
* Layering: `estimateCost`/`isModelPriced` are substrate primitives from
|
|
@@ -1675,7 +1836,7 @@ interface RouterSeam {
|
|
|
1675
1836
|
* checkpoint/fork.
|
|
1676
1837
|
*/
|
|
1677
1838
|
interface SandboxSeam {
|
|
1678
|
-
sandboxClient: LoopSandboxClient;
|
|
1839
|
+
sandboxClient: SandboxClient;
|
|
1679
1840
|
/** Forwarded into the composed `runLoop`'s `ctx` (trace emitter, run handle, etc.). */
|
|
1680
1841
|
loopCtx?: Partial<Omit<ExecCtx, 'sandboxClient' | 'signal'>>;
|
|
1681
1842
|
/** PR #150 `RunLoopOptions.lineage` passthrough — opaque; forwarded, not parsed. */
|
|
@@ -1694,40 +1855,38 @@ interface CliSeam {
|
|
|
1694
1855
|
cwd?: string;
|
|
1695
1856
|
}
|
|
1696
1857
|
/**
|
|
1697
|
-
* A
|
|
1698
|
-
*
|
|
1699
|
-
*
|
|
1700
|
-
*
|
|
1701
|
-
*
|
|
1702
|
-
*
|
|
1703
|
-
|
|
1704
|
-
|
|
1705
|
-
|
|
1706
|
-
|
|
1707
|
-
|
|
1708
|
-
|
|
1709
|
-
|
|
1710
|
-
|
|
1711
|
-
* COMPOSES `runLoop` as a single-task leaf: one box, a refine driver bounded to
|
|
1712
|
-
* the seam's `maxIterations` (default 1), the spec's profile as the agent run.
|
|
1713
|
-
* Surfaces the loop's aggregated `tokenUsage` + `costUsd` as `UsageEvent`s after
|
|
1714
|
-
* it drains, and yields one `iteration` event per loop iteration. Forwards the
|
|
1715
|
-
* optional `lineage` passthrough WITHOUT importing sandbox-lineage / reinventing
|
|
1716
|
-
* checkpoint/fork.
|
|
1717
|
-
*
|
|
1718
|
-
* Streaming shape: the loop runs to completion inside the first `next()`, then
|
|
1719
|
-
* the recorded usage events are yielded; the terminal artifact is read from
|
|
1720
|
-
* `resultArtifact()` after the stream drains.
|
|
1721
|
-
*/
|
|
1722
|
-
declare const sandboxExecutor: LeafExecutorFactory<unknown>;
|
|
1858
|
+
* cli-bridge seam. A local OpenAI-compatible bridge that fronts harness CLIs
|
|
1859
|
+
* (claude-code / opencode / kimi / pi) behind one HTTP surface; `model` doubles
|
|
1860
|
+
* as the harness selector (e.g. `claude-code/sonnet`, `opencode/<provider>/<model>`).
|
|
1861
|
+
* `agentProfile` is the bridge-dialect profile (metadata.disallowedTools, mcp)
|
|
1862
|
+
* forwarded verbatim per request — how an arm disables native tools or injects
|
|
1863
|
+
* a provider search MCP.
|
|
1864
|
+
*/
|
|
1865
|
+
interface BridgeSeam {
|
|
1866
|
+
bridgeUrl: string;
|
|
1867
|
+
bridgeBearer: string;
|
|
1868
|
+
model: string;
|
|
1869
|
+
agentProfile?: Record<string, unknown>;
|
|
1870
|
+
timeoutMs?: number;
|
|
1871
|
+
}
|
|
1723
1872
|
/**
|
|
1724
|
-
*
|
|
1725
|
-
*
|
|
1726
|
-
*
|
|
1727
|
-
*
|
|
1728
|
-
*
|
|
1729
|
-
*/
|
|
1730
|
-
|
|
1873
|
+
* The single built-in executor entrypoint. The backend is DATA — the cost dial a
|
|
1874
|
+
* profile, an experiment config, or a replay journal can name — not an import
|
|
1875
|
+
* choice. Injects the matching seam and delegates to the built-in implementation;
|
|
1876
|
+
* the port stays OPEN: bring-your-own agents implement `Executor` directly and
|
|
1877
|
+
* never pass through here.
|
|
1878
|
+
*/
|
|
1879
|
+
type ExecutorConfig = ({
|
|
1880
|
+
backend: 'router';
|
|
1881
|
+
} & RouterSeam) | ({
|
|
1882
|
+
backend: 'bridge';
|
|
1883
|
+
} & BridgeSeam) | ({
|
|
1884
|
+
backend: 'cli';
|
|
1885
|
+
} & CliSeam) | ({
|
|
1886
|
+
backend: 'sandbox';
|
|
1887
|
+
harness?: BackendType;
|
|
1888
|
+
} & SandboxSeam);
|
|
1889
|
+
declare function createExecutor(config: ExecutorConfig): ExecutorFactory<unknown>;
|
|
1731
1890
|
/**
|
|
1732
1891
|
* The open resolver/registry. Pre-registers the three built-ins under their
|
|
1733
1892
|
* runtime tags (`'router'`, `'sandbox'`, `'cli'`) and accepts `register(name,
|
|
@@ -1749,7 +1908,7 @@ declare function createExecutorRegistry(): ExecutorRegistry;
|
|
|
1749
1908
|
* An `Agent.act` runs inside a `Scope`. It `spawn`s children dynamically and reacts to
|
|
1750
1909
|
* them via `next()`. The scope owns ONE in-memory nursery — the authoritative live set —
|
|
1751
1910
|
* and is the single place that drives a child's lifecycle: reserve budget atomically,
|
|
1752
|
-
* resolve a `LeafExecutor` through the open registry, run it (one-shot OR streaming),
|
|
1911
|
+
* resolve an `Executor` through the open registry, run it (one-shot OR streaming),
|
|
1753
1912
|
* fold its normalized `UsageEvent`s into a conserved `Spend`, reconcile the reservation
|
|
1754
1913
|
* (refunding the unspent remainder), persist the result blob + journal records, and
|
|
1755
1914
|
* deliver the `Settled` through the `next()` cursor.
|
|
@@ -1857,4 +2016,32 @@ declare function createSupervisor<Task, Out>(): Supervisor<Task, Out>;
|
|
|
1857
2016
|
*/
|
|
1858
2017
|
declare function createRootHandle<Out>(): RootHandle<Out>;
|
|
1859
2018
|
|
|
1860
|
-
|
|
2019
|
+
/** Command runner seam. Host code can use `localShell`; sandbox code can wrap `box.exec`. */
|
|
2020
|
+
type Shell = (args: ReadonlyArray<string>, cwd?: string) => Promise<{
|
|
2021
|
+
stdout: string;
|
|
2022
|
+
stderr: string;
|
|
2023
|
+
code: number;
|
|
2024
|
+
}>;
|
|
2025
|
+
type WorkspaceCommit = {
|
|
2026
|
+
readonly ok: true;
|
|
2027
|
+
readonly rev: string;
|
|
2028
|
+
} | {
|
|
2029
|
+
readonly ok: false;
|
|
2030
|
+
readonly conflict: string;
|
|
2031
|
+
};
|
|
2032
|
+
interface Workspace {
|
|
2033
|
+
readonly ref: string;
|
|
2034
|
+
materialize(dir: string): Promise<void>;
|
|
2035
|
+
commit(dir: string, message: string): Promise<WorkspaceCommit>;
|
|
2036
|
+
head(): Promise<string>;
|
|
2037
|
+
}
|
|
2038
|
+
declare function localShell(): Shell;
|
|
2039
|
+
interface GitWorkspaceOptions {
|
|
2040
|
+
readonly ref: string;
|
|
2041
|
+
readonly shell?: Shell;
|
|
2042
|
+
readonly branch?: string;
|
|
2043
|
+
readonly noHooks?: boolean;
|
|
2044
|
+
}
|
|
2045
|
+
declare function gitWorkspace(opts: GitWorkspaceOptions): Workspace;
|
|
2046
|
+
|
|
2047
|
+
export { Agent, AgentRunSpec, AgentSpec, type AssertTraceDerivedFindings, type BridgeSeam, Budget, type BudgetPool, type BudgetReadout, type CheckpointCapableBox, type CliSeam, type CombinatorShape, type Corpus, type CorpusFilter, type CorpusRecord, type CreateScopeAnalystOptions, type CriuCapableClient, type DefinePersona, type DefinePersonaInput, type Deliverable, type EqualKArm, type EqualKOnCost, type EqualKOnCostOptions, type EqualKVerdict, ExecCtx, type ExecutorConfig, ExecutorFactory, ExecutorRegistry, type Fanout, type FanoutOptions, type FanoutSynthesis, FileCorpus, FileResultBlobStore, FileSpawnJournal, type FlatWidenGate, type ForkCapableBox, type GitWorkspaceOptions, InMemoryCorpus, InMemoryResultBlobStore, InMemorySpawnJournal, Iteration, type LoopDispatchOptions, type LoopOptionsForDispatch, LoopResult, type LoopShape, LoopTokenUsage, type LoopUntil, type LoopUntilSpec, type LoopUntilState, NodeId, type Observation, type ObserveInput, type ObserveOptions, type OpenSandboxRunOptions, type Outcome, type Panel, type PanelJudge, type PanelSpec, type PanelVerdict, type Persona, type PersonaContext, type PersonaExecutors, type Pipeline, type PipelineStage, type RenderCorpusToInstructions, type RenderCorpusToInstructionsOptions, type ReservationTicket, ResultBlobStore, RootHandle, type RouterSeam, RunLoopOptions, type RunPersonified, type RunPersonifiedOptions, type SandboxCapabilities, SandboxClient, type SandboxLineage, type SandboxLineageHandle, type SandboxRun, type SandboxSeam, Scope, type ScopeAnalyst, type ScopeAnalyzeInput, type ScopeWidenGate, type SessionCapableBox, Settled, type ShapeBudget, type ShapeContext, type ShapeRegistry, type Shell, SpawnEvent, SpawnJournal, Spend, type SteerContext, SupervisedResult, Supervisor, type TrajectoryNode, type TrajectoryReport, type TrajectoryReportFn, type TrajectoryReportOptions, TreeView, type TurnResult, UsageEvent, type UsageSink, type Verify, type VerifySpec, type Widen, type WidenDecision, type 
WidenLineage, type WidenSpec, type Workspace, type WorkspaceCommit, acquireSandbox, assertTraceDerivedFindings, buildSteerContext, builtinShapes, contentAddress, createBudgetPool, createExecutor, createExecutorRegistry, createRootHandle, createSandboxLineage, createScope, createScopeAnalyst, createShapeRegistry, createSupervisor, definePersona, equalKOnCost, extractLlmCallEvent, fanout, flatWidenGate, gitWorkspace, inlineSandboxClient, localShell, loopDispatch, loopUntil, mapSandboxEvent, materializeTreeView, observe, openSandboxRun, panel, pipeline, probeSandboxCapabilities, registerShape, renderCorpusToInstructions, renderReport, replaySpawnTree, reportLoopUsage, runPersonified, settledToIteration, spendFromUsageEvents, trajectoryReport, verify, widen };
|
package/dist/runtime.js
CHANGED
|
@@ -9,11 +9,11 @@ import {
|
|
|
9
9
|
assertTraceDerivedFindings,
|
|
10
10
|
buildSteerContext,
|
|
11
11
|
builtinShapes,
|
|
12
|
-
cliExecutor,
|
|
13
12
|
completionAuthorizes,
|
|
14
13
|
contentAddress,
|
|
15
14
|
createBudgetPool,
|
|
16
|
-
|
|
15
|
+
createDriver,
|
|
16
|
+
createExecutor,
|
|
17
17
|
createExecutorRegistry,
|
|
18
18
|
createRootHandle,
|
|
19
19
|
createSandboxForSpec,
|
|
@@ -28,21 +28,25 @@ import {
|
|
|
28
28
|
equalKOnCost,
|
|
29
29
|
fanout,
|
|
30
30
|
flatWidenGate,
|
|
31
|
+
gitWorkspace,
|
|
32
|
+
inlineSandboxClient,
|
|
33
|
+
localShell,
|
|
31
34
|
loopDispatch,
|
|
32
35
|
loopUntil,
|
|
33
36
|
materializeTreeView,
|
|
37
|
+
observe,
|
|
38
|
+
openSandboxRun,
|
|
34
39
|
panel,
|
|
35
40
|
pipeline,
|
|
36
41
|
probeSandboxCapabilities,
|
|
37
42
|
registerShape,
|
|
38
43
|
renderAnalyses,
|
|
39
44
|
renderCorpusToInstructions,
|
|
45
|
+
renderReport,
|
|
40
46
|
replaySpawnTree,
|
|
41
47
|
reportLoopUsage,
|
|
42
|
-
routerInlineExecutor,
|
|
43
48
|
runLoop,
|
|
44
49
|
runPersonified,
|
|
45
|
-
sandboxExecutor,
|
|
46
50
|
sentinelCompletion,
|
|
47
51
|
settledToIteration,
|
|
48
52
|
spendFromUsageEvents,
|
|
@@ -50,11 +54,11 @@ import {
|
|
|
50
54
|
trajectoryReport,
|
|
51
55
|
verify,
|
|
52
56
|
widen
|
|
53
|
-
} from "./chunk-KEWO4KI6.js";
|
|
57
|
+
} from "./chunk-72JQCHOZ.js";
|
|
54
58
|
import {
|
|
55
59
|
extractLlmCallEvent,
|
|
56
60
|
mapSandboxEvent
|
|
57
|
-
} from "./chunk-PRX45WE2.js";
|
|
61
|
+
} from "./chunk-GSUO5QS6.js";
|
|
58
62
|
import "./chunk-DGUM43GV.js";
|
|
59
63
|
export {
|
|
60
64
|
FileCorpus,
|
|
@@ -67,11 +71,11 @@ export {
|
|
|
67
71
|
assertTraceDerivedFindings,
|
|
68
72
|
buildSteerContext,
|
|
69
73
|
builtinShapes,
|
|
70
|
-
cliExecutor,
|
|
71
74
|
completionAuthorizes,
|
|
72
75
|
contentAddress,
|
|
73
76
|
createBudgetPool,
|
|
74
|
-
|
|
77
|
+
createDriver,
|
|
78
|
+
createExecutor,
|
|
75
79
|
createExecutorRegistry,
|
|
76
80
|
createRootHandle,
|
|
77
81
|
createSandboxForSpec,
|
|
@@ -87,22 +91,26 @@ export {
|
|
|
87
91
|
extractLlmCallEvent,
|
|
88
92
|
fanout,
|
|
89
93
|
flatWidenGate,
|
|
94
|
+
gitWorkspace,
|
|
95
|
+
inlineSandboxClient,
|
|
96
|
+
localShell,
|
|
90
97
|
loopDispatch,
|
|
91
98
|
loopUntil,
|
|
92
99
|
mapSandboxEvent,
|
|
93
100
|
materializeTreeView,
|
|
101
|
+
observe,
|
|
102
|
+
openSandboxRun,
|
|
94
103
|
panel,
|
|
95
104
|
pipeline,
|
|
96
105
|
probeSandboxCapabilities,
|
|
97
106
|
registerShape,
|
|
98
107
|
renderAnalyses,
|
|
99
108
|
renderCorpusToInstructions,
|
|
109
|
+
renderReport,
|
|
100
110
|
replaySpawnTree,
|
|
101
111
|
reportLoopUsage,
|
|
102
|
-
routerInlineExecutor,
|
|
103
112
|
runLoop,
|
|
104
113
|
runPersonified,
|
|
105
|
-
sandboxExecutor,
|
|
106
114
|
sentinelCompletion,
|
|
107
115
|
settledToIteration,
|
|
108
116
|
spendFromUsageEvents,
|