@tangle-network/agent-runtime 0.30.1 → 0.33.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-CBQVID7G.js → chunk-5QVVET72.js} +2 -2
- package/dist/{chunk-ZJACJZF7.js → chunk-AAJVQRPL.js} +4 -4
- package/dist/{chunk-TZ53F7M7.js → chunk-GLTUUKTN.js} +308 -5
- package/dist/chunk-GLTUUKTN.js.map +1 -0
- package/dist/{chunk-UNQM6XQO.js → chunk-HSX6PFZR.js} +2 -2
- package/dist/{chunk-URDSRUPQ.js → chunk-PY6NMZYX.js} +2 -2
- package/dist/{chunk-QZEDHTT2.js → chunk-RO7K6JNF.js} +36 -3
- package/dist/{chunk-QZEDHTT2.js.map → chunk-RO7K6JNF.js.map} +1 -1
- package/dist/{chunk-XZYF3YJN.js → chunk-SQSCRJ7U.js} +7 -1
- package/dist/{chunk-XZYF3YJN.js.map → chunk-SQSCRJ7U.js.map} +1 -1
- package/dist/improvement.d.ts +128 -3
- package/dist/improvement.js +86 -0
- package/dist/improvement.js.map +1 -1
- package/dist/index.d.ts +18 -2
- package/dist/index.js +9 -3
- package/dist/index.js.map +1 -1
- package/dist/loops.d.ts +265 -4
- package/dist/loops.js +17 -5
- package/dist/mcp/bin.js +6 -6
- package/dist/mcp/index.d.ts +3 -3
- package/dist/mcp/index.js +7 -7
- package/dist/{otel-export-B2UBcPV4.d.ts → otel-export-CsgwKFq8.d.ts} +56 -1
- package/dist/profiles.d.ts +1 -1
- package/dist/profiles.js +3 -3
- package/dist/{types-Cbe54nB7.d.ts → types-CpCX1pfx.d.ts} +15 -1
- package/package.json +24 -13
- package/dist/chunk-TZ53F7M7.js.map +0 -1
- /package/dist/{chunk-CBQVID7G.js.map → chunk-5QVVET72.js.map} +0 -0
- /package/dist/{chunk-ZJACJZF7.js.map → chunk-AAJVQRPL.js.map} +0 -0
- /package/dist/{chunk-UNQM6XQO.js.map → chunk-HSX6PFZR.js.map} +0 -0
- /package/dist/{chunk-URDSRUPQ.js.map → chunk-PY6NMZYX.js.map} +0 -0
package/dist/loops.d.ts
CHANGED
|
@@ -1,11 +1,116 @@
|
|
|
1
|
+
import { AgentProfile, SandboxEvent } from '@tangle-network/sandbox';
|
|
1
2
|
export { AgentProfile, CreateSandboxOptions, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox';
|
|
2
|
-
import { D as Driver,
|
|
3
|
-
export {
|
|
4
|
-
import { DefaultVerdict } from '@tangle-network/agent-eval';
|
|
3
|
+
import { I as Iteration, D as Driver, L as LoopSandboxClient, A as AgentRunSpec, O as OutputAdapter, V as Validator, E as ExecCtx, a as LoopWinner, b as LoopResult } from './types-CpCX1pfx.js';
|
|
4
|
+
export { c as LoopDecisionPayload, d as LoopEndedPayload, e as LoopIterationDispatchPayload, f as LoopIterationEndedPayload, g as LoopIterationStartedPayload, h as LoopSandboxPlacement, i as LoopStartedPayload, j as LoopTokenUsage, k as LoopTraceEmitter, l as LoopTraceEvent, m as ValidationCtx } from './types-CpCX1pfx.js';
|
|
5
|
+
import { DefaultVerdict, AgentProfile as AgentProfile$1 } from '@tangle-network/agent-eval';
|
|
5
6
|
export { DefaultVerdict } from '@tangle-network/agent-eval';
|
|
7
|
+
import { Scenario, DispatchFn, ProfileDispatchFn } from '@tangle-network/agent-eval/campaign';
|
|
6
8
|
import './runtime-run-B8VIiOhI.js';
|
|
7
9
|
import './types-CsCCryln.js';
|
|
8
10
|
|
|
11
|
+
/**
|
|
12
|
+
* @experimental
|
|
13
|
+
*
|
|
14
|
+
* Dynamic driver — the agent authors the loop topology at runtime.
|
|
15
|
+
*
|
|
16
|
+
* Where `refine` and `fanout-vote` encode a fixed shape as a pure function of
|
|
17
|
+
* history, this driver delegates the per-round shape to an injected
|
|
18
|
+
* `TopologyPlanner`. Each round the planner inspects the task + iteration
|
|
19
|
+
* history and emits one `TopologyMove`:
|
|
20
|
+
* - `refine` → one task next round (optionally rewritten from the prior attempt)
|
|
21
|
+
* - `fanout` → N tasks next round (the kernel round-robins `agentRuns`, so a
|
|
22
|
+
* 2-harness fanout dispatches branch 0 to harness A and branch 1 to harness B)
|
|
23
|
+
* - `stop` → terminate; the kernel selects the winner across all iterations
|
|
24
|
+
*
|
|
25
|
+
* The planner is the brain; this driver is the structure. It maps moves onto
|
|
26
|
+
* the kernel's `plan`/`decide` contract, enforces the iteration + fanout caps,
|
|
27
|
+
* and fails loud on a malformed move. The planner is injected exactly like
|
|
28
|
+
* `refine`'s `refineTask` and `fanout-vote`'s `selector` — so a test can drive
|
|
29
|
+
* a deterministic policy through the real kernel, and production can wire it to
|
|
30
|
+
* an LLM via `createSandboxPlanner`.
|
|
31
|
+
*
|
|
32
|
+
* Topology is orthogonal to harness: the planner never names a backend. Which
|
|
33
|
+
* harness runs a branch is decided by the `AgentRunSpec` the kernel round-robins
|
|
34
|
+
* to, so one dynamic driver works across claude-code, codex, opencode, pi —
|
|
35
|
+
* including fanning a single round across several at once.
|
|
36
|
+
*/
|
|
37
|
+
|
|
38
|
+
/** Terminal once `decide` returns `'done'` (a kernel terminal decision). */
|
|
39
|
+
type DynamicDecision = 'continue' | 'done';
|
|
40
|
+
/**
|
|
41
|
+
* One topology decision for the next round. `fanout` carries explicit tasks
|
|
42
|
+
* rather than a count so the planner can issue heterogeneous branches (a
|
|
43
|
+
* different sub-task per harness); pass N copies of one task for a homogeneous
|
|
44
|
+
* fanout that relies on `agentRuns` diversity instead.
|
|
45
|
+
*
|
|
46
|
+
* @experimental
|
|
47
|
+
*/
|
|
48
|
+
type TopologyMove<Task> = {
|
|
49
|
+
kind: 'refine';
|
|
50
|
+
task: Task;
|
|
51
|
+
rationale?: string;
|
|
52
|
+
} | {
|
|
53
|
+
kind: 'fanout';
|
|
54
|
+
tasks: Task[];
|
|
55
|
+
rationale?: string;
|
|
56
|
+
} | {
|
|
57
|
+
kind: 'stop';
|
|
58
|
+
rationale?: string;
|
|
59
|
+
};
|
|
60
|
+
/** @experimental */
|
|
61
|
+
interface PlannerContext<Task, Output> {
|
|
62
|
+
/** The root task the loop was invoked with — stable across rounds. */
|
|
63
|
+
task: Task;
|
|
64
|
+
/** Every iteration so far, in dispatch order, with outputs + verdicts. */
|
|
65
|
+
history: ReadonlyArray<Iteration<Task, Output>>;
|
|
66
|
+
/** `history.length` — iterations already spent. */
|
|
67
|
+
iterationsSpent: number;
|
|
68
|
+
/** Iterations left before the driver's `maxIterations` cap forces a stop. */
|
|
69
|
+
iterationsRemaining: number;
|
|
70
|
+
}
|
|
71
|
+
/**
|
|
72
|
+
* Chooses the next topology move from the task + history. Sync or async; an
|
|
73
|
+
* async planner is where an LLM call goes (see `createSandboxPlanner`).
|
|
74
|
+
*
|
|
75
|
+
* @experimental
|
|
76
|
+
*/
|
|
77
|
+
type TopologyPlanner<Task, Output> = (ctx: PlannerContext<Task, Output>) => TopologyMove<Task> | Promise<TopologyMove<Task>>;
|
|
78
|
+
/** @experimental */
|
|
79
|
+
interface CreateDynamicDriverOptions<Task, Output> {
|
|
80
|
+
/** The agent-authored topology policy. Invoked once per round in `plan`. */
|
|
81
|
+
planner: TopologyPlanner<Task, Output>;
|
|
82
|
+
/**
|
|
83
|
+
* Hard safety cap on total iterations. When reached, the driver stops before
|
|
84
|
+
* consulting the planner. Default 8. Set the kernel's `runLoop`
|
|
85
|
+
* `maxIterations >= ` this so the driver's cap governs and the loop closes on
|
|
86
|
+
* a clean `'done'` rather than a truncated `'continue'`.
|
|
87
|
+
*/
|
|
88
|
+
maxIterations?: number;
|
|
89
|
+
/** Max branches a single `fanout` move may dispatch. Default 4. */
|
|
90
|
+
maxFanout?: number;
|
|
91
|
+
/** Stable identifier surfaced in trace events. Default `'dynamic'`. */
|
|
92
|
+
name?: string;
|
|
93
|
+
}
|
|
94
|
+
/** @experimental */
|
|
95
|
+
declare function createDynamicDriver<Task, Output>(options: CreateDynamicDriverOptions<Task, Output>): Driver<Task, Output, DynamicDecision>;
|
|
96
|
+
/**
|
|
97
|
+
* Compact, planner-friendly view of iteration history — what an LLM planner
|
|
98
|
+
* needs to choose the next move without the raw event streams. Output is
|
|
99
|
+
* truncated so a long run's prompt stays bounded.
|
|
100
|
+
*
|
|
101
|
+
* @experimental
|
|
102
|
+
*/
|
|
103
|
+
declare function summarizeHistory<Task, Output>(history: ReadonlyArray<Iteration<Task, Output>>, opts?: {
|
|
104
|
+
maxOutputChars?: number;
|
|
105
|
+
}): Array<{
|
|
106
|
+
index: number;
|
|
107
|
+
agentRunName: string;
|
|
108
|
+
valid?: boolean;
|
|
109
|
+
score?: number;
|
|
110
|
+
error?: string;
|
|
111
|
+
output?: string;
|
|
112
|
+
}>;
|
|
113
|
+
|
|
9
114
|
/**
|
|
10
115
|
* @experimental
|
|
11
116
|
*
|
|
@@ -87,6 +192,64 @@ declare function createRefineDriver<Task, Output>(options?: CreateRefineDriverOp
|
|
|
87
192
|
*/
|
|
88
193
|
declare function refineWinnerIndex<Task, Output>(iterations: ReadonlyArray<Iteration<Task, Output>>): number | undefined;
|
|
89
194
|
|
|
195
|
+
/**
|
|
196
|
+
* @experimental
|
|
197
|
+
*
|
|
198
|
+
* `createSandboxPlanner` — wire the dynamic driver's `TopologyPlanner` to a
|
|
199
|
+
* real agent. Each round it spins a sandbox on `profile`, streams a prompt that
|
|
200
|
+
* carries the history summary, and decodes the agent's chosen `TopologyMove`
|
|
201
|
+
* from a JSON envelope it emits. This is the "agent authors its own loop
|
|
202
|
+
* topology" path: the planner profile can be any harness (claude-code, codex,
|
|
203
|
+
* opencode, pi) — its only job is to read what happened and emit the next move.
|
|
204
|
+
*
|
|
205
|
+
* The planner profile is deliberately distinct from the worker `agentRuns`: a
|
|
206
|
+
* cheap fast model can steer topology while expensive workers do the labor, and
|
|
207
|
+
* the planner never names which harness runs a branch — the kernel's
|
|
208
|
+
* `agentRuns` round-robin decides that.
|
|
209
|
+
*
|
|
210
|
+
* Envelope contract the agent must emit (fenced ```json or a structured
|
|
211
|
+
* `result`/`final` event payload):
|
|
212
|
+
* { "kind": "refine" | "fanout" | "stop",
|
|
213
|
+
* "tasks"?: [ <task>, ... ], // decoded via `decodeTask`
|
|
214
|
+
* "n"?: number, // fanout shorthand: N copies of the root task
|
|
215
|
+
* "rationale"?: string }
|
|
216
|
+
*
|
|
217
|
+
* A missing / unparseable / unknown-kind envelope throws `PlannerError` — the
|
|
218
|
+
* loop never silently runs a topology the agent did not choose.
|
|
219
|
+
*/
|
|
220
|
+
|
|
221
|
+
/** Raw, pre-decode envelope an agent emits to choose the next move. */
|
|
222
|
+
interface TopologyMoveEnvelope {
|
|
223
|
+
kind: string;
|
|
224
|
+
tasks?: unknown[];
|
|
225
|
+
n?: number;
|
|
226
|
+
rationale?: string;
|
|
227
|
+
}
|
|
228
|
+
/** @experimental */
|
|
229
|
+
interface CreateSandboxPlannerOptions<Task, Output> {
|
|
230
|
+
/** Sandbox client — the planner calls `.create()` once per round. */
|
|
231
|
+
client: LoopSandboxClient;
|
|
232
|
+
/** The planner agent. Steers topology; does not run the work. */
|
|
233
|
+
profile: AgentProfile;
|
|
234
|
+
/**
|
|
235
|
+
* Decode one raw task from the envelope's `tasks[]` into a domain `Task`.
|
|
236
|
+
* Required because `Task` is opaque to this module — only the caller knows
|
|
237
|
+
* its shape. Throw to reject a malformed task; the error surfaces as a
|
|
238
|
+
* `PlannerError`.
|
|
239
|
+
*/
|
|
240
|
+
decodeTask: (raw: unknown, ctx: PlannerContext<Task, Output>) => Task;
|
|
241
|
+
/** Override the default prompt (history summary + envelope contract). */
|
|
242
|
+
buildPrompt?: (ctx: PlannerContext<Task, Output>) => string;
|
|
243
|
+
/** Override envelope extraction from the event stream. */
|
|
244
|
+
parseEnvelope?: (events: SandboxEvent[]) => TopologyMoveEnvelope | undefined;
|
|
245
|
+
/** Sandbox overrides for the planner sandbox (timeouts, env, etc.). */
|
|
246
|
+
sandboxOverrides?: AgentRunSpec<Task>['sandboxOverrides'];
|
|
247
|
+
/** Cancellation for the planner's own LLM call. */
|
|
248
|
+
signal?: AbortSignal;
|
|
249
|
+
}
|
|
250
|
+
/** @experimental */
|
|
251
|
+
declare function createSandboxPlanner<Task, Output>(opts: CreateSandboxPlannerOptions<Task, Output>): TopologyPlanner<Task, Output>;
|
|
252
|
+
|
|
90
253
|
/**
|
|
91
254
|
* @experimental
|
|
92
255
|
*
|
|
@@ -153,4 +316,102 @@ interface RunLoopOptions<Task, Output, Decision> {
|
|
|
153
316
|
/** @experimental */
|
|
154
317
|
declare function runLoop<Task, Output, Decision>(options: RunLoopOptions<Task, Output, Decision>): Promise<LoopResult<Task, Output, Decision>>;
|
|
155
318
|
|
|
156
|
-
|
|
319
|
+
/**
|
|
320
|
+
* Bridge a finished `runLoop` into an agent-eval campaign / profile-matrix
|
|
321
|
+
* dispatch.
|
|
322
|
+
*
|
|
323
|
+
* `runProfileMatrix` (and `runCampaign`) run the backend-integrity guard over
|
|
324
|
+
* the token usage a dispatch reports through `ctx.cost`. A dispatch that wraps
|
|
325
|
+
* `runLoop` must forward the loop's cost AND token usage, or the guard reads
|
|
326
|
+
* the run as a stub and throws. `reportLoopUsage` is that one line:
|
|
327
|
+
*
|
|
328
|
+
* const dispatch: ProfileDispatchFn<S, A> = async (profile, scenario, ctx) => {
|
|
329
|
+
* const result = await runLoop({ ...optsFor(profile, scenario), ctx: loopCtx })
|
|
330
|
+
* reportLoopUsage(ctx, result)
|
|
331
|
+
* return result.winner?.output as A
|
|
332
|
+
* }
|
|
333
|
+
*
|
|
334
|
+
* Typed structurally against the campaign `DispatchContext.cost` so this module
|
|
335
|
+
* stays free of an agent-eval import — it works with any cost meter exposing
|
|
336
|
+
* `observe` + `observeTokens`.
|
|
337
|
+
*/
|
|
338
|
+
|
|
339
|
+
/** The slice of an agent-eval campaign `DispatchContext.cost` this needs. */
|
|
340
|
+
interface UsageSink {
|
|
341
|
+
observe(amountUsd: number, source: string): void;
|
|
342
|
+
observeTokens(usage: {
|
|
343
|
+
input: number;
|
|
344
|
+
output: number;
|
|
345
|
+
}): void;
|
|
346
|
+
}
|
|
347
|
+
/**
|
|
348
|
+
* Forward a `LoopResult`'s aggregated cost + token usage into a campaign cost
|
|
349
|
+
* meter so the backend-integrity guard sees real LLM activity. `source`
|
|
350
|
+
* defaults to `'loop'`.
|
|
351
|
+
*/
|
|
352
|
+
declare function reportLoopUsage<Task, Output, Decision>(cost: UsageSink, result: Pick<LoopResult<Task, Output, Decision>, 'costUsd' | 'tokenUsage'>, source?: string): void;
|
|
353
|
+
|
|
354
|
+
/**
|
|
355
|
+
* `loopDispatch` — turn `runLoop` into an agent-eval campaign dispatch.
|
|
356
|
+
*
|
|
357
|
+
* Without this adapter a consumer wiring `runLoop` into `runProfileMatrix` /
|
|
358
|
+
* `runCampaign` has to, by hand, every time: (a) build an `ExecCtx` with a
|
|
359
|
+
* sandbox client, (b) adapt the campaign `DispatchContext.trace` into a
|
|
360
|
+
* `LoopTraceEmitter` (or lose all loop trace correlation), and (c) remember to
|
|
361
|
+
* forward the loop's cost + tokens via `ctx.cost` (forgetting it yields a
|
|
362
|
+
* `{0,0}` cell the backend-integrity guard reads as a stub). Three foot-guns,
|
|
363
|
+
* the third silent. The fleet's products skipped (c) and fell back to a
|
|
364
|
+
* `workerRecords[]` side-channel — the exact anti-pattern the substrate exists
|
|
365
|
+
* to kill.
|
|
366
|
+
*
|
|
367
|
+
* `loopDispatch` collapses all three into one typed call:
|
|
368
|
+
*
|
|
369
|
+
* const dispatch = loopDispatch({
|
|
370
|
+
* sandboxClient,
|
|
371
|
+
* toLoopOptions: (scenario, profile) => ({ driver, agentRun, output, validator, task }),
|
|
372
|
+
* })
|
|
373
|
+
* await runProfileMatrix({ profiles, scenarios, dispatch, judges, commitSha })
|
|
374
|
+
*
|
|
375
|
+
* Usage is reported automatically; trace events are forwarded automatically;
|
|
376
|
+
* the ctx is built automatically. The seam becomes impossible to mis-wire.
|
|
377
|
+
*
|
|
378
|
+
* Typed structurally against the campaign `DispatchContext` (imported type-only
|
|
379
|
+
* from `@tangle-network/agent-eval/campaign`) — a downward dependency, never an
|
|
380
|
+
* inversion.
|
|
381
|
+
*/
|
|
382
|
+
|
|
383
|
+
/** runLoop options minus the `ctx` (loopDispatch builds the ctx). */
|
|
384
|
+
type LoopOptionsForDispatch<Task, Output, Decision> = Omit<RunLoopOptions<Task, Output, Decision>, 'ctx'>;
|
|
385
|
+
interface LoopDispatchOptions<Task, Output, Decision, TScenario extends Scenario, TArtifact> {
|
|
386
|
+
/** Sandbox client used for every cell's `runLoop`. Supplied once. */
|
|
387
|
+
sandboxClient: LoopSandboxClient;
|
|
388
|
+
/** Build the per-cell runLoop options from the scenario (+ profile, when
|
|
389
|
+
* used with `runProfileMatrix`). */
|
|
390
|
+
toLoopOptions: (scenario: TScenario, profile: AgentProfile$1) => LoopOptionsForDispatch<Task, Output, Decision>;
|
|
391
|
+
/** Map the finished loop to the artifact the judges score. Default:
|
|
392
|
+
* `result.winner?.output`. A loop with no winner yields `undefined` (judges
|
|
393
|
+
* skip the cell) — but the loop's token usage is STILL reported, so the
|
|
394
|
+
* integrity guard sees real activity. */
|
|
395
|
+
toArtifact?: (result: LoopResult<Task, Output, Decision>) => TArtifact;
|
|
396
|
+
/** Forward `loop.*` trace events into the campaign's scoped trace so loop
|
|
397
|
+
* spans correlate with the cell. Default true. */
|
|
398
|
+
forwardTrace?: boolean;
|
|
399
|
+
/** Cost-meter source label for the loop's spend. Default `'loop'`. */
|
|
400
|
+
costSource?: string;
|
|
401
|
+
}
|
|
402
|
+
/**
|
|
403
|
+
* Adapter for `runProfileMatrix` (profile is an axis). Returns a
|
|
404
|
+
* `ProfileDispatchFn` that runs `runLoop` per (profile, scenario) cell and
|
|
405
|
+
* reports usage automatically.
|
|
406
|
+
*/
|
|
407
|
+
declare function loopDispatch<Task, Output, Decision, TScenario extends Scenario, TArtifact>(opts: LoopDispatchOptions<Task, Output, Decision, TScenario, TArtifact>): ProfileDispatchFn<TScenario, TArtifact>;
|
|
408
|
+
/**
|
|
409
|
+
* Adapter for `runCampaign` (no profile axis). `toLoopOptions` receives only
|
|
410
|
+
* the scenario; the `profile` passed to the shared core is a stable sentinel
|
|
411
|
+
* so a single `runLoop` config is reused across cells.
|
|
412
|
+
*/
|
|
413
|
+
declare function loopCampaignDispatch<Task, Output, Decision, TScenario extends Scenario, TArtifact>(opts: Omit<LoopDispatchOptions<Task, Output, Decision, TScenario, TArtifact>, 'toLoopOptions'> & {
|
|
414
|
+
toLoopOptions: (scenario: TScenario) => LoopOptionsForDispatch<Task, Output, Decision>;
|
|
415
|
+
}): DispatchFn<TScenario, TArtifact>;
|
|
416
|
+
|
|
417
|
+
export { AgentRunSpec, type CreateDynamicDriverOptions, type CreateFanoutVoteDriverOptions, type CreateRefineDriverOptions, type CreateSandboxPlannerOptions, Driver, type DynamicDecision, ExecCtx, type FanoutVoteDecision, type FanoutVoteScored, Iteration, type LoopDispatchOptions, type LoopOptionsForDispatch, LoopResult, LoopSandboxClient, LoopWinner, OutputAdapter, type PlannerContext, type RefineDecision, type RunLoopOptions, type TopologyMove, type TopologyMoveEnvelope, type TopologyPlanner, type UsageSink, Validator, createDynamicDriver, createFanoutVoteDriver, createRefineDriver, createSandboxPlanner, loopCampaignDispatch, loopDispatch, refineWinnerIndex, reportLoopUsage, runLoop, scoreFanoutVoteIterations, summarizeHistory };
|
package/dist/loops.js
CHANGED
|
@@ -1,19 +1,31 @@
|
|
|
1
1
|
import {
|
|
2
|
+
createDynamicDriver,
|
|
2
3
|
createRefineDriver,
|
|
4
|
+
createSandboxPlanner,
|
|
5
|
+
loopCampaignDispatch,
|
|
6
|
+
loopDispatch,
|
|
3
7
|
refineWinnerIndex,
|
|
4
|
-
|
|
5
|
-
|
|
8
|
+
reportLoopUsage,
|
|
9
|
+
runLoop,
|
|
10
|
+
summarizeHistory
|
|
11
|
+
} from "./chunk-GLTUUKTN.js";
|
|
6
12
|
import {
|
|
7
13
|
createFanoutVoteDriver,
|
|
8
14
|
scoreFanoutVoteIterations
|
|
9
|
-
} from "./chunk-URDSRUPQ.js";
|
|
10
|
-
import "./chunk-XZYF3YJN.js";
|
|
15
|
+
} from "./chunk-PY6NMZYX.js";
|
|
16
|
+
import "./chunk-SQSCRJ7U.js";
|
|
11
17
|
import "./chunk-DGUM43GV.js";
|
|
12
18
|
export {
|
|
19
|
+
createDynamicDriver,
|
|
13
20
|
createFanoutVoteDriver,
|
|
14
21
|
createRefineDriver,
|
|
22
|
+
createSandboxPlanner,
|
|
23
|
+
loopCampaignDispatch,
|
|
24
|
+
loopDispatch,
|
|
15
25
|
refineWinnerIndex,
|
|
26
|
+
reportLoopUsage,
|
|
16
27
|
runLoop,
|
|
17
|
-
scoreFanoutVoteIterations
|
|
28
|
+
scoreFanoutVoteIterations,
|
|
29
|
+
summarizeHistory
|
|
18
30
|
};
|
|
19
31
|
//# sourceMappingURL=loops.js.map
|
package/dist/mcp/bin.js
CHANGED
|
@@ -3,15 +3,15 @@ import {
|
|
|
3
3
|
createDefaultCoderDelegate,
|
|
4
4
|
createMcpServer,
|
|
5
5
|
detectExecutor
|
|
6
|
-
} from "../chunk-ZJACJZF7.js";
|
|
7
|
-
import "../chunk-UNQM6XQO.js";
|
|
6
|
+
} from "../chunk-AAJVQRPL.js";
|
|
7
|
+
import "../chunk-HSX6PFZR.js";
|
|
8
8
|
import "../chunk-GLR25NG7.js";
|
|
9
9
|
import {
|
|
10
10
|
runLoop
|
|
11
|
-
} from "../chunk-TZ53F7M7.js";
|
|
12
|
-
import "../chunk-CBQVID7G.js";
|
|
13
|
-
import "../chunk-URDSRUPQ.js";
|
|
14
|
-
import "../chunk-XZYF3YJN.js";
|
|
11
|
+
} from "../chunk-GLTUUKTN.js";
|
|
12
|
+
import "../chunk-5QVVET72.js";
|
|
13
|
+
import "../chunk-PY6NMZYX.js";
|
|
14
|
+
import "../chunk-SQSCRJ7U.js";
|
|
15
15
|
import "../chunk-DGUM43GV.js";
|
|
16
16
|
|
|
17
17
|
// src/mcp/bin.ts
|
package/dist/mcp/index.d.ts
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { L as LoopSandboxClient, h as LoopSandboxPlacement, k as LoopTraceEmitter } from '../types-CpCX1pfx.js';
|
|
2
2
|
import { SandboxInstance } from '@tangle-network/sandbox';
|
|
3
3
|
import { CoderOutput } from '../profiles.js';
|
|
4
4
|
import { L as LocalHarness, r as runLocalHarness } from '../local-harness-KrdFTY5R.js';
|
|
5
5
|
export { a as LocalHarnessResult, R as RunLocalHarnessOptions } from '../local-harness-KrdFTY5R.js';
|
|
6
|
-
import { O as OtelExporter } from '../otel-export-B2UBcPV4.js';
|
|
7
|
-
export { m as mcpToolsForRuntimeMcp, a as mcpToolsForRuntimeMcpSubset } from '../otel-export-B2UBcPV4.js';
|
|
6
|
+
import { O as OtelExporter } from '../otel-export-CsgwKFq8.js';
|
|
7
|
+
export { m as mcpToolsForRuntimeMcp, a as mcpToolsForRuntimeMcpSubset } from '../otel-export-CsgwKFq8.js';
|
|
8
8
|
import '@tangle-network/agent-eval';
|
|
9
9
|
import '../runtime-run-B8VIiOhI.js';
|
|
10
10
|
import '../types-CsCCryln.js';
|
package/dist/mcp/index.js
CHANGED
|
@@ -9,13 +9,13 @@ import {
|
|
|
9
9
|
createWorktree,
|
|
10
10
|
detectExecutor,
|
|
11
11
|
removeWorktree
|
|
12
|
-
} from "../chunk-ZJACJZF7.js";
|
|
12
|
+
} from "../chunk-AAJVQRPL.js";
|
|
13
13
|
import {
|
|
14
14
|
createOtelExporter,
|
|
15
15
|
loopEventToOtelSpan,
|
|
16
16
|
mcpToolsForRuntimeMcp,
|
|
17
17
|
mcpToolsForRuntimeMcpSubset
|
|
18
|
-
} from "../chunk-QZEDHTT2.js";
|
|
18
|
+
} from "../chunk-RO7K6JNF.js";
|
|
19
19
|
import {
|
|
20
20
|
DELEGATE_CODE_DESCRIPTION,
|
|
21
21
|
DELEGATE_CODE_INPUT_SCHEMA,
|
|
@@ -46,14 +46,14 @@ import {
|
|
|
46
46
|
validateDelegateResearchArgs,
|
|
47
47
|
validateDelegationHistoryArgs,
|
|
48
48
|
validateDelegationStatusArgs
|
|
49
|
-
} from "../chunk-UNQM6XQO.js";
|
|
49
|
+
} from "../chunk-HSX6PFZR.js";
|
|
50
50
|
import {
|
|
51
51
|
runLocalHarness
|
|
52
52
|
} from "../chunk-GLR25NG7.js";
|
|
53
|
-
import "../chunk-TZ53F7M7.js";
|
|
54
|
-
import "../chunk-CBQVID7G.js";
|
|
55
|
-
import "../chunk-URDSRUPQ.js";
|
|
56
|
-
import "../chunk-XZYF3YJN.js";
|
|
53
|
+
import "../chunk-GLTUUKTN.js";
|
|
54
|
+
import "../chunk-5QVVET72.js";
|
|
55
|
+
import "../chunk-PY6NMZYX.js";
|
|
56
|
+
import "../chunk-SQSCRJ7U.js";
|
|
57
57
|
import "../chunk-DGUM43GV.js";
|
|
58
58
|
|
|
59
59
|
// src/mcp/trace-propagation.ts
|
package/dist/{otel-export-B2UBcPV4.d.ts → otel-export-CsgwKFq8.d.ts}
CHANGED
|
@@ -110,5 +110,60 @@ declare function loopEventToOtelSpan(event: {
|
|
|
110
110
|
timestamp: number;
|
|
111
111
|
payload: object;
|
|
112
112
|
}, traceId: string, parentSpanId?: string): OtelSpan;
|
|
113
|
+
/** Wire version the eval-runs ingest enforces (X-Tangle-Wire-Version + body). */
|
|
114
|
+
declare const INTELLIGENCE_WIRE_VERSION = "2026-05-26.v1";
|
|
115
|
+
interface EvalRunGeneration {
|
|
116
|
+
/** 0-based ordinal of this generation within the run (required by ingest). */
|
|
117
|
+
index: number;
|
|
118
|
+
/** Identity of the proposed surface change (content-addressed hash). */
|
|
119
|
+
surfaceHash: string;
|
|
120
|
+
/** Arbitrary provenance for this generation (rationale, evidence, source). */
|
|
121
|
+
surface?: unknown;
|
|
122
|
+
/** Per-scenario results; empty until the generation is measured. */
|
|
123
|
+
cells?: unknown[];
|
|
124
|
+
/** Mean composite score (0 when unmeasured — pair with labels.measured). */
|
|
125
|
+
compositeMean: number;
|
|
126
|
+
costUsd: number;
|
|
127
|
+
durationMs: number;
|
|
128
|
+
}
|
|
129
|
+
interface EvalRunEvent {
|
|
130
|
+
runId: string;
|
|
131
|
+
runDir: string;
|
|
132
|
+
/** ISO timestamp. */
|
|
133
|
+
timestamp: string;
|
|
134
|
+
status: 'started' | 'baseline-complete' | 'generation-complete' | 'gate-decided' | 'finished' | 'errored';
|
|
135
|
+
labels?: Record<string, string>;
|
|
136
|
+
baseline?: EvalRunGeneration;
|
|
137
|
+
generations?: EvalRunGeneration[];
|
|
138
|
+
gateDecision?: 'ship' | 'hold' | 'need_more_work' | 'model_ceiling' | 'arch_ceiling';
|
|
139
|
+
holdoutLift?: number;
|
|
140
|
+
totalCostUsd: number;
|
|
141
|
+
totalDurationMs: number;
|
|
142
|
+
errorMessage?: string;
|
|
143
|
+
}
|
|
144
|
+
interface EvalRunsExportConfig {
|
|
145
|
+
/** Bearer key — tenant is resolved server-side from it. Reads TANGLE_API_KEY. */
|
|
146
|
+
apiKey?: string;
|
|
147
|
+
/** Intelligence base. Reads INTELLIGENCE_BASE env, else prod. */
|
|
148
|
+
base?: string;
|
|
149
|
+
/** Idempotency-Key header (e.g. the runId) — safe retries + upsert. */
|
|
150
|
+
idempotencyKey?: string;
|
|
151
|
+
}
|
|
152
|
+
interface EvalRunsExportResult {
|
|
153
|
+
ok: boolean;
|
|
154
|
+
status: number;
|
|
155
|
+
accepted: number;
|
|
156
|
+
rejected: Array<{
|
|
157
|
+
index: number;
|
|
158
|
+
reason: string;
|
|
159
|
+
}>;
|
|
160
|
+
}
|
|
161
|
+
/**
|
|
162
|
+
* Ship self-improvement eval-run events to Tangle Intelligence. Unlike the
|
|
163
|
+
* best-effort span exporter, this RESOLVES with the ingest verdict (accepted /
|
|
164
|
+
* rejected per event) so a consumer's loop can assert its provenance landed.
|
|
165
|
+
* Throws only on a missing key or network failure.
|
|
166
|
+
*/
|
|
167
|
+
declare function exportEvalRuns(events: EvalRunEvent[], config?: EvalRunsExportConfig): Promise<EvalRunsExportResult>;
|
|
113
168
|
|
|
114
|
-
export { type OtelExporter as O, mcpToolsForRuntimeMcpSubset as a, type
|
|
169
|
+
export { type EvalRunEvent as E, INTELLIGENCE_WIRE_VERSION as I, type OtelExporter as O, mcpToolsForRuntimeMcpSubset as a, type EvalRunGeneration as b, type EvalRunsExportConfig as c, type EvalRunsExportResult as d, type OtelAttribute as e, type OtelExportConfig as f, type OtelSpan as g, createOtelExporter as h, exportEvalRuns as i, loopEventToOtelSpan as l, mcpToolsForRuntimeMcp as m };
|
package/dist/profiles.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { AgentProfile } from '@tangle-network/sandbox';
|
|
2
|
-
import { O as OutputAdapter, V as Validator, A as AgentRunSpec, D as Driver } from './types-Cbe54nB7.js';
|
|
2
|
+
import { O as OutputAdapter, V as Validator, A as AgentRunSpec, D as Driver } from './types-CpCX1pfx.js';
|
|
3
3
|
import '@tangle-network/agent-eval';
|
|
4
4
|
import './runtime-run-B8VIiOhI.js';
|
|
5
5
|
import './types-CsCCryln.js';
|
package/dist/profiles.js
CHANGED
|
@@ -2,9 +2,9 @@ import {
|
|
|
2
2
|
coderProfile,
|
|
3
3
|
createCoderValidator,
|
|
4
4
|
multiHarnessCoderFanout
|
|
5
|
-
} from "./chunk-CBQVID7G.js";
|
|
6
|
-
import "./chunk-URDSRUPQ.js";
|
|
7
|
-
import "./chunk-XZYF3YJN.js";
|
|
5
|
+
} from "./chunk-5QVVET72.js";
|
|
6
|
+
import "./chunk-PY6NMZYX.js";
|
|
7
|
+
import "./chunk-SQSCRJ7U.js";
|
|
8
8
|
import "./chunk-DGUM43GV.js";
|
|
9
9
|
export {
|
|
10
10
|
coderProfile,
|
package/dist/{types-Cbe54nB7.d.ts → types-CpCX1pfx.d.ts}
CHANGED
|
@@ -74,6 +74,14 @@ interface AgentRunSpec<Task> {
|
|
|
74
74
|
interface OutputAdapter<Output> {
|
|
75
75
|
parse(events: SandboxEvent[]): Output;
|
|
76
76
|
}
|
|
77
|
+
/** LLM token usage. Structurally matches agent-eval's `RunTokenUsage` /
|
|
78
|
+
* `CampaignTokenUsage` ({ input, output }) so a loop result maps straight
|
|
79
|
+
* onto `ctx.cost.observeTokens` in a `runProfileMatrix` dispatch — without
|
|
80
|
+
* which the backend-integrity guard reads the run as a stub. */
|
|
81
|
+
interface LoopTokenUsage {
|
|
82
|
+
input: number;
|
|
83
|
+
output: number;
|
|
84
|
+
}
|
|
77
85
|
/** @experimental */
|
|
78
86
|
interface Iteration<Task, Output> {
|
|
79
87
|
/** 0-based iteration index assigned by the kernel. */
|
|
@@ -89,6 +97,8 @@ interface Iteration<Task, Output> {
|
|
|
89
97
|
startedAt: number;
|
|
90
98
|
endedAt: number;
|
|
91
99
|
costUsd: number;
|
|
100
|
+
/** Summed LLM token usage across every `llm_call` event in this iteration. */
|
|
101
|
+
tokenUsage: LoopTokenUsage;
|
|
92
102
|
}
|
|
93
103
|
/** @experimental */
|
|
94
104
|
interface Driver<Task, Output, Decision> {
|
|
@@ -125,6 +135,10 @@ interface LoopResult<Task, Output, Decision> {
|
|
|
125
135
|
durationMs: number;
|
|
126
136
|
/** Sum of every iteration's `costUsd`. */
|
|
127
137
|
costUsd: number;
|
|
138
|
+
/** Sum of every iteration's token usage. Forward to
|
|
139
|
+
* `ctx.cost.observeTokens` in a `runProfileMatrix` dispatch so the
|
|
140
|
+
* integrity guard sees real LLM activity. */
|
|
141
|
+
tokenUsage: LoopTokenUsage;
|
|
128
142
|
}
|
|
129
143
|
/**
|
|
130
144
|
* Minimal sandbox client surface the kernel calls. Satisfied structurally by
|
|
@@ -267,4 +281,4 @@ interface ExecCtx {
|
|
|
267
281
|
parentSpanId?: string;
|
|
268
282
|
}
|
|
269
283
|
|
|
270
|
-
export type { AgentRunSpec as A, Driver as D, ExecCtx as E, Iteration as I,
|
|
284
|
+
export type { AgentRunSpec as A, Driver as D, ExecCtx as E, Iteration as I, LoopSandboxClient as L, OutputAdapter as O, Validator as V, LoopWinner as a, LoopResult as b, LoopDecisionPayload as c, LoopEndedPayload as d, LoopIterationDispatchPayload as e, LoopIterationEndedPayload as f, LoopIterationStartedPayload as g, LoopSandboxPlacement as h, LoopStartedPayload as i, LoopTokenUsage as j, LoopTraceEmitter as k, LoopTraceEvent as l, ValidationCtx as m };
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tangle-network/agent-runtime",
|
|
3
|
-
"version": "0.30.1",
|
|
3
|
+
"version": "0.33.0",
|
|
4
4
|
"description": "Reusable runtime lifecycle for domain-specific agents.",
|
|
5
5
|
"homepage": "https://github.com/tangle-network/agent-runtime#readme",
|
|
6
6
|
"repository": {
|
|
@@ -65,22 +65,42 @@
|
|
|
65
65
|
"publishConfig": {
|
|
66
66
|
"access": "public"
|
|
67
67
|
},
|
|
68
|
-
"
|
|
69
|
-
"
|
|
68
|
+
"scripts": {
|
|
69
|
+
"build": "tsup",
|
|
70
|
+
"dev": "tsup --watch",
|
|
71
|
+
"prepare": "tsup",
|
|
72
|
+
"test": "vitest run",
|
|
73
|
+
"test:watch": "vitest",
|
|
74
|
+
"lint": "biome check src tests examples",
|
|
75
|
+
"lint:fix": "biome check --write src tests examples",
|
|
76
|
+
"typecheck": "tsc --noEmit"
|
|
70
77
|
},
|
|
78
|
+
"dependencies": {},
|
|
71
79
|
"devDependencies": {
|
|
72
80
|
"@biomejs/biome": "^2.4.0",
|
|
81
|
+
"@tangle-network/agent-eval": "^0.61.0",
|
|
73
82
|
"@tangle-network/sandbox": "^0.4.0",
|
|
74
83
|
"@types/node": "^25.6.0",
|
|
75
84
|
"tsup": "^8.0.0",
|
|
76
85
|
"typescript": "^5.7.0",
|
|
77
86
|
"vitest": "^3.0.0"
|
|
78
87
|
},
|
|
88
|
+
"pnpm": {
|
|
89
|
+
"minimumReleaseAge": 4320,
|
|
90
|
+
"minimumReleaseAgeExclude": [
|
|
91
|
+
"@tangle-network/agent-eval"
|
|
92
|
+
],
|
|
93
|
+
"onlyBuiltDependencies": [
|
|
94
|
+
"esbuild"
|
|
95
|
+
]
|
|
96
|
+
},
|
|
79
97
|
"engines": {
|
|
80
98
|
"node": ">=20"
|
|
81
99
|
},
|
|
82
100
|
"license": "MIT",
|
|
101
|
+
"packageManager": "pnpm@10.28.0",
|
|
83
102
|
"peerDependencies": {
|
|
103
|
+
"@tangle-network/agent-eval": ">=0.61.0 <1.0.0",
|
|
84
104
|
"@tangle-network/agent-knowledge": ">=1.3.0 <2.0.0",
|
|
85
105
|
"@tangle-network/sandbox": ">=0.1.2 <0.5.0"
|
|
86
106
|
},
|
|
@@ -91,14 +111,5 @@
|
|
|
91
111
|
"@tangle-network/sandbox": {
|
|
92
112
|
"optional": true
|
|
93
113
|
}
|
|
94
|
-
},
|
|
95
|
-
"scripts": {
|
|
96
|
-
"build": "tsup",
|
|
97
|
-
"dev": "tsup --watch",
|
|
98
|
-
"test": "vitest run",
|
|
99
|
-
"test:watch": "vitest",
|
|
100
|
-
"lint": "biome check src tests examples",
|
|
101
|
-
"lint:fix": "biome check --write src tests examples",
|
|
102
|
-
"typecheck": "tsc --noEmit"
|
|
103
114
|
}
|
|
104
|
-
}
|
|
115
|
+
}
|