@tangle-network/agent-runtime 0.18.0 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,74 @@
1
+ import {
2
+ DELEGATE_CODE_DESCRIPTION,
3
+ DELEGATE_CODE_INPUT_SCHEMA,
4
+ DELEGATE_CODE_TOOL_NAME,
5
+ DELEGATE_FEEDBACK_DESCRIPTION,
6
+ DELEGATE_FEEDBACK_INPUT_SCHEMA,
7
+ DELEGATE_FEEDBACK_TOOL_NAME,
8
+ DELEGATE_RESEARCH_DESCRIPTION,
9
+ DELEGATE_RESEARCH_INPUT_SCHEMA,
10
+ DELEGATE_RESEARCH_TOOL_NAME,
11
+ DELEGATION_HISTORY_DESCRIPTION,
12
+ DELEGATION_HISTORY_INPUT_SCHEMA,
13
+ DELEGATION_HISTORY_TOOL_NAME,
14
+ DELEGATION_STATUS_DESCRIPTION,
15
+ DELEGATION_STATUS_INPUT_SCHEMA,
16
+ DELEGATION_STATUS_TOOL_NAME,
17
+ DelegationTaskQueue,
18
+ InMemoryFeedbackStore,
19
+ createDefaultCoderDelegate,
20
+ createDelegateCodeHandler,
21
+ createDelegateFeedbackHandler,
22
+ createDelegateResearchHandler,
23
+ createDelegationHistoryHandler,
24
+ createDelegationStatusHandler,
25
+ createInProcessTransport,
26
+ createMcpServer,
27
+ eventToSnapshot,
28
+ hashIdempotencyInput,
29
+ validateDelegateCodeArgs,
30
+ validateDelegateFeedbackArgs,
31
+ validateDelegateResearchArgs,
32
+ validateDelegationHistoryArgs,
33
+ validateDelegationStatusArgs
34
+ } from "../chunk-LPPM7EGS.js";
35
+ import "../chunk-VFUEE6DF.js";
36
+ import "../chunk-Z5LKAYAS.js";
37
+ import "../chunk-XLWPTPRP.js";
38
+ import "../chunk-RZAOYKCO.js";
39
+ import "../chunk-DGUM43GV.js";
40
+ export {
41
+ DELEGATE_CODE_DESCRIPTION,
42
+ DELEGATE_CODE_INPUT_SCHEMA,
43
+ DELEGATE_CODE_TOOL_NAME,
44
+ DELEGATE_FEEDBACK_DESCRIPTION,
45
+ DELEGATE_FEEDBACK_INPUT_SCHEMA,
46
+ DELEGATE_FEEDBACK_TOOL_NAME,
47
+ DELEGATE_RESEARCH_DESCRIPTION,
48
+ DELEGATE_RESEARCH_INPUT_SCHEMA,
49
+ DELEGATE_RESEARCH_TOOL_NAME,
50
+ DELEGATION_HISTORY_DESCRIPTION,
51
+ DELEGATION_HISTORY_INPUT_SCHEMA,
52
+ DELEGATION_HISTORY_TOOL_NAME,
53
+ DELEGATION_STATUS_DESCRIPTION,
54
+ DELEGATION_STATUS_INPUT_SCHEMA,
55
+ DELEGATION_STATUS_TOOL_NAME,
56
+ DelegationTaskQueue,
57
+ InMemoryFeedbackStore,
58
+ createDefaultCoderDelegate,
59
+ createDelegateCodeHandler,
60
+ createDelegateFeedbackHandler,
61
+ createDelegateResearchHandler,
62
+ createDelegationHistoryHandler,
63
+ createDelegationStatusHandler,
64
+ createInProcessTransport,
65
+ createMcpServer,
66
+ eventToSnapshot,
67
+ hashIdempotencyInput,
68
+ validateDelegateCodeArgs,
69
+ validateDelegateFeedbackArgs,
70
+ validateDelegateResearchArgs,
71
+ validateDelegationHistoryArgs,
72
+ validateDelegationStatusArgs
73
+ };
74
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}
@@ -0,0 +1,133 @@
1
+ import { AgentProfile } from '@tangle-network/sandbox';
2
+ import { O as OutputAdapter, V as Validator, A as AgentRunSpec, a as Driver } from './types-Bx-tArkc.js';
3
+ import './runtime-run-B2j-hvBj.js';
4
+ import './types-DvJIha6w.js';
5
+ import '@tangle-network/agent-eval';
6
+
7
+ /**
8
+ * @experimental
9
+ *
10
+ * `coderProfile` — opinionated preset for code-modification tasks.
11
+ *
12
+ * The agent is told to:
13
+ * - work on a fresh branch inside the sandbox workspace
14
+ * - keep the patch minimal (under `maxDiffLines`)
15
+ * - avoid `forbiddenPaths`
16
+ * - run `testCmd` and `typecheckCmd`
17
+ * - emit a final JSON result the output adapter parses
18
+ *
19
+ * The profile is stateless and agent-agnostic — `harness` selects the
20
+ * sandbox-SDK backend (`claude-code`, `codex`, `opencode/*`). For
21
+ * heterogeneous fanout, use `multiHarnessCoderFanout`.
22
+ */
23
+
24
+ /** @experimental */
25
+ interface CoderTask {
26
+ /** What the agent must accomplish. Free-form prose. */
27
+ goal: string;
28
+ /** Absolute path inside the sandbox where the repo lives. */
29
+ repoRoot: string;
30
+ /** Default `main`. The branch the agent diffs against. */
31
+ baseBranch?: string;
32
+ /** Default `pnpm test --run`. */
33
+ testCmd?: string;
34
+ /** Default `pnpm typecheck`. */
35
+ typecheckCmd?: string;
36
+ /** Files the agent may inspect for context. Surfaced verbatim in the prompt. */
37
+ contextFiles?: string[];
38
+ /**
39
+ * Paths the agent must not touch. Validator hard-fails on any match.
40
+ * Use glob-free literal path prefixes for unambiguous enforcement.
41
+ */
42
+ forbiddenPaths?: string[];
43
+ /** Default 400. Hard cap; validator hard-fails when exceeded. */
44
+ maxDiffLines?: number;
45
+ }
46
+ /** @experimental */
47
+ interface CoderOutput {
48
+ /** Branch the agent wrote the patch on. */
49
+ branch: string;
50
+ /** Unified diff (`git diff <base>..HEAD`). */
51
+ patch: string;
52
+ testResult: {
53
+ passed: boolean;
54
+ output: string;
55
+ };
56
+ typecheckResult: {
57
+ passed: boolean;
58
+ output: string;
59
+ };
60
+ diffStats: {
61
+ filesChanged: number;
62
+ insertions: number;
63
+ deletions: number;
64
+ };
65
+ /** Optional reviewer commentary surfaced by the agent. */
66
+ reviewerNotes?: string;
67
+ }
68
+ /** @experimental */
69
+ interface CoderProfileOptions {
70
+ /** Sandbox-SDK backend.type. Default `'claude-code'`. */
71
+ harness?: string;
72
+ /** Default model id passed in `AgentProfile.model.default`. */
73
+ model?: string;
74
+ /** Custom system prompt replacement. Default = built-in coder preset. */
75
+ systemPrompt?: string;
76
+ /** Stable name for `AgentRunSpec.name`. Default = `coder-${harness}`. */
77
+ name?: string;
78
+ }
79
+ /**
80
+ * Build a coder preset.
81
+ *
82
+ * `validator` enforces test + typecheck + a 400-line default diff cap. For
83
+ * per-task `forbiddenPaths` / `maxDiffLines` enforcement, pass `task` here
84
+ * — the returned validator closes over its constraints. Without a task
85
+ * the validator falls back to the default cap and skips path enforcement.
86
+ *
87
+ * @experimental
88
+ */
89
+ declare function coderProfile(options?: CoderProfileOptions & {
90
+ task?: CoderTask;
91
+ }): {
92
+ profile: AgentProfile;
93
+ taskToPrompt: (task: CoderTask) => string;
94
+ output: OutputAdapter<CoderOutput>;
95
+ validator: Validator<CoderOutput>;
96
+ agentRunSpec: AgentRunSpec<CoderTask>;
97
+ };
98
+ /** @experimental */
99
+ interface MultiHarnessCoderFanoutOptions {
100
+ /**
101
+ * Sandbox-SDK backend.type identifiers, one per parallel agent. Default:
102
+ * `['claude-code', 'codex', 'opencode/zai-coding-plan/glm-5.1']`.
103
+ */
104
+ harnesses?: string[];
105
+ /** Optional per-harness model override. Indexed parallel to `harnesses`. */
106
+ models?: (string | undefined)[];
107
+ }
108
+ /** @experimental */
109
+ declare function multiHarnessCoderFanout(options?: MultiHarnessCoderFanoutOptions): {
110
+ agentRuns: AgentRunSpec<CoderTask>[];
111
+ output: OutputAdapter<CoderOutput>;
112
+ validator: Validator<CoderOutput>;
113
+ driver: Driver<CoderTask, CoderOutput, 'pick-winner' | 'fail'>;
114
+ };
115
+ /**
116
+ * Build a validator that closes over a specific `CoderTask`'s constraints.
117
+ *
118
+ * Checks in order:
119
+ * 1. Forbidden-path: any `+++` / `---` header in the patch matching a
120
+ * path prefix in `task.forbiddenPaths` fails hard.
121
+ * 2. Diff size: line count above `task.maxDiffLines` (default 400) fails
122
+ * hard; below cap, the score shrinks linearly.
123
+ * 3. Tests: `output.testResult.passed` must be `true`.
124
+ * 4. Typecheck: `output.typecheckResult.passed` must be `true`.
125
+ *
126
+ * Aggregate score: `0.5 * tests + 0.3 * typecheck + 0.2 * (1 - diffLines/maxDiff)`.
127
+ * `valid` is the conjunction of all four.
128
+ *
129
+ * @experimental
130
+ */
131
+ declare function createCoderValidator(task: CoderTask): Validator<CoderOutput>;
132
+
133
+ export { type CoderOutput, type CoderProfileOptions, type CoderTask, type MultiHarnessCoderFanoutOptions, coderProfile, createCoderValidator, multiHarnessCoderFanout };
@@ -0,0 +1,14 @@
1
+ import {
2
+ coderProfile,
3
+ createCoderValidator,
4
+ multiHarnessCoderFanout
5
+ } from "./chunk-Z5LKAYAS.js";
6
+ import "./chunk-XLWPTPRP.js";
7
+ import "./chunk-RZAOYKCO.js";
8
+ import "./chunk-DGUM43GV.js";
9
+ export {
10
+ coderProfile,
11
+ createCoderValidator,
12
+ multiHarnessCoderFanout
13
+ };
14
+ //# sourceMappingURL=profiles.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}
@@ -0,0 +1,137 @@
1
+ import { A as AgentTaskSpec, R as RuntimeStreamEvent } from './types-DvJIha6w.js';
2
+
3
+ /**
4
+ * @stable
5
+ *
6
+ * Production-run lifecycle: record what the agent did on behalf of a customer,
7
+ * what it cost, and how it ended.
8
+ *
9
+ * Three concerns live in this module:
10
+ *
11
+ * 1. **Lifecycle state machine** — `running` -> `completed | failed | cancelled`;
12
+ * illegal transitions throw `RuntimeRunStateError`. Completion is idempotent for the same
13
+ * status (a second `complete()` call is a no-op so retries / cleanup paths
14
+ * don't double-fire side effects). A different terminal status is a state
15
+ * error.
16
+ *
17
+ * 2. **Cost ledger** — every `llm_call` event the handle observes contributes
18
+ * `tokensIn`, `tokensOut`, `costUsd`, and bumps `llmCalls`. Wall time is
19
+ * measured from `startRuntimeRun()` to `complete()`. Surface via
20
+ * `handle.cost()` for cost-per-task dashboards.
21
+ *
22
+ * 3. **Persistence adapter** — `RuntimeRunPersistenceAdapter` is the seam
23
+ * consumers plug in to write a `RuntimeRunRow` to their D1 / postgres /
24
+ * KV store. The adapter receives a sanitized row shape; no telemetry
25
+ * payload bytes flow through it. NOTE(review): the opt-in this comment used to
26
+ * cite, `RuntimeRunOptions.telemetryEvents`, is not declared on that interface.
27
+ */
28
+
29
+ /** @stable */
30
+ type RuntimeRunStatus = 'running' | 'completed' | 'failed' | 'cancelled';
31
+ /** @stable */
32
+ interface RuntimeRunCost {
33
+ /** Cumulative input tokens across every observed `llm_call` event. */
34
+ tokensIn: number;
35
+ /** Cumulative output tokens across every observed `llm_call` event. */
36
+ tokensOut: number;
37
+ /** Sum of `costUsd` from every observed `llm_call` event. */
38
+ costUsd: number;
39
+ /** Wall time from `startRuntimeRun()` to `complete()` (or `now()` if not yet completed). */
40
+ wallMs: number;
41
+ /** Count of `llm_call` events observed during the run. */
42
+ llmCalls: number;
43
+ }
44
+ /** @stable */
45
+ interface RuntimeRunCompleteInput {
46
+ status: Exclude<RuntimeRunStatus, 'running'>;
47
+ resultSummary?: string;
48
+ /** Optional explicit cost override; if omitted, the accumulated ledger is used. */
49
+ cost?: Partial<RuntimeRunCost>;
50
+ /** Stable error message when `status === 'failed'`. */
51
+ error?: string;
52
+ /** Additional adapter-specific fields merged into the persisted row. */
53
+ metadata?: Record<string, unknown>;
54
+ }
55
+ /** @stable */
56
+ interface RuntimeRunRow {
57
+ /** Stable runtime-side identifier. Adapters may translate to their own primary key. */
58
+ id: string;
59
+ workspaceId: string;
60
+ sessionId?: string;
61
+ agentId?: string;
62
+ domain?: string;
63
+ taskId: string;
64
+ scenarioId?: string;
65
+ status: RuntimeRunStatus;
66
+ resultSummary?: string;
67
+ error?: string;
68
+ cost: RuntimeRunCost;
69
+ startedAt: string;
70
+ completedAt?: string;
71
+ metadata?: Record<string, unknown>;
72
+ }
73
+ /** @stable */
74
+ interface RuntimeRunPersistenceAdapter {
75
+ /**
76
+ * Called when `handle.persist()` runs (again on each retry, with the same
77
+ * row). Implementations write `row` to their durable store (D1, postgres,
78
+ * KV). The declared return type is `void`, so nothing is handed back to the
79
+ * caller. Errors thrown here propagate out of `persist()` so the caller can
80
+ * decide whether to retry or log-and-continue.
81
+ */
82
+ upsert(row: RuntimeRunRow): Promise<void> | void;
83
+ }
84
+ /** @stable */
85
+ interface RuntimeRunOptions {
86
+ workspaceId: string;
87
+ sessionId?: string;
88
+ agentId?: string;
89
+ taskSpec: AgentTaskSpec;
90
+ scenarioId?: string;
91
+ /** Optional persistence adapter; if omitted, `persist()` is a no-op. */
92
+ adapter?: RuntimeRunPersistenceAdapter;
93
+ /** Override the row id; default = `taskSpec.id`, a colon, then a random suffix. */
94
+ id?: string;
95
+ /** Override the clock; default = `Date.now`. Useful for deterministic tests. */
96
+ now?: () => number;
97
+ }
98
+ /** @stable */
99
+ interface RuntimeRunHandle {
100
+ /** Stable id assigned at start. */
101
+ readonly id: string;
102
+ readonly workspaceId: string;
103
+ readonly sessionId: string | undefined;
104
+ readonly taskSpec: AgentTaskSpec;
105
+ readonly status: RuntimeRunStatus;
106
+ /**
107
+ * Observe a single `RuntimeStreamEvent`. The handle ignores non-cost events
108
+ * (text deltas, tool calls) silently so consumers can pipe the whole stream
109
+ * through `handle.observe`. `llm_call` events update the ledger.
110
+ */
111
+ observe(event: RuntimeStreamEvent): void;
112
+ /** Snapshot of the current cost ledger. Safe to call at any time. */
113
+ cost(): RuntimeRunCost;
114
+ /**
115
+ * Transition to a terminal state. Idempotent for the same status; throws
116
+ * `RuntimeRunStateError` for a different terminal status (state machines
117
+ * don't time-travel).
118
+ */
119
+ complete(input: RuntimeRunCompleteInput): void;
120
+ /** Build the current row without writing it. Useful for tests + dry runs. */
121
+ toRow(metadata?: Record<string, unknown>): RuntimeRunRow;
122
+ /**
123
+ * Persist the current row via the configured adapter. Must be called after
124
+ * `complete()`. Idempotent for the same terminal state (the adapter sees
125
+ * the same row on retry).
126
+ */
127
+ persist(metadata?: Record<string, unknown>): Promise<void>;
128
+ }
129
+ /**
130
+ * @stable
131
+ *
132
+ * Construct a runtime-run handle. The returned handle is mutable across its
133
+ * lifetime; consumers should not share it across requests.
134
+ */
135
+ declare function startRuntimeRun(options: RuntimeRunOptions): RuntimeRunHandle;
136
+
137
+ export { type RuntimeRunHandle as R, type RuntimeRunPersistenceAdapter as a, type RuntimeRunRow as b, startRuntimeRun as s };
@@ -0,0 +1,225 @@
1
+ import { AgentProfile, CreateSandboxOptions, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox';
2
+ import { R as RuntimeRunHandle } from './runtime-run-B2j-hvBj.js';
3
+
4
+ /**
5
+ * @experimental
6
+ *
7
+ * Driven-loop substrate — type surface.
8
+ *
9
+ * The loop kernel orchestrates around the sandbox SDK; it does not invent
10
+ * its own notion of "what an agent is". Each iteration is a sandbox-SDK
11
+ * `streamPrompt` call against an `AgentProfile`. The kernel owns iteration
12
+ * accounting, concurrency, abort propagation, cost aggregation, and trace
13
+ * emission; the driver owns topology (plan + decide); the validator owns
14
+ * output scoring; the output adapter owns event-stream → typed-output decode.
15
+ */
16
+
17
+ /** @experimental */
18
+ interface DefaultVerdict {
19
+ /** Whether the output meets the validator's pass criteria. */
20
+ valid: boolean;
21
+ /** Aggregate score in [0, 1]. Drivers use this for winner selection. */
22
+ score: number;
23
+ /** Per-dimension scores. Free-form; weighted into `score` by the validator. */
24
+ scores?: Record<string, number>;
25
+ /** Human-readable rationale; surfaces in trace + final-result `winner.verdict`. */
26
+ notes?: string;
27
+ }
28
+ /** @experimental */
29
+ interface ValidationCtx {
30
+ /** Iteration index this output came from (0-based). */
31
+ iteration: number;
32
+ /** Cooperative cancellation channel. */
33
+ signal: AbortSignal;
34
+ }
35
+ /** @experimental */
36
+ interface Validator<Output, Verdict = DefaultVerdict> {
37
+ validate(output: Output, ctx: ValidationCtx): Promise<Verdict>;
38
+ }
39
+ /**
40
+ * Sandbox-SDK-shaped agent specification.
41
+ *
42
+ * The kernel uses `profile` to instantiate a sandbox per iteration, formats
43
+ * `task` into a prompt via `taskToPrompt`, and merges `sandboxOverrides` into
44
+ * the `CreateSandboxOptions` it passes to `client.create`. Heterogeneous
45
+ * fanout supplies multiple `AgentRunSpec`s and the kernel round-robins
46
+ * through them when the driver plans N tasks.
47
+ *
48
+ * @experimental
49
+ */
50
+ interface AgentRunSpec<Task> {
51
+ /** Sandbox SDK profile — what kind of agent runs the task. */
52
+ profile: AgentProfile;
53
+ /** Task → prompt formatter. Pure and deterministic. */
54
+ taskToPrompt: (task: Task) => string;
55
+ /**
56
+ * Per-spec stable name. Surfaced in trace events and the default winner
57
+ * selector tiebreak. Falls back to `profile.name ?? 'agent'`.
58
+ */
59
+ name?: string;
60
+ /**
61
+ * Optional sandbox-SDK `CreateSandboxOptions` overrides merged on top of
62
+ * the kernel's defaults. `backend.profile` is set to `profile` by the
63
+ * kernel and cannot be overridden here — use `profile` itself for that.
64
+ */
65
+ sandboxOverrides?: Partial<Omit<CreateSandboxOptions, 'backend'>> & {
66
+ backend?: Omit<NonNullable<CreateSandboxOptions['backend']>, 'profile'>;
67
+ };
68
+ }
69
+ /**
70
+ * Stream of `SandboxEvent`s → typed `Output`.
71
+ *
72
+ * Adapters are pure functions over the already-collected event array; they
73
+ * do not receive the live AsyncIterable so they can be replayed against
74
+ * persisted streams during tests / replays.
75
+ *
76
+ * @experimental
77
+ */
78
+ interface OutputAdapter<Output> {
79
+ parse(events: SandboxEvent[]): Output;
80
+ }
81
+ /** @experimental */
82
+ interface Iteration<Task, Output> {
83
+ /** 0-based iteration index assigned by the kernel. */
84
+ index: number;
85
+ task: Task;
86
+ /** Stable name of the `AgentRunSpec` that produced this iteration. */
87
+ agentRunName: string;
88
+ output?: Output;
89
+ verdict?: DefaultVerdict;
90
+ error?: Error;
91
+ /** Raw sandbox event stream collected for this iteration. */
92
+ events: SandboxEvent[];
93
+ startedAt: number;
94
+ endedAt: number;
95
+ costUsd: number;
96
+ }
97
+ /** @experimental */
98
+ interface Driver<Task, Output, Decision> {
99
+ /**
100
+ * Stable identifier surfaced in trace events. Default `'driver'`.
101
+ */
102
+ readonly name?: string;
103
+ /**
104
+ * Tasks to issue this iteration. `[task]` → refine; N copies → fanout;
105
+ * `[]` → no more work this round (kernel proceeds to `decide`).
106
+ */
107
+ plan(task: Task, history: ReadonlyArray<Iteration<Task, Output>>): Promise<Task[]>;
108
+ /**
109
+ * Inspect history and return the next state. The kernel terminates the
110
+ * loop when `decide` returns a value listed in `isTerminalDecision`
111
+ * (`'stop' | 'pick-winner' | 'fail' | 'done'`), when `maxIterations`
112
+ * is hit, or when the abort signal fires.
113
+ */
114
+ decide(history: ReadonlyArray<Iteration<Task, Output>>): Decision | Promise<Decision>;
115
+ }
116
+ /** @experimental */
117
+ interface LoopWinner<Task, Output> {
118
+ task: Task;
119
+ output: Output;
120
+ verdict?: DefaultVerdict;
121
+ iterationIndex: number;
122
+ agentRunName: string;
123
+ }
124
+ /** @experimental */
125
+ interface LoopResult<Task, Output, Decision> {
126
+ decision: Decision;
127
+ iterations: Iteration<Task, Output>[];
128
+ winner?: LoopWinner<Task, Output>;
129
+ durationMs: number;
130
+ /** Sum of every iteration's `costUsd`. */
131
+ costUsd: number;
132
+ }
133
+ /**
134
+ * Minimal sandbox client surface the kernel calls. Satisfied structurally by
135
+ * `new Sandbox({ apiKey, baseUrl })` — declared as a structural type so
136
+ * tests can pass a stub without instantiating the SDK.
137
+ *
138
+ * @experimental
139
+ */
140
+ interface LoopSandboxClient {
141
+ create(options?: CreateSandboxOptions): Promise<SandboxInstance>;
142
+ }
143
+ /** @experimental */
144
+ interface LoopTraceEmitter {
145
+ emit(event: LoopTraceEvent): void | Promise<void>;
146
+ }
147
+ /** @experimental */
148
+ type LoopTraceEvent = {
149
+ kind: 'loop.started';
150
+ runId: string;
151
+ timestamp: number;
152
+ payload: LoopStartedPayload;
153
+ } | {
154
+ kind: 'loop.iteration.started';
155
+ runId: string;
156
+ timestamp: number;
157
+ payload: LoopIterationStartedPayload;
158
+ } | {
159
+ kind: 'loop.iteration.ended';
160
+ runId: string;
161
+ timestamp: number;
162
+ payload: LoopIterationEndedPayload;
163
+ } | {
164
+ kind: 'loop.decision';
165
+ runId: string;
166
+ timestamp: number;
167
+ payload: LoopDecisionPayload;
168
+ } | {
169
+ kind: 'loop.ended';
170
+ runId: string;
171
+ timestamp: number;
172
+ payload: LoopEndedPayload;
173
+ };
174
+ /** @experimental */
175
+ interface LoopStartedPayload {
176
+ driver: string;
177
+ agentRunNames: string[];
178
+ maxIterations: number;
179
+ maxConcurrency: number;
180
+ }
181
+ /** @experimental */
182
+ interface LoopIterationStartedPayload {
183
+ iterationIndex: number;
184
+ agentRunName: string;
185
+ taskHash: string;
186
+ }
187
+ /** @experimental */
188
+ interface LoopIterationEndedPayload {
189
+ iterationIndex: number;
190
+ agentRunName: string;
191
+ outputHash?: string;
192
+ verdict?: DefaultVerdict;
193
+ error?: string;
194
+ costUsd: number;
195
+ durationMs: number;
196
+ }
197
+ /** @experimental */
198
+ interface LoopDecisionPayload {
199
+ decision: string;
200
+ historyLength: number;
201
+ }
202
+ /** @experimental */
203
+ interface LoopEndedPayload {
204
+ winnerIterationIndex?: number;
205
+ totalCostUsd: number;
206
+ durationMs: number;
207
+ iterations: number;
208
+ }
209
+ /** @experimental */
210
+ interface ExecCtx {
211
+ /** Sandbox SDK client — the kernel calls `.create()` per iteration. */
212
+ sandboxClient: LoopSandboxClient;
213
+ /** Optional trace emitter. When set, the kernel emits `loop.*` events. */
214
+ traceEmitter?: LoopTraceEmitter;
215
+ /**
216
+ * Optional production-run handle. When set, every synthesized `llm_call`
217
+ * the kernel infers from a sandbox event stream is forwarded via
218
+ * `runHandle.observe` so per-run cost aggregates pick up loop spend.
219
+ */
220
+ runHandle?: RuntimeRunHandle;
221
+ /** Cooperative cancellation signal. */
222
+ signal?: AbortSignal;
223
+ }
224
+
225
+ export type { AgentRunSpec as A, DefaultVerdict as D, ExecCtx as E, Iteration as I, LoopWinner as L, OutputAdapter as O, Validator as V, Driver as a, LoopResult as b, LoopDecisionPayload as c, LoopEndedPayload as d, LoopIterationEndedPayload as e, LoopIterationStartedPayload as f, LoopSandboxClient as g, LoopStartedPayload as h, LoopTraceEmitter as i, LoopTraceEvent as j, ValidationCtx as k };
@@ -376,4 +376,4 @@ interface KnowledgeReadinessDecision {
376
376
  nonBlockingGapIds: string[];
377
377
  }
378
378
 
379
- export type { AgentBackendInput as A, KnowledgeReadinessDecision as K, RuntimeStreamEvent as R, AgentExecutionBackend as a, AgentBackendContext as b, RunAgentTaskOptions as c, AgentTaskRunResult as d, RunAgentTaskStreamOptions as e, AgentTaskSpec as f, AgentRuntimeEvent as g, AgentTaskStatus as h, RuntimeSessionStore as i, RuntimeSession as j, AgentAdapter as k, AgentKnowledgeProvider as l, AgentRuntimeEventSink as m, AgentTaskContext as n };
379
+ export type { AgentTaskSpec as A, KnowledgeReadinessDecision as K, RuntimeStreamEvent as R, AgentBackendInput as a, AgentExecutionBackend as b, AgentBackendContext as c, RunAgentTaskOptions as d, AgentTaskRunResult as e, RunAgentTaskStreamOptions as f, AgentRuntimeEvent as g, AgentTaskStatus as h, RuntimeSessionStore as i, RuntimeSession as j, AgentAdapter as k, AgentKnowledgeProvider as l, AgentRuntimeEventSink as m, AgentTaskContext as n };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tangle-network/agent-runtime",
3
- "version": "0.18.0",
3
+ "version": "0.20.0",
4
4
  "description": "Reusable runtime lifecycle for domain-specific agents.",
5
5
  "homepage": "https://github.com/tangle-network/agent-runtime#readme",
6
6
  "repository": {
@@ -33,8 +33,26 @@
33
33
  "types": "./dist/agent.d.ts",
34
34
  "import": "./dist/agent.js",
35
35
  "default": "./dist/agent.js"
36
+ },
37
+ "./loops": {
38
+ "types": "./dist/loops.d.ts",
39
+ "import": "./dist/loops.js",
40
+ "default": "./dist/loops.js"
41
+ },
42
+ "./profiles": {
43
+ "types": "./dist/profiles.d.ts",
44
+ "import": "./dist/profiles.js",
45
+ "default": "./dist/profiles.js"
46
+ },
47
+ "./mcp": {
48
+ "types": "./dist/mcp/index.d.ts",
49
+ "import": "./dist/mcp/index.js",
50
+ "default": "./dist/mcp/index.js"
36
51
  }
37
52
  },
53
+ "bin": {
54
+ "agent-runtime-mcp": "./dist/mcp/bin.js"
55
+ },
38
56
  "files": [
39
57
  "dist",
40
58
  "README.md"
@@ -78,6 +96,12 @@
78
96
  "license": "MIT",
79
97
  "packageManager": "pnpm@10.28.0",
80
98
  "peerDependencies": {
99
+ "@tangle-network/agent-knowledge": ">=1.3.0 <2.0.0",
81
100
  "@tangle-network/sandbox": ">=0.1.2 <0.3.0"
101
+ },
102
+ "peerDependenciesMeta": {
103
+ "@tangle-network/agent-knowledge": {
104
+ "optional": true
105
+ }
82
106
  }
83
107
  }