@tangle-network/agent-runtime 0.14.1 → 0.15.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +166 -84
- package/dist/agent.d.ts +1 -1
- package/dist/index.d.ts +363 -5
- package/dist/index.js +433 -4
- package/dist/index.js.map +1 -1
- package/dist/{types-jr_EFhrD.d.ts → types-CYxfw14J.d.ts} +9 -0
- package/package.json +7 -3
package/README.md
CHANGED
|
@@ -2,8 +2,10 @@
|
|
|
2
2
|
|
|
3
3
|
Production runtime substrate for domain agents. Owns the task lifecycle
|
|
4
4
|
(knowledge readiness, control loop, session resume, sanitized telemetry,
|
|
5
|
-
|
|
6
|
-
|
|
5
|
+
durable runs across worker / DO crashes, canonical `RuntimeRunRow`
|
|
6
|
+
persistence + cost ledger), the chat-model catalog + admission, and the
|
|
7
|
+
declarative `defineAgent` manifest — so domain repos stop inventing their
|
|
8
|
+
own.
|
|
7
9
|
|
|
8
10
|
```bash
|
|
9
11
|
pnpm add @tangle-network/agent-runtime @tangle-network/agent-eval
|
|
@@ -15,12 +17,20 @@ pnpm add @tangle-network/agent-runtime @tangle-network/agent-eval
|
|
|
15
17
|
|---|---|
|
|
16
18
|
| `runAgentTask` | Single-shot adapter-driven task with eval/verification |
|
|
17
19
|
| `runAgentTaskStream` | Streaming product loop with session resume + backends |
|
|
20
|
+
| `runDurableTurn` | Checkpoint+replay chat turn — survives a worker crash *after* completion |
|
|
21
|
+
| `runSupervisedTurn` | Always-attached durable turn — re-attaches an in-flight sandbox run *during* a crash |
|
|
22
|
+
| `SessionSupervisorDO` | Cloudflare Durable Object host for `runSupervisedTurn` (with alarm-driven orphan re-attach) |
|
|
23
|
+
| `DurableChatTurnEngine` | Framework-neutral chat-turn orchestrator (durable turn + NDJSON + session lifecycle + product hooks) |
|
|
18
24
|
| `startRuntimeRun` | Canonical production-run row + cost ledger |
|
|
25
|
+
| `runDurable` + `*DurableRunStore` | General durable-step substrate (in-memory / file-system / D1) |
|
|
26
|
+
| `defineAgent` | Declarative per-vertical agent manifest — surfaces, knowledge, rubric, run fn |
|
|
27
|
+
| `resolveChatModel` / `validateChatModelId` / `getModels` | Router catalog fetch + fail-closed admission + precedence resolver |
|
|
19
28
|
| `createTraceBridge` | Map `RuntimeStreamEvent` → `agent-eval` `TraceEvent` |
|
|
20
29
|
| `decideKnowledgeReadiness` | `ready` / `blocked` / `caveat` branch for routes / UI |
|
|
21
30
|
| `createOpenAICompatibleBackend` | OpenAI-compatible streaming backend (TCloud / cli-bridge) |
|
|
22
31
|
| `createSandboxPromptBackend` | Sandbox / sidecar `streamPrompt` clients |
|
|
23
32
|
| `createRuntimeStreamEventCollector` | Default-redacted sanitized telemetry over a stream |
|
|
33
|
+
| `PlatformAuthClient` + `PlatformHubClient` (`/platform`) | Cross-site SSO + integrations hub |
|
|
24
34
|
|
|
25
35
|
Every public export is annotated `@stable` or `@experimental`. `@stable`
|
|
26
36
|
exports do not change shape inside a minor; `@experimental` exports may
|
|
@@ -32,55 +42,147 @@ change inside a minor and require a deliberate consumer bump.
|
|
|
32
42
|
import { runAgentTask } from '@tangle-network/agent-runtime'
|
|
33
43
|
|
|
34
44
|
const result = await runAgentTask({
|
|
35
|
-
task: {
|
|
36
|
-
id: 'review-2026-return',
|
|
37
|
-
intent: 'Review the return for missing evidence',
|
|
38
|
-
domain: 'tax',
|
|
39
|
-
},
|
|
45
|
+
task: { id: 'review-2026-return', intent: 'Review the return', domain: 'tax' },
|
|
40
46
|
adapter: {
|
|
41
47
|
async observe() { return { /* domain state */ } },
|
|
42
48
|
async validate({ state }) { return [/* eval results */] },
|
|
43
|
-
async decide({ state }) {
|
|
44
|
-
return { type: 'stop', pass: true, score: 1, reason: 'review complete' }
|
|
45
|
-
},
|
|
49
|
+
async decide({ state }) { return { type: 'stop', pass: true, score: 1, reason: 'done' } },
|
|
46
50
|
async act() { return undefined },
|
|
47
51
|
},
|
|
48
52
|
})
|
|
49
|
-
|
|
50
53
|
console.log(result.status, result.runRecords)
|
|
51
54
|
```
|
|
52
55
|
|
|
56
|
+
## Durable chat turns
|
|
57
|
+
|
|
58
|
+
A 15-minute agentic turn must survive a Cloudflare worker isolate dying.
|
|
59
|
+
`runDurableTurn` replays a *completed* turn from cache (worker died after
|
|
60
|
+
the turn finished). `runSupervisedTurn` closes the harder gap — a turn
|
|
61
|
+
interrupted *mid-stream* — by relocating the durability boundary off the
|
|
62
|
+
ephemeral worker:
|
|
63
|
+
|
|
64
|
+
- The supervisor drains every event into the substrate's own ordered log
|
|
65
|
+
(`appendStreamEvent`, idempotent on `eventId`).
|
|
66
|
+
- It persists the substrate `RunHandle` the instant the sandbox yields it.
|
|
67
|
+
- A fresh supervisor reads the log for its cursor and resumes via
|
|
68
|
+
`adapter.attach(handle, cursor)` — no event lost, none delivered twice.
|
|
69
|
+
|
|
70
|
+
The reconnect glue is one typed contract — `SandboxReconnectAdapter` —
|
|
71
|
+
implemented once per substrate, not per product.
|
|
72
|
+
|
|
73
|
+
```ts
|
|
74
|
+
import { runSupervisedTurn, InMemoryDurableRunStore } from '@tangle-network/agent-runtime'
|
|
75
|
+
|
|
76
|
+
const store = new InMemoryDurableRunStore()
|
|
77
|
+
const supervised = runSupervisedTurn({
|
|
78
|
+
store, runId: `chat:${threadId}:${turnIndex}`, manifest, workerId,
|
|
79
|
+
adapter: mySandboxAdapter,
|
|
80
|
+
})
|
|
81
|
+
for await (const event of supervised.stream) sendToClient(event)
|
|
82
|
+
// supervised.mode() === 'fresh' | 'resumed' | 'replayed'
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
Full runnable: [`examples/durable-supervisor/`](./examples/durable-supervisor/).
|
|
86
|
+
|
|
87
|
+
### Cloudflare Durable Object host
|
|
88
|
+
|
|
89
|
+
`SessionSupervisorDO` hosts the supervisor on a real DO — `fetch` streams the
|
|
90
|
+
turn, `alarm()` re-attaches a run that a dropped response stream abandoned.
|
|
91
|
+
|
|
92
|
+
```ts
|
|
93
|
+
import { createSessionSupervisorDO } from '@tangle-network/agent-runtime'
|
|
94
|
+
|
|
95
|
+
export const SessionSupervisor = createSessionSupervisorDO({
|
|
96
|
+
resolveRun(request, env, state) { /* return RunSupervisorOptions */ },
|
|
97
|
+
resolveOrphan(runId, env, state) { /* same, for the alarm path */ },
|
|
98
|
+
encodeEvent(event) { return `data: ${JSON.stringify(event)}\n\n` },
|
|
99
|
+
})
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
```toml
|
|
103
|
+
# wrangler.toml
|
|
104
|
+
[[durable_objects.bindings]]
|
|
105
|
+
name = "SESSION_SUPERVISOR"
|
|
106
|
+
class_name = "SessionSupervisor"
|
|
107
|
+
[[migrations]]
|
|
108
|
+
tag = "v1"
|
|
109
|
+
new_classes = ["SessionSupervisor"]
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
CF types are structural (`DurableObjectStateLike`) — no
|
|
113
|
+
`@cloudflare/workers-types` runtime dep.
|
|
114
|
+
|
|
115
|
+
## Chat-model resolution
|
|
116
|
+
|
|
117
|
+
One primitive every chat handler needs and was hand-rolling per repo:
|
|
118
|
+
router catalog fetch, malformed-id guard, fail-closed catalog admission,
|
|
119
|
+
precedence resolver. Policy-free — the caller passes its own precedence
|
|
120
|
+
order and known-good allowlist.
|
|
121
|
+
|
|
122
|
+
```ts
|
|
123
|
+
import {
|
|
124
|
+
resolveChatModel, resolveRouterBaseUrl, validateChatModelId, getModels,
|
|
125
|
+
} from '@tangle-network/agent-runtime'
|
|
126
|
+
|
|
127
|
+
const routerBaseUrl = resolveRouterBaseUrl(env)
|
|
128
|
+
const { model, source } = resolveChatModel(
|
|
129
|
+
[
|
|
130
|
+
{ source: 'request', model: requestBody.model },
|
|
131
|
+
{ source: 'workspace', model: workspace.pinnedModel },
|
|
132
|
+
{ source: 'env', model: env.TCLOUD_CHAT_MODEL },
|
|
133
|
+
],
|
|
134
|
+
{ source: 'default', model: 'claude-sonnet-4-6' },
|
|
135
|
+
)
|
|
136
|
+
const validation = await validateChatModelId(model, {
|
|
137
|
+
routerBaseUrl,
|
|
138
|
+
allowlist: ['claude-sonnet-4-6'],
|
|
139
|
+
})
|
|
140
|
+
if (!validation.succeeded) throw new ConfigError(validation.error)
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
Full runnable: [`examples/model-resolution/`](./examples/model-resolution/).
|
|
144
|
+
|
|
145
|
+
## Define an agent — declarative manifest
|
|
146
|
+
|
|
147
|
+
`defineAgent` is the per-vertical layer that pairs a runtime adapter with
|
|
148
|
+
the surfaces / knowledge / rubric / outcome contract that `agent-eval`'s analyst
|
|
149
|
+
loop drives improvement against.
|
|
150
|
+
|
|
151
|
+
```ts
|
|
152
|
+
import { defineAgent } from '@tangle-network/agent-runtime/agent'
|
|
153
|
+
|
|
154
|
+
export const myAgent = defineAgent({
|
|
155
|
+
id: 'legal-agent',
|
|
156
|
+
surfaces: { /* prompt, tools, skills — the levers an analyst can edit */ },
|
|
157
|
+
knowledge: { /* requirements + provider */ },
|
|
158
|
+
rubric: { /* dimensions + weights */ },
|
|
159
|
+
run: async (ctx) => {
|
|
160
|
+
/* product-specific run — typically wraps runSupervisedTurn or runAgentTaskStream */
|
|
161
|
+
},
|
|
162
|
+
})
|
|
163
|
+
```
|
|
164
|
+
|
|
53
165
|
## Canonical production-run lifecycle
|
|
54
166
|
|
|
55
|
-
`startRuntimeRun` records what the agent did
|
|
56
|
-
|
|
57
|
-
|
|
167
|
+
`startRuntimeRun` records what the agent did for a customer, what it
|
|
168
|
+
cost, and how it ended. Replaces bespoke `agentRuns` helpers across
|
|
169
|
+
consumer repos.
|
|
58
170
|
|
|
59
171
|
```ts
|
|
60
172
|
import { startRuntimeRun, runAgentTaskStream } from '@tangle-network/agent-runtime'
|
|
61
173
|
|
|
62
174
|
const run = startRuntimeRun({
|
|
63
|
-
workspaceId: 'ws-1',
|
|
64
|
-
|
|
65
|
-
agentId: 'legal-chat-runtime',
|
|
66
|
-
taskSpec,
|
|
67
|
-
scenarioId: `legal-chat:${threadId}`,
|
|
175
|
+
workspaceId: 'ws-1', sessionId: threadId, agentId: 'legal-chat-runtime',
|
|
176
|
+
taskSpec, scenarioId: `legal-chat:${threadId}`,
|
|
68
177
|
adapter: { upsert: (row) => db.insert(agentRuns).values(row) },
|
|
69
178
|
})
|
|
70
|
-
|
|
71
179
|
for await (const event of runAgentTaskStream({ task: taskSpec, backend, input })) {
|
|
72
|
-
run.observe(event)
|
|
180
|
+
run.observe(event)
|
|
73
181
|
if (event.type === 'final') {
|
|
74
|
-
run.complete({
|
|
75
|
-
status: event.status === 'completed' ? 'completed' : 'failed',
|
|
76
|
-
resultSummary: event.text ?? '',
|
|
77
|
-
error: event.status === 'failed' ? event.reason : undefined,
|
|
78
|
-
})
|
|
182
|
+
run.complete({ status: event.status === 'completed' ? 'completed' : 'failed', resultSummary: event.text ?? '' })
|
|
79
183
|
}
|
|
80
184
|
}
|
|
81
|
-
|
|
82
185
|
await run.persist({ runtimeEvents: telemetry.events })
|
|
83
|
-
console.log(run.cost()) // { tokensIn, tokensOut, costUsd, wallMs, llmCalls }
|
|
84
186
|
```
|
|
85
187
|
|
|
86
188
|
Full runnable: [`examples/runtime-run/`](./examples/runtime-run/).
|
|
@@ -89,7 +191,7 @@ Full runnable: [`examples/runtime-run/`](./examples/runtime-run/).
|
|
|
89
191
|
|
|
90
192
|
If you persist traces in agent-eval's `TraceStore`, the bridge maps
|
|
91
193
|
runtime stream events to `TraceEvent` so consumer repos don't hand-roll
|
|
92
|
-
the adapter
|
|
194
|
+
the adapter.
|
|
93
195
|
|
|
94
196
|
```ts
|
|
95
197
|
import { createTraceBridge } from '@tangle-network/agent-runtime'
|
|
@@ -103,8 +205,6 @@ for await (const event of runAgentTaskStream({ task, backend, input })) {
|
|
|
103
205
|
|
|
104
206
|
## Error taxonomy
|
|
105
207
|
|
|
106
|
-
Every public function throws one of:
|
|
107
|
-
|
|
108
208
|
| Error | When |
|
|
109
209
|
|---|---|
|
|
110
210
|
| `ValidationError` | Caller passed invalid arguments |
|
|
@@ -113,85 +213,67 @@ Every public function throws one of:
|
|
|
113
213
|
| `BackendTransportError` | Backend HTTP / IPC call returned non-success |
|
|
114
214
|
| `SessionMismatchError` | Resume requested against a different backend |
|
|
115
215
|
| `RuntimeRunStateError` | `RuntimeRunHandle` lifecycle methods called out of order |
|
|
216
|
+
| `DurableRunLeaseHeldError` | Another worker holds a live lease on the run |
|
|
217
|
+
| `DurableRunInputMismatchError` | A `runId` exists with a different manifest hash |
|
|
218
|
+
| `DurableRunDivergenceError` | A step's intent changed across replays |
|
|
116
219
|
|
|
117
220
|
All extend `AgentEvalError` (re-exported from `@tangle-network/agent-eval`)
|
|
118
|
-
and carry a stable `code` so cross-package handlers
|
|
221
|
+
and carry a stable `code` so cross-package handlers pattern-match
|
|
119
222
|
without importing the runtime.
|
|
120
223
|
|
|
121
224
|
## Sanitized telemetry
|
|
122
225
|
|
|
123
226
|
`task.intent` flows through sanitized telemetry on every event. **Never
|
|
124
227
|
set it to user input** — use a fixed string describing the operation
|
|
125
|
-
kind (e.g. `"Run a chat turn"`, `"Score a tax return"`). Route
|
|
126
|
-
visible content through `task.inputs` (redacted by default).
|
|
228
|
+
kind (e.g. `"Run a chat turn"`, `"Score a tax return"`). Route
|
|
229
|
+
user-visible content through `task.inputs` (redacted by default).
|
|
127
230
|
|
|
128
231
|
```ts
|
|
129
232
|
import { createRuntimeStreamEventCollector, runAgentTaskStream } from '@tangle-network/agent-runtime'
|
|
130
233
|
|
|
131
234
|
const telemetry = createRuntimeStreamEventCollector()
|
|
132
|
-
for await (const event of runAgentTaskStream({ task, backend }))
|
|
133
|
-
telemetry.onEvent(event)
|
|
134
|
-
}
|
|
235
|
+
for await (const event of runAgentTaskStream({ task, backend })) telemetry.onEvent(event)
|
|
135
236
|
console.log(telemetry.events, telemetry.summary())
|
|
136
237
|
```
|
|
137
238
|
|
|
138
|
-
By default the collector redacts task inputs, user answers, credential
|
|
139
|
-
questions, control payloads, evidence IDs, task metadata, and eval
|
|
140
|
-
details. Private diagnostics opt-in via `RuntimeTelemetryOptions`.
|
|
141
|
-
|
|
142
239
|
## Package boundaries
|
|
143
240
|
|
|
144
241
|
| Package | Owns |
|
|
145
242
|
|---|---|
|
|
146
|
-
| `agent-runtime` | Lifecycle, adapters, backends,
|
|
147
|
-
| `agent-runtime/platform` |
|
|
148
|
-
| `agent-
|
|
149
|
-
| `agent-
|
|
243
|
+
| `agent-runtime` | Lifecycle, adapters, backends, durable substrate, supervisor + DO, model resolution, trace bridge, `defineAgent` |
|
|
244
|
+
| `agent-runtime/platform` | Cross-site SSO (`PlatformAuthClient`) + integrations hub (`PlatformHubClient`) |
|
|
245
|
+
| `agent-runtime/agent` | `defineAgent` + surfaces / outcome adapters |
|
|
246
|
+
| `agent-runtime/analyst-loop` | `runAnalystLoop` — analyst registry driver |
|
|
247
|
+
| `agent-eval` | Control loops, readiness scoring, traces, evals, judges, RL, release evidence |
|
|
248
|
+
| `agent-knowledge` | Evidence, claims, wiki pages, retrieval |
|
|
150
249
|
| Domain packages | Domain tools, policies, credentials, UI text, rubrics |
|
|
151
250
|
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
```ts
|
|
155
|
-
import {
|
|
156
|
-
PlatformAuthClient,
|
|
157
|
-
PlatformHubClient,
|
|
158
|
-
} from '@tangle-network/agent-runtime/platform'
|
|
159
|
-
|
|
160
|
-
// Login with Tangle (cross-site SSO bridge).
|
|
161
|
-
const auth = new PlatformAuthClient({
|
|
162
|
-
baseUrl: process.env.TANGLE_PLATFORM_URL!, // https://id.tangle.tools
|
|
163
|
-
appId: 'gtm-agent', // must be registered in TRUSTED_APPS
|
|
164
|
-
})
|
|
165
|
-
const url = auth.authorizeUrl({ state: csrfToken, redirectUri: callbackUrl })
|
|
166
|
-
// …user redirected to `url`, returns to callbackUrl with ?code=…
|
|
167
|
-
const { apiKey, user } = await auth.exchange(code)
|
|
168
|
-
|
|
169
|
-
// Integrations hub (uses the user's apiKey from cross-site exchange).
|
|
170
|
-
const hub = new PlatformHubClient({
|
|
171
|
-
baseUrl: process.env.TANGLE_PLATFORM_URL!,
|
|
172
|
-
bearer: apiKey,
|
|
173
|
-
})
|
|
174
|
-
const connections = await hub.listConnections()
|
|
175
|
-
const { authorizationUrl } = await hub.startAuth({
|
|
176
|
-
providerId: 'google',
|
|
177
|
-
connectorId: 'gmail',
|
|
178
|
-
returnUrl: 'https://gtm.tangle.tools/integrations',
|
|
179
|
-
})
|
|
180
|
-
```
|
|
181
|
-
|
|
182
|
-
The API uses `runAgentTask`, not `runVerticalAgentTask`. `domain` is
|
|
183
|
-
metadata on the task because the runtime is reusable across many kinds of
|
|
184
|
-
agents without baking taxonomy into type names.
|
|
251
|
+
See [`docs/concepts.md`](./docs/concepts.md) for the mental model.
|
|
185
252
|
|
|
186
253
|
## Examples
|
|
187
254
|
|
|
188
|
-
Runnable in [`examples/`](./examples/)
|
|
255
|
+
Runnable in [`examples/`](./examples/). Every example imports from
|
|
256
|
+
`@tangle-network/agent-runtime` (the same surface consumers use):
|
|
189
257
|
|
|
190
258
|
- [`basic-task/`](./examples/basic-task/) — smallest `runAgentTask`
|
|
191
|
-
- [`with-knowledge-readiness/`](./examples/with-knowledge-readiness/) — readiness gating
|
|
192
|
-
- [`sanitized-telemetry/`](./examples/sanitized-telemetry/)
|
|
193
|
-
- [`
|
|
194
|
-
- [`sse-stream/`](./examples/sse-stream/) — Server-Sent Events for browser clients
|
|
259
|
+
- [`with-knowledge-readiness/`](./examples/with-knowledge-readiness/) — readiness gating
|
|
260
|
+
- [`sanitized-telemetry/`](./examples/sanitized-telemetry/) + [`sanitized-telemetry-streaming/`](./examples/sanitized-telemetry-streaming/) — redaction
|
|
261
|
+
- [`sse-stream/`](./examples/sse-stream/) — SSE helpers for browser clients
|
|
195
262
|
- [`sandbox-stream-backend/`](./examples/sandbox-stream-backend/) — `createSandboxPromptBackend`
|
|
196
263
|
- [`openai-stream-backend/`](./examples/openai-stream-backend/) — `createOpenAICompatibleBackend`
|
|
197
|
-
- [`runtime-run/`](./examples/runtime-run/) —
|
|
264
|
+
- [`runtime-run/`](./examples/runtime-run/) — production-run row + cost ledger
|
|
265
|
+
- [`model-resolution/`](./examples/model-resolution/) — router catalog + fail-closed admission
|
|
266
|
+
- [`durable-supervisor/`](./examples/durable-supervisor/) — cross-worker resume keystone
|
|
267
|
+
- [`agent-into-reviewer/`](./examples/agent-into-reviewer/) — pipe one runtime's stream into a reviewer agent
|
|
268
|
+
- [`chat-handler/`](./examples/chat-handler/) — `DurableChatTurnEngine.runTurn` (the centerpiece production pattern)
|
|
269
|
+
- [`production-trace-sink/`](./examples/production-trace-sink/) — `createProductionTraceSink` data capture
|
|
270
|
+
|
|
271
|
+
## Tests
|
|
272
|
+
|
|
273
|
+
```bash
|
|
274
|
+
pnpm test # full Node suite (251 tests)
|
|
275
|
+
pnpm test:workers # real workerd DO integration test
|
|
276
|
+
pnpm typecheck
|
|
277
|
+
pnpm lint
|
|
278
|
+
pnpm build
|
|
279
|
+
```
|
package/dist/agent.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import * as _tangle_network_agent_eval from '@tangle-network/agent-eval';
|
|
2
2
|
import { FindingSubject, TraceAnalystKindSpec, AnalystFinding, TraceStore, RunCompleteHook, FeedbackLabel, FeedbackTrajectoryStore } from '@tangle-network/agent-eval';
|
|
3
|
-
import { R as RuntimeStreamEvent } from './types-jr_EFhrD.js';
|
|
3
|
+
import { R as RuntimeStreamEvent } from './types-CYxfw14J.js';
|
|
4
4
|
import { I as ImprovementAdapter, K as KnowledgeAdapter, a as RunAnalystLoopResult } from './types-D_MXrmJP.js';
|
|
5
5
|
|
|
6
6
|
/**
|