@tangle-network/agent-runtime 0.44.0 → 0.45.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +95 -203
- package/dist/agent.d.ts +5 -4
- package/dist/agent.js +5 -7
- package/dist/agent.js.map +1 -1
- package/dist/analyst-loop.d.ts +65 -4
- package/dist/analyst-loop.js +6 -1
- package/dist/audit.d.ts +93 -0
- package/dist/audit.js +312 -0
- package/dist/audit.js.map +1 -0
- package/dist/chunk-4B6U4CVQ.js +15 -0
- package/dist/chunk-4B6U4CVQ.js.map +1 -0
- package/dist/chunk-FK53TXOP.js +603 -0
- package/dist/chunk-FK53TXOP.js.map +1 -0
- package/dist/{chunk-SKUZZCHE.js → chunk-IJ6FGOPO.js} +5 -5
- package/dist/chunk-IJ6FGOPO.js.map +1 -0
- package/dist/{chunk-HVYOHJHK.js → chunk-IJGS6J7X.js} +2 -2
- package/dist/chunk-IJGS6J7X.js.map +1 -0
- package/dist/chunk-KEWO4KI6.js +3599 -0
- package/dist/chunk-KEWO4KI6.js.map +1 -0
- package/dist/{chunk-NRZOXCJK.js → chunk-KSMX62JF.js} +2 -2
- package/dist/{chunk-GFKVVRQ7.js → chunk-NYN5RTLP.js} +11 -10
- package/dist/chunk-NYN5RTLP.js.map +1 -0
- package/dist/chunk-PRX45WE2.js +264 -0
- package/dist/chunk-PRX45WE2.js.map +1 -0
- package/dist/{chunk-3HMHSN22.js → chunk-QR4UUC5P.js} +6 -6
- package/dist/chunk-QR4UUC5P.js.map +1 -0
- package/dist/chunk-WIR4HOOJ.js +27 -0
- package/dist/chunk-WIR4HOOJ.js.map +1 -0
- package/dist/{chunk-KDMRUD2P.js → chunk-Z2QXVBA6.js} +296 -8
- package/dist/chunk-Z2QXVBA6.js.map +1 -0
- package/dist/coder-CczgMqFx.d.ts +114 -0
- package/dist/dynamic-BvllHV6M.d.ts +221 -0
- package/dist/{improvement-adapter-BC4HhuAR.d.ts → improvement-adapter-CWegd3vw.d.ts} +1 -1
- package/dist/improvement.d.ts +2 -3
- package/dist/improvement.js +0 -5
- package/dist/improvement.js.map +1 -1
- package/dist/index.d.ts +123 -10
- package/dist/index.js +398 -10
- package/dist/index.js.map +1 -1
- package/dist/{kb-gate-D0ZIhFOU.d.ts → kb-gate-D9GBocLN.d.ts} +82 -5
- package/dist/{loop-runner-bin-BLMa8He3.d.ts → loop-runner-bin-CPrCoKqC.d.ts} +14 -10
- package/dist/loop-runner-bin.d.ts +9 -7
- package/dist/loop-runner-bin.js +6 -8
- package/dist/loops.d.ts +7 -393
- package/dist/loops.js +94 -25
- package/dist/mcp/bin.js +7 -7
- package/dist/mcp/bin.js.map +1 -1
- package/dist/mcp/index.d.ts +284 -11
- package/dist/mcp/index.js +341 -9
- package/dist/mcp/index.js.map +1 -1
- package/dist/{otel-export-wFDmmurL.d.ts → otel-export-Dy2DyUCU.d.ts} +1 -1
- package/dist/profiles.d.ts +385 -86
- package/dist/profiles.js +549 -4
- package/dist/profiles.js.map +1 -1
- package/dist/{run-loop-C4L1Sted.d.ts → run-loop--hSoIknW.d.ts} +35 -12
- package/dist/runtime-hooks-C7JwKb9E.d.ts +70 -0
- package/dist/runtime.d.ts +1860 -0
- package/dist/runtime.js +114 -0
- package/dist/runtime.js.map +1 -0
- package/dist/substrate-CUgk7F7s.d.ts +77 -0
- package/dist/topology.d.ts +73 -0
- package/dist/topology.js +111 -0
- package/dist/topology.js.map +1 -0
- package/dist/types-1HbsFa7H.d.ts +438 -0
- package/dist/{types-p8dWBIXL.d.ts → types-BtRLF2U3.d.ts} +1 -1
- package/dist/{types-DbJzz2uf.d.ts → types-DdzkffAm.d.ts} +95 -1
- package/dist/workflow.d.ts +3 -2
- package/dist/workflow.js +4 -5
- package/dist/workflow.js.map +1 -1
- package/package.json +26 -6
- package/skills/agent-runtime-adoption/SKILL.md +29 -26
- package/dist/chunk-3HMHSN22.js.map +0 -1
- package/dist/chunk-GFKVVRQ7.js.map +0 -1
- package/dist/chunk-HVYOHJHK.js.map +0 -1
- package/dist/chunk-KDMRUD2P.js.map +0 -1
- package/dist/chunk-PY6NMZYX.js +0 -52
- package/dist/chunk-PY6NMZYX.js.map +0 -1
- package/dist/chunk-S7JXV32P.js +0 -947
- package/dist/chunk-S7JXV32P.js.map +0 -1
- package/dist/chunk-SKUZZCHE.js.map +0 -1
- package/dist/chunk-SQSCRJ7U.js +0 -65
- package/dist/chunk-SQSCRJ7U.js.map +0 -1
- package/dist/chunk-VOX6Z3II.js +0 -90
- package/dist/chunk-VOX6Z3II.js.map +0 -1
- package/dist/chunk-XBUG326M.js +0 -261
- package/dist/chunk-XBUG326M.js.map +0 -1
- package/dist/dynamic-wUgp6UKs.d.ts +0 -108
- package/dist/optimize-prompt-D-urF2wW.d.ts +0 -129
- /package/dist/{chunk-NRZOXCJK.js.map → chunk-KSMX62JF.js.map} +0 -0
package/README.md
CHANGED
|
@@ -1,40 +1,25 @@
|
|
|
1
1
|
# @tangle-network/agent-runtime
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
The shared task-lifecycle skeleton for agents. It runs an agent (a chat turn, a one-shot task, or a multi-attempt loop), captures every run as a trace, and feeds those traces into eval-gated self-improvement.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
It owns the lifecycle and the loop kernel. It delegates domain behavior (models, tools, knowledge) to adapters, scoring and the ship gate to [`@tangle-network/agent-eval`](https://www.npmjs.com/package/@tangle-network/agent-eval), and sandboxed long-running execution to [`@tangle-network/sandbox`](https://www.npmjs.com/package/@tangle-network/sandbox).
|
|
6
6
|
|
|
7
7
|
```bash
|
|
8
8
|
pnpm add @tangle-network/agent-runtime @tangle-network/agent-eval @tangle-network/sandbox
|
|
9
9
|
```
|
|
10
10
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
- [2. Driven loops + topology drivers](#2-driven-loops--topology-drivers)
|
|
20
|
-
- [3. Agent-authored topology — `createDynamicDriver`](#3-agent-authored-topology--createdynamicdriver)
|
|
21
|
-
- [4. Delegated loop-runner — `runDelegatedLoop`](#4-delegated-loop-runner--rundelegatedloop)
|
|
22
|
-
- [5. Reliable build-in-a-loop — the coder delegate](#5-reliable-build-in-a-loop--the-coder-delegate)
|
|
23
|
-
- [6. Valid-only research — `createKbGate`](#6-valid-only-research--createkbgate)
|
|
24
|
-
- [7. Identity-gated prompt optimization — `optimizePrompt`](#7-identity-gated-prompt-optimization--optimizeprompt)
|
|
25
|
-
- [8. OpenTelemetry GenAI topology tracing](#8-opentelemetry-genai-topology-tracing)
|
|
26
|
-
- [9. MCP delegation server — `agent-runtime-mcp`](#9-mcp-delegation-server--agent-runtime-mcp)
|
|
27
|
-
- [Defaults](#defaults)
|
|
28
|
-
- [Composition with the stack](#composition-with-the-stack)
|
|
29
|
-
- [Subpath exports](#subpath-exports)
|
|
30
|
-
- [Adoption skill](#adoption-skill)
|
|
31
|
-
- [Stability · Tests · Docs](#stability--tests--docs)
|
|
32
|
-
|
|
33
|
-
---
|
|
11
|
+
## The model
|
|
12
|
+
|
|
13
|
+
One recursive `Agent` atom, run at two timescales, over many tasks. `docs/architecture.md` is the canonical spine. The short version:
|
|
14
|
+
|
|
15
|
+
1. **One atom.** `driver`, `worker`, `selector`, and `coordinator` are not separate types. They are what a single `Agent` returns from `act`. The recursion bottoms out at execution.
|
|
16
|
+
2. **Two timescales, one machinery.** The same loop runs at inference time (steer a worker over k attempts) and at optimization time (search the steer or the prompt with GEPA, gated on a held-out split).
|
|
17
|
+
3. **A benchmark is an adapter.** A new task is a loader plus a worker plus a judge. The loop, the drivers, the corpus, and the selector are the shared spine, written once.
|
|
18
|
+
4. **The selector is not the judge.** At inference time the selector picks which answer to return without seeing the judge's verdict. The judge is write-only: its verdict is recorded but never read back into the loop. A steer may read the trace but never the verdict; this is the firewall that keeps the loop from gaming its own score.
|
|
34
19
|
|
|
35
20
|
## Getting started
|
|
36
21
|
|
|
37
|
-
Every product agent is a `handleChatTurn` call inside a route. This is what gtm
|
|
22
|
+
Every product agent is a `handleChatTurn` call inside a route. This is what the gtm, creative, legal, and tax products run in production:
|
|
38
23
|
|
|
39
24
|
```ts
|
|
40
25
|
import { handleChatTurn } from '@tangle-network/agent-runtime'
|
|
@@ -59,181 +44,95 @@ export async function POST({ request, env, ctx }: { request: Request; env: Env;
|
|
|
59
44
|
}
|
|
60
45
|
```
|
|
61
46
|
|
|
62
|
-
That
|
|
63
|
-
|
|
64
|
-
---
|
|
47
|
+
That is the common case. Everything below is for when one chat turn is not enough: multi-attempt loops, delegation, optimization, and the telemetry that makes them auditable.
|
|
65
48
|
|
|
66
49
|
## Which entry point do I reach for?
|
|
67
50
|
|
|
68
|
-
| You want to
|
|
51
|
+
| You want to | Reach for | Subpath |
|
|
69
52
|
|---|---|---|
|
|
70
|
-
| Run a production chat turn (
|
|
71
|
-
| Declare an agent (profile
|
|
72
|
-
|
|
|
73
|
-
|
|
|
74
|
-
| Let the
|
|
75
|
-
| Delegate a disciplined loop by mode (code
|
|
53
|
+
| Run a production chat turn (most products) | `handleChatTurn` | root |
|
|
54
|
+
| Declare an agent (profile, surfaces, adapters) | `defineAgent` | `/agent` |
|
|
55
|
+
| Run a one-shot task with verification and eval | `runAgentTask` | root |
|
|
56
|
+
| Run a multi-attempt loop (refine or fanout-vote) | `runLoop` plus a driver | `/loops` |
|
|
57
|
+
| Let the agent choose the loop shape per round | `createDynamicDriver` plus `createSandboxPlanner` | `/loops` |
|
|
58
|
+
| Delegate a disciplined loop by mode (code, research, ...) | `runDelegatedLoop` or `agent-runtime-loop` | root |
|
|
76
59
|
| Build code reliably (reviewed, gated) | `createDefaultCoderDelegate` | `/mcp` |
|
|
77
|
-
| Grow a
|
|
78
|
-
| Improve a prompt safely (identity-gated) | `
|
|
79
|
-
| Ship loop traces to a GenAI viewer | `buildLoopOtelSpans`
|
|
80
|
-
| Expose delegation as MCP tools to a sandbox agent | `createMcpServer`
|
|
60
|
+
| Grow a knowledge base with only grounded facts | `createKbGate` | `/mcp` |
|
|
61
|
+
| Improve a prompt safely (identity-gated) | `selfImprove` | `@tangle-network/agent-eval/contract` |
|
|
62
|
+
| Ship loop traces to a GenAI viewer | `buildLoopOtelSpans` plus `createOtelExporter` | root |
|
|
63
|
+
| Expose delegation as MCP tools to a sandbox agent | `createMcpServer` or `agent-runtime-mcp` | `/mcp` |
|
|
81
64
|
| Mutate surfaces from trace findings | `runAnalystLoop` | `/analyst-loop` |
|
|
82
|
-
| Persist a run
|
|
83
|
-
|
|
84
|
-
---
|
|
85
|
-
|
|
86
|
-
## Capabilities
|
|
87
|
-
|
|
88
|
-
### 1. Chat turns — `handleChatTurn`
|
|
89
|
-
|
|
90
|
-
The production turn envelope: frames a producer with the `session.run.*` NDJSON protocol, the persist → post-process → trace-flush hook order, and a stable execution id for client-retry replay. See [Getting started](#getting-started) and [`examples/chat-handler/`](./examples/chat-handler/).
|
|
65
|
+
| Persist a run plus its cost ledger | `startRuntimeRun` | root |
|
|
91
66
|
|
|
92
|
-
|
|
67
|
+
## The loop kernel
|
|
93
68
|
|
|
94
|
-
`runLoop` is a topology-agnostic kernel
|
|
69
|
+
`runLoop` is a topology-agnostic kernel. Each iteration spawns a sandbox on an `AgentRunSpec`, decodes the output, validates it, and asks a driver what to do next. The driver owns topology. The validator owns scoring. The kernel owns iteration accounting, concurrency, cost and token aggregation, and trace emission.
|
|
95
70
|
|
|
96
71
|
```ts
|
|
97
72
|
import { runLoop, createFanoutVoteDriver } from '@tangle-network/agent-runtime/loops'
|
|
98
73
|
|
|
99
74
|
const result = await runLoop({
|
|
100
|
-
driver: createFanoutVoteDriver({ n: 3 }),
|
|
101
|
-
agentRuns: [claudeSpec, codexSpec, glmSpec],
|
|
102
|
-
output,
|
|
103
|
-
validator,
|
|
75
|
+
driver: createFanoutVoteDriver({ n: 3 }), // 3 parallel attempts, pick the best valid one
|
|
76
|
+
agentRuns: [claudeSpec, codexSpec, glmSpec], // heterogeneous: one harness per branch
|
|
77
|
+
output, // decodes raw events into a typed Output
|
|
78
|
+
validator, // maps an Output to { valid, score }
|
|
104
79
|
task,
|
|
105
80
|
ctx: { sandboxClient: sandbox },
|
|
106
81
|
})
|
|
107
82
|
result.winner // highest-scoring valid attempt
|
|
108
83
|
```
|
|
109
84
|
|
|
110
|
-
Shipped drivers (`/loops/drivers`):
|
|
85
|
+
Shipped drivers (`/loops/drivers`): `createRefineDriver` (single task, iterate until valid), `createFanoutVoteDriver` (N parallel, vote), and `createDynamicDriver` (the agent authors the topology at runtime). The dynamic driver emits one `TopologyMove` per round (`refine`, `fanout`, or `stop`) from an injected planner; a malformed move throws `PlannerError`, so the loop never runs a topology nobody chose. Topology is orthogonal to harness: the planner never names a backend, and the kernel's `agentRuns` decide which harness runs each branch.
|
|
111
86
|
|
|
112
|
-
|
|
87
|
+
`runProgram` (also in `/loops`) is the recursive op-set (`sample`, `steer`, `fork`, `parallel`, `select`, `seq`, `stop`) plus a tree executor, for programs that compose sub-loops.
|
|
113
88
|
|
|
114
|
-
|
|
89
|
+
## Self-improvement
|
|
115
90
|
|
|
116
|
-
|
|
117
|
-
import { runLoop, createDynamicDriver, createSandboxPlanner } from '@tangle-network/agent-runtime/loops'
|
|
91
|
+
The same machinery, run at the optimization timescale.
|
|
118
92
|
|
|
119
|
-
|
|
120
|
-
client: sandbox,
|
|
121
|
-
profile: { name: 'planner', metadata: { backendType: 'claude-code' } }, // cheap model is fine
|
|
122
|
-
decodeTask: (raw) => raw as Task,
|
|
123
|
-
})
|
|
93
|
+
The one entry point is agent-eval's **`selfImprove`** (`@tangle-network/agent-eval/contract`). It runs a closed loop over any text/config surface, identity-gated by construction: it evaluates, proposes candidates (default `gepaDriver`), and a held-out gate ships a winner only if it beats the baseline. `result.winner.surface` is the baseline unless `result.gateDecision === 'ship'`, so registering a surface for optimization can never regress it.
|
|
124
94
|
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
95
|
+
```ts
|
|
96
|
+
import { selfImprove } from '@tangle-network/agent-eval/contract'
|
|
97
|
+
|
|
98
|
+
const result = await selfImprove({
|
|
99
|
+
baselineSurface: CURRENT_SYSTEM_PROMPT,
|
|
100
|
+
agent: (surface, scenario, ctx) => runYourThing(surface, scenario),
|
|
101
|
+
scenarios,
|
|
102
|
+
judge,
|
|
103
|
+
budget: { holdoutScenarios, generations: 3 },
|
|
104
|
+
llm: { baseUrl, apiKey, model: 'claude-sonnet-4-6' },
|
|
130
105
|
})
|
|
106
|
+
// result.winner.surface is the safe one — the baseline unless gateDecision === 'ship'
|
|
131
107
|
```
|
|
132
108
|
|
|
133
|
-
|
|
109
|
+
agent-runtime contributes the runtime-specific piece: the **CODE-surface `improvementDriver`** (`/improvement`) — a git-worktree mutator you pass to `selfImprove` as `driver` to optimize code instead of a string.
|
|
110
|
+
|
|
111
|
+
`runAnalystLoop` (`/analyst-loop`) mines real run traces into findings; `createAnalystDriverHook` feeds those findings to a dynamic-driver planner via `PlannerContext.analyses`, with a firewall (`assertTraceDerivedFindings`) that rejects any finding derived from a judge verdict. Production intake — turning real run traces into the corpus `selfImprove` optimizes against — is agent-eval's `analyzeRuns` / `partitionRunsByAuthoringModel` (`/contract`).
|
|
134
112
|
|
|
135
|
-
|
|
113
|
+
## Delegated loops
|
|
136
114
|
|
|
137
|
-
|
|
115
|
+
`runDelegatedLoop` is one entrypoint a worker agent or a scheduled routine calls to run a disciplined loop in a chosen mode, over the hardened engines below. It fails loudly on an unwired mode; an engine that throws is captured as `{ ok: false }`, so unattended runs record the failure rather than crash.
|
|
138
116
|
|
|
139
117
|
```ts
|
|
140
|
-
import {
|
|
141
|
-
runDelegatedLoop, coderLoopRunner, researchLoopRunner, type DelegatedLoopRegistry,
|
|
142
|
-
} from '@tangle-network/agent-runtime'
|
|
118
|
+
import { runDelegatedLoop, coderLoopRunner, researchLoopRunner, type DelegatedLoopRegistry } from '@tangle-network/agent-runtime'
|
|
143
119
|
|
|
144
120
|
const registry: DelegatedLoopRegistry = {
|
|
145
|
-
code: coderLoopRunner({
|
|
146
|
-
sandboxClient,
|
|
147
|
-
args: { goal: 'fix the flaky retry test', repoRoot: '/repo' },
|
|
148
|
-
reviewer, // optional adversarial gate
|
|
149
|
-
winnerSelection: 'smallest-diff',
|
|
150
|
-
}),
|
|
121
|
+
code: coderLoopRunner({ sandboxClient, args: { goal: 'fix the flaky retry test', repoRoot: '/repo' }, reviewer, winnerSelection: 'smallest-diff' }),
|
|
151
122
|
research: researchLoopRunner({ research, gate: { selfArtifactKinds: ['spec'] }, maxRounds: 3 }),
|
|
152
123
|
}
|
|
153
|
-
|
|
154
124
|
const result = await runDelegatedLoop('code', registry)
|
|
155
|
-
// → { mode: 'code', ok: true, output: CoderOutput, durationMs }
|
|
156
125
|
```
|
|
157
126
|
|
|
158
|
-
Modes: `code
|
|
159
|
-
|
|
160
|
-
**Schedulable**: the `agent-runtime-loop` bin runs it from a cron/routine. The config module wires the registry (with full env/creds access):
|
|
127
|
+
Modes: `code`, `review`, `research`, `audit`, `self-improve`, `dynamic`. The `agent-runtime-loop` bin runs the registry from a cron or routine and exits 0 (ok), 1 (recorded failure), or 2 (usage or config error).
|
|
161
128
|
|
|
162
|
-
|
|
163
|
-
agent-runtime-loop --mode research --config ./loops.config.js
|
|
164
|
-
# exits 0 (ok) · 1 (recorded failure) · 2 (usage/config error); prints the result as JSON
|
|
165
|
-
```
|
|
166
|
-
|
|
167
|
-
```ts
|
|
168
|
-
// loops.config.js — default-exports a DelegatedLoopRegistry (or a factory)
|
|
169
|
-
import { researchLoopRunner } from '@tangle-network/agent-runtime'
|
|
170
|
-
export default { research: researchLoopRunner({ research: myResearchEngine, maxRounds: 3 }) }
|
|
171
|
-
```
|
|
129
|
+
The coder delegate (`createDefaultCoderDelegate`, `/mcp`) has default-on safety gates: no-op rejection (an empty patch cannot pass trivially), an always-on secret-path floor (`.env`, keys, wallets), an optional `reviewer` gate, and a `winnerSelection` policy (`highest-score`, `smallest-diff`, `highest-readiness`, `first-approved`).
|
|
172
130
|
|
|
173
|
-
|
|
131
|
+
The knowledge-base gate (`createKbGate`, `/mcp`) is fail-closed: a fact's `verbatimPassage` must appear in its `sourceText`, the asserted value must be in the passage, and citations cannot point at self-generated artifacts. `researchLoopRunner` wraps it with a correct-on-veto loop that re-researches the vetoed gaps up to `maxRounds`, then returns the unverified ones rather than dropping them.
|
|
174
132
|
|
|
175
|
-
|
|
133
|
+
## Tracing
|
|
176
134
|
|
|
177
|
-
|
|
178
|
-
- **secret-path floor** — always-on, independent of `forbiddenPaths` (`.env`, keys, wallets, …),
|
|
179
|
-
- optional **`reviewer`** gate — a candidate must pass tests/typecheck **and** be approved to win,
|
|
180
|
-
- **`winnerSelection`** — `highest-score` (default) · `smallest-diff` · `highest-readiness` · `first-approved`.
|
|
181
|
-
|
|
182
|
-
```ts
|
|
183
|
-
import { createDefaultCoderDelegate } from '@tangle-network/agent-runtime/mcp'
|
|
184
|
-
|
|
185
|
-
const coder = createDefaultCoderDelegate({
|
|
186
|
-
sandboxClient,
|
|
187
|
-
fanoutHarnesses: ['claude-code', 'codex'],
|
|
188
|
-
reviewer: async (output, task) => ({ approved: output.testResult.passed, recommendation: 'ship', readiness: 0.9 }),
|
|
189
|
-
winnerSelection: 'highest-readiness',
|
|
190
|
-
})
|
|
191
|
-
const out = await coder({ goal: 'add a retry with backoff', repoRoot: '/repo', variants: 2 }, ctx)
|
|
192
|
-
```
|
|
193
|
-
|
|
194
|
-
See [`examples/coder-loop/`](./examples/coder-loop/) and [`examples/agent-into-reviewer/`](./examples/agent-into-reviewer/).
|
|
195
|
-
|
|
196
|
-
### 6. Valid-only research — `createKbGate`
|
|
197
|
-
|
|
198
|
-
A fail-closed gate so a knowledge base grows with **only grounded facts**. The always-on floor: a fact's `verbatimPassage` must literally appear in its `sourceText` (anti-hallucination), the asserted value must be in the passage, and citations can't point at self-generated artifacts (laundering). Plug in your own judges; verdict-only (remediation is yours).
|
|
199
|
-
|
|
200
|
-
```ts
|
|
201
|
-
import { createKbGate } from '@tangle-network/agent-runtime/mcp'
|
|
202
|
-
|
|
203
|
-
const gate = createKbGate({ selfArtifactKinds: ['spec', 'cad_params'] })
|
|
204
|
-
const verdict = await gate({
|
|
205
|
-
claim: 'revenue was $1.2B in 2025',
|
|
206
|
-
value: 1_200_000_000,
|
|
207
|
-
verbatimPassage: 'total revenue was $1,200,000,000 for the fiscal year',
|
|
208
|
-
sourceText: rawSource,
|
|
209
|
-
})
|
|
210
|
-
if (verdict.accepted) writeToKb(fact)
|
|
211
|
-
else console.warn('vetoed by', verdict.vetoedBy, verdict.reason)
|
|
212
|
-
```
|
|
213
|
-
|
|
214
|
-
`researchLoopRunner` (mode `research`) wraps this with a correct-on-veto remediation loop: research → gate → re-research the vetoed gaps up to `maxRounds`, then **return** the unverified ones (escalate, never silently drop).
|
|
215
|
-
|
|
216
|
-
### 7. Identity-gated prompt optimization — `optimizePrompt`
|
|
217
|
-
|
|
218
|
-
Optimize any text prompt over agent-eval's `runImprovementLoop`, **identity-gated by construction**: it runs evals, proposes candidates (default `gepaDriver`), and the held-out gate compares candidate vs baseline. `result.prompt` is the **baseline unless the gate decided `ship`** — so registering a prompt for optimization can never regress it.
|
|
219
|
-
|
|
220
|
-
```ts
|
|
221
|
-
import { optimizePrompt } from '@tangle-network/agent-runtime/improvement'
|
|
222
|
-
|
|
223
|
-
const { prompt, improved, delta } = await optimizePrompt({
|
|
224
|
-
baselinePrompt: CURRENT_SYSTEM_PROMPT,
|
|
225
|
-
runWithPrompt: (candidate, scenario, ctx) => runYourThing(candidate, scenario),
|
|
226
|
-
scenarios, holdoutScenarios, judges, runDir,
|
|
227
|
-
reflection: { llm, model: 'claude-sonnet-4-6' },
|
|
228
|
-
})
|
|
229
|
-
// assign `prompt` unconditionally — it's the safe one
|
|
230
|
-
```
|
|
231
|
-
|
|
232
|
-
See [`examples/self-improving-loop/`](./examples/self-improving-loop/).
|
|
233
|
-
|
|
234
|
-
### 8. OpenTelemetry GenAI topology tracing
|
|
235
|
-
|
|
236
|
-
`runLoop` emits a structured event stream; `buildLoopOtelSpans` turns it into a **nested, real-duration span tree** that any GenAI trace viewer (Phoenix, Langfuse, Grafana Tempo, Tangle Intelligence) renders natively. Attributes follow the current GenAI semantic conventions (`gen_ai.operation.name`, `gen_ai.agent.name`, `gen_ai.usage.input_tokens/output_tokens`) plus a `tangle.loop.*` extension for the topology (move kind/rationale, edge lineage, verdict, placement, cost).
|
|
135
|
+
`runLoop` emits a structured event stream. `buildLoopOtelSpans` turns it into a nested, real-duration span tree that any GenAI trace viewer (Phoenix, Langfuse, Grafana Tempo, Tangle Intelligence) renders natively. Attributes follow the current GenAI semantic conventions (`gen_ai.operation.name`, `gen_ai.agent.name`, `gen_ai.usage.input_tokens`, `gen_ai.usage.output_tokens`) plus a `tangle.loop.*` extension for the topology (move kind and rationale, edge lineage, verdict, placement, cost).
|
|
237
136
|
|
|
238
137
|
```ts
|
|
239
138
|
import { buildLoopOtelSpans, createOtelExporter } from '@tangle-network/agent-runtime'
|
|
@@ -243,94 +142,87 @@ for (const span of buildLoopOtelSpans(loopEvents, traceId)) exporter?.exportSpan
|
|
|
243
142
|
await exporter?.flush()
|
|
244
143
|
```
|
|
245
144
|
|
|
246
|
-
The shape: `loop
|
|
145
|
+
The shape: a `loop` span contains `loop.round` spans (move plus rationale), and each round contains `loop.iteration` spans (agent, usage, verdict, cost, parent edge).
|
|
247
146
|
|
|
248
|
-
|
|
147
|
+
## MCP delegation server
|
|
249
148
|
|
|
250
|
-
Expose the
|
|
149
|
+
Expose the delegation tools (`delegate_code`, `delegate_research`, `delegate_feedback`, `delegation_status`, `delegation_history`) to a sandbox coding agent. Mount the canonical server instead of forking delegation logic.
|
|
251
150
|
|
|
252
151
|
```ts
|
|
253
152
|
import { createMcpServer, createDefaultCoderDelegate } from '@tangle-network/agent-runtime/mcp'
|
|
254
153
|
|
|
255
|
-
const server = createMcpServer({
|
|
256
|
-
coderDelegate: createDefaultCoderDelegate({ sandboxClient }),
|
|
257
|
-
researcherDelegate, // wire your KB-backed researcher
|
|
258
|
-
})
|
|
154
|
+
const server = createMcpServer({ coderDelegate: createDefaultCoderDelegate({ sandboxClient }), researcherDelegate })
|
|
259
155
|
```
|
|
260
156
|
|
|
261
|
-
Or mount the `agent-runtime-mcp` stdio bin on a production `AgentProfile.mcp`.
|
|
157
|
+
Or mount the `agent-runtime-mcp` stdio bin on a production `AgentProfile.mcp`.
|
|
262
158
|
|
|
263
|
-
|
|
159
|
+
## The experiment harness (bench/)
|
|
264
160
|
|
|
265
|
-
|
|
161
|
+
`bench/` is the internal harness that asks the binding empirical question: does any non-blind topology beat blind compute at equal k, under a deployable (non-oracle) selector, on a real benchmark? It runs through the same kernel, not a reimplementation.
|
|
266
162
|
|
|
267
|
-
|
|
163
|
+
One entrypoint, `runExperiment(adapter, { sandboxClient, agentRun, arms, ... })`, runs each of N instances under every arm in a set. Each arm is a topology driven through `runLoop`; runs are judged by the adapter and written to a durable canonical corpus. An arm is one steer function `f(rootPrompt, history) => nextPrompt`: `random` ignores history (the compute control), `refine` carries the prior answer plus a directive, and `diverse` rotates a strategy lens. The cost dial is the backend type (`hermes` for a direct router call; `opencode`, `claude-code`, or `codex` for agent CLIs). The deep statistics (paired bootstrap with Benjamini-Hochberg correction, selector replay) come from `corpus-report.mts` and `corpus-replay.mts` over the written corpus, computed once. See `bench/HARNESS.md` and `docs/learning-flywheel.md`.
|
|
164
|
+
|
|
165
|
+
## Defaults
|
|
268
166
|
|
|
269
167
|
| Knob | Default | Override |
|
|
270
168
|
|---|---|---|
|
|
271
|
-
| Backend model | `gpt-4o-mini` (via `createOpenAICompatibleBackend`) | `model` option
|
|
169
|
+
| Backend model | `gpt-4o-mini` (via `createOpenAICompatibleBackend`) | `model` option or `MODEL_NAME` env |
|
|
272
170
|
| Backend provider | `openai-compat` when `TANGLE_API_KEY`, else `openai` if `OPENAI_API_KEY` | `MODEL_PROVIDER` env |
|
|
273
171
|
| Router base URL | `https://router.tangle.tools/v1` | `TANGLE_ROUTER_BASE_URL` env |
|
|
274
172
|
| Sandbox base URL | `https://sandbox.tangle.tools` | `SANDBOX_API_URL` env |
|
|
275
|
-
| Loop iteration cap | 10 (`runLoop`)
|
|
276
|
-
| Driver | none
|
|
173
|
+
| Loop iteration cap | 10 (`runLoop`), 8 (dynamic driver) | `runLoop({ maxIterations })` |
|
|
174
|
+
| Driver | none, required by `runLoop` | `createRefineDriver`, `createFanoutVoteDriver`, `createDynamicDriver` |
|
|
277
175
|
| Winner selection (coder delegate) | `highest-score` | `winnerSelection` option |
|
|
278
176
|
| KB gate min passage | 12 chars | `createKbGate({ minPassageChars })` |
|
|
279
|
-
| `
|
|
177
|
+
| `selfImprove` gate | held-out gate (default) | pass `gate: defaultProductionGate` for red-team hardening |
|
|
280
178
|
| OTEL export | off | set `OTEL_EXPORTER_OTLP_ENDPOINT` |
|
|
281
179
|
| Loop-runner mode failure | recorded as `{ ok: false }` | `runDelegatedLoop` never crashes when an engine throws |
|
|
282
180
|
|
|
283
|
-
---
|
|
284
|
-
|
|
285
181
|
## Composition with the stack
|
|
286
182
|
|
|
287
183
|
```
|
|
288
|
-
agent-runtime
|
|
289
|
-
|
|
184
|
+
agent-runtime handleChatTurn, runLoop + drivers, runProgram, runDelegatedLoop, createMcpServer,
|
|
185
|
+
improvementDriver, createKbGate, buildLoopOtelSpans, defineAgent
|
|
290
186
|
|
|
291
|
-
agent-eval
|
|
292
|
-
|
|
187
|
+
agent-eval selfImprove (the optimization entry point), runEvalCampaign,
|
|
188
|
+
runImprovementLoop (gepaDriver), heldOutGate, runAgentMatrix, analyzeRuns.
|
|
189
|
+
Consumes runtime traces, scores, gates promotion. agent-runtime depends on it,
|
|
190
|
+
never the reverse.
|
|
293
191
|
|
|
294
|
-
agent-knowledge
|
|
295
|
-
|
|
192
|
+
agent-knowledge proposeKnowledgeWrites, applyKnowledgeWriteBlocks. The analyst loop produces
|
|
193
|
+
these; the runtime and createKbGate consume them.
|
|
296
194
|
|
|
297
|
-
sandbox
|
|
298
|
-
|
|
195
|
+
sandbox AgentProfile, Sandbox.create, streamPrompt, exportTraceBundle. The harness
|
|
196
|
+
execution surface every loop runs on.
|
|
299
197
|
```
|
|
300
198
|
|
|
301
|
-
---
|
|
302
|
-
|
|
303
199
|
## Subpath exports
|
|
304
200
|
|
|
305
201
|
| Import | Owns |
|
|
306
202
|
|---|---|
|
|
307
203
|
| `@tangle-network/agent-runtime` | chat turns, delegated loop-runner, OTEL export, errors, model resolution |
|
|
308
|
-
|
|
|
309
|
-
|
|
|
310
|
-
|
|
|
311
|
-
|
|
|
312
|
-
|
|
|
313
|
-
|
|
|
314
|
-
|
|
|
315
|
-
|
|
316
|
-
Bins: `agent-runtime-mcp` (delegation MCP server) · `agent-runtime-loop` (schedulable delegated loop-runner).
|
|
204
|
+
| `.../agent` | `defineAgent` plus surface and outcome adapters |
|
|
205
|
+
| `.../loops` | the `runLoop` kernel, the `refine` / `fanout-vote` / `dynamic` drivers, `runProgram`, `loopDispatch` |
|
|
206
|
+
| `.../profiles` | `coderProfile`, `researcherProfile` presets |
|
|
207
|
+
| `.../mcp` | `createMcpServer`, `createDefaultCoderDelegate`, `createKbGate`, the `agent-runtime-mcp` bin |
|
|
208
|
+
| `.../improvement` | `improvementDriver` (code/worktree `CandidateGenerator`), `agenticGenerator`, `reflectiveGenerator` — the code-surface driver you pass to agent-eval's `selfImprove` |
|
|
209
|
+
| `.../analyst-loop` | `runAnalystLoop`, the analyst registry driver |
|
|
210
|
+
| `.../platform` | cross-site SSO and the integrations hub |
|
|
317
211
|
|
|
318
|
-
|
|
212
|
+
Bins: `agent-runtime-mcp` (delegation MCP server), `agent-runtime-loop` (schedulable delegated loop-runner).
|
|
319
213
|
|
|
320
214
|
## Adoption skill
|
|
321
215
|
|
|
322
|
-
This package ships a
|
|
323
|
-
|
|
324
|
-
---
|
|
216
|
+
This package ships a self-contained adoption skill at [`skills/agent-runtime-adoption/SKILL.md`](./skills/agent-runtime-adoption/SKILL.md): driven loops, topology drivers, the `loopDispatch` campaign bridge, MCP delegation, and the code-surface `improvementDriver` for agent-eval's `selfImprove`. It needs only this package plus `@tangle-network/agent-eval`. For the full self-improving pipeline (trace sink, analyst loop, scorecard, production loop, CI), see the `agent-eval-adoption` and `agent-stack-adoption` skills.
|
|
325
217
|
|
|
326
|
-
## Stability
|
|
218
|
+
## Stability, tests, docs
|
|
327
219
|
|
|
328
|
-
Every public export is annotated `@stable` or `@experimental`. `@stable` exports
|
|
220
|
+
Every public export is annotated `@stable` or `@experimental`. `@stable` exports do not change shape inside a minor version; `@experimental` ones may, and require a deliberate consumer bump.
|
|
329
221
|
|
|
330
222
|
```bash
|
|
331
|
-
pnpm test #
|
|
223
|
+
pnpm test # kernel, drivers, MCP, delegate hardening, kb-gate, loop-runner, backends
|
|
332
224
|
pnpm typecheck
|
|
333
225
|
pnpm build
|
|
334
226
|
```
|
|
335
227
|
|
|
336
|
-
Deeper docs: [`docs/concepts.md`](./docs/concepts.md) (mental model)
|
|
228
|
+
Deeper docs: [`docs/architecture.md`](./docs/architecture.md) (the canonical spine), [`docs/learning-flywheel.md`](./docs/learning-flywheel.md) (the self-improvement thesis and the open gate), [`docs/concepts.md`](./docs/concepts.md) (mental model), [`docs/agent-bus-protocol.md`](./docs/agent-bus-protocol.md) (cross-gateway header contract), [`docs/conversation-economics.md`](./docs/conversation-economics.md) (who pays), [`docs/durability-adapters.md`](./docs/durability-adapters.md) (SQL-backed `ConversationJournal`).
|
package/dist/agent.d.ts
CHANGED
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
import * as _tangle_network_agent_eval from '@tangle-network/agent-eval';
|
|
2
2
|
import { TraceAnalystKindSpec, AnalystFinding } from '@tangle-network/agent-eval';
|
|
3
|
-
import { R as RuntimeStreamEvent,
|
|
4
|
-
import { A as AgentSurfaces } from './improvement-adapter-
|
|
5
|
-
export { C as CreateSurfaceImprovementAdapterOpts, D as DraftPatchInput, a as DraftPatchOutput, R as ResolvedSurface, S as SurfaceImprovementEdit, b as SurfaceValidationIssue, c as createSurfaceImprovementAdapter, r as renderSurfaceIssues, d as resolveSubjectPath, v as validateSurfaces } from './improvement-adapter-
|
|
6
|
-
import { K as KnowledgeAdapter, a as RunAnalystLoopResult } from './types-
|
|
3
|
+
import { R as RuntimeStreamEvent, b as LoopSandboxClient, O as OutputAdapter, A as AgentRunSpec } from './types-DdzkffAm.js';
|
|
4
|
+
import { A as AgentSurfaces } from './improvement-adapter-CWegd3vw.js';
|
|
5
|
+
export { C as CreateSurfaceImprovementAdapterOpts, D as DraftPatchInput, a as DraftPatchOutput, R as ResolvedSurface, S as SurfaceImprovementEdit, b as SurfaceValidationIssue, c as createSurfaceImprovementAdapter, r as renderSurfaceIssues, d as resolveSubjectPath, v as validateSurfaces } from './improvement-adapter-CWegd3vw.js';
|
|
6
|
+
import { K as KnowledgeAdapter, a as RunAnalystLoopResult } from './types-BtRLF2U3.js';
|
|
7
7
|
import { AgentProfile, SandboxEvent } from '@tangle-network/sandbox';
|
|
8
8
|
import { C as ComposeProductionAgentProfileOptions } from './delegation-profile-1GbW5yA3.js';
|
|
9
|
+
import './runtime-hooks-C7JwKb9E.js';
|
|
9
10
|
|
|
10
11
|
/**
|
|
11
12
|
* The full agent manifest. Each agent ships ONE of these.
|
package/dist/agent.js
CHANGED
|
@@ -2,11 +2,11 @@ import {
|
|
|
2
2
|
composeProductionAgentProfile
|
|
3
3
|
} from "./chunk-7JITYN6T.js";
|
|
4
4
|
import {
|
|
5
|
-
createSandboxForSpec
|
|
5
|
+
createSandboxForSpec
|
|
6
|
+
} from "./chunk-KEWO4KI6.js";
|
|
7
|
+
import {
|
|
6
8
|
mapSandboxEvent
|
|
7
|
-
} from "./chunk-
|
|
8
|
-
import "./chunk-PY6NMZYX.js";
|
|
9
|
-
import "./chunk-SQSCRJ7U.js";
|
|
9
|
+
} from "./chunk-PRX45WE2.js";
|
|
10
10
|
import {
|
|
11
11
|
__require
|
|
12
12
|
} from "./chunk-DGUM43GV.js";
|
|
@@ -193,9 +193,7 @@ function defineAgent(manifest) {
|
|
|
193
193
|
// src/agent/improvement-adapter.ts
|
|
194
194
|
import { spawnSync } from "child_process";
|
|
195
195
|
import { readFileSync } from "fs";
|
|
196
|
-
import {
|
|
197
|
-
parseFindingSubject
|
|
198
|
-
} from "@tangle-network/agent-eval";
|
|
196
|
+
import { parseFindingSubject } from "@tangle-network/agent-eval/analyst";
|
|
199
197
|
var DEFAULT_CREATE_KINDS = [
|
|
200
198
|
"knowledge.wiki",
|
|
201
199
|
"knowledge.claim",
|