@tangle-network/agent-eval 0.45.0 → 0.47.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/http.d.ts +1 -1
- package/dist/adapters/http.js +11 -4
- package/dist/adapters/http.js.map +1 -1
- package/dist/adapters/langchain.d.ts +1 -1
- package/dist/campaign/index.d.ts +3 -3
- package/dist/chunk-ZQABFCVJ.js +85 -0
- package/dist/chunk-ZQABFCVJ.js.map +1 -0
- package/dist/contract/index.d.ts +217 -2
- package/dist/contract/index.js +206 -1
- package/dist/contract/index.js.map +1 -1
- package/dist/hosted/index.d.ts +192 -0
- package/dist/hosted/index.js +10 -0
- package/dist/hosted/index.js.map +1 -0
- package/dist/openapi.json +1 -1
- package/dist/rl.d.ts +1 -1
- package/dist/{run-improvement-loop-pJ4yrx4X.d.ts → run-improvement-loop-Bfam3MT1.d.ts} +2 -2
- package/dist/{types-BURGZ8Ug.d.ts → types-8u72Gc76.d.ts} +1 -1
- package/docs/design/external-agent-wedge.md +2 -2
- package/docs/design/phase-d-rfc.md +125 -0
- package/docs/hosted-ingest-spec.md +204 -0
- package/docs/phase-b-pairing-kit.md +188 -0
- package/docs/phase-b-runbook.md +176 -0
- package/docs/quickstart-external.md +43 -4
- package/package.json +6 -1
|
@@ -13,12 +13,51 @@ Tangle sandbox, no Tangle account, and no hosted infrastructure.
|
|
|
13
13
|
## Install
|
|
14
14
|
|
|
15
15
|
```sh
|
|
16
|
-
npm i @tangle-network/agent-eval@^0.
|
|
16
|
+
npm i @tangle-network/agent-eval@^0.46.0
|
|
17
17
|
```
|
|
18
18
|
|
|
19
|
-
The package's `@tangle-network/sandbox` peer is `optional
|
|
20
|
-
|
|
21
|
-
|
|
19
|
+
The package's `@tangle-network/sandbox` peer is `optional`. Foreign
|
|
20
|
+
consumers install agent-eval and run the full LAND tier without our
|
|
21
|
+
sandbox or its dependencies.
|
|
22
|
+
|
|
23
|
+
## The one-shot happy path
|
|
24
|
+
|
|
25
|
+
If you don't want to learn the substrate, the entire LAND tier reduces
|
|
26
|
+
to one function call:
|
|
27
|
+
|
|
28
|
+
```ts
|
|
29
|
+
import { selfImprove } from '@tangle-network/agent-eval/contract'
|
|
30
|
+
|
|
31
|
+
const result = await selfImprove({
|
|
32
|
+
agent: (surface, scenario, ctx) =>
|
|
33
|
+
runYourAgent({ systemPrompt: surface as string, scenario, signal: ctx.signal }),
|
|
34
|
+
scenarios,
|
|
35
|
+
judge,
|
|
36
|
+
baselineSurface: 'You are a senior copywriter…',
|
|
37
|
+
budget: { dollars: 10, generations: 3 },
|
|
38
|
+
})
|
|
39
|
+
|
|
40
|
+
console.log(`lift: ${result.lift.toFixed(3)} (${result.gateDecision})`)
|
|
41
|
+
if (result.gateDecision === 'ship') {
|
|
42
|
+
// result.winner.surface is the optimized prompt
|
|
43
|
+
}
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
That's the LAND happy path. Smart defaults pick: in-memory storage,
|
|
47
|
+
`gepaDriver` with copywriting-flavored mutation primitives,
|
|
48
|
+
`defaultProductionGate` with `deltaThreshold: 0.05`, 25% deterministic
|
|
49
|
+
train/holdout split.
|
|
50
|
+
|
|
51
|
+
Every escape hatch the substrate exposes is reachable from
|
|
52
|
+
`selfImprove` — custom `driver`, custom `gate`, distributed-driver
|
|
53
|
+
`cellPlacement`, `onProgress` streaming callback, `autoOnPromote: 'pr'`
|
|
54
|
+
to open a GitHub PR with the winner. See the type signatures in
|
|
55
|
+
[`src/contract/self-improve.ts`](../src/contract/self-improve.ts) for
|
|
56
|
+
the full surface.
|
|
57
|
+
|
|
58
|
+
The sections below are the lower-level path — useful when you want
|
|
59
|
+
fine-grained control over each piece. Read those next if `selfImprove`
|
|
60
|
+
isn't enough.
|
|
22
61
|
|
|
23
62
|
## Five types, four functions
|
|
24
63
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tangle-network/agent-eval",
|
|
3
|
-
"version": "0.45.0",
|
|
3
|
+
"version": "0.47.0",
|
|
4
4
|
"description": "Substrate for self-improving agents: traces, verifiable rewards, preferences, GEPA / reflective mutation, auto-research, replay, sequential anytime-valid stats, and release gates.",
|
|
5
5
|
"homepage": "https://github.com/tangle-network/agent-eval#readme",
|
|
6
6
|
"repository": {
|
|
@@ -119,6 +119,11 @@
|
|
|
119
119
|
"import": "./dist/adapters/http.js",
|
|
120
120
|
"default": "./dist/adapters/http.js"
|
|
121
121
|
},
|
|
122
|
+
"./hosted": {
|
|
123
|
+
"types": "./dist/hosted/index.d.ts",
|
|
124
|
+
"import": "./dist/hosted/index.js",
|
|
125
|
+
"default": "./dist/hosted/index.js"
|
|
126
|
+
},
|
|
122
127
|
"./openapi.json": {
|
|
123
128
|
"default": "./dist/openapi.json"
|
|
124
129
|
}
|