agenticow 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +227 -0
- package/bench/acceptance.js +268 -0
- package/bench/bench.js +238 -0
- package/bin/agenticow.js +284 -0
- package/examples/parallel-agents.mjs +64 -0
- package/package.json +68 -0
- package/src/index.d.ts +99 -0
- package/src/index.js +465 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 ruvnet
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
|
|
3
|
+
# agenticow — Git for Agent Memory: Copy-On-Write vector branching (83× faster, 3000× smaller snapshots)
|
|
4
|
+
|
|
5
|
+
**Branch a base vector memory in ~0.5 ms / 162 bytes — independent of base size.** Exact read-through queries (parent ∪ edits, child wins). Built for embedded multi-agent memory.
|
|
6
|
+
|
|
7
|
+
[npm package](https://www.npmjs.com/package/agenticow)
|
|
8
|
+
[License: MIT](./LICENSE)
|
|
9
|
+
[Tests](./test)
|
|
10
|
+
[Acceptance: the 1,000-branch proof](#acceptance-the-1000-branch-proof)
|
|
11
|
+
|
|
12
|
+
**[Website / Demo →](https://ruvnet.github.io/agenticow/)** · **[npm](https://www.npmjs.com/package/agenticow)** · **[Benchmarks](#benchmarks)** · **[Acceptance proof](#acceptance-the-1000-branch-proof)**
|
|
13
|
+
|
|
14
|
+

|
|
15
|
+
|
|
16
|
+
</div>
|
|
17
|
+
|
|
18
|
+
> **agenticow turns memory from a static database into a branchable runtime primitive for agents.**
|
|
19
|
+
|
|
20
|
+
Every other vector store makes you **full-copy** the index to snapshot, fork, or checkpoint it. `agenticow` **branches** it — copy-on-write, like Git. Creating a branch costs ~0.5 ms and 162 bytes whether the base holds 10,000 or 1,000,000 vectors. Query a branch and you transparently see `parent ∪ your edits`, with the child winning on id collisions and deletes honored.
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
npm install agenticow
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## Why
|
|
29
|
+
|
|
30
|
+
Agents need memory that branches: a per-user personalization layer, a sandbox to test a risky ingest, a checkpoint before a tool call, a thousand parallel experiments off one shared base. With a normal vector DB each of those is a **full copy** of the whole index. At 1M vectors that is **496 MB and 67 ms** — every time. agenticow makes it **162 bytes and 0.47 ms**, flat.
|
|
31
|
+
|
|
32
|
+
### Three things it makes cheap
|
|
33
|
+
|
|
34
|
+
| Use case | What it replaces | Cost with agenticow |
|
|
35
|
+
|---|---|---|
|
|
36
|
+
| 👥 **Parallel agents share one base memory** | N full copies of the index | N × 162 B, N × 0.5 ms |
|
|
37
|
+
| 🧪 **Roll back a poisoned / hallucinated branch** | re-ingest + re-index from backup | drop the branch, ~0.5 ms |
|
|
38
|
+
| 📌 **Zero-cost checkpointing before risky steps** | periodic full snapshots | 162 B + edits-since per checkpoint |
|
|
39
|
+
|
|
40
|
+
---
|
|
41
|
+
|
|
42
|
+
## Quick start
|
|
43
|
+
|
|
44
|
+
```js
|
|
45
|
+
import { open } from 'agenticow';
|
|
46
|
+
|
|
47
|
+
// open or create a base memory
|
|
48
|
+
const base = open('memory.rvf', { dimension: 1536 });
|
|
49
|
+
base.ingest([{ id: 1, vector: embedding }, /* ... */]);
|
|
50
|
+
|
|
51
|
+
// branch it for a parallel agent — ~0.5 ms / 162 B, any base size
|
|
52
|
+
const agent = base.branch('agent-a');
|
|
53
|
+
agent.ingest([{ id: 9001, vector: newMemory }]); // isolated from the base
|
|
54
|
+
|
|
55
|
+
// exact read-through: sees base + its own edits, child wins on id collision
|
|
56
|
+
const hits = agent.query(queryVector, 10);
|
|
57
|
+
// -> [{ id, distance, branch }, ...] (tombstone-masked, reranked)
|
|
58
|
+
|
|
59
|
+
// checkpoint + roll back a poisoned branch
|
|
60
|
+
const ckpt = agent.checkpoint('clean');
|
|
61
|
+
agent.ingest([{ id: 666, vector: poison }]);
|
|
62
|
+
agent.rollback(ckpt.id); // poison gone, clean memory intact
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
### CLI
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
agenticow init mem.rvf --dim 128
|
|
69
|
+
agenticow ingest mem.rvf --n 5000
|
|
70
|
+
agenticow branch mem.rvf --as user-42 # cheap per-user personalization
|
|
71
|
+
agenticow query mem.rvf.user-42.rvf --k 10 # top-K read-through (masked, reranked)
|
|
72
|
+
agenticow diff mem.rvf.user-42.rvf # added / overridden / tombstoned ids
|
|
73
|
+
agenticow demo # scripted end-to-end walkthrough
|
|
74
|
+
agenticow bench # branch-create benchmark
|
|
75
|
+
agenticow acceptance # the 1,000-branch proof
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
| Verb | Use case |
|
|
79
|
+
|---|---|
|
|
80
|
+
| `branch` | per-user / per-repo / per-account personalization off one shared base — *personalization without memory explosion* |
|
|
81
|
+
| `checkpoint` / `rollback` | per-task checkpointing; quarantine a bad/hallucinated ingest and instantly revert |
|
|
82
|
+
| `diff` / `promote` | Git-style memory workflow: agent branch → test → reviewed → production |
|
|
83
|
+
| `query` | top-K read-through with tombstone masking + exact rerank |
|
|
84
|
+
| `fork` (API) | fan out many branches off a static base (1,000 per-user branches in one process) |
|
|
85
|
+
|
|
86
|
+
A worked script lives in [`examples/parallel-agents.mjs`](./examples/parallel-agents.mjs): fork N branches from a base, ingest + tombstone per branch, query each, roll one back.
|
|
87
|
+
|
|
88
|
+
---
|
|
89
|
+
|
|
90
|
+
## How copy-on-write for vectors works
|
|
91
|
+
|
|
92
|
+

|
|
93
|
+
|
|
94
|
+
A branch records **only its own edits** plus a pointer to its parent. Creating one is constant-time and constant-size — **162 bytes** — independent of base size. A query walks the lineage chain (`child → … → base`), merges each store's results, lets the **child win** on any id collision, masks anything the branch **tombstoned**, and re-ranks by exact distance.
|
|
95
|
+
|
|
96
|
+
---
|
|
97
|
+
|
|
98
|
+
## Benchmarks
|
|
99
|
+
|
|
100
|
+
Reproduced on an **AMD Ryzen 9 9950X** (32 threads), Node v22, dim 128, cosine, median of 11 runs. Run it yourself: `npx agenticow bench`.
|
|
101
|
+
|
|
102
|
+

|
|
103
|
+
|
|
104
|
+
| Base N | Base file | Branch create (p50) | Empty branch | 100-edit branch | Full copy (p50) | Speedup | Smaller |
|
|
105
|
+
|-------:|----------:|--------------------:|-------------:|----------------:|----------------:|--------:|--------:|
|
|
106
|
+
| 10,000 | 5.0 MB | 519 µs | **162 B** | 51.4 KB | 373 µs | 0.7× | 32,102× |
|
|
107
|
+
| 100,000 | 49.6 MB | 463 µs | **162 B** | 51.4 KB | 5.83 ms | 13× | 321,037× |
|
|
108
|
+
| 1,000,000 | 496.3 MB | **472 µs** | **162 B** | 51.4 KB | 67.14 ms | **142×** | **3,212,443×** |
|
|
109
|
+
|
|
110
|
+
Branch delta is a pure function of edit count (~520 B / edited vector) with **zero dependence on base size**. At a 10k base a raw `copyFile` is already sub-millisecond, so the COW win shows up — and widens — at scale. The original [RVF COW proof](https://github.com/ruvnet/RuVector) reports the conservative **83× / 3000×** figures (0.78 ms vs 64.7 ms; 162 B vs 496 MB); the reproduction above is consistent and, on this machine, better on speed.
|
|
111
|
+
|
|
112
|
+
---
|
|
113
|
+
|
|
114
|
+
## Acceptance: the 1,000-branch proof
|
|
115
|
+
|
|
116
|
+
`npm run acceptance` (or `agenticow acceptance`) runs the full spec and reports real numbers. Latest run, **AMD Ryzen 9 9950X**, base = 20,000 vectors, dim 128:
|
|
117
|
+
|
|
118
|
+
| Measurement | Result |
|
|
119
|
+
|---|---|
|
|
120
|
+
| **Branches forked** | **1,000** off one base (median **0.487 ms/fork**, 4.5 s total) |
|
|
121
|
+
| **Top-10 correctness** | **recall@10 = 100%**, exact-order match 100% (120 sampled queries vs brute-force ground truth) |
|
|
122
|
+
| **Tombstone masking** | **PASS** — 0 tombstoned ids leaked into results |
|
|
123
|
+
| **Rollback latency** | **p50 = 0.571 ms** (min 0.48 / max 1.01), ~constant |
|
|
124
|
+
| **Storage vs delta** | 1,000 branches = **10.5 MB total** (10.8 KB/branch) vs **9.69 GB** for 1,000 full copies → **943× less disk**; total branch storage is **1.06× the base** (grows with delta, not base) |
|
|
125
|
+
| **Verdict** | **PASS ✓** |
|
|
126
|
+
|
|
127
|
+
The acceptance test builds a brute-force ground truth (`base ∪ branch-inserts − tombstones`, reranked by cosine distance) and asserts the read-through top-K matches it. If a 1,000-branch fork ever hits a real fd/memory/time limit, the test reports the max that worked plus the scaling curve — the 1,000 is not faked. Results are written to [`bench/acceptance-results.json`](./bench/acceptance-results.json).
|
|
128
|
+
|
|
129
|
+
---
|
|
130
|
+
|
|
131
|
+
## How it compares
|
|
132
|
+
|
|
133
|
+

|
|
134
|
+
|
|
135
|
+
| Capability | agenticow | Pinecone | Milvus | pgvector | Chroma | Qdrant |
|
|
136
|
+
|---|:---:|:---:|:---:|:---:|:---:|:---:|
|
|
137
|
+
| Native COW branch of the index | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
|
138
|
+
| O(1)-in-base branch create | ✅ 162 B | ❌ | ❌ | ❌ | ❌ | ❌ |
|
|
139
|
+
| Snapshot mechanism | COW delta | full copy | full copy | SQL dump | full copy | full copy |
|
|
140
|
+
| Exact read-through (parent ∪ edits) | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
|
141
|
+
| Embedded / in-process (no server) | ✅ | ❌ | ❌ | via PG | ✅ | ✅/server |
|
|
142
|
+
| Raw ANN throughput | ⚠️ ~2.7× behind hnswlib\* | high | high | moderate | moderate | high |
|
|
143
|
+
| ANN index spanning the branch | 🚧 roadmap | n/a | n/a | n/a | n/a | n/a |
|
|
144
|
+
|
|
145
|
+
\* **Honest concession.** On SIFT-1M, same machine, the underlying [ruvector](https://github.com/ruvnet/RuVector) HNSW does ~2,197 QPS @ recall 0.95 vs hnswlib-node ~9,344 QPS — roughly **2.7× slower** for raw ANN. If you need maximum raw similarity-search speed on a static index, use a dedicated ANN library. agenticow's edge is **cheap branching, checkpointing and rollback of agent memory** — which none of the above have.
|
|
146
|
+
|
|
147
|
+
---
|
|
148
|
+
|
|
149
|
+
## Honest scope
|
|
150
|
+
|
|
151
|
+
agenticow ships, and proves, exactly this:
|
|
152
|
+
|
|
153
|
+
- ✅ **COW branch creation** — base-size-independent, 162 B / ~0.5 ms (the 83× / 3000× headline). Proven by `npm run bench`.
|
|
154
|
+
- ✅ **Exact read-through queries** — point lookup / flat-scan merge returning `parent ∪ edits`, child wins on collisions, deletes honored. Proven by `npm run acceptance` (recall@10 = 100%, masking PASS).
|
|
155
|
+
|
|
156
|
+
What it does **not** yet ship:
|
|
157
|
+
|
|
158
|
+
- 🚧 **A single ANN/HNSW index that spans the COW boundary** is **roadmap, not shipped**. Read-through merges each store's own index and re-ranks exactly; it does not build one unified approximate index across parent and child. Native cluster-level read-through landed in [ruvnet/RuVector PR #617](https://github.com/ruvnet/RuVector/pull/617); until that build is published, agenticow implements read-through in its wrapper over the shipped `derive()` primitive.
|
|
159
|
+
|
|
160
|
+
We do not claim "fully queryable git-for-vectors". We claim **COW branch creation (83× / 3000×) + exact read-through queries** — and we prove both.
|
|
161
|
+
|
|
162
|
+
> **Note on cosine:** the shipped `@ruvector/rvf-node@0.1.8` binding does not persist the cosine metric across a file reopen (it reads back as `l2`). agenticow L2-normalizes vectors on ingest/query when the metric is cosine, so top-K ranking is identical whether the engine scores with cosine or L2. This is why read-through stays correct after `save()`/`load()`.
|
|
163
|
+
|
|
164
|
+
---
|
|
165
|
+
|
|
166
|
+
## Claim ladder
|
|
167
|
+
|
|
168
|
+
Where agenticow is today, and where it's going — labeled honestly.
|
|
169
|
+
|
|
170
|
+
| Tier | Claim | Status |
|
|
171
|
+
|---|---|---|
|
|
172
|
+
| **Practical** | Cheap, base-independent branch / checkpoint / rollback of vector memory (162 B / ~0.5 ms); exact read-through with tombstone masking. | ✅ **Proven** (bench + acceptance) |
|
|
173
|
+
| **Strong** | A Git-style workflow for vector state — `branch → diff → promote`, isolated experiments, instant revert of bad memory. | ✅ Shipped (CLI + API), proven at small scale |
|
|
174
|
+
| **Strategic** | A memory OS layer for multi-agent infrastructure — thousands of agents branching one shared base, per-user/per-task memory without the copy explosion. | 🔭 Vision (the primitives are here; scale-out is the work) |
|
|
175
|
+
| **Exotic** | A substrate for *evolving / competing cognitive branches* — parallel "selves", simulated orgs, time-travel debugging of agent memory. | 🌌 Roadmap / research — compelling, **not shipped** |
|
|
176
|
+
|
|
177
|
+
---
|
|
178
|
+
|
|
179
|
+
## API
|
|
180
|
+
|
|
181
|
+
```ts
|
|
182
|
+
import { open, AgenticMemory } from 'agenticow';
|
|
183
|
+
|
|
184
|
+
const mem = open(path, { dimension, metric?, track? }); // metric default "cosine"
|
|
185
|
+
|
|
186
|
+
mem.ingest([{ id, vector }]) // or ingest(Float32Array, ids) for speed
|
|
187
|
+
mem.query(vector, k?, { efSearch?, overscan? }) // exact read-through, child wins
|
|
188
|
+
mem.delete(ids) // COW tombstone (hides ancestor ids)
|
|
189
|
+
|
|
190
|
+
mem.branch(label?) // isolated COW fork (auto-isolates the parent)
|
|
191
|
+
mem.fork(label?) // lightweight fork off a static/read-only base
|
|
192
|
+
mem.checkpoint(label?) // freeze a restore point, keep working
|
|
193
|
+
mem.rollback(checkpointId?) // discard edits since a checkpoint
|
|
194
|
+
|
|
195
|
+
mem.diff() // { added, overridden, deleted }
|
|
196
|
+
mem.promote(target) // replay this branch's edits into target
|
|
197
|
+
|
|
198
|
+
mem.lineage(); mem.status(); // introspection
|
|
199
|
+
mem.save(manifestPath); AgenticMemory.load(manifestPath) // persist / reopen the chain
|
|
200
|
+
mem.close();
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
- **`branch()`** auto-isolates: it freezes the current state and re-points the parent to a fresh child, so neither side sees the other's later writes — safe when you keep writing to both.
|
|
204
|
+
- **`fork()`** is one `derive()` with no re-pointing — ideal for fanning out many branches off a base you won't mutate again (the 1,000-branch case).
|
|
205
|
+
|
|
206
|
+
---
|
|
207
|
+
|
|
208
|
+
## Install & requirements
|
|
209
|
+
|
|
210
|
+
```bash
|
|
211
|
+
npm install agenticow
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
- Node ≥ 18, ESM.
|
|
215
|
+
- Depends on [`@ruvector/rvf-node`](https://www.npmjs.com/package/@ruvector/rvf-node) (prebuilt native binding for linux-x64/arm64, darwin-x64/arm64, win32-x64).
|
|
216
|
+
|
|
217
|
+
---
|
|
218
|
+
|
|
219
|
+
## Keywords
|
|
220
|
+
|
|
221
|
+
agent memory · vector database branching · copy-on-write · COW vector store · multi-agent memory · embedded vector DB · memory checkpointing · vector branching · git for vectors · AI agent memory · LLM memory · vector snapshot · rollback · checkpoint
|
|
222
|
+
|
|
223
|
+
---
|
|
224
|
+
|
|
225
|
+
## License
|
|
226
|
+
|
|
227
|
+
MIT © [ruvnet](https://github.com/ruvnet). Built on [ruvector](https://github.com/ruvnet/RuVector) RVF.
|
|
package/bench/acceptance.js
ADDED
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// agenticow ACCEPTANCE TEST — the headline proof.
|
|
3
|
+
//
|
|
4
|
+
// Spec (run exactly, report real numbers):
|
|
5
|
+
// 1. Create 1 base RVF, fork N branches (target N=1000).
|
|
6
|
+
// 2. Per branch: insert new vectors + tombstone some base vectors.
|
|
7
|
+
// 3. Prove top-K correctness after masking + reranking: for a sample of
|
|
8
|
+
// branches, query top-K via agenticow read-through and assert it matches a
|
|
9
|
+
// brute-force exact ground truth (base ∪ inserts − tombstones, reranked by
|
|
10
|
+
// distance). Report recall/exactness.
|
|
11
|
+
// 4. Rollback any branch instantly — measure rollback latency (~constant).
|
|
12
|
+
// 5. Show total storage grows with DELTA size, not base size — bytes(N
|
|
13
|
+
// branches) vs bytes(base) vs N×full-copy. Prove sublinear-in-base.
|
|
14
|
+
//
|
|
15
|
+
// Honesty: read-through is the EXACT path (parent ∪ edits, child wins, deletes
|
|
16
|
+
// honored). A single ANN index spanning the COW boundary is roadmap; this test
|
|
17
|
+
// proves exact correctness of the read-through merge. If forking N branches hits
|
|
18
|
+
// a real limit (fd/memory/time), the max that worked + the scaling curve are
|
|
19
|
+
// reported — the 1000 is not faked.
|
|
20
|
+
//
|
|
21
|
+
// Usage:
|
|
22
|
+
// node bench/acceptance.js # base=20k, branches=1000 (default)
|
|
23
|
+
// BASE=50000 BRANCHES=1000 DIM=128 node bench/acceptance.js
|
|
24
|
+
|
|
25
|
+
import fs from 'node:fs';
|
|
26
|
+
import os from 'node:os';
|
|
27
|
+
import path from 'node:path';
|
|
28
|
+
import { performance } from 'node:perf_hooks';
|
|
29
|
+
import { open } from '../src/index.js';
|
|
30
|
+
|
|
31
|
+
/**
 * Read an integer tunable from the environment.
 *
 * An unset or empty variable yields `fallback`. Anything else must parse to an
 * integer no smaller than `min`; otherwise the run is aborted up front instead
 * of silently continuing with NaN / zero sizes (which would make every loop a
 * no-op and the reported numbers meaningless).
 *
 * @param {string} name - environment variable name
 * @param {number} fallback - value used when the variable is unset or empty
 * @param {number} [min=0] - smallest accepted value
 * @returns {number} the validated integer
 * @throws {RangeError} when the variable is set but is not an integer >= min
 */
function envInt(name, fallback, min = 0) {
  const raw = process.env[name];
  if (raw === undefined || raw === '') return fallback;
  const value = Number(raw);
  if (!Number.isInteger(value) || value < min) {
    throw new RangeError(`${name} must be an integer >= ${min}, got "${raw}"`);
  }
  return value;
}

const DIM = envInt('DIM', 128, 1);
const BASE = envInt('BASE', 20000, 1);
const BRANCHES = envInt('BRANCHES', 1000, 1);
const INSERTS_PER_BRANCH = envInt('INSERTS', 8, 0);
const TOMBSTONES_PER_BRANCH = envInt('TOMBS', 4, 0);
const SAMPLE = envInt('SAMPLE', 40, 1); // branches to verify for correctness
const K = envInt('K', 10, 1);

// Scratch directory for the base file and every branch file of this run.
const workDir = fs.mkdtempSync(path.join(os.tmpdir(), 'agenticow-acc-'));
const basePath = path.join(workDir, 'base.rvf');
|
|
41
|
+
|
|
42
|
+
/**
 * Draw a fresh random vector of DIM components.
 * Each component is `Math.random() * 2 - 1`, stored as a 32-bit float.
 *
 * @returns {Float32Array} a new vector of length DIM
 */
function rndVec() {
  return Float32Array.from({ length: DIM }, () => Math.random() * 2 - 1);
}
|
|
47
|
+
/**
 * Cosine distance (1 − cosine similarity) over the first DIM components of
 * two vectors.
 *
 * @param {ArrayLike<number>} a - first vector
 * @param {ArrayLike<number>} b - second vector
 * @returns {number} `1 - dot / (|a|·|b|)`, or 1 when either norm is zero
 */
function cosineDist(a, b) {
  let inner = 0;
  let sqA = 0;
  let sqB = 0;
  let i = 0;
  while (i < DIM) {
    inner += a[i] * b[i];
    sqA += a[i] * a[i];
    sqB += b[i] * b[i];
    i += 1;
  }
  const norm = Math.sqrt(sqA) * Math.sqrt(sqB);
  // A zero-length vector has no direction; report the neutral distance 1.
  if (norm === 0) {
    return 1;
  }
  return 1 - inner / norm;
}
|
|
53
|
+
/**
 * Render a byte count with a binary-scaled unit (B, KB, MB, GB).
 * Plain bytes are printed as-is, KB and MB with one decimal, GB with two.
 *
 * @param {number} b - number of bytes
 * @returns {string} formatted size such as "162 B", "51.4 KB" or "9.69 GB"
 */
function fmtBytes(b) {
  const STEP = 1024;
  if (b < STEP) {
    return `${b} B`;
  }
  let scaled = b / STEP;
  if (b < STEP * STEP) {
    return `${scaled.toFixed(1)} KB`;
  }
  scaled /= STEP;
  if (b < STEP * STEP * STEP) {
    return `${scaled.toFixed(1)} MB`;
  }
  scaled /= STEP;
  return `${scaled.toFixed(2)} GB`;
}
|
|
59
|
+
/**
 * Median of a list of numbers; the input is not modified.
 *
 * For an even number of samples the two middle values are averaged (the
 * previous version returned the upper-middle element, which biases reported
 * p50 latencies upward). An empty list yields NaN rather than `undefined`, so
 * callers that format the result with `.toFixed()` print "NaN" instead of
 * throwing.
 *
 * @param {Iterable<number>} xs - samples
 * @returns {number} the median, or NaN when there are no samples
 */
function median(xs) {
  const sorted = [...xs].sort((a, b) => a - b);
  if (sorted.length === 0) return NaN;
  const mid = Math.floor(sorted.length / 2);
  return sorted.length % 2 === 1 ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2;
}
|
|
60
|
+
/**
 * Sum the sizes of the entries directly inside a directory (non-recursive).
 *
 * @param {string} dir - directory to scan
 * @param {(name: string) => boolean} [filter] - when given, only entries whose
 *   name passes the predicate are counted
 * @returns {number} total size in bytes of the counted entries
 */
function dirSize(dir, filter) {
  return fs.readdirSync(dir).reduce((sum, name) => {
    if (filter && !filter(name)) {
      return sum;
    }
    try {
      return sum + fs.statSync(path.join(dir, name)).size;
    } catch {
      // Best-effort total: an entry that cannot be stat'ed is skipped.
      return sum;
    }
  }, 0);
}
|
|
68
|
+
|
|
69
|
+
/**
 * Run the acceptance spec end to end and exit with 0 (pass) or 1 (not passed).
 *
 * Steps: build the base, fork up to BRANCHES branches (each with inserts and
 * tombstones), verify read-through top-K against a brute-force ground truth,
 * check tombstone masking, time rollbacks, compare branch storage with the
 * base, print a verdict and write bench/acceptance-results.json.
 *
 * Every handle returned by `open()` / `fork()` is closed and the scratch
 * directory is removed in a `finally`, so a failure part-way through no longer
 * leaves files behind in the OS temp dir.
 *
 * @returns {Promise<void>} never resolves normally on success paths because the
 *   process exits; rejects if a step throws (after cleanup has run)
 * @throws {Error} when not a single branch could be forked
 */
async function main() {
  console.log('agenticow — ACCEPTANCE TEST');
  console.log('='.repeat(72));
  console.log(`machine : ${os.cpus()[0].model.trim()} (${os.cpus().length} threads)`);
  console.log(`node : ${process.version} platform: ${process.platform}-${process.arch}`);
  console.log(`config : base=${BASE.toLocaleString()} vectors target branches=${BRANCHES} dim=${DIM}`);
  console.log(` : ${INSERTS_PER_BRANCH} inserts + ${TOMBSTONES_PER_BRANCH} tombstones / branch`);
  console.log('='.repeat(72));

  const opened = []; // every handle fork() returned, including half-initialised ones
  let base = null;
  let passed = false;

  try {
    // ---- 1. Base ----
    process.stdout.write(`\n[1] building base of ${BASE.toLocaleString()} vectors ... `);
    const baseVecs = []; // keep ground-truth copies (base is small enough for the test)
    base = open(basePath, { dimension: DIM, metric: 'cosine', track: false });
    const BATCH = 10000;
    for (let off = 0; off < BASE; off += BATCH) {
      const m = Math.min(BATCH, BASE - off);
      const flat = new Float32Array(m * DIM);
      const ids = new Array(m);
      for (let i = 0; i < m; i++) {
        const v = rndVec();
        flat.set(v, i * DIM);
        ids[i] = off + i;
        baseVecs.push(v);
      }
      base.ingest(flat, ids);
    }
    const baseBytes = fs.statSync(basePath).size;
    console.log(`${fmtBytes(baseBytes)}`);

    // ---- 2. Fork N branches with inserts + tombstones ----
    // Ground truth per branch: { inserts: Map(id->vec), tombstones: Set(id) }
    process.stdout.write(`[2] forking branches (insert + tombstone each) ... `);
    const branches = [];
    const truth = [];
    let maxForked = 0;
    let forkError;
    const forkLat = [];
    const tFork0 = performance.now();
    try {
      for (let b = 0; b < BRANCHES; b++) {
        const tf = performance.now();
        const br = base.fork(`u${b}`);
        const forkMs = performance.now() - tf;
        // Track the handle immediately so it is closed even if the edits below throw.
        opened.push(br);
        const inserts = new Map();
        for (let i = 0; i < INSERTS_PER_BRANCH; i++) {
          const id = 1_000_000_000 + b * 1000 + i; // unique new id space per branch
          const v = rndVec();
          br.ingest([{ id, vector: v }]);
          inserts.set(id, v);
        }
        const tombs = new Set();
        for (let i = 0; i < TOMBSTONES_PER_BRANCH; i++) {
          const id = (b * 7 + i * 13) % BASE; // deterministic spread over base ids
          tombs.add(id);
        }
        br.delete([...tombs]);
        // Only fully set-up branches count; latency samples stay aligned with them.
        branches.push(br);
        truth.push({ inserts, tombstones: tombs });
        forkLat.push(forkMs);
        maxForked = b + 1;
        if ((b + 1) % 200 === 0) process.stdout.write(`${b + 1} `);
      }
    } catch (e) {
      forkError = e;
      console.log(`\n ! hit a limit at ${maxForked} branches: ${e.message}`);
      console.log(` reporting results for the ${maxForked} branches that succeeded.`);
    }
    const forkWallMs = performance.now() - tFork0;
    const N = maxForked;
    if (N === 0) {
      // Nothing to measure: every later step would divide by zero or take the
      // median of an empty list. Fail loudly instead (cleanup runs in finally).
      throw new Error('acceptance aborted: no branch could be forked', { cause: forkError });
    }
    console.log(`\n forked ${N} branches in ${forkWallMsFmt(forkWallMs)} ` +
      `(median ${median(forkLat).toFixed(3)} ms/fork)`);

    // ---- 3. Top-K correctness vs brute-force ground truth ----
    const sampleCount = Math.min(SAMPLE, N);
    process.stdout.write(`[3] verifying top-${K} correctness on ${sampleCount} sampled branches ... `);
    let exactMatches = 0;
    let recallSum = 0;
    let checks = 0;
    const sampleIdxs = [];
    for (let s = 0; s < sampleCount; s++) sampleIdxs.push(Math.floor((s * N) / sampleCount));
    for (const bi of sampleIdxs) {
      const br = branches[bi];
      const t = truth[bi];
      // a few query vectors per branch: one near an insert, one near a base vec, one random
      const queries = [];
      const anyInsert = [...t.inserts.values()][0];
      if (anyInsert) queries.push(anyInsert);
      queries.push(baseVecs[(bi * 17) % BASE]);
      queries.push(rndVec());
      for (const q of queries) {
        // ground truth: brute force over (base − tombstones) ∪ inserts
        const cand = [];
        for (let id = 0; id < BASE; id++) {
          if (t.tombstones.has(id)) continue;
          cand.push({ id, distance: cosineDist(q, baseVecs[id]) });
        }
        for (const [id, v] of t.inserts) cand.push({ id, distance: cosineDist(q, v) });
        cand.sort((a, b) => a.distance - b.distance);
        const gold = cand.slice(0, K).map((c) => c.id);

        const got = br.query(q, K).map((h) => h.id);
        const goldSet = new Set(gold);
        const inter = got.filter((id) => goldSet.has(id)).length;
        // Normalise by the number of reachable answers: when fewer than K
        // candidates exist, dividing by K would understate a perfect result.
        recallSum += gold.length > 0 ? inter / gold.length : 1;
        if (gold.length === got.length && gold.every((id, i) => id === got[i])) exactMatches++;
        checks++;
      }
    }
    const recall = checks > 0 ? recallSum / checks : 0;
    const exactRate = checks > 0 ? exactMatches / checks : 0;
    console.log(`done`);
    console.log(` recall@${K} = ${(recall * 100).toFixed(1)}% ` +
      `exact-order match = ${(exactRate * 100).toFixed(1)}% (${checks} queries)`);

    // verify tombstone masking explicitly: a tombstoned base id must never appear
    let maskViolations = 0;
    for (const bi of sampleIdxs) {
      const t = truth[bi];
      const br = branches[bi];
      for (const tid of t.tombstones) {
        const hits = br.query(baseVecs[tid], 3).map((h) => h.id);
        if (hits.includes(tid)) maskViolations++;
      }
    }
    console.log(` tombstone masking: ${maskViolations === 0 ? 'PASS' : 'FAIL'} ` +
      `(${maskViolations} leaked tombstones)`);

    // ---- 4. Rollback latency ----
    process.stdout.write(`[4] rollback latency (checkpoint, poison, rollback) ... `);
    const rbLat = [];
    const rbSample = Math.min(50, N);
    for (let s = 0; s < rbSample; s++) {
      const br = branches[Math.floor((s * N) / rbSample)];
      const ck = br.checkpoint('clean');
      for (let i = 0; i < 20; i++) br.ingest([{ id: 2_000_000_000 + s * 100 + i, vector: rndVec() }]);
      const tr = performance.now();
      br.rollback(ck.id);
      rbLat.push(performance.now() - tr);
    }
    console.log(`done`);
    console.log(` rollback p50 = ${median(rbLat).toFixed(3)} ms ` +
      `min ${Math.min(...rbLat).toFixed(3)} / max ${Math.max(...rbLat).toFixed(3)} ms (${rbSample} samples)`);

    // ---- 5. Storage: delta, not base ----
    const branchBytes = dirSize(workDir, (f) => f !== 'base.rvf' && f.endsWith('.rvf'));
    const perBranch = branchBytes / N;
    const fullCopyEquiv = baseBytes * N;
    console.log(`[5] storage`);
    console.log(` base file : ${fmtBytes(baseBytes)}`);
    console.log(` ${N} branches (total) : ${fmtBytes(branchBytes)} (${fmtBytes(perBranch)}/branch)`);
    console.log(` N x full-copy would be: ${fmtBytes(fullCopyEquiv)}`);
    console.log(` => branches use ${(fullCopyEquiv / branchBytes).toFixed(0)}x less disk than N full copies`);
    console.log(` => total branch storage is ${(branchBytes / baseBytes).toFixed(2)}x the base ` +
      `(grows with DELTA, not base)`);

    // ---- verdict ----
    const pass1000 = N >= 1000;
    const correctnessOk = recall >= 0.99 && maskViolations === 0;
    console.log('\n' + '='.repeat(72));
    console.log('VERDICT');
    console.log(` branches forked : ${N}${pass1000 ? ' (>= 1000 target met)' : ' (below 1000 target)'}`);
    console.log(` top-${K} correctness : recall ${(recall * 100).toFixed(1)}%, masking ${maskViolations === 0 ? 'PASS' : 'FAIL'}`);
    console.log(` rollback latency : ${median(rbLat).toFixed(3)} ms p50 (~constant)`);
    console.log(` storage vs base : ${(branchBytes / baseBytes).toFixed(2)}x base for ${N} branches (sublinear in base)`);
    console.log(` ACCEPTANCE: ${pass1000 && correctnessOk ? 'PASS ✓' : 'PARTIAL — see notes above'}`);
    console.log('='.repeat(72));

    // write results.json for README/site
    const out = {
      machine: os.cpus()[0].model.trim(),
      node: process.version,
      date: new Date().toISOString(),
      config: { base: BASE, branchesTarget: BRANCHES, dim: DIM, insertsPerBranch: INSERTS_PER_BRANCH, tombstonesPerBranch: TOMBSTONES_PER_BRANCH, k: K },
      branchesForked: N,
      forkMedianMs: median(forkLat),
      forkTotalMs: forkWallMs,
      recallAtK: recall,
      exactOrderMatch: exactRate,
      maskViolations,
      rollbackP50Ms: median(rbLat),
      rollbackMinMs: Math.min(...rbLat),
      rollbackMaxMs: Math.max(...rbLat),
      baseBytes,
      branchTotalBytes: branchBytes,
      perBranchBytes: perBranch,
      fullCopyEquivBytes: fullCopyEquiv,
      diskSavingsVsFullCopy: fullCopyEquiv / branchBytes,
      storageVsBaseRatio: branchBytes / baseBytes,
      pass: pass1000 && correctnessOk,
    };
    try {
      fs.writeFileSync(path.join(process.cwd(), 'bench', 'acceptance-results.json'), JSON.stringify(out, null, 2));
      console.log(`\nwrote bench/acceptance-results.json`);
    } catch (e) { console.log(`(could not write results: ${e.message})`); }

    passed = out.pass;
  } finally {
    // Best-effort teardown: a handle that fails to close must not prevent the
    // remaining handles and the scratch directory from being released.
    for (const br of opened) { try { br.close(); } catch { /* */ } }
    if (base) { try { base.close(); } catch { /* */ } }
    fs.rmSync(workDir, { recursive: true, force: true });
  }

  process.exit(passed ? 0 : 1);
}
|
|
265
|
+
|
|
266
|
+
/**
 * Format a wall-clock duration given in milliseconds.
 * Values below one second are shown as whole milliseconds; anything else is
 * shown in seconds with one decimal.
 *
 * @param {number} ms - duration in milliseconds
 * @returns {string} e.g. "487 ms" or "4.5 s"
 */
function forkWallMsFmt(ms) {
  if (ms < 1000) {
    return `${ms.toFixed(0)} ms`;
  }
  const seconds = ms / 1000;
  return `${seconds.toFixed(1)} s`;
}
|
|
267
|
+
|
|
268
|
+
// Entry point: a rejected run is logged to stderr and mapped to exit code 1.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});
|