@agentmemory/agentmemory 0.7.0 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (218) hide show
  1. package/AGENTS.md +2 -2
  2. package/README.md +76 -82
  3. package/dist/cli.mjs +99 -32
  4. package/dist/cli.mjs.map +1 -1
  5. package/dist/index.mjs +4 -2
  6. package/dist/index.mjs.map +1 -1
  7. package/dist/{src-QxitMPfJ.mjs → src-sYZDDbiA.mjs} +5 -3
  8. package/dist/src-sYZDDbiA.mjs.map +1 -0
  9. package/dist/standalone.mjs +1 -1
  10. package/dist/standalone.mjs.map +1 -1
  11. package/package.json +11 -1
  12. package/plugin/.claude-plugin/plugin.json +1 -1
  13. package/plugin/scripts/notification.d.mts +1 -0
  14. package/plugin/scripts/notification.mjs.map +1 -0
  15. package/plugin/scripts/post-tool-failure.d.mts +1 -0
  16. package/plugin/scripts/post-tool-failure.mjs.map +1 -0
  17. package/plugin/scripts/post-tool-use.d.mts +1 -0
  18. package/plugin/scripts/post-tool-use.mjs.map +1 -0
  19. package/plugin/scripts/pre-compact.d.mts +1 -0
  20. package/plugin/scripts/pre-compact.mjs.map +1 -0
  21. package/plugin/scripts/pre-tool-use.d.mts +1 -0
  22. package/plugin/scripts/pre-tool-use.mjs.map +1 -0
  23. package/plugin/scripts/prompt-submit.d.mts +1 -0
  24. package/plugin/scripts/prompt-submit.mjs.map +1 -0
  25. package/plugin/scripts/session-end.d.mts +1 -0
  26. package/plugin/scripts/session-end.mjs.map +1 -0
  27. package/plugin/scripts/session-start.d.mts +1 -0
  28. package/plugin/scripts/session-start.mjs.map +1 -0
  29. package/plugin/scripts/stop.d.mts +1 -0
  30. package/plugin/scripts/stop.mjs.map +1 -0
  31. package/plugin/scripts/subagent-start.d.mts +1 -0
  32. package/plugin/scripts/subagent-start.mjs.map +1 -0
  33. package/plugin/scripts/subagent-stop.d.mts +1 -0
  34. package/plugin/scripts/subagent-stop.mjs.map +1 -0
  35. package/plugin/scripts/task-completed.d.mts +1 -0
  36. package/plugin/scripts/task-completed.mjs.map +1 -0
  37. package/.claude-plugin/marketplace.json +0 -14
  38. package/.github/workflows/ci.yml +0 -22
  39. package/.github/workflows/publish.yml +0 -28
  40. package/assets/banner.png +0 -0
  41. package/assets/demo.gif +0 -0
  42. package/assets/demo.mp4 +0 -0
  43. package/benchmark/QUALITY.md +0 -73
  44. package/benchmark/REAL-EMBEDDINGS.md +0 -67
  45. package/benchmark/SCALE.md +0 -110
  46. package/benchmark/dataset.ts +0 -293
  47. package/benchmark/quality-eval.ts +0 -643
  48. package/benchmark/real-embeddings-eval.ts +0 -405
  49. package/benchmark/scale-eval.ts +0 -398
  50. package/dist/src-QxitMPfJ.mjs.map +0 -1
  51. package/src/auth.ts +0 -12
  52. package/src/cli.ts +0 -159
  53. package/src/config.ts +0 -221
  54. package/src/eval/metrics-store.ts +0 -65
  55. package/src/eval/quality.ts +0 -51
  56. package/src/eval/schemas.ts +0 -124
  57. package/src/eval/self-correct.ts +0 -28
  58. package/src/eval/validator.ts +0 -31
  59. package/src/functions/actions.ts +0 -288
  60. package/src/functions/audit.ts +0 -61
  61. package/src/functions/auto-forget.ts +0 -169
  62. package/src/functions/branch-aware.ts +0 -169
  63. package/src/functions/cascade.ts +0 -80
  64. package/src/functions/checkpoints.ts +0 -209
  65. package/src/functions/claude-bridge.ts +0 -161
  66. package/src/functions/compress.ts +0 -194
  67. package/src/functions/consolidate.ts +0 -212
  68. package/src/functions/consolidation-pipeline.ts +0 -258
  69. package/src/functions/context.ts +0 -169
  70. package/src/functions/crystallize.ts +0 -293
  71. package/src/functions/dedup.ts +0 -57
  72. package/src/functions/diagnostics.ts +0 -785
  73. package/src/functions/enrich.ts +0 -132
  74. package/src/functions/evict.ts +0 -163
  75. package/src/functions/export-import.ts +0 -508
  76. package/src/functions/facets.ts +0 -248
  77. package/src/functions/file-index.ts +0 -106
  78. package/src/functions/flow-compress.ts +0 -214
  79. package/src/functions/frontier.ts +0 -196
  80. package/src/functions/governance.ts +0 -131
  81. package/src/functions/graph-retrieval.ts +0 -277
  82. package/src/functions/graph.ts +0 -275
  83. package/src/functions/leases.ts +0 -216
  84. package/src/functions/lessons.ts +0 -253
  85. package/src/functions/mesh.ts +0 -434
  86. package/src/functions/migrate.ts +0 -165
  87. package/src/functions/observe.ts +0 -144
  88. package/src/functions/obsidian-export.ts +0 -310
  89. package/src/functions/patterns.ts +0 -138
  90. package/src/functions/privacy.ts +0 -39
  91. package/src/functions/profile.ts +0 -155
  92. package/src/functions/query-expansion.ts +0 -186
  93. package/src/functions/relations.ts +0 -237
  94. package/src/functions/remember.ts +0 -162
  95. package/src/functions/retention.ts +0 -235
  96. package/src/functions/routines.ts +0 -289
  97. package/src/functions/search.ts +0 -80
  98. package/src/functions/sentinels.ts +0 -417
  99. package/src/functions/signals.ts +0 -186
  100. package/src/functions/sketches.ts +0 -274
  101. package/src/functions/sliding-window.ts +0 -257
  102. package/src/functions/smart-search.ts +0 -115
  103. package/src/functions/snapshot.ts +0 -219
  104. package/src/functions/summarize.ts +0 -155
  105. package/src/functions/team.ts +0 -147
  106. package/src/functions/temporal-graph.ts +0 -476
  107. package/src/functions/timeline.ts +0 -138
  108. package/src/functions/verify.ts +0 -117
  109. package/src/health/monitor.ts +0 -110
  110. package/src/health/thresholds.ts +0 -73
  111. package/src/hooks/notification.ts +0 -52
  112. package/src/hooks/post-tool-failure.ts +0 -58
  113. package/src/hooks/post-tool-use.ts +0 -62
  114. package/src/hooks/pre-compact.ts +0 -60
  115. package/src/hooks/pre-tool-use.ts +0 -72
  116. package/src/hooks/prompt-submit.ts +0 -46
  117. package/src/hooks/session-end.ts +0 -71
  118. package/src/hooks/session-start.ts +0 -48
  119. package/src/hooks/stop.ts +0 -39
  120. package/src/hooks/subagent-start.ts +0 -49
  121. package/src/hooks/subagent-stop.ts +0 -54
  122. package/src/hooks/task-completed.ts +0 -54
  123. package/src/index.ts +0 -342
  124. package/src/mcp/in-memory-kv.ts +0 -61
  125. package/src/mcp/server.ts +0 -1455
  126. package/src/mcp/standalone.ts +0 -177
  127. package/src/mcp/tools-registry.ts +0 -769
  128. package/src/mcp/transport.ts +0 -91
  129. package/src/prompts/compression.ts +0 -67
  130. package/src/prompts/consolidation.ts +0 -48
  131. package/src/prompts/graph-extraction.ts +0 -35
  132. package/src/prompts/summary.ts +0 -38
  133. package/src/prompts/xml.ts +0 -26
  134. package/src/providers/agent-sdk.ts +0 -34
  135. package/src/providers/anthropic.ts +0 -35
  136. package/src/providers/circuit-breaker.ts +0 -82
  137. package/src/providers/embedding/cohere.ts +0 -46
  138. package/src/providers/embedding/gemini.ts +0 -54
  139. package/src/providers/embedding/index.ts +0 -39
  140. package/src/providers/embedding/local.ts +0 -52
  141. package/src/providers/embedding/openai.ts +0 -45
  142. package/src/providers/embedding/openrouter.ts +0 -51
  143. package/src/providers/embedding/voyage.ts +0 -46
  144. package/src/providers/fallback-chain.ts +0 -31
  145. package/src/providers/index.ts +0 -84
  146. package/src/providers/openrouter.ts +0 -71
  147. package/src/providers/resilient.ts +0 -37
  148. package/src/state/hybrid-search.ts +0 -295
  149. package/src/state/index-persistence.ts +0 -63
  150. package/src/state/keyed-mutex.ts +0 -18
  151. package/src/state/kv.ts +0 -33
  152. package/src/state/schema.ts +0 -71
  153. package/src/state/search-index.ts +0 -245
  154. package/src/state/stemmer.ts +0 -104
  155. package/src/state/synonyms.ts +0 -63
  156. package/src/state/vector-index.ts +0 -130
  157. package/src/telemetry/setup.ts +0 -116
  158. package/src/triggers/api.ts +0 -1904
  159. package/src/triggers/events.ts +0 -71
  160. package/src/types.ts +0 -769
  161. package/src/version.ts +0 -1
  162. package/src/viewer/index.html +0 -2497
  163. package/src/viewer/server.ts +0 -207
  164. package/src/xenova.d.ts +0 -3
  165. package/test/actions.test.ts +0 -490
  166. package/test/audit.test.ts +0 -108
  167. package/test/auto-forget.test.ts +0 -188
  168. package/test/cascade.test.ts +0 -277
  169. package/test/checkpoints.test.ts +0 -493
  170. package/test/circuit-breaker.test.ts +0 -107
  171. package/test/claude-bridge.test.ts +0 -178
  172. package/test/confidence.test.ts +0 -247
  173. package/test/consistency.test.ts +0 -61
  174. package/test/consolidation-pipeline.test.ts +0 -251
  175. package/test/crystallize.test.ts +0 -521
  176. package/test/diagnostics.test.ts +0 -638
  177. package/test/embedding-provider.test.ts +0 -49
  178. package/test/enrich.test.ts +0 -209
  179. package/test/eval.test.ts +0 -300
  180. package/test/export-import.test.ts +0 -251
  181. package/test/facets.test.ts +0 -448
  182. package/test/fallback-chain.test.ts +0 -93
  183. package/test/frontier.test.ts +0 -485
  184. package/test/governance.test.ts +0 -147
  185. package/test/graph-retrieval.test.ts +0 -186
  186. package/test/graph.test.ts +0 -160
  187. package/test/helpers/mocks.ts +0 -40
  188. package/test/hybrid-search.test.ts +0 -145
  189. package/test/index-persistence.test.ts +0 -124
  190. package/test/integration.test.ts +0 -265
  191. package/test/leases.test.ts +0 -399
  192. package/test/mcp-prompts.test.ts +0 -218
  193. package/test/mcp-resources.test.ts +0 -286
  194. package/test/mcp-standalone.test.ts +0 -113
  195. package/test/mesh.test.ts +0 -700
  196. package/test/privacy.test.ts +0 -87
  197. package/test/profile.test.ts +0 -161
  198. package/test/query-expansion.test.ts +0 -154
  199. package/test/relations.test.ts +0 -198
  200. package/test/retention.test.ts +0 -245
  201. package/test/routines.test.ts +0 -497
  202. package/test/schema-fingerprint.test.ts +0 -81
  203. package/test/schema.test.ts +0 -42
  204. package/test/search-index.test.ts +0 -128
  205. package/test/sentinels.test.ts +0 -626
  206. package/test/signals.test.ts +0 -410
  207. package/test/sketches.test.ts +0 -549
  208. package/test/sliding-window.test.ts +0 -199
  209. package/test/smart-search.test.ts +0 -169
  210. package/test/snapshot.test.ts +0 -165
  211. package/test/team.test.ts +0 -156
  212. package/test/temporal-graph.test.ts +0 -378
  213. package/test/timeline.test.ts +0 -148
  214. package/test/vector-index.test.ts +0 -79
  215. package/test/verify.test.ts +0 -209
  216. package/test/xml.test.ts +0 -65
  217. package/tsconfig.json +0 -22
  218. package/tsdown.config.ts +0 -62
@@ -1,398 +0,0 @@
1
- import { SearchIndex } from "../src/state/search-index.js";
2
- import { VectorIndex } from "../src/state/vector-index.js";
3
- import { HybridSearch } from "../src/state/hybrid-search.js";
4
- import type { CompressedObservation } from "../src/types.js";
5
- import { generateScaleDataset, generateDataset } from "./dataset.js";
6
- import { writeFileSync } from "node:fs";
7
-
8
- function mockKV() {
9
- const store = new Map<string, Map<string, unknown>>();
10
- return {
11
- get: async <T>(scope: string, key: string): Promise<T | null> =>
12
- (store.get(scope)?.get(key) as T) ?? null,
13
- set: async <T>(scope: string, key: string, data: T): Promise<T> => {
14
- if (!store.has(scope)) store.set(scope, new Map());
15
- store.get(scope)!.set(key, data);
16
- return data;
17
- },
18
- delete: async (scope: string, key: string): Promise<void> => {
19
- store.get(scope)?.delete(key);
20
- },
21
- list: async <T>(scope: string): Promise<T[]> => {
22
- const entries = store.get(scope);
23
- return entries ? (Array.from(entries.values()) as T[]) : [];
24
- },
25
- };
26
- }
27
-
28
- function deterministicEmbedding(text: string, dims = 384): Float32Array {
29
- const arr = new Float32Array(dims);
30
- const words = text.toLowerCase().split(/\W+/).filter(w => w.length > 2);
31
- for (const word of words) {
32
- for (let i = 0; i < word.length; i++) {
33
- const idx = (word.charCodeAt(i) * 31 + i * 17) % dims;
34
- arr[idx] += 1;
35
- const idx2 = (word.charCodeAt(i) * 37 + i * 13 + word.length * 7) % dims;
36
- arr[idx2] += 0.5;
37
- }
38
- }
39
- const norm = Math.sqrt(arr.reduce((s, v) => s + v * v, 0));
40
- if (norm > 0) for (let i = 0; i < dims; i++) arr[i] /= norm;
41
- return arr;
42
- }
43
-
44
- function estimateTokens(text: string): number {
45
- return Math.ceil(text.length / 4);
46
- }
47
-
48
- interface ScaleResult {
49
- scale: number;
50
- sessions: number;
51
- index_build_ms: number;
52
- index_build_per_doc_ms: number;
53
- bm25_search_ms: number;
54
- hybrid_search_ms: number;
55
- index_size_kb: number;
56
- vector_size_kb: number;
57
- heap_mb: number;
58
- builtin_tokens: number;
59
- builtin_200line_tokens: number;
60
- agentmemory_tokens: number;
61
- token_savings_pct: number;
62
- builtin_unreachable_pct: number;
63
- }
64
-
65
- interface CrossSessionResult {
66
- query: string;
67
- target_session: string;
68
- current_session: string;
69
- sessions_apart: number;
70
- bm25_found: boolean;
71
- bm25_rank: number;
72
- hybrid_found: boolean;
73
- hybrid_rank: number;
74
- builtin_found: boolean;
75
- latency_ms: number;
76
- }
77
-
78
- const SEARCH_QUERIES = [
79
- "authentication middleware JWT",
80
- "PostgreSQL connection pooling",
81
- "Kubernetes pod crash",
82
- "rate limiting API",
83
- "Playwright E2E tests",
84
- "Docker multi-stage build",
85
- "Redis caching layer",
86
- "CI/CD GitHub Actions",
87
- "Prisma migration drift",
88
- "monitoring Datadog alerts",
89
- ];
90
-
91
- async function benchmarkScale(counts: number[]): Promise<ScaleResult[]> {
92
- const results: ScaleResult[] = [];
93
-
94
- for (const count of counts) {
95
- console.log(` Scale: ${count.toLocaleString()} observations...`);
96
- const observations = generateScaleDataset(count);
97
- const sessionCount = new Set(observations.map(o => o.sessionId)).size;
98
-
99
- const heapBefore = process.memoryUsage().heapUsed;
100
-
101
- const buildStart = performance.now();
102
- const bm25 = new SearchIndex();
103
- const vector = new VectorIndex();
104
- const kv = mockKV();
105
- const dims = 384;
106
-
107
- for (const obs of observations) {
108
- bm25.add(obs);
109
- const text = [obs.title, obs.narrative, ...obs.concepts].join(" ");
110
- vector.add(obs.id, obs.sessionId, deterministicEmbedding(text, dims));
111
- await kv.set(`mem:obs:${obs.sessionId}`, obs.id, obs);
112
- }
113
- const buildMs = performance.now() - buildStart;
114
-
115
- const heapAfter = process.memoryUsage().heapUsed;
116
-
117
- const mockEmbed: any = {
118
- name: "deterministic", dimensions: dims,
119
- embed: async (t: string) => deterministicEmbedding(t, dims),
120
- embedBatch: async (ts: string[]) => ts.map(t => deterministicEmbedding(t, dims)),
121
- };
122
- const hybrid = new HybridSearch(bm25, vector, mockEmbed, kv as never, 0.4, 0.6, 0);
123
-
124
- let bm25Total = 0;
125
- let hybridTotal = 0;
126
- const iters = 20;
127
-
128
- for (let i = 0; i < iters; i++) {
129
- const q = SEARCH_QUERIES[i % SEARCH_QUERIES.length];
130
- const s1 = performance.now();
131
- bm25.search(q, 10);
132
- bm25Total += performance.now() - s1;
133
-
134
- const s2 = performance.now();
135
- await hybrid.search(q, 10);
136
- hybridTotal += performance.now() - s2;
137
- }
138
-
139
- const bm25Ser = bm25.serialize();
140
- const vecSer = vector.serialize();
141
-
142
- const allText = observations.map(o =>
143
- `- ${o.title}: ${o.narrative.slice(0, 80)}... [${o.concepts.slice(0, 3).join(", ")}]`
144
- ).join("\n");
145
- const builtinTokens = estimateTokens(allText);
146
-
147
- const truncatedText = observations.slice(0, 200).map(o =>
148
- `- ${o.title}: ${o.narrative.slice(0, 60)}... [${o.concepts.slice(0, 3).join(", ")}]`
149
- ).join("\n");
150
- const builtin200Tokens = estimateTokens(truncatedText);
151
-
152
- let totalResultTokens = 0;
153
- for (let i = 0; i < iters; i++) {
154
- const q = SEARCH_QUERIES[i % SEARCH_QUERIES.length];
155
- const results = await hybrid.search(q, 10);
156
- totalResultTokens += estimateTokens(JSON.stringify(results.map(r => r.observation)));
157
- }
158
- const agentmemoryTokens = Math.round(totalResultTokens / iters);
159
-
160
- results.push({
161
- scale: count,
162
- sessions: sessionCount,
163
- index_build_ms: Math.round(buildMs),
164
- index_build_per_doc_ms: +(buildMs / count).toFixed(3),
165
- bm25_search_ms: +(bm25Total / iters).toFixed(3),
166
- hybrid_search_ms: +(hybridTotal / iters).toFixed(3),
167
- index_size_kb: Math.round(Buffer.byteLength(bm25Ser, "utf-8") / 1024),
168
- vector_size_kb: Math.round(Buffer.byteLength(vecSer, "utf-8") / 1024),
169
- heap_mb: Math.round((heapAfter - heapBefore) / 1024 / 1024),
170
- builtin_tokens: builtinTokens,
171
- builtin_200line_tokens: builtin200Tokens,
172
- agentmemory_tokens: agentmemoryTokens,
173
- token_savings_pct: Math.round((1 - agentmemoryTokens / builtinTokens) * 100),
174
- builtin_unreachable_pct: count <= 200 ? 0 : Math.round((1 - 200 / count) * 100),
175
- });
176
- }
177
-
178
- return results;
179
- }
180
-
181
- async function benchmarkCrossSession(): Promise<CrossSessionResult[]> {
182
- const { observations } = generateDataset();
183
- const results: CrossSessionResult[] = [];
184
-
185
- const bm25 = new SearchIndex();
186
- const kv = mockKV();
187
- const vector = new VectorIndex();
188
- const dims = 384;
189
-
190
- for (const obs of observations) {
191
- bm25.add(obs);
192
- const text = [obs.title, obs.narrative, ...obs.concepts].join(" ");
193
- vector.add(obs.id, obs.sessionId, deterministicEmbedding(text, dims));
194
- await kv.set(`mem:obs:${obs.sessionId}`, obs.id, obs);
195
- }
196
-
197
- const mockEmbed: any = {
198
- name: "deterministic", dimensions: dims,
199
- embed: async (t: string) => deterministicEmbedding(t, dims),
200
- embedBatch: async (ts: string[]) => ts.map(t => deterministicEmbedding(t, dims)),
201
- };
202
- const hybrid = new HybridSearch(bm25, vector, mockEmbed, kv as never, 0.4, 0.6, 0);
203
-
204
- const crossQueries: Array<{
205
- query: string;
206
- targetConcepts: string[];
207
- targetSessionRange: [number, number];
208
- currentSession: number;
209
- }> = [
210
- { query: "How did we set up OAuth providers?", targetConcepts: ["oauth", "nextauth"], targetSessionRange: [5, 9], currentSession: 29 },
211
- { query: "What was the N+1 query fix?", targetConcepts: ["n+1", "eager-loading"], targetSessionRange: [10, 14], currentSession: 28 },
212
- { query: "PostgreSQL full-text search setup", targetConcepts: ["full-text-search", "tsvector"], targetSessionRange: [10, 14], currentSession: 27 },
213
- { query: "bcrypt password hashing configuration", targetConcepts: ["bcrypt", "password-hashing"], targetSessionRange: [5, 9], currentSession: 25 },
214
- { query: "Vitest unit testing setup", targetConcepts: ["vitest", "unit-testing"], targetSessionRange: [20, 24], currentSession: 29 },
215
- { query: "webhook retry exponential backoff", targetConcepts: ["webhooks", "exponential-backoff"], targetSessionRange: [15, 19], currentSession: 29 },
216
- { query: "ESLint flat config migration", targetConcepts: ["eslint", "linting"], targetSessionRange: [0, 4], currentSession: 29 },
217
- { query: "Kubernetes HPA autoscaling configuration", targetConcepts: ["hpa", "autoscaling", "kubernetes"], targetSessionRange: [25, 29], currentSession: 29 },
218
- { query: "Prisma database seed script", targetConcepts: ["seeding", "faker", "prisma"], targetSessionRange: [10, 14], currentSession: 26 },
219
- { query: "API cursor-based pagination", targetConcepts: ["cursor-based", "pagination"], targetSessionRange: [15, 19], currentSession: 29 },
220
- { query: "CSRF protection double-submit cookie", targetConcepts: ["csrf", "cookies"], targetSessionRange: [5, 9], currentSession: 29 },
221
- { query: "blue-green deployment rollback", targetConcepts: ["blue-green", "rollback", "zero-downtime"], targetSessionRange: [25, 29], currentSession: 29 },
222
- ];
223
-
224
- for (const cq of crossQueries) {
225
- const targetObs = observations.filter(o =>
226
- o.concepts.some(c => cq.targetConcepts.includes(c))
227
- );
228
- const targetIds = new Set(targetObs.map(o => o.id));
229
-
230
- const start = performance.now();
231
- const bm25Results = bm25.search(cq.query, 20);
232
- const hybridResults = await hybrid.search(cq.query, 20);
233
- const latency = performance.now() - start;
234
-
235
- const bm25Rank = bm25Results.findIndex(r => targetIds.has(r.obsId));
236
- const hybridRank = hybridResults.findIndex(r => targetIds.has(r.observation.id));
237
-
238
- const builtinLines = 200;
239
- const visibleObs = observations.slice(0, builtinLines);
240
- const builtinFound = visibleObs.some(o => targetIds.has(o.id));
241
-
242
- const sessionsApart = cq.currentSession - cq.targetSessionRange[0];
243
-
244
- results.push({
245
- query: cq.query,
246
- target_session: `ses_${cq.targetSessionRange[0].toString().padStart(3, "0")}-${cq.targetSessionRange[1].toString().padStart(3, "0")}`,
247
- current_session: `ses_${cq.currentSession.toString().padStart(3, "0")}`,
248
- sessions_apart: sessionsApart,
249
- bm25_found: bm25Rank >= 0,
250
- bm25_rank: bm25Rank >= 0 ? bm25Rank + 1 : -1,
251
- hybrid_found: hybridRank >= 0,
252
- hybrid_rank: hybridRank >= 0 ? hybridRank + 1 : -1,
253
- builtin_found: builtinFound,
254
- latency_ms: latency,
255
- });
256
- }
257
-
258
- return results;
259
- }
260
-
261
- function generateReport(scale: ScaleResult[], cross: CrossSessionResult[]): string {
262
- const lines: string[] = [];
263
- const w = (s: string) => lines.push(s);
264
-
265
- w("# agentmemory v0.6.0 — Scale & Cross-Session Evaluation");
266
- w("");
267
- w(`**Date:** ${new Date().toISOString()}`);
268
- w(`**Platform:** ${process.platform} ${process.arch}, Node ${process.version}`);
269
- w("");
270
-
271
- w("## 1. Scale: agentmemory vs Built-in Memory");
272
- w("");
273
- w("Every built-in agent memory (CLAUDE.md, .cursorrules, Cline's memory-bank) loads ALL memory into context every session. agentmemory searches and returns only relevant results.");
274
- w("");
275
- w("| Observations | Sessions | Index Build | BM25 Search | Hybrid Search | Heap | Context Tokens (built-in) | Context Tokens (agentmemory) | Savings | Built-in Unreachable |");
276
- w("|-------------|----------|------------|-------------|---------------|------|--------------------------|-----------------------------|---------|--------------------|");
277
-
278
- for (const r of scale) {
279
- w(`| ${r.scale.toLocaleString()} | ${r.sessions} | ${r.index_build_ms}ms | ${r.bm25_search_ms}ms | ${r.hybrid_search_ms}ms | ${r.heap_mb}MB | ${r.builtin_tokens.toLocaleString()} | ${r.agentmemory_tokens.toLocaleString()} | ${r.token_savings_pct}% | ${r.builtin_unreachable_pct}% |`);
280
- }
281
-
282
- w("");
283
- w("### What the numbers mean");
284
- w("");
285
- w("**Context Tokens (built-in):** How many tokens Claude Code/Cursor/Cline would consume loading ALL memory into the context window. At 5,000 observations, this is ~250K tokens — exceeding most context windows entirely.");
286
- w("");
287
- w("**Context Tokens (agentmemory):** How many tokens the top-10 search results consume. Stays constant regardless of corpus size.");
288
- w("");
289
- w("**Built-in Unreachable:** Percentage of memories that built-in systems CANNOT access because they exceed the 200-line MEMORY.md cap or context window limits. At 1,000 observations, 80% of your project history is invisible.");
290
- w("");
291
-
292
- w("### Storage Costs");
293
- w("");
294
- w("| Observations | BM25 Index | Vector Index (d=384) | Total Storage |");
295
- w("|-------------|-----------|---------------------|---------------|");
296
- for (const r of scale) {
297
- const total = r.index_size_kb + r.vector_size_kb;
298
- w(`| ${r.scale.toLocaleString()} | ${r.index_size_kb.toLocaleString()} KB | ${r.vector_size_kb.toLocaleString()} KB | ${(total / 1024).toFixed(1)} MB |`);
299
- }
300
-
301
- w("");
302
- w("## 2. Cross-Session Retrieval");
303
- w("");
304
- w("Can the system find relevant information from past sessions? This is impossible for built-in memory once observations exceed the line/context cap.");
305
- w("");
306
- w("| Query | Target Session | Gap | BM25 Found | BM25 Rank | Hybrid Found | Hybrid Rank | Built-in Visible |");
307
- w("|-------|---------------|-----|-----------|-----------|-------------|-------------|-----------------|");
308
-
309
- for (const r of cross) {
310
- w(`| ${r.query.slice(0, 40)}${r.query.length > 40 ? "..." : ""} | ${r.target_session} | ${r.sessions_apart} | ${r.bm25_found ? "Yes" : "No"} | ${r.bm25_rank > 0 ? `#${r.bm25_rank}` : "-"} | ${r.hybrid_found ? "Yes" : "No"} | ${r.hybrid_rank > 0 ? `#${r.hybrid_rank}` : "-"} | ${r.builtin_found ? "Yes" : "No"} |`);
311
- }
312
-
313
- const bm25Found = cross.filter(r => r.bm25_found).length;
314
- const hybridFound = cross.filter(r => r.hybrid_found).length;
315
- const builtinFound = cross.filter(r => r.builtin_found).length;
316
-
317
- w("");
318
- w(`**Summary:** agentmemory BM25 found ${bm25Found}/${cross.length} cross-session queries. Hybrid found ${hybridFound}/${cross.length}. Built-in memory (200-line cap) could only reach ${builtinFound}/${cross.length}.`);
319
-
320
- w("");
321
- w("## 3. The Context Window Problem");
322
- w("");
323
- w("```");
324
- w("Agent context window: ~200K tokens");
325
- w("System prompt + tools: ~20K tokens");
326
- w("User conversation: ~30K tokens");
327
- w("Available for memory: ~150K tokens");
328
- w("");
329
- w("At 50 tokens/observation:");
330
- w(" 200 observations = 10,000 tokens (fits, but 200-line cap hits first)");
331
- w(" 1,000 observations = 50,000 tokens (33% of available budget)");
332
- w(" 5,000 observations = 250,000 tokens (EXCEEDS total context window)");
333
- w("");
334
- w("agentmemory top-10 results:");
335
- w(` Any corpus size = ~${scale[0]?.agentmemory_tokens.toLocaleString() || "500"} tokens (0.3% of budget)`);
336
- w("```");
337
- w("");
338
-
339
- w("## 4. What Built-in Memory Cannot Do");
340
- w("");
341
- w("| Capability | Built-in (CLAUDE.md) | agentmemory |");
342
- w("|-----------|---------------------|-------------|");
343
- w("| Semantic search | No (keyword grep only) | BM25 + vector + graph |");
344
- w("| Scale beyond 200 lines | No (hard cap) | Unlimited |");
345
- w("| Cross-session recall | Only if in 200-line window | Full corpus search |");
346
- w("| Cross-agent sharing | No (per-agent files) | MCP + REST API |");
347
- w("| Multi-agent coordination | No | Leases, signals, actions |");
348
- w("| Temporal queries | No | Point-in-time graph |");
349
- w("| Memory lifecycle | No (manual pruning) | Ebbinghaus decay + eviction |");
350
- w("| Knowledge graph | No | Entity extraction + traversal |");
351
- w("| Query expansion | No | LLM-generated reformulations |");
352
- w("| Retention scoring | No | Time-frequency decay model |");
353
- w("| Real-time dashboard | No (read files manually) | Viewer on :3113 |");
354
- w("| Concurrent access | No (file lock) | Keyed mutex + KV store |");
355
- w("");
356
-
357
- w("## 5. When to Use What");
358
- w("");
359
- w("**Use built-in memory (CLAUDE.md) when:**");
360
- w("- You have < 200 items to remember");
361
- w("- Single agent, single project");
362
- w("- Preferences and quick facts only");
363
- w("- Zero setup is the priority");
364
- w("");
365
- w("**Use agentmemory when:**");
366
- w("- Project history exceeds 200 observations");
367
- w("- You need to recall specific incidents from weeks ago");
368
- w("- Multiple agents work on the same codebase");
369
- w("- You want semantic search (\"how does auth work?\") not just keyword matching");
370
- w("- You need to track memory quality, decay, and lifecycle");
371
- w("- You want a shared memory layer across Claude Code, Cursor, Windsurf, etc.");
372
- w("");
373
- w("Built-in memory is your sticky notes. agentmemory is the searchable database behind them.");
374
- w("");
375
-
376
- w("---");
377
- w(`*Scale tests: ${scale.length} corpus sizes. Cross-session tests: ${cross.length} queries targeting specific past sessions.*`);
378
-
379
- return lines.join("\n");
380
- }
381
-
382
- async function main() {
383
- console.log("=== agentmemory Scale & Cross-Session Evaluation ===\n");
384
-
385
- console.log("1. Scale benchmarks...");
386
- const scaleResults = await benchmarkScale([240, 1_000, 5_000, 10_000, 50_000]);
387
-
388
- console.log("\n2. Cross-session retrieval...");
389
- const crossResults = await benchmarkCrossSession();
390
-
391
- console.log("");
392
- const report = generateReport(scaleResults, crossResults);
393
- writeFileSync("benchmark/SCALE.md", report);
394
- console.log(report);
395
- console.log(`\nReport written to benchmark/SCALE.md`);
396
- }
397
-
398
- main().catch(console.error);