agent-memory-store 0.0.4 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/search.js ADDED
@@ -0,0 +1,151 @@
1
+ /**
2
+ * Hybrid search engine combining FTS5 BM25 (native SQLite) and vector similarity.
3
+ *
4
+ * Search modes:
5
+ * - "hybrid" — FTS5 BM25 + vector cosine similarity merged via Reciprocal Rank Fusion
6
+ * - "bm25" — FTS5 only (no embeddings needed)
7
+ * - "semantic" — Vector similarity only
8
+ *
9
+ * Falls back to BM25-only if embeddings are not available.
10
+ */
11
+
12
+ import { searchFTS, getAllEmbeddings, getChunk } from "./db.js";
13
+ import { embed, isEmbeddingAvailable } from "./embeddings.js";
14
+
15
+ // ─── Vector Search ──────────────────────────────────────────────────────────
16
+
17
/**
 * Computes the cosine similarity of two embedding vectors.
 *
 * Both vectors are assumed to be L2-normalized already, so the dot product
 * equals the cosine similarity and no division by magnitudes is performed.
 *
 * @param {ArrayLike<number>} a - First vector (e.g. a Float32Array).
 * @param {ArrayLike<number>} b - Second vector; expected to have the same length as `a`.
 * @returns {number} Dot product of `a` and `b`, or 0 when their lengths differ.
 */
function cosineSimilarity(a, b) {
  // Vectors of different dimensionality are not comparable. Without this
  // guard a shorter `b` yields NaN (number * undefined) and a longer `b` is
  // silently truncated, producing a meaningless score.
  if (a.length !== b.length) return 0;

  let dot = 0;
  for (let i = 0; i < a.length; i++) dot += a[i] * b[i];
  return dot;
}
26
+
27
/**
 * Scores every embedding returned by getAllEmbeddings against the query
 * embedding (exhaustive scan, no index) and returns the best matches.
 *
 * @param {Float32Array} queryEmbedding - Embedding of the search query.
 * @param {object} opts
 * @param {string} [opts.agent] - Forwarded to getAllEmbeddings.
 * @param {string[]} [opts.tags] - Forwarded to getAllEmbeddings.
 * @param {number} [opts.topK] - Maximum number of hits to return (default 18).
 * @returns {Array<{ id, score: number }>} Hits with a positive score, best first.
 */
function vectorSearch(queryEmbedding, { agent, tags = [], topK = 18 }) {
  const candidates = getAllEmbeddings({ agent, tags });
  if (!candidates.length) return [];

  const hits = [];
  for (const { id, embedding } of candidates) {
    const score = cosineSimilarity(queryEmbedding, embedding);
    // Only strictly positive similarities are kept (this also drops NaN).
    if (score > 0) hits.push({ id, score });
  }

  hits.sort((left, right) => right.score - left.score);
  return hits.slice(0, topK);
}
43
+
44
+ // ─── Fusion ─────────────────────────────────────────────────────────────────
45
+
46
/**
 * Reciprocal Rank Fusion — merges two ranked hit lists into a single ranking.
 *
 * Each hit contributes `weight / (k + rank + 1)` to the fused score of its id,
 * where `rank` is the hit's 0-based position in its own list. An id present
 * in both lists receives the sum of both contributions.
 *
 * @param {Array<{ id }>} bm25Hits - Keyword hits, best first.
 * @param {Array<{ id }>} vecHits - Vector hits, best first.
 * @param {number} [wBM25=0.4] - Weight applied to the keyword list.
 * @param {number} [wVec=0.6] - Weight applied to the vector list.
 * @param {number} [k=60] - Rank-smoothing constant; larger values flatten the
 *   difference between adjacent ranks.
 * @returns {Array<{ id, score: number }>} Fused hits sorted by score, highest first.
 */
function reciprocalRankFusion(bm25Hits, vecHits, wBM25 = 0.4, wVec = 0.6, k = 60) {
  const scores = new Map();

  // Adds one list's rank contributions into the shared score table.
  const accumulate = (hits, weight) => {
    hits.forEach(({ id }, rank) => {
      scores.set(id, (scores.get(id) ?? 0) + weight / (k + rank + 1));
    });
  };

  accumulate(bm25Hits, wBM25);
  accumulate(vecHits, wVec);

  return Array.from(scores, ([id, score]) => ({ id, score })).sort(
    (a, b) => b.score - a.score,
  );
}
65
+
66
+ // ─── Main Search ────────────────────────────────────────────────────────────
67
+
68
/**
 * Embeds the query text, degrading to `null` when the embedding call rejects
 * so that callers can fall back to keyword search instead of failing.
 *
 * @param {string} query
 * @returns {Promise<*>} Whatever `embed` resolves to, or null on failure.
 */
async function embedQueryOrNull(query) {
  try {
    return await embed(query);
  } catch (err) {
    process.stderr.write(
      `[agent-memory-store] Query embedding failed, falling back to BM25: ${err?.message ?? err}\n`,
    );
    return null;
  }
}

/**
 * Main search function — performs hybrid, BM25, or semantic search and
 * returns the matching chunks with their metadata and content.
 *
 * Mode selection:
 * - "bm25" uses searchFTS only.
 * - "semantic" uses vectorSearch only; falls back to searchFTS when no query
 *   embedding can be produced.
 * - anything else (including the default "hybrid") fuses both rankings with
 *   reciprocalRankFusion; falls back to the searchFTS ranking alone when no
 *   query embedding can be produced.
 * "hybrid" and "semantic" are downgraded to "bm25" up front when
 * isEmbeddingAvailable() reports false.
 *
 * NOTE(review): `minScore` is accepted for API compatibility but is not
 * applied. With the default fusion weights a hybrid score cannot exceed
 * 1/61 (about 0.016), so filtering by the default 0.1 would discard every
 * hybrid result. A per-mode threshold needs a design decision first.
 *
 * NOTE(review): scores are rounded to two decimals; for hybrid results this
 * leaves very little resolution, for the same reason.
 *
 * @param {object} opts
 * @param {string} opts.query
 * @param {string[]} [opts.tags]
 * @param {string} [opts.agent]
 * @param {number} [opts.topK] - Maximum number of results (default 6).
 * @param {number} [opts.minScore] - Currently unused, see note above.
 * @param {string} [opts.mode] - "hybrid" | "bm25" | "semantic"
 * @returns {Promise<Array>} Up to topK result objects, best first.
 */
export async function hybridSearch({
  query,
  tags = [],
  agent,
  topK = 6,
  minScore = 0.1,
  mode = "hybrid",
}) {
  // Over-fetch so that rank fusion works on more than topK rows per list.
  const candidateK = topK * 3;
  const embeddingsReady = isEmbeddingAvailable();

  const runFTS = () => searchFTS({ query, agent, tags, topK: candidateK });
  const runVector = (queryEmbedding) =>
    vectorSearch(queryEmbedding, { agent, tags, topK: candidateK });

  // Both vector-based modes degrade to keyword search without embeddings.
  const needsEmbeddings = mode === "hybrid" || mode === "semantic";
  const effectiveMode = needsEmbeddings && !embeddingsReady ? "bm25" : mode;

  let rankedHits;
  if (effectiveMode === "bm25") {
    rankedHits = runFTS();
  } else if (effectiveMode === "semantic") {
    const queryEmbedding = await embedQueryOrNull(query);
    rankedHits = queryEmbedding ? runVector(queryEmbedding) : runFTS();
  } else {
    // Start the embedding first so the FTS query runs while it is pending.
    const pendingEmbedding = embedQueryOrNull(query);
    const bm25Hits = runFTS();
    const queryEmbedding = await pendingEmbedding;
    rankedHits = queryEmbedding
      ? reciprocalRankFusion(bm25Hits, runVector(queryEmbedding))
      : bm25Hits;
  }

  // Walk the full ranked list rather than a pre-sliced topK window: hits whose
  // chunk can no longer be loaded are skipped, and later candidates take
  // their place so the caller still receives up to topK results.
  const enriched = [];
  for (let i = 0; i < rankedHits.length && enriched.length < topK; i++) {
    const { id, score } = rankedHits[i];
    const chunk = getChunk(id);
    if (!chunk) continue;

    enriched.push({
      id: chunk.id,
      topic: chunk.topic,
      agent: chunk.agent,
      tags: chunk.tags,
      importance: chunk.importance,
      score: Math.round(score * 100) / 100,
      content: chunk.content,
      updated: chunk.updatedAt,
    });
  }

  return enriched;
}
package/src/store.js CHANGED
@@ -1,119 +1,93 @@
1
1
  /**
2
- * context-store: file-based persistent memory for multi-agent systems.
2
+ * context-store: SQLite-backed persistent memory for multi-agent systems.
3
3
  *
4
- * Storage layout:
5
- * <CONTEXT_STORE_PATH>/
6
- * chunks/ → one .md file per chunk, YAML frontmatter + markdown body
7
- * state/ → one .json file per key (session state / pipeline variables)
8
- *
9
- * Chunk file format:
10
- * ---
11
- * id: <sha1-10>
12
- * topic: "Descriptive title of the chunk"
13
- * agent: agent-id
14
- * tags: [tag1, tag2]
15
- * importance: low | medium | high | critical
16
- * updated: ISO-8601
17
- * expires: ISO-8601 # optional — omit for permanent chunks
18
- * ---
19
- * Markdown content here.
4
+ * Storage: single SQLite database at <STORE_PATH>/store.db
5
+ * Search: hybrid (BM25 via FTS5 + vector cosine similarity + RRF)
6
+ * Embeddings: local via @huggingface/transformers (graceful fallback to BM25-only)
20
7
  */
21
8
 
22
- import fs from "fs/promises";
23
- import path from "path";
24
- import matter from "gray-matter";
25
9
  import { createHash } from "crypto";
26
- import { BM25 } from "./bm25.js";
27
-
28
- const STORE_PATH = process.env.AGENT_STORE_PATH
29
- ? path.resolve(process.env.AGENT_STORE_PATH)
30
- : path.join(process.cwd(), ".agent-memory-store");
31
-
32
- const CHUNKS_DIR = path.join(STORE_PATH, "chunks");
33
- const STATE_DIR = path.join(STORE_PATH, "state");
34
-
35
- /** Ensures storage directories exist. */
36
- async function ensureDirs() {
37
- await fs.mkdir(CHUNKS_DIR, { recursive: true });
38
- await fs.mkdir(STATE_DIR, { recursive: true });
39
- }
10
+ import {
11
+ getDb,
12
+ insertChunk,
13
+ getChunk,
14
+ deleteChunkById,
15
+ listChunksDb,
16
+ getStateDb,
17
+ setStateDb,
18
+ updateEmbedding,
19
+ getChunksWithoutEmbedding,
20
+ } from "./db.js";
21
+ import { hybridSearch } from "./search.js";
22
+ import { embed, prepareText, warmup } from "./embeddings.js";
23
+ import { migrateIfNeeded } from "./migrate.js";
40
24
 
41
25
  /**
42
- * Generates a stable short ID from agent + topic + current timestamp.
43
- * @param {string} agentId
44
- * @param {string} topic
45
- * @returns {string} 10-char hex string
26
+ * Initializes the store: runs migration if needed, warms up DB and embeddings.
27
+ * Called once at startup.
46
28
  */
47
- function generateId(agentId, topic) {
48
- const seed = `${agentId}:${topic}:${Date.now()}:${Math.random()}`;
49
- return createHash("sha1").update(seed).digest("hex").slice(0, 10);
29
/**
 * Initializes the store: opens the database, runs the filesystem migration
 * if needed, then starts embedding warmup and backfill in the background.
 * Intended to be called once at startup.
 *
 * @returns {Promise<void>} Resolves once the migration step has finished;
 *   warmup and backfill continue afterwards.
 */
export async function initStore() {
  // Ensure DB is ready (per the original note this also runs schema setup
  // and the expiry purge).
  getDb();

  // Migrate from filesystem if needed
  await migrateIfNeeded();

  // Warm up the embedding model and backfill in the background. The chain is
  // deliberately not awaited so startup is not blocked, which means it needs
  // its own rejection handler: without one, a failure in warmup or backfill
  // surfaces as an unhandled promise rejection.
  warmup()
    .then(() => backfillEmbeddings())
    .catch((err) => {
      process.stderr.write(
        `[agent-memory-store] Embedding warmup/backfill failed: ${err?.message ?? err}\n`,
      );
    });
}
51
39
 
52
40
  /**
53
- * Reads all non-expired chunks from disk.
54
- * Expired chunks are automatically deleted on read.
55
- *
56
- * @returns {Promise<Array<{ file: string, meta: object, content: string }>>}
41
+ * Background task: computes embeddings for chunks that don't have one yet.
57
42
  */
58
- async function loadAllChunks() {
59
- await ensureDirs();
60
- const files = await fs.readdir(CHUNKS_DIR).catch(() => []);
61
- const chunks = [];
43
+ async function backfillEmbeddings() {
44
+ const chunks = getChunksWithoutEmbedding();
45
+ if (!chunks.length) return;
62
46
 
63
- await Promise.all(
64
- files.map(async (file) => {
65
- if (!file.endsWith(".md")) return;
66
- try {
67
- const raw = await fs.readFile(path.join(CHUNKS_DIR, file), "utf8");
68
- const { data: meta, content } = matter(raw);
47
+ process.stderr.write(
48
+ `[agent-memory-store] Backfilling embeddings for ${chunks.length} chunks...\n`,
49
+ );
69
50
 
70
- if (meta.expires && new Date(meta.expires) < new Date()) {
71
- await fs.unlink(path.join(CHUNKS_DIR, file)).catch(() => {});
72
- return;
73
- }
51
+ for (const chunk of chunks) {
52
+ const text = prepareText({
53
+ topic: chunk.topic,
54
+ tags: chunk.tags,
55
+ content: chunk.content,
56
+ });
57
+ const embedding = await embed(text);
58
+ if (embedding) {
59
+ updateEmbedding(chunk.id, embedding);
60
+ }
61
+ }
74
62
 
75
- chunks.push({ file, meta, content: content.trim() });
76
- } catch {
77
- // Skip unreadable files silently
78
- }
79
- }),
63
+ process.stderr.write(
64
+ `[agent-memory-store] Embedding backfill complete.\n`,
80
65
  );
81
-
82
- return chunks;
83
66
  }
84
67
 
85
68
  /**
86
- * Builds a BM25 index from a list of chunks.
87
- * Searchable text = topic + tags + agent + body content.
88
- *
89
- * @param {Array} chunks
90
- * @returns {BM25}
69
+ * Generates a short unique ID by hashing agent + topic + current timestamp + a random value.
70
+ * @param {string} agentId
71
+ * @param {string} topic
72
+ * @returns {string} 10-char hex string
91
73
  */
92
- function buildIndex(chunks) {
93
- const engine = new BM25();
94
- for (const c of chunks) {
95
- const searchText = [
96
- c.meta.topic || "",
97
- (c.meta.tags || []).join(" "),
98
- c.meta.agent || "",
99
- c.content,
100
- ].join(" ");
101
- engine.addDocument(c.file, searchText, c.meta);
102
- }
103
- return engine;
74
function generateId(agentId, topic) {
  // The timestamp and random component make every call produce a fresh ID,
  // even for the same agent and topic.
  const hasher = createHash("sha1");
  hasher.update(`${agentId}:${topic}:${Date.now()}:${Math.random()}`);

  // Keep only the first 10 hex characters of the digest.
  return hasher.digest("hex").slice(0, 10);
}
105
78
 
106
79
  // ─── Public API ───────────────────────────────────────────────────────────────
107
80
 
108
81
  /**
109
- * Searches chunks by relevance using BM25, with optional tag and agent filters.
82
+ * Searches chunks using hybrid search (BM25 + vector + RRF).
110
83
  *
111
84
  * @param {object} opts
112
- * @param {string} opts.query - Search query text
113
- * @param {string[]} [opts.tags] - Filter: only chunks matching any of these tags
114
- * @param {string} [opts.agent] - Filter: only chunks written by this agent
115
- * @param {number} [opts.topK] - Max results (default: 6)
116
- * @param {number} [opts.minScore] - Minimum BM25 score threshold (default: 0.1)
85
+ * @param {string} opts.query
86
+ * @param {string[]} [opts.tags]
87
+ * @param {string} [opts.agent]
88
+ * @param {number} [opts.topK]
89
+ * @param {number} [opts.minScore]
90
+ * @param {string} [opts.mode] - "hybrid" | "bm25" | "semantic"
117
91
  * @returns {Promise<Array>}
118
92
  */
119
93
  export async function searchChunks({
@@ -122,53 +96,23 @@ export async function searchChunks({
122
96
  agent,
123
97
  topK = 6,
124
98
  minScore = 0.1,
99
+ mode = "hybrid",
125
100
  }) {
126
- const chunks = await loadAllChunks();
127
- if (chunks.length === 0) return [];
128
-
129
- const engine = buildIndex(chunks);
130
- const hasFilter = tags.length > 0 || !!agent;
131
-
132
- const filter = hasFilter
133
- ? (meta) => {
134
- if (agent && meta.agent !== agent) return false;
135
- if (tags.length > 0 && !tags.some((t) => (meta.tags || []).includes(t)))
136
- return false;
137
- return true;
138
- }
139
- : null;
140
-
141
- const hits = engine.search(query, topK, filter);
142
- const byFile = Object.fromEntries(chunks.map((c) => [c.file, c]));
143
-
144
- return hits
145
- .filter((h) => h.score >= minScore)
146
- .map((h) => {
147
- const c = byFile[h.id];
148
- return {
149
- id: c.meta.id,
150
- topic: c.meta.topic,
151
- agent: c.meta.agent,
152
- tags: c.meta.tags || [],
153
- importance: c.meta.importance || "medium",
154
- score: Math.round(h.score * 100) / 100,
155
- content: c.content,
156
- updated: c.meta.updated,
157
- };
158
- });
101
+ return hybridSearch({ query, tags, agent, topK, minScore, mode });
159
102
  }
160
103
 
161
104
  /**
162
- * Writes a new chunk to disk.
105
+ * Writes a new chunk to the database.
106
+ * Embedding is computed asynchronously in the background.
163
107
  *
164
108
  * @param {object} opts
165
- * @param {string} opts.topic - Short descriptive title
166
- * @param {string} opts.content - Markdown body content
167
- * @param {string} [opts.agent] - Agent identifier
168
- * @param {string[]} [opts.tags] - Search tags
169
- * @param {string} [opts.importance] - low | medium | high | critical
170
- * @param {number} [opts.ttlDays] - Days until auto-expiry (omit = permanent)
171
- * @returns {Promise<{ id, file, topic, tags, importance }>}
109
+ * @param {string} opts.topic
110
+ * @param {string} opts.content
111
+ * @param {string} [opts.agent]
112
+ * @param {string[]} [opts.tags]
113
+ * @param {string} [opts.importance]
114
+ * @param {number} [opts.ttlDays]
115
+ * @returns {Promise<{ id, topic, tags, importance }>}
172
116
  */
173
117
  export async function writeChunk({
174
118
  topic,
@@ -178,29 +122,32 @@ export async function writeChunk({
178
122
  importance = "medium",
179
123
  ttlDays = null,
180
124
  }) {
181
- await ensureDirs();
182
-
183
125
  const id = generateId(agent, topic);
184
126
  const now = new Date().toISOString();
185
- const expires = ttlDays
127
+ const expiresAt = ttlDays
186
128
  ? new Date(Date.now() + ttlDays * 86_400_000).toISOString()
187
129
  : null;
188
130
 
189
- const meta = {
131
+ insertChunk({
190
132
  id,
191
133
  topic,
192
134
  agent,
193
135
  tags,
194
136
  importance,
195
- updated: now,
196
- ...(expires ? { expires } : {}),
197
- };
198
-
199
- const fileContent = matter.stringify(`\n${content}\n`, meta);
200
- const filename = `${id}.md`;
201
-
202
- await fs.writeFile(path.join(CHUNKS_DIR, filename), fileContent, "utf8");
203
- return { id, file: filename, topic, tags, importance };
137
+ content,
138
+ embedding: null, // Computed in background
139
+ createdAt: now,
140
+ updatedAt: now,
141
+ expiresAt,
142
+ });
143
+
144
+ // Compute embedding in background (non-blocking)
145
+ const text = prepareText({ topic, tags, content });
146
+ embed(text).then((embedding) => {
147
+ if (embedding) updateEmbedding(id, embedding);
148
+ });
149
+
150
+ return { id, topic, tags, importance };
204
151
  }
205
152
 
206
153
  /**
@@ -210,27 +157,36 @@ export async function writeChunk({
210
157
  * @returns {Promise<{ meta: object, content: string } | null>}
211
158
  */
212
159
  export async function readChunk(id) {
213
- const chunks = await loadAllChunks();
214
- return chunks.find((c) => c.meta.id === id) ?? null;
160
+ const chunk = getChunk(id);
161
+ if (!chunk) return null;
162
+
163
+ return {
164
+ meta: {
165
+ id: chunk.id,
166
+ topic: chunk.topic,
167
+ agent: chunk.agent,
168
+ tags: chunk.tags,
169
+ importance: chunk.importance,
170
+ updated: chunk.updatedAt,
171
+ ...(chunk.expiresAt ? { expires: chunk.expiresAt } : {}),
172
+ },
173
+ content: chunk.content,
174
+ };
215
175
  }
216
176
 
217
177
  /**
218
178
  * Deletes a chunk by ID.
219
179
  *
220
180
  * @param {string} id
221
- * @returns {Promise<boolean>} true if deleted, false if not found
181
+ * @returns {Promise<boolean>}
222
182
  */
223
183
  export async function deleteChunk(id) {
224
- const chunks = await loadAllChunks();
225
- const target = chunks.find((c) => c.meta.id === id);
226
- if (!target) return false;
227
- await fs.unlink(path.join(CHUNKS_DIR, target.file));
228
- return true;
184
+ return deleteChunkById(id);
229
185
  }
230
186
 
231
187
  /**
232
188
  * Lists chunk metadata without loading full content.
233
- * Results are sorted by most recently updated.
189
+ * Sorted by most recently updated.
234
190
  *
235
191
  * @param {object} opts
236
192
  * @param {string} [opts.agent]
@@ -238,23 +194,7 @@ export async function deleteChunk(id) {
238
194
  * @returns {Promise<Array>}
239
195
  */
240
196
  export async function listChunks({ agent, tags = [] } = {}) {
241
- const chunks = await loadAllChunks();
242
- return chunks
243
- .filter((c) => {
244
- if (agent && c.meta.agent !== agent) return false;
245
- if (tags.length > 0 && !tags.some((t) => (c.meta.tags || []).includes(t)))
246
- return false;
247
- return true;
248
- })
249
- .map((c) => ({
250
- id: c.meta.id,
251
- topic: c.meta.topic,
252
- agent: c.meta.agent,
253
- tags: c.meta.tags || [],
254
- importance: c.meta.importance || "medium",
255
- updated: c.meta.updated,
256
- }))
257
- .sort((a, b) => new Date(b.updated) - new Date(a.updated));
197
+ return listChunksDb({ agent, tags });
258
198
  }
259
199
 
260
200
  /**
@@ -264,29 +204,16 @@ export async function listChunks({ agent, tags = [] } = {}) {
264
204
  * @returns {Promise<any | null>}
265
205
  */
266
206
  export async function getState(key) {
267
- await ensureDirs();
268
- try {
269
- const raw = await fs.readFile(path.join(STATE_DIR, `${key}.json`), "utf8");
270
- return JSON.parse(raw).value;
271
- } catch {
272
- return null;
273
- }
207
+ return getStateDb(key);
274
208
  }
275
209
 
276
210
  /**
277
- * Writes a session state variable (any JSON-serializable value).
211
+ * Writes a session state variable.
278
212
  *
279
213
  * @param {string} key
280
214
  * @param {any} value
281
215
  * @returns {Promise<{ key: string, updated: string }>}
282
216
  */
283
217
  export async function setState(key, value) {
284
- await ensureDirs();
285
- const updated = new Date().toISOString();
286
- await fs.writeFile(
287
- path.join(STATE_DIR, `${key}.json`),
288
- JSON.stringify({ key, value, updated }, null, 2),
289
- "utf8",
290
- );
291
- return { key, updated };
218
+ return setStateDb(key, value);
292
219
  }