@andespindola/brainlink 0.1.0-beta.13 → 0.1.0-beta.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +5 -5
- package/CHANGELOG.md +2 -2
- package/CONTRIBUTING.md +2 -2
- package/README.md +13 -14
- package/SECURITY.md +1 -1
- package/dist/application/analyze-vault.js +1 -15
- package/dist/application/get-graph-layout.js +2 -2
- package/dist/application/get-graph-node.js +3 -3
- package/dist/application/get-graph-summary.js +3 -3
- package/dist/application/get-graph.js +3 -3
- package/dist/application/index-vault.js +5 -5
- package/dist/application/list-agents.js +3 -3
- package/dist/application/list-links.js +5 -5
- package/dist/application/search-graph-node-ids.js +3 -3
- package/dist/application/search-knowledge.js +6 -6
- package/dist/benchmarks/large-vault.js +1 -1
- package/dist/infrastructure/file-index.js +291 -0
- package/dist/infrastructure/search-packs.js +31 -6
- package/docs/AGENT_USAGE.md +14 -15
- package/docs/ARCHITECTURE.md +19 -27
- package/package.json +1 -3
- package/dist/infrastructure/sqlite/document-writer.js +0 -51
- package/dist/infrastructure/sqlite/graph-reader.js +0 -267
- package/dist/infrastructure/sqlite/recovery.js +0 -163
- package/dist/infrastructure/sqlite/schema.js +0 -114
- package/dist/infrastructure/sqlite/search-reader.js +0 -188
- package/dist/infrastructure/sqlite/types.js +0 -1
- package/dist/infrastructure/sqlite-index.js +0 -38
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
import { mkdir, readFile, rename, writeFile } from 'node:fs/promises';
|
|
2
|
+
import { dirname, join } from 'node:path';
|
|
3
|
+
import { cosineSimilarity } from '../domain/embeddings.js';
|
|
4
|
+
// Matches one query token: a maximal run of Unicode letters, Unicode digits,
// underscores or hyphens. Global + Unicode flags so `String.prototype.match`
// returns every token in the query.
const queryTokenPattern = /[\p{L}\p{N}_-]+/gu;
|
|
5
|
+
/**
 * Creates a blank index snapshot.
 *
 * Every call returns a new object holding new, empty `documents`, `chunks`
 * and `links` arrays, stamped with the current time as an ISO-8601 string.
 *
 * @returns {{version: number, updatedAt: string, documents: Array, chunks: Array, links: Array}}
 */
const emptyIndex = () => {
    const createdAt = new Date();
    return {
        version: 1,
        updatedAt: createdAt.toISOString(),
        documents: [],
        chunks: [],
        links: []
    };
};
|
|
12
|
+
/**
 * Resolves where the JSON index of a vault is stored.
 *
 * @param {string} vaultPath - Root directory of the vault.
 * @returns {string} `<vaultPath>/.brainlink/index.json`, joined with the platform separator.
 */
export const indexStoragePath = (vaultPath) => {
    const storageDirectory = join(vaultPath, '.brainlink');
    return join(storageDirectory, 'index.json');
};
|
|
13
|
+
/**
 * Loads the persisted index of a vault, tolerating a missing or damaged file.
 *
 * The stored JSON is not trusted: `updatedAt` is kept only when it is a
 * string and each collection only when it is an array; anything else is
 * replaced by a default. `version` is always reported as 1.
 *
 * This function never rejects because of the index file itself: a missing
 * file (ENOENT), an unreadable file, malformed JSON and a non-object payload
 * all produce an empty index.
 *
 * @param {string} vaultPath - Root directory of the vault.
 * @returns {Promise<{version: number, updatedAt: string, documents: Array, chunks: Array, links: Array}>}
 */
const readIndex = async (vaultPath) => {
    let parsed;
    try {
        parsed = JSON.parse(await readFile(indexStoragePath(vaultPath), 'utf8'));
    }
    catch {
        // Deliberate best effort: every read or parse failure is treated like
        // "no index yet" instead of being surfaced to the caller.
        return emptyIndex();
    }
    // `null` and primitives are valid JSON but carry no index data.
    if (parsed === null || typeof parsed !== 'object') {
        return emptyIndex();
    }
    return {
        version: 1,
        updatedAt: typeof parsed.updatedAt === 'string' ? parsed.updatedAt : new Date().toISOString(),
        documents: Array.isArray(parsed.documents) ? parsed.documents : [],
        chunks: Array.isArray(parsed.chunks) ? parsed.chunks : [],
        links: Array.isArray(parsed.links) ? parsed.links : []
    };
};
|
|
31
|
+
/**
 * Persists an index snapshot for a vault.
 *
 * The containing directory is created first (recursively, mode 0o700). The
 * snapshot is serialized as a single JSON line into a sibling `.tmp` file
 * (mode 0o600) and then renamed onto the final path.
 *
 * @param {string} vaultPath - Root directory of the vault.
 * @param {object} index - Index snapshot to serialize.
 * @returns {Promise<void>}
 */
const writeIndex = async (vaultPath, index) => {
    const finalPath = indexStoragePath(vaultPath);
    const stagingPath = `${finalPath}.tmp`;
    const directoryOptions = { recursive: true, mode: 0o700 };
    const fileOptions = { encoding: 'utf8', mode: 0o600 };
    await mkdir(dirname(finalPath), directoryOptions);
    const serialized = `${JSON.stringify(index)}\n`;
    await writeFile(stagingPath, serialized, fileOptions);
    await rename(stagingPath, finalPath);
};
|
|
38
|
+
/**
 * Folds text into its comparison form: compatibility-decomposed (NFKD),
 * with characters carrying the Unicode `Diacritic` property removed, and
 * lower-cased.
 *
 * @param {string} value - Raw text.
 * @returns {string} Folded text.
 */
const normalizeToken = (value) => {
    const decomposed = value.normalize('NFKD');
    const withoutMarks = decomposed.replace(/\p{Diacritic}/gu, '');
    return withoutMarks.toLowerCase();
};
|
|
42
|
+
/**
 * Splits a query into normalized search tokens.
 *
 * Tokens are the matches of `queryTokenPattern`, each passed through
 * `normalizeToken`; tokens of one character or less are discarded.
 *
 * @param {string} query - Raw query text.
 * @returns {string[]} Normalized tokens in query order; empty when nothing matches.
 */
const tokenize = (query) => {
    const matches = query.match(queryTokenPattern);
    if (matches == null) {
        return [];
    }
    const tokens = [];
    for (const rawToken of matches) {
        const token = normalizeToken(rawToken);
        if (token.length > 1) {
            tokens.push(token);
        }
    }
    return tokens;
};
|
|
46
|
+
/**
 * Counts non-overlapping occurrences of `token` inside `text`, scanning from
 * left to right.
 *
 * @param {string} text - Text to scan.
 * @param {string} token - Substring to look for.
 * @returns {number} Number of matches; 0 when `token` is empty.
 */
const countOccurrences = (text, token) => {
    // An empty needle matches at every position without advancing the
    // cursor, which would otherwise loop forever.
    if (token.length === 0) {
        return 0;
    }
    let hits = 0;
    let cursor = text.indexOf(token);
    while (cursor !== -1) {
        hits += 1;
        // Resume after the match so occurrences never overlap.
        cursor = text.indexOf(token, cursor + token.length);
    }
    return hits;
};
|
|
59
|
+
/**
 * Computes the lexical relevance of a row for a list of query tokens.
 *
 * For every token the score grows by 5 per title hit, 4 per tag hit, 2 per
 * path hit, and by the number of content hits capped at 6. All fields are
 * compared in their `normalizeToken` form; tags are joined with spaces.
 *
 * @param {{title: string, path: string, content: string, tags: string[]}} row - Row to score.
 * @param {string[]} tokens - Normalized query tokens.
 * @returns {number} Accumulated score; 0 when there are no tokens.
 */
const textScore = (row, tokens) => {
    if (tokens.length === 0) {
        return 0;
    }
    const haystack = {
        title: normalizeToken(row.title),
        path: normalizeToken(row.path),
        content: normalizeToken(row.content),
        tags: normalizeToken(row.tags.join(' '))
    };
    let total = 0;
    for (const token of tokens) {
        const titleHits = countOccurrences(haystack.title, token);
        const tagHits = countOccurrences(haystack.tags, token);
        const pathHits = countOccurrences(haystack.path, token);
        const contentHits = countOccurrences(haystack.content, token);
        total = total + titleHits * 5 + tagHits * 4 + pathHits * 2 + Math.min(contentHits, 6);
    }
    return total;
};
|
|
75
|
+
/**
 * Scores a row against the query embedding.
 *
 * @param {{embedding: number[]}} row - Row carrying a chunk embedding.
 * @param {number[]} queryEmbedding - Embedding of the query.
 * @returns {number} `cosineSimilarity(queryEmbedding, row.embedding)`, or 0
 *   when either vector is empty.
 */
const semanticScore = (row, queryEmbedding) => {
    if (!(queryEmbedding.length > 0)) {
        return 0;
    }
    if (!(row.embedding.length > 0)) {
        return 0;
    }
    return cosineSimilarity(queryEmbedding, row.embedding);
};
|
|
76
|
+
/**
 * Shapes a scored row into a search result.
 *
 * The combined `score` depends on the mode: `'fts'` uses the text score,
 * `'semantic'` uses the semantic score, and any other mode uses
 * `text + semantic * 8`. The row's embedding is not copied to the result.
 *
 * @param {object} row - Row with document/chunk fields.
 * @param {string} mode - Search mode.
 * @param {number} text - Lexical score of the row.
 * @param {number} semantic - Semantic score of the row.
 * @returns {object} Search result record.
 */
const toResult = (row, mode, text, semantic) => {
    let score;
    if (mode === 'fts') {
        score = text;
    }
    else if (mode === 'semantic') {
        score = semantic;
    }
    else {
        score = text + semantic * 8;
    }
    return {
        documentId: row.documentId,
        agentId: row.agentId,
        title: row.title,
        path: row.path,
        chunkId: row.chunkId,
        content: row.content,
        score,
        textScore: text,
        semanticScore: semantic,
        searchMode: mode,
        tags: row.tags
    };
};
|
|
92
|
+
/**
 * Expands a stored link into a display record by resolving both endpoints.
 *
 * When the source document is not found, the agent defaults to `'shared'`
 * and title/path to `'Unknown'`. When the target is unresolved, the link's
 * own `toTitle` is used and `toPath` is `null`.
 *
 * @param {object} link - Stored link (`fromDocumentId`, `toDocumentId`, `toTitle`, `weight`, `priority`).
 * @param {Map<string, object>} documentsById - Documents keyed by id.
 * @returns {object} Link record with resolved titles and paths.
 */
const toGraphLink = (link, documentsById) => {
    const origin = documentsById.get(link.fromDocumentId);
    let destination;
    if (link.toDocumentId) {
        destination = documentsById.get(link.toDocumentId);
    }
    return {
        agentId: origin?.agentId ?? 'shared',
        fromTitle: origin?.title ?? 'Unknown',
        fromPath: origin?.path ?? 'Unknown',
        toTitle: destination?.title ?? link.toTitle,
        toPath: destination?.path ?? null,
        weight: link.weight,
        priority: link.priority
    };
};
|
|
105
|
+
/**
 * Opens the JSON-file-backed index of a vault.
 *
 * Nothing is cached on the returned object: every read method loads the
 * index file again through `readIndex`, and every write replaces the whole
 * file through `writeIndex`.
 *
 * @param {string} vaultPath - Root directory of the vault.
 * @returns {object} Index API: `reset`, `saveDocuments`, `search`,
 *   `listLinks`, `listBacklinks`, `getGraph`, `getGraphSummary`,
 *   `getGraphNode`, `searchGraphNodeIds`, `listAgents`, `close`.
 */
export const openFileIndex = (vaultPath) => {
    const load = async () => readIndex(vaultPath);
    const persist = async (index) => writeIndex(vaultPath, index);
    // Lookup table from document id to document record.
    const mapDocumentsById = (index) => new Map(index.documents.map((document) => [document.id, document]));
    // True when no agent filter is given, or the link's source document belongs to that agent.
    const isLinkFromAgent = (link, documentsById, agentId) => !agentId || documentsById.get(link.fromDocumentId)?.agentId === agentId;
    // Shared by getGraph and getGraphSummary, which differ only in whether
    // node content is included (the summary blanks it to '').
    const buildGraph = async (agentId, includeContent) => {
        const index = await load();
        const documents = agentId
            ? index.documents.filter((document) => document.agentId === agentId)
            : index.documents;
        const documentIds = new Set(documents.map((document) => document.id));
        // Edges are selected by their source document only; the target may be
        // outside the selection or unresolved.
        const edges = index.links
            .filter((link) => documentIds.has(link.fromDocumentId))
            .map((link) => ({
            source: link.fromDocumentId,
            target: link.toDocumentId,
            targetTitle: link.toTitle,
            weight: link.weight,
            priority: link.priority
        }));
        return {
            nodes: documents.map((document) => ({
                id: document.id,
                agentId: document.agentId,
                title: document.title,
                path: document.path,
                content: includeContent ? document.content : '',
                tags: document.tags
            })),
            edges
        };
    };
    return {
        /** Replaces the stored index with an empty one. */
        reset: async () => {
            await persist(emptyIndex());
        },
        /**
         * Replaces the stored index with the given documents.
         * @param {Array<{document: object, chunks: object[], links: object[]}>} documents
         */
        saveDocuments: async (documents) => {
            const chunks = documents.flatMap((document) => document.chunks);
            const links = documents.flatMap((document) => document.links);
            await persist({
                version: 1,
                updatedAt: new Date().toISOString(),
                documents: documents.map((document) => document.document),
                chunks,
                links
            });
        },
        /**
         * Ranks chunks against a query.
         * @param {string} query - Raw query text.
         * @param {number} limit - Maximum number of results (negative values yield none).
         * @param {string} [agentId] - Restrict to one agent's documents.
         * @param {string} [mode='hybrid'] - 'fts', 'semantic' or hybrid (anything else).
         * @param {number[]} [queryEmbedding=[]] - Query embedding for semantic scoring.
         * @returns {Promise<object[]>} Results ordered by score, then title.
         */
        search: async (query, limit, agentId, mode = 'hybrid', queryEmbedding = []) => {
            const index = await load();
            const documentsById = mapDocumentsById(index);
            const rows = index.chunks.flatMap((chunk) => {
                const document = documentsById.get(chunk.documentId);
                // Skip orphaned chunks and chunks owned by another agent.
                if (!document || (agentId && document.agentId !== agentId)) {
                    return [];
                }
                return [
                    {
                        documentId: document.id,
                        agentId: document.agentId,
                        title: document.title,
                        path: document.path,
                        chunkId: chunk.id,
                        content: chunk.content,
                        tags: document.tags,
                        embedding: chunk.embedding
                    }
                ];
            });
            const tokens = tokenize(query);
            return rows
                .map((row) => toResult(row, mode, textScore(row, tokens), semanticScore(row, queryEmbedding)))
                // A query without tokens keeps every row, even with a zero score.
                .filter((row) => row.score > 0 || tokens.length === 0)
                .sort((left, right) => right.score - left.score || left.title.localeCompare(right.title))
                .slice(0, Math.max(0, limit));
        },
        /**
         * Lists links, optionally only those whose source belongs to `agentId`,
         * ordered by source title.
         */
        listLinks: async (agentId) => {
            const index = await load();
            const documentsById = mapDocumentsById(index);
            return index.links
                .filter((link) => isLinkFromAgent(link, documentsById, agentId))
                .map((link) => toGraphLink(link, documentsById))
                .sort((left, right) => left.fromTitle.localeCompare(right.fromTitle));
        },
        /**
         * Lists links pointing at `title` (case-insensitive match on the link's
         * `toTitle`), ordered by descending weight, then source title.
         */
        listBacklinks: async (title, agentId) => {
            const index = await load();
            const titleKey = title.toLowerCase();
            const documentsById = mapDocumentsById(index);
            return index.links
                .filter((link) => link.toTitle.toLowerCase() === titleKey)
                .filter((link) => isLinkFromAgent(link, documentsById, agentId))
                .map((link) => toGraphLink(link, documentsById))
                .sort((left, right) => right.weight - left.weight || left.fromTitle.localeCompare(right.fromTitle));
        },
        /** Returns nodes (with content) and edges, optionally for one agent. */
        getGraph: async (agentId) => buildGraph(agentId, true),
        /** Same as `getGraph`, but every node's `content` is the empty string. */
        getGraphSummary: async (agentId) => buildGraph(agentId, false),
        /** Returns the node with the given id (and agent, when given), or `undefined`. */
        getGraphNode: async (id, agentId) => {
            const index = await load();
            const document = index.documents.find((row) => row.id === id && (!agentId || row.agentId === agentId));
            if (!document) {
                return undefined;
            }
            return {
                id: document.id,
                agentId: document.agentId,
                title: document.title,
                path: document.path,
                content: document.content,
                tags: document.tags
            };
        },
        /**
         * Returns ids of the documents that best match `query` lexically,
         * ordered by descending score, then id.
         */
        searchGraphNodeIds: async (query, limit, agentId) => {
            // Decide the trivial cases before paying for an index read.
            if (limit <= 0 || normalizeToken(query).length === 0) {
                return [];
            }
            const tokens = tokenize(query);
            const index = await load();
            return index.documents
                .filter((document) => (!agentId || document.agentId === agentId))
                .map((document) => ({
                id: document.id,
                // textScore only reads title, path, content and tags.
                score: textScore({
                    title: document.title,
                    path: document.path,
                    content: document.content,
                    tags: document.tags
                }, tokens)
            }))
                .filter((row) => row.score > 0)
                .sort((left, right) => right.score - left.score || left.id.localeCompare(right.id))
                .slice(0, limit)
                .map((row) => row.id);
        },
        /** Lists agent ids with their document counts, ordered by id. */
        listAgents: async () => {
            const index = await load();
            const counts = new Map();
            for (const document of index.documents) {
                counts.set(document.agentId, (counts.get(document.agentId) ?? 0) + 1);
            }
            return Array.from(counts.entries())
                .sort((left, right) => left[0].localeCompare(right[0]))
                .map(([id, documentCount]) => ({ id, documentCount }));
        },
        close: () => {
            // File-based index has no persistent connection.
        }
    };
};
|
|
@@ -100,14 +100,15 @@ const sortedPackFiles = async (vaultPath) => {
|
|
|
100
100
|
throw error;
|
|
101
101
|
}
|
|
102
102
|
};
|
|
103
|
-
|
|
103
|
+
const writeRowsAsPrivatePacks = async (vaultPath, rows, clearExisting) => {
|
|
104
104
|
const directory = toPackDirectory(vaultPath);
|
|
105
|
-
const rows = toRows(documents);
|
|
106
105
|
await mkdir(directory, { recursive: true });
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
106
|
+
if (clearExisting) {
|
|
107
|
+
const current = await readdir(directory);
|
|
108
|
+
await Promise.all(current
|
|
109
|
+
.filter((name) => name.endsWith('.blpk') || name.endsWith('.jsonl.gz') || name === manifestFileName)
|
|
110
|
+
.map((name) => rm(join(directory, name), { force: true })));
|
|
111
|
+
}
|
|
111
112
|
const chunks = chunkRows(rows, rowChunkSize);
|
|
112
113
|
await Promise.all(chunks.map(async (chunk, index) => {
|
|
113
114
|
const fileName = `pack-${String(index + 1).padStart(4, '0')}.blpk`;
|
|
@@ -127,6 +128,30 @@ export const buildSearchPacks = async (vaultPath, documents) => {
|
|
|
127
128
|
recordCount: rows.length
|
|
128
129
|
};
|
|
129
130
|
};
|
|
131
|
+
/**
 * Rebuilds the private search packs of a vault from indexed documents.
 *
 * The documents are converted with `toRows` and written through
 * `writeRowsAsPrivatePacks` with `clearExisting` set to `true`.
 *
 * @param {string} vaultPath - Root directory of the vault.
 * @param {Array} documents - Indexed documents to export.
 * @returns {Promise<object>} Whatever `writeRowsAsPrivatePacks` reports.
 */
export const buildSearchPacks = async (vaultPath, documents) => {
    const rows = toRows(documents);
    return writeRowsAsPrivatePacks(vaultPath, rows, true);
};
|
|
134
|
+
/**
 * Converts legacy `.jsonl.gz` search packs into private `.blpk` packs when
 * no `.blpk` pack exists yet.
 *
 * @param {string} vaultPath - Root directory of the vault.
 * @returns {Promise<object>} `{ imported: false }` when a `.blpk` pack is
 *   already present or there are no legacy packs; otherwise
 *   `{ imported: true, source: 'legacy-packs', ...report }` where `report`
 *   is the result of `writeRowsAsPrivatePacks`.
 */
export const ensurePrivatePacksFromLegacyIndex = async (vaultPath) => {
    const files = await sortedPackFiles(vaultPath);
    if (files.some((file) => file.endsWith('.blpk'))) {
        return { imported: false };
    }
    const legacyPackFiles = files.filter((file) => file.endsWith('.jsonl.gz'));
    if (legacyPackFiles.length === 0) {
        return { imported: false };
    }
    const directory = toPackDirectory(vaultPath);
    const rows = [];
    // Packs are read one at a time, in the order returned by sortedPackFiles.
    for (const file of legacyPackFiles) {
        const parsed = await parseRowsFromPack(vaultPath, await readFile(join(directory, file)));
        // Append element-wise: `rows.push(...parsed)` passes every row as a
        // call argument and can exceed the engine's argument limit on large packs.
        for (const row of parsed) {
            rows.push(row);
        }
    }
    // All legacy rows are in memory before existing pack files are cleared.
    const report = await writeRowsAsPrivatePacks(vaultPath, rows, true);
    return {
        imported: true,
        source: 'legacy-packs',
        ...report
    };
};
|
|
130
155
|
export const searchInPacks = async (vaultPath, query, limit, agentId) => {
|
|
131
156
|
const normalizedAgent = agentId?.trim();
|
|
132
157
|
const tokens = tokenize(query);
|
package/docs/AGENT_USAGE.md
CHANGED
|
@@ -18,7 +18,7 @@ The correct dependency direction is:
|
|
|
18
18
|
agent -> Brainlink CLI -> Markdown vault + derived index
|
|
19
19
|
```
|
|
20
20
|
|
|
21
|
-
Agents should never depend on
|
|
21
|
+
Agents should never depend on internal index persistence files as a public API.
|
|
22
22
|
|
|
23
23
|
The installed CLI exposes two equivalent binaries:
|
|
24
24
|
|
|
@@ -180,16 +180,16 @@ Required write behavior:
|
|
|
180
180
|
Good linked note:
|
|
181
181
|
|
|
182
182
|
```bash
|
|
183
|
-
blink add "
|
|
183
|
+
blink add "Index Rebuild" \
|
|
184
184
|
--agent coding-agent \
|
|
185
|
-
--content "
|
|
185
|
+
--content "Derived index artifacts are rebuildable and disposable. Related: [[Architecture]], [[Agent Namespaces]]. #index #architecture #decision"
|
|
186
186
|
blink validate --agent coding-agent
|
|
187
187
|
```
|
|
188
188
|
|
|
189
189
|
Poor disconnected note:
|
|
190
190
|
|
|
191
191
|
```bash
|
|
192
|
-
blink add "
|
|
192
|
+
blink add "Index Rebuild" \
|
|
193
193
|
--agent coding-agent \
|
|
194
194
|
--content "We rebuild old indexes now."
|
|
195
195
|
```
|
|
@@ -460,11 +460,11 @@ If `--mode`/`--limit` are omitted, Brainlink resolves those values from the acti
|
|
|
460
460
|
|
|
461
461
|
Search modes:
|
|
462
462
|
|
|
463
|
-
- `hybrid`: default; combines
|
|
464
|
-
- `fts`: lexical
|
|
465
|
-
- `semantic`: local deterministic embedding similarity
|
|
463
|
+
- `hybrid`: default; combines lexical matching and local embedding similarity.
|
|
464
|
+
- `fts`: lexical full-text matching only.
|
|
465
|
+
- `semantic`: local deterministic embedding similarity.
|
|
466
466
|
|
|
467
|
-
Hybrid results are cached in-memory for a short TTL and invalidated when `.brainlink/
|
|
467
|
+
Hybrid results are cached in-memory for a short TTL and invalidated when `.brainlink/index.json` changes.
|
|
468
468
|
|
|
469
469
|
### Build Agent Context
|
|
470
470
|
|
|
@@ -634,8 +634,7 @@ GET /api/validate
|
|
|
634
634
|
|
|
635
635
|
The HTTP API is read-only. Use the CLI for writes and indexing.
|
|
636
636
|
|
|
637
|
-
|
|
638
|
-
Indexing also writes private encrypted search packs at `.brainlink/search-packs/*.blpk`; when SQLite cannot be opened, Brainlink falls back to pack-based search automatically.
|
|
637
|
+
Indexing writes private encrypted search packs at `.brainlink/search-packs/*.blpk` for resilient retrieval and portability.
|
|
639
638
|
Pack decryption keys are resolved from `$BRAINLINK_HOME/keys` (or `BRAINLINK_SEARCH_PACK_KEY` when explicitly set).
|
|
640
639
|
|
|
641
640
|
## Agent Integration Contract
|
|
@@ -669,9 +668,9 @@ Non-goals:
|
|
|
669
668
|
## Operational Rules
|
|
670
669
|
|
|
671
670
|
- Re-run `index` after modifying notes.
|
|
672
|
-
- Treat `.brainlink/
|
|
673
|
-
- Commit Markdown notes, not local
|
|
674
|
-
- Do not manually edit
|
|
671
|
+
- Treat `.brainlink/index.json` and `.brainlink/search-packs/` as disposable.
|
|
672
|
+
- Commit Markdown notes, not local index files.
|
|
673
|
+
- Do not manually edit generated index artifacts.
|
|
675
674
|
- Keep generated context short enough for the target model.
|
|
676
675
|
- Prefer specific queries over broad queries.
|
|
677
676
|
- Write explicit `[[wiki links]]` when durable memory should be connected.
|
|
@@ -701,9 +700,9 @@ Weak retrieval usually means:
|
|
|
701
700
|
|
|
702
701
|
## Current Limits
|
|
703
702
|
|
|
704
|
-
- Search supports FTS, local semantic embeddings
|
|
703
|
+
- Search supports FTS, local semantic embeddings and hybrid ranking.
|
|
705
704
|
- Local embeddings are deterministic and provider-free; remote embedding providers are not implemented yet.
|
|
706
705
|
- MCP integration is available through the `brainlink-mcp` stdio server.
|
|
707
706
|
- HTTP API is local and unauthenticated.
|
|
708
|
-
- Bucket vaults support S3-compatible `s3://bucket/prefix` URIs and use
|
|
707
|
+
- Bucket vaults support S3-compatible `s3://bucket/prefix` URIs and use local cache/index artifacts.
|
|
709
708
|
- Watch mode depends on platform filesystem watcher behavior and is only supported for local filesystem vaults.
|
package/docs/ARCHITECTURE.md
CHANGED
|
@@ -8,7 +8,7 @@ CLI -> application use cases -> domain functions -> infrastructure adapters
|
|
|
8
8
|
|
|
9
9
|
The core rule is simple:
|
|
10
10
|
|
|
11
|
-
Domain code must not know about the CLI, filesystem, or
|
|
11
|
+
Domain code must not know about the CLI, filesystem, or index persistence format.
|
|
12
12
|
|
|
13
13
|
## Modules
|
|
14
14
|
|
|
@@ -53,14 +53,11 @@ src/
|
|
|
53
53
|
types.ts
|
|
54
54
|
|
|
55
55
|
infrastructure/
|
|
56
|
-
|
|
57
|
-
document-writer.ts
|
|
58
|
-
graph-reader.ts
|
|
59
|
-
schema.ts
|
|
60
|
-
search-reader.ts
|
|
56
|
+
file-index.ts
|
|
61
57
|
file-system-vault.ts
|
|
58
|
+
private-pack-codec.ts
|
|
59
|
+
search-packs.ts
|
|
62
60
|
session-state.ts
|
|
63
|
-
sqlite-index.ts
|
|
64
61
|
|
|
65
62
|
mcp/
|
|
66
63
|
main.ts
|
|
@@ -80,7 +77,6 @@ The domain layer contains pure knowledge rules:
|
|
|
80
77
|
- extract `#tags`
|
|
81
78
|
- split documents into chunks
|
|
82
79
|
- create deterministic local embeddings
|
|
83
|
-
- create deterministic embedding buckets for semantic candidate retrieval
|
|
84
80
|
- calculate cosine similarity
|
|
85
81
|
- estimate token counts
|
|
86
82
|
- select context sections
|
|
@@ -116,12 +112,11 @@ The infrastructure layer handles side effects:
|
|
|
116
112
|
- mirroring S3-compatible bucket Markdown into a local cache
|
|
117
113
|
- writing Markdown notes
|
|
118
114
|
- creating `.brainlink`
|
|
119
|
-
- writing and querying
|
|
120
|
-
- running
|
|
121
|
-
- narrowing semantic candidates through SQLite embedding buckets before cosine scoring
|
|
115
|
+
- writing and querying file-based indexes
|
|
116
|
+
- running lexical, semantic and hybrid retrieval
|
|
122
117
|
|
|
123
|
-
|
|
124
|
-
objects in the bucket remain canonical and
|
|
118
|
+
|
|
119
|
+
Index artifacts are rebuildable and are not canonical storage. For bucket vaults, Markdown objects in the bucket remain canonical and local index files are derived data.
|
|
125
120
|
|
|
126
121
|
## Indexing Flow
|
|
127
122
|
|
|
@@ -132,11 +127,9 @@ read markdown files
|
|
|
132
127
|
-> resolve links
|
|
133
128
|
-> split chunks
|
|
134
129
|
-> create chunk embeddings
|
|
135
|
-
-> reset
|
|
130
|
+
-> reset file index
|
|
136
131
|
-> persist documents, chunks and links
|
|
137
|
-
->
|
|
138
|
-
-> persist embedding vectors
|
|
139
|
-
-> persist embedding buckets
|
|
132
|
+
-> persist chunks, links and embeddings in file index
|
|
140
133
|
```
|
|
141
134
|
|
|
142
135
|
## Retrieval Flow
|
|
@@ -145,7 +138,7 @@ read markdown files
|
|
|
145
138
|
question
|
|
146
139
|
-> selected mode: fts | semantic | hybrid
|
|
147
140
|
-> optional query embedding
|
|
148
|
-
->
|
|
141
|
+
-> lexical scoring and/or semantic cosine scoring
|
|
149
142
|
-> cosine similarity over candidate chunks
|
|
150
143
|
-> ranked chunks with textScore and semanticScore
|
|
151
144
|
-> token-budget selection
|
|
@@ -163,7 +156,7 @@ server command
|
|
|
163
156
|
-> browser renders graph canvas
|
|
164
157
|
```
|
|
165
158
|
|
|
166
|
-
The graph UI is intentionally read-only. Markdown remains the write interface and
|
|
159
|
+
The graph UI is intentionally read-only. Markdown remains the write interface and index artifacts remain derived data.
|
|
167
160
|
|
|
168
161
|
## HTTP API Flow
|
|
169
162
|
|
|
@@ -171,7 +164,7 @@ The graph UI is intentionally read-only. Markdown remains the write interface an
|
|
|
171
164
|
HTTP request
|
|
172
165
|
-> route handler
|
|
173
166
|
-> application use case
|
|
174
|
-
-> filesystem and
|
|
167
|
+
-> filesystem and index adapters
|
|
175
168
|
-> JSON response
|
|
176
169
|
```
|
|
177
170
|
|
|
@@ -282,11 +275,10 @@ vault/agents/<agent-id>/**/*.md
|
|
|
282
275
|
|
|
283
276
|
Rebuildable:
|
|
284
277
|
|
|
285
|
-
- `.brainlink/
|
|
278
|
+
- `.brainlink/index.json`
|
|
279
|
+
- `.brainlink/search-packs/*.blpk`
|
|
286
280
|
- `$BRAINLINK_HOME/bucket-cache`
|
|
287
|
-
- FTS records
|
|
288
281
|
- local embedding vectors
|
|
289
|
-
- local embedding bucket index
|
|
290
282
|
- chunks
|
|
291
283
|
- resolved links
|
|
292
284
|
|
|
@@ -296,13 +288,13 @@ Rebuildable:
|
|
|
296
288
|
|
|
297
289
|
Markdown keeps the system portable, inspectable, Git-friendly, and compatible with Obsidian-like workflows.
|
|
298
290
|
|
|
299
|
-
###
|
|
291
|
+
### File Index As Local Index
|
|
300
292
|
|
|
301
|
-
|
|
293
|
+
Brainlink uses a local JSON index plus encrypted pack exports for fast rebuildable retrieval without external infrastructure.
|
|
302
294
|
Hybrid retrieval also uses a short-lived in-memory cache keyed by vault/query/agent and invalidated by index file mtime to reduce repeated query latency.
|
|
303
|
-
|
|
304
|
-
Indexing additionally exports private encrypted pack files (`.brainlink/search-packs/*.blpk`) from indexed chunks. Search falls back to these packs when SQLite is unavailable, preserving retrieval continuity in degraded mode.
|
|
295
|
+
Indexing exports private encrypted pack files (`.brainlink/search-packs/*.blpk`) from indexed chunks for fast retrieval and recovery continuity.
|
|
305
296
|
Pack encryption keys are resolved from `$BRAINLINK_HOME/keys` or from `BRAINLINK_SEARCH_PACK_KEY` when configured.
|
|
297
|
+
Legacy `.jsonl.gz` search packs are automatically upgraded to `.blpk` the first time a retrieval flow runs.
|
|
306
298
|
|
|
307
299
|
### CLI First
|
|
308
300
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@andespindola/brainlink",
|
|
3
|
-
"version": "0.1.0-beta.
|
|
3
|
+
"version": "0.1.0-beta.15",
|
|
4
4
|
"description": "Local-first knowledge memory for agents with Markdown, backlinks, indexing and context retrieval.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|
|
@@ -58,12 +58,10 @@
|
|
|
58
58
|
"dependencies": {
|
|
59
59
|
"@aws-sdk/client-s3": "^3.1038.0",
|
|
60
60
|
"@modelcontextprotocol/sdk": "^1.29.0",
|
|
61
|
-
"better-sqlite3": "^12.9.0",
|
|
62
61
|
"commander": "^14.0.2",
|
|
63
62
|
"zod": "^4.3.6"
|
|
64
63
|
},
|
|
65
64
|
"devDependencies": {
|
|
66
|
-
"@types/better-sqlite3": "^7.6.13",
|
|
67
65
|
"@types/node": "^24.9.2",
|
|
68
66
|
"tsx": "^4.21.0",
|
|
69
67
|
"typescript": "^5.9.3",
|
|
@@ -1,51 +0,0 @@
|
|
|
1
|
-
import { createEmbeddingBuckets } from '../../domain/embeddings.js';
|
|
2
|
-
/**
 * Derives the case-insensitive lookup key of a link target title.
 *
 * @param {string} title - Title as written in the link.
 * @returns {string} Lower-cased title.
 */
const toTitleKey = (title) => {
    return title.toLowerCase();
};
|
|
3
|
-
/**
 * Creates the write side of the SQLite index.
 *
 * @param {object} database - Database handle exposing `exec`, `prepare` and
 *   `transaction` (presumably a better-sqlite3 `Database`; confirm against the caller).
 * @returns {{reset: Function, saveDocuments: Function}} Writer API.
 */
export const createIndexWriter = (database) => ({
    /** Deletes every row from the index tables. */
    reset: () => {
        database.exec(`
      DELETE FROM embedding_buckets;
      DELETE FROM chunks_fts;
      DELETE FROM links;
      DELETE FROM chunks;
      DELETE FROM documents;
    `);
    },
    /**
     * Inserts documents with their chunks, FTS rows, embedding buckets and
     * links inside a single transaction.
     *
     * @param {Array<{document: object, chunks: object[], links: object[]}>} documents
     */
    saveDocuments: (documents) => {
        const insertDocument = database.prepare(`
      INSERT INTO documents (id, agent_id, title, path, content, tags_json, frontmatter_json, created_at, updated_at)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
    `);
        const insertChunk = database.prepare(`
      INSERT INTO chunks (id, document_id, ordinal, content, token_count, embedding_provider, embedding_json)
      VALUES (?, ?, ?, ?, ?, ?, ?)
    `);
        const insertChunkFts = database.prepare(`
      INSERT INTO chunks_fts (chunk_id, document_id, agent_id, title, content)
      VALUES (?, ?, ?, ?, ?)
    `);
        const insertEmbeddingBucket = database.prepare(`
      INSERT OR IGNORE INTO embedding_buckets (bucket, chunk_id)
      VALUES (?, ?)
    `);
        const insertLink = database.prepare(`
      INSERT INTO links (from_document_id, to_title, to_title_key, to_document_id, weight, priority)
      VALUES (?, ?, ?, ?, ?, ?)
    `);
        const transaction = database.transaction(() => {
            // First pass: documents and their chunks.
            documents.forEach(({ document, chunks }) => {
                insertDocument.run(document.id, document.agentId, document.title, document.path, document.content, JSON.stringify(document.tags), JSON.stringify(document.frontmatter), document.createdAt, document.updatedAt);
                chunks.forEach((chunk) => {
                    insertChunk.run(chunk.id, chunk.documentId, chunk.ordinal, chunk.content, chunk.tokenCount, chunk.embeddingProvider, JSON.stringify(chunk.embedding));
                    insertChunkFts.run(chunk.id, chunk.documentId, document.agentId, document.title, chunk.content);
                    createEmbeddingBuckets(chunk.embedding).forEach((bucket) => insertEmbeddingBucket.run(bucket, chunk.id));
                });
            });
            // Second pass: links are written only after every document row
            // exists (presumably because a link may reference any document; confirm against the schema).
            documents.forEach(({ links }) => {
                links.forEach((link) => {
                    insertLink.run(link.fromDocumentId, link.toTitle, toTitleKey(link.toTitle), link.toDocumentId, link.weight, link.priority);
                });
            });
        });
        transaction();
    }
});
|