@andespindola/brainlink 0.1.0-beta.14 → 0.1.0-beta.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +5 -5
- package/CHANGELOG.md +2 -2
- package/CONTRIBUTING.md +2 -2
- package/README.md +13 -15
- package/SECURITY.md +1 -1
- package/dist/application/analyze-vault.js +1 -15
- package/dist/application/frontend/client-css.js +2 -0
- package/dist/application/frontend/client-js.js +96 -9
- package/dist/application/get-graph-layout.js +2 -2
- package/dist/application/get-graph-node.js +3 -3
- package/dist/application/get-graph-summary.js +3 -3
- package/dist/application/get-graph.js +3 -3
- package/dist/application/index-vault.js +5 -5
- package/dist/application/list-agents.js +3 -3
- package/dist/application/list-links.js +5 -5
- package/dist/application/search-graph-node-ids.js +3 -3
- package/dist/application/search-knowledge.js +4 -5
- package/dist/benchmarks/large-vault.js +1 -1
- package/dist/infrastructure/file-index.js +291 -0
- package/dist/infrastructure/search-packs.js +1 -83
- package/docs/AGENT_USAGE.md +14 -16
- package/docs/ARCHITECTURE.md +19 -28
- package/package.json +1 -3
- package/dist/infrastructure/sqlite/document-writer.js +0 -51
- package/dist/infrastructure/sqlite/graph-reader.js +0 -267
- package/dist/infrastructure/sqlite/recovery.js +0 -163
- package/dist/infrastructure/sqlite/schema.js +0 -114
- package/dist/infrastructure/sqlite/search-reader.js +0 -188
- package/dist/infrastructure/sqlite/types.js +0 -1
- package/dist/infrastructure/sqlite-index.js +0 -38
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
import { mkdir, readFile, rename, writeFile } from 'node:fs/promises';
|
|
2
|
+
import { dirname, join } from 'node:path';
|
|
3
|
+
import { cosineSimilarity } from '../domain/embeddings.js';
|
|
4
|
+
// Matches maximal runs of Unicode letters, Unicode numbers, underscores and hyphens.
// Global + unicode flags: `String.prototype.match` returns every run found in a query.
const queryTokenPattern = /[\p{L}\p{N}_-]+/gu;
|
|
5
|
+
/**
 * Creates a brand-new index object with no documents, chunks or links.
 *
 * A new object (with new arrays) is produced on every call, and `updatedAt`
 * is stamped with the current time in ISO-8601 form.
 *
 * @returns {{version: number, updatedAt: string, documents: Array, chunks: Array, links: Array}}
 */
const emptyIndex = () => {
    const createdAt = new Date().toISOString();
    return {
        version: 1,
        updatedAt: createdAt,
        documents: [],
        chunks: [],
        links: []
    };
};
|
|
12
|
+
/**
 * Resolves the location of the JSON index file for a vault.
 *
 * @param {string} vaultPath - Root directory of the vault.
 * @returns {string} `<vaultPath>/.brainlink/index.json`, joined with the platform separator.
 */
export const indexStoragePath = (vaultPath) => {
    const segments = [vaultPath, '.brainlink', 'index.json'];
    return join(...segments);
};
|
|
13
|
+
/**
 * Loads the persisted index for a vault, falling back to an empty index.
 *
 * The read is deliberately best-effort: a missing file, unreadable file,
 * invalid JSON or a non-object payload all yield `emptyIndex()` instead of
 * throwing. Individual fields are sanitised so callers always receive arrays
 * for `documents`, `chunks` and `links`.
 *
 * NOTE(review): array *elements* are not validated here; callers assume each
 * entry has the shape written by `saveDocuments`.
 *
 * @param {string} vaultPath - Root directory of the vault.
 * @returns {Promise<{version: number, updatedAt: string, documents: Array, chunks: Array, links: Array}>}
 */
const readIndex = async (vaultPath) => {
    try {
        const parsed = JSON.parse(await readFile(indexStoragePath(vaultPath), 'utf8'));
        // Valid JSON can still be `null` or a primitive; treat that as "no index"
        // explicitly rather than relying on a property access throwing.
        if (parsed === null || typeof parsed !== 'object') {
            return emptyIndex();
        }
        return {
            version: 1,
            updatedAt: typeof parsed.updatedAt === 'string' ? parsed.updatedAt : new Date().toISOString(),
            documents: Array.isArray(parsed.documents) ? parsed.documents : [],
            chunks: Array.isArray(parsed.chunks) ? parsed.chunks : [],
            links: Array.isArray(parsed.links) ? parsed.links : []
        };
    }
    catch {
        // Every failure mode (ENOENT, permission errors, corrupt JSON) maps to
        // the same result, so no per-error branching is needed.
        return emptyIndex();
    }
};
|
|
31
|
+
/**
 * Persists an index object as a single line of JSON under the vault.
 *
 * The payload is first written to `<target>.tmp` and then renamed onto the
 * final path (presumably so readers never observe a partially written file).
 * The containing directory is created with mode 0o700 and the file is written
 * with mode 0o600.
 *
 * @param {string} vaultPath - Root directory of the vault.
 * @param {object} index - Index object to serialise.
 * @returns {Promise<void>}
 */
const writeIndex = async (vaultPath, index) => {
    const destination = indexStoragePath(vaultPath);
    const staging = `${destination}.tmp`;
    const directoryOptions = { recursive: true, mode: 0o700 };
    const fileOptions = { encoding: 'utf8', mode: 0o600 };
    await mkdir(dirname(destination), directoryOptions);
    const serialized = `${JSON.stringify(index)}\n`;
    await writeFile(staging, serialized, fileOptions);
    await rename(staging, destination);
};
|
|
38
|
+
/**
 * Produces the comparison form of a piece of text: compatibility-decomposed
 * (NFKD), stripped of characters with the Unicode `Diacritic` property, and
 * lower-cased.
 *
 * @param {string} value - Text to normalise.
 * @returns {string} Normalised text.
 */
const normalizeToken = (value) => {
    const decomposed = value.normalize('NFKD');
    const withoutMarks = decomposed.replace(/\p{Diacritic}/gu, '');
    return withoutMarks.toLowerCase();
};
|
|
42
|
+
/**
 * Splits a query into normalised search tokens.
 *
 * Raw tokens are the runs matched by `queryTokenPattern`; each one is passed
 * through `normalizeToken`, and tokens of length 0 or 1 are dropped.
 *
 * @param {string} query - Raw user query.
 * @returns {string[]} Normalised tokens, or an empty array when nothing matches.
 */
const tokenize = (query) => {
    const rawTokens = query.match(queryTokenPattern);
    if (rawTokens === null) {
        return [];
    }
    const normalizedTokens = rawTokens.map(normalizeToken);
    return normalizedTokens.filter((token) => token.length > 1);
};
|
|
46
|
+
/**
 * Counts non-overlapping occurrences of `token` inside `text`.
 *
 * Matching is exact (case-sensitive); callers are expected to normalise both
 * arguments first. After each hit the scan resumes just past the matched
 * token, so `'aaaa'` contains `'aa'` twice, not three times.
 *
 * @param {string} text - Text to scan.
 * @param {string} token - Substring to look for.
 * @returns {number} Number of non-overlapping matches; 0 for an empty token.
 */
const countOccurrences = (text, token) => {
    // An empty token matches at every position without advancing the cursor,
    // which previously caused an infinite loop on any non-empty text.
    if (token.length === 0) {
        return 0;
    }
    let hits = 0;
    let position = text.indexOf(token);
    while (position !== -1) {
        hits += 1;
        position = text.indexOf(token, position + token.length);
    }
    return hits;
};
|
|
59
|
+
/**
 * Computes the lexical relevance of a row for a list of query tokens.
 *
 * For every token the score grows by:
 *   5 per hit in the title, 4 per hit in the tags, 2 per hit in the path,
 *   and 1 per hit in the content, capped at 6 content hits per token.
 * All fields are normalised with `normalizeToken` before counting; tags are
 * joined with single spaces.
 *
 * @param {{title: string, path: string, content: string, tags: string[]}} row - Row to score.
 * @param {string[]} tokens - Normalised query tokens.
 * @returns {number} Lexical score; 0 when there are no tokens.
 */
const textScore = (row, tokens) => {
    if (tokens.length === 0) {
        return 0;
    }
    const normalizedTitle = normalizeToken(row.title);
    const normalizedPath = normalizeToken(row.path);
    const normalizedContent = normalizeToken(row.content);
    const normalizedTags = normalizeToken(row.tags.join(' '));
    let total = 0;
    for (const token of tokens) {
        const inTitle = countOccurrences(normalizedTitle, token);
        const inTags = countOccurrences(normalizedTags, token);
        const inPath = countOccurrences(normalizedPath, token);
        const inContent = countOccurrences(normalizedContent, token);
        total = total + inTitle * 5 + inTags * 4 + inPath * 2 + Math.min(inContent, 6);
    }
    return total;
};
|
|
75
|
+
/**
 * Computes the semantic relevance of a row against a query embedding.
 *
 * @param {{embedding: number[]}} row - Row carrying a stored embedding.
 * @param {number[]} queryEmbedding - Embedding of the query.
 * @returns {number} `cosineSimilarity(queryEmbedding, row.embedding)`, or 0
 *   when either vector is empty.
 */
const semanticScore = (row, queryEmbedding) => {
    if (!(queryEmbedding.length > 0)) {
        return 0;
    }
    if (!(row.embedding.length > 0)) {
        return 0;
    }
    return cosineSimilarity(queryEmbedding, row.embedding);
};
|
|
76
|
+
/**
 * Converts a scored row into the public search-result shape.
 *
 * The combined `score` depends on the search mode:
 *   - `'fts'`      -> the lexical score only
 *   - `'semantic'` -> the semantic score only
 *   - anything else (hybrid) -> lexical + 8 x semantic
 * The row's embedding is not copied into the result.
 *
 * @param {object} row - Row with document/chunk identity, content and tags.
 * @param {string} mode - Search mode the result was produced under.
 * @param {number} text - Lexical score.
 * @param {number} semantic - Semantic score.
 * @returns {object} Search result including `score`, `textScore`, `semanticScore` and `searchMode`.
 */
const toResult = (row, mode, text, semantic) => {
    let score;
    switch (mode) {
        case 'fts':
            score = text;
            break;
        case 'semantic':
            score = semantic;
            break;
        default:
            score = text + semantic * 8;
            break;
    }
    const { documentId, agentId, title, path, chunkId, content, tags } = row;
    return {
        documentId,
        agentId,
        title,
        path,
        chunkId,
        content,
        score,
        textScore: text,
        semanticScore: semantic,
        searchMode: mode,
        tags
    };
};
|
|
92
|
+
/**
 * Expands a stored link into a display-ready record by resolving both ends
 * against the document lookup.
 *
 * Missing source documents fall back to agent `'shared'` and the placeholder
 * `'Unknown'` for title and path. An unresolved target keeps the link's own
 * `toTitle` and reports `toPath` as `null`.
 *
 * @param {{fromDocumentId: string, toDocumentId?: string|null, toTitle: string, weight: number, priority: number}} link - Stored link.
 * @param {Map<string, object>} documentsById - Documents keyed by id.
 * @returns {{agentId: string, fromTitle: string, fromPath: string, toTitle: string, toPath: string|null, weight: number, priority: number}}
 */
const toGraphLink = (link, documentsById) => {
    const origin = documentsById.get(link.fromDocumentId);
    let destination;
    if (link.toDocumentId) {
        destination = documentsById.get(link.toDocumentId);
    }
    const agentId = origin?.agentId ?? 'shared';
    const fromTitle = origin?.title ?? 'Unknown';
    const fromPath = origin?.path ?? 'Unknown';
    const toTitle = destination?.title ?? link.toTitle;
    const toPath = destination?.path ?? null;
    return {
        agentId,
        fromTitle,
        fromPath,
        toTitle,
        toPath,
        weight: link.weight,
        priority: link.priority
    };
};
|
|
105
|
+
/**
 * Opens the file-based index of a vault and returns its query/write API.
 *
 * Every read operation re-loads the JSON index from disk via `readIndex`, so
 * no state is cached between calls; writes replace the whole index via
 * `writeIndex`.
 *
 * @param {string} vaultPath - Root directory of the vault.
 * @returns {object} Index handle exposing `reset`, `saveDocuments`, `search`,
 *   `listLinks`, `listBacklinks`, `getGraph`, `getGraphSummary`,
 *   `getGraphNode`, `searchGraphNodeIds`, `listAgents` and `close`.
 */
export const openFileIndex = (vaultPath) => {
    const load = async () => readIndex(vaultPath);
    const persist = async (index) => writeIndex(vaultPath, index);
    // Builds the id -> document lookup shared by search and link queries.
    const mapDocumentsById = (index) => new Map(index.documents.map((document) => [document.id, document]));
    // True when no agent filter is given or the link's source document belongs to the agent.
    const linkBelongsToAgent = (link, documentsById, agentId) => {
        const source = documentsById.get(link.fromDocumentId);
        return agentId ? source?.agentId === agentId : true;
    };
    // Projects a stored document onto the graph-node shape with the given content.
    const toGraphNode = (document, content) => ({
        id: document.id,
        agentId: document.agentId,
        title: document.title,
        path: document.path,
        content,
        tags: document.tags
    });
    // Builds nodes and outgoing edges for an agent (or all agents); content is blanked when not requested.
    const buildGraph = (index, agentId, includeContent) => {
        const documents = agentId ? index.documents.filter((document) => document.agentId === agentId) : index.documents;
        const documentIds = new Set(documents.map((document) => document.id));
        const edges = index.links
            .filter((link) => documentIds.has(link.fromDocumentId))
            .map((link) => ({
            source: link.fromDocumentId,
            target: link.toDocumentId,
            targetTitle: link.toTitle,
            weight: link.weight,
            priority: link.priority
        }));
        return {
            nodes: documents.map((document) => toGraphNode(document, includeContent ? document.content : '')),
            edges
        };
    };
    return {
        /** Replaces the stored index with an empty one. */
        reset: async () => {
            await persist(emptyIndex());
        },
        /**
         * Replaces the stored index with the given indexed documents.
         * Each entry contributes its `document`, `chunks` and `links`.
         */
        saveDocuments: async (documents) => {
            const chunks = documents.flatMap((document) => document.chunks);
            const links = documents.flatMap((document) => document.links);
            await persist({
                version: 1,
                updatedAt: new Date().toISOString(),
                documents: documents.map((document) => document.document),
                chunks,
                links
            });
        },
        /**
         * Ranks chunks against a query.
         *
         * Chunks whose document is missing, or belongs to another agent when
         * `agentId` is given, are ignored. Rows with a score of 0 are dropped
         * unless the query produced no tokens. Results are ordered by score
         * (descending), then title, and truncated to `limit`.
         */
        search: async (query, limit, agentId, mode = 'hybrid', queryEmbedding = []) => {
            const index = await load();
            const documentsById = mapDocumentsById(index);
            const rows = index.chunks.flatMap((chunk) => {
                const document = documentsById.get(chunk.documentId);
                if (!document) {
                    return [];
                }
                if (agentId && document.agentId !== agentId) {
                    return [];
                }
                return [
                    {
                        documentId: document.id,
                        agentId: document.agentId,
                        title: document.title,
                        path: document.path,
                        chunkId: chunk.id,
                        content: chunk.content,
                        tags: document.tags,
                        embedding: chunk.embedding
                    }
                ];
            });
            const tokens = tokenize(query);
            return rows
                .map((row) => toResult(row, mode, textScore(row, tokens), semanticScore(row, queryEmbedding)))
                .filter((row) => row.score > 0 || tokens.length === 0)
                .sort((left, right) => right.score - left.score || left.title.localeCompare(right.title))
                .slice(0, Math.max(0, limit));
        },
        /** Lists links (optionally only those whose source belongs to `agentId`), ordered by source title. */
        listLinks: async (agentId) => {
            const index = await load();
            const documentsById = mapDocumentsById(index);
            return index.links
                .filter((link) => linkBelongsToAgent(link, documentsById, agentId))
                .map((link) => toGraphLink(link, documentsById))
                .sort((left, right) => left.fromTitle.localeCompare(right.fromTitle));
        },
        /**
         * Lists links pointing at `title` (case-insensitive match on the link's
         * `toTitle`), optionally restricted to sources owned by `agentId`,
         * ordered by weight (descending) then source title.
         */
        listBacklinks: async (title, agentId) => {
            const index = await load();
            const titleKey = title.toLowerCase();
            const documentsById = mapDocumentsById(index);
            return index.links
                .filter((link) => link.toTitle.toLowerCase() === titleKey)
                .filter((link) => linkBelongsToAgent(link, documentsById, agentId))
                .map((link) => toGraphLink(link, documentsById))
                .sort((left, right) => right.weight - left.weight || left.fromTitle.localeCompare(right.fromTitle));
        },
        /** Returns graph nodes (with full content) and their outgoing edges. */
        getGraph: async (agentId) => buildGraph(await load(), agentId, true),
        /** Same as `getGraph`, but every node's `content` is an empty string. */
        getGraphSummary: async (agentId) => buildGraph(await load(), agentId, false),
        /** Returns the node with the given id (and agent, when provided), or `undefined`. */
        getGraphNode: async (id, agentId) => {
            const index = await load();
            const document = index.documents.find((row) => row.id === id && (!agentId || row.agentId === agentId));
            return document ? toGraphNode(document, document.content) : undefined;
        },
        /**
         * Returns ids of documents that lexically match `query`, best first
         * (ties broken by id), truncated to `limit`.
         */
        searchGraphNodeIds: async (query, limit, agentId) => {
            // Cheap guards run before touching the disk: nothing can match an
            // empty query or a non-positive limit.
            const normalized = normalizeToken(query);
            if (normalized.length === 0 || limit <= 0) {
                return [];
            }
            const index = await load();
            const tokens = tokenize(query);
            return index.documents
                .filter((document) => (!agentId || document.agentId === agentId))
                .map((document) => {
                const score = textScore({
                    documentId: document.id,
                    agentId: document.agentId,
                    title: document.title,
                    path: document.path,
                    chunkId: document.id,
                    content: document.content,
                    tags: document.tags,
                    embedding: []
                }, tokens);
                return { id: document.id, score };
            })
                .filter((row) => row.score > 0)
                .sort((left, right) => right.score - left.score || left.id.localeCompare(right.id))
                .slice(0, limit)
                .map((row) => row.id);
        },
        /** Lists agent ids with their document counts, ordered by id. */
        listAgents: async () => {
            const index = await load();
            const counts = new Map();
            for (const document of index.documents) {
                counts.set(document.agentId, (counts.get(document.agentId) ?? 0) + 1);
            }
            return Array.from(counts.entries())
                .sort((left, right) => left[0].localeCompare(right[0]))
                .map(([id, documentCount]) => ({ id, documentCount }));
        },
        close: () => {
            // File-based index has no persistent connection.
        }
    };
};
|
|
@@ -1,8 +1,6 @@
|
|
|
1
|
-
import Database from 'better-sqlite3';
|
|
2
1
|
import { gunzipSync } from 'node:zlib';
|
|
3
2
|
import { mkdir, readdir, readFile, rm, writeFile } from 'node:fs/promises';
|
|
4
3
|
import { join } from 'node:path';
|
|
5
|
-
import { existsSync } from 'node:fs';
|
|
6
4
|
import { decodePrivatePack, encodePrivatePack, isPrivatePackPayload } from './private-pack-codec.js';
|
|
7
5
|
const packsDirectoryName = 'search-packs';
|
|
8
6
|
const manifestFileName = 'manifest.json';
|
|
@@ -10,7 +8,6 @@ const rowChunkSize = 5_000;
|
|
|
10
8
|
const queryTokenPattern = /[\p{L}\p{N}_-]+/gu;
|
|
11
9
|
const toPackDirectory = (vaultPath) => join(vaultPath, '.brainlink', packsDirectoryName);
|
|
12
10
|
const toManifestPath = (vaultPath) => join(toPackDirectory(vaultPath), manifestFileName);
|
|
13
|
-
const toDatabasePath = (vaultPath) => join(vaultPath, '.brainlink', 'brainlink.db');
|
|
14
11
|
const parseRowsFromPack = async (vaultPath, content) => {
|
|
15
12
|
const raw = isPrivatePackPayload(content) ? await decodePrivatePack(vaultPath, content) : gunzipSync(content);
|
|
16
13
|
return raw
|
|
@@ -32,15 +29,6 @@ const toRows = (documents) => documents.flatMap((document) => document.chunks.ma
|
|
|
32
29
|
const writeManifest = async (vaultPath, manifest) => {
|
|
33
30
|
await writeFile(toManifestPath(vaultPath), `${JSON.stringify(manifest, null, 2)}\n`, 'utf8');
|
|
34
31
|
};
|
|
35
|
-
const parseTags = (value) => {
|
|
36
|
-
try {
|
|
37
|
-
const parsed = JSON.parse(value);
|
|
38
|
-
return Array.isArray(parsed) ? parsed.filter((item) => typeof item === 'string') : [];
|
|
39
|
-
}
|
|
40
|
-
catch {
|
|
41
|
-
return [];
|
|
42
|
-
}
|
|
43
|
-
};
|
|
44
32
|
const chunkRows = (rows, size) => {
|
|
45
33
|
const chunks = [];
|
|
46
34
|
for (let index = 0; index < rows.length; index += size) {
|
|
@@ -140,67 +128,6 @@ const writeRowsAsPrivatePacks = async (vaultPath, rows, clearExisting) => {
|
|
|
140
128
|
recordCount: rows.length
|
|
141
129
|
};
|
|
142
130
|
};
|
|
143
|
-
const tableExists = (database, table) => {
|
|
144
|
-
const row = database.prepare("SELECT name FROM sqlite_master WHERE type = 'table' AND name = ?").get(table);
|
|
145
|
-
return row?.name === table;
|
|
146
|
-
};
|
|
147
|
-
const tableColumns = (database, table) => {
|
|
148
|
-
const rows = database.prepare(`SELECT name FROM pragma_table_info('${table.replaceAll("'", "''")}')`).all();
|
|
149
|
-
return new Set(rows.map((row) => row.name));
|
|
150
|
-
};
|
|
151
|
-
const loadRowsFromLegacySqlite = (vaultPath) => {
|
|
152
|
-
const databasePath = toDatabasePath(vaultPath);
|
|
153
|
-
if (!existsSync(databasePath)) {
|
|
154
|
-
return [];
|
|
155
|
-
}
|
|
156
|
-
const database = new Database(databasePath, { readonly: true, fileMustExist: true });
|
|
157
|
-
try {
|
|
158
|
-
if (!tableExists(database, 'documents') || !tableExists(database, 'chunks')) {
|
|
159
|
-
return [];
|
|
160
|
-
}
|
|
161
|
-
const documentColumns = tableColumns(database, 'documents');
|
|
162
|
-
const chunkColumns = tableColumns(database, 'chunks');
|
|
163
|
-
if (!documentColumns.has('id') || !documentColumns.has('title') || !chunkColumns.has('document_id')) {
|
|
164
|
-
return [];
|
|
165
|
-
}
|
|
166
|
-
const agentExpr = documentColumns.has('agent_id') ? 'documents.agent_id' : "'shared'";
|
|
167
|
-
const pathExpr = documentColumns.has('path') ? 'documents.path' : "documents.title";
|
|
168
|
-
const tagsExpr = documentColumns.has('tags_json') ? 'documents.tags_json' : "'[]'";
|
|
169
|
-
const chunkIdExpr = chunkColumns.has('id') ? 'chunks.id' : "documents.id || ':' || chunks.rowid";
|
|
170
|
-
const chunkContentExpr = chunkColumns.has('content')
|
|
171
|
-
? 'chunks.content'
|
|
172
|
-
: documentColumns.has('content')
|
|
173
|
-
? 'documents.content'
|
|
174
|
-
: "''";
|
|
175
|
-
const chunkOrderExpr = chunkColumns.has('ordinal') ? 'chunks.ordinal' : 'chunks.rowid';
|
|
176
|
-
const statement = database.prepare(`
|
|
177
|
-
SELECT
|
|
178
|
-
documents.id AS document_id,
|
|
179
|
-
${agentExpr} AS agent_id,
|
|
180
|
-
documents.title AS title,
|
|
181
|
-
${pathExpr} AS path,
|
|
182
|
-
${chunkIdExpr} AS chunk_id,
|
|
183
|
-
${chunkContentExpr} AS content,
|
|
184
|
-
${tagsExpr} AS tags_json
|
|
185
|
-
FROM chunks
|
|
186
|
-
JOIN documents ON documents.id = chunks.document_id
|
|
187
|
-
ORDER BY documents.title, ${chunkOrderExpr}
|
|
188
|
-
`);
|
|
189
|
-
const rows = statement.all();
|
|
190
|
-
return rows.map((row) => ({
|
|
191
|
-
documentId: row.document_id,
|
|
192
|
-
agentId: typeof row.agent_id === 'string' && row.agent_id.length > 0 ? row.agent_id : 'shared',
|
|
193
|
-
title: row.title,
|
|
194
|
-
path: row.path,
|
|
195
|
-
chunkId: row.chunk_id,
|
|
196
|
-
content: row.content ?? '',
|
|
197
|
-
tags: parseTags(row.tags_json)
|
|
198
|
-
}));
|
|
199
|
-
}
|
|
200
|
-
finally {
|
|
201
|
-
database.close();
|
|
202
|
-
}
|
|
203
|
-
};
|
|
204
131
|
export const buildSearchPacks = async (vaultPath, documents) => {
|
|
205
132
|
return writeRowsAsPrivatePacks(vaultPath, toRows(documents), true);
|
|
206
133
|
};
|
|
@@ -223,16 +150,7 @@ export const ensurePrivatePacksFromLegacyIndex = async (vaultPath) => {
|
|
|
223
150
|
...report
|
|
224
151
|
};
|
|
225
152
|
}
|
|
226
|
-
|
|
227
|
-
if (legacyRows.length === 0) {
|
|
228
|
-
return { imported: false };
|
|
229
|
-
}
|
|
230
|
-
const report = await writeRowsAsPrivatePacks(vaultPath, legacyRows, true);
|
|
231
|
-
return {
|
|
232
|
-
imported: true,
|
|
233
|
-
source: 'legacy-sqlite',
|
|
234
|
-
...report
|
|
235
|
-
};
|
|
153
|
+
return { imported: false };
|
|
236
154
|
};
|
|
237
155
|
export const searchInPacks = async (vaultPath, query, limit, agentId) => {
|
|
238
156
|
const normalizedAgent = agentId?.trim();
|
package/docs/AGENT_USAGE.md
CHANGED
|
@@ -18,7 +18,7 @@ The correct dependency direction is:
|
|
|
18
18
|
agent -> Brainlink CLI -> Markdown vault + derived index
|
|
19
19
|
```
|
|
20
20
|
|
|
21
|
-
Agents should never depend on
|
|
21
|
+
Agents should never depend on internal index persistence files as a public API.
|
|
22
22
|
|
|
23
23
|
The installed CLI exposes two equivalent binaries:
|
|
24
24
|
|
|
@@ -180,16 +180,16 @@ Required write behavior:
|
|
|
180
180
|
Good linked note:
|
|
181
181
|
|
|
182
182
|
```bash
|
|
183
|
-
blink add "
|
|
183
|
+
blink add "Index Rebuild" \
|
|
184
184
|
--agent coding-agent \
|
|
185
|
-
--content "
|
|
185
|
+
--content "Derived index artifacts are rebuildable and disposable. Related: [[Architecture]], [[Agent Namespaces]]. #index #architecture #decision"
|
|
186
186
|
blink validate --agent coding-agent
|
|
187
187
|
```
|
|
188
188
|
|
|
189
189
|
Poor disconnected note:
|
|
190
190
|
|
|
191
191
|
```bash
|
|
192
|
-
blink add "
|
|
192
|
+
blink add "Index Rebuild" \
|
|
193
193
|
--agent coding-agent \
|
|
194
194
|
--content "We rebuild old indexes now."
|
|
195
195
|
```
|
|
@@ -460,11 +460,11 @@ If `--mode`/`--limit` are omitted, Brainlink resolves those values from the acti
|
|
|
460
460
|
|
|
461
461
|
Search modes:
|
|
462
462
|
|
|
463
|
-
- `hybrid`: default; combines
|
|
464
|
-
- `fts`: lexical
|
|
465
|
-
- `semantic`: local deterministic embedding similarity
|
|
463
|
+
- `hybrid`: default; combines lexical matching and local embedding similarity.
|
|
464
|
+
- `fts`: lexical full-text matching only.
|
|
465
|
+
- `semantic`: local deterministic embedding similarity.
|
|
466
466
|
|
|
467
|
-
Hybrid results are cached in-memory for a short TTL and invalidated when `.brainlink/
|
|
467
|
+
Hybrid results are cached in-memory for a short TTL and invalidated when `.brainlink/index.json` changes.
|
|
468
468
|
|
|
469
469
|
### Build Agent Context
|
|
470
470
|
|
|
@@ -634,10 +634,8 @@ GET /api/validate
|
|
|
634
634
|
|
|
635
635
|
The HTTP API is read-only. Use the CLI for writes and indexing.
|
|
636
636
|
|
|
637
|
-
|
|
638
|
-
Indexing also writes private encrypted search packs at `.brainlink/search-packs/*.blpk`; when SQLite cannot be opened, Brainlink falls back to pack-based search automatically.
|
|
637
|
+
Indexing writes private encrypted search packs at `.brainlink/search-packs/*.blpk` for resilient retrieval and portability.
|
|
639
638
|
Pack decryption keys are resolved from `$BRAINLINK_HOME/keys` (or `BRAINLINK_SEARCH_PACK_KEY` when explicitly set).
|
|
640
|
-
For legacy installations, when SQLite already exists but private packs are missing, Brainlink auto-imports index context rows from `brainlink.db` into `.blpk` on first retrieval.
|
|
641
639
|
|
|
642
640
|
## Agent Integration Contract
|
|
643
641
|
|
|
@@ -670,9 +668,9 @@ Non-goals:
|
|
|
670
668
|
## Operational Rules
|
|
671
669
|
|
|
672
670
|
- Re-run `index` after modifying notes.
|
|
673
|
-
- Treat `.brainlink/
|
|
674
|
-
- Commit Markdown notes, not local
|
|
675
|
-
- Do not manually edit
|
|
671
|
+
- Treat `.brainlink/index.json` and `.brainlink/search-packs/` as disposable.
|
|
672
|
+
- Commit Markdown notes, not local index files.
|
|
673
|
+
- Do not manually edit generated index artifacts.
|
|
676
674
|
- Keep generated context short enough for the target model.
|
|
677
675
|
- Prefer specific queries over broad queries.
|
|
678
676
|
- Write explicit `[[wiki links]]` when durable memory should be connected.
|
|
@@ -702,9 +700,9 @@ Weak retrieval usually means:
|
|
|
702
700
|
|
|
703
701
|
## Current Limits
|
|
704
702
|
|
|
705
|
-
- Search supports FTS, local semantic embeddings
|
|
703
|
+
- Search supports FTS, local semantic embeddings and hybrid ranking.
|
|
706
704
|
- Local embeddings are deterministic and provider-free; remote embedding providers are not implemented yet.
|
|
707
705
|
- MCP integration is available through the `brainlink-mcp` stdio server.
|
|
708
706
|
- HTTP API is local and unauthenticated.
|
|
709
|
-
- Bucket vaults support S3-compatible `s3://bucket/prefix` URIs and use
|
|
707
|
+
- Bucket vaults support S3-compatible `s3://bucket/prefix` URIs and use local cache/index artifacts.
|
|
710
708
|
- Watch mode depends on platform filesystem watcher behavior and is only supported for local filesystem vaults.
|
package/docs/ARCHITECTURE.md
CHANGED
|
@@ -8,7 +8,7 @@ CLI -> application use cases -> domain functions -> infrastructure adapters
|
|
|
8
8
|
|
|
9
9
|
The core rule is simple:
|
|
10
10
|
|
|
11
|
-
Domain code must not know about the CLI, filesystem, or
|
|
11
|
+
Domain code must not know about the CLI, filesystem, or index persistence format.
|
|
12
12
|
|
|
13
13
|
## Modules
|
|
14
14
|
|
|
@@ -53,14 +53,11 @@ src/
|
|
|
53
53
|
types.ts
|
|
54
54
|
|
|
55
55
|
infrastructure/
|
|
56
|
-
|
|
57
|
-
document-writer.ts
|
|
58
|
-
graph-reader.ts
|
|
59
|
-
schema.ts
|
|
60
|
-
search-reader.ts
|
|
56
|
+
file-index.ts
|
|
61
57
|
file-system-vault.ts
|
|
58
|
+
private-pack-codec.ts
|
|
59
|
+
search-packs.ts
|
|
62
60
|
session-state.ts
|
|
63
|
-
sqlite-index.ts
|
|
64
61
|
|
|
65
62
|
mcp/
|
|
66
63
|
main.ts
|
|
@@ -80,7 +77,6 @@ The domain layer contains pure knowledge rules:
|
|
|
80
77
|
- extract `#tags`
|
|
81
78
|
- split documents into chunks
|
|
82
79
|
- create deterministic local embeddings
|
|
83
|
-
- create deterministic embedding buckets for semantic candidate retrieval
|
|
84
80
|
- calculate cosine similarity
|
|
85
81
|
- estimate token counts
|
|
86
82
|
- select context sections
|
|
@@ -116,12 +112,11 @@ The infrastructure layer handles side effects:
|
|
|
116
112
|
- mirroring S3-compatible bucket Markdown into a local cache
|
|
117
113
|
- writing Markdown notes
|
|
118
114
|
- creating `.brainlink`
|
|
119
|
-
- writing and querying
|
|
120
|
-
- running
|
|
121
|
-
- narrowing semantic candidates through SQLite embedding buckets before cosine scoring
|
|
115
|
+
- writing and querying file-based indexes
|
|
116
|
+
- running lexical, semantic and hybrid retrieval
|
|
122
117
|
|
|
123
|
-
|
|
124
|
-
objects in the bucket remain canonical and
|
|
118
|
+
|
|
119
|
+
Index artifacts are rebuildable and are not canonical storage. For bucket vaults, Markdown objects in the bucket remain canonical and local index files are derived data.
|
|
125
120
|
|
|
126
121
|
## Indexing Flow
|
|
127
122
|
|
|
@@ -132,11 +127,9 @@ read markdown files
|
|
|
132
127
|
-> resolve links
|
|
133
128
|
-> split chunks
|
|
134
129
|
-> create chunk embeddings
|
|
135
|
-
-> reset
|
|
130
|
+
-> reset file index
|
|
136
131
|
-> persist documents, chunks and links
|
|
137
|
-
->
|
|
138
|
-
-> persist embedding vectors
|
|
139
|
-
-> persist embedding buckets
|
|
132
|
+
-> persist chunks, links and embeddings in file index
|
|
140
133
|
```
|
|
141
134
|
|
|
142
135
|
## Retrieval Flow
|
|
@@ -145,7 +138,7 @@ read markdown files
|
|
|
145
138
|
question
|
|
146
139
|
-> selected mode: fts | semantic | hybrid
|
|
147
140
|
-> optional query embedding
|
|
148
|
-
->
|
|
141
|
+
-> lexical scoring and/or semantic cosine scoring
|
|
149
142
|
-> cosine similarity over candidate chunks
|
|
150
143
|
-> ranked chunks with textScore and semanticScore
|
|
151
144
|
-> token-budget selection
|
|
@@ -163,7 +156,7 @@ server command
|
|
|
163
156
|
-> browser renders graph canvas
|
|
164
157
|
```
|
|
165
158
|
|
|
166
|
-
The graph UI is intentionally read-only. Markdown remains the write interface and
|
|
159
|
+
The graph UI is intentionally read-only. Markdown remains the write interface and index artifacts remain derived data.
|
|
167
160
|
|
|
168
161
|
## HTTP API Flow
|
|
169
162
|
|
|
@@ -171,7 +164,7 @@ The graph UI is intentionally read-only. Markdown remains the write interface an
|
|
|
171
164
|
HTTP request
|
|
172
165
|
-> route handler
|
|
173
166
|
-> application use case
|
|
174
|
-
-> filesystem and
|
|
167
|
+
-> filesystem and index adapters
|
|
175
168
|
-> JSON response
|
|
176
169
|
```
|
|
177
170
|
|
|
@@ -282,11 +275,10 @@ vault/agents/<agent-id>/**/*.md
|
|
|
282
275
|
|
|
283
276
|
Rebuildable:
|
|
284
277
|
|
|
285
|
-
- `.brainlink/
|
|
278
|
+
- `.brainlink/index.json`
|
|
279
|
+
- `.brainlink/search-packs/*.blpk`
|
|
286
280
|
- `$BRAINLINK_HOME/bucket-cache`
|
|
287
|
-
- FTS records
|
|
288
281
|
- local embedding vectors
|
|
289
|
-
- local embedding bucket index
|
|
290
282
|
- chunks
|
|
291
283
|
- resolved links
|
|
292
284
|
|
|
@@ -296,14 +288,13 @@ Rebuildable:
|
|
|
296
288
|
|
|
297
289
|
Markdown keeps the system portable, inspectable, Git-friendly, and compatible with Obsidian-like workflows.
|
|
298
290
|
|
|
299
|
-
###
|
|
291
|
+
### File Index As Local Index
|
|
300
292
|
|
|
301
|
-
|
|
293
|
+
Brainlink uses a local JSON index plus encrypted pack exports for fast rebuildable retrieval without external infrastructure.
|
|
302
294
|
Hybrid retrieval also uses a short-lived in-memory cache keyed by vault/query/agent and invalidated by index file mtime to reduce repeated query latency.
|
|
303
|
-
|
|
304
|
-
Indexing additionally exports private encrypted pack files (`.brainlink/search-packs/*.blpk`) from indexed chunks. Search falls back to these packs when SQLite is unavailable, preserving retrieval continuity in degraded mode.
|
|
295
|
+
Indexing exports private encrypted pack files (`.brainlink/search-packs/*.blpk`) from indexed chunks for fast retrieval and recovery continuity.
|
|
305
296
|
Pack encryption keys are resolved from `$BRAINLINK_HOME/keys` or from `BRAINLINK_SEARCH_PACK_KEY` when configured.
|
|
306
|
-
Legacy
|
|
297
|
+
Legacy `.jsonl.gz` search packs are auto-upgraded to `.blpk` the first time a retrieval flow runs.
|
|
307
298
|
|
|
308
299
|
### CLI First
|
|
309
300
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@andespindola/brainlink",
|
|
3
|
-
"version": "0.1.0-beta.
|
|
3
|
+
"version": "0.1.0-beta.16",
|
|
4
4
|
"description": "Local-first knowledge memory for agents with Markdown, backlinks, indexing and context retrieval.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|
|
@@ -58,12 +58,10 @@
|
|
|
58
58
|
"dependencies": {
|
|
59
59
|
"@aws-sdk/client-s3": "^3.1038.0",
|
|
60
60
|
"@modelcontextprotocol/sdk": "^1.29.0",
|
|
61
|
-
"better-sqlite3": "^12.9.0",
|
|
62
61
|
"commander": "^14.0.2",
|
|
63
62
|
"zod": "^4.3.6"
|
|
64
63
|
},
|
|
65
64
|
"devDependencies": {
|
|
66
|
-
"@types/better-sqlite3": "^7.6.13",
|
|
67
65
|
"@types/node": "^24.9.2",
|
|
68
66
|
"tsx": "^4.21.0",
|
|
69
67
|
"typescript": "^5.9.3",
|