@andespindola/brainlink 0.1.0-beta.11 → 0.1.0-beta.110

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/AGENTS.md +8 -5
  2. package/CHANGELOG.md +26 -2
  3. package/CONTRIBUTING.md +2 -2
  4. package/COPYRIGHT.md +5 -0
  5. package/README.md +143 -18
  6. package/SECURITY.md +1 -1
  7. package/dist/application/analyze-vault.js +1 -9
  8. package/dist/application/build-context.js +56 -1
  9. package/dist/application/dedupe-notes.js +226 -0
  10. package/dist/application/frontend/client-css.js +93 -45
  11. package/dist/application/frontend/client-html.js +34 -25
  12. package/dist/application/frontend/client-js.js +3504 -132
  13. package/dist/application/frontend/client-worker-js.js +66 -0
  14. package/dist/application/get-graph-layout.js +2 -2
  15. package/dist/application/get-graph-node.js +3 -3
  16. package/dist/application/get-graph-summary.js +3 -3
  17. package/dist/application/get-graph.js +3 -3
  18. package/dist/application/import-legacy-sqlite.js +296 -0
  19. package/dist/application/index-vault.js +250 -24
  20. package/dist/application/list-agents.js +3 -3
  21. package/dist/application/list-links.js +5 -5
  22. package/dist/application/offline-pack-backup.js +44 -0
  23. package/dist/application/search-graph-node-ids.js +3 -3
  24. package/dist/application/search-knowledge.js +6 -6
  25. package/dist/application/server/routes.js +76 -1
  26. package/dist/application/start-server.js +75 -4
  27. package/dist/application/watch-vault.js +23 -2
  28. package/dist/benchmarks/large-vault.js +1 -1
  29. package/dist/cli/commands/agent-commands.js +7 -0
  30. package/dist/cli/commands/write-commands.js +818 -8
  31. package/dist/domain/context.js +53 -11
  32. package/dist/domain/graph-layout.js +47 -2
  33. package/dist/domain/middle-out.js +18 -0
  34. package/dist/infrastructure/config.js +38 -0
  35. package/dist/infrastructure/file-index.js +358 -0
  36. package/dist/infrastructure/file-system-vault.js +15 -0
  37. package/dist/infrastructure/index-state.js +56 -0
  38. package/dist/infrastructure/private-pack-codec.js +134 -0
  39. package/dist/infrastructure/search-packs.js +327 -26
  40. package/dist/mcp/server.js +11 -1
  41. package/dist/mcp/tools.js +62 -0
  42. package/docs/AGENT_USAGE.md +97 -17
  43. package/docs/ARCHITECTURE.md +23 -27
  44. package/docs/QUICKSTART.md +7 -0
  45. package/package.json +6 -4
  46. package/dist/infrastructure/sqlite/document-writer.js +0 -51
  47. package/dist/infrastructure/sqlite/graph-reader.js +0 -267
  48. package/dist/infrastructure/sqlite/recovery.js +0 -83
  49. package/dist/infrastructure/sqlite/schema.js +0 -114
  50. package/dist/infrastructure/sqlite/search-reader.js +0 -188
  51. package/dist/infrastructure/sqlite/types.js +0 -1
  52. package/dist/infrastructure/sqlite-index.js +0 -38
@@ -0,0 +1,134 @@
1
+ import { createCipheriv, createDecipheriv, createHash, randomBytes } from 'node:crypto';
2
+ import { brotliCompressSync, brotliDecompressSync, constants as zlibConstants } from 'node:zlib';
3
+ import { mkdir, readFile, writeFile } from 'node:fs/promises';
4
+ import { dirname, join } from 'node:path';
5
+ import { getBrainlinkHomePath } from './paths.js';
// Magic prefix that identifies a private pack payload.
const magic = Buffer.from('BLPK2', 'ascii');
// Container versions accepted by parseHeader; only version 2 carries a settings byte.
const legacyVersion = 1;
const currentVersion = 2;
// AES-256-GCM parameters: nonce and authentication tag sizes, in bytes.
const nonceLength = 12;
const authTagLength = 16;
const algorithm = 'aes-256-gcm';
// Settings byte layout: low four bits hold the compression level, bit 4 the dictionary flag.
const compressionLevelMask = 0b0000_1111;
const compressionDictionaryMask = 1 << 4;
const defaultCompressionLevel = 5;
// Text handed to Brotli as the `dictionary` option when the dictionary flag is set.
const builtinDictionary = Buffer.from(
  [
    '"documentId","agentId","title","path","chunkId","chunkOrdinal","content","tags"',
    '"searchMode","textScore","semanticScore","weight","priority","shared"',
    'agents/shared memory-hub architecture context index search graph markdown tags links',
    '#memory #architecture #context #graph #search #index [[Memory Hub]] [[Architecture]]',
    'The quick brown fox jumps over the lazy dog. Brainlink context package metadata.',
    'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-:/.#[]{}(), ',
  ].join('\n'),
);
/**
 * Builds the key file location for a vault:
 * `<brainlink home>/keys/search-pack-<id>.key`, where `<id>` is the first
 * 24 hex characters of the SHA-256 digest of `vaultPath`.
 */
const keyFilePath = (vaultPath) => {
  const hasher = createHash('sha256');
  hasher.update(vaultPath);
  const vaultId = hasher.digest('hex').substring(0, 24);
  const fileName = `search-pack-${vaultId}.key`;
  return join(getBrainlinkHomePath(), 'keys', fileName);
};
/** Derives the cipher key as the raw SHA-256 digest of the UTF-8 encoded secret. */
const deriveKeyFromSecret = (secret) => {
  const hasher = createHash('sha256');
  hasher.update(secret, 'utf8');
  return hasher.digest();
};
/**
 * Resolves the encryption key for a vault.
 *
 * Lookup order:
 *   1. the BRAINLINK_SEARCH_PACK_KEY environment variable (trimmed, non-empty);
 *   2. the non-empty contents of the vault's key file;
 *   3. a freshly generated random secret, which is written to the key file.
 *
 * Read errors other than ENOENT are rethrown.
 */
const readOrCreateKey = async (vaultPath) => {
  const secretFromEnvironment = process.env.BRAINLINK_SEARCH_PACK_KEY?.trim();
  if (secretFromEnvironment) {
    return deriveKeyFromSecret(secretFromEnvironment);
  }
  const keyPath = keyFilePath(vaultPath);
  let storedSecret = '';
  try {
    storedSecret = (await readFile(keyPath, 'utf8')).trim();
  } catch (error) {
    const isMissingFile = error instanceof Error && 'code' in error && error.code === 'ENOENT';
    if (!isMissingFile) {
      throw error;
    }
  }
  if (storedSecret.length > 0) {
    return deriveKeyFromSecret(storedSecret);
  }
  // No usable key on disk: create one with owner-only directory and file modes.
  const generatedSecret = randomBytes(48).toString('base64url');
  await mkdir(dirname(keyPath), { recursive: true, mode: 0o700 });
  await writeFile(keyPath, `${generatedSecret}\n`, { encoding: 'utf8', mode: 0o600 });
  return deriveKeyFromSecret(generatedSecret);
};
/**
 * Splits a private pack payload into its header fields.
 *
 * Layout: magic | version (1 byte) | settings (1 byte, version >= 2 only) |
 * nonce | auth tag | ciphertext.
 *
 * Legacy (version 1) payloads have no settings byte and report the default
 * compression level with the dictionary disabled.
 *
 * @param {Buffer} payload Raw pack file contents.
 * @returns {{ compression: { compressionLevel: number, useDictionary: boolean },
 *   nonce: Buffer, authTag: Buffer, ciphertext: Buffer }}
 * @throws {Error} If the payload is shorter than its header or has an
 *   unknown magic/version.
 */
const parseHeader = (payload) => {
  const versionOffset = magic.length;
  if (payload.length < versionOffset + 1 + nonceLength + authTagLength) {
    throw new Error('Invalid private pack payload: too short.');
  }
  const payloadVersion = payload[versionOffset];
  const hasKnownMagic = payload.subarray(0, versionOffset).equals(magic);
  if (!hasKnownMagic || (payloadVersion !== legacyVersion && payloadVersion !== currentVersion)) {
    throw new Error('Invalid private pack payload: unsupported format.');
  }
  const hasCompressionSettings = payloadVersion >= 2;
  const nonceStart = versionOffset + 1 + (hasCompressionSettings ? 1 : 0);
  const authTagStart = nonceStart + nonceLength;
  const dataStart = authTagStart + authTagLength;
  // The first check only covers the legacy header. A version 2 header is one
  // byte longer, so re-check against the real header size; otherwise the auth
  // tag slice below would silently come out one byte short.
  if (payload.length < dataStart) {
    throw new Error('Invalid private pack payload: too short.');
  }
  const settingsByte = hasCompressionSettings ? payload[versionOffset + 1] : null;
  const compression =
    settingsByte === null
      ? { compressionLevel: defaultCompressionLevel, useDictionary: false }
      : {
          compressionLevel: settingsByte & compressionLevelMask,
          useDictionary: (settingsByte & compressionDictionaryMask) !== 0,
        };
  return {
    compression,
    nonce: payload.subarray(nonceStart, authTagStart),
    authTag: payload.subarray(authTagStart, dataStart),
    ciphertext: payload.subarray(dataStart),
  };
};
/**
 * Normalizes a requested compression level.
 * Anything that is not a finite number yields the default level; finite
 * numbers are rounded to the nearest integer and clamped to the range 0..11.
 */
const toCompressionLevel = (value) => {
  const isUsable = typeof value === 'number' && Number.isFinite(value);
  if (!isUsable) {
    return defaultCompressionLevel;
  }
  const rounded = Math.round(value);
  if (rounded < 0) {
    return 0;
  }
  return rounded > 11 ? 11 : rounded;
};
/** Packs the compression level (masked to the level bits) and the dictionary flag into one settings byte. */
const encodeCompressionSettings = (settings) => {
  const levelBits = settings.compressionLevel & compressionLevelMask;
  const dictionaryBit = settings.useDictionary ? compressionDictionaryMask : 0;
  return levelBits | dictionaryBit;
};
/**
 * Brotli-compresses `content` in text mode at the requested quality.
 * The built-in dictionary is passed as the `dictionary` option only when
 * `settings.useDictionary` is set.
 */
const brotliEncode = (content, settings) => {
  const params = {
    [zlibConstants.BROTLI_PARAM_MODE]: zlibConstants.BROTLI_MODE_TEXT,
    [zlibConstants.BROTLI_PARAM_QUALITY]: settings.compressionLevel,
  };
  const options = settings.useDictionary ? { params, dictionary: builtinDictionary } : { params };
  return brotliCompressSync(content, options);
};
/**
 * Brotli-decompresses `content`, passing the built-in dictionary as the
 * `dictionary` option when the pack was written with the dictionary flag.
 */
const brotliDecode = (content, settings) => {
  const options = settings.useDictionary ? { dictionary: builtinDictionary } : {};
  return brotliDecompressSync(content, options);
};
/**
 * Compresses and encrypts pack content for a vault.
 *
 * Output layout: magic | current version | settings byte | nonce | auth tag | ciphertext.
 * When `settings` is omitted the compression level falls back to the default
 * and the dictionary is enabled.
 *
 * @param {string} vaultPath Vault whose key is used.
 * @param {Buffer} content Plain pack content.
 * @param {{ compressionLevel?: number, useDictionary?: boolean }} [settings]
 * @returns {Promise<Buffer>} The encoded pack payload.
 */
export const encodePrivatePack = async (vaultPath, content, settings) => {
  const key = await readOrCreateKey(vaultPath);
  const nonce = randomBytes(nonceLength);
  const compression = {
    compressionLevel: toCompressionLevel(settings?.compressionLevel),
    useDictionary: settings?.useDictionary ?? true,
  };
  const compressed = brotliEncode(content, compression);
  const cipher = createCipheriv(algorithm, key, nonce);
  const encryptedBody = cipher.update(compressed);
  const encryptedTail = cipher.final();
  const header = Buffer.from([...magic, currentVersion, encodeCompressionSettings(compression)]);
  return Buffer.concat([header, nonce, cipher.getAuthTag(), encryptedBody, encryptedTail]);
};
/**
 * Decrypts and decompresses a private pack payload for a vault.
 *
 * @param {string} vaultPath Vault whose key is used.
 * @param {Buffer} payload Encoded pack payload.
 * @returns {Promise<Buffer>} The decompressed pack content.
 * @throws {Error} If the header is invalid, the authentication tag does not
 *   have the expected length, or authentication fails.
 */
export const decodePrivatePack = async (vaultPath, payload) => {
  const key = await readOrCreateKey(vaultPath);
  const { nonce, authTag, ciphertext, compression } = parseHeader(payload);
  // Pin the expected tag length. Without it GCM also accepts shorter tags,
  // so a header slice that came out short would be verified as a truncated
  // (weaker) tag instead of being rejected by setAuthTag.
  const decipher = createDecipheriv(algorithm, key, nonce, { authTagLength });
  decipher.setAuthTag(authTag);
  const compressed = Buffer.concat([decipher.update(ciphertext), decipher.final()]);
  return brotliDecode(compressed, compression);
};
/** Reports whether `payload` starts with the private pack magic and has at least one byte after it. */
export const isPrivatePackPayload = (payload) => {
  if (payload.length < magic.length + 1) {
    return false;
  }
  return payload.subarray(0, magic.length).equals(magic);
};
@@ -1,30 +1,173 @@
1
- import { gunzipSync, gzipSync } from 'node:zlib';
1
+ import { gunzipSync } from 'node:zlib';
2
2
  import { mkdir, readdir, readFile, rm, writeFile } from 'node:fs/promises';
3
3
  import { join } from 'node:path';
4
+ import { middleOutIndices } from '../domain/middle-out.js';
5
+ import { decodePrivatePack, encodePrivatePack, isPrivatePackPayload } from './private-pack-codec.js';
4
6
  const packsDirectoryName = 'search-packs';
5
7
  const manifestFileName = 'manifest.json';
6
- const rowChunkSize = 5_000;
8
+ const defaultBuildOptions = {
9
+ rowChunkSize: 5_000,
10
+ compressionLevel: 5,
11
+ useDictionary: true
12
+ };
7
13
  const queryTokenPattern = /[\p{L}\p{N}_-]+/gu;
14
+ const bloomBytes = 256;
15
+ const bloomBitSize = bloomBytes * 8;
16
+ const bloomSeeds = [0x9e3779b1, 0x85ebca6b, 0xc2b2ae35];
8
17
  const toPackDirectory = (vaultPath) => join(vaultPath, '.brainlink', packsDirectoryName);
9
18
  const toManifestPath = (vaultPath) => join(toPackDirectory(vaultPath), manifestFileName);
10
- const parseRowsFromPack = (content) => gunzipSync(content)
11
- .toString('utf8')
12
- .split('\n')
13
- .map((line) => line.trim())
14
- .filter((line) => line.length > 0)
15
- .map((line) => JSON.parse(line));
/**
 * Decodes a pack file into validated search rows.
 *
 * Private pack payloads are decrypted with the vault key; anything else is
 * treated as a gzip pack. The decoded text is newline-delimited JSON.
 * Blank lines are ignored, and rows that are not objects or lack any of the
 * required string fields are dropped. `chunkOrdinal` defaults to 0 and
 * `tags` keeps only string items.
 *
 * @param {string} vaultPath Vault the pack belongs to.
 * @param {Buffer} content Raw pack file contents.
 * @returns {Promise<Array<object>>} The validated rows, in file order.
 * @throws If decoding fails or a non-blank line is not valid JSON.
 */
const parseRowsFromPack = async (vaultPath, content) => {
  const raw = isPrivatePackPayload(content) ? await decodePrivatePack(vaultPath, content) : gunzipSync(content);
  const requiredStringFields = ['documentId', 'agentId', 'title', 'path', 'chunkId', 'content'];
  const rows = [];
  for (const line of raw.toString('utf8').split('\n')) {
    const trimmed = line.trim();
    if (trimmed.length === 0) {
      continue;
    }
    const row = JSON.parse(trimmed);
    // JSON.parse can return null (a line containing just `null`); reading a
    // property of it would throw, so treat non-objects as invalid rows.
    if (row === null || typeof row !== 'object') {
      continue;
    }
    if (requiredStringFields.some((field) => typeof row[field] !== 'string')) {
      continue;
    }
    rows.push({
      documentId: row.documentId,
      agentId: row.agentId,
      title: row.title,
      path: row.path,
      chunkId: row.chunkId,
      chunkOrdinal: typeof row.chunkOrdinal === 'number' ? row.chunkOrdinal : 0,
      content: row.content,
      tags: Array.isArray(row.tags) ? row.tags.filter((item) => typeof item === 'string') : [],
    });
  }
  return rows;
};
16
50
  const toRows = (documents) => documents.flatMap((document) => document.chunks.map((chunk) => ({
17
51
  documentId: document.document.id,
18
52
  agentId: document.document.agentId,
19
53
  title: document.document.title,
20
54
  path: document.document.path,
21
55
  chunkId: chunk.id,
56
+ chunkOrdinal: chunk.ordinal,
22
57
  content: chunk.content,
23
58
  tags: document.document.tags
24
59
  })));
25
60
  const writeManifest = async (vaultPath, manifest) => {
26
61
  await writeFile(toManifestPath(vaultPath), `${JSON.stringify(manifest, null, 2)}\n`, 'utf8');
27
62
  };
/**
 * Reads and normalizes the search pack manifest of a vault.
 *
 * Only manifests with `format: 'private-v2'` and version 2 or 3 are accepted.
 * Missing or mistyped fields fall back to defaults; version 3 manifests also
 * carry the validated `packIndex` and, when well-formed, `packConfig` and
 * `compression`. Any read or parse failure yields `null`.
 */
const readManifest = async (vaultPath) => {
  const numberOr = (value, fallback) => (typeof value === 'number' ? value : fallback);
  const onlyStrings = (value) => (Array.isArray(value) ? value.filter((item) => typeof item === 'string') : []);
  try {
    const parsed = JSON.parse(await readFile(toManifestPath(vaultPath), 'utf8'));
    if (parsed.format !== 'private-v2' || (parsed.version !== 2 && parsed.version !== 3)) {
      return null;
    }
    const createdAt = typeof parsed.createdAt === 'string' ? parsed.createdAt : new Date().toISOString();
    const recordCount = numberOr(parsed.recordCount, 0);
    if (parsed.version === 2) {
      return {
        version: 2,
        createdAt,
        packCount: numberOr(parsed.packCount, 0),
        recordCount,
        format: 'private-v2',
      };
    }
    // Version 3: keep only index entries that name a file and carry a bloom string.
    const packIndex = [];
    if (Array.isArray(parsed.packIndex)) {
      for (const entry of parsed.packIndex) {
        if (!entry || typeof entry !== 'object') {
          continue;
        }
        if (typeof entry.fileName !== 'string' || typeof entry.tokenBloomB64 !== 'string') {
          continue;
        }
        packIndex.push({
          fileName: entry.fileName,
          recordCount: numberOr(entry.recordCount, 0),
          agents: onlyStrings(entry.agents),
          tokenBloomB64: entry.tokenBloomB64,
        });
      }
    }
    const manifest = {
      version: 3,
      createdAt,
      packCount: numberOr(parsed.packCount, packIndex.length),
      recordCount,
      format: 'private-v2',
      packIndex,
    };
    const { packConfig, compression } = parsed;
    if (packConfig && typeof packConfig === 'object') {
      manifest.packConfig = {
        rowChunkSize: numberOr(packConfig.rowChunkSize, defaultBuildOptions.rowChunkSize),
        compressionLevel: numberOr(packConfig.compressionLevel, defaultBuildOptions.compressionLevel),
        useDictionary:
          typeof packConfig.useDictionary === 'boolean' ? packConfig.useDictionary : defaultBuildOptions.useDictionary,
      };
    }
    // Compression statistics are all-or-nothing: every field must be a number.
    const hasCompressionStats =
      compression &&
      typeof compression === 'object' &&
      typeof compression.inputBytes === 'number' &&
      typeof compression.outputBytes === 'number' &&
      typeof compression.ratio === 'number' &&
      typeof compression.savedBytes === 'number';
    if (hasCompressionStats) {
      manifest.compression = {
        inputBytes: compression.inputBytes,
        outputBytes: compression.outputBytes,
        ratio: compression.ratio,
        savedBytes: compression.savedBytes,
      };
    }
    return manifest;
  } catch {
    return null;
  }
};
/**
 * Makes sure a vault with private packs has a readable manifest.
 *
 * - A readable manifest already exists: nothing is written (`not-needed`).
 * - No `.blpk` packs are present: nothing is written (`no-packs`).
 * - Otherwise a minimal version 2 manifest is written with the number of
 *   `.blpk` files and a record count of 0 (`existing-packs`).
 *
 * @returns {Promise<{ repaired: boolean, source: string, packCount: number }>}
 */
export const ensureSearchPackManifest = async (vaultPath) => {
  const existing = await readManifest(vaultPath);
  if (existing) {
    return { repaired: false, source: 'not-needed', packCount: existing.packCount };
  }
  const packFileNames = await sortedPackFiles(vaultPath);
  const packCount = packFileNames.filter((name) => name.endsWith('.blpk')).length;
  if (packCount === 0) {
    return { repaired: false, source: 'no-packs', packCount: 0 };
  }
  await writeManifest(vaultPath, {
    version: 2,
    createdAt: new Date().toISOString(),
    packCount,
    recordCount: 0,
    format: 'private-v2',
  });
  return { repaired: true, source: 'existing-packs', packCount };
};
28
171
  const chunkRows = (rows, size) => {
29
172
  const chunks = [];
30
173
  for (let index = 0; index < rows.length; index += size) {
@@ -53,6 +196,51 @@ const countOccurrences = (text, token) => {
53
196
  }
54
197
  return hits;
55
198
  };
/**
 * Hashes a token to an unsigned 32-bit integer, starting from `seed`.
 * Each UTF-16 code unit is xor-ed into the state, which is then multiplied
 * by 16777619 (the 32-bit FNV prime) with 32-bit wrap-around.
 */
const hashToken = (token, seed) => {
  let state = seed >>> 0;
  let cursor = 0;
  while (cursor < token.length) {
    state = Math.imul(state ^ token.charCodeAt(cursor), 0x01000193) >>> 0;
    cursor += 1;
  }
  return state;
};
/** Allocates an empty (all-zero) bloom filter of `bloomBytes` bytes. */
const createBloom = () => {
  const filter = new Uint8Array(bloomBytes);
  return filter;
};
/** Sets, for every bloom seed, the filter bit selected by the token's hash. Mutates `bloom`. */
const bloomAdd = (bloom, token) => {
  for (const seed of bloomSeeds) {
    const bit = hashToken(token, seed) % bloomBitSize;
    const byteIndex = bit >>> 3;
    const bitMask = 1 << (bit & 7);
    bloom[byteIndex] |= bitMask;
  }
};
/** Returns true only when every seed's bit for the token is set in `bloom`. */
const bloomMayContain = (bloom, token) => {
  for (const seed of bloomSeeds) {
    const bit = hashToken(token, seed) % bloomBitSize;
    const isSet = (bloom[bit >>> 3] & (1 << (bit & 7))) !== 0;
    if (!isSet) {
      return false;
    }
  }
  return true;
};
/** Builds a bloom filter from the tokens of each row's title, path, tags and content. */
const bloomFromRows = (rows) => {
  const bloom = createBloom();
  for (const row of rows) {
    const searchableText = [row.title, row.path, row.tags.join(' '), row.content].join(' ');
    tokenize(searchableText).forEach((token) => bloomAdd(bloom, token));
  }
  return bloom;
};
/** Serializes a bloom filter's bytes as a base64url string. */
const bloomToBase64 = (bloom) => {
  const bytes = Buffer.from(bloom);
  return bytes.toString('base64url');
};
/**
 * Decodes a base64url bloom filter.
 * Returns `{ bloom, valid: true }` when the value decodes to exactly
 * `bloomBytes` bytes; otherwise an empty filter with `valid: false`.
 */
const bloomFromBase64 = (value) => {
  let decoded = null;
  try {
    decoded = Buffer.from(value, 'base64url');
  } catch {
    // Undecodable input is reported as invalid below.
    decoded = null;
  }
  if (decoded !== null && decoded.byteLength === bloomBytes) {
    return { bloom: new Uint8Array(decoded), valid: true };
  }
  return { bloom: createBloom(), valid: false };
};
56
244
  const computeTextScore = (row, tokens) => {
57
245
  if (tokens.length === 0) {
58
246
  return 0;
@@ -75,6 +263,7 @@ const toSearchResult = (row, score) => ({
75
263
  title: row.title,
76
264
  path: row.path,
77
265
  chunkId: row.chunkId,
266
+ chunkOrdinal: row.chunkOrdinal,
78
267
  content: row.content,
79
268
  score,
80
269
  textScore: score,
@@ -86,7 +275,7 @@ const sortedPackFiles = async (vaultPath) => {
86
275
  try {
87
276
  const files = await readdir(toPackDirectory(vaultPath));
88
277
  return files
89
- .filter((file) => file.endsWith('.jsonl.gz'))
278
+ .filter((file) => file.endsWith('.blpk') || file.endsWith('.jsonl.gz'))
90
279
  .sort((left, right) => left.localeCompare(right));
91
280
  }
92
281
  catch (error) {
@@ -96,46 +285,158 @@ const sortedPackFiles = async (vaultPath) => {
96
285
  throw error;
97
286
  }
98
287
  };
99
- export const buildSearchPacks = async (vaultPath, documents) => {
288
+ const writeRowsAsPrivatePacks = async (vaultPath, rows, clearExisting, options) => {
289
+ const startedAt = process.hrtime.bigint();
100
290
  const directory = toPackDirectory(vaultPath);
101
- const rows = toRows(documents);
102
291
  await mkdir(directory, { recursive: true });
103
- const current = await readdir(directory);
104
- await Promise.all(current
105
- .filter((name) => name.endsWith('.jsonl.gz') || name === manifestFileName)
106
- .map((name) => rm(join(directory, name), { force: true })));
107
- const chunks = chunkRows(rows, rowChunkSize);
108
- await Promise.all(chunks.map(async (chunk, index) => {
109
- const fileName = `pack-${String(index + 1).padStart(4, '0')}.jsonl.gz`;
292
+ if (clearExisting) {
293
+ const current = await readdir(directory);
294
+ await Promise.all(current
295
+ .filter((name) => name.endsWith('.blpk') || name.endsWith('.jsonl.gz') || name === manifestFileName)
296
+ .map((name) => rm(join(directory, name), { force: true })));
297
+ }
298
+ const chunks = chunkRows(rows, options.rowChunkSize);
299
+ const packIndex = [];
300
+ let inputBytes = 0;
301
+ let outputBytes = 0;
302
+ for (let index = 0; index < chunks.length; index += 1) {
303
+ const chunk = chunks[index];
304
+ const fileName = `pack-${String(index + 1).padStart(4, '0')}.blpk`;
110
305
  const serialized = `${chunk.map((row) => JSON.stringify(row)).join('\n')}\n`;
111
- const compressed = gzipSync(Buffer.from(serialized, 'utf8'), { level: 6 });
306
+ const compressed = await encodePrivatePack(vaultPath, Buffer.from(serialized, 'utf8'), {
307
+ compressionLevel: options.compressionLevel,
308
+ useDictionary: options.useDictionary
309
+ });
310
+ const tokenBloomB64 = bloomToBase64(bloomFromRows(chunk));
112
311
  await writeFile(join(directory, fileName), compressed);
113
- }));
312
+ inputBytes += Buffer.byteLength(serialized, 'utf8');
313
+ outputBytes += compressed.byteLength;
314
+ packIndex.push({
315
+ fileName,
316
+ recordCount: chunk.length,
317
+ agents: Array.from(new Set(chunk.map((row) => row.agentId))).sort((left, right) => left.localeCompare(right)),
318
+ tokenBloomB64
319
+ });
320
+ }
114
321
  await writeManifest(vaultPath, {
115
- version: 1,
322
+ version: 3,
116
323
  createdAt: new Date().toISOString(),
117
324
  packCount: chunks.length,
118
- recordCount: rows.length
325
+ recordCount: rows.length,
326
+ format: 'private-v2',
327
+ packIndex,
328
+ packConfig: {
329
+ rowChunkSize: options.rowChunkSize,
330
+ compressionLevel: options.compressionLevel,
331
+ useDictionary: options.useDictionary
332
+ },
333
+ compression: {
334
+ inputBytes,
335
+ outputBytes,
336
+ ratio: outputBytes / Math.max(inputBytes, 1),
337
+ savedBytes: Math.max(inputBytes - outputBytes, 0)
338
+ }
119
339
  });
340
+ const durationMs = Number(process.hrtime.bigint() - startedAt) / 1_000_000;
341
+ const safeInput = Math.max(inputBytes, 1);
342
+ const savedBytes = Math.max(inputBytes - outputBytes, 0);
120
343
  return {
121
344
  packCount: chunks.length,
122
- recordCount: rows.length
345
+ recordCount: rows.length,
346
+ compression: {
347
+ inputBytes,
348
+ outputBytes,
349
+ ratio: outputBytes / safeInput,
350
+ savedBytes
351
+ },
352
+ durationMs
123
353
  };
124
354
  };
/**
 * Chooses which pack files a search has to read.
 *
 * Without a version 3 manifest every pack file on disk is a candidate.
 * With one, packs are narrowed first by agent and then by the per-pack token
 * bloom filters. Pruning is conservative: if any bloom entry is invalid, or
 * no bloom matches any token, all packs for the agent are returned; if no
 * pack matches the agent, every pack file is returned.
 *
 * Index entries whose file is not present in the pack directory are
 * ignored, so a stale or hand-edited manifest cannot make the caller read a
 * missing file or a path outside the pack directory.
 *
 * @param {string} vaultPath Vault to search.
 * @param {string[]} tokens Query tokens.
 * @param {string} [agentId] Optional agent filter.
 * @returns {Promise<string[]>} Pack file names to scan.
 */
const selectCandidatePackFiles = async (vaultPath, tokens, agentId) => {
  const allFiles = await sortedPackFiles(vaultPath);
  if (allFiles.length === 0) {
    return [];
  }
  const manifest = await readManifest(vaultPath);
  if (!manifest || manifest.version !== 3 || !Array.isArray(manifest.packIndex)) {
    return allFiles;
  }
  const filesOnDisk = new Set(allFiles);
  const normalizedAgent = agentId?.trim();
  const toFileNames = (entries) => entries.map((entry) => entry.fileName);
  const byAgent = manifest.packIndex.filter((entry) => filesOnDisk.has(entry.fileName) && (normalizedAgent ? entry.agents.includes(normalizedAgent) : true));
  if (tokens.length === 0) {
    return toFileNames(byAgent);
  }
  let hasInvalidBloomIndex = false;
  const byToken = byAgent.filter((entry) => {
    const decoded = bloomFromBase64(entry.tokenBloomB64);
    if (!decoded.valid) {
      hasInvalidBloomIndex = true;
      return true;
    }
    return tokens.some((token) => bloomMayContain(decoded.bloom, token));
  });
  // Lossless guarantee: if the bloom metadata is partially invalid, do not prune packs.
  if (hasInvalidBloomIndex) {
    return toFileNames(byAgent);
  }
  if (byToken.length > 0) {
    return toFileNames(byToken);
  }
  return byAgent.length > 0 ? toFileNames(byAgent) : allFiles;
};
/**
 * Rebuilds all search packs of a vault from the given documents, replacing
 * any existing packs and manifest.
 *
 * Missing options fall back to the default build options. A `rowChunkSize`
 * that is not a finite number of at least 1 also falls back to the default:
 * the row chunker advances its index by the chunk size, so zero or a
 * negative size would never terminate.
 *
 * @param {string} vaultPath Vault to write packs for.
 * @param {Array<object>} documents Indexed documents with their chunks.
 * @param {{ rowChunkSize?: number, compressionLevel?: number, useDictionary?: boolean }} [options]
 * @returns {Promise<object>} The report returned by the pack writer.
 */
export const buildSearchPacks = async (vaultPath, documents, options) => {
  const requestedChunkSize = options?.rowChunkSize ?? defaultBuildOptions.rowChunkSize;
  const hasUsableChunkSize =
    typeof requestedChunkSize === 'number' && Number.isFinite(requestedChunkSize) && requestedChunkSize >= 1;
  const resolvedOptions = {
    rowChunkSize: hasUsableChunkSize ? requestedChunkSize : defaultBuildOptions.rowChunkSize,
    compressionLevel: options?.compressionLevel ?? defaultBuildOptions.compressionLevel,
    useDictionary: options?.useDictionary ?? defaultBuildOptions.useDictionary,
  };
  return writeRowsAsPrivatePacks(vaultPath, toRows(documents), true, resolvedOptions);
};
/**
 * Converts legacy `.jsonl.gz` packs into private packs.
 *
 * Nothing happens when a `.blpk` pack already exists or when there are no
 * legacy packs. Otherwise all legacy rows are read and rewritten as private
 * packs with the default build options, replacing the existing pack files.
 *
 * @returns {Promise<object>} `{ imported: false }`, or
 *   `{ imported: true, source: 'legacy-packs', ...report }`.
 */
export const ensurePrivatePacksFromLegacyIndex = async (vaultPath) => {
  const packFileNames = await sortedPackFiles(vaultPath);
  const hasPrivatePacks = packFileNames.some((name) => name.endsWith('.blpk'));
  if (hasPrivatePacks) {
    return { imported: false };
  }
  const legacyPackNames = packFileNames.filter((name) => name.endsWith('.jsonl.gz'));
  if (legacyPackNames.length === 0) {
    return { imported: false };
  }
  const rows = [];
  for (const name of legacyPackNames) {
    const packBytes = await readFile(join(toPackDirectory(vaultPath), name));
    const parsedRows = await parseRowsFromPack(vaultPath, packBytes);
    rows.push(...parsedRows);
  }
  const report = await writeRowsAsPrivatePacks(vaultPath, rows, true, defaultBuildOptions);
  return { imported: true, source: 'legacy-packs', ...report };
};
/** Extracts the search pack build options from the `searchPack` section of a config object. */
export const toSearchPackBuildOptions = (config) => {
  const { rowChunkSize, compressionLevel, useDictionary } = config.searchPack;
  return { rowChunkSize, compressionLevel, useDictionary };
};
125
421
  export const searchInPacks = async (vaultPath, query, limit, agentId) => {
126
422
  const normalizedAgent = agentId?.trim();
127
423
  const tokens = tokenize(query);
128
424
  if (limit <= 0 || tokens.length === 0) {
129
425
  return [];
130
426
  }
131
- const files = await sortedPackFiles(vaultPath);
427
+ const files = await selectCandidatePackFiles(vaultPath, tokens, normalizedAgent);
132
428
  if (files.length === 0) {
133
429
  return [];
134
430
  }
135
431
  const scored = [];
136
432
  for (const file of files) {
137
- const rows = parseRowsFromPack(await readFile(join(toPackDirectory(vaultPath), file)));
138
- rows.forEach((row) => {
433
+ const rows = await parseRowsFromPack(vaultPath, await readFile(join(toPackDirectory(vaultPath), file)));
434
+ const traversal = middleOutIndices(rows.length, Math.floor(rows.length / 2));
435
+ traversal.forEach((rowIndex) => {
436
+ const row = rows[rowIndex];
437
+ if (!row) {
438
+ return;
439
+ }
139
440
  if (normalizedAgent && row.agentId !== normalizedAgent) {
140
441
  return;
141
442
  }
@@ -2,7 +2,7 @@ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
2
2
  import { readFileSync } from 'node:fs';
3
3
  import { dirname, join } from 'node:path';
4
4
  import { fileURLToPath } from 'node:url';
5
- import { addNoteInputSchema, addFileInputSchema, addFileTool, addNoteTool, brokenLinksInputSchema, brokenLinksTool, bootstrapInputSchema, bootstrapTool, contextInputSchema, contextTool, graphInputSchema, graphTool, indexInputSchema, indexTool, orphansInputSchema, orphansTool, policyInputSchema, policyTool, recommendationsInputSchema, recommendationsTool, searchInputSchema, searchTool, statsInputSchema, statsTool, syncInputSchema, syncTool, validateInputSchema, validateTool } from './tools.js';
5
+ import { addNoteInputSchema, addFileInputSchema, addFileTool, addNoteTool, dedupeInputSchema, dedupeResolveInputSchema, dedupeResolveTool, dedupeTool, brokenLinksInputSchema, brokenLinksTool, bootstrapInputSchema, bootstrapTool, contextInputSchema, contextTool, graphInputSchema, graphTool, indexInputSchema, indexTool, orphansInputSchema, orphansTool, policyInputSchema, policyTool, recommendationsInputSchema, recommendationsTool, searchInputSchema, searchTool, statsInputSchema, statsTool, syncInputSchema, syncTool, validateInputSchema, validateTool } from './tools.js';
6
6
  const readPackageVersion = () => {
7
7
  const packagePath = join(dirname(fileURLToPath(import.meta.url)), '../../package.json');
8
8
  const metadata = JSON.parse(readFileSync(packagePath, 'utf8'));
@@ -40,6 +40,16 @@ export const createBrainlinkMcpServer = () => {
40
40
  description: 'Search indexed Brainlink notes with FTS, semantic or hybrid retrieval.',
41
41
  inputSchema: searchInputSchema
42
42
  }, searchTool);
43
+ server.registerTool('brainlink_dedupe', {
44
+ title: 'Detect Duplicate Notes',
45
+ description: 'Detect possible duplicate notes using exact content hash and semantic similarity scoring.',
46
+ inputSchema: dedupeInputSchema
47
+ }, dedupeTool);
48
+ server.registerTool('brainlink_resolve_duplicate', {
49
+ title: 'Resolve Duplicate Notes',
50
+ description: 'Resolve a duplicate pair with merge, link or ignore. Non-merge actions still create low-priority related edges.',
51
+ inputSchema: dedupeResolveInputSchema
52
+ }, dedupeResolveTool);
43
53
  server.registerTool('brainlink_add_note', {
44
54
  title: 'Add Brainlink Note',
45
55
  description: 'Write durable Markdown memory, then reindex the vault. Include explicit [[wiki links]] for connected graph memory. Add priority markers near links, such as priority: high, #important or #critical, when a relationship should be weighted higher.',