@andespindola/brainlink 0.1.0-beta.9 → 0.1.0-beta.90
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +8 -5
- package/CHANGELOG.md +26 -2
- package/CONTRIBUTING.md +2 -2
- package/COPYRIGHT.md +5 -0
- package/README.md +146 -17
- package/SECURITY.md +1 -1
- package/dist/application/analyze-vault.js +7 -7
- package/dist/application/build-context.js +56 -1
- package/dist/application/dedupe-notes.js +226 -0
- package/dist/application/frontend/client-css.js +154 -102
- package/dist/application/frontend/client-html.js +49 -40
- package/dist/application/frontend/client-js.js +3130 -166
- package/dist/application/frontend/client-worker-js.js +66 -0
- package/dist/application/get-graph-layout.js +18 -6
- package/dist/application/get-graph-node.js +12 -0
- package/dist/application/get-graph-summary.js +12 -0
- package/dist/application/get-graph.js +3 -3
- package/dist/application/import-legacy-sqlite.js +296 -0
- package/dist/application/index-vault.js +252 -19
- package/dist/application/list-agents.js +3 -3
- package/dist/application/list-links.js +5 -5
- package/dist/application/offline-pack-backup.js +44 -0
- package/dist/application/search-graph-node-ids.js +12 -0
- package/dist/application/search-knowledge.js +25 -10
- package/dist/application/server/routes.js +102 -1
- package/dist/application/start-server.js +75 -4
- package/dist/application/watch-vault.js +23 -2
- package/dist/benchmarks/large-vault.js +1 -1
- package/dist/cli/commands/agent-commands.js +20 -3
- package/dist/cli/commands/write-commands.js +818 -8
- package/dist/domain/context.js +53 -11
- package/dist/domain/embeddings.js +2 -1
- package/dist/domain/graph-layout.js +67 -16
- package/dist/domain/middle-out.js +18 -0
- package/dist/infrastructure/config.js +38 -0
- package/dist/infrastructure/file-index.js +358 -0
- package/dist/infrastructure/file-system-vault.js +15 -0
- package/dist/infrastructure/index-state.js +56 -0
- package/dist/infrastructure/private-pack-codec.js +134 -0
- package/dist/infrastructure/search-packs.js +452 -0
- package/dist/infrastructure/session-state.js +57 -2
- package/dist/mcp/server.js +11 -1
- package/dist/mcp/tools.js +215 -3
- package/docs/AGENT_USAGE.md +103 -16
- package/docs/ARCHITECTURE.md +25 -26
- package/docs/QUICKSTART.md +9 -1
- package/package.json +6 -4
- package/dist/infrastructure/sqlite/document-writer.js +0 -51
- package/dist/infrastructure/sqlite/graph-reader.js +0 -120
- package/dist/infrastructure/sqlite/schema.js +0 -111
- package/dist/infrastructure/sqlite/search-reader.js +0 -156
- package/dist/infrastructure/sqlite/types.js +0 -1
- package/dist/infrastructure/sqlite-index.js +0 -25
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import { readFile, writeFile } from 'node:fs/promises';
|
|
2
|
+
import { join } from 'node:path';
|
|
3
|
+
/** Name of the JSON file that stores the index state inside `.brainlink`. */
const indexStateFileName = 'index-state.json';

/**
 * Resolves the index-state file location for a vault.
 *
 * @param {string} vaultPath - Root directory of the vault.
 * @returns {string} `<vaultPath>/.brainlink/index-state.json`.
 */
const toIndexStatePath = (vaultPath) => {
    const segments = [vaultPath, '.brainlink', indexStateFileName];
    return join(...segments);
};
|
|
5
|
+
/**
 * Loads the persisted index state of a vault and normalizes it.
 *
 * Any failure (missing file, invalid JSON, unexpected shape) yields `null`.
 * File entries lacking a string `path` or numeric `mtimeMs`/`size` are dropped,
 * and optional settings fall back to fixed defaults when absent or mistyped.
 *
 * @param {string} vaultPath - Root directory of the vault.
 * @returns {Promise<object|null>} Normalized state, or `null` when unusable.
 */
export const readIndexState = async (vaultPath) => {
    const ofType = (value, type, fallback) => (typeof value === type ? value : fallback);
    try {
        const text = await readFile(toIndexStatePath(vaultPath), 'utf8');
        const parsed = JSON.parse(text);
        if (parsed.version !== 1 || !Array.isArray(parsed.files)) {
            return null;
        }
        const files = [];
        for (const entry of parsed.files) {
            const usable = Boolean(entry) &&
                typeof entry === 'object' &&
                typeof entry.path === 'string' &&
                typeof entry.mtimeMs === 'number' &&
                typeof entry.size === 'number';
            if (usable) {
                files.push({ path: entry.path, mtimeMs: entry.mtimeMs, size: entry.size });
            }
        }
        const pending = parsed.pendingPackChanges;
        return {
            version: 1,
            updatedAt: ofType(parsed.updatedAt, 'string', null) ?? new Date().toISOString(),
            chunkSize: ofType(parsed.chunkSize, 'number', 1200),
            embeddingProvider: ofType(parsed.embeddingProvider, 'string', 'none'),
            searchPackRowChunkSize: ofType(parsed.searchPackRowChunkSize, 'number', 5_000),
            searchPackCompressionLevel: ofType(parsed.searchPackCompressionLevel, 'number', 5),
            searchPackUseDictionary: ofType(parsed.searchPackUseDictionary, 'boolean', true),
            files,
            // Negative or non-numeric counters are reset to zero.
            pendingPackChanges: typeof pending === 'number' && pending >= 0 ? pending : 0
        };
    }
    catch {
        return null;
    }
};
|
|
43
|
+
/**
 * Persists the index state of a vault as a single-line JSON document.
 *
 * The stored `updatedAt` is always the current time, `files` is written sorted
 * by path (the caller's array is not mutated), and `pendingPackChanges` is
 * floored and clamped to be non-negative.
 *
 * @param {string} vaultPath - Root directory of the vault.
 * @param {object} state - State to persist.
 * @returns {Promise<void>}
 */
export const writeIndexState = async (vaultPath, state) => {
    const byPath = (left, right) => left.path.localeCompare(right.path);
    const serialized = JSON.stringify({
        version: 1,
        updatedAt: new Date().toISOString(),
        chunkSize: state.chunkSize,
        embeddingProvider: state.embeddingProvider,
        searchPackRowChunkSize: state.searchPackRowChunkSize,
        searchPackCompressionLevel: state.searchPackCompressionLevel,
        searchPackUseDictionary: state.searchPackUseDictionary,
        files: state.files.slice().sort(byPath),
        pendingPackChanges: Math.max(0, Math.floor(state.pendingPackChanges))
    });
    await writeFile(toIndexStatePath(vaultPath), `${serialized}\n`, 'utf8');
};
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
import { createCipheriv, createDecipheriv, createHash, randomBytes } from 'node:crypto';
|
|
2
|
+
import { brotliCompressSync, brotliDecompressSync, constants as zlibConstants } from 'node:zlib';
|
|
3
|
+
import { mkdir, readFile, writeFile } from 'node:fs/promises';
|
|
4
|
+
import { dirname, join } from 'node:path';
|
|
5
|
+
import { getBrainlinkHomePath } from './paths.js';
|
|
6
|
+
/** Byte prefix that marks a payload as a private pack. */
const magic = Buffer.from('BLPK2', 'ascii');
/** Header version whose layout has no compression-settings byte. */
const legacyVersion = 1;
/** Header version written by the encoder; adds one compression-settings byte. */
const currentVersion = 2;
/** Nonce length in bytes. */
const nonceLength = 12;
/** Authentication tag length in bytes. */
const authTagLength = 16;
/** Cipher name handed to `createCipheriv` / `createDecipheriv`. */
const algorithm = 'aes-256-gcm';
/** Settings-byte bits 0-3: compression level. */
const compressionLevelMask = 0b0000_1111;
/** Settings-byte bit 4: set when the built-in dictionary was requested. */
const compressionDictionaryMask = 0b0001_0000;
/** Level used when none (or an unusable one) is supplied. */
const defaultCompressionLevel = 5;
/** Newline-joined sample text passed as the `dictionary` option to the Brotli calls. */
const builtinDictionary = Buffer.from(
    [
        '"documentId","agentId","title","path","chunkId","chunkOrdinal","content","tags"',
        '"searchMode","textScore","semanticScore","weight","priority","shared"',
        'agents/shared memory-hub architecture context index search graph markdown tags links',
        '#memory #architecture #context #graph #search #index [[Memory Hub]] [[Architecture]]',
        'The quick brown fox jumps over the lazy dog. Brainlink context package metadata.',
        'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-:/.#[]{}(), '
    ].join('\n'),
    'utf8'
);
|
|
23
|
+
/**
 * Builds the location of the key file that belongs to a vault.
 *
 * The file name embeds the first 24 hex characters of the SHA-256 digest of
 * `vaultPath`; the file sits in the `keys` folder under the Brainlink home path.
 *
 * @param {string} vaultPath - Vault path used as the hash input.
 * @returns {string} Path of the key file.
 */
const keyFilePath = (vaultPath) => {
    const digest = createHash('sha256').update(vaultPath).digest('hex');
    const fileName = `search-pack-${digest.slice(0, 24)}.key`;
    return join(getBrainlinkHomePath(), 'keys', fileName);
};
|
|
27
|
+
/**
 * Turns a textual secret into key bytes by hashing it with SHA-256.
 *
 * @param {string} secret - Secret text, hashed as UTF-8.
 * @returns {Buffer} 32-byte digest.
 */
const deriveKeyFromSecret = (secret) => {
    const hasher = createHash('sha256');
    hasher.update(secret, 'utf8');
    return hasher.digest();
};
|
|
28
|
+
/**
 * Resolves the encryption key for a vault.
 *
 * Resolution order:
 *  1. a non-blank `BRAINLINK_SEARCH_PACK_KEY` environment variable;
 *  2. the non-blank content of the vault's key file;
 *  3. a freshly generated secret that is stored in the key file.
 *
 * The key file is created with the exclusive `wx` flag. When another caller
 * creates it first, this function adopts that caller's secret instead of
 * overwriting it, so concurrent first-time callers end up with the same key.
 * A key file that already exists but is blank is rewritten with a new secret.
 *
 * @param {string} vaultPath - Vault whose key is requested.
 * @returns {Promise<Buffer>} Key bytes derived from the selected secret.
 * @throws Re-throws file-system errors other than a missing key file or the
 *   exclusive-create collision.
 */
const readOrCreateKey = async (vaultPath) => {
    const envSecret = process.env.BRAINLINK_SEARCH_PACK_KEY?.trim();
    if (envSecret) {
        return deriveKeyFromSecret(envSecret);
    }
    const path = keyFilePath(vaultPath);
    const hasCode = (error, code) => error instanceof Error && 'code' in error && error.code === code;
    // Trimmed file content; '' for a blank file; null when the file is missing.
    const readStoredSecret = async () => {
        try {
            return (await readFile(path, 'utf8')).trim();
        }
        catch (error) {
            if (hasCode(error, 'ENOENT')) {
                return null;
            }
            throw error;
        }
    };
    const existing = await readStoredSecret();
    if (existing) {
        return deriveKeyFromSecret(existing);
    }
    const secret = randomBytes(48).toString('base64url');
    const fileContent = `${secret}\n`;
    await mkdir(dirname(path), { recursive: true, mode: 0o700 });
    if (existing === null) {
        try {
            // 'wx' fails with EEXIST instead of replacing a key written in the meantime.
            await writeFile(path, fileContent, { encoding: 'utf8', mode: 0o600, flag: 'wx' });
            return deriveKeyFromSecret(secret);
        }
        catch (error) {
            if (!hasCode(error, 'EEXIST')) {
                throw error;
            }
        }
        // Another caller owns the file; it may not have flushed the secret yet, so poll briefly.
        for (let attempt = 0; attempt < 10; attempt += 1) {
            const winner = await readStoredSecret();
            if (winner) {
                return deriveKeyFromSecret(winner);
            }
            await new Promise((resolve) => setTimeout(resolve, 10));
        }
    }
    // Blank (or persistently empty) key file: replace it, as the original implementation did.
    await writeFile(path, fileContent, { encoding: 'utf8', mode: 0o600 });
    return deriveKeyFromSecret(secret);
};
|
|
50
|
+
/**
 * Splits a private pack payload into its header fields and ciphertext.
 *
 * Layout: magic, one version byte, (version 2 only) one compression-settings
 * byte, nonce, authentication tag, ciphertext. Version 1 payloads carry no
 * settings byte and are reported with the default level and no dictionary.
 *
 * The length is validated again once the version is known, so a version 2
 * payload that is too short for its extra settings byte is rejected here
 * instead of producing a truncated authentication tag.
 *
 * @param {Buffer} payload - Raw pack bytes.
 * @returns {{compression: {compressionLevel: number, useDictionary: boolean}, nonce: Buffer, authTag: Buffer, ciphertext: Buffer}}
 * @throws {Error} When the payload is too short, or its magic or version is not recognised.
 */
const parseHeader = (payload) => {
    const tooShort = () => new Error('Invalid private pack payload: too short.');
    if (payload.length < magic.length + 1 + nonceLength + authTagLength) {
        throw tooShort();
    }
    const payloadMagic = payload.subarray(0, magic.length);
    const payloadVersion = payload[magic.length] ?? 0;
    if (!payloadMagic.equals(magic) || (payloadVersion !== legacyVersion && payloadVersion !== currentVersion)) {
        throw new Error('Invalid private pack payload: unsupported format.');
    }
    const hasCompressionSettings = payloadVersion >= 2;
    const nonceStart = magic.length + 1 + (hasCompressionSettings ? 1 : 0);
    const authTagStart = nonceStart + nonceLength;
    const dataStart = authTagStart + authTagLength;
    // The first check assumed the shorter legacy header; re-check for this version's layout.
    if (payload.length < dataStart) {
        throw tooShort();
    }
    const settingsByte = hasCompressionSettings ? payload[magic.length + 1] ?? 0 : null;
    const compression = settingsByte != null
        ? {
            compressionLevel: settingsByte & compressionLevelMask,
            useDictionary: (settingsByte & compressionDictionaryMask) !== 0
        }
        : {
            compressionLevel: defaultCompressionLevel,
            useDictionary: false
        };
    return {
        compression,
        nonce: payload.subarray(nonceStart, authTagStart),
        authTag: payload.subarray(authTagStart, dataStart),
        ciphertext: payload.subarray(dataStart)
    };
};
|
|
79
|
+
/**
 * Normalizes a requested compression level.
 *
 * Non-numeric or non-finite input yields the default level; otherwise the
 * value is rounded and limited to the range 0..11.
 *
 * @param {unknown} value - Requested level.
 * @returns {number} Usable level.
 */
const toCompressionLevel = (value) => {
    const usable = typeof value === 'number' && Number.isFinite(value);
    if (!usable) {
        return defaultCompressionLevel;
    }
    const rounded = Math.round(value);
    if (rounded > 11) {
        return 11;
    }
    return rounded < 0 ? 0 : rounded;
};
|
|
92
|
+
/**
 * Packs compression settings into the single header byte.
 *
 * @param {{compressionLevel: number, useDictionary: boolean}} settings
 * @returns {number} Level in the masked low bits plus the dictionary flag bit.
 */
const encodeCompressionSettings = (settings) => {
    const levelBits = settings.compressionLevel & compressionLevelMask;
    const dictionaryBit = settings.useDictionary ? compressionDictionaryMask : 0;
    return levelBits | dictionaryBit;
};
|
|
93
|
+
/**
 * Compresses content with Brotli in text mode.
 *
 * @param {Buffer} content - Bytes to compress.
 * @param {{compressionLevel: number, useDictionary: boolean}} settings -
 *   Quality to use, and whether to pass the built-in dictionary as the
 *   `dictionary` option.
 * @returns {Buffer} Compressed bytes.
 */
const brotliEncode = (content, settings) => {
    const { compressionLevel, useDictionary } = settings;
    const params = {
        [zlibConstants.BROTLI_PARAM_MODE]: zlibConstants.BROTLI_MODE_TEXT,
        [zlibConstants.BROTLI_PARAM_QUALITY]: compressionLevel
    };
    const options = useDictionary ? { params, dictionary: builtinDictionary } : { params };
    return brotliCompressSync(content, options);
};
|
|
105
|
+
/**
 * Decompresses Brotli content.
 *
 * @param {Buffer} content - Compressed bytes.
 * @param {{useDictionary: boolean}} settings - When `useDictionary` is set,
 *   the built-in dictionary is passed as the `dictionary` option.
 * @returns {Buffer} Decompressed bytes.
 */
const brotliDecode = (content, settings) => {
    const options = settings.useDictionary ? { dictionary: builtinDictionary } : {};
    return brotliDecompressSync(content, options);
};
|
|
112
|
+
/**
 * Compresses and encrypts pack content for a vault.
 *
 * Output layout: magic, current version byte, settings byte, nonce,
 * authentication tag, ciphertext. Missing settings default to the normalized
 * default level with the dictionary enabled.
 *
 * @param {string} vaultPath - Vault whose key encrypts the pack.
 * @param {Buffer} content - Plain pack bytes.
 * @param {{compressionLevel?: number, useDictionary?: boolean}} [settings]
 * @returns {Promise<Buffer>} Encoded payload.
 */
export const encodePrivatePack = async (vaultPath, content, settings) => {
    const key = await readOrCreateKey(vaultPath);
    const nonce = randomBytes(nonceLength);
    const effective = {
        compressionLevel: toCompressionLevel(settings?.compressionLevel),
        useDictionary: settings?.useDictionary ?? true
    };
    const compressed = brotliEncode(content, effective);
    const cipher = createCipheriv(algorithm, key, nonce);
    const head = cipher.update(compressed);
    const tail = cipher.final();
    // The tag is only available once the cipher has been finalized.
    const authTag = cipher.getAuthTag();
    const versionAndSettings = Buffer.from([currentVersion, encodeCompressionSettings(effective)]);
    return Buffer.concat([magic, versionAndSettings, nonce, authTag, head, tail]);
};
|
|
126
|
+
/**
 * Decrypts and decompresses a private pack payload.
 *
 * @param {string} vaultPath - Vault whose key decrypts the pack.
 * @param {Buffer} payload - Encoded payload.
 * @returns {Promise<Buffer>} Plain pack bytes.
 * @throws When the header is rejected by `parseHeader` or decryption fails.
 */
export const decodePrivatePack = async (vaultPath, payload) => {
    const key = await readOrCreateKey(vaultPath);
    const header = parseHeader(payload);
    const decipher = createDecipheriv(algorithm, key, header.nonce);
    decipher.setAuthTag(header.authTag);
    const head = decipher.update(header.ciphertext);
    const tail = decipher.final();
    return brotliDecode(Buffer.concat([head, tail]), header.compression);
};
|
|
134
|
+
/**
 * Tells whether a payload starts with the private pack magic and has at least
 * one more byte after it.
 *
 * @param {Buffer} payload - Bytes to inspect.
 * @returns {boolean} `true` when the payload looks like a private pack.
 */
export const isPrivatePackPayload = (payload) => {
    if (payload.length < magic.length + 1) {
        return false;
    }
    return payload.subarray(0, magic.length).equals(magic);
};
|
|
@@ -0,0 +1,452 @@
|
|
|
1
|
+
import { gunzipSync } from 'node:zlib';
|
|
2
|
+
import { mkdir, readdir, readFile, rm, writeFile } from 'node:fs/promises';
|
|
3
|
+
import { join } from 'node:path';
|
|
4
|
+
import { middleOutIndices } from '../domain/middle-out.js';
|
|
5
|
+
import { decodePrivatePack, encodePrivatePack, isPrivatePackPayload } from './private-pack-codec.js';
|
|
6
|
+
/** Folder under `.brainlink` that holds the pack files and their manifest. */
const packsDirectoryName = 'search-packs';
/** File name of the pack manifest. */
const manifestFileName = 'manifest.json';
/** Build settings used when the caller supplies none. */
const defaultBuildOptions = { rowChunkSize: 5000, compressionLevel: 5, useDictionary: true };
/** Matches runs of letters, digits, underscores and hyphens. */
const queryTokenPattern = /[\p{L}\p{N}_-]+/gu;
/** Size of one bloom filter in bytes. */
const bloomBytes = 256;
/** Size of one bloom filter in bits. */
const bloomBitSize = 8 * bloomBytes;
/** One hash seed per bloom probe. */
const bloomSeeds = [0x9e3779b1, 0x85ebca6b, 0xc2b2ae35];
|
|
17
|
+
/**
 * Resolves the directory that stores a vault's search packs.
 *
 * @param {string} vaultPath - Root directory of the vault.
 * @returns {string} `<vaultPath>/.brainlink/<packsDirectoryName>`.
 */
const toPackDirectory = (vaultPath) => {
    return join(vaultPath, '.brainlink', packsDirectoryName);
};
|
|
18
|
+
/**
 * Resolves the manifest file inside a vault's pack directory.
 *
 * @param {string} vaultPath - Root directory of the vault.
 * @returns {string} Path of the manifest file.
 */
const toManifestPath = (vaultPath) => {
    const directory = toPackDirectory(vaultPath);
    return join(directory, manifestFileName);
};
|
|
19
|
+
/**
 * Decodes a pack file into validated search rows.
 *
 * Private pack payloads are decrypted; anything else is treated as gzip.
 * The decoded text is read as one JSON document per non-blank line. Lines
 * whose value is not an object (including JSON `null`) or that lack one of
 * the required string fields are skipped, matching how the other readers in
 * this module ignore malformed entries. `chunkOrdinal` falls back to 0 and
 * `tags` keeps only string items.
 *
 * @param {string} vaultPath - Vault that owns the pack (needed for decryption).
 * @param {Buffer} content - Raw pack file bytes.
 * @returns {Promise<object[]>} Validated rows in file order.
 * @throws When decoding fails or a line is not valid JSON.
 */
const parseRowsFromPack = async (vaultPath, content) => {
    const raw = isPrivatePackPayload(content) ? await decodePrivatePack(vaultPath, content) : gunzipSync(content);
    const requiredStrings = ['documentId', 'agentId', 'title', 'path', 'chunkId', 'content'];
    const rows = [];
    for (const line of raw.toString('utf8').split('\n')) {
        const trimmed = line.trim();
        if (trimmed.length === 0) {
            continue;
        }
        const row = JSON.parse(trimmed);
        // JSON.parse may yield null or a primitive; property access on null would throw.
        if (row === null || typeof row !== 'object') {
            continue;
        }
        if (requiredStrings.some((field) => typeof row[field] !== 'string')) {
            continue;
        }
        rows.push({
            documentId: row.documentId,
            agentId: row.agentId,
            title: row.title,
            path: row.path,
            chunkId: row.chunkId,
            chunkOrdinal: typeof row.chunkOrdinal === 'number' ? row.chunkOrdinal : 0,
            content: row.content,
            tags: Array.isArray(row.tags) ? row.tags.filter((item) => typeof item === 'string') : []
        });
    }
    return rows;
};
|
|
50
|
+
/**
 * Flattens indexed documents into one search row per chunk.
 *
 * @param {Array<{document: object, chunks: object[]}>} documents
 * @returns {object[]} Rows in document order, then chunk order.
 */
const toRows = (documents) => {
    const rows = [];
    for (const indexed of documents) {
        for (const chunk of indexed.chunks) {
            const meta = indexed.document;
            rows.push({
                documentId: meta.id,
                agentId: meta.agentId,
                title: meta.title,
                path: meta.path,
                chunkId: chunk.id,
                chunkOrdinal: chunk.ordinal,
                content: chunk.content,
                tags: meta.tags
            });
        }
    }
    return rows;
};
|
|
60
|
+
/**
 * Writes the pack manifest as pretty-printed JSON followed by a newline.
 *
 * @param {string} vaultPath - Root directory of the vault.
 * @param {object} manifest - Manifest to persist.
 * @returns {Promise<void>}
 */
const writeManifest = async (vaultPath, manifest) => {
    const target = toManifestPath(vaultPath);
    const body = JSON.stringify(manifest, null, 2);
    await writeFile(target, `${body}\n`, 'utf8');
};
|
|
63
|
+
/**
 * Loads and normalizes the pack manifest of a vault.
 *
 * Only manifests with format `private-v2` and version 2 or 3 are accepted;
 * everything else, including read and parse failures, yields `null`.
 * Version 3 also carries a per-pack index (entries need a string `fileName`
 * and `tokenBloomB64`), plus optional `packConfig` and `compression` sections.
 * `compression` is kept only when all four of its numbers are present.
 *
 * @param {string} vaultPath - Root directory of the vault.
 * @returns {Promise<object|null>} Normalized manifest or `null`.
 */
const readManifest = async (vaultPath) => {
    const numberOr = (value, fallback) => (typeof value === 'number' ? value : fallback);
    const stringsOf = (value) => (Array.isArray(value) ? value.filter((item) => typeof item === 'string') : []);
    try {
        const parsed = JSON.parse(await readFile(toManifestPath(vaultPath), 'utf8'));
        if (parsed.format !== 'private-v2') {
            return null;
        }
        if (parsed.version !== 2 && parsed.version !== 3) {
            return null;
        }
        const createdAt = typeof parsed.createdAt === 'string' ? parsed.createdAt : new Date().toISOString();
        const recordCount = numberOr(parsed.recordCount, 0);
        if (parsed.version === 2) {
            return {
                version: 2,
                createdAt,
                packCount: numberOr(parsed.packCount, 0),
                recordCount,
                format: 'private-v2'
            };
        }
        const packIndex = [];
        if (Array.isArray(parsed.packIndex)) {
            for (const entry of parsed.packIndex) {
                if (!entry || typeof entry !== 'object') {
                    continue;
                }
                if (typeof entry.fileName !== 'string' || typeof entry.tokenBloomB64 !== 'string') {
                    continue;
                }
                packIndex.push({
                    fileName: entry.fileName,
                    recordCount: numberOr(entry.recordCount, 0),
                    agents: stringsOf(entry.agents),
                    tokenBloomB64: entry.tokenBloomB64
                });
            }
        }
        const manifest = {
            version: 3,
            createdAt,
            packCount: numberOr(parsed.packCount, packIndex.length),
            recordCount,
            format: 'private-v2',
            packIndex
        };
        const { packConfig, compression } = parsed;
        if (packConfig && typeof packConfig === 'object') {
            manifest.packConfig = {
                rowChunkSize: numberOr(packConfig.rowChunkSize, defaultBuildOptions.rowChunkSize),
                compressionLevel: numberOr(packConfig.compressionLevel, defaultBuildOptions.compressionLevel),
                useDictionary: typeof packConfig.useDictionary === 'boolean'
                    ? packConfig.useDictionary
                    : defaultBuildOptions.useDictionary
            };
        }
        const statKeys = ['inputBytes', 'outputBytes', 'ratio', 'savedBytes'];
        if (compression &&
            typeof compression === 'object' &&
            statKeys.every((key) => typeof compression[key] === 'number')) {
            manifest.compression = {
                inputBytes: compression.inputBytes,
                outputBytes: compression.outputBytes,
                ratio: compression.ratio,
                savedBytes: compression.savedBytes
            };
        }
        return manifest;
    }
    catch {
        return null;
    }
};
|
|
140
|
+
/**
 * Makes sure a vault with `.blpk` packs also has a readable manifest.
 *
 * When a manifest can be read nothing is changed. Otherwise, if `.blpk` files
 * exist, a minimal version 2 manifest (record count 0) is written for them.
 *
 * @param {string} vaultPath - Root directory of the vault.
 * @returns {Promise<{repaired: boolean, source: string, packCount: number}>}
 */
export const ensureSearchPackManifest = async (vaultPath) => {
    const outcome = (repaired, source, packCount) => ({ repaired, source, packCount });
    const manifest = await readManifest(vaultPath);
    if (manifest) {
        return outcome(false, 'not-needed', manifest.packCount);
    }
    const listed = await sortedPackFiles(vaultPath);
    const packCount = listed.filter((file) => file.endsWith('.blpk')).length;
    if (packCount === 0) {
        return outcome(false, 'no-packs', 0);
    }
    await writeManifest(vaultPath, {
        version: 2,
        createdAt: new Date().toISOString(),
        packCount,
        recordCount: 0,
        format: 'private-v2'
    });
    return outcome(true, 'existing-packs', packCount);
};
|
|
171
|
+
/**
 * Splits rows into consecutive chunks of at most `size` items.
 *
 * The size is floored to an integer. A size that is not a finite number of at
 * least 1 is rejected: zero or negative sizes never advance the cursor, and
 * NaN would produce a single empty chunk that silently drops every row.
 *
 * @param {Array} rows - Rows to split.
 * @param {number} size - Maximum number of rows per chunk.
 * @returns {Array<Array>} Chunks in original order; empty when `rows` is empty.
 * @throws {RangeError} When `rows` is non-empty and `size` is unusable.
 */
const chunkRows = (rows, size) => {
    if (rows.length === 0) {
        return [];
    }
    const step = Math.floor(size);
    if (!Number.isFinite(step) || step < 1) {
        throw new RangeError(`Invalid row chunk size: ${size}`);
    }
    const chunks = [];
    for (let index = 0; index < rows.length; index += step) {
        chunks.push(rows.slice(index, index + step));
    }
    return chunks;
};
|
|
178
|
+
/**
 * Normalizes text for matching: NFKD decomposition, diacritics removed,
 * lower-cased.
 *
 * @param {string} value - Text to normalize.
 * @returns {string} Normalized text.
 */
const normalizeToken = (value) => {
    const decomposed = value.normalize('NFKD');
    const withoutMarks = decomposed.replace(/\p{Diacritic}/gu, '');
    return withoutMarks.toLowerCase();
};
|
|
182
|
+
/**
 * Extracts normalized search tokens from free text.
 *
 * Tokens are the matches of `queryTokenPattern`, normalized; tokens of a
 * single character (after normalization) are discarded.
 *
 * @param {string} query - Text to tokenize.
 * @returns {string[]} Tokens in order of appearance (duplicates kept).
 */
const tokenize = (query) => {
    const matches = query.match(queryTokenPattern);
    if (matches == null) {
        return [];
    }
    const tokens = [];
    for (const match of matches) {
        const token = normalizeToken(match);
        if (token.length > 1) {
            tokens.push(token);
        }
    }
    return tokens;
};
|
|
186
|
+
/**
 * Counts non-overlapping occurrences of `token` inside `text`.
 *
 * An empty token counts as zero occurrences; without this guard the search
 * position would never advance and the scan would not terminate.
 *
 * @param {string} text - Text to scan.
 * @param {string} token - Substring to look for.
 * @returns {number} Number of non-overlapping matches.
 */
const countOccurrences = (text, token) => {
    if (token.length === 0) {
        return 0;
    }
    let hits = 0;
    let index = text.indexOf(token);
    while (index >= 0) {
        hits += 1;
        // Resume after the match so overlapping occurrences are not counted twice.
        index = text.indexOf(token, index + token.length);
    }
    return hits;
};
|
|
199
|
+
/**
 * Hashes a token into an unsigned 32-bit integer.
 *
 * Starting from the seed, each UTF-16 code unit is XOR-ed into the state,
 * which is then multiplied by 16777619 with 32-bit wrap-around.
 *
 * @param {string} token - Text to hash.
 * @param {number} seed - Initial state (coerced to unsigned 32-bit).
 * @returns {number} Hash in the range 0..2^32-1.
 */
const hashToken = (token, seed) => {
    const multiplier = 16777619;
    let state = seed >>> 0;
    let cursor = 0;
    while (cursor < token.length) {
        state = Math.imul(state ^ token.charCodeAt(cursor), multiplier) >>> 0;
        cursor += 1;
    }
    return state >>> 0;
};
|
|
207
|
+
/**
 * Creates an empty bloom filter.
 *
 * @returns {Uint8Array} Zero-filled array of `bloomBytes` bytes.
 */
const createBloom = () => {
    return new Uint8Array(bloomBytes);
};
|
|
208
|
+
/**
 * Records a token in a bloom filter by setting one bit per seed.
 *
 * @param {Uint8Array} bloom - Filter to update in place.
 * @param {string} token - Token to record.
 * @returns {void}
 */
const bloomAdd = (bloom, token) => {
    for (const seed of bloomSeeds) {
        const bit = hashToken(token, seed) % bloomBitSize;
        const byteIndex = Math.floor(bit / 8);
        const mask = 1 << (bit % 8);
        bloom[byteIndex] |= mask;
    }
};
|
|
214
|
+
/**
 * Checks whether a token may have been added to a bloom filter.
 *
 * @param {Uint8Array} bloom - Filter to query.
 * @param {string} token - Token to look up.
 * @returns {boolean} `false` when at least one of the token's bits is unset,
 *   `true` when all of them are set.
 */
const bloomMayContain = (bloom, token) => {
    for (const seed of bloomSeeds) {
        const bit = hashToken(token, seed) % bloomBitSize;
        const mask = 1 << (bit % 8);
        if ((bloom[Math.floor(bit / 8)] & mask) === 0) {
            return false;
        }
    }
    return true;
};
|
|
218
|
+
/**
 * Builds a bloom filter covering the tokens of a set of rows.
 *
 * Each row contributes the tokens of its title, path, tags and content.
 *
 * @param {object[]} rows - Rows to index.
 * @returns {Uint8Array} Filter containing every token.
 */
const bloomFromRows = (rows) => {
    const bloom = createBloom();
    for (const row of rows) {
        const searchable = [row.title, row.path, row.tags.join(' '), row.content].join(' ');
        for (const token of tokenize(searchable)) {
            bloomAdd(bloom, token);
        }
    }
    return bloom;
};
|
|
225
|
+
/**
 * Serializes a bloom filter as base64url text.
 *
 * @param {Uint8Array} bloom - Filter bytes.
 * @returns {string} base64url encoding of the bytes.
 */
const bloomToBase64 = (bloom) => {
    const bytes = Buffer.from(bloom);
    return bytes.toString('base64url');
};
|
|
226
|
+
/**
 * Restores a bloom filter from its base64url form.
 *
 * @param {string} value - Serialized filter.
 * @returns {{bloom: Uint8Array, valid: boolean}} The decoded filter with
 *   `valid: true` when it decodes to exactly `bloomBytes` bytes; otherwise an
 *   empty filter with `valid: false`.
 */
const bloomFromBase64 = (value) => {
    let decoded = null;
    try {
        decoded = Buffer.from(value, 'base64url');
    }
    catch {
        // Treated like a wrong-sized filter below.
        decoded = null;
    }
    if (decoded !== null && decoded.byteLength === bloomBytes) {
        return { bloom: new Uint8Array(decoded), valid: true };
    }
    return { bloom: createBloom(), valid: false };
};
|
|
244
|
+
/**
 * Scores a row against query tokens by counting substring hits.
 *
 * Per token: each title hit adds 5, each tag hit 4, each path hit 2, and
 * content hits add 1 each up to a maximum of 5.
 *
 * @param {{title: string, path: string, content: string, tags: string[]}} row
 * @param {string[]} tokens - Normalized query tokens.
 * @returns {number} Total score; 0 when there are no tokens.
 */
const computeTextScore = (row, tokens) => {
    if (tokens.length === 0) {
        return 0;
    }
    const weighted = [
        [normalizeToken(row.title), 5],
        [normalizeToken(row.tags.join(' ')), 4],
        [normalizeToken(row.path), 2]
    ];
    const content = normalizeToken(row.content);
    let total = 0;
    for (const token of tokens) {
        for (const [text, weight] of weighted) {
            total += countOccurrences(text, token) * weight;
        }
        total += Math.min(countOccurrences(content, token), 5);
    }
    return total;
};
|
|
260
|
+
/**
 * Converts a scored row into a search result.
 *
 * The score is reported both as `score` and `textScore`; `semanticScore` is
 * always 0 and `searchMode` is always `'fts'`.
 *
 * @param {object} row - Pack row.
 * @param {number} score - Text score of the row.
 * @returns {object} Search result.
 */
const toSearchResult = (row, score) => {
    const { documentId, agentId, title, path, chunkId, chunkOrdinal, content, tags } = row;
    return {
        documentId,
        agentId,
        title,
        path,
        chunkId,
        chunkOrdinal,
        content,
        score,
        textScore: score,
        semanticScore: 0,
        searchMode: 'fts',
        tags
    };
};
|
|
274
|
+
/**
 * Lists the pack files of a vault in sorted order.
 *
 * Only names ending in `.blpk` or `.jsonl.gz` are returned. A missing pack
 * directory yields an empty list; other errors are re-thrown.
 *
 * @param {string} vaultPath - Root directory of the vault.
 * @returns {Promise<string[]>} Sorted file names.
 */
const sortedPackFiles = async (vaultPath) => {
    const isPack = (name) => name.endsWith('.blpk') || name.endsWith('.jsonl.gz');
    let names;
    try {
        names = await readdir(toPackDirectory(vaultPath));
    }
    catch (error) {
        const missing = error instanceof Error && 'code' in error && error.code === 'ENOENT';
        if (!missing) {
            throw error;
        }
        return [];
    }
    return names.filter(isPack).sort((left, right) => left.localeCompare(right));
};
|
|
288
|
+
/**
 * Writes rows as encrypted pack files plus a version 3 manifest.
 *
 * Rows are split into chunks of `options.rowChunkSize`; each chunk becomes one
 * `pack-NNNN.blpk` file (newline-delimited JSON, encoded by
 * `encodePrivatePack`) and one manifest index entry holding its record count,
 * sorted agent ids and token bloom filter.
 *
 * @param {string} vaultPath - Root directory of the vault.
 * @param {object[]} rows - Rows to store.
 * @param {boolean} clearExisting - When true, existing `.blpk`, `.jsonl.gz`
 *   and manifest files in the pack directory are removed first.
 * @param {{rowChunkSize: number, compressionLevel: number, useDictionary: boolean}} options
 * @returns {Promise<{packCount: number, recordCount: number, compression: object, durationMs: number}>}
 */
const writeRowsAsPrivatePacks = async (vaultPath, rows, clearExisting, options) => {
    const startedAt = process.hrtime.bigint();
    const directory = toPackDirectory(vaultPath);
    await mkdir(directory, { recursive: true });
    if (clearExisting) {
        const isStale = (name) => name.endsWith('.blpk') || name.endsWith('.jsonl.gz') || name === manifestFileName;
        const existing = await readdir(directory);
        const removals = existing
            .filter(isStale)
            .map((name) => rm(join(directory, name), { force: true }));
        await Promise.all(removals);
    }
    const { rowChunkSize, compressionLevel, useDictionary } = options;
    const chunks = chunkRows(rows, rowChunkSize);
    const packIndex = [];
    let inputBytes = 0;
    let outputBytes = 0;
    let packNumber = 0;
    for (const chunk of chunks) {
        packNumber += 1;
        const fileName = `pack-${String(packNumber).padStart(4, '0')}.blpk`;
        const serialized = `${chunk.map((row) => JSON.stringify(row)).join('\n')}\n`;
        const encoded = await encodePrivatePack(vaultPath, Buffer.from(serialized, 'utf8'), {
            compressionLevel,
            useDictionary
        });
        const tokenBloomB64 = bloomToBase64(bloomFromRows(chunk));
        await writeFile(join(directory, fileName), encoded);
        inputBytes += Buffer.byteLength(serialized, 'utf8');
        outputBytes += encoded.byteLength;
        const agents = Array.from(new Set(chunk.map((row) => row.agentId)));
        agents.sort((left, right) => left.localeCompare(right));
        packIndex.push({ fileName, recordCount: chunk.length, agents, tokenBloomB64 });
    }
    // Denominator is clamped so an empty input does not divide by zero.
    const compressionStats = () => ({
        inputBytes,
        outputBytes,
        ratio: outputBytes / Math.max(inputBytes, 1),
        savedBytes: Math.max(inputBytes - outputBytes, 0)
    });
    await writeManifest(vaultPath, {
        version: 3,
        createdAt: new Date().toISOString(),
        packCount: chunks.length,
        recordCount: rows.length,
        format: 'private-v2',
        packIndex,
        packConfig: { rowChunkSize, compressionLevel, useDictionary },
        compression: compressionStats()
    });
    const durationMs = Number(process.hrtime.bigint() - startedAt) / 1_000_000;
    return {
        packCount: chunks.length,
        recordCount: rows.length,
        compression: compressionStats(),
        durationMs
    };
};
|
|
355
|
+
/**
 * Chooses which pack files have to be scanned for a query.
 *
 * Without a usable version 3 manifest every pack file on disk is returned.
 * Otherwise the manifest index is narrowed by agent and, when tokens are
 * given, by each pack's token bloom filter (a pack is kept when any token may
 * be present). If any bloom filter is unreadable, or no pack passes the token
 * filter, the agent-filtered list is returned; if that list is empty too, all
 * pack files on disk are returned.
 *
 * Manifest entries are only honoured when their file name appears in the
 * directory listing. This keeps a stale or hand-edited manifest from sending
 * the caller to a missing file or to a path outside the pack directory.
 *
 * @param {string} vaultPath - Root directory of the vault.
 * @param {string[]} tokens - Normalized query tokens.
 * @param {string} [agentId] - Optional agent filter.
 * @returns {Promise<string[]>} Pack file names to scan.
 */
const selectCandidatePackFiles = async (vaultPath, tokens, agentId) => {
    const allFiles = await sortedPackFiles(vaultPath);
    if (allFiles.length === 0) {
        return [];
    }
    const manifest = await readManifest(vaultPath);
    if (!manifest || manifest.version !== 3 || !Array.isArray(manifest.packIndex)) {
        return allFiles;
    }
    const filesOnDisk = new Set(allFiles);
    const normalizedAgent = agentId?.trim();
    const toFileNames = (entries) => entries.map((entry) => entry.fileName);
    const byAgent = manifest.packIndex.filter((entry) => filesOnDisk.has(entry.fileName) &&
        (normalizedAgent ? entry.agents.includes(normalizedAgent) : true));
    if (tokens.length === 0) {
        return toFileNames(byAgent);
    }
    let hasInvalidBloomIndex = false;
    const byToken = byAgent.filter((entry) => {
        const decoded = bloomFromBase64(entry.tokenBloomB64);
        if (!decoded.valid) {
            hasInvalidBloomIndex = true;
            return true;
        }
        return tokens.some((token) => bloomMayContain(decoded.bloom, token));
    });
    // Lossless guarantee: if compressed metadata is partially invalid, do not prune packs.
    if (hasInvalidBloomIndex) {
        return toFileNames(byAgent);
    }
    if (byToken.length > 0) {
        return toFileNames(byToken);
    }
    return byAgent.length > 0 ? toFileNames(byAgent) : allFiles;
};
|
|
387
|
+
/**
 * Rebuilds all search packs of a vault from indexed documents.
 *
 * Existing packs are cleared first. Options that are missing or nullish fall
 * back to the module defaults.
 *
 * @param {string} vaultPath - Root directory of the vault.
 * @param {object[]} documents - Indexed documents with their chunks.
 * @param {{rowChunkSize?: number, compressionLevel?: number, useDictionary?: boolean}} [options]
 * @returns {Promise<object>} Build report from the pack writer.
 */
export const buildSearchPacks = async (vaultPath, documents, options) => {
    const pick = (key) => options?.[key] ?? defaultBuildOptions[key];
    const resolvedOptions = {
        rowChunkSize: pick('rowChunkSize'),
        compressionLevel: pick('compressionLevel'),
        useDictionary: pick('useDictionary')
    };
    const rows = toRows(documents);
    return writeRowsAsPrivatePacks(vaultPath, rows, true, resolvedOptions);
};
|
|
395
|
+
/**
 * Converts legacy `.jsonl.gz` packs into private `.blpk` packs.
 *
 * Nothing happens when a `.blpk` pack already exists or when there are no
 * legacy packs. Otherwise all legacy rows are read, the pack directory is
 * cleared, and the rows are rewritten with the default build options.
 *
 * Rows are appended one at a time instead of being spread into `push`,
 * because spreading a very large pack passes every row as a call argument
 * and can fail with a RangeError.
 *
 * @param {string} vaultPath - Root directory of the vault.
 * @returns {Promise<object>} `{ imported: false }`, or `{ imported: true,
 *   source: 'legacy-packs', ...report }` with the writer's build report.
 */
export const ensurePrivatePacksFromLegacyIndex = async (vaultPath) => {
    const files = await sortedPackFiles(vaultPath);
    if (files.some((file) => file.endsWith('.blpk'))) {
        return { imported: false };
    }
    const legacyPackFiles = files.filter((file) => file.endsWith('.jsonl.gz'));
    if (legacyPackFiles.length === 0) {
        return { imported: false };
    }
    const rows = [];
    for (const file of legacyPackFiles) {
        const parsed = await parseRowsFromPack(vaultPath, await readFile(join(toPackDirectory(vaultPath), file)));
        for (const row of parsed) {
            rows.push(row);
        }
    }
    const report = await writeRowsAsPrivatePacks(vaultPath, rows, true, defaultBuildOptions);
    return {
        imported: true,
        source: 'legacy-packs',
        ...report
    };
};
|
|
416
|
+
/**
 * Extracts the pack build options from an application config.
 *
 * @param {{searchPack: {rowChunkSize: number, compressionLevel: number, useDictionary: boolean}}} config
 * @returns {{rowChunkSize: number, compressionLevel: number, useDictionary: boolean}}
 */
export const toSearchPackBuildOptions = (config) => {
    const { rowChunkSize, compressionLevel, useDictionary } = config.searchPack;
    return { rowChunkSize, compressionLevel, useDictionary };
};
|
|
421
|
+
/**
 * Runs a text search over the pack files of a vault.
 *
 * Candidate packs are chosen by `selectCandidatePackFiles`, decoded one after
 * another, and every row is scored with `computeTextScore`; rows scoring
 * above zero are kept. Results are ordered by descending score, then by
 * title, and truncated to `limit`.
 *
 * @param {string} vaultPath - Root directory of the vault.
 * @param {string} query - Free-text query.
 * @param {number} limit - Maximum number of results.
 * @param {string} [agentId] - When given, only rows of this agent are returned.
 * @returns {Promise<object[]>} Ranked search results; empty when `limit` is
 *   not positive or the query yields no tokens.
 */
export const searchInPacks = async (vaultPath, query, limit, agentId) => {
    const wantedAgent = agentId?.trim();
    const tokens = tokenize(query);
    if (limit <= 0 || tokens.length === 0) {
        return [];
    }
    const files = await selectCandidatePackFiles(vaultPath, tokens, wantedAgent);
    if (files.length === 0) {
        return [];
    }
    const matches = [];
    const collect = (row) => {
        if (!row || (wantedAgent && row.agentId !== wantedAgent)) {
            return;
        }
        const score = computeTextScore(row, tokens);
        if (score > 0) {
            matches.push(toSearchResult(row, score));
        }
    };
    for (const file of files) {
        const packPath = join(toPackDirectory(vaultPath), file);
        const rows = await parseRowsFromPack(vaultPath, await readFile(packPath));
        const visitOrder = middleOutIndices(rows.length, Math.floor(rows.length / 2));
        visitOrder.forEach((rowIndex) => {
            collect(rows[rowIndex]);
        });
    }
    const byRank = (left, right) => right.score - left.score || left.title.localeCompare(right.title);
    return matches.sort(byRank).slice(0, limit);
};
|