@andespindola/brainlink 0.1.0-beta.13 → 0.1.0-beta.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md
CHANGED
|
@@ -60,6 +60,7 @@ Markdown is the source of truth. `.brainlink/brainlink.db` is only a rebuildable
|
|
|
60
60
|
Brainlink now keeps an automatic rollback snapshot at `.brainlink/brainlink.db.backup` plus rotating snapshots in `.brainlink/brainlink.db.backup.snapshots/`. If the main SQLite file is corrupted, Brainlink automatically restores the newest valid snapshot (or recreates a clean index when no snapshot exists).
|
|
61
61
|
After each index run, Brainlink also writes private encrypted search packs at `.brainlink/search-packs/*.blpk`. If SQLite is unavailable, search falls back to these packs automatically.
|
|
62
62
|
Pack decryption uses a Brainlink key from `$BRAINLINK_HOME/keys` or from `BRAINLINK_SEARCH_PACK_KEY` when explicitly configured.
|
|
63
|
+
On upgrade, if a legacy SQLite index exists but no private packs do, Brainlink automatically imports the indexed rows into `.blpk` packs on the first search or context access.
|
|
63
64
|
|
|
64
65
|
## Features
|
|
65
66
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { stat } from 'node:fs/promises';
|
|
2
2
|
import { join } from 'node:path';
|
|
3
3
|
import { ensureVault } from '../infrastructure/file-system-vault.js';
|
|
4
|
-
import { searchInPacks } from '../infrastructure/search-packs.js';
|
|
4
|
+
import { ensurePrivatePacksFromLegacyIndex, searchInPacks } from '../infrastructure/search-packs.js';
|
|
5
5
|
import { openSqliteIndex } from '../infrastructure/sqlite-index.js';
|
|
6
6
|
import { createEmbeddingProvider } from '../domain/embeddings.js';
|
|
7
7
|
import { loadBrainlinkConfig, sanitizeSearchMode } from '../infrastructure/config.js';
|
|
@@ -47,6 +47,7 @@ export const searchKnowledge = async (vaultPath, query, limit, agentId, mode) =>
|
|
|
47
47
|
const absoluteVaultPath = await ensureVault(vaultPath);
|
|
48
48
|
const config = await loadBrainlinkConfig();
|
|
49
49
|
const searchMode = sanitizeSearchMode(mode, config.defaultSearchMode);
|
|
50
|
+
await ensurePrivatePacksFromLegacyIndex(absoluteVaultPath);
|
|
50
51
|
const cacheKey = searchMode === 'hybrid' ? toCacheKey(absoluteVaultPath, query, limit, agentId) : undefined;
|
|
51
52
|
const indexMtimeMs = cacheKey ? await readIndexMtimeMs(absoluteVaultPath) : 0;
|
|
52
53
|
const cached = cacheKey ? cacheGet(cacheKey, indexMtimeMs) : undefined;
|
|
@@ -1,6 +1,8 @@
|
|
|
1
|
+
import Database from 'better-sqlite3';
|
|
1
2
|
import { gunzipSync } from 'node:zlib';
|
|
2
3
|
import { mkdir, readdir, readFile, rm, writeFile } from 'node:fs/promises';
|
|
3
4
|
import { join } from 'node:path';
|
|
5
|
+
import { existsSync } from 'node:fs';
|
|
4
6
|
import { decodePrivatePack, encodePrivatePack, isPrivatePackPayload } from './private-pack-codec.js';
|
|
5
7
|
// Name of the directory, inside a vault's `.brainlink` folder, that holds the search pack files.
const packsDirectoryName = 'search-packs';
|
|
6
8
|
// File name of the manifest that is stored inside the search-packs directory.
const manifestFileName = 'manifest.json';
|
|
@@ -8,6 +10,7 @@ const rowChunkSize = 5_000;
|
|
|
8
10
|
// Matches every run of Unicode letters, Unicode numbers, underscores, or hyphens.
// NOTE: the `g` flag makes `.test()`/`.exec()` stateful via `lastIndex`.
const queryTokenPattern = /[\p{L}\p{N}_-]+/gu;
|
|
9
11
|
/**
 * Builds the path of the directory that holds a vault's search packs.
 *
 * @param {string} vaultPath - Root path of the vault.
 * @returns {string} `vaultPath` joined with `.brainlink` and the packs directory name.
 */
const toPackDirectory = (vaultPath) => {
    const segments = [vaultPath, '.brainlink', packsDirectoryName];
    return join(...segments);
};
|
|
10
12
|
/**
 * Builds the path of the manifest file inside a vault's search-packs directory.
 *
 * @param {string} vaultPath - Root path of the vault.
 * @returns {string} Path of the manifest file.
 */
const toManifestPath = (vaultPath) => {
    const packDirectory = toPackDirectory(vaultPath);
    return join(packDirectory, manifestFileName);
};
|
|
13
|
+
/**
 * Builds the path of a vault's SQLite index file.
 *
 * @param {string} vaultPath - Root path of the vault.
 * @returns {string} `vaultPath` joined with `.brainlink/brainlink.db`.
 */
const toDatabasePath = (vaultPath) => {
    const segments = [vaultPath, '.brainlink', 'brainlink.db'];
    return join(...segments);
};
|
|
11
14
|
const parseRowsFromPack = async (vaultPath, content) => {
|
|
12
15
|
const raw = isPrivatePackPayload(content) ? await decodePrivatePack(vaultPath, content) : gunzipSync(content);
|
|
13
16
|
return raw
|
|
@@ -29,6 +32,15 @@ const toRows = (documents) => documents.flatMap((document) => document.chunks.ma
|
|
|
29
32
|
/**
 * Writes the pack manifest for a vault.
 *
 * The manifest is serialized as JSON with a two-space indent, terminated by a
 * single newline, and written as UTF-8 to the vault's manifest path.
 *
 * @param {string} vaultPath - Root path of the vault.
 * @param {unknown} manifest - Value to serialize with `JSON.stringify`.
 * @returns {Promise<void>} Resolves once the file has been written.
 */
const writeManifest = async (vaultPath, manifest) => {
    const manifestPath = toManifestPath(vaultPath);
    const serialized = JSON.stringify(manifest, null, 2);
    await writeFile(manifestPath, `${serialized}\n`, 'utf8');
};
|
|
35
|
+
/**
 * Decodes a JSON-encoded tag list.
 *
 * @param {string} value - JSON text expected to hold an array.
 * @returns {string[]} The string elements of the decoded array, in order; an
 *   empty array when the text is not valid JSON or does not decode to an array.
 */
const parseTags = (value) => {
    let decoded;
    try {
        decoded = JSON.parse(value);
    }
    catch {
        // Unparseable tag data is treated as "no tags" rather than an error.
        return [];
    }
    if (!Array.isArray(decoded)) {
        return [];
    }
    return decoded.filter((tag) => typeof tag === 'string');
};
|
|
32
44
|
const chunkRows = (rows, size) => {
|
|
33
45
|
const chunks = [];
|
|
34
46
|
for (let index = 0; index < rows.length; index += size) {
|
|
@@ -100,14 +112,15 @@ const sortedPackFiles = async (vaultPath) => {
|
|
|
100
112
|
throw error;
|
|
101
113
|
}
|
|
102
114
|
};
|
|
103
|
-
|
|
115
|
+
const writeRowsAsPrivatePacks = async (vaultPath, rows, clearExisting) => {
|
|
104
116
|
const directory = toPackDirectory(vaultPath);
|
|
105
|
-
const rows = toRows(documents);
|
|
106
117
|
await mkdir(directory, { recursive: true });
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
118
|
+
if (clearExisting) {
|
|
119
|
+
const current = await readdir(directory);
|
|
120
|
+
await Promise.all(current
|
|
121
|
+
.filter((name) => name.endsWith('.blpk') || name.endsWith('.jsonl.gz') || name === manifestFileName)
|
|
122
|
+
.map((name) => rm(join(directory, name), { force: true })));
|
|
123
|
+
}
|
|
111
124
|
const chunks = chunkRows(rows, rowChunkSize);
|
|
112
125
|
await Promise.all(chunks.map(async (chunk, index) => {
|
|
113
126
|
const fileName = `pack-${String(index + 1).padStart(4, '0')}.blpk`;
|
|
@@ -127,6 +140,100 @@ export const buildSearchPacks = async (vaultPath, documents) => {
|
|
|
127
140
|
recordCount: rows.length
|
|
128
141
|
};
|
|
129
142
|
};
|
|
143
|
+
/**
 * Checks whether `sqlite_master` lists a table with exactly the given name.
 *
 * @param {object} database - Handle exposing `prepare(sql).get(...bindings)`.
 * @param {string} table - Table name to look up.
 * @returns {boolean} `true` when a row of type `table` with that name exists.
 */
const tableExists = (database, table) => {
    const lookup = database.prepare("SELECT name FROM sqlite_master WHERE type = 'table' AND name = ?");
    const match = lookup.get(table);
    const foundName = match === undefined || match === null ? undefined : match.name;
    return foundName === table;
};
|
|
147
|
+
/**
 * Lists the column names of a table.
 *
 * The table name is passed as a bound parameter to the `pragma_table_info`
 * table-valued function instead of being spliced into the SQL text, so no
 * manual quote escaping is needed and the statement text stays constant.
 *
 * @param {object} database - Handle exposing `prepare(sql).all(...bindings)`.
 * @param {string} table - Name of the table to inspect.
 * @returns {Set<string>} Column names reported for the table (empty when the
 *   pragma returns no rows).
 */
const tableColumns = (database, table) => {
    const rows = database.prepare('SELECT name FROM pragma_table_info(?)').all(table);
    return new Set(rows.map((row) => row.name));
};
|
|
151
|
+
/**
 * Tells whether an error reports a corrupt or non-SQLite database file.
 * Matches `SQLITE_NOTADB` and every `SQLITE_CORRUPT*` result code.
 */
const isCorruptSqliteError = (error) => {
    const code = typeof error?.code === 'string' ? error.code : '';
    return code === 'SQLITE_NOTADB' || code.startsWith('SQLITE_CORRUPT');
};
/**
 * Reads every indexed chunk from a vault's legacy SQLite index and converts it
 * into pack rows.
 *
 * The query adapts to the columns that are actually present: missing optional
 * columns fall back to constants (`'shared'` agent, `'[]'` tags, empty
 * content), the document title (for `path`), a synthesized chunk id, or
 * `rowid` ordering.
 *
 * This import is optional, so a corrupt or non-SQLite index file yields an
 * empty result instead of an exception. The caller shown in this diff awaits
 * the import at the very start of a search, so a throw here would abort the
 * search before any other handling could run.
 *
 * @param {string} vaultPath - Root path of the vault.
 * @returns {Array<{documentId: unknown, agentId: string, title: unknown, path: unknown, chunkId: unknown, content: unknown, tags: string[]}>}
 *   One row per chunk, ordered by document title and then chunk order; empty
 *   when the database file is missing, unreadable as SQLite, or lacks the
 *   required tables/columns.
 * @throws Any database error other than corruption / "not a database".
 */
const loadRowsFromLegacySqlite = (vaultPath) => {
    const databasePath = toDatabasePath(vaultPath);
    if (!existsSync(databasePath)) {
        return [];
    }
    let database;
    try {
        database = new Database(databasePath, { readonly: true, fileMustExist: true });
        if (!tableExists(database, 'documents') || !tableExists(database, 'chunks')) {
            return [];
        }
        const documentColumns = tableColumns(database, 'documents');
        const chunkColumns = tableColumns(database, 'chunks');
        // Without these columns the join below cannot be expressed at all.
        if (!documentColumns.has('id') || !documentColumns.has('title') || !chunkColumns.has('document_id')) {
            return [];
        }
        const agentExpr = documentColumns.has('agent_id') ? 'documents.agent_id' : "'shared'";
        const pathExpr = documentColumns.has('path') ? 'documents.path' : "documents.title";
        const tagsExpr = documentColumns.has('tags_json') ? 'documents.tags_json' : "'[]'";
        const chunkIdExpr = chunkColumns.has('id') ? 'chunks.id' : "documents.id || ':' || chunks.rowid";
        const chunkContentExpr = chunkColumns.has('content')
            ? 'chunks.content'
            : documentColumns.has('content')
                ? 'documents.content'
                : "''";
        const chunkOrderExpr = chunkColumns.has('ordinal') ? 'chunks.ordinal' : 'chunks.rowid';
        // Only fixed expressions chosen above are interpolated; no external input reaches the SQL text.
        const statement = database.prepare(`
            SELECT
                documents.id AS document_id,
                ${agentExpr} AS agent_id,
                documents.title AS title,
                ${pathExpr} AS path,
                ${chunkIdExpr} AS chunk_id,
                ${chunkContentExpr} AS content,
                ${tagsExpr} AS tags_json
            FROM chunks
            JOIN documents ON documents.id = chunks.document_id
            ORDER BY documents.title, ${chunkOrderExpr}
        `);
        const rows = statement.all();
        return rows.map((row) => ({
            documentId: row.document_id,
            // NULL or empty agent ids are normalized to the shared agent.
            agentId: typeof row.agent_id === 'string' && row.agent_id.length > 0 ? row.agent_id : 'shared',
            title: row.title,
            path: row.path,
            chunkId: row.chunk_id,
            content: row.content ?? '',
            tags: parseTags(row.tags_json)
        }));
    }
    catch (error) {
        if (isCorruptSqliteError(error)) {
            // Nothing can be imported from a damaged index; report "no rows"
            // and leave any recovery to the code that owns the index.
            return [];
        }
        throw error;
    }
    finally {
        // `database` stays undefined when the constructor itself threw.
        database?.close();
    }
};
|
|
204
|
+
/**
 * Rebuilds a vault's private search packs from indexed documents.
 *
 * The documents are flattened into rows and handed to the shared pack writer
 * with `clearExisting` set to `true`.
 *
 * @param {string} vaultPath - Root path of the vault.
 * @param {Array<object>} documents - Indexed documents, each carrying `chunks`.
 * @returns {Promise<object>} The report produced by the pack writer.
 */
export const buildSearchPacks = async (vaultPath, documents) => {
    const rows = toRows(documents);
    const report = await writeRowsAsPrivatePacks(vaultPath, rows, true);
    return report;
};
|
|
207
|
+
/**
 * Creates private `.blpk` packs for a vault that does not have any yet.
 *
 * Sources are tried in this order:
 *   1. If at least one `.blpk` file already exists, nothing is done.
 *   2. If legacy `.jsonl.gz` packs exist, their rows are re-written as private packs.
 *   3. Otherwise rows are loaded from the legacy SQLite index, if it yields any.
 *
 * @param {string} vaultPath - Root path of the vault.
 * @returns {Promise<object>} `{ imported: false }` when nothing was imported;
 *   otherwise `{ imported: true, source }` merged with the pack writer's
 *   report, where `source` is `'legacy-packs'` or `'legacy-sqlite'`.
 */
export const ensurePrivatePacksFromLegacyIndex = async (vaultPath) => {
    const files = await sortedPackFiles(vaultPath);
    if (files.some((file) => file.endsWith('.blpk'))) {
        return { imported: false };
    }
    const legacyPackFiles = files.filter((file) => file.endsWith('.jsonl.gz'));
    if (legacyPackFiles.length > 0) {
        const packDirectory = toPackDirectory(vaultPath);
        const rows = [];
        // Packs are read one after another so rows keep the sorted file order.
        for (const file of legacyPackFiles) {
            const parsed = await parseRowsFromPack(vaultPath, await readFile(join(packDirectory, file)));
            // Append row by row: `rows.push(...parsed)` would pass every row as a
            // call argument and can throw RangeError on very large packs.
            for (const row of parsed) {
                rows.push(row);
            }
        }
        // NOTE(review): with clearExisting = true the writer removes the `.jsonl.gz`
        // files before the new packs are written, so a failure mid-write would
        // leave the vault without either pack format — confirm this is acceptable.
        const report = await writeRowsAsPrivatePacks(vaultPath, rows, true);
        return {
            imported: true,
            source: 'legacy-packs',
            ...report
        };
    }
    const legacyRows = loadRowsFromLegacySqlite(vaultPath);
    if (legacyRows.length === 0) {
        return { imported: false };
    }
    const report = await writeRowsAsPrivatePacks(vaultPath, legacyRows, true);
    return {
        imported: true,
        source: 'legacy-sqlite',
        ...report
    };
};
|
|
130
237
|
export const searchInPacks = async (vaultPath, query, limit, agentId) => {
|
|
131
238
|
const normalizedAgent = agentId?.trim();
|
|
132
239
|
const tokens = tokenize(query);
|
package/docs/AGENT_USAGE.md
CHANGED
|
@@ -637,6 +637,7 @@ The HTTP API is read-only. Use the CLI for writes and indexing.
|
|
|
637
637
|
Brainlink maintains an automatic SQLite rollback snapshot at `.brainlink/brainlink.db.backup` and rotating snapshots in `.brainlink/brainlink.db.backup.snapshots/`. When `.brainlink/brainlink.db` is corrupted, Brainlink restores the newest valid snapshot automatically or recreates a clean index if no snapshot exists yet.
|
|
638
638
|
Indexing also writes private encrypted search packs at `.brainlink/search-packs/*.blpk`; when SQLite cannot be opened, Brainlink falls back to pack-based search automatically.
|
|
639
639
|
Pack decryption keys are resolved from `$BRAINLINK_HOME/keys` (or `BRAINLINK_SEARCH_PACK_KEY` when explicitly set).
|
|
640
|
+
For legacy installations where the SQLite index already exists but private packs are missing, Brainlink automatically imports the indexed context rows from `brainlink.db` into `.blpk` packs on the first retrieval.
|
|
640
641
|
|
|
641
642
|
## Agent Integration Contract
|
|
642
643
|
|
package/docs/ARCHITECTURE.md
CHANGED
|
@@ -303,6 +303,7 @@ Hybrid retrieval also uses a short-lived in-memory cache keyed by vault/query/ag
|
|
|
303
303
|
Brainlink also writes a local rollback snapshot (`.brainlink/brainlink.db.backup`) plus rotating point-in-time snapshots (`.brainlink/brainlink.db.backup.snapshots/`) after successful indexing. On corruption detection (`quick_check`/SQLite malformed errors), Brainlink restores the newest valid snapshot automatically before reopening the index.
|
|
304
304
|
Indexing additionally exports private encrypted pack files (`.brainlink/search-packs/*.blpk`) from indexed chunks. Search falls back to these packs when SQLite is unavailable, preserving retrieval continuity in degraded mode.
|
|
305
305
|
Pack encryption keys are resolved from `$BRAINLINK_HOME/keys` or from `BRAINLINK_SEARCH_PACK_KEY` when configured.
|
|
306
|
+
Legacy upgrades are automatic: when a vault has `brainlink.db` but no `.blpk` packs yet, Brainlink extracts the indexed context rows from SQLite and writes private packs during the first retrieval.
|
|
306
307
|
|
|
307
308
|
### CLI First
|
|
308
309
|
|
package/package.json
CHANGED