@andespindola/brainlink 0.1.0-beta.14 → 0.1.0-beta.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +5 -5
- package/CHANGELOG.md +2 -2
- package/CONTRIBUTING.md +2 -2
- package/README.md +13 -15
- package/SECURITY.md +1 -1
- package/dist/application/analyze-vault.js +1 -15
- package/dist/application/frontend/client-css.js +2 -0
- package/dist/application/frontend/client-js.js +96 -9
- package/dist/application/get-graph-layout.js +2 -2
- package/dist/application/get-graph-node.js +3 -3
- package/dist/application/get-graph-summary.js +3 -3
- package/dist/application/get-graph.js +3 -3
- package/dist/application/index-vault.js +5 -5
- package/dist/application/list-agents.js +3 -3
- package/dist/application/list-links.js +5 -5
- package/dist/application/search-graph-node-ids.js +3 -3
- package/dist/application/search-knowledge.js +4 -5
- package/dist/benchmarks/large-vault.js +1 -1
- package/dist/infrastructure/file-index.js +291 -0
- package/dist/infrastructure/search-packs.js +1 -83
- package/docs/AGENT_USAGE.md +14 -16
- package/docs/ARCHITECTURE.md +19 -28
- package/package.json +1 -3
- package/dist/infrastructure/sqlite/document-writer.js +0 -51
- package/dist/infrastructure/sqlite/graph-reader.js +0 -267
- package/dist/infrastructure/sqlite/recovery.js +0 -163
- package/dist/infrastructure/sqlite/schema.js +0 -114
- package/dist/infrastructure/sqlite/search-reader.js +0 -188
- package/dist/infrastructure/sqlite/types.js +0 -1
- package/dist/infrastructure/sqlite-index.js +0 -38
|
@@ -1,188 +0,0 @@
|
|
|
1
|
-
import { sanitizeAgentId } from '../../domain/agents.js';
|
|
2
|
-
import { cosineSimilarity, createEmbeddingBuckets } from '../../domain/embeddings.js';
|
|
3
|
-
/**
 * Builds the full-text MATCH expression for a free-text query.
 *
 * The query is lower-cased and split into runs of Unicode letters, digits,
 * underscores and hyphens. Each run becomes a double-quoted term followed by
 * `*`, and the terms are joined with ` OR `.
 *
 * @param {string} query - Raw user query.
 * @returns {string} The MATCH expression, or `''` when the query has no usable term.
 */
const toFtsQuery = (query) => {
    const terms = query.toLowerCase().match(/[\p{L}\p{N}_-]+/gu);
    if (terms === null) {
        return '';
    }
    const phrases = [];
    for (const term of terms) {
        // Double any embedded quote so the term stays a single quoted string.
        const escaped = term.replaceAll('"', '""');
        phrases.push(`"${escaped}"*`);
    }
    return phrases.join(' OR ');
};
|
|
8
|
-
/**
 * Normalises an optional agent filter.
 *
 * @param {string | undefined | null} agentId - Agent id supplied by the caller, if any.
 * @returns {string | undefined} The result of `sanitizeAgentId(agentId)`, or
 *   `undefined` when `agentId` is falsy (missing or empty).
 */
const normalizeAgentFilter = (agentId) => {
    if (!agentId) {
        return undefined;
    }
    return sanitizeAgentId(agentId);
};
|
|
9
|
-
/**
 * Parses a JSON string that is expected to hold an array.
 *
 * @param {string | null | undefined} value - JSON text, typically a `*_json` column.
 * @returns {unknown[]} The parsed array, or an empty array when `value` is
 *   falsy, is not valid JSON, or does not decode to an array.
 */
const parseJsonArray = (value) => {
    if (value) {
        try {
            const decoded = JSON.parse(value);
            if (Array.isArray(decoded)) {
                return decoded;
            }
        }
        catch {
            // Malformed JSON is treated the same as "no array".
        }
    }
    return [];
};
|
|
21
|
-
/**
 * Converts a rank position into a text score.
 *
 * @param {number} index - Zero-based position of the row in the result list.
 * @param {number} total - Number of rows in the result list.
 * @returns {number} `0` when `total` is `0`, otherwise `1 - index / (total + 1)`,
 *   so the first row scores `1` and later rows score progressively less.
 */
const toTextScore = (index, total) => {
    if (total === 0) {
        return 0;
    }
    return 1 - index / (total + 1);
};
|
|
22
|
-
/**
 * Maps a database row plus computed scores onto the public search-result shape.
 *
 * @param {object} row - Row exposing `document_id`, `agent_id`, `title`, `path`,
 *   `chunk_id`, `content` and `tags_json`.
 * @param {number} score - Combined score for the result.
 * @param {number} textScore - Full-text component of the score.
 * @param {number} semanticScore - Embedding-similarity component of the score.
 * @param {string} searchMode - Label of the search that produced the row.
 * @returns {object} Result object; `tags` keeps only the string entries decoded
 *   from `row.tags_json`.
 */
const toSearchResult = (row, score, textScore, semanticScore, searchMode) => {
    const tags = parseJsonArray(row.tags_json).filter((tag) => typeof tag === 'string');
    return {
        documentId: row.document_id,
        agentId: row.agent_id,
        title: row.title,
        path: row.path,
        chunkId: row.chunk_id,
        content: row.content,
        score,
        textScore,
        semanticScore,
        searchMode,
        tags
    };
};
|
|
35
|
-
/**
 * Returns a sorted copy of `results`, highest score first, with ties broken
 * by title via `localeCompare`. The input array is not modified.
 *
 * @param {Array<{ score: number, title: string }>} results - Results to order.
 * @returns {Array<{ score: number, title: string }>} New, sorted array.
 */
const sortByScore = (results) => {
    const byScoreThenTitle = (left, right) => {
        const scoreDelta = right.score - left.score;
        // Any falsy delta (0 or NaN) falls through to the title comparison.
        if (scoreDelta) {
            return scoreDelta;
        }
        return left.title.localeCompare(right.title);
    };
    const copy = [...results];
    copy.sort(byScoreThenTitle);
    return copy;
};
|
|
36
|
-
/**
 * Combines full-text and semantic results into one hybrid ranking.
 *
 * Results are keyed by `chunkId`. Semantic results are visited first and text
 * results second, so when a chunk appears in both lists the text result's
 * fields are the ones kept. For each chunk the best text score and best
 * semantic score seen are retained and blended as
 * `textScore * 0.62 + semanticScore * 0.38`.
 *
 * @param {object[]} ftsResults - Results from the full-text search.
 * @param {object[]} semanticResults - Results from the semantic search.
 * @param {number} limit - Maximum number of merged results to return.
 * @returns {object[]} Up to `limit` results ordered by `sortByScore`, each
 *   tagged with `searchMode: 'hybrid'`.
 */
const mergeHybridResults = (ftsResults, semanticResults, limit) => {
    const merged = new Map();
    for (const candidate of [...semanticResults, ...ftsResults]) {
        const previous = merged.get(candidate.chunkId);
        const textScore = Math.max(previous?.textScore ?? 0, candidate.textScore);
        const semanticScore = Math.max(previous?.semanticScore ?? 0, candidate.semanticScore);
        merged.set(candidate.chunkId, {
            ...candidate,
            score: textScore * 0.62 + semanticScore * 0.38,
            textScore,
            semanticScore,
            searchMode: 'hybrid'
        });
    }
    const ranked = sortByScore(Array.from(merged.values()));
    return ranked.slice(0, limit);
};
|
|
53
|
-
/**
 * Builds a comma-separated list of `?` bind markers for an SQL `IN (...)` clause.
 *
 * @param {number} count - Number of markers wanted.
 * @returns {string} For example `'?, ?, ?'` for `3`, and `''` for `0`.
 */
const placeholders = (count) => {
    const markers = Array.from({ length: count }).fill('?');
    return markers.join(', ');
};
|
|
54
|
-
/**
 * Fetches semantic-search candidate chunks through the `embedding_buckets` table.
 *
 * The query embedding is turned into bucket keys by `createEmbeddingBuckets`.
 * Chunks that share at least one of those buckets are returned, ordered by how
 * many buckets they share (`count(*)`), then by ascending token count and title.
 *
 * @param {object} database - Database handle exposing `prepare(sql).all(...params)`.
 * @param {string | undefined} normalizedAgentId - When truthy, restricts rows to this agent.
 * @param {number[]} queryEmbedding - Embedding of the query.
 * @param {number} limit - Maximum number of rows to read.
 * @returns {object[]} Candidate rows including `embedding_json`; an empty array
 *   when the embedding yields no buckets.
 */
const readBucketedSemanticRows = (database, normalizedAgentId, queryEmbedding, limit) => {
    const buckets = createEmbeddingBuckets(queryEmbedding);
    if (buckets.length === 0) {
        return [];
    }
    // Bind order must follow the SQL: bucket keys, optional agent id, then limit.
    const params = [...buckets];
    let agentFilter = '';
    if (normalizedAgentId) {
        agentFilter = 'AND documents.agent_id = ?';
        params.push(normalizedAgentId);
    }
    params.push(limit);
    const statement = database.prepare(`
      SELECT
        documents.id AS document_id,
        documents.agent_id AS agent_id,
        documents.title AS title,
        documents.path AS path,
        chunks.id AS chunk_id,
        chunks.content AS content,
        documents.tags_json AS tags_json,
        chunks.embedding_json AS embedding_json,
        count(*) AS score
      FROM embedding_buckets
      JOIN chunks ON chunks.id = embedding_buckets.chunk_id
      JOIN documents ON documents.id = chunks.document_id
      WHERE embedding_buckets.bucket IN (${placeholders(buckets.length)})
        ${agentFilter}
      GROUP BY chunks.id
      ORDER BY score DESC, chunks.token_count ASC, documents.title ASC
      LIMIT ?
    `);
    return statement.all(...params);
};
|
|
84
|
-
/**
 * Creates the search half of the index API on top of an open database.
 *
 * All fixed SQL statements are prepared once, when the reader is created.
 *
 * @param {object} database - Database handle exposing `prepare(sql)`.
 * @returns {{ search: Function }} Object with a single `search` method.
 */
export const createSearchReader = (database) => {
    const ftsRowsStatement = database.prepare(`
      SELECT
        documents.id AS document_id,
        documents.agent_id AS agent_id,
        documents.title AS title,
        documents.path AS path,
        chunks_fts.chunk_id AS chunk_id,
        chunks_fts.content AS content,
        bm25(chunks_fts) * -1 AS score,
        documents.tags_json AS tags_json
      FROM chunks_fts
      JOIN documents ON documents.id = chunks_fts.document_id
      WHERE chunks_fts MATCH ?
      ORDER BY bm25(chunks_fts)
      LIMIT ?
    `);
    const ftsRowsByAgentStatement = database.prepare(`
      SELECT
        documents.id AS document_id,
        documents.agent_id AS agent_id,
        documents.title AS title,
        documents.path AS path,
        chunks_fts.chunk_id AS chunk_id,
        chunks_fts.content AS content,
        bm25(chunks_fts) * -1 AS score,
        documents.tags_json AS tags_json
      FROM chunks_fts
      JOIN documents ON documents.id = chunks_fts.document_id
      WHERE chunks_fts MATCH ?
        AND documents.agent_id = ?
      ORDER BY bm25(chunks_fts)
      LIMIT ?
    `);
    const semanticRowsStatement = database.prepare(`
      SELECT
        documents.id AS document_id,
        documents.agent_id AS agent_id,
        documents.title AS title,
        documents.path AS path,
        chunks.id AS chunk_id,
        chunks.content AS content,
        documents.tags_json AS tags_json,
        chunks.embedding_json AS embedding_json
      FROM chunks
      JOIN documents ON documents.id = chunks.document_id
      ORDER BY chunks.token_count ASC, documents.title ASC
      LIMIT ?
    `);
    const semanticRowsByAgentStatement = database.prepare(`
      SELECT
        documents.id AS document_id,
        documents.agent_id AS agent_id,
        documents.title AS title,
        documents.path AS path,
        chunks.id AS chunk_id,
        chunks.content AS content,
        documents.tags_json AS tags_json,
        chunks.embedding_json AS embedding_json
      FROM chunks
      JOIN documents ON documents.id = chunks.document_id
      WHERE documents.agent_id = ?
      ORDER BY chunks.token_count ASC, documents.title ASC
      LIMIT ?
    `);

    // Full-text rows for a MATCH expression, optionally scoped to one agent.
    const readFtsRows = (ftsQuery, normalizedAgentId, rowLimit) => {
        if (normalizedAgentId) {
            return ftsRowsByAgentStatement.all(ftsQuery, normalizedAgentId, rowLimit);
        }
        return ftsRowsStatement.all(ftsQuery, rowLimit);
    };

    // Unbucketed fallback: the first `limit` chunks by token count and title.
    const readAllSemanticRowsForLimit = (normalizedAgentId, limit) => {
        if (normalizedAgentId) {
            return semanticRowsByAgentStatement.all(normalizedAgentId, limit);
        }
        return semanticRowsStatement.all(limit);
    };

    // Semantic candidates: bucketed lookup first, plain scan only if it finds nothing.
    const readSemanticRows = (normalizedAgentId, queryEmbedding, limit) => {
        // Candidate pool is 96x the requested size, clamped to [768, 12000].
        const candidateLimit = Math.min(Math.max(limit * 96, 768), 12_000);
        const bucketedRows = readBucketedSemanticRows(database, normalizedAgentId, queryEmbedding, candidateLimit);
        if (bucketedRows.length > 0) {
            return bucketedRows;
        }
        return readAllSemanticRowsForLimit(normalizedAgentId, candidateLimit);
    };

    /**
     * Searches indexed chunks.
     *
     * @param {string} query - Free-text query; blank queries return `[]`.
     * @param {number} limit - Maximum number of results; values `<= 0` return `[]`.
     * @param {string} [agentId] - Optional agent filter.
     * @param {string} [mode='hybrid'] - `'fts'`, `'semantic'`, or anything else for hybrid.
     * @param {number[]} [queryEmbedding=[]] - Query embedding; when empty the
     *   semantic side contributes nothing.
     * @returns {object[]} Up to `limit` search results.
     */
    const search = (query, limit, agentId, mode = 'hybrid', queryEmbedding = []) => {
        if (!query.trim() || limit <= 0) {
            return [];
        }
        const normalizedAgentId = normalizeAgentFilter(agentId);
        const ftsQuery = toFtsQuery(query);
        // Over-fetch so the later merge and trim has enough candidates.
        const expandedLimit = Math.max(limit * 4, 24);

        const wantsText = mode !== 'semantic' && Boolean(ftsQuery);
        const ftsRows = wantsText ? readFtsRows(ftsQuery, normalizedAgentId, expandedLimit) : [];
        const ftsResults = ftsRows.map((row, index) => {
            const rankScore = toTextScore(index, ftsRows.length);
            return toSearchResult(row, rankScore, rankScore, 0, 'fts');
        });

        const wantsSemantic = mode !== 'fts' && queryEmbedding.length !== 0;
        const semanticRows = wantsSemantic
            ? readSemanticRows(normalizedAgentId, queryEmbedding, expandedLimit)
            : [];
        const scoredSemantic = [];
        for (const row of semanticRows) {
            const embedding = parseJsonArray(row.embedding_json).filter((value) => typeof value === 'number');
            const semanticScore = Math.max(0, cosineSimilarity(queryEmbedding, embedding));
            // Only positively similar chunks are kept.
            if (semanticScore > 0) {
                scoredSemantic.push(toSearchResult(row, semanticScore, 0, semanticScore, 'semantic'));
            }
        }
        const semanticResults = sortByScore(scoredSemantic).slice(0, expandedLimit);

        switch (mode) {
            case 'fts':
                return ftsResults.slice(0, limit);
            case 'semantic':
                return semanticResults.slice(0, limit);
            default:
                return mergeHybridResults(ftsResults, semanticResults, limit);
        }
    };

    return { search };
};
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export {};
|
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
import { chmodSync } from 'node:fs';
|
|
2
|
-
import { join } from 'node:path';
|
|
3
|
-
import { createIndexWriter } from './sqlite/document-writer.js';
|
|
4
|
-
import { createGraphReader } from './sqlite/graph-reader.js';
|
|
5
|
-
import { createRecoverySnapshot, openDatabaseWithRecovery } from './sqlite/recovery.js';
|
|
6
|
-
import { createSchema } from './sqlite/schema.js';
|
|
7
|
-
import { createSearchReader } from './sqlite/search-reader.js';
|
|
8
|
-
/**
 * Opens (or recovers) the vault's SQLite index and returns its read/write API.
 *
 * The database lives at `<vaultPath>/.brainlink/brainlink.db`, with a recovery
 * snapshot next to it at `brainlink.db.backup`. After opening, the database
 * file is restricted to mode `0o600`, connection PRAGMAs are applied and the
 * schema is created.
 *
 * If any setup step after opening throws, the database handle is closed before
 * the error is rethrown, so a failed open does not leave a connection behind.
 *
 * @param {string} vaultPath - Root directory of the vault.
 * @returns {object} Index API: `reset`, `saveDocuments`, the members provided
 *   by `createSearchReader` and `createGraphReader`, and `close`.
 * @throws Rethrows whatever error the failing setup step raised.
 */
export const openSqliteIndex = (vaultPath) => {
    const databasePath = join(vaultPath, '.brainlink', 'brainlink.db');
    const backupPath = join(vaultPath, '.brainlink', 'brainlink.db.backup');
    const database = openDatabaseWithRecovery(databasePath, backupPath);
    let indexWriter;
    let searchReader;
    let graphReader;
    try {
        indexWriter = createIndexWriter(database);
        chmodSync(databasePath, 0o600);
        database.exec(`
          PRAGMA foreign_keys = ON;
          PRAGMA journal_mode = WAL;
          PRAGMA synchronous = NORMAL;
          PRAGMA temp_store = MEMORY;
          PRAGMA busy_timeout = 5000;
          PRAGMA cache_size = -20000;
        `);
        createSchema(database);
        searchReader = createSearchReader(database);
        graphReader = createGraphReader(database);
    }
    catch (error) {
        // Release the connection opened above; the setup error is what the caller needs.
        try {
            database.close();
        }
        catch {
            // A failure while closing must not mask the original error.
        }
        throw error;
    }
    return {
        reset: () => indexWriter.reset(),
        saveDocuments: (documents) => {
            indexWriter.saveDocuments(documents);
            try {
                createRecoverySnapshot(database, backupPath);
            }
            catch {
                // Snapshot creation is best-effort. Indexing success should not fail because of backup I/O.
            }
        },
        ...searchReader,
        ...graphReader,
        close: () => database.close()
    };
};
|