@andespindola/brainlink 0.1.0-beta.1 → 0.1.0-beta.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/CHANGELOG.md +46 -0
  2. package/README.md +241 -10
  3. package/dist/application/add-note.js +62 -13
  4. package/dist/application/analyze-vault.js +104 -9
  5. package/dist/application/frontend/client-css.js +154 -71
  6. package/dist/application/frontend/client-html.js +42 -33
  7. package/dist/application/frontend/client-js.js +316 -84
  8. package/dist/application/get-graph-layout.js +22 -7
  9. package/dist/application/get-graph-node.js +12 -0
  10. package/dist/application/get-graph-summary.js +12 -0
  11. package/dist/application/index-vault.js +7 -0
  12. package/dist/application/migrate-vault.js +91 -0
  13. package/dist/application/search-graph-node-ids.js +12 -0
  14. package/dist/application/search-knowledge.js +74 -4
  15. package/dist/application/server/routes.js +27 -1
  16. package/dist/cli/commands/agent-commands.js +412 -0
  17. package/dist/cli/commands/config-commands.js +167 -0
  18. package/dist/cli/commands/read-commands.js +25 -8
  19. package/dist/cli/commands/write-commands.js +173 -4
  20. package/dist/cli/main.js +4 -0
  21. package/dist/cli/runtime.js +5 -2
  22. package/dist/domain/embeddings.js +2 -1
  23. package/dist/domain/graph-layout.js +20 -14
  24. package/dist/domain/markdown.js +36 -4
  25. package/dist/infrastructure/config.js +94 -8
  26. package/dist/infrastructure/file-system-vault.js +15 -0
  27. package/dist/infrastructure/paths.js +9 -1
  28. package/dist/infrastructure/search-packs.js +151 -0
  29. package/dist/infrastructure/session-state.js +172 -0
  30. package/dist/infrastructure/sqlite/graph-reader.js +252 -105
  31. package/dist/infrastructure/sqlite/recovery.js +83 -0
  32. package/dist/infrastructure/sqlite/schema.js +4 -1
  33. package/dist/infrastructure/sqlite/search-reader.js +104 -72
  34. package/dist/infrastructure/sqlite-index.js +16 -3
  35. package/dist/mcp/main.js +11 -3
  36. package/dist/mcp/server.js +17 -2
  37. package/dist/mcp/startup.js +35 -0
  38. package/dist/mcp/tools.js +571 -19
  39. package/docs/AGENT_USAGE.md +87 -3
  40. package/docs/ARCHITECTURE.md +16 -1
  41. package/docs/QUICKSTART.md +104 -0
  42. package/docs/RELEASE.md +3 -3
  43. package/package.json +1 -1
@@ -10,111 +10,258 @@ const toGraphLink = (row) => ({
10
10
  });
11
11
  const normalizeAgentFilter = (agentId) => agentId ? sanitizeAgentId(agentId) : undefined;
12
12
  const toTitleKey = (title) => title.toLowerCase();
13
- export const createGraphReader = (database) => ({
14
- listLinks: (agentId) => {
15
- const normalizedAgentId = normalizeAgentFilter(agentId);
16
- const agentFilter = normalizedAgentId ? 'WHERE source.agent_id = ?' : '';
17
- const rows = database
18
- .prepare(`
19
- SELECT
20
- source.agent_id AS agent_id,
21
- source.title AS from_title,
22
- source.path AS from_path,
23
- COALESCE(target.title, links.to_title) AS to_title,
24
- target.path AS to_path,
25
- links.weight AS weight,
26
- links.priority AS priority
27
- FROM links
28
- JOIN documents source ON source.id = links.from_document_id
29
- LEFT JOIN documents target ON target.id = links.to_document_id
30
- ${agentFilter}
31
- ORDER BY source.title, links.weight DESC, to_title
32
- `)
33
- .all(...(normalizedAgentId ? [normalizedAgentId] : []));
34
- return rows.map(toGraphLink);
35
- },
36
- listBacklinks: (title, agentId) => {
37
- const normalizedAgentId = normalizeAgentFilter(agentId);
38
- const agentFilter = normalizedAgentId ? 'AND source.agent_id = ?' : '';
39
- const titleKey = toTitleKey(title);
40
- const rows = database
41
- .prepare(`
42
- SELECT
43
- source.agent_id AS agent_id,
44
- source.title AS from_title,
45
- source.path AS from_path,
46
- COALESCE(target.title, links.to_title) AS to_title,
47
- target.path AS to_path,
48
- links.weight AS weight,
49
- links.priority AS priority
50
- FROM links
51
- JOIN documents source ON source.id = links.from_document_id
52
- LEFT JOIN documents target ON target.id = links.to_document_id
53
- WHERE links.to_title_key = ?
54
- ${agentFilter}
55
- ORDER BY links.weight DESC, source.title
56
- `)
57
- .all(...(normalizedAgentId ? [titleKey, normalizedAgentId] : [titleKey]));
58
- return rows.map(toGraphLink);
59
- },
60
- getGraph: (agentId) => {
61
- const normalizedAgentId = normalizeAgentFilter(agentId);
62
- const documentAgentFilter = normalizedAgentId ? 'WHERE agent_id = ?' : '';
63
- const edgeAgentFilter = normalizedAgentId ? 'WHERE source.agent_id = ?' : '';
64
- const nodeRows = database
65
- .prepare(`
66
- SELECT id, agent_id, title, path, content, tags_json
67
- FROM documents
68
- ${documentAgentFilter}
69
- ORDER BY title
70
- `)
71
- .all(...(normalizedAgentId ? [normalizedAgentId] : []));
72
- const edgeRows = database
73
- .prepare(`
74
- SELECT
75
- links.from_document_id AS source,
76
- links.to_document_id AS target,
77
- links.to_title AS target_title,
78
- links.weight AS weight,
79
- links.priority AS priority
80
- FROM links
81
- JOIN documents source ON source.id = links.from_document_id
82
- ${edgeAgentFilter}
83
- ORDER BY links.from_document_id, links.weight DESC, links.to_title
84
- `)
85
- .all(...(normalizedAgentId ? [normalizedAgentId] : []));
86
- const nodes = nodeRows.map((row) => ({
87
- id: row.id,
88
- agentId: row.agent_id,
89
- title: row.title,
90
- path: row.path,
91
- content: row.content,
92
- tags: JSON.parse(row.tags_json)
93
- }));
94
- const edges = edgeRows.map((row) => ({
95
- source: row.source,
96
- target: row.target,
97
- targetTitle: row.target_title,
98
- weight: row.weight,
99
- priority: row.priority
100
- }));
101
- return {
102
- nodes,
103
- edges
104
- };
105
- },
106
- listAgents: () => {
107
- const rows = database
108
- .prepare(`
109
- SELECT agent_id AS id, count(*) AS document_count
110
- FROM documents
111
- GROUP BY agent_id
112
- ORDER BY agent_id
113
- `)
114
- .all();
115
- return rows.map((row) => ({
13
+ const toFtsQuery = (query) => query
14
+ .toLowerCase()
15
+ .match(/[\p{L}\p{N}_-]+/gu)
16
+ ?.map((term) => `"${term.replaceAll('"', '""')}"*`)
17
+ .join(' OR ') ?? '';
18
+ export const createGraphReader = (database) => (() => {
19
+ const listLinksStatement = database.prepare(`
20
+ SELECT
21
+ source.agent_id AS agent_id,
22
+ source.title AS from_title,
23
+ source.path AS from_path,
24
+ COALESCE(target.title, links.to_title) AS to_title,
25
+ target.path AS to_path,
26
+ links.weight AS weight,
27
+ links.priority AS priority
28
+ FROM links
29
+ JOIN documents source ON source.id = links.from_document_id
30
+ LEFT JOIN documents target ON target.id = links.to_document_id
31
+ ORDER BY source.title, links.weight DESC, to_title
32
+ `);
33
+ const listLinksByAgentStatement = database.prepare(`
34
+ SELECT
35
+ source.agent_id AS agent_id,
36
+ source.title AS from_title,
37
+ source.path AS from_path,
38
+ COALESCE(target.title, links.to_title) AS to_title,
39
+ target.path AS to_path,
40
+ links.weight AS weight,
41
+ links.priority AS priority
42
+ FROM links
43
+ JOIN documents source ON source.id = links.from_document_id
44
+ LEFT JOIN documents target ON target.id = links.to_document_id
45
+ WHERE source.agent_id = ?
46
+ ORDER BY source.title, links.weight DESC, to_title
47
+ `);
48
+ const listBacklinksStatement = database.prepare(`
49
+ SELECT
50
+ source.agent_id AS agent_id,
51
+ source.title AS from_title,
52
+ source.path AS from_path,
53
+ COALESCE(target.title, links.to_title) AS to_title,
54
+ target.path AS to_path,
55
+ links.weight AS weight,
56
+ links.priority AS priority
57
+ FROM links
58
+ JOIN documents source ON source.id = links.from_document_id
59
+ LEFT JOIN documents target ON target.id = links.to_document_id
60
+ WHERE links.to_title_key = ?
61
+ ORDER BY links.weight DESC, source.title
62
+ `);
63
+ const listBacklinksByAgentStatement = database.prepare(`
64
+ SELECT
65
+ source.agent_id AS agent_id,
66
+ source.title AS from_title,
67
+ source.path AS from_path,
68
+ COALESCE(target.title, links.to_title) AS to_title,
69
+ target.path AS to_path,
70
+ links.weight AS weight,
71
+ links.priority AS priority
72
+ FROM links
73
+ JOIN documents source ON source.id = links.from_document_id
74
+ LEFT JOIN documents target ON target.id = links.to_document_id
75
+ WHERE links.to_title_key = ? AND source.agent_id = ?
76
+ ORDER BY links.weight DESC, source.title
77
+ `);
78
+ const graphNodesStatement = database.prepare(`
79
+ SELECT id, agent_id, title, path, content, tags_json
80
+ FROM documents
81
+ ORDER BY title
82
+ `);
83
+ const graphNodesByAgentStatement = database.prepare(`
84
+ SELECT id, agent_id, title, path, content, tags_json
85
+ FROM documents
86
+ WHERE agent_id = ?
87
+ ORDER BY title
88
+ `);
89
+ const graphSummaryNodesStatement = database.prepare(`
90
+ SELECT id, agent_id, title, path, '' AS content, tags_json
91
+ FROM documents
92
+ ORDER BY title
93
+ `);
94
+ const graphSummaryNodesByAgentStatement = database.prepare(`
95
+ SELECT id, agent_id, title, path, '' AS content, tags_json
96
+ FROM documents
97
+ WHERE agent_id = ?
98
+ ORDER BY title
99
+ `);
100
+ const graphEdgesStatement = database.prepare(`
101
+ SELECT
102
+ links.from_document_id AS source,
103
+ links.to_document_id AS target,
104
+ links.to_title AS target_title,
105
+ links.weight AS weight,
106
+ links.priority AS priority
107
+ FROM links
108
+ JOIN documents source ON source.id = links.from_document_id
109
+ ORDER BY links.from_document_id, links.weight DESC, links.to_title
110
+ `);
111
+ const graphEdgesByAgentStatement = database.prepare(`
112
+ SELECT
113
+ links.from_document_id AS source,
114
+ links.to_document_id AS target,
115
+ links.to_title AS target_title,
116
+ links.weight AS weight,
117
+ links.priority AS priority
118
+ FROM links
119
+ JOIN documents source ON source.id = links.from_document_id
120
+ WHERE source.agent_id = ?
121
+ ORDER BY links.from_document_id, links.weight DESC, links.to_title
122
+ `);
123
+ const graphNodeByIdStatement = database.prepare(`
124
+ SELECT id, agent_id, title, path, content, tags_json
125
+ FROM documents
126
+ WHERE id = ?
127
+ `);
128
+ const graphNodeByIdAndAgentStatement = database.prepare(`
129
+ SELECT id, agent_id, title, path, content, tags_json
130
+ FROM documents
131
+ WHERE id = ? AND agent_id = ?
132
+ `);
133
+ const filterNodeIdsMetadataStatement = database.prepare(`
134
+ SELECT id
135
+ FROM documents
136
+ WHERE lower(title) LIKE ?
137
+ OR lower(path) LIKE ?
138
+ OR lower(tags_json) LIKE ?
139
+ ORDER BY title
140
+ LIMIT ?
141
+ `);
142
+ const filterNodeIdsMetadataByAgentStatement = database.prepare(`
143
+ SELECT id
144
+ FROM documents
145
+ WHERE agent_id = ?
146
+ AND (
147
+ lower(title) LIKE ?
148
+ OR lower(path) LIKE ?
149
+ OR lower(tags_json) LIKE ?
150
+ )
151
+ ORDER BY title
152
+ LIMIT ?
153
+ `);
154
+ const filterNodeIdsContentStatement = database.prepare(`
155
+ SELECT DISTINCT documents.id AS id
156
+ FROM chunks_fts
157
+ JOIN documents ON documents.id = chunks_fts.document_id
158
+ WHERE chunks_fts MATCH ?
159
+ LIMIT ?
160
+ `);
161
+ const filterNodeIdsContentByAgentStatement = database.prepare(`
162
+ SELECT DISTINCT documents.id AS id
163
+ FROM chunks_fts
164
+ JOIN documents ON documents.id = chunks_fts.document_id
165
+ WHERE chunks_fts MATCH ?
166
+ AND documents.agent_id = ?
167
+ LIMIT ?
168
+ `);
169
+ const listAgentsStatement = database.prepare(`
170
+ SELECT agent_id AS id, count(*) AS document_count
171
+ FROM documents
172
+ GROUP BY agent_id
173
+ ORDER BY agent_id
174
+ `);
175
+ return {
176
+ listLinks: (agentId) => {
177
+ const normalizedAgentId = normalizeAgentFilter(agentId);
178
+ const rows = (normalizedAgentId
179
+ ? listLinksByAgentStatement.all(normalizedAgentId)
180
+ : listLinksStatement.all());
181
+ return rows.map(toGraphLink);
182
+ },
183
+ listBacklinks: (title, agentId) => {
184
+ const normalizedAgentId = normalizeAgentFilter(agentId);
185
+ const titleKey = toTitleKey(title);
186
+ const rows = (normalizedAgentId
187
+ ? listBacklinksByAgentStatement.all(titleKey, normalizedAgentId)
188
+ : listBacklinksStatement.all(titleKey));
189
+ return rows.map(toGraphLink);
190
+ },
191
+ getGraph: (agentId) => {
192
+ const normalizedAgentId = normalizeAgentFilter(agentId);
193
+ const nodeRows = (normalizedAgentId
194
+ ? graphNodesByAgentStatement.all(normalizedAgentId)
195
+ : graphNodesStatement.all());
196
+ const edgeRows = (normalizedAgentId
197
+ ? graphEdgesByAgentStatement.all(normalizedAgentId)
198
+ : graphEdgesStatement.all());
199
+ return {
200
+ nodes: nodeRows.map(toGraphNode),
201
+ edges: edgeRows.map(toGraphEdge)
202
+ };
203
+ },
204
+ getGraphSummary: (agentId) => {
205
+ const normalizedAgentId = normalizeAgentFilter(agentId);
206
+ const nodeRows = (normalizedAgentId
207
+ ? graphSummaryNodesByAgentStatement.all(normalizedAgentId)
208
+ : graphSummaryNodesStatement.all());
209
+ const edgeRows = (normalizedAgentId
210
+ ? graphEdgesByAgentStatement.all(normalizedAgentId)
211
+ : graphEdgesStatement.all());
212
+ return {
213
+ nodes: nodeRows.map(toGraphNode),
214
+ edges: edgeRows.map(toGraphEdge)
215
+ };
216
+ },
217
+ getGraphNode: (id, agentId) => {
218
+ const normalizedAgentId = normalizeAgentFilter(agentId);
219
+ const row = (normalizedAgentId
220
+ ? graphNodeByIdAndAgentStatement.get(id, normalizedAgentId)
221
+ : graphNodeByIdStatement.get(id));
222
+ return row ? toGraphNode(row) : undefined;
223
+ },
224
+ searchGraphNodeIds: (query, limit, agentId) => {
225
+ const normalizedQuery = query.trim().toLowerCase();
226
+ if (!normalizedQuery || limit <= 0) {
227
+ return [];
228
+ }
229
+ const normalizedAgentId = normalizeAgentFilter(agentId);
230
+ const likeQuery = `%${normalizedQuery}%`;
231
+ const metadataRows = (normalizedAgentId
232
+ ? filterNodeIdsMetadataByAgentStatement.all(normalizedAgentId, likeQuery, likeQuery, likeQuery, limit)
233
+ : filterNodeIdsMetadataStatement.all(likeQuery, likeQuery, likeQuery, limit));
234
+ const ids = new Set(metadataRows.map((row) => row.id));
235
+ const remainingLimit = Math.max(limit - ids.size, 0);
236
+ if (remainingLimit > 0) {
237
+ const ftsQuery = toFtsQuery(normalizedQuery);
238
+ if (ftsQuery) {
239
+ const contentRows = (normalizedAgentId
240
+ ? filterNodeIdsContentByAgentStatement.all(ftsQuery, normalizedAgentId, remainingLimit)
241
+ : filterNodeIdsContentStatement.all(ftsQuery, remainingLimit));
242
+ contentRows.forEach((row) => ids.add(row.id));
243
+ }
244
+ }
245
+ return Array.from(ids).slice(0, limit);
246
+ },
247
+ listAgents: () => listAgentsStatement.all().map((row) => ({
116
248
  id: row.id,
117
249
  documentCount: row.document_count
118
- }));
119
- }
250
+ }))
251
+ };
252
+ })();
253
+ const toGraphNode = (row) => ({
254
+ id: row.id,
255
+ agentId: row.agent_id,
256
+ title: row.title,
257
+ path: row.path,
258
+ content: row.content,
259
+ tags: JSON.parse(row.tags_json)
260
+ });
261
+ const toGraphEdge = (row) => ({
262
+ source: row.source,
263
+ target: row.target,
264
+ targetTitle: row.target_title,
265
+ weight: row.weight,
266
+ priority: row.priority
120
267
  });
@@ -0,0 +1,83 @@
1
+ import Database from 'better-sqlite3';
2
+ import { copyFileSync, existsSync, mkdirSync, renameSync, rmSync, unlinkSync } from 'node:fs';
3
+ import { dirname } from 'node:path';
4
+ const sqliteCorruptionHints = [
5
+ 'database disk image is malformed',
6
+ 'file is not a database',
7
+ 'database is corrupted',
8
+ 'malformed database schema',
9
+ 'sqlite quick_check failed'
10
+ ];
11
+ const normalizeMessage = (error) => error instanceof Error ? error.message.toLowerCase() : String(error).toLowerCase();
12
+ const isSqliteCorruptionError = (error) => sqliteCorruptionHints.some((hint) => normalizeMessage(error).includes(hint));
13
+ const safeUnlink = (path) => {
14
+ if (!existsSync(path)) {
15
+ return;
16
+ }
17
+ try {
18
+ unlinkSync(path);
19
+ }
20
+ catch {
21
+ // Ignore best-effort cleanup failures.
22
+ }
23
+ };
24
+ const clearSidecars = (databasePath) => {
25
+ safeUnlink(`${databasePath}-wal`);
26
+ safeUnlink(`${databasePath}-shm`);
27
+ };
28
+ const assertQuickCheck = (database) => {
29
+ const rows = database.prepare('PRAGMA quick_check').all();
30
+ const first = rows[0]?.quick_check?.toLowerCase() ?? 'ok';
31
+ if (first !== 'ok') {
32
+ throw new Error(`sqlite quick_check failed: ${first}`);
33
+ }
34
+ };
35
+ const archiveCorruptedDatabase = (databasePath) => {
36
+ if (!existsSync(databasePath)) {
37
+ return;
38
+ }
39
+ const archivedPath = `${databasePath}.corrupt-${Date.now()}`;
40
+ renameSync(databasePath, archivedPath);
41
+ };
42
+ const restoreFromBackupOrReset = (databasePath, backupPath) => {
43
+ clearSidecars(databasePath);
44
+ archiveCorruptedDatabase(databasePath);
45
+ if (existsSync(backupPath)) {
46
+ copyFileSync(backupPath, databasePath);
47
+ clearSidecars(databasePath);
48
+ return;
49
+ }
50
+ rmSync(databasePath, { force: true });
51
+ };
52
+ const openCheckedDatabase = (databasePath) => {
53
+ const database = new Database(databasePath);
54
+ try {
55
+ assertQuickCheck(database);
56
+ }
57
+ catch (error) {
58
+ database.close();
59
+ throw error;
60
+ }
61
+ return database;
62
+ };
63
+ export const createRecoverySnapshot = (database, backupPath) => {
64
+ const backupDirectory = dirname(backupPath);
65
+ const tempBackupPath = `${backupPath}.tmp`;
66
+ mkdirSync(backupDirectory, { recursive: true });
67
+ rmSync(tempBackupPath, { force: true });
68
+ database.prepare('VACUUM INTO ?').run(tempBackupPath);
69
+ renameSync(tempBackupPath, backupPath);
70
+ };
71
+ export const openDatabaseWithRecovery = (databasePath, backupPath) => {
72
+ mkdirSync(dirname(databasePath), { recursive: true });
73
+ try {
74
+ return openCheckedDatabase(databasePath);
75
+ }
76
+ catch (error) {
77
+ if (!isSqliteCorruptionError(error)) {
78
+ throw error;
79
+ }
80
+ restoreFromBackupOrReset(databasePath, backupPath);
81
+ return openCheckedDatabase(databasePath);
82
+ }
83
+ };
@@ -1,4 +1,4 @@
1
- const schemaVersion = 5;
1
+ const schemaVersion = 6;
2
2
  const requiredTableColumns = {
3
3
  documents: ['id', 'agent_id', 'title', 'path', 'content', 'tags_json', 'frontmatter_json', 'created_at', 'updated_at'],
4
4
  chunks: ['id', 'document_id', 'ordinal', 'content', 'token_count', 'embedding_provider', 'embedding_json'],
@@ -67,7 +67,9 @@ export const createSchema = (database) => {
67
67
  );
68
68
 
69
69
  CREATE INDEX IF NOT EXISTS idx_documents_agent_title ON documents(agent_id, title);
70
+ CREATE INDEX IF NOT EXISTS idx_documents_agent_id ON documents(agent_id, id);
70
71
  CREATE INDEX IF NOT EXISTS idx_chunks_document_ordinal ON chunks(document_id, ordinal);
72
+ CREATE INDEX IF NOT EXISTS idx_chunks_token_count ON chunks(token_count);
71
73
 
72
74
  CREATE TABLE IF NOT EXISTS embedding_buckets (
73
75
  bucket TEXT NOT NULL,
@@ -92,6 +94,7 @@ export const createSchema = (database) => {
92
94
 
93
95
  CREATE INDEX IF NOT EXISTS idx_links_to_document_id ON links(to_document_id);
94
96
  CREATE INDEX IF NOT EXISTS idx_links_to_title_key ON links(to_title_key);
97
+ CREATE INDEX IF NOT EXISTS idx_links_source_weight ON links(from_document_id, weight DESC, to_title);
95
98
 
96
99
  CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
97
100
  chunk_id UNINDEXED,
@@ -51,25 +51,6 @@ const mergeHybridResults = (ftsResults, semanticResults, limit) => {
51
51
  return sortByScore(Array.from(rows.values())).slice(0, limit);
52
52
  };
53
53
  const placeholders = (count) => Array.from({ length: count }, () => '?').join(', ');
54
- const readAllSemanticRows = (database, normalizedAgentId) => {
55
- const semanticAgentFilter = normalizedAgentId ? 'WHERE documents.agent_id = ?' : '';
56
- return database
57
- .prepare(`
58
- SELECT
59
- documents.id AS document_id,
60
- documents.agent_id AS agent_id,
61
- documents.title AS title,
62
- documents.path AS path,
63
- chunks.id AS chunk_id,
64
- chunks.content AS content,
65
- documents.tags_json AS tags_json,
66
- chunks.embedding_json AS embedding_json
67
- FROM chunks
68
- JOIN documents ON documents.id = chunks.document_id
69
- ${semanticAgentFilter}
70
- `)
71
- .all(...(normalizedAgentId ? [normalizedAgentId] : []));
72
- };
73
54
  const readBucketedSemanticRows = (database, normalizedAgentId, queryEmbedding, limit) => {
74
55
  const buckets = createEmbeddingBuckets(queryEmbedding);
75
56
  if (buckets.length === 0) {
@@ -100,57 +81,108 @@ const readBucketedSemanticRows = (database, normalizedAgentId, queryEmbedding, l
100
81
  `)
101
82
  .all(...params);
102
83
  };
103
- const readSemanticRows = (database, normalizedAgentId, queryEmbedding, limit) => {
104
- const candidateLimit = Math.max(limit * 96, 768);
105
- const bucketedRows = readBucketedSemanticRows(database, normalizedAgentId, queryEmbedding, candidateLimit);
106
- return bucketedRows.length > 0 ? bucketedRows : readAllSemanticRows(database, normalizedAgentId);
107
- };
108
- export const createSearchReader = (database) => ({
109
- search: (query, limit, agentId, mode = 'hybrid', queryEmbedding = []) => {
110
- const normalizedQuery = query.trim();
111
- if (!normalizedQuery || limit <= 0) {
112
- return [];
113
- }
114
- const normalizedAgentId = normalizeAgentFilter(agentId);
115
- const ftsQuery = toFtsQuery(query);
116
- const expandedLimit = Math.max(limit * 4, 24);
117
- const ftsAgentFilter = normalizedAgentId ? 'AND documents.agent_id = ?' : '';
118
- const ftsParams = normalizedAgentId ? [ftsQuery, normalizedAgentId, expandedLimit] : [ftsQuery, expandedLimit];
119
- const ftsRows = mode === 'semantic' || !ftsQuery
120
- ? []
121
- : database
122
- .prepare(`
123
- SELECT
124
- documents.id AS document_id,
125
- documents.agent_id AS agent_id,
126
- documents.title AS title,
127
- documents.path AS path,
128
- chunks_fts.chunk_id AS chunk_id,
129
- chunks_fts.content AS content,
130
- bm25(chunks_fts) * -1 AS score,
131
- documents.tags_json AS tags_json
132
- FROM chunks_fts
133
- JOIN documents ON documents.id = chunks_fts.document_id
134
- WHERE chunks_fts MATCH ?
135
- ${ftsAgentFilter}
136
- ORDER BY bm25(chunks_fts)
137
- LIMIT ?
138
- `)
139
- .all(...ftsParams);
140
- const ftsResults = ftsRows.map((row, index) => toSearchResult(row, toTextScore(index, ftsRows.length), toTextScore(index, ftsRows.length), 0, 'fts'));
141
- const semanticRows = mode === 'fts' || queryEmbedding.length === 0 ? [] : readSemanticRows(database, normalizedAgentId, queryEmbedding, expandedLimit);
142
- const semanticResults = sortByScore(semanticRows
143
- .map((row) => {
144
- const semanticScore = Math.max(0, cosineSimilarity(queryEmbedding, parseJsonArray(row.embedding_json).filter((value) => typeof value === 'number')));
145
- return toSearchResult(row, semanticScore, 0, semanticScore, 'semantic');
146
- })
147
- .filter((result) => result.semanticScore > 0)).slice(0, expandedLimit);
148
- if (mode === 'fts') {
149
- return ftsResults.slice(0, limit);
150
- }
151
- if (mode === 'semantic') {
152
- return semanticResults.slice(0, limit);
84
+ export const createSearchReader = (database) => (() => {
85
+ const ftsRowsStatement = database.prepare(`
86
+ SELECT
87
+ documents.id AS document_id,
88
+ documents.agent_id AS agent_id,
89
+ documents.title AS title,
90
+ documents.path AS path,
91
+ chunks_fts.chunk_id AS chunk_id,
92
+ chunks_fts.content AS content,
93
+ bm25(chunks_fts) * -1 AS score,
94
+ documents.tags_json AS tags_json
95
+ FROM chunks_fts
96
+ JOIN documents ON documents.id = chunks_fts.document_id
97
+ WHERE chunks_fts MATCH ?
98
+ ORDER BY bm25(chunks_fts)
99
+ LIMIT ?
100
+ `);
101
+ const ftsRowsByAgentStatement = database.prepare(`
102
+ SELECT
103
+ documents.id AS document_id,
104
+ documents.agent_id AS agent_id,
105
+ documents.title AS title,
106
+ documents.path AS path,
107
+ chunks_fts.chunk_id AS chunk_id,
108
+ chunks_fts.content AS content,
109
+ bm25(chunks_fts) * -1 AS score,
110
+ documents.tags_json AS tags_json
111
+ FROM chunks_fts
112
+ JOIN documents ON documents.id = chunks_fts.document_id
113
+ WHERE chunks_fts MATCH ?
114
+ AND documents.agent_id = ?
115
+ ORDER BY bm25(chunks_fts)
116
+ LIMIT ?
117
+ `);
118
+ const semanticRowsStatement = database.prepare(`
119
+ SELECT
120
+ documents.id AS document_id,
121
+ documents.agent_id AS agent_id,
122
+ documents.title AS title,
123
+ documents.path AS path,
124
+ chunks.id AS chunk_id,
125
+ chunks.content AS content,
126
+ documents.tags_json AS tags_json,
127
+ chunks.embedding_json AS embedding_json
128
+ FROM chunks
129
+ JOIN documents ON documents.id = chunks.document_id
130
+ ORDER BY chunks.token_count ASC, documents.title ASC
131
+ LIMIT ?
132
+ `);
133
+ const semanticRowsByAgentStatement = database.prepare(`
134
+ SELECT
135
+ documents.id AS document_id,
136
+ documents.agent_id AS agent_id,
137
+ documents.title AS title,
138
+ documents.path AS path,
139
+ chunks.id AS chunk_id,
140
+ chunks.content AS content,
141
+ documents.tags_json AS tags_json,
142
+ chunks.embedding_json AS embedding_json
143
+ FROM chunks
144
+ JOIN documents ON documents.id = chunks.document_id
145
+ WHERE documents.agent_id = ?
146
+ ORDER BY chunks.token_count ASC, documents.title ASC
147
+ LIMIT ?
148
+ `);
149
+ const readAllSemanticRowsForLimit = (normalizedAgentId, limit) => (normalizedAgentId
150
+ ? semanticRowsByAgentStatement.all(normalizedAgentId, limit)
151
+ : semanticRowsStatement.all(limit));
152
+ const readSemanticRows = (normalizedAgentId, queryEmbedding, limit) => {
153
+ const candidateLimit = Math.min(Math.max(limit * 96, 768), 12_000);
154
+ const bucketedRows = readBucketedSemanticRows(database, normalizedAgentId, queryEmbedding, candidateLimit);
155
+ return bucketedRows.length > 0 ? bucketedRows : readAllSemanticRowsForLimit(normalizedAgentId, candidateLimit);
156
+ };
157
+ return {
158
+ search: (query, limit, agentId, mode = 'hybrid', queryEmbedding = []) => {
159
+ const normalizedQuery = query.trim();
160
+ if (!normalizedQuery || limit <= 0) {
161
+ return [];
162
+ }
163
+ const normalizedAgentId = normalizeAgentFilter(agentId);
164
+ const ftsQuery = toFtsQuery(query);
165
+ const expandedLimit = Math.max(limit * 4, 24);
166
+ const ftsRows = mode === 'semantic' || !ftsQuery
167
+ ? []
168
+ : (normalizedAgentId
169
+ ? ftsRowsByAgentStatement.all(ftsQuery, normalizedAgentId, expandedLimit)
170
+ : ftsRowsStatement.all(ftsQuery, expandedLimit));
171
+ const ftsResults = ftsRows.map((row, index) => toSearchResult(row, toTextScore(index, ftsRows.length), toTextScore(index, ftsRows.length), 0, 'fts'));
172
+ const semanticRows = mode === 'fts' || queryEmbedding.length === 0 ? [] : readSemanticRows(normalizedAgentId, queryEmbedding, expandedLimit);
173
+ const semanticResults = sortByScore(semanticRows
174
+ .map((row) => {
175
+ const semanticScore = Math.max(0, cosineSimilarity(queryEmbedding, parseJsonArray(row.embedding_json).filter((value) => typeof value === 'number')));
176
+ return toSearchResult(row, semanticScore, 0, semanticScore, 'semantic');
177
+ })
178
+ .filter((result) => result.semanticScore > 0)).slice(0, expandedLimit);
179
+ if (mode === 'fts') {
180
+ return ftsResults.slice(0, limit);
181
+ }
182
+ if (mode === 'semantic') {
183
+ return semanticResults.slice(0, limit);
184
+ }
185
+ return mergeHybridResults(ftsResults, semanticResults, limit);
153
186
  }
154
- return mergeHybridResults(ftsResults, semanticResults, limit);
155
- }
156
- });
187
+ };
188
+ })();