@andespindola/brainlink 0.1.0-beta.1 → 0.1.0-beta.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +46 -0
- package/README.md +241 -10
- package/dist/application/add-note.js +62 -13
- package/dist/application/analyze-vault.js +104 -9
- package/dist/application/frontend/client-css.js +154 -71
- package/dist/application/frontend/client-html.js +42 -33
- package/dist/application/frontend/client-js.js +316 -84
- package/dist/application/get-graph-layout.js +22 -7
- package/dist/application/get-graph-node.js +12 -0
- package/dist/application/get-graph-summary.js +12 -0
- package/dist/application/index-vault.js +7 -0
- package/dist/application/migrate-vault.js +91 -0
- package/dist/application/search-graph-node-ids.js +12 -0
- package/dist/application/search-knowledge.js +74 -4
- package/dist/application/server/routes.js +27 -1
- package/dist/cli/commands/agent-commands.js +412 -0
- package/dist/cli/commands/config-commands.js +167 -0
- package/dist/cli/commands/read-commands.js +25 -8
- package/dist/cli/commands/write-commands.js +173 -4
- package/dist/cli/main.js +4 -0
- package/dist/cli/runtime.js +5 -2
- package/dist/domain/embeddings.js +2 -1
- package/dist/domain/graph-layout.js +20 -14
- package/dist/domain/markdown.js +36 -4
- package/dist/infrastructure/config.js +94 -8
- package/dist/infrastructure/file-system-vault.js +15 -0
- package/dist/infrastructure/paths.js +9 -1
- package/dist/infrastructure/search-packs.js +151 -0
- package/dist/infrastructure/session-state.js +172 -0
- package/dist/infrastructure/sqlite/graph-reader.js +252 -105
- package/dist/infrastructure/sqlite/recovery.js +83 -0
- package/dist/infrastructure/sqlite/schema.js +4 -1
- package/dist/infrastructure/sqlite/search-reader.js +104 -72
- package/dist/infrastructure/sqlite-index.js +16 -3
- package/dist/mcp/main.js +11 -3
- package/dist/mcp/server.js +17 -2
- package/dist/mcp/startup.js +35 -0
- package/dist/mcp/tools.js +571 -19
- package/docs/AGENT_USAGE.md +87 -3
- package/docs/ARCHITECTURE.md +16 -1
- package/docs/QUICKSTART.md +104 -0
- package/docs/RELEASE.md +3 -3
- package/package.json +1 -1
|
@@ -10,111 +10,258 @@ const toGraphLink = (row) => ({
|
|
|
10
10
|
});
|
|
11
11
|
const normalizeAgentFilter = (agentId) => agentId ? sanitizeAgentId(agentId) : undefined;
|
|
12
12
|
const toTitleKey = (title) => title.toLowerCase();
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
13
|
+
const toFtsQuery = (query) => query
|
|
14
|
+
.toLowerCase()
|
|
15
|
+
.match(/[\p{L}\p{N}_-]+/gu)
|
|
16
|
+
?.map((term) => `"${term.replaceAll('"', '""')}"*`)
|
|
17
|
+
.join(' OR ') ?? '';
|
|
18
|
+
export const createGraphReader = (database) => (() => {
|
|
19
|
+
const listLinksStatement = database.prepare(`
|
|
20
|
+
SELECT
|
|
21
|
+
source.agent_id AS agent_id,
|
|
22
|
+
source.title AS from_title,
|
|
23
|
+
source.path AS from_path,
|
|
24
|
+
COALESCE(target.title, links.to_title) AS to_title,
|
|
25
|
+
target.path AS to_path,
|
|
26
|
+
links.weight AS weight,
|
|
27
|
+
links.priority AS priority
|
|
28
|
+
FROM links
|
|
29
|
+
JOIN documents source ON source.id = links.from_document_id
|
|
30
|
+
LEFT JOIN documents target ON target.id = links.to_document_id
|
|
31
|
+
ORDER BY source.title, links.weight DESC, to_title
|
|
32
|
+
`);
|
|
33
|
+
const listLinksByAgentStatement = database.prepare(`
|
|
34
|
+
SELECT
|
|
35
|
+
source.agent_id AS agent_id,
|
|
36
|
+
source.title AS from_title,
|
|
37
|
+
source.path AS from_path,
|
|
38
|
+
COALESCE(target.title, links.to_title) AS to_title,
|
|
39
|
+
target.path AS to_path,
|
|
40
|
+
links.weight AS weight,
|
|
41
|
+
links.priority AS priority
|
|
42
|
+
FROM links
|
|
43
|
+
JOIN documents source ON source.id = links.from_document_id
|
|
44
|
+
LEFT JOIN documents target ON target.id = links.to_document_id
|
|
45
|
+
WHERE source.agent_id = ?
|
|
46
|
+
ORDER BY source.title, links.weight DESC, to_title
|
|
47
|
+
`);
|
|
48
|
+
const listBacklinksStatement = database.prepare(`
|
|
49
|
+
SELECT
|
|
50
|
+
source.agent_id AS agent_id,
|
|
51
|
+
source.title AS from_title,
|
|
52
|
+
source.path AS from_path,
|
|
53
|
+
COALESCE(target.title, links.to_title) AS to_title,
|
|
54
|
+
target.path AS to_path,
|
|
55
|
+
links.weight AS weight,
|
|
56
|
+
links.priority AS priority
|
|
57
|
+
FROM links
|
|
58
|
+
JOIN documents source ON source.id = links.from_document_id
|
|
59
|
+
LEFT JOIN documents target ON target.id = links.to_document_id
|
|
60
|
+
WHERE links.to_title_key = ?
|
|
61
|
+
ORDER BY links.weight DESC, source.title
|
|
62
|
+
`);
|
|
63
|
+
const listBacklinksByAgentStatement = database.prepare(`
|
|
64
|
+
SELECT
|
|
65
|
+
source.agent_id AS agent_id,
|
|
66
|
+
source.title AS from_title,
|
|
67
|
+
source.path AS from_path,
|
|
68
|
+
COALESCE(target.title, links.to_title) AS to_title,
|
|
69
|
+
target.path AS to_path,
|
|
70
|
+
links.weight AS weight,
|
|
71
|
+
links.priority AS priority
|
|
72
|
+
FROM links
|
|
73
|
+
JOIN documents source ON source.id = links.from_document_id
|
|
74
|
+
LEFT JOIN documents target ON target.id = links.to_document_id
|
|
75
|
+
WHERE links.to_title_key = ? AND source.agent_id = ?
|
|
76
|
+
ORDER BY links.weight DESC, source.title
|
|
77
|
+
`);
|
|
78
|
+
const graphNodesStatement = database.prepare(`
|
|
79
|
+
SELECT id, agent_id, title, path, content, tags_json
|
|
80
|
+
FROM documents
|
|
81
|
+
ORDER BY title
|
|
82
|
+
`);
|
|
83
|
+
const graphNodesByAgentStatement = database.prepare(`
|
|
84
|
+
SELECT id, agent_id, title, path, content, tags_json
|
|
85
|
+
FROM documents
|
|
86
|
+
WHERE agent_id = ?
|
|
87
|
+
ORDER BY title
|
|
88
|
+
`);
|
|
89
|
+
const graphSummaryNodesStatement = database.prepare(`
|
|
90
|
+
SELECT id, agent_id, title, path, '' AS content, tags_json
|
|
91
|
+
FROM documents
|
|
92
|
+
ORDER BY title
|
|
93
|
+
`);
|
|
94
|
+
const graphSummaryNodesByAgentStatement = database.prepare(`
|
|
95
|
+
SELECT id, agent_id, title, path, '' AS content, tags_json
|
|
96
|
+
FROM documents
|
|
97
|
+
WHERE agent_id = ?
|
|
98
|
+
ORDER BY title
|
|
99
|
+
`);
|
|
100
|
+
const graphEdgesStatement = database.prepare(`
|
|
101
|
+
SELECT
|
|
102
|
+
links.from_document_id AS source,
|
|
103
|
+
links.to_document_id AS target,
|
|
104
|
+
links.to_title AS target_title,
|
|
105
|
+
links.weight AS weight,
|
|
106
|
+
links.priority AS priority
|
|
107
|
+
FROM links
|
|
108
|
+
JOIN documents source ON source.id = links.from_document_id
|
|
109
|
+
ORDER BY links.from_document_id, links.weight DESC, links.to_title
|
|
110
|
+
`);
|
|
111
|
+
const graphEdgesByAgentStatement = database.prepare(`
|
|
112
|
+
SELECT
|
|
113
|
+
links.from_document_id AS source,
|
|
114
|
+
links.to_document_id AS target,
|
|
115
|
+
links.to_title AS target_title,
|
|
116
|
+
links.weight AS weight,
|
|
117
|
+
links.priority AS priority
|
|
118
|
+
FROM links
|
|
119
|
+
JOIN documents source ON source.id = links.from_document_id
|
|
120
|
+
WHERE source.agent_id = ?
|
|
121
|
+
ORDER BY links.from_document_id, links.weight DESC, links.to_title
|
|
122
|
+
`);
|
|
123
|
+
const graphNodeByIdStatement = database.prepare(`
|
|
124
|
+
SELECT id, agent_id, title, path, content, tags_json
|
|
125
|
+
FROM documents
|
|
126
|
+
WHERE id = ?
|
|
127
|
+
`);
|
|
128
|
+
const graphNodeByIdAndAgentStatement = database.prepare(`
|
|
129
|
+
SELECT id, agent_id, title, path, content, tags_json
|
|
130
|
+
FROM documents
|
|
131
|
+
WHERE id = ? AND agent_id = ?
|
|
132
|
+
`);
|
|
133
|
+
const filterNodeIdsMetadataStatement = database.prepare(`
|
|
134
|
+
SELECT id
|
|
135
|
+
FROM documents
|
|
136
|
+
WHERE lower(title) LIKE ?
|
|
137
|
+
OR lower(path) LIKE ?
|
|
138
|
+
OR lower(tags_json) LIKE ?
|
|
139
|
+
ORDER BY title
|
|
140
|
+
LIMIT ?
|
|
141
|
+
`);
|
|
142
|
+
const filterNodeIdsMetadataByAgentStatement = database.prepare(`
|
|
143
|
+
SELECT id
|
|
144
|
+
FROM documents
|
|
145
|
+
WHERE agent_id = ?
|
|
146
|
+
AND (
|
|
147
|
+
lower(title) LIKE ?
|
|
148
|
+
OR lower(path) LIKE ?
|
|
149
|
+
OR lower(tags_json) LIKE ?
|
|
150
|
+
)
|
|
151
|
+
ORDER BY title
|
|
152
|
+
LIMIT ?
|
|
153
|
+
`);
|
|
154
|
+
const filterNodeIdsContentStatement = database.prepare(`
|
|
155
|
+
SELECT DISTINCT documents.id AS id
|
|
156
|
+
FROM chunks_fts
|
|
157
|
+
JOIN documents ON documents.id = chunks_fts.document_id
|
|
158
|
+
WHERE chunks_fts MATCH ?
|
|
159
|
+
LIMIT ?
|
|
160
|
+
`);
|
|
161
|
+
const filterNodeIdsContentByAgentStatement = database.prepare(`
|
|
162
|
+
SELECT DISTINCT documents.id AS id
|
|
163
|
+
FROM chunks_fts
|
|
164
|
+
JOIN documents ON documents.id = chunks_fts.document_id
|
|
165
|
+
WHERE chunks_fts MATCH ?
|
|
166
|
+
AND documents.agent_id = ?
|
|
167
|
+
LIMIT ?
|
|
168
|
+
`);
|
|
169
|
+
const listAgentsStatement = database.prepare(`
|
|
170
|
+
SELECT agent_id AS id, count(*) AS document_count
|
|
171
|
+
FROM documents
|
|
172
|
+
GROUP BY agent_id
|
|
173
|
+
ORDER BY agent_id
|
|
174
|
+
`);
|
|
175
|
+
return {
|
|
176
|
+
listLinks: (agentId) => {
|
|
177
|
+
const normalizedAgentId = normalizeAgentFilter(agentId);
|
|
178
|
+
const rows = (normalizedAgentId
|
|
179
|
+
? listLinksByAgentStatement.all(normalizedAgentId)
|
|
180
|
+
: listLinksStatement.all());
|
|
181
|
+
return rows.map(toGraphLink);
|
|
182
|
+
},
|
|
183
|
+
listBacklinks: (title, agentId) => {
|
|
184
|
+
const normalizedAgentId = normalizeAgentFilter(agentId);
|
|
185
|
+
const titleKey = toTitleKey(title);
|
|
186
|
+
const rows = (normalizedAgentId
|
|
187
|
+
? listBacklinksByAgentStatement.all(titleKey, normalizedAgentId)
|
|
188
|
+
: listBacklinksStatement.all(titleKey));
|
|
189
|
+
return rows.map(toGraphLink);
|
|
190
|
+
},
|
|
191
|
+
getGraph: (agentId) => {
|
|
192
|
+
const normalizedAgentId = normalizeAgentFilter(agentId);
|
|
193
|
+
const nodeRows = (normalizedAgentId
|
|
194
|
+
? graphNodesByAgentStatement.all(normalizedAgentId)
|
|
195
|
+
: graphNodesStatement.all());
|
|
196
|
+
const edgeRows = (normalizedAgentId
|
|
197
|
+
? graphEdgesByAgentStatement.all(normalizedAgentId)
|
|
198
|
+
: graphEdgesStatement.all());
|
|
199
|
+
return {
|
|
200
|
+
nodes: nodeRows.map(toGraphNode),
|
|
201
|
+
edges: edgeRows.map(toGraphEdge)
|
|
202
|
+
};
|
|
203
|
+
},
|
|
204
|
+
getGraphSummary: (agentId) => {
|
|
205
|
+
const normalizedAgentId = normalizeAgentFilter(agentId);
|
|
206
|
+
const nodeRows = (normalizedAgentId
|
|
207
|
+
? graphSummaryNodesByAgentStatement.all(normalizedAgentId)
|
|
208
|
+
: graphSummaryNodesStatement.all());
|
|
209
|
+
const edgeRows = (normalizedAgentId
|
|
210
|
+
? graphEdgesByAgentStatement.all(normalizedAgentId)
|
|
211
|
+
: graphEdgesStatement.all());
|
|
212
|
+
return {
|
|
213
|
+
nodes: nodeRows.map(toGraphNode),
|
|
214
|
+
edges: edgeRows.map(toGraphEdge)
|
|
215
|
+
};
|
|
216
|
+
},
|
|
217
|
+
getGraphNode: (id, agentId) => {
|
|
218
|
+
const normalizedAgentId = normalizeAgentFilter(agentId);
|
|
219
|
+
const row = (normalizedAgentId
|
|
220
|
+
? graphNodeByIdAndAgentStatement.get(id, normalizedAgentId)
|
|
221
|
+
: graphNodeByIdStatement.get(id));
|
|
222
|
+
return row ? toGraphNode(row) : undefined;
|
|
223
|
+
},
|
|
224
|
+
searchGraphNodeIds: (query, limit, agentId) => {
|
|
225
|
+
const normalizedQuery = query.trim().toLowerCase();
|
|
226
|
+
if (!normalizedQuery || limit <= 0) {
|
|
227
|
+
return [];
|
|
228
|
+
}
|
|
229
|
+
const normalizedAgentId = normalizeAgentFilter(agentId);
|
|
230
|
+
const likeQuery = `%${normalizedQuery}%`;
|
|
231
|
+
const metadataRows = (normalizedAgentId
|
|
232
|
+
? filterNodeIdsMetadataByAgentStatement.all(normalizedAgentId, likeQuery, likeQuery, likeQuery, limit)
|
|
233
|
+
: filterNodeIdsMetadataStatement.all(likeQuery, likeQuery, likeQuery, limit));
|
|
234
|
+
const ids = new Set(metadataRows.map((row) => row.id));
|
|
235
|
+
const remainingLimit = Math.max(limit - ids.size, 0);
|
|
236
|
+
if (remainingLimit > 0) {
|
|
237
|
+
const ftsQuery = toFtsQuery(normalizedQuery);
|
|
238
|
+
if (ftsQuery) {
|
|
239
|
+
const contentRows = (normalizedAgentId
|
|
240
|
+
? filterNodeIdsContentByAgentStatement.all(ftsQuery, normalizedAgentId, remainingLimit)
|
|
241
|
+
: filterNodeIdsContentStatement.all(ftsQuery, remainingLimit));
|
|
242
|
+
contentRows.forEach((row) => ids.add(row.id));
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
return Array.from(ids).slice(0, limit);
|
|
246
|
+
},
|
|
247
|
+
listAgents: () => listAgentsStatement.all().map((row) => ({
|
|
116
248
|
id: row.id,
|
|
117
249
|
documentCount: row.document_count
|
|
118
|
-
}))
|
|
119
|
-
}
|
|
250
|
+
}))
|
|
251
|
+
};
|
|
252
|
+
})();
|
|
253
|
+
const toGraphNode = (row) => ({
|
|
254
|
+
id: row.id,
|
|
255
|
+
agentId: row.agent_id,
|
|
256
|
+
title: row.title,
|
|
257
|
+
path: row.path,
|
|
258
|
+
content: row.content,
|
|
259
|
+
tags: JSON.parse(row.tags_json)
|
|
260
|
+
});
|
|
261
|
+
const toGraphEdge = (row) => ({
|
|
262
|
+
source: row.source,
|
|
263
|
+
target: row.target,
|
|
264
|
+
targetTitle: row.target_title,
|
|
265
|
+
weight: row.weight,
|
|
266
|
+
priority: row.priority
|
|
120
267
|
});
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import Database from 'better-sqlite3';
|
|
2
|
+
import { copyFileSync, existsSync, mkdirSync, renameSync, rmSync, unlinkSync } from 'node:fs';
|
|
3
|
+
import { dirname } from 'node:path';
|
|
4
|
+
const sqliteCorruptionHints = [
|
|
5
|
+
'database disk image is malformed',
|
|
6
|
+
'file is not a database',
|
|
7
|
+
'database is corrupted',
|
|
8
|
+
'malformed database schema',
|
|
9
|
+
'sqlite quick_check failed'
|
|
10
|
+
];
|
|
11
|
+
const normalizeMessage = (error) => error instanceof Error ? error.message.toLowerCase() : String(error).toLowerCase();
|
|
12
|
+
const isSqliteCorruptionError = (error) => sqliteCorruptionHints.some((hint) => normalizeMessage(error).includes(hint));
|
|
13
|
+
const safeUnlink = (path) => {
|
|
14
|
+
if (!existsSync(path)) {
|
|
15
|
+
return;
|
|
16
|
+
}
|
|
17
|
+
try {
|
|
18
|
+
unlinkSync(path);
|
|
19
|
+
}
|
|
20
|
+
catch {
|
|
21
|
+
// Ignore best-effort cleanup failures.
|
|
22
|
+
}
|
|
23
|
+
};
|
|
24
|
+
const clearSidecars = (databasePath) => {
|
|
25
|
+
safeUnlink(`${databasePath}-wal`);
|
|
26
|
+
safeUnlink(`${databasePath}-shm`);
|
|
27
|
+
};
|
|
28
|
+
const assertQuickCheck = (database) => {
|
|
29
|
+
const rows = database.prepare('PRAGMA quick_check').all();
|
|
30
|
+
const first = rows[0]?.quick_check?.toLowerCase() ?? 'ok';
|
|
31
|
+
if (first !== 'ok') {
|
|
32
|
+
throw new Error(`sqlite quick_check failed: ${first}`);
|
|
33
|
+
}
|
|
34
|
+
};
|
|
35
|
+
const archiveCorruptedDatabase = (databasePath) => {
|
|
36
|
+
if (!existsSync(databasePath)) {
|
|
37
|
+
return;
|
|
38
|
+
}
|
|
39
|
+
const archivedPath = `${databasePath}.corrupt-${Date.now()}`;
|
|
40
|
+
renameSync(databasePath, archivedPath);
|
|
41
|
+
};
|
|
42
|
+
const restoreFromBackupOrReset = (databasePath, backupPath) => {
|
|
43
|
+
clearSidecars(databasePath);
|
|
44
|
+
archiveCorruptedDatabase(databasePath);
|
|
45
|
+
if (existsSync(backupPath)) {
|
|
46
|
+
copyFileSync(backupPath, databasePath);
|
|
47
|
+
clearSidecars(databasePath);
|
|
48
|
+
return;
|
|
49
|
+
}
|
|
50
|
+
rmSync(databasePath, { force: true });
|
|
51
|
+
};
|
|
52
|
+
const openCheckedDatabase = (databasePath) => {
|
|
53
|
+
const database = new Database(databasePath);
|
|
54
|
+
try {
|
|
55
|
+
assertQuickCheck(database);
|
|
56
|
+
}
|
|
57
|
+
catch (error) {
|
|
58
|
+
database.close();
|
|
59
|
+
throw error;
|
|
60
|
+
}
|
|
61
|
+
return database;
|
|
62
|
+
};
|
|
63
|
+
export const createRecoverySnapshot = (database, backupPath) => {
|
|
64
|
+
const backupDirectory = dirname(backupPath);
|
|
65
|
+
const tempBackupPath = `${backupPath}.tmp`;
|
|
66
|
+
mkdirSync(backupDirectory, { recursive: true });
|
|
67
|
+
rmSync(tempBackupPath, { force: true });
|
|
68
|
+
database.prepare('VACUUM INTO ?').run(tempBackupPath);
|
|
69
|
+
renameSync(tempBackupPath, backupPath);
|
|
70
|
+
};
|
|
71
|
+
export const openDatabaseWithRecovery = (databasePath, backupPath) => {
|
|
72
|
+
mkdirSync(dirname(databasePath), { recursive: true });
|
|
73
|
+
try {
|
|
74
|
+
return openCheckedDatabase(databasePath);
|
|
75
|
+
}
|
|
76
|
+
catch (error) {
|
|
77
|
+
if (!isSqliteCorruptionError(error)) {
|
|
78
|
+
throw error;
|
|
79
|
+
}
|
|
80
|
+
restoreFromBackupOrReset(databasePath, backupPath);
|
|
81
|
+
return openCheckedDatabase(databasePath);
|
|
82
|
+
}
|
|
83
|
+
};
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
const schemaVersion =
|
|
1
|
+
const schemaVersion = 6;
|
|
2
2
|
const requiredTableColumns = {
|
|
3
3
|
documents: ['id', 'agent_id', 'title', 'path', 'content', 'tags_json', 'frontmatter_json', 'created_at', 'updated_at'],
|
|
4
4
|
chunks: ['id', 'document_id', 'ordinal', 'content', 'token_count', 'embedding_provider', 'embedding_json'],
|
|
@@ -67,7 +67,9 @@ export const createSchema = (database) => {
|
|
|
67
67
|
);
|
|
68
68
|
|
|
69
69
|
CREATE INDEX IF NOT EXISTS idx_documents_agent_title ON documents(agent_id, title);
|
|
70
|
+
CREATE INDEX IF NOT EXISTS idx_documents_agent_id ON documents(agent_id, id);
|
|
70
71
|
CREATE INDEX IF NOT EXISTS idx_chunks_document_ordinal ON chunks(document_id, ordinal);
|
|
72
|
+
CREATE INDEX IF NOT EXISTS idx_chunks_token_count ON chunks(token_count);
|
|
71
73
|
|
|
72
74
|
CREATE TABLE IF NOT EXISTS embedding_buckets (
|
|
73
75
|
bucket TEXT NOT NULL,
|
|
@@ -92,6 +94,7 @@ export const createSchema = (database) => {
|
|
|
92
94
|
|
|
93
95
|
CREATE INDEX IF NOT EXISTS idx_links_to_document_id ON links(to_document_id);
|
|
94
96
|
CREATE INDEX IF NOT EXISTS idx_links_to_title_key ON links(to_title_key);
|
|
97
|
+
CREATE INDEX IF NOT EXISTS idx_links_source_weight ON links(from_document_id, weight DESC, to_title);
|
|
95
98
|
|
|
96
99
|
CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
|
|
97
100
|
chunk_id UNINDEXED,
|
|
@@ -51,25 +51,6 @@ const mergeHybridResults = (ftsResults, semanticResults, limit) => {
|
|
|
51
51
|
return sortByScore(Array.from(rows.values())).slice(0, limit);
|
|
52
52
|
};
|
|
53
53
|
const placeholders = (count) => Array.from({ length: count }, () => '?').join(', ');
|
|
54
|
-
const readAllSemanticRows = (database, normalizedAgentId) => {
|
|
55
|
-
const semanticAgentFilter = normalizedAgentId ? 'WHERE documents.agent_id = ?' : '';
|
|
56
|
-
return database
|
|
57
|
-
.prepare(`
|
|
58
|
-
SELECT
|
|
59
|
-
documents.id AS document_id,
|
|
60
|
-
documents.agent_id AS agent_id,
|
|
61
|
-
documents.title AS title,
|
|
62
|
-
documents.path AS path,
|
|
63
|
-
chunks.id AS chunk_id,
|
|
64
|
-
chunks.content AS content,
|
|
65
|
-
documents.tags_json AS tags_json,
|
|
66
|
-
chunks.embedding_json AS embedding_json
|
|
67
|
-
FROM chunks
|
|
68
|
-
JOIN documents ON documents.id = chunks.document_id
|
|
69
|
-
${semanticAgentFilter}
|
|
70
|
-
`)
|
|
71
|
-
.all(...(normalizedAgentId ? [normalizedAgentId] : []));
|
|
72
|
-
};
|
|
73
54
|
const readBucketedSemanticRows = (database, normalizedAgentId, queryEmbedding, limit) => {
|
|
74
55
|
const buckets = createEmbeddingBuckets(queryEmbedding);
|
|
75
56
|
if (buckets.length === 0) {
|
|
@@ -100,57 +81,108 @@ const readBucketedSemanticRows = (database, normalizedAgentId, queryEmbedding, l
|
|
|
100
81
|
`)
|
|
101
82
|
.all(...params);
|
|
102
83
|
};
|
|
103
|
-
const
|
|
104
|
-
const
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
84
|
+
export const createSearchReader = (database) => (() => {
|
|
85
|
+
const ftsRowsStatement = database.prepare(`
|
|
86
|
+
SELECT
|
|
87
|
+
documents.id AS document_id,
|
|
88
|
+
documents.agent_id AS agent_id,
|
|
89
|
+
documents.title AS title,
|
|
90
|
+
documents.path AS path,
|
|
91
|
+
chunks_fts.chunk_id AS chunk_id,
|
|
92
|
+
chunks_fts.content AS content,
|
|
93
|
+
bm25(chunks_fts) * -1 AS score,
|
|
94
|
+
documents.tags_json AS tags_json
|
|
95
|
+
FROM chunks_fts
|
|
96
|
+
JOIN documents ON documents.id = chunks_fts.document_id
|
|
97
|
+
WHERE chunks_fts MATCH ?
|
|
98
|
+
ORDER BY bm25(chunks_fts)
|
|
99
|
+
LIMIT ?
|
|
100
|
+
`);
|
|
101
|
+
const ftsRowsByAgentStatement = database.prepare(`
|
|
102
|
+
SELECT
|
|
103
|
+
documents.id AS document_id,
|
|
104
|
+
documents.agent_id AS agent_id,
|
|
105
|
+
documents.title AS title,
|
|
106
|
+
documents.path AS path,
|
|
107
|
+
chunks_fts.chunk_id AS chunk_id,
|
|
108
|
+
chunks_fts.content AS content,
|
|
109
|
+
bm25(chunks_fts) * -1 AS score,
|
|
110
|
+
documents.tags_json AS tags_json
|
|
111
|
+
FROM chunks_fts
|
|
112
|
+
JOIN documents ON documents.id = chunks_fts.document_id
|
|
113
|
+
WHERE chunks_fts MATCH ?
|
|
114
|
+
AND documents.agent_id = ?
|
|
115
|
+
ORDER BY bm25(chunks_fts)
|
|
116
|
+
LIMIT ?
|
|
117
|
+
`);
|
|
118
|
+
const semanticRowsStatement = database.prepare(`
|
|
119
|
+
SELECT
|
|
120
|
+
documents.id AS document_id,
|
|
121
|
+
documents.agent_id AS agent_id,
|
|
122
|
+
documents.title AS title,
|
|
123
|
+
documents.path AS path,
|
|
124
|
+
chunks.id AS chunk_id,
|
|
125
|
+
chunks.content AS content,
|
|
126
|
+
documents.tags_json AS tags_json,
|
|
127
|
+
chunks.embedding_json AS embedding_json
|
|
128
|
+
FROM chunks
|
|
129
|
+
JOIN documents ON documents.id = chunks.document_id
|
|
130
|
+
ORDER BY chunks.token_count ASC, documents.title ASC
|
|
131
|
+
LIMIT ?
|
|
132
|
+
`);
|
|
133
|
+
const semanticRowsByAgentStatement = database.prepare(`
|
|
134
|
+
SELECT
|
|
135
|
+
documents.id AS document_id,
|
|
136
|
+
documents.agent_id AS agent_id,
|
|
137
|
+
documents.title AS title,
|
|
138
|
+
documents.path AS path,
|
|
139
|
+
chunks.id AS chunk_id,
|
|
140
|
+
chunks.content AS content,
|
|
141
|
+
documents.tags_json AS tags_json,
|
|
142
|
+
chunks.embedding_json AS embedding_json
|
|
143
|
+
FROM chunks
|
|
144
|
+
JOIN documents ON documents.id = chunks.document_id
|
|
145
|
+
WHERE documents.agent_id = ?
|
|
146
|
+
ORDER BY chunks.token_count ASC, documents.title ASC
|
|
147
|
+
LIMIT ?
|
|
148
|
+
`);
|
|
149
|
+
const readAllSemanticRowsForLimit = (normalizedAgentId, limit) => (normalizedAgentId
|
|
150
|
+
? semanticRowsByAgentStatement.all(normalizedAgentId, limit)
|
|
151
|
+
: semanticRowsStatement.all(limit));
|
|
152
|
+
const readSemanticRows = (normalizedAgentId, queryEmbedding, limit) => {
|
|
153
|
+
const candidateLimit = Math.min(Math.max(limit * 96, 768), 12_000);
|
|
154
|
+
const bucketedRows = readBucketedSemanticRows(database, normalizedAgentId, queryEmbedding, candidateLimit);
|
|
155
|
+
return bucketedRows.length > 0 ? bucketedRows : readAllSemanticRowsForLimit(normalizedAgentId, candidateLimit);
|
|
156
|
+
};
|
|
157
|
+
return {
|
|
158
|
+
search: (query, limit, agentId, mode = 'hybrid', queryEmbedding = []) => {
|
|
159
|
+
const normalizedQuery = query.trim();
|
|
160
|
+
if (!normalizedQuery || limit <= 0) {
|
|
161
|
+
return [];
|
|
162
|
+
}
|
|
163
|
+
const normalizedAgentId = normalizeAgentFilter(agentId);
|
|
164
|
+
const ftsQuery = toFtsQuery(query);
|
|
165
|
+
const expandedLimit = Math.max(limit * 4, 24);
|
|
166
|
+
const ftsRows = mode === 'semantic' || !ftsQuery
|
|
167
|
+
? []
|
|
168
|
+
: (normalizedAgentId
|
|
169
|
+
? ftsRowsByAgentStatement.all(ftsQuery, normalizedAgentId, expandedLimit)
|
|
170
|
+
: ftsRowsStatement.all(ftsQuery, expandedLimit));
|
|
171
|
+
const ftsResults = ftsRows.map((row, index) => toSearchResult(row, toTextScore(index, ftsRows.length), toTextScore(index, ftsRows.length), 0, 'fts'));
|
|
172
|
+
const semanticRows = mode === 'fts' || queryEmbedding.length === 0 ? [] : readSemanticRows(normalizedAgentId, queryEmbedding, expandedLimit);
|
|
173
|
+
const semanticResults = sortByScore(semanticRows
|
|
174
|
+
.map((row) => {
|
|
175
|
+
const semanticScore = Math.max(0, cosineSimilarity(queryEmbedding, parseJsonArray(row.embedding_json).filter((value) => typeof value === 'number')));
|
|
176
|
+
return toSearchResult(row, semanticScore, 0, semanticScore, 'semantic');
|
|
177
|
+
})
|
|
178
|
+
.filter((result) => result.semanticScore > 0)).slice(0, expandedLimit);
|
|
179
|
+
if (mode === 'fts') {
|
|
180
|
+
return ftsResults.slice(0, limit);
|
|
181
|
+
}
|
|
182
|
+
if (mode === 'semantic') {
|
|
183
|
+
return semanticResults.slice(0, limit);
|
|
184
|
+
}
|
|
185
|
+
return mergeHybridResults(ftsResults, semanticResults, limit);
|
|
153
186
|
}
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
});
|
|
187
|
+
};
|
|
188
|
+
})();
|