@andespindola/brainlink 0.1.0-beta.9 → 0.1.0-beta.91
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +8 -5
- package/CHANGELOG.md +26 -2
- package/CONTRIBUTING.md +2 -2
- package/COPYRIGHT.md +5 -0
- package/README.md +146 -17
- package/SECURITY.md +1 -1
- package/dist/application/analyze-vault.js +7 -7
- package/dist/application/build-context.js +56 -1
- package/dist/application/dedupe-notes.js +226 -0
- package/dist/application/frontend/client-css.js +154 -102
- package/dist/application/frontend/client-html.js +49 -40
- package/dist/application/frontend/client-js.js +3118 -167
- package/dist/application/frontend/client-worker-js.js +66 -0
- package/dist/application/get-graph-layout.js +18 -6
- package/dist/application/get-graph-node.js +12 -0
- package/dist/application/get-graph-summary.js +12 -0
- package/dist/application/get-graph.js +3 -3
- package/dist/application/import-legacy-sqlite.js +296 -0
- package/dist/application/index-vault.js +252 -19
- package/dist/application/list-agents.js +3 -3
- package/dist/application/list-links.js +5 -5
- package/dist/application/offline-pack-backup.js +44 -0
- package/dist/application/search-graph-node-ids.js +12 -0
- package/dist/application/search-knowledge.js +25 -10
- package/dist/application/server/routes.js +102 -1
- package/dist/application/start-server.js +75 -4
- package/dist/application/watch-vault.js +23 -2
- package/dist/benchmarks/large-vault.js +1 -1
- package/dist/cli/commands/agent-commands.js +20 -3
- package/dist/cli/commands/write-commands.js +818 -8
- package/dist/domain/context.js +53 -11
- package/dist/domain/embeddings.js +2 -1
- package/dist/domain/graph-layout.js +67 -16
- package/dist/domain/middle-out.js +18 -0
- package/dist/infrastructure/config.js +38 -0
- package/dist/infrastructure/file-index.js +358 -0
- package/dist/infrastructure/file-system-vault.js +15 -0
- package/dist/infrastructure/index-state.js +56 -0
- package/dist/infrastructure/private-pack-codec.js +134 -0
- package/dist/infrastructure/search-packs.js +452 -0
- package/dist/infrastructure/session-state.js +57 -2
- package/dist/mcp/server.js +11 -1
- package/dist/mcp/tools.js +215 -3
- package/docs/AGENT_USAGE.md +103 -16
- package/docs/ARCHITECTURE.md +25 -26
- package/docs/QUICKSTART.md +9 -1
- package/package.json +6 -4
- package/dist/infrastructure/sqlite/document-writer.js +0 -51
- package/dist/infrastructure/sqlite/graph-reader.js +0 -120
- package/dist/infrastructure/sqlite/schema.js +0 -111
- package/dist/infrastructure/sqlite/search-reader.js +0 -156
- package/dist/infrastructure/sqlite/types.js +0 -1
- package/dist/infrastructure/sqlite-index.js +0 -25
package/dist/domain/context.js
CHANGED
|
@@ -1,13 +1,50 @@
|
|
|
1
|
+
import { middleOutIndices } from './middle-out.js';
|
|
2
|
+
const maxSectionsPerDocument = 3;
|
|
3
|
+
/**
 * Comparator that ranks results by descending `score`; when the score
 * difference is zero (or not a usable number) it falls back to comparing titles
 * with `localeCompare`.
 */
const byScore = (left, right) => {
    const scoreDelta = right.score - left.score;
    if (scoreDelta) {
        return scoreDelta;
    }
    return left.title.localeCompare(right.title);
};
|
|
4
|
+
/**
 * Comparator that orders results by ascending `chunkOrdinal`. A missing
 * (null/undefined) ordinal is treated as `Number.MAX_SAFE_INTEGER`, so such
 * results sort last.
 */
const byOrdinal = (left, right) => {
    const leftOrdinal = left.chunkOrdinal ?? Number.MAX_SAFE_INTEGER;
    const rightOrdinal = right.chunkOrdinal ?? Number.MAX_SAFE_INTEGER;
    return leftOrdinal - rightOrdinal;
};
|
|
5
|
+
/**
 * Reorders one document's results "middle-out": the highest-scoring chunk comes
 * first, followed by its neighbours in ordinal order, alternating outwards.
 *
 * @param results Results that belong to a single document.
 * @returns The input array itself when it has at most one entry; otherwise a
 *   new array. If the best chunk cannot be located in the ordinal ordering, the
 *   results are returned sorted by score instead.
 */
const middleOutDocumentResults = (results) => {
    if (results.length <= 1) {
        return results;
    }
    const inDocumentOrder = results.slice().sort(byOrdinal);
    const [bestMatch] = results.slice().sort(byScore);
    const pivotChunkId = bestMatch?.chunkId;
    const pivotIndex = inDocumentOrder.findIndex(({ chunkId }) => chunkId === pivotChunkId);
    if (pivotIndex >= 0) {
        return middleOutIndices(inDocumentOrder.length, pivotIndex)
            .map((position) => inDocumentOrder[position]);
    }
    return results.slice().sort(byScore);
};
|
|
1
17
|
export const selectContextSections = (results, maxTokens) => {
|
|
2
|
-
const
|
|
3
|
-
const
|
|
4
|
-
|
|
5
|
-
|
|
18
|
+
const grouped = results.reduce((state, result) => {
|
|
19
|
+
const current = state.get(result.documentId) ?? [];
|
|
20
|
+
state.set(result.documentId, [...current, result]);
|
|
21
|
+
return state;
|
|
22
|
+
}, new Map());
|
|
23
|
+
const documentOrder = Array.from(results.reduce((state, result) => {
|
|
24
|
+
if (!state.has(result.documentId)) {
|
|
25
|
+
state.set(result.documentId, result.score);
|
|
6
26
|
}
|
|
7
|
-
return
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
27
|
+
return state;
|
|
28
|
+
}, new Map()).entries())
|
|
29
|
+
.sort((left, right) => right[1] - left[1] || left[0].localeCompare(right[0]))
|
|
30
|
+
.map(([documentId]) => documentId);
|
|
31
|
+
const selected = documentOrder.reduce((state, documentId) => {
|
|
32
|
+
const ordered = middleOutDocumentResults(grouped.get(documentId) ?? []);
|
|
33
|
+
let usedTokens = state.usedTokens;
|
|
34
|
+
let sections = state.sections;
|
|
35
|
+
let seenChunks = state.seenChunks;
|
|
36
|
+
for (let index = 0; index < ordered.length && index < maxSectionsPerDocument; index += 1) {
|
|
37
|
+
const result = ordered[index];
|
|
38
|
+
if (seenChunks.has(result.chunkId)) {
|
|
39
|
+
continue;
|
|
40
|
+
}
|
|
41
|
+
const tokenCost = Math.ceil(result.content.length / 4);
|
|
42
|
+
if (usedTokens + tokenCost > maxTokens) {
|
|
43
|
+
break;
|
|
44
|
+
}
|
|
45
|
+
usedTokens += tokenCost;
|
|
46
|
+
sections = [
|
|
47
|
+
...sections,
|
|
11
48
|
{
|
|
12
49
|
title: result.title,
|
|
13
50
|
path: result.path,
|
|
@@ -16,13 +53,18 @@ export const selectContextSections = (results, maxTokens) => {
|
|
|
16
53
|
searchMode: result.searchMode,
|
|
17
54
|
tags: result.tags
|
|
18
55
|
}
|
|
19
|
-
]
|
|
20
|
-
|
|
56
|
+
];
|
|
57
|
+
seenChunks = new Set([...seenChunks, result.chunkId]);
|
|
58
|
+
}
|
|
59
|
+
return {
|
|
60
|
+
usedTokens,
|
|
61
|
+
sections,
|
|
62
|
+
seenChunks
|
|
21
63
|
};
|
|
22
64
|
}, {
|
|
23
65
|
usedTokens: 0,
|
|
24
66
|
sections: [],
|
|
25
|
-
|
|
67
|
+
seenChunks: new Set()
|
|
26
68
|
});
|
|
27
69
|
return selected.sections;
|
|
28
70
|
};
|
|
@@ -58,7 +58,8 @@ const tokenize = (input) => input
|
|
|
58
58
|
.match(tokenPattern)
|
|
59
59
|
?.map(normalizeToken)
|
|
60
60
|
.filter((token) => token.length > 1 && !stopWords.has(token)) ?? [];
|
|
61
|
-
const
|
|
61
|
+
/**
 * Looks up the alias list for a token. Only own properties of `aliases` are
 * consulted; a missing or nullish entry yields an empty array.
 */
const getAliasesForToken = (token) => {
    if (!Object.hasOwn(aliases, token)) {
        return [];
    }
    return aliases[token] ?? [];
};
|
|
62
|
+
/**
 * Expands a token list so that each token is immediately followed by its
 * aliases (as returned by `getAliasesForToken`).
 */
const expandTokens = (tokens) => {
    const groups = tokens.map((token) => [token, ...getAliasesForToken(token)]);
    return groups.flat();
};
|
|
62
63
|
/**
 * 32-bit multiplicative hash of a string: starts from 2166136261, and for each
 * iterated character XORs in `charCodeAt(0)` and multiplies by 16777619 with
 * `Math.imul`. The result is coerced to an unsigned 32-bit integer.
 */
const hash = (value) => {
    let state = 2166136261;
    for (const char of Array.from(value)) {
        state = Math.imul(state ^ char.charCodeAt(0), 16777619);
    }
    return state >>> 0;
};
|
|
63
64
|
const featureHash = (feature) => {
|
|
64
65
|
const value = hash(feature);
|
|
@@ -20,6 +20,7 @@ const segmentAngles = {
|
|
|
20
20
|
Evaluation: 2.08,
|
|
21
21
|
Security: 2.82
|
|
22
22
|
};
|
|
23
|
+
const hubTitlePattern = /\b(memory\s*hub|knowledge\s*root|moc|map)\b/i;
|
|
23
24
|
/**
 * Signed 32-bit rolling hash of a string: for each iterated character the
 * running value becomes `hash * 31 + charCodeAt(0)` (written as a shift and a
 * subtraction), truncated to int32 with `| 0`.
 */
const hashText = (value) => {
    let accumulator = 0;
    for (const char of Array.from(value)) {
        accumulator = ((accumulator << 5) - accumulator + char.charCodeAt(0)) | 0;
    }
    return accumulator;
};
|
|
24
25
|
const jitter = (value, range) => {
|
|
25
26
|
const normalized = Math.abs(hashText(value) % 1000) / 1000;
|
|
@@ -45,26 +46,61 @@ const countDegrees = (edges) => edges.reduce((degrees, edge) => {
|
|
|
45
46
|
? incrementDegreeBy(incrementDegreeBy(degrees, edge.source, weight), edge.target, weight)
|
|
46
47
|
: incrementDegreeBy(degrees, edge.source, weight);
|
|
47
48
|
}, new Map());
|
|
48
|
-
const uniqueIds = (ids) => Array.from(new Set(ids));
|
|
49
49
|
const createAdjacency = (nodes, edges) => {
|
|
50
50
|
const nodeIds = new Set(nodes.map((node) => node.id));
|
|
51
|
-
const
|
|
52
|
-
|
|
51
|
+
const adjacency = new Map(nodes.map((node) => [node.id, new Set()]));
|
|
52
|
+
edges.forEach((edge) => {
|
|
53
53
|
if (!edge.target || !nodeIds.has(edge.source) || !nodeIds.has(edge.target)) {
|
|
54
|
-
return
|
|
54
|
+
return;
|
|
55
55
|
}
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
]);
|
|
61
|
-
}, emptyAdjacency);
|
|
56
|
+
adjacency.get(edge.source)?.add(edge.target);
|
|
57
|
+
adjacency.get(edge.target)?.add(edge.source);
|
|
58
|
+
});
|
|
59
|
+
return new Map(Array.from(adjacency.entries(), ([id, neighbors]) => [id, Array.from(neighbors)]));
|
|
62
60
|
};
|
|
63
61
|
/** Comparator that orders nodes by `title` using `localeCompare`. */
const byTitle = (left, right) => {
    const { title: leftTitle } = left;
    const { title: rightTitle } = right;
    return leftTitle.localeCompare(rightTitle);
};
|
|
64
62
|
/**
 * Builds a comparator that orders nodes by descending degree (looked up by
 * `id` in `degrees`, defaulting to 0) and breaks exact ties with `byTitle`.
 */
const byDegreeThenTitle = (degrees) => {
    const degreeOf = (node) => degrees.get(node.id) ?? 0;
    return (left, right) => {
        const degreeGap = degreeOf(right) - degreeOf(left);
        if (degreeGap !== 0) {
            return degreeGap;
        }
        return byTitle(left, right);
    };
};
|
|
66
|
+
/**
 * Scores how strongly a node's title marks it as a hub.
 *
 * @returns 5 for a title that is exactly "memory hub" (trimmed,
 *   case-insensitive), 4 for exactly "knowledge root", 3 when the title
 *   contains the word "moc", 2 when it matches `hubTitlePattern`, otherwise 0.
 */
const hubScore = (node) => {
    const normalizedTitle = node.title.trim().toLowerCase();
    switch (normalizedTitle) {
        case 'memory hub':
            return 5;
        case 'knowledge root':
            return 4;
        default:
            break;
    }
    if (/\bmoc\b/i.test(node.title)) {
        return 3;
    }
    if (hubTitlePattern.test(node.title)) {
        return 2;
    }
    return 0;
};
|
|
76
|
+
/**
 * Picks the id of the node that best qualifies as the primary hub.
 *
 * Only nodes with a positive `hubScore` are candidates. They are ranked by hub
 * score (descending), then by degree from `degrees` (descending, default 0),
 * then by title.
 *
 * @returns The winning node's id, or `null` when there is no candidate.
 */
const selectPrimaryHubId = (nodes, degrees) => {
    const degreeOf = (node) => degrees.get(node.id) ?? 0;
    const compareHubs = (left, right) => {
        const scoreGap = hubScore(right) - hubScore(left);
        if (scoreGap !== 0) {
            return scoreGap;
        }
        const degreeGap = degreeOf(right) - degreeOf(left);
        return degreeGap !== 0 ? degreeGap : left.title.localeCompare(right.title);
    };
    const candidates = Array.from(nodes).filter((node) => hubScore(node) > 0);
    candidates.sort(compareHubs);
    const [winner] = candidates;
    return winner?.id ?? null;
};
|
|
90
|
+
/**
 * Translates a layout so that the node with id `nodeId` sits at the origin.
 *
 * @returns The original `nodes` array when `nodeId` is falsy or no node has
 *   that id; otherwise a new array of node copies with `x`/`y` shifted by the
 *   anchor node's coordinates.
 */
const centerLayoutByNode = (nodes, nodeId) => {
    const anchor = nodeId ? nodes.find((candidate) => candidate.id === nodeId) : undefined;
    if (!anchor) {
        return nodes;
    }
    const { x: offsetX, y: offsetY } = anchor;
    return nodes.map((node) => {
        const shifted = { ...node };
        shifted.x = node.x - offsetX;
        shifted.y = node.y - offsetY;
        return shifted;
    });
};
|
|
68
104
|
/**
 * Reports whether a node should seed a segment: either its group key is
 * '00-maps' or its title contains the standalone word "moc" or "map".
 */
const naturalSegmentSeed = (node) => {
    if (groupKey(node) === '00-maps') {
        return true;
    }
    return /\b(moc|map)\b/i.test(node.title);
};
|
|
69
105
|
/**
 * Derives a segment label from a node title by removing a leading "MOC "
 * prefix and a trailing " Memory Map" suffix (both case-insensitive) and
 * trimming. If nothing is left, the untouched title is returned.
 */
const segmentName = (node) => {
    const withoutPrefix = node.title.replace(/^MOC\s+/i, '');
    const stripped = withoutPrefix.replace(/\s+Memory Map$/i, '').trim();
    return stripped === '' ? node.title : stripped;
};
|
|
70
106
|
const collectComponent = (adjacency, startId, visited) => {
|
|
@@ -117,18 +153,31 @@ const assignSegments = (nodes, edges, degrees) => {
|
|
|
117
153
|
}
|
|
118
154
|
return new Map(nodes.map((node) => [node.id, assignments.get(node.id) ?? groupLabel(groupKey(node))]));
|
|
119
155
|
};
|
|
120
|
-
const groupNodesBySegment = (nodes, segments) =>
|
|
121
|
-
const
|
|
122
|
-
|
|
123
|
-
|
|
156
|
+
/**
 * Buckets nodes by segment name.
 *
 * The segment comes from `segments` (keyed by node id); nodes without an entry
 * fall back to `groupLabel(groupKey(node))`.
 *
 * @returns A new Map from segment name to the nodes in that segment, with
 *   keys and nodes in first-seen order.
 */
const groupNodesBySegment = (nodes, segments) => {
    const buckets = new Map();
    nodes.forEach((node) => {
        const segment = segments.get(node.id) ?? groupLabel(groupKey(node));
        if (!buckets.has(segment)) {
            buckets.set(segment, []);
        }
        buckets.get(segment).push(node);
    });
    return buckets;
};
|
|
124
169
|
/**
 * Resolves the angle (in radians) at which a segment is placed.
 *
 * A segment listed in `segmentAngles` uses its fixed angle; any other segment
 * is spread evenly around the circle by its index, starting at -PI/2.
 *
 * The lookup is restricted to own properties: segment names can be arbitrary
 * strings, and a plain `segmentAngles[segment]` would pick up inherited members
 * such as `constructor` or `toString`, returning a function instead of a number
 * and producing NaN coordinates downstream.
 *
 * @param segment Segment name.
 * @param index Position of the segment among all segments.
 * @param count Total number of segments (values below 1 are treated as 1).
 */
const segmentAngle = (segment, index, count) => {
    const fixedAngle = Object.hasOwn(segmentAngles, segment) ? segmentAngles[segment] : undefined;
    return fixedAngle ?? (Math.PI * 2 * index) / Math.max(count, 1) - Math.PI / 2;
};
|
|
170
|
+
/**
 * Computes the petal spread for a segment: 180 plus 6 per doubling of
 * (size + 1), with sizes below 1 treated as 1.
 */
const petalSpreadForSegmentSize = (size) => 180 + Math.log2(Math.max(size, 1) + 1) * 6;
|
|
125
174
|
const createSegmentNodes = (segments, degrees, segmentCount) => ([segment, nodes], segmentIndex) => {
|
|
126
175
|
const sortedNodes = [...nodes].sort(byDegreeThenTitle(degrees));
|
|
127
176
|
const angle = segmentAngle(segment, segmentIndex, segmentCount);
|
|
128
177
|
const baseRadius = segmentCount === 1 ? 0 : 340 + Math.min(sortedNodes.length, 22) * 10;
|
|
129
178
|
const centerX = Math.cos(angle) * baseRadius;
|
|
130
179
|
const centerY = Math.sin(angle) * (baseRadius * 0.78);
|
|
131
|
-
const petalSpread =
|
|
180
|
+
const petalSpread = petalSpreadForSegmentSize(sortedNodes.length);
|
|
132
181
|
return sortedNodes.map((node, index) => {
|
|
133
182
|
const localAngle = index * 2.399963 + jitter(node.title, 0.42);
|
|
134
183
|
const localRadius = Math.sqrt(index + 1) * petalSpread;
|
|
@@ -240,8 +289,10 @@ export const createCauliflowerGraphLayout = (graph) => {
|
|
|
240
289
|
const segmentGroups = Array.from(groupNodesBySegment(graph.nodes, segments).entries())
|
|
241
290
|
.sort(([left], [right]) => left.localeCompare(right));
|
|
242
291
|
const nodes = relaxCollisions(segmentGroups.flatMap(createSegmentNodes(segments, degrees, segmentGroups.length)));
|
|
292
|
+
const primaryHubId = selectPrimaryHubId(graph.nodes, degrees);
|
|
293
|
+
const centeredNodes = centerLayoutByNode(nodes, primaryHubId);
|
|
243
294
|
return {
|
|
244
|
-
nodes,
|
|
295
|
+
nodes: centeredNodes,
|
|
245
296
|
edges: graph.edges
|
|
246
297
|
};
|
|
247
298
|
};
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
 * Lists the indices `0..size-1` in "middle-out" order: the pivot first, then
 * its neighbours at increasing distance, left before right.
 *
 * Example: `middleOutIndices(5, 2)` returns `[2, 1, 3, 0, 4]`.
 *
 * Inputs are normalised so the loop always terminates:
 * - a non-finite or non-positive `size` yields `[]`;
 * - a fractional `size` is rounded up, which matches what the loop bound
 *   `indices.length < size` already produced for in-range pivots;
 * - `pivotIndex` is floored and clamped into range; a pivot that is not a
 *   number (NaN, undefined) falls back to 0. Previously such a pivot made both
 *   range checks fail on every iteration, so the loop never ended.
 *
 * @param size Number of indices to produce.
 * @param pivotIndex Index to start from.
 * @returns Array of distinct indices covering the whole range.
 */
export const middleOutIndices = (size, pivotIndex) => {
    if (!Number.isFinite(size) || size <= 0) {
        return [];
    }
    const count = Math.ceil(size);
    const requestedPivot = Math.floor(pivotIndex);
    const pivot = Number.isNaN(requestedPivot)
        ? 0
        : Math.max(0, Math.min(requestedPivot, count - 1));
    const indices = [pivot];
    // pivot is an integer inside [0, count - 1], so every offset adds at least
    // one index until the whole range is covered.
    for (let offset = 1; indices.length < count; offset += 1) {
        const left = pivot - offset;
        const right = pivot + offset;
        if (left >= 0) {
            indices.push(left);
        }
        if (right < count) {
            indices.push(right);
        }
    }
    return indices;
};
|
|
@@ -15,6 +15,13 @@ export const defaultBrainlinkConfig = {
|
|
|
15
15
|
embeddingProvider: 'local',
|
|
16
16
|
defaultSearchMode: 'hybrid',
|
|
17
17
|
chunkSize: 1200,
|
|
18
|
+
searchPack: {
|
|
19
|
+
rowChunkSize: 5_000,
|
|
20
|
+
compressionLevel: 5,
|
|
21
|
+
useDictionary: true,
|
|
22
|
+
guardrailMinSavingsPercent: 8,
|
|
23
|
+
guardrailMaxLatencyRegressionPercent: 5
|
|
24
|
+
},
|
|
18
25
|
agentProfiles: {}
|
|
19
26
|
};
|
|
20
27
|
const configFilenames = ['brainlink.config.json', '.brainlink.json'];
|
|
@@ -37,6 +44,36 @@ const sanitizeEmbeddingProvider = (value) => typeof value === 'string' && embedd
|
|
|
37
44
|
/**
 * Returns `value` when it is a string contained in `searchModes`; otherwise
 * returns `fallback`, which defaults to the configured default search mode.
 */
export const sanitizeSearchMode = (value, fallback = defaultBrainlinkConfig.defaultSearchMode) => {
    const isKnownMode = typeof value === 'string' && searchModes.has(value);
    return isKnownMode ? value : fallback;
};
|
|
38
45
|
/**
 * Keeps only the non-blank string entries of `value`; anything that is not an
 * array yields an empty list.
 */
const sanitizeAllowedVaults = (value) => {
    if (!Array.isArray(value)) {
        return [];
    }
    const isUsablePath = (item) => typeof item === 'string' && item.trim().length > 0;
    return value.filter(isUsablePath);
};
|
|
39
46
|
/**
 * Returns `value` when it is a finite number greater than zero, otherwise
 * `undefined`.
 */
const sanitizePositiveNumber = (value) => {
    const isPositiveFinite = typeof value === 'number' && Number.isFinite(value) && value > 0;
    return isPositiveFinite ? value : undefined;
};
|
|
47
|
+
/**
 * Coerces a numeric setting to an integer within `[minimum, maximum]`.
 *
 * @param value Candidate value; anything but a finite number yields `fallback`.
 * @param fallback Returned as-is for invalid input (it is not clamped).
 * @param minimum Lower bound applied after rounding.
 * @param maximum Upper bound applied after rounding.
 * @returns `Math.round(value)` limited to the bounds, or `fallback`.
 */
const sanitizeIntegerInRange = (value, fallback, minimum, maximum) => {
    const isUsable = typeof value === 'number' && Number.isFinite(value);
    if (!isUsable) {
        return fallback;
    }
    const rounded = Math.round(value);
    // The lower bound is checked first, mirroring the original branch order.
    return rounded < minimum ? minimum : rounded > maximum ? maximum : rounded;
};
|
|
60
|
+
/**
 * Validates the `searchPack` section of a user config.
 *
 * A non-record input returns the default `searchPack` object itself. Otherwise
 * each field is checked independently and replaced by its default when invalid:
 * - `rowChunkSize`: integer limited to 100..100000
 * - `compressionLevel`: integer limited to 0..11
 * - `useDictionary`: must be a boolean
 * - `guardrailMinSavingsPercent`: finite number limited to 0..95
 * - `guardrailMaxLatencyRegressionPercent`: finite number limited to 0..300
 */
const sanitizeSearchPackConfig = (value) => {
    const defaults = defaultBrainlinkConfig.searchPack;
    if (!isRecord(value)) {
        return defaults;
    }
    const clampPercent = (candidate, ceiling, defaultValue) => {
        if (typeof candidate !== 'number' || !Number.isFinite(candidate)) {
            return defaultValue;
        }
        return Math.max(0, Math.min(ceiling, candidate));
    };
    const { rowChunkSize, compressionLevel, useDictionary } = value;
    return {
        rowChunkSize: sanitizeIntegerInRange(rowChunkSize, defaults.rowChunkSize, 100, 100_000),
        compressionLevel: sanitizeIntegerInRange(compressionLevel, defaults.compressionLevel, 0, 11),
        useDictionary: typeof useDictionary === 'boolean' ? useDictionary : defaults.useDictionary,
        guardrailMinSavingsPercent: clampPercent(value.guardrailMinSavingsPercent, 95, defaults.guardrailMinSavingsPercent),
        guardrailMaxLatencyRegressionPercent: clampPercent(value.guardrailMaxLatencyRegressionPercent, 300, defaults.guardrailMaxLatencyRegressionPercent)
    };
};
|
|
40
77
|
const sanitizeAgentProfile = (value) => {
|
|
41
78
|
if (!isRecord(value)) {
|
|
42
79
|
return null;
|
|
@@ -130,6 +167,7 @@ const sanitizeConfig = (value) => ({
|
|
|
130
167
|
: defaultBrainlinkConfig.defaultContextTokens,
|
|
131
168
|
allowedVaults: [...sanitizeAllowedVaults(value.allowedVaults), ...readAllowedVaultsFromEnv()],
|
|
132
169
|
chunkSize: typeof value.chunkSize === 'number' && value.chunkSize > 0 ? value.chunkSize : defaultBrainlinkConfig.chunkSize,
|
|
170
|
+
searchPack: sanitizeSearchPackConfig(value.searchPack),
|
|
133
171
|
embeddingProvider: sanitizeEmbeddingProvider(value.embeddingProvider),
|
|
134
172
|
defaultSearchMode: sanitizeSearchMode(value.defaultSearchMode),
|
|
135
173
|
agentProfiles: sanitizeAgentProfiles(value.agentProfiles)
|
|
@@ -0,0 +1,358 @@
|
|
|
1
|
+
import { mkdir, readFile, rename, stat, writeFile } from 'node:fs/promises';
|
|
2
|
+
import { dirname, join } from 'node:path';
|
|
3
|
+
import { cosineSimilarity } from '../domain/embeddings.js';
|
|
4
|
+
const queryTokenPattern = /[\p{L}\p{N}_-]+/gu;
|
|
5
|
+
const indexCacheMaxEntries = 16;
|
|
6
|
+
const indexCache = new Map();
|
|
7
|
+
/**
 * Creates a fresh, empty version-1 index stamped with the current time. Each
 * call returns new arrays, so callers can mutate the result safely.
 */
const emptyIndex = () => {
    const updatedAt = new Date().toISOString();
    return {
        version: 1,
        updatedAt,
        documents: [],
        chunks: [],
        links: []
    };
};
|
|
14
|
+
/** Returns the path of the index file: `<vaultPath>/.brainlink/index.json`. */
export const indexStoragePath = (vaultPath) => {
    const segments = [vaultPath, '.brainlink', 'index.json'];
    return join(...segments);
};
|
|
15
|
+
/**
 * Loads the index for a vault, served from an in-memory cache when possible.
 *
 * The cache is keyed by index path and validated against the file's `mtimeMs`
 * and `size`. On a miss the file is parsed, normalised to the version-1 shape
 * (non-array `documents`/`chunks`/`links` become empty arrays) and cached; when
 * the cache exceeds `indexCacheMaxEntries`, the oldest-inserted key is evicted.
 *
 * Failures never propagate: any error from `stat`, `readFile` or parsing yields
 * `emptyIndex()`. A missing file (ENOENT) additionally drops the cache entry.
 *
 * @param vaultPath Vault root directory.
 * @returns The cached or freshly loaded index, or an empty index on failure.
 */
const readIndex = async (vaultPath) => {
    const path = indexStoragePath(vaultPath);
    const isMissingFile = (error) => error instanceof Error && 'code' in error && error.code === 'ENOENT';
    const recoverFrom = (error) => {
        if (isMissingFile(error)) {
            indexCache.delete(path);
        }
        return emptyIndex();
    };
    let fingerprint;
    try {
        const { mtimeMs, size } = await stat(path);
        fingerprint = { mtimeMs, size };
    }
    catch (error) {
        return recoverFrom(error);
    }
    const hit = indexCache.get(path);
    const isFresh = Boolean(hit) && hit.mtimeMs === fingerprint.mtimeMs && hit.size === fingerprint.size;
    if (isFresh) {
        return hit.index;
    }
    try {
        const raw = await readFile(path, 'utf8');
        const parsed = JSON.parse(raw);
        const arrayOrEmpty = (candidate) => (Array.isArray(candidate) ? candidate : []);
        const loaded = {
            version: 1,
            updatedAt: typeof parsed.updatedAt === 'string' ? parsed.updatedAt : new Date().toISOString(),
            documents: arrayOrEmpty(parsed.documents),
            chunks: arrayOrEmpty(parsed.chunks),
            links: arrayOrEmpty(parsed.links)
        };
        indexCache.set(path, { ...fingerprint, index: loaded });
        if (indexCache.size > indexCacheMaxEntries) {
            // Map iteration follows insertion order, so the first key is the oldest.
            const oldest = indexCache.keys().next().value;
            if (typeof oldest === 'string') {
                indexCache.delete(oldest);
            }
        }
        return loaded;
    }
    catch (error) {
        return recoverFrom(error);
    }
};
|
|
59
|
+
/**
 * Persists the index for a vault and refreshes the in-memory cache.
 *
 * The JSON is written to `<target>.tmp` and then renamed over the target, so
 * the file is replaced in a single rename rather than rewritten in place. The
 * directory is created with mode 0o700 and the file with mode 0o600. After the
 * rename the file is stat-ed so the cache entry carries the on-disk
 * `mtimeMs`/`size` that `readIndex` compares against.
 *
 * @param vaultPath Vault root directory.
 * @param index Index object to serialise.
 */
const writeIndex = async (vaultPath, index) => {
    const target = indexStoragePath(vaultPath);
    const staging = `${target}.tmp`;
    await mkdir(dirname(target), { recursive: true, mode: 0o700 });
    const serialized = `${JSON.stringify(index)}\n`;
    await writeFile(staging, serialized, { encoding: 'utf8', mode: 0o600 });
    await rename(staging, target);
    const { mtimeMs, size } = await stat(target);
    indexCache.set(target, { mtimeMs, size, index });
};
|
|
72
|
+
/**
 * Normalises text for matching: NFKD-decomposes it, strips every character
 * with the Unicode `Diacritic` property, and lower-cases the rest.
 */
const normalizeToken = (value) => {
    const decomposed = value.normalize('NFKD');
    const withoutMarks = decomposed.replace(/\p{Diacritic}/gu, '');
    return withoutMarks.toLowerCase();
};
|
|
76
|
+
/**
 * Splits a query into normalised search tokens: every `queryTokenPattern`
 * match is passed through `normalizeToken`, and tokens of length 1 or less
 * are dropped. A query with no matches yields an empty array.
 */
const tokenize = (query) => {
    const matches = query.match(queryTokenPattern);
    if (!matches) {
        return [];
    }
    return matches
        .map(normalizeToken)
        .filter((token) => token.length > 1);
};
|
|
80
|
+
/**
 * Counts non-overlapping occurrences of `token` in `text`, scanning left to
 * right.
 *
 * An empty token returns 0. Without that guard `indexOf('')` matches at the
 * cursor and the cursor advances by zero, so the scan would never terminate.
 *
 * @param text Text to scan.
 * @param token Substring to count.
 * @returns Number of non-overlapping matches.
 */
const countOccurrences = (text, token) => {
    if (token === '') {
        return 0;
    }
    let hits = 0;
    // Resume after each match so overlapping matches are not counted twice.
    for (let index = text.indexOf(token); index >= 0; index = text.indexOf(token, index + token.length)) {
        hits += 1;
    }
    return hits;
};
|
|
93
|
+
/**
 * Computes a keyword relevance score for one row.
 *
 * Title, path, content and space-joined tags are normalised once, then each
 * token contributes 5 per title hit, 4 per tag hit, 2 per path hit and 1 per
 * content hit (content capped at 6 per token).
 *
 * @param row Object with `title`, `path`, `content` strings and a `tags` array.
 * @param tokens Normalised query tokens.
 * @returns Sum over all tokens; 0 when there are no tokens.
 */
const textScore = (row, tokens) => {
    if (tokens.length === 0) {
        return 0;
    }
    const title = normalizeToken(row.title);
    const path = normalizeToken(row.path);
    const content = normalizeToken(row.content);
    const tags = normalizeToken(row.tags.join(' '));
    let total = 0;
    for (const token of tokens) {
        const weightedHits = countOccurrences(title, token) * 5
            + countOccurrences(tags, token) * 4
            + countOccurrences(path, token) * 2;
        total += weightedHits + Math.min(countOccurrences(content, token), 6);
    }
    return total;
};
|
|
109
|
+
/**
 * Returns the cosine similarity between the query embedding and the row's
 * embedding, or 0 when either vector is empty.
 */
const semanticScore = (row, queryEmbedding) => {
    const hasQueryVector = queryEmbedding.length > 0;
    if (!hasQueryVector) {
        return 0;
    }
    if (!(row.embedding.length > 0)) {
        return 0;
    }
    return cosineSimilarity(queryEmbedding, row.embedding);
};
|
|
110
|
+
/**
 * Builds a search result from a row and its two partial scores.
 *
 * The combined `score` depends on `mode`: 'fts' uses the text score,
 * 'semantic' uses the semantic score, and any other mode uses
 * `text + semantic * 8`.
 */
const toResult = (row, mode, text, semantic) => {
    const { documentId, agentId, title, path, chunkId, chunkOrdinal, content, tags } = row;
    let score;
    if (mode === 'fts') {
        score = text;
    }
    else if (mode === 'semantic') {
        score = semantic;
    }
    else {
        score = text + semantic * 8;
    }
    return {
        documentId,
        agentId,
        title,
        path,
        chunkId,
        chunkOrdinal,
        content,
        score,
        textScore: text,
        semanticScore: semantic,
        searchMode: mode,
        tags
    };
};
|
|
127
|
+
/**
 * Converts a stored link into a display-oriented link record.
 *
 * Source fields come from the document `fromDocumentId` points to, defaulting
 * to agent 'shared' and title/path 'Unknown' when it is not in `documentsById`.
 * The target title prefers the resolved target document and otherwise uses the
 * link's own `toTitle`; `toPath` is `null` for an unresolved target.
 */
const toGraphLink = (link, documentsById) => {
    const { fromDocumentId, toDocumentId, toTitle, weight, priority } = link;
    const source = documentsById.get(fromDocumentId);
    const target = toDocumentId ? documentsById.get(toDocumentId) : undefined;
    return {
        agentId: source?.agentId ?? 'shared',
        fromTitle: source?.title ?? 'Unknown',
        fromPath: source?.path ?? 'Unknown',
        toTitle: target?.title ?? toTitle,
        toPath: target?.path ?? null,
        weight,
        priority
    };
};
|
|
140
|
+
/**
 * Opens the JSON-file-backed index of a vault and returns its query/update API.
 *
 * Every read method loads the index through `readIndex`, and every write goes
 * through `writeIndex`. Where a method takes an optional `agentId`, a falsy
 * value means "all agents".
 *
 * Compared with the previous revision, the graph construction shared by
 * `getGraph` and `getGraphSummary`, the per-agent document filter and the
 * per-agent link filter are each implemented once, and `searchGraphNodeIds`
 * validates its arguments before touching the index instead of after loading it.
 *
 * @param vaultPath Vault root directory.
 * @returns Object with `reset`, `saveDocuments`, `getIndexedDocuments`,
 *   `search`, `listLinks`, `listBacklinks`, `getGraph`, `getGraphSummary`,
 *   `getGraphNode`, `searchGraphNodeIds`, `listAgents` and `close`.
 */
export const openFileIndex = (vaultPath) => {
    const load = async () => readIndex(vaultPath);
    const persist = async (index) => writeIndex(vaultPath, index);
    // Documents owned by `agentId`, or every document when no agent is given.
    const documentsForAgent = (index, agentId) => agentId
        ? index.documents.filter((document) => document.agentId === agentId)
        : index.documents;
    // Lookup table from document id to document.
    const indexDocumentsById = (index) => new Map(index.documents.map((document) => [document.id, document]));
    // Predicate keeping links whose source document belongs to `agentId`.
    const linkOwnedBy = (documentsById, agentId) => (link) => {
        const source = documentsById.get(link.fromDocumentId);
        return agentId ? source?.agentId === agentId : true;
    };
    // Groups `items` by key, ignoring items whose key is not in `allowedKeys`.
    const groupSelected = (items, keyOf, allowedKeys) => {
        const groups = new Map();
        for (const item of items) {
            const key = keyOf(item);
            if (!allowedKeys.has(key)) {
                continue;
            }
            const bucket = groups.get(key);
            if (bucket) {
                bucket.push(item);
            }
            else {
                groups.set(key, [item]);
            }
        }
        return groups;
    };
    // Graph for one agent (or all); node content is blanked unless requested.
    const buildGraph = (index, agentId, includeContent) => {
        const documents = documentsForAgent(index, agentId);
        const documentIds = new Set(documents.map((document) => document.id));
        const edges = index.links
            .filter((link) => documentIds.has(link.fromDocumentId))
            .map((link) => ({
            source: link.fromDocumentId,
            target: link.toDocumentId,
            targetTitle: link.toTitle,
            weight: link.weight,
            priority: link.priority
        }));
        return {
            nodes: documents.map((document) => ({
                id: document.id,
                agentId: document.agentId,
                title: document.title,
                path: document.path,
                content: includeContent ? document.content : '',
                tags: document.tags
            })),
            edges
        };
    };
    return {
        // Replaces the stored index with an empty one.
        reset: async () => {
            await persist(emptyIndex());
        },
        // Overwrites the whole index with the given documents, chunks and links.
        saveDocuments: async (documents) => {
            const chunks = documents.flatMap((document) => document.chunks);
            const links = documents.flatMap((document) => document.links);
            await persist({
                version: 1,
                updatedAt: new Date().toISOString(),
                documents: documents.map((document) => document.document),
                chunks,
                links
            });
        },
        // Documents with their chunks (by ordinal) and links, sorted by path.
        getIndexedDocuments: async (agentId) => {
            const index = await load();
            const documents = documentsForAgent(index, agentId);
            const selectedDocumentIds = new Set(documents.map((document) => document.id));
            const chunksByDocumentId = groupSelected(index.chunks, (chunk) => chunk.documentId, selectedDocumentIds);
            const linksByDocumentId = groupSelected(index.links, (link) => link.fromDocumentId, selectedDocumentIds);
            return documents
                .map((document) => ({
                document,
                chunks: [...(chunksByDocumentId.get(document.id) ?? [])].sort((left, right) => left.ordinal - right.ordinal),
                links: linksByDocumentId.get(document.id) ?? []
            }))
                .sort((left, right) => left.document.path.localeCompare(right.document.path));
        },
        // Scores every chunk and returns the top `limit` results.
        search: async (query, limit, agentId, mode = 'hybrid', queryEmbedding = []) => {
            const index = await load();
            const documentsById = indexDocumentsById(index);
            const rows = index.chunks.flatMap((chunk) => {
                const document = documentsById.get(chunk.documentId);
                if (!document) {
                    return [];
                }
                if (agentId && document.agentId !== agentId) {
                    return [];
                }
                return [
                    {
                        documentId: document.id,
                        agentId: document.agentId,
                        title: document.title,
                        path: document.path,
                        chunkId: chunk.id,
                        chunkOrdinal: chunk.ordinal,
                        content: chunk.content,
                        tags: document.tags,
                        embedding: chunk.embedding
                    }
                ];
            });
            const tokens = tokenize(query);
            return rows
                .map((row) => toResult(row, mode, textScore(row, tokens), semanticScore(row, queryEmbedding)))
                // A query with no tokens keeps every row, even those scoring 0.
                .filter((row) => row.score > 0 || tokens.length === 0)
                .sort((left, right) => right.score - left.score || left.title.localeCompare(right.title))
                .slice(0, Math.max(0, limit));
        },
        // All links (optionally for one agent), sorted by source title.
        listLinks: async (agentId) => {
            const index = await load();
            const documentsById = indexDocumentsById(index);
            return index.links
                .filter(linkOwnedBy(documentsById, agentId))
                .map((link) => toGraphLink(link, documentsById))
                .sort((left, right) => left.fromTitle.localeCompare(right.fromTitle));
        },
        // Links pointing at `title` (case-insensitive), heaviest first.
        listBacklinks: async (title, agentId) => {
            const index = await load();
            const titleKey = title.toLowerCase();
            const documentsById = indexDocumentsById(index);
            return index.links
                .filter((link) => link.toTitle.toLowerCase() === titleKey)
                .filter(linkOwnedBy(documentsById, agentId))
                .map((link) => toGraphLink(link, documentsById))
                .sort((left, right) => right.weight - left.weight || left.fromTitle.localeCompare(right.fromTitle));
        },
        // Full graph, including document content on each node.
        getGraph: async (agentId) => buildGraph(await load(), agentId, true),
        // Same graph with node content replaced by an empty string.
        getGraphSummary: async (agentId) => buildGraph(await load(), agentId, false),
        // Single node by id (and agent, when given); undefined if not found.
        getGraphNode: async (id, agentId) => {
            const index = await load();
            const document = index.documents.find((row) => row.id === id && (!agentId || row.agentId === agentId));
            if (!document) {
                return undefined;
            }
            return {
                id: document.id,
                agentId: document.agentId,
                title: document.title,
                path: document.path,
                content: document.content,
                tags: document.tags
            };
        },
        // Ids of the best text matches among documents, at most `limit`.
        searchGraphNodeIds: async (query, limit, agentId) => {
            // Nothing can match, so skip the index load entirely.
            if (normalizeToken(query).length === 0 || limit <= 0) {
                return [];
            }
            const index = await load();
            const tokens = tokenize(query);
            return index.documents
                .filter((document) => (!agentId || document.agentId === agentId))
                .map((document) => ({
                id: document.id,
                score: textScore({
                    documentId: document.id,
                    agentId: document.agentId,
                    title: document.title,
                    path: document.path,
                    chunkId: document.id,
                    chunkOrdinal: 0,
                    content: document.content,
                    tags: document.tags,
                    embedding: []
                }, tokens)
            }))
                .filter((row) => row.score > 0)
                .sort((left, right) => right.score - left.score || left.id.localeCompare(right.id))
                .slice(0, limit)
                .map((row) => row.id);
        },
        // Agent ids with their document counts, sorted by id.
        listAgents: async () => {
            const index = await load();
            const counts = new Map();
            for (const document of index.documents) {
                counts.set(document.agentId, (counts.get(document.agentId) ?? 0) + 1);
            }
            return Array.from(counts.entries())
                .sort((left, right) => left[0].localeCompare(right[0]))
                .map(([id, documentCount]) => ({ id, documentCount }));
        },
        close: () => {
            // File-based index has no persistent connection.
        }
    };
};
|
|
@@ -76,6 +76,21 @@ export const readMarkdownFiles = async (vaultPath) => {
|
|
|
76
76
|
};
|
|
77
77
|
}));
|
|
78
78
|
};
|
|
79
|
+
/**
 * Lists the vault's markdown files with filesystem metadata only; file
 * contents are not read.
 *
 * @param vaultPath Vault directory, resolved through `ensureVault`.
 * @returns One summary per path from `walkMarkdownFiles`, with
 *   `absolutePath`, `relativePath` (relative to the vault root), `createdAt`
 *   (birthtime), `updatedAt` (mtime) and `size`, sorted by relative path.
 */
export const readMarkdownFileSummaries = async (vaultPath) => {
    const vaultRoot = await ensureVault(vaultPath);
    const markdownPaths = await walkMarkdownFiles(vaultRoot);
    const describeFile = async (absolutePath) => {
        const { birthtime, mtime, size } = await stat(absolutePath);
        return {
            absolutePath,
            relativePath: relative(vaultRoot, absolutePath),
            createdAt: birthtime,
            updatedAt: mtime,
            size
        };
    };
    // Files are stat-ed concurrently; ordering is restored by the sort below.
    const summaries = await Promise.all(markdownPaths.map((absolutePath) => describeFile(absolutePath)));
    summaries.sort((left, right) => left.relativePath.localeCompare(right.relativePath));
    return summaries;
};
|
|
79
94
|
export const listVaultFiles = async (vaultPath) => {
|
|
80
95
|
const absoluteVaultPath = await ensureVault(vaultPath);
|
|
81
96
|
return walkVaultFiles(absoluteVaultPath);
|