@andespindola/brainlink 0.1.0-beta.16 → 0.1.0-beta.161
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +9 -6
- package/CHANGELOG.md +27 -0
- package/COPYRIGHT.md +5 -0
- package/README.md +177 -20
- package/dist/application/add-note.js +13 -44
- package/dist/application/auto-migrate-configured-vault.js +37 -0
- package/dist/application/build-context.js +64 -3
- package/dist/application/canonical-context-links.js +209 -0
- package/dist/application/dedupe-notes.js +226 -0
- package/dist/application/frontend/client-css.js +258 -51
- package/dist/application/frontend/client-html.js +50 -27
- package/dist/application/frontend/client-js.js +1369 -605
- package/dist/application/frontend/client-render-worker-js.js +645 -0
- package/dist/application/frontend/client-worker-js.js +66 -0
- package/dist/application/get-graph-contexts.js +33 -0
- package/dist/application/get-graph-layout.js +62 -8
- package/dist/application/get-graph-stream-chunk.js +326 -0
- package/dist/application/get-graph-view.js +246 -0
- package/dist/application/graph-view-state.js +66 -0
- package/dist/application/import-legacy-sqlite.js +266 -0
- package/dist/application/index-vault.js +262 -23
- package/dist/application/migrate-context-links.js +79 -0
- package/dist/application/offline-pack-backup.js +44 -0
- package/dist/application/search-graph-node-ids.js +63 -3
- package/dist/application/server/routes.js +247 -7
- package/dist/application/start-server.js +75 -4
- package/dist/application/watch-vault.js +23 -2
- package/dist/cli/commands/agent-commands.js +7 -0
- package/dist/cli/commands/write-commands.js +924 -14
- package/dist/cli/runtime.js +10 -2
- package/dist/domain/context.js +54 -11
- package/dist/domain/graph-contexts.js +180 -0
- package/dist/domain/graph-layout.js +389 -18
- package/dist/domain/markdown.js +53 -9
- package/dist/domain/middle-out.js +18 -0
- package/dist/infrastructure/config.js +121 -4
- package/dist/infrastructure/file-index.js +76 -6
- package/dist/infrastructure/file-system-vault.js +15 -0
- package/dist/infrastructure/index-state.js +58 -0
- package/dist/infrastructure/private-pack-codec.js +71 -10
- package/dist/infrastructure/search-packs.js +286 -15
- package/dist/infrastructure/vault-migration-state.js +69 -0
- package/dist/infrastructure/volatile-memory.js +100 -0
- package/dist/mcp/runtime.js +20 -0
- package/dist/mcp/server.js +39 -11
- package/dist/mcp/tools.js +183 -7
- package/docs/AGENT_USAGE.md +96 -5
- package/docs/ARCHITECTURE.md +8 -0
- package/docs/QUICKSTART.md +7 -0
- package/package.json +7 -2
|
@@ -1,13 +1,74 @@
|
|
|
1
|
+
import { stat } from 'node:fs/promises';
|
|
1
2
|
import { formatContextPackage, selectContextSections } from '../domain/context.js';
|
|
3
|
+
import { indexStoragePath } from '../infrastructure/file-index.js';
|
|
4
|
+
import { searchVolatileMemory, volatileMemoryStoragePath } from '../infrastructure/volatile-memory.js';
|
|
2
5
|
import { searchKnowledge } from './search-knowledge.js';
|
|
6
|
+
// Maximum age of a cached context package, in milliseconds (45 seconds).
const contextCacheTtlMs = 45 * 1000;
// Maximum number of cached context packages kept at once.
const contextCacheMaxEntries = 200;
// In-memory cache of context packages, keyed by the string built in toCacheKey.
const contextCache = new Map();
|
|
9
|
+
/**
 * Builds a cheap change signature for a file from its modification time and size.
 *
 * @param {string} path - File to inspect.
 * @returns {Promise<string>} `<floored mtimeMs>:<size>`, or `'0:0'` when `stat` fails.
 */
const readFileSignature = async (path) => {
  // Best effort: any stat failure (for example a missing file) maps to the fixed sentinel.
  const info = await stat(path).catch(() => null);
  if (info === null) {
    return '0:0';
  }
  const { mtimeMs, size } = info;
  return `${Math.floor(mtimeMs)}:${size}`;
};
|
|
18
|
+
/**
 * Combines the file signatures of the vault's index storage and volatile-memory
 * storage into one string, used to detect that cached context data is outdated.
 *
 * @param {string} vaultPath - Vault whose storage files are inspected.
 * @returns {Promise<string>} `<index signature>|<volatile signature>`.
 */
const readContextDataSignature = async (vaultPath) => {
  // The two lookups are independent, so run them concurrently instead of back to back.
  const [indexSignature, volatileSignature] = await Promise.all([
    readFileSignature(indexStoragePath(vaultPath)),
    readFileSignature(volatileMemoryStoragePath(vaultPath))
  ]);
  return `${indexSignature}|${volatileSignature}`;
};
|
|
19
|
+
/**
 * Serialises the request parameters into a cache key.
 * The query and agent id are trimmed and lower-cased; a missing agent id becomes
 * `'*'` and a missing mode becomes `'default'`.
 *
 * @returns {string} JSON text of the normalised parameters.
 */
const toCacheKey = (vaultPath, query, limit, maxTokens, agentId, mode) => {
  const normalizedQuery = query.trim().toLowerCase();
  const normalizedAgentId = agentId?.trim().toLowerCase() ?? '*';
  const keyParts = {
    vaultPath,
    query: normalizedQuery,
    limit,
    maxTokens,
    agentId: normalizedAgentId,
    mode: mode ?? 'default'
  };
  return JSON.stringify(keyParts);
};
|
|
27
|
+
/**
 * Looks up a cached context package.
 * An entry is only served while it is within the TTL and was stored under the
 * same data signature; otherwise it is removed from the cache.
 *
 * @param {string} key - Cache key from toCacheKey.
 * @param {string} dataSignature - Current signature of the underlying data files.
 * @returns {object|undefined} The cached context, or `undefined` on a miss.
 */
const contextCacheGet = (key, dataSignature) => {
  const cachedEntry = contextCache.get(key);
  if (!cachedEntry) {
    return undefined;
  }
  const withinTtl = Date.now() - cachedEntry.createdAt <= contextCacheTtlMs;
  const sameData = cachedEntry.dataSignature === dataSignature;
  if (withinTtl && sameData) {
    return cachedEntry.context;
  }
  // Stale entries are dropped eagerly so they do not occupy a cache slot.
  contextCache.delete(key);
  return undefined;
};
|
|
39
|
+
/**
 * Stores an entry under `entry.key` and trims the cache back to
 * `contextCacheMaxEntries` by deleting keys from the front of the Map.
 *
 * @param {{ key: string }} entry - Cache entry; stored as-is.
 */
const contextCacheSet = (entry) => {
  contextCache.set(entry.key, entry);
  const excess = contextCache.size - contextCacheMaxEntries;
  if (excess <= 0) {
    return;
  }
  // Map keys iterate in insertion order, so the leading keys are the earliest inserted.
  const evictable = Array.from(contextCache.keys()).slice(0, excess);
  for (const staleKey of evictable) {
    contextCache.delete(staleKey);
  }
};
|
|
3
48
|
/**
 * Builds (or serves from cache) the context package for a query.
 *
 * Durable sections come from `searchKnowledge` filtered through
 * `selectContextSections`; up to three volatile-memory sections are placed in
 * front of them. The package is cached under the normalised request parameters
 * together with the current data signature.
 *
 * @param {string} vaultPath - Vault to search.
 * @param {string} query - Search query.
 * @param {number} limit - Result limit passed to the searches.
 * @param {number} maxTokens - Budget passed to `selectContextSections`.
 * @param {string} [agentId] - Optional agent scope.
 * @param {string} [mode] - Optional search mode; volatile search falls back to `'hybrid'`.
 * @returns {Promise<object>} `{ query, sections, content }` plus `volatileSections` when any were found.
 */
export const buildContextPackage = async (vaultPath, query, limit, maxTokens, agentId, mode) => {
  const cacheKey = toCacheKey(vaultPath, query, limit, maxTokens, agentId, mode);
  const dataSignature = await readContextDataSignature(vaultPath);
  const cached = contextCacheGet(cacheKey, dataSignature);
  if (cached) {
    // The cache key ignores case and surrounding whitespace of the query, so the hit
    // may have been built for a differently written query. Re-render it so the
    // returned package always echoes the caller's own query.
    return cached.query === query
      ? cached
      : { ...cached, query, content: formatContextPackage(query, cached.sections) };
  }
  const results = await searchKnowledge(vaultPath, query, limit, agentId, mode);
  const durableSections = selectContextSections(results, maxTokens);
  const volatileSections = await searchVolatileMemory(vaultPath, query, Math.min(3, limit), agentId, mode ?? 'hybrid');
  // Volatile sections are listed ahead of the durable ones.
  const sections = [...volatileSections, ...durableSections];
  const context = {
    query,
    sections,
    content: formatContextPackage(query, sections),
    ...(volatileSections.length > 0 ? { volatileSections } : {})
  };
  contextCacheSet({
    key: cacheKey,
    createdAt: Date.now(),
    dataSignature,
    context
  });
  return context;
};
|
|
12
73
|
export const buildContext = async (vaultPath, query, limit, maxTokens, agentId, mode) => {
|
|
13
74
|
const contextPackage = await buildContextPackage(vaultPath, query, limit, maxTokens, agentId, mode);
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
import { readFile } from 'node:fs/promises';
|
|
2
|
+
import { join } from 'node:path';
|
|
3
|
+
import { inferVisualGraphContext } from '../domain/graph-contexts.js';
|
|
4
|
+
import { sanitizeAgentId, sharedAgentId } from '../domain/agents.js';
|
|
5
|
+
import { extractContextLinkWeights, parseMarkdownDocument } from '../domain/markdown.js';
|
|
6
|
+
import { ensureVault, readMarkdownFileSummaries, writeMarkdownFile } from '../infrastructure/file-system-vault.js';
|
|
7
|
+
// Priority label written on every canonical hub link line (see linkLine).
const canonicalPriority = 'high';
|
|
8
|
+
/**
 * Turns a title into a lower-case, dash-separated ASCII slug.
 *
 * @param {string} title - Title to convert.
 * @returns {string} Slug made of `a-z`, `0-9` and single dashes, without leading or trailing dashes.
 */
const slugify = (title) => {
  // NFKD splits accented letters into base letter + combining mark; the marks are then dropped.
  const withoutDiacritics = title.normalize('NFKD').replace(/[\u0300-\u036f]/g, '');
  const dashed = withoutDiacritics.toLowerCase().replace(/[^a-z0-9]+/g, '-');
  return dashed.replace(/^-+|-+$/g, '');
};
|
|
14
|
+
/**
 * Derives the title of the canonical hub note for a context.
 *
 * @param {string} contextTitle - Context title.
 * @returns {string} The context title followed by ` Hub`.
 */
export const hubTitleForContext = (contextTitle) => {
  return `${contextTitle} Hub`;
};
|
|
15
|
+
/**
 * Resolves the vault-relative path of the hub note for a context.
 * Three well-known contexts have fixed locations; every other hub lives under
 * `agents/<sanitized agent id>/<slug of hub title>.md`.
 *
 * @param {string} contextTitle - Context title.
 * @param {string} agentId - Agent owning the hub (only used for non-fixed contexts).
 * @returns {string} Forward-slash separated relative path.
 */
const hubPathForContext = (contextTitle, agentId) => {
  switch (contextTitle) {
    case 'GitHub Repositories':
      return 'github-repos/github-repositories-hub.md';
    case 'GitHub Organizations':
      return 'github-org-repos/github-organizations-hub.md';
    case 'Machine Configuration':
      return 'machine-config/machine-configuration-hub.md';
    default: {
      const agentFolder = sanitizeAgentId(agentId);
      const fileName = `${slugify(hubTitleForContext(contextTitle))}.md`;
      // join() may emit backslashes on Windows; vault paths always use forward slashes.
      return join('agents', agentFolder, fileName).replaceAll('\\', '/');
    }
  }
};
|
|
24
|
+
/**
 * Normalises a note title for comparison: trims it, removes a trailing `.md`
 * (any letter case) and lower-cases the rest.
 *
 * @param {string} title - Raw title.
 * @returns {string} Comparable title.
 */
const normalizeTitle = (title) => {
  const withoutExtension = title.trim().replace(/\.md$/i, '');
  return withoutExtension.toLowerCase();
};
|
|
25
|
+
/**
 * Tells whether the content already contains a context link to the hub.
 * Link titles come from `extractContextLinkWeights` and are compared through
 * `normalizeTitle`.
 *
 * @param {string} content - Note content.
 * @param {string} hubTitle - Hub title to look for.
 * @returns {boolean} `true` when a matching link exists.
 */
const hasCanonicalLink = (content, hubTitle) => {
  const links = extractContextLinkWeights(content);
  return links.some((link) => {
    const linkTitle = normalizeTitle(link.title);
    return linkTitle === normalizeTitle(hubTitle);
  });
};
|
|
26
|
+
/**
 * Formats the markdown list item that links a note to its hub.
 *
 * @param {string} hubTitle - Hub title used as the wiki-link target.
 * @returns {string} `- [[<hubTitle>]] priority: <canonicalPriority>`.
 */
const linkLine = (hubTitle) => {
  return `- [[${hubTitle}]] priority: ${canonicalPriority}`;
};
|
|
27
|
+
/**
 * Matches a level 2-6 heading named "Context Links" or "Links de Contexto"
 * (case-insensitive).
 *
 * @param {string} line - A single line; callers pass it trimmed.
 * @returns {RegExpMatchArray|null} Match whose group 1 is the `#` run, or `null`.
 */
const contextLinksHeading = (line) => {
  const headingPattern = /^(#{2,6})\s+(Context Links|Links de Contexto)\s*$/i;
  return line.match(headingPattern);
};
|
|
28
|
+
/**
 * Ensures the content carries a canonical link to the hub.
 *
 * - Content that already links to the hub is returned unchanged.
 * - Without a "Context Links" heading, a new `## Context Links` section is appended.
 * - Otherwise the link is appended to the end of the existing section.
 *
 * @param {string} content - Note content.
 * @param {string} hubTitle - Title of the hub to link to.
 * @returns {string} Content containing the link, terminated by a single newline when changed.
 */
export const upsertCanonicalContextLink = (content, hubTitle) => {
  if (hasCanonicalLink(content, hubTitle)) {
    return content;
  }
  const lines = content.replace(/\s+$/u, '').split('\n');
  const headingIndex = lines.findIndex((line) => contextLinksHeading(line.trim()));
  if (headingIndex === -1) {
    const existing = lines.join('\n');
    const section = `## Context Links\n\n${linkLine(hubTitle)}\n`;
    // Empty content must not start with blank lines.
    return existing === '' ? section : `${existing}\n\n${section}`;
  }
  const headingDepth = contextLinksHeading(lines[headingIndex].trim())?.[1]?.length ?? 2;
  // The section ends at the next heading of the same or a shallower depth.
  // Level-1 headings count too: they are always shallower than the section heading.
  const nextHeadingIndex = lines.findIndex((line, index) => {
    if (index <= headingIndex) {
      return false;
    }
    const candidate = line.match(/^(#{1,6})\s+/);
    return candidate !== null && candidate[1].length <= headingDepth;
  });
  const sectionEnd = nextHeadingIndex === -1 ? lines.length : nextHeadingIndex;
  // Step back over the blank lines that close the section so the link joins the
  // section body instead of being glued to the following heading.
  let bodyEnd = sectionEnd;
  while (bodyEnd > headingIndex + 1 && lines[bodyEnd - 1].trim() === '') {
    bodyEnd -= 1;
  }
  const before = lines.slice(0, bodyEnd);
  const after = lines.slice(sectionEnd);
  const nextLines = [
    ...before,
    '',
    linkLine(hubTitle),
    // Keep one blank line between the new link and whatever follows the section.
    ...(after.length > 0 ? ['', ...after] : [])
  ];
  return `${nextLines.join('\n').replace(/\s+$/u, '')}\n`;
};
|
|
52
|
+
/**
 * Renders the markdown of a new hub note: front matter with title and agent,
 * a level-1 heading and a one-line description tagged `#memory #hub`.
 *
 * @param {string} hubTitle - Hub note title; double quotes are backslash-escaped in the front matter.
 * @param {string} contextTitle - Context the hub represents.
 * @param {string} agentId - Agent id, passed through `sanitizeAgentId`.
 * @returns {string} Note content ending with a newline.
 */
const buildHubContent = (hubTitle, contextTitle, agentId) => {
  const escapedTitle = hubTitle.replaceAll('"', '\\"');
  const frontmatter = [
    '---',
    `title: "${escapedTitle}"`,
    `agent: "${sanitizeAgentId(agentId)}"`,
    '---'
  ];
  const body = [
    '',
    `# ${hubTitle}`,
    '',
    `Canonical hub for the ${contextTitle} context. #memory #hub`,
    ''
  ];
  return [...frontmatter, ...body].join('\n');
};
|
|
63
|
+
/**
 * Loads every markdown note of the vault together with its parsed document.
 *
 * @param {string} vaultPath - Vault location; resolved through `ensureVault`.
 * @returns {Promise<Array<{ summary: object, content: string, document: object }>>}
 *   One record per file summary returned by `readMarkdownFileSummaries`.
 */
const readNotes = async (vaultPath) => {
  const absoluteVaultPath = await ensureVault(vaultPath);
  const summaries = await readMarkdownFileSummaries(absoluteVaultPath);
  const loadNote = async (summary) => {
    const content = await readFile(summary.absolutePath, 'utf8');
    const document = parseMarkdownDocument({
      absolutePath: summary.absolutePath,
      vaultPath: absoluteVaultPath,
      content,
      createdAt: summary.createdAt,
      updatedAt: summary.updatedAt
    });
    return { summary, content, document };
  };
  // All files are read concurrently.
  return Promise.all(summaries.map((summary) => loadNote(summary)));
};
|
|
82
|
+
/**
 * Makes sure a hub note exists for the context, creating it when no note with
 * the hub title is found in the vault.
 *
 * @param {string} vaultPath - Vault location.
 * @param {string} contextTitle - Context the hub belongs to.
 * @param {string} [agentId=sharedAgentId] - Agent used for the path and front matter of a new hub.
 * @returns {Promise<{ created: boolean, title: string, path: * }>} `path` is the existing
 *   note's relative path, or whatever `writeMarkdownFile` returns for a new hub.
 */
export const ensureCanonicalContextHub = async (vaultPath, contextTitle, agentId = sharedAgentId) => {
  const hubTitle = hubTitleForContext(contextTitle);
  const notes = await readNotes(vaultPath);
  const wantedTitle = normalizeTitle(hubTitle);
  const existing = notes.find((note) => normalizeTitle(note.document.title) === wantedTitle);
  if (existing) {
    return {
      created: false,
      title: hubTitle,
      path: existing.summary.relativePath ?? hubPathForContext(contextTitle, agentId)
    };
  }
  const hubPath = hubPathForContext(contextTitle, agentId);
  const path = await writeMarkdownFile(vaultPath, hubPath, buildHubContent(hubTitle, contextTitle, agentId));
  return { created: true, title: hubTitle, path };
};
|
|
101
|
+
/**
 * Walks the vault and makes every note link to the hub of its inferred context.
 *
 * Options read by this function:
 * - `agentId`: only notes whose document agent id equals the sanitized value are processed.
 * - `createMissingHubs`: unless `false`, missing hub notes are created.
 * - `dryRun`: when truthy nothing is written, but the report is produced as if it were.
 *
 * @param {string} vaultPath - Vault location.
 * @param {object} [options={}] - See above.
 * @returns {Promise<{ dryRun: boolean, scanned: number, changed: number, createdHubs: number, skipped: number, entries: object[] }>}
 *   Report with one entry per recorded decision.
 */
export const canonicalizeContextLinks = async (vaultPath, options = {}) => {
  const agentId = options.agentId ? sanitizeAgentId(options.agentId) : undefined;
  const createMissingHubs = options.createMissingHubs !== false;
  const notes = await readNotes(vaultPath);
  const scopedNotes = agentId ? notes.filter((note) => note.document.agentId === agentId) : notes;
  // Titles are collected from the whole vault, not just the scoped notes, so hubs
  // owned by other agents are still recognised as existing.
  const knownTitles = new Set(notes.map((note) => normalizeTitle(note.document.title)));
  const entries = [];
  const record = (path, title, context, hubTitle, changed, reason) => {
    entries.push({ path, title, context, hubTitle, changed, reason });
  };
  // Returns true when the hub exists (or was just created / would be created in a dry run).
  const ensureHub = async (contextTitle, hubTitle, targetAgentId) => {
    const hubKey = normalizeTitle(hubTitle);
    if (knownTitles.has(hubKey)) {
      return true;
    }
    const path = hubPathForContext(contextTitle, targetAgentId);
    if (!createMissingHubs) {
      record(path, hubTitle, contextTitle, hubTitle, false, 'missing-hub');
      return false;
    }
    // Registered before writing so later notes of the same context do not create it again.
    knownTitles.add(hubKey);
    if (!options.dryRun) {
      await writeMarkdownFile(vaultPath, path, buildHubContent(hubTitle, contextTitle, targetAgentId));
    }
    record(path, hubTitle, contextTitle, hubTitle, true, 'created-hub');
    return true;
  };
  for (const note of scopedNotes) {
    const context = inferVisualGraphContext(note.document);
    const hubTitle = hubTitleForContext(context.title);
    const recordNote = (changed, reason) => {
      record(note.summary.relativePath, note.document.title, context.title, hubTitle, changed, reason);
    };
    // A hub never links to itself.
    if (normalizeTitle(note.document.title) === normalizeTitle(hubTitle)) {
      recordNote(false, 'hub-note');
      continue;
    }
    const hubAvailable = await ensureHub(context.title, hubTitle, note.document.agentId || sharedAgentId);
    if (!hubAvailable) {
      continue;
    }
    if (hasCanonicalLink(note.content, hubTitle)) {
      recordNote(false, 'already-linked');
      continue;
    }
    const nextContent = upsertCanonicalContextLink(note.content, hubTitle);
    if (!options.dryRun) {
      await writeMarkdownFile(vaultPath, note.summary.relativePath, nextContent);
    }
    recordNote(true, 'added-context-link');
  }
  let changed = 0;
  let createdHubs = 0;
  for (const entry of entries) {
    if (!entry.changed) {
      continue;
    }
    changed += 1;
    if (entry.reason === 'created-hub') {
      createdHubs += 1;
    }
  }
  return {
    dryRun: options.dryRun === true,
    scanned: scopedNotes.length,
    changed,
    createdHubs,
    skipped: entries.length - changed,
    entries
  };
};
|
|
192
|
+
/**
 * Adds the canonical hub link to in-memory note content, without touching the vault.
 * The context is inferred from a stand-in document built from the title and content;
 * a note that is itself the hub is left unchanged.
 *
 * @param {string} title - Note title.
 * @param {string} content - Note content.
 * @returns {{ content: string, context: string, hubTitle: string, changed: boolean }}
 */
export const addCanonicalContextLinkToContent = (title, content) => {
  const probeDocument = {
    id: '',
    agentId: sharedAgentId,
    title,
    path: '',
    content,
    tags: []
  };
  const context = inferVisualGraphContext(probeDocument);
  const hubTitle = hubTitleForContext(context.title);
  const isHubNote = normalizeTitle(title) === normalizeTitle(hubTitle);
  const nextContent = isHubNote ? content : upsertCanonicalContextLink(content, hubTitle);
  return {
    content: nextContent,
    context: context.title,
    hubTitle,
    changed: nextContent !== content
  };
};
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
import { createHash } from 'node:crypto';
|
|
2
|
+
import { createEmbeddingBuckets, createLocalEmbedding, cosineSimilarity } from '../domain/embeddings.js';
|
|
3
|
+
import { parseMarkdownDocument } from '../domain/markdown.js';
|
|
4
|
+
import { writeMarkdownFile, ensureVault, readMarkdownFiles } from '../infrastructure/file-system-vault.js';
|
|
5
|
+
import { indexVault } from './index-vault.js';
|
|
6
|
+
// Word-like tokens: runs of Unicode letters, digits, underscores and hyphens.
const tokenPattern = /[\p{L}\p{N}_-]+/gu;
// Front matter is only recognised at the very start of the text. Without the
// multiline flag a pair of `---` thematic breaks inside the body is left alone.
const frontmatterPattern = /^---\n[\s\S]*?\n---\n?/;
// First level-1 heading line, together with the blank lines that follow it.
const rootHeadingPattern = /^#\s+.+\n+/m;
// Cap on how many notes a single embedding bucket contributes to pair generation.
const maxCandidatesPerBucket = 240;
/**
 * Normalises a vault-relative path: backslashes become forward slashes and a
 * leading `./` is removed.
 */
const normalizePath = (path) => path.replaceAll('\\', '/').replace(/^\.\//, '');
/**
 * Reduces note content to the body used for duplicate comparison by removing
 * the front matter and the first level-1 heading, then trimming.
 *
 * @param {string} content - Raw note content.
 * @returns {string} Comparable body with `\n` line endings.
 */
const toComparableBody = (content) => content
  // Line endings are normalised first: the patterns below only understand `\n`,
  // so CRLF content would otherwise keep its front matter and heading.
  .replaceAll('\r\n', '\n')
  .replace(frontmatterPattern, '')
  .replace(rootHeadingPattern, '')
  .trim();
|
|
16
|
+
/**
 * Body used for exact-duplicate hashing; currently identical to `toComparableBody`.
 *
 * @param {string} content - Raw note content.
 * @returns {string} Comparable body with its internal whitespace preserved.
 */
const normalizeStrictContent = (content) => {
  return toComparableBody(content);
};
|
|
17
|
+
/**
 * Body used for semantic comparison: the comparable body with every run of
 * whitespace collapsed to a single space.
 *
 * @param {string} content - Raw note content.
 * @returns {string} Single-line body.
 */
const normalizeSemanticContent = (content) => {
  const body = toComparableBody(content);
  return body.replace(/\s+/g, ' ').trim();
};
|
|
20
|
+
/**
 * Hashes a string with SHA-256.
 *
 * @param {string} value - Text to hash, encoded as UTF-8.
 * @returns {string} Lower-case hex digest.
 */
const toHash = (value) => {
  const hasher = createHash('sha256');
  hasher.update(value, 'utf8');
  return hasher.digest('hex');
};
|
|
21
|
+
/**
 * Builds an order-independent identifier for a pair of notes.
 *
 * @param {string} leftPath - Path of one note.
 * @param {string} rightPath - Path of the other note.
 * @returns {string} Both normalised paths, sorted with `localeCompare` and joined by `|`.
 */
const toCandidateId = (leftPath, rightPath) => {
  const ordered = [normalizePath(leftPath), normalizePath(rightPath)];
  ordered.sort((first, second) => first.localeCompare(second));
  return ordered.join('|');
};
|
|
22
|
+
/**
 * Tells whether two texts share at least one token.
 * Tokens are matched with `tokenPattern`, lower-cased, and ignored when they are
 * two characters or shorter.
 *
 * @param {string} left - First text.
 * @param {string} right - Second text.
 * @returns {boolean} `false` when either side has no usable tokens.
 */
const hasSharedTokens = (left, right) => {
  const significantTokens = (text) => {
    const found = new Set();
    for (const raw of text.match(tokenPattern) ?? []) {
      const token = raw.toLowerCase();
      if (token.length > 2) {
        found.add(token);
      }
    }
    return found;
  };
  const leftTokens = significantTokens(left);
  const rightTokens = significantTokens(right);
  if (leftTokens.size === 0 || rightTokens.size === 0) {
    return false;
  }
  return [...leftTokens].some((token) => rightTokens.has(token));
};
|
|
35
|
+
/**
 * Formats the line that marks another note as related.
 *
 * @param {string} targetTitle - Title of the related note.
 * @returns {string} `Related: [[<title>]] priority: low #related-to`.
 */
const relatedMarker = (targetTitle) => `Related: [[${targetTitle}]] priority: low #related-to`;
/**
 * Appends a "Related" line for the target note unless the content already
 * contains a wiki link to it (matched case-insensitively).
 *
 * @param {string} content - Note content.
 * @param {string} targetTitle - Title of the note to link to.
 * @returns {string} Unchanged content, or the content with the related line appended.
 */
const ensureRelatedEdgeLine = (content, targetTitle) => {
  const escapedTitle = targetTitle.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // The title must be followed by `]`, `|` (alias) or `#` (section). Requiring that
  // terminator stops `[[Authentication]]` from counting as a link to `Auth`.
  const linkPattern = new RegExp(`\\[\\[\\s*${escapedTitle}\\s*(?=[\\]|#])`, 'i');
  if (linkPattern.test(content)) {
    return content;
  }
  const trimmed = content.trimEnd();
  return `${trimmed}\n\n${relatedMarker(targetTitle)}\n`;
};
|
|
44
|
+
/**
 * Appends a "Merged into" marker pointing at the target note unless the exact
 * marker text is already present.
 *
 * @param {string} content - Note content.
 * @param {string} targetTitle - Title of the note this one was merged into.
 * @returns {string} Unchanged content, or the content with the marker line appended.
 */
const ensureMergedMarker = (content, targetTitle) => {
  const marker = `Merged into [[${targetTitle}]]`;
  return content.includes(marker)
    ? content
    : `${content.trimEnd()}\n\n${marker} priority: low #related-to\n`;
};
|
|
51
|
+
/**
 * Appends the body of a merged note under a `## Merged Memory From [[title]]`
 * heading, unless that heading is already present.
 * The merged body goes through `normalizeSemanticContent`, so it is appended as a
 * single whitespace-collapsed line.
 *
 * @param {string} baseContent - Content of the note that receives the merge.
 * @param {string} mergedTitle - Title of the note being merged in.
 * @param {string} mergedContent - Content of the note being merged in.
 * @returns {string} Unchanged base content, or the base content with the merged section appended.
 */
const appendMergedContent = (baseContent, mergedTitle, mergedContent) => {
  const sectionHeading = `## Merged Memory From [[${mergedTitle}]]`;
  const alreadyMerged = baseContent.includes(sectionHeading);
  if (alreadyMerged) {
    return baseContent;
  }
  const mergedBody = normalizeSemanticContent(mergedContent);
  return [baseContent.trimEnd(), sectionHeading, `${mergedBody}\n`].join('\n\n');
};
|
|
59
|
+
/**
 * Loads the vault's markdown notes as comparison records carrying the strict and
 * semantic bodies, a local embedding and its buckets.
 *
 * @param {string} vaultPath - Vault location.
 * @param {string} [agentId] - When given, only notes whose parsed agent id equals it are returned.
 * @returns {Promise<object[]>} One record per matching note, in the order the files were read.
 */
const loadNoteRecords = async (vaultPath, agentId) => {
  const absoluteVaultPath = await ensureVault(vaultPath);
  const files = await readMarkdownFiles(vaultPath);
  const records = [];
  for (const file of files) {
    const parsed = parseMarkdownDocument({
      absolutePath: file.absolutePath,
      vaultPath: absoluteVaultPath,
      content: file.content,
      createdAt: file.createdAt,
      updatedAt: file.updatedAt
    });
    // Filter before normalising and embedding, so notes of other agents cost nothing.
    // NOTE(review): assumes createLocalEmbedding/createEmbeddingBuckets have no side effects — confirm.
    if (agentId && parsed.agentId !== agentId) {
      continue;
    }
    const strict = normalizeStrictContent(parsed.content);
    const semantic = normalizeSemanticContent(parsed.content);
    const embedding = createLocalEmbedding(`${parsed.title}\n${semantic}`);
    records.push({
      title: parsed.title,
      path: normalizePath(parsed.path),
      agentId: parsed.agentId,
      content: parsed.content,
      normalizedStrictContent: strict,
      semanticContent: semantic,
      embedding,
      buckets: createEmbeddingBuckets(embedding, 20)
    });
  }
  return records;
};
|
|
87
|
+
/**
 * Builds the duplicate-candidate object reported for a pair of notes.
 *
 * @param {object} left - First note record.
 * @param {object} right - Second note record.
 * @param {string} kind - Candidate kind (callers pass `'exact'` or `'semantic'`).
 * @param {number} score - Similarity score; rounded to four decimals.
 * @param {string} reason - Human-readable explanation.
 * @returns {object} Candidate with `id`, `possibleDuplicate`, `kind`, `score`, `left`, `right`, `reason`.
 */
const pairToCandidate = (left, right, kind, score, reason) => {
  const describe = (note) => ({
    title: note.title,
    path: note.path,
    agentId: note.agentId
  });
  return {
    id: toCandidateId(left.path, right.path),
    possibleDuplicate: true,
    kind,
    score: Number(score.toFixed(4)),
    left: describe(left),
    right: describe(right),
    reason
  };
};
|
|
104
|
+
/**
 * Generates the note-index pairs worth comparing: notes are grouped by embedding
 * bucket and every two notes sharing a bucket form one pair.
 *
 * @param {object[]} notes - Note records exposing a `buckets` collection.
 * @returns {Array<[number, number]>} Unique `[lowerIndex, higherIndex]` pairs.
 */
const indexCandidatePairs = (notes) => {
  const bucketMap = new Map();
  notes.forEach((note, noteIndex) => {
    note.buckets.forEach((bucket) => {
      const members = bucketMap.get(bucket) ?? [];
      // A full bucket stops accepting notes; this bounds the quadratic pairing below.
      if (members.length >= maxCandidatesPerBucket) {
        return;
      }
      members.push(noteIndex);
      bucketMap.set(bucket, members);
    });
  });
  const seenPairs = new Set();
  const pairs = [];
  for (const members of bucketMap.values()) {
    for (let i = 0; i < members.length; i += 1) {
      for (let j = i + 1; j < members.length; j += 1) {
        const first = members[i] ?? 0;
        const second = members[j] ?? 0;
        const low = Math.min(first, second);
        const high = Math.max(first, second);
        const pairKey = `${low}|${high}`;
        // Two notes may share several buckets; report the pair only once.
        if (seenPairs.has(pairKey)) {
          continue;
        }
        seenPairs.add(pairKey);
        pairs.push([low, high]);
      }
    }
  }
  return pairs;
};
|
|
132
|
+
/**
 * Scans the vault for possible duplicate notes.
 *
 * Exact duplicates share the SHA-256 of their strict body and score 1. Semantic
 * duplicates come from bucket-sharing pairs whose embedding cosine similarity
 * reaches `minSemanticScore` and that either share a title/content token or score
 * at least 0.975.
 *
 * Options read by this function:
 * - `agentId`: forwarded to `loadNoteRecords`.
 * - `minSemanticScore`: defaults to 0.92.
 * - `includeSemantic`: semantic pass runs unless `false`.
 * - `focusPath`: keeps only candidates involving this path.
 * - `limit`: defaults to 25; at least one candidate slot is always kept.
 *
 * @param {string} vaultPath - Vault location.
 * @param {object} [options={}] - See above.
 * @returns {Promise<object[]>} Candidates sorted by descending score, then by left path.
 */
export const scanDuplicateNotes = async (vaultPath, options = {}) => {
  const notes = await loadNoteRecords(vaultPath, options.agentId);
  if (notes.length < 2) {
    return [];
  }
  const minSemanticScore = options.minSemanticScore ?? 0.92;
  const includeSemantic = options.includeSemantic !== false;
  const seen = new Map();

  // Pass 1: group notes by the hash of their strict body.
  const byHash = new Map();
  for (const note of notes) {
    const hashKey = toHash(note.normalizedStrictContent);
    const group = byHash.get(hashKey);
    if (group) {
      group.push(note);
    } else {
      byHash.set(hashKey, [note]);
    }
  }
  for (const group of byHash.values()) {
    if (group.length < 2) {
      continue;
    }
    // The note with the smallest path is the base; every other member is paired with it.
    group.sort((first, second) => first.path.localeCompare(second.path));
    const [base, ...rest] = group;
    for (const note of rest) {
      const candidate = pairToCandidate(base, note, 'exact', 1, 'Exact content hash match');
      seen.set(candidate.id, candidate);
    }
  }

  // Pass 2: embedding similarity over bucket-sharing pairs.
  if (includeSemantic) {
    for (const [leftIndex, rightIndex] of indexCandidatePairs(notes)) {
      const left = notes[leftIndex];
      const right = notes[rightIndex];
      if (!left || !right || left.path === right.path) {
        continue;
      }
      const id = toCandidateId(left.path, right.path);
      // Exact matches take precedence over semantic ones.
      if (seen.has(id)) {
        continue;
      }
      const score = cosineSimilarity(left.embedding, right.embedding);
      const titleShared = hasSharedTokens(left.title, right.title);
      const contentShared = hasSharedTokens(left.semanticContent, right.semanticContent);
      const corroborated = titleShared || contentShared || score >= 0.975;
      if (score >= minSemanticScore && corroborated) {
        seen.set(id, pairToCandidate(left, right, 'semantic', score, 'High semantic similarity'));
      }
    }
  }

  const focusPath = options.focusPath ? normalizePath(options.focusPath) : undefined;
  const involvesFocus = (item) => (focusPath ? item.left.path === focusPath || item.right.path === focusPath : true);
  const maxResults = Math.max(1, options.limit ?? 25);
  return Array.from(seen.values())
    .filter(involvesFocus)
    .sort((first, second) => second.score - first.score || first.left.path.localeCompare(second.left.path))
    .slice(0, maxResults);
};
|
|
185
|
+
/**
 * Applies a resolution to a pair of possibly duplicate notes and writes the result.
 *
 * - `action === 'link'`: both notes get a "Related" line pointing at each other.
 * - `action === 'ignore'`: only the left note gets the "Related" line.
 * - any other action: the right note's body is appended to the left note and the
 *   right note is marked as merged into the left one.
 *
 * @param {string} vaultPath - Vault location.
 * @param {{ leftPath: string, rightPath: string, action: string, autoIndex?: boolean }} options -
 *   Paths are normalised before lookup; the vault is re-indexed unless `autoIndex` is `false`.
 * @returns {Promise<object>} `{ action, leftPath, rightPath, updatedPaths }` plus `index` when re-indexing returned a value.
 * @throws {Error} When both paths are equal or either note cannot be found.
 */
export const resolveDuplicateNotes = async (vaultPath, options) => {
  const leftPath = normalizePath(options.leftPath);
  const rightPath = normalizePath(options.rightPath);
  if (leftPath === rightPath) {
    throw new Error('leftPath and rightPath must be different notes.');
  }
  const notes = await loadNoteRecords(vaultPath);
  const byPath = new Map();
  for (const note of notes) {
    byPath.set(note.path, note);
  }
  const left = byPath.get(leftPath);
  const right = byPath.get(rightPath);
  if (!left || !right) {
    throw new Error(`Duplicate resolution paths were not found in vault index source: ${leftPath}, ${rightPath}`);
  }
  const leftRelated = ensureRelatedEdgeLine(left.content, right.title);
  const rightRelated = ensureRelatedEdgeLine(right.content, left.title);
  // Insertion order of this Map is the order in which files are written below.
  const updates = new Map();
  switch (options.action) {
    case 'link':
      updates.set(left.path, leftRelated);
      updates.set(right.path, rightRelated);
      break;
    case 'ignore':
      updates.set(left.path, leftRelated);
      break;
    default:
      // Every other action value is treated as a merge of right into left.
      updates.set(left.path, appendMergedContent(leftRelated, right.title, right.content));
      updates.set(right.path, ensureMergedMarker(rightRelated, left.title));
      break;
  }
  for (const [path, content] of updates) {
    await writeMarkdownFile(vaultPath, path, content);
  }
  const index = options.autoIndex !== false ? await indexVault(vaultPath) : undefined;
  const updatedPaths = Array.from(updates.keys()).sort((first, second) => first.localeCompare(second));
  return {
    action: options.action,
    leftPath,
    rightPath,
    updatedPaths,
    ...(index ? { index } : {})
  };
};
|