@andespindola/brainlink 0.1.0-beta.8 → 0.1.0-beta.81
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +8 -5
- package/CHANGELOG.md +58 -2
- package/CONTRIBUTING.md +2 -2
- package/COPYRIGHT.md +5 -0
- package/README.md +266 -20
- package/SECURITY.md +1 -1
- package/dist/application/add-note.js +62 -13
- package/dist/application/analyze-vault.js +95 -8
- package/dist/application/build-context.js +56 -1
- package/dist/application/dedupe-notes.js +226 -0
- package/dist/application/frontend/client-css.js +138 -103
- package/dist/application/frontend/client-html.js +47 -41
- package/dist/application/frontend/client-js.js +2449 -156
- package/dist/application/frontend/client-worker-js.js +66 -0
- package/dist/application/get-graph-layout.js +18 -6
- package/dist/application/get-graph-node.js +12 -0
- package/dist/application/get-graph-summary.js +12 -0
- package/dist/application/get-graph.js +3 -3
- package/dist/application/import-legacy-sqlite.js +296 -0
- package/dist/application/index-vault.js +252 -19
- package/dist/application/list-agents.js +3 -3
- package/dist/application/list-links.js +5 -5
- package/dist/application/migrate-vault.js +46 -16
- package/dist/application/offline-pack-backup.js +44 -0
- package/dist/application/search-graph-node-ids.js +12 -0
- package/dist/application/search-knowledge.js +75 -5
- package/dist/application/server/routes.js +102 -1
- package/dist/application/start-server.js +75 -4
- package/dist/application/watch-vault.js +23 -2
- package/dist/benchmarks/large-vault.js +1 -1
- package/dist/cli/commands/agent-commands.js +419 -0
- package/dist/cli/commands/config-commands.js +167 -0
- package/dist/cli/commands/read-commands.js +25 -8
- package/dist/cli/commands/write-commands.js +973 -10
- package/dist/cli/main.js +4 -0
- package/dist/cli/runtime.js +5 -2
- package/dist/domain/context.js +53 -11
- package/dist/domain/embeddings.js +2 -1
- package/dist/domain/graph-layout.js +67 -16
- package/dist/domain/markdown.js +36 -4
- package/dist/domain/middle-out.js +18 -0
- package/dist/infrastructure/config.js +132 -8
- package/dist/infrastructure/file-index.js +358 -0
- package/dist/infrastructure/file-system-vault.js +15 -0
- package/dist/infrastructure/index-state.js +56 -0
- package/dist/infrastructure/paths.js +9 -1
- package/dist/infrastructure/private-pack-codec.js +134 -0
- package/dist/infrastructure/search-packs.js +452 -0
- package/dist/infrastructure/session-state.js +172 -0
- package/dist/mcp/main.js +11 -3
- package/dist/mcp/server.js +27 -2
- package/dist/mcp/startup.js +35 -0
- package/dist/mcp/tools.js +633 -19
- package/docs/AGENT_USAGE.md +177 -15
- package/docs/ARCHITECTURE.md +37 -26
- package/docs/QUICKSTART.md +111 -0
- package/package.json +6 -4
- package/dist/infrastructure/sqlite/document-writer.js +0 -51
- package/dist/infrastructure/sqlite/graph-reader.js +0 -120
- package/dist/infrastructure/sqlite/schema.js +0 -111
- package/dist/infrastructure/sqlite/search-reader.js +0 -156
- package/dist/infrastructure/sqlite/types.js +0 -1
- package/dist/infrastructure/sqlite-index.js +0 -25
|
@@ -1,30 +1,79 @@
|
|
|
1
|
+
import { access } from 'node:fs/promises';
|
|
2
|
+
import { join } from 'node:path';
|
|
1
3
|
import { writeMarkdownFile } from '../infrastructure/file-system-vault.js';
|
|
2
4
|
import { sanitizeAgentId, sharedAgentId } from '../domain/agents.js';
|
|
5
|
+
import { extractWikiLinks } from '../domain/markdown.js';
|
|
3
6
|
import { validateNoteInput } from '../domain/note-safety.js';
|
|
7
|
+
import { ensureVault } from '../infrastructure/file-system-vault.js';
|
|
4
8
|
/**
 * Turns a note title into a lowercase ASCII slug with hyphen separators.
 *
 * Diacritics are removed via NFKD decomposition, every run of characters
 * outside `a-z0-9` becomes a single hyphen, and leading/trailing hyphens
 * are dropped. May return an empty string.
 *
 * @param {string} title - Human-readable note title.
 * @returns {string} Slug derived from the title.
 */
const slugify = (title) => {
    const withoutDiacritics = title.normalize('NFKD').replace(/[\u0300-\u036f]/g, '');
    const hyphenated = withoutDiacritics.toLowerCase().replace(/[^a-z0-9]+/g, '-');
    return hyphenated.replace(/^-+|-+$/g, '');
};
|
|
10
|
-
|
|
14
|
+
// Title of the hub note that ensureNonOrphanContent links to when a note has no outgoing wiki links.
const systemHubTitle = 'Memory Hub';
|
|
15
|
+
// Title of the root note; used as the fallback link target when the note being written is the hub itself.
const systemRootTitle = 'Knowledge Root';
|
|
16
|
+
/**
 * Produces a comparison key for a title: trimmed, without a trailing
 * `.md` extension (case-insensitive), and lowercased.
 *
 * @param {string} title - Note title or wiki-link target.
 * @returns {string} Normalized title.
 */
const normalizeTitle = (title) => {
    const withoutExtension = title.trim().replace(/\.md$/i, '');
    return withoutExtension.toLowerCase();
};
|
|
17
|
+
/**
 * Builds the vault-relative path of a note inside an agent namespace.
 *
 * @param {string} agentId - Agent namespace (directory under `agents/`).
 * @param {string} title - Note title; slugified for the file name.
 * @returns {string} Path of the form `agents/<agentId>/<slug>.md`, using
 *   `untitled` when the slug is empty.
 */
const noteFilename = (agentId, title) => {
    const basename = slugify(title) || 'untitled';
    return `agents/${agentId}/${basename}.md`;
};
|
|
18
|
+
/**
 * Renders the Markdown source of a note: frontmatter (title, agent),
 * an H1 heading with the title, and the trimmed content.
 *
 * @param {string} title - Note title.
 * @param {string} content - Note body; surrounding whitespace is trimmed.
 * @param {string} agentId - Agent namespace recorded in the frontmatter.
 * @returns {string} Complete Markdown document ending with a newline.
 */
const buildNote = (title, content, agentId) => {
    // Backslashes must be escaped before quotes: in a double-quoted scalar a
    // raw backslash starts an escape sequence, so a title ending in `\` would
    // otherwise swallow the closing quote.
    const escapedTitle = title.replaceAll('\\', '\\\\').replaceAll('"', '\\"');
    return [
        '---',
        `title: "${escapedTitle}"`,
        `agent: "${agentId}"`,
        '---',
        '',
        `# ${title}`,
        '',
        content.trim(),
        ''
    ].join('\n');
};
|
|
29
|
+
/**
 * Creates a system note (hub/root) for an agent unless its file is
 * already accessible on disk.
 *
 * @param {string} vaultPath - Vault path passed through to writeMarkdownFile.
 * @param {string} absoluteVaultPath - Resolved vault path used for the existence probe.
 * @param {string} agentId - Agent namespace owning the note.
 * @param {string} title - Title of the system note.
 * @param {string} content - Body written when the note has to be created.
 * @returns {Promise<void>}
 */
const ensureSystemNote = async (vaultPath, absoluteVaultPath, agentId, title, content) => {
    const relativePath = noteFilename(agentId, title);
    const probePath = join(absoluteVaultPath, relativePath);
    // Any access failure is treated as "missing", so the note is (re)written.
    const alreadyPresent = await access(probePath).then(() => true, () => false);
    if (alreadyPresent) {
        return;
    }
    await writeMarkdownFile(vaultPath, relativePath, buildNote(title, content, agentId));
};
|
|
39
|
+
/**
 * Guarantees that a note's content has at least one outgoing wiki link.
 *
 * Links that point back at the note itself (compared via normalizeTitle)
 * do not count. When none remain, a `Related: [[...]]` line is appended,
 * targeting the hub note, or the root note when the note being written is
 * the hub, and that system note is created if absent.
 *
 * @param {string} vaultPath - Vault path used for writes.
 * @param {string} absoluteVaultPath - Resolved vault path.
 * @param {string} title - Title of the note being written.
 * @param {string} content - Raw note content.
 * @param {string} agentId - Sanitized agent namespace.
 * @returns {Promise<{content: string, autoLinked: boolean, linkTarget: (string|null)}>}
 */
const ensureNonOrphanContent = async (vaultPath, absoluteVaultPath, title, content, agentId) => {
    const outgoing = extractWikiLinks(content).filter((link) => normalizeTitle(link) !== normalizeTitle(title));
    const body = content.trim();
    if (outgoing.length > 0) {
        return { content: body, autoLinked: false, linkTarget: null };
    }
    const writingHub = normalizeTitle(title) === normalizeTitle(systemHubTitle);
    const fallbackTitle = writingHub ? systemRootTitle : systemHubTitle;
    const fallbackBody = writingHub
        ? `Entry point for agent memory. [[${systemHubTitle}]] #memory #root`
        : 'Central memory index for this agent namespace. #memory #hub';
    await ensureSystemNote(vaultPath, absoluteVaultPath, agentId, fallbackTitle, fallbackBody);
    return {
        content: `${body}\n\nRelated: [[${fallbackTitle}]]`,
        autoLinked: true,
        linkTarget: fallbackTitle
    };
};
|
|
61
|
+
/**
 * Validates and writes a note into an agent namespace, auto-linking it to
 * a system note when it would otherwise have no outgoing wiki links.
 *
 * @param {string} vaultPath - Vault location.
 * @param {string} title - Note title (also determines the file name).
 * @param {string} content - Note body.
 * @param {string} [agentId=sharedAgentId] - Agent namespace; sanitized before use.
 * @param {{allowSensitive?: boolean}} [options={}] - Forwarded to validateNoteInput.
 * @returns {Promise<{path: *, autoLinked: boolean, linkTarget: (string|null)}>}
 *   `path` is whatever writeMarkdownFile resolves to.
 */
export const addNoteWithMetadata = async (vaultPath, title, content, agentId = sharedAgentId, options = {}) => {
    validateNoteInput({
        title,
        content,
        allowSensitive: options.allowSensitive
    });
    const sanitizedAgentId = sanitizeAgentId(agentId);
    const absoluteVaultPath = await ensureVault(vaultPath);
    // Use the shared helper so user notes and system notes can never drift
    // apart in how their paths are derived.
    const filename = noteFilename(sanitizedAgentId, title);
    const linkedContent = await ensureNonOrphanContent(vaultPath, absoluteVaultPath, title, content, sanitizedAgentId);
    const note = buildNote(title, linkedContent.content, sanitizedAgentId);
    const path = await writeMarkdownFile(vaultPath, filename, note);
    return {
        path,
        autoLinked: linkedContent.autoLinked,
        linkTarget: linkedContent.linkTarget
    };
};
|
|
79
|
+
/**
 * Writes a note and resolves to only the `path` reported by
 * addNoteWithMetadata.
 *
 * @param {string} vaultPath - Vault location.
 * @param {string} title - Note title.
 * @param {string} content - Note body.
 * @param {string} [agentId=sharedAgentId] - Agent namespace.
 * @param {object} [options={}] - Forwarded unchanged.
 * @returns {Promise<*>} The `path` field of the metadata result.
 */
export const addNote = async (vaultPath, title, content, agentId = sharedAgentId, options = {}) => {
    const { path } = await addNoteWithMetadata(vaultPath, title, content, agentId, options);
    return path;
};
|
|
@@ -1,10 +1,89 @@
|
|
|
1
|
+
import { stat } from 'node:fs/promises';
|
|
2
|
+
import { performance } from 'node:perf_hooks';
|
|
1
3
|
import { validateGraph, getBrokenLinks, getOrphanNodes, getVaultStats } from '../domain/graph-analysis.js';
|
|
2
|
-
import { ensureVault, readMarkdownFiles } from '../infrastructure/file-system-vault.js';
|
|
3
|
-
import {
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
4
|
+
import { ensureVault, listVaultFiles, readMarkdownFiles } from '../infrastructure/file-system-vault.js';
|
|
5
|
+
import { resolveAgentRuntimeDefaults } from '../infrastructure/config.js';
|
|
6
|
+
import { getGraphSummary } from './get-graph-summary.js';
|
|
7
|
+
import { buildContextPackage } from './build-context.js';
|
|
8
|
+
import { indexVault } from './index-vault.js';
|
|
9
|
+
import { searchKnowledge } from './search-knowledge.js';
|
|
10
|
+
import { loadBrainlinkConfig } from '../infrastructure/config.js';
|
|
11
|
+
/**
 * Computes vault statistics from the graph summary.
 *
 * @param {string} vaultPath - Vault location.
 * @param {string} [agentId] - Optional agent filter passed to getGraphSummary.
 * @returns {Promise<*>} Result of getVaultStats for the summary.
 */
export const getStats = async (vaultPath, agentId) => {
    const graph = await getGraphSummary(vaultPath, agentId);
    return getVaultStats(graph);
};
|
|
12
|
+
/**
 * Lists broken links found in the graph summary.
 *
 * @param {string} vaultPath - Vault location.
 * @param {string} [agentId] - Optional agent filter passed to getGraphSummary.
 * @returns {Promise<*>} Result of getBrokenLinks for the summary.
 */
export const getBrokenLinksReport = async (vaultPath, agentId) => {
    const graph = await getGraphSummary(vaultPath, agentId);
    return getBrokenLinks(graph);
};
|
|
13
|
+
/**
 * Lists orphan nodes found in the graph summary.
 *
 * @param {string} vaultPath - Vault location.
 * @param {string} [agentId] - Optional agent filter passed to getGraphSummary.
 * @returns {Promise<*>} Result of getOrphanNodes for the summary.
 */
export const getOrphansReport = async (vaultPath, agentId) => {
    const graph = await getGraphSummary(vaultPath, agentId);
    return getOrphanNodes(graph);
};
|
|
14
|
+
/**
 * Validates the vault graph built from the graph summary.
 *
 * @param {string} vaultPath - Vault location.
 * @param {string} [agentId] - Optional agent filter passed to getGraphSummary.
 * @returns {Promise<*>} Result of validateGraph for the summary.
 */
export const validateVault = async (vaultPath, agentId) => {
    const graph = await getGraphSummary(vaultPath, agentId);
    return validateGraph(graph);
};
|
|
15
|
+
/**
 * Divides `part` by `total`, rounded to four decimal places.
 *
 * @param {number} part - Numerator.
 * @param {number} total - Denominator.
 * @returns {number} Rounded ratio, or 0 when `total` is exactly 0.
 */
const toRatio = (part, total) => {
    if (total === 0) {
        return 0;
    }
    const ratio = part / total;
    return Number(ratio.toFixed(4));
};
|
|
16
|
+
/**
 * Gathers extended vault statistics: graph stats, storage figures, link
 * quality ratios, and latency probes.
 *
 * The latency probes actually run indexVault, searchKnowledge and
 * buildContextPackage against the vault, one after another, so each timing
 * is measured in isolation.
 *
 * @param {string} vaultPath - Vault location.
 * @param {string} [agentId] - Optional agent filter for the graph and probes.
 * @returns {Promise<{stats: *, storage: object, quality: object, observability: object}>}
 */
export const getExtendedStats = async (vaultPath, agentId) => {
    const absoluteVaultPath = await ensureVault(vaultPath);
    const graph = await getGraphSummary(absoluteVaultPath, agentId);
    const stats = getVaultStats(graph);
    const markdownFiles = await readMarkdownFiles(absoluteVaultPath);
    const allFiles = await listVaultFiles(absoluteVaultPath);
    const fileSizes = await Promise.all(allFiles.map(async (filePath) => {
        try {
            return (await stat(filePath)).size;
        }
        catch {
            // A file that cannot be stat'ed contributes zero bytes.
            return 0;
        }
    }));
    const totalBytes = fileSizes.reduce((sum, size) => sum + size, 0);
    const updatedAt = markdownFiles
        .map((file) => file.updatedAt.getTime())
        .filter((time) => Number.isFinite(time))
        .sort((left, right) => left - right);
    // Seed the known priorities; start any other key at 0 so an edge with a
    // missing or unexpected priority can never turn a counter into NaN.
    const priorities = { low: 0, normal: 0, high: 0, critical: 0 };
    for (const edge of graph.edges) {
        priorities[edge.priority] = (priorities[edge.priority] ?? 0) + 1;
    }
    const config = await loadBrainlinkConfig();
    const defaults = resolveAgentRuntimeDefaults(config, agentId);
    const probeQuery = graph.nodes[0]?.title ?? 'architecture';
    const probeLimit = Math.min(defaults.defaultSearchLimit, 8);
    const indexStart = performance.now();
    await indexVault(absoluteVaultPath);
    const indexLatency = performance.now() - indexStart;
    const searchStart = performance.now();
    await searchKnowledge(absoluteVaultPath, probeQuery, probeLimit, agentId, 'hybrid');
    const searchLatency = performance.now() - searchStart;
    const contextStart = performance.now();
    await buildContextPackage(absoluteVaultPath, probeQuery, probeLimit, defaults.defaultContextTokens, agentId, 'hybrid');
    const contextLatency = performance.now() - contextStart;
    return {
        stats,
        storage: {
            markdownFileCount: markdownFiles.length,
            totalFileCount: allFiles.length,
            totalBytes,
            averageMarkdownBytes: markdownFiles.length === 0
                ? 0
                : Math.round(markdownFiles.reduce((sum, file) => sum + Buffer.byteLength(file.content, 'utf8'), 0) / markdownFiles.length),
            ...(updatedAt.length > 0
                ? {
                    oldestNoteUpdatedAt: new Date(updatedAt[0]).toISOString(),
                    newestNoteUpdatedAt: new Date(updatedAt[updatedAt.length - 1]).toISOString()
                }
                : {})
        },
        quality: {
            resolvedLinkRatio: toRatio(stats.resolvedLinkCount, stats.linkCount),
            brokenLinkRatio: toRatio(stats.brokenLinkCount, stats.linkCount),
            orphanRatio: toRatio(stats.orphanCount, Math.max(stats.documentCount, 1)),
            priorityDistribution: priorities
        },
        observability: {
            probeQuery,
            latenciesMs: {
                index: Number(indexLatency.toFixed(2)),
                search: Number(searchLatency.toFixed(2)),
                context: Number(contextLatency.toFixed(2))
            }
        }
    };
};
|
|
8
87
|
const createCheck = (name, ok, message) => ({
|
|
9
88
|
name,
|
|
10
89
|
ok,
|
|
@@ -13,7 +92,7 @@ const createCheck = (name, ok, message) => ({
|
|
|
13
92
|
export const doctorVault = async (vaultPath) => {
|
|
14
93
|
const absoluteVaultPath = await ensureVault(vaultPath);
|
|
15
94
|
const files = await readMarkdownFiles(absoluteVaultPath);
|
|
16
|
-
const graph = await
|
|
95
|
+
const graph = await getGraphSummary(absoluteVaultPath);
|
|
17
96
|
const validation = validateGraph(graph);
|
|
18
97
|
const checks = [
|
|
19
98
|
createCheck('vault', true, `Vault ready at ${absoluteVaultPath}`),
|
|
@@ -21,8 +100,16 @@ export const doctorVault = async (vaultPath) => {
|
|
|
21
100
|
createCheck('index', graph.nodes.length > 0, `${graph.nodes.length} indexed documents found`),
|
|
22
101
|
createCheck('broken-links', validation.brokenLinks.length === 0, `${validation.brokenLinks.length} broken links found`)
|
|
23
102
|
];
|
|
103
|
+
const recommendations = files.length === 0 && graph.nodes.length === 0
|
|
104
|
+
? [
|
|
105
|
+
`Vault is empty. Add your first note: blink add "Architecture" --vault "${absoluteVaultPath}" --content "Markdown source of truth. #architecture"`,
|
|
106
|
+
`If this path is not the expected vault, inspect active config: blink config where`,
|
|
107
|
+
`If you changed vault recently, migrate existing memory: blink migrate-vault --from ~/.brainlink/vault --to "${absoluteVaultPath}"`
|
|
108
|
+
]
|
|
109
|
+
: [];
|
|
24
110
|
return {
|
|
25
111
|
ok: checks.every((check) => check.ok),
|
|
26
|
-
checks
|
|
112
|
+
checks,
|
|
113
|
+
...(recommendations.length > 0 ? { recommendations } : {})
|
|
27
114
|
};
|
|
28
115
|
};
|
|
@@ -1,13 +1,68 @@
|
|
|
1
|
+
import { stat } from 'node:fs/promises';
|
|
1
2
|
import { formatContextPackage, selectContextSections } from '../domain/context.js';
|
|
3
|
+
import { indexStoragePath } from '../infrastructure/file-index.js';
|
|
2
4
|
import { searchKnowledge } from './search-knowledge.js';
|
|
5
|
+
// Maximum age in milliseconds of a cached context package before contextCacheGet treats it as stale.
const contextCacheTtlMs = 45_000;
|
|
6
|
+
// Upper bound on cached entries; contextCacheSet removes the earliest-inserted keys beyond this size.
const contextCacheMaxEntries = 200;
|
|
7
|
+
// Module-level cache: serialized request key -> { key, createdAt, indexMtimeMs, context }.
const contextCache = new Map();
|
|
8
|
+
/**
 * Reads the modification time of the vault's index storage file.
 *
 * @param {string} vaultPath - Vault location handed to indexStoragePath.
 * @returns {Promise<number>} `mtimeMs` of the index file, or 0 when the
 *   path cannot be resolved or stat'ed.
 */
const readIndexMtimeMs = (vaultPath) => Promise.resolve()
    .then(() => stat(indexStoragePath(vaultPath)))
    .then((info) => info.mtimeMs)
    // Any failure (including a missing index) maps to 0.
    .catch(() => 0);
|
|
16
|
+
/**
 * Serializes a context request into a cache key.
 *
 * The query and agent id are trimmed and lowercased; a nullish agent id
 * becomes `'*'` and a nullish mode becomes `'default'`.
 *
 * @param {string} vaultPath
 * @param {string} query
 * @param {number} limit
 * @param {number} maxTokens
 * @param {string} [agentId]
 * @param {string} [mode]
 * @returns {string} JSON string identifying the request.
 */
const toCacheKey = (vaultPath, query, limit, maxTokens, agentId, mode) => {
    const normalizedQuery = query.trim().toLowerCase();
    const normalizedAgent = agentId == null ? '*' : agentId.trim().toLowerCase();
    const normalizedMode = mode ?? 'default';
    return JSON.stringify({
        vaultPath,
        query: normalizedQuery,
        limit,
        maxTokens,
        agentId: normalizedAgent,
        mode: normalizedMode
    });
};
|
|
24
|
+
/**
 * Looks up a cached context package.
 *
 * An entry is served only while it is within the TTL and was stored
 * against the same index mtime; otherwise it is evicted.
 *
 * @param {string} key - Cache key from toCacheKey.
 * @param {number} indexMtimeMs - Current index modification time.
 * @returns {*} The cached context, or undefined on miss/stale entry.
 */
const contextCacheGet = (key, indexMtimeMs) => {
    const cached = contextCache.get(key);
    if (!cached) {
        return undefined;
    }
    const withinTtl = Date.now() - cached.createdAt <= contextCacheTtlMs;
    const sameIndex = cached.indexMtimeMs === indexMtimeMs;
    if (withinTtl && sameIndex) {
        return cached.context;
    }
    contextCache.delete(key);
    return undefined;
};
|
|
36
|
+
/**
 * Stores a cache entry under `entry.key`, then removes the
 * earliest-inserted keys until the cache is back within
 * contextCacheMaxEntries.
 *
 * @param {{key: string, createdAt: number, indexMtimeMs: number, context: *}} entry
 * @returns {void}
 */
const contextCacheSet = (entry) => {
    contextCache.set(entry.key, entry);
    let excess = contextCache.size - contextCacheMaxEntries;
    // Map iterates in insertion order, so the first keys are the oldest.
    for (const staleKey of contextCache.keys()) {
        if (excess <= 0) {
            break;
        }
        contextCache.delete(staleKey);
        excess -= 1;
    }
};
|
|
3
45
|
/**
 * Builds (or serves from cache) the context package for a query.
 *
 * The cache key covers every argument. A cached package is reused only
 * while contextCacheGet considers it fresh for the index mtime read at the
 * start of this call.
 *
 * @param {string} vaultPath
 * @param {string} query
 * @param {number} limit - Search result limit.
 * @param {number} maxTokens - Budget handed to selectContextSections.
 * @param {string} [agentId]
 * @param {string} [mode]
 * @returns {Promise<{query: string, sections: *, content: *}>}
 */
export const buildContextPackage = async (vaultPath, query, limit, maxTokens, agentId, mode) => {
    const key = toCacheKey(vaultPath, query, limit, maxTokens, agentId, mode);
    const indexMtimeMs = await readIndexMtimeMs(vaultPath);
    const hit = contextCacheGet(key, indexMtimeMs);
    if (hit) {
        return hit;
    }
    const matches = await searchKnowledge(vaultPath, query, limit, agentId, mode);
    const sections = selectContextSections(matches, maxTokens);
    const context = { query, sections, content: formatContextPackage(query, sections) };
    contextCacheSet({ key, createdAt: Date.now(), indexMtimeMs, context });
    return context;
};
|
|
12
67
|
export const buildContext = async (vaultPath, query, limit, maxTokens, agentId, mode) => {
|
|
13
68
|
const contextPackage = await buildContextPackage(vaultPath, query, limit, maxTokens, agentId, mode);
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
import { createHash } from 'node:crypto';
|
|
2
|
+
import { createEmbeddingBuckets, createLocalEmbedding, cosineSimilarity } from '../domain/embeddings.js';
|
|
3
|
+
import { parseMarkdownDocument } from '../domain/markdown.js';
|
|
4
|
+
import { writeMarkdownFile, ensureVault, readMarkdownFiles } from '../infrastructure/file-system-vault.js';
|
|
5
|
+
import { indexVault } from './index-vault.js';
|
|
6
|
+
// Matches runs of Unicode letters, digits, underscores and hyphens; hasSharedTokens uses it to tokenize text.
const tokenPattern = /[\p{L}\p{N}_-]+/gu;
|
|
7
|
+
// Matches a `---` delimited block (non-greedy), used to strip frontmatter before comparison.
// NOTE(review): with the `m` flag `^` matches at any line start, so the first pair of `---`
// lines anywhere in the text matches, not only a block at the top — confirm this is intended.
const frontmatterPattern = /^---\n[\s\S]*?\n---\n?/m;
|
|
8
|
+
// Matches the first line starting with `# ` (an H1) plus the newlines after it.
// NOTE(review): the `m` flag lets this match an H1 anywhere in the text, not only the leading one — confirm.
const rootHeadingPattern = /^#\s+.+\n+/m;
|
|
9
|
+
// Cap on note indexes kept per embedding bucket in indexCandidatePairs; later notes in a full bucket are skipped.
const maxCandidatesPerBucket = 240;
|
|
10
|
+
/**
 * Normalizes a vault-relative path: backslashes become forward slashes
 * and a single leading `./` is removed.
 *
 * @param {string} path
 * @returns {string} Normalized path.
 */
const normalizePath = (path) => {
    const forwardSlashed = path.split('\\').join('/');
    return forwardSlashed.startsWith('./') ? forwardSlashed.slice(2) : forwardSlashed;
};
|
|
11
|
+
/**
 * Reduces a note to the body text used for duplicate comparison by
 * removing the frontmatter block and the title heading, then trimming.
 *
 * Line endings are normalized to `\n` first: both strip patterns require
 * `\n`, so CRLF documents would otherwise keep their frontmatter and
 * heading and never compare equal to their LF counterparts.
 *
 * @param {string} content - Raw note content.
 * @returns {string} Comparable body text.
 */
const toComparableBody = (content) => content
    .replaceAll('\r\n', '\n')
    .replace(frontmatterPattern, '')
    .replace(rootHeadingPattern, '')
    .trim();
|
|
16
|
+
/**
 * Strict comparison form of a note: the comparable body with its
 * internal whitespace left as is.
 *
 * @param {string} content - Raw note content.
 * @returns {string} Result of toComparableBody.
 */
const normalizeStrictContent = (content) => {
    return toComparableBody(content);
};
|
|
17
|
+
/**
 * Semantic comparison form of a note: the comparable body with every run
 * of whitespace collapsed to a single space.
 *
 * @param {string} content - Raw note content.
 * @returns {string} Single-line, whitespace-normalized body.
 */
const normalizeSemanticContent = (content) => {
    const words = toComparableBody(content).split(/\s+/).filter((word) => word !== '');
    return words.join(' ');
};
|
|
20
|
+
/**
 * Hashes a string with SHA-256.
 *
 * @param {string} value - Text to hash (encoded as UTF-8).
 * @returns {string} Lowercase hexadecimal digest.
 */
const toHash = (value) => {
    const hasher = createHash('sha256');
    hasher.update(value, 'utf8');
    return hasher.digest('hex');
};
|
|
21
|
+
/**
 * Builds an order-independent identifier for a pair of note paths: both
 * paths are normalized, ordered with localeCompare, and joined with `|`.
 *
 * @param {string} leftPath
 * @param {string} rightPath
 * @returns {string} `<smaller>|<larger>`.
 */
const toCandidateId = (leftPath, rightPath) => {
    const first = normalizePath(leftPath);
    const second = normalizePath(rightPath);
    return first.localeCompare(second) <= 0 ? `${first}|${second}` : `${second}|${first}`;
};
|
|
22
|
+
/**
 * Tells whether two texts have at least one token in common.
 *
 * Tokens come from tokenPattern, are lowercased, and must be longer than
 * two characters.
 *
 * @param {string} left
 * @param {string} right
 * @returns {boolean} True when a qualifying token occurs in both texts.
 */
const hasSharedTokens = (left, right) => {
    const tokenize = (text) => new Set((text.match(tokenPattern) ?? [])
        .map((token) => token.toLowerCase())
        .filter((token) => token.length > 2));
    const leftTokens = tokenize(left);
    const rightTokens = tokenize(right);
    if (leftTokens.size === 0 || rightTokens.size === 0) {
        return false;
    }
    return [...leftTokens].some((token) => rightTokens.has(token));
};
|
|
35
|
+
/**
 * Formats the low-priority "related" line that links to another note.
 *
 * @param {string} targetTitle - Title of the linked note.
 * @returns {string} The marker line (without a trailing newline).
 */
const relatedMarker = (targetTitle) => {
    const link = `[[${targetTitle}]]`;
    return `Related: ${link} priority: low #related-to`;
};
|
|
36
|
+
/**
 * Appends a "related" line linking to `targetTitle` unless the content
 * already contains a wiki link to that exact title.
 *
 * An existing link counts when the title (case-insensitive, optional
 * surrounding spaces) is immediately followed by `]`, `|` or `#`. That
 * terminator is required: without it a link such as `[[Foobar]]` would be
 * mistaken for a link to `Foo`.
 *
 * @param {string} content - Note content.
 * @param {string} targetTitle - Title that must be linked.
 * @returns {string} Original content, or content with the related line appended.
 */
const ensureRelatedEdgeLine = (content, targetTitle) => {
    // Escape regex metacharacters so the title is matched literally.
    const escapedTitle = targetTitle.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const linkPattern = new RegExp(`\\[\\[\\s*${escapedTitle}\\s*[\\]|#]`, 'i');
    if (linkPattern.test(content)) {
        return content;
    }
    return `${content.trimEnd()}\n\n${relatedMarker(targetTitle)}\n`;
};
|
|
44
|
+
/**
 * Appends a "Merged into [[title]]" line unless that marker text is
 * already present in the content.
 *
 * @param {string} content - Note content.
 * @param {string} targetTitle - Title of the note this one was merged into.
 * @returns {string} Original content, or content with the marker appended.
 */
const ensureMergedMarker = (content, targetTitle) => {
    const marker = `Merged into [[${targetTitle}]]`;
    return content.includes(marker)
        ? content
        : `${content.trimEnd()}\n\n${marker} priority: low #related-to\n`;
};
|
|
51
|
+
/**
 * Appends another note's body under a "Merged Memory From" heading,
 * unless a section with that heading already exists.
 *
 * The merged body goes through normalizeSemanticContent before insertion.
 * NOTE(review): that helper collapses all whitespace, so the merged text
 * lands on a single line — confirm this flattening is intended.
 *
 * @param {string} baseContent - Content of the note receiving the merge.
 * @param {string} mergedTitle - Title of the note being merged in.
 * @param {string} mergedContent - Raw content of the note being merged in.
 * @returns {string} Base content, possibly extended with the merged section.
 */
const appendMergedContent = (baseContent, mergedTitle, mergedContent) => {
    const heading = `## Merged Memory From [[${mergedTitle}]]`;
    if (baseContent.includes(heading)) {
        return baseContent;
    }
    const sections = [baseContent.trimEnd(), heading, normalizeSemanticContent(mergedContent)];
    return `${sections.join('\n\n')}\n`;
};
|
|
59
|
+
/**
 * Loads the vault's Markdown notes as comparison records.
 *
 * Each record carries the parsed title/path/agent/content, the strict and
 * semantic normalizations of the content, a local embedding of
 * `title + semantic content`, and the embedding's buckets.
 *
 * @param {string} vaultPath - Vault location.
 * @param {string} [agentId] - When truthy, only notes whose parsed agentId
 *   equals this value are returned.
 * @returns {Promise<Array<object>>} Comparison records.
 */
const loadNoteRecords = async (vaultPath, agentId) => {
    const absoluteVaultPath = await ensureVault(vaultPath);
    // Read from the resolved path so file discovery and document parsing
    // agree on the vault root.
    const files = await readMarkdownFiles(absoluteVaultPath);
    return files
        .map((file) => parseMarkdownDocument({
            absolutePath: file.absolutePath,
            vaultPath: absoluteVaultPath,
            content: file.content,
            createdAt: file.createdAt,
            updatedAt: file.updatedAt
        }))
        // Filter before normalizing/embedding so notes of other agents cost nothing.
        .filter((parsed) => (agentId ? parsed.agentId === agentId : true))
        .map((parsed) => {
            const strict = normalizeStrictContent(parsed.content);
            const semantic = normalizeSemanticContent(parsed.content);
            const embedding = createLocalEmbedding(`${parsed.title}\n${semantic}`);
            return {
                title: parsed.title,
                path: normalizePath(parsed.path),
                agentId: parsed.agentId,
                content: parsed.content,
                normalizedStrictContent: strict,
                semanticContent: semantic,
                embedding,
                buckets: createEmbeddingBuckets(embedding, 20)
            };
        });
};
|
|
87
|
+
/**
 * Builds the duplicate-candidate descriptor for a pair of note records.
 *
 * @param {{title: string, path: string, agentId: string}} left
 * @param {{title: string, path: string, agentId: string}} right
 * @param {string} kind - Match kind (callers pass 'exact' or 'semantic').
 * @param {number} score - Similarity score; rounded to four decimals.
 * @param {string} reason - Human-readable explanation.
 * @returns {object} Candidate with id, kind, score, both note summaries and reason.
 */
const pairToCandidate = (left, right, kind, score, reason) => {
    const summarize = ({ title, path, agentId }) => ({ title, path, agentId });
    return {
        id: toCandidateId(left.path, right.path),
        possibleDuplicate: true,
        kind,
        score: Number(score.toFixed(4)),
        left: summarize(left),
        right: summarize(right),
        reason
    };
};
|
|
104
|
+
/**
 * Derives the note-index pairs worth comparing: two notes form a pair
 * when they share at least one embedding bucket.
 *
 * Each bucket keeps at most maxCandidatesPerBucket note indexes. Every
 * pair is reported once, as `[lowerIndex, higherIndex]`.
 *
 * @param {Array<{buckets: Iterable<*>}>} notes - Note records.
 * @returns {Array<[number, number]>} Unique index pairs.
 */
const indexCandidatePairs = (notes) => {
    const membersByBucket = new Map();
    for (const [noteIndex, note] of notes.entries()) {
        for (const bucket of note.buckets) {
            const members = membersByBucket.get(bucket) ?? [];
            if (members.length >= maxCandidatesPerBucket) {
                continue;
            }
            members.push(noteIndex);
            membersByBucket.set(bucket, members);
        }
    }
    const seenPairs = new Set();
    const pairs = [];
    for (const members of membersByBucket.values()) {
        members.forEach((first, position) => {
            for (const second of members.slice(position + 1)) {
                const lower = Math.min(first, second);
                const higher = Math.max(first, second);
                const pairKey = `${lower}|${higher}`;
                if (seenPairs.has(pairKey)) {
                    continue;
                }
                seenPairs.add(pairKey);
                pairs.push([lower, higher]);
            }
        });
    }
    return pairs;
};
|
|
132
|
+
/**
 * Scans the vault for possible duplicate notes.
 *
 * Exact duplicates share the hash of their strict-normalized content;
 * each group pairs its first note (by path) with every other member.
 * Semantic duplicates are bucket-sharing pairs whose embedding cosine
 * similarity reaches `minSemanticScore` and that either share a title
 * token, share a content token, or score at least 0.975.
 *
 * @param {string} vaultPath - Vault location.
 * @param {object} [options={}]
 * @param {string} [options.agentId] - Restrict the scan to one agent.
 * @param {number} [options.minSemanticScore=0.92]
 * @param {boolean} [options.includeSemantic=true] - Pass false for exact matches only.
 * @param {string} [options.focusPath] - Keep only candidates involving this path.
 * @param {number} [options.limit=25] - Maximum candidates returned (at least 1).
 * @returns {Promise<Array<object>>} Candidates, highest score first.
 */
export const scanDuplicateNotes = async (vaultPath, options = {}) => {
    const notes = await loadNoteRecords(vaultPath, options.agentId);
    if (notes.length < 2) {
        return [];
    }
    const minSemanticScore = options.minSemanticScore ?? 0.92;
    const candidates = new Map();

    // Pass 1: group notes by content hash to find exact duplicates.
    const groupsByHash = new Map();
    for (const note of notes) {
        const digest = toHash(note.normalizedStrictContent);
        const group = groupsByHash.get(digest) ?? [];
        group.push(note);
        groupsByHash.set(digest, group);
    }
    for (const group of groupsByHash.values()) {
        if (group.length < 2) {
            continue;
        }
        group.sort((left, right) => left.path.localeCompare(right.path));
        const [base, ...others] = group;
        for (const other of others) {
            const candidate = pairToCandidate(base, other, 'exact', 1, 'Exact content hash match');
            candidates.set(candidate.id, candidate);
        }
    }

    // Pass 2: compare bucket-sharing pairs that were not already exact matches.
    if (options.includeSemantic !== false) {
        for (const [leftIndex, rightIndex] of indexCandidatePairs(notes)) {
            const left = notes[leftIndex];
            const right = notes[rightIndex];
            if (!left || !right || left.path === right.path) {
                continue;
            }
            const id = toCandidateId(left.path, right.path);
            if (candidates.has(id)) {
                continue;
            }
            const score = cosineSimilarity(left.embedding, right.embedding);
            const titleShared = hasSharedTokens(left.title, right.title);
            const contentShared = hasSharedTokens(left.semanticContent, right.semanticContent);
            const corroborated = titleShared || contentShared || score >= 0.975;
            if (score >= minSemanticScore && corroborated) {
                candidates.set(id, pairToCandidate(left, right, 'semantic', score, 'High semantic similarity'));
            }
        }
    }

    const focusPath = options.focusPath ? normalizePath(options.focusPath) : undefined;
    const maxResults = Math.max(1, options.limit ?? 25);
    return [...candidates.values()]
        .filter((item) => (focusPath ? item.left.path === focusPath || item.right.path === focusPath : true))
        .sort((left, right) => right.score - left.score || left.left.path.localeCompare(right.left.path))
        .slice(0, maxResults);
};
|
|
185
|
+
/**
 * Resolves a duplicate pair by rewriting the affected notes.
 *
 * - `'link'`: both notes get a related line pointing at each other.
 * - `'ignore'`: only the left note gets a related line.
 * - any other action: the right note's content is appended to the left
 *   note and the right note gets a "Merged into" marker.
 *
 * The vault is re-indexed afterwards unless `options.autoIndex` is false.
 *
 * @param {string} vaultPath - Vault location.
 * @param {object} options
 * @param {string} options.leftPath - Vault-relative path of the first note.
 * @param {string} options.rightPath - Vault-relative path of the second note.
 * @param {string} options.action - Resolution action.
 * @param {boolean} [options.autoIndex=true]
 * @returns {Promise<object>} Action, both paths, sorted updated paths and,
 *   when indexing ran and returned a truthy value, its result as `index`.
 * @throws {Error} When both paths are equal or either note is not found.
 */
export const resolveDuplicateNotes = async (vaultPath, options) => {
    const leftPath = normalizePath(options.leftPath);
    const rightPath = normalizePath(options.rightPath);
    if (leftPath === rightPath) {
        throw new Error('leftPath and rightPath must be different notes.');
    }
    const notesByPath = new Map();
    for (const note of await loadNoteRecords(vaultPath)) {
        notesByPath.set(note.path, note);
    }
    const left = notesByPath.get(leftPath);
    const right = notesByPath.get(rightPath);
    if (!left || !right) {
        throw new Error(`Duplicate resolution paths were not found in vault index source: ${leftPath}, ${rightPath}`);
    }
    const leftRelated = ensureRelatedEdgeLine(left.content, right.title);
    const rightRelated = ensureRelatedEdgeLine(right.content, left.title);
    const updates = new Map();
    switch (options.action) {
        case 'link':
            updates.set(left.path, leftRelated);
            updates.set(right.path, rightRelated);
            break;
        case 'ignore':
            updates.set(left.path, leftRelated);
            break;
        default:
            // Every other action value is handled as a merge.
            updates.set(left.path, appendMergedContent(leftRelated, right.title, right.content));
            updates.set(right.path, ensureMergedMarker(rightRelated, left.title));
            break;
    }
    for (const [path, content] of updates) {
        await writeMarkdownFile(vaultPath, path, content);
    }
    const index = options.autoIndex !== false ? await indexVault(vaultPath) : undefined;
    const updatedPaths = [...updates.keys()].sort((leftValue, rightValue) => leftValue.localeCompare(rightValue));
    return {
        action: options.action,
        leftPath,
        rightPath,
        updatedPaths,
        ...(index ? { index } : {})
    };
};
|