@andespindola/brainlink 0.1.0-beta.37 → 0.1.0-beta.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -69,6 +69,7 @@ Legacy `.jsonl.gz` packs are upgraded to `.blpk` automatically on first search/c
69
69
  - Backlinks, broken-link reports, orphan detection and validation.
70
70
  - Full-text, semantic and hybrid retrieval on a local file index.
71
71
  - Middle-out context assembly around the strongest chunk per document.
72
+ - In-process index and context caching with automatic invalidation on index updates.
72
73
  - Compressed-space prefiltering for `.blpk` packs before decryption and scan.
73
74
  - Agent namespaces under `agents/<agent-id>/`.
74
75
  - S3-compatible bucket vaults through `s3://bucket/prefix` URIs.
@@ -515,6 +516,8 @@ Available tools:
515
516
  - `brainlink_recommendations`: return an automatic action plan so agents can run Brainlink in the recommended order.
516
517
  - `brainlink_context`: read indexed context for a task or question.
517
518
  - `brainlink_search`: search indexed notes.
519
+ - `brainlink_dedupe`: detect duplicate candidates using exact hash + semantic similarity scores.
520
+ - `brainlink_resolve_duplicate`: resolve duplicate pairs (`merge`, `link`, `ignore`) with connectivity-safe fallback edges.
518
521
  - `brainlink_add_note`: write durable Markdown memory and reindex.
519
522
  - `brainlink_add_file`: ingest a local file as a note and reindex.
520
523
  - `brainlink_index`: rebuild the vault index.
@@ -716,6 +719,28 @@ blink add "Note Title" --vault ./vault --content-file ./notes.md --no-auto-index
716
719
 
717
720
  Creates a Markdown note under `agents/<agent-id>/`. Common secret patterns are blocked by default; use `--allow-sensitive` only for an intentionally protected vault.
718
721
  To avoid disconnected memory, Brainlink auto-adds a fallback wiki edge when a note is written without links, creating agent hub notes when needed.
722
+ `add` also returns `possibleDuplicates` (exact hash + semantic candidates) so agents can resolve duplicate memory right after writes.
723
+
724
+ ### `dedupe`
725
+
726
+ ```bash
727
+ blink dedupe --vault ./vault --json
728
+ blink dedupe --vault ./vault --agent coding-agent --limit 20 --min-score 0.92 --json
729
+ blink dedupe --vault ./vault --no-semantic --json
730
+ ```
731
+
732
+ Detects `possibleDuplicate` pairs using exact content hashes and optional semantic similarity.
733
+
734
+ ### `dedupe-resolve`
735
+
736
+ ```bash
737
+ blink dedupe-resolve --vault ./vault --left agents/shared/a.md --right agents/shared/b.md --action merge --json
738
+ blink dedupe-resolve --vault ./vault --left agents/shared/a.md --right agents/shared/b.md --action link --json
739
+ blink dedupe-resolve --vault ./vault --left agents/shared/a.md --right agents/shared/b.md --action ignore --json
740
+ ```
741
+
742
+ Resolves a duplicate pair with `merge`, `link` or `ignore`.
743
+ When action is not `merge`, Brainlink still creates a low-priority related edge (`#related-to`) so notes remain connected: `link` adds it to both notes, `ignore` adds it to the left note only.
719
744
 
720
745
  ### `index`
721
746
 
@@ -764,6 +789,7 @@ blink context "question" --vault ./vault --agent coding-agent --mode hybrid --js
764
789
  ```
765
790
 
766
791
  Builds a compact context package for an agent.
792
+ Repeated calls with the same vault, agent, query, mode and token/limit settings are served from a short in-memory cache while the index is unchanged.
767
793
 
768
794
  ### `links`
769
795
 
@@ -1,13 +1,68 @@
1
+ import { stat } from 'node:fs/promises';
1
2
  import { formatContextPackage, selectContextSections } from '../domain/context.js';
3
+ import { indexStoragePath } from '../infrastructure/file-index.js';
2
4
  import { searchKnowledge } from './search-knowledge.js';
5
+ const contextCacheTtlMs = 45_000;
6
+ const contextCacheMaxEntries = 200;
7
+ const contextCache = new Map();
8
/**
 * Reads the modification time (in milliseconds) of the vault's index file.
 *
 * @param {string} vaultPath - Vault directory passed to `indexStoragePath`.
 * @returns {Promise<number>} The index file's `mtimeMs`, or 0 when the path
 *   cannot be resolved or stat'ed for any reason.
 */
const readIndexMtimeMs = async (vaultPath) => {
    let modifiedAtMs = 0;
    try {
        const indexStats = await stat(indexStoragePath(vaultPath));
        modifiedAtMs = indexStats.mtimeMs;
    }
    catch {
        // Best effort: every failure is reported as 0.
    }
    return modifiedAtMs;
};
16
/**
 * Builds the lookup key for the in-memory context cache.
 *
 * The key is derived from the exact argument values. The query and agent id
 * are deliberately NOT trimmed or lower-cased: the cached package embeds the
 * caller's query text, and agent ids that differ only by case must never
 * share an entry, otherwise one caller could be served context that was
 * built for another.
 *
 * @param {string} vaultPath - Vault the context is built from.
 * @param {string} query - Query text, used verbatim.
 * @param {number} [limit] - Search result limit.
 * @param {number} [maxTokens] - Token budget for the context sections.
 * @param {string} [agentId] - Agent namespace; absent is keyed as `null`.
 * @param {string} [mode] - Retrieval mode; absent is keyed as `null`.
 * @returns {string} A JSON string that is equal only for equal inputs.
 */
const toCacheKey = (vaultPath, query, limit, maxTokens, agentId, mode) => JSON.stringify({
    vaultPath,
    query,
    limit: limit ?? null,
    maxTokens: maxTokens ?? null,
    // `null` cannot collide with a real id or mode the way '*' or 'default' could.
    agentId: agentId ?? null,
    mode: mode ?? null
});
24
/**
 * Looks up a cached context package.
 *
 * An entry is served only while it is no older than `contextCacheTtlMs` and
 * was stored against the same index mtime; otherwise it is evicted.
 *
 * @param {string} key - Cache key for the request.
 * @param {number} indexMtimeMs - Current index modification time.
 * @returns {object|undefined} The cached context, or undefined on a miss.
 */
const contextCacheGet = (key, indexMtimeMs) => {
    const hit = contextCache.get(key);
    if (!hit) {
        return undefined;
    }
    const ageMs = Date.now() - hit.createdAt;
    const withinTtl = ageMs <= contextCacheTtlMs;
    const sameIndex = hit.indexMtimeMs === indexMtimeMs;
    if (withinTtl && sameIndex) {
        return hit.context;
    }
    // Stale entry: drop it so it no longer occupies a cache slot.
    contextCache.delete(key);
    return undefined;
};
36
/**
 * Stores a context entry and trims the cache back to
 * `contextCacheMaxEntries` by removing keys from the front of the Map's
 * iteration order.
 *
 * @param {{key: string}} entry - Entry to store under `entry.key`.
 */
const contextCacheSet = (entry) => {
    contextCache.set(entry.key, entry);
    let excess = contextCache.size - contextCacheMaxEntries;
    for (const staleKey of contextCache.keys()) {
        if (excess <= 0) {
            break;
        }
        contextCache.delete(staleKey);
        excess -= 1;
    }
};
3
45
  export const buildContextPackage = async (vaultPath, query, limit, maxTokens, agentId, mode) => {
46
+ const cacheKey = toCacheKey(vaultPath, query, limit, maxTokens, agentId, mode);
47
+ const indexMtimeMs = await readIndexMtimeMs(vaultPath);
48
+ const cached = contextCacheGet(cacheKey, indexMtimeMs);
49
+ if (cached) {
50
+ return cached;
51
+ }
4
52
  const results = await searchKnowledge(vaultPath, query, limit, agentId, mode);
5
53
  const sections = selectContextSections(results, maxTokens);
6
- return {
54
+ const context = {
7
55
  query,
8
56
  sections,
9
57
  content: formatContextPackage(query, sections)
10
58
  };
59
+ contextCacheSet({
60
+ key: cacheKey,
61
+ createdAt: Date.now(),
62
+ indexMtimeMs,
63
+ context
64
+ });
65
+ return context;
11
66
  };
12
67
  export const buildContext = async (vaultPath, query, limit, maxTokens, agentId, mode) => {
13
68
  const contextPackage = await buildContextPackage(vaultPath, query, limit, maxTokens, agentId, mode);
@@ -0,0 +1,226 @@
1
+ import { createHash } from 'node:crypto';
2
+ import { createEmbeddingBuckets, createLocalEmbedding, cosineSimilarity } from '../domain/embeddings.js';
3
+ import { parseMarkdownDocument } from '../domain/markdown.js';
4
+ import { writeMarkdownFile, ensureVault, readMarkdownFiles } from '../infrastructure/file-system-vault.js';
5
+ import { indexVault } from './index-vault.js';
6
// Word-like tokens: runs of letters, digits, underscores and hyphens.
const tokenPattern = /[\p{L}\p{N}_-]+/gu;
// Frontmatter exists only at the very start of a document, so this pattern is
// anchored there (no `m` flag). With `m`, the text between two horizontal
// rules in the middle of a note would be stripped as if it were frontmatter.
const frontmatterPattern = /^\s*---\n[\s\S]*?\n---\n?/;
// First level-1 heading line, together with the blank lines that follow it.
const rootHeadingPattern = /^#\s+.+\n+/m;
// Upper bound on how many notes one embedding bucket may collect.
const maxCandidatesPerBucket = 240;
/** Converts backslashes to forward slashes and drops a leading `./`. */
const normalizePath = (path) => path.replaceAll('\\', '/').replace(/^\.\//, '');
/**
 * Reduces note content to the body used for duplicate comparison: line
 * endings normalized, frontmatter and the root heading removed, outer
 * whitespace trimmed.
 *
 * CRLF is normalized first because the patterns above only match `\n`;
 * otherwise a CRLF copy of a note would keep its frontmatter and heading and
 * never compare equal to the LF copy.
 */
const toComparableBody = (content) => content
    .replaceAll('\r\n', '\n')
    .replace(frontmatterPattern, '')
    .replace(rootHeadingPattern, '')
    .trim();
/** Body used for exact-hash matching; line structure is preserved. */
const normalizeStrictContent = (content) => toComparableBody(content);
/** Body used for semantic matching; every whitespace run becomes one space. */
const normalizeSemanticContent = (content) => toComparableBody(content)
    .replace(/\s+/g, ' ')
    .trim();
/** SHA-256 hex digest of a UTF-8 string. */
const toHash = (value) => createHash('sha256').update(value, 'utf8').digest('hex');
/** Order-independent pair id: both normalized paths, sorted, joined with `|`. */
const toCandidateId = (leftPath, rightPath) => [normalizePath(leftPath), normalizePath(rightPath)].sort((left, right) => left.localeCompare(right)).join('|');
22
/**
 * Reports whether two strings have at least one token in common.
 *
 * Tokens come from `tokenPattern`, are compared lower-cased, and tokens of
 * two characters or fewer are ignored.
 *
 * @param {string} left
 * @param {string} right
 * @returns {boolean} False when either side has no qualifying tokens.
 */
const hasSharedTokens = (left, right) => {
    const significantTokens = (text) => {
        const found = text.match(tokenPattern) ?? [];
        return new Set(found.map((token) => token.toLowerCase()).filter((token) => token.length > 2));
    };
    const leftTokens = significantTokens(left);
    const rightTokens = significantTokens(right);
    if (leftTokens.size === 0 || rightTokens.size === 0) {
        return false;
    }
    return [...leftTokens].some((token) => rightTokens.has(token));
};
35
/** Low-priority wiki edge line that points at `targetTitle`. */
const relatedMarker = (targetTitle) => `Related: [[${targetTitle}]] priority: low #related-to`;
/**
 * Returns `content` with a related-edge line for `targetTitle` appended,
 * unless the content already links to that title.
 *
 * A link counts only when the title is followed (after optional whitespace)
 * by `]`, `|` or `#`. That terminator is required: without it `[[Foo Bar]]`
 * would be taken for a link to `Foo` and the edge would be skipped.
 *
 * @param {string} content - Current note content.
 * @param {string} targetTitle - Title of the note to link to.
 * @returns {string} The original content, or the content plus the edge line.
 */
const ensureRelatedEdgeLine = (content, targetTitle) => {
    // Escape regex metacharacters so the title is matched literally.
    const escapedTitle = targetTitle.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const linkPattern = new RegExp(`\\[\\[\\s*${escapedTitle}\\s*[\\]|#]`, 'i');
    if (linkPattern.test(content)) {
        return content;
    }
    return `${content.trimEnd()}\n\n${relatedMarker(targetTitle)}\n`;
};
44
/**
 * Appends a "Merged into [[title]]" line to `content` unless that marker
 * text is already present.
 *
 * @param {string} content - Note content to annotate.
 * @param {string} targetTitle - Title of the note this one was merged into.
 * @returns {string} The unchanged content, or the content plus the marker line.
 */
const ensureMergedMarker = (content, targetTitle) => {
    const mergedInto = `Merged into [[${targetTitle}]]`;
    return content.includes(mergedInto)
        ? content
        : `${content.trimEnd()}\n\n${mergedInto} priority: low #related-to\n`;
};
51
/**
 * Appends the body of a merged note to `baseContent` under a
 * "Merged Memory From [[title]]" heading. Does nothing when that heading is
 * already present.
 *
 * The merged body goes through the strict normalizer so its line structure is
 * kept. The whitespace-collapsing semantic normalizer would flatten lists,
 * paragraphs and code blocks of the merged note into a single line.
 *
 * @param {string} baseContent - Content of the note that survives the merge.
 * @param {string} mergedTitle - Title of the note being merged in.
 * @param {string} mergedContent - Raw content of the note being merged in.
 * @returns {string} The base content, extended with the merged body if needed.
 */
const appendMergedContent = (baseContent, mergedTitle, mergedContent) => {
    const marker = `## Merged Memory From [[${mergedTitle}]]`;
    if (baseContent.includes(marker)) {
        return baseContent;
    }
    const mergedBody = normalizeStrictContent(mergedContent);
    return `${baseContent.trimEnd()}\n\n${marker}\n\n${mergedBody}\n`;
};
59
/**
 * Loads the vault's Markdown files as comparison records.
 *
 * Each record carries the parsed title, path, agent id and content, the
 * strict and semantic normalized bodies, a local embedding of the title plus
 * semantic body, and the embedding's buckets.
 *
 * The agent filter is applied right after parsing, so embeddings are computed
 * only for notes that are actually returned.
 *
 * @param {string} vaultPath - Vault directory.
 * @param {string} [agentId] - When truthy, only notes of this agent are kept.
 * @returns {Promise<Array<object>>} Records in the order the files were read.
 */
const loadNoteRecords = async (vaultPath, agentId) => {
    const absoluteVaultPath = await ensureVault(vaultPath);
    const files = await readMarkdownFiles(vaultPath);
    return files
        .map((file) => parseMarkdownDocument({
            absolutePath: file.absolutePath,
            vaultPath: absoluteVaultPath,
            content: file.content,
            createdAt: file.createdAt,
            updatedAt: file.updatedAt
        }))
        .filter((parsed) => !agentId || parsed.agentId === agentId)
        .map((parsed) => {
            const strict = normalizeStrictContent(parsed.content);
            const semantic = normalizeSemanticContent(parsed.content);
            const embedding = createLocalEmbedding(`${parsed.title}\n${semantic}`);
            return {
                title: parsed.title,
                path: normalizePath(parsed.path),
                agentId: parsed.agentId,
                content: parsed.content,
                normalizedStrictContent: strict,
                semanticContent: semantic,
                embedding,
                buckets: createEmbeddingBuckets(embedding, 20)
            };
        });
};
87
/**
 * Builds the duplicate-candidate object reported for a pair of notes.
 *
 * @param {object} left - Left note record.
 * @param {object} right - Right note record.
 * @param {string} kind - Candidate kind, e.g. 'exact' or 'semantic'.
 * @param {number} score - Similarity score; rounded to four decimals.
 * @param {string} reason - Human-readable explanation.
 * @returns {object} Candidate with an order-independent `id`.
 */
const pairToCandidate = (left, right, kind, score, reason) => {
    const describeSide = ({ title, path, agentId }) => ({ title, path, agentId });
    const id = toCandidateId(left.path, right.path);
    const roundedScore = Number(score.toFixed(4));
    return {
        id,
        possibleDuplicate: true,
        kind,
        score: roundedScore,
        left: describeSide(left),
        right: describeSide(right),
        reason
    };
};
104
/**
 * Produces the note-index pairs worth comparing: two notes are paired when
 * they share at least one embedding bucket.
 *
 * Each bucket keeps at most `maxCandidatesPerBucket` note indexes, and every
 * pair is emitted once as `[lowerIndex, higherIndex]`.
 *
 * @param {Array<{buckets: Array}>} notes - Note records with their buckets.
 * @returns {Array<[number, number]>} Unique index pairs.
 */
const indexCandidatePairs = (notes) => {
    const membersByBucket = new Map();
    for (let noteIndex = 0; noteIndex < notes.length; noteIndex += 1) {
        notes[noteIndex].buckets.forEach((bucket) => {
            const members = membersByBucket.get(bucket);
            if (members === undefined) {
                membersByBucket.set(bucket, [noteIndex]);
                return;
            }
            // Full buckets silently stop accepting further notes.
            if (members.length < maxCandidatesPerBucket) {
                members.push(noteIndex);
            }
        });
    }
    const emitted = new Set();
    const pairs = [];
    for (const members of membersByBucket.values()) {
        members.forEach((first, position) => {
            for (const second of members.slice(position + 1)) {
                const low = Math.min(first, second);
                const high = Math.max(first, second);
                const pairKey = `${low}|${high}`;
                if (emitted.has(pairKey)) {
                    continue;
                }
                emitted.add(pairKey);
                pairs.push([low, high]);
            }
        });
    }
    return pairs;
};
132
/**
 * Scans a vault for pairs of notes that look like duplicates.
 *
 * Pass 1 groups notes by the hash of their strict body and pairs the
 * lowest-path note of each group with every other member (kind 'exact',
 * score 1). Pass 2 (unless `includeSemantic` is false) scores bucket-sharing
 * pairs by cosine similarity and keeps those at or above the minimum score
 * that also share a title or content token, or score at least 0.975.
 *
 * @param {string} vaultPath - Vault directory.
 * @param {object} [options]
 * @param {string} [options.agentId] - Restrict the scan to one agent's notes.
 * @param {number} [options.minSemanticScore=0.92] - Semantic score floor.
 * @param {boolean} [options.includeSemantic=true] - Run the semantic pass.
 * @param {string} [options.focusPath] - Keep only pairs involving this path.
 * @param {number} [options.limit=25] - Maximum pairs returned (at least 1).
 * @returns {Promise<Array<object>>} Candidates, highest score first.
 */
export const scanDuplicateNotes = async (vaultPath, options = {}) => {
    const notes = await loadNoteRecords(vaultPath, options.agentId);
    if (notes.length < 2) {
        return [];
    }
    const minSemanticScore = options.minSemanticScore ?? 0.92;
    const includeSemantic = options.includeSemantic !== false;
    const candidatesById = new Map();

    // Pass 1: exact matches on the strict-body hash.
    const groupsByHash = new Map();
    for (const note of notes) {
        const digest = toHash(note.normalizedStrictContent);
        const group = groupsByHash.get(digest) ?? [];
        group.push(note);
        groupsByHash.set(digest, group);
    }
    for (const group of groupsByHash.values()) {
        if (group.length < 2) {
            continue;
        }
        group.sort((first, second) => first.path.localeCompare(second.path));
        const anchor = group[0];
        for (const duplicate of group.slice(1)) {
            const candidate = pairToCandidate(anchor, duplicate, 'exact', 1, 'Exact content hash match');
            candidatesById.set(candidate.id, candidate);
        }
    }

    // Pass 2: semantic matches among notes that share an embedding bucket.
    if (includeSemantic) {
        for (const [leftIndex, rightIndex] of indexCandidatePairs(notes)) {
            const left = notes[leftIndex];
            const right = notes[rightIndex];
            if (!left || !right || left.path === right.path) {
                continue;
            }
            const id = toCandidateId(left.path, right.path);
            // Pairs already reported as exact matches are not re-scored.
            if (candidatesById.has(id)) {
                continue;
            }
            const score = cosineSimilarity(left.embedding, right.embedding);
            const titleShared = hasSharedTokens(left.title, right.title);
            const contentShared = hasSharedTokens(left.semanticContent, right.semanticContent);
            const corroborated = titleShared || contentShared || score >= 0.975;
            if (score >= minSemanticScore && corroborated) {
                candidatesById.set(id, pairToCandidate(left, right, 'semantic', score, 'High semantic similarity'));
            }
        }
    }

    const focusPath = options.focusPath ? normalizePath(options.focusPath) : undefined;
    const maxResults = Math.max(1, options.limit ?? 25);
    const involvesFocus = (item) => item.left.path === focusPath || item.right.path === focusPath;
    const pool = [...candidatesById.values()];
    const relevant = focusPath ? pool.filter(involvesFocus) : pool;
    relevant.sort((first, second) => second.score - first.score || first.left.path.localeCompare(second.left.path));
    return relevant.slice(0, maxResults);
};
185
/**
 * Resolves a duplicate pair by rewriting the affected notes.
 *
 * - `link`: both notes receive a related-edge line pointing at the other.
 * - `ignore`: only the left note receives the related-edge line.
 * - `merge`: the right note's body is appended to the left note, and the
 *   right note receives a "Merged into" marker.
 *
 * The action is validated before anything is read or written. Without that
 * check an unknown or missing action would fall through to the merge branch
 * and rewrite both notes.
 *
 * @param {string} vaultPath - Vault directory.
 * @param {object} options
 * @param {string} options.leftPath - Path of the left note.
 * @param {string} options.rightPath - Path of the right note.
 * @param {'merge'|'link'|'ignore'} options.action - Resolution to apply.
 * @param {boolean} [options.autoIndex=true] - Reindex after writing.
 * @returns {Promise<object>} The action, both normalized paths, the sorted
 *   list of updated paths and, when reindexed, the index result.
 * @throws {Error} On an unsupported action, identical paths, or a path that
 *   is not among the vault's notes.
 */
export const resolveDuplicateNotes = async (vaultPath, options) => {
    const supportedActions = ['merge', 'link', 'ignore'];
    if (!supportedActions.includes(options.action)) {
        throw new Error(`Unsupported duplicate resolution action: ${String(options.action)}. Use merge, link or ignore.`);
    }
    const leftPath = normalizePath(options.leftPath);
    const rightPath = normalizePath(options.rightPath);
    if (leftPath === rightPath) {
        throw new Error('leftPath and rightPath must be different notes.');
    }
    const notes = await loadNoteRecords(vaultPath);
    const byPath = new Map(notes.map((note) => [note.path, note]));
    const left = byPath.get(leftPath);
    const right = byPath.get(rightPath);
    if (!left || !right) {
        throw new Error(`Duplicate resolution paths were not found in vault index source: ${leftPath}, ${rightPath}`);
    }
    const updates = new Map();
    const leftRelated = ensureRelatedEdgeLine(left.content, right.title);
    const rightRelated = ensureRelatedEdgeLine(right.content, left.title);
    if (options.action === 'link') {
        updates.set(left.path, leftRelated);
        updates.set(right.path, rightRelated);
    }
    else if (options.action === 'ignore') {
        updates.set(left.path, leftRelated);
    }
    else {
        // Only 'merge' can reach this branch because of the validation above.
        updates.set(left.path, appendMergedContent(leftRelated, right.title, right.content));
        updates.set(right.path, ensureMergedMarker(rightRelated, left.title));
    }
    // Written one at a time, in the order the updates were registered.
    for (const [path, content] of updates) {
        await writeMarkdownFile(vaultPath, path, content);
    }
    const shouldIndex = options.autoIndex !== false;
    const index = shouldIndex ? await indexVault(vaultPath) : undefined;
    return {
        action: options.action,
        leftPath,
        rightPath,
        updatedPaths: Array.from(updates.keys()).sort((leftValue, rightValue) => leftValue.localeCompare(rightValue)),
        ...(index ? { index } : {})
    };
};
@@ -3,8 +3,9 @@ import { mkdir, writeFile } from 'node:fs/promises';
3
3
  import { dirname, join, relative, resolve } from 'node:path';
4
4
  import { platform, tmpdir } from 'node:os';
5
5
  import { spawn, spawnSync } from 'node:child_process';
6
- import { addNote } from '../../application/add-note.js';
6
+ import { addNoteWithMetadata } from '../../application/add-note.js';
7
7
  import { buildContextPackage } from '../../application/build-context.js';
8
+ import { resolveDuplicateNotes, scanDuplicateNotes } from '../../application/dedupe-notes.js';
8
9
  import { importLegacySqliteDatabase } from '../../application/import-legacy-sqlite.js';
9
10
  import { indexVault } from '../../application/index-vault.js';
10
11
  import { migrateVaultContent, planVaultMigration, previewVaultMigration, shouldMigrateDefaultVault } from '../../application/migrate-vault.js';
@@ -26,6 +27,16 @@ const resolveAddContent = (options) => {
26
27
  }
27
28
  return readFileSync(options.contentFile, 'utf8');
28
29
  };
30
/**
 * Parses a similarity score given on the command line.
 *
 * The whole value must be numeric. `Number.parseFloat` is not used because it
 * accepts trailing garbage ("0.9abc" would parse as 0.9).
 *
 * @param {string|number|null|undefined} value - Raw option value.
 * @param {number} fallback - Returned when `value` is null or undefined.
 * @returns {number} A finite number within [0, 1].
 * @throws {Error} When the value is empty, not numeric, or outside [0, 1].
 */
const parseScore = (value, fallback) => {
    if (value == null) {
        return fallback;
    }
    const text = String(value).trim();
    // Number('') is 0, so an empty value has to be rejected explicitly.
    const parsed = text === '' ? Number.NaN : Number(text);
    if (!Number.isFinite(parsed) || parsed < 0 || parsed > 1) {
        throw new Error(`Invalid score value: ${value}. Expected a number between 0 and 1.`);
    }
    return parsed;
};
29
40
  const spawnDetached = (command, args) => {
30
41
  try {
31
42
  const child = spawn(command, args, { detached: true, stdio: 'ignore' });
@@ -494,12 +505,95 @@ export const registerWriteCommands = (program) => {
494
505
  .action(async (title, options) => {
495
506
  const resolved = await resolveOptions(options);
496
507
  const content = resolveAddContent(options);
497
- const notePath = await addNote(resolved.vault, title, content, resolved.agent, {
508
+ const added = await addNoteWithMetadata(resolved.vault, title, content, resolved.agent, {
498
509
  allowSensitive: Boolean(options.allowSensitive)
499
510
  });
500
511
  const shouldAutoIndex = options.autoIndex !== false && resolved.config.autoIndexOnWrite;
501
512
  const index = shouldAutoIndex ? await indexVault(resolved.vault) : undefined;
502
- print(options.json, { title, agent: resolved.agent ?? 'shared', path: notePath, ...(index ? { index } : {}) }, () => `Created note at ${notePath}`);
513
+ const absoluteVaultPath = await ensureVault(resolved.vault);
514
+ const focusPath = added.path.startsWith(absoluteVaultPath)
515
+ ? relative(absoluteVaultPath, added.path).replaceAll('\\', '/')
516
+ : added.path.includes('agents/')
517
+ ? added.path.slice(added.path.indexOf('agents/')).replaceAll('\\', '/')
518
+ : undefined;
519
+ const possibleDuplicates = await scanDuplicateNotes(resolved.vault, {
520
+ agentId: resolved.agent,
521
+ focusPath,
522
+ limit: 5,
523
+ minSemanticScore: 0.92,
524
+ includeSemantic: true
525
+ });
526
+ print(options.json, {
527
+ title,
528
+ agent: resolved.agent ?? 'shared',
529
+ path: added.path,
530
+ writeConnectivity: {
531
+ autoLinked: added.autoLinked,
532
+ linkTarget: added.linkTarget,
533
+ guaranteedEdge: true
534
+ },
535
+ possibleDuplicates,
536
+ ...(index ? { index } : {})
537
+ }, () => {
538
+ const duplicateMessage = possibleDuplicates.length > 0
539
+ ? `\nPotential duplicates: ${possibleDuplicates.length}. Use "blink dedupe --json" or "blink dedupe-resolve".`
540
+ : '';
541
+ return `Created note at ${added.path}${duplicateMessage}`;
542
+ });
543
+ });
544
+ program
545
+ .command('dedupe')
546
+ .option('-v, --vault <vault>', 'vault directory')
547
+ .option('-a, --agent <agent>', 'agent memory namespace')
548
+ .option('-l, --limit <limit>', 'maximum duplicate candidate pairs')
549
+ .option('--min-score <score>', 'minimum semantic similarity score between 0 and 1', '0.92')
550
+ .option('--no-semantic', 'disable semantic duplicate detection and keep exact-content matching only')
551
+ .option('--json', 'print machine-readable JSON')
552
+ .description('detect possible duplicate notes with exact hash and semantic similarity scores')
553
+ .action(async (options) => {
554
+ const resolved = await resolveOptions(options);
555
+ const duplicates = await scanDuplicateNotes(resolved.vault, {
556
+ agentId: resolved.agent,
557
+ limit: parsePositiveInteger(options.limit ?? '25', 25),
558
+ minSemanticScore: parseScore(options.minScore, 0.92),
559
+ includeSemantic: options.semantic !== false
560
+ });
561
+ print(options.json, { vault: resolved.vault, agent: resolved.agent, duplicates }, () => {
562
+ if (duplicates.length === 0) {
563
+ return 'No possible duplicates found.';
564
+ }
565
+ return duplicates
566
+ .map((item, index) => `${index + 1}. [${item.kind}] score=${item.score.toFixed(4)} ${item.left.path} <-> ${item.right.path} (${item.reason})`)
567
+ .join('\n');
568
+ });
569
+ });
570
+ program
571
+ .command('dedupe-resolve')
572
+ .option('-v, --vault <vault>', 'vault directory')
573
+ .option('--left <path>', 'left note relative path from dedupe result')
574
+ .option('--right <path>', 'right note relative path from dedupe result')
575
+ .option('--action <action>', 'resolution action: merge, link or ignore')
576
+ .option('--no-auto-index', 'skip reindex after duplicate resolution')
577
+ .option('--json', 'print machine-readable JSON')
578
+ .description('resolve a duplicate candidate with merge, link or ignore')
579
+ .action(async (options) => {
580
+ const resolved = await resolveOptions(options);
581
+ if (!options.left || !options.right) {
582
+ throw new Error('Use --left <path> and --right <path> to resolve a duplicate pair.');
583
+ }
584
+ if (options.action !== 'merge' && options.action !== 'link' && options.action !== 'ignore') {
585
+ throw new Error('Use --action merge|link|ignore.');
586
+ }
587
+ const result = await resolveDuplicateNotes(resolved.vault, {
588
+ leftPath: options.left,
589
+ rightPath: options.right,
590
+ action: options.action,
591
+ autoIndex: options.autoIndex !== false
592
+ });
593
+ print(options.json, {
594
+ vault: resolved.vault,
595
+ ...result
596
+ }, () => `Resolved duplicate (${result.action}) for ${result.leftPath} <-> ${result.rightPath}`);
503
597
  });
504
598
  program
505
599
  .command('index')
@@ -1,7 +1,9 @@
1
- import { mkdir, readFile, rename, writeFile } from 'node:fs/promises';
1
+ import { mkdir, readFile, rename, stat, writeFile } from 'node:fs/promises';
2
2
  import { dirname, join } from 'node:path';
3
3
  import { cosineSimilarity } from '../domain/embeddings.js';
4
4
  const queryTokenPattern = /[\p{L}\p{N}_-]+/gu;
5
+ const indexCacheMaxEntries = 16;
6
+ const indexCache = new Map();
5
7
  const emptyIndex = () => ({
6
8
  version: 1,
7
9
  updatedAt: new Date().toISOString(),
@@ -11,18 +13,44 @@ const emptyIndex = () => ({
11
13
  });
12
14
  export const indexStoragePath = (vaultPath) => join(vaultPath, '.brainlink', 'index.json');
13
15
  const readIndex = async (vaultPath) => {
16
+ const path = indexStoragePath(vaultPath);
17
+ let stats = null;
14
18
  try {
15
- const parsed = JSON.parse(await readFile(indexStoragePath(vaultPath), 'utf8'));
16
- return {
19
+ const fileStats = await stat(path);
20
+ stats = { mtimeMs: fileStats.mtimeMs, size: fileStats.size };
21
+ }
22
+ catch (error) {
23
+ if (error instanceof Error && 'code' in error && error.code === 'ENOENT') {
24
+ indexCache.delete(path);
25
+ return emptyIndex();
26
+ }
27
+ return emptyIndex();
28
+ }
29
+ const cached = indexCache.get(path);
30
+ if (cached && cached.mtimeMs === stats.mtimeMs && cached.size === stats.size) {
31
+ return cached.index;
32
+ }
33
+ try {
34
+ const parsed = JSON.parse(await readFile(path, 'utf8'));
35
+ const loaded = {
17
36
  version: 1,
18
37
  updatedAt: typeof parsed.updatedAt === 'string' ? parsed.updatedAt : new Date().toISOString(),
19
38
  documents: Array.isArray(parsed.documents) ? parsed.documents : [],
20
39
  chunks: Array.isArray(parsed.chunks) ? parsed.chunks : [],
21
40
  links: Array.isArray(parsed.links) ? parsed.links : []
22
41
  };
42
+ indexCache.set(path, { ...stats, index: loaded });
43
+ if (indexCache.size > indexCacheMaxEntries) {
44
+ const oldest = indexCache.keys().next().value;
45
+ if (typeof oldest === 'string') {
46
+ indexCache.delete(oldest);
47
+ }
48
+ }
49
+ return loaded;
23
50
  }
24
51
  catch (error) {
25
52
  if (error instanceof Error && 'code' in error && error.code === 'ENOENT') {
53
+ indexCache.delete(path);
26
54
  return emptyIndex();
27
55
  }
28
56
  return emptyIndex();
@@ -34,6 +62,12 @@ const writeIndex = async (vaultPath, index) => {
34
62
  await mkdir(dirname(target), { recursive: true, mode: 0o700 });
35
63
  await writeFile(temp, `${JSON.stringify(index)}\n`, { encoding: 'utf8', mode: 0o600 });
36
64
  await rename(temp, target);
65
+ const fileStats = await stat(target);
66
+ indexCache.set(target, {
67
+ mtimeMs: fileStats.mtimeMs,
68
+ size: fileStats.size,
69
+ index
70
+ });
37
71
  };
38
72
  const normalizeToken = (value) => value
39
73
  .normalize('NFKD')
@@ -2,7 +2,7 @@ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
2
2
  import { readFileSync } from 'node:fs';
3
3
  import { dirname, join } from 'node:path';
4
4
  import { fileURLToPath } from 'node:url';
5
- import { addNoteInputSchema, addFileInputSchema, addFileTool, addNoteTool, brokenLinksInputSchema, brokenLinksTool, bootstrapInputSchema, bootstrapTool, contextInputSchema, contextTool, graphInputSchema, graphTool, indexInputSchema, indexTool, orphansInputSchema, orphansTool, policyInputSchema, policyTool, recommendationsInputSchema, recommendationsTool, searchInputSchema, searchTool, statsInputSchema, statsTool, syncInputSchema, syncTool, validateInputSchema, validateTool } from './tools.js';
5
+ import { addNoteInputSchema, addFileInputSchema, addFileTool, addNoteTool, dedupeInputSchema, dedupeResolveInputSchema, dedupeResolveTool, dedupeTool, brokenLinksInputSchema, brokenLinksTool, bootstrapInputSchema, bootstrapTool, contextInputSchema, contextTool, graphInputSchema, graphTool, indexInputSchema, indexTool, orphansInputSchema, orphansTool, policyInputSchema, policyTool, recommendationsInputSchema, recommendationsTool, searchInputSchema, searchTool, statsInputSchema, statsTool, syncInputSchema, syncTool, validateInputSchema, validateTool } from './tools.js';
6
6
  const readPackageVersion = () => {
7
7
  const packagePath = join(dirname(fileURLToPath(import.meta.url)), '../../package.json');
8
8
  const metadata = JSON.parse(readFileSync(packagePath, 'utf8'));
@@ -40,6 +40,16 @@ export const createBrainlinkMcpServer = () => {
40
40
  description: 'Search indexed Brainlink notes with FTS, semantic or hybrid retrieval.',
41
41
  inputSchema: searchInputSchema
42
42
  }, searchTool);
43
+ server.registerTool('brainlink_dedupe', {
44
+ title: 'Detect Duplicate Notes',
45
+ description: 'Detect possible duplicate notes using exact content hash and semantic similarity scoring.',
46
+ inputSchema: dedupeInputSchema
47
+ }, dedupeTool);
48
+ server.registerTool('brainlink_resolve_duplicate', {
49
+ title: 'Resolve Duplicate Notes',
50
+ description: 'Resolve a duplicate pair with merge, link or ignore. Non-merge actions still create low-priority related edges.',
51
+ inputSchema: dedupeResolveInputSchema
52
+ }, dedupeResolveTool);
43
53
  server.registerTool('brainlink_add_note', {
44
54
  title: 'Add Brainlink Note',
45
55
  description: 'Write durable Markdown memory, then reindex the vault. Include explicit [[wiki links]] for connected graph memory. Add priority markers near links, such as priority: high, #important or #critical, when a relationship should be weighted higher.',
package/dist/mcp/tools.js CHANGED
@@ -4,6 +4,7 @@ import { z } from 'zod';
4
4
  import { getBrokenLinksReport, getOrphansReport, getStats, validateVault } from '../application/analyze-vault.js';
5
5
  import { addNoteWithMetadata } from '../application/add-note.js';
6
6
  import { buildContextPackage } from '../application/build-context.js';
7
+ import { resolveDuplicateNotes, scanDuplicateNotes } from '../application/dedupe-notes.js';
7
8
  import { getGraph } from '../application/get-graph.js';
8
9
  import { indexVault } from '../application/index-vault.js';
9
10
  import { searchKnowledge } from '../application/search-knowledge.js';
@@ -311,6 +312,20 @@ export const recommendationsInputSchema = {
311
312
  limit: optionalPositiveInteger().describe('Optional context limit override for generated recommendations.'),
312
313
  tokens: optionalPositiveInteger().describe('Optional context token budget override for generated recommendations.')
313
314
  };
315
/**
 * Input fields of the duplicate-detection tool: the shared vault and agent
 * inputs plus a result limit, a semantic score floor and a switch for the
 * semantic pass.
 */
export const dedupeInputSchema = {
    ...vaultInput,
    ...agentInput,
    limit: optionalPositiveInteger()
        .describe('Maximum duplicate candidate pairs to return.'),
    minScore: z.number()
        .min(0)
        .max(1)
        .optional()
        .describe('Minimum semantic similarity score between 0 and 1.'),
    semantic: z.boolean()
        .optional()
        .default(true)
        .describe('Enable semantic duplicate detection in addition to exact content hash matches.')
};
322
/**
 * Input fields of the duplicate-resolution tool: the shared vault input, the
 * two note paths, the resolution action and a reindex switch.
 */
export const dedupeResolveInputSchema = {
    ...vaultInput,
    leftPath: z.string()
        .min(1)
        .describe('Left note path from dedupe results.'),
    rightPath: z.string()
        .min(1)
        .describe('Right note path from dedupe results.'),
    action: z.enum(['merge', 'link', 'ignore'])
        .describe('Resolution action.'),
    autoIndex: z.boolean()
        .optional()
        .default(true)
        .describe('Reindex after duplicate resolution.')
};
314
329
  export const contextTool = async (input) => {
315
330
  const context = await resolveExecutionContext(input);
316
331
  const readiness = await ensureBootstrapReady(context, input, 'brainlink_context');
@@ -364,6 +379,14 @@ export const addNoteTool = async (input) => {
364
379
  allowSensitive: input.allowSensitive
365
380
  });
366
381
  const index = shouldIndex ? await indexVault(context.vault) : undefined;
382
+ const focusPath = added.path.includes('agents/') ? added.path.slice(added.path.indexOf('agents/')).replaceAll('\\', '/') : undefined;
383
+ const possibleDuplicates = await scanDuplicateNotes(context.vault, {
384
+ agentId: context.agent,
385
+ focusPath,
386
+ limit: 5,
387
+ minSemanticScore: 0.92,
388
+ includeSemantic: true
389
+ });
367
390
  return jsonResult({
368
391
  vault: context.vault,
369
392
  title: input.title,
@@ -374,6 +397,7 @@ export const addNoteTool = async (input) => {
374
397
  linkTarget: added.linkTarget,
375
398
  guaranteedEdge: true
376
399
  },
400
+ possibleDuplicates,
377
401
  ...(index ? { index } : {})
378
402
  });
379
403
  };
@@ -792,6 +816,17 @@ export const recommendationsTool = async (input) => {
792
816
  tokens
793
817
  }
794
818
  },
819
+ {
820
+ tool: 'brainlink_dedupe',
821
+ reason: 'Detect and resolve duplicate durable notes to keep memory quality high.',
822
+ args: {
823
+ vault: context.vault,
824
+ ...(context.agent ? { agent: context.agent } : {}),
825
+ limit: 10,
826
+ minScore: 0.92,
827
+ semantic: true
828
+ }
829
+ },
795
830
  {
796
831
  tool: 'brainlink_add_note',
797
832
  reason: 'Persist durable outcomes after task completion (write responses include connectivity metadata).',
@@ -818,3 +853,30 @@ export const recommendationsTool = async (input) => {
818
853
  recommendations
819
854
  });
820
855
  };
856
/**
 * Tool handler that scans the resolved vault for possible duplicate notes.
 *
 * @param {object} input - Tool input; `limit`, `minScore` and `semantic` are
 *   read here, the remainder is passed to `resolveExecutionContext`.
 * @returns {Promise<object>} `jsonResult` of the vault, agent and duplicates.
 */
export const dedupeTool = async (input) => {
    const { vault, agent } = await resolveExecutionContext(input);
    const scanOptions = {
        agentId: agent,
        limit: input.limit ?? 25,
        minSemanticScore: input.minScore ?? 0.92,
        // Only an explicit `false` turns the semantic pass off.
        includeSemantic: input.semantic !== false
    };
    const duplicates = await scanDuplicateNotes(vault, scanOptions);
    return jsonResult({ vault, agent, duplicates });
};
870
/**
 * Tool handler that resolves one duplicate pair in the resolved vault.
 *
 * @param {object} input - Tool input with `leftPath`, `rightPath`, `action`
 *   and `autoIndex`.
 * @returns {Promise<object>} `jsonResult` of the vault plus the resolution
 *   result.
 */
export const dedupeResolveTool = async (input) => {
    const { vault } = await resolveExecutionContext(input);
    const { leftPath, rightPath, action } = input;
    // NOTE(review): `isTruthy` is defined outside this view; presumably it
    // coerces the flag to a boolean. Confirm how it treats `undefined`.
    const autoIndex = isTruthy(input.autoIndex);
    const result = await resolveDuplicateNotes(vault, { leftPath, rightPath, action, autoIndex });
    return jsonResult({ vault, ...result });
};
@@ -429,6 +429,25 @@ This creates a slugged Markdown file with frontmatter and a heading.
429
429
 
430
430
  The CLI blocks common secret patterns by default. Do not use `--allow-sensitive` unless the vault is intentionally protected.
431
431
  Brainlink also auto-connects notes that have no `[[wiki links]]` by adding a fallback edge to an agent hub note, so new memory does not stay disconnected.
432
+ `add` also returns `possibleDuplicates` (exact hash + semantic candidates) so agents can decide duplicate resolution immediately.
433
+
434
+ ### Detect Duplicate Notes
435
+
436
+ ```bash
437
+ blink dedupe --vault ./vault --json
438
+ blink dedupe --vault ./vault --agent coding-agent --limit 20 --min-score 0.92 --json
439
+ blink dedupe --vault ./vault --no-semantic --json
440
+ ```
441
+
442
+ ### Resolve Duplicate Notes
443
+
444
+ ```bash
445
+ blink dedupe-resolve --vault ./vault --left agents/shared/a.md --right agents/shared/b.md --action merge --json
446
+ blink dedupe-resolve --vault ./vault --left agents/shared/a.md --right agents/shared/b.md --action link --json
447
+ blink dedupe-resolve --vault ./vault --left agents/shared/a.md --right agents/shared/b.md --action ignore --json
448
+ ```
449
+
450
+ `dedupe-resolve` keeps connectivity: non-merge actions still create a low-priority related edge (`#related-to`).
432
451
 
433
452
  For agent-private memory:
434
453
 
@@ -613,6 +632,8 @@ Available MCP tools:
613
632
  - `brainlink_recommendations`
614
633
  - `brainlink_context`
615
634
  - `brainlink_search`
635
+ - `brainlink_dedupe`
636
+ - `brainlink_resolve_duplicate`
616
637
  - `brainlink_add_note`
617
638
  - `brainlink_add_file`
618
639
  - `brainlink_index`
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@andespindola/brainlink",
3
- "version": "0.1.0-beta.37",
3
+ "version": "0.1.0-beta.39",
4
4
  "description": "Local-first knowledge memory for agents with Markdown, backlinks, indexing and context retrieval.",
5
5
  "type": "module",
6
6
  "license": "MIT",