@mauricio.wolff/mcp-obsidian 0.7.4 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +272 -20
- package/dist/server.js +63 -5
- package/dist/src/filesystem.js +127 -3
- package/dist/src/filesystem.test.js +189 -1
- package/dist/src/integration.test.js +31 -0
- package/dist/src/pathfilter.js +20 -5
- package/dist/src/pathfilter.test.js +16 -0
- package/dist/src/search.js +117 -42
- package/package.json +2 -1
package/dist/src/search.js
CHANGED
|
@@ -13,19 +13,36 @@ export class SearchService {
|
|
|
13
13
|
if (!query || query.trim().length === 0) {
|
|
14
14
|
throw new Error('Search query cannot be empty');
|
|
15
15
|
}
|
|
16
|
-
const results = [];
|
|
17
16
|
const maxLimit = Math.min(limit, 20);
|
|
17
|
+
// Corpus stats for reranking
|
|
18
|
+
let totalDocLength = 0;
|
|
19
|
+
let docCount = 0;
|
|
20
|
+
const termDocFreq = new Map();
|
|
21
|
+
const candidates = [];
|
|
22
|
+
const searchQuery = caseSensitive ? query : query.toLowerCase();
|
|
23
|
+
const terms = searchQuery.split(/\s+/).filter(t => t.length > 0);
|
|
24
|
+
const scoringTerms = terms.length > 1 ? [...terms, searchQuery] : terms;
|
|
18
25
|
// Recursively find all .md files
|
|
19
26
|
const markdownFiles = await this.findMarkdownFiles(this.vaultPath);
|
|
27
|
+
// Pre-filter by pathFilter before I/O
|
|
28
|
+
const prefixLen = this.vaultPath.length + 1;
|
|
29
|
+
const allowedFiles = [];
|
|
20
30
|
for (const fullPath of markdownFiles) {
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
31
|
+
const relativePath = fullPath.substring(prefixLen).replace(/\\/g, '/');
|
|
32
|
+
if (this.pathFilter.isAllowed(relativePath)) {
|
|
33
|
+
allowedFiles.push({ fullPath, relativePath });
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
// Read files in parallel batches
|
|
37
|
+
const BATCH_SIZE = 5;
|
|
38
|
+
for (let start = 0; start < allowedFiles.length; start += BATCH_SIZE) {
|
|
39
|
+
const batch = allowedFiles.slice(start, start + BATCH_SIZE);
|
|
40
|
+
const contents = await Promise.all(batch.map(f => readFile(f.fullPath, 'utf-8').catch(() => null)));
|
|
41
|
+
for (let i = 0; i < batch.length; i++) {
|
|
42
|
+
const content = contents[i];
|
|
43
|
+
if (content === null || content === undefined)
|
|
44
|
+
continue;
|
|
45
|
+
const { relativePath } = batch[i];
|
|
29
46
|
let searchableText = '';
|
|
30
47
|
// Prepare search text based on options
|
|
31
48
|
if (searchContent && searchFrontmatter) {
|
|
@@ -42,45 +59,86 @@ export class SearchService {
|
|
|
42
59
|
searchableText = frontmatterMatch ? frontmatterMatch[1] || '' : '';
|
|
43
60
|
}
|
|
44
61
|
const searchIn = caseSensitive ? searchableText : searchableText.toLowerCase();
|
|
45
|
-
|
|
46
|
-
const
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
62
|
+
// Collect corpus stats for reranking
|
|
63
|
+
const docLength = searchIn.split(/\s+/).filter(w => w.length > 0).length;
|
|
64
|
+
totalDocLength += docLength;
|
|
65
|
+
docCount++;
|
|
66
|
+
for (const term of scoringTerms) {
|
|
67
|
+
if (searchIn.includes(term)) {
|
|
68
|
+
termDocFreq.set(term, (termDocFreq.get(term) || 0) + 1);
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
// Extract title from filename
|
|
72
|
+
const title = relativePath.split('/').pop()?.replace(/\.md$/, '') || relativePath;
|
|
73
|
+
// Check filename match (any term)
|
|
74
|
+
const filenameToSearch = caseSensitive ? title : title.toLowerCase();
|
|
75
|
+
const filenameMatch = terms.some(term => filenameToSearch.includes(term));
|
|
76
|
+
// Check content match (any term)
|
|
77
|
+
const termIndices = terms.map(term => searchIn.indexOf(term));
|
|
78
|
+
const anyTermFound = termIndices.some(idx => idx !== -1);
|
|
79
|
+
const firstIndex = anyTermFound
|
|
80
|
+
? Math.min(...termIndices.filter(idx => idx !== -1))
|
|
81
|
+
: -1;
|
|
82
|
+
if (firstIndex !== -1 || filenameMatch) {
|
|
83
|
+
let excerpt;
|
|
58
84
|
let matchCount = 0;
|
|
59
|
-
let
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
85
|
+
let lineNumber = 0;
|
|
86
|
+
const termFreqs = new Map();
|
|
87
|
+
if (firstIndex !== -1) {
|
|
88
|
+
// Find the term that matched first for excerpt
|
|
89
|
+
const firstTermIdx = termIndices.indexOf(firstIndex);
|
|
90
|
+
const firstTerm = terms[firstTermIdx];
|
|
91
|
+
// Extract excerpt around first content match
|
|
92
|
+
const excerptStart = Math.max(0, firstIndex - 21);
|
|
93
|
+
const excerptEnd = Math.min(searchableText.length, firstIndex + firstTerm.length + 21);
|
|
94
|
+
excerpt = searchableText.slice(excerptStart, excerptEnd).trim();
|
|
95
|
+
// Add ellipsis if excerpt is truncated
|
|
96
|
+
if (excerptStart > 0)
|
|
97
|
+
excerpt = '...' + excerpt;
|
|
98
|
+
if (excerptEnd < searchableText.length)
|
|
99
|
+
excerpt = excerpt + '...';
|
|
100
|
+
// Count total content matches across all terms
|
|
101
|
+
for (const term of scoringTerms) {
|
|
102
|
+
let count = 0;
|
|
103
|
+
let searchIndex = 0;
|
|
104
|
+
while ((searchIndex = searchIn.indexOf(term, searchIndex)) !== -1) {
|
|
105
|
+
count++;
|
|
106
|
+
searchIndex += term.length;
|
|
107
|
+
}
|
|
108
|
+
termFreqs.set(term, count);
|
|
109
|
+
matchCount += count;
|
|
110
|
+
}
|
|
111
|
+
// Find line number of first match
|
|
112
|
+
const lines = searchableText.slice(0, firstIndex).split('\n');
|
|
113
|
+
lineNumber = lines.length;
|
|
63
114
|
}
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
115
|
+
else {
|
|
116
|
+
// Filename-only match: use beginning of content as excerpt
|
|
117
|
+
excerpt = searchableText.slice(0, 50).trim();
|
|
118
|
+
if (searchableText.length > 50)
|
|
119
|
+
excerpt = excerpt + '...';
|
|
120
|
+
matchCount = 0;
|
|
121
|
+
lineNumber = 0;
|
|
122
|
+
}
|
|
123
|
+
// Add filename match to count
|
|
124
|
+
if (filenameMatch)
|
|
125
|
+
matchCount++;
|
|
126
|
+
candidates.push({
|
|
127
|
+
result: {
|
|
128
|
+
p: relativePath,
|
|
129
|
+
t: title,
|
|
130
|
+
ex: excerpt,
|
|
131
|
+
mc: matchCount,
|
|
132
|
+
ln: lineNumber,
|
|
133
|
+
uri: generateObsidianUri(this.vaultPath, relativePath)
|
|
134
|
+
},
|
|
135
|
+
termFreqs,
|
|
136
|
+
docLength
|
|
76
137
|
});
|
|
77
138
|
}
|
|
78
139
|
}
|
|
79
|
-
catch (error) {
|
|
80
|
-
// Skip files that can't be read
|
|
81
|
-
continue;
|
|
82
|
-
}
|
|
83
140
|
}
|
|
141
|
+
const results = this.rerank(candidates, scoringTerms, termDocFreq, docCount, totalDocLength, maxLimit);
|
|
84
142
|
return results;
|
|
85
143
|
}
|
|
86
144
|
async findMarkdownFiles(dirPath) {
|
|
@@ -104,4 +162,21 @@ export class SearchService {
|
|
|
104
162
|
}
|
|
105
163
|
return markdownFiles;
|
|
106
164
|
}
|
|
165
|
+
rerank(candidates, terms, termDocFreq, docCount, totalDocLength, maxLimit) {
|
|
166
|
+
const avgdl = docCount > 0 ? totalDocLength / docCount : 1;
|
|
167
|
+
const k1 = 1.2;
|
|
168
|
+
const b = 0.75;
|
|
169
|
+
const scored = candidates.map(c => {
|
|
170
|
+
let score = 0;
|
|
171
|
+
for (const term of terms) {
|
|
172
|
+
const tf = c.termFreqs.get(term) || 0;
|
|
173
|
+
const df = termDocFreq.get(term) || 0;
|
|
174
|
+
const idf = Math.log(1 + (docCount - df + 0.5) / (df + 0.5));
|
|
175
|
+
score += idf * (tf * (k1 + 1)) / (tf + k1 * (1 - b + b * c.docLength / avgdl));
|
|
176
|
+
}
|
|
177
|
+
return { score, result: c.result };
|
|
178
|
+
});
|
|
179
|
+
scored.sort((a, b) => b.score - a.score);
|
|
180
|
+
return scored.slice(0, maxLimit).map(s => s.result);
|
|
181
|
+
}
|
|
107
182
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mauricio.wolff/mcp-obsidian",
|
|
3
|
-
"version": "0.7.4",
|
|
3
|
+
"version": "0.8.1",
|
|
4
4
|
"description": "Universal AI bridge for Obsidian vaults - connect any MCP-compatible assistant",
|
|
5
5
|
"author": "bitbonsai",
|
|
6
6
|
"license": "MIT",
|
|
@@ -18,6 +18,7 @@
|
|
|
18
18
|
],
|
|
19
19
|
"scripts": {
|
|
20
20
|
"start": "tsx server.ts",
|
|
21
|
+
"website": "cd website && bun dev",
|
|
21
22
|
"build": "tsc --project tsconfig.build.json",
|
|
22
23
|
"test": "vitest run",
|
|
23
24
|
"test:watch": "vitest",
|