@oomkapwn/enquire-mcp 3.7.13 → 3.7.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. package/CHANGELOG.md +75 -0
  2. package/README.md +4 -4
  3. package/dist/index.d.ts +1 -1
  4. package/dist/index.js +1 -1
  5. package/dist/tools/write.d.ts +10 -4
  6. package/dist/tools/write.d.ts.map +1 -1
  7. package/dist/tools/write.js +10 -4
  8. package/dist/tools/write.js.map +1 -1
  9. package/dist/vault.d.ts +21 -4
  10. package/dist/vault.d.ts.map +1 -1
  11. package/dist/vault.js +91 -19
  12. package/dist/vault.js.map +1 -1
  13. package/docs/COMPARISON.md +2 -2
  14. package/package.json +2 -2
  15. package/dist/tools.d.ts +0 -980
  16. package/dist/tools.d.ts.map +0 -1
  17. package/dist/tools.js +0 -3132
  18. package/dist/tools.js.map +0 -1
  19. package/docs/api-reference/.nojekyll +0 -1
  20. package/docs/api-reference/assets/hierarchy.js +0 -1
  21. package/docs/api-reference/assets/highlight.css +0 -71
  22. package/docs/api-reference/assets/icons.js +0 -18
  23. package/docs/api-reference/assets/icons.svg +0 -1
  24. package/docs/api-reference/assets/main.js +0 -60
  25. package/docs/api-reference/assets/navigation.js +0 -1
  26. package/docs/api-reference/assets/search.js +0 -1
  27. package/docs/api-reference/assets/style.css +0 -1633
  28. package/docs/api-reference/functions/index.buildEmbedText.html +0 -15
  29. package/docs/api-reference/functions/index.buildMcpServer.html +0 -4
  30. package/docs/api-reference/functions/index.formatReadyBanner.html +0 -4
  31. package/docs/api-reference/functions/index.main.html +0 -1
  32. package/docs/api-reference/functions/index.parsePositiveInt.html +0 -1
  33. package/docs/api-reference/functions/index.parseQuantizationMode.html +0 -5
  34. package/docs/api-reference/functions/index.prepareServerDeps.html +0 -5
  35. package/docs/api-reference/functions/index.startServer.html +0 -1
  36. package/docs/api-reference/functions/tools.appendToNote.html +0 -17
  37. package/docs/api-reference/functions/tools.archiveNote.html +0 -15
  38. package/docs/api-reference/functions/tools.assertHnswModelMatchesEmbedder.html +0 -13
  39. package/docs/api-reference/functions/tools.chatThreadAppend.html +0 -22
  40. package/docs/api-reference/functions/tools.chatThreadRead.html +0 -16
  41. package/docs/api-reference/functions/tools.contextPack.html +0 -21
  42. package/docs/api-reference/functions/tools.createNote.html +0 -19
  43. package/docs/api-reference/functions/tools.dataviewQuery.html +0 -16
  44. package/docs/api-reference/functions/tools.embeddingsSearch.html +0 -40
  45. package/docs/api-reference/functions/tools.findPath.html +0 -23
  46. package/docs/api-reference/functions/tools.findSimilar.html +0 -21
  47. package/docs/api-reference/functions/tools.frontmatterGet.html +0 -15
  48. package/docs/api-reference/functions/tools.frontmatterSearch.html +0 -16
  49. package/docs/api-reference/functions/tools.frontmatterSet.html +0 -19
  50. package/docs/api-reference/functions/tools.getBacklinks.html +0 -15
  51. package/docs/api-reference/functions/tools.getNoteNeighbors.html +0 -16
  52. package/docs/api-reference/functions/tools.getOpenQuestions.html +0 -19
  53. package/docs/api-reference/functions/tools.getOutboundLinks.html +0 -16
  54. package/docs/api-reference/functions/tools.getRecentEdits.html +0 -14
  55. package/docs/api-reference/functions/tools.getUnresolvedWikilinks.html +0 -14
  56. package/docs/api-reference/functions/tools.getVaultStats.html +0 -13
  57. package/docs/api-reference/functions/tools.lintWiki.html +0 -20
  58. package/docs/api-reference/functions/tools.listCanvases.html +0 -16
  59. package/docs/api-reference/functions/tools.listNotes.html +0 -19
  60. package/docs/api-reference/functions/tools.listPdfs.html +0 -15
  61. package/docs/api-reference/functions/tools.listTags.html +0 -14
  62. package/docs/api-reference/functions/tools.ocrPdf.html +0 -18
  63. package/docs/api-reference/functions/tools.openInUi.html +0 -17
  64. package/docs/api-reference/functions/tools.paperAudit.html +0 -16
  65. package/docs/api-reference/functions/tools.pickEmbedTextForHyde.html +0 -8
  66. package/docs/api-reference/functions/tools.readCanvas.html +0 -19
  67. package/docs/api-reference/functions/tools.readNote.html +0 -20
  68. package/docs/api-reference/functions/tools.readPdf.html +0 -18
  69. package/docs/api-reference/functions/tools.renameNote.html +0 -24
  70. package/docs/api-reference/functions/tools.replaceInNotes.html +0 -20
  71. package/docs/api-reference/functions/tools.resolveTarget.html +0 -24
  72. package/docs/api-reference/functions/tools.resolveWikilink.html +0 -20
  73. package/docs/api-reference/functions/tools.searchHybrid.html +0 -62
  74. package/docs/api-reference/functions/tools.searchText.html +0 -19
  75. package/docs/api-reference/functions/tools.semanticSearch.html +0 -19
  76. package/docs/api-reference/functions/tools.validateNoteProposal.html +0 -19
  77. package/docs/api-reference/hierarchy.html +0 -1
  78. package/docs/api-reference/index.html +0 -1
  79. package/docs/api-reference/interfaces/index.ServeOptions.html +0 -74
  80. package/docs/api-reference/interfaces/index.ServerDeps.html +0 -27
  81. package/docs/api-reference/interfaces/tool-manifest.ToolManifestEntry.html +0 -33
  82. package/docs/api-reference/interfaces/tools.ArchiveNoteArgs.html +0 -12
  83. package/docs/api-reference/interfaces/tools.BacklinkHit.html +0 -15
  84. package/docs/api-reference/interfaces/tools.CanvasEdge.html +0 -19
  85. package/docs/api-reference/interfaces/tools.CanvasSummary.html +0 -16
  86. package/docs/api-reference/interfaces/tools.ChatThreadAppendArgs.html +0 -10
  87. package/docs/api-reference/interfaces/tools.ChatThreadMessage.html +0 -14
  88. package/docs/api-reference/interfaces/tools.ChatThreadReadResult.html +0 -10
  89. package/docs/api-reference/interfaces/tools.ContextPackArgs.html +0 -12
  90. package/docs/api-reference/interfaces/tools.ContextPackResult.html +0 -20
  91. package/docs/api-reference/interfaces/tools.EmbedHit.html +0 -21
  92. package/docs/api-reference/interfaces/tools.EmbedSearchResponse.html +0 -14
  93. package/docs/api-reference/interfaces/tools.FindPathResult.html +0 -17
  94. package/docs/api-reference/interfaces/tools.FrontmatterSearchArgs.html +0 -20
  95. package/docs/api-reference/interfaces/tools.FrontmatterSetArgs.html +0 -13
  96. package/docs/api-reference/interfaces/tools.HnswSearchContext.html +0 -21
  97. package/docs/api-reference/interfaces/tools.LintWikiArgs.html +0 -14
  98. package/docs/api-reference/interfaces/tools.LintWikiFinding.html +0 -14
  99. package/docs/api-reference/interfaces/tools.LintWikiResult.html +0 -9
  100. package/docs/api-reference/interfaces/tools.NoteNeighbors.html +0 -17
  101. package/docs/api-reference/interfaces/tools.NoteReadFull.html +0 -20
  102. package/docs/api-reference/interfaces/tools.NoteReadMap.html +0 -25
  103. package/docs/api-reference/interfaces/tools.NoteSummary.html +0 -14
  104. package/docs/api-reference/interfaces/tools.OcrPdfArgs.html +0 -16
  105. package/docs/api-reference/interfaces/tools.OcrPdfPage.html +0 -15
  106. package/docs/api-reference/interfaces/tools.OcrPdfResult.html +0 -18
  107. package/docs/api-reference/interfaces/tools.OpenInUiResult.html +0 -11
  108. package/docs/api-reference/interfaces/tools.OpenQuestion.html +0 -20
  109. package/docs/api-reference/interfaces/tools.OutboundLink.html +0 -20
  110. package/docs/api-reference/interfaces/tools.PaperAuditFinding.html +0 -17
  111. package/docs/api-reference/interfaces/tools.PathStep.html +0 -9
  112. package/docs/api-reference/interfaces/tools.PdfSummary.html +0 -9
  113. package/docs/api-reference/interfaces/tools.ReadCanvasResult.html +0 -15
  114. package/docs/api-reference/interfaces/tools.ReadPdfArgs.html +0 -8
  115. package/docs/api-reference/interfaces/tools.ReadPdfPage.html +0 -13
  116. package/docs/api-reference/interfaces/tools.ReadPdfResult.html +0 -18
  117. package/docs/api-reference/interfaces/tools.RenameNoteResult.html +0 -14
  118. package/docs/api-reference/interfaces/tools.RenameProposal.html +0 -13
  119. package/docs/api-reference/interfaces/tools.ReplaceInNotesArgs.html +0 -15
  120. package/docs/api-reference/interfaces/tools.ReplaceInNotesFileResult.html +0 -6
  121. package/docs/api-reference/interfaces/tools.ReplaceInNotesResult.html +0 -21
  122. package/docs/api-reference/interfaces/tools.SearchHit.html +0 -16
  123. package/docs/api-reference/interfaces/tools.SearchHybridHit.html +0 -30
  124. package/docs/api-reference/interfaces/tools.SearchHybridResponse.html +0 -23
  125. package/docs/api-reference/interfaces/tools.SearchResponse.html +0 -13
  126. package/docs/api-reference/interfaces/tools.SemanticHit.html +0 -15
  127. package/docs/api-reference/interfaces/tools.SimilarNote.html +0 -15
  128. package/docs/api-reference/interfaces/tools.TagSummary.html +0 -13
  129. package/docs/api-reference/interfaces/tools.UnresolvedWikilink.html +0 -22
  130. package/docs/api-reference/interfaces/tools.ValidateProposalArgs.html +0 -10
  131. package/docs/api-reference/interfaces/tools.ValidateProposalResult.html +0 -14
  132. package/docs/api-reference/interfaces/tools.VaultStats.html +0 -26
  133. package/docs/api-reference/modules/index.html +0 -1
  134. package/docs/api-reference/modules/tool-manifest.html +0 -1
  135. package/docs/api-reference/modules/tools.html +0 -1
  136. package/docs/api-reference/types/tools.CanvasNode.html +0 -7
  137. package/docs/api-reference/types/tools.SearchMode.html +0 -7
  138. package/docs/api-reference/variables/index.VERSION.html +0 -9
  139. package/docs/api-reference/variables/tool-manifest.TOOL_MANIFEST.html +0 -1
package/dist/tools.js DELETED
@@ -1,3132 +0,0 @@
1
- import * as path from "node:path";
2
- import matter from "gray-matter";
3
- import { parseDql, runDql } from "./dql.js";
4
- import { resolvePeriodicNoteName } from "./periodic.js";
5
- export async function listNotes(vault, args) {
6
- await vault.ensureExists();
7
- const limit = args.limit ?? 50;
8
- const sinceMs = args.since_date ? Date.parse(args.since_date) : null;
9
- if (sinceMs !== null && Number.isNaN(sinceMs)) {
10
- throw new Error(`Invalid since_date: ${args.since_date}. Use ISO 8601 (YYYY-MM-DD).`);
11
- }
12
- const wantTag = args.tag ? normalizeTag(args.tag) : null;
13
- const entries = await vault.listMarkdown(args.folder);
14
- entries.sort((a, b) => b.mtimeMs - a.mtimeMs);
15
- const out = [];
16
- for (const e of entries) {
17
- if (sinceMs !== null && e.mtimeMs < sinceMs)
18
- continue;
19
- const { parsed } = await vault.readNote(e.absPath, e.mtimeMs);
20
- if (wantTag && !parsed.tags.some((t) => normalizeTag(t) === wantTag))
21
- continue;
22
- out.push({
23
- title: stripMd(e.basename),
24
- path: e.relPath,
25
- frontmatter: parsed.frontmatter,
26
- tags: parsed.tags,
27
- mtime: new Date(e.mtimeMs).toISOString()
28
- });
29
- if (out.length >= limit)
30
- break;
31
- }
32
- return out;
33
- }
34
- export async function readNote(vault, args) {
35
- await vault.ensureExists();
36
- const entry = await resolveTarget(vault, args);
37
- const { content, parsed, mtimeMs } = await vault.readNote(entry.absPath, entry.mtimeMs);
38
- if (args.format === "map") {
39
- // Document-map projection — headings + frontmatter keys + counts. Lets an
40
- // LLM plan a surgical edit without paying token cost for the full body.
41
- return {
42
- path: entry.relPath,
43
- title: stripMd(entry.basename),
44
- format: "map",
45
- frontmatter_keys: Object.keys(parsed.frontmatter),
46
- headings: extractHeadings(parsed.body),
47
- wikilinks_count: parsed.wikilinks.length,
48
- embeds_count: parsed.embeds.length,
49
- tags: parsed.tags,
50
- mtime: new Date(mtimeMs).toISOString(),
51
- byte_size: Buffer.byteLength(content, "utf8")
52
- };
53
- }
54
- return {
55
- path: entry.relPath,
56
- title: stripMd(entry.basename),
57
- content: parsed.body,
58
- frontmatter: parsed.frontmatter,
59
- wikilinks: parsed.wikilinks,
60
- embeds: parsed.embeds,
61
- tags: parsed.tags,
62
- mtime: new Date(mtimeMs).toISOString()
63
- };
64
- }
65
- /** Pull ATX headings (`#`, `##`, `###`, etc.) out of note body for the
66
- * document-map projection. Skips ATX inside fenced code blocks via a simple
67
- * line-by-line backtick toggle. */
68
- function extractHeadings(body) {
69
- const out = [];
70
- const lines = body.split("\n");
71
- let inFence = false;
72
- for (let i = 0; i < lines.length; i++) {
73
- const line = lines[i] ?? "";
74
- if (/^\s*```/.test(line)) {
75
- inFence = !inFence;
76
- continue;
77
- }
78
- if (inFence)
79
- continue;
80
- const m = /^(#{1,6})\s+(.+?)\s*#*\s*$/.exec(line);
81
- if (m?.[1] && m[2]) {
82
- out.push({ level: m[1].length, text: m[2], line: i + 1 });
83
- }
84
- }
85
- return out;
86
- }
87
- export async function resolveWikilink(vault, args) {
88
- await vault.ensureExists();
89
- const cleaned = args.wikilink.replace(/^!?\[\[|\]\]$/g, "");
90
- const aliasIdx = cleaned.indexOf("|");
91
- const alias = aliasIdx === -1 ? null : cleaned.slice(aliasIdx + 1).trim();
92
- let rest = aliasIdx === -1 ? cleaned : cleaned.slice(0, aliasIdx);
93
- const blockIdx = rest.indexOf("^");
94
- const block = blockIdx === -1 ? null : rest.slice(blockIdx + 1).trim();
95
- rest = blockIdx === -1 ? rest : rest.slice(0, blockIdx);
96
- const hashIdx = rest.indexOf("#");
97
- const section = hashIdx === -1 ? null : rest.slice(hashIdx + 1).trim();
98
- const target = (hashIdx === -1 ? rest : rest.slice(0, hashIdx)).trim();
99
- if (!target) {
100
- return { found: false, path: null, title: null, content: null, section, block, alias };
101
- }
102
- const all = await vault.listMarkdown();
103
- const match = findBestMatch(all, target, args.from_note);
104
- if (!match) {
105
- return { found: false, path: null, title: null, content: null, section, block, alias };
106
- }
107
- let body = null;
108
- if (args.include_content !== false) {
109
- const { parsed } = await vault.readNote(match.absPath, match.mtimeMs);
110
- body = parsed.body;
111
- }
112
- return {
113
- found: true,
114
- path: match.relPath,
115
- title: stripMd(match.basename),
116
- content: body,
117
- section,
118
- block,
119
- alias
120
- };
121
- }
122
- export async function searchText(vault, args) {
123
- await vault.ensureExists();
124
- const limit = args.limit ?? 25;
125
- const mode = args.mode ?? "all";
126
- const q = args.query;
127
- if (!q.trim())
128
- throw new Error("query must not be empty");
129
- // Tokenize on whitespace for "all" / "any". Phrase mode keeps the raw query.
130
- const tokens = mode === "phrase" ? [q] : q.trim().split(/\s+/);
131
- const lowerTokens = tokens.map((t) => t.toLowerCase());
132
- const entries = await vault.listMarkdown(args.folder);
133
- // Parallel file reads — was sequential, slow on large vaults. Chunk to
134
- // bound concurrency (avoid blowing the open-fd limit on huge vaults).
135
- const CHUNK = 16;
136
- const matches = [];
137
- for (let i = 0; i < entries.length; i += CHUNK) {
138
- const chunk = entries.slice(i, i + CHUNK);
139
- const results = await Promise.all(chunk.map(async (e) => {
140
- const { content } = await vault.readNote(e.absPath, e.mtimeMs);
141
- const lower = content.toLowerCase();
142
- let totalScore = 0;
143
- let firstHit = -1;
144
- let firstHitLen = 0;
145
- const matched = [];
146
- for (let t = 0; t < lowerTokens.length; t++) {
147
- const lowerT = lowerTokens[t];
148
- if (lowerT === undefined || lowerT === "")
149
- continue;
150
- let tokenScore = 0;
151
- let from = 0;
152
- while (true) {
153
- const idx = lower.indexOf(lowerT, from);
154
- if (idx === -1)
155
- break;
156
- tokenScore += 1;
157
- if (firstHit === -1 || idx < firstHit) {
158
- firstHit = idx;
159
- firstHitLen = lowerT.length;
160
- }
161
- from = idx + lowerT.length;
162
- }
163
- if (tokenScore > 0) {
164
- totalScore += tokenScore;
165
- matched.push(tokens[t] ?? lowerT);
166
- }
167
- }
168
- // Mode policy: "all" requires every token to match; "any" requires at
169
- // least one; "phrase" requires the raw query (single token).
170
- if (mode === "all" && matched.length !== lowerTokens.filter(Boolean).length)
171
- return null;
172
- if (totalScore === 0)
173
- return null;
174
- const { snippet, line } = sliceSnippet(content, firstHit, firstHitLen);
175
- const hit = {
176
- path: e.relPath,
177
- snippet,
178
- score: totalScore,
179
- line,
180
- matched_terms: matched
181
- };
182
- return hit;
183
- }));
184
- for (const r of results)
185
- if (r)
186
- matches.push(r);
187
- }
188
- matches.sort((a, b) => b.score - a.score);
189
- return {
190
- query: q,
191
- mode,
192
- scanned_notes: entries.length,
193
- matches: matches.slice(0, limit)
194
- };
195
- }
196
- export async function getRecentEdits(vault, args) {
197
- await vault.ensureExists();
198
- const limit = args.limit ?? 20;
199
- const sinceMs = args.since_minutes !== undefined ? Date.now() - args.since_minutes * 60_000 : null;
200
- const entries = await vault.listMarkdown(args.folder);
201
- entries.sort((a, b) => b.mtimeMs - a.mtimeMs);
202
- const out = [];
203
- for (const e of entries) {
204
- if (sinceMs !== null && e.mtimeMs < sinceMs)
205
- break;
206
- const { parsed } = await vault.readNote(e.absPath, e.mtimeMs);
207
- out.push({
208
- title: stripMd(e.basename),
209
- path: e.relPath,
210
- frontmatter: parsed.frontmatter,
211
- tags: parsed.tags,
212
- mtime: new Date(e.mtimeMs).toISOString()
213
- });
214
- if (out.length >= limit)
215
- break;
216
- }
217
- return out;
218
- }
219
- export async function getBacklinks(vault, args) {
220
- await vault.ensureExists();
221
- const limit = args.limit ?? 50;
222
- const includeEmbeds = args.include_embeds !== false;
223
- const target = await resolveTarget(vault, args);
224
- const targetAbs = target.absPath;
225
- const all = await vault.listMarkdown();
226
- const hits = [];
227
- for (const e of all) {
228
- if (e.absPath === targetAbs)
229
- continue;
230
- const { content, parsed } = await vault.readNote(e.absPath, e.mtimeMs);
231
- const linkBag = [
232
- ...parsed.wikilinks.map((l) => ({ link: l, kind: "wikilink" })),
233
- ...(includeEmbeds ? parsed.embeds.map((l) => ({ link: l, kind: "embed" })) : [])
234
- ];
235
- if (!linkBag.length)
236
- continue;
237
- let count = 0;
238
- const kindFlags = { wikilink: false, embed: false };
239
- const snippets = [];
240
- for (const { link, kind } of linkBag) {
241
- const match = findBestMatch(all, link.target, e.relPath);
242
- if (!match || match.absPath !== targetAbs)
243
- continue;
244
- count += 1;
245
- kindFlags[kind] = true;
246
- if (snippets.length < 2) {
247
- const literal = `${(kind === "embed" ? "![[" : "[[") + link.raw}]]`;
248
- const idx = content.indexOf(literal);
249
- const { snippet } = sliceSnippet(content, idx, literal.length);
250
- if (snippet)
251
- snippets.push(snippet);
252
- }
253
- }
254
- if (count === 0)
255
- continue;
256
- hits.push({
257
- path: e.relPath,
258
- title: stripMd(e.basename),
259
- count,
260
- snippets,
261
- link_kind: kindFlags.wikilink && kindFlags.embed ? "mixed" : kindFlags.embed ? "embed" : "wikilink"
262
- });
263
- }
264
- hits.sort((a, b) => b.count - a.count);
265
- return hits.slice(0, limit);
266
- }
267
- export async function dataviewQuery(vault, args) {
268
- await vault.ensureExists();
269
- const parsed = parseDql(args.query);
270
- const rows = await runDql(vault, parsed);
271
- return { query: args.query, rows };
272
- }
273
- export async function getUnresolvedWikilinks(vault, args) {
274
- await vault.ensureExists();
275
- const limit = args.limit ?? 200;
276
- const includeEmbeds = args.include_embeds !== false;
277
- const entries = await vault.listMarkdown(args.folder);
278
- const all = await vault.listMarkdown();
279
- const out = [];
280
- for (const e of entries) {
281
- if (out.length >= limit)
282
- break;
283
- const { content, parsed } = await vault.readNote(e.absPath, e.mtimeMs);
284
- const candidates = [
285
- ...parsed.wikilinks.map((l) => ({ link: l, kind: "wikilink" })),
286
- ...(includeEmbeds ? parsed.embeds.map((l) => ({ link: l, kind: "embed" })) : [])
287
- ];
288
- for (const { link, kind } of candidates) {
289
- if (out.length >= limit)
290
- break;
291
- if (!link.target)
292
- continue;
293
- const match = findBestMatch(all, link.target, e.relPath);
294
- if (match)
295
- continue;
296
- const literal = `${(kind === "embed" ? "![[" : "[[") + link.raw}]]`;
297
- const idx = content.indexOf(literal);
298
- const { snippet, line } = sliceSnippet(content, idx, literal.length);
299
- out.push({
300
- from_path: e.relPath,
301
- target: link.target,
302
- raw: link.raw,
303
- kind,
304
- alias: link.alias ?? null,
305
- section: link.section ?? null,
306
- block: link.block ?? null,
307
- line,
308
- snippet
309
- });
310
- }
311
- }
312
- return out;
313
- }
314
- export async function getOutboundLinks(vault, args) {
315
- await vault.ensureExists();
316
- const includeEmbeds = args.include_embeds !== false;
317
- const includeUnresolved = args.include_unresolved !== false;
318
- const entry = await resolveTarget(vault, args);
319
- const { parsed } = await vault.readNote(entry.absPath, entry.mtimeMs);
320
- const all = await vault.listMarkdown();
321
- const candidates = [
322
- ...parsed.wikilinks.map((l) => ({ link: l, kind: "wikilink" })),
323
- ...(includeEmbeds ? parsed.embeds.map((l) => ({ link: l, kind: "embed" })) : [])
324
- ];
325
- const links = [];
326
- for (const { link, kind } of candidates) {
327
- const match = findBestMatch(all, link.target, entry.relPath);
328
- if (!match && !includeUnresolved)
329
- continue;
330
- links.push({
331
- raw: link.raw,
332
- target: link.target,
333
- kind,
334
- alias: link.alias ?? null,
335
- section: link.section ?? null,
336
- block: link.block ?? null,
337
- resolved_path: match ? match.relPath : null,
338
- resolved_title: match ? stripMd(match.basename) : null
339
- });
340
- }
341
- return {
342
- from_path: entry.relPath,
343
- from_title: stripMd(entry.basename),
344
- links
345
- };
346
- }
347
- export async function listTags(vault, args) {
348
- await vault.ensureExists();
349
- const limit = args.limit ?? 200;
350
- const minCount = args.min_count ?? 1;
351
- const entries = await vault.listMarkdown(args.folder);
352
- const counts = new Map();
353
- for (const e of entries) {
354
- const { parsed } = await vault.readNote(e.absPath, e.mtimeMs);
355
- const fmSet = new Set(extractFrontmatterTagsLower(parsed.frontmatter));
356
- for (const t of parsed.tags) {
357
- const key = t.toLowerCase();
358
- const slot = counts.get(key) ?? { count: 0, fm: 0, inline: 0 };
359
- slot.count += 1;
360
- if (fmSet.has(key))
361
- slot.fm += 1;
362
- else
363
- slot.inline += 1;
364
- counts.set(key, slot);
365
- }
366
- }
367
- const out = [];
368
- for (const [tag, slot] of counts) {
369
- if (slot.count < minCount)
370
- continue;
371
- out.push({ tag, count: slot.count, frontmatter_count: slot.fm, inline_count: slot.inline });
372
- }
373
- out.sort((a, b) => b.count - a.count || a.tag.localeCompare(b.tag));
374
- return out.slice(0, limit);
375
- }
376
- export async function createNote(vault, args) {
377
- await vault.ensureExists();
378
- const body = composeNote(args.frontmatter, args.content);
379
- const result = await vault.writeNote(args.path, body, { overwrite: args.overwrite });
380
- return {
381
- path: result.relPath,
382
- mtime: new Date(result.mtimeMs).toISOString(),
383
- bytes: result.bytes
384
- };
385
- }
386
- export async function appendToNote(vault, args) {
387
- await vault.ensureExists();
388
- const target = await resolveTarget(vault, args);
389
- const sep = args.separator ?? "\n\n";
390
- const result = await vault.appendNote(target.absPath, sep + args.content);
391
- return {
392
- path: result.relPath,
393
- mtime: new Date(result.mtimeMs).toISOString(),
394
- appended_bytes: result.appended_bytes
395
- };
396
- }
397
- export async function renameNote(vault, args) {
398
- await vault.ensureExists();
399
- const dryRun = args.dry_run === true;
400
- const fromRelNorm = args.from.toLowerCase().endsWith(".md") ? args.from : `${args.from}.md`;
401
- const toRelNorm = args.to.toLowerCase().endsWith(".md") ? args.to : `${args.to}.md`;
402
- // Resolve from (must exist) — vault.stat() rejects traversal + excluded paths
403
- // and confirms the file is real. resolveInside() is the public wrapper for
404
- // the same path-normalization logic without an existence check.
405
- const fromAbs = vault.resolveInside(fromRelNorm);
406
- const fromRel = vault.toRel(fromAbs);
407
- await vault.stat(fromAbs); // throws on missing source — fail fast.
408
- // Validate to-path early so we don't do O(N) work then fail.
409
- const toAbsCheck = vault.resolveInside(toRelNorm);
410
- const toRelCheck = vault.toRel(toAbsCheck);
411
- const renameReason = vault.exclusionReason(toRelCheck);
412
- if (renameReason) {
413
- // v2.0.0-beta.2 P1 fix: distinguish allowlist-vs-denylist same as
414
- // writeNote and Vault.renameFile do. Pre-fix the message always blamed
415
- // --exclude-glob even when --read-paths was the reason.
416
- throw new Error(`Refusing to rename — destination is excluded by ${renameReason}: ${toRelCheck}`);
417
- }
418
- if (fromRel === toRelCheck) {
419
- throw new Error(`from and to are the same path: ${fromRel}`);
420
- }
421
- if (!args.overwrite) {
422
- const exists = await vault
423
- .stat(toAbsCheck)
424
- .then(() => true)
425
- .catch(() => false);
426
- if (exists) {
427
- throw new Error(`Destination already exists: ${toRelCheck} (pass overwrite=true to replace)`);
428
- }
429
- }
430
- const newBasename = stripMd(path.basename(toRelNorm));
431
- const newDir = path.dirname(toRelNorm).replace(/\\/g, "/");
432
- const entries = await vault.listMarkdown();
433
- // Build the rewrite plan. INCLUDES the source file itself so that any
434
- // self-references (e.g. `[[Foo]]` inside `Foo.md`) are also rewritten —
435
- // otherwise the renamed file would ship with a broken self-link. The source
436
- // is rewritten in place at the OLD path; fs.rename then carries the new
437
- // content to the new path in one atomic step.
438
- const plan = [];
439
- let totalRewrites = 0;
440
- let sourcePlan = null;
441
- for (const e of entries) {
442
- const isSource = e.absPath === fromAbs;
443
- const { content, parsed } = await vault.readNote(e.absPath, e.mtimeMs);
444
- // Find every wikilink + embed whose target resolves to fromAbs. Group by
445
- // raw inner text — multiple identical literals in the same file rewrite
446
- // together.
447
- const oldRawsToNew = new Map();
448
- const candidates = [
449
- ...parsed.wikilinks.map((l) => ({ raw: l.raw, target: l.target, kind: "wikilink" })),
450
- ...parsed.embeds.map((l) => ({ raw: l.raw, target: l.target, kind: "embed" }))
451
- ];
452
- for (const c of candidates) {
453
- if (oldRawsToNew.has(c.raw))
454
- continue; // already mapped
455
- const m = findBestMatch(entries, c.target, e.relPath);
456
- if (!m || m.absPath !== fromAbs)
457
- continue;
458
- const newRaw = rewriteRawTarget(c.raw, c.target, newBasename, newDir);
459
- if (newRaw === c.raw)
460
- continue; // already correct (e.g., basename happened to match)
461
- oldRawsToNew.set(c.raw, { kind: c.kind, newRaw });
462
- }
463
- if (oldRawsToNew.size === 0)
464
- continue;
465
- // Apply the replacements with a code-fence-aware line walker so wikilinks
466
- // inside ``` / ~~~ blocks (which the parser ignores) stay verbatim.
467
- const { content: newContent, count } = rewriteOutsideCodeFences(content, oldRawsToNew);
468
- if (count === 0)
469
- continue;
470
- const proposal = { path: e.relPath, rewrites: count, before: content, after: newContent };
471
- if (isSource) {
472
- // The source file's rewrite is held separately so we can write it last,
473
- // immediately before fs.rename, keeping the disk in a maximally-recoverable
474
- // state if anything between writes fails.
475
- sourcePlan = proposal;
476
- }
477
- else {
478
- plan.push(proposal);
479
- }
480
- totalRewrites += count;
481
- }
482
- if (!dryRun) {
483
- // Write order:
484
- // 1. All backlink-bearing files (other notes pointing at the source).
485
- // 2. Source file's rewritten content, written to its OLD path.
486
- // 3. fs.rename source's old path → new path.
487
- // A failure at any step leaves backlinks pointing at the still-present old
488
- // name (worst case: safe, recoverable).
489
- for (const p of plan) {
490
- await vault.writeNote(p.path, p.after, { overwrite: true });
491
- }
492
- if (sourcePlan) {
493
- await vault.writeNote(sourcePlan.path, sourcePlan.after, { overwrite: true });
494
- }
495
- // Atomic file move + cache invalidation.
496
- await vault.renameFile(fromRelNorm, toRelNorm, { overwrite: args.overwrite });
497
- }
498
- // Combine plans for the response so the caller sees the full picture.
499
- const allPlans = sourcePlan ? [...plan, sourcePlan] : plan;
500
- // Strip `before`/`after` from the response — the caller doesn't need the
501
- // full file contents back, just the per-file count. We kept them for the
502
- // pre-write loop; the response trims them. The source-file entry uses its
503
- // POST-rename path so the caller sees where the rewrite ended up.
504
- const trimmedPlan = allPlans.map((p) => ({
505
- path: p === sourcePlan ? toRelCheck : p.path,
506
- rewrites: p.rewrites,
507
- before: "",
508
- after: ""
509
- }));
510
- return {
511
- from: fromRel,
512
- to: toRelCheck,
513
- dry_run: dryRun,
514
- files_updated: trimmedPlan,
515
- total_links_rewritten: totalRewrites
516
- };
517
- }
518
const CHAT_HEADING_RE = /^### (user|assistant|system) · (.+?)\s*$/;
// Multi-line flag: `## Chat:` heading can appear anywhere in the body, not
// only at string start. The append codepath uses .test(body); the read
// codepath uses .exec(line) per-line so the flag is harmless there.
const CHAT_THREAD_TITLE_RE = /^## Chat: (.+?)\s*$/m;
/**
 * Append a message to a note's chat thread. Creates the note (and the
 * `## Chat: <title>` heading) if absent. Appending always creates a fresh
 * `### <role> · <timestamp>` block — existing content is never overwritten.
 *
 * @param vault Vault handle; this function calls `ensureExists`,
 *   `resolveInside`, `readFile`, `writeNote` and `toRel` on it.
 * @param args `{ note_path, content, role, thread_title? }`. `.md` is appended
 *   to `note_path` when missing; `role` must be user|assistant|system.
 * @returns `{ note_path, line_start, line_end }` where `line_start` is the
 *   1-indexed line of the new `### <role> · <timestamp>` heading in the
 *   written text and `line_end` is `text.split("\n").length` — the same
 *   convention `chatThreadRead` uses for the last message of a note.
 * @throws Error when `note_path` / `content` is blank or `role` is invalid.
 */
export async function chatThreadAppend(vault, args) {
    await vault.ensureExists();
    if (!args.note_path?.trim())
        throw new Error("chat_thread_append: `note_path` is required");
    if (!args.content?.trim())
        throw new Error("chat_thread_append: `content` is required");
    const role = args.role;
    if (role !== "user" && role !== "assistant" && role !== "system") {
        throw new Error(`chat_thread_append: invalid role "${role}" (must be user|assistant|system)`);
    }
    const targetRel = args.note_path.toLowerCase().endsWith(".md") ? args.note_path : `${args.note_path}.md`;
    const abs = vault.resolveInside(targetRel);
    // Second-resolution ISO timestamp (milliseconds stripped).
    const timestamp = new Date().toISOString().replace(/\.\d{3}Z$/, "Z");
    const messageBlock = `\n### ${role} · ${timestamp}\n\n${args.content.trim()}\n`;
    const resolveTitle = () => args.thread_title?.trim() || `chat — ${timestamp.slice(0, 10)}`;
    // Any read failure is treated as "note does not exist yet"; the
    // non-overwriting write below then refuses if the file is actually there.
    let existed = true;
    let body = "";
    try {
        body = await vault.readFile(abs);
    }
    catch {
        existed = false;
    }
    let written;
    let notePath;
    if (existed) {
        // Existing note: add the thread heading only when the note has none yet.
        const headingPrefix = CHAT_THREAD_TITLE_RE.test(body) ? "" : `\n\n## Chat: ${resolveTitle()}\n`;
        written = body.replace(/\n+$/, "") + headingPrefix + messageBlock;
        await vault.writeNote(targetRel, written, { overwrite: true });
        notePath = vault.toRel(abs);
    }
    else {
        // New note from scratch.
        const title = resolveTitle();
        written = `# ${title}\n\n## Chat: ${title}\n${messageBlock}`;
        const result = await vault.writeNote(targetRel, written, { overwrite: false });
        notePath = result.relPath;
    }
    // `messageBlock` is always the tail of `written` and starts with "\n", so
    // the message heading begins one character after the block's first char.
    const headingOffset = written.length - messageBlock.length + 1;
    const lineStart = (written.slice(0, headingOffset).match(/\n/g) ?? []).length + 1;
    return {
        note_path: notePath,
        line_start: lineStart,
        line_end: written.split("\n").length
    };
}
581
/**
 * Parse a note's chat thread into structured messages. Non-chat content
 * (anything outside the `## Chat: <title>` block) is ignored.
 *
 * A thread starts at a `## Chat: <title>` line and ends at the next `# ` or
 * non-chat `## ` heading. Each `### <role> · <timestamp>` line opens a
 * message. Lines inside a fenced code block (``` or ~~~) of a message are
 * always treated as message content, so a `# comment` in a shell/Python
 * snippet no longer terminates the thread and drops later messages.
 *
 * @param vault Vault handle; this function calls `ensureExists`,
 *   `resolveInside`, `readFile` and `toRel` on it.
 * @param args `{ note_path }` (`.md` appended when missing).
 * @returns `{ note_path, thread_title, messages, message_count }`. Each
 *   message has `role`, `timestamp`, trimmed `content`, `line_start`
 *   (1-indexed heading line) and `line_end` (1-indexed line preceding the next
 *   boundary, or the split line count for the final message).
 * @throws Error when `note_path` is blank; read errors propagate.
 */
export async function chatThreadRead(vault, args) {
    await vault.ensureExists();
    if (!args.note_path?.trim())
        throw new Error("chat_thread_read: `note_path` is required");
    const targetRel = args.note_path.toLowerCase().endsWith(".md") ? args.note_path : `${args.note_path}.md`;
    const abs = vault.resolveInside(targetRel);
    const body = await vault.readFile(abs);
    const lines = body.split("\n");
    // Fence opener/closer: run of >=3 backticks or tildes, then the rest of the line.
    const fenceRe = /^\s*(`{3,}|~{3,})([\s\S]*)$/;
    const messages = [];
    let threadTitle = null;
    let inThread = false;
    let current = null;
    let openFence = null;
    // Close the message being collected (if any), ending at 1-indexed `lineEnd`.
    const flush = (lineEnd) => {
        if (current) {
            messages.push({
                role: current.role,
                timestamp: current.timestamp,
                content: current.lines.join("\n").trim(),
                line_start: current.line_start,
                line_end: lineEnd
            });
        }
        current = null;
        openFence = null;
    };
    for (let i = 0; i < lines.length; i++) {
        const ln = lines[i] ?? "";
        if (current) {
            const fence = fenceRe.exec(ln);
            if (openFence) {
                // Only a bare fence of the same character and at least the
                // opening length closes the block; everything else is content.
                if (fence &&
                    fence[1][0] === openFence.char &&
                    fence[1].length >= openFence.length &&
                    fence[2].trim() === "") {
                    openFence = null;
                }
                current.lines.push(ln);
                continue;
            }
            // A backtick run followed by more backticks is inline code, not a fence.
            if (fence && !(fence[1][0] === "`" && fence[2].includes("`"))) {
                openFence = { char: fence[1][0], length: fence[1].length };
                current.lines.push(ln);
                continue;
            }
        }
        const titleMatch = CHAT_THREAD_TITLE_RE.exec(ln);
        if (titleMatch) {
            flush(i);
            threadTitle = (titleMatch[1] ?? "").trim();
            inThread = true;
            continue;
        }
        if (!inThread)
            continue;
        // Any other level-1/level-2 heading ends the thread (`## Chat:` lines
        // were already handled above).
        if (/^# /.test(ln) || /^## /.test(ln)) {
            flush(i);
            inThread = false;
            continue;
        }
        const headingMatch = CHAT_HEADING_RE.exec(ln);
        if (headingMatch?.[1] && headingMatch[2]) {
            flush(i);
            current = {
                role: headingMatch[1],
                timestamp: headingMatch[2].trim(),
                line_start: i + 1,
                lines: []
            };
            continue;
        }
        if (current)
            current.lines.push(ln);
    }
    flush(lines.length);
    return {
        note_path: vault.toRel(abs),
        thread_title: threadTitle,
        messages,
        message_count: messages.length
    };
}
666
- // ─── obsidian_frontmatter_{get,set,search} (v2.3.0 — atomic YAML ops) ──────
667
- // Surgical YAML manipulation. Pre-fix, agents wanting to set `status:
668
- // published` on 12 notes had to find/replace text — error-prone (multi-line
669
- // strings, special chars, key-collision). Now: parse via gray-matter, edit,
670
- // rewrite. Code-fence-aware via gray-matter (frontmatter is delimited
671
- // strictly by leading `---`, so no fence ambiguity).
672
- //
673
- // _get and _search are read-only; _set (which also deletes keys passed as `null`) is write-gated.
674
/**
 * Read a note's frontmatter, optionally picking out a single key.
 *
 * @param vault Vault handle (`ensureExists`, `readNote`).
 * @param args Target selector forwarded to `resolveTarget`, plus optional `key`.
 * @returns `{ path, frontmatter }`, with an extra `value` property (possibly
 *   `undefined`) when a truthy `key` was supplied.
 */
export async function frontmatterGet(vault, args) {
    await vault.ensureExists();
    const { absPath, mtimeMs, relPath } = await resolveTarget(vault, args);
    const { parsed } = await vault.readNote(absPath, mtimeMs);
    const response = { path: relPath, frontmatter: parsed.frontmatter };
    if (args.key) {
        // `value` is attached even when the key is missing, mirroring a lookup miss.
        response.value = parsed.frontmatter[args.key];
    }
    return response;
}
687
/**
 * Set and/or delete frontmatter keys on a single note.
 *
 * Each entry of `args.set` is applied to a copy of the current frontmatter:
 * a `null` value deletes the key, any other value replaces it when its JSON
 * form differs. Key presence is decided with own-property checks so names
 * that exist on `Object.prototype` (`toString`, `constructor`, ...) are not
 * mistaken for existing frontmatter keys.
 *
 * @param vault Vault handle (`ensureExists`, `readNote`, `writeNote`).
 * @param args Target selector forwarded to `resolveTarget`, `set` (non-empty
 *   object) and optional `dry_run`.
 * @returns `{ path, changed_keys, before, after, dry_run }` where each changed
 *   key is prefixed with `-` (deleted), `~` (modified) or `+` (added). Nothing
 *   is written when there are no changes or `dry_run` is true.
 * @throws Error when `set` is missing or empty.
 */
export async function frontmatterSet(vault, args) {
    await vault.ensureExists();
    if (!args.set || Object.keys(args.set).length === 0) {
        throw new Error("frontmatter_set: `set` must be a non-empty object");
    }
    const target = await resolveTarget(vault, args);
    const note = await vault.readNote(target.absPath, target.mtimeMs);
    const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
    const before = { ...note.parsed.frontmatter };
    const after = { ...before };
    const changed = [];
    for (const [k, v] of Object.entries(args.set)) {
        const present = hasOwn(after, k);
        if (v === null) {
            if (present) {
                delete after[k];
                changed.push(`-${k}`);
            }
            continue;
        }
        const prev = present ? after[k] : undefined;
        if (JSON.stringify(prev) !== JSON.stringify(v)) {
            // defineProperty creates a plain own data property even for names
            // like `__proto__`, where `after[k] = v` would hit the prototype setter.
            Object.defineProperty(after, k, { value: v, enumerable: true, writable: true, configurable: true });
            changed.push(`${hasOwn(before, k) ? "~" : "+"}${k}`);
        }
    }
    if (changed.length === 0 || args.dry_run === true) {
        return { path: target.relPath, changed_keys: changed, before, after, dry_run: args.dry_run === true };
    }
    // Round-trip via gray-matter — same writer pattern as createNote.
    const newDoc = matter.stringify(note.parsed.body, after);
    await vault.writeNote(target.relPath, newDoc, { overwrite: true });
    return { path: target.relPath, changed_keys: changed, before, after, dry_run: false };
}
720
/**
 * Find notes by a single frontmatter key.
 *
 * Exactly one predicate must be supplied:
 *  - `exists: true`  — the key is present (own property, value not undefined);
 *  - `exists: false` — the key is absent (previously this always returned
 *    zero matches);
 *  - `equals`        — the value's JSON form equals that of `equals`;
 *  - `contains`      — the value is an array with an element whose JSON form
 *    equals that of `contains`.
 *
 * @param vault Vault handle (`ensureExists`, `listMarkdown`, `readNote`).
 * @param args `{ key, equals? | exists? | contains?, folder?, limit? }`
 *   (`limit` defaults to 100).
 * @returns `{ key, total_matches, matches }` where each match is
 *   `{ path, value, mtime }`. Notes that fail to read/parse are skipped.
 * @throws Error when `key` is missing or the predicate count is not one.
 */
export async function frontmatterSearch(vault, args) {
    await vault.ensureExists();
    if (!args.key)
        throw new Error("frontmatter_search: `key` is required");
    const predicates = [args.equals !== undefined, args.exists !== undefined, args.contains !== undefined].filter(Boolean);
    if (predicates.length !== 1) {
        throw new Error("frontmatter_search: exactly one of `equals` / `exists` / `contains` must be set");
    }
    const limit = args.limit ?? 100;
    const entries = await vault.listMarkdown(args.folder);
    const matches = [];
    for (const e of entries) {
        if (matches.length >= limit)
            break;
        try {
            const note = await vault.readNote(e.absPath, e.mtimeMs);
            const fm = note.parsed.frontmatter;
            // Own-property lookup: inherited names such as `constructor` must
            // not count as frontmatter keys.
            const value = Object.prototype.hasOwnProperty.call(fm, args.key) ? fm[args.key] : undefined;
            let hit = false;
            if (args.exists !== undefined) {
                // Strict comparison: only boolean true/false select a branch.
                hit = (value !== undefined) === args.exists;
            }
            else if (args.equals !== undefined) {
                hit = JSON.stringify(value) === JSON.stringify(args.equals);
            }
            else if (Array.isArray(value)) {
                const wanted = JSON.stringify(args.contains);
                hit = value.some((v) => JSON.stringify(v) === wanted);
            }
            if (hit) {
                matches.push({ path: e.relPath, value, mtime: new Date(e.mtimeMs).toISOString() });
            }
        }
        catch {
            // skip unparseable notes
        }
    }
    return { key: args.key, total_matches: matches.length, matches };
}
757
/**
 * Move a note into the archive folder by delegating to `renameNote`.
 *
 * Only the source's basename is kept, so `Inbox/Foo.md` lands at
 * `Archive/Foo.md` rather than `Archive/Inbox/Foo.md`. Trailing slashes on
 * the archive folder are dropped. `dry_run` / `overwrite` are forwarded only
 * when the caller supplied them.
 *
 * @param vault Vault handle (`ensureExists`; passed through to `renameNote`).
 * @param args `{ path, archive_folder?, dry_run?, overwrite? }`
 *   (`archive_folder` defaults to "Archive").
 * @returns Whatever `renameNote` returns.
 * @throws Error when `path` is missing.
 */
export async function archiveNote(vault, args) {
    await vault.ensureExists();
    if (!args.path)
        throw new Error("archive_note: `path` is required");
    const archiveDir = (args.archive_folder ?? "Archive").replace(/\/+$/, "");
    const { dry_run: dryRun, overwrite } = args;
    return renameNote(vault, {
        from: args.path,
        to: `${archiveDir}/${path.basename(args.path)}`,
        ...(dryRun !== undefined ? { dry_run: dryRun } : {}),
        ...(overwrite !== undefined ? { overwrite } : {})
    });
}
777
/**
 * Replace every occurrence of `search` with `replace` across the vault (or a
 * folder), leaving fenced code blocks untouched.
 *
 * @param vault Vault handle (`ensureExists`, `isExcluded`, `listMarkdown`,
 *   `readNote`, `writeNote`, `writeEnabled`).
 * @param args `{ search, replace, folder?, case_sensitive?, dry_run? }`.
 *   `search` must be non-empty; `replace` must be a string (`""` deletes the
 *   matches). `case_sensitive` defaults to true.
 * @returns `{ search, replace, case_sensitive, dry_run, scope, files_scanned,
 *   files_updated, total_replacements, partial, errors? }`.
 *   `total_replacements` counts the planned replacements across all matching
 *   files; `files_updated` lists the files written (or, in a dry run, the
 *   files that would be written); `errors` is present only when a write failed.
 * @throws Error on invalid arguments, an excluded `folder`, or a read-only
 *   vault when not in dry-run mode.
 */
export async function replaceInNotes(vault, args) {
    await vault.ensureExists();
    const dryRun = args.dry_run === true;
    const caseSensitive = args.case_sensitive !== false;
    if (!args.search) {
        throw new Error("replace_in_notes: `search` must be a non-empty string");
    }
    // Without this guard a missing `replace` is string-concatenated into the
    // notes as the literal text "undefined".
    if (typeof args.replace !== "string") {
        throw new Error("replace_in_notes: `replace` must be a string (use \"\" to delete matches)");
    }
    if (args.search === args.replace) {
        throw new Error("replace_in_notes: `search` and `replace` are identical — no-op refused");
    }
    // v2.0.0-beta.2 P2 fix: reject early if `args.folder` itself is excluded.
    // Pre-fix, listMarkdown(excludedFolder) returned [] and the response said
    // "scope: 02_Personal/, files_scanned: 0" — confirming the folder name
    // existed in the user's vault layout. Now we refuse, returning a clean
    // error that doesn't reveal whether the folder is real-but-empty,
    // real-but-excluded, or nonexistent.
    // Test both `<folder>` (folder itself excluded) and `<folder>/_probe.md`
    // (a representative path inside) — the user's glob may use `**` which
    // matches subpaths but not the bare folder name.
    if (args.folder) {
        const folderTrim = args.folder.replace(/\/+$/, "");
        if (vault.isExcluded(folderTrim) || vault.isExcluded(`${folderTrim}/_probe.md`)) {
            throw new Error(`replace_in_notes: folder is excluded by privacy filter: ${args.folder}`);
        }
    }
    const entries = await vault.listMarkdown(args.folder);
    // Plan phase: compute every rewrite before touching the disk.
    const plan = [];
    let total = 0;
    for (const e of entries) {
        const { content } = await vault.readNote(e.absPath, e.mtimeMs);
        const { content: rewritten, count } = replaceStringOutsideCodeFences(content, args.search, args.replace, caseSensitive);
        if (count === 0)
            continue;
        plan.push({ path: e.relPath, after: rewritten, count });
        total += count;
    }
    // v2.0.0-beta.2 P1 fix: per-file error collection on apply. Pre-fix, a
    // throw on file 5 of 20 would lose the response — files 1-4 silently
    // committed, agent had no way to discover which. Now we continue past
    // failures, collect errors, and return both `files_updated` (committed)
    // and `errors` (uncommitted) with `partial: true` flag.
    //
    // Systemic-error fast-path: a read-only vault is a config problem, not a
    // per-file failure, so it throws immediately instead of producing a
    // "partial: true" response with N identical errors.
    const updated = [];
    const errors = [];
    if (!dryRun) {
        if (!vault.writeEnabled) {
            throw new Error("Vault is read-only — start the server with --enable-write to allow note creation");
        }
        for (const p of plan) {
            try {
                await vault.writeNote(p.path, p.after, { overwrite: true });
                updated.push({ path: p.path, occurrences: p.count });
            }
            catch (err) {
                errors.push({ path: p.path, message: err instanceof Error ? err.message : String(err) });
            }
        }
    }
    else {
        for (const p of plan)
            updated.push({ path: p.path, occurrences: p.count });
    }
    const result = {
        search: args.search,
        replace: args.replace,
        case_sensitive: caseSensitive,
        dry_run: dryRun,
        scope: args.folder ?? "(whole vault)",
        files_scanned: entries.length,
        files_updated: updated,
        total_replacements: total,
        partial: errors.length > 0
    };
    if (errors.length > 0)
        result.errors = errors;
    return result;
}
858
/**
 * Build the new inner text of a wikilink after its target file was renamed.
 *
 * The alias / section / block suffix (everything from the first `|`, `#` or
 * `^` onward) is carried over verbatim. The link stays path-qualified only if
 * the old target contained a `/` and the new directory is not the vault root
 * (`"."` or `""`); otherwise the bare new basename is used.
 *
 * @param raw Raw inner text of the wikilink, e.g. `Folder/Foo#sec|alias`.
 * @param oldTarget Target string the parser extracted from `raw`.
 * @param newBasename Basename of the renamed note.
 * @param newDir Directory of the renamed note.
 * @returns The rewritten raw inner text.
 */
function rewriteRawTarget(raw, oldTarget, newBasename, newDir) {
    let renamedTarget = newBasename;
    if (oldTarget.includes("/") && newDir !== "." && newDir !== "") {
        renamedTarget = `${newDir}/${newBasename}`;
    }
    // Index of the first suffix delimiter, or -1 when the link is a bare target.
    const boundary = raw.search(/[|#^]/);
    const suffix = boundary === -1 ? "" : raw.slice(boundary);
    return `${renamedTarget}${suffix}`;
}
879
/**
 * Rewrite wikilink / embed literals everywhere except inside fenced code
 * blocks.
 *
 * Fences are tracked by marker character and length: a block opened by a run
 * of N backticks (or tildes) is closed only by a bare run of at least N of
 * the same character. Shorter or different fence lines nested inside a block
 * are plain content, so their surroundings are not rewritten by mistake. A
 * backtick run followed by more backticks on the same line is inline code and
 * does not open a fence.
 *
 * @param content Full note text.
 * @param oldRawsToNew Iterable of `[oldRaw, { kind, newRaw }]` pairs; `kind`
 *   `"embed"` selects the `![[...]]` form, anything else the `[[...]]` form.
 * @returns `{ content, count }` where `count` is the total number of literal
 *   replacements applied.
 */
function rewriteOutsideCodeFences(content, oldRawsToNew) {
    const fenceRe = /^\s*(`{3,}|~{3,})([\s\S]*)$/;
    let openFence = null;
    let count = 0;
    const out = [];
    for (const line of content.split("\n")) {
        const fence = fenceRe.exec(line);
        if (openFence) {
            if (fence &&
                fence[1][0] === openFence.char &&
                fence[1].length >= openFence.length &&
                fence[2].trim() === "") {
                openFence = null;
            }
            out.push(line);
            continue;
        }
        if (fence && !(fence[1][0] === "`" && fence[2].includes("`"))) {
            openFence = { char: fence[1][0], length: fence[1].length };
            out.push(line);
            continue;
        }
        let mutated = line;
        for (const [oldRaw, { kind, newRaw }] of oldRawsToNew) {
            const opener = kind === "embed" ? "![[" : "[[";
            const oldLit = `${opener}${oldRaw}]]`;
            const newLit = `${opener}${newRaw}]]`;
            if (oldLit === newLit)
                continue;
            // split/join performs the same non-overlapping left-to-right
            // replacement as an indexOf loop and yields the occurrence count.
            const pieces = mutated.split(oldLit);
            count += pieces.length - 1;
            mutated = pieces.join(newLit);
        }
        out.push(mutated);
    }
    return { content: out.join("\n"), count };
}
916
/**
 * Generic code-fence-aware string replacer used by replaceInNotes (v1.9).
 *
 * Walks the content line by line and replaces every non-overlapping
 * occurrence of `search` with `replace` outside fenced code blocks. `replace`
 * is inserted verbatim (no `$`-pattern expansion).
 *
 * Fences are tracked by marker character and length: a block opened by a run
 * of N backticks (or tildes) is closed only by a bare run of at least N of
 * the same character; a backtick run followed by more backticks on the same
 * line is inline code, not a fence.
 *
 * Case-insensitive matching uses a Unicode-aware regex on the original line.
 * The earlier approach searched a lower-cased copy and reused its indices,
 * which corrupts lines where lower-casing changes the string length
 * (e.g. "İ" lower-cases to two code units).
 *
 * @param content Full note text.
 * @param search Literal text to find; an empty value is a no-op.
 * @param replace Literal replacement text.
 * @param caseSensitive When false, match case-insensitively.
 * @returns `{ content, count }` — rewritten text and number of replacements.
 */
function replaceStringOutsideCodeFences(content, search, replace, caseSensitive) {
    if (!search)
        return { content, count: 0 };
    const fenceRe = /^\s*(`{3,}|~{3,})([\s\S]*)$/;
    // Escape only regex syntax characters so the pattern stays valid under /u.
    const insensitiveRe = caseSensitive
        ? null
        : new RegExp(search.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), "giu");
    let openFence = null;
    let count = 0;
    const out = [];
    for (const line of content.split("\n")) {
        const fence = fenceRe.exec(line);
        if (openFence) {
            if (fence &&
                fence[1][0] === openFence.char &&
                fence[1].length >= openFence.length &&
                fence[2].trim() === "") {
                openFence = null;
            }
            out.push(line);
            continue;
        }
        if (fence && !(fence[1][0] === "`" && fence[2].includes("`"))) {
            openFence = { char: fence[1][0], length: fence[1].length };
            out.push(line);
            continue;
        }
        if (insensitiveRe) {
            // Function replacer keeps `replace` literal and counts matches.
            out.push(line.replace(insensitiveRe, () => {
                count += 1;
                return replace;
            }));
        }
        else {
            const pieces = line.split(search);
            count += pieces.length - 1;
            out.push(pieces.join(replace));
        }
    }
    return { content: out.join("\n"), count };
}
966
/**
 * Combine optional frontmatter with a note body.
 *
 * Serialisation goes through gray-matter's `stringify` rather than a
 * hand-rolled YAML writer, so YAML-special strings (date-like values such as
 * "2026-05-03", `!`-prefixed or pipe-containing text, ...) survive a
 * round-trip instead of coming back as a different type on read.
 *
 * @param frontmatter Frontmatter object; missing or empty means "no frontmatter".
 * @param content Note body.
 * @returns `content` unchanged when there is no frontmatter, otherwise the
 *   document produced by `matter.stringify`.
 */
function composeNote(frontmatter, content) {
    const hasFrontmatter = Boolean(frontmatter) && Object.keys(frontmatter).length > 0;
    return hasFrontmatter ? matter.stringify(content, frontmatter) : content;
}
976
/**
 * Collect a note's frontmatter tags as lower-cased names without leading `#`.
 *
 * Reads `fm.tags`, falling back to `fm.tag`. An array contributes its string
 * elements; a string is split on commas and whitespace; any other value
 * yields no tags.
 *
 * @param fm Parsed frontmatter object.
 * @returns Array of normalised tag names (possibly empty).
 */
function extractFrontmatterTagsLower(fm) {
    const raw = fm.tags ?? fm.tag;
    if (!raw)
        return [];
    let names;
    if (Array.isArray(raw)) {
        names = raw.filter((item) => typeof item === "string");
    }
    else if (typeof raw === "string") {
        names = raw.split(/[,\s]+/).filter(Boolean);
    }
    else {
        return [];
    }
    const normalised = [];
    for (const name of names) {
        normalised.push(name.replace(/^#+/, "").toLowerCase());
    }
    return normalised;
}
987
/**
 * Map "today" / "daily" / "weekly" / "monthly" (case-insensitive, trimmed) to
 * the current periodic-note name in the standard Obsidian Daily-Notes-plugin
 * formats: `YYYY-MM-DD`, `YYYY-Www` (ISO 8601 week) and `YYYY-MM`. Custom
 * formats are out of scope (users with non-default conventions address notes
 * by exact name).
 *
 * @param title Title supplied by the caller.
 * @returns The periodic-note name built from the local date, or `null` when
 *   `title` is not one of the four aliases.
 */
function resolvePeriodicAlias(title) {
    const alias = title.trim().toLowerCase();
    if (!["daily", "today", "weekly", "monthly"].includes(alias)) {
        return null;
    }
    const now = new Date();
    const year = now.getFullYear();
    const month = String(now.getMonth() + 1).padStart(2, "0");
    switch (alias) {
        case "monthly":
            return `${year}-${month}`;
        case "weekly": {
            // ISO week: shift to the Thursday of the current Mon-based week;
            // that Thursday's year owns the week, and the week number is its
            // 1-based day-of-year divided by seven, rounded up.
            const thursday = new Date(Date.UTC(now.getFullYear(), now.getMonth(), now.getDate()));
            const isoDay = thursday.getUTCDay() || 7; // Mon=1..Sun=7
            thursday.setUTCDate(thursday.getUTCDate() + 4 - isoDay);
            const isoYear = thursday.getUTCFullYear();
            const jan1 = new Date(Date.UTC(isoYear, 0, 1));
            const dayOfYear = (thursday.valueOf() - jan1.valueOf()) / 86400000 + 1;
            const week = Math.ceil(dayOfYear / 7);
            return `${isoYear}-W${String(week).padStart(2, "0")}`;
        }
        default: {
            // "daily" and "today"
            const day = String(now.getDate()).padStart(2, "0");
            return `${year}-${month}-${day}`;
        }
    }
}
1011
/**
 * Return up to 3 vault-relative paths whose basename or relative path looks
 * similar to a missing target. Used to enrich `Note not found` errors with
 * did-you-mean hints — helpful for LLMs that mistype a note name.
 *
 * Scoring (case-insensitive, `.md` ignored on the target): exact basename
 * match 100, prefix relation either way 70, substring relation either way
 * 50, target contained in the relative path 30. Zero-score entries are
 * dropped; ties keep listing order.
 *
 * @param vault Vault handle (`listMarkdown`).
 * @param target The name or path that failed to resolve.
 * @returns Best-matching relative paths; `[]` when anything throws.
 */
async function suggestSimilar(vault, target) {
    try {
        const entries = await vault.listMarkdown();
        const wanted = target.toLowerCase().replace(/\.md$/i, "");
        const scored = [];
        for (const entry of entries) {
            const base = stripMd(entry.basename).toLowerCase();
            const rel = entry.relPath.toLowerCase();
            let score;
            if (base === wanted) {
                score = 100;
            }
            else if (base.startsWith(wanted) || wanted.startsWith(base)) {
                score = 70;
            }
            else if (base.includes(wanted) || wanted.includes(base)) {
                score = 50;
            }
            else if (rel.includes(wanted)) {
                score = 30;
            }
            else {
                continue;
            }
            scored.push({ path: entry.relPath, score });
        }
        scored.sort((left, right) => right.score - left.score);
        return scored.slice(0, 3).map((candidate) => candidate.path);
    }
    catch {
        return [];
    }
}
1042
/**
 * Resolve a `{ path }` or `{ title }` selector to a concrete note.
 *
 * By path: tries the path as given and, when it lacks `.md`, the path with
 * `.md` appended; the first candidate that stats successfully wins. On
 * failure the last stat error's message is re-thrown with did-you-mean hints.
 *
 * By title, in order:
 *  1. literal title lookup (a real "Daily.md" beats the periodic alias);
 *  2. the user's Daily / Periodic Notes plugin config (v1.10) — by path, then
 *     by basename only when the configured stem has no folder;
 *  3. the legacy hard-coded alias formats (v0.11).
 *
 * @param vault Vault handle (`resolveInside`, `stat`, `toRel`, `findByTitle`,
 *   `getPeriodicConfig`).
 * @param args `{ path? , title? }` — `path` takes precedence.
 * @returns For path lookups `{ absPath, relPath, basename, mtimeMs }`; for
 *   title lookups whatever `vault.findByTitle` returns, or the same shape.
 * @throws Error when nothing matches, when neither selector is given, or when
 *   the periodic path is excluded by `--read-paths` / `--exclude-glob`.
 */
async function resolveTarget(vault, args) {
    // Stat an absolute path and describe it as a resolved target.
    const describe = async (abs) => {
        const stat = await vault.stat(abs);
        return {
            absPath: abs,
            relPath: vault.toRel(abs),
            basename: path.basename(abs),
            mtimeMs: stat.mtimeMs
        };
    };
    // Build the ". Did you mean: ...?" suffix (empty when nothing is similar).
    const didYouMean = async (needle) => {
        const suggestions = await suggestSimilar(vault, needle);
        return suggestions.length ? `. Did you mean: ${suggestions.join(", ")}?` : "";
    };
    if (args.path) {
        const candidates = [args.path];
        if (!args.path.toLowerCase().endsWith(".md")) {
            candidates.push(`${args.path}.md`);
        }
        let lastErr;
        for (const candidate of candidates) {
            // resolveInside stays outside the try: its errors propagate as-is.
            const abs = vault.resolveInside(candidate);
            try {
                return await describe(abs);
            }
            catch (err) {
                lastErr = err;
            }
        }
        const hint = await didYouMean(args.path);
        if (lastErr instanceof Error) {
            throw new Error(`${lastErr.message}${hint}`);
        }
        throw new Error(`Note not found: ${args.path}${hint}`);
    }
    if (!args.title) {
        throw new Error("Either path or title is required");
    }
    // Literal title first — a user may have an actual file named "Daily.md" /
    // "Today.md". The periodic alias is only a fallback.
    const literal = await vault.findByTitle(args.title);
    if (literal) {
        return literal;
    }
    // v1.10: honor the user's Daily / Periodic Notes plugin config (custom
    // folder or format) before the v0.11 hard-coded defaults.
    const periodicConfig = await vault.getPeriodicConfig();
    const periodicResolved = resolvePeriodicNoteName(args.title, periodicConfig);
    if (periodicResolved) {
        try {
            return await describe(vault.resolveInside(`${periodicResolved.relPath}.md`));
        }
        catch (err) {
            // v1.11.1: surface exclusion errors instead of masking them as
            // "not found" — they stem from the user's own --read-paths /
            // --exclude-glob config. Every other error falls through.
            const excluded = err instanceof Error && /excluded by --(read-paths|exclude-glob)/.test(err.message);
            if (excluded) {
                throw err;
            }
        }
        // v2.0.0-beta.2 P1 fix: basename fallback only for folder-less stems
        // (periodic notes kept at the vault root). With a configured folder, a
        // same-basename note elsewhere would silently redirect "today" to a
        // note the user never configured — e.g. `Daily Notes/**` excluded and
        // an unrelated `Public/2026-05-08.md` surfacing instead.
        if (!periodicResolved.relPath.includes("/")) {
            const basenameMatch = await vault.findByTitle(path.basename(periodicResolved.relPath));
            if (basenameMatch) {
                return basenameMatch;
            }
        }
    }
    // Last resort: legacy v0.11 hard-coded alias formats, for users with
    // neither plugin configured who still expect the defaults to work.
    const aliased = resolvePeriodicAlias(args.title);
    if (aliased) {
        const aliasMatch = await vault.findByTitle(aliased);
        if (aliasMatch) {
            return aliasMatch;
        }
    }
    const hint = await didYouMean(args.title);
    const aliasHint = periodicResolved ? ` (also tried periodic alias "${periodicResolved.relPath}")` : "";
    throw new Error(`No note found with title: ${args.title}${aliasHint}${hint}`);
}
1137
/**
 * Dry-run validation of a note an agent intends to write. Nothing is written.
 *
 * Checks, in order: (1) the path resolves inside the vault, (2) the YAML
 * frontmatter parses, (3) each `[[wikilink]]` in the body resolves to an
 * existing note, (4) which tags already exist in the vault and which are
 * new, (5) whether the path or the title collides with an existing note.
 *
 * The frontmatter is parsed once and reused for the tag check, and
 * did-you-mean suggestions are computed once per distinct broken target
 * (each lookup lists the whole vault).
 *
 * @param vault Vault handle (`ensureExists`, `resolveInside`, `toRel`,
 *   `listMarkdown`, `stat`, `findByTitle`).
 * @param args `{ path, content, mode? }` — `mode` defaults to "create".
 * @returns `{ ok, proposed_path, mode, errors, warnings, yaml, wikilinks,
 *   tags, collision }`; `ok` is true when `errors` is empty.
 */
export async function validateNoteProposal(vault, args) {
    await vault.ensureExists();
    const mode = args.mode ?? "create";
    const errors = [];
    const warnings = [];
    // 1. Path sanity. resolveInside throws on traversal — capture as error,
    //    don't let it propagate as a generic exception.
    let normalizedPath = args.path.toLowerCase().endsWith(".md") ? args.path : `${args.path}.md`;
    let absPath = null;
    try {
        absPath = vault.resolveInside(normalizedPath);
        normalizedPath = vault.toRel(absPath);
    }
    catch (err) {
        errors.push({
            kind: "path-traversal",
            message: err instanceof Error ? err.message : String(err)
        });
    }
    // 2. YAML parse via gray-matter (the same parser used at write time).
    const yamlReport = { parsed: false, error: null, keys: [] };
    let bodyAfterFm = args.content;
    let fmData = {};
    try {
        const parsed = matter(args.content);
        fmData = parsed.data ?? {};
        yamlReport.parsed = true;
        yamlReport.keys = Object.keys(fmData);
        bodyAfterFm = parsed.content;
    }
    catch (err) {
        yamlReport.error = err instanceof Error ? err.message : String(err);
        errors.push({ kind: "yaml-invalid", message: `YAML frontmatter could not be parsed: ${yamlReport.error}` });
    }
    // 3. Wikilink resolution against the live vault. Embeds (`![[...]]`) are
    //    excluded by the negative lookbehind.
    const all = await vault.listMarkdown();
    const wikilinkRe = /(?<!!)\[\[([^\]\n]+?)\]\]/g;
    const wikilinks = [];
    const suggestionCache = new Map();
    for (const m of bodyAfterFm.matchAll(wikilinkRe)) {
        const raw = m[0];
        const inner = (m[1] ?? "").trim();
        if (!inner)
            continue;
        // Strip alias / section / block to get the bare target name.
        const beforePipe = inner.split("|")[0] ?? "";
        const beforeHash = beforePipe.split("#")[0] ?? "";
        const target = beforeHash.split("^")[0]?.trim() ?? "";
        if (!target)
            continue;
        const match = findBestMatch(all, target, normalizedPath);
        if (match) {
            wikilinks.push({
                raw,
                target,
                status: "resolved",
                resolved_path: match.relPath,
                suggestions: []
            });
            continue;
        }
        if (!suggestionCache.has(target)) {
            suggestionCache.set(target, await suggestSimilar(vault, target));
        }
        // Copy so report entries never share one array instance.
        const suggestions = [...suggestionCache.get(target)];
        wikilinks.push({
            raw,
            target,
            status: "broken",
            resolved_path: null,
            suggestions
        });
        warnings.push({
            kind: "broken-wikilink",
            message: `[[${target}]] does not resolve to any existing note`,
            suggestion: suggestions.length ? `Closest matches: ${suggestions.join(", ")}` : undefined
        });
    }
    // 4. Tag pre-classification (existing vs new).
    const existingTags = new Set((await listTags(vault, {})).map((t) => t.tag.toLowerCase()));
    const proposedTagsRaw = new Set();
    // Frontmatter tags (reusing the data parsed in step 2).
    const fmTags = fmData.tags ?? fmData.tag;
    if (Array.isArray(fmTags)) {
        for (const t of fmTags)
            if (typeof t === "string" && t)
                proposedTagsRaw.add(t.replace(/^#/, ""));
    }
    else if (typeof fmTags === "string" && fmTags) {
        for (const t of fmTags.split(/[\s,]+/))
            if (t)
                proposedTagsRaw.add(t.replace(/^#/, ""));
    }
    // Inline tags.
    const inlineTagRe = /(?:^|[\s([{>])#([\p{L}][\p{L}\p{N}_/-]*)/gu;
    for (const m of bodyAfterFm.matchAll(inlineTagRe)) {
        if (m[1])
            proposedTagsRaw.add(m[1]);
    }
    const tags = [];
    for (const t of proposedTagsRaw) {
        const status = existingTags.has(t.toLowerCase()) ? "existing" : "new";
        tags.push({ name: t, status });
        if (status === "new") {
            warnings.push({
                kind: "new-tag",
                message: `#${t} is new — won't fork an existing tag (case-insensitive check)`
            });
        }
    }
    // 5. Path collision check.
    let collision = { kind: "none" };
    if (absPath) {
        try {
            await vault.stat(absPath);
            // Path exists.
            if (mode === "create") {
                errors.push({
                    kind: "path-collision",
                    message: `Note already exists at ${normalizedPath} (mode="create" refuses overwrite)`
                });
            }
            collision = { kind: "path-exists", existing_path: normalizedPath };
        }
        catch {
            // Path doesn't exist — try title collision (an existing note at a different path).
            const titleFromBasename = stripMd(path.basename(normalizedPath));
            const existing = await vault.findByTitle(titleFromBasename);
            if (existing && existing.relPath !== normalizedPath) {
                warnings.push({
                    kind: "title-collision",
                    message: `A note titled "${titleFromBasename}" already exists at ${existing.relPath} — proceeding will create a same-titled file at a different path`,
                    suggestion: existing.relPath
                });
                collision = { kind: "title-exists-elsewhere", existing_path: existing.relPath };
            }
        }
    }
    return {
        ok: errors.length === 0,
        proposed_path: normalizedPath,
        mode,
        errors,
        warnings,
        yaml: yamlReport,
        wikilinks,
        tags,
        collision
    };
}
1284
/**
 * Rank notes by structural similarity to a target note.
 *
 * Four signals are combined into one score:
 *   3.0 × tag Jaccard  +  1.5 × title 3-gram Jaccard
 *   + 2.0 × shared outbound links (fraction of the target's resolved outbound
 *     links the candidate also links to)
 *   + 2.0 × co-backlink Jaccard (notes linking to both).
 *
 * @param vault Vault handle (`ensureExists`, `listMarkdown`, `readNote`).
 * @param args Target selector forwarded to `resolveTarget`, plus optional
 *   `limit` (default 10) and `min_score` (default 0.05).
 * @returns Candidates sorted by descending score, at most `limit` entries,
 *   each `{ path, title, score, signals, shared_tags, mtime }` with numbers
 *   rounded to 4 decimals. Returns `[]` when the target is not part of the
 *   markdown listing (e.g. filtered out by an exclude glob).
 */
export async function findSimilar(vault, args) {
    await vault.ensureExists();
    const limit = args.limit ?? 10;
    const minScore = args.min_score ?? 0.05;
    const round4 = (x) => Math.round(x * 10000) / 10000;
    const target = await resolveTarget(vault, args);
    const entries = await vault.listMarkdown();
    // Per-note features: lower-cased tags, title trigrams, resolved outbound links.
    const metas = new Map();
    for (const entry of entries) {
        const { parsed } = await vault.readNote(entry.absPath, entry.mtimeMs);
        const tags = new Set(parsed.tags.map((tag) => tag.toLowerCase()));
        const title3grams = ngrams(stripMd(entry.basename).toLowerCase(), 3);
        const outbound = new Set(parsed.wikilinks
            .map((link) => findBestMatch(entries, link.target, entry.relPath))
            .filter(Boolean)
            .map((hit) => hit.relPath));
        metas.set(entry.relPath, { entry, tags, title3grams, outbound });
    }
    const targetMeta = metas.get(target.relPath);
    if (!targetMeta) {
        // resolveTarget found the note but the listing did not include it —
        // treat as zero results rather than crash.
        return [];
    }
    // Invert the outbound sets once: relPath -> set of notes linking to it.
    const inboundFor = new Map();
    for (const [from, meta] of metas) {
        for (const to of meta.outbound) {
            if (!inboundFor.has(to)) {
                inboundFor.set(to, new Set());
            }
            inboundFor.get(to).add(from);
        }
    }
    const targetInbound = inboundFor.get(target.relPath) ?? new Set();
    const ranked = [];
    for (const [relPath, meta] of metas) {
        if (relPath === target.relPath)
            continue;
        const tagJ = jaccard(targetMeta.tags, meta.tags);
        const titleJ = jaccard(targetMeta.title3grams, meta.title3grams);
        const candInbound = inboundFor.get(relPath) ?? new Set();
        // shared_outbound is asymmetric: normalised by the target's outbound count.
        let sharedOut = 0;
        if (targetMeta.outbound.size !== 0) {
            sharedOut = intersectionSize(targetMeta.outbound, meta.outbound) / targetMeta.outbound.size;
        }
        const coBack = jaccard(targetInbound, candInbound);
        const score = 3.0 * tagJ + 1.5 * titleJ + 2.0 * sharedOut + 2.0 * coBack;
        if (score < minScore)
            continue;
        const sharedTags = [...targetMeta.tags].filter((tag) => meta.tags.has(tag)).sort();
        ranked.push({
            path: meta.entry.relPath,
            title: stripMd(meta.entry.basename),
            score: round4(score),
            signals: {
                tag_jaccard: round4(tagJ),
                title_3gram: round4(titleJ),
                shared_outbound: round4(sharedOut),
                co_backlink: round4(coBack)
            },
            shared_tags: sharedTags,
            mtime: new Date(meta.entry.mtimeMs).toISOString()
        });
    }
    ranked.sort((left, right) => right.score - left.score);
    return ranked.slice(0, limit);
}
1356
/**
 * Collect the immediate neighbourhood of one note.
 *
 * Three buckets are returned next to the centre note itself:
 *  - `outbound`: distinct notes the centre links to (wikilinks resolved with
 *    `findBestMatch`), in link order;
 *  - `inbound`: notes that link to the centre, most links first, each with the
 *    number of resolving links (`count`);
 *  - `tag_siblings`: notes sharing at least one tag (case-insensitive) with the
 *    centre that are not already listed as outbound or inbound.
 *
 * @param vault Vault handle; `ensureExists`, `listMarkdown` and `readNote` are used.
 * @param args  Target selector forwarded to `resolveTarget`, plus the optional
 *              `max_per_bucket` (default 20) limiting every bucket.
 * @returns `{ center, outbound, inbound, tag_siblings }`.
 */
export async function getNoteNeighbors(vault, args) {
    await vault.ensureExists();
    const bucketLimit = args.max_per_bucket ?? 20;
    const center = await resolveTarget(vault, args);
    const allNotes = await vault.listMarkdown();
    const centerNote = (await vault.readNote(center.absPath, center.mtimeMs)).parsed;
    const centerTags = new Set();
    for (const tag of centerNote.tags) {
        centerTags.add(tag.toLowerCase());
    }

    // Outbound bucket: first occurrence of every resolvable destination.
    const outboundPaths = new Set();
    const outbound = [];
    for (const link of centerNote.wikilinks) {
        const hit = findBestMatch(allNotes, link.target, center.relPath);
        if (hit && !outboundPaths.has(hit.relPath)) {
            outboundPaths.add(hit.relPath);
            const neighbour = await vault.readNote(hit.absPath, hit.mtimeMs);
            outbound.push({ path: hit.relPath, title: stripMd(hit.basename), tags: neighbour.parsed.tags });
            // The limit is checked after the push, so at least one entry is kept.
            if (outbound.length >= bucketLimit) {
                break;
            }
        }
    }

    // Inbound bucket: every other note, with the number of links that resolve
    // to the centre.
    const linkers = new Map();
    for (const note of allNotes) {
        if (note.absPath === center.absPath) {
            continue;
        }
        const { parsed } = await vault.readNote(note.absPath, note.mtimeMs);
        let hits = 0;
        for (const link of parsed.wikilinks) {
            const resolved = findBestMatch(allNotes, link.target, note.relPath);
            if (resolved && resolved.absPath === center.absPath) {
                hits += 1;
            }
        }
        if (hits > 0) {
            linkers.set(note.relPath, { entry: note, count: hits, tags: parsed.tags });
        }
    }
    const inbound = Array.from(linkers.values())
        .sort((left, right) => right.count - left.count)
        .slice(0, bucketLimit)
        .map(({ entry, tags, count }) => ({
            path: entry.relPath,
            title: stripMd(entry.basename),
            tags,
            count
        }));

    // Tag siblings: only computed when the centre has tags at all.
    const tag_siblings = [];
    if (centerTags.size > 0) {
        const alreadyListed = new Set([center.relPath, ...outboundPaths, ...linkers.keys()]);
        const ranked = [];
        for (const note of allNotes) {
            if (alreadyListed.has(note.relPath)) {
                continue;
            }
            const { parsed } = await vault.readNote(note.absPath, note.mtimeMs);
            const shared = parsed.tags.filter((tag) => centerTags.has(tag.toLowerCase()));
            if (shared.length > 0) {
                ranked.push({ path: note.relPath, title: stripMd(note.basename), shared });
            }
        }
        ranked.sort((left, right) => right.shared.length - left.shared.length);
        for (const { path: siblingPath, title, shared } of ranked.slice(0, bucketLimit)) {
            tag_siblings.push({ path: siblingPath, title, shared_tags: shared });
        }
    }

    const centerSummary = {
        path: center.relPath,
        title: stripMd(center.basename),
        tags: centerNote.tags,
        mtime: new Date(center.mtimeMs).toISOString()
    };
    return { center: centerSummary, outbound, inbound, tag_siblings };
}
1430
/**
 * Produce headline statistics for the whole vault in a single pass over all
 * markdown notes.
 *
 * Sizes and word counts are measured on the raw note content (as returned by
 * `readNote`). Tags are tallied case-insensitively. A note is an orphan when no
 * resolving wikilink points at it and it contains no wikilinks itself.
 *
 * @param vault Vault handle; `ensureExists`, `listMarkdown` and `readNote` are used.
 * @param args  `top_tags` (default 10) limits the length of `top_tags`.
 * @returns Aggregate counters plus `generated_at` (ISO timestamp).
 */
export async function getVaultStats(vault, args) {
    await vault.ensureExists();
    const tagLimit = args.top_tags ?? 10;
    const notes = await vault.listMarkdown();
    // Notes modified at or after this instant count as "recent".
    const recentCutoffMs = Date.now() - 7 * 24 * 3600 * 1000;

    const totals = { bytes: 0, words: 0, recent: 0, frontmatter: 0, broken: 0 };
    const tagTally = new Map();
    const inboundTally = new Map();
    const notesWithLinks = new Set();

    for (const note of notes) {
        const { content, parsed } = await vault.readNote(note.absPath, note.mtimeMs);
        totals.bytes += Buffer.byteLength(content, "utf8");
        const trimmed = content.trim();
        if (trimmed) {
            totals.words += trimmed.split(/\s+/).length;
        }
        if (note.mtimeMs >= recentCutoffMs) {
            totals.recent += 1;
        }
        if (Object.keys(parsed.frontmatter).length > 0) {
            totals.frontmatter += 1;
        }
        if (parsed.wikilinks.length > 0) {
            notesWithLinks.add(note.relPath);
        }
        for (const tag of parsed.tags) {
            const normalized = tag.toLowerCase();
            tagTally.set(normalized, (tagTally.get(normalized) ?? 0) + 1);
        }
        for (const link of parsed.wikilinks) {
            const destination = findBestMatch(notes, link.target, note.relPath);
            if (destination) {
                inboundTally.set(destination.relPath, (inboundTally.get(destination.relPath) ?? 0) + 1);
            }
            else {
                totals.broken += 1;
            }
        }
    }

    // Inbound tallies are always >= 1, so presence in the map means "has inbound".
    const orphans = notes.filter((note) => !inboundTally.has(note.relPath) && !notesWithLinks.has(note.relPath)).length;

    const top_tags = Array.from(tagTally, ([tag, count]) => ({ tag, count }))
        .sort((left, right) => right.count - left.count || left.tag.localeCompare(right.tag))
        .slice(0, tagLimit);

    const noteCount = notes.length;
    return {
        total_notes: noteCount,
        total_size_bytes: totals.bytes,
        avg_note_words: noteCount === 0 ? 0 : Math.round(totals.words / noteCount),
        recently_modified_7d: totals.recent,
        orphans,
        broken_wikilinks: totals.broken,
        total_tags: tagTally.size,
        top_tags,
        notes_with_frontmatter: totals.frontmatter,
        generated_at: new Date().toISOString()
    };
}
1491
/**
 * Decide which timestamp represents "last touched" for the stale check.
 *
 * Frontmatter `last_reviewed` (or `last-reviewed`) wins over the file mtime,
 * but only when it is a usable date: a `Date`, a parseable string, or a finite
 * number of epoch milliseconds that `Date` can represent. Anything else falls
 * back to mtime, and the returned `source` says which one was actually used.
 *
 * @param frontmatter Parsed frontmatter object (may be undefined).
 * @param mtimeMs     File modification time in epoch milliseconds.
 * @returns `{ ms, source }` where `source` is "frontmatter.last_reviewed" or "mtime".
 */
function lintResolveLastTouched(frontmatter, mtimeMs) {
    const raw = frontmatter?.last_reviewed ?? frontmatter?.["last-reviewed"];
    let candidate = Number.NaN;
    if (raw instanceof Date) {
        candidate = raw.getTime();
    }
    else if (typeof raw === "string") {
        candidate = Date.parse(raw);
    }
    else if (typeof raw === "number") {
        candidate = raw;
    }
    // A finite number can still be outside the representable Date range, in
    // which case toISOString() would throw; reject it here.
    if (Number.isFinite(candidate) && Number.isFinite(new Date(candidate).getTime())) {
        return { ms: candidate, source: "frontmatter.last_reviewed" };
    }
    return { ms: mtimeMs, source: "mtime" };
}

/**
 * Lint a vault (or one folder of it) for structural problems.
 *
 * Findings are grouped into five buckets, each capped at `max_per_bucket`:
 *  - `orphans`: notes with no inbound and no outbound wikilinks;
 *  - `broken_links`: wikilinks that do not resolve against the whole vault;
 *  - `stubs`: notes whose body has fewer than `stub_word_threshold` words;
 *  - `stale`: notes not touched (frontmatter `last_reviewed`, else mtime) for
 *    more than `stale_days` days;
 *  - `concept_candidates`: capitalised phrases mentioned by at least
 *    `concept_min_mentions` notes that have no note of their own.
 *
 * Note that inbound links are only counted from the notes being scanned, so
 * with `folder` set a note linked solely from outside that folder is still
 * reported as an orphan.
 *
 * @param vault Vault handle; `ensureExists`, `listMarkdown` and `readNote` are used.
 * @param args  Optional `folder`, `stub_word_threshold` (100), `stale_days` (365),
 *              `concept_min_mentions` (3) and `max_per_bucket` (50).
 * @returns `{ scope, scanned, generated_at, summary, findings }`.
 */
export async function lintWiki(vault, args) {
    await vault.ensureExists();
    const stubThreshold = args.stub_word_threshold ?? 100;
    const staleDays = args.stale_days ?? 365;
    const conceptMinMentions = args.concept_min_mentions ?? 3;
    const cap = args.max_per_bucket ?? 50;
    const entries = await vault.listMarkdown(args.folder);
    // Links are resolved against the whole vault even when only a folder is scanned.
    const allEntries = await vault.listMarkdown();
    const staleMs = Date.now() - staleDays * 24 * 3600 * 1000;

    const inbound = new Map();
    const outboundPresence = new Set();
    const broken = [];
    const stubs = [];
    const stale = [];
    const titleSet = new Set();
    for (const e of allEntries) {
        titleSet.add(stripMd(e.basename).toLowerCase());
    }

    // A concept phrase is 1-3 consecutive Capitalised words of 3+ letters each.
    // Phrases whose first word is one of these stop-words are ignored.
    const conceptStopwords = new Set([
        "The",
        "A",
        "An",
        "This",
        "That",
        "These",
        "Those",
        "If",
        "When",
        "While",
        "But",
        "And",
        "Or"
    ]);
    const capPhraseRe = /\b((?:[A-Z][a-z][a-z]+(?:\s+[A-Z][a-z][a-z]+){0,2}))\b/g;
    const conceptMentions = new Map(); // phrase -> set of source paths

    for (const e of entries) {
        const { parsed, mtimeMs } = await vault.readNote(e.absPath, e.mtimeMs);

        // Outbound presence, inbound tallies and broken links.
        if (parsed.wikilinks.length > 0) {
            outboundPresence.add(e.relPath);
        }
        for (const link of parsed.wikilinks) {
            const m = findBestMatch(allEntries, link.target, e.relPath);
            if (m) {
                inbound.set(m.relPath, (inbound.get(m.relPath) ?? 0) + 1);
            }
            else if (broken.length < cap) {
                broken.push({
                    kind: "broken-link",
                    path: e.relPath,
                    message: `[[${link.target}]] in ${e.relPath} doesn't resolve`,
                    suggestion: "create the missing note, fix the link, or remove it",
                    details: { target: link.target, raw: link.raw }
                });
            }
        }

        // Stubs: word count of the body only (frontmatter excluded).
        const trimmedBody = parsed.body.trim();
        const wordCount = trimmedBody ? trimmedBody.split(/\s+/).length : 0;
        if (wordCount < stubThreshold && stubs.length < cap) {
            stubs.push({
                kind: "stub",
                path: e.relPath,
                message: `${e.relPath} is ${wordCount} words (threshold ${stubThreshold})`,
                suggestion: "develop, merge into a hub, or archive",
                details: { word_count: wordCount, mtime: new Date(mtimeMs).toISOString() }
            });
        }

        // Stale: the reported source is the timestamp that was really used, so
        // an unparseable `last_reviewed` is labelled "mtime", not frontmatter.
        const lastTouched = lintResolveLastTouched(parsed.frontmatter, mtimeMs);
        if (lastTouched.ms < staleMs && stale.length < cap) {
            const lastTouchedIso = new Date(lastTouched.ms).toISOString();
            stale.push({
                kind: "stale",
                path: e.relPath,
                message: `${e.relPath} not touched since ${lastTouchedIso.slice(0, 10)}`,
                suggestion: "review for accuracy or archive",
                details: {
                    last_touched: lastTouchedIso,
                    source: lastTouched.source
                }
            });
        }

        // Concept mentions: at most 30 distinct phrases are recorded per note.
        const seenInThisNote = new Set();
        for (const m of parsed.body.matchAll(capPhraseRe)) {
            const phrase = m[1];
            if (!phrase) {
                continue;
            }
            const firstWord = phrase.split(/\s+/)[0];
            if (firstWord !== undefined && conceptStopwords.has(firstWord)) {
                continue;
            }
            if (seenInThisNote.has(phrase)) {
                continue;
            }
            if (seenInThisNote.size >= 30) {
                break;
            }
            // A phrase that already names a note (basename match) is not a candidate.
            if (titleSet.has(phrase.toLowerCase())) {
                continue;
            }
            seenInThisNote.add(phrase);
            const sources = conceptMentions.get(phrase) ?? new Set();
            sources.add(e.relPath);
            conceptMentions.set(phrase, sources);
        }
    }

    // Orphans: no inbound AND no outbound wikilinks.
    const orphans = [];
    for (const e of entries) {
        if (orphans.length >= cap) {
            break;
        }
        if (!inbound.get(e.relPath) && !outboundPresence.has(e.relPath)) {
            orphans.push({
                kind: "orphan",
                path: e.relPath,
                message: `${e.relPath} has no inbound or outbound wikilinks`,
                suggestion: "link from a hub note, archive, or delete",
                details: { mtime: new Date(e.mtimeMs).toISOString() }
            });
        }
    }

    // Concept candidates: phrases mentioned by enough distinct notes, most
    // mentioned first.
    const conceptCandidates = [];
    const ranked = [...conceptMentions.entries()]
        .filter(([, sources]) => sources.size >= conceptMinMentions)
        .sort((a, b) => b[1].size - a[1].size);
    for (const [phrase, sources] of ranked) {
        if (conceptCandidates.length >= cap) {
            break;
        }
        conceptCandidates.push({
            kind: "concept-without-page",
            message: `"${phrase}" is mentioned by ${sources.size} notes but has no page of its own`,
            suggestion: `create a page \`${phrase}.md\` and refile the most-developed mentions into it`,
            details: { phrase, mention_count: sources.size, sources: [...sources].slice(0, 5) }
        });
    }

    return {
        scope: args.folder ?? "(whole vault)",
        scanned: entries.length,
        generated_at: new Date().toISOString(),
        summary: {
            orphans: orphans.length,
            broken_links: broken.length,
            stubs: stubs.length,
            stale: stale.length,
            concept_candidates: conceptCandidates.length
        },
        findings: {
            orphans,
            broken_links: broken,
            stubs,
            stale,
            concept_candidates: conceptCandidates
        }
    };
}
1666
/**
 * Collect "open question" markers from note bodies.
 *
 * Each body line is tested against a pattern (case-insensitive); capture group
 * 1 is the question text. The default pattern accepts, after an optional
 * list-bullet / quote / numbering prefix:
 *  - `Open question:` / `Open question - ...` / `Q:` / `Q - ...`: the
 *    separator is REQUIRED here, otherwise every line that merely starts with
 *    the letter "q" ("quick...", "Q-learning...") would be reported;
 *  - `TODO?` / `??`, optionally followed by `:` or `-`.
 * Markdown heading lines are never reported; they only update the
 * `context_heading` attached to later hits.
 *
 * Scanning stops as soon as `limit` hits were collected, and only then is the
 * result sorted oldest-first, so with more than `limit` hits in the vault the
 * returned set depends on listing order.
 *
 * @param vault Vault handle; `ensureExists`, `listMarkdown` and `readNote` are used.
 * @param args  Optional `limit` (default 100), `folder`, and `pattern` (a regex
 *              source string compiled with the `i` flag; must expose the
 *              question as capture group 1).
 * @returns Array of `{ question, source_path, source_title, context_heading,
 *          line, age_days, mtime }`, oldest note first.
 * @throws SyntaxError when `args.pattern` is not a valid regular expression.
 */
export async function getOpenQuestions(vault, args) {
    await vault.ensureExists();
    const limit = args.limit ?? 100;
    // No `g` flag: exec() stays stateless across lines.
    const defaultRe = /^\s*(?:[#\->\*\d\.]+\s+)?(?:(?:open\s+question|q)(?:\s*:|\s+-(?=\s))|(?:todo\?|\?\?)\s*[:\-]?)\s*(.+)$/i;
    const re = args.pattern == null ? defaultRe : new RegExp(args.pattern, "i");
    const headingRe = /^(#{1,6})\s+(.+?)\s*#*\s*$/;
    const entries = await vault.listMarkdown(args.folder);
    const out = [];
    const now = Date.now();
    const msPerDay = 24 * 3600 * 1000;
    for (const e of entries) {
        if (out.length >= limit) {
            break;
        }
        const { parsed, mtimeMs } = await vault.readNote(e.absPath, e.mtimeMs);
        // Only the body is scanned, so frontmatter lines cannot produce hits.
        const lines = parsed.body.split("\n");
        let currentHeading = null;
        for (let i = 0; i < lines.length; i++) {
            const line = lines[i] ?? "";
            const headingMatch = headingRe.exec(line);
            if (headingMatch?.[2]) {
                currentHeading = headingMatch[2];
                // A heading is context, not a question hit.
                continue;
            }
            const m = re.exec(line);
            if (!m?.[1]) {
                continue;
            }
            out.push({
                question: m[1].trim(),
                source_path: e.relPath,
                source_title: stripMd(e.basename),
                context_heading: currentHeading,
                line: i + 1,
                age_days: Math.round((now - mtimeMs) / msPerDay),
                mtime: new Date(mtimeMs).toISOString()
            });
            if (out.length >= limit) {
                break;
            }
        }
    }
    // Oldest first so long-deferred questions surface at the top.
    out.sort((a, b) => b.age_days - a.age_days);
    return out;
}
1716
/**
 * Audit notes carrying a "paper" tag for a missing citation in frontmatter.
 *
 * A tagged note is considered clean when its frontmatter has any of the keys
 * `arxiv`, `doi`, `url` or `isbn` (case-insensitive; only key presence is
 * checked). Every other tagged note is flagged, together with the identifiers
 * found in its body and a proposed frontmatter patch built from the first
 * arXiv id / DOI, or, if neither exists, the first URL.
 *
 * @param vault Vault handle; `ensureExists`, `listMarkdown` and `readNote` are used.
 * @param args  Optional `tag` (default "paper", leading `#` ignored), `limit`
 *              (default 100 flagged notes) and `folder`.
 * @returns `{ scanned, flagged }`, where `scanned` counts tagged notes that were
 *          examined before the limit was reached.
 */
export async function paperAudit(vault, args) {
    await vault.ensureExists();
    const tag = (args.tag ?? "paper").replace(/^#+/, "").toLowerCase();
    const limit = args.limit ?? 100;
    const entries = await vault.listMarkdown(args.folder);
    const arxivRe = /\barxiv[:\s]*([0-9]{4}\.[0-9]{4,5}(?:v\d+)?)\b/gi;
    const doiRe = /\bdoi[:\s]*(10\.\d{4,9}\/[\w\-._;()/:]+)/gi;
    const urlRe = /\bhttps?:\/\/[^\s<>")\]]+/g;
    let scanned = 0;
    const flagged = [];
    for (const e of entries) {
        if (flagged.length >= limit) {
            break;
        }
        const { parsed } = await vault.readNote(e.absPath, e.mtimeMs);
        const tagsLower = parsed.tags.map((t) => t.toLowerCase());
        if (!tagsLower.includes(tag)) {
            continue;
        }
        scanned += 1;
        const fm = parsed.frontmatter ?? {};
        const fmKeys = new Set(Object.keys(fm).map((k) => k.toLowerCase()));
        const hasFmCitation = fmKeys.has("arxiv") || fmKeys.has("doi") || fmKeys.has("url") || fmKeys.has("isbn");
        // Clean: the note identifies itself in frontmatter. Its body may cite
        // other papers, which is irrelevant here.
        if (hasFmCitation) {
            continue;
        }
        // Only the body is scanned, so frontmatter values are never counted
        // as "found in body".
        const body = parsed.body;
        const arxivIds = [...body.matchAll(arxivRe)].map((m) => m[1]).filter((v) => !!v);
        const doiIds = [...body.matchAll(doiRe)].map((m) => m[1]).filter((v) => !!v);
        const urls = [...body.matchAll(urlRe)].map((m) => m[0]);
        const foundInBody = {
            arxiv: [...new Set(arxivIds)],
            doi: [...new Set(doiIds)],
            url: [...new Set(urls)].slice(0, 3)
        };
        const bodyHasAnyId = foundInBody.arxiv.length > 0 || foundInBody.doi.length > 0 || foundInBody.url.length > 0;
        let proposed = null;
        if (bodyHasAnyId) {
            proposed = {};
            if (foundInBody.arxiv[0]) {
                proposed.arxiv = foundInBody.arxiv[0];
            }
            if (foundInBody.doi[0]) {
                proposed.doi = foundInBody.doi[0];
            }
            if (foundInBody.url[0] && !proposed.arxiv && !proposed.doi) {
                proposed.url = foundInBody.url[0];
            }
        }
        // Identifiers quoted in the message: arXiv ids and DOIs first. When the
        // body only contains URLs, quote those instead, otherwise the message
        // would show an empty "()".
        let quoted = [
            ...foundInBody.arxiv.map((v) => `arxiv:${v}`),
            ...foundInBody.doi.map((v) => `doi:${v}`)
        ];
        if (quoted.length === 0) {
            quoted = foundInBody.url.map((v) => `url:${v}`);
        }
        const msg = bodyHasAnyId
            ? `${e.relPath} has identifiers in body (${quoted.slice(0, 2).join(", ")}) but missing frontmatter`
            : `${e.relPath} has #${tag} but no arxiv/doi/url anywhere — citation missing`;
        flagged.push({
            path: e.relPath,
            title: stripMd(e.basename),
            has_frontmatter_citation: hasFmCitation,
            found_in_body: foundInBody,
            proposed_frontmatter_patch: proposed,
            message: msg
        });
    }
    return { scanned, flagged };
}
1782
/**
 * Find a shortest chain of links between two notes (breadth-first search).
 *
 * Endpoints are given by path (`from` / `to`) or by title (`from_title` /
 * `to_title`) and resolved through `resolveTarget`. Links are wikilinks and,
 * unless `follow_embeds` is `false`, embeds as well. The search expands at
 * most `max_depth` (default 5) levels. With `include_alternatives: true`, all
 * trails discovered at the shortest depth are collected and up to ten are
 * returned as `alternatives`.
 *
 * @param vault Vault handle; `ensureExists`, `listMarkdown` and `readNote` are used.
 * @param args  `from`/`from_title`, `to`/`to_title`, optional `max_depth`,
 *              `include_alternatives`, `follow_embeds`.
 * @returns `{ from, to, found, hops, path }` (+ `alternatives` when requested
 *          and found); `hops` is -1 and `path` empty when nothing was found.
 */
export async function findPath(vault, args) {
    await vault.ensureExists();
    const depthLimit = args.max_depth ?? 5;
    const wantAlternatives = args.include_alternatives === true;
    const includeEmbeds = args.follow_embeds !== false;

    // Builds the selector handed to resolveTarget: path wins over title.
    const selectorFor = (notePath, noteTitle) => {
        if (notePath !== undefined) {
            return { path: notePath };
        }
        if (noteTitle !== undefined) {
            return { title: noteTitle };
        }
        return {};
    };
    const source = await resolveTarget(vault, selectorFor(args.from, args.from_title));
    const dest = await resolveTarget(vault, selectorFor(args.to, args.to_title));
    const startStep = () => ({ path: source.relPath, title: stripMd(source.basename), via: "" });

    if (source.absPath === dest.absPath) {
        return { from: source.relPath, to: dest.relPath, found: true, hops: 0, path: [startStep()] };
    }

    const entries = await vault.listMarkdown();
    // One lookup table up front keeps per-node resolution constant-time.
    const entryByRel = new Map(entries.map((entry) => [entry.relPath, entry]));
    const visited = new Set([source.relPath]);
    let frontier = [{ rel: source.relPath, trail: [startStep()] }];
    const trails = [];
    let hitDepth = -1;

    let level = 0;
    while (level < depthLimit && frontier.length > 0) {
        const reachedDepth = level + 1;
        const upcoming = [];
        for (const { rel, trail } of frontier) {
            const current = entryByRel.get(rel);
            if (!current) {
                continue;
            }
            const { parsed } = await vault.readNote(current.absPath, current.mtimeMs);
            const links = includeEmbeds ? [...parsed.wikilinks, ...parsed.embeds] : parsed.wikilinks;
            for (const link of links) {
                const hit = findBestMatch(entries, link.target, current.relPath);
                if (!hit) {
                    continue;
                }
                const reachesDest = hit.absPath === dest.absPath;
                // Already-expanded notes are skipped, but the destination may
                // be reached again to record alternative trails.
                if (!reachesDest && visited.has(hit.relPath)) {
                    continue;
                }
                const extended = trail.concat([{ path: hit.relPath, title: stripMd(hit.basename), via: link.raw }]);
                if (!reachesDest) {
                    visited.add(hit.relPath);
                    upcoming.push({ rel: hit.relPath, trail: extended });
                    continue;
                }
                if (hitDepth === -1) {
                    hitDepth = reachedDepth;
                }
                if (hitDepth !== reachedDepth) {
                    continue;
                }
                trails.push(extended);
                if (!wantAlternatives) {
                    return { from: source.relPath, to: dest.relPath, found: true, hops: hitDepth, path: extended };
                }
            }
        }
        // The shortest depth has been fully explored; deeper levels cannot help.
        if (hitDepth !== -1 && reachedDepth === hitDepth) {
            break;
        }
        frontier = upcoming;
        level += 1;
    }

    const miss = { from: source.relPath, to: dest.relPath, found: false, hops: -1, path: [] };
    if (trails.length === 0) {
        return miss;
    }
    trails.sort((a, b) => a.length - b.length || (a[0]?.path ?? "").localeCompare(b[0]?.path ?? ""));
    const best = trails[0];
    if (!best) {
        return miss;
    }
    const outcome = { from: source.relPath, to: dest.relPath, found: true, hops: hitDepth, path: best };
    if (wantAlternatives) {
        outcome.alternatives = trails.slice(0, 10);
    }
    return outcome;
}
1884
/**
 * Build an `obsidian://open` URI that opens a note in the Obsidian app.
 *
 * The vault is addressed by name, taken as the last segment of `vault.root`;
 * the note by its vault-relative path with the markdown extension removed
 * (via `stripMd`).
 *
 * Query values are percent-encoded with `encodeURIComponent`, so spaces become
 * `%20` as the Obsidian URI documentation requires. `URLSearchParams` is not
 * used because it serialises spaces as `+`, which (presumably) Obsidian does
 * not decode back to a space: TODO confirm against the app.
 *
 * @param vault Vault handle; `ensureExists` and `root` are used.
 * @param args  Target selector forwarded to `resolveTarget`; a truthy
 *              `new_pane` adds `newpane=true`.
 * @returns `{ uri, vault_name, path, title }`.
 */
export async function openInUi(vault, args) {
    await vault.ensureExists();
    const target = await resolveTarget(vault, args);
    const vaultName = path.basename(vault.root);
    const noteRel = stripMd(target.relPath);
    const query = [`vault=${encodeURIComponent(vaultName)}`, `file=${encodeURIComponent(noteRel)}`];
    if (args.new_pane) {
        query.push("newpane=true");
    }
    return {
        uri: `obsidian://open?${query.join("&")}`,
        vault_name: vaultName,
        path: target.relPath,
        title: stripMd(target.basename)
    };
}
1902
/**
 * List `.canvas` files with their size and node / edge counts.
 *
 * Each file is read and parsed as JSON. A canvas that cannot be read or parsed
 * is still listed, with zero counts; if even the read failed its `size_bytes`
 * is 0 (unknown) rather than some unrelated number.
 *
 * At most `limit` files are inspected, in the order `listFilesByExtension`
 * returns them; the collected rows are then sorted newest-first by mtime.
 *
 * @param vault Vault handle; `ensureExists`, `listFilesByExtension` and
 *              `readBinaryFile` are used.
 * @param args  Optional `limit` (default 100) and `folder`.
 * @returns Array of `{ path, name, size_bytes, mtime, node_count, edge_count }`.
 */
export async function listCanvases(vault, args) {
    await vault.ensureExists();
    const limit = args.limit ?? 100;
    const all = await vault.listFilesByExtension(".canvas", args.folder);
    const out = [];
    for (const e of all) {
        if (out.length >= limit) {
            break;
        }
        let nodeCount = 0;
        let edgeCount = 0;
        let sizeBytes = 0;
        try {
            const buf = await vault.readBinaryFile(e.absPath);
            sizeBytes = buf.byteLength;
            const parsed = JSON.parse(buf.toString("utf8"));
            nodeCount = Array.isArray(parsed?.nodes) ? parsed.nodes.length : 0;
            edgeCount = Array.isArray(parsed?.edges) ? parsed.edges.length : 0;
        }
        catch {
            // Deliberate best-effort: an unreadable or malformed canvas must
            // not break the listing, it is reported with zero counts.
        }
        out.push({
            path: e.relPath,
            name: e.basename.replace(/\.canvas$/i, ""),
            size_bytes: sizeBytes,
            mtime: new Date(e.mtimeMs).toISOString(),
            node_count: nodeCount,
            edge_count: edgeCount
        });
    }
    out.sort((a, b) => b.mtime.localeCompare(a.mtime));
    return out;
}
1936
/**
 * Read one `.canvas` file and return its nodes and edges in a normalised shape.
 *
 * - `args.path` may omit the `.canvas` extension.
 * - Nodes are classified by their `type` (`text`, `file`, `link`, `group`,
 *   anything else becomes `unknown`); missing numeric geometry defaults to 0
 *   and missing strings to "".
 * - For `file` nodes the reference is resolved against the vault's markdown
 *   listing: first by exact vault-relative path (backslashes in listing paths
 *   are normalised to `/`), then through `findBestMatch`. References that do
 *   not resolve are reported in `broken_file_refs`.
 * - Edges without both `fromNode` and `toNode` are dropped.
 * - A file whose JSON is valid but not an object (for example `null`) is
 *   treated as an empty canvas.
 *
 * @param vault Vault handle; `ensureExists`, `resolveInside`, `stat`, `toRel`,
 *              `readBinaryFile` and `listMarkdown` are used.
 * @param args  `{ path }`, the vault-relative canvas path.
 * @returns `{ path, name, size_bytes, mtime, nodes, edges, summary, broken_file_refs }`.
 * @throws Error when `path` is missing or the file is not valid JSON; errors
 *         from `vault.stat` / `vault.resolveInside` propagate.
 */
export async function readCanvas(vault, args) {
    await vault.ensureExists();
    if (!args.path) {
        throw new Error("path is required");
    }
    const normalized = args.path.toLowerCase().endsWith(".canvas") ? args.path : `${args.path}.canvas`;
    const abs = vault.resolveInside(normalized);
    // Fail fast if the file is missing; the result is reused for size / mtime.
    const stat = await vault.stat(abs);
    const rel = vault.toRel(abs);
    const buf = await vault.readBinaryFile(abs);
    let parsed;
    try {
        parsed = JSON.parse(buf.toString("utf8"));
    }
    catch (err) {
        throw new Error(`Canvas file is not valid JSON: ${rel} — ${err instanceof Error ? err.message : String(err)}`);
    }
    // `null` and primitives are valid JSON but carry no nodes or edges.
    const doc = parsed !== null && typeof parsed === "object" ? parsed : {};

    const allMarkdown = await vault.listMarkdown();
    // Exact-path index built once; the first entry wins on duplicate keys,
    // which matches a linear first-match search over the listing.
    const markdownByPath = new Map();
    for (const entry of allMarkdown) {
        const key = entry.relPath.replace(/\\/g, "/");
        if (!markdownByPath.has(key)) {
            markdownByPath.set(key, entry);
        }
    }

    const nodes = [];
    const summary = { text: 0, file: 0, link: 0, group: 0, unknown: 0 };
    const brokenRefs = [];
    if (Array.isArray(doc.nodes)) {
        for (const raw of doc.nodes) {
            if (!raw || typeof raw !== "object") {
                continue;
            }
            const n = raw;
            const id = typeof n.id === "string" ? n.id : "";
            const x = typeof n.x === "number" ? n.x : 0;
            const y = typeof n.y === "number" ? n.y : 0;
            const width = typeof n.width === "number" ? n.width : 0;
            const height = typeof n.height === "number" ? n.height : 0;
            const color = typeof n.color === "string" ? n.color : undefined;
            const type = typeof n.type === "string" ? n.type : "unknown";
            switch (type) {
                case "text":
                    nodes.push({
                        kind: "text",
                        id,
                        x,
                        y,
                        width,
                        height,
                        text: typeof n.text === "string" ? n.text : "",
                        ...(color !== undefined ? { color } : {})
                    });
                    summary.text += 1;
                    break;
                case "file": {
                    const fileRef = typeof n.file === "string" ? n.file : "";
                    // Leading slashes are dropped so the reference is vault-relative.
                    const cleaned = fileRef.replace(/^\/+/, "");
                    const direct = cleaned.length > 0 ? markdownByPath.get(cleaned) : undefined;
                    const resolved = direct ?? (cleaned ? findBestMatch(allMarkdown, cleaned) : null);
                    if (cleaned && !resolved) {
                        brokenRefs.push(cleaned);
                    }
                    nodes.push({
                        kind: "file",
                        id,
                        x,
                        y,
                        width,
                        height,
                        file: fileRef,
                        file_resolved: resolved ? resolved.relPath : null,
                        ...(typeof n.subpath === "string" ? { subpath: n.subpath } : {}),
                        ...(color !== undefined ? { color } : {})
                    });
                    summary.file += 1;
                    break;
                }
                case "link":
                    nodes.push({
                        kind: "link",
                        id,
                        x,
                        y,
                        width,
                        height,
                        url: typeof n.url === "string" ? n.url : "",
                        ...(color !== undefined ? { color } : {})
                    });
                    summary.link += 1;
                    break;
                case "group":
                    nodes.push({
                        kind: "group",
                        id,
                        x,
                        y,
                        width,
                        height,
                        ...(typeof n.label === "string" ? { label: n.label } : {}),
                        ...(color !== undefined ? { color } : {})
                    });
                    summary.group += 1;
                    break;
                default:
                    // Unrecognised node types are passed through untouched.
                    nodes.push({ kind: "unknown", id, raw_type: type, raw: n });
                    summary.unknown += 1;
            }
        }
    }

    const edges = [];
    if (Array.isArray(doc.edges)) {
        for (const raw of doc.edges) {
            if (!raw || typeof raw !== "object") {
                continue;
            }
            const e = raw;
            const id = typeof e.id === "string" ? e.id : "";
            const fromNode = typeof e.fromNode === "string" ? e.fromNode : "";
            const toNode = typeof e.toNode === "string" ? e.toNode : "";
            if (!fromNode || !toNode) {
                continue;
            }
            edges.push({
                id,
                from_node: fromNode,
                ...(typeof e.fromSide === "string" ? { from_side: e.fromSide } : {}),
                to_node: toNode,
                ...(typeof e.toSide === "string" ? { to_side: e.toSide } : {}),
                ...(typeof e.label === "string" ? { label: e.label } : {}),
                ...(typeof e.color === "string" ? { color: e.color } : {})
            });
        }
    }

    return {
        path: rel,
        name: path.basename(rel).replace(/\.canvas$/i, ""),
        size_bytes: stat.size,
        mtime: new Date(stat.mtimeMs).toISOString(),
        nodes,
        edges,
        summary,
        broken_file_refs: brokenRefs
    };
}
2077
- const tfidfCache = new WeakMap();
2078
- const STOP_WORDS = new Set([
2079
- "a",
2080
- "an",
2081
- "and",
2082
- "are",
2083
- "as",
2084
- "at",
2085
- "be",
2086
- "but",
2087
- "by",
2088
- "for",
2089
- "from",
2090
- "has",
2091
- "have",
2092
- "if",
2093
- "in",
2094
- "is",
2095
- "it",
2096
- "its",
2097
- "of",
2098
- "on",
2099
- "or",
2100
- "that",
2101
- "the",
2102
- "this",
2103
- "to",
2104
- "was",
2105
- "were",
2106
- "will",
2107
- "with",
2108
- "i",
2109
- "you",
2110
- "we",
2111
- "they",
2112
- "he",
2113
- "she",
2114
- "not",
2115
- "no",
2116
- "do",
2117
- "does",
2118
- "did",
2119
- "had",
2120
- "been",
2121
- "being",
2122
- "so",
2123
- "than",
2124
- "then",
2125
- "there",
2126
- "their",
2127
- "them",
2128
- "these",
2129
- "those",
2130
- "what",
2131
- "when",
2132
- "where",
2133
- "which",
2134
- "who",
2135
- "why",
2136
- "how"
2137
- ]);
2138
- // v2.1.0: detect Chinese / Japanese / Thai / Khmer / Lao via script ranges.
2139
- // These languages don't use spaces between words, so the Unicode-regex
2140
- // tokenizer falls back to character-level (or huge multi-word tokens),
2141
- // which tanks BM25 + TF-IDF precision. Intl.Segmenter (Node 16+ ICU)
2142
- // gives word-break per language. Detection is per-document, branching the
2143
- // tokenizer.
2144
- const CJK_OR_THAI_RANGES = /[぀-ヿ㐀-䶿一-鿿가-힯฀-๿ༀ-࿿ក-៿]/;
2145
- function tokenizeForTfidf(text) {
2146
- // v1.11.1: Unicode-aware tokenizer. The previous ASCII-only regex
2147
- // (`/[a-z0-9][a-z0-9_-]*/g`) silently dropped Cyrillic, Greek, CJK,
2148
- // Hebrew, Arabic, and any non-Latin content from the TF-IDF index.
2149
- // `\p{L}` matches any Unicode letter; `\p{N}` matches any Unicode number.
2150
- //
2151
- // v2.1.0: when the text contains CJK / Thai / Khmer / Lao chars (no-
2152
- // whitespace scripts), use Intl.Segmenter for proper word-break first,
2153
- // then run the Unicode regex per-segment. This produces real word tokens
2154
- // instead of "認可サーバーがアクセストークン" as a single 12-char token
2155
- // that the length filter would drop.
2156
- const lower = text.toLowerCase();
2157
- const out = [];
2158
- if (CJK_OR_THAI_RANGES.test(lower) && typeof Intl !== "undefined" && typeof Intl.Segmenter !== "undefined") {
2159
- const segmenter = new Intl.Segmenter(undefined, { granularity: "word" });
2160
- for (const seg of segmenter.segment(lower)) {
2161
- if (!seg.isWordLike)
2162
- continue;
2163
- const t = seg.segment;
2164
- if (t.length < 1)
2165
- continue;
2166
- if (t.length > 40)
2167
- continue;
2168
- if (STOP_WORDS.has(t))
2169
- continue;
2170
- out.push(t);
2171
- }
2172
- return out;
2173
- }
2174
- for (const m of lower.matchAll(/[\p{L}\p{N}][\p{L}\p{N}_-]*/gu)) {
2175
- const t = m[0];
2176
- if (t.length < 2)
2177
- continue;
2178
- if (t.length > 40)
2179
- continue;
2180
- if (STOP_WORDS.has(t))
2181
- continue;
2182
- out.push(t);
2183
- }
2184
- return out;
2185
- }
2186
/**
 * Build (or reuse) the TF-IDF index for a vault.
 *
 * The cached index is reused when the current markdown listing has the same
 * length and, position by position, the same `relPath` and `mtimeMs` as the
 * listing the cache was built from.
 *
 * Term weight = (1 + ln tf) * idf, with smoothed idf = ln(1 + N / (1 + df));
 * every document vector is then L2-normalised.
 *
 * @param vault Vault handle; `listMarkdown` and `readNote` are used.
 * @returns `{ docs, idf, entriesRef }`, where `docs` holds one
 *          `{ relPath, basename, mtimeMs, weights }` per note.
 */
async function buildTfidfIndex(vault) {
    const listing = await vault.listMarkdown();
    const previous = tfidfCache.get(vault);
    if (previous && previous.entriesRef.length === listing.length) {
        const unchanged = previous.entriesRef.every((known, position) => {
            const current = listing[position];
            return current?.relPath === known.relPath && current?.mtimeMs === known.mtimeMs;
        });
        if (unchanged) {
            return previous;
        }
    }

    // Pass 1: term frequencies per note and document frequencies overall.
    const perNote = [];
    const documentFrequency = new Map();
    for (const entry of listing) {
        const { parsed } = await vault.readNote(entry.absPath, entry.mtimeMs);
        const termFrequency = new Map();
        for (const token of tokenizeForTfidf(parsed.body)) {
            termFrequency.set(token, (termFrequency.get(token) ?? 0) + 1);
        }
        perNote.push({ entry, termFrequency });
        for (const term of termFrequency.keys()) {
            documentFrequency.set(term, (documentFrequency.get(term) ?? 0) + 1);
        }
    }

    // Smoothing keeps terms present in every note above zero and limits
    // inflation in small vaults.
    const noteCount = perNote.length || 1;
    const idf = new Map();
    documentFrequency.forEach((df, term) => {
        idf.set(term, Math.log(1 + noteCount / (1 + df)));
    });

    // Pass 2: weight and normalise each note vector.
    const docs = perNote.map(({ entry, termFrequency }) => {
        const weights = new Map();
        let sumOfSquares = 0;
        termFrequency.forEach((count, term) => {
            const weight = (1 + Math.log(count)) * (idf.get(term) ?? 0);
            if (weight !== 0) {
                weights.set(term, weight);
                sumOfSquares += weight * weight;
            }
        });
        const length = Math.sqrt(sumOfSquares);
        if (length > 0) {
            weights.forEach((weight, term) => {
                weights.set(term, weight / length);
            });
        }
        return { relPath: entry.relPath, basename: entry.basename, mtimeMs: entry.mtimeMs, weights };
    });

    const index = { docs, idf, entriesRef: listing };
    tfidfCache.set(vault, index);
    return index;
}
2240
/**
 * TF-IDF cosine search over markdown note bodies.
 *
 * The query is tokenized like the documents, weighted with the corpus IDF and
 * L2-normalised; each document's score is the dot product over shared terms.
 *
 * @param vault Vault to search.
 * @param args `query` (required, non-blank), optional `folder`, `limit`
 *   (default 10) and `min_score` (default 0.05).
 * @returns {Promise<object>} `{ query, total_docs, method: "tfidf-cosine", matches }`.
 * @throws {Error} When `query` is empty or whitespace-only.
 */
export async function semanticSearch(vault, args) {
    await vault.ensureExists();
    const limit = args.limit ?? 10;
    const minScore = args.min_score ?? 0.05;
    if (!args.query.trim())
        throw new Error("query must not be empty");
    const { docs, idf } = await buildTfidfIndex(vault);
    // Vectorize query: same tokenization, IDF from the corpus, L2 normalize.
    const qTf = new Map();
    for (const t of tokenizeForTfidf(args.query))
        qTf.set(t, (qTf.get(t) ?? 0) + 1);
    const qWeights = new Map();
    let qNormSq = 0;
    for (const [t, count] of qTf) {
        const w = (1 + Math.log(count)) * (idf.get(t) ?? 0);
        if (w === 0)
            continue; // term unknown to the corpus contributes nothing
        qWeights.set(t, w);
        qNormSq += w * w;
    }
    const qNorm = Math.sqrt(qNormSq);
    if (qNorm > 0) {
        for (const [t, w] of qWeights)
            qWeights.set(t, w / qNorm);
    }
    // Cosine = Σ q[t]·d[t] over shared terms (both vectors are L2-normed).
    const folderPrefix = args.folder ? `${args.folder.replace(/\/+$/, "")}/` : null;
    const scored = [];
    for (const doc of docs) {
        if (folderPrefix && !doc.relPath.startsWith(folderPrefix) && doc.relPath !== args.folder)
            continue;
        let s = 0;
        const matched = [];
        for (const [t, qw] of qWeights) {
            const dw = doc.weights.get(t);
            if (dw !== undefined) {
                s += qw * dw;
                matched.push(t);
            }
        }
        if (s < minScore)
            continue;
        scored.push({ doc, score: s, matchedTerms: matched });
    }
    scored.sort((a, b) => b.score - a.score);
    const matches = [];
    for (const { doc, score, matchedTerms } of scored.slice(0, limit)) {
        // Most distinctive (highest-IDF) terms first; they drive the snippet.
        matchedTerms.sort((a, b) => (idf.get(b) ?? 0) - (idf.get(a) ?? 0));
        // v1.8.1 fix: the snippet is cut from `parsed.body`, never the full
        // file, so YAML frontmatter keys/values cannot leak into the response.
        // The index is built from the body too, so a matched term is normally
        // found by the substring search below.
        const { parsed } = await vault.readNote(vault.resolveInside(doc.relPath), doc.mtimeMs);
        const body = parsed.body;
        // Lower-case once per note instead of once per matched term.
        const lowerBody = body.toLowerCase();
        let snippetText = "";
        for (const t of matchedTerms) {
            const idx = lowerBody.indexOf(t);
            if (idx >= 0) {
                snippetText = sliceSnippet(body, idx, t.length).snippet;
                break;
            }
        }
        matches.push({
            path: doc.relPath,
            title: stripMd(doc.basename),
            score: Math.round(score * 10000) / 10000,
            snippet: snippetText,
            matched_terms: matchedTerms.slice(0, 8),
            mtime: new Date(doc.mtimeMs).toISOString()
        });
    }
    return { query: args.query, total_docs: docs.length, method: "tfidf-cosine", matches };
}
2316
/**
 * v3.1.0 — choose which text an embeddings-search call should embed.
 *
 * HyDE-augmented retrieval (Gao et al 2023) embeds the agent-supplied
 * `hypothetical_answer` when one is given; if it is missing, empty or only
 * whitespace, the raw query is embedded instead.
 *
 * Kept as a pure helper so the decision can be unit-tested without loading
 * the embedder that the real `embeddingsSearch` function needs.
 *
 * @param args Object with `query` and optional `hypothetical_answer`.
 * @returns {{text: string, usedHyde: boolean}} Text to embed and whether it
 *   came from the hypothetical answer (trimmed) rather than the query.
 */
export function pickEmbedTextForHyde(args) {
    const hypothetical = args.hypothetical_answer?.trim() ?? "";
    if (hypothetical.length === 0) {
        return { text: args.query, usedHyde: false };
    }
    return { text: hypothetical, usedHyde: true };
}
2332
/**
 * Embedding (cosine) search against the persistent embedding index.
 *
 * @param vault Vault; used for existence, `root` and exclusion checks.
 * @param args `query` (required), optional `hypothetical_answer`, `folder`,
 *   `limit` (default 10), `min_score` (default 0.3) and `model`.
 * @param embedFile Path of the embedding database file.
 * @param hnsw Optional HNSW handle (`index`, `rowByLabel`, optional `ef`);
 *   when absent the database's own search is used.
 * @returns {Promise<object>} `{ query, method, model, total_chunks, matches }`
 *   plus `hyde: true` when the hypothetical answer was embedded.
 * @throws {Error} On an empty query, a missing index file, or when the
 *   embedder returns no vector.
 */
export async function embeddingsSearch(vault, args, embedFile, hnsw) {
    await vault.ensureExists();
    if (!args.query.trim())
        throw new Error("query must not be empty");
    // v3.1.0 — HyDE: embed the hypothetical answer when present, else the query.
    const { text: embedText, usedHyde } = pickEmbedTextForHyde(args);
    const limit = args.limit ?? 10;
    const minScore = args.min_score ?? 0.3;
    // Heavy modules are loaded lazily, only when the tool is actually called.
    const [embedDbModule, embeddingsModule] = await Promise.all([
        import("./embed-db.js"),
        import("./embeddings.js")
    ]);
    const { EmbedDb } = embedDbModule;
    const { loadEmbedder, resolveModel } = embeddingsModule;
    // Check for the index file first so "index not built yet" is reported
    // separately from "model failed to load".
    const { existsSync } = await import("node:fs");
    if (!existsSync(embedFile)) {
        const notFound = `Embedding index not found at ${embedFile}. ` +
            `Run: enquire-mcp build-embeddings --vault ${vault.root} ` +
            `(first-time setup also needs: enquire-mcp install-model multilingual)`;
        throw new Error(notFound);
    }
    const model = resolveModel(args.model);
    const db = new EmbedDb({
        file: embedFile,
        vaultRoot: vault.root,
        modelAlias: model.alias,
        dim: model.dim
    });
    await db.open();
    try {
        const total = db.totalChunks();
        if (total === 0) {
            return { query: args.query, method: "embeddings-cosine", model: model.alias, total_chunks: 0, matches: [] };
        }
        const embedder = await loadEmbedder(args.model);
        const vectors = await embedder.embed([embedText]);
        const qVec = vectors[0];
        if (!qVec)
            throw new Error("Embedder returned no vectors for the query");
        // v2.0.0-beta.2 P0 fix: the persistent index may still hold rows for
        // paths that are excluded now (exclusion flags can change after the
        // index was built). Those rows are dropped below before anything is
        // returned; fetching 2× `limit` keeps the top-K reasonably full after
        // that filtering.
        const overFetch = limit * 2;
        let rawHits;
        if (hnsw) {
            // v2.13.0 — HNSW path. Approximate search can miss a true nearest
            // neighbour, so it asks for more: twice `overFetch` (4× limit),
            // at least 30, capped at the number of indexed rows (minimum 1).
            const indexSize = Math.max(hnsw.rowByLabel.size, 1);
            const k = Math.min(Math.max(overFetch * 2, 30), indexSize);
            const knnOptions = hnsw.ef !== undefined ? { ef: hnsw.ef } : undefined;
            const result = hnsw.index.searchKnn(qVec, k, knnOptions);
            const { hnswResultsToHits } = await import("./hnsw.js");
            // Apply the folder filter and min_score floor here, mirroring the
            // options handed to `db.search` on the other path.
            const prefix = args.folder ? `${args.folder.replace(/\/+$/, "")}/` : null;
            rawHits = hnswResultsToHits(result, hnsw.rowByLabel).filter((h) => (prefix === null || h.rel_path.startsWith(prefix)) && h.score >= minScore);
        }
        else {
            rawHits = db.search(qVec, overFetch, { folder: args.folder, minScore });
        }
        const matches = [];
        for (const h of rawHits) {
            if (matches.length >= limit)
                break;
            if (vault.isExcluded(h.rel_path))
                continue;
            matches.push({
                path: h.rel_path,
                title: stripMd(path.basename(h.rel_path)),
                score: Math.round(h.score * 10000) / 10000,
                snippet: h.text_preview.slice(0, 240),
                chunk_index: h.chunk_index,
                line_start: h.line_start,
                line_end: h.line_end,
                kind: h.kind
            });
        }
        const response = {
            query: args.query,
            method: "embeddings-cosine",
            model: model.alias,
            total_chunks: total,
            matches
        };
        if (usedHyde)
            response.hyde = true;
        return response;
    }
    finally {
        db.close();
    }
}
2423
/** Split a fused id into its note path and, for block granularity, its chunk index. */
function splitHybridId(id, granularity) {
    if (granularity === "block") {
        // Use the LAST "#": block ids are built as `${path}#${chunk_index}`.
        const hashIdx = id.lastIndexOf("#");
        if (hashIdx > 0) {
            const parsed = Number.parseInt(id.slice(hashIdx + 1), 10);
            return {
                notePath: id.slice(0, hashIdx),
                chunkIndex: Number.isInteger(parsed) && parsed >= 0 ? parsed : undefined
            };
        }
    }
    return { notePath: id, chunkIndex: undefined };
}
/** Rank every hit separately (block granularity), in input order, under the id from `idOf`. */
function hybridRankPerBlock(hits, idOf) {
    return hits.map((h, i) => ({
        id: idOf(h),
        rank: i + 1,
        score: h.score,
        snippet: h.snippet,
        chunk_index: h.chunk_index,
        line_start: h.line_start,
        line_end: h.line_end,
        kind: h.kind
    }));
}
/** Keep the first (best-ranked) hit per note and number the survivors 1..n (note granularity). */
function hybridRankPerNote(hits, pathOf) {
    const seen = new Set();
    const ranked = [];
    for (const h of hits) {
        const id = pathOf(h);
        if (seen.has(id))
            continue;
        seen.add(id);
        ranked.push({
            id,
            rank: ranked.length + 1,
            score: h.score,
            snippet: h.snippet,
            chunk_index: h.chunk_index,
            line_start: h.line_start,
            line_end: h.line_end,
            kind: h.kind
        });
    }
    return ranked;
}
/**
 * Hybrid search: fuse BM25 (FTS5), TF-IDF and embedding rankings with
 * reciprocal rank fusion, then optionally apply a wikilink graph boost and a
 * cross-encoder rerank.
 *
 * @param vault Vault to search.
 * @param args `query` (required), optional `folder`, `limit` (default 10),
 *   `min_signals` (default 1), `granularity` ("note" default, or "block"),
 *   `graph_boost` (default on) and `embedding_model`.
 * @param ctx `embedFile` plus optional `ftsIndex`, `hnsw`, `reranker`
 *   (`alias`, `topN`) and `rerankerOverride`.
 * @returns {Promise<object>} `{ query, method: "rrf", k, signals_used,
 *   total_candidates, matches }` plus `signal_errors` when a ranker or the
 *   reranker failed.
 * @throws {Error} When `query` is empty or whitespace-only.
 */
export async function searchHybrid(vault, args, ctx) {
    await vault.ensureExists();
    if (!args.query.trim())
        throw new Error("query must not be empty");
    const limit = args.limit ?? 10;
    const minSignals = args.min_signals ?? 1;
    const granularity = args.granularity ?? "note";
    // Fan-out per-ranker top-K. Bigger than the caller's `limit` so RRF has
    // room to surface a doc that's mid-rank in one signal but top in another.
    const fanOutK = Math.max(50, limit * 5);
    const [{ reciprocalRankFusion, RRF_K }, { existsSync }] = await Promise.all([import("./rrf.js"), import("node:fs")]);
    // v2.0.0-beta.2 P1 fix: collect per-signal errors for response-side observability.
    const signalErrors = {};
    const signalsUsed = [];
    // A failing stage is recorded and logged; it never aborts the search.
    const recordFailure = (key, label, err) => {
        const msg = err instanceof Error ? err.message : String(err);
        signalErrors[key] = msg;
        process.stderr.write(`obsidian_search: ${label} failed — ${msg}\n`);
    };
    // ─── BM25 (FTS5) ────────────────────────────────────────────────────────
    let bm25Ranked = [];
    if (ctx.ftsIndex) {
        try {
            // v2.0.0-beta.2 P0 fix: drop excluded paths BEFORE ranking — the
            // FTS database may have been built with different exclusion flags.
            const ftsHits = ctx.ftsIndex
                .search(args.query, { limit: fanOutK, folder: args.folder })
                .filter((h) => !vault.isExcluded(h.rel_path));
            // v2.2.0 granularity: "block" keeps each chunk distinct under
            // `path#chunk_index`; "note" collapses to the best hit per path.
            bm25Ranked =
                granularity === "block"
                    ? hybridRankPerBlock(ftsHits, (h) => `${h.rel_path}#${h.chunk_index}`)
                    : hybridRankPerNote(ftsHits, (h) => h.rel_path);
            if (bm25Ranked.length > 0)
                signalsUsed.push("bm25");
        }
        catch (err) {
            recordFailure("bm25", "BM25 ranker", err);
        }
    }
    // ─── TF-IDF ─────────────────────────────────────────────────────────────
    // Runs unconditionally; ids are always plain note paths.
    let tfidfRanked = [];
    try {
        const tfidf = await semanticSearch(vault, {
            query: args.query,
            folder: args.folder,
            limit: fanOutK,
            min_score: 0.05
        });
        tfidfRanked = tfidf.matches.map((m, i) => ({
            id: m.path,
            rank: i + 1,
            score: m.score,
            snippet: m.snippet
        }));
        if (tfidfRanked.length > 0)
            signalsUsed.push("tfidf");
    }
    catch (err) {
        recordFailure("tfidf", "TF-IDF ranker", err);
    }
    // ─── ML embeddings (if .embed.db exists) ────────────────────────────────
    let embedRanked = [];
    if (existsSync(ctx.embedFile)) {
        try {
            // v2.0.0-beta.1 P1 fix: `min_score: 0` so the embeddings candidate
            // pool is not shrunk by the standalone tool's 0.3 default; the
            // precision filter happens after fusion via `min_signals`.
            const embed = await embeddingsSearch(vault, { query: args.query, folder: args.folder, limit: fanOutK, model: args.embedding_model, min_score: 0 }, ctx.embedFile, ctx.hnsw);
            embedRanked =
                granularity === "block"
                    ? hybridRankPerBlock(embed.matches, (m) => `${m.path}#${m.chunk_index ?? 0}`)
                    : hybridRankPerNote(embed.matches, (m) => m.path);
            if (embedRanked.length > 0)
                signalsUsed.push("embeddings");
        }
        catch (err) {
            recordFailure("embeddings", "embeddings ranker", err);
        }
    }
    // ─── RRF fusion ─────────────────────────────────────────────────────────
    const toRrfInput = (ranked) => ranked.map((h) => ({ id: h.id, rank: h.rank, score: h.score }));
    const fused = reciprocalRankFusion({
        bm25: toRrfInput(bm25Ranked),
        tfidf: toRrfInput(tfidfRanked),
        embeddings: toRrfInput(embedRanked)
    }, { topK: Math.max(limit * 4, 30) } // overshoot — graph boost may rerank
    );
    // ─── v2.3.0: Wikilink graph-boost ───────────────────────────────────────
    // Each fused candidate gains ALPHA per *other* candidate note that links
    // to it. The boost is small (α=0.005): enough to break ties without
    // overriding a strong single-ranker signal.
    const graphBoost = args.graph_boost !== false; // default ON
    if (graphBoost && fused.length > 1) {
        // Ids are reduced to note paths with the same helper the response
        // builder uses, so note-mode paths containing "#" are not truncated.
        const candidatePaths = new Set(fused.map((f) => splitHybridId(f.id, granularity).notePath));
        const outLinks = new Map();
        for (const candidatePath of candidatePaths) {
            try {
                const note = await vault.readNote(vault.resolveInside(candidatePath));
                const targets = new Set();
                for (const wl of note.parsed.wikilinks) {
                    if (!wl.target)
                        continue;
                    // Wikilinks can be by basename ("Foo") or relative path
                    // ("Sub/Foo"); store both raw and `.md`-stripped forms.
                    targets.add(wl.target);
                    targets.add(stripMd(wl.target));
                }
                outLinks.set(candidatePath, targets);
            }
            catch {
                // skip unreadable notes
            }
        }
        const ALPHA = 0.005;
        for (const f of fused) {
            const fPath = splitHybridId(f.id, granularity).notePath;
            const fBasename = stripMd(path.basename(fPath));
            let inDegree = 0;
            for (const [otherPath, targets] of outLinks) {
                if (otherPath === fPath)
                    continue; // self-links do not count
                if (targets.has(fPath) || targets.has(stripMd(fPath)) || targets.has(fBasename)) {
                    inDegree += 1;
                }
            }
            if (inDegree > 0)
                f.score += ALPHA * inDegree;
        }
        fused.sort((a, b) => b.score - a.score);
    }
    // Lookup tables for attaching the best evidence to each fused candidate.
    const bm25Map = new Map(bm25Ranked.map((h) => [h.id, h]));
    const tfidfMap = new Map(tfidfRanked.map((h) => [h.id, h]));
    const embedMap = new Map(embedRanked.map((h) => [h.id, h]));
    // ─── v2.9.0: Cross-encoder reranking (post-RRF, post-graph-boost) ────────
    // The top-N fused candidates are scored as (query, snippet) pairs and
    // re-sorted by that score. A failure is reported as
    // `signal_errors.reranker` and the fused order is left untouched.
    let rerankerScores = null;
    if ((ctx.reranker || ctx.rerankerOverride) && fused.length > 0) {
        const topN = ctx.reranker?.topN ?? 50;
        const rerankBatch = fused.slice(0, topN);
        try {
            // Prefer the test-injected reranker when present; otherwise lazy-load.
            let reranker;
            if (ctx.rerankerOverride) {
                reranker = ctx.rerankerOverride;
            }
            else {
                const { loadReranker } = await import("./embeddings.js");
                reranker = await loadReranker(ctx.reranker?.alias);
            }
            // Passage per candidate: first available snippet in the order
            // BM25 > embeddings > TF-IDF, or "" when no ranker supplied one
            // (the empty passage is still sent to the reranker).
            const passages = rerankBatch.map((f) => {
                const snippet = bm25Map.get(f.id)?.snippet ?? embedMap.get(f.id)?.snippet ?? tfidfMap.get(f.id)?.snippet ?? "";
                // Strip the «…» highlight markers and cap at 600 chars to
                // stay well inside the model's input budget.
                return snippet.replace(/[«»]/g, "").slice(0, 600);
            });
            const scores = await reranker.score(args.query, passages);
            rerankerScores = new Map();
            for (let i = 0; i < rerankBatch.length; i++) {
                const f = rerankBatch[i];
                const s = scores[i];
                if (f && typeof s === "number")
                    rerankerScores.set(f.id, s);
            }
            // Re-order only fused[0..topN); everything below keeps its fused
            // order. Candidates without a numeric score sort last (-Infinity).
            const reordered = [...rerankBatch].sort((a, b) => {
                const sa = rerankerScores?.get(a.id) ?? -Infinity;
                const sb = rerankerScores?.get(b.id) ?? -Infinity;
                return sb - sa;
            });
            for (let i = 0; i < reordered.length; i++) {
                fused[i] = reordered[i];
            }
        }
        catch (err) {
            // The reranker is not a ranking signal, but the existing error
            // dict is where callers already look for partial failures.
            recordFailure("reranker", "reranker", err);
        }
    }
    const matches = [];
    for (const f of fused) {
        const numSignals = Object.keys(f.per_signal).length;
        if (numSignals < minSignals)
            continue;
        // Snippet preference: BM25 > embeddings > TF-IDF (BM25 snippets
        // bracket the matched terms with «…»).
        const bm = bm25Map.get(f.id);
        const emb = embedMap.get(f.id);
        const tf = tfidfMap.get(f.id);
        const bestEvidence = bm ?? emb ?? tf;
        // Only signals that contributed get a key; an explicit `undefined`
        // would still show up in Object.keys().
        const perSignal = {};
        if (f.per_signal.bm25)
            perSignal.bm25 = { rank: f.per_signal.bm25.rank, score: f.per_signal.bm25.score };
        if (f.per_signal.tfidf)
            perSignal.tfidf = { rank: f.per_signal.tfidf.rank, score: f.per_signal.tfidf.score };
        if (f.per_signal.embeddings) {
            perSignal.embeddings = { rank: f.per_signal.embeddings.rank, score: f.per_signal.embeddings.score };
        }
        // v2.2.0: in "block" mode the id is "path#chunk_index"; split it back
        // into path + chunk_index. In "note" mode the id is the path.
        const { notePath: pathPart, chunkIndex: chunkFromId } = splitHybridId(f.id, granularity);
        // v2.8.0: BM25 / embeddings hits carry the content kind; TF-IDF does
        // not. Either reporting "pdf" wins, otherwise the kind is "md".
        const kind = bm?.kind === "pdf" || emb?.kind === "pdf" ? "pdf" : "md";
        // PDF titles drop the `.pdf` suffix; everything else drops `.md`.
        const baseName = path.basename(pathPart);
        const title = kind === "pdf" ? baseName.replace(/\.pdf$/i, "") : stripMd(baseName);
        const rerankerScore = rerankerScores?.get(f.id);
        matches.push({
            path: pathPart,
            title,
            score: Math.round(f.score * 100000) / 100000,
            snippet: bestEvidence?.snippet ?? "",
            chunk_index: chunkFromId ?? bm?.chunk_index ?? emb?.chunk_index,
            line_start: bm?.line_start ?? emb?.line_start,
            line_end: bm?.line_end ?? emb?.line_end,
            kind,
            per_signal: perSignal,
            ...(typeof rerankerScore === "number" && Number.isFinite(rerankerScore)
                ? { reranker_score: Math.round(rerankerScore * 100000) / 100000 }
                : {})
        });
        if (matches.length >= limit)
            break;
    }
    // v2.0.0-beta.2 P1 fix: `signal_errors` is present only when something
    // actually failed, so happy-path responses stay narrow.
    const response = {
        query: args.query,
        method: "rrf",
        k: RRF_K,
        signals_used: signalsUsed,
        total_candidates: fused.length,
        matches
    };
    if (Object.keys(signalErrors).length > 0) {
        response.signal_errors = signalErrors;
    }
    return response;
}
2805
/**
 * Assemble a markdown "context bundle" for a query: the top hybrid-search
 * notes, optional backlink summaries for the first three of them, and an
 * optional list of recently edited date-named notes.
 *
 * The budget is tracked in characters at roughly 4 characters per token.
 *
 * @param vault Vault to read from.
 * @param args `query` (required), optional `folder`, `budget_tokens`
 *   (default 4000), `include_backlinks` (default on) and `recent_dailies`
 *   (default 0).
 * @param ctx Search context; `ftsIndex` and `embedFile` are forwarded to
 *   `searchHybrid`.
 * @returns {Promise<object>} `{ query, bundle, estimated_tokens,
 *   budget_tokens, sections, included_notes }`.
 * @throws {Error} When `query` is missing or blank.
 */
export async function contextPack(vault, args, ctx) {
    await vault.ensureExists();
    if (!args.query?.trim())
        throw new Error("context_pack: `query` is required");
    const budget = args.budget_tokens ?? 4000;
    const charBudget = budget * 4; // ~4 chars/token
    const includeBacklinks = args.include_backlinks !== false;
    const recentN = Math.max(0, args.recent_dailies ?? 0);
    // 1) Hybrid retrieval — top 10 notes.
    const search = await searchHybrid(vault, { query: args.query, folder: args.folder, limit: 10 }, { ftsIndex: ctx.ftsIndex, embedFile: ctx.embedFile });
    const heading = `# Context for: ${args.query}\n`;
    const sections = [heading];
    const includedNotes = [];
    const sizes = { notes: 0, backlinks: 0, dailies: 0 };
    let charsUsed = heading.length;
    const budgetExhausted = () => charsUsed >= charBudget;
    // 2) Note bodies, in search order, until the budget runs out.
    sections.push("## Top notes");
    for (const match of search.matches) {
        if (budgetExhausted())
            break;
        try {
            const note = await vault.readNote(vault.resolveInside(match.path), undefined);
            const body = note.parsed.body.trim();
            const remaining = charBudget - charsUsed;
            // Each note may use about half of what is left (never less than
            // 500 chars), leaving room for backlinks and dailies.
            const noteCap = Math.min(body.length, Math.max(500, Math.floor(remaining * 0.5)));
            let text = body;
            if (body.length > noteCap) {
                text = `${body.slice(0, noteCap)}\n\n[…truncated…]`;
            }
            const block = `### ${match.path}\n\n${text}\n`;
            sections.push(block);
            // The header allowance (path length + 5) is charged on top of
            // the block length.
            charsUsed += block.length + (match.path.length + 5);
            sizes.notes += block.length;
            includedNotes.push(match.path);
        }
        catch {
            // skip unreadable notes
        }
    }
    // 3) One-line backlink summaries for the first three included notes.
    if (includeBacklinks && includedNotes.length > 0 && !budgetExhausted()) {
        sections.push("## Backlinks");
        let backlinksAdded = 0;
        for (const notePath of includedNotes.slice(0, 3)) {
            if (budgetExhausted())
                break;
            try {
                const links = await getBacklinks(vault, { path: notePath, limit: 5 });
                if (links.length === 0)
                    continue;
                const lines = links.map((l) => `- ${l.path} : ${(l.snippets[0] ?? "").slice(0, 80)}`);
                const block = `### → ${notePath}\n${lines.join("\n")}\n`;
                sections.push(block);
                charsUsed += block.length;
                sizes.backlinks += block.length;
                backlinksAdded += links.length;
            }
            catch {
                // skip
            }
        }
        if (backlinksAdded === 0)
            sections.pop(); // nothing was added — drop the empty heading
    }
    // 4) Recently edited notes whose path contains a YYYY-MM-DD date.
    if (recentN > 0 && !budgetExhausted()) {
        try {
            const recent = await getRecentEdits(vault, { since_minutes: 60 * 24 * 7, limit: recentN, folder: args.folder });
            const dailies = recent.filter((r) => /\d{4}-\d{2}-\d{2}/.test(r.path));
            if (dailies.length > 0) {
                sections.push(`## Recent (${dailies.length} dailies, last 7 days)`);
                for (const daily of dailies) {
                    if (budgetExhausted())
                        break;
                    const line = `- ${daily.path} (${daily.mtime})`;
                    sections.push(line);
                    charsUsed += line.length;
                    sizes.dailies += line.length;
                }
            }
        }
        catch {
            // skip
        }
    }
    const bundle = sections.join("\n");
    return {
        query: args.query,
        bundle,
        estimated_tokens: Math.ceil(bundle.length / 4),
        budget_tokens: budget,
        sections: sizes,
        included_notes: includedNotes
    };
}
2900
- // ─── small set / string helpers shared by find_similar / get_note_neighbors ─
2901
/**
 * Jaccard similarity |a ∩ b| / |a ∪ b| of two sets.
 *
 * @param {Set} a
 * @param {Set} b
 * @returns {number} Value in [0, 1]; 0 when both sets are empty.
 */
function jaccard(a, b) {
    if (a.size === 0 && b.size === 0) {
        return 0;
    }
    const shared = [...a].filter((item) => b.has(item)).length;
    const unionSize = a.size + b.size - shared;
    if (unionSize === 0) {
        return 0;
    }
    return shared / unionSize;
}
2911
/**
 * Count the elements of `a` that are also members of `b`.
 *
 * @param {Iterable} a Elements to test.
 * @param {Set} b Membership set (anything with `has`).
 * @returns {number} Number of elements of `a` for which `b.has` is true.
 */
function intersectionSize(a, b) {
    return Array.from(a).reduce((count, item) => (b.has(item) ? count + 1 : count), 0);
}
2918
/**
 * Collect the distinct substrings of length `n` found in `s`.
 *
 * @param {string} s Input string.
 * @param {number} n Gram length.
 * @returns {Set<string>} Distinct n-grams; when `s` is shorter than `n` the
 *   set holds `s` itself, or is empty if `s` is empty.
 */
function ngrams(s, n) {
    const grams = new Set();
    if (s.length >= n) {
        for (let start = 0; start + n <= s.length; start += 1) {
            grams.add(s.slice(start, start + n));
        }
    }
    else if (s) {
        // Too short for a full gram: keep the whole string as one entry.
        grams.add(s);
    }
    return grams;
}
2929
// Lookup indexes memoised per entries array (keyed by array identity).
const entryIndexCache = new WeakMap();
/**
 * Return the lookup indexes for an entries array, building them on first use.
 *
 * @param {object[]} entries Vault entries with `basename` and `relPath`.
 * @returns {{byBasename: Map<string, object[]>, byRelPath: Map<string, object>}}
 *   `byBasename` maps the lower-cased, `.md`-stripped basename to every entry
 *   sharing it; `byRelPath` maps the lower-cased, `.md`-stripped relative
 *   path to one entry (the last one seen wins on a collision).
 */
function indexFor(entries) {
    const memo = entryIndexCache.get(entries);
    if (memo) {
        return memo;
    }
    const byBasename = new Map();
    const byRelPath = new Map();
    for (const entry of entries) {
        const nameKey = stripMd(entry.basename).toLowerCase();
        const bucket = byBasename.get(nameKey);
        if (bucket === undefined) {
            byBasename.set(nameKey, [entry]);
        }
        else {
            bucket.push(entry);
        }
        byRelPath.set(stripMd(entry.relPath).toLowerCase(), entry);
    }
    const built = { byBasename, byRelPath };
    entryIndexCache.set(entries, built);
    return built;
}
2949
/**
 * Resolve a link target to a vault entry.
 *
 * Resolution order:
 *   1. Relative targets ("./", "../", or containing "/../") are joined onto
 *      the directory of `fromNote` and looked up by relative path.
 *   2. Basename match; when several entries share the basename, one in the
 *      same directory as `fromNote` is preferred, otherwise the first.
 *   3. For targets containing "/": exact relative-path match, then the first
 *      entry whose relative path ends with `/<target>`.
 * All comparisons are case-insensitive and ignore a trailing `.md`.
 *
 * @param {object[]} entries Vault entries.
 * @param {string} target Link target text.
 * @param {string} [fromNote] Relative path of the note containing the link.
 * @returns {object|null} The matching entry, or null when nothing matches.
 */
function findBestMatch(entries, target, fromNote) {
    const { byBasename, byRelPath } = indexFor(entries);
    const looksRelative = target.startsWith("./") || target.startsWith("../") || target.includes("/../");
    if (looksRelative && fromNote) {
        const baseDir = path.dirname(fromNote).split(path.sep).join("/");
        const resolved = path.posix.normalize(path.posix.join(baseDir, target));
        const viaRelative = byRelPath.get(stripMd(resolved).toLowerCase());
        if (viaRelative) {
            return viaRelative;
        }
    }
    const key = stripMd(target).toLowerCase();
    const sameName = byBasename.get(key) ?? [];
    if (sameName.length === 1) {
        return sameName[0] ?? null;
    }
    if (sameName.length > 1) {
        if (fromNote) {
            const noteDir = path.dirname(fromNote);
            const sibling = sameName.find((entry) => path.dirname(entry.relPath) === noteDir);
            if (sibling) {
                return sibling;
            }
        }
        return sameName[0] ?? null;
    }
    if (!target.includes("/")) {
        return null;
    }
    const byPath = byRelPath.get(key);
    if (byPath) {
        return byPath;
    }
    // Suffix match — a linear scan, reached only for path-qualified targets
    // that did not match exactly.
    const suffix = `/${key}`;
    for (const entry of entries) {
        if (stripMd(entry.relPath).toLowerCase().endsWith(suffix)) {
            return entry;
        }
    }
    return null;
}
2987
/**
 * Cut a one-line preview around a match and report the match's line number.
 *
 * @param {string} text - Full text that was searched.
 * @param {number} idx - Offset of the match in `text`; negative means "no match".
 * @param {number} qLen - Length of the matched query.
 * @returns {{snippet: string, line: number}} Up to 60 characters of context on
 *   each side of the match with whitespace runs collapsed to single spaces, an
 *   ellipsis on each truncated side, and the 1-based line of the match. A
 *   negative `idx` yields `{ snippet: "", line: 0 }`.
 */
function sliceSnippet(text, idx, qLen) {
    if (idx < 0) {
        return { snippet: "", line: 0 };
    }
    const start = Math.max(0, idx - 60);
    const end = Math.min(text.length, idx + qLen + 60);
    const body = text.slice(start, end).replace(/\s+/g, " ").trim();
    const lead = start > 0 ? "…" : "";
    const tail = end < text.length ? "…" : "";
    // Line number = newlines before the match, plus one.
    const line = text.slice(0, idx).split("\n").length;
    return { snippet: `${lead}${body}${tail}`, line };
}
3000
/**
 * Drop a single trailing `.md` extension (any letter case) from a name.
 *
 * @param {string} name - File name or relative path.
 * @returns {string} `name` without its final `.md`; unchanged if it has none.
 */
function stripMd(name) {
    const mdSuffix = /\.md$/i;
    return name.replace(mdSuffix, "");
}
3003
/**
 * Canonicalise a tag: remove any leading `#` characters and lower-case it.
 *
 * @param {string} t - Tag text, with or without leading `#`.
 * @returns {string} The lower-cased tag without its leading hashes.
 */
function normalizeTag(t) {
    const withoutHashes = t.replace(/^#+/, "");
    return withoutHashes.toLowerCase();
}
3006
/**
 * List PDF files in the vault, most recently modified first.
 *
 * Entries are ranked by modification time BEFORE `limit` is applied, so the
 * result holds the newest `limit` readable PDFs rather than whichever files
 * the vault happened to enumerate first.
 *
 * @param {object} vault - Vault handle; must provide `ensureExists()`,
 *   `listFilesByExtension(ext, folder)` and `readBinaryFile(absPath)`.
 * @param {{limit?: number, folder?: string}} [args] - `limit` caps the number
 *   of results (default 100); `folder` is passed through to the vault listing.
 * @returns {Promise<Array<{path: string, name: string, size_bytes: number, mtime: string}>>}
 *   One record per readable PDF, sorted by `mtime` descending.
 */
export async function listPdfs(vault, args = {}) {
    await vault.ensureExists();
    const limit = args.limit ?? 100;
    const found = await vault.listFilesByExtension(".pdf", args.folder);
    // Sort a copy so the vault's own array is never reordered.
    const newestFirst = [...found].sort((a, b) => b.mtimeMs - a.mtimeMs);
    const out = [];
    for (const entry of newestFirst) {
        if (out.length >= limit) {
            break;
        }
        let size;
        try {
            const buf = await vault.readBinaryFile(entry.absPath);
            size = buf.byteLength;
        }
        catch {
            // Unreadable PDF — skip without poisoning the listing.
            continue;
        }
        out.push({
            path: entry.relPath,
            name: entry.basename.replace(/\.pdf$/i, ""),
            size_bytes: size,
            mtime: new Date(entry.mtimeMs).toISOString()
        });
    }
    return out;
}
3033
/**
 * Extract the text of one PDF in the vault.
 *
 * @param {object} vault - Vault handle; must provide `ensureExists()`,
 *   `resolveInside()`, `stat()`, `toRel()` and `readBinaryFile()`.
 * @param {{path: string, pages?: [number, number], include_metadata?: boolean}} args
 *   `path` is the PDF's vault path (`.pdf` is appended when missing). `pages`
 *   is an optional 1-indexed inclusive `[from, to]` range. Metadata is attached
 *   unless `include_metadata` is exactly `false`.
 * @returns {Promise<object>} Per-page text, the joined `full_text`, file size
 *   and mtime, and `metadata` when requested and non-empty.
 * @throws {Error} "path is required" when `args.path` is falsy.
 */
export async function readPdf(vault, args) {
    await vault.ensureExists();
    if (!args.path) {
        throw new Error("path is required");
    }
    const hasPdfSuffix = args.path.toLowerCase().endsWith(".pdf");
    const normalized = hasPdfSuffix ? args.path : `${args.path}.pdf`;
    const abs = vault.resolveInside(normalized);
    const stat = await vault.stat(abs); // throws if missing or excluded
    const rel = vault.toRel(abs);
    const buf = await vault.readBinaryFile(abs);
    // Lazy import — keeps the markdown-only path zero-cost when pdfjs-dist
    // isn't installed (--omit=optional users).
    const { extractPdfText } = await import("./pdf.js");
    const result = await extractPdfText(buf);
    // Optional page-range slice (1-indexed inclusive). A malformed range is
    // ignored and every page is returned; an upper bound past the end is
    // clamped by `slice()`.
    const [from, to] = args.pages && args.pages.length === 2 ? args.pages : [];
    const rangeIsUsable = typeof from === "number" && typeof to === "number" && from > 0 && to >= from;
    const pages = rangeIsUsable ? result.pages.slice(from - 1, to) : result.pages;
    const fileName = rel.split("/").pop();
    const out = {
        path: rel,
        name: fileName?.replace(/\.pdf$/i, "") ?? rel,
        size_bytes: buf.byteLength,
        mtime: new Date(stat.mtimeMs).toISOString(),
        page_count: pages.length,
        has_text: pages.some((page) => !page.isEmpty),
        pages: pages.map(({ pageNumber, text, isEmpty, charCount }) => ({
            page_number: pageNumber,
            text,
            is_empty: isEmpty,
            char_count: charCount
        })),
        full_text: pages
            .map(({ text }) => text)
            .filter((text) => text.length > 0)
            .join("\n\n"),
        total_page_count: result.pageCount
    };
    const wantsMetadata = args.include_metadata !== false;
    if (wantsMetadata && Object.keys(result.metadata).length > 0) {
        const { title, author, subject, keywords, creator, producer, creationDate, modDate } = result.metadata;
        out.metadata = {
            title,
            author,
            subject,
            keywords,
            creator,
            producer,
            creation_date: creationDate,
            mod_date: modDate
        };
    }
    return out;
}
3092
/**
 * Run OCR over one PDF in the vault and return the recognised text.
 *
 * @param {object} vault - Vault handle; must provide `ensureExists()`,
 *   `resolveInside()`, `stat()`, `toRel()` and `readBinaryFile()`.
 * @param {{path: string, lang?: *, pages?: *, scale?: number}} args
 *   `path` is the PDF's vault path (`.pdf` is appended when missing). `lang`,
 *   `pages` and a numeric `scale` are forwarded to the OCR extractor as
 *   `langs`, `pages` and `scale` only when supplied.
 * @returns {Promise<object>} Per-page text with confidence rounded to one
 *   decimal, the full text, mean confidence (NaN when not finite), and the
 *   languages reported by the extractor.
 * @throws {Error} "path is required" when `args.path` is falsy.
 */
export async function ocrPdf(vault, args) {
    await vault.ensureExists();
    if (!args.path) {
        throw new Error("path is required");
    }
    const hasPdfSuffix = args.path.toLowerCase().endsWith(".pdf");
    const normalized = hasPdfSuffix ? args.path : `${args.path}.pdf`;
    const abs = vault.resolveInside(normalized);
    const stat = await vault.stat(abs); // throws if missing or excluded
    const rel = vault.toRel(abs);
    const buf = await vault.readBinaryFile(abs);
    // Lazy import — keeps the markdown-only path zero-cost when tesseract /
    // canvas optionalDeps aren't installed.
    const { extractPdfWithOcr } = await import("./ocr.js");
    // Only forward options the caller actually set, so the extractor's own
    // defaults apply to everything else.
    const ocrOptions = {};
    if (args.lang) {
        ocrOptions.langs = args.lang;
    }
    if (args.pages) {
        ocrOptions.pages = args.pages;
    }
    if (typeof args.scale === "number") {
        ocrOptions.scale = args.scale;
    }
    const result = await extractPdfWithOcr(buf, ocrOptions);
    const roundToTenth = (value) => Math.round(value * 10) / 10;
    const fileName = rel.split("/").pop();
    let meanConfidence = Number.NaN;
    if (Number.isFinite(result.meanConfidence)) {
        meanConfidence = roundToTenth(result.meanConfidence);
    }
    return {
        path: rel,
        name: fileName?.replace(/\.pdf$/i, "") ?? rel,
        size_bytes: buf.byteLength,
        mtime: new Date(stat.mtimeMs).toISOString(),
        page_count: result.pages.length,
        total_page_count: result.pageCount,
        has_text: result.hasText,
        pages: result.pages.map((page) => ({
            page_number: page.pageNumber,
            text: page.text,
            is_empty: page.isEmpty,
            char_count: page.charCount,
            confidence: roundToTenth(page.confidence)
        })),
        full_text: result.fullText,
        mean_confidence: meanConfidence,
        langs: result.langs
    };
}
3132
- //# sourceMappingURL=tools.js.map